mirror of
https://github.com/facebook/zstd.git
synced 2025-11-22 00:10:22 -05:00
speed optimized version of targetCBlockSize
Note that the size of individual compressed blocks will vary more widely with this modification, but it seems good enough for a first test, and it fixes the speed regression issue. Further refinements can be attempted later.
This commit is contained in:
parent
68a232c591
commit
cc4530924b
@ -122,7 +122,7 @@ ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
|
|||||||
}
|
}
|
||||||
*entropyWritten = 1;
|
*entropyWritten = 1;
|
||||||
DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)litSize, (U32)(op-ostart));
|
DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)litSize, (U32)(op-ostart));
|
||||||
return op-ostart;
|
return (size_t)(op-ostart);
|
||||||
}
|
}
|
||||||
|
|
||||||
static size_t
|
static size_t
|
||||||
@ -187,7 +187,7 @@ ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
|
|||||||
else
|
else
|
||||||
op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
|
op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
|
||||||
if (nbSeq==0) {
|
if (nbSeq==0) {
|
||||||
return op - ostart;
|
return (size_t)(op - ostart);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* seqHead : flags for FSE encoding type */
|
/* seqHead : flags for FSE encoding type */
|
||||||
@ -209,7 +209,7 @@ ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
|
|||||||
}
|
}
|
||||||
|
|
||||||
{ size_t const bitstreamSize = ZSTD_encodeSequences(
|
{ size_t const bitstreamSize = ZSTD_encodeSequences(
|
||||||
op, oend - op,
|
op, (size_t)(oend - op),
|
||||||
fseTables->matchlengthCTable, mlCode,
|
fseTables->matchlengthCTable, mlCode,
|
||||||
fseTables->offcodeCTable, ofCode,
|
fseTables->offcodeCTable, ofCode,
|
||||||
fseTables->litlengthCTable, llCode,
|
fseTables->litlengthCTable, llCode,
|
||||||
@ -253,7 +253,7 @@ ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
*entropyWritten = 1;
|
*entropyWritten = 1;
|
||||||
return op - ostart;
|
return (size_t)(op - ostart);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** ZSTD_compressSubBlock() :
|
/** ZSTD_compressSubBlock() :
|
||||||
@ -296,11 +296,11 @@ static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
|
|||||||
op += cSeqSize;
|
op += cSeqSize;
|
||||||
}
|
}
|
||||||
/* Write block header */
|
/* Write block header */
|
||||||
{ size_t cSize = (op-ostart)-ZSTD_blockHeaderSize;
|
{ size_t cSize = (size_t)(op-ostart) - ZSTD_blockHeaderSize;
|
||||||
U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
|
U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
|
||||||
MEM_writeLE24(ostart, cBlockHeader24);
|
MEM_writeLE24(ostart, cBlockHeader24);
|
||||||
}
|
}
|
||||||
return op-ostart;
|
return (size_t)(op-ostart);
|
||||||
}
|
}
|
||||||
|
|
||||||
static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize,
|
static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize,
|
||||||
@ -419,6 +419,16 @@ static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMe
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static size_t countLiterals(const seqDef* sp, size_t seqCount)
|
||||||
|
{
|
||||||
|
size_t n, total = 0;
|
||||||
|
assert(sp != NULL);
|
||||||
|
for (n=0; n<seqCount; n++) {
|
||||||
|
total += sp[n].litLength;
|
||||||
|
}
|
||||||
|
return total;
|
||||||
|
}
|
||||||
|
|
||||||
/** ZSTD_compressSubBlock_multi() :
|
/** ZSTD_compressSubBlock_multi() :
|
||||||
* Breaks super-block into multiple sub-blocks and compresses them.
|
* Breaks super-block into multiple sub-blocks and compresses them.
|
||||||
* Entropy will be written to the first block.
|
* Entropy will be written to the first block.
|
||||||
@ -438,10 +448,12 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
|
|||||||
{
|
{
|
||||||
const seqDef* const sstart = seqStorePtr->sequencesStart;
|
const seqDef* const sstart = seqStorePtr->sequencesStart;
|
||||||
const seqDef* const send = seqStorePtr->sequences;
|
const seqDef* const send = seqStorePtr->sequences;
|
||||||
const seqDef* sp = sstart;
|
size_t const nbSeqs = (size_t)(send - sstart);
|
||||||
|
size_t nbSeqsPerBlock = nbSeqs;
|
||||||
const BYTE* const lstart = seqStorePtr->litStart;
|
const BYTE* const lstart = seqStorePtr->litStart;
|
||||||
const BYTE* const lend = seqStorePtr->lit;
|
const BYTE* const lend = seqStorePtr->lit;
|
||||||
const BYTE* lp = lstart;
|
const BYTE* lp = lstart;
|
||||||
|
size_t const nbLiterals = (size_t)(lend - lstart);
|
||||||
BYTE const* ip = (BYTE const*)src;
|
BYTE const* ip = (BYTE const*)src;
|
||||||
BYTE const* const iend = ip + srcSize;
|
BYTE const* const iend = ip + srcSize;
|
||||||
BYTE* const ostart = (BYTE*)dst;
|
BYTE* const ostart = (BYTE*)dst;
|
||||||
@ -451,52 +463,50 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
|
|||||||
const BYTE* mlCodePtr = seqStorePtr->mlCode;
|
const BYTE* mlCodePtr = seqStorePtr->mlCode;
|
||||||
const BYTE* ofCodePtr = seqStorePtr->ofCode;
|
const BYTE* ofCodePtr = seqStorePtr->ofCode;
|
||||||
size_t targetCBlockSize = cctxParams->targetCBlockSize;
|
size_t targetCBlockSize = cctxParams->targetCBlockSize;
|
||||||
size_t litSize, seqCount;
|
|
||||||
int writeLitEntropy = entropyMetadata->hufMetadata.hType == set_compressed;
|
int writeLitEntropy = entropyMetadata->hufMetadata.hType == set_compressed;
|
||||||
int writeSeqEntropy = 1;
|
int writeSeqEntropy = 1;
|
||||||
int lastSequence = 0;
|
size_t nbSubBlocks = 1;
|
||||||
|
|
||||||
DEBUGLOG(5, "ZSTD_compressSubBlock_multi (litSize=%u, nbSeq=%u)",
|
DEBUGLOG(5, "ZSTD_compressSubBlock_multi (litSize=%u, nbSeq=%u)",
|
||||||
(unsigned)(lend-lp), (unsigned)(send-sstart));
|
(unsigned)(lend-lp), (unsigned)(send-sstart));
|
||||||
|
|
||||||
litSize = 0;
|
/* let's start by a general estimation for the full block */
|
||||||
seqCount = 0;
|
{ size_t const cBlockSizeEstimate =
|
||||||
do {
|
ZSTD_estimateSubBlockSize(lp, nbLiterals,
|
||||||
size_t cBlockSizeEstimate = 0;
|
ofCodePtr, llCodePtr, mlCodePtr, nbSeqs,
|
||||||
if (sstart == send) {
|
|
||||||
lastSequence = 1;
|
|
||||||
} else {
|
|
||||||
const seqDef* const sequence = sp + seqCount;
|
|
||||||
lastSequence = sequence == send - 1;
|
|
||||||
litSize += ZSTD_getSequenceLength(seqStorePtr, sequence).litLength;
|
|
||||||
seqCount++;
|
|
||||||
}
|
|
||||||
if (lastSequence) {
|
|
||||||
assert(lp <= lend);
|
|
||||||
assert(litSize <= (size_t)(lend - lp));
|
|
||||||
litSize = (size_t)(lend - lp);
|
|
||||||
}
|
|
||||||
/* I think there is an optimization opportunity here.
|
|
||||||
* Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful
|
|
||||||
* since it recalculates estimate from scratch.
|
|
||||||
* For example, it would recount literal distribution and symbol codes every time.
|
|
||||||
*/
|
|
||||||
cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount,
|
|
||||||
&nextCBlock->entropy, entropyMetadata,
|
&nextCBlock->entropy, entropyMetadata,
|
||||||
workspace, wkspSize, writeLitEntropy, writeSeqEntropy);
|
workspace, wkspSize,
|
||||||
if (cBlockSizeEstimate > targetCBlockSize || lastSequence) {
|
writeLitEntropy, writeSeqEntropy);
|
||||||
|
/* quick estimation */
|
||||||
|
nbSubBlocks = (cBlockSizeEstimate + (targetCBlockSize-1)) / targetCBlockSize;
|
||||||
|
assert(nbSubBlocks > 0);
|
||||||
|
nbSeqsPerBlock = nbSeqs / nbSubBlocks;
|
||||||
|
/* Note: this is very approximate. Obviously, some sub-blocks will be larger and others smaller.
|
||||||
|
* But the contract of this feature has always been approximative, so for now we'll leverage it for speed.
|
||||||
|
* It can be refined later, for closer-to-target compressed block size, if it ever matters. */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* write sub-blocks */
|
||||||
|
{ size_t n;
|
||||||
|
for (n=0; n < nbSubBlocks; n++) {
|
||||||
|
const seqDef* sp = sstart + n*nbSeqsPerBlock;
|
||||||
|
int lastSubBlock = (n==nbSubBlocks-1);
|
||||||
|
size_t const nbSeqsLastSubBlock = nbSeqs - (nbSubBlocks-1) * nbSeqsPerBlock;
|
||||||
|
size_t seqCount = lastSubBlock ? nbSeqsLastSubBlock : nbSeqsPerBlock;
|
||||||
|
size_t litSize = lastSubBlock ? (size_t)(lend-lp) : countLiterals(sp, seqCount);
|
||||||
int litEntropyWritten = 0;
|
int litEntropyWritten = 0;
|
||||||
int seqEntropyWritten = 0;
|
int seqEntropyWritten = 0;
|
||||||
const size_t decompressedSize = ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, lastSequence);
|
const size_t decompressedSize =
|
||||||
|
ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, lastSubBlock);
|
||||||
const size_t cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
|
const size_t cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
|
||||||
sp, seqCount,
|
sp, seqCount,
|
||||||
lp, litSize,
|
lp, litSize,
|
||||||
llCodePtr, mlCodePtr, ofCodePtr,
|
llCodePtr, mlCodePtr, ofCodePtr,
|
||||||
cctxParams,
|
cctxParams,
|
||||||
op, oend-op,
|
op, (size_t)(oend-op),
|
||||||
bmi2, writeLitEntropy, writeSeqEntropy,
|
bmi2, writeLitEntropy, writeSeqEntropy,
|
||||||
&litEntropyWritten, &seqEntropyWritten,
|
&litEntropyWritten, &seqEntropyWritten,
|
||||||
lastBlock && lastSequence);
|
lastBlock && lastSubBlock);
|
||||||
FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
|
FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
|
||||||
if (cSize > 0 && cSize < decompressedSize) {
|
if (cSize > 0 && cSize < decompressedSize) {
|
||||||
DEBUGLOG(5, "Committed the sub-block");
|
DEBUGLOG(5, "Committed the sub-block");
|
||||||
@ -519,7 +529,8 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} while (!lastSequence);
|
}
|
||||||
|
|
||||||
if (writeLitEntropy) {
|
if (writeLitEntropy) {
|
||||||
DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten");
|
DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten");
|
||||||
ZSTD_memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf));
|
ZSTD_memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf));
|
||||||
@ -531,25 +542,10 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
|
|||||||
DEBUGLOG(5, "ZSTD_compressSubBlock_multi has sequence entropy tables unwritten");
|
DEBUGLOG(5, "ZSTD_compressSubBlock_multi has sequence entropy tables unwritten");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
if (ip < iend) {
|
assert(ip == iend); (void)iend;
|
||||||
size_t const cSize = ZSTD_noCompressBlock(op, oend - op, ip, iend - ip, lastBlock);
|
DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed: %u subBlocks, total compressed size = %u",
|
||||||
DEBUGLOG(5, "ZSTD_compressSubBlock_multi last sub-block uncompressed, %zu bytes", (size_t)(iend - ip));
|
(unsigned)nbSubBlocks, (unsigned)(op-ostart));
|
||||||
FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
|
return (size_t)(op-ostart);
|
||||||
assert(cSize != 0);
|
|
||||||
op += cSize;
|
|
||||||
/* We have to regenerate the repcodes because we've skipped some sequences */
|
|
||||||
if (sp < send) {
|
|
||||||
seqDef const* seq;
|
|
||||||
repcodes_t rep;
|
|
||||||
ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep));
|
|
||||||
for (seq = sstart; seq < sp; ++seq) {
|
|
||||||
ZSTD_updateRep(rep.rep, seq->offBase, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
|
|
||||||
}
|
|
||||||
ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed");
|
|
||||||
return op-ostart;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
|
size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
|
||||||
|
|||||||
@ -221,7 +221,10 @@ the last one takes effect.
|
|||||||
* `--target-compressed-block-size=#`:
|
* `--target-compressed-block-size=#`:
|
||||||
Attempt to produce compressed blocks of approximately this size.
|
Attempt to produce compressed blocks of approximately this size.
|
||||||
This will split larger blocks in order to approach this target.
|
This will split larger blocks in order to approach this target.
|
||||||
Notably useful to improve latency when the receiver can make use of early data sooner.
|
This feature is notably useful for improving latency, when the receiver can make use of early, incomplete data.
|
||||||
|
This parameter defines a loose target: compressed blocks will target this size "on average", but individual blocks can still be larger or smaller.
|
||||||
|
Enabling this feature can decrease compression speed by up to ~10% at level 1.
|
||||||
|
Higher levels will see smaller relative speed regression, becoming invisible at higher settings.
|
||||||
* `-o FILE`:
|
* `-o FILE`:
|
||||||
save result into `FILE`.
|
save result into `FILE`.
|
||||||
* `-f`, `--force`:
|
* `-f`, `--force`:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user