mirror of
https://github.com/facebook/zstd.git
synced 2025-10-07 00:12:40 -04:00
Fix external sequence corner cases
* Clear external sequences when we reset the `ZSTD_CCtx`. * Skip external sequences when a block is too small to compress.
This commit is contained in:
parent
d19f803a3b
commit
136b9e2392
@ -922,6 +922,7 @@ static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_CCtx_params params, U64 pl
|
|||||||
cctx->dictID = 0;
|
cctx->dictID = 0;
|
||||||
if (params.ldmParams.enableLdm)
|
if (params.ldmParams.enableLdm)
|
||||||
ZSTD_window_clear(&cctx->ldmState.window);
|
ZSTD_window_clear(&cctx->ldmState.window);
|
||||||
|
ZSTD_referenceExternalSequences(cctx, NULL, 0);
|
||||||
ZSTD_invalidateMatchState(&cctx->blockState.matchState);
|
ZSTD_invalidateMatchState(&cctx->blockState.matchState);
|
||||||
ZSTD_reset_compressedBlockState(cctx->blockState.prevCBlock);
|
ZSTD_reset_compressedBlockState(cctx->blockState.prevCBlock);
|
||||||
XXH64_reset(&cctx->xxhState, 0);
|
XXH64_reset(&cctx->xxhState, 0);
|
||||||
@ -1108,6 +1109,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
|||||||
ptr = zc->ldmState.bucketOffsets + ldmBucketSize;
|
ptr = zc->ldmState.bucketOffsets + ldmBucketSize;
|
||||||
ZSTD_window_clear(&zc->ldmState.window);
|
ZSTD_window_clear(&zc->ldmState.window);
|
||||||
}
|
}
|
||||||
|
ZSTD_referenceExternalSequences(zc, NULL, 0);
|
||||||
|
|
||||||
/* buffers */
|
/* buffers */
|
||||||
zc->inBuffSize = buffInSize;
|
zc->inBuffSize = buffInSize;
|
||||||
@ -1818,8 +1820,10 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
|
|||||||
ZSTD_matchState_t* const ms = &zc->blockState.matchState;
|
ZSTD_matchState_t* const ms = &zc->blockState.matchState;
|
||||||
DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
|
DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
|
||||||
(U32)dstCapacity, ms->window.dictLimit, ms->nextToUpdate);
|
(U32)dstCapacity, ms->window.dictLimit, ms->nextToUpdate);
|
||||||
if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1)
|
if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
|
||||||
|
ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.searchLength);
|
||||||
return 0; /* don't even attempt compression below a certain srcSize */
|
return 0; /* don't even attempt compression below a certain srcSize */
|
||||||
|
}
|
||||||
ZSTD_resetSeqStore(&(zc->seqStore));
|
ZSTD_resetSeqStore(&(zc->seqStore));
|
||||||
|
|
||||||
/* limited update after a very long match */
|
/* limited update after a very long match */
|
||||||
|
@ -536,6 +536,34 @@ size_t ZSTD_ldm_generateSequences(
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * ZSTD_ldm_skipSequences():
 *
 * Advances `rawSeqStore` past `srcSize` bytes without emitting anything.
 * Starting at `rawSeqStore->pos`, the literals and then the match of each
 * stored sequence are consumed in turn, and the stored sequences are
 * modified in place. The walk stops once `srcSize` bytes are accounted for
 * or no sequences remain.
 *
 * If the skip ends inside a match and the part left over is shorter than
 * `minMatch`, that sequence is dropped and its leftover bytes are added to
 * the literals of the following sequence (when there is one).
 */
void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) {
|
||||||
|
/* Stop when every byte has been skipped or the store is exhausted. */
while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) {
|
||||||
|
/* Current sequence; it is edited in place below. */
rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos;
|
||||||
|
if (srcSize <= seq->litLength) {
|
||||||
|
/* Skip past srcSize literals: the skip ends inside this sequence's
 * literals, so only shorten them; `pos` stays on this sequence. */
seq->litLength -= srcSize;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
/* All of the literals are skipped; account for them and move on to the match. */
srcSize -= seq->litLength;
|
||||||
|
seq->litLength = 0;
|
||||||
|
if (srcSize < seq->matchLength) {
|
||||||
|
/* Skip past the first srcSize of the match; the rest stays in the store. */
seq->matchLength -= srcSize;
|
||||||
|
if (seq->matchLength < minMatch) {
|
||||||
|
/* The match is too short, omit it */
if (rawSeqStore->pos + 1 < rawSeqStore->size) {
|
||||||
|
/* Hand the leftover match bytes to the next sequence as literals. */
seq[1].litLength += seq[0].matchLength;
|
||||||
|
}
|
||||||
|
/* Consume the omitted sequence. */
rawSeqStore->pos++;
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
/* The whole sequence is skipped: zero it out, consume it, and keep going
 * with whatever is left of srcSize. */
srcSize -= seq->matchLength;
|
||||||
|
seq->matchLength = 0;
|
||||||
|
rawSeqStore->pos++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* If the sequence length is longer than remaining then the sequence is split
|
* If the sequence length is longer than remaining then the sequence is split
|
||||||
* between this block and the next.
|
* between this block and the next.
|
||||||
@ -546,51 +574,24 @@ size_t ZSTD_ldm_generateSequences(
|
|||||||
static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
|
static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
|
||||||
U32 const remaining, U32 const minMatch)
|
U32 const remaining, U32 const minMatch)
|
||||||
{
|
{
|
||||||
size_t const pos = rawSeqStore->pos;
|
|
||||||
rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos];
|
rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos];
|
||||||
assert(sequence.offset > 0);
|
assert(sequence.offset > 0);
|
||||||
/* Handle partial sequences */
|
/* Likely: No partial sequence */
|
||||||
|
if (remaining >= sequence.litLength + sequence.matchLength) {
|
||||||
|
rawSeqStore->pos++;
|
||||||
|
return sequence;
|
||||||
|
}
|
||||||
|
/* Cut the sequence short (offset == 0 ==> rest is literals). */
|
||||||
if (remaining <= sequence.litLength) {
|
if (remaining <= sequence.litLength) {
|
||||||
/* Split the literals that we have out of the sequence.
|
|
||||||
* They will become the last literals of this block.
|
|
||||||
* The next block starts off with the remaining literals.
|
|
||||||
*/
|
|
||||||
rawSeqStore->seq[pos].litLength -= remaining;
|
|
||||||
sequence.offset = 0;
|
sequence.offset = 0;
|
||||||
} else if (remaining < sequence.litLength + sequence.matchLength) {
|
} else if (remaining < sequence.litLength + sequence.matchLength) {
|
||||||
/* Split the match up into two sequences. One in this block, and one
|
sequence.matchLength = remaining - sequence.litLength;
|
||||||
* in the next with no literals. If either match would be shorter
|
|
||||||
* than searchLength we omit it.
|
|
||||||
*/
|
|
||||||
U32 const matchPrefix = remaining - sequence.litLength;
|
|
||||||
U32 const matchSuffix = sequence.matchLength - matchPrefix;
|
|
||||||
|
|
||||||
assert(remaining > sequence.litLength);
|
|
||||||
assert(matchPrefix < sequence.matchLength);
|
|
||||||
assert(matchPrefix + matchSuffix == sequence.matchLength);
|
|
||||||
/* Update the first sequence */
|
|
||||||
sequence.matchLength = matchPrefix;
|
|
||||||
/* Update the second sequence */
|
|
||||||
if (matchSuffix >= minMatch) {
|
|
||||||
/* Update the second sequence, since the suffix is long enough */
|
|
||||||
rawSeqStore->seq[pos].litLength = 0;
|
|
||||||
rawSeqStore->seq[pos].matchLength = matchSuffix;
|
|
||||||
} else {
|
|
||||||
/* Omit the second sequence since the match suffix is too short.
|
|
||||||
* Add to the next sequences literals (if any).
|
|
||||||
*/
|
|
||||||
if (pos + 1 < rawSeqStore->size)
|
|
||||||
rawSeqStore->seq[pos + 1].litLength += matchSuffix;
|
|
||||||
rawSeqStore->pos++; /* Consume the sequence */
|
|
||||||
}
|
|
||||||
if (sequence.matchLength < minMatch) {
|
if (sequence.matchLength < minMatch) {
|
||||||
/* Skip the current sequence if it is too short */
|
|
||||||
sequence.offset = 0;
|
sequence.offset = 0;
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
/* No partial sequence */
|
|
||||||
rawSeqStore->pos++; /* Consume the sequence */
|
|
||||||
}
|
}
|
||||||
|
/* Skip past `remaining` bytes for the future sequences. */
|
||||||
|
ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch);
|
||||||
return sequence;
|
return sequence;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -65,6 +65,16 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
|
|||||||
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize,
|
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize,
|
||||||
int const extDict);
|
int const extDict);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* ZSTD_ldm_skipSequences():
|
||||||
|
*
|
||||||
|
* Skip past `srcSize` bytes worth of sequences in `rawSeqStore`.
|
||||||
|
* Avoids emitting matches less than `minMatch` bytes.
|
||||||
|
* Must be called for data that is not passed to ZSTD_ldm_blockCompress().
|
||||||
|
*/
|
||||||
|
void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize,
|
||||||
|
U32 const minMatch);
|
||||||
|
|
||||||
|
|
||||||
/** ZSTD_ldm_initializeParameters() :
|
/** ZSTD_ldm_initializeParameters() :
|
||||||
* Initialize the long distance matching parameters to their default values. */
|
* Initialize the long distance matching parameters to their default values. */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user