mirror of
https://github.com/facebook/zstd.git
synced 2025-12-05 00:03:19 -05:00
[lib] Fix determinism bug in the optimal parser
`ZSTD_insertBt1()` has a speed optimization that skips the prefix of
very long matches.
40def70387/lib/compress/zstd_opt.c (L476)
This optimization is based off the length longest match found. However,
when indices are reset, we only ensure that we can reference the whole
window starting from `ip`. If the previous block ended with a long match
then `nextToUpdate` could be much less than `ip`. It might be far enough
back that `nextToUpdate < maxDist`, so it doesn't have a full window of
data to reference. This can cause non-determinism bugs, because we may
find a match that is beyond `ip - maxDist`, and may sometimes be
un-referencable, and that match triggers the speed optimization.
The fix is to base the `windowLow` off of the `target` of
`ZSTD_updateTree_internal()`, because anything below that value will be
obsolete by the time `ZSTD_updateTree_internal()` completes.
This commit is contained in:
parent
06718087f8
commit
91c9a247b6
@ -364,11 +364,13 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
|
|||||||
* Binary Tree search
|
* Binary Tree search
|
||||||
***************************************/
|
***************************************/
|
||||||
/** ZSTD_insertBt1() : add one or multiple positions to tree.
|
/** ZSTD_insertBt1() : add one or multiple positions to tree.
|
||||||
* ip : assumed <= iend-8 .
|
* @param ip assumed <= iend-8 .
|
||||||
|
* @param target The target of ZSTD_updateTree_internal() - we are filling to this position
|
||||||
* @return : nb of positions added */
|
* @return : nb of positions added */
|
||||||
static U32 ZSTD_insertBt1(
|
static U32 ZSTD_insertBt1(
|
||||||
ZSTD_matchState_t* ms,
|
ZSTD_matchState_t* ms,
|
||||||
const BYTE* const ip, const BYTE* const iend,
|
const BYTE* const ip, const BYTE* const iend,
|
||||||
|
U32 const target,
|
||||||
U32 const mls, const int extDict)
|
U32 const mls, const int extDict)
|
||||||
{
|
{
|
||||||
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
const ZSTD_compressionParameters* const cParams = &ms->cParams;
|
||||||
@ -391,7 +393,10 @@ static U32 ZSTD_insertBt1(
|
|||||||
U32* smallerPtr = bt + 2*(curr&btMask);
|
U32* smallerPtr = bt + 2*(curr&btMask);
|
||||||
U32* largerPtr = smallerPtr + 1;
|
U32* largerPtr = smallerPtr + 1;
|
||||||
U32 dummy32; /* to be nullified at the end */
|
U32 dummy32; /* to be nullified at the end */
|
||||||
U32 const windowLow = ms->window.lowLimit;
|
/* windowLow is based on target because we're only need positions that will be
|
||||||
|
* in the window at the end of the tree update.
|
||||||
|
*/
|
||||||
|
U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
|
||||||
U32 matchEndIdx = curr+8+1;
|
U32 matchEndIdx = curr+8+1;
|
||||||
size_t bestLength = 8;
|
size_t bestLength = 8;
|
||||||
U32 nbCompares = 1U << cParams->searchLog;
|
U32 nbCompares = 1U << cParams->searchLog;
|
||||||
@ -404,6 +409,7 @@ static U32 ZSTD_insertBt1(
|
|||||||
|
|
||||||
DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);
|
DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);
|
||||||
|
|
||||||
|
assert(curr <= target);
|
||||||
assert(ip <= iend-8); /* required for h calculation */
|
assert(ip <= iend-8); /* required for h calculation */
|
||||||
hashTable[h] = curr; /* Update Hash Table */
|
hashTable[h] = curr; /* Update Hash Table */
|
||||||
|
|
||||||
@ -492,7 +498,7 @@ void ZSTD_updateTree_internal(
|
|||||||
idx, target, dictMode);
|
idx, target, dictMode);
|
||||||
|
|
||||||
while(idx < target) {
|
while(idx < target) {
|
||||||
U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
|
U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
|
||||||
assert(idx < (U32)(idx + forward));
|
assert(idx < (U32)(idx + forward));
|
||||||
idx += forward;
|
idx += forward;
|
||||||
}
|
}
|
||||||
@ -893,7 +899,7 @@ static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_
|
|||||||
*/
|
*/
|
||||||
U32 posOvershoot = currPosInBlock - optLdm->endPosInBlock;
|
U32 posOvershoot = currPosInBlock - optLdm->endPosInBlock;
|
||||||
ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
|
ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
|
||||||
}
|
}
|
||||||
ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
|
ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
|
||||||
}
|
}
|
||||||
ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
|
ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user