[lib] Fix determinism bug in the optimal parser

`ZSTD_insertBt1()` has a speed optimization that skips the prefix of
very long matches.

40def70387/lib/compress/zstd_opt.c (L476)

This optimization is based off the length longest match found. However,
when indices are reset, we only ensure that we can reference the whole
window starting from `ip`. If the previous block ended with a long match
then `nextToUpdate` could be much less than `ip`. It might be far enough
back that `nextToUpdate < maxDist`, so it doesn't have a full window of
data to reference. This can cause non-determinism bugs, because we may
find a match that is beyond `ip - maxDist`, and may sometimes be
un-referencable, and that match triggers the speed optimization.

The fix is to base the `windowLow` off of the `target` of
`ZSTD_updateTree_internal()`, because anything below that value will be
obsolete by the time `ZSTD_updateTree_internal()` completes.
This commit is contained in:
Nick Terrell 2021-05-13 15:44:12 -07:00
parent 06718087f8
commit 91c9a247b6

View File

@ -364,11 +364,13 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
* Binary Tree search
***************************************/
/** ZSTD_insertBt1() : add one or multiple positions to tree.
* ip : assumed <= iend-8 .
* @param ip assumed <= iend-8 .
* @param target The target of ZSTD_updateTree_internal() - we are filling to this position
* @return : nb of positions added */
static U32 ZSTD_insertBt1(
ZSTD_matchState_t* ms,
const BYTE* const ip, const BYTE* const iend,
U32 const target,
U32 const mls, const int extDict)
{
const ZSTD_compressionParameters* const cParams = &ms->cParams;
@ -391,7 +393,10 @@ static U32 ZSTD_insertBt1(
U32* smallerPtr = bt + 2*(curr&btMask);
U32* largerPtr = smallerPtr + 1;
U32 dummy32; /* to be nullified at the end */
U32 const windowLow = ms->window.lowLimit;
/* windowLow is based on target because we're only need positions that will be
* in the window at the end of the tree update.
*/
U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
U32 matchEndIdx = curr+8+1;
size_t bestLength = 8;
U32 nbCompares = 1U << cParams->searchLog;
@ -404,6 +409,7 @@ static U32 ZSTD_insertBt1(
DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);
assert(curr <= target);
assert(ip <= iend-8); /* required for h calculation */
hashTable[h] = curr; /* Update Hash Table */
@ -492,7 +498,7 @@ void ZSTD_updateTree_internal(
idx, target, dictMode);
while(idx < target) {
U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
assert(idx < (U32)(idx + forward));
idx += forward;
}
@ -893,7 +899,7 @@ static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_
*/
U32 posOvershoot = currPosInBlock - optLdm->endPosInBlock;
ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
}
}
ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
}
ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);