Merge pull request #2648 from terrelln/determinism-fuzzer

Add determinism fuzzers and fix rare determinism bugs
commit accbf0af5a
Author: Nick Terrell, 2021-05-14 17:19:41 -07:00 (committed by GitHub)
9 changed files with 83 additions and 17 deletions

File: lib/compress/zstd_compress.c

@@ -3915,6 +3915,7 @@ static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx,
             ZSTD_overflowCorrectIfNeeded(
                 ms, &cctx->workspace, &cctx->appliedParams, ip, ip + blockSize);
             ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);
             ZSTD_window_enforceMaxDist(&ms->window, ip, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);
+            /* Ensure hash/chain table insertion resumes no sooner than lowlimit */
             if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit;
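The comment added here documents the clamp that keeps table insertion inside the window. A toy model (not zstd code; the names are illustrative) of why the clamp matters for determinism:

/* Toy model: if the window floor (lowLimit) moves past nextToUpdate, the next
 * table update would insert positions that are no longer valid matches.
 * Whether that happens depends on the context's prior history, so two
 * compressions of the same input could build different tables, and therefore
 * emit different output. */
typedef struct {
    unsigned lowLimit;      /* lowest position still inside the window */
    unsigned nextToUpdate;  /* next position to insert into the hash/chain tables */
} MatchStateModel;

static void onWindowSlide(MatchStateModel* ms, unsigned newLowLimit)
{
    if (newLowLimit > ms->lowLimit) ms->lowLimit = newLowLimit;
    /* the clamp: resume insertion no sooner than the window floor */
    if (ms->nextToUpdate < ms->lowLimit) ms->nextToUpdate = ms->lowLimit;
}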

File: lib/compress/zstd_double_fast.c

@@ -409,7 +409,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
         hashSmall[hSmall] = hashLong[hLong] = curr;   /* update hash table */
         if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
-            & (offset_1 < curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */
+            & (offset_1 <= curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */
           && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
             const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
             mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
@@ -477,7 +477,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
             U32 const repIndex2 = current2 - offset_2;
             const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
             if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */
-                & (offset_2 < current2 - dictStartIndex))
+                & (offset_2 <= current2 - dictStartIndex))
               && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                 const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                 size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
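The `<` to `<=` changes in this file, and the matching ones in zstd_fast.c and zstd_lazy.c below, are all the same off-by-one: the strict inequality rejected a repcode match starting exactly at the lowest valid window position, so whether a match was emitted depended on window history rather than on the input alone. A standalone illustration with made-up numbers:

#include <assert.h>

int main(void)
{
    /* Made-up values: searching a repcode at position curr+1 = 100 with
     * offset_1 = 10 in a window whose lowest valid index is 90. */
    unsigned const curr = 99, offset_1 = 10, dictStartIndex = 90;
    unsigned const repIndex = curr + 1 - offset_1;      /* 90: the first valid byte */

    assert(repIndex == dictStartIndex);
    /* Fixed test: a match starting exactly at the window floor is accepted. */
    assert(  offset_1 <= curr + 1 - dictStartIndex );   /* 10 <= 10 */
    /* Old test: the same legal match was rejected, so the outcome depended
     * on where the floor happened to land. */
    assert(!(offset_1 <  curr + 1 - dictStartIndex));   /* !(10 < 10) */
    return 0;
}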

File: lib/compress/zstd_fast.c

@@ -418,7 +418,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
         DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr);
         if ( ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */
-             & (offset_1 < curr+1 - dictStartIndex) ) /* note: we are searching at curr+1 */
+             & (offset_1 <= curr+1 - dictStartIndex) ) /* note: we are searching at curr+1 */
           && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
             const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
             size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
@@ -453,7 +453,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
             U32 const current2 = (U32)(ip-base);
             U32 const repIndex2 = current2 - offset_2;
             const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
-            if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 < curr - dictStartIndex)) /* intentional overflow */
+            if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 <= curr - dictStartIndex)) /* intentional overflow */
              && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                 const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                 size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;

File: lib/compress/zstd_lazy.c

@@ -1995,7 +1995,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
             const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
             const BYTE* const repMatch = repBase + repIndex;
             if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */
-               & (offset_1 < curr+1 - windowLow) ) /* note: we are searching at curr+1 */
+               & (offset_1 <= curr+1 - windowLow) ) /* note: we are searching at curr+1 */
             if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
                 /* repcode detected we should take it */
                 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -2027,7 +2027,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
                 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
                 const BYTE* const repMatch = repBase + repIndex;
                 if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */
-                   & (offset_1 < curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
+                   & (offset_1 <= curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
                 if (MEM_read32(ip) == MEM_read32(repMatch)) {
                     /* repcode detected */
                     const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -2059,7 +2059,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
                 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
                 const BYTE* const repMatch = repBase + repIndex;
                 if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */
-                   & (offset_1 < curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
+                   & (offset_1 <= curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
                 if (MEM_read32(ip) == MEM_read32(repMatch)) {
                     /* repcode detected */
                     const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -2106,7 +2106,7 @@ _storeSequence:
             const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
             const BYTE* const repMatch = repBase + repIndex;
             if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */
-               & (offset_2 < repCurrent - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
+               & (offset_2 <= repCurrent - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
             if (MEM_read32(ip) == MEM_read32(repMatch)) {
                 /* repcode detected we should take it */
                 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
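For the record, the equivalence claimed by the inline comment, worked out with the fixed `<=` (unsigned values assumed to stay in range):

/* Derivation of `curr > repIndex >= windowLow`:
 *     repIndex = curr - offset_1
 *     offset_1 <= curr - windowLow
 * <=> curr - offset_1 >= windowLow        (rearrange)
 * <=> repIndex >= windowLow               (substitute)
 * and offset_1 >= 1 gives repIndex < curr, hence `curr > repIndex >= windowLow`.
 * The old strict `<` wrongly excluded the boundary case repIndex == windowLow. */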

File: lib/compress/zstd_opt.c

@@ -364,11 +364,13 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
 *  Binary Tree search
 ***************************************/
 /** ZSTD_insertBt1() : add one or multiple positions to tree.
-*   ip : assumed <= iend-8 .
+ * @param ip assumed <= iend-8 .
+ * @param target The target of ZSTD_updateTree_internal() - we are filling to this position
 *  @return : nb of positions added */
 static U32 ZSTD_insertBt1(
                 ZSTD_matchState_t* ms,
                 const BYTE* const ip, const BYTE* const iend,
+                U32 const target,
                 U32 const mls, const int extDict)
 {
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
@@ -391,7 +393,10 @@ static U32 ZSTD_insertBt1(
     U32* smallerPtr = bt + 2*(curr&btMask);
     U32* largerPtr  = smallerPtr + 1;
     U32 dummy32;   /* to be nullified at the end */
-    U32 const windowLow = ms->window.lowLimit;
+    /* windowLow is based on target because we only need positions that will be
+     * in the window at the end of the tree update.
+     */
+    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
     U32 matchEndIdx = curr+8+1;
     size_t bestLength = 8;
     U32 nbCompares = 1U << cParams->searchLog;
@@ -404,6 +409,7 @@ static U32 ZSTD_insertBt1(
     DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);
+    assert(curr <= target);
     assert(ip <= iend-8);   /* required for h calculation */
     hashTable[h] = curr;   /* Update Hash Table */
@@ -492,7 +498,7 @@ void ZSTD_updateTree_internal(
                 idx, target, dictMode);
     while(idx < target) {
-        U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
+        U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
         assert(idx < (U32)(idx + forward));
         idx += forward;
     }
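Passing `target` down lets `windowLow` be computed from the final position of the tree update instead of the current `lowLimit`, so the set of positions the tree may use becomes a pure function of the update's inputs rather than of when the window last slid. A simplified model of the floor computation (loosely patterned on `ZSTD_getLowestMatchIndex`; not the verbatim implementation):

typedef unsigned U32;

/* Lowest index at which a match may start, seen from position `curr`. */
static U32 lowestMatchIndexModel(U32 lowLimit, U32 curr, unsigned windowLog)
{
    U32 const maxDistance = (U32)1 << windowLog;
    /* positions more than maxDistance behind curr fall out of the window,
     * and nothing below lowLimit is ever valid */
    return (curr - lowLimit > maxDistance) ? curr - maxDistance : lowLimit;
}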
@@ -893,7 +899,7 @@ static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_
          */
         U32 posOvershoot = currPosInBlock - optLdm->endPosInBlock;
         ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
-    }
+        }
         ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
     }
     ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);

File: tests/fuzz/dictionary_round_trip.c

@@ -42,8 +42,23 @@ static size_t roundTripTest(void *result, size_t resultCapacity,
                 src, srcSize,
                 dict.buff, dict.size,
                 cLevel);
+        FUZZ_ZASSERT(cSize);
+        // Compress a second time and check for determinism
+        {
+            size_t const cSize0 = cSize;
+            XXH64_hash_t const hash0 = XXH64(compressed, cSize, 0);
+            cSize = ZSTD_compress_usingDict(cctx,
+                    compressed, compressedCapacity,
+                    src, srcSize,
+                    dict.buff, dict.size,
+                    cLevel);
+            FUZZ_ASSERT(cSize == cSize0);
+            FUZZ_ASSERT(XXH64(compressed, cSize, 0) == hash0);
+        }
     } else {
+        size_t remainingBytes;
         dictContentType = FUZZ_dataProducer_uint32Range(producer, 0, 2);
+        remainingBytes = FUZZ_dataProducer_remainingBytes(producer);
         FUZZ_setRandomParameters(cctx, srcSize, producer);
         /* Disable checksum so we can use sizes smaller than compress bound. */
         FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 0));
@@ -51,14 +66,29 @@ static size_t roundTripTest(void *result, size_t resultCapacity,
             FUZZ_ZASSERT(ZSTD_CCtx_refPrefix_advanced(
                 cctx, dict.buff, dict.size,
                 dictContentType));
-        else
+        else
             FUZZ_ZASSERT(ZSTD_CCtx_loadDictionary_advanced(
                 cctx, dict.buff, dict.size,
                 (ZSTD_dictLoadMethod_e)FUZZ_dataProducer_uint32Range(producer, 0, 1),
                 dictContentType));
         cSize = ZSTD_compress2(cctx, compressed, compressedCapacity, src, srcSize);
+        FUZZ_ZASSERT(cSize);
+        // Compress a second time and check for determinism
+        {
+            size_t const cSize0 = cSize;
+            XXH64_hash_t const hash0 = XXH64(compressed, cSize, 0);
+            FUZZ_dataProducer_rollBack(producer, remainingBytes);
+            FUZZ_setRandomParameters(cctx, srcSize, producer);
+            FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 0));
+            if (refPrefix)
+                FUZZ_ZASSERT(ZSTD_CCtx_refPrefix_advanced(
+                    cctx, dict.buff, dict.size,
+                    dictContentType));
+            cSize = ZSTD_compress2(cctx, compressed, compressedCapacity, src, srcSize);
+            FUZZ_ASSERT(cSize == cSize0);
+            FUZZ_ASSERT(XXH64(compressed, cSize, 0) == hash0);
+        }
     }
     FUZZ_ZASSERT(cSize);
     if (refPrefix)
         FUZZ_ZASSERT(ZSTD_DCtx_refPrefix_advanced(
             dctx, dict.buff, dict.size,
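Both fuzzers use the same pattern: run the identical compression twice and require both the size and a 64-bit hash of the output to match. A stripped-down sketch of that check outside the fuzz harness (`ZSTD_compress`, `ZSTD_isError`, and `XXH64` are the real APIs; the wrapper itself is hypothetical):

#include <assert.h>
#include <zstd.h>
#include <xxhash.h>

/* Compress twice with identical inputs and settings; any divergence in
 * size or content is a determinism bug. */
static void checkDeterminism(void* dst, size_t dstCapacity,
                             const void* src, size_t srcSize, int level)
{
    size_t const cSize0 = ZSTD_compress(dst, dstCapacity, src, srcSize, level);
    assert(!ZSTD_isError(cSize0));
    {   XXH64_hash_t const hash0 = XXH64(dst, cSize0, 0);
        size_t const cSize1 = ZSTD_compress(dst, dstCapacity, src, srcSize, level);
        assert(!ZSTD_isError(cSize1));
        assert(cSize1 == cSize0);                /* same size...      */
        assert(XXH64(dst, cSize1, 0) == hash0);  /* ...and same bytes */
    }
}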

File: tests/fuzz/fuzz_data_producer.c

@@ -66,6 +66,12 @@ size_t FUZZ_dataProducer_remainingBytes(FUZZ_dataProducer_t *producer){
     return producer->size;
 }
 
+void FUZZ_dataProducer_rollBack(FUZZ_dataProducer_t *producer, size_t remainingBytes)
+{
+    FUZZ_ASSERT(remainingBytes >= producer->size);
+    producer->size = remainingBytes;
+}
+
 int FUZZ_dataProducer_empty(FUZZ_dataProducer_t *producer) {
     return producer->size == 0;
 }
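FUZZ_dataProducer_rollBack() exists so the second compression can replay the exact same random parameter draws: the producer only ever shrinks `size` as bytes are consumed, and restoring a recorded remaining-byte count rewinds it. A hypothetical usage sketch built only on the declarations shown in this PR:

#include <stdint.h>
#include "fuzz_helpers.h"        /* FUZZ_ASSERT (assumed location) */
#include "fuzz_data_producer.h"  /* declarations shown below */

static void replayExample(FUZZ_dataProducer_t *producer)
{
    size_t const checkpoint = FUZZ_dataProducer_remainingBytes(producer);
    uint32_t const first = FUZZ_dataProducer_uint32Range(producer, 0, 100);

    /* Rewind to the checkpoint: the same remaining bytes yield the same
     * draws, which is what lets the round-trip fuzzers regenerate
     * identical random parameters for the second compression. */
    FUZZ_dataProducer_rollBack(producer, checkpoint);
    uint32_t const second = FUZZ_dataProducer_uint32Range(producer, 0, 100);

    FUZZ_ASSERT(first == second);
}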

File: tests/fuzz/fuzz_data_producer.h

@@ -49,6 +49,9 @@ int32_t FUZZ_dataProducer_int32Range(FUZZ_dataProducer_t *producer,
 /* Returns the size of the remaining bytes of data in the producer */
 size_t FUZZ_dataProducer_remainingBytes(FUZZ_dataProducer_t *producer);
 
+/* Rolls back the data producer state to have remainingBytes remaining */
+void FUZZ_dataProducer_rollBack(FUZZ_dataProducer_t *producer, size_t remainingBytes);
+
 /* Returns true if the data producer is out of bytes */
 int FUZZ_dataProducer_empty(FUZZ_dataProducer_t *producer);

File: tests/fuzz/simple_round_trip.c

@@ -35,16 +35,36 @@ static size_t roundTripTest(void *result, size_t resultCapacity,
     size_t dSize;
     int targetCBlockSize = 0;
     if (FUZZ_dataProducer_uint32Range(producer, 0, 1)) {
+        size_t const remainingBytes = FUZZ_dataProducer_remainingBytes(producer);
         FUZZ_setRandomParameters(cctx, srcSize, producer);
         cSize = ZSTD_compress2(cctx, compressed, compressedCapacity, src, srcSize);
         FUZZ_ZASSERT(cSize);
         FUZZ_ZASSERT(ZSTD_CCtx_getParameter(cctx, ZSTD_c_targetCBlockSize, &targetCBlockSize));
+        // Compress a second time and check for determinism
+        {
+            size_t const cSize0 = cSize;
+            XXH64_hash_t const hash0 = XXH64(compressed, cSize, 0);
+            FUZZ_dataProducer_rollBack(producer, remainingBytes);
+            FUZZ_setRandomParameters(cctx, srcSize, producer);
+            cSize = ZSTD_compress2(cctx, compressed, compressedCapacity, src, srcSize);
+            FUZZ_ASSERT(cSize == cSize0);
+            FUZZ_ASSERT(XXH64(compressed, cSize, 0) == hash0);
+        }
     } else {
-        int const cLevel = FUZZ_dataProducer_int32Range(producer, kMinClevel, kMaxClevel);
+        int const cLevel = FUZZ_dataProducer_int32Range(producer, kMinClevel, kMaxClevel);
         cSize = ZSTD_compressCCtx(
             cctx, compressed, compressedCapacity, src, srcSize, cLevel);
+        FUZZ_ZASSERT(cSize);
+        // Compress a second time and check for determinism
+        {
+            size_t const cSize0 = cSize;
+            XXH64_hash_t const hash0 = XXH64(compressed, cSize, 0);
+            cSize = ZSTD_compressCCtx(
+                cctx, compressed, compressedCapacity, src, srcSize, cLevel);
+            FUZZ_ASSERT(cSize == cSize0);
+            FUZZ_ASSERT(XXH64(compressed, cSize, 0) == hash0);
+        }
     }
     FUZZ_ZASSERT(cSize);
     dSize = ZSTD_decompressDCtx(dctx, result, resultCapacity, compressed, cSize);
     FUZZ_ZASSERT(dSize);
     /* When superblock is enabled make sure we don't expand the block more than expected.