Mirror of https://github.com/facebook/zstd.git (synced 2025-10-17 00:07:08 -04:00)

Merge pull request #2648 from terrelln/determinism-fuzzer

Add determinism fuzzers and fix rare determinism bugs

This commit is contained in commit accbf0af5a.
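The new fuzzers check determinism by compressing the same input twice with identical parameters and requiring byte-identical output (same size, same XXH64 hash). Below is a minimal standalone sketch of that check, not part of this commit; the helper name and buffers are illustrative, and only public zstd / xxhash calls are used:

/* Sketch only (not from the diff): compress twice with identical settings
 * and require identical output bytes. */
#include <assert.h>
#include <stdlib.h>
#include <zstd.h>
#include <xxhash.h>

static void checkDeterminism(const void* src, size_t srcSize, int level)
{
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    size_t const bound = ZSTD_compressBound(srcSize);
    void* const out = malloc(bound);
    assert(cctx != NULL && out != NULL);

    size_t const cSize0 = ZSTD_compressCCtx(cctx, out, bound, src, srcSize, level);
    assert(!ZSTD_isError(cSize0));
    XXH64_hash_t const hash0 = XXH64(out, cSize0, 0);

    size_t const cSize1 = ZSTD_compressCCtx(cctx, out, bound, src, srcSize, level);
    assert(!ZSTD_isError(cSize1));

    assert(cSize1 == cSize0);                 /* same size ...        */
    assert(XXH64(out, cSize1, 0) == hash0);   /* ... and same content */

    free(out);
    ZSTD_freeCCtx(cctx);
}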
@@ -3915,6 +3915,7 @@ static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx,
            ZSTD_overflowCorrectIfNeeded(
                ms, &cctx->workspace, &cctx->appliedParams, ip, ip + blockSize);
            ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);
            ZSTD_window_enforceMaxDist(&ms->window, ip, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);

+           /* Ensure hash/chain table insertion resumes no sooner than lowlimit */
            if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit;
@@ -409,7 +409,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
        hashSmall[hSmall] = hashLong[hLong] = curr;   /* update hash table */

        if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
-           & (offset_1 < curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */
+           & (offset_1 <= curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */
          && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
            const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;

@@ -477,7 +477,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
                U32 const repIndex2 = current2 - offset_2;
                const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
                if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */
-                   & (offset_2 < current2 - dictStartIndex))
+                   & (offset_2 <= current2 - dictStartIndex))
                  && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;

@@ -418,7 +418,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
        DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr);

        if ( ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */
-            & (offset_1 < curr+1 - dictStartIndex) ) /* note: we are searching at curr+1 */
+            & (offset_1 <= curr+1 - dictStartIndex) ) /* note: we are searching at curr+1 */
          && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
            const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
            size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;

@@ -453,7 +453,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
                U32 const current2 = (U32)(ip-base);
                U32 const repIndex2 = current2 - offset_2;
                const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
-               if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 < curr - dictStartIndex))  /* intentional overflow */
+               if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 <= curr - dictStartIndex))  /* intentional overflow */
                  && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
@@ -1995,7 +1995,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
            const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
            const BYTE* const repMatch = repBase + repIndex;
            if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */
-              & (offset_1 < curr+1 - windowLow) ) /* note: we are searching at curr+1 */
+              & (offset_1 <= curr+1 - windowLow) ) /* note: we are searching at curr+1 */
            if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
                /* repcode detected we should take it */
                const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;

@@ -2027,7 +2027,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
                const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
                const BYTE* const repMatch = repBase + repIndex;
                if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */
-                  & (offset_1 < curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
+                  & (offset_1 <= curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
                if (MEM_read32(ip) == MEM_read32(repMatch)) {
                    /* repcode detected */
                    const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;

@@ -2059,7 +2059,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
                const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
                const BYTE* const repMatch = repBase + repIndex;
                if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */
-                  & (offset_1 < curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
+                  & (offset_1 <= curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
                if (MEM_read32(ip) == MEM_read32(repMatch)) {
                    /* repcode detected */
                    const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;

@@ -2106,7 +2106,7 @@ _storeSequence:
            const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
            const BYTE* const repMatch = repBase + repIndex;
            if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */
-              & (offset_2 < repCurrent - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
+              & (offset_2 <= repCurrent - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
            if (MEM_read32(ip) == MEM_read32(repMatch)) {
                /* repcode detected we should take it */
                const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
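All of the extDict hunks above apply the same one-character fix: the repcode validity test changes `<` to `<=`, so a repeat-offset match that begins exactly at the lowest valid index (dictStartIndex or windowLow) is no longer rejected, and acceptance of that boundary match no longer depends on how the window happens to be split into dict and prefix segments. A small arithmetic illustration with hypothetical values, not taken from the diff:

/* Boundary illustration (hypothetical values): the old and new predicates
 * differ only when the rep match starts exactly at the lowest valid index. */
#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint32_t const dictStartIndex = 100;   /* hypothetical lowest valid index */
    uint32_t const curr           = 500;   /* hypothetical current position   */
    uint32_t const offset_1 = curr + 1 - dictStartIndex;  /* rep match lands on dictStartIndex */
    uint32_t const repIndex = curr + 1 - offset_1;

    assert(repIndex == dictStartIndex);                 /* still a reachable position */
    assert(!(offset_1 <  curr + 1 - dictStartIndex));   /* old check: rejected        */
    assert(  offset_1 <= curr + 1 - dictStartIndex);    /* new check: accepted        */
    return 0;
}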
@@ -364,11 +364,13 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
*  Binary Tree search
***************************************/
/** ZSTD_insertBt1() : add one or multiple positions to tree.
- *  ip : assumed <= iend-8 .
+ * @param ip assumed <= iend-8 .
+ * @param target The target of ZSTD_updateTree_internal() - we are filling to this position
 * @return : nb of positions added */
static U32 ZSTD_insertBt1(
                ZSTD_matchState_t* ms,
                const BYTE* const ip, const BYTE* const iend,
+               U32 const target,
                U32 const mls, const int extDict)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
@@ -391,7 +393,10 @@ static U32 ZSTD_insertBt1(
    U32* smallerPtr = bt + 2*(curr&btMask);
    U32* largerPtr  = smallerPtr + 1;
    U32 dummy32;   /* to be nullified at the end */
-   U32 const windowLow = ms->window.lowLimit;
+   /* windowLow is based on target because we only need positions that will be
+    * in the window at the end of the tree update.
+    */
+   U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
    U32 matchEndIdx = curr+8+1;
    size_t bestLength = 8;
    U32 nbCompares = 1U << cParams->searchLog;
@@ -404,6 +409,7 @@ static U32 ZSTD_insertBt1(

    DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);

+   assert(curr <= target);
    assert(ip <= iend-8);   /* required for h calculation */
    hashTable[h] = curr;   /* Update Hash Table */

@@ -492,7 +498,7 @@ void ZSTD_updateTree_internal(
                idx, target, dictMode);

    while(idx < target) {
-       U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
+       U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
        assert(idx < (U32)(idx + forward));
        idx += forward;
    }
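The ZSTD_insertBt1() change threads the update's final position (target) into the window-low computation, so the tree fill depends only on where the update will end, not on the window state at the moment it starts. Below is a conceptual sketch of such a bound; the helper name and numbers are made up for illustration, and this is not the body of ZSTD_getLowestMatchIndex:

/* Conceptual sketch (hypothetical helper): lowest index still inside a
 * 2^windowLog window once position `target` is reached, clamped by the
 * window's current lowLimit. */
#include <assert.h>
#include <stdint.h>

static uint32_t lowestIndexAtTarget(uint32_t target, uint32_t lowLimit, unsigned windowLog)
{
    uint32_t const maxDistance = 1u << windowLog;
    return (target - lowLimit > maxDistance) ? target - maxDistance : lowLimit;
}

int main(void)
{
    /* With a 1 MiB window, an update that ends at 5 MiB cannot use positions
     * below 4 MiB, no matter what lowLimit was when the update started. */
    assert(lowestIndexAtTarget(5u << 20, 0, 20) == (4u << 20));
    /* If everything below lowLimit is already unusable, lowLimit wins. */
    assert(lowestIndexAtTarget(100, 64, 20) == 64);
    return 0;
}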
@@ -893,7 +899,7 @@ static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_
             */
            U32 posOvershoot = currPosInBlock - optLdm->endPosInBlock;
            ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
        }
    }
    ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
    }
    ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
@@ -42,8 +42,23 @@ static size_t roundTripTest(void *result, size_t resultCapacity,
                src, srcSize,
                dict.buff, dict.size,
                cLevel);
        FUZZ_ZASSERT(cSize);
+       // Compress a second time and check for determinism
+       {
+           size_t const cSize0 = cSize;
+           XXH64_hash_t const hash0 = XXH64(compressed, cSize, 0);
+           cSize = ZSTD_compress_usingDict(cctx,
+                   compressed, compressedCapacity,
+                   src, srcSize,
+                   dict.buff, dict.size,
+                   cLevel);
+           FUZZ_ASSERT(cSize == cSize0);
+           FUZZ_ASSERT(XXH64(compressed, cSize, 0) == hash0);
+       }
    } else {
+       size_t remainingBytes;
        dictContentType = FUZZ_dataProducer_uint32Range(producer, 0, 2);
+       remainingBytes = FUZZ_dataProducer_remainingBytes(producer);
        FUZZ_setRandomParameters(cctx, srcSize, producer);
        /* Disable checksum so we can use sizes smaller than compress bound. */
        FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 0));
@@ -51,14 +66,29 @@ static size_t roundTripTest(void *result, size_t resultCapacity,
            FUZZ_ZASSERT(ZSTD_CCtx_refPrefix_advanced(
                cctx, dict.buff, dict.size,
                dictContentType));
        else
            FUZZ_ZASSERT(ZSTD_CCtx_loadDictionary_advanced(
                cctx, dict.buff, dict.size,
                (ZSTD_dictLoadMethod_e)FUZZ_dataProducer_uint32Range(producer, 0, 1),
                dictContentType));
        cSize = ZSTD_compress2(cctx, compressed, compressedCapacity, src, srcSize);
        FUZZ_ZASSERT(cSize);
+       // Compress a second time and check for determinism
+       {
+           size_t const cSize0 = cSize;
+           XXH64_hash_t const hash0 = XXH64(compressed, cSize, 0);
+           FUZZ_dataProducer_rollBack(producer, remainingBytes);
+           FUZZ_setRandomParameters(cctx, srcSize, producer);
+           FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 0));
+           if (refPrefix)
+               FUZZ_ZASSERT(ZSTD_CCtx_refPrefix_advanced(
+                   cctx, dict.buff, dict.size,
+                   dictContentType));
+           cSize = ZSTD_compress2(cctx, compressed, compressedCapacity, src, srcSize);
+           FUZZ_ASSERT(cSize == cSize0);
+           FUZZ_ASSERT(XXH64(compressed, cSize, 0) == hash0);
+       }
    }
    FUZZ_ZASSERT(cSize);
    if (refPrefix)
        FUZZ_ZASSERT(ZSTD_DCtx_refPrefix_advanced(
            dctx, dict.buff, dict.size,
@@ -66,6 +66,12 @@ size_t FUZZ_dataProducer_remainingBytes(FUZZ_dataProducer_t *producer){
    return producer->size;
}

+void FUZZ_dataProducer_rollBack(FUZZ_dataProducer_t *producer, size_t remainingBytes)
+{
+    FUZZ_ASSERT(remainingBytes >= producer->size);
+    producer->size = remainingBytes;
+}
+
int FUZZ_dataProducer_empty(FUZZ_dataProducer_t *producer) {
    return producer->size == 0;
}
@@ -49,6 +49,9 @@ int32_t FUZZ_dataProducer_int32Range(FUZZ_dataProducer_t *producer,
/* Returns the size of the remaining bytes of data in the producer */
size_t FUZZ_dataProducer_remainingBytes(FUZZ_dataProducer_t *producer);

+/* Rolls back the data producer state to have remainingBytes remaining */
+void FUZZ_dataProducer_rollBack(FUZZ_dataProducer_t *producer, size_t remainingBytes);
+
/* Returns true if the data producer is out of bytes */
int FUZZ_dataProducer_empty(FUZZ_dataProducer_t *producer);
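FUZZ_dataProducer_rollBack() lets a fuzzer draw the same random parameters twice: it records the remaining byte count before the first FUZZ_setRandomParameters() call, then restores it and draws again before the second compression, as the round-trip hunks above and below do. The sketch below uses a simplified stand-in producer, not the real FUZZ_dataProducer_t; it consumes values from the tail of its buffer, which is why restoring the size alone replays identical draws:

/* Simplified stand-in producer (illustration only, not FUZZ_dataProducer_t):
 * values are drawn from the tail of the buffer and `size` shrinks, so
 * restoring `size` replays the same sequence of draws. */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

typedef struct { const uint8_t* data; size_t size; } Producer;

static uint32_t draw32(Producer* p)
{
    uint32_t v = 0;
    for (int i = 0; i < 4 && p->size > 0; ++i)
        v = (v << 8) | p->data[--p->size];    /* consume from the end */
    return v;
}

int main(void)
{
    const uint8_t bytes[] = { 1, 2, 3, 4, 5, 6, 7, 8 };
    Producer p = { bytes, sizeof(bytes) };

    size_t const saved = p.size;       /* like FUZZ_dataProducer_remainingBytes() */
    uint32_t const first = draw32(&p);

    p.size = saved;                    /* like FUZZ_dataProducer_rollBack()       */
    uint32_t const second = draw32(&p);

    assert(second == first);           /* identical parameters on the second pass */
    return 0;
}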
@@ -35,16 +35,36 @@ static size_t roundTripTest(void *result, size_t resultCapacity,
    size_t dSize;
    int targetCBlockSize = 0;
    if (FUZZ_dataProducer_uint32Range(producer, 0, 1)) {
+       size_t const remainingBytes = FUZZ_dataProducer_remainingBytes(producer);
        FUZZ_setRandomParameters(cctx, srcSize, producer);
        cSize = ZSTD_compress2(cctx, compressed, compressedCapacity, src, srcSize);
        FUZZ_ZASSERT(cSize);
        FUZZ_ZASSERT(ZSTD_CCtx_getParameter(cctx, ZSTD_c_targetCBlockSize, &targetCBlockSize));
+       // Compress a second time and check for determinism
+       {
+           size_t const cSize0 = cSize;
+           XXH64_hash_t const hash0 = XXH64(compressed, cSize, 0);
+           FUZZ_dataProducer_rollBack(producer, remainingBytes);
+           FUZZ_setRandomParameters(cctx, srcSize, producer);
+           cSize = ZSTD_compress2(cctx, compressed, compressedCapacity, src, srcSize);
+           FUZZ_ASSERT(cSize == cSize0);
+           FUZZ_ASSERT(XXH64(compressed, cSize, 0) == hash0);
+       }
    } else {
        int const cLevel = FUZZ_dataProducer_int32Range(producer, kMinClevel, kMaxClevel);

        cSize = ZSTD_compressCCtx(
            cctx, compressed, compressedCapacity, src, srcSize, cLevel);
        FUZZ_ZASSERT(cSize);
+       // Compress a second time and check for determinism
+       {
+           size_t const cSize0 = cSize;
+           XXH64_hash_t const hash0 = XXH64(compressed, cSize, 0);
+           cSize = ZSTD_compressCCtx(
+               cctx, compressed, compressedCapacity, src, srcSize, cLevel);
+           FUZZ_ASSERT(cSize == cSize0);
+           FUZZ_ASSERT(XXH64(compressed, cSize, 0) == hash0);
+       }
    }
    FUZZ_ZASSERT(cSize);
    dSize = ZSTD_decompressDCtx(dctx, result, resultCapacity, compressed, cSize);
    FUZZ_ZASSERT(dSize);
    /* When superblock is enabled make sure we don't expand the block more than expected.