Mirror of https://github.com/facebook/zstd.git (synced 2025-10-17 00:07:08 -04:00)

Merge pull request #2648 from terrelln/determinism-fuzzer

Add determinism fuzzers and fix rare determinism bugs

This commit is contained in commit accbf0af5a.
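The new fuzzers check determinism by compressing the same input twice with identical parameters and requiring byte-identical output (same size, same XXH64 hash). Below is a minimal standalone sketch of that check, not part of this commit; the helper name and buffers are illustrative, and only public zstd / xxhash calls are used:

/* Sketch only (not from the diff): compress twice with identical settings
 * and require identical output bytes. */
#include <assert.h>
#include <stdlib.h>
#include <zstd.h>
#include <xxhash.h>

static void checkDeterminism(const void* src, size_t srcSize, int level)
{
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    size_t const bound = ZSTD_compressBound(srcSize);
    void* const out = malloc(bound);
    assert(cctx != NULL && out != NULL);

    size_t const cSize0 = ZSTD_compressCCtx(cctx, out, bound, src, srcSize, level);
    assert(!ZSTD_isError(cSize0));
    XXH64_hash_t const hash0 = XXH64(out, cSize0, 0);

    size_t const cSize1 = ZSTD_compressCCtx(cctx, out, bound, src, srcSize, level);
    assert(!ZSTD_isError(cSize1));

    assert(cSize1 == cSize0);                 /* same size ...        */
    assert(XXH64(out, cSize1, 0) == hash0);   /* ... and same content */

    free(out);
    ZSTD_freeCCtx(cctx);
}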
@@ -3915,6 +3915,7 @@ static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx,
            ZSTD_overflowCorrectIfNeeded(
                ms, &cctx->workspace, &cctx->appliedParams, ip, ip + blockSize);
            ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);
            ZSTD_window_enforceMaxDist(&ms->window, ip, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);

+           /* Ensure hash/chain table insertion resumes no sooner than lowlimit */
            if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit;
@@ -409,7 +409,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
        hashSmall[hSmall] = hashLong[hLong] = curr;   /* update hash table */

        if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
-           & (offset_1 < curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */
+           & (offset_1 <= curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */
          && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
            const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;

@@ -477,7 +477,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
                U32 const repIndex2 = current2 - offset_2;
                const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
                if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */
-                   & (offset_2 < current2 - dictStartIndex))
+                   & (offset_2 <= current2 - dictStartIndex))
                  && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;

@@ -418,7 +418,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
        DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr);

        if ( ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */
-            & (offset_1 < curr+1 - dictStartIndex) ) /* note: we are searching at curr+1 */
+            & (offset_1 <= curr+1 - dictStartIndex) ) /* note: we are searching at curr+1 */
          && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
            const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
            size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;

@@ -453,7 +453,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
                U32 const current2 = (U32)(ip-base);
                U32 const repIndex2 = current2 - offset_2;
                const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
-               if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 < curr - dictStartIndex))  /* intentional overflow */
+               if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 <= curr - dictStartIndex))  /* intentional overflow */
                  && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
@@ -1995,7 +1995,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
            const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
            const BYTE* const repMatch = repBase + repIndex;
            if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */
-              & (offset_1 < curr+1 - windowLow) ) /* note: we are searching at curr+1 */
+              & (offset_1 <= curr+1 - windowLow) ) /* note: we are searching at curr+1 */
            if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
                /* repcode detected we should take it */
                const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;

@@ -2027,7 +2027,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
                const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
                const BYTE* const repMatch = repBase + repIndex;
                if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */
-                  & (offset_1 < curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
+                  & (offset_1 <= curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
                if (MEM_read32(ip) == MEM_read32(repMatch)) {
                    /* repcode detected */
                    const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;

@@ -2059,7 +2059,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
                const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
                const BYTE* const repMatch = repBase + repIndex;
                if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */
-                  & (offset_1 < curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
+                  & (offset_1 <= curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
                if (MEM_read32(ip) == MEM_read32(repMatch)) {
                    /* repcode detected */
                    const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;

@@ -2106,7 +2106,7 @@ _storeSequence:
            const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
            const BYTE* const repMatch = repBase + repIndex;
            if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */
-              & (offset_2 < repCurrent - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
+              & (offset_2 <= repCurrent - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
            if (MEM_read32(ip) == MEM_read32(repMatch)) {
                /* repcode detected we should take it */
                const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
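All of the extDict hunks above apply the same one-character fix: the repcode validity test changes `<` to `<=`, so a repeat-offset match that begins exactly at the lowest valid index (dictStartIndex or windowLow) is no longer rejected, and acceptance of that boundary match no longer depends on how the window happens to be split into dict and prefix segments. A small arithmetic illustration with hypothetical values, not taken from the diff:

/* Boundary illustration (hypothetical values): the old and new predicates
 * differ only when the rep match starts exactly at the lowest valid index. */
#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint32_t const dictStartIndex = 100;   /* hypothetical lowest valid index */
    uint32_t const curr           = 500;   /* hypothetical current position   */
    uint32_t const offset_1 = curr + 1 - dictStartIndex;  /* rep match lands on dictStartIndex */
    uint32_t const repIndex = curr + 1 - offset_1;

    assert(repIndex == dictStartIndex);                 /* still a reachable position */
    assert(!(offset_1 <  curr + 1 - dictStartIndex));   /* old check: rejected        */
    assert(  offset_1 <= curr + 1 - dictStartIndex);    /* new check: accepted        */
    return 0;
}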
@@ -364,11 +364,13 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
*  Binary Tree search
***************************************/
/** ZSTD_insertBt1() : add one or multiple positions to tree.
- *  ip : assumed <= iend-8 .
+ * @param ip assumed <= iend-8 .
+ * @param target The target of ZSTD_updateTree_internal() - we are filling to this position
 * @return : nb of positions added */
static U32 ZSTD_insertBt1(
                ZSTD_matchState_t* ms,
                const BYTE* const ip, const BYTE* const iend,
+               U32 const target,
                U32 const mls, const int extDict)
{
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
@@ -391,7 +393,10 @@ static U32 ZSTD_insertBt1(
    U32* smallerPtr = bt + 2*(curr&btMask);
    U32* largerPtr  = smallerPtr + 1;
    U32 dummy32;   /* to be nullified at the end */
-   U32 const windowLow = ms->window.lowLimit;
+   /* windowLow is based on target because we only need positions that will be
+    * in the window at the end of the tree update.
+    */
+   U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
    U32 matchEndIdx = curr+8+1;
    size_t bestLength = 8;
    U32 nbCompares = 1U << cParams->searchLog;
@@ -404,6 +409,7 @@ static U32 ZSTD_insertBt1(

    DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);

+   assert(curr <= target);
    assert(ip <= iend-8);   /* required for h calculation */
    hashTable[h] = curr;   /* Update Hash Table */

@@ -492,7 +498,7 @@ void ZSTD_updateTree_internal(
                idx, target, dictMode);

    while(idx < target) {
-       U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
+       U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
        assert(idx < (U32)(idx + forward));
        idx += forward;
    }
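The ZSTD_insertBt1() change threads the update's final position (target) into the window-low computation, so the tree fill depends only on where the update will end, not on the window state at the moment it starts. Below is a conceptual sketch of such a bound; the helper name and numbers are made up for illustration, and this is not the body of ZSTD_getLowestMatchIndex:

/* Conceptual sketch (hypothetical helper): lowest index still inside a
 * 2^windowLog window once position `target` is reached, clamped by the
 * window's current lowLimit. */
#include <assert.h>
#include <stdint.h>

static uint32_t lowestIndexAtTarget(uint32_t target, uint32_t lowLimit, unsigned windowLog)
{
    uint32_t const maxDistance = 1u << windowLog;
    return (target - lowLimit > maxDistance) ? target - maxDistance : lowLimit;
}

int main(void)
{
    /* With a 1 MiB window, an update that ends at 5 MiB cannot use positions
     * below 4 MiB, no matter what lowLimit was when the update started. */
    assert(lowestIndexAtTarget(5u << 20, 0, 20) == (4u << 20));
    /* If everything below lowLimit is already unusable, lowLimit wins. */
    assert(lowestIndexAtTarget(100, 64, 20) == 64);
    return 0;
}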
@@ -893,7 +899,7 @@ static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_
             */
            U32 posOvershoot = currPosInBlock - optLdm->endPosInBlock;
            ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
        }
    }
    ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
    }
    ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
@@ -42,8 +42,23 @@ static size_t roundTripTest(void *result, size_t resultCapacity,
                src, srcSize,
                dict.buff, dict.size,
                cLevel);
        FUZZ_ZASSERT(cSize);
+       // Compress a second time and check for determinism
+       {
+           size_t const cSize0 = cSize;
+           XXH64_hash_t const hash0 = XXH64(compressed, cSize, 0);
+           cSize = ZSTD_compress_usingDict(cctx,
+                   compressed, compressedCapacity,
+                   src, srcSize,
+                   dict.buff, dict.size,
+                   cLevel);
+           FUZZ_ASSERT(cSize == cSize0);
+           FUZZ_ASSERT(XXH64(compressed, cSize, 0) == hash0);
+       }
    } else {
+       size_t remainingBytes;
        dictContentType = FUZZ_dataProducer_uint32Range(producer, 0, 2);
+       remainingBytes = FUZZ_dataProducer_remainingBytes(producer);
        FUZZ_setRandomParameters(cctx, srcSize, producer);
        /* Disable checksum so we can use sizes smaller than compress bound. */
        FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 0));
@@ -51,14 +66,29 @@ static size_t roundTripTest(void *result, size_t resultCapacity,
            FUZZ_ZASSERT(ZSTD_CCtx_refPrefix_advanced(
                cctx, dict.buff, dict.size,
                dictContentType));
        else
            FUZZ_ZASSERT(ZSTD_CCtx_loadDictionary_advanced(
                cctx, dict.buff, dict.size,
                (ZSTD_dictLoadMethod_e)FUZZ_dataProducer_uint32Range(producer, 0, 1),
                dictContentType));
        cSize = ZSTD_compress2(cctx, compressed, compressedCapacity, src, srcSize);
        FUZZ_ZASSERT(cSize);
+       // Compress a second time and check for determinism
+       {
+           size_t const cSize0 = cSize;
+           XXH64_hash_t const hash0 = XXH64(compressed, cSize, 0);
+           FUZZ_dataProducer_rollBack(producer, remainingBytes);
+           FUZZ_setRandomParameters(cctx, srcSize, producer);
+           FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 0));
+           if (refPrefix)
+               FUZZ_ZASSERT(ZSTD_CCtx_refPrefix_advanced(
+                   cctx, dict.buff, dict.size,
+                   dictContentType));
+           cSize = ZSTD_compress2(cctx, compressed, compressedCapacity, src, srcSize);
+           FUZZ_ASSERT(cSize == cSize0);
+           FUZZ_ASSERT(XXH64(compressed, cSize, 0) == hash0);
+       }
    }
    FUZZ_ZASSERT(cSize);
    if (refPrefix)
        FUZZ_ZASSERT(ZSTD_DCtx_refPrefix_advanced(
            dctx, dict.buff, dict.size,
@@ -66,6 +66,12 @@ size_t FUZZ_dataProducer_remainingBytes(FUZZ_dataProducer_t *producer){
    return producer->size;
}

+void FUZZ_dataProducer_rollBack(FUZZ_dataProducer_t *producer, size_t remainingBytes)
+{
+    FUZZ_ASSERT(remainingBytes >= producer->size);
+    producer->size = remainingBytes;
+}
+
int FUZZ_dataProducer_empty(FUZZ_dataProducer_t *producer) {
    return producer->size == 0;
}
@@ -49,6 +49,9 @@ int32_t FUZZ_dataProducer_int32Range(FUZZ_dataProducer_t *producer,
/* Returns the size of the remaining bytes of data in the producer */
size_t FUZZ_dataProducer_remainingBytes(FUZZ_dataProducer_t *producer);

+/* Rolls back the data producer state to have remainingBytes remaining */
+void FUZZ_dataProducer_rollBack(FUZZ_dataProducer_t *producer, size_t remainingBytes);
+
/* Returns true if the data producer is out of bytes */
int FUZZ_dataProducer_empty(FUZZ_dataProducer_t *producer);
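FUZZ_dataProducer_rollBack() lets a fuzzer draw the same random parameters twice: it records the remaining byte count before the first FUZZ_setRandomParameters() call, then restores it and draws again before the second compression, as the round-trip hunks above and below do. The sketch below uses a simplified stand-in producer, not the real FUZZ_dataProducer_t; it consumes values from the tail of its buffer, which is why restoring the size alone replays identical draws:

/* Simplified stand-in producer (illustration only, not FUZZ_dataProducer_t):
 * values are drawn from the tail of the buffer and `size` shrinks, so
 * restoring `size` replays the same sequence of draws. */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

typedef struct { const uint8_t* data; size_t size; } Producer;

static uint32_t draw32(Producer* p)
{
    uint32_t v = 0;
    for (int i = 0; i < 4 && p->size > 0; ++i)
        v = (v << 8) | p->data[--p->size];    /* consume from the end */
    return v;
}

int main(void)
{
    const uint8_t bytes[] = { 1, 2, 3, 4, 5, 6, 7, 8 };
    Producer p = { bytes, sizeof(bytes) };

    size_t const saved = p.size;       /* like FUZZ_dataProducer_remainingBytes() */
    uint32_t const first = draw32(&p);

    p.size = saved;                    /* like FUZZ_dataProducer_rollBack()       */
    uint32_t const second = draw32(&p);

    assert(second == first);           /* identical parameters on the second pass */
    return 0;
}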
@@ -35,16 +35,36 @@ static size_t roundTripTest(void *result, size_t resultCapacity,
    size_t dSize;
    int targetCBlockSize = 0;
    if (FUZZ_dataProducer_uint32Range(producer, 0, 1)) {
+       size_t const remainingBytes = FUZZ_dataProducer_remainingBytes(producer);
        FUZZ_setRandomParameters(cctx, srcSize, producer);
        cSize = ZSTD_compress2(cctx, compressed, compressedCapacity, src, srcSize);
        FUZZ_ZASSERT(cSize);
        FUZZ_ZASSERT(ZSTD_CCtx_getParameter(cctx, ZSTD_c_targetCBlockSize, &targetCBlockSize));
+       // Compress a second time and check for determinism
+       {
+           size_t const cSize0 = cSize;
+           XXH64_hash_t const hash0 = XXH64(compressed, cSize, 0);
+           FUZZ_dataProducer_rollBack(producer, remainingBytes);
+           FUZZ_setRandomParameters(cctx, srcSize, producer);
+           cSize = ZSTD_compress2(cctx, compressed, compressedCapacity, src, srcSize);
+           FUZZ_ASSERT(cSize == cSize0);
+           FUZZ_ASSERT(XXH64(compressed, cSize, 0) == hash0);
+       }
    } else {
        int const cLevel = FUZZ_dataProducer_int32Range(producer, kMinClevel, kMaxClevel);

        cSize = ZSTD_compressCCtx(
            cctx, compressed, compressedCapacity, src, srcSize, cLevel);
        FUZZ_ZASSERT(cSize);
+       // Compress a second time and check for determinism
+       {
+           size_t const cSize0 = cSize;
+           XXH64_hash_t const hash0 = XXH64(compressed, cSize, 0);
+           cSize = ZSTD_compressCCtx(
+               cctx, compressed, compressedCapacity, src, srcSize, cLevel);
+           FUZZ_ASSERT(cSize == cSize0);
+           FUZZ_ASSERT(XXH64(compressed, cSize, 0) == hash0);
+       }
    }
    FUZZ_ZASSERT(cSize);
    dSize = ZSTD_decompressDCtx(dctx, result, resultCapacity, compressed, cSize);
    FUZZ_ZASSERT(dSize);
    /* When superblock is enabled make sure we don't expand the block more than expected.