diff --git a/lib/common/huf.h b/lib/common/huf.h index 3026c43ea..4a87db5c1 100644 --- a/lib/common/huf.h +++ b/lib/common/huf.h @@ -247,7 +247,7 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, /** HUF_readCTable() : * Loading a CTable saved with HUF_writeCTable() */ -size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize); +size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights); /** HUF_getNbBits() : * Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX diff --git a/lib/compress/huf_compress.c b/lib/compress/huf_compress.c index 0cbba2c99..b8e6fb386 100644 --- a/lib/compress/huf_compress.c +++ b/lib/compress/huf_compress.c @@ -169,7 +169,7 @@ size_t HUF_writeCTable (void* dst, size_t maxDstSize, } -size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize) +size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights) { BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; /* init not required, even though some static analyzer may complain */ U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */ @@ -192,9 +192,11 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void } } /* fill nbBits */ + *hasZeroWeights = 0; { U32 n; for (n=0; nentropy.huf.CTable, &maxSymbolValue, dictPtr, dictEnd-dictPtr); + unsigned hasZeroWeights; + size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr, + dictEnd-dictPtr, &hasZeroWeights); + + /* We only set the loaded table as valid if it contains all non-zero + * weights. Otherwise, we set it to check */ + if (!hasZeroWeights) + bs->entropy.huf.repeatMode = HUF_repeat_valid; + else bs->entropy.huf.repeatMode = HUF_repeat_check; + RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted); RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted); dictPtr += hufHeaderSize; @@ -2967,7 +2976,6 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted); } } - bs->entropy.huf.repeatMode = HUF_repeat_valid; bs->entropy.fse.offcode_repeatMode = FSE_repeat_valid; bs->entropy.fse.matchlength_repeatMode = FSE_repeat_valid; bs->entropy.fse.litlength_repeatMode = FSE_repeat_valid; diff --git a/tests/dict-files/zero-weight-dict b/tests/dict-files/zero-weight-dict new file mode 100644 index 000000000..c40412052 Binary files /dev/null and b/tests/dict-files/zero-weight-dict differ diff --git a/tests/playTests.sh b/tests/playTests.sh index 295525114..df9568eb5 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -502,6 +502,22 @@ cmp tmp tmp_decompress || die "difference between original and decompressed file println "test : incorrect stream size" cat tmp | $ZSTD -14 -f -o tmp.zst --stream-size=11001 && die "should fail with incorrect stream size" +println "\n===> zstd zero weight dict test " +rm -f tmp* +cp "$TESTDIR/dict-files/zero-weight-dict" tmp_input +$ZSTD -D "$TESTDIR/dict-files/zero-weight-dict" tmp_input +$ZSTD -D "$TESTDIR/dict-files/zero-weight-dict" -d tmp_input.zst -o tmp_decomp +$DIFF tmp_decomp tmp_input +rm -rf tmp* + +println "\n===> zstd (valid) zero weight dict test " +rm -f tmp* +# 0 has a non-zero weight in the dictionary +echo "0000000000000000000000000" > tmp_input +$ZSTD -D "$TESTDIR/dict-files/zero-weight-dict" tmp_input +$ZSTD -D "$TESTDIR/dict-files/zero-weight-dict" -d tmp_input.zst -o tmp_decomp +$DIFF tmp_decomp tmp_input +rm -rf tmp* println "\n===> size-hint mode" @@ -1189,7 +1205,6 @@ $ZSTD --train-cover "$TESTDIR"/*.c "$PRGDIR"/*.c test -f dictionary rm -f tmp* dictionary - if [ "$isWindows" = false ] ; then println "\n===> zstd fifo named pipe test "