From d687d603e4a451de9620f5ffd548d29dd17000c4 Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Fri, 6 Sep 2019 10:46:19 -0700 Subject: [PATCH 1/6] Removing redundant condition in decompression, making first block rles valid to deocmpress --- lib/compress/zstd_compress.c | 8 -------- lib/compress/zstd_compress_internal.h | 1 - programs/fileio.c | 5 ----- 3 files changed, 14 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 62cab4ed5..4ba779805 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1314,7 +1314,6 @@ static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_CCtx_params params, U64 pl cctx->blockState.matchState.cParams = params.cParams; cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1; cctx->consumedSrcSize = 0; - cctx->isFirstBlock = 1; cctx->producedCSize = 0; if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN) cctx->appliedParams.fParams.contentSizeFlag = 0; @@ -1417,7 +1416,6 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, (U32)pledgedSrcSize, params.cParams.windowLog); assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); - zc->isFirstBlock = 1; if (crp == ZSTDcrp_continue) { if (ZSTD_equivalentParams(zc->appliedParams, params, zc->inBuffSize, @@ -2307,11 +2305,6 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, zc->bmi2); if (frame && - /* We don't want to emit our first block as a RLE even if it qualifies because - * doing so will cause the decoder to throw a "should consume all input error." - * https://github.com/facebook/zstd/blob/dev/programs/fileio.c#L1723 - */ - !zc->isFirstBlock && cSize < rleMaxLength && ZSTD_isRLE(ip, srcSize)) { @@ -2416,7 +2409,6 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, op += cSize; assert(dstCapacity >= cSize); dstCapacity -= cSize; - cctx->isFirstBlock = 0; DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u", (unsigned)cSize); } } diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 3a5c7f2d1..6d623cc6b 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -238,7 +238,6 @@ struct ZSTD_CCtx_s { XXH64_state_t xxhState; ZSTD_customMem customMem; size_t staticSize; - int isFirstBlock; seqStore_t seqStore; /* sequences storage ptrs */ ldmState_t ldmState; /* long distance matching state */ diff --git a/programs/fileio.c b/programs/fileio.c index 569a410c1..d63742d47 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -1711,11 +1711,6 @@ static unsigned long long FIO_decompressZstdFrame( } if (readSizeHint == 0) break; /* end of frame */ - if (inBuff.size != inBuff.pos) { - DISPLAYLEVEL(1, "%s : Decoding error (37) : should consume entire input \n", - srcFileName); - return FIO_ERROR_FRAME_DECODING; - } /* Fill input buffer */ { size_t const toDecode = MIN(readSizeHint, ress->srcBufferSize); /* support large skippable frames */ From a917cd597dc0a1a37e2300bc2d131c833999621c Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Fri, 6 Sep 2019 13:44:25 -0700 Subject: [PATCH 2/6] Put back omission for first rle block and updated comment as suggested --- lib/compress/zstd_compress.c | 8 ++++++++ lib/compress/zstd_compress_internal.h | 1 + 2 files changed, 9 insertions(+) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 4ba779805..f88cc82f0 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1314,6 +1314,7 @@ static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_CCtx_params params, U64 pl cctx->blockState.matchState.cParams = params.cParams; cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1; cctx->consumedSrcSize = 0; + cctx->isFirstBlock = 1; cctx->producedCSize = 0; if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN) cctx->appliedParams.fParams.contentSizeFlag = 0; @@ -1416,6 +1417,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, (U32)pledgedSrcSize, params.cParams.windowLog); assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); + zc->isFirstBlock = 1; if (crp == ZSTDcrp_continue) { if (ZSTD_equivalentParams(zc->appliedParams, params, zc->inBuffSize, @@ -2305,6 +2307,11 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, zc->bmi2); if (frame && + /* We don't want to emit our first block as a RLE even if it qualifies because + * doing so will cause the decoder to throw a "should consume all input error." + * This is only an issue for zstd <= v1.4.3 + */ + !zc->isFirstBlock && cSize < rleMaxLength && ZSTD_isRLE(ip, srcSize)) { @@ -2409,6 +2416,7 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, op += cSize; assert(dstCapacity >= cSize); dstCapacity -= cSize; + cctx->isFirstBlock = 0; DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u", (unsigned)cSize); } } diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 6d623cc6b..3a5c7f2d1 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -238,6 +238,7 @@ struct ZSTD_CCtx_s { XXH64_state_t xxhState; ZSTD_customMem customMem; size_t staticSize; + int isFirstBlock; seqStore_t seqStore; /* sequences storage ptrs */ ldmState_t ldmState; /* long distance matching state */ From 44e122053bba31d39cf0e280eddc48b8f7edfeb7 Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Fri, 6 Sep 2019 14:48:41 -0700 Subject: [PATCH 3/6] Mentioning cli only in the comment as suggested --- lib/compress/zstd_compress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index f88cc82f0..f7f571e87 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2308,7 +2308,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, if (frame && /* We don't want to emit our first block as a RLE even if it qualifies because - * doing so will cause the decoder to throw a "should consume all input error." + * doing so will cause the decoder (cli only) to throw a "should consume all input error." * This is only an issue for zstd <= v1.4.3 */ !zc->isFirstBlock && From 0b25ab2202fec7db59ccab8d73dc1a5fc1d679c3 Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Mon, 9 Sep 2019 11:54:43 -0700 Subject: [PATCH 4/6] Testing first block decompression cli --- tests/files/rle-first-block.zst | Bin 0 -> 45 bytes tests/golden/huffman-compressed-larger | Bin 0 -> 143 bytes tests/playTests.sh | 9 +++++++-- 3 files changed, 7 insertions(+), 2 deletions(-) create mode 100644 tests/files/rle-first-block.zst create mode 100644 tests/golden/huffman-compressed-larger diff --git a/tests/files/rle-first-block.zst b/tests/files/rle-first-block.zst new file mode 100644 index 0000000000000000000000000000000000000000..fd067edd74ef9bab1dcf9af83baa7fee24f73287 GIT binary patch literal 45 acmdPcs{eNh1A_nq6CTVAl>2BW_7DJtPX%=V literal 0 HcmV?d00001 diff --git a/tests/golden/huffman-compressed-larger b/tests/golden/huffman-compressed-larger new file mode 100644 index 0000000000000000000000000000000000000000..f594f1ae9816a52054935aab96eec94c4ffe14b7 GIT binary patch literal 143 zcmV;A0C4{(wJ-eyP!$9KW`-Ueka7hx0Dvr|0RR9101)1^uIswqo=F4%0Du4hfEEB3 z02Tl|2JCX%cWtRD`Y@9(#KBM?3Vi8VgP*Qj(lfvmE1c0RaG879=~%<1Ug5Gim?; literal 0 HcmV?d00001 diff --git a/tests/playTests.sh b/tests/playTests.sh index 69387321f..34f4dfcc2 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -242,6 +242,11 @@ $ZSTD -f tmp && die "attempt to compress a non existing file" test -f tmp.zst # destination file should still be present rm tmp* +println "\n===> decompression only tests " +head -c 1M /dev/zero > tmp +$ZSTD -d -o tmp1 "$TESTDIR/files/rle-first-block.zst" +$DIFF -s tmp1 tmp +rm tmp* println "test : compress multiple files" println hello > tmp1 @@ -583,8 +588,8 @@ $ZSTD -t tmpSplit.* && die "bad file not detected !" println "\n===> golden files tests " -$ZSTD -t -r "$TESTDIR/files" -$ZSTD -c -r "$TESTDIR/files" | $ZSTD -t +$ZSTD -t -r "$TESTDIR/golden" +$ZSTD -c -r "$TESTDIR/golden" | $ZSTD -t println "\n===> benchmark mode tests " From e6be4cf4eb16b6840b558fded58d834067bccfcc Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Mon, 9 Sep 2019 12:08:33 -0700 Subject: [PATCH 5/6] Changing test file directory names to be more descriptive --- .../huffman-compressed-larger | Bin .../rle-first-block.zst | Bin tests/golden/huffman-compressed-larger | Bin 143 -> 0 bytes tests/playTests.sh | 6 +++--- 4 files changed, 3 insertions(+), 3 deletions(-) rename tests/{files => golden-compression}/huffman-compressed-larger (100%) rename tests/{files => golden-decompression}/rle-first-block.zst (100%) delete mode 100644 tests/golden/huffman-compressed-larger diff --git a/tests/files/huffman-compressed-larger b/tests/golden-compression/huffman-compressed-larger similarity index 100% rename from tests/files/huffman-compressed-larger rename to tests/golden-compression/huffman-compressed-larger diff --git a/tests/files/rle-first-block.zst b/tests/golden-decompression/rle-first-block.zst similarity index 100% rename from tests/files/rle-first-block.zst rename to tests/golden-decompression/rle-first-block.zst diff --git a/tests/golden/huffman-compressed-larger b/tests/golden/huffman-compressed-larger deleted file mode 100644 index f594f1ae9816a52054935aab96eec94c4ffe14b7..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 143 zcmV;A0C4{(wJ-eyP!$9KW`-Ueka7hx0Dvr|0RR9101)1^uIswqo=F4%0Du4hfEEB3 z02Tl|2JCX%cWtRD`Y@9(#KBM?3Vi8VgP*Qj(lfvmE1c0RaG879=~%<1Ug5Gim?; diff --git a/tests/playTests.sh b/tests/playTests.sh index 34f4dfcc2..5d9373a09 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -244,7 +244,7 @@ rm tmp* println "\n===> decompression only tests " head -c 1M /dev/zero > tmp -$ZSTD -d -o tmp1 "$TESTDIR/files/rle-first-block.zst" +$ZSTD -d -o tmp1 "$TESTDIR/golden-decompression/rle-first-block.zst" $DIFF -s tmp1 tmp rm tmp* @@ -588,8 +588,8 @@ $ZSTD -t tmpSplit.* && die "bad file not detected !" println "\n===> golden files tests " -$ZSTD -t -r "$TESTDIR/golden" -$ZSTD -c -r "$TESTDIR/golden" | $ZSTD -t +$ZSTD -t -r "$TESTDIR/golden-compression" +$ZSTD -c -r "$TESTDIR/golden-compression" | $ZSTD -t println "\n===> benchmark mode tests " From caaf43b258f7ff40c3ddfa7dcfa7f4c0980bb6a6 Mon Sep 17 00:00:00 2001 From: Bimba Shrestha Date: Tue, 10 Sep 2019 09:30:37 -0700 Subject: [PATCH 6/6] Using a number instead of M prefix in head call --- tests/playTests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/playTests.sh b/tests/playTests.sh index 5d9373a09..3b74ed22b 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -243,7 +243,7 @@ test -f tmp.zst # destination file should still be present rm tmp* println "\n===> decompression only tests " -head -c 1M /dev/zero > tmp +head -c 1048576 /dev/zero > tmp $ZSTD -d -o tmp1 "$TESTDIR/golden-decompression/rle-first-block.zst" $DIFF -s tmp1 tmp rm tmp*