From f9513115e4c0bfbc79b5932382a8db06a155acb9 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Wed, 13 Feb 2019 14:59:22 -0800 Subject: [PATCH 1/2] [libzstd] Add ZSTD_c_literalCompressionMode flag It controls the literals compression. It is either `auto`, `huffman`, or `uncompressed`. It defaults to `auto`, which is the current behavior. --- lib/compress/zstd_compress.c | 39 +++++++++++++++++++++++++-- lib/compress/zstd_compress_internal.h | 1 + lib/zstd.h | 19 ++++++++++++- 3 files changed, 56 insertions(+), 3 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 2bf8ee535..e18051f00 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -360,6 +360,12 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) bounds.upperBound = ZSTD_dictForceCopy; /* note : how to ensure at compile time that this is the highest value enum ? */ return bounds; + case ZSTD_c_literalCompressionMode: + ZSTD_STATIC_ASSERT(ZSTD_lcm_auto < ZSTD_lcm_huffman && ZSTD_lcm_huffman < ZSTD_lcm_uncompressed); + bounds.lowerBound = ZSTD_lcm_auto; + bounds.upperBound = ZSTD_lcm_uncompressed; + return bounds; + default: { ZSTD_bounds const boundError = { ERROR(parameter_unsupported), 0, 0 }; return boundError; @@ -396,6 +402,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) case ZSTD_c_minMatch: case ZSTD_c_targetLength: case ZSTD_c_strategy: + case ZSTD_c_literalCompressionMode: return 1; case ZSTD_c_format: @@ -463,6 +470,9 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) case ZSTD_c_forceAttachDict: return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); + case ZSTD_c_literalCompressionMode: + return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); + case ZSTD_c_nbWorkers: RETURN_ERROR_IF((value!=0) && cctx->staticSize, parameter_unsupported, "MT not compatible with static alloc"); @@ -575,6 +585,13 @@ size_t ZSTD_CCtxParam_setParameter(ZSTD_CCtx_params* CCtxParams, return CCtxParams->attachDictPref; } + case ZSTD_c_literalCompressionMode : { + const ZSTD_literalCompressionMode_e lcm = (ZSTD_literalCompressionMode_e)value; + BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm); + CCtxParams->literalCompressionMode = lcm; + return CCtxParams->literalCompressionMode; + } + case ZSTD_c_nbWorkers : #ifndef ZSTD_MULTITHREAD RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); @@ -688,6 +705,9 @@ size_t ZSTD_CCtxParam_getParameter( case ZSTD_c_forceAttachDict : *value = CCtxParams->attachDictPref; break; + case ZSTD_c_literalCompressionMode : + *value = CCtxParams->literalCompressionMode; + break; case ZSTD_c_nbWorkers : #ifndef ZSTD_MULTITHREAD assert(CCtxParams->nbWorkers == 0); @@ -2369,6 +2389,21 @@ static size_t ZSTD_encodeSequences( sequences, nbSeq, longOffsets); } +static int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams) +{ + switch (cctxParams->literalCompressionMode) { + case ZSTD_lcm_huffman: + return 0; + case ZSTD_lcm_uncompressed: + return 1; + default: + assert(0 /* impossible: pre-validated */); + /* fall-through */ + case ZSTD_lcm_auto: + return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0); + } +} + /* ZSTD_compressSequences_internal(): * actually compresses both literals and sequences */ MEM_STATIC size_t @@ -2404,10 +2439,10 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, /* Compress literals */ { const BYTE* const literals = seqStorePtr->litStart; size_t const litSize = seqStorePtr->lit - literals; - int const disableLiteralCompression = (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0); size_t const cSize = ZSTD_compressLiterals( &prevEntropy->huf, &nextEntropy->huf, - cctxParams->cParams.strategy, disableLiteralCompression, + cctxParams->cParams.strategy, + ZSTD_disableLiteralsCompression(cctxParams), op, dstCapacity, literals, litSize, workspace, wkspSize, diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 29bca5985..e0b54299d 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -188,6 +188,7 @@ struct ZSTD_CCtx_params_s { * 1< Date: Wed, 13 Feb 2019 15:00:32 -0800 Subject: [PATCH 2/2] [regression] Test ZSTD_c_literalCompressionMode Test a positive compression level with uncompressed literals, and a negative compression level with compressed literals. I double checked the `results.csv` and made sure that the compressed sizes make sense. --- tests/regression/config.c | 22 ++++++++++++++++++++++ tests/regression/results.csv | 30 ++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/tests/regression/config.c b/tests/regression/config.c index 8a6103b29..f04d32d83 100644 --- a/tests/regression/config.c +++ b/tests/regression/config.c @@ -122,6 +122,26 @@ static config_t small_clog = { .param_values = PARAM_VALUES(small_clog_param_values), }; +static param_value_t const uncompressed_literals_param_values[] = { + {.param = ZSTD_c_compressionLevel, .value = 3}, + {.param = ZSTD_c_literalCompressionMode, .value = ZSTD_lcm_uncompressed}, +}; + +static config_t uncompressed_literals = { + .name = "uncompressed literals", + .param_values = PARAM_VALUES(uncompressed_literals_param_values), +}; + +static param_value_t const huffman_literals_param_values[] = { + {.param = ZSTD_c_compressionLevel, .value = -1}, + {.param = ZSTD_c_literalCompressionMode, .value = ZSTD_lcm_huffman}, +}; + +static config_t huffman_literals = { + .name = "huffman literals", + .param_values = PARAM_VALUES(huffman_literals_param_values), +}; + static param_value_t const explicit_params_param_values[] = { {.param = ZSTD_c_checksumFlag, .value = 1}, {.param = ZSTD_c_contentSizeFlag, .value = 0}, @@ -155,6 +175,8 @@ static config_t const* g_configs[] = { &small_hlog, &small_clog, &explicit_params, + &uncompressed_literals, + &huffman_literals, NULL, }; diff --git a/tests/regression/results.csv b/tests/regression/results.csv index a62178d6e..d9c5a4a0a 100644 --- a/tests/regression/results.csv +++ b/tests/regression/results.csv @@ -13,6 +13,8 @@ silesia.tar, level 9, compress sim silesia.tar, level 13, compress simple, 4491702 silesia.tar, level 16, compress simple, 4381277 silesia.tar, level 19, compress simple, 4281514 +silesia.tar, uncompressed literals, compress simple, 4875008 +silesia.tar, huffman literals, compress simple, 6195462 silesia, level -5, compress cctx, 7152294 silesia, level -3, compress cctx, 6789969 silesia, level -1, compress cctx, 6191548 @@ -34,6 +36,8 @@ silesia, small window log, compress cct silesia, small hash log, compress cctx, 6554898 silesia, small chain log, compress cctx, 4931093 silesia, explicit params, compress cctx, 4813352 +silesia, uncompressed literals, compress cctx, 4862377 +silesia, huffman literals, compress cctx, 6191548 github, level -5, compress cctx, 232744 github, level -5 with dict, compress cctx, 47294 github, level -3, compress cctx, 220611 @@ -69,6 +73,8 @@ github, small window log, compress cct github, small hash log, compress cctx, decompression error github, small chain log, compress cctx, decompression error github, explicit params, compress cctx, decompression error +github, uncompressed literals, compress cctx, 136397 +github, huffman literals, compress cctx, 176575 silesia, level -5, zstdcli, 7152342 silesia, level -3, zstdcli, 6790021 silesia, level -1, zstdcli, 6191597 @@ -169,6 +175,8 @@ silesia, small window log, advanced one silesia, small hash log, advanced one pass, 6554898 silesia, small chain log, advanced one pass, 4931093 silesia, explicit params, advanced one pass, 4815369 +silesia, uncompressed literals, advanced one pass, 5155424 +silesia, huffman literals, advanced one pass, 5341356 silesia.tar, level -5, advanced one pass, 7160438 silesia.tar, level -3, advanced one pass, 6789024 silesia.tar, level -1, advanced one pass, 6195462 @@ -191,6 +199,8 @@ silesia.tar, small window log, advanced one silesia.tar, small hash log, advanced one pass, 6587833 silesia.tar, small chain log, advanced one pass, 4943255 silesia.tar, explicit params, advanced one pass, 4829974 +silesia.tar, uncompressed literals, advanced one pass, 5157992 +silesia.tar, huffman literals, advanced one pass, 5358079 github, level -5, advanced one pass, 232744 github, level -5 with dict, advanced one pass, 46718 github, level -3, advanced one pass, 220611 @@ -227,6 +237,8 @@ github, small window log, advanced one github, small hash log, advanced one pass, 135467 github, small chain log, advanced one pass, 136314 github, explicit params, advanced one pass, 137670 +github, uncompressed literals, advanced one pass, 167004 +github, huffman literals, advanced one pass, 143457 silesia, level -5, advanced one pass small out, 7152294 silesia, level -3, advanced one pass small out, 6789969 silesia, level -1, advanced one pass small out, 6191548 @@ -249,6 +261,8 @@ silesia, small window log, advanced one silesia, small hash log, advanced one pass small out, 6554898 silesia, small chain log, advanced one pass small out, 4931093 silesia, explicit params, advanced one pass small out, 4815369 +silesia, uncompressed literals, advanced one pass small out, 5155424 +silesia, huffman literals, advanced one pass small out, 5341356 silesia.tar, level -5, advanced one pass small out, 7160438 silesia.tar, level -3, advanced one pass small out, 6789024 silesia.tar, level -1, advanced one pass small out, 6195462 @@ -271,6 +285,8 @@ silesia.tar, small window log, advanced one silesia.tar, small hash log, advanced one pass small out, 6587833 silesia.tar, small chain log, advanced one pass small out, 4943255 silesia.tar, explicit params, advanced one pass small out, 4829974 +silesia.tar, uncompressed literals, advanced one pass small out, 5157992 +silesia.tar, huffman literals, advanced one pass small out, 5358079 github, level -5, advanced one pass small out, 232744 github, level -5 with dict, advanced one pass small out, 46718 github, level -3, advanced one pass small out, 220611 @@ -307,6 +323,8 @@ github, small window log, advanced one github, small hash log, advanced one pass small out, 135467 github, small chain log, advanced one pass small out, 136314 github, explicit params, advanced one pass small out, 137670 +github, uncompressed literals, advanced one pass small out, 167004 +github, huffman literals, advanced one pass small out, 143457 silesia, level -5, advanced streaming, 7152294 silesia, level -3, advanced streaming, 6789973 silesia, level -1, advanced streaming, 6191549 @@ -329,6 +347,8 @@ silesia, small window log, advanced str silesia, small hash log, advanced streaming, 6554898 silesia, small chain log, advanced streaming, 4931093 silesia, explicit params, advanced streaming, 4815380 +silesia, uncompressed literals, advanced streaming, 5155424 +silesia, huffman literals, advanced streaming, 5341357 silesia.tar, level -5, advanced streaming, 7160440 silesia.tar, level -3, advanced streaming, 6789026 silesia.tar, level -1, advanced streaming, 6195465 @@ -351,6 +371,8 @@ silesia.tar, small window log, advanced str silesia.tar, small hash log, advanced streaming, 6587834 silesia.tar, small chain log, advanced streaming, 4943260 silesia.tar, explicit params, advanced streaming, 4830002 +silesia.tar, uncompressed literals, advanced streaming, 5157995 +silesia.tar, huffman literals, advanced streaming, 5358083 github, level -5, advanced streaming, 232744 github, level -5 with dict, advanced streaming, 46718 github, level -3, advanced streaming, 220611 @@ -387,6 +409,8 @@ github, small window log, advanced str github, small hash log, advanced streaming, 135467 github, small chain log, advanced streaming, 136314 github, explicit params, advanced streaming, 137670 +github, uncompressed literals, advanced streaming, 167004 +github, huffman literals, advanced streaming, 143457 silesia, level -5, old streaming, 7152294 silesia, level -3, old streaming, 6789973 silesia, level -1, old streaming, 6191549 @@ -402,6 +426,8 @@ silesia, level 13, old streamin silesia, level 16, old streaming, 4377391 silesia, level 19, old streaming, 4293262 silesia, no source size, old streaming, 4862341 +silesia, uncompressed literals, old streaming, 4862377 +silesia, huffman literals, old streaming, 6191549 silesia.tar, level -5, old streaming, 7160440 silesia.tar, level -3, old streaming, 6789026 silesia.tar, level -1, old streaming, 6195465 @@ -417,6 +443,8 @@ silesia.tar, level 13, old streamin silesia.tar, level 16, old streaming, 4381277 silesia.tar, level 19, old streaming, 4281514 silesia.tar, no source size, old streaming, 4875006 +silesia.tar, uncompressed literals, old streaming, 4875010 +silesia.tar, huffman literals, old streaming, 6195465 github, level -5, old streaming, 232744 github, level -5 with dict, old streaming, 46718 github, level -3, old streaming, 220611 @@ -446,3 +474,5 @@ github, level 16 with dict, old streamin github, level 19, old streaming, 133717 github, level 19 with dict, old streaming, 37576 github, no source size, old streaming, 141003 +github, uncompressed literals, old streaming, 136397 +github, huffman literals, old streaming, 176575