From f9cb3487762024a6297bf539aabdaef4c996ea4a Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Fri, 16 Nov 2018 15:01:50 -0800 Subject: [PATCH 01/21] Add HUF_DECOMPRESS_MINIMAL Macro, Which Avoids Using X2 Variants --- lib/decompress/huf_decompress.c | 55 ++++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/lib/decompress/huf_decompress.c b/lib/decompress/huf_decompress.c index 83ecaff01..d4a548f2f 100644 --- a/lib/decompress/huf_decompress.c +++ b/lib/decompress/huf_decompress.c @@ -921,8 +921,13 @@ size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const HUF_DTable* DTable) { DTableDesc const dtd = HUF_getDTableDesc(DTable); +#ifdef HUF_DECOMPRESS_MINIMAL + assert(dtd.tableType == 0); + return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#else return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#endif } size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, @@ -930,8 +935,13 @@ size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const HUF_DTable* DTable) { DTableDesc const dtd = HUF_getDTableDesc(DTable); +#ifdef HUF_DECOMPRESS_MINIMAL + assert(dtd.tableType == 0); + return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#else return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#endif } @@ -966,6 +976,9 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize) { assert(dstSize > 0); assert(dstSize <= 128*1024); +#ifdef HUF_DECOMPRESS_MINIMAL + return 0; +#else /* decoder timing evaluation */ { U32 const Q = (cSrcSize >= dstSize) ? 
15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */ U32 const D256 = (U32)(dstSize >> 8); @@ -973,14 +986,18 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize) U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256); DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */ return DTime1 < DTime0; -} } + } +#endif +} typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) { +#ifndef HUF_DECOMPRESS_MINIMAL static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 }; +#endif /* validation checks */ if (dstSize == 0) return ERROR(dstSize_tooSmall); @@ -989,7 +1006,12 @@ size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcS if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#ifdef HUF_DECOMPRESS_MINIMAL + assert(algoNb == 0); + return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize); +#else return decompress[algoNb](dst, dstSize, cSrc, cSrcSize); +#endif } } @@ -1002,8 +1024,13 @@ size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#ifdef HUF_DECOMPRESS_MINIMAL + assert(algoNb == 0); + return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); +#else return algoNb ? 
HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) : HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ; +#endif } } @@ -1025,8 +1052,13 @@ size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, if (cSrcSize == 0) return ERROR(corruption_detected); { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#ifdef HUF_DECOMPRESS_MINIMAL + assert(algoNb == 0); + return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); +#else return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize): HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); +#endif } } @@ -1041,10 +1073,16 @@ size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#ifdef HUF_DECOMPRESS_MINIMAL + assert(algoNb == 0); + return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize); +#else return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize): HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); +#endif } } @@ -1060,8 +1098,13 @@ size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) { DTableDesc const dtd = HUF_getDTableDesc(DTable); +#ifdef HUF_DECOMPRESS_MINIMAL + assert(dtd.tableType == 0); + return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#else return dtd.tableType ? 
HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#endif } size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) @@ -1079,8 +1122,13 @@ size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstS size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) { DTableDesc const dtd = HUF_getDTableDesc(DTable); +#ifdef HUF_DECOMPRESS_MINIMAL + assert(dtd.tableType == 0); + return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#else return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#endif } size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) @@ -1090,7 +1138,12 @@ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t ds if (cSrcSize == 0) return ERROR(corruption_detected); { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#ifdef HUF_DECOMPRESS_MINIMAL + assert(algoNb == 0); + return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); +#else return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) : HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); +#endif } } From 36a84b07a8c6a33b2756baf043b80b2e419b2f32 Mon Sep 17 00:00:00 2001 From: "W. 
Felix Handte" Date: Fri, 16 Nov 2018 15:28:53 -0800 Subject: [PATCH 02/21] Load Dictionaries as X1 Tables --- lib/decompress/zstd_decompress.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 510ce3c65..62ba86aa4 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -954,9 +954,16 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, ZSTD_STATIC_ASSERT(sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable) >= HUF_DECOMPRESS_WORKSPACE_SIZE); { void* const workspace = &entropy->LLTable; /* use fse tables as temporary workspace; implies fse tables are grouped together */ size_t const workspaceSize = sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable); +#ifdef HUF_DECOMPRESS_MINIMAL + /* in minimal huffman, we always use X1 variants */ + size_t const hSize = HUF_readDTableX1_wksp(entropy->hufTable, + dictPtr, dictEnd - dictPtr, + workspace, workspaceSize); +#else size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable, dictPtr, dictEnd - dictPtr, workspace, workspaceSize); +#endif if (HUF_isError(hSize)) return ERROR(dictionary_corrupted); dictPtr += hSize; } From f45c9df42eb1bef328177f0e347bed209fa6012c Mon Sep 17 00:00:00 2001 From: "W. 
Felix Handte" Date: Fri, 16 Nov 2018 15:30:49 -0800 Subject: [PATCH 03/21] Totally Hide/Disable X2 Variants when HUF_DECOMPRESS_MINIMAL is Defined --- lib/common/huf.h | 14 ++++++++++++++ lib/decompress/huf_decompress.c | 3 +++ 2 files changed, 17 insertions(+) diff --git a/lib/common/huf.h b/lib/common/huf.h index de9464111..75e5230e3 100644 --- a/lib/common/huf.h +++ b/lib/common/huf.h @@ -173,15 +173,19 @@ typedef U32 HUF_DTable; * Advanced decompression functions ******************************************/ size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ +#ifndef HUF_DECOMPRESS_MINIMAL size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ +#endif size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< decodes RLE and uncompressed */ size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */ size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< considers RLE and uncompressed as errors */ size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */ +#ifndef HUF_DECOMPRESS_MINIMAL size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */ +#endif /* 
**************************************** @@ -279,12 +283,16 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize); size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize); size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); +#ifndef HUF_DECOMPRESS_MINIMAL size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize); size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); +#endif size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#ifndef HUF_DECOMPRESS_MINIMAL size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#endif /* ====================== */ @@ -306,18 +314,24 @@ size_t HUF_compress1X_repeat(void* dst, size_t dstSize, HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2); size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */ +#ifndef HUF_DECOMPRESS_MINIMAL size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */ +#endif size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< 
single-symbol decoder */ +#ifndef HUF_DECOMPRESS_MINIMAL size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */ +#endif size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of sing or double symbol decoder, based on DTable */ size_t HUF_decompress1X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#ifndef HUF_DECOMPRESS_MINIMAL size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#endif /* BMI2 variants. * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. diff --git a/lib/decompress/huf_decompress.c b/lib/decompress/huf_decompress.c index d4a548f2f..7c32c4766 100644 --- a/lib/decompress/huf_decompress.c +++ b/lib/decompress/huf_decompress.c @@ -437,6 +437,7 @@ size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cS return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); } +#ifndef HUF_DECOMPRESS_MINIMAL /* *************************/ /* double-symbols decoding */ @@ -911,6 +912,8 @@ size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cS return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); } +#endif /* HUF_DECOMPRESS_MINIMAL */ + /* ***********************************/ /* Universal decompression selectors */ From df28e5babdfd927901de9be5856060981daf831f Mon Sep 17 00:00:00 2001 From: "W. 
Felix Handte" Date: Fri, 16 Nov 2018 15:02:11 -0800 Subject: [PATCH 04/21] Add ZSTD_DECOMPRESS_MINIMAL Macro, Which Reduces Branching of Decompress Variants --- lib/decompress/zstd_decompress_block.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index 869bdd9aa..00f04208b 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -1201,13 +1201,17 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, } /* Build Decoding Tables */ - { int usePrefetchDecoder = dctx->ddictIsCold; + { +#ifndef ZSTD_DECOMPRESS_MINIMAL + int usePrefetchDecoder = dctx->ddictIsCold; +#endif int nbSeq; size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize); if (ZSTD_isError(seqHSize)) return seqHSize; ip += seqHSize; srcSize -= seqHSize; +#ifndef ZSTD_DECOMPRESS_MINIMAL if ( !usePrefetchDecoder && (!frame || (dctx->fParams.windowSize > (1<<24))) && (nbSeq>ADVANCED_SEQS) ) { /* could probably use a larger nbSeq limit */ @@ -1215,11 +1219,14 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */ usePrefetchDecoder = (shareLongOffsets >= minShare); } +#endif dctx->ddictIsCold = 0; +#ifndef ZSTD_DECOMPRESS_MINIMAL if (usePrefetchDecoder) return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset); +#endif /* else */ return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset); From 9d5f3963ff02827b17945edfc6facb676866be0b Mon Sep 17 00:00:00 2001 From: "W. 
Felix Handte" Date: Fri, 16 Nov 2018 16:43:57 -0800 Subject: [PATCH 05/21] Add Option to Not Request Inlining with ZSTD_NO_INLINE --- lib/common/compiler.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/lib/common/compiler.h b/lib/common/compiler.h index e6267e90b..7f561282c 100644 --- a/lib/common/compiler.h +++ b/lib/common/compiler.h @@ -15,6 +15,8 @@ * Compiler specifics *********************************************************/ /* force inlining */ + +#if !defined(ZSTD_NO_INLINE) #if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ # define INLINE_KEYWORD inline #else @@ -29,6 +31,13 @@ # define FORCE_INLINE_ATTR #endif +#else + +#define INLINE_KEYWORD +#define FORCE_INLINE_ATTR + +#endif + /** * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant * parameters. They must be inlined for the compiler to elimininate the constant From 605dd576ee8f3421ad57eba6ea2290beba0ef103 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Fri, 16 Nov 2018 16:44:41 -0800 Subject: [PATCH 06/21] Remove Error Strings with ZSTD_STRIP_ERROR_STRINGS --- lib/common/error_private.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/common/error_private.c b/lib/common/error_private.c index d39e1a7ad..692e66b22 100644 --- a/lib/common/error_private.c +++ b/lib/common/error_private.c @@ -14,6 +14,9 @@ const char* ERR_getErrorString(ERR_enum code) { +#ifdef ZSTD_STRIP_ERROR_STRINGS + return "Error strings stripped"; +#else static const char* const notErrorCode = "Unspecified error code"; switch( code ) { @@ -46,4 +49,5 @@ const char* ERR_getErrorString(ERR_enum code) case PREFIX(maxCode): default: return notErrorCode; } +#endif } From 64553a0e35dd632765a218b1ae4edecdc3d6039a Mon Sep 17 00:00:00 2001 From: "W. 
Felix Handte" Date: Mon, 3 Dec 2018 17:28:02 -0800 Subject: [PATCH 07/21] Rename ZSTD_DECOMPRESS_MINIMAL -> ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT --- lib/decompress/zstd_decompress_block.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index 00f04208b..a251e4f86 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -1202,7 +1202,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, /* Build Decoding Tables */ { -#ifndef ZSTD_DECOMPRESS_MINIMAL +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT int usePrefetchDecoder = dctx->ddictIsCold; #endif int nbSeq; @@ -1211,7 +1211,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, ip += seqHSize; srcSize -= seqHSize; -#ifndef ZSTD_DECOMPRESS_MINIMAL +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT if ( !usePrefetchDecoder && (!frame || (dctx->fParams.windowSize > (1<<24))) && (nbSeq>ADVANCED_SEQS) ) { /* could probably use a larger nbSeq limit */ @@ -1223,7 +1223,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, dctx->ddictIsCold = 0; -#ifndef ZSTD_DECOMPRESS_MINIMAL +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT if (usePrefetchDecoder) return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset); #endif From 4bbb8a48adc7ed0803a0c4ae2373b2d748870424 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Mon, 3 Dec 2018 17:36:24 -0800 Subject: [PATCH 08/21] Add ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG This macro forces behavior in the opposite direction. 
--- lib/decompress/zstd_decompress_block.c | 29 +++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index a251e4f86..dbe4340ea 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -27,6 +27,18 @@ #include "zstd_ddict.h" /* ZSTD_DDictDictContent */ #include "zstd_decompress_block.h" +/*_******************************************************* +* Macros +**********************************************************/ + +/* These two optional macros force the use one way or another of the two + * ZSTD_decompressSequences implementations. You can't force in both directions + * at the same time. + */ +#if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) +#error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!" +#endif /*_******************************************************* @@ -1202,7 +1214,12 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, /* Build Decoding Tables */ { -#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT + /* These macros control at build-time which decompressor implementation + * we use. If neither is defined, we do some inspection and dispatch at + * runtime. 
+ */ +#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) int usePrefetchDecoder = dctx->ddictIsCold; #endif int nbSeq; @@ -1211,7 +1228,8 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, ip += seqHSize; srcSize -= seqHSize; -#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT +#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) if ( !usePrefetchDecoder && (!frame || (dctx->fParams.windowSize > (1<<24))) && (nbSeq>ADVANCED_SEQS) ) { /* could probably use a larger nbSeq limit */ @@ -1223,13 +1241,18 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, dctx->ddictIsCold = 0; -#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT +#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) if (usePrefetchDecoder) +#endif +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset); #endif +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG /* else */ return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset); +#endif } } From 432314b58a9c0ec0848913162128ec6463e0c030 Mon Sep 17 00:00:00 2001 From: "W. 
Felix Handte" Date: Tue, 4 Dec 2018 10:01:58 -0800 Subject: [PATCH 09/21] Rename HUF_DECOMPRESS_MINIMAL -> HUF_FORCE_DECOMPRESS_X1 --- lib/common/huf.h | 14 +++++++------- lib/decompress/huf_decompress.c | 26 +++++++++++++------------- lib/decompress/zstd_decompress.c | 2 +- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/lib/common/huf.h b/lib/common/huf.h index 75e5230e3..5994ed9c4 100644 --- a/lib/common/huf.h +++ b/lib/common/huf.h @@ -173,7 +173,7 @@ typedef U32 HUF_DTable; * Advanced decompression functions ******************************************/ size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ -#ifndef HUF_DECOMPRESS_MINIMAL +#ifndef HUF_FORCE_DECOMPRESS_X1 size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ #endif @@ -182,7 +182,7 @@ size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, con size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< considers RLE and uncompressed as errors */ size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */ -#ifndef HUF_DECOMPRESS_MINIMAL +#ifndef HUF_FORCE_DECOMPRESS_X1 size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */ #endif @@ -283,14 +283,14 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize); size_t 
HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize); size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); -#ifndef HUF_DECOMPRESS_MINIMAL +#ifndef HUF_FORCE_DECOMPRESS_X1 size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize); size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); #endif size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); -#ifndef HUF_DECOMPRESS_MINIMAL +#ifndef HUF_FORCE_DECOMPRESS_X1 size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); #endif @@ -314,7 +314,7 @@ size_t HUF_compress1X_repeat(void* dst, size_t dstSize, HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2); size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */ -#ifndef HUF_DECOMPRESS_MINIMAL +#ifndef HUF_FORCE_DECOMPRESS_X1 size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */ #endif @@ -322,14 +322,14 @@ size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */ -#ifndef HUF_DECOMPRESS_MINIMAL +#ifndef 
HUF_FORCE_DECOMPRESS_X1 size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */ #endif size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of sing or double symbol decoder, based on DTable */ size_t HUF_decompress1X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); -#ifndef HUF_DECOMPRESS_MINIMAL +#ifndef HUF_FORCE_DECOMPRESS_X1 size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); #endif diff --git a/lib/decompress/huf_decompress.c b/lib/decompress/huf_decompress.c index 7c32c4766..1e0d58559 100644 --- a/lib/decompress/huf_decompress.c +++ b/lib/decompress/huf_decompress.c @@ -437,7 +437,7 @@ size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cS return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); } -#ifndef HUF_DECOMPRESS_MINIMAL +#ifndef HUF_FORCE_DECOMPRESS_X1 /* *************************/ /* double-symbols decoding */ @@ -912,7 +912,7 @@ size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cS return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); } -#endif /* HUF_DECOMPRESS_MINIMAL */ +#endif /* HUF_FORCE_DECOMPRESS_X1 */ /* ***********************************/ @@ -924,7 +924,7 @@ size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const HUF_DTable* DTable) { DTableDesc const dtd = HUF_getDTableDesc(DTable); -#ifdef HUF_DECOMPRESS_MINIMAL +#ifdef HUF_FORCE_DECOMPRESS_X1 assert(dtd.tableType == 0); return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, 
DTable, /* bmi2 */ 0); #else @@ -938,7 +938,7 @@ size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const HUF_DTable* DTable) { DTableDesc const dtd = HUF_getDTableDesc(DTable); -#ifdef HUF_DECOMPRESS_MINIMAL +#ifdef HUF_FORCE_DECOMPRESS_X1 assert(dtd.tableType == 0); return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); #else @@ -979,7 +979,7 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize) { assert(dstSize > 0); assert(dstSize <= 128*1024); -#ifdef HUF_DECOMPRESS_MINIMAL +#ifdef HUF_FORCE_DECOMPRESS_X1 return 0; #else /* decoder timing evaluation */ @@ -998,7 +998,7 @@ typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) { -#ifndef HUF_DECOMPRESS_MINIMAL +#ifndef HUF_FORCE_DECOMPRESS_X1 static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 }; #endif @@ -1009,7 +1009,7 @@ size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcS if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); -#ifdef HUF_DECOMPRESS_MINIMAL +#ifdef HUF_FORCE_DECOMPRESS_X1 assert(algoNb == 0); return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize); #else @@ -1027,7 +1027,7 @@ size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); -#ifdef HUF_DECOMPRESS_MINIMAL +#ifdef HUF_FORCE_DECOMPRESS_X1 assert(algoNb == 0); return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); #else @@ -1055,7 +1055,7 @@ size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, if (cSrcSize == 0) return ERROR(corruption_detected); { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); -#ifdef 
HUF_DECOMPRESS_MINIMAL +#ifdef HUF_FORCE_DECOMPRESS_X1 assert(algoNb == 0); return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); #else @@ -1076,7 +1076,7 @@ size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); -#ifdef HUF_DECOMPRESS_MINIMAL +#ifdef HUF_FORCE_DECOMPRESS_X1 assert(algoNb == 0); return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); @@ -1101,7 +1101,7 @@ size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) { DTableDesc const dtd = HUF_getDTableDesc(DTable); -#ifdef HUF_DECOMPRESS_MINIMAL +#ifdef HUF_FORCE_DECOMPRESS_X1 assert(dtd.tableType == 0); return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); #else @@ -1125,7 +1125,7 @@ size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstS size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) { DTableDesc const dtd = HUF_getDTableDesc(DTable); -#ifdef HUF_DECOMPRESS_MINIMAL +#ifdef HUF_FORCE_DECOMPRESS_X1 assert(dtd.tableType == 0); return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); #else @@ -1141,7 +1141,7 @@ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t ds if (cSrcSize == 0) return ERROR(corruption_detected); { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); -#ifdef HUF_DECOMPRESS_MINIMAL +#ifdef HUF_FORCE_DECOMPRESS_X1 assert(algoNb == 0); return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); #else diff --git 
a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 62ba86aa4..58cc98a4a 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -954,7 +954,7 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, ZSTD_STATIC_ASSERT(sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable) >= HUF_DECOMPRESS_WORKSPACE_SIZE); { void* const workspace = &entropy->LLTable; /* use fse tables as temporary workspace; implies fse tables are grouped together */ size_t const workspaceSize = sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable); -#ifdef HUF_DECOMPRESS_MINIMAL +#ifdef HUF_FORCE_DECOMPRESS_X1 /* in minimal huffman, we always use X1 variants */ size_t const hSize = HUF_readDTableX1_wksp(entropy->hufTable, dictPtr, dictEnd - dictPtr, From 4a0572b21560a500298ac6128050faa398f71a80 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Tue, 4 Dec 2018 11:24:36 -0800 Subject: [PATCH 10/21] Refactor Huffman Decompression Away From Ternary Tree in ZSTD_decodeLiteralsBlock --- lib/decompress/zstd_decompress_block.c | 36 +++++++++++++++++++------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index dbe4340ea..993540f29 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -95,6 +95,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, U32 singleStream=0; U32 const lhlCode = (istart[0] >> 2) & 3; U32 const lhc = MEM_readLE32(istart); + size_t hufSuccess; switch(lhlCode) { case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */ @@ -125,16 +126,31 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable)); } - if (HUF_isError((litEncType==set_repeat) ? - ( singleStream ? 
- HUF_decompress1X_usingDTable_bmi2(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr, dctx->bmi2) : - HUF_decompress4X_usingDTable_bmi2(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr, dctx->bmi2) ) : - ( singleStream ? - HUF_decompress1X1_DCtx_wksp_bmi2(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize, - dctx->workspace, sizeof(dctx->workspace), dctx->bmi2) : - HUF_decompress4X_hufOnly_wksp_bmi2(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize, - dctx->workspace, sizeof(dctx->workspace), dctx->bmi2)))) - return ERROR(corruption_detected); + if (litEncType==set_repeat) { + if (singleStream) { + hufSuccess = HUF_decompress1X_usingDTable_bmi2( + dctx->litBuffer, litSize, istart+lhSize, litCSize, + dctx->HUFptr, dctx->bmi2); + } else { + hufSuccess = HUF_decompress4X_usingDTable_bmi2( + dctx->litBuffer, litSize, istart+lhSize, litCSize, + dctx->HUFptr, dctx->bmi2); + } + } else { + if (singleStream) { + hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2( + dctx->entropy.hufTable, dctx->litBuffer, litSize, + istart+lhSize, litCSize, dctx->workspace, + sizeof(dctx->workspace), dctx->bmi2); + } else { + hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2( + dctx->entropy.hufTable, dctx->litBuffer, litSize, + istart+lhSize, litCSize, dctx->workspace, + sizeof(dctx->workspace), dctx->bmi2); + } + } + + if (HUF_isError(hufSuccess)) return ERROR(corruption_detected); dctx->litPtr = dctx->litBuffer; dctx->litSize = litSize; From abd1567d3c53bfea625857bde2abc7c2aa512ae1 Mon Sep 17 00:00:00 2001 From: "W. 
Felix Handte" Date: Tue, 4 Dec 2018 11:42:59 -0800 Subject: [PATCH 11/21] Move HUF_DGEN Up Out of X1 Definitions --- lib/decompress/huf_decompress.c | 85 +++++++++++++++++---------------- 1 file changed, 45 insertions(+), 40 deletions(-) diff --git a/lib/decompress/huf_decompress.c b/lib/decompress/huf_decompress.c index 1e0d58559..bbab53456 100644 --- a/lib/decompress/huf_decompress.c +++ b/lib/decompress/huf_decompress.c @@ -58,6 +58,51 @@ #define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) +/* ************************************************************** +* BMI2 Variant Wrappers +****************************************************************/ +#if DYNAMIC_BMI2 + +#define HUF_DGEN(fn) \ + \ + static size_t fn##_default( \ + void* dst, size_t dstSize, \ + const void* cSrc, size_t cSrcSize, \ + const HUF_DTable* DTable) \ + { \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + \ + static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \ + void* dst, size_t dstSize, \ + const void* cSrc, size_t cSrcSize, \ + const HUF_DTable* DTable) \ + { \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + \ + static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ + size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ + { \ + if (bmi2) { \ + return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \ + } + +#else + +#define HUF_DGEN(fn) \ + static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ + size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ + { \ + (void)bmi2; \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } + +#endif + + /*-***************************/ /* generic DTableDesc */ /*-***************************/ @@ -307,46 +352,6 @@ typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable); -#if DYNAMIC_BMI2 - -#define HUF_DGEN(fn) \ - \ - static size_t 
fn##_default( \ - void* dst, size_t dstSize, \ - const void* cSrc, size_t cSrcSize, \ - const HUF_DTable* DTable) \ - { \ - return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ - } \ - \ - static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \ - void* dst, size_t dstSize, \ - const void* cSrc, size_t cSrcSize, \ - const HUF_DTable* DTable) \ - { \ - return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ - } \ - \ - static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ - size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ - { \ - if (bmi2) { \ - return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \ - } \ - return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \ - } - -#else - -#define HUF_DGEN(fn) \ - static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ - size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ - { \ - (void)bmi2; \ - return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ - } - -#endif HUF_DGEN(HUF_decompress1X1_usingDTable_internal) HUF_DGEN(HUF_decompress4X1_usingDTable_internal) From c560e34c86797493040d434d42bd0f31cf66430c Mon Sep 17 00:00:00 2001 From: "W. 
Felix Handte" Date: Tue, 4 Dec 2018 11:44:02 -0800 Subject: [PATCH 12/21] Add HUF_FORCE_DECOMPRESS_X2 --- lib/common/huf.h | 10 +++ lib/decompress/huf_decompress.c | 86 ++++++++++++++++++++++---- lib/decompress/zstd_decompress_block.c | 7 +++ 3 files changed, 91 insertions(+), 12 deletions(-) diff --git a/lib/common/huf.h b/lib/common/huf.h index 5994ed9c4..34a16f117 100644 --- a/lib/common/huf.h +++ b/lib/common/huf.h @@ -281,15 +281,19 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize); #define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10) #define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32)) +#ifndef HUF_FORCE_DECOMPRESS_X2 size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize); size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); +#endif #ifndef HUF_FORCE_DECOMPRESS_X1 size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize); size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); #endif size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#ifndef HUF_FORCE_DECOMPRESS_X2 size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#endif #ifndef HUF_FORCE_DECOMPRESS_X1 size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); #endif @@ -320,15 +324,19 @@ size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cS size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); +#ifndef HUF_FORCE_DECOMPRESS_X2 size_t 
HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */ +#endif #ifndef HUF_FORCE_DECOMPRESS_X1 size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */ #endif size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of sing or double symbol decoder, based on DTable */ +#ifndef HUF_FORCE_DECOMPRESS_X2 size_t HUF_decompress1X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#endif #ifndef HUF_FORCE_DECOMPRESS_X1 size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); #endif @@ -337,7 +345,9 @@ size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* c * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. 
*/ size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); +#ifndef HUF_FORCE_DECOMPRESS_X2 size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); +#endif size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); diff --git a/lib/decompress/huf_decompress.c b/lib/decompress/huf_decompress.c index bbab53456..1139d39cd 100644 --- a/lib/decompress/huf_decompress.c +++ b/lib/decompress/huf_decompress.c @@ -116,6 +116,8 @@ static DTableDesc HUF_getDTableDesc(const HUF_DTable* table) } +#ifndef HUF_FORCE_DECOMPRESS_X2 + /*-***************************/ /* single-symbol decoding */ /*-***************************/ @@ -442,6 +444,9 @@ size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cS return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); } +#endif /* HUF_FORCE_DECOMPRESS_X2 */ + + #ifndef HUF_FORCE_DECOMPRESS_X1 /* *************************/ @@ -929,9 +934,14 @@ size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const HUF_DTable* DTable) { DTableDesc const dtd = HUF_getDTableDesc(DTable); -#ifdef HUF_FORCE_DECOMPRESS_X1 +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; assert(dtd.tableType == 0); return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); #else return dtd.tableType ? 
HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); @@ -943,9 +953,14 @@ size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const HUF_DTable* DTable) { DTableDesc const dtd = HUF_getDTableDesc(DTable); -#ifdef HUF_FORCE_DECOMPRESS_X1 +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; assert(dtd.tableType == 0); return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); #else return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); @@ -953,6 +968,7 @@ size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, } +#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t; static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] = { @@ -974,6 +990,7 @@ static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, qu {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */ {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */ }; +#endif /** HUF_selectDecoder() : * Tells which decoder is likely to decode faster, @@ -984,8 +1001,14 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize) { assert(dstSize > 0); assert(dstSize <= 128*1024); -#ifdef HUF_FORCE_DECOMPRESS_X1 +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dstSize; + (void)cSrcSize; return 0; +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dstSize; + (void)cSrcSize; + return 1; #else /* decoder timing evaluation */ { U32 const Q = 
(cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */ @@ -1003,7 +1026,7 @@ typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) { -#ifndef HUF_FORCE_DECOMPRESS_X1 +#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 }; #endif @@ -1014,9 +1037,14 @@ size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcS if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); -#ifdef HUF_FORCE_DECOMPRESS_X1 +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; assert(algoNb == 0); return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize); #else return decompress[algoNb](dst, dstSize, cSrc, cSrcSize); #endif @@ -1032,9 +1060,14 @@ size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); -#ifdef HUF_FORCE_DECOMPRESS_X1 +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; assert(algoNb == 0); return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); #else return algoNb ? 
HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) : HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ; @@ -1060,11 +1093,17 @@ size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, if (cSrcSize == 0) return ERROR(corruption_detected); { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); -#ifdef HUF_FORCE_DECOMPRESS_X1 +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; assert(algoNb == 0); return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); #else - return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize): + return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize): HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); #endif } @@ -1081,10 +1120,16 @@ size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); -#ifdef HUF_FORCE_DECOMPRESS_X1 +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; assert(algoNb == 0); return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize); #else return algoNb ? 
HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize): @@ -1106,15 +1151,21 @@ size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) { DTableDesc const dtd = HUF_getDTableDesc(DTable); -#ifdef HUF_FORCE_DECOMPRESS_X1 +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; assert(dtd.tableType == 0); return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); #else return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); #endif } +#ifndef HUF_FORCE_DECOMPRESS_X2 size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) { const BYTE* ip = (const BYTE*) cSrc; @@ -1126,13 +1177,19 @@ size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstS return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); } +#endif size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) { DTableDesc const dtd = HUF_getDTableDesc(DTable); -#ifdef HUF_FORCE_DECOMPRESS_X1 +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; assert(dtd.tableType == 0); return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); #else 
return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); @@ -1146,9 +1203,14 @@ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t ds if (cSrcSize == 0) return ERROR(corruption_detected); { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); -#ifdef HUF_FORCE_DECOMPRESS_X1 +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; assert(algoNb == 0); return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); #else return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) : HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index 993540f29..4c3b1e920 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -138,10 +138,17 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, } } else { if (singleStream) { +#if defined(HUF_FORCE_DECOMPRESS_X2) + hufSuccess = HUF_decompress1X_DCtx_wksp( + dctx->entropy.hufTable, dctx->litBuffer, litSize, + istart+lhSize, litCSize, dctx->workspace, + sizeof(dctx->workspace)); +#else hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2( dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->workspace, sizeof(dctx->workspace), dctx->bmi2); +#endif } else { hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2( dctx->entropy.hufTable, dctx->litBuffer, litSize, From c2d51637d9750ca44f98c7dfaad10e6e0265f0a2 Mon Sep 17 00:00:00 2001 From: "W. 
Felix Handte" Date: Tue, 4 Dec 2018 11:52:40 -0800 Subject: [PATCH 13/21] Add Mutual-Exclusion Error --- lib/decompress/huf_decompress.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/lib/decompress/huf_decompress.c b/lib/decompress/huf_decompress.c index 1139d39cd..2cc20da22 100644 --- a/lib/decompress/huf_decompress.c +++ b/lib/decompress/huf_decompress.c @@ -43,6 +43,19 @@ #include "huf.h" #include "error_private.h" +/* ************************************************************** +* Macros +****************************************************************/ + +/* These two optional macros force the use of one or the other of the two + * Huffman decompression implementations. They are mutually exclusive: + * defining both at the same time is a compile-time error. + */ +#if defined(HUF_FORCE_DECOMPRESS_X1) && \ + defined(HUF_FORCE_DECOMPRESS_X2) +#error "Cannot force the use of the X1 and X2 decoders at the same time!" +#endif + /* ************************************************************** * Error Management From 4e2f6c110e602a31e97a1ba19768cde7db900bf6 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Wed, 5 Dec 2018 14:04:57 -0800 Subject: [PATCH 14/21] Add Contbuild Tests --- .travis.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.travis.yml b/.travis.yml index 4105c1f09..b8dff02a7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -63,6 +63,15 @@ matrix: - make clang38install - CC=clang-3.8 make clean msan-test-zstd + - name: Trusty (Minimal Decompressor Macros) + script: + - make clean + - make -C tests test-zstd MOREFLAGS="-O0 -DHUF_FORCE_DECOMPRESS_X1 -DZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT" + - make clean + - make -C tests test-zstd MOREFLAGS="-O0 -DHUF_FORCE_DECOMPRESS_X2 -DZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG" + - make clean + - make -C tests test-zstd MOREFLAGS="-O0 -DZSTD_NO_INLINE -DZSTD_STRIP_ERROR_STRINGS" + - name: Trusty (CMake) script: - make cmakebuild From ece2c18372ca6caa4f066e815aa8b278dcdd6192 Mon Sep 17 00:00:00 2001 From: "W. 
Felix Handte" Date: Thu, 6 Dec 2018 10:32:36 -0800 Subject: [PATCH 15/21] Document Macros in README --- lib/README.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/lib/README.md b/lib/README.md index 0966c7aef..45fd62551 100644 --- a/lib/README.md +++ b/lib/README.md @@ -66,6 +66,24 @@ It's possible to compile only a limited set of features. and `ZSTD_LIB_DEPRECATED` as 0 to forgo compilation of the corresponding features. This will also disable compilation of all dependencies (eg. `ZSTD_LIB_COMPRESSION=0` will also disable dictBuilder). +- There are some additional macros that can be used to minify the decoder. + + Zstandard often has more than one implementation of a piece of functionality, + where each implementation optimizes for different scenarios. For example, the + Huffman decoder has complementary implementations that decode the stream one + symbol at a time or two symbols at a time. Zstd normally includes both (and + dispatches between them at runtime), but by defining `HUF_FORCE_DECOMPRESS_X1` + or `HUF_FORCE_DECOMPRESS_X2`, you can force the use of one or the other, avoiding + compilation of the other. Similarly, `ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT` + and `ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG` force the compilation and use of + only one or the other of two decompression implementations. The smallest + binary is achieved by using `HUF_FORCE_DECOMPRESS_X1` and + `ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT`. + + For squeezing the last ounce of size out, you can also define + `ZSTD_NO_INLINE`, which disables inlining, and `ZSTD_STRIP_ERROR_STRINGS`, + which removes the error messages that are otherwise returned by + `ZSTD_getErrorName`. #### Multithreading support From bd4afc389f986c51243ca62168c0b71c39d9c5b7 Mon Sep 17 00:00:00 2001 From: "W. 
Felix Handte" Date: Thu, 6 Dec 2018 12:32:32 -0800 Subject: [PATCH 16/21] Add Logic to Makefile to Convert Make Vars to Defines --- lib/Makefile | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/lib/Makefile b/lib/Makefile index 3fddf4fcd..386583e72 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -50,6 +50,12 @@ ZSTD_LIB_COMPRESSION ?= 1 ZSTD_LIB_DECOMPRESSION ?= 1 ZSTD_LIB_DICTBUILDER ?= 1 ZSTD_LIB_DEPRECATED ?= 1 +HUF_FORCE_DECOMPRESS_X1 ?= 0 +HUF_FORCE_DECOMPRESS_X2 ?= 0 +ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT ?= 0 +ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG ?= 0 +ZSTD_NO_INLINE ?= 0 +ZSTD_STRIP_ERROR_STRINGS ?= 0 ifeq ($(ZSTD_LIB_COMPRESSION), 0) ZSTD_LIB_DICTBUILDER = 0 @@ -77,6 +83,30 @@ ifneq ($(ZSTD_LIB_DICTBUILDER), 0) ZSTD_FILES += $(ZDICT_FILES) endif +ifneq ($(HUF_FORCE_DECOMPRESS_X1), 0) + CFLAGS += -DHUF_FORCE_DECOMPRESS_X1 +endif + +ifneq ($(HUF_FORCE_DECOMPRESS_X2), 0) + CFLAGS += -DHUF_FORCE_DECOMPRESS_X2 +endif + +ifneq ($(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT), 0) + CFLAGS += -DZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT +endif + +ifneq ($(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG), 0) + CFLAGS += -DZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG +endif + +ifneq ($(ZSTD_NO_INLINE), 0) + CFLAGS += -DZSTD_NO_INLINE +endif + +ifneq ($(ZSTD_STRIP_ERROR_STRINGS), 0) + CFLAGS += -DZSTD_STRIP_ERROR_STRINGS +endif + ifneq ($(ZSTD_LEGACY_SUPPORT), 0) ifeq ($(shell test $(ZSTD_LEGACY_SUPPORT) -lt 8; echo $$?), 0) ZSTD_FILES += $(shell ls legacy/*.c | $(GREP) 'v0[$(ZSTD_LEGACY_SUPPORT)-7]') From 0d606ee3db248c6df922535bb7aeda973f4a564c Mon Sep 17 00:00:00 2001 From: "W. 
Felix Handte" Date: Tue, 18 Dec 2018 13:35:57 -0800 Subject: [PATCH 17/21] Fix Incorrect assert() --- lib/decompress/huf_decompress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/decompress/huf_decompress.c b/lib/decompress/huf_decompress.c index 2cc20da22..3f8bd2973 100644 --- a/lib/decompress/huf_decompress.c +++ b/lib/decompress/huf_decompress.c @@ -1140,7 +1140,7 @@ size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, cSrcSize, workSpace, wkspSize); #elif defined(HUF_FORCE_DECOMPRESS_X2) (void)algoNb; - assert(algoNb == 0); + assert(algoNb == 1); return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); #else From 8e61ac816133396b91ab17b327d76ae5356ca653 Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Wed, 19 Dec 2018 12:36:10 -0800 Subject: [PATCH 18/21] Use Unused Variable in ERR_getErrorString() --- lib/common/error_private.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/common/error_private.c b/lib/common/error_private.c index 692e66b22..7c1bb67a2 100644 --- a/lib/common/error_private.c +++ b/lib/common/error_private.c @@ -15,6 +15,7 @@ const char* ERR_getErrorString(ERR_enum code) { #ifdef ZSTD_STRIP_ERROR_STRINGS + (void)code; return "Error strings stripped"; #else static const char* const notErrorCode = "Unspecified error code"; From 9b944041dac9fb937157b7833591e8215abd6628 Mon Sep 17 00:00:00 2001 From: "W. 
Felix Handte" Date: Thu, 20 Dec 2018 12:10:51 -0800 Subject: [PATCH 19/21] Update Travis Jobs to Run Shorter Tests with -Werror --- .travis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index b8dff02a7..e9f918481 100644 --- a/.travis.yml +++ b/.travis.yml @@ -66,11 +66,11 @@ matrix: - name: Trusty (Minimal Decompressor Macros) script: - make clean - - make -C tests test-zstd MOREFLAGS="-O0 -DHUF_FORCE_DECOMPRESS_X1 -DZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT" + - make check -j all MOREFLAGS="-Werror -DHUF_FORCE_DECOMPRESS_X1 -DZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT" - make clean - - make -C tests test-zstd MOREFLAGS="-O0 -DHUF_FORCE_DECOMPRESS_X2 -DZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG" + - make check -j all MOREFLAGS="-Werror -DHUF_FORCE_DECOMPRESS_X2 -DZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG" - make clean - - make -C tests test-zstd MOREFLAGS="-O0 -DZSTD_NO_INLINE -DZSTD_STRIP_ERROR_STRINGS" + - make check -j all MOREFLAGS="-Werror -DZSTD_NO_INLINE -DZSTD_STRIP_ERROR_STRINGS" - name: Trusty (CMake) script: From 038aabde284c1f8225d83b279522125aa9ea0feb Mon Sep 17 00:00:00 2001 From: "W. 
Felix Handte" Date: Thu, 20 Dec 2018 12:15:07 -0800 Subject: [PATCH 20/21] Mask Off Unused Functions When ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT --- lib/decompress/zstd_decompress_block.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index 4c3b1e920..d37c63b23 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -952,6 +952,7 @@ ZSTD_decompressSequences_default(ZSTD_DCtx* dctx, +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT FORCE_INLINE_TEMPLATE seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const longOffsets) { @@ -1116,6 +1117,7 @@ ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx, { return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); } +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ @@ -1130,6 +1132,7 @@ ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx, return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); } +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT static TARGET_ATTRIBUTE("bmi2") size_t ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, @@ -1138,8 +1141,9 @@ ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx, { return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); } +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ -#endif +#endif /* DYNAMIC_BMI2 */ typedef size_t (*ZSTD_decompressSequences_t)( ZSTD_DCtx* dctx, @@ -1162,6 +1166,7 @@ ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, } +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT /* ZSTD_decompressSequencesLong() : * decompression function triggered when a minimum share of offsets is considered "long", * aka out of cache. 
@@ -1181,9 +1186,12 @@ ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx, #endif return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); } +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ +#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) /* ZSTD_getLongOffsetsShare() : * condition : offTable must be valid * @return : "share" of long offsets (arbitrarily defined as > (1<<23)) @@ -1208,6 +1216,7 @@ ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable) return total; } +#endif size_t From 91b7309115cfa7c809134e3b4991838b139d0f0e Mon Sep 17 00:00:00 2001 From: "W. Felix Handte" Date: Thu, 20 Dec 2018 12:20:34 -0800 Subject: [PATCH 21/21] Mask Off Unused Functions When ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG --- lib/decompress/zstd_decompress_block.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index d37c63b23..a4b6c4c1a 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -810,6 +810,7 @@ ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD) typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e; +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG FORCE_INLINE_TEMPLATE seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets) { @@ -949,6 +950,7 @@ ZSTD_decompressSequences_default(ZSTD_DCtx* dctx, { return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); } +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ @@ -1123,6 +1125,7 @@ ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx, #if DYNAMIC_BMI2 +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG static TARGET_ATTRIBUTE("bmi2") size_t ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, @@ -1131,6 +1134,7 @@ ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx, { 
return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); } +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT static TARGET_ATTRIBUTE("bmi2") size_t @@ -1151,6 +1155,7 @@ typedef size_t (*ZSTD_decompressSequences_t)( const void* seqStart, size_t seqSize, int nbSeq, const ZSTD_longOffset_e isLongOffset); +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG static size_t ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, @@ -1164,6 +1169,7 @@ ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, #endif return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); } +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT