From 2a128110d05aa5f67c9faee55a4fc538c169c16b Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Tue, 21 Jun 2022 11:59:27 -0400 Subject: [PATCH 1/5] Add prefetchCDictTables CCtxParam --- lib/compress/zstd_compress.c | 16 ++++++++++++++++ lib/compress/zstd_compress_internal.h | 4 ++++ lib/compress/zstd_double_fast.c | 7 +++++++ lib/compress/zstd_fast.c | 5 +++++ lib/zstd.h | 7 ++++++- 5 files changed, 38 insertions(+), 1 deletion(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 46a9dbe66..f8ca7d35e 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -576,6 +576,11 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) bounds.upperBound = 1; return bounds; + case ZSTD_c_prefetchCDictTables: + bounds.lowerBound = (int)ZSTD_ps_auto; + bounds.upperBound = (int)ZSTD_ps_disable; + return bounds; + default: bounds.error = ERROR(parameter_unsupported); return bounds; @@ -640,6 +645,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) case ZSTD_c_useBlockSplitter: case ZSTD_c_useRowMatchFinder: case ZSTD_c_deterministicRefPrefix: + case ZSTD_c_prefetchCDictTables: default: return 0; } @@ -695,6 +701,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) case ZSTD_c_useBlockSplitter: case ZSTD_c_useRowMatchFinder: case ZSTD_c_deterministicRefPrefix: + case ZSTD_c_prefetchCDictTables: break; default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); @@ -921,6 +928,11 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, CCtxParams->deterministicRefPrefix = !!value; return CCtxParams->deterministicRefPrefix; + case ZSTD_c_prefetchCDictTables: + BOUNDCHECK(ZSTD_c_prefetchCDictTables, value); + CCtxParams->prefetchCDictTables = (ZSTD_paramSwitch_e)value; + return CCtxParams->prefetchCDictTables; + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); } } @@ -1053,6 +1065,9 @@ size_t ZSTD_CCtxParams_getParameter( case ZSTD_c_deterministicRefPrefix: *value = (int)CCtxParams->deterministicRefPrefix; break; + case ZSTD_c_prefetchCDictTables: + *value = (int)CCtxParams->prefetchCDictTables; + break; default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); } return 0; @@ -2913,6 +2928,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) zc->appliedParams.useRowMatchFinder, dictMode); ms->ldmSeqStore = NULL; + ms->prefetchCDictTables = zc->appliedParams.prefetchCDictTables == ZSTD_ps_enable; lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); } { const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize; diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 870bcc8be..bd1077f86 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -235,6 +235,7 @@ struct ZSTD_matchState_t { const ZSTD_matchState_t* dictMatchState; ZSTD_compressionParameters cParams; const rawSeqStore_t* ldmSeqStore; + int prefetchCDictTables; /* TODO document */ }; typedef struct { @@ -331,6 +332,9 @@ struct ZSTD_CCtx_params_s { /* Internal use, for createCCtxParams() and freeCCtxParams() only */ ZSTD_customMem customMem; + + /* TODO document */ + ZSTD_paramSwitch_e prefetchCDictTables; }; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */ #define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2)) diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c index 6697ba0a9..a69fdc818 100644 --- a/lib/compress/zstd_double_fast.c +++ b/lib/compress/zstd_double_fast.c @@ -345,6 +345,13 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic( /* if a dictionary is attached, it must be within window range */ assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex); + if (ms->prefetchCDictTables) { + size_t const hashTableSize = ((size_t)1) << dictCParams->hashLog; + size_t const chainTableSize = ((size_t)1) << dictCParams->chainLog; + PREFETCH_AREA(dictHashLong, hashTableSize * sizeof(U32)) + PREFETCH_AREA(dictHashSmall, chainTableSize * sizeof(U32)) + } + /* init */ ip += (dictAndPrefixLength == 0); diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index de7336907..a44057988 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -500,6 +500,11 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( * when translating a dict index into a local index */ assert(prefixStartIndex >= (U32)(dictEnd - dictBase)); + if (ms->prefetchCDictTables) { + size_t const hashTableSize = ((size_t)1) << dictCParams->hashLog; + PREFETCH_AREA(dictHashTable, hashTableSize * sizeof(U32)) + } + /* init */ DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic"); ip0 += (dictAndPrefixLength == 0); diff --git a/lib/zstd.h b/lib/zstd.h index 14a78cd93..221426715 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -421,6 +421,7 @@ typedef enum { * ZSTD_c_validateSequences * ZSTD_c_useBlockSplitter * ZSTD_c_useRowMatchFinder + * ZSTD_c_prefetchCDictTables * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. * note : never ever use experimentalParam? names directly; * also, the enums values themselves are unstable and can still change. @@ -439,7 +440,8 @@ typedef enum { ZSTD_c_experimentalParam12=1009, ZSTD_c_experimentalParam13=1010, ZSTD_c_experimentalParam14=1011, - ZSTD_c_experimentalParam15=1012 + ZSTD_c_experimentalParam15=1012, + ZSTD_c_experimentalParam16=1013 } ZSTD_cParameter; typedef struct { @@ -1954,6 +1956,9 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo */ #define ZSTD_c_deterministicRefPrefix ZSTD_c_experimentalParam15 +/* TODO document */ +#define ZSTD_c_prefetchCDictTables ZSTD_c_experimentalParam16 + /*! ZSTD_CCtx_getParameter() : * Get the requested compression parameter value, selected by enum ZSTD_cParameter, * and store it into int* value. From 93b89fb24b1e019ffc2df027993d04e4c0cadfef Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Tue, 21 Jun 2022 18:06:48 -0400 Subject: [PATCH 2/5] Add docs --- lib/compress/zstd_compress_internal.h | 4 ++-- lib/zstd.h | 22 +++++++++++++++++++++- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index bd1077f86..8c73007c8 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -235,7 +235,7 @@ struct ZSTD_matchState_t { const ZSTD_matchState_t* dictMatchState; ZSTD_compressionParameters cParams; const rawSeqStore_t* ldmSeqStore; - int prefetchCDictTables; /* TODO document */ + int prefetchCDictTables; /* Controls prefetching in some dictMatchState matchfinders */ }; typedef struct { @@ -333,7 +333,7 @@ struct ZSTD_CCtx_params_s { /* Internal use, for createCCtxParams() and freeCCtxParams() only */ ZSTD_customMem customMem; - /* TODO document */ + /* Controls prefetching in some dictMatchState matchfinders */ ZSTD_paramSwitch_e prefetchCDictTables; }; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */ diff --git a/lib/zstd.h b/lib/zstd.h index 221426715..14b4b4559 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -1956,7 +1956,27 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo */ #define ZSTD_c_deterministicRefPrefix ZSTD_c_experimentalParam15 -/* TODO document */ +/* ZSTD_c_prefetchCDictTables + * Controlled with ZSTD_paramSwitch_e enum. Default is ZSTD_ps_auto. + * + * In some situations, zstd uses CDict tables in-place rather than copying them + * into the working context. (See docs on ZSTD_dictAttachPref_e above for details). + * In such situations, compression speed is seriously impacted when CDict tables are + * "cold" (outside CPU cache). This parameter instructs zstd to prefetch CDict tables + * when they are used in-place. + * + * For sufficiently small inputs, the cost of the prefetch will outweigh the benefit. + * For sufficiently large inputs, zstd will by default memcpy() CDict tables + * into the working context, so there is no need to prefetch. This parameter is + * targeted at a middle range of input sizes, where a prefetch is cheap enough to be + * useful but memcpy() is too expensive. The exact range of input sizes where this + * makes sense is best determined by careful experimentation. + * + * Note: for this parameter, ZSTD_ps_auto is currently equivalent to ZSTD_ps_disable, + * but in the future zstd may conditionally enable this feature via an auto-detection + * heuristic for cold CDicts. + * Use ZSTD_ps_disable to opt out of prefetching under any circumstances. + */ #define ZSTD_c_prefetchCDictTables ZSTD_c_experimentalParam16 /*! ZSTD_CCtx_getParameter() : From 6bd5ac671352801cd249576d94606bdc0ef235ec Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 22 Jun 2022 08:59:28 -0700 Subject: [PATCH 3/5] add prefetchCDictTables to largeNbDicts --- contrib/largeNbDicts/largeNbDicts.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/contrib/largeNbDicts/largeNbDicts.c b/contrib/largeNbDicts/largeNbDicts.c index 25b154c30..66850e28c 100644 --- a/contrib/largeNbDicts/largeNbDicts.c +++ b/contrib/largeNbDicts/largeNbDicts.c @@ -968,6 +968,7 @@ int main (int argc, const char** argv) unsigned nbBlocks = 0; /* determine nbBlocks automatically, from source and blockSize */ ZSTD_dictContentType_e dictContentType = ZSTD_dct_auto; ZSTD_dictAttachPref_e dictAttachPref = ZSTD_dictDefaultAttach; + ZSTD_paramSwitch_e prefetchCDictTables = ZSTD_ps_auto; for (int argNb = 1; argNb < argc ; argNb++) { const char* argument = argv[argNb]; @@ -986,6 +987,7 @@ int main (int argc, const char** argv) if (longCommandWArg(&argument, "--dedicated-dict-search")) { dedicatedDictSearch = 1; continue; } if (longCommandWArg(&argument, "--dict-content-type=")) { dictContentType = (int)readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "--dict-attach-pref=")) { dictAttachPref = (int)readU32FromChar(&argument); continue; } + if (longCommandWArg(&argument, "--prefetch-cdict-tables=")) { prefetchCDictTables = (int)readU32FromChar(&argument); continue; } if (longCommandWArg(&argument, "-")) { cLevel = (int)readU32FromChar(&argument); continue; } /* anything that's not a command is a filename */ nameTable[nameIdx++] = argument; @@ -1008,6 +1010,7 @@ int main (int argc, const char** argv) ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_enableDedicatedDictSearch, dedicatedDictSearch); ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_nbWorkers, 0); ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_forceAttachDict, dictAttachPref); + ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_prefetchCDictTables, prefetchCDictTables); int result = bench(filenameTable->fileNames, (unsigned)filenameTable->tableSize, dictionary, blockSize, cLevel, nbDicts, nbBlocks, nbRounds, benchCompression, dictContentType, cctxParams, exeName); From 747e06f4f6db99d7f4a05f6a0d4a5490da7c944f Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Wed, 22 Jun 2022 17:05:23 -0400 Subject: [PATCH 4/5] Add tests --- tests/fuzz/zstd_helpers.c | 1 + tests/fuzzer.c | 1 + 2 files changed, 2 insertions(+) diff --git a/tests/fuzz/zstd_helpers.c b/tests/fuzz/zstd_helpers.c index f66579754..4f8727df9 100644 --- a/tests/fuzz/zstd_helpers.c +++ b/tests/fuzz/zstd_helpers.c @@ -98,6 +98,7 @@ void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, FUZZ_dataProducer setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, producer); setRand(cctx, ZSTD_c_useBlockSplitter, 0, 2, producer); setRand(cctx, ZSTD_c_deterministicRefPrefix, 0, 1, producer); + setRand(cctx, ZSTD_c_prefetchCDictTables, 0, 2, producer); if (FUZZ_dataProducer_uint32Range(producer, 0, 1) == 0) { setRand(cctx, ZSTD_c_srcSizeHint, ZSTD_SRCSIZEHINT_MIN, 2 * srcSize, producer); } diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 71356d53d..81c2d9dba 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -2041,6 +2041,7 @@ static int basicUnitTests(U32 const seed, double compressibility) CHECK_Z( ZSTD_CCtx_setParameter(ctxOrig, ZSTD_c_compressionLevel, l) ); CHECK_Z( ZSTD_CCtx_setParameter(ctxOrig, ZSTD_c_enableDedicatedDictSearch, 0) ); CHECK_Z( ZSTD_CCtx_setParameter(ctxOrig, ZSTD_c_forceAttachDict, ZSTD_dictForceAttach) ); + CHECK_Z( ZSTD_CCtx_setParameter(ctxOrig, ZSTD_c_prefetchCDictTables, seed % 3) ); wdict_cSize = ZSTD_compress2(ctxOrig, compressedBuffer, compressedBufferSize, contentStart, contentSize); if (wdict_cSize > target_wdict_cSize[l]) { DISPLAYLEVEL(1, "error : compression with dictionary and compress2 at level %i worse than expected (%u > %u) \n", From cb9e3411292133c4e2b96ef331760eefdf895f23 Mon Sep 17 00:00:00 2001 From: Elliot Gorokhovsky Date: Thu, 23 Jun 2022 16:58:03 -0400 Subject: [PATCH 5/5] Nits --- lib/compress/zstd_compress.c | 2 +- lib/compress/zstd_compress_internal.h | 6 +++++- lib/compress/zstd_double_fast.c | 8 ++++---- lib/compress/zstd_fast.c | 4 ++-- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index f8ca7d35e..b1f9bba8a 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -1943,6 +1943,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, /* init params */ zc->blockState.matchState.cParams = params->cParams; + zc->blockState.matchState.prefetchCDictTables = params->prefetchCDictTables == ZSTD_ps_enable; zc->pledgedSrcSizePlusOne = pledgedSrcSize+1; zc->consumedSrcSize = 0; zc->producedCSize = 0; @@ -2928,7 +2929,6 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) zc->appliedParams.useRowMatchFinder, dictMode); ms->ldmSeqStore = NULL; - ms->prefetchCDictTables = zc->appliedParams.prefetchCDictTables == ZSTD_ps_enable; lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); } { const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize; diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 8c73007c8..4771b15ba 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -235,7 +235,11 @@ struct ZSTD_matchState_t { const ZSTD_matchState_t* dictMatchState; ZSTD_compressionParameters cParams; const rawSeqStore_t* ldmSeqStore; - int prefetchCDictTables; /* Controls prefetching in some dictMatchState matchfinders */ + + /* Controls prefetching in some dictMatchState matchfinders. + * This behavior is controlled from the cctx ms. + * This parameter has no effect in the cdict ms. */ + int prefetchCDictTables; }; typedef struct { diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c index a69fdc818..c2dbd54c1 100644 --- a/lib/compress/zstd_double_fast.c +++ b/lib/compress/zstd_double_fast.c @@ -346,10 +346,10 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic( assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex); if (ms->prefetchCDictTables) { - size_t const hashTableSize = ((size_t)1) << dictCParams->hashLog; - size_t const chainTableSize = ((size_t)1) << dictCParams->chainLog; - PREFETCH_AREA(dictHashLong, hashTableSize * sizeof(U32)) - PREFETCH_AREA(dictHashSmall, chainTableSize * sizeof(U32)) + size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32); + size_t const chainTableBytes = (((size_t)1) << dictCParams->chainLog) * sizeof(U32); + PREFETCH_AREA(dictHashLong, hashTableBytes) + PREFETCH_AREA(dictHashSmall, chainTableBytes) } /* init */ diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c index a44057988..291173449 100644 --- a/lib/compress/zstd_fast.c +++ b/lib/compress/zstd_fast.c @@ -501,8 +501,8 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( assert(prefixStartIndex >= (U32)(dictEnd - dictBase)); if (ms->prefetchCDictTables) { - size_t const hashTableSize = ((size_t)1) << dictCParams->hashLog; - PREFETCH_AREA(dictHashTable, hashTableSize * sizeof(U32)) + size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32); + PREFETCH_AREA(dictHashTable, hashTableBytes) } /* init */