Cap hashLog & chainLog to ensure that we only use 32 bits of hash

* Cap shortCache chainLog to 24
* Cap row match finder hashLog so that rowLog <= 24
* Add unit tests to expose all cases. The row match finder unit tests
  are only run in 64-bit mode, because they allocate ~1GB.

Fixes #3336
This commit is contained in:
Nick Terrell 2023-01-19 16:27:24 -08:00 committed by Nick Terrell
parent abf965c64a
commit 666944fbe6
5 changed files with 144 additions and 7 deletions

View File

@ -1412,7 +1412,8 @@ static ZSTD_compressionParameters
ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
unsigned long long srcSize,
size_t dictSize,
ZSTD_cParamMode_e mode)
ZSTD_cParamMode_e mode,
ZSTD_paramSwitch_e useRowMatchFinder)
{
const U64 minSrcSize = 513; /* (1<<9) + 1 */
const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1);
@ -1465,11 +1466,40 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN)
cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* minimum wlog required for valid frame header */
/* We can't use more than 32 bits of hash in total, so that means that we require:
* (hashLog + 8) <= 32 && (chainLog + 8) <= 32
*/
if (mode == ZSTD_cpm_createCDict && ZSTD_CDictIndicesAreTagged(&cPar)) {
U32 const maxShortCacheHashLog = 32 - ZSTD_SHORT_CACHE_TAG_BITS;
if (cPar.hashLog > maxShortCacheHashLog) {
cPar.hashLog = maxShortCacheHashLog;
}
if (cPar.chainLog > maxShortCacheHashLog) {
cPar.chainLog = maxShortCacheHashLog;
}
}
/* At this point, we aren't 100% sure if we are using the row match finder.
* Unless it is explicitly disabled, conservatively assume that it is enabled.
* In this case it will only be disabled for small sources, so shrinking the
* hash log a little bit shouldn't result in any ratio loss.
*/
if (useRowMatchFinder == ZSTD_ps_auto)
useRowMatchFinder = ZSTD_ps_enable;
/* We can't hash more than 32-bits in total. So that means that we require:
* (hashLog - rowLog + 8) <= 32
*/
if (ZSTD_rowMatchFinderUsed(cPar.strategy, useRowMatchFinder)) {
/* Switch to 32-entry rows if searchLog is 5 (or more) */
U32 const rowLog = BOUNDED(4, cPar.searchLog, 6);
U32 const maxRowHashLog = 32 - ZSTD_ROW_HASH_TAG_BITS;
U32 const maxHashLog = maxRowHashLog + rowLog;
assert(cPar.hashLog >= rowLog);
if (cPar.hashLog > maxHashLog) {
cPar.hashLog = maxHashLog;
}
}
return cPar;
@ -1482,7 +1512,7 @@ ZSTD_adjustCParams(ZSTD_compressionParameters cPar,
{
cPar = ZSTD_clampCParams(cPar); /* resulting cPar is necessarily valid (all parameters within range) */
if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN;
return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown);
return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown, ZSTD_ps_auto);
}
static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
@ -1513,7 +1543,7 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
ZSTD_overrideCParams(&cParams, &CCtxParams->cParams);
assert(!ZSTD_checkCParams(cParams));
/* srcSizeHint == 0 means 0 */
return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode);
return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode, CCtxParams->useRowMatchFinder);
}
static size_t
@ -2185,7 +2215,8 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
}
params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize,
cdict->dictContentSize, ZSTD_cpm_attachDict);
cdict->dictContentSize, ZSTD_cpm_attachDict,
params.useRowMatchFinder);
params.cParams.windowLog = windowLog;
params.useRowMatchFinder = cdict->useRowMatchFinder; /* cdict overrides */
FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, &params, pledgedSrcSize,
@ -6740,7 +6771,7 @@ static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel,
cp.targetLength = (unsigned)(-clampedCompressionLevel);
}
/* refine parameters based on srcSize & dictSize */
return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode);
return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode, ZSTD_ps_auto);
}
}

View File

@ -759,7 +759,6 @@ size_t ZSTD_HcFindBestMatch(
***********************************/
/* Constants for row-based hash */
#define ZSTD_ROW_HASH_TAG_OFFSET 16 /* byte offset of hashes in the match state's tagTable from the beginning of a row */
#define ZSTD_ROW_HASH_TAG_BITS 8 /* nb bits to use for the tag */
#define ZSTD_ROW_HASH_TAG_MASK ((1u << ZSTD_ROW_HASH_TAG_BITS) - 1)
#define ZSTD_ROW_HASH_MAX_ENTRIES 64 /* absolute maximum number of entries per row, for all configurations */

View File

@ -25,6 +25,8 @@ extern "C" {
*/
#define ZSTD_LAZY_DDSS_BUCKET_LOG 2
#define ZSTD_ROW_HASH_TAG_BITS 8 /* nb bits to use for the tag */
U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip);
@ -116,7 +118,7 @@ size_t ZSTD_compressBlock_lazy2_extDict_row(
size_t ZSTD_compressBlock_btlazy2_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize);
#if defined (__cplusplus)
}

View File

@ -2832,6 +2832,90 @@ static int basicUnitTests(U32 const seed, double compressibility)
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : ZSTD_fast attach dictionary with hashLog = 25 and chainLog = 25 : ", testNb++);
{
ZSTD_CCtx_params* cctxParams = ZSTD_createCCtxParams();
ZSTD_customMem customMem = {NULL, NULL, NULL};
ZSTD_DCtx* dctx = ZSTD_createDCtx();
ZSTD_CDict* cdict;
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_strategy, ZSTD_fast));
/* Set windowLog to 25 so hash/chain logs don't get sized down */
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_windowLog, 25));
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_hashLog, 25));
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_chainLog, 25));
/* Set srcSizeHint to 2^25 so hash/chain logs don't get sized down */
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_srcSizeHint, 1u << 25));
cdict = ZSTD_createCDict_advanced2(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, cctxParams, customMem);
CHECK_Z(ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters));
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceAttachDict, ZSTD_dictForceAttach));
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1));
CHECK_Z(ZSTD_CCtx_refCDict(cctx, cdict));
cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize);
CHECK_Z(cSize);
CHECK_Z(ZSTD_decompress_usingDict(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, dictBuffer, dictSize));
ZSTD_freeCDict(cdict);
ZSTD_freeDCtx(dctx);
ZSTD_freeCCtxParams(cctxParams);
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : ZSTD_dfast attach dictionary with hashLog = 25 and chainLog = 25 : ", testNb++);
{
ZSTD_CCtx_params* cctxParams = ZSTD_createCCtxParams();
ZSTD_customMem customMem = {NULL, NULL, NULL};
ZSTD_DCtx* dctx = ZSTD_createDCtx();
ZSTD_CDict* cdict;
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_strategy, ZSTD_dfast));
/* Set windowLog to 25 so hash/chain logs don't get sized down */
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_windowLog, 25));
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_hashLog, 25));
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_chainLog, 25));
/* Set srcSizeHint to 2^25 so hash/chain logs don't get sized down */
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_srcSizeHint, 1u << 25));
cdict = ZSTD_createCDict_advanced2(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, cctxParams, customMem);
CHECK_Z(ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters));
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceAttachDict, ZSTD_dictForceAttach));
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1));
CHECK_Z(ZSTD_CCtx_refCDict(cctx, cdict));
cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize);
CHECK_Z(cSize);
CHECK_Z(ZSTD_decompress_usingDict(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, dictBuffer, dictSize));
ZSTD_freeCDict(cdict);
ZSTD_freeDCtx(dctx);
ZSTD_freeCCtxParams(cctxParams);
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : ZSTD_lazy attach dictionary with hashLog = 29 and searchLog = 4 : ", testNb++);
if (MEM_64bits()) {
ZSTD_CCtx_params* cctxParams = ZSTD_createCCtxParams();
ZSTD_customMem customMem = {NULL, NULL, NULL};
ZSTD_DCtx* dctx = ZSTD_createDCtx();
ZSTD_CDict* cdict;
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_strategy, ZSTD_lazy));
/* Force enable row based match finder, and disable dedicated dict search. */
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_useRowMatchFinder, ZSTD_ps_enable));
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_enableDedicatedDictSearch, 0));
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_searchLog, 4));
/* Set windowLog to 29 so hash/chain logs don't get sized down */
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_windowLog, 29));
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_hashLog, 29));
/* Set srcSizeHint to 2^29 so hash/chain logs don't get sized down */
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_srcSizeHint, 1u << 29));
cdict = ZSTD_createCDict_advanced2(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, cctxParams, customMem);
CHECK_Z(ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters));
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceAttachDict, ZSTD_dictForceAttach));
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1));
CHECK_Z(ZSTD_CCtx_refCDict(cctx, cdict));
cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize);
CHECK_Z(cSize);
CHECK_Z(ZSTD_decompress_usingDict(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, dictBuffer, dictSize));
ZSTD_freeCDict(cdict);
ZSTD_freeDCtx(dctx);
ZSTD_freeCCtxParams(cctxParams);
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : Dictionary with non-default repcodes : ", testNb++);
{ U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; }
dictSize = ZDICT_trainFromBuffer(dictBuffer, dictSize,

View File

@ -1566,6 +1566,27 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests)
CHECK(!ZSTD_isError(ZSTD_CCtx_setParameter(zc, ZSTD_c_srcSizeHint, -1)), "Out of range doesn't error");
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : ZSTD_lazy compress with hashLog = 29 and searchLog = 4 : ", testNb++);
if (MEM_64bits()) {
ZSTD_outBuffer out = { compressedBuffer, compressedBufferSize, 0 };
ZSTD_inBuffer in = { CNBuffer, CNBufferSize, 0 };
CHECK_Z(ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters));
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_strategy, ZSTD_lazy));
/* Force enable the row based match finder */
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_useRowMatchFinder, ZSTD_ps_enable));
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_searchLog, 4));
/* Set windowLog to 29 so the hashLog doesn't get sized down */
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_windowLog, 29));
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_hashLog, 29));
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_checksumFlag, 1));
/* Compress with continue first so the hashLog doesn't get sized down */
CHECK_Z(ZSTD_compressStream2(zc, &out, &in, ZSTD_e_continue));
CHECK_Z(ZSTD_compressStream2(zc, &out, &in, ZSTD_e_end));
cSize = out.pos;
CHECK_Z(ZSTD_decompress(decodedBuffer, CNBufferSize, compressedBuffer, cSize));
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : Test offset == windowSize : ", testNb++);
{
int windowLog;