mirror of
https://github.com/facebook/zstd.git
synced 2025-11-22 00:10:22 -05:00
Cap hashLog & chainLog to ensure that we only use 32 bits of hash
* Cap shortCache hashLog & chainLog to 24
* Cap row match finder hashLog so that (hashLog - rowLog) <= 24
* Add unit tests to expose all cases. The row match finder unit tests are only run in 64-bit mode, because they allocate ~1GB.

Fixes #3336
This commit is contained in:
parent
abf965c64a
commit
666944fbe6
@ -1412,7 +1412,8 @@ static ZSTD_compressionParameters
|
||||
ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
|
||||
unsigned long long srcSize,
|
||||
size_t dictSize,
|
||||
ZSTD_cParamMode_e mode)
|
||||
ZSTD_cParamMode_e mode,
|
||||
ZSTD_paramSwitch_e useRowMatchFinder)
|
||||
{
|
||||
const U64 minSrcSize = 513; /* (1<<9) + 1 */
|
||||
const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1);
|
||||
@ -1465,11 +1466,40 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
|
||||
if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN)
|
||||
cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* minimum wlog required for valid frame header */
|
||||
|
||||
/* We can't use more than 32 bits of hash in total, so that means that we require:
|
||||
* (hashLog + 8) <= 32 && (chainLog + 8) <= 32
|
||||
*/
|
||||
if (mode == ZSTD_cpm_createCDict && ZSTD_CDictIndicesAreTagged(&cPar)) {
|
||||
U32 const maxShortCacheHashLog = 32 - ZSTD_SHORT_CACHE_TAG_BITS;
|
||||
if (cPar.hashLog > maxShortCacheHashLog) {
|
||||
cPar.hashLog = maxShortCacheHashLog;
|
||||
}
|
||||
if (cPar.chainLog > maxShortCacheHashLog) {
|
||||
cPar.chainLog = maxShortCacheHashLog;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* At this point, we aren't 100% sure if we are using the row match finder.
|
||||
* Unless it is explicitly disabled, conservatively assume that it is enabled.
|
||||
* In this case it will only be disabled for small sources, so shrinking the
|
||||
* hash log a little bit shouldn't result in any ratio loss.
|
||||
*/
|
||||
if (useRowMatchFinder == ZSTD_ps_auto)
|
||||
useRowMatchFinder = ZSTD_ps_enable;
|
||||
|
||||
/* We can't hash more than 32-bits in total. So that means that we require:
|
||||
* (hashLog - rowLog + 8) <= 32
|
||||
*/
|
||||
if (ZSTD_rowMatchFinderUsed(cPar.strategy, useRowMatchFinder)) {
|
||||
/* Switch to 32-entry rows if searchLog is 5 (or more) */
|
||||
U32 const rowLog = BOUNDED(4, cPar.searchLog, 6);
|
||||
U32 const maxRowHashLog = 32 - ZSTD_ROW_HASH_TAG_BITS;
|
||||
U32 const maxHashLog = maxRowHashLog + rowLog;
|
||||
assert(cPar.hashLog >= rowLog);
|
||||
if (cPar.hashLog > maxHashLog) {
|
||||
cPar.hashLog = maxHashLog;
|
||||
}
|
||||
}
|
||||
|
||||
return cPar;
|
||||
@ -1482,7 +1512,7 @@ ZSTD_adjustCParams(ZSTD_compressionParameters cPar,
|
||||
{
|
||||
cPar = ZSTD_clampCParams(cPar); /* resulting cPar is necessarily valid (all parameters within range) */
|
||||
if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN;
|
||||
return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown);
|
||||
return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown, ZSTD_ps_auto);
|
||||
}
|
||||
|
||||
static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
|
||||
@ -1513,7 +1543,7 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
|
||||
ZSTD_overrideCParams(&cParams, &CCtxParams->cParams);
|
||||
assert(!ZSTD_checkCParams(cParams));
|
||||
/* srcSizeHint == 0 means 0 */
|
||||
return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode);
|
||||
return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode, CCtxParams->useRowMatchFinder);
|
||||
}
|
||||
|
||||
static size_t
|
||||
@ -2185,7 +2215,8 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
|
||||
}
|
||||
|
||||
params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize,
|
||||
cdict->dictContentSize, ZSTD_cpm_attachDict);
|
||||
cdict->dictContentSize, ZSTD_cpm_attachDict,
|
||||
params.useRowMatchFinder);
|
||||
params.cParams.windowLog = windowLog;
|
||||
params.useRowMatchFinder = cdict->useRowMatchFinder; /* cdict overrides */
|
||||
FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, &params, pledgedSrcSize,
|
||||
@ -6740,7 +6771,7 @@ static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel,
|
||||
cp.targetLength = (unsigned)(-clampedCompressionLevel);
|
||||
}
|
||||
/* refine parameters based on srcSize & dictSize */
|
||||
return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode);
|
||||
return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode, ZSTD_ps_auto);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -759,7 +759,6 @@ size_t ZSTD_HcFindBestMatch(
|
||||
***********************************/
|
||||
/* Constants for row-based hash */
|
||||
#define ZSTD_ROW_HASH_TAG_OFFSET 16 /* byte offset of hashes in the match state's tagTable from the beginning of a row */
|
||||
#define ZSTD_ROW_HASH_TAG_BITS 8 /* nb bits to use for the tag */
|
||||
#define ZSTD_ROW_HASH_TAG_MASK ((1u << ZSTD_ROW_HASH_TAG_BITS) - 1)
|
||||
#define ZSTD_ROW_HASH_MAX_ENTRIES 64 /* absolute maximum number of entries per row, for all configurations */
|
||||
|
||||
|
||||
@ -25,6 +25,8 @@ extern "C" {
|
||||
*/
|
||||
#define ZSTD_LAZY_DDSS_BUCKET_LOG 2
|
||||
|
||||
#define ZSTD_ROW_HASH_TAG_BITS 8 /* nb bits to use for the tag */
|
||||
|
||||
U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
|
||||
void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip);
|
||||
|
||||
@ -116,7 +118,7 @@ size_t ZSTD_compressBlock_lazy2_extDict_row(
|
||||
size_t ZSTD_compressBlock_btlazy2_extDict(
|
||||
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
||||
void const* src, size_t srcSize);
|
||||
|
||||
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
|
||||
@ -2832,6 +2832,90 @@ static int basicUnitTests(U32 const seed, double compressibility)
|
||||
}
|
||||
DISPLAYLEVEL(3, "OK \n");
|
||||
|
||||
DISPLAYLEVEL(3, "test%3i : ZSTD_fast attach dictionary with hashLog = 25 and chainLog = 25 : ", testNb++);
|
||||
{
|
||||
ZSTD_CCtx_params* cctxParams = ZSTD_createCCtxParams();
|
||||
ZSTD_customMem customMem = {NULL, NULL, NULL};
|
||||
ZSTD_DCtx* dctx = ZSTD_createDCtx();
|
||||
ZSTD_CDict* cdict;
|
||||
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_strategy, ZSTD_fast));
|
||||
/* Set windowLog to 25 so hash/chain logs don't get sized down */
|
||||
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_windowLog, 25));
|
||||
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_hashLog, 25));
|
||||
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_chainLog, 25));
|
||||
/* Set srcSizeHint to 2^25 so hash/chain logs don't get sized down */
|
||||
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_srcSizeHint, 1u << 25));
|
||||
cdict = ZSTD_createCDict_advanced2(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, cctxParams, customMem);
|
||||
CHECK_Z(ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceAttachDict, ZSTD_dictForceAttach));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1));
|
||||
CHECK_Z(ZSTD_CCtx_refCDict(cctx, cdict));
|
||||
cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize);
|
||||
CHECK_Z(cSize);
|
||||
CHECK_Z(ZSTD_decompress_usingDict(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, dictBuffer, dictSize));
|
||||
ZSTD_freeCDict(cdict);
|
||||
ZSTD_freeDCtx(dctx);
|
||||
ZSTD_freeCCtxParams(cctxParams);
|
||||
}
|
||||
DISPLAYLEVEL(3, "OK \n");
|
||||
|
||||
DISPLAYLEVEL(3, "test%3i : ZSTD_dfast attach dictionary with hashLog = 25 and chainLog = 25 : ", testNb++);
|
||||
{
|
||||
ZSTD_CCtx_params* cctxParams = ZSTD_createCCtxParams();
|
||||
ZSTD_customMem customMem = {NULL, NULL, NULL};
|
||||
ZSTD_DCtx* dctx = ZSTD_createDCtx();
|
||||
ZSTD_CDict* cdict;
|
||||
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_strategy, ZSTD_dfast));
|
||||
/* Set windowLog to 25 so hash/chain logs don't get sized down */
|
||||
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_windowLog, 25));
|
||||
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_hashLog, 25));
|
||||
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_chainLog, 25));
|
||||
/* Set srcSizeHint to 2^25 so hash/chain logs don't get sized down */
|
||||
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_srcSizeHint, 1u << 25));
|
||||
cdict = ZSTD_createCDict_advanced2(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, cctxParams, customMem);
|
||||
CHECK_Z(ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceAttachDict, ZSTD_dictForceAttach));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1));
|
||||
CHECK_Z(ZSTD_CCtx_refCDict(cctx, cdict));
|
||||
cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize);
|
||||
CHECK_Z(cSize);
|
||||
CHECK_Z(ZSTD_decompress_usingDict(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, dictBuffer, dictSize));
|
||||
ZSTD_freeCDict(cdict);
|
||||
ZSTD_freeDCtx(dctx);
|
||||
ZSTD_freeCCtxParams(cctxParams);
|
||||
}
|
||||
DISPLAYLEVEL(3, "OK \n");
|
||||
|
||||
DISPLAYLEVEL(3, "test%3i : ZSTD_lazy attach dictionary with hashLog = 29 and searchLog = 4 : ", testNb++);
|
||||
if (MEM_64bits()) {
|
||||
ZSTD_CCtx_params* cctxParams = ZSTD_createCCtxParams();
|
||||
ZSTD_customMem customMem = {NULL, NULL, NULL};
|
||||
ZSTD_DCtx* dctx = ZSTD_createDCtx();
|
||||
ZSTD_CDict* cdict;
|
||||
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_strategy, ZSTD_lazy));
|
||||
/* Force enable row based match finder, and disable dedicated dict search. */
|
||||
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_useRowMatchFinder, ZSTD_ps_enable));
|
||||
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_enableDedicatedDictSearch, 0));
|
||||
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_searchLog, 4));
|
||||
/* Set windowLog to 29 so hash/chain logs don't get sized down */
|
||||
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_windowLog, 29));
|
||||
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_hashLog, 29));
|
||||
/* Set srcSizeHint to 2^29 so hash/chain logs don't get sized down */
|
||||
CHECK_Z(ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_srcSizeHint, 1u << 29));
|
||||
cdict = ZSTD_createCDict_advanced2(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, cctxParams, customMem);
|
||||
CHECK_Z(ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceAttachDict, ZSTD_dictForceAttach));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1));
|
||||
CHECK_Z(ZSTD_CCtx_refCDict(cctx, cdict));
|
||||
cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize);
|
||||
CHECK_Z(cSize);
|
||||
CHECK_Z(ZSTD_decompress_usingDict(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize, dictBuffer, dictSize));
|
||||
ZSTD_freeCDict(cdict);
|
||||
ZSTD_freeDCtx(dctx);
|
||||
ZSTD_freeCCtxParams(cctxParams);
|
||||
}
|
||||
DISPLAYLEVEL(3, "OK \n");
|
||||
|
||||
DISPLAYLEVEL(3, "test%3i : Dictionary with non-default repcodes : ", testNb++);
|
||||
{ U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; }
|
||||
dictSize = ZDICT_trainFromBuffer(dictBuffer, dictSize,
|
||||
|
||||
@ -1566,6 +1566,27 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests)
|
||||
CHECK(!ZSTD_isError(ZSTD_CCtx_setParameter(zc, ZSTD_c_srcSizeHint, -1)), "Out of range doesn't error");
|
||||
DISPLAYLEVEL(3, "OK \n");
|
||||
|
||||
DISPLAYLEVEL(3, "test%3i : ZSTD_lazy compress with hashLog = 29 and searchLog = 4 : ", testNb++);
|
||||
if (MEM_64bits()) {
|
||||
ZSTD_outBuffer out = { compressedBuffer, compressedBufferSize, 0 };
|
||||
ZSTD_inBuffer in = { CNBuffer, CNBufferSize, 0 };
|
||||
CHECK_Z(ZSTD_CCtx_reset(zc, ZSTD_reset_session_and_parameters));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_strategy, ZSTD_lazy));
|
||||
/* Force enable the row based match finder */
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_useRowMatchFinder, ZSTD_ps_enable));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_searchLog, 4));
|
||||
/* Set windowLog to 29 so the hashLog doesn't get sized down */
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_windowLog, 29));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_hashLog, 29));
|
||||
CHECK_Z(ZSTD_CCtx_setParameter(zc, ZSTD_c_checksumFlag, 1));
|
||||
/* Compress with continue first so the hashLog doesn't get sized down */
|
||||
CHECK_Z(ZSTD_compressStream2(zc, &out, &in, ZSTD_e_continue));
|
||||
CHECK_Z(ZSTD_compressStream2(zc, &out, &in, ZSTD_e_end));
|
||||
cSize = out.pos;
|
||||
CHECK_Z(ZSTD_decompress(decodedBuffer, CNBufferSize, compressedBuffer, cSize));
|
||||
}
|
||||
DISPLAYLEVEL(3, "OK \n");
|
||||
|
||||
DISPLAYLEVEL(3, "test%3i : Test offset == windowSize : ", testNb++);
|
||||
{
|
||||
int windowLog;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user