mirror of
https://github.com/facebook/zstd.git
synced 2025-10-09 00:05:28 -04:00
dictBuilder protection vs huge sample sets (>2 GB)
This commit is contained in:
parent
bcb5f77efa
commit
99b045b70a
@ -31,6 +31,12 @@
|
|||||||
- Zstd homepage : https://www.zstd.net
|
- Zstd homepage : https://www.zstd.net
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/*-**************************************
|
||||||
|
* Tuning parameters
|
||||||
|
****************************************/
|
||||||
|
/* Upper bound, in bytes, on the total sample data that gets sorted (1500 MB).
 * ZDICT_trainBuffer drops trailing samples until the total fits under this
 * limit, because the buffer length is handed to divsufsort as an int. */
#define ZDICT_MAX_SAMPLES_SIZE (1500U << 20)
|
||||||
|
|
||||||
|
|
||||||
/*-**************************************
|
/*-**************************************
|
||||||
* Compiler Options
|
* Compiler Options
|
||||||
****************************************/
|
****************************************/
|
||||||
@ -481,7 +487,7 @@ static U32 ZDICT_dictSize(const dictItem* dictList)
|
|||||||
|
|
||||||
|
|
||||||
static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
|
static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
|
||||||
const void* const buffer, const size_t bufferSize, /* buffer must end with noisy guard band */
|
const void* const buffer, size_t bufferSize, /* buffer must end with noisy guard band */
|
||||||
const size_t* fileSizes, unsigned nbFiles,
|
const size_t* fileSizes, unsigned nbFiles,
|
||||||
U32 shiftRatio, unsigned maxDictSize)
|
U32 shiftRatio, unsigned maxDictSize)
|
||||||
{
|
{
|
||||||
@ -503,6 +509,10 @@ static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
|
|||||||
if (minRatio < MINRATIO) minRatio = MINRATIO;
|
if (minRatio < MINRATIO) minRatio = MINRATIO;
|
||||||
memset(doneMarks, 0, bufferSize+16);
|
memset(doneMarks, 0, bufferSize+16);
|
||||||
|
|
||||||
|
/* limit sample set size (divsufsort limitation)*/
|
||||||
|
if (bufferSize > ZDICT_MAX_SAMPLES_SIZE) DISPLAYLEVEL(3, "sample set too large : reduce to %u MB ...\n", (U32)(ZDICT_MAX_SAMPLES_SIZE>>20));
|
||||||
|
while (bufferSize > ZDICT_MAX_SAMPLES_SIZE) bufferSize -= fileSizes[--nbFiles];
|
||||||
|
|
||||||
/* sort */
|
/* sort */
|
||||||
DISPLAYLEVEL(2, "sorting %u files of total size %u MB ...\n", nbFiles, (U32)(bufferSize>>20));
|
DISPLAYLEVEL(2, "sorting %u files of total size %u MB ...\n", nbFiles, (U32)(bufferSize>>20));
|
||||||
divSuftSortResult = divsufsort((const unsigned char*)buffer, suffix, (int)bufferSize, 0);
|
divSuftSortResult = divsufsort((const unsigned char*)buffer, suffix, (int)bufferSize, 0);
|
||||||
@ -703,7 +713,6 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|||||||
}
|
}
|
||||||
if (compressionLevel==0) compressionLevel=g_compressionLevel_default;
|
if (compressionLevel==0) compressionLevel=g_compressionLevel_default;
|
||||||
params.cParams = ZSTD_getCParams(compressionLevel, averageSampleSize, dictBufferSize);
|
params.cParams = ZSTD_getCParams(compressionLevel, averageSampleSize, dictBufferSize);
|
||||||
//params.cParams.strategy = ZSTD_greedy;
|
|
||||||
params.fParams.contentSizeFlag = 0;
|
params.fParams.contentSizeFlag = 0;
|
||||||
{ size_t const beginResult = ZSTD_compressBegin_advanced(esr.ref, dictBuffer, dictBufferSize, params, 0);
|
{ size_t const beginResult = ZSTD_compressBegin_advanced(esr.ref, dictBuffer, dictBufferSize, params, 0);
|
||||||
if (ZSTD_isError(beginResult)) {
|
if (ZSTD_isError(beginResult)) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user