mirror of
https://github.com/facebook/zstd.git
synced 2025-10-09 00:05:28 -04:00
better warning and error messages in case of dictionary training failure (#292)
This commit is contained in:
parent
79d9cdd258
commit
49d105cfcf
@ -692,7 +692,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|||||||
esr.workPlace = malloc(ZSTD_BLOCKSIZE_ABSOLUTEMAX);
|
esr.workPlace = malloc(ZSTD_BLOCKSIZE_ABSOLUTEMAX);
|
||||||
if (!esr.ref || !esr.zc || !esr.workPlace) {
|
if (!esr.ref || !esr.zc || !esr.workPlace) {
|
||||||
eSize = ERROR(memory_allocation);
|
eSize = ERROR(memory_allocation);
|
||||||
DISPLAYLEVEL(1, "Not enough memory");
|
DISPLAYLEVEL(1, "Not enough memory \n");
|
||||||
goto _cleanup;
|
goto _cleanup;
|
||||||
}
|
}
|
||||||
if (offcodeMax>OFFCODE_MAX) { eSize = ERROR(dictionary_wrong); goto _cleanup; } /* too large dictionary */
|
if (offcodeMax>OFFCODE_MAX) { eSize = ERROR(dictionary_wrong); goto _cleanup; } /* too large dictionary */
|
||||||
@ -708,7 +708,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|||||||
{ size_t const beginResult = ZSTD_compressBegin_advanced(esr.ref, dictBuffer, dictBufferSize, params, 0);
|
{ size_t const beginResult = ZSTD_compressBegin_advanced(esr.ref, dictBuffer, dictBufferSize, params, 0);
|
||||||
if (ZSTD_isError(beginResult)) {
|
if (ZSTD_isError(beginResult)) {
|
||||||
eSize = ERROR(GENERIC);
|
eSize = ERROR(GENERIC);
|
||||||
DISPLAYLEVEL(1, "error : ZSTD_compressBegin_advanced failed ");
|
DISPLAYLEVEL(1, "error : ZSTD_compressBegin_advanced failed \n");
|
||||||
goto _cleanup;
|
goto _cleanup;
|
||||||
} }
|
} }
|
||||||
|
|
||||||
@ -724,7 +724,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|||||||
errorCode = HUF_buildCTable (hufTable, countLit, 255, huffLog);
|
errorCode = HUF_buildCTable (hufTable, countLit, 255, huffLog);
|
||||||
if (HUF_isError(errorCode)) {
|
if (HUF_isError(errorCode)) {
|
||||||
eSize = ERROR(GENERIC);
|
eSize = ERROR(GENERIC);
|
||||||
DISPLAYLEVEL(1, "HUF_buildCTable error");
|
DISPLAYLEVEL(1, "HUF_buildCTable error \n");
|
||||||
goto _cleanup;
|
goto _cleanup;
|
||||||
}
|
}
|
||||||
huffLog = (U32)errorCode;
|
huffLog = (U32)errorCode;
|
||||||
@ -740,7 +740,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|||||||
errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax);
|
errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax);
|
||||||
if (FSE_isError(errorCode)) {
|
if (FSE_isError(errorCode)) {
|
||||||
eSize = ERROR(GENERIC);
|
eSize = ERROR(GENERIC);
|
||||||
DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount");
|
DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount \n");
|
||||||
goto _cleanup;
|
goto _cleanup;
|
||||||
}
|
}
|
||||||
Offlog = (U32)errorCode;
|
Offlog = (U32)errorCode;
|
||||||
@ -749,7 +749,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|||||||
errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML);
|
errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML);
|
||||||
if (FSE_isError(errorCode)) {
|
if (FSE_isError(errorCode)) {
|
||||||
eSize = ERROR(GENERIC);
|
eSize = ERROR(GENERIC);
|
||||||
DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount");
|
DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount \n");
|
||||||
goto _cleanup;
|
goto _cleanup;
|
||||||
}
|
}
|
||||||
mlLog = (U32)errorCode;
|
mlLog = (U32)errorCode;
|
||||||
@ -758,7 +758,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|||||||
errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL);
|
errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL);
|
||||||
if (FSE_isError(errorCode)) {
|
if (FSE_isError(errorCode)) {
|
||||||
eSize = ERROR(GENERIC);
|
eSize = ERROR(GENERIC);
|
||||||
DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount");
|
DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount \n");
|
||||||
goto _cleanup;
|
goto _cleanup;
|
||||||
}
|
}
|
||||||
llLog = (U32)errorCode;
|
llLog = (U32)errorCode;
|
||||||
@ -768,7 +768,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|||||||
{ size_t const hhSize = HUF_writeCTable(dstPtr, maxDstSize, hufTable, 255, huffLog);
|
{ size_t const hhSize = HUF_writeCTable(dstPtr, maxDstSize, hufTable, 255, huffLog);
|
||||||
if (HUF_isError(hhSize)) {
|
if (HUF_isError(hhSize)) {
|
||||||
eSize = ERROR(GENERIC);
|
eSize = ERROR(GENERIC);
|
||||||
DISPLAYLEVEL(1, "HUF_writeCTable error");
|
DISPLAYLEVEL(1, "HUF_writeCTable error \n");
|
||||||
goto _cleanup;
|
goto _cleanup;
|
||||||
}
|
}
|
||||||
dstPtr += hhSize;
|
dstPtr += hhSize;
|
||||||
@ -779,7 +779,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|||||||
{ size_t const ohSize = FSE_writeNCount(dstPtr, maxDstSize, offcodeNCount, OFFCODE_MAX, Offlog);
|
{ size_t const ohSize = FSE_writeNCount(dstPtr, maxDstSize, offcodeNCount, OFFCODE_MAX, Offlog);
|
||||||
if (FSE_isError(ohSize)) {
|
if (FSE_isError(ohSize)) {
|
||||||
eSize = ERROR(GENERIC);
|
eSize = ERROR(GENERIC);
|
||||||
DISPLAYLEVEL(1, "FSE_writeNCount error with offcodeNCount");
|
DISPLAYLEVEL(1, "FSE_writeNCount error with offcodeNCount \n");
|
||||||
goto _cleanup;
|
goto _cleanup;
|
||||||
}
|
}
|
||||||
dstPtr += ohSize;
|
dstPtr += ohSize;
|
||||||
@ -790,7 +790,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|||||||
{ size_t const mhSize = FSE_writeNCount(dstPtr, maxDstSize, matchLengthNCount, MaxML, mlLog);
|
{ size_t const mhSize = FSE_writeNCount(dstPtr, maxDstSize, matchLengthNCount, MaxML, mlLog);
|
||||||
if (FSE_isError(mhSize)) {
|
if (FSE_isError(mhSize)) {
|
||||||
eSize = ERROR(GENERIC);
|
eSize = ERROR(GENERIC);
|
||||||
DISPLAYLEVEL(1, "FSE_writeNCount error with matchLengthNCount");
|
DISPLAYLEVEL(1, "FSE_writeNCount error with matchLengthNCount \n");
|
||||||
goto _cleanup;
|
goto _cleanup;
|
||||||
}
|
}
|
||||||
dstPtr += mhSize;
|
dstPtr += mhSize;
|
||||||
@ -801,7 +801,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|||||||
{ size_t const lhSize = FSE_writeNCount(dstPtr, maxDstSize, litLengthNCount, MaxLL, llLog);
|
{ size_t const lhSize = FSE_writeNCount(dstPtr, maxDstSize, litLengthNCount, MaxLL, llLog);
|
||||||
if (FSE_isError(lhSize)) {
|
if (FSE_isError(lhSize)) {
|
||||||
eSize = ERROR(GENERIC);
|
eSize = ERROR(GENERIC);
|
||||||
DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount");
|
DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount \n");
|
||||||
goto _cleanup;
|
goto _cleanup;
|
||||||
}
|
}
|
||||||
dstPtr += lhSize;
|
dstPtr += lhSize;
|
||||||
@ -811,7 +811,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|||||||
|
|
||||||
if (maxDstSize<12) {
|
if (maxDstSize<12) {
|
||||||
eSize = ERROR(GENERIC);
|
eSize = ERROR(GENERIC);
|
||||||
DISPLAYLEVEL(1, "not enough space to write RepOffsets");
|
DISPLAYLEVEL(1, "not enough space to write RepOffsets \n");
|
||||||
goto _cleanup;
|
goto _cleanup;
|
||||||
}
|
}
|
||||||
# if 0
|
# if 0
|
||||||
@ -856,10 +856,14 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
|
|||||||
/* entropy tables */
|
/* entropy tables */
|
||||||
DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */
|
DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */
|
||||||
DISPLAYLEVEL(2, "statistics ... \n");
|
DISPLAYLEVEL(2, "statistics ... \n");
|
||||||
hSize += ZDICT_analyzeEntropy((char*)dictBuffer+hSize, dictBufferCapacity-hSize,
|
{ size_t const eSize = ZDICT_analyzeEntropy((char*)dictBuffer+hSize, dictBufferCapacity-hSize,
|
||||||
compressionLevel,
|
compressionLevel,
|
||||||
samplesBuffer, samplesSizes, nbSamples,
|
samplesBuffer, samplesSizes, nbSamples,
|
||||||
(char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize);
|
(char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize);
|
||||||
|
if (ZDICT_isError(eSize)) return eSize;
|
||||||
|
hSize += eSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
if (hSize + dictContentSize < dictBufferCapacity)
|
if (hSize + dictContentSize < dictBufferCapacity)
|
||||||
memmove((char*)dictBuffer + hSize, (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize);
|
memmove((char*)dictBuffer + hSize, (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize);
|
||||||
@ -902,7 +906,7 @@ size_t ZDICT_trainFromBuffer_unsafe(
|
|||||||
|
|
||||||
/* display best matches */
|
/* display best matches */
|
||||||
if (g_displayLevel>= 3) {
|
if (g_displayLevel>= 3) {
|
||||||
U32 const nb = 25;
|
U32 const nb = MIN(25, dictList[0].pos);
|
||||||
U32 const dictContentSize = ZDICT_dictSize(dictList);
|
U32 const dictContentSize = ZDICT_dictSize(dictList);
|
||||||
U32 u;
|
U32 u;
|
||||||
DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos, dictContentSize);
|
DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos, dictContentSize);
|
||||||
|
@ -204,11 +204,11 @@ int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
|
|||||||
if ((!fileSizes) || (!srcBuffer) || (!dictBuffer)) EXM_THROW(12, "not enough memory for DiB_trainFiles"); /* should not happen */
|
if ((!fileSizes) || (!srcBuffer) || (!dictBuffer)) EXM_THROW(12, "not enough memory for DiB_trainFiles"); /* should not happen */
|
||||||
g_displayLevel = params.notificationLevel;
|
g_displayLevel = params.notificationLevel;
|
||||||
if (nbFiles < 5) {
|
if (nbFiles < 5) {
|
||||||
DISPLAYLEVEL(2, "! Warning : nb of samples too low for proper processing \n");
|
DISPLAYLEVEL(2, "! Warning : nb of samples too low for proper processing ! \n");
|
||||||
DISPLAYLEVEL(2, "! Please provide one file per sample \n");
|
DISPLAYLEVEL(2, "! Please provide _one file per sample_. \n");
|
||||||
DISPLAYLEVEL(2, "! Avoid concatenating multiple samples into a single file \n");
|
DISPLAYLEVEL(2, "! Do not concatenate samples together into a single file, \n");
|
||||||
DISPLAYLEVEL(2, "! otherwise, dictBuilder will be unable to find the beginning of each sample \n");
|
DISPLAYLEVEL(2, "! as dictBuilder will be unable to find the beginning of each sample, \n");
|
||||||
DISPLAYLEVEL(2, "! resulting in distorted statistics \n");
|
DISPLAYLEVEL(2, "! resulting in poor dictionary quality. \n");
|
||||||
}
|
}
|
||||||
|
|
||||||
/* init */
|
/* init */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user