[bug] Fix dictContentType when reprocessing cdict

Conditions to trigger:
* CDict is loaded as raw content.
* CDict starts with the zstd dictionary magic number.
* The CDict is reprocessed (not attached or copied).
* The new API is used (streaming or `ZSTD_compress2()`).

Bug: The dictionary is loaded as a zstd dictionary, not a raw content
dictionary, because the dict content type is set to `ZSTD_dct_auto`.

Fix: Pass in the dictionary content type from cdict creation to the call
to `ZSTD_compress_insertDictionary()`.

Test: Added a test case that exposes the bug, and fixed the raw
content tests to not modify the `dictBuffer`, which makes all future
tests with the `dictBuffer` raw content, which doesn't seem intentional.
This commit is contained in:
Nick Terrell 2020-10-05 15:17:44 -07:00
parent b951ad20a2
commit 7083f79008
2 changed files with 45 additions and 24 deletions

View File

@ -64,6 +64,7 @@ size_t ZSTD_compressBound(size_t srcSize) {
struct ZSTD_CDict_s {
const void* dictContent;
size_t dictContentSize;
ZSTD_dictContentType_e dictContentType; /* The dictContentType the CDict was created with */
U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */
ZSTD_cwksp workspace;
ZSTD_matchState_t matchState;
@ -3188,7 +3189,7 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
ZSTD_compress_insertDictionary(
cctx->blockState.prevCBlock, &cctx->blockState.matchState,
&cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent,
cdict->dictContentSize, dictContentType, dtlm,
cdict->dictContentSize, cdict->dictContentType, dtlm,
cctx->entropyWorkspace)
: ZSTD_compress_insertDictionary(
cctx->blockState.prevCBlock, &cctx->blockState.matchState,
@ -3460,6 +3461,7 @@ static size_t ZSTD_initCDict_internal(
ZSTD_memcpy(internalBuffer, dictBuffer, dictSize);
}
cdict->dictContentSize = dictSize;
cdict->dictContentType = dictContentType;
cdict->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cdict->workspace, HUF_WORKSPACE_SIZE);

View File

@ -1971,34 +1971,53 @@ static int basicUnitTests(U32 const seed, double compressibility)
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : Loading rawContent starting with dict header w/ ZSTD_dct_auto should fail : ", testNb++);
{
size_t ret;
MEM_writeLE32((char*)dictBuffer+2, ZSTD_MAGIC_DICTIONARY);
/* Either operation is allowed to fail, but one must fail. */
ret = ZSTD_CCtx_loadDictionary_advanced(
cctx, (const char*)dictBuffer+2, dictSize-2, ZSTD_dlm_byRef, ZSTD_dct_auto);
if (!ZSTD_isError(ret)) {
ret = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100));
if (!ZSTD_isError(ret)) goto _output_error;
}
}
DISPLAYLEVEL(3, "OK \n");
{ char* rawDictBuffer = (char*)malloc(dictSize);
assert(rawDictBuffer);
memcpy(rawDictBuffer, (char*)dictBuffer + 2, dictSize - 2);
memset(rawDictBuffer + dictSize - 2, 0, 2);
MEM_writeLE32((char*)rawDictBuffer, ZSTD_MAGIC_DICTIONARY);
DISPLAYLEVEL(3, "test%3i : Loading rawContent starting with dict header w/ ZSTD_dct_rawContent should pass : ", testNb++);
{
size_t ret;
MEM_writeLE32((char*)dictBuffer+2, ZSTD_MAGIC_DICTIONARY);
ret = ZSTD_CCtx_loadDictionary_advanced(
cctx, (const char*)dictBuffer+2, dictSize-2, ZSTD_dlm_byRef, ZSTD_dct_rawContent);
if (ZSTD_isError(ret)) goto _output_error;
ret = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100));
if (ZSTD_isError(ret)) goto _output_error;
DISPLAYLEVEL(3, "test%3i : Loading rawContent starting with dict header w/ ZSTD_dct_auto should fail : ", testNb++);
{
size_t ret;
/* Either operation is allowed to fail, but one must fail. */
ret = ZSTD_CCtx_loadDictionary_advanced(
cctx, (const char*)rawDictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto);
if (!ZSTD_isError(ret)) {
ret = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100));
if (!ZSTD_isError(ret)) goto _output_error;
}
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : Loading rawContent starting with dict header w/ ZSTD_dct_rawContent should pass : ", testNb++);
{
size_t ret;
ret = ZSTD_CCtx_loadDictionary_advanced(
cctx, (const char*)rawDictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_rawContent);
if (ZSTD_isError(ret)) goto _output_error;
ret = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, MIN(CNBuffSize, 100));
if (ZSTD_isError(ret)) goto _output_error;
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : Testing non-attached CDict with ZSTD_dct_rawContent : ", testNb++);
{ size_t const srcSize = MIN(CNBuffSize, 100);
ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
/* Force the dictionary to be reloaded in raw content mode */
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_forceAttachDict, ZSTD_dictForceLoad));
CHECK_Z(ZSTD_CCtx_loadDictionary_advanced(cctx, rawDictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_rawContent));
cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, srcSize);
CHECK_Z(cSize);
}
DISPLAYLEVEL(3, "OK \n");
free(rawDictBuffer);
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : ZSTD_CCtx_refCDict() then set parameters : ", testNb++);
{ ZSTD_CDict* const cdict = ZSTD_createCDict(CNBuffer, dictSize, 1);
ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 1) );
CHECK_Z( ZSTD_CCtx_setParameter(cctx, ZSTD_c_hashLog, 12 ));
CHECK_Z( ZSTD_CCtx_refCDict(cctx, cdict) );