From bea78e8fc26bd9d46bc2d98a3d9099586c4ad29d Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 22 Mar 2017 18:09:11 -0700 Subject: [PATCH 1/2] limited CDict acceptation criteria to be the same as DDict --- lib/compress/zstd_compress.c | 55 ++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 1aab3553f..eecc76950 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2567,18 +2567,15 @@ static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSym /* Dictionary format : - Magic == ZSTD_DICT_MAGIC (4 bytes) - HUF_writeCTable(256) - FSE_writeNCount(off) - FSE_writeNCount(ml) - FSE_writeNCount(ll) - RepOffsets - Dictionary content -*/ -/*! ZSTD_loadDictEntropyStats() : - @return : size read from dictionary - note : magic number supposed already checked */ -static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* cctx, const void* dict, size_t dictSize) + * See : + * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format + */ +/*! ZSTD_loadDictionary() : + * @return : size read from dictionary + * note : magic number supposed already checked + * dictSize supposed > 8 + */ +static size_t ZSTD_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize) { const BYTE* dictPtr = (const BYTE*)dict; const BYTE* const dictEnd = dictPtr + dictSize; @@ -2586,7 +2583,11 @@ static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* cctx, const void* dict, size_ unsigned offcodeMaxValue = MaxOff; BYTE scratchBuffer[1<hufTable, 255, dict, dictSize); + dictPtr += 4; /* skip magic number */ + cctx->dictID = cctx->params.fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr); + dictPtr += 4; + + { size_t const hufHeaderSize = HUF_readCTable(cctx->hufTable, 255, dictPtr, dictEnd-dictPtr); if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted); dictPtr += hufHeaderSize; } @@ -2628,15 +2629,20 @@ static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* cctx, const void* dict, size_ cctx->rep[2] = MEM_readLE32(dictPtr+8); if (cctx->rep[2] == 0 || cctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted); dictPtr += 12; - { U32 offcodeMax = MaxOff; - if ((size_t)(dictEnd - dictPtr) <= ((U32)-1) - 128 KB) { - U32 const maxOffset = (U32)(dictEnd - dictPtr) + 128 KB; /* The maximum offset that must be supported */ - /* Calculate minimum offset code required to represent maxOffset */ - offcodeMax = ZSTD_highbit32(maxOffset); + { size_t const dictContentSize = (size_t)(dictEnd - dictPtr); + U32 offcodeMax = MaxOff; + if (dictContentSize <= ((U32)-1) - 128 KB) { + U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */ + offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */ } - /* Every possible supported offset <= dictContentSize + 128 KB must be representable */ + /* All offset values <= dictContentSize + 128 KB must be representable */ CHECK_F (ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff))); - } + /* All repCodes must be <= dictContentSize and != 0*/ + { U32 u; + for (u=0; u<3; u++) { + if (cctx->rep[u] == 0) return ERROR(dictionary_corrupted); + if (cctx->rep[u] > dictContentSize) return ERROR(dictionary_corrupted); + } } } cctx->flagStaticTables = 1; cctx->flagStaticHufTable = HUF_repeat_valid; @@ -2652,14 +2658,9 @@ static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* zc, const void* dict, si /* dict as pure content */ if ((MEM_readLE32(dict) != ZSTD_DICT_MAGIC) || (zc->forceRawDict)) return ZSTD_loadDictionaryContent(zc, dict, dictSize); - zc->dictID = zc->params.fParams.noDictIDFlag ? 0 : MEM_readLE32((const char*)dict+4); - /* known magic number : dict is parsed for entropy stats and content */ - { size_t const loadError = ZSTD_loadDictEntropyStats(zc, (const char*)dict+8 /* skip dictHeader */, dictSize-8); - size_t const eSize = loadError + 8; - if (ZSTD_isError(loadError)) return loadError; - return ZSTD_loadDictionaryContent(zc, (const char*)dict+eSize, dictSize-eSize); - } + /* dict as zstd dictionary */ + return ZSTD_loadDictionary(zc, dict, dictSize); } /*! ZSTD_compressBegin_internal() : From 16a0b10781a62dfbd8ed137d435b5022f389f955 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 24 Mar 2017 12:46:46 -0700 Subject: [PATCH 2/2] fixed ZSTD_loadZstdDictionary() forgot to add the dictionary content (tests were not failing, just compressing less). Also : added size protections when adding dict content since hc/bt table filling would fail if size < 8 --- lib/compress/zstd_compress.c | 43 ++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index eecc76950..c1ef526b4 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2504,7 +2504,9 @@ size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0, 0); } - +/*! ZSTD_loadDictionaryContent() : + * @return : 0, or an error code + */ static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t srcSize) { const BYTE* const ip = (const BYTE*) src; @@ -2534,13 +2536,15 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t case ZSTD_greedy: case ZSTD_lazy: case ZSTD_lazy2: - ZSTD_insertAndFindFirstIndex (zc, iend-HASH_READ_SIZE, zc->params.cParams.searchLength); + if (srcSize >= HASH_READ_SIZE) + ZSTD_insertAndFindFirstIndex(zc, iend-HASH_READ_SIZE, zc->params.cParams.searchLength); break; case ZSTD_btlazy2: case ZSTD_btopt: case ZSTD_btopt2: - ZSTD_updateTree(zc, iend-HASH_READ_SIZE, iend, 1 << zc->params.cParams.searchLog, zc->params.cParams.searchLength); + if (srcSize >= HASH_READ_SIZE) + ZSTD_updateTree(zc, iend-HASH_READ_SIZE, iend, 1 << zc->params.cParams.searchLog, zc->params.cParams.searchLength); break; default: @@ -2570,12 +2574,12 @@ static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSym * See : * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format */ -/*! ZSTD_loadDictionary() : - * @return : size read from dictionary - * note : magic number supposed already checked - * dictSize supposed > 8 +/*! ZSTD_loadZstdDictionary() : + * @return : 0, or an error code + * assumptions : magic number supposed already checked + * dictSize supposed > 8 */ -static size_t ZSTD_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize) +static size_t ZSTD_loadZstdDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize) { const BYTE* dictPtr = (const BYTE*)dict; const BYTE* const dictEnd = dictPtr + dictSize; @@ -2624,9 +2628,9 @@ static size_t ZSTD_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dict } if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted); - cctx->rep[0] = MEM_readLE32(dictPtr+0); if (cctx->rep[0] == 0 || cctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted); - cctx->rep[1] = MEM_readLE32(dictPtr+4); if (cctx->rep[1] == 0 || cctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted); - cctx->rep[2] = MEM_readLE32(dictPtr+8); if (cctx->rep[2] == 0 || cctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted); + cctx->rep[0] = MEM_readLE32(dictPtr+0); + cctx->rep[1] = MEM_readLE32(dictPtr+4); + cctx->rep[2] = MEM_readLE32(dictPtr+8); dictPtr += 12; { size_t const dictContentSize = (size_t)(dictEnd - dictPtr); @@ -2642,25 +2646,26 @@ static size_t ZSTD_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dict for (u=0; u<3; u++) { if (cctx->rep[u] == 0) return ERROR(dictionary_corrupted); if (cctx->rep[u] > dictContentSize) return ERROR(dictionary_corrupted); - } } } + } } - cctx->flagStaticTables = 1; - cctx->flagStaticHufTable = HUF_repeat_valid; - return dictPtr - (const BYTE*)dict; + cctx->flagStaticTables = 1; + cctx->flagStaticHufTable = HUF_repeat_valid; + return ZSTD_loadDictionaryContent(cctx, dictPtr, dictContentSize); + } } /** ZSTD_compress_insertDictionary() : * @return : 0, or an error code */ -static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* zc, const void* dict, size_t dictSize) +static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize) { if ((dict==NULL) || (dictSize<=8)) return 0; /* dict as pure content */ - if ((MEM_readLE32(dict) != ZSTD_DICT_MAGIC) || (zc->forceRawDict)) - return ZSTD_loadDictionaryContent(zc, dict, dictSize); + if ((MEM_readLE32(dict) != ZSTD_DICT_MAGIC) || (cctx->forceRawDict)) + return ZSTD_loadDictionaryContent(cctx, dict, dictSize); /* dict as zstd dictionary */ - return ZSTD_loadDictionary(zc, dict, dictSize); + return ZSTD_loadZstdDictionary(cctx, dict, dictSize); } /*! ZSTD_compressBegin_internal() :