diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index 538319bcb..df7cb8aa7 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -1734,8 +1734,8 @@ typedef enum {
     ZSTD_defaultAllowed = 1
 } ZSTD_defaultPolicy_e;

-MEM_STATIC
-symbolEncodingType_e ZSTD_selectEncodingType(
+MEM_STATIC symbolEncodingType_e
+ZSTD_selectEncodingType(
         FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
         size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
         FSE_CTable const* prevCTable,
@@ -1809,17 +1809,17 @@ symbolEncodingType_e ZSTD_selectEncodingType(
     return set_compressed;
 }

-MEM_STATIC
-size_t ZSTD_buildCTable(void* dst, size_t dstCapacity,
-                FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
-                U32* count, U32 max,
-                BYTE const* codeTable, size_t nbSeq,
-                S16 const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
-                FSE_CTable const* prevCTable, size_t prevCTableSize,
-                void* workspace, size_t workspaceSize)
+MEM_STATIC size_t
+ZSTD_buildCTable(void* dst, size_t dstCapacity,
+                FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
+                U32* count, U32 max,
+                const BYTE* codeTable, size_t nbSeq,
+                const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
+                const FSE_CTable* prevCTable, size_t prevCTableSize,
+                void* workspace, size_t workspaceSize)
 {
     BYTE* op = (BYTE*)dst;
-    BYTE const* const oend = op + dstCapacity;
+    const BYTE* const oend = op + dstCapacity;

     switch (type) {
     case set_rle:
@@ -2069,9 +2069,9 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
         assert(set_basic < set_compressed && set_rle < set_compressed);
         assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none));  /* We don't copy tables */
         {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
-                    count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
-                    prevEntropy->fse.litlengthCTable, sizeof(prevEntropy->fse.litlengthCTable),
-                    workspace, HUF_WORKSPACE_SIZE);
+                                                    count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
+                                                    prevEntropy->fse.litlengthCTable, sizeof(prevEntropy->fse.litlengthCTable),
+                                                    workspace, HUF_WORKSPACE_SIZE);
             if (ZSTD_isError(countSize)) return countSize;
             if (LLtype == set_compressed)
                 lastNCount = op;
@@ -2087,9 +2087,9 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
         Offtype = ZSTD_selectEncodingType(&nextEntropy->fse.offcode_repeatMode, count, max, mostFrequent, nbSeq, OffFSELog, prevEntropy->fse.offcodeCTable, OF_defaultNorm, OF_defaultNormLog, defaultPolicy, strategy);
         assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none));  /* We don't copy tables */
         {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
-                    count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
-                    prevEntropy->fse.offcodeCTable, sizeof(prevEntropy->fse.offcodeCTable),
-                    workspace, HUF_WORKSPACE_SIZE);
+                                                    count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
+                                                    prevEntropy->fse.offcodeCTable, sizeof(prevEntropy->fse.offcodeCTable),
+                                                    workspace, HUF_WORKSPACE_SIZE);
             if (ZSTD_isError(countSize)) return countSize;
             if (Offtype == set_compressed)
                 lastNCount = op;
@@ -2103,9 +2103,9 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
         MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode, count, max, mostFrequent, nbSeq, MLFSELog, prevEntropy->fse.matchlengthCTable, ML_defaultNorm, ML_defaultNormLog, ZSTD_defaultAllowed, strategy);
         assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none));  /* We don't copy tables */
         {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
-                    count, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
-                    prevEntropy->fse.matchlengthCTable, sizeof(prevEntropy->fse.matchlengthCTable),
-                    workspace, HUF_WORKSPACE_SIZE);
+                                                    count, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
+                                                    prevEntropy->fse.matchlengthCTable, sizeof(prevEntropy->fse.matchlengthCTable),
+                                                    workspace, HUF_WORKSPACE_SIZE);
             if (ZSTD_isError(countSize)) return countSize;
             if (MLtype == set_compressed)
                 lastNCount = op;
@@ -2144,9 +2144,9 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
 }

 MEM_STATIC size_t ZSTD_compressSequences(seqStore_t* seqStorePtr,
-                       ZSTD_entropyCTables_t const* prevEntropy,
+                       const ZSTD_entropyCTables_t* prevEntropy,
                        ZSTD_entropyCTables_t* nextEntropy,
-                       ZSTD_CCtx_params const* cctxParams,
+                       const ZSTD_CCtx_params* cctxParams,
                        void* dst, size_t dstCapacity, size_t srcSize,
                        U32* workspace, int bmi2)
 {
@@ -2162,7 +2162,7 @@ MEM_STATIC size_t ZSTD_compressSequences(seqStore_t* seqStorePtr,
     if (ZSTD_isError(cSize)) return cSize;

     /* Check compressibility */
-    {   size_t const maxCSize = srcSize - ZSTD_minGain(srcSize);  /* note : fixed formula, maybe should depend on compression level, or strategy */
+    {   size_t const maxCSize = srcSize - ZSTD_minGain(srcSize);  /* note : fixed formula; to be refined, depending on compression level, or strategy */
         if (cSize >= maxCSize) return 0;  /* block not compressed */
     }
@@ -2211,7 +2211,7 @@ static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr,
     seqStorePtr->lit += lastLLSize;
 }

-static void ZSTD_resetSeqStore(seqStore_t* ssPtr)
+void ZSTD_resetSeqStore(seqStore_t* ssPtr)
 {
     ssPtr->lit = ssPtr->litStart;
     ssPtr->sequences = ssPtr->sequencesStart;
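An aside on the compressibility gate in the hunk above: ZSTD_compressSequences() keeps the compressed form only when it undercuts the source size by a minimum margin; otherwise it returns 0 and the caller emits the block raw. A minimal standalone sketch of that decision — minGain_sketch() is an assumed stand-in for ZSTD_minGain(), whose exact formula lives elsewhere in zstd_compress.c and, per the updated comment, may later depend on compression level or strategy:

#include <stdio.h>
#include <stddef.h>

/* hypothetical stand-in for ZSTD_minGain() : demand a saving of roughly
 * srcSize/64 plus a couple of bytes (assumed formula, for illustration) */
static size_t minGain_sketch(size_t srcSize) { return (srcSize >> 6) + 2; }

/* mirrors the `cSize >= maxCSize` test above :
 * 1 = keep the compressed block, 0 = store the block raw */
static int keepCompressed_sketch(size_t cSize, size_t srcSize)
{
    size_t const maxCSize = srcSize - minGain_sketch(srcSize);
    return cSize < maxCSize;
}

int main(void)
{
    /* a 100 KB block squeezed to only 99 KB is not worth keeping */
    printf("%d\n", keepCompressed_sketch(99*1024, 100*1024));   /* prints 0 */
    return 0;
}

Storing such a block raw costs nothing at decompression time, which is why the test requires a real gain rather than any gain.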
diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
index a7666d5c5..516c752a3 100644
--- a/lib/compress/zstd_compress_internal.h
+++ b/lib/compress/zstd_compress_internal.h
@@ -84,28 +84,26 @@ typedef struct {
     U32 rep[ZSTD_REP_NUM];
 } ZSTD_optimal_t;

-typedef enum { zop_dynamic=0, zop_predef, zop_static } ZSTD_OptPrice_e;
+typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;

 typedef struct {
     /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
-    U32* litFreq;               /* table of literals statistics, of size 256 */
-    U32* litLengthFreq;         /* table of litLength statistics, of size (MaxLL+1) */
-    U32* matchLengthFreq;       /* table of matchLength statistics, of size (MaxML+1) */
-    U32* offCodeFreq;           /* table of offCode statistics, of size (MaxOff+1) */
-    ZSTD_match_t* matchTable;   /* list of found matches, of size ZSTD_OPT_NUM+1 */
-    ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */
+    U32* litFreq;                /* table of literals statistics, of size 256 */
+    U32* litLengthFreq;          /* table of litLength statistics, of size (MaxLL+1) */
+    U32* matchLengthFreq;        /* table of matchLength statistics, of size (MaxML+1) */
+    U32* offCodeFreq;            /* table of offCode statistics, of size (MaxOff+1) */
+    ZSTD_match_t* matchTable;    /* list of found matches, of size ZSTD_OPT_NUM+1 */
+    ZSTD_optimal_t* priceTable;  /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */

     U32  litSum;                 /* nb of literals */
     U32  litLengthSum;           /* nb of litLength codes */
     U32  matchLengthSum;         /* nb of matchLength codes */
     U32  offCodeSum;             /* nb of offset codes */
-    /* begin updated by ZSTD_setLog2Prices */
-    U32  log2litSum;             /* pow2 to compare log2(litfreq) to */
-    U32  log2litLengthSum;       /* pow2 to compare log2(llfreq) to */
-    U32  log2matchLengthSum;     /* pow2 to compare log2(mlfreq) to */
-    U32  log2offCodeSum;         /* pow2 to compare log2(offreq) to */
-    /* end : updated by ZSTD_setLog2Prices */
-    ZSTD_OptPrice_e priceType;   /* prices can be determined dynamically, or follow dictionary statistics, or a pre-defined cost structure */
+    U32  litSumBasePrice;        /* to compare to log2(litfreq) */
+    U32  litLengthSumBasePrice;  /* to compare to log2(llfreq) */
+    U32  matchLengthSumBasePrice;/* to compare to log2(mlfreq) */
+    U32  offCodeSumBasePrice;    /* to compare to log2(offreq) */
+    ZSTD_OptPrice_e priceType;   /* prices can be determined dynamically, or follow a pre-defined cost structure */
     const ZSTD_entropyCTables_t* symbolCosts;  /* pre-calculated dictionary statistics */
 } optState_t;

@@ -616,12 +614,13 @@ MEM_STATIC void ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
 {
     U32 const current = (U32)((BYTE const*)srcEnd - window->base);
     U32 loadedDictEnd = loadedDictEndPtr != NULL ? *loadedDictEndPtr : 0;
+    DEBUGLOG(5, "ZSTD_window_enforceMaxDist: current=%u, maxDist=%u", current, maxDist);
     if (current > maxDist + loadedDictEnd) {
         U32 const newLowLimit = current - maxDist;
         if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
         if (window->dictLimit < window->lowLimit) {
-            DEBUGLOG(5, "Update dictLimit from %u to %u", window->dictLimit,
-                window->lowLimit);
+            DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u",
+                        window->dictLimit, window->lowLimit);
             window->dictLimit = window->lowLimit;
         }
         if (loadedDictEndPtr)
@@ -643,12 +642,12 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
 {
     BYTE const* const ip = (BYTE const*)src;
     U32 contiguous = 1;
+    DEBUGLOG(5, "ZSTD_window_update");
     /* Check if blocks follow each other */
     if (src != window->nextSrc) {
         /* not contiguous */
         size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
-        DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u",
-                 window->dictLimit);
+        DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);
         window->lowLimit = window->dictLimit;
         assert(distanceFromBase == (size_t)(U32)distanceFromBase);  /* should never overflow */
         window->dictLimit = (U32)distanceFromBase;
@@ -665,10 +664,38 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
         ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase;
         U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx;
         window->lowLimit = lowLimitMax;
+        DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit);
     }
     return contiguous;
 }
+
+
+/* debug functions */
+
+MEM_STATIC double ZSTD_fWeight(U32 rawStat)
+{
+    U32 const fp_accuracy = 8;
+    U32 const fp_multiplier = (1 << fp_accuracy);
+    U32 const stat = rawStat + 1;
+    U32 const hb = ZSTD_highbit32(stat);
+    U32 const BWeight = hb * fp_multiplier;
+    U32 const FWeight = (stat << fp_accuracy) >> hb;
+    U32 const weight = BWeight + FWeight;
+    assert(hb + fp_accuracy < 31);
+    return (double)weight / fp_multiplier;
+}
+
+MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
+{
+    unsigned u, sum;
+    for (u=0, sum=0; u<=max; u++) sum += table[u];
+    DEBUGLOG(2, "total nb elts: %u", sum);
+    for (u=0; u<=max; u++) {
+        DEBUGLOG(2, "%2u: %5u (%.2f)",
+                u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) );
+    }
+}
+

 #if defined (__cplusplus)
 }
 #endif
@@ -696,6 +723,8 @@ size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
                     const ZSTD_CDict* cdict,
                     ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);

+void ZSTD_resetSeqStore(seqStore_t* ssPtr);
+
/*! ZSTD_compressStream_generic() :
 *  Private use only. To be called from zstdmt_compress.c in single-thread mode. */
 size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
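The ZSTD_fWeight() helper added above is a fixed-point log2: the integer part is the position of the leading bit, and the fraction is a linear read of the mantissa below it, scaled by 2^8. A self-contained illustration — highbit32_sketch() is a portable stand-in for ZSTD_highbit32():

#include <stdio.h>

/* portable stand-in for ZSTD_highbit32() : index of the highest set bit */
static unsigned highbit32_sketch(unsigned v)
{
    unsigned hb = 0;
    while (v >>= 1) hb++;
    return hb;
}

/* same fixed-point scheme as ZSTD_fWeight() : 8 fractional bits,
 * integer part = highbit, fraction = mantissa under the leading 1 */
static double fWeight_sketch(unsigned rawStat)
{
    unsigned const fp_accuracy = 8;
    unsigned const fp_multiplier = 1u << fp_accuracy;
    unsigned const stat = rawStat + 1;
    unsigned const hb = highbit32_sketch(stat);
    unsigned const BWeight = hb * fp_multiplier;            /* integer part of log2(stat) */
    unsigned const FWeight = (stat << fp_accuracy) >> hb;   /* linear approximation of the fraction */
    return (double)(BWeight + FWeight) / fp_multiplier;
}

int main(void)
{
    /* stat=1000 : true log2 = 9.97 ; sketch prints 10.953 (= 9 + 1000/512) */
    printf("%.3f\n", fWeight_sketch(999));
    return 0;
}

Because the result sits a near-constant ~1.0 above true log2, the bias cancels in differences such as ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]), which is exactly how ZSTD_debugTable() uses it.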
diff --git a/lib/compress/zstd_ldm.c b/lib/compress/zstd_ldm.c
index 03d1f54c4..819c62aa3 100644
--- a/lib/compress/zstd_ldm.c
+++ b/lib/compress/zstd_ldm.c
@@ -225,12 +225,10 @@ static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
     {
     case ZSTD_fast:
         ZSTD_fillHashTable(ms, cParams, iend, ZSTD_dtlm_fast);
-        ms->nextToUpdate = (U32)(iend - ms->window.base);
         break;

     case ZSTD_dfast:
         ZSTD_fillDoubleHashTable(ms, cParams, iend, ZSTD_dtlm_fast);
-        ms->nextToUpdate = (U32)(iend - ms->window.base);
         break;

     case ZSTD_greedy:
@@ -595,15 +593,14 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
 {
     unsigned const minMatch = cParams->searchLength;
     ZSTD_blockCompressor const blockCompressor =
-        ZSTD_selectBlockCompressor(cParams->strategy,
-                                   ZSTD_matchState_dictMode(ms));
-    BYTE const* const base = ms->window.base;
+        ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms));
     /* Input bounds */
     BYTE const* const istart = (BYTE const*)src;
     BYTE const* const iend = istart + srcSize;
     /* Input positions */
     BYTE const* ip = istart;

+    DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize);
     assert(rawSeqStore->pos <= rawSeqStore->size);
     assert(rawSeqStore->size <= rawSeqStore->capacity);
     /* Loop through each sequence and apply the block compressor to the lits */
@@ -623,12 +620,12 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
         ZSTD_ldm_limitTableUpdate(ms, ip);
         ZSTD_ldm_fillFastTables(ms, cParams, ip);
         /* Run the block compressor */
+        DEBUGLOG(5, "calling block compressor on segment of size %u", sequence.litLength);
         {
             size_t const newLitLength =
                 blockCompressor(ms, seqStore, rep, cParams, ip, sequence.litLength);
             ip += sequence.litLength;
-            ms->nextToUpdate = (U32)(ip - base);
             /* Update the repcodes */
             for (i = ZSTD_REP_NUM - 1; i > 0; i--)
                 rep[i] = rep[i-1];
@@ -644,10 +641,6 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
     ZSTD_ldm_limitTableUpdate(ms, ip);
     ZSTD_ldm_fillFastTables(ms, cParams, ip);
     /* Compress the last literals */
-    {
-        size_t const lastLiterals = blockCompressor(ms, seqStore, rep, cParams,
-                                                    ip, iend - ip);
-        ms->nextToUpdate = (U32)(iend - base);
-        return lastLiterals;
-    }
+    return blockCompressor(ms, seqStore, rep, cParams,
+                           ip, iend - ip);
 }
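In the loop above, each literal stretch in front of a long-distance match goes through the regular block compressor, after which the match's offset is rotated into the repcode history. A standalone demo of that rotation; the final rep[0] assignment is cut off by the hunk, so writing the new offset into slot 0 is an assumption here:

#include <stdio.h>

#define REP_NUM_SKETCH 3   /* mirrors ZSTD_REP_NUM, which is 3 in zstd */

/* same shape as the repcode update in ZSTD_ldm_blockCompress() :
 * the newest offset shifts the recent-offsets history down by one slot */
static void updateRep_sketch(unsigned rep[REP_NUM_SKETCH], unsigned offset)
{
    int i;
    for (i = REP_NUM_SKETCH - 1; i > 0; i--) rep[i] = rep[i-1];
    rep[0] = offset;   /* assumed completion of the elided line */
}

int main(void)
{
    unsigned rep[REP_NUM_SKETCH] = { 1, 4, 8 };
    updateRep_sketch(rep, 32);
    printf("%u %u %u\n", rep[0], rep[1], rep[2]);   /* prints: 32 1 4 */
    return 0;
}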
diff --git a/lib/compress/zstd_opt.c b/lib/compress/zstd_opt.c
index 521fbbf39..e66019aa8 100644
--- a/lib/compress/zstd_opt.c
+++ b/lib/compress/zstd_opt.c
@@ -20,17 +20,66 @@
 /*-*************************************
 *  Price functions for optimal parser
 ***************************************/
-static void ZSTD_setLog2Prices(optState_t* optPtr)
+
+#if 0   /* approximation at bit level */
+# define BITCOST_ACCURACY 0
+# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
+# define WEIGHT(stat,opt) ((void)opt, ZSTD_bitWeight(stat))
+#elif 0 /* fractional bit accuracy */
+# define BITCOST_ACCURACY 8
+# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
+# define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
+#else   /* opt==approx, ultra==accurate */
+# define BITCOST_ACCURACY 8
+# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
+# define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
+#endif
+
+MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
 {
-    optPtr->log2litSum = ZSTD_highbit32(optPtr->litSum+1);
-    optPtr->log2litLengthSum = ZSTD_highbit32(optPtr->litLengthSum+1);
-    optPtr->log2matchLengthSum = ZSTD_highbit32(optPtr->matchLengthSum+1);
-    optPtr->log2offCodeSum = ZSTD_highbit32(optPtr->offCodeSum+1);
+    return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
+}
+
+MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
+{
+    U32 const stat = rawStat + 1;
+    U32 const hb = ZSTD_highbit32(stat);
+    U32 const BWeight = hb * BITCOST_MULTIPLIER;
+    U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
+    U32 const weight = BWeight + FWeight;
+    assert(hb + BITCOST_ACCURACY < 31);
+    return weight;
+}
+
+/* debugging function, @return price in bytes */
+MEM_STATIC double ZSTD_fCost(U32 price)
+{
+    return (double)price / (BITCOST_MULTIPLIER*8);
+}
+
+static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
+{
+    optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
+    optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
+    optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
+    optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
 }

+static U32 ZSTD_downscaleStat(U32* table, U32 lastEltIndex, int malus)
+{
+    U32 s, sum=0;
+    assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31);
+    for (s=0; s<=lastEltIndex; s++) {
+        table[s] = 1 + (table[s] >> (ZSTD_FREQ_DIV+malus));
+        sum += table[s];
+    }
+    return sum;
+}
+
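ZSTD_downscaleStat() above is how statistics age: every count is divided by 2^(ZSTD_FREQ_DIV+malus) and floored at 1, so earlier blocks fade geometrically without any symbol ever reaching probability zero. A runnable sketch, assuming ZSTD_FREQ_DIV == 4 (the real constant is defined elsewhere in the compressor):

#include <stdio.h>

#define ZSTD_FREQ_DIV_SKETCH 4   /* assumed value for illustration */

/* same shape as ZSTD_downscaleStat() : shrink each count, keep it >= 1,
 * and return the new total so base prices can be recomputed */
static unsigned downscale_sketch(unsigned* table, unsigned lastEltIndex, int malus)
{
    unsigned s, sum = 0;
    for (s = 0; s <= lastEltIndex; s++) {
        table[s] = 1 + (table[s] >> (ZSTD_FREQ_DIV_SKETCH + malus));
        sum += table[s];
    }
    return sum;
}

int main(void)
{
    unsigned freq[4] = { 0, 16, 256, 4096 };
    unsigned const sum = downscale_sketch(freq, 3, 0);
    /* counts become 1 2 17 257 (sum 277) : old statistics still lead,
     * but a new block can overturn them quickly */
    printf("sum=%u (%u %u %u %u)\n", sum, freq[0], freq[1], freq[2], freq[3]);
    return 0;
}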
 static void ZSTD_rescaleFreqs(optState_t* const optPtr,
-                              const BYTE* const src, size_t const srcSize)
+                              const BYTE* const src, size_t const srcSize,
+                              int optLevel)
 {
     optPtr->priceType = zop_dynamic;
@@ -40,14 +89,9 @@ static void ZSTD_rescaleFreqs(optState_t* const optPtr,

         assert(optPtr->symbolCosts != NULL);
         if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {  /* huffman table presumed generated by dictionary */
-            if (srcSize <= 8192)  /* heuristic */
-                optPtr->priceType = zop_static;
-            else {
-                assert(optPtr->priceType == zop_dynamic);
-            }
+            optPtr->priceType = zop_dynamic;

             assert(optPtr->litFreq != NULL);
-            assert(optPtr->symbolCosts != NULL);
             optPtr->litSum = 0;
             {   unsigned lit;
                 for (lit=0; lit<=MaxLit; lit++) {
@@ -99,119 +143,69 @@ static void ZSTD_rescaleFreqs(optState_t* const optPtr,

             assert(optPtr->litFreq != NULL);
             {   unsigned lit = MaxLit;
                 FSE_count_simple(optPtr->litFreq, &lit, src, srcSize);   /* use raw first block to init statistics */
-                optPtr->litSum = 0;
-                for (lit=0; lit<=MaxLit; lit++) {
-                    optPtr->litFreq[lit] = 1 + (optPtr->litFreq[lit] >> (ZSTD_FREQ_DIV+1));
-                    optPtr->litSum += optPtr->litFreq[lit];
-            }   }
+            }
+            optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);

             {   unsigned ll;
                 for (ll=0; ll<=MaxLL; ll++)
                     optPtr->litLengthFreq[ll] = 1;
-                optPtr->litLengthSum = MaxLL+1;
             }
+            optPtr->litLengthSum = MaxLL+1;

             {   unsigned ml;
                 for (ml=0; ml<=MaxML; ml++)
                     optPtr->matchLengthFreq[ml] = 1;
-                optPtr->matchLengthSum = MaxML+1;
             }
+            optPtr->matchLengthSum = MaxML+1;

             {   unsigned of;
                 for (of=0; of<=MaxOff; of++)
                     optPtr->offCodeFreq[of] = 1;
-                optPtr->offCodeSum = MaxOff+1;
             }
+            optPtr->offCodeSum = MaxOff+1;

         }

     } else {   /* new block : re-use previous statistics, scaled down */

-        unsigned u;
-        optPtr->litSum = 0;
-        for (u=0; u<=MaxLit; u++) {
-            optPtr->litFreq[u] = 1 + (optPtr->litFreq[u] >> (ZSTD_FREQ_DIV+1));
-            optPtr->litSum += optPtr->litFreq[u];
-        }
-        optPtr->litLengthSum = 0;
-        for (u=0; u<=MaxLL; u++) {
-            optPtr->litLengthFreq[u] = 1 + (optPtr->litLengthFreq[u] >> ZSTD_FREQ_DIV);
-            optPtr->litLengthSum += optPtr->litLengthFreq[u];
-        }
-        optPtr->matchLengthSum = 0;
-        for (u=0; u<=MaxML; u++) {
-            optPtr->matchLengthFreq[u] = 1 + (optPtr->matchLengthFreq[u] >> ZSTD_FREQ_DIV);
-            optPtr->matchLengthSum += optPtr->matchLengthFreq[u];
-        }
-        optPtr->offCodeSum = 0;
-        for (u=0; u<=MaxOff; u++) {
-            optPtr->offCodeFreq[u] = 1 + (optPtr->offCodeFreq[u] >> ZSTD_FREQ_DIV);
-            optPtr->offCodeSum += optPtr->offCodeFreq[u];
-        }
+        optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
+        optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0);
+        optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0);
+        optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0);
     }

-    ZSTD_setLog2Prices(optPtr);
-}
-
-#if 1 /* approximation at bit level */
-# define BITCOST_ACCURACY 0
-# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
-# define BITCOST_SYMBOL(t,l,s) ((void)l, FSE_getMaxNbBits(t,s)*BITCOST_MULTIPLIER)
-#else /* fractional bit accuracy */
-# define BITCOST_ACCURACY 8
-# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
-# define BITCOST_SYMBOL(t,l,s) FSE_bitCost(t,l,s,BITCOST_ACCURACY)
-#endif
-
-MEM_STATIC double
-ZSTD_fCost(U32 price)
-{
-    return (double)price / (BITCOST_MULTIPLIER*8);
+    ZSTD_setBasePrices(optPtr, optLevel);
 }


 /* ZSTD_rawLiteralsCost() :
- * cost of literals (only) in specified segment (which length can be 0).
- * does not include cost of literalLength symbol */
+ * price of literals (only) in specified segment (which length can be 0).
+ * does not include price of literalLength symbol */
 static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
-                                const optState_t* const optPtr)
+                                const optState_t* const optPtr,
+                                int optLevel)
 {
     if (litLength == 0) return 0;
-    if (optPtr->priceType == zop_predef) return (litLength*6);  /* 6 bit per literal - no statistic used */
-    if (optPtr->priceType == zop_static) {
-        U32 u, cost;
-        assert(optPtr->symbolCosts != NULL);
-        assert(optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid);
-        for (u=0, cost=0; u < litLength; u++)
-            cost += HUF_getNbBits(optPtr->symbolCosts->huf.CTable, literals[u]);
-        return cost * BITCOST_MULTIPLIER;
-    }
+    if (optPtr->priceType == zop_predef)
+        return (litLength*6) * BITCOST_MULTIPLIER;  /* 6 bits per literal - no statistic used */

     /* dynamic statistics */
-    {   U32 u;
-        U32 cost = litLength * optPtr->log2litSum;
+    {   U32 price = litLength * optPtr->litSumBasePrice;
+        U32 u;
         for (u=0; u < litLength; u++)
-            cost -= ZSTD_highbit32(optPtr->litFreq[literals[u]]+1);
-        return cost * BITCOST_MULTIPLIER;
+            price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel);
+        return price;
     }
 }

 /* ZSTD_litLengthPrice() :
  * cost of literalLength symbol */
-static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr)
+static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
 {
-    if (optPtr->priceType == zop_static) {
-        U32 const llCode = ZSTD_LLcode(litLength);
-        FSE_CState_t cstate;
-        FSE_initCState(&cstate, optPtr->symbolCosts->fse.litlengthCTable);
-        {   U32 const price = LL_bits[llCode]*BITCOST_MULTIPLIER + BITCOST_SYMBOL(cstate.symbolTT, cstate.stateLog, llCode);
-            DEBUGLOG(8, "ZSTD_litLengthPrice: ll=%u, bitCost=%.2f", litLength, (double)price / BITCOST_MULTIPLIER);
-            return price;
-    }   }
-    if (optPtr->priceType == zop_predef) return ZSTD_highbit32((U32)litLength+1);
+    if (optPtr->priceType == zop_predef) return WEIGHT(litLength, optLevel);

     /* dynamic statistics */
     {   U32 const llCode = ZSTD_LLcode(litLength);
-        return (LL_bits[llCode] + optPtr->log2litLengthSum - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1)) * BITCOST_MULTIPLIER;
+        return (LL_bits[llCode] * BITCOST_MULTIPLIER) + (optPtr->litLengthSumBasePrice - WEIGHT(optPtr->litLengthFreq[llCode], optLevel));
     }
 }

@@ -219,34 +213,26 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
  * cost of the literal part of a sequence,
  * including literals themselves, and literalLength symbol */
 static U32 ZSTD_fullLiteralsCost(const BYTE* const literals, U32 const litLength,
-                                 const optState_t* const optPtr)
+                                 const optState_t* const optPtr,
+                                 int optLevel)
 {
-    return ZSTD_rawLiteralsCost(literals, litLength, optPtr)
-         + ZSTD_litLengthPrice(litLength, optPtr);
+    return ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel)
+         + ZSTD_litLengthPrice(litLength, optPtr, optLevel);
 }

 /* ZSTD_litLengthContribution() :
  * @return ( cost(litlength) - cost(0) )
  * this value can then be added to rawLiteralsCost()
  * to provide a cost which is directly comparable to a match ending at same position */
-static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr)
+static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr, int optLevel)
 {
-    if (optPtr->priceType == zop_static) {
-        U32 const llCode = ZSTD_LLcode(litLength);
-        FSE_CState_t cstate;
-        FSE_initCState(&cstate, optPtr->symbolCosts->fse.litlengthCTable);
-        return (int)(LL_bits[llCode] * BITCOST_MULTIPLIER)
-             + BITCOST_SYMBOL(cstate.symbolTT, cstate.stateLog, llCode)
-             - BITCOST_SYMBOL(cstate.symbolTT, cstate.stateLog, 0);
-    }
-    if (optPtr->priceType >= zop_predef) return ZSTD_highbit32(litLength+1);
+    if (optPtr->priceType >= zop_predef) return WEIGHT(litLength, optLevel);

     /* dynamic statistics */
     {   U32 const llCode = ZSTD_LLcode(litLength);
-        int const contribution = (LL_bits[llCode]
-                           + ZSTD_highbit32(optPtr->litLengthFreq[0]+1)   /* note: log2litLengthSum cancels out with following one */
-                           - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1))
-                           * BITCOST_MULTIPLIER;
+        int const contribution = (LL_bits[llCode] * BITCOST_MULTIPLIER)
+                               + WEIGHT(optPtr->litLengthFreq[0], optLevel)   /* note: litLengthSumBasePrice cancels out */
+                               - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
 #if 1
         return contribution;
 #else
@@ -260,10 +246,11 @@ static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* con
  * which can be compared to the ending cost of a match
  * should a new match start at this position */
 static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLength,
-                                     const optState_t* const optPtr)
+                                     const optState_t* const optPtr,
+                                     int optLevel)
 {
-    int const contribution = ZSTD_rawLiteralsCost(literals, litLength, optPtr)
-                           + ZSTD_litLengthContribution(litLength, optPtr);
+    int const contribution = ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel)
+                           + ZSTD_litLengthContribution(litLength, optPtr, optLevel);
     return contribution;
 }

@@ -272,7 +259,8 @@ static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLe
  * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
  * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */
 FORCE_INLINE_TEMPLATE U32
-ZSTD_getMatchPrice(U32 const offset, U32 const matchLength,
+ZSTD_getMatchPrice(U32 const offset,
+                   U32 const matchLength,
                    const optState_t* const optPtr,
                    int const optLevel)
 {
@@ -281,29 +269,23 @@ ZSTD_getMatchPrice(U32 const offset, U32 const matchLength,
     U32 const mlBase = matchLength - MINMATCH;
     assert(matchLength >= MINMATCH);

-    if (optPtr->priceType == zop_static) {
-        U32 const mlCode = ZSTD_MLcode(mlBase);
-        FSE_CState_t mlstate, offstate;
-        FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
-        FSE_initCState(&offstate, optPtr->symbolCosts->fse.offcodeCTable);
-        return BITCOST_SYMBOL(offstate.symbolTT, offstate.stateLog, offCode) + offCode*BITCOST_MULTIPLIER
-             + BITCOST_SYMBOL(mlstate.symbolTT, mlstate.stateLog, mlCode) + ML_bits[mlCode]*BITCOST_MULTIPLIER;
-    }
-
     if (optPtr->priceType == zop_predef)  /* fixed scheme, do not use statistics */
-        return ZSTD_highbit32(mlBase+1) + 16 + offCode;
+        return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER);

     /* dynamic statistics */
-    price = offCode + optPtr->log2offCodeSum - ZSTD_highbit32(optPtr->offCodeFreq[offCode]+1);
-    if ((optLevel<2) /*static*/ && offCode >= 20) price += (offCode-19)*2;  /* handicap for long distance offsets, favor decompression speed */
+    price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
+    if ((optLevel<2) /*static*/ && offCode >= 20)
+        price += (offCode-19)*2 * BITCOST_MULTIPLIER;  /* handicap for long distance offsets, favor decompression speed */

     /* match Length */
     {   U32 const mlCode = ZSTD_MLcode(mlBase);
-        price += ML_bits[mlCode] + optPtr->log2matchLengthSum - ZSTD_highbit32(optPtr->matchLengthFreq[mlCode]+1);
+        price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel));
     }

+    price += BITCOST_MULTIPLIER / 5;   /* heuristic : make matches a bit more costly to favor fewer sequences -> faster decompression speed */
+
     DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
-    return price * BITCOST_MULTIPLIER;
+    return price;
 }

 static void ZSTD_updateStats(optState_t* const optPtr,
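With the zop_static path gone, every dynamic price above reduces to a Shannon cost in fixed point: basePrice - WEIGHT(freq) is roughly log2(sum/freq) bits scaled by BITCOST_MULTIPLIER, with the code's extra raw bits added at full bit weight. A worked check of that reading (the _sketch names are illustrative, not zstd API):

#include <stdio.h>

#define ACCURACY_SKETCH 8
#define MULTIPLIER_SKETCH (1u << ACCURACY_SKETCH)

static unsigned highbit_sketch(unsigned v) { unsigned hb=0; while (v >>= 1) hb++; return hb; }

/* same scheme as ZSTD_fracWeight() : fixed-point log2,
 * whose constant bias cancels when two weights are subtracted */
static unsigned fracWeight_sketch(unsigned rawStat)
{
    unsigned const stat = rawStat + 1;
    unsigned const hb = highbit_sketch(stat);
    return hb * MULTIPLIER_SKETCH + ((stat << ACCURACY_SKETCH) >> hb);
}

int main(void)
{
    /* a symbol seen 32 times out of 1024 : ideal cost = log2(1024/32) = 5 bits */
    unsigned const basePrice = fracWeight_sketch(1024);   /* plays the role of offCodeSumBasePrice etc. */
    unsigned const price = basePrice - fracWeight_sketch(32);
    printf("%.2f bits\n", (double)price / MULTIPLIER_SKETCH);   /* prints 4.97, close to the ideal 5.00 */
    return 0;
}

The +BITCOST_MULTIPLIER/5 nudge above then adds a flat 0.2-bit surcharge per match, biasing ties toward fewer sequences and hence faster decompression.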
@@ -501,7 +483,7 @@ void ZSTD_updateTree_internal(
     const BYTE* const base = ms->window.base;
     U32 const target = (U32)(ip - base);
     U32 idx = ms->nextToUpdate;
-    DEBUGLOG(8, "ZSTD_updateTree_internal, from %u to %u (extDict:%u)",
+    DEBUGLOG(5, "ZSTD_updateTree_internal, from %u to %u (extDict:%u)",
                 idx, target, extDict);

     while(idx < target)
@@ -549,7 +531,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
     U32 nbCompares = 1U << cParams->searchLog;
     size_t bestLength = lengthToBeat-1;
-    DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches");
+    DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", current);

     /* check repCode */
     {   U32 const lastR = ZSTD_REP_NUM + ll0;
@@ -733,7 +715,8 @@ typedef struct {
 static U32 ZSTD_rawLiteralsCost_cached(
                             cachedLiteralPrice_t* const cachedLitPrice,
                             const BYTE* const anchor, U32 const litlen,
-                            const optState_t* const optStatePtr)
+                            const optState_t* const optStatePtr,
+                            int optLevel)
 {
     U32 startCost;
     U32 remainingLength;
@@ -750,7 +733,7 @@ static U32 ZSTD_rawLiteralsCost_cached(
         remainingLength = litlen;
     }

-    {   U32 const rawLitCost = startCost + ZSTD_rawLiteralsCost(startPosition, remainingLength, optStatePtr);
+    {   U32 const rawLitCost = startCost + ZSTD_rawLiteralsCost(startPosition, remainingLength, optStatePtr, optLevel);
         cachedLitPrice->anchor = anchor;
         cachedLitPrice->litlen = litlen;
         cachedLitPrice->rawLitCost = rawLitCost;
@@ -761,19 +744,21 @@ static U32 ZSTD_rawLiteralsCost_cached(
 static U32 ZSTD_fullLiteralsCost_cached(
                             cachedLiteralPrice_t* const cachedLitPrice,
                             const BYTE* const anchor, U32 const litlen,
-                            const optState_t* const optStatePtr)
+                            const optState_t* const optStatePtr,
+                            int optLevel)
 {
-    return ZSTD_rawLiteralsCost_cached(cachedLitPrice, anchor, litlen, optStatePtr)
-         + ZSTD_litLengthPrice(litlen, optStatePtr);
+    return ZSTD_rawLiteralsCost_cached(cachedLitPrice, anchor, litlen, optStatePtr, optLevel)
+         + ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
 }

 static int ZSTD_literalsContribution_cached(
                             cachedLiteralPrice_t* const cachedLitPrice,
                             const BYTE* const anchor, U32 const litlen,
-                            const optState_t* const optStatePtr)
+                            const optState_t* const optStatePtr,
+                            int optLevel)
 {
-    int const contribution = ZSTD_rawLiteralsCost_cached(cachedLitPrice, anchor, litlen, optStatePtr)
-                           + ZSTD_litLengthContribution(litlen, optStatePtr);
+    int const contribution = ZSTD_rawLiteralsCost_cached(cachedLitPrice, anchor, litlen, optStatePtr, optLevel)
+                           + ZSTD_litLengthContribution(litlen, optStatePtr, optLevel);
     return contribution;
 }

@@ -803,8 +788,9 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,

     /* init */
     DEBUGLOG(5, "ZSTD_compressBlock_opt_generic");
+    assert(optLevel <= 2);
     ms->nextToUpdate3 = ms->nextToUpdate;
-    ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize);
+    ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
     ip += (ip==prefixStart);
     memset(&cachedLitPrice, 0, sizeof(cachedLitPrice));

@@ -841,7 +827,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
             }   }

             /* set prices for first matches starting position == 0 */
-            {   U32 const literalsPrice = ZSTD_fullLiteralsCost_cached(&cachedLitPrice, anchor, litlen, optStatePtr);
+            {   U32 const literalsPrice = ZSTD_fullLiteralsCost_cached(&cachedLitPrice, anchor, litlen, optStatePtr, optLevel);
                 U32 pos;
                 U32 matchNb;
                 for (pos = 1; pos < minMatch; pos++) {
@@ -876,9 +862,9 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
             {   U32 const litlen = (opt[cur-1].mlen == 1) ? opt[cur-1].litlen + 1 : 1;
                 int price;  /* note : contribution can be negative */
                 if (cur > litlen) {
-                    price = opt[cur - litlen].price + ZSTD_literalsContribution(inr-litlen, litlen, optStatePtr);
+                    price = opt[cur - litlen].price + ZSTD_literalsContribution(inr-litlen, litlen, optStatePtr, optLevel);
                 } else {
-                    price = ZSTD_literalsContribution_cached(&cachedLitPrice, anchor, litlen, optStatePtr);
+                    price = ZSTD_literalsContribution_cached(&cachedLitPrice, anchor, litlen, optStatePtr, optLevel);
                 }
                 assert(price < 1000000000); /* overflow check */
                 if (price <= opt[cur].price) {
@@ -909,7 +895,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
             {   U32 const ll0 = (opt[cur].mlen != 1);
                 U32 const litlen = (opt[cur].mlen == 1) ? opt[cur].litlen : 0;
                 U32 const previousPrice = (cur > litlen) ? opt[cur-litlen].price : 0;
-                U32 const basePrice = previousPrice + ZSTD_fullLiteralsCost(inr-litlen, litlen, optStatePtr);
+                U32 const basePrice = previousPrice + ZSTD_fullLiteralsCost(inr-litlen, litlen, optStatePtr, optLevel);
                 U32 const nbMatches = ZSTD_BtGetAllMatches(ms, cParams, inr, iend, extDict, opt[cur].rep, ll0, matches, minMatch);
                 U32 matchNb;
                 if (!nbMatches) {
@@ -1011,7 +997,7 @@ _shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */
                 ZSTD_storeSeq(seqStore, llen, anchor, offset, mlen-MINMATCH);
                 anchor = ip;
         }   }
-        ZSTD_setLog2Prices(optStatePtr);
+        ZSTD_setBasePrices(optStatePtr, optLevel);
     }   /* while (ip < ilimit) */

     /* Return the last literals size */
@@ -1021,29 +1007,80 @@ _shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */
 size_t ZSTD_compressBlock_btopt(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
+        const ZSTD_compressionParameters* cParams, const void* src, size_t srcSize)
 {
     DEBUGLOG(5, "ZSTD_compressBlock_btopt");
     return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 0 /*optLevel*/, 0 /*extDict*/);
 }

+
+/* used in 2-pass strategy */
+static U32 ZSTD_upscaleStat(U32* table, U32 lastEltIndex, int bonus)
+{
+    U32 s, sum=0;
+    assert(ZSTD_FREQ_DIV+bonus > 0);
+    for (s=0; s<=lastEltIndex; s++) {
+        table[s] <<= ZSTD_FREQ_DIV+bonus;
+        table[s]--;
+        sum += table[s];
+    }
+    return sum;
+}
+
+/* used in 2-pass strategy */
+MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
+{
+    optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
+    optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 1);
+    optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 1);
+    optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 1);
+}
+
 size_t ZSTD_compressBlock_btultra(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
+        const ZSTD_compressionParameters* cParams, const void* src, size_t srcSize)
 {
+    DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
+#if 0
+    /* 2-pass strategy (disabled)
+     * this strategy makes a first pass over first block to collect statistics
+     * and seed next round's statistics with it.
+     * The compression ratio gain is generally small (~0.5% on first block),
+     * the cost is 2x cpu time on first block. */
+    assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
+    if ( (ms->opt.litLengthSum==0)   /* first block */
+      && (seqStore->sequences == seqStore->sequencesStart)   /* no ldm */
+      && (ms->window.dictLimit == ms->window.lowLimit) ) {   /* no dictionary */
+        U32 tmpRep[ZSTD_REP_NUM];
+        DEBUGLOG(5, "ZSTD_compressBlock_btultra: first block: collecting statistics");
+        assert(ms->nextToUpdate >= ms->window.dictLimit
+            && ms->nextToUpdate <= ms->window.dictLimit + 1);
+        memcpy(tmpRep, rep, sizeof(tmpRep));
+        ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, cParams, src, srcSize, 2 /*optLevel*/, 0 /*extDict*/);   /* generate stats into ms->opt */
+        ZSTD_resetSeqStore(seqStore);
+        /* invalidate first scan from history */
+        ms->window.base -= srcSize;
+        ms->window.dictLimit += (U32)srcSize;
+        ms->window.lowLimit = ms->window.dictLimit;
+        ms->nextToUpdate = ms->window.dictLimit;
+        ms->nextToUpdate3 = ms->window.dictLimit;
+        /* reinforce weight of collected statistics */
+        ZSTD_upscaleStats(&ms->opt);
+    }
+#endif
     return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 2 /*optLevel*/, 0 /*extDict*/);
 }

 size_t ZSTD_compressBlock_btopt_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
+        const ZSTD_compressionParameters* cParams, const void* src, size_t srcSize)
 {
     return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 0 /*optLevel*/, 1 /*extDict*/);
 }

 size_t ZSTD_compressBlock_btultra_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
+        const ZSTD_compressionParameters* cParams, const void* src, size_t srcSize)
 {
     return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 2 /*optLevel*/, 1 /*extDict*/);
 }
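ZSTD_upscaleStats() closing the disabled 2-pass path is the inverse of the downscaling: counts gathered by the throwaway first pass are re-amplified so they outweigh the decay that ZSTD_rescaleFreqs() applies when the real pass starts. A sketch under the same assumed ZSTD_FREQ_DIV == 4; entries must be >= 1 on entry (as parser statistics are), otherwise the decrement would underflow:

#include <stdio.h>

#define ZSTD_FREQ_DIV_SKETCH 4   /* assumed value; see zstd_compress_internal.h */

/* same shape as ZSTD_upscaleStat() : re-amplify collected statistics so
 * the first real pass trusts them more than fresh, unseen data */
static unsigned upscale_sketch(unsigned* table, unsigned lastEltIndex, int bonus)
{
    unsigned s, sum = 0;
    for (s = 0; s <= lastEltIndex; s++) {
        table[s] <<= (ZSTD_FREQ_DIV_SKETCH + bonus);
        table[s] -= 1;
        sum += table[s];
    }
    return sum;
}

int main(void)
{
    unsigned freq[3] = { 1, 5, 40 };   /* stats collected by the first pass */
    unsigned const sum = upscale_sketch(freq, 2, 1);
    printf("sum=%u (%u %u %u)\n", sum, freq[0], freq[1], freq[2]);   /* sum=1469 (31 159 1279) */
    return 0;
}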