diff --git a/.gitignore b/.gitignore index ae277e932..ea574d747 100644 --- a/.gitignore +++ b/.gitignore @@ -50,3 +50,5 @@ googletest/ *.code-workspace compile_commands.json .clangd +perf.data +perf.data.old diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 2eb9cb7ac..cb58ca294 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -72,6 +72,10 @@ struct ZSTD_CDict_s { ZSTD_customMem customMem; U32 dictID; int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */ + ZSTD_useRowMatchFinderMode_e useRowMatchFinder; /* Indicates whether the CDict was created with params that would use + * row-based matchfinder. Unless the cdict is reloaded, we will use + * the same greedy/lazy matchfinder at compression time. + */ }; /* typedef'd to ZSTD_CDict within "zstd.h" */ ZSTD_CCtx* ZSTD_createCCtx(void) @@ -202,6 +206,49 @@ size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs) /* private API call, for dictBuilder only */ const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); } +/* Returns true if the strategy supports using a row based matchfinder */ +static int ZSTD_rowMatchFinderSupported(const ZSTD_strategy strategy) { + return (strategy >= ZSTD_greedy && strategy <= ZSTD_lazy2); +} + +/* Returns true if the strategy and useRowMatchFinder mode indicate that we will use the row based matchfinder + * for this compression. + */ +static int ZSTD_rowMatchFinderUsed(const ZSTD_strategy strategy, const ZSTD_useRowMatchFinderMode_e mode) { + assert(mode != ZSTD_urm_auto); + return ZSTD_rowMatchFinderSupported(strategy) && (mode == ZSTD_urm_enableRowMatchFinder); +} + +/* Returns row matchfinder usage enum given an initial mode and cParams */ +static ZSTD_useRowMatchFinderMode_e ZSTD_resolveRowMatchFinderMode(ZSTD_useRowMatchFinderMode_e mode, + const ZSTD_compressionParameters* const cParams) { +#if !defined(ZSTD_NO_INTRINSICS) && (defined(__SSE2__) || defined(__ARM_NEON)) + int const kHasSIMD128 = 1; +#else + int const kHasSIMD128 = 0; +#endif + if (mode != ZSTD_urm_auto) return mode; /* if requested enabled, but no SIMD, we still will use row matchfinder */ + mode = ZSTD_urm_disableRowMatchFinder; + if (!ZSTD_rowMatchFinderSupported(cParams->strategy)) return mode; + if (kHasSIMD128) { + if (cParams->windowLog > 14) mode = ZSTD_urm_enableRowMatchFinder; + } else { + if (cParams->windowLog > 17) mode = ZSTD_urm_enableRowMatchFinder; + } + return mode; +} + +/* Returns 1 if the arguments indicate that we should allocate a chainTable, 0 otherwise */ +static int ZSTD_allocateChainTable(const ZSTD_strategy strategy, + const ZSTD_useRowMatchFinderMode_e useRowMatchFinder, + const U32 forDDSDict) { + assert(useRowMatchFinder != ZSTD_urm_auto); + /* We always should allocate a chaintable if we are allocating a matchstate for a DDS dictionary matchstate. + * We do not allocate a chaintable if we are using ZSTD_fast, or are using the row-based matchfinder. + */ + return forDDSDict || ((strategy != ZSTD_fast) && !ZSTD_rowMatchFinderUsed(strategy, useRowMatchFinder)); +} + /* Returns 1 if compression parameters are such that we should * enable long distance matching (wlog >= 27, strategy >= btopt). * Returns 0 otherwise. @@ -241,6 +288,7 @@ static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams( cctxParams.splitBlocks = 1; } + cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams); assert(!ZSTD_checkCParams(cParams)); return cctxParams; } @@ -299,6 +347,8 @@ static void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, ZSTD_par * But, set it for tracing anyway. */ cctxParams->compressionLevel = compressionLevel; + cctxParams->useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams->useRowMatchFinder, ¶ms->cParams); + DEBUGLOG(4, "ZSTD_CCtxParams_init_internal: useRowMatchFinder=%d", cctxParams->useRowMatchFinder); } size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params) @@ -498,12 +548,17 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) bounds.lowerBound = 0; bounds.upperBound = 1; return bounds; - + case ZSTD_c_splitBlocks: bounds.lowerBound = 0; bounds.upperBound = 1; return bounds; + case ZSTD_c_useRowMatchFinder: + bounds.lowerBound = (int)ZSTD_urm_auto; + bounds.upperBound = (int)ZSTD_urm_enableRowMatchFinder; + return bounds; + default: bounds.error = ERROR(parameter_unsupported); return bounds; @@ -566,6 +621,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) case ZSTD_c_blockDelimiters: case ZSTD_c_validateSequences: case ZSTD_c_splitBlocks: + case ZSTD_c_useRowMatchFinder: default: return 0; } @@ -619,6 +675,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) case ZSTD_c_blockDelimiters: case ZSTD_c_validateSequences: case ZSTD_c_splitBlocks: + case ZSTD_c_useRowMatchFinder: break; default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); @@ -835,6 +892,11 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, CCtxParams->splitBlocks = value; return CCtxParams->splitBlocks; + case ZSTD_c_useRowMatchFinder: + BOUNDCHECK(ZSTD_c_useRowMatchFinder, value); + CCtxParams->useRowMatchFinder = (ZSTD_useRowMatchFinderMode_e)value; + return CCtxParams->useRowMatchFinder; + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); } } @@ -961,6 +1023,9 @@ size_t ZSTD_CCtxParams_getParameter( case ZSTD_c_splitBlocks : *value = (int)CCtxParams->splitBlocks; break; + case ZSTD_c_useRowMatchFinder : + *value = (int)CCtxParams->useRowMatchFinder; + break; default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); } return 0; @@ -1327,9 +1392,14 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( static size_t ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams, + const ZSTD_useRowMatchFinderMode_e useRowMatchFinder, + const U32 enableDedicatedDictSearch, const U32 forCCtx) { - size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog); + /* chain table size should be 0 for fast or row-hash strategies */ + size_t const chainSize = ZSTD_allocateChainTable(cParams->strategy, useRowMatchFinder, enableDedicatedDictSearch && !forCCtx) + ? ((size_t)1 << cParams->chainLog) + : 0; size_t const hSize = ((size_t)1) << cParams->hashLog; U32 const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0; @@ -1345,6 +1415,9 @@ ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams, + ZSTD_cwksp_aligned_alloc_size((1<strategy, useRowMatchFinder) + ? ZSTD_cwksp_aligned_alloc_size(hSize*sizeof(U16)) + : 0; size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt)) ? optPotentialSpace : 0; @@ -1352,16 +1425,18 @@ ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams, /* tables are guaranteed to be sized in multiples of 64 bytes (or 16 uint32_t) */ ZSTD_STATIC_ASSERT(ZSTD_HASHLOG_MIN >= 4 && ZSTD_WINDOWLOG_MIN >= 4 && ZSTD_CHAINLOG_MIN >= 4); + assert(useRowMatchFinder != ZSTD_urm_auto); DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u", (U32)chainSize, (U32)hSize, (U32)h3Size); - return tableSpace + optSpace + slackSpace; + return tableSpace + optSpace + slackSpace + lazyAdditionalSpace; } static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( const ZSTD_compressionParameters* cParams, const ldmParams_t* ldmParams, const int isStatic, + const ZSTD_useRowMatchFinderMode_e useRowMatchFinder, const size_t buffInSize, const size_t buffOutSize, const U64 pledgedSrcSize) @@ -1375,7 +1450,7 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE)); size_t const entropySpace = ZSTD_cwksp_alloc_size(ENTROPY_WORKSPACE_SIZE); size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t)); - size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, /* forCCtx */ 1); + size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 0, /* forCCtx */ 1); size_t const ldmSpace = ZSTD_ldm_getTableSize(*ldmParams); size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(*ldmParams, blockSize); @@ -1406,19 +1481,32 @@ size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params) { ZSTD_compressionParameters const cParams = ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); + ZSTD_useRowMatchFinderMode_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder, + &cParams); RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only."); /* estimateCCtxSize is for one-shot compression. So no buffers should * be needed. However, we still allocate two 0-sized buffers, which can * take space under ASAN. */ return ZSTD_estimateCCtxSize_usingCCtxParams_internal( - &cParams, ¶ms->ldmParams, 1, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN); + &cParams, ¶ms->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN); } size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams) { - ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams); - return ZSTD_estimateCCtxSize_usingCCtxParams(¶ms); + ZSTD_CCtx_params initialParams = ZSTD_makeCCtxParamsFromCParams(cParams); + if (ZSTD_rowMatchFinderSupported(cParams.strategy)) { + /* Pick bigger of not using and using row-based matchfinder for greedy and lazy strategies */ + size_t noRowCCtxSize; + size_t rowCCtxSize; + initialParams.useRowMatchFinder = ZSTD_urm_disableRowMatchFinder; + noRowCCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams); + initialParams.useRowMatchFinder = ZSTD_urm_enableRowMatchFinder; + rowCCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams); + return MAX(noRowCCtxSize, rowCCtxSize); + } else { + return ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams); + } } static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel) @@ -1458,17 +1546,29 @@ size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params) size_t const outBuffSize = (params->outBufferMode == ZSTD_bm_buffered) ? ZSTD_compressBound(blockSize) + 1 : 0; + ZSTD_useRowMatchFinderMode_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder, ¶ms->cParams); return ZSTD_estimateCCtxSize_usingCCtxParams_internal( - &cParams, ¶ms->ldmParams, 1, inBuffSize, outBuffSize, + &cParams, ¶ms->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize, ZSTD_CONTENTSIZE_UNKNOWN); } } size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams) { - ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams); - return ZSTD_estimateCStreamSize_usingCCtxParams(¶ms); + ZSTD_CCtx_params initialParams = ZSTD_makeCCtxParamsFromCParams(cParams); + if (ZSTD_rowMatchFinderSupported(cParams.strategy)) { + /* Pick bigger of not using and using row-based matchfinder for greedy and lazy strategies */ + size_t noRowCCtxSize; + size_t rowCCtxSize; + initialParams.useRowMatchFinder = ZSTD_urm_disableRowMatchFinder; + noRowCCtxSize = ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams); + initialParams.useRowMatchFinder = ZSTD_urm_enableRowMatchFinder; + rowCCtxSize = ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams); + return MAX(noRowCCtxSize, rowCCtxSize); + } else { + return ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams); + } } static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel) @@ -1593,20 +1693,27 @@ typedef enum { ZSTD_resetTarget_CCtx } ZSTD_resetTarget_e; + static size_t ZSTD_reset_matchState(ZSTD_matchState_t* ms, ZSTD_cwksp* ws, const ZSTD_compressionParameters* cParams, + const ZSTD_useRowMatchFinderMode_e useRowMatchFinder, const ZSTD_compResetPolicy_e crp, const ZSTD_indexResetPolicy_e forceResetIndex, const ZSTD_resetTarget_e forWho) { - size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog); + /* disable chain table allocation for fast or row-based strategies */ + size_t const chainSize = ZSTD_allocateChainTable(cParams->strategy, useRowMatchFinder, + ms->dedicatedDictSearch && (forWho == ZSTD_resetTarget_CDict)) + ? ((size_t)1 << cParams->chainLog) + : 0; size_t const hSize = ((size_t)1) << cParams->hashLog; U32 const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0; DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset); + assert(useRowMatchFinder != ZSTD_urm_auto); if (forceResetIndex == ZSTDirp_reset) { ZSTD_window_init(&ms->window); ZSTD_cwksp_mark_tables_dirty(ws); @@ -1645,11 +1752,23 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms, ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t)); } + if (ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)) { + { /* Row match finder needs an additional table of hashes ("tags") */ + size_t const tagTableSize = hSize*sizeof(U16); + ms->tagTable = (U16*)ZSTD_cwksp_reserve_aligned(ws, tagTableSize); + if (ms->tagTable) ZSTD_memset(ms->tagTable, 0, tagTableSize); + } + { /* Switch to 32-entry rows if searchLog is 5 (or more) */ + U32 const rowLog = cParams->searchLog < 5 ? 4 : 5; + assert(cParams->hashLog > rowLog); + ms->rowHashLog = ZSTD_highbit32((U32)1 << (cParams->hashLog - rowLog)); + } + } + ms->cParams = *cParams; RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation, "failed a workspace allocation in ZSTD_reset_matchState"); - return 0; } @@ -1675,12 +1794,13 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, ZSTD_buffered_policy_e const zbuff) { ZSTD_cwksp* const ws = &zc->workspace; - DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u", - (U32)pledgedSrcSize, params.cParams.windowLog); + DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u, useRowMatchFinder=%d", + (U32)pledgedSrcSize, params.cParams.windowLog, (int)params.useRowMatchFinder); assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); zc->isFirstBlock = 1; + assert(params.useRowMatchFinder != ZSTD_urm_auto); if (params.ldmParams.enableLdm) { /* Adjust long distance matching parameters */ ZSTD_ldm_adjustParameters(¶ms.ldmParams, ¶ms.cParams); @@ -1706,7 +1826,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, size_t const neededSpace = ZSTD_estimateCCtxSize_usingCCtxParams_internal( - ¶ms.cParams, ¶ms.ldmParams, zc->staticSize != 0, + ¶ms.cParams, ¶ms.ldmParams, zc->staticSize != 0, params.useRowMatchFinder, buffInSize, buffOutSize, pledgedSrcSize); int resizeWorkspace; @@ -1802,6 +1922,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, &zc->blockState.matchState, ws, ¶ms.cParams, + params.useRowMatchFinder, crp, needsIndexReset, ZSTD_resetTarget_CCtx), ""); @@ -1878,6 +1999,7 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx, U64 pledgedSrcSize, ZSTD_buffered_policy_e zbuff) { + DEBUGLOG(4, "ZSTD_resetCCtx_byAttachingCDict() pledgedSrcSize=%zu", pledgedSrcSize); { ZSTD_compressionParameters adjusted_cdict_cParams = cdict->matchState.cParams; unsigned const windowLog = params.cParams.windowLog; @@ -1893,6 +2015,7 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx, params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize, cdict->dictContentSize, ZSTD_cpm_attachDict); params.cParams.windowLog = windowLog; + params.useRowMatchFinder = cdict->useRowMatchFinder; /* cdict overrides */ FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, ZSTDcrp_makeClean, zbuff), ""); assert(cctx->appliedParams.cParams.strategy == adjusted_cdict_cParams.strategy); @@ -1937,14 +2060,14 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx, const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams; assert(!cdict->matchState.dedicatedDictSearch); - - DEBUGLOG(4, "copying dictionary into context"); + DEBUGLOG(4, "ZSTD_resetCCtx_byCopyingCDict() pledgedSrcSize=%zu", pledgedSrcSize); { unsigned const windowLog = params.cParams.windowLog; assert(windowLog != 0); /* Copy only compression parameters related to tables. */ params.cParams = *cdict_cParams; params.cParams.windowLog = windowLog; + params.useRowMatchFinder = cdict->useRowMatchFinder; FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, ZSTDcrp_leaveDirty, zbuff), ""); assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy); @@ -1953,17 +2076,30 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx, } ZSTD_cwksp_mark_tables_dirty(&cctx->workspace); + assert(params.useRowMatchFinder != ZSTD_urm_auto); /* copy tables */ - { size_t const chainSize = (cdict_cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict_cParams->chainLog); + { size_t const chainSize = ZSTD_allocateChainTable(cdict_cParams->strategy, cdict->useRowMatchFinder, 0 /* DDS guaranteed disabled */) + ? ((size_t)1 << cdict_cParams->chainLog) + : 0; size_t const hSize = (size_t)1 << cdict_cParams->hashLog; ZSTD_memcpy(cctx->blockState.matchState.hashTable, cdict->matchState.hashTable, hSize * sizeof(U32)); - ZSTD_memcpy(cctx->blockState.matchState.chainTable, + /* Do not copy cdict's chainTable if cctx has parameters such that it would not use chainTable */ + if (ZSTD_allocateChainTable(cctx->appliedParams.cParams.strategy, cctx->appliedParams.useRowMatchFinder, 0 /* forDDSDict */)) { + ZSTD_memcpy(cctx->blockState.matchState.chainTable, cdict->matchState.chainTable, chainSize * sizeof(U32)); + } + /* copy tag table */ + if (ZSTD_rowMatchFinderUsed(cdict_cParams->strategy, cdict->useRowMatchFinder)) { + size_t const tagTableSize = hSize*sizeof(U16); + ZSTD_memcpy(cctx->blockState.matchState.tagTable, + cdict->matchState.tagTable, + tagTableSize); + } } /* Zero the hashTable3, since the cdict never fills it */ @@ -2027,14 +2163,15 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, U64 pledgedSrcSize, ZSTD_buffered_policy_e zbuff) { - DEBUGLOG(5, "ZSTD_copyCCtx_internal"); RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong, "Can't copy a ctx that's not in init stage."); - + DEBUGLOG(5, "ZSTD_copyCCtx_internal"); ZSTD_memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem)); { ZSTD_CCtx_params params = dstCCtx->requestedParams; /* Copy only compression parameters related to tables. */ params.cParams = srcCCtx->appliedParams.cParams; + assert(srcCCtx->appliedParams.useRowMatchFinder != ZSTD_urm_auto); + params.useRowMatchFinder = srcCCtx->appliedParams.useRowMatchFinder; params.fParams = fParams; ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize, ZSTDcrp_leaveDirty, zbuff); @@ -2048,7 +2185,11 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace); /* copy tables */ - { size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog); + { size_t const chainSize = ZSTD_allocateChainTable(srcCCtx->appliedParams.cParams.strategy, + srcCCtx->appliedParams.useRowMatchFinder, + 0 /* forDDSDict */) + ? ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog) + : 0; size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog; int const h3log = srcCCtx->blockState.matchState.hashLog3; size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0; @@ -2239,7 +2380,7 @@ typedef struct { /* ZSTD_buildSequencesStatistics(): * Returns the size of the statistics for a given set of sequences, or a ZSTD error code, * Also modifies LLtype, Offtype, MLtype, and lastNCount to the appropriate values. - * + * * entropyWkspSize must be of size at least ENTROPY_WORKSPACE_SIZE - (MaxSeq + 1)*sizeof(U32) */ static ZSTD_symbolEncodingTypeStats_t @@ -2510,7 +2651,7 @@ ZSTD_entropyCompressSeqStore(seqStore_t* seqStorePtr, /* ZSTD_selectBlockCompressor() : * Not static, but internal use only (used by long distance matcher) * assumption : strat is a valid strategy */ -ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode) +ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_useRowMatchFinderMode_e useRowMatchFinder, ZSTD_dictMode_e dictMode) { static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = { { ZSTD_compressBlock_fast /* default for 0 */, @@ -2558,7 +2699,28 @@ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMo ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1); assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); - selectedCompressor = blockCompressor[(int)dictMode][(int)strat]; + DEBUGLOG(4, "Selected block compressor: dictMode=%d strat=%d rowMatchfinder=%d", (int)dictMode, (int)strat, (int)useRowMatchFinder); + if (ZSTD_rowMatchFinderUsed(strat, useRowMatchFinder)) { + static const ZSTD_blockCompressor rowBasedBlockCompressors[4][3] = { + { ZSTD_compressBlock_greedy_row, + ZSTD_compressBlock_lazy_row, + ZSTD_compressBlock_lazy2_row }, + { ZSTD_compressBlock_greedy_extDict_row, + ZSTD_compressBlock_lazy_extDict_row, + ZSTD_compressBlock_lazy2_extDict_row }, + { ZSTD_compressBlock_greedy_dictMatchState_row, + ZSTD_compressBlock_lazy_dictMatchState_row, + ZSTD_compressBlock_lazy2_dictMatchState_row }, + { ZSTD_compressBlock_greedy_dedicatedDictSearch_row, + ZSTD_compressBlock_lazy_dedicatedDictSearch_row, + ZSTD_compressBlock_lazy2_dedicatedDictSearch_row } + }; + DEBUGLOG(4, "Selecting a row-based matchfinder"); + assert(useRowMatchFinder != ZSTD_urm_auto); + selectedCompressor = rowBasedBlockCompressors[(int)dictMode][(int)strat - (int)ZSTD_greedy]; + } else { + selectedCompressor = blockCompressor[(int)dictMode][(int)strat]; + } assert(selectedCompressor != NULL); return selectedCompressor; } @@ -2627,6 +2789,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) ZSTD_ldm_blockCompress(&zc->externSeqStore, ms, &zc->seqStore, zc->blockState.nextCBlock->rep, + zc->appliedParams.useRowMatchFinder, src, srcSize); assert(zc->externSeqStore.pos <= zc->externSeqStore.size); } else if (zc->appliedParams.ldmParams.enableLdm) { @@ -2643,10 +2806,13 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) ZSTD_ldm_blockCompress(&ldmSeqStore, ms, &zc->seqStore, zc->blockState.nextCBlock->rep, + zc->appliedParams.useRowMatchFinder, src, srcSize); assert(ldmSeqStore.pos == ldmSeqStore.size); } else { /* not long range mode */ - ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, dictMode); + ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, + zc->appliedParams.useRowMatchFinder, + dictMode); ms->ldmSeqStore = NULL; lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); } @@ -2954,7 +3120,7 @@ static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr, /** ZSTD_buildBlockEntropyStats() : * Builds entropy for the block. * Requires workspace size ENTROPY_WORKSPACE_SIZE - * + * * @return : 0 on success or error code */ size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr, @@ -3219,7 +3385,7 @@ static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_ /* ZSTD_compressSeqStore_singleBlock(): * Compresses a seqStore into a block with a block header, into the buffer dst. - * + * * Returns the total size of that block (including header) or a ZSTD error code. */ static size_t ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* const seqStore, @@ -3298,11 +3464,11 @@ typedef struct { * Estimates the cost of seqStore prior to split, and estimates the cost of splitting the sequences in half. * If advantageous to split, then we recurse down the two sub-blocks. If not, or if an error occurred in estimation, then * we do not recurse. - * + * * Note: The recursion depth is capped by a heuristic minimum number of sequences, defined by MIN_SEQUENCES_BLOCK_SPLITTING. * In theory, this means the absolute largest recursion depth is 10 == log2(maxNbSeqInBlock/MIN_SEQUENCES_BLOCK_SPLITTING). * In practice, recursion depth usually doesn't go beyond 4. - * + * * Furthermore, the number of splits is capped by MAX_NB_SPLITS. At MAX_NB_SPLITS == 196 with the current existing blockSize * maximum of 128 KB, this value is actually impossible to reach. */ @@ -3357,7 +3523,7 @@ static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq) /* ZSTD_compressBlock_splitBlock(): * Attempts to split a given block into multiple blocks to improve compression ratio. - * + * * Returns combined size of all blocks (which includes headers), or a ZSTD error code. */ static size_t ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, @@ -3921,6 +4087,7 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, const BYTE* ip = (const BYTE*) src; const BYTE* const iend = ip + srcSize; + DEBUGLOG(4, "ZSTD_loadDictionaryContent(): useRowMatchFinder=%d", (int)params->useRowMatchFinder); ZSTD_window_update(&ms->window, src, srcSize); ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base); @@ -3956,11 +4123,24 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, case ZSTD_greedy: case ZSTD_lazy: case ZSTD_lazy2: - if (chunk >= HASH_READ_SIZE && ms->dedicatedDictSearch) { - assert(chunk == remaining); /* must load everything in one go */ - ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, ichunk-HASH_READ_SIZE); - } else if (chunk >= HASH_READ_SIZE) { - ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE); + if (chunk >= HASH_READ_SIZE) { + if (ms->dedicatedDictSearch) { + assert(chunk == remaining); /* must load everything in one go */ + assert(ms->chainTable != NULL); + ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, ichunk-HASH_READ_SIZE); + } else { + assert(params->useRowMatchFinder != ZSTD_urm_auto); + if (params->useRowMatchFinder == ZSTD_urm_enableRowMatchFinder) { + size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog) * sizeof(U16); + if (ip == src) + ZSTD_memset(ms->tagTable, 0, tagTableSize); + ZSTD_row_update(ms, ichunk-HASH_READ_SIZE); + DEBUGLOG(4, "Using row-based hash table for lazy dict"); + } else { + ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE); + DEBUGLOG(4, "Using chain-based hash table for lazy dict"); + } + } } break; @@ -4115,7 +4295,6 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, const BYTE* const dictEnd = dictPtr + dictSize; size_t dictID; size_t eSize; - ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<= 8); assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY); @@ -4454,7 +4633,10 @@ size_t ZSTD_estimateCDictSize_advanced( DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict)); return ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) - + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) + /* enableDedicatedDictSearch == 1 ensures that CDict estimation will not be too small + * in case we are using DDS with row-hash. */ + + ZSTD_sizeof_matchState(&cParams, ZSTD_resolveRowMatchFinderMode(ZSTD_urm_auto, &cParams), + /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *)))); } @@ -4508,6 +4690,7 @@ static size_t ZSTD_initCDict_internal( &cdict->matchState, &cdict->workspace, ¶ms.cParams, + params.useRowMatchFinder, ZSTDcrp_makeClean, ZSTDirp_reset, ZSTD_resetTarget_CDict), ""); @@ -4531,14 +4714,17 @@ static size_t ZSTD_initCDict_internal( static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_compressionParameters cParams, ZSTD_customMem customMem) + ZSTD_compressionParameters cParams, + ZSTD_useRowMatchFinderMode_e useRowMatchFinder, + U32 enableDedicatedDictSearch, + ZSTD_customMem customMem) { if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; { size_t const workspaceSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) + - ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) + + ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, enableDedicatedDictSearch, /* forCCtx */ 0) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*)))); void* const workspace = ZSTD_customMalloc(workspaceSize, customMem); @@ -4557,7 +4743,7 @@ static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize, ZSTD_cwksp_move(&cdict->workspace, &ws); cdict->customMem = customMem; cdict->compressionLevel = ZSTD_NO_CLEVEL; /* signals advanced API usage */ - + cdict->useRowMatchFinder = useRowMatchFinder; return cdict; } } @@ -4609,10 +4795,13 @@ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2( &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); } + DEBUGLOG(3, "ZSTD_createCDict_advanced2: DDS: %u", cctxParams.enableDedicatedDictSearch); cctxParams.cParams = cParams; + cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams); cdict = ZSTD_createCDict_advanced_internal(dictSize, dictLoadMethod, cctxParams.cParams, + cctxParams.useRowMatchFinder, cctxParams.enableDedicatedDictSearch, customMem); if (ZSTD_isError( ZSTD_initCDict_internal(cdict, @@ -4681,7 +4870,9 @@ const ZSTD_CDict* ZSTD_initStaticCDict( ZSTD_dictContentType_e dictContentType, ZSTD_compressionParameters cParams) { - size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0); + ZSTD_useRowMatchFinderMode_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(ZSTD_urm_auto, &cParams); + /* enableDedicatedDictSearch == 1 ensures matchstate is not too small in case this CDict will be used for DDS + row hash */ + size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0); size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*)))) @@ -4706,6 +4897,8 @@ const ZSTD_CDict* ZSTD_initStaticCDict( ZSTD_CCtxParams_init(¶ms, 0); params.cParams = cParams; + params.useRowMatchFinder = useRowMatchFinder; + cdict->useRowMatchFinder = useRowMatchFinder; if (ZSTD_isError( ZSTD_initCDict_internal(cdict, dict, dictSize, @@ -5242,6 +5435,8 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx, params.splitBlocks = 1; } + params.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params.useRowMatchFinder, ¶ms.cParams); + #ifdef ZSTD_MULTITHREAD if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) { params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */ @@ -6051,6 +6246,7 @@ static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, else row = compressionLevel; { ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row]; + DEBUGLOG(5, "ZSTD_getCParams_internal selected tableID: %u row: %u strat: %u", tableID, row, (U32)cp.strategy); /* acceleration factor */ if (compressionLevel < 0) { int const clampedCompressionLevel = MAX(ZSTD_minCLevel(), compressionLevel); diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 447115478..3c488c61c 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -196,6 +196,9 @@ typedef struct { } ZSTD_window_t; typedef struct ZSTD_matchState_t ZSTD_matchState_t; + +#define ZSTD_ROW_HASH_CACHE_SIZE 8 /* Size of prefetching hash cache for row-based matchfinder */ + struct ZSTD_matchState_t { ZSTD_window_t window; /* State for window round buffer management */ U32 loadedDictEnd; /* index of end of dictionary, within context's referential. @@ -207,9 +210,15 @@ struct ZSTD_matchState_t { */ U32 nextToUpdate; /* index from which to continue table update */ U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */ + + U32 rowHashLog; /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/ + U16* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */ + U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */ + U32* hashTable; U32* hashTable3; U32* chainTable; + int dedicatedDictSearch; /* Indicates whether this matchState is using the * dedicated dictionary search structure. */ @@ -305,6 +314,9 @@ struct ZSTD_CCtx_params_s { /* Block splitting */ int splitBlocks; + /* Param for deciding whether to use row-based matchfinder */ + ZSTD_useRowMatchFinderMode_e useRowMatchFinder; + /* Internal use, for createCCtxParams() and freeCCtxParams() only */ ZSTD_customMem customMem; }; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */ @@ -420,7 +432,7 @@ typedef enum { typedef size_t (*ZSTD_blockCompressor) ( ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); -ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode); +ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_useRowMatchFinderMode_e rowMatchfinderMode, ZSTD_dictMode_e dictMode); MEM_STATIC U32 ZSTD_LLcode(U32 litLength) diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index 75745a79b..0a460848f 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -438,43 +438,9 @@ static size_t ZSTD_BtFindBestMatch_extDict_selectMLS ( } } - - -/* ********************************* -* Hash Chain +/*********************************** +* Dedicated dict search ***********************************/ -#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & (mask)] - -/* Update chains up to ip (excluded) - Assumption : always within prefix (i.e. not within extDict) */ -FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal( - ZSTD_matchState_t* ms, - const ZSTD_compressionParameters* const cParams, - const BYTE* ip, U32 const mls) -{ - U32* const hashTable = ms->hashTable; - const U32 hashLog = cParams->hashLog; - U32* const chainTable = ms->chainTable; - const U32 chainMask = (1 << cParams->chainLog) - 1; - const BYTE* const base = ms->window.base; - const U32 target = (U32)(ip - base); - U32 idx = ms->nextToUpdate; - - while(idx < target) { /* catch up */ - size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls); - NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; - hashTable[h] = idx; - idx++; - } - - ms->nextToUpdate = target; - return hashTable[ZSTD_hashPtr(ip, hashLog, mls)]; -} - -U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) { - const ZSTD_compressionParameters* const cParams = &ms->cParams; - return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch); -} void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip) { @@ -500,7 +466,6 @@ void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const B U32* const tmpChainTable = hashTable + ((size_t)1 << hashLog); U32 const tmpChainSize = ((1 << ZSTD_LAZY_DDSS_BUCKET_LOG) - 1) << hashLog; U32 const tmpMinChain = tmpChainSize < target ? target - tmpChainSize : idx; - U32 hashIdx; assert(ms->cParams.chainLog <= 24); @@ -591,6 +556,139 @@ void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const B ms->nextToUpdate = target; } +/* Returns the longest match length found in the dedicated dict search structure. + * If none are longer than the argument ml, then ml will be returned. + */ +FORCE_INLINE_TEMPLATE +size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nbAttempts, + const ZSTD_matchState_t* const dms, + const BYTE* const ip, const BYTE* const iLimit, + const BYTE* const prefixStart, const U32 curr, + const U32 dictLimit, const size_t ddsIdx) { + const U32 ddsLowestIndex = dms->window.dictLimit; + const BYTE* const ddsBase = dms->window.base; + const BYTE* const ddsEnd = dms->window.nextSrc; + const U32 ddsSize = (U32)(ddsEnd - ddsBase); + const U32 ddsIndexDelta = dictLimit - ddsSize; + const U32 bucketSize = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG); + const U32 bucketLimit = nbAttempts < bucketSize - 1 ? nbAttempts : bucketSize - 1; + U32 ddsAttempt; + U32 matchIndex; + + for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) { + PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]); + } + + { + U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1]; + U32 const chainIndex = chainPackedPointer >> 8; + + PREFETCH_L1(&dms->chainTable[chainIndex]); + } + + for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) { + size_t currentMl=0; + const BYTE* match; + matchIndex = dms->hashTable[ddsIdx + ddsAttempt]; + match = ddsBase + matchIndex; + + if (!matchIndex) { + return ml; + } + + /* guaranteed by table construction */ + (void)ddsLowestIndex; + assert(matchIndex >= ddsLowestIndex); + assert(match+4 <= ddsEnd); + if (MEM_read32(match) == MEM_read32(ip)) { + /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4; + } + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) { + /* best possible, avoids read overflow on next attempt */ + return ml; + } + } + } + + { + U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1]; + U32 chainIndex = chainPackedPointer >> 8; + U32 const chainLength = chainPackedPointer & 0xFF; + U32 const chainAttempts = nbAttempts - ddsAttempt; + U32 const chainLimit = chainAttempts > chainLength ? chainLength : chainAttempts; + U32 chainAttempt; + + for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++) { + PREFETCH_L1(ddsBase + dms->chainTable[chainIndex + chainAttempt]); + } + + for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++, chainIndex++) { + size_t currentMl=0; + const BYTE* match; + matchIndex = dms->chainTable[chainIndex]; + match = ddsBase + matchIndex; + + /* guaranteed by table construction */ + assert(matchIndex >= ddsLowestIndex); + assert(match+4 <= ddsEnd); + if (MEM_read32(match) == MEM_read32(ip)) { + /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4; + } + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ + } + } + } + return ml; +} + + +/* ********************************* +* Hash Chain +***********************************/ +#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & (mask)] + +/* Update chains up to ip (excluded) + Assumption : always within prefix (i.e. not within extDict) */ +FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal( + ZSTD_matchState_t* ms, + const ZSTD_compressionParameters* const cParams, + const BYTE* ip, U32 const mls) +{ + U32* const hashTable = ms->hashTable; + const U32 hashLog = cParams->hashLog; + U32* const chainTable = ms->chainTable; + const U32 chainMask = (1 << cParams->chainLog) - 1; + const BYTE* const base = ms->window.base; + const U32 target = (U32)(ip - base); + U32 idx = ms->nextToUpdate; + + while(idx < target) { /* catch up */ + size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls); + NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; + hashTable[h] = idx; + idx++; + } + + ms->nextToUpdate = target; + return hashTable[ZSTD_hashPtr(ip, hashLog, mls)]; +} + +U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) { + const ZSTD_compressionParameters* const cParams = &ms->cParams; + return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch); +} /* inlining is important to hardwire a hot branch (template emulation) */ FORCE_INLINE_TEMPLATE @@ -661,90 +759,8 @@ size_t ZSTD_HcFindBestMatch_generic ( } if (dictMode == ZSTD_dedicatedDictSearch) { - const U32 ddsLowestIndex = dms->window.dictLimit; - const BYTE* const ddsBase = dms->window.base; - const BYTE* const ddsEnd = dms->window.nextSrc; - const U32 ddsSize = (U32)(ddsEnd - ddsBase); - const U32 ddsIndexDelta = dictLimit - ddsSize; - const U32 bucketSize = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG); - const U32 bucketLimit = nbAttempts < bucketSize - 1 ? nbAttempts : bucketSize - 1; - U32 ddsAttempt; - - for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) { - PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]); - } - - { - U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1]; - U32 const chainIndex = chainPackedPointer >> 8; - - PREFETCH_L1(&dms->chainTable[chainIndex]); - } - - for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) { - size_t currentMl=0; - const BYTE* match; - matchIndex = dms->hashTable[ddsIdx + ddsAttempt]; - match = ddsBase + matchIndex; - - if (!matchIndex) { - return ml; - } - - /* guaranteed by table construction */ - (void)ddsLowestIndex; - assert(matchIndex >= ddsLowestIndex); - assert(match+4 <= ddsEnd); - if (MEM_read32(match) == MEM_read32(ip)) { - /* assumption : matchIndex <= dictLimit-4 (by table construction) */ - currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4; - } - - /* save best solution */ - if (currentMl > ml) { - ml = currentMl; - *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE; - if (ip+currentMl == iLimit) { - /* best possible, avoids read overflow on next attempt */ - return ml; - } - } - } - - { - U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1]; - U32 chainIndex = chainPackedPointer >> 8; - U32 const chainLength = chainPackedPointer & 0xFF; - U32 const chainAttempts = nbAttempts - ddsAttempt; - U32 const chainLimit = chainAttempts > chainLength ? chainLength : chainAttempts; - U32 chainAttempt; - - for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++) { - PREFETCH_L1(ddsBase + dms->chainTable[chainIndex + chainAttempt]); - } - - for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++, chainIndex++) { - size_t currentMl=0; - const BYTE* match; - matchIndex = dms->chainTable[chainIndex]; - match = ddsBase + matchIndex; - - /* guaranteed by table construction */ - assert(matchIndex >= ddsLowestIndex); - assert(match+4 <= ddsEnd); - if (MEM_read32(match) == MEM_read32(ip)) { - /* assumption : matchIndex <= dictLimit-4 (by table construction) */ - currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4; - } - - /* save best solution */ - if (currentMl > ml) { - ml = currentMl; - *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE; - if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ - } - } - } + ml = ZSTD_dedicatedDictSearch_lazy_search(offsetPtr, ml, nbAttempts, dms, + ip, iLimit, prefixStart, curr, dictLimit, ddsIdx); } else if (dictMode == ZSTD_dictMatchState) { const U32* const dmsChainTable = dms->chainTable; const U32 dmsChainSize = (1 << dms->cParams.chainLog); @@ -845,11 +861,656 @@ FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( } } +/* ********************************* +* (SIMD) Row-based matchfinder +***********************************/ +/* Constants for row-based hash */ +#define ZSTD_ROW_HASH_TAG_OFFSET 1 /* byte offset of hashes in the match state's tagTable from the beginning of a row */ +#define ZSTD_ROW_HASH_TAG_BITS 8 /* nb bits to use for the tag */ +#define ZSTD_ROW_HASH_TAG_MASK ((1u << ZSTD_ROW_HASH_TAG_BITS) - 1) + +#define ZSTD_ROW_HASH_CACHE_MASK (ZSTD_ROW_HASH_CACHE_SIZE - 1) + +typedef U32 ZSTD_VecMask; /* Clarifies when we are interacting with a U32 representing a mask of matches */ + +#if !defined(ZSTD_NO_INTRINSICS) && defined(__SSE2__) /* SIMD SSE version */ + +#include +typedef __m128i ZSTD_Vec128; + +/* Returns a 128-bit container with 128-bits from src */ +static ZSTD_Vec128 ZSTD_Vec128_read(const void* const src) { + return _mm_loadu_si128((ZSTD_Vec128 const*)src); +} + +/* Returns a ZSTD_Vec128 with the byte "val" packed 16 times */ +static ZSTD_Vec128 ZSTD_Vec128_set8(BYTE val) { + return _mm_set1_epi8((char)val); +} + +/* Do byte-by-byte comparison result of x and y. Then collapse 128-bit resultant mask + * into a 32-bit mask that is the MSB of each byte. + * */ +static ZSTD_VecMask ZSTD_Vec128_cmpMask8(ZSTD_Vec128 x, ZSTD_Vec128 y) { + return (ZSTD_VecMask)_mm_movemask_epi8(_mm_cmpeq_epi8(x, y)); +} + +typedef struct { + __m128i fst; + __m128i snd; +} ZSTD_Vec256; + +static ZSTD_Vec256 ZSTD_Vec256_read(const void* const ptr) { + ZSTD_Vec256 v; + v.fst = ZSTD_Vec128_read(ptr); + v.snd = ZSTD_Vec128_read((ZSTD_Vec128 const*)ptr + 1); + return v; +} + +static ZSTD_Vec256 ZSTD_Vec256_set8(BYTE val) { + ZSTD_Vec256 v; + v.fst = ZSTD_Vec128_set8(val); + v.snd = ZSTD_Vec128_set8(val); + return v; +} + +static ZSTD_VecMask ZSTD_Vec256_cmpMask8(ZSTD_Vec256 x, ZSTD_Vec256 y) { + ZSTD_VecMask fstMask; + ZSTD_VecMask sndMask; + fstMask = ZSTD_Vec128_cmpMask8(x.fst, y.fst); + sndMask = ZSTD_Vec128_cmpMask8(x.snd, y.snd); + return fstMask | (sndMask << 16); +} + +#elif !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON) /* SIMD ARM NEON Version */ + +#include +typedef uint8x16_t ZSTD_Vec128; + +static ZSTD_Vec128 ZSTD_Vec128_read(const void* const src) { + return vld1q_u8((const BYTE* const)src); +} + +static ZSTD_Vec128 ZSTD_Vec128_set8(BYTE val) { + return vdupq_n_u8(val); +} + +/* Mimics '_mm_movemask_epi8()' from SSE */ +static U32 ZSTD_vmovmaskq_u8(ZSTD_Vec128 val) { + /* Shift out everything but the MSB bits in each byte */ + uint16x8_t highBits = vreinterpretq_u16_u8(vshrq_n_u8(val, 7)); + /* Merge the even lanes together with vsra (right shift and add) */ + uint32x4_t paired16 = vreinterpretq_u32_u16(vsraq_n_u16(highBits, highBits, 7)); + uint64x2_t paired32 = vreinterpretq_u64_u32(vsraq_n_u32(paired16, paired16, 14)); + uint8x16_t paired64 = vreinterpretq_u8_u64(vsraq_n_u64(paired32, paired32, 28)); + /* Extract the low 8 bits from each lane, merge */ + return vgetq_lane_u8(paired64, 0) | ((U32)vgetq_lane_u8(paired64, 8) << 8); +} + +static ZSTD_VecMask ZSTD_Vec128_cmpMask8(ZSTD_Vec128 x, ZSTD_Vec128 y) { + return (ZSTD_VecMask)ZSTD_vmovmaskq_u8(vceqq_u8(x, y)); +} + +typedef struct { + uint8x16_t fst; + uint8x16_t snd; +} ZSTD_Vec256; + +static ZSTD_Vec256 ZSTD_Vec256_read(const void* const ptr) { + ZSTD_Vec256 v; + v.fst = ZSTD_Vec128_read(ptr); + v.snd = ZSTD_Vec128_read((ZSTD_Vec128 const*)ptr + 1); + return v; +} + +static ZSTD_Vec256 ZSTD_Vec256_set8(BYTE val) { + ZSTD_Vec256 v; + v.fst = ZSTD_Vec128_set8(val); + v.snd = ZSTD_Vec128_set8(val); + return v; +} + +static ZSTD_VecMask ZSTD_Vec256_cmpMask8(ZSTD_Vec256 x, ZSTD_Vec256 y) { + ZSTD_VecMask fstMask; + ZSTD_VecMask sndMask; + fstMask = ZSTD_Vec128_cmpMask8(x.fst, y.fst); + sndMask = ZSTD_Vec128_cmpMask8(x.snd, y.snd); + return fstMask | (sndMask << 16); +} + +#else /* Scalar fallback version */ + +#define VEC128_NB_SIZE_T (16 / sizeof(size_t)) +typedef struct { + size_t vec[VEC128_NB_SIZE_T]; +} ZSTD_Vec128; + +static ZSTD_Vec128 ZSTD_Vec128_read(const void* const src) { + ZSTD_Vec128 ret; + ZSTD_memcpy(ret.vec, src, VEC128_NB_SIZE_T*sizeof(size_t)); + return ret; +} + +static ZSTD_Vec128 ZSTD_Vec128_set8(BYTE val) { + ZSTD_Vec128 ret = { {0} }; + int startBit = sizeof(size_t) * 8 - 8; + for (;startBit >= 0; startBit -= 8) { + unsigned j = 0; + for (;j < VEC128_NB_SIZE_T; ++j) { + ret.vec[j] |= ((size_t)val << startBit); + } + } + return ret; +} + +/* Compare x to y, byte by byte, generating a "matches" bitfield */ +static ZSTD_VecMask ZSTD_Vec128_cmpMask8(ZSTD_Vec128 x, ZSTD_Vec128 y) { + ZSTD_VecMask res = 0; + unsigned i = 0; + unsigned l = 0; + for (; i < VEC128_NB_SIZE_T; ++i) { + const size_t cmp1 = x.vec[i]; + const size_t cmp2 = y.vec[i]; + unsigned j = 0; + for (; j < sizeof(size_t); ++j, ++l) { + if (((cmp1 >> j*8) & 0xFF) == ((cmp2 >> j*8) & 0xFF)) { + res |= ((U32)1 << (j+i*sizeof(size_t))); + } + } + } + return res; +} + +#define VEC256_NB_SIZE_T 2*VEC128_NB_SIZE_T +typedef struct { + size_t vec[VEC256_NB_SIZE_T]; +} ZSTD_Vec256; + +static ZSTD_Vec256 ZSTD_Vec256_read(const void* const src) { + ZSTD_Vec256 ret; + ZSTD_memcpy(ret.vec, src, VEC256_NB_SIZE_T*sizeof(size_t)); + return ret; +} + +static ZSTD_Vec256 ZSTD_Vec256_set8(BYTE val) { + ZSTD_Vec256 ret = { {0} }; + int startBit = sizeof(size_t) * 8 - 8; + for (;startBit >= 0; startBit -= 8) { + unsigned j = 0; + for (;j < VEC256_NB_SIZE_T; ++j) { + ret.vec[j] |= ((size_t)val << startBit); + } + } + return ret; +} + +/* Compare x to y, byte by byte, generating a "matches" bitfield */ +static ZSTD_VecMask ZSTD_Vec256_cmpMask8(ZSTD_Vec256 x, ZSTD_Vec256 y) { + ZSTD_VecMask res = 0; + unsigned i = 0; + unsigned l = 0; + for (; i < VEC256_NB_SIZE_T; ++i) { + const size_t cmp1 = x.vec[i]; + const size_t cmp2 = y.vec[i]; + unsigned j = 0; + for (; j < sizeof(size_t); ++j, ++l) { + if (((cmp1 >> j*8) & 0xFF) == ((cmp2 >> j*8) & 0xFF)) { + res |= ((U32)1 << (j+i*sizeof(size_t))); + } + } + } + return res; +} + +#endif /* !defined(ZSTD_NO_INTRINSICS) && defined(__SSE2__) */ + +/* ZSTD_VecMask_next(): + * Starting from the LSB, returns the idx of the next non-zero bit. + * Basically counting the nb of trailing zeroes. + */ +static U32 ZSTD_VecMask_next(ZSTD_VecMask val) { +# if defined(_MSC_VER) /* Visual */ + unsigned long r=0; + return _BitScanForward(&r, val) ? (U32)r : 0; +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (U32)__builtin_ctz(val); +# else + /* Software ctz version: http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightMultLookup */ + static const U32 multiplyDeBruijnBitPosition[32] = + { + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 + }; + return multiplyDeBruijnBitPosition[((U32)((v & -(int)v) * 0x077CB531U)) >> 27]; +# endif +} + +/* ZSTD_VecMask_rotateRight(): + * Rotates a bitfield to the right by "rotation" bits. + * If the rotation is greater than totalBits, the returned mask is 0. + */ +FORCE_INLINE_TEMPLATE ZSTD_VecMask +ZSTD_VecMask_rotateRight(ZSTD_VecMask mask, U32 const rotation, U32 const totalBits) { + if (rotation == 0) + return mask; + switch (totalBits) { + default: + assert(0); + case 16: + return (mask >> rotation) | (U16)(mask << (16 - rotation)); + case 32: + return (mask >> rotation) | (U32)(mask << (32 - rotation)); + } +} + +/* ZSTD_row_nextIndex(): + * Returns the next index to insert at within a tagTable row, and updates the "head" + * value to reflect the update. Essentially cycles backwards from [0, {entries per row}) + */ +FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextIndex(BYTE* const tagRow, U32 const rowMask) { + U32 const next = (*tagRow - 1) & rowMask; + *tagRow = (BYTE)next; + return next; +} + +/* ZSTD_isAligned(): + * Checks that a pointer is aligned to "align" bytes which must be a power of 2. + */ +MEM_STATIC int ZSTD_isAligned(void const* ptr, size_t align) { + assert((align & (align - 1)) == 0); + return (((size_t)ptr) & (align - 1)) == 0; +} + +/* ZSTD_row_prefetch(): + * Performs prefetching for the hashTable and tagTable at a given row. + */ +FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, U16 const* tagTable, U32 const relRow, U32 const rowLog) { + PREFETCH_L1(hashTable + relRow); + if (rowLog == 5) { + PREFETCH_L1(hashTable + relRow + 16); + } + PREFETCH_L1(tagTable + relRow); + assert(rowLog == 4 || rowLog == 5); + assert(ZSTD_isAligned(hashTable + relRow, 64)); /* prefetched hash row always 64-byte aligned */ + assert(ZSTD_isAligned(tagTable + relRow, (size_t)1 << rowLog)); /* prefetched tagRow sits on a multiple of 32 or 64 bytes */ +} + +/* ZSTD_row_fillHashCache(): + * Fill up the hash cache starting at idx, prefetching ZSTD_ROW_HASH_CACHE_SIZE entries. + */ +static void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const BYTE* base, + U32 const rowLog, U32 const mls, + U32 idx, const BYTE* const iend) +{ + U32 const* const hashTable = ms->hashTable; + U16 const* const tagTable = ms->tagTable; + U32 const hashLog = ms->rowHashLog; + U32 const maxElemsToPrefetch = (base + idx) >= iend ? 0 : (U32)(iend - (base + idx)); + U32 const lim = idx + MIN(ZSTD_ROW_HASH_CACHE_SIZE, maxElemsToPrefetch); + + for (; idx < lim; ++idx) { + U32 const hash = (U32)ZSTD_hashPtr(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls); + U32 const row = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; + ZSTD_row_prefetch(hashTable, tagTable, row, rowLog); + ms->hashCache[idx & ZSTD_ROW_HASH_CACHE_MASK] = hash; + } + + DEBUGLOG(6, "ZSTD_row_fillHashCache(): [%u %u %u %u %u %u %u %u]", ms->hashCache[0], ms->hashCache[1], + ms->hashCache[2], ms->hashCache[3], ms->hashCache[4], + ms->hashCache[5], ms->hashCache[6], ms->hashCache[7]); +} + +/* ZSTD_row_nextCachedHash(): + * Returns the hash of base + idx, and replaces the hash in the hash cache with the byte at + * base + idx + ZSTD_ROW_HASH_CACHE_SIZE. Also prefetches the appropriate rows from hashTable and tagTable. + */ +FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable, + U16 const* tagTable, BYTE const* base, + U32 idx, U32 const hashLog, + U32 const rowLog, U32 const mls) +{ + U32 const newHash = (U32)ZSTD_hashPtr(base+idx+ZSTD_ROW_HASH_CACHE_SIZE, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls); + U32 const row = (newHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; + ZSTD_row_prefetch(hashTable, tagTable, row, rowLog); + { U32 const hash = cache[idx & ZSTD_ROW_HASH_CACHE_MASK]; + cache[idx & ZSTD_ROW_HASH_CACHE_MASK] = newHash; + return hash; + } +} + +/* ZSTD_row_update_internal(): + * Inserts the byte at ip into the appropriate position in the hash table. + * Determines the relative row, and the position within the {16, 32} entry row to insert at. + */ +FORCE_INLINE_TEMPLATE void ZSTD_row_update_internal(ZSTD_matchState_t* ms, const BYTE* ip, + U32 const mls, U32 const rowLog, + U32 const rowMask, U32 const useCache) +{ + U32* const hashTable = ms->hashTable; + U16* const tagTable = ms->tagTable; + U32 const hashLog = ms->rowHashLog; + const BYTE* const base = ms->window.base; + const U32 target = (U32)(ip - base); + U32 idx = ms->nextToUpdate; + + DEBUGLOG(6, "ZSTD_row_update_internal(): nextToUpdate=%u, current=%u", idx, target); + for (; idx < target; ++idx) { + U32 const hash = useCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, idx, hashLog, rowLog, mls) + : (U32)ZSTD_hashPtr(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls); + U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; + U32* const row = hashTable + relRow; + BYTE* tagRow = (BYTE*)(tagTable + relRow); /* Though tagTable is laid out as a table of U16, each tag is only 1 byte. + Explicit cast allows us to get exact desired position within each row */ + U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask); + + assert(hash == ZSTD_hashPtr(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls)); + ((BYTE*)tagRow)[pos + ZSTD_ROW_HASH_TAG_OFFSET] = hash & ZSTD_ROW_HASH_TAG_MASK; + row[pos] = idx; + } + ms->nextToUpdate = target; +} + +/* ZSTD_row_update(): + * External wrapper for ZSTD_row_update_internal(). Used for filling the hashtable during dictionary + * processing. + */ +void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip) { + const U32 rowLog = ms->cParams.searchLog < 5 ? 4 : 5; + const U32 rowMask = (1u << rowLog) - 1; + const U32 mls = MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */); + + DEBUGLOG(5, "ZSTD_row_update(), rowLog=%u", rowLog); + ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 0 /* dont use cache */); +} + +/* Returns a ZSTD_VecMask (U32) that has the nth bit set to 1 if the newly-computed "tag" matches + * the hash at the nth position in a row of the tagTable. + */ +FORCE_INLINE_TEMPLATE +ZSTD_VecMask ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 head, const U32 rowEntries) { + ZSTD_VecMask matches = 0; + if (rowEntries == 16) { + ZSTD_Vec128 hashes = ZSTD_Vec128_read(tagRow + ZSTD_ROW_HASH_TAG_OFFSET); + ZSTD_Vec128 expandedTags = ZSTD_Vec128_set8(tag); + matches = ZSTD_Vec128_cmpMask8(hashes, expandedTags); + } else if (rowEntries == 32) { + ZSTD_Vec256 hashes = ZSTD_Vec256_read(tagRow + ZSTD_ROW_HASH_TAG_OFFSET); + ZSTD_Vec256 expandedTags = ZSTD_Vec256_set8(tag); + matches = ZSTD_Vec256_cmpMask8(hashes, expandedTags); + } else { + assert(0); + } + /* Each row is a circular buffer beginning at the value of "head". So we must rotate the "matches" bitfield + to match up with the actual layout of the entries within the hashTable */ + return ZSTD_VecMask_rotateRight(matches, head, rowEntries); +} + +/* The high-level approach of the SIMD row based match finder is as follows: + * - Figure out where to insert the new entry: + * - Generate a hash from a byte along with an additional 1-byte "short hash". The additional byte is our "tag" + * - The hashTable is effectively split into groups or "rows" of 16 or 32 entries of U32, and the hash determines + * which row to insert into. + * - Determine the correct position within the row to insert the entry into. Each row of 16 or 32 can + * be considered as a circular buffer with a "head" index that resides in the tagTable. + * - Also insert the "tag" into the equivalent row and position in the tagTable. + * - Note: The tagTable has 17 or 33 1-byte entries per row, due to 16 or 32 tags, and 1 "head" entry. + * The 17 or 33 entry rows are spaced out to occur every 32 or 64 bytes, respectively, + * for alignment/performance reasons, leaving some bytes unused. + * - Use SIMD to efficiently compare the tags in the tagTable to the 1-byte "short hash" and + * generate a bitfield that we can cycle through to check the collisions in the hash table. + * - Pick the longest match. + */ +FORCE_INLINE_TEMPLATE +size_t ZSTD_RowFindBestMatch_generic ( + ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 mls, const ZSTD_dictMode_e dictMode, + const U32 rowLog) +{ + U32* const hashTable = ms->hashTable; + U16* const tagTable = ms->tagTable; + U32* const hashCache = ms->hashCache; + const U32 hashLog = ms->rowHashLog; + const ZSTD_compressionParameters* const cParams = &ms->cParams; + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const U32 curr = (U32)(ip-base); + const U32 maxDistance = 1U << cParams->windowLog; + const U32 lowestValid = ms->window.lowLimit; + const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid; + const U32 isDictionary = (ms->loadedDictEnd != 0); + const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance; + const U32 rowEntries = (1U << rowLog); + const U32 rowMask = rowEntries - 1; + const U32 cappedSearchLog = MIN(cParams->searchLog, rowLog); /* nb of searches is capped at nb entries per row */ + U32 nbAttempts = 1U << cappedSearchLog; + size_t ml=4-1; + + /* DMS/DDS variables that may be referenced laster */ + const ZSTD_matchState_t* const dms = ms->dictMatchState; + size_t ddsIdx; + U32 ddsExtraAttempts; /* cctx hash tables are limited in searches, but allow extra searches into DDS */ + U32 dmsTag; + U32* dmsRow; + BYTE* dmsTagRow; + + if (dictMode == ZSTD_dedicatedDictSearch) { + const U32 ddsHashLog = dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG; + { /* Prefetch DDS hashtable entry */ + ddsIdx = ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG; + PREFETCH_L1(&dms->hashTable[ddsIdx]); + } + ddsExtraAttempts = cParams->searchLog > rowLog ? 1U << (cParams->searchLog - rowLog) : 0; + } + + if (dictMode == ZSTD_dictMatchState) { + /* Prefetch DMS rows */ + U32* const dmsHashTable = dms->hashTable; + U16* const dmsTagTable = dms->tagTable; + U32 const dmsHash = (U32)ZSTD_hashPtr(ip, dms->rowHashLog + ZSTD_ROW_HASH_TAG_BITS, mls); + U32 const dmsRelRow = (dmsHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; + dmsTag = dmsHash & ZSTD_ROW_HASH_TAG_MASK; + dmsTagRow = (BYTE*)(dmsTagTable + dmsRelRow); + dmsRow = dmsHashTable + dmsRelRow; + ZSTD_row_prefetch(dmsHashTable, dmsTagTable, dmsRelRow, rowLog); + } + + /* Update the hashTable and tagTable up to (but not including) ip */ + ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 1 /* useCache */); + { /* Get the hash for ip, compute the appropriate row */ + U32 const hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls); + U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; + U32 const tag = hash & ZSTD_ROW_HASH_TAG_MASK; + U32* const row = hashTable + relRow; + BYTE* tagRow = (BYTE*)(tagTable + relRow); + U32 const head = *tagRow & rowMask; + U32 matchBuffer[32 /* maximum nb entries per row */]; + size_t numMatches = 0; + size_t currMatch = 0; + ZSTD_VecMask matches = ZSTD_row_getMatchMask(tagRow, (BYTE)tag, head, rowEntries); + + /* Cycle through the matches and prefetch */ + for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) { + U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask; + U32 const matchIndex = row[matchPos]; + assert(numMatches < rowEntries); + if (matchIndex < lowLimit) + break; + if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) { + PREFETCH_L1(base + matchIndex); + } else { + PREFETCH_L1(dictBase + matchIndex); + } + matchBuffer[numMatches++] = matchIndex; + } + + /* Speed opt: insert current byte into hashtable too. This allows us to avoid one iteration of the loop + in ZSTD_row_update_internal() at the next search. */ + { + U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask); + tagRow[pos + ZSTD_ROW_HASH_TAG_OFFSET] = (BYTE)tag; + row[pos] = ms->nextToUpdate++; + } + + /* Return the longest match */ + for (; currMatch < numMatches; ++currMatch) { + U32 const matchIndex = matchBuffer[currMatch]; + size_t currentMl=0; + assert(matchIndex < curr); + assert(matchIndex >= lowLimit); + + if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) { + const BYTE* const match = base + matchIndex; + assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */ + if (match[ml] == ip[ml]) /* potentially better */ + currentMl = ZSTD_count(ip, match, iLimit); + } else { + const BYTE* const match = dictBase + matchIndex; + assert(match+4 <= dictEnd); + if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4; + } + + /* Save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = curr - matchIndex + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ + } + } + } + + if (dictMode == ZSTD_dedicatedDictSearch) { + ml = ZSTD_dedicatedDictSearch_lazy_search(offsetPtr, ml, nbAttempts + ddsExtraAttempts, dms, + ip, iLimit, prefixStart, curr, dictLimit, ddsIdx); + } else if (dictMode == ZSTD_dictMatchState) { + /* TODO: Measure and potentially add prefetching to DMS */ + const U32 dmsLowestIndex = dms->window.dictLimit; + const BYTE* const dmsBase = dms->window.base; + const BYTE* const dmsEnd = dms->window.nextSrc; + const U32 dmsSize = (U32)(dmsEnd - dmsBase); + const U32 dmsIndexDelta = dictLimit - dmsSize; + + { U32 const head = *dmsTagRow & rowMask; + U32 matchBuffer[32 /* maximum nb row entries */]; + size_t numMatches = 0; + size_t currMatch = 0; + ZSTD_VecMask matches = ZSTD_row_getMatchMask(dmsTagRow, (BYTE)dmsTag, head, rowEntries); + + for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) { + U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask; + U32 const matchIndex = dmsRow[matchPos]; + if (matchIndex < dmsLowestIndex) + break; + PREFETCH_L1(dmsBase + matchIndex); + matchBuffer[numMatches++] = matchIndex; + } + + /* Return the longest match */ + for (; currMatch < numMatches; ++currMatch) { + U32 const matchIndex = matchBuffer[currMatch]; + size_t currentMl=0; + assert(matchIndex >= dmsLowestIndex); + assert(matchIndex < curr); + + { const BYTE* const match = dmsBase + matchIndex; + assert(match+4 <= dmsEnd); + if (MEM_read32(match) == MEM_read32(ip)) + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4; + } + + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = curr - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) break; + } + } + } + } + return ml; +} + +/* Inlining is important to hardwire a hot branch (template emulation) */ +FORCE_INLINE_TEMPLATE size_t ZSTD_RowFindBestMatch_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + const ZSTD_dictMode_e dictMode, size_t* offsetPtr, const U32 rowLog) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_RowFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, dictMode, rowLog); + case 5 : return ZSTD_RowFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, dictMode, rowLog); + case 7 : + case 6 : return ZSTD_RowFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, dictMode, rowLog); + } +} + +FORCE_INLINE_TEMPLATE size_t ZSTD_RowFindBestMatch_selectRowLog ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + const U32 cappedSearchLog = MIN(ms->cParams.searchLog, 5); + switch(cappedSearchLog) + { + default : + case 4 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_noDict, offsetPtr, 4); + case 5 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_noDict, offsetPtr, 5); + } +} + +FORCE_INLINE_TEMPLATE size_t ZSTD_RowFindBestMatch_dictMatchState_selectRowLog( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + const U32 cappedSearchLog = MIN(ms->cParams.searchLog, 5); + switch(cappedSearchLog) + { + default : + case 4 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_dictMatchState, offsetPtr, 4); + case 5 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_dictMatchState, offsetPtr, 5); + } +} + +FORCE_INLINE_TEMPLATE size_t ZSTD_RowFindBestMatch_dedicatedDictSearch_selectRowLog( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + const U32 cappedSearchLog = MIN(ms->cParams.searchLog, 5); + switch(cappedSearchLog) + { + default : + case 4 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_dedicatedDictSearch, offsetPtr, 4); + case 5 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_dedicatedDictSearch, offsetPtr, 5); + } +} + +FORCE_INLINE_TEMPLATE size_t ZSTD_RowFindBestMatch_extDict_selectRowLog ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + const U32 cappedSearchLog = MIN(ms->cParams.searchLog, 5); + switch(cappedSearchLog) + { + default : + case 4 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_extDict, offsetPtr, 4); + case 5 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_extDict, offsetPtr, 5); + } +} + /* ******************************* * Common parser - lazy strategy *********************************/ -typedef enum { search_hashChain, search_binaryTree } searchMethod_e; +typedef enum { search_hashChain=0, search_binaryTree=1, search_rowHash=2 } searchMethod_e; FORCE_INLINE_TEMPLATE size_t ZSTD_compressBlock_lazy_generic( @@ -863,10 +1524,11 @@ ZSTD_compressBlock_lazy_generic( const BYTE* ip = istart; const BYTE* anchor = istart; const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; + const BYTE* const ilimit = searchMethod == search_rowHash ? iend - 16 : iend - 8; const BYTE* const base = ms->window.base; const U32 prefixLowestIndex = ms->window.dictLimit; const BYTE* const prefixLowest = base + prefixLowestIndex; + const U32 rowLog = ms->cParams.searchLog < 5 ? 4 : 5; typedef size_t (*searchMax_f)( ZSTD_matchState_t* ms, @@ -878,26 +1540,30 @@ ZSTD_compressBlock_lazy_generic( * that should never occur (extDict modes go to the other implementation * below and there is no DDSS for binary tree search yet). */ - const searchMax_f searchFuncs[4][2] = { + const searchMax_f searchFuncs[4][3] = { { ZSTD_HcFindBestMatch_selectMLS, - ZSTD_BtFindBestMatch_selectMLS + ZSTD_BtFindBestMatch_selectMLS, + ZSTD_RowFindBestMatch_selectRowLog }, { + NULL, NULL, NULL }, { ZSTD_HcFindBestMatch_dictMatchState_selectMLS, - ZSTD_BtFindBestMatch_dictMatchState_selectMLS + ZSTD_BtFindBestMatch_dictMatchState_selectMLS, + ZSTD_RowFindBestMatch_dictMatchState_selectRowLog }, { ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS, - NULL + NULL, + ZSTD_RowFindBestMatch_dedicatedDictSearch_selectRowLog } }; - searchMax_f const searchMax = searchFuncs[dictMode][searchMethod == search_binaryTree]; + searchMax_f const searchMax = searchFuncs[dictMode][(int)searchMethod]; U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0; const int isDMS = dictMode == ZSTD_dictMatchState; @@ -915,9 +1581,7 @@ ZSTD_compressBlock_lazy_generic( assert(searchMax != NULL); - DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode); - - /* init */ + DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u) (searchFunc=%u)", (U32)dictMode, (U32)searchMethod); ip += (dictAndPrefixLength == 0); if (dictMode == ZSTD_noDict) { U32 const curr = (U32)(ip - base); @@ -933,6 +1597,12 @@ ZSTD_compressBlock_lazy_generic( assert(offset_2 <= dictAndPrefixLength); } + if (searchMethod == search_rowHash) { + ZSTD_row_fillHashCache(ms, base, rowLog, + MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */), + ms->nextToUpdate, ilimit); + } + /* Match Loop */ #if defined(__GNUC__) && defined(__x86_64__) /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the @@ -1198,6 +1868,70 @@ size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch); } +/* Row-based matchfinder */ +size_t ZSTD_compressBlock_lazy2_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_lazy_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_greedy_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_lazy2_dictMatchState_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_lazy_dictMatchState_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_greedy_dictMatchState_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dictMatchState); +} + + +size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dedicatedDictSearch); +} + +size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dedicatedDictSearch); +} + +size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dedicatedDictSearch); +} FORCE_INLINE_TEMPLATE size_t ZSTD_compressBlock_lazy_extDict_generic( @@ -1210,7 +1944,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( const BYTE* ip = istart; const BYTE* anchor = istart; const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; + const BYTE* const ilimit = searchMethod == search_rowHash ? iend - 16 : iend - 8; const BYTE* const base = ms->window.base; const U32 dictLimit = ms->window.dictLimit; const BYTE* const prefixStart = base + dictLimit; @@ -1218,18 +1952,28 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( const BYTE* const dictEnd = dictBase + dictLimit; const BYTE* const dictStart = dictBase + ms->window.lowLimit; const U32 windowLog = ms->cParams.windowLog; + const U32 rowLog = ms->cParams.searchLog < 5 ? 4 : 5; typedef size_t (*searchMax_f)( ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); - searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS; - + const searchMax_f searchFuncs[3] = { + ZSTD_HcFindBestMatch_extDict_selectMLS, + ZSTD_BtFindBestMatch_extDict_selectMLS, + ZSTD_RowFindBestMatch_extDict_selectRowLog + }; + searchMax_f searchMax = searchFuncs[(int)searchMethod]; U32 offset_1 = rep[0], offset_2 = rep[1]; - DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic"); + DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic (searchFunc=%u)", (U32)searchMethod); /* init */ ip += (ip == prefixStart); + if (searchMethod == search_rowHash) { + ZSTD_row_fillHashCache(ms, base, rowLog, + MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */), + ms->nextToUpdate, ilimit); + } /* Match Loop */ #if defined(__GNUC__) && defined(__x86_64__) @@ -1410,3 +2154,26 @@ size_t ZSTD_compressBlock_btlazy2_extDict( { return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2); } + +size_t ZSTD_compressBlock_greedy_extDict_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0); +} + +size_t ZSTD_compressBlock_lazy_extDict_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) + +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1); +} + +size_t ZSTD_compressBlock_lazy2_extDict_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) + +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2); +} diff --git a/lib/compress/zstd_lazy.h b/lib/compress/zstd_lazy.h index b75f7e8ae..150f7b390 100644 --- a/lib/compress/zstd_lazy.h +++ b/lib/compress/zstd_lazy.h @@ -26,6 +26,7 @@ extern "C" { #define ZSTD_LAZY_DDSS_BUCKET_LOG 2 U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip); +void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip); void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip); @@ -43,6 +44,15 @@ size_t ZSTD_compressBlock_lazy( size_t ZSTD_compressBlock_greedy( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); size_t ZSTD_compressBlock_btlazy2_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], @@ -56,6 +66,15 @@ size_t ZSTD_compressBlock_lazy_dictMatchState( size_t ZSTD_compressBlock_greedy_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_dictMatchState_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_dictMatchState_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_dictMatchState_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], @@ -66,6 +85,15 @@ size_t ZSTD_compressBlock_lazy_dedicatedDictSearch( size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); size_t ZSTD_compressBlock_greedy_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], @@ -76,9 +104,19 @@ size_t ZSTD_compressBlock_lazy_extDict( size_t ZSTD_compressBlock_lazy2_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_extDict_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_extDict_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_extDict_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); size_t ZSTD_compressBlock_btlazy2_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize); + #if defined (__cplusplus) } diff --git a/lib/compress/zstd_ldm.c b/lib/compress/zstd_ldm.c index 66dbc2875..7dea97aa8 100644 --- a/lib/compress/zstd_ldm.c +++ b/lib/compress/zstd_ldm.c @@ -622,12 +622,13 @@ void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) { size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_useRowMatchFinderMode_e useRowMatchFinder, void const* src, size_t srcSize) { const ZSTD_compressionParameters* const cParams = &ms->cParams; unsigned const minMatch = cParams->minMatch; ZSTD_blockCompressor const blockCompressor = - ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms)); + ZSTD_selectBlockCompressor(cParams->strategy, useRowMatchFinder, ZSTD_matchState_dictMode(ms)); /* Input bounds */ BYTE const* const istart = (BYTE const*)src; BYTE const* const iend = istart + srcSize; diff --git a/lib/compress/zstd_ldm.h b/lib/compress/zstd_ldm.h index 7e69cc9b4..393466fa9 100644 --- a/lib/compress/zstd_ldm.h +++ b/lib/compress/zstd_ldm.h @@ -66,6 +66,7 @@ size_t ZSTD_ldm_generateSequences( */ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_useRowMatchFinderMode_e useRowMatchFinder, void const* src, size_t srcSize); /** diff --git a/lib/zstd.h b/lib/zstd.h index 3a4a755e6..b95959a34 100644 --- a/lib/zstd.h +++ b/lib/zstd.h @@ -420,6 +420,7 @@ typedef enum { * ZSTD_c_blockDelimiters * ZSTD_c_validateSequences * ZSTD_c_splitBlocks + * ZSTD_c_useRowMatchFinder * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. * note : never ever use experimentalParam? names directly; * also, the enums values themselves are unstable and can still change. @@ -436,7 +437,8 @@ typedef enum { ZSTD_c_experimentalParam10=1007, ZSTD_c_experimentalParam11=1008, ZSTD_c_experimentalParam12=1009, - ZSTD_c_experimentalParam13=1010 + ZSTD_c_experimentalParam13=1010, + ZSTD_c_experimentalParam14=1011 } ZSTD_cParameter; typedef struct { @@ -1272,6 +1274,11 @@ typedef enum { ZSTD_lcm_uncompressed = 2 /**< Always emit uncompressed literals. */ } ZSTD_literalCompressionMode_e; +typedef enum { + ZSTD_urm_auto = 0, /* Automatically determine whether or not we use row matchfinder */ + ZSTD_urm_disableRowMatchFinder = 1, /* Never use row matchfinder */ + ZSTD_urm_enableRowMatchFinder = 2 /* Always use row matchfinder when applicable */ +} ZSTD_useRowMatchFinderMode_e; /*************************************** * Frame size functions @@ -1843,6 +1850,19 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre */ #define ZSTD_c_splitBlocks ZSTD_c_experimentalParam13 +/* ZSTD_c_useRowMatchFinder + * Default is ZSTD_urm_auto. + * Controlled with ZSTD_useRowMatchFinderMode_e enum. + * + * By default, in ZSTD_urm_auto, when finalizing the compression parameters, the library + * will decide at runtime whether to use the row-based matchfinder based on support for SIMD + * instructions as well as the windowLog. + * + * Set to ZSTD_urm_disableRowMatchFinder to never use row-based matchfinder. + * Set to ZSTD_urm_enableRowMatchFinder to force usage of row-based matchfinder. + */ +#define ZSTD_c_useRowMatchFinder ZSTD_c_experimentalParam14 + /*! ZSTD_CCtx_getParameter() : * Get the requested compression parameter value, selected by enum ZSTD_cParameter, * and store it into int* value. diff --git a/programs/benchzstd.c b/programs/benchzstd.c index cd7902fbe..c40f0a24c 100644 --- a/programs/benchzstd.c +++ b/programs/benchzstd.c @@ -137,7 +137,8 @@ BMK_advancedParams_t BMK_initAdvancedParams(void) { 0, /* ldmHashLog */ 0, /* ldmBuckSizeLog */ 0, /* ldmHashRateLog */ - ZSTD_lcm_auto /* literalCompressionMode */ + ZSTD_lcm_auto, /* literalCompressionMode */ + 0 /* useRowMatchFinder */ }; return res; } @@ -175,6 +176,7 @@ BMK_initCCtx(ZSTD_CCtx* ctx, CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_nbWorkers, adv->nbWorkers)); } CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_compressionLevel, cLevel)); + CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_useRowMatchFinder, adv->useRowMatchFinder)); CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_enableLongDistanceMatching, adv->ldmFlag)); CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmMinMatch, adv->ldmMinMatch)); CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmHashLog, adv->ldmHashLog)); diff --git a/programs/benchzstd.h b/programs/benchzstd.h index bdeecae7a..9b40dcc29 100644 --- a/programs/benchzstd.h +++ b/programs/benchzstd.h @@ -117,6 +117,7 @@ typedef struct { int ldmBucketSizeLog; int ldmHashRateLog; ZSTD_literalCompressionMode_e literalCompressionMode; + int useRowMatchFinder; /* use row-based matchfinder if possible */ } BMK_advancedParams_t; /* returns default parameters used by nonAdvanced functions */ diff --git a/programs/fileio.c b/programs/fileio.c index 66b86f710..fab7918d7 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -298,6 +298,7 @@ struct FIO_prefs_s { int blockSize; int overlapLog; U32 adaptiveMode; + U32 useRowMatchFinder; int rsyncable; int minAdaptLevel; int maxAdaptLevel; @@ -468,6 +469,10 @@ void FIO_setAdaptiveMode(FIO_prefs_t* const prefs, unsigned adapt) { prefs->adaptiveMode = adapt; } +void FIO_setUseRowMatchFinder(FIO_prefs_t* const prefs, int useRowMatchFinder) { + prefs->useRowMatchFinder = useRowMatchFinder; +} + void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable) { if ((rsyncable>0) && (prefs->nbWorkers==0)) EXM_THROW(1, "Rsyncable mode is not compatible with single thread mode \n"); @@ -986,6 +991,7 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs, if (prefs->ldmHashRateLog != FIO_LDM_PARAM_NOTSET) { CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashRateLog, prefs->ldmHashRateLog) ); } + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_useRowMatchFinder, prefs->useRowMatchFinder)); /* compression parameters */ CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_windowLog, (int)comprParams.windowLog) ); CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_chainLog, (int)comprParams.chainLog) ); diff --git a/programs/fileio.h b/programs/fileio.h index 252df2f26..1f5db4b52 100644 --- a/programs/fileio.h +++ b/programs/fileio.h @@ -77,6 +77,7 @@ void FIO_overwriteMode(FIO_prefs_t* const prefs); void FIO_setAdaptiveMode(FIO_prefs_t* const prefs, unsigned adapt); void FIO_setAdaptMin(FIO_prefs_t* const prefs, int minCLevel); void FIO_setAdaptMax(FIO_prefs_t* const prefs, int maxCLevel); +void FIO_setUseRowMatchFinder(FIO_prefs_t* const prefs, int useRowMatchFinder); void FIO_setBlockSize(FIO_prefs_t* const prefs, int blockSize); void FIO_setChecksumFlag(FIO_prefs_t* const prefs, int checksumFlag); void FIO_setDictIDFlag(FIO_prefs_t* const prefs, int dictIDFlag); diff --git a/programs/zstdcli.c b/programs/zstdcli.c index cda2d9578..d9d2c701e 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -205,6 +205,7 @@ static void usage_advanced(const char* programName) DISPLAYOUT( "--long[=#]: enable long distance matching with given window log (default: %u) \n", g_defaultMaxWindowLog); DISPLAYOUT( "--fast[=#]: switch to very fast compression levels (default: %u) \n", 1); DISPLAYOUT( "--adapt : dynamically adapt compression level to I/O conditions \n"); + DISPLAYOUT( "--[no-]row-match-finder : force enable/disable usage of fast row-based matchfinder for greedy, lazy, and lazy2 strategies \n"); # ifdef ZSTD_MULTITHREAD DISPLAYOUT( " -T# : spawns # compression threads (default: 1, 0==# cores) \n"); DISPLAYOUT( " -B# : select size of each job (default: 0==automatic) \n"); @@ -730,6 +731,7 @@ int main(int const argCount, const char* argv[]) main_pause = 0, nbWorkers = 0, adapt = 0, + useRowMatchFinder = 0, adaptMin = MINCLEVEL, adaptMax = MAXCLEVEL, rsyncable = 0, @@ -857,6 +859,8 @@ int main(int const argCount, const char* argv[]) if (!strcmp(argument, "--content-size")) { contentSize = 1; continue; } if (!strcmp(argument, "--no-content-size")) { contentSize = 0; continue; } if (!strcmp(argument, "--adapt")) { adapt = 1; continue; } + if (!strcmp(argument, "--no-row-match-finder")) { useRowMatchFinder = 1; continue; } + if (!strcmp(argument, "--row-match-finder")) { useRowMatchFinder = 2; continue; } if (longCommandWArg(&argument, "--adapt=")) { adapt = 1; if (!parseAdaptParameters(argument, &adaptMin, &adaptMax)) { badusage(programName); CLEAN_RETURN(1); } continue; } if (!strcmp(argument, "--single-thread")) { nbWorkers = 0; singleThread = 1; continue; } if (!strcmp(argument, "--format=zstd")) { suffix = ZSTD_EXTENSION; FIO_setCompressionType(prefs, FIO_zstdCompression); continue; } @@ -1196,6 +1200,7 @@ int main(int const argCount, const char* argv[]) benchParams.ldmFlag = ldmFlag; benchParams.ldmMinMatch = (int)g_ldmMinMatch; benchParams.ldmHashLog = (int)g_ldmHashLog; + benchParams.useRowMatchFinder = useRowMatchFinder; if (g_ldmBucketSizeLog != LDM_PARAM_DEFAULT) { benchParams.ldmBucketSizeLog = (int)g_ldmBucketSizeLog; } @@ -1348,6 +1353,7 @@ int main(int const argCount, const char* argv[]) if (g_ldmBucketSizeLog != LDM_PARAM_DEFAULT) FIO_setLdmBucketSizeLog(prefs, (int)g_ldmBucketSizeLog); if (g_ldmHashRateLog != LDM_PARAM_DEFAULT) FIO_setLdmHashRateLog(prefs, (int)g_ldmHashRateLog); FIO_setAdaptiveMode(prefs, (unsigned)adapt); + FIO_setUseRowMatchFinder(prefs, useRowMatchFinder); FIO_setAdaptMin(prefs, adaptMin); FIO_setAdaptMax(prefs, adaptMax); FIO_setRsyncable(prefs, rsyncable); @@ -1387,7 +1393,7 @@ int main(int const argCount, const char* argv[]) else operationResult = FIO_compressMultipleFilenames(fCtx, prefs, filenames->fileNames, outMirroredDirName, outDirName, outFileName, suffix, dictFileName, cLevel, compressionParams); #else - (void)contentSize; (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; (void)streamSrcSize; (void)srcSizeHint; (void)ZSTD_strategyMap; /* not used when ZSTD_NOCOMPRESS set */ + (void)contentSize; (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; (void)streamSrcSize; (void)srcSizeHint; (void)ZSTD_strategyMap; (void)useRowMatchFinder; /* not used when ZSTD_NOCOMPRESS set */ DISPLAY("Compression not supported \n"); #endif } else { /* decompression or test */ diff --git a/tests/fuzz/.gitignore b/tests/fuzz/.gitignore index 9bd280c08..8ef3a3efd 100644 --- a/tests/fuzz/.gitignore +++ b/tests/fuzz/.gitignore @@ -19,6 +19,7 @@ sequence_compression_api fuzz-*.log rt_lib_* d_lib_* +crash-* # misc trace diff --git a/tests/fuzz/zstd_helpers.c b/tests/fuzz/zstd_helpers.c index 0607a0745..fcbe3361d 100644 --- a/tests/fuzz/zstd_helpers.c +++ b/tests/fuzz/zstd_helpers.c @@ -91,6 +91,7 @@ void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, FUZZ_dataProducer /* Set misc parameters */ setRand(cctx, ZSTD_c_nbWorkers, 0, 2, producer); setRand(cctx, ZSTD_c_rsyncable, 0, 1, producer); + setRand(cctx, ZSTD_c_useRowMatchFinder, 0, 2, producer); setRand(cctx, ZSTD_c_forceMaxWindow, 0, 1, producer); setRand(cctx, ZSTD_c_literalCompressionMode, 0, 2, producer); setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, producer); diff --git a/tests/fuzzer.c b/tests/fuzzer.c index d58e6a568..1b1042393 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -1757,13 +1757,14 @@ static int basicUnitTests(U32 const seed, double compressibility) size_t const contentSize = 9 KB; const void* const dict = (const char*)CNBuffer; const void* const contentStart = (const char*)dict + flatdictSize; + /* These upper bounds are generally within a few bytes of the compressed size */ size_t const target_nodict_cSize[22+1] = { 3840, 3770, 3870, 3830, 3770, 3770, 3770, 3770, 3750, 3750, 3742, 3670, 3670, 3660, 3660, 3660, 3660, 3660, 3660, 3660, 3660, 3660, 3660 }; size_t const target_wdict_cSize[22+1] = { 2830, 2890, 2890, 2820, 2940, - 2950, 2950, 2921, 2900, 2891, + 2950, 2950, 2925, 2900, 2891, 2910, 2910, 2910, 2770, 2760, 2750, 2750, 2750, 2750, 2750, 2750, 2750, 2750 }; @@ -1800,6 +1801,22 @@ static int basicUnitTests(U32 const seed, double compressibility) DISPLAYLEVEL(4, "level %i with dictionary : max expected %u >= reached %u \n", l, (unsigned)target_wdict_cSize[l], (unsigned)wdict_cSize); } + /* Dict compression with DMS */ + for ( l=1 ; l <= maxLevel; l++) { + size_t wdict_cSize; + CHECK_Z( ZSTD_CCtx_loadDictionary(ctxOrig, dict, flatdictSize) ); + CHECK_Z( ZSTD_CCtx_setParameter(ctxOrig, ZSTD_c_compressionLevel, l) ); + CHECK_Z( ZSTD_CCtx_setParameter(ctxOrig, ZSTD_c_enableDedicatedDictSearch, 0) ); + CHECK_Z( ZSTD_CCtx_setParameter(ctxOrig, ZSTD_c_forceAttachDict, ZSTD_dictForceAttach) ); + wdict_cSize = ZSTD_compress2(ctxOrig, compressedBuffer, compressedBufferSize, contentStart, contentSize); + if (wdict_cSize > target_wdict_cSize[l]) { + DISPLAYLEVEL(1, "error : compression with dictionary and compress2 at level %i worse than expected (%u > %u) \n", + l, (unsigned)wdict_cSize, (unsigned)target_wdict_cSize[l]); + goto _output_error; + } + DISPLAYLEVEL(4, "level %i with dictionary and compress2 : max expected %u >= reached %u \n", + l, (unsigned)target_wdict_cSize[l], (unsigned)wdict_cSize); + } DISPLAYLEVEL(4, "compression efficiency tests OK \n"); } diff --git a/tests/regression/config.c b/tests/regression/config.c index e1bb6eaae..4c66dd150 100644 --- a/tests/regression/config.c +++ b/tests/regression/config.c @@ -92,6 +92,72 @@ .advanced_api_only = 1, \ }; +/* Define a config specifically to test row hash based levels and settings. + */ +#define ROW_LEVEL(x, y) \ + param_value_t const row_##y##_level_##x##_param_values[] = { \ + {.param = ZSTD_c_useRowMatchFinder, .value = y}, \ + {.param = ZSTD_c_compressionLevel, .value = x}, \ + }; \ + param_value_t const row_##y##_level_##x##_param_values_dms[] = { \ + {.param = ZSTD_c_useRowMatchFinder, .value = y}, \ + {.param = ZSTD_c_compressionLevel, .value = x}, \ + {.param = ZSTD_c_enableDedicatedDictSearch, .value = 0}, \ + {.param = ZSTD_c_forceAttachDict, .value = ZSTD_dictForceAttach}, \ + }; \ + param_value_t const row_##y##_level_##x##_param_values_dds[] = { \ + {.param = ZSTD_c_useRowMatchFinder, .value = y}, \ + {.param = ZSTD_c_compressionLevel, .value = x}, \ + {.param = ZSTD_c_enableDedicatedDictSearch, .value = 1}, \ + {.param = ZSTD_c_forceAttachDict, .value = ZSTD_dictForceAttach}, \ + }; \ + param_value_t const row_##y##_level_##x##_param_values_dictcopy[] = { \ + {.param = ZSTD_c_useRowMatchFinder, .value = y}, \ + {.param = ZSTD_c_compressionLevel, .value = x}, \ + {.param = ZSTD_c_enableDedicatedDictSearch, .value = 0}, \ + {.param = ZSTD_c_forceAttachDict, .value = ZSTD_dictForceCopy}, \ + }; \ + param_value_t const row_##y##_level_##x##_param_values_dictload[] = { \ + {.param = ZSTD_c_useRowMatchFinder, .value = y}, \ + {.param = ZSTD_c_compressionLevel, .value = x}, \ + {.param = ZSTD_c_enableDedicatedDictSearch, .value = 0}, \ + {.param = ZSTD_c_forceAttachDict, .value = ZSTD_dictForceLoad}, \ + }; \ + config_t const row_##y##_level_##x = { \ + .name = "level " #x " row " #y, \ + .cli_args = "-" #x, \ + .param_values = PARAM_VALUES(row_##y##_level_##x##_param_values), \ + .advanced_api_only = 1, \ + }; \ + config_t const row_##y##_level_##x##_dict_dms = { \ + .name = "level " #x " row " #y " with dict dms", \ + .cli_args = "-" #x, \ + .param_values = PARAM_VALUES(row_##y##_level_##x##_param_values_dms), \ + .use_dictionary = 1, \ + .advanced_api_only = 1, \ + }; \ + config_t const row_##y##_level_##x##_dict_dds = { \ + .name = "level " #x " row " #y " with dict dds", \ + .cli_args = "-" #x, \ + .param_values = PARAM_VALUES(row_##y##_level_##x##_param_values_dds), \ + .use_dictionary = 1, \ + .advanced_api_only = 1, \ + }; \ + config_t const row_##y##_level_##x##_dict_copy = { \ + .name = "level " #x " row " #y" with dict copy", \ + .cli_args = "-" #x, \ + .param_values = PARAM_VALUES(row_##y##_level_##x##_param_values_dictcopy), \ + .use_dictionary = 1, \ + .advanced_api_only = 1, \ + }; \ + config_t const row_##y##_level_##x##_dict_load = { \ + .name = "level " #x " row " #y " with dict load", \ + .cli_args = "-" #x, \ + .param_values = PARAM_VALUES(row_##y##_level_##x##_param_values_dictload), \ + .use_dictionary = 1, \ + .advanced_api_only = 1, \ + }; + #define PARAM_VALUES(pv) \ { .data = pv, .size = sizeof(pv) / sizeof((pv)[0]) } @@ -99,6 +165,7 @@ #undef LEVEL #undef FAST_LEVEL +#undef ROW_LEVEL static config_t no_pledged_src_size = { .name = "no source size", @@ -243,7 +310,9 @@ static config_t const* g_configs[] = { #define FAST_LEVEL(x) &level_fast##x, &level_fast##x##_dict, #define LEVEL(x) &level_##x, &level_##x##_dict, &level_##x##_dict_dms, &level_##x##_dict_dds, &level_##x##_dict_copy, &level_##x##_dict_load, +#define ROW_LEVEL(x, y) &row_##y##_level_##x, &row_##y##_level_##x##_dict_dms, &row_##y##_level_##x##_dict_dds, &row_##y##_level_##x##_dict_copy, &row_##y##_level_##x##_dict_load, #include "levels.h" +#undef ROW_LEVEL #undef LEVEL #undef FAST_LEVEL diff --git a/tests/regression/levels.h b/tests/regression/levels.h index 3b4878409..3b211f8c2 100644 --- a/tests/regression/levels.h +++ b/tests/regression/levels.h @@ -14,6 +14,9 @@ #ifndef FAST_LEVEL # error FAST_LEVEL(x) must be defined #endif +#ifndef ROW_LEVEL +# error ROW_LEVEL(x, y) must be defined +#endif /** * The levels are chosen to trigger every strategy in every source size, @@ -31,12 +34,22 @@ LEVEL(1) LEVEL(3) LEVEL(4) +/* ROW_LEVEL triggers the row hash (force enabled and disabled) with different + * dictionary strategies, and 16/32 row entries based on the level/searchLog. + * 1 == disabled, 2 == enabled. + */ +ROW_LEVEL(5, 1) +ROW_LEVEL(5, 2) LEVEL(5) LEVEL(6) +ROW_LEVEL(7, 1) +ROW_LEVEL(7, 2) LEVEL(7) LEVEL(9) +ROW_LEVEL(12, 1) +ROW_LEVEL(12, 2) LEVEL(13) LEVEL(16) diff --git a/tests/regression/results.csv b/tests/regression/results.csv index ccf328ad3..9301e58fb 100644 --- a/tests/regression/results.csv +++ b/tests/regression/results.csv @@ -6,10 +6,10 @@ silesia.tar, level 0, compress silesia.tar, level 1, compress simple, 5334885 silesia.tar, level 3, compress simple, 4861425 silesia.tar, level 4, compress simple, 4799630 -silesia.tar, level 5, compress simple, 4722324 -silesia.tar, level 6, compress simple, 4672279 -silesia.tar, level 7, compress simple, 4606715 -silesia.tar, level 9, compress simple, 4554147 +silesia.tar, level 5, compress simple, 4719256 +silesia.tar, level 6, compress simple, 4677721 +silesia.tar, level 7, compress simple, 4613541 +silesia.tar, level 9, compress simple, 4555426 silesia.tar, level 13, compress simple, 4491764 silesia.tar, level 16, compress simple, 4381332 silesia.tar, level 19, compress simple, 4281605 @@ -23,10 +23,10 @@ github.tar, level 0, compress github.tar, level 1, compress simple, 39265 github.tar, level 3, compress simple, 38441 github.tar, level 4, compress simple, 38467 -github.tar, level 5, compress simple, 39788 -github.tar, level 6, compress simple, 39603 -github.tar, level 7, compress simple, 39206 -github.tar, level 9, compress simple, 36717 +github.tar, level 5, compress simple, 39693 +github.tar, level 6, compress simple, 39621 +github.tar, level 7, compress simple, 39213 +github.tar, level 9, compress simple, 36758 github.tar, level 13, compress simple, 35621 github.tar, level 16, compress simple, 40255 github.tar, level 19, compress simple, 32837 @@ -40,10 +40,10 @@ silesia, level 0, compress silesia, level 1, compress cctx, 5313204 silesia, level 3, compress cctx, 4849552 silesia, level 4, compress cctx, 4786970 -silesia, level 5, compress cctx, 4710236 -silesia, level 6, compress cctx, 4660056 -silesia, level 7, compress cctx, 4596296 -silesia, level 9, compress cctx, 4543925 +silesia, level 5, compress cctx, 4707794 +silesia, level 6, compress cctx, 4666383 +silesia, level 7, compress cctx, 4603381 +silesia, level 9, compress cctx, 4546001 silesia, level 13, compress cctx, 4482135 silesia, level 16, compress cctx, 4377465 silesia, level 19, compress cctx, 4293330 @@ -53,7 +53,7 @@ silesia, multithreaded long distance mode, compress silesia, small window log, compress cctx, 7084179 silesia, small hash log, compress cctx, 6555021 silesia, small chain log, compress cctx, 4931148 -silesia, explicit params, compress cctx, 4794677 +silesia, explicit params, compress cctx, 4794479 silesia, uncompressed literals, compress cctx, 4849552 silesia, uncompressed literals optimal, compress cctx, 4293330 silesia, huffman literals, compress cctx, 6178460 @@ -73,13 +73,13 @@ github, level 3 with dict, compress github, level 4, compress cctx, 136199 github, level 4 with dict, compress cctx, 41725 github, level 5, compress cctx, 135121 -github, level 5 with dict, compress cctx, 38934 +github, level 5 with dict, compress cctx, 38759 github, level 6, compress cctx, 135122 -github, level 6 with dict, compress cctx, 38628 +github, level 6 with dict, compress cctx, 38669 github, level 7, compress cctx, 135122 -github, level 7 with dict, compress cctx, 38745 +github, level 7 with dict, compress cctx, 38755 github, level 9, compress cctx, 135122 -github, level 9 with dict, compress cctx, 39341 +github, level 9 with dict, compress cctx, 39398 github, level 13, compress cctx, 134064 github, level 13 with dict, compress cctx, 39948 github, level 16, compress cctx, 134064 @@ -104,10 +104,10 @@ silesia, level 0, zstdcli, silesia, level 1, zstdcli, 5314210 silesia, level 3, zstdcli, 4849600 silesia, level 4, zstdcli, 4787018 -silesia, level 5, zstdcli, 4710284 -silesia, level 6, zstdcli, 4660104 -silesia, level 7, zstdcli, 4596344 -silesia, level 9, zstdcli, 4543973 +silesia, level 5, zstdcli, 4707842 +silesia, level 6, zstdcli, 4666431 +silesia, level 7, zstdcli, 4603429 +silesia, level 9, zstdcli, 4546049 silesia, level 13, zstdcli, 4482183 silesia, level 16, zstdcli, 4360299 silesia, level 19, zstdcli, 4283285 @@ -117,7 +117,7 @@ silesia, multithreaded long distance mode, zstdcli, silesia, small window log, zstdcli, 7111012 silesia, small hash log, zstdcli, 6526189 silesia, small chain log, zstdcli, 4912245 -silesia, explicit params, zstdcli, 4797112 +silesia, explicit params, zstdcli, 4795887 silesia, uncompressed literals, zstdcli, 5128030 silesia, uncompressed literals optimal, zstdcli, 4317944 silesia, huffman literals, zstdcli, 5331216 @@ -129,10 +129,10 @@ silesia.tar, level 0, zstdcli, silesia.tar, level 1, zstdcli, 5336318 silesia.tar, level 3, zstdcli, 4861512 silesia.tar, level 4, zstdcli, 4800529 -silesia.tar, level 5, zstdcli, 4723364 -silesia.tar, level 6, zstdcli, 4673663 -silesia.tar, level 7, zstdcli, 4608403 -silesia.tar, level 9, zstdcli, 4554751 +silesia.tar, level 5, zstdcli, 4720121 +silesia.tar, level 6, zstdcli, 4678661 +silesia.tar, level 7, zstdcli, 4614424 +silesia.tar, level 9, zstdcli, 4556062 silesia.tar, level 13, zstdcli, 4491768 silesia.tar, level 16, zstdcli, 4356831 silesia.tar, level 19, zstdcli, 4264491 @@ -143,7 +143,7 @@ silesia.tar, multithreaded long distance mode, zstdcli, silesia.tar, small window log, zstdcli, 7101576 silesia.tar, small hash log, zstdcli, 6529290 silesia.tar, small chain log, zstdcli, 4917022 -silesia.tar, explicit params, zstdcli, 4822362 +silesia.tar, explicit params, zstdcli, 4821274 silesia.tar, uncompressed literals, zstdcli, 5129559 silesia.tar, uncompressed literals optimal, zstdcli, 4307457 silesia.tar, huffman literals, zstdcli, 5347610 @@ -163,13 +163,13 @@ github, level 3 with dict, zstdcli, github, level 4, zstdcli, 138199 github, level 4 with dict, zstdcli, 43251 github, level 5, zstdcli, 137121 -github, level 5 with dict, zstdcli, 40741 +github, level 5 with dict, zstdcli, 40737 github, level 6, zstdcli, 137122 -github, level 6 with dict, zstdcli, 40632 +github, level 6 with dict, zstdcli, 40630 github, level 7, zstdcli, 137122 -github, level 7 with dict, zstdcli, 40771 +github, level 7 with dict, zstdcli, 40747 github, level 9, zstdcli, 137122 -github, level 9 with dict, zstdcli, 41332 +github, level 9 with dict, zstdcli, 41338 github, level 13, zstdcli, 136064 github, level 13 with dict, zstdcli, 41743 github, level 16, zstdcli, 136064 @@ -201,14 +201,14 @@ github.tar, level 3, zstdcli, github.tar, level 3 with dict, zstdcli, 37999 github.tar, level 4, zstdcli, 38471 github.tar, level 4 with dict, zstdcli, 37952 -github.tar, level 5, zstdcli, 39792 -github.tar, level 5 with dict, zstdcli, 39231 -github.tar, level 6, zstdcli, 39607 -github.tar, level 6 with dict, zstdcli, 38669 -github.tar, level 7, zstdcli, 39210 -github.tar, level 7 with dict, zstdcli, 37958 -github.tar, level 9, zstdcli, 36721 -github.tar, level 9 with dict, zstdcli, 36886 +github.tar, level 5, zstdcli, 39697 +github.tar, level 5 with dict, zstdcli, 39027 +github.tar, level 6, zstdcli, 39625 +github.tar, level 6 with dict, zstdcli, 38614 +github.tar, level 7, zstdcli, 39217 +github.tar, level 7 with dict, zstdcli, 37871 +github.tar, level 9, zstdcli, 36762 +github.tar, level 9 with dict, zstdcli, 36641 github.tar, level 13, zstdcli, 35625 github.tar, level 13 with dict, zstdcli, 38730 github.tar, level 16, zstdcli, 40259 @@ -223,7 +223,7 @@ github.tar, multithreaded long distance mode, zstdcli, github.tar, small window log, zstdcli, 199432 github.tar, small hash log, zstdcli, 129874 github.tar, small chain log, zstdcli, 41673 -github.tar, explicit params, zstdcli, 41199 +github.tar, explicit params, zstdcli, 41227 github.tar, uncompressed literals, zstdcli, 41126 github.tar, uncompressed literals optimal, zstdcli, 35392 github.tar, huffman literals, zstdcli, 38804 @@ -235,10 +235,16 @@ silesia, level 0, advanced silesia, level 1, advanced one pass, 5313204 silesia, level 3, advanced one pass, 4849552 silesia, level 4, advanced one pass, 4786970 -silesia, level 5, advanced one pass, 4710236 -silesia, level 6, advanced one pass, 4660056 -silesia, level 7, advanced one pass, 4596296 -silesia, level 9, advanced one pass, 4543925 +silesia, level 5 row 1, advanced one pass, 4710236 +silesia, level 5 row 2, advanced one pass, 4707794 +silesia, level 5, advanced one pass, 4707794 +silesia, level 6, advanced one pass, 4666383 +silesia, level 7 row 1, advanced one pass, 4596296 +silesia, level 7 row 2, advanced one pass, 4603381 +silesia, level 7, advanced one pass, 4603381 +silesia, level 9, advanced one pass, 4546001 +silesia, level 12 row 1, advanced one pass, 4519288 +silesia, level 12 row 2, advanced one pass, 4521397 silesia, level 13, advanced one pass, 4482135 silesia, level 16, advanced one pass, 4360251 silesia, level 19, advanced one pass, 4283237 @@ -249,7 +255,7 @@ silesia, multithreaded long distance mode, advanced silesia, small window log, advanced one pass, 7095919 silesia, small hash log, advanced one pass, 6526141 silesia, small chain log, advanced one pass, 4912197 -silesia, explicit params, advanced one pass, 4797095 +silesia, explicit params, advanced one pass, 4795856 silesia, uncompressed literals, advanced one pass, 5127982 silesia, uncompressed literals optimal, advanced one pass, 4317896 silesia, huffman literals, advanced one pass, 5326268 @@ -261,10 +267,16 @@ silesia.tar, level 0, advanced silesia.tar, level 1, advanced one pass, 5334885 silesia.tar, level 3, advanced one pass, 4861425 silesia.tar, level 4, advanced one pass, 4799630 -silesia.tar, level 5, advanced one pass, 4722324 -silesia.tar, level 6, advanced one pass, 4672279 -silesia.tar, level 7, advanced one pass, 4606715 -silesia.tar, level 9, advanced one pass, 4554147 +silesia.tar, level 5 row 1, advanced one pass, 4722324 +silesia.tar, level 5 row 2, advanced one pass, 4719256 +silesia.tar, level 5, advanced one pass, 4719256 +silesia.tar, level 6, advanced one pass, 4677721 +silesia.tar, level 7 row 1, advanced one pass, 4606715 +silesia.tar, level 7 row 2, advanced one pass, 4613541 +silesia.tar, level 7, advanced one pass, 4613541 +silesia.tar, level 9, advanced one pass, 4555426 +silesia.tar, level 12 row 1, advanced one pass, 4529459 +silesia.tar, level 12 row 2, advanced one pass, 4530256 silesia.tar, level 13, advanced one pass, 4491764 silesia.tar, level 16, advanced one pass, 4356827 silesia.tar, level 19, advanced one pass, 4264487 @@ -275,7 +287,7 @@ silesia.tar, multithreaded long distance mode, advanced silesia.tar, small window log, advanced one pass, 7101530 silesia.tar, small hash log, advanced one pass, 6529232 silesia.tar, small chain log, advanced one pass, 4917041 -silesia.tar, explicit params, advanced one pass, 4808589 +silesia.tar, explicit params, advanced one pass, 4807380 silesia.tar, uncompressed literals, advanced one pass, 5129458 silesia.tar, uncompressed literals optimal, advanced one pass, 4307453 silesia.tar, huffman literals, advanced one pass, 5347335 @@ -310,30 +322,60 @@ github, level 4 with dict dms, advanced github, level 4 with dict dds, advanced one pass, 41251 github, level 4 with dict copy, advanced one pass, 41216 github, level 4 with dict load, advanced one pass, 41159 +github, level 5 row 1, advanced one pass, 135121 +github, level 5 row 1 with dict dms, advanced one pass, 38938 +github, level 5 row 1 with dict dds, advanced one pass, 38741 +github, level 5 row 1 with dict copy, advanced one pass, 38934 +github, level 5 row 1 with dict load, advanced one pass, 40725 +github, level 5 row 2, advanced one pass, 134584 +github, level 5 row 2 with dict dms, advanced one pass, 38758 +github, level 5 row 2 with dict dds, advanced one pass, 38737 +github, level 5 row 2 with dict copy, advanced one pass, 38759 +github, level 5 row 2 with dict load, advanced one pass, 41518 github, level 5, advanced one pass, 135121 -github, level 5 with dict, advanced one pass, 38938 -github, level 5 with dict dms, advanced one pass, 38938 -github, level 5 with dict dds, advanced one pass, 38741 -github, level 5 with dict copy, advanced one pass, 38934 +github, level 5 with dict, advanced one pass, 38758 +github, level 5 with dict dms, advanced one pass, 38758 +github, level 5 with dict dds, advanced one pass, 38737 +github, level 5 with dict copy, advanced one pass, 38759 github, level 5 with dict load, advanced one pass, 40725 github, level 6, advanced one pass, 135122 -github, level 6 with dict, advanced one pass, 38632 -github, level 6 with dict dms, advanced one pass, 38632 -github, level 6 with dict dds, advanced one pass, 38632 -github, level 6 with dict copy, advanced one pass, 38628 +github, level 6 with dict, advanced one pass, 38671 +github, level 6 with dict dms, advanced one pass, 38671 +github, level 6 with dict dds, advanced one pass, 38630 +github, level 6 with dict copy, advanced one pass, 38669 github, level 6 with dict load, advanced one pass, 40695 +github, level 7 row 1, advanced one pass, 135122 +github, level 7 row 1 with dict dms, advanced one pass, 38771 +github, level 7 row 1 with dict dds, advanced one pass, 38771 +github, level 7 row 1 with dict copy, advanced one pass, 38745 +github, level 7 row 1 with dict load, advanced one pass, 40695 +github, level 7 row 2, advanced one pass, 134584 +github, level 7 row 2 with dict dms, advanced one pass, 38758 +github, level 7 row 2 with dict dds, advanced one pass, 38747 +github, level 7 row 2 with dict copy, advanced one pass, 38755 +github, level 7 row 2 with dict load, advanced one pass, 41030 github, level 7, advanced one pass, 135122 -github, level 7 with dict, advanced one pass, 38771 -github, level 7 with dict dms, advanced one pass, 38771 -github, level 7 with dict dds, advanced one pass, 38771 -github, level 7 with dict copy, advanced one pass, 38745 +github, level 7 with dict, advanced one pass, 38758 +github, level 7 with dict dms, advanced one pass, 38758 +github, level 7 with dict dds, advanced one pass, 38747 +github, level 7 with dict copy, advanced one pass, 38755 github, level 7 with dict load, advanced one pass, 40695 github, level 9, advanced one pass, 135122 -github, level 9 with dict, advanced one pass, 39332 -github, level 9 with dict dms, advanced one pass, 39332 -github, level 9 with dict dds, advanced one pass, 39332 -github, level 9 with dict copy, advanced one pass, 39341 +github, level 9 with dict, advanced one pass, 39437 +github, level 9 with dict dms, advanced one pass, 39437 +github, level 9 with dict dds, advanced one pass, 39338 +github, level 9 with dict copy, advanced one pass, 39398 github, level 9 with dict load, advanced one pass, 41710 +github, level 12 row 1, advanced one pass, 134180 +github, level 12 row 1 with dict dms, advanced one pass, 39677 +github, level 12 row 1 with dict dds, advanced one pass, 39677 +github, level 12 row 1 with dict copy, advanced one pass, 39677 +github, level 12 row 1 with dict load, advanced one pass, 41166 +github, level 12 row 2, advanced one pass, 134180 +github, level 12 row 2 with dict dms, advanced one pass, 39677 +github, level 12 row 2 with dict dds, advanced one pass, 39677 +github, level 12 row 2 with dict copy, advanced one pass, 39677 +github, level 12 row 2 with dict load, advanced one pass, 41166 github, level 13, advanced one pass, 134064 github, level 13 with dict, advanced one pass, 39743 github, level 13 with dict dms, advanced one pass, 39743 @@ -395,30 +437,60 @@ github.tar, level 4 with dict dms, advanced github.tar, level 4 with dict dds, advanced one pass, 37954 github.tar, level 4 with dict copy, advanced one pass, 37948 github.tar, level 4 with dict load, advanced one pass, 37927 -github.tar, level 5, advanced one pass, 39788 -github.tar, level 5 with dict, advanced one pass, 39715 -github.tar, level 5 with dict dms, advanced one pass, 39365 -github.tar, level 5 with dict dds, advanced one pass, 39227 -github.tar, level 5 with dict copy, advanced one pass, 39715 -github.tar, level 5 with dict load, advanced one pass, 39209 -github.tar, level 6, advanced one pass, 39603 -github.tar, level 6 with dict, advanced one pass, 38800 -github.tar, level 6 with dict dms, advanced one pass, 38665 -github.tar, level 6 with dict dds, advanced one pass, 38665 -github.tar, level 6 with dict copy, advanced one pass, 38800 -github.tar, level 6 with dict load, advanced one pass, 38983 -github.tar, level 7, advanced one pass, 39206 -github.tar, level 7 with dict, advanced one pass, 38071 -github.tar, level 7 with dict dms, advanced one pass, 37954 -github.tar, level 7 with dict dds, advanced one pass, 37954 -github.tar, level 7 with dict copy, advanced one pass, 38071 -github.tar, level 7 with dict load, advanced one pass, 38584 -github.tar, level 9, advanced one pass, 36717 -github.tar, level 9 with dict, advanced one pass, 36898 -github.tar, level 9 with dict dms, advanced one pass, 36882 -github.tar, level 9 with dict dds, advanced one pass, 36882 -github.tar, level 9 with dict copy, advanced one pass, 36898 -github.tar, level 9 with dict load, advanced one pass, 36363 +github.tar, level 5 row 1, advanced one pass, 39788 +github.tar, level 5 row 1 with dict dms, advanced one pass, 39365 +github.tar, level 5 row 1 with dict dds, advanced one pass, 39227 +github.tar, level 5 row 1 with dict copy, advanced one pass, 39715 +github.tar, level 5 row 1 with dict load, advanced one pass, 39209 +github.tar, level 5 row 2, advanced one pass, 39693 +github.tar, level 5 row 2 with dict dms, advanced one pass, 39024 +github.tar, level 5 row 2 with dict dds, advanced one pass, 39023 +github.tar, level 5 row 2 with dict copy, advanced one pass, 39040 +github.tar, level 5 row 2 with dict load, advanced one pass, 39037 +github.tar, level 5, advanced one pass, 39693 +github.tar, level 5 with dict, advanced one pass, 39040 +github.tar, level 5 with dict dms, advanced one pass, 39024 +github.tar, level 5 with dict dds, advanced one pass, 39023 +github.tar, level 5 with dict copy, advanced one pass, 39040 +github.tar, level 5 with dict load, advanced one pass, 39037 +github.tar, level 6, advanced one pass, 39621 +github.tar, level 6 with dict, advanced one pass, 38622 +github.tar, level 6 with dict dms, advanced one pass, 38608 +github.tar, level 6 with dict dds, advanced one pass, 38610 +github.tar, level 6 with dict copy, advanced one pass, 38622 +github.tar, level 6 with dict load, advanced one pass, 38962 +github.tar, level 7 row 1, advanced one pass, 39206 +github.tar, level 7 row 1 with dict dms, advanced one pass, 37954 +github.tar, level 7 row 1 with dict dds, advanced one pass, 37954 +github.tar, level 7 row 1 with dict copy, advanced one pass, 38071 +github.tar, level 7 row 1 with dict load, advanced one pass, 38584 +github.tar, level 7 row 2, advanced one pass, 39213 +github.tar, level 7 row 2 with dict dms, advanced one pass, 37848 +github.tar, level 7 row 2 with dict dds, advanced one pass, 37867 +github.tar, level 7 row 2 with dict copy, advanced one pass, 37848 +github.tar, level 7 row 2 with dict load, advanced one pass, 38582 +github.tar, level 7, advanced one pass, 39213 +github.tar, level 7 with dict, advanced one pass, 37848 +github.tar, level 7 with dict dms, advanced one pass, 37848 +github.tar, level 7 with dict dds, advanced one pass, 37867 +github.tar, level 7 with dict copy, advanced one pass, 37848 +github.tar, level 7 with dict load, advanced one pass, 38582 +github.tar, level 9, advanced one pass, 36758 +github.tar, level 9 with dict, advanced one pass, 36457 +github.tar, level 9 with dict dms, advanced one pass, 36549 +github.tar, level 9 with dict dds, advanced one pass, 36637 +github.tar, level 9 with dict copy, advanced one pass, 36457 +github.tar, level 9 with dict load, advanced one pass, 36350 +github.tar, level 12 row 1, advanced one pass, 36435 +github.tar, level 12 row 1 with dict dms, advanced one pass, 36986 +github.tar, level 12 row 1 with dict dds, advanced one pass, 36986 +github.tar, level 12 row 1 with dict copy, advanced one pass, 36609 +github.tar, level 12 row 1 with dict load, advanced one pass, 36419 +github.tar, level 12 row 2, advanced one pass, 36435 +github.tar, level 12 row 2 with dict dms, advanced one pass, 36986 +github.tar, level 12 row 2 with dict dds, advanced one pass, 36986 +github.tar, level 12 row 2 with dict copy, advanced one pass, 36609 +github.tar, level 12 row 2 with dict load, advanced one pass, 36424 github.tar, level 13, advanced one pass, 35621 github.tar, level 13 with dict, advanced one pass, 38726 github.tar, level 13 with dict dms, advanced one pass, 38903 @@ -445,7 +517,7 @@ github.tar, multithreaded long distance mode, advanced github.tar, small window log, advanced one pass, 198540 github.tar, small hash log, advanced one pass, 129870 github.tar, small chain log, advanced one pass, 41669 -github.tar, explicit params, advanced one pass, 41199 +github.tar, explicit params, advanced one pass, 41227 github.tar, uncompressed literals, advanced one pass, 41122 github.tar, uncompressed literals optimal, advanced one pass, 35388 github.tar, huffman literals, advanced one pass, 38777 @@ -457,10 +529,16 @@ silesia, level 0, advanced silesia, level 1, advanced one pass small out, 5313204 silesia, level 3, advanced one pass small out, 4849552 silesia, level 4, advanced one pass small out, 4786970 -silesia, level 5, advanced one pass small out, 4710236 -silesia, level 6, advanced one pass small out, 4660056 -silesia, level 7, advanced one pass small out, 4596296 -silesia, level 9, advanced one pass small out, 4543925 +silesia, level 5 row 1, advanced one pass small out, 4710236 +silesia, level 5 row 2, advanced one pass small out, 4707794 +silesia, level 5, advanced one pass small out, 4707794 +silesia, level 6, advanced one pass small out, 4666383 +silesia, level 7 row 1, advanced one pass small out, 4596296 +silesia, level 7 row 2, advanced one pass small out, 4603381 +silesia, level 7, advanced one pass small out, 4603381 +silesia, level 9, advanced one pass small out, 4546001 +silesia, level 12 row 1, advanced one pass small out, 4519288 +silesia, level 12 row 2, advanced one pass small out, 4521397 silesia, level 13, advanced one pass small out, 4482135 silesia, level 16, advanced one pass small out, 4360251 silesia, level 19, advanced one pass small out, 4283237 @@ -471,7 +549,7 @@ silesia, multithreaded long distance mode, advanced silesia, small window log, advanced one pass small out, 7095919 silesia, small hash log, advanced one pass small out, 6526141 silesia, small chain log, advanced one pass small out, 4912197 -silesia, explicit params, advanced one pass small out, 4797095 +silesia, explicit params, advanced one pass small out, 4795856 silesia, uncompressed literals, advanced one pass small out, 5127982 silesia, uncompressed literals optimal, advanced one pass small out, 4317896 silesia, huffman literals, advanced one pass small out, 5326268 @@ -483,10 +561,16 @@ silesia.tar, level 0, advanced silesia.tar, level 1, advanced one pass small out, 5334885 silesia.tar, level 3, advanced one pass small out, 4861425 silesia.tar, level 4, advanced one pass small out, 4799630 -silesia.tar, level 5, advanced one pass small out, 4722324 -silesia.tar, level 6, advanced one pass small out, 4672279 -silesia.tar, level 7, advanced one pass small out, 4606715 -silesia.tar, level 9, advanced one pass small out, 4554147 +silesia.tar, level 5 row 1, advanced one pass small out, 4722324 +silesia.tar, level 5 row 2, advanced one pass small out, 4719256 +silesia.tar, level 5, advanced one pass small out, 4719256 +silesia.tar, level 6, advanced one pass small out, 4677721 +silesia.tar, level 7 row 1, advanced one pass small out, 4606715 +silesia.tar, level 7 row 2, advanced one pass small out, 4613541 +silesia.tar, level 7, advanced one pass small out, 4613541 +silesia.tar, level 9, advanced one pass small out, 4555426 +silesia.tar, level 12 row 1, advanced one pass small out, 4529459 +silesia.tar, level 12 row 2, advanced one pass small out, 4530256 silesia.tar, level 13, advanced one pass small out, 4491764 silesia.tar, level 16, advanced one pass small out, 4356827 silesia.tar, level 19, advanced one pass small out, 4264487 @@ -497,7 +581,7 @@ silesia.tar, multithreaded long distance mode, advanced silesia.tar, small window log, advanced one pass small out, 7101530 silesia.tar, small hash log, advanced one pass small out, 6529232 silesia.tar, small chain log, advanced one pass small out, 4917041 -silesia.tar, explicit params, advanced one pass small out, 4808589 +silesia.tar, explicit params, advanced one pass small out, 4807380 silesia.tar, uncompressed literals, advanced one pass small out, 5129458 silesia.tar, uncompressed literals optimal, advanced one pass small out, 4307453 silesia.tar, huffman literals, advanced one pass small out, 5347335 @@ -532,30 +616,60 @@ github, level 4 with dict dms, advanced github, level 4 with dict dds, advanced one pass small out, 41251 github, level 4 with dict copy, advanced one pass small out, 41216 github, level 4 with dict load, advanced one pass small out, 41159 +github, level 5 row 1, advanced one pass small out, 135121 +github, level 5 row 1 with dict dms, advanced one pass small out, 38938 +github, level 5 row 1 with dict dds, advanced one pass small out, 38741 +github, level 5 row 1 with dict copy, advanced one pass small out, 38934 +github, level 5 row 1 with dict load, advanced one pass small out, 40725 +github, level 5 row 2, advanced one pass small out, 134584 +github, level 5 row 2 with dict dms, advanced one pass small out, 38758 +github, level 5 row 2 with dict dds, advanced one pass small out, 38737 +github, level 5 row 2 with dict copy, advanced one pass small out, 38759 +github, level 5 row 2 with dict load, advanced one pass small out, 41518 github, level 5, advanced one pass small out, 135121 -github, level 5 with dict, advanced one pass small out, 38938 -github, level 5 with dict dms, advanced one pass small out, 38938 -github, level 5 with dict dds, advanced one pass small out, 38741 -github, level 5 with dict copy, advanced one pass small out, 38934 +github, level 5 with dict, advanced one pass small out, 38758 +github, level 5 with dict dms, advanced one pass small out, 38758 +github, level 5 with dict dds, advanced one pass small out, 38737 +github, level 5 with dict copy, advanced one pass small out, 38759 github, level 5 with dict load, advanced one pass small out, 40725 github, level 6, advanced one pass small out, 135122 -github, level 6 with dict, advanced one pass small out, 38632 -github, level 6 with dict dms, advanced one pass small out, 38632 -github, level 6 with dict dds, advanced one pass small out, 38632 -github, level 6 with dict copy, advanced one pass small out, 38628 +github, level 6 with dict, advanced one pass small out, 38671 +github, level 6 with dict dms, advanced one pass small out, 38671 +github, level 6 with dict dds, advanced one pass small out, 38630 +github, level 6 with dict copy, advanced one pass small out, 38669 github, level 6 with dict load, advanced one pass small out, 40695 +github, level 7 row 1, advanced one pass small out, 135122 +github, level 7 row 1 with dict dms, advanced one pass small out, 38771 +github, level 7 row 1 with dict dds, advanced one pass small out, 38771 +github, level 7 row 1 with dict copy, advanced one pass small out, 38745 +github, level 7 row 1 with dict load, advanced one pass small out, 40695 +github, level 7 row 2, advanced one pass small out, 134584 +github, level 7 row 2 with dict dms, advanced one pass small out, 38758 +github, level 7 row 2 with dict dds, advanced one pass small out, 38747 +github, level 7 row 2 with dict copy, advanced one pass small out, 38755 +github, level 7 row 2 with dict load, advanced one pass small out, 41030 github, level 7, advanced one pass small out, 135122 -github, level 7 with dict, advanced one pass small out, 38771 -github, level 7 with dict dms, advanced one pass small out, 38771 -github, level 7 with dict dds, advanced one pass small out, 38771 -github, level 7 with dict copy, advanced one pass small out, 38745 +github, level 7 with dict, advanced one pass small out, 38758 +github, level 7 with dict dms, advanced one pass small out, 38758 +github, level 7 with dict dds, advanced one pass small out, 38747 +github, level 7 with dict copy, advanced one pass small out, 38755 github, level 7 with dict load, advanced one pass small out, 40695 github, level 9, advanced one pass small out, 135122 -github, level 9 with dict, advanced one pass small out, 39332 -github, level 9 with dict dms, advanced one pass small out, 39332 -github, level 9 with dict dds, advanced one pass small out, 39332 -github, level 9 with dict copy, advanced one pass small out, 39341 +github, level 9 with dict, advanced one pass small out, 39437 +github, level 9 with dict dms, advanced one pass small out, 39437 +github, level 9 with dict dds, advanced one pass small out, 39338 +github, level 9 with dict copy, advanced one pass small out, 39398 github, level 9 with dict load, advanced one pass small out, 41710 +github, level 12 row 1, advanced one pass small out, 134180 +github, level 12 row 1 with dict dms, advanced one pass small out, 39677 +github, level 12 row 1 with dict dds, advanced one pass small out, 39677 +github, level 12 row 1 with dict copy, advanced one pass small out, 39677 +github, level 12 row 1 with dict load, advanced one pass small out, 41166 +github, level 12 row 2, advanced one pass small out, 134180 +github, level 12 row 2 with dict dms, advanced one pass small out, 39677 +github, level 12 row 2 with dict dds, advanced one pass small out, 39677 +github, level 12 row 2 with dict copy, advanced one pass small out, 39677 +github, level 12 row 2 with dict load, advanced one pass small out, 41166 github, level 13, advanced one pass small out, 134064 github, level 13 with dict, advanced one pass small out, 39743 github, level 13 with dict dms, advanced one pass small out, 39743 @@ -617,30 +731,60 @@ github.tar, level 4 with dict dms, advanced github.tar, level 4 with dict dds, advanced one pass small out, 37954 github.tar, level 4 with dict copy, advanced one pass small out, 37948 github.tar, level 4 with dict load, advanced one pass small out, 37927 -github.tar, level 5, advanced one pass small out, 39788 -github.tar, level 5 with dict, advanced one pass small out, 39715 -github.tar, level 5 with dict dms, advanced one pass small out, 39365 -github.tar, level 5 with dict dds, advanced one pass small out, 39227 -github.tar, level 5 with dict copy, advanced one pass small out, 39715 -github.tar, level 5 with dict load, advanced one pass small out, 39209 -github.tar, level 6, advanced one pass small out, 39603 -github.tar, level 6 with dict, advanced one pass small out, 38800 -github.tar, level 6 with dict dms, advanced one pass small out, 38665 -github.tar, level 6 with dict dds, advanced one pass small out, 38665 -github.tar, level 6 with dict copy, advanced one pass small out, 38800 -github.tar, level 6 with dict load, advanced one pass small out, 38983 -github.tar, level 7, advanced one pass small out, 39206 -github.tar, level 7 with dict, advanced one pass small out, 38071 -github.tar, level 7 with dict dms, advanced one pass small out, 37954 -github.tar, level 7 with dict dds, advanced one pass small out, 37954 -github.tar, level 7 with dict copy, advanced one pass small out, 38071 -github.tar, level 7 with dict load, advanced one pass small out, 38584 -github.tar, level 9, advanced one pass small out, 36717 -github.tar, level 9 with dict, advanced one pass small out, 36898 -github.tar, level 9 with dict dms, advanced one pass small out, 36882 -github.tar, level 9 with dict dds, advanced one pass small out, 36882 -github.tar, level 9 with dict copy, advanced one pass small out, 36898 -github.tar, level 9 with dict load, advanced one pass small out, 36363 +github.tar, level 5 row 1, advanced one pass small out, 39788 +github.tar, level 5 row 1 with dict dms, advanced one pass small out, 39365 +github.tar, level 5 row 1 with dict dds, advanced one pass small out, 39227 +github.tar, level 5 row 1 with dict copy, advanced one pass small out, 39715 +github.tar, level 5 row 1 with dict load, advanced one pass small out, 39209 +github.tar, level 5 row 2, advanced one pass small out, 39693 +github.tar, level 5 row 2 with dict dms, advanced one pass small out, 39024 +github.tar, level 5 row 2 with dict dds, advanced one pass small out, 39023 +github.tar, level 5 row 2 with dict copy, advanced one pass small out, 39040 +github.tar, level 5 row 2 with dict load, advanced one pass small out, 39037 +github.tar, level 5, advanced one pass small out, 39693 +github.tar, level 5 with dict, advanced one pass small out, 39040 +github.tar, level 5 with dict dms, advanced one pass small out, 39024 +github.tar, level 5 with dict dds, advanced one pass small out, 39023 +github.tar, level 5 with dict copy, advanced one pass small out, 39040 +github.tar, level 5 with dict load, advanced one pass small out, 39037 +github.tar, level 6, advanced one pass small out, 39621 +github.tar, level 6 with dict, advanced one pass small out, 38622 +github.tar, level 6 with dict dms, advanced one pass small out, 38608 +github.tar, level 6 with dict dds, advanced one pass small out, 38610 +github.tar, level 6 with dict copy, advanced one pass small out, 38622 +github.tar, level 6 with dict load, advanced one pass small out, 38962 +github.tar, level 7 row 1, advanced one pass small out, 39206 +github.tar, level 7 row 1 with dict dms, advanced one pass small out, 37954 +github.tar, level 7 row 1 with dict dds, advanced one pass small out, 37954 +github.tar, level 7 row 1 with dict copy, advanced one pass small out, 38071 +github.tar, level 7 row 1 with dict load, advanced one pass small out, 38584 +github.tar, level 7 row 2, advanced one pass small out, 39213 +github.tar, level 7 row 2 with dict dms, advanced one pass small out, 37848 +github.tar, level 7 row 2 with dict dds, advanced one pass small out, 37867 +github.tar, level 7 row 2 with dict copy, advanced one pass small out, 37848 +github.tar, level 7 row 2 with dict load, advanced one pass small out, 38582 +github.tar, level 7, advanced one pass small out, 39213 +github.tar, level 7 with dict, advanced one pass small out, 37848 +github.tar, level 7 with dict dms, advanced one pass small out, 37848 +github.tar, level 7 with dict dds, advanced one pass small out, 37867 +github.tar, level 7 with dict copy, advanced one pass small out, 37848 +github.tar, level 7 with dict load, advanced one pass small out, 38582 +github.tar, level 9, advanced one pass small out, 36758 +github.tar, level 9 with dict, advanced one pass small out, 36457 +github.tar, level 9 with dict dms, advanced one pass small out, 36549 +github.tar, level 9 with dict dds, advanced one pass small out, 36637 +github.tar, level 9 with dict copy, advanced one pass small out, 36457 +github.tar, level 9 with dict load, advanced one pass small out, 36350 +github.tar, level 12 row 1, advanced one pass small out, 36435 +github.tar, level 12 row 1 with dict dms, advanced one pass small out, 36986 +github.tar, level 12 row 1 with dict dds, advanced one pass small out, 36986 +github.tar, level 12 row 1 with dict copy, advanced one pass small out, 36609 +github.tar, level 12 row 1 with dict load, advanced one pass small out, 36419 +github.tar, level 12 row 2, advanced one pass small out, 36435 +github.tar, level 12 row 2 with dict dms, advanced one pass small out, 36986 +github.tar, level 12 row 2 with dict dds, advanced one pass small out, 36986 +github.tar, level 12 row 2 with dict copy, advanced one pass small out, 36609 +github.tar, level 12 row 2 with dict load, advanced one pass small out, 36424 github.tar, level 13, advanced one pass small out, 35621 github.tar, level 13 with dict, advanced one pass small out, 38726 github.tar, level 13 with dict dms, advanced one pass small out, 38903 @@ -667,7 +811,7 @@ github.tar, multithreaded long distance mode, advanced github.tar, small window log, advanced one pass small out, 198540 github.tar, small hash log, advanced one pass small out, 129870 github.tar, small chain log, advanced one pass small out, 41669 -github.tar, explicit params, advanced one pass small out, 41199 +github.tar, explicit params, advanced one pass small out, 41227 github.tar, uncompressed literals, advanced one pass small out, 41122 github.tar, uncompressed literals optimal, advanced one pass small out, 35388 github.tar, huffman literals, advanced one pass small out, 38777 @@ -679,10 +823,16 @@ silesia, level 0, advanced silesia, level 1, advanced streaming, 5314162 silesia, level 3, advanced streaming, 4849552 silesia, level 4, advanced streaming, 4786970 -silesia, level 5, advanced streaming, 4710236 -silesia, level 6, advanced streaming, 4660056 -silesia, level 7, advanced streaming, 4596296 -silesia, level 9, advanced streaming, 4543925 +silesia, level 5 row 1, advanced streaming, 4710236 +silesia, level 5 row 2, advanced streaming, 4707794 +silesia, level 5, advanced streaming, 4707794 +silesia, level 6, advanced streaming, 4666383 +silesia, level 7 row 1, advanced streaming, 4596296 +silesia, level 7 row 2, advanced streaming, 4603381 +silesia, level 7, advanced streaming, 4603381 +silesia, level 9, advanced streaming, 4546001 +silesia, level 12 row 1, advanced streaming, 4519288 +silesia, level 12 row 2, advanced streaming, 4521397 silesia, level 13, advanced streaming, 4482135 silesia, level 16, advanced streaming, 4360251 silesia, level 19, advanced streaming, 4283237 @@ -693,7 +843,7 @@ silesia, multithreaded long distance mode, advanced silesia, small window log, advanced streaming, 7112062 silesia, small hash log, advanced streaming, 6526141 silesia, small chain log, advanced streaming, 4912197 -silesia, explicit params, advanced streaming, 4797112 +silesia, explicit params, advanced streaming, 4795887 silesia, uncompressed literals, advanced streaming, 5127982 silesia, uncompressed literals optimal, advanced streaming, 4317896 silesia, huffman literals, advanced streaming, 5331168 @@ -705,10 +855,16 @@ silesia.tar, level 0, advanced silesia.tar, level 1, advanced streaming, 5336939 silesia.tar, level 3, advanced streaming, 4861427 silesia.tar, level 4, advanced streaming, 4799630 -silesia.tar, level 5, advanced streaming, 4722329 -silesia.tar, level 6, advanced streaming, 4672288 -silesia.tar, level 7, advanced streaming, 4606715 -silesia.tar, level 9, advanced streaming, 4554154 +silesia.tar, level 5 row 1, advanced streaming, 4722329 +silesia.tar, level 5 row 2, advanced streaming, 4719261 +silesia.tar, level 5, advanced streaming, 4719261 +silesia.tar, level 6, advanced streaming, 4677729 +silesia.tar, level 7 row 1, advanced streaming, 4606715 +silesia.tar, level 7 row 2, advanced streaming, 4613544 +silesia.tar, level 7, advanced streaming, 4613544 +silesia.tar, level 9, advanced streaming, 4555432 +silesia.tar, level 12 row 1, advanced streaming, 4529459 +silesia.tar, level 12 row 2, advanced streaming, 4530258 silesia.tar, level 13, advanced streaming, 4491765 silesia.tar, level 16, advanced streaming, 4356834 silesia.tar, level 19, advanced streaming, 4264392 @@ -719,7 +875,7 @@ silesia.tar, multithreaded long distance mode, advanced silesia.tar, small window log, advanced streaming, 7118769 silesia.tar, small hash log, advanced streaming, 6529235 silesia.tar, small chain log, advanced streaming, 4917021 -silesia.tar, explicit params, advanced streaming, 4808618 +silesia.tar, explicit params, advanced streaming, 4807401 silesia.tar, uncompressed literals, advanced streaming, 5129461 silesia.tar, uncompressed literals optimal, advanced streaming, 4307400 silesia.tar, huffman literals, advanced streaming, 5352360 @@ -754,30 +910,60 @@ github, level 4 with dict dms, advanced github, level 4 with dict dds, advanced streaming, 41251 github, level 4 with dict copy, advanced streaming, 41216 github, level 4 with dict load, advanced streaming, 41159 +github, level 5 row 1, advanced streaming, 135121 +github, level 5 row 1 with dict dms, advanced streaming, 38938 +github, level 5 row 1 with dict dds, advanced streaming, 38741 +github, level 5 row 1 with dict copy, advanced streaming, 38934 +github, level 5 row 1 with dict load, advanced streaming, 40725 +github, level 5 row 2, advanced streaming, 134584 +github, level 5 row 2 with dict dms, advanced streaming, 38758 +github, level 5 row 2 with dict dds, advanced streaming, 38737 +github, level 5 row 2 with dict copy, advanced streaming, 38759 +github, level 5 row 2 with dict load, advanced streaming, 41518 github, level 5, advanced streaming, 135121 -github, level 5 with dict, advanced streaming, 38938 -github, level 5 with dict dms, advanced streaming, 38938 -github, level 5 with dict dds, advanced streaming, 38741 -github, level 5 with dict copy, advanced streaming, 38934 +github, level 5 with dict, advanced streaming, 38758 +github, level 5 with dict dms, advanced streaming, 38758 +github, level 5 with dict dds, advanced streaming, 38737 +github, level 5 with dict copy, advanced streaming, 38759 github, level 5 with dict load, advanced streaming, 40725 github, level 6, advanced streaming, 135122 -github, level 6 with dict, advanced streaming, 38632 -github, level 6 with dict dms, advanced streaming, 38632 -github, level 6 with dict dds, advanced streaming, 38632 -github, level 6 with dict copy, advanced streaming, 38628 +github, level 6 with dict, advanced streaming, 38671 +github, level 6 with dict dms, advanced streaming, 38671 +github, level 6 with dict dds, advanced streaming, 38630 +github, level 6 with dict copy, advanced streaming, 38669 github, level 6 with dict load, advanced streaming, 40695 +github, level 7 row 1, advanced streaming, 135122 +github, level 7 row 1 with dict dms, advanced streaming, 38771 +github, level 7 row 1 with dict dds, advanced streaming, 38771 +github, level 7 row 1 with dict copy, advanced streaming, 38745 +github, level 7 row 1 with dict load, advanced streaming, 40695 +github, level 7 row 2, advanced streaming, 134584 +github, level 7 row 2 with dict dms, advanced streaming, 38758 +github, level 7 row 2 with dict dds, advanced streaming, 38747 +github, level 7 row 2 with dict copy, advanced streaming, 38755 +github, level 7 row 2 with dict load, advanced streaming, 41030 github, level 7, advanced streaming, 135122 -github, level 7 with dict, advanced streaming, 38771 -github, level 7 with dict dms, advanced streaming, 38771 -github, level 7 with dict dds, advanced streaming, 38771 -github, level 7 with dict copy, advanced streaming, 38745 +github, level 7 with dict, advanced streaming, 38758 +github, level 7 with dict dms, advanced streaming, 38758 +github, level 7 with dict dds, advanced streaming, 38747 +github, level 7 with dict copy, advanced streaming, 38755 github, level 7 with dict load, advanced streaming, 40695 github, level 9, advanced streaming, 135122 -github, level 9 with dict, advanced streaming, 39332 -github, level 9 with dict dms, advanced streaming, 39332 -github, level 9 with dict dds, advanced streaming, 39332 -github, level 9 with dict copy, advanced streaming, 39341 +github, level 9 with dict, advanced streaming, 39437 +github, level 9 with dict dms, advanced streaming, 39437 +github, level 9 with dict dds, advanced streaming, 39338 +github, level 9 with dict copy, advanced streaming, 39398 github, level 9 with dict load, advanced streaming, 41710 +github, level 12 row 1, advanced streaming, 134180 +github, level 12 row 1 with dict dms, advanced streaming, 39677 +github, level 12 row 1 with dict dds, advanced streaming, 39677 +github, level 12 row 1 with dict copy, advanced streaming, 39677 +github, level 12 row 1 with dict load, advanced streaming, 41166 +github, level 12 row 2, advanced streaming, 134180 +github, level 12 row 2 with dict dms, advanced streaming, 39677 +github, level 12 row 2 with dict dds, advanced streaming, 39677 +github, level 12 row 2 with dict copy, advanced streaming, 39677 +github, level 12 row 2 with dict load, advanced streaming, 41166 github, level 13, advanced streaming, 134064 github, level 13 with dict, advanced streaming, 39743 github, level 13 with dict dms, advanced streaming, 39743 @@ -839,30 +1025,60 @@ github.tar, level 4 with dict dms, advanced github.tar, level 4 with dict dds, advanced streaming, 37954 github.tar, level 4 with dict copy, advanced streaming, 37948 github.tar, level 4 with dict load, advanced streaming, 37927 -github.tar, level 5, advanced streaming, 39788 -github.tar, level 5 with dict, advanced streaming, 39715 -github.tar, level 5 with dict dms, advanced streaming, 39365 -github.tar, level 5 with dict dds, advanced streaming, 39227 -github.tar, level 5 with dict copy, advanced streaming, 39715 -github.tar, level 5 with dict load, advanced streaming, 39209 -github.tar, level 6, advanced streaming, 39603 -github.tar, level 6 with dict, advanced streaming, 38800 -github.tar, level 6 with dict dms, advanced streaming, 38665 -github.tar, level 6 with dict dds, advanced streaming, 38665 -github.tar, level 6 with dict copy, advanced streaming, 38800 -github.tar, level 6 with dict load, advanced streaming, 38983 -github.tar, level 7, advanced streaming, 39206 -github.tar, level 7 with dict, advanced streaming, 38071 -github.tar, level 7 with dict dms, advanced streaming, 37954 -github.tar, level 7 with dict dds, advanced streaming, 37954 -github.tar, level 7 with dict copy, advanced streaming, 38071 -github.tar, level 7 with dict load, advanced streaming, 38584 -github.tar, level 9, advanced streaming, 36717 -github.tar, level 9 with dict, advanced streaming, 36898 -github.tar, level 9 with dict dms, advanced streaming, 36882 -github.tar, level 9 with dict dds, advanced streaming, 36882 -github.tar, level 9 with dict copy, advanced streaming, 36898 -github.tar, level 9 with dict load, advanced streaming, 36363 +github.tar, level 5 row 1, advanced streaming, 39788 +github.tar, level 5 row 1 with dict dms, advanced streaming, 39365 +github.tar, level 5 row 1 with dict dds, advanced streaming, 39227 +github.tar, level 5 row 1 with dict copy, advanced streaming, 39715 +github.tar, level 5 row 1 with dict load, advanced streaming, 39209 +github.tar, level 5 row 2, advanced streaming, 39693 +github.tar, level 5 row 2 with dict dms, advanced streaming, 39024 +github.tar, level 5 row 2 with dict dds, advanced streaming, 39023 +github.tar, level 5 row 2 with dict copy, advanced streaming, 39040 +github.tar, level 5 row 2 with dict load, advanced streaming, 39037 +github.tar, level 5, advanced streaming, 39693 +github.tar, level 5 with dict, advanced streaming, 39040 +github.tar, level 5 with dict dms, advanced streaming, 39024 +github.tar, level 5 with dict dds, advanced streaming, 39023 +github.tar, level 5 with dict copy, advanced streaming, 39040 +github.tar, level 5 with dict load, advanced streaming, 39037 +github.tar, level 6, advanced streaming, 39621 +github.tar, level 6 with dict, advanced streaming, 38622 +github.tar, level 6 with dict dms, advanced streaming, 38608 +github.tar, level 6 with dict dds, advanced streaming, 38610 +github.tar, level 6 with dict copy, advanced streaming, 38622 +github.tar, level 6 with dict load, advanced streaming, 38962 +github.tar, level 7 row 1, advanced streaming, 39206 +github.tar, level 7 row 1 with dict dms, advanced streaming, 37954 +github.tar, level 7 row 1 with dict dds, advanced streaming, 37954 +github.tar, level 7 row 1 with dict copy, advanced streaming, 38071 +github.tar, level 7 row 1 with dict load, advanced streaming, 38584 +github.tar, level 7 row 2, advanced streaming, 39213 +github.tar, level 7 row 2 with dict dms, advanced streaming, 37848 +github.tar, level 7 row 2 with dict dds, advanced streaming, 37867 +github.tar, level 7 row 2 with dict copy, advanced streaming, 37848 +github.tar, level 7 row 2 with dict load, advanced streaming, 38582 +github.tar, level 7, advanced streaming, 39213 +github.tar, level 7 with dict, advanced streaming, 37848 +github.tar, level 7 with dict dms, advanced streaming, 37848 +github.tar, level 7 with dict dds, advanced streaming, 37867 +github.tar, level 7 with dict copy, advanced streaming, 37848 +github.tar, level 7 with dict load, advanced streaming, 38582 +github.tar, level 9, advanced streaming, 36758 +github.tar, level 9 with dict, advanced streaming, 36457 +github.tar, level 9 with dict dms, advanced streaming, 36549 +github.tar, level 9 with dict dds, advanced streaming, 36637 +github.tar, level 9 with dict copy, advanced streaming, 36457 +github.tar, level 9 with dict load, advanced streaming, 36350 +github.tar, level 12 row 1, advanced streaming, 36435 +github.tar, level 12 row 1 with dict dms, advanced streaming, 36986 +github.tar, level 12 row 1 with dict dds, advanced streaming, 36986 +github.tar, level 12 row 1 with dict copy, advanced streaming, 36609 +github.tar, level 12 row 1 with dict load, advanced streaming, 36419 +github.tar, level 12 row 2, advanced streaming, 36435 +github.tar, level 12 row 2 with dict dms, advanced streaming, 36986 +github.tar, level 12 row 2 with dict dds, advanced streaming, 36986 +github.tar, level 12 row 2 with dict copy, advanced streaming, 36609 +github.tar, level 12 row 2 with dict load, advanced streaming, 36424 github.tar, level 13, advanced streaming, 35621 github.tar, level 13 with dict, advanced streaming, 38726 github.tar, level 13 with dict dms, advanced streaming, 38903 @@ -889,7 +1105,7 @@ github.tar, multithreaded long distance mode, advanced github.tar, small window log, advanced streaming, 199558 github.tar, small hash log, advanced streaming, 129870 github.tar, small chain log, advanced streaming, 41669 -github.tar, explicit params, advanced streaming, 41199 +github.tar, explicit params, advanced streaming, 41227 github.tar, uncompressed literals, advanced streaming, 41122 github.tar, uncompressed literals optimal, advanced streaming, 35388 github.tar, huffman literals, advanced streaming, 38800 @@ -901,10 +1117,10 @@ silesia, level 0, old stre silesia, level 1, old streaming, 5314162 silesia, level 3, old streaming, 4849552 silesia, level 4, old streaming, 4786970 -silesia, level 5, old streaming, 4710236 -silesia, level 6, old streaming, 4660056 -silesia, level 7, old streaming, 4596296 -silesia, level 9, old streaming, 4543925 +silesia, level 5, old streaming, 4707794 +silesia, level 6, old streaming, 4666383 +silesia, level 7, old streaming, 4603381 +silesia, level 9, old streaming, 4546001 silesia, level 13, old streaming, 4482135 silesia, level 16, old streaming, 4360251 silesia, level 19, old streaming, 4283237 @@ -919,10 +1135,10 @@ silesia.tar, level 0, old stre silesia.tar, level 1, old streaming, 5336939 silesia.tar, level 3, old streaming, 4861427 silesia.tar, level 4, old streaming, 4799630 -silesia.tar, level 5, old streaming, 4722329 -silesia.tar, level 6, old streaming, 4672288 -silesia.tar, level 7, old streaming, 4606715 -silesia.tar, level 9, old streaming, 4554154 +silesia.tar, level 5, old streaming, 4719261 +silesia.tar, level 6, old streaming, 4677729 +silesia.tar, level 7, old streaming, 4613544 +silesia.tar, level 9, old streaming, 4555432 silesia.tar, level 13, old streaming, 4491765 silesia.tar, level 16, old streaming, 4356834 silesia.tar, level 19, old streaming, 4264392 @@ -945,13 +1161,13 @@ github, level 3 with dict, old stre github, level 4, old streaming, 136199 github, level 4 with dict, old streaming, 41251 github, level 5, old streaming, 135121 -github, level 5 with dict, old streaming, 38938 +github, level 5 with dict, old streaming, 38758 github, level 6, old streaming, 135122 -github, level 6 with dict, old streaming, 38632 +github, level 6 with dict, old streaming, 38671 github, level 7, old streaming, 135122 -github, level 7 with dict, old streaming, 38771 +github, level 7 with dict, old streaming, 38758 github, level 9, old streaming, 135122 -github, level 9 with dict, old streaming, 39332 +github, level 9 with dict, old streaming, 39437 github, level 13, old streaming, 134064 github, level 13 with dict, old streaming, 39743 github, level 16, old streaming, 134064 @@ -977,14 +1193,14 @@ github.tar, level 3, old stre github.tar, level 3 with dict, old streaming, 37995 github.tar, level 4, old streaming, 38467 github.tar, level 4 with dict, old streaming, 37948 -github.tar, level 5, old streaming, 39788 -github.tar, level 5 with dict, old streaming, 39715 -github.tar, level 6, old streaming, 39603 -github.tar, level 6 with dict, old streaming, 38800 -github.tar, level 7, old streaming, 39206 -github.tar, level 7 with dict, old streaming, 38071 -github.tar, level 9, old streaming, 36717 -github.tar, level 9 with dict, old streaming, 36898 +github.tar, level 5, old streaming, 39693 +github.tar, level 5 with dict, old streaming, 39040 +github.tar, level 6, old streaming, 39621 +github.tar, level 6 with dict, old streaming, 38622 +github.tar, level 7, old streaming, 39213 +github.tar, level 7 with dict, old streaming, 37848 +github.tar, level 9, old streaming, 36758 +github.tar, level 9 with dict, old streaming, 36457 github.tar, level 13, old streaming, 35621 github.tar, level 13 with dict, old streaming, 38726 github.tar, level 16, old streaming, 40255 @@ -1003,10 +1219,10 @@ silesia, level 0, old stre silesia, level 1, old streaming advanced, 5314162 silesia, level 3, old streaming advanced, 4849552 silesia, level 4, old streaming advanced, 4786970 -silesia, level 5, old streaming advanced, 4710236 -silesia, level 6, old streaming advanced, 4660056 -silesia, level 7, old streaming advanced, 4596296 -silesia, level 9, old streaming advanced, 4543925 +silesia, level 5, old streaming advanced, 4707794 +silesia, level 6, old streaming advanced, 4666383 +silesia, level 7, old streaming advanced, 4603381 +silesia, level 9, old streaming advanced, 4546001 silesia, level 13, old streaming advanced, 4482135 silesia, level 16, old streaming advanced, 4360251 silesia, level 19, old streaming advanced, 4283237 @@ -1017,7 +1233,7 @@ silesia, multithreaded long distance mode, old stre silesia, small window log, old streaming advanced, 7112062 silesia, small hash log, old streaming advanced, 6526141 silesia, small chain log, old streaming advanced, 4912197 -silesia, explicit params, old streaming advanced, 4797112 +silesia, explicit params, old streaming advanced, 4795887 silesia, uncompressed literals, old streaming advanced, 4849552 silesia, uncompressed literals optimal, old streaming advanced, 4283237 silesia, huffman literals, old streaming advanced, 6183403 @@ -1029,10 +1245,10 @@ silesia.tar, level 0, old stre silesia.tar, level 1, old streaming advanced, 5336939 silesia.tar, level 3, old streaming advanced, 4861427 silesia.tar, level 4, old streaming advanced, 4799630 -silesia.tar, level 5, old streaming advanced, 4722329 -silesia.tar, level 6, old streaming advanced, 4672288 -silesia.tar, level 7, old streaming advanced, 4606715 -silesia.tar, level 9, old streaming advanced, 4554154 +silesia.tar, level 5, old streaming advanced, 4719261 +silesia.tar, level 6, old streaming advanced, 4677729 +silesia.tar, level 7, old streaming advanced, 4613544 +silesia.tar, level 9, old streaming advanced, 4555432 silesia.tar, level 13, old streaming advanced, 4491765 silesia.tar, level 16, old streaming advanced, 4356834 silesia.tar, level 19, old streaming advanced, 4264392 @@ -1043,7 +1259,7 @@ silesia.tar, multithreaded long distance mode, old stre silesia.tar, small window log, old streaming advanced, 7118772 silesia.tar, small hash log, old streaming advanced, 6529235 silesia.tar, small chain log, old streaming advanced, 4917021 -silesia.tar, explicit params, old streaming advanced, 4808618 +silesia.tar, explicit params, old streaming advanced, 4807401 silesia.tar, uncompressed literals, old streaming advanced, 4861427 silesia.tar, uncompressed literals optimal, old streaming advanced, 4264392 silesia.tar, huffman literals, old streaming advanced, 6190795 @@ -1063,13 +1279,13 @@ github, level 3 with dict, old stre github, level 4, old streaming advanced, 141104 github, level 4 with dict, old streaming advanced, 41084 github, level 5, old streaming advanced, 139399 -github, level 5 with dict, old streaming advanced, 39159 +github, level 5 with dict, old streaming advanced, 38633 github, level 6, old streaming advanced, 139402 -github, level 6 with dict, old streaming advanced, 38749 +github, level 6 with dict, old streaming advanced, 38723 github, level 7, old streaming advanced, 138676 -github, level 7 with dict, old streaming advanced, 38746 +github, level 7 with dict, old streaming advanced, 38744 github, level 9, old streaming advanced, 138676 -github, level 9 with dict, old streaming advanced, 38993 +github, level 9 with dict, old streaming advanced, 38981 github, level 13, old streaming advanced, 138676 github, level 13 with dict, old streaming advanced, 39731 github, level 16, old streaming advanced, 138676 @@ -1103,14 +1319,14 @@ github.tar, level 3, old stre github.tar, level 3 with dict, old streaming advanced, 38013 github.tar, level 4, old streaming advanced, 38467 github.tar, level 4 with dict, old streaming advanced, 38063 -github.tar, level 5, old streaming advanced, 39788 -github.tar, level 5 with dict, old streaming advanced, 39310 -github.tar, level 6, old streaming advanced, 39603 -github.tar, level 6 with dict, old streaming advanced, 39279 -github.tar, level 7, old streaming advanced, 39206 -github.tar, level 7 with dict, old streaming advanced, 38728 -github.tar, level 9, old streaming advanced, 36717 -github.tar, level 9 with dict, old streaming advanced, 36504 +github.tar, level 5, old streaming advanced, 39693 +github.tar, level 5 with dict, old streaming advanced, 39049 +github.tar, level 6, old streaming advanced, 39621 +github.tar, level 6 with dict, old streaming advanced, 38959 +github.tar, level 7, old streaming advanced, 39213 +github.tar, level 7 with dict, old streaming advanced, 38573 +github.tar, level 9, old streaming advanced, 36758 +github.tar, level 9 with dict, old streaming advanced, 36233 github.tar, level 13, old streaming advanced, 35621 github.tar, level 13 with dict, old streaming advanced, 36035 github.tar, level 16, old streaming advanced, 40255 @@ -1125,7 +1341,7 @@ github.tar, multithreaded long distance mode, old stre github.tar, small window log, old streaming advanced, 199561 github.tar, small hash log, old streaming advanced, 129870 github.tar, small chain log, old streaming advanced, 41669 -github.tar, explicit params, old streaming advanced, 41199 +github.tar, explicit params, old streaming advanced, 41227 github.tar, uncompressed literals, old streaming advanced, 38441 github.tar, uncompressed literals optimal, old streaming advanced, 32837 github.tar, huffman literals, old streaming advanced, 42465 @@ -1137,10 +1353,10 @@ github, level 0 with dict, old stre github, level 1 with dict, old streaming cdict, 41682 github, level 3 with dict, old streaming cdict, 41148 github, level 4 with dict, old streaming cdict, 41251 -github, level 5 with dict, old streaming cdict, 38938 -github, level 6 with dict, old streaming cdict, 38632 -github, level 7 with dict, old streaming cdict, 38771 -github, level 9 with dict, old streaming cdict, 39332 +github, level 5 with dict, old streaming cdict, 38758 +github, level 6 with dict, old streaming cdict, 38671 +github, level 7 with dict, old streaming cdict, 38758 +github, level 9 with dict, old streaming cdict, 39437 github, level 13 with dict, old streaming cdict, 39743 github, level 16 with dict, old streaming cdict, 37577 github, level 19 with dict, old streaming cdict, 37576 @@ -1152,10 +1368,10 @@ github.tar, level 0 with dict, old stre github.tar, level 1 with dict, old streaming cdict, 38766 github.tar, level 3 with dict, old streaming cdict, 37956 github.tar, level 4 with dict, old streaming cdict, 37927 -github.tar, level 5 with dict, old streaming cdict, 39209 -github.tar, level 6 with dict, old streaming cdict, 38983 -github.tar, level 7 with dict, old streaming cdict, 38584 -github.tar, level 9 with dict, old streaming cdict, 36363 +github.tar, level 5 with dict, old streaming cdict, 39037 +github.tar, level 6 with dict, old streaming cdict, 38962 +github.tar, level 7 with dict, old streaming cdict, 38582 +github.tar, level 9 with dict, old streaming cdict, 36350 github.tar, level 13 with dict, old streaming cdict, 36372 github.tar, level 16 with dict, old streaming cdict, 39353 github.tar, level 19 with dict, old streaming cdict, 32676 @@ -1167,10 +1383,10 @@ github, level 0 with dict, old stre github, level 1 with dict, old streaming advanced cdict, 42430 github, level 3 with dict, old streaming advanced cdict, 41113 github, level 4 with dict, old streaming advanced cdict, 41084 -github, level 5 with dict, old streaming advanced cdict, 39159 -github, level 6 with dict, old streaming advanced cdict, 38749 -github, level 7 with dict, old streaming advanced cdict, 38746 -github, level 9 with dict, old streaming advanced cdict, 38993 +github, level 5 with dict, old streaming advanced cdict, 38633 +github, level 6 with dict, old streaming advanced cdict, 38723 +github, level 7 with dict, old streaming advanced cdict, 38744 +github, level 9 with dict, old streaming advanced cdict, 38981 github, level 13 with dict, old streaming advanced cdict, 39731 github, level 16 with dict, old streaming advanced cdict, 40789 github, level 19 with dict, old streaming advanced cdict, 37576 @@ -1182,10 +1398,10 @@ github.tar, level 0 with dict, old stre github.tar, level 1 with dict, old streaming advanced cdict, 39002 github.tar, level 3 with dict, old streaming advanced cdict, 38013 github.tar, level 4 with dict, old streaming advanced cdict, 38063 -github.tar, level 5 with dict, old streaming advanced cdict, 39310 -github.tar, level 6 with dict, old streaming advanced cdict, 39279 -github.tar, level 7 with dict, old streaming advanced cdict, 38728 -github.tar, level 9 with dict, old streaming advanced cdict, 36504 +github.tar, level 5 with dict, old streaming advanced cdict, 39049 +github.tar, level 6 with dict, old streaming advanced cdict, 38959 +github.tar, level 7 with dict, old streaming advanced cdict, 38573 +github.tar, level 9 with dict, old streaming advanced cdict, 36233 github.tar, level 13 with dict, old streaming advanced cdict, 36035 github.tar, level 16 with dict, old streaming advanced cdict, 38736 github.tar, level 19 with dict, old streaming advanced cdict, 32876