Merge pull request #1449 from facebook/ovlog_def

overlapLog default values
This commit is contained in:
Yann Collet 2018-12-18 09:45:53 -08:00 committed by GitHub
commit 517d8c984c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 221 additions and 147 deletions

View File

@ -21,6 +21,7 @@
#include <stddef.h> /* size_t */
#include <stdlib.h> /* malloc, free, abort */
#include <stdio.h> /* fprintf */
#include <limits.h> /* UINT_MAX */
#include <assert.h> /* assert */
#include "util.h"
@ -127,7 +128,7 @@ static buffer_t createBuffer_fromFile(const char* fileName)
static buffer_t
createDictionaryBuffer(const char* dictionaryName,
const void* srcBuffer,
const size_t* srcBlockSizes, unsigned nbBlocks,
const size_t* srcBlockSizes, size_t nbBlocks,
size_t requestedDictSize)
{
if (dictionaryName) {
@ -141,9 +142,10 @@ createDictionaryBuffer(const char* dictionaryName,
void* const dictBuffer = malloc(requestedDictSize);
CONTROL(dictBuffer != NULL);
assert(nbBlocks <= UINT_MAX);
size_t const dictSize = ZDICT_trainFromBuffer(dictBuffer, requestedDictSize,
srcBuffer,
srcBlockSizes, nbBlocks);
srcBlockSizes, (unsigned)nbBlocks);
CONTROL(!ZSTD_isError(dictSize));
buffer_t result;

View File

@ -148,20 +148,20 @@ static void sumFile_orDie(const char* fname, int nbThreads)
size_t const initResult = ZSTD_seekable_initFile(seekable, fin);
if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_seekable_init() error : %s \n", ZSTD_getErrorName(initResult)); exit(11); }
size_t const numFrames = ZSTD_seekable_getNumFrames(seekable);
unsigned const numFrames = ZSTD_seekable_getNumFrames(seekable);
struct sum_job* jobs = (struct sum_job*)malloc(numFrames * sizeof(struct sum_job));
size_t i;
for (i = 0; i < numFrames; i++) {
jobs[i] = (struct sum_job){ fname, 0, i, 0 };
POOL_add(pool, sumFrame, &jobs[i]);
unsigned fnb;
for (fnb = 0; fnb < numFrames; fnb++) {
jobs[fnb] = (struct sum_job){ fname, 0, fnb, 0 };
POOL_add(pool, sumFrame, &jobs[fnb]);
}
unsigned long long total = 0;
for (i = 0; i < numFrames; i++) {
while (!jobs[i].done) SLEEP(5); /* wake up every 5 milliseconds to check */
total += jobs[i].sum;
for (fnb = 0; fnb < numFrames; fnb++) {
while (!jobs[fnb].done) SLEEP(5); /* wake up every 5 milliseconds to check */
total += jobs[fnb].sum;
}
printf("Sum: %llu\n", total);

View File

@ -8,6 +8,8 @@
*/
#include <stdlib.h> /* malloc, free */
#include <limits.h> /* UINT_MAX */
#include <assert.h>
#define XXH_STATIC_LINKING_ONLY
#define XXH_NAMESPACE ZSTD_
@ -167,9 +169,9 @@ size_t ZSTD_seekable_initCStream(ZSTD_seekable_CStream* zcs,
}
size_t ZSTD_seekable_logFrame(ZSTD_frameLog* fl,
unsigned compressedSize,
unsigned decompressedSize,
unsigned checksum)
unsigned compressedSize,
unsigned decompressedSize,
unsigned checksum)
{
if (fl->size == ZSTD_SEEKABLE_MAXFRAMES)
return ERROR(frameIndex_tooLarge);
@ -184,7 +186,8 @@ size_t ZSTD_seekable_logFrame(ZSTD_frameLog* fl,
if (newEntries == NULL) return ERROR(memory_allocation);
fl->entries = newEntries;
fl->capacity = newCapacity;
assert(newCapacity <= UINT_MAX);
fl->capacity = (U32)newCapacity;
}
fl->entries[fl->size] = (framelogEntry_t){
@ -307,32 +310,32 @@ size_t ZSTD_seekable_writeSeekTable(ZSTD_frameLog* fl, ZSTD_outBuffer* output)
size_t const seekTableLen = ZSTD_seekable_seekTableSize(fl);
CHECK_Z(ZSTD_stwrite32(fl, output, ZSTD_MAGIC_SKIPPABLE_START | 0xE, 0));
CHECK_Z(ZSTD_stwrite32(fl, output, seekTableLen - ZSTD_SKIPPABLEHEADERSIZE,
4));
assert(seekTableLen <= (size_t)UINT_MAX);
CHECK_Z(ZSTD_stwrite32(fl, output, (U32)seekTableLen - ZSTD_SKIPPABLEHEADERSIZE, 4));
while (fl->seekTableIndex < fl->size) {
unsigned long long const start = ZSTD_SKIPPABLEHEADERSIZE + sizePerFrame * fl->seekTableIndex;
assert(start + 8 <= UINT_MAX);
CHECK_Z(ZSTD_stwrite32(fl, output,
fl->entries[fl->seekTableIndex].cSize,
ZSTD_SKIPPABLEHEADERSIZE +
sizePerFrame * fl->seekTableIndex + 0));
(U32)start + 0));
CHECK_Z(ZSTD_stwrite32(fl, output,
fl->entries[fl->seekTableIndex].dSize,
ZSTD_SKIPPABLEHEADERSIZE +
sizePerFrame * fl->seekTableIndex + 4));
(U32)start + 4));
if (fl->checksumFlag) {
CHECK_Z(ZSTD_stwrite32(
fl, output, fl->entries[fl->seekTableIndex].checksum,
ZSTD_SKIPPABLEHEADERSIZE +
sizePerFrame * fl->seekTableIndex + 8));
(U32)start + 8));
}
fl->seekTableIndex++;
}
assert(seekTableLen <= UINT_MAX);
CHECK_Z(ZSTD_stwrite32(fl, output, fl->size,
seekTableLen - ZSTD_seekTableFooterSize));
(U32)seekTableLen - ZSTD_seekTableFooterSize));
if (output->size - output->pos < 1) return seekTableLen - fl->seekTablePos;
if (fl->seekTablePos < seekTableLen - 4) {
@ -345,7 +348,7 @@ size_t ZSTD_seekable_writeSeekTable(ZSTD_frameLog* fl, ZSTD_outBuffer* output)
}
CHECK_Z(ZSTD_stwrite32(fl, output, ZSTD_SEEKABLE_MAGICNUMBER,
seekTableLen - 4));
(U32)seekTableLen - 4));
if (fl->seekTablePos != seekTableLen) return ERROR(GENERIC);
return 0;

View File

@ -54,8 +54,9 @@
# define LONG_SEEK fseek
#endif
#include <stdlib.h> /* malloc, free */
#include <stdio.h> /* FILE* */
#include <stdlib.h> /* malloc, free */
#include <stdio.h> /* FILE* */
#include <limits.h> /* UINT_MAX */
#include <assert.h>
#define XXH_STATIC_LINKING_ONLY
@ -203,10 +204,11 @@ size_t ZSTD_seekable_free(ZSTD_seekable* zs)
U32 ZSTD_seekable_offsetToFrameIndex(ZSTD_seekable* const zs, unsigned long long pos)
{
U32 lo = 0;
U32 hi = zs->seekTable.tableLen;
U32 hi = (U32)zs->seekTable.tableLen;
assert(zs->seekTable.tableLen <= UINT_MAX);
if (pos >= zs->seekTable.entries[zs->seekTable.tableLen].dOffset) {
return zs->seekTable.tableLen;
return (U32)zs->seekTable.tableLen;
}
while (lo + 1 < hi) {
@ -222,7 +224,8 @@ U32 ZSTD_seekable_offsetToFrameIndex(ZSTD_seekable* const zs, unsigned long long
U32 ZSTD_seekable_getNumFrames(ZSTD_seekable* const zs)
{
return zs->seekTable.tableLen;
assert(zs->seekTable.tableLen <= UINT_MAX);
return (U32)zs->seekTable.tableLen;
}
unsigned long long ZSTD_seekable_getFrameCompressedOffset(ZSTD_seekable* const zs, U32 frameIndex)

View File

@ -445,7 +445,7 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
* , for all strategies > fast, effective maximum is 6.
* Special: value 0 means "use default minMatchLength". */
ZSTD_c_targetLength=106, </b>/* Impact of this field depends on strategy.<b>
* For strategies btopt & btultra:
* For strategies btopt, btultra & btultra2:
* Length of Match considered "good enough" to stop search.
* Larger values make compression stronger, and slower.
* For strategy fast:
@ -507,15 +507,20 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
ZSTD_c_jobSize=401, </b>/* Size of a compression job. This value is enforced only when nbWorkers >= 1.<b>
* Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads.
* 0 means default, which is dynamically determined based on compression parameters.
* Job size must be a minimum of overlapSize, or 1 MB, whichever is largest.
* Job size must be a minimum of overlap size, or 1 MB, whichever is largest.
* The minimum size is automatically and transparently enforced */
ZSTD_c_overlapSizeLog=402, </b>/* Size of previous job reloaded at the beginning of each job, as a fraction of window size.<b>
ZSTD_c_overlapLog=402, </b>/* Control the overlap size, as a fraction of window size.<b>
* The overlap size is an amount of data reloaded from previous job at the beginning of a new job.
* It helps preserve compression ratio, while each job is compressed in parallel.
* This value is enforced only when nbWorkers >= 1.
* Larger values increase compression ratio, but decrease speed.
* Values range from 0 (no overlap) to 9 (overlap a full windowSize).
* Each rank (except 0) increase/decrease load size by a factor 2
* 9: full window; 8: w/2; 7: w/4; 6: w/8; 5:w/16; 4: w/32; 3:w/64; 2:w/128; 1:w/256;
* default value is 6 : use 1/8th of windowSize */
* Possible values range from 0 to 9 :
* - 0 means "default" : value will be determined by the library, depending on strategy
* - 1 means "no overlap"
* - 9 means "full overlap", using a full window size.
* Each intermediate rank increases/decreases load size by a factor 2 :
* 9: full window; 8: w/2; 7: w/4; 6: w/8; 5:w/16; 4: w/32; 3:w/64; 2:w/128; 1:no overlap; 0:default
* default value varies between 6 and 9, depending on strategy */
</b>/* note : additional experimental parameters are also available<b>
* within the experimental section of the API.

View File

@ -11,6 +11,7 @@
/*-*************************************
* Dependencies
***************************************/
#include <limits.h> /* INT_MAX */
#include <string.h> /* memset */
#include "cpu.h"
#include "mem.h"
@ -305,7 +306,7 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
#endif
return bounds;
case ZSTD_c_overlapSizeLog:
case ZSTD_c_overlapLog:
bounds.lowerBound = ZSTD_OVERLAPLOG_MIN;
bounds.upperBound = ZSTD_OVERLAPLOG_MAX;
return bounds;
@ -404,7 +405,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
case ZSTD_c_forceMaxWindow :
case ZSTD_c_nbWorkers:
case ZSTD_c_jobSize:
case ZSTD_c_overlapSizeLog:
case ZSTD_c_overlapLog:
case ZSTD_c_rsyncable:
case ZSTD_c_enableLongDistanceMatching:
case ZSTD_c_ldmHashLog:
@ -466,7 +467,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
case ZSTD_c_jobSize:
case ZSTD_c_overlapSizeLog:
case ZSTD_c_overlapLog:
case ZSTD_c_rsyncable:
return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
@ -587,11 +588,11 @@ size_t ZSTD_CCtxParam_setParameter(ZSTD_CCtx_params* CCtxParams,
return ZSTDMT_CCtxParam_setMTCtxParameter(CCtxParams, ZSTDMT_p_jobSize, value);
#endif
case ZSTD_c_overlapSizeLog :
case ZSTD_c_overlapLog :
#ifndef ZSTD_MULTITHREAD
return ERROR(parameter_unsupported);
#else
return ZSTDMT_CCtxParam_setMTCtxParameter(CCtxParams, ZSTDMT_p_overlapSectionLog, value);
return ZSTDMT_CCtxParam_setMTCtxParameter(CCtxParams, ZSTDMT_p_overlapLog, value);
#endif
case ZSTD_c_rsyncable :
@ -695,14 +696,15 @@ size_t ZSTD_CCtxParam_getParameter(
#ifndef ZSTD_MULTITHREAD
return ERROR(parameter_unsupported);
#else
*value = CCtxParams->jobSize;
assert(CCtxParams->jobSize <= INT_MAX);
*value = (int)CCtxParams->jobSize;
break;
#endif
case ZSTD_c_overlapSizeLog :
case ZSTD_c_overlapLog :
#ifndef ZSTD_MULTITHREAD
return ERROR(parameter_unsupported);
#else
*value = CCtxParams->overlapSizeLog;
*value = CCtxParams->overlapLog;
break;
#endif
case ZSTD_c_rsyncable :
@ -873,7 +875,7 @@ ZSTD_clampCParams(ZSTD_compressionParameters cParams)
CLAMP(ZSTD_c_searchLog, cParams.searchLog);
CLAMP(ZSTD_c_minMatch, cParams.minMatch);
CLAMP(ZSTD_c_targetLength,cParams.targetLength);
CLAMP_TYPE(ZSTD_c_strategy, cParams.strategy, ZSTD_strategy);
CLAMP_TYPE(ZSTD_c_strategy,cParams.strategy, ZSTD_strategy);
return cParams;
}
@ -4014,8 +4016,8 @@ size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
if (cctx->streamStage == zcss_init) {
ZSTD_CCtx_params params = cctx->requestedParams;
ZSTD_prefixDict const prefixDict = cctx->prefixDict;
memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* single usage */
assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */
memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* single usage */
assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */
DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage");
if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = input->size + 1; /* auto-fix pledgedSrcSize */
params.cParams = ZSTD_getCParamsFromCCtxParams(

View File

@ -190,10 +190,10 @@ struct ZSTD_CCtx_params_s {
ZSTD_dictAttachPref_e attachDictPref;
/* Multithreading: used to pass parameters to mtctx */
unsigned nbWorkers;
unsigned jobSize;
unsigned overlapSizeLog;
unsigned rsyncable;
int nbWorkers;
size_t jobSize;
int overlapLog;
int rsyncable;
/* Long distance matching parameters */
ldmParams_t ldmParams;

View File

@ -9,19 +9,19 @@
*/
/* ====== Tuning parameters ====== */
#define ZSTDMT_OVERLAPLOG_DEFAULT 6
/* ====== Compiler specifics ====== */
#if defined(_MSC_VER)
# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */
#endif
/* ====== Constants ====== */
#define ZSTDMT_OVERLAPLOG_DEFAULT 0
/* ====== Dependencies ====== */
#include <string.h> /* memcpy, memset */
#include <limits.h> /* INT_MAX */
#include <limits.h> /* INT_MAX, UINT_MAX */
#include "pool.h" /* threadpool */
#include "threading.h" /* mutex */
#include "zstd_compress_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
@ -55,9 +55,9 @@ static unsigned long long GetCurrentClockTimeMicroseconds(void)
static clock_t _ticksPerSecond = 0;
if (_ticksPerSecond <= 0) _ticksPerSecond = sysconf(_SC_CLK_TCK);
{ struct tms junk; clock_t newTicks = (clock_t) times(&junk);
return ((((unsigned long long)newTicks)*(1000000))/_ticksPerSecond); }
}
{ struct tms junk; clock_t newTicks = (clock_t) times(&junk);
return ((((unsigned long long)newTicks)*(1000000))/_ticksPerSecond);
} }
#define MUTEX_WAIT_TIME_DLEVEL 6
#define ZSTD_PTHREAD_MUTEX_LOCK(mutex) { \
@ -340,8 +340,8 @@ static ZSTDMT_seqPool* ZSTDMT_expandSeqPool(ZSTDMT_seqPool* pool, U32 nbWorkers)
typedef struct {
ZSTD_pthread_mutex_t poolMutex;
unsigned totalCCtx;
unsigned availCCtx;
int totalCCtx;
int availCCtx;
ZSTD_customMem cMem;
ZSTD_CCtx* cctx[1]; /* variable size */
} ZSTDMT_CCtxPool;
@ -349,16 +349,16 @@ typedef struct {
/* note : all CCtx borrowed from the pool should be released back to the pool _before_ freeing the pool */
static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
{
unsigned u;
for (u=0; u<pool->totalCCtx; u++)
ZSTD_freeCCtx(pool->cctx[u]); /* note : compatible with free on NULL */
int cid;
for (cid=0; cid<pool->totalCCtx; cid++)
ZSTD_freeCCtx(pool->cctx[cid]); /* note : compatible with free on NULL */
ZSTD_pthread_mutex_destroy(&pool->poolMutex);
ZSTD_free(pool, pool->cMem);
}
/* ZSTDMT_createCCtxPool() :
* implies nbWorkers >= 1 , checked by caller ZSTDMT_createCCtx() */
static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbWorkers,
static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(int nbWorkers,
ZSTD_customMem cMem)
{
ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_calloc(
@ -379,7 +379,7 @@ static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbWorkers,
}
static ZSTDMT_CCtxPool* ZSTDMT_expandCCtxPool(ZSTDMT_CCtxPool* srcPool,
unsigned nbWorkers)
int nbWorkers)
{
if (srcPool==NULL) return NULL;
if (nbWorkers <= srcPool->totalCCtx) return srcPool; /* good enough */
@ -866,7 +866,7 @@ size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorker
{
if (nbWorkers > ZSTDMT_NBWORKERS_MAX) nbWorkers = ZSTDMT_NBWORKERS_MAX;
params->nbWorkers = nbWorkers;
params->overlapSizeLog = ZSTDMT_OVERLAPLOG_DEFAULT;
params->overlapLog = ZSTDMT_OVERLAPLOG_DEFAULT;
params->jobSize = 0;
return nbWorkers;
}
@ -976,47 +976,56 @@ size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx)
}
/* Internal only */
size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params,
ZSTDMT_parameter parameter, unsigned value) {
size_t
ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params,
ZSTDMT_parameter parameter,
int value)
{
DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter");
switch(parameter)
{
case ZSTDMT_p_jobSize :
DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter : set jobSize to %u", value);
if ( (value > 0) /* value==0 => automatic job size */
& (value < ZSTDMT_JOBSIZE_MIN) )
DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter : set jobSize to %i", value);
if ( value != 0 /* default */
&& value < ZSTDMT_JOBSIZE_MIN)
value = ZSTDMT_JOBSIZE_MIN;
if (value > ZSTDMT_JOBSIZE_MAX)
value = ZSTDMT_JOBSIZE_MAX;
assert(value >= 0);
if (value > ZSTDMT_JOBSIZE_MAX) value = ZSTDMT_JOBSIZE_MAX;
params->jobSize = value;
return value;
case ZSTDMT_p_overlapSectionLog :
if (value > 9) value = 9;
DEBUGLOG(4, "ZSTDMT_p_overlapSectionLog : %u", value);
params->overlapSizeLog = (value >= 9) ? 9 : value;
case ZSTDMT_p_overlapLog :
DEBUGLOG(4, "ZSTDMT_p_overlapLog : %i", value);
if (value < ZSTD_OVERLAPLOG_MIN) value = ZSTD_OVERLAPLOG_MIN;
if (value > ZSTD_OVERLAPLOG_MAX) value = ZSTD_OVERLAPLOG_MAX;
params->overlapLog = value;
return value;
case ZSTDMT_p_rsyncable :
params->rsyncable = (value == 0 ? 0 : 1);
value = (value != 0);
params->rsyncable = value;
return value;
default :
return ERROR(parameter_unsupported);
}
}
size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned value)
/* ZSTDMT_setMTCtxParameter() :
 * Thin wrapper : applies `parameter` = `value` onto the parameters stored inside `mtctx`,
 * by forwarding to ZSTDMT_CCtxParam_setMTCtxParameter().
 * @return : whatever ZSTDMT_CCtxParam_setMTCtxParameter() returns, unmodified. */
size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int value)
{
DEBUGLOG(4, "ZSTDMT_setMTCtxParameter");
return ZSTDMT_CCtxParam_setMTCtxParameter(&mtctx->params, parameter, value);
}
size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned* value)
size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int* value)
{
switch (parameter) {
case ZSTDMT_p_jobSize:
*value = mtctx->params.jobSize;
assert(mtctx->params.jobSize <= INT_MAX);
*value = (int)(mtctx->params.jobSize);
break;
case ZSTDMT_p_overlapSectionLog:
*value = mtctx->params.overlapSizeLog;
case ZSTDMT_p_overlapLog:
*value = mtctx->params.overlapLog;
break;
case ZSTDMT_p_rsyncable:
*value = mtctx->params.rsyncable;
@ -1145,22 +1154,66 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
/* ===== Multi-threaded compression ===== */
/* ------------------------------------------ */
static size_t ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params)
/* ZSTDMT_computeTargetJobLog() :
 * @return : log2 of the target job size, derived from compression parameters.
 *           Without long distance matching, it follows windowLog (floor : 20).
 *           With long distance matching, it follows chainLog (floor : 21). */
static unsigned ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params)
{
    if (!params.ldmParams.enableLdm) {
        return MAX(20, params.cParams.windowLog + 2);
    }
    /* Long Range Mode : windowLog is typically oversized,
     * so chainLog is a better basis to size jobs. */
    return MAX(21, params.cParams.chainLog + 4);
}
static size_t ZSTDMT_computeOverlapLog(ZSTD_CCtx_params const params)
static int ZSTDMT_overlapLog_default(ZSTD_strategy strat)
{
unsigned const overlapRLog = (params.overlapSizeLog>9) ? 0 : 9-params.overlapSizeLog;
if (params.ldmParams.enableLdm)
return (MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2) - overlapRLog);
return overlapRLog >= 9 ? 0 : (params.cParams.windowLog - overlapRLog);
switch(strat)
{
case ZSTD_btultra2:
return 9;
case ZSTD_btultra:
case ZSTD_btopt:
return 8;
case ZSTD_btlazy2:
case ZSTD_lazy2:
return 7;
case ZSTD_lazy:
case ZSTD_greedy:
case ZSTD_dfast:
case ZSTD_fast:
default:;
}
return 6;
}
static unsigned ZSTDMT_computeNbJobs(ZSTD_CCtx_params params, size_t srcSize, unsigned nbWorkers) {
/* ZSTDMT_overlapLog() :
 * Resolve a requested overlapLog into the effective one :
 * 0 is replaced by the strategy-dependent default,
 * any other value is returned unchanged.
 * `ovlog` is expected within [0, 9] (checked by assert). */
static int ZSTDMT_overlapLog(int ovlog, ZSTD_strategy strat)
{
    assert(0 <= ovlog && ovlog <= 9);
    return (ovlog != 0) ? ovlog : ZSTDMT_overlapLog_default(strat);
}
/* ZSTDMT_computeOverlapSize() :
 * Translate the overlapLog parameter (after default resolution) into an overlap size.
 * The overlap is expressed as a fraction of the window size,
 * or, when long distance matching is enabled, as a fraction of the job size.
 * @return : overlap size, 0 meaning "no overlap". */
static size_t ZSTDMT_computeOverlapSize(ZSTD_CCtx_params const params)
{
    int const effectiveOvlog = ZSTDMT_overlapLog(params.overlapLog, params.cParams.strategy);
    int const reverseLog = 9 - effectiveOvlog;   /* nb of halvings applied to the reference size */
    int sizeLog;
    assert(0 <= reverseLog && reverseLog <= 8);

    if (params.ldmParams.enableLdm) {
        /* In Long Range Mode, the windowLog is typically oversized.
         * In which case, it's preferable to determine the jobSize
         * based on chainLog instead.
         * Then, the overlap becomes a fraction of the jobSize, rather than windowSize */
        sizeLog = MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2)
                - reverseLog;
    } else if (reverseLog >= 8) {
        sizeLog = 0;   /* smallest rank : no overlap */
    } else {
        sizeLog = params.cParams.windowLog - reverseLog;
    }

    assert(0 <= sizeLog && sizeLog <= 30);
    DEBUGLOG(4, "overlapLog : %i", params.overlapLog);
    DEBUGLOG(4, "overlap size : %i", 1 << sizeLog);

    if (sizeLog == 0) {
        return 0;
    }
    return (size_t)1 << sizeLog;
}
static unsigned
ZSTDMT_computeNbJobs(ZSTD_CCtx_params params, size_t srcSize, unsigned nbWorkers)
{
assert(nbWorkers>0);
{ size_t const jobSizeTarget = (size_t)1 << ZSTDMT_computeTargetJobLog(params);
size_t const jobMaxSize = jobSizeTarget << 2;
@ -1183,7 +1236,7 @@ static size_t ZSTDMT_compress_advanced_internal(
ZSTD_CCtx_params params)
{
ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(params);
size_t const overlapSize = (size_t)1 << ZSTDMT_computeOverlapLog(params);
size_t const overlapSize = ZSTDMT_computeOverlapSize(params);
unsigned const nbJobs = ZSTDMT_computeNbJobs(params, srcSize, params.nbWorkers);
size_t const proposedJobSize = (srcSize + (nbJobs-1)) / nbJobs;
size_t const avgJobSize = (((proposedJobSize-1) & 0x1FFFF) < 0x7FFF) ? proposedJobSize + 0xFFFF : proposedJobSize; /* avoid too small last block */
@ -1294,16 +1347,17 @@ static size_t ZSTDMT_compress_advanced_internal(
}
size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
const ZSTD_CDict* cdict,
ZSTD_parameters params,
unsigned overlapLog)
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
const ZSTD_CDict* cdict,
ZSTD_parameters params,
int overlapLog)
{
ZSTD_CCtx_params cctxParams = mtctx->params;
cctxParams.cParams = params.cParams;
cctxParams.fParams = params.fParams;
cctxParams.overlapSizeLog = overlapLog;
assert(ZSTD_OVERLAPLOG_MIN <= overlapLog && overlapLog <= ZSTD_OVERLAPLOG_MAX);
cctxParams.overlapLog = overlapLog;
return ZSTDMT_compress_advanced_internal(mtctx,
dst, dstCapacity,
src, srcSize,
@ -1316,8 +1370,8 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
const void* src, size_t srcSize,
int compressionLevel)
{
U32 const overlapLog = (compressionLevel >= ZSTD_maxCLevel()) ? 9 : ZSTDMT_OVERLAPLOG_DEFAULT;
ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, 0);
int const overlapLog = ZSTDMT_overlapLog_default(params.cParams.strategy);
params.fParams.contentSizeFlag = 1;
return ZSTDMT_compress_advanced(mtctx, dst, dstCapacity, src, srcSize, NULL, params, overlapLog);
}
@ -1344,8 +1398,8 @@ size_t ZSTDMT_initCStream_internal(
if (params.nbWorkers != mtctx->params.nbWorkers)
CHECK_F( ZSTDMT_resize(mtctx, params.nbWorkers) );
if (params.jobSize > 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
if (params.jobSize > ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX;
if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX;
mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */
if (mtctx->singleBlockingThread) {
@ -1380,24 +1434,24 @@ size_t ZSTDMT_initCStream_internal(
mtctx->cdict = cdict;
}
mtctx->targetPrefixSize = (size_t)1 << ZSTDMT_computeOverlapLog(params);
DEBUGLOG(4, "overlapLog=%u => %u KB", params.overlapSizeLog, (U32)(mtctx->targetPrefixSize>>10));
mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(params);
DEBUGLOG(4, "overlapLog=%i => %u KB", params.overlapLog, (U32)(mtctx->targetPrefixSize>>10));
mtctx->targetSectionSize = params.jobSize;
if (mtctx->targetSectionSize == 0) {
mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params);
}
if (params.rsyncable) {
/* Aim for the targetsectionSize as the average job size. */
U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20);
U32 const rsyncBits = ZSTD_highbit32(jobSizeMB) + 20;
assert(jobSizeMB >= 1);
DEBUGLOG(4, "rsyncLog = %u", rsyncBits);
mtctx->rsync.hash = 0;
mtctx->rsync.hitMask = (1ULL << rsyncBits) - 1;
mtctx->rsync.primePower = ZSTD_rollingHash_primePower(RSYNC_LENGTH);
/* Aim for the targetSectionSize as the average job size. */
U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20);
U32 const rsyncBits = ZSTD_highbit32(jobSizeMB) + 20;
assert(jobSizeMB >= 1);
DEBUGLOG(4, "rsyncLog = %u", rsyncBits);
mtctx->rsync.hash = 0;
mtctx->rsync.hitMask = (1ULL << rsyncBits) - 1;
mtctx->rsync.primePower = ZSTD_rollingHash_primePower(RSYNC_LENGTH);
}
if (mtctx->targetSectionSize < mtctx->targetPrefixSize) mtctx->targetSectionSize = mtctx->targetPrefixSize; /* job size must be >= overlap size */
DEBUGLOG(4, "Job Size : %u KB (note : set to %u)", (U32)(mtctx->targetSectionSize>>10), params.jobSize);
DEBUGLOG(4, "Job Size : %u KB (note : set to %u)", (U32)(mtctx->targetSectionSize>>10), (U32)params.jobSize);
DEBUGLOG(4, "inBuff Size : %u KB", (U32)(mtctx->targetSectionSize>>10));
ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(mtctx->targetSectionSize));
{

View File

@ -29,11 +29,13 @@
/* === Constants === */
#define ZSTDMT_NBWORKERS_MAX 200
#ifndef ZSTDMT_JOBSIZE_MIN
# define ZSTDMT_JOBSIZE_MIN (1U << 20) /* 1 MB - Minimum size of each compression job */
#ifndef ZSTDMT_NBWORKERS_MAX
# define ZSTDMT_NBWORKERS_MAX 200
#endif
#define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (1 GB)) /* note : limited by `jobSize` type, which is `int` */
#ifndef ZSTDMT_JOBSIZE_MIN
# define ZSTDMT_JOBSIZE_MIN (1 MB)
#endif
#define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (1024 MB))
/* === Memory management === */
@ -74,7 +76,7 @@ ZSTDLIB_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
const void* src, size_t srcSize,
const ZSTD_CDict* cdict,
ZSTD_parameters params,
unsigned overlapLog);
int overlapLog);
ZSTDLIB_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
const void* dict, size_t dictSize, /* dict can be released after init, a local copy is preserved within zcs */
@ -89,9 +91,9 @@ ZSTDLIB_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
/* ZSTDMT_parameter :
* List of parameters that can be set using ZSTDMT_setMTCtxParameter() */
typedef enum {
ZSTDMT_p_jobSize, /* Each job is compressed in parallel. By default, this value is dynamically determined depending on compression parameters. Can be set explicitly here. */
ZSTDMT_p_overlapSectionLog, /* Each job may reload a part of previous job to enhance compressionr ratio; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window. This is a "sticky" parameter : its value will be re-used on next compression job */
ZSTDMT_p_rsyncable /* Enables rsyncable mode. */
ZSTDMT_p_jobSize, /* Each job is compressed in parallel. By default, this value is dynamically determined depending on compression parameters. Can be set explicitly here. */
ZSTDMT_p_overlapLog, /* Each job may reload a part of previous job to enhance compression ratio; 0 == default (determined by the library, depending on strategy), 1 == no overlap, 6 == use 1/8th of window, 9 == use full window. This is a "sticky" parameter : its value will be re-used on next compression job */
ZSTDMT_p_rsyncable /* Enables rsyncable mode. */
} ZSTDMT_parameter;
/* ZSTDMT_setMTCtxParameter() :
@ -99,12 +101,12 @@ typedef enum {
* The function must be called typically after ZSTD_createCCtx() but __before ZSTDMT_init*() !__
* Parameters not explicitly reset by ZSTDMT_init*() remain the same in consecutive compression sessions.
* @return : the value actually applied (possibly clamped), or an error code (which can be tested using ZSTD_isError()) */
ZSTDLIB_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned value);
ZSTDLIB_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int value);
/* ZSTDMT_getMTCtxParameter() :
* Query the ZSTDMT_CCtx for a parameter value.
* @return : 0, or an error code (which can be tested using ZSTD_isError()) */
ZSTDLIB_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned* value);
ZSTDLIB_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int* value);
/*! ZSTDMT_compressStream_generic() :
@ -135,7 +137,7 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx);
/*! ZSTDMT_CCtxParam_setMTCtxParameter()
* like ZSTDMT_setMTCtxParameter(), but into a ZSTD_CCtx_Params */
size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params, ZSTDMT_parameter parameter, unsigned value);
size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params, ZSTDMT_parameter parameter, int value);
/*! ZSTDMT_CCtxParam_setNbWorkers()
* Set nbWorkers, and clamp it.

View File

@ -603,15 +603,20 @@ typedef enum {
ZSTD_c_jobSize=401, /* Size of a compression job. This value is enforced only when nbWorkers >= 1.
* Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads.
* 0 means default, which is dynamically determined based on compression parameters.
* Job size must be a minimum of overlapSize, or 1 MB, whichever is largest.
* Job size must be a minimum of overlap size, or 1 MB, whichever is largest.
* The minimum size is automatically and transparently enforced */
ZSTD_c_overlapSizeLog=402, /* Size of previous job reloaded at the beginning of each job, as a fraction of window size.
ZSTD_c_overlapLog=402, /* Control the overlap size, as a fraction of window size.
* The overlap size is an amount of data reloaded from previous job at the beginning of a new job.
* It helps preserve compression ratio, while each job is compressed in parallel.
* This value is enforced only when nbWorkers >= 1.
* Larger values increase compression ratio, but decrease speed.
* Values range from 0 (no overlap) to 9 (overlap a full windowSize).
* Each rank (except 0) increase/decrease load size by a factor 2
* 9: full window; 8: w/2; 7: w/4; 6: w/8; 5:w/16; 4: w/32; 3:w/64; 2:w/128; 1:w/256;
* default value is 6 : use 1/8th of windowSize */
* Possible values range from 0 to 9 :
* - 0 means "default" : value will be determined by the library, depending on strategy
* - 1 means "no overlap"
* - 9 means "full overlap", using a full window size.
* Each intermediate rank increases/decreases load size by a factor 2 :
* 9: full window; 8: w/2; 7: w/4; 6: w/8; 5:w/16; 4: w/32; 3:w/64; 2:w/128; 1:no overlap; 0:default
* default value varies between 6 and 9, depending on strategy */
/* note : additional experimental parameters are also available
* within the experimental section of the API.

View File

@ -561,12 +561,9 @@ static cRess_t FIO_createCResources(const char* dictFileName, int cLevel,
DISPLAYLEVEL(5,"set nb workers = %u \n", g_nbWorkers);
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_nbWorkers, g_nbWorkers) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_jobSize, g_blockSize) );
if ( (g_overlapLog == FIO_OVERLAP_LOG_NOTSET)
&& (cLevel == ZSTD_maxCLevel()) )
g_overlapLog = 9; /* full overlap */
if (g_overlapLog != FIO_OVERLAP_LOG_NOTSET) {
DISPLAYLEVEL(3,"set overlapLog = %u \n", g_overlapLog);
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_overlapSizeLog, g_overlapLog) );
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_overlapLog, g_overlapLog) );
}
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_rsyncable, g_rsyncable) );
#endif

View File

@ -389,7 +389,7 @@ The minimum \fItlen\fR is 0 and the maximum is 999\.
Determine \fBoverlapSize\fR, amount of data reloaded from previous job\. This parameter is only available when multithreading is enabled\. Reloading more data improves compression ratio, but decreases speed\.
.
.IP
The minimum \fIovlog\fR is 0, and the maximum is 9\. 0 means "no overlap", hence completely independent jobs\. 9 means "full overlap", meaning up to \fBwindowSize\fR is reloaded from previous job\. Reducing \fIovlog\fR by 1 reduces the amount of reload by a factor 2\. Default \fIovlog\fR is 6, which means "reload \fBwindowSize / 8\fR"\. Exception : the maximum compression level (22) has a default \fIovlog\fR of 9\.
The minimum \fIovlog\fR is 0, and the maximum is 9\. 1 means "no overlap", hence completely independent jobs\. 9 means "full overlap", meaning up to \fBwindowSize\fR is reloaded from previous job\. Reducing \fIovlog\fR by 1 reduces the reloaded amount by a factor 2\. For example, 8 means "windowSize/2", and 6 means "windowSize/8"\. Value 0 is special and means "default" : \fIovlog\fR is automatically determined by \fBzstd\fR\. In which case, \fIovlog\fR will range from 6 to 9, depending on selected \fIstrat\fR\.
.
.TP
\fBldmHashLog\fR=\fIlhlog\fR, \fBlhlog\fR=\fIlhlog\fR

View File

@ -417,11 +417,12 @@ The list of available _options_:
Reloading more data improves compression ratio, but decreases speed.
The minimum _ovlog_ is 0, and the maximum is 9.
0 means "no overlap", hence completely independent jobs.
1 means "no overlap", hence completely independent jobs.
9 means "full overlap", meaning up to `windowSize` is reloaded from previous job.
Reducing _ovlog_ by 1 reduces the amount of reload by a factor 2.
Default _ovlog_ is 6, which means "reload `windowSize / 8`".
Exception : the maximum compression level (22) has a default _ovlog_ of 9.
Reducing _ovlog_ by 1 reduces the reloaded amount by a factor 2.
For example, 8 means "windowSize/2", and 6 means "windowSize/8".
Value 0 is special and means "default" : _ovlog_ is automatically determined by `zstd`.
In which case, _ovlog_ will range from 6 to 9, depending on selected _strat_.
- `ldmHashLog`=_lhlog_, `lhlog`=_lhlog_:
Specify the maximum size for a hash table used for long distance matching.

View File

@ -751,17 +751,17 @@ then
./datagen -g2MB > tmp
refSize=$($ZSTD tmp -6 -c --zstd=wlog=18 | wc -c)
ov9Size=$($ZSTD tmp -6 -c --zstd=wlog=18,ovlog=9 | wc -c)
ov0Size=$($ZSTD tmp -6 -c --zstd=wlog=18,ovlog=0 | wc -c)
ov1Size=$($ZSTD tmp -6 -c --zstd=wlog=18,ovlog=1 | wc -c)
if [ $refSize -eq $ov9Size ]; then
echo ov9Size should be different from refSize
exit 1
fi
if [ $refSize -eq $ov0Size ]; then
echo ov0Size should be different from refSize
if [ $refSize -eq $ov1Size ]; then
echo ov1Size should be different from refSize
exit 1
fi
if [ $ov9Size -ge $ov0Size ]; then
echo ov9Size=$ov9Size should be smaller than ov0Size=$ov0Size
if [ $ov9Size -ge $ov1Size ]; then
echo ov9Size=$ov9Size should be smaller than ov1Size=$ov1Size
exit 1
fi

View File

@ -95,7 +95,7 @@ static size_t cctxParamRoundTripTest(void* resultBuff, size_t resultBuffCapacity
/* Set parameters */
CHECK_Z( ZSTD_CCtxParam_setParameter(cctxParams, ZSTD_c_compressionLevel, cLevel) );
CHECK_Z( ZSTD_CCtxParam_setParameter(cctxParams, ZSTD_c_nbWorkers, 2) );
CHECK_Z( ZSTD_CCtxParam_setParameter(cctxParams, ZSTD_c_overlapSizeLog, 5) );
CHECK_Z( ZSTD_CCtxParam_setParameter(cctxParams, ZSTD_c_overlapLog, 5) );
/* Apply parameters */

View File

@ -836,7 +836,7 @@ static int basicUnitTests(U32 seed, double compressibility)
/* Basic multithreading compression test */
DISPLAYLEVEL(3, "test%3i : compress %u bytes with multiple threads : ", testNb++, COMPRESSIBLE_NOISE_LENGTH);
{ ZSTD_parameters const params = ZSTD_getParams(1, 0, 0);
unsigned jobSize;
int jobSize;
CHECK_Z( ZSTDMT_getMTCtxParameter(mtctx, ZSTDMT_p_jobSize, &jobSize));
CHECK(jobSize != 0, "job size non-zero");
CHECK_Z( ZSTDMT_initCStream_advanced(mtctx, CNBuffer, dictSize, params, CNBufferSize) );
@ -1547,7 +1547,7 @@ static int fuzzerTests_MT(U32 seed, U32 nbTests, unsigned startTest,
params.fParams.noDictIDFlag = FUZ_rand(&lseed) & 1;
params.fParams.contentSizeFlag = FUZ_rand(&lseed) & 1;
DISPLAYLEVEL(5, "checksumFlag : %u \n", params.fParams.checksumFlag);
CHECK_Z( ZSTDMT_setMTCtxParameter(zc, ZSTDMT_p_overlapSectionLog, FUZ_rand(&lseed) % 12) );
CHECK_Z( ZSTDMT_setMTCtxParameter(zc, ZSTDMT_p_overlapLog, FUZ_rand(&lseed) % 12) );
CHECK_Z( ZSTDMT_setMTCtxParameter(zc, ZSTDMT_p_jobSize, FUZ_rand(&lseed) % (2*maxTestSize+1)) ); /* custom job size */
CHECK_Z( ZSTDMT_initCStream_advanced(zc, dict, dictSize, params, pledgedSrcSize) );
} } }
@ -1904,7 +1904,7 @@ static int fuzzerTests_newAPI(U32 seed, U32 nbTests, unsigned startTest,
CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_nbWorkers, nbThreads, opaqueAPI) );
if (nbThreads > 1) {
U32 const jobLog = FUZ_rand(&lseed) % (testLog+1);
CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_overlapSizeLog, FUZ_rand(&lseed) % 10, opaqueAPI) );
CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_overlapLog, FUZ_rand(&lseed) % 10, opaqueAPI) );
CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_jobSize, (U32)FUZ_rLogLength(&lseed, jobLog), opaqueAPI) );
}
}