mirror of
https://github.com/facebook/zstd.git
synced 2025-11-27 00:05:09 -05:00
[zdict] Stabilize ZDICT_finalizeDictionary()
This commit is contained in:
parent
625924774e
commit
45c66dd298
@ -61,6 +61,53 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCap
|
|||||||
const void* samplesBuffer,
|
const void* samplesBuffer,
|
||||||
const size_t* samplesSizes, unsigned nbSamples);
|
const size_t* samplesSizes, unsigned nbSamples);
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
int compressionLevel; /*< optimize for a specific zstd compression level; 0 means default */
|
||||||
|
unsigned notificationLevel; /*< Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
|
||||||
|
unsigned dictID; /*< force dictID value; 0 means auto mode (32-bits random value) */
|
||||||
|
} ZDICT_params_t;
|
||||||
|
|
||||||
|
/*! ZDICT_finalizeDictionary():
|
||||||
|
* Given a custom content as a basis for dictionary, and a set of samples,
|
||||||
|
* finalize dictionary by adding headers and statistics according to the zstd
|
||||||
|
* dictionary format.
|
||||||
|
*
|
||||||
|
* Samples must be stored concatenated in a flat buffer `samplesBuffer`,
|
||||||
|
* supplied with an array of sizes `samplesSizes`, providing the size of each
|
||||||
|
* sample in order. The samples are used to construct the statistics, so they
|
||||||
|
* should be representative of what you will compress with this dictionary.
|
||||||
|
*
|
||||||
|
* The compression level can be set in `parameters`. You should pass the
|
||||||
|
* compression level you expect to use in production. The statistics for each
|
||||||
|
* compression level differ, so tuning the dictionary for the compression level
|
||||||
|
* can help quite a bit.
|
||||||
|
*
|
||||||
|
* You can set an explicit dictionary ID in `parameters`, or allow us to pick
|
||||||
|
* a random dictionary ID for you, but we can't guarantee no collisions.
|
||||||
|
*
|
||||||
|
* The dstDictBuffer and the dictContent may overlap, and the content will be
|
||||||
|
* appended to the end of the header. If the header + the content doesn't fit in
|
||||||
|
* maxDictSize, the beginning of the content is truncated to make room, since it
|
||||||
|
* is presumed that the most profitable content is at the end of the dictionary,
|
||||||
|
* since that is the cheapest to reference.
|
||||||
|
*
|
||||||
|
* `dictContentSize` must be >= ZDICT_CONTENTSIZE_MIN bytes.
|
||||||
|
* `maxDictSize` must be >= max(dictContentSize, ZDICT_DICTSIZE_MIN).
|
||||||
|
*
|
||||||
|
* @return: size of dictionary stored into `dstDictBuffer` (<= `maxDictSize`),
|
||||||
|
* or an error code, which can be tested by ZDICT_isError().
|
||||||
|
* Note: ZDICT_finalizeDictionary() will push notifications into stderr if
|
||||||
|
* instructed to, using notificationLevel>0.
|
||||||
|
* NOTE: This function currently may fail in several edge cases including:
|
||||||
|
* * Not enough samples
|
||||||
|
* * Samples are uncompressible
|
||||||
|
* * Samples are all exactly the same
|
||||||
|
*/
|
||||||
|
ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dstDictBuffer, size_t maxDictSize,
|
||||||
|
const void* dictContent, size_t dictContentSize,
|
||||||
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
||||||
|
ZDICT_params_t parameters);
|
||||||
|
|
||||||
|
|
||||||
/*====== Helper functions ======*/
|
/*====== Helper functions ======*/
|
||||||
ZDICTLIB_API unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize); /**< extracts dictID; @return zero if error (not a valid dictionary) */
|
ZDICTLIB_API unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize); /**< extracts dictID; @return zero if error (not a valid dictionary) */
|
||||||
@ -79,11 +126,8 @@ ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
|
|||||||
* Use them only in association with static linking.
|
* Use them only in association with static linking.
|
||||||
* ==================================================================================== */
|
* ==================================================================================== */
|
||||||
|
|
||||||
typedef struct {
|
#define ZDICT_CONTENTSIZE_MIN 128
|
||||||
int compressionLevel; /* optimize for a specific zstd compression level; 0 means default */
|
#define ZDICT_DICTSIZE_MIN 256
|
||||||
unsigned notificationLevel; /* Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
|
|
||||||
unsigned dictID; /* force dictID value; 0 means auto mode (32-bits random value) */
|
|
||||||
} ZDICT_params_t;
|
|
||||||
|
|
||||||
/*! ZDICT_cover_params_t:
|
/*! ZDICT_cover_params_t:
|
||||||
* k and d are the only required parameters.
|
* k and d are the only required parameters.
|
||||||
@ -199,28 +243,6 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer,
|
|||||||
const size_t* samplesSizes, unsigned nbSamples,
|
const size_t* samplesSizes, unsigned nbSamples,
|
||||||
ZDICT_fastCover_params_t* parameters);
|
ZDICT_fastCover_params_t* parameters);
|
||||||
|
|
||||||
/*! ZDICT_finalizeDictionary():
|
|
||||||
* Given a custom content as a basis for dictionary, and a set of samples,
|
|
||||||
* finalize dictionary by adding headers and statistics.
|
|
||||||
*
|
|
||||||
* Samples must be stored concatenated in a flat buffer `samplesBuffer`,
|
|
||||||
* supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
|
|
||||||
*
|
|
||||||
* dictContentSize must be >= ZDICT_CONTENTSIZE_MIN bytes.
|
|
||||||
* maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes.
|
|
||||||
*
|
|
||||||
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
|
|
||||||
* or an error code, which can be tested by ZDICT_isError().
|
|
||||||
* Note: ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
|
|
||||||
* Note 2: dictBuffer and dictContent can overlap
|
|
||||||
*/
|
|
||||||
#define ZDICT_CONTENTSIZE_MIN 128
|
|
||||||
#define ZDICT_DICTSIZE_MIN 256
|
|
||||||
ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
|
||||||
const void* dictContent, size_t dictContentSize,
|
|
||||||
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
|
||||||
ZDICT_params_t parameters);
|
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
unsigned selectivityLevel; /* 0 means default; larger => select more => larger dictionary */
|
unsigned selectivityLevel; /* 0 means default; larger => select more => larger dictionary */
|
||||||
ZDICT_params_t zParams;
|
ZDICT_params_t zParams;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user