mirror of
https://github.com/facebook/zstd.git
synced 2025-10-17 00:07:08 -04:00
Fix spacing and Edit Makefile (now run with make instead of make run)
This commit is contained in:
parent
b5806d33db
commit
1f7fa5cdd6
@ -4,16 +4,15 @@ TEST_INPUT := ../../lib
|
|||||||
TEST_OUTPUT := randomDict
|
TEST_OUTPUT := randomDict
|
||||||
ARG :=
|
ARG :=
|
||||||
|
|
||||||
all: main testrun test clean
|
all: main run clean
|
||||||
|
|
||||||
run: main rand clean
|
test: main testrun testshell clean
|
||||||
|
|
||||||
.PHONY: rand
|
.PHONY: run
|
||||||
rand:
|
run:
|
||||||
echo "Building a random dictionary with given arguments"
|
echo "Building a random dictionary with given arguments"
|
||||||
./main $(ARG)
|
./main $(ARG)
|
||||||
|
|
||||||
|
|
||||||
main: main.o random.o libzstd.a
|
main: main.o random.o libzstd.a
|
||||||
gcc main.o random.o libzstd.a -o main
|
gcc main.o random.o libzstd.a -o main
|
||||||
|
|
||||||
@ -34,8 +33,8 @@ testrun: main
|
|||||||
zstd -be3 -D $(TEST_OUTPUT) -r $(TEST_INPUT) -q
|
zstd -be3 -D $(TEST_OUTPUT) -r $(TEST_INPUT) -q
|
||||||
rm -f $(TEST_OUTPUT)
|
rm -f $(TEST_OUTPUT)
|
||||||
|
|
||||||
.PHONY: test
|
.PHONY: testshell
|
||||||
test: test.sh
|
testshell: test.sh
|
||||||
sh test.sh
|
sh test.sh
|
||||||
echo "Finish running test.sh"
|
echo "Finish running test.sh"
|
||||||
|
|
||||||
|
@ -7,11 +7,14 @@ Dictionary ID (dictID=#): nonnegative number; if not provided, default to 0
|
|||||||
Maximum Dictionary Size (maxdict=#): positive number; in bytes; if not provided, default to 110KB
|
Maximum Dictionary Size (maxdict=#): positive number; in bytes; if not provided, default to 110KB
|
||||||
Size of Randomly Selected Segment (k=#): positive number; in bytes; if not provided, default to 200
|
Size of Randomly Selected Segment (k=#): positive number; in bytes; if not provided, default to 200
|
||||||
|
|
||||||
|
### Running Test:
|
||||||
|
make test
|
||||||
|
|
||||||
|
|
||||||
### Usage:
|
### Usage:
|
||||||
To build a random dictionary with the provided arguments: make run ARG= followed by arguments
|
To build a random dictionary with the provided arguments: make ARG= followed by arguments
|
||||||
|
|
||||||
|
|
||||||
### Examples:
|
### Examples:
|
||||||
make run ARG="in=../../lib/dictBuilder out=dict100 dictID=520"
|
make ARG="in=../../lib/dictBuilder out=dict100 dictID=520"
|
||||||
make run ARG="in=../../lib/dictBuilder in=../../lib/compress"
|
make ARG="in=../../lib/dictBuilder in=../../lib/compress"
|
||||||
|
@ -52,7 +52,8 @@ static const unsigned g_defaultMaxDictSize = 110 KB;
|
|||||||
#define SAMPLESIZE_MAX (128 KB)
|
#define SAMPLESIZE_MAX (128 KB)
|
||||||
#define RANDOM_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB))
|
#define RANDOM_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB))
|
||||||
#define RANDOM_MEMMULT 9
|
#define RANDOM_MEMMULT 9
|
||||||
static const size_t g_maxMemory = (sizeof(size_t) == 4) ? (2 GB - 64 MB) : ((size_t)(512 MB) << sizeof(size_t));
|
static const size_t g_maxMemory = (sizeof(size_t) == 4) ?
|
||||||
|
(2 GB - 64 MB) : ((size_t)(512 MB) << sizeof(size_t));
|
||||||
|
|
||||||
#define NOISELENGTH 32
|
#define NOISELENGTH 32
|
||||||
|
|
||||||
@ -76,8 +77,7 @@ typedef struct {
|
|||||||
/*-*************************************
|
/*-*************************************
|
||||||
* Commandline related functions
|
* Commandline related functions
|
||||||
***************************************/
|
***************************************/
|
||||||
static unsigned readU32FromChar(const char** stringPtr)
|
static unsigned readU32FromChar(const char** stringPtr){
|
||||||
{
|
|
||||||
const char errorMsg[] = "error: numeric value too large";
|
const char errorMsg[] = "error: numeric value too large";
|
||||||
unsigned result = 0;
|
unsigned result = 0;
|
||||||
while ((**stringPtr >='0') && (**stringPtr <='9')) {
|
while ((**stringPtr >='0') && (**stringPtr <='9')) {
|
||||||
@ -105,8 +105,7 @@ static unsigned readU32FromChar(const char** stringPtr)
|
|||||||
* If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand.
|
* If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand.
|
||||||
* @return 0 and doesn't modify *stringPtr otherwise.
|
* @return 0 and doesn't modify *stringPtr otherwise.
|
||||||
*/
|
*/
|
||||||
static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
|
static unsigned longCommandWArg(const char** stringPtr, const char* longCommand){
|
||||||
{
|
|
||||||
size_t const comSize = strlen(longCommand);
|
size_t const comSize = strlen(longCommand);
|
||||||
int const result = !strncmp(*stringPtr, longCommand, comSize);
|
int const result = !strncmp(*stringPtr, longCommand, comSize);
|
||||||
if (result) *stringPtr += comSize;
|
if (result) *stringPtr += comSize;
|
||||||
@ -125,11 +124,9 @@ static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
|
|||||||
* *bufferSizePtr is modified, it provides the amount of data loaded within buffer.
|
* *bufferSizePtr is modified, it provides the amount of data loaded within buffer.
|
||||||
* sampleSizes is filled with the size of each sample.
|
* sampleSizes is filled with the size of each sample.
|
||||||
*/
|
*/
|
||||||
static unsigned loadFiles(void* buffer, size_t* bufferSizePtr,
|
static unsigned loadFiles(void* buffer, size_t* bufferSizePtr, size_t* sampleSizes,
|
||||||
size_t* sampleSizes, unsigned sstSize,
|
unsigned sstSize, const char** fileNamesTable, unsigned nbFiles,
|
||||||
const char** fileNamesTable, unsigned nbFiles, size_t targetChunkSize,
|
size_t targetChunkSize, unsigned displayLevel) {
|
||||||
unsigned displayLevel)
|
|
||||||
{
|
|
||||||
char* const buff = (char*)buffer;
|
char* const buff = (char*)buffer;
|
||||||
size_t pos = 0;
|
size_t pos = 0;
|
||||||
unsigned nbLoadedChunks = 0, fileIndex;
|
unsigned nbLoadedChunks = 0, fileIndex;
|
||||||
@ -200,8 +197,7 @@ static void shuffle(const char** fileNamesTable, unsigned nbFiles) {
|
|||||||
/*-********************************************************
|
/*-********************************************************
|
||||||
* Dictionary training functions
|
* Dictionary training functions
|
||||||
**********************************************************/
|
**********************************************************/
|
||||||
static size_t findMaxMem(unsigned long long requiredMem)
|
static size_t findMaxMem(unsigned long long requiredMem) {
|
||||||
{
|
|
||||||
size_t const step = 8 MB;
|
size_t const step = 8 MB;
|
||||||
void* testmem = NULL;
|
void* testmem = NULL;
|
||||||
|
|
||||||
@ -219,8 +215,7 @@ static size_t findMaxMem(unsigned long long requiredMem)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void saveDict(const char* dictFileName,
|
static void saveDict(const char* dictFileName,
|
||||||
const void* buff, size_t buffSize)
|
const void* buff, size_t buffSize) {
|
||||||
{
|
|
||||||
FILE* const f = fopen(dictFileName, "wb");
|
FILE* const f = fopen(dictFileName, "wb");
|
||||||
if (f==NULL) EXM_THROW(3, "cannot open %s ", dictFileName);
|
if (f==NULL) EXM_THROW(3, "cannot open %s ", dictFileName);
|
||||||
|
|
||||||
@ -236,8 +231,8 @@ static void saveDict(const char* dictFileName,
|
|||||||
* provides the amount of data to be loaded and the resulting nb of samples.
|
* provides the amount of data to be loaded and the resulting nb of samples.
|
||||||
* This is useful primarily for allocation purposes => sample buffer, and sample sizes table.
|
* This is useful primarily for allocation purposes => sample buffer, and sample sizes table.
|
||||||
*/
|
*/
|
||||||
static fileStats getFileStats(const char** fileNamesTable, unsigned nbFiles, size_t chunkSize, unsigned displayLevel)
|
static fileStats getFileStats(const char** fileNamesTable, unsigned nbFiles,
|
||||||
{
|
size_t chunkSize, unsigned displayLevel) {
|
||||||
fileStats fs;
|
fileStats fs;
|
||||||
unsigned n;
|
unsigned n;
|
||||||
memset(&fs, 0, sizeof(fs));
|
memset(&fs, 0, sizeof(fs));
|
||||||
@ -255,8 +250,9 @@ static fileStats getFileStats(const char** fileNamesTable, unsigned nbFiles, siz
|
|||||||
return fs;
|
return fs;
|
||||||
}
|
}
|
||||||
|
|
||||||
int RANDOM_trainFromFiles(const char* dictFileName, sampleInfo *info, unsigned maxDictSize,
|
int RANDOM_trainFromFiles(const char* dictFileName, sampleInfo *info,
|
||||||
ZDICT_random_params_t *params){
|
unsigned maxDictSize,
|
||||||
|
ZDICT_random_params_t *params) {
|
||||||
unsigned const displayLevel = params->zParams.notificationLevel;
|
unsigned const displayLevel = params->zParams.notificationLevel;
|
||||||
void* const dictBuffer = malloc(maxDictSize);
|
void* const dictBuffer = malloc(maxDictSize);
|
||||||
|
|
||||||
@ -285,8 +281,8 @@ int RANDOM_trainFromFiles(const char* dictFileName, sampleInfo *info, unsigned m
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
sampleInfo* getSampleInfo(const char** fileNamesTable,
|
sampleInfo* getSampleInfo(const char** fileNamesTable, unsigned nbFiles, size_t chunkSize,
|
||||||
unsigned nbFiles, size_t chunkSize, unsigned maxDictSize, const unsigned displayLevel){
|
unsigned maxDictSize, const unsigned displayLevel) {
|
||||||
fileStats const fs = getFileStats(fileNamesTable, nbFiles, chunkSize, displayLevel);
|
fileStats const fs = getFileStats(fileNamesTable, nbFiles, chunkSize, displayLevel);
|
||||||
size_t* const sampleSizes = (size_t*)malloc(fs.nbSamples * sizeof(size_t));
|
size_t* const sampleSizes = (size_t*)malloc(fs.nbSamples * sizeof(size_t));
|
||||||
size_t const memMult = RANDOM_MEMMULT;
|
size_t const memMult = RANDOM_MEMMULT;
|
||||||
@ -320,7 +316,8 @@ sampleInfo* getSampleInfo(const char** fileNamesTable,
|
|||||||
/* Load input buffer */
|
/* Load input buffer */
|
||||||
DISPLAYLEVEL(3, "Shuffling input files\n");
|
DISPLAYLEVEL(3, "Shuffling input files\n");
|
||||||
shuffle(fileNamesTable, nbFiles);
|
shuffle(fileNamesTable, nbFiles);
|
||||||
nbFiles = loadFiles(srcBuffer, &loadedSize, sampleSizes, fs.nbSamples, fileNamesTable, nbFiles, chunkSize, displayLevel);
|
nbFiles = loadFiles(srcBuffer, &loadedSize, sampleSizes, fs.nbSamples,
|
||||||
|
fileNamesTable, nbFiles, chunkSize, displayLevel);
|
||||||
|
|
||||||
sampleInfo *info = (sampleInfo *)malloc(sizeof(sampleInfo));
|
sampleInfo *info = (sampleInfo *)malloc(sizeof(sampleInfo));
|
||||||
|
|
||||||
@ -376,7 +373,8 @@ int main(int argCount, const char* argv[])
|
|||||||
unsigned fileNamesNb = filenameIdx;
|
unsigned fileNamesNb = filenameIdx;
|
||||||
int followLinks = 0;
|
int followLinks = 0;
|
||||||
const char** extendedFileList = NULL;
|
const char** extendedFileList = NULL;
|
||||||
extendedFileList = UTIL_createFileList(filenameTable, filenameIdx, &fileNamesBuf, &fileNamesNb, followLinks);
|
extendedFileList = UTIL_createFileList(filenameTable, filenameIdx, &fileNamesBuf,
|
||||||
|
&fileNamesNb, followLinks);
|
||||||
if (extendedFileList) {
|
if (extendedFileList) {
|
||||||
unsigned u;
|
unsigned u;
|
||||||
for (u=0; u<fileNamesNb; u++) DISPLAYLEVEL(4, "%u %s\n", u, extendedFileList[u]);
|
for (u=0; u<fileNamesNb; u++) DISPLAYLEVEL(4, "%u %s\n", u, extendedFileList[u]);
|
||||||
|
@ -71,7 +71,8 @@ static RANDOM_segment_t RANDOM_selectSegment(const size_t totalSamplesSize,
|
|||||||
* Check the validity of the parameters.
|
* Check the validity of the parameters.
|
||||||
* Returns non-zero if the parameters are valid and 0 otherwise.
|
* Returns non-zero if the parameters are valid and 0 otherwise.
|
||||||
*/
|
*/
|
||||||
static int RANDOM_checkParameters(ZDICT_random_params_t parameters, size_t maxDictSize) {
|
static int RANDOM_checkParameters(ZDICT_random_params_t parameters,
|
||||||
|
size_t maxDictSize) {
|
||||||
/* k is a required parameter */
|
/* k is a required parameter */
|
||||||
if (parameters.k == 0) {
|
if (parameters.k == 0) {
|
||||||
return 0;
|
return 0;
|
||||||
@ -115,7 +116,8 @@ static size_t RANDOM_buildDictionary(const size_t totalSamplesSize, const BYTE *
|
|||||||
/*! ZDICT_trainFromBuffer_random():
|
/*! ZDICT_trainFromBuffer_random():
|
||||||
* Train a dictionary from an array of samples using the RANDOM algorithm.
|
* Train a dictionary from an array of samples using the RANDOM algorithm.
|
||||||
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
|
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
|
||||||
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
|
* supplied with an array of sizes `samplesSizes`, providing the size of each
|
||||||
|
* sample, in order.
|
||||||
* The resulting dictionary will be saved into `dictBuffer`.
|
* The resulting dictionary will be saved into `dictBuffer`.
|
||||||
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
||||||
* or an error code, which can be tested with ZDICT_isError().
|
* or an error code, which can be tested with ZDICT_isError().
|
||||||
@ -145,7 +147,8 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_random(
|
|||||||
|
|
||||||
DISPLAYLEVEL(2, "Building dictionary\n");
|
DISPLAYLEVEL(2, "Building dictionary\n");
|
||||||
{
|
{
|
||||||
const size_t tail = RANDOM_buildDictionary(totalSamplesSize, samples, dictBuffer, dictBufferCapacity, parameters);
|
const size_t tail = RANDOM_buildDictionary(totalSamplesSize, samples,
|
||||||
|
dictBuffer, dictBufferCapacity, parameters);
|
||||||
const size_t dictSize = ZDICT_finalizeDictionary(
|
const size_t dictSize = ZDICT_finalizeDictionary(
|
||||||
dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
|
dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
|
||||||
samplesBuffer, samplesSizes, nbSamples, parameters.zParams);
|
samplesBuffer, samplesSizes, nbSamples, parameters.zParams);
|
||||||
|
@ -18,13 +18,12 @@ typedef struct {
|
|||||||
|
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+]; Default to 200 */
|
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+]; Default to 200 */
|
||||||
ZDICT_params_t zParams;
|
ZDICT_params_t zParams;
|
||||||
} ZDICT_random_params_t;
|
} ZDICT_random_params_t;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
ZDICTLIB_API size_t ZDICT_trainFromBuffer_random(
|
ZDICTLIB_API size_t ZDICT_trainFromBuffer_random( void *dictBuffer, size_t dictBufferCapacity,
|
||||||
void *dictBuffer, size_t dictBufferCapacity,
|
|
||||||
const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
|
const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
|
||||||
ZDICT_random_params_t parameters);
|
ZDICT_random_params_t parameters);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user