Merge branch 'dev' into fix1241

This commit is contained in:
Yann Collet 2018-08-03 16:08:33 -07:00
commit 5808027abf
13 changed files with 278 additions and 188 deletions

View File

@ -10,33 +10,34 @@ addons:
matrix:
include:
# Ubuntu 14.04
- env: Cmd='make gcc6install && CC=gcc-6 make -j all && make clean && CC=gcc-6 make clean uasan-test-zstd </dev/null' # also test when stdin is not a tty
- env: Cmd='make test'
- env: Cmd='make gcc6install && CC=gcc-6 make -j all
&& make clean && CC=gcc-6 make clean uasan-test-zstd </dev/null' # also test when stdin is not a tty
- env: Cmd='make gcc6install libc6install && CC=gcc-6 make clean uasan-test-zstd32'
- env: Cmd='make gcc7install && CC=gcc-7 make clean uasan-test-zstd'
- env: Cmd='make clang38install && CC=clang-3.8 make clean msan-test-zstd'
- env: Cmd='make gcc6install && CC=gcc-6 make clean uasan-fuzztest'
- env: Cmd='make gcc6install libc6install && CC=gcc-6 CFLAGS=-m32 make clean uasan-fuzztest'
- env: Cmd='make gcc6install libc6install
&& make clean && CC=gcc-6 CFLAGS=-m32 make uasan-fuzztest'
- env: Cmd='make clang38install && CC=clang-3.8 make clean msan-fuzztest'
- env: Cmd='make clang38install && CC=clang-3.8 make clean tsan-test-zstream'
- env: Cmd='make -C tests test-fuzzer-stackmode'
- env: Cmd='make valgrindinstall && make -C tests clean valgrindTest'
- env: Cmd='make arminstall && make armfuzz'
# Following test is disabled, as there is a bug in Travis' ld
# preventing aarch64 compilation to complete.
# > collect2: error: ld terminated with signal 11 [Segmentation fault], core dumped
# to be re-enabled in a few commits, as it's possible that a random code change circumvents the ld bug
# - env: Cmd='make arminstall && make aarch64fuzz'
- env: Cmd='make arminstall && make aarch64fuzz'
- env: Cmd='make ppcinstall && make ppcfuzz'
- env: Cmd='make ppcinstall && make ppc64fuzz'
- env: Cmd='make -j uasanregressiontest && make clean && make -j msanregressiontest'
- env: Cmd='make lz4install && make -C tests test-lz4 test-pool && make clean && bash tests/libzstd_partial_builds.sh'
- env: Cmd='make -j uasanregressiontest
&& make clean && make -j msanregressiontest'
- env: Cmd='make valgrindinstall && make -C tests clean valgrindTest
&& make clean && make -C tests test-fuzzer-stackmode'
- env: Cmd='make lz4install && make -C tests test-lz4
&& make clean && make -C tests test-pool
&& make clean && bash tests/libzstd_partial_builds.sh'
# tag-specific test
- if: tag =~ ^v[0-9]\.[0-9]
@ -49,6 +50,7 @@ branches:
only:
- dev
- master
- travisTest
script:
- JOB_NUMBER=$(echo $TRAVIS_JOB_NUMBER | sed -e 's:[0-9][0-9]*\.\(.*\):\1:')

View File

@ -63,8 +63,9 @@ zlibwrapper:
$(MAKE) -C $(ZWRAPDIR) test
.PHONY: test
test: MOREFLAGS += -g -DDEBUGLEVEL=1 -Werror
test:
MOREFLAGS+="-g -DDEBUGLEVEL=1 -Werror" $(MAKE) -C $(PRGDIR) allVariants
MOREFLAGS="$(MOREFLAGS)" $(MAKE) -j -C $(PRGDIR) allVariants
$(MAKE) -C $(TESTDIR) $@
.PHONY: shortest

View File

@ -18,109 +18,109 @@ make ARG="in=../../../lib/dictBuilder in=../../../lib/compress"
- Fourth column is chosen d and fifth column is chosen k
github:
NODICT 0.000005 2.999642
RANDOM 0.141553 8.786957
LEGACY 0.904340 8.989482
COVER 53.621302 10.641263 8 1298
COVER 4.085037 10.641263 8 1298
FAST15 17.636211 10.586461 8 1778
FAST15 0.221236 10.586461 8 1778
FAST16 18.716259 10.492503 6 1778
FAST16 0.251522 10.492503 6 1778
FAST17 17.614391 10.611737 8 1778
FAST17 0.241011 10.611737 8 1778
FAST18 19.926270 10.621586 8 1778
FAST18 0.287195 10.621586 8 1778
FAST19 19.626808 10.629626 8 1778
FAST19 0.340191 10.629626 8 1778
FAST20 18.918657 10.610308 8 1778
FAST20 0.463307 10.610308 8 1778
FAST21 20.502362 10.625733 8 1778
FAST21 0.638202 10.625733 8 1778
FAST22 22.702695 10.625281 8 1778
FAST22 1.353399 10.625281 8 1778
FAST23 28.041990 10.602342 8 1778
FAST23 3.029502 10.602342 8 1778
FAST24 35.662961 10.603379 8 1778
FAST24 6.524258 10.603379 8 1778
NODICT 0.000025 2.999642
RANDOM 0.030101 8.791189
LEGACY 0.913108 8.173529
COVER 59.234160 10.652243 8 1298
COVER 6.258459 10.652243 8 1298
FAST15 9.959246 10.555630 8 1874
FAST15 0.077719 10.555630 8 1874
FAST16 10.028343 10.701698 8 1106
FAST16 0.078117 10.701698 8 1106
FAST17 10.567355 10.650652 8 1106
FAST17 0.124833 10.650652 8 1106
FAST18 11.795287 10.499142 8 1826
FAST18 0.086992 10.499142 8 1826
FAST19 13.132451 10.527140 8 1826
FAST19 0.134716 10.527140 8 1826
FAST20 14.366314 10.494710 8 1826
FAST20 0.128844 10.494710 8 1826
FAST21 14.941238 10.503488 8 1778
FAST21 0.134975 10.503488 8 1778
FAST22 15.146226 10.509284 8 1826
FAST22 0.146918 10.509284 8 1826
FAST23 16.260552 10.509284 8 1826
FAST23 0.158494 10.509284 8 1826
FAST24 16.806037 10.512369 8 1826
FAST24 0.190464 10.512369 8 1826
hg-commands:
NODICT 0.000005 2.425291
RANDOM 0.080469 3.489515
LEGACY 0.794417 3.911896
COVER 54.198788 4.131136 8 386
COVER 2.191729 4.131136 8 386
FAST15 11.852793 3.903719 6 1106
FAST15 0.175406 3.903719 6 1106
FAST16 12.863315 4.005077 8 530
FAST16 0.158410 4.005077 8 530
FAST17 11.977917 4.097811 8 818
FAST17 0.162381 4.097811 8 818
FAST18 11.749304 4.136081 8 770
FAST18 0.173242 4.136081 8 770
FAST19 11.905785 4.166021 8 530
FAST19 0.186403 4.166021 8 530
FAST20 13.293999 4.163740 8 482
FAST20 0.241508 4.163740 8 482
FAST21 16.623177 4.157057 8 434
FAST21 0.372647 4.157057 8 434
FAST22 20.918409 4.158195 8 290
FAST22 0.570431 4.158195 8 290
FAST23 21.762805 4.161450 8 434
FAST23 1.162206 4.161450 8 434
FAST24 29.133745 4.159658 8 338
FAST24 3.054376 4.159658 8 338
NODICT 0.000026 2.425291
RANDOM 0.046270 3.490331
LEGACY 0.847904 3.911682
COVER 71.691804 4.132653 8 386
COVER 3.187085 4.132653 8 386
FAST15 11.593687 3.920720 6 1106
FAST15 0.082431 3.920720 6 1106
FAST16 11.775958 4.033306 8 674
FAST16 0.092587 4.033306 8 674
FAST17 11.965064 4.064132 8 1490
FAST17 0.106382 4.064132 8 1490
FAST18 11.438197 4.086714 8 290
FAST18 0.097293 4.086714 8 290
FAST19 12.292512 4.097947 8 578
FAST19 0.104406 4.097947 8 578
FAST20 13.857857 4.102851 8 434
FAST20 0.139467 4.102851 8 434
FAST21 14.599613 4.105350 8 530
FAST21 0.189416 4.105350 8 530
FAST22 15.966109 4.104100 8 530
FAST22 0.183817 4.104100 8 530
FAST23 18.033645 4.098110 8 914
FAST23 0.246641 4.098110 8 914
FAST24 22.992891 4.117367 8 722
FAST24 0.285994 4.117367 8 722
hg-changelog:
NODICT 0.000006 1.377613
RANDOM 0.601346 2.096785
LEGACY 2.544973 2.058273
COVER 222.639708 2.188654 8 98
COVER 6.072892 2.188654 8 98
FAST15 70.394523 2.127194 8 866
FAST15 0.899766 2.127194 8 866
FAST16 69.845529 2.145401 8 338
FAST16 0.881569 2.145401 8 338
FAST17 69.382431 2.157544 8 194
FAST17 0.943291 2.157544 8 194
FAST18 71.348283 2.173127 8 98
FAST18 1.034765 2.173127 8 98
FAST19 71.380923 2.179527 8 98
FAST19 1.254700 2.179527 8 98
FAST20 72.802714 2.183233 6 98
FAST20 1.368704 2.183233 6 98
FAST21 82.042339 2.180920 8 98
FAST21 2.213864 2.180920 8 98
FAST22 90.666200 2.184297 8 98
FAST22 3.590399 2.184297 8 98
FAST23 108.926377 2.187666 6 98
FAST23 8.723759 2.187666 6 98
FAST24 134.296232 2.189889 6 98
FAST24 19.396532 2.189889 6 98
NODICT 0.000007 1.377613
RANDOM 0.297345 2.097487
LEGACY 2.633992 2.058907
COVER 219.179786 2.189685 8 98
COVER 6.620852 2.189685 8 98
FAST15 47.635082 2.130794 6 386
FAST15 0.321297 2.130794 6 386
FAST16 43.837676 2.144845 8 194
FAST16 0.312640 2.144845 8 194
FAST17 49.349017 2.156099 8 242
FAST17 0.348459 2.156099 8 242
FAST18 51.153784 2.172439 6 98
FAST18 0.353106 2.172439 6 98
FAST19 52.627045 2.180321 6 98
FAST19 0.390612 2.180321 6 98
FAST20 63.748782 2.187431 6 98
FAST20 0.489544 2.187431 6 98
FAST21 68.709198 2.184185 6 146
FAST21 0.530852 2.184185 6 146
FAST22 68.491639 2.182830 6 98
FAST22 0.645699 2.182830 6 98
FAST23 72.558688 2.186399 8 98
FAST23 0.593539 2.186399 8 98
FAST24 76.137195 2.185608 6 98
FAST24 0.680132 2.185608 6 98
hg-manifest:
NODICT 0.000005 1.866385
RANDOM 0.982192 2.309485
LEGACY 9.507729 2.506775
COVER 922.742066 2.582597 8 434
COVER 36.500276 2.582597 8 434
FAST15 163.886717 2.377689 8 1682
FAST15 2.107328 2.377689 8 1682
FAST16 152.684592 2.464814 8 1538
FAST16 2.157789 2.464814 8 1538
FAST17 154.463459 2.539834 6 1826
FAST17 2.282455 2.539834 6 1826
FAST18 155.540044 2.576924 8 1922
FAST18 2.101807 2.576924 8 1922
FAST19 152.650343 2.592479 6 290
FAST19 2.359461 2.592479 6 290
FAST20 174.623634 2.594551 8 194
FAST20 2.870022 2.594551 8 194
FAST21 219.876653 2.597128 6 194
FAST21 4.386269 2.597128 6 194
FAST22 247.986803 2.596971 6 386
FAST22 6.201144 2.596971 6 386
FAST23 276.051806 2.601416 8 194
FAST23 11.613477 2.601416 8 194
FAST24 328.234024 2.602830 6 194
FAST24 26.710364 2.602830 6 194
NODICT 0.000026 1.866385
RANDOM 0.784554 2.309436
LEGACY 10.193714 2.506977
COVER 988.206583 2.582528 8 434
COVER 39.726199 2.582528 8 434
FAST15 168.388819 2.392920 6 1826
FAST15 1.272178 2.392920 6 1826
FAST16 161.822607 2.480762 6 1922
FAST16 1.164908 2.480762 6 1922
FAST17 157.688544 2.548285 6 1682
FAST17 1.222439 2.548285 6 1682
FAST18 154.529585 2.567634 6 386
FAST18 1.217596 2.567634 6 386
FAST19 160.244979 2.581653 8 338
FAST19 1.282450 2.581653 8 338
FAST20 191.503297 2.586881 8 194
FAST20 2.009748 2.586881 8 194
FAST21 226.389709 2.590051 6 242
FAST21 2.494543 2.590051 6 242
FAST22 217.859055 2.591376 6 194
FAST22 2.295693 2.591376 6 194
FAST23 236.819791 2.591131 8 434
FAST23 2.744711 2.591131 8 434
FAST24 269.187800 2.591548 6 290
FAST24 2.923671 2.591548 6 290

View File

@ -91,14 +91,26 @@ dictInfo* createDictFromFiles(sampleInfo *info, unsigned maxDictSize,
dictSize = ZDICT_trainFromBuffer_random(dictBuffer, maxDictSize, info->srcBuffer,
info->samplesSizes, info->nbSamples, *randomParams);
}else if(coverParams) {
dictSize = ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, maxDictSize, info->srcBuffer,
info->samplesSizes, info->nbSamples, coverParams);
/* Run the optimize version if either k or d is not provided */
if (!coverParams->d || !coverParams->k){
dictSize = ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, maxDictSize, info->srcBuffer,
info->samplesSizes, info->nbSamples, coverParams);
} else {
dictSize = ZDICT_trainFromBuffer_cover(dictBuffer, maxDictSize, info->srcBuffer,
info->samplesSizes, info->nbSamples, *coverParams);
}
} else if(legacyParams) {
dictSize = ZDICT_trainFromBuffer_legacy(dictBuffer, maxDictSize, info->srcBuffer,
info->samplesSizes, info->nbSamples, *legacyParams);
} else if(fastParams) {
dictSize = ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, maxDictSize, info->srcBuffer,
info->samplesSizes, info->nbSamples, fastParams);
/* Run the optimize version if either k or d is not provided */
if (!fastParams->d || !fastParams->k) {
dictSize = ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, maxDictSize, info->srcBuffer,
info->samplesSizes, info->nbSamples, fastParams);
} else {
dictSize = ZDICT_trainFromBuffer_fastCover(dictBuffer, maxDictSize, info->srcBuffer,
info->samplesSizes, info->nbSamples, *fastParams);
}
} else {
dictSize = 0;
}
@ -403,7 +415,6 @@ int main(int argCount, const char* argv[])
goto _cleanup;
}
/* for fastCover (with k and d provided) */
const int fastResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL, &fastParam);
DISPLAYLEVEL(2, "k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\n", fastParam.k, fastParam.d, fastParam.f, fastParam.steps, (unsigned)(fastParam.splitPoint * 100));
@ -411,7 +422,6 @@ int main(int argCount, const char* argv[])
result = 1;
goto _cleanup;
}
}

View File

@ -16,8 +16,8 @@ make test
### Usage:
To build a random dictionary with the provided arguments: make ARG= followed by arguments
To build a FASTCOVER dictionary with the provided arguments: make ARG= followed by arguments
If k or d is not provided, the optimize version of FASTCOVER is run.
### Examples:
make ARG="in=../../../lib/dictBuilder out=dict100 dictID=520"

View File

@ -266,25 +266,17 @@ static void FASTCOVER_ctx_destroy(FASTCOVER_ctx_t *ctx) {
* Calculate the frequency of the hash value of each dmer in ctx->samples
*/
static void FASTCOVER_computeFrequency(U32 *freqs, unsigned f, FASTCOVER_ctx_t *ctx){
/* inCurrSample keeps track of whether this hash value has already been seen in previous dmers of the same sample */
BYTE* inCurrSample = (BYTE *)malloc((1 << f) * sizeof(BYTE));
size_t start; /* start of current dmer */
for (unsigned i = 0; i < ctx->nbTrainSamples; i++) {
memset(inCurrSample, 0, (1 << f) * sizeof(*inCurrSample)); /* Reset inCurrSample for each sample */
size_t currSampleStart = ctx->offsets[i];
size_t currSampleEnd = ctx->offsets[i+1];
start = currSampleStart;
while (start + ctx->d <= currSampleEnd) {
const size_t dmerIndex = FASTCOVER_hashPtrToIndex(ctx->samples + start, f, ctx->d);
/* if no dmer with same hash value has been seen in current sample */
if (inCurrSample[dmerIndex] == 0) {
inCurrSample[dmerIndex]++;
freqs[dmerIndex]++;
}
freqs[dmerIndex]++;
start++;
}
}
free(inCurrSample);
}
/**
@ -637,6 +629,55 @@ _cleanup:
}
}
/**
 * Train a FASTCOVER dictionary from `samplesBuffer` using the caller-supplied
 * parameters as-is (no parameter search is performed here).
 *
 * `parameters` is received by value; its splitPoint is overwritten with 1.0
 * before validation, so the caller's copy is never modified.
 *
 * @return the value produced by ZDICT_finalizeDictionary() (dictionary size on
 *         success, error code otherwise), or ERROR(GENERIC) /
 *         ERROR(dstSize_tooSmall) when one of the preliminary checks fails.
 */
ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(
    void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
    const size_t *samplesSizes, unsigned nbSamples, ZDICT_fastCover_params_t parameters) {
    BYTE* const dictBase = (BYTE*)dictBuffer;
    FASTCOVER_ctx_t ctx;
    size_t contentOffset;   /* value returned by FASTCOVER_buildDictionary() */
    size_t finalSize;       /* value returned by ZDICT_finalizeDictionary() */

    parameters.splitPoint = 1.0;

    /* Publish the requested notification level to the file-level global */
    g_displayLevel = parameters.zParams.notificationLevel;

    /* Argument validation: each failure logs at level 1 and returns early */
    if (!FASTCOVER_checkParameters(parameters, dictBufferCapacity)) {
        DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n");
        return ERROR(GENERIC);
    }
    if (nbSamples == 0) {
        DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n");
        return ERROR(GENERIC);
    }
    if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
        DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
                     ZDICT_DICTSIZE_MIN);
        return ERROR(dstSize_tooSmall);
    }

    /* Set up the training context; nothing to destroy if this fails */
    if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
                            parameters.d, parameters.splitPoint, parameters.f)) {
        DISPLAYLEVEL(1, "Failed to initialize context\n");
        return ERROR(GENERIC);
    }

    /* Select the dictionary content, then finalize it in place in dictBuffer */
    DISPLAYLEVEL(2, "Building dictionary\n");
    contentOffset = FASTCOVER_buildDictionary(&ctx, ctx.freqs, dictBuffer,
                                              dictBufferCapacity, parameters);
    finalSize = ZDICT_finalizeDictionary(
        dictBase, dictBufferCapacity,
        dictBase + contentOffset, dictBufferCapacity - contentOffset,
        samplesBuffer, samplesSizes, (unsigned)ctx.nbTrainSamples,
        parameters.zParams);
    if (!ZSTD_isError(finalSize)) {
        DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
                     (U32)finalSize);
    }

    /* The context is released on both the success and the error path */
    FASTCOVER_ctx_destroy(&ctx);
    return finalSize;
}
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
const size_t *samplesSizes, unsigned nbSamples,
@ -665,15 +706,15 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(
/* Checks */
if (splitPoint <= 0 || splitPoint > 1) {
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect splitPoint\n");
return ERROR(GENERIC);
}
if (kMinK < kMaxD || kMaxK < kMinK) {
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect k\n");
return ERROR(GENERIC);
}
if (nbSamples == 0) {
DISPLAYLEVEL(1, "fast must have at least one input file\n");
DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n");
return ERROR(GENERIC);
}
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {

View File

@ -12,9 +12,6 @@
#include "zdict.h"
typedef struct {
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
@ -26,7 +23,6 @@ typedef struct {
} ZDICT_fastCover_params_t;
/*! ZDICT_optimizeTrainFromBuffer_fastCover():
* Train a dictionary from an array of samples using a modified version of the COVER algorithm.
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
@ -41,7 +37,21 @@ typedef struct {
* or an error code, which can be tested with ZDICT_isError().
* On success `*parameters` contains the parameters selected.
*/
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
const size_t *samplesSizes, unsigned nbSamples,
ZDICT_fastCover_params_t *parameters);
/*! ZDICT_trainFromBuffer_fastCover():
* Train a dictionary from an array of samples using a modified version of the COVER algorithm.
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
* The resulting dictionary will be saved into `dictBuffer`.
* d, k, and f are required.
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
* or an error code, which can be tested with ZDICT_isError().
*/
ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
const size_t *samplesSizes, unsigned nbSamples,
ZDICT_fastCover_params_t *parameters);
const size_t *samplesSizes, unsigned nbSamples, ZDICT_fastCover_params_t parameters);

View File

@ -64,8 +64,14 @@ int FASTCOVER_trainFromFiles(const char* dictFileName, sampleInfo *info,
EXM_THROW(12, "not enough memory for trainFromFiles"); /* should not happen */
{ size_t dictSize;
dictSize = ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, maxDictSize, info->srcBuffer,
info->samplesSizes, info->nbSamples, params);
/* Run the optimize version if either k or d is not provided */
if (!params->d || !params->k) {
dictSize = ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, maxDictSize, info->srcBuffer,
info->samplesSizes, info->nbSamples, params);
} else {
dictSize = ZDICT_trainFromBuffer_fastCover(dictBuffer, maxDictSize, info->srcBuffer,
info->samplesSizes, info->nbSamples, *params);
}
DISPLAYLEVEL(2, "k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\n", params->k, params->d, params->f, params->steps, (unsigned)(params->splitPoint*100));
if (ZDICT_isError(dictSize)) {
DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize)); /* should not happen */
@ -92,8 +98,8 @@ int main(int argCount, const char* argv[])
int operationResult = 0;
/* Initialize arguments to default values */
unsigned k = 200;
unsigned d = 8;
unsigned k = 0;
unsigned d = 0;
unsigned f = 23;
unsigned steps = 32;
unsigned nbThreads = 1;

View File

@ -1,8 +1,8 @@
echo "Building fastCover dictionary with in=../../lib/common k=200 f=20 out=dict1"
./main in=../../../lib/common k=200 f=20 out=dict1
echo "Building fastCover dictionary with in=../../lib/common f=20 out=dict1"
./main in=../../../lib/common f=20 out=dict1
zstd -be3 -D dict1 -r ../../../lib/common -q
echo "Building fastCover dictionary with in=../../lib/common k=500 f=24 out=dict2 dictID=100 maxdict=140000"
./main in=../../../lib/common k=500 f=24 out=dict2 dictID=100 maxdict=140000
echo "Building fastCover dictionary with in=../../lib/common k=500 d=6 f=24 out=dict2 dictID=100 maxdict=140000"
./main in=../../../lib/common k=500 d=6 f=24 out=dict2 dictID=100 maxdict=140000
zstd -be3 -D dict2 -r ../../../lib/common -q
echo "Building fastCover dictionary with 2 sample sources"
./main in=../../../lib/common in=../../../lib/compress out=dict3

View File

@ -331,7 +331,7 @@ void FSE_freeCTable (FSE_CTable* ct) { free(ct); }
/* provides the minimum logSize to safely represent a distribution */
static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
{
U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1;
U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1;
U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
assert(srcSize > 1); /* Not supported, RLE should be used instead */
@ -394,6 +394,9 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
}
ToDistribute = (1 << tableLog) - distributed;
if (ToDistribute == 0)
return 0;
if ((total / ToDistribute) > lowOne) {
/* risk of rounding to zero */
lowOne = (U32)((total * 3) / (ToDistribute * 2));

View File

@ -293,7 +293,7 @@ static dictItem ZDICT_analyzePos(
refinedEnd = refinedStart + selectedCount;
}
/* evaluate gain based on new ref */
/* evaluate gain based on new dict */
start = refinedStart;
pos = suffix[refinedStart];
end = start;
@ -341,7 +341,7 @@ static dictItem ZDICT_analyzePos(
for (i=MINMATCHLENGTH; i<=(int)maxLength; i++)
savings[i] = savings[i-1] + (lengthList[i] * (i-3));
DISPLAYLEVEL(4, "Selected ref at position %u, of length %u : saves %u (ratio: %.2f) \n",
DISPLAYLEVEL(4, "Selected dict at position %u, of length %u : saves %u (ratio: %.2f) \n",
(U32)pos, (U32)maxLength, savings[maxLength], (double)savings[maxLength] / maxLength);
solution.pos = (U32)pos;
@ -581,7 +581,7 @@ static void ZDICT_fillNoise(void* buffer, size_t length)
typedef struct
{
ZSTD_CCtx* ref; /* contains reference to dictionary */
ZSTD_CDict* dict; /* dictionary */
ZSTD_CCtx* zc; /* working context */
void* workPlace; /* must be ZSTD_BLOCKSIZE_MAX allocated */
} EStats_ress_t;
@ -597,8 +597,9 @@ static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
size_t cSize;
if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */
{ size_t const errorCode = ZSTD_copyCCtx(esr.zc, esr.ref, 0);
if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; }
{ size_t const errorCode = ZSTD_compressBegin_usingCDict(esr.zc, esr.dict);
if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_compressBegin_usingCDict failed \n"); return; }
}
cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
@ -708,14 +709,6 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
/* init */
DEBUGLOG(4, "ZDICT_analyzeEntropy");
esr.ref = ZSTD_createCCtx();
esr.zc = ZSTD_createCCtx();
esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX);
if (!esr.ref || !esr.zc || !esr.workPlace) {
eSize = ERROR(memory_allocation);
DISPLAYLEVEL(1, "Not enough memory \n");
goto _cleanup;
}
if (offcodeMax>OFFCODE_MAX) { eSize = ERROR(dictionaryCreation_failed); goto _cleanup; } /* too large dictionary */
for (u=0; u<256; u++) countLit[u] = 1; /* any character must be described */
for (u=0; u<=offcodeMax; u++) offcodeCount[u] = 1;
@ -726,12 +719,15 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
memset(bestRepOffset, 0, sizeof(bestRepOffset));
if (compressionLevel==0) compressionLevel = g_compressionLevel_default;
params = ZSTD_getParams(compressionLevel, averageSampleSize, dictBufferSize);
{ size_t const beginResult = ZSTD_compressBegin_advanced(esr.ref, dictBuffer, dictBufferSize, params, 0);
if (ZSTD_isError(beginResult)) {
DISPLAYLEVEL(1, "error : ZSTD_compressBegin_advanced() failed : %s \n", ZSTD_getErrorName(beginResult));
eSize = ERROR(GENERIC);
goto _cleanup;
} }
esr.dict = ZSTD_createCDict_advanced(dictBuffer, dictBufferSize, ZSTD_dlm_byRef, ZSTD_dct_rawContent, params.cParams, ZSTD_defaultCMem);
esr.zc = ZSTD_createCCtx();
esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX);
if (!esr.dict || !esr.zc || !esr.workPlace) {
eSize = ERROR(memory_allocation);
DISPLAYLEVEL(1, "Not enough memory \n");
goto _cleanup;
}
/* collect stats on all samples */
for (u=0; u<nbFiles; u++) {
@ -856,7 +852,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
eSize += 12;
_cleanup:
ZSTD_freeCCtx(esr.ref);
ZSTD_freeCDict(esr.dict);
ZSTD_freeCCtx(esr.zc);
free(esr.workPlace);

View File

@ -406,7 +406,6 @@ int main(int argCount, const char* argv[])
singleThread = 0,
ultra=0;
double compressibility = 0.5;
BMK_advancedParams_t adv = BMK_initAdvancedParams();
unsigned bench_nbSeconds = 3; /* would be better if this value was synchronized from bench */
size_t blockSize = 0;
zstd_operation_mode operation = zom_compress;
@ -434,6 +433,9 @@ int main(int argCount, const char* argv[])
ZDICT_cover_params_t coverParams = defaultCoverParams();
int cover = 1;
#endif
#ifndef ZSTD_NOBENCH
BMK_advancedParams_t benchParams = BMK_initAdvancedParams();
#endif
/* init */
@ -620,7 +622,7 @@ int main(int argCount, const char* argv[])
/* Decoding */
case 'd':
#ifndef ZSTD_NOBENCH
adv.mode = BMK_decodeOnly;
benchParams.mode = BMK_decodeOnly;
if (operation==zom_bench) { argument++; break; } /* benchmark decode (hidden option) */
#endif
operation=zom_decompress; argument++; break;
@ -713,7 +715,7 @@ int main(int argCount, const char* argv[])
case 'p': argument++;
#ifndef ZSTD_NOBENCH
if ((*argument>='0') && (*argument<='9')) {
adv.additionalParam = (int)readU32FromChar(&argument);
benchParams.additionalParam = (int)readU32FromChar(&argument);
} else
#endif
main_pause=1;
@ -826,18 +828,18 @@ int main(int argCount, const char* argv[])
/* Check if benchmark is selected */
if (operation==zom_bench) {
#ifndef ZSTD_NOBENCH
adv.blockSize = blockSize;
adv.nbWorkers = nbWorkers;
adv.realTime = setRealTimePrio;
adv.nbSeconds = bench_nbSeconds;
adv.ldmFlag = ldmFlag;
adv.ldmMinMatch = g_ldmMinMatch;
adv.ldmHashLog = g_ldmHashLog;
benchParams.blockSize = blockSize;
benchParams.nbWorkers = nbWorkers;
benchParams.realTime = setRealTimePrio;
benchParams.nbSeconds = bench_nbSeconds;
benchParams.ldmFlag = ldmFlag;
benchParams.ldmMinMatch = g_ldmMinMatch;
benchParams.ldmHashLog = g_ldmHashLog;
if (g_ldmBucketSizeLog != LDM_PARAM_DEFAULT) {
adv.ldmBucketSizeLog = g_ldmBucketSizeLog;
benchParams.ldmBucketSizeLog = g_ldmBucketSizeLog;
}
if (g_ldmHashEveryLog != LDM_PARAM_DEFAULT) {
adv.ldmHashEveryLog = g_ldmHashEveryLog;
benchParams.ldmHashEveryLog = g_ldmHashEveryLog;
}
if (cLevel > ZSTD_maxCLevel()) cLevel = ZSTD_maxCLevel();
@ -852,22 +854,22 @@ int main(int argCount, const char* argv[])
int c;
DISPLAYLEVEL(2, "Benchmarking %s \n", filenameTable[i]);
for(c = cLevel; c <= cLevelLast; c++) {
BMK_benchFilesAdvanced(&filenameTable[i], 1, dictFileName, c, &compressionParams, g_displayLevel, &adv);
BMK_benchFilesAdvanced(&filenameTable[i], 1, dictFileName, c, &compressionParams, g_displayLevel, &benchParams);
}
}
} else {
for(; cLevel <= cLevelLast; cLevel++) {
BMK_benchFilesAdvanced(filenameTable, filenameIdx, dictFileName, cLevel, &compressionParams, g_displayLevel, &adv);
BMK_benchFilesAdvanced(filenameTable, filenameIdx, dictFileName, cLevel, &compressionParams, g_displayLevel, &benchParams);
}
}
} else {
for(; cLevel <= cLevelLast; cLevel++) {
BMK_syntheticTest(cLevel, compressibility, &compressionParams, g_displayLevel, &adv);
BMK_syntheticTest(cLevel, compressibility, &compressionParams, g_displayLevel, &benchParams);
}
}
#else
(void)bench_nbSeconds; (void)blockSize; (void)setRealTimePrio; (void)separateFiles;
(void)bench_nbSeconds; (void)blockSize; (void)setRealTimePrio; (void)separateFiles; (void)compressibility;
#endif
goto _end;
}

View File

@ -27,6 +27,7 @@
#include <string.h> /* strcmp */
#include <assert.h>
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_compressContinue, ZSTD_compressBlock */
#include "fse.h"
#include "zstd.h" /* ZSTD_VERSION_STRING */
#include "zstd_errors.h" /* ZSTD_getErrorCode */
#include "zstdmt_compress.h"
@ -1443,6 +1444,24 @@ static int basicUnitTests(U32 seed, double compressibility)
}
DISPLAYLEVEL(3, "OK \n");
DISPLAYLEVEL(3, "test%3i : testing FSE_normalizeCount() PR#1255: ", testNb++);
{
short norm[32];
unsigned count[32];
unsigned const tableLog = 5;
size_t const nbSeq = 32;
unsigned const maxSymbolValue = 31;
size_t i;
for (i = 0; i < 32; ++i)
count[i] = 1;
/* Calling FSE_normalizeCount() on a uniform distribution should not
* cause a division by zero.
*/
FSE_normalizeCount(norm, tableLog, count, nbSeq, maxSymbolValue);
}
DISPLAYLEVEL(3, "OK \n");
_end:
free(CNBuffer);
free(compressedBuffer);