Rerun cover and fastCover with optimized values

This commit is contained in:
Jennifer Liu 2018-07-26 19:03:01 -07:00
parent 3d7941ce41
commit 759c543312
3 changed files with 169 additions and 139 deletions

View File

@ -13,108 +13,113 @@ Benchmark given input files: make ARG= followed by permitted arguments
make ARG="in=../../../lib/dictBuilder in=../../../lib/compress" make ARG="in=../../../lib/dictBuilder in=../../../lib/compress"
###Benchmarking Result: ###Benchmarking Result:
The first COVER row is the optimizing cover run; the second COVER row uses the optimized d and k found by the first one.
For every f value for fast, the first one is the optimizing run and the second one has k=200 For every f value of fastCover, the first row is the optimizing fastCover run and the second row uses the optimized d and k found by the first one.
github: github:
NODICT 0.000023 2.999642 NODICT 0.000004 2.999642
RANDOM 0.149020 8.786957 RANDOM 0.146096 8.786957
LEGACY 0.854277 8.989482 LEGACY 0.956888 8.989482
FAST15 8.764078 10.609015 COVER 56.596152 10.641263
FAST15 0.232610 9.135669 COVER 4.937047 10.641263
FAST16 9.597777 10.474574 FAST15 17.722269 10.586461
FAST16 0.243698 9.346482 FAST15 0.239135 10.586461
FAST17 9.385449 10.611737 FAST16 18.276179 10.492503
FAST17 0.268376 9.605798 FAST16 0.265285 10.492503
FAST18 9.988885 10.626382 FAST17 18.077916 10.611737
FAST18 0.311769 9.130565 FAST17 0.236573 10.611737
FAST19 10.737259 10.411729 FAST18 19.510150 10.621586
FAST19 0.331885 9.271814 FAST18 0.278683 10.621586
FAST20 10.479782 10.388895 FAST19 18.794350 10.629626
FAST20 0.498416 9.194115 FAST19 0.307943 10.629626
FAST21 21.189883 10.376394 FAST20 19.671099 10.610308
FAST21 1.098532 9.244456 FAST20 0.428814 10.610308
FAST22 39.849935 10.432555 FAST21 36.527238 10.625733
FAST22 2.590561 9.410930 FAST21 0.716384 10.625733
FAST23 75.832399 10.614747 FAST22 83.803521 10.625281
FAST23 6.108487 9.484150 FAST22 1.290246 10.625281
FAST24 139.782714 10.611753 FAST23 158.287924 10.602342
FAST24 13.029406 9.379030 FAST23 3.084848 10.602342
COVER 55.118542 10.641263 FAST24 283.630941 10.603379
FAST24 8.088933 10.603379
hg-commands hg-commands
NODICT 0.000012 2.425291 NODICT 0.000007 2.425291
RANDOM 0.083071 3.489515 RANDOM 0.084010 3.489515
LEGACY 0.835195 3.911896 LEGACY 0.926763 3.911896
FAST15 0.163980 3.808375 COVER 62.036915 4.131136
FAST16 6.373850 4.010783 COVER 2.194398 4.131136
FAST16 0.160299 3.966604 FAST15 12.169025 3.903719
FAST17 6.668799 4.091602 FAST15 0.156552 3.903719
FAST17 0.172480 4.062773 FAST16 11.886255 4.005077
FAST18 6.266105 4.130824 FAST16 0.155506 4.005077
FAST18 0.171554 4.094666 FAST17 11.886955 4.097811
FAST19 6.869651 4.158180 FAST17 0.176327 4.097811
FAST19 0.209468 4.111289 FAST18 12.544698 4.136081
FAST20 8.267766 4.149707 FAST18 0.171796 4.136081
FAST20 0.331680 4.119873 FAST19 12.920868 4.166021
FAST21 18.824296 4.171784 FAST19 0.207029 4.166021
FAST21 0.783961 4.120884 FAST20 15.771429 4.163740
FAST22 33.321252 4.152035 FAST20 0.258685 4.163740
FAST22 1.854215 4.126626 FAST21 33.165829 4.157057
FAST23 60.775388 4.157595 FAST21 0.663088 4.157057
FAST23 4.040395 4.134222 FAST22 68.779201 4.158195
FAST24 110.910038 4.163091 FAST22 1.568439 4.158195
FAST24 8.505828 4.143533 FAST23 121.921931 4.161450
COVER 61.654796 4.131136 FAST23 2.498972 4.161450
FAST24 221.990451 4.159658
FAST24 5.793594 4.159658
hg-changelog hg-changelog
NODICT 0.000004 1.377613 NODICT 0.000004 1.377613
RANDOM 0.582067 2.096785 RANDOM 0.549307 2.096785
LEGACY 2.739515 2.058273 LEGACY 2.273818 2.058273
FAST15 35.682665 2.127596 COVER 219.640608 2.188654
FAST15 0.931621 2.115299 COVER 6.055391 2.188654
FAST16 36.557988 2.141787 FAST15 67.820700 2.127194
FAST16 1.008155 2.136080 FAST15 0.824624 2.127194
FAST17 36.272242 2.155332 FAST16 69.774209 2.145401
FAST17 0.906803 2.154596 FAST16 0.889737 2.145401
FAST18 35.542043 2.171997 FAST17 70.027355 2.157544
FAST18 1.063101 2.167723 FAST17 0.869004 2.157544
FAST19 37.756934 2.180893 FAST18 68.229652 2.173127
FAST19 1.257291 2.173768 FAST18 0.930689 2.173127
FAST20 40.273755 2.179442 FAST19 70.696241 2.179527
FAST20 1.630522 2.170072 FAST19 1.385515 2.179527
FAST21 54.606548 2.181400 FAST20 80.618172 2.183233
FAST21 2.321266 2.171643 FAST20 1.699632 2.183233
FAST22 72.454066 2.178774 FAST21 96.366254 2.180920
FAST22 5.092888 2.168885 FAST21 2.606553 2.180920
FAST23 106.753208 2.180347 FAST22 139.440758 2.184297
FAST23 14.722222 2.170673 FAST22 5.962606 2.184297
FAST24 171.083201 2.183426 FAST23 207.791930 2.187666
FAST24 27.575575 2.170623 FAST23 14.823301 2.187666
COVER 227.219660 2.188654 FAST24 322.050385 2.189889
FAST24 29.294918 2.189889
hg-manifest hg-manifest
NODICT 0.000007 1.866385 NODICT 0.000008 1.866385
RANDOM 1.086571 2.309485 RANDOM 1.075766 2.309485
LEGACY 9.567507 2.506775 LEGACY 8.688387 2.506775
FAST15 77.811380 2.380461 COVER 926.024689 2.582597
FAST15 1.969718 2.317727 COVER 33.630695 2.582597
FAST16 75.789019 2.469144 FAST15 152.845945 2.377689
FAST16 2.051283 2.375815 FAST15 2.206285 2.377689
FAST17 79.659040 2.539069 FAST16 147.772371 2.464814
FAST17 1.995394 2.501047 FAST16 1.937997 2.464814
FAST18 76.281105 2.578095 FAST17 147.729498 2.539834
FAST18 2.059272 2.564840 FAST17 1.966577 2.539834
FAST19 79.395382 2.590433 FAST18 144.156821 2.576924
FAST19 2.354158 2.591024 FAST18 1.954106 2.576924
FAST20 87.937568 2.597813 FAST19 145.678760 2.592479
FAST20 2.922189 2.597104 FAST19 2.096876 2.592479
FAST21 121.760549 2.598408 FAST20 159.634674 2.594551
FAST21 4.798981 2.600269 FAST20 2.568766 2.594551
FAST22 155.878461 2.594560 FAST21 228.116552 2.597128
FAST22 8.151807 2.601047 FAST21 4.634508 2.597128
FAST23 194.238003 2.596761 FAST22 288.890644 2.596971
FAST23 15.160578 2.592985 FAST22 6.618204 2.596971
FAST24 267.425904 2.597657 FAST23 377.196211 2.601416
FAST24 29.513286 2.600363 FAST23 13.497286 2.601416
COVER 930.675322 2.582597 FAST24 503.208577 2.602830
FAST24 29.538585 2.602830

View File

@ -277,7 +277,8 @@ int main(int argCount, const char* argv[])
int result = 0; int result = 0;
/* Initialize arguments to default values */ /* Initialize arguments to default values */
const unsigned k = 200; unsigned k = 200;
unsigned d = 8;
const unsigned cLevel = DEFAULT_CLEVEL; const unsigned cLevel = DEFAULT_CLEVEL;
const unsigned dictID = 0; const unsigned dictID = 0;
const unsigned maxDictSize = g_defaultMaxDictSize; const unsigned maxDictSize = g_defaultMaxDictSize;
@ -360,47 +361,6 @@ int main(int argCount, const char* argv[])
} }
} }
/* for fastCover */
for (unsigned f = 15; f < 25; f++){
DISPLAYLEVEL(2, "current f is %u\n", f);
/* for fastCover (optimizing k) */
{
ZDICT_fastCover_params_t fastParam;
memset(&fastParam, 0, sizeof(fastParam));
fastParam.zParams = zParams;
fastParam.splitPoint = 1.0;
fastParam.d = 8;
fastParam.f = f;
fastParam.steps = 40;
fastParam.nbThreads = 1;
const int fastOptResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL, &fastParam);
DISPLAYLEVEL(2, "k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\n", fastParam.k, fastParam.d, fastParam.f, fastParam.steps, (unsigned)(fastParam.splitPoint * 100));
if(fastOptResult) {
result = 1;
goto _cleanup;
}
}
/* for fastCover (with k provided) */
{
ZDICT_fastCover_params_t fastParam;
memset(&fastParam, 0, sizeof(fastParam));
fastParam.zParams = zParams;
fastParam.splitPoint = 1.0;
fastParam.d = 8;
fastParam.f = f;
fastParam.k = 200;
fastParam.steps = 40;
fastParam.nbThreads = 1;
const int fastOptResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL, &fastParam);
DISPLAYLEVEL(2, "k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\n", fastParam.k, fastParam.d, fastParam.f, fastParam.steps, (unsigned)(fastParam.splitPoint * 100));
if(fastOptResult) {
result = 1;
goto _cleanup;
}
}
}
/* for cover */ /* for cover */
{ {
ZDICT_cover_params_t coverParam; ZDICT_cover_params_t coverParam;
@ -415,7 +375,72 @@ int main(int argCount, const char* argv[])
result = 1; result = 1;
goto _cleanup; goto _cleanup;
} }
k = coverParam.k;
d = coverParam.d;
/* for COVER with k and d provided */
ZDICT_cover_params_t covernParam;
memset(&covernParam, 0, sizeof(covernParam));
covernParam.zParams = zParams;
covernParam.splitPoint = 1.0;
covernParam.steps = 40;
covernParam.nbThreads = 1;
covernParam.k = k;
covernParam.d = d;
const int coverResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, &covernParam, NULL, NULL);
DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\nsplit=%u\n", covernParam.k, covernParam.d, covernParam.steps, (unsigned)(covernParam.splitPoint * 100));
if(coverResult) {
result = 1;
goto _cleanup;
} }
}
/* for fastCover */
for (unsigned f = 15; f < 25; f++){
DISPLAYLEVEL(2, "current f is %u\n", f);
/* for fastCover (optimizing k and d) */
{
ZDICT_fastCover_params_t fastParam;
memset(&fastParam, 0, sizeof(fastParam));
fastParam.zParams = zParams;
fastParam.splitPoint = 1.0;
fastParam.f = f;
fastParam.steps = 40;
fastParam.nbThreads = 1;
const int fastOptResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL, &fastParam);
DISPLAYLEVEL(2, "k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\n", fastParam.k, fastParam.d, fastParam.f, fastParam.steps, (unsigned)(fastParam.splitPoint * 100));
if(fastOptResult) {
result = 1;
goto _cleanup;
}
k = fastParam.k;
d = fastParam.d;
}
/* for fastCover (with k and d provided) */
{
ZDICT_fastCover_params_t fastParam;
memset(&fastParam, 0, sizeof(fastParam));
fastParam.zParams = zParams;
fastParam.splitPoint = 1.0;
fastParam.d = d;
fastParam.f = f;
fastParam.k = k;
fastParam.steps = 40;
fastParam.nbThreads = 1;
const int fastOptResult = benchmarkDictBuilder(srcInfo, maxDictSize, NULL, NULL, NULL, &fastParam);
DISPLAYLEVEL(2, "k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\n", fastParam.k, fastParam.d, fastParam.f, fastParam.steps, (unsigned)(fastParam.splitPoint * 100));
if(fastOptResult) {
result = 1;
goto _cleanup;
}
}
}
/* Free allocated memory */ /* Free allocated memory */

View File

@ -267,7 +267,7 @@ static void FASTCOVER_computeFrequency(U32 *freqs, unsigned f, FASTCOVER_ctx_t *
size_t currSampleStart = ctx->offsets[i]; size_t currSampleStart = ctx->offsets[i];
size_t currSampleEnd = ctx->offsets[i+1]; size_t currSampleEnd = ctx->offsets[i+1];
start = currSampleStart; start = currSampleStart;
while (start + f < currSampleEnd) { while (start + ctx->d <= currSampleEnd) {
const size_t dmerIndex = FASTCOVER_hashPtrToIndex(ctx->samples + start, f, ctx->d); const size_t dmerIndex = FASTCOVER_hashPtrToIndex(ctx->samples + start, f, ctx->d);
/* if no dmer with same hash value has been seen in current sample */ /* if no dmer with same hash value has been seen in current sample */
if (inCurrSample[dmerIndex] == 0) { if (inCurrSample[dmerIndex] == 0) {