From 630b47a158cc22002045494c7e0dc0f0672c2fca Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 9 Feb 2025 11:38:44 -0800 Subject: [PATCH 1/8] added a new --max command set parameters to maximum compression (even more than -22) --- programs/zstdcli.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 7d00a94b2..0bb384b20 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -639,6 +639,22 @@ static unsigned parseCompressionParameters(const char* stringPtr, ZSTD_compressi return 1; } +static void setMaxCompression(ZSTD_compressionParameters* params) +{ + params->windowLog = ZSTD_WINDOWLOG_MAX; + params->chainLog = ZSTD_CHAINLOG_MAX; + params->hashLog = ZSTD_HASHLOG_MAX; + params->searchLog = ZSTD_SEARCHLOG_MAX; + params->minMatch = ZSTD_MINMATCH_MIN; + params->targetLength = ZSTD_TARGETLENGTH_MAX; + params->strategy = ZSTD_STRATEGY_MAX; + g_overlapLog = ZSTD_OVERLAPLOG_MAX; + g_ldmHashLog = ZSTD_LDM_HASHLOG_MAX; + g_ldmHashRateLog = 0; /* automatically derived */ + g_ldmMinMatch = 32; /* heuristic */ + g_ldmBucketSizeLog = ZSTD_LDM_BUCKETSIZELOG_MAX; +} + static void printVersion(void) { if (g_displayLevel < DISPLAY_LEVEL_DEFAULT) { @@ -957,6 +973,7 @@ int main(int argCount, const char* argv[]) if (!strcmp(argument, "--quiet")) { g_displayLevel--; continue; } if (!strcmp(argument, "--stdout")) { forceStdout=1; outFileName=stdoutmark; continue; } if (!strcmp(argument, "--ultra")) { ultra=1; continue; } + if (!strcmp(argument, "--max")) { ultra=1; ldmFlag = 1; setMaxCompression(&compressionParams); continue; } if (!strcmp(argument, "--check")) { FIO_setChecksumFlag(prefs, 2); continue; } if (!strcmp(argument, "--no-check")) { FIO_setChecksumFlag(prefs, 0); continue; } if (!strcmp(argument, "--sparse")) { FIO_setSparseWrite(prefs, 2); continue; } From 8ae1330708b42c7f5751e94e02970e7ccb5d9731 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 9 Feb 2025 11:51:25 -0800 Subject: [PATCH 2/8] add man entry for --max --- programs/zstd.1.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/programs/zstd.1.md b/programs/zstd.1.md index b4e848640..184db049f 100644 --- a/programs/zstd.1.md +++ b/programs/zstd.1.md @@ -161,6 +161,9 @@ the last one takes effect. Note: If `windowLog` is set to larger than 27, `--long=windowLog` or `--memory=windowSize` needs to be passed to the decompressor. +* `--max`: + set advanced parameters to maximum compression. + warning: this setting uses a lot of resources and is very slow. * `-D DICT`: use `DICT` as Dictionary to compress or decompress FILE(s) * `--patch-from FILE`: From 41b719375778ca92978f406b44bdea06cffbf108 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 9 Feb 2025 12:26:36 -0800 Subject: [PATCH 3/8] added --max to test suite --- tests/cli-tests/compression/levels.sh | 4 +++- tests/cli-tests/compression/levels.sh.stderr.exact | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/cli-tests/compression/levels.sh b/tests/cli-tests/compression/levels.sh index cc2700a30..db3baba6d 100755 --- a/tests/cli-tests/compression/levels.sh +++ b/tests/cli-tests/compression/levels.sh @@ -10,9 +10,11 @@ zstd --fast=10 file -o file-f10.zst -q zstd --fast=1 file -o file-f1.zst -q zstd -1 file -o file-1.zst -q zstd -19 file -o file-19.zst -q +zstd --max file -o file-max.zst -q -zstd -t file-f10.zst file-f1.zst file-1.zst file-19.zst +zstd -t file-f10.zst file-f1.zst file-1.zst file-19.zst file-max.zst +cmp_size -le file-max.zst file-19.zst cmp_size -lt file-19.zst file-1.zst cmp_size -lt file-1.zst file-f1.zst cmp_size -lt file-f1.zst file-f10.zst diff --git a/tests/cli-tests/compression/levels.sh.stderr.exact b/tests/cli-tests/compression/levels.sh.stderr.exact index c8fb79c68..ddec06467 100644 --- a/tests/cli-tests/compression/levels.sh.stderr.exact +++ b/tests/cli-tests/compression/levels.sh.stderr.exact @@ -6,10 +6,12 @@ zstd --fast=10 file -o file-f10.zst -q zstd --fast=1 file -o file-f1.zst -q zstd -1 file -o file-1.zst -q zstd -19 file -o file-19.zst -q +zstd --max file -o file-max.zst -q -zstd -t file-f10.zst file-f1.zst file-1.zst file-19.zst -4 files decompressed : 262148 bytes total +zstd -t file-f10.zst file-f1.zst file-1.zst file-19.zst file-max.zst +5 files decompressed : 327685 bytes total +cmp_size -le file-max.zst file-19.zst cmp_size -lt file-19.zst file-1.zst cmp_size -lt file-1.zst file-f1.zst cmp_size -lt file-f1.zst file-f10.zst From 39d1d82fa80bfbec6d894ccf8bf18137cedad5d6 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 9 Feb 2025 17:24:12 -0800 Subject: [PATCH 4/8] adjusted mml heuristic --- programs/zstdcli.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 0bb384b20..b69d6bf99 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -651,7 +651,7 @@ static void setMaxCompression(ZSTD_compressionParameters* params) g_overlapLog = ZSTD_OVERLAPLOG_MAX; g_ldmHashLog = ZSTD_LDM_HASHLOG_MAX; g_ldmHashRateLog = 0; /* automatically derived */ - g_ldmMinMatch = 32; /* heuristic */ + g_ldmMinMatch = 16; /* heuristic */ g_ldmBucketSizeLog = ZSTD_LDM_BUCKETSIZELOG_MAX; } From f86024ccd2b2fc4608be336594e073096405ac13 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 9 Feb 2025 17:25:35 -0800 Subject: [PATCH 5/8] reinforce man page warning 32-bit is unsuitable for --max --- programs/zstd.1.md | 1 + 1 file changed, 1 insertion(+) diff --git a/programs/zstd.1.md b/programs/zstd.1.md index 184db049f..f56110100 100644 --- a/programs/zstd.1.md +++ b/programs/zstd.1.md @@ -164,6 +164,7 @@ the last one takes effect. * `--max`: set advanced parameters to maximum compression. warning: this setting uses a lot of resources and is very slow. + note that the amount of resource required is typically too large for 32-bit. * `-D DICT`: use `DICT` as Dictionary to compress or decompress FILE(s) * `--patch-from FILE`: From 1603cbe83ef7140f0cd0412bea50e5bfc9dd6d1c Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 9 Feb 2025 23:02:14 -0800 Subject: [PATCH 6/8] update test for 32-bit mode --max doesn't work in 32-bit mode, due to address space limitation --- tests/cli-tests/compression/levels.sh | 10 +++++++++- tests/cli-tests/compression/levels.sh.stderr.exact | 10 +++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/tests/cli-tests/compression/levels.sh b/tests/cli-tests/compression/levels.sh index db3baba6d..8da3b4e74 100755 --- a/tests/cli-tests/compression/levels.sh +++ b/tests/cli-tests/compression/levels.sh @@ -5,12 +5,20 @@ set -v datagen > file +# Retrieve the program's version information +version_info=$(zstd -V) + # Compress with various levels and ensure that their sizes are ordered zstd --fast=10 file -o file-f10.zst -q zstd --fast=1 file -o file-f1.zst -q zstd -1 file -o file-1.zst -q zstd -19 file -o file-19.zst -q -zstd --max file -o file-max.zst -q +if echo "$version_info" | grep -q '32-bit'; then + # skip --max test: not enough address space + cp file-19.zst file-max.zst +else + zstd --max file -o file-max.zst -q +fi zstd -t file-f10.zst file-f1.zst file-1.zst file-19.zst file-max.zst diff --git a/tests/cli-tests/compression/levels.sh.stderr.exact b/tests/cli-tests/compression/levels.sh.stderr.exact index ddec06467..dff9784dd 100644 --- a/tests/cli-tests/compression/levels.sh.stderr.exact +++ b/tests/cli-tests/compression/levels.sh.stderr.exact @@ -1,12 +1,20 @@ datagen > file +# Retrieve the program's version information +version_info=$(zstd -V) + # Compress with various levels and ensure that their sizes are ordered zstd --fast=10 file -o file-f10.zst -q zstd --fast=1 file -o file-f1.zst -q zstd -1 file -o file-1.zst -q zstd -19 file -o file-19.zst -q -zstd --max file -o file-max.zst -q +if echo "$version_info" | grep -q '32-bit'; then + # skip --max test: not enough address space + cp file-19.zst file-max.zst +else + zstd --max file -o file-max.zst -q +fi zstd -t file-f10.zst file-f1.zst file-1.zst file-19.zst file-max.zst 5 files decompressed : 327685 bytes total From 468e1453a55d119c914843bff73af809cbe4ba79 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 9 Feb 2025 23:14:16 -0800 Subject: [PATCH 7/8] disable --max in 32-bit mode --- programs/zstd.1.md | 4 ++-- programs/zstdcli.c | 31 ++++++++++++++++++++----------- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/programs/zstd.1.md b/programs/zstd.1.md index f56110100..29b7a5bb7 100644 --- a/programs/zstd.1.md +++ b/programs/zstd.1.md @@ -163,8 +163,8 @@ the last one takes effect. `--memory=windowSize` needs to be passed to the decompressor. * `--max`: set advanced parameters to maximum compression. - warning: this setting uses a lot of resources and is very slow. - note that the amount of resource required is typically too large for 32-bit. + warning: this setting is very slow and uses a lot of resources. + It's inappropriate for 32-bit mode and therefore disabled in this mode. * `-D DICT`: use `DICT` as Dictionary to compress or decompress FILE(s) * `--patch-from FILE`: diff --git a/programs/zstdcli.c b/programs/zstdcli.c index b69d6bf99..feab4f944 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -810,22 +810,22 @@ static unsigned default_nbThreads(void) { CLEAN_RETURN(1); \ } } } -#define NEXT_UINT32(val32) { \ - const char* __nb; \ - NEXT_FIELD(__nb); \ +#define NEXT_UINT32(val32) { \ + const char* __nb; \ + NEXT_FIELD(__nb); \ val32 = readU32FromChar(&__nb); \ - if(*__nb != 0) { \ + if(*__nb != 0) { \ errorOut("error: only numeric values with optional suffixes K, KB, KiB, M, MB, MiB are allowed"); \ - } \ + } \ } -#define NEXT_TSIZE(valTsize) { \ - const char* __nb; \ - NEXT_FIELD(__nb); \ +#define NEXT_TSIZE(valTsize) { \ + const char* __nb; \ + NEXT_FIELD(__nb); \ valTsize = readSizeTFromChar(&__nb); \ - if(*__nb != 0) { \ + if(*__nb != 0) { \ errorOut("error: only numeric values with optional suffixes K, KB, KiB, M, MB, MiB are allowed"); \ - } \ + } \ } typedef enum { zom_compress, zom_decompress, zom_test, zom_bench, zom_train, zom_list } zstd_operation_mode; @@ -973,7 +973,6 @@ int main(int argCount, const char* argv[]) if (!strcmp(argument, "--quiet")) { g_displayLevel--; continue; } if (!strcmp(argument, "--stdout")) { forceStdout=1; outFileName=stdoutmark; continue; } if (!strcmp(argument, "--ultra")) { ultra=1; continue; } - if (!strcmp(argument, "--max")) { ultra=1; ldmFlag = 1; setMaxCompression(&compressionParams); continue; } if (!strcmp(argument, "--check")) { FIO_setChecksumFlag(prefs, 2); continue; } if (!strcmp(argument, "--no-check")) { FIO_setChecksumFlag(prefs, 0); continue; } if (!strcmp(argument, "--sparse")) { FIO_setSparseWrite(prefs, 2); continue; } @@ -1024,6 +1023,16 @@ int main(int argCount, const char* argv[]) if (!strcmp(argument, "--fake-stderr-is-console")) { UTIL_fakeStderrIsConsole(); continue; } if (!strcmp(argument, "--trace-file-stat")) { UTIL_traceFileStat(); continue; } + if (!strcmp(argument, "--max")) { + if (sizeof(void*)==4) { + DISPLAYLEVEL(2, "--max is incompatible with 32-bit mode \n"); + badUsage(programName, originalArgument); + CLEAN_RETURN(1); + } + ultra=1; ldmFlag = 1; setMaxCompression(&compressionParams); + continue; + } + /* long commands with arguments */ #ifndef ZSTD_NODICT if (longCommandWArg(&argument, "--train-cover")) { From 613901b6d3ea198ac051f63e16c6ea9cd088a6e4 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 9 Feb 2025 23:41:32 -0800 Subject: [PATCH 8/8] modifies command echoing for macos / linux compatibility --- tests/cli-tests/compression/levels.sh | 3 +++ tests/cli-tests/compression/levels.sh.stderr.exact | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/cli-tests/compression/levels.sh b/tests/cli-tests/compression/levels.sh index 8da3b4e74..b8230f2a3 100755 --- a/tests/cli-tests/compression/levels.sh +++ b/tests/cli-tests/compression/levels.sh @@ -6,7 +6,10 @@ set -v datagen > file # Retrieve the program's version information +# Note: command echoing differs between macos and linux, so it's disabled below +set +v version_info=$(zstd -V) +set -v # Compress with various levels and ensure that their sizes are ordered zstd --fast=10 file -o file-f10.zst -q diff --git a/tests/cli-tests/compression/levels.sh.stderr.exact b/tests/cli-tests/compression/levels.sh.stderr.exact index dff9784dd..fd7c076d2 100644 --- a/tests/cli-tests/compression/levels.sh.stderr.exact +++ b/tests/cli-tests/compression/levels.sh.stderr.exact @@ -2,7 +2,8 @@ datagen > file # Retrieve the program's version information -version_info=$(zstd -V) +# Note: command echoing differs between macos and linux, so it's disabled below +set +v # Compress with various levels and ensure that their sizes are ordered zstd --fast=10 file -o file-f10.zst -q