diff --git a/contrib/linux-kernel/README.md b/contrib/linux-kernel/README.md index 11938ad62..a62838517 100644 --- a/contrib/linux-kernel/README.md +++ b/contrib/linux-kernel/README.md @@ -1,8 +1,7 @@ # Linux Kernel Patch There are two pieces, the `zstd_compress` and `zstd_decompress` kernel modules, and the BtrFS patch. -The patches are based off of the linux kernel version 4.9. -The BtrFS patch is not present in its entirety yet. +The patches are based off of the linux kernel master branch (version 4.10). ## Zstd Kernel modules @@ -21,6 +20,33 @@ The BtrFS patch is not present in its entirety yet. ## BtrFS -* `fs/btrfs/zstd.c` is provided. -* Some more glue is required to integrate it with BtrFS, but I haven't included the patches yet. - In the meantime see https://github.com/terrelln/linux/commit/1914f7d4ca6c539369c84853eafa4ac104883047 if you're interested. +* The patch is located in `btrfs.diff`. +* Additionally `fs/btrfs/zstd.c` is provided as a source for convenience. +* The patch seems to be working, it doesn't crash the kernel, and compresses at speeds and ratios athat are expected. + It can still use some more testing for fringe features, like printing options. + +### Benchmarks + +Benchmarks run on a Ubuntu 14.04 with 2 cores and 4 GiB of RAM. +The VM is running on a Macbook Pro with a 3.1 GHz Intel Core i7 processor, +16 GB of ram, and a SSD. + +The compression benchmark is copying 10 copies of the +unzipped [silesia corpus](http://mattmahoney.net/dc/silesia.html) into a BtrFS +filesystem mounted with `-o compress-force={none, lzo, zlib, zstd}`. +The decompression benchmark is timing how long it takes to `tar` all 10 copies +into `/dev/null`. +The compression ratio is measured by comparing the output of `df` and `du`. +See `btrfs-benchmark.sh` for details. + +| Algorithm | Compression ratio | Compression speed | Decompression speed | +|-----------|-------------------|-------------------|---------------------| +| None | 0.99 | 504 MB/s | 686 MB/s | +| lzo | 1.66 | 398 MB/s | 442 MB/s | +| zlib | 2.58 | 65 MB/s | 241 MB/s | +| zstd 1 | 2.57 | 260 MB/s | 383 MB/s | +| zstd 3 | 2.71 | 174 MB/s | 408 MB/s | +| zstd 6 | 2.87 | 70 MB/s | 398 MB/s | +| zstd 9 | 2.92 | 43 MB/s | 406 MB/s | +| zstd 12 | 2.93 | 21 MB/s | 408 MB/s | +| zstd 15 | 3.01 | 11 MB/s | 354 MB/s | diff --git a/contrib/linux-kernel/btrfs-benchmark.sh b/contrib/linux-kernel/btrfs-benchmark.sh new file mode 100755 index 000000000..5e28da9c6 --- /dev/null +++ b/contrib/linux-kernel/btrfs-benchmark.sh @@ -0,0 +1,104 @@ +# !/bin/sh +set -e + +# Benchmarks run on a Ubuntu 14.04 VM with 2 cores and 4 GiB of RAM. +# The VM is running on a Macbook Pro with a 3.1 GHz Intel Core i7 processor and +# 16 GB of RAM and an SSD. + +# silesia is a directory that can be downloaded from +# http://mattmahoney.net/dc/silesia.html +# ls -l silesia/ +# total 203M +# -rwxr-xr-x 1 terrelln 9.8M Apr 12 2002 dickens +# -rwxr-xr-x 1 terrelln 49M May 31 2002 mozilla +# -rwxr-xr-x 1 terrelln 9.6M Mar 20 2003 mr +# -rwxr-xr-x 1 terrelln 32M Apr 2 2002 nci +# -rwxr-xr-x 1 terrelln 5.9M Jul 4 2002 ooffice +# -rwxr-xr-x 1 terrelln 9.7M Apr 11 2002 osdb +# -rwxr-xr-x 1 terrelln 6.4M Apr 2 2002 reymont +# -rwxr-xr-x 1 terrelln 21M Mar 25 2002 samba +# -rwxr-xr-x 1 terrelln 7.0M Mar 24 2002 sao +# -rwxr-xr-x 1 terrelln 40M Mar 25 2002 webster +# -rwxr-xr-x 1 terrelln 8.1M Apr 4 2002 x-ray +# -rwxr-xr-x 1 terrelln 5.1M Nov 30 2000 xml + +# $HOME is on a ext4 filesystem +BENCHMARK_DIR="$HOME/silesia/" +N=10 + +# Normalize the environment +sudo umount /mnt/btrfs 2> /dev/null > /dev/null || true +sudo mount -t btrfs $@ /dev/sda3 /mnt/btrfs +sudo rm -rf /mnt/btrfs/* +sync +sudo umount /mnt/btrfs +sudo mount -t btrfs $@ /dev/sda3 /mnt/btrfs + +# Run the benchmark +echo "Compression" +time sh -c "for i in \$(seq $N); do sudo cp -r $BENCHMARK_DIR /mnt/btrfs/\$i; done; sync" + +echo "Approximate compression ratio" +printf "%d / %d\n" \ + $(df /mnt/btrfs --output=used -B 1 | tail -n 1) \ + $(sudo du /mnt/btrfs -b -d 0 | tr '\t' '\n' | head -n 1); + +# Unmount and remount to avoid any caching +sudo umount /mnt/btrfs +sudo mount -t btrfs $@ /dev/sda3 /mnt/btrfs + +echo "Decompression" +time sudo tar -c /mnt/btrfs 2> /dev/null | wc -c > /dev/null + +sudo rm -rf /mnt/btrfs/* +sudo umount /mnt/btrfs + +# Run for each of -o compress-force={none, lzo, zlib, zstd} 5 times and take the +# min time and ratio. +# Ran zstd with compression levels {1, 3, 6, 9, 12, 15}. +# Original size: 2119415342 B (using du /mnt/btrfs) + +# none +# compress: 4.205 s +# decompress: 3.090 s +# ratio: 0.99 + +# lzo +# compress: 5.328 s +# decompress: 4.793 s +# ratio: 1.66 + +# zlib +# compress: 32.588 s +# decompress: 8.791 s +# ratio : 2.58 + +# zstd 1 +# compress: 8.147 s +# decompress: 5.527 s +# ratio : 2.57 + +# zstd 3 +# compress: 12.207 s +# decompress: 5.195 s +# ratio : 2.71 + +# zstd 6 +# compress: 30.253 s +# decompress: 5.324 s +# ratio : 2.87 + +# zstd 9 +# compress: 49.659 s +# decompress: 5.220 s +# ratio : 2.92 + +# zstd 12 +# compress: 99.245 s +# decompress: 5.193 s +# ratio : 2.93 + +# zstd 15 +# compress: 196.997 s +# decompress: 5.992 s +# ratio : 3.01 diff --git a/contrib/linux-kernel/btrfs.diff b/contrib/linux-kernel/btrfs.diff new file mode 100644 index 000000000..b0f8b924b --- /dev/null +++ b/contrib/linux-kernel/btrfs.diff @@ -0,0 +1,633 @@ +diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig +index 80e9c18..a26c63b 100644 +--- a/fs/btrfs/Kconfig ++++ b/fs/btrfs/Kconfig +@@ -6,6 +6,8 @@ config BTRFS_FS + select ZLIB_DEFLATE + select LZO_COMPRESS + select LZO_DECOMPRESS ++ select ZSTD_COMPRESS ++ select ZSTD_DECOMPRESS + select RAID6_PQ + select XOR_BLOCKS + select SRCU +diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile +index 128ce17..962a95a 100644 +--- a/fs/btrfs/Makefile ++++ b/fs/btrfs/Makefile +@@ -6,7 +6,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ + transaction.o inode.o file.o tree-defrag.o \ + extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ + extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ +- export.o tree-log.o free-space-cache.o zlib.o lzo.o \ ++ export.o tree-log.o free-space-cache.o zlib.o lzo.o zstd.o \ + compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \ + reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \ + uuid-tree.o props.o hash.o free-space-tree.o +diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c +index c7721a6..66d4ced 100644 +--- a/fs/btrfs/compression.c ++++ b/fs/btrfs/compression.c +@@ -761,6 +761,7 @@ static struct { + static const struct btrfs_compress_op * const btrfs_compress_op[] = { + &btrfs_zlib_compress, + &btrfs_lzo_compress, ++ &btrfs_zstd_compress, + }; + + void __init btrfs_init_compress(void) +diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h +index 39ec43a..d99fc21 100644 +--- a/fs/btrfs/compression.h ++++ b/fs/btrfs/compression.h +@@ -60,8 +60,9 @@ enum btrfs_compression_type { + BTRFS_COMPRESS_NONE = 0, + BTRFS_COMPRESS_ZLIB = 1, + BTRFS_COMPRESS_LZO = 2, +- BTRFS_COMPRESS_TYPES = 2, +- BTRFS_COMPRESS_LAST = 3, ++ BTRFS_COMPRESS_ZSTD = 3, ++ BTRFS_COMPRESS_TYPES = 3, ++ BTRFS_COMPRESS_LAST = 4, + }; + + struct btrfs_compress_op { +@@ -92,5 +93,6 @@ struct btrfs_compress_op { + + extern const struct btrfs_compress_op btrfs_zlib_compress; + extern const struct btrfs_compress_op btrfs_lzo_compress; ++extern const struct btrfs_compress_op btrfs_zstd_compress; + + #endif +diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h +index 29b7fc2..878b23b9 100644 +--- a/fs/btrfs/ctree.h ++++ b/fs/btrfs/ctree.h +@@ -270,6 +270,7 @@ struct btrfs_super_block { + BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \ + BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \ + BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \ ++ BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD | \ + BTRFS_FEATURE_INCOMPAT_RAID56 | \ + BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF | \ + BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA | \ +diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c +index 08b74da..0c43e4e 100644 +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -2853,6 +2853,8 @@ int open_ctree(struct super_block *sb, + features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; + if (fs_info->compress_type == BTRFS_COMPRESS_LZO) + features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; ++ else if (tree_root->fs_info->compress_type == BTRFS_COMPRESS_ZSTD) ++ features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD; + + if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA) + btrfs_info(fs_info, "has skinny extents"); +diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c +index dabfc7a..d8ea727 100644 +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -327,8 +327,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) + + if (fs_info->compress_type == BTRFS_COMPRESS_LZO) + comp = "lzo"; +- else ++ else if (fs_info->compress_type == BTRFS_COMPRESS_ZLIB) + comp = "zlib"; ++ else ++ comp = "zstd"; + ret = btrfs_set_prop(inode, "btrfs.compression", + comp, strlen(comp), 0); + if (ret) +@@ -1463,6 +1465,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, + + if (range->compress_type == BTRFS_COMPRESS_LZO) { + btrfs_set_fs_incompat(fs_info, COMPRESS_LZO); ++ } else if (range->compress_type == BTRFS_COMPRESS_ZSTD) { ++ btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD); + } + + ret = defrag_count; +diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c +index d6cb155..162105f 100644 +--- a/fs/btrfs/props.c ++++ b/fs/btrfs/props.c +@@ -383,6 +383,8 @@ static int prop_compression_validate(const char *value, size_t len) + return 0; + else if (!strncmp("zlib", value, len)) + return 0; ++ else if (!strncmp("zstd", value, len)) ++ return 0; + + return -EINVAL; + } +@@ -405,6 +407,8 @@ static int prop_compression_apply(struct inode *inode, + type = BTRFS_COMPRESS_LZO; + else if (!strncmp("zlib", value, len)) + type = BTRFS_COMPRESS_ZLIB; ++ else if (!strncmp("zstd", value, len)) ++ type = BTRFS_COMPRESS_ZSTD; + else + return -EINVAL; + +@@ -422,6 +426,8 @@ static const char *prop_compression_extract(struct inode *inode) + return "zlib"; + case BTRFS_COMPRESS_LZO: + return "lzo"; ++ case BTRFS_COMPRESS_ZSTD: ++ return "zstd"; + } + + return NULL; +diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c +index da687dc..b064456 100644 +--- a/fs/btrfs/super.c ++++ b/fs/btrfs/super.c +@@ -513,6 +513,14 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, + btrfs_clear_opt(info->mount_opt, NODATASUM); + btrfs_set_fs_incompat(info, COMPRESS_LZO); + no_compress = 0; ++ } else if (strcmp(args[0].from, "zstd") == 0) { ++ compress_type = "zstd"; ++ info->compress_type = BTRFS_COMPRESS_ZSTD; ++ btrfs_set_opt(info->mount_opt, COMPRESS); ++ btrfs_clear_opt(info->mount_opt, NODATACOW); ++ btrfs_clear_opt(info->mount_opt, NODATASUM); ++ btrfs_set_fs_incompat(info, COMPRESS_ZSTD); ++ no_compress = 0; + } else if (strncmp(args[0].from, "no", 2) == 0) { + compress_type = "no"; + btrfs_clear_opt(info->mount_opt, COMPRESS); +@@ -1230,8 +1238,10 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) + if (btrfs_test_opt(info, COMPRESS)) { + if (info->compress_type == BTRFS_COMPRESS_ZLIB) + compress_type = "zlib"; +- else ++ else if (info->compress_type == BTRFS_COMPRESS_LZO) + compress_type = "lzo"; ++ else ++ compress_type = "zstd"; + if (btrfs_test_opt(info, FORCE_COMPRESS)) + seq_printf(seq, ",compress-force=%s", compress_type); + else +diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c +index 1f157fb..b0dec90 100644 +--- a/fs/btrfs/sysfs.c ++++ b/fs/btrfs/sysfs.c +@@ -200,6 +200,7 @@ BTRFS_FEAT_ATTR_INCOMPAT(mixed_backref, MIXED_BACKREF); + BTRFS_FEAT_ATTR_INCOMPAT(default_subvol, DEFAULT_SUBVOL); + BTRFS_FEAT_ATTR_INCOMPAT(mixed_groups, MIXED_GROUPS); + BTRFS_FEAT_ATTR_INCOMPAT(compress_lzo, COMPRESS_LZO); ++BTRFS_FEAT_ATTR_INCOMPAT(compress_zstd, COMPRESS_ZSTD); + BTRFS_FEAT_ATTR_INCOMPAT(big_metadata, BIG_METADATA); + BTRFS_FEAT_ATTR_INCOMPAT(extended_iref, EXTENDED_IREF); + BTRFS_FEAT_ATTR_INCOMPAT(raid56, RAID56); +@@ -212,6 +213,7 @@ static struct attribute *btrfs_supported_feature_attrs[] = { + BTRFS_FEAT_ATTR_PTR(default_subvol), + BTRFS_FEAT_ATTR_PTR(mixed_groups), + BTRFS_FEAT_ATTR_PTR(compress_lzo), ++ BTRFS_FEAT_ATTR_PTR(compress_zstd), + BTRFS_FEAT_ATTR_PTR(big_metadata), + BTRFS_FEAT_ATTR_PTR(extended_iref), + BTRFS_FEAT_ATTR_PTR(raid56), +diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c +new file mode 100644 +index 0000000..b7f319e +--- /dev/null ++++ b/fs/btrfs/zstd.c +@@ -0,0 +1,415 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "compression.h" ++ ++#define ZSTD_BTRFS_MAX_WINDOWLOG 17 ++#define ZSTD_BTRFS_MAX_INPUT (1 << ZSTD_BTRFS_MAX_WINDOWLOG) ++ ++static ZSTD_parameters zstd_get_btrfs_parameters(size_t src_len) ++{ ++ ZSTD_parameters params = ZSTD_getParams(3, src_len, 0); ++ ++ if (params.cParams.windowLog > ZSTD_BTRFS_MAX_WINDOWLOG) ++ params.cParams.windowLog = ZSTD_BTRFS_MAX_WINDOWLOG; ++ WARN_ON(src_len > ZSTD_BTRFS_MAX_INPUT); ++ return params; ++} ++ ++struct workspace { ++ void *mem; ++ size_t size; ++ char *buf; ++ struct list_head list; ++}; ++ ++static void zstd_free_workspace(struct list_head *ws) ++{ ++ struct workspace *workspace = list_entry(ws, struct workspace, list); ++ ++ vfree(workspace->mem); ++ kfree(workspace->buf); ++ kfree(workspace); ++} ++ ++static struct list_head *zstd_alloc_workspace(void) ++{ ++ ZSTD_parameters params = ++ zstd_get_btrfs_parameters(ZSTD_BTRFS_MAX_INPUT); ++ struct workspace *workspace; ++ ++ workspace = kzalloc(sizeof(*workspace), GFP_NOFS); ++ if (!workspace) ++ return ERR_PTR(-ENOMEM); ++ ++ workspace->size = max_t(size_t, ++ ZSTD_CStreamWorkspaceBound(params.cParams), ++ ZSTD_DStreamWorkspaceBound(ZSTD_BTRFS_MAX_INPUT)); ++ workspace->mem = vmalloc(workspace->size); ++ workspace->buf = kmalloc(PAGE_SIZE, GFP_NOFS); ++ if (!workspace->mem || !workspace->buf) ++ goto fail; ++ ++ INIT_LIST_HEAD(&workspace->list); ++ ++ return &workspace->list; ++fail: ++ zstd_free_workspace(&workspace->list); ++ return ERR_PTR(-ENOMEM); ++} ++ ++static int zstd_compress_pages(struct list_head *ws, ++ struct address_space *mapping, ++ u64 start, ++ struct page **pages, ++ unsigned long *out_pages, ++ unsigned long *total_in, ++ unsigned long *total_out) ++{ ++ struct workspace *workspace = list_entry(ws, struct workspace, list); ++ ZSTD_CStream *stream; ++ int ret = 0; ++ int nr_pages = 0; ++ struct page *in_page = NULL; /* The current page to read */ ++ struct page *out_page = NULL; /* The current page to write to */ ++ ZSTD_inBuffer in_buf = { NULL, 0, 0 }; ++ ZSTD_outBuffer out_buf = { NULL, 0, 0 }; ++ unsigned long tot_in = 0; ++ unsigned long tot_out = 0; ++ unsigned long len = *total_out; ++ const unsigned long nr_dest_pages = *out_pages; ++ unsigned long max_out = nr_dest_pages * PAGE_SIZE; ++ ZSTD_parameters params = zstd_get_btrfs_parameters(len); ++ ++ *out_pages = 0; ++ *total_out = 0; ++ *total_in = 0; ++ ++ /* Initialize the stream */ ++ stream = ZSTD_createCStream(params, len, workspace->mem, ++ workspace->size); ++ if (!stream) { ++ pr_warn("BTRFS: ZSTD_createStream failed\n"); ++ ret = -EIO; ++ goto out; ++ } ++ ++ /* map in the first page of input data */ ++ in_page = find_get_page(mapping, start >> PAGE_SHIFT); ++ in_buf.src = kmap(in_page); ++ in_buf.pos = 0; ++ in_buf.size = min_t(size_t, len, PAGE_SIZE); ++ ++ ++ /* Allocate and map in the output buffer */ ++ out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); ++ if (out_page == NULL) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ pages[nr_pages++] = out_page; ++ out_buf.dst = kmap(out_page); ++ out_buf.pos = 0; ++ out_buf.size = min_t(size_t, max_out, PAGE_SIZE); ++ ++ while (1) { ++ size_t ret2; ++ ++ ret2 = ZSTD_compressStream(stream, &out_buf, &in_buf); ++ if (ZSTD_isError(ret2)) { ++ pr_debug("BTRFS: ZSTD_compressStream returned %d\n", ++ ZSTD_getErrorCode(ret2)); ++ ret = -EIO; ++ goto out; ++ } ++ ++ /* Check to see if we are making it bigger */ ++ if (tot_in + in_buf.pos > 8192 && ++ tot_in + in_buf.pos < ++ tot_out + out_buf.pos) { ++ ret = -E2BIG; ++ goto out; ++ } ++ ++ /* We've reached the end of our output range */ ++ if (out_buf.pos >= max_out) { ++ tot_out += out_buf.pos; ++ ret = -E2BIG; ++ goto out; ++ } ++ ++ /* Check if we need more output space */ ++ if (out_buf.pos == out_buf.size) { ++ tot_out += PAGE_SIZE; ++ max_out -= PAGE_SIZE; ++ kunmap(out_page); ++ if (nr_pages == nr_dest_pages) { ++ out_page = NULL; ++ ret = -E2BIG; ++ goto out; ++ } ++ out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); ++ if (out_page == NULL) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ pages[nr_pages++] = out_page; ++ out_buf.dst = kmap(out_page); ++ out_buf.pos = 0; ++ out_buf.size = min_t(size_t, max_out, PAGE_SIZE); ++ } ++ ++ /* We've reached the end of the input */ ++ if (in_buf.pos >= len) { ++ tot_in += in_buf.pos; ++ break; ++ } ++ ++ /* Check if we need more input */ ++ if (in_buf.pos == in_buf.size) { ++ tot_in += PAGE_SIZE; ++ kunmap(in_page); ++ put_page(in_page); ++ ++ start += PAGE_SIZE; ++ len -= PAGE_SIZE; ++ in_page = find_get_page(mapping, start >> PAGE_SHIFT); ++ in_buf.src = kmap(in_page); ++ in_buf.pos = 0; ++ in_buf.size = min_t(size_t, len, PAGE_SIZE); ++ } ++ } ++ while (1) { ++ size_t ret2; ++ ++ ret2 = ZSTD_endStream(stream, &out_buf); ++ if (ZSTD_isError(ret2)) { ++ pr_debug("BTRFS: ZSTD_endStream returned %d\n", ++ ZSTD_getErrorCode(ret2)); ++ ret = -EIO; ++ goto out; ++ } ++ if (ret2 == 0) { ++ tot_out += out_buf.pos; ++ break; ++ } ++ if (out_buf.pos >= max_out) { ++ tot_out += out_buf.pos; ++ ret = -E2BIG; ++ goto out; ++ } ++ ++ tot_out += PAGE_SIZE; ++ max_out -= PAGE_SIZE; ++ kunmap(out_page); ++ if (nr_pages == nr_dest_pages) { ++ out_page = NULL; ++ ret = -E2BIG; ++ goto out; ++ } ++ out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); ++ if (out_page == NULL) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ pages[nr_pages++] = out_page; ++ out_buf.dst = kmap(out_page); ++ out_buf.pos = 0; ++ out_buf.size = min_t(size_t, max_out, PAGE_SIZE); ++ } ++ ++ if (tot_out >= tot_in) { ++ ret = -E2BIG; ++ goto out; ++ } ++ ++ ret = 0; ++ *total_in = tot_in; ++ *total_out = tot_out; ++out: ++ *out_pages = nr_pages; ++ /* Cleanup */ ++ if (in_page) { ++ kunmap(in_page); ++ put_page(in_page); ++ } ++ if (out_page) ++ kunmap(out_page); ++ return ret; ++} ++ ++static int zstd_decompress_bio(struct list_head *ws, struct page **pages_in, ++ u64 disk_start, ++ struct bio *orig_bio, ++ size_t srclen) ++{ ++ struct workspace *workspace = list_entry(ws, struct workspace, list); ++ ZSTD_DStream *stream; ++ int ret = 0; ++ unsigned long page_in_index = 0; ++ unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_SIZE); ++ unsigned long buf_start; ++ unsigned long total_out = 0; ++ ZSTD_inBuffer in_buf = { NULL, 0, 0 }; ++ ZSTD_outBuffer out_buf = { NULL, 0, 0 }; ++ ++ stream = ZSTD_createDStream( ++ ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size); ++ if (!stream) { ++ pr_debug("BTRFS: ZSTD_createDStream failed\n"); ++ ret = -EIO; ++ goto done; ++ } ++ ++ in_buf.src = kmap(pages_in[page_in_index]); ++ in_buf.pos = 0; ++ in_buf.size = min_t(size_t, srclen, PAGE_SIZE); ++ ++ out_buf.dst = workspace->buf; ++ out_buf.pos = 0; ++ out_buf.size = PAGE_SIZE; ++ ++ while (1) { ++ size_t ret2; ++ ++ ret2 = ZSTD_decompressStream(stream, &out_buf, &in_buf); ++ if (ZSTD_isError(ret2)) { ++ pr_debug("BTRFS: ZSTD_decompressStream returned %d\n", ++ ZSTD_getErrorCode(ret2)); ++ ret = -EIO; ++ goto done; ++ } ++ buf_start = total_out; ++ total_out += out_buf.pos; ++ out_buf.pos = 0; ++ ++ ret = btrfs_decompress_buf2page(out_buf.dst, buf_start, ++ total_out, disk_start, orig_bio); ++ if (ret == 0) ++ break; ++ ++ if (in_buf.pos >= srclen) ++ break; ++ ++ /* Check if we've hit the end of a frame */ ++ if (ret2 == 0) ++ break; ++ ++ if (in_buf.pos == in_buf.size) { ++ kunmap(pages_in[page_in_index++]); ++ if (page_in_index >= total_pages_in) { ++ in_buf.src = NULL; ++ ret = -EIO; ++ goto done; ++ } ++ srclen -= PAGE_SIZE; ++ in_buf.src = kmap(pages_in[page_in_index]); ++ in_buf.pos = 0; ++ in_buf.size = min_t(size_t, srclen, PAGE_SIZE); ++ } ++ } ++ ret = 0; ++ zero_fill_bio(orig_bio); ++done: ++ if (in_buf.src) ++ kunmap(pages_in[page_in_index]); ++ return ret; ++} ++ ++static int zstd_decompress(struct list_head *ws, unsigned char *data_in, ++ struct page *dest_page, ++ unsigned long start_byte, ++ size_t srclen, size_t destlen) ++{ ++ struct workspace *workspace = list_entry(ws, struct workspace, list); ++ ZSTD_DStream *stream; ++ int ret = 0; ++ size_t ret2; ++ ZSTD_inBuffer in_buf = { NULL, 0, 0 }; ++ ZSTD_outBuffer out_buf = { NULL, 0, 0 }; ++ unsigned long total_out = 0; ++ unsigned long pg_offset = 0; ++ char *kaddr; ++ ++ stream = ZSTD_createDStream( ++ ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size); ++ if (!stream) { ++ pr_warn("BTRFS: ZSTD_createDStream failed\n"); ++ ret = -EIO; ++ goto finish; ++ } ++ ++ destlen = min_t(size_t, destlen, PAGE_SIZE); ++ ++ in_buf.src = data_in; ++ in_buf.pos = 0; ++ in_buf.size = srclen; ++ ++ out_buf.dst = workspace->buf; ++ out_buf.pos = 0; ++ out_buf.size = PAGE_SIZE; ++ ++ ret2 = 1; ++ while (pg_offset < destlen && in_buf.pos < in_buf.size) { ++ unsigned long buf_start; ++ unsigned long buf_offset; ++ unsigned long bytes; ++ ++ /* Check if the frame is over and we still need more input */ ++ if (ret2 == 0) { ++ pr_debug("BTRFS: ZSTD_decompressStream ended early\n"); ++ ret = -EIO; ++ goto finish; ++ } ++ ret2 = ZSTD_decompressStream(stream, &out_buf, &in_buf); ++ if (ZSTD_isError(ret2)) { ++ pr_debug("BTRFS: ZSTD_decompressStream returned %d\n", ++ ZSTD_getErrorCode(ret2)); ++ ret = -EIO; ++ goto finish; ++ } ++ ++ buf_start = total_out; ++ total_out += out_buf.pos; ++ out_buf.pos = 0; ++ ++ if (total_out <= start_byte) ++ continue; ++ ++ if (total_out > start_byte && buf_start < start_byte) ++ buf_offset = start_byte - buf_start; ++ else ++ buf_offset = 0; ++ ++ bytes = min_t(unsigned long, destlen - pg_offset, ++ out_buf.size - buf_offset); ++ ++ kaddr = kmap_atomic(dest_page); ++ memcpy(kaddr + pg_offset, out_buf.dst + buf_offset, bytes); ++ kunmap_atomic(kaddr); ++ ++ pg_offset += bytes; ++ } ++ ret = 0; ++finish: ++ if (pg_offset < destlen) { ++ kaddr = kmap_atomic(dest_page); ++ memset(kaddr + pg_offset, 0, destlen - pg_offset); ++ kunmap_atomic(kaddr); ++ } ++ return ret; ++} ++ ++const struct btrfs_compress_op btrfs_zstd_compress = { ++ .alloc_workspace = zstd_alloc_workspace, ++ .free_workspace = zstd_free_workspace, ++ .compress_pages = zstd_compress_pages, ++ .decompress_bio = zstd_decompress_bio, ++ .decompress = zstd_decompress, ++}; +diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h +index db4c253..f26c34f 100644 +--- a/include/uapi/linux/btrfs.h ++++ b/include/uapi/linux/btrfs.h +@@ -255,13 +255,7 @@ struct btrfs_ioctl_fs_info_args { + #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1) + #define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2) + #define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO (1ULL << 3) +-/* +- * some patches floated around with a second compression method +- * lets save that incompat here for when they do get in +- * Note we don't actually support it, we're just reserving the +- * number +- */ +-#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZOv2 (1ULL << 4) ++#define BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD (1ULL << 4) + + /* + * older kernels tried to do bigger metadata blocks, but the diff --git a/contrib/linux-kernel/fs/btrfs/zstd.c b/contrib/linux-kernel/fs/btrfs/zstd.c index 23a3692ad..b7f319e7a 100644 --- a/contrib/linux-kernel/fs/btrfs/zstd.c +++ b/contrib/linux-kernel/fs/btrfs/zstd.c @@ -15,9 +15,10 @@ static ZSTD_parameters zstd_get_btrfs_parameters(size_t src_len) { ZSTD_parameters params = ZSTD_getParams(3, src_len, 0); - BUG_ON(src_len > ZSTD_BTRFS_MAX_INPUT); - BUG_ON(params.cParams.windowLog > ZSTD_BTRFS_MAX_WINDOWLOG); - params.fParams.checksumFlag = 1; + + if (params.cParams.windowLog > ZSTD_BTRFS_MAX_WINDOWLOG) + params.cParams.windowLog = ZSTD_BTRFS_MAX_WINDOWLOG; + WARN_ON(src_len > ZSTD_BTRFS_MAX_INPUT); return params; } @@ -39,17 +40,21 @@ static void zstd_free_workspace(struct list_head *ws) static struct list_head *zstd_alloc_workspace(void) { - ZSTD_parameters params = zstd_get_btrfs_parameters(ZSTD_BTRFS_MAX_INPUT); + ZSTD_parameters params = + zstd_get_btrfs_parameters(ZSTD_BTRFS_MAX_INPUT); struct workspace *workspace; workspace = kzalloc(sizeof(*workspace), GFP_NOFS); - if (!workspace) return ERR_PTR(-ENOMEM); + if (!workspace) + return ERR_PTR(-ENOMEM); - workspace->size = max_t(size_t, ZSTD_CStreamWorkspaceBound(params.cParams), + workspace->size = max_t(size_t, + ZSTD_CStreamWorkspaceBound(params.cParams), ZSTD_DStreamWorkspaceBound(ZSTD_BTRFS_MAX_INPUT)); workspace->mem = vmalloc(workspace->size); workspace->buf = kmalloc(PAGE_SIZE, GFP_NOFS); - if (!workspace->mem || !workspace->buf) goto fail; + if (!workspace->mem || !workspace->buf) + goto fail; INIT_LIST_HEAD(&workspace->list); @@ -61,16 +66,13 @@ fail: static int zstd_compress_pages(struct list_head *ws, struct address_space *mapping, - u64 start, unsigned long len, + u64 start, struct page **pages, - unsigned long nr_dest_pages, unsigned long *out_pages, unsigned long *total_in, - unsigned long *total_out, - unsigned long max_out) + unsigned long *total_out) { struct workspace *workspace = list_entry(ws, struct workspace, list); - ZSTD_parameters params = zstd_get_btrfs_parameters(len); ZSTD_CStream *stream; int ret = 0; int nr_pages = 0; @@ -80,13 +82,18 @@ static int zstd_compress_pages(struct list_head *ws, ZSTD_outBuffer out_buf = { NULL, 0, 0 }; unsigned long tot_in = 0; unsigned long tot_out = 0; + unsigned long len = *total_out; + const unsigned long nr_dest_pages = *out_pages; + unsigned long max_out = nr_dest_pages * PAGE_SIZE; + ZSTD_parameters params = zstd_get_btrfs_parameters(len); *out_pages = 0; *total_out = 0; *total_in = 0; /* Initialize the stream */ - stream = ZSTD_createCStream(params, len, workspace->mem, workspace->size); + stream = ZSTD_createCStream(params, len, workspace->mem, + workspace->size); if (!stream) { pr_warn("BTRFS: ZSTD_createStream failed\n"); ret = -EIO; @@ -112,10 +119,12 @@ static int zstd_compress_pages(struct list_head *ws, out_buf.size = min_t(size_t, max_out, PAGE_SIZE); while (1) { - const size_t rc = ZSTD_compressStream(stream, &out_buf, &in_buf); - if (ZSTD_isError(rc)) { + size_t ret2; + + ret2 = ZSTD_compressStream(stream, &out_buf, &in_buf); + if (ZSTD_isError(ret2)) { pr_debug("BTRFS: ZSTD_compressStream returned %d\n", - ZSTD_getErrorCode(rc)); + ZSTD_getErrorCode(ret2)); ret = -EIO; goto out; } @@ -177,14 +186,16 @@ static int zstd_compress_pages(struct list_head *ws, } } while (1) { - const size_t rc = ZSTD_endStream(stream, &out_buf); - if (ZSTD_isError(rc)) { + size_t ret2; + + ret2 = ZSTD_endStream(stream, &out_buf); + if (ZSTD_isError(ret2)) { pr_debug("BTRFS: ZSTD_endStream returned %d\n", - ZSTD_getErrorCode(rc)); + ZSTD_getErrorCode(ret2)); ret = -EIO; goto out; } - if (rc == 0) { + if (ret2 == 0) { tot_out += out_buf.pos; break; } @@ -228,24 +239,22 @@ out: kunmap(in_page); put_page(in_page); } - if (out_page) { kunmap(out_page); } + if (out_page) + kunmap(out_page); return ret; } -static int zstd_decompress_biovec(struct list_head *ws, struct page **pages_in, +static int zstd_decompress_bio(struct list_head *ws, struct page **pages_in, u64 disk_start, - struct bio_vec *bvec, - int vcnt, + struct bio *orig_bio, size_t srclen) { struct workspace *workspace = list_entry(ws, struct workspace, list); ZSTD_DStream *stream; int ret = 0; unsigned long page_in_index = 0; - unsigned long page_out_index = 0; unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_SIZE); unsigned long buf_start; - unsigned long pg_offset; unsigned long total_out = 0; ZSTD_inBuffer in_buf = { NULL, 0, 0 }; ZSTD_outBuffer out_buf = { NULL, 0, 0 }; @@ -266,13 +275,13 @@ static int zstd_decompress_biovec(struct list_head *ws, struct page **pages_in, out_buf.pos = 0; out_buf.size = PAGE_SIZE; - pg_offset = 0; - while (1) { - const size_t rc = ZSTD_decompressStream(stream, &out_buf, &in_buf); - if (ZSTD_isError(rc)) { + size_t ret2; + + ret2 = ZSTD_decompressStream(stream, &out_buf, &in_buf); + if (ZSTD_isError(ret2)) { pr_debug("BTRFS: ZSTD_decompressStream returned %d\n", - ZSTD_getErrorCode(rc)); + ZSTD_getErrorCode(ret2)); ret = -EIO; goto done; } @@ -280,23 +289,17 @@ static int zstd_decompress_biovec(struct list_head *ws, struct page **pages_in, total_out += out_buf.pos; out_buf.pos = 0; - { - int ret2 = btrfs_decompress_buf2page(out_buf.dst, buf_start, - total_out, disk_start, bvec, vcnt, - &page_out_index, &pg_offset); - if (ret2 == 0) { - break; - } - } - - if (in_buf.pos >= srclen) { + ret = btrfs_decompress_buf2page(out_buf.dst, buf_start, + total_out, disk_start, orig_bio); + if (ret == 0) + break; + + if (in_buf.pos >= srclen) break; - } /* Check if we've hit the end of a frame */ - if (rc == 0) { + if (ret2 == 0) break; - } if (in_buf.pos == in_buf.size) { kunmap(pages_in[page_in_index++]); @@ -311,10 +314,11 @@ static int zstd_decompress_biovec(struct list_head *ws, struct page **pages_in, in_buf.size = min_t(size_t, srclen, PAGE_SIZE); } } - btrfs_clear_biovec_end(bvec, vcnt, page_out_index, pg_offset); ret = 0; + zero_fill_bio(orig_bio); done: - if (in_buf.src) { kunmap(pages_in[page_in_index]); } + if (in_buf.src) + kunmap(pages_in[page_in_index]); return ret; } @@ -326,6 +330,7 @@ static int zstd_decompress(struct list_head *ws, unsigned char *data_in, struct workspace *workspace = list_entry(ws, struct workspace, list); ZSTD_DStream *stream; int ret = 0; + size_t ret2; ZSTD_inBuffer in_buf = { NULL, 0, 0 }; ZSTD_outBuffer out_buf = { NULL, 0, 0 }; unsigned long total_out = 0; @@ -350,41 +355,37 @@ static int zstd_decompress(struct list_head *ws, unsigned char *data_in, out_buf.pos = 0; out_buf.size = PAGE_SIZE; - ret = 1; + ret2 = 1; while (pg_offset < destlen && in_buf.pos < in_buf.size) { unsigned long buf_start; unsigned long buf_offset; unsigned long bytes; /* Check if the frame is over and we still need more input */ - if (ret == 0) { - pr_debug("BTRFS: ZSTD_decompressStream frame ended to early\n"); + if (ret2 == 0) { + pr_debug("BTRFS: ZSTD_decompressStream ended early\n"); ret = -EIO; goto finish; } - { - const size_t rc = ZSTD_decompressStream(stream, &out_buf, &in_buf); - if (ZSTD_isError(rc)) { - pr_debug("BTRFS: ZSTD_decompressStream returned %d\n", - ZSTD_getErrorCode(rc)); - ret = -EIO; - goto finish; - } - ret = rc > 0; + ret2 = ZSTD_decompressStream(stream, &out_buf, &in_buf); + if (ZSTD_isError(ret2)) { + pr_debug("BTRFS: ZSTD_decompressStream returned %d\n", + ZSTD_getErrorCode(ret2)); + ret = -EIO; + goto finish; } + buf_start = total_out; total_out += out_buf.pos; out_buf.pos = 0; - if (total_out <= start_byte) { + if (total_out <= start_byte) continue; - } - if (total_out > start_byte && buf_start < start_byte) { + if (total_out > start_byte && buf_start < start_byte) buf_offset = start_byte - buf_start; - } else { + else buf_offset = 0; - } bytes = min_t(unsigned long, destlen - pg_offset, out_buf.size - buf_offset); @@ -409,6 +410,6 @@ const struct btrfs_compress_op btrfs_zstd_compress = { .alloc_workspace = zstd_alloc_workspace, .free_workspace = zstd_free_workspace, .compress_pages = zstd_compress_pages, - .decompress_biovec = zstd_decompress_biovec, + .decompress_bio = zstd_decompress_bio, .decompress = zstd_decompress, }; diff --git a/contrib/linux-kernel/include/linux/zstd.h b/contrib/linux-kernel/include/linux/zstd.h index dcfcebff0..38b2bca69 100644 --- a/contrib/linux-kernel/include/linux/zstd.h +++ b/contrib/linux-kernel/include/linux/zstd.h @@ -7,354 +7,749 @@ * of patent rights can be found in the PATENTS file in the same directory. */ -#ifndef ZSTD_H_235446 -#define ZSTD_H_235446 +#ifndef ZSTD_H +#define ZSTD_H /* ====== Dependency ======*/ #include /* size_t */ -/* ===== ZSTDLIB_API : control library symbols visibility ===== */ -#define ZSTDLIB_API - - -/******************************************************************************************************* - Introduction - - zstd, short for Zstandard, is a fast lossless compression algorithm, targeting real-time compression scenarios - at zlib-level and better compression ratios. The zstd compression library provides in-memory compression and - decompression functions. The library supports compression levels from 1 up to ZSTD_maxCLevel() which is 22. - Levels >= 20, labeled `--ultra`, should be used with caution, as they require more memory. - Compression can be done in: - - a single step (described as Simple API) - - a single step, reusing a context (described as Explicit memory management) - - unbounded multiple steps (described as Streaming compression) - The compression ratio achievable on small data can be highly improved using compression with a dictionary in: - - a single step (described as Simple dictionary API) - - a single step, reusing a dictionary (described as Fast dictionary API) -*********************************************************************************************************/ - -/*------ Version ------*/ -#define ZSTD_VERSION_MAJOR 1 -#define ZSTD_VERSION_MINOR 1 -#define ZSTD_VERSION_RELEASE 5 - -#define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE -#define ZSTD_QUOTE(str) #str -#define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str) -#define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION) - -#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) - +/*-***************************************************************************** + * Introduction + * + * zstd, short for Zstandard, is a fast lossless compression algorithm, + * targeting real-time compression scenarios at zlib-level and better + * compression ratios. The zstd compression library provides in-memory + * compression and decompression functions. The library supports compression + * levels from 1 up to ZSTD_maxCLevel() which is 22. Levels >= 20, labeled + * ultra, should be used with caution, as they require more memory. + * Compression can be done in: + * - a single step, reusing a context (described as Explicit memory management) + * - unbounded multiple steps (described as Streaming compression) + * The compression ratio achievable on small data can be highly improved using + * compression with a dictionary in: + * - a single step (described as Simple dictionary API) + * - a single step, reusing a dictionary (described as Fast dictionary API) + ******************************************************************************/ /*====== Helper functions ======*/ + +/** + * enum ZSTD_ErrorCode - zstd error codes + * + * Functions that return size_t can be checked for errors using ZSTD_isError() + * and the ZSTD_ErrorCode can be extracted using ZSTD_getErrorCode(). + */ typedef enum { - ZSTD_error_no_error, - ZSTD_error_GENERIC, - ZSTD_error_prefix_unknown, - ZSTD_error_version_unsupported, - ZSTD_error_parameter_unknown, - ZSTD_error_frameParameter_unsupported, - ZSTD_error_frameParameter_unsupportedBy32bits, - ZSTD_error_frameParameter_windowTooLarge, - ZSTD_error_compressionParameter_unsupported, - ZSTD_error_init_missing, - ZSTD_error_memory_allocation, - ZSTD_error_stage_wrong, - ZSTD_error_dstSize_tooSmall, - ZSTD_error_srcSize_wrong, - ZSTD_error_corruption_detected, - ZSTD_error_checksum_wrong, - ZSTD_error_tableLog_tooLarge, - ZSTD_error_maxSymbolValue_tooLarge, - ZSTD_error_maxSymbolValue_tooSmall, - ZSTD_error_dictionary_corrupted, - ZSTD_error_dictionary_wrong, - ZSTD_error_dictionaryCreation_failed, - ZSTD_error_maxCode + ZSTD_error_no_error, + ZSTD_error_GENERIC, + ZSTD_error_prefix_unknown, + ZSTD_error_version_unsupported, + ZSTD_error_parameter_unknown, + ZSTD_error_frameParameter_unsupported, + ZSTD_error_frameParameter_unsupportedBy32bits, + ZSTD_error_frameParameter_windowTooLarge, + ZSTD_error_compressionParameter_unsupported, + ZSTD_error_init_missing, + ZSTD_error_memory_allocation, + ZSTD_error_stage_wrong, + ZSTD_error_dstSize_tooSmall, + ZSTD_error_srcSize_wrong, + ZSTD_error_corruption_detected, + ZSTD_error_checksum_wrong, + ZSTD_error_tableLog_tooLarge, + ZSTD_error_maxSymbolValue_tooLarge, + ZSTD_error_maxSymbolValue_tooSmall, + ZSTD_error_dictionary_corrupted, + ZSTD_error_dictionary_wrong, + ZSTD_error_dictionaryCreation_failed, + ZSTD_error_maxCode } ZSTD_ErrorCode; -ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */ -ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case scenario */ -/*! ZSTD_isError() : -* tells if a `size_t` function result is an error code */ -ZSTDLIB_API static __attribute__((unused)) unsigned ZSTD_isError(size_t code) { +/** + * ZSTD_maxCLevel() - maximum compression level available + * + * Return: Maximum compression level available. + */ +int ZSTD_maxCLevel(void); +/** + * ZSTD_compressBound() - maximum compressed size in worst case scenario + * @srcSize: The size of the data to compress. + * + * Return: The maximum compressed size in the worst case scenario. + */ +size_t ZSTD_compressBound(size_t srcSize); +/** + * ZSTD_isError() - tells if a size_t function result is an error code + * @code: The function result to check for error. + * + * Return: Non-zero iff the code is an error. + */ +static __attribute__((unused)) unsigned int ZSTD_isError(size_t code) +{ return code > (size_t)-ZSTD_error_maxCode; } -/*! ZSTD_getErrorCode() : -* convert a `size_t` function result into a proper ZSTD_errorCode enum */ -ZSTDLIB_API static __attribute__((unused)) ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult) { - if (!ZSTD_isError(functionResult)) { +/** + * ZSTD_getErrorCode() - translates an error function result to a ZSTD_ErrorCode + * @functionResult: The result of a function for which ZSTD_isError() is true. + * + * Return: The ZSTD_ErrorCode corresponding to the functionResult or 0 + * if the functionResult isn't an error. + */ +static __attribute__((unused)) ZSTD_ErrorCode ZSTD_getErrorCode( + size_t functionResult) +{ + if (!ZSTD_isError(functionResult)) return (ZSTD_ErrorCode)0; - } return (ZSTD_ErrorCode)(0 - functionResult); } -/*************************************** -* Explicit memory management -***************************************/ - -typedef enum { ZSTD_fast, ZSTD_dfast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2, ZSTD_btopt, ZSTD_btopt2 } ZSTD_strategy; /* from faster to stronger */ +/** + * enum ZSTD_strategy - zstd compression search strategy + * + * From faster to stronger. + */ +typedef enum { + ZSTD_fast, + ZSTD_dfast, + ZSTD_greedy, + ZSTD_lazy, + ZSTD_lazy2, + ZSTD_btlazy2, + ZSTD_btopt, + ZSTD_btopt2 +} ZSTD_strategy; +/** + * struct ZSTD_compressionParameters - zstd compression parameters + * @windowLog: Log of the largest match distance. Larger means more + * compression, and more memory needed during decompression. + * @chainLog: Fully searched segment. Larger means more compression, slower, + * and more memory (useless for fast). + * @hashLog: Dispatch table. Larger means more compression, + * slower, and more memory. + * @searchLog: Number of searches. Larger means more compression and slower. + * @searchLength: Match length searched. Larger means faster decompression, + * sometimes less compression. + * @targetLength: Acceptable match size for optimal parser (only). Larger means + * more compression, and slower. + * @strategy: The zstd compression strategy. + */ typedef struct { - unsigned windowLog; /**< largest match distance : larger == more compression, more memory needed during decompression */ - unsigned chainLog; /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */ - unsigned hashLog; /**< dispatch table : larger == faster, more memory */ - unsigned searchLog; /**< nb of searches : larger == more compression, slower */ - unsigned searchLength; /**< match length searched : larger == faster decompression, sometimes less compression */ - unsigned targetLength; /**< acceptable match size for optimal parser (only) : larger == more compression, slower */ + unsigned int windowLog; + unsigned int chainLog; + unsigned int hashLog; + unsigned int searchLog; + unsigned int searchLength; + unsigned int targetLength; ZSTD_strategy strategy; } ZSTD_compressionParameters; +/** + * struct ZSTD_frameParameters - zstd frame parameters + * @contentSizeFlag: Controls whether content size will be present in the frame + * header (when known). + * @checksumFlag: Controls whether a 32-bit checksum is generated at the end + * of the frame for error detection. + * @noDictIDFlag: Controls whether dictID will be saved into the frame header + * when using dictionary compression. + * + * The default value is all fields set to 0. + */ typedef struct { - unsigned contentSizeFlag; /**< 1: content size will be in frame header (when known) */ - unsigned checksumFlag; /**< 1: generate a 32-bits checksum at end of frame, for error detection */ - unsigned noDictIDFlag; /**< 1: no dictID will be saved into frame header (if dictionary compression) */ + unsigned int contentSizeFlag; + unsigned int checksumFlag; + unsigned int noDictIDFlag; } ZSTD_frameParameters; +/** + * struct ZSTD_parameters - zstd parameters + * @cParams: The compression parameters. + * @fParams: The frame parameters. + */ typedef struct { ZSTD_compressionParameters cParams; ZSTD_frameParameters fParams; } ZSTD_parameters; -/*! ZSTD_getCParams() : -* @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize. -* `estimatedSrcSize` value is optional, select 0 if not known */ -ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); +/** + * ZSTD_getCParams() - returns ZSTD_compressionParameters for selected level + * @compressionLevel: The compression level from 1 to ZSTD_maxCLevel(). + * @estimatedSrcSize: The estimated source size to compress or 0 if unknown. + * @dictSize: The dictionary size or 0 if a dictionary isn't being used. + * + * Return: The selected ZSTD_compressionParameters. + */ +ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, + unsigned long long estimatedSrcSize, size_t dictSize); -/*! ZSTD_getParams() : -* same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`. -* All fields of `ZSTD_frameParameters` are set to default (0) */ -ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); +/** + * ZSTD_getParams() - returns ZSTD_parameters for selected level + * @compressionLevel: The compression level from 1 to ZSTD_maxCLevel(). + * @estimatedSrcSize: The estimated source size to compress or 0 if unknown. + * @dictSize: The dictionary size or 0 if a dictionary isn't being used. + * + * The same as ZSTD_getCParams() except also selects the default frame + * parameters (all zero). + * + * Return: The selected ZSTD_parameters. + */ +ZSTD_parameters ZSTD_getParams(int compressionLevel, + unsigned long long estimatedSrcSize, size_t dictSize); +/*-************************************* + * Explicit memory management + **************************************/ -/*! ZSTD_CCtxWorkspaceBound() : -* Returns the minimum amount of memory that needs to be passed to ZSTD_createCCtx() in order to compress with `params.cParams` -* or a `cdict` created with `params.cParams`. */ +/** + * ZSTD_CCtxWorkspaceBound() - the amount of memory needed to create a ZSTD_CCtx + * @cParams: The compression parameters to be used for compression. + * + * If multiple compression parameters might be used, the caller must call + * ZSTD_CCtxWorkspaceBound() for each set of parameters and use the maximum + * size. + * + * Return: A lower bound on the size of the workspace that is passed to + * ZSTD_createCCtx(). + */ size_t ZSTD_CCtxWorkspaceBound(ZSTD_compressionParameters cParams); -/*= Compression context -* When compressing many times, -* it is recommended to allocate a context just once, and re-use it for each successive compression operation. -* The context pointer is placed in `workspace`, which must outlive the returned context. -* Use one context per thread for parallel execution in multi-threaded environments. */ +/** + * struct ZSTD_CCtx - the zstd compression context + * + * When compressing many times it is recommended to allocate a context just once + * and reuse it for each successive compression operation. + */ typedef struct ZSTD_CCtx_s ZSTD_CCtx; -ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void* workspace, size_t workspaceSize); +/** + * ZSTD_createCCtx() - create a zstd compression context + * @workspace: The workspace to emplace the context into. It must outlive + * the returned context. + * @workspaceSize: The size of workspace. Use ZSTD_CCtxWorkspaceBound() to + * determine how large the workspace must be. + * + * Return: A compression context emplaced into workspace. + */ +ZSTD_CCtx *ZSTD_createCCtx(void *workspace, size_t workspaceSize); -/*! ZSTD_compressCCtx() : -* Same as ZSTD_compress(), requires an allocated ZSTD_CCtx (see ZSTD_createCCtx()). -* Note : The workspace passed to ZSTD_createCCtx() must have been at least ZSTD_CCtxWorkspaceBound(params.cParams) bytes. */ -ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, ZSTD_parameters params); +/** + * ZSTD_compressCCtx() - compress src into dst + * @ctx: The context. Must have been created with a workspace at least + * as large as ZSTD_CCtxWorkspaceBound(params.cParams). + * @dst: The buffer to compress src into. + * @dstCapacity: The size of the destination buffer. May be any size, but + * ZSTD_compressBound(srcSize) is guaranteed to be large enough. + * @src: The data to compress. + * @srcSize: The size of the data to compress. + * @params: The parameters to use for compression. See ZSTD_getParams(). + * + * Return: The compressed size or an error, which can be checked using + * ZSTD_isError(). + */ +size_t ZSTD_compressCCtx(ZSTD_CCtx *ctx, void *dst, size_t dstCapacity, + const void *src, size_t srcSize, ZSTD_parameters params); -/*! ZSTD_compress_usingDict() : -* Compression using a predefined Dictionary (see dictBuilder/zdict.h). -* Note : The workspace passed to ZSTD_createCCtx() must have been at least ZSTD_CCtxWorkspaceBound(params.cParams) bytes. -* Note : This function loads the dictionary, resulting in significant startup delay. -* Note : When `dict == NULL || dictSize < 8` no dictionary is used. */ -ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void *dict, size_t dictSize, ZSTD_parameters params); - -/*! ZSTD_DCtxWorkspaceBound() : - * Returns the minimum amount of memory that needs to be passed to ZSTD_createDCtx(). */ +/** + * ZSTD_DCtxWorkspaceBound() - the amount of memory needed to create a ZSTD_DCtx + * + * Return: A lower bound on the size of the workspace that is passed to + * ZSTD_createDCtx(). + */ size_t ZSTD_DCtxWorkspaceBound(void); -/*= Decompression context -* When decompressing many times, -* it is recommended to allocate a context just once, and re-use it for each successive compression operation. -* The context pointer is placed in `workspace`, which must outlive the returned context. -* `workspace` must be at least ZSTD_DCtxWorkspaceBound() bytes. -* Use one context per thread for parallel execution in multi-threaded environments. */ +/** + * struct ZSTD_DCtx - the zstd decompression context + * + * When decompressing many times it is recommended to allocate a context just + * once and reuse it for each successive decompression operation. + */ typedef struct ZSTD_DCtx_s ZSTD_DCtx; -ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void* workspace, size_t workspaceSize); +/** + * ZSTD_createDCtx() - create a zstd decompression context + * @workspace: The workspace to emplace the context into. It must outlive + * the returned context. + * @workspaceSize: The size of workspace. Use ZSTD_DCtxWorkspaceBound() to + * determine how large the workspace must be. + * + * Return: A decompression context emplaced into workspace. + */ +ZSTD_DCtx *ZSTD_createDCtx(void *workspace, size_t workspaceSize); -/*! ZSTD_decompressDCtx() : -* Same as ZSTD_decompress(), requires an allocated ZSTD_DCtx (see ZSTD_createDCtx()). */ -ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +/** + * ZSTD_decompressDCtx() - decompress zstd compressed src into dst + * @ctx: The decompression context. + * @dst: The buffer to decompress src into. + * @dstCapacity: The size of the destination buffer. Must be at least as large + * as the decompressed size. If the caller cannot upper bound the + * decompressed size, then it's better to use the streaming API. + * @src: The zstd compressed data to decompress. Multiple concatenated + * frames and skippable frames are allowed. + * @srcSize: The exact size of the data to decompress. + * + * Return: The decompressed size or an error, which can be checked using + * ZSTD_isError(). + */ +size_t ZSTD_decompressDCtx(ZSTD_DCtx *ctx, void *dst, size_t dstCapacity, + const void *src, size_t srcSize); -/*! ZSTD_decompress_usingDict() : -* Decompression using a predefined Dictionary (see dictBuilder/zdict.h). -* Dictionary must be identical to the one used during compression. -* Note : This function loads the dictionary, resulting in significant startup delay. -* Note : When `dict == NULL || dictSize < 8` no dictionary is used. */ -ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void *dict, size_t dictSize); +/*-************************ + * Simple dictionary API + **************************/ -/**************************** -* Fast dictionary API -****************************/ -/*! ZSTD_CDictWorkspaceBound() : - * Returns the minimum amount of memory that needs to be passed to ZSTD_createCDict() when called with the given `params.cParams`. */ +/** + * ZSTD_compress_usingDict() - compress src into dst using a dictionary + * @ctx: The context. Must have been created with a workspace at least + * as large as ZSTD_CCtxWorkspaceBound(params.cParams). + * @dst: The buffer to compress src into. + * @dstCapacity: The size of the destination buffer. May be any size, but + * ZSTD_compressBound(srcSize) is guaranteed to be large enough. + * @src: The data to compress. + * @srcSize: The size of the data to compress. + * @dict: The dictionary to use for compression. + * @dictSize: The size of the dictionary. + * @params: The parameters to use for compression. See ZSTD_getParams(). + * + * Compression using a predefined dictionary. The same dictionary must be used + * during decompression. + * + * Return: The compressed size or an error, which can be checked using + * ZSTD_isError(). + */ +size_t ZSTD_compress_usingDict(ZSTD_CCtx *ctx, void *dst, size_t dstCapacity, + const void *src, size_t srcSize, const void *dict, size_t dictSize, + ZSTD_parameters params); + +/** + * ZSTD_decompress_usingDict() - decompress src into dst using a dictionary + * @ctx: The decompression context. + * @dst: The buffer to decompress src into. + * @dstCapacity: The size of the destination buffer. Must be at least as large + * as the decompressed size. If the caller cannot upper bound the + * decompressed size, then it's better to use the streaming API. + * @src: The zstd compressed data to decompress. Multiple concatenated + * frames and skippable frames are allowed. + * @srcSize: The exact size of the data to decompress. + * @dict: The dictionary to use for decompression. The same dictionary + * must've been used to compress the data. + * @dictSize: The size of the dictionary. + * + * Return: The decompressed size or an error, which can be checked using + * ZSTD_isError(). + */ +size_t ZSTD_decompress_usingDict(ZSTD_DCtx *ctx, void *dst, size_t dstCapacity, + const void *src, size_t srcSize, const void *dict, size_t dictSize); + +/*-************************** + * Fast dictionary API + ***************************/ + +/** + * ZSTD_CDictWorkspaceBound() - amount of memory needed to create a ZSTD_CDict + * @cParams: The compression parameters to be used for compression. + * + * Return: A lower bound on the size of the workspace that is passed to + * ZSTD_createCDict(). + */ size_t ZSTD_CDictWorkspaceBound(ZSTD_compressionParameters cParams); +/** + * struct ZSTD_CDict - a digested dictionary to be used for compression + */ typedef struct ZSTD_CDict_s ZSTD_CDict; -/*! ZSTD_createCDict() : -* When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once. -* ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay. -* ZSTD_CDict can be created once and used by multiple threads concurrently, as its usage is read-only. -* `dictBuffer` content is referenced, and it must remain accessible throughout the lifetime of the CDict. -* The cdict pointer is placed in `workspace`, which must outlive the returned cdict. -* `workspace` must be at least ZSTD_CDictWorkspaceBound(params.cParams) bytes. */ -ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, ZSTD_parameters params, void* workspace, size_t workspaceSize); +/** + * ZSTD_createCDict() - create a digested dictionary for compression + * @dictBuffer: The dictionary to digest. The buffer is referenced by the + * ZSTD_CDict so it must outlive the returned ZSTD_CDict. + * @dictSize: The size of the dictionary. + * @params: The parameters to use for compression. See ZSTD_getParams(). + * @workspace: The workspace. It must outlive the returned ZSTD_CDict. + * @workspaceSize: The workspace size. Must be at least + * ZSTD_CDictWorkspaceBound(params.cParams). + * + * When compressing multiple messages / blocks with the same dictionary it is + * recommended to load it just once. The ZSTD_CDict merely references the + * dictBuffer, so it must outlive the returned ZSTD_CDict. + * + * Return: The digested dictionary emplaced into workspace. + */ +ZSTD_CDict *ZSTD_createCDict(const void *dictBuffer, size_t dictSize, + ZSTD_parameters params, void *workspace, size_t workspaceSize); -/*! ZSTD_compress_usingCDict() : -* Compression using a digested Dictionary. -* Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times. -* Note that compression level is decided during dictionary creation. */ -ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const ZSTD_CDict* cdict); +/** + * ZSTD_compress_usingCDict() - compress src into dst using a ZSTD_CDict + * @ctx: The context. Must have been created with a workspace at least + * as large as ZSTD_CCtxWorkspaceBound(cParams) where cParams are + * the compression parameters used to create cdict. + * @dst: The buffer to compress src into. + * @dstCapacity: The size of the destination buffer. May be any size, but + * ZSTD_compressBound(srcSize) is guaranteed to be large enough. + * @src: The data to compress. + * @srcSize: The size of the data to compress. + * @cdict: The digested dictionary to use for compression. + * @params: The parameters to use for compression. See ZSTD_getParams(). + * + * Compression using a digested dictionary. The same dictionary must be used + * during decompression. + * + * Return: The compressed size or an error, which can be checked using + * ZSTD_isError(). + */ +size_t ZSTD_compress_usingCDict(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, + const void *src, size_t srcSize, const ZSTD_CDict *cdict); -/*! ZSTD_DDictWorkspaceBound() : - * Returns the minimum amount of memory that needs to be passed to ZSTD_createDDict(). */ +/** + * ZSTD_DDictWorkspaceBound() - amount of memory needed to create a ZSTD_DDict + * + * Return: A lower bound on the size of the workspace that is passed to + * ZSTD_createDDict(). + */ size_t ZSTD_DDictWorkspaceBound(void); +/** + * struct ZSTD_DDict - a digested dictionary to be used for decompression + */ typedef struct ZSTD_DDict_s ZSTD_DDict; -/*! ZSTD_createDDict() : -* Create a digested dictionary, ready to start decompression operation without startup delay. -* `dictBuffer` content is referenced, and it must remain accessible throughout the lifetime of the DDict. -* The ddict pointer is placed in `workspace`, which must outlive the returned ddict. -* `workspace` must be at least ZSTD_DDictWorkspaceBound() bytes. */ -ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize, void* workspace, size_t workspaceSize); +/** + * ZSTD_createDDict() - create a digested dictionary for decompression + * @dictBuffer: The dictionary to digest. The buffer is referenced by the + * ZSTD_DDict so it must outlive the returned ZSTD_DDict. + * @dictSize: The size of the dictionary. + * @workspace: The workspace. It must outlive the returned ZSTD_DDict. + * @workspaceSize: The workspace size. Must be at least + * ZSTD_DDictWorkspaceBound(). + * + * When decompressing multiple messages / blocks with the same dictionary it is + * recommended to load it just once. The ZSTD_DDict merely references the + * dictBuffer, so it must outlive the returned ZSTD_DDict. + * + * Return: The digested dictionary emplaced into workspace. + */ +ZSTD_DDict *ZSTD_createDDict(const void *dictBuffer, size_t dictSize, + void *workspace, size_t workspaceSize); -/*! ZSTD_decompress_usingDDict() : -* Decompression using a digested Dictionary. -* Faster startup than ZSTD_decompress_usingDict(), recommended when same dictionary is used multiple times. */ -ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const ZSTD_DDict* ddict); +/** + * ZSTD_decompress_usingDDict() - decompress src into dst using a ZSTD_DDict + * @ctx: The decompression context. + * @dst: The buffer to decompress src into. + * @dstCapacity: The size of the destination buffer. Must be at least as large + * as the decompressed size. If the caller cannot upper bound the + * decompressed size, then it's better to use the streaming API. + * @src: The zstd compressed data to decompress. Multiple concatenated + * frames and skippable frames are allowed. + * @srcSize: The exact size of the data to decompress. + * @ddict: The digested dictionary to use for decompression. The same + * dictionary must've been used to compress the data. + * + * Return: The decompressed size or an error, which can be checked using + * ZSTD_isError(). + */ +size_t ZSTD_decompress_usingDDict(ZSTD_DCtx *dctx, void *dst, + size_t dstCapacity, const void *src, size_t srcSize, + const ZSTD_DDict *ddict); -/**************************** -* Streaming -****************************/ +/*-************************** + * Streaming + ***************************/ +/** + * struct ZSTD_inBuffer - input buffer for streaming + * @src: Start of the input buffer. + * @size: Size of the input buffer. + * @pos: Position where reading stopped. Will be updated. + * Necessarily 0 <= pos <= size. + */ typedef struct ZSTD_inBuffer_s { - const void* src; /**< start of input buffer */ - size_t size; /**< size of input buffer */ - size_t pos; /**< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */ + const void *src; + size_t size; + size_t pos; } ZSTD_inBuffer; +/** + * struct ZSTD_outBuffer - output buffer for streaming + * @dst: Start of the output buffer. + * @size: Size of the output buffer. + * @pos: Position where writing stopped. Will be updated. + * Necessarily 0 <= pos <= size. + */ typedef struct ZSTD_outBuffer_s { - void* dst; /**< start of output buffer */ - size_t size; /**< size of output buffer */ - size_t pos; /**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */ + void *dst; + size_t size; + size_t pos; } ZSTD_outBuffer; -/*-*********************************************************************** -* Streaming compression - HowTo -* -* A ZSTD_CStream object is required to track streaming operation. -* Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources. -* ZSTD_CStream objects can be reused multiple times on consecutive compression operations. -* It is recommended to re-use ZSTD_CStream in situations where many streaming operations will be achieved consecutively. -* Use one separate ZSTD_CStream per thread for parallel execution. -* -* Start a new compression by initializing ZSTD_CStream. -* Use ZSTD_initCStream() to start a new compression operation. -* Use ZSTD_initCStream_usingCDict() for a compression which requires a dictionary. -* -* Use ZSTD_compressStream() repetitively to consume input stream. -* The function will automatically update both `pos` fields. -* Note that it may not consume the entire input, in which case `pos < size`, -* and it's up to the caller to present again remaining data. -* @return : a size hint, preferred nb of bytes to use as input for next function call -* or an error code, which can be tested using ZSTD_isError(). -* Note 1 : it's just a hint, to help latency a little, any other value will work fine. -* Note 2 : size hint is guaranteed to be <= ZSTD_CStreamInSize() -* -* At any moment, it's possible to flush whatever data remains within internal buffer, using ZSTD_flushStream(). -* `output->pos` will be updated. -* Note that some content might still be left within internal buffer if `output->size` is too small. -* @return : nb of bytes still present within internal buffer (0 if it's empty) -* or an error code, which can be tested using ZSTD_isError(). -* -* ZSTD_endStream() instructs to finish a frame. -* It will perform a flush and write frame epilogue. -* The epilogue is required for decoders to consider a frame completed. -* Similar to ZSTD_flushStream(), it may not be able to flush the full content if `output->size` is too small. -* In which case, call again ZSTD_endStream() to complete the flush. -* @return : nb of bytes still present within internal buffer (0 if it's empty, hence compression completed) -* or an error code, which can be tested using ZSTD_isError(). -* -* *******************************************************************/ +/*-***************************************************************************** + * Streaming compression - HowTo + * + * A ZSTD_CStream object is required to track streaming operation. + * Use ZSTD_createCStream() to create and initialize a ZSTD_CStream object. + * ZSTD_CStream objects can be reused multiple times on consecutive compression + * operations. It is recommended to re-use ZSTD_CStream in situations where many + * streaming operations will be achieved consecutively. Use one separate + * ZSTD_CStream per thread for parallel execution. + * + * Use ZSTD_compressStream() repetitively to consume input stream. + * The function will automatically update both `pos` fields. + * Note that it may not consume the entire input, in which case `pos < size`, + * and it's up to the caller to present again remaining data. + * It returns a hint for the preferred number of bytes to use as an input for + * the next function call. + * + * At any moment, it's possible to flush whatever data remains within internal + * buffer, using ZSTD_flushStream(). `output->pos` will be updated. There might + * still be some content left within the internal buffer if `output->size` is + * too small. It returns the number of bytes left in the internal buffer and + * must be called until it returns 0. + * + * ZSTD_endStream() instructs to finish a frame. It will perform a flush and + * write frame epilogue. The epilogue is required for decoders to consider a + * frame completed. Similar to ZSTD_flushStream(), it may not be able to flush + * the full content if `output->size` is too small. In which case, call again + * ZSTD_endStream() to complete the flush. It returns the number of bytes left + * in the internal buffer and must be called until it returns 0. + ******************************************************************************/ -/*! ZSTD_CStreamWorkspaceBound() : - * Returns the minimum amount of memory that needs to be passed to ZSTD_createCStream() or ZSTD_createCStream_usingCDict() - * when called with the given `params.cParams` or `cdict` created with `params.cParams`. */ +/** + * ZSTD_CStreamWorkspaceBound() - memory needed to create a ZSTD_CStream + * @cParams: The compression parameters to be used for compression. + * + * Return: A lower bound on the size of the workspace that is passed to + * ZSTD_createCStream() and ZSTD_createCStream_usingCDict(). + */ size_t ZSTD_CStreamWorkspaceBound(ZSTD_compressionParameters cParams); +/** + * struct ZSTD_CStream - the zstd streaming compression context + */ typedef struct ZSTD_CStream_s ZSTD_CStream; + /*===== ZSTD_CStream management functions =====*/ -/*! ZSTD_createCStream() : -* Creates a cstream using params. -* Callers may optionally provide the size of the source they intend to compress, or pass 0 if unknown. -* The stream is placed in `workspace`, which must outlive the returned stream. -* `workspace` must be at least ZSTD_CStreamWorkspaceBound(params.cParams) bytes. */ -ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(ZSTD_parameters params, unsigned long long pledgedSrcSize, void* workspace, size_t workspaceSize); -/*! ZSTD_createCStream_usingCDict() : -* Similar to ZSTD_createCStream(), but use the given preprocessed dictionary. -*/ -ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_usingCDict(const ZSTD_CDict* cdict, unsigned long long pledgedSrcSize, void* workspace, size_t workspaceSize); +/** + * ZSTD_createCStream() - create a zstd streaming compression context + * @params: The zstd compression parameters. + * @pledgedSrcSize: If params.fParams.contentSizeFlag == 1 then the caller must + * pass the source size (zero means empty source). Otherwise, + * the caller may optionally pass the source size, or zero if + * unknown. + * @workspace: The workspace to emplace the context into. It must outlive + * the returned context. + * @workspaceSize: The size of workspace. + * Use ZSTD_CStreamWorkspaceBound(params.cParams) to determine + * how large the workspace must be. + * + * Return: The zstd streaming compression context. + */ +ZSTD_CStream *ZSTD_createCStream(ZSTD_parameters params, + unsigned long long pledgedSrcSize, void *workspace, + size_t workspaceSize); + +/** + * ZSTD_createCStream_usingCDict() - create a zstd streaming compression context + * @cdict: The digested dictionary to use for compression. + * @pledgedSrcSize: Optionally the source size, or zero if unknown. + * @workspace: The workspace to emplace the context into. It must outlive + * the returned context. + * @workspaceSize: The size of workspace. Call ZSTD_CStreamWorkspaceBound() + * with the cParams used to create the cdict to determine how + * large the workspace must be. + * + * Return: The zstd streaming compression context. + */ +ZSTD_CStream *ZSTD_createCStream_usingCDict(const ZSTD_CDict *cdict, + unsigned long long pledgedSrcSize, void *workspace, + size_t workspaceSize); /*===== Streaming compression functions =====*/ -ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); /**< re-use compression parameters from previous init; skip dictionary loading stage; zcs must be init at least once before. note: pledgedSrcSize must be correct, a size of 0 means unknown. for a frame size of 0 use initCStream_advanced */ -ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); -ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); -ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); +/** + * ZSTD_resetCStream() - reset the context using parameters from creation + * @zcs: The zstd streaming compression context to reset. + * @pledgedSrcSize: Optionally the source size, or zero if unknown. + * + * Resets the context using the parameters from creation. Skips dictionary + * loading, since it can be reused. If `pledgedSrcSize` is non-zero the frame + * content size is always written into the frame header. + * + * Return: Zero or an error, which can be checked using ZSTD_isError(). + */ +size_t ZSTD_resetCStream(ZSTD_CStream *zcs, unsigned long long pledgedSrcSize); +/** + * ZSTD_compressStream() - streaming compress some of input into output + * @zcs: The zstd streaming compression context. + * @output: Destination buffer. `output->pos` is updated to indicate how much + * compressed data was written. + * @input: Source buffer. `input->pos` is updated to indicate how much data was + * read. Note that it may not consume the entire input, in which case + * `input->pos < input->size`, and it's up to the caller to present + * remaining data again. + * + * The `input` and `output` buffers may be any size. Guaranteed to make some + * forward progress if `input` and `output` are not empty. + * + * Return: A hint for the number of bytes to use as the input for the next + * function call or an error, which can be checked using + * ZSTD_isError(). + */ +size_t ZSTD_compressStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output, + ZSTD_inBuffer *input); +/** + * ZSTD_flushStream() - flush internal buffers into output + * @zcs: The zstd streaming compression context. + * @output: Destination buffer. `output->pos` is updated to indicate how much + * compressed data was written. + * + * ZSTD_flushStream() must be called until it returns 0, meaning all the data + * has been flushed. Since ZSTD_flushStream() causes a block to be ended, + * calling it too often will degrade the compression ratio. + * + * Return: The number of bytes still present within internal buffers or an + * error, which can be checked using ZSTD_isError(). + */ +size_t ZSTD_flushStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output); +/** + * ZSTD_endStream() - flush internal buffers into output and end the frame + * @zcs: The zstd streaming compression context. + * @output: Destination buffer. `output->pos` is updated to indicate how much + * compressed data was written. + * + * ZSTD_endStream() must be called until it returns 0, meaning all the data has + * been flushed and the frame epilogue has been written. + * + * Return: The number of bytes still present within internal buffers or an + * error, which can be checked using ZSTD_isError(). + */ +size_t ZSTD_endStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output); -ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */ -ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block in all circumstances. */ +/** + * ZSTD_CStreamInSize() - recommended size for the input buffer + * + * Return: The recommended size for the input buffer. + */ +size_t ZSTD_CStreamInSize(void); +/** + * ZSTD_CStreamOutSize() - recommended size for the output buffer + * + * When the output buffer is at least this large, it is guaranteed to be large + * enough to flush at least one complete compressed block. + * + * Return: The recommended size for the output buffer. + */ +size_t ZSTD_CStreamOutSize(void); -/*-*************************************************************************** -* Streaming decompression - HowTo -* -* A ZSTD_DStream object is required to track streaming operations. -* Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources. -* ZSTD_DStream objects can be re-used multiple times. -* -* Use ZSTD_initDStream() to start a new decompression operation, -* or ZSTD_initDStream_usingDict() if decompression requires a dictionary. -* @return : recommended first input size -* -* Use ZSTD_decompressStream() repetitively to consume your input. -* The function will update both `pos` fields. -* If `input.pos < input.size`, some input has not been consumed. -* It's up to the caller to present again remaining data. -* If `output.pos < output.size`, decoder has flushed everything it could. -* @return : 0 when a frame is completely decoded and fully flushed, -* an error code, which can be tested using ZSTD_isError(), -* any other value > 0, which means there is still some decoding to do to complete current frame. -* The return value is a suggested next input size (a hint to improve latency) that will never load more than the current frame. -* *******************************************************************************/ +/*-***************************************************************************** + * Streaming decompression - HowTo + * + * A ZSTD_DStream object is required to track streaming operations. + * Use ZSTD_createDStream() to initialize a ZSTD_DStream object. + * ZSTD_DStream objects can be re-used multiple times. + * + * Use ZSTD_decompressStream() repetitively to consume your input. + * The function will update both `pos` fields. + * If `input->pos < input->size`, some input has not been consumed. + * It's up to the caller to present again remaining data. + * If `output->pos < output->size`, decoder has flushed everything it could. + * Returns 0 iff a frame is completely decoded and fully flushed. + * Otherwise it returns a suggested next input size that will never load more + * than the current frame. + ******************************************************************************/ -/*! ZSTD_DStreamWorkspaceBound() : - * Returns the minimum amount of memory that needs to be passed to ZSTD_createDStream() to decompress frames with windowSize <= maxWindowSize. */ +/** + * ZSTD_DStreamWorkspaceBound() - memory needed to create a ZSTD_DStream + * @maxWindowSize: The maximum window size allowed for compressed frames. + * + * Return: A lower bound on the size of the workspace that is passed to + * ZSTD_createDStream() and ZSTD_createDStream_usingDDict(). + */ size_t ZSTD_DStreamWorkspaceBound(size_t maxWindowSize); +/** + * struct ZSTD_DStream - the zstd streaming decompression context + */ typedef struct ZSTD_DStream_s ZSTD_DStream; /*===== ZSTD_DStream management functions =====*/ -/*! ZSTD_createDStream() : -* Creates a dstream that can decompress frames with windowSize up to maxWindowSize. -* The stream is placed in `workspace`, which must outlive the returned stream. -* `workspace` must be at least ZSTD_DStreamWorkspaceBound(maxWindowSize) bytes. */ -ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(size_t maxWindowSize, void* workspace, size_t workspaceSize); -/*! ZSTD_createDStream_usingDDict() : -* Similar to ZSTD_createCStream(), but use the given preprocessed dictionary. */ -ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_usingDDict(size_t maxWindowSize, const ZSTD_DDict* ddict, void* workspace, size_t workspaceSize); +/** + * ZSTD_createDStream() - create a zstd streaming decompression context + * @maxWindowSize: The maximum window size allowed for compressed frames. + * @workspace: The workspace to emplace the context into. It must outlive + * the returned context. + * @workspaceSize: The size of workspace. + * Use ZSTD_DStreamWorkspaceBound(maxWindowSize) to determine + * how large the workspace must be. + * + * Return: The zstd streaming decompression context. + */ +ZSTD_DStream *ZSTD_createDStream(size_t maxWindowSize, void *workspace, + size_t workspaceSize); +/** + * ZSTD_createDStream_usingDDict() - create zstd streaming decompression context + * @maxWindowSize: The maximum window size allowed for compressed frames. + * @ddict: The digested dictionary to use for decompression. + * @workspace: The workspace to emplace the context into. It must outlive + * the returned context. + * @workspaceSize: The size of workspace. + * Use ZSTD_DStreamWorkspaceBound(maxWindowSize) to determine + * how large the workspace must be. + * + * Return: The zstd streaming decompression context. + */ +ZSTD_DStream *ZSTD_createDStream_usingDDict(size_t maxWindowSize, + const ZSTD_DDict *ddict, void *workspace, size_t workspaceSize); /*===== Streaming decompression functions =====*/ -ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); /**< re-use decompression parameters from previous init; saves dictionary loading */ -ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); +/** + * ZSTD_resetDStream() - reset the context using parameters from creation + * @zds: The zstd streaming decompression context to reset. + * + * Resets the context using the parameters from creation. Skips dictionary + * loading, since it can be reused. + * + * Return: Zero or an error, which can be checked using ZSTD_isError(). + */ +size_t ZSTD_resetDStream(ZSTD_DStream *zds); +/** + * ZSTD_decompressStream() - streaming decompress some of input into output + * @zds: The zstd streaming decompression context. + * @output: Destination buffer. `output.pos` is updated to indicate how much + * decompressed data was written. + * @input: Source buffer. `input.pos` is updated to indicate how much data was + * read. Note that it may not consume the entire input, in which case + * `input.pos < input.size`, and it's up to the caller to present + * remaining data again. + * + * The `input` and `output` buffers may be any size. Guaranteed to make some + * forward progress if `input` and `output` are not empty. + * ZSTD_decompressStream() will not consume the last byte of the frame until + * the entire frame is flushed. + * + * Return: Returns 0 iff a frame is completely decoded and fully flushed. + * Otherwise returns a hint for the number of bytes to use as the input + * for the next function call or an error, which can be checked using + * ZSTD_isError(). The size hint will never load more than the frame. + */ +size_t ZSTD_decompressStream(ZSTD_DStream *zds, ZSTD_outBuffer *output, + ZSTD_inBuffer *input); -ZSTDLIB_API size_t ZSTD_DStreamInSize(void); /*!< recommended size for input buffer */ -ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */ +/** + * ZSTD_DStreamInSize() - recommended size for the input buffer + * + * Return: The recommended size for the input buffer. + */ +size_t ZSTD_DStreamInSize(void); +/** + * ZSTD_DStreamOutSize() - recommended size for the output buffer + * + * When the output buffer is at least this large, it is guaranteed to be large + * enough to flush at least one complete decompressed block. + * + * Return: The recommended size for the output buffer. + */ +size_t ZSTD_DStreamOutSize(void); /* --- Constants ---*/ @@ -366,281 +761,389 @@ ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output #define ZSTD_WINDOWLOG_MAX_32 27 #define ZSTD_WINDOWLOG_MAX_64 27 -#define ZSTD_WINDOWLOG_MAX ((unsigned)(sizeof(size_t) == 4 ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64)) -#define ZSTD_WINDOWLOG_MIN 10 -#define ZSTD_HASHLOG_MAX ZSTD_WINDOWLOG_MAX +#define ZSTD_WINDOWLOG_MAX \ + ((unsigned int)(sizeof(size_t) == 4 \ + ? ZSTD_WINDOWLOG_MAX_32 \ + : ZSTD_WINDOWLOG_MAX_64)) +#define ZSTD_WINDOWLOG_MIN 10 +#define ZSTD_HASHLOG_MAX ZSTD_WINDOWLOG_MAX #define ZSTD_HASHLOG_MIN 6 #define ZSTD_CHAINLOG_MAX (ZSTD_WINDOWLOG_MAX+1) #define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN #define ZSTD_HASHLOG3_MAX 17 #define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1) #define ZSTD_SEARCHLOG_MIN 1 -#define ZSTD_SEARCHLENGTH_MAX 7 /* only for ZSTD_fast, other strategies are limited to 6 */ -#define ZSTD_SEARCHLENGTH_MIN 3 /* only for ZSTD_btopt, other strategies are limited to 4 */ +/* only for ZSTD_fast, other strategies are limited to 6 */ +#define ZSTD_SEARCHLENGTH_MAX 7 +/* only for ZSTD_btopt, other strategies are limited to 4 */ +#define ZSTD_SEARCHLENGTH_MIN 3 #define ZSTD_TARGETLENGTH_MIN 4 #define ZSTD_TARGETLENGTH_MAX 999 -#define ZSTD_FRAMEHEADERSIZE_MAX 18 /* for static allocation */ +/* for static allocation */ +#define ZSTD_FRAMEHEADERSIZE_MAX 18 #define ZSTD_FRAMEHEADERSIZE_MIN 6 static const size_t ZSTD_frameHeaderSize_prefix = 5; static const size_t ZSTD_frameHeaderSize_min = ZSTD_FRAMEHEADERSIZE_MIN; static const size_t ZSTD_frameHeaderSize_max = ZSTD_FRAMEHEADERSIZE_MAX; -static const size_t ZSTD_skippableHeaderSize = 8; /* magic number + skippable frame length */ +/* magic number + skippable frame length */ +static const size_t ZSTD_skippableHeaderSize = 8; -/*************************************** -* Compressed size functions -***************************************/ +/*-************************************* + * Compressed size functions + **************************************/ -/*! ZSTD_findFrameCompressedSize() : - * `src` should point to the start of a ZSTD encoded frame or skippable frame - * `srcSize` must be at least as large as the frame - * @return : the compressed size of the frame pointed to by `src`, suitable to pass to - * `ZSTD_decompress` or similar, or an error code if given invalid input. */ -ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize); +/** + * ZSTD_findFrameCompressedSize() - returns the size of a compressed frame + * @src: Source buffer. It should point to the start of a zstd encoded frame + * or a skippable frame. + * @srcSize: The size of the source buffer. It must be at least as large as the + * size of the frame. + * + * Return: The compressed size of the frame pointed to by `src` or an error, + * which can be check with ZSTD_isError(). + * Suitable to pass to ZSTD_decompress() or similar functions. + */ +size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize); -/*************************************** -* Decompressed size functions -***************************************/ -/*! ZSTD_getFrameContentSize() : -* `src` should point to the start of a ZSTD encoded frame -* `srcSize` must be at least as large as the frame header. A value greater than or equal -* to `ZSTD_frameHeaderSize_max` is guaranteed to be large enough in all cases. -* @return : decompressed size of the frame pointed to be `src` if known, otherwise -* - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined -* - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) */ -ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize); +/*-************************************* + * Decompressed size functions + **************************************/ +/** + * ZSTD_getFrameContentSize() - returns the content size in a zstd frame header + * @src: It should point to the start of a zstd encoded frame. + * @srcSize: The size of the source buffer. It must be at least as large as the + * frame header. `ZSTD_frameHeaderSize_max` is always large enough. + * + * Return: The frame content size stored in the frame header if known. + * `ZSTD_CONTENTSIZE_UNKNOWN` if the content size isn't stored in the + * frame header. `ZSTD_CONTENTSIZE_ERROR` on invalid input. + */ +unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize); -/*! ZSTD_findDecompressedSize() : -* `src` should point the start of a series of ZSTD encoded and/or skippable frames -* `srcSize` must be the _exact_ size of this series -* (i.e. there should be a frame boundary exactly `srcSize` bytes after `src`) -* @return : the decompressed size of all data in the contained frames, as a 64-bit value _if known_ -* - if the decompressed size cannot be determined: ZSTD_CONTENTSIZE_UNKNOWN -* - if an error occurred: ZSTD_CONTENTSIZE_ERROR -* -* note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode. -* When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. -* In which case, it's necessary to use streaming mode to decompress data. -* Optionally, application can still use ZSTD_decompress() while relying on implied limits. -* (For example, data may be necessarily cut into blocks <= 16 KB). -* note 2 : decompressed size is always present when compression is done with ZSTD_compress() -* note 3 : decompressed size can be very large (64-bits value), -* potentially larger than what local system can handle as a single memory segment. -* In which case, it's necessary to use streaming mode to decompress data. -* note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified. -* Always ensure result fits within application's authorized limits. -* Each application can set its own limits. -* note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to -* read each contained frame header. This is efficient as most of the data is skipped, -* however it does mean that all frame data must be present and valid. */ -ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize); +/** + * ZSTD_findDecompressedSize() - returns decompressed size of a series of frames + * @src: It should point to the start of a series of zstd encoded and/or + * skippable frames. + * @srcSize: The exact size of the series of frames. + * + * If any zstd encoded frame in the series doesn't have the frame content size + * set, `ZSTD_CONTENTSIZE_UNKNOWN` is returned. But frame content size is always + * set when using ZSTD_compress(). The decompressed size can be very large. + * If the source is untrusted, the decompressed size could be wrong or + * intentionally modified. Always ensure the result fits within the + * application's authorized limits. ZSTD_findDecompressedSize() handles multiple + * frames, and so it must traverse the input to read each frame header. This is + * efficient as most of the data is skipped, however it does mean that all frame + * data must be present and valid. + * + * Return: Decompressed size of all the data contained in the frames if known. + * `ZSTD_CONTENTSIZE_UNKNOWN` if the decompressed size is unknown. + * `ZSTD_CONTENTSIZE_ERROR` if an error occurred. + */ +unsigned long long ZSTD_findDecompressedSize(const void *src, size_t srcSize); -/*************************************** -* Advanced compression functions -***************************************/ -/*! ZSTD_checkCParams() : -* Ensure param values remain within authorized range */ -ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params); +/*-************************************* + * Advanced compression functions + **************************************/ +/** + * ZSTD_checkCParams() - ensure parameter values remain within authorized range + * @cParams: The zstd compression parameters. + * + * Return: Zero or an error, which can be checked using ZSTD_isError(). + */ +size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams); -/*! ZSTD_adjustCParams() : -* optimize params for a given `srcSize` and `dictSize`. -* both values are optional, select `0` if unknown. */ -ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize); +/** + * ZSTD_adjustCParams() - optimize parameters for a given srcSize and dictSize + * @srcSize: Optionally the estimated source size, or zero if unknown. + * @dictSize: Optionally the estimated dictionary size, or zero if unknown. + * + * Return: The optimized parameters. + */ +ZSTD_compressionParameters ZSTD_adjustCParams( + ZSTD_compressionParameters cParams, unsigned long long srcSize, + size_t dictSize); /*--- Advanced decompression functions ---*/ -/*! ZSTD_isFrame() : - * Tells if the content of `buffer` starts with a valid Frame Identifier. - * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. - * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. - * Note 3 : Skippable Frame Identifiers are considered valid. */ -ZSTDLIB_API unsigned ZSTD_isFrame(const void* buffer, size_t size); - -/*! ZSTD_getDictID_fromDict() : - * Provides the dictID stored within dictionary. - * if @return == 0, the dictionary is not conformant with Zstandard specification. - * It can still be loaded, but as a content-only dictionary. */ -ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize); - -/*! ZSTD_getDictID_fromDDict() : - * Provides the dictID of the dictionary loaded into `ddict`. - * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. - * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ -ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict); - -/*! ZSTD_getDictID_fromFrame() : - * Provides the dictID required to decompressed the frame stored within `src`. - * If @return == 0, the dictID could not be decoded. - * This could for one of the following reasons : - * - The frame does not require a dictionary to be decoded (most common case). - * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information. - * Note : this use case also happens when using a non-conformant dictionary. - * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`). - * - This is not a Zstandard frame. - * When identifying the exact failure cause, it's possible to used ZSTD_getFrameParams(), which will provide a more precise error code. */ -ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); - - -/********************************************************************* -* Buffer-less and synchronous inner streaming functions -* -* This is an advanced API, giving full control over buffer management, for users which need direct control over memory. -* But it's also a complex one, with many restrictions (documented below). -* Prefer using normal streaming API for an easier experience -********************************************************************* */ +/** + * ZSTD_isFrame() - returns true iff the buffer starts with a valid frame + * @buffer: The source buffer to check. + * @size: The size of the source buffer, must be at least 4 bytes. + * + * Return: True iff the buffer starts with a zstd or skippable frame identifier. + */ +unsigned int ZSTD_isFrame(const void *buffer, size_t size); /** - Buffer-less streaming compression (synchronous mode) + * ZSTD_getDictID_fromDict() - returns the dictionary id stored in a dictionary + * @dict: The dictionary buffer. + * @dictSize: The size of the dictionary buffer. + * + * Return: The dictionary id stored within the dictionary or 0 if the + * dictionary is not a zstd dictionary. If it returns 0 the + * dictionary can still be loaded as a content-only dictionary. + */ +unsigned int ZSTD_getDictID_fromDict(const void *dict, size_t dictSize); - A ZSTD_CCtx object is required to track streaming operations. - Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource. - ZSTD_CCtx object can be re-used multiple times within successive compression operations. +/** + * ZSTD_getDictID_fromDDict() - returns the dictionary id stored in a ZSTD_DDict + * @ddict: The ddict to find the id of. + * + * Return: The dictionary id stored within `ddict` or 0 if the dictionary is not + * a zstd dictionary. If it returns 0 `ddict` will be loaded as a + * content-only dictionary. + */ +unsigned int ZSTD_getDictID_fromDDict(const ZSTD_DDict *ddict); - Start by initializing a context. - Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression, - or ZSTD_compressBegin_advanced(), for finer parameter control. - It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx() - - Then, consume your input using ZSTD_compressContinue(). - There are some important considerations to keep in mind when using this advanced function : - - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffer only. - - Interface is synchronous : input is consumed entirely and produce 1+ (or more) compressed blocks. - - Caller must ensure there is enough space in `dst` to store compressed data under worst case scenario. - Worst case evaluation is provided by ZSTD_compressBound(). - ZSTD_compressContinue() doesn't guarantee recover after a failed compression. - - ZSTD_compressContinue() presumes prior input ***is still accessible and unmodified*** (up to maximum distance size, see WindowLog). - It remembers all previous contiguous blocks, plus one separated memory segment (which can itself consists of multiple contiguous blocks) - - ZSTD_compressContinue() detects that prior input has been overwritten when `src` buffer overlaps. - In which case, it will "discard" the relevant memory section from its history. - - Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum. - It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame. - Without last block mark, frames will be considered unfinished (corrupted) by decoders. - - `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress some new frame. -*/ - -/*===== Buffer-less streaming compression functions =====*/ -ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); -ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); -ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be 0 (meaning unknown). note: if the contentSizeFlag is set, pledgedSrcSize == 0 means the source size is actually 0 */ -ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**< note: if pledgedSrcSize can be 0, indicating unknown size. if it is non-zero, it must be accurate. for 0 size frames, use compressBegin_advanced */ -ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, unsigned long long pledgedSrcSize); /**< note: if pledgedSrcSize can be 0, indicating unknown size. if it is non-zero, it must be accurate. for 0 size frames, use compressBegin_advanced */ -ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); - - - -/*- - Buffer-less streaming decompression (synchronous mode) - - A ZSTD_DCtx object is required to track streaming operations. - Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it. - A ZSTD_DCtx object can be re-used multiple times. - - First typical operation is to retrieve frame parameters, using ZSTD_getFrameParams(). - It fills a ZSTD_frameParams structure which provide important information to correctly decode the frame, - such as the minimum rolling buffer size to allocate to decompress data (`windowSize`), - and the dictionary ID used. - (Note : content size is optional, it may not be present. 0 means : content size unknown). - Note that these values could be wrong, either because of data malformation, or because an attacker is spoofing deliberate false information. - As a consequence, check that values remain within valid application range, especially `windowSize`, before allocation. - Each application can set its own limit, depending on local restrictions. For extended interoperability, it is recommended to support at least 8 MB. - Frame parameters are extracted from the beginning of the compressed frame. - Data fragment must be large enough to ensure successful decoding, typically `ZSTD_frameHeaderSize_max` bytes. - @result : 0 : successful decoding, the `ZSTD_frameParams` structure is correctly filled. - >0 : `srcSize` is too small, please provide at least @result bytes on next attempt. - errorCode, which can be tested using ZSTD_isError(). - - Start decompression, with ZSTD_decompressBegin() or ZSTD_decompressBegin_usingDict(). - Alternatively, you can copy a prepared context, using ZSTD_copyDCtx(). - - Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively. - ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue(). - ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail. - - @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity). - It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some metadata item. - It can also be an error code, which can be tested with ZSTD_isError(). - - ZSTD_decompressContinue() needs previous data blocks during decompression, up to `windowSize`. - They should preferably be located contiguously, prior to current block. - Alternatively, a round buffer of sufficient size is also possible. Sufficient size is determined by frame parameters. - ZSTD_decompressContinue() is very sensitive to contiguity, - if 2 blocks don't follow each other, make sure that either the compressor breaks contiguity at the same place, - or that previous contiguous segment is large enough to properly handle maximum back-reference. - - A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero. - Context can then be reset to start a new decompression. - - Note : it's possible to know if next input to present is a header or a block, using ZSTD_nextInputType(). - This information is not required to properly decode a frame. - - == Special case : skippable frames == - - Skippable frames allow integration of user-defined data into a flow of concatenated frames. - Skippable frames will be ignored (skipped) by a decompressor. The format of skippable frames is as follows : - a) Skippable frame ID - 4 Bytes, Little endian format, any value from 0x184D2A50 to 0x184D2A5F - b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits - c) Frame Content - any content (User Data) of length equal to Frame Size - For skippable frames ZSTD_decompressContinue() always returns 0. - For skippable frames ZSTD_getFrameParams() returns fparamsPtr->windowLog==0 what means that a frame is skippable. - Note : If fparamsPtr->frameContentSize==0, it is ambiguous: the frame might actually be a Zstd encoded frame with no content. - For purposes of decompression, it is valid in both cases to skip the frame using - ZSTD_findFrameCompressedSize to find its size in bytes. - It also returns Frame Size as fparamsPtr->frameContentSize. -*/ +/** + * ZSTD_getDictID_fromFrame() - returns the dictionary id stored in a zstd frame + * @src: Source buffer. It must be a zstd encoded frame. + * @srcSize: The size of the source buffer. It must be at least as large as the + * frame header. `ZSTD_frameHeaderSize_max` is always large enough. + * + * Return: The dictionary id required to decompress the frame stored within + * `src` or 0 if the dictionary id could not be decoded. It can return + * 0 if the frame does not require a dictionary, the dictionary id + * wasn't stored in the frame, `src` is not a zstd frame, or `srcSize` + * is too small. + */ +unsigned int ZSTD_getDictID_fromFrame(const void *src, size_t srcSize); +/** + * struct ZSTD_frameParams - zstd frame parameters stored in the frame header + * @frameContentSize: The frame content size, or 0 if not present. + * @windowSize: The window size, or 0 if the frame is a skippable frame. + * @dictID: The dictionary id, or 0 if not present. + * @checksumFlag: Whether a checksum was used. + */ typedef struct { unsigned long long frameContentSize; - unsigned windowSize; - unsigned dictID; - unsigned checksumFlag; + unsigned int windowSize; + unsigned int dictID; + unsigned int checksumFlag; } ZSTD_frameParams; -/*===== Buffer-less streaming decompression functions =====*/ -ZSTDLIB_API size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t srcSize); /**< doesn't consume input, see details below */ -ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx); -ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); -ZSTDLIB_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx); -ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx); -ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e; -ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); - /** - Block functions + * ZSTD_getFrameParams() - extracts parameters from a zstd or skippable frame + * @fparamsPtr: On success the frame parameters are written here. + * @src: The source buffer. It must point to a zstd or skippable frame. + * @srcSize: The size of the source buffer. `ZSTD_frameHeaderSize_max` is + * always large enough to succeed. + * + * Return: 0 on success. If more data is required it returns how many bytes + * must be provided to make forward progress. Otherwise it returns + * an error, which can be checked using ZSTD_isError(). + */ +size_t ZSTD_getFrameParams(ZSTD_frameParams *fparamsPtr, const void *src, + size_t srcSize); - Block functions produce and decode raw zstd blocks, without frame metadata. - Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes). - User will have to take in charge required information to regenerate data, such as compressed and content sizes. +/*-***************************************************************************** + * Buffer-less and synchronous inner streaming functions + * + * This is an advanced API, giving full control over buffer management, for + * users which need direct control over memory. + * But it's also a complex one, with many restrictions (documented below). + * Prefer using normal streaming API for an easier experience + ******************************************************************************/ - A few rules to respect : - - Compressing and decompressing require a context structure - + Use ZSTD_createCCtx() and ZSTD_createDCtx() - - It is necessary to init context before starting - + compression : ZSTD_compressBegin() - + decompression : ZSTD_decompressBegin() - + variants _usingDict() are also allowed - + copyCCtx() and copyDCtx() work too - - Block size is limited, it must be <= ZSTD_getBlockSizeMax() - + If you need to compress more, cut data into multiple blocks - + Consider using the regular ZSTD_compress() instead, as frame metadata costs become negligible when source size is large. - - When a block is considered not compressible enough, ZSTD_compressBlock() result will be zero. - In which case, nothing is produced into `dst`. - + User must test for such outcome and deal directly with uncompressed data - + ZSTD_decompressBlock() doesn't accept uncompressed data as input !!! - + In case of multiple successive blocks, decoder must be informed of uncompressed block existence to follow proper history. - Use ZSTD_insertBlock() in such a case. -*/ +/*-***************************************************************************** + * Buffer-less streaming compression (synchronous mode) + * + * A ZSTD_CCtx object is required to track streaming operations. + * Use ZSTD_createCCtx() to create a context. + * ZSTD_CCtx object can be re-used multiple times within successive compression + * operations. + * + * Start by initializing a context. + * Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary + * compression, + * or ZSTD_compressBegin_advanced(), for finer parameter control. + * It's also possible to duplicate a reference context which has already been + * initialized, using ZSTD_copyCCtx() + * + * Then, consume your input using ZSTD_compressContinue(). + * There are some important considerations to keep in mind when using this + * advanced function : + * - ZSTD_compressContinue() has no internal buffer. It uses externally provided + * buffer only. + * - Interface is synchronous : input is consumed entirely and produce 1+ + * (or more) compressed blocks. + * - Caller must ensure there is enough space in `dst` to store compressed data + * under worst case scenario. Worst case evaluation is provided by + * ZSTD_compressBound(). + * ZSTD_compressContinue() doesn't guarantee recover after a failed + * compression. + * - ZSTD_compressContinue() presumes prior input ***is still accessible and + * unmodified*** (up to maximum distance size, see WindowLog). + * It remembers all previous contiguous blocks, plus one separated memory + * segment (which can itself consists of multiple contiguous blocks) + * - ZSTD_compressContinue() detects that prior input has been overwritten when + * `src` buffer overlaps. In which case, it will "discard" the relevant memory + * section from its history. + * + * Finish a frame with ZSTD_compressEnd(), which will write the last block(s) + * and optional checksum. It's possible to use srcSize==0, in which case, it + * will write a final empty block to end the frame. Without last block mark, + * frames will be considered unfinished (corrupted) by decoders. + * + * `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress some new + * frame. + ******************************************************************************/ -#define ZSTD_BLOCKSIZE_ABSOLUTEMAX (128 * 1024) /* define, for static allocation */ +/*===== Buffer-less streaming compression functions =====*/ +size_t ZSTD_compressBegin(ZSTD_CCtx *cctx, int compressionLevel); +size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx *cctx, const void *dict, + size_t dictSize, int compressionLevel); +size_t ZSTD_compressBegin_advanced(ZSTD_CCtx *cctx, const void *dict, + size_t dictSize, ZSTD_parameters params, + unsigned long long pledgedSrcSize); +size_t ZSTD_copyCCtx(ZSTD_CCtx *cctx, const ZSTD_CCtx *preparedCCtx, + unsigned long long pledgedSrcSize); +size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx *cctx, const ZSTD_CDict *cdict, + unsigned long long pledgedSrcSize); +size_t ZSTD_compressContinue(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, + const void *src, size_t srcSize); +size_t ZSTD_compressEnd(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, + const void *src, size_t srcSize); + + + +/*-***************************************************************************** + * Buffer-less streaming decompression (synchronous mode) + * + * A ZSTD_DCtx object is required to track streaming operations. + * Use ZSTD_createDCtx() to create a context. + * A ZSTD_DCtx object can be re-used multiple times. + * + * First typical operation is to retrieve frame parameters, using + * ZSTD_getFrameParams(). It fills a ZSTD_frameParams structure which provide + * important information to correctly decode the frame, such as the minimum + * rolling buffer size to allocate to decompress data (`windowSize`), and the + * dictionary ID used. + * Note: content size is optional, it may not be present. 0 means unknown. + * Note that these values could be wrong, either because of data malformation, + * or because an attacker is spoofing deliberate false information. As a + * consequence, check that values remain within valid application range, + * especially `windowSize`, before allocation. Each application can set its own + * limit, depending on local restrictions. For extended interoperability, it is + * recommended to support at least 8 MB. + * Frame parameters are extracted from the beginning of the compressed frame. + * Data fragment must be large enough to ensure successful decoding, typically + * `ZSTD_frameHeaderSize_max` bytes. + * Result: 0: successful decoding, the `ZSTD_frameParams` structure is filled. + * >0: `srcSize` is too small, provide at least this many bytes. + * errorCode, which can be tested using ZSTD_isError(). + * + * Start decompression, with ZSTD_decompressBegin() or + * ZSTD_decompressBegin_usingDict(). Alternatively, you can copy a prepared + * context, using ZSTD_copyDCtx(). + * + * Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() + * alternatively. + * ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' + * to ZSTD_decompressContinue(). + * ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will + * fail. + * + * The result of ZSTD_decompressContinue() is the number of bytes regenerated + * within 'dst' (necessarily <= dstCapacity). It can be zero, which is not an + * error; it just means ZSTD_decompressContinue() has decoded some metadata + * item. It can also be an error code, which can be tested with ZSTD_isError(). + * + * ZSTD_decompressContinue() needs previous data blocks during decompression, up + * to `windowSize`. They should preferably be located contiguously, prior to + * current block. Alternatively, a round buffer of sufficient size is also + * possible. Sufficient size is determined by frame parameters. + * ZSTD_decompressContinue() is very sensitive to contiguity, if 2 blocks don't + * follow each other, make sure that either the compressor breaks contiguity at + * the same place, or that previous contiguous segment is large enough to + * properly handle maximum back-reference. + * + * A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero. + * Context can then be reset to start a new decompression. + * + * Note: it's possible to know if next input to present is a header or a block, + * using ZSTD_nextInputType(). This information is not required to properly + * decode a frame. + * + * == Special case: skippable frames == + * + * Skippable frames allow integration of user-defined data into a flow of + * concatenated frames. Skippable frames will be ignored (skipped) by a + * decompressor. The format of skippable frames is as follows: + * a) Skippable frame ID - 4 Bytes, Little endian format, any value from + * 0x184D2A50 to 0x184D2A5F + * b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits + * c) Frame Content - any content (User Data) of length equal to Frame Size + * For skippable frames ZSTD_decompressContinue() always returns 0. + * For skippable frames ZSTD_getFrameParams() returns fparamsPtr->windowLog==0 + * what means that a frame is skippable. + * Note: If fparamsPtr->frameContentSize==0, it is ambiguous: the frame might + * actually be a zstd encoded frame with no content. For purposes of + * decompression, it is valid in both cases to skip the frame using + * ZSTD_findFrameCompressedSize() to find its size in bytes. + * It also returns frame size as fparamsPtr->frameContentSize. + ******************************************************************************/ + +/*===== Buffer-less streaming decompression functions =====*/ +size_t ZSTD_decompressBegin(ZSTD_DCtx *dctx); +size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx *dctx, const void *dict, + size_t dictSize); +void ZSTD_copyDCtx(ZSTD_DCtx *dctx, const ZSTD_DCtx *preparedDCtx); +size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx *dctx); +size_t ZSTD_decompressContinue(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, + const void *src, size_t srcSize); +typedef enum { + ZSTDnit_frameHeader, + ZSTDnit_blockHeader, + ZSTDnit_block, + ZSTDnit_lastBlock, + ZSTDnit_checksum, + ZSTDnit_skippableFrame +} ZSTD_nextInputType_e; +ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx *dctx); + +/*-***************************************************************************** + * Block functions + * + * Block functions produce and decode raw zstd blocks, without frame metadata. + * Frame metadata cost is typically ~18 bytes, which can be non-negligible for + * very small blocks (< 100 bytes). User will have to take in charge required + * information to regenerate data, such as compressed and content sizes. + * + * A few rules to respect: + * - Compressing and decompressing require a context structure + * + Use ZSTD_createCCtx() and ZSTD_createDCtx() + * - It is necessary to init context before starting + * + compression : ZSTD_compressBegin() + * + decompression : ZSTD_decompressBegin() + * + variants _usingDict() are also allowed + * + copyCCtx() and copyDCtx() work too + * - Block size is limited, it must be <= ZSTD_getBlockSizeMax() + * + If you need to compress more, cut data into multiple blocks + * + Consider using the regular ZSTD_compress() instead, as frame metadata + * costs become negligible when source size is large. + * - When a block is considered not compressible enough, ZSTD_compressBlock() + * result will be zero. In which case, nothing is produced into `dst`. + * + User must test for such outcome and deal directly with uncompressed data + * + ZSTD_decompressBlock() doesn't accept uncompressed data as input!!! + * + In case of multiple successive blocks, decoder must be informed of + * uncompressed block existence to follow proper history. Use + * ZSTD_insertBlock() in such a case. + ******************************************************************************/ + +/* Define for static allocation */ +#define ZSTD_BLOCKSIZE_ABSOLUTEMAX (128 * 1024) /*===== Raw zstd block functions =====*/ -ZSTDLIB_API size_t ZSTD_getBlockSizeMax(ZSTD_CCtx* cctx); -ZSTDLIB_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -ZSTDLIB_API size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert block into `dctx` history. Useful for uncompressed blocks */ +size_t ZSTD_getBlockSizeMax(ZSTD_CCtx *cctx); +size_t ZSTD_compressBlock(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, + const void *src, size_t srcSize); +size_t ZSTD_decompressBlock(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, + const void *src, size_t srcSize); +size_t ZSTD_insertBlock(ZSTD_DCtx *dctx, const void *blockStart, + size_t blockSize); - -#endif /* ZSTD_H_235446 */ +#endif /* ZSTD_H */ diff --git a/contrib/linux-kernel/lib/zstd/decompress.c b/contrib/linux-kernel/lib/zstd/decompress.c index 9eb210af0..3fcef0105 100644 --- a/contrib/linux-kernel/lib/zstd/decompress.c +++ b/contrib/linux-kernel/lib/zstd/decompress.c @@ -1399,7 +1399,7 @@ size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, /** ZSTD_insertBlock() : insert `src` block into `dctx` history. Useful to track uncompressed blocks. */ -ZSTDLIB_API size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize) +size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize) { ZSTD_checkContinuity(dctx, blockStart); dctx->previousDstEnd = (const char*)blockStart + blockSize; diff --git a/programs/fileio.c b/programs/fileio.c index b384c3c2b..f1f5a5577 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -76,7 +76,7 @@ #define CACHELINE 64 -#define MAX_DICT_SIZE (8 MB) /* protection against large input (attack scenario) */ +#define DICTSIZE_MAX (32 MB) /* protection against large input (attack scenario) */ #define FNSPACE 30 @@ -278,13 +278,13 @@ static FILE* FIO_openDstFile(const char* dstFileName) } -/*! FIO_loadFile() : -* creates a buffer, pointed by `*bufferPtr`, -* loads `filename` content into it, -* up to MAX_DICT_SIZE bytes. -* @return : loaded size -*/ -static size_t FIO_loadFile(void** bufferPtr, const char* fileName) +/*! FIO_createDictBuffer() : + * creates a buffer, pointed by `*bufferPtr`, + * loads `filename` content into it, up to DICTSIZE_MAX bytes. + * @return : loaded size + * if fileName==NULL, returns 0 and a NULL pointer + */ +static size_t FIO_createDictBuffer(void** bufferPtr, const char* fileName) { FILE* fileHandle; U64 fileSize; @@ -296,14 +296,7 @@ static size_t FIO_loadFile(void** bufferPtr, const char* fileName) fileHandle = fopen(fileName, "rb"); if (fileHandle==0) EXM_THROW(31, "zstd: %s: %s", fileName, strerror(errno)); fileSize = UTIL_getFileSize(fileName); - if (fileSize > MAX_DICT_SIZE) { - int seekResult; - if (fileSize > 1 GB) EXM_THROW(32, "Dictionary file %s is too large", fileName); /* avoid extreme cases */ - DISPLAYLEVEL(2,"Dictionary %s is too large : using last %u bytes only \n", fileName, (U32)MAX_DICT_SIZE); - seekResult = fseek(fileHandle, (long int)(fileSize-MAX_DICT_SIZE), SEEK_SET); /* use end of file */ - if (seekResult != 0) EXM_THROW(33, "zstd: %s: %s", fileName, strerror(errno)); - fileSize = MAX_DICT_SIZE; - } + if (fileSize > DICTSIZE_MAX) EXM_THROW(32, "Dictionary file %s is too large (> %u MB)", fileName, DICTSIZE_MAX >> 20); /* avoid extreme cases */ *bufferPtr = malloc((size_t)fileSize); if (*bufferPtr==NULL) EXM_THROW(34, "zstd: %s", strerror(errno)); { size_t const readSize = fread(*bufferPtr, 1, (size_t)fileSize, fileHandle); @@ -356,7 +349,7 @@ static cRess_t FIO_createCResources(const char* dictFileName, int cLevel, /* dictionary */ { void* dictBuffer; - size_t const dictBuffSize = FIO_loadFile(&dictBuffer, dictFileName); + size_t const dictBuffSize = FIO_createDictBuffer(&dictBuffer, dictFileName); /* works with dictFileName==NULL */ if (dictFileName && (dictBuffer==NULL)) EXM_THROW(32, "zstd: allocation error : can't create dictBuffer"); { ZSTD_parameters params = ZSTD_getParams(cLevel, srcSize, dictBuffSize); params.fParams.contentSizeFlag = srcRegFile; @@ -368,7 +361,7 @@ static cRess_t FIO_createCResources(const char* dictFileName, int cLevel, if (comprParams->searchLog) params.cParams.searchLog = comprParams->searchLog; if (comprParams->searchLength) params.cParams.searchLength = comprParams->searchLength; if (comprParams->targetLength) params.cParams.targetLength = comprParams->targetLength; - if (comprParams->strategy) params.cParams.strategy = (ZSTD_strategy)(comprParams->strategy - 1); + if (comprParams->strategy) params.cParams.strategy = (ZSTD_strategy)(comprParams->strategy - 1); /* 0 means : do not change */ #ifdef ZSTD_MULTITHREAD { size_t const errorCode = ZSTDMT_initCStream_advanced(ress.cctx, dictBuffer, dictBuffSize, params, srcSize); if (ZSTD_isError(errorCode)) EXM_THROW(33, "Error initializing CStream : %s", ZSTD_getErrorName(errorCode)); @@ -574,8 +567,8 @@ static int FIO_compressFilename_internal(cRess_t ress, readsize += inSize; { ZSTD_inBuffer inBuff = { ress.srcBuffer, inSize, 0 }; - while (inBuff.pos != inBuff.size) { /* note : is there any possibility of endless loop ? for example, if outBuff is not large enough ? */ - ZSTD_outBuffer outBuff= { ress.dstBuffer, ress.dstBufferSize, 0 }; + while (inBuff.pos != inBuff.size) { + ZSTD_outBuffer outBuff = { ress.dstBuffer, ress.dstBufferSize, 0 }; #ifdef ZSTD_MULTITHREAD size_t const result = ZSTDMT_compressStream(ress.cctx, &outBuff, &inBuff); #else @@ -589,13 +582,13 @@ static int FIO_compressFilename_internal(cRess_t ress, if (sizeCheck!=outBuff.pos) EXM_THROW(25, "Write error : cannot write compressed block into %s", dstFileName); compressedfilesize += outBuff.pos; } } } -#ifdef ZSTD_MULTITHREAD - if (!fileSize) DISPLAYUPDATE(2, "\rRead : %u MB", (U32)(readsize>>20)) - else DISPLAYUPDATE(2, "\rRead : %u / %u MB", (U32)(readsize>>20), (U32)(fileSize>>20)); -#else - if (!fileSize) DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%", (U32)(readsize>>20), (double)compressedfilesize/readsize*100) - else DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%", (U32)(readsize>>20), (U32)(fileSize>>20), (double)compressedfilesize/readsize*100); -#endif + if (g_nbThreads > 1) { + if (!fileSize) DISPLAYUPDATE(2, "\rRead : %u MB", (U32)(readsize>>20)) + else DISPLAYUPDATE(2, "\rRead : %u / %u MB", (U32)(readsize>>20), (U32)(fileSize>>20)); + } else { + if (!fileSize) DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%", (U32)(readsize>>20), (double)compressedfilesize/readsize*100) + else DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%", (U32)(readsize>>20), (U32)(fileSize>>20), (double)compressedfilesize/readsize*100); + } } /* End of Frame */ @@ -776,7 +769,7 @@ static dRess_t FIO_createDResources(const char* dictFileName) /* dictionary */ { void* dictBuffer; - size_t const dictBufferSize = FIO_loadFile(&dictBuffer, dictFileName); + size_t const dictBufferSize = FIO_createDictBuffer(&dictBuffer, dictFileName); size_t const initError = ZSTD_initDStream_usingDict(ress.dctx, dictBuffer, dictBufferSize); if (ZSTD_isError(initError)) EXM_THROW(61, "ZSTD_initDStream_usingDict error : %s", ZSTD_getErrorName(initError)); free(dictBuffer);