mirror of
https://github.com/facebook/zstd.git
synced 2025-12-10 00:06:51 -05:00
Merge pull request #526 from iburinoc/educational
Add educational decoder to /contrib
This commit is contained in:
commit
60259eb9a0
19
contrib/educational_decoder/README.md
Normal file
19
contrib/educational_decoder/README.md
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
Educational Decoder
|
||||||
|
===================
|
||||||
|
|
||||||
|
`zstd_decompress.c` is a self-contained implementation in C99 of a decoder,
|
||||||
|
according to the [Zstandard format specification].
|
||||||
|
While it does not implement as many features as the reference decoder,
|
||||||
|
such as the streaming API or content checksums, it is written to be easy to
|
||||||
|
follow, to help readers understand how the Zstandard format works.
|
||||||
|
It's laid out to match the [format specification],
|
||||||
|
so it can be used to understand how complex segments could be implemented.
|
||||||
|
It also contains implementations of Huffman and FSE table decoding.
|
||||||
|
|
||||||
|
[Zstandard format specification]: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md
|
||||||
|
[format specification]: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md
|
||||||
|
|
||||||
|
`harness.c` provides a simple test harness around the decoder:
|
||||||
|
|
||||||
|
harness <input-file> <output-file> [dictionary]
|
||||||
|
|
||||||
120
contrib/educational_decoder/harness.c
Normal file
120
contrib/educational_decoder/harness.c
Normal file
@ -0,0 +1,120 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2017-present, Facebook, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This source code is licensed under the BSD-style license found in the
|
||||||
|
* LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
* of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#include "zstd_decompress.h"
|
||||||
|
|
||||||
|
// Raw byte type used for all file and (de)compression buffers in the harness
typedef unsigned char u8;

// If the data doesn't include its decompressed size, fall back on assuming the
// compression ratio is at most 16
#define MAX_COMPRESSION_RATIO (16)

// Protect against allocating too much memory for output (1 GiB cap)
#define MAX_OUTPUT_SIZE ((size_t)1024 * 1024 * 1024)

// Heap buffers owned by the harness. They are private to this file, so they
// have internal linkage; each stays NULL until it has been allocated.
static u8 *input = NULL;   // compressed input, filled by read_file()
static u8 *output = NULL;  // destination buffer for the decompressed data
static u8 *dict = NULL;    // optional dictionary, NULL when none was given
|
||||||
|
|
||||||
|
/*
 * Read the whole file at `path` into a newly malloc'd buffer.
 *
 * path: file to read (opened in binary mode).
 * ptr:  out-parameter; on return `*ptr` points at the buffer holding the
 *       file contents. The caller owns the buffer and must free() it.
 *
 * Returns the number of bytes actually read into `*ptr`.
 * On any failure (open, size query, allocation, read error) a message is
 * printed to stderr and the process exits with status 1.
 */
size_t read_file(const char *path, u8 **ptr) {
    FILE *f = fopen(path, "rb");
    if (!f) {
        fprintf(stderr, "failed to open file %s\n", path);
        exit(1);
    }

    // Find the file size by seeking to the end; both calls can fail (e.g. on
    // a pipe), and ftell() reports failure as a negative value
    if (fseek(f, 0L, SEEK_END) != 0) {
        fprintf(stderr, "failed to determine size of file %s\n", path);
        exit(1);
    }
    long end = ftell(f);
    if (end < 0) {
        fprintf(stderr, "failed to determine size of file %s\n", path);
        exit(1);
    }
    rewind(f);
    size_t size = (size_t)end;

    // malloc(0) is allowed to return NULL, so always request at least 1 byte;
    // note the check is on the allocation (*ptr), not the out-parameter
    *ptr = malloc(size ? size : 1);
    if (!*ptr) {
        fprintf(stderr, "failed to allocate memory to hold %s\n", path);
        exit(1);
    }

    // fread() may return short counts, so keep reading, but never ask for
    // more than the space remaining in the buffer
    size_t pos = 0;
    while (pos < size) {
        size_t n_read = fread(&(*ptr)[pos], 1, size - pos, f);
        if (ferror(f)) {
            fprintf(stderr, "error while reading file %s\n", path);
            exit(1);
        }
        if (n_read == 0) {
            // EOF before `size` bytes: the file shrank after we measured it
            break;
        }
        pos += n_read;
    }

    fclose(f);

    return pos;
}
|
||||||
|
|
||||||
|
/*
 * Write `size` bytes starting at `ptr` to the file at `path`, creating or
 * truncating it (binary mode).
 *
 * On any failure (open, write, or flush-on-close) a message is printed to
 * stderr and the process exits with status 1.
 */
void write_file(const char *path, const u8 *ptr, size_t size) {
    FILE *f = fopen(path, "wb");
    if (!f) {
        fprintf(stderr, "failed to open file %s\n", path);
        exit(1);
    }

    size_t written = 0;
    while (written < size) {
        // Only request the bytes that are still outstanding; asking for
        // `size` again after a short write would read past the buffer
        size_t n_written = fwrite(&ptr[written], 1, size - written, f);
        if (n_written == 0 || ferror(f)) {
            fprintf(stderr, "error while writing file %s\n", path);
            exit(1);
        }
        written += n_written;
    }

    // fclose() flushes buffered data, so a failure here means data was lost
    if (fclose(f) != 0) {
        fprintf(stderr, "error while writing file %s\n", path);
        exit(1);
    }
}
|
||||||
|
|
||||||
|
int main(int argc, char **argv) {
|
||||||
|
if (argc < 3) {
|
||||||
|
fprintf(stderr, "usage: %s <file.zst> <out_path> [dictionary]\n",
|
||||||
|
argv[0]);
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t input_size = read_file(argv[1], &input);
|
||||||
|
size_t dict_size = 0;
|
||||||
|
if (argc >= 4) {
|
||||||
|
dict_size = read_file(argv[3], &dict);
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t decompressed_size = ZSTD_get_decompressed_size(input, input_size);
|
||||||
|
if (decompressed_size == -1) {
|
||||||
|
decompressed_size = MAX_COMPRESSION_RATIO * input_size;
|
||||||
|
fprintf(stderr, "WARNING: Compressed data does not contain "
|
||||||
|
"decompressed size, going to assume the compression "
|
||||||
|
"ratio is at most %d (decompressed size of at most "
|
||||||
|
"%zu)\n",
|
||||||
|
MAX_COMPRESSION_RATIO, decompressed_size);
|
||||||
|
}
|
||||||
|
if (decompressed_size > MAX_OUTPUT_SIZE) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"Required output size too large for this implementation\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
output = malloc(decompressed_size);
|
||||||
|
if (!output) {
|
||||||
|
fprintf(stderr, "failed to allocate memory\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t decompressed =
|
||||||
|
ZSTD_decompress_with_dict(output, decompressed_size,
|
||||||
|
input, input_size, dict, dict_size);
|
||||||
|
|
||||||
|
write_file(argv[2], output, decompressed);
|
||||||
|
|
||||||
|
free(input);
|
||||||
|
free(output);
|
||||||
|
free(dict);
|
||||||
|
input = output = dict = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
2345
contrib/educational_decoder/zstd_decompress.c
Normal file
2345
contrib/educational_decoder/zstd_decompress.c
Normal file
File diff suppressed because it is too large
Load Diff
16
contrib/educational_decoder/zstd_decompress.h
Normal file
16
contrib/educational_decoder/zstd_decompress.h
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2017-present, Facebook, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This source code is licensed under the BSD-style license found in the
|
||||||
|
* LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
* of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef ZSTD_DECOMPRESS_H
#define ZSTD_DECOMPRESS_H

#include <stddef.h> /* size_t */

/*
 * Decompress the Zstandard data in `src` (`src_len` bytes) into `dst`, which
 * has room for `dst_len` bytes.
 * NOTE(review): presumably returns the number of bytes written to `dst`,
 * like ZSTD_decompress_with_dict() -- confirm against the implementation.
 */
size_t ZSTD_decompress(void *const dst, const size_t dst_len,
                       const void *const src, const size_t src_len);

/*
 * Same as ZSTD_decompress(), but additionally takes a dictionary of
 * `dict_len` bytes at `dict`. The test harness passes a NULL `dict` with a
 * `dict_len` of 0 when no dictionary is in use, and uses the return value as
 * the number of decompressed bytes to write out.
 */
size_t ZSTD_decompress_with_dict(void *const dst, const size_t dst_len,
                                 const void *const src, const size_t src_len,
                                 const void *const dict, const size_t dict_len);

/*
 * Query the decompressed size recorded in the compressed data at `src`.
 * The test harness treats a return value of (size_t)-1 as "the data does not
 * contain its decompressed size".
 */
size_t ZSTD_get_decompressed_size(const void *const src, const size_t src_len);

#endif /* ZSTD_DECOMPRESS_H */
|
||||||
|
|
||||||
Loading…
x
Reference in New Issue
Block a user