mirror of
https://github.com/facebook/zstd.git
synced 2025-12-09 00:03:18 -05:00
commit
5ba495b622
1
Makefile
1
Makefile
@ -69,6 +69,7 @@ test: MOREFLAGS += -g -DDEBUGLEVEL=$(DEBUGLEVEL) -Werror
|
|||||||
test:
|
test:
|
||||||
MOREFLAGS="$(MOREFLAGS)" $(MAKE) -j -C $(PRGDIR) allVariants
|
MOREFLAGS="$(MOREFLAGS)" $(MAKE) -j -C $(PRGDIR) allVariants
|
||||||
$(MAKE) -C $(TESTDIR) $@
|
$(MAKE) -C $(TESTDIR) $@
|
||||||
|
ZSTD=../../programs/zstd $(MAKE) -C doc/educational_decoder test
|
||||||
|
|
||||||
## shortest: same as `make check`
|
## shortest: same as `make check`
|
||||||
.PHONY: shortest
|
.PHONY: shortest
|
||||||
|
|||||||
@ -1,15 +1,26 @@
|
|||||||
|
# ################################################################
|
||||||
|
# Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# This source code is licensed under both the BSD-style license (found in the
|
||||||
|
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
||||||
|
# in the COPYING file in the root directory of this source tree).
|
||||||
|
# ################################################################
|
||||||
|
|
||||||
|
ZSTD ?= zstd # requires zstd installation on local system
|
||||||
|
DIFF ?= diff
|
||||||
HARNESS_FILES=*.c
|
HARNESS_FILES=*.c
|
||||||
|
|
||||||
MULTITHREAD_LDFLAGS = -pthread
|
MULTITHREAD_LDFLAGS = -pthread
|
||||||
DEBUGFLAGS= -g -DZSTD_DEBUG=1
|
DEBUGFLAGS= -g -DZSTD_DEBUG=1
|
||||||
CPPFLAGS += -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \
|
CPPFLAGS += -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \
|
||||||
-I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(PRGDIR)
|
-I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(PRGDIR)
|
||||||
CFLAGS ?= -O3
|
CFLAGS ?= -O2
|
||||||
CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
|
CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
|
||||||
-Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
|
-Wstrict-aliasing=1 -Wswitch-enum \
|
||||||
-Wstrict-prototypes -Wundef \
|
-Wredundant-decls -Wstrict-prototypes -Wundef \
|
||||||
-Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
|
-Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
|
||||||
-Wredundant-decls
|
-std=c99
|
||||||
CFLAGS += $(DEBUGFLAGS)
|
CFLAGS += $(DEBUGFLAGS)
|
||||||
CFLAGS += $(MOREFLAGS)
|
CFLAGS += $(MOREFLAGS)
|
||||||
FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(MULTITHREAD_LDFLAGS)
|
FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(MULTITHREAD_LDFLAGS)
|
||||||
@ -22,13 +33,22 @@ clean:
|
|||||||
@$(RM) -rf harness.dSYM
|
@$(RM) -rf harness.dSYM
|
||||||
|
|
||||||
test: harness
|
test: harness
|
||||||
@zstd README.md -o tmp.zst
|
#
|
||||||
|
# Testing single-file decompression with educational decoder
|
||||||
|
#
|
||||||
|
@$(ZSTD) README.md -o tmp.zst
|
||||||
@./harness tmp.zst tmp
|
@./harness tmp.zst tmp
|
||||||
@diff -s tmp README.md
|
@$(DIFF) -s tmp README.md
|
||||||
@$(RM) -f tmp*
|
@$(RM) -f tmp*
|
||||||
@zstd --train harness.c zstd_decompress.c zstd_decompress.h README.md
|
#
|
||||||
@zstd -D dictionary README.md -o tmp.zst
|
# Testing dictionary decompression with educational decoder
|
||||||
|
#
|
||||||
|
# note : files are presented multiple times for training, to reach the minimum threshold
|
||||||
|
@$(ZSTD) --train harness.c zstd_decompress.c zstd_decompress.h README.md \
|
||||||
|
harness.c zstd_decompress.c zstd_decompress.h README.md \
|
||||||
|
harness.c zstd_decompress.c zstd_decompress.h README.md
|
||||||
|
@$(ZSTD) -D dictionary README.md -o tmp.zst
|
||||||
@./harness tmp.zst tmp dictionary
|
@./harness tmp.zst tmp dictionary
|
||||||
@diff -s tmp README.md
|
@$(DIFF) -s tmp README.md
|
||||||
@$(RM) -f tmp* dictionary
|
@$(RM) -f tmp* dictionary
|
||||||
@make clean
|
@$(MAKE) clean
|
||||||
|
|||||||
@ -33,7 +33,7 @@ size_t read_file(const char *path, u8 **ptr) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fseek(f, 0L, SEEK_END);
|
fseek(f, 0L, SEEK_END);
|
||||||
size_t size = ftell(f);
|
size_t size = (size_t)ftell(f);
|
||||||
rewind(f);
|
rewind(f);
|
||||||
|
|
||||||
*ptr = malloc(size);
|
*ptr = malloc(size);
|
||||||
|
|||||||
@ -395,7 +395,7 @@ size_t ZSTD_decompress_with_dict(void *const dst, const size_t dst_len,
|
|||||||
/* this decoder assumes decompression of a single frame */
|
/* this decoder assumes decompression of a single frame */
|
||||||
decode_frame(&out, &in, parsed_dict);
|
decode_frame(&out, &in, parsed_dict);
|
||||||
|
|
||||||
return out.ptr - (u8 *)dst;
|
return (size_t)(out.ptr - (u8 *)dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
/******* FRAME DECODING ******************************************************/
|
/******* FRAME DECODING ******************************************************/
|
||||||
@ -416,7 +416,7 @@ static void decompress_data(frame_context_t *const ctx, ostream_t *const out,
|
|||||||
|
|
||||||
static void decode_frame(ostream_t *const out, istream_t *const in,
|
static void decode_frame(ostream_t *const out, istream_t *const in,
|
||||||
const dictionary_t *const dict) {
|
const dictionary_t *const dict) {
|
||||||
const u32 magic_number = IO_read_bits(in, 32);
|
const u32 magic_number = (u32)IO_read_bits(in, 32);
|
||||||
// Zstandard frame
|
// Zstandard frame
|
||||||
//
|
//
|
||||||
// "Magic_Number
|
// "Magic_Number
|
||||||
@ -497,7 +497,7 @@ static void parse_frame_header(frame_header_t *const header,
|
|||||||
// 3 Reserved_bit
|
// 3 Reserved_bit
|
||||||
// 2 Content_Checksum_flag
|
// 2 Content_Checksum_flag
|
||||||
// 1-0 Dictionary_ID_flag"
|
// 1-0 Dictionary_ID_flag"
|
||||||
const u8 descriptor = IO_read_bits(in, 8);
|
const u8 descriptor = (u8)IO_read_bits(in, 8);
|
||||||
|
|
||||||
// decode frame header descriptor into flags
|
// decode frame header descriptor into flags
|
||||||
const u8 frame_content_size_flag = descriptor >> 6;
|
const u8 frame_content_size_flag = descriptor >> 6;
|
||||||
@ -521,7 +521,7 @@ static void parse_frame_header(frame_header_t *const header,
|
|||||||
//
|
//
|
||||||
// Bit numbers 7-3 2-0
|
// Bit numbers 7-3 2-0
|
||||||
// Field name Exponent Mantissa"
|
// Field name Exponent Mantissa"
|
||||||
u8 window_descriptor = IO_read_bits(in, 8);
|
u8 window_descriptor = (u8)IO_read_bits(in, 8);
|
||||||
u8 exponent = window_descriptor >> 3;
|
u8 exponent = window_descriptor >> 3;
|
||||||
u8 mantissa = window_descriptor & 7;
|
u8 mantissa = window_descriptor & 7;
|
||||||
|
|
||||||
@ -541,7 +541,7 @@ static void parse_frame_header(frame_header_t *const header,
|
|||||||
const int bytes_array[] = {0, 1, 2, 4};
|
const int bytes_array[] = {0, 1, 2, 4};
|
||||||
const int bytes = bytes_array[dictionary_id_flag];
|
const int bytes = bytes_array[dictionary_id_flag];
|
||||||
|
|
||||||
header->dictionary_id = IO_read_bits(in, bytes * 8);
|
header->dictionary_id = (u32)IO_read_bits(in, bytes * 8);
|
||||||
} else {
|
} else {
|
||||||
header->dictionary_id = 0;
|
header->dictionary_id = 0;
|
||||||
}
|
}
|
||||||
@ -633,8 +633,8 @@ static void decompress_data(frame_context_t *const ctx, ostream_t *const out,
|
|||||||
//
|
//
|
||||||
// The next 2 bits represent the Block_Type, while the remaining 21 bits
|
// The next 2 bits represent the Block_Type, while the remaining 21 bits
|
||||||
// represent the Block_Size. Format is little-endian."
|
// represent the Block_Size. Format is little-endian."
|
||||||
last_block = IO_read_bits(in, 1);
|
last_block = (int)IO_read_bits(in, 1);
|
||||||
const int block_type = IO_read_bits(in, 2);
|
const int block_type = (int)IO_read_bits(in, 2);
|
||||||
const size_t block_len = IO_read_bits(in, 21);
|
const size_t block_len = IO_read_bits(in, 21);
|
||||||
|
|
||||||
switch (block_type) {
|
switch (block_type) {
|
||||||
@ -748,8 +748,8 @@ static size_t decode_literals(frame_context_t *const ctx, istream_t *const in,
|
|||||||
// types"
|
// types"
|
||||||
//
|
//
|
||||||
// size_format takes between 1 and 2 bits
|
// size_format takes between 1 and 2 bits
|
||||||
int block_type = IO_read_bits(in, 2);
|
int block_type = (int)IO_read_bits(in, 2);
|
||||||
int size_format = IO_read_bits(in, 2);
|
int size_format = (int)IO_read_bits(in, 2);
|
||||||
|
|
||||||
if (block_type <= 1) {
|
if (block_type <= 1) {
|
||||||
// Raw or RLE literals block
|
// Raw or RLE literals block
|
||||||
@ -833,6 +833,7 @@ static size_t decode_literals_compressed(frame_context_t *const ctx,
|
|||||||
// bits (0-1023)."
|
// bits (0-1023)."
|
||||||
num_streams = 1;
|
num_streams = 1;
|
||||||
// Fall through as it has the same size format
|
// Fall through as it has the same size format
|
||||||
|
/* fallthrough */
|
||||||
case 1:
|
case 1:
|
||||||
// "4 streams. Both Compressed_Size and Regenerated_Size use 10 bits
|
// "4 streams. Both Compressed_Size and Regenerated_Size use 10 bits
|
||||||
// (0-1023)."
|
// (0-1023)."
|
||||||
@ -1005,7 +1006,7 @@ static const i16 SEQ_MATCH_LENGTH_DEFAULT_DIST[53] = {
|
|||||||
static const u32 SEQ_LITERAL_LENGTH_BASELINES[36] = {
|
static const u32 SEQ_LITERAL_LENGTH_BASELINES[36] = {
|
||||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
|
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
|
||||||
12, 13, 14, 15, 16, 18, 20, 22, 24, 28, 32, 40,
|
12, 13, 14, 15, 16, 18, 20, 22, 24, 28, 32, 40,
|
||||||
48, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65538};
|
48, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536};
|
||||||
static const u8 SEQ_LITERAL_LENGTH_EXTRA_BITS[36] = {
|
static const u8 SEQ_LITERAL_LENGTH_EXTRA_BITS[36] = {
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
|
||||||
1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
|
1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
|
||||||
@ -1021,7 +1022,7 @@ static const u8 SEQ_MATCH_LENGTH_EXTRA_BITS[53] = {
|
|||||||
2, 2, 3, 3, 4, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
|
2, 2, 3, 3, 4, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
|
||||||
|
|
||||||
/// Offset decoding is simpler so we just need a maximum code value
|
/// Offset decoding is simpler so we just need a maximum code value
|
||||||
static const u8 SEQ_MAX_CODES[3] = {35, -1, 52};
|
static const u8 SEQ_MAX_CODES[3] = {35, (u8)-1, 52};
|
||||||
|
|
||||||
static void decompress_sequences(frame_context_t *const ctx,
|
static void decompress_sequences(frame_context_t *const ctx,
|
||||||
istream_t *const in,
|
istream_t *const in,
|
||||||
@ -1132,7 +1133,7 @@ static void decompress_sequences(frame_context_t *const ctx, istream_t *in,
|
|||||||
// a single 1-bit and then fills the byte with 0-7 0 bits of padding."
|
// a single 1-bit and then fills the byte with 0-7 0 bits of padding."
|
||||||
const int padding = 8 - highest_set_bit(src[len - 1]);
|
const int padding = 8 - highest_set_bit(src[len - 1]);
|
||||||
// The offset starts at the end because FSE streams are read backwards
|
// The offset starts at the end because FSE streams are read backwards
|
||||||
i64 bit_offset = len * 8 - padding;
|
i64 bit_offset = (i64)(len * 8 - (size_t)padding);
|
||||||
|
|
||||||
// "The bitstream starts with initial state values, each using the required
|
// "The bitstream starts with initial state values, each using the required
|
||||||
// number of bits in their respective accuracy, decoded previously from
|
// number of bits in their respective accuracy, decoded previously from
|
||||||
@ -1409,7 +1410,7 @@ size_t ZSTD_get_decompressed_size(const void *src, const size_t src_len) {
|
|||||||
|
|
||||||
// get decompressed size from ZSTD frame header
|
// get decompressed size from ZSTD frame header
|
||||||
{
|
{
|
||||||
const u32 magic_number = IO_read_bits(&in, 32);
|
const u32 magic_number = (u32)IO_read_bits(&in, 32);
|
||||||
|
|
||||||
if (magic_number == 0xFD2FB528U) {
|
if (magic_number == 0xFD2FB528U) {
|
||||||
// ZSTD frame
|
// ZSTD frame
|
||||||
@ -1418,7 +1419,7 @@ size_t ZSTD_get_decompressed_size(const void *src, const size_t src_len) {
|
|||||||
|
|
||||||
if (header.frame_content_size == 0 && !header.single_segment_flag) {
|
if (header.frame_content_size == 0 && !header.single_segment_flag) {
|
||||||
// Content size not provided, we can't tell
|
// Content size not provided, we can't tell
|
||||||
return -1;
|
return (size_t)-1;
|
||||||
}
|
}
|
||||||
|
|
||||||
return header.frame_content_size;
|
return header.frame_content_size;
|
||||||
|
|||||||
@ -7,6 +7,8 @@
|
|||||||
* in the COPYING file in the root directory of this source tree).
|
* in the COPYING file in the root directory of this source tree).
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include <stddef.h> /* size_t */
|
||||||
|
|
||||||
/******* EXPOSED TYPES ********************************************************/
|
/******* EXPOSED TYPES ********************************************************/
|
||||||
/*
|
/*
|
||||||
* Contains the parsed contents of a dictionary
|
* Contains the parsed contents of a dictionary
|
||||||
@ -39,7 +41,7 @@ size_t ZSTD_get_decompressed_size(const void *const src, const size_t src_len);
|
|||||||
* Return a valid dictionary_t pointer for use with dictionary initialization
|
* Return a valid dictionary_t pointer for use with dictionary initialization
|
||||||
* or decompression
|
* or decompression
|
||||||
*/
|
*/
|
||||||
dictionary_t* create_dictionary();
|
dictionary_t* create_dictionary(void);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Parse a provided dictionary blob for use in decompression
|
* Parse a provided dictionary blob for use in decompression
|
||||||
|
|||||||
@ -638,8 +638,8 @@ void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLeve
|
|||||||
"compared to the source size %u! "
|
"compared to the source size %u! "
|
||||||
"size(source)/size(dictionary) = %f, but it should be >= "
|
"size(source)/size(dictionary) = %f, but it should be >= "
|
||||||
"10! This may lead to a subpar dictionary! We recommend "
|
"10! This may lead to a subpar dictionary! We recommend "
|
||||||
"training on sources at least 10x, and up to 100x the "
|
"training on sources at least 10x, and preferably 100x "
|
||||||
"size of the dictionary!\n", (U32)maxDictSize,
|
"the size of the dictionary! \n", (U32)maxDictSize,
|
||||||
(U32)nbDmers, ratio);
|
(U32)nbDmers, ratio);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user