From 2b0a271ed216169d96dafed32713b9a6cb5f0697 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 6 Sep 2019 14:30:13 -0700 Subject: [PATCH 1/3] fix eductional decoder fix #1774 also : - fix minor compilation warnings - make sure the `test` is run during CI tests --- Makefile | 1 + doc/educational_decoder/Makefile | 5 +++- doc/educational_decoder/zstd_decompress.c | 28 +++++++++++------------ doc/educational_decoder/zstd_decompress.h | 4 +++- lib/dictBuilder/cover.c | 4 ++-- 5 files changed, 24 insertions(+), 18 deletions(-) diff --git a/Makefile b/Makefile index acf50cfa3..da3112e30 100644 --- a/Makefile +++ b/Makefile @@ -69,6 +69,7 @@ test: MOREFLAGS += -g -DDEBUGLEVEL=$(DEBUGLEVEL) -Werror test: MOREFLAGS="$(MOREFLAGS)" $(MAKE) -j -C $(PRGDIR) allVariants $(MAKE) -C $(TESTDIR) $@ + $(MAKE) -C doc/educational_decoder test ## shortest: same as `make check` .PHONY: shortest diff --git a/doc/educational_decoder/Makefile b/doc/educational_decoder/Makefile index c1d2c4cc4..ed3f3fef7 100644 --- a/doc/educational_decoder/Makefile +++ b/doc/educational_decoder/Makefile @@ -26,7 +26,10 @@ test: harness @./harness tmp.zst tmp @diff -s tmp README.md @$(RM) -f tmp* - @zstd --train harness.c zstd_decompress.c zstd_decompress.h README.md + # present files for training multiple times, to reach minimum threshold + @zstd --train harness.c zstd_decompress.c zstd_decompress.h README.md \ + harness.c zstd_decompress.c zstd_decompress.h README.md \ + harness.c zstd_decompress.c zstd_decompress.h README.md @zstd -D dictionary README.md -o tmp.zst @./harness tmp.zst tmp dictionary @diff -s tmp README.md diff --git a/doc/educational_decoder/zstd_decompress.c b/doc/educational_decoder/zstd_decompress.c index 8e231bbb5..26143cd1a 100644 --- a/doc/educational_decoder/zstd_decompress.c +++ b/doc/educational_decoder/zstd_decompress.c @@ -395,7 +395,7 @@ size_t ZSTD_decompress_with_dict(void *const dst, const size_t dst_len, /* this decoder assumes decompression of a single frame */ decode_frame(&out, &in, parsed_dict); - return out.ptr - (u8 *)dst; + return (size_t)(out.ptr - (u8 *)dst); } /******* FRAME DECODING ******************************************************/ @@ -416,7 +416,7 @@ static void decompress_data(frame_context_t *const ctx, ostream_t *const out, static void decode_frame(ostream_t *const out, istream_t *const in, const dictionary_t *const dict) { - const u32 magic_number = IO_read_bits(in, 32); + const u32 magic_number = (u32)IO_read_bits(in, 32); // Zstandard frame // // "Magic_Number @@ -497,7 +497,7 @@ static void parse_frame_header(frame_header_t *const header, // 3 Reserved_bit // 2 Content_Checksum_flag // 1-0 Dictionary_ID_flag" - const u8 descriptor = IO_read_bits(in, 8); + const u8 descriptor = (u8)IO_read_bits(in, 8); // decode frame header descriptor into flags const u8 frame_content_size_flag = descriptor >> 6; @@ -521,7 +521,7 @@ static void parse_frame_header(frame_header_t *const header, // // Bit numbers 7-3 2-0 // Field name Exponent Mantissa" - u8 window_descriptor = IO_read_bits(in, 8); + u8 window_descriptor = (u8)IO_read_bits(in, 8); u8 exponent = window_descriptor >> 3; u8 mantissa = window_descriptor & 7; @@ -541,7 +541,7 @@ static void parse_frame_header(frame_header_t *const header, const int bytes_array[] = {0, 1, 2, 4}; const int bytes = bytes_array[dictionary_id_flag]; - header->dictionary_id = IO_read_bits(in, bytes * 8); + header->dictionary_id = (u32)IO_read_bits(in, bytes * 8); } else { header->dictionary_id = 0; } @@ -633,8 +633,8 @@ static void decompress_data(frame_context_t *const ctx, ostream_t *const out, // // The next 2 bits represent the Block_Type, while the remaining 21 bits // represent the Block_Size. Format is little-endian." - last_block = IO_read_bits(in, 1); - const int block_type = IO_read_bits(in, 2); + last_block = (int)IO_read_bits(in, 1); + const int block_type = (int)IO_read_bits(in, 2); const size_t block_len = IO_read_bits(in, 21); switch (block_type) { @@ -748,8 +748,8 @@ static size_t decode_literals(frame_context_t *const ctx, istream_t *const in, // types" // // size_format takes between 1 and 2 bits - int block_type = IO_read_bits(in, 2); - int size_format = IO_read_bits(in, 2); + int block_type = (int)IO_read_bits(in, 2); + int size_format = (int)IO_read_bits(in, 2); if (block_type <= 1) { // Raw or RLE literals block @@ -1005,7 +1005,7 @@ static const i16 SEQ_MATCH_LENGTH_DEFAULT_DIST[53] = { static const u32 SEQ_LITERAL_LENGTH_BASELINES[36] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 22, 24, 28, 32, 40, - 48, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65538}; + 48, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536}; static const u8 SEQ_LITERAL_LENGTH_EXTRA_BITS[36] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; @@ -1021,7 +1021,7 @@ static const u8 SEQ_MATCH_LENGTH_EXTRA_BITS[53] = { 2, 2, 3, 3, 4, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; /// Offset decoding is simpler so we just need a maximum code value -static const u8 SEQ_MAX_CODES[3] = {35, -1, 52}; +static const u8 SEQ_MAX_CODES[3] = {35, (u8)-1, 52}; static void decompress_sequences(frame_context_t *const ctx, istream_t *const in, @@ -1132,7 +1132,7 @@ static void decompress_sequences(frame_context_t *const ctx, istream_t *in, // a single 1-bit and then fills the byte with 0-7 0 bits of padding." const int padding = 8 - highest_set_bit(src[len - 1]); // The offset starts at the end because FSE streams are read backwards - i64 bit_offset = len * 8 - padding; + i64 bit_offset = (i64)(len * 8 - (size_t)padding); // "The bitstream starts with initial state values, each using the required // number of bits in their respective accuracy, decoded previously from @@ -1409,7 +1409,7 @@ size_t ZSTD_get_decompressed_size(const void *src, const size_t src_len) { // get decompressed size from ZSTD frame header { - const u32 magic_number = IO_read_bits(&in, 32); + const u32 magic_number = (u32)IO_read_bits(&in, 32); if (magic_number == 0xFD2FB528U) { // ZSTD frame @@ -1418,7 +1418,7 @@ size_t ZSTD_get_decompressed_size(const void *src, const size_t src_len) { if (header.frame_content_size == 0 && !header.single_segment_flag) { // Content size not provided, we can't tell - return -1; + return (size_t)-1; } return header.frame_content_size; diff --git a/doc/educational_decoder/zstd_decompress.h b/doc/educational_decoder/zstd_decompress.h index a01fde331..74b185338 100644 --- a/doc/educational_decoder/zstd_decompress.h +++ b/doc/educational_decoder/zstd_decompress.h @@ -7,6 +7,8 @@ * in the COPYING file in the root directory of this source tree). */ +#include /* size_t */ + /******* EXPOSED TYPES ********************************************************/ /* * Contains the parsed contents of a dictionary @@ -39,7 +41,7 @@ size_t ZSTD_get_decompressed_size(const void *const src, const size_t src_len); * Return a valid dictionary_t pointer for use with dictionary initialization * or decompression */ -dictionary_t* create_dictionary(); +dictionary_t* create_dictionary(void); /* * Parse a provided dictionary blob for use in decompression diff --git a/lib/dictBuilder/cover.c b/lib/dictBuilder/cover.c index 4721205da..2e129dd91 100644 --- a/lib/dictBuilder/cover.c +++ b/lib/dictBuilder/cover.c @@ -638,8 +638,8 @@ void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLeve "compared to the source size %u! " "size(source)/size(dictionary) = %f, but it should be >= " "10! This may lead to a subpar dictionary! We recommend " - "training on sources at least 10x, and up to 100x the " - "size of the dictionary!\n", (U32)maxDictSize, + "training on sources at least 10x, and preferably 100x " + "the size of the dictionary! \n", (U32)maxDictSize, (U32)nbDmers, ratio); } From a3815d233c0ad35d6da7e03e5d71ac71b303977f Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 6 Sep 2019 16:51:16 -0700 Subject: [PATCH 2/3] fix minor compilation condition harness.c is not designed to pass -Wdeclaration-after-statement --- doc/educational_decoder/Makefile | 6 +++--- doc/educational_decoder/harness.c | 2 +- doc/educational_decoder/zstd_decompress.c | 1 + 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/doc/educational_decoder/Makefile b/doc/educational_decoder/Makefile index ed3f3fef7..fc0803755 100644 --- a/doc/educational_decoder/Makefile +++ b/doc/educational_decoder/Makefile @@ -6,10 +6,10 @@ CPPFLAGS += -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \ -I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(PRGDIR) CFLAGS ?= -O3 CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ - -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \ - -Wstrict-prototypes -Wundef \ + -Wstrict-aliasing=1 -Wswitch-enum \ + -Wredundant-decls -Wstrict-prototypes -Wundef \ -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \ - -Wredundant-decls + -std=c99 CFLAGS += $(DEBUGFLAGS) CFLAGS += $(MOREFLAGS) FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(MULTITHREAD_LDFLAGS) diff --git a/doc/educational_decoder/harness.c b/doc/educational_decoder/harness.c index 47882b168..36f3967a9 100644 --- a/doc/educational_decoder/harness.c +++ b/doc/educational_decoder/harness.c @@ -33,7 +33,7 @@ size_t read_file(const char *path, u8 **ptr) { } fseek(f, 0L, SEEK_END); - size_t size = ftell(f); + size_t size = (size_t)ftell(f); rewind(f); *ptr = malloc(size); diff --git a/doc/educational_decoder/zstd_decompress.c b/doc/educational_decoder/zstd_decompress.c index 26143cd1a..f3e1b848f 100644 --- a/doc/educational_decoder/zstd_decompress.c +++ b/doc/educational_decoder/zstd_decompress.c @@ -833,6 +833,7 @@ static size_t decode_literals_compressed(frame_context_t *const ctx, // bits (0-1023)." num_streams = 1; // Fall through as it has the same size format + /* fallthrough */ case 1: // "4 streams. Both Compressed_Size and Regenerated_Size use 10 bits // (0-1023)." From b9b9a1c8e9b2a0e27c485c691a46d695e8c90c95 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 10 Sep 2019 09:36:02 -0700 Subject: [PATCH 3/3] fix education decoder test when `zstd` in not installed on local system by allowing `ZSTD` variable to hold a custom location for the binary --- Makefile | 2 +- doc/educational_decoder/Makefile | 33 ++++++++++++++++++++++++-------- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/Makefile b/Makefile index da3112e30..efb555c35 100644 --- a/Makefile +++ b/Makefile @@ -69,7 +69,7 @@ test: MOREFLAGS += -g -DDEBUGLEVEL=$(DEBUGLEVEL) -Werror test: MOREFLAGS="$(MOREFLAGS)" $(MAKE) -j -C $(PRGDIR) allVariants $(MAKE) -C $(TESTDIR) $@ - $(MAKE) -C doc/educational_decoder test + ZSTD=../../programs/zstd $(MAKE) -C doc/educational_decoder test ## shortest: same as `make check` .PHONY: shortest diff --git a/doc/educational_decoder/Makefile b/doc/educational_decoder/Makefile index fc0803755..b2ed9f33d 100644 --- a/doc/educational_decoder/Makefile +++ b/doc/educational_decoder/Makefile @@ -1,10 +1,21 @@ +# ################################################################ +# Copyright (c) 2016-present, Yann Collet, Facebook, Inc. +# All rights reserved. +# +# This source code is licensed under both the BSD-style license (found in the +# LICENSE file in the root directory of this source tree) and the GPLv2 (found +# in the COPYING file in the root directory of this source tree). +# ################################################################ + +ZSTD ?= zstd # requires zstd installation on local system +DIFF ?= diff HARNESS_FILES=*.c MULTITHREAD_LDFLAGS = -pthread DEBUGFLAGS= -g -DZSTD_DEBUG=1 CPPFLAGS += -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \ -I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(PRGDIR) -CFLAGS ?= -O3 +CFLAGS ?= -O2 CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \ -Wstrict-aliasing=1 -Wswitch-enum \ -Wredundant-decls -Wstrict-prototypes -Wundef \ @@ -22,16 +33,22 @@ clean: @$(RM) -rf harness.dSYM test: harness - @zstd README.md -o tmp.zst + # + # Testing single-file decompression with educational decoder + # + @$(ZSTD) README.md -o tmp.zst @./harness tmp.zst tmp - @diff -s tmp README.md + @$(DIFF) -s tmp README.md @$(RM) -f tmp* - # present files for training multiple times, to reach minimum threshold - @zstd --train harness.c zstd_decompress.c zstd_decompress.h README.md \ + # + # Testing dictionary decompression with education decoder + # + # note : files are presented multiple for training, to reach minimum threshold + @$(ZSTD) --train harness.c zstd_decompress.c zstd_decompress.h README.md \ harness.c zstd_decompress.c zstd_decompress.h README.md \ harness.c zstd_decompress.c zstd_decompress.h README.md - @zstd -D dictionary README.md -o tmp.zst + @$(ZSTD) -D dictionary README.md -o tmp.zst @./harness tmp.zst tmp dictionary - @diff -s tmp README.md + @$(DIFF) -s tmp README.md @$(RM) -f tmp* dictionary - @make clean + @$(MAKE) clean