From 1c23b640499c1539df0bd30b833d31b50eac442a Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Mon, 25 Sep 2017 11:27:33 -0700 Subject: [PATCH 1/9] [fuzz] fuzz.py can minimize and zip corpora * "minimize" minimizes the corpora into an output directory. * "zip" zips up the minimized corpora, which are ready to deploy. --- tests/fuzz/fuzz.py | 165 +++++++++++++++++++++++++++++++-------------- 1 file changed, 114 insertions(+), 51 deletions(-) diff --git a/tests/fuzz/fuzz.py b/tests/fuzz/fuzz.py index 0ce201cdd..8c381ecf8 100755 --- a/tests/fuzz/fuzz.py +++ b/tests/fuzz/fuzz.py @@ -82,6 +82,35 @@ def tmpdir(): shutil.rmtree(dirpath, ignore_errors=True) +def parse_targets(in_targets): + targets = set() + for target in in_targets: + if not target: + continue + if target == 'all': + targets = targets.union(TARGETS) + elif target in TARGETS: + targets.add(target) + else: + raise RuntimeError('{} is not a valid target'.format(target)) + return list(targets) + + +def targets_parser(args, description): + parser = argparse.ArgumentParser(prog=args.pop(0), description=description) + parser.add_argument( + 'TARGET', + nargs='*', + type=str, + help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS))) + args, extra = parser.parse_known_args(args) + args.extra = extra + + args.TARGET = parse_targets(args.TARGET) + + return args + + def parse_env_flags(args, flags): """ Look for flags set by environment variables. 
@@ -424,36 +453,42 @@ def libfuzzer_parser(args): if args.TARGET and args.TARGET not in TARGETS: raise RuntimeError('{} is not a valid target'.format(args.TARGET)) - if not args.corpora: - args.corpora = abs_join(CORPORA_DIR, args.TARGET) - if not args.artifact: - args.artifact = abs_join(CORPORA_DIR, '{}-crash'.format(args.TARGET)) - if not args.seed: - args.seed = abs_join(CORPORA_DIR, '{}-seed'.format(args.TARGET)) - return args -def libfuzzer(args): - try: - args = libfuzzer_parser(args) - except Exception as e: - print(e) - return 1 - target = abs_join(FUZZ_DIR, args.TARGET) +def libfuzzer(target, corpora=None, artifact=None, seed=None, extra_args=None): + if corpora is None: + corpora = abs_join(CORPORA_DIR, target) + if artifact is None: + artifact = abs_join(CORPORA_DIR, '{}-crash'.format(target)) + if seed is None: + seed = abs_join(CORPORA_DIR, '{}-seed'.format(target)) + if extra_args is None: + extra_args = [] - corpora = [create(args.corpora)] - artifact = create(args.artifact) - seed = check(args.seed) + target = abs_join(FUZZ_DIR, target) + + corpora = [create(corpora)] + artifact = create(artifact) + seed = check(seed) corpora += [artifact] if seed is not None: corpora += [seed] cmd = [target, '-artifact_prefix={}/'.format(artifact)] - cmd += corpora + args.extra + cmd += corpora + extra_args print(' '.join(cmd)) - subprocess.call(cmd) + subprocess.check_call(cmd) + + +def libfuzzer_cmd(args): + try: + args = libfuzzer_parser(args) + except Exception as e: + print(e) + return 1 + libfuzzer(args.TARGET, args.corpora, args.artifact, args.seed, args.extra) return 0 @@ -518,39 +553,15 @@ def afl(args): return 0 -def regression_parser(args): - description = """ - Runs one or more regression tests. - The fuzzer should have been built with with - LIB_FUZZING_ENGINE='libregression.a'. - Takes input from CORPORA. 
- """ - parser = argparse.ArgumentParser(prog=args.pop(0), description=description) - parser.add_argument( - 'TARGET', - nargs='*', - type=str, - help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS))) - args = parser.parse_args(args) - - targets = set() - for target in args.TARGET: - if not target: - continue - if target == 'all': - targets = targets.union(TARGETS) - elif target in TARGETS: - targets.add(target) - else: - raise RuntimeError('{} is not a valid target'.format(target)) - args.TARGET = list(targets) - - return args - - def regression(args): try: - args = regression_parser(args) + description = """ + Runs one or more regression tests. + The fuzzer should have been built with with + LIB_FUZZING_ENGINE='libregression.a'. + Takes input from CORPORA. + """ + args = targets_parser(args, description) except Exception as e: print(e) return 1 @@ -673,6 +684,52 @@ def gen(args): return 0 +def minimize(args): + try: + description = """ + Runs a libfuzzer fuzzer with -merge=1 to build a minimal corpus in + TARGET_seed_corpus. All extra args are passed to libfuzzer. + """ + args = targets_parser(args, description) + except Exception as e: + print(e) + return 1 + + for target in args.TARGET: + # Merge the corpus + anything else into the seed_corpus + corpus = abs_join(CORPORA_DIR, target) + seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target)) + extra_args = [corpus, "-merge=1"] + args.extra + libfuzzer(target, corpora=seed_corpus, extra_args=extra_args) + seeds = set(os.listdir(seed_corpus)) + # Copy all crashes directly into the seed_corpus if not already present + crashes = abs_join(CORPORA_DIR, '{}-crash'.format(target)) + for crash in os.listdir(crashes): + if crash not in seeds: + shutil.copy(abs_join(crashes, crash), seed_corpus) + seeds.add(crash) + + +def zip_cmd(args): + try: + description = """ + Zips up the seed corpus. 
+ """ + args = targets_parser(args, description) + except Exception as e: + print(e) + return 1 + + for target in args.TARGET: + # Zip the seed_corpus + seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target)) + seeds = [abs_join(seed_corpus, f) for f in os.listdir(seed_corpus)] + zip_file = "{}.zip".format(seed_corpus) + cmd = ["zip", "-q", "-j", "-9", zip_file] + print(' '.join(cmd + [abs_join(seed_corpus, '*')])) + subprocess.check_call(cmd + seeds) + + def short_help(args): name = args[0] print("Usage: {} [OPTIONS] COMMAND [ARGS]...\n".format(name)) @@ -690,6 +747,8 @@ def help(args): print("\tafl\t\tRun an AFL fuzzer") print("\tregression\tRun a regression test") print("\tgen\t\tGenerate a seed corpus for a fuzzer") + print("\tminimize\tMinimize the test corpora") + print("\tzip\t\tZip the minimized corpora up") def main(): @@ -705,13 +764,17 @@ def main(): if command == "build": return build(args) if command == "libfuzzer": - return libfuzzer(args) + return libfuzzer_cmd(args) if command == "regression": return regression(args) if command == "afl": return afl(args) if command == "gen": return gen(args) + if command == "minimize": + return minimize(args) + if command == "zip": + return zip_cmd(args) short_help(args) print("Error: No such command {} (pass -h for help)".format(command)) return 1 From 23199b6daf4757b41b20fc83d95f5f4b50bad948 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Mon, 25 Sep 2017 13:28:18 -0700 Subject: [PATCH 2/9] [fuzz] Fix fuzz.py env flags parsing --- tests/fuzz/fuzz.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/fuzz/fuzz.py b/tests/fuzz/fuzz.py index 8c381ecf8..cd4087090 100755 --- a/tests/fuzz/fuzz.py +++ b/tests/fuzz/fuzz.py @@ -115,7 +115,6 @@ def parse_env_flags(args, flags): """ Look for flags set by environment variables. 
""" - flags = ' '.join(flags) san_flags = ','.join(re.findall('-fsanitize=((?:[a-z]+,?)+)', flags)) nosan_flags = ','.join(re.findall('-fno-sanitize=((?:[a-z]+,?)+)', flags)) From bfad5568b5318adf6766de6deaaf4f9b0cc1c668 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Mon, 25 Sep 2017 13:28:45 -0700 Subject: [PATCH 3/9] [fuzz] Make simple_round_trip compile cleanly --- tests/fuzz/simple_round_trip.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/fuzz/simple_round_trip.c b/tests/fuzz/simple_round_trip.c index f853485ad..617e45df6 100644 --- a/tests/fuzz/simple_round_trip.c +++ b/tests/fuzz/simple_round_trip.c @@ -38,10 +38,11 @@ static size_t roundTripTest(void *result, size_t resultCapacity, if (FUZZ_rand(&seed) & 1) { ZSTD_inBuffer in = {src, srcSize, 0}; ZSTD_outBuffer out = {compressed, compressedCapacity, 0}; + size_t err; ZSTD_CCtx_reset(cctx); FUZZ_setRandomParameters(cctx, &seed); - size_t const err = ZSTD_compress_generic(cctx, &out, &in, ZSTD_e_end); + err = ZSTD_compress_generic(cctx, &out, &in, ZSTD_e_end); if (err != 0) { return err; } From bbe77212efebcfefb6b510f2509a674a5dd25245 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Mon, 18 Sep 2017 16:54:53 -0700 Subject: [PATCH 4/9] [libzstd] Increase MaxOff --- lib/common/zstd_internal.h | 7 +++-- lib/compress/zstd_compress.c | 33 ++++++++++++++------ lib/decompress/zstd_decompress.c | 53 ++++++++++++++++++++++---------- tests/decodecorpus.c | 2 +- 4 files changed, 65 insertions(+), 30 deletions(-) diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h index cd0dbcc27..403c0cbdb 100644 --- a/lib/common/zstd_internal.h +++ b/lib/common/zstd_internal.h @@ -123,7 +123,8 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy #define MaxLit ((1<longLengthPos] = MaxML; } -MEM_STATIC symbolEncodingType_e ZSTD_selectEncodingType(FSE_repeat* repeatMode, - size_t const mostFrequent, size_t nbSeq, U32 defaultNormLog) +typedef enum { + 
ZSTD_defaultDisallowed = 0, + ZSTD_defaultAllowed = 1 +} ZSTD_defaultPolicy_e; + +MEM_STATIC symbolEncodingType_e ZSTD_selectEncodingType( + FSE_repeat* repeatMode, size_t const mostFrequent, size_t nbSeq, + U32 defaultNormLog, ZSTD_defaultPolicy_e const isDefaultAllowed) { #define MIN_SEQ_FOR_DYNAMIC_FSE 64 #define MAX_SEQ_FOR_STATIC_FSE 1000 - - if ((mostFrequent == nbSeq) && (nbSeq > 2)) { + ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0); + if ((mostFrequent == nbSeq) && (!isDefaultAllowed || nbSeq > 2)) { + /* Prefer set_basic over set_rle when there are 2 or less symbols, + * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol. + * If basic encoding isn't possible, always choose RLE. + */ *repeatMode = FSE_repeat_check; return set_rle; } - if ((*repeatMode == FSE_repeat_valid) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { + if (isDefaultAllowed && (*repeatMode == FSE_repeat_valid) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { return set_repeat; } - if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (defaultNormLog-1)))) { + if (isDefaultAllowed && ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (defaultNormLog-1))))) { *repeatMode = FSE_repeat_valid; return set_basic; } @@ -1299,6 +1309,7 @@ MEM_STATIC size_t ZSTD_buildCTable(void* dst, size_t dstCapacity, count[codeTable[nbSeq-1]]--; nbSeq_1--; } + assert(nbSeq_1 > 1); CHECK_F(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max)); { size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */ if (FSE_isError(NCountSize)) return NCountSize; @@ -1436,7 +1447,7 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, /* CTable for Literal Lengths */ { U32 max = MaxLL; size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, entropy->workspace); - LLtype = ZSTD_selectEncodingType(&entropy->litlength_repeatMode, mostFrequent, nbSeq, LL_defaultNormLog); + LLtype = 
ZSTD_selectEncodingType(&entropy->litlength_repeatMode, mostFrequent, nbSeq, LL_defaultNormLog, ZSTD_defaultAllowed); { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL, entropy->workspace, sizeof(entropy->workspace)); @@ -1446,9 +1457,11 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, /* CTable for Offsets */ { U32 max = MaxOff; size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, entropy->workspace); - Offtype = ZSTD_selectEncodingType(&entropy->offcode_repeatMode, mostFrequent, nbSeq, OF_defaultNormLog); + /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ + ZSTD_defaultPolicy_e const defaultPolicy = max <= DefaultMaxOff ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed; + Offtype = ZSTD_selectEncodingType(&entropy->offcode_repeatMode, mostFrequent, nbSeq, OF_defaultNormLog, defaultPolicy); { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, - count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, MaxOff, + count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, entropy->workspace, sizeof(entropy->workspace)); if (ZSTD_isError(countSize)) return countSize; op += countSize; @@ -1456,7 +1469,7 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, /* CTable for MatchLengths */ { U32 max = MaxML; size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, entropy->workspace); - MLtype = ZSTD_selectEncodingType(&entropy->matchlength_repeatMode, mostFrequent, nbSeq, ML_defaultNormLog); + MLtype = ZSTD_selectEncodingType(&entropy->matchlength_repeatMode, mostFrequent, nbSeq, ML_defaultNormLog, ZSTD_defaultAllowed); { size_t const countSize = ZSTD_buildCTable(op, oend - op, 
CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, count, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML, entropy->workspace, sizeof(entropy->workspace)); diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 6d6d83396..b6bfa0c49 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -862,6 +862,15 @@ size_t ZSTD_execSequenceLast7(BYTE* op, typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e; +/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum + * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1) + * bits before reloading. This value is the maximum number of bits we read + * after reloading when we are decoding long offsets. + */ +#define LONG_OFFSETS_MAX_EXTRA_BITS_32 \ + (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \ + ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \ + : 0) static seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets) { @@ -869,7 +878,7 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e l U32 const llCode = FSE_peekSymbol(&seqState->stateLL); U32 const mlCode = FSE_peekSymbol(&seqState->stateML); - U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= maxOff, by table construction */ + U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= MaxOff, by table construction */ U32 const llBits = LL_bits[llCode]; U32 const mlBits = ML_bits[mlCode]; @@ -896,7 +905,7 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e l 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, - 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD }; + 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD }; /* sequence */ { size_t offset; 
@@ -904,8 +913,10 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e l offset = 0; else { ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); - if (longOffsets) { - int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN); + ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 2); + assert(ofBits <= MaxOff); + if (MEM_32bits() && longOffsets) { + U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1); offset = OF_base[ofCode] + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream); if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits); @@ -936,13 +947,17 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e l seq.matchLength = ML_base[mlCode] + ((mlCode>31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */ - if (MEM_32bits() && (mlBits+llBits>24)) BIT_reloadDStream(&seqState->DStream); + if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) + BIT_reloadDStream(&seqState->DStream); + if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog))) + BIT_reloadDStream(&seqState->DStream); + /* Verify that there is enough bits to read the rest of the data in 64-bit mode. */ + ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); seq.litLength = LL_base[llCode] + ((llCode>15) ? 
BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */ - if ( MEM_32bits() - || (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) ) - BIT_reloadDStream(&seqState->DStream); + if (MEM_32bits()) + BIT_reloadDStream(&seqState->DStream); DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u", (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); @@ -1102,7 +1117,6 @@ static size_t ZSTD_decompressSequences( } - HINT_INLINE seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const longOffsets) { @@ -1110,7 +1124,7 @@ seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const long U32 const llCode = FSE_peekSymbol(&seqState->stateLL); U32 const mlCode = FSE_peekSymbol(&seqState->stateML); - U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= maxOff, by table construction */ + U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= MaxOff, by table construction */ U32 const llBits = LL_bits[llCode]; U32 const mlBits = ML_bits[mlCode]; @@ -1137,7 +1151,7 @@ seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const long 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, - 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD }; + 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD }; /* sequence */ { size_t offset; @@ -1145,8 +1159,10 @@ seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const long offset = 0; else { ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); - if (longOffsets) { - int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN); + ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 2); + assert(ofBits <= MaxOff); + if (MEM_32bits() && longOffsets) { + U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1); offset = OF_base[ofCode] + (BIT_readBitsFast(&seqState->DStream, ofBits - 
extraBits) << extraBits); if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream); if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits); @@ -1176,11 +1192,16 @@ seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const long } seq.matchLength = ML_base[mlCode] + ((mlCode>31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */ - if (MEM_32bits() && (mlBits+llBits>24)) BIT_reloadDStream(&seqState->DStream); + if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) + BIT_reloadDStream(&seqState->DStream); + if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog))) + BIT_reloadDStream(&seqState->DStream); + /* Verify that there is enough bits to read the rest of the data in 64-bit mode. */ + ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); seq.litLength = LL_base[llCode] + ((llCode>15) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */ - if (MEM_32bits() || - (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) ) BIT_reloadDStream(&seqState->DStream); + if (MEM_32bits()) + BIT_reloadDStream(&seqState->DStream); { size_t const pos = seqState->pos + seq.litLength; seq.match = seqState->base + pos - seq.offset; /* single memory segment */ diff --git a/tests/decodecorpus.c b/tests/decodecorpus.c index 9cde2825e..ea01d2718 100644 --- a/tests/decodecorpus.c +++ b/tests/decodecorpus.c @@ -881,7 +881,7 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr, frame->stats.offsetSymbolSet, 28)) { Offtype = set_repeat; } else if (!(RAND(seed) & 3)) { - FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)); + FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, DefaultMaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)); Offtype = set_basic; } else { size_t nbSeq_1 = nbSeq; From 
6bb781e0f11105c961e89414e81eb8944e8d914b Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Mon, 25 Sep 2017 13:29:50 -0700 Subject: [PATCH 5/9] [fuzz] Add regressiontest targets --- Makefile | 10 ++++++++++ tests/fuzz/Makefile | 33 ++++++++++++++++++++++++++++++--- 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index e8bdcea33..7baff751c 100644 --- a/Makefile +++ b/Makefile @@ -12,6 +12,7 @@ ZSTDDIR = lib BUILDIR = build ZWRAPDIR = zlibWrapper TESTDIR = tests +FUZZDIR = $(TESTDIR)/fuzz # Define nul output VOID = /dev/null @@ -215,6 +216,15 @@ arm-ppc-compilation: $(MAKE) -C $(PRGDIR) clean zstd CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc-static ZSTDRTTEST= MOREFLAGS="-Werror -Wno-attributes -static" $(MAKE) -C $(PRGDIR) clean zstd CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc64-static ZSTDRTTEST= MOREFLAGS="-m64 -static" +regressiontest: + $(MAKE) -C $(FUZZDIR) regressiontest + +uasanregressiontest: + $(MAKE) -C $(FUZZDIR) regressiontest CC=clang CXX=clang++ CFLAGS="-O3 -fsanitize=address,undefined" CXXFLAGS="-O3 -fsanitize=address,undefined" + +msanregressiontest: + $(MAKE) -C $(FUZZDIR) regressiontest CC=clang CXX=clang++ CFLAGS="-O3 -fsanitize=memory" CXXFLAGS="-O3 -fsanitize=memory" + # run UBsan with -fsanitize-recover=signed-integer-overflow # due to a bug in UBsan when doing pointer subtraction # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63303 diff --git a/tests/fuzz/Makefile b/tests/fuzz/Makefile index 60822d498..6d2a0cfa9 100644 --- a/tests/fuzz/Makefile +++ b/tests/fuzz/Makefile @@ -14,6 +14,13 @@ CPPFLAGS ?= LDFLAGS ?= ARFLAGS ?= LIB_FUZZING_ENGINE ?= libregression.a +PYTHON ?= python +ifeq ($(shell uname), Darwin) + DOWNLOAD?=curl -L -o +else + DOWNLOAD?=wget -O +endif +CORPORA_URL_PREFIX:=https://github.com/facebook/zstd/releases/download/fuzz-corpora/ ZSTDDIR = ../../lib PRGDIR = ../../programs @@ -48,18 +55,20 @@ FUZZ_SRC := \ FUZZ_OBJ := $(patsubst %.c,%.o, $(wildcard $(FUZZ_SRC))) -.PHONY: default all clean 
+.PHONY: default all clean cleanall default: all -all: \ +FUZZ_TARGETS := \ simple_round_trip \ stream_round_trip \ - block_round_trip \ + block_round_trip \ simple_decompress \ stream_decompress \ block_decompress +all: $(FUZZ_TARGETS) + %.o: %.c $(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $^ -c -o $@ @@ -93,7 +102,25 @@ libFuzzer: @git clone https://chromium.googlesource.com/chromium/llvm-project/llvm/lib/Fuzzer @cd Fuzzer && ./build.sh +corpora/%_seed_corpus.zip: + @mkdir -p corpora + $(DOWNLOAD) $@ $(CORPORA_URL_PREFIX)$*_seed_corpus.zip + +corpora/%: corpora/%_seed_corpus.zip + unzip -q $^ -d $@ + +.PHONY: corpora +corpora: $(patsubst %,corpora/%,$(FUZZ_TARGETS)) + +regressiontest: corpora + CC="$(CC)" CXX="$(CXX)" CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" LDFLAGS="$(LDFLAGS)" $(PYTHON) ./fuzz.py build all + $(PYTHON) ./fuzz.py regression all + clean: @$(MAKE) -C $(ZSTDDIR) clean @$(RM) -f *.a *.o @$(RM) -f simple_round_trip stream_round_trip simple_decompress stream_decompress + +cleanall: + @$(RM) -rf Fuzzer + @$(RM) -rf corpora From 11e21f23cbac2f0fe2c1bc87d427172211cf8e9f Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Mon, 25 Sep 2017 13:32:50 -0700 Subject: [PATCH 6/9] [fuzz] Mention the corpora in the README --- tests/fuzz/README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/fuzz/README.md b/tests/fuzz/README.md index 6d0fab556..f184be646 100644 --- a/tests/fuzz/README.md +++ b/tests/fuzz/README.md @@ -1,6 +1,14 @@ # Fuzzing Each fuzzing target can be built with multiple engines. +Zstd provides a fuzz corpus for each target that can be downloaded with +the command: + +``` +make corpora +``` + +It will download each corpus into `./corpora/TARGET`. 
## fuzz.py From 77d5bc2d626386527a3c290588c86f72fbd5a6f9 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Mon, 25 Sep 2017 13:33:12 -0700 Subject: [PATCH 7/9] [fuzz][CI] Add regression tests to the CI --- .travis.yml | 2 ++ circle.yml | 7 ++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index a52d57af3..67da248d9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -21,6 +21,8 @@ matrix: - env: Cmd='make arminstall && make aarch64fuzz' - env: Cmd='make ppcinstall && make ppcfuzz' - env: Cmd='make ppcinstall && make ppc64fuzz' + - env: Cmd='make -j uasanregressiontest' + - env: Cmd='make -j msanregressiontest' git: depth: 1 diff --git a/circle.yml b/circle.yml index e89d548ac..5bc0ce643 100644 --- a/circle.yml +++ b/circle.yml @@ -45,7 +45,7 @@ test: parallel: true - ? | if [[ "$CIRCLE_NODE_INDEX" == "0" ]] ; then make ppc64build && make clean; fi && - if [[ "$CIRCLE_NODE_TOTAL" < "2" ]] || [[ "$CIRCLE_NODE_INDEX" == "1" ]]; then make gcc7build && make clean; fi #could add another test here + if [[ "$CIRCLE_NODE_TOTAL" < "2" ]] || [[ "$CIRCLE_NODE_INDEX" == "1" ]]; then make gcc7build && make clean; fi : parallel: true - ? | @@ -53,6 +53,11 @@ test: if [[ "$CIRCLE_NODE_TOTAL" < "2" ]] || [[ "$CIRCLE_NODE_INDEX" == "1" ]]; then make -C tests test-legacy test-longmatch test-symbols && make clean; fi : parallel: true + - ? 
| + if [[ "$CIRCLE_NODE_INDEX" == "0" ]] ; then make -j regressiontest && make clean; fi && + if [[ "$CIRCLE_NODE_TOTAL" < "2" ]] || [[ "$CIRCLE_NODE_INDEX" == "1" ]]; then true; fi # Could add another test here + : + parallel: true post: - echo Circle CI tests finished From 917a21325478bf23c5fb8f4605cde4a75f74a986 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Mon, 25 Sep 2017 15:00:50 -0700 Subject: [PATCH 8/9] [fuzz] Determine flags based on compiler version --- tests/fuzz/fuzz.py | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/tests/fuzz/fuzz.py b/tests/fuzz/fuzz.py index cd4087090..9864d822d 100755 --- a/tests/fuzz/fuzz.py +++ b/tests/fuzz/fuzz.py @@ -140,6 +140,34 @@ def parse_env_flags(args, flags): return args +def compiler_version(cc, cxx): + """ + Determines the compiler and version. + Only works for clang and gcc. + """ + cc_version_bytes = subprocess.check_output([cc, "--version"]) + cxx_version_bytes = subprocess.check_output([cxx, "--version"]) + if cc_version_bytes.startswith(b'clang'): + assert(cxx_version_bytes.startswith(b'clang')) + compiler = 'clang' + if cc_version_bytes.startswith(b'gcc'): + assert(cxx_version_bytes.startswith(b'g++')) + compiler = 'gcc' + version_regex = b'([0-9])+\.([0-9])+\.([0-9])+' + version_match = re.search(version_regex, cc_version_bytes) + version = tuple(int(version_match.group(i)) for i in range(1, 4)) + return compiler, version + + +def overflow_ubsan_flags(cc, cxx): + compiler, version = compiler_version(cc, cxx) + if compiler == 'gcc': + return ['-fno-sanitize=signed-integer-overflow'] + if compiler == 'clang' and version >= (5, 0, 0): + return ['-fno-sanitize=pointer-overflow'] + return [] + + def build_parser(args): description = """ Cleans the repository and builds a fuzz target (or all). 
@@ -364,7 +392,7 @@ def build(args): if args.ubsan: ubsan_flags = ['-fsanitize=undefined'] if not args.ubsan_pointer_overflow: - ubsan_flags += ['-fno-sanitize=pointer-overflow'] + ubsan_flags += overflow_ubsan_flags(cc, cxx) common_flags += ubsan_flags if args.stateful_fuzzing: From 76cb38d0854433e2dc5ca6a6dd212fccb96ccd41 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Mon, 25 Sep 2017 16:12:46 -0700 Subject: [PATCH 9/9] [zstd] Backport kernel patch from @ColinIanKing * Make the U32 table in `FSE_normalizeCount()` static. * Patch from https://lkml.kernel.org/r/20170922145946.14316-1-colin.king@canonical.com. * Clang makes non-static tables static anyways. gcc however, does [weird things](https://godbolt.org/g/fvTcED). * Benchmarks showed no difference in speed. --- lib/compress/fse_compress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/compress/fse_compress.c b/lib/compress/fse_compress.c index 599280b90..549c115d4 100644 --- a/lib/compress/fse_compress.c +++ b/lib/compress/fse_compress.c @@ -582,7 +582,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported size */ if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */ - { U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 }; + { static U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 }; U64 const scale = 62 - tableLog; U64 const step = ((U64)1<<62) / total; /* <== here, one division ! */ U64 const vStep = 1ULL<<(scale-20);