From 1c23b640499c1539df0bd30b833d31b50eac442a Mon Sep 17 00:00:00 2001
From: Nick Terrell <terrelln@fb.com>
Date: Mon, 25 Sep 2017 11:27:33 -0700
Subject: [PATCH 1/9] [fuzz] fuzz.py can minimize and zip corpora

* "minimize" minimizes the corpora into an output directory.
* "zip" zips up the minimized corpora, which are ready to deploy.
---
 tests/fuzz/fuzz.py | 165 +++++++++++++++++++++++++++++++--------------
 1 file changed, 114 insertions(+), 51 deletions(-)

diff --git a/tests/fuzz/fuzz.py b/tests/fuzz/fuzz.py
index 0ce201cdd..8c381ecf8 100755
--- a/tests/fuzz/fuzz.py
+++ b/tests/fuzz/fuzz.py
@@ -82,6 +82,35 @@ def tmpdir():
         shutil.rmtree(dirpath, ignore_errors=True)
 
 
+def parse_targets(in_targets):
+    """Expand target names ('all' means every TARGETS entry) into a list."""
+    selected = set()
+    for name in filter(None, in_targets):
+        if name == 'all':
+            selected.update(TARGETS)
+            continue
+        if name not in TARGETS:
+            # Unknown names are an error rather than being silently dropped.
+            raise RuntimeError('{} is not a valid target'.format(name))
+        selected.add(name)
+    return list(selected)
+
+
+def targets_parser(args, description):
+    """
+    Parses the TARGET positionals; unknown arguments are kept in `.extra`.
+    Pops the program name off `args`, so the caller's list is mutated.
+    """
+    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
+    target_help = 'Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS))
+    parser.add_argument('TARGET', nargs='*', type=str, help=target_help)
+    parsed, unknown = parser.parse_known_args(args)
+    parsed.extra = unknown
+    # Expands 'all' and rejects names that are not in TARGETS.
+    parsed.TARGET = parse_targets(parsed.TARGET)
+    return parsed
+
+
 def parse_env_flags(args, flags):
     """
     Look for flags set by environment variables.
@@ -424,36 +453,42 @@ def libfuzzer_parser(args):
     if args.TARGET and args.TARGET not in TARGETS:
         raise RuntimeError('{} is not a valid target'.format(args.TARGET))
 
-    if not args.corpora:
-        args.corpora = abs_join(CORPORA_DIR, args.TARGET)
-    if not args.artifact:
-        args.artifact = abs_join(CORPORA_DIR, '{}-crash'.format(args.TARGET))
-    if not args.seed:
-        args.seed = abs_join(CORPORA_DIR, '{}-seed'.format(args.TARGET))
-
     return args
 
 
-def libfuzzer(args):
-    try:
-        args = libfuzzer_parser(args)
-    except Exception as e:
-        print(e)
-        return 1
-    target = abs_join(FUZZ_DIR, args.TARGET)
+def libfuzzer(target, corpora=None, artifact=None, seed=None, extra_args=None):
+    if corpora is None:
+        corpora = abs_join(CORPORA_DIR, target)
+    if artifact is None:
+        artifact = abs_join(CORPORA_DIR, '{}-crash'.format(target))
+    if seed is None:
+        seed = abs_join(CORPORA_DIR, '{}-seed'.format(target))
+    if extra_args is None:
+        extra_args = []
 
-    corpora = [create(args.corpora)]
-    artifact = create(args.artifact)
-    seed = check(args.seed)
+    target = abs_join(FUZZ_DIR, target)
+
+    corpora = [create(corpora)]
+    artifact = create(artifact)
+    seed = check(seed)
 
     corpora += [artifact]
     if seed is not None:
         corpora += [seed]
 
     cmd = [target, '-artifact_prefix={}/'.format(artifact)]
-    cmd += corpora + args.extra
+    cmd += corpora + extra_args
     print(' '.join(cmd))
-    subprocess.call(cmd)
+    subprocess.check_call(cmd)
+
+
+def libfuzzer_cmd(args):
+    try:
+        args = libfuzzer_parser(args)
+    except Exception as e:
+        print(e)
+        return 1
+    libfuzzer(args.TARGET, args.corpora, args.artifact, args.seed, args.extra)
     return 0
 
 
@@ -518,39 +553,15 @@ def afl(args):
     return 0
 
 
-def regression_parser(args):
-    description = """
-    Runs one or more regression tests.
-    The fuzzer should have been built with with
-    LIB_FUZZING_ENGINE='libregression.a'.
-    Takes input from CORPORA.
-    """
-    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
-    parser.add_argument(
-        'TARGET',
-        nargs='*',
-        type=str,
-        help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS)))
-    args = parser.parse_args(args)
-
-    targets = set()
-    for target in args.TARGET:
-        if not target:
-            continue
-        if target == 'all':
-            targets = targets.union(TARGETS)
-        elif target in TARGETS:
-            targets.add(target)
-        else:
-            raise RuntimeError('{} is not a valid target'.format(target))
-    args.TARGET = list(targets)
-
-    return args
-
-
 def regression(args):
     try:
-        args = regression_parser(args)
+        description = """
+        Runs one or more regression tests.
+        The fuzzer should have been built with with
+        LIB_FUZZING_ENGINE='libregression.a'.
+        Takes input from CORPORA.
+        """
+        args = targets_parser(args, description)
     except Exception as e:
         print(e)
         return 1
@@ -673,6 +684,52 @@ def gen(args):
     return 0
 
 
+def minimize(args):
+    try:
+        description = """
+        Runs a libfuzzer fuzzer with -merge=1 to build a minimal corpus in
+        TARGET_seed_corpus. All extra args are passed to libfuzzer.
+        """
+        args = targets_parser(args, description)
+    except Exception as e:
+        print(e)
+        return 1
+
+    for target in args.TARGET:
+        # Merge the corpus + anything else into the seed_corpus
+        corpus = abs_join(CORPORA_DIR, target)
+        seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
+        extra_args = [corpus, "-merge=1"] + args.extra
+        libfuzzer(target, corpora=seed_corpus, extra_args=extra_args)
+        # Copy all crashes directly into the seed_corpus if not already present
+        seeds = set(os.listdir(seed_corpus))
+        crashes = abs_join(CORPORA_DIR, '{}-crash'.format(target))
+        for crash in set(os.listdir(crashes)) - seeds:
+            shutil.copy(abs_join(crashes, crash), seed_corpus)
+    # Report success explicitly, like the other commands main() dispatches to
+    return 0
+
+
+def zip_cmd(args):
+    try:
+        description = """
+        Zips up the seed corpus.
+        """
+        args = targets_parser(args, description)
+    except Exception as e:
+        print(e)
+        return 1
+
+    for target in args.TARGET:
+        # Zip the seed_corpus; the echoed command shows the files as '*'
+        seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
+        seeds = [abs_join(seed_corpus, f) for f in os.listdir(seed_corpus)]
+        cmd = ["zip", "-q", "-j", "-9", "{}.zip".format(seed_corpus)]
+        print(' '.join(cmd + [abs_join(seed_corpus, '*')]))
+        subprocess.check_call(cmd + seeds)
+    return 0  # explicit success code, like the other commands
+
+
 def short_help(args):
     name = args[0]
     print("Usage: {} [OPTIONS] COMMAND [ARGS]...\n".format(name))
@@ -690,6 +747,8 @@ def help(args):
     print("\tafl\t\tRun an AFL fuzzer")
     print("\tregression\tRun a regression test")
     print("\tgen\t\tGenerate a seed corpus for a fuzzer")
+    print("\tminimize\tMinimize the test corpora")
+    print("\tzip\t\tZip the minimized corpora up")
 
 
 def main():
@@ -705,13 +764,17 @@ def main():
     if command == "build":
         return build(args)
     if command == "libfuzzer":
-        return libfuzzer(args)
+        return libfuzzer_cmd(args)
     if command == "regression":
         return regression(args)
     if command == "afl":
         return afl(args)
     if command == "gen":
         return gen(args)
+    if command == "minimize":
+        return minimize(args)
+    if command == "zip":
+        return zip_cmd(args)
     short_help(args)
     print("Error: No such command {} (pass -h for help)".format(command))
     return 1

From 23199b6daf4757b41b20fc83d95f5f4b50bad948 Mon Sep 17 00:00:00 2001
From: Nick Terrell <terrelln@fb.com>
Date: Mon, 25 Sep 2017 13:28:18 -0700
Subject: [PATCH 2/9] [fuzz] Fix fuzz.py env flags parsing

---
 tests/fuzz/fuzz.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/fuzz/fuzz.py b/tests/fuzz/fuzz.py
index 8c381ecf8..cd4087090 100755
--- a/tests/fuzz/fuzz.py
+++ b/tests/fuzz/fuzz.py
@@ -115,7 +115,6 @@ def parse_env_flags(args, flags):
     """
     Look for flags set by environment variables.
     """
-    flags = ' '.join(flags)
     san_flags = ','.join(re.findall('-fsanitize=((?:[a-z]+,?)+)', flags))
     nosan_flags = ','.join(re.findall('-fno-sanitize=((?:[a-z]+,?)+)', flags))
 

From bfad5568b5318adf6766de6deaaf4f9b0cc1c668 Mon Sep 17 00:00:00 2001
From: Nick Terrell <terrelln@fb.com>
Date: Mon, 25 Sep 2017 13:28:45 -0700
Subject: [PATCH 3/9] [fuzz] Make simple_round_trip compile cleanly

---
 tests/fuzz/simple_round_trip.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/fuzz/simple_round_trip.c b/tests/fuzz/simple_round_trip.c
index f853485ad..617e45df6 100644
--- a/tests/fuzz/simple_round_trip.c
+++ b/tests/fuzz/simple_round_trip.c
@@ -38,10 +38,11 @@ static size_t roundTripTest(void *result, size_t resultCapacity,
     if (FUZZ_rand(&seed) & 1) {
         ZSTD_inBuffer in = {src, srcSize, 0};
         ZSTD_outBuffer out = {compressed, compressedCapacity, 0};
+        size_t err;
 
         ZSTD_CCtx_reset(cctx);
         FUZZ_setRandomParameters(cctx, &seed);
-        size_t const err = ZSTD_compress_generic(cctx, &out, &in, ZSTD_e_end);
+        err = ZSTD_compress_generic(cctx, &out, &in, ZSTD_e_end);
         if (err != 0) {
             return err;
         }

From bbe77212efebcfefb6b510f2509a674a5dd25245 Mon Sep 17 00:00:00 2001
From: Nick Terrell <terrelln@fb.com>
Date: Mon, 18 Sep 2017 16:54:53 -0700
Subject: [PATCH 4/9] [libzstd] Increase MaxOff

---
 lib/common/zstd_internal.h       |  7 +++--
 lib/compress/zstd_compress.c     | 33 ++++++++++++++------
 lib/decompress/zstd_decompress.c | 53 ++++++++++++++++++++++----------
 tests/decodecorpus.c             |  2 +-
 4 files changed, 65 insertions(+), 30 deletions(-)

diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h
index cd0dbcc27..403c0cbdb 100644
--- a/lib/common/zstd_internal.h
+++ b/lib/common/zstd_internal.h
@@ -123,7 +123,8 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy
 #define MaxLit ((1<<Litbits) - 1)
 #define MaxML  52
 #define MaxLL  35
-#define MaxOff 28
+#define DefaultMaxOff 28
+#define MaxOff 31
 #define MaxSeq MAX(MaxLL, MaxML)   /* Assumption : MaxOff < MaxLL,MaxML */
 #define MLFSELog    9
 #define LLFSELog    9
@@ -149,8 +150,8 @@ static const S16 ML_defaultNorm[MaxML+1] = { 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1,
 #define ML_DEFAULTNORMLOG 6  /* for static allocation */
 static const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG;
 
-static const S16 OF_defaultNorm[MaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
-                                              1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1 };
+static const S16 OF_defaultNorm[DefaultMaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
+                                                     1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1 };
 #define OF_DEFAULTNORMLOG 5  /* for static allocation */
 static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
 
diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index b0e9195dd..48150cfed 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -1250,20 +1250,30 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
         mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
 }
 
-MEM_STATIC symbolEncodingType_e ZSTD_selectEncodingType(FSE_repeat* repeatMode,
-        size_t const mostFrequent, size_t nbSeq, U32 defaultNormLog)
+typedef enum {
+    ZSTD_defaultDisallowed = 0,
+    ZSTD_defaultAllowed = 1
+} ZSTD_defaultPolicy_e;
+
+MEM_STATIC symbolEncodingType_e ZSTD_selectEncodingType(
+        FSE_repeat* repeatMode, size_t const mostFrequent, size_t nbSeq,
+        U32 defaultNormLog, ZSTD_defaultPolicy_e const isDefaultAllowed)
 {
 #define MIN_SEQ_FOR_DYNAMIC_FSE   64
 #define MAX_SEQ_FOR_STATIC_FSE  1000
-
-    if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
+    ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0);
+    if ((mostFrequent == nbSeq) && (!isDefaultAllowed || nbSeq > 2)) {
+        /* Prefer set_basic over set_rle when there are 2 or less symbols,
+         * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
+         * If basic encoding isn't possible, always choose RLE.
+         */
         *repeatMode = FSE_repeat_check;
         return set_rle;
     }
-    if ((*repeatMode == FSE_repeat_valid) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
+    if (isDefaultAllowed && (*repeatMode == FSE_repeat_valid) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
         return set_repeat;
     }
-    if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (defaultNormLog-1)))) {
+    if (isDefaultAllowed && ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (defaultNormLog-1))))) {
         *repeatMode = FSE_repeat_valid;
         return set_basic;
     }
@@ -1299,6 +1309,7 @@ MEM_STATIC size_t ZSTD_buildCTable(void* dst, size_t dstCapacity,
             count[codeTable[nbSeq-1]]--;
             nbSeq_1--;
         }
+        assert(nbSeq_1 > 1);
         CHECK_F(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max));
         {   size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog);   /* overflow protected */
             if (FSE_isError(NCountSize)) return NCountSize;
@@ -1436,7 +1447,7 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
     /* CTable for Literal Lengths */
     {   U32 max = MaxLL;
         size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, entropy->workspace);
-        LLtype = ZSTD_selectEncodingType(&entropy->litlength_repeatMode, mostFrequent, nbSeq, LL_defaultNormLog);
+        LLtype = ZSTD_selectEncodingType(&entropy->litlength_repeatMode, mostFrequent, nbSeq, LL_defaultNormLog, ZSTD_defaultAllowed);
         {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
                     count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
                     entropy->workspace, sizeof(entropy->workspace));
@@ -1446,9 +1457,11 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
     /* CTable for Offsets */
     {   U32 max = MaxOff;
         size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, entropy->workspace);
-        Offtype = ZSTD_selectEncodingType(&entropy->offcode_repeatMode, mostFrequent, nbSeq, OF_defaultNormLog);
+        /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
+        ZSTD_defaultPolicy_e const defaultPolicy = max <= DefaultMaxOff ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
+        Offtype = ZSTD_selectEncodingType(&entropy->offcode_repeatMode, mostFrequent, nbSeq, OF_defaultNormLog, defaultPolicy);
         {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
-                    count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, MaxOff,
+                    count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
                     entropy->workspace, sizeof(entropy->workspace));
             if (ZSTD_isError(countSize)) return countSize;
             op += countSize;
@@ -1456,7 +1469,7 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
     /* CTable for MatchLengths */
     {   U32 max = MaxML;
         size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, entropy->workspace);
-        MLtype = ZSTD_selectEncodingType(&entropy->matchlength_repeatMode, mostFrequent, nbSeq, ML_defaultNormLog);
+        MLtype = ZSTD_selectEncodingType(&entropy->matchlength_repeatMode, mostFrequent, nbSeq, ML_defaultNormLog, ZSTD_defaultAllowed);
         {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
                     count, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
                     entropy->workspace, sizeof(entropy->workspace));
diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c
index 6d6d83396..b6bfa0c49 100644
--- a/lib/decompress/zstd_decompress.c
+++ b/lib/decompress/zstd_decompress.c
@@ -862,6 +862,15 @@ size_t ZSTD_execSequenceLast7(BYTE* op,
 
 typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
 
+/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
+ * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
+ * bits before reloading. This value is the maximum number of bits we read
+ * after reloading when we are decoding long offsets.
+ */
+#define LONG_OFFSETS_MAX_EXTRA_BITS_32                                         \
+    (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32                         \
+        ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32                    \
+        : 0)
 
 static seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
 {
@@ -869,7 +878,7 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e l
 
     U32 const llCode = FSE_peekSymbol(&seqState->stateLL);
     U32 const mlCode = FSE_peekSymbol(&seqState->stateML);
-    U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb);   /* <= maxOff, by table construction */
+    U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb);   /* <= MaxOff, by table construction */
 
     U32 const llBits = LL_bits[llCode];
     U32 const mlBits = ML_bits[mlCode];
@@ -896,7 +905,7 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e l
                      0,        1,       1,       5,     0xD,     0x1D,     0x3D,     0x7D,
                      0xFD,   0x1FD,   0x3FD,   0x7FD,   0xFFD,   0x1FFD,   0x3FFD,   0x7FFD,
                      0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
-                     0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD };
+                     0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
 
     /* sequence */
     {   size_t offset;
@@ -904,8 +913,10 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e l
             offset = 0;
         else {
             ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
-            if (longOffsets) {
-                int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN);
+            ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 2);
+            assert(ofBits <= MaxOff);
+            if (MEM_32bits() && longOffsets) {
+                U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1);
                 offset = OF_base[ofCode] + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
                 if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream);
                 if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
@@ -936,13 +947,17 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e l
 
     seq.matchLength = ML_base[mlCode]
                     + ((mlCode>31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0);  /* <=  16 bits */
-    if (MEM_32bits() && (mlBits+llBits>24)) BIT_reloadDStream(&seqState->DStream);
+    if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
+        BIT_reloadDStream(&seqState->DStream);
+    if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
+        BIT_reloadDStream(&seqState->DStream);
+    /* Verify that there are enough bits to read the rest of the data in 64-bit mode. */
+    ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
 
     seq.litLength = LL_base[llCode]
                   + ((llCode>15) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0);    /* <=  16 bits */
-    if (  MEM_32bits()
-      || (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) )
-       BIT_reloadDStream(&seqState->DStream);
+    if (MEM_32bits())
+        BIT_reloadDStream(&seqState->DStream);
 
     DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
                 (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
@@ -1102,7 +1117,6 @@ static size_t ZSTD_decompressSequences(
 }
 
 
-
 HINT_INLINE
 seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const longOffsets)
 {
@@ -1110,7 +1124,7 @@ seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const long
 
     U32 const llCode = FSE_peekSymbol(&seqState->stateLL);
     U32 const mlCode = FSE_peekSymbol(&seqState->stateML);
-    U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb);   /* <= maxOff, by table construction */
+    U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb);   /* <= MaxOff, by table construction */
 
     U32 const llBits = LL_bits[llCode];
     U32 const mlBits = ML_bits[mlCode];
@@ -1137,7 +1151,7 @@ seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const long
                      0,        1,       1,       5,     0xD,     0x1D,     0x3D,     0x7D,
                      0xFD,   0x1FD,   0x3FD,   0x7FD,   0xFFD,   0x1FFD,   0x3FFD,   0x7FFD,
                      0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
-                     0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD };
+                     0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
 
     /* sequence */
     {   size_t offset;
@@ -1145,8 +1159,10 @@ seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const long
             offset = 0;
         else {
             ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
-            if (longOffsets) {
-                int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN);
+            ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 2);
+            assert(ofBits <= MaxOff);
+            if (MEM_32bits() && longOffsets) {
+                U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1);
                 offset = OF_base[ofCode] + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
                 if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream);
                 if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
@@ -1176,11 +1192,16 @@ seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const long
     }
 
     seq.matchLength = ML_base[mlCode] + ((mlCode>31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0);  /* <=  16 bits */
-    if (MEM_32bits() && (mlBits+llBits>24)) BIT_reloadDStream(&seqState->DStream);
+    if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
+        BIT_reloadDStream(&seqState->DStream);
+    if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
+        BIT_reloadDStream(&seqState->DStream);
+    /* Verify that there are enough bits to read the rest of the data in 64-bit mode. */
+    ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
 
     seq.litLength = LL_base[llCode] + ((llCode>15) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0);    /* <=  16 bits */
-    if (MEM_32bits() ||
-       (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) ) BIT_reloadDStream(&seqState->DStream);
+    if (MEM_32bits())
+        BIT_reloadDStream(&seqState->DStream);
 
     {   size_t const pos = seqState->pos + seq.litLength;
         seq.match = seqState->base + pos - seq.offset;    /* single memory segment */
diff --git a/tests/decodecorpus.c b/tests/decodecorpus.c
index 9cde2825e..ea01d2718 100644
--- a/tests/decodecorpus.c
+++ b/tests/decodecorpus.c
@@ -881,7 +881,7 @@ static size_t writeSequences(U32* seed, frame_t* frame, seqStore_t* seqStorePtr,
                                   frame->stats.offsetSymbolSet, 28)) {
             Offtype = set_repeat;
         } else if (!(RAND(seed) & 3)) {
-            FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
+            FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, DefaultMaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
             Offtype = set_basic;
         } else {
             size_t nbSeq_1 = nbSeq;

From 6bb781e0f11105c961e89414e81eb8944e8d914b Mon Sep 17 00:00:00 2001
From: Nick Terrell <terrelln@fb.com>
Date: Mon, 25 Sep 2017 13:29:50 -0700
Subject: [PATCH 5/9] [fuzz] Add regressiontest targets

---
 Makefile            | 10 ++++++++++
 tests/fuzz/Makefile | 33 ++++++++++++++++++++++++++++++---
 2 files changed, 40 insertions(+), 3 deletions(-)

diff --git a/Makefile b/Makefile
index e8bdcea33..7baff751c 100644
--- a/Makefile
+++ b/Makefile
@@ -12,6 +12,7 @@ ZSTDDIR  = lib
 BUILDIR  = build
 ZWRAPDIR = zlibWrapper
 TESTDIR  = tests
+FUZZDIR  = $(TESTDIR)/fuzz
 
 # Define nul output
 VOID = /dev/null
@@ -215,6 +216,15 @@ arm-ppc-compilation:
 	$(MAKE) -C $(PRGDIR) clean zstd CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc-static ZSTDRTTEST= MOREFLAGS="-Werror -Wno-attributes -static"
 	$(MAKE) -C $(PRGDIR) clean zstd CC=powerpc-linux-gnu-gcc QEMU_SYS=qemu-ppc64-static ZSTDRTTEST= MOREFLAGS="-m64 -static"
 
+regressiontest:
+	$(MAKE) -C $(FUZZDIR) regressiontest
+
+uasanregressiontest:
+	$(MAKE) -C $(FUZZDIR) regressiontest CC=clang CXX=clang++ CFLAGS="-O3 -fsanitize=address,undefined" CXXFLAGS="-O3 -fsanitize=address,undefined"
+
+msanregressiontest:
+	$(MAKE) -C $(FUZZDIR) regressiontest CC=clang CXX=clang++ CFLAGS="-O3 -fsanitize=memory" CXXFLAGS="-O3 -fsanitize=memory"
+
 # run UBsan with -fsanitize-recover=signed-integer-overflow
 # due to a bug in UBsan when doing pointer subtraction
 # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63303
diff --git a/tests/fuzz/Makefile b/tests/fuzz/Makefile
index 60822d498..6d2a0cfa9 100644
--- a/tests/fuzz/Makefile
+++ b/tests/fuzz/Makefile
@@ -14,6 +14,13 @@ CPPFLAGS ?=
 LDFLAGS ?=
 ARFLAGS ?=
 LIB_FUZZING_ENGINE ?= libregression.a
+PYTHON ?= python
+ifeq ($(shell uname), Darwin)
+	DOWNLOAD?=curl -L -o
+else
+	DOWNLOAD?=wget -O
+endif
+CORPORA_URL_PREFIX:=https://github.com/facebook/zstd/releases/download/fuzz-corpora/
 
 ZSTDDIR = ../../lib
 PRGDIR = ../../programs
@@ -48,18 +55,20 @@ FUZZ_SRC       := \
 FUZZ_OBJ := $(patsubst %.c,%.o, $(wildcard $(FUZZ_SRC)))
 
 
-.PHONY: default all clean
+.PHONY: default all clean cleanall
 
 default: all
 
-all: \
+FUZZ_TARGETS :=       \
 	simple_round_trip \
 	stream_round_trip \
-	block_round_trip \
+	block_round_trip  \
 	simple_decompress \
 	stream_decompress \
 	block_decompress
 
+all: $(FUZZ_TARGETS)
+
 %.o: %.c
 	$(CC) $(FUZZ_CPPFLAGS) $(FUZZ_CFLAGS) $^ -c -o $@
 
@@ -93,7 +102,25 @@ libFuzzer:
 	@git clone https://chromium.googlesource.com/chromium/llvm-project/llvm/lib/Fuzzer
 	@cd Fuzzer && ./build.sh
 
+corpora/%_seed_corpus.zip:
+	@mkdir -p corpora
+	$(DOWNLOAD) $@ $(CORPORA_URL_PREFIX)$*_seed_corpus.zip
+
+corpora/%: corpora/%_seed_corpus.zip
+	unzip -q $^ -d $@
+
+.PHONY: corpora
+corpora: $(patsubst %,corpora/%,$(FUZZ_TARGETS))
+
+regressiontest: corpora
+	CC="$(CC)" CXX="$(CXX)" CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" LDFLAGS="$(LDFLAGS)" $(PYTHON) ./fuzz.py build all
+	$(PYTHON) ./fuzz.py regression all
+
 clean:
 	@$(MAKE) -C $(ZSTDDIR) clean
 	@$(RM) -f *.a *.o
 	@$(RM) -f simple_round_trip stream_round_trip simple_decompress stream_decompress
+
+cleanall:
+	@$(RM) -rf Fuzzer
+	@$(RM) -rf corpora

From 11e21f23cbac2f0fe2c1bc87d427172211cf8e9f Mon Sep 17 00:00:00 2001
From: Nick Terrell <terrelln@fb.com>
Date: Mon, 25 Sep 2017 13:32:50 -0700
Subject: [PATCH 6/9] [fuzz] Mention the corpora in the README

---
 tests/fuzz/README.md | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tests/fuzz/README.md b/tests/fuzz/README.md
index 6d0fab556..f184be646 100644
--- a/tests/fuzz/README.md
+++ b/tests/fuzz/README.md
@@ -1,6 +1,14 @@
 # Fuzzing
 
 Each fuzzing target can be built with multiple engines.
+Zstd provides a fuzz corpus for each target that can be downloaded with
+the command:
+
+```
+make corpora
+```
+
+It will download each corpus into `./corpora/TARGET`.
 
 ## fuzz.py
 

From 77d5bc2d626386527a3c290588c86f72fbd5a6f9 Mon Sep 17 00:00:00 2001
From: Nick Terrell <terrelln@fb.com>
Date: Mon, 25 Sep 2017 13:33:12 -0700
Subject: [PATCH 7/9] [fuzz][CI] Add regression tests to the CI

---
 .travis.yml | 2 ++
 circle.yml  | 7 ++++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index a52d57af3..67da248d9 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -21,6 +21,8 @@ matrix:
     - env: Cmd='make arminstall && make aarch64fuzz'
     - env: Cmd='make ppcinstall && make ppcfuzz'
     - env: Cmd='make ppcinstall && make ppc64fuzz'
+    - env: Cmd='make -j uasanregressiontest'
+    - env: Cmd='make -j msanregressiontest'
 
 git:
   depth: 1
diff --git a/circle.yml b/circle.yml
index e89d548ac..5bc0ce643 100644
--- a/circle.yml
+++ b/circle.yml
@@ -45,7 +45,7 @@ test:
         parallel: true
     - ? |
         if [[ "$CIRCLE_NODE_INDEX" == "0" ]]                                    ; then make ppc64build   && make clean; fi &&
-        if [[ "$CIRCLE_NODE_TOTAL" < "2" ]] || [[ "$CIRCLE_NODE_INDEX" == "1" ]]; then make gcc7build    && make clean; fi #could add another test here
+        if [[ "$CIRCLE_NODE_TOTAL" < "2" ]] || [[ "$CIRCLE_NODE_INDEX" == "1" ]]; then make gcc7build    && make clean; fi
       :
         parallel: true
     - ? |
@@ -53,6 +53,11 @@ test:
         if [[ "$CIRCLE_NODE_TOTAL" < "2" ]] || [[ "$CIRCLE_NODE_INDEX" == "1" ]]; then make -C tests test-legacy test-longmatch test-symbols && make clean; fi
       :
         parallel: true
+    - ? |
+        if [[ "$CIRCLE_NODE_INDEX" == "0" ]]                                    ; then make -j regressiontest && make clean; fi &&
+        if [[ "$CIRCLE_NODE_TOTAL" < "2" ]] || [[ "$CIRCLE_NODE_INDEX" == "1" ]]; then true; fi # Could add another test here
+      :
+        parallel: true
 
   post:
     - echo Circle CI tests finished

From 917a21325478bf23c5fb8f4605cde4a75f74a986 Mon Sep 17 00:00:00 2001
From: Nick Terrell <terrelln@fb.com>
Date: Mon, 25 Sep 2017 15:00:50 -0700
Subject: [PATCH 8/9] [fuzz] Determine flags based on compiler version

---
 tests/fuzz/fuzz.py | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/tests/fuzz/fuzz.py b/tests/fuzz/fuzz.py
index cd4087090..9864d822d 100755
--- a/tests/fuzz/fuzz.py
+++ b/tests/fuzz/fuzz.py
@@ -140,6 +140,34 @@ def parse_env_flags(args, flags):
     return args
 
 
+def compiler_version(cc, cxx):
+    """
+    Determines the compiler ('clang', 'gcc' or None) and its (x, y, z) version.
+    """
+    cc_version_bytes = subprocess.check_output([cc, "--version"])
+    cxx_version_bytes = subprocess.check_output([cxx, "--version"])
+    compiler = None  # stays None when the banner is neither clang nor gcc
+    if cc_version_bytes.startswith(b'clang'):
+        assert(cxx_version_bytes.startswith(b'clang'))
+        compiler = 'clang'
+    elif cc_version_bytes.startswith(b'gcc'):
+        assert(cxx_version_bytes.startswith(b'g++'))
+        compiler = 'gcc'
+    # The '+' must sit inside each group, or only the last digit is captured
+    version_regex = br'([0-9]+)\.([0-9]+)\.([0-9]+)'
+    version_match = re.search(version_regex, cc_version_bytes)
+    version = tuple(int(version_match.group(i)) for i in range(1, 4))
+    return compiler, version
+
+
+def overflow_ubsan_flags(cc, cxx):
+    """Returns the -fno-sanitize flag list to use for this compiler."""
+    compiler, version = compiler_version(cc, cxx)
+    if compiler == 'clang' and version >= (5, 0, 0):
+        return ['-fno-sanitize=pointer-overflow']
+    is_gcc = compiler == 'gcc'
+    return ['-fno-sanitize=signed-integer-overflow'] if is_gcc else []
+
+
 def build_parser(args):
     description = """
     Cleans the repository and builds a fuzz target (or all).
@@ -364,7 +392,7 @@ def build(args):
     if args.ubsan:
         ubsan_flags = ['-fsanitize=undefined']
         if not args.ubsan_pointer_overflow:
-            ubsan_flags += ['-fno-sanitize=pointer-overflow']
+            ubsan_flags += overflow_ubsan_flags(cc, cxx)
         common_flags += ubsan_flags
 
     if args.stateful_fuzzing:

From 76cb38d0854433e2dc5ca6a6dd212fccb96ccd41 Mon Sep 17 00:00:00 2001
From: Nick Terrell <terrelln@fb.com>
Date: Mon, 25 Sep 2017 16:12:46 -0700
Subject: [PATCH 9/9] [zstd] Backport kernel patch from @ColinIanKing

* Make the U32 table in `FSE_normalizeCount()` static.
* Patch from https://lkml.kernel.org/r/20170922145946.14316-1-colin.king@canonical.com.
* Clang makes non-static tables static anyways. gcc however, does [weird things](https://godbolt.org/g/fvTcED).
* Benchmarks showed no difference in speed.
---
 lib/compress/fse_compress.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/compress/fse_compress.c b/lib/compress/fse_compress.c
index 599280b90..549c115d4 100644
--- a/lib/compress/fse_compress.c
+++ b/lib/compress/fse_compress.c
@@ -582,7 +582,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
     if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);   /* Unsupported size */
     if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC);   /* Too small tableLog, compression potentially impossible */
 
-    {   U32 const rtbTable[] = {     0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
+    {   static U32 const rtbTable[] = {     0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
         U64 const scale = 62 - tableLog;
         U64 const step = ((U64)1<<62) / total;   /* <== here, one division ! */
         U64 const vStep = 1ULL<<(scale-20);