Merge pull request #88 from Cyan4973/dev

Dev
2025-11-13 00:04:04 -05:00 · 2015-12-07 11:11:01 +01:00 · 2015-12-07 11:11:01 +01:00 · d89d578ce9
commit d89d578ce9
parent a174fb1b41 56005e7ac8
10 changed files with 263 additions and 72 deletions
--- a/6
+++ b/6
@ -32,7 +32,7 @@
 # ################################################################

 # Version number
-export VERSION := 0.4.2
+export VERSION := 0.4.3

 PRGDIR  = programs
 ZSTDDIR = lib
@ -87,8 +87,8 @@ gpptest: clean
 	$(MAKE) all CC=g++ CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"

 armtest: clean
-	$(MAKE) -C $(ZSTDDIR) -e all CC=arm-linux-gnueabi-gcc MOREFLAGS="-Werror"
-	$(MAKE) -C $(PRGDIR) -e CC=arm-linux-gnueabi-gcc MOREFLAGS="-Werror"
+	$(MAKE) -C $(ZSTDDIR) all CC=arm-linux-gnueabi-gcc MOREFLAGS="-Werror"
+	$(MAKE) -C $(PRGDIR) CC=arm-linux-gnueabi-gcc MOREFLAGS="-Werror -static"

 usan: clean
 	$(MAKE) test CC=clang MOREFLAGS="-g -fsanitize=undefined"
--- a/4
+++ b/4
@ -1,3 +1,7 @@
+v0.4.3 :
+new : external dictionary API
+new : zstd-frugal
+
 v0.4.2 :
 Generic minor improvements for small blocks
 Fixed : big-endian compatibility, by Peter Harris (#85)
--- a/images/CSpeed.png
+++ b/images/CSpeed.png
--- a/lib/zstd.h
+++ b/lib/zstd.h
@ -48,7 +48,7 @@ extern "C" {
 ***************************************/
 #define ZSTD_VERSION_MAJOR    0    /* for breaking interface changes  */
 #define ZSTD_VERSION_MINOR    4    /* for new (non-breaking) interface capabilities */
-#define ZSTD_VERSION_RELEASE  2    /* for tweaks, bug-fixes, or development */
+#define ZSTD_VERSION_RELEASE  3    /* for tweaks, bug-fixes, or development */
 #define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
 unsigned ZSTD_versionNumber (void);

--- a/lib/zstd_compress.c
+++ b/lib/zstd_compress.c
@ -488,7 +488,7 @@ size_t ZSTD_compressSequences(void* dst, size_t maxDstSize,
            BYTE litLength = llTable[i];                                    /* (7)*/  /* (7)*/
            FSE_encodeSymbol(&blockStream, &stateMatchLength, matchLength); /* 17 */  /* 17 */
            if (MEM_32bits()) BIT_flushBits(&blockStream);                  /*  7 */
-            BIT_addBits(&blockStream, offset, nbBits);                      /* 32 */  /* 42 */
+            BIT_addBits(&blockStream, offset, nbBits);                      /* 31 */  /* 42 */   /* 24 bits max in 32-bits mode */
            if (MEM_32bits()) BIT_flushBits(&blockStream);                  /*  7 */
            FSE_encodeSymbol(&blockStream, &stateOffsetBits, offCode);      /* 16 */  /* 51 */
            FSE_encodeSymbol(&blockStream, &stateLitLength, litLength);     /* 26 */  /* 61 */
@ -730,13 +730,30 @@ static size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
 *  Fast Scan
 ***************************************/

+#define FILLHASHSTEP 3   /* distance, in bytes, between two consecutive positions inserted by ZSTD_fillHashTable() */
+static void ZSTD_fillHashTable (ZSTD_CCtx* zc, const void* end, const U32 mls)   /* records positions from zc->nextToUpdate up to `end` (inclusive) into zc->hashTable, one every FILLHASHSTEP bytes; mls is forwarded to ZSTD_hashPtr() */
+{
+    U32* const hashTable = zc->hashTable;
+    const U32 hBits = zc->params.hashLog;
+    const BYTE* const base = zc->base;
+    const BYTE* ip = base + zc->nextToUpdate;   /* first position to insert */
+    const BYTE* const iend = (const BYTE*) end;   /* last position eligible for insertion */
+
+    while(ip <= iend)
+    {
+        hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip - base);   /* store the index relative to base; a later position with the same hash overwrites an earlier one */
+        ip += FILLHASHSTEP;
+    }
+}
+
+
 FORCE_INLINE
 size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* zc,
                                       void* dst, size_t maxDstSize,
                                 const void* src, size_t srcSize,
                                 const U32 mls)
 {
-    U32* hashTable = zc->hashTable;
+    U32* const hashTable = zc->hashTable;
    const U32 hBits = zc->params.hashLog;
    seqStore_t* seqStorePtr = &(zc->seqStore);
    const BYTE* const base = zc->base;
@ -752,12 +769,12 @@ size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* zc,

    /* init */
    ZSTD_resetSeqStore(seqStorePtr);
-    if (ip < base+4)
+    if (ip < lowest+4)
    {
-        hashTable[ZSTD_hashPtr(base+1, hBits, mls)] = 1;
-        hashTable[ZSTD_hashPtr(base+2, hBits, mls)] = 2;
-        hashTable[ZSTD_hashPtr(base+3, hBits, mls)] = 3;
-        ip = base+4;
+        hashTable[ZSTD_hashPtr(lowest+1, hBits, mls)] = zc->dictLimit+1;
+        hashTable[ZSTD_hashPtr(lowest+2, hBits, mls)] = zc->dictLimit+2;
+        hashTable[ZSTD_hashPtr(lowest+3, hBits, mls)] = zc->dictLimit+3;
+        ip = lowest+4;
    }

    /* Main Search Loop */
@ -1518,6 +1535,7 @@ size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
    const BYTE* anchor = istart;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
+    const BYTE* const base = ctx->base + ctx->dictLimit;

    size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE;
    const U32 maxSearches = 1 << ctx->params.searchLog;
@ -1530,7 +1548,7 @@ size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,

    /* init */
    ZSTD_resetSeqStore(seqStorePtr);
-    if (((ip-ctx->base) - ctx->dictLimit) < REPCODE_STARTVALUE) ip += REPCODE_STARTVALUE;
+    if ((ip-base) < REPCODE_STARTVALUE) ip = base + REPCODE_STARTVALUE;

    /* Match Loop */
    while (ip < ilimit)
@ -1555,7 +1573,7 @@ size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
                matchLength = ml2, start = ip, offset=offsetFound;
        }

-         if (matchLength < MINMATCH)
+        if (matchLength < MINMATCH)
        {
            ip += ((ip-anchor) >> g_searchStrength) + 1;   /* jump faster over incompressible sections */
            continue;
@ -1616,7 +1634,7 @@ size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
        /* catch up */
        if (offset)
        {
-            while ((start>anchor) && (start>ctx->base+offset) && (start[-1] == start[-1-offset]))   /* only search for offset within prefix */
+            while ((start>anchor) && (start>base+offset) && (start[-1] == start[-1-offset]))   /* only search for offset within prefix */
                { start--; matchLength++; }
            offset_2 = offset_1; offset_1 = offset;
        }
@ -1975,8 +1993,21 @@ size_t ZSTD_compressContinue (ZSTD_CCtx* zc,
 {
    const BYTE* const ip = (const BYTE*) src;

+    /* Check if blocks follow each other */
+    if (src != zc->nextSrc)
+    {
+        /* not contiguous */
+        size_t delta = zc->nextSrc - ip;
+        zc->lowLimit = zc->dictLimit;
+        zc->dictLimit = (U32)(zc->nextSrc - zc->base);
+        zc->dictBase = zc->base;
+        zc->base -= delta;
+        zc->nextToUpdate = zc->dictLimit;
+        if (zc->dictLimit - zc->lowLimit < 8) zc->lowLimit = zc->dictLimit;   /* too small extDict */
+    }
+
    /* preemptive overflow correction */
-    if ((zc->base > (const BYTE*)src) || (zc->lowLimit > (1<<30) ))
+    if ((zc->base > ip) || (zc->lowLimit > (1<<30) ))
    {
        U32 correction = zc->lowLimit-1;
        ZSTD_reduceIndex(zc, correction);
@ -1988,17 +2019,6 @@ size_t ZSTD_compressContinue (ZSTD_CCtx* zc,
        else zc->nextToUpdate -= correction;
    }

-    /* Check if blocks follow each other */
-    if (src != zc->nextSrc)
-    {
-        /* not contiguous */
-        zc->lowLimit = zc->dictLimit;
-        zc->dictLimit = (U32)(zc->nextSrc - zc->base);
-        zc->dictBase = zc->base;
-        zc->base += ip - zc->nextSrc;
-        zc->nextToUpdate = zc->dictLimit;
-    }
-
    /* input-dictionary overlap */
    if ((ip+srcSize > zc->dictBase + zc->lowLimit) && (ip < zc->dictBase + zc->dictLimit))
    {
@ -2011,8 +2031,46 @@ size_t ZSTD_compressContinue (ZSTD_CCtx* zc,
    return ZSTD_compress_generic (zc, dst, dstSize, src, srcSize);
 }

+size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* zc, const void* src, size_t srcSize)   /* declares [src, src+srcSize) as the current prefix and, when srcSize > 8, indexes it using the routine selected by zc->params.strategy; @return : 0, or ERROR(GENERIC) for an unknown strategy */
+{
+    const BYTE* const ip = (const BYTE*) src;
+    const BYTE* const iend = ip + srcSize;

-/** ZSTD_compressBegin_advanced
+    /* input becomes current prefix */
+    zc->lowLimit = zc->dictLimit;   /* these five updates are the same ones the "not contiguous" branch of ZSTD_compressContinue() performs */
+    zc->dictLimit = (U32)(zc->nextSrc - zc->base);
+    zc->dictBase = zc->base;
+    zc->base += ip - zc->nextSrc;
+    zc->nextToUpdate = zc->dictLimit;
+
+    zc->nextSrc = iend;   /* ZSTD_compressContinue() compares its src against nextSrc to detect contiguous input */
+    if (srcSize <= 8) return 0;   /* nothing gets indexed for such a small input; the calls below all stop at iend-8 */
+
+    switch(zc->params.strategy)   /* each strategy uses its own insertion routine */
+    {
+    case ZSTD_fast:
+        ZSTD_fillHashTable (zc, iend-8, zc->params.searchLength);
+        break;
+
+    case ZSTD_greedy:
+    case ZSTD_lazy:
+    case ZSTD_lazy2:
+        ZSTD_insertAndFindFirstIndex (zc, iend-8, zc->params.searchLength);
+        break;
+
+    case ZSTD_btlazy2:
+        ZSTD_updateTree(zc, iend-8, iend, 1 << zc->params.searchLog, zc->params.searchLength);
+        break;
+
+    default:
+        return ERROR(GENERIC);   /* strategy doesn't exist; impossible */
+    }
+
+    return 0;
+}
+
+
+/*! ZSTD_compressBegin_advanced
 *   Write frame header, according to params
 *   @return : nb of bytes written */
 size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* ctx,
--- a/lib/zstd_decompress.c
+++ b/lib/zstd_decompress.c
@ -127,10 +127,10 @@ struct ZSTD_DCtx_s
    U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
    U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
    U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
-    void* previousDstEnd;
-    void* base;
-    void* vBase;
-    void* dictEnd;
+    const void* previousDstEnd;
+    const void* base;
+    const void* vBase;
+    const void* dictEnd;
    size_t expected;
    size_t headerSize;
    ZSTD_parameters params;
@ -141,7 +141,7 @@ struct ZSTD_DCtx_s
    size_t litSize;
    BYTE litBuffer[BLOCKSIZE + 8 /* margin for wildcopy */];
    BYTE headerBuffer[ZSTD_frameHeaderSize_max];
-};   /* typedef'd to ZSTD_Dctx within "zstd_static.h" */
+};  /* typedef'd to ZSTD_DCtx within "zstd_static.h" */

 size_t ZSTD_resetDCtx(ZSTD_DCtx* dctx)
 {
@ -505,7 +505,7 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
 FORCE_INLINE size_t ZSTD_execSequence(BYTE* op,
                                BYTE* const oend, seq_t sequence,
                                const BYTE** litPtr, const BYTE* const litLimit_8,
-                                BYTE* const base, BYTE* const vBase, BYTE* const dictEnd)
+                                const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
 {
    static const int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
    static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* substracted */
@ -538,13 +538,13 @@ FORCE_INLINE size_t ZSTD_execSequence(BYTE* op,
        match = dictEnd - (base-match);
        if (match + sequence.matchLength <= dictEnd)
        {
-            memcpy(oLitEnd, match, sequence.matchLength);
+            memmove(oLitEnd, match, sequence.matchLength);
            return sequenceLength;
        }
        /* span extDict & currentPrefixSegment */
        {
            size_t length1 = dictEnd - match;
-            memcpy(oLitEnd, match, length1);
+            memmove(oLitEnd, match, length1);
            op = oLitEnd + length1;
            sequence.matchLength -= length1;
            match = base;
@ -607,9 +607,9 @@ static size_t ZSTD_decompressSequences(
    U32* DTableLL = dctx->LLTable;
    U32* DTableML = dctx->MLTable;
    U32* DTableOffb = dctx->OffTable;
-    BYTE* const base = (BYTE*) (dctx->base);
-    BYTE* const vBase = (BYTE*) (dctx->vBase);
-    BYTE* const dictEnd = (BYTE*) (dctx->dictEnd);
+    const BYTE* const base = (const BYTE*) (dctx->base);
+    const BYTE* const vBase = (const BYTE*) (dctx->vBase);
+    const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);

    /* Build Decoding Tables */
    errorCode = ZSTD_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
@ -691,7 +691,7 @@ size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, const v


    /* init */
-    ctx->base = ctx->vBase = ctx->dictEnd = dst;
+    ctx->vBase = ctx->base = ctx->dictEnd = dst;

    /* Frame Header */
    {
@ -776,7 +776,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, con
        if ((dst > ctx->base) && (dst < ctx->previousDstEnd))   /* rolling buffer : new segment into dictionary */
            ctx->base = (char*)dst;   /* temporary affectation, for vBase calculation */
        ctx->dictEnd = ctx->previousDstEnd;
-        ctx->vBase = (char*)dst - ((char*)(ctx->previousDstEnd) - (char*)(ctx->base));
+        ctx->vBase = (const char*)dst - ((const char*)(ctx->previousDstEnd) - (const char*)(ctx->base));
        ctx->base = dst;
        ctx->previousDstEnd = dst;
    }
@ -827,10 +827,9 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, con
                ctx->bType = bp.blockType;
                ctx->stage = ZSTDds_decompressBlock;
            }
-
            return 0;
        }
-    case 3:
+    case ZSTDds_decompressBlock:
        {
            /* Decompress : block content */
            size_t rSize;
@ -862,3 +861,10 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSize, con
 }


+void ZSTD_decompress_insertDictionary(ZSTD_DCtx* ctx, const void* src, size_t srcSize)   /* registers [src, src+srcSize) in ctx as if it were the most recently written segment; src is only referenced, not copied */
+{
+    ctx->dictEnd = ctx->previousDstEnd;   /* same pointer updates as the non-contiguous branch of ZSTD_decompressContinue(), with src in place of dst */
+    ctx->vBase = (const char*)src - ((const char*)(ctx->previousDstEnd) - (const char*)(ctx->base));
+    ctx->base = src;
+    ctx->previousDstEnd = (const char*)src + srcSize;   /* differs from ZSTD_decompressContinue() : the end of src, not its start, becomes previousDstEnd */
+}
--- a/lib/zstd_static.h
+++ b/lib/zstd_static.h
@ -104,6 +104,8 @@ size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
 ****************************************/
 size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, void* dst, size_t maxDstSize, int compressionLevel);
 size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* ctx, void* dst, size_t maxDstSize, ZSTD_parameters params);
+size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
+
 size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
 size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t maxDstSize);

@ -118,6 +120,10 @@ size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t maxDstSize);
  Use ZSTD_compressBegin().
  You may also prefer the advanced derivative ZSTD_compressBegin_advanced(), for finer parameter control.

+  It's then possible to add a dictionary with ZSTD_compress_insertDictionary().
+  Note that dictionary presence is "hidden" information :
+  the decoder needs to be aware that it is required for proper decoding, or decoding will fail.
+
  Then, consume your input using ZSTD_compressContinue().
  The interface is synchronous, so all input will be consumed.
  You must ensure there is enough space in destination buffer to store compressed data under worst case scenario.
@ -131,12 +137,15 @@ size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t maxDstSize);

 typedef struct ZSTD_DCtx_s ZSTD_DCtx;
 ZSTD_DCtx* ZSTD_createDCtx(void);
-size_t     ZSTD_resetDCtx(ZSTD_DCtx* dctx);
 size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);

+size_t ZSTD_resetDCtx(ZSTD_DCtx* dctx);
 size_t ZSTD_getFrameParams(ZSTD_parameters* params, const void* src, size_t srcSize);
+void   ZSTD_decompress_insertDictionary(ZSTD_DCtx* ctx, const void* src, size_t srcSize);
+
 size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx);
 size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
+
 /**
  Streaming decompression, bufferless mode

@ -146,15 +155,17 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, co

  First operation is to retrieve frame parameters, using ZSTD_getFrameParams().
  This function doesn't consume its input. It needs enough input data to properly decode the frame header.
-  The objective is to retrieve *params.windowlog, to know minimum amount of memory required during decoding.
+  Objective is to retrieve *params.windowlog, to know minimum amount of memory required during decoding.
  Result : 0 when successful, it means the ZSTD_parameters structure has been filled.
           >0 : means there is not enough data into src. Provides the expected size to successfully decode header.
           errorCode, which can be tested using ZSTD_isError() (For example, if it's not a ZSTD header)

+  Then, you can optionally insert a dictionary, using ZSTD_decompress_insertDictionary(). This operation must mimic the compressor behavior, otherwise decompression will fail or be corrupted.
+
  Then it's possible to start decompression.
  Use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternately.
  ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue().
-  ZSTD_decompressContinue() requires this exact amount of bytes, or just fails.
+  ZSTD_decompressContinue() requires this exact amount of bytes, or it will fail.
  ZSTD_decompressContinue() needs previous data blocks during decompression, up to (1 << windowlog).
  They should preferably be located contiguously, prior to current block. Alternatively, a round buffer is also possible.

--- a/programs/Makefile
+++ b/programs/Makefile
@ -30,7 +30,7 @@
 # fullbench32: Same as fullbench, but forced to compile in 32-bits mode
 # ##########################################################################

-VERSION?= 0.4.2
+VERSION?= 0.4.3

 DESTDIR?=
 PREFIX ?= /usr/local
@ -65,6 +65,7 @@ VOID = /dev/null
 endif

 ZBUFFTEST = -T2mn
+FUZZERTEST= -T5mn

 .PHONY: default all clean install uninstall test test32 test-all

@ -89,6 +90,13 @@ zstd-pgo : clean zstd
 	rm zstd
 	$(MAKE) zstd MOREFLAGS=-fprofile-use

+zstd-noBench: $(ZSTD_FILES) $(ZSTDDIR)/zstd_buffered.c \
+        zstdcli.c fileio.c $(ZSTD_FILEIO_LEGACY)
+	$(CC)      $(FLAGS) -DZSTD_NOBENCH $^ -o zstd$(EXT)
+
+zstd-frugal: clean
+	CFLAGS=-Os $(MAKE) zstd-noBench ZSTD_LEGACY_SUPPORT=0 
+
 fullbench  : $(ZSTD_FILES) \
        datagen.c fullbench.c
 	$(CC)      $(FLAGS) $^ -o $@$(EXT)
@ -251,10 +259,10 @@ test-fullbench32: fullbench32 datagen
 	./fullbench32 -i1 -P0

 test-fuzzer: fuzzer
-	./fuzzer
+	./fuzzer $(FUZZERTEST)

 test-fuzzer32: fuzzer32
-	./fuzzer32
+	./fuzzer32 $(FUZZERTEST)

 test-zbuff: zbufftest
 	./zbufftest $(ZBUFFTEST)
@ -273,7 +281,7 @@ valgrindTest: zstd datagen fuzzer fullbench zbufftest
 	./datagen -g64MB > tmp
 	valgrind --leak-check=yes --error-exitcode=1 ./zstd -vf tmp $(VOID)
 	@rm tmp
-	valgrind --leak-check=yes --error-exitcode=1 ./fuzzer -i1000 -t1
+	valgrind --leak-check=yes --error-exitcode=1 ./fuzzer -T1mn -t1
 	valgrind --leak-check=yes --error-exitcode=1 ./fullbench -i1
 	valgrind --leak-check=yes --error-exitcode=1 ./zbufftest -T1mn

--- a/programs/fuzzer.c
+++ b/programs/fuzzer.c
@ -79,11 +79,13 @@ static const U32 prime2 = 2246822519U;
 static U32 g_displayLevel = 2;

 #define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
-            if ((FUZ_GetMilliSpan(g_time) > g_refreshRate) || (g_displayLevel>=4)) \
-            { g_time = FUZ_GetMilliStart(); DISPLAY(__VA_ARGS__); \
+            if ((FUZ_GetMilliSpan(g_displayTime) > g_refreshRate) || (g_displayLevel>=4)) \
+            { g_displayTime = FUZ_GetMilliStart(); DISPLAY(__VA_ARGS__); \
            if (g_displayLevel>=4) fflush(stdout); } }
 static const U32 g_refreshRate = 150;
-static U32 g_time = 0;
+static U32 g_displayTime = 0;
+
+static U32 g_testTime = 0;


 /*********************************************************
@ -259,6 +261,7 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit
    BYTE* srcBuffer;
    BYTE* cBuffer;
    BYTE* dstBuffer;
+    BYTE* mirrorBuffer;
    size_t srcBufferSize = (size_t)1<<maxSrcLog;
    size_t dstBufferSize = (size_t)1<<maxSampleLog;
    size_t cBufferSize   = ZSTD_compressBound(dstBufferSize);
@ -266,17 +269,22 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit
    U32 testNb = 0;
    U32 coreSeed = seed, lseed = 0;
    ZSTD_CCtx* ctx;
+    ZSTD_DCtx* dctx;
+    U32 startTime = FUZ_GetMilliStart();

    /* allocation */
    ctx = ZSTD_createCCtx();
+    dctx= ZSTD_createDCtx();
    cNoiseBuffer[0] = (BYTE*)malloc (srcBufferSize);
    cNoiseBuffer[1] = (BYTE*)malloc (srcBufferSize);
    cNoiseBuffer[2] = (BYTE*)malloc (srcBufferSize);
    cNoiseBuffer[3] = (BYTE*)malloc (srcBufferSize);
    cNoiseBuffer[4] = (BYTE*)malloc (srcBufferSize);
    dstBuffer = (BYTE*)malloc (dstBufferSize);
+    mirrorBuffer = (BYTE*)malloc (dstBufferSize);
    cBuffer   = (BYTE*)malloc (cBufferSize);
-    CHECK (!cNoiseBuffer[0] || !cNoiseBuffer[1] || !cNoiseBuffer[2] || !dstBuffer || !cBuffer || !ctx,
+    CHECK (!cNoiseBuffer[0] || !cNoiseBuffer[1] || !cNoiseBuffer[2] || !cNoiseBuffer[3] || !cNoiseBuffer[4]
+           || !dstBuffer || !mirrorBuffer || !cBuffer || !ctx || !dctx,
           "Not enough memory, fuzzer tests cancelled");

    /* Create initial samples */
@ -292,17 +300,23 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit
        FUZ_rand(&coreSeed);

    /* test loop */
-    for ( ; testNb <= nbTests; testNb++ )
+    for ( ; (testNb <= nbTests) || (FUZ_GetMilliSpan(startTime) < g_testTime); testNb++ )
    {
-        size_t sampleSize, sampleStart;
-        size_t cSize, dSize, dSupSize;
-        U32 sampleSizeLog, buffNb, cLevelMod;
+        size_t sampleSize, sampleStart, maxTestSize, totalTestSize;
+        size_t cSize, dSize, dSupSize, errorCode, totalCSize, totalGenSize;
+        U32 sampleSizeLog, buffNb, cLevelMod, nbChunks, n;
+        XXH64_state_t crc64;
        U64 crcOrig, crcDest;
        int cLevel;
        BYTE* sampleBuffer;
+        const BYTE* dict;
+        size_t dictSize;

        /* init */
-        DISPLAYUPDATE(2, "\r%6u/%6u   ", testNb, nbTests);
+        if (nbTests >= testNb)
+             { DISPLAYUPDATE(2, "\r%6u/%6u    ", testNb, nbTests); }
+        else { DISPLAYUPDATE(2, "\r%6u      ", testNb); }
+
        FUZ_rand(&coreSeed);
        lseed = coreSeed ^ prime1;
        buffNb = FUZ_rand(&lseed) & 127;
@ -342,7 +356,6 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit
        /* compression failure test : too small dest buffer */
        if (cSize > 3)
        {
-            size_t errorCode;
            const size_t missing = (FUZ_rand(&lseed) % (cSize-2)) + 1;   /* no problem, as cSize > 4 (frameHeaderSizer) */
            const size_t tooSmallSize = cSize - missing;
            static const U32 endMark = 0x4DC2B1A9;
@ -365,7 +378,6 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit

        /* truncated src decompression test */
        {
-            size_t errorCode;
            const size_t missing = (FUZ_rand(&lseed) % (cSize-2)) + 1;   /* no problem, as cSize > 4 (frameHeaderSizer) */
            const size_t tooSmallSize = cSize - missing;
            void* cBufferTooSmall = malloc(tooSmallSize);   /* valgrind will catch overflows */
@ -379,7 +391,6 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit
        /* too small dst decompression test */
        if (sampleSize > 3)
        {
-            size_t errorCode;
            const size_t missing = (FUZ_rand(&lseed) % (sampleSize-2)) + 1;   /* no problem, as cSize > 4 (frameHeaderSizer) */
            const size_t tooSmallSize = sampleSize - missing;
            static const BYTE token = 0xA9;
@ -424,7 +435,6 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit
                U32 noiseSrc = FUZ_rand(&lseed) % 5;
                const U32 endMark = 0xA9B1C3D6;
                U32 endCheck;
-                size_t errorCode;
                srcBuffer = cNoiseBuffer[noiseSrc];
                memcpy(dstBuffer+sampleSize, &endMark, 4);
                errorCode = ZSTD_decompress(dstBuffer, sampleSize, cBuffer, cSize);
@ -435,11 +445,80 @@ int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compressibilit
                CHECK(endMark!=endCheck, "ZSTD_decompress on noisy src : dst buffer overflow");
            }
        }
+
+        /* Streaming compression of scattered segments test */
+        XXH64_reset(&crc64, 0);
+        nbChunks = (FUZ_rand(&lseed) & 127) + 2;
+        sampleSizeLog = FUZ_rand(&lseed) % maxSrcLog;
+        maxTestSize = (size_t)1 << sampleSizeLog;
+        maxTestSize += FUZ_rand(&lseed) & (maxTestSize-1);
+        if (maxTestSize >= dstBufferSize) maxTestSize = dstBufferSize-1;
+
+        sampleSizeLog = FUZ_rand(&lseed) % maxSampleLog;
+        sampleSize = (size_t)1 << sampleSizeLog;
+        sampleSize += FUZ_rand(&lseed) & (sampleSize-1);
+        sampleStart = FUZ_rand(&lseed) % (srcBufferSize - sampleSize);
+        dict = srcBuffer + sampleStart;
+        dictSize = sampleSize;
+
+        cSize = ZSTD_compressBegin(ctx, cBuffer, cBufferSize, (FUZ_rand(&lseed) % (20 - (sampleSizeLog/3))) + 1);
+        errorCode = ZSTD_compress_insertDictionary(ctx, dict, dictSize);
+        CHECK (ZSTD_isError(errorCode), "dictionary insertion error : %s", ZSTD_getErrorName(errorCode));
+        totalTestSize = 0;
+        for (n=0; n<nbChunks; n++)
+        {
+            sampleSizeLog = FUZ_rand(&lseed) % maxSampleLog;
+            sampleSize = (size_t)1 << sampleSizeLog;
+            sampleSize += FUZ_rand(&lseed) & (sampleSize-1);
+            sampleStart = FUZ_rand(&lseed) % (srcBufferSize - sampleSize);
+
+            if (cBufferSize-cSize < ZSTD_compressBound(sampleSize))
+                /* avoid invalid dstBufferTooSmall */
+                break;
+            if (totalTestSize+sampleSize > maxTestSize) break;
+
+            errorCode = ZSTD_compressContinue(ctx, cBuffer+cSize, cBufferSize-cSize, srcBuffer+sampleStart, sampleSize);
+            CHECK (ZSTD_isError(errorCode), "multi-segments compression error : %s", ZSTD_getErrorName(errorCode));
+            cSize += errorCode;
+
+            XXH64_update(&crc64, srcBuffer+sampleStart, sampleSize);
+            memcpy(mirrorBuffer + totalTestSize, srcBuffer+sampleStart, sampleSize);
+            totalTestSize += sampleSize;
+        }
+        errorCode = ZSTD_compressEnd(ctx, cBuffer+cSize, cBufferSize-cSize);
+        CHECK (ZSTD_isError(errorCode), "multi-segments epilogue error : %s", ZSTD_getErrorName(errorCode));
+        cSize += errorCode;
+        crcOrig = XXH64_digest(&crc64);
+
+        /* streaming decompression test */
+        errorCode = ZSTD_resetDCtx(dctx);
+        CHECK (ZSTD_isError(errorCode), "cannot init DCtx : %s", ZSTD_getErrorName(errorCode));
+        ZSTD_decompress_insertDictionary(dctx, dict, dictSize);
+        totalCSize = 0;
+        totalGenSize = 0;
+        while (totalCSize < cSize)
+        {
+            size_t inSize = ZSTD_nextSrcSizeToDecompress(dctx);
+            size_t genSize = ZSTD_decompressContinue(dctx, dstBuffer+totalGenSize, dstBufferSize-totalGenSize, cBuffer+totalCSize, inSize);
+            CHECK (ZSTD_isError(genSize), "streaming decompression error : %s", ZSTD_getErrorName(genSize));
+            totalGenSize += genSize;
+            totalCSize += inSize;
+        }
+        CHECK (ZSTD_nextSrcSizeToDecompress(dctx) != 0, "frame not fully decoded");
+        CHECK (totalGenSize != totalTestSize, "decompressed data : wrong size")
+        CHECK (totalCSize != cSize, "compressed data should be fully read")
+        crcDest = XXH64(dstBuffer, totalTestSize, 0);
+        if (crcDest!=crcOrig)
+            errorCode = findDiff(mirrorBuffer, dstBuffer, totalTestSize);
+        CHECK (crcDest!=crcOrig, "streaming decompressed data corrupted : byte %u / %u  (%02X!=%02X)",
+               (U32)errorCode, (U32)totalTestSize, dstBuffer[errorCode], mirrorBuffer[errorCode]);
+
    }
-    DISPLAY("\rAll fuzzer tests completed   \n");
+    DISPLAY("\r%u fuzzer tests completed   \n", testNb-1);

 _cleanup:
    ZSTD_freeCCtx(ctx);
+    ZSTD_freeDCtx(dctx);
    free(cNoiseBuffer[0]);
    free(cNoiseBuffer[1]);
    free(cNoiseBuffer[2]);
@ -447,6 +526,7 @@ _cleanup:
    free(cNoiseBuffer[4]);
    free(cBuffer);
    free(dstBuffer);
+    free(mirrorBuffer);
    return result;

 _output_error:
@ -520,7 +600,7 @@ int main(int argc, char** argv)
                    break;

                case 'i':
-                    argument++;
+                    argument++; g_testTime=0;
                    nbTests=0;
                    while ((*argument>='0') && (*argument<='9'))
                    {
@ -530,6 +610,20 @@ int main(int argc, char** argv)
                    }
                    break;

+                case 'T':
+                    argument++;
+                    nbTests=0; g_testTime=0;
+                    while ((*argument>='0') && (*argument<='9'))
+                    {
+                        g_testTime *= 10;
+                        g_testTime += *argument - '0';
+                        argument++;
+                    }
+                    if (*argument=='m') g_testTime *=60, argument++;
+                    if (*argument=='n') argument++;
+                    g_testTime *= 1000;
+                    break;
+
                case 's':
                    argument++;
                    seed=0;
@ -580,8 +674,6 @@ int main(int argc, char** argv)
    DISPLAY("Seed = %u\n", seed);
    if (proba!=FUZ_COMPRESSIBILITY_DEFAULT) DISPLAY("Compressibility : %i%%\n", proba);

-    if (nbTests<=0) nbTests=1;
-
    if (testNb==0) result = basicUnitTests(0, ((double)proba) / 100);  /* constant seed for predictability */
    if (!result)
        result = fuzzerTests(seed, nbTests, testNb, ((double)proba) / 100);
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@ -119,7 +119,7 @@ static int usage(const char* programName)
    DISPLAY( "          with no FILE, or when FILE is - , read standard input\n");
    DISPLAY( "Arguments :\n");
    DISPLAY( " -1     : Fast compression (default) \n");
-    DISPLAY( " -9     : High compression \n");
+    DISPLAY( " -19    : High compression \n");
    DISPLAY( " -d     : decompression (default for %s extension)\n", ZSTD_EXTENSION);
    //DISPLAY( " -z     : force compression\n");
    DISPLAY( " -f     : overwrite output without prompting \n");
@ -138,11 +138,13 @@ static int usage_advanced(const char* programName)
    DISPLAY( " -q     : suppress warnings; specify twice to suppress errors too\n");
    DISPLAY( " -c     : force write to standard output, even if it is the console\n");
    //DISPLAY( " -t     : test compressed file integrity\n");
+#ifndef ZSTD_NOBENCH
    DISPLAY( "Benchmark arguments :\n");
    DISPLAY( " -b#    : benchmark file(s), using # compression level (default : 1) \n");
    DISPLAY( " -B#    : cut file into independent blocks of size # (default : no block)\n");
    DISPLAY( " -i#    : iteration loops [1-9](default : 3)\n");
    DISPLAY( " -r#    : test all compression levels from 1 to # (default : disabled)\n");
+#endif
    return 0;
 }

@ -169,17 +171,19 @@ int main(int argCount, const char** argv)
        bench=0,
        decode=0,
        forceStdout=0,
-        main_pause=0,
-        rangeBench = 1;
-    unsigned fileNameStart = 0;
-    unsigned nbFiles = 0;
+        main_pause=0;
    unsigned cLevel = 1;
    const char* programName = argv[0];
    const char* inFileName = NULL;
    const char* outFileName = NULL;
    char* dynNameSpace = NULL;
    const char extension[] = ZSTD_EXTENSION;
+    unsigned fileNameStart = 0;
+    unsigned nbFiles = 0;
+    int rangeBench = 1;

+    /* init */
+    (void)rangeBench; (void)nbFiles; (void)fileNameStart;   /* not used when ZSTD_NOBENCH set */
    displayOut = stderr;
    /* Pick out basename component. Don't rely on stdlib because of conflicting behavior. */
    for (i = (int)strlen(programName); i > 0; i--) { if (programName[i] == '/') { i++; break; } }
@ -260,6 +264,7 @@ int main(int argCount, const char** argv)
                    /* keep source file (default anyway, so useless; only for xz/lzma compatibility) */
                case 'k': argument++; break;

+#ifndef ZSTD_NOBENCH
                    /* Benchmark */
                case 'b': bench=1; argument++; break;

@ -293,6 +298,7 @@ int main(int argCount, const char** argv)
                        rangeBench = -1;
                        argument++;
                        break;
+#endif   /* ZSTD_NOBENCH */

                    /* Pause at the end (hidden option) */
                case 'p': main_pause=1; argument++; break;
@ -320,7 +326,13 @@ int main(int argCount, const char** argv)
    DISPLAYLEVEL(3, WELCOME_MESSAGE);

    /* Check if benchmark is selected */
-    if (bench) { BMK_benchFiles(argv+fileNameStart, nbFiles, cLevel*rangeBench); goto _end; }
+    if (bench)
+    {
+#ifndef ZSTD_NOBENCH
+        BMK_benchFiles(argv+fileNameStart, nbFiles, cLevel*rangeBench);
+#endif
+        goto _end;
+    }

    /* No input filename ==> use stdin */
    if(!inFileName) { inFileName=stdinmark; }