diff --git a/.travis.yml b/.travis.yml
index 972c1b4ba..9e2f58613 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -44,7 +44,7 @@ matrix:
             - qemu-user-static
     - os: linux
       sudo: required
-      env: PLATFORM="Ubuntu 12.04" CMD="make -C programs zstd-small && make -C programs zstd-decompress && make -C programs zstd-compress && make clean && make -C tests versionsTest"
+      env: PLATFORM="Ubuntu 12.04" CMD="make -C programs zstd-small && make -C programs zstd-decompress && make -C programs zstd-compress && make -C programs clean && make -C tests versionsTest"
     - os: linux
       sudo: required
       env: PLATFORM="Ubuntu 12.04" CMD="make asan32"
diff --git a/NEWS b/NEWS
index a9454b40b..570d66799 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,6 @@
 v1.1.1
 New : command -M#, --memory=, --memlimit=, --memlimit-decompress= to limit allowed memory consumption
+Changed : zstd_errors.h is now part of include installation
 
 v1.1.0
 New : contrib/pzstd, parallel version of zstd, by Nick Terrell
diff --git a/contrib/gen_html/Makefile b/contrib/gen_html/Makefile
new file mode 100644
index 000000000..c68e560a1
--- /dev/null
+++ b/contrib/gen_html/Makefile
@@ -0,0 +1,36 @@
+# ##########################################################################
+# Copyright (c) 2016-present, Facebook, Inc.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree. An additional grant
+# of patent rights can be found in the PATENTS file in the same directory.
+# ##########################################################################
+
+
+CFLAGS ?= -O3
+CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow -Wstrict-aliasing=1 -Wswitch-enum -Wno-comment
+CFLAGS += $(MOREFLAGS)
+FLAGS   = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS)
+
+
+
+# Define *.exe as extension for Windows systems
+ifneq (,$(filter Windows%,$(OS)))
+EXT =.exe
+else
+EXT =
+endif
+
+
+.PHONY: default gen_html
+
+default: gen_html
+
+gen_html: gen_html.cpp
+	$(CXX)      $(FLAGS) $^ -o $@$(EXT)
+
+
+clean:
+	@$(RM) gen_html$(EXT)
+	@echo Cleaning completed
diff --git a/contrib/gen_html/README.md b/contrib/gen_html/README.md
new file mode 100644
index 000000000..63a4caa25
--- /dev/null
+++ b/contrib/gen_html/README.md
@@ -0,0 +1,31 @@
+gen_html - a program for automatic generation of zstd manual 
+============================================================
+
+#### Introduction
+
+This simple C++ program generates a single-page HTML manual from `zstd.h`.
+
+The format of recognized comment blocks is following:
+- comments of type `/*!` mean: this is a function declaration; switch comments with declarations
+- comments of type `/**` and `/*-` mean: this is a comment; use a `<H2>` header for the first line
+- comments of type `/*=` and `/**=` mean: use a `<H3>` header and show also all functions until first empty line
+- comments of type `/*X` where `X` is different from above-mentioned are ignored
+
+Moreover:
+- `ZSTDLIB_API` is removed to improve readability
+- `typedef` are detected and included even if uncommented
+- comments of type `/**<` and `/*!<` are detected and only function declaration is highlighted (bold)
+
+
+#### Usage
+
+The program requires 3 parameters:
+```
+gen_html [zstd_version] [input_file] [output_html]
+```
+
+To compile program and generate zstd manual we have used: 
+```
+make
+./gen_html.exe 1.1.1 ../../lib/zstd.h zstd_manual.html
+```
diff --git a/contrib/gen_html/gen_html.cpp b/contrib/gen_html/gen_html.cpp
new file mode 100644
index 000000000..2157829f7
--- /dev/null
+++ b/contrib/gen_html/gen_html.cpp
@@ -0,0 +1,216 @@
+/*
+ * Copyright (c) 2016-present, Przemyslaw Skibinski, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <vector>
+using namespace std;
+
+
+/* trim string at the beginning and at the end */
+void trim(string& s, string characters)
+{
+    size_t p = s.find_first_not_of(characters);
+    s.erase(0, p);
+ 
+    p = s.find_last_not_of(characters);
+    if (string::npos != p)
+       s.erase(p+1);
+}
+
+
+/* trim C++ style comments */
+void trim_comments(string &s)
+{
+    size_t spos, epos;
+
+    spos = s.find("/*");
+    epos = s.find("*/");
+    s = s.substr(spos+3, epos-(spos+3));
+}
+
+
+/* get lines until a given terminator */
+vector<string> get_lines(vector<string>& input, int& linenum, string terminator)
+{
+    vector<string> out;
+    string line;
+    size_t epos;
+
+    while ((size_t)linenum < input.size()) {
+        line = input[linenum];
+
+        if (terminator.empty() && line.empty()) { linenum--; break; }
+        
+        epos = line.find(terminator);
+        if (!terminator.empty() && epos!=string::npos) {
+            out.push_back(line);
+            break;
+        }
+        out.push_back(line);
+        linenum++;
+    }
+    return out;
+}
+
+
+/* print line with ZSTDLIB_API removed and C++ comments not bold */
+void print_line(stringstream &sout, string line)
+{
+    size_t spos;
+
+    if (line.substr(0,12) == "ZSTDLIB_API ") line = line.substr(12);
+    spos = line.find("/*");
+    if (spos!=string::npos) {
+        sout << line.substr(0, spos);
+        sout << "</b>" << line.substr(spos) << "<b>" << endl;
+    } else {
+      //  fprintf(stderr, "lines=%s\n", line.c_str());
+        sout << line << endl;
+    }
+}
+
+
+int main(int argc, char *argv[]) {
+    char exclam;
+    int linenum, chapter = 1;
+    vector<string> input, lines, comments, chapters;
+    string line, version;
+    size_t spos, l;
+    stringstream sout;
+    ifstream istream;
+    ofstream ostream;
+
+    if (argc < 4) {
+        cout << "usage: " << argv[0] << " [zstd_version] [input_file] [output_html]" << endl;
+        exit(0);
+    }
+
+    version = "zstd " + string(argv[1]) + " Manual";
+
+    istream.open(argv[2], ifstream::in);
+    if (!istream.is_open()) {
+        cout << "Error opening file " << argv[2] << endl;
+        exit(0);
+    }
+
+    ostream.open(argv[3], ifstream::out);
+    if (!ostream.is_open()) {
+        cout << "Error opening file " << argv[3] << endl;
+        exit(0);
+    }
+
+    while (getline(istream, line)) {
+        input.push_back(line);
+    }
+
+    for (linenum=0; (size_t)linenum < input.size(); linenum++) {
+        line = input[linenum];
+
+        /* typedefs are detected and included even if uncommented */
+        if (line.substr(0,7) == "typedef" && line.find("{")!=string::npos) {
+            lines = get_lines(input, linenum, "}");
+            sout << "<pre><b>";
+            for (l=0; l<lines.size(); l++) {
+                print_line(sout, lines[l]);
+            }
+            sout << "</b></pre><BR>" << endl;
+            continue;
+        }
+
+        /* comments of type /**< and /*!< are detected and only function declaration is highlighted (bold) */
+        if ((line.find("/**<")!=string::npos || line.find("/*!<")!=string::npos) && line.find("*/")!=string::npos) {
+            sout << "<pre><b>";
+            print_line(sout, line);
+            sout << "</b></pre><BR>" << endl;
+            continue;
+        }
+
+        /* comments of type /*= and /**= mean: use a <H3> header and show also all functions until first empty line */
+        if ((line.substr(0,3) == "/*=" || line.substr(0,4) == "/**=") && line.find("*/")!=string::npos) {
+            trim_comments(line);
+            trim(line, "= ");
+            sout << "<h3>" << line << "</h3><pre><b>";
+            lines = get_lines(input, ++linenum, "");
+            for (l=0; l<lines.size(); l++) {
+                print_line(sout, lines[l]);
+            }
+            sout << "</b></pre><BR>" << endl;
+            continue;
+        }
+
+        spos = line.find("/*!");
+        if (spos==string::npos)
+            spos = line.find("/**");
+        if (spos==string::npos)
+            spos = line.find("/*-");
+
+        if (spos==string::npos)
+            continue;
+
+        exclam = line[spos+2];
+        comments = get_lines(input, linenum, "*/");
+        if (!comments.empty()) comments[0] = line.substr(spos+3);
+        if (!comments.empty()) comments[comments.size()-1] = comments[comments.size()-1].substr(0, comments[comments.size()-1].find("*/"));
+        for (l=0; l<comments.size(); l++) {
+            if (comments[l].find(" *")==0) comments[l] = comments[l].substr(2);
+            else if (comments[l].find("  *")==0) comments[l] = comments[l].substr(3);
+            trim(comments[l], "*-");
+        }
+        while (!comments.empty() && comments[comments.size()-1].empty()) comments.pop_back(); // remove empty line at the end
+        while (!comments.empty() && comments[0].empty()) comments.erase(comments.begin()); // remove empty line at the start
+
+        /* comments of type /*! mean: this is a function declaration; switch comments with declarations */
+        if (exclam == '!') {
+            if (!comments.empty()) comments.erase(comments.begin()); /* remove first line like "ZSTD_XXX() :" */
+            linenum++;
+            lines = get_lines(input, linenum, "");
+
+            sout << "<pre><b>";
+            for (l=0; l<lines.size(); l++) {
+              //  fprintf(stderr, "line[%d]=%s\n", l, lines[l].c_str());
+                print_line(sout, lines[l]);
+            }
+            sout << "</b><p>";
+            for (l=0; l<comments.size(); l++) {
+                print_line(sout, comments[l]);
+            }
+            sout << "</p></pre><BR>" << endl << endl;
+        } else { /* comments of type /** and /*- mean: this is a comment; use a <H2> header for the first line */
+            if (comments.empty()) continue;
+
+            trim(comments[0], " ");
+            sout << "<a name=\"Chapter" << chapter << "\"></a><h2>" << comments[0] << "</h2><pre>";
+            chapters.push_back(comments[0]);
+            chapter++;
+
+            for (l=1; l<comments.size(); l++) {
+                print_line(sout, comments[l]);
+            }
+            if (comments.size() > 1)
+                sout << "<BR></pre>" << endl << endl;
+            else
+                sout << "</pre>" << endl << endl;
+        }
+    }
+
+    ostream << "<html>\n<head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-1\">\n<title>" << version << "</title>\n</head>\n<body>" << endl;
+    ostream << "<h1>" << version << "</h1>\n";
+
+    ostream << "<hr>\n<a name=\"Contents\"></a><h2>Contents</h2>\n<ol>\n";
+    for (size_t i=0; i<chapters.size(); i++)
+        ostream << "<li><a href=\"#Chapter" << i+1 << "\">" << chapters[i].c_str() << "</a></li>\n";
+    ostream << "</ol>\n<hr>\n";
+
+    ostream << sout.str();
+    ostream << "</html>" << endl << "</body>" << endl;
+
+    return 0;
+}
\ No newline at end of file
diff --git a/zstd_compression_format.md b/doc/zstd_compression_format.md
similarity index 100%
rename from zstd_compression_format.md
rename to doc/zstd_compression_format.md
diff --git a/doc/zstd_manual.html b/doc/zstd_manual.html
new file mode 100644
index 000000000..aac5af9c2
--- /dev/null
+++ b/doc/zstd_manual.html
@@ -0,0 +1,528 @@
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
+<title>zstd 1.1.1 Manual</title>
+</head>
+<body>
+<h1>zstd 1.1.1 Manual</h1>
+<hr>
+<a name="Contents"></a><h2>Contents</h2>
+<ol>
+<li><a href="#Chapter1">Introduction</a></li>
+<li><a href="#Chapter2">Version</a></li>
+<li><a href="#Chapter3">Simple API</a></li>
+<li><a href="#Chapter4">Explicit memory management</a></li>
+<li><a href="#Chapter5">Simple dictionary API</a></li>
+<li><a href="#Chapter6">Fast dictionary API</a></li>
+<li><a href="#Chapter7">Streaming</a></li>
+<li><a href="#Chapter8">Streaming compression - HowTo</a></li>
+<li><a href="#Chapter9">Streaming decompression - HowTo</a></li>
+<li><a href="#Chapter10">START OF ADVANCED AND EXPERIMENTAL FUNCTIONS</a></li>
+<li><a href="#Chapter11">Advanced types</a></li>
+<li><a href="#Chapter12">Advanced compression functions</a></li>
+<li><a href="#Chapter13">Advanced decompression functions</a></li>
+<li><a href="#Chapter14">Advanced streaming functions</a></li>
+<li><a href="#Chapter15">Buffer-less and synchronous inner streaming functions</a></li>
+<li><a href="#Chapter16">Buffer-less streaming compression (synchronous mode)</a></li>
+<li><a href="#Chapter17">Buffer-less streaming decompression (synchronous mode)</a></li>
+<li><a href="#Chapter18">Block functions</a></li>
+</ol>
+<hr>
+<a name="Chapter1"></a><h2>Introduction</h2><pre>
+  Zstd, short for Zstandard, is a fast lossless compression algorithm, targeting real-time compression scenarios
+  at zlib-level and better compression ratios. The zstd compression library provides in-memory compression and
+  decompression functions. The library supports compression levels from 1 up to ZSTD_maxCLevel() which is 22.
+  Levels from 20 to 22 should be used with caution as they require about 300-1300 MB for compression.
+  Compression can be done in:
+    - a single step (described as Simple API)
+    - a single step, reusing a context (described as Explicit memory management)
+    - repeated calls of the compression function (described as Streaming compression)
+  The compression ratio achievable on small data can be highly improved using compression with a dictionary in:
+    - a single step (described as Simple dictionary API)
+    - a single step, reusing a dictionary (described as Fast dictionary API)
+
+  Advanced and experimantal functions can be accessed using #define ZSTD_STATIC_LINKING_ONLY before including zstd.h.
+  These APIs shall never be used with a dynamic library. 
+  They are not "stable", their definition may change in the future. Only static linking is allowed.
+<BR></pre>
+
+<a name="Chapter2"></a><h2>Version</h2><pre></pre>
+
+<pre><b>unsigned ZSTD_versionNumber (void);  </b>/**< returns version number of ZSTD */<b>
+</b></pre><BR>
+<a name="Chapter3"></a><h2>Simple API</h2><pre></pre>
+
+<pre><b>size_t ZSTD_compress( void* dst, size_t dstCapacity,
+                            const void* src, size_t srcSize,
+                                  int compressionLevel);
+</b><p>    Compresses `src` content as a single zstd compressed frame into already allocated `dst`.
+    Hint : compression runs faster if `dstCapacity` >=  `ZSTD_compressBound(srcSize)`.
+    @return : compressed size written into `dst` (<= `dstCapacity),
+              or an error code if it fails (which can be tested using ZSTD_isError()) 
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_decompress( void* dst, size_t dstCapacity,
+                              const void* src, size_t compressedSize);
+</b><p>    `compressedSize` : must be the _exact_ size of a single compressed frame.
+    `dstCapacity` is an upper bound of originalSize.
+    If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data.
+    @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
+              or an errorCode if it fails (which can be tested using ZSTD_isError()) 
+</p></pre><BR>
+
+<pre><b>unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize);
+</b><p>   'src' is the start of a zstd compressed frame.
+   @return : content size to be decompressed, as a 64-bits value _if known_, 0 otherwise.
+    note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode.
+             When `return==0`, data to decompress could be any size.
+             In which case, it's necessary to use streaming mode to decompress data.
+             Optionally, application can still use ZSTD_decompress() while relying on implied limits.
+             (For example, data may be necessarily cut into blocks <= 16 KB).
+    note 2 : decompressed size is always present when compression is done with ZSTD_compress()
+    note 3 : decompressed size can be very large (64-bits value),
+             potentially larger than what local system can handle as a single memory segment.
+             In which case, it's necessary to use streaming mode to decompress data.
+    note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified.
+             Always ensure result fits within application's authorized limits.
+             Each application can set its own limits.
+    note 5 : when `return==0`, if precise failure cause is needed, use ZSTD_getFrameParams() to know more. 
+</p></pre><BR>
+
+<h3>Helper functions</h3><pre><b>int         ZSTD_maxCLevel(void);               </b>/*!< maximum compression level available */<b>
+size_t      ZSTD_compressBound(size_t srcSize); </b>/*!< maximum compressed size in worst case scenario */<b>
+unsigned    ZSTD_isError(size_t code);          </b>/*!< tells if a `size_t` function result is an error code */<b>
+const char* ZSTD_getErrorName(size_t code);     </b>/*!< provides readable string from an error code */<b>
+</b></pre><BR>
+<a name="Chapter4"></a><h2>Explicit memory management</h2><pre></pre>
+
+<h3>Compression context</h3><pre><b>typedef struct ZSTD_CCtx_s ZSTD_CCtx;
+ZSTD_CCtx* ZSTD_createCCtx(void);
+size_t     ZSTD_freeCCtx(ZSTD_CCtx* cctx);
+</b></pre><BR>
+<pre><b>size_t ZSTD_compressCCtx(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel);
+</b><p>    Same as ZSTD_compress(), requires an allocated ZSTD_CCtx (see ZSTD_createCCtx()) 
+</p></pre><BR>
+
+<h3>Decompression context</h3><pre><b>typedef struct ZSTD_DCtx_s ZSTD_DCtx;
+ZSTD_DCtx* ZSTD_createDCtx(void);
+size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
+</b></pre><BR>
+<pre><b>size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+</b><p>   Same as ZSTD_decompress(), requires an allocated ZSTD_DCtx (see ZSTD_createDCtx()) 
+</p></pre><BR>
+
+<a name="Chapter5"></a><h2>Simple dictionary API</h2><pre></pre>
+
+<pre><b>size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx,
+                                           void* dst, size_t dstCapacity,
+                                     const void* src, size_t srcSize,
+                                     const void* dict,size_t dictSize,
+                                           int compressionLevel);
+</b><p>   Compression using a predefined Dictionary (see dictBuilder/zdict.h).
+   Note : This function load the dictionary, resulting in significant startup delay. 
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
+                                             void* dst, size_t dstCapacity,
+                                       const void* src, size_t srcSize,
+                                       const void* dict,size_t dictSize);
+</b><p>   Decompression using a predefined Dictionary (see dictBuilder/zdict.h).
+   Dictionary must be identical to the one used during compression.
+   Note : This function load the dictionary, resulting in significant startup delay 
+</p></pre><BR>
+
+<a name="Chapter6"></a><h2>Fast dictionary API</h2><pre></pre>
+
+<pre><b>ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel);
+</b><p>   Create a digested dictionary, ready to start compression operation without startup delay.
+   `dict` can be released after ZSTD_CDict creation 
+</p></pre><BR>
+
+<pre><b>size_t      ZSTD_freeCDict(ZSTD_CDict* CDict);
+</b><p>   Function frees memory allocated with ZSTD_createCDict() 
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
+                                            void* dst, size_t dstCapacity,
+                                      const void* src, size_t srcSize,
+                                      const ZSTD_CDict* cdict);
+</b><p>   Compression using a digested Dictionary.
+   Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
+   Note that compression level is decided during dictionary creation 
+</p></pre><BR>
+
+<pre><b>ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize);
+</b><p>   Create a digested dictionary, ready to start decompression operation without startup delay.
+   `dict` can be released after creation 
+</p></pre><BR>
+
+<pre><b>size_t      ZSTD_freeDDict(ZSTD_DDict* ddict);
+</b><p>   Function frees memory allocated with ZSTD_createDDict() 
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
+                                              void* dst, size_t dstCapacity,
+                                        const void* src, size_t srcSize,
+                                        const ZSTD_DDict* ddict);
+</b><p>   Decompression using a digested Dictionary
+   Faster startup than ZSTD_decompress_usingDict(), recommended when same dictionary is used multiple times. 
+</p></pre><BR>
+
+<a name="Chapter7"></a><h2>Streaming</h2><pre></pre>
+
+<pre><b>typedef struct ZSTD_inBuffer_s {
+  const void* src;    </b>/**< start of input buffer */<b>
+  size_t size;        </b>/**< size of input buffer */<b>
+  size_t pos;         </b>/**< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */<b>
+} ZSTD_inBuffer;
+</b></pre><BR>
+<pre><b>typedef struct ZSTD_outBuffer_s {
+  void*  dst;         </b>/**< start of output buffer */<b>
+  size_t size;        </b>/**< size of output buffer */<b>
+  size_t pos;         </b>/**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */<b>
+} ZSTD_outBuffer;
+</b></pre><BR>
+<a name="Chapter8"></a><h2>Streaming compression - HowTo</h2><pre>
+  A ZSTD_CStream object is required to track streaming operation.
+  Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources.
+  ZSTD_CStream objects can be reused multiple times on consecutive compression operations.
+
+  Start by initializing ZSTD_CStream.
+  Use ZSTD_initCStream() to start a new compression operation.
+  Use ZSTD_initCStream_usingDict() for a compression which requires a dictionary.
+
+  Use ZSTD_compressStream() repetitively to consume input stream.
+  The function will automatically update both `pos` fields.
+  Note that it may not consume the entire input, in which case `pos < size`,
+  and it's up to the caller to present again remaining data.
+  @return : a size hint, preferred nb of bytes to use as input for next function call
+           (it's just a hint, to help latency a little, any other value will work fine)
+           (note : the size hint is guaranteed to be <= ZSTD_CStreamInSize() )
+            or an error code, which can be tested using ZSTD_isError().
+
+  At any moment, it's possible to flush whatever data remains within buffer, using ZSTD_flushStream().
+  `output->pos` will be updated.
+  Note some content might still be left within internal buffer if `output->size` is too small.
+  @return : nb of bytes still present within internal buffer (0 if it's empty)
+            or an error code, which can be tested using ZSTD_isError().
+
+  ZSTD_endStream() instructs to finish a frame.
+  It will perform a flush and write frame epilogue.
+  The epilogue is required for decoders to consider a frame completed.
+  Similar to ZSTD_flushStream(), it may not be able to flush the full content if `output->size` is too small.
+  In which case, call again ZSTD_endStream() to complete the flush.
+  @return : nb of bytes still present within internal buffer (0 if it's empty)
+            or an error code, which can be tested using ZSTD_isError().
+
+ 
+<BR></pre>
+
+<h3>Streaming compression functions</h3><pre><b>typedef struct ZSTD_CStream_s ZSTD_CStream;
+ZSTD_CStream* ZSTD_createCStream(void);
+size_t ZSTD_freeCStream(ZSTD_CStream* zcs);
+size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel);
+size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
+size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
+size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
+</b></pre><BR>
+<pre><b>size_t ZSTD_CStreamInSize(void);    </b>/**< recommended size for input buffer */<b>
+</b></pre><BR>
+<pre><b>size_t ZSTD_CStreamOutSize(void);   </b>/**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block in all circumstances. */<b>
+</b></pre><BR>
+<a name="Chapter9"></a><h2>Streaming decompression - HowTo</h2><pre>
+  A ZSTD_DStream object is required to track streaming operations.
+  Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources.
+  ZSTD_DStream objects can be re-used multiple times.
+
+  Use ZSTD_initDStream() to start a new decompression operation,
+   or ZSTD_initDStream_usingDict() if decompression requires a dictionary.
+   @return : recommended first input size
+
+  Use ZSTD_decompressStream() repetitively to consume your input.
+  The function will update both `pos` fields.
+  If `input.pos < input.size`, some input has not been consumed.
+  It's up to the caller to present again remaining data.
+  If `output.pos < output.size`, decoder has flushed everything it could.
+  @return : 0 when a frame is completely decoded and fully flushed,
+            an error code, which can be tested using ZSTD_isError(),
+            any other value > 0, which means there is still some work to do to complete the frame.
+            The return value is a suggested next input size (just an hint, to help latency).
+ 
+<BR></pre>
+
+<h3>Streaming decompression functions</h3><pre><b>typedef struct ZSTD_DStream_s ZSTD_DStream;
+ZSTD_DStream* ZSTD_createDStream(void);
+size_t ZSTD_freeDStream(ZSTD_DStream* zds);
+size_t ZSTD_initDStream(ZSTD_DStream* zds);
+size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
+</b></pre><BR>
+<pre><b>size_t ZSTD_DStreamInSize(void);    </b>/*!< recommended size for input buffer */<b>
+</b></pre><BR>
+<pre><b>size_t ZSTD_DStreamOutSize(void);   </b>/*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */<b>
+</b></pre><BR>
+<a name="Chapter10"></a><h2>START OF ADVANCED AND EXPERIMENTAL FUNCTIONS</h2><pre> The definitions in this section are considered experimental.
+ They should never be used with a dynamic library, as they may change in the future.
+ They are provided for advanced usages.
+ Use them only in association with static linking.
+ 
+<BR></pre>
+
+<a name="Chapter11"></a><h2>Advanced types</h2><pre></pre>
+
+<pre><b>typedef enum { ZSTD_fast, ZSTD_dfast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2, ZSTD_btopt } ZSTD_strategy;   </b>/* from faster to stronger */<b>
+</b></pre><BR>
+<pre><b>typedef struct {
+    unsigned windowLog;      </b>/**< largest match distance : larger == more compression, more memory needed during decompression */<b>
+    unsigned chainLog;       </b>/**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */<b>
+    unsigned hashLog;        </b>/**< dispatch table : larger == faster, more memory */<b>
+    unsigned searchLog;      </b>/**< nb of searches : larger == more compression, slower */<b>
+    unsigned searchLength;   </b>/**< match length searched : larger == faster decompression, sometimes less compression */<b>
+    unsigned targetLength;   </b>/**< acceptable match size for optimal parser (only) : larger == more compression, slower */<b>
+    ZSTD_strategy strategy;
+} ZSTD_compressionParameters;
+</b></pre><BR>
+<pre><b>typedef struct {
+    unsigned contentSizeFlag; </b>/**< 1: content size will be in frame header (if known). */<b>
+    unsigned checksumFlag;    </b>/**< 1: will generate a 22-bits checksum at end of frame, to be used for error detection by decompressor */<b>
+    unsigned noDictIDFlag;    </b>/**< 1: no dict ID will be saved into frame header (if dictionary compression) */<b>
+} ZSTD_frameParameters;
+</b></pre><BR>
+<pre><b>typedef struct {
+    ZSTD_compressionParameters cParams;
+    ZSTD_frameParameters fParams;
+} ZSTD_parameters;
+</b></pre><BR>
+<h3>Custom memory allocation functions</h3><pre><b>typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size);
+typedef void  (*ZSTD_freeFunction) (void* opaque, void* address);
+typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem;
+</b></pre><BR>
+<a name="Chapter12"></a><h2>Advanced compression functions</h2><pre></pre>
+
+<pre><b>size_t ZSTD_estimateCCtxSize(ZSTD_compressionParameters cParams);
+</b><p>  Gives the amount of memory allocated for a ZSTD_CCtx given a set of compression parameters.
+  `frameContentSize` is an optional parameter, provide `0` if unknown 
+</p></pre><BR>
+
+<pre><b>ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem);
+</b><p>  Create a ZSTD compression context using external alloc and free functions 
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx);
+</b><p>  Gives the amount of memory used by a given ZSTD_CCtx 
+</p></pre><BR>
+
+<pre><b>ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize,
+                                                  ZSTD_parameters params, ZSTD_customMem customMem);
+</b><p>  Create a ZSTD_CDict using external alloc and free, and customized compression parameters 
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict);
+</b><p>  Gives the amount of memory used by a given ZSTD_sizeof_CDict 
+</p></pre><BR>
+
+<pre><b>ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSize, size_t dictSize);
+</b><p>   same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of a `ZSTD_compressionParameters`.
+   All fields of `ZSTD_frameParameters` are set to default (0) 
+</p></pre><BR>
+
+<pre><b>ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSize, size_t dictSize);
+</b><p>   @return ZSTD_compressionParameters structure for a selected compression level and srcSize.
+   `srcSize` value is optional, select 0 if not known 
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_checkCParams(ZSTD_compressionParameters params);
+</b><p>   Ensure param values remain within authorized range 
+</p></pre><BR>
+
+<pre><b>ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize);
+</b><p>   optimize params for a given `srcSize` and `dictSize`.
+   both values are optional, select `0` if unknown. 
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
+                                           void* dst, size_t dstCapacity,
+                                     const void* src, size_t srcSize,
+                                     const void* dict,size_t dictSize,
+                                           ZSTD_parameters params);
+</b><p>   Same as ZSTD_compress_usingDict(), with fine-tune control of each compression parameter 
+</p></pre><BR>
+
+<a name="Chapter13"></a><h2>Advanced decompression functions</h2><pre></pre>
+
+<pre><b>size_t ZSTD_estimateDCtxSize(void);
+</b><p>  Gives the potential amount of memory allocated to create a ZSTD_DCtx 
+</p></pre><BR>
+
+<pre><b>ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem);
+</b><p>  Create a ZSTD decompression context using external alloc and free functions 
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx);
+</b><p>  Gives the amount of memory used by a given ZSTD_DCtx 
+</p></pre><BR>
+
+<pre><b>size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
+</b><p>  Gives the amount of memory used by a given ZSTD_DDict 
+</p></pre><BR>
+
+<a name="Chapter14"></a><h2>Advanced streaming functions</h2><pre></pre>
+
+<h3>Advanced Streaming compression functions</h3><pre><b>ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
+size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel);
+size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize,
+                                             ZSTD_parameters params, unsigned long long pledgedSrcSize);  </b>/**< pledgedSrcSize is optional and can be zero == unknown */<b>
+size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);  </b>/**< re-use compression parameters from previous init; saves dictionary loading */<b>
+size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs);
+</b></pre><BR>
+<h3>Advanced Streaming decompression functions</h3><pre><b>typedef enum { ZSTDdsp_maxWindowSize } ZSTD_DStreamParameter_e;
+ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem);
+size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
+size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, ZSTD_DStreamParameter_e paramType, unsigned paramValue);
+size_t ZSTD_resetDStream(ZSTD_DStream* zds);  </b>/**< re-use decompression parameters from previous init; saves dictionary loading */<b>
+size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds);
+</b></pre><BR>
+<a name="Chapter15"></a><h2>Buffer-less and synchronous inner streaming functions</h2><pre>
+  This is an advanced API, giving full control over buffer management, for users which need direct control over memory.
+  But it's also a complex one, with many restrictions (documented below).
+  Prefer using normal streaming API for an easier experience 
+ 
+<BR></pre>
+
+<a name="Chapter16"></a><h2>Buffer-less streaming compression (synchronous mode)</h2><pre>
+  A ZSTD_CCtx object is required to track streaming operations.
+  Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource.
+  ZSTD_CCtx object can be re-used multiple times within successive compression operations.
+
+  Start by initializing a context.
+  Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression,
+  or ZSTD_compressBegin_advanced(), for finer parameter control.
+  It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx()
+
+  Then, consume your input using ZSTD_compressContinue().
+  There are some important considerations to keep in mind when using this advanced function :
+  - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffer only.
+  - Interface is synchronous : input is consumed entirely and produce 1+ (or more) compressed blocks.
+  - Caller must ensure there is enough space in `dst` to store compressed data under worst case scenario.
+    Worst case evaluation is provided by ZSTD_compressBound().
+    ZSTD_compressContinue() doesn't guarantee recover after a failed compression.
+  - ZSTD_compressContinue() presumes prior input ***is still accessible and unmodified*** (up to maximum distance size, see WindowLog).
+    It remembers all previous contiguous blocks, plus one separated memory segment (which can itself consists of multiple contiguous blocks)
+  - ZSTD_compressContinue() detects that prior input has been overwritten when `src` buffer overlaps.
+    In which case, it will "discard" the relevant memory section from its history.
+
+  Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum.
+  It's possible to use a NULL,0 src content, in which case, it will write a final empty block to end the frame,
+  Without last block mark, frames will be considered unfinished (broken) by decoders.
+
+  You can then reuse `ZSTD_CCtx` (ZSTD_compressBegin()) to compress some new frame.
+<BR></pre>
+
+<h3>Buffer-less streaming compression functions</h3><pre><b>size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
+size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
+size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize);
+size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize);
+size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+</b></pre><BR>
+<a name="Chapter17"></a><h2>Buffer-less streaming decompression (synchronous mode)</h2><pre>
+  A ZSTD_DCtx object is required to track streaming operations.
+  Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it.
+  A ZSTD_DCtx object can be re-used multiple times.
+
+  First typical operation is to retrieve frame parameters, using ZSTD_getFrameParams().
+  It fills a ZSTD_frameParams structure which provide important information to correctly decode the frame,
+  such as the minimum rolling buffer size to allocate to decompress data (`windowSize`),
+  and the dictionary ID used.
+  (Note : content size is optional, it may not be present. 0 means : content size unknown).
+  Note that these values could be wrong, either because of data malformation, or because an attacker is spoofing deliberate false information.
+  As a consequence, check that values remain within valid application range, especially `windowSize`, before allocation.
+  Each application can set its own limit, depending on local restrictions. For extended interoperability, it is recommended to support at least 8 MB.
+  Frame parameters are extracted from the beginning of the compressed frame.
+  Data fragment must be large enough to ensure successful decoding, typically `ZSTD_frameHeaderSize_max` bytes.
+  @result : 0 : successful decoding, the `ZSTD_frameParams` structure is correctly filled.
+           >0 : `srcSize` is too small, please provide at least @result bytes on next attempt.
+           errorCode, which can be tested using ZSTD_isError().
+
+  Start decompression, with ZSTD_decompressBegin() or ZSTD_decompressBegin_usingDict().
+  Alternatively, you can copy a prepared context, using ZSTD_copyDCtx().
+
+  Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively.
+  ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue().
+  ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail.
+
+  @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity).
+  It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some metadata item.
+  It can also be an error code, which can be tested with ZSTD_isError().
+
+  ZSTD_decompressContinue() needs previous data blocks during decompression, up to `windowSize`.
+  They should preferably be located contiguously, prior to current block.
+  Alternatively, a round buffer of sufficient size is also possible. Sufficient size is determined by frame parameters.
+  ZSTD_decompressContinue() is very sensitive to contiguity,
+  if 2 blocks don't follow each other, make sure that either the compressor breaks contiguity at the same place,
+  or that previous contiguous segment is large enough to properly handle maximum back-reference.
+
+  A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero.
+  Context can then be reset to start a new decompression.
+
+  Note : it's possible to know if next input to present is a header or a block, using ZSTD_nextInputType().
+  This information is not required to properly decode a frame.
+
+  == Special case : skippable frames ==
+
+  Skippable frames allow integration of user-defined data into a flow of concatenated frames.
+  Skippable frames will be ignored (skipped) by a decompressor. The format of skippable frames is as follows :
+  a) Skippable frame ID - 4 Bytes, Little endian format, any value from 0x184D2A50 to 0x184D2A5F
+  b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits
+  c) Frame Content - any content (User Data) of length equal to Frame Size
+  For skippable frames ZSTD_decompressContinue() always returns 0.
+  For skippable frames ZSTD_getFrameParams() returns fparamsPtr->windowLog==0 what means that a frame is skippable.
+  It also returns Frame Size as fparamsPtr->frameContentSize.
+<BR></pre>
+
+<pre><b>typedef struct {
+    unsigned long long frameContentSize;
+    unsigned windowSize;
+    unsigned dictID;
+    unsigned checksumFlag;
+} ZSTD_frameParams;
+</b></pre><BR>
+<h3>Buffer-less streaming decompression functions</h3><pre><b>size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t srcSize);   </b>/**< doesn't consume input, see details below */<b>
+size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx);
+size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
+void   ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx);
+size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx);
+size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e;
+ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
+</b></pre><BR>
+<a name="Chapter18"></a><h2>Block functions</h2><pre>
+    Block functions produce and decode raw zstd blocks, without frame metadata.
+    Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes).
+    User will have to take in charge required information to regenerate data, such as compressed and content sizes.
+
+    A few rules to respect :
+    - Compressing and decompressing require a context structure
+      + Use ZSTD_createCCtx() and ZSTD_createDCtx()
+    - It is necessary to init context before starting
+      + compression : ZSTD_compressBegin()
+      + decompression : ZSTD_decompressBegin()
+      + variants _usingDict() are also allowed
+      + copyCCtx() and copyDCtx() work too
+    - Block size is limited, it must be <= ZSTD_getBlockSizeMax()
+      + If you need to compress more, cut data into multiple blocks
+      + Consider using the regular ZSTD_compress() instead, as frame metadata costs become negligible when source size is large.
+    - When a block is considered not compressible enough, ZSTD_compressBlock() result will be zero.
+      In which case, nothing is produced into `dst`.
+      + User must test for such outcome and deal directly with uncompressed data
+      + ZSTD_decompressBlock() doesn't accept uncompressed data as input !!!
+      + In case of multiple successive blocks, decoder must be informed of uncompressed block existence to follow proper history.
+        Use ZSTD_insertBlock() in such a case.
+<BR></pre>
+
+<h3>Raw zstd block functions</h3><pre><b>size_t ZSTD_getBlockSizeMax(ZSTD_CCtx* cctx);
+size_t ZSTD_compressBlock  (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize);  </b>/**< insert block into `dctx` history. Useful for uncompressed blocks */<b>
+</b></pre><BR>
+</html>
+</body>
diff --git a/lib/compress/huf_compress.c b/lib/compress/huf_compress.c
index 41de7449a..78784aa36 100644
--- a/lib/compress/huf_compress.c
+++ b/lib/compress/huf_compress.c
@@ -155,13 +155,14 @@ size_t HUF_readCTable (HUF_CElt* CTable, U32 maxSymbolValue, const void* src, si
     }   }
 
     /* fill val */
-    {   U16 nbPerRank[HUF_TABLELOG_MAX+2] = {0};
+    {   U16 nbPerRank[HUF_TABLELOG_MAX+2]  = {0};  /* support w=0=>n=tableLog+1 */
         U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
         { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; }
         /* determine stating value per rank */
+        valPerRank[tableLog+1] = 0;   /* for w==0 */
         {   U16 min = 0;
-            U32 n; for (n=HUF_TABLELOG_MAX; n>0; n--) {
-                valPerRank[n] = min;      /* get starting value within each rank */
+            U32 n; for (n=tableLog; n>0; n--) {  /* start at n=tablelog <-> w=1 */
+                valPerRank[n] = min;     /* get starting value within each rank */
                 min += nbPerRank[n];
                 min >>= 1;
         }   }
diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
index fb12898e8..1d4450aa5 100644
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@@ -377,7 +377,7 @@ static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
 *  Block entropic compression
 *********************************************************/
 
-/* See zstd_compression_format.md for detailed format description */
+/* See doc/zstd_compression_format.md for detailed format description */
 
 size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
 {
diff --git a/lib/compress/zstd_opt.h b/lib/compress/zstd_opt.h
index cea67056a..5e4cb7c27 100644
--- a/lib/compress/zstd_opt.h
+++ b/lib/compress/zstd_opt.h
@@ -1,5 +1,5 @@
 /**
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under the BSD-style license found in the
@@ -16,6 +16,7 @@
 
 
 #define ZSTD_FREQ_DIV   5
+#define ZSTD_MAX_PRICE  (1<<30)
 
 /*-*************************************
 *  Price functions for optimal parser
@@ -171,7 +172,7 @@ MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const B
 
 #define SET_PRICE(pos, mlen_, offset_, litlen_, price_)   \
     {                                                 \
-        while (last_pos < pos)  { opt[last_pos+1].price = 1<<30; last_pos++; } \
+        while (last_pos < pos)  { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } \
         opt[pos].mlen = mlen_;                         \
         opt[pos].off = offset_;                        \
         opt[pos].litlen = litlen_;                     \
@@ -401,7 +402,6 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
     ZSTD_rescaleFreqs(seqStorePtr);
     ip += (ip==prefixStart);
     { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; }
-    //inr = ip;
 
     /* Match Loop */
     while (ip < ilimit) {
@@ -496,7 +496,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
                 opt[cur].rep[0] = ((opt[cur].off==ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]);
            }
 
-           best_mlen = minMatch;
+            best_mlen = minMatch;
             {   U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1);
                 for (i=(opt[cur].mlen != 1); i<last_i; i++) {  /* check rep */
                     const S32 repCur = ((i==ZSTD_REP_MOVE_OPT) && (opt[cur].mlen != 1)) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
@@ -510,21 +510,20 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
                        }
 
                        best_off = i - (opt[cur].mlen != 1);
+                       if (mlen > best_mlen) best_mlen = mlen;
 
-                       if (opt[cur].mlen == 1) {
-                            litlen = opt[cur].litlen;
-                            if (cur > litlen) {
-                                price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH);
-                            } else
-                                price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH);
-                        } else {
-                            litlen = 0;
-                            price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH);
-                        }
+                       do {
+                           if (opt[cur].mlen == 1) {
+                                litlen = opt[cur].litlen;
+                                if (cur > litlen) {
+                                    price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH);
+                                } else
+                                    price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH);
+                            } else {
+                                litlen = 0;
+                                price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH);
+                            }
 
-                        if (mlen > best_mlen) best_mlen = mlen;
-
-                        do {
                             if (cur + mlen > last_pos || price <= opt[cur + mlen].price)
                                 SET_PRICE(cur + mlen, mlen, i, litlen, price);
                             mlen--;
@@ -657,7 +656,6 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
     ctx->nextToUpdate3 = ctx->nextToUpdate;
     ZSTD_rescaleFreqs(seqStorePtr);
     ip += (ip==prefixStart);
-    //inr = ip;
 
     /* Match Loop */
     while (ip < ilimit) {
@@ -666,7 +664,6 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
         U32 current = (U32)(ip-base);
         memset(opt, 0, sizeof(ZSTD_optimal_t));
         last_pos = 0;
-        //inr = ip;
         opt[0].litlen = (U32)(ip - anchor);
 
         /* check repCode */
@@ -765,8 +762,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
                 opt[cur].rep[0] = ((opt[cur].off==ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]);
             }
 
-            best_mlen = 0;
-
+            best_mlen = minMatch;
             {   U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1);
                 for (i = (mlen != 1); i<last_i; i++) {
                     const S32 repCur = ((i==ZSTD_REP_MOVE_OPT) && (opt[cur].mlen != 1)) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
@@ -786,20 +782,20 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
                         }
 
                         best_off = i - (opt[cur].mlen != 1);
-                        if (opt[cur].mlen == 1) {
-                            litlen = opt[cur].litlen;
-                            if (cur > litlen) {
-                                price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH);
-                            } else
-                                price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH);
-                        } else {
-                            litlen = 0;
-                            price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH);
-                        }
-
-                        best_mlen = mlen;
+                        if (mlen > best_mlen) best_mlen = mlen;
 
                         do {
+                            if (opt[cur].mlen == 1) {
+                                litlen = opt[cur].litlen;
+                                if (cur > litlen) {
+                                    price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH);
+                                } else
+                                    price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH);
+                            } else {
+                                litlen = 0;
+                                price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH);
+                            }
+
                             if (cur + mlen > last_pos || price <= opt[cur + mlen].price)
                                 SET_PRICE(cur + mlen, mlen, i, litlen, price);
                             mlen--;
@@ -815,8 +811,6 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
                 goto _storeSequence;
             }
 
-            best_mlen = (best_mlen > minMatch) ? best_mlen : minMatch;
-
             /* set prices using matches at position = cur */
             for (u = 0; u < match_num; u++) {
                 mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c
index de5f46eef..2a1232eee 100644
--- a/lib/decompress/zstd_decompress.c
+++ b/lib/decompress/zstd_decompress.c
@@ -191,7 +191,7 @@ static void ZSTD_refDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
 *   Decompression section
 ***************************************************************/
 
-/* See compression format details in : zstd_compression_format.md */
+/* See compression format details in : doc/zstd_compression_format.md */
 
 /** ZSTD_frameHeaderSize() :
 *   srcSize must be >= ZSTD_frameHeaderSize_prefix.
@@ -301,14 +301,16 @@ unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize)
 
 
 /** ZSTD_decodeFrameHeader() :
-*   `srcSize` must be the size provided by ZSTD_frameHeaderSize().
+*   `headerSize` must be the size provided by ZSTD_frameHeaderSize().
 *   @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */
-static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t srcSize)
+static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize)
 {
-    size_t const result = ZSTD_getFrameParams(&(dctx->fParams), src, srcSize);
+    size_t const result = ZSTD_getFrameParams(&(dctx->fParams), src, headerSize);
+    if (ZSTD_isError(result)) return result;  /* invalid header */
+    if (result>0) return ERROR(srcSize_wrong);   /* headerSize too small */
     if (dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID)) return ERROR(dictionary_wrong);
     if (dctx->fParams.checksumFlag) XXH64_reset(&dctx->xxhState, 0);
-    return result;
+    return 0;
 }
 
 
@@ -810,7 +812,8 @@ static seq_t ZSTD_decodeSequence(seqState_t* seqState)
         if (ofCode <= 1) {
             offset += (llCode==0);
             if (offset) {
-                size_t const temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
+                size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
+                temp += !temp;   /* 0 is not valid; input is corrupted; force offset to 1 */
                 if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
                 seqState->prevOffset[1] = seqState->prevOffset[0];
                 seqState->prevOffset[0] = offset = temp;
@@ -882,7 +885,7 @@ size_t ZSTD_execSequence(BYTE* op,
             sequence.matchLength -= length1;
             match = base;
             if (op > oend_w) {
-              memmove(op, match, sequence.matchLength);
+              while (op < oMatchEnd) *op++ = *match++;
               return sequenceLength;
             }
     }   }
diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c
index a88e70520..b3f20b12b 100644
--- a/lib/dictBuilder/zdict.c
+++ b/lib/dictBuilder/zdict.c
@@ -371,21 +371,22 @@ static dictItem ZDICT_analyzePos(
 static U32 ZDICT_checkMerge(dictItem* table, dictItem elt, U32 eltNbToSkip)
 {
     const U32 tableSize = table->pos;
-    const U32 max = elt.pos + (elt.length-1);
+    const U32 eltEnd = elt.pos + elt.length;
 
     /* tail overlap */
     U32 u; for (u=1; u<tableSize; u++) {
         if (u==eltNbToSkip) continue;
-        if ((table[u].pos > elt.pos) && (table[u].pos < max)) {  /* overlap */
+        if ((table[u].pos > elt.pos) && (table[u].pos <= eltEnd)) {  /* overlap, existing > new */
             /* append */
             U32 addedLength = table[u].pos - elt.pos;
             table[u].length += addedLength;
             table[u].pos = elt.pos;
             table[u].savings += elt.savings * addedLength / elt.length;   /* rough approx */
-            table[u].savings += elt.length / 8;    /* rough approx */
+            table[u].savings += elt.length / 8;    /* rough approx bonus */
             elt = table[u];
+            /* sort : improve rank */
             while ((u>1) && (table[u-1].savings < elt.savings))
-                table[u] = table[u-1], u--;
+            table[u] = table[u-1], u--;
             table[u] = elt;
             return u;
     }   }
@@ -393,14 +394,15 @@ static U32 ZDICT_checkMerge(dictItem* table, dictItem elt, U32 eltNbToSkip)
     /* front overlap */
     for (u=1; u<tableSize; u++) {
         if (u==eltNbToSkip) continue;
-        if ((table[u].pos + table[u].length > elt.pos) && (table[u].pos < elt.pos)) {  /* overlap */
+        if ((table[u].pos + table[u].length >= elt.pos) && (table[u].pos < elt.pos)) {  /* overlap, existing < new */
             /* append */
-            int addedLength = (elt.pos + elt.length) - (table[u].pos + table[u].length);
-            table[u].savings += elt.length / 8;    /* rough approx */
-            if (addedLength > 0) {   /* otherwise, already included */
+            int addedLength = (int)eltEnd - (table[u].pos + table[u].length);
+            table[u].savings += elt.length / 8;    /* rough approx bonus */
+            if (addedLength > 0) {   /* otherwise, elt fully included into existing */
                 table[u].length += addedLength;
                 table[u].savings += elt.savings * addedLength / elt.length;   /* rough approx */
             }
+            /* sort : improve rank */
             elt = table[u];
             while ((u>1) && (table[u-1].savings < elt.savings))
                 table[u] = table[u-1], u--;
diff --git a/lib/legacy/zstd_v04.c b/lib/legacy/zstd_v04.c
index 60479cb79..0a740baca 100644
--- a/lib/legacy/zstd_v04.c
+++ b/lib/legacy/zstd_v04.c
@@ -3111,7 +3111,7 @@ static size_t ZSTD_execSequence(BYTE* op,
             sequence.matchLength -= length1;
             match = base;
             if (op > oend_8) {
-              memmove(op, match, sequence.matchLength);
+              while (op < oMatchEnd) *op++ = *match++;
               return sequenceLength;
             }
         }
diff --git a/lib/legacy/zstd_v05.c b/lib/legacy/zstd_v05.c
index 2fde052c2..201bf3c6b 100644
--- a/lib/legacy/zstd_v05.c
+++ b/lib/legacy/zstd_v05.c
@@ -2035,13 +2035,14 @@ size_t HUFv05_decompress1X2_usingDTable(
 {
     BYTE* op = (BYTE*)dst;
     BYTE* const oend = op + dstSize;
-    size_t errorCode;
     const U32 dtLog = DTable[0];
     const void* dtPtr = DTable;
     const HUFv05_DEltX2* const dt = ((const HUFv05_DEltX2*)dtPtr)+1;
     BITv05_DStream_t bitD;
-    errorCode = BITv05_initDStream(&bitD, cSrc, cSrcSize);
-    if (HUFv05_isError(errorCode)) return errorCode;
+
+    if (dstSize <= cSrcSize) return ERROR(dstSize_tooSmall);
+    { size_t const errorCode = BITv05_initDStream(&bitD, cSrc, cSrcSize);
+      if (HUFv05_isError(errorCode)) return errorCode; }
 
     HUFv05_decodeStreamX2(op, &bitD, oend, dt, dtLog);
 
@@ -3070,7 +3071,7 @@ size_t ZSTDv05_decodeLiteralsBlock(ZSTDv05_DCtx* dctx,
 
 size_t ZSTDv05_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr,
                          FSEv05_DTable* DTableLL, FSEv05_DTable* DTableML, FSEv05_DTable* DTableOffb,
-                         const void* src, size_t srcSize)
+                         const void* src, size_t srcSize, U32 flagStaticTable)
 {
     const BYTE* const istart = (const BYTE* const)src;
     const BYTE* ip = istart;
@@ -3130,6 +3131,7 @@ size_t ZSTDv05_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumps
             FSEv05_buildDTable_raw(DTableLL, LLbits);
             break;
         case FSEv05_ENCODING_STATIC:
+            if (!flagStaticTable) return ERROR(corruption_detected);
             break;
         case FSEv05_ENCODING_DYNAMIC :
         default :   /* impossible */
@@ -3153,6 +3155,7 @@ size_t ZSTDv05_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumps
             FSEv05_buildDTable_raw(DTableOffb, Offbits);
             break;
         case FSEv05_ENCODING_STATIC:
+            if (!flagStaticTable) return ERROR(corruption_detected);
             break;
         case FSEv05_ENCODING_DYNAMIC :
         default :   /* impossible */
@@ -3176,6 +3179,7 @@ size_t ZSTDv05_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumps
             FSEv05_buildDTable_raw(DTableML, MLbits);
             break;
         case FSEv05_ENCODING_STATIC:
+            if (!flagStaticTable) return ERROR(corruption_detected);
             break;
         case FSEv05_ENCODING_DYNAMIC :
         default :   /* impossible */
@@ -3325,7 +3329,7 @@ static size_t ZSTDv05_execSequence(BYTE* op,
             sequence.matchLength -= length1;
             match = base;
             if (op > oend_8) {
-              memmove(op, match, sequence.matchLength);
+              while (op < oMatchEnd) *op++ = *match++;
               return sequenceLength;
             }
     }   }
@@ -3388,7 +3392,7 @@ static size_t ZSTDv05_decompressSequences(
     /* Build Decoding Tables */
     errorCode = ZSTDv05_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
                                       DTableLL, DTableML, DTableOffb,
-                                      ip, seqSize);
+                                      ip, seqSize, dctx->flagStaticTables);
     if (ZSTDv05_isError(errorCode)) return errorCode;
     ip += errorCode;
 
diff --git a/lib/legacy/zstd_v06.c b/lib/legacy/zstd_v06.c
index 3d6512846..b6fde3aa6 100644
--- a/lib/legacy/zstd_v06.c
+++ b/lib/legacy/zstd_v06.c
@@ -3474,7 +3474,7 @@ size_t ZSTDv06_execSequence(BYTE* op,
             sequence.matchLength -= length1;
             match = base;
             if (op > oend_8) {
-              memmove(op, match, sequence.matchLength);
+              while (op < oMatchEnd) *op++ = *match++;
               return sequenceLength;
             }
     }   }
diff --git a/lib/legacy/zstd_v07.c b/lib/legacy/zstd_v07.c
index ace64ede4..c7693f240 100644
--- a/lib/legacy/zstd_v07.c
+++ b/lib/legacy/zstd_v07.c
@@ -3697,7 +3697,7 @@ size_t ZSTDv07_execSequence(BYTE* op,
             sequence.matchLength -= length1;
             match = base;
             if (op > oend_w) {
-              memmove(op, match, sequence.matchLength);
+              while (op < oMatchEnd) *op++ = *match++;
               return sequenceLength;
             }
     }   }
diff --git a/lib/zstd.h b/lib/zstd.h
index c2f5f5d4c..1b6665c2b 100644
--- a/lib/zstd.h
+++ b/lib/zstd.h
@@ -1,4 +1,4 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
@@ -14,12 +14,12 @@
 extern "C" {
 #endif
 
-/*======   Dependency   ======*/
+/* ======   Dependency   ======*/
 #include <stddef.h>   /* size_t */
 
 
-/*======  Export for Windows  ======*/
-/*!
+/* ======  Export for Windows  ======*/
+/*
 *  ZSTD_DLL_EXPORT :
 *  Enable exporting of functions when building a Windows DLL
 */
@@ -30,7 +30,29 @@ extern "C" {
 #endif
 
 
-/*=======   Version   =======*/
+/*******************************************************************************************************
+  Introduction
+
+  Zstd, short for Zstandard, is a fast lossless compression algorithm, targeting real-time compression scenarios
+  at zlib-level and better compression ratios. The zstd compression library provides in-memory compression and
+  decompression functions. The library supports compression levels from 1 up to ZSTD_maxCLevel() which is 22.
+  Levels from 20 to 22 should be used with caution as they require about 300-1300 MB for compression.
+  Compression can be done in:
+    - a single step (described as Simple API)
+    - a single step, reusing a context (described as Explicit memory management)
+    - repeated calls of the compression function (described as Streaming compression)
+  The compression ratio achievable on small data can be highly improved using compression with a dictionary in:
+    - a single step (described as Simple dictionary API)
+    - a single step, reusing a dictionary (described as Fast dictionary API)
+
+  Advanced and experimantal functions can be accessed using #define ZSTD_STATIC_LINKING_ONLY before including zstd.h.
+  These APIs shall never be used with a dynamic library. 
+  They are not "stable", their definition may change in the future. Only static linking is allowed.
+*********************************************************************************************************/
+
+/*------   Version   ------*/
+ZSTDLIB_API unsigned ZSTD_versionNumber (void);  /**< returns version number of ZSTD */
+
 #define ZSTD_VERSION_MAJOR    1
 #define ZSTD_VERSION_MINOR    1
 #define ZSTD_VERSION_RELEASE  1
@@ -41,10 +63,9 @@ extern "C" {
 #define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION)
 
 #define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
-ZSTDLIB_API unsigned ZSTD_versionNumber (void);
 
 
-/* *************************************
+/***************************************
 *  Simple API
 ***************************************/
 /*! ZSTD_compress() :
@@ -91,29 +112,29 @@ ZSTDLIB_API unsigned    ZSTD_isError(size_t code);          /*!< tells if a `siz
 ZSTDLIB_API const char* ZSTD_getErrorName(size_t code);     /*!< provides readable string from an error code */
 
 
-/*-*************************************
+/***************************************
 *  Explicit memory management
 ***************************************/
-/** Compression context */
+/*= Compression context */
 typedef struct ZSTD_CCtx_s ZSTD_CCtx;
 ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void);
 ZSTDLIB_API size_t     ZSTD_freeCCtx(ZSTD_CCtx* cctx);
 
-/** ZSTD_compressCCtx() :
+/*! ZSTD_compressCCtx() :
     Same as ZSTD_compress(), requires an allocated ZSTD_CCtx (see ZSTD_createCCtx()) */
 ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel);
 
-/** Decompression context */
+/*= Decompression context */
 typedef struct ZSTD_DCtx_s ZSTD_DCtx;
 ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void);
 ZSTDLIB_API size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);
 
-/** ZSTD_decompressDCtx() :
+/*! ZSTD_decompressDCtx() :
 *   Same as ZSTD_decompress(), requires an allocated ZSTD_DCtx (see ZSTD_createDCtx()) */
 ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 
 
-/*-************************
+/**************************
 *  Simple dictionary API
 ***************************/
 /*! ZSTD_compress_usingDict() :
@@ -135,14 +156,18 @@ ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
                                        const void* dict,size_t dictSize);
 
 
-/*-**************************
-*  Fast Dictionary API
+/****************************
+*  Fast dictionary API
 ****************************/
+typedef struct ZSTD_CDict_s ZSTD_CDict;
+
 /*! ZSTD_createCDict() :
 *   Create a digested dictionary, ready to start compression operation without startup delay.
 *   `dict` can be released after ZSTD_CDict creation */
-typedef struct ZSTD_CDict_s ZSTD_CDict;
 ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel);
+
+/*! ZSTD_freeCDict() :
+*   Function frees memory allocated with ZSTD_createCDict() */
 ZSTDLIB_API size_t      ZSTD_freeCDict(ZSTD_CDict* CDict);
 
 /*! ZSTD_compress_usingCDict() :
@@ -154,11 +179,16 @@ ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
                                       const void* src, size_t srcSize,
                                       const ZSTD_CDict* cdict);
 
+
+typedef struct ZSTD_DDict_s ZSTD_DDict;
+
 /*! ZSTD_createDDict() :
 *   Create a digested dictionary, ready to start decompression operation without startup delay.
 *   `dict` can be released after creation */
-typedef struct ZSTD_DDict_s ZSTD_DDict;
 ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize);
+
+/*! ZSTD_freeDDict() :
+*   Function frees memory allocated with ZSTD_createDDict() */
 ZSTDLIB_API size_t      ZSTD_freeDDict(ZSTD_DDict* ddict);
 
 /*! ZSTD_decompress_usingDDict() :
@@ -170,7 +200,7 @@ ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
                                         const ZSTD_DDict* ddict);
 
 
-/*-**************************
+/****************************
 *  Streaming
 ****************************/
 
@@ -187,10 +217,9 @@ typedef struct ZSTD_outBuffer_s {
 } ZSTD_outBuffer;
 
 
-/*======   streaming compression   ======*/
 
 /*-***********************************************************************
-*  Streaming compression - howto
+*  Streaming compression - HowTo
 *
 *  A ZSTD_CStream object is required to track streaming operation.
 *  Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources.
@@ -225,23 +254,22 @@ typedef struct ZSTD_outBuffer_s {
 *
 * *******************************************************************/
 
+/*=====   Streaming compression functions   ======*/
 typedef struct ZSTD_CStream_s ZSTD_CStream;
 ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void);
 ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs);
-
-ZSTDLIB_API size_t ZSTD_CStreamInSize(void);    /**< recommended size for input buffer */
-ZSTDLIB_API size_t ZSTD_CStreamOutSize(void);   /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block in all circumstances. */
-
 ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel);
 ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
 ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
 ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
 
+ZSTDLIB_API size_t ZSTD_CStreamInSize(void);    /**< recommended size for input buffer */
+ZSTDLIB_API size_t ZSTD_CStreamOutSize(void);   /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block in all circumstances. */
+
 
-/*======   decompression   ======*/
 
 /*-***************************************************************************
-*  Streaming decompression howto
+*  Streaming decompression - HowTo
 *
 *  A ZSTD_DStream object is required to track streaming operations.
 *  Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources.
@@ -262,28 +290,29 @@ ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
 *            The return value is a suggested next input size (just an hint, to help latency).
 * *******************************************************************************/
 
+/*=====   Streaming decompression functions   =====*/
 typedef struct ZSTD_DStream_s ZSTD_DStream;
 ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void);
 ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds);
+ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds);
+ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
 
 ZSTDLIB_API size_t ZSTD_DStreamInSize(void);    /*!< recommended size for input buffer */
 ZSTDLIB_API size_t ZSTD_DStreamOutSize(void);   /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */
 
-ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds);
-ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
-
 
 
 #ifdef ZSTD_STATIC_LINKING_ONLY
 
-/* ====================================================================================
+/****************************************************************************************
+ * START OF ADVANCED AND EXPERIMENTAL FUNCTIONS
  * The definitions in this section are considered experimental.
  * They should never be used with a dynamic library, as they may change in the future.
  * They are provided for advanced usages.
  * Use them only in association with static linking.
- * ==================================================================================== */
+ * ***************************************************************************************/
 
-/*--- Constants ---*/
+/* --- Constants ---*/
 #define ZSTD_MAGICNUMBER            0xFD2FB528   /* v0.8 */
 #define ZSTD_MAGIC_SKIPPABLE_START  0x184D2A50U
 
@@ -310,7 +339,7 @@ static const size_t ZSTD_frameHeaderSize_max = ZSTD_FRAMEHEADERSIZE_MAX;
 static const size_t ZSTD_skippableHeaderSize = 8;  /* magic number + skippable frame length */
 
 
-/*--- Types ---*/
+/*--- Advanced types ---*/
 typedef enum { ZSTD_fast, ZSTD_dfast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2, ZSTD_btopt } ZSTD_strategy;   /* from faster to stronger */
 
 typedef struct {
@@ -334,13 +363,13 @@ typedef struct {
     ZSTD_frameParameters fParams;
 } ZSTD_parameters;
 
-/* custom memory allocation functions */
+/*= Custom memory allocation functions */
 typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size);
 typedef void  (*ZSTD_freeFunction) (void* opaque, void* address);
 typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem;
 
 
-/*-*************************************
+/***************************************
 *  Advanced compression functions
 ***************************************/
 /*! ZSTD_estimateCCtxSize() :
@@ -393,7 +422,7 @@ ZSTDLIB_API size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
                                            ZSTD_parameters params);
 
 
-/*--- Advanced Decompression functions ---*/
+/*--- Advanced decompression functions ---*/
 
 /*! ZSTD_estimateDCtxSize() :
  *  Gives the potential amount of memory allocated to create a ZSTD_DCtx */
@@ -412,12 +441,11 @@ ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx);
 ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
 
 
-/* ******************************************************************
-*  Advanced Streaming functions
+/********************************************************************
+*  Advanced streaming functions
 ********************************************************************/
 
-/*======   compression   ======*/
-
+/*=====   Advanced Streaming compression functions  =====*/
 ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
 ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel);
 ZSTDLIB_API size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize,
@@ -426,10 +454,8 @@ ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledg
 ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs);
 
 
-/*======   decompression   ======*/
-
+/*=====   Advanced Streaming decompression functions  =====*/
 typedef enum { ZSTDdsp_maxWindowSize } ZSTD_DStreamParameter_e;
-
 ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem);
 ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
 ZSTDLIB_API size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, ZSTD_DStreamParameter_e paramType, unsigned paramValue);
@@ -437,22 +463,17 @@ ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds);  /**< re-use decompress
 ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds);
 
 
-/* ******************************************************************
+/*********************************************************************
 *  Buffer-less and synchronous inner streaming functions
-********************************************************************/
-/* This is an advanced API, giving full control over buffer management, for users which need direct control over memory.
+*
+*  This is an advanced API, giving full control over buffer management, for users which need direct control over memory.
 *  But it's also a complex one, with many restrictions (documented below).
-*  Prefer using normal streaming API for an easier experience */
+*  Prefer using normal streaming API for an easier experience 
+********************************************************************* */
 
-ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
-ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
-ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize);
-ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize);
+/**
+  Buffer-less streaming compression (synchronous mode)
 
-ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-
-/*
   A ZSTD_CCtx object is required to track streaming operations.
   Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource.
   ZSTD_CCtx object can be re-used multiple times within successive compression operations.
@@ -481,26 +502,17 @@ ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapaci
   You can then reuse `ZSTD_CCtx` (ZSTD_compressBegin()) to compress some new frame.
 */
 
-typedef struct {
-    unsigned long long frameContentSize;
-    unsigned windowSize;
-    unsigned dictID;
-    unsigned checksumFlag;
-} ZSTD_frameParams;
+/*=====   Buffer-less streaming compression functions  =====*/
+ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
+ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
+ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize);
+ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize);
+ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 
-ZSTDLIB_API size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t srcSize);   /**< doesn't consume input, see details below */
 
-ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx);
-ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
-ZSTDLIB_API void   ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx);
 
-ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx);
-ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-
-typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e;
-ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
-
-/*
+/*-
   Buffer-less streaming decompression (synchronous mode)
 
   A ZSTD_DCtx object is required to track streaming operations.
@@ -557,11 +569,27 @@ ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
   It also returns Frame Size as fparamsPtr->frameContentSize.
 */
 
+typedef struct {
+    unsigned long long frameContentSize;
+    unsigned windowSize;
+    unsigned dictID;
+    unsigned checksumFlag;
+} ZSTD_frameParams;
 
-/* **************************************
-*  Block functions
-****************************************/
-/*! Block functions produce and decode raw zstd blocks, without frame metadata.
+/*=====   Buffer-less streaming decompression functions  =====*/
+ZSTDLIB_API size_t ZSTD_getFrameParams(ZSTD_frameParams* fparamsPtr, const void* src, size_t srcSize);   /**< doesn't consume input, see details below */
+ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx);
+ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
+ZSTDLIB_API void   ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx);
+ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx);
+ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e;
+ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
+
+/**
+    Block functions
+
+    Block functions produce and decode raw zstd blocks, without frame metadata.
     Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes).
     User will have to take in charge required information to regenerate data, such as compressed and content sizes.
 
@@ -585,6 +613,7 @@ ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
 */
 
 #define ZSTD_BLOCKSIZE_ABSOLUTEMAX (128 * 1024)   /* define, for static allocation */
+/*=====   Raw zstd block functions  =====*/
 ZSTDLIB_API size_t ZSTD_getBlockSizeMax(ZSTD_CCtx* cctx);
 ZSTDLIB_API size_t ZSTD_compressBlock  (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
diff --git a/programs/Makefile b/programs/Makefile
index 06da7c44b..0ed7f3d07 100644
--- a/programs/Makefile
+++ b/programs/Makefile
@@ -76,17 +76,18 @@ default: zstd
 
 all: zstd
 
-
+$(ZSTDDECOMP_O): CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT)
+$(ZSTDDECOMP_O): CFLAGS += $(ALIGN_LOOP)
 $(ZSTDDECOMP_O): $(ZSTDDIR)/decompress/zstd_decompress.c
-	$(CC)    $(ALIGN_LOOP) $(FLAGS) -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) $^ -c -o $@
-
-$(ZSTDDECOMP32_O): $(ZSTDDIR)/decompress/zstd_decompress.c
-	$(CC)  -m32 $(ALIGN_LOOP) $(FLAGS) -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) $^ -c -o $@
 
 zstd  : $(ZSTDDECOMP_O) $(ZSTD_FILES) $(ZSTDLEGACY_FILES) $(ZDICT_FILES) \
         zstdcli.c fileio.c bench.c datagen.c dibio.c
 	$(CC)      $(FLAGS) -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) $^ $(RES_FILE) -o $@$(EXT)
 
+
+$(ZSTDDECOMP32_O): $(ZSTDDIR)/decompress/zstd_decompress.c
+	$(CC)  -m32 $(ALIGN_LOOP) $(FLAGS) -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) $^ -c -o $@
+
 zstd32 : $(ZSTDDECOMP32_O) $(ZSTD_FILES) $(ZSTDLEGACY_FILES) $(ZDICT_FILES) \
         zstdcli.c fileio.c bench.c datagen.c dibio.c
 	$(CC)  -m32 $(FLAGS) -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) $^ $(RES32_FILE) -o $@$(EXT)
diff --git a/programs/zstdcli.c b/programs/zstdcli.c
index 2df410431..9aa97cdd2 100644
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@@ -25,7 +25,6 @@
 **************************************/
 #include "util.h"     /* Compiler options, UTIL_HAS_CREATEFILELIST */
 #include <string.h>   /* strcmp, strlen */
-#include <ctype.h>    /* toupper */
 #include <errno.h>    /* errno */
 #include "fileio.h"
 #ifndef ZSTD_NOBENCH
@@ -182,10 +181,13 @@ static unsigned readU32FromChar(const char** stringPtr)
     unsigned result = 0;
     while ((**stringPtr >='0') && (**stringPtr <='9'))
         result *= 10, result += **stringPtr - '0', (*stringPtr)++ ;
-    if (toupper(**stringPtr)=='K') result <<= 10, (*stringPtr)++ ;
-    else if (toupper(**stringPtr)=='M') result <<= 20, (*stringPtr)++ ;
-    if (toupper(**stringPtr)=='i') (*stringPtr)++;
-    if (toupper(**stringPtr)=='B') (*stringPtr)++;
+    if ((**stringPtr=='K') || (**stringPtr=='M')) {
+        result <<= 10;
+        if (**stringPtr=='M') result <<= 10;
+        (*stringPtr)++ ;
+        if (**stringPtr=='i') (*stringPtr)++;
+        if (**stringPtr=='B') (*stringPtr)++;
+    }
     return result;
 }
 
diff --git a/zlibWrapper/Makefile b/zlibWrapper/Makefile
index 69c976fa5..0e4ca9e6f 100644
--- a/zlibWrapper/Makefile
+++ b/zlibWrapper/Makefile
@@ -14,6 +14,7 @@ ZSTDLIBRARY = $(ZSTDLIBDIR)/libzstd.a
 ZLIBWRAPPER_PATH = .
 EXAMPLE_PATH = examples
 PROGRAMS_PATH = ../programs
+TEST_FILE = ../doc/zstd_compression_format.md
 CC ?= gcc
 CFLAGS ?= -O3 
 CFLAGS += $(LOC) -I$(PROGRAMS_PATH) -I$(ZSTDLIBDIR) -I$(ZSTDLIBDIR)/common -I$(ZLIBWRAPPER_PATH) -std=gnu99
@@ -27,11 +28,11 @@ all: clean fitblk example zwrapbench
 test: example fitblk example_zstd fitblk_zstd zwrapbench
 	./example
 	./example_zstd
-	./fitblk 10240 <../zstd_compression_format.md
-	./fitblk 40960 <../zstd_compression_format.md
-	./fitblk_zstd 10240 <../zstd_compression_format.md
-	./fitblk_zstd 40960 <../zstd_compression_format.md
-	./zwrapbench -qb3B1K ../zstd_compression_format.md
+	./fitblk 10240 <$(TEST_FILE)
+	./fitblk 40960 <$(TEST_FILE)
+	./fitblk_zstd 10240 <$(TEST_FILE)
+	./fitblk_zstd 40960 <$(TEST_FILE)
+	./zwrapbench -qb3B1K $(TEST_FILE)
 	./zwrapbench -rqb1e5 ../lib ../programs ../tests
 
 #valgrindTest: ZSTDLIBRARY = $(ZSTDLIBDIR)/libzstd.so
@@ -40,11 +41,11 @@ valgrindTest: clean example fitblk example_zstd fitblk_zstd zwrapbench
 	@echo "\n ---- valgrind tests ----"
 	$(VALGRIND) ./example
 	$(VALGRIND) ./example_zstd
-	$(VALGRIND) ./fitblk 10240 <../zstd_compression_format.md
-	$(VALGRIND) ./fitblk 40960 <../zstd_compression_format.md
-	$(VALGRIND) ./fitblk_zstd 10240 <../zstd_compression_format.md
-	$(VALGRIND) ./fitblk_zstd 40960 <../zstd_compression_format.md
-	$(VALGRIND) ./zwrapbench -qb3B1K ../zstd_compression_format.md
+	$(VALGRIND) ./fitblk 10240 <$(TEST_FILE)
+	$(VALGRIND) ./fitblk 40960 <$(TEST_FILE)
+	$(VALGRIND) ./fitblk_zstd 10240 <$(TEST_FILE)
+	$(VALGRIND) ./fitblk_zstd 40960 <$(TEST_FILE)
+	$(VALGRIND) ./zwrapbench -qb3B1K $(TEST_FILE)
 	$(VALGRIND) ./zwrapbench -rqb1e5 ../lib ../programs ../tests
 
 .c.o: