From c53eea7c1a84e0afd8eebc849e499a7e9948781c Mon Sep 17 00:00:00 2001 From: OBATA Akio Date: Tue, 29 Nov 2016 16:47:53 +0900 Subject: [PATCH 1/7] libzstd.pc.in: Change to use variables for libdir and includedir --- lib/libzstd.pc.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/libzstd.pc.in b/lib/libzstd.pc.in index 9399363dd..703da060b 100644 --- a/lib/libzstd.pc.in +++ b/lib/libzstd.pc.in @@ -10,5 +10,5 @@ Name: zstd Description: lossless compression algorithm library URL: https://github.com/facebook/zstd Version: @VERSION@ -Libs: -L@LIBDIR@ -lzstd -Cflags: -I@INCLUDEDIR@ +Libs: -L${libdir} -lzstd +Cflags: -I${includedir} From 1cc37ad8eed7b88bba28f7e25e9fbb843e8eb0a5 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Tue, 29 Nov 2016 18:06:59 -0800 Subject: [PATCH 2/7] modified NEWS --- NEWS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS b/NEWS index 28ea1c85d..79ceec771 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,5 @@ v1.1.2 -Improved : better decompression speed at high compression settings +Improved : faster decompression speed at ultra compression settings and in 32-bits mode cli : new : preserve file attributes, by Przemyslaw Skibinski cli : fixed : status displays total amount decoded when stream/file consists of multiple appended frames (like pzstd) API : changed : zbuff prototypes now generate deprecation warnings From 95eb43be09e84f229a953697a5e5b7644e27eb01 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 30 Nov 2016 11:06:58 -0800 Subject: [PATCH 3/7] updated pkg config file --- lib/libzstd.pc.in | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/libzstd.pc.in b/lib/libzstd.pc.in index 703da060b..1d07b91f2 100644 --- a/lib/libzstd.pc.in +++ b/lib/libzstd.pc.in @@ -1,5 +1,5 @@ # ZSTD - standard compression algorithm -# Copyright (C) 2014-2015, Yann Collet. +# Copyright (C) 2014-2016, Yann Collet, Facebook # BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) prefix=@PREFIX@ @@ -7,8 +7,8 @@ libdir=@LIBDIR@ includedir=@INCLUDEDIR@ Name: zstd -Description: lossless compression algorithm library -URL: https://github.com/facebook/zstd +Description: fast lossless compression algorithm library +URL: http://www.zstd.net/ Version: @VERSION@ Libs: -L${libdir} -lzstd Cflags: -I${includedir} From 766431909f61f450d6e93c7e4405fdbabfb0c175 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 30 Nov 2016 12:36:45 -0800 Subject: [PATCH 4/7] introduced FSE_decompress_wksp(), to use less stack space --- lib/common/entropy_common.c | 3 ++- lib/common/fse.h | 5 ++++- lib/common/fse_decompress.c | 36 ++++++++++++++++++------------------ 3 files changed, 24 insertions(+), 20 deletions(-) diff --git a/lib/common/entropy_common.c b/lib/common/entropy_common.c index 18bba0e87..8a31c4696 100644 --- a/lib/common/entropy_common.c +++ b/lib/common/entropy_common.c @@ -187,8 +187,9 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, huffWeight[n+1] = ip[n/2] & 15; } } } else { /* header compressed with FSE (normal case) */ + FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(5)]; /* 5 is max possible tableLog for HUF header */ if (iSize+1 > srcSize) return ERROR(srcSize_wrong); - oSize = FSE_decompress(huffWeight, hwSize-1, ip+1, iSize); /* max (hwSize-1) values decoded, as last one is implied */ + oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 5); /* max (hwSize-1) values decoded, as last one is implied */ if (FSE_isError(oSize)) return oSize; } diff --git a/lib/common/fse.h b/lib/common/fse.h index cecb1aef7..2f4506330 100644 --- a/lib/common/fse.h +++ b/lib/common/fse.h @@ -286,7 +286,7 @@ If there is an error, the function will return an error code, which can be teste #define FSE_BLOCKBOUND(size) (size + (size>>7)) #define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ -/* It is possible to statically allocate FSE CTable/DTable as a table of unsigned using below macros */ +/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */ #define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2)) #define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<= cSrcSize) return ERROR(srcSize_wrong); /* too small input size */ - ip += NCountLength; - cSrcSize -= NCountLength; - } + size_t const NCountLength = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize); + if (FSE_isError(NCountLength)) return NCountLength; + //if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong); /* too small input size; supposed to be already checked in NCountLength, only remaining case : NCountLength==cSrcSize */ + if (tableLog > maxLog) return ERROR(tableLog_tooLarge); + ip += NCountLength; + cSrcSize -= NCountLength; - CHECK_F( FSE_buildDTable (dt, counting, maxSymbolValue, tableLog) ); + CHECK_F( FSE_buildDTable (workSpace, counting, maxSymbolValue, tableLog) ); - return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt); /* always return, even if it is an error code */ + return FSE_decompress_usingDTable (dst, dstCapacity, ip, cSrcSize, workSpace); /* always return, even if it is an error code */ +} + + +typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)]; + +size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize) +{ + DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */ + return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, dt, FSE_MAX_TABLELOG); } From d79a9a00d91f6feb60d43906ee849438b1703c0c Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 30 Nov 2016 15:52:20 -0800 Subject: [PATCH 5/7] Introduced FSE_compress_wksp() and FSE_buildCTable_wksp() to reduce stack memory usage --- lib/common/fse.h | 13 +++++ lib/compress/fse_compress.c | 104 +++++++++++++++++++++-------------- lib/compress/huf_compress.c | 15 ++--- lib/compress/zstd_compress.c | 13 +++-- 4 files changed, 90 insertions(+), 55 deletions(-) diff --git a/lib/common/fse.h b/lib/common/fse.h index 2f4506330..9de22b5e1 100644 --- a/lib/common/fse.h +++ b/lib/common/fse.h @@ -300,12 +300,25 @@ size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, const void* s unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus); /**< same as FSE_optimalTableLog(), which used `minus==2` */ +/* FSE_compress_wksp() : + * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`). + * `workSpace` size must be `(1< wkspSize) return ERROR(tableLog_tooLarge); tableU16[-2] = (U16) tableLog; tableU16[-1] = (U16) maxSymbolValue; @@ -181,6 +188,13 @@ size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned } +size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) +{ + FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* memset() is not necessary, even if static analyzer complain about it */ + return FSE_buildCTable_wksp(ct, normalizedCounter, maxSymbolValue, tableLog, tableSymbol, sizeof(tableSymbol)); +} + + #ifndef FSE_COMMONDEFS_ONLY @@ -189,7 +203,7 @@ size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned ****************************************************************/ size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog) { - size_t maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3; + size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3; return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */ } @@ -486,7 +500,7 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, U32 ToDistribute; /* Init */ - U32 lowThreshold = (U32)(total >> tableLog); + U32 const lowThreshold = (U32)(total >> tableLog); U32 lowOne = (U32)((total * 3) >> (tableLog + 1)); for (s=0; s<=maxSymbolValue; s++) { @@ -534,17 +548,16 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, return 0; } - { - U64 const vStepLog = 62 - tableLog; + { U64 const vStepLog = 62 - tableLog; U64 const mid = (1ULL << (vStepLog-1)) - 1; U64 const rStep = ((((U64)1<> vStepLog); - U32 sEnd = (U32)(end >> vStepLog); - U32 weight = sEnd - sStart; + U64 const end = tmpTotal + (count[s] * rStep); + U32 const sStart = (U32)(tmpTotal >> vStepLog); + U32 const sEnd = (U32)(end >> vStepLog); + U32 const weight = sEnd - sStart; if (weight < 1) return ERROR(GENERIC); norm[s] = (short)weight; @@ -566,7 +579,6 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */ { U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 }; - U64 const scale = 62 - tableLog; U64 const step = ((U64)1<<62) / total; /* <== here, one division ! */ U64 const vStep = 1ULL<<(scale-20); @@ -594,7 +606,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, } } if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) { /* corner case, need another normalization method */ - size_t errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue); + size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue); if (FSE_isError(errorCode)) return errorCode; } else normalizedCounter[largest] += (short)stillToDistribute; @@ -643,17 +655,15 @@ size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits) /* Build Symbol Transformation Table */ { const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits); - for (s=0; s<=maxSymbolValue; s++) { symbolTT[s].deltaNbBits = deltaNbBits; symbolTT[s].deltaFindState = s-1; } } - return 0; } -/* fake FSE_CTable, for rle (100% always same symbol) input */ +/* fake FSE_CTable, for rle input (always same symbol) */ size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue) { void* ptr = ct; @@ -685,14 +695,13 @@ static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize, const BYTE* const iend = istart + srcSize; const BYTE* ip=iend; - BIT_CStream_t bitC; FSE_CState_t CState1, CState2; /* init */ if (srcSize <= 2) return 0; - { size_t const errorCode = BIT_initCStream(&bitC, dst, dstSize); - if (FSE_isError(errorCode)) return 0; } + { size_t const initError = BIT_initCStream(&bitC, dst, dstSize); + if (FSE_isError(initError)) return 0; /* not enough space available to write a bitstream */ } #define FSE_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s)) @@ -715,7 +724,7 @@ static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize, } /* 2 or 4 encoding per loop */ - for ( ; ip>istart ; ) { + while ( ip>istart ) { FSE_encodeSymbol(&bitC, &CState2, *--ip); @@ -741,7 +750,7 @@ size_t FSE_compress_usingCTable (void* dst, size_t dstSize, const void* src, size_t srcSize, const FSE_CTable* ct) { - const unsigned fast = (dstSize >= FSE_BLOCKBOUND(srcSize)); + unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize)); if (fast) return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1); @@ -752,7 +761,14 @@ size_t FSE_compress_usingCTable (void* dst, size_t dstSize, size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); } -size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog) +#define CHECK_E_F(e, f) size_t const e = f; if (FSE_isError(e)) return f +#define CHECK_F(f) { CHECK_E_F(_var_err__, f); } + +/* FSE_compress_wksp() : + * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`). + * `workSpace` size must be `(1<> 7)) return 0; /* Heuristic : not compressible enough */ + { CHECK_E_F(maxCount, FSE_count(count, &maxSymbolValue, ip, srcSize) ); + if (maxCount == srcSize) return 1; /* only a single symbol in src : rle */ + if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */ + if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */ + } tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue); - errorCode = FSE_normalizeCount (norm, tableLog, count, srcSize, maxSymbolValue); - if (FSE_isError(errorCode)) return errorCode; + CHECK_F( FSE_normalizeCount (norm, tableLog, count, srcSize, maxSymbolValue) ); /* Write table description header */ - errorCode = FSE_writeNCount (op, oend-op, norm, maxSymbolValue, tableLog); - if (FSE_isError(errorCode)) return errorCode; - op += errorCode; + { CHECK_E_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) ); + op += nc_err; + } /* Compress */ - errorCode = FSE_buildCTable (ct, norm, maxSymbolValue, tableLog); - if (FSE_isError(errorCode)) return errorCode; - errorCode = FSE_compress_usingCTable(op, oend - op, ip, srcSize, ct); - if (errorCode == 0) return 0; /* not enough space for compressed data */ - op += errorCode; + CHECK_F( FSE_buildCTable_wksp(ct, norm, maxSymbolValue, tableLog, workSpace, 1<= srcSize-1 ) - return 0; + if ( (size_t)(op-ostart) >= srcSize-1 ) return 0; return op-ostart; } -size_t FSE_compress (void* dst, size_t dstSize, const void* src, size_t srcSize) +size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog) { - return FSE_compress2(dst, dstSize, src, (U32)srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG); + BYTE scratchBuffer[1 << FSE_MAX_TABLELOG]; + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); + return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, scratchBuffer); +} + +size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG); } diff --git a/lib/compress/huf_compress.c b/lib/compress/huf_compress.c index 78784aa36..d4ae35ecd 100644 --- a/lib/compress/huf_compress.c +++ b/lib/compress/huf_compress.c @@ -88,13 +88,15 @@ typedef struct nodeElt_s { size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, U32 maxSymbolValue, U32 huffLog) { - BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; + BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */ BYTE huffWeight[HUF_SYMBOLVALUE_MAX]; BYTE* op = (BYTE*)dst; +#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6 + BYTE scratchBuffer[1< HUF_SYMBOLVALUE_MAX) return ERROR(GENERIC); + if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); /* convert to weight */ bitsToWeight[0] = 0; @@ -103,23 +105,22 @@ size_t HUF_writeCTable (void* dst, size_t maxDstSize, for (n=0; n1) & (size < maxSymbolValue/2)) { /* FSE compressed */ op[0] = (BYTE)size; return size+1; - } - } + } } /* raw values */ - if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen */ + if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */ if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */ op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1)); huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause issue in final combination */ for (n=0; nsequences - seqStorePtr->sequencesStart; BYTE* seqHead; + BYTE scratchBuffer[1<litStart; @@ -601,7 +602,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { LLtype = set_repeat; } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LL_defaultNormLog-1)))) { - FSE_buildCTable(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog); + FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)); LLtype = set_basic; } else { size_t nbSeq_1 = nbSeq; @@ -611,7 +612,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ if (FSE_isError(NCountSize)) return ERROR(GENERIC); op += NCountSize; } - FSE_buildCTable(CTable_LitLength, norm, max, tableLog); + FSE_buildCTable_wksp(CTable_LitLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer)); LLtype = set_compressed; } } @@ -625,7 +626,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { Offtype = set_repeat; } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (OF_defaultNormLog-1)))) { - FSE_buildCTable(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog); + FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)); Offtype = set_basic; } else { size_t nbSeq_1 = nbSeq; @@ -635,7 +636,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ if (FSE_isError(NCountSize)) return ERROR(GENERIC); op += NCountSize; } - FSE_buildCTable(CTable_OffsetBits, norm, max, tableLog); + FSE_buildCTable_wksp(CTable_OffsetBits, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer)); Offtype = set_compressed; } } @@ -649,7 +650,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { MLtype = set_repeat; } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (ML_defaultNormLog-1)))) { - FSE_buildCTable(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog); + FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)); MLtype = set_basic; } else { size_t nbSeq_1 = nbSeq; @@ -659,7 +660,7 @@ size_t ZSTD_compressSequences(ZSTD_CCtx* zc, { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ if (FSE_isError(NCountSize)) return ERROR(GENERIC); op += NCountSize; } - FSE_buildCTable(CTable_MatchLength, norm, max, tableLog); + FSE_buildCTable_wksp(CTable_MatchLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer)); MLtype = set_compressed; } } From 5e00b848a837c82117b6cd6186837e8a7d0bf175 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 30 Nov 2016 16:46:13 -0800 Subject: [PATCH 6/7] FSE_compress_wksp() uses less stack space --- lib/common/fse.h | 12 ++++++------ lib/compress/fse_compress.c | 38 ++++++++++++++++++------------------- lib/compress/huf_compress.c | 4 ++-- 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/lib/common/fse.h b/lib/common/fse.h index 9de22b5e1..0588651e2 100644 --- a/lib/common/fse.h +++ b/lib/common/fse.h @@ -302,25 +302,25 @@ unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsi /* FSE_compress_wksp() : * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`). - * `workSpace` size must be `(1<2)?(maxTableLog-2):0)) ) +size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits); -/**< build a fake FSE_CTable, designed to not compress an input, where each symbol uses nbBits */ +/**< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */ size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue); /**< build a fake FSE_CTable, designed to compress always the same symbolValue */ /* FSE_buildCTable_wksp() : * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). - * wkspSize should be sized to handle worst case situation, which is `(1<= `(1<= sizeof(CTable_max_t)); /* A compilation error here means FSE_CTABLE_SIZE_U32 is not large enough */ - if (tableLog > FSE_MAX_TABLELOG) return ERROR(GENERIC); - size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32); - return size; + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); + return FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32); } FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog) @@ -766,9 +756,9 @@ size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); } /* FSE_compress_wksp() : * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`). - * `workSpace` size must be `(1<= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */ if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); - return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, scratchBuffer); + return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer)); } size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize) diff --git a/lib/compress/huf_compress.c b/lib/compress/huf_compress.c index d4ae35ecd..3a1c05110 100644 --- a/lib/compress/huf_compress.c +++ b/lib/compress/huf_compress.c @@ -92,7 +92,7 @@ size_t HUF_writeCTable (void* dst, size_t maxDstSize, BYTE huffWeight[HUF_SYMBOLVALUE_MAX]; BYTE* op = (BYTE*)dst; #define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6 - BYTE scratchBuffer[1<1) & (size < maxSymbolValue/2)) { /* FSE compressed */ op[0] = (BYTE)size; From 979cab412b4cf2962fb267833f54a14d83074a01 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 30 Nov 2016 18:10:38 -0800 Subject: [PATCH 7/7] fixed some minor visual silent cast warnings. introduced FSE_count_parallel_wksp(). --- lib/compress/fse_compress.c | 41 ++++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/lib/compress/fse_compress.c b/lib/compress/fse_compress.c index 6e949d654..e4b3dcaca 100644 --- a/lib/compress/fse_compress.c +++ b/lib/compress/fse_compress.c @@ -115,7 +115,7 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsi U32 highThreshold = tableSize-1; /* CTable header */ - if ((1< wkspSize) return ERROR(tableLog_tooLarge); + if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge); tableU16[-2] = (U16) tableLog; tableU16[-1] = (U16) maxSymbolValue; @@ -322,7 +322,6 @@ static size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, unsigned maxSymbolValue = *maxSymbolValuePtr; unsigned max=0; - memset(count, 0, (maxSymbolValue+1)*sizeof(*count)); if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; } @@ -337,20 +336,24 @@ static size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, } -static size_t FSE_count_parallel(unsigned* count, unsigned* maxSymbolValuePtr, +/* FSE_count_parallel_wksp() : + * Same as FSE_count_parallel(), but using an externally provided scratch buffer. + * `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`` */ +static size_t FSE_count_parallel_wksp( + unsigned* count, unsigned* maxSymbolValuePtr, const void* source, size_t sourceSize, - unsigned checkMax) + unsigned checkMax, unsigned* const workSpace) { const BYTE* ip = (const BYTE*)source; const BYTE* const iend = ip+sourceSize; unsigned maxSymbolValue = *maxSymbolValuePtr; unsigned max=0; + U32* const Counting1 = workSpace; + U32* const Counting2 = Counting1 + 256; + U32* const Counting3 = Counting2 + 256; + U32* const Counting4 = Counting3 + 256; - - U32 Counting1[256] = { 0 }; - U32 Counting2[256] = { 0 }; - U32 Counting3[256] = { 0 }; - U32 Counting4[256] = { 0 }; + memset(Counting1, 0, 4*256*sizeof(unsigned)); /* safety checks */ if (!sourceSize) { @@ -396,16 +399,26 @@ static size_t FSE_count_parallel(unsigned* count, unsigned* maxSymbolValuePtr, if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall); } } - { U32 s; for (s=0; s<=maxSymbolValue; s++) { - count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s]; - if (count[s] > max) max = count[s]; - }} + { U32 s; for (s=0; s<=maxSymbolValue; s++) { + count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s]; + if (count[s] > max) max = count[s]; + } } while (!count[maxSymbolValue]) maxSymbolValue--; *maxSymbolValuePtr = maxSymbolValue; return (size_t)max; } + +static size_t FSE_count_parallel(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize, + unsigned checkMax) +{ + U32 tmpCounters[1024]; + return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, checkMax, tmpCounters); +} + + /* fast variant (unsafe : won't check if src contains values beyond count[] limit) */ size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, const void* source, size_t sourceSize) @@ -417,7 +430,7 @@ size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const void* source, size_t sourceSize) { - if (*maxSymbolValuePtr <255) + if (*maxSymbolValuePtr < 255) return FSE_count_parallel(count, maxSymbolValuePtr, source, sourceSize, 1); *maxSymbolValuePtr = 255; return FSE_countFast(count, maxSymbolValuePtr, source, sourceSize);