Mirror of https://github.com/facebook/zstd.git (synced 2025-10-04 00:02:33 -04:00)
[huf] Add generic C versions of the fast decoding loops
Add generic C versions of the fast decoding loops to serve architectures that don't have an assembly implementation. Also allow selecting the C decoding loop over the assembly decoding loop through a zstd decompression parameter `ZSTD_d_disableHuffmanAssembly`.

I benchmarked on my Intel i9-9900K and my MacBook Air with an M1 processor. The benchmark command forces zstd to compress without any matches, using only literals compression, and measures only Huffman decompression speed:

```
zstd -b1e1 --compress-literals --zstd=tlen=131072 silesia.tar
```

The new fast decoding loops outperform the previous implementation uniformly, but don't beat the x86-64 assembly. Additionally, the fast C decoding loops suffer from the same stability problems that we've seen in the past, where the assembly version doesn't. So even though clang gets close to assembly on x86-64, it still has stability issues.

| Arch    | Function       | Compiler     | Default (MB/s) | Assembly (MB/s) | Fast (MB/s) |
|---------|----------------|--------------|----------------|-----------------|-------------|
| x86-64  | decompress 4X1 | gcc-12.2.0   | 1029.6         | 1308.1          | 1208.1      |
| x86-64  | decompress 4X1 | clang-14.0.6 | 1019.3         | 1305.6          | 1276.3      |
| x86-64  | decompress 4X2 | gcc-12.2.0   | 1348.5         | 1657.0          | 1374.1      |
| x86-64  | decompress 4X2 | clang-14.0.6 | 1027.6         | 1659.9          | 1468.1      |
| aarch64 | decompress 4X1 | clang-12.0.5 | 1081.0         | N/A             | 1234.9      |
| aarch64 | decompress 4X2 | clang-12.0.5 | 1270.0         | N/A             | 1516.6      |
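The new parameter is experimental, so it is only visible behind `ZSTD_STATIC_LINKING_ONLY`. A minimal usage sketch (error handling elided; `decompress_no_asm` is an illustrative name, not part of the library):

```c
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_d_disableHuffmanAssembly is experimental */
#include <zstd.h>

/* Decompress with the Huffman assembly loops disabled, forcing the new
 * fast C loops (or the fallback decoder) instead. */
static size_t decompress_no_asm(void* dst, size_t dstCapacity,
                                const void* src, size_t srcSize)
{
    ZSTD_DCtx* const dctx = ZSTD_createDCtx();
    size_t ret;
    ZSTD_DCtx_setParameter(dctx, ZSTD_d_disableHuffmanAssembly, 1);
    ret = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize);
    ZSTD_freeDCtx(dctx);
    return ret;
}
```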
parent f3255bfeff
commit 8957fef554
@@ -236,7 +236,7 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
                      const void* src, size_t srcSize)
 {
     U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
-    return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* bmi2 */ 0);
+    return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* flags */ 0);
 }
 
 FORCE_INLINE_TEMPLATE size_t
@@ -328,13 +328,13 @@ size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats,
                           U32* nbSymbolsPtr, U32* tableLogPtr,
                           const void* src, size_t srcSize,
                           void* workSpace, size_t wkspSize,
-                          int bmi2)
+                          int flags)
 {
 #if DYNAMIC_BMI2
-    if (bmi2) {
+    if (flags & HUF_flags_bmi2) {
         return HUF_readStats_body_bmi2(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
     }
 #endif
-    (void)bmi2;
+    (void)flags;
     return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
 }
@@ -107,7 +107,12 @@ typedef enum {
      * If set: Don't use assembly implementations
      * If unset: Allow using assembly implementations
      */
-    HUF_flags_disableAsm = (1 << 4)
+    HUF_flags_disableAsm = (1 << 4),
+    /**
+     * If set: Don't use the fast decoding loop, always use the fallback decoding loop.
+     * If unset: Use the fast decoding loop when possible.
+     */
+    HUF_flags_disableFast = (1 << 5)
 } HUF_flags_e;
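Callers OR these bits into a single `flags` word. A minimal sketch of how such a word is composed, mirroring the `ZSTD_decodeLiteralsBlock()` change later in this diff (`bmi2Supported`, `disableAsm`, and `disableFast` are illustrative stand-ins for the real DCtx state):

```c
/* Compose HUF decoder flags from runtime capabilities and user settings. */
static int makeHufFlags(int bmi2Supported, int disableAsm, int disableFast)
{
    return 0
        | (bmi2Supported ? HUF_flags_bmi2 : 0)
        | (disableAsm    ? HUF_flags_disableAsm : 0)
        | (disableFast   ? HUF_flags_disableFast : 0);
}
```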
@@ -137,12 +137,20 @@
 /*
  * For x86 ELF targets, add .note.gnu.property section for Intel CET in
  * assembly sources when CET is enabled.
+ *
+ * Additionally, any function that may be called indirectly must begin
+ * with ZSTD_CET_ENDBRANCH.
  */
 #if defined(__ELF__) && (defined(__x86_64__) || defined(__i386__)) \
     && defined(__has_include)
 # if __has_include(<cet.h>)
 #  include <cet.h>
 #  define ZSTD_CET_ENDBRANCH _CET_ENDBR
 # endif
 #endif
+
+#ifndef ZSTD_CET_ENDBRANCH
+# define ZSTD_CET_ENDBRANCH
+#endif
 
 #endif /* ZSTD_PORTABILITY_MACROS_H */
@@ -43,10 +43,14 @@
 #error "Cannot force the use of the X1 and X2 decoders at the same time!"
 #endif
 
-#if ZSTD_ENABLE_ASM_X86_64_BMI2 && DYNAMIC_BMI2
-# define HUF_ASM_X86_64_BMI2_ATTRS BMI2_TARGET_ATTRIBUTE
+/* When DYNAMIC_BMI2 is enabled, fast decoders are only called when bmi2 is
+ * supported at runtime, so we can add the BMI2 target attribute.
+ * When it is disabled, we will still get BMI2 if it is enabled statically.
+ */
+#if DYNAMIC_BMI2
+# define HUF_FAST_BMI2_ATTRS BMI2_TARGET_ATTRIBUTE
 #else
-# define HUF_ASM_X86_64_BMI2_ATTRS
+# define HUF_FAST_BMI2_ATTRS
 #endif
 
 #ifdef __cplusplus
@@ -56,7 +60,7 @@
 #endif
 #define HUF_ASM_DECL HUF_EXTERN_C
 
-#if DYNAMIC_BMI2 || (ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__))
+#if DYNAMIC_BMI2
 # define HUF_NEED_BMI2_FUNCTION 1
 #else
 # define HUF_NEED_BMI2_FUNCTION 0
@@ -78,6 +82,11 @@
 /* **************************************************************
 *  BMI2 Variant Wrappers
 ****************************************************************/
+typedef size_t (*HUF_DecompressUsingDTableFn)(void *dst, size_t dstSize,
+                                              const void *cSrc,
+                                              size_t cSrcSize,
+                                              const HUF_DTable *DTable);
+
 #if DYNAMIC_BMI2
 
 #define HUF_DGEN(fn) \
@@ -132,15 +141,28 @@ static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
     return dtd;
 }
 
-#if ZSTD_ENABLE_ASM_X86_64_BMI2
-
-static size_t HUF_initDStream(BYTE const* ip) {
+static size_t HUF_initFastDStream(BYTE const* ip) {
     BYTE const lastByte = ip[7];
     size_t const bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0;
     size_t const value = MEM_readLEST(ip) | 1;
     assert(bitsConsumed <= 8);
     assert(sizeof(size_t) == 8);
     return value << bitsConsumed;
 }
 
 
+/**
+ * The input/output arguments to the Huffman fast decoding loop:
+ *
+ * ip [in/out] - The input pointers, must be updated to reflect what is consumed.
+ * op [in/out] - The output pointers, must be updated to reflect what is written.
+ * bits [in/out] - The bitstream containers, must be updated to reflect the current state.
+ * dt [in] - The decoding table.
+ * ilimit [in] - The input limit, stop when any input pointer is below ilimit.
+ * oend [in] - The end of the output stream. op[3] must not cross oend.
+ * iend [in] - The end of each input stream. ip[i] may cross iend[i],
+ *             as long as it is above ilimit, but that indicates corruption.
+ */
 typedef struct {
     BYTE const* ip[4];
     BYTE* op[4];
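To make the `HUF_initFastDStream()` bit accounting above concrete, here is a standalone model of it (an illustrative sketch; `highbit8()` stands in for `ZSTD_highbit32()` applied to a byte):

```c
#include <assert.h>
#include <stdint.h>

static int highbit8(uint8_t x) { int n = -1; while (x) { x >>= 1; n++; } return n; }

int main(void) {
    /* The highest set bit of a stream's last byte is the end-of-stream
     * padding marker. For lastByte = 0x14 (0b00010100) the marker sits at
     * bit 4, so 8 - 4 = 4 bits (3 padding bits plus the marker itself) are
     * already consumed; the 64-bit container is then shifted left by that
     * amount after OR-ing a sentinel 1 into bit 0. */
    uint8_t const lastByte = 0x14;
    int const bitsConsumed = lastByte ? 8 - highbit8(lastByte) : 0;
    assert(bitsConsumed == 4);
    return 0;
}
```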
@@ -149,15 +171,17 @@ typedef struct {
     BYTE const* ilimit;
     BYTE* oend;
     BYTE const* iend[4];
-} HUF_DecompressAsmArgs;
+} HUF_DecompressFastArgs;
+
+typedef void (*HUF_DecompressFastLoopFn)(HUF_DecompressFastArgs*);
 
 /**
- * Initializes args for the asm decoding loop.
- * @returns 0 on success
- *          1 if the fallback implementation should be used.
+ * Initializes args for the fast decoding loop.
+ * @returns 1 on success
+ *          0 if the fallback implementation should be used.
  *          Or an error code on failure.
  */
-static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst, size_t dstSize, void const* src, size_t srcSize, const HUF_DTable* DTable)
+static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* dst, size_t dstSize, void const* src, size_t srcSize, const HUF_DTable* DTable)
 {
     void const* dt = DTable + 1;
     U32 const dtLog = HUF_getDTableDesc(DTable).tableLog;
@@ -166,9 +190,11 @@ static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst,
 
     BYTE* const oend = (BYTE*)dst + dstSize;
 
-    /* The following condition is false on x32 platform,
-     * but HUF_asm is not compatible with this ABI */
-    if (!(MEM_isLittleEndian() && !MEM_32bits())) return 1;
+    /* The fast decoding loop assumes 64-bit little-endian.
+     * This condition is false on x32.
+     */
+    if (!MEM_isLittleEndian() || MEM_32bits())
+        return 0;
 
     /* strict minimum : jump table + 1 byte per stream */
     if (srcSize < 10)
@@ -179,7 +205,7 @@ static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst,
      * On small inputs we don't have enough data to trigger the fast loop, so use the old decoder.
      */
     if (dtLog != HUF_DECODER_FAST_TABLELOG)
-        return 1;
+        return 0;
 
     /* Read the jump table. */
     {
@@ -193,13 +219,13 @@ static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst,
         args->iend[2] = args->iend[1] + length2;
         args->iend[3] = args->iend[2] + length3;
 
-        /* HUF_initDStream() requires this, and this small of an input
+        /* HUF_initFastDStream() requires this, and this small of an input
          * won't benefit from the ASM loop anyways.
         * length1 must be >= 16 so that ip[0] >= ilimit before the loop
         * starts.
         */
        if (length1 < 16 || length2 < 8 || length3 < 8 || length4 < 8)
-            return 1;
+            return 0;
        if (length4 > srcSize) return ERROR(corruption_detected);   /* overflow */
    }
    /* ip[] contains the position that is currently loaded into bits[]. */
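The jump table parsed above is the standard zstd 4-streams header: 6 bytes holding three little-endian 16-bit compressed sizes, with the fourth stream's size implied by the total. A minimal sketch of the split (illustrative helper, not library code):

```c
#include <stdint.h>
#include <stddef.h>

static uint16_t readLE16(const uint8_t* p) { return (uint16_t)(p[0] | (p[1] << 8)); }

/* Split a 4-streams Huffman payload into four segment lengths.
 * Returns 0 if the header is obviously corrupt. */
static int splitStreams(const uint8_t* src, size_t srcSize, size_t len[4])
{
    if (srcSize < 10) return 0;   /* jump table + at least 1 byte per stream */
    len[0] = readLE16(src + 0);
    len[1] = readLE16(src + 2);
    len[2] = readLE16(src + 4);
    if (6 + len[0] + len[1] + len[2] > srcSize) return 0;
    len[3] = srcSize - 6 - len[0] - len[1] - len[2];
    return 1;
}
```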
@@ -216,7 +242,7 @@ static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst,
 
     /* No point to call the ASM loop for tiny outputs. */
     if (args->op[3] >= oend)
-        return 1;
+        return 0;
 
     /* bits[] is the bit container.
      * It is read from the MSB down to the LSB.
@@ -225,10 +251,10 @@ static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst,
      * set, so that CountTrailingZeros(bits[]) can be used
      * to count how many bits we've consumed.
      */
-    args->bits[0] = HUF_initDStream(args->ip[0]);
-    args->bits[1] = HUF_initDStream(args->ip[1]);
-    args->bits[2] = HUF_initDStream(args->ip[2]);
-    args->bits[3] = HUF_initDStream(args->ip[3]);
+    args->bits[0] = HUF_initFastDStream(args->ip[0]);
+    args->bits[1] = HUF_initFastDStream(args->ip[1]);
+    args->bits[2] = HUF_initFastDStream(args->ip[2]);
+    args->bits[3] = HUF_initFastDStream(args->ip[3]);
 
     /* If ip[] >= ilimit, it is guaranteed to be safe to
      * reload bits[]. It may be beyond its section, but is
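The sentinel bit described in the comment above is what later lets the loops recover the consumption count branchlessly: after `k` bits have been shifted out of the container, the sentinel sits at bit `k`, so counting trailing zeros yields exactly `k`. A standalone model (a sketch; the GCC/Clang builtin stands in for `ZSTD_countTrailingZeros64()`):

```c
#include <assert.h>
#include <stdint.h>

int main(void) {
    uint64_t bits = (0xABCDull << 48) | 1;  /* freshly loaded container + sentinel */
    bits <<= 13;                            /* decoding consumed 13 bits */
    int const ctz = __builtin_ctzll(bits);  /* recovers the 13 consumed bits */
    assert(ctz == 13);
    assert((ctz >> 3) == 1);                /* back up 1 whole byte on reload */
    assert((ctz & 7) == 5);                 /* then re-shift the 5 leftover bits */
    return 0;
}
```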
@@ -239,10 +265,10 @@ static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst,
     args->oend = oend;
     args->dt = dt;
 
-    return 0;
+    return 1;
 }
 
-static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressAsmArgs const* args, int stream, BYTE* segmentEnd)
+static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressFastArgs const* args, int stream, BYTE* segmentEnd)
 {
     /* Validate that we haven't overwritten. */
     if (args->op[stream] > segmentEnd)
@@ -257,7 +283,7 @@ static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressAsmArgs
 
     /* Construct the BIT_DStream_t. */
     assert(sizeof(size_t) == 8);
-    bit->bitContainer = MEM_readLE64(args->ip[stream]);
+    bit->bitContainer = MEM_readLEST(args->ip[stream]);
     bit->bitsConsumed = ZSTD_countTrailingZeros64(args->bits[stream]);
     bit->start = (const char*)args->iend[0];
     bit->limitPtr = bit->start + sizeof(size_t);
@@ -265,7 +291,6 @@ static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressAsmArgs
 
     return 0;
 }
-#endif
 
 
 #ifndef HUF_FORCE_DECOMPRESS_X2
@@ -655,27 +680,132 @@ size_t HUF_decompress4X1_usingDTable_internal_default(void* dst, size_t dstSize,
 
 #if ZSTD_ENABLE_ASM_X86_64_BMI2
 
-HUF_ASM_DECL void HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args) ZSTDLIB_HIDDEN;
+HUF_ASM_DECL void HUF_decompress4X1_usingDTable_internal_fast_asm_loop(HUF_DecompressFastArgs* args) ZSTDLIB_HIDDEN;
 
-static HUF_ASM_X86_64_BMI2_ATTRS
+#endif
+
+static HUF_FAST_BMI2_ATTRS
+void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* args)
+{
+    U64 bits[4];
+    BYTE const* ip[4];
+    BYTE* op[4];
+    U16 const* const dtable = (U16 const*)args->dt;
+    BYTE* const oend = args->oend;
+    BYTE const* const ilimit = args->ilimit;
+
+    /* Copy the arguments to local variables */
+    ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
+    ZSTD_memcpy(&ip, &args->ip, sizeof(ip));
+    ZSTD_memcpy(&op, &args->op, sizeof(op));
+
+    assert(MEM_isLittleEndian());
+    assert(!MEM_32bits());
+
+    for (;;) {
+        BYTE* olimit;
+        int stream;
+        int symbol;
+
+        /* Assert loop preconditions */
+#ifndef NDEBUG
+        for (stream = 0; stream < 4; ++stream) {
+            assert(op[stream] <= (stream == 3 ? oend : op[stream + 1]));
+            assert(ip[stream] >= ilimit);
+        }
+#endif
+        /* Compute olimit */
+        {
+            /* Each iteration produces 5 output symbols per stream */
+            size_t const oiters = (size_t)(oend - op[3]) / 5;
+            /* Each iteration consumes up to 11 bits * 5 = 55 bits < 7 bytes
+             * per stream.
+             */
+            size_t const iiters = (size_t)(ip[0] - ilimit) / 7;
+            /* We can safely run iters iterations before running bounds checks */
+            size_t const iters = MIN(oiters, iiters);
+            size_t const symbols = iters * 5;
+
+            /* We can simply check that op[3] < olimit, instead of checking all
+             * of our bounds, since we can't hit the other bounds until we've run
+             * iters iterations, which only happens when op[3] == olimit.
+             */
+            olimit = op[3] + symbols;
+
+            /* Exit fast decoding loop once we get close to the end. */
+            if (op[3] + 20 > olimit)
+                break;
+
+            /* Exit the decoding loop if any input pointer has crossed the
+             * previous one. This indicates corruption, and a precondition
+             * to our loop is that ip[i] >= ip[0].
+             */
+            for (stream = 1; stream < 4; ++stream) {
+                if (ip[stream] < ip[stream - 1])
+                    break;
+            }
+        }
+
+#ifndef NDEBUG
+        for (stream = 1; stream < 4; ++stream) {
+            assert(ip[stream] >= ip[stream - 1]);
+        }
+#endif
+
+        do {
+            /* Decode 5 symbols in each of the 4 streams */
+            for (symbol = 0; symbol < 5; ++symbol) {
+                for (stream = 0; stream < 4; ++stream) {
+                    int const index = (int)(bits[stream] >> 53);
+                    int const entry = (int)dtable[index];
+                    bits[stream] <<= (entry & 63);
+                    op[stream][symbol] = (BYTE)((entry >> 8) & 0xFF);
+                }
+            }
+            /* Reload the bitstreams */
+            for (stream = 0; stream < 4; ++stream) {
+                int const ctz = ZSTD_countTrailingZeros64(bits[stream]);
+                int const nbBits = ctz & 7;
+                int const nbBytes = ctz >> 3;
+                op[stream] += 5;
+                ip[stream] -= nbBytes;
+                bits[stream] = MEM_read64(ip[stream]) | 1;
+                bits[stream] <<= nbBits;
+            }
+        } while (op[3] < olimit);
+    }
+
+    /* Save the final values of each of the state variables back to args. */
+    ZSTD_memcpy(&args->bits, &bits, sizeof(bits));
+    ZSTD_memcpy(&args->ip, &ip, sizeof(ip));
+    ZSTD_memcpy(&args->op, &op, sizeof(op));
+}
+
+/**
+ * @returns @p dstSize on success (>= 6)
+ *          0 if the fallback implementation should be used
+ *          An error if an error occurred
+ */
+static HUF_FAST_BMI2_ATTRS
 size_t
-HUF_decompress4X1_usingDTable_internal_bmi2_asm(
+HUF_decompress4X1_usingDTable_internal_fast(
     void* dst, size_t dstSize,
     const void* cSrc, size_t cSrcSize,
-    const HUF_DTable* DTable)
+    const HUF_DTable* DTable,
+    HUF_DecompressFastLoopFn loopFn)
 {
     void const* dt = DTable + 1;
     const BYTE* const iend = (const BYTE*)cSrc + 6;
     BYTE* const oend = (BYTE*)dst + dstSize;
-    HUF_DecompressAsmArgs args;
-    {   size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
-        FORWARD_IF_ERROR(ret, "Failed to init asm args");
-        if (ret != 0)
-            return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
+    HUF_DecompressFastArgs args;
+    {   size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
+        FORWARD_IF_ERROR(ret, "Failed to init fast loop args");
+        if (ret == 0)
+            return 0;
     }
 
     assert(args.ip[0] >= args.ilimit);
-    HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(&args);
+    loopFn(&args);
 
     /* Our loop guarantees that ip[] >= ilimit and that we haven't
      * overwritten any op[].
@@ -705,37 +835,43 @@ HUF_decompress4X1_usingDTable_internal_bmi2_asm(
     }
 
     /* decoded size */
+    assert(dstSize != 0);
     return dstSize;
 }
-#endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */
-
-typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
-                                               const void *cSrc,
-                                               size_t cSrcSize,
-                                               const HUF_DTable *DTable);
 
 HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
 
 static size_t HUF_decompress4X1_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc,
                    size_t cSrcSize, HUF_DTable const* DTable, int flags)
 {
+    HUF_DecompressUsingDTableFn fallbackFn = HUF_decompress4X1_usingDTable_internal_default;
+    HUF_DecompressFastLoopFn loopFn = HUF_decompress4X1_usingDTable_internal_fast_c_loop;
+
 #if DYNAMIC_BMI2
     if (flags & HUF_flags_bmi2) {
+        fallbackFn = HUF_decompress4X1_usingDTable_internal_bmi2;
 # if ZSTD_ENABLE_ASM_X86_64_BMI2
-        if (!(flags & HUF_flags_disableAsm))
-            return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
+        if (!(flags & HUF_flags_disableAsm)) {
+            loopFn = HUF_decompress4X1_usingDTable_internal_fast_asm_loop;
+        }
 # endif
-        return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
+    } else {
+        return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
     }
 #else
     (void)flags;
 #endif
 
 #if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
-    if (!(flags & HUF_flags_disableAsm))
-        return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
+    if (!(flags & HUF_flags_disableAsm)) {
+        loopFn = HUF_decompress4X1_usingDTable_internal_fast_asm_loop;
+    }
 #endif
-    return HUF_decompress4X1_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable);
+
+    if (!(flags & HUF_flags_disableFast)) {
+        size_t const ret = HUF_decompress4X1_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn);
+        if (ret != 0)
+            return ret;
+    }
+    return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
 }
 
 static size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
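In the X1 fast loop above, `bits[stream] >> 53` extracts the top `64 - 53 = 11` bits as a table index (matching `HUF_DECODER_FAST_TABLELOG`), and each `U16` entry packs the bit cost in its low byte and the decoded symbol in bits 8-15. One decode step, modeled standalone under those assumptions:

```c
#include <stdint.h>

static uint8_t decodeOneX1(uint64_t* bits, const uint16_t* dtable)
{
    int const index = (int)(*bits >> 53);   /* top 11 bits index the table */
    int const entry = (int)dtable[index];
    *bits <<= (entry & 63);                 /* consume the entry's nbBits */
    return (uint8_t)((entry >> 8) & 0xFF);  /* decoded symbol */
}
```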
@@ -1322,26 +1458,167 @@ size_t HUF_decompress4X2_usingDTable_internal_default(void* dst, size_t dstSize,
 
 #if ZSTD_ENABLE_ASM_X86_64_BMI2
 
-HUF_ASM_DECL void HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args) ZSTDLIB_HIDDEN;
+HUF_ASM_DECL void HUF_decompress4X2_usingDTable_internal_fast_asm_loop(HUF_DecompressFastArgs* args) ZSTDLIB_HIDDEN;
 
-static HUF_ASM_X86_64_BMI2_ATTRS size_t
-HUF_decompress4X2_usingDTable_internal_bmi2_asm(
+#endif
+
+static HUF_FAST_BMI2_ATTRS
+void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* args)
+{
+    U64 bits[4];
+    BYTE const* ip[4];
+    BYTE* op[4];
+    BYTE* oend[4];
+    HUF_DEltX2 const* const dtable = (HUF_DEltX2 const*)args->dt;
+    BYTE const* const ilimit = args->ilimit;
+
+    /* Copy the arguments to local registers. */
+    ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
+    ZSTD_memcpy(&ip, &args->ip, sizeof(ip));
+    ZSTD_memcpy(&op, &args->op, sizeof(op));
+
+    oend[0] = op[1];
+    oend[1] = op[2];
+    oend[2] = op[3];
+    oend[3] = args->oend;
+
+    assert(MEM_isLittleEndian());
+    assert(!MEM_32bits());
+
+    for (;;) {
+        BYTE* olimit;
+        int stream;
+        int symbol;
+
+        /* Assert loop preconditions */
+#ifndef NDEBUG
+        for (stream = 0; stream < 4; ++stream) {
+            assert(op[stream] <= oend[stream]);
+            assert(ip[stream] >= ilimit);
+        }
+#endif
+        /* Compute olimit */
+        {
+            /* Each loop does 5 table lookups for each of the 4 streams.
+             * Each table lookup consumes up to 11 bits of input, and produces
+             * up to 2 bytes of output.
+             */
+            /* We can consume up to 7 bytes of input per iteration per stream.
+             * We also know that each input pointer is >= ip[0]. So we can run
+             * iters loops before running out of input.
+             */
+            size_t iters = (size_t)(ip[0] - ilimit) / 7;
+            /* Each iteration can produce up to 10 bytes of output per stream.
+             * Each output stream may advance at different rates. So take the
+             * minimum number of safe iterations among all the output streams.
+             */
+            for (stream = 0; stream < 4; ++stream) {
+                size_t const oiters = (size_t)(oend[stream] - op[stream]) / 10;
+                iters = MIN(iters, oiters);
+            }
+
+            /* Each iteration produces at least 5 output symbols. So until
+             * op[3] crosses olimit, we know we haven't executed iters
+             * iterations yet. This saves us maintaining an iters counter,
+             * at the expense of computing the remaining # of iterations
+             * more frequently.
+             */
+            olimit = op[3] + (iters * 5);
+
+            /* Exit the fast decoding loop if we are too close to the end. */
+            if (op[3] + 10 > olimit)
+                break;
+
+            /* Exit the decoding loop if any input pointer has crossed the
+             * previous one. This indicates corruption, and a precondition
+             * to our loop is that ip[i] >= ip[0].
+             */
+            for (stream = 1; stream < 4; ++stream) {
+                if (ip[stream] < ip[stream - 1])
+                    break;
+            }
+        }
+
+#ifndef NDEBUG
+        for (stream = 1; stream < 4; ++stream) {
+            assert(ip[stream] >= ip[stream - 1]);
+        }
+#endif
+
+        do {
+            /* Do 5 table lookups for each of the first 3 streams */
+            for (symbol = 0; symbol < 5; ++symbol) {
+                for (stream = 0; stream < 3; ++stream) {
+                    int const index = (int)(bits[stream] >> 53);
+                    HUF_DEltX2 const entry = dtable[index];
+                    MEM_write16(op[stream], entry.sequence);
+                    bits[stream] <<= (entry.nbBits);
+                    op[stream] += (entry.length);
+                }
+            }
+            /* Do 1 table lookup from the final stream */
+            {
+                int const index = (int)(bits[3] >> 53);
+                HUF_DEltX2 const entry = dtable[index];
+                MEM_write16(op[3], entry.sequence);
+                bits[3] <<= (entry.nbBits);
+                op[3] += (entry.length);
+            }
+            /* Do 4 table lookups from the final stream & reload bitstreams */
+            for (stream = 0; stream < 4; ++stream) {
+                /* Do a table lookup from the final stream.
+                 * This is interleaved with the reloading to reduce register
+                 * pressure. This shouldn't be necessary, but compilers can
+                 * struggle with codegen with high register pressure.
+                 */
+                {
+                    int const index = (int)(bits[3] >> 53);
+                    HUF_DEltX2 const entry = dtable[index];
+                    MEM_write16(op[3], entry.sequence);
+                    bits[3] <<= (entry.nbBits);
+                    op[3] += (entry.length);
+                }
+                /* Reload the bitstreams. The final bitstream must be reloaded
+                 * after the 5th symbol was decoded.
+                 */
+                {
+                    int const ctz = ZSTD_countTrailingZeros64(bits[stream]);
+                    int const nbBits = ctz & 7;
+                    int const nbBytes = ctz >> 3;
+                    ip[stream] -= nbBytes;
+                    bits[stream] = MEM_read64(ip[stream]) | 1;
+                    bits[stream] <<= nbBits;
+                }
+            }
+        } while (op[3] < olimit);
+    }
+
+    /* Save the final values of each of the state variables back to args. */
+    ZSTD_memcpy(&args->bits, &bits, sizeof(bits));
+    ZSTD_memcpy(&args->ip, &ip, sizeof(ip));
+    ZSTD_memcpy(&args->op, &op, sizeof(op));
+}
+
+
+static HUF_FAST_BMI2_ATTRS size_t
+HUF_decompress4X2_usingDTable_internal_fast(
     void* dst, size_t dstSize,
     const void* cSrc, size_t cSrcSize,
-    const HUF_DTable* DTable) {
+    const HUF_DTable* DTable,
+    HUF_DecompressFastLoopFn loopFn) {
     void const* dt = DTable + 1;
     const BYTE* const iend = (const BYTE*)cSrc + 6;
     BYTE* const oend = (BYTE*)dst + dstSize;
-    HUF_DecompressAsmArgs args;
+    HUF_DecompressFastArgs args;
     {
-        size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
+        size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
         FORWARD_IF_ERROR(ret, "Failed to init asm args");
-        if (ret != 0)
-            return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
+        if (ret == 0)
+            return 0;
     }
 
     assert(args.ip[0] >= args.ilimit);
-    HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(&args);
+    loopFn(&args);
 
     /* note : op4 already verified within main loop */
     assert(args.ip[0] >= iend);
@@ -1372,28 +1649,38 @@ HUF_decompress4X2_usingDTable_internal_bmi2_asm(
     /* decoded size */
     return dstSize;
 }
-#endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */
 
 static size_t HUF_decompress4X2_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc,
                    size_t cSrcSize, HUF_DTable const* DTable, int flags)
 {
+    HUF_DecompressUsingDTableFn fallbackFn = HUF_decompress4X2_usingDTable_internal_default;
+    HUF_DecompressFastLoopFn loopFn = HUF_decompress4X2_usingDTable_internal_fast_c_loop;
+
 #if DYNAMIC_BMI2
     if (flags & HUF_flags_bmi2) {
+        fallbackFn = HUF_decompress4X2_usingDTable_internal_bmi2;
 # if ZSTD_ENABLE_ASM_X86_64_BMI2
-        if (!(flags & HUF_flags_disableAsm))
-            return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
+        if (!(flags & HUF_flags_disableAsm)) {
+            loopFn = HUF_decompress4X2_usingDTable_internal_fast_asm_loop;
+        }
 # endif
-        return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
+    } else {
+        return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
     }
 #else
     (void)flags;
 #endif
 
 #if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
-    if (!(flags & HUF_flags_disableAsm))
-        return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
+    if (!(flags & HUF_flags_disableAsm)) {
+        loopFn = HUF_decompress4X2_usingDTable_internal_fast_asm_loop;
+    }
 #endif
-    return HUF_decompress4X2_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable);
+
+    if (!(flags & HUF_flags_disableFast)) {
+        size_t const ret = HUF_decompress4X2_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn);
+        if (ret != 0)
+            return ret;
+    }
+    return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable);
 }
 
 HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
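The X2 loop differs from X1 in that a table entry can decode two symbols at once, which is why each stream needs its own `oend[]` bound. One X2 decode step, modeled standalone (a sketch; `DEltX2` mirrors the layout implied by `HUF_DEltX2`'s `sequence`/`nbBits`/`length` fields above):

```c
#include <stdint.h>
#include <string.h>

typedef struct { uint16_t sequence; uint8_t nbBits; uint8_t length; } DEltX2;

static uint8_t* decodeOneX2(uint64_t* bits, const DEltX2* dtable, uint8_t* op)
{
    DEltX2 const entry = dtable[*bits >> 53];
    memcpy(op, &entry.sequence, 2);  /* always write 2 bytes... */
    *bits <<= entry.nbBits;
    return op + entry.length;        /* ...but only advance by 1 or 2 */
}
```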
@@ -30,14 +30,14 @@
  * TODO: Support Windows calling convention.
  */
 
-ZSTD_HIDE_ASM_FUNCTION(HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop)
-ZSTD_HIDE_ASM_FUNCTION(HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop)
-ZSTD_HIDE_ASM_FUNCTION(_HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop)
-ZSTD_HIDE_ASM_FUNCTION(_HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop)
-.global HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop
-.global HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop
-.global _HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop
-.global _HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop
+ZSTD_HIDE_ASM_FUNCTION(HUF_decompress4X1_usingDTable_internal_fast_asm_loop)
+ZSTD_HIDE_ASM_FUNCTION(HUF_decompress4X2_usingDTable_internal_fast_asm_loop)
+ZSTD_HIDE_ASM_FUNCTION(_HUF_decompress4X2_usingDTable_internal_fast_asm_loop)
+ZSTD_HIDE_ASM_FUNCTION(_HUF_decompress4X1_usingDTable_internal_fast_asm_loop)
+.global HUF_decompress4X1_usingDTable_internal_fast_asm_loop
+.global HUF_decompress4X2_usingDTable_internal_fast_asm_loop
+.global _HUF_decompress4X1_usingDTable_internal_fast_asm_loop
+.global _HUF_decompress4X2_usingDTable_internal_fast_asm_loop
 .text
 
 /* Sets up register mappings for clarity.
@@ -95,8 +95,9 @@ ZSTD_HIDE_ASM_FUNCTION(_HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop)
 /* Define both _HUF_* & HUF_* symbols because MacOS
  * C symbols are prefixed with '_' & Linux symbols aren't.
  */
-_HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop:
-HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop:
+_HUF_decompress4X1_usingDTable_internal_fast_asm_loop:
+HUF_decompress4X1_usingDTable_internal_fast_asm_loop:
+ZSTD_CET_ENDBRANCH
     /* Save all registers - even if they are callee saved for simplicity. */
     push %rax
     push %rbx
@@ -350,8 +351,9 @@ HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop:
     pop %rax
     ret
 
-_HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop:
-HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop:
+_HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
+HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
+ZSTD_CET_ENDBRANCH
     /* Save all registers - even if they are callee saved for simplicity. */
     push %rax
     push %rbx
@@ -243,6 +243,7 @@ static void ZSTD_DCtx_resetParameters(ZSTD_DCtx* dctx)
     dctx->outBufferMode = ZSTD_bm_buffered;
     dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum;
     dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict;
+    dctx->disableHufAsm = 0;
 }
 
 static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
@@ -1811,6 +1812,11 @@ ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam)
         bounds.lowerBound = (int)ZSTD_rmd_refSingleDDict;
         bounds.upperBound = (int)ZSTD_rmd_refMultipleDDicts;
         return bounds;
+    case ZSTD_d_disableHuffmanAssembly:
+        bounds.lowerBound = 0;
+        bounds.upperBound = 1;
+        return bounds;
+
     default:;
     }
     bounds.error = ERROR(parameter_unsupported);
@@ -1851,6 +1857,9 @@ size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value
     case ZSTD_d_refMultipleDDicts:
         *value = (int)dctx->refMultipleDDicts;
         return 0;
+    case ZSTD_d_disableHuffmanAssembly:
+        *value = (int)dctx->disableHufAsm;
+        return 0;
     default:;
     }
     RETURN_ERROR(parameter_unsupported, "");
@@ -1884,6 +1893,10 @@ size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value
         }
         dctx->refMultipleDDicts = (ZSTD_refMultipleDDicts_e)value;
         return 0;
+    case ZSTD_d_disableHuffmanAssembly:
+        CHECK_DBOUNDS(ZSTD_d_disableHuffmanAssembly, value);
+        dctx->disableHufAsm = value != 0;
+        return 0;
     default:;
     }
     RETURN_ERROR(parameter_unsupported, "");
@@ -141,7 +141,9 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
         U32 const lhc = MEM_readLE32(istart);
         size_t hufSuccess;
         size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
-        int const flags = ZSTD_DCtx_get_bmi2(dctx) ? HUF_flags_bmi2 : 0;
+        int const flags = 0
+            | (ZSTD_DCtx_get_bmi2(dctx) ? HUF_flags_bmi2 : 0)
+            | (dctx->disableHufAsm ? HUF_flags_disableAsm : 0);
         switch(lhlCode)
         {
         case 0: case 1: default:   /* note : default is impossible, since lhlCode into [0..3] */
@@ -165,6 +165,7 @@ struct ZSTD_DCtx_s
     ZSTD_dictUses_e dictUses;
    ZSTD_DDictHashSet* ddictSet;                    /* Hash set for multiple ddicts */
    ZSTD_refMultipleDDicts_e refMultipleDDicts;     /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
+    int disableHufAsm;
 
    /* streaming */
    ZSTD_dStreamStage streamStage;
lib/zstd.h

@@ -614,13 +614,15 @@ typedef enum {
  *   ZSTD_d_stableOutBuffer
  *   ZSTD_d_forceIgnoreChecksum
  *   ZSTD_d_refMultipleDDicts
+ *   ZSTD_d_disableHuffmanAssembly
  * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
  * note : never ever use experimentalParam? names directly
  */
     ZSTD_d_experimentalParam1=1000,
     ZSTD_d_experimentalParam2=1001,
     ZSTD_d_experimentalParam3=1002,
-    ZSTD_d_experimentalParam4=1003
+    ZSTD_d_experimentalParam4=1003,
+    ZSTD_d_experimentalParam5=1004
 
 } ZSTD_dParameter;
 
@@ -2345,6 +2347,17 @@ ZSTDLIB_STATIC_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParamete
  */
 #define ZSTD_d_refMultipleDDicts ZSTD_d_experimentalParam4
 
+/* ZSTD_d_disableHuffmanAssembly
+ * Set to 1 to disable the Huffman assembly implementation.
+ * The default value is 0, which allows zstd to use the Huffman assembly
+ * implementation if available.
+ *
+ * This parameter can be used to disable Huffman assembly at runtime.
+ * If you want to disable it at compile time you can define the macro
+ * ZSTD_DISABLE_ASM.
+ */
+#define ZSTD_d_disableHuffmanAssembly ZSTD_d_experimentalParam5
+
 
 /*! ZSTD_DCtx_setFormat() :
  *  This function is REDUNDANT. Prefer ZSTD_DCtx_setParameter().
@@ -33,7 +33,8 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
             | (FUZZ_dataProducer_int32Range(producer, 0, 1) ? HUF_flags_optimalDepth : 0)
             | (FUZZ_dataProducer_int32Range(producer, 0, 1) ? HUF_flags_preferRepeat : 0)
             | (FUZZ_dataProducer_int32Range(producer, 0, 1) ? HUF_flags_suspectUncompressible : 0)
-            | (FUZZ_dataProducer_int32Range(producer, 0, 1) ? HUF_flags_disableAsm : 0);
+            | (FUZZ_dataProducer_int32Range(producer, 0, 1) ? HUF_flags_disableAsm : 0)
+            | (FUZZ_dataProducer_int32Range(producer, 0, 1) ? HUF_flags_disableFast : 0);
     /* Select a random cBufSize - it may be too small */
     size_t const dBufSize = FUZZ_dataProducer_uint32Range(producer, 0, 8 * size + 500);
     size_t const maxTableLog = FUZZ_dataProducer_uint32Range(producer, 1, HUF_TABLELOG_MAX);
@@ -49,7 +49,8 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
             | (FUZZ_dataProducer_int32Range(producer, 0, 1) ? HUF_flags_optimalDepth : 0)
             | (FUZZ_dataProducer_int32Range(producer, 0, 1) ? HUF_flags_preferRepeat : 0)
             | (FUZZ_dataProducer_int32Range(producer, 0, 1) ? HUF_flags_suspectUncompressible : 0)
-            | (FUZZ_dataProducer_int32Range(producer, 0, 1) ? HUF_flags_disableAsm : 0);
+            | (FUZZ_dataProducer_int32Range(producer, 0, 1) ? HUF_flags_disableAsm : 0)
+            | (FUZZ_dataProducer_int32Range(producer, 0, 1) ? HUF_flags_disableFast : 0);
     /* Select a random cBufSize - it may be too small */
     size_t const cBufSize = FUZZ_dataProducer_uint32Range(producer, 0, 4 * size);
     /* Select a random tableLog - we'll adjust it up later */
@@ -684,6 +684,17 @@ static int basicUnitTests(U32 const seed, double compressibility)
         if (r != CNBuffSize) goto _output_error; }
     DISPLAYLEVEL(3, "OK \n");
 
+    DISPLAYLEVEL(3, "test%3i : decompress %u bytes with Huffman assembly disabled : ", testNb++, (unsigned)CNBuffSize);
+    {
+        ZSTD_DCtx* dctx = ZSTD_createDCtx();
+        size_t r;
+        CHECK_Z(ZSTD_DCtx_setParameter(dctx, ZSTD_d_disableHuffmanAssembly, 1));
+        r = ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, cSize);
+        if (r != CNBuffSize || memcmp(decodedBuffer, CNBuffer, CNBuffSize)) goto _output_error;
+        ZSTD_freeDCtx(dctx);
+    }
+    DISPLAYLEVEL(3, "OK \n");
+
     DISPLAYLEVEL(3, "test%3i : check decompressed result : ", testNb++);
     {   size_t u;
         for (u=0; u<CNBuffSize; u++) {
@@ -2889,6 +2889,9 @@ static int fuzzerTests_newAPI(U32 seed, int nbTests, int startTest,
         CHECK(badParameters(zc, savedParams), "CCtx params are wrong");
 
         /* multi - fragments decompression test */
+        if (FUZ_rand(&lseed) & 1) {
+            CHECK_Z(ZSTD_DCtx_reset(zd, ZSTD_reset_session_and_parameters));
+        }
         if (!dictSize /* don't reset if dictionary : could be different */ && (FUZ_rand(&lseed) & 1)) {
             DISPLAYLEVEL(5, "resetting DCtx (dict:%p) \n", (void const*)dict);
             CHECK_Z( ZSTD_resetDStream(zd) );
@@ -2897,6 +2900,9 @@ static int fuzzerTests_newAPI(U32 seed, int nbTests, int startTest,
             DISPLAYLEVEL(5, "using dictionary of size %zu \n", dictSize);
             CHECK_Z( ZSTD_initDStream_usingDict(zd, dict, dictSize) );
         }
+        if (FUZ_rand(&lseed) & 1) {
+            CHECK_Z(ZSTD_DCtx_setParameter(zd, ZSTD_d_disableHuffmanAssembly, FUZ_rand(&lseed) & 1));
+        }
         {   size_t decompressionResult = 1;
             ZSTD_inBuffer  inBuff = { cBuffer, cSize, 0 };
             ZSTD_outBuffer outBuff= { dstBuffer, dstBufferSize, 0 };
@@ -2938,7 +2944,14 @@ static int fuzzerTests_newAPI(U32 seed, int nbTests, int startTest,
         } }
 
         /* try decompression on noisy data */
-        CHECK_Z( ZSTD_initDStream(zd_noise) );   /* note : no dictionary */
+        if (FUZ_rand(&lseed) & 1) {
+            CHECK_Z(ZSTD_DCtx_reset(zd_noise, ZSTD_reset_session_and_parameters));
+        } else {
+            CHECK_Z(ZSTD_DCtx_reset(zd_noise, ZSTD_reset_session_only));
+        }
+        if (FUZ_rand(&lseed) & 1) {
+            CHECK_Z(ZSTD_DCtx_setParameter(zd_noise, ZSTD_d_disableHuffmanAssembly, FUZ_rand(&lseed) & 1));
+        }
         {   ZSTD_inBuffer  inBuff = { cBuffer, cSize, 0 };
             ZSTD_outBuffer outBuff= { dstBuffer, dstBufferSize, 0 };
             while (outBuff.pos < dstBufferSize) {