Merge pull request #2784 from terrelln/huf-asm-fuzzer

[fuzzer] Add Huffman decompression fuzzer
This commit is contained in:
Nick Terrell 2021-09-20 11:57:27 -07:00 committed by GitHub
commit 51b123d5f7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 159 additions and 78 deletions

View File

@ -304,11 +304,13 @@ HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, cons
BYTE* const pStart = p;
/* up to 4 symbols at a time */
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
if ((pEnd - p) > 3) {
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
}
}
/* [0-3] symbols remaining */
@ -388,33 +390,36 @@ HUF_decompress4X1_usingDTable_internal_body(
U32 endSignal = 1;
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */
CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
/* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
for ( ; (endSignal) & (op4 < olimit) ; ) {
HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
if ((size_t)(oend - op4) >= sizeof(size_t)) {
for ( ; (endSignal) & (op4 < olimit) ; ) {
HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
}
}
/* check corruption */
@ -753,19 +758,23 @@ HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
BYTE* const pStart = p;
/* up to 8 symbols at a time */
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
if ((size_t)(pEnd - p) >= sizeof(bitDPtr->bitContainer)) {
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
}
}
/* closer to end : up to 2 symbols at a time */
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
if ((size_t)(pEnd - p) >= 2) {
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
while (p <= pEnd-2)
HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
while (p <= pEnd-2)
HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
}
if (p < pEnd)
p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);
@ -841,57 +850,60 @@ HUF_decompress4X2_usingDTable_internal_body(
U32 const dtLog = dtd.tableLog;
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */
CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
/* 16-32 symbols per loop (4-8 symbols per stream) */
for ( ; (endSignal) & (op4 < olimit); ) {
if ((size_t)(oend - op4) >= sizeof(size_t)) {
for ( ; (endSignal) & (op4 < olimit); ) {
#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
#else
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
endSignal = (U32)LIKELY(
(BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
& (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
& (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
& (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
endSignal = (U32)LIKELY(
(BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
& (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
& (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
& (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
#endif
}
}
/* check corruption */

View File

@ -112,7 +112,8 @@ FUZZ_TARGETS := \
fse_read_ncount \
sequence_compression_api \
seekable_roundtrip \
huf_round_trip
huf_round_trip \
huf_decompress
all: libregression.a $(FUZZ_TARGETS)
@ -218,6 +219,9 @@ seekable_roundtrip: $(FUZZ_HEADERS) $(SEEKABLE_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) $
huf_round_trip: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_huf_round_trip.o
$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_huf_round_trip.o $(LIB_FUZZING_ENGINE) -o $@
huf_decompress: $(FUZZ_HEADERS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_huf_decompress.o
$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_huf_decompress.o $(LIB_FUZZING_ENGINE) -o $@
libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h $(PRGDIR)/util.c d_fuzz_regression_driver.o
$(AR) $(FUZZ_ARFLAGS) $@ d_fuzz_regression_driver.o

View File

@ -64,6 +64,7 @@ TARGET_INFO = {
'sequence_compression_api': TargetInfo(InputType.RAW_DATA),
'seekable_roundtrip': TargetInfo(InputType.RAW_DATA),
'huf_round_trip': TargetInfo(InputType.RAW_DATA),
'huf_decompress': TargetInfo(InputType.RAW_DATA),
}
TARGETS = list(TARGET_INFO.keys())
ALL_TARGETS = TARGETS + ['all']

View File

@ -0,0 +1,64 @@
/*
* Copyright (c) Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
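/**
* This fuzz target exercises the Huffman decoder on untrusted input: it reads
* a Huffman decoding table (X1 or X2) from the fuzzer data and, if the table
* is accepted, runs single-stream or 4-stream Huffman decompression into a
* randomly sized destination buffer. Decompression results are not checked.
*/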
#define HUF_STATIC_LINKING_ONLY
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "common/cpu.h"
#include "common/huf.h"
#include "fuzz_helpers.h"
#include "fuzz_data_producer.h"
/* Fuzzer entry point.
 * Draws decoder parameters from the input, reads a Huffman decoding table,
 * and - only if the table read did not report an error - runs the selected
 * decompression variant. Decompression return values are ignored.
 * Always returns 0. */
int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
{
    FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
    /* Fuzzed knobs, drawn in a fixed order: stream selector, X1/X2 selector,
     * then bmi2 (only drawn when ZSTD_cpuid_bmi2() is non-zero). */
    int const streamSel = FUZZ_dataProducer_int32Range(producer, 0, 1);
    int const symbolSel = FUZZ_dataProducer_int32Range(producer, 0, 1);
    int const bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()) && FUZZ_dataProducer_int32Range(producer, 0, 1);
    /* Random destination capacity (dBufSize) - it may be too small */
    size_t const dBufSize = FUZZ_dataProducer_uint32Range(producer, 0, 8 * size + 500);
    size_t const maxTableLog = FUZZ_dataProducer_uint32Range(producer, 1, HUF_TABLELOG_MAX);
    HUF_DTable* dt = (HUF_DTable*)FUZZ_malloc(HUF_DTABLE_SIZE(maxTableLog) * sizeof(HUF_DTable));
    size_t const wkspSize = HUF_WORKSPACE_SIZE;
    void* wksp = FUZZ_malloc(wkspSize);
    void* dBuf = FUZZ_malloc(dBufSize);
    size_t tableResult;

    /* NOTE(review): presumably stores maxTableLog in the DTable's leading
     * descriptor word, as the library's static-DTable macros do - confirm
     * against huf.h. */
    dt[0] = maxTableLog * 0x01000001;
    /* From here on, only the bytes the producer has not consumed are used. */
    size = FUZZ_dataProducer_remainingBytes(producer);

    /* symbolSel == 0 selects the X1 table reader, anything else the X2 one. */
    tableResult = (symbolSel == 0)
                ? HUF_readDTableX1_wksp_bmi2(dt, src, size, wksp, wkspSize, bmi2)
                : HUF_readDTableX2_wksp(dt, src, size, wksp, wkspSize);

    if (!ZSTD_isError(tableResult)) {
        /* streamSel == 0 selects the 1X decoder, anything else the 4X one. */
        if (streamSel != 0) {
            HUF_decompress4X_usingDTable_bmi2(dBuf, dBufSize, src, size, dt, bmi2);
        } else {
            HUF_decompress1X_usingDTable_bmi2(dBuf, dBufSize, src, size, dt, bmi2);
        }
    }

    /* Release everything in the same order on both success and error paths. */
    free(dt);
    free(wksp);
    free(dBuf);
    FUZZ_dataProducer_free(producer);
    return 0;
}