mirror of
https://github.com/facebook/zstd.git
synced 2025-12-08 00:03:24 -05:00
record long offsets in ZSTD_symbolEncodingTypeStats_t + add test case
This commit is contained in:
parent
d210628b0b
commit
9e4c66b9e9
@ -299,7 +299,6 @@ typedef struct {
|
|||||||
BYTE* ofCode;
|
BYTE* ofCode;
|
||||||
size_t maxNbSeq;
|
size_t maxNbSeq;
|
||||||
size_t maxNbLit;
|
size_t maxNbLit;
|
||||||
BYTE* longOffsets;
|
|
||||||
|
|
||||||
/* longLengthPos and longLengthType to allow us to represent either a single litLength or matchLength
|
/* longLengthPos and longLengthType to allow us to represent either a single litLength or matchLength
|
||||||
* in the seqStore that has a value larger than U16 (if it exists). To do so, we increment
|
* in the seqStore that has a value larger than U16 (if it exists). To do so, we increment
|
||||||
@ -347,7 +346,7 @@ typedef struct {
|
|||||||
} ZSTD_frameSizeInfo; /* decompress & legacy */
|
} ZSTD_frameSizeInfo; /* decompress & legacy */
|
||||||
|
|
||||||
const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */
|
const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */
|
||||||
void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
|
int ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
|
||||||
|
|
||||||
/* custom memory allocation functions */
|
/* custom memory allocation functions */
|
||||||
void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem);
|
void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem);
|
||||||
|
|||||||
@ -1610,8 +1610,7 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
|
|||||||
size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, cParams->minMatch, useExternalMatchFinder);
|
size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, cParams->minMatch, useExternalMatchFinder);
|
||||||
size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
|
size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
|
||||||
+ ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(seqDef))
|
+ ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(seqDef))
|
||||||
+ 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE))
|
+ 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
|
||||||
+ ZSTD_cwksp_alloc_size(sizeof(BYTE)); /* longOffsets */
|
|
||||||
size_t const entropySpace = ZSTD_cwksp_alloc_size(ENTROPY_WORKSPACE_SIZE);
|
size_t const entropySpace = ZSTD_cwksp_alloc_size(ENTROPY_WORKSPACE_SIZE);
|
||||||
size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t));
|
size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t));
|
||||||
size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 0, /* forCCtx */ 1);
|
size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 0, /* forCCtx */ 1);
|
||||||
@ -2110,8 +2109,6 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
|
|||||||
zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
|
zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
|
||||||
zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
|
zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
|
||||||
zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
|
zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
|
||||||
zc->seqStore.longOffsets = ZSTD_cwksp_reserve_buffer(ws, sizeof(BYTE));
|
|
||||||
zc->seqStore.longOffsets[0] = 0;
|
|
||||||
zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef));
|
zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef));
|
||||||
|
|
||||||
FORWARD_IF_ERROR(ZSTD_reset_matchState(
|
FORWARD_IF_ERROR(ZSTD_reset_matchState(
|
||||||
@ -2562,16 +2559,15 @@ static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* par
|
|||||||
|
|
||||||
/* See doc/zstd_compression_format.md for detailed format description */
|
/* See doc/zstd_compression_format.md for detailed format description */
|
||||||
|
|
||||||
void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
|
int ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
|
||||||
{
|
{
|
||||||
const seqDef* const sequences = seqStorePtr->sequencesStart;
|
const seqDef* const sequences = seqStorePtr->sequencesStart;
|
||||||
BYTE* const llCodeTable = seqStorePtr->llCode;
|
BYTE* const llCodeTable = seqStorePtr->llCode;
|
||||||
BYTE* const ofCodeTable = seqStorePtr->ofCode;
|
BYTE* const ofCodeTable = seqStorePtr->ofCode;
|
||||||
BYTE* const mlCodeTable = seqStorePtr->mlCode;
|
BYTE* const mlCodeTable = seqStorePtr->mlCode;
|
||||||
BYTE* const longOffsetsFlag = seqStorePtr->longOffsets;
|
|
||||||
U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
|
U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
|
||||||
U32 u;
|
U32 u;
|
||||||
BYTE longOffsets = 0;
|
int longOffsets = 0;
|
||||||
assert(nbSeq <= seqStorePtr->maxNbSeq);
|
assert(nbSeq <= seqStorePtr->maxNbSeq);
|
||||||
for (u=0; u<nbSeq; u++) {
|
for (u=0; u<nbSeq; u++) {
|
||||||
U32 const llv = sequences[u].litLength;
|
U32 const llv = sequences[u].litLength;
|
||||||
@ -2582,11 +2578,11 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
|
|||||||
mlCodeTable[u] = (BYTE)ZSTD_MLcode(mlv);
|
mlCodeTable[u] = (BYTE)ZSTD_MLcode(mlv);
|
||||||
longOffsets |= (ofCode >= STREAM_ACCUMULATOR_MIN);
|
longOffsets |= (ofCode >= STREAM_ACCUMULATOR_MIN);
|
||||||
}
|
}
|
||||||
longOffsetsFlag[0] = longOffsets;
|
|
||||||
if (seqStorePtr->longLengthType==ZSTD_llt_literalLength)
|
if (seqStorePtr->longLengthType==ZSTD_llt_literalLength)
|
||||||
llCodeTable[seqStorePtr->longLengthPos] = MaxLL;
|
llCodeTable[seqStorePtr->longLengthPos] = MaxLL;
|
||||||
if (seqStorePtr->longLengthType==ZSTD_llt_matchLength)
|
if (seqStorePtr->longLengthType==ZSTD_llt_matchLength)
|
||||||
mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
|
mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
|
||||||
|
return longOffsets;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ZSTD_useTargetCBlockSize():
|
/* ZSTD_useTargetCBlockSize():
|
||||||
@ -2620,6 +2616,7 @@ typedef struct {
|
|||||||
U32 MLtype;
|
U32 MLtype;
|
||||||
size_t size;
|
size_t size;
|
||||||
size_t lastCountSize; /* Accounts for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
|
size_t lastCountSize; /* Accounts for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
|
||||||
|
int longOffsets;
|
||||||
} ZSTD_symbolEncodingTypeStats_t;
|
} ZSTD_symbolEncodingTypeStats_t;
|
||||||
|
|
||||||
/* ZSTD_buildSequencesStatistics():
|
/* ZSTD_buildSequencesStatistics():
|
||||||
@ -2650,7 +2647,7 @@ ZSTD_buildSequencesStatistics(
|
|||||||
|
|
||||||
stats.lastCountSize = 0;
|
stats.lastCountSize = 0;
|
||||||
/* convert length/distances into codes */
|
/* convert length/distances into codes */
|
||||||
ZSTD_seqToCodes(seqStorePtr);
|
stats.longOffsets = ZSTD_seqToCodes(seqStorePtr);
|
||||||
assert(op <= oend);
|
assert(op <= oend);
|
||||||
assert(nbSeq != 0); /* ZSTD_selectEncodingType() divides by nbSeq */
|
assert(nbSeq != 0); /* ZSTD_selectEncodingType() divides by nbSeq */
|
||||||
/* build CTable for Literal Lengths */
|
/* build CTable for Literal Lengths */
|
||||||
@ -2774,11 +2771,11 @@ ZSTD_entropyCompressSeqStore_internal(
|
|||||||
const BYTE* const ofCodeTable = seqStorePtr->ofCode;
|
const BYTE* const ofCodeTable = seqStorePtr->ofCode;
|
||||||
const BYTE* const llCodeTable = seqStorePtr->llCode;
|
const BYTE* const llCodeTable = seqStorePtr->llCode;
|
||||||
const BYTE* const mlCodeTable = seqStorePtr->mlCode;
|
const BYTE* const mlCodeTable = seqStorePtr->mlCode;
|
||||||
const BYTE* const longOffsetsFlag = seqStorePtr->longOffsets;
|
|
||||||
BYTE* const ostart = (BYTE*)dst;
|
BYTE* const ostart = (BYTE*)dst;
|
||||||
BYTE* const oend = ostart + dstCapacity;
|
BYTE* const oend = ostart + dstCapacity;
|
||||||
BYTE* op = ostart;
|
BYTE* op = ostart;
|
||||||
size_t lastCountSize;
|
size_t lastCountSize;
|
||||||
|
int longOffsets = 0;
|
||||||
|
|
||||||
entropyWorkspace = count + (MaxSeq + 1);
|
entropyWorkspace = count + (MaxSeq + 1);
|
||||||
entropyWkspSize -= (MaxSeq + 1) * sizeof(*count);
|
entropyWkspSize -= (MaxSeq + 1) * sizeof(*count);
|
||||||
@ -2840,10 +2837,10 @@ ZSTD_entropyCompressSeqStore_internal(
|
|||||||
*seqHead = (BYTE)((stats.LLtype<<6) + (stats.Offtype<<4) + (stats.MLtype<<2));
|
*seqHead = (BYTE)((stats.LLtype<<6) + (stats.Offtype<<4) + (stats.MLtype<<2));
|
||||||
lastCountSize = stats.lastCountSize;
|
lastCountSize = stats.lastCountSize;
|
||||||
op += stats.size;
|
op += stats.size;
|
||||||
|
longOffsets = stats.longOffsets;
|
||||||
}
|
}
|
||||||
|
|
||||||
{ const BYTE longOffsets = longOffsetsFlag[0];
|
{ size_t const bitstreamSize = ZSTD_encodeSequences(
|
||||||
size_t const bitstreamSize = ZSTD_encodeSequences(
|
|
||||||
op, (size_t)(oend - op),
|
op, (size_t)(oend - op),
|
||||||
CTable_MatchLength, mlCodeTable,
|
CTable_MatchLength, mlCodeTable,
|
||||||
CTable_OffsetBits, ofCodeTable,
|
CTable_OffsetBits, ofCodeTable,
|
||||||
@ -3485,7 +3482,7 @@ ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize,
|
|||||||
static ZSTD_symbolEncodingTypeStats_t
|
static ZSTD_symbolEncodingTypeStats_t
|
||||||
ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy)
|
ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy)
|
||||||
{
|
{
|
||||||
ZSTD_symbolEncodingTypeStats_t stats = {set_basic, set_basic, set_basic, 0, 0};
|
ZSTD_symbolEncodingTypeStats_t stats = {set_basic, set_basic, set_basic, 0, 0, 0};
|
||||||
nextEntropy->litlength_repeatMode = FSE_repeat_none;
|
nextEntropy->litlength_repeatMode = FSE_repeat_none;
|
||||||
nextEntropy->offcode_repeatMode = FSE_repeat_none;
|
nextEntropy->offcode_repeatMode = FSE_repeat_none;
|
||||||
nextEntropy->matchlength_repeatMode = FSE_repeat_none;
|
nextEntropy->matchlength_repeatMode = FSE_repeat_none;
|
||||||
|
|||||||
@ -186,7 +186,6 @@ BYTE SEQUENCE_LITERAL_BUFFER[ZSTD_BLOCKSIZE_MAX]; /* storeSeq expects a place to
|
|||||||
BYTE SEQUENCE_LLCODE[ZSTD_BLOCKSIZE_MAX];
|
BYTE SEQUENCE_LLCODE[ZSTD_BLOCKSIZE_MAX];
|
||||||
BYTE SEQUENCE_MLCODE[ZSTD_BLOCKSIZE_MAX];
|
BYTE SEQUENCE_MLCODE[ZSTD_BLOCKSIZE_MAX];
|
||||||
BYTE SEQUENCE_OFCODE[ZSTD_BLOCKSIZE_MAX];
|
BYTE SEQUENCE_OFCODE[ZSTD_BLOCKSIZE_MAX];
|
||||||
BYTE SEQUENCE_LONGOFFSETS[1];
|
|
||||||
|
|
||||||
U64 WKSP[HUF_WORKSPACE_SIZE_U64];
|
U64 WKSP[HUF_WORKSPACE_SIZE_U64];
|
||||||
|
|
||||||
@ -635,7 +634,6 @@ static inline void initSeqStore(seqStore_t *seqStore) {
|
|||||||
seqStore->llCode = SEQUENCE_LLCODE;
|
seqStore->llCode = SEQUENCE_LLCODE;
|
||||||
seqStore->mlCode = SEQUENCE_MLCODE;
|
seqStore->mlCode = SEQUENCE_MLCODE;
|
||||||
seqStore->ofCode = SEQUENCE_OFCODE;
|
seqStore->ofCode = SEQUENCE_OFCODE;
|
||||||
seqStore->longOffsets = SEQUENCE_LONGOFFSETS;
|
|
||||||
|
|
||||||
ZSTD_resetSeqStore(seqStore);
|
ZSTD_resetSeqStore(seqStore);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -2222,6 +2222,66 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests)
|
|||||||
}
|
}
|
||||||
DISPLAYLEVEL(3, "OK \n");
|
DISPLAYLEVEL(3, "OK \n");
|
||||||
|
|
||||||
|
|
||||||
|
DISPLAYLEVEL(3, "test%3i : Testing large offset with small window size: ", testNb++);
|
||||||
|
{
|
||||||
|
ZSTD_CCtx* cctx = ZSTD_createCCtx();
|
||||||
|
ZSTD_DCtx* dctx = ZSTD_createDCtx();
|
||||||
|
|
||||||
|
/* Test large offset, small window size*/
|
||||||
|
{
|
||||||
|
size_t srcSize = 21;
|
||||||
|
void* const src = CNBuffer;
|
||||||
|
size_t dstSize = ZSTD_compressBound(srcSize);
|
||||||
|
void* const dst = compressedBuffer;
|
||||||
|
size_t const kNbSequences = 4;
|
||||||
|
ZSTD_Sequence* sequences = malloc(sizeof(ZSTD_Sequence) * kNbSequences);
|
||||||
|
void* const checkBuf = malloc(srcSize);
|
||||||
|
const size_t largeDictSize = 1 << 30;
|
||||||
|
ZSTD_CDict* cdict = NULL;
|
||||||
|
ZSTD_DDict* ddict = NULL;
|
||||||
|
|
||||||
|
/* Generate large dictionary */
|
||||||
|
void* dictBuffer = calloc(largeDictSize, 1);
|
||||||
|
ZSTD_compressionParameters cParams = ZSTD_getCParams(1, srcSize, largeDictSize);
|
||||||
|
cParams.minMatch = ZSTD_MINMATCH_MIN;
|
||||||
|
cParams.hashLog = ZSTD_HASHLOG_MIN;
|
||||||
|
cParams.chainLog = ZSTD_CHAINLOG_MIN;
|
||||||
|
|
||||||
|
cdict = ZSTD_createCDict_advanced(dictBuffer, largeDictSize, ZSTD_dlm_byRef, ZSTD_dct_rawContent, cParams, ZSTD_defaultCMem);
|
||||||
|
ddict = ZSTD_createDDict_advanced(dictBuffer, largeDictSize, ZSTD_dlm_byRef, ZSTD_dct_rawContent, ZSTD_defaultCMem);
|
||||||
|
|
||||||
|
ZSTD_CCtx_refCDict(cctx, cdict);
|
||||||
|
ZSTD_DCtx_refDDict(dctx, ddict);
|
||||||
|
|
||||||
|
sequences[0] = (ZSTD_Sequence) {3, 3, 3, 0};
|
||||||
|
sequences[1] = (ZSTD_Sequence) {1 << 29, 0, 3, 0};
|
||||||
|
sequences[2] = (ZSTD_Sequence) {1 << 29, 0, 9, 0};
|
||||||
|
sequences[3] = (ZSTD_Sequence) {3, 0, 3, 0};
|
||||||
|
|
||||||
|
cSize = ZSTD_compressSequences(cctx, dst, dstSize,
|
||||||
|
sequences, kNbSequences,
|
||||||
|
src, srcSize);
|
||||||
|
|
||||||
|
CHECK(ZSTD_isError(cSize), "Should not throw an error");
|
||||||
|
|
||||||
|
{
|
||||||
|
size_t dSize = ZSTD_decompressDCtx(dctx, checkBuf, srcSize, dst, cSize);
|
||||||
|
CHECK(ZSTD_isError(dSize), "Should not throw an error");
|
||||||
|
CHECK(memcmp(src, checkBuf, srcSize) != 0, "Corruption!");
|
||||||
|
}
|
||||||
|
|
||||||
|
free(sequences);
|
||||||
|
free(checkBuf);
|
||||||
|
free(dictBuffer);
|
||||||
|
ZSTD_freeCDict(cdict);
|
||||||
|
ZSTD_freeDDict(ddict);
|
||||||
|
}
|
||||||
|
ZSTD_freeCCtx(cctx);
|
||||||
|
ZSTD_freeDCtx(dctx);
|
||||||
|
}
|
||||||
|
DISPLAYLEVEL(3, "OK \n");
|
||||||
|
|
||||||
_end:
|
_end:
|
||||||
FUZ_freeDictionary(dictionary);
|
FUZ_freeDictionary(dictionary);
|
||||||
ZSTD_freeCStream(zc);
|
ZSTD_freeCStream(zc);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user