diff --git a/contrib/largeNbDicts/Makefile b/contrib/largeNbDicts/Makefile index 3b19d49af..730250f96 100644 --- a/contrib/largeNbDicts/Makefile +++ b/contrib/largeNbDicts/Makefile @@ -33,7 +33,7 @@ largeNbDicts: util.o bench.o datagen.o xxhash.o largeNbDicts.c $(LIBZSTD) .PHONY: $(LIBZSTD) $(LIBZSTD): - $(MAKE) -C $(LIBDIR) libzstd.a + $(MAKE) -C $(LIBDIR) libzstd.a CFLAGS="$(CFLAGS)" bench.o : $(PROGDIR)/bench.c $(CC) $(CPPFLAGS) $(CFLAGS) $^ -c @@ -50,4 +50,5 @@ xxhash.o : $(LIBDIR)/common/xxhash.c clean: $(RM) *.o + $(MAKE) -C $(LIBDIR) clean > /dev/null $(RM) largeNbDicts diff --git a/contrib/largeNbDicts/largeNbDicts.c b/contrib/largeNbDicts/largeNbDicts.c index e0bc55380..d7639fc40 100644 --- a/contrib/largeNbDicts/largeNbDicts.c +++ b/contrib/largeNbDicts/largeNbDicts.c @@ -49,6 +49,7 @@ /*--- Macros ---*/ + #define CONTROL(c) { if (!(c)) abort(); } #undef MIN #define MIN(a,b) ((a) < (b) ? (a) : (b)) @@ -594,6 +595,7 @@ int bench(const char** fileNameTable, unsigned nbFiles, if (blockSize) DISPLAYLEVEL(3, "of max size %u bytes ", (unsigned)blockSize); DISPLAYLEVEL(3, "\n"); + size_t const totalSrcSlicesSize = sliceCollection_totalCapacity(srcSlices); size_t* const dstCapacities = malloc(nbBlocks * sizeof(*dstCapacities)); @@ -625,8 +627,8 @@ int bench(const char** fileNameTable, unsigned nbFiles, /* dictionary determination */ buffer_t const dictBuffer = createDictionaryBuffer(dictionary, - srcBuffer.ptr, - srcSlices.capacities, nbBlocks, + srcs.buffer.ptr, + srcs.slices.capacities, srcs.slices.nbSlices, DICTSIZE); CONTROL(dictBuffer.ptr != NULL); @@ -637,7 +639,7 @@ int bench(const char** fileNameTable, unsigned nbFiles, CONTROL(cTotalSizeNoDict != 0); DISPLAYLEVEL(3, "compressing at level %u without dictionary : Ratio=%.2f (%u bytes) \n", clevel, - (double)srcSize / cTotalSizeNoDict, (unsigned)cTotalSizeNoDict); + (double)totalSrcSlicesSize / cTotalSizeNoDict, (unsigned)cTotalSizeNoDict); size_t* const cSizes = malloc(nbBlocks * sizeof(size_t)); 
CONTROL(cSizes != NULL); @@ -646,7 +648,7 @@ int bench(const char** fileNameTable, unsigned nbFiles, CONTROL(cTotalSize != 0); DISPLAYLEVEL(3, "compressed using a %u bytes dictionary : Ratio=%.2f (%u bytes) \n", (unsigned)dictBuffer.size, - (double)srcSize / cTotalSize, (unsigned)cTotalSize); + (double)totalSrcSlicesSize / cTotalSize, (unsigned)cTotalSize); /* now dstSlices contain the real compressed size of each block, instead of the maximum capacity */ shrinkSizes(dstSlices, cSizes); diff --git a/lib/decompress/zstd_ddict.c b/lib/decompress/zstd_ddict.c index 6569eeed1..2ad044068 100644 --- a/lib/decompress/zstd_ddict.c +++ b/lib/decompress/zstd_ddict.c @@ -15,7 +15,6 @@ * Dependencies *********************************************************/ #include <string.h> /* memcpy, memmove, memset */ -#include "compiler.h" /* prefetch */ #include "cpu.h" /* bmi2 */ #include "mem.h" /* low level memory routines */ #define FSE_STATIC_LINKING_ONLY diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 986ebff9e..549cc7582 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -56,7 +56,6 @@ * Dependencies *********************************************************/ #include <string.h> /* memcpy, memmove, memset */ -#include "compiler.h" /* prefetch */ #include "cpu.h" /* bmi2 */ #include "mem.h" /* low level memory routines */ #define FSE_STATIC_LINKING_ONLY diff --git a/lib/decompress/zstd_decompress_block.c b/lib/decompress/zstd_decompress_block.c index 71bb8e97b..869bdd9aa 100644 --- a/lib/decompress/zstd_decompress_block.c +++ b/lib/decompress/zstd_decompress_block.c @@ -507,16 +507,6 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, } } - /* prefetch dictionary content */ - if (dctx->ddictIsCold) { - size_t const dictSize = (const char*)dctx->prefixStart - (const char*)dctx->virtualStart; - size_t const psmin = MIN(dictSize, (size_t)(64*nbSeq) /* heuristic */ ); - size_t const pSize = MIN(psmin, 128 KB /* 
protection */ ); - const void* const pStart = (const char*)dctx->dictEnd - pSize; - PREFETCH_AREA(pStart, pSize); - dctx->ddictIsCold = 0; - } - return ip-istart; } @@ -1046,6 +1036,7 @@ ZSTD_decompressSequencesLong_body( /* prepare in advance */ for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNbentropy.rep[i] = (U32)(seqState.prevOffset[i]); } -#undef STORED_SEQS -#undef STORED_SEQS_MASK -#undef ADVANCED_SEQS } /* last literal segment */ @@ -1213,20 +1201,27 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, } /* Build Decoding Tables */ - { int nbSeq; + { int usePrefetchDecoder = dctx->ddictIsCold; + int nbSeq; size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize); if (ZSTD_isError(seqHSize)) return seqHSize; ip += seqHSize; srcSize -= seqHSize; - if ( (!frame || (dctx->fParams.windowSize > (1<<24))) - && (nbSeq>0) ) { /* could probably use a larger nbSeq limit */ + if ( !usePrefetchDecoder + && (!frame || (dctx->fParams.windowSize > (1<<24))) + && (nbSeq>ADVANCED_SEQS) ) { /* could probably use a larger nbSeq limit */ U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr); U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */ - if (shareLongOffsets >= minShare) - return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset); + usePrefetchDecoder = (shareLongOffsets >= minShare); } + dctx->ddictIsCold = 0; + + if (usePrefetchDecoder) + return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset); + + /* else */ return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset); } }