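Added conditional prefetch

Prefetching now depends on the amount of work to do: tables are prefetched only when the literal size or the number of sequences exceeds a heuristic threshold.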
This commit is contained in:
Yann Collet 2018-09-12 10:29:47 -07:00
parent 63a519dbf6
commit 4de344d505
2 changed files with 35 additions and 30 deletions

View File

@ -95,18 +95,20 @@
#else #else
# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */ # if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ # include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
# define PREFETCH(ptr) _mm_prefetch((const char*)ptr, _MM_HINT_T0) # define PREFETCH(ptr) _mm_prefetch((const char*)ptr, _MM_HINT_T1)
# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) # elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
# define PREFETCH(ptr) __builtin_prefetch(ptr, 0 /* rw==read */, 0 /* locality */) # define PREFETCH(ptr) __builtin_prefetch(ptr, 0 /* rw==read */, 2 /* locality */)
# else # else
# define PREFETCH(ptr) /* disabled */ # define PREFETCH(ptr) /* disabled */
# endif # endif
#endif /* NO_PREFETCH */ #endif /* NO_PREFETCH */
#define CACHELINE_SIZE 64
#define PREFETCH_AREA(ptr, size) { \ #define PREFETCH_AREA(ptr, size) { \
size_t pos; \ size_t pos; \
for (pos=0; pos<size; pos++) { \ for (pos=0; pos<size; pos+=CACHELINE_SIZE) { \
PREFETCH( (const char*)(const void*)ptr + pos); \ PREFETCH( (const char*)ptr + pos); \
} \ } \
} }

View File

@ -578,13 +578,8 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
{ {
case set_repeat: case set_repeat:
if (dctx->litEntropy==0) return ERROR(dictionary_corrupted); if (dctx->litEntropy==0) return ERROR(dictionary_corrupted);
/* prefetch huffman table if cold */
if (dctx->ddictIsCold) {
PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));
}
/* fall-through */ /* fall-through */
case set_compressed: case set_compressed:
if (srcSize < 5) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3 */ if (srcSize < 5) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3 */
{ size_t lhSize, litSize, litCSize; { size_t lhSize, litSize, litCSize;
@ -616,6 +611,11 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected); if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected);
if (litCSize + lhSize > srcSize) return ERROR(corruption_detected); if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);
/* prefetch huffman table if cold */
if (dctx->ddictIsCold && (litSize > 256 /* heuristic */)) {
PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));
}
if (HUF_isError((litEncType==set_repeat) ? if (HUF_isError((litEncType==set_repeat) ?
( singleStream ? ( singleStream ?
HUF_decompress1X_usingDTable_bmi2(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr, dctx->bmi2) : HUF_decompress1X_usingDTable_bmi2(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr, dctx->bmi2) :
@ -897,7 +897,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
const void* src, size_t srcSize, const void* src, size_t srcSize,
const U32* baseValue, const U32* nbAdditionalBits, const U32* baseValue, const U32* nbAdditionalBits,
const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable, const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
int ddictIsCold) int ddictIsCold, int nbSeq)
{ {
switch(type) switch(type)
{ {
@ -917,7 +917,8 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
case set_repeat: case set_repeat:
if (!flagRepeatTable) return ERROR(corruption_detected); if (!flagRepeatTable) return ERROR(corruption_detected);
/* prefetch FSE table if used */ /* prefetch FSE table if used */
if (ddictIsCold) { if (ddictIsCold && (nbSeq > 16 /* heuristic */)) {
//if (ddictIsCold) {
const void* const pStart = *DTablePtr; const void* const pStart = *DTablePtr;
size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog)); size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog));
PREFETCH_AREA(pStart, pSize); PREFETCH_AREA(pStart, pSize);
@ -974,13 +975,14 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
const BYTE* const istart = (const BYTE* const)src; const BYTE* const istart = (const BYTE* const)src;
const BYTE* const iend = istart + srcSize; const BYTE* const iend = istart + srcSize;
const BYTE* ip = istart; const BYTE* ip = istart;
int nbSeq;
DEBUGLOG(5, "ZSTD_decodeSeqHeaders"); DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
/* check */ /* check */
if (srcSize < MIN_SEQUENCES_SIZE) return ERROR(srcSize_wrong); if (srcSize < MIN_SEQUENCES_SIZE) return ERROR(srcSize_wrong);
/* SeqHead */ /* SeqHead */
{ int nbSeq = *ip++; nbSeq = *ip++;
if (!nbSeq) { *nbSeqPtr=0; return 1; } if (!nbSeq) { *nbSeqPtr=0; return 1; }
if (nbSeq > 0x7F) { if (nbSeq > 0x7F) {
if (nbSeq == 0xFF) { if (nbSeq == 0xFF) {
@ -992,7 +994,8 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
} }
} }
*nbSeqPtr = nbSeq; *nbSeqPtr = nbSeq;
} DEBUGLOG(2, "nbSeqs=%i", nbSeq);
/* FSE table descriptors */ /* FSE table descriptors */
if (ip+4 > iend) return ERROR(srcSize_wrong); /* minimum possible size */ if (ip+4 > iend) return ERROR(srcSize_wrong); /* minimum possible size */
@ -1007,7 +1010,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
ip, iend-ip, ip, iend-ip,
LL_base, LL_bits, LL_base, LL_bits,
LL_defaultDTable, dctx->fseEntropy, LL_defaultDTable, dctx->fseEntropy,
dctx->ddictIsCold); dctx->ddictIsCold, nbSeq);
if (ZSTD_isError(llhSize)) return ERROR(corruption_detected); if (ZSTD_isError(llhSize)) return ERROR(corruption_detected);
ip += llhSize; ip += llhSize;
} }
@ -1017,7 +1020,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
ip, iend-ip, ip, iend-ip,
OF_base, OF_bits, OF_base, OF_bits,
OF_defaultDTable, dctx->fseEntropy, OF_defaultDTable, dctx->fseEntropy,
dctx->ddictIsCold); dctx->ddictIsCold, nbSeq);
if (ZSTD_isError(ofhSize)) return ERROR(corruption_detected); if (ZSTD_isError(ofhSize)) return ERROR(corruption_detected);
ip += ofhSize; ip += ofhSize;
} }
@ -1027,7 +1030,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
ip, iend-ip, ip, iend-ip,
ML_base, ML_bits, ML_base, ML_bits,
ML_defaultDTable, dctx->fseEntropy, ML_defaultDTable, dctx->fseEntropy,
dctx->ddictIsCold); dctx->ddictIsCold, nbSeq);
if (ZSTD_isError(mlhSize)) return ERROR(corruption_detected); if (ZSTD_isError(mlhSize)) return ERROR(corruption_detected);
ip += mlhSize; ip += mlhSize;
} }
@ -2395,7 +2398,7 @@ size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
/* prefetch dictionary content */ /* prefetch dictionary content */
if (dctx->ddictIsCold) { if (dctx->ddictIsCold) {
size_t const dictSize = ddict->dictSize; size_t const dictSize = ddict->dictSize;
size_t const pSize = MIN(dictSize, 32 KB); /* proposed heuristic : 8 x frameContentSize => need to know frameContentSize */ size_t const pSize = MIN(dictSize, 2 KB); /* very conservative; would need to know Nb of Copies in dictionary, or frameContentSize as a proxy */
const void* const pStart = (const char*)ddict->dictContent + dictSize - pSize; const void* const pStart = (const char*)ddict->dictContent + dictSize - pSize;
PREFETCH_AREA(pStart, pSize); PREFETCH_AREA(pStart, pSize);
} }