mirror of
https://github.com/facebook/zstd.git
synced 2025-10-09 00:05:28 -04:00
added conditional prefetch
depending on amount of work to do.
This commit is contained in:
parent
63a519dbf6
commit
4de344d505
@ -95,18 +95,20 @@
|
|||||||
#else
|
#else
|
||||||
# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
|
# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
|
||||||
# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
|
# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
|
||||||
# define PREFETCH(ptr) _mm_prefetch((const char*)ptr, _MM_HINT_T0)
|
# define PREFETCH(ptr) _mm_prefetch((const char*)ptr, _MM_HINT_T1)
|
||||||
# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
|
# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
|
||||||
# define PREFETCH(ptr) __builtin_prefetch(ptr, 0 /* rw==read */, 0 /* locality */)
|
# define PREFETCH(ptr) __builtin_prefetch(ptr, 0 /* rw==read */, 2 /* locality */)
|
||||||
# else
|
# else
|
||||||
# define PREFETCH(ptr) /* disabled */
|
# define PREFETCH(ptr) /* disabled */
|
||||||
# endif
|
# endif
|
||||||
#endif /* NO_PREFETCH */
|
#endif /* NO_PREFETCH */
|
||||||
|
|
||||||
|
#define CACHELINE_SIZE 64
|
||||||
|
|
||||||
#define PREFETCH_AREA(ptr, size) { \
|
#define PREFETCH_AREA(ptr, size) { \
|
||||||
size_t pos; \
|
size_t pos; \
|
||||||
for (pos=0; pos<size; pos++) { \
|
for (pos=0; pos<size; pos+=CACHELINE_SIZE) { \
|
||||||
PREFETCH( (const char*)(const void*)ptr + pos); \
|
PREFETCH( (const char*)ptr + pos); \
|
||||||
} \
|
} \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -578,13 +578,8 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|||||||
{
|
{
|
||||||
case set_repeat:
|
case set_repeat:
|
||||||
if (dctx->litEntropy==0) return ERROR(dictionary_corrupted);
|
if (dctx->litEntropy==0) return ERROR(dictionary_corrupted);
|
||||||
|
|
||||||
/* prefetch huffman table if cold */
|
|
||||||
if (dctx->ddictIsCold) {
|
|
||||||
PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));
|
|
||||||
}
|
|
||||||
|
|
||||||
/* fall-through */
|
/* fall-through */
|
||||||
|
|
||||||
case set_compressed:
|
case set_compressed:
|
||||||
if (srcSize < 5) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3 */
|
if (srcSize < 5) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3 */
|
||||||
{ size_t lhSize, litSize, litCSize;
|
{ size_t lhSize, litSize, litCSize;
|
||||||
@ -616,6 +611,11 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
|||||||
if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected);
|
if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected);
|
||||||
if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);
|
if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);
|
||||||
|
|
||||||
|
/* prefetch huffman table if cold */
|
||||||
|
if (dctx->ddictIsCold && (litSize > 256 /* heuristic */)) {
|
||||||
|
PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));
|
||||||
|
}
|
||||||
|
|
||||||
if (HUF_isError((litEncType==set_repeat) ?
|
if (HUF_isError((litEncType==set_repeat) ?
|
||||||
( singleStream ?
|
( singleStream ?
|
||||||
HUF_decompress1X_usingDTable_bmi2(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr, dctx->bmi2) :
|
HUF_decompress1X_usingDTable_bmi2(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr, dctx->bmi2) :
|
||||||
@ -897,7 +897,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
|
|||||||
const void* src, size_t srcSize,
|
const void* src, size_t srcSize,
|
||||||
const U32* baseValue, const U32* nbAdditionalBits,
|
const U32* baseValue, const U32* nbAdditionalBits,
|
||||||
const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
|
const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
|
||||||
int ddictIsCold)
|
int ddictIsCold, int nbSeq)
|
||||||
{
|
{
|
||||||
switch(type)
|
switch(type)
|
||||||
{
|
{
|
||||||
@ -917,7 +917,8 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
|
|||||||
case set_repeat:
|
case set_repeat:
|
||||||
if (!flagRepeatTable) return ERROR(corruption_detected);
|
if (!flagRepeatTable) return ERROR(corruption_detected);
|
||||||
/* prefetch FSE table if used */
|
/* prefetch FSE table if used */
|
||||||
if (ddictIsCold) {
|
if (ddictIsCold && (nbSeq > 16 /* heuristic */)) {
|
||||||
|
//if (ddictIsCold) {
|
||||||
const void* const pStart = *DTablePtr;
|
const void* const pStart = *DTablePtr;
|
||||||
size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog));
|
size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog));
|
||||||
PREFETCH_AREA(pStart, pSize);
|
PREFETCH_AREA(pStart, pSize);
|
||||||
@ -974,13 +975,14 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|||||||
const BYTE* const istart = (const BYTE* const)src;
|
const BYTE* const istart = (const BYTE* const)src;
|
||||||
const BYTE* const iend = istart + srcSize;
|
const BYTE* const iend = istart + srcSize;
|
||||||
const BYTE* ip = istart;
|
const BYTE* ip = istart;
|
||||||
|
int nbSeq;
|
||||||
DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
|
DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
|
||||||
|
|
||||||
/* check */
|
/* check */
|
||||||
if (srcSize < MIN_SEQUENCES_SIZE) return ERROR(srcSize_wrong);
|
if (srcSize < MIN_SEQUENCES_SIZE) return ERROR(srcSize_wrong);
|
||||||
|
|
||||||
/* SeqHead */
|
/* SeqHead */
|
||||||
{ int nbSeq = *ip++;
|
nbSeq = *ip++;
|
||||||
if (!nbSeq) { *nbSeqPtr=0; return 1; }
|
if (!nbSeq) { *nbSeqPtr=0; return 1; }
|
||||||
if (nbSeq > 0x7F) {
|
if (nbSeq > 0x7F) {
|
||||||
if (nbSeq == 0xFF) {
|
if (nbSeq == 0xFF) {
|
||||||
@ -992,7 +994,8 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
*nbSeqPtr = nbSeq;
|
*nbSeqPtr = nbSeq;
|
||||||
}
|
DEBUGLOG(2, "nbSeqs=%i", nbSeq);
|
||||||
|
|
||||||
|
|
||||||
/* FSE table descriptors */
|
/* FSE table descriptors */
|
||||||
if (ip+4 > iend) return ERROR(srcSize_wrong); /* minimum possible size */
|
if (ip+4 > iend) return ERROR(srcSize_wrong); /* minimum possible size */
|
||||||
@ -1007,7 +1010,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|||||||
ip, iend-ip,
|
ip, iend-ip,
|
||||||
LL_base, LL_bits,
|
LL_base, LL_bits,
|
||||||
LL_defaultDTable, dctx->fseEntropy,
|
LL_defaultDTable, dctx->fseEntropy,
|
||||||
dctx->ddictIsCold);
|
dctx->ddictIsCold, nbSeq);
|
||||||
if (ZSTD_isError(llhSize)) return ERROR(corruption_detected);
|
if (ZSTD_isError(llhSize)) return ERROR(corruption_detected);
|
||||||
ip += llhSize;
|
ip += llhSize;
|
||||||
}
|
}
|
||||||
@ -1017,7 +1020,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|||||||
ip, iend-ip,
|
ip, iend-ip,
|
||||||
OF_base, OF_bits,
|
OF_base, OF_bits,
|
||||||
OF_defaultDTable, dctx->fseEntropy,
|
OF_defaultDTable, dctx->fseEntropy,
|
||||||
dctx->ddictIsCold);
|
dctx->ddictIsCold, nbSeq);
|
||||||
if (ZSTD_isError(ofhSize)) return ERROR(corruption_detected);
|
if (ZSTD_isError(ofhSize)) return ERROR(corruption_detected);
|
||||||
ip += ofhSize;
|
ip += ofhSize;
|
||||||
}
|
}
|
||||||
@ -1027,7 +1030,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
|||||||
ip, iend-ip,
|
ip, iend-ip,
|
||||||
ML_base, ML_bits,
|
ML_base, ML_bits,
|
||||||
ML_defaultDTable, dctx->fseEntropy,
|
ML_defaultDTable, dctx->fseEntropy,
|
||||||
dctx->ddictIsCold);
|
dctx->ddictIsCold, nbSeq);
|
||||||
if (ZSTD_isError(mlhSize)) return ERROR(corruption_detected);
|
if (ZSTD_isError(mlhSize)) return ERROR(corruption_detected);
|
||||||
ip += mlhSize;
|
ip += mlhSize;
|
||||||
}
|
}
|
||||||
@ -2395,7 +2398,7 @@ size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
|
|||||||
/* prefetch dictionary content */
|
/* prefetch dictionary content */
|
||||||
if (dctx->ddictIsCold) {
|
if (dctx->ddictIsCold) {
|
||||||
size_t const dictSize = ddict->dictSize;
|
size_t const dictSize = ddict->dictSize;
|
||||||
size_t const pSize = MIN(dictSize, 32 KB); /* proposed heuristic : 8 x frameContentSize => need to know frameContentSize */
|
size_t const pSize = MIN(dictSize, 2 KB); /* very conservative; would need to know Nb of Copies in dictionary, or frameContentSize as a proxy */
|
||||||
const void* const pStart = (const char*)ddict->dictContent + dictSize - pSize;
|
const void* const pStart = (const char*)ddict->dictContent + dictSize - pSize;
|
||||||
PREFETCH_AREA(pStart, pSize);
|
PREFETCH_AREA(pStart, pSize);
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user