mirror of
https://github.com/facebook/zstd.git
synced 2025-11-22 00:10:22 -05:00
[bmi2] Add lzcnt and bmi target attributes
* When dynamically dispatching to bmi2, add lzcnt and bmi to the TARGET_ATTRIBUTE.
* Centralize the bmi2 TARGET_ATTRIBUTE definition in BMI2_TARGET_ATTRIBUTE so we can change it in one place in the future.
* Only enable bmi2 when both bmi1 & bmi2 are supported. There shouldn't be any cases where bmi2 is supported but bmi1 isn't, but since the bmi2 code paths are now compiled with bmi1 instructions enabled, we should check for bmi1 as well.
This commit is contained in:
parent
7847c2fd68
commit
5414dd7978
@ -101,6 +101,13 @@
|
||||
# define TARGET_ATTRIBUTE(target)
|
||||
#endif
|
||||
|
||||
/* Target attribute for BMI2 dynamic dispatch.
|
||||
* Enable lzcnt, bmi, and bmi2.
|
||||
* At runtime we test for bmi1 & bmi2 only; lzcnt is not tested separately because it is treated as part of bmi1.
|
||||
*/
|
||||
#define BMI2_TARGET_ATTRIBUTE TARGET_ATTRIBUTE("lzcnt,bmi,bmi2")
|
||||
|
||||
|
||||
/* Enable runtime BMI2 dispatch based on the CPU.
|
||||
* Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
|
||||
*/
|
||||
|
||||
@ -223,7 +223,7 @@ static size_t FSE_readNCount_body_default(
|
||||
}
|
||||
|
||||
#if DYNAMIC_BMI2
|
||||
TARGET_ATTRIBUTE("bmi2") static size_t FSE_readNCount_body_bmi2(
|
||||
BMI2_TARGET_ATTRIBUTE static size_t FSE_readNCount_body_bmi2(
|
||||
short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
|
||||
const void* headerBuffer, size_t hbSize)
|
||||
{
|
||||
@ -343,7 +343,7 @@ static size_t HUF_readStats_body_default(BYTE* huffWeight, size_t hwSize, U32* r
|
||||
}
|
||||
|
||||
#if DYNAMIC_BMI2
|
||||
static TARGET_ATTRIBUTE("bmi2") size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats,
|
||||
static BMI2_TARGET_ATTRIBUTE size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats,
|
||||
U32* nbSymbolsPtr, U32* tableLogPtr,
|
||||
const void* src, size_t srcSize,
|
||||
void* workSpace, size_t wkspSize)
|
||||
|
||||
@ -365,7 +365,7 @@ static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, co
|
||||
}
|
||||
|
||||
#if DYNAMIC_BMI2
|
||||
TARGET_ATTRIBUTE("bmi2") static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
|
||||
/* BMI2 variant of the FSE workspace decompressor, built with BMI2_TARGET_ATTRIBUTE.
 * It forwards every argument unchanged to FSE_decompress_wksp_body and appends a constant 1.
 * Returns whatever FSE_decompress_wksp_body returns.
 * NOTE(review): the trailing 1 presumably selects the bmi2 code path inside the shared body; confirm against FSE_decompress_wksp_body. */
BMI2_TARGET_ATTRIBUTE static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
|
||||
{
|
||||
return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1);
|
||||
}
|
||||
|
||||
@ -20,6 +20,7 @@
|
||||
* Dependencies
|
||||
***************************************/
|
||||
#include "compiler.h"
|
||||
#include "cpu.h"
|
||||
#include "mem.h"
|
||||
#include "debug.h" /* assert, DEBUGLOG, RAWLOG, g_debuglevel */
|
||||
#include "error_private.h"
|
||||
@ -472,6 +473,14 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
|
||||
size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
|
||||
const void* src, size_t srcSize);
|
||||
|
||||
/**
|
||||
* @returns 1 iff the CPU reports both BMI1 and BMI2 support (the condition required for dynamic BMI2 dispatch), 0 otherwise.
|
||||
*/
|
||||
MEM_STATIC int ZSTD_cpuSupportsBmi2(void)
|
||||
{
|
||||
/* Take a single snapshot of the CPU feature flags; both tests below read from it. */
ZSTD_cpuid_t cpuid = ZSTD_cpuid();
|
||||
/* Require BMI1 as well as BMI2: the dispatched functions are compiled with
 * BMI2_TARGET_ATTRIBUTE ("lzcnt,bmi,bmi2"), so they may use more than BMI2 alone.
 * The && makes the result exactly 0 or 1. */
return ZSTD_cpuid_bmi1(cpuid) && ZSTD_cpuid_bmi2(cpuid);
|
||||
}
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
|
||||
@ -1029,7 +1029,7 @@ HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
|
||||
|
||||
#if DYNAMIC_BMI2
|
||||
|
||||
static TARGET_ATTRIBUTE("bmi2") size_t
|
||||
static BMI2_TARGET_ATTRIBUTE size_t
|
||||
HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
|
||||
const void* src, size_t srcSize,
|
||||
const HUF_CElt* CTable)
|
||||
|
||||
@ -12,7 +12,6 @@
|
||||
* Dependencies
|
||||
***************************************/
|
||||
#include "../common/zstd_deps.h" /* INT_MAX, ZSTD_memset, ZSTD_memcpy */
|
||||
#include "../common/cpu.h"
|
||||
#include "../common/mem.h"
|
||||
#include "hist.h" /* HIST_countFast_wksp */
|
||||
#define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */
|
||||
@ -100,7 +99,7 @@ static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager)
|
||||
assert(cctx != NULL);
|
||||
ZSTD_memset(cctx, 0, sizeof(*cctx));
|
||||
cctx->customMem = memManager;
|
||||
cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
|
||||
cctx->bmi2 = ZSTD_cpuSupportsBmi2();
|
||||
{ size_t const err = ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters);
|
||||
assert(!ZSTD_isError(err));
|
||||
(void)err;
|
||||
|
||||
@ -399,7 +399,7 @@ ZSTD_encodeSequences_default(
|
||||
|
||||
#if DYNAMIC_BMI2
|
||||
|
||||
static TARGET_ATTRIBUTE("bmi2") size_t
|
||||
static BMI2_TARGET_ATTRIBUTE size_t
|
||||
ZSTD_encodeSequences_bmi2(
|
||||
void* dst, size_t dstCapacity,
|
||||
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
|
||||
|
||||
@ -68,7 +68,7 @@
|
||||
#endif
|
||||
|
||||
#if HUF_ENABLE_ASM_X86_64_BMI2 && DYNAMIC_BMI2
|
||||
# define HUF_ASM_X86_64_BMI2_ATTRS TARGET_ATTRIBUTE("bmi2")
|
||||
# define HUF_ASM_X86_64_BMI2_ATTRS BMI2_TARGET_ATTRIBUTE
|
||||
#else
|
||||
# define HUF_ASM_X86_64_BMI2_ATTRS
|
||||
#endif
|
||||
@ -120,7 +120,7 @@
|
||||
return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
|
||||
} \
|
||||
\
|
||||
static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \
|
||||
static BMI2_TARGET_ATTRIBUTE size_t fn##_bmi2( \
|
||||
void* dst, size_t dstSize, \
|
||||
const void* cSrc, size_t cSrcSize, \
|
||||
const HUF_DTable* DTable) \
|
||||
@ -670,7 +670,7 @@ HUF_decompress4X1_usingDTable_internal_body(
|
||||
}
|
||||
|
||||
#if HUF_NEED_BMI2_FUNCTION
|
||||
static TARGET_ATTRIBUTE("bmi2")
|
||||
static BMI2_TARGET_ATTRIBUTE
|
||||
size_t HUF_decompress4X1_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc,
|
||||
size_t cSrcSize, HUF_DTable const* DTable) {
|
||||
return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
|
||||
@ -1386,7 +1386,7 @@ HUF_decompress4X2_usingDTable_internal_body(
|
||||
}
|
||||
|
||||
#if HUF_NEED_BMI2_FUNCTION
|
||||
static TARGET_ATTRIBUTE("bmi2")
|
||||
static BMI2_TARGET_ATTRIBUTE
|
||||
size_t HUF_decompress4X2_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc,
|
||||
size_t cSrcSize, HUF_DTable const* DTable) {
|
||||
return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
|
||||
|
||||
@ -56,7 +56,6 @@
|
||||
* Dependencies
|
||||
*********************************************************/
|
||||
#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
|
||||
#include "../common/cpu.h" /* bmi2 */
|
||||
#include "../common/mem.h" /* low level memory routines */
|
||||
#define FSE_STATIC_LINKING_ONLY
|
||||
#include "../common/fse.h"
|
||||
@ -265,7 +264,7 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
|
||||
dctx->noForwardProgress = 0;
|
||||
dctx->oversizedDuration = 0;
|
||||
#if DYNAMIC_BMI2
|
||||
dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
|
||||
dctx->bmi2 = ZSTD_cpuSupportsBmi2();
|
||||
#endif
|
||||
dctx->ddictSet = NULL;
|
||||
ZSTD_DCtx_resetParameters(dctx);
|
||||
|
||||
@ -571,7 +571,7 @@ static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
|
||||
}
|
||||
|
||||
#if DYNAMIC_BMI2
|
||||
TARGET_ATTRIBUTE("bmi2") static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
|
||||
BMI2_TARGET_ATTRIBUTE static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
|
||||
const short* normalizedCounter, unsigned maxSymbolValue,
|
||||
const U32* baseValue, const U32* nbAdditionalBits,
|
||||
unsigned tableLog, void* wksp, size_t wkspSize)
|
||||
@ -1846,7 +1846,7 @@ ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
|
||||
#if DYNAMIC_BMI2
|
||||
|
||||
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
|
||||
static TARGET_ATTRIBUTE("bmi2") size_t
|
||||
static BMI2_TARGET_ATTRIBUTE size_t
|
||||
DONT_VECTORIZE
|
||||
ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
|
||||
void* dst, size_t maxDstSize,
|
||||
@ -1856,7 +1856,7 @@ ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
|
||||
{
|
||||
return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
|
||||
}
|
||||
static TARGET_ATTRIBUTE("bmi2") size_t
|
||||
static BMI2_TARGET_ATTRIBUTE size_t
|
||||
DONT_VECTORIZE
|
||||
ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx,
|
||||
void* dst, size_t maxDstSize,
|
||||
@ -1869,7 +1869,7 @@ ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx,
|
||||
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
|
||||
|
||||
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
|
||||
static TARGET_ATTRIBUTE("bmi2") size_t
|
||||
static BMI2_TARGET_ATTRIBUTE size_t
|
||||
ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
|
||||
void* dst, size_t maxDstSize,
|
||||
const void* seqStart, size_t seqSize, int nbSeq,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user