[bmi2] Add lzcnt and bmi target attributes

* When dynamic dispatching to bmi2 add lzcnt and bmi to the
  TARGET_ATTRIBUTE.
* Centralize the bmi2 TARGET_ATTRIBUTE definition to
  BMI2_TARGET_ATTRIBUTE so we can change it in the future.
* Only enable bmi2 when both bmi1 & bmi2 are supported. There shouldn't
  be any cases where bmi2 is supported but bmi1 isn't. But, since we are
  using the instruction we should check bmi1 as well.
This commit is contained in:
Nick Terrell 2021-11-30 17:43:28 -08:00
parent 7847c2fd68
commit 5414dd7978
10 changed files with 31 additions and 17 deletions

View File

@ -101,6 +101,13 @@
# define TARGET_ATTRIBUTE(target)
#endif
/* Target attribute for BMI2 dynamic dispatch.
* Enable lzcnt, bmi, and bmi2.
* We test for bmi1 & bmi2. lzcnt is included in bmi1.
*/
#define BMI2_TARGET_ATTRIBUTE TARGET_ATTRIBUTE("lzcnt,bmi,bmi2")
/* Enable runtime BMI2 dispatch based on the CPU.
* Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
*/

View File

@ -223,7 +223,7 @@ static size_t FSE_readNCount_body_default(
}
#if DYNAMIC_BMI2
TARGET_ATTRIBUTE("bmi2") static size_t FSE_readNCount_body_bmi2(
BMI2_TARGET_ATTRIBUTE static size_t FSE_readNCount_body_bmi2(
short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
const void* headerBuffer, size_t hbSize)
{
@ -343,7 +343,7 @@ static size_t HUF_readStats_body_default(BYTE* huffWeight, size_t hwSize, U32* r
}
#if DYNAMIC_BMI2
static TARGET_ATTRIBUTE("bmi2") size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats,
static BMI2_TARGET_ATTRIBUTE size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats,
U32* nbSymbolsPtr, U32* tableLogPtr,
const void* src, size_t srcSize,
void* workSpace, size_t wkspSize)

View File

@ -365,7 +365,7 @@ static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, co
}
#if DYNAMIC_BMI2
TARGET_ATTRIBUTE("bmi2") static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
BMI2_TARGET_ATTRIBUTE static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
{
return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1);
}

View File

@ -20,6 +20,7 @@
* Dependencies
***************************************/
#include "compiler.h"
#include "cpu.h"
#include "mem.h"
#include "debug.h" /* assert, DEBUGLOG, RAWLOG, g_debuglevel */
#include "error_private.h"
@ -472,6 +473,14 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
const void* src, size_t srcSize);
/**
* @returns true iff the CPU supports dynamic BMI2 dispatch.
*/
MEM_STATIC int ZSTD_cpuSupportsBmi2(void)
{
ZSTD_cpuid_t cpuid = ZSTD_cpuid();
return ZSTD_cpuid_bmi1(cpuid) && ZSTD_cpuid_bmi2(cpuid);
}
#if defined (__cplusplus)
}

View File

@ -1029,7 +1029,7 @@ HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
#if DYNAMIC_BMI2
static TARGET_ATTRIBUTE("bmi2") size_t
static BMI2_TARGET_ATTRIBUTE size_t
HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
const void* src, size_t srcSize,
const HUF_CElt* CTable)

View File

@ -12,7 +12,6 @@
* Dependencies
***************************************/
#include "../common/zstd_deps.h" /* INT_MAX, ZSTD_memset, ZSTD_memcpy */
#include "../common/cpu.h"
#include "../common/mem.h"
#include "hist.h" /* HIST_countFast_wksp */
#define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */
@ -100,7 +99,7 @@ static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager)
assert(cctx != NULL);
ZSTD_memset(cctx, 0, sizeof(*cctx));
cctx->customMem = memManager;
cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
cctx->bmi2 = ZSTD_cpuSupportsBmi2();
{ size_t const err = ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters);
assert(!ZSTD_isError(err));
(void)err;

View File

@ -399,7 +399,7 @@ ZSTD_encodeSequences_default(
#if DYNAMIC_BMI2
static TARGET_ATTRIBUTE("bmi2") size_t
static BMI2_TARGET_ATTRIBUTE size_t
ZSTD_encodeSequences_bmi2(
void* dst, size_t dstCapacity,
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,

View File

@ -68,7 +68,7 @@
#endif
#if HUF_ENABLE_ASM_X86_64_BMI2 && DYNAMIC_BMI2
# define HUF_ASM_X86_64_BMI2_ATTRS TARGET_ATTRIBUTE("bmi2")
# define HUF_ASM_X86_64_BMI2_ATTRS BMI2_TARGET_ATTRIBUTE
#else
# define HUF_ASM_X86_64_BMI2_ATTRS
#endif
@ -120,7 +120,7 @@
return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
} \
\
static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \
static BMI2_TARGET_ATTRIBUTE size_t fn##_bmi2( \
void* dst, size_t dstSize, \
const void* cSrc, size_t cSrcSize, \
const HUF_DTable* DTable) \
@ -670,7 +670,7 @@ HUF_decompress4X1_usingDTable_internal_body(
}
#if HUF_NEED_BMI2_FUNCTION
static TARGET_ATTRIBUTE("bmi2")
static BMI2_TARGET_ATTRIBUTE
size_t HUF_decompress4X1_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc,
size_t cSrcSize, HUF_DTable const* DTable) {
return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
@ -1386,7 +1386,7 @@ HUF_decompress4X2_usingDTable_internal_body(
}
#if HUF_NEED_BMI2_FUNCTION
static TARGET_ATTRIBUTE("bmi2")
static BMI2_TARGET_ATTRIBUTE
size_t HUF_decompress4X2_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc,
size_t cSrcSize, HUF_DTable const* DTable) {
return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);

View File

@ -56,7 +56,6 @@
* Dependencies
*********************************************************/
#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
#include "../common/cpu.h" /* bmi2 */
#include "../common/mem.h" /* low level memory routines */
#define FSE_STATIC_LINKING_ONLY
#include "../common/fse.h"
@ -265,7 +264,7 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
dctx->noForwardProgress = 0;
dctx->oversizedDuration = 0;
#if DYNAMIC_BMI2
dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
dctx->bmi2 = ZSTD_cpuSupportsBmi2();
#endif
dctx->ddictSet = NULL;
ZSTD_DCtx_resetParameters(dctx);

View File

@ -571,7 +571,7 @@ static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
}
#if DYNAMIC_BMI2
TARGET_ATTRIBUTE("bmi2") static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
BMI2_TARGET_ATTRIBUTE static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
const short* normalizedCounter, unsigned maxSymbolValue,
const U32* baseValue, const U32* nbAdditionalBits,
unsigned tableLog, void* wksp, size_t wkspSize)
@ -1846,7 +1846,7 @@ ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
#if DYNAMIC_BMI2
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
static TARGET_ATTRIBUTE("bmi2") size_t
static BMI2_TARGET_ATTRIBUTE size_t
DONT_VECTORIZE
ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize,
@ -1856,7 +1856,7 @@ ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
{
return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
}
static TARGET_ATTRIBUTE("bmi2") size_t
static BMI2_TARGET_ATTRIBUTE size_t
DONT_VECTORIZE
ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize,
@ -1869,7 +1869,7 @@ ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx,
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
static TARGET_ATTRIBUTE("bmi2") size_t
static BMI2_TARGET_ATTRIBUTE size_t
ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize,
const void* seqStart, size_t seqSize, int nbSeq,