Enable STATIC_BMI2 for gcc/clang

Some usage (e.g. BIT_getLowerBit) uses it without checking for MSVC,
so enabling for clang gives a small performance boost.
This commit is contained in:
Ilya Tokar 2022-03-01 18:49:10 -05:00
parent 0c386afbfd
commit 7c3d1cb3ab
2 changed files with 6 additions and 4 deletions

View File

@ -37,8 +37,8 @@ extern "C" {
* Target specific
=========================================*/
#ifndef ZSTD_NO_INTRINSICS
# if defined(__BMI__) && defined(__GNUC__)
# include <immintrin.h> /* support for bextr (experimental) */
# if (defined(__BMI__) || defined(__BMI2__)) && defined(__GNUC__)
# include <immintrin.h> /* support for bextr (experimental)/bzhi */
# elif defined(__ICCARM__)
# include <intrinsics.h>
# endif
@ -164,8 +164,8 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
{
#if defined(STATIC_BMI2) && STATIC_BMI2 == 1
return _bzhi_u64(bitContainer, nbBits);
#if defined(STATIC_BMI2) && STATIC_BMI2 == 1 && !defined(ZSTD_NO_INTRINSICS)
return _bzhi_u64(bitContainer, nbBits);
#else
assert(nbBits < BIT_MASK_SIZE);
return bitContainer & BIT_mask[nbBits];

View File

@ -181,6 +181,8 @@
# ifdef __AVX2__ //MSVC does not have a BMI2 specific flag, but every CPU that supports AVX2 also supports BMI2
# define STATIC_BMI2 1
# endif
# elif defined(__BMI2__) && defined(__x86_64__) && defined(__GNUC__)
# define STATIC_BMI2 1
# endif
#endif