mirror of
https://github.com/facebook/zstd.git
synced 2025-11-28 00:04:28 -05:00
Merge pull request #2258 from Niadb/dev
Added STATIC_BMI2 for compile time detection of BMI2 on MSVC, when enabled various intrinsics are used
This commit is contained in:
commit
38e38546a4
@ -17,7 +17,6 @@
|
|||||||
#if defined (__cplusplus)
|
#if defined (__cplusplus)
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This API consists of small unitary functions, which must be inlined for best performance.
|
* This API consists of small unitary functions, which must be inlined for best performance.
|
||||||
* Since link-time-optimization is not available for all compilers,
|
* Since link-time-optimization is not available for all compilers,
|
||||||
@ -141,8 +140,12 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val)
|
|||||||
assert(val != 0);
|
assert(val != 0);
|
||||||
{
|
{
|
||||||
# if defined(_MSC_VER) /* Visual */
|
# if defined(_MSC_VER) /* Visual */
|
||||||
unsigned long r=0;
|
# if STATIC_BMI2 == 1
|
||||||
return _BitScanReverse ( &r, val ) ? (unsigned)r : 0;
|
return _lzcnt_u32(val) ^ 31;
|
||||||
|
# else
|
||||||
|
unsigned long r = 0;
|
||||||
|
return _BitScanReverse(&r, val) ? (unsigned)r : 0;
|
||||||
|
# endif
|
||||||
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
|
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
|
||||||
return __builtin_clz (val) ^ 31;
|
return __builtin_clz (val) ^ 31;
|
||||||
# elif defined(__ICCARM__) /* IAR Intrinsic */
|
# elif defined(__ICCARM__) /* IAR Intrinsic */
|
||||||
@ -317,12 +320,12 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
|
|||||||
return srcSize;
|
return srcSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
|
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
|
||||||
{
|
{
|
||||||
return bitContainer >> start;
|
return bitContainer >> start;
|
||||||
}
|
}
|
||||||
|
|
||||||
MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
|
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
|
||||||
{
|
{
|
||||||
U32 const regMask = sizeof(bitContainer)*8 - 1;
|
U32 const regMask = sizeof(bitContainer)*8 - 1;
|
||||||
/* if start > regMask, bitstream is corrupted, and result is undefined */
|
/* if start > regMask, bitstream is corrupted, and result is undefined */
|
||||||
@ -330,10 +333,14 @@ MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 co
|
|||||||
return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
|
return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
|
||||||
}
|
}
|
||||||
|
|
||||||
MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
|
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
|
||||||
{
|
{
|
||||||
|
#if STATIC_BMI2
|
||||||
|
return _bzhi_u64(bitContainer, nbBits);
|
||||||
|
#else
|
||||||
assert(nbBits < BIT_MASK_SIZE);
|
assert(nbBits < BIT_MASK_SIZE);
|
||||||
return bitContainer & BIT_mask[nbBits];
|
return bitContainer & BIT_mask[nbBits];
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/*! BIT_lookBits() :
|
/*! BIT_lookBits() :
|
||||||
@ -342,7 +349,7 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
|
|||||||
* On 32-bits, maxNbBits==24.
|
* On 32-bits, maxNbBits==24.
|
||||||
* On 64-bits, maxNbBits==56.
|
* On 64-bits, maxNbBits==56.
|
||||||
* @return : value extracted */
|
* @return : value extracted */
|
||||||
MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
|
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
|
||||||
{
|
{
|
||||||
/* arbitrate between double-shift and shift+mask */
|
/* arbitrate between double-shift and shift+mask */
|
||||||
#if 1
|
#if 1
|
||||||
@ -365,7 +372,7 @@ MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
|
|||||||
return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
|
return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
|
||||||
}
|
}
|
||||||
|
|
||||||
MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
|
MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
|
||||||
{
|
{
|
||||||
bitD->bitsConsumed += nbBits;
|
bitD->bitsConsumed += nbBits;
|
||||||
}
|
}
|
||||||
@ -374,7 +381,7 @@ MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
|
|||||||
* Read (consume) next n bits from local register and update.
|
* Read (consume) next n bits from local register and update.
|
||||||
* Pay attention to not read more than nbBits contained into local register.
|
* Pay attention to not read more than nbBits contained into local register.
|
||||||
* @return : extracted value. */
|
* @return : extracted value. */
|
||||||
MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
|
MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
|
||||||
{
|
{
|
||||||
size_t const value = BIT_lookBits(bitD, nbBits);
|
size_t const value = BIT_lookBits(bitD, nbBits);
|
||||||
BIT_skipBits(bitD, nbBits);
|
BIT_skipBits(bitD, nbBits);
|
||||||
|
|||||||
@ -183,4 +183,17 @@
|
|||||||
# pragma warning(disable : 4324) /* disable: C4324: padded structure */
|
# pragma warning(disable : 4324) /* disable: C4324: padded structure */
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/*Like DYNAMIC_BMI2 but for compile time determination of BMI2 support*/
|
||||||
|
#ifndef STATIC_BMI2
|
||||||
|
# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86))
|
||||||
|
# ifdef __AVX2__ //MSVC does not have a BMI2 specific flag, but every CPU that supports AVX2 also supports BMI2
|
||||||
|
# define STATIC_BMI2 1
|
||||||
|
# endif
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef STATIC_BMI2
|
||||||
|
#define STATIC_BMI2 0
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif /* ZSTD_COMPILER_H */
|
#endif /* ZSTD_COMPILER_H */
|
||||||
|
|||||||
@ -396,8 +396,12 @@ MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus
|
|||||||
assert(val != 0);
|
assert(val != 0);
|
||||||
{
|
{
|
||||||
# if defined(_MSC_VER) /* Visual */
|
# if defined(_MSC_VER) /* Visual */
|
||||||
|
# if STATIC_BMI2 == 1
|
||||||
|
return _lzcnt_u32(val)^31;
|
||||||
|
# else
|
||||||
unsigned long r=0;
|
unsigned long r=0;
|
||||||
return _BitScanReverse(&r, val) ? (unsigned)r : 0;
|
return _BitScanReverse(&r, val) ? (unsigned)r : 0;
|
||||||
|
# endif
|
||||||
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
|
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
|
||||||
return __builtin_clz (val) ^ 31;
|
return __builtin_clz (val) ^ 31;
|
||||||
# elif defined(__ICCARM__) /* IAR Intrinsic */
|
# elif defined(__ICCARM__) /* IAR Intrinsic */
|
||||||
|
|||||||
@ -498,8 +498,12 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
|
|||||||
if (MEM_isLittleEndian()) {
|
if (MEM_isLittleEndian()) {
|
||||||
if (MEM_64bits()) {
|
if (MEM_64bits()) {
|
||||||
# if defined(_MSC_VER) && defined(_WIN64)
|
# if defined(_MSC_VER) && defined(_WIN64)
|
||||||
|
# if STATIC_BMI2
|
||||||
|
return _tzcnt_u64(val) >> 3;
|
||||||
|
# else
|
||||||
unsigned long r = 0;
|
unsigned long r = 0;
|
||||||
return _BitScanForward64( &r, (U64)val ) ? (unsigned)(r >> 3) : 0;
|
return _BitScanForward64( &r, (U64)val ) ? (unsigned)(r >> 3) : 0;
|
||||||
|
# endif
|
||||||
# elif defined(__GNUC__) && (__GNUC__ >= 4)
|
# elif defined(__GNUC__) && (__GNUC__ >= 4)
|
||||||
return (__builtin_ctzll((U64)val) >> 3);
|
return (__builtin_ctzll((U64)val) >> 3);
|
||||||
# else
|
# else
|
||||||
@ -530,8 +534,12 @@ static unsigned ZSTD_NbCommonBytes (size_t val)
|
|||||||
} else { /* Big Endian CPU */
|
} else { /* Big Endian CPU */
|
||||||
if (MEM_64bits()) {
|
if (MEM_64bits()) {
|
||||||
# if defined(_MSC_VER) && defined(_WIN64)
|
# if defined(_MSC_VER) && defined(_WIN64)
|
||||||
|
# if STATIC_BMI2
|
||||||
|
return _lzcnt_u64(val) >> 3;
|
||||||
|
# else
|
||||||
unsigned long r = 0;
|
unsigned long r = 0;
|
||||||
return _BitScanReverse64( &r, val ) ? (unsigned)(r >> 3) : 0;
|
return _BitScanReverse64(&r, (U64)val) ? (unsigned)(r >> 3) : 0;
|
||||||
|
# endif
|
||||||
# elif defined(__GNUC__) && (__GNUC__ >= 4)
|
# elif defined(__GNUC__) && (__GNUC__ >= 4)
|
||||||
return (__builtin_clzll(val) >> 3);
|
return (__builtin_clzll(val) >> 3);
|
||||||
# else
|
# else
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user