Merge pull request #3933 from facebook/fix3819

prevent XXH64 from being autovectorized with AVX512 by default
This commit is contained in:
Yann Collet 2024-03-12 09:46:48 -07:00 committed by GitHub
commit db996d253e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -3329,6 +3329,23 @@ static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input)
acc += input * XXH_PRIME64_2;
acc = XXH_rotl64(acc, 31);
acc *= XXH_PRIME64_1;
#if (defined(__AVX512F__)) && !defined(XXH_ENABLE_AUTOVECTORIZE)
/*
* DISABLE AUTOVECTORIZATION:
* A compiler fence is used to prevent GCC and Clang from
* autovectorizing the XXH64 loop (pragmas and attributes don't work for some
* reason) without globally disabling AVX512.
*
* Autovectorization of XXH64 tends to be detrimental,
* though the outcome may vary depending on the exact CPU and compiler version.
* For reference, it has been reported as detrimental on Skylake-X,
* but possibly beneficial on Zen4.
*
* The default is to disable autovectorization,
* but you can enable it instead by defining the `XXH_ENABLE_AUTOVECTORIZE` build macro.
*/
XXH_COMPILER_GUARD(acc);
#endif
return acc;
}