mirror of https://github.com/facebook/zstd.git
synced 2025-10-03 00:03:25 -04:00

Compare commits: 4 commits, 7842f86142 ... 6800430569
Commits:
  6800430569
  b8d6101fba
  87cc127705
  347cf45277
.github/workflows/dev-short-tests.yml (vendored), 2 changes
@@ -403,7 +403,7 @@ jobs:
           { name: PPC64LE, xcc_pkg: gcc-powerpc64le-linux-gnu, xcc: powerpc64le-linux-gnu-gcc, xemu_pkg: qemu-system-ppc,     xemu: qemu-ppc64le-static },
           { name: S390X,   xcc_pkg: gcc-s390x-linux-gnu,       xcc: s390x-linux-gnu-gcc,       xemu_pkg: qemu-system-s390x,   xemu: qemu-s390x-static },
           { name: MIPS,    xcc_pkg: gcc-mips-linux-gnu,        xcc: mips-linux-gnu-gcc,        xemu_pkg: qemu-system-mips,    xemu: qemu-mips-static },
-          { name: RISC-V,  xcc_pkg: gcc-riscv64-linux-gnu,     xcc: riscv64-linux-gnu-gcc,     xemu_pkg: qemu-system-riscv64,xemu: qemu-riscv64-static },
+          { name: RISC-V,  xcc_pkg: gcc-14-riscv64-linux-gnu,  xcc: riscv64-linux-gnu-gcc-14,  xemu_pkg: qemu-system-riscv64,xemu: qemu-riscv64-static },
           { name: M68K,    xcc_pkg: gcc-m68k-linux-gnu,        xcc: m68k-linux-gnu-gcc,        xemu_pkg: qemu-system-m68k,    xemu: qemu-m68k-static },
           { name: SPARC,   xcc_pkg: gcc-sparc64-linux-gnu,     xcc: sparc64-linux-gnu-gcc,     xemu_pkg: qemu-system-sparc,   xemu: qemu-sparc64-static },
         ]

@@ -224,16 +224,11 @@
 # if defined(__ARM_FEATURE_SVE2)
 # define ZSTD_ARCH_ARM_SVE2
 # endif
-#if defined(__riscv) && defined(__riscv_vector)
-#if defined(__GNUC__)
-#if (__GNUC__ > 14 || (__GNUC__ == 14 && __GNUC_MINOR__ >= 1))
-#define ZSTD_ARCH_RISCV_RVV
-#endif
-#elif defined(__clang__)
-#if __clang_major__ > 18 || (__clang_major__ == 18 && __clang_minor__ >= 1)
-#define ZSTD_ARCH_RISCV_RVV
-#endif
-#endif
-#endif
+# if defined(__riscv) && defined(__riscv_vector)
+#  if ((defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 14) || \
+       (defined(__clang__) && __clang_major__ >= 19))
+#define ZSTD_ARCH_RISCV_RVV
+# endif
+#endif
 #
 # if defined(ZSTD_ARCH_X86_AVX2)
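Note on the new gate: ZSTD_ARCH_RISCV_RVV is now defined only when the target has the V extension and the compiler is GCC 14+ (excluding Clang masquerading as GCC) or Clang 19+. A minimal standalone sketch of the same check; the HAVE_RVV_INTRINSICS name is hypothetical, not part of zstd:

/* Sketch: the same toolchain gate, usable outside zstd.
 * HAVE_RVV_INTRINSICS is a hypothetical name for illustration. */
#if defined(__riscv) && defined(__riscv_vector)
#  if (defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 14) || \
      (defined(__clang__) && __clang_major__ >= 19)
#    define HAVE_RVV_INTRINSICS 1
#  endif
#endif

#if defined(HAVE_RVV_INTRINSICS)
#  include <riscv_vector.h>  /* __riscv_-prefixed v1.0 intrinsics expected here */
#endif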
@@ -3730,6 +3730,8 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_can
 #  include <immintrin.h>
 # elif defined(__SSE2__)
 #  include <emmintrin.h>
+# elif defined(__riscv_vector)
+#  include <riscv_vector.h>
 # endif
 #endif
 
@@ -3852,6 +3854,7 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
      */
     XXH_VSX = 5,  /*!< VSX and ZVector for POWER8/z13 (64-bit) */
     XXH_SVE = 6,  /*!< SVE for some ARMv8-A and ARMv9-A */
+    XXH_RVV = 7,  /*!< RVV (RISC-V Vector) for RISC-V */
 };
 /*!
  * @ingroup tuning
@@ -3874,6 +3877,7 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
 #  define XXH_NEON 4
 #  define XXH_VSX  5
 #  define XXH_SVE  6
+#  define XXH_RVV  7
 #endif
 
 #ifndef XXH_VECTOR   /* can be defined on command line */
@@ -3898,6 +3902,8 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
      || (defined(__s390x__) && defined(__VEC__)) \
      && defined(__GNUC__) /* TODO: IBM XL */
 #   define XXH_VECTOR XXH_VSX
+# elif defined(__riscv_vector)
+#   define XXH_VECTOR XXH_RVV
 # else
 #   define XXH_VECTOR XXH_SCALAR
 # endif
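With the RVV branch added to this selection chain, a build can verify which backend was picked. A minimal sketch, assuming a single-header build where XXH_INLINE_ALL compiles the implementation in (and therefore defines XXH_VECTOR):

#include <stdio.h>
#define XXH_INLINE_ALL   /* pulls in the implementation, defining XXH_VECTOR */
#include "xxhash.h"

int main(void)
{
#if XXH_VECTOR == XXH_RVV
    puts("xxhash backend: RVV");
#else
    printf("xxhash backend id: %d\n", (int)XXH_VECTOR);
#endif
    return 0;
}

As the `#ifndef XXH_VECTOR /* can be defined on command line */` guard above indicates, the selection can also be forced at build time (e.g. -DXXH_VECTOR=7 for XXH_RVV).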
@@ -3935,6 +3941,8 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
 #  define XXH_ACC_ALIGN 64
 # elif XXH_VECTOR == XXH_SVE  /* sve */
 #  define XXH_ACC_ALIGN 64
+# elif XXH_VECTOR == XXH_RVV  /* rvv */
+#  define XXH_ACC_ALIGN 64  /* could be 8, but 64 may be faster */
 # endif
 #endif
 
@@ -3943,6 +3951,8 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
 # define XXH_SEC_ALIGN XXH_ACC_ALIGN
 #elif XXH_VECTOR == XXH_SVE
 # define XXH_SEC_ALIGN XXH_ACC_ALIGN
+#elif XXH_VECTOR == XXH_RVV
+# define XXH_SEC_ALIGN XXH_ACC_ALIGN
 #else
 # define XXH_SEC_ALIGN 8
 #endif
@@ -5601,6 +5611,132 @@ XXH3_accumulate_sve(xxh_u64* XXH_RESTRICT acc,
 
 #endif
 
+#if (XXH_VECTOR == XXH_RVV)
+#define XXH_CONCAT2(X, Y) X ## Y
+#define XXH_CONCAT(X, Y) XXH_CONCAT2(X, Y)
+#if ((defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 13) || \
+     (defined(__clang__) && __clang_major__ < 16))
+#define XXH_RVOP(op) op
+#define XXH_RVCAST(op) XXH_CONCAT(vreinterpret_v_, op)
+#else
+#define XXH_RVOP(op) XXH_CONCAT(__riscv_, op)
+#define XXH_RVCAST(op) XXH_CONCAT(__riscv_vreinterpret_v_, op)
+#endif
+XXH_FORCE_INLINE void
+XXH3_accumulate_512_rvv( void* XXH_RESTRICT acc,
+                   const void* XXH_RESTRICT input,
+                   const void* XXH_RESTRICT secret)
+{
+    XXH_ASSERT((((size_t)acc) & 63) == 0);
+    {
+        // Try to set the vector length to 512 bits.
+        // If this length is unavailable, the maximum available length will be used.
+        size_t vl = XXH_RVOP(vsetvl_e64m2)(8);
+
+        uint64_t* xacc = (uint64_t*) acc;
+        const uint64_t* xinput = (const uint64_t*) input;
+        const uint64_t* xsecret = (const uint64_t*) secret;
+        static const uint64_t swap_mask[16] = {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14};
+        vuint64m2_t xswap_mask = XXH_RVOP(vle64_v_u64m2)(swap_mask, vl);
+
+        size_t i;
+        for (i = 0; i < XXH_STRIPE_LEN/8; i += vl) {
+            /* data_vec = xinput[i]; */
+            vuint64m2_t data_vec = XXH_RVCAST(u8m2_u64m2)(XXH_RVOP(vle8_v_u8m2)((const uint8_t*)(xinput + i), vl * 8));
+            /* key_vec = xsecret[i]; */
+            vuint64m2_t key_vec = XXH_RVCAST(u8m2_u64m2)(XXH_RVOP(vle8_v_u8m2)((const uint8_t*)(xsecret + i), vl * 8));
+            /* acc_vec = xacc[i]; */
+            vuint64m2_t acc_vec = XXH_RVOP(vle64_v_u64m2)(xacc + i, vl);
+            /* data_key = data_vec ^ key_vec; */
+            vuint64m2_t data_key = XXH_RVOP(vxor_vv_u64m2)(data_vec, key_vec, vl);
+            /* data_key_hi = data_key >> 32; */
+            vuint64m2_t data_key_hi = XXH_RVOP(vsrl_vx_u64m2)(data_key, 32, vl);
+            /* data_key_lo = data_key & 0xffffffff; */
+            vuint64m2_t data_key_lo = XXH_RVOP(vand_vx_u64m2)(data_key, 0xffffffff, vl);
+            /* swap high and low halves */
+            vuint64m2_t data_swap = XXH_RVOP(vrgather_vv_u64m2)(data_vec, xswap_mask, vl);
+            /* acc_vec += data_key_lo * data_key_hi; */
+            acc_vec = XXH_RVOP(vmacc_vv_u64m2)(acc_vec, data_key_lo, data_key_hi, vl);
+            /* acc_vec += data_swap; */
+            acc_vec = XXH_RVOP(vadd_vv_u64m2)(acc_vec, data_swap, vl);
+            /* xacc[i] = acc_vec; */
+            XXH_RVOP(vse64_v_u64m2)(xacc + i, acc_vec, vl);
+        }
+    }
+}
+
+XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(rvv)
+
+XXH_FORCE_INLINE void
+XXH3_scrambleAcc_rvv(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
+{
+    XXH_ASSERT((((size_t)acc) & 15) == 0);
+    {
+        size_t count = XXH_STRIPE_LEN/8;
+        uint64_t* xacc = (uint64_t*)acc;
+        const uint8_t* xsecret = (const uint8_t *)secret;
+        size_t vl;
+        for (; count > 0; count -= vl, xacc += vl, xsecret += vl*8) {
+            vl = XXH_RVOP(vsetvl_e64m2)(count);
+            {
+                /* key_vec = xsecret[i]; */
+                vuint64m2_t key_vec = XXH_RVCAST(u8m2_u64m2)(XXH_RVOP(vle8_v_u8m2)(xsecret, vl*8));
+                /* acc_vec = xacc[i]; */
+                vuint64m2_t acc_vec = XXH_RVOP(vle64_v_u64m2)(xacc, vl);
+                /* acc_vec ^= acc_vec >> 47; */
+                vuint64m2_t vsrl = XXH_RVOP(vsrl_vx_u64m2)(acc_vec, 47, vl);
+                acc_vec = XXH_RVOP(vxor_vv_u64m2)(acc_vec, vsrl, vl);
+                /* acc_vec ^= key_vec; */
+                acc_vec = XXH_RVOP(vxor_vv_u64m2)(acc_vec, key_vec, vl);
+                /* acc_vec *= XXH_PRIME32_1; */
+                acc_vec = XXH_RVOP(vmul_vx_u64m2)(acc_vec, XXH_PRIME32_1, vl);
+                /* xacc[i] = acc_vec; */
+                XXH_RVOP(vse64_v_u64m2)(xacc, acc_vec, vl);
+            }
+        }
+    }
+}
+
+XXH_FORCE_INLINE void
+XXH3_initCustomSecret_rvv(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
+{
+    XXH_STATIC_ASSERT(XXH_SEC_ALIGN >= 8);
+    XXH_ASSERT(((size_t)customSecret & 7) == 0);
+    (void)(&XXH_writeLE64);
+    {
+        size_t count = XXH_SECRET_DEFAULT_SIZE/8;
+        size_t vl;
+        size_t VLMAX = XXH_RVOP(vsetvlmax_e64m2)();
+        int64_t* cSecret = (int64_t*)customSecret;
+        const int64_t* kSecret = (const int64_t*)(const void*)XXH3_kSecret;
+
+#if __riscv_v_intrinsic >= 1000000
+        // ratified v1.0 intrinsics version
+        vbool32_t mneg = XXH_RVCAST(u8m1_b32)(
+            XXH_RVOP(vmv_v_x_u8m1)(0xaa, XXH_RVOP(vsetvlmax_e8m1)()));
+#else
+        // support pre-ratification intrinsics, which lack mask-to-vector casts
+        size_t vlmax = XXH_RVOP(vsetvlmax_e8m1)();
+        vbool32_t mneg = XXH_RVOP(vmseq_vx_u8mf4_b32)(
+            XXH_RVOP(vand_vx_u8mf4)(
+                XXH_RVOP(vid_v_u8mf4)(vlmax), 1, vlmax), 1, vlmax);
+#endif
+        vint64m2_t seed = XXH_RVOP(vmv_v_x_i64m2)((int64_t)seed64, VLMAX);
+        seed = XXH_RVOP(vneg_v_i64m2_mu)(mneg, seed, seed, VLMAX);
+
+        for (; count > 0; count -= vl, cSecret += vl, kSecret += vl) {
+            /* make sure vl=VLMAX until the last iteration */
+            vl = XXH_RVOP(vsetvl_e64m2)(count < VLMAX ? count : VLMAX);
+            {
+                vint64m2_t src = XXH_RVOP(vle64_v_i64m2)(kSecret, vl);
+                vint64m2_t res = XXH_RVOP(vadd_vv_i64m2)(src, seed, vl);
+                XXH_RVOP(vse64_v_i64m2)(cSecret, res, vl);
+            }
+        }
+    }
+}
+#endif
+
 /* scalar variants - universal */
 
 #if defined(__aarch64__) && (defined(__GNUC__) || defined(__clang__))
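XXH3_scrambleAcc_rvv and XXH3_initCustomSecret_rvv above use the standard RVV strip-mining idiom: each pass asks vsetvl for the remaining element count and the hardware grants at most VLMAX, so the final partial pass needs no scalar tail loop. A self-contained sketch of the idiom (a plain byte copy, not code from this patch), assuming a toolchain with the ratified __riscv_-prefixed intrinsics:

#include <riscv_vector.h>
#include <stddef.h>
#include <stdint.h>

/* Strip-mined byte copy: vl = min(remaining, VLMAX) on every pass,
 * so the tail is handled by a shorter vl instead of a cleanup loop. */
static void copy_rvv(uint8_t* dst, const uint8_t* src, size_t n)
{
    size_t vl;
    for (; n > 0; n -= vl, src += vl, dst += vl) {
        vl = __riscv_vsetvl_e8m8(n);
        vuint8m8_t v = __riscv_vle8_v_u8m8(src, vl);
        __riscv_vse8_v_u8m8(dst, v, vl);
    }
}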
@@ -5831,6 +5967,12 @@ typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64);
 #define XXH3_scrambleAcc    XXH3_scrambleAcc_scalar
 #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
 
+#elif (XXH_VECTOR == XXH_RVV)
+#define XXH3_accumulate_512 XXH3_accumulate_512_rvv
+#define XXH3_accumulate     XXH3_accumulate_rvv
+#define XXH3_scrambleAcc    XXH3_scrambleAcc_rvv
+#define XXH3_initCustomSecret XXH3_initCustomSecret_rvv
+
 #else /* scalar */
 
 #define XXH3_accumulate_512 XXH3_accumulate_512_scalar
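For reference, XXH3_initCustomSecret_rvv (dispatched above) reproduces the scalar variant's lane pattern: even 64-bit lanes add the seed and odd lanes subtract it, which is why the mask is built from the byte 0xaa (bits 1, 3, 5, 7 set). A scalar sketch of that behavior; the function name is illustrative, not from the patch:

#include <stddef.h>
#include <stdint.h>

/* Scalar model of the RVV secret init:
 * cSecret[i] = kSecret[i] + seed for even i, kSecret[i] - seed for odd i. */
static void init_custom_secret_ref(int64_t* cSecret, const int64_t* kSecret,
                                   size_t n64, uint64_t seed64)
{
    size_t i;
    for (i = 0; i < n64; i++) {
        int64_t s = (i & 1) ? (int64_t)(0u - seed64) : (int64_t)seed64;
        cSecret[i] = kSecret[i] + s;
    }
}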
@@ -185,6 +185,8 @@ static void ZSTD_copy16(void* dst, const void* src) {
     vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src));
 #elif defined(ZSTD_ARCH_X86_SSE2)
     _mm_storeu_si128((__m128i*)dst, _mm_loadu_si128((const __m128i*)src));
+#elif defined(ZSTD_ARCH_RISCV_RVV)
+    __riscv_vse8_v_u8m1((uint8_t*)dst, __riscv_vle8_v_u8m1((const uint8_t*)src, 16), 16);
 #elif defined(__clang__)
     ZSTD_memmove(dst, src, 16);
 #else
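Unlike the strip-mined loops elsewhere in the patch, this path passes the constant element count 16 directly: the ratified V extension mandates VLEN >= 128 (Zvl128b), so an e8/m1 vector always holds at least 16 bytes and no vsetvl loop is needed. A standalone sketch of the same single-shot pattern; the helper name is illustrative:

#include <riscv_vector.h>
#include <stdint.h>

/* Single-shot 16-byte copy: VLEN >= 128 guarantees e8/m1 VLMAX >= 16,
 * so a constant avl of 16 is always satisfiable in one operation. */
static void copy16_rvv(void* dst, const void* src)
{
    vuint8m1_t v = __riscv_vle8_v_u8m1((const uint8_t*)src, 16);
    __riscv_vse8_v_u8m1((uint8_t*)dst, v, 16);
}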
@@ -7292,7 +7292,7 @@ size_t convertSequences_noRepcodes(
     return longLen;
 }
 
-#elif defined ZSTD_ARCH_RISCV_RVV
+#elif defined (ZSTD_ARCH_RISCV_RVV)
 #include <riscv_vector.h>
 /*
  * Convert `vl` sequences per iteration, using RVV intrinsics:
@@ -7824,7 +7824,7 @@ BlockSummary ZSTD_get1BlockSummary(const ZSTD_Sequence* seqs, size_t nbSeqs)
     }
 }
 
-#elif defined ZSTD_ARCH_RISCV_RVV
+#elif defined (ZSTD_ARCH_RISCV_RVV)
 
 BlockSummary ZSTD_get1BlockSummary(const ZSTD_Sequence* seqs, size_t nbSeqs)
 {
@@ -1052,34 +1052,40 @@ ZSTD_row_getNEONMask(const U32 rowEntries, const BYTE* const src, const BYTE tag
 #endif
 #if defined(ZSTD_ARCH_RISCV_RVV) && (__riscv_xlen == 64)
 FORCE_INLINE_TEMPLATE ZSTD_VecMask
-ZSTD_row_getRVVMask(int nbChunks, const BYTE* const src, const BYTE tag, const U32 head)
+ZSTD_row_getRVVMask(int rowEntries, const BYTE* const src, const BYTE tag, const U32 head)
 {
     ZSTD_VecMask matches;
     size_t vl;
 
     if (rowEntries == 16) {
         vl = __riscv_vsetvl_e8m1(16);
+        {
             vuint8m1_t chunk = __riscv_vle8_v_u8m1(src, vl);
             vbool8_t mask = __riscv_vmseq_vx_u8m1_b8(chunk, tag, vl);
             vuint16m1_t mask_u16 = __riscv_vreinterpret_v_b8_u16m1(mask);
             matches = __riscv_vmv_x_s_u16m1_u16(mask_u16);
             return ZSTD_rotateRight_U16((U16)matches, head);
+        }
 
     } else if (rowEntries == 32) {
         vl = __riscv_vsetvl_e8m2(32);
+        {
             vuint8m2_t chunk = __riscv_vle8_v_u8m2(src, vl);
             vbool4_t mask = __riscv_vmseq_vx_u8m2_b4(chunk, tag, vl);
             vuint32m1_t mask_u32 = __riscv_vreinterpret_v_b4_u32m1(mask);
             matches = __riscv_vmv_x_s_u32m1_u32(mask_u32);
             return ZSTD_rotateRight_U32((U32)matches, head);
+        }
     } else { // rowEntries = 64
         vl = __riscv_vsetvl_e8m4(64);
+        {
             vuint8m4_t chunk = __riscv_vle8_v_u8m4(src, vl);
             vbool2_t mask = __riscv_vmseq_vx_u8m4_b2(chunk, tag, vl);
             vuint64m1_t mask_u64 = __riscv_vreinterpret_v_b2_u64m1(mask);
             matches = __riscv_vmv_x_s_u64m1_u64(mask_u64);
             return ZSTD_rotateRight_U64(matches, head);
         }
+    }
 }
 #endif
 
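The vmseq/vreinterpret/vmv.x.s sequence above packs one match bit per row entry into a scalar ZSTD_VecMask. Callers then walk that mask bit by bit; a minimal sketch of the consumer pattern, using a GCC/Clang builtin for count-trailing-zeros (a generic illustration, not zstd's exact helper):

#include <stdint.h>

/* Pop the lowest set bit of a match mask and return its index;
 * each set bit marks a row entry whose tag byte matched. */
static unsigned pop_lowest_match(uint64_t* matches)
{
    unsigned idx = (unsigned)__builtin_ctzll(*matches);
    *matches &= *matches - 1;   /* clear the bit just consumed */
    return idx;
}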