From d6826615147859dd5afe4f5711ec3cf2b55b8aed Mon Sep 17 00:00:00 2001 From: "John M. Schanck" Date: Thu, 18 Mar 2021 10:59:00 -0400 Subject: [PATCH] Remove unused McEliece implementations --- .../pqclean_mceliece348864_clean/LICENSE | 16 - .../pqclean_mceliece348864_clean/aes256ctr.c | 13 - .../pqclean_mceliece348864_clean/aes256ctr.h | 17 - .../pqclean_mceliece348864_clean/api.h | 32 - .../pqclean_mceliece348864_clean/benes.c | 139 - .../pqclean_mceliece348864_clean/benes.h | 14 - .../pqclean_mceliece348864_clean/bm.c | 83 - .../pqclean_mceliece348864_clean/bm.h | 13 - .../controlbits.c | 274 - .../controlbits.h | 15 - .../crypto_hash.h | 7 - .../pqclean_mceliece348864_clean/decrypt.c | 90 - .../pqclean_mceliece348864_clean/decrypt.h | 10 - .../pqclean_mceliece348864_clean/encrypt.c | 138 - .../pqclean_mceliece348864_clean/encrypt.h | 11 - .../pqclean_mceliece348864_clean/gf.c | 139 - .../pqclean_mceliece348864_clean/gf.h | 22 - .../pqclean_mceliece348864_clean/operations.c | 136 - .../pqclean_mceliece348864_clean/params.h | 21 - .../pqclean_mceliece348864_clean/pk_gen.c | 144 - .../pqclean_mceliece348864_clean/pk_gen.h | 13 - .../pqclean_mceliece348864_clean/root.c | 33 - .../pqclean_mceliece348864_clean/root.h | 14 - .../pqclean_mceliece348864_clean/sk_gen.c | 98 - .../pqclean_mceliece348864_clean/sk_gen.h | 16 - .../pqclean_mceliece348864_clean/synd.c | 33 - .../pqclean_mceliece348864_clean/synd.h | 12 - .../pqclean_mceliece348864_clean/transpose.c | 42 - .../pqclean_mceliece348864_clean/transpose.h | 13 - .../pqclean_mceliece348864_clean/util.c | 67 - .../pqclean_mceliece348864_clean/util.h | 22 - .../pqclean_mceliece348864_sse/LICENSE | 16 - .../pqclean_mceliece348864_sse/aes256ctr.c | 13 - .../pqclean_mceliece348864_sse/aes256ctr.h | 17 - .../pqclean_mceliece348864_sse/api.h | 32 - .../pqclean_mceliece348864_sse/benes.c | 287 - .../pqclean_mceliece348864_sse/benes.h | 15 - .../pqclean_mceliece348864_sse/bm.c | 220 - .../pqclean_mceliece348864_sse/bm.h | 17 - .../pqclean_mceliece348864_sse/consts.S | 32 - .../pqclean_mceliece348864_sse/consts.inc | 448 - .../pqclean_mceliece348864_sse/controlbits.c | 274 - .../pqclean_mceliece348864_sse/controlbits.h | 15 - .../pqclean_mceliece348864_sse/crypto_hash.h | 7 - .../pqclean_mceliece348864_sse/decrypt.c | 203 - .../pqclean_mceliece348864_sse/decrypt.h | 10 - .../pqclean_mceliece348864_sse/encrypt.c | 99 - .../pqclean_mceliece348864_sse/encrypt.h | 11 - .../pqclean_mceliece348864_sse/fft.c | 155 - .../pqclean_mceliece348864_sse/fft.h | 17 - .../pqclean_mceliece348864_sse/fft_tr.c | 312 - .../pqclean_mceliece348864_sse/fft_tr.h | 13 - .../pqclean_mceliece348864_sse/gf.c | 169 - .../pqclean_mceliece348864_sse/gf.h | 26 - .../pqclean_mceliece348864_sse/operations.c | 136 - .../pqclean_mceliece348864_sse/params.h | 21 - .../pqclean_mceliece348864_sse/pk_gen.c | 329 - .../pqclean_mceliece348864_sse/pk_gen.h | 13 - .../pqclean_mceliece348864_sse/powers.inc | 448 - .../pqclean_mceliece348864_sse/scalars.inc | 70 - .../pqclean_mceliece348864_sse/scalars_2x.inc | 70 - .../pqclean_mceliece348864_sse/sk_gen.c | 98 - .../pqclean_mceliece348864_sse/sk_gen.h | 16 - .../pqclean_mceliece348864_sse/syndrome_asm.S | 740 -- .../pqclean_mceliece348864_sse/transpose.c | 12 - .../pqclean_mceliece348864_sse/transpose.h | 16 - .../transpose_64x128_sp_asm.S | 8145 ---------------- .../transpose_64x64_asm.S | 8467 ----------------- .../pqclean_mceliece348864_sse/update_asm.S | 354 - .../pqclean_mceliece348864_sse/util.c | 106 - .../pqclean_mceliece348864_sse/util.h 
| 33 - .../pqclean_mceliece348864_sse/vec.c | 17 - .../pqclean_mceliece348864_sse/vec.h | 11 - .../pqclean_mceliece348864_sse/vec128.c | 143 - .../pqclean_mceliece348864_sse/vec128.h | 42 - .../vec128_mul_asm.S | 1736 ---- .../pqclean_mceliece348864_sse/vec_mul_asm.S | 1515 --- .../vec_reduce_asm.S | 356 - 78 files changed, 26999 deletions(-) delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_clean/LICENSE delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_clean/aes256ctr.c delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_clean/aes256ctr.h delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_clean/api.h delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_clean/benes.c delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_clean/benes.h delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_clean/bm.c delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_clean/bm.h delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_clean/controlbits.c delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_clean/controlbits.h delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_clean/crypto_hash.h delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_clean/decrypt.c delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_clean/decrypt.h delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_clean/encrypt.c delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_clean/encrypt.h delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_clean/gf.c delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_clean/gf.h delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_clean/operations.c delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_clean/params.h delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_clean/pk_gen.c delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_clean/pk_gen.h delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_clean/root.c delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_clean/root.h delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_clean/sk_gen.c delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_clean/sk_gen.h delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_clean/synd.c delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_clean/synd.h delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_clean/transpose.c delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_clean/transpose.h delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_clean/util.c delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_clean/util.h delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/LICENSE delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/aes256ctr.c delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/aes256ctr.h delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/api.h delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/benes.c delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/benes.h delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/bm.c delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/bm.h delete mode 100644 
src/kem/classic_mceliece/pqclean_mceliece348864_sse/consts.S delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/consts.inc delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/controlbits.c delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/controlbits.h delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/crypto_hash.h delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/decrypt.c delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/decrypt.h delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/encrypt.c delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/encrypt.h delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/fft.c delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/fft.h delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/fft_tr.c delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/fft_tr.h delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/gf.c delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/gf.h delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/operations.c delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/params.h delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/pk_gen.c delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/pk_gen.h delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/powers.inc delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/scalars.inc delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/scalars_2x.inc delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/sk_gen.c delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/sk_gen.h delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/syndrome_asm.S delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/transpose.c delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/transpose.h delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/transpose_64x128_sp_asm.S delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/transpose_64x64_asm.S delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/update_asm.S delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/util.c delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/util.h delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/vec.c delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/vec.h delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/vec128.c delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/vec128.h delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/vec128_mul_asm.S delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/vec_mul_asm.S delete mode 100644 src/kem/classic_mceliece/pqclean_mceliece348864_sse/vec_reduce_asm.S diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/LICENSE b/src/kem/classic_mceliece/pqclean_mceliece348864_clean/LICENSE deleted file mode 100644 index eba3e7ced..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/LICENSE +++ /dev/null @@ -1,16 +0,0 @@ -Public Domain. - -Authors of Classic McEliece in alphabetical order: - -Daniel J. 
Bernstein, University of Illinois at Chicago -Tung Chou, Osaka University -Tanja Lange, Technische Universiteit Eindhoven -Ingo von Maurich, self -Rafael Misoczki, Intel Corporation -Ruben Niederhagen, Fraunhofer SIT -Edoardo Persichetti, Florida Atlantic University -Christiane Peters, self -Peter Schwabe, Radboud University -Nicolas Sendrier, Inria -Jakub Szefer, Yale University -Wen Wang, Yale University diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/aes256ctr.c b/src/kem/classic_mceliece/pqclean_mceliece348864_clean/aes256ctr.c deleted file mode 100644 index c733d2e99..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/aes256ctr.c +++ /dev/null @@ -1,13 +0,0 @@ -#include "aes256ctr.h" - -void PQCLEAN_MCELIECE348864_CLEAN_aes256ctr( - uint8_t *out, - size_t outlen, - const uint8_t nonce[AESCTR_NONCEBYTES], - const uint8_t key[AES256_KEYBYTES]) { - - aes256ctx state; - aes256_ctr_keyexp(&state, key); - aes256_ctr(out, outlen, nonce, &state); - aes256_ctx_release(&state); -} diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/aes256ctr.h b/src/kem/classic_mceliece/pqclean_mceliece348864_clean/aes256ctr.h deleted file mode 100644 index 13f1f9a73..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/aes256ctr.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef PQCLEAN_MCELIECE348864_CLEAN_AES256CTR_H -#define PQCLEAN_MCELIECE348864_CLEAN_AES256CTR_H - -#include -#include - -#include "aes.h" - - -void PQCLEAN_MCELIECE348864_CLEAN_aes256ctr( - uint8_t *out, - size_t outlen, - const uint8_t nonce[AESCTR_NONCEBYTES], - const uint8_t key[AES256_KEYBYTES] -); - -#endif diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/api.h b/src/kem/classic_mceliece/pqclean_mceliece348864_clean/api.h deleted file mode 100644 index 0bebdcb57..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/api.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef PQCLEAN_MCELIECE348864_CLEAN_API_H -#define PQCLEAN_MCELIECE348864_CLEAN_API_H - -#include - -#define PQCLEAN_MCELIECE348864_CLEAN_CRYPTO_ALGNAME "Classic McEliece 348864" -#define PQCLEAN_MCELIECE348864_CLEAN_CRYPTO_PUBLICKEYBYTES 261120 -#define PQCLEAN_MCELIECE348864_CLEAN_CRYPTO_SECRETKEYBYTES 6452 -#define PQCLEAN_MCELIECE348864_CLEAN_CRYPTO_CIPHERTEXTBYTES 128 -#define PQCLEAN_MCELIECE348864_CLEAN_CRYPTO_BYTES 32 - - -int PQCLEAN_MCELIECE348864_CLEAN_crypto_kem_enc( - uint8_t *c, - uint8_t *key, - const uint8_t *pk -); - -int PQCLEAN_MCELIECE348864_CLEAN_crypto_kem_dec( - uint8_t *key, - const uint8_t *c, - const uint8_t *sk -); - -int PQCLEAN_MCELIECE348864_CLEAN_crypto_kem_keypair -( - uint8_t *pk, - uint8_t *sk -); - -#endif - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/benes.c b/src/kem/classic_mceliece/pqclean_mceliece348864_clean/benes.c deleted file mode 100644 index 39f639a1c..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/benes.c +++ /dev/null @@ -1,139 +0,0 @@ -/* - This file is for Benes network related functions -*/ - -#include "benes.h" - -#include "params.h" -#include "transpose.h" -#include "util.h" - -/* one layer of the benes network */ -static void layer(uint64_t *data, uint64_t *bits, int lgs) { - int i, j, s; - - uint64_t d; - - s = 1 << lgs; - - for (i = 0; i < 64; i += s * 2) { - for (j = i; j < i + s; j++) { - - d = (data[j + 0] ^ data[j + s]); - d &= (*bits++); - data[j + 0] ^= d; - data[j + s] ^= d; - } - } -} - -/* input: r, sequence of bits to be permuted */ -/* bits, condition bits of the Benes network */ 
-/* rev, 0 for normal application; !0 for inverse */ -/* output: r, permuted bits */ -void PQCLEAN_MCELIECE348864_CLEAN_apply_benes(unsigned char *r, const unsigned char *bits, int rev) { - int i; - - const unsigned char *cond_ptr; - int inc, low; - - uint64_t bs[64]; - uint64_t cond[64]; - - // - - for (i = 0; i < 64; i++) { - bs[i] = PQCLEAN_MCELIECE348864_CLEAN_load8(r + i * 8); - } - - if (rev == 0) { - inc = 256; - cond_ptr = bits; - } else { - inc = -256; - cond_ptr = bits + (2 * GFBITS - 2) * 256; - } - - // - - PQCLEAN_MCELIECE348864_CLEAN_transpose_64x64(bs, bs); - - for (low = 0; low <= 5; low++) { - for (i = 0; i < 64; i++) { - cond[i] = PQCLEAN_MCELIECE348864_CLEAN_load4(cond_ptr + i * 4); - } - PQCLEAN_MCELIECE348864_CLEAN_transpose_64x64(cond, cond); - layer(bs, cond, low); - cond_ptr += inc; - } - - PQCLEAN_MCELIECE348864_CLEAN_transpose_64x64(bs, bs); - - for (low = 0; low <= 5; low++) { - for (i = 0; i < 32; i++) { - cond[i] = PQCLEAN_MCELIECE348864_CLEAN_load8(cond_ptr + i * 8); - } - layer(bs, cond, low); - cond_ptr += inc; - } - for (low = 4; low >= 0; low--) { - for (i = 0; i < 32; i++) { - cond[i] = PQCLEAN_MCELIECE348864_CLEAN_load8(cond_ptr + i * 8); - } - layer(bs, cond, low); - cond_ptr += inc; - } - - PQCLEAN_MCELIECE348864_CLEAN_transpose_64x64(bs, bs); - - for (low = 5; low >= 0; low--) { - for (i = 0; i < 64; i++) { - cond[i] = PQCLEAN_MCELIECE348864_CLEAN_load4(cond_ptr + i * 4); - } - PQCLEAN_MCELIECE348864_CLEAN_transpose_64x64(cond, cond); - layer(bs, cond, low); - cond_ptr += inc; - } - - PQCLEAN_MCELIECE348864_CLEAN_transpose_64x64(bs, bs); - - - for (i = 0; i < 64; i++) { - PQCLEAN_MCELIECE348864_CLEAN_store8(r + i * 8, bs[i]); - } -} - -/* input: condition bits c */ -/* output: support s */ -void PQCLEAN_MCELIECE348864_CLEAN_support_gen(gf *s, const unsigned char *c) { - gf a; - int i, j; - unsigned char L[ GFBITS ][ (1 << GFBITS) / 8 ]; - - for (i = 0; i < GFBITS; i++) { - for (j = 0; j < (1 << GFBITS) / 8; j++) { - L[i][j] = 0; - } - } - - for (i = 0; i < (1 << GFBITS); i++) { - a = PQCLEAN_MCELIECE348864_CLEAN_bitrev((gf) i); - - for (j = 0; j < GFBITS; j++) { - L[j][ i / 8 ] |= ((a >> j) & 1) << (i % 8); - } - } - - for (j = 0; j < GFBITS; j++) { - PQCLEAN_MCELIECE348864_CLEAN_apply_benes(L[j], c, 0); - } - - for (i = 0; i < SYS_N; i++) { - s[i] = 0; - for (j = GFBITS - 1; j >= 0; j--) { - s[i] <<= 1; - s[i] |= (L[j][i / 8] >> (i % 8)) & 1; - } - } -} - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/benes.h b/src/kem/classic_mceliece/pqclean_mceliece348864_clean/benes.h deleted file mode 100644 index 29fc27407..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/benes.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef PQCLEAN_MCELIECE348864_CLEAN_BENES_H -#define PQCLEAN_MCELIECE348864_CLEAN_BENES_H -/* - This file is for Benes network related functions -*/ - - -#include "gf.h" - -void PQCLEAN_MCELIECE348864_CLEAN_apply_benes(unsigned char * /*r*/, const unsigned char * /*bits*/, int /*rev*/); -void PQCLEAN_MCELIECE348864_CLEAN_support_gen(gf * /*s*/, const unsigned char * /*c*/); - -#endif - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/bm.c b/src/kem/classic_mceliece/pqclean_mceliece348864_clean/bm.c deleted file mode 100644 index 89b8ed4a7..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/bm.c +++ /dev/null @@ -1,83 +0,0 @@ -/* - This file is for the Berlekamp-Massey algorithm - see http://crypto.stanford.edu/~mironov/cs359/massey.pdf -*/ -#include "bm.h" - -#include 
"params.h" - -#define min(a, b) (((a) < (b)) ? (a) : (b)) - -/* the Berlekamp-Massey algorithm */ -/* input: s, sequence of field elements */ -/* output: out, minimal polynomial of s */ -void PQCLEAN_MCELIECE348864_CLEAN_bm(gf *out, gf *s) { - int i; - - uint16_t N = 0; - uint16_t L = 0; - uint16_t mle; - uint16_t mne; - - gf T[ SYS_T + 1 ]; - gf C[ SYS_T + 1 ]; - gf B[ SYS_T + 1 ]; - - gf b = 1, d, f; - - // - - for (i = 0; i < SYS_T + 1; i++) { - C[i] = B[i] = 0; - } - - B[1] = C[0] = 1; - - // - - for (N = 0; N < 2 * SYS_T; N++) { - d = 0; - - for (i = 0; i <= min(N, SYS_T); i++) { - d ^= PQCLEAN_MCELIECE348864_CLEAN_gf_mul(C[i], s[ N - i]); - } - - mne = d; - mne -= 1; - mne >>= 15; - mne -= 1; - mle = N; - mle -= 2 * L; - mle >>= 15; - mle -= 1; - mle &= mne; - - for (i = 0; i <= SYS_T; i++) { - T[i] = C[i]; - } - - f = PQCLEAN_MCELIECE348864_CLEAN_gf_frac(b, d); - - for (i = 0; i <= SYS_T; i++) { - C[i] ^= PQCLEAN_MCELIECE348864_CLEAN_gf_mul(f, B[i]) & mne; - } - - L = (L & ~mle) | ((N + 1 - L) & mle); - - for (i = 0; i <= SYS_T; i++) { - B[i] = (B[i] & ~mle) | (T[i] & mle); - } - - b = (b & ~mle) | (d & mle); - - for (i = SYS_T; i >= 1; i--) { - B[i] = B[i - 1]; - } - B[0] = 0; - } - - for (i = 0; i <= SYS_T; i++) { - out[i] = C[ SYS_T - i ]; - } -} - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/bm.h b/src/kem/classic_mceliece/pqclean_mceliece348864_clean/bm.h deleted file mode 100644 index c7da4878e..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/bm.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef PQCLEAN_MCELIECE348864_CLEAN_BM_H -#define PQCLEAN_MCELIECE348864_CLEAN_BM_H -/* - This file is for the Berlekamp-Massey algorithm - see http://crypto.stanford.edu/~mironov/cs359/massey.pdf -*/ - -#include "gf.h" - -void PQCLEAN_MCELIECE348864_CLEAN_bm(gf * /*out*/, gf * /*s*/); - -#endif - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/controlbits.c b/src/kem/classic_mceliece/pqclean_mceliece348864_clean/controlbits.c deleted file mode 100644 index 7b3444e34..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/controlbits.c +++ /dev/null @@ -1,274 +0,0 @@ -/* - This file is for functions required for generating the control bits of the Benes network w.r.t. 
a random permutation - see the Lev-Pippenger-Valiant paper https://www.computer.org/csdl/trans/tc/1981/02/06312171.pdf -*/ - -#include "controlbits.h" - -#include "params.h" - -#include - -typedef uint8_t bit; - -#define N (1 << GFBITS) - -static bit is_smaller(uint32_t a, uint32_t b) { - uint32_t ret = 0; - - ret = a - b; - ret >>= 31; - - return (bit)ret; -} - -static bit is_smaller_63b(uint64_t a, uint64_t b) { - uint64_t ret = 0; - - ret = a - b; - ret >>= 63; - - return (bit)ret; -} - -static void cswap(uint32_t *x, uint32_t *y, bit swap) { - uint32_t m; - uint32_t d; - - m = swap; - m = 0 - m; - - d = (*x ^ *y); - d &= m; - *x ^= d; - *y ^= d; -} - -static void cswap_63b(uint64_t *x, uint64_t *y, bit swap) { - uint64_t m; - uint64_t d; - - m = swap; - m = 0 - m; - - d = (*x ^ *y); - d &= m; - *x ^= d; - *y ^= d; -} - -/* output x = min(input x,input y) */ -/* output y = max(input x,input y) */ - -static void minmax(uint32_t *x, uint32_t *y) { - bit m; - - m = is_smaller(*y, *x); - cswap(x, y, m); -} - -static void minmax_63b(uint64_t *x, uint64_t *y) { - bit m; - - m = is_smaller_63b(*y, *x); - cswap_63b(x, y, m); -} - -/* merge first half of x[0],x[step],...,x[(2*n-1)*step] with second half */ -/* requires n to be a power of 2 */ - -static void merge(int n, uint32_t *x, int step) { - int i; - if (n == 1) { - minmax(&x[0], &x[step]); - } else { - merge(n / 2, x, step * 2); - merge(n / 2, x + step, step * 2); - for (i = 1; i < 2 * n - 1; i += 2) { - minmax(&x[i * step], &x[(i + 1) * step]); - } - } -} - -static void merge_63b(int n, uint64_t *x, int step) { - int i; - if (n == 1) { - minmax_63b(&x[0], &x[step]); - } else { - merge_63b(n / 2, x, step * 2); - merge_63b(n / 2, x + step, step * 2); - for (i = 1; i < 2 * n - 1; i += 2) { - minmax_63b(&x[i * step], &x[(i + 1) * step]); - } - } -} - -/* sort x[0],x[1],...,x[n-1] in place */ -/* requires n to be a power of 2 */ - -static void sort(int n, uint32_t *x) { - if (n <= 1) { - return; - } - sort(n / 2, x); - sort(n / 2, x + n / 2); - merge(n / 2, x, 1); -} - -void PQCLEAN_MCELIECE348864_CLEAN_sort_63b(int n, uint64_t *x) { - if (n <= 1) { - return; - } - PQCLEAN_MCELIECE348864_CLEAN_sort_63b(n / 2, x); - PQCLEAN_MCELIECE348864_CLEAN_sort_63b(n / 2, x + n / 2); - merge_63b(n / 2, x, 1); -} - -/* y[pi[i]] = x[i] */ -/* requires n = 2^w */ -/* requires pi to be a permutation */ -static void composeinv(int n, uint32_t *y, const uint32_t *x, const uint32_t *pi) { // NC - int i; - uint32_t t[2 * N]; - - for (i = 0; i < n; ++i) { - t[i] = x[i] | (pi[i] << 16); - } - - sort(n, t); - - for (i = 0; i < n; ++i) { - y[i] = t[i] & 0xFFFF; - } -} - -/* ip[i] = j iff pi[i] = j */ -/* requires n = 2^w */ -/* requires pi to be a permutation */ -static void invert(int n, uint32_t *ip, const uint32_t *pi) { - int i; - - for (i = 0; i < n; i++) { - ip[i] = i; - } - - composeinv(n, ip, ip, pi); -} - - -static void flow(int w, uint32_t *x, const uint32_t *y, int t) { - bit m0; - bit m1; - - uint32_t b; - uint32_t y_copy = *y; - - m0 = is_smaller(*y & ((1 << w) - 1), *x & ((1 << w) - 1)); - m1 = is_smaller(0, t); - - cswap(x, &y_copy, m0); - b = m0 & m1; - *x ^= b << w; -} - -/* input: permutation pi */ -/* output: (2w-1)n/2 (or 0 if n==1) control bits c[0],c[step],c[2*step],... 
*/ -/* requires n = 2^w */ -static void controlbitsfrompermutation(int w, int n, int step, int off, unsigned char *c, const uint32_t *pi) { - int i; - int j; - int k; - int t; - uint32_t ip[N] = {0}; - uint32_t I[2 * N] = {0}; - uint32_t P[2 * N] = {0}; - uint32_t PI[2 * N] = {0}; - uint32_t T[2 * N] = {0}; - uint32_t piflip[N] = {0}; - uint32_t subpi[2][N / 2] = {{0}}; - - if (w == 1) { - c[ off / 8 ] |= (pi[0] & 1) << (off % 8); - } - if (w <= 1) { - return; - } - - invert(n, ip, pi); - - for (i = 0; i < n; ++i) { - I[i] = ip[i] | (1 << w); - I[n + i] = pi[i]; - } - - for (i = 0; i < 2 * n; ++i) { - P[i] = (i >> w) + (i & ((1 << w) - 2)) + ((i & 1) << w); - } - - for (t = 0; t < w; ++t) { - composeinv(2 * n, PI, P, I); - - for (i = 0; i < 2 * n; ++i) { - flow(w, &P[i], &PI[i], t); - } - - for (i = 0; i < 2 * n; ++i) { - T[i] = I[i ^ 1]; - } - - composeinv(2 * n, I, I, T); - - for (i = 0; i < 2 * n; ++i) { - T[i] = P[i ^ 1]; - } - - for (i = 0; i < 2 * n; ++i) { - flow(w, &P[i], &T[i], 1); - } - } - - for (i = 0; i < n; ++i) { - for (j = 0; j < w; ++j) { - piflip[i] = pi[i]; - } - } - - for (i = 0; i < n / 2; ++i) { - c[ (off + i * step) / 8 ] |= ((P[i * 2] >> w) & 1) << ((off + i * step) % 8); - } - for (i = 0; i < n / 2; ++i) { - c[ (off + ((w - 1)*n + i) * step) / 8 ] |= ((P[n + i * 2] >> w) & 1) << ((off + ((w - 1) * n + i) * step) % 8); - } - - for (i = 0; i < n / 2; ++i) { - cswap(&piflip[i * 2], &piflip[i * 2 + 1], (P[n + i * 2] >> w) & 1); - } - - for (k = 0; k < 2; ++k) { - for (i = 0; i < n / 2; ++i) { - subpi[k][i] = piflip[i * 2 + k] >> 1; - } - } - - for (k = 0; k < 2; ++k) { - controlbitsfrompermutation(w - 1, n / 2, step * 2, off + step * (n / 2 + k), c, subpi[k]); - } -} - -/* input: pi, a permutation*/ -/* output: out, control bits w.r.t. pi */ -void PQCLEAN_MCELIECE348864_CLEAN_controlbits(unsigned char *out, const uint32_t *pi) { - unsigned int i; - unsigned char c[ (2 * GFBITS - 1) * (1 << GFBITS) / 16 ]; - - for (i = 0; i < sizeof(c); i++) { - c[i] = 0; - } - - controlbitsfrompermutation(GFBITS, (1 << GFBITS), 1, 0, c, pi); - - for (i = 0; i < sizeof(c); i++) { - out[i] = c[i]; - } -} - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/controlbits.h b/src/kem/classic_mceliece/pqclean_mceliece348864_clean/controlbits.h deleted file mode 100644 index 3125a8629..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/controlbits.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef PQCLEAN_MCELIECE348864_CLEAN_CONTROLBITS_H -#define PQCLEAN_MCELIECE348864_CLEAN_CONTROLBITS_H -/* - This file is for functions required for generating the control bits of the Benes network w.r.t. 
a random permutation - see the Lev-Pippenger-Valiant paper https://www.computer.org/csdl/trans/tc/1981/02/06312171.pdf -*/ - - -#include - -void PQCLEAN_MCELIECE348864_CLEAN_sort_63b(int n, uint64_t *x); -void PQCLEAN_MCELIECE348864_CLEAN_controlbits(unsigned char *out, const uint32_t *pi); - -#endif - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/crypto_hash.h b/src/kem/classic_mceliece/pqclean_mceliece348864_clean/crypto_hash.h deleted file mode 100644 index 110ecfc9c..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/crypto_hash.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef PQCLEAN_MCELIECE348864_CLEAN_CRYPTO_HASH_H -#define PQCLEAN_MCELIECE348864_CLEAN_CRYPTO_HASH_H -#include "fips202.h" - -#define crypto_hash_32b(out,in,inlen) shake256(out, 32, in, inlen) - -#endif diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/decrypt.c b/src/kem/classic_mceliece/pqclean_mceliece348864_clean/decrypt.c deleted file mode 100644 index d180c5cdb..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/decrypt.c +++ /dev/null @@ -1,90 +0,0 @@ -/* - This file is for Niederreiter decryption -*/ - -#include "decrypt.h" - -#include "benes.h" -#include "bm.h" -#include "gf.h" -#include "params.h" -#include "root.h" -#include "synd.h" -#include "util.h" - -/* Niederreiter decryption with the Berlekamp decoder */ -/* intput: sk, secret key */ -/* c, ciphertext */ -/* output: e, error vector */ -/* return: 0 for success; 1 for failure */ -int PQCLEAN_MCELIECE348864_CLEAN_decrypt(unsigned char *e, const unsigned char *sk, const unsigned char *c) { - int i, w = 0; - uint16_t check; - - unsigned char r[ SYS_N / 8 ]; - - gf g[ SYS_T + 1 ]; - gf L[ SYS_N ]; - - gf s[ SYS_T * 2 ]; - gf s_cmp[ SYS_T * 2 ]; - gf locator[ SYS_T + 1 ]; - gf images[ SYS_N ]; - - gf t; - - // - - for (i = 0; i < SYND_BYTES; i++) { - r[i] = c[i]; - } - for (i = SYND_BYTES; i < SYS_N / 8; i++) { - r[i] = 0; - } - - for (i = 0; i < SYS_T; i++) { - g[i] = PQCLEAN_MCELIECE348864_CLEAN_load2(sk); - g[i] &= GFMASK; - sk += 2; - } - g[ SYS_T ] = 1; - - PQCLEAN_MCELIECE348864_CLEAN_support_gen(L, sk); - - PQCLEAN_MCELIECE348864_CLEAN_synd(s, g, L, r); - - PQCLEAN_MCELIECE348864_CLEAN_bm(locator, s); - - PQCLEAN_MCELIECE348864_CLEAN_root(images, locator, L); - - // - - for (i = 0; i < SYS_N / 8; i++) { - e[i] = 0; - } - - for (i = 0; i < SYS_N; i++) { - t = PQCLEAN_MCELIECE348864_CLEAN_gf_iszero(images[i]) & 1; - - e[ i / 8 ] |= t << (i % 8); - w += t; - - } - - PQCLEAN_MCELIECE348864_CLEAN_synd(s_cmp, g, L, e); - - // - - check = (uint16_t)w; - check ^= SYS_T; - - for (i = 0; i < SYS_T * 2; i++) { - check |= s[i] ^ s_cmp[i]; - } - - check -= 1; - check >>= 15; - - return check ^ 1; -} - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/decrypt.h b/src/kem/classic_mceliece/pqclean_mceliece348864_clean/decrypt.h deleted file mode 100644 index 4a80e068e..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/decrypt.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef PQCLEAN_MCELIECE348864_CLEAN_DECRYPT_H -#define PQCLEAN_MCELIECE348864_CLEAN_DECRYPT_H -/* - This file is for Nieddereiter decryption -*/ - -int PQCLEAN_MCELIECE348864_CLEAN_decrypt(unsigned char * /*e*/, const unsigned char * /*sk*/, const unsigned char * /*c*/); - -#endif - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/encrypt.c b/src/kem/classic_mceliece/pqclean_mceliece348864_clean/encrypt.c deleted file mode 100644 index 27a6ea4f0..000000000 --- 
a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/encrypt.c +++ /dev/null @@ -1,138 +0,0 @@ -/* - This file is for Niederreiter encryption -*/ - -#include "encrypt.h" - -#include "params.h" -#include "randombytes.h" -#include "util.h" - -#include -#include - -#include "gf.h" - -static inline uint8_t same_mask(uint16_t x, uint16_t y) { - uint32_t mask; - - mask = x ^ y; - mask -= 1; - mask >>= 31; - mask = -mask; - - return (uint8_t)mask; -} - -/* output: e, an error vector of weight t */ -static void gen_e(unsigned char *e) { - size_t i, j; - int eq, count; - - uint16_t ind_[ SYS_T * 2 ]; - uint8_t *ind_8 = (uint8_t *)ind_; - uint16_t ind[ SYS_T * 2 ]; - uint8_t mask; - unsigned char val[ SYS_T ]; - - while (1) { - randombytes(ind_8, sizeof(ind_)); - // Copy to uint16_t ind_ in a little-endian way - for (i = 0; i < sizeof(ind_); i += 2) { - ind_[i / 2] = ((uint16_t)ind_8[i + 1]) << 8 | (uint16_t)ind_8[i]; - } - - for (i = 0; i < SYS_T * 2; i++) { - ind_[i] &= GFMASK; - } - - // moving and counting indices in the correct range - - count = 0; - for (i = 0; i < SYS_T * 2; i++) { - if (ind_[i] < SYS_N) { - ind[ count++ ] = ind_[i]; - } - } - - if (count < SYS_T) { - continue; - } - - // check for repetition - - eq = 0; - - for (i = 1; i < SYS_T; i++) { - for (j = 0; j < i; j++) { - if (ind[i] == ind[j]) { - eq = 1; - } - } - } - - if (eq == 0) { - break; - } - } - - for (j = 0; j < SYS_T; j++) { - val[j] = 1 << (ind[j] & 7); - } - - for (i = 0; i < SYS_N / 8; i++) { - e[i] = 0; - - for (j = 0; j < SYS_T; j++) { - mask = same_mask((uint16_t)i, (ind[j] >> 3)); - - e[i] |= val[j] & mask; - } - } -} - -/* input: public key pk, error vector e */ -/* output: syndrome s */ -static void syndrome(unsigned char *s, const unsigned char *pk, const unsigned char *e) { - unsigned char b, row[SYS_N / 8]; - const unsigned char *pk_ptr = pk; - - int i, j; - - for (i = 0; i < SYND_BYTES; i++) { - s[i] = 0; - } - - for (i = 0; i < PK_NROWS; i++) { - for (j = 0; j < SYS_N / 8; j++) { - row[j] = 0; - } - - for (j = 0; j < PK_ROW_BYTES; j++) { - row[ SYS_N / 8 - PK_ROW_BYTES + j ] = pk_ptr[j]; - } - - row[i / 8] |= 1 << (i % 8); - - b = 0; - for (j = 0; j < SYS_N / 8; j++) { - b ^= row[j] & e[j]; - } - - b ^= b >> 4; - b ^= b >> 2; - b ^= b >> 1; - b &= 1; - - s[ i / 8 ] |= (b << (i % 8)); - - pk_ptr += PK_ROW_BYTES; - } -} - -void PQCLEAN_MCELIECE348864_CLEAN_encrypt(unsigned char *s, unsigned char *e, const unsigned char *pk) { - gen_e(e); - - syndrome(s, pk, e); -} - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/encrypt.h b/src/kem/classic_mceliece/pqclean_mceliece348864_clean/encrypt.h deleted file mode 100644 index 2b6daf868..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/encrypt.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef PQCLEAN_MCELIECE348864_CLEAN_ENCRYPT_H -#define PQCLEAN_MCELIECE348864_CLEAN_ENCRYPT_H -/* - This file is for Niederreiter encryption -*/ - - -void PQCLEAN_MCELIECE348864_CLEAN_encrypt(unsigned char * /*s*/, unsigned char * /*e*/, const unsigned char * /*pk*/); - -#endif - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/gf.c b/src/kem/classic_mceliece/pqclean_mceliece348864_clean/gf.c deleted file mode 100644 index d974bf607..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/gf.c +++ /dev/null @@ -1,139 +0,0 @@ -/* - This file is for functions for field arithmetic -*/ - -#include "gf.h" - -#include "params.h" - -gf PQCLEAN_MCELIECE348864_CLEAN_gf_iszero(gf a) { - uint32_t t = a; - - t -= 1; - t >>= 19; - - 
return (gf) t; -} - -gf PQCLEAN_MCELIECE348864_CLEAN_gf_add(gf in0, gf in1) { - return in0 ^ in1; -} - -gf PQCLEAN_MCELIECE348864_CLEAN_gf_mul(gf in0, gf in1) { - int i; - - uint32_t tmp; - uint32_t t0; - uint32_t t1; - uint32_t t; - - t0 = in0; - t1 = in1; - - tmp = t0 * (t1 & 1); - - for (i = 1; i < GFBITS; i++) { - tmp ^= (t0 * (t1 & (1 << i))); - } - - t = tmp & 0x7FC000; - tmp ^= t >> 9; - tmp ^= t >> 12; - - t = tmp & 0x3000; - tmp ^= t >> 9; - tmp ^= t >> 12; - - return tmp & ((1 << GFBITS) - 1); -} - -/* input: field element in */ -/* return: in^2 */ -static inline gf gf_sq(gf in) { - const uint32_t B[] = {0x55555555, 0x33333333, 0x0F0F0F0F, 0x00FF00FF}; - - uint32_t x = in; - uint32_t t; - - x = (x | (x << 8)) & B[3]; - x = (x | (x << 4)) & B[2]; - x = (x | (x << 2)) & B[1]; - x = (x | (x << 1)) & B[0]; - - t = x & 0x7FC000; - x ^= t >> 9; - x ^= t >> 12; - - t = x & 0x3000; - x ^= t >> 9; - x ^= t >> 12; - - return x & ((1 << GFBITS) - 1); -} - -gf PQCLEAN_MCELIECE348864_CLEAN_gf_inv(gf in) { - gf tmp_11; - gf tmp_1111; - - gf out = in; - - out = gf_sq(out); - tmp_11 = PQCLEAN_MCELIECE348864_CLEAN_gf_mul(out, in); // 11 - - out = gf_sq(tmp_11); - out = gf_sq(out); - tmp_1111 = PQCLEAN_MCELIECE348864_CLEAN_gf_mul(out, tmp_11); // 1111 - - out = gf_sq(tmp_1111); - out = gf_sq(out); - out = gf_sq(out); - out = gf_sq(out); - out = PQCLEAN_MCELIECE348864_CLEAN_gf_mul(out, tmp_1111); // 11111111 - - out = gf_sq(out); - out = gf_sq(out); - out = PQCLEAN_MCELIECE348864_CLEAN_gf_mul(out, tmp_11); // 1111111111 - - out = gf_sq(out); - out = PQCLEAN_MCELIECE348864_CLEAN_gf_mul(out, in); // 11111111111 - - return gf_sq(out); // 111111111110 -} - -/* input: field element den, num */ -/* return: (num/den) */ -gf PQCLEAN_MCELIECE348864_CLEAN_gf_frac(gf den, gf num) { - return PQCLEAN_MCELIECE348864_CLEAN_gf_mul(PQCLEAN_MCELIECE348864_CLEAN_gf_inv(den), num); -} - -/* input: in0, in1 in GF((2^m)^t)*/ -/* output: out = in0*in1 */ -void PQCLEAN_MCELIECE348864_CLEAN_GF_mul(gf *out, const gf *in0, const gf *in1) { - int i, j; - - gf prod[ SYS_T * 2 - 1 ]; - - for (i = 0; i < SYS_T * 2 - 1; i++) { - prod[i] = 0; - } - - for (i = 0; i < SYS_T; i++) { - for (j = 0; j < SYS_T; j++) { - prod[i + j] ^= PQCLEAN_MCELIECE348864_CLEAN_gf_mul(in0[i], in1[j]); - } - } - - // - - for (i = (SYS_T - 1) * 2; i >= SYS_T; i--) { - prod[i - SYS_T + 9] ^= PQCLEAN_MCELIECE348864_CLEAN_gf_mul(prod[i], (gf) 877); - prod[i - SYS_T + 7] ^= PQCLEAN_MCELIECE348864_CLEAN_gf_mul(prod[i], (gf) 2888); - prod[i - SYS_T + 5] ^= PQCLEAN_MCELIECE348864_CLEAN_gf_mul(prod[i], (gf) 1781); - prod[i - SYS_T + 0] ^= PQCLEAN_MCELIECE348864_CLEAN_gf_mul(prod[i], (gf) 373); - } - - for (i = 0; i < SYS_T; i++) { - out[i] = prod[i]; - } -} - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/gf.h b/src/kem/classic_mceliece/pqclean_mceliece348864_clean/gf.h deleted file mode 100644 index c445925a6..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/gf.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef PQCLEAN_MCELIECE348864_CLEAN_GF_H -#define PQCLEAN_MCELIECE348864_CLEAN_GF_H -/* - This file is for functions for field arithmetic -*/ - - -#include - -typedef uint16_t gf; - -gf PQCLEAN_MCELIECE348864_CLEAN_gf_iszero(gf a); -gf PQCLEAN_MCELIECE348864_CLEAN_gf_add(gf in0, gf in1); -gf PQCLEAN_MCELIECE348864_CLEAN_gf_mul(gf in0, gf in1); -gf PQCLEAN_MCELIECE348864_CLEAN_gf_frac(gf den, gf num); -gf PQCLEAN_MCELIECE348864_CLEAN_gf_inv(gf in); -uint64_t PQCLEAN_MCELIECE348864_CLEAN_gf_mul2(gf a, gf b0, gf b1); - -void 
PQCLEAN_MCELIECE348864_CLEAN_GF_mul(gf *out, const gf *in0, const gf *in1); - -#endif - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/operations.c b/src/kem/classic_mceliece/pqclean_mceliece348864_clean/operations.c deleted file mode 100644 index 3a222d77e..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/operations.c +++ /dev/null @@ -1,136 +0,0 @@ -#include "api.h" - -#include "aes256ctr.h" -#include "controlbits.h" -#include "crypto_hash.h" -#include "decrypt.h" -#include "encrypt.h" -#include "params.h" -#include "pk_gen.h" -#include "randombytes.h" -#include "sk_gen.h" -#include "util.h" - -#include -#include - -int PQCLEAN_MCELIECE348864_CLEAN_crypto_kem_enc( - uint8_t *c, - uint8_t *key, - const uint8_t *pk -) { - uint8_t two_e[ 1 + SYS_N / 8 ] = {2}; - uint8_t *e = two_e + 1; - uint8_t one_ec[ 1 + SYS_N / 8 + (SYND_BYTES + 32) ] = {1}; - - PQCLEAN_MCELIECE348864_CLEAN_encrypt(c, e, pk); - - crypto_hash_32b(c + SYND_BYTES, two_e, sizeof(two_e)); - - memcpy(one_ec + 1, e, SYS_N / 8); - memcpy(one_ec + 1 + SYS_N / 8, c, SYND_BYTES + 32); - - crypto_hash_32b(key, one_ec, sizeof(one_ec)); - - return 0; -} - -int PQCLEAN_MCELIECE348864_CLEAN_crypto_kem_dec( - uint8_t *key, - const uint8_t *c, - const uint8_t *sk -) { - int i; - - uint8_t ret_confirm = 0; - uint8_t ret_decrypt = 0; - - uint16_t m; - - uint8_t conf[32]; - uint8_t two_e[ 1 + SYS_N / 8 ] = {2}; - uint8_t *e = two_e + 1; - uint8_t preimage[ 1 + SYS_N / 8 + (SYND_BYTES + 32) ]; - uint8_t *x = preimage; - - // - - ret_decrypt = (uint8_t)PQCLEAN_MCELIECE348864_CLEAN_decrypt(e, sk + SYS_N / 8, c); - - crypto_hash_32b(conf, two_e, sizeof(two_e)); - - for (i = 0; i < 32; i++) { - ret_confirm |= conf[i] ^ c[SYND_BYTES + i]; - } - - m = ret_decrypt | ret_confirm; - m -= 1; - m >>= 8; - - *x++ = (~m & 0) | (m & 1); - for (i = 0; i < SYS_N / 8; i++) { - *x++ = (~m & sk[i]) | (m & e[i]); - } - for (i = 0; i < SYND_BYTES + 32; i++) { - *x++ = c[i]; - } - - crypto_hash_32b(key, preimage, sizeof(preimage)); - - return 0; -} - -int PQCLEAN_MCELIECE348864_CLEAN_crypto_kem_keypair -( - uint8_t *pk, - uint8_t *sk -) { - int i; - uint8_t seed[ 32 ]; - uint8_t r[ SYS_T * 2 + (1 << GFBITS)*sizeof(uint32_t) + SYS_N / 8 + 32 ]; - uint8_t nonce[ 16 ] = {0}; - uint8_t *rp; - - gf f[ SYS_T ]; // element in GF(2^mt) - gf irr[ SYS_T ]; // Goppa polynomial - uint32_t perm[ 1 << GFBITS ]; // random permutation - - randombytes(seed, sizeof(seed)); - - while (1) { - rp = r; - PQCLEAN_MCELIECE348864_CLEAN_aes256ctr(r, sizeof(r), nonce, seed); - memcpy(seed, &r[ sizeof(r) - 32 ], 32); - - for (i = 0; i < SYS_T; i++) { - f[i] = PQCLEAN_MCELIECE348864_CLEAN_load2(rp + i * 2); - } - rp += sizeof(f); - if (PQCLEAN_MCELIECE348864_CLEAN_genpoly_gen(irr, f)) { - continue; - } - - for (i = 0; i < (1 << GFBITS); i++) { - perm[i] = PQCLEAN_MCELIECE348864_CLEAN_load4(rp + i * 4); - } - rp += sizeof(perm); - if (PQCLEAN_MCELIECE348864_CLEAN_perm_check(perm)) { - continue; - } - - for (i = 0; i < SYS_T; i++) { - PQCLEAN_MCELIECE348864_CLEAN_store2(sk + SYS_N / 8 + i * 2, irr[i]); - } - if (PQCLEAN_MCELIECE348864_CLEAN_pk_gen(pk, perm, sk + SYS_N / 8)) { - continue; - } - - memcpy(sk, rp, SYS_N / 8); - PQCLEAN_MCELIECE348864_CLEAN_controlbits(sk + SYS_N / 8 + IRR_BYTES, perm); - - break; - } - - return 0; -} - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/params.h b/src/kem/classic_mceliece/pqclean_mceliece348864_clean/params.h deleted file mode 100644 index ae8aaa917..000000000 --- 
a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/params.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef PQCLEAN_MCELIECE348864_CLEAN_PARAMS_H -#define PQCLEAN_MCELIECE348864_CLEAN_PARAMS_H - -#define GFBITS 12 -#define SYS_N 3488 -#define SYS_T 64 - -#define COND_BYTES ((1 << (GFBITS-4))*(2*GFBITS - 1)) -#define IRR_BYTES (SYS_T * 2) - -#define PK_NROWS (SYS_T*GFBITS) -#define PK_NCOLS (SYS_N - PK_NROWS) -#define PK_ROW_BYTES ((PK_NCOLS + 7)/8) - -#define SK_BYTES (SYS_N/8 + IRR_BYTES + COND_BYTES) -#define SYND_BYTES ((PK_NROWS + 7)/8) - -#define GFMASK ((1 << GFBITS) - 1) - -#endif - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/pk_gen.c b/src/kem/classic_mceliece/pqclean_mceliece348864_clean/pk_gen.c deleted file mode 100644 index eec02be00..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/pk_gen.c +++ /dev/null @@ -1,144 +0,0 @@ -/* - This file is for public-key generation -*/ - -#include - -#include "benes.h" -#include "controlbits.h" -#include "gf.h" -#include "params.h" -#include "pk_gen.h" -#include "root.h" -#include "util.h" - -/* input: secret key sk */ -/* output: public key pk */ -int PQCLEAN_MCELIECE348864_CLEAN_pk_gen(uint8_t *pk, uint32_t *perm, const uint8_t *sk) { - int i, j, k; - int row, c; - - uint64_t buf[ 1 << GFBITS ]; - - uint8_t mat[ GFBITS * SYS_T ][ SYS_N / 8 ]; - uint8_t mask; - uint8_t b; - - gf g[ SYS_T + 1 ]; // Goppa polynomial - gf L[ SYS_N ]; // support - gf inv[ SYS_N ]; - - // - - g[ SYS_T ] = 1; - - for (i = 0; i < SYS_T; i++) { - g[i] = PQCLEAN_MCELIECE348864_CLEAN_load2(sk); - g[i] &= GFMASK; - sk += 2; - } - - for (i = 0; i < (1 << GFBITS); i++) { - buf[i] = perm[i]; - buf[i] <<= 31; - buf[i] |= i; - } - - PQCLEAN_MCELIECE348864_CLEAN_sort_63b(1 << GFBITS, buf); - - for (i = 0; i < (1 << GFBITS); i++) { - perm[i] = buf[i] & GFMASK; - } - for (i = 0; i < SYS_N; i++) { - L[i] = PQCLEAN_MCELIECE348864_CLEAN_bitrev((gf)perm[i]); - } - - // filling the matrix - - PQCLEAN_MCELIECE348864_CLEAN_root(inv, g, L); - - for (i = 0; i < SYS_N; i++) { - inv[i] = PQCLEAN_MCELIECE348864_CLEAN_gf_inv(inv[i]); - } - - for (i = 0; i < PK_NROWS; i++) { - for (j = 0; j < SYS_N / 8; j++) { - mat[i][j] = 0; - } - } - - for (i = 0; i < SYS_T; i++) { - for (j = 0; j < SYS_N; j += 8) { - for (k = 0; k < GFBITS; k++) { - b = (inv[j + 7] >> k) & 1; - b <<= 1; - b |= (inv[j + 6] >> k) & 1; - b <<= 1; - b |= (inv[j + 5] >> k) & 1; - b <<= 1; - b |= (inv[j + 4] >> k) & 1; - b <<= 1; - b |= (inv[j + 3] >> k) & 1; - b <<= 1; - b |= (inv[j + 2] >> k) & 1; - b <<= 1; - b |= (inv[j + 1] >> k) & 1; - b <<= 1; - b |= (inv[j + 0] >> k) & 1; - - mat[ i * GFBITS + k ][ j / 8 ] = b; - } - } - - for (j = 0; j < SYS_N; j++) { - inv[j] = PQCLEAN_MCELIECE348864_CLEAN_gf_mul(inv[j], L[j]); - } - - } - - // gaussian elimination - - for (i = 0; i < (GFBITS * SYS_T + 7) / 8; i++) { - for (j = 0; j < 8; j++) { - row = i * 8 + j; - - if (row >= GFBITS * SYS_T) { - break; - } - - for (k = row + 1; k < GFBITS * SYS_T; k++) { - mask = mat[ row ][ i ] ^ mat[ k ][ i ]; - mask >>= j; - mask &= 1; - mask = -mask; - - for (c = 0; c < SYS_N / 8; c++) { - mat[ row ][ c ] ^= mat[ k ][ c ] & mask; - } - } - - if ( ((mat[ row ][ i ] >> j) & 1) == 0 ) { // return if not systematic - return -1; - } - - for (k = 0; k < GFBITS * SYS_T; k++) { - if (k != row) { - mask = mat[ k ][ i ] >> j; - mask &= 1; - mask = -mask; - - for (c = 0; c < SYS_N / 8; c++) { - mat[ k ][ c ] ^= mat[ row ][ c ] & mask; - } - } - } - } - } - - for (i = 0; i < PK_NROWS; i++) { - memcpy(pk + i * 
PK_ROW_BYTES, mat[i] + PK_NROWS / 8, PK_ROW_BYTES); - } - - return 0; -} - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/pk_gen.h b/src/kem/classic_mceliece/pqclean_mceliece348864_clean/pk_gen.h deleted file mode 100644 index e92992f55..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/pk_gen.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef PQCLEAN_MCELIECE348864_CLEAN_PK_GEN_H -#define PQCLEAN_MCELIECE348864_CLEAN_PK_GEN_H -/* - This file is for public-key generation -*/ - - -#include - -int PQCLEAN_MCELIECE348864_CLEAN_pk_gen(uint8_t * /*pk*/, uint32_t * /*perm*/, const uint8_t * /*sk*/); - -#endif - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/root.c b/src/kem/classic_mceliece/pqclean_mceliece348864_clean/root.c deleted file mode 100644 index a57f215c3..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/root.c +++ /dev/null @@ -1,33 +0,0 @@ -/* - This file is for evaluating a polynomial at one or more field elements -*/ -#include "root.h" - -#include "params.h" - -/* input: polynomial f and field element a */ -/* return f(a) */ -gf PQCLEAN_MCELIECE348864_CLEAN_eval(gf *f, gf a) { - int i; - gf r; - - r = f[ SYS_T ]; - - for (i = SYS_T - 1; i >= 0; i--) { - r = PQCLEAN_MCELIECE348864_CLEAN_gf_mul(r, a); - r = PQCLEAN_MCELIECE348864_CLEAN_gf_add(r, f[i]); - } - - return r; -} - -/* input: polynomial f and list of field elements L */ -/* output: out = [ f(a) for a in L ] */ -void PQCLEAN_MCELIECE348864_CLEAN_root(gf *out, gf *f, gf *L) { - int i; - - for (i = 0; i < SYS_N; i++) { - out[i] = PQCLEAN_MCELIECE348864_CLEAN_eval(f, L[i]); - } -} - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/root.h b/src/kem/classic_mceliece/pqclean_mceliece348864_clean/root.h deleted file mode 100644 index 6b125234e..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/root.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef PQCLEAN_MCELIECE348864_CLEAN_ROOT_H -#define PQCLEAN_MCELIECE348864_CLEAN_ROOT_H -/* - This file is for evaluating a polynomial at one or more field elements -*/ - - -#include "gf.h" - -gf PQCLEAN_MCELIECE348864_CLEAN_eval(gf * /*f*/, gf /*a*/); -void PQCLEAN_MCELIECE348864_CLEAN_root(gf * /*out*/, gf * /*f*/, gf * /*L*/); - -#endif - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/sk_gen.c b/src/kem/classic_mceliece/pqclean_mceliece348864_clean/sk_gen.c deleted file mode 100644 index d75075e38..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/sk_gen.c +++ /dev/null @@ -1,98 +0,0 @@ -/* - This file is for secret-key generation -*/ - -#include "sk_gen.h" - -#include "controlbits.h" -#include "gf.h" -#include "params.h" -#include "util.h" - -/* input: f, element in GF((2^m)^t) */ -/* output: out, minimal polynomial of f */ -/* return: 0 for success and -1 for failure */ -int PQCLEAN_MCELIECE348864_CLEAN_genpoly_gen(gf *out, gf *f) { - int i, j, k, c; - - gf mat[ SYS_T + 1 ][ SYS_T ]; - gf mask, inv, t; - - // fill matrix - - mat[0][0] = 1; - - for (i = 1; i < SYS_T; i++) { - mat[0][i] = 0; - } - - for (i = 0; i < SYS_T; i++) { - mat[1][i] = f[i]; - } - - for (j = 2; j <= SYS_T; j++) { - PQCLEAN_MCELIECE348864_CLEAN_GF_mul(mat[j], mat[j - 1], f); - } - - // gaussian - - for (j = 0; j < SYS_T; j++) { - for (k = j + 1; k < SYS_T; k++) { - mask = PQCLEAN_MCELIECE348864_CLEAN_gf_iszero(mat[ j ][ j ]); - - for (c = j; c < SYS_T + 1; c++) { - mat[ c ][ j ] ^= mat[ c ][ k ] & mask; - } - - } - - if ( mat[ j ][ j ] == 0 ) { // return if not systematic - 
return -1; - } - - inv = PQCLEAN_MCELIECE348864_CLEAN_gf_inv(mat[j][j]); - - for (c = j; c < SYS_T + 1; c++) { - mat[ c ][ j ] = PQCLEAN_MCELIECE348864_CLEAN_gf_mul(mat[ c ][ j ], inv) ; - } - - for (k = 0; k < SYS_T; k++) { - if (k != j) { - t = mat[ j ][ k ]; - - for (c = j; c < SYS_T + 1; c++) { - mat[ c ][ k ] ^= PQCLEAN_MCELIECE348864_CLEAN_gf_mul(mat[ c ][ j ], t); - } - } - } - } - - for (i = 0; i < SYS_T; i++) { - out[i] = mat[ SYS_T ][ i ]; - } - - return 0; -} - -/* input: permutation p represented as a list of 32-bit intergers */ -/* output: -1 if some interger repeats in p */ -/* 0 otherwise */ -int PQCLEAN_MCELIECE348864_CLEAN_perm_check(const uint32_t *p) { - int i; - uint64_t list[1 << GFBITS]; - - for (i = 0; i < (1 << GFBITS); i++) { - list[i] = p[i]; - } - - PQCLEAN_MCELIECE348864_CLEAN_sort_63b(1 << GFBITS, list); - - for (i = 1; i < (1 << GFBITS); i++) { - if (list[i - 1] == list[i]) { - return -1; - } - } - - return 0; -} - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/sk_gen.h b/src/kem/classic_mceliece/pqclean_mceliece348864_clean/sk_gen.h deleted file mode 100644 index 6f1df9afe..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/sk_gen.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef PQCLEAN_MCELIECE348864_CLEAN_SK_GEN_H -#define PQCLEAN_MCELIECE348864_CLEAN_SK_GEN_H -/* - This file is for secret-key generation -*/ - - -#include "gf.h" - -#include - -int PQCLEAN_MCELIECE348864_CLEAN_genpoly_gen(gf * /*out*/, gf * /*f*/); -int PQCLEAN_MCELIECE348864_CLEAN_perm_check(const uint32_t * /*p*/); - -#endif - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/synd.c b/src/kem/classic_mceliece/pqclean_mceliece348864_clean/synd.c deleted file mode 100644 index d473bb1e1..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/synd.c +++ /dev/null @@ -1,33 +0,0 @@ -/* - This file is for syndrome computation -*/ - -#include "synd.h" - -#include "params.h" -#include "root.h" - - -/* input: Goppa polynomial f, support L, received word r */ -/* output: out, the syndrome of length 2t */ -void PQCLEAN_MCELIECE348864_CLEAN_synd(gf *out, gf *f, gf *L, const unsigned char *r) { - int i, j; - gf e, e_inv, c; - - for (j = 0; j < 2 * SYS_T; j++) { - out[j] = 0; - } - - for (i = 0; i < SYS_N; i++) { - c = (r[i / 8] >> (i % 8)) & 1; - - e = PQCLEAN_MCELIECE348864_CLEAN_eval(f, L[i]); - e_inv = PQCLEAN_MCELIECE348864_CLEAN_gf_inv(PQCLEAN_MCELIECE348864_CLEAN_gf_mul(e, e)); - - for (j = 0; j < 2 * SYS_T; j++) { - out[j] = PQCLEAN_MCELIECE348864_CLEAN_gf_add(out[j], PQCLEAN_MCELIECE348864_CLEAN_gf_mul(e_inv, c)); - e_inv = PQCLEAN_MCELIECE348864_CLEAN_gf_mul(e_inv, L[i]); - } - } -} - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/synd.h b/src/kem/classic_mceliece/pqclean_mceliece348864_clean/synd.h deleted file mode 100644 index 34b61bcd4..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/synd.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef PQCLEAN_MCELIECE348864_CLEAN_SYND_H -#define PQCLEAN_MCELIECE348864_CLEAN_SYND_H -/* - This file is for syndrome computation -*/ - -#include "gf.h" - -void PQCLEAN_MCELIECE348864_CLEAN_synd(gf * /*out*/, gf * /*f*/, gf * /*L*/, const unsigned char * /*r*/); - -#endif - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/transpose.c b/src/kem/classic_mceliece/pqclean_mceliece348864_clean/transpose.c deleted file mode 100644 index cbad4f7b9..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/transpose.c +++ /dev/null @@ -1,42 
+0,0 @@ -/* - This file is for matrix transposition -*/ - -#include "transpose.h" - -#include - -/* input: in, a 64x64 matrix over GF(2) */ -/* output: out, transpose of in */ -void PQCLEAN_MCELIECE348864_CLEAN_transpose_64x64(uint64_t *out, const uint64_t *in) { - int i, j, s, d; - - uint64_t x, y; - uint64_t masks[6][2] = { - {0x5555555555555555, 0xAAAAAAAAAAAAAAAA}, - {0x3333333333333333, 0xCCCCCCCCCCCCCCCC}, - {0x0F0F0F0F0F0F0F0F, 0xF0F0F0F0F0F0F0F0}, - {0x00FF00FF00FF00FF, 0xFF00FF00FF00FF00}, - {0x0000FFFF0000FFFF, 0xFFFF0000FFFF0000}, - {0x00000000FFFFFFFF, 0xFFFFFFFF00000000} - }; - - for (i = 0; i < 64; i++) { - out[i] = in[i]; - } - - for (d = 5; d >= 0; d--) { - s = 1 << d; - - for (i = 0; i < 64; i += s * 2) { - for (j = i; j < i + s; j++) { - x = (out[j] & masks[d][0]) | ((out[j + s] & masks[d][0]) << s); - y = ((out[j] & masks[d][1]) >> s) | (out[j + s] & masks[d][1]); - - out[j + 0] = x; - out[j + s] = y; - } - } - } -} - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/transpose.h b/src/kem/classic_mceliece/pqclean_mceliece348864_clean/transpose.h deleted file mode 100644 index 1bdc673dd..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/transpose.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef PQCLEAN_MCELIECE348864_CLEAN_TRANSPOSE_H -#define PQCLEAN_MCELIECE348864_CLEAN_TRANSPOSE_H -/* - This file is for matrix transposition -*/ - - -#include - -void PQCLEAN_MCELIECE348864_CLEAN_transpose_64x64(uint64_t * /*out*/, const uint64_t * /*in*/); - -#endif - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/util.c b/src/kem/classic_mceliece/pqclean_mceliece348864_clean/util.c deleted file mode 100644 index 75f1bc9ca..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/util.c +++ /dev/null @@ -1,67 +0,0 @@ -/* - This file is for loading/storing data in a little-endian fashion -*/ - -#include "util.h" - -#include "params.h" - -void PQCLEAN_MCELIECE348864_CLEAN_store2(unsigned char *dest, gf a) { - dest[0] = a & 0xFF; - dest[1] = a >> 8; -} - -uint16_t PQCLEAN_MCELIECE348864_CLEAN_load2(const unsigned char *src) { - uint16_t a; - - a = src[1]; - a <<= 8; - a |= src[0]; - - return a & GFMASK; -} - -uint32_t PQCLEAN_MCELIECE348864_CLEAN_load4(const unsigned char *in) { - int i; - uint32_t ret = in[3]; - - for (i = 2; i >= 0; i--) { - ret <<= 8; - ret |= in[i]; - } - - return ret; -} - -void PQCLEAN_MCELIECE348864_CLEAN_store8(unsigned char *out, uint64_t in) { - out[0] = (in >> 0x00) & 0xFF; - out[1] = (in >> 0x08) & 0xFF; - out[2] = (in >> 0x10) & 0xFF; - out[3] = (in >> 0x18) & 0xFF; - out[4] = (in >> 0x20) & 0xFF; - out[5] = (in >> 0x28) & 0xFF; - out[6] = (in >> 0x30) & 0xFF; - out[7] = (in >> 0x38) & 0xFF; -} - -uint64_t PQCLEAN_MCELIECE348864_CLEAN_load8(const unsigned char *in) { - int i; - uint64_t ret = in[7]; - - for (i = 6; i >= 0; i--) { - ret <<= 8; - ret |= in[i]; - } - - return ret; -} - -gf PQCLEAN_MCELIECE348864_CLEAN_bitrev(gf a) { - a = ((a & 0x00FF) << 8) | ((a & 0xFF00) >> 8); - a = ((a & 0x0F0F) << 4) | ((a & 0xF0F0) >> 4); - a = ((a & 0x3333) << 2) | ((a & 0xCCCC) >> 2); - a = ((a & 0x5555) << 1) | ((a & 0xAAAA) >> 1); - - return a >> 4; -} - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/util.h b/src/kem/classic_mceliece/pqclean_mceliece348864_clean/util.h deleted file mode 100644 index 25b6f9663..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_clean/util.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef PQCLEAN_MCELIECE348864_CLEAN_UTIL_H -#define 
PQCLEAN_MCELIECE348864_CLEAN_UTIL_H -/* - This file is for loading/storing data in a little-endian fashion -*/ - - -#include "gf.h" -#include - -void PQCLEAN_MCELIECE348864_CLEAN_store2(unsigned char * /*dest*/, gf /*a*/); -uint16_t PQCLEAN_MCELIECE348864_CLEAN_load2(const unsigned char * /*src*/); - -uint32_t PQCLEAN_MCELIECE348864_CLEAN_load4(const unsigned char * /*in*/); - -void PQCLEAN_MCELIECE348864_CLEAN_store8(unsigned char * /*out*/, uint64_t /*in*/); -uint64_t PQCLEAN_MCELIECE348864_CLEAN_load8(const unsigned char * /*in*/); - -gf PQCLEAN_MCELIECE348864_CLEAN_bitrev(gf /*a*/); - -#endif - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/LICENSE b/src/kem/classic_mceliece/pqclean_mceliece348864_sse/LICENSE deleted file mode 100644 index eba3e7ced..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/LICENSE +++ /dev/null @@ -1,16 +0,0 @@ -Public Domain. - -Authors of Classic McEliece in alphabetical order: - -Daniel J. Bernstein, University of Illinois at Chicago -Tung Chou, Osaka University -Tanja Lange, Technische Universiteit Eindhoven -Ingo von Maurich, self -Rafael Misoczki, Intel Corporation -Ruben Niederhagen, Fraunhofer SIT -Edoardo Persichetti, Florida Atlantic University -Christiane Peters, self -Peter Schwabe, Radboud University -Nicolas Sendrier, Inria -Jakub Szefer, Yale University -Wen Wang, Yale University diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/aes256ctr.c b/src/kem/classic_mceliece/pqclean_mceliece348864_sse/aes256ctr.c deleted file mode 100644 index 788493486..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/aes256ctr.c +++ /dev/null @@ -1,13 +0,0 @@ -#include "aes256ctr.h" - -void PQCLEAN_MCELIECE348864_SSE_aes256ctr( - uint8_t *out, - size_t outlen, - const uint8_t nonce[AESCTR_NONCEBYTES], - const uint8_t key[AES256_KEYBYTES]) { - - aes256ctx state; - aes256_ctr_keyexp(&state, key); - aes256_ctr(out, outlen, nonce, &state); - aes256_ctx_release(&state); -} diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/aes256ctr.h b/src/kem/classic_mceliece/pqclean_mceliece348864_sse/aes256ctr.h deleted file mode 100644 index 9f62b86d7..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/aes256ctr.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef PQCLEAN_MCELIECE348864_SSE_AES256CTR_H -#define PQCLEAN_MCELIECE348864_SSE_AES256CTR_H - -#include -#include - -#include "aes.h" - - -void PQCLEAN_MCELIECE348864_SSE_aes256ctr( - uint8_t *out, - size_t outlen, - const uint8_t nonce[AESCTR_NONCEBYTES], - const uint8_t key[AES256_KEYBYTES] -); - -#endif diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/api.h b/src/kem/classic_mceliece/pqclean_mceliece348864_sse/api.h deleted file mode 100644 index d834750e2..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/api.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef PQCLEAN_MCELIECE348864_SSE_API_H -#define PQCLEAN_MCELIECE348864_SSE_API_H - -#include - -#define PQCLEAN_MCELIECE348864_SSE_CRYPTO_ALGNAME "Classic McEliece 348864" -#define PQCLEAN_MCELIECE348864_SSE_CRYPTO_PUBLICKEYBYTES 261120 -#define PQCLEAN_MCELIECE348864_SSE_CRYPTO_SECRETKEYBYTES 6452 -#define PQCLEAN_MCELIECE348864_SSE_CRYPTO_CIPHERTEXTBYTES 128 -#define PQCLEAN_MCELIECE348864_SSE_CRYPTO_BYTES 32 - - -int PQCLEAN_MCELIECE348864_SSE_crypto_kem_enc( - uint8_t *c, - uint8_t *key, - const uint8_t *pk -); - -int PQCLEAN_MCELIECE348864_SSE_crypto_kem_dec( - uint8_t *key, - const uint8_t *c, - const uint8_t *sk -); - -int 
PQCLEAN_MCELIECE348864_SSE_crypto_kem_keypair -( - uint8_t *pk, - uint8_t *sk -); - -#endif - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/benes.c b/src/kem/classic_mceliece/pqclean_mceliece348864_sse/benes.c deleted file mode 100644 index d0bb0f1ab..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/benes.c +++ /dev/null @@ -1,287 +0,0 @@ -/* - This file is for Benes network related functions -*/ -#include "benes.h" - -#include "params.h" -#include "transpose.h" -#include "util.h" - -static void layer_0(uint64_t *bs, const uint64_t *cond) { - int x; - uint64_t diff; - - for (x = 0; x < (1 << 6); x += 2) { - diff = bs[ x ] ^ bs[ x + 1 ]; - diff &= *cond++; - bs[ x ] ^= diff; - bs[ x + 1 ] ^= diff; - } -} - -static void layer_1(uint64_t *bs, const uint64_t *cond) { - int x; - uint64_t diff; - - for (x = 0; x < (1 << 6); x += 4) { - diff = bs[ x + 0 ] ^ bs[ x + 2 ]; - diff &= cond[0]; - bs[ x + 0 ] ^= diff; - bs[ x + 2 ] ^= diff; - - diff = bs[ x + 1 ] ^ bs[ x + 3 ]; - diff &= cond[1]; - bs[ x + 1 ] ^= diff; - bs[ x + 3 ] ^= diff; - - cond += 2; - } -} - -static void layer_2(uint64_t *bs, const uint64_t *cond) { - int x; - uint64_t diff; - - for (x = 0; x < (1 << 6); x += 8) { - diff = bs[ x + 0 ] ^ bs[ x + 4 ]; - diff &= cond[0]; - bs[ x + 0 ] ^= diff; - bs[ x + 4 ] ^= diff; - - diff = bs[ x + 1 ] ^ bs[ x + 5 ]; - diff &= cond[1]; - bs[ x + 1 ] ^= diff; - bs[ x + 5 ] ^= diff; - - diff = bs[ x + 2 ] ^ bs[ x + 6 ]; - diff &= cond[2]; - bs[ x + 2 ] ^= diff; - bs[ x + 6 ] ^= diff; - - diff = bs[ x + 3 ] ^ bs[ x + 7 ]; - diff &= cond[3]; - bs[ x + 3 ] ^= diff; - bs[ x + 7 ] ^= diff; - - cond += 4; - } -} - -static void layer_3(uint64_t *bs, const uint64_t *cond) { - int x, s; - uint64_t diff; - - for (x = 0; x < (1 << 6); x += 16) { - for (s = x; s < x + 8; s += 4) { - diff = bs[ s + 0 ] ^ bs[ s + 8 ]; - diff &= cond[0]; - bs[ s + 0 ] ^= diff; - bs[ s + 8 ] ^= diff; - - diff = bs[ s + 1 ] ^ bs[ s + 9 ]; - diff &= cond[1]; - bs[ s + 1 ] ^= diff; - bs[ s + 9 ] ^= diff; - - diff = bs[ s + 2 ] ^ bs[ s + 10 ]; - diff &= cond[2]; - bs[ s + 2 ] ^= diff; - bs[ s + 10 ] ^= diff; - - diff = bs[ s + 3 ] ^ bs[ s + 11 ]; - diff &= cond[3]; - bs[ s + 3 ] ^= diff; - bs[ s + 11 ] ^= diff; - - cond += 4; - } - } -} - -static void layer_4(uint64_t *bs, const uint64_t *cond) { - int x, s; - uint64_t diff; - - for (x = 0; x < (1 << 6); x += 32) { - for (s = x; s < x + 16; s += 4) { - diff = bs[ s + 0 ] ^ bs[ s + 16 ]; - diff &= cond[0]; - bs[ s + 0 ] ^= diff; - bs[ s + 16 ] ^= diff; - - diff = bs[ s + 1 ] ^ bs[ s + 17 ]; - diff &= cond[1]; - bs[ s + 1 ] ^= diff; - bs[ s + 17 ] ^= diff; - - diff = bs[ s + 2 ] ^ bs[ s + 18 ]; - diff &= cond[2]; - bs[ s + 2 ] ^= diff; - bs[ s + 18 ] ^= diff; - - diff = bs[ s + 3 ] ^ bs[ s + 19 ]; - diff &= cond[3]; - bs[ s + 3 ] ^= diff; - bs[ s + 19 ] ^= diff; - - cond += 4; - } - } -} - -static void layer_5(uint64_t *bs, const uint64_t *cond) { - int x, s; - uint64_t diff; - - for (x = 0; x < (1 << 6); x += 64) { - for (s = x; s < x + 32; s += 4) { - diff = bs[ s + 0 ] ^ bs[ s + 32 ]; - diff &= cond[0]; - bs[ s + 0 ] ^= diff; - bs[ s + 32 ] ^= diff; - - diff = bs[ s + 1 ] ^ bs[ s + 33 ]; - diff &= cond[1]; - bs[ s + 1 ] ^= diff; - bs[ s + 33 ] ^= diff; - - diff = bs[ s + 2 ] ^ bs[ s + 34 ]; - diff &= cond[2]; - bs[ s + 2 ] ^= diff; - bs[ s + 34 ] ^= diff; - - diff = bs[ s + 3 ] ^ bs[ s + 35 ]; - diff &= cond[3]; - bs[ s + 3 ] ^= diff; - bs[ s + 35 ] ^= diff; - - cond += 4; - } - } -} - -/* input: bits, control bits as array of bytes */ -/* 
output: out, control bits as array of 128-bit vectors */ -void PQCLEAN_MCELIECE348864_SSE_load_bits(uint64_t out[][32], const unsigned char *bits) { - int i, low, block = 0; - - uint64_t cond[64]; - - // - - for (low = 0; low <= 5; low++) { - for (i = 0; i < 64; i++) { - cond[i] = PQCLEAN_MCELIECE348864_SSE_load4(bits + block * 256 + i * 4); - } - PQCLEAN_MCELIECE348864_SSE_transpose_64x64(cond); - - for (i = 0; i < 32; i++) { - out[ block ][i] = cond[i]; - } - block++; - } - - for (low = 0; low <= 5; low++) { - for (i = 0; i < 32; i++) { - out[ block ][i] = PQCLEAN_MCELIECE348864_SSE_load8(bits + block * 256 + i * 8); - } - block++; - } - - for (low = 4; low >= 0; low--) { - for (i = 0; i < 32; i++) { - out[ block ][i] = PQCLEAN_MCELIECE348864_SSE_load8(bits + block * 256 + i * 8); - } - block++; - } - - for (low = 5; low >= 0; low--) { - for (i = 0; i < 64; i++) { - cond[i] = PQCLEAN_MCELIECE348864_SSE_load4(bits + block * 256 + i * 4); - } - PQCLEAN_MCELIECE348864_SSE_transpose_64x64(cond); - - for (i = 0; i < 32; i++) { - out[ block ][i] = cond[i]; - } - block++; - } -} - -/* input: r, sequence of bits to be permuted */ -/* cond, control bits as array of 128-bit vectors */ -/* rev, 0 for normal application; !0 for inverse */ -/* output: r, permuted bits */ -void PQCLEAN_MCELIECE348864_SSE_benes(uint64_t *r, uint64_t cond[][32], int rev) { - int block, inc; - - uint64_t *bs = r; - - // - - if (rev == 0) { - block = 0; - inc = 1; - } else { - block = 22; - inc = -1; - } - - PQCLEAN_MCELIECE348864_SSE_transpose_64x64(bs); - - layer_0(bs, cond[ block ]); - block += inc; - layer_1(bs, cond[ block ]); - block += inc; - layer_2(bs, cond[ block ]); - block += inc; - layer_3(bs, cond[ block ]); - block += inc; - layer_4(bs, cond[ block ]); - block += inc; - layer_5(bs, cond[ block ]); - block += inc; - - PQCLEAN_MCELIECE348864_SSE_transpose_64x64(bs); - - layer_0(bs, cond[ block ]); - block += inc; - layer_1(bs, cond[ block ]); - block += inc; - layer_2(bs, cond[ block ]); - block += inc; - layer_3(bs, cond[ block ]); - block += inc; - layer_4(bs, cond[ block ]); - block += inc; - layer_5(bs, cond[ block ]); - block += inc; - layer_4(bs, cond[ block ]); - block += inc; - layer_3(bs, cond[ block ]); - block += inc; - layer_2(bs, cond[ block ]); - block += inc; - layer_1(bs, cond[ block ]); - block += inc; - layer_0(bs, cond[ block ]); - block += inc; - - PQCLEAN_MCELIECE348864_SSE_transpose_64x64(bs); - - layer_5(bs, cond[ block ]); - block += inc; - layer_4(bs, cond[ block ]); - block += inc; - layer_3(bs, cond[ block ]); - block += inc; - layer_2(bs, cond[ block ]); - block += inc; - layer_1(bs, cond[ block ]); - block += inc; - layer_0(bs, cond[ block ]); - //block += inc; - - PQCLEAN_MCELIECE348864_SSE_transpose_64x64(bs); -} - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/benes.h b/src/kem/classic_mceliece/pqclean_mceliece348864_sse/benes.h deleted file mode 100644 index 267744bef..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/benes.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef PQCLEAN_MCELIECE348864_SSE_BENES_H -#define PQCLEAN_MCELIECE348864_SSE_BENES_H -/* - This file is for Benes network related functions -*/ - - -#include "gf.h" -#include "vec128.h" - -void PQCLEAN_MCELIECE348864_SSE_load_bits(uint64_t /*out*/[][32], const unsigned char * /*bits*/); -void PQCLEAN_MCELIECE348864_SSE_benes(uint64_t * /*r*/, uint64_t /*cond*/[][32], int /*rev*/); - -#endif - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/bm.c 
b/src/kem/classic_mceliece/pqclean_mceliece348864_sse/bm.c deleted file mode 100644 index e3257e44c..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/bm.c +++ /dev/null @@ -1,220 +0,0 @@ -/* - This file is for the inversion-free Berlekamp-Massey algorithm - see https://ieeexplore.ieee.org/document/87857 -*/ - -#include "bm.h" - -#include "gf.h" -#include "util.h" -#include "vec.h" -#include "vec128.h" - -#include -#include - -extern void PQCLEAN_MCELIECE348864_SSE_update_asm(void *, gf, int); -extern gf PQCLEAN_MCELIECE348864_SSE_vec_reduce_asm(uint64_t *); - -static inline uint64_t mask_nonzero(gf a) { - uint64_t ret = a; - - ret -= 1; - ret >>= 63; - ret -= 1; - - return ret; -} - -static inline uint64_t mask_leq(uint16_t a, uint16_t b) { - uint64_t a_tmp = a; - uint64_t b_tmp = b; - uint64_t ret = b_tmp - a_tmp; - - ret >>= 63; - ret -= 1; - - return ret; -} - -static void vec_cmov(uint64_t out[][2], uint64_t mask) { - int i; - - for (i = 0; i < GFBITS; i++) { - out[i][0] = (out[i][0] & ~mask) | (out[i][1] & mask); - } -} - -static inline void interleave(vec128 *in, int idx0, int idx1, vec128 *mask, int b) { - int s = 1 << b; - - vec128 x, y; - - x = PQCLEAN_MCELIECE348864_SSE_vec128_or(PQCLEAN_MCELIECE348864_SSE_vec128_and(in[idx0], mask[0]), - PQCLEAN_MCELIECE348864_SSE_vec128_sll_2x(PQCLEAN_MCELIECE348864_SSE_vec128_and(in[idx1], mask[0]), s)); - - y = PQCLEAN_MCELIECE348864_SSE_vec128_or(PQCLEAN_MCELIECE348864_SSE_vec128_srl_2x(PQCLEAN_MCELIECE348864_SSE_vec128_and(in[idx0], mask[1]), s), - PQCLEAN_MCELIECE348864_SSE_vec128_and(in[idx1], mask[1])); - - in[idx0] = x; - in[idx1] = y; -} - -/* input: in, field elements in bitsliced form */ -/* output: out, field elements in non-bitsliced form */ -static inline void get_coefs(gf *out, vec128 *in) { - int i, k; - - vec128 mask[4][2]; - vec128 buf[16]; - - for (i = 0; i < GFBITS; i++) { - buf[i] = in[i]; - } - for (i = GFBITS; i < 16; i++) { - buf[i] = PQCLEAN_MCELIECE348864_SSE_vec128_setzero(); - } - - mask[0][0] = PQCLEAN_MCELIECE348864_SSE_vec128_set1_16b(0x5555); - mask[0][1] = PQCLEAN_MCELIECE348864_SSE_vec128_set1_16b(0xAAAA); - mask[1][0] = PQCLEAN_MCELIECE348864_SSE_vec128_set1_16b(0x3333); - mask[1][1] = PQCLEAN_MCELIECE348864_SSE_vec128_set1_16b(0xCCCC); - mask[2][0] = PQCLEAN_MCELIECE348864_SSE_vec128_set1_16b(0x0F0F); - mask[2][1] = PQCLEAN_MCELIECE348864_SSE_vec128_set1_16b(0xF0F0); - mask[3][0] = PQCLEAN_MCELIECE348864_SSE_vec128_set1_16b(0x00FF); - mask[3][1] = PQCLEAN_MCELIECE348864_SSE_vec128_set1_16b(0xFF00); - - interleave(buf, 0, 8, mask[3], 3); - interleave(buf, 1, 9, mask[3], 3); - interleave(buf, 2, 10, mask[3], 3); - interleave(buf, 3, 11, mask[3], 3); - interleave(buf, 4, 12, mask[3], 3); - interleave(buf, 5, 13, mask[3], 3); - interleave(buf, 6, 14, mask[3], 3); - interleave(buf, 7, 15, mask[3], 3); - - interleave(buf, 0, 4, mask[2], 2); - interleave(buf, 1, 5, mask[2], 2); - interleave(buf, 2, 6, mask[2], 2); - interleave(buf, 3, 7, mask[2], 2); - interleave(buf, 8, 12, mask[2], 2); - interleave(buf, 9, 13, mask[2], 2); - interleave(buf, 10, 14, mask[2], 2); - interleave(buf, 11, 15, mask[2], 2); - - interleave(buf, 0, 2, mask[1], 1); - interleave(buf, 1, 3, mask[1], 1); - interleave(buf, 4, 6, mask[1], 1); - interleave(buf, 5, 7, mask[1], 1); - interleave(buf, 8, 10, mask[1], 1); - interleave(buf, 9, 11, mask[1], 1); - interleave(buf, 12, 14, mask[1], 1); - interleave(buf, 13, 15, mask[1], 1); - - interleave(buf, 0, 1, mask[0], 0); - interleave(buf, 2, 3, mask[0], 0); - interleave(buf, 4, 5, 
mask[0], 0); - interleave(buf, 6, 7, mask[0], 0); - interleave(buf, 8, 9, mask[0], 0); - interleave(buf, 10, 11, mask[0], 0); - interleave(buf, 12, 13, mask[0], 0); - interleave(buf, 14, 15, mask[0], 0); - - for (i = 0; i < 16; i++) { - for (k = 0; k < 4; k++) { - out[ (4 * 0 + k) * 16 + i ] = (PQCLEAN_MCELIECE348864_SSE_vec128_extract(buf[i], 0) >> (k * 16)) & GFMASK; - out[ (4 * 1 + k) * 16 + i ] = (PQCLEAN_MCELIECE348864_SSE_vec128_extract(buf[i], 1) >> (k * 16)) & GFMASK; - } - } -} - -/* input: in, sequence of field elements */ -/* output: out, minimal polynomial of in */ -void PQCLEAN_MCELIECE348864_SSE_bm(uint64_t out[ GFBITS ], vec128 in[ GFBITS ]) { - uint16_t i; - uint16_t N, L; - - uint64_t prod[ GFBITS ]; - uint64_t in_tmp[ GFBITS ]; - - uint64_t db[ GFBITS ][ 2 ]; - uint64_t BC_tmp[ GFBITS ][ 2 ]; - uint64_t BC[ GFBITS ][ 2 ]; - - uint64_t mask, t; - - gf d, b, c0 = 1; - - gf coefs[SYS_T * 2]; - - // init - - BC[0][1] = 0; - BC[0][0] = 1; - BC[0][0] <<= 63; - - for (i = 1; i < GFBITS; i++) { - BC[i][0] = BC[i][1] = 0; - } - - b = 1; - L = 0; - - // - - get_coefs(coefs, in); - - for (i = 0; i < GFBITS; i++) { - in_tmp[i] = 0; - } - - for (N = 0; N < SYS_T * 2; N++) { - // computing d - - PQCLEAN_MCELIECE348864_SSE_vec_mul_asm(prod, in_tmp, &BC[0][1], 16); - - PQCLEAN_MCELIECE348864_SSE_update_asm(in_tmp, coefs[N], 8); - - d = PQCLEAN_MCELIECE348864_SSE_vec_reduce_asm(prod); - - t = PQCLEAN_MCELIECE348864_SSE_gf_mul2(c0, coefs[N], b); - - d ^= t & 0xFFFFFFFF; - - // 3 cases - - mask = mask_nonzero(d) & mask_leq(L * 2, N); - - for (i = 0; i < GFBITS; i++) { - db[i][0] = (d >> i) & 1; - db[i][0] = -db[i][0]; - db[i][1] = (b >> i) & 1; - db[i][1] = -db[i][1]; - } - - PQCLEAN_MCELIECE348864_SSE_vec128_mul((vec128 *) BC_tmp, (vec128 *) db, (vec128 *) BC); - - vec_cmov(BC, mask); - - PQCLEAN_MCELIECE348864_SSE_update_asm(BC, mask & c0, 16); - - for (i = 0; i < GFBITS; i++) { - BC[i][1] = BC_tmp[i][0] ^ BC_tmp[i][1]; - } - - c0 = t >> 32; - b = (d & mask) | (b & ~mask); - L = ((N + 1 - L) & mask) | (L & ~mask); - - } - - c0 = PQCLEAN_MCELIECE348864_SSE_gf_inv(c0); - - for (i = 0; i < GFBITS; i++) { - out[i] = (c0 >> i) & 1; - out[i] = -out[i]; - } - - PQCLEAN_MCELIECE348864_SSE_vec_mul_asm(out, out, &BC[0][1], 16); -} - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/bm.h b/src/kem/classic_mceliece/pqclean_mceliece348864_sse/bm.h deleted file mode 100644 index 9430fe2d2..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/bm.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef PQCLEAN_MCELIECE348864_SSE_BM_H -#define PQCLEAN_MCELIECE348864_SSE_BM_H -/* - This file is for the inversion-free Berlekamp-Massey algorithm - see https://ieeexplore.ieee.org/document/87857 -*/ - - -#include - -#include "params.h" -#include "vec128.h" - -void PQCLEAN_MCELIECE348864_SSE_bm(uint64_t *out, vec128 *in); - -#endif - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/consts.S b/src/kem/classic_mceliece/pqclean_mceliece348864_sse/consts.S deleted file mode 100644 index ff080b2f9..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/consts.S +++ /dev/null @@ -1,32 +0,0 @@ -.data - -# not supported on macos -#.section .rodata -.globl PQCLEAN_MCELIECE348864_SSE_MASK0_0 -.globl PQCLEAN_MCELIECE348864_SSE_MASK0_1 -.globl PQCLEAN_MCELIECE348864_SSE_MASK1_0 -.globl PQCLEAN_MCELIECE348864_SSE_MASK1_1 -.globl PQCLEAN_MCELIECE348864_SSE_MASK2_0 -.globl PQCLEAN_MCELIECE348864_SSE_MASK2_1 -.globl PQCLEAN_MCELIECE348864_SSE_MASK3_0 -.globl 
PQCLEAN_MCELIECE348864_SSE_MASK3_1 -.globl PQCLEAN_MCELIECE348864_SSE_MASK4_0 -.globl PQCLEAN_MCELIECE348864_SSE_MASK4_1 -.globl PQCLEAN_MCELIECE348864_SSE_MASK5_0 -.globl PQCLEAN_MCELIECE348864_SSE_MASK5_1 - -.p2align 4 - -PQCLEAN_MCELIECE348864_SSE_MASK0_0: .quad 0x5555555555555555, 0x5555555555555555 -PQCLEAN_MCELIECE348864_SSE_MASK0_1: .quad 0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA -PQCLEAN_MCELIECE348864_SSE_MASK1_0: .quad 0x3333333333333333, 0x3333333333333333 -PQCLEAN_MCELIECE348864_SSE_MASK1_1: .quad 0xCCCCCCCCCCCCCCCC, 0xCCCCCCCCCCCCCCCC -PQCLEAN_MCELIECE348864_SSE_MASK2_0: .quad 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F -PQCLEAN_MCELIECE348864_SSE_MASK2_1: .quad 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0 -PQCLEAN_MCELIECE348864_SSE_MASK3_0: .quad 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF -PQCLEAN_MCELIECE348864_SSE_MASK3_1: .quad 0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00 -PQCLEAN_MCELIECE348864_SSE_MASK4_0: .quad 0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF -PQCLEAN_MCELIECE348864_SSE_MASK4_1: .quad 0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000 -PQCLEAN_MCELIECE348864_SSE_MASK5_0: .quad 0x00000000FFFFFFFF, 0x00000000FFFFFFFF -PQCLEAN_MCELIECE348864_SSE_MASK5_1: .quad 0xFFFFFFFF00000000, 0xFFFFFFFF00000000 - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/consts.inc b/src/kem/classic_mceliece/pqclean_mceliece348864_sse/consts.inc deleted file mode 100644 index 87b50f730..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/consts.inc +++ /dev/null @@ -1,448 +0,0 @@ -{ - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF00F0FF0F00F0FF0, 0xF00F0FF0F00F0FF0), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F00F0F0F0FF0F0, 0xF0F00F0F0F0FF0F0), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0FF00FF00FF00FF0, 0x0FF00FF00FF00FF0), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA5555AAAA5555AA, 0xAA5555AAAA5555AA), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF00F0FF0F00F0FF0, 0xF00F0FF0F00F0FF0), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x33CCCC33CC3333CC, 0x33CCCC33CC3333CC), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCC33CC3333CC33CC, 0xCC33CC3333CC33CC), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x33CC33CC33CC33CC, 0x33CC33CC33CC33CC), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x5A5A5A5A5A5A5A5A, 0x5A5A5A5A5A5A5A5A), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF00F0FF0F00F0FF0, 0xF00F0FF0F00F0FF0), -}, -{ - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3C3C3C3C3C3C3C3C, 0x3C3C3C3C3C3C3C3C), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0x0F0F0F0F0F0F0F0F), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x5555AAAA5555AAAA, 0xAAAA5555AAAA5555), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCC3333CCCC3333CC, 0xCC3333CCCC3333CC), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33CC33CC33CC33C, 0xC33CC33CC33CC33C), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55555555AAAAAAAA, 0x55555555AAAAAAAA), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x33333333CCCCCCCC, 0x33333333CCCCCCCC), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FF00FFFF00FF00, 0xFF00FF0000FF00FF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0x0F0F0F0F0F0F0F0F), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000FFFFFFFF0000, 0x0000FFFFFFFF0000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F00F0F0F0FF0F0, 0xF0F00F0F0F0FF0F0), -}, -{ - 
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA55AA5555AA55AA, 0x55AA55AAAA55AA55), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCC33CC3333CC33CC, 0xCC33CC3333CC33CC), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x33CCCC33CC3333CC, 0xCC3333CC33CCCC33), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55555555AAAAAAAA, 0x55555555AAAAAAAA), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF0000FF00FFFF00, 0xFF0000FF00FFFF00), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3CC33CC3C33CC33C, 0xC33CC33C3CC33CC3), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x5555AAAA5555AAAA, 0xAAAA5555AAAA5555), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0FF00FF00FF00FF0, 0xF00FF00FF00FF00F), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCC33333333CCCC, 0x3333CCCCCCCC3333), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0x0F0F0F0F0F0F0F0F), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FFFF0000FFFF00, 0xFF0000FFFF0000FF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33CC33CC33CC33C, 0xC33CC33CC33CC33C), -}, -{ - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA55AA5555AA55AA, 0x55AA55AAAA55AA55), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x33CC33CCCC33CC33, 0x33CC33CCCC33CC33), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCC3333CC33CCCC33, 0x33CCCC33CC3333CC), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55555555AAAAAAAA, 0x55555555AAAAAAAA), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FFFF00FF0000FF, 0x00FFFF00FF0000FF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3CC33CC3C33CC33C, 0xC33CC33C3CC33CC3), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x5555AAAA5555AAAA, 0xAAAA5555AAAA5555), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0FF00FF00FF00FF0, 0xF00FF00FF00FF00F), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3333CCCCCCCC3333, 0xCCCC33333333CCCC), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0x0F0F0F0F0F0F0F0F), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FFFF0000FFFF00, 0xFF0000FFFF0000FF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33CC33CC33CC33C, 0xC33CC33CC33CC33C), -}, -{ - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x6699669999669966, 0x9966996666996699), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x33CCCC33CC3333CC, 0x33CCCC33CC3333CC), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xA5A5A5A55A5A5A5A, 0xA5A5A5A55A5A5A5A), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3C3CC3C3C3C33C3C, 0x3C3CC3C3C3C33C3C), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF00FF00F0FF00FF0, 0x0FF00FF0F00FF00F), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55AA55AA55AA55AA, 0xAA55AA55AA55AA55), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3C3CC3C3C3C33C3C, 0x3C3CC3C3C3C33C3C), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0xF0F0F0F00F0F0F0F), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55AA55AA55AA55AA, 0xAA55AA55AA55AA55), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x33CCCC33CC3333CC, 0xCC3333CC33CCCC33), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0x0F0F0F0F0F0F0F0F), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xA55A5AA55AA5A55A, 0xA55A5AA55AA5A55A), -}, -{ - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x6699669999669966, 0x9966996666996699), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x33CCCC33CC3333CC, 0x33CCCC33CC3333CC), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x5A5A5A5AA5A5A5A5, 0x5A5A5A5AA5A5A5A5), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC3C33C3C3C3CC3C3, 0xC3C33C3C3C3CC3C3), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0FF00FF0F00FF00F, 0xF00FF00F0FF00FF0), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA55AA55AA55AA55, 0x55AA55AA55AA55AA), - 
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC3C33C3C3C3CC3C3, 0xC3C33C3C3C3CC3C3), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0xF0F0F0F00F0F0F0F), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA55AA55AA55AA55, 0x55AA55AA55AA55AA), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x33CCCC33CC3333CC, 0xCC3333CC33CCCC33), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0x0F0F0F0F0F0F0F0F), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xA55A5AA55AA5A55A, 0xA55A5AA55AA5A55A), -}, -{ - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x6699669999669966, 0x9966996666996699), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCC3333CC33CCCC33, 0xCC3333CC33CCCC33), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x5A5A5A5AA5A5A5A5, 0x5A5A5A5AA5A5A5A5), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3C3CC3C3C3C33C3C, 0x3C3CC3C3C3C33C3C), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0FF00FF0F00FF00F, 0xF00FF00F0FF00FF0), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55AA55AA55AA55AA, 0xAA55AA55AA55AA55), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3C3CC3C3C3C33C3C, 0x3C3CC3C3C3C33C3C), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0xF0F0F0F00F0F0F0F), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55AA55AA55AA55AA, 0xAA55AA55AA55AA55), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x33CCCC33CC3333CC, 0xCC3333CC33CCCC33), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0x0F0F0F0F0F0F0F0F), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xA55A5AA55AA5A55A, 0xA55A5AA55AA5A55A), -}, -{ - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x6699669999669966, 0x9966996666996699), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCC3333CC33CCCC33, 0xCC3333CC33CCCC33), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xA5A5A5A55A5A5A5A, 0xA5A5A5A55A5A5A5A), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC3C33C3C3C3CC3C3, 0xC3C33C3C3C3CC3C3), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF00FF00F0FF00FF0, 0x0FF00FF0F00FF00F), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA55AA55AA55AA55, 0x55AA55AA55AA55AA), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC3C33C3C3C3CC3C3, 0xC3C33C3C3C3CC3C3), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0xF0F0F0F00F0F0F0F), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA55AA55AA55AA55, 0x55AA55AA55AA55AA), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x33CCCC33CC3333CC, 0xCC3333CC33CCCC33), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0x0F0F0F0F0F0F0F0F), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xA55A5AA55AA5A55A, 0xA55A5AA55AA5A55A), -}, -{ - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x9669699696696996, 0x9669699696696996), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x6996699669966996, 0x6996699669966996), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x6996699669966996, 0x6996699669966996), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FFFF0000FFFF00, 0x00FFFF0000FFFF00), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0x00FF00FF00FF00FF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF00FF00F0FF00FF0, 0x0FF00FF0F00FF00F), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F00F0F0F0FF0F0, 0x0F0FF0F0F0F00F0F), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0x3CC3C33CC33C3CC3), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0x3CC3C33CC33C3CC3), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xA55A5AA55AA5A55A, 0xA55A5AA55AA5A55A), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0xC33C3CC33CC3C33C), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3CC3C33C3CC3C33C, 0x3CC3C33C3CC3C33C), -}, -{ - 
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x9669699696696996, 0x9669699696696996), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x6996699669966996, 0x6996699669966996), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x6996699669966996, 0x6996699669966996), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF0000FFFF0000FF, 0xFF0000FFFF0000FF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FF00FF00FF00FF, 0xFF00FF00FF00FF00), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0FF00FF0F00FF00F, 0xF00FF00F0FF00FF0), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0FF0F0F0F00F0F, 0xF0F00F0F0F0FF0F0), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0x3CC3C33CC33C3CC3), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0x3CC3C33CC33C3CC3), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xA55A5AA55AA5A55A, 0xA55A5AA55AA5A55A), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0xC33C3CC33CC3C33C), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3CC3C33C3CC3C33C, 0x3CC3C33C3CC3C33C), -}, -{ - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x9669699696696996, 0x9669699696696996), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x6996699669966996, 0x6996699669966996), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x9669966996699669, 0x9669966996699669), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF0000FFFF0000FF, 0xFF0000FFFF0000FF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FF00FF00FF00FF, 0xFF00FF00FF00FF00), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF00FF00F0FF00FF0, 0x0FF00FF0F00FF00F), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F00F0F0F0FF0F0, 0x0F0FF0F0F0F00F0F), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0x3CC3C33CC33C3CC3), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0x3CC3C33CC33C3CC3), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xA55A5AA55AA5A55A, 0xA55A5AA55AA5A55A), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0xC33C3CC33CC3C33C), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3CC3C33C3CC3C33C, 0x3CC3C33C3CC3C33C), -}, -{ - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x9669699696696996, 0x9669699696696996), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x6996699669966996, 0x6996699669966996), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x9669966996699669, 0x9669966996699669), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FFFF0000FFFF00, 0x00FFFF0000FFFF00), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0x00FF00FF00FF00FF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0FF00FF0F00FF00F, 0xF00FF00F0FF00FF0), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0FF0F0F0F00F0F, 0xF0F00F0F0F0FF0F0), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0x3CC3C33CC33C3CC3), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0x3CC3C33CC33C3CC3), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xA55A5AA55AA5A55A, 0xA55A5AA55AA5A55A), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0xC33C3CC33CC3C33C), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3CC3C33C3CC3C33C, 0x3CC3C33C3CC3C33C), -}, -{ - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x9669699696696996, 0x9669699696696996), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x9669966996699669, 0x9669966996699669), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x9669966996699669, 0x9669966996699669), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FFFF0000FFFF00, 0x00FFFF0000FFFF00), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0x00FF00FF00FF00FF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF00FF00F0FF00FF0, 0x0FF00FF0F00FF00F), - 
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F00F0F0F0FF0F0, 0x0F0FF0F0F0F00F0F), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0x3CC3C33CC33C3CC3), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0x3CC3C33CC33C3CC3), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xA55A5AA55AA5A55A, 0xA55A5AA55AA5A55A), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0xC33C3CC33CC3C33C), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3CC3C33C3CC3C33C, 0x3CC3C33C3CC3C33C), -}, -{ - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x9669699696696996, 0x9669699696696996), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x9669966996699669, 0x9669966996699669), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x9669966996699669, 0x9669966996699669), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF0000FFFF0000FF, 0xFF0000FFFF0000FF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FF00FF00FF00FF, 0xFF00FF00FF00FF00), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0FF00FF0F00FF00F, 0xF00FF00F0FF00FF0), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0FF0F0F0F00F0F, 0xF0F00F0F0F0FF0F0), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0x3CC3C33CC33C3CC3), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0x3CC3C33CC33C3CC3), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xA55A5AA55AA5A55A, 0xA55A5AA55AA5A55A), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0xC33C3CC33CC3C33C), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3CC3C33C3CC3C33C, 0x3CC3C33C3CC3C33C), -}, -{ - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x9669699696696996, 0x9669699696696996), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x9669966996699669, 0x9669966996699669), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x6996699669966996, 0x6996699669966996), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF0000FFFF0000FF, 0xFF0000FFFF0000FF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FF00FF00FF00FF, 0xFF00FF00FF00FF00), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF00FF00F0FF00FF0, 0x0FF00FF0F00FF00F), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F00F0F0F0FF0F0, 0x0F0FF0F0F0F00F0F), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0x3CC3C33CC33C3CC3), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0x3CC3C33CC33C3CC3), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xA55A5AA55AA5A55A, 0xA55A5AA55AA5A55A), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0xC33C3CC33CC3C33C), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3CC3C33C3CC3C33C, 0x3CC3C33C3CC3C33C), -}, -{ - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x9669699696696996, 0x9669699696696996), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x9669966996699669, 0x9669966996699669), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x6996699669966996, 0x6996699669966996), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FFFF0000FFFF00, 0x00FFFF0000FFFF00), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0x00FF00FF00FF00FF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0FF00FF0F00FF00F, 0xF00FF00F0FF00FF0), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0FF0F0F0F00F0F, 0xF0F00F0F0F0FF0F0), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0x3CC3C33CC33C3CC3), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0x3CC3C33CC33C3CC3), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xA55A5AA55AA5A55A, 0xA55A5AA55AA5A55A), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0xC33C3CC33CC3C33C), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3CC3C33C3CC3C33C, 0x3CC3C33C3CC3C33C), -}, -{ - 
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFF00000000, 0xFFFFFFFF00000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCCCCCCCCCCCCCC, 0xCCCCCCCCCCCCCCCC), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA), -}, -{ - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFF00000000, 0xFFFFFFFF00000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCCCCCCCCCCCCCC, 0xCCCCCCCCCCCCCCCC), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA), -}, -{ - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFF00000000, 0xFFFFFFFF00000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCCCCCCCCCCCCCC, 0xCCCCCCCCCCCCCCCC), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA), -}, -{ - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xFFFFFFFFFFFFFFFF), - 
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFF00000000, 0xFFFFFFFF00000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCCCCCCCCCCCCCC, 0xCCCCCCCCCCCCCCCC), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA), -}, -{ - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFF00000000, 0xFFFFFFFF00000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCCCCCCCCCCCCCC, 0xCCCCCCCCCCCCCCCC), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA), -}, -{ - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFF00000000, 0xFFFFFFFF00000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCCCCCCCCCCCCCC, 0xCCCCCCCCCCCCCCCC), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA), -}, -{ - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFF00000000, 0xFFFFFFFF00000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCCCCCCCCCCCCCC, 0xCCCCCCCCCCCCCCCC), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA), -}, -{ - 
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFF00000000, 0xFFFFFFFF00000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCCCCCCCCCCCCCC, 0xCCCCCCCCCCCCCCCC), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA), -}, -{ - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFF00000000, 0xFFFFFFFF00000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCCCCCCCCCCCCCC, 0xCCCCCCCCCCCCCCCC), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA), -}, -{ - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFF00000000, 0xFFFFFFFF00000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCCCCCCCCCCCCCC, 0xCCCCCCCCCCCCCCCC), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA), -}, -{ - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xFFFFFFFFFFFFFFFF), - 
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFF00000000, 0xFFFFFFFF00000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCCCCCCCCCCCCCC, 0xCCCCCCCCCCCCCCCC), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA), -}, -{ - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFF00000000, 0xFFFFFFFF00000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCCCCCCCCCCCCCC, 0xCCCCCCCCCCCCCCCC), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA), -}, -{ - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFF00000000, 0xFFFFFFFF00000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCCCCCCCCCCCCCC, 0xCCCCCCCCCCCCCCCC), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA), -}, -{ - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFF00000000, 0xFFFFFFFF00000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCCCCCCCCCCCCCC, 0xCCCCCCCCCCCCCCCC), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA), -}, -{ - 
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFF00000000, 0xFFFFFFFF00000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCCCCCCCCCCCCCC, 0xCCCCCCCCCCCCCCCC), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA), -}, -{ - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xFFFFFFFFFFFFFFFF), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFF00000000, 0xFFFFFFFF00000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCCCCCCCCCCCCCC, 0xCCCCCCCCCCCCCCCC), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA), -}, diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/controlbits.c b/src/kem/classic_mceliece/pqclean_mceliece348864_sse/controlbits.c deleted file mode 100644 index 0908baf74..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/controlbits.c +++ /dev/null @@ -1,274 +0,0 @@ -/* - This file is for functions required for generating the control bits of the Benes network w.r.t. 
a random permutation - see the Lev-Pippenger-Valiant paper https://www.computer.org/csdl/trans/tc/1981/02/06312171.pdf -*/ - -#include "controlbits.h" - -#include "params.h" - -#include - -typedef uint8_t bit; - -#define N (1 << GFBITS) - -static bit is_smaller(uint32_t a, uint32_t b) { - uint32_t ret = 0; - - ret = a - b; - ret >>= 31; - - return (bit)ret; -} - -static bit is_smaller_63b(uint64_t a, uint64_t b) { - uint64_t ret = 0; - - ret = a - b; - ret >>= 63; - - return (bit)ret; -} - -static void cswap(uint32_t *x, uint32_t *y, bit swap) { - uint32_t m; - uint32_t d; - - m = swap; - m = 0 - m; - - d = (*x ^ *y); - d &= m; - *x ^= d; - *y ^= d; -} - -static void cswap_63b(uint64_t *x, uint64_t *y, bit swap) { - uint64_t m; - uint64_t d; - - m = swap; - m = 0 - m; - - d = (*x ^ *y); - d &= m; - *x ^= d; - *y ^= d; -} - -/* output x = min(input x,input y) */ -/* output y = max(input x,input y) */ - -static void minmax(uint32_t *x, uint32_t *y) { - bit m; - - m = is_smaller(*y, *x); - cswap(x, y, m); -} - -static void minmax_63b(uint64_t *x, uint64_t *y) { - bit m; - - m = is_smaller_63b(*y, *x); - cswap_63b(x, y, m); -} - -/* merge first half of x[0],x[step],...,x[(2*n-1)*step] with second half */ -/* requires n to be a power of 2 */ - -static void merge(int n, uint32_t *x, int step) { - int i; - if (n == 1) { - minmax(&x[0], &x[step]); - } else { - merge(n / 2, x, step * 2); - merge(n / 2, x + step, step * 2); - for (i = 1; i < 2 * n - 1; i += 2) { - minmax(&x[i * step], &x[(i + 1) * step]); - } - } -} - -static void merge_63b(int n, uint64_t *x, int step) { - int i; - if (n == 1) { - minmax_63b(&x[0], &x[step]); - } else { - merge_63b(n / 2, x, step * 2); - merge_63b(n / 2, x + step, step * 2); - for (i = 1; i < 2 * n - 1; i += 2) { - minmax_63b(&x[i * step], &x[(i + 1) * step]); - } - } -} - -/* sort x[0],x[1],...,x[n-1] in place */ -/* requires n to be a power of 2 */ - -static void sort(int n, uint32_t *x) { - if (n <= 1) { - return; - } - sort(n / 2, x); - sort(n / 2, x + n / 2); - merge(n / 2, x, 1); -} - -void PQCLEAN_MCELIECE348864_SSE_sort_63b(int n, uint64_t *x) { - if (n <= 1) { - return; - } - PQCLEAN_MCELIECE348864_SSE_sort_63b(n / 2, x); - PQCLEAN_MCELIECE348864_SSE_sort_63b(n / 2, x + n / 2); - merge_63b(n / 2, x, 1); -} - -/* y[pi[i]] = x[i] */ -/* requires n = 2^w */ -/* requires pi to be a permutation */ -static void composeinv(int n, uint32_t *y, const uint32_t *x, const uint32_t *pi) { // NC - int i; - uint32_t t[2 * N]; - - for (i = 0; i < n; ++i) { - t[i] = x[i] | (pi[i] << 16); - } - - sort(n, t); - - for (i = 0; i < n; ++i) { - y[i] = t[i] & 0xFFFF; - } -} - -/* ip[i] = j iff pi[i] = j */ -/* requires n = 2^w */ -/* requires pi to be a permutation */ -static void invert(int n, uint32_t *ip, const uint32_t *pi) { - int i; - - for (i = 0; i < n; i++) { - ip[i] = i; - } - - composeinv(n, ip, ip, pi); -} - - -static void flow(int w, uint32_t *x, const uint32_t *y, int t) { - bit m0; - bit m1; - - uint32_t b; - uint32_t y_copy = *y; - - m0 = is_smaller(*y & ((1 << w) - 1), *x & ((1 << w) - 1)); - m1 = is_smaller(0, t); - - cswap(x, &y_copy, m0); - b = m0 & m1; - *x ^= b << w; -} - -/* input: permutation pi */ -/* output: (2w-1)n/2 (or 0 if n==1) control bits c[0],c[step],c[2*step],... 
*/ -/* requires n = 2^w */ -static void controlbitsfrompermutation(int w, int n, int step, int off, unsigned char *c, const uint32_t *pi) { - int i; - int j; - int k; - int t; - uint32_t ip[N] = {0}; - uint32_t I[2 * N] = {0}; - uint32_t P[2 * N] = {0}; - uint32_t PI[2 * N] = {0}; - uint32_t T[2 * N] = {0}; - uint32_t piflip[N] = {0}; - uint32_t subpi[2][N / 2] = {{0}}; - - if (w == 1) { - c[ off / 8 ] |= (pi[0] & 1) << (off % 8); - } - if (w <= 1) { - return; - } - - invert(n, ip, pi); - - for (i = 0; i < n; ++i) { - I[i] = ip[i] | (1 << w); - I[n + i] = pi[i]; - } - - for (i = 0; i < 2 * n; ++i) { - P[i] = (i >> w) + (i & ((1 << w) - 2)) + ((i & 1) << w); - } - - for (t = 0; t < w; ++t) { - composeinv(2 * n, PI, P, I); - - for (i = 0; i < 2 * n; ++i) { - flow(w, &P[i], &PI[i], t); - } - - for (i = 0; i < 2 * n; ++i) { - T[i] = I[i ^ 1]; - } - - composeinv(2 * n, I, I, T); - - for (i = 0; i < 2 * n; ++i) { - T[i] = P[i ^ 1]; - } - - for (i = 0; i < 2 * n; ++i) { - flow(w, &P[i], &T[i], 1); - } - } - - for (i = 0; i < n; ++i) { - for (j = 0; j < w; ++j) { - piflip[i] = pi[i]; - } - } - - for (i = 0; i < n / 2; ++i) { - c[ (off + i * step) / 8 ] |= ((P[i * 2] >> w) & 1) << ((off + i * step) % 8); - } - for (i = 0; i < n / 2; ++i) { - c[ (off + ((w - 1)*n + i) * step) / 8 ] |= ((P[n + i * 2] >> w) & 1) << ((off + ((w - 1) * n + i) * step) % 8); - } - - for (i = 0; i < n / 2; ++i) { - cswap(&piflip[i * 2], &piflip[i * 2 + 1], (P[n + i * 2] >> w) & 1); - } - - for (k = 0; k < 2; ++k) { - for (i = 0; i < n / 2; ++i) { - subpi[k][i] = piflip[i * 2 + k] >> 1; - } - } - - for (k = 0; k < 2; ++k) { - controlbitsfrompermutation(w - 1, n / 2, step * 2, off + step * (n / 2 + k), c, subpi[k]); - } -} - -/* input: pi, a permutation*/ -/* output: out, control bits w.r.t. pi */ -void PQCLEAN_MCELIECE348864_SSE_controlbits(unsigned char *out, const uint32_t *pi) { - unsigned int i; - unsigned char c[ (2 * GFBITS - 1) * (1 << GFBITS) / 16 ]; - - for (i = 0; i < sizeof(c); i++) { - c[i] = 0; - } - - controlbitsfrompermutation(GFBITS, (1 << GFBITS), 1, 0, c, pi); - - for (i = 0; i < sizeof(c); i++) { - out[i] = c[i]; - } -} - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/controlbits.h b/src/kem/classic_mceliece/pqclean_mceliece348864_sse/controlbits.h deleted file mode 100644 index b32ba7b74..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/controlbits.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef PQCLEAN_MCELIECE348864_SSE_CONTROLBITS_H -#define PQCLEAN_MCELIECE348864_SSE_CONTROLBITS_H -/* - This file is for functions required for generating the control bits of the Benes network w.r.t. 
a random permutation - see the Lev-Pippenger-Valiant paper https://www.computer.org/csdl/trans/tc/1981/02/06312171.pdf -*/ - - -#include - -void PQCLEAN_MCELIECE348864_SSE_sort_63b(int n, uint64_t *x); -void PQCLEAN_MCELIECE348864_SSE_controlbits(unsigned char *out, const uint32_t *pi); - -#endif - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/crypto_hash.h b/src/kem/classic_mceliece/pqclean_mceliece348864_sse/crypto_hash.h deleted file mode 100644 index c69e5f3c8..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/crypto_hash.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef PQCLEAN_MCELIECE348864_SSE_CRYPTO_HASH_H -#define PQCLEAN_MCELIECE348864_SSE_CRYPTO_HASH_H -#include "fips202.h" - -#define crypto_hash_32b(out,in,inlen) shake256(out, 32, in, inlen) - -#endif diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/decrypt.c b/src/kem/classic_mceliece/pqclean_mceliece348864_sse/decrypt.c deleted file mode 100644 index 653bd0054..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/decrypt.c +++ /dev/null @@ -1,203 +0,0 @@ -/* - This file is for Niederreiter decryption -*/ - -#include "decrypt.h" - -#include "benes.h" -#include "bm.h" -#include "fft.h" -#include "fft_tr.h" -#include "params.h" -#include "util.h" - -#include - -static void scaling(vec128 out[][GFBITS], vec128 inv[][GFBITS], const unsigned char *sk, vec128 *recv) { - int i, j; - - uint64_t irr_int[ GFBITS ]; - vec128 eval[32][ GFBITS ]; - vec128 tmp[ GFBITS ]; - - // - - PQCLEAN_MCELIECE348864_SSE_irr_load(irr_int, sk); - - PQCLEAN_MCELIECE348864_SSE_fft(eval, irr_int); - - for (i = 0; i < 32; i++) { - PQCLEAN_MCELIECE348864_SSE_vec128_sq(eval[i], eval[i]); - } - - PQCLEAN_MCELIECE348864_SSE_vec128_copy(inv[0], eval[0]); - - for (i = 1; i < 32; i++) { - PQCLEAN_MCELIECE348864_SSE_vec128_mul(inv[i], inv[i - 1], eval[i]); - } - - PQCLEAN_MCELIECE348864_SSE_vec128_inv(tmp, inv[31]); - - for (i = 30; i >= 0; i--) { - PQCLEAN_MCELIECE348864_SSE_vec128_mul(inv[i + 1], tmp, inv[i]); - PQCLEAN_MCELIECE348864_SSE_vec128_mul(tmp, tmp, eval[i + 1]); - } - - PQCLEAN_MCELIECE348864_SSE_vec128_copy(inv[0], tmp); - - // - - for (i = 0; i < 32; i++) { - for (j = 0; j < GFBITS; j++) { - out[i][j] = PQCLEAN_MCELIECE348864_SSE_vec128_and(inv[i][j], recv[i]); - } - } -} - -static void preprocess(vec128 *recv, const unsigned char *s) { - int i; - uint8_t r[ 512 ]; - - for (i = 0; i < SYND_BYTES; i++) { - r[i] = s[i]; - } - - for (i = SYND_BYTES; i < 512; i++) { - r[i] = 0; - } - - for (i = 0; i < 32; i++) { - recv[i] = PQCLEAN_MCELIECE348864_SSE_load16(r + i * 16); - } -} - -static void postprocess(unsigned char *e, vec128 *err) { - int i; - unsigned char error8[ (1 << GFBITS) / 8 ]; - uint64_t v[2]; - - for (i = 0; i < 32; i++) { - v[0] = PQCLEAN_MCELIECE348864_SSE_vec128_extract(err[i], 0); - v[1] = PQCLEAN_MCELIECE348864_SSE_vec128_extract(err[i], 1); - - PQCLEAN_MCELIECE348864_SSE_store8(error8 + i * 16 + 0, v[0]); - PQCLEAN_MCELIECE348864_SSE_store8(error8 + i * 16 + 8, v[1]); - } - - for (i = 0; i < SYS_N / 8; i++) { - e[i] = error8[i]; - } -} - -static void scaling_inv(vec128 out[][GFBITS], vec128 inv[][GFBITS], vec128 *recv) { - int i, j; - - for (i = 0; i < 32; i++) { - for (j = 0; j < GFBITS; j++) { - out[i][j] = PQCLEAN_MCELIECE348864_SSE_vec128_and(inv[i][j], recv[i]); - } - } -} - -static uint16_t weight_check(unsigned char *e, vec128 *error) { - int i; - uint16_t w0 = 0; - uint16_t w1 = 0; - uint16_t check; - - for (i = 0; i < 32; i++) { - w0 += _mm_popcnt_u64( 
PQCLEAN_MCELIECE348864_SSE_vec128_extract(error[i], 0) ); - w0 += _mm_popcnt_u64( PQCLEAN_MCELIECE348864_SSE_vec128_extract(error[i], 1) ); - } - - for (i = 0; i < SYS_N / 8; i++) { - w1 += _mm_popcnt_u32( e[i] ); - } - - check = (w0 ^ SYS_T) | (w1 ^ SYS_T); - check -= 1; - check >>= 15; - - return check; -} - -static uint64_t synd_cmp(vec128 s0[ GFBITS ], vec128 s1[ GFBITS ]) { - int i; - vec128 diff; - - diff = PQCLEAN_MCELIECE348864_SSE_vec128_xor(s0[0], s1[0]); - - for (i = 1; i < GFBITS; i++) { - diff = PQCLEAN_MCELIECE348864_SSE_vec128_or(diff, PQCLEAN_MCELIECE348864_SSE_vec128_xor(s0[i], s1[i])); - } - - return PQCLEAN_MCELIECE348864_SSE_vec128_testz(diff); -} - -/* Niederreiter decryption with the Berlekamp decoder */ -/* intput: sk, secret key */ -/* c, ciphertext (syndrome) */ -/* output: e, error vector */ -/* return: 0 for success; 1 for failure */ -int PQCLEAN_MCELIECE348864_SSE_decrypt(unsigned char *e, const unsigned char *sk, const unsigned char *c) { - int i; - - uint16_t check_synd; - uint16_t check_weight; - - vec128 inv[ 32 ][ GFBITS ]; - vec128 scaled[ 32 ][ GFBITS ]; - vec128 eval[ 32 ][ GFBITS ]; - - vec128 error[ 32 ]; - - vec128 s_priv[ GFBITS ]; - vec128 s_priv_cmp[ GFBITS ]; - - uint64_t locator[ GFBITS ]; - - vec128 recv[ 32 ]; - vec128 allone; - - uint64_t bits_int[23][32]; - - // Berlekamp decoder - - preprocess(recv, c); - - PQCLEAN_MCELIECE348864_SSE_load_bits(bits_int, sk + IRR_BYTES); - PQCLEAN_MCELIECE348864_SSE_benes((uint64_t *) recv, bits_int, 1); - - scaling(scaled, inv, sk, recv); - - PQCLEAN_MCELIECE348864_SSE_fft_tr(s_priv, scaled); - - PQCLEAN_MCELIECE348864_SSE_bm(locator, s_priv); - - PQCLEAN_MCELIECE348864_SSE_fft(eval, locator); - - // reencryption and weight check - - allone = PQCLEAN_MCELIECE348864_SSE_vec128_setbits(1); - - for (i = 0; i < 32; i++) { - error[i] = PQCLEAN_MCELIECE348864_SSE_vec128_or_reduce(eval[i]); - error[i] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(error[i], allone); - } - - scaling_inv(scaled, inv, error); - PQCLEAN_MCELIECE348864_SSE_fft_tr(s_priv_cmp, scaled); - - check_synd = synd_cmp(s_priv, s_priv_cmp); - - // - - PQCLEAN_MCELIECE348864_SSE_benes((uint64_t *) error, bits_int, 0); - - postprocess(e, error); - - check_weight = weight_check(e, error); - - return 1 - (check_synd & check_weight); -} - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/decrypt.h b/src/kem/classic_mceliece/pqclean_mceliece348864_sse/decrypt.h deleted file mode 100644 index 91fa2f4b8..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/decrypt.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef PQCLEAN_MCELIECE348864_SSE_DECRYPT_H -#define PQCLEAN_MCELIECE348864_SSE_DECRYPT_H -/* - This file is for Nieddereiter decryption -*/ - -int PQCLEAN_MCELIECE348864_SSE_decrypt(unsigned char * /*e*/, const unsigned char * /*sk*/, const unsigned char * /*c*/); - -#endif - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/encrypt.c b/src/kem/classic_mceliece/pqclean_mceliece348864_sse/encrypt.c deleted file mode 100644 index ca5fa765f..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/encrypt.c +++ /dev/null @@ -1,99 +0,0 @@ -/* - This file is for Niederreiter encryption -*/ - -#include "encrypt.h" - -#include "gf.h" -#include "params.h" -#include "randombytes.h" -#include "util.h" - -#include -#include -#include -#include - -/* input: public key pk, error vector e */ -/* output: syndrome s */ -extern void PQCLEAN_MCELIECE348864_SSE_syndrome_asm(unsigned char *s, const unsigned char *pk, unsigned char 
*e); - -/* output: e, an error vector of weight t */ -static void gen_e(unsigned char *e) { - size_t i, j; - int eq, count; - - uint16_t ind_[ SYS_T * 2 ]; - uint16_t ind[ SYS_T * 2 ]; - uint64_t e_int[ (SYS_N + 63) / 64 ]; - uint64_t one = 1; - uint64_t mask; - uint64_t val[ SYS_T ]; - - while (1) { - randombytes((uint8_t *)ind_, sizeof(ind_)); - - for (i = 0; i < SYS_T * 2; i++) { - ind_[i] &= GFMASK; - } - - count = 0; - for (i = 0; i < SYS_T * 2; i++) { - if (ind_[i] < SYS_N) { - ind[ count++ ] = ind_[i]; - } - } - - if (count < SYS_T) { - continue; - } - - // check for repetition - - eq = 0; - - for (i = 1; i < SYS_T; i++) { - for (j = 0; j < i; j++) { - if (ind[i] == ind[j]) { - eq = 1; - } - } - } - - if (eq == 0) { - break; - } - } - - for (j = 0; j < SYS_T; j++) { - val[j] = one << (ind[j] & 63); - } - - for (i = 0; i < (SYS_N + 63) / 64; i++) { - e_int[i] = 0; - - for (j = 0; j < SYS_T; j++) { - mask = i ^ (ind[j] >> 6); - mask -= 1; - mask >>= 63; - mask = -mask; - - e_int[i] |= val[j] & mask; - } - } - - for (i = 0; i < (SYS_N + 63) / 64 - 1; i++) { - PQCLEAN_MCELIECE348864_SSE_store8(e, e_int[i]); - e += 8; - } - - for (j = 0; j < (SYS_N % 64); j += 8) { - e[ j / 8 ] = (e_int[i] >> j) & 0xFF; - } -} - -void PQCLEAN_MCELIECE348864_SSE_encrypt(unsigned char *s, unsigned char *e, const unsigned char *pk) { - gen_e(e); - PQCLEAN_MCELIECE348864_SSE_syndrome_asm(s, pk, e); -} - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/encrypt.h b/src/kem/classic_mceliece/pqclean_mceliece348864_sse/encrypt.h deleted file mode 100644 index bdc078232..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/encrypt.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef PQCLEAN_MCELIECE348864_SSE_ENCRYPT_H -#define PQCLEAN_MCELIECE348864_SSE_ENCRYPT_H -/* - This file is for Niederreiter encryption -*/ - - -void PQCLEAN_MCELIECE348864_SSE_encrypt(unsigned char * /*s*/, unsigned char * /*e*/, const unsigned char * /*pk*/); - -#endif - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/fft.c b/src/kem/classic_mceliece/pqclean_mceliece348864_sse/fft.c deleted file mode 100644 index c8b1b23f5..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/fft.c +++ /dev/null @@ -1,155 +0,0 @@ -/* - This file is for the Gao-Mateer FFT - sse http://www.math.clemson.edu/~sgao/papers/GM10.pdf -*/ - -#include "fft.h" - -#include "vec.h" -#include "vec128.h" - -/* input: in, polynomial in bitsliced form */ -/* output: in, result of applying the radix conversions on in */ -static void radix_conversions(uint64_t *in) { - int i, j, k; - - const uint64_t mask[5][2] = { - {0x8888888888888888, 0x4444444444444444}, - {0xC0C0C0C0C0C0C0C0, 0x3030303030303030}, - {0xF000F000F000F000, 0x0F000F000F000F00}, - {0xFF000000FF000000, 0x00FF000000FF0000}, - {0xFFFF000000000000, 0x0000FFFF00000000} - }; - - const uint64_t s[5][GFBITS] = { -#include "scalars.inc" - }; - - // - - for (j = 0; j <= 4; j++) { - for (i = 0; i < GFBITS; i++) { - for (k = 4; k >= j; k--) { - in[i] ^= (in[i] & mask[k][0]) >> (1 << k); - in[i] ^= (in[i] & mask[k][1]) >> (1 << k); - } - } - - PQCLEAN_MCELIECE348864_SSE_vec_mul(in, in, s[j]); // scaling - } -} - -/* input: in, result of applying the radix conversions to the input polynomial */ -/* output: out, evaluation results (by applying the FFT butterflies) */ -static void butterflies(vec128 out[][ GFBITS ], const uint64_t *in) { - int i, j, k, s, b; - - uint64_t t0, t1; - - const vec128 consts[ 32 ][ GFBITS ] = { -#include "consts.inc" - }; - - uint64_t consts_ptr 
= 0; - - const uint8_t reversal[64] = { - 0, 32, 16, 48, 8, 40, 24, 56, - 4, 36, 20, 52, 12, 44, 28, 60, - 2, 34, 18, 50, 10, 42, 26, 58, - 6, 38, 22, 54, 14, 46, 30, 62, - 1, 33, 17, 49, 9, 41, 25, 57, - 5, 37, 21, 53, 13, 45, 29, 61, - 3, 35, 19, 51, 11, 43, 27, 59, - 7, 39, 23, 55, 15, 47, 31, 63 - }; - - // boradcast - - vec128 tmp[ GFBITS ]; - vec128 x[ GFBITS ], y[ GFBITS ]; - - for (j = 0; j < 64; j += 4) { - for (i = 0; i < GFBITS; i++) { - t0 = (in[i] >> reversal[j + 0]) & 1; - t0 = -t0; - t1 = (in[i] >> reversal[j + 2]) & 1; - t1 = -t1; - - out[j / 2 + 0][i] = PQCLEAN_MCELIECE348864_SSE_vec128_set2x(t0, t1); - - t0 = (in[i] >> reversal[j + 1]) & 1; - t0 = -t0; - t1 = (in[i] >> reversal[j + 3]) & 1; - t1 = -t1; - - out[j / 2 + 1][i] = PQCLEAN_MCELIECE348864_SSE_vec128_set2x(t0, t1); - } - } - - // - - - for (i = 0; i < 32; i += 2) { - PQCLEAN_MCELIECE348864_SSE_vec128_mul(tmp, out[i + 1], consts[ 0 ]); - - for (b = 0; b < GFBITS; b++) { - out[i + 0][b] ^= tmp[b]; - } - for (b = 0; b < GFBITS; b++) { - out[i + 1][b] ^= out[i + 0][b]; - } - - for (b = 0; b < GFBITS; b++) { - x[b] = PQCLEAN_MCELIECE348864_SSE_vec128_unpack_low(out[i + 0][b], out[i + 1][b]); - } - for (b = 0; b < GFBITS; b++) { - y[b] = PQCLEAN_MCELIECE348864_SSE_vec128_unpack_high(out[i + 0][b], out[i + 1][b]); - } - - for (b = 0; b < GFBITS; b++) { - out[i + 0][b] = x[b]; - } - for (b = 0; b < GFBITS; b++) { - out[i + 1][b] = y[b]; - } - } - - consts_ptr += 1; - - for (i = 0; i <= 4; i++) { - s = 1 << i; - - for (j = 0; j < 32; j += 2 * s) { - for (k = j; k < j + s; k++) { - PQCLEAN_MCELIECE348864_SSE_vec128_mul(tmp, out[k + s], consts[ consts_ptr + (k - j) ]); - - for (b = 0; b < GFBITS; b++) { - out[k][b] ^= tmp[b]; - } - for (b = 0; b < GFBITS; b++) { - out[k + s][b] ^= out[k][b]; - } - } - } - - consts_ptr += s; - } - - // adding the part contributed by x^64 - - vec128 powers[32][GFBITS] = { -#include "powers.inc" - }; - - for (i = 0; i < 32; i++) { - for (b = 0; b < GFBITS; b++) { - out[i][b] ^= powers[i][b]; - } - } -} - -void PQCLEAN_MCELIECE348864_SSE_fft(vec128 out[][ GFBITS ], uint64_t *in) { - radix_conversions(in); - butterflies(out, in); -} - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/fft.h b/src/kem/classic_mceliece/pqclean_mceliece348864_sse/fft.h deleted file mode 100644 index 4fecb38b8..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/fft.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef PQCLEAN_MCELIECE348864_SSE_FFT_H -#define PQCLEAN_MCELIECE348864_SSE_FFT_H -/* - This file is for the Gao-Mateer FFT - sse http://www.math.clemson.edu/~sgao/papers/GM10.pdf -*/ - - -#include - -#include "params.h" -#include "vec128.h" - -void PQCLEAN_MCELIECE348864_SSE_fft(vec128 /*out*/[][GFBITS], uint64_t * /*in*/); - -#endif - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/fft_tr.c b/src/kem/classic_mceliece/pqclean_mceliece348864_sse/fft_tr.c deleted file mode 100644 index 74c794306..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/fft_tr.c +++ /dev/null @@ -1,312 +0,0 @@ -/* - This file is for transpose of the Gao-Mateer FFT - Functions with names ending with _tr are (roughly) the transpose of the corresponding functions in fft.c -*/ - -#include "fft_tr.h" - -#include "transpose.h" -#include "vec.h" -#include "vec128.h" - -#include - -static void radix_conversions_tr(vec128 in[ GFBITS ]) { - int i, j, k; - - const vec128 mask[10] = { - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x2222222222222222, 0x2222222222222222), - 
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x4444444444444444, 0x4444444444444444), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0C0C0C0C0C0C0C0C, 0x0C0C0C0C0C0C0C0C), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3030303030303030, 0x3030303030303030), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00F000F000F000F0, 0x00F000F000F000F0), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F000F000F000F00, 0x0F000F000F000F00), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000FF000000FF00, 0x0000FF000000FF00), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FF000000FF0000, 0x00FF000000FF0000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00000000FFFF0000, 0x00000000FFFF0000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000FFFF00000000, 0x0000FFFF00000000) - }; - - const vec128 s[5][GFBITS] = { -#include "scalars_2x.inc" - }; - - uint64_t v0, v1; - - // - - for (j = 5; j >= 0; j--) { - - if (j < 5) { - PQCLEAN_MCELIECE348864_SSE_vec128_mul(in, in, s[j]); - } - - for (i = 0; i < GFBITS; i++) { - for (k = j; k <= 4; k++) { - in[i] ^= PQCLEAN_MCELIECE348864_SSE_vec128_sll_2x(in[i] & mask[2 * k + 0], 1 << k); - in[i] ^= PQCLEAN_MCELIECE348864_SSE_vec128_sll_2x(in[i] & mask[2 * k + 1], 1 << k); - } - } - - for (i = 0; i < GFBITS; i++) { - v0 = PQCLEAN_MCELIECE348864_SSE_vec128_extract(in[i], 0); - v1 = PQCLEAN_MCELIECE348864_SSE_vec128_extract(in[i], 1); - - v1 ^= v0 >> 32; - v1 ^= v1 << 32; - - in[i] = PQCLEAN_MCELIECE348864_SSE_vec128_set2x(v0, v1); - } - } -} - -static void butterflies_tr(vec128 out[ GFBITS ], vec128 in[][ GFBITS ]) { - int i, j, k, s, b; - - uint64_t t[ GFBITS ]; - uint64_t pre[6][ GFBITS ]; - - uint64_t out64[2][GFBITS]; - - vec128 p2[ 6 ]; - vec128 buf[64]; - vec128 tt[ GFBITS ]; - vec128 x[ GFBITS ], y[ GFBITS ]; - - const vec128 consts[ 32 ][ GFBITS ] = { -#include "consts.inc" - }; - - uint64_t consts_ptr = 32; - - const uint8_t reversal[64] = { - 0, 32, 16, 48, 8, 40, 24, 56, - 4, 36, 20, 52, 12, 44, 28, 60, - 2, 34, 18, 50, 10, 42, 26, 58, - 6, 38, 22, 54, 14, 46, 30, 62, - 1, 33, 17, 49, 9, 41, 25, 57, - 5, 37, 21, 53, 13, 45, 29, 61, - 3, 35, 19, 51, 11, 43, 27, 59, - 7, 39, 23, 55, 15, 47, 31, 63 - }; - - const uint16_t beta[6] = {8, 1300, 3408, 1354, 2341, 1154}; - - // butterflies - - for (i = 4; i >= 0; i--) { - s = 1 << i; - consts_ptr -= s; - - for (j = 0; j < 32; j += 2 * s) { - for (k = j; k < j + s; k++) { - for (b = 0; b < GFBITS; b++) { - in[k][b] ^= in[k + s][b]; - } - PQCLEAN_MCELIECE348864_SSE_vec128_mul(tt, in[k], consts[ consts_ptr + (k - j) ]); - for (b = 0; b < GFBITS; b++) { - in[k + s][b] ^= tt[b]; - } - } - } - } - - for (i = 0; i < 32; i += 2) { - for (b = 0; b < GFBITS; b++) { - x[b] = PQCLEAN_MCELIECE348864_SSE_vec128_unpack_low(in[i + 0][b], in[i + 1][b]); - } - for (b = 0; b < GFBITS; b++) { - y[b] = PQCLEAN_MCELIECE348864_SSE_vec128_unpack_high(in[i + 0][b], in[i + 1][b]); - } - - for (b = 0; b < GFBITS; b++) { - in[i + 0][b] = x[b] ^ y[b]; - } - PQCLEAN_MCELIECE348864_SSE_vec128_mul(tt, in[i + 0], consts[ 0 ]); - for (b = 0; b < GFBITS; b++) { - in[i + 1][b] = y[b] ^ tt[b]; - } - } - - // transpose - - for (i = 0; i < GFBITS; i += 2) { - for (j = 0; j < 64; j += 4) { - buf[ reversal[j + 0] ] = PQCLEAN_MCELIECE348864_SSE_vec128_set2x(PQCLEAN_MCELIECE348864_SSE_vec128_extract(in[j / 2 + 0][i + 0], 0), - PQCLEAN_MCELIECE348864_SSE_vec128_extract(in[j / 2 + 0][i + 1], 0)); - buf[ reversal[j + 1] ] = PQCLEAN_MCELIECE348864_SSE_vec128_set2x(PQCLEAN_MCELIECE348864_SSE_vec128_extract(in[j / 2 + 1][i + 0], 0), - PQCLEAN_MCELIECE348864_SSE_vec128_extract(in[j / 2 + 1][i 
+ 1], 0)); - buf[ reversal[j + 2] ] = PQCLEAN_MCELIECE348864_SSE_vec128_set2x(PQCLEAN_MCELIECE348864_SSE_vec128_extract(in[j / 2 + 0][i + 0], 1), - PQCLEAN_MCELIECE348864_SSE_vec128_extract(in[j / 2 + 0][i + 1], 1)); - buf[ reversal[j + 3] ] = PQCLEAN_MCELIECE348864_SSE_vec128_set2x(PQCLEAN_MCELIECE348864_SSE_vec128_extract(in[j / 2 + 1][i + 0], 1), - PQCLEAN_MCELIECE348864_SSE_vec128_extract(in[j / 2 + 1][i + 1], 1)); - } - - PQCLEAN_MCELIECE348864_SSE_transpose_64x128_sp(buf); - - p2[0] = buf[32]; - buf[33] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[33], buf[32]); - p2[1] = buf[33]; - buf[35] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[35], buf[33]); - p2[0] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[0], buf[35]); - buf[34] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[34], buf[35]); - p2[2] = buf[34]; - buf[38] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[38], buf[34]); - p2[0] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[0], buf[38]); - buf[39] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[39], buf[38]); - p2[1] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[1], buf[39]); - buf[37] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[37], buf[39]); - p2[0] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[0], buf[37]); - buf[36] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[36], buf[37]); - p2[3] = buf[36]; - buf[44] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[44], buf[36]); - p2[0] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[0], buf[44]); - buf[45] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[45], buf[44]); - p2[1] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[1], buf[45]); - buf[47] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[47], buf[45]); - p2[0] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[0], buf[47]); - buf[46] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[46], buf[47]); - p2[2] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[2], buf[46]); - buf[42] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[42], buf[46]); - p2[0] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[0], buf[42]); - buf[43] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[43], buf[42]); - p2[1] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[1], buf[43]); - buf[41] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[41], buf[43]); - p2[0] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[0], buf[41]); - buf[40] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[40], buf[41]); - p2[4] = buf[40]; - buf[56] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[56], buf[40]); - p2[0] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[0], buf[56]); - buf[57] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[57], buf[56]); - p2[1] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[1], buf[57]); - buf[59] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[59], buf[57]); - p2[0] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[0], buf[59]); - buf[58] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[58], buf[59]); - p2[2] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[2], buf[58]); - buf[62] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[62], buf[58]); - p2[0] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[0], buf[62]); - buf[63] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[63], buf[62]); - p2[1] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[1], buf[63]); - buf[61] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[61], buf[63]); - p2[0] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[0], buf[61]); - buf[60] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[60], buf[61]); - p2[3] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[3], buf[60]); - buf[52] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[52], buf[60]); - p2[0] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[0], buf[52]); - buf[53] = 
PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[53], buf[52]); - p2[1] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[1], buf[53]); - buf[55] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[55], buf[53]); - p2[0] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[0], buf[55]); - buf[54] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[54], buf[55]); - p2[2] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[2], buf[54]); - buf[50] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[50], buf[54]); - p2[0] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[0], buf[50]); - buf[51] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[51], buf[50]); - p2[1] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[1], buf[51]); - buf[49] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[49], buf[51]); - p2[0] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[0], buf[49]); - buf[48] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[48], buf[49]); - p2[5] = buf[48]; - buf[16] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[16], buf[48]); - p2[0] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[0], buf[16]); - buf[17] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[17], buf[16]); - p2[1] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[1], buf[17]); - buf[19] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[19], buf[17]); - p2[0] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[0], buf[19]); - buf[18] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[18], buf[19]); - p2[2] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[2], buf[18]); - buf[22] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[22], buf[18]); - p2[0] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[0], buf[22]); - buf[23] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[23], buf[22]); - p2[1] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[1], buf[23]); - buf[21] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[21], buf[23]); - p2[0] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[0], buf[21]); - buf[20] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[20], buf[21]); - p2[3] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[3], buf[20]); - buf[28] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[28], buf[20]); - p2[0] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[0], buf[28]); - buf[29] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[29], buf[28]); - p2[1] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[1], buf[29]); - buf[31] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[31], buf[29]); - p2[0] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[0], buf[31]); - buf[30] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[30], buf[31]); - p2[2] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[2], buf[30]); - buf[26] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[26], buf[30]); - p2[0] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[0], buf[26]); - buf[27] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[27], buf[26]); - p2[1] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[1], buf[27]); - buf[25] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[25], buf[27]); - p2[0] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[0], buf[25]); - buf[24] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[24], buf[25]); - p2[4] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[4], buf[24]); - buf[8] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[8], buf[24]); - p2[0] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[0], buf[8]); - buf[9] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[9], buf[8]); - p2[1] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[1], buf[9]); - buf[11] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[11], buf[9]); - p2[0] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[0], buf[11]); - buf[10] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[10], buf[11]); - p2[2] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[2], buf[10]); - 
buf[14] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[14], buf[10]); - p2[0] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[0], buf[14]); - buf[15] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[15], buf[14]); - p2[1] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[1], buf[15]); - buf[13] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[13], buf[15]); - p2[0] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[0], buf[13]); - buf[12] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[12], buf[13]); - p2[3] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[3], buf[12]); - buf[4] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[4], buf[12]); - p2[0] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[0], buf[4]); - buf[5] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[5], buf[4]); - p2[1] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[1], buf[5]); - buf[7] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[7], buf[5]); - p2[0] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[0], buf[7]); - buf[6] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[6], buf[7]); - p2[2] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[2], buf[6]); - buf[2] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[2], buf[6]); - p2[0] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[0], buf[2]); - buf[3] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[3], buf[2]); - p2[1] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[1], buf[3]); - buf[1] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[1], buf[3]); - - p2[0] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[0], buf[1]); - buf[0] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[0], buf[1]); - - for (j = 0; j < 6; j++) { - pre[j][i + 0] = PQCLEAN_MCELIECE348864_SSE_vec128_extract(p2[j], 0); - pre[j][i + 1] = PQCLEAN_MCELIECE348864_SSE_vec128_extract(p2[j], 1); - } - - out64[0][i + 0] = PQCLEAN_MCELIECE348864_SSE_vec128_extract(buf[0], 0); - out64[0][i + 1] = PQCLEAN_MCELIECE348864_SSE_vec128_extract(buf[0], 1); - } - - // - - for (j = 0; j < GFBITS; j++) { - t[j] = (beta[0] >> j) & 1; - t[j] = -t[j]; - } - - PQCLEAN_MCELIECE348864_SSE_vec_mul(out64[1], pre[0], t); - - for (i = 1; i < 6; i++) { - for (j = 0; j < GFBITS; j++) { - t[j] = (beta[i] >> j) & 1; - t[j] = -t[j]; - } - - PQCLEAN_MCELIECE348864_SSE_vec_mul(t, pre[i], t); - PQCLEAN_MCELIECE348864_SSE_vec_add(out64[1], out64[1], t); - } - - for (b = 0; b < GFBITS; b++) { - out[b] = PQCLEAN_MCELIECE348864_SSE_vec128_set2x(out64[0][b], out64[1][b]); - } -} - -void PQCLEAN_MCELIECE348864_SSE_fft_tr(vec128 out[GFBITS], vec128 in[][ GFBITS ]) { - butterflies_tr(out, in); - radix_conversions_tr(out); -} - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/fft_tr.h b/src/kem/classic_mceliece/pqclean_mceliece348864_sse/fft_tr.h deleted file mode 100644 index 924456408..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/fft_tr.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef PQCLEAN_MCELIECE348864_SSE_FFT_TR_H -#define PQCLEAN_MCELIECE348864_SSE_FFT_TR_H -/* - This file is for transpose of the Gao-Mateer FFT -*/ - -#include "params.h" -#include "vec128.h" - -void PQCLEAN_MCELIECE348864_SSE_fft_tr(vec128 /*out*/[GFBITS], vec128 /*in*/[][ GFBITS ]); - -#endif - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/gf.c b/src/kem/classic_mceliece/pqclean_mceliece348864_sse/gf.c deleted file mode 100644 index fa5f25d6c..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/gf.c +++ /dev/null @@ -1,169 +0,0 @@ -/* - This file is for functions for field arithmetic -*/ - -#include "gf.h" - -#include "params.h" - -gf PQCLEAN_MCELIECE348864_SSE_gf_iszero(gf a) { - uint32_t t = a; - - t -= 1; - t >>= 20; - - 
return (gf) t; -} - -gf PQCLEAN_MCELIECE348864_SSE_gf_add(gf in0, gf in1) { - return in0 ^ in1; -} - -gf PQCLEAN_MCELIECE348864_SSE_gf_mul(gf in0, gf in1) { - int i; - - uint32_t tmp; - uint32_t t0; - uint32_t t1; - uint32_t t; - - t0 = in0; - t1 = in1; - - tmp = t0 * (t1 & 1); - - for (i = 1; i < GFBITS; i++) { - tmp ^= (t0 * (t1 & (1 << i))); - } - - t = tmp & 0x7FC000; - tmp ^= t >> 9; - tmp ^= t >> 12; - - t = tmp & 0x3000; - tmp ^= t >> 9; - tmp ^= t >> 12; - - return tmp & ((1 << GFBITS) - 1); -} - -/* input: field element in */ -/* return: in^2 */ -static inline gf gf_sq(gf in) { - const uint32_t B[] = {0x55555555, 0x33333333, 0x0F0F0F0F, 0x00FF00FF}; - - uint32_t x = in; - uint32_t t; - - x = (x | (x << 8)) & B[3]; - x = (x | (x << 4)) & B[2]; - x = (x | (x << 2)) & B[1]; - x = (x | (x << 1)) & B[0]; - - t = x & 0x7FC000; - x ^= t >> 9; - x ^= t >> 12; - - t = x & 0x3000; - x ^= t >> 9; - x ^= t >> 12; - - return x & ((1 << GFBITS) - 1); -} - -gf PQCLEAN_MCELIECE348864_SSE_gf_inv(gf in) { - gf tmp_11; - gf tmp_1111; - - gf out = in; - - out = gf_sq(out); - tmp_11 = PQCLEAN_MCELIECE348864_SSE_gf_mul(out, in); // 11 - - out = gf_sq(tmp_11); - out = gf_sq(out); - tmp_1111 = PQCLEAN_MCELIECE348864_SSE_gf_mul(out, tmp_11); // 1111 - - out = gf_sq(tmp_1111); - out = gf_sq(out); - out = gf_sq(out); - out = gf_sq(out); - out = PQCLEAN_MCELIECE348864_SSE_gf_mul(out, tmp_1111); // 11111111 - - out = gf_sq(out); - out = gf_sq(out); - out = PQCLEAN_MCELIECE348864_SSE_gf_mul(out, tmp_11); // 1111111111 - - out = gf_sq(out); - out = PQCLEAN_MCELIECE348864_SSE_gf_mul(out, in); // 11111111111 - - return gf_sq(out); // 111111111110 -} - -/* input: field element den, num */ -/* return: (num/den) */ -gf PQCLEAN_MCELIECE348864_SSE_gf_frac(gf den, gf num) { - return PQCLEAN_MCELIECE348864_SSE_gf_mul(PQCLEAN_MCELIECE348864_SSE_gf_inv(den), num); -} - -/* input: in0, in1 in GF((2^m)^t)*/ -/* output: out = in0*in1 */ -void PQCLEAN_MCELIECE348864_SSE_GF_mul(gf *out, const gf *in0, const gf *in1) { - int i, j; - - gf prod[ SYS_T * 2 - 1 ]; - - for (i = 0; i < SYS_T * 2 - 1; i++) { - prod[i] = 0; - } - - for (i = 0; i < SYS_T; i++) { - for (j = 0; j < SYS_T; j++) { - prod[i + j] ^= PQCLEAN_MCELIECE348864_SSE_gf_mul(in0[i], in1[j]); - } - } - - // - - for (i = (SYS_T - 1) * 2; i >= SYS_T; i--) { - prod[i - SYS_T + 9] ^= PQCLEAN_MCELIECE348864_SSE_gf_mul(prod[i], (gf) 877); - prod[i - SYS_T + 7] ^= PQCLEAN_MCELIECE348864_SSE_gf_mul(prod[i], (gf) 2888); - prod[i - SYS_T + 5] ^= PQCLEAN_MCELIECE348864_SSE_gf_mul(prod[i], (gf) 1781); - prod[i - SYS_T + 0] ^= PQCLEAN_MCELIECE348864_SSE_gf_mul(prod[i], (gf) 373); - } - - for (i = 0; i < SYS_T; i++) { - out[i] = prod[i]; - } -} - -/* 2 field multiplications */ -uint64_t PQCLEAN_MCELIECE348864_SSE_gf_mul2(gf a, gf b0, gf b1) { - int i; - - uint64_t tmp = 0; - uint64_t t0; - uint64_t t1; - uint64_t t; - uint64_t mask = 0x0000000100000001; - - t0 = a; - t1 = b1; - t1 = (t1 << 32) | b0; - - for (i = 0; i < GFBITS; i++) { - tmp ^= t0 * (t1 & mask); - mask += mask; - } - - // - - t = tmp & 0x007FC000007FC000; - tmp ^= (t >> 9) ^ (t >> 12); - - t = tmp & 0x0000300000003000; - tmp ^= (t >> 9) ^ (t >> 12); - - return tmp & 0x00000FFF00000FFF; -} - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/gf.h b/src/kem/classic_mceliece/pqclean_mceliece348864_sse/gf.h deleted file mode 100644 index 8b3254d6d..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/gf.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef PQCLEAN_MCELIECE348864_SSE_GF_H -#define 
PQCLEAN_MCELIECE348864_SSE_GF_H -/* - This file is for functions for field arithmetic -*/ - - -#include "params.h" - -#include - -typedef uint16_t gf; - -gf PQCLEAN_MCELIECE348864_SSE_gf_iszero(gf /*a*/); -gf PQCLEAN_MCELIECE348864_SSE_gf_add(gf /*in0*/, gf /*in1*/); -gf PQCLEAN_MCELIECE348864_SSE_gf_mul(gf /*in0*/, gf /*in1*/); -gf PQCLEAN_MCELIECE348864_SSE_gf_frac(gf /*den*/, gf /*num*/); -gf PQCLEAN_MCELIECE348864_SSE_gf_inv(gf /*in*/); - -void PQCLEAN_MCELIECE348864_SSE_GF_mul(gf * /*out*/, const gf * /*in0*/, const gf * /*in1*/); - -/* 2 field multiplications */ -uint64_t PQCLEAN_MCELIECE348864_SSE_gf_mul2(gf a, gf b0, gf b1); - -#endif - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/operations.c b/src/kem/classic_mceliece/pqclean_mceliece348864_sse/operations.c deleted file mode 100644 index fe43ab236..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/operations.c +++ /dev/null @@ -1,136 +0,0 @@ -#include "api.h" - -#include "aes256ctr.h" -#include "controlbits.h" -#include "crypto_hash.h" -#include "decrypt.h" -#include "encrypt.h" -#include "params.h" -#include "pk_gen.h" -#include "randombytes.h" -#include "sk_gen.h" -#include "util.h" - -#include -#include - -int PQCLEAN_MCELIECE348864_SSE_crypto_kem_enc( - uint8_t *c, - uint8_t *key, - const uint8_t *pk -) { - uint8_t two_e[ 1 + SYS_N / 8 ] = {2}; - uint8_t *e = two_e + 1; - uint8_t one_ec[ 1 + SYS_N / 8 + (SYND_BYTES + 32) ] = {1}; - - PQCLEAN_MCELIECE348864_SSE_encrypt(c, e, pk); - - crypto_hash_32b(c + SYND_BYTES, two_e, sizeof(two_e)); - - memcpy(one_ec + 1, e, SYS_N / 8); - memcpy(one_ec + 1 + SYS_N / 8, c, SYND_BYTES + 32); - - crypto_hash_32b(key, one_ec, sizeof(one_ec)); - - return 0; -} - -int PQCLEAN_MCELIECE348864_SSE_crypto_kem_dec( - uint8_t *key, - const uint8_t *c, - const uint8_t *sk -) { - int i; - - uint8_t ret_confirm = 0; - uint8_t ret_decrypt = 0; - - uint16_t m; - - uint8_t conf[32]; - uint8_t two_e[ 1 + SYS_N / 8 ] = {2}; - uint8_t *e = two_e + 1; - uint8_t preimage[ 1 + SYS_N / 8 + (SYND_BYTES + 32) ]; - uint8_t *x = preimage; - - // - - ret_decrypt = (uint8_t)PQCLEAN_MCELIECE348864_SSE_decrypt(e, sk + SYS_N / 8, c); - - crypto_hash_32b(conf, two_e, sizeof(two_e)); - - for (i = 0; i < 32; i++) { - ret_confirm |= conf[i] ^ c[SYND_BYTES + i]; - } - - m = ret_decrypt | ret_confirm; - m -= 1; - m >>= 8; - - *x++ = (~m & 0) | (m & 1); - for (i = 0; i < SYS_N / 8; i++) { - *x++ = (~m & sk[i]) | (m & e[i]); - } - for (i = 0; i < SYND_BYTES + 32; i++) { - *x++ = c[i]; - } - - crypto_hash_32b(key, preimage, sizeof(preimage)); - - return 0; -} - -int PQCLEAN_MCELIECE348864_SSE_crypto_kem_keypair -( - uint8_t *pk, - uint8_t *sk -) { - int i; - uint8_t seed[ 32 ]; - uint8_t r[ SYS_T * 2 + (1 << GFBITS)*sizeof(uint32_t) + SYS_N / 8 + 32 ]; - uint8_t nonce[ 16 ] = {0}; - uint8_t *rp; - - gf f[ SYS_T ]; // element in GF(2^mt) - gf irr[ SYS_T ]; // Goppa polynomial - uint32_t perm[ 1 << GFBITS ]; // random permutation - - randombytes(seed, sizeof(seed)); - - while (1) { - rp = r; - PQCLEAN_MCELIECE348864_SSE_aes256ctr(r, sizeof(r), nonce, seed); - memcpy(seed, &r[ sizeof(r) - 32 ], 32); - - for (i = 0; i < SYS_T; i++) { - f[i] = PQCLEAN_MCELIECE348864_SSE_load2(rp + i * 2); - } - rp += sizeof(f); - if (PQCLEAN_MCELIECE348864_SSE_genpoly_gen(irr, f)) { - continue; - } - - for (i = 0; i < (1 << GFBITS); i++) { - perm[i] = PQCLEAN_MCELIECE348864_SSE_load4(rp + i * 4); - } - rp += sizeof(perm); - if (PQCLEAN_MCELIECE348864_SSE_perm_check(perm)) { - continue; - } - - for (i = 0; i < 
SYS_T; i++) { - PQCLEAN_MCELIECE348864_SSE_store2(sk + SYS_N / 8 + i * 2, irr[i]); - } - if (PQCLEAN_MCELIECE348864_SSE_pk_gen(pk, perm, sk + SYS_N / 8)) { - continue; - } - - memcpy(sk, rp, SYS_N / 8); - PQCLEAN_MCELIECE348864_SSE_controlbits(sk + SYS_N / 8 + IRR_BYTES, perm); - - break; - } - - return 0; -} - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/params.h b/src/kem/classic_mceliece/pqclean_mceliece348864_sse/params.h deleted file mode 100644 index 1b23043c2..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/params.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef PQCLEAN_MCELIECE348864_SSE_PARAMS_H -#define PQCLEAN_MCELIECE348864_SSE_PARAMS_H - -#define GFBITS 12 -#define SYS_N 3488 -#define SYS_T 64 - -#define COND_BYTES ((1 << (GFBITS-4))*(2*GFBITS - 1)) -#define IRR_BYTES (SYS_T * 2) - -#define PK_NROWS (SYS_T*GFBITS) -#define PK_NCOLS (SYS_N - PK_NROWS) -#define PK_ROW_BYTES ((PK_NCOLS + 7)/8) - -#define SK_BYTES (SYS_N/8 + IRR_BYTES + COND_BYTES) -#define SYND_BYTES ((PK_NROWS + 7)/8) - -#define GFMASK ((1 << GFBITS) - 1) - -#endif - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/pk_gen.c b/src/kem/classic_mceliece/pqclean_mceliece348864_sse/pk_gen.c deleted file mode 100644 index 0d0097c33..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/pk_gen.c +++ /dev/null @@ -1,329 +0,0 @@ -/* - This file is for public-key generation -*/ - -#include "pk_gen.h" - -#include "benes.h" -#include "controlbits.h" -#include "fft.h" -#include "params.h" -#include "transpose.h" -#include "util.h" - -#include - -#include - -#define min(a, b) (((a) < (b)) ? (a) : (b)) - -static void de_bitslicing(uint64_t *out, vec128 in[][GFBITS]) { - int i, j, r; - uint64_t u = 0; - - for (i = 0; i < (1 << GFBITS); i++) { - out[i] = 0 ; - } - - for (i = 0; i < 32; i++) { - for (j = GFBITS - 1; j >= 0; j--) { - u = PQCLEAN_MCELIECE348864_SSE_vec128_extract(in[i][j], 0); - for (r = 0; r < 64; r++) { - out[i * 128 + 0 * 64 + r] <<= 1; - out[i * 128 + 0 * 64 + r] |= (u >> r) & 1; - } - u = PQCLEAN_MCELIECE348864_SSE_vec128_extract(in[i][j], 1); - for (r = 0; r < 64; r++) { - out[i * 128 + 1 * 64 + r] <<= 1; - out[i * 128 + 1 * 64 + r] |= (u >> r) & 1; - } - } - } -} - -static void to_bitslicing_2x(vec128 out0[][GFBITS], vec128 out1[][GFBITS], const uint64_t *in) { - int i, j, k, r; - uint64_t u[2] = {0}; - - for (i = 0; i < 32; i++) { - for (j = GFBITS - 1; j >= 0; j--) { - for (k = 0; k < 2; k++) { - for (r = 63; r >= 0; r--) { - u[k] <<= 1; - u[k] |= (in[i * 128 + k * 64 + r] >> (j + GFBITS)) & 1; - } - } - - out1[i][j] = PQCLEAN_MCELIECE348864_SSE_vec128_set2x(u[0], u[1]); - } - - for (j = GFBITS - 1; j >= 0; j--) { - for (k = 0; k < 2; k++) { - for (r = 63; r >= 0; r--) { - u[k] <<= 1; - u[k] |= (in[i * 128 + k * 64 + r] >> j) & 1; - } - } - - out0[i][GFBITS - 1 - j] = PQCLEAN_MCELIECE348864_SSE_vec128_set2x(u[0], u[1]); - } - } -} - -/* return number of trailing zeros of the non-zero input in */ -static inline int ctz(uint64_t in) { - return (int)_tzcnt_u64(in); -} - -static inline uint64_t same_mask(uint16_t x, uint16_t y) { - uint64_t mask; - - mask = x ^ y; - mask -= 1; - mask >>= 63; - mask = -mask; - - return mask; -} - -static int mov_columns(uint64_t mat[][ ((SYS_N + 127) / 128) * 2 ], uint32_t *perm) { - int i, j, k, s, block_idx, row; - uint64_t buf[64], ctz_list[32], t, d, mask; - - row = GFBITS * SYS_T - 32; - block_idx = row / 64; - - // extract the 32x64 matrix - - for (i = 0; i < 32; i++) { - buf[i] = (mat[ row + i ][ block_idx + 
0 ] >> 32) | - (mat[ row + i ][ block_idx + 1 ] << 32); - } - - // compute the column indices of pivots by Gaussian elimination. - // the indices are stored in ctz_list - - for (i = 0; i < 32; i++) { - t = buf[i]; - for (j = i + 1; j < 32; j++) { - t |= buf[j]; - } - - if (t == 0) { - return -1; // return if buf is not full rank - } - - ctz_list[i] = s = ctz(t); - - for (j = i + 1; j < 32; j++) { - mask = (buf[i] >> s) & 1; - mask -= 1; - buf[i] ^= buf[j] & mask; - } - for (j = 0; j < i; j++) { - mask = (buf[j] >> s) & 1; - mask = -mask; - buf[j] ^= buf[i] & mask; - } - for (j = i + 1; j < 32; j++) { - mask = (buf[j] >> s) & 1; - mask = -mask; - buf[j] ^= buf[i] & mask; - } - } - - // updating permutation - - for (j = 0; j < 32; j++) { - for (k = j + 1; k < 64; k++) { - d = perm[ row + j ] ^ perm[ row + k ]; - d &= same_mask(k, ctz_list[j]); - perm[ row + j ] ^= d; - perm[ row + k ] ^= d; - } - } - - // moving columns of mat according to the column indices of pivots - - for (i = 0; i < GFBITS * SYS_T; i += 64) { - - for (j = 0; j < min(64, GFBITS * SYS_T - i); j++) { - buf[j] = (mat[ i + j ][ block_idx + 0 ] >> 32) | - (mat[ i + j ][ block_idx + 1 ] << 32); - } - - PQCLEAN_MCELIECE348864_SSE_transpose_64x64(buf); - - for (j = 0; j < 32; j++) { - for (k = j + 1; k < 64; k++) { - d = buf[ j ] ^ buf[ k ]; - d &= same_mask(k, ctz_list[j]); - buf[ j ] ^= d; - buf[ k ] ^= d; - } - } - - PQCLEAN_MCELIECE348864_SSE_transpose_64x64(buf); - - for (j = 0; j < min(64, GFBITS * SYS_T - i); j++) { - mat[ i + j ][ block_idx + 0 ] = (mat[ i + j ][ block_idx + 0 ] << 32 >> 32) | (buf[j] << 32); - mat[ i + j ][ block_idx + 1 ] = (mat[ i + j ][ block_idx + 1 ] >> 32 << 32) | (buf[j] >> 32); - } - } - - return 0; -} - -#define NBLOCKS1_H ((SYS_N + 63) / 64) -#define NBLOCKS2_H ((SYS_N + 127) / 128) -#define NBLOCKS_I ((GFBITS * SYS_T + 63) / 64) - -int PQCLEAN_MCELIECE348864_SSE_pk_gen(unsigned char *pk, uint32_t *perm, const unsigned char *sk) { - int i, j, k; - int row, c; - - uint64_t mat[ GFBITS * SYS_T ][ NBLOCKS2_H * 2 ]; - uint64_t ops[ GFBITS * SYS_T ][ NBLOCKS_I ]; - - uint64_t mask; - - uint64_t irr_int[ GFBITS ]; - - vec128 consts[32][ GFBITS ]; - vec128 eval[ 32 ][ GFBITS ]; - vec128 prod[ 32 ][ GFBITS ]; - vec128 tmp[ GFBITS ]; - - uint64_t list[1 << GFBITS]; - - // compute the inverses - - PQCLEAN_MCELIECE348864_SSE_irr_load(irr_int, sk); - - PQCLEAN_MCELIECE348864_SSE_fft(eval, irr_int); - - PQCLEAN_MCELIECE348864_SSE_vec128_copy(prod[0], eval[0]); - - for (i = 1; i < 32; i++) { - PQCLEAN_MCELIECE348864_SSE_vec128_mul(prod[i], prod[i - 1], eval[i]); - } - - PQCLEAN_MCELIECE348864_SSE_vec128_inv(tmp, prod[31]); - - for (i = 30; i >= 0; i--) { - PQCLEAN_MCELIECE348864_SSE_vec128_mul(prod[i + 1], prod[i], tmp); - PQCLEAN_MCELIECE348864_SSE_vec128_mul(tmp, tmp, eval[i + 1]); - } - - PQCLEAN_MCELIECE348864_SSE_vec128_copy(prod[0], tmp); - - // fill matrix - - de_bitslicing(list, prod); - - for (i = 0; i < (1 << GFBITS); i++) { - list[i] <<= GFBITS; - list[i] |= i; - list[i] |= ((uint64_t) perm[i]) << 31; - } - - PQCLEAN_MCELIECE348864_SSE_sort_63b(1 << GFBITS, list); - - to_bitslicing_2x(consts, prod, list); - - for (i = 0; i < (1 << GFBITS); i++) { - perm[i] = list[i] & GFMASK; - } - - for (j = 0; j < NBLOCKS2_H; j++) { - for (k = 0; k < GFBITS; k++) { - mat[ k ][ 2 * j + 0 ] = PQCLEAN_MCELIECE348864_SSE_vec128_extract(prod[ j ][ k ], 0); - mat[ k ][ 2 * j + 1 ] = PQCLEAN_MCELIECE348864_SSE_vec128_extract(prod[ j ][ k ], 1); - } - } - - for (i = 1; i < SYS_T; i++) { - for (j = 0; j < NBLOCKS2_H; 
j++) { - PQCLEAN_MCELIECE348864_SSE_vec128_mul(prod[j], prod[j], consts[j]); - - for (k = 0; k < GFBITS; k++) { - mat[ i * GFBITS + k ][ 2 * j + 0 ] = PQCLEAN_MCELIECE348864_SSE_vec128_extract(prod[ j ][ k ], 0); - mat[ i * GFBITS + k ][ 2 * j + 1 ] = PQCLEAN_MCELIECE348864_SSE_vec128_extract(prod[ j ][ k ], 1); - } - } - } - - // gaussian elimination - - for (i = 0; i < PK_NROWS; i++) { - for (j = 0; j < NBLOCKS_I; j++) { - ops[ i ][ j ] = 0; - } - } - - for (i = 0; i < PK_NROWS; i++) { - ops[ i ][ i / 64 ] = 1; - ops[ i ][ i / 64 ] <<= (i % 64); - } - - for (row = 0; row < PK_NROWS; row++) { - i = row >> 6; - j = row & 63; - - if (row == GFBITS * SYS_T - 32) { - if (mov_columns(mat, perm)) { - return -1; - } - } - - for (k = row + 1; k < PK_NROWS; k++) { - mask = mat[ row ][ i ] >> j; - mask &= 1; - mask -= 1; - - for (c = 0; c < NBLOCKS1_H; c++) { - mat[ row ][ c ] ^= mat[ k ][ c ] & mask; - } - } - - if ( ((mat[ row ][ i ] >> j) & 1) == 0 ) { // return if not systematic - return -1; - } - - for (k = 0; k < row; k++) { - mask = mat[ k ][ i ] >> j; - mask &= 1; - mask = -mask; - - for (c = 0; c < NBLOCKS1_H; c++) { - mat[ k ][ c ] ^= mat[ row ][ c ] & mask; - } - } - - for (k = row + 1; k < PK_NROWS; k++) { - mask = mat[ k ][ i ] >> j; - mask &= 1; - mask = -mask; - - for (c = 0; c < NBLOCKS1_H; c++) { - mat[ k ][ c ] ^= mat[ row ][ c ] & mask; - } - } - } - - for (i = 0; i < GFBITS * SYS_T; i++) { - for (j = NBLOCKS_I; j < NBLOCKS1_H - 1; j++) { - PQCLEAN_MCELIECE348864_SSE_store8(pk, mat[i][j]); - pk += 8; - } - - PQCLEAN_MCELIECE348864_SSE_store_i(pk, mat[i][j], PK_ROW_BYTES % 8); - - pk += PK_ROW_BYTES % 8; - } - - // - - return 0; -} - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/pk_gen.h b/src/kem/classic_mceliece/pqclean_mceliece348864_sse/pk_gen.h deleted file mode 100644 index e54b9e6f6..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/pk_gen.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef PQCLEAN_MCELIECE348864_SSE_PK_GEN_H -#define PQCLEAN_MCELIECE348864_SSE_PK_GEN_H -/* - This file is for public-key generation -*/ - - -#include "gf.h" - -int PQCLEAN_MCELIECE348864_SSE_pk_gen(unsigned char * /*pk*/, uint32_t * /*perm*/, const unsigned char * /*sk*/); - -#endif - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/powers.inc b/src/kem/classic_mceliece/pqclean_mceliece348864_sse/powers.inc deleted file mode 100644 index 8e15bd373..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/powers.inc +++ /dev/null @@ -1,448 +0,0 @@ -{ -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55AA55AA55AA55AA, 0xAA55AA55AA55AA55), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3333CCCC3333CCCC, 0x3333CCCC3333CCCC), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA), -}, -{ 
-PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55AA55AA55AA55AA, 0xAA55AA55AA55AA55), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3333CCCC3333CCCC, 0x3333CCCC3333CCCC), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA), -}, -{ -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F00F0F0F0F, 0xF0F0F0F00F0F0F0F), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55AA55AA55AA55AA, 0xAA55AA55AA55AA55), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3333CCCC3333CCCC, 0x3333CCCC3333CCCC), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA), -}, -{ -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F00F0F0F0F, 0xF0F0F0F00F0F0F0F), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55AA55AA55AA55AA, 0xAA55AA55AA55AA55), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3333CCCC3333CCCC, 0x3333CCCC3333CCCC), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA), -}, -{ -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA55AA55AA55AA55, 0x55AA55AA55AA55AA), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), 
-PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3333CCCC3333CCCC, 0x3333CCCC3333CCCC), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x5555555555555555, 0x5555555555555555), -}, -{ -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA55AA55AA55AA55, 0x55AA55AA55AA55AA), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3333CCCC3333CCCC, 0x3333CCCC3333CCCC), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x5555555555555555, 0x5555555555555555), -}, -{ -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F00F0F0F0F, 0xF0F0F0F00F0F0F0F), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA55AA55AA55AA55, 0x55AA55AA55AA55AA), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3333CCCC3333CCCC, 0x3333CCCC3333CCCC), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x5555555555555555, 0x5555555555555555), -}, -{ -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F00F0F0F0F, 0xF0F0F0F00F0F0F0F), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA55AA55AA55AA55, 0x55AA55AA55AA55AA), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3333CCCC3333CCCC, 0x3333CCCC3333CCCC), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x5555555555555555, 0x5555555555555555), -}, -{ -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000), 
-PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55AA55AA55AA55AA, 0xAA55AA55AA55AA55), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCC3333CCCC3333, 0xCCCC3333CCCC3333), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA), -}, -{ -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55AA55AA55AA55AA, 0xAA55AA55AA55AA55), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCC3333CCCC3333, 0xCCCC3333CCCC3333), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA), -}, -{ -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F00F0F0F0F, 0xF0F0F0F00F0F0F0F), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55AA55AA55AA55AA, 0xAA55AA55AA55AA55), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCC3333CCCC3333, 0xCCCC3333CCCC3333), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA), -}, -{ -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F00F0F0F0F, 0xF0F0F0F00F0F0F0F), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55AA55AA55AA55AA, 0xAA55AA55AA55AA55), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00), 
-PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCC3333CCCC3333, 0xCCCC3333CCCC3333), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA), -}, -{ -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA55AA55AA55AA55, 0x55AA55AA55AA55AA), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCC3333CCCC3333, 0xCCCC3333CCCC3333), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x5555555555555555, 0x5555555555555555), -}, -{ -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA55AA55AA55AA55, 0x55AA55AA55AA55AA), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCC3333CCCC3333, 0xCCCC3333CCCC3333), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x5555555555555555, 0x5555555555555555), -}, -{ -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F00F0F0F0F, 0xF0F0F0F00F0F0F0F), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA55AA55AA55AA55, 0x55AA55AA55AA55AA), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCC3333CCCC3333, 0xCCCC3333CCCC3333), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x5555555555555555, 0x5555555555555555), -}, -{ -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F00F0F0F0F, 0xF0F0F0F00F0F0F0F), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F), 
-PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA55AA55AA55AA55, 0x55AA55AA55AA55AA), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCC3333CCCC3333, 0xCCCC3333CCCC3333), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x5555555555555555, 0x5555555555555555), -}, -{ -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F00F0F0F0F, 0xF0F0F0F00F0F0F0F), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55AA55AA55AA55AA, 0xAA55AA55AA55AA55), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3333CCCC3333CCCC, 0x3333CCCC3333CCCC), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA), -}, -{ -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F00F0F0F0F, 0xF0F0F0F00F0F0F0F), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55AA55AA55AA55AA, 0xAA55AA55AA55AA55), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3333CCCC3333CCCC, 0x3333CCCC3333CCCC), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA), -}, -{ -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55AA55AA55AA55AA, 0xAA55AA55AA55AA55), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3333CCCC3333CCCC, 0x3333CCCC3333CCCC), 
-PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA), -}, -{ -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55AA55AA55AA55AA, 0xAA55AA55AA55AA55), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3333CCCC3333CCCC, 0x3333CCCC3333CCCC), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA), -}, -{ -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F00F0F0F0F, 0xF0F0F0F00F0F0F0F), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA55AA55AA55AA55, 0x55AA55AA55AA55AA), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3333CCCC3333CCCC, 0x3333CCCC3333CCCC), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x5555555555555555, 0x5555555555555555), -}, -{ -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F00F0F0F0F, 0xF0F0F0F00F0F0F0F), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA55AA55AA55AA55, 0x55AA55AA55AA55AA), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3333CCCC3333CCCC, 0x3333CCCC3333CCCC), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x5555555555555555, 0x5555555555555555), -}, -{ -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA55AA55AA55AA55, 0x55AA55AA55AA55AA), 
-PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3333CCCC3333CCCC, 0x3333CCCC3333CCCC), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x5555555555555555, 0x5555555555555555), -}, -{ -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA55AA55AA55AA55, 0x55AA55AA55AA55AA), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3333CCCC3333CCCC, 0x3333CCCC3333CCCC), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x5555555555555555, 0x5555555555555555), -}, -{ -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F00F0F0F0F, 0xF0F0F0F00F0F0F0F), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55AA55AA55AA55AA, 0xAA55AA55AA55AA55), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCC3333CCCC3333, 0xCCCC3333CCCC3333), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA), -}, -{ -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F00F0F0F0F, 0xF0F0F0F00F0F0F0F), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55AA55AA55AA55AA, 0xAA55AA55AA55AA55), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCC3333CCCC3333, 0xCCCC3333CCCC3333), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA), -}, -{ -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), 
-PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55AA55AA55AA55AA, 0xAA55AA55AA55AA55), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCC3333CCCC3333, 0xCCCC3333CCCC3333), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA), -}, -{ -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55AA55AA55AA55AA, 0xAA55AA55AA55AA55), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCC3333CCCC3333, 0xCCCC3333CCCC3333), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA), -}, -{ -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F00F0F0F0F, 0xF0F0F0F00F0F0F0F), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA55AA55AA55AA55, 0x55AA55AA55AA55AA), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCC3333CCCC3333, 0xCCCC3333CCCC3333), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x5555555555555555, 0x5555555555555555), -}, -{ -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F00F0F0F0F, 0xF0F0F0F00F0F0F0F), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA55AA55AA55AA55, 0x55AA55AA55AA55AA), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), 
-PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCC3333CCCC3333, 0xCCCC3333CCCC3333), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x5555555555555555, 0x5555555555555555), -}, -{ -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA55AA55AA55AA55, 0x55AA55AA55AA55AA), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCC3333CCCC3333, 0xCCCC3333CCCC3333), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x5555555555555555, 0x5555555555555555), -}, -{ -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA55AA55AA55AA55, 0x55AA55AA55AA55AA), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCC3333CCCC3333, 0xCCCC3333CCCC3333), -PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x5555555555555555, 0x5555555555555555), -}, diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/scalars.inc b/src/kem/classic_mceliece/pqclean_mceliece348864_sse/scalars.inc deleted file mode 100644 index aa8f64b95..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/scalars.inc +++ /dev/null @@ -1,70 +0,0 @@ -{ - 0XF3CFC030FC30F003, - 0X3FCF0F003C00C00C, - 0X30033CC300C0C03C, - 0XCCFF0F3C0F30F0C0, - 0X0300C03FF303C3F0, - 0X3FFF3C0FF0CCCCC0, - 0XF3FFF0C00F3C3CC0, - 0X3003333FFFC3C000, - 0X0FF30FFFC3FFF300, - 0XFFC0F300F0F0CC00, - 0XC0CFF3FCCC3CFC00, - 0XFC3C03F0F330C000, -}, -{ - 0X000F00000000F00F, - 0X00000F00F00000F0, - 0X0F00000F00000F00, - 0XF00F00F00F000000, - 0X00F00000000000F0, - 0X0000000F00000000, - 0XF00000000F00F000, - 0X00F00F00000F0000, - 0X0000F00000F00F00, - 0X000F00F00F00F000, - 0X00F00F0000000000, - 0X0000000000F00000, -}, -{ - 0X0000FF00FF0000FF, - 0X0000FF000000FF00, - 0XFF0000FF00FF0000, - 0XFFFF0000FF000000, - 0X00FF00FF00FF0000, - 0X0000FFFFFF000000, - 0X00FFFF00FF000000, - 0XFFFFFF0000FF0000, - 0XFFFF00FFFF00FF00, - 0X0000FF0000000000, - 0XFFFFFF00FF000000, - 0X00FF000000000000, -}, -{ - 0X000000000000FFFF, - 0X00000000FFFF0000, - 0X0000000000000000, - 0XFFFF000000000000, - 0X00000000FFFF0000, - 
0X0000FFFF00000000, - 0X0000000000000000, - 0X00000000FFFF0000, - 0X0000FFFF00000000, - 0X0000000000000000, - 0X0000000000000000, - 0X0000000000000000, -}, -{ - 0X00000000FFFFFFFF, - 0XFFFFFFFF00000000, - 0XFFFFFFFF00000000, - 0X0000000000000000, - 0X0000000000000000, - 0XFFFFFFFF00000000, - 0X0000000000000000, - 0X0000000000000000, - 0XFFFFFFFF00000000, - 0X0000000000000000, - 0X0000000000000000, - 0X0000000000000000, -} diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/scalars_2x.inc b/src/kem/classic_mceliece/pqclean_mceliece348864_sse/scalars_2x.inc deleted file mode 100644 index 8eb780322..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/scalars_2x.inc +++ /dev/null @@ -1,70 +0,0 @@ -{ - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xf3cfc030fc30f003, 0x000c03c0c3c0330c), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3fcf0f003c00c00c, 0xf330cffcc00f33c0), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x30033cc300c0c03c, 0xccf330f00f3c0333), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xccff0f3c0f30f0c0, 0xff03fff3ff0cf0c0), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0300c03ff303c3f0, 0x3cc3fcf00fcc303c), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3fff3c0ff0ccccc0, 0x0f000c0fc30303f3), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xf3fff0c00f3c3cc0, 0xcf0fc3ff333ccf3c), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3003333fffc3c000, 0x003f3fc3c0ff333f), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0ff30fffc3fff300, 0x3cc3f0f3cf0ff00f), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xffc0f300f0f0cc00, 0xf3f33cc03fc30cc0), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xc0cff3fccc3cfc00, 0x3cc330cfc333f33f), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xfc3c03f0f330c000, 0x3cc0303ff3c3fffc), -}, -{ - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x000f00000000f00f, 0x0f00f00f00000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00000f00f00000f0, 0xf00000000000f000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0f00000f00000f00, 0x00000f00000000f0), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xf00f00f00f000000, 0x0f00f00000f00000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00f00000000000f0, 0x000f00000f00f00f), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000f00000000, 0x00f00f00f00f0000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xf00000000f00f000, 0x0f00f00000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00f00f00000f0000, 0x000000000f000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000f00000f00f00, 0x00f00000000f00f0), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x000f00f00f00f000, 0x0000f00f00000f00), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00f00f0000000000, 0xf00000f00000f00f), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000f00000, 0x00000f00f00f00f0), -}, -{ - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000ff00ff0000ff, 0xff00ffffff000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000ff000000ff00, 0xff0000ffff000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xff0000ff00ff0000, 0xffff00ffff000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xffff0000ff000000, 0xff00ffffffffff00), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00ff00ff00ff0000, 0x00000000ff00ff00), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000ffffff000000, 0xffffffff00ff0000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00ffff00ff000000, 0x00ffffff00ff0000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xffffff0000ff0000, 0xffff00ffff00ffff), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xffff00ffff00ff00, 0xffff0000ffffffff), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000ff0000000000, 
0xff00000000ff0000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xffffff00ff000000, 0x000000ff00ff00ff), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00ff000000000000, 0x00ff00ff00ffff00), -}, -{ - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x000000000000ffff, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00000000ffff0000, 0xffff000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xffff000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00000000ffff0000, 0xffff00000000ffff), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000ffff00000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000ffff00000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00000000ffff0000, 0xffff00000000ffff), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000ffff00000000, 0x00000000ffff0000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xffff00000000ffff), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x00000000ffff0000), -}, -{ - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00000000ffffffff, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xffffffff00000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xffffffff00000000, 0x00000000ffffffff), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xffffffff00000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xffffffff00000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xffffffff00000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xffffffff00000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xffffffffffffffff), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xffffffff00000000, 0xffffffff00000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xffffffffffffffff), - PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xffffffff00000000), -}, diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/sk_gen.c b/src/kem/classic_mceliece/pqclean_mceliece348864_sse/sk_gen.c deleted file mode 100644 index 255182fbe..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/sk_gen.c +++ /dev/null @@ -1,98 +0,0 @@ -/* - This file is for secret-key generation -*/ - -#include "sk_gen.h" - -#include "controlbits.h" -#include "gf.h" -#include "params.h" -#include "util.h" - -/* input: f, element in GF((2^m)^t) */ -/* output: out, minimal polynomial of f */ -/* return: 0 for success and -1 for failure */ -int PQCLEAN_MCELIECE348864_SSE_genpoly_gen(gf *out, gf *f) { - int i, j, k, c; - - gf mat[ SYS_T + 1 ][ SYS_T ]; - gf mask, inv, t; - - // fill matrix - - mat[0][0] = 1; - - for (i = 1; i < SYS_T; i++) { - mat[0][i] = 0; - } - - for (i = 0; i < SYS_T; i++) { - mat[1][i] = f[i]; - } - - for (j = 2; j <= SYS_T; j++) { - PQCLEAN_MCELIECE348864_SSE_GF_mul(mat[j], mat[j - 1], f); - } - - // gaussian - - for (j = 0; j < SYS_T; j++) { - for (k = j + 1; k < SYS_T; k++) { - mask = PQCLEAN_MCELIECE348864_SSE_gf_iszero(mat[ j ][ j ]); - - for (c = j; c < SYS_T + 1; c++) { - mat[ c ][ j ] ^= mat[ c ][ k ] & mask; - } - - } - - if ( mat[ j ][ j ] == 0 ) { // return if not systematic - return -1; - } - - inv = PQCLEAN_MCELIECE348864_SSE_gf_inv(mat[j][j]); - - for (c = j; c < SYS_T 
+ 1; c++) { - mat[ c ][ j ] = PQCLEAN_MCELIECE348864_SSE_gf_mul(mat[ c ][ j ], inv) ; - } - - for (k = 0; k < SYS_T; k++) { - if (k != j) { - t = mat[ j ][ k ]; - - for (c = j; c < SYS_T + 1; c++) { - mat[ c ][ k ] ^= PQCLEAN_MCELIECE348864_SSE_gf_mul(mat[ c ][ j ], t); - } - } - } - } - - for (i = 0; i < SYS_T; i++) { - out[i] = mat[ SYS_T ][ i ]; - } - - return 0; -} - -/* input: permutation p represented as a list of 32-bit integers */ -/* output: -1 if some integer repeats in p */ -/* 0 otherwise */ -int PQCLEAN_MCELIECE348864_SSE_perm_check(const uint32_t *p) { - int i; - uint64_t list[1 << GFBITS]; - - for (i = 0; i < (1 << GFBITS); i++) { - list[i] = p[i]; - } - - PQCLEAN_MCELIECE348864_SSE_sort_63b(1 << GFBITS, list); - - for (i = 1; i < (1 << GFBITS); i++) { - if (list[i - 1] == list[i]) { - return -1; - } - } - - return 0; -} - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/sk_gen.h b/src/kem/classic_mceliece/pqclean_mceliece348864_sse/sk_gen.h deleted file mode 100644 index ca96519e4..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/sk_gen.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef PQCLEAN_MCELIECE348864_SSE_SK_GEN_H -#define PQCLEAN_MCELIECE348864_SSE_SK_GEN_H -/* - This file is for secret-key generation -*/ - - -#include "gf.h" - -#include <stdint.h> - -int PQCLEAN_MCELIECE348864_SSE_genpoly_gen(gf * /*out*/, gf * /*f*/); -int PQCLEAN_MCELIECE348864_SSE_perm_check(const uint32_t * /*p*/); - -#endif - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/syndrome_asm.S b/src/kem/classic_mceliece/pqclean_mceliece348864_sse/syndrome_asm.S deleted file mode 100644 index 66c9efecc..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/syndrome_asm.S +++ /dev/null @@ -1,740 +0,0 @@ - -# qhasm: int64 input_0 - -# qhasm: int64 input_1 - -# qhasm: int64 input_2 - -# qhasm: int64 input_3 - -# qhasm: int64 input_4 - -# qhasm: int64 input_5 - -# qhasm: stack64 input_6 - -# qhasm: stack64 input_7 - -# qhasm: int64 caller_r11 - -# qhasm: int64 caller_r12 - -# qhasm: int64 caller_r13 - -# qhasm: int64 caller_r14 - -# qhasm: int64 caller_r15 - -# qhasm: int64 caller_rbx - -# qhasm: int64 caller_rbp - -# qhasm: int64 b64 - -# qhasm: int64 synd - -# qhasm: int64 addr - -# qhasm: int64 c - -# qhasm: int64 c_all - -# qhasm: int64 row - -# qhasm: int64 p - -# qhasm: int64 e - -# qhasm: int64 s - -# qhasm: reg128 pp - -# qhasm: reg128 ee - -# qhasm: reg128 ss - -# qhasm: int64 buf_ptr - -# qhasm: stack128 buf - -# qhasm: enter syndrome_asm -.p2align 5 -.global _PQCLEAN_MCELIECE348864_SSE_syndrome_asm -.global PQCLEAN_MCELIECE348864_SSE_syndrome_asm -_PQCLEAN_MCELIECE348864_SSE_syndrome_asm: -PQCLEAN_MCELIECE348864_SSE_syndrome_asm: -mov %rsp,%r11 -and $31,%r11 -add $32,%r11 -sub %r11,%rsp - -# qhasm: input_1 += 260780 -# asm 1: add $260780,buf_ptr=int64#4 -# asm 2: leaq buf_ptr=%rcx -leaq 0(%rsp),%rcx - -# qhasm: row = 768 -# asm 1: mov $768,>row=int64#5 -# asm 2: mov $768,>row=%r8 -mov $768,%r8 - -# qhasm: loop: -._loop: - -# qhasm: row -= 1 -# asm 1: sub $1,ss=reg128#1 -# asm 2: movdqu 0(ss=%xmm0 -movdqu 0(%rsi),%xmm0 - -# qhasm: ee = mem128[ input_2 + 96 ] -# asm 1: movdqu 96(ee=reg128#2 -# asm 2: movdqu 96(ee=%xmm1 -movdqu 96(%rdx),%xmm1 - -# qhasm: ss &= ee -# asm 1: pand pp=reg128#2 -# asm 2: movdqu 16(pp=%xmm1 -movdqu 16(%rsi),%xmm1 - -# qhasm: ee = mem128[ input_2 + 112 ] -# asm 1: movdqu 112(ee=reg128#3 -# asm 2: movdqu 112(ee=%xmm2 -movdqu 112(%rdx),%xmm2 - -# qhasm: pp &= ee -# asm 1: pand pp=reg128#2 -# asm 2: movdqu 32(pp=%xmm1
-movdqu 32(%rsi),%xmm1 - -# qhasm: ee = mem128[ input_2 + 128 ] -# asm 1: movdqu 128(ee=reg128#3 -# asm 2: movdqu 128(ee=%xmm2 -movdqu 128(%rdx),%xmm2 - -# qhasm: pp &= ee -# asm 1: pand pp=reg128#2 -# asm 2: movdqu 48(pp=%xmm1 -movdqu 48(%rsi),%xmm1 - -# qhasm: ee = mem128[ input_2 + 144 ] -# asm 1: movdqu 144(ee=reg128#3 -# asm 2: movdqu 144(ee=%xmm2 -movdqu 144(%rdx),%xmm2 - -# qhasm: pp &= ee -# asm 1: pand pp=reg128#2 -# asm 2: movdqu 64(pp=%xmm1 -movdqu 64(%rsi),%xmm1 - -# qhasm: ee = mem128[ input_2 + 160 ] -# asm 1: movdqu 160(ee=reg128#3 -# asm 2: movdqu 160(ee=%xmm2 -movdqu 160(%rdx),%xmm2 - -# qhasm: pp &= ee -# asm 1: pand pp=reg128#2 -# asm 2: movdqu 80(pp=%xmm1 -movdqu 80(%rsi),%xmm1 - -# qhasm: ee = mem128[ input_2 + 176 ] -# asm 1: movdqu 176(ee=reg128#3 -# asm 2: movdqu 176(ee=%xmm2 -movdqu 176(%rdx),%xmm2 - -# qhasm: pp &= ee -# asm 1: pand pp=reg128#2 -# asm 2: movdqu 96(pp=%xmm1 -movdqu 96(%rsi),%xmm1 - -# qhasm: ee = mem128[ input_2 + 192 ] -# asm 1: movdqu 192(ee=reg128#3 -# asm 2: movdqu 192(ee=%xmm2 -movdqu 192(%rdx),%xmm2 - -# qhasm: pp &= ee -# asm 1: pand pp=reg128#2 -# asm 2: movdqu 112(pp=%xmm1 -movdqu 112(%rsi),%xmm1 - -# qhasm: ee = mem128[ input_2 + 208 ] -# asm 1: movdqu 208(ee=reg128#3 -# asm 2: movdqu 208(ee=%xmm2 -movdqu 208(%rdx),%xmm2 - -# qhasm: pp &= ee -# asm 1: pand pp=reg128#2 -# asm 2: movdqu 128(pp=%xmm1 -movdqu 128(%rsi),%xmm1 - -# qhasm: ee = mem128[ input_2 + 224 ] -# asm 1: movdqu 224(ee=reg128#3 -# asm 2: movdqu 224(ee=%xmm2 -movdqu 224(%rdx),%xmm2 - -# qhasm: pp &= ee -# asm 1: pand pp=reg128#2 -# asm 2: movdqu 144(pp=%xmm1 -movdqu 144(%rsi),%xmm1 - -# qhasm: ee = mem128[ input_2 + 240 ] -# asm 1: movdqu 240(ee=reg128#3 -# asm 2: movdqu 240(ee=%xmm2 -movdqu 240(%rdx),%xmm2 - -# qhasm: pp &= ee -# asm 1: pand pp=reg128#2 -# asm 2: movdqu 160(pp=%xmm1 -movdqu 160(%rsi),%xmm1 - -# qhasm: ee = mem128[ input_2 + 256 ] -# asm 1: movdqu 256(ee=reg128#3 -# asm 2: movdqu 256(ee=%xmm2 -movdqu 256(%rdx),%xmm2 - -# qhasm: pp &= ee -# asm 1: pand pp=reg128#2 -# asm 2: movdqu 176(pp=%xmm1 -movdqu 176(%rsi),%xmm1 - -# qhasm: ee = mem128[ input_2 + 272 ] -# asm 1: movdqu 272(ee=reg128#3 -# asm 2: movdqu 272(ee=%xmm2 -movdqu 272(%rdx),%xmm2 - -# qhasm: pp &= ee -# asm 1: pand pp=reg128#2 -# asm 2: movdqu 192(pp=%xmm1 -movdqu 192(%rsi),%xmm1 - -# qhasm: ee = mem128[ input_2 + 288 ] -# asm 1: movdqu 288(ee=reg128#3 -# asm 2: movdqu 288(ee=%xmm2 -movdqu 288(%rdx),%xmm2 - -# qhasm: pp &= ee -# asm 1: pand pp=reg128#2 -# asm 2: movdqu 208(pp=%xmm1 -movdqu 208(%rsi),%xmm1 - -# qhasm: ee = mem128[ input_2 + 304 ] -# asm 1: movdqu 304(ee=reg128#3 -# asm 2: movdqu 304(ee=%xmm2 -movdqu 304(%rdx),%xmm2 - -# qhasm: pp &= ee -# asm 1: pand pp=reg128#2 -# asm 2: movdqu 224(pp=%xmm1 -movdqu 224(%rsi),%xmm1 - -# qhasm: ee = mem128[ input_2 + 320 ] -# asm 1: movdqu 320(ee=reg128#3 -# asm 2: movdqu 320(ee=%xmm2 -movdqu 320(%rdx),%xmm2 - -# qhasm: pp &= ee -# asm 1: pand pp=reg128#2 -# asm 2: movdqu 240(pp=%xmm1 -movdqu 240(%rsi),%xmm1 - -# qhasm: ee = mem128[ input_2 + 336 ] -# asm 1: movdqu 336(ee=reg128#3 -# asm 2: movdqu 336(ee=%xmm2 -movdqu 336(%rdx),%xmm2 - -# qhasm: pp &= ee -# asm 1: pand pp=reg128#2 -# asm 2: movdqu 256(pp=%xmm1 -movdqu 256(%rsi),%xmm1 - -# qhasm: ee = mem128[ input_2 + 352 ] -# asm 1: movdqu 352(ee=reg128#3 -# asm 2: movdqu 352(ee=%xmm2 -movdqu 352(%rdx),%xmm2 - -# qhasm: pp &= ee -# asm 1: pand pp=reg128#2 -# asm 2: movdqu 272(pp=%xmm1 -movdqu 272(%rsi),%xmm1 - -# qhasm: ee = mem128[ input_2 + 368 ] -# asm 1: movdqu 368(ee=reg128#3 -# asm 2: 
movdqu 368(ee=%xmm2 -movdqu 368(%rdx),%xmm2 - -# qhasm: pp &= ee -# asm 1: pand pp=reg128#2 -# asm 2: movdqu 288(pp=%xmm1 -movdqu 288(%rsi),%xmm1 - -# qhasm: ee = mem128[ input_2 + 384 ] -# asm 1: movdqu 384(ee=reg128#3 -# asm 2: movdqu 384(ee=%xmm2 -movdqu 384(%rdx),%xmm2 - -# qhasm: pp &= ee -# asm 1: pand pp=reg128#2 -# asm 2: movdqu 304(pp=%xmm1 -movdqu 304(%rsi),%xmm1 - -# qhasm: ee = mem128[ input_2 + 400 ] -# asm 1: movdqu 400(ee=reg128#3 -# asm 2: movdqu 400(ee=%xmm2 -movdqu 400(%rdx),%xmm2 - -# qhasm: pp &= ee -# asm 1: pand pp=reg128#2 -# asm 2: movdqu 320(pp=%xmm1 -movdqu 320(%rsi),%xmm1 - -# qhasm: ee = mem128[ input_2 + 416 ] -# asm 1: movdqu 416(ee=reg128#3 -# asm 2: movdqu 416(ee=%xmm2 -movdqu 416(%rdx),%xmm2 - -# qhasm: pp &= ee -# asm 1: pand buf=stack128#1 -# asm 2: movdqa buf=0(%rsp) -movdqa %xmm0,0(%rsp) - -# qhasm: s = *(uint32 *)(input_1 + 336) -# asm 1: movl 336(s=int64#6d -# asm 2: movl 336(s=%r9d -movl 336(%rsi),%r9d - -# qhasm: e = *(uint32 *)(input_2 + 432) -# asm 1: movl 432(e=int64#7d -# asm 2: movl 432(e=%eax -movl 432(%rdx),%eax - -# qhasm: s &= e -# asm 1: and c_all=int64#6 -# asm 2: popcnt c_all=%r9 -popcnt %r9, %r9 - -# qhasm: b64 = mem64[ buf_ptr + 0 ] -# asm 1: movq 0(b64=int64#7 -# asm 2: movq 0(b64=%rax -movq 0(%rcx),%rax - -# qhasm: c = count(b64) -# asm 1: popcnt c=int64#7 -# asm 2: popcnt c=%rax -popcnt %rax, %rax - -# qhasm: c_all ^= c -# asm 1: xor b64=int64#7 -# asm 2: movq 8(b64=%rax -movq 8(%rcx),%rax - -# qhasm: c = count(b64) -# asm 1: popcnt c=int64#7 -# asm 2: popcnt c=%rax -popcnt %rax, %rax - -# qhasm: c_all ^= c -# asm 1: xor addr=int64#7 -# asm 2: mov addr=%rax -mov %r8,%rax - -# qhasm: (uint64) addr >>= 3 -# asm 1: shr $3,synd=int64#8 -# asm 2: movzbq 0(synd=%r10 -movzbq 0(%rax),%r10 - -# qhasm: synd <<= 1 -# asm 1: shl $1,ss=reg128#1 -# asm 2: movdqu 0(ss=%xmm0 -movdqu 0(%rdi),%xmm0 - -# qhasm: ee = mem128[ input_2 + 0 ] -# asm 1: movdqu 0(ee=reg128#2 -# asm 2: movdqu 0(ee=%xmm1 -movdqu 0(%rdx),%xmm1 - -# qhasm: ss ^= ee -# asm 1: pxor ss=reg128#1 -# asm 2: movdqu 16(ss=%xmm0 -movdqu 16(%rdi),%xmm0 - -# qhasm: ee = mem128[ input_2 + 16 ] -# asm 1: movdqu 16(ee=reg128#2 -# asm 2: movdqu 16(ee=%xmm1 -movdqu 16(%rdx),%xmm1 - -# qhasm: ss ^= ee -# asm 1: pxor ss=reg128#1 -# asm 2: movdqu 32(ss=%xmm0 -movdqu 32(%rdi),%xmm0 - -# qhasm: ee = mem128[ input_2 + 32 ] -# asm 1: movdqu 32(ee=reg128#2 -# asm 2: movdqu 32(ee=%xmm1 -movdqu 32(%rdx),%xmm1 - -# qhasm: ss ^= ee -# asm 1: pxor ss=reg128#1 -# asm 2: movdqu 48(ss=%xmm0 -movdqu 48(%rdi),%xmm0 - -# qhasm: ee = mem128[ input_2 + 48 ] -# asm 1: movdqu 48(ee=reg128#2 -# asm 2: movdqu 48(ee=%xmm1 -movdqu 48(%rdx),%xmm1 - -# qhasm: ss ^= ee -# asm 1: pxor ss=reg128#1 -# asm 2: movdqu 64(ss=%xmm0 -movdqu 64(%rdi),%xmm0 - -# qhasm: ee = mem128[ input_2 + 64 ] -# asm 1: movdqu 64(ee=reg128#2 -# asm 2: movdqu 64(ee=%xmm1 -movdqu 64(%rdx),%xmm1 - -# qhasm: ss ^= ee -# asm 1: pxor ss=reg128#1 -# asm 2: movdqu 80(ss=%xmm0 -movdqu 80(%rdi),%xmm0 - -# qhasm: ee = mem128[ input_2 + 80 ] -# asm 1: movdqu 80(ee=reg128#2 -# asm 2: movdqu 80(ee=%xmm1 -movdqu 80(%rdx),%xmm1 - -# qhasm: ss ^= ee -# asm 1: pxor - -void PQCLEAN_MCELIECE348864_SSE_transpose_64x64(uint64_t *in); -void PQCLEAN_MCELIECE348864_SSE_transpose_64x128_sp(vec128 *in); - -#endif - diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/transpose_64x128_sp_asm.S b/src/kem/classic_mceliece/pqclean_mceliece348864_sse/transpose_64x128_sp_asm.S deleted file mode 100644 index b3aae4909..000000000 --- 
a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/transpose_64x128_sp_asm.S +++ /dev/null @@ -1,8145 +0,0 @@ - -# qhasm: int64 input_0 - -# qhasm: int64 input_1 - -# qhasm: int64 input_2 - -# qhasm: int64 input_3 - -# qhasm: int64 input_4 - -# qhasm: int64 input_5 - -# qhasm: stack64 input_6 - -# qhasm: stack64 input_7 - -# qhasm: int64 caller_r11 - -# qhasm: int64 caller_r12 - -# qhasm: int64 caller_r13 - -# qhasm: int64 caller_r14 - -# qhasm: int64 caller_r15 - -# qhasm: int64 caller_rbx - -# qhasm: int64 caller_rbp - -# qhasm: reg128 x0 - -# qhasm: reg128 x1 - -# qhasm: reg128 x2 - -# qhasm: reg128 x3 - -# qhasm: reg128 x4 - -# qhasm: reg128 x5 - -# qhasm: reg128 x6 - -# qhasm: reg128 x7 - -# qhasm: reg128 t0 - -# qhasm: reg128 t1 - -# qhasm: reg128 v00 - -# qhasm: reg128 v01 - -# qhasm: reg128 v10 - -# qhasm: reg128 v11 - -# qhasm: reg128 mask0 - -# qhasm: reg128 mask1 - -# qhasm: reg128 mask2 - -# qhasm: reg128 mask3 - -# qhasm: reg128 mask4 - -# qhasm: reg128 mask5 - -# qhasm: enter transpose_64x128_sp_asm -.p2align 5 -.global _PQCLEAN_MCELIECE348864_SSE_transpose_64x128_sp_asm -.global PQCLEAN_MCELIECE348864_SSE_transpose_64x128_sp_asm -_PQCLEAN_MCELIECE348864_SSE_transpose_64x128_sp_asm: -PQCLEAN_MCELIECE348864_SSE_transpose_64x128_sp_asm: -mov %rsp,%r11 -and $31,%r11 -add $0,%r11 -sub %r11,%rsp - -# qhasm: mask0 aligned= mem128[ PQCLEAN_MCELIECE348864_SSE_MASK5_0 ] -# asm 1: movdqa PQCLEAN_MCELIECE348864_SSE_MASK5_0(%rip),>mask0=reg128#1 -# asm 2: movdqa PQCLEAN_MCELIECE348864_SSE_MASK5_0(%rip),>mask0=%xmm0 -movdqa PQCLEAN_MCELIECE348864_SSE_MASK5_0(%rip),%xmm0 - -# qhasm: mask1 aligned= mem128[ PQCLEAN_MCELIECE348864_SSE_MASK5_1 ] -# asm 1: movdqa PQCLEAN_MCELIECE348864_SSE_MASK5_1(%rip),>mask1=reg128#2 -# asm 2: movdqa PQCLEAN_MCELIECE348864_SSE_MASK5_1(%rip),>mask1=%xmm1 -movdqa PQCLEAN_MCELIECE348864_SSE_MASK5_1(%rip),%xmm1 - -# qhasm: mask2 aligned= mem128[ PQCLEAN_MCELIECE348864_SSE_MASK4_0 ] -# asm 1: movdqa PQCLEAN_MCELIECE348864_SSE_MASK4_0(%rip),>mask2=reg128#3 -# asm 2: movdqa PQCLEAN_MCELIECE348864_SSE_MASK4_0(%rip),>mask2=%xmm2 -movdqa PQCLEAN_MCELIECE348864_SSE_MASK4_0(%rip),%xmm2 - -# qhasm: mask3 aligned= mem128[ PQCLEAN_MCELIECE348864_SSE_MASK4_1 ] -# asm 1: movdqa PQCLEAN_MCELIECE348864_SSE_MASK4_1(%rip),>mask3=reg128#4 -# asm 2: movdqa PQCLEAN_MCELIECE348864_SSE_MASK4_1(%rip),>mask3=%xmm3 -movdqa PQCLEAN_MCELIECE348864_SSE_MASK4_1(%rip),%xmm3 - -# qhasm: mask4 aligned= mem128[ PQCLEAN_MCELIECE348864_SSE_MASK3_0 ] -# asm 1: movdqa PQCLEAN_MCELIECE348864_SSE_MASK3_0(%rip),>mask4=reg128#5 -# asm 2: movdqa PQCLEAN_MCELIECE348864_SSE_MASK3_0(%rip),>mask4=%xmm4 -movdqa PQCLEAN_MCELIECE348864_SSE_MASK3_0(%rip),%xmm4 - -# qhasm: mask5 aligned= mem128[ PQCLEAN_MCELIECE348864_SSE_MASK3_1 ] -# asm 1: movdqa PQCLEAN_MCELIECE348864_SSE_MASK3_1(%rip),>mask5=reg128#6 -# asm 2: movdqa PQCLEAN_MCELIECE348864_SSE_MASK3_1(%rip),>mask5=%xmm5 -movdqa PQCLEAN_MCELIECE348864_SSE_MASK3_1(%rip),%xmm5 - -# qhasm: x0 = mem128[ input_0 + 0 ] -# asm 1: movdqu 0(x0=reg128#7 -# asm 2: movdqu 0(x0=%xmm6 -movdqu 0(%rdi),%xmm6 - -# qhasm: x1 = mem128[ input_0 + 128 ] -# asm 1: movdqu 128(x1=reg128#8 -# asm 2: movdqu 128(x1=%xmm7 -movdqu 128(%rdi),%xmm7 - -# qhasm: x2 = mem128[ input_0 + 256 ] -# asm 1: movdqu 256(x2=reg128#9 -# asm 2: movdqu 256(x2=%xmm8 -movdqu 256(%rdi),%xmm8 - -# qhasm: x3 = mem128[ input_0 + 384 ] -# asm 1: movdqu 384(x3=reg128#10 -# asm 2: movdqu 384(x3=%xmm9 -movdqu 384(%rdi),%xmm9 - -# qhasm: x4 = mem128[ input_0 + 512 ] -# asm 1: movdqu 512(x4=reg128#11 -# asm 2: 
movdqu 512(x4=%xmm10 -movdqu 512(%rdi),%xmm10 - -# qhasm: x5 = mem128[ input_0 + 640 ] -# asm 1: movdqu 640(x5=reg128#12 -# asm 2: movdqu 640(x5=%xmm11 -movdqu 640(%rdi),%xmm11 - -# qhasm: x6 = mem128[ input_0 + 768 ] -# asm 1: movdqu 768(x6=reg128#13 -# asm 2: movdqu 768(x6=%xmm12 -movdqu 768(%rdi),%xmm12 - -# qhasm: x7 = mem128[ input_0 + 896 ] -# asm 1: movdqu 896(x7=reg128#14 -# asm 2: movdqu 896(x7=%xmm13 -movdqu 896(%rdi),%xmm13 - -# qhasm: v00 = x0 & mask0 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm0,%xmm6,%xmm14 - -# qhasm: 2x v10 = x4 << 32 -# asm 1: vpsllq $32,v10=reg128#16 -# asm 2: vpsllq $32,v10=%xmm15 -vpsllq $32,%xmm10,%xmm15 - -# qhasm: 2x v01 = x0 unsigned>> 32 -# asm 1: vpsrlq $32,v01=reg128#7 -# asm 2: vpsrlq $32,v01=%xmm6 -vpsrlq $32,%xmm6,%xmm6 - -# qhasm: v11 = x4 & mask1 -# asm 1: vpand v11=reg128#11 -# asm 2: vpand v11=%xmm10 -vpand %xmm1,%xmm10,%xmm10 - -# qhasm: x0 = v00 | v10 -# asm 1: vpor x0=reg128#15 -# asm 2: vpor x0=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: x4 = v01 | v11 -# asm 1: vpor x4=reg128#7 -# asm 2: vpor x4=%xmm6 -vpor %xmm10,%xmm6,%xmm6 - -# qhasm: v00 = x1 & mask0 -# asm 1: vpand v00=reg128#11 -# asm 2: vpand v00=%xmm10 -vpand %xmm0,%xmm7,%xmm10 - -# qhasm: 2x v10 = x5 << 32 -# asm 1: vpsllq $32,v10=reg128#16 -# asm 2: vpsllq $32,v10=%xmm15 -vpsllq $32,%xmm11,%xmm15 - -# qhasm: 2x v01 = x1 unsigned>> 32 -# asm 1: vpsrlq $32,v01=reg128#8 -# asm 2: vpsrlq $32,v01=%xmm7 -vpsrlq $32,%xmm7,%xmm7 - -# qhasm: v11 = x5 & mask1 -# asm 1: vpand v11=reg128#12 -# asm 2: vpand v11=%xmm11 -vpand %xmm1,%xmm11,%xmm11 - -# qhasm: x1 = v00 | v10 -# asm 1: vpor x1=reg128#11 -# asm 2: vpor x1=%xmm10 -vpor %xmm15,%xmm10,%xmm10 - -# qhasm: x5 = v01 | v11 -# asm 1: vpor x5=reg128#8 -# asm 2: vpor x5=%xmm7 -vpor %xmm11,%xmm7,%xmm7 - -# qhasm: v00 = x2 & mask0 -# asm 1: vpand v00=reg128#12 -# asm 2: vpand v00=%xmm11 -vpand %xmm0,%xmm8,%xmm11 - -# qhasm: 2x v10 = x6 << 32 -# asm 1: vpsllq $32,v10=reg128#16 -# asm 2: vpsllq $32,v10=%xmm15 -vpsllq $32,%xmm12,%xmm15 - -# qhasm: 2x v01 = x2 unsigned>> 32 -# asm 1: vpsrlq $32,v01=reg128#9 -# asm 2: vpsrlq $32,v01=%xmm8 -vpsrlq $32,%xmm8,%xmm8 - -# qhasm: v11 = x6 & mask1 -# asm 1: vpand v11=reg128#13 -# asm 2: vpand v11=%xmm12 -vpand %xmm1,%xmm12,%xmm12 - -# qhasm: x2 = v00 | v10 -# asm 1: vpor x2=reg128#12 -# asm 2: vpor x2=%xmm11 -vpor %xmm15,%xmm11,%xmm11 - -# qhasm: x6 = v01 | v11 -# asm 1: vpor x6=reg128#9 -# asm 2: vpor x6=%xmm8 -vpor %xmm12,%xmm8,%xmm8 - -# qhasm: v00 = x3 & mask0 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm0,%xmm9,%xmm12 - -# qhasm: 2x v10 = x7 << 32 -# asm 1: vpsllq $32,v10=reg128#16 -# asm 2: vpsllq $32,v10=%xmm15 -vpsllq $32,%xmm13,%xmm15 - -# qhasm: 2x v01 = x3 unsigned>> 32 -# asm 1: vpsrlq $32,v01=reg128#10 -# asm 2: vpsrlq $32,v01=%xmm9 -vpsrlq $32,%xmm9,%xmm9 - -# qhasm: v11 = x7 & mask1 -# asm 1: vpand v11=reg128#14 -# asm 2: vpand v11=%xmm13 -vpand %xmm1,%xmm13,%xmm13 - -# qhasm: x3 = v00 | v10 -# asm 1: vpor x3=reg128#13 -# asm 2: vpor x3=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: x7 = v01 | v11 -# asm 1: vpor x7=reg128#10 -# asm 2: vpor x7=%xmm9 -vpor %xmm13,%xmm9,%xmm9 - -# qhasm: v00 = x0 & mask2 -# asm 1: vpand v00=reg128#14 -# asm 2: vpand v00=%xmm13 -vpand %xmm2,%xmm14,%xmm13 - -# qhasm: 4x v10 = x2 << 16 -# asm 1: vpslld $16,v10=reg128#16 -# asm 2: vpslld $16,v10=%xmm15 -vpslld $16,%xmm11,%xmm15 - -# qhasm: 4x v01 = x0 unsigned>> 16 -# asm 1: vpsrld $16,v01=reg128#15 -# asm 2: vpsrld $16,v01=%xmm14 -vpsrld $16,%xmm14,%xmm14 - -# 
qhasm: v11 = x2 & mask3 -# asm 1: vpand v11=reg128#12 -# asm 2: vpand v11=%xmm11 -vpand %xmm3,%xmm11,%xmm11 - -# qhasm: x0 = v00 | v10 -# asm 1: vpor x0=reg128#14 -# asm 2: vpor x0=%xmm13 -vpor %xmm15,%xmm13,%xmm13 - -# qhasm: x2 = v01 | v11 -# asm 1: vpor x2=reg128#12 -# asm 2: vpor x2=%xmm11 -vpor %xmm11,%xmm14,%xmm11 - -# qhasm: v00 = x1 & mask2 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm2,%xmm10,%xmm14 - -# qhasm: 4x v10 = x3 << 16 -# asm 1: vpslld $16,v10=reg128#16 -# asm 2: vpslld $16,v10=%xmm15 -vpslld $16,%xmm12,%xmm15 - -# qhasm: 4x v01 = x1 unsigned>> 16 -# asm 1: vpsrld $16,v01=reg128#11 -# asm 2: vpsrld $16,v01=%xmm10 -vpsrld $16,%xmm10,%xmm10 - -# qhasm: v11 = x3 & mask3 -# asm 1: vpand v11=reg128#13 -# asm 2: vpand v11=%xmm12 -vpand %xmm3,%xmm12,%xmm12 - -# qhasm: x1 = v00 | v10 -# asm 1: vpor x1=reg128#15 -# asm 2: vpor x1=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: x3 = v01 | v11 -# asm 1: vpor x3=reg128#11 -# asm 2: vpor x3=%xmm10 -vpor %xmm12,%xmm10,%xmm10 - -# qhasm: v00 = x4 & mask2 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm2,%xmm6,%xmm12 - -# qhasm: 4x v10 = x6 << 16 -# asm 1: vpslld $16,v10=reg128#16 -# asm 2: vpslld $16,v10=%xmm15 -vpslld $16,%xmm8,%xmm15 - -# qhasm: 4x v01 = x4 unsigned>> 16 -# asm 1: vpsrld $16,v01=reg128#7 -# asm 2: vpsrld $16,v01=%xmm6 -vpsrld $16,%xmm6,%xmm6 - -# qhasm: v11 = x6 & mask3 -# asm 1: vpand v11=reg128#9 -# asm 2: vpand v11=%xmm8 -vpand %xmm3,%xmm8,%xmm8 - -# qhasm: x4 = v00 | v10 -# asm 1: vpor x4=reg128#13 -# asm 2: vpor x4=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: x6 = v01 | v11 -# asm 1: vpor x6=reg128#7 -# asm 2: vpor x6=%xmm6 -vpor %xmm8,%xmm6,%xmm6 - -# qhasm: v00 = x5 & mask2 -# asm 1: vpand v00=reg128#9 -# asm 2: vpand v00=%xmm8 -vpand %xmm2,%xmm7,%xmm8 - -# qhasm: 4x v10 = x7 << 16 -# asm 1: vpslld $16,v10=reg128#16 -# asm 2: vpslld $16,v10=%xmm15 -vpslld $16,%xmm9,%xmm15 - -# qhasm: 4x v01 = x5 unsigned>> 16 -# asm 1: vpsrld $16,v01=reg128#8 -# asm 2: vpsrld $16,v01=%xmm7 -vpsrld $16,%xmm7,%xmm7 - -# qhasm: v11 = x7 & mask3 -# asm 1: vpand v11=reg128#10 -# asm 2: vpand v11=%xmm9 -vpand %xmm3,%xmm9,%xmm9 - -# qhasm: x5 = v00 | v10 -# asm 1: vpor x5=reg128#9 -# asm 2: vpor x5=%xmm8 -vpor %xmm15,%xmm8,%xmm8 - -# qhasm: x7 = v01 | v11 -# asm 1: vpor x7=reg128#8 -# asm 2: vpor x7=%xmm7 -vpor %xmm9,%xmm7,%xmm7 - -# qhasm: v00 = x0 & mask4 -# asm 1: vpand v00=reg128#10 -# asm 2: vpand v00=%xmm9 -vpand %xmm4,%xmm13,%xmm9 - -# qhasm: 8x v10 = x1 << 8 -# asm 1: vpsllw $8,v10=reg128#16 -# asm 2: vpsllw $8,v10=%xmm15 -vpsllw $8,%xmm14,%xmm15 - -# qhasm: 8x v01 = x0 unsigned>> 8 -# asm 1: vpsrlw $8,v01=reg128#14 -# asm 2: vpsrlw $8,v01=%xmm13 -vpsrlw $8,%xmm13,%xmm13 - -# qhasm: v11 = x1 & mask5 -# asm 1: vpand v11=reg128#15 -# asm 2: vpand v11=%xmm14 -vpand %xmm5,%xmm14,%xmm14 - -# qhasm: x0 = v00 | v10 -# asm 1: vpor x0=reg128#10 -# asm 2: vpor x0=%xmm9 -vpor %xmm15,%xmm9,%xmm9 - -# qhasm: x1 = v01 | v11 -# asm 1: vpor x1=reg128#14 -# asm 2: vpor x1=%xmm13 -vpor %xmm14,%xmm13,%xmm13 - -# qhasm: v00 = x2 & mask4 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm4,%xmm11,%xmm14 - -# qhasm: 8x v10 = x3 << 8 -# asm 1: vpsllw $8,v10=reg128#16 -# asm 2: vpsllw $8,v10=%xmm15 -vpsllw $8,%xmm10,%xmm15 - -# qhasm: 8x v01 = x2 unsigned>> 8 -# asm 1: vpsrlw $8,v01=reg128#12 -# asm 2: vpsrlw $8,v01=%xmm11 -vpsrlw $8,%xmm11,%xmm11 - -# qhasm: v11 = x3 & mask5 -# asm 1: vpand v11=reg128#11 -# asm 2: vpand v11=%xmm10 -vpand %xmm5,%xmm10,%xmm10 - -# qhasm: x2 = v00 | v10 -# asm 
1: vpor x2=reg128#15 -# asm 2: vpor x2=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: x3 = v01 | v11 -# asm 1: vpor x3=reg128#11 -# asm 2: vpor x3=%xmm10 -vpor %xmm10,%xmm11,%xmm10 - -# qhasm: v00 = x4 & mask4 -# asm 1: vpand v00=reg128#12 -# asm 2: vpand v00=%xmm11 -vpand %xmm4,%xmm12,%xmm11 - -# qhasm: 8x v10 = x5 << 8 -# asm 1: vpsllw $8,v10=reg128#16 -# asm 2: vpsllw $8,v10=%xmm15 -vpsllw $8,%xmm8,%xmm15 - -# qhasm: 8x v01 = x4 unsigned>> 8 -# asm 1: vpsrlw $8,v01=reg128#13 -# asm 2: vpsrlw $8,v01=%xmm12 -vpsrlw $8,%xmm12,%xmm12 - -# qhasm: v11 = x5 & mask5 -# asm 1: vpand v11=reg128#9 -# asm 2: vpand v11=%xmm8 -vpand %xmm5,%xmm8,%xmm8 - -# qhasm: x4 = v00 | v10 -# asm 1: vpor x4=reg128#12 -# asm 2: vpor x4=%xmm11 -vpor %xmm15,%xmm11,%xmm11 - -# qhasm: x5 = v01 | v11 -# asm 1: vpor x5=reg128#9 -# asm 2: vpor x5=%xmm8 -vpor %xmm8,%xmm12,%xmm8 - -# qhasm: v00 = x6 & mask4 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm4,%xmm6,%xmm12 - -# qhasm: 8x v10 = x7 << 8 -# asm 1: vpsllw $8,v10=reg128#16 -# asm 2: vpsllw $8,v10=%xmm15 -vpsllw $8,%xmm7,%xmm15 - -# qhasm: 8x v01 = x6 unsigned>> 8 -# asm 1: vpsrlw $8,v01=reg128#7 -# asm 2: vpsrlw $8,v01=%xmm6 -vpsrlw $8,%xmm6,%xmm6 - -# qhasm: v11 = x7 & mask5 -# asm 1: vpand v11=reg128#8 -# asm 2: vpand v11=%xmm7 -vpand %xmm5,%xmm7,%xmm7 - -# qhasm: x6 = v00 | v10 -# asm 1: vpor x6=reg128#13 -# asm 2: vpor x6=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: x7 = v01 | v11 -# asm 1: vpor x7=reg128#7 -# asm 2: vpor x7=%xmm6 -vpor %xmm7,%xmm6,%xmm6 - -# qhasm: mem128[ input_0 + 0 ] = x0 -# asm 1: movdqu x0=reg128#7 -# asm 2: movdqu 16(x0=%xmm6 -movdqu 16(%rdi),%xmm6 - -# qhasm: x1 = mem128[ input_0 + 144 ] -# asm 1: movdqu 144(x1=reg128#8 -# asm 2: movdqu 144(x1=%xmm7 -movdqu 144(%rdi),%xmm7 - -# qhasm: x2 = mem128[ input_0 + 272 ] -# asm 1: movdqu 272(x2=reg128#9 -# asm 2: movdqu 272(x2=%xmm8 -movdqu 272(%rdi),%xmm8 - -# qhasm: x3 = mem128[ input_0 + 400 ] -# asm 1: movdqu 400(x3=reg128#10 -# asm 2: movdqu 400(x3=%xmm9 -movdqu 400(%rdi),%xmm9 - -# qhasm: x4 = mem128[ input_0 + 528 ] -# asm 1: movdqu 528(x4=reg128#11 -# asm 2: movdqu 528(x4=%xmm10 -movdqu 528(%rdi),%xmm10 - -# qhasm: x5 = mem128[ input_0 + 656 ] -# asm 1: movdqu 656(x5=reg128#12 -# asm 2: movdqu 656(x5=%xmm11 -movdqu 656(%rdi),%xmm11 - -# qhasm: x6 = mem128[ input_0 + 784 ] -# asm 1: movdqu 784(x6=reg128#13 -# asm 2: movdqu 784(x6=%xmm12 -movdqu 784(%rdi),%xmm12 - -# qhasm: x7 = mem128[ input_0 + 912 ] -# asm 1: movdqu 912(x7=reg128#14 -# asm 2: movdqu 912(x7=%xmm13 -movdqu 912(%rdi),%xmm13 - -# qhasm: v00 = x0 & mask0 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm0,%xmm6,%xmm14 - -# qhasm: 2x v10 = x4 << 32 -# asm 1: vpsllq $32,v10=reg128#16 -# asm 2: vpsllq $32,v10=%xmm15 -vpsllq $32,%xmm10,%xmm15 - -# qhasm: 2x v01 = x0 unsigned>> 32 -# asm 1: vpsrlq $32,v01=reg128#7 -# asm 2: vpsrlq $32,v01=%xmm6 -vpsrlq $32,%xmm6,%xmm6 - -# qhasm: v11 = x4 & mask1 -# asm 1: vpand v11=reg128#11 -# asm 2: vpand v11=%xmm10 -vpand %xmm1,%xmm10,%xmm10 - -# qhasm: x0 = v00 | v10 -# asm 1: vpor x0=reg128#15 -# asm 2: vpor x0=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: x4 = v01 | v11 -# asm 1: vpor x4=reg128#7 -# asm 2: vpor x4=%xmm6 -vpor %xmm10,%xmm6,%xmm6 - -# qhasm: v00 = x1 & mask0 -# asm 1: vpand v00=reg128#11 -# asm 2: vpand v00=%xmm10 -vpand %xmm0,%xmm7,%xmm10 - -# qhasm: 2x v10 = x5 << 32 -# asm 1: vpsllq $32,v10=reg128#16 -# asm 2: vpsllq $32,v10=%xmm15 -vpsllq $32,%xmm11,%xmm15 - -# qhasm: 2x v01 = x1 unsigned>> 32 -# asm 1: vpsrlq $32,v01=reg128#8 -# 
asm 2: vpsrlq $32,v01=%xmm7 -vpsrlq $32,%xmm7,%xmm7 - -# qhasm: v11 = x5 & mask1 -# asm 1: vpand v11=reg128#12 -# asm 2: vpand v11=%xmm11 -vpand %xmm1,%xmm11,%xmm11 - -# qhasm: x1 = v00 | v10 -# asm 1: vpor x1=reg128#11 -# asm 2: vpor x1=%xmm10 -vpor %xmm15,%xmm10,%xmm10 - -# qhasm: x5 = v01 | v11 -# asm 1: vpor x5=reg128#8 -# asm 2: vpor x5=%xmm7 -vpor %xmm11,%xmm7,%xmm7 - -# qhasm: v00 = x2 & mask0 -# asm 1: vpand v00=reg128#12 -# asm 2: vpand v00=%xmm11 -vpand %xmm0,%xmm8,%xmm11 - -# qhasm: 2x v10 = x6 << 32 -# asm 1: vpsllq $32,v10=reg128#16 -# asm 2: vpsllq $32,v10=%xmm15 -vpsllq $32,%xmm12,%xmm15 - -# qhasm: 2x v01 = x2 unsigned>> 32 -# asm 1: vpsrlq $32,v01=reg128#9 -# asm 2: vpsrlq $32,v01=%xmm8 -vpsrlq $32,%xmm8,%xmm8 - -# qhasm: v11 = x6 & mask1 -# asm 1: vpand v11=reg128#13 -# asm 2: vpand v11=%xmm12 -vpand %xmm1,%xmm12,%xmm12 - -# qhasm: x2 = v00 | v10 -# asm 1: vpor x2=reg128#12 -# asm 2: vpor x2=%xmm11 -vpor %xmm15,%xmm11,%xmm11 - -# qhasm: x6 = v01 | v11 -# asm 1: vpor x6=reg128#9 -# asm 2: vpor x6=%xmm8 -vpor %xmm12,%xmm8,%xmm8 - -# qhasm: v00 = x3 & mask0 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm0,%xmm9,%xmm12 - -# qhasm: 2x v10 = x7 << 32 -# asm 1: vpsllq $32,v10=reg128#16 -# asm 2: vpsllq $32,v10=%xmm15 -vpsllq $32,%xmm13,%xmm15 - -# qhasm: 2x v01 = x3 unsigned>> 32 -# asm 1: vpsrlq $32,v01=reg128#10 -# asm 2: vpsrlq $32,v01=%xmm9 -vpsrlq $32,%xmm9,%xmm9 - -# qhasm: v11 = x7 & mask1 -# asm 1: vpand v11=reg128#14 -# asm 2: vpand v11=%xmm13 -vpand %xmm1,%xmm13,%xmm13 - -# qhasm: x3 = v00 | v10 -# asm 1: vpor x3=reg128#13 -# asm 2: vpor x3=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: x7 = v01 | v11 -# asm 1: vpor x7=reg128#10 -# asm 2: vpor x7=%xmm9 -vpor %xmm13,%xmm9,%xmm9 - -# qhasm: v00 = x0 & mask2 -# asm 1: vpand v00=reg128#14 -# asm 2: vpand v00=%xmm13 -vpand %xmm2,%xmm14,%xmm13 - -# qhasm: 4x v10 = x2 << 16 -# asm 1: vpslld $16,v10=reg128#16 -# asm 2: vpslld $16,v10=%xmm15 -vpslld $16,%xmm11,%xmm15 - -# qhasm: 4x v01 = x0 unsigned>> 16 -# asm 1: vpsrld $16,v01=reg128#15 -# asm 2: vpsrld $16,v01=%xmm14 -vpsrld $16,%xmm14,%xmm14 - -# qhasm: v11 = x2 & mask3 -# asm 1: vpand v11=reg128#12 -# asm 2: vpand v11=%xmm11 -vpand %xmm3,%xmm11,%xmm11 - -# qhasm: x0 = v00 | v10 -# asm 1: vpor x0=reg128#14 -# asm 2: vpor x0=%xmm13 -vpor %xmm15,%xmm13,%xmm13 - -# qhasm: x2 = v01 | v11 -# asm 1: vpor x2=reg128#12 -# asm 2: vpor x2=%xmm11 -vpor %xmm11,%xmm14,%xmm11 - -# qhasm: v00 = x1 & mask2 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm2,%xmm10,%xmm14 - -# qhasm: 4x v10 = x3 << 16 -# asm 1: vpslld $16,v10=reg128#16 -# asm 2: vpslld $16,v10=%xmm15 -vpslld $16,%xmm12,%xmm15 - -# qhasm: 4x v01 = x1 unsigned>> 16 -# asm 1: vpsrld $16,v01=reg128#11 -# asm 2: vpsrld $16,v01=%xmm10 -vpsrld $16,%xmm10,%xmm10 - -# qhasm: v11 = x3 & mask3 -# asm 1: vpand v11=reg128#13 -# asm 2: vpand v11=%xmm12 -vpand %xmm3,%xmm12,%xmm12 - -# qhasm: x1 = v00 | v10 -# asm 1: vpor x1=reg128#15 -# asm 2: vpor x1=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: x3 = v01 | v11 -# asm 1: vpor x3=reg128#11 -# asm 2: vpor x3=%xmm10 -vpor %xmm12,%xmm10,%xmm10 - -# qhasm: v00 = x4 & mask2 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm2,%xmm6,%xmm12 - -# qhasm: 4x v10 = x6 << 16 -# asm 1: vpslld $16,v10=reg128#16 -# asm 2: vpslld $16,v10=%xmm15 -vpslld $16,%xmm8,%xmm15 - -# qhasm: 4x v01 = x4 unsigned>> 16 -# asm 1: vpsrld $16,v01=reg128#7 -# asm 2: vpsrld $16,v01=%xmm6 -vpsrld $16,%xmm6,%xmm6 - -# qhasm: v11 = x6 & mask3 -# asm 1: vpand v11=reg128#9 
-# asm 2: vpand v11=%xmm8 -vpand %xmm3,%xmm8,%xmm8 - -# qhasm: x4 = v00 | v10 -# asm 1: vpor x4=reg128#13 -# asm 2: vpor x4=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: x6 = v01 | v11 -# asm 1: vpor x6=reg128#7 -# asm 2: vpor x6=%xmm6 -vpor %xmm8,%xmm6,%xmm6 - -# qhasm: v00 = x5 & mask2 -# asm 1: vpand v00=reg128#9 -# asm 2: vpand v00=%xmm8 -vpand %xmm2,%xmm7,%xmm8 - -# qhasm: 4x v10 = x7 << 16 -# asm 1: vpslld $16,v10=reg128#16 -# asm 2: vpslld $16,v10=%xmm15 -vpslld $16,%xmm9,%xmm15 - -# qhasm: 4x v01 = x5 unsigned>> 16 -# asm 1: vpsrld $16,v01=reg128#8 -# asm 2: vpsrld $16,v01=%xmm7 -vpsrld $16,%xmm7,%xmm7 - -# qhasm: v11 = x7 & mask3 -# asm 1: vpand v11=reg128#10 -# asm 2: vpand v11=%xmm9 -vpand %xmm3,%xmm9,%xmm9 - -# qhasm: x5 = v00 | v10 -# asm 1: vpor x5=reg128#9 -# asm 2: vpor x5=%xmm8 -vpor %xmm15,%xmm8,%xmm8 - -# qhasm: x7 = v01 | v11 -# asm 1: vpor x7=reg128#8 -# asm 2: vpor x7=%xmm7 -vpor %xmm9,%xmm7,%xmm7 - -# qhasm: v00 = x0 & mask4 -# asm 1: vpand v00=reg128#10 -# asm 2: vpand v00=%xmm9 -vpand %xmm4,%xmm13,%xmm9 - -# qhasm: 8x v10 = x1 << 8 -# asm 1: vpsllw $8,v10=reg128#16 -# asm 2: vpsllw $8,v10=%xmm15 -vpsllw $8,%xmm14,%xmm15 - -# qhasm: 8x v01 = x0 unsigned>> 8 -# asm 1: vpsrlw $8,v01=reg128#14 -# asm 2: vpsrlw $8,v01=%xmm13 -vpsrlw $8,%xmm13,%xmm13 - -# qhasm: v11 = x1 & mask5 -# asm 1: vpand v11=reg128#15 -# asm 2: vpand v11=%xmm14 -vpand %xmm5,%xmm14,%xmm14 - -# qhasm: x0 = v00 | v10 -# asm 1: vpor x0=reg128#10 -# asm 2: vpor x0=%xmm9 -vpor %xmm15,%xmm9,%xmm9 - -# qhasm: x1 = v01 | v11 -# asm 1: vpor x1=reg128#14 -# asm 2: vpor x1=%xmm13 -vpor %xmm14,%xmm13,%xmm13 - -# qhasm: v00 = x2 & mask4 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm4,%xmm11,%xmm14 - -# qhasm: 8x v10 = x3 << 8 -# asm 1: vpsllw $8,v10=reg128#16 -# asm 2: vpsllw $8,v10=%xmm15 -vpsllw $8,%xmm10,%xmm15 - -# qhasm: 8x v01 = x2 unsigned>> 8 -# asm 1: vpsrlw $8,v01=reg128#12 -# asm 2: vpsrlw $8,v01=%xmm11 -vpsrlw $8,%xmm11,%xmm11 - -# qhasm: v11 = x3 & mask5 -# asm 1: vpand v11=reg128#11 -# asm 2: vpand v11=%xmm10 -vpand %xmm5,%xmm10,%xmm10 - -# qhasm: x2 = v00 | v10 -# asm 1: vpor x2=reg128#15 -# asm 2: vpor x2=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: x3 = v01 | v11 -# asm 1: vpor x3=reg128#11 -# asm 2: vpor x3=%xmm10 -vpor %xmm10,%xmm11,%xmm10 - -# qhasm: v00 = x4 & mask4 -# asm 1: vpand v00=reg128#12 -# asm 2: vpand v00=%xmm11 -vpand %xmm4,%xmm12,%xmm11 - -# qhasm: 8x v10 = x5 << 8 -# asm 1: vpsllw $8,v10=reg128#16 -# asm 2: vpsllw $8,v10=%xmm15 -vpsllw $8,%xmm8,%xmm15 - -# qhasm: 8x v01 = x4 unsigned>> 8 -# asm 1: vpsrlw $8,v01=reg128#13 -# asm 2: vpsrlw $8,v01=%xmm12 -vpsrlw $8,%xmm12,%xmm12 - -# qhasm: v11 = x5 & mask5 -# asm 1: vpand v11=reg128#9 -# asm 2: vpand v11=%xmm8 -vpand %xmm5,%xmm8,%xmm8 - -# qhasm: x4 = v00 | v10 -# asm 1: vpor x4=reg128#12 -# asm 2: vpor x4=%xmm11 -vpor %xmm15,%xmm11,%xmm11 - -# qhasm: x5 = v01 | v11 -# asm 1: vpor x5=reg128#9 -# asm 2: vpor x5=%xmm8 -vpor %xmm8,%xmm12,%xmm8 - -# qhasm: v00 = x6 & mask4 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm4,%xmm6,%xmm12 - -# qhasm: 8x v10 = x7 << 8 -# asm 1: vpsllw $8,v10=reg128#16 -# asm 2: vpsllw $8,v10=%xmm15 -vpsllw $8,%xmm7,%xmm15 - -# qhasm: 8x v01 = x6 unsigned>> 8 -# asm 1: vpsrlw $8,v01=reg128#7 -# asm 2: vpsrlw $8,v01=%xmm6 -vpsrlw $8,%xmm6,%xmm6 - -# qhasm: v11 = x7 & mask5 -# asm 1: vpand v11=reg128#8 -# asm 2: vpand v11=%xmm7 -vpand %xmm5,%xmm7,%xmm7 - -# qhasm: x6 = v00 | v10 -# asm 1: vpor x6=reg128#13 -# asm 2: vpor x6=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# 
[transpose_64x128_sp_asm.S, coarse pass continued: the vpand / vpsllq,vpsrlq $32 / vpslld,vpsrld $16 / vpsllw,vpsrlw $8 / vpor butterfly shown above repeats for the remaining groups of eight 128-bit rows spaced 128 bytes apart (loads starting at input_0 + 32, 48, 64, 80, 96 and 112), using the mask constants held in %xmm0..%xmm5, and each transposed group is written back with movdqu. The routine then reloads mask0..mask5 from PQCLEAN_MCELIECE348864_SSE_MASK2_0/1, MASK1_0/1 and MASK0_0/1 for the fine pass.]
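Each qhasm block in the pass above is one butterfly step: v00 and v10 collect the bits that stay low or move down, v01 and v11 the bits that move up, and two vpor instructions recombine them. A minimal SSE2 intrinsics sketch of the stride-32 step, assuming nothing beyond <emmintrin.h> (the helper name swap32_step is illustrative and not part of the removed file):

#include <emmintrin.h>

/* One stride-32 butterfly on a pair of 128-bit rows, mirroring the qhasm:
 *   v00 = lo & mask0, v10 = hi << 32, v01 = lo >> 32, v11 = hi & mask1,
 *   lo  = v00 | v10,  hi  = v01 | v11.
 * mask0 keeps the low 32 bits of each 64-bit lane, mask1 the high 32 bits. */
static void swap32_step(__m128i *lo, __m128i *hi)
{
    const __m128i mask0 = _mm_set1_epi64x(0x00000000FFFFFFFFLL);
    const __m128i mask1 = _mm_set1_epi64x((long long) 0xFFFFFFFF00000000ULL);

    __m128i v00 = _mm_and_si128(*lo, mask0);   /* vpand      */
    __m128i v10 = _mm_slli_epi64(*hi, 32);     /* vpsllq $32 */
    __m128i v01 = _mm_srli_epi64(*lo, 32);     /* vpsrlq $32 */
    __m128i v11 = _mm_and_si128(*hi, mask1);   /* vpand      */

    *lo = _mm_or_si128(v00, v10);              /* vpor       */
    *hi = _mm_or_si128(v01, v11);              /* vpor       */
}

The 16- and 8-bit steps differ only in shift width (vpslld/vpsrld $16, vpsllw/vpsrlw $8) and masks; the fine pass below uses the same shape with 64-bit shifts of 4, 2 and 1.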
[transpose_64x128_sp_asm.S, fine pass: with the new masks in %xmm0..%xmm5, eight contiguous 128-bit rows at a time (input_0 + 0..112, then + 128..240, and so on) are combined with the same vpand/vpor pattern, now using psllq/psrlq shifts of 4, 2 and 1 bits, and written back with movdqu.]
v10=%xmm15 -vpand %xmm4,%xmm7,%xmm15 - -# qhasm: 2x v10 <<= 1 -# asm 1: psllq $1,v01=reg128#7 -# asm 2: vpand v01=%xmm6 -vpand %xmm5,%xmm6,%xmm6 - -# qhasm: v11 = x7 & mask5 -# asm 1: vpand v11=reg128#8 -# asm 2: vpand v11=%xmm7 -vpand %xmm5,%xmm7,%xmm7 - -# qhasm: 2x v01 unsigned>>= 1 -# asm 1: psrlq $1,x6=reg128#13 -# asm 2: vpor x6=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: x7 = v01 | v11 -# asm 1: vpor x7=reg128#7 -# asm 2: vpor x7=%xmm6 -vpor %xmm7,%xmm6,%xmm6 - -# qhasm: mem128[ input_0 + 128 ] = x0 -# asm 1: movdqu x0=reg128#7 -# asm 2: movdqu 256(x0=%xmm6 -movdqu 256(%rdi),%xmm6 - -# qhasm: x1 = mem128[ input_0 + 272 ] -# asm 1: movdqu 272(x1=reg128#8 -# asm 2: movdqu 272(x1=%xmm7 -movdqu 272(%rdi),%xmm7 - -# qhasm: x2 = mem128[ input_0 + 288 ] -# asm 1: movdqu 288(x2=reg128#9 -# asm 2: movdqu 288(x2=%xmm8 -movdqu 288(%rdi),%xmm8 - -# qhasm: x3 = mem128[ input_0 + 304 ] -# asm 1: movdqu 304(x3=reg128#10 -# asm 2: movdqu 304(x3=%xmm9 -movdqu 304(%rdi),%xmm9 - -# qhasm: x4 = mem128[ input_0 + 320 ] -# asm 1: movdqu 320(x4=reg128#11 -# asm 2: movdqu 320(x4=%xmm10 -movdqu 320(%rdi),%xmm10 - -# qhasm: x5 = mem128[ input_0 + 336 ] -# asm 1: movdqu 336(x5=reg128#12 -# asm 2: movdqu 336(x5=%xmm11 -movdqu 336(%rdi),%xmm11 - -# qhasm: x6 = mem128[ input_0 + 352 ] -# asm 1: movdqu 352(x6=reg128#13 -# asm 2: movdqu 352(x6=%xmm12 -movdqu 352(%rdi),%xmm12 - -# qhasm: x7 = mem128[ input_0 + 368 ] -# asm 1: movdqu 368(x7=reg128#14 -# asm 2: movdqu 368(x7=%xmm13 -movdqu 368(%rdi),%xmm13 - -# qhasm: v00 = x0 & mask0 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm0,%xmm6,%xmm14 - -# qhasm: v10 = x4 & mask0 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm0,%xmm10,%xmm15 - -# qhasm: 2x v10 <<= 4 -# asm 1: psllq $4,v01=reg128#7 -# asm 2: vpand v01=%xmm6 -vpand %xmm1,%xmm6,%xmm6 - -# qhasm: v11 = x4 & mask1 -# asm 1: vpand v11=reg128#11 -# asm 2: vpand v11=%xmm10 -vpand %xmm1,%xmm10,%xmm10 - -# qhasm: 2x v01 unsigned>>= 4 -# asm 1: psrlq $4,x0=reg128#15 -# asm 2: vpor x0=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: x4 = v01 | v11 -# asm 1: vpor x4=reg128#7 -# asm 2: vpor x4=%xmm6 -vpor %xmm10,%xmm6,%xmm6 - -# qhasm: v00 = x1 & mask0 -# asm 1: vpand v00=reg128#11 -# asm 2: vpand v00=%xmm10 -vpand %xmm0,%xmm7,%xmm10 - -# qhasm: v10 = x5 & mask0 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm0,%xmm11,%xmm15 - -# qhasm: 2x v10 <<= 4 -# asm 1: psllq $4,v01=reg128#8 -# asm 2: vpand v01=%xmm7 -vpand %xmm1,%xmm7,%xmm7 - -# qhasm: v11 = x5 & mask1 -# asm 1: vpand v11=reg128#12 -# asm 2: vpand v11=%xmm11 -vpand %xmm1,%xmm11,%xmm11 - -# qhasm: 2x v01 unsigned>>= 4 -# asm 1: psrlq $4,x1=reg128#11 -# asm 2: vpor x1=%xmm10 -vpor %xmm15,%xmm10,%xmm10 - -# qhasm: x5 = v01 | v11 -# asm 1: vpor x5=reg128#8 -# asm 2: vpor x5=%xmm7 -vpor %xmm11,%xmm7,%xmm7 - -# qhasm: v00 = x2 & mask0 -# asm 1: vpand v00=reg128#12 -# asm 2: vpand v00=%xmm11 -vpand %xmm0,%xmm8,%xmm11 - -# qhasm: v10 = x6 & mask0 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm0,%xmm12,%xmm15 - -# qhasm: 2x v10 <<= 4 -# asm 1: psllq $4,v01=reg128#9 -# asm 2: vpand v01=%xmm8 -vpand %xmm1,%xmm8,%xmm8 - -# qhasm: v11 = x6 & mask1 -# asm 1: vpand v11=reg128#13 -# asm 2: vpand v11=%xmm12 -vpand %xmm1,%xmm12,%xmm12 - -# qhasm: 2x v01 unsigned>>= 4 -# asm 1: psrlq $4,x2=reg128#12 -# asm 2: vpor x2=%xmm11 -vpor %xmm15,%xmm11,%xmm11 - -# qhasm: x6 = v01 | v11 -# asm 1: vpor x6=reg128#9 -# asm 2: vpor x6=%xmm8 -vpor %xmm12,%xmm8,%xmm8 - -# qhasm: v00 = x3 & mask0 -# asm 1: vpand 
v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm0,%xmm9,%xmm12 - -# qhasm: v10 = x7 & mask0 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm0,%xmm13,%xmm15 - -# qhasm: 2x v10 <<= 4 -# asm 1: psllq $4,v01=reg128#10 -# asm 2: vpand v01=%xmm9 -vpand %xmm1,%xmm9,%xmm9 - -# qhasm: v11 = x7 & mask1 -# asm 1: vpand v11=reg128#14 -# asm 2: vpand v11=%xmm13 -vpand %xmm1,%xmm13,%xmm13 - -# qhasm: 2x v01 unsigned>>= 4 -# asm 1: psrlq $4,x3=reg128#13 -# asm 2: vpor x3=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: x7 = v01 | v11 -# asm 1: vpor x7=reg128#10 -# asm 2: vpor x7=%xmm9 -vpor %xmm13,%xmm9,%xmm9 - -# qhasm: v00 = x0 & mask2 -# asm 1: vpand v00=reg128#14 -# asm 2: vpand v00=%xmm13 -vpand %xmm2,%xmm14,%xmm13 - -# qhasm: v10 = x2 & mask2 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm2,%xmm11,%xmm15 - -# qhasm: 2x v10 <<= 2 -# asm 1: psllq $2,v01=reg128#15 -# asm 2: vpand v01=%xmm14 -vpand %xmm3,%xmm14,%xmm14 - -# qhasm: v11 = x2 & mask3 -# asm 1: vpand v11=reg128#12 -# asm 2: vpand v11=%xmm11 -vpand %xmm3,%xmm11,%xmm11 - -# qhasm: 2x v01 unsigned>>= 2 -# asm 1: psrlq $2,x0=reg128#14 -# asm 2: vpor x0=%xmm13 -vpor %xmm15,%xmm13,%xmm13 - -# qhasm: x2 = v01 | v11 -# asm 1: vpor x2=reg128#12 -# asm 2: vpor x2=%xmm11 -vpor %xmm11,%xmm14,%xmm11 - -# qhasm: v00 = x1 & mask2 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm2,%xmm10,%xmm14 - -# qhasm: v10 = x3 & mask2 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm2,%xmm12,%xmm15 - -# qhasm: 2x v10 <<= 2 -# asm 1: psllq $2,v01=reg128#11 -# asm 2: vpand v01=%xmm10 -vpand %xmm3,%xmm10,%xmm10 - -# qhasm: v11 = x3 & mask3 -# asm 1: vpand v11=reg128#13 -# asm 2: vpand v11=%xmm12 -vpand %xmm3,%xmm12,%xmm12 - -# qhasm: 2x v01 unsigned>>= 2 -# asm 1: psrlq $2,x1=reg128#15 -# asm 2: vpor x1=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: x3 = v01 | v11 -# asm 1: vpor x3=reg128#11 -# asm 2: vpor x3=%xmm10 -vpor %xmm12,%xmm10,%xmm10 - -# qhasm: v00 = x4 & mask2 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm2,%xmm6,%xmm12 - -# qhasm: v10 = x6 & mask2 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm2,%xmm8,%xmm15 - -# qhasm: 2x v10 <<= 2 -# asm 1: psllq $2,v01=reg128#7 -# asm 2: vpand v01=%xmm6 -vpand %xmm3,%xmm6,%xmm6 - -# qhasm: v11 = x6 & mask3 -# asm 1: vpand v11=reg128#9 -# asm 2: vpand v11=%xmm8 -vpand %xmm3,%xmm8,%xmm8 - -# qhasm: 2x v01 unsigned>>= 2 -# asm 1: psrlq $2,x4=reg128#13 -# asm 2: vpor x4=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: x6 = v01 | v11 -# asm 1: vpor x6=reg128#7 -# asm 2: vpor x6=%xmm6 -vpor %xmm8,%xmm6,%xmm6 - -# qhasm: v00 = x5 & mask2 -# asm 1: vpand v00=reg128#9 -# asm 2: vpand v00=%xmm8 -vpand %xmm2,%xmm7,%xmm8 - -# qhasm: v10 = x7 & mask2 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm2,%xmm9,%xmm15 - -# qhasm: 2x v10 <<= 2 -# asm 1: psllq $2,v01=reg128#8 -# asm 2: vpand v01=%xmm7 -vpand %xmm3,%xmm7,%xmm7 - -# qhasm: v11 = x7 & mask3 -# asm 1: vpand v11=reg128#10 -# asm 2: vpand v11=%xmm9 -vpand %xmm3,%xmm9,%xmm9 - -# qhasm: 2x v01 unsigned>>= 2 -# asm 1: psrlq $2,x5=reg128#9 -# asm 2: vpor x5=%xmm8 -vpor %xmm15,%xmm8,%xmm8 - -# qhasm: x7 = v01 | v11 -# asm 1: vpor x7=reg128#8 -# asm 2: vpor x7=%xmm7 -vpor %xmm9,%xmm7,%xmm7 - -# qhasm: v00 = x0 & mask4 -# asm 1: vpand v00=reg128#10 -# asm 2: vpand v00=%xmm9 -vpand %xmm4,%xmm13,%xmm9 - -# qhasm: v10 = x1 & mask4 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm4,%xmm14,%xmm15 - -# qhasm: 2x v10 <<= 1 -# asm 1: psllq 
$1,v01=reg128#14 -# asm 2: vpand v01=%xmm13 -vpand %xmm5,%xmm13,%xmm13 - -# qhasm: v11 = x1 & mask5 -# asm 1: vpand v11=reg128#15 -# asm 2: vpand v11=%xmm14 -vpand %xmm5,%xmm14,%xmm14 - -# qhasm: 2x v01 unsigned>>= 1 -# asm 1: psrlq $1,x0=reg128#10 -# asm 2: vpor x0=%xmm9 -vpor %xmm15,%xmm9,%xmm9 - -# qhasm: x1 = v01 | v11 -# asm 1: vpor x1=reg128#14 -# asm 2: vpor x1=%xmm13 -vpor %xmm14,%xmm13,%xmm13 - -# qhasm: v00 = x2 & mask4 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm4,%xmm11,%xmm14 - -# qhasm: v10 = x3 & mask4 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm4,%xmm10,%xmm15 - -# qhasm: 2x v10 <<= 1 -# asm 1: psllq $1,v01=reg128#12 -# asm 2: vpand v01=%xmm11 -vpand %xmm5,%xmm11,%xmm11 - -# qhasm: v11 = x3 & mask5 -# asm 1: vpand v11=reg128#11 -# asm 2: vpand v11=%xmm10 -vpand %xmm5,%xmm10,%xmm10 - -# qhasm: 2x v01 unsigned>>= 1 -# asm 1: psrlq $1,x2=reg128#15 -# asm 2: vpor x2=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: x3 = v01 | v11 -# asm 1: vpor x3=reg128#11 -# asm 2: vpor x3=%xmm10 -vpor %xmm10,%xmm11,%xmm10 - -# qhasm: v00 = x4 & mask4 -# asm 1: vpand v00=reg128#12 -# asm 2: vpand v00=%xmm11 -vpand %xmm4,%xmm12,%xmm11 - -# qhasm: v10 = x5 & mask4 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm4,%xmm8,%xmm15 - -# qhasm: 2x v10 <<= 1 -# asm 1: psllq $1,v01=reg128#13 -# asm 2: vpand v01=%xmm12 -vpand %xmm5,%xmm12,%xmm12 - -# qhasm: v11 = x5 & mask5 -# asm 1: vpand v11=reg128#9 -# asm 2: vpand v11=%xmm8 -vpand %xmm5,%xmm8,%xmm8 - -# qhasm: 2x v01 unsigned>>= 1 -# asm 1: psrlq $1,x4=reg128#12 -# asm 2: vpor x4=%xmm11 -vpor %xmm15,%xmm11,%xmm11 - -# qhasm: x5 = v01 | v11 -# asm 1: vpor x5=reg128#9 -# asm 2: vpor x5=%xmm8 -vpor %xmm8,%xmm12,%xmm8 - -# qhasm: v00 = x6 & mask4 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm4,%xmm6,%xmm12 - -# qhasm: v10 = x7 & mask4 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm4,%xmm7,%xmm15 - -# qhasm: 2x v10 <<= 1 -# asm 1: psllq $1,v01=reg128#7 -# asm 2: vpand v01=%xmm6 -vpand %xmm5,%xmm6,%xmm6 - -# qhasm: v11 = x7 & mask5 -# asm 1: vpand v11=reg128#8 -# asm 2: vpand v11=%xmm7 -vpand %xmm5,%xmm7,%xmm7 - -# qhasm: 2x v01 unsigned>>= 1 -# asm 1: psrlq $1,x6=reg128#13 -# asm 2: vpor x6=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: x7 = v01 | v11 -# asm 1: vpor x7=reg128#7 -# asm 2: vpor x7=%xmm6 -vpor %xmm7,%xmm6,%xmm6 - -# qhasm: mem128[ input_0 + 256 ] = x0 -# asm 1: movdqu x0=reg128#7 -# asm 2: movdqu 384(x0=%xmm6 -movdqu 384(%rdi),%xmm6 - -# qhasm: x1 = mem128[ input_0 + 400 ] -# asm 1: movdqu 400(x1=reg128#8 -# asm 2: movdqu 400(x1=%xmm7 -movdqu 400(%rdi),%xmm7 - -# qhasm: x2 = mem128[ input_0 + 416 ] -# asm 1: movdqu 416(x2=reg128#9 -# asm 2: movdqu 416(x2=%xmm8 -movdqu 416(%rdi),%xmm8 - -# qhasm: x3 = mem128[ input_0 + 432 ] -# asm 1: movdqu 432(x3=reg128#10 -# asm 2: movdqu 432(x3=%xmm9 -movdqu 432(%rdi),%xmm9 - -# qhasm: x4 = mem128[ input_0 + 448 ] -# asm 1: movdqu 448(x4=reg128#11 -# asm 2: movdqu 448(x4=%xmm10 -movdqu 448(%rdi),%xmm10 - -# qhasm: x5 = mem128[ input_0 + 464 ] -# asm 1: movdqu 464(x5=reg128#12 -# asm 2: movdqu 464(x5=%xmm11 -movdqu 464(%rdi),%xmm11 - -# qhasm: x6 = mem128[ input_0 + 480 ] -# asm 1: movdqu 480(x6=reg128#13 -# asm 2: movdqu 480(x6=%xmm12 -movdqu 480(%rdi),%xmm12 - -# qhasm: x7 = mem128[ input_0 + 496 ] -# asm 1: movdqu 496(x7=reg128#14 -# asm 2: movdqu 496(x7=%xmm13 -movdqu 496(%rdi),%xmm13 - -# qhasm: v00 = x0 & mask0 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm0,%xmm6,%xmm14 - 
-# qhasm: v10 = x4 & mask0 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm0,%xmm10,%xmm15 - -# qhasm: 2x v10 <<= 4 -# asm 1: psllq $4,v01=reg128#7 -# asm 2: vpand v01=%xmm6 -vpand %xmm1,%xmm6,%xmm6 - -# qhasm: v11 = x4 & mask1 -# asm 1: vpand v11=reg128#11 -# asm 2: vpand v11=%xmm10 -vpand %xmm1,%xmm10,%xmm10 - -# qhasm: 2x v01 unsigned>>= 4 -# asm 1: psrlq $4,x0=reg128#15 -# asm 2: vpor x0=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: x4 = v01 | v11 -# asm 1: vpor x4=reg128#7 -# asm 2: vpor x4=%xmm6 -vpor %xmm10,%xmm6,%xmm6 - -# qhasm: v00 = x1 & mask0 -# asm 1: vpand v00=reg128#11 -# asm 2: vpand v00=%xmm10 -vpand %xmm0,%xmm7,%xmm10 - -# qhasm: v10 = x5 & mask0 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm0,%xmm11,%xmm15 - -# qhasm: 2x v10 <<= 4 -# asm 1: psllq $4,v01=reg128#8 -# asm 2: vpand v01=%xmm7 -vpand %xmm1,%xmm7,%xmm7 - -# qhasm: v11 = x5 & mask1 -# asm 1: vpand v11=reg128#12 -# asm 2: vpand v11=%xmm11 -vpand %xmm1,%xmm11,%xmm11 - -# qhasm: 2x v01 unsigned>>= 4 -# asm 1: psrlq $4,x1=reg128#11 -# asm 2: vpor x1=%xmm10 -vpor %xmm15,%xmm10,%xmm10 - -# qhasm: x5 = v01 | v11 -# asm 1: vpor x5=reg128#8 -# asm 2: vpor x5=%xmm7 -vpor %xmm11,%xmm7,%xmm7 - -# qhasm: v00 = x2 & mask0 -# asm 1: vpand v00=reg128#12 -# asm 2: vpand v00=%xmm11 -vpand %xmm0,%xmm8,%xmm11 - -# qhasm: v10 = x6 & mask0 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm0,%xmm12,%xmm15 - -# qhasm: 2x v10 <<= 4 -# asm 1: psllq $4,v01=reg128#9 -# asm 2: vpand v01=%xmm8 -vpand %xmm1,%xmm8,%xmm8 - -# qhasm: v11 = x6 & mask1 -# asm 1: vpand v11=reg128#13 -# asm 2: vpand v11=%xmm12 -vpand %xmm1,%xmm12,%xmm12 - -# qhasm: 2x v01 unsigned>>= 4 -# asm 1: psrlq $4,x2=reg128#12 -# asm 2: vpor x2=%xmm11 -vpor %xmm15,%xmm11,%xmm11 - -# qhasm: x6 = v01 | v11 -# asm 1: vpor x6=reg128#9 -# asm 2: vpor x6=%xmm8 -vpor %xmm12,%xmm8,%xmm8 - -# qhasm: v00 = x3 & mask0 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm0,%xmm9,%xmm12 - -# qhasm: v10 = x7 & mask0 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm0,%xmm13,%xmm15 - -# qhasm: 2x v10 <<= 4 -# asm 1: psllq $4,v01=reg128#10 -# asm 2: vpand v01=%xmm9 -vpand %xmm1,%xmm9,%xmm9 - -# qhasm: v11 = x7 & mask1 -# asm 1: vpand v11=reg128#14 -# asm 2: vpand v11=%xmm13 -vpand %xmm1,%xmm13,%xmm13 - -# qhasm: 2x v01 unsigned>>= 4 -# asm 1: psrlq $4,x3=reg128#13 -# asm 2: vpor x3=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: x7 = v01 | v11 -# asm 1: vpor x7=reg128#10 -# asm 2: vpor x7=%xmm9 -vpor %xmm13,%xmm9,%xmm9 - -# qhasm: v00 = x0 & mask2 -# asm 1: vpand v00=reg128#14 -# asm 2: vpand v00=%xmm13 -vpand %xmm2,%xmm14,%xmm13 - -# qhasm: v10 = x2 & mask2 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm2,%xmm11,%xmm15 - -# qhasm: 2x v10 <<= 2 -# asm 1: psllq $2,v01=reg128#15 -# asm 2: vpand v01=%xmm14 -vpand %xmm3,%xmm14,%xmm14 - -# qhasm: v11 = x2 & mask3 -# asm 1: vpand v11=reg128#12 -# asm 2: vpand v11=%xmm11 -vpand %xmm3,%xmm11,%xmm11 - -# qhasm: 2x v01 unsigned>>= 2 -# asm 1: psrlq $2,x0=reg128#14 -# asm 2: vpor x0=%xmm13 -vpor %xmm15,%xmm13,%xmm13 - -# qhasm: x2 = v01 | v11 -# asm 1: vpor x2=reg128#12 -# asm 2: vpor x2=%xmm11 -vpor %xmm11,%xmm14,%xmm11 - -# qhasm: v00 = x1 & mask2 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm2,%xmm10,%xmm14 - -# qhasm: v10 = x3 & mask2 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm2,%xmm12,%xmm15 - -# qhasm: 2x v10 <<= 2 -# asm 1: psllq $2,v01=reg128#11 -# asm 2: vpand v01=%xmm10 -vpand 
%xmm3,%xmm10,%xmm10 - -# qhasm: v11 = x3 & mask3 -# asm 1: vpand v11=reg128#13 -# asm 2: vpand v11=%xmm12 -vpand %xmm3,%xmm12,%xmm12 - -# qhasm: 2x v01 unsigned>>= 2 -# asm 1: psrlq $2,x1=reg128#15 -# asm 2: vpor x1=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: x3 = v01 | v11 -# asm 1: vpor x3=reg128#11 -# asm 2: vpor x3=%xmm10 -vpor %xmm12,%xmm10,%xmm10 - -# qhasm: v00 = x4 & mask2 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm2,%xmm6,%xmm12 - -# qhasm: v10 = x6 & mask2 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm2,%xmm8,%xmm15 - -# qhasm: 2x v10 <<= 2 -# asm 1: psllq $2,v01=reg128#7 -# asm 2: vpand v01=%xmm6 -vpand %xmm3,%xmm6,%xmm6 - -# qhasm: v11 = x6 & mask3 -# asm 1: vpand v11=reg128#9 -# asm 2: vpand v11=%xmm8 -vpand %xmm3,%xmm8,%xmm8 - -# qhasm: 2x v01 unsigned>>= 2 -# asm 1: psrlq $2,x4=reg128#13 -# asm 2: vpor x4=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: x6 = v01 | v11 -# asm 1: vpor x6=reg128#7 -# asm 2: vpor x6=%xmm6 -vpor %xmm8,%xmm6,%xmm6 - -# qhasm: v00 = x5 & mask2 -# asm 1: vpand v00=reg128#9 -# asm 2: vpand v00=%xmm8 -vpand %xmm2,%xmm7,%xmm8 - -# qhasm: v10 = x7 & mask2 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm2,%xmm9,%xmm15 - -# qhasm: 2x v10 <<= 2 -# asm 1: psllq $2,v01=reg128#8 -# asm 2: vpand v01=%xmm7 -vpand %xmm3,%xmm7,%xmm7 - -# qhasm: v11 = x7 & mask3 -# asm 1: vpand v11=reg128#10 -# asm 2: vpand v11=%xmm9 -vpand %xmm3,%xmm9,%xmm9 - -# qhasm: 2x v01 unsigned>>= 2 -# asm 1: psrlq $2,x5=reg128#9 -# asm 2: vpor x5=%xmm8 -vpor %xmm15,%xmm8,%xmm8 - -# qhasm: x7 = v01 | v11 -# asm 1: vpor x7=reg128#8 -# asm 2: vpor x7=%xmm7 -vpor %xmm9,%xmm7,%xmm7 - -# qhasm: v00 = x0 & mask4 -# asm 1: vpand v00=reg128#10 -# asm 2: vpand v00=%xmm9 -vpand %xmm4,%xmm13,%xmm9 - -# qhasm: v10 = x1 & mask4 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm4,%xmm14,%xmm15 - -# qhasm: 2x v10 <<= 1 -# asm 1: psllq $1,v01=reg128#14 -# asm 2: vpand v01=%xmm13 -vpand %xmm5,%xmm13,%xmm13 - -# qhasm: v11 = x1 & mask5 -# asm 1: vpand v11=reg128#15 -# asm 2: vpand v11=%xmm14 -vpand %xmm5,%xmm14,%xmm14 - -# qhasm: 2x v01 unsigned>>= 1 -# asm 1: psrlq $1,x0=reg128#10 -# asm 2: vpor x0=%xmm9 -vpor %xmm15,%xmm9,%xmm9 - -# qhasm: x1 = v01 | v11 -# asm 1: vpor x1=reg128#14 -# asm 2: vpor x1=%xmm13 -vpor %xmm14,%xmm13,%xmm13 - -# qhasm: v00 = x2 & mask4 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm4,%xmm11,%xmm14 - -# qhasm: v10 = x3 & mask4 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm4,%xmm10,%xmm15 - -# qhasm: 2x v10 <<= 1 -# asm 1: psllq $1,v01=reg128#12 -# asm 2: vpand v01=%xmm11 -vpand %xmm5,%xmm11,%xmm11 - -# qhasm: v11 = x3 & mask5 -# asm 1: vpand v11=reg128#11 -# asm 2: vpand v11=%xmm10 -vpand %xmm5,%xmm10,%xmm10 - -# qhasm: 2x v01 unsigned>>= 1 -# asm 1: psrlq $1,x2=reg128#15 -# asm 2: vpor x2=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: x3 = v01 | v11 -# asm 1: vpor x3=reg128#11 -# asm 2: vpor x3=%xmm10 -vpor %xmm10,%xmm11,%xmm10 - -# qhasm: v00 = x4 & mask4 -# asm 1: vpand v00=reg128#12 -# asm 2: vpand v00=%xmm11 -vpand %xmm4,%xmm12,%xmm11 - -# qhasm: v10 = x5 & mask4 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm4,%xmm8,%xmm15 - -# qhasm: 2x v10 <<= 1 -# asm 1: psllq $1,v01=reg128#13 -# asm 2: vpand v01=%xmm12 -vpand %xmm5,%xmm12,%xmm12 - -# qhasm: v11 = x5 & mask5 -# asm 1: vpand v11=reg128#9 -# asm 2: vpand v11=%xmm8 -vpand %xmm5,%xmm8,%xmm8 - -# qhasm: 2x v01 unsigned>>= 1 -# asm 1: psrlq $1,x4=reg128#12 -# asm 2: vpor x4=%xmm11 
-vpor %xmm15,%xmm11,%xmm11 - -# qhasm: x5 = v01 | v11 -# asm 1: vpor x5=reg128#9 -# asm 2: vpor x5=%xmm8 -vpor %xmm8,%xmm12,%xmm8 - -# qhasm: v00 = x6 & mask4 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm4,%xmm6,%xmm12 - -# qhasm: v10 = x7 & mask4 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm4,%xmm7,%xmm15 - -# qhasm: 2x v10 <<= 1 -# asm 1: psllq $1,v01=reg128#7 -# asm 2: vpand v01=%xmm6 -vpand %xmm5,%xmm6,%xmm6 - -# qhasm: v11 = x7 & mask5 -# asm 1: vpand v11=reg128#8 -# asm 2: vpand v11=%xmm7 -vpand %xmm5,%xmm7,%xmm7 - -# qhasm: 2x v01 unsigned>>= 1 -# asm 1: psrlq $1,x6=reg128#13 -# asm 2: vpor x6=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: x7 = v01 | v11 -# asm 1: vpor x7=reg128#7 -# asm 2: vpor x7=%xmm6 -vpor %xmm7,%xmm6,%xmm6 - -# qhasm: mem128[ input_0 + 384 ] = x0 -# asm 1: movdqu x0=reg128#7 -# asm 2: movdqu 512(x0=%xmm6 -movdqu 512(%rdi),%xmm6 - -# qhasm: x1 = mem128[ input_0 + 528 ] -# asm 1: movdqu 528(x1=reg128#8 -# asm 2: movdqu 528(x1=%xmm7 -movdqu 528(%rdi),%xmm7 - -# qhasm: x2 = mem128[ input_0 + 544 ] -# asm 1: movdqu 544(x2=reg128#9 -# asm 2: movdqu 544(x2=%xmm8 -movdqu 544(%rdi),%xmm8 - -# qhasm: x3 = mem128[ input_0 + 560 ] -# asm 1: movdqu 560(x3=reg128#10 -# asm 2: movdqu 560(x3=%xmm9 -movdqu 560(%rdi),%xmm9 - -# qhasm: x4 = mem128[ input_0 + 576 ] -# asm 1: movdqu 576(x4=reg128#11 -# asm 2: movdqu 576(x4=%xmm10 -movdqu 576(%rdi),%xmm10 - -# qhasm: x5 = mem128[ input_0 + 592 ] -# asm 1: movdqu 592(x5=reg128#12 -# asm 2: movdqu 592(x5=%xmm11 -movdqu 592(%rdi),%xmm11 - -# qhasm: x6 = mem128[ input_0 + 608 ] -# asm 1: movdqu 608(x6=reg128#13 -# asm 2: movdqu 608(x6=%xmm12 -movdqu 608(%rdi),%xmm12 - -# qhasm: x7 = mem128[ input_0 + 624 ] -# asm 1: movdqu 624(x7=reg128#14 -# asm 2: movdqu 624(x7=%xmm13 -movdqu 624(%rdi),%xmm13 - -# qhasm: v00 = x0 & mask0 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm0,%xmm6,%xmm14 - -# qhasm: v10 = x4 & mask0 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm0,%xmm10,%xmm15 - -# qhasm: 2x v10 <<= 4 -# asm 1: psllq $4,v01=reg128#7 -# asm 2: vpand v01=%xmm6 -vpand %xmm1,%xmm6,%xmm6 - -# qhasm: v11 = x4 & mask1 -# asm 1: vpand v11=reg128#11 -# asm 2: vpand v11=%xmm10 -vpand %xmm1,%xmm10,%xmm10 - -# qhasm: 2x v01 unsigned>>= 4 -# asm 1: psrlq $4,x0=reg128#15 -# asm 2: vpor x0=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: x4 = v01 | v11 -# asm 1: vpor x4=reg128#7 -# asm 2: vpor x4=%xmm6 -vpor %xmm10,%xmm6,%xmm6 - -# qhasm: v00 = x1 & mask0 -# asm 1: vpand v00=reg128#11 -# asm 2: vpand v00=%xmm10 -vpand %xmm0,%xmm7,%xmm10 - -# qhasm: v10 = x5 & mask0 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm0,%xmm11,%xmm15 - -# qhasm: 2x v10 <<= 4 -# asm 1: psllq $4,v01=reg128#8 -# asm 2: vpand v01=%xmm7 -vpand %xmm1,%xmm7,%xmm7 - -# qhasm: v11 = x5 & mask1 -# asm 1: vpand v11=reg128#12 -# asm 2: vpand v11=%xmm11 -vpand %xmm1,%xmm11,%xmm11 - -# qhasm: 2x v01 unsigned>>= 4 -# asm 1: psrlq $4,x1=reg128#11 -# asm 2: vpor x1=%xmm10 -vpor %xmm15,%xmm10,%xmm10 - -# qhasm: x5 = v01 | v11 -# asm 1: vpor x5=reg128#8 -# asm 2: vpor x5=%xmm7 -vpor %xmm11,%xmm7,%xmm7 - -# qhasm: v00 = x2 & mask0 -# asm 1: vpand v00=reg128#12 -# asm 2: vpand v00=%xmm11 -vpand %xmm0,%xmm8,%xmm11 - -# qhasm: v10 = x6 & mask0 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm0,%xmm12,%xmm15 - -# qhasm: 2x v10 <<= 4 -# asm 1: psllq $4,v01=reg128#9 -# asm 2: vpand v01=%xmm8 -vpand %xmm1,%xmm8,%xmm8 - -# qhasm: v11 = x6 & mask1 -# asm 1: vpand v11=reg128#13 -# asm 
2: vpand v11=%xmm12 -vpand %xmm1,%xmm12,%xmm12 - -# qhasm: 2x v01 unsigned>>= 4 -# asm 1: psrlq $4,x2=reg128#12 -# asm 2: vpor x2=%xmm11 -vpor %xmm15,%xmm11,%xmm11 - -# qhasm: x6 = v01 | v11 -# asm 1: vpor x6=reg128#9 -# asm 2: vpor x6=%xmm8 -vpor %xmm12,%xmm8,%xmm8 - -# qhasm: v00 = x3 & mask0 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm0,%xmm9,%xmm12 - -# qhasm: v10 = x7 & mask0 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm0,%xmm13,%xmm15 - -# qhasm: 2x v10 <<= 4 -# asm 1: psllq $4,v01=reg128#10 -# asm 2: vpand v01=%xmm9 -vpand %xmm1,%xmm9,%xmm9 - -# qhasm: v11 = x7 & mask1 -# asm 1: vpand v11=reg128#14 -# asm 2: vpand v11=%xmm13 -vpand %xmm1,%xmm13,%xmm13 - -# qhasm: 2x v01 unsigned>>= 4 -# asm 1: psrlq $4,x3=reg128#13 -# asm 2: vpor x3=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: x7 = v01 | v11 -# asm 1: vpor x7=reg128#10 -# asm 2: vpor x7=%xmm9 -vpor %xmm13,%xmm9,%xmm9 - -# qhasm: v00 = x0 & mask2 -# asm 1: vpand v00=reg128#14 -# asm 2: vpand v00=%xmm13 -vpand %xmm2,%xmm14,%xmm13 - -# qhasm: v10 = x2 & mask2 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm2,%xmm11,%xmm15 - -# qhasm: 2x v10 <<= 2 -# asm 1: psllq $2,v01=reg128#15 -# asm 2: vpand v01=%xmm14 -vpand %xmm3,%xmm14,%xmm14 - -# qhasm: v11 = x2 & mask3 -# asm 1: vpand v11=reg128#12 -# asm 2: vpand v11=%xmm11 -vpand %xmm3,%xmm11,%xmm11 - -# qhasm: 2x v01 unsigned>>= 2 -# asm 1: psrlq $2,x0=reg128#14 -# asm 2: vpor x0=%xmm13 -vpor %xmm15,%xmm13,%xmm13 - -# qhasm: x2 = v01 | v11 -# asm 1: vpor x2=reg128#12 -# asm 2: vpor x2=%xmm11 -vpor %xmm11,%xmm14,%xmm11 - -# qhasm: v00 = x1 & mask2 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm2,%xmm10,%xmm14 - -# qhasm: v10 = x3 & mask2 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm2,%xmm12,%xmm15 - -# qhasm: 2x v10 <<= 2 -# asm 1: psllq $2,v01=reg128#11 -# asm 2: vpand v01=%xmm10 -vpand %xmm3,%xmm10,%xmm10 - -# qhasm: v11 = x3 & mask3 -# asm 1: vpand v11=reg128#13 -# asm 2: vpand v11=%xmm12 -vpand %xmm3,%xmm12,%xmm12 - -# qhasm: 2x v01 unsigned>>= 2 -# asm 1: psrlq $2,x1=reg128#15 -# asm 2: vpor x1=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: x3 = v01 | v11 -# asm 1: vpor x3=reg128#11 -# asm 2: vpor x3=%xmm10 -vpor %xmm12,%xmm10,%xmm10 - -# qhasm: v00 = x4 & mask2 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm2,%xmm6,%xmm12 - -# qhasm: v10 = x6 & mask2 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm2,%xmm8,%xmm15 - -# qhasm: 2x v10 <<= 2 -# asm 1: psllq $2,v01=reg128#7 -# asm 2: vpand v01=%xmm6 -vpand %xmm3,%xmm6,%xmm6 - -# qhasm: v11 = x6 & mask3 -# asm 1: vpand v11=reg128#9 -# asm 2: vpand v11=%xmm8 -vpand %xmm3,%xmm8,%xmm8 - -# qhasm: 2x v01 unsigned>>= 2 -# asm 1: psrlq $2,x4=reg128#13 -# asm 2: vpor x4=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: x6 = v01 | v11 -# asm 1: vpor x6=reg128#7 -# asm 2: vpor x6=%xmm6 -vpor %xmm8,%xmm6,%xmm6 - -# qhasm: v00 = x5 & mask2 -# asm 1: vpand v00=reg128#9 -# asm 2: vpand v00=%xmm8 -vpand %xmm2,%xmm7,%xmm8 - -# qhasm: v10 = x7 & mask2 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm2,%xmm9,%xmm15 - -# qhasm: 2x v10 <<= 2 -# asm 1: psllq $2,v01=reg128#8 -# asm 2: vpand v01=%xmm7 -vpand %xmm3,%xmm7,%xmm7 - -# qhasm: v11 = x7 & mask3 -# asm 1: vpand v11=reg128#10 -# asm 2: vpand v11=%xmm9 -vpand %xmm3,%xmm9,%xmm9 - -# qhasm: 2x v01 unsigned>>= 2 -# asm 1: psrlq $2,x5=reg128#9 -# asm 2: vpor x5=%xmm8 -vpor %xmm15,%xmm8,%xmm8 - -# qhasm: x7 = v01 | v11 -# asm 1: vpor x7=reg128#8 -# 
asm 2: vpor x7=%xmm7 -vpor %xmm9,%xmm7,%xmm7 - -# qhasm: v00 = x0 & mask4 -# asm 1: vpand v00=reg128#10 -# asm 2: vpand v00=%xmm9 -vpand %xmm4,%xmm13,%xmm9 - -# qhasm: v10 = x1 & mask4 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm4,%xmm14,%xmm15 - -# qhasm: 2x v10 <<= 1 -# asm 1: psllq $1,v01=reg128#14 -# asm 2: vpand v01=%xmm13 -vpand %xmm5,%xmm13,%xmm13 - -# qhasm: v11 = x1 & mask5 -# asm 1: vpand v11=reg128#15 -# asm 2: vpand v11=%xmm14 -vpand %xmm5,%xmm14,%xmm14 - -# qhasm: 2x v01 unsigned>>= 1 -# asm 1: psrlq $1,x0=reg128#10 -# asm 2: vpor x0=%xmm9 -vpor %xmm15,%xmm9,%xmm9 - -# qhasm: x1 = v01 | v11 -# asm 1: vpor x1=reg128#14 -# asm 2: vpor x1=%xmm13 -vpor %xmm14,%xmm13,%xmm13 - -# qhasm: v00 = x2 & mask4 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm4,%xmm11,%xmm14 - -# qhasm: v10 = x3 & mask4 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm4,%xmm10,%xmm15 - -# qhasm: 2x v10 <<= 1 -# asm 1: psllq $1,v01=reg128#12 -# asm 2: vpand v01=%xmm11 -vpand %xmm5,%xmm11,%xmm11 - -# qhasm: v11 = x3 & mask5 -# asm 1: vpand v11=reg128#11 -# asm 2: vpand v11=%xmm10 -vpand %xmm5,%xmm10,%xmm10 - -# qhasm: 2x v01 unsigned>>= 1 -# asm 1: psrlq $1,x2=reg128#15 -# asm 2: vpor x2=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: x3 = v01 | v11 -# asm 1: vpor x3=reg128#11 -# asm 2: vpor x3=%xmm10 -vpor %xmm10,%xmm11,%xmm10 - -# qhasm: v00 = x4 & mask4 -# asm 1: vpand v00=reg128#12 -# asm 2: vpand v00=%xmm11 -vpand %xmm4,%xmm12,%xmm11 - -# qhasm: v10 = x5 & mask4 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm4,%xmm8,%xmm15 - -# qhasm: 2x v10 <<= 1 -# asm 1: psllq $1,v01=reg128#13 -# asm 2: vpand v01=%xmm12 -vpand %xmm5,%xmm12,%xmm12 - -# qhasm: v11 = x5 & mask5 -# asm 1: vpand v11=reg128#9 -# asm 2: vpand v11=%xmm8 -vpand %xmm5,%xmm8,%xmm8 - -# qhasm: 2x v01 unsigned>>= 1 -# asm 1: psrlq $1,x4=reg128#12 -# asm 2: vpor x4=%xmm11 -vpor %xmm15,%xmm11,%xmm11 - -# qhasm: x5 = v01 | v11 -# asm 1: vpor x5=reg128#9 -# asm 2: vpor x5=%xmm8 -vpor %xmm8,%xmm12,%xmm8 - -# qhasm: v00 = x6 & mask4 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm4,%xmm6,%xmm12 - -# qhasm: v10 = x7 & mask4 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm4,%xmm7,%xmm15 - -# qhasm: 2x v10 <<= 1 -# asm 1: psllq $1,v01=reg128#7 -# asm 2: vpand v01=%xmm6 -vpand %xmm5,%xmm6,%xmm6 - -# qhasm: v11 = x7 & mask5 -# asm 1: vpand v11=reg128#8 -# asm 2: vpand v11=%xmm7 -vpand %xmm5,%xmm7,%xmm7 - -# qhasm: 2x v01 unsigned>>= 1 -# asm 1: psrlq $1,x6=reg128#13 -# asm 2: vpor x6=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: x7 = v01 | v11 -# asm 1: vpor x7=reg128#7 -# asm 2: vpor x7=%xmm6 -vpor %xmm7,%xmm6,%xmm6 - -# qhasm: mem128[ input_0 + 512 ] = x0 -# asm 1: movdqu x0=reg128#7 -# asm 2: movdqu 640(x0=%xmm6 -movdqu 640(%rdi),%xmm6 - -# qhasm: x1 = mem128[ input_0 + 656 ] -# asm 1: movdqu 656(x1=reg128#8 -# asm 2: movdqu 656(x1=%xmm7 -movdqu 656(%rdi),%xmm7 - -# qhasm: x2 = mem128[ input_0 + 672 ] -# asm 1: movdqu 672(x2=reg128#9 -# asm 2: movdqu 672(x2=%xmm8 -movdqu 672(%rdi),%xmm8 - -# qhasm: x3 = mem128[ input_0 + 688 ] -# asm 1: movdqu 688(x3=reg128#10 -# asm 2: movdqu 688(x3=%xmm9 -movdqu 688(%rdi),%xmm9 - -# qhasm: x4 = mem128[ input_0 + 704 ] -# asm 1: movdqu 704(x4=reg128#11 -# asm 2: movdqu 704(x4=%xmm10 -movdqu 704(%rdi),%xmm10 - -# qhasm: x5 = mem128[ input_0 + 720 ] -# asm 1: movdqu 720(x5=reg128#12 -# asm 2: movdqu 720(x5=%xmm11 -movdqu 720(%rdi),%xmm11 - -# qhasm: x6 = mem128[ input_0 + 736 ] -# asm 1: movdqu 
736(x6=reg128#13 -# asm 2: movdqu 736(x6=%xmm12 -movdqu 736(%rdi),%xmm12 - -# qhasm: x7 = mem128[ input_0 + 752 ] -# asm 1: movdqu 752(x7=reg128#14 -# asm 2: movdqu 752(x7=%xmm13 -movdqu 752(%rdi),%xmm13 - -# qhasm: v00 = x0 & mask0 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm0,%xmm6,%xmm14 - -# qhasm: v10 = x4 & mask0 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm0,%xmm10,%xmm15 - -# qhasm: 2x v10 <<= 4 -# asm 1: psllq $4,v01=reg128#7 -# asm 2: vpand v01=%xmm6 -vpand %xmm1,%xmm6,%xmm6 - -# qhasm: v11 = x4 & mask1 -# asm 1: vpand v11=reg128#11 -# asm 2: vpand v11=%xmm10 -vpand %xmm1,%xmm10,%xmm10 - -# qhasm: 2x v01 unsigned>>= 4 -# asm 1: psrlq $4,x0=reg128#15 -# asm 2: vpor x0=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: x4 = v01 | v11 -# asm 1: vpor x4=reg128#7 -# asm 2: vpor x4=%xmm6 -vpor %xmm10,%xmm6,%xmm6 - -# qhasm: v00 = x1 & mask0 -# asm 1: vpand v00=reg128#11 -# asm 2: vpand v00=%xmm10 -vpand %xmm0,%xmm7,%xmm10 - -# qhasm: v10 = x5 & mask0 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm0,%xmm11,%xmm15 - -# qhasm: 2x v10 <<= 4 -# asm 1: psllq $4,v01=reg128#8 -# asm 2: vpand v01=%xmm7 -vpand %xmm1,%xmm7,%xmm7 - -# qhasm: v11 = x5 & mask1 -# asm 1: vpand v11=reg128#12 -# asm 2: vpand v11=%xmm11 -vpand %xmm1,%xmm11,%xmm11 - -# qhasm: 2x v01 unsigned>>= 4 -# asm 1: psrlq $4,x1=reg128#11 -# asm 2: vpor x1=%xmm10 -vpor %xmm15,%xmm10,%xmm10 - -# qhasm: x5 = v01 | v11 -# asm 1: vpor x5=reg128#8 -# asm 2: vpor x5=%xmm7 -vpor %xmm11,%xmm7,%xmm7 - -# qhasm: v00 = x2 & mask0 -# asm 1: vpand v00=reg128#12 -# asm 2: vpand v00=%xmm11 -vpand %xmm0,%xmm8,%xmm11 - -# qhasm: v10 = x6 & mask0 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm0,%xmm12,%xmm15 - -# qhasm: 2x v10 <<= 4 -# asm 1: psllq $4,v01=reg128#9 -# asm 2: vpand v01=%xmm8 -vpand %xmm1,%xmm8,%xmm8 - -# qhasm: v11 = x6 & mask1 -# asm 1: vpand v11=reg128#13 -# asm 2: vpand v11=%xmm12 -vpand %xmm1,%xmm12,%xmm12 - -# qhasm: 2x v01 unsigned>>= 4 -# asm 1: psrlq $4,x2=reg128#12 -# asm 2: vpor x2=%xmm11 -vpor %xmm15,%xmm11,%xmm11 - -# qhasm: x6 = v01 | v11 -# asm 1: vpor x6=reg128#9 -# asm 2: vpor x6=%xmm8 -vpor %xmm12,%xmm8,%xmm8 - -# qhasm: v00 = x3 & mask0 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm0,%xmm9,%xmm12 - -# qhasm: v10 = x7 & mask0 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm0,%xmm13,%xmm15 - -# qhasm: 2x v10 <<= 4 -# asm 1: psllq $4,v01=reg128#10 -# asm 2: vpand v01=%xmm9 -vpand %xmm1,%xmm9,%xmm9 - -# qhasm: v11 = x7 & mask1 -# asm 1: vpand v11=reg128#14 -# asm 2: vpand v11=%xmm13 -vpand %xmm1,%xmm13,%xmm13 - -# qhasm: 2x v01 unsigned>>= 4 -# asm 1: psrlq $4,x3=reg128#13 -# asm 2: vpor x3=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: x7 = v01 | v11 -# asm 1: vpor x7=reg128#10 -# asm 2: vpor x7=%xmm9 -vpor %xmm13,%xmm9,%xmm9 - -# qhasm: v00 = x0 & mask2 -# asm 1: vpand v00=reg128#14 -# asm 2: vpand v00=%xmm13 -vpand %xmm2,%xmm14,%xmm13 - -# qhasm: v10 = x2 & mask2 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm2,%xmm11,%xmm15 - -# qhasm: 2x v10 <<= 2 -# asm 1: psllq $2,v01=reg128#15 -# asm 2: vpand v01=%xmm14 -vpand %xmm3,%xmm14,%xmm14 - -# qhasm: v11 = x2 & mask3 -# asm 1: vpand v11=reg128#12 -# asm 2: vpand v11=%xmm11 -vpand %xmm3,%xmm11,%xmm11 - -# qhasm: 2x v01 unsigned>>= 2 -# asm 1: psrlq $2,x0=reg128#14 -# asm 2: vpor x0=%xmm13 -vpor %xmm15,%xmm13,%xmm13 - -# qhasm: x2 = v01 | v11 -# asm 1: vpor x2=reg128#12 -# asm 2: vpor x2=%xmm11 -vpor %xmm11,%xmm14,%xmm11 - -# 
qhasm: v00 = x1 & mask2 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm2,%xmm10,%xmm14 - -# qhasm: v10 = x3 & mask2 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm2,%xmm12,%xmm15 - -# qhasm: 2x v10 <<= 2 -# asm 1: psllq $2,v01=reg128#11 -# asm 2: vpand v01=%xmm10 -vpand %xmm3,%xmm10,%xmm10 - -# qhasm: v11 = x3 & mask3 -# asm 1: vpand v11=reg128#13 -# asm 2: vpand v11=%xmm12 -vpand %xmm3,%xmm12,%xmm12 - -# qhasm: 2x v01 unsigned>>= 2 -# asm 1: psrlq $2,x1=reg128#15 -# asm 2: vpor x1=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: x3 = v01 | v11 -# asm 1: vpor x3=reg128#11 -# asm 2: vpor x3=%xmm10 -vpor %xmm12,%xmm10,%xmm10 - -# qhasm: v00 = x4 & mask2 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm2,%xmm6,%xmm12 - -# qhasm: v10 = x6 & mask2 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm2,%xmm8,%xmm15 - -# qhasm: 2x v10 <<= 2 -# asm 1: psllq $2,v01=reg128#7 -# asm 2: vpand v01=%xmm6 -vpand %xmm3,%xmm6,%xmm6 - -# qhasm: v11 = x6 & mask3 -# asm 1: vpand v11=reg128#9 -# asm 2: vpand v11=%xmm8 -vpand %xmm3,%xmm8,%xmm8 - -# qhasm: 2x v01 unsigned>>= 2 -# asm 1: psrlq $2,x4=reg128#13 -# asm 2: vpor x4=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: x6 = v01 | v11 -# asm 1: vpor x6=reg128#7 -# asm 2: vpor x6=%xmm6 -vpor %xmm8,%xmm6,%xmm6 - -# qhasm: v00 = x5 & mask2 -# asm 1: vpand v00=reg128#9 -# asm 2: vpand v00=%xmm8 -vpand %xmm2,%xmm7,%xmm8 - -# qhasm: v10 = x7 & mask2 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm2,%xmm9,%xmm15 - -# qhasm: 2x v10 <<= 2 -# asm 1: psllq $2,v01=reg128#8 -# asm 2: vpand v01=%xmm7 -vpand %xmm3,%xmm7,%xmm7 - -# qhasm: v11 = x7 & mask3 -# asm 1: vpand v11=reg128#10 -# asm 2: vpand v11=%xmm9 -vpand %xmm3,%xmm9,%xmm9 - -# qhasm: 2x v01 unsigned>>= 2 -# asm 1: psrlq $2,x5=reg128#9 -# asm 2: vpor x5=%xmm8 -vpor %xmm15,%xmm8,%xmm8 - -# qhasm: x7 = v01 | v11 -# asm 1: vpor x7=reg128#8 -# asm 2: vpor x7=%xmm7 -vpor %xmm9,%xmm7,%xmm7 - -# qhasm: v00 = x0 & mask4 -# asm 1: vpand v00=reg128#10 -# asm 2: vpand v00=%xmm9 -vpand %xmm4,%xmm13,%xmm9 - -# qhasm: v10 = x1 & mask4 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm4,%xmm14,%xmm15 - -# qhasm: 2x v10 <<= 1 -# asm 1: psllq $1,v01=reg128#14 -# asm 2: vpand v01=%xmm13 -vpand %xmm5,%xmm13,%xmm13 - -# qhasm: v11 = x1 & mask5 -# asm 1: vpand v11=reg128#15 -# asm 2: vpand v11=%xmm14 -vpand %xmm5,%xmm14,%xmm14 - -# qhasm: 2x v01 unsigned>>= 1 -# asm 1: psrlq $1,x0=reg128#10 -# asm 2: vpor x0=%xmm9 -vpor %xmm15,%xmm9,%xmm9 - -# qhasm: x1 = v01 | v11 -# asm 1: vpor x1=reg128#14 -# asm 2: vpor x1=%xmm13 -vpor %xmm14,%xmm13,%xmm13 - -# qhasm: v00 = x2 & mask4 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm4,%xmm11,%xmm14 - -# qhasm: v10 = x3 & mask4 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm4,%xmm10,%xmm15 - -# qhasm: 2x v10 <<= 1 -# asm 1: psllq $1,v01=reg128#12 -# asm 2: vpand v01=%xmm11 -vpand %xmm5,%xmm11,%xmm11 - -# qhasm: v11 = x3 & mask5 -# asm 1: vpand v11=reg128#11 -# asm 2: vpand v11=%xmm10 -vpand %xmm5,%xmm10,%xmm10 - -# qhasm: 2x v01 unsigned>>= 1 -# asm 1: psrlq $1,x2=reg128#15 -# asm 2: vpor x2=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: x3 = v01 | v11 -# asm 1: vpor x3=reg128#11 -# asm 2: vpor x3=%xmm10 -vpor %xmm10,%xmm11,%xmm10 - -# qhasm: v00 = x4 & mask4 -# asm 1: vpand v00=reg128#12 -# asm 2: vpand v00=%xmm11 -vpand %xmm4,%xmm12,%xmm11 - -# qhasm: v10 = x5 & mask4 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm4,%xmm8,%xmm15 
- -# qhasm: 2x v10 <<= 1 -# asm 1: psllq $1,v01=reg128#13 -# asm 2: vpand v01=%xmm12 -vpand %xmm5,%xmm12,%xmm12 - -# qhasm: v11 = x5 & mask5 -# asm 1: vpand v11=reg128#9 -# asm 2: vpand v11=%xmm8 -vpand %xmm5,%xmm8,%xmm8 - -# qhasm: 2x v01 unsigned>>= 1 -# asm 1: psrlq $1,x4=reg128#12 -# asm 2: vpor x4=%xmm11 -vpor %xmm15,%xmm11,%xmm11 - -# qhasm: x5 = v01 | v11 -# asm 1: vpor x5=reg128#9 -# asm 2: vpor x5=%xmm8 -vpor %xmm8,%xmm12,%xmm8 - -# qhasm: v00 = x6 & mask4 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm4,%xmm6,%xmm12 - -# qhasm: v10 = x7 & mask4 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm4,%xmm7,%xmm15 - -# qhasm: 2x v10 <<= 1 -# asm 1: psllq $1,v01=reg128#7 -# asm 2: vpand v01=%xmm6 -vpand %xmm5,%xmm6,%xmm6 - -# qhasm: v11 = x7 & mask5 -# asm 1: vpand v11=reg128#8 -# asm 2: vpand v11=%xmm7 -vpand %xmm5,%xmm7,%xmm7 - -# qhasm: 2x v01 unsigned>>= 1 -# asm 1: psrlq $1,x6=reg128#13 -# asm 2: vpor x6=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: x7 = v01 | v11 -# asm 1: vpor x7=reg128#7 -# asm 2: vpor x7=%xmm6 -vpor %xmm7,%xmm6,%xmm6 - -# qhasm: mem128[ input_0 + 640 ] = x0 -# asm 1: movdqu x0=reg128#7 -# asm 2: movdqu 768(x0=%xmm6 -movdqu 768(%rdi),%xmm6 - -# qhasm: x1 = mem128[ input_0 + 784 ] -# asm 1: movdqu 784(x1=reg128#8 -# asm 2: movdqu 784(x1=%xmm7 -movdqu 784(%rdi),%xmm7 - -# qhasm: x2 = mem128[ input_0 + 800 ] -# asm 1: movdqu 800(x2=reg128#9 -# asm 2: movdqu 800(x2=%xmm8 -movdqu 800(%rdi),%xmm8 - -# qhasm: x3 = mem128[ input_0 + 816 ] -# asm 1: movdqu 816(x3=reg128#10 -# asm 2: movdqu 816(x3=%xmm9 -movdqu 816(%rdi),%xmm9 - -# qhasm: x4 = mem128[ input_0 + 832 ] -# asm 1: movdqu 832(x4=reg128#11 -# asm 2: movdqu 832(x4=%xmm10 -movdqu 832(%rdi),%xmm10 - -# qhasm: x5 = mem128[ input_0 + 848 ] -# asm 1: movdqu 848(x5=reg128#12 -# asm 2: movdqu 848(x5=%xmm11 -movdqu 848(%rdi),%xmm11 - -# qhasm: x6 = mem128[ input_0 + 864 ] -# asm 1: movdqu 864(x6=reg128#13 -# asm 2: movdqu 864(x6=%xmm12 -movdqu 864(%rdi),%xmm12 - -# qhasm: x7 = mem128[ input_0 + 880 ] -# asm 1: movdqu 880(x7=reg128#14 -# asm 2: movdqu 880(x7=%xmm13 -movdqu 880(%rdi),%xmm13 - -# qhasm: v00 = x0 & mask0 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm0,%xmm6,%xmm14 - -# qhasm: v10 = x4 & mask0 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm0,%xmm10,%xmm15 - -# qhasm: 2x v10 <<= 4 -# asm 1: psllq $4,v01=reg128#7 -# asm 2: vpand v01=%xmm6 -vpand %xmm1,%xmm6,%xmm6 - -# qhasm: v11 = x4 & mask1 -# asm 1: vpand v11=reg128#11 -# asm 2: vpand v11=%xmm10 -vpand %xmm1,%xmm10,%xmm10 - -# qhasm: 2x v01 unsigned>>= 4 -# asm 1: psrlq $4,x0=reg128#15 -# asm 2: vpor x0=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: x4 = v01 | v11 -# asm 1: vpor x4=reg128#7 -# asm 2: vpor x4=%xmm6 -vpor %xmm10,%xmm6,%xmm6 - -# qhasm: v00 = x1 & mask0 -# asm 1: vpand v00=reg128#11 -# asm 2: vpand v00=%xmm10 -vpand %xmm0,%xmm7,%xmm10 - -# qhasm: v10 = x5 & mask0 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm0,%xmm11,%xmm15 - -# qhasm: 2x v10 <<= 4 -# asm 1: psllq $4,v01=reg128#8 -# asm 2: vpand v01=%xmm7 -vpand %xmm1,%xmm7,%xmm7 - -# qhasm: v11 = x5 & mask1 -# asm 1: vpand v11=reg128#12 -# asm 2: vpand v11=%xmm11 -vpand %xmm1,%xmm11,%xmm11 - -# qhasm: 2x v01 unsigned>>= 4 -# asm 1: psrlq $4,x1=reg128#11 -# asm 2: vpor x1=%xmm10 -vpor %xmm15,%xmm10,%xmm10 - -# qhasm: x5 = v01 | v11 -# asm 1: vpor x5=reg128#8 -# asm 2: vpor x5=%xmm7 -vpor %xmm11,%xmm7,%xmm7 - -# qhasm: v00 = x2 & mask0 -# asm 1: vpand v00=reg128#12 -# asm 2: vpand v00=%xmm11 
-vpand %xmm0,%xmm8,%xmm11 - -# qhasm: v10 = x6 & mask0 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm0,%xmm12,%xmm15 - -# qhasm: 2x v10 <<= 4 -# asm 1: psllq $4,v01=reg128#9 -# asm 2: vpand v01=%xmm8 -vpand %xmm1,%xmm8,%xmm8 - -# qhasm: v11 = x6 & mask1 -# asm 1: vpand v11=reg128#13 -# asm 2: vpand v11=%xmm12 -vpand %xmm1,%xmm12,%xmm12 - -# qhasm: 2x v01 unsigned>>= 4 -# asm 1: psrlq $4,x2=reg128#12 -# asm 2: vpor x2=%xmm11 -vpor %xmm15,%xmm11,%xmm11 - -# qhasm: x6 = v01 | v11 -# asm 1: vpor x6=reg128#9 -# asm 2: vpor x6=%xmm8 -vpor %xmm12,%xmm8,%xmm8 - -# qhasm: v00 = x3 & mask0 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm0,%xmm9,%xmm12 - -# qhasm: v10 = x7 & mask0 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm0,%xmm13,%xmm15 - -# qhasm: 2x v10 <<= 4 -# asm 1: psllq $4,v01=reg128#10 -# asm 2: vpand v01=%xmm9 -vpand %xmm1,%xmm9,%xmm9 - -# qhasm: v11 = x7 & mask1 -# asm 1: vpand v11=reg128#14 -# asm 2: vpand v11=%xmm13 -vpand %xmm1,%xmm13,%xmm13 - -# qhasm: 2x v01 unsigned>>= 4 -# asm 1: psrlq $4,x3=reg128#13 -# asm 2: vpor x3=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: x7 = v01 | v11 -# asm 1: vpor x7=reg128#10 -# asm 2: vpor x7=%xmm9 -vpor %xmm13,%xmm9,%xmm9 - -# qhasm: v00 = x0 & mask2 -# asm 1: vpand v00=reg128#14 -# asm 2: vpand v00=%xmm13 -vpand %xmm2,%xmm14,%xmm13 - -# qhasm: v10 = x2 & mask2 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm2,%xmm11,%xmm15 - -# qhasm: 2x v10 <<= 2 -# asm 1: psllq $2,v01=reg128#15 -# asm 2: vpand v01=%xmm14 -vpand %xmm3,%xmm14,%xmm14 - -# qhasm: v11 = x2 & mask3 -# asm 1: vpand v11=reg128#12 -# asm 2: vpand v11=%xmm11 -vpand %xmm3,%xmm11,%xmm11 - -# qhasm: 2x v01 unsigned>>= 2 -# asm 1: psrlq $2,x0=reg128#14 -# asm 2: vpor x0=%xmm13 -vpor %xmm15,%xmm13,%xmm13 - -# qhasm: x2 = v01 | v11 -# asm 1: vpor x2=reg128#12 -# asm 2: vpor x2=%xmm11 -vpor %xmm11,%xmm14,%xmm11 - -# qhasm: v00 = x1 & mask2 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm2,%xmm10,%xmm14 - -# qhasm: v10 = x3 & mask2 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm2,%xmm12,%xmm15 - -# qhasm: 2x v10 <<= 2 -# asm 1: psllq $2,v01=reg128#11 -# asm 2: vpand v01=%xmm10 -vpand %xmm3,%xmm10,%xmm10 - -# qhasm: v11 = x3 & mask3 -# asm 1: vpand v11=reg128#13 -# asm 2: vpand v11=%xmm12 -vpand %xmm3,%xmm12,%xmm12 - -# qhasm: 2x v01 unsigned>>= 2 -# asm 1: psrlq $2,x1=reg128#15 -# asm 2: vpor x1=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: x3 = v01 | v11 -# asm 1: vpor x3=reg128#11 -# asm 2: vpor x3=%xmm10 -vpor %xmm12,%xmm10,%xmm10 - -# qhasm: v00 = x4 & mask2 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm2,%xmm6,%xmm12 - -# qhasm: v10 = x6 & mask2 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm2,%xmm8,%xmm15 - -# qhasm: 2x v10 <<= 2 -# asm 1: psllq $2,v01=reg128#7 -# asm 2: vpand v01=%xmm6 -vpand %xmm3,%xmm6,%xmm6 - -# qhasm: v11 = x6 & mask3 -# asm 1: vpand v11=reg128#9 -# asm 2: vpand v11=%xmm8 -vpand %xmm3,%xmm8,%xmm8 - -# qhasm: 2x v01 unsigned>>= 2 -# asm 1: psrlq $2,x4=reg128#13 -# asm 2: vpor x4=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: x6 = v01 | v11 -# asm 1: vpor x6=reg128#7 -# asm 2: vpor x6=%xmm6 -vpor %xmm8,%xmm6,%xmm6 - -# qhasm: v00 = x5 & mask2 -# asm 1: vpand v00=reg128#9 -# asm 2: vpand v00=%xmm8 -vpand %xmm2,%xmm7,%xmm8 - -# qhasm: v10 = x7 & mask2 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm2,%xmm9,%xmm15 - -# qhasm: 2x v10 <<= 2 -# asm 1: psllq $2,v01=reg128#8 -# asm 2: vpand 
v01=%xmm7 -vpand %xmm3,%xmm7,%xmm7 - -# qhasm: v11 = x7 & mask3 -# asm 1: vpand v11=reg128#10 -# asm 2: vpand v11=%xmm9 -vpand %xmm3,%xmm9,%xmm9 - -# qhasm: 2x v01 unsigned>>= 2 -# asm 1: psrlq $2,x5=reg128#9 -# asm 2: vpor x5=%xmm8 -vpor %xmm15,%xmm8,%xmm8 - -# qhasm: x7 = v01 | v11 -# asm 1: vpor x7=reg128#8 -# asm 2: vpor x7=%xmm7 -vpor %xmm9,%xmm7,%xmm7 - -# qhasm: v00 = x0 & mask4 -# asm 1: vpand v00=reg128#10 -# asm 2: vpand v00=%xmm9 -vpand %xmm4,%xmm13,%xmm9 - -# qhasm: v10 = x1 & mask4 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm4,%xmm14,%xmm15 - -# qhasm: 2x v10 <<= 1 -# asm 1: psllq $1,v01=reg128#14 -# asm 2: vpand v01=%xmm13 -vpand %xmm5,%xmm13,%xmm13 - -# qhasm: v11 = x1 & mask5 -# asm 1: vpand v11=reg128#15 -# asm 2: vpand v11=%xmm14 -vpand %xmm5,%xmm14,%xmm14 - -# qhasm: 2x v01 unsigned>>= 1 -# asm 1: psrlq $1,x0=reg128#10 -# asm 2: vpor x0=%xmm9 -vpor %xmm15,%xmm9,%xmm9 - -# qhasm: x1 = v01 | v11 -# asm 1: vpor x1=reg128#14 -# asm 2: vpor x1=%xmm13 -vpor %xmm14,%xmm13,%xmm13 - -# qhasm: v00 = x2 & mask4 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm4,%xmm11,%xmm14 - -# qhasm: v10 = x3 & mask4 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm4,%xmm10,%xmm15 - -# qhasm: 2x v10 <<= 1 -# asm 1: psllq $1,v01=reg128#12 -# asm 2: vpand v01=%xmm11 -vpand %xmm5,%xmm11,%xmm11 - -# qhasm: v11 = x3 & mask5 -# asm 1: vpand v11=reg128#11 -# asm 2: vpand v11=%xmm10 -vpand %xmm5,%xmm10,%xmm10 - -# qhasm: 2x v01 unsigned>>= 1 -# asm 1: psrlq $1,x2=reg128#15 -# asm 2: vpor x2=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: x3 = v01 | v11 -# asm 1: vpor x3=reg128#11 -# asm 2: vpor x3=%xmm10 -vpor %xmm10,%xmm11,%xmm10 - -# qhasm: v00 = x4 & mask4 -# asm 1: vpand v00=reg128#12 -# asm 2: vpand v00=%xmm11 -vpand %xmm4,%xmm12,%xmm11 - -# qhasm: v10 = x5 & mask4 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm4,%xmm8,%xmm15 - -# qhasm: 2x v10 <<= 1 -# asm 1: psllq $1,v01=reg128#13 -# asm 2: vpand v01=%xmm12 -vpand %xmm5,%xmm12,%xmm12 - -# qhasm: v11 = x5 & mask5 -# asm 1: vpand v11=reg128#9 -# asm 2: vpand v11=%xmm8 -vpand %xmm5,%xmm8,%xmm8 - -# qhasm: 2x v01 unsigned>>= 1 -# asm 1: psrlq $1,x4=reg128#12 -# asm 2: vpor x4=%xmm11 -vpor %xmm15,%xmm11,%xmm11 - -# qhasm: x5 = v01 | v11 -# asm 1: vpor x5=reg128#9 -# asm 2: vpor x5=%xmm8 -vpor %xmm8,%xmm12,%xmm8 - -# qhasm: v00 = x6 & mask4 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm4,%xmm6,%xmm12 - -# qhasm: v10 = x7 & mask4 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm4,%xmm7,%xmm15 - -# qhasm: 2x v10 <<= 1 -# asm 1: psllq $1,v01=reg128#7 -# asm 2: vpand v01=%xmm6 -vpand %xmm5,%xmm6,%xmm6 - -# qhasm: v11 = x7 & mask5 -# asm 1: vpand v11=reg128#8 -# asm 2: vpand v11=%xmm7 -vpand %xmm5,%xmm7,%xmm7 - -# qhasm: 2x v01 unsigned>>= 1 -# asm 1: psrlq $1,x6=reg128#13 -# asm 2: vpor x6=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: x7 = v01 | v11 -# asm 1: vpor x7=reg128#7 -# asm 2: vpor x7=%xmm6 -vpor %xmm7,%xmm6,%xmm6 - -# qhasm: mem128[ input_0 + 768 ] = x0 -# asm 1: movdqu x0=reg128#7 -# asm 2: movdqu 896(x0=%xmm6 -movdqu 896(%rdi),%xmm6 - -# qhasm: x1 = mem128[ input_0 + 912 ] -# asm 1: movdqu 912(x1=reg128#8 -# asm 2: movdqu 912(x1=%xmm7 -movdqu 912(%rdi),%xmm7 - -# qhasm: x2 = mem128[ input_0 + 928 ] -# asm 1: movdqu 928(x2=reg128#9 -# asm 2: movdqu 928(x2=%xmm8 -movdqu 928(%rdi),%xmm8 - -# qhasm: x3 = mem128[ input_0 + 944 ] -# asm 1: movdqu 944(x3=reg128#10 -# asm 2: movdqu 944(x3=%xmm9 -movdqu 944(%rdi),%xmm9 - -# qhasm: x4 
= mem128[ input_0 + 960 ] -# asm 1: movdqu 960(x4=reg128#11 -# asm 2: movdqu 960(x4=%xmm10 -movdqu 960(%rdi),%xmm10 - -# qhasm: x5 = mem128[ input_0 + 976 ] -# asm 1: movdqu 976(x5=reg128#12 -# asm 2: movdqu 976(x5=%xmm11 -movdqu 976(%rdi),%xmm11 - -# qhasm: x6 = mem128[ input_0 + 992 ] -# asm 1: movdqu 992(x6=reg128#13 -# asm 2: movdqu 992(x6=%xmm12 -movdqu 992(%rdi),%xmm12 - -# qhasm: x7 = mem128[ input_0 + 1008 ] -# asm 1: movdqu 1008(x7=reg128#14 -# asm 2: movdqu 1008(x7=%xmm13 -movdqu 1008(%rdi),%xmm13 - -# qhasm: v00 = x0 & mask0 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm0,%xmm6,%xmm14 - -# qhasm: v10 = x4 & mask0 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm0,%xmm10,%xmm15 - -# qhasm: 2x v10 <<= 4 -# asm 1: psllq $4,v01=reg128#7 -# asm 2: vpand v01=%xmm6 -vpand %xmm1,%xmm6,%xmm6 - -# qhasm: v11 = x4 & mask1 -# asm 1: vpand v11=reg128#11 -# asm 2: vpand v11=%xmm10 -vpand %xmm1,%xmm10,%xmm10 - -# qhasm: 2x v01 unsigned>>= 4 -# asm 1: psrlq $4,x0=reg128#15 -# asm 2: vpor x0=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: x4 = v01 | v11 -# asm 1: vpor x4=reg128#7 -# asm 2: vpor x4=%xmm6 -vpor %xmm10,%xmm6,%xmm6 - -# qhasm: v00 = x1 & mask0 -# asm 1: vpand v00=reg128#11 -# asm 2: vpand v00=%xmm10 -vpand %xmm0,%xmm7,%xmm10 - -# qhasm: v10 = x5 & mask0 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm0,%xmm11,%xmm15 - -# qhasm: 2x v10 <<= 4 -# asm 1: psllq $4,v01=reg128#8 -# asm 2: vpand v01=%xmm7 -vpand %xmm1,%xmm7,%xmm7 - -# qhasm: v11 = x5 & mask1 -# asm 1: vpand v11=reg128#12 -# asm 2: vpand v11=%xmm11 -vpand %xmm1,%xmm11,%xmm11 - -# qhasm: 2x v01 unsigned>>= 4 -# asm 1: psrlq $4,x1=reg128#11 -# asm 2: vpor x1=%xmm10 -vpor %xmm15,%xmm10,%xmm10 - -# qhasm: x5 = v01 | v11 -# asm 1: vpor x5=reg128#8 -# asm 2: vpor x5=%xmm7 -vpor %xmm11,%xmm7,%xmm7 - -# qhasm: v00 = x2 & mask0 -# asm 1: vpand v00=reg128#12 -# asm 2: vpand v00=%xmm11 -vpand %xmm0,%xmm8,%xmm11 - -# qhasm: v10 = x6 & mask0 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm0,%xmm12,%xmm15 - -# qhasm: 2x v10 <<= 4 -# asm 1: psllq $4,v01=reg128#9 -# asm 2: vpand v01=%xmm8 -vpand %xmm1,%xmm8,%xmm8 - -# qhasm: v11 = x6 & mask1 -# asm 1: vpand v11=reg128#13 -# asm 2: vpand v11=%xmm12 -vpand %xmm1,%xmm12,%xmm12 - -# qhasm: 2x v01 unsigned>>= 4 -# asm 1: psrlq $4,x2=reg128#12 -# asm 2: vpor x2=%xmm11 -vpor %xmm15,%xmm11,%xmm11 - -# qhasm: x6 = v01 | v11 -# asm 1: vpor x6=reg128#9 -# asm 2: vpor x6=%xmm8 -vpor %xmm12,%xmm8,%xmm8 - -# qhasm: v00 = x3 & mask0 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm0,%xmm9,%xmm12 - -# qhasm: v10 = x7 & mask0 -# asm 1: vpand v10=reg128#1 -# asm 2: vpand v10=%xmm0 -vpand %xmm0,%xmm13,%xmm0 - -# qhasm: 2x v10 <<= 4 -# asm 1: psllq $4,v01=reg128#10 -# asm 2: vpand v01=%xmm9 -vpand %xmm1,%xmm9,%xmm9 - -# qhasm: v11 = x7 & mask1 -# asm 1: vpand v11=reg128#2 -# asm 2: vpand v11=%xmm1 -vpand %xmm1,%xmm13,%xmm1 - -# qhasm: 2x v01 unsigned>>= 4 -# asm 1: psrlq $4,x3=reg128#1 -# asm 2: vpor x3=%xmm0 -vpor %xmm0,%xmm12,%xmm0 - -# qhasm: x7 = v01 | v11 -# asm 1: vpor x7=reg128#2 -# asm 2: vpor x7=%xmm1 -vpor %xmm1,%xmm9,%xmm1 - -# qhasm: v00 = x0 & mask2 -# asm 1: vpand v00=reg128#10 -# asm 2: vpand v00=%xmm9 -vpand %xmm2,%xmm14,%xmm9 - -# qhasm: v10 = x2 & mask2 -# asm 1: vpand v10=reg128#13 -# asm 2: vpand v10=%xmm12 -vpand %xmm2,%xmm11,%xmm12 - -# qhasm: 2x v10 <<= 2 -# asm 1: psllq $2,v01=reg128#14 -# asm 2: vpand v01=%xmm13 -vpand %xmm3,%xmm14,%xmm13 - -# qhasm: v11 = x2 & mask3 -# asm 1: vpand 
v11=reg128#12 -# asm 2: vpand v11=%xmm11 -vpand %xmm3,%xmm11,%xmm11 - -# qhasm: 2x v01 unsigned>>= 2 -# asm 1: psrlq $2,x0=reg128#10 -# asm 2: vpor x0=%xmm9 -vpor %xmm12,%xmm9,%xmm9 - -# qhasm: x2 = v01 | v11 -# asm 1: vpor x2=reg128#12 -# asm 2: vpor x2=%xmm11 -vpor %xmm11,%xmm13,%xmm11 - -# qhasm: v00 = x1 & mask2 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm2,%xmm10,%xmm12 - -# qhasm: v10 = x3 & mask2 -# asm 1: vpand v10=reg128#14 -# asm 2: vpand v10=%xmm13 -vpand %xmm2,%xmm0,%xmm13 - -# qhasm: 2x v10 <<= 2 -# asm 1: psllq $2,v01=reg128#11 -# asm 2: vpand v01=%xmm10 -vpand %xmm3,%xmm10,%xmm10 - -# qhasm: v11 = x3 & mask3 -# asm 1: vpand v11=reg128#1 -# asm 2: vpand v11=%xmm0 -vpand %xmm3,%xmm0,%xmm0 - -# qhasm: 2x v01 unsigned>>= 2 -# asm 1: psrlq $2,x1=reg128#13 -# asm 2: vpor x1=%xmm12 -vpor %xmm13,%xmm12,%xmm12 - -# qhasm: x3 = v01 | v11 -# asm 1: vpor x3=reg128#1 -# asm 2: vpor x3=%xmm0 -vpor %xmm0,%xmm10,%xmm0 - -# qhasm: v00 = x4 & mask2 -# asm 1: vpand v00=reg128#11 -# asm 2: vpand v00=%xmm10 -vpand %xmm2,%xmm6,%xmm10 - -# qhasm: v10 = x6 & mask2 -# asm 1: vpand v10=reg128#14 -# asm 2: vpand v10=%xmm13 -vpand %xmm2,%xmm8,%xmm13 - -# qhasm: 2x v10 <<= 2 -# asm 1: psllq $2,v01=reg128#7 -# asm 2: vpand v01=%xmm6 -vpand %xmm3,%xmm6,%xmm6 - -# qhasm: v11 = x6 & mask3 -# asm 1: vpand v11=reg128#9 -# asm 2: vpand v11=%xmm8 -vpand %xmm3,%xmm8,%xmm8 - -# qhasm: 2x v01 unsigned>>= 2 -# asm 1: psrlq $2,x4=reg128#11 -# asm 2: vpor x4=%xmm10 -vpor %xmm13,%xmm10,%xmm10 - -# qhasm: x6 = v01 | v11 -# asm 1: vpor x6=reg128#7 -# asm 2: vpor x6=%xmm6 -vpor %xmm8,%xmm6,%xmm6 - -# qhasm: v00 = x5 & mask2 -# asm 1: vpand v00=reg128#9 -# asm 2: vpand v00=%xmm8 -vpand %xmm2,%xmm7,%xmm8 - -# qhasm: v10 = x7 & mask2 -# asm 1: vpand v10=reg128#3 -# asm 2: vpand v10=%xmm2 -vpand %xmm2,%xmm1,%xmm2 - -# qhasm: 2x v10 <<= 2 -# asm 1: psllq $2,v01=reg128#8 -# asm 2: vpand v01=%xmm7 -vpand %xmm3,%xmm7,%xmm7 - -# qhasm: v11 = x7 & mask3 -# asm 1: vpand v11=reg128#2 -# asm 2: vpand v11=%xmm1 -vpand %xmm3,%xmm1,%xmm1 - -# qhasm: 2x v01 unsigned>>= 2 -# asm 1: psrlq $2,x5=reg128#3 -# asm 2: vpor x5=%xmm2 -vpor %xmm2,%xmm8,%xmm2 - -# qhasm: x7 = v01 | v11 -# asm 1: vpor x7=reg128#2 -# asm 2: vpor x7=%xmm1 -vpor %xmm1,%xmm7,%xmm1 - -# qhasm: v00 = x0 & mask4 -# asm 1: vpand v00=reg128#4 -# asm 2: vpand v00=%xmm3 -vpand %xmm4,%xmm9,%xmm3 - -# qhasm: v10 = x1 & mask4 -# asm 1: vpand v10=reg128#8 -# asm 2: vpand v10=%xmm7 -vpand %xmm4,%xmm12,%xmm7 - -# qhasm: 2x v10 <<= 1 -# asm 1: psllq $1,v01=reg128#9 -# asm 2: vpand v01=%xmm8 -vpand %xmm5,%xmm9,%xmm8 - -# qhasm: v11 = x1 & mask5 -# asm 1: vpand v11=reg128#10 -# asm 2: vpand v11=%xmm9 -vpand %xmm5,%xmm12,%xmm9 - -# qhasm: 2x v01 unsigned>>= 1 -# asm 1: psrlq $1,x0=reg128#4 -# asm 2: vpor x0=%xmm3 -vpor %xmm7,%xmm3,%xmm3 - -# qhasm: x1 = v01 | v11 -# asm 1: vpor x1=reg128#8 -# asm 2: vpor x1=%xmm7 -vpor %xmm9,%xmm8,%xmm7 - -# qhasm: v00 = x2 & mask4 -# asm 1: vpand v00=reg128#9 -# asm 2: vpand v00=%xmm8 -vpand %xmm4,%xmm11,%xmm8 - -# qhasm: v10 = x3 & mask4 -# asm 1: vpand v10=reg128#10 -# asm 2: vpand v10=%xmm9 -vpand %xmm4,%xmm0,%xmm9 - -# qhasm: 2x v10 <<= 1 -# asm 1: psllq $1,v01=reg128#12 -# asm 2: vpand v01=%xmm11 -vpand %xmm5,%xmm11,%xmm11 - -# qhasm: v11 = x3 & mask5 -# asm 1: vpand v11=reg128#1 -# asm 2: vpand v11=%xmm0 -vpand %xmm5,%xmm0,%xmm0 - -# qhasm: 2x v01 unsigned>>= 1 -# asm 1: psrlq $1,x2=reg128#9 -# asm 2: vpor x2=%xmm8 -vpor %xmm9,%xmm8,%xmm8 - -# qhasm: x3 = v01 | v11 -# asm 1: vpor x3=reg128#1 -# asm 2: vpor x3=%xmm0 -vpor 
%xmm0,%xmm11,%xmm0 - -# qhasm: v00 = x4 & mask4 -# asm 1: vpand v00=reg128#10 -# asm 2: vpand v00=%xmm9 -vpand %xmm4,%xmm10,%xmm9 - -# qhasm: v10 = x5 & mask4 -# asm 1: vpand v10=reg128#12 -# asm 2: vpand v10=%xmm11 -vpand %xmm4,%xmm2,%xmm11 - -# qhasm: 2x v10 <<= 1 -# asm 1: psllq $1,v01=reg128#11 -# asm 2: vpand v01=%xmm10 -vpand %xmm5,%xmm10,%xmm10 - -# qhasm: v11 = x5 & mask5 -# asm 1: vpand v11=reg128#3 -# asm 2: vpand v11=%xmm2 -vpand %xmm5,%xmm2,%xmm2 - -# qhasm: 2x v01 unsigned>>= 1 -# asm 1: psrlq $1,x4=reg128#10 -# asm 2: vpor x4=%xmm9 -vpor %xmm11,%xmm9,%xmm9 - -# qhasm: x5 = v01 | v11 -# asm 1: vpor x5=reg128#3 -# asm 2: vpor x5=%xmm2 -vpor %xmm2,%xmm10,%xmm2 - -# qhasm: v00 = x6 & mask4 -# asm 1: vpand v00=reg128#11 -# asm 2: vpand v00=%xmm10 -vpand %xmm4,%xmm6,%xmm10 - -# qhasm: v10 = x7 & mask4 -# asm 1: vpand v10=reg128#5 -# asm 2: vpand v10=%xmm4 -vpand %xmm4,%xmm1,%xmm4 - -# qhasm: 2x v10 <<= 1 -# asm 1: psllq $1,v01=reg128#7 -# asm 2: vpand v01=%xmm6 -vpand %xmm5,%xmm6,%xmm6 - -# qhasm: v11 = x7 & mask5 -# asm 1: vpand v11=reg128#2 -# asm 2: vpand v11=%xmm1 -vpand %xmm5,%xmm1,%xmm1 - -# qhasm: 2x v01 unsigned>>= 1 -# asm 1: psrlq $1,x6=reg128#5 -# asm 2: vpor x6=%xmm4 -vpor %xmm4,%xmm10,%xmm4 - -# qhasm: x7 = v01 | v11 -# asm 1: vpor x7=reg128#2 -# asm 2: vpor x7=%xmm1 -vpor %xmm1,%xmm6,%xmm1 - -# qhasm: mem128[ input_0 + 896 ] = x0 -# asm 1: movdqu mask0=reg128#1 -# asm 2: movdqa PQCLEAN_MCELIECE348864_SSE_MASK5_0(%rip),>mask0=%xmm0 -movdqa PQCLEAN_MCELIECE348864_SSE_MASK5_0(%rip),%xmm0 - -# qhasm: mask1 aligned= mem128[ PQCLEAN_MCELIECE348864_SSE_MASK5_1 ] -# asm 1: movdqa PQCLEAN_MCELIECE348864_SSE_MASK5_1(%rip),>mask1=reg128#2 -# asm 2: movdqa PQCLEAN_MCELIECE348864_SSE_MASK5_1(%rip),>mask1=%xmm1 -movdqa PQCLEAN_MCELIECE348864_SSE_MASK5_1(%rip),%xmm1 - -# qhasm: mask2 aligned= mem128[ PQCLEAN_MCELIECE348864_SSE_MASK4_0 ] -# asm 1: movdqa PQCLEAN_MCELIECE348864_SSE_MASK4_0(%rip),>mask2=reg128#3 -# asm 2: movdqa PQCLEAN_MCELIECE348864_SSE_MASK4_0(%rip),>mask2=%xmm2 -movdqa PQCLEAN_MCELIECE348864_SSE_MASK4_0(%rip),%xmm2 - -# qhasm: mask3 aligned= mem128[ PQCLEAN_MCELIECE348864_SSE_MASK4_1 ] -# asm 1: movdqa PQCLEAN_MCELIECE348864_SSE_MASK4_1(%rip),>mask3=reg128#4 -# asm 2: movdqa PQCLEAN_MCELIECE348864_SSE_MASK4_1(%rip),>mask3=%xmm3 -movdqa PQCLEAN_MCELIECE348864_SSE_MASK4_1(%rip),%xmm3 - -# qhasm: mask4 aligned= mem128[ PQCLEAN_MCELIECE348864_SSE_MASK3_0 ] -# asm 1: movdqa PQCLEAN_MCELIECE348864_SSE_MASK3_0(%rip),>mask4=reg128#5 -# asm 2: movdqa PQCLEAN_MCELIECE348864_SSE_MASK3_0(%rip),>mask4=%xmm4 -movdqa PQCLEAN_MCELIECE348864_SSE_MASK3_0(%rip),%xmm4 - -# qhasm: mask5 aligned= mem128[ PQCLEAN_MCELIECE348864_SSE_MASK3_1 ] -# asm 1: movdqa PQCLEAN_MCELIECE348864_SSE_MASK3_1(%rip),>mask5=reg128#6 -# asm 2: movdqa PQCLEAN_MCELIECE348864_SSE_MASK3_1(%rip),>mask5=%xmm5 -movdqa PQCLEAN_MCELIECE348864_SSE_MASK3_1(%rip),%xmm5 - -# qhasm: r0 = mem64[ input_0 + 0 ] x2 -# asm 1: movddup 0(r0=reg128#7 -# asm 2: movddup 0(r0=%xmm6 -movddup 0(%rdi),%xmm6 - -# qhasm: r1 = mem64[ input_0 + 64 ] x2 -# asm 1: movddup 64(r1=reg128#8 -# asm 2: movddup 64(r1=%xmm7 -movddup 64(%rdi),%xmm7 - -# qhasm: r2 = mem64[ input_0 + 128 ] x2 -# asm 1: movddup 128(r2=reg128#9 -# asm 2: movddup 128(r2=%xmm8 -movddup 128(%rdi),%xmm8 - -# qhasm: r3 = mem64[ input_0 + 192 ] x2 -# asm 1: movddup 192(r3=reg128#10 -# asm 2: movddup 192(r3=%xmm9 -movddup 192(%rdi),%xmm9 - -# qhasm: r4 = mem64[ input_0 + 256 ] x2 -# asm 1: movddup 256(r4=reg128#11 -# asm 2: movddup 256(r4=%xmm10 -movddup 256(%rdi),%xmm10 - -# 
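The deleted qhasm/SSE routine above implements a constant-time bit-matrix transpose: each stage pairs two registers, masks the bit blocks that have to move, shifts them across by the block width, and ORs the halves back together. For orientation while reading the rest of the deletion, the same masked shift-and-OR technique in scalar C looks roughly like the sketch below; it is an illustration only, and the helper transpose_64x64_ref and its exact layout are assumptions, not code taken from this patch.

    #include <stdint.h>

    /*
     * Illustrative scalar transpose of a 64x64 bit matrix (one uint64_t per
     * row).  Pass d exchanges s = 2^d wide bit blocks between rows that are
     * s apart, for s = 32, 16, 8, 4, 2, 1 -- the same masked shift-and-OR
     * steps the deleted SSE code performs in xmm registers.  Hypothetical
     * helper, not code from this repository.
     */
    static void transpose_64x64_ref(uint64_t out[64], const uint64_t in[64])
    {
        static const uint64_t masks[6][2] = {
            {0x5555555555555555ULL, 0xAAAAAAAAAAAAAAAAULL},
            {0x3333333333333333ULL, 0xCCCCCCCCCCCCCCCCULL},
            {0x0F0F0F0F0F0F0F0FULL, 0xF0F0F0F0F0F0F0F0ULL},
            {0x00FF00FF00FF00FFULL, 0xFF00FF00FF00FF00ULL},
            {0x0000FFFF0000FFFFULL, 0xFFFF0000FFFF0000ULL},
            {0x00000000FFFFFFFFULL, 0xFFFFFFFF00000000ULL},
        };
        int i, j, d, s;

        for (i = 0; i < 64; i++) {
            out[i] = in[i];
        }
        for (d = 5; d >= 0; d--) {          /* block width 32 down to 1 */
            s = 1 << d;
            for (i = 0; i < 64; i += 2 * s) {
                for (j = i; j < i + s; j++) {
                    uint64_t lo = (out[j] & masks[d][0]) | ((out[j + s] & masks[d][0]) << s);
                    uint64_t hi = ((out[j] & masks[d][1]) >> s) | (out[j + s] & masks[d][1]);
                    out[j]     = lo;
                    out[j + s] = hi;
                }
            }
        }
    }

The deleted assembly unrolls these stages completely and carries the rows in xmm registers instead of looping.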
qhasm: r5 = mem64[ input_0 + 320 ] x2 -# asm 1: movddup 320(r5=reg128#12 -# asm 2: movddup 320(r5=%xmm11 -movddup 320(%rdi),%xmm11 - -# qhasm: r6 = mem64[ input_0 + 384 ] x2 -# asm 1: movddup 384(r6=reg128#13 -# asm 2: movddup 384(r6=%xmm12 -movddup 384(%rdi),%xmm12 - -# qhasm: r7 = mem64[ input_0 + 448 ] x2 -# asm 1: movddup 448(r7=reg128#14 -# asm 2: movddup 448(r7=%xmm13 -movddup 448(%rdi),%xmm13 - -# qhasm: v00 = r0 & mask0 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm0,%xmm6,%xmm14 - -# qhasm: 2x v10 = r4 << 32 -# asm 1: vpsllq $32,v10=reg128#16 -# asm 2: vpsllq $32,v10=%xmm15 -vpsllq $32,%xmm10,%xmm15 - -# qhasm: 2x v01 = r0 unsigned>> 32 -# asm 1: vpsrlq $32,v01=reg128#7 -# asm 2: vpsrlq $32,v01=%xmm6 -vpsrlq $32,%xmm6,%xmm6 - -# qhasm: v11 = r4 & mask1 -# asm 1: vpand v11=reg128#11 -# asm 2: vpand v11=%xmm10 -vpand %xmm1,%xmm10,%xmm10 - -# qhasm: r0 = v00 | v10 -# asm 1: vpor r0=reg128#15 -# asm 2: vpor r0=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: r4 = v01 | v11 -# asm 1: vpor r4=reg128#7 -# asm 2: vpor r4=%xmm6 -vpor %xmm10,%xmm6,%xmm6 - -# qhasm: v00 = r1 & mask0 -# asm 1: vpand v00=reg128#11 -# asm 2: vpand v00=%xmm10 -vpand %xmm0,%xmm7,%xmm10 - -# qhasm: 2x v10 = r5 << 32 -# asm 1: vpsllq $32,v10=reg128#16 -# asm 2: vpsllq $32,v10=%xmm15 -vpsllq $32,%xmm11,%xmm15 - -# qhasm: 2x v01 = r1 unsigned>> 32 -# asm 1: vpsrlq $32,v01=reg128#8 -# asm 2: vpsrlq $32,v01=%xmm7 -vpsrlq $32,%xmm7,%xmm7 - -# qhasm: v11 = r5 & mask1 -# asm 1: vpand v11=reg128#12 -# asm 2: vpand v11=%xmm11 -vpand %xmm1,%xmm11,%xmm11 - -# qhasm: r1 = v00 | v10 -# asm 1: vpor r1=reg128#11 -# asm 2: vpor r1=%xmm10 -vpor %xmm15,%xmm10,%xmm10 - -# qhasm: r5 = v01 | v11 -# asm 1: vpor r5=reg128#8 -# asm 2: vpor r5=%xmm7 -vpor %xmm11,%xmm7,%xmm7 - -# qhasm: v00 = r2 & mask0 -# asm 1: vpand v00=reg128#12 -# asm 2: vpand v00=%xmm11 -vpand %xmm0,%xmm8,%xmm11 - -# qhasm: 2x v10 = r6 << 32 -# asm 1: vpsllq $32,v10=reg128#16 -# asm 2: vpsllq $32,v10=%xmm15 -vpsllq $32,%xmm12,%xmm15 - -# qhasm: 2x v01 = r2 unsigned>> 32 -# asm 1: vpsrlq $32,v01=reg128#9 -# asm 2: vpsrlq $32,v01=%xmm8 -vpsrlq $32,%xmm8,%xmm8 - -# qhasm: v11 = r6 & mask1 -# asm 1: vpand v11=reg128#13 -# asm 2: vpand v11=%xmm12 -vpand %xmm1,%xmm12,%xmm12 - -# qhasm: r2 = v00 | v10 -# asm 1: vpor r2=reg128#12 -# asm 2: vpor r2=%xmm11 -vpor %xmm15,%xmm11,%xmm11 - -# qhasm: r6 = v01 | v11 -# asm 1: vpor r6=reg128#9 -# asm 2: vpor r6=%xmm8 -vpor %xmm12,%xmm8,%xmm8 - -# qhasm: v00 = r3 & mask0 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm0,%xmm9,%xmm12 - -# qhasm: 2x v10 = r7 << 32 -# asm 1: vpsllq $32,v10=reg128#16 -# asm 2: vpsllq $32,v10=%xmm15 -vpsllq $32,%xmm13,%xmm15 - -# qhasm: 2x v01 = r3 unsigned>> 32 -# asm 1: vpsrlq $32,v01=reg128#10 -# asm 2: vpsrlq $32,v01=%xmm9 -vpsrlq $32,%xmm9,%xmm9 - -# qhasm: v11 = r7 & mask1 -# asm 1: vpand v11=reg128#14 -# asm 2: vpand v11=%xmm13 -vpand %xmm1,%xmm13,%xmm13 - -# qhasm: r3 = v00 | v10 -# asm 1: vpor r3=reg128#13 -# asm 2: vpor r3=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: r7 = v01 | v11 -# asm 1: vpor r7=reg128#10 -# asm 2: vpor r7=%xmm9 -vpor %xmm13,%xmm9,%xmm9 - -# qhasm: v00 = r0 & mask2 -# asm 1: vpand v00=reg128#14 -# asm 2: vpand v00=%xmm13 -vpand %xmm2,%xmm14,%xmm13 - -# qhasm: 4x v10 = r2 << 16 -# asm 1: vpslld $16,v10=reg128#16 -# asm 2: vpslld $16,v10=%xmm15 -vpslld $16,%xmm11,%xmm15 - -# qhasm: 4x v01 = r0 unsigned>> 16 -# asm 1: vpsrld $16,v01=reg128#15 -# asm 2: vpsrld $16,v01=%xmm14 -vpsrld $16,%xmm14,%xmm14 - -# qhasm: v11 = r2 & mask3 -# asm 1: 
vpand v11=reg128#12 -# asm 2: vpand v11=%xmm11 -vpand %xmm3,%xmm11,%xmm11 - -# qhasm: r0 = v00 | v10 -# asm 1: vpor r0=reg128#14 -# asm 2: vpor r0=%xmm13 -vpor %xmm15,%xmm13,%xmm13 - -# qhasm: r2 = v01 | v11 -# asm 1: vpor r2=reg128#12 -# asm 2: vpor r2=%xmm11 -vpor %xmm11,%xmm14,%xmm11 - -# qhasm: v00 = r1 & mask2 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm2,%xmm10,%xmm14 - -# qhasm: 4x v10 = r3 << 16 -# asm 1: vpslld $16,v10=reg128#16 -# asm 2: vpslld $16,v10=%xmm15 -vpslld $16,%xmm12,%xmm15 - -# qhasm: 4x v01 = r1 unsigned>> 16 -# asm 1: vpsrld $16,v01=reg128#11 -# asm 2: vpsrld $16,v01=%xmm10 -vpsrld $16,%xmm10,%xmm10 - -# qhasm: v11 = r3 & mask3 -# asm 1: vpand v11=reg128#13 -# asm 2: vpand v11=%xmm12 -vpand %xmm3,%xmm12,%xmm12 - -# qhasm: r1 = v00 | v10 -# asm 1: vpor r1=reg128#15 -# asm 2: vpor r1=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: r3 = v01 | v11 -# asm 1: vpor r3=reg128#11 -# asm 2: vpor r3=%xmm10 -vpor %xmm12,%xmm10,%xmm10 - -# qhasm: v00 = r4 & mask2 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm2,%xmm6,%xmm12 - -# qhasm: 4x v10 = r6 << 16 -# asm 1: vpslld $16,v10=reg128#16 -# asm 2: vpslld $16,v10=%xmm15 -vpslld $16,%xmm8,%xmm15 - -# qhasm: 4x v01 = r4 unsigned>> 16 -# asm 1: vpsrld $16,v01=reg128#7 -# asm 2: vpsrld $16,v01=%xmm6 -vpsrld $16,%xmm6,%xmm6 - -# qhasm: v11 = r6 & mask3 -# asm 1: vpand v11=reg128#9 -# asm 2: vpand v11=%xmm8 -vpand %xmm3,%xmm8,%xmm8 - -# qhasm: r4 = v00 | v10 -# asm 1: vpor r4=reg128#13 -# asm 2: vpor r4=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: r6 = v01 | v11 -# asm 1: vpor r6=reg128#7 -# asm 2: vpor r6=%xmm6 -vpor %xmm8,%xmm6,%xmm6 - -# qhasm: v00 = r5 & mask2 -# asm 1: vpand v00=reg128#9 -# asm 2: vpand v00=%xmm8 -vpand %xmm2,%xmm7,%xmm8 - -# qhasm: 4x v10 = r7 << 16 -# asm 1: vpslld $16,v10=reg128#16 -# asm 2: vpslld $16,v10=%xmm15 -vpslld $16,%xmm9,%xmm15 - -# qhasm: 4x v01 = r5 unsigned>> 16 -# asm 1: vpsrld $16,v01=reg128#8 -# asm 2: vpsrld $16,v01=%xmm7 -vpsrld $16,%xmm7,%xmm7 - -# qhasm: v11 = r7 & mask3 -# asm 1: vpand v11=reg128#10 -# asm 2: vpand v11=%xmm9 -vpand %xmm3,%xmm9,%xmm9 - -# qhasm: r5 = v00 | v10 -# asm 1: vpor r5=reg128#9 -# asm 2: vpor r5=%xmm8 -vpor %xmm15,%xmm8,%xmm8 - -# qhasm: r7 = v01 | v11 -# asm 1: vpor r7=reg128#8 -# asm 2: vpor r7=%xmm7 -vpor %xmm9,%xmm7,%xmm7 - -# qhasm: v00 = r0 & mask4 -# asm 1: vpand v00=reg128#10 -# asm 2: vpand v00=%xmm9 -vpand %xmm4,%xmm13,%xmm9 - -# qhasm: 8x v10 = r1 << 8 -# asm 1: vpsllw $8,v10=reg128#16 -# asm 2: vpsllw $8,v10=%xmm15 -vpsllw $8,%xmm14,%xmm15 - -# qhasm: 8x v01 = r0 unsigned>> 8 -# asm 1: vpsrlw $8,v01=reg128#14 -# asm 2: vpsrlw $8,v01=%xmm13 -vpsrlw $8,%xmm13,%xmm13 - -# qhasm: v11 = r1 & mask5 -# asm 1: vpand v11=reg128#15 -# asm 2: vpand v11=%xmm14 -vpand %xmm5,%xmm14,%xmm14 - -# qhasm: r0 = v00 | v10 -# asm 1: vpor r0=reg128#10 -# asm 2: vpor r0=%xmm9 -vpor %xmm15,%xmm9,%xmm9 - -# qhasm: r1 = v01 | v11 -# asm 1: vpor r1=reg128#14 -# asm 2: vpor r1=%xmm13 -vpor %xmm14,%xmm13,%xmm13 - -# qhasm: v00 = r2 & mask4 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm4,%xmm11,%xmm14 - -# qhasm: 8x v10 = r3 << 8 -# asm 1: vpsllw $8,v10=reg128#16 -# asm 2: vpsllw $8,v10=%xmm15 -vpsllw $8,%xmm10,%xmm15 - -# qhasm: 8x v01 = r2 unsigned>> 8 -# asm 1: vpsrlw $8,v01=reg128#12 -# asm 2: vpsrlw $8,v01=%xmm11 -vpsrlw $8,%xmm11,%xmm11 - -# qhasm: v11 = r3 & mask5 -# asm 1: vpand v11=reg128#11 -# asm 2: vpand v11=%xmm10 -vpand %xmm5,%xmm10,%xmm10 - -# qhasm: r2 = v00 | v10 -# asm 1: vpor r2=reg128#15 -# asm 2: 
vpor r2=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: r3 = v01 | v11 -# asm 1: vpor r3=reg128#11 -# asm 2: vpor r3=%xmm10 -vpor %xmm10,%xmm11,%xmm10 - -# qhasm: v00 = r4 & mask4 -# asm 1: vpand v00=reg128#12 -# asm 2: vpand v00=%xmm11 -vpand %xmm4,%xmm12,%xmm11 - -# qhasm: 8x v10 = r5 << 8 -# asm 1: vpsllw $8,v10=reg128#16 -# asm 2: vpsllw $8,v10=%xmm15 -vpsllw $8,%xmm8,%xmm15 - -# qhasm: 8x v01 = r4 unsigned>> 8 -# asm 1: vpsrlw $8,v01=reg128#13 -# asm 2: vpsrlw $8,v01=%xmm12 -vpsrlw $8,%xmm12,%xmm12 - -# qhasm: v11 = r5 & mask5 -# asm 1: vpand v11=reg128#9 -# asm 2: vpand v11=%xmm8 -vpand %xmm5,%xmm8,%xmm8 - -# qhasm: r4 = v00 | v10 -# asm 1: vpor r4=reg128#12 -# asm 2: vpor r4=%xmm11 -vpor %xmm15,%xmm11,%xmm11 - -# qhasm: r5 = v01 | v11 -# asm 1: vpor r5=reg128#9 -# asm 2: vpor r5=%xmm8 -vpor %xmm8,%xmm12,%xmm8 - -# qhasm: v00 = r6 & mask4 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm4,%xmm6,%xmm12 - -# qhasm: 8x v10 = r7 << 8 -# asm 1: vpsllw $8,v10=reg128#16 -# asm 2: vpsllw $8,v10=%xmm15 -vpsllw $8,%xmm7,%xmm15 - -# qhasm: 8x v01 = r6 unsigned>> 8 -# asm 1: vpsrlw $8,v01=reg128#7 -# asm 2: vpsrlw $8,v01=%xmm6 -vpsrlw $8,%xmm6,%xmm6 - -# qhasm: v11 = r7 & mask5 -# asm 1: vpand v11=reg128#8 -# asm 2: vpand v11=%xmm7 -vpand %xmm5,%xmm7,%xmm7 - -# qhasm: r6 = v00 | v10 -# asm 1: vpor r6=reg128#13 -# asm 2: vpor r6=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: r7 = v01 | v11 -# asm 1: vpor r7=reg128#7 -# asm 2: vpor r7=%xmm6 -vpor %xmm7,%xmm6,%xmm6 - -# qhasm: buf = r0[0] -# asm 1: pextrq $0x0,buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm9,%rsi - -# qhasm: mem64[ input_0 + 0 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm13,%rsi - -# qhasm: mem64[ input_0 + 64 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm14,%rsi - -# qhasm: mem64[ input_0 + 128 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm10,%rsi - -# qhasm: mem64[ input_0 + 192 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm11,%rsi - -# qhasm: mem64[ input_0 + 256 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm8,%rsi - -# qhasm: mem64[ input_0 + 320 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm12,%rsi - -# qhasm: mem64[ input_0 + 384 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm6,%rsi - -# qhasm: mem64[ input_0 + 448 ] = buf -# asm 1: movq r0=reg128#7 -# asm 2: movddup 8(r0=%xmm6 -movddup 8(%rdi),%xmm6 - -# qhasm: r1 = mem64[ input_0 + 72 ] x2 -# asm 1: movddup 72(r1=reg128#8 -# asm 2: movddup 72(r1=%xmm7 -movddup 72(%rdi),%xmm7 - -# qhasm: r2 = mem64[ input_0 + 136 ] x2 -# asm 1: movddup 136(r2=reg128#9 -# asm 2: movddup 136(r2=%xmm8 -movddup 136(%rdi),%xmm8 - -# qhasm: r3 = mem64[ input_0 + 200 ] x2 -# asm 1: movddup 200(r3=reg128#10 -# asm 2: movddup 200(r3=%xmm9 -movddup 200(%rdi),%xmm9 - -# qhasm: r4 = mem64[ input_0 + 264 ] x2 -# asm 1: movddup 264(r4=reg128#11 -# asm 2: movddup 264(r4=%xmm10 -movddup 264(%rdi),%xmm10 - -# qhasm: r5 = mem64[ input_0 + 328 ] x2 -# asm 1: movddup 328(r5=reg128#12 -# asm 2: movddup 328(r5=%xmm11 -movddup 328(%rdi),%xmm11 - -# qhasm: r6 = mem64[ input_0 + 392 ] x2 -# asm 1: movddup 392(r6=reg128#13 -# asm 2: movddup 392(r6=%xmm12 -movddup 392(%rdi),%xmm12 - -# qhasm: r7 = mem64[ input_0 + 456 ] x2 -# asm 1: movddup 456(r7=reg128#14 -# asm 2: movddup 456(r7=%xmm13 -movddup 456(%rdi),%xmm13 - -# qhasm: v00 = r0 
& mask0 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm0,%xmm6,%xmm14 - -# qhasm: 2x v10 = r4 << 32 -# asm 1: vpsllq $32,v10=reg128#16 -# asm 2: vpsllq $32,v10=%xmm15 -vpsllq $32,%xmm10,%xmm15 - -# qhasm: 2x v01 = r0 unsigned>> 32 -# asm 1: vpsrlq $32,v01=reg128#7 -# asm 2: vpsrlq $32,v01=%xmm6 -vpsrlq $32,%xmm6,%xmm6 - -# qhasm: v11 = r4 & mask1 -# asm 1: vpand v11=reg128#11 -# asm 2: vpand v11=%xmm10 -vpand %xmm1,%xmm10,%xmm10 - -# qhasm: r0 = v00 | v10 -# asm 1: vpor r0=reg128#15 -# asm 2: vpor r0=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: r4 = v01 | v11 -# asm 1: vpor r4=reg128#7 -# asm 2: vpor r4=%xmm6 -vpor %xmm10,%xmm6,%xmm6 - -# qhasm: v00 = r1 & mask0 -# asm 1: vpand v00=reg128#11 -# asm 2: vpand v00=%xmm10 -vpand %xmm0,%xmm7,%xmm10 - -# qhasm: 2x v10 = r5 << 32 -# asm 1: vpsllq $32,v10=reg128#16 -# asm 2: vpsllq $32,v10=%xmm15 -vpsllq $32,%xmm11,%xmm15 - -# qhasm: 2x v01 = r1 unsigned>> 32 -# asm 1: vpsrlq $32,v01=reg128#8 -# asm 2: vpsrlq $32,v01=%xmm7 -vpsrlq $32,%xmm7,%xmm7 - -# qhasm: v11 = r5 & mask1 -# asm 1: vpand v11=reg128#12 -# asm 2: vpand v11=%xmm11 -vpand %xmm1,%xmm11,%xmm11 - -# qhasm: r1 = v00 | v10 -# asm 1: vpor r1=reg128#11 -# asm 2: vpor r1=%xmm10 -vpor %xmm15,%xmm10,%xmm10 - -# qhasm: r5 = v01 | v11 -# asm 1: vpor r5=reg128#8 -# asm 2: vpor r5=%xmm7 -vpor %xmm11,%xmm7,%xmm7 - -# qhasm: v00 = r2 & mask0 -# asm 1: vpand v00=reg128#12 -# asm 2: vpand v00=%xmm11 -vpand %xmm0,%xmm8,%xmm11 - -# qhasm: 2x v10 = r6 << 32 -# asm 1: vpsllq $32,v10=reg128#16 -# asm 2: vpsllq $32,v10=%xmm15 -vpsllq $32,%xmm12,%xmm15 - -# qhasm: 2x v01 = r2 unsigned>> 32 -# asm 1: vpsrlq $32,v01=reg128#9 -# asm 2: vpsrlq $32,v01=%xmm8 -vpsrlq $32,%xmm8,%xmm8 - -# qhasm: v11 = r6 & mask1 -# asm 1: vpand v11=reg128#13 -# asm 2: vpand v11=%xmm12 -vpand %xmm1,%xmm12,%xmm12 - -# qhasm: r2 = v00 | v10 -# asm 1: vpor r2=reg128#12 -# asm 2: vpor r2=%xmm11 -vpor %xmm15,%xmm11,%xmm11 - -# qhasm: r6 = v01 | v11 -# asm 1: vpor r6=reg128#9 -# asm 2: vpor r6=%xmm8 -vpor %xmm12,%xmm8,%xmm8 - -# qhasm: v00 = r3 & mask0 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm0,%xmm9,%xmm12 - -# qhasm: 2x v10 = r7 << 32 -# asm 1: vpsllq $32,v10=reg128#16 -# asm 2: vpsllq $32,v10=%xmm15 -vpsllq $32,%xmm13,%xmm15 - -# qhasm: 2x v01 = r3 unsigned>> 32 -# asm 1: vpsrlq $32,v01=reg128#10 -# asm 2: vpsrlq $32,v01=%xmm9 -vpsrlq $32,%xmm9,%xmm9 - -# qhasm: v11 = r7 & mask1 -# asm 1: vpand v11=reg128#14 -# asm 2: vpand v11=%xmm13 -vpand %xmm1,%xmm13,%xmm13 - -# qhasm: r3 = v00 | v10 -# asm 1: vpor r3=reg128#13 -# asm 2: vpor r3=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: r7 = v01 | v11 -# asm 1: vpor r7=reg128#10 -# asm 2: vpor r7=%xmm9 -vpor %xmm13,%xmm9,%xmm9 - -# qhasm: v00 = r0 & mask2 -# asm 1: vpand v00=reg128#14 -# asm 2: vpand v00=%xmm13 -vpand %xmm2,%xmm14,%xmm13 - -# qhasm: 4x v10 = r2 << 16 -# asm 1: vpslld $16,v10=reg128#16 -# asm 2: vpslld $16,v10=%xmm15 -vpslld $16,%xmm11,%xmm15 - -# qhasm: 4x v01 = r0 unsigned>> 16 -# asm 1: vpsrld $16,v01=reg128#15 -# asm 2: vpsrld $16,v01=%xmm14 -vpsrld $16,%xmm14,%xmm14 - -# qhasm: v11 = r2 & mask3 -# asm 1: vpand v11=reg128#12 -# asm 2: vpand v11=%xmm11 -vpand %xmm3,%xmm11,%xmm11 - -# qhasm: r0 = v00 | v10 -# asm 1: vpor r0=reg128#14 -# asm 2: vpor r0=%xmm13 -vpor %xmm15,%xmm13,%xmm13 - -# qhasm: r2 = v01 | v11 -# asm 1: vpor r2=reg128#12 -# asm 2: vpor r2=%xmm11 -vpor %xmm11,%xmm14,%xmm11 - -# qhasm: v00 = r1 & mask2 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm2,%xmm10,%xmm14 - -# qhasm: 4x v10 = r3 << 
16 -# asm 1: vpslld $16,v10=reg128#16 -# asm 2: vpslld $16,v10=%xmm15 -vpslld $16,%xmm12,%xmm15 - -# qhasm: 4x v01 = r1 unsigned>> 16 -# asm 1: vpsrld $16,v01=reg128#11 -# asm 2: vpsrld $16,v01=%xmm10 -vpsrld $16,%xmm10,%xmm10 - -# qhasm: v11 = r3 & mask3 -# asm 1: vpand v11=reg128#13 -# asm 2: vpand v11=%xmm12 -vpand %xmm3,%xmm12,%xmm12 - -# qhasm: r1 = v00 | v10 -# asm 1: vpor r1=reg128#15 -# asm 2: vpor r1=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: r3 = v01 | v11 -# asm 1: vpor r3=reg128#11 -# asm 2: vpor r3=%xmm10 -vpor %xmm12,%xmm10,%xmm10 - -# qhasm: v00 = r4 & mask2 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm2,%xmm6,%xmm12 - -# qhasm: 4x v10 = r6 << 16 -# asm 1: vpslld $16,v10=reg128#16 -# asm 2: vpslld $16,v10=%xmm15 -vpslld $16,%xmm8,%xmm15 - -# qhasm: 4x v01 = r4 unsigned>> 16 -# asm 1: vpsrld $16,v01=reg128#7 -# asm 2: vpsrld $16,v01=%xmm6 -vpsrld $16,%xmm6,%xmm6 - -# qhasm: v11 = r6 & mask3 -# asm 1: vpand v11=reg128#9 -# asm 2: vpand v11=%xmm8 -vpand %xmm3,%xmm8,%xmm8 - -# qhasm: r4 = v00 | v10 -# asm 1: vpor r4=reg128#13 -# asm 2: vpor r4=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: r6 = v01 | v11 -# asm 1: vpor r6=reg128#7 -# asm 2: vpor r6=%xmm6 -vpor %xmm8,%xmm6,%xmm6 - -# qhasm: v00 = r5 & mask2 -# asm 1: vpand v00=reg128#9 -# asm 2: vpand v00=%xmm8 -vpand %xmm2,%xmm7,%xmm8 - -# qhasm: 4x v10 = r7 << 16 -# asm 1: vpslld $16,v10=reg128#16 -# asm 2: vpslld $16,v10=%xmm15 -vpslld $16,%xmm9,%xmm15 - -# qhasm: 4x v01 = r5 unsigned>> 16 -# asm 1: vpsrld $16,v01=reg128#8 -# asm 2: vpsrld $16,v01=%xmm7 -vpsrld $16,%xmm7,%xmm7 - -# qhasm: v11 = r7 & mask3 -# asm 1: vpand v11=reg128#10 -# asm 2: vpand v11=%xmm9 -vpand %xmm3,%xmm9,%xmm9 - -# qhasm: r5 = v00 | v10 -# asm 1: vpor r5=reg128#9 -# asm 2: vpor r5=%xmm8 -vpor %xmm15,%xmm8,%xmm8 - -# qhasm: r7 = v01 | v11 -# asm 1: vpor r7=reg128#8 -# asm 2: vpor r7=%xmm7 -vpor %xmm9,%xmm7,%xmm7 - -# qhasm: v00 = r0 & mask4 -# asm 1: vpand v00=reg128#10 -# asm 2: vpand v00=%xmm9 -vpand %xmm4,%xmm13,%xmm9 - -# qhasm: 8x v10 = r1 << 8 -# asm 1: vpsllw $8,v10=reg128#16 -# asm 2: vpsllw $8,v10=%xmm15 -vpsllw $8,%xmm14,%xmm15 - -# qhasm: 8x v01 = r0 unsigned>> 8 -# asm 1: vpsrlw $8,v01=reg128#14 -# asm 2: vpsrlw $8,v01=%xmm13 -vpsrlw $8,%xmm13,%xmm13 - -# qhasm: v11 = r1 & mask5 -# asm 1: vpand v11=reg128#15 -# asm 2: vpand v11=%xmm14 -vpand %xmm5,%xmm14,%xmm14 - -# qhasm: r0 = v00 | v10 -# asm 1: vpor r0=reg128#10 -# asm 2: vpor r0=%xmm9 -vpor %xmm15,%xmm9,%xmm9 - -# qhasm: r1 = v01 | v11 -# asm 1: vpor r1=reg128#14 -# asm 2: vpor r1=%xmm13 -vpor %xmm14,%xmm13,%xmm13 - -# qhasm: v00 = r2 & mask4 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm4,%xmm11,%xmm14 - -# qhasm: 8x v10 = r3 << 8 -# asm 1: vpsllw $8,v10=reg128#16 -# asm 2: vpsllw $8,v10=%xmm15 -vpsllw $8,%xmm10,%xmm15 - -# qhasm: 8x v01 = r2 unsigned>> 8 -# asm 1: vpsrlw $8,v01=reg128#12 -# asm 2: vpsrlw $8,v01=%xmm11 -vpsrlw $8,%xmm11,%xmm11 - -# qhasm: v11 = r3 & mask5 -# asm 1: vpand v11=reg128#11 -# asm 2: vpand v11=%xmm10 -vpand %xmm5,%xmm10,%xmm10 - -# qhasm: r2 = v00 | v10 -# asm 1: vpor r2=reg128#15 -# asm 2: vpor r2=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: r3 = v01 | v11 -# asm 1: vpor r3=reg128#11 -# asm 2: vpor r3=%xmm10 -vpor %xmm10,%xmm11,%xmm10 - -# qhasm: v00 = r4 & mask4 -# asm 1: vpand v00=reg128#12 -# asm 2: vpand v00=%xmm11 -vpand %xmm4,%xmm12,%xmm11 - -# qhasm: 8x v10 = r5 << 8 -# asm 1: vpsllw $8,v10=reg128#16 -# asm 2: vpsllw $8,v10=%xmm15 -vpsllw $8,%xmm8,%xmm15 - -# qhasm: 8x v01 = r4 unsigned>> 8 -# asm 1: 
vpsrlw $8,v01=reg128#13 -# asm 2: vpsrlw $8,v01=%xmm12 -vpsrlw $8,%xmm12,%xmm12 - -# qhasm: v11 = r5 & mask5 -# asm 1: vpand v11=reg128#9 -# asm 2: vpand v11=%xmm8 -vpand %xmm5,%xmm8,%xmm8 - -# qhasm: r4 = v00 | v10 -# asm 1: vpor r4=reg128#12 -# asm 2: vpor r4=%xmm11 -vpor %xmm15,%xmm11,%xmm11 - -# qhasm: r5 = v01 | v11 -# asm 1: vpor r5=reg128#9 -# asm 2: vpor r5=%xmm8 -vpor %xmm8,%xmm12,%xmm8 - -# qhasm: v00 = r6 & mask4 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm4,%xmm6,%xmm12 - -# qhasm: 8x v10 = r7 << 8 -# asm 1: vpsllw $8,v10=reg128#16 -# asm 2: vpsllw $8,v10=%xmm15 -vpsllw $8,%xmm7,%xmm15 - -# qhasm: 8x v01 = r6 unsigned>> 8 -# asm 1: vpsrlw $8,v01=reg128#7 -# asm 2: vpsrlw $8,v01=%xmm6 -vpsrlw $8,%xmm6,%xmm6 - -# qhasm: v11 = r7 & mask5 -# asm 1: vpand v11=reg128#8 -# asm 2: vpand v11=%xmm7 -vpand %xmm5,%xmm7,%xmm7 - -# qhasm: r6 = v00 | v10 -# asm 1: vpor r6=reg128#13 -# asm 2: vpor r6=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: r7 = v01 | v11 -# asm 1: vpor r7=reg128#7 -# asm 2: vpor r7=%xmm6 -vpor %xmm7,%xmm6,%xmm6 - -# qhasm: buf = r0[0] -# asm 1: pextrq $0x0,buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm9,%rsi - -# qhasm: mem64[ input_0 + 8 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm13,%rsi - -# qhasm: mem64[ input_0 + 72 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm14,%rsi - -# qhasm: mem64[ input_0 + 136 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm10,%rsi - -# qhasm: mem64[ input_0 + 200 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm11,%rsi - -# qhasm: mem64[ input_0 + 264 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm8,%rsi - -# qhasm: mem64[ input_0 + 328 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm12,%rsi - -# qhasm: mem64[ input_0 + 392 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm6,%rsi - -# qhasm: mem64[ input_0 + 456 ] = buf -# asm 1: movq r0=reg128#7 -# asm 2: movddup 16(r0=%xmm6 -movddup 16(%rdi),%xmm6 - -# qhasm: r1 = mem64[ input_0 + 80 ] x2 -# asm 1: movddup 80(r1=reg128#8 -# asm 2: movddup 80(r1=%xmm7 -movddup 80(%rdi),%xmm7 - -# qhasm: r2 = mem64[ input_0 + 144 ] x2 -# asm 1: movddup 144(r2=reg128#9 -# asm 2: movddup 144(r2=%xmm8 -movddup 144(%rdi),%xmm8 - -# qhasm: r3 = mem64[ input_0 + 208 ] x2 -# asm 1: movddup 208(r3=reg128#10 -# asm 2: movddup 208(r3=%xmm9 -movddup 208(%rdi),%xmm9 - -# qhasm: r4 = mem64[ input_0 + 272 ] x2 -# asm 1: movddup 272(r4=reg128#11 -# asm 2: movddup 272(r4=%xmm10 -movddup 272(%rdi),%xmm10 - -# qhasm: r5 = mem64[ input_0 + 336 ] x2 -# asm 1: movddup 336(r5=reg128#12 -# asm 2: movddup 336(r5=%xmm11 -movddup 336(%rdi),%xmm11 - -# qhasm: r6 = mem64[ input_0 + 400 ] x2 -# asm 1: movddup 400(r6=reg128#13 -# asm 2: movddup 400(r6=%xmm12 -movddup 400(%rdi),%xmm12 - -# qhasm: r7 = mem64[ input_0 + 464 ] x2 -# asm 1: movddup 464(r7=reg128#14 -# asm 2: movddup 464(r7=%xmm13 -movddup 464(%rdi),%xmm13 - -# qhasm: v00 = r0 & mask0 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm0,%xmm6,%xmm14 - -# qhasm: 2x v10 = r4 << 32 -# asm 1: vpsllq $32,v10=reg128#16 -# asm 2: vpsllq $32,v10=%xmm15 -vpsllq $32,%xmm10,%xmm15 - -# qhasm: 2x v01 = r0 unsigned>> 32 -# asm 1: vpsrlq $32,v01=reg128#7 -# asm 2: vpsrlq $32,v01=%xmm6 -vpsrlq $32,%xmm6,%xmm6 - -# qhasm: v11 = r4 & mask1 -# asm 1: vpand v11=reg128#11 -# asm 2: vpand 
v11=%xmm10 -vpand %xmm1,%xmm10,%xmm10 - -# qhasm: r0 = v00 | v10 -# asm 1: vpor r0=reg128#15 -# asm 2: vpor r0=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: r4 = v01 | v11 -# asm 1: vpor r4=reg128#7 -# asm 2: vpor r4=%xmm6 -vpor %xmm10,%xmm6,%xmm6 - -# qhasm: v00 = r1 & mask0 -# asm 1: vpand v00=reg128#11 -# asm 2: vpand v00=%xmm10 -vpand %xmm0,%xmm7,%xmm10 - -# qhasm: 2x v10 = r5 << 32 -# asm 1: vpsllq $32,v10=reg128#16 -# asm 2: vpsllq $32,v10=%xmm15 -vpsllq $32,%xmm11,%xmm15 - -# qhasm: 2x v01 = r1 unsigned>> 32 -# asm 1: vpsrlq $32,v01=reg128#8 -# asm 2: vpsrlq $32,v01=%xmm7 -vpsrlq $32,%xmm7,%xmm7 - -# qhasm: v11 = r5 & mask1 -# asm 1: vpand v11=reg128#12 -# asm 2: vpand v11=%xmm11 -vpand %xmm1,%xmm11,%xmm11 - -# qhasm: r1 = v00 | v10 -# asm 1: vpor r1=reg128#11 -# asm 2: vpor r1=%xmm10 -vpor %xmm15,%xmm10,%xmm10 - -# qhasm: r5 = v01 | v11 -# asm 1: vpor r5=reg128#8 -# asm 2: vpor r5=%xmm7 -vpor %xmm11,%xmm7,%xmm7 - -# qhasm: v00 = r2 & mask0 -# asm 1: vpand v00=reg128#12 -# asm 2: vpand v00=%xmm11 -vpand %xmm0,%xmm8,%xmm11 - -# qhasm: 2x v10 = r6 << 32 -# asm 1: vpsllq $32,v10=reg128#16 -# asm 2: vpsllq $32,v10=%xmm15 -vpsllq $32,%xmm12,%xmm15 - -# qhasm: 2x v01 = r2 unsigned>> 32 -# asm 1: vpsrlq $32,v01=reg128#9 -# asm 2: vpsrlq $32,v01=%xmm8 -vpsrlq $32,%xmm8,%xmm8 - -# qhasm: v11 = r6 & mask1 -# asm 1: vpand v11=reg128#13 -# asm 2: vpand v11=%xmm12 -vpand %xmm1,%xmm12,%xmm12 - -# qhasm: r2 = v00 | v10 -# asm 1: vpor r2=reg128#12 -# asm 2: vpor r2=%xmm11 -vpor %xmm15,%xmm11,%xmm11 - -# qhasm: r6 = v01 | v11 -# asm 1: vpor r6=reg128#9 -# asm 2: vpor r6=%xmm8 -vpor %xmm12,%xmm8,%xmm8 - -# qhasm: v00 = r3 & mask0 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm0,%xmm9,%xmm12 - -# qhasm: 2x v10 = r7 << 32 -# asm 1: vpsllq $32,v10=reg128#16 -# asm 2: vpsllq $32,v10=%xmm15 -vpsllq $32,%xmm13,%xmm15 - -# qhasm: 2x v01 = r3 unsigned>> 32 -# asm 1: vpsrlq $32,v01=reg128#10 -# asm 2: vpsrlq $32,v01=%xmm9 -vpsrlq $32,%xmm9,%xmm9 - -# qhasm: v11 = r7 & mask1 -# asm 1: vpand v11=reg128#14 -# asm 2: vpand v11=%xmm13 -vpand %xmm1,%xmm13,%xmm13 - -# qhasm: r3 = v00 | v10 -# asm 1: vpor r3=reg128#13 -# asm 2: vpor r3=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: r7 = v01 | v11 -# asm 1: vpor r7=reg128#10 -# asm 2: vpor r7=%xmm9 -vpor %xmm13,%xmm9,%xmm9 - -# qhasm: v00 = r0 & mask2 -# asm 1: vpand v00=reg128#14 -# asm 2: vpand v00=%xmm13 -vpand %xmm2,%xmm14,%xmm13 - -# qhasm: 4x v10 = r2 << 16 -# asm 1: vpslld $16,v10=reg128#16 -# asm 2: vpslld $16,v10=%xmm15 -vpslld $16,%xmm11,%xmm15 - -# qhasm: 4x v01 = r0 unsigned>> 16 -# asm 1: vpsrld $16,v01=reg128#15 -# asm 2: vpsrld $16,v01=%xmm14 -vpsrld $16,%xmm14,%xmm14 - -# qhasm: v11 = r2 & mask3 -# asm 1: vpand v11=reg128#12 -# asm 2: vpand v11=%xmm11 -vpand %xmm3,%xmm11,%xmm11 - -# qhasm: r0 = v00 | v10 -# asm 1: vpor r0=reg128#14 -# asm 2: vpor r0=%xmm13 -vpor %xmm15,%xmm13,%xmm13 - -# qhasm: r2 = v01 | v11 -# asm 1: vpor r2=reg128#12 -# asm 2: vpor r2=%xmm11 -vpor %xmm11,%xmm14,%xmm11 - -# qhasm: v00 = r1 & mask2 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm2,%xmm10,%xmm14 - -# qhasm: 4x v10 = r3 << 16 -# asm 1: vpslld $16,v10=reg128#16 -# asm 2: vpslld $16,v10=%xmm15 -vpslld $16,%xmm12,%xmm15 - -# qhasm: 4x v01 = r1 unsigned>> 16 -# asm 1: vpsrld $16,v01=reg128#11 -# asm 2: vpsrld $16,v01=%xmm10 -vpsrld $16,%xmm10,%xmm10 - -# qhasm: v11 = r3 & mask3 -# asm 1: vpand v11=reg128#13 -# asm 2: vpand v11=%xmm12 -vpand %xmm3,%xmm12,%xmm12 - -# qhasm: r1 = v00 | v10 -# asm 1: vpor r1=reg128#15 -# asm 2: vpor 
r1=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: r3 = v01 | v11 -# asm 1: vpor r3=reg128#11 -# asm 2: vpor r3=%xmm10 -vpor %xmm12,%xmm10,%xmm10 - -# qhasm: v00 = r4 & mask2 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm2,%xmm6,%xmm12 - -# qhasm: 4x v10 = r6 << 16 -# asm 1: vpslld $16,v10=reg128#16 -# asm 2: vpslld $16,v10=%xmm15 -vpslld $16,%xmm8,%xmm15 - -# qhasm: 4x v01 = r4 unsigned>> 16 -# asm 1: vpsrld $16,v01=reg128#7 -# asm 2: vpsrld $16,v01=%xmm6 -vpsrld $16,%xmm6,%xmm6 - -# qhasm: v11 = r6 & mask3 -# asm 1: vpand v11=reg128#9 -# asm 2: vpand v11=%xmm8 -vpand %xmm3,%xmm8,%xmm8 - -# qhasm: r4 = v00 | v10 -# asm 1: vpor r4=reg128#13 -# asm 2: vpor r4=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: r6 = v01 | v11 -# asm 1: vpor r6=reg128#7 -# asm 2: vpor r6=%xmm6 -vpor %xmm8,%xmm6,%xmm6 - -# qhasm: v00 = r5 & mask2 -# asm 1: vpand v00=reg128#9 -# asm 2: vpand v00=%xmm8 -vpand %xmm2,%xmm7,%xmm8 - -# qhasm: 4x v10 = r7 << 16 -# asm 1: vpslld $16,v10=reg128#16 -# asm 2: vpslld $16,v10=%xmm15 -vpslld $16,%xmm9,%xmm15 - -# qhasm: 4x v01 = r5 unsigned>> 16 -# asm 1: vpsrld $16,v01=reg128#8 -# asm 2: vpsrld $16,v01=%xmm7 -vpsrld $16,%xmm7,%xmm7 - -# qhasm: v11 = r7 & mask3 -# asm 1: vpand v11=reg128#10 -# asm 2: vpand v11=%xmm9 -vpand %xmm3,%xmm9,%xmm9 - -# qhasm: r5 = v00 | v10 -# asm 1: vpor r5=reg128#9 -# asm 2: vpor r5=%xmm8 -vpor %xmm15,%xmm8,%xmm8 - -# qhasm: r7 = v01 | v11 -# asm 1: vpor r7=reg128#8 -# asm 2: vpor r7=%xmm7 -vpor %xmm9,%xmm7,%xmm7 - -# qhasm: v00 = r0 & mask4 -# asm 1: vpand v00=reg128#10 -# asm 2: vpand v00=%xmm9 -vpand %xmm4,%xmm13,%xmm9 - -# qhasm: 8x v10 = r1 << 8 -# asm 1: vpsllw $8,v10=reg128#16 -# asm 2: vpsllw $8,v10=%xmm15 -vpsllw $8,%xmm14,%xmm15 - -# qhasm: 8x v01 = r0 unsigned>> 8 -# asm 1: vpsrlw $8,v01=reg128#14 -# asm 2: vpsrlw $8,v01=%xmm13 -vpsrlw $8,%xmm13,%xmm13 - -# qhasm: v11 = r1 & mask5 -# asm 1: vpand v11=reg128#15 -# asm 2: vpand v11=%xmm14 -vpand %xmm5,%xmm14,%xmm14 - -# qhasm: r0 = v00 | v10 -# asm 1: vpor r0=reg128#10 -# asm 2: vpor r0=%xmm9 -vpor %xmm15,%xmm9,%xmm9 - -# qhasm: r1 = v01 | v11 -# asm 1: vpor r1=reg128#14 -# asm 2: vpor r1=%xmm13 -vpor %xmm14,%xmm13,%xmm13 - -# qhasm: v00 = r2 & mask4 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm4,%xmm11,%xmm14 - -# qhasm: 8x v10 = r3 << 8 -# asm 1: vpsllw $8,v10=reg128#16 -# asm 2: vpsllw $8,v10=%xmm15 -vpsllw $8,%xmm10,%xmm15 - -# qhasm: 8x v01 = r2 unsigned>> 8 -# asm 1: vpsrlw $8,v01=reg128#12 -# asm 2: vpsrlw $8,v01=%xmm11 -vpsrlw $8,%xmm11,%xmm11 - -# qhasm: v11 = r3 & mask5 -# asm 1: vpand v11=reg128#11 -# asm 2: vpand v11=%xmm10 -vpand %xmm5,%xmm10,%xmm10 - -# qhasm: r2 = v00 | v10 -# asm 1: vpor r2=reg128#15 -# asm 2: vpor r2=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: r3 = v01 | v11 -# asm 1: vpor r3=reg128#11 -# asm 2: vpor r3=%xmm10 -vpor %xmm10,%xmm11,%xmm10 - -# qhasm: v00 = r4 & mask4 -# asm 1: vpand v00=reg128#12 -# asm 2: vpand v00=%xmm11 -vpand %xmm4,%xmm12,%xmm11 - -# qhasm: 8x v10 = r5 << 8 -# asm 1: vpsllw $8,v10=reg128#16 -# asm 2: vpsllw $8,v10=%xmm15 -vpsllw $8,%xmm8,%xmm15 - -# qhasm: 8x v01 = r4 unsigned>> 8 -# asm 1: vpsrlw $8,v01=reg128#13 -# asm 2: vpsrlw $8,v01=%xmm12 -vpsrlw $8,%xmm12,%xmm12 - -# qhasm: v11 = r5 & mask5 -# asm 1: vpand v11=reg128#9 -# asm 2: vpand v11=%xmm8 -vpand %xmm5,%xmm8,%xmm8 - -# qhasm: r4 = v00 | v10 -# asm 1: vpor r4=reg128#12 -# asm 2: vpor r4=%xmm11 -vpor %xmm15,%xmm11,%xmm11 - -# qhasm: r5 = v01 | v11 -# asm 1: vpor r5=reg128#9 -# asm 2: vpor r5=%xmm8 -vpor %xmm8,%xmm12,%xmm8 - -# qhasm: 
v00 = r6 & mask4 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm4,%xmm6,%xmm12 - -# qhasm: 8x v10 = r7 << 8 -# asm 1: vpsllw $8,v10=reg128#16 -# asm 2: vpsllw $8,v10=%xmm15 -vpsllw $8,%xmm7,%xmm15 - -# qhasm: 8x v01 = r6 unsigned>> 8 -# asm 1: vpsrlw $8,v01=reg128#7 -# asm 2: vpsrlw $8,v01=%xmm6 -vpsrlw $8,%xmm6,%xmm6 - -# qhasm: v11 = r7 & mask5 -# asm 1: vpand v11=reg128#8 -# asm 2: vpand v11=%xmm7 -vpand %xmm5,%xmm7,%xmm7 - -# qhasm: r6 = v00 | v10 -# asm 1: vpor r6=reg128#13 -# asm 2: vpor r6=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: r7 = v01 | v11 -# asm 1: vpor r7=reg128#7 -# asm 2: vpor r7=%xmm6 -vpor %xmm7,%xmm6,%xmm6 - -# qhasm: buf = r0[0] -# asm 1: pextrq $0x0,buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm9,%rsi - -# qhasm: mem64[ input_0 + 16 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm13,%rsi - -# qhasm: mem64[ input_0 + 80 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm14,%rsi - -# qhasm: mem64[ input_0 + 144 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm10,%rsi - -# qhasm: mem64[ input_0 + 208 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm11,%rsi - -# qhasm: mem64[ input_0 + 272 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm8,%rsi - -# qhasm: mem64[ input_0 + 336 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm12,%rsi - -# qhasm: mem64[ input_0 + 400 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm6,%rsi - -# qhasm: mem64[ input_0 + 464 ] = buf -# asm 1: movq r0=reg128#7 -# asm 2: movddup 24(r0=%xmm6 -movddup 24(%rdi),%xmm6 - -# qhasm: r1 = mem64[ input_0 + 88 ] x2 -# asm 1: movddup 88(r1=reg128#8 -# asm 2: movddup 88(r1=%xmm7 -movddup 88(%rdi),%xmm7 - -# qhasm: r2 = mem64[ input_0 + 152 ] x2 -# asm 1: movddup 152(r2=reg128#9 -# asm 2: movddup 152(r2=%xmm8 -movddup 152(%rdi),%xmm8 - -# qhasm: r3 = mem64[ input_0 + 216 ] x2 -# asm 1: movddup 216(r3=reg128#10 -# asm 2: movddup 216(r3=%xmm9 -movddup 216(%rdi),%xmm9 - -# qhasm: r4 = mem64[ input_0 + 280 ] x2 -# asm 1: movddup 280(r4=reg128#11 -# asm 2: movddup 280(r4=%xmm10 -movddup 280(%rdi),%xmm10 - -# qhasm: r5 = mem64[ input_0 + 344 ] x2 -# asm 1: movddup 344(r5=reg128#12 -# asm 2: movddup 344(r5=%xmm11 -movddup 344(%rdi),%xmm11 - -# qhasm: r6 = mem64[ input_0 + 408 ] x2 -# asm 1: movddup 408(r6=reg128#13 -# asm 2: movddup 408(r6=%xmm12 -movddup 408(%rdi),%xmm12 - -# qhasm: r7 = mem64[ input_0 + 472 ] x2 -# asm 1: movddup 472(r7=reg128#14 -# asm 2: movddup 472(r7=%xmm13 -movddup 472(%rdi),%xmm13 - -# qhasm: v00 = r0 & mask0 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm0,%xmm6,%xmm14 - -# qhasm: 2x v10 = r4 << 32 -# asm 1: vpsllq $32,v10=reg128#16 -# asm 2: vpsllq $32,v10=%xmm15 -vpsllq $32,%xmm10,%xmm15 - -# qhasm: 2x v01 = r0 unsigned>> 32 -# asm 1: vpsrlq $32,v01=reg128#7 -# asm 2: vpsrlq $32,v01=%xmm6 -vpsrlq $32,%xmm6,%xmm6 - -# qhasm: v11 = r4 & mask1 -# asm 1: vpand v11=reg128#11 -# asm 2: vpand v11=%xmm10 -vpand %xmm1,%xmm10,%xmm10 - -# qhasm: r0 = v00 | v10 -# asm 1: vpor r0=reg128#15 -# asm 2: vpor r0=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: r4 = v01 | v11 -# asm 1: vpor r4=reg128#7 -# asm 2: vpor r4=%xmm6 -vpor %xmm10,%xmm6,%xmm6 - -# qhasm: v00 = r1 & mask0 -# asm 1: vpand v00=reg128#11 -# asm 2: vpand v00=%xmm10 -vpand %xmm0,%xmm7,%xmm10 - -# qhasm: 2x v10 = r5 << 32 -# asm 1: vpsllq 
$32,v10=reg128#16 -# asm 2: vpsllq $32,v10=%xmm15 -vpsllq $32,%xmm11,%xmm15 - -# qhasm: 2x v01 = r1 unsigned>> 32 -# asm 1: vpsrlq $32,v01=reg128#8 -# asm 2: vpsrlq $32,v01=%xmm7 -vpsrlq $32,%xmm7,%xmm7 - -# qhasm: v11 = r5 & mask1 -# asm 1: vpand v11=reg128#12 -# asm 2: vpand v11=%xmm11 -vpand %xmm1,%xmm11,%xmm11 - -# qhasm: r1 = v00 | v10 -# asm 1: vpor r1=reg128#11 -# asm 2: vpor r1=%xmm10 -vpor %xmm15,%xmm10,%xmm10 - -# qhasm: r5 = v01 | v11 -# asm 1: vpor r5=reg128#8 -# asm 2: vpor r5=%xmm7 -vpor %xmm11,%xmm7,%xmm7 - -# qhasm: v00 = r2 & mask0 -# asm 1: vpand v00=reg128#12 -# asm 2: vpand v00=%xmm11 -vpand %xmm0,%xmm8,%xmm11 - -# qhasm: 2x v10 = r6 << 32 -# asm 1: vpsllq $32,v10=reg128#16 -# asm 2: vpsllq $32,v10=%xmm15 -vpsllq $32,%xmm12,%xmm15 - -# qhasm: 2x v01 = r2 unsigned>> 32 -# asm 1: vpsrlq $32,v01=reg128#9 -# asm 2: vpsrlq $32,v01=%xmm8 -vpsrlq $32,%xmm8,%xmm8 - -# qhasm: v11 = r6 & mask1 -# asm 1: vpand v11=reg128#13 -# asm 2: vpand v11=%xmm12 -vpand %xmm1,%xmm12,%xmm12 - -# qhasm: r2 = v00 | v10 -# asm 1: vpor r2=reg128#12 -# asm 2: vpor r2=%xmm11 -vpor %xmm15,%xmm11,%xmm11 - -# qhasm: r6 = v01 | v11 -# asm 1: vpor r6=reg128#9 -# asm 2: vpor r6=%xmm8 -vpor %xmm12,%xmm8,%xmm8 - -# qhasm: v00 = r3 & mask0 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm0,%xmm9,%xmm12 - -# qhasm: 2x v10 = r7 << 32 -# asm 1: vpsllq $32,v10=reg128#16 -# asm 2: vpsllq $32,v10=%xmm15 -vpsllq $32,%xmm13,%xmm15 - -# qhasm: 2x v01 = r3 unsigned>> 32 -# asm 1: vpsrlq $32,v01=reg128#10 -# asm 2: vpsrlq $32,v01=%xmm9 -vpsrlq $32,%xmm9,%xmm9 - -# qhasm: v11 = r7 & mask1 -# asm 1: vpand v11=reg128#14 -# asm 2: vpand v11=%xmm13 -vpand %xmm1,%xmm13,%xmm13 - -# qhasm: r3 = v00 | v10 -# asm 1: vpor r3=reg128#13 -# asm 2: vpor r3=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: r7 = v01 | v11 -# asm 1: vpor r7=reg128#10 -# asm 2: vpor r7=%xmm9 -vpor %xmm13,%xmm9,%xmm9 - -# qhasm: v00 = r0 & mask2 -# asm 1: vpand v00=reg128#14 -# asm 2: vpand v00=%xmm13 -vpand %xmm2,%xmm14,%xmm13 - -# qhasm: 4x v10 = r2 << 16 -# asm 1: vpslld $16,v10=reg128#16 -# asm 2: vpslld $16,v10=%xmm15 -vpslld $16,%xmm11,%xmm15 - -# qhasm: 4x v01 = r0 unsigned>> 16 -# asm 1: vpsrld $16,v01=reg128#15 -# asm 2: vpsrld $16,v01=%xmm14 -vpsrld $16,%xmm14,%xmm14 - -# qhasm: v11 = r2 & mask3 -# asm 1: vpand v11=reg128#12 -# asm 2: vpand v11=%xmm11 -vpand %xmm3,%xmm11,%xmm11 - -# qhasm: r0 = v00 | v10 -# asm 1: vpor r0=reg128#14 -# asm 2: vpor r0=%xmm13 -vpor %xmm15,%xmm13,%xmm13 - -# qhasm: r2 = v01 | v11 -# asm 1: vpor r2=reg128#12 -# asm 2: vpor r2=%xmm11 -vpor %xmm11,%xmm14,%xmm11 - -# qhasm: v00 = r1 & mask2 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm2,%xmm10,%xmm14 - -# qhasm: 4x v10 = r3 << 16 -# asm 1: vpslld $16,v10=reg128#16 -# asm 2: vpslld $16,v10=%xmm15 -vpslld $16,%xmm12,%xmm15 - -# qhasm: 4x v01 = r1 unsigned>> 16 -# asm 1: vpsrld $16,v01=reg128#11 -# asm 2: vpsrld $16,v01=%xmm10 -vpsrld $16,%xmm10,%xmm10 - -# qhasm: v11 = r3 & mask3 -# asm 1: vpand v11=reg128#13 -# asm 2: vpand v11=%xmm12 -vpand %xmm3,%xmm12,%xmm12 - -# qhasm: r1 = v00 | v10 -# asm 1: vpor r1=reg128#15 -# asm 2: vpor r1=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: r3 = v01 | v11 -# asm 1: vpor r3=reg128#11 -# asm 2: vpor r3=%xmm10 -vpor %xmm12,%xmm10,%xmm10 - -# qhasm: v00 = r4 & mask2 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm2,%xmm6,%xmm12 - -# qhasm: 4x v10 = r6 << 16 -# asm 1: vpslld $16,v10=reg128#16 -# asm 2: vpslld $16,v10=%xmm15 -vpslld $16,%xmm8,%xmm15 - -# qhasm: 4x v01 = r4 unsigned>> 
16 -# asm 1: vpsrld $16,v01=reg128#7 -# asm 2: vpsrld $16,v01=%xmm6 -vpsrld $16,%xmm6,%xmm6 - -# qhasm: v11 = r6 & mask3 -# asm 1: vpand v11=reg128#9 -# asm 2: vpand v11=%xmm8 -vpand %xmm3,%xmm8,%xmm8 - -# qhasm: r4 = v00 | v10 -# asm 1: vpor r4=reg128#13 -# asm 2: vpor r4=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: r6 = v01 | v11 -# asm 1: vpor r6=reg128#7 -# asm 2: vpor r6=%xmm6 -vpor %xmm8,%xmm6,%xmm6 - -# qhasm: v00 = r5 & mask2 -# asm 1: vpand v00=reg128#9 -# asm 2: vpand v00=%xmm8 -vpand %xmm2,%xmm7,%xmm8 - -# qhasm: 4x v10 = r7 << 16 -# asm 1: vpslld $16,v10=reg128#16 -# asm 2: vpslld $16,v10=%xmm15 -vpslld $16,%xmm9,%xmm15 - -# qhasm: 4x v01 = r5 unsigned>> 16 -# asm 1: vpsrld $16,v01=reg128#8 -# asm 2: vpsrld $16,v01=%xmm7 -vpsrld $16,%xmm7,%xmm7 - -# qhasm: v11 = r7 & mask3 -# asm 1: vpand v11=reg128#10 -# asm 2: vpand v11=%xmm9 -vpand %xmm3,%xmm9,%xmm9 - -# qhasm: r5 = v00 | v10 -# asm 1: vpor r5=reg128#9 -# asm 2: vpor r5=%xmm8 -vpor %xmm15,%xmm8,%xmm8 - -# qhasm: r7 = v01 | v11 -# asm 1: vpor r7=reg128#8 -# asm 2: vpor r7=%xmm7 -vpor %xmm9,%xmm7,%xmm7 - -# qhasm: v00 = r0 & mask4 -# asm 1: vpand v00=reg128#10 -# asm 2: vpand v00=%xmm9 -vpand %xmm4,%xmm13,%xmm9 - -# qhasm: 8x v10 = r1 << 8 -# asm 1: vpsllw $8,v10=reg128#16 -# asm 2: vpsllw $8,v10=%xmm15 -vpsllw $8,%xmm14,%xmm15 - -# qhasm: 8x v01 = r0 unsigned>> 8 -# asm 1: vpsrlw $8,v01=reg128#14 -# asm 2: vpsrlw $8,v01=%xmm13 -vpsrlw $8,%xmm13,%xmm13 - -# qhasm: v11 = r1 & mask5 -# asm 1: vpand v11=reg128#15 -# asm 2: vpand v11=%xmm14 -vpand %xmm5,%xmm14,%xmm14 - -# qhasm: r0 = v00 | v10 -# asm 1: vpor r0=reg128#10 -# asm 2: vpor r0=%xmm9 -vpor %xmm15,%xmm9,%xmm9 - -# qhasm: r1 = v01 | v11 -# asm 1: vpor r1=reg128#14 -# asm 2: vpor r1=%xmm13 -vpor %xmm14,%xmm13,%xmm13 - -# qhasm: v00 = r2 & mask4 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm4,%xmm11,%xmm14 - -# qhasm: 8x v10 = r3 << 8 -# asm 1: vpsllw $8,v10=reg128#16 -# asm 2: vpsllw $8,v10=%xmm15 -vpsllw $8,%xmm10,%xmm15 - -# qhasm: 8x v01 = r2 unsigned>> 8 -# asm 1: vpsrlw $8,v01=reg128#12 -# asm 2: vpsrlw $8,v01=%xmm11 -vpsrlw $8,%xmm11,%xmm11 - -# qhasm: v11 = r3 & mask5 -# asm 1: vpand v11=reg128#11 -# asm 2: vpand v11=%xmm10 -vpand %xmm5,%xmm10,%xmm10 - -# qhasm: r2 = v00 | v10 -# asm 1: vpor r2=reg128#15 -# asm 2: vpor r2=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: r3 = v01 | v11 -# asm 1: vpor r3=reg128#11 -# asm 2: vpor r3=%xmm10 -vpor %xmm10,%xmm11,%xmm10 - -# qhasm: v00 = r4 & mask4 -# asm 1: vpand v00=reg128#12 -# asm 2: vpand v00=%xmm11 -vpand %xmm4,%xmm12,%xmm11 - -# qhasm: 8x v10 = r5 << 8 -# asm 1: vpsllw $8,v10=reg128#16 -# asm 2: vpsllw $8,v10=%xmm15 -vpsllw $8,%xmm8,%xmm15 - -# qhasm: 8x v01 = r4 unsigned>> 8 -# asm 1: vpsrlw $8,v01=reg128#13 -# asm 2: vpsrlw $8,v01=%xmm12 -vpsrlw $8,%xmm12,%xmm12 - -# qhasm: v11 = r5 & mask5 -# asm 1: vpand v11=reg128#9 -# asm 2: vpand v11=%xmm8 -vpand %xmm5,%xmm8,%xmm8 - -# qhasm: r4 = v00 | v10 -# asm 1: vpor r4=reg128#12 -# asm 2: vpor r4=%xmm11 -vpor %xmm15,%xmm11,%xmm11 - -# qhasm: r5 = v01 | v11 -# asm 1: vpor r5=reg128#9 -# asm 2: vpor r5=%xmm8 -vpor %xmm8,%xmm12,%xmm8 - -# qhasm: v00 = r6 & mask4 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm4,%xmm6,%xmm12 - -# qhasm: 8x v10 = r7 << 8 -# asm 1: vpsllw $8,v10=reg128#16 -# asm 2: vpsllw $8,v10=%xmm15 -vpsllw $8,%xmm7,%xmm15 - -# qhasm: 8x v01 = r6 unsigned>> 8 -# asm 1: vpsrlw $8,v01=reg128#7 -# asm 2: vpsrlw $8,v01=%xmm6 -vpsrlw $8,%xmm6,%xmm6 - -# qhasm: v11 = r7 & mask5 -# asm 1: vpand v11=reg128#8 -# asm 2: vpand 
v11=%xmm7 -vpand %xmm5,%xmm7,%xmm7 - -# qhasm: r6 = v00 | v10 -# asm 1: vpor r6=reg128#13 -# asm 2: vpor r6=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: r7 = v01 | v11 -# asm 1: vpor r7=reg128#7 -# asm 2: vpor r7=%xmm6 -vpor %xmm7,%xmm6,%xmm6 - -# qhasm: buf = r0[0] -# asm 1: pextrq $0x0,buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm9,%rsi - -# qhasm: mem64[ input_0 + 24 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm13,%rsi - -# qhasm: mem64[ input_0 + 88 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm14,%rsi - -# qhasm: mem64[ input_0 + 152 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm10,%rsi - -# qhasm: mem64[ input_0 + 216 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm11,%rsi - -# qhasm: mem64[ input_0 + 280 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm8,%rsi - -# qhasm: mem64[ input_0 + 344 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm12,%rsi - -# qhasm: mem64[ input_0 + 408 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm6,%rsi - -# qhasm: mem64[ input_0 + 472 ] = buf -# asm 1: movq r0=reg128#7 -# asm 2: movddup 32(r0=%xmm6 -movddup 32(%rdi),%xmm6 - -# qhasm: r1 = mem64[ input_0 + 96 ] x2 -# asm 1: movddup 96(r1=reg128#8 -# asm 2: movddup 96(r1=%xmm7 -movddup 96(%rdi),%xmm7 - -# qhasm: r2 = mem64[ input_0 + 160 ] x2 -# asm 1: movddup 160(r2=reg128#9 -# asm 2: movddup 160(r2=%xmm8 -movddup 160(%rdi),%xmm8 - -# qhasm: r3 = mem64[ input_0 + 224 ] x2 -# asm 1: movddup 224(r3=reg128#10 -# asm 2: movddup 224(r3=%xmm9 -movddup 224(%rdi),%xmm9 - -# qhasm: r4 = mem64[ input_0 + 288 ] x2 -# asm 1: movddup 288(r4=reg128#11 -# asm 2: movddup 288(r4=%xmm10 -movddup 288(%rdi),%xmm10 - -# qhasm: r5 = mem64[ input_0 + 352 ] x2 -# asm 1: movddup 352(r5=reg128#12 -# asm 2: movddup 352(r5=%xmm11 -movddup 352(%rdi),%xmm11 - -# qhasm: r6 = mem64[ input_0 + 416 ] x2 -# asm 1: movddup 416(r6=reg128#13 -# asm 2: movddup 416(r6=%xmm12 -movddup 416(%rdi),%xmm12 - -# qhasm: r7 = mem64[ input_0 + 480 ] x2 -# asm 1: movddup 480(r7=reg128#14 -# asm 2: movddup 480(r7=%xmm13 -movddup 480(%rdi),%xmm13 - -# qhasm: v00 = r0 & mask0 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm0,%xmm6,%xmm14 - -# qhasm: 2x v10 = r4 << 32 -# asm 1: vpsllq $32,v10=reg128#16 -# asm 2: vpsllq $32,v10=%xmm15 -vpsllq $32,%xmm10,%xmm15 - -# qhasm: 2x v01 = r0 unsigned>> 32 -# asm 1: vpsrlq $32,v01=reg128#7 -# asm 2: vpsrlq $32,v01=%xmm6 -vpsrlq $32,%xmm6,%xmm6 - -# qhasm: v11 = r4 & mask1 -# asm 1: vpand v11=reg128#11 -# asm 2: vpand v11=%xmm10 -vpand %xmm1,%xmm10,%xmm10 - -# qhasm: r0 = v00 | v10 -# asm 1: vpor r0=reg128#15 -# asm 2: vpor r0=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: r4 = v01 | v11 -# asm 1: vpor r4=reg128#7 -# asm 2: vpor r4=%xmm6 -vpor %xmm10,%xmm6,%xmm6 - -# qhasm: v00 = r1 & mask0 -# asm 1: vpand v00=reg128#11 -# asm 2: vpand v00=%xmm10 -vpand %xmm0,%xmm7,%xmm10 - -# qhasm: 2x v10 = r5 << 32 -# asm 1: vpsllq $32,v10=reg128#16 -# asm 2: vpsllq $32,v10=%xmm15 -vpsllq $32,%xmm11,%xmm15 - -# qhasm: 2x v01 = r1 unsigned>> 32 -# asm 1: vpsrlq $32,v01=reg128#8 -# asm 2: vpsrlq $32,v01=%xmm7 -vpsrlq $32,%xmm7,%xmm7 - -# qhasm: v11 = r5 & mask1 -# asm 1: vpand v11=reg128#12 -# asm 2: vpand v11=%xmm11 -vpand %xmm1,%xmm11,%xmm11 - -# qhasm: r1 = v00 | v10 -# asm 1: vpor r1=reg128#11 -# asm 2: vpor r1=%xmm10 -vpor %xmm15,%xmm10,%xmm10 - -# 
qhasm: r5 = v01 | v11 -# asm 1: vpor r5=reg128#8 -# asm 2: vpor r5=%xmm7 -vpor %xmm11,%xmm7,%xmm7 - -# qhasm: v00 = r2 & mask0 -# asm 1: vpand v00=reg128#12 -# asm 2: vpand v00=%xmm11 -vpand %xmm0,%xmm8,%xmm11 - -# qhasm: 2x v10 = r6 << 32 -# asm 1: vpsllq $32,v10=reg128#16 -# asm 2: vpsllq $32,v10=%xmm15 -vpsllq $32,%xmm12,%xmm15 - -# qhasm: 2x v01 = r2 unsigned>> 32 -# asm 1: vpsrlq $32,v01=reg128#9 -# asm 2: vpsrlq $32,v01=%xmm8 -vpsrlq $32,%xmm8,%xmm8 - -# qhasm: v11 = r6 & mask1 -# asm 1: vpand v11=reg128#13 -# asm 2: vpand v11=%xmm12 -vpand %xmm1,%xmm12,%xmm12 - -# qhasm: r2 = v00 | v10 -# asm 1: vpor r2=reg128#12 -# asm 2: vpor r2=%xmm11 -vpor %xmm15,%xmm11,%xmm11 - -# qhasm: r6 = v01 | v11 -# asm 1: vpor r6=reg128#9 -# asm 2: vpor r6=%xmm8 -vpor %xmm12,%xmm8,%xmm8 - -# qhasm: v00 = r3 & mask0 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm0,%xmm9,%xmm12 - -# qhasm: 2x v10 = r7 << 32 -# asm 1: vpsllq $32,v10=reg128#16 -# asm 2: vpsllq $32,v10=%xmm15 -vpsllq $32,%xmm13,%xmm15 - -# qhasm: 2x v01 = r3 unsigned>> 32 -# asm 1: vpsrlq $32,v01=reg128#10 -# asm 2: vpsrlq $32,v01=%xmm9 -vpsrlq $32,%xmm9,%xmm9 - -# qhasm: v11 = r7 & mask1 -# asm 1: vpand v11=reg128#14 -# asm 2: vpand v11=%xmm13 -vpand %xmm1,%xmm13,%xmm13 - -# qhasm: r3 = v00 | v10 -# asm 1: vpor r3=reg128#13 -# asm 2: vpor r3=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: r7 = v01 | v11 -# asm 1: vpor r7=reg128#10 -# asm 2: vpor r7=%xmm9 -vpor %xmm13,%xmm9,%xmm9 - -# qhasm: v00 = r0 & mask2 -# asm 1: vpand v00=reg128#14 -# asm 2: vpand v00=%xmm13 -vpand %xmm2,%xmm14,%xmm13 - -# qhasm: 4x v10 = r2 << 16 -# asm 1: vpslld $16,v10=reg128#16 -# asm 2: vpslld $16,v10=%xmm15 -vpslld $16,%xmm11,%xmm15 - -# qhasm: 4x v01 = r0 unsigned>> 16 -# asm 1: vpsrld $16,v01=reg128#15 -# asm 2: vpsrld $16,v01=%xmm14 -vpsrld $16,%xmm14,%xmm14 - -# qhasm: v11 = r2 & mask3 -# asm 1: vpand v11=reg128#12 -# asm 2: vpand v11=%xmm11 -vpand %xmm3,%xmm11,%xmm11 - -# qhasm: r0 = v00 | v10 -# asm 1: vpor r0=reg128#14 -# asm 2: vpor r0=%xmm13 -vpor %xmm15,%xmm13,%xmm13 - -# qhasm: r2 = v01 | v11 -# asm 1: vpor r2=reg128#12 -# asm 2: vpor r2=%xmm11 -vpor %xmm11,%xmm14,%xmm11 - -# qhasm: v00 = r1 & mask2 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm2,%xmm10,%xmm14 - -# qhasm: 4x v10 = r3 << 16 -# asm 1: vpslld $16,v10=reg128#16 -# asm 2: vpslld $16,v10=%xmm15 -vpslld $16,%xmm12,%xmm15 - -# qhasm: 4x v01 = r1 unsigned>> 16 -# asm 1: vpsrld $16,v01=reg128#11 -# asm 2: vpsrld $16,v01=%xmm10 -vpsrld $16,%xmm10,%xmm10 - -# qhasm: v11 = r3 & mask3 -# asm 1: vpand v11=reg128#13 -# asm 2: vpand v11=%xmm12 -vpand %xmm3,%xmm12,%xmm12 - -# qhasm: r1 = v00 | v10 -# asm 1: vpor r1=reg128#15 -# asm 2: vpor r1=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: r3 = v01 | v11 -# asm 1: vpor r3=reg128#11 -# asm 2: vpor r3=%xmm10 -vpor %xmm12,%xmm10,%xmm10 - -# qhasm: v00 = r4 & mask2 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm2,%xmm6,%xmm12 - -# qhasm: 4x v10 = r6 << 16 -# asm 1: vpslld $16,v10=reg128#16 -# asm 2: vpslld $16,v10=%xmm15 -vpslld $16,%xmm8,%xmm15 - -# qhasm: 4x v01 = r4 unsigned>> 16 -# asm 1: vpsrld $16,v01=reg128#7 -# asm 2: vpsrld $16,v01=%xmm6 -vpsrld $16,%xmm6,%xmm6 - -# qhasm: v11 = r6 & mask3 -# asm 1: vpand v11=reg128#9 -# asm 2: vpand v11=%xmm8 -vpand %xmm3,%xmm8,%xmm8 - -# qhasm: r4 = v00 | v10 -# asm 1: vpor r4=reg128#13 -# asm 2: vpor r4=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: r6 = v01 | v11 -# asm 1: vpor r6=reg128#7 -# asm 2: vpor r6=%xmm6 -vpor %xmm8,%xmm6,%xmm6 - -# qhasm: v00 = 
r5 & mask2 -# asm 1: vpand v00=reg128#9 -# asm 2: vpand v00=%xmm8 -vpand %xmm2,%xmm7,%xmm8 - -# qhasm: 4x v10 = r7 << 16 -# asm 1: vpslld $16,v10=reg128#16 -# asm 2: vpslld $16,v10=%xmm15 -vpslld $16,%xmm9,%xmm15 - -# qhasm: 4x v01 = r5 unsigned>> 16 -# asm 1: vpsrld $16,v01=reg128#8 -# asm 2: vpsrld $16,v01=%xmm7 -vpsrld $16,%xmm7,%xmm7 - -# qhasm: v11 = r7 & mask3 -# asm 1: vpand v11=reg128#10 -# asm 2: vpand v11=%xmm9 -vpand %xmm3,%xmm9,%xmm9 - -# qhasm: r5 = v00 | v10 -# asm 1: vpor r5=reg128#9 -# asm 2: vpor r5=%xmm8 -vpor %xmm15,%xmm8,%xmm8 - -# qhasm: r7 = v01 | v11 -# asm 1: vpor r7=reg128#8 -# asm 2: vpor r7=%xmm7 -vpor %xmm9,%xmm7,%xmm7 - -# qhasm: v00 = r0 & mask4 -# asm 1: vpand v00=reg128#10 -# asm 2: vpand v00=%xmm9 -vpand %xmm4,%xmm13,%xmm9 - -# qhasm: 8x v10 = r1 << 8 -# asm 1: vpsllw $8,v10=reg128#16 -# asm 2: vpsllw $8,v10=%xmm15 -vpsllw $8,%xmm14,%xmm15 - -# qhasm: 8x v01 = r0 unsigned>> 8 -# asm 1: vpsrlw $8,v01=reg128#14 -# asm 2: vpsrlw $8,v01=%xmm13 -vpsrlw $8,%xmm13,%xmm13 - -# qhasm: v11 = r1 & mask5 -# asm 1: vpand v11=reg128#15 -# asm 2: vpand v11=%xmm14 -vpand %xmm5,%xmm14,%xmm14 - -# qhasm: r0 = v00 | v10 -# asm 1: vpor r0=reg128#10 -# asm 2: vpor r0=%xmm9 -vpor %xmm15,%xmm9,%xmm9 - -# qhasm: r1 = v01 | v11 -# asm 1: vpor r1=reg128#14 -# asm 2: vpor r1=%xmm13 -vpor %xmm14,%xmm13,%xmm13 - -# qhasm: v00 = r2 & mask4 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm4,%xmm11,%xmm14 - -# qhasm: 8x v10 = r3 << 8 -# asm 1: vpsllw $8,v10=reg128#16 -# asm 2: vpsllw $8,v10=%xmm15 -vpsllw $8,%xmm10,%xmm15 - -# qhasm: 8x v01 = r2 unsigned>> 8 -# asm 1: vpsrlw $8,v01=reg128#12 -# asm 2: vpsrlw $8,v01=%xmm11 -vpsrlw $8,%xmm11,%xmm11 - -# qhasm: v11 = r3 & mask5 -# asm 1: vpand v11=reg128#11 -# asm 2: vpand v11=%xmm10 -vpand %xmm5,%xmm10,%xmm10 - -# qhasm: r2 = v00 | v10 -# asm 1: vpor r2=reg128#15 -# asm 2: vpor r2=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: r3 = v01 | v11 -# asm 1: vpor r3=reg128#11 -# asm 2: vpor r3=%xmm10 -vpor %xmm10,%xmm11,%xmm10 - -# qhasm: v00 = r4 & mask4 -# asm 1: vpand v00=reg128#12 -# asm 2: vpand v00=%xmm11 -vpand %xmm4,%xmm12,%xmm11 - -# qhasm: 8x v10 = r5 << 8 -# asm 1: vpsllw $8,v10=reg128#16 -# asm 2: vpsllw $8,v10=%xmm15 -vpsllw $8,%xmm8,%xmm15 - -# qhasm: 8x v01 = r4 unsigned>> 8 -# asm 1: vpsrlw $8,v01=reg128#13 -# asm 2: vpsrlw $8,v01=%xmm12 -vpsrlw $8,%xmm12,%xmm12 - -# qhasm: v11 = r5 & mask5 -# asm 1: vpand v11=reg128#9 -# asm 2: vpand v11=%xmm8 -vpand %xmm5,%xmm8,%xmm8 - -# qhasm: r4 = v00 | v10 -# asm 1: vpor r4=reg128#12 -# asm 2: vpor r4=%xmm11 -vpor %xmm15,%xmm11,%xmm11 - -# qhasm: r5 = v01 | v11 -# asm 1: vpor r5=reg128#9 -# asm 2: vpor r5=%xmm8 -vpor %xmm8,%xmm12,%xmm8 - -# qhasm: v00 = r6 & mask4 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm4,%xmm6,%xmm12 - -# qhasm: 8x v10 = r7 << 8 -# asm 1: vpsllw $8,v10=reg128#16 -# asm 2: vpsllw $8,v10=%xmm15 -vpsllw $8,%xmm7,%xmm15 - -# qhasm: 8x v01 = r6 unsigned>> 8 -# asm 1: vpsrlw $8,v01=reg128#7 -# asm 2: vpsrlw $8,v01=%xmm6 -vpsrlw $8,%xmm6,%xmm6 - -# qhasm: v11 = r7 & mask5 -# asm 1: vpand v11=reg128#8 -# asm 2: vpand v11=%xmm7 -vpand %xmm5,%xmm7,%xmm7 - -# qhasm: r6 = v00 | v10 -# asm 1: vpor r6=reg128#13 -# asm 2: vpor r6=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: r7 = v01 | v11 -# asm 1: vpor r7=reg128#7 -# asm 2: vpor r7=%xmm6 -vpor %xmm7,%xmm6,%xmm6 - -# qhasm: buf = r0[0] -# asm 1: pextrq $0x0,buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm9,%rsi - -# qhasm: mem64[ input_0 + 32 ] = buf -# asm 1: movq buf=int64#2 -# 
asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm13,%rsi - -# qhasm: mem64[ input_0 + 96 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm14,%rsi - -# qhasm: mem64[ input_0 + 160 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm10,%rsi - -# qhasm: mem64[ input_0 + 224 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm11,%rsi - -# qhasm: mem64[ input_0 + 288 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm8,%rsi - -# qhasm: mem64[ input_0 + 352 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm12,%rsi - -# qhasm: mem64[ input_0 + 416 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm6,%rsi - -# qhasm: mem64[ input_0 + 480 ] = buf -# asm 1: movq r0=reg128#7 -# asm 2: movddup 40(r0=%xmm6 -movddup 40(%rdi),%xmm6 - -# qhasm: r1 = mem64[ input_0 + 104 ] x2 -# asm 1: movddup 104(r1=reg128#8 -# asm 2: movddup 104(r1=%xmm7 -movddup 104(%rdi),%xmm7 - -# qhasm: r2 = mem64[ input_0 + 168 ] x2 -# asm 1: movddup 168(r2=reg128#9 -# asm 2: movddup 168(r2=%xmm8 -movddup 168(%rdi),%xmm8 - -# qhasm: r3 = mem64[ input_0 + 232 ] x2 -# asm 1: movddup 232(r3=reg128#10 -# asm 2: movddup 232(r3=%xmm9 -movddup 232(%rdi),%xmm9 - -# qhasm: r4 = mem64[ input_0 + 296 ] x2 -# asm 1: movddup 296(r4=reg128#11 -# asm 2: movddup 296(r4=%xmm10 -movddup 296(%rdi),%xmm10 - -# qhasm: r5 = mem64[ input_0 + 360 ] x2 -# asm 1: movddup 360(r5=reg128#12 -# asm 2: movddup 360(r5=%xmm11 -movddup 360(%rdi),%xmm11 - -# qhasm: r6 = mem64[ input_0 + 424 ] x2 -# asm 1: movddup 424(r6=reg128#13 -# asm 2: movddup 424(r6=%xmm12 -movddup 424(%rdi),%xmm12 - -# qhasm: r7 = mem64[ input_0 + 488 ] x2 -# asm 1: movddup 488(r7=reg128#14 -# asm 2: movddup 488(r7=%xmm13 -movddup 488(%rdi),%xmm13 - -# qhasm: v00 = r0 & mask0 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm0,%xmm6,%xmm14 - -# qhasm: 2x v10 = r4 << 32 -# asm 1: vpsllq $32,v10=reg128#16 -# asm 2: vpsllq $32,v10=%xmm15 -vpsllq $32,%xmm10,%xmm15 - -# qhasm: 2x v01 = r0 unsigned>> 32 -# asm 1: vpsrlq $32,v01=reg128#7 -# asm 2: vpsrlq $32,v01=%xmm6 -vpsrlq $32,%xmm6,%xmm6 - -# qhasm: v11 = r4 & mask1 -# asm 1: vpand v11=reg128#11 -# asm 2: vpand v11=%xmm10 -vpand %xmm1,%xmm10,%xmm10 - -# qhasm: r0 = v00 | v10 -# asm 1: vpor r0=reg128#15 -# asm 2: vpor r0=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: r4 = v01 | v11 -# asm 1: vpor r4=reg128#7 -# asm 2: vpor r4=%xmm6 -vpor %xmm10,%xmm6,%xmm6 - -# qhasm: v00 = r1 & mask0 -# asm 1: vpand v00=reg128#11 -# asm 2: vpand v00=%xmm10 -vpand %xmm0,%xmm7,%xmm10 - -# qhasm: 2x v10 = r5 << 32 -# asm 1: vpsllq $32,v10=reg128#16 -# asm 2: vpsllq $32,v10=%xmm15 -vpsllq $32,%xmm11,%xmm15 - -# qhasm: 2x v01 = r1 unsigned>> 32 -# asm 1: vpsrlq $32,v01=reg128#8 -# asm 2: vpsrlq $32,v01=%xmm7 -vpsrlq $32,%xmm7,%xmm7 - -# qhasm: v11 = r5 & mask1 -# asm 1: vpand v11=reg128#12 -# asm 2: vpand v11=%xmm11 -vpand %xmm1,%xmm11,%xmm11 - -# qhasm: r1 = v00 | v10 -# asm 1: vpor r1=reg128#11 -# asm 2: vpor r1=%xmm10 -vpor %xmm15,%xmm10,%xmm10 - -# qhasm: r5 = v01 | v11 -# asm 1: vpor r5=reg128#8 -# asm 2: vpor r5=%xmm7 -vpor %xmm11,%xmm7,%xmm7 - -# qhasm: v00 = r2 & mask0 -# asm 1: vpand v00=reg128#12 -# asm 2: vpand v00=%xmm11 -vpand %xmm0,%xmm8,%xmm11 - -# qhasm: 2x v10 = r6 << 32 -# asm 1: vpsllq $32,v10=reg128#16 -# asm 2: vpsllq $32,v10=%xmm15 -vpsllq $32,%xmm12,%xmm15 - -# qhasm: 2x v01 = r2 unsigned>> 32 -# asm 1: vpsrlq $32,v01=reg128#9 -# asm 2: vpsrlq 
$32,v01=%xmm8 -vpsrlq $32,%xmm8,%xmm8 - -# qhasm: v11 = r6 & mask1 -# asm 1: vpand v11=reg128#13 -# asm 2: vpand v11=%xmm12 -vpand %xmm1,%xmm12,%xmm12 - -# qhasm: r2 = v00 | v10 -# asm 1: vpor r2=reg128#12 -# asm 2: vpor r2=%xmm11 -vpor %xmm15,%xmm11,%xmm11 - -# qhasm: r6 = v01 | v11 -# asm 1: vpor r6=reg128#9 -# asm 2: vpor r6=%xmm8 -vpor %xmm12,%xmm8,%xmm8 - -# qhasm: v00 = r3 & mask0 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm0,%xmm9,%xmm12 - -# qhasm: 2x v10 = r7 << 32 -# asm 1: vpsllq $32,v10=reg128#16 -# asm 2: vpsllq $32,v10=%xmm15 -vpsllq $32,%xmm13,%xmm15 - -# qhasm: 2x v01 = r3 unsigned>> 32 -# asm 1: vpsrlq $32,v01=reg128#10 -# asm 2: vpsrlq $32,v01=%xmm9 -vpsrlq $32,%xmm9,%xmm9 - -# qhasm: v11 = r7 & mask1 -# asm 1: vpand v11=reg128#14 -# asm 2: vpand v11=%xmm13 -vpand %xmm1,%xmm13,%xmm13 - -# qhasm: r3 = v00 | v10 -# asm 1: vpor r3=reg128#13 -# asm 2: vpor r3=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: r7 = v01 | v11 -# asm 1: vpor r7=reg128#10 -# asm 2: vpor r7=%xmm9 -vpor %xmm13,%xmm9,%xmm9 - -# qhasm: v00 = r0 & mask2 -# asm 1: vpand v00=reg128#14 -# asm 2: vpand v00=%xmm13 -vpand %xmm2,%xmm14,%xmm13 - -# qhasm: 4x v10 = r2 << 16 -# asm 1: vpslld $16,v10=reg128#16 -# asm 2: vpslld $16,v10=%xmm15 -vpslld $16,%xmm11,%xmm15 - -# qhasm: 4x v01 = r0 unsigned>> 16 -# asm 1: vpsrld $16,v01=reg128#15 -# asm 2: vpsrld $16,v01=%xmm14 -vpsrld $16,%xmm14,%xmm14 - -# qhasm: v11 = r2 & mask3 -# asm 1: vpand v11=reg128#12 -# asm 2: vpand v11=%xmm11 -vpand %xmm3,%xmm11,%xmm11 - -# qhasm: r0 = v00 | v10 -# asm 1: vpor r0=reg128#14 -# asm 2: vpor r0=%xmm13 -vpor %xmm15,%xmm13,%xmm13 - -# qhasm: r2 = v01 | v11 -# asm 1: vpor r2=reg128#12 -# asm 2: vpor r2=%xmm11 -vpor %xmm11,%xmm14,%xmm11 - -# qhasm: v00 = r1 & mask2 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm2,%xmm10,%xmm14 - -# qhasm: 4x v10 = r3 << 16 -# asm 1: vpslld $16,v10=reg128#16 -# asm 2: vpslld $16,v10=%xmm15 -vpslld $16,%xmm12,%xmm15 - -# qhasm: 4x v01 = r1 unsigned>> 16 -# asm 1: vpsrld $16,v01=reg128#11 -# asm 2: vpsrld $16,v01=%xmm10 -vpsrld $16,%xmm10,%xmm10 - -# qhasm: v11 = r3 & mask3 -# asm 1: vpand v11=reg128#13 -# asm 2: vpand v11=%xmm12 -vpand %xmm3,%xmm12,%xmm12 - -# qhasm: r1 = v00 | v10 -# asm 1: vpor r1=reg128#15 -# asm 2: vpor r1=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: r3 = v01 | v11 -# asm 1: vpor r3=reg128#11 -# asm 2: vpor r3=%xmm10 -vpor %xmm12,%xmm10,%xmm10 - -# qhasm: v00 = r4 & mask2 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm2,%xmm6,%xmm12 - -# qhasm: 4x v10 = r6 << 16 -# asm 1: vpslld $16,v10=reg128#16 -# asm 2: vpslld $16,v10=%xmm15 -vpslld $16,%xmm8,%xmm15 - -# qhasm: 4x v01 = r4 unsigned>> 16 -# asm 1: vpsrld $16,v01=reg128#7 -# asm 2: vpsrld $16,v01=%xmm6 -vpsrld $16,%xmm6,%xmm6 - -# qhasm: v11 = r6 & mask3 -# asm 1: vpand v11=reg128#9 -# asm 2: vpand v11=%xmm8 -vpand %xmm3,%xmm8,%xmm8 - -# qhasm: r4 = v00 | v10 -# asm 1: vpor r4=reg128#13 -# asm 2: vpor r4=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: r6 = v01 | v11 -# asm 1: vpor r6=reg128#7 -# asm 2: vpor r6=%xmm6 -vpor %xmm8,%xmm6,%xmm6 - -# qhasm: v00 = r5 & mask2 -# asm 1: vpand v00=reg128#9 -# asm 2: vpand v00=%xmm8 -vpand %xmm2,%xmm7,%xmm8 - -# qhasm: 4x v10 = r7 << 16 -# asm 1: vpslld $16,v10=reg128#16 -# asm 2: vpslld $16,v10=%xmm15 -vpslld $16,%xmm9,%xmm15 - -# qhasm: 4x v01 = r5 unsigned>> 16 -# asm 1: vpsrld $16,v01=reg128#8 -# asm 2: vpsrld $16,v01=%xmm7 -vpsrld $16,%xmm7,%xmm7 - -# qhasm: v11 = r7 & mask3 -# asm 1: vpand v11=reg128#10 -# asm 2: vpand 
v11=%xmm9 -vpand %xmm3,%xmm9,%xmm9 - -# qhasm: r5 = v00 | v10 -# asm 1: vpor r5=reg128#9 -# asm 2: vpor r5=%xmm8 -vpor %xmm15,%xmm8,%xmm8 - -# qhasm: r7 = v01 | v11 -# asm 1: vpor r7=reg128#8 -# asm 2: vpor r7=%xmm7 -vpor %xmm9,%xmm7,%xmm7 - -# qhasm: v00 = r0 & mask4 -# asm 1: vpand v00=reg128#10 -# asm 2: vpand v00=%xmm9 -vpand %xmm4,%xmm13,%xmm9 - -# qhasm: 8x v10 = r1 << 8 -# asm 1: vpsllw $8,v10=reg128#16 -# asm 2: vpsllw $8,v10=%xmm15 -vpsllw $8,%xmm14,%xmm15 - -# qhasm: 8x v01 = r0 unsigned>> 8 -# asm 1: vpsrlw $8,v01=reg128#14 -# asm 2: vpsrlw $8,v01=%xmm13 -vpsrlw $8,%xmm13,%xmm13 - -# qhasm: v11 = r1 & mask5 -# asm 1: vpand v11=reg128#15 -# asm 2: vpand v11=%xmm14 -vpand %xmm5,%xmm14,%xmm14 - -# qhasm: r0 = v00 | v10 -# asm 1: vpor r0=reg128#10 -# asm 2: vpor r0=%xmm9 -vpor %xmm15,%xmm9,%xmm9 - -# qhasm: r1 = v01 | v11 -# asm 1: vpor r1=reg128#14 -# asm 2: vpor r1=%xmm13 -vpor %xmm14,%xmm13,%xmm13 - -# qhasm: v00 = r2 & mask4 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm4,%xmm11,%xmm14 - -# qhasm: 8x v10 = r3 << 8 -# asm 1: vpsllw $8,v10=reg128#16 -# asm 2: vpsllw $8,v10=%xmm15 -vpsllw $8,%xmm10,%xmm15 - -# qhasm: 8x v01 = r2 unsigned>> 8 -# asm 1: vpsrlw $8,v01=reg128#12 -# asm 2: vpsrlw $8,v01=%xmm11 -vpsrlw $8,%xmm11,%xmm11 - -# qhasm: v11 = r3 & mask5 -# asm 1: vpand v11=reg128#11 -# asm 2: vpand v11=%xmm10 -vpand %xmm5,%xmm10,%xmm10 - -# qhasm: r2 = v00 | v10 -# asm 1: vpor r2=reg128#15 -# asm 2: vpor r2=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: r3 = v01 | v11 -# asm 1: vpor r3=reg128#11 -# asm 2: vpor r3=%xmm10 -vpor %xmm10,%xmm11,%xmm10 - -# qhasm: v00 = r4 & mask4 -# asm 1: vpand v00=reg128#12 -# asm 2: vpand v00=%xmm11 -vpand %xmm4,%xmm12,%xmm11 - -# qhasm: 8x v10 = r5 << 8 -# asm 1: vpsllw $8,v10=reg128#16 -# asm 2: vpsllw $8,v10=%xmm15 -vpsllw $8,%xmm8,%xmm15 - -# qhasm: 8x v01 = r4 unsigned>> 8 -# asm 1: vpsrlw $8,v01=reg128#13 -# asm 2: vpsrlw $8,v01=%xmm12 -vpsrlw $8,%xmm12,%xmm12 - -# qhasm: v11 = r5 & mask5 -# asm 1: vpand v11=reg128#9 -# asm 2: vpand v11=%xmm8 -vpand %xmm5,%xmm8,%xmm8 - -# qhasm: r4 = v00 | v10 -# asm 1: vpor r4=reg128#12 -# asm 2: vpor r4=%xmm11 -vpor %xmm15,%xmm11,%xmm11 - -# qhasm: r5 = v01 | v11 -# asm 1: vpor r5=reg128#9 -# asm 2: vpor r5=%xmm8 -vpor %xmm8,%xmm12,%xmm8 - -# qhasm: v00 = r6 & mask4 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm4,%xmm6,%xmm12 - -# qhasm: 8x v10 = r7 << 8 -# asm 1: vpsllw $8,v10=reg128#16 -# asm 2: vpsllw $8,v10=%xmm15 -vpsllw $8,%xmm7,%xmm15 - -# qhasm: 8x v01 = r6 unsigned>> 8 -# asm 1: vpsrlw $8,v01=reg128#7 -# asm 2: vpsrlw $8,v01=%xmm6 -vpsrlw $8,%xmm6,%xmm6 - -# qhasm: v11 = r7 & mask5 -# asm 1: vpand v11=reg128#8 -# asm 2: vpand v11=%xmm7 -vpand %xmm5,%xmm7,%xmm7 - -# qhasm: r6 = v00 | v10 -# asm 1: vpor r6=reg128#13 -# asm 2: vpor r6=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: r7 = v01 | v11 -# asm 1: vpor r7=reg128#7 -# asm 2: vpor r7=%xmm6 -vpor %xmm7,%xmm6,%xmm6 - -# qhasm: buf = r0[0] -# asm 1: pextrq $0x0,buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm9,%rsi - -# qhasm: mem64[ input_0 + 40 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm13,%rsi - -# qhasm: mem64[ input_0 + 104 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm14,%rsi - -# qhasm: mem64[ input_0 + 168 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm10,%rsi - -# qhasm: mem64[ input_0 + 232 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq 
$0x0,%xmm11,%rsi - -# qhasm: mem64[ input_0 + 296 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm8,%rsi - -# qhasm: mem64[ input_0 + 360 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm12,%rsi - -# qhasm: mem64[ input_0 + 424 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm6,%rsi - -# qhasm: mem64[ input_0 + 488 ] = buf -# asm 1: movq r0=reg128#7 -# asm 2: movddup 48(r0=%xmm6 -movddup 48(%rdi),%xmm6 - -# qhasm: r1 = mem64[ input_0 + 112 ] x2 -# asm 1: movddup 112(r1=reg128#8 -# asm 2: movddup 112(r1=%xmm7 -movddup 112(%rdi),%xmm7 - -# qhasm: r2 = mem64[ input_0 + 176 ] x2 -# asm 1: movddup 176(r2=reg128#9 -# asm 2: movddup 176(r2=%xmm8 -movddup 176(%rdi),%xmm8 - -# qhasm: r3 = mem64[ input_0 + 240 ] x2 -# asm 1: movddup 240(r3=reg128#10 -# asm 2: movddup 240(r3=%xmm9 -movddup 240(%rdi),%xmm9 - -# qhasm: r4 = mem64[ input_0 + 304 ] x2 -# asm 1: movddup 304(r4=reg128#11 -# asm 2: movddup 304(r4=%xmm10 -movddup 304(%rdi),%xmm10 - -# qhasm: r5 = mem64[ input_0 + 368 ] x2 -# asm 1: movddup 368(r5=reg128#12 -# asm 2: movddup 368(r5=%xmm11 -movddup 368(%rdi),%xmm11 - -# qhasm: r6 = mem64[ input_0 + 432 ] x2 -# asm 1: movddup 432(r6=reg128#13 -# asm 2: movddup 432(r6=%xmm12 -movddup 432(%rdi),%xmm12 - -# qhasm: r7 = mem64[ input_0 + 496 ] x2 -# asm 1: movddup 496(r7=reg128#14 -# asm 2: movddup 496(r7=%xmm13 -movddup 496(%rdi),%xmm13 - -# qhasm: v00 = r0 & mask0 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm0,%xmm6,%xmm14 - -# qhasm: 2x v10 = r4 << 32 -# asm 1: vpsllq $32,v10=reg128#16 -# asm 2: vpsllq $32,v10=%xmm15 -vpsllq $32,%xmm10,%xmm15 - -# qhasm: 2x v01 = r0 unsigned>> 32 -# asm 1: vpsrlq $32,v01=reg128#7 -# asm 2: vpsrlq $32,v01=%xmm6 -vpsrlq $32,%xmm6,%xmm6 - -# qhasm: v11 = r4 & mask1 -# asm 1: vpand v11=reg128#11 -# asm 2: vpand v11=%xmm10 -vpand %xmm1,%xmm10,%xmm10 - -# qhasm: r0 = v00 | v10 -# asm 1: vpor r0=reg128#15 -# asm 2: vpor r0=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: r4 = v01 | v11 -# asm 1: vpor r4=reg128#7 -# asm 2: vpor r4=%xmm6 -vpor %xmm10,%xmm6,%xmm6 - -# qhasm: v00 = r1 & mask0 -# asm 1: vpand v00=reg128#11 -# asm 2: vpand v00=%xmm10 -vpand %xmm0,%xmm7,%xmm10 - -# qhasm: 2x v10 = r5 << 32 -# asm 1: vpsllq $32,v10=reg128#16 -# asm 2: vpsllq $32,v10=%xmm15 -vpsllq $32,%xmm11,%xmm15 - -# qhasm: 2x v01 = r1 unsigned>> 32 -# asm 1: vpsrlq $32,v01=reg128#8 -# asm 2: vpsrlq $32,v01=%xmm7 -vpsrlq $32,%xmm7,%xmm7 - -# qhasm: v11 = r5 & mask1 -# asm 1: vpand v11=reg128#12 -# asm 2: vpand v11=%xmm11 -vpand %xmm1,%xmm11,%xmm11 - -# qhasm: r1 = v00 | v10 -# asm 1: vpor r1=reg128#11 -# asm 2: vpor r1=%xmm10 -vpor %xmm15,%xmm10,%xmm10 - -# qhasm: r5 = v01 | v11 -# asm 1: vpor r5=reg128#8 -# asm 2: vpor r5=%xmm7 -vpor %xmm11,%xmm7,%xmm7 - -# qhasm: v00 = r2 & mask0 -# asm 1: vpand v00=reg128#12 -# asm 2: vpand v00=%xmm11 -vpand %xmm0,%xmm8,%xmm11 - -# qhasm: 2x v10 = r6 << 32 -# asm 1: vpsllq $32,v10=reg128#16 -# asm 2: vpsllq $32,v10=%xmm15 -vpsllq $32,%xmm12,%xmm15 - -# qhasm: 2x v01 = r2 unsigned>> 32 -# asm 1: vpsrlq $32,v01=reg128#9 -# asm 2: vpsrlq $32,v01=%xmm8 -vpsrlq $32,%xmm8,%xmm8 - -# qhasm: v11 = r6 & mask1 -# asm 1: vpand v11=reg128#13 -# asm 2: vpand v11=%xmm12 -vpand %xmm1,%xmm12,%xmm12 - -# qhasm: r2 = v00 | v10 -# asm 1: vpor r2=reg128#12 -# asm 2: vpor r2=%xmm11 -vpor %xmm15,%xmm11,%xmm11 - -# qhasm: r6 = v01 | v11 -# asm 1: vpor r6=reg128#9 -# asm 2: vpor r6=%xmm8 -vpor %xmm12,%xmm8,%xmm8 - -# qhasm: v00 = r3 & mask0 -# asm 1: vpand 
v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm0,%xmm9,%xmm12 - -# qhasm: 2x v10 = r7 << 32 -# asm 1: vpsllq $32,v10=reg128#16 -# asm 2: vpsllq $32,v10=%xmm15 -vpsllq $32,%xmm13,%xmm15 - -# qhasm: 2x v01 = r3 unsigned>> 32 -# asm 1: vpsrlq $32,v01=reg128#10 -# asm 2: vpsrlq $32,v01=%xmm9 -vpsrlq $32,%xmm9,%xmm9 - -# qhasm: v11 = r7 & mask1 -# asm 1: vpand v11=reg128#14 -# asm 2: vpand v11=%xmm13 -vpand %xmm1,%xmm13,%xmm13 - -# qhasm: r3 = v00 | v10 -# asm 1: vpor r3=reg128#13 -# asm 2: vpor r3=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: r7 = v01 | v11 -# asm 1: vpor r7=reg128#10 -# asm 2: vpor r7=%xmm9 -vpor %xmm13,%xmm9,%xmm9 - -# qhasm: v00 = r0 & mask2 -# asm 1: vpand v00=reg128#14 -# asm 2: vpand v00=%xmm13 -vpand %xmm2,%xmm14,%xmm13 - -# qhasm: 4x v10 = r2 << 16 -# asm 1: vpslld $16,v10=reg128#16 -# asm 2: vpslld $16,v10=%xmm15 -vpslld $16,%xmm11,%xmm15 - -# qhasm: 4x v01 = r0 unsigned>> 16 -# asm 1: vpsrld $16,v01=reg128#15 -# asm 2: vpsrld $16,v01=%xmm14 -vpsrld $16,%xmm14,%xmm14 - -# qhasm: v11 = r2 & mask3 -# asm 1: vpand v11=reg128#12 -# asm 2: vpand v11=%xmm11 -vpand %xmm3,%xmm11,%xmm11 - -# qhasm: r0 = v00 | v10 -# asm 1: vpor r0=reg128#14 -# asm 2: vpor r0=%xmm13 -vpor %xmm15,%xmm13,%xmm13 - -# qhasm: r2 = v01 | v11 -# asm 1: vpor r2=reg128#12 -# asm 2: vpor r2=%xmm11 -vpor %xmm11,%xmm14,%xmm11 - -# qhasm: v00 = r1 & mask2 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm2,%xmm10,%xmm14 - -# qhasm: 4x v10 = r3 << 16 -# asm 1: vpslld $16,v10=reg128#16 -# asm 2: vpslld $16,v10=%xmm15 -vpslld $16,%xmm12,%xmm15 - -# qhasm: 4x v01 = r1 unsigned>> 16 -# asm 1: vpsrld $16,v01=reg128#11 -# asm 2: vpsrld $16,v01=%xmm10 -vpsrld $16,%xmm10,%xmm10 - -# qhasm: v11 = r3 & mask3 -# asm 1: vpand v11=reg128#13 -# asm 2: vpand v11=%xmm12 -vpand %xmm3,%xmm12,%xmm12 - -# qhasm: r1 = v00 | v10 -# asm 1: vpor r1=reg128#15 -# asm 2: vpor r1=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: r3 = v01 | v11 -# asm 1: vpor r3=reg128#11 -# asm 2: vpor r3=%xmm10 -vpor %xmm12,%xmm10,%xmm10 - -# qhasm: v00 = r4 & mask2 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm2,%xmm6,%xmm12 - -# qhasm: 4x v10 = r6 << 16 -# asm 1: vpslld $16,v10=reg128#16 -# asm 2: vpslld $16,v10=%xmm15 -vpslld $16,%xmm8,%xmm15 - -# qhasm: 4x v01 = r4 unsigned>> 16 -# asm 1: vpsrld $16,v01=reg128#7 -# asm 2: vpsrld $16,v01=%xmm6 -vpsrld $16,%xmm6,%xmm6 - -# qhasm: v11 = r6 & mask3 -# asm 1: vpand v11=reg128#9 -# asm 2: vpand v11=%xmm8 -vpand %xmm3,%xmm8,%xmm8 - -# qhasm: r4 = v00 | v10 -# asm 1: vpor r4=reg128#13 -# asm 2: vpor r4=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: r6 = v01 | v11 -# asm 1: vpor r6=reg128#7 -# asm 2: vpor r6=%xmm6 -vpor %xmm8,%xmm6,%xmm6 - -# qhasm: v00 = r5 & mask2 -# asm 1: vpand v00=reg128#9 -# asm 2: vpand v00=%xmm8 -vpand %xmm2,%xmm7,%xmm8 - -# qhasm: 4x v10 = r7 << 16 -# asm 1: vpslld $16,v10=reg128#16 -# asm 2: vpslld $16,v10=%xmm15 -vpslld $16,%xmm9,%xmm15 - -# qhasm: 4x v01 = r5 unsigned>> 16 -# asm 1: vpsrld $16,v01=reg128#8 -# asm 2: vpsrld $16,v01=%xmm7 -vpsrld $16,%xmm7,%xmm7 - -# qhasm: v11 = r7 & mask3 -# asm 1: vpand v11=reg128#10 -# asm 2: vpand v11=%xmm9 -vpand %xmm3,%xmm9,%xmm9 - -# qhasm: r5 = v00 | v10 -# asm 1: vpor r5=reg128#9 -# asm 2: vpor r5=%xmm8 -vpor %xmm15,%xmm8,%xmm8 - -# qhasm: r7 = v01 | v11 -# asm 1: vpor r7=reg128#8 -# asm 2: vpor r7=%xmm7 -vpor %xmm9,%xmm7,%xmm7 - -# qhasm: v00 = r0 & mask4 -# asm 1: vpand v00=reg128#10 -# asm 2: vpand v00=%xmm9 -vpand %xmm4,%xmm13,%xmm9 - -# qhasm: 8x v10 = r1 << 8 -# asm 1: vpsllw 
$8,v10=reg128#16 -# asm 2: vpsllw $8,v10=%xmm15 -vpsllw $8,%xmm14,%xmm15 - -# qhasm: 8x v01 = r0 unsigned>> 8 -# asm 1: vpsrlw $8,v01=reg128#14 -# asm 2: vpsrlw $8,v01=%xmm13 -vpsrlw $8,%xmm13,%xmm13 - -# qhasm: v11 = r1 & mask5 -# asm 1: vpand v11=reg128#15 -# asm 2: vpand v11=%xmm14 -vpand %xmm5,%xmm14,%xmm14 - -# qhasm: r0 = v00 | v10 -# asm 1: vpor r0=reg128#10 -# asm 2: vpor r0=%xmm9 -vpor %xmm15,%xmm9,%xmm9 - -# qhasm: r1 = v01 | v11 -# asm 1: vpor r1=reg128#14 -# asm 2: vpor r1=%xmm13 -vpor %xmm14,%xmm13,%xmm13 - -# qhasm: v00 = r2 & mask4 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm4,%xmm11,%xmm14 - -# qhasm: 8x v10 = r3 << 8 -# asm 1: vpsllw $8,v10=reg128#16 -# asm 2: vpsllw $8,v10=%xmm15 -vpsllw $8,%xmm10,%xmm15 - -# qhasm: 8x v01 = r2 unsigned>> 8 -# asm 1: vpsrlw $8,v01=reg128#12 -# asm 2: vpsrlw $8,v01=%xmm11 -vpsrlw $8,%xmm11,%xmm11 - -# qhasm: v11 = r3 & mask5 -# asm 1: vpand v11=reg128#11 -# asm 2: vpand v11=%xmm10 -vpand %xmm5,%xmm10,%xmm10 - -# qhasm: r2 = v00 | v10 -# asm 1: vpor r2=reg128#15 -# asm 2: vpor r2=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: r3 = v01 | v11 -# asm 1: vpor r3=reg128#11 -# asm 2: vpor r3=%xmm10 -vpor %xmm10,%xmm11,%xmm10 - -# qhasm: v00 = r4 & mask4 -# asm 1: vpand v00=reg128#12 -# asm 2: vpand v00=%xmm11 -vpand %xmm4,%xmm12,%xmm11 - -# qhasm: 8x v10 = r5 << 8 -# asm 1: vpsllw $8,v10=reg128#16 -# asm 2: vpsllw $8,v10=%xmm15 -vpsllw $8,%xmm8,%xmm15 - -# qhasm: 8x v01 = r4 unsigned>> 8 -# asm 1: vpsrlw $8,v01=reg128#13 -# asm 2: vpsrlw $8,v01=%xmm12 -vpsrlw $8,%xmm12,%xmm12 - -# qhasm: v11 = r5 & mask5 -# asm 1: vpand v11=reg128#9 -# asm 2: vpand v11=%xmm8 -vpand %xmm5,%xmm8,%xmm8 - -# qhasm: r4 = v00 | v10 -# asm 1: vpor r4=reg128#12 -# asm 2: vpor r4=%xmm11 -vpor %xmm15,%xmm11,%xmm11 - -# qhasm: r5 = v01 | v11 -# asm 1: vpor r5=reg128#9 -# asm 2: vpor r5=%xmm8 -vpor %xmm8,%xmm12,%xmm8 - -# qhasm: v00 = r6 & mask4 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm4,%xmm6,%xmm12 - -# qhasm: 8x v10 = r7 << 8 -# asm 1: vpsllw $8,v10=reg128#16 -# asm 2: vpsllw $8,v10=%xmm15 -vpsllw $8,%xmm7,%xmm15 - -# qhasm: 8x v01 = r6 unsigned>> 8 -# asm 1: vpsrlw $8,v01=reg128#7 -# asm 2: vpsrlw $8,v01=%xmm6 -vpsrlw $8,%xmm6,%xmm6 - -# qhasm: v11 = r7 & mask5 -# asm 1: vpand v11=reg128#8 -# asm 2: vpand v11=%xmm7 -vpand %xmm5,%xmm7,%xmm7 - -# qhasm: r6 = v00 | v10 -# asm 1: vpor r6=reg128#13 -# asm 2: vpor r6=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: r7 = v01 | v11 -# asm 1: vpor r7=reg128#7 -# asm 2: vpor r7=%xmm6 -vpor %xmm7,%xmm6,%xmm6 - -# qhasm: buf = r0[0] -# asm 1: pextrq $0x0,buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm9,%rsi - -# qhasm: mem64[ input_0 + 48 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm13,%rsi - -# qhasm: mem64[ input_0 + 112 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm14,%rsi - -# qhasm: mem64[ input_0 + 176 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm10,%rsi - -# qhasm: mem64[ input_0 + 240 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm11,%rsi - -# qhasm: mem64[ input_0 + 304 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm8,%rsi - -# qhasm: mem64[ input_0 + 368 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm12,%rsi - -# qhasm: mem64[ input_0 + 432 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm6,%rsi - -# qhasm: mem64[ 
input_0 + 496 ] = buf -# asm 1: movq r0=reg128#7 -# asm 2: movddup 56(r0=%xmm6 -movddup 56(%rdi),%xmm6 - -# qhasm: r1 = mem64[ input_0 + 120 ] x2 -# asm 1: movddup 120(r1=reg128#8 -# asm 2: movddup 120(r1=%xmm7 -movddup 120(%rdi),%xmm7 - -# qhasm: r2 = mem64[ input_0 + 184 ] x2 -# asm 1: movddup 184(r2=reg128#9 -# asm 2: movddup 184(r2=%xmm8 -movddup 184(%rdi),%xmm8 - -# qhasm: r3 = mem64[ input_0 + 248 ] x2 -# asm 1: movddup 248(r3=reg128#10 -# asm 2: movddup 248(r3=%xmm9 -movddup 248(%rdi),%xmm9 - -# qhasm: r4 = mem64[ input_0 + 312 ] x2 -# asm 1: movddup 312(r4=reg128#11 -# asm 2: movddup 312(r4=%xmm10 -movddup 312(%rdi),%xmm10 - -# qhasm: r5 = mem64[ input_0 + 376 ] x2 -# asm 1: movddup 376(r5=reg128#12 -# asm 2: movddup 376(r5=%xmm11 -movddup 376(%rdi),%xmm11 - -# qhasm: r6 = mem64[ input_0 + 440 ] x2 -# asm 1: movddup 440(r6=reg128#13 -# asm 2: movddup 440(r6=%xmm12 -movddup 440(%rdi),%xmm12 - -# qhasm: r7 = mem64[ input_0 + 504 ] x2 -# asm 1: movddup 504(r7=reg128#14 -# asm 2: movddup 504(r7=%xmm13 -movddup 504(%rdi),%xmm13 - -# qhasm: v00 = r0 & mask0 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm0,%xmm6,%xmm14 - -# qhasm: 2x v10 = r4 << 32 -# asm 1: vpsllq $32,v10=reg128#16 -# asm 2: vpsllq $32,v10=%xmm15 -vpsllq $32,%xmm10,%xmm15 - -# qhasm: 2x v01 = r0 unsigned>> 32 -# asm 1: vpsrlq $32,v01=reg128#7 -# asm 2: vpsrlq $32,v01=%xmm6 -vpsrlq $32,%xmm6,%xmm6 - -# qhasm: v11 = r4 & mask1 -# asm 1: vpand v11=reg128#11 -# asm 2: vpand v11=%xmm10 -vpand %xmm1,%xmm10,%xmm10 - -# qhasm: r0 = v00 | v10 -# asm 1: vpor r0=reg128#15 -# asm 2: vpor r0=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: r4 = v01 | v11 -# asm 1: vpor r4=reg128#7 -# asm 2: vpor r4=%xmm6 -vpor %xmm10,%xmm6,%xmm6 - -# qhasm: v00 = r1 & mask0 -# asm 1: vpand v00=reg128#11 -# asm 2: vpand v00=%xmm10 -vpand %xmm0,%xmm7,%xmm10 - -# qhasm: 2x v10 = r5 << 32 -# asm 1: vpsllq $32,v10=reg128#16 -# asm 2: vpsllq $32,v10=%xmm15 -vpsllq $32,%xmm11,%xmm15 - -# qhasm: 2x v01 = r1 unsigned>> 32 -# asm 1: vpsrlq $32,v01=reg128#8 -# asm 2: vpsrlq $32,v01=%xmm7 -vpsrlq $32,%xmm7,%xmm7 - -# qhasm: v11 = r5 & mask1 -# asm 1: vpand v11=reg128#12 -# asm 2: vpand v11=%xmm11 -vpand %xmm1,%xmm11,%xmm11 - -# qhasm: r1 = v00 | v10 -# asm 1: vpor r1=reg128#11 -# asm 2: vpor r1=%xmm10 -vpor %xmm15,%xmm10,%xmm10 - -# qhasm: r5 = v01 | v11 -# asm 1: vpor r5=reg128#8 -# asm 2: vpor r5=%xmm7 -vpor %xmm11,%xmm7,%xmm7 - -# qhasm: v00 = r2 & mask0 -# asm 1: vpand v00=reg128#12 -# asm 2: vpand v00=%xmm11 -vpand %xmm0,%xmm8,%xmm11 - -# qhasm: 2x v10 = r6 << 32 -# asm 1: vpsllq $32,v10=reg128#16 -# asm 2: vpsllq $32,v10=%xmm15 -vpsllq $32,%xmm12,%xmm15 - -# qhasm: 2x v01 = r2 unsigned>> 32 -# asm 1: vpsrlq $32,v01=reg128#9 -# asm 2: vpsrlq $32,v01=%xmm8 -vpsrlq $32,%xmm8,%xmm8 - -# qhasm: v11 = r6 & mask1 -# asm 1: vpand v11=reg128#13 -# asm 2: vpand v11=%xmm12 -vpand %xmm1,%xmm12,%xmm12 - -# qhasm: r2 = v00 | v10 -# asm 1: vpor r2=reg128#12 -# asm 2: vpor r2=%xmm11 -vpor %xmm15,%xmm11,%xmm11 - -# qhasm: r6 = v01 | v11 -# asm 1: vpor r6=reg128#9 -# asm 2: vpor r6=%xmm8 -vpor %xmm12,%xmm8,%xmm8 - -# qhasm: v00 = r3 & mask0 -# asm 1: vpand v00=reg128#1 -# asm 2: vpand v00=%xmm0 -vpand %xmm0,%xmm9,%xmm0 - -# qhasm: 2x v10 = r7 << 32 -# asm 1: vpsllq $32,v10=reg128#13 -# asm 2: vpsllq $32,v10=%xmm12 -vpsllq $32,%xmm13,%xmm12 - -# qhasm: 2x v01 = r3 unsigned>> 32 -# asm 1: vpsrlq $32,v01=reg128#10 -# asm 2: vpsrlq $32,v01=%xmm9 -vpsrlq $32,%xmm9,%xmm9 - -# qhasm: v11 = r7 & mask1 -# asm 1: vpand v11=reg128#2 -# asm 2: vpand v11=%xmm1 -vpand 
%xmm1,%xmm13,%xmm1 - -# qhasm: r3 = v00 | v10 -# asm 1: vpor r3=reg128#1 -# asm 2: vpor r3=%xmm0 -vpor %xmm12,%xmm0,%xmm0 - -# qhasm: r7 = v01 | v11 -# asm 1: vpor r7=reg128#2 -# asm 2: vpor r7=%xmm1 -vpor %xmm1,%xmm9,%xmm1 - -# qhasm: v00 = r0 & mask2 -# asm 1: vpand v00=reg128#10 -# asm 2: vpand v00=%xmm9 -vpand %xmm2,%xmm14,%xmm9 - -# qhasm: 4x v10 = r2 << 16 -# asm 1: vpslld $16,v10=reg128#13 -# asm 2: vpslld $16,v10=%xmm12 -vpslld $16,%xmm11,%xmm12 - -# qhasm: 4x v01 = r0 unsigned>> 16 -# asm 1: vpsrld $16,v01=reg128#14 -# asm 2: vpsrld $16,v01=%xmm13 -vpsrld $16,%xmm14,%xmm13 - -# qhasm: v11 = r2 & mask3 -# asm 1: vpand v11=reg128#12 -# asm 2: vpand v11=%xmm11 -vpand %xmm3,%xmm11,%xmm11 - -# qhasm: r0 = v00 | v10 -# asm 1: vpor r0=reg128#10 -# asm 2: vpor r0=%xmm9 -vpor %xmm12,%xmm9,%xmm9 - -# qhasm: r2 = v01 | v11 -# asm 1: vpor r2=reg128#12 -# asm 2: vpor r2=%xmm11 -vpor %xmm11,%xmm13,%xmm11 - -# qhasm: v00 = r1 & mask2 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm2,%xmm10,%xmm12 - -# qhasm: 4x v10 = r3 << 16 -# asm 1: vpslld $16,v10=reg128#14 -# asm 2: vpslld $16,v10=%xmm13 -vpslld $16,%xmm0,%xmm13 - -# qhasm: 4x v01 = r1 unsigned>> 16 -# asm 1: vpsrld $16,v01=reg128#11 -# asm 2: vpsrld $16,v01=%xmm10 -vpsrld $16,%xmm10,%xmm10 - -# qhasm: v11 = r3 & mask3 -# asm 1: vpand v11=reg128#1 -# asm 2: vpand v11=%xmm0 -vpand %xmm3,%xmm0,%xmm0 - -# qhasm: r1 = v00 | v10 -# asm 1: vpor r1=reg128#13 -# asm 2: vpor r1=%xmm12 -vpor %xmm13,%xmm12,%xmm12 - -# qhasm: r3 = v01 | v11 -# asm 1: vpor r3=reg128#1 -# asm 2: vpor r3=%xmm0 -vpor %xmm0,%xmm10,%xmm0 - -# qhasm: v00 = r4 & mask2 -# asm 1: vpand v00=reg128#11 -# asm 2: vpand v00=%xmm10 -vpand %xmm2,%xmm6,%xmm10 - -# qhasm: 4x v10 = r6 << 16 -# asm 1: vpslld $16,v10=reg128#14 -# asm 2: vpslld $16,v10=%xmm13 -vpslld $16,%xmm8,%xmm13 - -# qhasm: 4x v01 = r4 unsigned>> 16 -# asm 1: vpsrld $16,v01=reg128#7 -# asm 2: vpsrld $16,v01=%xmm6 -vpsrld $16,%xmm6,%xmm6 - -# qhasm: v11 = r6 & mask3 -# asm 1: vpand v11=reg128#9 -# asm 2: vpand v11=%xmm8 -vpand %xmm3,%xmm8,%xmm8 - -# qhasm: r4 = v00 | v10 -# asm 1: vpor r4=reg128#11 -# asm 2: vpor r4=%xmm10 -vpor %xmm13,%xmm10,%xmm10 - -# qhasm: r6 = v01 | v11 -# asm 1: vpor r6=reg128#7 -# asm 2: vpor r6=%xmm6 -vpor %xmm8,%xmm6,%xmm6 - -# qhasm: v00 = r5 & mask2 -# asm 1: vpand v00=reg128#3 -# asm 2: vpand v00=%xmm2 -vpand %xmm2,%xmm7,%xmm2 - -# qhasm: 4x v10 = r7 << 16 -# asm 1: vpslld $16,v10=reg128#9 -# asm 2: vpslld $16,v10=%xmm8 -vpslld $16,%xmm1,%xmm8 - -# qhasm: 4x v01 = r5 unsigned>> 16 -# asm 1: vpsrld $16,v01=reg128#8 -# asm 2: vpsrld $16,v01=%xmm7 -vpsrld $16,%xmm7,%xmm7 - -# qhasm: v11 = r7 & mask3 -# asm 1: vpand v11=reg128#2 -# asm 2: vpand v11=%xmm1 -vpand %xmm3,%xmm1,%xmm1 - -# qhasm: r5 = v00 | v10 -# asm 1: vpor r5=reg128#3 -# asm 2: vpor r5=%xmm2 -vpor %xmm8,%xmm2,%xmm2 - -# qhasm: r7 = v01 | v11 -# asm 1: vpor r7=reg128#2 -# asm 2: vpor r7=%xmm1 -vpor %xmm1,%xmm7,%xmm1 - -# qhasm: v00 = r0 & mask4 -# asm 1: vpand v00=reg128#4 -# asm 2: vpand v00=%xmm3 -vpand %xmm4,%xmm9,%xmm3 - -# qhasm: 8x v10 = r1 << 8 -# asm 1: vpsllw $8,v10=reg128#8 -# asm 2: vpsllw $8,v10=%xmm7 -vpsllw $8,%xmm12,%xmm7 - -# qhasm: 8x v01 = r0 unsigned>> 8 -# asm 1: vpsrlw $8,v01=reg128#9 -# asm 2: vpsrlw $8,v01=%xmm8 -vpsrlw $8,%xmm9,%xmm8 - -# qhasm: v11 = r1 & mask5 -# asm 1: vpand v11=reg128#10 -# asm 2: vpand v11=%xmm9 -vpand %xmm5,%xmm12,%xmm9 - -# qhasm: r0 = v00 | v10 -# asm 1: vpor r0=reg128#4 -# asm 2: vpor r0=%xmm3 -vpor %xmm7,%xmm3,%xmm3 - -# qhasm: r1 = v01 | v11 -# asm 1: vpor 
r1=reg128#8 -# asm 2: vpor r1=%xmm7 -vpor %xmm9,%xmm8,%xmm7 - -# qhasm: v00 = r2 & mask4 -# asm 1: vpand v00=reg128#9 -# asm 2: vpand v00=%xmm8 -vpand %xmm4,%xmm11,%xmm8 - -# qhasm: 8x v10 = r3 << 8 -# asm 1: vpsllw $8,v10=reg128#10 -# asm 2: vpsllw $8,v10=%xmm9 -vpsllw $8,%xmm0,%xmm9 - -# qhasm: 8x v01 = r2 unsigned>> 8 -# asm 1: vpsrlw $8,v01=reg128#12 -# asm 2: vpsrlw $8,v01=%xmm11 -vpsrlw $8,%xmm11,%xmm11 - -# qhasm: v11 = r3 & mask5 -# asm 1: vpand v11=reg128#1 -# asm 2: vpand v11=%xmm0 -vpand %xmm5,%xmm0,%xmm0 - -# qhasm: r2 = v00 | v10 -# asm 1: vpor r2=reg128#9 -# asm 2: vpor r2=%xmm8 -vpor %xmm9,%xmm8,%xmm8 - -# qhasm: r3 = v01 | v11 -# asm 1: vpor r3=reg128#1 -# asm 2: vpor r3=%xmm0 -vpor %xmm0,%xmm11,%xmm0 - -# qhasm: v00 = r4 & mask4 -# asm 1: vpand v00=reg128#10 -# asm 2: vpand v00=%xmm9 -vpand %xmm4,%xmm10,%xmm9 - -# qhasm: 8x v10 = r5 << 8 -# asm 1: vpsllw $8,v10=reg128#12 -# asm 2: vpsllw $8,v10=%xmm11 -vpsllw $8,%xmm2,%xmm11 - -# qhasm: 8x v01 = r4 unsigned>> 8 -# asm 1: vpsrlw $8,v01=reg128#11 -# asm 2: vpsrlw $8,v01=%xmm10 -vpsrlw $8,%xmm10,%xmm10 - -# qhasm: v11 = r5 & mask5 -# asm 1: vpand v11=reg128#3 -# asm 2: vpand v11=%xmm2 -vpand %xmm5,%xmm2,%xmm2 - -# qhasm: r4 = v00 | v10 -# asm 1: vpor r4=reg128#10 -# asm 2: vpor r4=%xmm9 -vpor %xmm11,%xmm9,%xmm9 - -# qhasm: r5 = v01 | v11 -# asm 1: vpor r5=reg128#3 -# asm 2: vpor r5=%xmm2 -vpor %xmm2,%xmm10,%xmm2 - -# qhasm: v00 = r6 & mask4 -# asm 1: vpand v00=reg128#5 -# asm 2: vpand v00=%xmm4 -vpand %xmm4,%xmm6,%xmm4 - -# qhasm: 8x v10 = r7 << 8 -# asm 1: vpsllw $8,v10=reg128#11 -# asm 2: vpsllw $8,v10=%xmm10 -vpsllw $8,%xmm1,%xmm10 - -# qhasm: 8x v01 = r6 unsigned>> 8 -# asm 1: vpsrlw $8,v01=reg128#7 -# asm 2: vpsrlw $8,v01=%xmm6 -vpsrlw $8,%xmm6,%xmm6 - -# qhasm: v11 = r7 & mask5 -# asm 1: vpand v11=reg128#2 -# asm 2: vpand v11=%xmm1 -vpand %xmm5,%xmm1,%xmm1 - -# qhasm: r6 = v00 | v10 -# asm 1: vpor r6=reg128#5 -# asm 2: vpor r6=%xmm4 -vpor %xmm10,%xmm4,%xmm4 - -# qhasm: r7 = v01 | v11 -# asm 1: vpor r7=reg128#2 -# asm 2: vpor r7=%xmm1 -vpor %xmm1,%xmm6,%xmm1 - -# qhasm: buf = r0[0] -# asm 1: pextrq $0x0,buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm3,%rsi - -# qhasm: mem64[ input_0 + 56 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm7,%rsi - -# qhasm: mem64[ input_0 + 120 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm8,%rsi - -# qhasm: mem64[ input_0 + 184 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm0,%rsi - -# qhasm: mem64[ input_0 + 248 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm9,%rsi - -# qhasm: mem64[ input_0 + 312 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm2,%rsi - -# qhasm: mem64[ input_0 + 376 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm4,%rsi - -# qhasm: mem64[ input_0 + 440 ] = buf -# asm 1: movq buf=int64#2 -# asm 2: pextrq $0x0,buf=%rsi -pextrq $0x0,%xmm1,%rsi - -# qhasm: mem64[ input_0 + 504 ] = buf -# asm 1: movq mask0=reg128#1 -# asm 2: movdqa PQCLEAN_MCELIECE348864_SSE_MASK2_0(%rip),>mask0=%xmm0 -movdqa PQCLEAN_MCELIECE348864_SSE_MASK2_0(%rip),%xmm0 - -# qhasm: mask1 aligned= mem128[ PQCLEAN_MCELIECE348864_SSE_MASK2_1 ] -# asm 1: movdqa PQCLEAN_MCELIECE348864_SSE_MASK2_1(%rip),>mask1=reg128#2 -# asm 2: movdqa PQCLEAN_MCELIECE348864_SSE_MASK2_1(%rip),>mask1=%xmm1 -movdqa PQCLEAN_MCELIECE348864_SSE_MASK2_1(%rip),%xmm1 - -# qhasm: mask2 aligned= mem128[ 
PQCLEAN_MCELIECE348864_SSE_MASK1_0 ] -# asm 1: movdqa PQCLEAN_MCELIECE348864_SSE_MASK1_0(%rip),>mask2=reg128#3 -# asm 2: movdqa PQCLEAN_MCELIECE348864_SSE_MASK1_0(%rip),>mask2=%xmm2 -movdqa PQCLEAN_MCELIECE348864_SSE_MASK1_0(%rip),%xmm2 - -# qhasm: mask3 aligned= mem128[ PQCLEAN_MCELIECE348864_SSE_MASK1_1 ] -# asm 1: movdqa PQCLEAN_MCELIECE348864_SSE_MASK1_1(%rip),>mask3=reg128#4 -# asm 2: movdqa PQCLEAN_MCELIECE348864_SSE_MASK1_1(%rip),>mask3=%xmm3 -movdqa PQCLEAN_MCELIECE348864_SSE_MASK1_1(%rip),%xmm3 - -# qhasm: mask4 aligned= mem128[ PQCLEAN_MCELIECE348864_SSE_MASK0_0 ] -# asm 1: movdqa PQCLEAN_MCELIECE348864_SSE_MASK0_0(%rip),>mask4=reg128#5 -# asm 2: movdqa PQCLEAN_MCELIECE348864_SSE_MASK0_0(%rip),>mask4=%xmm4 -movdqa PQCLEAN_MCELIECE348864_SSE_MASK0_0(%rip),%xmm4 - -# qhasm: mask5 aligned= mem128[ PQCLEAN_MCELIECE348864_SSE_MASK0_1 ] -# asm 1: movdqa PQCLEAN_MCELIECE348864_SSE_MASK0_1(%rip),>mask5=reg128#6 -# asm 2: movdqa PQCLEAN_MCELIECE348864_SSE_MASK0_1(%rip),>mask5=%xmm5 -movdqa PQCLEAN_MCELIECE348864_SSE_MASK0_1(%rip),%xmm5 - -# qhasm: r0 = mem64[ input_0 + 0 ] x2 -# asm 1: movddup 0(r0=reg128#7 -# asm 2: movddup 0(r0=%xmm6 -movddup 0(%rdi),%xmm6 - -# qhasm: r1 = mem64[ input_0 + 8 ] x2 -# asm 1: movddup 8(r1=reg128#8 -# asm 2: movddup 8(r1=%xmm7 -movddup 8(%rdi),%xmm7 - -# qhasm: r2 = mem64[ input_0 + 16 ] x2 -# asm 1: movddup 16(r2=reg128#9 -# asm 2: movddup 16(r2=%xmm8 -movddup 16(%rdi),%xmm8 - -# qhasm: r3 = mem64[ input_0 + 24 ] x2 -# asm 1: movddup 24(r3=reg128#10 -# asm 2: movddup 24(r3=%xmm9 -movddup 24(%rdi),%xmm9 - -# qhasm: r4 = mem64[ input_0 + 32 ] x2 -# asm 1: movddup 32(r4=reg128#11 -# asm 2: movddup 32(r4=%xmm10 -movddup 32(%rdi),%xmm10 - -# qhasm: r5 = mem64[ input_0 + 40 ] x2 -# asm 1: movddup 40(r5=reg128#12 -# asm 2: movddup 40(r5=%xmm11 -movddup 40(%rdi),%xmm11 - -# qhasm: r6 = mem64[ input_0 + 48 ] x2 -# asm 1: movddup 48(r6=reg128#13 -# asm 2: movddup 48(r6=%xmm12 -movddup 48(%rdi),%xmm12 - -# qhasm: r7 = mem64[ input_0 + 56 ] x2 -# asm 1: movddup 56(r7=reg128#14 -# asm 2: movddup 56(r7=%xmm13 -movddup 56(%rdi),%xmm13 - -# qhasm: v00 = r0 & mask0 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm0,%xmm6,%xmm14 - -# qhasm: v10 = r4 & mask0 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm0,%xmm10,%xmm15 - -# qhasm: 2x v10 <<= 4 -# asm 1: psllq $4,v01=reg128#7 -# asm 2: vpand v01=%xmm6 -vpand %xmm1,%xmm6,%xmm6 - -# qhasm: v11 = r4 & mask1 -# asm 1: vpand v11=reg128#11 -# asm 2: vpand v11=%xmm10 -vpand %xmm1,%xmm10,%xmm10 - -# qhasm: 2x v01 unsigned>>= 4 -# asm 1: psrlq $4,r0=reg128#15 -# asm 2: vpor r0=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: r4 = v01 | v11 -# asm 1: vpor r4=reg128#7 -# asm 2: vpor r4=%xmm6 -vpor %xmm10,%xmm6,%xmm6 - -# qhasm: v00 = r1 & mask0 -# asm 1: vpand v00=reg128#11 -# asm 2: vpand v00=%xmm10 -vpand %xmm0,%xmm7,%xmm10 - -# qhasm: v10 = r5 & mask0 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm0,%xmm11,%xmm15 - -# qhasm: 2x v10 <<= 4 -# asm 1: psllq $4,v01=reg128#8 -# asm 2: vpand v01=%xmm7 -vpand %xmm1,%xmm7,%xmm7 - -# qhasm: v11 = r5 & mask1 -# asm 1: vpand v11=reg128#12 -# asm 2: vpand v11=%xmm11 -vpand %xmm1,%xmm11,%xmm11 - -# qhasm: 2x v01 unsigned>>= 4 -# asm 1: psrlq $4,r1=reg128#11 -# asm 2: vpor r1=%xmm10 -vpor %xmm15,%xmm10,%xmm10 - -# qhasm: r5 = v01 | v11 -# asm 1: vpor r5=reg128#8 -# asm 2: vpor r5=%xmm7 -vpor %xmm11,%xmm7,%xmm7 - -# qhasm: v00 = r2 & mask0 -# asm 1: vpand v00=reg128#12 -# asm 2: vpand v00=%xmm11 -vpand %xmm0,%xmm8,%xmm11 - -# qhasm: v10 = r6 
& mask0 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm0,%xmm12,%xmm15 - -# qhasm: 2x v10 <<= 4 -# asm 1: psllq $4,v01=reg128#9 -# asm 2: vpand v01=%xmm8 -vpand %xmm1,%xmm8,%xmm8 - -# qhasm: v11 = r6 & mask1 -# asm 1: vpand v11=reg128#13 -# asm 2: vpand v11=%xmm12 -vpand %xmm1,%xmm12,%xmm12 - -# qhasm: 2x v01 unsigned>>= 4 -# asm 1: psrlq $4,r2=reg128#12 -# asm 2: vpor r2=%xmm11 -vpor %xmm15,%xmm11,%xmm11 - -# qhasm: r6 = v01 | v11 -# asm 1: vpor r6=reg128#9 -# asm 2: vpor r6=%xmm8 -vpor %xmm12,%xmm8,%xmm8 - -# qhasm: v00 = r3 & mask0 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm0,%xmm9,%xmm12 - -# qhasm: v10 = r7 & mask0 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm0,%xmm13,%xmm15 - -# qhasm: 2x v10 <<= 4 -# asm 1: psllq $4,v01=reg128#10 -# asm 2: vpand v01=%xmm9 -vpand %xmm1,%xmm9,%xmm9 - -# qhasm: v11 = r7 & mask1 -# asm 1: vpand v11=reg128#14 -# asm 2: vpand v11=%xmm13 -vpand %xmm1,%xmm13,%xmm13 - -# qhasm: 2x v01 unsigned>>= 4 -# asm 1: psrlq $4,r3=reg128#13 -# asm 2: vpor r3=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: r7 = v01 | v11 -# asm 1: vpor r7=reg128#10 -# asm 2: vpor r7=%xmm9 -vpor %xmm13,%xmm9,%xmm9 - -# qhasm: v00 = r0 & mask2 -# asm 1: vpand v00=reg128#14 -# asm 2: vpand v00=%xmm13 -vpand %xmm2,%xmm14,%xmm13 - -# qhasm: v10 = r2 & mask2 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm2,%xmm11,%xmm15 - -# qhasm: 2x v10 <<= 2 -# asm 1: psllq $2,v01=reg128#15 -# asm 2: vpand v01=%xmm14 -vpand %xmm3,%xmm14,%xmm14 - -# qhasm: v11 = r2 & mask3 -# asm 1: vpand v11=reg128#12 -# asm 2: vpand v11=%xmm11 -vpand %xmm3,%xmm11,%xmm11 - -# qhasm: 2x v01 unsigned>>= 2 -# asm 1: psrlq $2,r0=reg128#14 -# asm 2: vpor r0=%xmm13 -vpor %xmm15,%xmm13,%xmm13 - -# qhasm: r2 = v01 | v11 -# asm 1: vpor r2=reg128#12 -# asm 2: vpor r2=%xmm11 -vpor %xmm11,%xmm14,%xmm11 - -# qhasm: v00 = r1 & mask2 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm2,%xmm10,%xmm14 - -# qhasm: v10 = r3 & mask2 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm2,%xmm12,%xmm15 - -# qhasm: 2x v10 <<= 2 -# asm 1: psllq $2,v01=reg128#11 -# asm 2: vpand v01=%xmm10 -vpand %xmm3,%xmm10,%xmm10 - -# qhasm: v11 = r3 & mask3 -# asm 1: vpand v11=reg128#13 -# asm 2: vpand v11=%xmm12 -vpand %xmm3,%xmm12,%xmm12 - -# qhasm: 2x v01 unsigned>>= 2 -# asm 1: psrlq $2,r1=reg128#15 -# asm 2: vpor r1=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: r3 = v01 | v11 -# asm 1: vpor r3=reg128#11 -# asm 2: vpor r3=%xmm10 -vpor %xmm12,%xmm10,%xmm10 - -# qhasm: v00 = r4 & mask2 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm2,%xmm6,%xmm12 - -# qhasm: v10 = r6 & mask2 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm2,%xmm8,%xmm15 - -# qhasm: 2x v10 <<= 2 -# asm 1: psllq $2,v01=reg128#7 -# asm 2: vpand v01=%xmm6 -vpand %xmm3,%xmm6,%xmm6 - -# qhasm: v11 = r6 & mask3 -# asm 1: vpand v11=reg128#9 -# asm 2: vpand v11=%xmm8 -vpand %xmm3,%xmm8,%xmm8 - -# qhasm: 2x v01 unsigned>>= 2 -# asm 1: psrlq $2,r4=reg128#13 -# asm 2: vpor r4=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: r6 = v01 | v11 -# asm 1: vpor r6=reg128#7 -# asm 2: vpor r6=%xmm6 -vpor %xmm8,%xmm6,%xmm6 - -# qhasm: v00 = r5 & mask2 -# asm 1: vpand v00=reg128#9 -# asm 2: vpand v00=%xmm8 -vpand %xmm2,%xmm7,%xmm8 - -# qhasm: v10 = r7 & mask2 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm2,%xmm9,%xmm15 - -# qhasm: 2x v10 <<= 2 -# asm 1: psllq $2,v01=reg128#8 -# asm 2: vpand v01=%xmm7 -vpand %xmm3,%xmm7,%xmm7 - -# qhasm: 
v11 = r7 & mask3 -# asm 1: vpand v11=reg128#10 -# asm 2: vpand v11=%xmm9 -vpand %xmm3,%xmm9,%xmm9 - -# qhasm: 2x v01 unsigned>>= 2 -# asm 1: psrlq $2,r5=reg128#9 -# asm 2: vpor r5=%xmm8 -vpor %xmm15,%xmm8,%xmm8 - -# qhasm: r7 = v01 | v11 -# asm 1: vpor r7=reg128#8 -# asm 2: vpor r7=%xmm7 -vpor %xmm9,%xmm7,%xmm7 - -# qhasm: v00 = r0 & mask4 -# asm 1: vpand v00=reg128#10 -# asm 2: vpand v00=%xmm9 -vpand %xmm4,%xmm13,%xmm9 - -# qhasm: v10 = r1 & mask4 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm4,%xmm14,%xmm15 - -# qhasm: 2x v10 <<= 1 -# asm 1: psllq $1,v01=reg128#14 -# asm 2: vpand v01=%xmm13 -vpand %xmm5,%xmm13,%xmm13 - -# qhasm: v11 = r1 & mask5 -# asm 1: vpand v11=reg128#15 -# asm 2: vpand v11=%xmm14 -vpand %xmm5,%xmm14,%xmm14 - -# qhasm: 2x v01 unsigned>>= 1 -# asm 1: psrlq $1,r0=reg128#10 -# asm 2: vpor r0=%xmm9 -vpor %xmm15,%xmm9,%xmm9 - -# qhasm: r1 = v01 | v11 -# asm 1: vpor r1=reg128#14 -# asm 2: vpor r1=%xmm13 -vpor %xmm14,%xmm13,%xmm13 - -# qhasm: v00 = r2 & mask4 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm4,%xmm11,%xmm14 - -# qhasm: v10 = r3 & mask4 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm4,%xmm10,%xmm15 - -# qhasm: 2x v10 <<= 1 -# asm 1: psllq $1,v01=reg128#12 -# asm 2: vpand v01=%xmm11 -vpand %xmm5,%xmm11,%xmm11 - -# qhasm: v11 = r3 & mask5 -# asm 1: vpand v11=reg128#11 -# asm 2: vpand v11=%xmm10 -vpand %xmm5,%xmm10,%xmm10 - -# qhasm: 2x v01 unsigned>>= 1 -# asm 1: psrlq $1,r2=reg128#15 -# asm 2: vpor r2=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: r3 = v01 | v11 -# asm 1: vpor r3=reg128#11 -# asm 2: vpor r3=%xmm10 -vpor %xmm10,%xmm11,%xmm10 - -# qhasm: v00 = r4 & mask4 -# asm 1: vpand v00=reg128#12 -# asm 2: vpand v00=%xmm11 -vpand %xmm4,%xmm12,%xmm11 - -# qhasm: v10 = r5 & mask4 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm4,%xmm8,%xmm15 - -# qhasm: 2x v10 <<= 1 -# asm 1: psllq $1,v01=reg128#13 -# asm 2: vpand v01=%xmm12 -vpand %xmm5,%xmm12,%xmm12 - -# qhasm: v11 = r5 & mask5 -# asm 1: vpand v11=reg128#9 -# asm 2: vpand v11=%xmm8 -vpand %xmm5,%xmm8,%xmm8 - -# qhasm: 2x v01 unsigned>>= 1 -# asm 1: psrlq $1,r4=reg128#12 -# asm 2: vpor r4=%xmm11 -vpor %xmm15,%xmm11,%xmm11 - -# qhasm: r5 = v01 | v11 -# asm 1: vpor r5=reg128#9 -# asm 2: vpor r5=%xmm8 -vpor %xmm8,%xmm12,%xmm8 - -# qhasm: v00 = r6 & mask4 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm4,%xmm6,%xmm12 - -# qhasm: v10 = r7 & mask4 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm4,%xmm7,%xmm15 - -# qhasm: 2x v10 <<= 1 -# asm 1: psllq $1,v01=reg128#7 -# asm 2: vpand v01=%xmm6 -vpand %xmm5,%xmm6,%xmm6 - -# qhasm: v11 = r7 & mask5 -# asm 1: vpand v11=reg128#8 -# asm 2: vpand v11=%xmm7 -vpand %xmm5,%xmm7,%xmm7 - -# qhasm: 2x v01 unsigned>>= 1 -# asm 1: psrlq $1,r6=reg128#13 -# asm 2: vpor r6=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: r7 = v01 | v11 -# asm 1: vpor r7=reg128#7 -# asm 2: vpor r7=%xmm6 -vpor %xmm7,%xmm6,%xmm6 - -# qhasm: t0 = r0[0]r1[0] -# asm 1: vpunpcklqdq t0=reg128#8 -# asm 2: vpunpcklqdq t0=%xmm7 -vpunpcklqdq %xmm13,%xmm9,%xmm7 - -# qhasm: mem128[ input_0 + 0 ] = t0 -# asm 1: movdqu t0=reg128#8 -# asm 2: vpunpcklqdq t0=%xmm7 -vpunpcklqdq %xmm10,%xmm14,%xmm7 - -# qhasm: mem128[ input_0 + 16 ] = t0 -# asm 1: movdqu t0=reg128#8 -# asm 2: vpunpcklqdq t0=%xmm7 -vpunpcklqdq %xmm8,%xmm11,%xmm7 - -# qhasm: mem128[ input_0 + 32 ] = t0 -# asm 1: movdqu t0=reg128#7 -# asm 2: vpunpcklqdq t0=%xmm6 -vpunpcklqdq %xmm6,%xmm12,%xmm6 - -# qhasm: mem128[ input_0 + 48 ] = t0 -# asm 1: 
movdqu r0=reg128#7 -# asm 2: movddup 64(r0=%xmm6 -movddup 64(%rdi),%xmm6 - -# qhasm: r1 = mem64[ input_0 + 72 ] x2 -# asm 1: movddup 72(r1=reg128#8 -# asm 2: movddup 72(r1=%xmm7 -movddup 72(%rdi),%xmm7 - -# qhasm: r2 = mem64[ input_0 + 80 ] x2 -# asm 1: movddup 80(r2=reg128#9 -# asm 2: movddup 80(r2=%xmm8 -movddup 80(%rdi),%xmm8 - -# qhasm: r3 = mem64[ input_0 + 88 ] x2 -# asm 1: movddup 88(r3=reg128#10 -# asm 2: movddup 88(r3=%xmm9 -movddup 88(%rdi),%xmm9 - -# qhasm: r4 = mem64[ input_0 + 96 ] x2 -# asm 1: movddup 96(r4=reg128#11 -# asm 2: movddup 96(r4=%xmm10 -movddup 96(%rdi),%xmm10 - -# qhasm: r5 = mem64[ input_0 + 104 ] x2 -# asm 1: movddup 104(r5=reg128#12 -# asm 2: movddup 104(r5=%xmm11 -movddup 104(%rdi),%xmm11 - -# qhasm: r6 = mem64[ input_0 + 112 ] x2 -# asm 1: movddup 112(r6=reg128#13 -# asm 2: movddup 112(r6=%xmm12 -movddup 112(%rdi),%xmm12 - -# qhasm: r7 = mem64[ input_0 + 120 ] x2 -# asm 1: movddup 120(r7=reg128#14 -# asm 2: movddup 120(r7=%xmm13 -movddup 120(%rdi),%xmm13 - -# qhasm: v00 = r0 & mask0 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm0,%xmm6,%xmm14 - -# qhasm: v10 = r4 & mask0 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm0,%xmm10,%xmm15 - -# qhasm: 2x v10 <<= 4 -# asm 1: psllq $4,v01=reg128#7 -# asm 2: vpand v01=%xmm6 -vpand %xmm1,%xmm6,%xmm6 - -# qhasm: v11 = r4 & mask1 -# asm 1: vpand v11=reg128#11 -# asm 2: vpand v11=%xmm10 -vpand %xmm1,%xmm10,%xmm10 - -# qhasm: 2x v01 unsigned>>= 4 -# asm 1: psrlq $4,r0=reg128#15 -# asm 2: vpor r0=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: r4 = v01 | v11 -# asm 1: vpor r4=reg128#7 -# asm 2: vpor r4=%xmm6 -vpor %xmm10,%xmm6,%xmm6 - -# qhasm: v00 = r1 & mask0 -# asm 1: vpand v00=reg128#11 -# asm 2: vpand v00=%xmm10 -vpand %xmm0,%xmm7,%xmm10 - -# qhasm: v10 = r5 & mask0 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm0,%xmm11,%xmm15 - -# qhasm: 2x v10 <<= 4 -# asm 1: psllq $4,v01=reg128#8 -# asm 2: vpand v01=%xmm7 -vpand %xmm1,%xmm7,%xmm7 - -# qhasm: v11 = r5 & mask1 -# asm 1: vpand v11=reg128#12 -# asm 2: vpand v11=%xmm11 -vpand %xmm1,%xmm11,%xmm11 - -# qhasm: 2x v01 unsigned>>= 4 -# asm 1: psrlq $4,r1=reg128#11 -# asm 2: vpor r1=%xmm10 -vpor %xmm15,%xmm10,%xmm10 - -# qhasm: r5 = v01 | v11 -# asm 1: vpor r5=reg128#8 -# asm 2: vpor r5=%xmm7 -vpor %xmm11,%xmm7,%xmm7 - -# qhasm: v00 = r2 & mask0 -# asm 1: vpand v00=reg128#12 -# asm 2: vpand v00=%xmm11 -vpand %xmm0,%xmm8,%xmm11 - -# qhasm: v10 = r6 & mask0 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm0,%xmm12,%xmm15 - -# qhasm: 2x v10 <<= 4 -# asm 1: psllq $4,v01=reg128#9 -# asm 2: vpand v01=%xmm8 -vpand %xmm1,%xmm8,%xmm8 - -# qhasm: v11 = r6 & mask1 -# asm 1: vpand v11=reg128#13 -# asm 2: vpand v11=%xmm12 -vpand %xmm1,%xmm12,%xmm12 - -# qhasm: 2x v01 unsigned>>= 4 -# asm 1: psrlq $4,r2=reg128#12 -# asm 2: vpor r2=%xmm11 -vpor %xmm15,%xmm11,%xmm11 - -# qhasm: r6 = v01 | v11 -# asm 1: vpor r6=reg128#9 -# asm 2: vpor r6=%xmm8 -vpor %xmm12,%xmm8,%xmm8 - -# qhasm: v00 = r3 & mask0 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm0,%xmm9,%xmm12 - -# qhasm: v10 = r7 & mask0 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm0,%xmm13,%xmm15 - -# qhasm: 2x v10 <<= 4 -# asm 1: psllq $4,v01=reg128#10 -# asm 2: vpand v01=%xmm9 -vpand %xmm1,%xmm9,%xmm9 - -# qhasm: v11 = r7 & mask1 -# asm 1: vpand v11=reg128#14 -# asm 2: vpand v11=%xmm13 -vpand %xmm1,%xmm13,%xmm13 - -# qhasm: 2x v01 unsigned>>= 4 -# asm 1: psrlq $4,r3=reg128#13 -# asm 2: vpor r3=%xmm12 -vpor 
%xmm15,%xmm12,%xmm12 - -# qhasm: r7 = v01 | v11 -# asm 1: vpor r7=reg128#10 -# asm 2: vpor r7=%xmm9 -vpor %xmm13,%xmm9,%xmm9 - -# qhasm: v00 = r0 & mask2 -# asm 1: vpand v00=reg128#14 -# asm 2: vpand v00=%xmm13 -vpand %xmm2,%xmm14,%xmm13 - -# qhasm: v10 = r2 & mask2 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm2,%xmm11,%xmm15 - -# qhasm: 2x v10 <<= 2 -# asm 1: psllq $2,v01=reg128#15 -# asm 2: vpand v01=%xmm14 -vpand %xmm3,%xmm14,%xmm14 - -# qhasm: v11 = r2 & mask3 -# asm 1: vpand v11=reg128#12 -# asm 2: vpand v11=%xmm11 -vpand %xmm3,%xmm11,%xmm11 - -# qhasm: 2x v01 unsigned>>= 2 -# asm 1: psrlq $2,r0=reg128#14 -# asm 2: vpor r0=%xmm13 -vpor %xmm15,%xmm13,%xmm13 - -# qhasm: r2 = v01 | v11 -# asm 1: vpor r2=reg128#12 -# asm 2: vpor r2=%xmm11 -vpor %xmm11,%xmm14,%xmm11 - -# qhasm: v00 = r1 & mask2 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm2,%xmm10,%xmm14 - -# qhasm: v10 = r3 & mask2 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm2,%xmm12,%xmm15 - -# qhasm: 2x v10 <<= 2 -# asm 1: psllq $2,v01=reg128#11 -# asm 2: vpand v01=%xmm10 -vpand %xmm3,%xmm10,%xmm10 - -# qhasm: v11 = r3 & mask3 -# asm 1: vpand v11=reg128#13 -# asm 2: vpand v11=%xmm12 -vpand %xmm3,%xmm12,%xmm12 - -# qhasm: 2x v01 unsigned>>= 2 -# asm 1: psrlq $2,r1=reg128#15 -# asm 2: vpor r1=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: r3 = v01 | v11 -# asm 1: vpor r3=reg128#11 -# asm 2: vpor r3=%xmm10 -vpor %xmm12,%xmm10,%xmm10 - -# qhasm: v00 = r4 & mask2 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm2,%xmm6,%xmm12 - -# qhasm: v10 = r6 & mask2 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm2,%xmm8,%xmm15 - -# qhasm: 2x v10 <<= 2 -# asm 1: psllq $2,v01=reg128#7 -# asm 2: vpand v01=%xmm6 -vpand %xmm3,%xmm6,%xmm6 - -# qhasm: v11 = r6 & mask3 -# asm 1: vpand v11=reg128#9 -# asm 2: vpand v11=%xmm8 -vpand %xmm3,%xmm8,%xmm8 - -# qhasm: 2x v01 unsigned>>= 2 -# asm 1: psrlq $2,r4=reg128#13 -# asm 2: vpor r4=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: r6 = v01 | v11 -# asm 1: vpor r6=reg128#7 -# asm 2: vpor r6=%xmm6 -vpor %xmm8,%xmm6,%xmm6 - -# qhasm: v00 = r5 & mask2 -# asm 1: vpand v00=reg128#9 -# asm 2: vpand v00=%xmm8 -vpand %xmm2,%xmm7,%xmm8 - -# qhasm: v10 = r7 & mask2 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm2,%xmm9,%xmm15 - -# qhasm: 2x v10 <<= 2 -# asm 1: psllq $2,v01=reg128#8 -# asm 2: vpand v01=%xmm7 -vpand %xmm3,%xmm7,%xmm7 - -# qhasm: v11 = r7 & mask3 -# asm 1: vpand v11=reg128#10 -# asm 2: vpand v11=%xmm9 -vpand %xmm3,%xmm9,%xmm9 - -# qhasm: 2x v01 unsigned>>= 2 -# asm 1: psrlq $2,r5=reg128#9 -# asm 2: vpor r5=%xmm8 -vpor %xmm15,%xmm8,%xmm8 - -# qhasm: r7 = v01 | v11 -# asm 1: vpor r7=reg128#8 -# asm 2: vpor r7=%xmm7 -vpor %xmm9,%xmm7,%xmm7 - -# qhasm: v00 = r0 & mask4 -# asm 1: vpand v00=reg128#10 -# asm 2: vpand v00=%xmm9 -vpand %xmm4,%xmm13,%xmm9 - -# qhasm: v10 = r1 & mask4 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm4,%xmm14,%xmm15 - -# qhasm: 2x v10 <<= 1 -# asm 1: psllq $1,v01=reg128#14 -# asm 2: vpand v01=%xmm13 -vpand %xmm5,%xmm13,%xmm13 - -# qhasm: v11 = r1 & mask5 -# asm 1: vpand v11=reg128#15 -# asm 2: vpand v11=%xmm14 -vpand %xmm5,%xmm14,%xmm14 - -# qhasm: 2x v01 unsigned>>= 1 -# asm 1: psrlq $1,r0=reg128#10 -# asm 2: vpor r0=%xmm9 -vpor %xmm15,%xmm9,%xmm9 - -# qhasm: r1 = v01 | v11 -# asm 1: vpor r1=reg128#14 -# asm 2: vpor r1=%xmm13 -vpor %xmm14,%xmm13,%xmm13 - -# qhasm: v00 = r2 & mask4 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand 
%xmm4,%xmm11,%xmm14 - -# qhasm: v10 = r3 & mask4 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm4,%xmm10,%xmm15 - -# qhasm: 2x v10 <<= 1 -# asm 1: psllq $1,v01=reg128#12 -# asm 2: vpand v01=%xmm11 -vpand %xmm5,%xmm11,%xmm11 - -# qhasm: v11 = r3 & mask5 -# asm 1: vpand v11=reg128#11 -# asm 2: vpand v11=%xmm10 -vpand %xmm5,%xmm10,%xmm10 - -# qhasm: 2x v01 unsigned>>= 1 -# asm 1: psrlq $1,r2=reg128#15 -# asm 2: vpor r2=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: r3 = v01 | v11 -# asm 1: vpor r3=reg128#11 -# asm 2: vpor r3=%xmm10 -vpor %xmm10,%xmm11,%xmm10 - -# qhasm: v00 = r4 & mask4 -# asm 1: vpand v00=reg128#12 -# asm 2: vpand v00=%xmm11 -vpand %xmm4,%xmm12,%xmm11 - -# qhasm: v10 = r5 & mask4 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm4,%xmm8,%xmm15 - -# qhasm: 2x v10 <<= 1 -# asm 1: psllq $1,v01=reg128#13 -# asm 2: vpand v01=%xmm12 -vpand %xmm5,%xmm12,%xmm12 - -# qhasm: v11 = r5 & mask5 -# asm 1: vpand v11=reg128#9 -# asm 2: vpand v11=%xmm8 -vpand %xmm5,%xmm8,%xmm8 - -# qhasm: 2x v01 unsigned>>= 1 -# asm 1: psrlq $1,r4=reg128#12 -# asm 2: vpor r4=%xmm11 -vpor %xmm15,%xmm11,%xmm11 - -# qhasm: r5 = v01 | v11 -# asm 1: vpor r5=reg128#9 -# asm 2: vpor r5=%xmm8 -vpor %xmm8,%xmm12,%xmm8 - -# qhasm: v00 = r6 & mask4 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm4,%xmm6,%xmm12 - -# qhasm: v10 = r7 & mask4 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm4,%xmm7,%xmm15 - -# qhasm: 2x v10 <<= 1 -# asm 1: psllq $1,v01=reg128#7 -# asm 2: vpand v01=%xmm6 -vpand %xmm5,%xmm6,%xmm6 - -# qhasm: v11 = r7 & mask5 -# asm 1: vpand v11=reg128#8 -# asm 2: vpand v11=%xmm7 -vpand %xmm5,%xmm7,%xmm7 - -# qhasm: 2x v01 unsigned>>= 1 -# asm 1: psrlq $1,r6=reg128#13 -# asm 2: vpor r6=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: r7 = v01 | v11 -# asm 1: vpor r7=reg128#7 -# asm 2: vpor r7=%xmm6 -vpor %xmm7,%xmm6,%xmm6 - -# qhasm: t0 = r0[0]r1[0] -# asm 1: vpunpcklqdq t0=reg128#8 -# asm 2: vpunpcklqdq t0=%xmm7 -vpunpcklqdq %xmm13,%xmm9,%xmm7 - -# qhasm: mem128[ input_0 + 64 ] = t0 -# asm 1: movdqu t0=reg128#8 -# asm 2: vpunpcklqdq t0=%xmm7 -vpunpcklqdq %xmm10,%xmm14,%xmm7 - -# qhasm: mem128[ input_0 + 80 ] = t0 -# asm 1: movdqu t0=reg128#8 -# asm 2: vpunpcklqdq t0=%xmm7 -vpunpcklqdq %xmm8,%xmm11,%xmm7 - -# qhasm: mem128[ input_0 + 96 ] = t0 -# asm 1: movdqu t0=reg128#7 -# asm 2: vpunpcklqdq t0=%xmm6 -vpunpcklqdq %xmm6,%xmm12,%xmm6 - -# qhasm: mem128[ input_0 + 112 ] = t0 -# asm 1: movdqu r0=reg128#7 -# asm 2: movddup 128(r0=%xmm6 -movddup 128(%rdi),%xmm6 - -# qhasm: r1 = mem64[ input_0 + 136 ] x2 -# asm 1: movddup 136(r1=reg128#8 -# asm 2: movddup 136(r1=%xmm7 -movddup 136(%rdi),%xmm7 - -# qhasm: r2 = mem64[ input_0 + 144 ] x2 -# asm 1: movddup 144(r2=reg128#9 -# asm 2: movddup 144(r2=%xmm8 -movddup 144(%rdi),%xmm8 - -# qhasm: r3 = mem64[ input_0 + 152 ] x2 -# asm 1: movddup 152(r3=reg128#10 -# asm 2: movddup 152(r3=%xmm9 -movddup 152(%rdi),%xmm9 - -# qhasm: r4 = mem64[ input_0 + 160 ] x2 -# asm 1: movddup 160(r4=reg128#11 -# asm 2: movddup 160(r4=%xmm10 -movddup 160(%rdi),%xmm10 - -# qhasm: r5 = mem64[ input_0 + 168 ] x2 -# asm 1: movddup 168(r5=reg128#12 -# asm 2: movddup 168(r5=%xmm11 -movddup 168(%rdi),%xmm11 - -# qhasm: r6 = mem64[ input_0 + 176 ] x2 -# asm 1: movddup 176(r6=reg128#13 -# asm 2: movddup 176(r6=%xmm12 -movddup 176(%rdi),%xmm12 - -# qhasm: r7 = mem64[ input_0 + 184 ] x2 -# asm 1: movddup 184(r7=reg128#14 -# asm 2: movddup 184(r7=%xmm13 -movddup 184(%rdi),%xmm13 - -# qhasm: v00 = r0 & mask0 -# asm 1: vpand 
v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm0,%xmm6,%xmm14 - -# qhasm: v10 = r4 & mask0 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm0,%xmm10,%xmm15 - -# qhasm: 2x v10 <<= 4 -# asm 1: psllq $4,v01=reg128#7 -# asm 2: vpand v01=%xmm6 -vpand %xmm1,%xmm6,%xmm6 - -# qhasm: v11 = r4 & mask1 -# asm 1: vpand v11=reg128#11 -# asm 2: vpand v11=%xmm10 -vpand %xmm1,%xmm10,%xmm10 - -# qhasm: 2x v01 unsigned>>= 4 -# asm 1: psrlq $4,r0=reg128#15 -# asm 2: vpor r0=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: r4 = v01 | v11 -# asm 1: vpor r4=reg128#7 -# asm 2: vpor r4=%xmm6 -vpor %xmm10,%xmm6,%xmm6 - -# qhasm: v00 = r1 & mask0 -# asm 1: vpand v00=reg128#11 -# asm 2: vpand v00=%xmm10 -vpand %xmm0,%xmm7,%xmm10 - -# qhasm: v10 = r5 & mask0 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm0,%xmm11,%xmm15 - -# qhasm: 2x v10 <<= 4 -# asm 1: psllq $4,v01=reg128#8 -# asm 2: vpand v01=%xmm7 -vpand %xmm1,%xmm7,%xmm7 - -# qhasm: v11 = r5 & mask1 -# asm 1: vpand v11=reg128#12 -# asm 2: vpand v11=%xmm11 -vpand %xmm1,%xmm11,%xmm11 - -# qhasm: 2x v01 unsigned>>= 4 -# asm 1: psrlq $4,r1=reg128#11 -# asm 2: vpor r1=%xmm10 -vpor %xmm15,%xmm10,%xmm10 - -# qhasm: r5 = v01 | v11 -# asm 1: vpor r5=reg128#8 -# asm 2: vpor r5=%xmm7 -vpor %xmm11,%xmm7,%xmm7 - -# qhasm: v00 = r2 & mask0 -# asm 1: vpand v00=reg128#12 -# asm 2: vpand v00=%xmm11 -vpand %xmm0,%xmm8,%xmm11 - -# qhasm: v10 = r6 & mask0 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm0,%xmm12,%xmm15 - -# qhasm: 2x v10 <<= 4 -# asm 1: psllq $4,v01=reg128#9 -# asm 2: vpand v01=%xmm8 -vpand %xmm1,%xmm8,%xmm8 - -# qhasm: v11 = r6 & mask1 -# asm 1: vpand v11=reg128#13 -# asm 2: vpand v11=%xmm12 -vpand %xmm1,%xmm12,%xmm12 - -# qhasm: 2x v01 unsigned>>= 4 -# asm 1: psrlq $4,r2=reg128#12 -# asm 2: vpor r2=%xmm11 -vpor %xmm15,%xmm11,%xmm11 - -# qhasm: r6 = v01 | v11 -# asm 1: vpor r6=reg128#9 -# asm 2: vpor r6=%xmm8 -vpor %xmm12,%xmm8,%xmm8 - -# qhasm: v00 = r3 & mask0 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm0,%xmm9,%xmm12 - -# qhasm: v10 = r7 & mask0 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm0,%xmm13,%xmm15 - -# qhasm: 2x v10 <<= 4 -# asm 1: psllq $4,v01=reg128#10 -# asm 2: vpand v01=%xmm9 -vpand %xmm1,%xmm9,%xmm9 - -# qhasm: v11 = r7 & mask1 -# asm 1: vpand v11=reg128#14 -# asm 2: vpand v11=%xmm13 -vpand %xmm1,%xmm13,%xmm13 - -# qhasm: 2x v01 unsigned>>= 4 -# asm 1: psrlq $4,r3=reg128#13 -# asm 2: vpor r3=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: r7 = v01 | v11 -# asm 1: vpor r7=reg128#10 -# asm 2: vpor r7=%xmm9 -vpor %xmm13,%xmm9,%xmm9 - -# qhasm: v00 = r0 & mask2 -# asm 1: vpand v00=reg128#14 -# asm 2: vpand v00=%xmm13 -vpand %xmm2,%xmm14,%xmm13 - -# qhasm: v10 = r2 & mask2 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm2,%xmm11,%xmm15 - -# qhasm: 2x v10 <<= 2 -# asm 1: psllq $2,v01=reg128#15 -# asm 2: vpand v01=%xmm14 -vpand %xmm3,%xmm14,%xmm14 - -# qhasm: v11 = r2 & mask3 -# asm 1: vpand v11=reg128#12 -# asm 2: vpand v11=%xmm11 -vpand %xmm3,%xmm11,%xmm11 - -# qhasm: 2x v01 unsigned>>= 2 -# asm 1: psrlq $2,r0=reg128#14 -# asm 2: vpor r0=%xmm13 -vpor %xmm15,%xmm13,%xmm13 - -# qhasm: r2 = v01 | v11 -# asm 1: vpor r2=reg128#12 -# asm 2: vpor r2=%xmm11 -vpor %xmm11,%xmm14,%xmm11 - -# qhasm: v00 = r1 & mask2 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm2,%xmm10,%xmm14 - -# qhasm: v10 = r3 & mask2 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm2,%xmm12,%xmm15 - -# qhasm: 2x v10 <<= 2 -# asm 
1: psllq $2,v01=reg128#11 -# asm 2: vpand v01=%xmm10 -vpand %xmm3,%xmm10,%xmm10 - -# qhasm: v11 = r3 & mask3 -# asm 1: vpand v11=reg128#13 -# asm 2: vpand v11=%xmm12 -vpand %xmm3,%xmm12,%xmm12 - -# qhasm: 2x v01 unsigned>>= 2 -# asm 1: psrlq $2,r1=reg128#15 -# asm 2: vpor r1=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: r3 = v01 | v11 -# asm 1: vpor r3=reg128#11 -# asm 2: vpor r3=%xmm10 -vpor %xmm12,%xmm10,%xmm10 - -# qhasm: v00 = r4 & mask2 -# asm 1: vpand v00=reg128#13 -# asm 2: vpand v00=%xmm12 -vpand %xmm2,%xmm6,%xmm12 - -# qhasm: v10 = r6 & mask2 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm2,%xmm8,%xmm15 - -# qhasm: 2x v10 <<= 2 -# asm 1: psllq $2,v01=reg128#7 -# asm 2: vpand v01=%xmm6 -vpand %xmm3,%xmm6,%xmm6 - -# qhasm: v11 = r6 & mask3 -# asm 1: vpand v11=reg128#9 -# asm 2: vpand v11=%xmm8 -vpand %xmm3,%xmm8,%xmm8 - -# qhasm: 2x v01 unsigned>>= 2 -# asm 1: psrlq $2,r4=reg128#13 -# asm 2: vpor r4=%xmm12 -vpor %xmm15,%xmm12,%xmm12 - -# qhasm: r6 = v01 | v11 -# asm 1: vpor r6=reg128#7 -# asm 2: vpor r6=%xmm6 -vpor %xmm8,%xmm6,%xmm6 - -# qhasm: v00 = r5 & mask2 -# asm 1: vpand v00=reg128#9 -# asm 2: vpand v00=%xmm8 -vpand %xmm2,%xmm7,%xmm8 - -# qhasm: v10 = r7 & mask2 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm2,%xmm9,%xmm15 - -# qhasm: 2x v10 <<= 2 -# asm 1: psllq $2,v01=reg128#8 -# asm 2: vpand v01=%xmm7 -vpand %xmm3,%xmm7,%xmm7 - -# qhasm: v11 = r7 & mask3 -# asm 1: vpand v11=reg128#10 -# asm 2: vpand v11=%xmm9 -vpand %xmm3,%xmm9,%xmm9 - -# qhasm: 2x v01 unsigned>>= 2 -# asm 1: psrlq $2,r5=reg128#9 -# asm 2: vpor r5=%xmm8 -vpor %xmm15,%xmm8,%xmm8 - -# qhasm: r7 = v01 | v11 -# asm 1: vpor r7=reg128#8 -# asm 2: vpor r7=%xmm7 -vpor %xmm9,%xmm7,%xmm7 - -# qhasm: v00 = r0 & mask4 -# asm 1: vpand v00=reg128#10 -# asm 2: vpand v00=%xmm9 -vpand %xmm4,%xmm13,%xmm9 - -# qhasm: v10 = r1 & mask4 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm4,%xmm14,%xmm15 - -# qhasm: 2x v10 <<= 1 -# asm 1: psllq $1,v01=reg128#14 -# asm 2: vpand v01=%xmm13 -vpand %xmm5,%xmm13,%xmm13 - -# qhasm: v11 = r1 & mask5 -# asm 1: vpand v11=reg128#15 -# asm 2: vpand v11=%xmm14 -vpand %xmm5,%xmm14,%xmm14 - -# qhasm: 2x v01 unsigned>>= 1 -# asm 1: psrlq $1,r0=reg128#10 -# asm 2: vpor r0=%xmm9 -vpor %xmm15,%xmm9,%xmm9 - -# qhasm: r1 = v01 | v11 -# asm 1: vpor r1=reg128#14 -# asm 2: vpor r1=%xmm13 -vpor %xmm14,%xmm13,%xmm13 - -# qhasm: v00 = r2 & mask4 -# asm 1: vpand v00=reg128#15 -# asm 2: vpand v00=%xmm14 -vpand %xmm4,%xmm11,%xmm14 - -# qhasm: v10 = r3 & mask4 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm4,%xmm10,%xmm15 - -# qhasm: 2x v10 <<= 1 -# asm 1: psllq $1,v01=reg128#12 -# asm 2: vpand v01=%xmm11 -vpand %xmm5,%xmm11,%xmm11 - -# qhasm: v11 = r3 & mask5 -# asm 1: vpand v11=reg128#11 -# asm 2: vpand v11=%xmm10 -vpand %xmm5,%xmm10,%xmm10 - -# qhasm: 2x v01 unsigned>>= 1 -# asm 1: psrlq $1,r2=reg128#15 -# asm 2: vpor r2=%xmm14 -vpor %xmm15,%xmm14,%xmm14 - -# qhasm: r3 = v01 | v11 -# asm 1: vpor r3=reg128#11 -# asm 2: vpor r3=%xmm10 -vpor %xmm10,%xmm11,%xmm10 - -# qhasm: v00 = r4 & mask4 -# asm 1: vpand v00=reg128#12 -# asm 2: vpand v00=%xmm11 -vpand %xmm4,%xmm12,%xmm11 - -# qhasm: v10 = r5 & mask4 -# asm 1: vpand v10=reg128#16 -# asm 2: vpand v10=%xmm15 -vpand %xmm4,%xmm8,%xmm15 - -# qhasm: 2x v10 <<= 1 -# asm 1: psllq $1,v01=reg128#13 -# asm 2: vpand v01=%xmm12 -vpand %xmm5,%xmm12,%xmm12 - -# qhasm: v11 = r5 & mask5 -# asm 1: vpand v11=reg128#9 -# asm 2: vpand v11=%xmm8 -vpand %xmm5,%xmm8,%xmm8 - -# qhasm: 2x v01 unsigned>>= 1 
[The deleted transpose assembly continues here with the remaining interleaving stages. Eight 64-bit rows at a time are broadcast-loaded with movddup from input_0 (offsets 192 through 504); paired rows are recombined with vpand/psllq/psrlq/vpor against the six mask constants held in xmm0 through xmm5, first at bit stride 4, then 2, then 1; and the results are repacked with vpunpcklqdq and written back with movdqu to input_0 at offsets 128 through 496. The assembly ends with a scalar movq/shrd/shr loop, one iteration per 64-bit word, that shifts the word stream right by one bit; that loop runs up to the point where the patch reaches the C sources below.]
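For reference, every interleaving stage above follows the same four-instruction pattern per register pair. A plain C model of one radix-4 stage over eight 64-bit rows is sketched below; the nibble mask values are an assumption, since the deleted file loads its masks from a constants section that is not shown here (illustration only):

#include <stdint.h>

/* One masked-swap stage of the bit-matrix transpose, modelled on the
 * vpand/psllq/psrlq/vpor pattern above: rows i and i+4 exchange 4-bit groups,
 * the low nibbles of row i+4 moving up into row i and the high nibbles of
 * row i moving down into row i+4. */
static void swap_stage4(uint64_t r[8]) {
    const uint64_t mask_lo = 0x0f0f0f0f0f0f0f0fULL; /* assumed low-nibble mask  */
    const uint64_t mask_hi = 0xf0f0f0f0f0f0f0f0ULL; /* assumed high-nibble mask */
    int i;

    for (i = 0; i < 4; i++) {
        uint64_t v00 = r[i] & mask_lo;              /* bits that stay in row i     */
        uint64_t v10 = (r[i + 4] & mask_lo) << 4;   /* bits pulled up from row i+4 */
        uint64_t v01 = (r[i] & mask_hi) >> 4;       /* bits pushed down to row i+4 */
        uint64_t v11 = r[i + 4] & mask_hi;          /* bits that stay in row i+4   */

        r[i]     = v00 | v10;
        r[i + 4] = v01 | v11;
    }
}

The stages at strides 2 and 1 are identical up to the masks and shift amounts.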
[util.c, the little-endian load/store helpers of the SSE implementation, is also removed:]

-void PQCLEAN_MCELIECE348864_SSE_store_i(unsigned char *out, uint64_t in, int i) {
-    int j;
-
-    for (j = 0; j < i; j++) {
-        out[j] = (in >> (j * 8)) & 0xFF;
-    }
-}
-
-void PQCLEAN_MCELIECE348864_SSE_store2(unsigned char *dest, gf a) {
-    dest[0] = a & 0xFF;
-    dest[1] = a >> 8;
-}
-
-uint16_t PQCLEAN_MCELIECE348864_SSE_load2(const unsigned char *src) {
-    uint16_t a;
-
-    a = src[1];
-    a <<= 8;
-    a |= src[0];
-
-    return a & GFMASK;
-}
-
-uint32_t PQCLEAN_MCELIECE348864_SSE_load4(const unsigned char *src) {
-    uint32_t a;
-
-    a = src[3];
-    a <<= 8;
-    a |= src[2];
-    a <<= 8;
-    a |= src[1];
-    a <<= 8;
-    a |= src[0];
-
-    return a;
-}
-
-void PQCLEAN_MCELIECE348864_SSE_irr_load(uint64_t *out, const unsigned char *in) {
-    int i, j;
-    uint16_t irr[ SYS_T + 1 ];
-
-    for (i = 0; i < SYS_T; i++) {
-        irr[i] = PQCLEAN_MCELIECE348864_SSE_load2(in + i * 2);
-        irr[i] &= GFMASK;
-    }
-
-    irr[ SYS_T ] = 1;
-
-    for (i = 0; i < GFBITS; i++) {
-        out[i] = 0;
-    }
-
-    for (i = SYS_T; i >= 0; i--) {
-        for (j = 0; j < GFBITS; j++) {
-            out[j] <<= 1;
-            out[j] |= (irr[i] >> j) & 1;
-        }
-    }
-}
-
-void PQCLEAN_MCELIECE348864_SSE_store8(unsigned char *out, uint64_t in) {
-    out[0] = (in >> 0x00) & 0xFF;
-    out[1] = (in >> 0x08) & 0xFF;
-    out[2] = (in >> 0x10) & 0xFF;
-    out[3] = (in >> 0x18) & 0xFF;
-    out[4] = (in >> 0x20) & 0xFF;
-    out[5] = (in >> 0x28) & 0xFF;
-    out[6] = (in >> 0x30) & 0xFF;
-    out[7] = (in >> 0x38) & 0xFF;
-}
-
-uint64_t PQCLEAN_MCELIECE348864_SSE_load8(const unsigned char *in) {
-    int i;
-    uint64_t ret = in[7];
-
-    for (i = 6; i >= 0; i--) {
-        ret <<= 8;
-        ret |= in[i];
-    }
-
-    return ret;
-}
-
-gf PQCLEAN_MCELIECE348864_SSE_bitrev(gf a) {
-    a = ((a & 0x00FF) << 8) | ((a & 0xFF00) >> 8);
-    a = ((a & 0x0F0F) << 4) | ((a & 0xF0F0) >> 4);
-    a = ((a & 0x3333) << 2) | ((a & 0xCCCC) >> 2);
-    a = ((a & 0x5555) << 1) | ((a & 0xAAAA) >> 1);
-
-    return a >> 4;
-}
-
-vec128 PQCLEAN_MCELIECE348864_SSE_load16(const unsigned char *in) {
-    return PQCLEAN_MCELIECE348864_SSE_vec128_set2x( PQCLEAN_MCELIECE348864_SSE_load8(in), PQCLEAN_MCELIECE348864_SSE_load8(in + 8) );
-}
-
-void PQCLEAN_MCELIECE348864_SSE_store16(unsigned char *out, vec128 in) {
-    PQCLEAN_MCELIECE348864_SSE_store8(out + 0, PQCLEAN_MCELIECE348864_SSE_vec128_extract(in, 0));
-    PQCLEAN_MCELIECE348864_SSE_store8(out + 8, PQCLEAN_MCELIECE348864_SSE_vec128_extract(in, 1));
-}
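Among the helpers above, irr_load is the transposition that puts the Goppa polynomial into bitsliced form: bit j of coefficient i ends up as bit i of the output word out[j]. A self-contained sketch of the same transposition over a batch of 64 field elements (GFBITS is 12 for this parameter set; illustration only):

#include <stdint.h>

#define GFBITS 12

/* Bitslice 64 field elements: bit j of element i lands in bit i of word out[j].
 * This is the same transposition irr_load performs on the SYS_T + 1 polynomial
 * coefficients. */
static void bitslice64(uint64_t out[GFBITS], const uint16_t in[64]) {
    int i, j;

    for (j = 0; j < GFBITS; j++) {
        out[j] = 0;
        for (i = 0; i < 64; i++) {
            out[j] |= (uint64_t)((in[i] >> j) & 1) << i;
        }
    }
}

/* Inverse transposition: recover element i from bit i of each bit plane. */
static void unbitslice64(uint16_t out[64], const uint64_t in[GFBITS]) {
    int i, j;

    for (i = 0; i < 64; i++) {
        out[i] = 0;
        for (j = 0; j < GFBITS; j++) {
            out[i] |= (uint16_t)(((in[j] >> i) & 1) << j);
        }
    }
}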
diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/util.h b/src/kem/classic_mceliece/pqclean_mceliece348864_sse/util.h
deleted file mode 100644
index 97491b39a..000000000
--- a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/util.h
+++ /dev/null
@@ -1,33 +0,0 @@
-#ifndef PQCLEAN_MCELIECE348864_SSE_UTIL_H
-#define PQCLEAN_MCELIECE348864_SSE_UTIL_H
-/*
-  This file is for loading/storing data in a little-endian fashion
-*/
-
-
-#include "gf.h"
-#include "vec128.h"
-
-#include <stdint.h>
-
-void PQCLEAN_MCELIECE348864_SSE_store_i(unsigned char *out, uint64_t in, int i);
-void PQCLEAN_MCELIECE348864_SSE_store2(unsigned char *dest, gf a);
-
-uint16_t PQCLEAN_MCELIECE348864_SSE_load2(const unsigned char *src);
-
-uint32_t PQCLEAN_MCELIECE348864_SSE_load4(const unsigned char *src);
-
-void PQCLEAN_MCELIECE348864_SSE_irr_load(uint64_t *out, const unsigned char *in);
-
-void PQCLEAN_MCELIECE348864_SSE_store8(unsigned char *out, uint64_t in);
-
-uint64_t PQCLEAN_MCELIECE348864_SSE_load8(const unsigned char *in);
-
-gf PQCLEAN_MCELIECE348864_SSE_bitrev(gf a);
-
-vec128 PQCLEAN_MCELIECE348864_SSE_load16(const unsigned char *in);
-
-void PQCLEAN_MCELIECE348864_SSE_store16(unsigned char *out, vec128 in);
-
-#endif
-
diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/vec.c b/src/kem/classic_mceliece/pqclean_mceliece348864_sse/vec.c
deleted file mode 100644
index 82e40b26f..000000000
--- a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/vec.c
+++ /dev/null
@@ -1,17 +0,0 @@
-
-#include "vec.h"
-
-#include "params.h"
-
-void PQCLEAN_MCELIECE348864_SSE_vec_mul(uint64_t *h, const uint64_t *f, const uint64_t *g) {
-    PQCLEAN_MCELIECE348864_SSE_vec_mul_asm(h, f, g, 8);
-}
-
-void PQCLEAN_MCELIECE348864_SSE_vec_add(uint64_t *h, const uint64_t *f, const uint64_t *g) {
-    int b;
-
-    for (b = 0; b < GFBITS; b++) {
-        h[b] = f[b] ^ g[b];
-    }
-}
-
diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/vec.h b/src/kem/classic_mceliece/pqclean_mceliece348864_sse/vec.h
deleted file mode 100644
index d33258e0d..000000000
--- a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/vec.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef PQCLEAN_MCELIECE348864_SSE_VEC_H
-#define PQCLEAN_MCELIECE348864_SSE_VEC_H
-
-#include <stdint.h>
-
-extern void PQCLEAN_MCELIECE348864_SSE_vec_mul_asm(uint64_t *, const uint64_t *, const uint64_t *, int);
-
-void PQCLEAN_MCELIECE348864_SSE_vec_mul(uint64_t *h, const uint64_t *f, const uint64_t *g);
-void PQCLEAN_MCELIECE348864_SSE_vec_add(uint64_t *h, const uint64_t *f, const uint64_t *g);
-
-#endif
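Addition in GF(2^12) is coefficient-wise XOR, which is why vec_add above is just GFBITS word-wide XORs over the bitsliced limbs: one XOR per bit plane adds 64 field elements at once. A small self-contained check of that equivalence (plain C, illustration only):

#include <assert.h>
#include <stdint.h>

#define GFBITS 12

int main(void) {
    uint16_t a[64], b[64];
    uint64_t A[GFBITS] = {0}, B[GFBITS] = {0};
    int i, j;

    for (i = 0; i < 64; i++) {            /* arbitrary 12-bit test elements */
        a[i] = (uint16_t)((i * 2654435761u) & 0xFFF);
        b[i] = (uint16_t)((i * 40503u + 7) & 0xFFF);
    }
    for (j = 0; j < GFBITS; j++) {        /* bitslice both batches */
        for (i = 0; i < 64; i++) {
            A[j] |= (uint64_t)((a[i] >> j) & 1) << i;
            B[j] |= (uint64_t)((b[i] >> j) & 1) << i;
        }
    }
    for (j = 0; j < GFBITS; j++) {        /* bitsliced addition: one XOR per bit plane */
        A[j] ^= B[j];
    }
    for (i = 0; i < 64; i++) {            /* agrees with element-wise XOR */
        uint16_t s = 0;
        for (j = 0; j < GFBITS; j++) {
            s |= (uint16_t)(((A[j] >> i) & 1) << j);
        }
        assert(s == (uint16_t)(a[i] ^ b[i]));
    }
    return 0;
}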
diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/vec128.c b/src/kem/classic_mceliece/pqclean_mceliece348864_sse/vec128.c
deleted file mode 100644
index 219cb19cc..000000000
--- a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/vec128.c
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
-  This file is for functions related to 128-bit vectors
-  including functions for bitsliced field operations
-*/
-
-#include "vec128.h"
-
-#include "params.h"
-
-vec128 PQCLEAN_MCELIECE348864_SSE_vec128_set1_16b(uint16_t a) {
-    return _mm_set1_epi16(a);
-}
-
-int PQCLEAN_MCELIECE348864_SSE_vec128_testz(vec128 a) {
-    return _mm_testz_si128(a, a);
-}
-
-vec128 PQCLEAN_MCELIECE348864_SSE_vec128_setzero(void) {
-    return _mm_setzero_si128();
-}
-
-vec128 PQCLEAN_MCELIECE348864_SSE_vec128_and(vec128 a, vec128 b) {
-    return _mm_and_si128(a, b);
-}
-
-vec128 PQCLEAN_MCELIECE348864_SSE_vec128_xor(vec128 a, vec128 b) {
-    return _mm_xor_si128(a, b);
-}
-
-vec128 PQCLEAN_MCELIECE348864_SSE_vec128_or(vec128 a, vec128 b) {
-    return _mm_or_si128(a, b);
-}
-
-vec128 PQCLEAN_MCELIECE348864_SSE_vec128_sll_2x(vec128 a, int s) {
-    return _mm_slli_epi64(a, s);
-}
-
-vec128 PQCLEAN_MCELIECE348864_SSE_vec128_srl_2x(vec128 a, int s) {
-    return _mm_srli_epi64(a, s);
-}
-
-vec128 PQCLEAN_MCELIECE348864_SSE_vec128_set2x(uint64_t a0, uint64_t a1) {
-    return _mm_set_epi64x(a1, a0);
-}
-
-vec128 PQCLEAN_MCELIECE348864_SSE_vec128_unpack_low(vec128 a, vec128 b) {
-    return _mm_unpacklo_epi64(a, b);
-}
-
-vec128 PQCLEAN_MCELIECE348864_SSE_vec128_unpack_high(vec128 a, vec128 b) {
-    return _mm_unpackhi_epi64(a, b);
-}
-
-vec128 PQCLEAN_MCELIECE348864_SSE_vec128_setbits(uint64_t a) {
-    return _mm_set1_epi64x(-a);
-}
-
-void PQCLEAN_MCELIECE348864_SSE_vec128_copy(vec128 *dest, const vec128 *src) {
-    int i;
-
-    for (i = 0; i < GFBITS; i++) {
-        dest[i] = src[i];
-    }
-}
-
-void PQCLEAN_MCELIECE348864_SSE_vec128_add(vec128 *c, const vec128 *a, const vec128 *b) {
-    int i;
-
-    for (i = 0; i < GFBITS; i++) {
-        c[i] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(a[i], b[i]);
-    }
-}
-
-vec128 PQCLEAN_MCELIECE348864_SSE_vec128_or_reduce(const vec128 *a) {
-    int i;
-    vec128 ret;
-
-    ret = a[0];
-    for (i = 1; i < GFBITS; i++) {
-        ret = PQCLEAN_MCELIECE348864_SSE_vec128_or(ret, a[i]);
-    }
-
-    return ret;
-}
-
-/* bitsliced field multiplications */
-void PQCLEAN_MCELIECE348864_SSE_vec128_mul(vec128 *h, vec128 *f, const vec128 *g) {
-    PQCLEAN_MCELIECE348864_SSE_vec128_mul_asm(h, f, g, 16);
-}
-
-/* bitsliced field squarings */
-void PQCLEAN_MCELIECE348864_SSE_vec128_sq(vec128 *out, const vec128 *in) {
-    int i;
-    vec128 result[GFBITS];
-
-    result[0] = in[0] ^ in[6];
-    result[1] = in[11];
-    result[2] = in[1] ^ in[7];
-    result[3] = in[6];
-    result[4] = in[2] ^ in[11] ^ in[8];
-    result[5] = in[7];
-    result[6] = in[3] ^ in[9];
-    result[7] = in[8];
-    result[8] = in[4] ^ in[10];
-    result[9] = in[9];
-    result[10] = in[5] ^ in[11];
-    result[11] = in[10];
-
-    for (i = 0; i < GFBITS; i++) {
-        out[i] = result[i];
-    }
-}
-
-/* bitsliced field inverses */
-void PQCLEAN_MCELIECE348864_SSE_vec128_inv(vec128 *out, const vec128 *in) {
-    vec128 tmp_11[ GFBITS ];
-    vec128 tmp_1111[ GFBITS ];
-
-    PQCLEAN_MCELIECE348864_SSE_vec128_copy(out, in);
-
-    PQCLEAN_MCELIECE348864_SSE_vec128_sq(out, out);
-    PQCLEAN_MCELIECE348864_SSE_vec128_mul(tmp_11, out, in); // 11
-
-    PQCLEAN_MCELIECE348864_SSE_vec128_sq(out, tmp_11);
-    PQCLEAN_MCELIECE348864_SSE_vec128_sq(out, out);
-    PQCLEAN_MCELIECE348864_SSE_vec128_mul(tmp_1111, out, tmp_11); // 1111
-
-    PQCLEAN_MCELIECE348864_SSE_vec128_sq(out, tmp_1111);
-    PQCLEAN_MCELIECE348864_SSE_vec128_sq(out, out);
-    PQCLEAN_MCELIECE348864_SSE_vec128_sq(out, out);
-    PQCLEAN_MCELIECE348864_SSE_vec128_sq(out, out);
-    PQCLEAN_MCELIECE348864_SSE_vec128_mul(out, out, tmp_1111); // 11111111
-
-    PQCLEAN_MCELIECE348864_SSE_vec128_sq(out, out);
-    PQCLEAN_MCELIECE348864_SSE_vec128_sq(out, out);
-    PQCLEAN_MCELIECE348864_SSE_vec128_mul(out, out, tmp_11); // 1111111111
-
-    PQCLEAN_MCELIECE348864_SSE_vec128_sq(out, out);
-    PQCLEAN_MCELIECE348864_SSE_vec128_mul(out, out, in); // 11111111111
-
-    PQCLEAN_MCELIECE348864_SSE_vec128_sq(out, out); // 111111111110
-}
-
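Two details of the file above are worth spelling out. In vec128_sq, input bit 6 feeds result bits 0 and 3, bit 7 feeds bits 2 and 5, and so on, which is exactly the reduction x^12 = x^3 + 1; the field is GF(2)[x]/(x^12 + x^3 + 1). And vec128_inv is Fermat inversion: for a nonzero element a, a^(2^12 - 1) = 1, so a^(2^12 - 2) = a^(-1), and the square/multiply schedule annotated by the // comments reaches that exponent. The bookkeeping can be checked with ordinary integer arithmetic (illustration only):

#include <assert.h>

/* Mirror the square/multiply schedule of vec128_inv on the exponent of the
 * input: squaring doubles the exponent, multiplying adds exponents.
 * For nonzero a in GF(2^12), a^4095 = 1, so the chain must land on
 * 2^12 - 2 = 4094, the exponent of the inverse. */
int main(void) {
    unsigned out, tmp_11, tmp_1111;

    out = 1;                 /* start from a = a^1                  */
    out *= 2;                /* sq                                  */
    tmp_11 = out + 1;        /* mul by a       -> a^3    (11)       */
    out = tmp_11 * 2 * 2;    /* sq, sq                              */
    tmp_1111 = out + tmp_11; /* mul by a^3     -> a^15   (1111)     */
    out = tmp_1111 * 16;     /* sq, sq, sq, sq                      */
    out += tmp_1111;         /* mul by a^15    -> a^255  (11111111) */
    out = out * 4 + tmp_11;  /* sq, sq, mul    -> a^1023            */
    out = out * 2 + 1;       /* sq, mul by a   -> a^2047            */
    out = out * 2;           /* final sq       -> a^4094            */

    assert(out == (1u << 12) - 2);
    return 0;
}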
diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/vec128.h b/src/kem/classic_mceliece/pqclean_mceliece348864_sse/vec128.h
deleted file mode 100644
index e002e77f9..000000000
--- a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/vec128.h
+++ /dev/null
@@ -1,42 +0,0 @@
-#ifndef PQCLEAN_MCELIECE348864_SSE_VEC128_H
-#define PQCLEAN_MCELIECE348864_SSE_VEC128_H
-/*
-  This file is for functions related to 128-bit vectors
-  including functions for bitsliced field operations
-*/
-
-
-#include <smmintrin.h>
-#include <stdint.h>
-
-typedef __m128i vec128;
-
-// this needs to be a macro, because
-// _mm_extract_epi64 requires a literal int argument.
-#define PQCLEAN_MCELIECE348864_SSE_vec128_extract(a, i) ((uint64_t) _mm_extract_epi64((vec128) (a), (i)))
-
-int PQCLEAN_MCELIECE348864_SSE_vec128_testz(vec128 a);
-vec128 PQCLEAN_MCELIECE348864_SSE_vec128_set1_16b(uint16_t a);
-vec128 PQCLEAN_MCELIECE348864_SSE_vec128_setzero(void);
-vec128 PQCLEAN_MCELIECE348864_SSE_vec128_and(vec128 a, vec128 b);
-vec128 PQCLEAN_MCELIECE348864_SSE_vec128_xor(vec128 a, vec128 b);
-vec128 PQCLEAN_MCELIECE348864_SSE_vec128_or(vec128 a, vec128 b);
-vec128 PQCLEAN_MCELIECE348864_SSE_vec128_sll_2x(vec128 a, int s);
-vec128 PQCLEAN_MCELIECE348864_SSE_vec128_srl_2x(vec128 a, int s);
-vec128 PQCLEAN_MCELIECE348864_SSE_vec128_set2x(uint64_t a0, uint64_t a1);
-vec128 PQCLEAN_MCELIECE348864_SSE_vec128_unpack_low(vec128 a, vec128 b);
-vec128 PQCLEAN_MCELIECE348864_SSE_vec128_unpack_high(vec128 a, vec128 b);
-vec128 PQCLEAN_MCELIECE348864_SSE_vec128_setbits(uint64_t a);
-void PQCLEAN_MCELIECE348864_SSE_vec128_copy(vec128 *dest, const vec128 *src);
-void PQCLEAN_MCELIECE348864_SSE_vec128_add(vec128 *c, const vec128 *a, const vec128 *b);
-vec128 PQCLEAN_MCELIECE348864_SSE_vec128_or_reduce(const vec128 *a);
-
-extern void PQCLEAN_MCELIECE348864_SSE_vec128_mul_asm(vec128 *, vec128 *, const vec128 *, int);
-
-/* bitsliced field multiplications */
-void PQCLEAN_MCELIECE348864_SSE_vec128_mul(vec128 *h, vec128 *f, const vec128 *g);
-
-void PQCLEAN_MCELIECE348864_SSE_vec128_sq(vec128 * /*out*/, const vec128 * /*in*/);
-void PQCLEAN_MCELIECE348864_SSE_vec128_inv(vec128 * /*out*/, const vec128 * /*in*/);
-
-#endif
diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/vec128_mul_asm.S b/src/kem/classic_mceliece/pqclean_mceliece348864_sse/vec128_mul_asm.S
deleted file mode 100644
index f9c2753bc..000000000
--- a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/vec128_mul_asm.S
+++ /dev/null
@@ -1,1736 +0,0 @@
[qhasm variable declarations: int64 input_0..input_5, stack64 input_6..input_7, int64 caller_r11..caller_r15, caller_rbx, caller_rbp, reg128 operand limbs a0..a11 and b0..b1, reg128 result limbs r0..r22, and reg128 r.]
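Judging from the rounds visible below, vec128_mul_asm accumulates the schoolbook product of two 12-limb bitsliced operands, r[i+j] ^= a[i] & b[j], and folds the limbs above 11 back down using x^12 = x^3 + 1 (the first visible fold sends r22 into r13 and r10). A plain C model of that computation, using uint64_t limbs instead of 128-bit registers and doing the reduction in one pass at the end rather than interleaved with the accumulation (illustration only, not the deleted routine's instruction schedule):

#include <stdint.h>

#define GFBITS 12

/* Bitsliced GF(2^12) multiplication: a schoolbook carry-less product of two
 * GFBITS-limb operands (each limb holds one bit plane of 64 elements),
 * followed by reduction modulo x^12 + x^3 + 1. */
static void bitsliced_gf_mul(uint64_t h[GFBITS],
                             const uint64_t f[GFBITS],
                             const uint64_t g[GFBITS]) {
    uint64_t buf[2 * GFBITS - 1] = {0};
    int i, j;

    for (i = 0; i < GFBITS; i++) {               /* schoolbook product: 23 limbs */
        for (j = 0; j < GFBITS; j++) {
            buf[i + j] ^= f[i] & g[j];
        }
    }

    for (i = 2 * GFBITS - 2; i >= GFBITS; i--) { /* fold back: x^12 = x^3 + 1     */
        buf[i - GFBITS + 3] ^= buf[i];           /* e.g. limb 22 folds into 13 ... */
        buf[i - GFBITS]     ^= buf[i];           /* ... and into 10                */
    }

    for (i = 0; i < GFBITS; i++) {
        h[i] = buf[i];
    }
}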
-# qhasm: enter vec128_mul_asm
-.p2align 5
-.global _PQCLEAN_MCELIECE348864_SSE_vec128_mul_asm
-.global PQCLEAN_MCELIECE348864_SSE_vec128_mul_asm
-_PQCLEAN_MCELIECE348864_SSE_vec128_mul_asm:
-PQCLEAN_MCELIECE348864_SSE_vec128_mul_asm:
-mov %rsp,%r11
-and $31,%r11
-add $0,%r11
-sub %r11,%rsp
-
[The body of the deleted routine follows: for each operand limb a11 down to a0, loaded with movdqu from input_1, the limb is ANDed (vpand) with each of the twelve 16-byte limbs of the second operand at input_2 and XOR-accumulated (pxor) into the running limbs r0..r22; high limbs are folded back into r0..r11 as they complete, the visible step folding r22 into r13 and r10. The rounds for the remaining limbs and the final stores through input_0 continue below.]
128(%rdx),%xmm13,%xmm14 - -# qhasm: r13 ^= r -# asm 1: pxor r=reg128#15 -# asm 2: vpand 144(r=%xmm14 -vpand 144(%rdx),%xmm13,%xmm14 - -# qhasm: r14 ^= r -# asm 1: pxor r=reg128#15 -# asm 2: vpand 160(r=%xmm14 -vpand 160(%rdx),%xmm13,%xmm14 - -# qhasm: r15 ^= r -# asm 1: pxor r=reg128#14 -# asm 2: vpand 176(r=%xmm13 -vpand 176(%rdx),%xmm13,%xmm13 - -# qhasm: r16 ^= r -# asm 1: pxor r4=reg128#8 -# asm 2: movdqa r4=%xmm7 -movdqa %xmm7,%xmm7 - -# qhasm: a4 = mem128[ input_1 + 64 ] -# asm 1: movdqu 64(a4=reg128#14 -# asm 2: movdqu 64(a4=%xmm13 -movdqu 64(%rsi),%xmm13 - -# qhasm: r = a4 & b0 -# asm 1: vpand r=reg128#15 -# asm 2: vpand r=%xmm14 -vpand %xmm0,%xmm13,%xmm14 - -# qhasm: r4 ^= r -# asm 1: pxor r=reg128#15 -# asm 2: vpand 16(r=%xmm14 -vpand 16(%rdx),%xmm13,%xmm14 - -# qhasm: r5 ^= r -# asm 1: pxor r=reg128#15 -# asm 2: vpand 32(r=%xmm14 -vpand 32(%rdx),%xmm13,%xmm14 - -# qhasm: r6 ^= r -# asm 1: pxor r=reg128#15 -# asm 2: vpand 48(r=%xmm14 -vpand 48(%rdx),%xmm13,%xmm14 - -# qhasm: r7 ^= r -# asm 1: pxor r=reg128#15 -# asm 2: vpand 64(r=%xmm14 -vpand 64(%rdx),%xmm13,%xmm14 - -# qhasm: r8 ^= r -# asm 1: pxor r=reg128#15 -# asm 2: vpand 80(r=%xmm14 -vpand 80(%rdx),%xmm13,%xmm14 - -# qhasm: r9 ^= r -# asm 1: pxor r=reg128#15 -# asm 2: vpand 96(r=%xmm14 -vpand 96(%rdx),%xmm13,%xmm14 - -# qhasm: r10 ^= r -# asm 1: pxor r=reg128#15 -# asm 2: vpand 112(r=%xmm14 -vpand 112(%rdx),%xmm13,%xmm14 - -# qhasm: r11 ^= r -# asm 1: pxor r=reg128#15 -# asm 2: vpand 128(r=%xmm14 -vpand 128(%rdx),%xmm13,%xmm14 - -# qhasm: r12 ^= r -# asm 1: pxor r=reg128#15 -# asm 2: vpand 144(r=%xmm14 -vpand 144(%rdx),%xmm13,%xmm14 - -# qhasm: r13 ^= r -# asm 1: pxor r=reg128#15 -# asm 2: vpand 160(r=%xmm14 -vpand 160(%rdx),%xmm13,%xmm14 - -# qhasm: r14 ^= r -# asm 1: pxor r=reg128#14 -# asm 2: vpand 176(r=%xmm13 -vpand 176(%rdx),%xmm13,%xmm13 - -# qhasm: r15 ^= r -# asm 1: pxor r3=reg128#7 -# asm 2: movdqa r3=%xmm6 -movdqa %xmm6,%xmm6 - -# qhasm: a3 = mem128[ input_1 + 48 ] -# asm 1: movdqu 48(a3=reg128#14 -# asm 2: movdqu 48(a3=%xmm13 -movdqu 48(%rsi),%xmm13 - -# qhasm: r = a3 & b0 -# asm 1: vpand r=reg128#15 -# asm 2: vpand r=%xmm14 -vpand %xmm0,%xmm13,%xmm14 - -# qhasm: r3 ^= r -# asm 1: pxor r=reg128#15 -# asm 2: vpand 16(r=%xmm14 -vpand 16(%rdx),%xmm13,%xmm14 - -# qhasm: r4 ^= r -# asm 1: pxor r=reg128#15 -# asm 2: vpand 32(r=%xmm14 -vpand 32(%rdx),%xmm13,%xmm14 - -# qhasm: r5 ^= r -# asm 1: pxor r=reg128#15 -# asm 2: vpand 48(r=%xmm14 -vpand 48(%rdx),%xmm13,%xmm14 - -# qhasm: r6 ^= r -# asm 1: pxor r=reg128#15 -# asm 2: vpand 64(r=%xmm14 -vpand 64(%rdx),%xmm13,%xmm14 - -# qhasm: r7 ^= r -# asm 1: pxor r=reg128#15 -# asm 2: vpand 80(r=%xmm14 -vpand 80(%rdx),%xmm13,%xmm14 - -# qhasm: r8 ^= r -# asm 1: pxor r=reg128#15 -# asm 2: vpand 96(r=%xmm14 -vpand 96(%rdx),%xmm13,%xmm14 - -# qhasm: r9 ^= r -# asm 1: pxor r=reg128#15 -# asm 2: vpand 112(r=%xmm14 -vpand 112(%rdx),%xmm13,%xmm14 - -# qhasm: r10 ^= r -# asm 1: pxor r=reg128#15 -# asm 2: vpand 128(r=%xmm14 -vpand 128(%rdx),%xmm13,%xmm14 - -# qhasm: r11 ^= r -# asm 1: pxor r=reg128#15 -# asm 2: vpand 144(r=%xmm14 -vpand 144(%rdx),%xmm13,%xmm14 - -# qhasm: r12 ^= r -# asm 1: pxor r=reg128#15 -# asm 2: vpand 160(r=%xmm14 -vpand 160(%rdx),%xmm13,%xmm14 - -# qhasm: r13 ^= r -# asm 1: pxor r=reg128#14 -# asm 2: vpand 176(r=%xmm13 -vpand 176(%rdx),%xmm13,%xmm13 - -# qhasm: r14 ^= r -# asm 1: pxor r2=reg128#6 -# asm 2: movdqa r2=%xmm5 -movdqa %xmm5,%xmm5 - -# qhasm: a2 = mem128[ input_1 + 32 ] -# asm 1: movdqu 32(a2=reg128#14 -# asm 2: movdqu 32(a2=%xmm13 -movdqu 
32(%rsi),%xmm13 - -# qhasm: r = a2 & b0 -# asm 1: vpand r=reg128#15 -# asm 2: vpand r=%xmm14 -vpand %xmm0,%xmm13,%xmm14 - -# qhasm: r2 ^= r -# asm 1: pxor r=reg128#15 -# asm 2: vpand 16(r=%xmm14 -vpand 16(%rdx),%xmm13,%xmm14 - -# qhasm: r3 ^= r -# asm 1: pxor r=reg128#15 -# asm 2: vpand 32(r=%xmm14 -vpand 32(%rdx),%xmm13,%xmm14 - -# qhasm: r4 ^= r -# asm 1: pxor r=reg128#15 -# asm 2: vpand 48(r=%xmm14 -vpand 48(%rdx),%xmm13,%xmm14 - -# qhasm: r5 ^= r -# asm 1: pxor r=reg128#15 -# asm 2: vpand 64(r=%xmm14 -vpand 64(%rdx),%xmm13,%xmm14 - -# qhasm: r6 ^= r -# asm 1: pxor r=reg128#15 -# asm 2: vpand 80(r=%xmm14 -vpand 80(%rdx),%xmm13,%xmm14 - -# qhasm: r7 ^= r -# asm 1: pxor r=reg128#15 -# asm 2: vpand 96(r=%xmm14 -vpand 96(%rdx),%xmm13,%xmm14 - -# qhasm: r8 ^= r -# asm 1: pxor r=reg128#15 -# asm 2: vpand 112(r=%xmm14 -vpand 112(%rdx),%xmm13,%xmm14 - -# qhasm: r9 ^= r -# asm 1: pxor r=reg128#15 -# asm 2: vpand 128(r=%xmm14 -vpand 128(%rdx),%xmm13,%xmm14 - -# qhasm: r10 ^= r -# asm 1: pxor r=reg128#15 -# asm 2: vpand 144(r=%xmm14 -vpand 144(%rdx),%xmm13,%xmm14 - -# qhasm: r11 ^= r -# asm 1: pxor r=reg128#15 -# asm 2: vpand 160(r=%xmm14 -vpand 160(%rdx),%xmm13,%xmm14 - -# qhasm: r12 ^= r -# asm 1: pxor r=reg128#14 -# asm 2: vpand 176(r=%xmm13 -vpand 176(%rdx),%xmm13,%xmm13 - -# qhasm: r13 ^= r -# asm 1: pxor r1=reg128#5 -# asm 2: movdqa r1=%xmm4 -movdqa %xmm4,%xmm4 - -# qhasm: a1 = mem128[ input_1 + 16 ] -# asm 1: movdqu 16(a1=reg128#14 -# asm 2: movdqu 16(a1=%xmm13 -movdqu 16(%rsi),%xmm13 - -# qhasm: r = a1 & b0 -# asm 1: vpand r=reg128#15 -# asm 2: vpand r=%xmm14 -vpand %xmm0,%xmm13,%xmm14 - -# qhasm: r1 ^= r -# asm 1: pxor r=reg128#15 -# asm 2: vpand 16(r=%xmm14 -vpand 16(%rdx),%xmm13,%xmm14 - -# qhasm: r2 ^= r -# asm 1: pxor r=reg128#15 -# asm 2: vpand 32(r=%xmm14 -vpand 32(%rdx),%xmm13,%xmm14 - -# qhasm: r3 ^= r -# asm 1: pxor r=reg128#15 -# asm 2: vpand 48(r=%xmm14 -vpand 48(%rdx),%xmm13,%xmm14 - -# qhasm: r4 ^= r -# asm 1: pxor r=reg128#15 -# asm 2: vpand 64(r=%xmm14 -vpand 64(%rdx),%xmm13,%xmm14 - -# qhasm: r5 ^= r -# asm 1: pxor r=reg128#15 -# asm 2: vpand 80(r=%xmm14 -vpand 80(%rdx),%xmm13,%xmm14 - -# qhasm: r6 ^= r -# asm 1: pxor r=reg128#15 -# asm 2: vpand 96(r=%xmm14 -vpand 96(%rdx),%xmm13,%xmm14 - -# qhasm: r7 ^= r -# asm 1: pxor r=reg128#15 -# asm 2: vpand 112(r=%xmm14 -vpand 112(%rdx),%xmm13,%xmm14 - -# qhasm: r8 ^= r -# asm 1: pxor r=reg128#15 -# asm 2: vpand 128(r=%xmm14 -vpand 128(%rdx),%xmm13,%xmm14 - -# qhasm: r9 ^= r -# asm 1: pxor r=reg128#15 -# asm 2: vpand 144(r=%xmm14 -vpand 144(%rdx),%xmm13,%xmm14 - -# qhasm: r10 ^= r -# asm 1: pxor r=reg128#15 -# asm 2: vpand 160(r=%xmm14 -vpand 160(%rdx),%xmm13,%xmm14 - -# qhasm: r11 ^= r -# asm 1: pxor r=reg128#14 -# asm 2: vpand 176(r=%xmm13 -vpand 176(%rdx),%xmm13,%xmm13 - -# qhasm: r12 ^= r -# asm 1: pxor r0=reg128#4 -# asm 2: movdqa r0=%xmm3 -movdqa %xmm3,%xmm3 - -# qhasm: a0 = mem128[ input_1 + 0 ] -# asm 1: movdqu 0(a0=reg128#14 -# asm 2: movdqu 0(a0=%xmm13 -movdqu 0(%rsi),%xmm13 - -# qhasm: r = a0 & b0 -# asm 1: vpand r=reg128#1 -# asm 2: vpand r=%xmm0 -vpand %xmm0,%xmm13,%xmm0 - -# qhasm: r0 ^= r -# asm 1: pxor r=reg128#1 -# asm 2: vpand 16(r=%xmm0 -vpand 16(%rdx),%xmm13,%xmm0 - -# qhasm: r1 ^= r -# asm 1: pxor r=reg128#1 -# asm 2: vpand 32(r=%xmm0 -vpand 32(%rdx),%xmm13,%xmm0 - -# qhasm: r2 ^= r -# asm 1: pxor r=reg128#1 -# asm 2: vpand 48(r=%xmm0 -vpand 48(%rdx),%xmm13,%xmm0 - -# qhasm: r3 ^= r -# asm 1: pxor r=reg128#1 -# asm 2: vpand 64(r=%xmm0 -vpand 64(%rdx),%xmm13,%xmm0 - -# qhasm: r4 ^= r -# asm 1: pxor r=reg128#1 -# 
asm 2: vpand 80(r=%xmm0 -vpand 80(%rdx),%xmm13,%xmm0 - -# qhasm: r5 ^= r -# asm 1: pxor r=reg128#1 -# asm 2: vpand 96(r=%xmm0 -vpand 96(%rdx),%xmm13,%xmm0 - -# qhasm: r6 ^= r -# asm 1: pxor r=reg128#1 -# asm 2: vpand 112(r=%xmm0 -vpand 112(%rdx),%xmm13,%xmm0 - -# qhasm: r7 ^= r -# asm 1: pxor r=reg128#1 -# asm 2: vpand 128(r=%xmm0 -vpand 128(%rdx),%xmm13,%xmm0 - -# qhasm: r8 ^= r -# asm 1: pxor r=reg128#1 -# asm 2: vpand 144(r=%xmm0 -vpand 144(%rdx),%xmm13,%xmm0 - -# qhasm: r9 ^= r -# asm 1: pxor r=reg128#1 -# asm 2: vpand 160(r=%xmm0 -vpand 160(%rdx),%xmm13,%xmm0 - -# qhasm: r10 ^= r -# asm 1: pxor r=reg128#1 -# asm 2: vpand 176(r=%xmm0 -vpand 176(%rdx),%xmm13,%xmm0 - -# qhasm: r11 ^= r -# asm 1: pxor r11_stack=stack64#1 -# asm 2: movq r11_stack=608(%rsp) -movq %r11,608(%rsp) - -# qhasm: r12_stack = caller_r12 -# asm 1: movq r12_stack=stack64#2 -# asm 2: movq r12_stack=616(%rsp) -movq %r12,616(%rsp) - -# qhasm: r13_stack = caller_r13 -# asm 1: movq r13_stack=stack64#3 -# asm 2: movq r13_stack=624(%rsp) -movq %r13,624(%rsp) - -# qhasm: r14_stack = caller_r14 -# asm 1: movq r14_stack=stack64#4 -# asm 2: movq r14_stack=632(%rsp) -movq %r14,632(%rsp) - -# qhasm: r15_stack = caller_r15 -# asm 1: movq r15_stack=stack64#5 -# asm 2: movq r15_stack=640(%rsp) -movq %r15,640(%rsp) - -# qhasm: rbx_stack = caller_rbx -# asm 1: movq rbx_stack=stack64#6 -# asm 2: movq rbx_stack=648(%rsp) -movq %rbx,648(%rsp) - -# qhasm: ptr = &buf -# asm 1: leaq ptr=int64#5 -# asm 2: leaq ptr=%r8 -leaq 0(%rsp),%r8 - -# qhasm: tmp = input_3 -# asm 1: mov tmp=int64#6 -# asm 2: mov tmp=%r9 -mov %rcx,%r9 - -# qhasm: tmp *= 11 -# asm 1: imulq $11,tmp=int64#6 -# asm 2: imulq $11,tmp=%r9 -imulq $11,%r9,%r9 - -# qhasm: input_2 += tmp -# asm 1: add b11=reg128#1 -# asm 2: movddup 0(b11=%xmm0 -movddup 0(%rdx),%xmm0 - -# qhasm: input_2 -= input_3 -# asm 1: sub r16=reg128#3 -# asm 2: vpand r16=%xmm2 -vpand %xmm1,%xmm0,%xmm2 - -# qhasm: mem128[ ptr + 256 ] = r16 -# asm 1: movdqu r15=reg128#4 -# asm 2: vpand r15=%xmm3 -vpand %xmm2,%xmm0,%xmm3 - -# qhasm: a3[0] = mem64[ input_1 + 24 ] -# asm 1: pinsrq $0x0,24(r14=reg128#6 -# asm 2: vpand r14=%xmm5 -vpand %xmm4,%xmm0,%xmm5 - -# qhasm: a2[0] = mem64[ input_1 + 16 ] -# asm 1: pinsrq $0x0,16(r13=reg128#8 -# asm 2: vpand r13=%xmm7 -vpand %xmm6,%xmm0,%xmm7 - -# qhasm: a1[0] = mem64[ input_1 + 8 ] -# asm 1: pinsrq $0x0,8(r12=reg128#10 -# asm 2: vpand r12=%xmm9 -vpand %xmm8,%xmm0,%xmm9 - -# qhasm: a0[0] = mem64[ input_1 + 0 ] -# asm 1: pinsrq $0x0,0(r11=reg128#1 -# asm 2: vpand r11=%xmm0 -vpand %xmm10,%xmm0,%xmm0 - -# qhasm: b10 = mem64[ input_2 + 0 ] x2 -# asm 1: movddup 0(b10=reg128#12 -# asm 2: movddup 0(b10=%xmm11 -movddup 0(%rdx),%xmm11 - -# qhasm: input_2 -= input_3 -# asm 1: sub r=reg128#13 -# asm 2: vpand r=%xmm12 -vpand %xmm1,%xmm11,%xmm12 - -# qhasm: r15 ^= r -# asm 1: pxor r=reg128#4 -# asm 2: vpand r=%xmm3 -vpand %xmm2,%xmm11,%xmm3 - -# qhasm: r14 ^= r -# asm 1: pxor r=reg128#4 -# asm 2: vpand r=%xmm3 -vpand %xmm4,%xmm11,%xmm3 - -# qhasm: r13 ^= r -# asm 1: pxor r=reg128#4 -# asm 2: vpand r=%xmm3 -vpand %xmm6,%xmm11,%xmm3 - -# qhasm: r12 ^= r -# asm 1: pxor r=reg128#4 -# asm 2: vpand r=%xmm3 -vpand %xmm8,%xmm11,%xmm3 - -# qhasm: r11 ^= r -# asm 1: pxor r10=reg128#4 -# asm 2: vpand r10=%xmm3 -vpand %xmm10,%xmm11,%xmm3 - -# qhasm: b9 = mem64[ input_2 + 0 ] x2 -# asm 1: movddup 0(b9=reg128#12 -# asm 2: movddup 0(b9=%xmm11 -movddup 0(%rdx),%xmm11 - -# qhasm: input_2 -= input_3 -# asm 1: sub r=reg128#13 -# asm 2: vpand r=%xmm12 -vpand %xmm1,%xmm11,%xmm12 - -# qhasm: r14 ^= r -# asm 1: 
pxor r=reg128#6 -# asm 2: vpand r=%xmm5 -vpand %xmm2,%xmm11,%xmm5 - -# qhasm: r13 ^= r -# asm 1: pxor r=reg128#6 -# asm 2: vpand r=%xmm5 -vpand %xmm4,%xmm11,%xmm5 - -# qhasm: r12 ^= r -# asm 1: pxor r=reg128#6 -# asm 2: vpand r=%xmm5 -vpand %xmm6,%xmm11,%xmm5 - -# qhasm: r11 ^= r -# asm 1: pxor r=reg128#6 -# asm 2: vpand r=%xmm5 -vpand %xmm8,%xmm11,%xmm5 - -# qhasm: r10 ^= r -# asm 1: pxor r9=reg128#6 -# asm 2: vpand r9=%xmm5 -vpand %xmm10,%xmm11,%xmm5 - -# qhasm: b8 = mem64[ input_2 + 0 ] x2 -# asm 1: movddup 0(b8=reg128#12 -# asm 2: movddup 0(b8=%xmm11 -movddup 0(%rdx),%xmm11 - -# qhasm: input_2 -= input_3 -# asm 1: sub r=reg128#13 -# asm 2: vpand r=%xmm12 -vpand %xmm1,%xmm11,%xmm12 - -# qhasm: r13 ^= r -# asm 1: pxor r=reg128#8 -# asm 2: vpand r=%xmm7 -vpand %xmm2,%xmm11,%xmm7 - -# qhasm: r12 ^= r -# asm 1: pxor r=reg128#8 -# asm 2: vpand r=%xmm7 -vpand %xmm4,%xmm11,%xmm7 - -# qhasm: r11 ^= r -# asm 1: pxor r=reg128#8 -# asm 2: vpand r=%xmm7 -vpand %xmm6,%xmm11,%xmm7 - -# qhasm: r10 ^= r -# asm 1: pxor r=reg128#8 -# asm 2: vpand r=%xmm7 -vpand %xmm8,%xmm11,%xmm7 - -# qhasm: r9 ^= r -# asm 1: pxor r8=reg128#8 -# asm 2: vpand r8=%xmm7 -vpand %xmm10,%xmm11,%xmm7 - -# qhasm: b7 = mem64[ input_2 + 0 ] x2 -# asm 1: movddup 0(b7=reg128#12 -# asm 2: movddup 0(b7=%xmm11 -movddup 0(%rdx),%xmm11 - -# qhasm: input_2 -= input_3 -# asm 1: sub r=reg128#13 -# asm 2: vpand r=%xmm12 -vpand %xmm1,%xmm11,%xmm12 - -# qhasm: r12 ^= r -# asm 1: pxor r=reg128#10 -# asm 2: vpand r=%xmm9 -vpand %xmm2,%xmm11,%xmm9 - -# qhasm: r11 ^= r -# asm 1: pxor r=reg128#10 -# asm 2: vpand r=%xmm9 -vpand %xmm4,%xmm11,%xmm9 - -# qhasm: r10 ^= r -# asm 1: pxor r=reg128#10 -# asm 2: vpand r=%xmm9 -vpand %xmm6,%xmm11,%xmm9 - -# qhasm: r9 ^= r -# asm 1: pxor r=reg128#10 -# asm 2: vpand r=%xmm9 -vpand %xmm8,%xmm11,%xmm9 - -# qhasm: r8 ^= r -# asm 1: pxor r7=reg128#10 -# asm 2: vpand r7=%xmm9 -vpand %xmm10,%xmm11,%xmm9 - -# qhasm: b6 = mem64[ input_2 + 0 ] x2 -# asm 1: movddup 0(b6=reg128#12 -# asm 2: movddup 0(b6=%xmm11 -movddup 0(%rdx),%xmm11 - -# qhasm: input_2 -= input_3 -# asm 1: sub r=reg128#13 -# asm 2: vpand r=%xmm12 -vpand %xmm1,%xmm11,%xmm12 - -# qhasm: r11 ^= r -# asm 1: pxor r=reg128#1 -# asm 2: vpand r=%xmm0 -vpand %xmm2,%xmm11,%xmm0 - -# qhasm: r10 ^= r -# asm 1: pxor r=reg128#1 -# asm 2: vpand r=%xmm0 -vpand %xmm4,%xmm11,%xmm0 - -# qhasm: r9 ^= r -# asm 1: pxor r=reg128#1 -# asm 2: vpand r=%xmm0 -vpand %xmm6,%xmm11,%xmm0 - -# qhasm: r8 ^= r -# asm 1: pxor r=reg128#1 -# asm 2: vpand r=%xmm0 -vpand %xmm8,%xmm11,%xmm0 - -# qhasm: r7 ^= r -# asm 1: pxor r6=reg128#1 -# asm 2: vpand r6=%xmm0 -vpand %xmm10,%xmm11,%xmm0 - -# qhasm: b5 = mem64[ input_2 + 0 ] x2 -# asm 1: movddup 0(b5=reg128#12 -# asm 2: movddup 0(b5=%xmm11 -movddup 0(%rdx),%xmm11 - -# qhasm: input_2 -= input_3 -# asm 1: sub r=reg128#13 -# asm 2: vpand r=%xmm12 -vpand %xmm1,%xmm11,%xmm12 - -# qhasm: r10 ^= r -# asm 1: pxor r=reg128#4 -# asm 2: vpand r=%xmm3 -vpand %xmm2,%xmm11,%xmm3 - -# qhasm: r9 ^= r -# asm 1: pxor r=reg128#4 -# asm 2: vpand r=%xmm3 -vpand %xmm4,%xmm11,%xmm3 - -# qhasm: r8 ^= r -# asm 1: pxor r=reg128#4 -# asm 2: vpand r=%xmm3 -vpand %xmm6,%xmm11,%xmm3 - -# qhasm: r7 ^= r -# asm 1: pxor r=reg128#4 -# asm 2: vpand r=%xmm3 -vpand %xmm8,%xmm11,%xmm3 - -# qhasm: r6 ^= r -# asm 1: pxor r5=reg128#4 -# asm 2: vpand r5=%xmm3 -vpand %xmm10,%xmm11,%xmm3 - -# qhasm: b4 = mem64[ input_2 + 0 ] x2 -# asm 1: movddup 0(b4=reg128#12 -# asm 2: movddup 0(b4=%xmm11 -movddup 0(%rdx),%xmm11 - -# qhasm: input_2 -= input_3 -# asm 1: sub r=reg128#13 -# asm 2: vpand 
r=%xmm12 -vpand %xmm1,%xmm11,%xmm12 - -# qhasm: r9 ^= r -# asm 1: pxor r=reg128#6 -# asm 2: vpand r=%xmm5 -vpand %xmm2,%xmm11,%xmm5 - -# qhasm: r8 ^= r -# asm 1: pxor r=reg128#6 -# asm 2: vpand r=%xmm5 -vpand %xmm4,%xmm11,%xmm5 - -# qhasm: r7 ^= r -# asm 1: pxor r=reg128#6 -# asm 2: vpand r=%xmm5 -vpand %xmm6,%xmm11,%xmm5 - -# qhasm: r6 ^= r -# asm 1: pxor r=reg128#6 -# asm 2: vpand r=%xmm5 -vpand %xmm8,%xmm11,%xmm5 - -# qhasm: r5 ^= r -# asm 1: pxor r4=reg128#6 -# asm 2: vpand r4=%xmm5 -vpand %xmm10,%xmm11,%xmm5 - -# qhasm: b3 = mem64[ input_2 + 0 ] x2 -# asm 1: movddup 0(b3=reg128#12 -# asm 2: movddup 0(b3=%xmm11 -movddup 0(%rdx),%xmm11 - -# qhasm: input_2 -= input_3 -# asm 1: sub r=reg128#13 -# asm 2: vpand r=%xmm12 -vpand %xmm1,%xmm11,%xmm12 - -# qhasm: r8 ^= r -# asm 1: pxor r=reg128#8 -# asm 2: vpand r=%xmm7 -vpand %xmm2,%xmm11,%xmm7 - -# qhasm: r7 ^= r -# asm 1: pxor r=reg128#8 -# asm 2: vpand r=%xmm7 -vpand %xmm4,%xmm11,%xmm7 - -# qhasm: r6 ^= r -# asm 1: pxor r=reg128#8 -# asm 2: vpand r=%xmm7 -vpand %xmm6,%xmm11,%xmm7 - -# qhasm: r5 ^= r -# asm 1: pxor r=reg128#8 -# asm 2: vpand r=%xmm7 -vpand %xmm8,%xmm11,%xmm7 - -# qhasm: r4 ^= r -# asm 1: pxor r3=reg128#8 -# asm 2: vpand r3=%xmm7 -vpand %xmm10,%xmm11,%xmm7 - -# qhasm: b2 = mem64[ input_2 + 0 ] x2 -# asm 1: movddup 0(b2=reg128#12 -# asm 2: movddup 0(b2=%xmm11 -movddup 0(%rdx),%xmm11 - -# qhasm: input_2 -= input_3 -# asm 1: sub r=reg128#13 -# asm 2: vpand r=%xmm12 -vpand %xmm1,%xmm11,%xmm12 - -# qhasm: r7 ^= r -# asm 1: pxor r=reg128#10 -# asm 2: vpand r=%xmm9 -vpand %xmm2,%xmm11,%xmm9 - -# qhasm: r6 ^= r -# asm 1: pxor r=reg128#10 -# asm 2: vpand r=%xmm9 -vpand %xmm4,%xmm11,%xmm9 - -# qhasm: r5 ^= r -# asm 1: pxor r=reg128#10 -# asm 2: vpand r=%xmm9 -vpand %xmm6,%xmm11,%xmm9 - -# qhasm: r4 ^= r -# asm 1: pxor r=reg128#10 -# asm 2: vpand r=%xmm9 -vpand %xmm8,%xmm11,%xmm9 - -# qhasm: r3 ^= r -# asm 1: pxor r2=reg128#10 -# asm 2: vpand r2=%xmm9 -vpand %xmm10,%xmm11,%xmm9 - -# qhasm: b1 = mem64[ input_2 + 0 ] x2 -# asm 1: movddup 0(b1=reg128#12 -# asm 2: movddup 0(b1=%xmm11 -movddup 0(%rdx),%xmm11 - -# qhasm: input_2 -= input_3 -# asm 1: sub r=reg128#13 -# asm 2: vpand r=%xmm12 -vpand %xmm1,%xmm11,%xmm12 - -# qhasm: r6 ^= r -# asm 1: pxor r=reg128#1 -# asm 2: vpand r=%xmm0 -vpand %xmm2,%xmm11,%xmm0 - -# qhasm: r5 ^= r -# asm 1: pxor r=reg128#1 -# asm 2: vpand r=%xmm0 -vpand %xmm4,%xmm11,%xmm0 - -# qhasm: r4 ^= r -# asm 1: pxor r=reg128#1 -# asm 2: vpand r=%xmm0 -vpand %xmm6,%xmm11,%xmm0 - -# qhasm: r3 ^= r -# asm 1: pxor r=reg128#1 -# asm 2: vpand r=%xmm0 -vpand %xmm8,%xmm11,%xmm0 - -# qhasm: r2 ^= r -# asm 1: pxor r1=reg128#1 -# asm 2: vpand r1=%xmm0 -vpand %xmm10,%xmm11,%xmm0 - -# qhasm: b0 = mem64[ input_2 + 0 ] x2 -# asm 1: movddup 0(b0=reg128#12 -# asm 2: movddup 0(b0=%xmm11 -movddup 0(%rdx),%xmm11 - -# qhasm: input_2 -= input_3 -# asm 1: sub r=reg128#2 -# asm 2: vpand r=%xmm1 -vpand %xmm1,%xmm11,%xmm1 - -# qhasm: r5 ^= r -# asm 1: pxor r=reg128#2 -# asm 2: vpand r=%xmm1 -vpand %xmm2,%xmm11,%xmm1 - -# qhasm: r4 ^= r -# asm 1: pxor r=reg128#2 -# asm 2: vpand r=%xmm1 -vpand %xmm4,%xmm11,%xmm1 - -# qhasm: r3 ^= r -# asm 1: pxor r=reg128#2 -# asm 2: vpand r=%xmm1 -vpand %xmm6,%xmm11,%xmm1 - -# qhasm: r2 ^= r -# asm 1: pxor r=reg128#2 -# asm 2: vpand r=%xmm1 -vpand %xmm8,%xmm11,%xmm1 - -# qhasm: r1 ^= r -# asm 1: pxor r0=reg128#2 -# asm 2: vpand r0=%xmm1 -vpand %xmm10,%xmm11,%xmm1 - -# qhasm: mem128[ ptr + 64 ] = r4 -# asm 1: movdqu h22=int64#2 -# asm 2: movq 264(h22=%rsi -movq 264(%r8),%rsi - -# qhasm: h13 = h22 -# asm 1: mov 
h13=int64#3 -# asm 2: mov h13=%rdx -mov %rsi,%rdx - -# qhasm: h10 = h22 -# asm 1: mov h10=int64#2 -# asm 2: mov h10=%rsi -mov %rsi,%rsi - -# qhasm: h21 = mem64[ ptr + 248 ] -# asm 1: movq 248(h21=int64#4 -# asm 2: movq 248(h21=%rcx -movq 248(%r8),%rcx - -# qhasm: h12 = h21 -# asm 1: mov h12=int64#6 -# asm 2: mov h12=%r9 -mov %rcx,%r9 - -# qhasm: h9 = h21 -# asm 1: mov h9=int64#4 -# asm 2: mov h9=%rcx -mov %rcx,%rcx - -# qhasm: h20 = mem64[ ptr + 232 ] -# asm 1: movq 232(h20=int64#7 -# asm 2: movq 232(h20=%rax -movq 232(%r8),%rax - -# qhasm: h11 = h20 -# asm 1: mov h11=int64#8 -# asm 2: mov h11=%r10 -mov %rax,%r10 - -# qhasm: h8 = h20 -# asm 1: mov h8=int64#7 -# asm 2: mov h8=%rax -mov %rax,%rax - -# qhasm: h19 = mem64[ ptr + 216 ] -# asm 1: movq 216(h19=int64#9 -# asm 2: movq 216(h19=%r11 -movq 216(%r8),%r11 - -# qhasm: h10 ^= h19 -# asm 1: xor h7=int64#9 -# asm 2: mov h7=%r11 -mov %r11,%r11 - -# qhasm: h18 = mem64[ ptr + 200 ] -# asm 1: movq 200(h18=int64#10 -# asm 2: movq 200(h18=%r12 -movq 200(%r8),%r12 - -# qhasm: h9 ^= h18 -# asm 1: xor h6=int64#10 -# asm 2: mov h6=%r12 -mov %r12,%r12 - -# qhasm: h17 = mem64[ ptr + 184 ] -# asm 1: movq 184(h17=int64#11 -# asm 2: movq 184(h17=%r13 -movq 184(%r8),%r13 - -# qhasm: h8 ^= h17 -# asm 1: xor h5=int64#11 -# asm 2: mov h5=%r13 -mov %r13,%r13 - -# qhasm: h16 = mem64[ ptr + 168 ] -# asm 1: movq 168(h16=int64#12 -# asm 2: movq 168(h16=%r14 -movq 168(%r8),%r14 - -# qhasm: h16 ^= *(uint64 *) ( ptr + 256 ) -# asm 1: xorq 256(h4=int64#12 -# asm 2: mov h4=%r14 -mov %r14,%r14 - -# qhasm: h15 = mem64[ ptr + 152 ] -# asm 1: movq 152(h15=int64#13 -# asm 2: movq 152(h15=%r15 -movq 152(%r8),%r15 - -# qhasm: h15 ^= *(uint64 *) ( ptr + 240 ) -# asm 1: xorq 240(h3=int64#13 -# asm 2: mov h3=%r15 -mov %r15,%r15 - -# qhasm: h14 = mem64[ ptr + 136 ] -# asm 1: movq 136(h14=int64#14 -# asm 2: movq 136(h14=%rbx -movq 136(%r8),%rbx - -# qhasm: h14 ^= *(uint64 *) ( ptr + 224 ) -# asm 1: xorq 224(h2=int64#14 -# asm 2: mov h2=%rbx -mov %rbx,%rbx - -# qhasm: h13 ^= *(uint64 *) ( ptr + 120 ) -# asm 1: xorq 120(h1=int64#3 -# asm 2: mov h1=%rdx -mov %rdx,%rdx - -# qhasm: h12 ^= *(uint64 *) ( ptr + 104 ) -# asm 1: xorq 104(h0=int64#6 -# asm 2: mov h0=%r9 -mov %r9,%r9 - -# qhasm: h11 ^= *(uint64 *) ( ptr + 176 ) -# asm 1: xorq 176(caller_r11=int64#9 -# asm 2: movq caller_r11=%r11 -movq 608(%rsp),%r11 - -# qhasm: caller_r12 = r12_stack -# asm 1: movq caller_r12=int64#10 -# asm 2: movq caller_r12=%r12 -movq 616(%rsp),%r12 - -# qhasm: caller_r13 = r13_stack -# asm 1: movq caller_r13=int64#11 -# asm 2: movq caller_r13=%r13 -movq 624(%rsp),%r13 - -# qhasm: caller_r14 = r14_stack -# asm 1: movq caller_r14=int64#12 -# asm 2: movq caller_r14=%r14 -movq 632(%rsp),%r14 - -# qhasm: caller_r15 = r15_stack -# asm 1: movq caller_r15=int64#13 -# asm 2: movq caller_r15=%r15 -movq 640(%rsp),%r15 - -# qhasm: caller_rbx = rbx_stack -# asm 1: movq caller_rbx=int64#14 -# asm 2: movq caller_rbx=%rbx -movq 648(%rsp),%rbx - -# qhasm: return -add %r11,%rsp -ret diff --git a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/vec_reduce_asm.S b/src/kem/classic_mceliece/pqclean_mceliece348864_sse/vec_reduce_asm.S deleted file mode 100644 index 0c9caf143..000000000 --- a/src/kem/classic_mceliece/pqclean_mceliece348864_sse/vec_reduce_asm.S +++ /dev/null @@ -1,356 +0,0 @@ - -# qhasm: int64 input_0 - -# qhasm: int64 input_1 - -# qhasm: int64 input_2 - -# qhasm: int64 input_3 - -# qhasm: int64 input_4 - -# qhasm: int64 input_5 - -# qhasm: stack64 input_6 - -# qhasm: stack64 input_7 - -# qhasm: int64 
caller_r11 - -# qhasm: int64 caller_r12 - -# qhasm: int64 caller_r13 - -# qhasm: int64 caller_r14 - -# qhasm: int64 caller_r15 - -# qhasm: int64 caller_rbx - -# qhasm: int64 caller_rbp - -# qhasm: int64 t - -# qhasm: int64 c - -# qhasm: int64 r - -# qhasm: enter vec_reduce_asm -.p2align 5 -.global _PQCLEAN_MCELIECE348864_SSE_vec_reduce_asm -.global PQCLEAN_MCELIECE348864_SSE_vec_reduce_asm -_PQCLEAN_MCELIECE348864_SSE_vec_reduce_asm: -PQCLEAN_MCELIECE348864_SSE_vec_reduce_asm: -mov %rsp,%r11 -and $31,%r11 -add $0,%r11 -sub %r11,%rsp - -# qhasm: r = 0 -# asm 1: mov $0,>r=int64#7 -# asm 2: mov $0,>r=%rax -mov $0,%rax - -# qhasm: t = mem64[ input_0 + 88 ] -# asm 1: movq 88(t=int64#2 -# asm 2: movq 88(t=%rsi -movq 88(%rdi),%rsi - -# qhasm: c = count(t) -# asm 1: popcnt c=int64#2 -# asm 2: popcnt c=%rsi -popcnt %rsi, %rsi - -# qhasm: (uint32) c &= 1 -# asm 1: and $1,t=int64#2 -# asm 2: movq 80(t=%rsi -movq 80(%rdi),%rsi - -# qhasm: c = count(t) -# asm 1: popcnt c=int64#2 -# asm 2: popcnt c=%rsi -popcnt %rsi, %rsi - -# qhasm: (uint32) c &= 1 -# asm 1: and $1,t=int64#2 -# asm 2: movq 72(t=%rsi -movq 72(%rdi),%rsi - -# qhasm: c = count(t) -# asm 1: popcnt c=int64#2 -# asm 2: popcnt c=%rsi -popcnt %rsi, %rsi - -# qhasm: (uint32) c &= 1 -# asm 1: and $1,t=int64#2 -# asm 2: movq 64(t=%rsi -movq 64(%rdi),%rsi - -# qhasm: c = count(t) -# asm 1: popcnt c=int64#2 -# asm 2: popcnt c=%rsi -popcnt %rsi, %rsi - -# qhasm: (uint32) c &= 1 -# asm 1: and $1,t=int64#2 -# asm 2: movq 56(t=%rsi -movq 56(%rdi),%rsi - -# qhasm: c = count(t) -# asm 1: popcnt c=int64#2 -# asm 2: popcnt c=%rsi -popcnt %rsi, %rsi - -# qhasm: (uint32) c &= 1 -# asm 1: and $1,t=int64#2 -# asm 2: movq 48(t=%rsi -movq 48(%rdi),%rsi - -# qhasm: c = count(t) -# asm 1: popcnt c=int64#2 -# asm 2: popcnt c=%rsi -popcnt %rsi, %rsi - -# qhasm: (uint32) c &= 1 -# asm 1: and $1,t=int64#2 -# asm 2: movq 40(t=%rsi -movq 40(%rdi),%rsi - -# qhasm: c = count(t) -# asm 1: popcnt c=int64#2 -# asm 2: popcnt c=%rsi -popcnt %rsi, %rsi - -# qhasm: (uint32) c &= 1 -# asm 1: and $1,t=int64#2 -# asm 2: movq 32(t=%rsi -movq 32(%rdi),%rsi - -# qhasm: c = count(t) -# asm 1: popcnt c=int64#2 -# asm 2: popcnt c=%rsi -popcnt %rsi, %rsi - -# qhasm: (uint32) c &= 1 -# asm 1: and $1,t=int64#2 -# asm 2: movq 24(t=%rsi -movq 24(%rdi),%rsi - -# qhasm: c = count(t) -# asm 1: popcnt c=int64#2 -# asm 2: popcnt c=%rsi -popcnt %rsi, %rsi - -# qhasm: (uint32) c &= 1 -# asm 1: and $1,t=int64#2 -# asm 2: movq 16(t=%rsi -movq 16(%rdi),%rsi - -# qhasm: c = count(t) -# asm 1: popcnt c=int64#2 -# asm 2: popcnt c=%rsi -popcnt %rsi, %rsi - -# qhasm: (uint32) c &= 1 -# asm 1: and $1,t=int64#2 -# asm 2: movq 8(t=%rsi -movq 8(%rdi),%rsi - -# qhasm: c = count(t) -# asm 1: popcnt c=int64#2 -# asm 2: popcnt c=%rsi -popcnt %rsi, %rsi - -# qhasm: (uint32) c &= 1 -# asm 1: and $1,t=int64#1 -# asm 2: movq 0(t=%rdi -movq 0(%rdi),%rdi - -# qhasm: c = count(t) -# asm 1: popcnt c=int64#1 -# asm 2: popcnt c=%rdi -popcnt %rdi, %rdi - -# qhasm: (uint32) c &= 1 -# asm 1: and $1,