mirror of
https://github.com/open-quantum-safe/liboqs.git
synced 2025-12-05 00:03:00 -05:00
Remove unused McEliece implementations
This commit is contained in:
parent
9b77b33ece
commit
d682661514
@ -1,16 +0,0 @@
|
||||
Public Domain.
|
||||
|
||||
Authors of Classic McEliece in alphabetical order:
|
||||
|
||||
Daniel J. Bernstein, University of Illinois at Chicago
|
||||
Tung Chou, Osaka University
|
||||
Tanja Lange, Technische Universiteit Eindhoven
|
||||
Ingo von Maurich, self
|
||||
Rafael Misoczki, Intel Corporation
|
||||
Ruben Niederhagen, Fraunhofer SIT
|
||||
Edoardo Persichetti, Florida Atlantic University
|
||||
Christiane Peters, self
|
||||
Peter Schwabe, Radboud University
|
||||
Nicolas Sendrier, Inria
|
||||
Jakub Szefer, Yale University
|
||||
Wen Wang, Yale University
|
||||
@ -1,13 +0,0 @@
|
||||
#include "aes256ctr.h"
|
||||
|
||||
void PQCLEAN_MCELIECE348864_CLEAN_aes256ctr(
|
||||
uint8_t *out,
|
||||
size_t outlen,
|
||||
const uint8_t nonce[AESCTR_NONCEBYTES],
|
||||
const uint8_t key[AES256_KEYBYTES]) {
|
||||
|
||||
aes256ctx state;
|
||||
aes256_ctr_keyexp(&state, key);
|
||||
aes256_ctr(out, outlen, nonce, &state);
|
||||
aes256_ctx_release(&state);
|
||||
}
|
||||
@ -1,17 +0,0 @@
|
||||
#ifndef PQCLEAN_MCELIECE348864_CLEAN_AES256CTR_H
|
||||
#define PQCLEAN_MCELIECE348864_CLEAN_AES256CTR_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "aes.h"
|
||||
|
||||
|
||||
void PQCLEAN_MCELIECE348864_CLEAN_aes256ctr(
|
||||
uint8_t *out,
|
||||
size_t outlen,
|
||||
const uint8_t nonce[AESCTR_NONCEBYTES],
|
||||
const uint8_t key[AES256_KEYBYTES]
|
||||
);
|
||||
|
||||
#endif
|
||||
@ -1,32 +0,0 @@
|
||||
#ifndef PQCLEAN_MCELIECE348864_CLEAN_API_H
|
||||
#define PQCLEAN_MCELIECE348864_CLEAN_API_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#define PQCLEAN_MCELIECE348864_CLEAN_CRYPTO_ALGNAME "Classic McEliece 348864"
|
||||
#define PQCLEAN_MCELIECE348864_CLEAN_CRYPTO_PUBLICKEYBYTES 261120
|
||||
#define PQCLEAN_MCELIECE348864_CLEAN_CRYPTO_SECRETKEYBYTES 6452
|
||||
#define PQCLEAN_MCELIECE348864_CLEAN_CRYPTO_CIPHERTEXTBYTES 128
|
||||
#define PQCLEAN_MCELIECE348864_CLEAN_CRYPTO_BYTES 32
|
||||
|
||||
|
||||
int PQCLEAN_MCELIECE348864_CLEAN_crypto_kem_enc(
|
||||
uint8_t *c,
|
||||
uint8_t *key,
|
||||
const uint8_t *pk
|
||||
);
|
||||
|
||||
int PQCLEAN_MCELIECE348864_CLEAN_crypto_kem_dec(
|
||||
uint8_t *key,
|
||||
const uint8_t *c,
|
||||
const uint8_t *sk
|
||||
);
|
||||
|
||||
int PQCLEAN_MCELIECE348864_CLEAN_crypto_kem_keypair
|
||||
(
|
||||
uint8_t *pk,
|
||||
uint8_t *sk
|
||||
);
|
||||
|
||||
#endif
|
||||
|
||||
@ -1,139 +0,0 @@
|
||||
/*
|
||||
This file is for Benes network related functions
|
||||
*/
|
||||
|
||||
#include "benes.h"
|
||||
|
||||
#include "params.h"
|
||||
#include "transpose.h"
|
||||
#include "util.h"
|
||||
|
||||
/* one layer of the benes network */
/* data: 64 words; word j is conditionally exchanged with word j + 2^lgs */
/* bits: one 64-bit mask per exchanged pair (32 masks), consumed in order; */
/*       a set mask bit swaps the corresponding bit of the two words */
static void layer(uint64_t *data, uint64_t *bits, int lgs) {
    const int stride = 1 << lgs;
    int used = 0; /* masks consumed so far */
    int base, k;

    for (base = 0; base < 64; base += 2 * stride) {
        for (k = 0; k < stride; k++) {
            uint64_t *lo = &data[base + k];
            uint64_t *hi = lo + stride;
            const uint64_t diff = (*lo ^ *hi) & bits[used++];

            *lo ^= diff;
            *hi ^= diff;
        }
    }
}
|
||||
|
||||
/* input: r, sequence of bits to be permuted */
|
||||
/* bits, condition bits of the Benes network */
|
||||
/* rev, 0 for normal application; !0 for inverse */
|
||||
/* output: r, permuted bits */
|
||||
void PQCLEAN_MCELIECE348864_CLEAN_apply_benes(unsigned char *r, const unsigned char *bits, int rev) {
|
||||
int i;
|
||||
|
||||
const unsigned char *cond_ptr;
|
||||
int inc, low;
|
||||
|
||||
uint64_t bs[64];
|
||||
uint64_t cond[64];
|
||||
|
||||
//
|
||||
|
||||
for (i = 0; i < 64; i++) {
|
||||
bs[i] = PQCLEAN_MCELIECE348864_CLEAN_load8(r + i * 8);
|
||||
}
|
||||
|
||||
if (rev == 0) {
|
||||
inc = 256;
|
||||
cond_ptr = bits;
|
||||
} else {
|
||||
inc = -256;
|
||||
cond_ptr = bits + (2 * GFBITS - 2) * 256;
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
PQCLEAN_MCELIECE348864_CLEAN_transpose_64x64(bs, bs);
|
||||
|
||||
for (low = 0; low <= 5; low++) {
|
||||
for (i = 0; i < 64; i++) {
|
||||
cond[i] = PQCLEAN_MCELIECE348864_CLEAN_load4(cond_ptr + i * 4);
|
||||
}
|
||||
PQCLEAN_MCELIECE348864_CLEAN_transpose_64x64(cond, cond);
|
||||
layer(bs, cond, low);
|
||||
cond_ptr += inc;
|
||||
}
|
||||
|
||||
PQCLEAN_MCELIECE348864_CLEAN_transpose_64x64(bs, bs);
|
||||
|
||||
for (low = 0; low <= 5; low++) {
|
||||
for (i = 0; i < 32; i++) {
|
||||
cond[i] = PQCLEAN_MCELIECE348864_CLEAN_load8(cond_ptr + i * 8);
|
||||
}
|
||||
layer(bs, cond, low);
|
||||
cond_ptr += inc;
|
||||
}
|
||||
for (low = 4; low >= 0; low--) {
|
||||
for (i = 0; i < 32; i++) {
|
||||
cond[i] = PQCLEAN_MCELIECE348864_CLEAN_load8(cond_ptr + i * 8);
|
||||
}
|
||||
layer(bs, cond, low);
|
||||
cond_ptr += inc;
|
||||
}
|
||||
|
||||
PQCLEAN_MCELIECE348864_CLEAN_transpose_64x64(bs, bs);
|
||||
|
||||
for (low = 5; low >= 0; low--) {
|
||||
for (i = 0; i < 64; i++) {
|
||||
cond[i] = PQCLEAN_MCELIECE348864_CLEAN_load4(cond_ptr + i * 4);
|
||||
}
|
||||
PQCLEAN_MCELIECE348864_CLEAN_transpose_64x64(cond, cond);
|
||||
layer(bs, cond, low);
|
||||
cond_ptr += inc;
|
||||
}
|
||||
|
||||
PQCLEAN_MCELIECE348864_CLEAN_transpose_64x64(bs, bs);
|
||||
|
||||
|
||||
for (i = 0; i < 64; i++) {
|
||||
PQCLEAN_MCELIECE348864_CLEAN_store8(r + i * 8, bs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/* input: condition bits c */
|
||||
/* output: support s */
|
||||
void PQCLEAN_MCELIECE348864_CLEAN_support_gen(gf *s, const unsigned char *c) {
|
||||
gf a;
|
||||
int i, j;
|
||||
unsigned char L[ GFBITS ][ (1 << GFBITS) / 8 ];
|
||||
|
||||
for (i = 0; i < GFBITS; i++) {
|
||||
for (j = 0; j < (1 << GFBITS) / 8; j++) {
|
||||
L[i][j] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < (1 << GFBITS); i++) {
|
||||
a = PQCLEAN_MCELIECE348864_CLEAN_bitrev((gf) i);
|
||||
|
||||
for (j = 0; j < GFBITS; j++) {
|
||||
L[j][ i / 8 ] |= ((a >> j) & 1) << (i % 8);
|
||||
}
|
||||
}
|
||||
|
||||
for (j = 0; j < GFBITS; j++) {
|
||||
PQCLEAN_MCELIECE348864_CLEAN_apply_benes(L[j], c, 0);
|
||||
}
|
||||
|
||||
for (i = 0; i < SYS_N; i++) {
|
||||
s[i] = 0;
|
||||
for (j = GFBITS - 1; j >= 0; j--) {
|
||||
s[i] <<= 1;
|
||||
s[i] |= (L[j][i / 8] >> (i % 8)) & 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,14 +0,0 @@
|
||||
#ifndef PQCLEAN_MCELIECE348864_CLEAN_BENES_H
|
||||
#define PQCLEAN_MCELIECE348864_CLEAN_BENES_H
|
||||
/*
|
||||
This file is for Benes network related functions
|
||||
*/
|
||||
|
||||
|
||||
#include "gf.h"
|
||||
|
||||
void PQCLEAN_MCELIECE348864_CLEAN_apply_benes(unsigned char * /*r*/, const unsigned char * /*bits*/, int /*rev*/);
|
||||
void PQCLEAN_MCELIECE348864_CLEAN_support_gen(gf * /*s*/, const unsigned char * /*c*/);
|
||||
|
||||
#endif
|
||||
|
||||
@ -1,83 +0,0 @@
|
||||
/*
|
||||
This file is for the Berlekamp-Massey algorithm
|
||||
see http://crypto.stanford.edu/~mironov/cs359/massey.pdf
|
||||
*/
|
||||
#include "bm.h"
|
||||
|
||||
#include "params.h"
|
||||
|
||||
#define min(a, b) (((a) < (b)) ? (a) : (b))
|
||||
|
||||
/* the Berlekamp-Massey algorithm */
|
||||
/* input: s, sequence of field elements */
|
||||
/* output: out, minimal polynomial of s */
|
||||
void PQCLEAN_MCELIECE348864_CLEAN_bm(gf *out, gf *s) {
|
||||
int i;
|
||||
|
||||
uint16_t N = 0;
|
||||
uint16_t L = 0;
|
||||
uint16_t mle;
|
||||
uint16_t mne;
|
||||
|
||||
gf T[ SYS_T + 1 ];
|
||||
gf C[ SYS_T + 1 ];
|
||||
gf B[ SYS_T + 1 ];
|
||||
|
||||
gf b = 1, d, f;
|
||||
|
||||
//
|
||||
|
||||
for (i = 0; i < SYS_T + 1; i++) {
|
||||
C[i] = B[i] = 0;
|
||||
}
|
||||
|
||||
B[1] = C[0] = 1;
|
||||
|
||||
//
|
||||
|
||||
for (N = 0; N < 2 * SYS_T; N++) {
|
||||
d = 0;
|
||||
|
||||
for (i = 0; i <= min(N, SYS_T); i++) {
|
||||
d ^= PQCLEAN_MCELIECE348864_CLEAN_gf_mul(C[i], s[ N - i]);
|
||||
}
|
||||
|
||||
mne = d;
|
||||
mne -= 1;
|
||||
mne >>= 15;
|
||||
mne -= 1;
|
||||
mle = N;
|
||||
mle -= 2 * L;
|
||||
mle >>= 15;
|
||||
mle -= 1;
|
||||
mle &= mne;
|
||||
|
||||
for (i = 0; i <= SYS_T; i++) {
|
||||
T[i] = C[i];
|
||||
}
|
||||
|
||||
f = PQCLEAN_MCELIECE348864_CLEAN_gf_frac(b, d);
|
||||
|
||||
for (i = 0; i <= SYS_T; i++) {
|
||||
C[i] ^= PQCLEAN_MCELIECE348864_CLEAN_gf_mul(f, B[i]) & mne;
|
||||
}
|
||||
|
||||
L = (L & ~mle) | ((N + 1 - L) & mle);
|
||||
|
||||
for (i = 0; i <= SYS_T; i++) {
|
||||
B[i] = (B[i] & ~mle) | (T[i] & mle);
|
||||
}
|
||||
|
||||
b = (b & ~mle) | (d & mle);
|
||||
|
||||
for (i = SYS_T; i >= 1; i--) {
|
||||
B[i] = B[i - 1];
|
||||
}
|
||||
B[0] = 0;
|
||||
}
|
||||
|
||||
for (i = 0; i <= SYS_T; i++) {
|
||||
out[i] = C[ SYS_T - i ];
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,13 +0,0 @@
|
||||
#ifndef PQCLEAN_MCELIECE348864_CLEAN_BM_H
|
||||
#define PQCLEAN_MCELIECE348864_CLEAN_BM_H
|
||||
/*
|
||||
This file is for the Berlekamp-Massey algorithm
|
||||
see http://crypto.stanford.edu/~mironov/cs359/massey.pdf
|
||||
*/
|
||||
|
||||
#include "gf.h"
|
||||
|
||||
void PQCLEAN_MCELIECE348864_CLEAN_bm(gf * /*out*/, gf * /*s*/);
|
||||
|
||||
#endif
|
||||
|
||||
@ -1,274 +0,0 @@
|
||||
/*
|
||||
This file is for functions required for generating the control bits of the Benes network w.r.t. a random permutation
|
||||
see the Lev-Pippenger-Valiant paper https://www.computer.org/csdl/trans/tc/1981/02/06312171.pdf
|
||||
*/
|
||||
|
||||
#include "controlbits.h"
|
||||
|
||||
#include "params.h"
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
typedef uint8_t bit;

#define N (1 << GFBITS)

/* Branch-free comparison: returns the top bit of a - b (mod 2^32). */
/* That is 1 when a < b and 0 otherwise, provided the operands are close */
/* enough that the difference does not wrap past bit 31 (e.g. both < 2^31). */
static bit is_smaller(uint32_t a, uint32_t b) {
    return (bit)((a - b) >> 31);
}
|
||||
|
||||
static bit is_smaller_63b(uint64_t a, uint64_t b) {
|
||||
uint64_t ret = 0;
|
||||
|
||||
ret = a - b;
|
||||
ret >>= 63;
|
||||
|
||||
return (bit)ret;
|
||||
}
|
||||
|
||||
/* Branch-free conditional swap: exchanges *x and *y when swap == 1 and */
/* leaves both untouched when swap == 0. */
static void cswap(uint32_t *x, uint32_t *y, bit swap) {
    const uint32_t mask = 0 - (uint32_t)swap; /* 0 or all-ones */
    const uint32_t delta = (*x ^ *y) & mask;

    *x ^= delta;
    *y ^= delta;
}
|
||||
|
||||
/* 64-bit branch-free conditional swap: exchanges *x and *y when swap == 1 */
/* and leaves both untouched when swap == 0. */
static void cswap_63b(uint64_t *x, uint64_t *y, bit swap) {
    const uint64_t mask = 0 - (uint64_t)swap; /* 0 or all-ones */
    const uint64_t delta = (*x ^ *y) & mask;

    *x ^= delta;
    *y ^= delta;
}
|
||||
|
||||
/* output x = min(input x,input y) */
|
||||
/* output y = max(input x,input y) */
|
||||
|
||||
static void minmax(uint32_t *x, uint32_t *y) {
|
||||
bit m;
|
||||
|
||||
m = is_smaller(*y, *x);
|
||||
cswap(x, y, m);
|
||||
}
|
||||
|
||||
static void minmax_63b(uint64_t *x, uint64_t *y) {
|
||||
bit m;
|
||||
|
||||
m = is_smaller_63b(*y, *x);
|
||||
cswap_63b(x, y, m);
|
||||
}
|
||||
|
||||
/* merge first half of x[0],x[step],...,x[(2*n-1)*step] with second half */
/* requires n to be a power of 2 */
/* Recursion: merge the even-indexed and odd-indexed subsequences, then */
/* compare-exchange neighbours (1,2), (3,4), ..., (2n-3,2n-2). */
static void merge(int n, uint32_t *x, int step) {
    const int wide = step * 2;
    int pair;

    if (n == 1) {
        minmax(x, x + step);
        return;
    }

    merge(n / 2, x, wide);
    merge(n / 2, x + step, wide);

    for (pair = 1; pair < 2 * n - 1; pair += 2) {
        uint32_t *lo = x + pair * step;

        minmax(lo, lo + step);
    }
}
|
||||
|
||||
/* 64-bit variant of merge: merges the first half of */
/* x[0],x[step],...,x[(2*n-1)*step] with the second half; n must be a power of 2. */
static void merge_63b(int n, uint64_t *x, int step) {
    const int wide = step * 2;
    int pair;

    if (n == 1) {
        minmax_63b(x, x + step);
        return;
    }

    merge_63b(n / 2, x, wide);
    merge_63b(n / 2, x + step, wide);

    for (pair = 1; pair < 2 * n - 1; pair += 2) {
        uint64_t *lo = x + pair * step;

        minmax_63b(lo, lo + step);
    }
}
|
||||
|
||||
/* sort x[0],x[1],...,x[n-1] in place */
/* requires n to be a power of 2 */
/* Sorts each half recursively, then merges the halves; the sequence of */
/* compare-exchanges depends only on n, not on the data. */
static void sort(int n, uint32_t *x) {
    if (n > 1) {
        const int half = n / 2;

        sort(half, x);
        sort(half, x + half);
        merge(half, x, 1);
    }
}
|
||||
|
||||
/* Sort the 64-bit values x[0..n-1] in place; n must be a power of 2. */
/* Sorts each half recursively, then merges the halves with merge_63b. */
void PQCLEAN_MCELIECE348864_CLEAN_sort_63b(int n, uint64_t *x) {
    if (n > 1) {
        const int half = n / 2;

        PQCLEAN_MCELIECE348864_CLEAN_sort_63b(half, x);
        PQCLEAN_MCELIECE348864_CLEAN_sort_63b(half, x + half);
        merge_63b(half, x, 1);
    }
}
|
||||
|
||||
/* y[pi[i]] = x[i] */
|
||||
/* requires n = 2^w */
|
||||
/* requires pi to be a permutation */
|
||||
static void composeinv(int n, uint32_t *y, const uint32_t *x, const uint32_t *pi) { // NC
|
||||
int i;
|
||||
uint32_t t[2 * N];
|
||||
|
||||
for (i = 0; i < n; ++i) {
|
||||
t[i] = x[i] | (pi[i] << 16);
|
||||
}
|
||||
|
||||
sort(n, t);
|
||||
|
||||
for (i = 0; i < n; ++i) {
|
||||
y[i] = t[i] & 0xFFFF;
|
||||
}
|
||||
}
|
||||
|
||||
/* ip[i] = j iff pi[i] = j */
/* requires n = 2^w */
/* requires pi to be a permutation */
/* Fills ip with the identity and routes it through composeinv keyed by pi. */
static void invert(int n, uint32_t *ip, const uint32_t *pi) {
    int k = 0;

    while (k < n) {
        ip[k] = k;
        ++k;
    }

    composeinv(n, ip, ip, pi);
}
|
||||
|
||||
|
||||
static void flow(int w, uint32_t *x, const uint32_t *y, int t) {
|
||||
bit m0;
|
||||
bit m1;
|
||||
|
||||
uint32_t b;
|
||||
uint32_t y_copy = *y;
|
||||
|
||||
m0 = is_smaller(*y & ((1 << w) - 1), *x & ((1 << w) - 1));
|
||||
m1 = is_smaller(0, t);
|
||||
|
||||
cswap(x, &y_copy, m0);
|
||||
b = m0 & m1;
|
||||
*x ^= b << w;
|
||||
}
|
||||
|
||||
/* input: permutation pi */
|
||||
/* output: (2w-1)n/2 (or 0 if n==1) control bits c[0],c[step],c[2*step],... */
|
||||
/* requires n = 2^w */
|
||||
static void controlbitsfrompermutation(int w, int n, int step, int off, unsigned char *c, const uint32_t *pi) {
|
||||
int i;
|
||||
int j;
|
||||
int k;
|
||||
int t;
|
||||
uint32_t ip[N] = {0};
|
||||
uint32_t I[2 * N] = {0};
|
||||
uint32_t P[2 * N] = {0};
|
||||
uint32_t PI[2 * N] = {0};
|
||||
uint32_t T[2 * N] = {0};
|
||||
uint32_t piflip[N] = {0};
|
||||
uint32_t subpi[2][N / 2] = {{0}};
|
||||
|
||||
if (w == 1) {
|
||||
c[ off / 8 ] |= (pi[0] & 1) << (off % 8);
|
||||
}
|
||||
if (w <= 1) {
|
||||
return;
|
||||
}
|
||||
|
||||
invert(n, ip, pi);
|
||||
|
||||
for (i = 0; i < n; ++i) {
|
||||
I[i] = ip[i] | (1 << w);
|
||||
I[n + i] = pi[i];
|
||||
}
|
||||
|
||||
for (i = 0; i < 2 * n; ++i) {
|
||||
P[i] = (i >> w) + (i & ((1 << w) - 2)) + ((i & 1) << w);
|
||||
}
|
||||
|
||||
for (t = 0; t < w; ++t) {
|
||||
composeinv(2 * n, PI, P, I);
|
||||
|
||||
for (i = 0; i < 2 * n; ++i) {
|
||||
flow(w, &P[i], &PI[i], t);
|
||||
}
|
||||
|
||||
for (i = 0; i < 2 * n; ++i) {
|
||||
T[i] = I[i ^ 1];
|
||||
}
|
||||
|
||||
composeinv(2 * n, I, I, T);
|
||||
|
||||
for (i = 0; i < 2 * n; ++i) {
|
||||
T[i] = P[i ^ 1];
|
||||
}
|
||||
|
||||
for (i = 0; i < 2 * n; ++i) {
|
||||
flow(w, &P[i], &T[i], 1);
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < n; ++i) {
|
||||
for (j = 0; j < w; ++j) {
|
||||
piflip[i] = pi[i];
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < n / 2; ++i) {
|
||||
c[ (off + i * step) / 8 ] |= ((P[i * 2] >> w) & 1) << ((off + i * step) % 8);
|
||||
}
|
||||
for (i = 0; i < n / 2; ++i) {
|
||||
c[ (off + ((w - 1)*n + i) * step) / 8 ] |= ((P[n + i * 2] >> w) & 1) << ((off + ((w - 1) * n + i) * step) % 8);
|
||||
}
|
||||
|
||||
for (i = 0; i < n / 2; ++i) {
|
||||
cswap(&piflip[i * 2], &piflip[i * 2 + 1], (P[n + i * 2] >> w) & 1);
|
||||
}
|
||||
|
||||
for (k = 0; k < 2; ++k) {
|
||||
for (i = 0; i < n / 2; ++i) {
|
||||
subpi[k][i] = piflip[i * 2 + k] >> 1;
|
||||
}
|
||||
}
|
||||
|
||||
for (k = 0; k < 2; ++k) {
|
||||
controlbitsfrompermutation(w - 1, n / 2, step * 2, off + step * (n / 2 + k), c, subpi[k]);
|
||||
}
|
||||
}
|
||||
|
||||
/* input: pi, a permutation*/
|
||||
/* output: out, control bits w.r.t. pi */
|
||||
void PQCLEAN_MCELIECE348864_CLEAN_controlbits(unsigned char *out, const uint32_t *pi) {
|
||||
unsigned int i;
|
||||
unsigned char c[ (2 * GFBITS - 1) * (1 << GFBITS) / 16 ];
|
||||
|
||||
for (i = 0; i < sizeof(c); i++) {
|
||||
c[i] = 0;
|
||||
}
|
||||
|
||||
controlbitsfrompermutation(GFBITS, (1 << GFBITS), 1, 0, c, pi);
|
||||
|
||||
for (i = 0; i < sizeof(c); i++) {
|
||||
out[i] = c[i];
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,15 +0,0 @@
|
||||
#ifndef PQCLEAN_MCELIECE348864_CLEAN_CONTROLBITS_H
|
||||
#define PQCLEAN_MCELIECE348864_CLEAN_CONTROLBITS_H
|
||||
/*
|
||||
This file is for functions required for generating the control bits of the Benes network w.r.t. a random permutation
|
||||
see the Lev-Pippenger-Valiant paper https://www.computer.org/csdl/trans/tc/1981/02/06312171.pdf
|
||||
*/
|
||||
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
void PQCLEAN_MCELIECE348864_CLEAN_sort_63b(int n, uint64_t *x);
|
||||
void PQCLEAN_MCELIECE348864_CLEAN_controlbits(unsigned char *out, const uint32_t *pi);
|
||||
|
||||
#endif
|
||||
|
||||
@ -1,7 +0,0 @@
|
||||
#ifndef PQCLEAN_MCELIECE348864_CLEAN_CRYPTO_HASH_H
|
||||
#define PQCLEAN_MCELIECE348864_CLEAN_CRYPTO_HASH_H
|
||||
#include "fips202.h"
|
||||
|
||||
#define crypto_hash_32b(out,in,inlen) shake256(out, 32, in, inlen)
|
||||
|
||||
#endif
|
||||
@ -1,90 +0,0 @@
|
||||
/*
|
||||
This file is for Niederreiter decryption
|
||||
*/
|
||||
|
||||
#include "decrypt.h"
|
||||
|
||||
#include "benes.h"
|
||||
#include "bm.h"
|
||||
#include "gf.h"
|
||||
#include "params.h"
|
||||
#include "root.h"
|
||||
#include "synd.h"
|
||||
#include "util.h"
|
||||
|
||||
/* Niederreiter decryption with the Berlekamp decoder */
|
||||
/* intput: sk, secret key */
|
||||
/* c, ciphertext */
|
||||
/* output: e, error vector */
|
||||
/* return: 0 for success; 1 for failure */
|
||||
int PQCLEAN_MCELIECE348864_CLEAN_decrypt(unsigned char *e, const unsigned char *sk, const unsigned char *c) {
|
||||
int i, w = 0;
|
||||
uint16_t check;
|
||||
|
||||
unsigned char r[ SYS_N / 8 ];
|
||||
|
||||
gf g[ SYS_T + 1 ];
|
||||
gf L[ SYS_N ];
|
||||
|
||||
gf s[ SYS_T * 2 ];
|
||||
gf s_cmp[ SYS_T * 2 ];
|
||||
gf locator[ SYS_T + 1 ];
|
||||
gf images[ SYS_N ];
|
||||
|
||||
gf t;
|
||||
|
||||
//
|
||||
|
||||
for (i = 0; i < SYND_BYTES; i++) {
|
||||
r[i] = c[i];
|
||||
}
|
||||
for (i = SYND_BYTES; i < SYS_N / 8; i++) {
|
||||
r[i] = 0;
|
||||
}
|
||||
|
||||
for (i = 0; i < SYS_T; i++) {
|
||||
g[i] = PQCLEAN_MCELIECE348864_CLEAN_load2(sk);
|
||||
g[i] &= GFMASK;
|
||||
sk += 2;
|
||||
}
|
||||
g[ SYS_T ] = 1;
|
||||
|
||||
PQCLEAN_MCELIECE348864_CLEAN_support_gen(L, sk);
|
||||
|
||||
PQCLEAN_MCELIECE348864_CLEAN_synd(s, g, L, r);
|
||||
|
||||
PQCLEAN_MCELIECE348864_CLEAN_bm(locator, s);
|
||||
|
||||
PQCLEAN_MCELIECE348864_CLEAN_root(images, locator, L);
|
||||
|
||||
//
|
||||
|
||||
for (i = 0; i < SYS_N / 8; i++) {
|
||||
e[i] = 0;
|
||||
}
|
||||
|
||||
for (i = 0; i < SYS_N; i++) {
|
||||
t = PQCLEAN_MCELIECE348864_CLEAN_gf_iszero(images[i]) & 1;
|
||||
|
||||
e[ i / 8 ] |= t << (i % 8);
|
||||
w += t;
|
||||
|
||||
}
|
||||
|
||||
PQCLEAN_MCELIECE348864_CLEAN_synd(s_cmp, g, L, e);
|
||||
|
||||
//
|
||||
|
||||
check = (uint16_t)w;
|
||||
check ^= SYS_T;
|
||||
|
||||
for (i = 0; i < SYS_T * 2; i++) {
|
||||
check |= s[i] ^ s_cmp[i];
|
||||
}
|
||||
|
||||
check -= 1;
|
||||
check >>= 15;
|
||||
|
||||
return check ^ 1;
|
||||
}
|
||||
|
||||
@ -1,10 +0,0 @@
|
||||
#ifndef PQCLEAN_MCELIECE348864_CLEAN_DECRYPT_H
|
||||
#define PQCLEAN_MCELIECE348864_CLEAN_DECRYPT_H
|
||||
/*
|
||||
This file is for Niederreiter decryption
|
||||
*/
|
||||
|
||||
int PQCLEAN_MCELIECE348864_CLEAN_decrypt(unsigned char * /*e*/, const unsigned char * /*sk*/, const unsigned char * /*c*/);
|
||||
|
||||
#endif
|
||||
|
||||
@ -1,138 +0,0 @@
|
||||
/*
|
||||
This file is for Niederreiter encryption
|
||||
*/
|
||||
|
||||
#include "encrypt.h"
|
||||
|
||||
#include "params.h"
|
||||
#include "randombytes.h"
|
||||
#include "util.h"
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "gf.h"
|
||||
|
||||
/* Branch-free equality test: returns 0xFF when x == y and 0x00 otherwise. */
static inline uint8_t same_mask(uint16_t x, uint16_t y) {
    uint32_t diff = (uint32_t)(x ^ y);

    diff = (diff - 1) >> 31; /* 1 iff x == y (only then does the decrement wrap) */

    return (uint8_t)(0 - diff);
}
|
||||
|
||||
/* output: e, an error vector of weight t */
|
||||
static void gen_e(unsigned char *e) {
|
||||
size_t i, j;
|
||||
int eq, count;
|
||||
|
||||
uint16_t ind_[ SYS_T * 2 ];
|
||||
uint8_t *ind_8 = (uint8_t *)ind_;
|
||||
uint16_t ind[ SYS_T * 2 ];
|
||||
uint8_t mask;
|
||||
unsigned char val[ SYS_T ];
|
||||
|
||||
while (1) {
|
||||
randombytes(ind_8, sizeof(ind_));
|
||||
// Copy to uint16_t ind_ in a little-endian way
|
||||
for (i = 0; i < sizeof(ind_); i += 2) {
|
||||
ind_[i / 2] = ((uint16_t)ind_8[i + 1]) << 8 | (uint16_t)ind_8[i];
|
||||
}
|
||||
|
||||
for (i = 0; i < SYS_T * 2; i++) {
|
||||
ind_[i] &= GFMASK;
|
||||
}
|
||||
|
||||
// moving and counting indices in the correct range
|
||||
|
||||
count = 0;
|
||||
for (i = 0; i < SYS_T * 2; i++) {
|
||||
if (ind_[i] < SYS_N) {
|
||||
ind[ count++ ] = ind_[i];
|
||||
}
|
||||
}
|
||||
|
||||
if (count < SYS_T) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// check for repetition
|
||||
|
||||
eq = 0;
|
||||
|
||||
for (i = 1; i < SYS_T; i++) {
|
||||
for (j = 0; j < i; j++) {
|
||||
if (ind[i] == ind[j]) {
|
||||
eq = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (eq == 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (j = 0; j < SYS_T; j++) {
|
||||
val[j] = 1 << (ind[j] & 7);
|
||||
}
|
||||
|
||||
for (i = 0; i < SYS_N / 8; i++) {
|
||||
e[i] = 0;
|
||||
|
||||
for (j = 0; j < SYS_T; j++) {
|
||||
mask = same_mask((uint16_t)i, (ind[j] >> 3));
|
||||
|
||||
e[i] |= val[j] & mask;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* input: public key pk, error vector e */
|
||||
/* output: syndrome s */
|
||||
static void syndrome(unsigned char *s, const unsigned char *pk, const unsigned char *e) {
|
||||
unsigned char b, row[SYS_N / 8];
|
||||
const unsigned char *pk_ptr = pk;
|
||||
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < SYND_BYTES; i++) {
|
||||
s[i] = 0;
|
||||
}
|
||||
|
||||
for (i = 0; i < PK_NROWS; i++) {
|
||||
for (j = 0; j < SYS_N / 8; j++) {
|
||||
row[j] = 0;
|
||||
}
|
||||
|
||||
for (j = 0; j < PK_ROW_BYTES; j++) {
|
||||
row[ SYS_N / 8 - PK_ROW_BYTES + j ] = pk_ptr[j];
|
||||
}
|
||||
|
||||
row[i / 8] |= 1 << (i % 8);
|
||||
|
||||
b = 0;
|
||||
for (j = 0; j < SYS_N / 8; j++) {
|
||||
b ^= row[j] & e[j];
|
||||
}
|
||||
|
||||
b ^= b >> 4;
|
||||
b ^= b >> 2;
|
||||
b ^= b >> 1;
|
||||
b &= 1;
|
||||
|
||||
s[ i / 8 ] |= (b << (i % 8));
|
||||
|
||||
pk_ptr += PK_ROW_BYTES;
|
||||
}
|
||||
}
|
||||
|
||||
/* input: public key pk */
/* output: e, a freshly generated error vector (see gen_e) */
/*         s, the syndrome of e under pk */
void PQCLEAN_MCELIECE348864_CLEAN_encrypt(unsigned char *s, unsigned char *e, const unsigned char *pk) {
    gen_e(e);
    syndrome(s, pk, e);
}
|
||||
|
||||
@ -1,11 +0,0 @@
|
||||
#ifndef PQCLEAN_MCELIECE348864_CLEAN_ENCRYPT_H
|
||||
#define PQCLEAN_MCELIECE348864_CLEAN_ENCRYPT_H
|
||||
/*
|
||||
This file is for Niederreiter encryption
|
||||
*/
|
||||
|
||||
|
||||
void PQCLEAN_MCELIECE348864_CLEAN_encrypt(unsigned char * /*s*/, unsigned char * /*e*/, const unsigned char * /*pk*/);
|
||||
|
||||
#endif
|
||||
|
||||
@ -1,139 +0,0 @@
|
||||
/*
|
||||
This file is for functions for field arithmetic
|
||||
*/
|
||||
|
||||
#include "gf.h"
|
||||
|
||||
#include "params.h"
|
||||
|
||||
/* Branch-free zero test. */
/* return: 0x1FFF (a mask covering every field bit) if a == 0, else 0 */
gf PQCLEAN_MCELIECE348864_CLEAN_gf_iszero(gf a) {
    /* a - 1 wraps to 0xFFFFFFFF only for a == 0; any other 16-bit value */
    /* leaves bits 19 and up clear */
    const uint32_t wide = (uint32_t)a - 1;

    return (gf)(wide >> 19);
}
|
||||
|
||||
/* input: field elements in0, in1 */
/* return: in0 + in1, which in characteristic 2 is the bitwise XOR */
gf PQCLEAN_MCELIECE348864_CLEAN_gf_add(gf in0, gf in1) {
    const gf sum = in0 ^ in1;

    return sum;
}
|
||||
|
||||
/* input: field elements in0, in1 */
/* return: in0 * in1 */
/* Carry-less schoolbook product over GFBITS bits of in1, followed by two */
/* folding rounds that bring the high bits back below 2^GFBITS. */
gf PQCLEAN_MCELIECE348864_CLEAN_gf_mul(gf in0, gf in1) {
    const uint32_t a = in0;
    const uint32_t b = in1;
    uint32_t acc = 0;
    uint32_t hi;
    int bit;

    /* XOR together a shifted copy of a for every set bit of b */
    for (bit = 0; bit < GFBITS; bit++) {
        acc ^= a * (b & ((uint32_t)1 << bit));
    }

    /* reduction: fold bits 14..22, then the remaining bits 12..13 */
    hi = acc & 0x7FC000;
    acc ^= (hi >> 9) ^ (hi >> 12);

    hi = acc & 0x3000;
    acc ^= (hi >> 9) ^ (hi >> 12);

    return (gf)(acc & ((1 << GFBITS) - 1));
}
|
||||
|
||||
/* input: field element in */
|
||||
/* return: in^2 */
|
||||
static inline gf gf_sq(gf in) {
|
||||
const uint32_t B[] = {0x55555555, 0x33333333, 0x0F0F0F0F, 0x00FF00FF};
|
||||
|
||||
uint32_t x = in;
|
||||
uint32_t t;
|
||||
|
||||
x = (x | (x << 8)) & B[3];
|
||||
x = (x | (x << 4)) & B[2];
|
||||
x = (x | (x << 2)) & B[1];
|
||||
x = (x | (x << 1)) & B[0];
|
||||
|
||||
t = x & 0x7FC000;
|
||||
x ^= t >> 9;
|
||||
x ^= t >> 12;
|
||||
|
||||
t = x & 0x3000;
|
||||
x ^= t >> 9;
|
||||
x ^= t >> 12;
|
||||
|
||||
return x & ((1 << GFBITS) - 1);
|
||||
}
|
||||
|
||||
/* input: field element in */
/* return: in raised to the exponent 111111111110 (binary), built with a */
/*         fixed chain of squarings and multiplications; the comments give */
/*         the exponent reached so far, in binary */
gf PQCLEAN_MCELIECE348864_CLEAN_gf_inv(gf in) {
    gf e2;  /* exponent 11 */
    gf e4;  /* exponent 1111 */
    gf acc;
    int k;

    e2 = PQCLEAN_MCELIECE348864_CLEAN_gf_mul(gf_sq(in), in); // 11

    e4 = PQCLEAN_MCELIECE348864_CLEAN_gf_mul(gf_sq(gf_sq(e2)), e2); // 1111

    acc = e4;
    for (k = 0; k < 4; k++) {
        acc = gf_sq(acc);
    }
    acc = PQCLEAN_MCELIECE348864_CLEAN_gf_mul(acc, e4); // 11111111

    acc = gf_sq(gf_sq(acc));
    acc = PQCLEAN_MCELIECE348864_CLEAN_gf_mul(acc, e2); // 1111111111

    acc = PQCLEAN_MCELIECE348864_CLEAN_gf_mul(gf_sq(acc), in); // 11111111111

    return gf_sq(acc); // 111111111110
}
|
||||
|
||||
/* input: field element den, num */
|
||||
/* return: (num/den) */
|
||||
gf PQCLEAN_MCELIECE348864_CLEAN_gf_frac(gf den, gf num) {
|
||||
return PQCLEAN_MCELIECE348864_CLEAN_gf_mul(PQCLEAN_MCELIECE348864_CLEAN_gf_inv(den), num);
|
||||
}
|
||||
|
||||
/* input: in0, in1 in GF((2^m)^t)*/
|
||||
/* output: out = in0*in1 */
|
||||
void PQCLEAN_MCELIECE348864_CLEAN_GF_mul(gf *out, const gf *in0, const gf *in1) {
|
||||
int i, j;
|
||||
|
||||
gf prod[ SYS_T * 2 - 1 ];
|
||||
|
||||
for (i = 0; i < SYS_T * 2 - 1; i++) {
|
||||
prod[i] = 0;
|
||||
}
|
||||
|
||||
for (i = 0; i < SYS_T; i++) {
|
||||
for (j = 0; j < SYS_T; j++) {
|
||||
prod[i + j] ^= PQCLEAN_MCELIECE348864_CLEAN_gf_mul(in0[i], in1[j]);
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
for (i = (SYS_T - 1) * 2; i >= SYS_T; i--) {
|
||||
prod[i - SYS_T + 9] ^= PQCLEAN_MCELIECE348864_CLEAN_gf_mul(prod[i], (gf) 877);
|
||||
prod[i - SYS_T + 7] ^= PQCLEAN_MCELIECE348864_CLEAN_gf_mul(prod[i], (gf) 2888);
|
||||
prod[i - SYS_T + 5] ^= PQCLEAN_MCELIECE348864_CLEAN_gf_mul(prod[i], (gf) 1781);
|
||||
prod[i - SYS_T + 0] ^= PQCLEAN_MCELIECE348864_CLEAN_gf_mul(prod[i], (gf) 373);
|
||||
}
|
||||
|
||||
for (i = 0; i < SYS_T; i++) {
|
||||
out[i] = prod[i];
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,22 +0,0 @@
|
||||
#ifndef PQCLEAN_MCELIECE348864_CLEAN_GF_H
|
||||
#define PQCLEAN_MCELIECE348864_CLEAN_GF_H
|
||||
/*
|
||||
This file is for functions for field arithmetic
|
||||
*/
|
||||
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
typedef uint16_t gf;
|
||||
|
||||
gf PQCLEAN_MCELIECE348864_CLEAN_gf_iszero(gf a);
|
||||
gf PQCLEAN_MCELIECE348864_CLEAN_gf_add(gf in0, gf in1);
|
||||
gf PQCLEAN_MCELIECE348864_CLEAN_gf_mul(gf in0, gf in1);
|
||||
gf PQCLEAN_MCELIECE348864_CLEAN_gf_frac(gf den, gf num);
|
||||
gf PQCLEAN_MCELIECE348864_CLEAN_gf_inv(gf in);
|
||||
uint64_t PQCLEAN_MCELIECE348864_CLEAN_gf_mul2(gf a, gf b0, gf b1);
|
||||
|
||||
void PQCLEAN_MCELIECE348864_CLEAN_GF_mul(gf *out, const gf *in0, const gf *in1);
|
||||
|
||||
#endif
|
||||
|
||||
@ -1,136 +0,0 @@
|
||||
#include "api.h"
|
||||
|
||||
#include "aes256ctr.h"
|
||||
#include "controlbits.h"
|
||||
#include "crypto_hash.h"
|
||||
#include "decrypt.h"
|
||||
#include "encrypt.h"
|
||||
#include "params.h"
|
||||
#include "pk_gen.h"
|
||||
#include "randombytes.h"
|
||||
#include "sk_gen.h"
|
||||
#include "util.h"
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
int PQCLEAN_MCELIECE348864_CLEAN_crypto_kem_enc(
|
||||
uint8_t *c,
|
||||
uint8_t *key,
|
||||
const uint8_t *pk
|
||||
) {
|
||||
uint8_t two_e[ 1 + SYS_N / 8 ] = {2};
|
||||
uint8_t *e = two_e + 1;
|
||||
uint8_t one_ec[ 1 + SYS_N / 8 + (SYND_BYTES + 32) ] = {1};
|
||||
|
||||
PQCLEAN_MCELIECE348864_CLEAN_encrypt(c, e, pk);
|
||||
|
||||
crypto_hash_32b(c + SYND_BYTES, two_e, sizeof(two_e));
|
||||
|
||||
memcpy(one_ec + 1, e, SYS_N / 8);
|
||||
memcpy(one_ec + 1 + SYS_N / 8, c, SYND_BYTES + 32);
|
||||
|
||||
crypto_hash_32b(key, one_ec, sizeof(one_ec));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int PQCLEAN_MCELIECE348864_CLEAN_crypto_kem_dec(
|
||||
uint8_t *key,
|
||||
const uint8_t *c,
|
||||
const uint8_t *sk
|
||||
) {
|
||||
int i;
|
||||
|
||||
uint8_t ret_confirm = 0;
|
||||
uint8_t ret_decrypt = 0;
|
||||
|
||||
uint16_t m;
|
||||
|
||||
uint8_t conf[32];
|
||||
uint8_t two_e[ 1 + SYS_N / 8 ] = {2};
|
||||
uint8_t *e = two_e + 1;
|
||||
uint8_t preimage[ 1 + SYS_N / 8 + (SYND_BYTES + 32) ];
|
||||
uint8_t *x = preimage;
|
||||
|
||||
//
|
||||
|
||||
ret_decrypt = (uint8_t)PQCLEAN_MCELIECE348864_CLEAN_decrypt(e, sk + SYS_N / 8, c);
|
||||
|
||||
crypto_hash_32b(conf, two_e, sizeof(two_e));
|
||||
|
||||
for (i = 0; i < 32; i++) {
|
||||
ret_confirm |= conf[i] ^ c[SYND_BYTES + i];
|
||||
}
|
||||
|
||||
m = ret_decrypt | ret_confirm;
|
||||
m -= 1;
|
||||
m >>= 8;
|
||||
|
||||
*x++ = (~m & 0) | (m & 1);
|
||||
for (i = 0; i < SYS_N / 8; i++) {
|
||||
*x++ = (~m & sk[i]) | (m & e[i]);
|
||||
}
|
||||
for (i = 0; i < SYND_BYTES + 32; i++) {
|
||||
*x++ = c[i];
|
||||
}
|
||||
|
||||
crypto_hash_32b(key, preimage, sizeof(preimage));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int PQCLEAN_MCELIECE348864_CLEAN_crypto_kem_keypair
|
||||
(
|
||||
uint8_t *pk,
|
||||
uint8_t *sk
|
||||
) {
|
||||
int i;
|
||||
uint8_t seed[ 32 ];
|
||||
uint8_t r[ SYS_T * 2 + (1 << GFBITS)*sizeof(uint32_t) + SYS_N / 8 + 32 ];
|
||||
uint8_t nonce[ 16 ] = {0};
|
||||
uint8_t *rp;
|
||||
|
||||
gf f[ SYS_T ]; // element in GF(2^mt)
|
||||
gf irr[ SYS_T ]; // Goppa polynomial
|
||||
uint32_t perm[ 1 << GFBITS ]; // random permutation
|
||||
|
||||
randombytes(seed, sizeof(seed));
|
||||
|
||||
while (1) {
|
||||
rp = r;
|
||||
PQCLEAN_MCELIECE348864_CLEAN_aes256ctr(r, sizeof(r), nonce, seed);
|
||||
memcpy(seed, &r[ sizeof(r) - 32 ], 32);
|
||||
|
||||
for (i = 0; i < SYS_T; i++) {
|
||||
f[i] = PQCLEAN_MCELIECE348864_CLEAN_load2(rp + i * 2);
|
||||
}
|
||||
rp += sizeof(f);
|
||||
if (PQCLEAN_MCELIECE348864_CLEAN_genpoly_gen(irr, f)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (i = 0; i < (1 << GFBITS); i++) {
|
||||
perm[i] = PQCLEAN_MCELIECE348864_CLEAN_load4(rp + i * 4);
|
||||
}
|
||||
rp += sizeof(perm);
|
||||
if (PQCLEAN_MCELIECE348864_CLEAN_perm_check(perm)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (i = 0; i < SYS_T; i++) {
|
||||
PQCLEAN_MCELIECE348864_CLEAN_store2(sk + SYS_N / 8 + i * 2, irr[i]);
|
||||
}
|
||||
if (PQCLEAN_MCELIECE348864_CLEAN_pk_gen(pk, perm, sk + SYS_N / 8)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
memcpy(sk, rp, SYS_N / 8);
|
||||
PQCLEAN_MCELIECE348864_CLEAN_controlbits(sk + SYS_N / 8 + IRR_BYTES, perm);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1,21 +0,0 @@
|
||||
#ifndef PQCLEAN_MCELIECE348864_CLEAN_PARAMS_H
|
||||
#define PQCLEAN_MCELIECE348864_CLEAN_PARAMS_H
|
||||
|
||||
#define GFBITS 12
|
||||
#define SYS_N 3488
|
||||
#define SYS_T 64
|
||||
|
||||
#define COND_BYTES ((1 << (GFBITS-4))*(2*GFBITS - 1))
|
||||
#define IRR_BYTES (SYS_T * 2)
|
||||
|
||||
#define PK_NROWS (SYS_T*GFBITS)
|
||||
#define PK_NCOLS (SYS_N - PK_NROWS)
|
||||
#define PK_ROW_BYTES ((PK_NCOLS + 7)/8)
|
||||
|
||||
#define SK_BYTES (SYS_N/8 + IRR_BYTES + COND_BYTES)
|
||||
#define SYND_BYTES ((PK_NROWS + 7)/8)
|
||||
|
||||
#define GFMASK ((1 << GFBITS) - 1)
|
||||
|
||||
#endif
|
||||
|
||||
@ -1,144 +0,0 @@
|
||||
/*
|
||||
This file is for public-key generation
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "benes.h"
|
||||
#include "controlbits.h"
|
||||
#include "gf.h"
|
||||
#include "params.h"
|
||||
#include "pk_gen.h"
|
||||
#include "root.h"
|
||||
#include "util.h"
|
||||
|
||||
/* input: secret key sk */
|
||||
/* output: public key pk */
|
||||
int PQCLEAN_MCELIECE348864_CLEAN_pk_gen(uint8_t *pk, uint32_t *perm, const uint8_t *sk) {
|
||||
int i, j, k;
|
||||
int row, c;
|
||||
|
||||
uint64_t buf[ 1 << GFBITS ];
|
||||
|
||||
uint8_t mat[ GFBITS * SYS_T ][ SYS_N / 8 ];
|
||||
uint8_t mask;
|
||||
uint8_t b;
|
||||
|
||||
gf g[ SYS_T + 1 ]; // Goppa polynomial
|
||||
gf L[ SYS_N ]; // support
|
||||
gf inv[ SYS_N ];
|
||||
|
||||
//
|
||||
|
||||
g[ SYS_T ] = 1;
|
||||
|
||||
for (i = 0; i < SYS_T; i++) {
|
||||
g[i] = PQCLEAN_MCELIECE348864_CLEAN_load2(sk);
|
||||
g[i] &= GFMASK;
|
||||
sk += 2;
|
||||
}
|
||||
|
||||
for (i = 0; i < (1 << GFBITS); i++) {
|
||||
buf[i] = perm[i];
|
||||
buf[i] <<= 31;
|
||||
buf[i] |= i;
|
||||
}
|
||||
|
||||
PQCLEAN_MCELIECE348864_CLEAN_sort_63b(1 << GFBITS, buf);
|
||||
|
||||
for (i = 0; i < (1 << GFBITS); i++) {
|
||||
perm[i] = buf[i] & GFMASK;
|
||||
}
|
||||
for (i = 0; i < SYS_N; i++) {
|
||||
L[i] = PQCLEAN_MCELIECE348864_CLEAN_bitrev((gf)perm[i]);
|
||||
}
|
||||
|
||||
// filling the matrix
|
||||
|
||||
PQCLEAN_MCELIECE348864_CLEAN_root(inv, g, L);
|
||||
|
||||
for (i = 0; i < SYS_N; i++) {
|
||||
inv[i] = PQCLEAN_MCELIECE348864_CLEAN_gf_inv(inv[i]);
|
||||
}
|
||||
|
||||
for (i = 0; i < PK_NROWS; i++) {
|
||||
for (j = 0; j < SYS_N / 8; j++) {
|
||||
mat[i][j] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < SYS_T; i++) {
|
||||
for (j = 0; j < SYS_N; j += 8) {
|
||||
for (k = 0; k < GFBITS; k++) {
|
||||
b = (inv[j + 7] >> k) & 1;
|
||||
b <<= 1;
|
||||
b |= (inv[j + 6] >> k) & 1;
|
||||
b <<= 1;
|
||||
b |= (inv[j + 5] >> k) & 1;
|
||||
b <<= 1;
|
||||
b |= (inv[j + 4] >> k) & 1;
|
||||
b <<= 1;
|
||||
b |= (inv[j + 3] >> k) & 1;
|
||||
b <<= 1;
|
||||
b |= (inv[j + 2] >> k) & 1;
|
||||
b <<= 1;
|
||||
b |= (inv[j + 1] >> k) & 1;
|
||||
b <<= 1;
|
||||
b |= (inv[j + 0] >> k) & 1;
|
||||
|
||||
mat[ i * GFBITS + k ][ j / 8 ] = b;
|
||||
}
|
||||
}
|
||||
|
||||
for (j = 0; j < SYS_N; j++) {
|
||||
inv[j] = PQCLEAN_MCELIECE348864_CLEAN_gf_mul(inv[j], L[j]);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// gaussian elimination
|
||||
|
||||
for (i = 0; i < (GFBITS * SYS_T + 7) / 8; i++) {
|
||||
for (j = 0; j < 8; j++) {
|
||||
row = i * 8 + j;
|
||||
|
||||
if (row >= GFBITS * SYS_T) {
|
||||
break;
|
||||
}
|
||||
|
||||
for (k = row + 1; k < GFBITS * SYS_T; k++) {
|
||||
mask = mat[ row ][ i ] ^ mat[ k ][ i ];
|
||||
mask >>= j;
|
||||
mask &= 1;
|
||||
mask = -mask;
|
||||
|
||||
for (c = 0; c < SYS_N / 8; c++) {
|
||||
mat[ row ][ c ] ^= mat[ k ][ c ] & mask;
|
||||
}
|
||||
}
|
||||
|
||||
if ( ((mat[ row ][ i ] >> j) & 1) == 0 ) { // return if not systematic
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (k = 0; k < GFBITS * SYS_T; k++) {
|
||||
if (k != row) {
|
||||
mask = mat[ k ][ i ] >> j;
|
||||
mask &= 1;
|
||||
mask = -mask;
|
||||
|
||||
for (c = 0; c < SYS_N / 8; c++) {
|
||||
mat[ k ][ c ] ^= mat[ row ][ c ] & mask;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < PK_NROWS; i++) {
|
||||
memcpy(pk + i * PK_ROW_BYTES, mat[i] + PK_NROWS / 8, PK_ROW_BYTES);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1,13 +0,0 @@
|
||||
#ifndef PQCLEAN_MCELIECE348864_CLEAN_PK_GEN_H
|
||||
#define PQCLEAN_MCELIECE348864_CLEAN_PK_GEN_H
|
||||
/*
|
||||
This file is for public-key generation
|
||||
*/
|
||||
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
int PQCLEAN_MCELIECE348864_CLEAN_pk_gen(uint8_t * /*pk*/, uint32_t * /*perm*/, const uint8_t * /*sk*/);
|
||||
|
||||
#endif
|
||||
|
||||
@ -1,33 +0,0 @@
|
||||
/*
|
||||
This file is for evaluating a polynomial at one or more field elements
|
||||
*/
|
||||
#include "root.h"
|
||||
|
||||
#include "params.h"
|
||||
|
||||
/* input: polynomial f and field element a */
|
||||
/* return f(a) */
|
||||
gf PQCLEAN_MCELIECE348864_CLEAN_eval(gf *f, gf a) {
|
||||
int i;
|
||||
gf r;
|
||||
|
||||
r = f[ SYS_T ];
|
||||
|
||||
for (i = SYS_T - 1; i >= 0; i--) {
|
||||
r = PQCLEAN_MCELIECE348864_CLEAN_gf_mul(r, a);
|
||||
r = PQCLEAN_MCELIECE348864_CLEAN_gf_add(r, f[i]);
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
/* input: polynomial f and list of field elements L */
|
||||
/* output: out = [ f(a) for a in L ] */
|
||||
void PQCLEAN_MCELIECE348864_CLEAN_root(gf *out, gf *f, gf *L) {
|
||||
int i;
|
||||
|
||||
for (i = 0; i < SYS_N; i++) {
|
||||
out[i] = PQCLEAN_MCELIECE348864_CLEAN_eval(f, L[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,14 +0,0 @@
|
||||
#ifndef PQCLEAN_MCELIECE348864_CLEAN_ROOT_H
|
||||
#define PQCLEAN_MCELIECE348864_CLEAN_ROOT_H
|
||||
/*
|
||||
This file is for evaluating a polynomial at one or more field elements
|
||||
*/
|
||||
|
||||
|
||||
#include "gf.h"
|
||||
|
||||
gf PQCLEAN_MCELIECE348864_CLEAN_eval(gf * /*f*/, gf /*a*/);
|
||||
void PQCLEAN_MCELIECE348864_CLEAN_root(gf * /*out*/, gf * /*f*/, gf * /*L*/);
|
||||
|
||||
#endif
|
||||
|
||||
@ -1,98 +0,0 @@
|
||||
/*
|
||||
This file is for secret-key generation
|
||||
*/
|
||||
|
||||
#include "sk_gen.h"
|
||||
|
||||
#include "controlbits.h"
|
||||
#include "gf.h"
|
||||
#include "params.h"
|
||||
#include "util.h"
|
||||
|
||||
/* input: f, element in GF((2^m)^t) */
|
||||
/* output: out, minimal polynomial of f */
|
||||
/* return: 0 for success and -1 for failure */
|
||||
int PQCLEAN_MCELIECE348864_CLEAN_genpoly_gen(gf *out, gf *f) {
|
||||
int i, j, k, c;
|
||||
|
||||
gf mat[ SYS_T + 1 ][ SYS_T ];
|
||||
gf mask, inv, t;
|
||||
|
||||
// fill matrix
|
||||
|
||||
mat[0][0] = 1;
|
||||
|
||||
for (i = 1; i < SYS_T; i++) {
|
||||
mat[0][i] = 0;
|
||||
}
|
||||
|
||||
for (i = 0; i < SYS_T; i++) {
|
||||
mat[1][i] = f[i];
|
||||
}
|
||||
|
||||
for (j = 2; j <= SYS_T; j++) {
|
||||
PQCLEAN_MCELIECE348864_CLEAN_GF_mul(mat[j], mat[j - 1], f);
|
||||
}
|
||||
|
||||
// gaussian
|
||||
|
||||
for (j = 0; j < SYS_T; j++) {
|
||||
for (k = j + 1; k < SYS_T; k++) {
|
||||
mask = PQCLEAN_MCELIECE348864_CLEAN_gf_iszero(mat[ j ][ j ]);
|
||||
|
||||
for (c = j; c < SYS_T + 1; c++) {
|
||||
mat[ c ][ j ] ^= mat[ c ][ k ] & mask;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if ( mat[ j ][ j ] == 0 ) { // return if not systematic
|
||||
return -1;
|
||||
}
|
||||
|
||||
inv = PQCLEAN_MCELIECE348864_CLEAN_gf_inv(mat[j][j]);
|
||||
|
||||
for (c = j; c < SYS_T + 1; c++) {
|
||||
mat[ c ][ j ] = PQCLEAN_MCELIECE348864_CLEAN_gf_mul(mat[ c ][ j ], inv) ;
|
||||
}
|
||||
|
||||
for (k = 0; k < SYS_T; k++) {
|
||||
if (k != j) {
|
||||
t = mat[ j ][ k ];
|
||||
|
||||
for (c = j; c < SYS_T + 1; c++) {
|
||||
mat[ c ][ k ] ^= PQCLEAN_MCELIECE348864_CLEAN_gf_mul(mat[ c ][ j ], t);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < SYS_T; i++) {
|
||||
out[i] = mat[ SYS_T ][ i ];
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* input: permutation p represented as a list of 32-bit intergers */
|
||||
/* output: -1 if some interger repeats in p */
|
||||
/* 0 otherwise */
|
||||
int PQCLEAN_MCELIECE348864_CLEAN_perm_check(const uint32_t *p) {
|
||||
int i;
|
||||
uint64_t list[1 << GFBITS];
|
||||
|
||||
for (i = 0; i < (1 << GFBITS); i++) {
|
||||
list[i] = p[i];
|
||||
}
|
||||
|
||||
PQCLEAN_MCELIECE348864_CLEAN_sort_63b(1 << GFBITS, list);
|
||||
|
||||
for (i = 1; i < (1 << GFBITS); i++) {
|
||||
if (list[i - 1] == list[i]) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1,16 +0,0 @@
|
||||
#ifndef PQCLEAN_MCELIECE348864_CLEAN_SK_GEN_H
|
||||
#define PQCLEAN_MCELIECE348864_CLEAN_SK_GEN_H
|
||||
/*
|
||||
This file is for secret-key generation
|
||||
*/
|
||||
|
||||
|
||||
#include "gf.h"
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
int PQCLEAN_MCELIECE348864_CLEAN_genpoly_gen(gf * /*out*/, gf * /*f*/);
|
||||
int PQCLEAN_MCELIECE348864_CLEAN_perm_check(const uint32_t * /*p*/);
|
||||
|
||||
#endif
|
||||
|
||||
@ -1,33 +0,0 @@
|
||||
/*
|
||||
This file is for syndrome computation
|
||||
*/
|
||||
|
||||
#include "synd.h"
|
||||
|
||||
#include "params.h"
|
||||
#include "root.h"
|
||||
|
||||
|
||||
/* input: Goppa polynomial f, support L, received word r */
|
||||
/* output: out, the syndrome of length 2t */
|
||||
void PQCLEAN_MCELIECE348864_CLEAN_synd(gf *out, gf *f, gf *L, const unsigned char *r) {
|
||||
int i, j;
|
||||
gf e, e_inv, c;
|
||||
|
||||
for (j = 0; j < 2 * SYS_T; j++) {
|
||||
out[j] = 0;
|
||||
}
|
||||
|
||||
for (i = 0; i < SYS_N; i++) {
|
||||
c = (r[i / 8] >> (i % 8)) & 1;
|
||||
|
||||
e = PQCLEAN_MCELIECE348864_CLEAN_eval(f, L[i]);
|
||||
e_inv = PQCLEAN_MCELIECE348864_CLEAN_gf_inv(PQCLEAN_MCELIECE348864_CLEAN_gf_mul(e, e));
|
||||
|
||||
for (j = 0; j < 2 * SYS_T; j++) {
|
||||
out[j] = PQCLEAN_MCELIECE348864_CLEAN_gf_add(out[j], PQCLEAN_MCELIECE348864_CLEAN_gf_mul(e_inv, c));
|
||||
e_inv = PQCLEAN_MCELIECE348864_CLEAN_gf_mul(e_inv, L[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,12 +0,0 @@
|
||||
#ifndef PQCLEAN_MCELIECE348864_CLEAN_SYND_H
|
||||
#define PQCLEAN_MCELIECE348864_CLEAN_SYND_H
|
||||
/*
|
||||
This file is for syndrome computation
|
||||
*/
|
||||
|
||||
#include "gf.h"
|
||||
|
||||
void PQCLEAN_MCELIECE348864_CLEAN_synd(gf * /*out*/, gf * /*f*/, gf * /*L*/, const unsigned char * /*r*/);
|
||||
|
||||
#endif
|
||||
|
||||
@ -1,42 +0,0 @@
|
||||
/*
|
||||
This file is for matrix transposition
|
||||
*/
|
||||
|
||||
#include "transpose.h"
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/* input: in, a 64x64 matrix over GF(2), one 64-bit word per row */
/* output: out, transpose of in */
void PQCLEAN_MCELIECE348864_CLEAN_transpose_64x64(uint64_t *out, const uint64_t *in) {
    /* masks[d][0] keeps the low half of every 2^(d+1)-bit group, */
    /* masks[d][1] keeps the high half */
    uint64_t masks[6][2] = {
        {0x5555555555555555, 0xAAAAAAAAAAAAAAAA},
        {0x3333333333333333, 0xCCCCCCCCCCCCCCCC},
        {0x0F0F0F0F0F0F0F0F, 0xF0F0F0F0F0F0F0F0},
        {0x00FF00FF00FF00FF, 0xFF00FF00FF00FF00},
        {0x0000FFFF0000FFFF, 0xFFFF0000FFFF0000},
        {0x00000000FFFFFFFF, 0xFFFFFFFF00000000}
    };
    int level, width, base, row;
    uint64_t lo, hi;

    for (row = 0; row < 64; row++) {
        out[row] = in[row];
    }

    /* swap off-diagonal blocks of halving size: 32x32 first, 1x1 last */
    for (level = 5; level >= 0; level--) {
        width = 1 << level;

        for (base = 0; base < 64; base += 2 * width) {
            for (row = base; row < base + width; row++) {
                lo = (out[row] & masks[level][0]) | ((out[row + width] & masks[level][0]) << width);
                hi = ((out[row] & masks[level][1]) >> width) | (out[row + width] & masks[level][1]);

                out[row] = lo;
                out[row + width] = hi;
            }
        }
    }
}
|
||||
|
||||
@ -1,13 +0,0 @@
|
||||
#ifndef PQCLEAN_MCELIECE348864_CLEAN_TRANSPOSE_H
|
||||
#define PQCLEAN_MCELIECE348864_CLEAN_TRANSPOSE_H
|
||||
/*
|
||||
This file is for matrix transposition
|
||||
*/
|
||||
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
void PQCLEAN_MCELIECE348864_CLEAN_transpose_64x64(uint64_t * /*out*/, const uint64_t * /*in*/);
|
||||
|
||||
#endif
|
||||
|
||||
@ -1,67 +0,0 @@
|
||||
/*
|
||||
This file is for loading/storing data in a little-endian fashion
|
||||
*/
|
||||
|
||||
#include "util.h"
|
||||
|
||||
#include "params.h"
|
||||
|
||||
/* Store the field element a into dest[0..1], low byte first. */
void PQCLEAN_MCELIECE348864_CLEAN_store2(unsigned char *dest, gf a) {
    unsigned char *lo = dest;
    unsigned char *hi = dest + 1;

    *lo = a & 0xFF;
    *hi = a >> 8;
}
|
||||
|
||||
uint16_t PQCLEAN_MCELIECE348864_CLEAN_load2(const unsigned char *src) {
|
||||
uint16_t a;
|
||||
|
||||
a = src[1];
|
||||
a <<= 8;
|
||||
a |= src[0];
|
||||
|
||||
return a & GFMASK;
|
||||
}
|
||||
|
||||
/* Read a 32-bit value from in[0..3], least significant byte first. */
uint32_t PQCLEAN_MCELIECE348864_CLEAN_load4(const unsigned char *in) {
    return  (uint32_t) in[0]
            | ((uint32_t) in[1] << 8)
            | ((uint32_t) in[2] << 16)
            | ((uint32_t) in[3] << 24);
}
|
||||
|
||||
/* Write the 64-bit value in to out[0..7], least significant byte first. */
void PQCLEAN_MCELIECE348864_CLEAN_store8(unsigned char *out, uint64_t in) {
    int k;

    for (k = 0; k < 8; k++) {
        out[k] = (in >> (8 * k)) & 0xFF;
    }
}
|
||||
|
||||
/* Read a 64-bit value from in[0..7], least significant byte first. */
uint64_t PQCLEAN_MCELIECE348864_CLEAN_load8(const unsigned char *in) {
    uint64_t word = 0;
    int k;

    for (k = 0; k < 8; k++) {
        word |= (uint64_t) in[k] << (8 * k);
    }

    return word;
}
|
||||
|
||||
/* Reverse the bit order of a by swapping bytes, nibbles, bit pairs and */
/* single bits of its low 16 bits, then shift the result right by 4. */
gf PQCLEAN_MCELIECE348864_CLEAN_bitrev(gf a) {
    gf bytes_swapped, nibbles_swapped, pairs_swapped, bits_swapped;

    bytes_swapped   = ((a & 0x00FF) << 8) | ((a & 0xFF00) >> 8);
    nibbles_swapped = ((bytes_swapped & 0x0F0F) << 4) | ((bytes_swapped & 0xF0F0) >> 4);
    pairs_swapped   = ((nibbles_swapped & 0x3333) << 2) | ((nibbles_swapped & 0xCCCC) >> 2);
    bits_swapped    = ((pairs_swapped & 0x5555) << 1) | ((pairs_swapped & 0xAAAA) >> 1);

    return bits_swapped >> 4;
}
|
||||
|
||||
@ -1,22 +0,0 @@
|
||||
#ifndef PQCLEAN_MCELIECE348864_CLEAN_UTIL_H
|
||||
#define PQCLEAN_MCELIECE348864_CLEAN_UTIL_H
|
||||
/*
|
||||
This file is for loading/storing data in a little-endian fashion
|
||||
*/
|
||||
|
||||
|
||||
#include "gf.h"
|
||||
#include <stdint.h>
|
||||
|
||||
void PQCLEAN_MCELIECE348864_CLEAN_store2(unsigned char * /*dest*/, gf /*a*/);
|
||||
uint16_t PQCLEAN_MCELIECE348864_CLEAN_load2(const unsigned char * /*src*/);
|
||||
|
||||
uint32_t PQCLEAN_MCELIECE348864_CLEAN_load4(const unsigned char * /*in*/);
|
||||
|
||||
void PQCLEAN_MCELIECE348864_CLEAN_store8(unsigned char * /*out*/, uint64_t /*in*/);
|
||||
uint64_t PQCLEAN_MCELIECE348864_CLEAN_load8(const unsigned char * /*in*/);
|
||||
|
||||
gf PQCLEAN_MCELIECE348864_CLEAN_bitrev(gf /*a*/);
|
||||
|
||||
#endif
|
||||
|
||||
@ -1,16 +0,0 @@
|
||||
Public Domain.
|
||||
|
||||
Authors of Classic McEliece in alphabetical order:
|
||||
|
||||
Daniel J. Bernstein, University of Illinois at Chicago
|
||||
Tung Chou, Osaka University
|
||||
Tanja Lange, Technische Universiteit Eindhoven
|
||||
Ingo von Maurich, self
|
||||
Rafael Misoczki, Intel Corporation
|
||||
Ruben Niederhagen, Fraunhofer SIT
|
||||
Edoardo Persichetti, Florida Atlantic University
|
||||
Christiane Peters, self
|
||||
Peter Schwabe, Radboud University
|
||||
Nicolas Sendrier, Inria
|
||||
Jakub Szefer, Yale University
|
||||
Wen Wang, Yale University
|
||||
@ -1,13 +0,0 @@
|
||||
#include "aes256ctr.h"
|
||||
|
||||
void PQCLEAN_MCELIECE348864_SSE_aes256ctr(
|
||||
uint8_t *out,
|
||||
size_t outlen,
|
||||
const uint8_t nonce[AESCTR_NONCEBYTES],
|
||||
const uint8_t key[AES256_KEYBYTES]) {
|
||||
|
||||
aes256ctx state;
|
||||
aes256_ctr_keyexp(&state, key);
|
||||
aes256_ctr(out, outlen, nonce, &state);
|
||||
aes256_ctx_release(&state);
|
||||
}
|
||||
@ -1,17 +0,0 @@
|
||||
#ifndef PQCLEAN_MCELIECE348864_SSE_AES256CTR_H
|
||||
#define PQCLEAN_MCELIECE348864_SSE_AES256CTR_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "aes.h"
|
||||
|
||||
|
||||
void PQCLEAN_MCELIECE348864_SSE_aes256ctr(
|
||||
uint8_t *out,
|
||||
size_t outlen,
|
||||
const uint8_t nonce[AESCTR_NONCEBYTES],
|
||||
const uint8_t key[AES256_KEYBYTES]
|
||||
);
|
||||
|
||||
#endif
|
||||
@ -1,32 +0,0 @@
|
||||
#ifndef PQCLEAN_MCELIECE348864_SSE_API_H
|
||||
#define PQCLEAN_MCELIECE348864_SSE_API_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#define PQCLEAN_MCELIECE348864_SSE_CRYPTO_ALGNAME "Classic McEliece 348864"
|
||||
#define PQCLEAN_MCELIECE348864_SSE_CRYPTO_PUBLICKEYBYTES 261120
|
||||
#define PQCLEAN_MCELIECE348864_SSE_CRYPTO_SECRETKEYBYTES 6452
|
||||
#define PQCLEAN_MCELIECE348864_SSE_CRYPTO_CIPHERTEXTBYTES 128
|
||||
#define PQCLEAN_MCELIECE348864_SSE_CRYPTO_BYTES 32
|
||||
|
||||
|
||||
int PQCLEAN_MCELIECE348864_SSE_crypto_kem_enc(
|
||||
uint8_t *c,
|
||||
uint8_t *key,
|
||||
const uint8_t *pk
|
||||
);
|
||||
|
||||
int PQCLEAN_MCELIECE348864_SSE_crypto_kem_dec(
|
||||
uint8_t *key,
|
||||
const uint8_t *c,
|
||||
const uint8_t *sk
|
||||
);
|
||||
|
||||
int PQCLEAN_MCELIECE348864_SSE_crypto_kem_keypair
|
||||
(
|
||||
uint8_t *pk,
|
||||
uint8_t *sk
|
||||
);
|
||||
|
||||
#endif
|
||||
|
||||
@ -1,287 +0,0 @@
|
||||
/*
|
||||
This file is for Benes network related functions
|
||||
*/
|
||||
#include "benes.h"
|
||||
|
||||
#include "params.h"
|
||||
#include "transpose.h"
|
||||
#include "util.h"
|
||||
|
||||
/* Conditionally swap bits between words at distance 1: for pair p, the */
/* bits selected by cond[p] are exchanged between bs[2p] and bs[2p + 1]. */
static void layer_0(uint64_t *bs, const uint64_t *cond) {
    int pair;
    uint64_t delta;

    for (pair = 0; pair < (1 << 6) / 2; pair++) {
        delta = (bs[ 2 * pair ] ^ bs[ 2 * pair + 1 ]) & cond[ pair ];
        bs[ 2 * pair ] ^= delta;
        bs[ 2 * pair + 1 ] ^= delta;
    }
}
|
||||
|
||||
/* Conditionally swap bits between words at distance 2; the cond words */
/* are consumed in order, two per group of four words. */
static void layer_1(uint64_t *bs, const uint64_t *cond) {
    int base, off;
    uint64_t delta;

    for (base = 0; base < (1 << 6); base += 4) {
        for (off = 0; off < 2; off++) {
            delta = (bs[ base + off ] ^ bs[ base + off + 2 ]) & *cond++;
            bs[ base + off ] ^= delta;
            bs[ base + off + 2 ] ^= delta;
        }
    }
}
|
||||
|
||||
/* Conditionally swap bits between words at distance 4; the cond words */
/* are consumed in order, four per group of eight words. */
static void layer_2(uint64_t *bs, const uint64_t *cond) {
    int base, off;
    uint64_t delta;

    for (base = 0; base < (1 << 6); base += 8) {
        for (off = 0; off < 4; off++) {
            delta = (bs[ base + off ] ^ bs[ base + off + 4 ]) & *cond++;
            bs[ base + off ] ^= delta;
            bs[ base + off + 4 ] ^= delta;
        }
    }
}
|
||||
|
||||
/* Conditionally swap bits between words at distance 8; the cond words */
/* are consumed in order, eight per group of sixteen words. */
static void layer_3(uint64_t *bs, const uint64_t *cond) {
    int base, off;
    uint64_t delta;

    for (base = 0; base < (1 << 6); base += 16) {
        for (off = 0; off < 8; off++) {
            delta = (bs[ base + off ] ^ bs[ base + off + 8 ]) & *cond++;
            bs[ base + off ] ^= delta;
            bs[ base + off + 8 ] ^= delta;
        }
    }
}
|
||||
|
||||
/* Conditionally swap bits between words at distance 16; the cond words */
/* are consumed in order, sixteen per group of thirty-two words. */
static void layer_4(uint64_t *bs, const uint64_t *cond) {
    int base, off;
    uint64_t delta;

    for (base = 0; base < (1 << 6); base += 32) {
        for (off = 0; off < 16; off++) {
            delta = (bs[ base + off ] ^ bs[ base + off + 16 ]) & *cond++;
            bs[ base + off ] ^= delta;
            bs[ base + off + 16 ] ^= delta;
        }
    }
}
|
||||
|
||||
/* Conditionally swap bits between words at distance 32: the bits selected */
/* by cond[k] are exchanged between bs[k] and bs[k + 32], for k in [0, 32). */
static void layer_5(uint64_t *bs, const uint64_t *cond) {
    int base, off;
    uint64_t delta;

    for (base = 0; base < (1 << 6); base += 64) {
        for (off = 0; off < 32; off++) {
            delta = (bs[ base + off ] ^ bs[ base + off + 32 ]) & *cond++;
            bs[ base + off ] ^= delta;
            bs[ base + off + 32 ] ^= delta;
        }
    }
}
|
||||
|
||||
/* input: bits, control bits as array of bytes (23 blocks of 256 bytes) */
/* output: out, 23 blocks of 32 64-bit words; blocks 0-5 and 17-22 are read */
/*         as 64 32-bit words and transposed, blocks 6-16 are read directly */
/*         as 32 64-bit words */
void PQCLEAN_MCELIECE348864_SSE_load_bits(uint64_t out[][32], const unsigned char *bits) {
    int block, k;
    const unsigned char *src;

    uint64_t cond[64];

    for (block = 0; block < 23; block++) {
        src = bits + block * 256;

        if (block < 6 || block >= 17) {
            for (k = 0; k < 64; k++) {
                cond[k] = PQCLEAN_MCELIECE348864_SSE_load4(src + k * 4);
            }

            PQCLEAN_MCELIECE348864_SSE_transpose_64x64(cond);

            for (k = 0; k < 32; k++) {
                out[ block ][k] = cond[k];
            }
        } else {
            for (k = 0; k < 32; k++) {
                out[ block ][k] = PQCLEAN_MCELIECE348864_SSE_load8(src + k * 8);
            }
        }
    }
}
|
||||
|
||||
/* input: r, sequence of bits to be permuted */
|
||||
/* cond, control bits as array of 128-bit vectors */
|
||||
/* rev, 0 for normal application; !0 for inverse */
|
||||
/* output: r, permuted bits */
|
||||
void PQCLEAN_MCELIECE348864_SSE_benes(uint64_t *r, uint64_t cond[][32], int rev) {
|
||||
int block, inc;
|
||||
|
||||
uint64_t *bs = r;
|
||||
|
||||
//
|
||||
|
||||
if (rev == 0) {
|
||||
block = 0;
|
||||
inc = 1;
|
||||
} else {
|
||||
block = 22;
|
||||
inc = -1;
|
||||
}
|
||||
|
||||
PQCLEAN_MCELIECE348864_SSE_transpose_64x64(bs);
|
||||
|
||||
layer_0(bs, cond[ block ]);
|
||||
block += inc;
|
||||
layer_1(bs, cond[ block ]);
|
||||
block += inc;
|
||||
layer_2(bs, cond[ block ]);
|
||||
block += inc;
|
||||
layer_3(bs, cond[ block ]);
|
||||
block += inc;
|
||||
layer_4(bs, cond[ block ]);
|
||||
block += inc;
|
||||
layer_5(bs, cond[ block ]);
|
||||
block += inc;
|
||||
|
||||
PQCLEAN_MCELIECE348864_SSE_transpose_64x64(bs);
|
||||
|
||||
layer_0(bs, cond[ block ]);
|
||||
block += inc;
|
||||
layer_1(bs, cond[ block ]);
|
||||
block += inc;
|
||||
layer_2(bs, cond[ block ]);
|
||||
block += inc;
|
||||
layer_3(bs, cond[ block ]);
|
||||
block += inc;
|
||||
layer_4(bs, cond[ block ]);
|
||||
block += inc;
|
||||
layer_5(bs, cond[ block ]);
|
||||
block += inc;
|
||||
layer_4(bs, cond[ block ]);
|
||||
block += inc;
|
||||
layer_3(bs, cond[ block ]);
|
||||
block += inc;
|
||||
layer_2(bs, cond[ block ]);
|
||||
block += inc;
|
||||
layer_1(bs, cond[ block ]);
|
||||
block += inc;
|
||||
layer_0(bs, cond[ block ]);
|
||||
block += inc;
|
||||
|
||||
PQCLEAN_MCELIECE348864_SSE_transpose_64x64(bs);
|
||||
|
||||
layer_5(bs, cond[ block ]);
|
||||
block += inc;
|
||||
layer_4(bs, cond[ block ]);
|
||||
block += inc;
|
||||
layer_3(bs, cond[ block ]);
|
||||
block += inc;
|
||||
layer_2(bs, cond[ block ]);
|
||||
block += inc;
|
||||
layer_1(bs, cond[ block ]);
|
||||
block += inc;
|
||||
layer_0(bs, cond[ block ]);
|
||||
//block += inc;
|
||||
|
||||
PQCLEAN_MCELIECE348864_SSE_transpose_64x64(bs);
|
||||
}
|
||||
|
||||
@ -1,15 +0,0 @@
|
||||
#ifndef PQCLEAN_MCELIECE348864_SSE_BENES_H
|
||||
#define PQCLEAN_MCELIECE348864_SSE_BENES_H
|
||||
/*
|
||||
This file is for Benes network related functions
|
||||
*/
|
||||
|
||||
|
||||
#include "gf.h"
|
||||
#include "vec128.h"
|
||||
|
||||
void PQCLEAN_MCELIECE348864_SSE_load_bits(uint64_t /*out*/[][32], const unsigned char * /*bits*/);
|
||||
void PQCLEAN_MCELIECE348864_SSE_benes(uint64_t * /*r*/, uint64_t /*cond*/[][32], int /*rev*/);
|
||||
|
||||
#endif
|
||||
|
||||
@ -1,220 +0,0 @@
|
||||
/*
|
||||
This file is for the inversion-free Berlekamp-Massey algorithm
|
||||
see https://ieeexplore.ieee.org/document/87857
|
||||
*/
|
||||
|
||||
#include "bm.h"
|
||||
|
||||
#include "gf.h"
|
||||
#include "util.h"
|
||||
#include "vec.h"
|
||||
#include "vec128.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdint.h>
|
||||
|
||||
extern void PQCLEAN_MCELIECE348864_SSE_update_asm(void *, gf, int);
|
||||
extern gf PQCLEAN_MCELIECE348864_SSE_vec_reduce_asm(uint64_t *);
|
||||
|
||||
/* Return all ones if a is nonzero and 0 if a is zero, without branching. */
static inline uint64_t mask_nonzero(gf a) {
    /* a - 1 wraps to 2^64 - 1 only for a == 0, so its top bit flags zero */
    uint64_t is_zero = ((uint64_t) a - 1) >> 63;

    return is_zero - 1;
}
|
||||
|
||||
/* Return all ones if a <= b and 0 otherwise, without branching. */
static inline uint64_t mask_leq(uint16_t a, uint16_t b) {
    /* b - a wraps around (top bit set) exactly when a > b */
    uint64_t is_greater = ((uint64_t) b - (uint64_t) a) >> 63;

    return is_greater - 1;
}
|
||||
|
||||
static void vec_cmov(uint64_t out[][2], uint64_t mask) {
|
||||
int i;
|
||||
|
||||
for (i = 0; i < GFBITS; i++) {
|
||||
out[i][0] = (out[i][0] & ~mask) | (out[i][1] & mask);
|
||||
}
|
||||
}
|
||||
|
||||
/* Exchange the mask[1] part of in[idx0] with the mask[0] part of in[idx1], */
/* shifting each by 1 << b so it lands in the other vector's free positions. */
static inline void interleave(vec128 *in, int idx0, int idx1, vec128 *mask, int b) {
    int shift = 1 << b;

    vec128 keep0 = PQCLEAN_MCELIECE348864_SSE_vec128_and(in[idx0], mask[0]);
    vec128 move1 = PQCLEAN_MCELIECE348864_SSE_vec128_and(in[idx1], mask[0]);
    vec128 move0 = PQCLEAN_MCELIECE348864_SSE_vec128_and(in[idx0], mask[1]);
    vec128 keep1 = PQCLEAN_MCELIECE348864_SSE_vec128_and(in[idx1], mask[1]);

    in[idx0] = PQCLEAN_MCELIECE348864_SSE_vec128_or(keep0,
               PQCLEAN_MCELIECE348864_SSE_vec128_sll_2x(move1, shift));

    in[idx1] = PQCLEAN_MCELIECE348864_SSE_vec128_or(PQCLEAN_MCELIECE348864_SSE_vec128_srl_2x(move0, shift),
               keep1);
}
|
||||
|
||||
/* input: in, field elements in bitsliced form */
|
||||
/* output: out, field elements in non-bitsliced form */
|
||||
static inline void get_coefs(gf *out, vec128 *in) {
|
||||
int i, k;
|
||||
|
||||
vec128 mask[4][2];
|
||||
vec128 buf[16];
|
||||
|
||||
for (i = 0; i < GFBITS; i++) {
|
||||
buf[i] = in[i];
|
||||
}
|
||||
for (i = GFBITS; i < 16; i++) {
|
||||
buf[i] = PQCLEAN_MCELIECE348864_SSE_vec128_setzero();
|
||||
}
|
||||
|
||||
mask[0][0] = PQCLEAN_MCELIECE348864_SSE_vec128_set1_16b(0x5555);
|
||||
mask[0][1] = PQCLEAN_MCELIECE348864_SSE_vec128_set1_16b(0xAAAA);
|
||||
mask[1][0] = PQCLEAN_MCELIECE348864_SSE_vec128_set1_16b(0x3333);
|
||||
mask[1][1] = PQCLEAN_MCELIECE348864_SSE_vec128_set1_16b(0xCCCC);
|
||||
mask[2][0] = PQCLEAN_MCELIECE348864_SSE_vec128_set1_16b(0x0F0F);
|
||||
mask[2][1] = PQCLEAN_MCELIECE348864_SSE_vec128_set1_16b(0xF0F0);
|
||||
mask[3][0] = PQCLEAN_MCELIECE348864_SSE_vec128_set1_16b(0x00FF);
|
||||
mask[3][1] = PQCLEAN_MCELIECE348864_SSE_vec128_set1_16b(0xFF00);
|
||||
|
||||
interleave(buf, 0, 8, mask[3], 3);
|
||||
interleave(buf, 1, 9, mask[3], 3);
|
||||
interleave(buf, 2, 10, mask[3], 3);
|
||||
interleave(buf, 3, 11, mask[3], 3);
|
||||
interleave(buf, 4, 12, mask[3], 3);
|
||||
interleave(buf, 5, 13, mask[3], 3);
|
||||
interleave(buf, 6, 14, mask[3], 3);
|
||||
interleave(buf, 7, 15, mask[3], 3);
|
||||
|
||||
interleave(buf, 0, 4, mask[2], 2);
|
||||
interleave(buf, 1, 5, mask[2], 2);
|
||||
interleave(buf, 2, 6, mask[2], 2);
|
||||
interleave(buf, 3, 7, mask[2], 2);
|
||||
interleave(buf, 8, 12, mask[2], 2);
|
||||
interleave(buf, 9, 13, mask[2], 2);
|
||||
interleave(buf, 10, 14, mask[2], 2);
|
||||
interleave(buf, 11, 15, mask[2], 2);
|
||||
|
||||
interleave(buf, 0, 2, mask[1], 1);
|
||||
interleave(buf, 1, 3, mask[1], 1);
|
||||
interleave(buf, 4, 6, mask[1], 1);
|
||||
interleave(buf, 5, 7, mask[1], 1);
|
||||
interleave(buf, 8, 10, mask[1], 1);
|
||||
interleave(buf, 9, 11, mask[1], 1);
|
||||
interleave(buf, 12, 14, mask[1], 1);
|
||||
interleave(buf, 13, 15, mask[1], 1);
|
||||
|
||||
interleave(buf, 0, 1, mask[0], 0);
|
||||
interleave(buf, 2, 3, mask[0], 0);
|
||||
interleave(buf, 4, 5, mask[0], 0);
|
||||
interleave(buf, 6, 7, mask[0], 0);
|
||||
interleave(buf, 8, 9, mask[0], 0);
|
||||
interleave(buf, 10, 11, mask[0], 0);
|
||||
interleave(buf, 12, 13, mask[0], 0);
|
||||
interleave(buf, 14, 15, mask[0], 0);
|
||||
|
||||
for (i = 0; i < 16; i++) {
|
||||
for (k = 0; k < 4; k++) {
|
||||
out[ (4 * 0 + k) * 16 + i ] = (PQCLEAN_MCELIECE348864_SSE_vec128_extract(buf[i], 0) >> (k * 16)) & GFMASK;
|
||||
out[ (4 * 1 + k) * 16 + i ] = (PQCLEAN_MCELIECE348864_SSE_vec128_extract(buf[i], 1) >> (k * 16)) & GFMASK;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* input: in, sequence of field elements */
|
||||
/* output: out, minimal polynomial of in */
|
||||
void PQCLEAN_MCELIECE348864_SSE_bm(uint64_t out[ GFBITS ], vec128 in[ GFBITS ]) {
|
||||
uint16_t i;
|
||||
uint16_t N, L;
|
||||
|
||||
uint64_t prod[ GFBITS ];
|
||||
uint64_t in_tmp[ GFBITS ];
|
||||
|
||||
uint64_t db[ GFBITS ][ 2 ];
|
||||
uint64_t BC_tmp[ GFBITS ][ 2 ];
|
||||
uint64_t BC[ GFBITS ][ 2 ];
|
||||
|
||||
uint64_t mask, t;
|
||||
|
||||
gf d, b, c0 = 1;
|
||||
|
||||
gf coefs[SYS_T * 2];
|
||||
|
||||
// init
|
||||
|
||||
BC[0][1] = 0;
|
||||
BC[0][0] = 1;
|
||||
BC[0][0] <<= 63;
|
||||
|
||||
for (i = 1; i < GFBITS; i++) {
|
||||
BC[i][0] = BC[i][1] = 0;
|
||||
}
|
||||
|
||||
b = 1;
|
||||
L = 0;
|
||||
|
||||
//
|
||||
|
||||
get_coefs(coefs, in);
|
||||
|
||||
for (i = 0; i < GFBITS; i++) {
|
||||
in_tmp[i] = 0;
|
||||
}
|
||||
|
||||
for (N = 0; N < SYS_T * 2; N++) {
|
||||
// computing d
|
||||
|
||||
PQCLEAN_MCELIECE348864_SSE_vec_mul_asm(prod, in_tmp, &BC[0][1], 16);
|
||||
|
||||
PQCLEAN_MCELIECE348864_SSE_update_asm(in_tmp, coefs[N], 8);
|
||||
|
||||
d = PQCLEAN_MCELIECE348864_SSE_vec_reduce_asm(prod);
|
||||
|
||||
t = PQCLEAN_MCELIECE348864_SSE_gf_mul2(c0, coefs[N], b);
|
||||
|
||||
d ^= t & 0xFFFFFFFF;
|
||||
|
||||
// 3 cases
|
||||
|
||||
mask = mask_nonzero(d) & mask_leq(L * 2, N);
|
||||
|
||||
for (i = 0; i < GFBITS; i++) {
|
||||
db[i][0] = (d >> i) & 1;
|
||||
db[i][0] = -db[i][0];
|
||||
db[i][1] = (b >> i) & 1;
|
||||
db[i][1] = -db[i][1];
|
||||
}
|
||||
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_mul((vec128 *) BC_tmp, (vec128 *) db, (vec128 *) BC);
|
||||
|
||||
vec_cmov(BC, mask);
|
||||
|
||||
PQCLEAN_MCELIECE348864_SSE_update_asm(BC, mask & c0, 16);
|
||||
|
||||
for (i = 0; i < GFBITS; i++) {
|
||||
BC[i][1] = BC_tmp[i][0] ^ BC_tmp[i][1];
|
||||
}
|
||||
|
||||
c0 = t >> 32;
|
||||
b = (d & mask) | (b & ~mask);
|
||||
L = ((N + 1 - L) & mask) | (L & ~mask);
|
||||
|
||||
}
|
||||
|
||||
c0 = PQCLEAN_MCELIECE348864_SSE_gf_inv(c0);
|
||||
|
||||
for (i = 0; i < GFBITS; i++) {
|
||||
out[i] = (c0 >> i) & 1;
|
||||
out[i] = -out[i];
|
||||
}
|
||||
|
||||
PQCLEAN_MCELIECE348864_SSE_vec_mul_asm(out, out, &BC[0][1], 16);
|
||||
}
|
||||
|
||||
@ -1,17 +0,0 @@
|
||||
#ifndef PQCLEAN_MCELIECE348864_SSE_BM_H
|
||||
#define PQCLEAN_MCELIECE348864_SSE_BM_H
|
||||
/*
|
||||
This file is for the inversion-free Berlekamp-Massey algorithm
|
||||
see https://ieeexplore.ieee.org/document/87857
|
||||
*/
|
||||
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "params.h"
|
||||
#include "vec128.h"
|
||||
|
||||
void PQCLEAN_MCELIECE348864_SSE_bm(uint64_t *out, vec128 *in);
|
||||
|
||||
#endif
|
||||
|
||||
@ -1,32 +0,0 @@
|
||||
.data

# not supported on macos
#.section .rodata

# Twelve 128-bit constants, each the same 64-bit pattern repeated twice.
# MASKk_0 / MASKk_1 select the low / high half of every 2^(k+1)-bit group.

.p2align 4

.globl PQCLEAN_MCELIECE348864_SSE_MASK0_0
PQCLEAN_MCELIECE348864_SSE_MASK0_0: .quad 0x5555555555555555, 0x5555555555555555
.globl PQCLEAN_MCELIECE348864_SSE_MASK0_1
PQCLEAN_MCELIECE348864_SSE_MASK0_1: .quad 0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA
.globl PQCLEAN_MCELIECE348864_SSE_MASK1_0
PQCLEAN_MCELIECE348864_SSE_MASK1_0: .quad 0x3333333333333333, 0x3333333333333333
.globl PQCLEAN_MCELIECE348864_SSE_MASK1_1
PQCLEAN_MCELIECE348864_SSE_MASK1_1: .quad 0xCCCCCCCCCCCCCCCC, 0xCCCCCCCCCCCCCCCC
.globl PQCLEAN_MCELIECE348864_SSE_MASK2_0
PQCLEAN_MCELIECE348864_SSE_MASK2_0: .quad 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F
.globl PQCLEAN_MCELIECE348864_SSE_MASK2_1
PQCLEAN_MCELIECE348864_SSE_MASK2_1: .quad 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0
.globl PQCLEAN_MCELIECE348864_SSE_MASK3_0
PQCLEAN_MCELIECE348864_SSE_MASK3_0: .quad 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF
.globl PQCLEAN_MCELIECE348864_SSE_MASK3_1
PQCLEAN_MCELIECE348864_SSE_MASK3_1: .quad 0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00
.globl PQCLEAN_MCELIECE348864_SSE_MASK4_0
PQCLEAN_MCELIECE348864_SSE_MASK4_0: .quad 0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF
.globl PQCLEAN_MCELIECE348864_SSE_MASK4_1
PQCLEAN_MCELIECE348864_SSE_MASK4_1: .quad 0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000
.globl PQCLEAN_MCELIECE348864_SSE_MASK5_0
PQCLEAN_MCELIECE348864_SSE_MASK5_0: .quad 0x00000000FFFFFFFF, 0x00000000FFFFFFFF
.globl PQCLEAN_MCELIECE348864_SSE_MASK5_1
PQCLEAN_MCELIECE348864_SSE_MASK5_1: .quad 0xFFFFFFFF00000000, 0xFFFFFFFF00000000
|
||||
|
||||
@ -1,448 +0,0 @@
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF00F0FF0F00F0FF0, 0xF00F0FF0F00F0FF0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F00F0F0F0FF0F0, 0xF0F00F0F0F0FF0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0FF00FF00FF00FF0, 0x0FF00FF00FF00FF0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA5555AAAA5555AA, 0xAA5555AAAA5555AA),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF00F0FF0F00F0FF0, 0xF00F0FF0F00F0FF0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x33CCCC33CC3333CC, 0x33CCCC33CC3333CC),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCC33CC3333CC33CC, 0xCC33CC3333CC33CC),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x33CC33CC33CC33CC, 0x33CC33CC33CC33CC),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x5A5A5A5A5A5A5A5A, 0x5A5A5A5A5A5A5A5A),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF00F0FF0F00F0FF0, 0xF00F0FF0F00F0FF0),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3C3C3C3C3C3C3C3C, 0x3C3C3C3C3C3C3C3C),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0x0F0F0F0F0F0F0F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x5555AAAA5555AAAA, 0xAAAA5555AAAA5555),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCC3333CCCC3333CC, 0xCC3333CCCC3333CC),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33CC33CC33CC33C, 0xC33CC33CC33CC33C),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55555555AAAAAAAA, 0x55555555AAAAAAAA),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x33333333CCCCCCCC, 0x33333333CCCCCCCC),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FF00FFFF00FF00, 0xFF00FF0000FF00FF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0x0F0F0F0F0F0F0F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000FFFFFFFF0000, 0x0000FFFFFFFF0000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F00F0F0F0FF0F0, 0xF0F00F0F0F0FF0F0),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA55AA5555AA55AA, 0x55AA55AAAA55AA55),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCC33CC3333CC33CC, 0xCC33CC3333CC33CC),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x33CCCC33CC3333CC, 0xCC3333CC33CCCC33),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55555555AAAAAAAA, 0x55555555AAAAAAAA),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF0000FF00FFFF00, 0xFF0000FF00FFFF00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3CC33CC3C33CC33C, 0xC33CC33C3CC33CC3),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x5555AAAA5555AAAA, 0xAAAA5555AAAA5555),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0FF00FF00FF00FF0, 0xF00FF00FF00FF00F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCC33333333CCCC, 0x3333CCCCCCCC3333),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0x0F0F0F0F0F0F0F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FFFF0000FFFF00, 0xFF0000FFFF0000FF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33CC33CC33CC33C, 0xC33CC33CC33CC33C),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA55AA5555AA55AA, 0x55AA55AAAA55AA55),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x33CC33CCCC33CC33, 0x33CC33CCCC33CC33),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCC3333CC33CCCC33, 0x33CCCC33CC3333CC),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55555555AAAAAAAA, 0x55555555AAAAAAAA),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FFFF00FF0000FF, 0x00FFFF00FF0000FF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3CC33CC3C33CC33C, 0xC33CC33C3CC33CC3),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x5555AAAA5555AAAA, 0xAAAA5555AAAA5555),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0FF00FF00FF00FF0, 0xF00FF00FF00FF00F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3333CCCCCCCC3333, 0xCCCC33333333CCCC),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0x0F0F0F0F0F0F0F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FFFF0000FFFF00, 0xFF0000FFFF0000FF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33CC33CC33CC33C, 0xC33CC33CC33CC33C),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x6699669999669966, 0x9966996666996699),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x33CCCC33CC3333CC, 0x33CCCC33CC3333CC),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xA5A5A5A55A5A5A5A, 0xA5A5A5A55A5A5A5A),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3C3CC3C3C3C33C3C, 0x3C3CC3C3C3C33C3C),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF00FF00F0FF00FF0, 0x0FF00FF0F00FF00F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55AA55AA55AA55AA, 0xAA55AA55AA55AA55),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3C3CC3C3C3C33C3C, 0x3C3CC3C3C3C33C3C),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0xF0F0F0F00F0F0F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55AA55AA55AA55AA, 0xAA55AA55AA55AA55),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x33CCCC33CC3333CC, 0xCC3333CC33CCCC33),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0x0F0F0F0F0F0F0F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xA55A5AA55AA5A55A, 0xA55A5AA55AA5A55A),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x6699669999669966, 0x9966996666996699),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x33CCCC33CC3333CC, 0x33CCCC33CC3333CC),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x5A5A5A5AA5A5A5A5, 0x5A5A5A5AA5A5A5A5),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC3C33C3C3C3CC3C3, 0xC3C33C3C3C3CC3C3),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0FF00FF0F00FF00F, 0xF00FF00F0FF00FF0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA55AA55AA55AA55, 0x55AA55AA55AA55AA),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC3C33C3C3C3CC3C3, 0xC3C33C3C3C3CC3C3),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0xF0F0F0F00F0F0F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA55AA55AA55AA55, 0x55AA55AA55AA55AA),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x33CCCC33CC3333CC, 0xCC3333CC33CCCC33),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0x0F0F0F0F0F0F0F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xA55A5AA55AA5A55A, 0xA55A5AA55AA5A55A),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x6699669999669966, 0x9966996666996699),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCC3333CC33CCCC33, 0xCC3333CC33CCCC33),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x5A5A5A5AA5A5A5A5, 0x5A5A5A5AA5A5A5A5),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3C3CC3C3C3C33C3C, 0x3C3CC3C3C3C33C3C),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0FF00FF0F00FF00F, 0xF00FF00F0FF00FF0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55AA55AA55AA55AA, 0xAA55AA55AA55AA55),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3C3CC3C3C3C33C3C, 0x3C3CC3C3C3C33C3C),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0xF0F0F0F00F0F0F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55AA55AA55AA55AA, 0xAA55AA55AA55AA55),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x33CCCC33CC3333CC, 0xCC3333CC33CCCC33),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0x0F0F0F0F0F0F0F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xA55A5AA55AA5A55A, 0xA55A5AA55AA5A55A),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x6699669999669966, 0x9966996666996699),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCC3333CC33CCCC33, 0xCC3333CC33CCCC33),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xA5A5A5A55A5A5A5A, 0xA5A5A5A55A5A5A5A),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC3C33C3C3C3CC3C3, 0xC3C33C3C3C3CC3C3),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF00FF00F0FF00FF0, 0x0FF00FF0F00FF00F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA55AA55AA55AA55, 0x55AA55AA55AA55AA),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC3C33C3C3C3CC3C3, 0xC3C33C3C3C3CC3C3),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0xF0F0F0F00F0F0F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA55AA55AA55AA55, 0x55AA55AA55AA55AA),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x33CCCC33CC3333CC, 0xCC3333CC33CCCC33),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0x0F0F0F0F0F0F0F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xA55A5AA55AA5A55A, 0xA55A5AA55AA5A55A),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x9669699696696996, 0x9669699696696996),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x6996699669966996, 0x6996699669966996),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x6996699669966996, 0x6996699669966996),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FFFF0000FFFF00, 0x00FFFF0000FFFF00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0x00FF00FF00FF00FF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF00FF00F0FF00FF0, 0x0FF00FF0F00FF00F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F00F0F0F0FF0F0, 0x0F0FF0F0F0F00F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0x3CC3C33CC33C3CC3),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0x3CC3C33CC33C3CC3),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xA55A5AA55AA5A55A, 0xA55A5AA55AA5A55A),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0xC33C3CC33CC3C33C),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3CC3C33C3CC3C33C, 0x3CC3C33C3CC3C33C),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x9669699696696996, 0x9669699696696996),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x6996699669966996, 0x6996699669966996),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x6996699669966996, 0x6996699669966996),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF0000FFFF0000FF, 0xFF0000FFFF0000FF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FF00FF00FF00FF, 0xFF00FF00FF00FF00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0FF00FF0F00FF00F, 0xF00FF00F0FF00FF0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0FF0F0F0F00F0F, 0xF0F00F0F0F0FF0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0x3CC3C33CC33C3CC3),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0x3CC3C33CC33C3CC3),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xA55A5AA55AA5A55A, 0xA55A5AA55AA5A55A),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0xC33C3CC33CC3C33C),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3CC3C33C3CC3C33C, 0x3CC3C33C3CC3C33C),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x9669699696696996, 0x9669699696696996),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x6996699669966996, 0x6996699669966996),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x9669966996699669, 0x9669966996699669),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF0000FFFF0000FF, 0xFF0000FFFF0000FF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FF00FF00FF00FF, 0xFF00FF00FF00FF00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF00FF00F0FF00FF0, 0x0FF00FF0F00FF00F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F00F0F0F0FF0F0, 0x0F0FF0F0F0F00F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0x3CC3C33CC33C3CC3),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0x3CC3C33CC33C3CC3),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xA55A5AA55AA5A55A, 0xA55A5AA55AA5A55A),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0xC33C3CC33CC3C33C),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3CC3C33C3CC3C33C, 0x3CC3C33C3CC3C33C),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x9669699696696996, 0x9669699696696996),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x6996699669966996, 0x6996699669966996),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x9669966996699669, 0x9669966996699669),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FFFF0000FFFF00, 0x00FFFF0000FFFF00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0x00FF00FF00FF00FF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0FF00FF0F00FF00F, 0xF00FF00F0FF00FF0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0FF0F0F0F00F0F, 0xF0F00F0F0F0FF0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0x3CC3C33CC33C3CC3),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0x3CC3C33CC33C3CC3),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xA55A5AA55AA5A55A, 0xA55A5AA55AA5A55A),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0xC33C3CC33CC3C33C),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3CC3C33C3CC3C33C, 0x3CC3C33C3CC3C33C),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x9669699696696996, 0x9669699696696996),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x9669966996699669, 0x9669966996699669),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x9669966996699669, 0x9669966996699669),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FFFF0000FFFF00, 0x00FFFF0000FFFF00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0x00FF00FF00FF00FF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF00FF00F0FF00FF0, 0x0FF00FF0F00FF00F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F00F0F0F0FF0F0, 0x0F0FF0F0F0F00F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0x3CC3C33CC33C3CC3),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0x3CC3C33CC33C3CC3),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xA55A5AA55AA5A55A, 0xA55A5AA55AA5A55A),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0xC33C3CC33CC3C33C),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3CC3C33C3CC3C33C, 0x3CC3C33C3CC3C33C),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x9669699696696996, 0x9669699696696996),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x9669966996699669, 0x9669966996699669),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x9669966996699669, 0x9669966996699669),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF0000FFFF0000FF, 0xFF0000FFFF0000FF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FF00FF00FF00FF, 0xFF00FF00FF00FF00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0FF00FF0F00FF00F, 0xF00FF00F0FF00FF0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0FF0F0F0F00F0F, 0xF0F00F0F0F0FF0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0x3CC3C33CC33C3CC3),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0x3CC3C33CC33C3CC3),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xA55A5AA55AA5A55A, 0xA55A5AA55AA5A55A),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0xC33C3CC33CC3C33C),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3CC3C33C3CC3C33C, 0x3CC3C33C3CC3C33C),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x9669699696696996, 0x9669699696696996),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x9669966996699669, 0x9669966996699669),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x6996699669966996, 0x6996699669966996),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF0000FFFF0000FF, 0xFF0000FFFF0000FF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FF00FF00FF00FF, 0xFF00FF00FF00FF00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF00FF00F0FF00FF0, 0x0FF00FF0F00FF00F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F00F0F0F0FF0F0, 0x0F0FF0F0F0F00F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0x3CC3C33CC33C3CC3),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0x3CC3C33CC33C3CC3),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xA55A5AA55AA5A55A, 0xA55A5AA55AA5A55A),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0xC33C3CC33CC3C33C),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3CC3C33C3CC3C33C, 0x3CC3C33C3CC3C33C),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x9669699696696996, 0x9669699696696996),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x9669966996699669, 0x9669966996699669),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x6996699669966996, 0x6996699669966996),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FFFF0000FFFF00, 0x00FFFF0000FFFF00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0x00FF00FF00FF00FF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0FF00FF0F00FF00F, 0xF00FF00F0FF00FF0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0FF0F0F0F00F0F, 0xF0F00F0F0F0FF0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0x3CC3C33CC33C3CC3),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0x3CC3C33CC33C3CC3),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xA55A5AA55AA5A55A, 0xA55A5AA55AA5A55A),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xC33C3CC33CC3C33C, 0xC33C3CC33CC3C33C),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3CC3C33C3CC3C33C, 0x3CC3C33C3CC3C33C),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFF00000000, 0xFFFFFFFF00000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCCCCCCCCCCCCCC, 0xCCCCCCCCCCCCCCCC),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFF00000000, 0xFFFFFFFF00000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCCCCCCCCCCCCCC, 0xCCCCCCCCCCCCCCCC),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFF00000000, 0xFFFFFFFF00000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCCCCCCCCCCCCCC, 0xCCCCCCCCCCCCCCCC),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFF00000000, 0xFFFFFFFF00000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCCCCCCCCCCCCCC, 0xCCCCCCCCCCCCCCCC),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFF00000000, 0xFFFFFFFF00000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCCCCCCCCCCCCCC, 0xCCCCCCCCCCCCCCCC),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFF00000000, 0xFFFFFFFF00000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCCCCCCCCCCCCCC, 0xCCCCCCCCCCCCCCCC),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFF00000000, 0xFFFFFFFF00000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCCCCCCCCCCCCCC, 0xCCCCCCCCCCCCCCCC),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFF00000000, 0xFFFFFFFF00000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCCCCCCCCCCCCCC, 0xCCCCCCCCCCCCCCCC),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFF00000000, 0xFFFFFFFF00000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCCCCCCCCCCCCCC, 0xCCCCCCCCCCCCCCCC),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFF00000000, 0xFFFFFFFF00000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCCCCCCCCCCCCCC, 0xCCCCCCCCCCCCCCCC),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFF00000000, 0xFFFFFFFF00000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCCCCCCCCCCCCCC, 0xCCCCCCCCCCCCCCCC),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFF00000000, 0xFFFFFFFF00000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCCCCCCCCCCCCCC, 0xCCCCCCCCCCCCCCCC),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFF00000000, 0xFFFFFFFF00000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCCCCCCCCCCCCCC, 0xCCCCCCCCCCCCCCCC),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFF00000000, 0xFFFFFFFF00000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCCCCCCCCCCCCCC, 0xCCCCCCCCCCCCCCCC),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFF00000000, 0xFFFFFFFF00000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCCCCCCCCCCCCCC, 0xCCCCCCCCCCCCCCCC),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFF00000000, 0xFFFFFFFF00000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCCCCCCCCCCCCCC, 0xCCCCCCCCCCCCCCCC),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA),
|
||||
},
|
||||
@ -1,274 +0,0 @@
|
||||
/*
|
||||
This file is for functions required for generating the control bits of the Benes network w.r.t. a random permutation
|
||||
see the Lev-Pippenger-Valiant paper https://www.computer.org/csdl/trans/tc/1981/02/06312171.pdf
|
||||
*/
|
||||
|
||||
#include "controlbits.h"
|
||||
|
||||
#include "params.h"
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
typedef uint8_t bit;

#define N (1 << GFBITS)

/*
  Branch-free unsigned comparison.
  return: 1 if a < b, 0 otherwise, for every pair of 32-bit inputs.

  The top bit of (~a & b) | (~(a ^ b) & (a - b)) is the borrow out of the
  subtraction a - b.  Shifting only (a - b) down by 31 gives the wrong answer
  as soon as the operands differ by 2^31 or more, so the full borrow
  expression is used instead.
*/
static bit is_smaller(uint32_t a, uint32_t b) {
    uint32_t borrow = (~a & b) | (~(a ^ b) & (a - b));

    return (bit)(borrow >> 31);
}
|
||||
|
||||
static bit is_smaller_63b(uint64_t a, uint64_t b) {
|
||||
uint64_t ret = 0;
|
||||
|
||||
ret = a - b;
|
||||
ret >>= 63;
|
||||
|
||||
return (bit)ret;
|
||||
}
|
||||
|
||||
/*
  Conditional swap without branching.
  input: swap, expected to be 0 or 1
  output: *x and *y exchanged when swap == 1, untouched when swap == 0
*/
static void cswap(uint32_t *x, uint32_t *y, bit swap) {
    /* all-ones when swap == 1, all-zeros when swap == 0 */
    const uint32_t select = 0 - (uint32_t)swap;
    const uint32_t delta = (*x ^ *y) & select;

    *x ^= delta;
    *y ^= delta;
}
|
||||
|
||||
/*
  64-bit conditional swap without branching.
  input: swap, expected to be 0 or 1
  output: *x and *y exchanged when swap == 1, untouched when swap == 0
*/
static void cswap_63b(uint64_t *x, uint64_t *y, bit swap) {
    /* all-ones when swap == 1, all-zeros when swap == 0 */
    const uint64_t select = 0 - (uint64_t)swap;
    const uint64_t delta = (*x ^ *y) & select;

    *x ^= delta;
    *y ^= delta;
}
|
||||
|
||||
/*
  Compare-exchange of two 32-bit words.
  output: *x = min(input x, input y), *y = max(input x, input y)
  The pair is swapped exactly when is_smaller reports *y < *x.
*/
static void minmax(uint32_t *x, uint32_t *y) {
    cswap(x, y, is_smaller(*y, *x));
}
|
||||
|
||||
/*
  Compare-exchange of two 64-bit words (see is_smaller_63b for the value range).
  output: *x = min(input x, input y), *y = max(input x, input y)
*/
static void minmax_63b(uint64_t *x, uint64_t *y) {
    cswap_63b(x, y, is_smaller_63b(*y, *x));
}
|
||||
|
||||
/*
  Merge the first half of x[0],x[step],...,x[(2*n-1)*step] with the second half.
  requires n to be a power of 2
  The sequence of compare-exchanges depends only on n and step, not on the data.
*/
static void merge(int n, uint32_t *x, int step) {
    int pair;
    int lo;

    if (n == 1) {
        minmax(&x[0], &x[step]);
        return;
    }

    /* merge the even-indexed and the odd-indexed subsequences separately */
    merge(n / 2, x, step * 2);
    merge(n / 2, x + step, step * 2);

    /* then fix up neighbours (1,2), (3,4), ..., (2n-3, 2n-2) */
    for (pair = 0; pair < n - 1; pair++) {
        lo = (2 * pair + 1) * step;
        minmax(&x[lo], &x[lo + step]);
    }
}
|
||||
|
||||
/*
  64-bit counterpart of merge: merges the first half of
  x[0],x[step],...,x[(2*n-1)*step] with the second half.
  requires n to be a power of 2
*/
static void merge_63b(int n, uint64_t *x, int step) {
    int pair;
    int lo;

    if (n == 1) {
        minmax_63b(&x[0], &x[step]);
        return;
    }

    /* merge the even-indexed and the odd-indexed subsequences separately */
    merge_63b(n / 2, x, step * 2);
    merge_63b(n / 2, x + step, step * 2);

    /* then fix up neighbours (1,2), (3,4), ..., (2n-3, 2n-2) */
    for (pair = 0; pair < n - 1; pair++) {
        lo = (2 * pair + 1) * step;
        minmax_63b(&x[lo], &x[lo + step]);
    }
}
|
||||
|
||||
/*
  Sort x[0],x[1],...,x[n-1] in place: sort both halves, then merge them.
  requires n to be a power of 2
*/
static void sort(int n, uint32_t *x) {
    int half;

    if (n > 1) {
        half = n / 2;
        sort(half, x);
        sort(half, x + half);
        merge(half, x, 1);
    }
}
|
||||
|
||||
/*
  Sort the 64-bit words x[0],...,x[n-1] in place: sort both halves, then merge.
  requires n to be a power of 2
*/
void PQCLEAN_MCELIECE348864_SSE_sort_63b(int n, uint64_t *x) {
    int half;

    if (n > 1) {
        half = n / 2;
        PQCLEAN_MCELIECE348864_SSE_sort_63b(half, x);
        PQCLEAN_MCELIECE348864_SSE_sort_63b(half, x + half);
        merge_63b(half, x, 1);
    }
}
|
||||
|
||||
/* y[pi[i]] = x[i] */
|
||||
/* requires n = 2^w */
|
||||
/* requires pi to be a permutation */
|
||||
static void composeinv(int n, uint32_t *y, const uint32_t *x, const uint32_t *pi) { // NC
|
||||
int i;
|
||||
uint32_t t[2 * N];
|
||||
|
||||
for (i = 0; i < n; ++i) {
|
||||
t[i] = x[i] | (pi[i] << 16);
|
||||
}
|
||||
|
||||
sort(n, t);
|
||||
|
||||
for (i = 0; i < n; ++i) {
|
||||
y[i] = t[i] & 0xFFFF;
|
||||
}
|
||||
}
|
||||
|
||||
/*
  Invert a permutation (contract as stated by the original author:
  "ip[i] = j iff pi[i] = j").
  requires n = 2^w
  requires pi to be a permutation

  ip is filled with the identity and then pushed through composeinv.
*/
static void invert(int n, uint32_t *ip, const uint32_t *pi) {
    int k = 0;

    while (k < n) {
        ip[k] = k;
        k++;
    }

    composeinv(n, ip, ip, pi);
}
|
||||
|
||||
|
||||
static void flow(int w, uint32_t *x, const uint32_t *y, int t) {
|
||||
bit m0;
|
||||
bit m1;
|
||||
|
||||
uint32_t b;
|
||||
uint32_t y_copy = *y;
|
||||
|
||||
m0 = is_smaller(*y & ((1 << w) - 1), *x & ((1 << w) - 1));
|
||||
m1 = is_smaller(0, t);
|
||||
|
||||
cswap(x, &y_copy, m0);
|
||||
b = m0 & m1;
|
||||
*x ^= b << w;
|
||||
}
|
||||
|
||||
/* input: permutation pi */
|
||||
/* output: (2w-1)n/2 (or 0 if n==1) control bits c[0],c[step],c[2*step],... */
|
||||
/* requires n = 2^w */
|
||||
static void controlbitsfrompermutation(int w, int n, int step, int off, unsigned char *c, const uint32_t *pi) {
|
||||
int i;
|
||||
int j;
|
||||
int k;
|
||||
int t;
|
||||
uint32_t ip[N] = {0};
|
||||
uint32_t I[2 * N] = {0};
|
||||
uint32_t P[2 * N] = {0};
|
||||
uint32_t PI[2 * N] = {0};
|
||||
uint32_t T[2 * N] = {0};
|
||||
uint32_t piflip[N] = {0};
|
||||
uint32_t subpi[2][N / 2] = {{0}};
|
||||
|
||||
if (w == 1) {
|
||||
c[ off / 8 ] |= (pi[0] & 1) << (off % 8);
|
||||
}
|
||||
if (w <= 1) {
|
||||
return;
|
||||
}
|
||||
|
||||
invert(n, ip, pi);
|
||||
|
||||
for (i = 0; i < n; ++i) {
|
||||
I[i] = ip[i] | (1 << w);
|
||||
I[n + i] = pi[i];
|
||||
}
|
||||
|
||||
for (i = 0; i < 2 * n; ++i) {
|
||||
P[i] = (i >> w) + (i & ((1 << w) - 2)) + ((i & 1) << w);
|
||||
}
|
||||
|
||||
for (t = 0; t < w; ++t) {
|
||||
composeinv(2 * n, PI, P, I);
|
||||
|
||||
for (i = 0; i < 2 * n; ++i) {
|
||||
flow(w, &P[i], &PI[i], t);
|
||||
}
|
||||
|
||||
for (i = 0; i < 2 * n; ++i) {
|
||||
T[i] = I[i ^ 1];
|
||||
}
|
||||
|
||||
composeinv(2 * n, I, I, T);
|
||||
|
||||
for (i = 0; i < 2 * n; ++i) {
|
||||
T[i] = P[i ^ 1];
|
||||
}
|
||||
|
||||
for (i = 0; i < 2 * n; ++i) {
|
||||
flow(w, &P[i], &T[i], 1);
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < n; ++i) {
|
||||
for (j = 0; j < w; ++j) {
|
||||
piflip[i] = pi[i];
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < n / 2; ++i) {
|
||||
c[ (off + i * step) / 8 ] |= ((P[i * 2] >> w) & 1) << ((off + i * step) % 8);
|
||||
}
|
||||
for (i = 0; i < n / 2; ++i) {
|
||||
c[ (off + ((w - 1)*n + i) * step) / 8 ] |= ((P[n + i * 2] >> w) & 1) << ((off + ((w - 1) * n + i) * step) % 8);
|
||||
}
|
||||
|
||||
for (i = 0; i < n / 2; ++i) {
|
||||
cswap(&piflip[i * 2], &piflip[i * 2 + 1], (P[n + i * 2] >> w) & 1);
|
||||
}
|
||||
|
||||
for (k = 0; k < 2; ++k) {
|
||||
for (i = 0; i < n / 2; ++i) {
|
||||
subpi[k][i] = piflip[i * 2 + k] >> 1;
|
||||
}
|
||||
}
|
||||
|
||||
for (k = 0; k < 2; ++k) {
|
||||
controlbitsfrompermutation(w - 1, n / 2, step * 2, off + step * (n / 2 + k), c, subpi[k]);
|
||||
}
|
||||
}
|
||||
|
||||
/* input: pi, a permutation*/
|
||||
/* output: out, control bits w.r.t. pi */
|
||||
void PQCLEAN_MCELIECE348864_SSE_controlbits(unsigned char *out, const uint32_t *pi) {
|
||||
unsigned int i;
|
||||
unsigned char c[ (2 * GFBITS - 1) * (1 << GFBITS) / 16 ];
|
||||
|
||||
for (i = 0; i < sizeof(c); i++) {
|
||||
c[i] = 0;
|
||||
}
|
||||
|
||||
controlbitsfrompermutation(GFBITS, (1 << GFBITS), 1, 0, c, pi);
|
||||
|
||||
for (i = 0; i < sizeof(c); i++) {
|
||||
out[i] = c[i];
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,15 +0,0 @@
|
||||
#ifndef PQCLEAN_MCELIECE348864_SSE_CONTROLBITS_H
|
||||
#define PQCLEAN_MCELIECE348864_SSE_CONTROLBITS_H
|
||||
/*
|
||||
This file is for functions required for generating the control bits of the Benes network w.r.t. a random permutation
|
||||
see the Lev-Pippenger-Valiant paper https://www.computer.org/csdl/trans/tc/1981/02/06312171.pdf
|
||||
*/
|
||||
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
void PQCLEAN_MCELIECE348864_SSE_sort_63b(int n, uint64_t *x);
|
||||
void PQCLEAN_MCELIECE348864_SSE_controlbits(unsigned char *out, const uint32_t *pi);
|
||||
|
||||
#endif
|
||||
|
||||
@ -1,7 +0,0 @@
|
||||
#ifndef PQCLEAN_MCELIECE348864_SSE_CRYPTO_HASH_H
|
||||
#define PQCLEAN_MCELIECE348864_SSE_CRYPTO_HASH_H
|
||||
#include "fips202.h"
|
||||
|
||||
#define crypto_hash_32b(out,in,inlen) shake256(out, 32, in, inlen)
|
||||
|
||||
#endif
|
||||
@ -1,203 +0,0 @@
|
||||
/*
|
||||
This file is for Niederreiter decryption
|
||||
*/
|
||||
|
||||
#include "decrypt.h"
|
||||
|
||||
#include "benes.h"
|
||||
#include "bm.h"
|
||||
#include "fft.h"
|
||||
#include "fft_tr.h"
|
||||
#include "params.h"
|
||||
#include "util.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
/*
  input: sk, secret key (the polynomial is loaded from it with irr_load)
         recv, 32 vectors that mask the result
  output: inv, element-wise inverses of the squared FFT evaluations
          out, inv AND-ed with recv (out[i][j] = inv[i][j] & recv[i])

  Only one call to vec128_inv is made: running products are built first,
  the last product is inverted, and the individual inverses are peeled off
  going backwards.
*/
static void scaling(vec128 out[][GFBITS], vec128 inv[][GFBITS], const unsigned char *sk, vec128 *recv) {
    int row, col;

    uint64_t irr_int[ GFBITS ];
    vec128 eval[32][ GFBITS ];
    vec128 tmp[ GFBITS ];

    /* evaluate the polynomial and square every evaluation */
    PQCLEAN_MCELIECE348864_SSE_irr_load(irr_int, sk);
    PQCLEAN_MCELIECE348864_SSE_fft(eval, irr_int);

    for (row = 0; row < 32; row++) {
        PQCLEAN_MCELIECE348864_SSE_vec128_sq(eval[row], eval[row]);
    }

    /* inv[row] = eval[0] * ... * eval[row] */
    PQCLEAN_MCELIECE348864_SSE_vec128_copy(inv[0], eval[0]);

    for (row = 1; row < 32; row++) {
        PQCLEAN_MCELIECE348864_SSE_vec128_mul(inv[row], inv[row - 1], eval[row]);
    }

    PQCLEAN_MCELIECE348864_SSE_vec128_inv(tmp, inv[31]);

    /* walk back: tmp always holds the inverse of the product up to row */
    for (row = 31; row > 0; row--) {
        PQCLEAN_MCELIECE348864_SSE_vec128_mul(inv[row], tmp, inv[row - 1]);
        PQCLEAN_MCELIECE348864_SSE_vec128_mul(tmp, tmp, eval[row]);
    }

    PQCLEAN_MCELIECE348864_SSE_vec128_copy(inv[0], tmp);

    /* mask with the received word */
    for (row = 0; row < 32; row++) {
        for (col = 0; col < GFBITS; col++) {
            out[row][col] = PQCLEAN_MCELIECE348864_SSE_vec128_and(inv[row][col], recv[row]);
        }
    }
}
|
||||
|
||||
/*
  input: s, SYND_BYTES bytes
  output: recv, 32 vectors loaded from s zero-padded to 512 bytes
*/
static void preprocess(vec128 *recv, const unsigned char *s) {
    uint8_t r[ 512 ];
    int pos = 0;
    int blk;

    while (pos < SYND_BYTES) {
        r[pos] = s[pos];
        pos++;
    }
    while (pos < 512) {
        r[pos] = 0;
        pos++;
    }

    for (blk = 0; blk < 32; blk++) {
        recv[blk] = PQCLEAN_MCELIECE348864_SSE_load16(r + blk * 16);
    }
}
|
||||
|
||||
/*
  input: err, 32 vectors
  output: e, the first SYS_N / 8 bytes of err serialised with store8
          (low 64-bit half of each vector first)
*/
static void postprocess(unsigned char *e, vec128 *err) {
    unsigned char error8[ (1 << GFBITS) / 8 ];
    uint64_t lo, hi;
    int idx;

    for (idx = 0; idx < 32; idx++) {
        lo = PQCLEAN_MCELIECE348864_SSE_vec128_extract(err[idx], 0);
        hi = PQCLEAN_MCELIECE348864_SSE_vec128_extract(err[idx], 1);

        PQCLEAN_MCELIECE348864_SSE_store8(error8 + idx * 16 + 0, lo);
        PQCLEAN_MCELIECE348864_SSE_store8(error8 + idx * 16 + 8, hi);
    }

    /* only the first SYS_N bits belong to the error vector */
    for (idx = 0; idx < SYS_N / 8; idx++) {
        e[idx] = error8[idx];
    }
}
|
||||
|
||||
/*
  output: out[i][j] = inv[i][j] & recv[i] for the 32 rows and GFBITS columns
  inv is only read; it is expected to have been filled by scaling().
*/
static void scaling_inv(vec128 out[][GFBITS], vec128 inv[][GFBITS], vec128 *recv) {
    int row, col;

    for (row = 0; row < 32; row++) {
        for (col = 0; col < GFBITS; col++) {
            out[row][col] = PQCLEAN_MCELIECE348864_SSE_vec128_and(inv[row][col], recv[row]);
        }
    }
}
|
||||
|
||||
/*
  input: e, byte form of the error vector (SYS_N / 8 bytes are counted)
         error, vector form of the error vector (32 vectors are counted)
  return: 1 if both population counts equal SYS_T, 0 otherwise
  The result is computed without branching on the weights.
*/
static uint16_t weight_check(unsigned char *e, vec128 *error) {
    uint16_t vec_weight = 0;
    uint16_t byte_weight = 0;
    uint16_t verdict;
    int idx;

    for (idx = 0; idx < 32; idx++) {
        vec_weight += _mm_popcnt_u64( PQCLEAN_MCELIECE348864_SSE_vec128_extract(error[idx], 0) );
        vec_weight += _mm_popcnt_u64( PQCLEAN_MCELIECE348864_SSE_vec128_extract(error[idx], 1) );
    }

    for (idx = 0; idx < SYS_N / 8; idx++) {
        byte_weight += _mm_popcnt_u32( e[idx] );
    }

    /* zero iff both weights match; 0 - 1 wraps to 0xFFFF, whose top bit is 1 */
    verdict = (vec_weight ^ SYS_T) | (byte_weight ^ SYS_T);
    verdict -= 1;
    verdict >>= 15;

    return verdict;
}
|
||||
|
||||
/*
  OR together the XOR differences of all GFBITS vector pairs and hand the
  result to vec128_testz.
  return: whatever vec128_testz reports for the accumulated difference
          (presumably 1 when the two inputs are identical - confirm against
          vec128.h)
*/
static uint64_t synd_cmp(vec128 s0[ GFBITS ], vec128 s1[ GFBITS ]) {
    vec128 acc = PQCLEAN_MCELIECE348864_SSE_vec128_xor(s0[0], s1[0]);
    int idx;

    for (idx = 1; idx < GFBITS; idx++) {
        acc = PQCLEAN_MCELIECE348864_SSE_vec128_or(acc, PQCLEAN_MCELIECE348864_SSE_vec128_xor(s0[idx], s1[idx]));
    }

    return PQCLEAN_MCELIECE348864_SSE_vec128_testz(acc);
}
|
||||
|
||||
/* Niederreiter decryption with the Berlekamp decoder */
|
||||
/* input: sk, secret key */
|
||||
/* c, ciphertext (syndrome) */
|
||||
/* output: e, error vector */
|
||||
/* return: 0 for success; 1 for failure */
|
||||
int PQCLEAN_MCELIECE348864_SSE_decrypt(unsigned char *e, const unsigned char *sk, const unsigned char *c) {
|
||||
int i;
|
||||
|
||||
uint16_t check_synd;
|
||||
uint16_t check_weight;
|
||||
|
||||
vec128 inv[ 32 ][ GFBITS ];
|
||||
vec128 scaled[ 32 ][ GFBITS ];
|
||||
vec128 eval[ 32 ][ GFBITS ];
|
||||
|
||||
vec128 error[ 32 ];
|
||||
|
||||
vec128 s_priv[ GFBITS ];
|
||||
vec128 s_priv_cmp[ GFBITS ];
|
||||
|
||||
uint64_t locator[ GFBITS ];
|
||||
|
||||
vec128 recv[ 32 ];
|
||||
vec128 allone;
|
||||
|
||||
uint64_t bits_int[23][32];
|
||||
|
||||
// Berlekamp decoder
|
||||
|
||||
preprocess(recv, c);
|
||||
|
||||
PQCLEAN_MCELIECE348864_SSE_load_bits(bits_int, sk + IRR_BYTES);
|
||||
PQCLEAN_MCELIECE348864_SSE_benes((uint64_t *) recv, bits_int, 1);
|
||||
|
||||
scaling(scaled, inv, sk, recv);
|
||||
|
||||
PQCLEAN_MCELIECE348864_SSE_fft_tr(s_priv, scaled);
|
||||
|
||||
PQCLEAN_MCELIECE348864_SSE_bm(locator, s_priv);
|
||||
|
||||
PQCLEAN_MCELIECE348864_SSE_fft(eval, locator);
|
||||
|
||||
// reencryption and weight check
|
||||
|
||||
allone = PQCLEAN_MCELIECE348864_SSE_vec128_setbits(1);
|
||||
|
||||
for (i = 0; i < 32; i++) {
|
||||
error[i] = PQCLEAN_MCELIECE348864_SSE_vec128_or_reduce(eval[i]);
|
||||
error[i] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(error[i], allone);
|
||||
}
|
||||
|
||||
scaling_inv(scaled, inv, error);
|
||||
PQCLEAN_MCELIECE348864_SSE_fft_tr(s_priv_cmp, scaled);
|
||||
|
||||
check_synd = synd_cmp(s_priv, s_priv_cmp);
|
||||
|
||||
//
|
||||
|
||||
PQCLEAN_MCELIECE348864_SSE_benes((uint64_t *) error, bits_int, 0);
|
||||
|
||||
postprocess(e, error);
|
||||
|
||||
check_weight = weight_check(e, error);
|
||||
|
||||
return 1 - (check_synd & check_weight);
|
||||
}
|
||||
|
||||
@ -1,10 +0,0 @@
|
||||
#ifndef PQCLEAN_MCELIECE348864_SSE_DECRYPT_H
|
||||
#define PQCLEAN_MCELIECE348864_SSE_DECRYPT_H
|
||||
/*
|
||||
This file is for Niederreiter decryption
|
||||
*/
|
||||
|
||||
int PQCLEAN_MCELIECE348864_SSE_decrypt(unsigned char * /*e*/, const unsigned char * /*sk*/, const unsigned char * /*c*/);
|
||||
|
||||
#endif
|
||||
|
||||
@ -1,99 +0,0 @@
|
||||
/*
|
||||
This file is for Niederreiter encryption
|
||||
*/
|
||||
|
||||
#include "encrypt.h"
|
||||
|
||||
#include "gf.h"
|
||||
#include "params.h"
|
||||
#include "randombytes.h"
|
||||
#include "util.h"
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/* input: public key pk, error vector e */
|
||||
/* output: syndrome s */
|
||||
extern void PQCLEAN_MCELIECE348864_SSE_syndrome_asm(unsigned char *s, const unsigned char *pk, unsigned char *e);
|
||||
|
||||
/* output: e, an error vector of weight t */
|
||||
static void gen_e(unsigned char *e) {
|
||||
size_t i, j;
|
||||
int eq, count;
|
||||
|
||||
uint16_t ind_[ SYS_T * 2 ];
|
||||
uint16_t ind[ SYS_T * 2 ];
|
||||
uint64_t e_int[ (SYS_N + 63) / 64 ];
|
||||
uint64_t one = 1;
|
||||
uint64_t mask;
|
||||
uint64_t val[ SYS_T ];
|
||||
|
||||
while (1) {
|
||||
randombytes((uint8_t *)ind_, sizeof(ind_));
|
||||
|
||||
for (i = 0; i < SYS_T * 2; i++) {
|
||||
ind_[i] &= GFMASK;
|
||||
}
|
||||
|
||||
count = 0;
|
||||
for (i = 0; i < SYS_T * 2; i++) {
|
||||
if (ind_[i] < SYS_N) {
|
||||
ind[ count++ ] = ind_[i];
|
||||
}
|
||||
}
|
||||
|
||||
if (count < SYS_T) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// check for repetition
|
||||
|
||||
eq = 0;
|
||||
|
||||
for (i = 1; i < SYS_T; i++) {
|
||||
for (j = 0; j < i; j++) {
|
||||
if (ind[i] == ind[j]) {
|
||||
eq = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (eq == 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (j = 0; j < SYS_T; j++) {
|
||||
val[j] = one << (ind[j] & 63);
|
||||
}
|
||||
|
||||
for (i = 0; i < (SYS_N + 63) / 64; i++) {
|
||||
e_int[i] = 0;
|
||||
|
||||
for (j = 0; j < SYS_T; j++) {
|
||||
mask = i ^ (ind[j] >> 6);
|
||||
mask -= 1;
|
||||
mask >>= 63;
|
||||
mask = -mask;
|
||||
|
||||
e_int[i] |= val[j] & mask;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < (SYS_N + 63) / 64 - 1; i++) {
|
||||
PQCLEAN_MCELIECE348864_SSE_store8(e, e_int[i]);
|
||||
e += 8;
|
||||
}
|
||||
|
||||
for (j = 0; j < (SYS_N % 64); j += 8) {
|
||||
e[ j / 8 ] = (e_int[i] >> j) & 0xFF;
|
||||
}
|
||||
}
|
||||
|
||||
/*
  input: pk, public key
  output: e, freshly generated error vector (see gen_e)
          s, syndrome of e under pk, computed by the assembly routine
*/
void PQCLEAN_MCELIECE348864_SSE_encrypt(unsigned char *s, unsigned char *e, const unsigned char *pk) {
    /* e must be generated first: the syndrome routine reads it */
    gen_e(e);

    PQCLEAN_MCELIECE348864_SSE_syndrome_asm(s, pk, e);
}
|
||||
|
||||
@ -1,11 +0,0 @@
|
||||
#ifndef PQCLEAN_MCELIECE348864_SSE_ENCRYPT_H
|
||||
#define PQCLEAN_MCELIECE348864_SSE_ENCRYPT_H
|
||||
/*
|
||||
This file is for Niederreiter encryption
|
||||
*/
|
||||
|
||||
|
||||
void PQCLEAN_MCELIECE348864_SSE_encrypt(unsigned char * /*s*/, unsigned char * /*e*/, const unsigned char * /*pk*/);
|
||||
|
||||
#endif
|
||||
|
||||
@ -1,155 +0,0 @@
|
||||
/*
|
||||
This file is for the Gao-Mateer FFT
|
||||
see http://www.math.clemson.edu/~sgao/papers/GM10.pdf
|
||||
*/
|
||||
|
||||
#include "fft.h"
|
||||
|
||||
#include "vec.h"
|
||||
#include "vec128.h"
|
||||
|
||||
/* input: in, polynomial in bitsliced form */
|
||||
/* output: in, result of applying the radix conversions on in */
|
||||
static void radix_conversions(uint64_t *in) {
|
||||
int i, j, k;
|
||||
|
||||
const uint64_t mask[5][2] = {
|
||||
{0x8888888888888888, 0x4444444444444444},
|
||||
{0xC0C0C0C0C0C0C0C0, 0x3030303030303030},
|
||||
{0xF000F000F000F000, 0x0F000F000F000F00},
|
||||
{0xFF000000FF000000, 0x00FF000000FF0000},
|
||||
{0xFFFF000000000000, 0x0000FFFF00000000}
|
||||
};
|
||||
|
||||
const uint64_t s[5][GFBITS] = {
|
||||
#include "scalars.inc"
|
||||
};
|
||||
|
||||
//
|
||||
|
||||
for (j = 0; j <= 4; j++) {
|
||||
for (i = 0; i < GFBITS; i++) {
|
||||
for (k = 4; k >= j; k--) {
|
||||
in[i] ^= (in[i] & mask[k][0]) >> (1 << k);
|
||||
in[i] ^= (in[i] & mask[k][1]) >> (1 << k);
|
||||
}
|
||||
}
|
||||
|
||||
PQCLEAN_MCELIECE348864_SSE_vec_mul(in, in, s[j]); // scaling
|
||||
}
|
||||
}
|
||||
|
||||
/* input: in, result of applying the radix conversions to the input polynomial */
|
||||
/* output: out, evaluation results (by applying the FFT butterflies) */
|
||||
static void butterflies(vec128 out[][ GFBITS ], const uint64_t *in) {
|
||||
int i, j, k, s, b;
|
||||
|
||||
uint64_t t0, t1;
|
||||
|
||||
const vec128 consts[ 32 ][ GFBITS ] = {
|
||||
#include "consts.inc"
|
||||
};
|
||||
|
||||
uint64_t consts_ptr = 0;
|
||||
|
||||
const uint8_t reversal[64] = {
|
||||
0, 32, 16, 48, 8, 40, 24, 56,
|
||||
4, 36, 20, 52, 12, 44, 28, 60,
|
||||
2, 34, 18, 50, 10, 42, 26, 58,
|
||||
6, 38, 22, 54, 14, 46, 30, 62,
|
||||
1, 33, 17, 49, 9, 41, 25, 57,
|
||||
5, 37, 21, 53, 13, 45, 29, 61,
|
||||
3, 35, 19, 51, 11, 43, 27, 59,
|
||||
7, 39, 23, 55, 15, 47, 31, 63
|
||||
};
|
||||
|
||||
// boradcast
|
||||
|
||||
vec128 tmp[ GFBITS ];
|
||||
vec128 x[ GFBITS ], y[ GFBITS ];
|
||||
|
||||
for (j = 0; j < 64; j += 4) {
|
||||
for (i = 0; i < GFBITS; i++) {
|
||||
t0 = (in[i] >> reversal[j + 0]) & 1;
|
||||
t0 = -t0;
|
||||
t1 = (in[i] >> reversal[j + 2]) & 1;
|
||||
t1 = -t1;
|
||||
|
||||
out[j / 2 + 0][i] = PQCLEAN_MCELIECE348864_SSE_vec128_set2x(t0, t1);
|
||||
|
||||
t0 = (in[i] >> reversal[j + 1]) & 1;
|
||||
t0 = -t0;
|
||||
t1 = (in[i] >> reversal[j + 3]) & 1;
|
||||
t1 = -t1;
|
||||
|
||||
out[j / 2 + 1][i] = PQCLEAN_MCELIECE348864_SSE_vec128_set2x(t0, t1);
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
|
||||
for (i = 0; i < 32; i += 2) {
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_mul(tmp, out[i + 1], consts[ 0 ]);
|
||||
|
||||
for (b = 0; b < GFBITS; b++) {
|
||||
out[i + 0][b] ^= tmp[b];
|
||||
}
|
||||
for (b = 0; b < GFBITS; b++) {
|
||||
out[i + 1][b] ^= out[i + 0][b];
|
||||
}
|
||||
|
||||
for (b = 0; b < GFBITS; b++) {
|
||||
x[b] = PQCLEAN_MCELIECE348864_SSE_vec128_unpack_low(out[i + 0][b], out[i + 1][b]);
|
||||
}
|
||||
for (b = 0; b < GFBITS; b++) {
|
||||
y[b] = PQCLEAN_MCELIECE348864_SSE_vec128_unpack_high(out[i + 0][b], out[i + 1][b]);
|
||||
}
|
||||
|
||||
for (b = 0; b < GFBITS; b++) {
|
||||
out[i + 0][b] = x[b];
|
||||
}
|
||||
for (b = 0; b < GFBITS; b++) {
|
||||
out[i + 1][b] = y[b];
|
||||
}
|
||||
}
|
||||
|
||||
consts_ptr += 1;
|
||||
|
||||
for (i = 0; i <= 4; i++) {
|
||||
s = 1 << i;
|
||||
|
||||
for (j = 0; j < 32; j += 2 * s) {
|
||||
for (k = j; k < j + s; k++) {
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_mul(tmp, out[k + s], consts[ consts_ptr + (k - j) ]);
|
||||
|
||||
for (b = 0; b < GFBITS; b++) {
|
||||
out[k][b] ^= tmp[b];
|
||||
}
|
||||
for (b = 0; b < GFBITS; b++) {
|
||||
out[k + s][b] ^= out[k][b];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
consts_ptr += s;
|
||||
}
|
||||
|
||||
// adding the part contributed by x^64
|
||||
|
||||
vec128 powers[32][GFBITS] = {
|
||||
#include "powers.inc"
|
||||
};
|
||||
|
||||
for (i = 0; i < 32; i++) {
|
||||
for (b = 0; b < GFBITS; b++) {
|
||||
out[i][b] ^= powers[i][b];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
  input: in, polynomial in bitsliced form; overwritten by the radix conversions
  output: out, evaluation results
*/
void PQCLEAN_MCELIECE348864_SSE_fft(vec128 out[][ GFBITS ], uint64_t *in) {
    /* the conversions work in place, the butterflies then read the converted input */
    radix_conversions(in);

    butterflies(out, in);
}
|
||||
|
||||
@ -1,17 +0,0 @@
|
||||
#ifndef PQCLEAN_MCELIECE348864_SSE_FFT_H
|
||||
#define PQCLEAN_MCELIECE348864_SSE_FFT_H
|
||||
/*
|
||||
This file is for the Gao-Mateer FFT
|
||||
see http://www.math.clemson.edu/~sgao/papers/GM10.pdf
|
||||
*/
|
||||
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "params.h"
|
||||
#include "vec128.h"
|
||||
|
||||
void PQCLEAN_MCELIECE348864_SSE_fft(vec128 /*out*/[][GFBITS], uint64_t * /*in*/);
|
||||
|
||||
#endif
|
||||
|
||||
@ -1,312 +0,0 @@
|
||||
/*
|
||||
This file is for transpose of the Gao-Mateer FFT
|
||||
Functions with names ending with _tr are (roughly) the transpose of the corresponding functions in fft.c
|
||||
*/
|
||||
|
||||
#include "fft_tr.h"
|
||||
|
||||
#include "transpose.h"
|
||||
#include "vec.h"
|
||||
#include "vec128.h"
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/*
  In-place transposed radix conversions on GFBITS vectors.
  Rounds run from j = 5 down to 0.  Every round except the first multiplies
  by the scalars s[j]; then each vector gets the masked left-shift folds for
  k = j..4 and a final mixing step across its two 64-bit lanes.
*/
static void radix_conversions_tr(vec128 in[ GFBITS ]) {
    int round, limb, k;

    uint64_t lane0, lane1;

    const vec128 mask[10] = {
        PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x2222222222222222, 0x2222222222222222),
        PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x4444444444444444, 0x4444444444444444),
        PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0C0C0C0C0C0C0C0C, 0x0C0C0C0C0C0C0C0C),
        PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3030303030303030, 0x3030303030303030),
        PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00F000F000F000F0, 0x00F000F000F000F0),
        PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F000F000F000F00, 0x0F000F000F000F00),
        PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000FF000000FF00, 0x0000FF000000FF00),
        PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FF000000FF0000, 0x00FF000000FF0000),
        PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00000000FFFF0000, 0x00000000FFFF0000),
        PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000FFFF00000000, 0x0000FFFF00000000)
    };

    const vec128 s[5][GFBITS] = {
#include "scalars_2x.inc"
    };

    for (round = 5; round >= 0; round--) {

        if (round < 5) {
            PQCLEAN_MCELIECE348864_SSE_vec128_mul(in, in, s[round]);
        }

        for (limb = 0; limb < GFBITS; limb++) {
            /* masked folds; no iterations when round == 5 */
            for (k = round; k <= 4; k++) {
                in[limb] ^= PQCLEAN_MCELIECE348864_SSE_vec128_sll_2x(in[limb] & mask[2 * k + 0], 1 << k);
                in[limb] ^= PQCLEAN_MCELIECE348864_SSE_vec128_sll_2x(in[limb] & mask[2 * k + 1], 1 << k);
            }

            /* mix across the two 64-bit lanes */
            lane0 = PQCLEAN_MCELIECE348864_SSE_vec128_extract(in[limb], 0);
            lane1 = PQCLEAN_MCELIECE348864_SSE_vec128_extract(in[limb], 1);

            lane1 ^= lane0 >> 32;
            lane1 ^= lane1 << 32;

            in[limb] = PQCLEAN_MCELIECE348864_SSE_vec128_set2x(lane0, lane1);
        }
    }
}
|
||||
|
||||
/*
  Transposed FFT butterflies.
  input: in, 32 rows of GFBITS vectors (modified in place as scratch)
  output: out, GFBITS vectors; lane 0 comes from the accumulated buf[0],
          lane 1 from the sum over i of pre[i] * beta[i]
*/
static void butterflies_tr(vec128 out[ GFBITS ], vec128 in[][ GFBITS ]) {
    int i, j, k, s, b;
    int step, cur, prev, slot;
    unsigned int seen;

    uint64_t t[ GFBITS ];
    uint64_t pre[6][ GFBITS ];
    uint64_t out64[2][GFBITS];

    vec128 p2[ 6 ];
    vec128 buf[64];
    vec128 tt[ GFBITS ];
    vec128 lo;
    vec128 y[ GFBITS ];

    const vec128 consts[ 32 ][ GFBITS ] = {
#include "consts.inc"
    };

    uint64_t consts_ptr = 32;

    const uint8_t reversal[64] = {
        0, 32, 16, 48,  8, 40, 24, 56,
        4, 36, 20, 52, 12, 44, 28, 60,
        2, 34, 18, 50, 10, 42, 26, 58,
        6, 38, 22, 54, 14, 46, 30, 62,
        1, 33, 17, 49,  9, 41, 25, 57,
        5, 37, 21, 53, 13, 45, 29, 61,
        3, 35, 19, 51, 11, 43, 27, 59,
        7, 39, 23, 55, 15, 47, 31, 63
    };

    const uint16_t beta[6] = {8, 1300, 3408, 1354, 2341, 1154};

    /*
      Accumulation walk over buf, starting at buf[32].  Entry {n, p} means:
      buf[n] ^= buf[previous]; then buf[n] is folded into p2[p] (assigned the
      first time p2[p] is touched in an iteration, XOR-ed afterwards).
      The closing step buf[0] ^= buf[1] touches no p2 entry and is done
      after the loop.
    */
    const uint8_t walk[62][2] = {
        {33, 1}, {35, 0}, {34, 2}, {38, 0}, {39, 1}, {37, 0}, {36, 3}, {44, 0},
        {45, 1}, {47, 0}, {46, 2}, {42, 0}, {43, 1}, {41, 0}, {40, 4}, {56, 0},
        {57, 1}, {59, 0}, {58, 2}, {62, 0}, {63, 1}, {61, 0}, {60, 3}, {52, 0},
        {53, 1}, {55, 0}, {54, 2}, {50, 0}, {51, 1}, {49, 0}, {48, 5}, {16, 0},
        {17, 1}, {19, 0}, {18, 2}, {22, 0}, {23, 1}, {21, 0}, {20, 3}, {28, 0},
        {29, 1}, {31, 0}, {30, 2}, {26, 0}, {27, 1}, {25, 0}, {24, 4}, { 8, 0},
        { 9, 1}, {11, 0}, {10, 2}, {14, 0}, {15, 1}, {13, 0}, {12, 3}, { 4, 0},
        { 5, 1}, { 7, 0}, { 6, 2}, { 2, 0}, { 3, 1}, { 1, 0}
    };

    // butterflies, strides 16, 8, 4, 2, 1

    for (i = 4; i >= 0; i--) {
        s = 1 << i;
        consts_ptr -= s;

        for (j = 0; j < 32; j += 2 * s) {
            for (k = j; k < j + s; k++) {
                for (b = 0; b < GFBITS; b++) {
                    in[k][b] ^= in[k + s][b];
                }

                PQCLEAN_MCELIECE348864_SSE_vec128_mul(tt, in[k], consts[ consts_ptr + (k - j) ]);

                for (b = 0; b < GFBITS; b++) {
                    in[k + s][b] ^= tt[b];
                }
            }
        }
    }

    // last layer: regroup the 64-bit halves of each pair, then butterfly with consts[0]

    for (i = 0; i < 32; i += 2) {
        for (b = 0; b < GFBITS; b++) {
            lo = PQCLEAN_MCELIECE348864_SSE_vec128_unpack_low(in[i + 0][b], in[i + 1][b]);
            y[b] = PQCLEAN_MCELIECE348864_SSE_vec128_unpack_high(in[i + 0][b], in[i + 1][b]);

            in[i + 0][b] = lo ^ y[b];
        }

        PQCLEAN_MCELIECE348864_SSE_vec128_mul(tt, in[i + 0], consts[ 0 ]);

        for (b = 0; b < GFBITS; b++) {
            in[i + 1][b] = y[b] ^ tt[b];
        }
    }

    // transpose, two bit positions (i, i + 1) at a time

    for (i = 0; i < GFBITS; i += 2) {
        for (j = 0; j < 64; j += 4) {
            buf[ reversal[j + 0] ] = PQCLEAN_MCELIECE348864_SSE_vec128_set2x(
                                         PQCLEAN_MCELIECE348864_SSE_vec128_extract(in[j / 2 + 0][i + 0], 0),
                                         PQCLEAN_MCELIECE348864_SSE_vec128_extract(in[j / 2 + 0][i + 1], 0));
            buf[ reversal[j + 1] ] = PQCLEAN_MCELIECE348864_SSE_vec128_set2x(
                                         PQCLEAN_MCELIECE348864_SSE_vec128_extract(in[j / 2 + 1][i + 0], 0),
                                         PQCLEAN_MCELIECE348864_SSE_vec128_extract(in[j / 2 + 1][i + 1], 0));
            buf[ reversal[j + 2] ] = PQCLEAN_MCELIECE348864_SSE_vec128_set2x(
                                         PQCLEAN_MCELIECE348864_SSE_vec128_extract(in[j / 2 + 0][i + 0], 1),
                                         PQCLEAN_MCELIECE348864_SSE_vec128_extract(in[j / 2 + 0][i + 1], 1));
            buf[ reversal[j + 3] ] = PQCLEAN_MCELIECE348864_SSE_vec128_set2x(
                                         PQCLEAN_MCELIECE348864_SSE_vec128_extract(in[j / 2 + 1][i + 0], 1),
                                         PQCLEAN_MCELIECE348864_SSE_vec128_extract(in[j / 2 + 1][i + 1], 1));
        }

        PQCLEAN_MCELIECE348864_SSE_transpose_64x128_sp(buf);

        /* accumulation walk (see the table above) */
        p2[0] = buf[32];
        seen = 1;
        prev = 32;

        for (step = 0; step < 62; step++) {
            cur = walk[step][0];
            slot = walk[step][1];

            buf[cur] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[cur], buf[prev]);

            if ((seen >> slot) & 1) {
                p2[slot] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(p2[slot], buf[cur]);
            } else {
                p2[slot] = buf[cur];
                seen |= 1u << slot;
            }

            prev = cur;
        }

        buf[0] = PQCLEAN_MCELIECE348864_SSE_vec128_xor(buf[0], buf[1]);

        for (j = 0; j < 6; j++) {
            pre[j][i + 0] = PQCLEAN_MCELIECE348864_SSE_vec128_extract(p2[j], 0);
            pre[j][i + 1] = PQCLEAN_MCELIECE348864_SSE_vec128_extract(p2[j], 1);
        }

        out64[0][i + 0] = PQCLEAN_MCELIECE348864_SSE_vec128_extract(buf[0], 0);
        out64[0][i + 1] = PQCLEAN_MCELIECE348864_SSE_vec128_extract(buf[0], 1);
    }

    // out64[1] = sum over i of pre[i] * beta[i], with beta[i] broadcast to bitsliced form

    for (j = 0; j < GFBITS; j++) {
        t[j] = 0 - (uint64_t)((beta[0] >> j) & 1);
    }

    PQCLEAN_MCELIECE348864_SSE_vec_mul(out64[1], pre[0], t);

    for (i = 1; i < 6; i++) {
        for (j = 0; j < GFBITS; j++) {
            t[j] = 0 - (uint64_t)((beta[i] >> j) & 1);
        }

        PQCLEAN_MCELIECE348864_SSE_vec_mul(t, pre[i], t);
        PQCLEAN_MCELIECE348864_SSE_vec_add(out64[1], out64[1], t);
    }

    for (b = 0; b < GFBITS; b++) {
        out[b] = PQCLEAN_MCELIECE348864_SSE_vec128_set2x(out64[0][b], out64[1][b]);
    }
}
|
||||
|
||||
/* Entry point for the transposed FFT: runs butterflies_tr on (out, in) and */
/* then applies radix_conversions_tr to out. Nothing is returned; the result */
/* is whatever those two helpers leave in out. */
void PQCLEAN_MCELIECE348864_SSE_fft_tr(vec128 out[GFBITS], vec128 in[][ GFBITS ]) {
|
||||
butterflies_tr(out, in);
|
||||
radix_conversions_tr(out);
|
||||
}
|
||||
|
||||
@ -1,13 +0,0 @@
|
||||
#ifndef PQCLEAN_MCELIECE348864_SSE_FFT_TR_H
|
||||
#define PQCLEAN_MCELIECE348864_SSE_FFT_TR_H
|
||||
/*
|
||||
This file is for transpose of the Gao-Mateer FFT
|
||||
*/
|
||||
|
||||
#include "params.h"
|
||||
#include "vec128.h"
|
||||
|
||||
void PQCLEAN_MCELIECE348864_SSE_fft_tr(vec128 /*out*/[GFBITS], vec128 /*in*/[][ GFBITS ]);
|
||||
|
||||
#endif
|
||||
|
||||
@ -1,169 +0,0 @@
|
||||
/*
|
||||
This file is for functions for field arithmetic
|
||||
*/
|
||||
|
||||
#include "gf.h"
|
||||
|
||||
#include "params.h"
|
||||
|
||||
/* Branch-free zero test. */
/* return: 0x0FFF when a == 0, 0 for every other value of a */
gf PQCLEAN_MCELIECE348864_SSE_gf_iszero(gf a) {
    /* a - 1 wraps to 0xFFFFFFFF only when a is zero; for any other 16-bit
       input the difference stays below 2^16, so bits 20 and up are clear. */
    const uint32_t wrapped = (uint32_t) a - 1;

    return (gf) (wrapped >> 20);
}
|
||||
|
||||
/* Field addition, implemented as the bitwise XOR of the two operands. */
gf PQCLEAN_MCELIECE348864_SSE_gf_add(gf in0, gf in1) {
    const gf sum = in0 ^ in1;

    return sum;
}
|
||||
|
||||
/* Field multiplication. */
/* input: field elements in0, in1 */
/* return: in0*in1, reduced to GFBITS bits */
gf PQCLEAN_MCELIECE348864_SSE_gf_mul(gf in0, gf in1) {
    const uint32_t lhs = in0;
    const uint32_t rhs = in1;
    uint32_t acc;
    uint32_t high;
    int bit;

    /* carry-less schoolbook product: XOR in one shifted copy of lhs for
       every bit of rhs, selected by multiplication instead of a branch */
    acc = lhs * (rhs & 1);

    for (bit = 1; bit < GFBITS; bit++) {
        acc ^= lhs * (rhs & (1 << bit));
    }

    /* fold bits 14..22 down by 9 and by 12 positions (x^12 = x^3 + 1) */
    high = acc & 0x7FC000;
    acc ^= (high >> 9) ^ (high >> 12);

    /* fold the remaining bits 12..13 the same way */
    high = acc & 0x3000;
    acc ^= (high >> 9) ^ (high >> 12);

    return acc & ((1 << GFBITS) - 1);
}
|
||||
|
||||
/* input: field element in */
|
||||
/* return: in^2 */
|
||||
static inline gf gf_sq(gf in) {
|
||||
const uint32_t B[] = {0x55555555, 0x33333333, 0x0F0F0F0F, 0x00FF00FF};
|
||||
|
||||
uint32_t x = in;
|
||||
uint32_t t;
|
||||
|
||||
x = (x | (x << 8)) & B[3];
|
||||
x = (x | (x << 4)) & B[2];
|
||||
x = (x | (x << 2)) & B[1];
|
||||
x = (x | (x << 1)) & B[0];
|
||||
|
||||
t = x & 0x7FC000;
|
||||
x ^= t >> 9;
|
||||
x ^= t >> 12;
|
||||
|
||||
t = x & 0x3000;
|
||||
x ^= t >> 9;
|
||||
x ^= t >> 12;
|
||||
|
||||
return x & ((1 << GFBITS) - 1);
|
||||
}
|
||||
|
||||
/* input: field element in */
/* return: in raised to the exponent 111111111110 (binary), built with a */
/* fixed chain of squarings and multiplications; the trailing comments give */
/* the exponent reached so far, in binary */
gf PQCLEAN_MCELIECE348864_SSE_gf_inv(gf in) {
    gf pow_11;
    gf pow_1111;
    gf acc;

    pow_11 = PQCLEAN_MCELIECE348864_SSE_gf_mul(gf_sq(in), in); // 11

    acc = gf_sq(gf_sq(pow_11));
    pow_1111 = PQCLEAN_MCELIECE348864_SSE_gf_mul(acc, pow_11); // 1111

    acc = gf_sq(gf_sq(gf_sq(gf_sq(pow_1111))));
    acc = PQCLEAN_MCELIECE348864_SSE_gf_mul(acc, pow_1111); // 11111111

    acc = gf_sq(gf_sq(acc));
    acc = PQCLEAN_MCELIECE348864_SSE_gf_mul(acc, pow_11); // 1111111111

    acc = gf_sq(acc);
    acc = PQCLEAN_MCELIECE348864_SSE_gf_mul(acc, in); // 11111111111

    return gf_sq(acc); // 111111111110
}
|
||||
|
||||
/* input: field element den, num */
|
||||
/* return: (num/den) */
|
||||
gf PQCLEAN_MCELIECE348864_SSE_gf_frac(gf den, gf num) {
|
||||
return PQCLEAN_MCELIECE348864_SSE_gf_mul(PQCLEAN_MCELIECE348864_SSE_gf_inv(den), num);
|
||||
}
|
||||
|
||||
/* input: in0, in1 in GF((2^m)^t)*/
|
||||
/* output: out = in0*in1 */
|
||||
void PQCLEAN_MCELIECE348864_SSE_GF_mul(gf *out, const gf *in0, const gf *in1) {
|
||||
int i, j;
|
||||
|
||||
gf prod[ SYS_T * 2 - 1 ];
|
||||
|
||||
for (i = 0; i < SYS_T * 2 - 1; i++) {
|
||||
prod[i] = 0;
|
||||
}
|
||||
|
||||
for (i = 0; i < SYS_T; i++) {
|
||||
for (j = 0; j < SYS_T; j++) {
|
||||
prod[i + j] ^= PQCLEAN_MCELIECE348864_SSE_gf_mul(in0[i], in1[j]);
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
for (i = (SYS_T - 1) * 2; i >= SYS_T; i--) {
|
||||
prod[i - SYS_T + 9] ^= PQCLEAN_MCELIECE348864_SSE_gf_mul(prod[i], (gf) 877);
|
||||
prod[i - SYS_T + 7] ^= PQCLEAN_MCELIECE348864_SSE_gf_mul(prod[i], (gf) 2888);
|
||||
prod[i - SYS_T + 5] ^= PQCLEAN_MCELIECE348864_SSE_gf_mul(prod[i], (gf) 1781);
|
||||
prod[i - SYS_T + 0] ^= PQCLEAN_MCELIECE348864_SSE_gf_mul(prod[i], (gf) 373);
|
||||
}
|
||||
|
||||
for (i = 0; i < SYS_T; i++) {
|
||||
out[i] = prod[i];
|
||||
}
|
||||
}
|
||||
|
||||
/* 2 field multiplications */
|
||||
uint64_t PQCLEAN_MCELIECE348864_SSE_gf_mul2(gf a, gf b0, gf b1) {
|
||||
int i;
|
||||
|
||||
uint64_t tmp = 0;
|
||||
uint64_t t0;
|
||||
uint64_t t1;
|
||||
uint64_t t;
|
||||
uint64_t mask = 0x0000000100000001;
|
||||
|
||||
t0 = a;
|
||||
t1 = b1;
|
||||
t1 = (t1 << 32) | b0;
|
||||
|
||||
for (i = 0; i < GFBITS; i++) {
|
||||
tmp ^= t0 * (t1 & mask);
|
||||
mask += mask;
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
t = tmp & 0x007FC000007FC000;
|
||||
tmp ^= (t >> 9) ^ (t >> 12);
|
||||
|
||||
t = tmp & 0x0000300000003000;
|
||||
tmp ^= (t >> 9) ^ (t >> 12);
|
||||
|
||||
return tmp & 0x00000FFF00000FFF;
|
||||
}
|
||||
|
||||
@ -1,26 +0,0 @@
|
||||
#ifndef PQCLEAN_MCELIECE348864_SSE_GF_H
|
||||
#define PQCLEAN_MCELIECE348864_SSE_GF_H
|
||||
/*
|
||||
This file is for functions for field arithmetic
|
||||
*/
|
||||
|
||||
|
||||
#include "params.h"
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/* Storage type for one field element as taken and returned by the gf_* routines. */
typedef uint16_t gf;
|
||||
|
||||
gf PQCLEAN_MCELIECE348864_SSE_gf_iszero(gf /*a*/);
|
||||
gf PQCLEAN_MCELIECE348864_SSE_gf_add(gf /*in0*/, gf /*in1*/);
|
||||
gf PQCLEAN_MCELIECE348864_SSE_gf_mul(gf /*in0*/, gf /*in1*/);
|
||||
gf PQCLEAN_MCELIECE348864_SSE_gf_frac(gf /*den*/, gf /*num*/);
|
||||
gf PQCLEAN_MCELIECE348864_SSE_gf_inv(gf /*in*/);
|
||||
|
||||
void PQCLEAN_MCELIECE348864_SSE_GF_mul(gf * /*out*/, const gf * /*in0*/, const gf * /*in1*/);
|
||||
|
||||
/* 2 field multiplications */
|
||||
uint64_t PQCLEAN_MCELIECE348864_SSE_gf_mul2(gf a, gf b0, gf b1);
|
||||
|
||||
#endif
|
||||
|
||||
@ -1,136 +0,0 @@
|
||||
#include "api.h"
|
||||
|
||||
#include "aes256ctr.h"
|
||||
#include "controlbits.h"
|
||||
#include "crypto_hash.h"
|
||||
#include "decrypt.h"
|
||||
#include "encrypt.h"
|
||||
#include "params.h"
|
||||
#include "pk_gen.h"
|
||||
#include "randombytes.h"
|
||||
#include "sk_gen.h"
|
||||
#include "util.h"
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
int PQCLEAN_MCELIECE348864_SSE_crypto_kem_enc(
|
||||
uint8_t *c,
|
||||
uint8_t *key,
|
||||
const uint8_t *pk
|
||||
) {
|
||||
uint8_t two_e[ 1 + SYS_N / 8 ] = {2};
|
||||
uint8_t *e = two_e + 1;
|
||||
uint8_t one_ec[ 1 + SYS_N / 8 + (SYND_BYTES + 32) ] = {1};
|
||||
|
||||
PQCLEAN_MCELIECE348864_SSE_encrypt(c, e, pk);
|
||||
|
||||
crypto_hash_32b(c + SYND_BYTES, two_e, sizeof(two_e));
|
||||
|
||||
memcpy(one_ec + 1, e, SYS_N / 8);
|
||||
memcpy(one_ec + 1 + SYS_N / 8, c, SYND_BYTES + 32);
|
||||
|
||||
crypto_hash_32b(key, one_ec, sizeof(one_ec));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int PQCLEAN_MCELIECE348864_SSE_crypto_kem_dec(
|
||||
uint8_t *key,
|
||||
const uint8_t *c,
|
||||
const uint8_t *sk
|
||||
) {
|
||||
int i;
|
||||
|
||||
uint8_t ret_confirm = 0;
|
||||
uint8_t ret_decrypt = 0;
|
||||
|
||||
uint16_t m;
|
||||
|
||||
uint8_t conf[32];
|
||||
uint8_t two_e[ 1 + SYS_N / 8 ] = {2};
|
||||
uint8_t *e = two_e + 1;
|
||||
uint8_t preimage[ 1 + SYS_N / 8 + (SYND_BYTES + 32) ];
|
||||
uint8_t *x = preimage;
|
||||
|
||||
//
|
||||
|
||||
ret_decrypt = (uint8_t)PQCLEAN_MCELIECE348864_SSE_decrypt(e, sk + SYS_N / 8, c);
|
||||
|
||||
crypto_hash_32b(conf, two_e, sizeof(two_e));
|
||||
|
||||
for (i = 0; i < 32; i++) {
|
||||
ret_confirm |= conf[i] ^ c[SYND_BYTES + i];
|
||||
}
|
||||
|
||||
m = ret_decrypt | ret_confirm;
|
||||
m -= 1;
|
||||
m >>= 8;
|
||||
|
||||
*x++ = (~m & 0) | (m & 1);
|
||||
for (i = 0; i < SYS_N / 8; i++) {
|
||||
*x++ = (~m & sk[i]) | (m & e[i]);
|
||||
}
|
||||
for (i = 0; i < SYND_BYTES + 32; i++) {
|
||||
*x++ = c[i];
|
||||
}
|
||||
|
||||
crypto_hash_32b(key, preimage, sizeof(preimage));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int PQCLEAN_MCELIECE348864_SSE_crypto_kem_keypair
|
||||
(
|
||||
uint8_t *pk,
|
||||
uint8_t *sk
|
||||
) {
|
||||
int i;
|
||||
uint8_t seed[ 32 ];
|
||||
uint8_t r[ SYS_T * 2 + (1 << GFBITS)*sizeof(uint32_t) + SYS_N / 8 + 32 ];
|
||||
uint8_t nonce[ 16 ] = {0};
|
||||
uint8_t *rp;
|
||||
|
||||
gf f[ SYS_T ]; // element in GF(2^mt)
|
||||
gf irr[ SYS_T ]; // Goppa polynomial
|
||||
uint32_t perm[ 1 << GFBITS ]; // random permutation
|
||||
|
||||
randombytes(seed, sizeof(seed));
|
||||
|
||||
while (1) {
|
||||
rp = r;
|
||||
PQCLEAN_MCELIECE348864_SSE_aes256ctr(r, sizeof(r), nonce, seed);
|
||||
memcpy(seed, &r[ sizeof(r) - 32 ], 32);
|
||||
|
||||
for (i = 0; i < SYS_T; i++) {
|
||||
f[i] = PQCLEAN_MCELIECE348864_SSE_load2(rp + i * 2);
|
||||
}
|
||||
rp += sizeof(f);
|
||||
if (PQCLEAN_MCELIECE348864_SSE_genpoly_gen(irr, f)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (i = 0; i < (1 << GFBITS); i++) {
|
||||
perm[i] = PQCLEAN_MCELIECE348864_SSE_load4(rp + i * 4);
|
||||
}
|
||||
rp += sizeof(perm);
|
||||
if (PQCLEAN_MCELIECE348864_SSE_perm_check(perm)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (i = 0; i < SYS_T; i++) {
|
||||
PQCLEAN_MCELIECE348864_SSE_store2(sk + SYS_N / 8 + i * 2, irr[i]);
|
||||
}
|
||||
if (PQCLEAN_MCELIECE348864_SSE_pk_gen(pk, perm, sk + SYS_N / 8)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
memcpy(sk, rp, SYS_N / 8);
|
||||
PQCLEAN_MCELIECE348864_SSE_controlbits(sk + SYS_N / 8 + IRR_BYTES, perm);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1,21 +0,0 @@
|
||||
#ifndef PQCLEAN_MCELIECE348864_SSE_PARAMS_H
#define PQCLEAN_MCELIECE348864_SSE_PARAMS_H

/* base parameters */
#define GFBITS 12
#define SYS_N  3488
#define SYS_T  64

/* byte sizes of the secret-key components that follow the leading SYS_N/8 bytes */
#define COND_BYTES ((1 << (GFBITS-4))*(2*GFBITS - 1))
#define IRR_BYTES  (SYS_T * 2)

/* public-key matrix dimensions */
#define PK_NROWS     (SYS_T*GFBITS)
#define PK_NCOLS     (SYS_N - PK_NROWS)
#define PK_ROW_BYTES ((PK_NCOLS + 7)/8)

/* total secret-key size and syndrome size, in bytes */
#define SK_BYTES   (SYS_N/8 + IRR_BYTES + COND_BYTES)
#define SYND_BYTES ((PK_NROWS + 7)/8)

/* mask selecting the low GFBITS bits */
#define GFMASK ((1 << GFBITS) - 1)

#endif
|
||||
|
||||
@ -1,329 +0,0 @@
|
||||
/*
|
||||
This file is for public-key generation
|
||||
*/
|
||||
|
||||
#include "pk_gen.h"
|
||||
|
||||
#include "benes.h"
|
||||
#include "controlbits.h"
|
||||
#include "fft.h"
|
||||
#include "params.h"
|
||||
#include "transpose.h"
|
||||
#include "util.h"
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
/* Smaller of a and b. The selected argument is evaluated twice, so do not pass expressions with side effects. */
#define min(a, b) (((a) < (b)) ? (a) : (b))
|
||||
|
||||
/* Converts 32 bitsliced groups into (1 << GFBITS) separate words. */
/* input: in, 32 groups of GFBITS vectors; vector [g][b] carries bit b of */
/*        128 consecutive values, one value per vector bit position */
/* output: out[g*128 + p] = the GFBITS-bit value at bit position p of group g */
static void de_bitslicing(uint64_t *out, vec128 in[][GFBITS]) {
    int group, bit, lane;
    uint64_t low_half, high_half;
    uint64_t *dst;

    for (group = 0; group < (1 << GFBITS); group++) {
        out[group] = 0;
    }

    for (group = 0; group < 32; group++) {
        dst = out + group * 128;

        /* walk from the most significant bit down, shifting each bit in */
        for (bit = GFBITS - 1; bit >= 0; bit--) {
            low_half = PQCLEAN_MCELIECE348864_SSE_vec128_extract(in[group][bit], 0);
            high_half = PQCLEAN_MCELIECE348864_SSE_vec128_extract(in[group][bit], 1);

            for (lane = 0; lane < 64; lane++) {
                dst[lane] = (dst[lane] << 1) | ((low_half >> lane) & 1);
                dst[64 + lane] = (dst[64 + lane] << 1) | ((high_half >> lane) & 1);
            }
        }
    }
}
|
||||
|
||||
/* Bitslices two GFBITS-bit fields out of every word of in. */
/* input: in, (1 << GFBITS) words handled as 32 groups of 128 */
/* output: out1[g][b] collects bit (b + GFBITS) of each word in group g; */
/*         out0[g][GFBITS - 1 - b] collects bit b, i.e. the low field is */
/*         stored with its bit order reversed */
static void to_bitslicing_2x(vec128 out0[][GFBITS], vec128 out1[][GFBITS], const uint64_t *in) {
    int group, bit, half, lane;
    uint64_t packed[2] = {0};
    const uint64_t *src;

    for (group = 0; group < 32; group++) {
        src = in + group * 128;

        /* upper field: bits GFBITS .. 2*GFBITS-1 of every word */
        for (bit = GFBITS - 1; bit >= 0; bit--) {
            for (half = 0; half < 2; half++) {
                /* 64 shifts push out everything left from the previous round */
                for (lane = 63; lane >= 0; lane--) {
                    packed[half] = (packed[half] << 1)
                                   | ((src[half * 64 + lane] >> (bit + GFBITS)) & 1);
                }
            }

            out1[group][bit] = PQCLEAN_MCELIECE348864_SSE_vec128_set2x(packed[0], packed[1]);
        }

        /* lower field: bits 0 .. GFBITS-1 of every word */
        for (bit = GFBITS - 1; bit >= 0; bit--) {
            for (half = 0; half < 2; half++) {
                for (lane = 63; lane >= 0; lane--) {
                    packed[half] = (packed[half] << 1)
                                   | ((src[half * 64 + lane] >> bit) & 1);
                }
            }

            out0[group][GFBITS - 1 - bit] = PQCLEAN_MCELIECE348864_SSE_vec128_set2x(packed[0], packed[1]);
        }
    }
}
|
||||
|
||||
/* return number of trailing zeros of the non-zero input in */
static inline int ctz(uint64_t in) {
    const uint64_t count = _tzcnt_u64(in);

    return (int) count;
}
|
||||
|
||||
/* Branch-free equality mask. */
/* return: all 64 bits set when x == y, 0 otherwise */
static inline uint64_t same_mask(uint16_t x, uint16_t y) {
    /* diff - 1 has its top bit set only when diff is zero, because diff
       never exceeds 16 bits */
    const uint64_t diff = (uint64_t) (x ^ y);
    const uint64_t is_equal = (diff - 1) >> 63;

    return (uint64_t) 0 - is_equal;
}
|
||||
|
||||
static int mov_columns(uint64_t mat[][ ((SYS_N + 127) / 128) * 2 ], uint32_t *perm) {
|
||||
int i, j, k, s, block_idx, row;
|
||||
uint64_t buf[64], ctz_list[32], t, d, mask;
|
||||
|
||||
row = GFBITS * SYS_T - 32;
|
||||
block_idx = row / 64;
|
||||
|
||||
// extract the 32x64 matrix
|
||||
|
||||
for (i = 0; i < 32; i++) {
|
||||
buf[i] = (mat[ row + i ][ block_idx + 0 ] >> 32) |
|
||||
(mat[ row + i ][ block_idx + 1 ] << 32);
|
||||
}
|
||||
|
||||
// compute the column indices of pivots by Gaussian elimination.
|
||||
// the indices are stored in ctz_list
|
||||
|
||||
for (i = 0; i < 32; i++) {
|
||||
t = buf[i];
|
||||
for (j = i + 1; j < 32; j++) {
|
||||
t |= buf[j];
|
||||
}
|
||||
|
||||
if (t == 0) {
|
||||
return -1; // return if buf is not full rank
|
||||
}
|
||||
|
||||
ctz_list[i] = s = ctz(t);
|
||||
|
||||
for (j = i + 1; j < 32; j++) {
|
||||
mask = (buf[i] >> s) & 1;
|
||||
mask -= 1;
|
||||
buf[i] ^= buf[j] & mask;
|
||||
}
|
||||
for (j = 0; j < i; j++) {
|
||||
mask = (buf[j] >> s) & 1;
|
||||
mask = -mask;
|
||||
buf[j] ^= buf[i] & mask;
|
||||
}
|
||||
for (j = i + 1; j < 32; j++) {
|
||||
mask = (buf[j] >> s) & 1;
|
||||
mask = -mask;
|
||||
buf[j] ^= buf[i] & mask;
|
||||
}
|
||||
}
|
||||
|
||||
// updating permutation
|
||||
|
||||
for (j = 0; j < 32; j++) {
|
||||
for (k = j + 1; k < 64; k++) {
|
||||
d = perm[ row + j ] ^ perm[ row + k ];
|
||||
d &= same_mask(k, ctz_list[j]);
|
||||
perm[ row + j ] ^= d;
|
||||
perm[ row + k ] ^= d;
|
||||
}
|
||||
}
|
||||
|
||||
// moving columns of mat according to the column indices of pivots
|
||||
|
||||
for (i = 0; i < GFBITS * SYS_T; i += 64) {
|
||||
|
||||
for (j = 0; j < min(64, GFBITS * SYS_T - i); j++) {
|
||||
buf[j] = (mat[ i + j ][ block_idx + 0 ] >> 32) |
|
||||
(mat[ i + j ][ block_idx + 1 ] << 32);
|
||||
}
|
||||
|
||||
PQCLEAN_MCELIECE348864_SSE_transpose_64x64(buf);
|
||||
|
||||
for (j = 0; j < 32; j++) {
|
||||
for (k = j + 1; k < 64; k++) {
|
||||
d = buf[ j ] ^ buf[ k ];
|
||||
d &= same_mask(k, ctz_list[j]);
|
||||
buf[ j ] ^= d;
|
||||
buf[ k ] ^= d;
|
||||
}
|
||||
}
|
||||
|
||||
PQCLEAN_MCELIECE348864_SSE_transpose_64x64(buf);
|
||||
|
||||
for (j = 0; j < min(64, GFBITS * SYS_T - i); j++) {
|
||||
mat[ i + j ][ block_idx + 0 ] = (mat[ i + j ][ block_idx + 0 ] << 32 >> 32) | (buf[j] << 32);
|
||||
mat[ i + j ][ block_idx + 1 ] = (mat[ i + j ][ block_idx + 1 ] >> 32 << 32) | (buf[j] >> 32);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define NBLOCKS1_H ((SYS_N + 63) / 64)
|
||||
#define NBLOCKS2_H ((SYS_N + 127) / 128)
|
||||
#define NBLOCKS_I ((GFBITS * SYS_T + 63) / 64)
|
||||
|
||||
int PQCLEAN_MCELIECE348864_SSE_pk_gen(unsigned char *pk, uint32_t *perm, const unsigned char *sk) {
|
||||
int i, j, k;
|
||||
int row, c;
|
||||
|
||||
uint64_t mat[ GFBITS * SYS_T ][ NBLOCKS2_H * 2 ];
|
||||
uint64_t ops[ GFBITS * SYS_T ][ NBLOCKS_I ];
|
||||
|
||||
uint64_t mask;
|
||||
|
||||
uint64_t irr_int[ GFBITS ];
|
||||
|
||||
vec128 consts[32][ GFBITS ];
|
||||
vec128 eval[ 32 ][ GFBITS ];
|
||||
vec128 prod[ 32 ][ GFBITS ];
|
||||
vec128 tmp[ GFBITS ];
|
||||
|
||||
uint64_t list[1 << GFBITS];
|
||||
|
||||
// compute the inverses
|
||||
|
||||
PQCLEAN_MCELIECE348864_SSE_irr_load(irr_int, sk);
|
||||
|
||||
PQCLEAN_MCELIECE348864_SSE_fft(eval, irr_int);
|
||||
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_copy(prod[0], eval[0]);
|
||||
|
||||
for (i = 1; i < 32; i++) {
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_mul(prod[i], prod[i - 1], eval[i]);
|
||||
}
|
||||
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_inv(tmp, prod[31]);
|
||||
|
||||
for (i = 30; i >= 0; i--) {
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_mul(prod[i + 1], prod[i], tmp);
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_mul(tmp, tmp, eval[i + 1]);
|
||||
}
|
||||
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_copy(prod[0], tmp);
|
||||
|
||||
// fill matrix
|
||||
|
||||
de_bitslicing(list, prod);
|
||||
|
||||
for (i = 0; i < (1 << GFBITS); i++) {
|
||||
list[i] <<= GFBITS;
|
||||
list[i] |= i;
|
||||
list[i] |= ((uint64_t) perm[i]) << 31;
|
||||
}
|
||||
|
||||
PQCLEAN_MCELIECE348864_SSE_sort_63b(1 << GFBITS, list);
|
||||
|
||||
to_bitslicing_2x(consts, prod, list);
|
||||
|
||||
for (i = 0; i < (1 << GFBITS); i++) {
|
||||
perm[i] = list[i] & GFMASK;
|
||||
}
|
||||
|
||||
for (j = 0; j < NBLOCKS2_H; j++) {
|
||||
for (k = 0; k < GFBITS; k++) {
|
||||
mat[ k ][ 2 * j + 0 ] = PQCLEAN_MCELIECE348864_SSE_vec128_extract(prod[ j ][ k ], 0);
|
||||
mat[ k ][ 2 * j + 1 ] = PQCLEAN_MCELIECE348864_SSE_vec128_extract(prod[ j ][ k ], 1);
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 1; i < SYS_T; i++) {
|
||||
for (j = 0; j < NBLOCKS2_H; j++) {
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_mul(prod[j], prod[j], consts[j]);
|
||||
|
||||
for (k = 0; k < GFBITS; k++) {
|
||||
mat[ i * GFBITS + k ][ 2 * j + 0 ] = PQCLEAN_MCELIECE348864_SSE_vec128_extract(prod[ j ][ k ], 0);
|
||||
mat[ i * GFBITS + k ][ 2 * j + 1 ] = PQCLEAN_MCELIECE348864_SSE_vec128_extract(prod[ j ][ k ], 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// gaussian elimination
|
||||
|
||||
for (i = 0; i < PK_NROWS; i++) {
|
||||
for (j = 0; j < NBLOCKS_I; j++) {
|
||||
ops[ i ][ j ] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < PK_NROWS; i++) {
|
||||
ops[ i ][ i / 64 ] = 1;
|
||||
ops[ i ][ i / 64 ] <<= (i % 64);
|
||||
}
|
||||
|
||||
for (row = 0; row < PK_NROWS; row++) {
|
||||
i = row >> 6;
|
||||
j = row & 63;
|
||||
|
||||
if (row == GFBITS * SYS_T - 32) {
|
||||
if (mov_columns(mat, perm)) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
for (k = row + 1; k < PK_NROWS; k++) {
|
||||
mask = mat[ row ][ i ] >> j;
|
||||
mask &= 1;
|
||||
mask -= 1;
|
||||
|
||||
for (c = 0; c < NBLOCKS1_H; c++) {
|
||||
mat[ row ][ c ] ^= mat[ k ][ c ] & mask;
|
||||
}
|
||||
}
|
||||
|
||||
if ( ((mat[ row ][ i ] >> j) & 1) == 0 ) { // return if not systematic
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (k = 0; k < row; k++) {
|
||||
mask = mat[ k ][ i ] >> j;
|
||||
mask &= 1;
|
||||
mask = -mask;
|
||||
|
||||
for (c = 0; c < NBLOCKS1_H; c++) {
|
||||
mat[ k ][ c ] ^= mat[ row ][ c ] & mask;
|
||||
}
|
||||
}
|
||||
|
||||
for (k = row + 1; k < PK_NROWS; k++) {
|
||||
mask = mat[ k ][ i ] >> j;
|
||||
mask &= 1;
|
||||
mask = -mask;
|
||||
|
||||
for (c = 0; c < NBLOCKS1_H; c++) {
|
||||
mat[ k ][ c ] ^= mat[ row ][ c ] & mask;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < GFBITS * SYS_T; i++) {
|
||||
for (j = NBLOCKS_I; j < NBLOCKS1_H - 1; j++) {
|
||||
PQCLEAN_MCELIECE348864_SSE_store8(pk, mat[i][j]);
|
||||
pk += 8;
|
||||
}
|
||||
|
||||
PQCLEAN_MCELIECE348864_SSE_store_i(pk, mat[i][j], PK_ROW_BYTES % 8);
|
||||
|
||||
pk += PK_ROW_BYTES % 8;
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1,13 +0,0 @@
|
||||
#ifndef PQCLEAN_MCELIECE348864_SSE_PK_GEN_H
|
||||
#define PQCLEAN_MCELIECE348864_SSE_PK_GEN_H
|
||||
/*
|
||||
This file is for public-key generation
|
||||
*/
|
||||
|
||||
|
||||
#include "gf.h"
|
||||
|
||||
int PQCLEAN_MCELIECE348864_SSE_pk_gen(unsigned char * /*pk*/, uint32_t * /*perm*/, const unsigned char * /*sk*/);
|
||||
|
||||
#endif
|
||||
|
||||
@ -1,448 +0,0 @@
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55AA55AA55AA55AA, 0xAA55AA55AA55AA55),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3333CCCC3333CCCC, 0x3333CCCC3333CCCC),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55AA55AA55AA55AA, 0xAA55AA55AA55AA55),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3333CCCC3333CCCC, 0x3333CCCC3333CCCC),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F00F0F0F0F, 0xF0F0F0F00F0F0F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55AA55AA55AA55AA, 0xAA55AA55AA55AA55),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3333CCCC3333CCCC, 0x3333CCCC3333CCCC),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F00F0F0F0F, 0xF0F0F0F00F0F0F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55AA55AA55AA55AA, 0xAA55AA55AA55AA55),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3333CCCC3333CCCC, 0x3333CCCC3333CCCC),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA55AA55AA55AA55, 0x55AA55AA55AA55AA),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3333CCCC3333CCCC, 0x3333CCCC3333CCCC),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x5555555555555555, 0x5555555555555555),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA55AA55AA55AA55, 0x55AA55AA55AA55AA),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3333CCCC3333CCCC, 0x3333CCCC3333CCCC),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x5555555555555555, 0x5555555555555555),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F00F0F0F0F, 0xF0F0F0F00F0F0F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA55AA55AA55AA55, 0x55AA55AA55AA55AA),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3333CCCC3333CCCC, 0x3333CCCC3333CCCC),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x5555555555555555, 0x5555555555555555),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F00F0F0F0F, 0xF0F0F0F00F0F0F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA55AA55AA55AA55, 0x55AA55AA55AA55AA),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3333CCCC3333CCCC, 0x3333CCCC3333CCCC),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x5555555555555555, 0x5555555555555555),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55AA55AA55AA55AA, 0xAA55AA55AA55AA55),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCC3333CCCC3333, 0xCCCC3333CCCC3333),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55AA55AA55AA55AA, 0xAA55AA55AA55AA55),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCC3333CCCC3333, 0xCCCC3333CCCC3333),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F00F0F0F0F, 0xF0F0F0F00F0F0F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55AA55AA55AA55AA, 0xAA55AA55AA55AA55),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCC3333CCCC3333, 0xCCCC3333CCCC3333),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F00F0F0F0F, 0xF0F0F0F00F0F0F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55AA55AA55AA55AA, 0xAA55AA55AA55AA55),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCC3333CCCC3333, 0xCCCC3333CCCC3333),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA55AA55AA55AA55, 0x55AA55AA55AA55AA),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCC3333CCCC3333, 0xCCCC3333CCCC3333),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x5555555555555555, 0x5555555555555555),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA55AA55AA55AA55, 0x55AA55AA55AA55AA),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCC3333CCCC3333, 0xCCCC3333CCCC3333),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x5555555555555555, 0x5555555555555555),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F00F0F0F0F, 0xF0F0F0F00F0F0F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA55AA55AA55AA55, 0x55AA55AA55AA55AA),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCC3333CCCC3333, 0xCCCC3333CCCC3333),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x5555555555555555, 0x5555555555555555),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F00F0F0F0F, 0xF0F0F0F00F0F0F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA55AA55AA55AA55, 0x55AA55AA55AA55AA),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCC3333CCCC3333, 0xCCCC3333CCCC3333),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x5555555555555555, 0x5555555555555555),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F00F0F0F0F, 0xF0F0F0F00F0F0F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55AA55AA55AA55AA, 0xAA55AA55AA55AA55),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3333CCCC3333CCCC, 0x3333CCCC3333CCCC),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F00F0F0F0F, 0xF0F0F0F00F0F0F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55AA55AA55AA55AA, 0xAA55AA55AA55AA55),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3333CCCC3333CCCC, 0x3333CCCC3333CCCC),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55AA55AA55AA55AA, 0xAA55AA55AA55AA55),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3333CCCC3333CCCC, 0x3333CCCC3333CCCC),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55AA55AA55AA55AA, 0xAA55AA55AA55AA55),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3333CCCC3333CCCC, 0x3333CCCC3333CCCC),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F00F0F0F0F, 0xF0F0F0F00F0F0F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA55AA55AA55AA55, 0x55AA55AA55AA55AA),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3333CCCC3333CCCC, 0x3333CCCC3333CCCC),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x5555555555555555, 0x5555555555555555),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F00F0F0F0F, 0xF0F0F0F00F0F0F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA55AA55AA55AA55, 0x55AA55AA55AA55AA),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3333CCCC3333CCCC, 0x3333CCCC3333CCCC),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x5555555555555555, 0x5555555555555555),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA55AA55AA55AA55, 0x55AA55AA55AA55AA),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3333CCCC3333CCCC, 0x3333CCCC3333CCCC),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x5555555555555555, 0x5555555555555555),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA55AA55AA55AA55, 0x55AA55AA55AA55AA),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3333CCCC3333CCCC, 0x3333CCCC3333CCCC),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x5555555555555555, 0x5555555555555555),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F00F0F0F0F, 0xF0F0F0F00F0F0F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55AA55AA55AA55AA, 0xAA55AA55AA55AA55),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCC3333CCCC3333, 0xCCCC3333CCCC3333),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F00F0F0F0F, 0xF0F0F0F00F0F0F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55AA55AA55AA55AA, 0xAA55AA55AA55AA55),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCC3333CCCC3333, 0xCCCC3333CCCC3333),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55AA55AA55AA55AA, 0xAA55AA55AA55AA55),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCC3333CCCC3333, 0xCCCC3333CCCC3333),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x55AA55AA55AA55AA, 0xAA55AA55AA55AA55),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFF00FF00FF00FF00, 0xFF00FF00FF00FF00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCC3333CCCC3333, 0xCCCC3333CCCC3333),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F00F0F0F0F, 0xF0F0F0F00F0F0F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA55AA55AA55AA55, 0x55AA55AA55AA55AA),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCC3333CCCC3333, 0xCCCC3333CCCC3333),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x5555555555555555, 0x5555555555555555),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F00F0F0F0F, 0xF0F0F0F00F0F0F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA55AA55AA55AA55, 0x55AA55AA55AA55AA),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCC3333CCCC3333, 0xCCCC3333CCCC3333),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x5555555555555555, 0x5555555555555555),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFF0000FFFF0000, 0xFFFF0000FFFF0000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA55AA55AA55AA55, 0x55AA55AA55AA55AA),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCC3333CCCC3333, 0xCCCC3333CCCC3333),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x5555555555555555, 0x5555555555555555),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000FFFF0000FFFF, 0x0000FFFF0000FFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xAA55AA55AA55AA55, 0x55AA55AA55AA55AA),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0F0F0F0FF0F0F0F0, 0x0F0F0F0FF0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xCCCC3333CCCC3333, 0xCCCC3333CCCC3333),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x5555555555555555, 0x5555555555555555),
|
||||
},
|
||||
@ -1,70 +0,0 @@
|
||||
{
|
||||
0XF3CFC030FC30F003,
|
||||
0X3FCF0F003C00C00C,
|
||||
0X30033CC300C0C03C,
|
||||
0XCCFF0F3C0F30F0C0,
|
||||
0X0300C03FF303C3F0,
|
||||
0X3FFF3C0FF0CCCCC0,
|
||||
0XF3FFF0C00F3C3CC0,
|
||||
0X3003333FFFC3C000,
|
||||
0X0FF30FFFC3FFF300,
|
||||
0XFFC0F300F0F0CC00,
|
||||
0XC0CFF3FCCC3CFC00,
|
||||
0XFC3C03F0F330C000,
|
||||
},
|
||||
{
|
||||
0X000F00000000F00F,
|
||||
0X00000F00F00000F0,
|
||||
0X0F00000F00000F00,
|
||||
0XF00F00F00F000000,
|
||||
0X00F00000000000F0,
|
||||
0X0000000F00000000,
|
||||
0XF00000000F00F000,
|
||||
0X00F00F00000F0000,
|
||||
0X0000F00000F00F00,
|
||||
0X000F00F00F00F000,
|
||||
0X00F00F0000000000,
|
||||
0X0000000000F00000,
|
||||
},
|
||||
{
|
||||
0X0000FF00FF0000FF,
|
||||
0X0000FF000000FF00,
|
||||
0XFF0000FF00FF0000,
|
||||
0XFFFF0000FF000000,
|
||||
0X00FF00FF00FF0000,
|
||||
0X0000FFFFFF000000,
|
||||
0X00FFFF00FF000000,
|
||||
0XFFFFFF0000FF0000,
|
||||
0XFFFF00FFFF00FF00,
|
||||
0X0000FF0000000000,
|
||||
0XFFFFFF00FF000000,
|
||||
0X00FF000000000000,
|
||||
},
|
||||
{
|
||||
0X000000000000FFFF,
|
||||
0X00000000FFFF0000,
|
||||
0X0000000000000000,
|
||||
0XFFFF000000000000,
|
||||
0X00000000FFFF0000,
|
||||
0X0000FFFF00000000,
|
||||
0X0000000000000000,
|
||||
0X00000000FFFF0000,
|
||||
0X0000FFFF00000000,
|
||||
0X0000000000000000,
|
||||
0X0000000000000000,
|
||||
0X0000000000000000,
|
||||
},
|
||||
{
|
||||
0X00000000FFFFFFFF,
|
||||
0XFFFFFFFF00000000,
|
||||
0XFFFFFFFF00000000,
|
||||
0X0000000000000000,
|
||||
0X0000000000000000,
|
||||
0XFFFFFFFF00000000,
|
||||
0X0000000000000000,
|
||||
0X0000000000000000,
|
||||
0XFFFFFFFF00000000,
|
||||
0X0000000000000000,
|
||||
0X0000000000000000,
|
||||
0X0000000000000000,
|
||||
}
|
||||
@ -1,70 +0,0 @@
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xf3cfc030fc30f003, 0x000c03c0c3c0330c),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3fcf0f003c00c00c, 0xf330cffcc00f33c0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x30033cc300c0c03c, 0xccf330f00f3c0333),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xccff0f3c0f30f0c0, 0xff03fff3ff0cf0c0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0300c03ff303c3f0, 0x3cc3fcf00fcc303c),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3fff3c0ff0ccccc0, 0x0f000c0fc30303f3),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xf3fff0c00f3c3cc0, 0xcf0fc3ff333ccf3c),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x3003333fffc3c000, 0x003f3fc3c0ff333f),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0ff30fffc3fff300, 0x3cc3f0f3cf0ff00f),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xffc0f300f0f0cc00, 0xf3f33cc03fc30cc0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xc0cff3fccc3cfc00, 0x3cc330cfc333f33f),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xfc3c03f0f330c000, 0x3cc0303ff3c3fffc),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x000f00000000f00f, 0x0f00f00f00000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00000f00f00000f0, 0xf00000000000f000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0f00000f00000f00, 0x00000f00000000f0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xf00f00f00f000000, 0x0f00f00000f00000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00f00000000000f0, 0x000f00000f00f00f),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000f00000000, 0x00f00f00f00f0000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xf00000000f00f000, 0x0f00f00000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00f00f00000f0000, 0x000000000f000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000f00000f00f00, 0x00f00000000f00f0),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x000f00f00f00f000, 0x0000f00f00000f00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00f00f0000000000, 0xf00000f00000f00f),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000f00000, 0x00000f00f00f00f0),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000ff00ff0000ff, 0xff00ffffff000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000ff000000ff00, 0xff0000ffff000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xff0000ff00ff0000, 0xffff00ffff000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xffff0000ff000000, 0xff00ffffffffff00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00ff00ff00ff0000, 0x00000000ff00ff00),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000ffffff000000, 0xffffffff00ff0000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00ffff00ff000000, 0x00ffffff00ff0000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xffffff0000ff0000, 0xffff00ffff00ffff),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xffff00ffff00ff00, 0xffff0000ffffffff),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000ff0000000000, 0xff00000000ff0000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xffffff00ff000000, 0x000000ff00ff00ff),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00ff000000000000, 0x00ff00ff00ffff00),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x000000000000ffff, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00000000ffff0000, 0xffff000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xffff000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00000000ffff0000, 0xffff00000000ffff),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000ffff00000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000ffff00000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00000000ffff0000, 0xffff00000000ffff),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000ffff00000000, 0x00000000ffff0000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xffff00000000ffff),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x00000000ffff0000),
|
||||
},
|
||||
{
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x00000000ffffffff, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xffffffff00000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xffffffff00000000, 0x00000000ffffffff),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xffffffff00000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xffffffff00000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xffffffff00000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xffffffff00000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xffffffffffffffff),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0xffffffff00000000, 0xffffffff00000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0x0000000000000000),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xffffffffffffffff),
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_set2x(0x0000000000000000, 0xffffffff00000000),
|
||||
},
|
||||
@ -1,98 +0,0 @@
|
||||
/*
|
||||
This file is for secret-key generation
|
||||
*/
|
||||
|
||||
#include "sk_gen.h"
|
||||
|
||||
#include "controlbits.h"
|
||||
#include "gf.h"
|
||||
#include "params.h"
|
||||
#include "util.h"
|
||||
|
||||
/* input: f, element in GF((2^m)^t) */
|
||||
/* output: out, minimal polynomial of f */
|
||||
/* return: 0 for success and -1 for failure */
|
||||
int PQCLEAN_MCELIECE348864_SSE_genpoly_gen(gf *out, gf *f) {
|
||||
int i, j, k, c;
|
||||
|
||||
gf mat[ SYS_T + 1 ][ SYS_T ];
|
||||
gf mask, inv, t;
|
||||
|
||||
// fill matrix
|
||||
|
||||
mat[0][0] = 1;
|
||||
|
||||
for (i = 1; i < SYS_T; i++) {
|
||||
mat[0][i] = 0;
|
||||
}
|
||||
|
||||
for (i = 0; i < SYS_T; i++) {
|
||||
mat[1][i] = f[i];
|
||||
}
|
||||
|
||||
for (j = 2; j <= SYS_T; j++) {
|
||||
PQCLEAN_MCELIECE348864_SSE_GF_mul(mat[j], mat[j - 1], f);
|
||||
}
|
||||
|
||||
// gaussian
|
||||
|
||||
for (j = 0; j < SYS_T; j++) {
|
||||
for (k = j + 1; k < SYS_T; k++) {
|
||||
mask = PQCLEAN_MCELIECE348864_SSE_gf_iszero(mat[ j ][ j ]);
|
||||
|
||||
for (c = j; c < SYS_T + 1; c++) {
|
||||
mat[ c ][ j ] ^= mat[ c ][ k ] & mask;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if ( mat[ j ][ j ] == 0 ) { // return if not systematic
|
||||
return -1;
|
||||
}
|
||||
|
||||
inv = PQCLEAN_MCELIECE348864_SSE_gf_inv(mat[j][j]);
|
||||
|
||||
for (c = j; c < SYS_T + 1; c++) {
|
||||
mat[ c ][ j ] = PQCLEAN_MCELIECE348864_SSE_gf_mul(mat[ c ][ j ], inv) ;
|
||||
}
|
||||
|
||||
for (k = 0; k < SYS_T; k++) {
|
||||
if (k != j) {
|
||||
t = mat[ j ][ k ];
|
||||
|
||||
for (c = j; c < SYS_T + 1; c++) {
|
||||
mat[ c ][ k ] ^= PQCLEAN_MCELIECE348864_SSE_gf_mul(mat[ c ][ j ], t);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < SYS_T; i++) {
|
||||
out[i] = mat[ SYS_T ][ i ];
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* input: permutation p represented as a list of 32-bit intergers */
|
||||
/* output: -1 if some interger repeats in p */
|
||||
/* 0 otherwise */
|
||||
int PQCLEAN_MCELIECE348864_SSE_perm_check(const uint32_t *p) {
|
||||
int i;
|
||||
uint64_t list[1 << GFBITS];
|
||||
|
||||
for (i = 0; i < (1 << GFBITS); i++) {
|
||||
list[i] = p[i];
|
||||
}
|
||||
|
||||
PQCLEAN_MCELIECE348864_SSE_sort_63b(1 << GFBITS, list);
|
||||
|
||||
for (i = 1; i < (1 << GFBITS); i++) {
|
||||
if (list[i - 1] == list[i]) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1,16 +0,0 @@
|
||||
#ifndef PQCLEAN_MCELIECE348864_SSE_SK_GEN_H
|
||||
#define PQCLEAN_MCELIECE348864_SSE_SK_GEN_H
|
||||
/*
|
||||
This file is for secret-key generation
|
||||
*/
|
||||
|
||||
|
||||
#include "gf.h"
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/* input: f, element in GF((2^m)^t); output: out, minimal polynomial of f (SYS_T entries); return: 0 for success, -1 for failure */
int PQCLEAN_MCELIECE348864_SSE_genpoly_gen(gf * /*out*/, gf * /*f*/);
|
||||
/* input: p, list of (1 << GFBITS) 32-bit integers; return: -1 if some integer repeats in p, 0 otherwise */
int PQCLEAN_MCELIECE348864_SSE_perm_check(const uint32_t * /*p*/);
|
||||
|
||||
#endif
|
||||
|
||||
@ -1,740 +0,0 @@
|
||||
|
||||
# qhasm: int64 input_0
|
||||
|
||||
# qhasm: int64 input_1
|
||||
|
||||
# qhasm: int64 input_2
|
||||
|
||||
# qhasm: int64 input_3
|
||||
|
||||
# qhasm: int64 input_4
|
||||
|
||||
# qhasm: int64 input_5
|
||||
|
||||
# qhasm: stack64 input_6
|
||||
|
||||
# qhasm: stack64 input_7
|
||||
|
||||
# qhasm: int64 caller_r11
|
||||
|
||||
# qhasm: int64 caller_r12
|
||||
|
||||
# qhasm: int64 caller_r13
|
||||
|
||||
# qhasm: int64 caller_r14
|
||||
|
||||
# qhasm: int64 caller_r15
|
||||
|
||||
# qhasm: int64 caller_rbx
|
||||
|
||||
# qhasm: int64 caller_rbp
|
||||
|
||||
# qhasm: int64 b64
|
||||
|
||||
# qhasm: int64 synd
|
||||
|
||||
# qhasm: int64 addr
|
||||
|
||||
# qhasm: int64 c
|
||||
|
||||
# qhasm: int64 c_all
|
||||
|
||||
# qhasm: int64 row
|
||||
|
||||
# qhasm: int64 p
|
||||
|
||||
# qhasm: int64 e
|
||||
|
||||
# qhasm: int64 s
|
||||
|
||||
# qhasm: reg128 pp
|
||||
|
||||
# qhasm: reg128 ee
|
||||
|
||||
# qhasm: reg128 ss
|
||||
|
||||
# qhasm: int64 buf_ptr
|
||||
|
||||
# qhasm: stack128 buf
|
||||
|
||||
# syndrome_asm: input_0 = %rdi, input_1 = %rsi, input_2 = %rdx
#
# Main loop (row counts down from 767 to 0):
#   - ANDs the 340 bytes at input_1 + 340*row with the 340 bytes at
#     input_2 + 96, and reduces the result to a single parity bit;
#   - shifts that bit into the low end of the byte at input_0 + (row >> 3).
# Afterwards the first 96 bytes of input_2 are XORed into input_0.
#
# Scratch registers: %rax %rcx %r8 %r9 %r10 %r11 %xmm0 %xmm1 %xmm2.
.p2align 5
.global _PQCLEAN_MCELIECE348864_SSE_syndrome_asm
.global PQCLEAN_MCELIECE348864_SSE_syndrome_asm
_PQCLEAN_MCELIECE348864_SSE_syndrome_asm:
PQCLEAN_MCELIECE348864_SSE_syndrome_asm:
    # Move %rsp down to a 32-byte boundary (at least 32 bytes lower);
    # %r11 keeps the adjustment so the epilogue can undo it.
    mov %rsp,%r11
    and $31,%r11
    add $32,%r11
    sub %r11,%rsp

    # input_1 += 260780 (= 767 * 340): start at the last row
    add $260780,%rsi

    # %rcx = address of the 16-byte scratch buffer at 0(%rsp)
    leaq 0(%rsp),%rcx

    # row counter
    mov $768,%r8

._loop:
    sub $1,%r8

    # xmm0 = row[0..15] & input_2[96..111]
    movdqu 0(%rsi),%xmm0
    movdqu 96(%rdx),%xmm1
    pand %xmm1,%xmm0

    # xmm0 ^= row[k..k+15] & input_2[96+k..96+k+15], k = 16, 32, ..., 320
    movdqu 16(%rsi),%xmm1
    movdqu 112(%rdx),%xmm2
    pand %xmm2,%xmm1
    pxor %xmm1,%xmm0

    movdqu 32(%rsi),%xmm1
    movdqu 128(%rdx),%xmm2
    pand %xmm2,%xmm1
    pxor %xmm1,%xmm0

    movdqu 48(%rsi),%xmm1
    movdqu 144(%rdx),%xmm2
    pand %xmm2,%xmm1
    pxor %xmm1,%xmm0

    movdqu 64(%rsi),%xmm1
    movdqu 160(%rdx),%xmm2
    pand %xmm2,%xmm1
    pxor %xmm1,%xmm0

    movdqu 80(%rsi),%xmm1
    movdqu 176(%rdx),%xmm2
    pand %xmm2,%xmm1
    pxor %xmm1,%xmm0

    movdqu 96(%rsi),%xmm1
    movdqu 192(%rdx),%xmm2
    pand %xmm2,%xmm1
    pxor %xmm1,%xmm0

    movdqu 112(%rsi),%xmm1
    movdqu 208(%rdx),%xmm2
    pand %xmm2,%xmm1
    pxor %xmm1,%xmm0

    movdqu 128(%rsi),%xmm1
    movdqu 224(%rdx),%xmm2
    pand %xmm2,%xmm1
    pxor %xmm1,%xmm0

    movdqu 144(%rsi),%xmm1
    movdqu 240(%rdx),%xmm2
    pand %xmm2,%xmm1
    pxor %xmm1,%xmm0

    movdqu 160(%rsi),%xmm1
    movdqu 256(%rdx),%xmm2
    pand %xmm2,%xmm1
    pxor %xmm1,%xmm0

    movdqu 176(%rsi),%xmm1
    movdqu 272(%rdx),%xmm2
    pand %xmm2,%xmm1
    pxor %xmm1,%xmm0

    movdqu 192(%rsi),%xmm1
    movdqu 288(%rdx),%xmm2
    pand %xmm2,%xmm1
    pxor %xmm1,%xmm0

    movdqu 208(%rsi),%xmm1
    movdqu 304(%rdx),%xmm2
    pand %xmm2,%xmm1
    pxor %xmm1,%xmm0

    movdqu 224(%rsi),%xmm1
    movdqu 320(%rdx),%xmm2
    pand %xmm2,%xmm1
    pxor %xmm1,%xmm0

    movdqu 240(%rsi),%xmm1
    movdqu 336(%rdx),%xmm2
    pand %xmm2,%xmm1
    pxor %xmm1,%xmm0

    movdqu 256(%rsi),%xmm1
    movdqu 352(%rdx),%xmm2
    pand %xmm2,%xmm1
    pxor %xmm1,%xmm0

    movdqu 272(%rsi),%xmm1
    movdqu 368(%rdx),%xmm2
    pand %xmm2,%xmm1
    pxor %xmm1,%xmm0

    movdqu 288(%rsi),%xmm1
    movdqu 384(%rdx),%xmm2
    pand %xmm2,%xmm1
    pxor %xmm1,%xmm0

    movdqu 304(%rsi),%xmm1
    movdqu 400(%rdx),%xmm2
    pand %xmm2,%xmm1
    pxor %xmm1,%xmm0

    movdqu 320(%rsi),%xmm1
    movdqu 416(%rdx),%xmm2
    pand %xmm2,%xmm1
    pxor %xmm1,%xmm0

    # spill the 128-bit accumulator so it can be read back as two qwords
    # (movdqa is safe: %rsp was aligned in the prologue)
    movdqa %xmm0,0(%rsp)

    # last 4 bytes of the row: row[336..339] & input_2[432..435]
    # (movl zero-extends, so the 64-bit and/popcnt see only those 32 bits)
    movl 336(%rsi),%r9d
    movl 432(%rdx),%eax
    and %rax,%r9
    popcnt %r9, %r9

    # fold in the bit counts of both halves of the spilled accumulator;
    # only bit 0 of the XOR of the counts (the overall parity) is kept below
    movq 0(%rcx),%rax
    popcnt %rax, %rax
    xor %rax,%r9

    movq 8(%rcx),%rax
    popcnt %rax, %rax
    xor %rax,%r9

    # %rax = input_0 + (row >> 3): the output byte this row belongs to
    mov %r8,%rax
    shr $3,%rax
    add %rdi,%rax

    # byte = (byte << 1) | parity; only the low 8 bits are stored back
    movzbq 0(%rax),%r10
    shl $1,%r10
    and $1,%r9d
    or %r9,%r10
    movb %r10b,0(%rax)

    # step back one 340-byte row and continue until row reaches 0
    sub $340,%rsi
    cmp $0,%r8
    jne ._loop

    # input_0[0..95] ^= input_2[0..95], 16 bytes at a time
    movdqu 0(%rdi),%xmm0
    movdqu 0(%rdx),%xmm1
    pxor %xmm1,%xmm0
    movdqu %xmm0,0(%rdi)

    movdqu 16(%rdi),%xmm0
    movdqu 16(%rdx),%xmm1
    pxor %xmm1,%xmm0
    movdqu %xmm0,16(%rdi)

    movdqu 32(%rdi),%xmm0
    movdqu 32(%rdx),%xmm1
    pxor %xmm1,%xmm0
    movdqu %xmm0,32(%rdi)

    movdqu 48(%rdi),%xmm0
    movdqu 48(%rdx),%xmm1
    pxor %xmm1,%xmm0
    movdqu %xmm0,48(%rdi)

    movdqu 64(%rdi),%xmm0
    movdqu 64(%rdx),%xmm1
    pxor %xmm1,%xmm0
    movdqu %xmm0,64(%rdi)

    movdqu 80(%rdi),%xmm0
    movdqu 80(%rdx),%xmm1
    pxor %xmm1,%xmm0
    movdqu %xmm0,80(%rdi)

    # undo the prologue's stack adjustment and return
    add %r11,%rsp
    ret
|
||||
@ -1,12 +0,0 @@
|
||||
#include "transpose.h"
|
||||
|
||||
extern void PQCLEAN_MCELIECE348864_SSE_transpose_64x64_asm(uint64_t *);
|
||||
/*
  Thin C entry point: hands `in` unchanged to
  PQCLEAN_MCELIECE348864_SSE_transpose_64x64_asm, which is only declared
  (extern) in this file and implemented elsewhere.
  NOTE(review): going by the name, the routine presumably transposes a 64x64
  bit matrix stored as 64 uint64_t words in place -- confirm against the
  assembly source.
*/
void PQCLEAN_MCELIECE348864_SSE_transpose_64x64(uint64_t *in) {
|
||||
PQCLEAN_MCELIECE348864_SSE_transpose_64x64_asm(in);
|
||||
}
|
||||
|
||||
extern void PQCLEAN_MCELIECE348864_SSE_transpose_64x128_sp_asm(vec128 *);
|
||||
|
||||
/*
  Thin C entry point: hands `in` unchanged to
  PQCLEAN_MCELIECE348864_SSE_transpose_64x128_sp_asm, which is only declared
  (extern) in this file and implemented elsewhere.
  NOTE(review): the name suggests a 64x128 bit-matrix transposition over an
  array of vec128 values; the exact layout ("sp") is not visible here --
  confirm against the assembly source.
*/
void PQCLEAN_MCELIECE348864_SSE_transpose_64x128_sp(vec128 *in) {
|
||||
PQCLEAN_MCELIECE348864_SSE_transpose_64x128_sp_asm(in);
|
||||
}
|
||||
@ -1,16 +0,0 @@
|
||||
#ifndef PQCLEAN_MCELIECE348864_SSE_TRANSPOSE_H
|
||||
#define PQCLEAN_MCELIECE348864_SSE_TRANSPOSE_H
|
||||
/*
|
||||
This file is for matrix transposition
|
||||
*/
|
||||
|
||||
|
||||
#include "vec128.h"
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
void PQCLEAN_MCELIECE348864_SSE_transpose_64x64(uint64_t *in);
|
||||
void PQCLEAN_MCELIECE348864_SSE_transpose_64x128_sp(vec128 *in);
|
||||
|
||||
#endif
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,354 +0,0 @@
|
||||
|
||||
# update_asm(input_0 = %rdi, input_1 = %rsi, input_2 = %rdx)
#
# Visits twelve 64-bit words: the first at address input_0, each following
# one input_2 bytes further on. Every word is shifted right by one bit and
# the current lowest bit of input_1 is moved into its top bit; input_1 is
# then shifted right by one. Word k therefore receives bit k of the original
# input_1 in its bit 63.
#
# Clobbers %rcx, %rsi, %rdi, %r11 and the flags. No return value.

.p2align 5
.global _PQCLEAN_MCELIECE348864_SSE_update_asm
.global PQCLEAN_MCELIECE348864_SSE_update_asm
_PQCLEAN_MCELIECE348864_SSE_update_asm:
PQCLEAN_MCELIECE348864_SSE_update_asm:

# prologue: round %rsp down to a 32-byte boundary, keeping the adjustment in %r11
mov %rsp,%r11
and $31,%r11
add $0,%r11
sub %r11,%rsp

# s1 = input_1 (register-to-itself move, kept so the instruction stream is unchanged)
mov %rsi,%rsi

# per word: s0 = mem64[input_0]; s0 = (s1:s0) >> 1; s1 >>= 1;
#           mem64[input_0] = s0; input_0 += input_2
.rept 12
movq 0(%rdi),%rcx
shrd $1,%rsi,%rcx
shr $1,%rsi
movq %rcx,0(%rdi)
add %rdx,%rdi
.endr

# epilogue: undo the stack adjustment
add %r11,%rsp
ret
|
||||
@ -1,106 +0,0 @@
|
||||
/*
|
||||
This file is for loading/storing data in a little-endian fashion
|
||||
*/
|
||||
|
||||
#include "util.h"
|
||||
|
||||
/*
  Store the i least-significant bytes of `in` into out[0..i-1], lowest byte
  first (little-endian).

  out : destination buffer with room for at least i bytes
  in  : value to serialise
  i   : number of bytes to write; nothing is written when i <= 0

  The value is consumed one byte at a time by shifting it right by 8 on each
  step. This never shifts a 64-bit value by 64 or more (which would be
  undefined behaviour), so a request for more than 8 bytes simply pads the
  tail with zero bytes.
*/
void PQCLEAN_MCELIECE348864_SSE_store_i(unsigned char *out, uint64_t in, int i) {
    int j;

    for (j = 0; j < i; j++) {
        out[j] = (unsigned char)(in & 0xFF);
        in >>= 8;
    }
}
|
||||
|
||||
/*
  Write `a` to dest[0..1] in little-endian order: dest[0] receives the low
  byte, dest[1] receives a >> 8.
*/
void PQCLEAN_MCELIECE348864_SSE_store2(unsigned char *dest, gf a) {
    const unsigned char low_byte = a & 0xFF;
    const unsigned char high_byte = a >> 8;

    dest[0] = low_byte;
    dest[1] = high_byte;
}
|
||||
|
||||
uint16_t PQCLEAN_MCELIECE348864_SSE_load2(const unsigned char *src) {
|
||||
uint16_t a;
|
||||
|
||||
a = src[1];
|
||||
a <<= 8;
|
||||
a |= src[0];
|
||||
|
||||
return a & GFMASK;
|
||||
}
|
||||
|
||||
/*
  Read four bytes from src as a little-endian 32-bit value: src[0] is the
  least significant byte, src[3] the most significant.
*/
uint32_t PQCLEAN_MCELIECE348864_SSE_load4(const unsigned char *src) {
    uint32_t word = 0;
    int k;

    /* fold the bytes in from most to least significant */
    for (k = 3; k >= 0; k--) {
        word = (word << 8) | src[k];
    }

    return word;
}
|
||||
|
||||
void PQCLEAN_MCELIECE348864_SSE_irr_load(uint64_t *out, const unsigned char *in) {
|
||||
int i, j;
|
||||
uint16_t irr[ SYS_T + 1 ];
|
||||
|
||||
for (i = 0; i < SYS_T; i++) {
|
||||
irr[i] = PQCLEAN_MCELIECE348864_SSE_load2(in + i * 2);
|
||||
irr[i] &= GFMASK;
|
||||
}
|
||||
|
||||
irr[ SYS_T ] = 1;
|
||||
|
||||
for (i = 0; i < GFBITS; i++) {
|
||||
out[i] = 0;
|
||||
}
|
||||
|
||||
for (i = SYS_T; i >= 0; i--) {
|
||||
for (j = 0; j < GFBITS; j++) {
|
||||
out[j] <<= 1;
|
||||
out[j] |= (irr[i] >> j) & 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
  Write the 64-bit value `in` to out[0..7] in little-endian order
  (out[0] is the least significant byte).
*/
void PQCLEAN_MCELIECE348864_SSE_store8(unsigned char *out, uint64_t in) {
    int k;

    for (k = 0; k < 8; k++) {
        out[k] = (unsigned char)((in >> (8 * k)) & 0xFF);
    }
}
|
||||
|
||||
/*
  Read eight bytes from `in` as a little-endian 64-bit value
  (in[0] is the least significant byte).
*/
uint64_t PQCLEAN_MCELIECE348864_SSE_load8(const unsigned char *in) {
    const uint64_t low_half = (uint64_t)in[0]
                              | ((uint64_t)in[1] << 8)
                              | ((uint64_t)in[2] << 16)
                              | ((uint64_t)in[3] << 24);
    const uint64_t high_half = (uint64_t)in[4]
                               | ((uint64_t)in[5] << 8)
                               | ((uint64_t)in[6] << 16)
                               | ((uint64_t)in[7] << 24);

    return (high_half << 32) | low_half;
}
|
||||
|
||||
/*
  Reverse the bit order of the low 16 bits of `a`, then shift the result
  right by 4, so that bit k of the input (k = 0..11) ends up in bit 11 - k.

  The reversal is a sequence of masked swaps (neighbouring bits, bit pairs,
  nibbles, bytes). Each stage flips one bit of the bit index, so the stages
  may run in any order.
*/
gf PQCLEAN_MCELIECE348864_SSE_bitrev(gf a) {
    gf r = a;

    r = ((r & 0x5555) << 1) | ((r & 0xAAAA) >> 1);
    r = ((r & 0x3333) << 2) | ((r & 0xCCCC) >> 2);
    r = ((r & 0x0F0F) << 4) | ((r & 0xF0F0) >> 4);
    r = ((r & 0x00FF) << 8) | ((r & 0xFF00) >> 8);

    return r >> 4;
}
|
||||
|
||||
/*
  Read 16 bytes from `in` as two little-endian 64-bit words and pack them
  into a vec128: bytes 0..7 are passed as the first argument of vec128_set2x,
  bytes 8..15 as the second.
*/
vec128 PQCLEAN_MCELIECE348864_SSE_load16(const unsigned char *in) {
    const uint64_t first_word = PQCLEAN_MCELIECE348864_SSE_load8(in);
    const uint64_t second_word = PQCLEAN_MCELIECE348864_SSE_load8(in + 8);

    return PQCLEAN_MCELIECE348864_SSE_vec128_set2x(first_word, second_word);
}
|
||||
|
||||
/*
  Write the vec128 `in` to out[0..15]: 64-bit element 0 goes to bytes 0..7
  and element 1 to bytes 8..15, each serialised by store8.
*/
void PQCLEAN_MCELIECE348864_SSE_store16(unsigned char *out, vec128 in) {
    /* the extract macro needs literal lane indices */
    const uint64_t lane0 = PQCLEAN_MCELIECE348864_SSE_vec128_extract(in, 0);
    const uint64_t lane1 = PQCLEAN_MCELIECE348864_SSE_vec128_extract(in, 1);

    PQCLEAN_MCELIECE348864_SSE_store8(out, lane0);
    PQCLEAN_MCELIECE348864_SSE_store8(out + 8, lane1);
}
|
||||
@ -1,33 +0,0 @@
|
||||
#ifndef PQCLEAN_MCELIECE348864_SSE_UTIL_H
|
||||
#define PQCLEAN_MCELIECE348864_SSE_UTIL_H
|
||||
/*
|
||||
This file is for loading/storing data in a little-endian fashion
|
||||
*/
|
||||
|
||||
|
||||
#include "gf.h"
|
||||
#include "vec128.h"
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
void PQCLEAN_MCELIECE348864_SSE_store_i(unsigned char *out, uint64_t in, int i);
|
||||
void PQCLEAN_MCELIECE348864_SSE_store2(unsigned char *dest, gf a);
|
||||
|
||||
uint16_t PQCLEAN_MCELIECE348864_SSE_load2(const unsigned char *src);
|
||||
|
||||
uint32_t PQCLEAN_MCELIECE348864_SSE_load4(const unsigned char *src);
|
||||
|
||||
void PQCLEAN_MCELIECE348864_SSE_irr_load(uint64_t *out, const unsigned char *in);
|
||||
|
||||
void PQCLEAN_MCELIECE348864_SSE_store8(unsigned char *out, uint64_t in);
|
||||
|
||||
uint64_t PQCLEAN_MCELIECE348864_SSE_load8(const unsigned char *in);
|
||||
|
||||
gf PQCLEAN_MCELIECE348864_SSE_bitrev(gf a);
|
||||
|
||||
vec128 PQCLEAN_MCELIECE348864_SSE_load16(const unsigned char *in);
|
||||
|
||||
void PQCLEAN_MCELIECE348864_SSE_store16(unsigned char *out, vec128 in);
|
||||
|
||||
#endif
|
||||
|
||||
@ -1,17 +0,0 @@
|
||||
|
||||
#include "vec.h"
|
||||
|
||||
#include "params.h"
|
||||
|
||||
/*
  Thin C entry point: forwards h, f and g to
  PQCLEAN_MCELIECE348864_SSE_vec_mul_asm (implemented elsewhere) with a
  fourth argument of 8.
  NOTE(review): h is presumably the output and f, g the bitsliced operands;
  the constant 8 looks like the byte distance between consecutive uint64_t
  elements -- confirm against the assembly source.
*/
void PQCLEAN_MCELIECE348864_SSE_vec_mul(uint64_t *h, const uint64_t *f, const uint64_t *g) {
|
||||
PQCLEAN_MCELIECE348864_SSE_vec_mul_asm(h, f, g, 8);
|
||||
}
|
||||
|
||||
void PQCLEAN_MCELIECE348864_SSE_vec_add(uint64_t *h, const uint64_t *f, const uint64_t *g) {
|
||||
int b;
|
||||
|
||||
for (b = 0; b < GFBITS; b++) {
|
||||
h[b] = f[b] ^ g[b];
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,11 +0,0 @@
|
||||
#ifndef PQCLEAN_MCELIECE348864_SSE_VEC_H
|
||||
#define PQCLEAN_MCELIECE348864_SSE_VEC_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
extern void PQCLEAN_MCELIECE348864_SSE_vec_mul_asm(uint64_t *, const uint64_t *, const uint64_t *, int);
|
||||
|
||||
void PQCLEAN_MCELIECE348864_SSE_vec_mul(uint64_t *h, const uint64_t *f, const uint64_t *g);
|
||||
void PQCLEAN_MCELIECE348864_SSE_vec_add(uint64_t *h, const uint64_t *f, const uint64_t *g);
|
||||
|
||||
#endif
|
||||
@ -1,143 +0,0 @@
|
||||
/*
|
||||
This file is for functions related to 128-bit vectors
|
||||
including functions for bitsliced field operations
|
||||
*/
|
||||
|
||||
#include "vec128.h"
|
||||
|
||||
#include "params.h"
|
||||
|
||||
/* Wrapper around _mm_set1_epi16: returns a vector in which every 16-bit
   lane holds `a`. */
vec128 PQCLEAN_MCELIECE348864_SSE_vec128_set1_16b(uint16_t a) {
|
||||
return _mm_set1_epi16(a);
|
||||
}
|
||||
|
||||
/* Wrapper around _mm_testz_si128(a, a): returns 1 when all 128 bits of `a`
   are zero and 0 otherwise. */
int PQCLEAN_MCELIECE348864_SSE_vec128_testz(vec128 a) {
|
||||
return _mm_testz_si128(a, a);
|
||||
}
|
||||
|
||||
/* Wrapper around _mm_setzero_si128: returns the all-zero 128-bit vector. */
vec128 PQCLEAN_MCELIECE348864_SSE_vec128_setzero(void) {
|
||||
return _mm_setzero_si128();
|
||||
}
|
||||
|
||||
/* Wrapper around _mm_and_si128: bitwise AND of the two 128-bit operands. */
vec128 PQCLEAN_MCELIECE348864_SSE_vec128_and(vec128 a, vec128 b) {
|
||||
return _mm_and_si128(a, b);
|
||||
}
|
||||
|
||||
/* Wrapper around _mm_xor_si128: bitwise XOR of the two 128-bit operands. */
vec128 PQCLEAN_MCELIECE348864_SSE_vec128_xor(vec128 a, vec128 b) {
|
||||
return _mm_xor_si128(a, b);
|
||||
}
|
||||
|
||||
/* Wrapper around _mm_or_si128: bitwise OR of the two 128-bit operands. */
vec128 PQCLEAN_MCELIECE348864_SSE_vec128_or(vec128 a, vec128 b) {
|
||||
return _mm_or_si128(a, b);
|
||||
}
|
||||
|
||||
/* Wrapper around _mm_slli_epi64: shifts each of the two 64-bit lanes of `a`
   left by `s` bits, filling with zeros. */
vec128 PQCLEAN_MCELIECE348864_SSE_vec128_sll_2x(vec128 a, int s) {
|
||||
return _mm_slli_epi64(a, s);
|
||||
}
|
||||
|
||||
/* Wrapper around _mm_srli_epi64: logically shifts each of the two 64-bit
   lanes of `a` right by `s` bits, filling with zeros. */
vec128 PQCLEAN_MCELIECE348864_SSE_vec128_srl_2x(vec128 a, int s) {
|
||||
return _mm_srli_epi64(a, s);
|
||||
}
|
||||
|
||||
/* Builds a vector from two 64-bit words. _mm_set_epi64x takes the high lane
   first, so the arguments are swapped: a0 becomes the low 64-bit lane and
   a1 the high one. */
vec128 PQCLEAN_MCELIECE348864_SSE_vec128_set2x(uint64_t a0, uint64_t a1) {
|
||||
return _mm_set_epi64x(a1, a0);
|
||||
}
|
||||
|
||||
/* Wrapper around _mm_unpacklo_epi64: the result holds the low 64-bit lane
   of `a` in its low lane and the low 64-bit lane of `b` in its high lane. */
vec128 PQCLEAN_MCELIECE348864_SSE_vec128_unpack_low(vec128 a, vec128 b) {
|
||||
return _mm_unpacklo_epi64(a, b);
|
||||
}
|
||||
|
||||
/* Wrapper around _mm_unpackhi_epi64: the result holds the high 64-bit lane
   of `a` in its low lane and the high 64-bit lane of `b` in its high lane. */
vec128 PQCLEAN_MCELIECE348864_SSE_vec128_unpack_high(vec128 a, vec128 b) {
|
||||
return _mm_unpackhi_epi64(a, b);
|
||||
}
|
||||
|
||||
/* Broadcasts the unsigned negation of `a` into both 64-bit lanes. Because
   unsigned negation wraps, a == 0 yields the all-zero vector and a == 1
   yields the all-ones vector.
   NOTE(review): callers presumably only pass 0 or 1 -- confirm. */
vec128 PQCLEAN_MCELIECE348864_SSE_vec128_setbits(uint64_t a) {
|
||||
return _mm_set1_epi64x(-a);
|
||||
}
|
||||
|
||||
/*
  Copy GFBITS vec128 elements from src to dest, in ascending index order.
*/
void PQCLEAN_MCELIECE348864_SSE_vec128_copy(vec128 *dest, const vec128 *src) {
    const vec128 *const src_end = src + GFBITS;

    while (src != src_end) {
        *dest++ = *src++;
    }
}
|
||||
|
||||
/*
  Element-wise XOR of two arrays of GFBITS vectors:
  c[i] = a[i] XOR b[i] for i = 0 .. GFBITS-1, in ascending order.
*/
void PQCLEAN_MCELIECE348864_SSE_vec128_add(vec128 *c, const vec128 *a, const vec128 *b) {
    const vec128 *const a_end = a + GFBITS;

    while (a != a_end) {
        *c++ = PQCLEAN_MCELIECE348864_SSE_vec128_xor(*a++, *b++);
    }
}
|
||||
|
||||
vec128 PQCLEAN_MCELIECE348864_SSE_vec128_or_reduce(const vec128 *a) {
|
||||
int i;
|
||||
vec128 ret;
|
||||
|
||||
ret = a[0];
|
||||
for (i = 1; i < GFBITS; i++) {
|
||||
ret = PQCLEAN_MCELIECE348864_SSE_vec128_or(ret, a[i]);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
  Bitsliced field multiplication. Thin C entry point: forwards h, f and g to
  PQCLEAN_MCELIECE348864_SSE_vec128_mul_asm (implemented elsewhere) with a
  fourth argument of 16.
  NOTE(review): h is presumably the product of f and g; the constant 16 looks
  like the byte distance between consecutive vec128 elements -- confirm
  against the assembly source.
*/
|
||||
void PQCLEAN_MCELIECE348864_SSE_vec128_mul(vec128 *h, vec128 *f, const vec128 *g) {
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_mul_asm(h, f, g, 16);
|
||||
}
|
||||
|
||||
/* bitsliced field squarings */
|
||||
void PQCLEAN_MCELIECE348864_SSE_vec128_sq(vec128 *out, const vec128 *in) {
|
||||
int i;
|
||||
vec128 result[GFBITS];
|
||||
|
||||
result[0] = in[0] ^ in[6];
|
||||
result[1] = in[11];
|
||||
result[2] = in[1] ^ in[7];
|
||||
result[3] = in[6];
|
||||
result[4] = in[2] ^ in[11] ^ in[8];
|
||||
result[5] = in[7];
|
||||
result[6] = in[3] ^ in[9];
|
||||
result[7] = in[8];
|
||||
result[8] = in[4] ^ in[10];
|
||||
result[9] = in[9];
|
||||
result[10] = in[5] ^ in[11];
|
||||
result[11] = in[10];
|
||||
|
||||
for (i = 0; i < GFBITS; i++) {
|
||||
out[i] = result[i];
|
||||
}
|
||||
}
|
||||
|
||||
/* bitsliced field inverses */
|
||||
void PQCLEAN_MCELIECE348864_SSE_vec128_inv(vec128 *out, const vec128 *in) {
|
||||
vec128 tmp_11[ GFBITS ];
|
||||
vec128 tmp_1111[ GFBITS ];
|
||||
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_copy(out, in);
|
||||
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_sq(out, out);
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_mul(tmp_11, out, in); // 11
|
||||
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_sq(out, tmp_11);
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_sq(out, out);
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_mul(tmp_1111, out, tmp_11); // 1111
|
||||
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_sq(out, tmp_1111);
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_sq(out, out);
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_sq(out, out);
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_sq(out, out);
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_mul(out, out, tmp_1111); // 11111111
|
||||
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_sq(out, out);
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_sq(out, out);
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_mul(out, out, tmp_11); // 1111111111
|
||||
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_sq(out, out);
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_mul(out, out, in); // 11111111111
|
||||
|
||||
PQCLEAN_MCELIECE348864_SSE_vec128_sq(out, out); // 111111111110
|
||||
}
|
||||
|
||||
@ -1,42 +0,0 @@
|
||||
#ifndef PQCLEAN_MCELIECE348864_SSE_VEC128_H
|
||||
#define PQCLEAN_MCELIECE348864_SSE_VEC128_H
|
||||
/*
|
||||
This file is for functions related to 128-bit vectors
|
||||
including functions for bitsliced field operations
|
||||
*/
|
||||
|
||||
|
||||
#include <immintrin.h>
|
||||
#include <stdint.h>
|
||||
|
||||
typedef __m128i vec128;
|
||||
|
||||
// this needs to be a macro, because
|
||||
// _mm_extract_epi64 requires a literal int argument.
|
||||
#define PQCLEAN_MCELIECE348864_SSE_vec128_extract(a, i) ((uint64_t) _mm_extract_epi64((vec128) (a), (i)))
|
||||
|
||||
int PQCLEAN_MCELIECE348864_SSE_vec128_testz(vec128 a);
|
||||
vec128 PQCLEAN_MCELIECE348864_SSE_vec128_set1_16b(uint16_t a);
|
||||
vec128 PQCLEAN_MCELIECE348864_SSE_vec128_setzero(void);
|
||||
vec128 PQCLEAN_MCELIECE348864_SSE_vec128_and(vec128 a, vec128 b);
|
||||
vec128 PQCLEAN_MCELIECE348864_SSE_vec128_xor(vec128 a, vec128 b);
|
||||
vec128 PQCLEAN_MCELIECE348864_SSE_vec128_or(vec128 a, vec128 b);
|
||||
vec128 PQCLEAN_MCELIECE348864_SSE_vec128_sll_2x(vec128 a, int s);
|
||||
vec128 PQCLEAN_MCELIECE348864_SSE_vec128_srl_2x(vec128 a, int s);
|
||||
vec128 PQCLEAN_MCELIECE348864_SSE_vec128_set2x(uint64_t a0, uint64_t a1);
|
||||
vec128 PQCLEAN_MCELIECE348864_SSE_vec128_unpack_low(vec128 a, vec128 b);
|
||||
vec128 PQCLEAN_MCELIECE348864_SSE_vec128_unpack_high(vec128 a, vec128 b);
|
||||
vec128 PQCLEAN_MCELIECE348864_SSE_vec128_setbits(uint64_t a);
|
||||
void PQCLEAN_MCELIECE348864_SSE_vec128_copy(vec128 *dest, const vec128 *src);
|
||||
void PQCLEAN_MCELIECE348864_SSE_vec128_add(vec128 *c, const vec128 *a, const vec128 *b);
|
||||
vec128 PQCLEAN_MCELIECE348864_SSE_vec128_or_reduce(const vec128 *a);
|
||||
|
||||
extern void PQCLEAN_MCELIECE348864_SSE_vec128_mul_asm(vec128 *, vec128 *, const vec128 *, int);
|
||||
|
||||
/* bitsliced field multiplications */
|
||||
void PQCLEAN_MCELIECE348864_SSE_vec128_mul(vec128 *h, vec128 *f, const vec128 *g);
|
||||
|
||||
void PQCLEAN_MCELIECE348864_SSE_vec128_sq(vec128 * /*out*/, const vec128 * /*in*/);
|
||||
void PQCLEAN_MCELIECE348864_SSE_vec128_inv(vec128 * /*out*/, const vec128 * /*in*/);
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,356 +0,0 @@
|
||||
|
||||
# vec_reduce_asm(input_0 = %rdi), result in %rax
#
# Reads twelve 64-bit words at byte offsets 0, 8, ..., 88 from input_0 and
# returns an integer whose bit k is the parity (population count modulo 2)
# of the word at byte offset 8*k. Words are consumed from offset 88 down to
# offset 0 while the result is shifted left by one each time, so the word at
# offset 0 ends up in bit 0.
#
# Uses the popcnt instruction. Clobbers %rsi, %rdi, %r11 and the flags.

.p2align 5
.global _PQCLEAN_MCELIECE348864_SSE_vec_reduce_asm
.global PQCLEAN_MCELIECE348864_SSE_vec_reduce_asm
_PQCLEAN_MCELIECE348864_SSE_vec_reduce_asm:
PQCLEAN_MCELIECE348864_SSE_vec_reduce_asm:

# prologue: round %rsp down to a 32-byte boundary, keeping the adjustment in %r11
mov %rsp,%r11
and $31,%r11
add $0,%r11
sub %r11,%rsp

# r = 0
mov $0,%rax

# per word: t = mem64[input_0 + off]; c = popcount(t) & 1; r = (r << 1) | c
.irp off,88,80,72,64,56,48,40,32,24,16,8
movq \off(%rdi),%rsi
popcnt %rsi, %rsi
and $1,%esi
shl $1,%rax
or %rsi,%rax
.endr

# final word at offset 0: the pointer register itself is reused as scratch
movq 0(%rdi),%rdi
popcnt %rdi, %rdi
and $1,%edi
shl $1,%rax
or %rdi,%rax

# epilogue: undo the stack adjustment, r is returned in %rax
add %r11,%rsp
ret
|
||||
Loading…
x
Reference in New Issue
Block a user