mirror of
https://github.com/open-quantum-safe/liboqs.git
synced 2025-10-09 00:04:26 -04:00
Manually remove fips202 files left by copy_from_upstream (#958)
This commit is contained in:
parent
7a5001fe12
commit
7f3088232a
@ -1,774 +0,0 @@
|
|||||||
/* Based on the public domain implementation in crypto_hash/keccakc512/simple/ from
|
|
||||||
* http://bench.cr.yp.to/supercop.html by Ronny Van Keer and the public domain "TweetFips202"
|
|
||||||
* implementation from https://twitter.com/tweetfips202 by Gilles Van Assche, Daniel J. Bernstein,
|
|
||||||
* and Peter Schwabe */
|
|
||||||
|
|
||||||
#include <stddef.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
#include "fips202.h"
|
|
||||||
|
|
||||||
#define NROUNDS 24
|
|
||||||
#define ROL(a, offset) ((a << offset) ^ (a >> (64-offset)))
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: load64
|
|
||||||
*
|
|
||||||
* Description: Load 8 bytes into uint64_t in little-endian order
|
|
||||||
*
|
|
||||||
* Arguments: - const uint8_t *x: pointer to input byte array
|
|
||||||
*
|
|
||||||
* Returns the loaded 64-bit unsigned integer
|
|
||||||
**************************************************/
|
|
||||||
static uint64_t load64(const uint8_t x[8]) {
|
|
||||||
unsigned int i;
|
|
||||||
uint64_t r = 0;
|
|
||||||
|
|
||||||
for(i=0;i<8;i++)
|
|
||||||
r |= (uint64_t)x[i] << 8*i;
|
|
||||||
|
|
||||||
return r;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: store64
|
|
||||||
*
|
|
||||||
* Description: Store a 64-bit integer to array of 8 bytes in little-endian order
|
|
||||||
*
|
|
||||||
* Arguments: - uint8_t *x: pointer to the output byte array (allocated)
|
|
||||||
* - uint64_t u: input 64-bit unsigned integer
|
|
||||||
**************************************************/
|
|
||||||
static void store64(uint8_t x[8], uint64_t u) {
|
|
||||||
unsigned int i;
|
|
||||||
|
|
||||||
for(i=0;i<8;i++)
|
|
||||||
x[i] = u >> 8*i;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Keccak round constants */
|
|
||||||
static const uint64_t KeccakF_RoundConstants[NROUNDS] = {
|
|
||||||
(uint64_t)0x0000000000000001ULL,
|
|
||||||
(uint64_t)0x0000000000008082ULL,
|
|
||||||
(uint64_t)0x800000000000808aULL,
|
|
||||||
(uint64_t)0x8000000080008000ULL,
|
|
||||||
(uint64_t)0x000000000000808bULL,
|
|
||||||
(uint64_t)0x0000000080000001ULL,
|
|
||||||
(uint64_t)0x8000000080008081ULL,
|
|
||||||
(uint64_t)0x8000000000008009ULL,
|
|
||||||
(uint64_t)0x000000000000008aULL,
|
|
||||||
(uint64_t)0x0000000000000088ULL,
|
|
||||||
(uint64_t)0x0000000080008009ULL,
|
|
||||||
(uint64_t)0x000000008000000aULL,
|
|
||||||
(uint64_t)0x000000008000808bULL,
|
|
||||||
(uint64_t)0x800000000000008bULL,
|
|
||||||
(uint64_t)0x8000000000008089ULL,
|
|
||||||
(uint64_t)0x8000000000008003ULL,
|
|
||||||
(uint64_t)0x8000000000008002ULL,
|
|
||||||
(uint64_t)0x8000000000000080ULL,
|
|
||||||
(uint64_t)0x000000000000800aULL,
|
|
||||||
(uint64_t)0x800000008000000aULL,
|
|
||||||
(uint64_t)0x8000000080008081ULL,
|
|
||||||
(uint64_t)0x8000000000008080ULL,
|
|
||||||
(uint64_t)0x0000000080000001ULL,
|
|
||||||
(uint64_t)0x8000000080008008ULL
|
|
||||||
};
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: KeccakF1600_StatePermute
|
|
||||||
*
|
|
||||||
* Description: The Keccak F1600 Permutation
|
|
||||||
*
|
|
||||||
* Arguments: - uint64_t *state: pointer to input/output Keccak state
|
|
||||||
**************************************************/
|
|
||||||
static void KeccakF1600_StatePermute(uint64_t state[25])
|
|
||||||
{
|
|
||||||
int round;
|
|
||||||
|
|
||||||
uint64_t Aba, Abe, Abi, Abo, Abu;
|
|
||||||
uint64_t Aga, Age, Agi, Ago, Agu;
|
|
||||||
uint64_t Aka, Ake, Aki, Ako, Aku;
|
|
||||||
uint64_t Ama, Ame, Ami, Amo, Amu;
|
|
||||||
uint64_t Asa, Ase, Asi, Aso, Asu;
|
|
||||||
uint64_t BCa, BCe, BCi, BCo, BCu;
|
|
||||||
uint64_t Da, De, Di, Do, Du;
|
|
||||||
uint64_t Eba, Ebe, Ebi, Ebo, Ebu;
|
|
||||||
uint64_t Ega, Ege, Egi, Ego, Egu;
|
|
||||||
uint64_t Eka, Eke, Eki, Eko, Eku;
|
|
||||||
uint64_t Ema, Eme, Emi, Emo, Emu;
|
|
||||||
uint64_t Esa, Ese, Esi, Eso, Esu;
|
|
||||||
|
|
||||||
//copyFromState(A, state)
|
|
||||||
Aba = state[ 0];
|
|
||||||
Abe = state[ 1];
|
|
||||||
Abi = state[ 2];
|
|
||||||
Abo = state[ 3];
|
|
||||||
Abu = state[ 4];
|
|
||||||
Aga = state[ 5];
|
|
||||||
Age = state[ 6];
|
|
||||||
Agi = state[ 7];
|
|
||||||
Ago = state[ 8];
|
|
||||||
Agu = state[ 9];
|
|
||||||
Aka = state[10];
|
|
||||||
Ake = state[11];
|
|
||||||
Aki = state[12];
|
|
||||||
Ako = state[13];
|
|
||||||
Aku = state[14];
|
|
||||||
Ama = state[15];
|
|
||||||
Ame = state[16];
|
|
||||||
Ami = state[17];
|
|
||||||
Amo = state[18];
|
|
||||||
Amu = state[19];
|
|
||||||
Asa = state[20];
|
|
||||||
Ase = state[21];
|
|
||||||
Asi = state[22];
|
|
||||||
Aso = state[23];
|
|
||||||
Asu = state[24];
|
|
||||||
|
|
||||||
for(round = 0; round < NROUNDS; round += 2) {
|
|
||||||
// prepareTheta
|
|
||||||
BCa = Aba^Aga^Aka^Ama^Asa;
|
|
||||||
BCe = Abe^Age^Ake^Ame^Ase;
|
|
||||||
BCi = Abi^Agi^Aki^Ami^Asi;
|
|
||||||
BCo = Abo^Ago^Ako^Amo^Aso;
|
|
||||||
BCu = Abu^Agu^Aku^Amu^Asu;
|
|
||||||
|
|
||||||
//thetaRhoPiChiIotaPrepareTheta(round, A, E)
|
|
||||||
Da = BCu^ROL(BCe, 1);
|
|
||||||
De = BCa^ROL(BCi, 1);
|
|
||||||
Di = BCe^ROL(BCo, 1);
|
|
||||||
Do = BCi^ROL(BCu, 1);
|
|
||||||
Du = BCo^ROL(BCa, 1);
|
|
||||||
|
|
||||||
Aba ^= Da;
|
|
||||||
BCa = Aba;
|
|
||||||
Age ^= De;
|
|
||||||
BCe = ROL(Age, 44);
|
|
||||||
Aki ^= Di;
|
|
||||||
BCi = ROL(Aki, 43);
|
|
||||||
Amo ^= Do;
|
|
||||||
BCo = ROL(Amo, 21);
|
|
||||||
Asu ^= Du;
|
|
||||||
BCu = ROL(Asu, 14);
|
|
||||||
Eba = BCa ^((~BCe)& BCi );
|
|
||||||
Eba ^= (uint64_t)KeccakF_RoundConstants[round];
|
|
||||||
Ebe = BCe ^((~BCi)& BCo );
|
|
||||||
Ebi = BCi ^((~BCo)& BCu );
|
|
||||||
Ebo = BCo ^((~BCu)& BCa );
|
|
||||||
Ebu = BCu ^((~BCa)& BCe );
|
|
||||||
|
|
||||||
Abo ^= Do;
|
|
||||||
BCa = ROL(Abo, 28);
|
|
||||||
Agu ^= Du;
|
|
||||||
BCe = ROL(Agu, 20);
|
|
||||||
Aka ^= Da;
|
|
||||||
BCi = ROL(Aka, 3);
|
|
||||||
Ame ^= De;
|
|
||||||
BCo = ROL(Ame, 45);
|
|
||||||
Asi ^= Di;
|
|
||||||
BCu = ROL(Asi, 61);
|
|
||||||
Ega = BCa ^((~BCe)& BCi );
|
|
||||||
Ege = BCe ^((~BCi)& BCo );
|
|
||||||
Egi = BCi ^((~BCo)& BCu );
|
|
||||||
Ego = BCo ^((~BCu)& BCa );
|
|
||||||
Egu = BCu ^((~BCa)& BCe );
|
|
||||||
|
|
||||||
Abe ^= De;
|
|
||||||
BCa = ROL(Abe, 1);
|
|
||||||
Agi ^= Di;
|
|
||||||
BCe = ROL(Agi, 6);
|
|
||||||
Ako ^= Do;
|
|
||||||
BCi = ROL(Ako, 25);
|
|
||||||
Amu ^= Du;
|
|
||||||
BCo = ROL(Amu, 8);
|
|
||||||
Asa ^= Da;
|
|
||||||
BCu = ROL(Asa, 18);
|
|
||||||
Eka = BCa ^((~BCe)& BCi );
|
|
||||||
Eke = BCe ^((~BCi)& BCo );
|
|
||||||
Eki = BCi ^((~BCo)& BCu );
|
|
||||||
Eko = BCo ^((~BCu)& BCa );
|
|
||||||
Eku = BCu ^((~BCa)& BCe );
|
|
||||||
|
|
||||||
Abu ^= Du;
|
|
||||||
BCa = ROL(Abu, 27);
|
|
||||||
Aga ^= Da;
|
|
||||||
BCe = ROL(Aga, 36);
|
|
||||||
Ake ^= De;
|
|
||||||
BCi = ROL(Ake, 10);
|
|
||||||
Ami ^= Di;
|
|
||||||
BCo = ROL(Ami, 15);
|
|
||||||
Aso ^= Do;
|
|
||||||
BCu = ROL(Aso, 56);
|
|
||||||
Ema = BCa ^((~BCe)& BCi );
|
|
||||||
Eme = BCe ^((~BCi)& BCo );
|
|
||||||
Emi = BCi ^((~BCo)& BCu );
|
|
||||||
Emo = BCo ^((~BCu)& BCa );
|
|
||||||
Emu = BCu ^((~BCa)& BCe );
|
|
||||||
|
|
||||||
Abi ^= Di;
|
|
||||||
BCa = ROL(Abi, 62);
|
|
||||||
Ago ^= Do;
|
|
||||||
BCe = ROL(Ago, 55);
|
|
||||||
Aku ^= Du;
|
|
||||||
BCi = ROL(Aku, 39);
|
|
||||||
Ama ^= Da;
|
|
||||||
BCo = ROL(Ama, 41);
|
|
||||||
Ase ^= De;
|
|
||||||
BCu = ROL(Ase, 2);
|
|
||||||
Esa = BCa ^((~BCe)& BCi );
|
|
||||||
Ese = BCe ^((~BCi)& BCo );
|
|
||||||
Esi = BCi ^((~BCo)& BCu );
|
|
||||||
Eso = BCo ^((~BCu)& BCa );
|
|
||||||
Esu = BCu ^((~BCa)& BCe );
|
|
||||||
|
|
||||||
// prepareTheta
|
|
||||||
BCa = Eba^Ega^Eka^Ema^Esa;
|
|
||||||
BCe = Ebe^Ege^Eke^Eme^Ese;
|
|
||||||
BCi = Ebi^Egi^Eki^Emi^Esi;
|
|
||||||
BCo = Ebo^Ego^Eko^Emo^Eso;
|
|
||||||
BCu = Ebu^Egu^Eku^Emu^Esu;
|
|
||||||
|
|
||||||
//thetaRhoPiChiIotaPrepareTheta(round+1, E, A)
|
|
||||||
Da = BCu^ROL(BCe, 1);
|
|
||||||
De = BCa^ROL(BCi, 1);
|
|
||||||
Di = BCe^ROL(BCo, 1);
|
|
||||||
Do = BCi^ROL(BCu, 1);
|
|
||||||
Du = BCo^ROL(BCa, 1);
|
|
||||||
|
|
||||||
Eba ^= Da;
|
|
||||||
BCa = Eba;
|
|
||||||
Ege ^= De;
|
|
||||||
BCe = ROL(Ege, 44);
|
|
||||||
Eki ^= Di;
|
|
||||||
BCi = ROL(Eki, 43);
|
|
||||||
Emo ^= Do;
|
|
||||||
BCo = ROL(Emo, 21);
|
|
||||||
Esu ^= Du;
|
|
||||||
BCu = ROL(Esu, 14);
|
|
||||||
Aba = BCa ^((~BCe)& BCi );
|
|
||||||
Aba ^= (uint64_t)KeccakF_RoundConstants[round+1];
|
|
||||||
Abe = BCe ^((~BCi)& BCo );
|
|
||||||
Abi = BCi ^((~BCo)& BCu );
|
|
||||||
Abo = BCo ^((~BCu)& BCa );
|
|
||||||
Abu = BCu ^((~BCa)& BCe );
|
|
||||||
|
|
||||||
Ebo ^= Do;
|
|
||||||
BCa = ROL(Ebo, 28);
|
|
||||||
Egu ^= Du;
|
|
||||||
BCe = ROL(Egu, 20);
|
|
||||||
Eka ^= Da;
|
|
||||||
BCi = ROL(Eka, 3);
|
|
||||||
Eme ^= De;
|
|
||||||
BCo = ROL(Eme, 45);
|
|
||||||
Esi ^= Di;
|
|
||||||
BCu = ROL(Esi, 61);
|
|
||||||
Aga = BCa ^((~BCe)& BCi );
|
|
||||||
Age = BCe ^((~BCi)& BCo );
|
|
||||||
Agi = BCi ^((~BCo)& BCu );
|
|
||||||
Ago = BCo ^((~BCu)& BCa );
|
|
||||||
Agu = BCu ^((~BCa)& BCe );
|
|
||||||
|
|
||||||
Ebe ^= De;
|
|
||||||
BCa = ROL(Ebe, 1);
|
|
||||||
Egi ^= Di;
|
|
||||||
BCe = ROL(Egi, 6);
|
|
||||||
Eko ^= Do;
|
|
||||||
BCi = ROL(Eko, 25);
|
|
||||||
Emu ^= Du;
|
|
||||||
BCo = ROL(Emu, 8);
|
|
||||||
Esa ^= Da;
|
|
||||||
BCu = ROL(Esa, 18);
|
|
||||||
Aka = BCa ^((~BCe)& BCi );
|
|
||||||
Ake = BCe ^((~BCi)& BCo );
|
|
||||||
Aki = BCi ^((~BCo)& BCu );
|
|
||||||
Ako = BCo ^((~BCu)& BCa );
|
|
||||||
Aku = BCu ^((~BCa)& BCe );
|
|
||||||
|
|
||||||
Ebu ^= Du;
|
|
||||||
BCa = ROL(Ebu, 27);
|
|
||||||
Ega ^= Da;
|
|
||||||
BCe = ROL(Ega, 36);
|
|
||||||
Eke ^= De;
|
|
||||||
BCi = ROL(Eke, 10);
|
|
||||||
Emi ^= Di;
|
|
||||||
BCo = ROL(Emi, 15);
|
|
||||||
Eso ^= Do;
|
|
||||||
BCu = ROL(Eso, 56);
|
|
||||||
Ama = BCa ^((~BCe)& BCi );
|
|
||||||
Ame = BCe ^((~BCi)& BCo );
|
|
||||||
Ami = BCi ^((~BCo)& BCu );
|
|
||||||
Amo = BCo ^((~BCu)& BCa );
|
|
||||||
Amu = BCu ^((~BCa)& BCe );
|
|
||||||
|
|
||||||
Ebi ^= Di;
|
|
||||||
BCa = ROL(Ebi, 62);
|
|
||||||
Ego ^= Do;
|
|
||||||
BCe = ROL(Ego, 55);
|
|
||||||
Eku ^= Du;
|
|
||||||
BCi = ROL(Eku, 39);
|
|
||||||
Ema ^= Da;
|
|
||||||
BCo = ROL(Ema, 41);
|
|
||||||
Ese ^= De;
|
|
||||||
BCu = ROL(Ese, 2);
|
|
||||||
Asa = BCa ^((~BCe)& BCi );
|
|
||||||
Ase = BCe ^((~BCi)& BCo );
|
|
||||||
Asi = BCi ^((~BCo)& BCu );
|
|
||||||
Aso = BCo ^((~BCu)& BCa );
|
|
||||||
Asu = BCu ^((~BCa)& BCe );
|
|
||||||
}
|
|
||||||
|
|
||||||
//copyToState(state, A)
|
|
||||||
state[ 0] = Aba;
|
|
||||||
state[ 1] = Abe;
|
|
||||||
state[ 2] = Abi;
|
|
||||||
state[ 3] = Abo;
|
|
||||||
state[ 4] = Abu;
|
|
||||||
state[ 5] = Aga;
|
|
||||||
state[ 6] = Age;
|
|
||||||
state[ 7] = Agi;
|
|
||||||
state[ 8] = Ago;
|
|
||||||
state[ 9] = Agu;
|
|
||||||
state[10] = Aka;
|
|
||||||
state[11] = Ake;
|
|
||||||
state[12] = Aki;
|
|
||||||
state[13] = Ako;
|
|
||||||
state[14] = Aku;
|
|
||||||
state[15] = Ama;
|
|
||||||
state[16] = Ame;
|
|
||||||
state[17] = Ami;
|
|
||||||
state[18] = Amo;
|
|
||||||
state[19] = Amu;
|
|
||||||
state[20] = Asa;
|
|
||||||
state[21] = Ase;
|
|
||||||
state[22] = Asi;
|
|
||||||
state[23] = Aso;
|
|
||||||
state[24] = Asu;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: keccak_init
|
|
||||||
*
|
|
||||||
* Description: Initializes the Keccak state.
|
|
||||||
*
|
|
||||||
* Arguments: - uint64_t *s: pointer to Keccak state
|
|
||||||
**************************************************/
|
|
||||||
static void keccak_init(uint64_t s[25])
|
|
||||||
{
|
|
||||||
unsigned int i;
|
|
||||||
for(i=0;i<25;i++)
|
|
||||||
s[i] = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: keccak_absorb
|
|
||||||
*
|
|
||||||
* Description: Absorb step of Keccak; incremental.
|
|
||||||
*
|
|
||||||
* Arguments: - uint64_t *s: pointer to Keccak state
|
|
||||||
* - unsigned int pos: position in current block to be absorbed
|
|
||||||
* - unsigned int r: rate in bytes (e.g., 168 for SHAKE128)
|
|
||||||
* - const uint8_t *in: pointer to input to be absorbed into s
|
|
||||||
* - size_t inlen: length of input in bytes
|
|
||||||
*
|
|
||||||
* Returns new position pos in current block
|
|
||||||
**************************************************/
|
|
||||||
static unsigned int keccak_absorb(uint64_t s[25],
|
|
||||||
unsigned int pos,
|
|
||||||
unsigned int r,
|
|
||||||
const uint8_t *in,
|
|
||||||
size_t inlen)
|
|
||||||
{
|
|
||||||
unsigned int i;
|
|
||||||
|
|
||||||
while(pos+inlen >= r) {
|
|
||||||
for(i=pos;i<r;i++)
|
|
||||||
s[i/8] ^= (uint64_t)*in++ << 8*(i%8);
|
|
||||||
inlen -= r-pos;
|
|
||||||
KeccakF1600_StatePermute(s);
|
|
||||||
pos = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
for(i=pos;i<pos+inlen;i++)
|
|
||||||
s[i/8] ^= (uint64_t)*in++ << 8*(i%8);
|
|
||||||
|
|
||||||
return i;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: keccak_finalize
|
|
||||||
*
|
|
||||||
* Description: Finalize absorb step.
|
|
||||||
*
|
|
||||||
* Arguments: - uint64_t *s: pointer to Keccak state
|
|
||||||
* - unsigned int pos: position in current block to be absorbed
|
|
||||||
* - unsigned int r: rate in bytes (e.g., 168 for SHAKE128)
|
|
||||||
* - uint8_t p: domain separation byte
|
|
||||||
**************************************************/
|
|
||||||
static void keccak_finalize(uint64_t s[25], unsigned int pos, unsigned int r, uint8_t p)
|
|
||||||
{
|
|
||||||
s[pos/8] ^= (uint64_t)p << 8*(pos%8);
|
|
||||||
s[r/8-1] ^= 1ULL << 63;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: keccak_squeeze
|
|
||||||
*
|
|
||||||
* Description: Squeeze step of Keccak. Squeezes arbitratrily many bytes.
|
|
||||||
* Modifies the state. Can be called multiple times to keep
|
|
||||||
* squeezing, i.e., is incremental.
|
|
||||||
*
|
|
||||||
* Arguments: - uint8_t *out: pointer to output
|
|
||||||
* - size_t outlen: number of bytes to be squeezed (written to out)
|
|
||||||
* - uint64_t *s: pointer to input/output Keccak state
|
|
||||||
* - unsigned int pos: number of bytes in current block already squeezed
|
|
||||||
* - unsigned int r: rate in bytes (e.g., 168 for SHAKE128)
|
|
||||||
*
|
|
||||||
* Returns new position pos in current block
|
|
||||||
**************************************************/
|
|
||||||
static unsigned int keccak_squeeze(uint8_t *out,
|
|
||||||
size_t outlen,
|
|
||||||
uint64_t s[25],
|
|
||||||
unsigned int pos,
|
|
||||||
unsigned int r)
|
|
||||||
{
|
|
||||||
unsigned int i;
|
|
||||||
|
|
||||||
while(outlen) {
|
|
||||||
if(pos == r) {
|
|
||||||
KeccakF1600_StatePermute(s);
|
|
||||||
pos = 0;
|
|
||||||
}
|
|
||||||
for(i=pos;i < r && i < pos+outlen; i++)
|
|
||||||
*out++ = s[i/8] >> 8*(i%8);
|
|
||||||
outlen -= i-pos;
|
|
||||||
pos = i;
|
|
||||||
}
|
|
||||||
|
|
||||||
return pos;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: keccak_absorb_once
|
|
||||||
*
|
|
||||||
* Description: Absorb step of Keccak;
|
|
||||||
* non-incremental, starts by zeroeing the state.
|
|
||||||
*
|
|
||||||
* Arguments: - uint64_t *s: pointer to (uninitialized) output Keccak state
|
|
||||||
* - unsigned int r: rate in bytes (e.g., 168 for SHAKE128)
|
|
||||||
* - const uint8_t *in: pointer to input to be absorbed into s
|
|
||||||
* - size_t inlen: length of input in bytes
|
|
||||||
* - uint8_t p: domain-separation byte for different Keccak-derived functions
|
|
||||||
**************************************************/
|
|
||||||
static void keccak_absorb_once(uint64_t s[25],
|
|
||||||
unsigned int r,
|
|
||||||
const uint8_t *in,
|
|
||||||
size_t inlen,
|
|
||||||
uint8_t p)
|
|
||||||
{
|
|
||||||
unsigned int i;
|
|
||||||
|
|
||||||
for(i=0;i<25;i++)
|
|
||||||
s[i] = 0;
|
|
||||||
|
|
||||||
while(inlen >= r) {
|
|
||||||
for(i=0;i<r/8;i++)
|
|
||||||
s[i] ^= load64(in+8*i);
|
|
||||||
in += r;
|
|
||||||
inlen -= r;
|
|
||||||
KeccakF1600_StatePermute(s);
|
|
||||||
}
|
|
||||||
|
|
||||||
for(i=0;i<inlen;i++)
|
|
||||||
s[i/8] ^= (uint64_t)in[i] << 8*(i%8);
|
|
||||||
|
|
||||||
s[i/8] ^= (uint64_t)p << 8*(i%8);
|
|
||||||
s[(r-1)/8] ^= 1ULL << 63;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: keccak_squeezeblocks
|
|
||||||
*
|
|
||||||
* Description: Squeeze step of Keccak. Squeezes full blocks of r bytes each.
|
|
||||||
* Modifies the state. Can be called multiple times to keep
|
|
||||||
* squeezing, i.e., is incremental. Assumes zero bytes of current
|
|
||||||
* block have already been squeezed.
|
|
||||||
*
|
|
||||||
* Arguments: - uint8_t *out: pointer to output blocks
|
|
||||||
* - size_t nblocks: number of blocks to be squeezed (written to out)
|
|
||||||
* - uint64_t *s: pointer to input/output Keccak state
|
|
||||||
* - unsigned int r: rate in bytes (e.g., 168 for SHAKE128)
|
|
||||||
**************************************************/
|
|
||||||
static void keccak_squeezeblocks(uint8_t *out,
|
|
||||||
size_t nblocks,
|
|
||||||
uint64_t s[25],
|
|
||||||
unsigned int r)
|
|
||||||
{
|
|
||||||
unsigned int i;
|
|
||||||
|
|
||||||
while(nblocks) {
|
|
||||||
KeccakF1600_StatePermute(s);
|
|
||||||
for(i=0;i<r/8;i++)
|
|
||||||
store64(out+8*i, s[i]);
|
|
||||||
out += r;
|
|
||||||
nblocks -= 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: shake128_init
|
|
||||||
*
|
|
||||||
* Description: Initilizes Keccak state for use as SHAKE128 XOF
|
|
||||||
*
|
|
||||||
* Arguments: - keccak_state *state: pointer to (uninitialized) Keccak state
|
|
||||||
**************************************************/
|
|
||||||
void shake128_init(keccak_state *state)
|
|
||||||
{
|
|
||||||
keccak_init(state->s);
|
|
||||||
state->pos = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: shake128_absorb
|
|
||||||
*
|
|
||||||
* Description: Absorb step of the SHAKE128 XOF; incremental.
|
|
||||||
*
|
|
||||||
* Arguments: - keccak_state *state: pointer to (initialized) output Keccak state
|
|
||||||
* - const uint8_t *in: pointer to input to be absorbed into s
|
|
||||||
* - size_t inlen: length of input in bytes
|
|
||||||
**************************************************/
|
|
||||||
void shake128_absorb(keccak_state *state, const uint8_t *in, size_t inlen)
|
|
||||||
{
|
|
||||||
state->pos = keccak_absorb(state->s, state->pos, SHAKE128_RATE, in, inlen);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: shake128_finalize
|
|
||||||
*
|
|
||||||
* Description: Finalize absorb step of the SHAKE128 XOF.
|
|
||||||
*
|
|
||||||
* Arguments: - keccak_state *state: pointer to Keccak state
|
|
||||||
**************************************************/
|
|
||||||
void shake128_finalize(keccak_state *state)
|
|
||||||
{
|
|
||||||
keccak_finalize(state->s, state->pos, SHAKE128_RATE, 0x1F);
|
|
||||||
state->pos = SHAKE128_RATE;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: shake128_squeeze
|
|
||||||
*
|
|
||||||
* Description: Squeeze step of SHAKE128 XOF. Squeezes arbitraily many
|
|
||||||
* bytes. Can be called multiple times to keep squeezing.
|
|
||||||
*
|
|
||||||
* Arguments: - uint8_t *out: pointer to output blocks
|
|
||||||
* - size_t outlen : number of bytes to be squeezed (written to output)
|
|
||||||
* - keccak_state *s: pointer to input/output Keccak state
|
|
||||||
**************************************************/
|
|
||||||
void shake128_squeeze(uint8_t *out, size_t outlen, keccak_state *state)
|
|
||||||
{
|
|
||||||
state->pos = keccak_squeeze(out, outlen, state->s, state->pos, SHAKE128_RATE);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: shake128_absorb_once
|
|
||||||
*
|
|
||||||
* Description: Initialize, absorb into and finalize SHAKE128 XOF; non-incremental.
|
|
||||||
*
|
|
||||||
* Arguments: - keccak_state *state: pointer to (uninitialized) output Keccak state
|
|
||||||
* - const uint8_t *in: pointer to input to be absorbed into s
|
|
||||||
* - size_t inlen: length of input in bytes
|
|
||||||
**************************************************/
|
|
||||||
void shake128_absorb_once(keccak_state *state, const uint8_t *in, size_t inlen)
|
|
||||||
{
|
|
||||||
keccak_absorb_once(state->s, SHAKE128_RATE, in, inlen, 0x1F);
|
|
||||||
state->pos = SHAKE128_RATE;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: shake128_squeezeblocks
|
|
||||||
*
|
|
||||||
* Description: Squeeze step of SHAKE128 XOF. Squeezes full blocks of
|
|
||||||
* SHAKE128_RATE bytes each. Can be called multiple times
|
|
||||||
* to keep squeezing. Assumes new block has not yet been
|
|
||||||
* started (state->pos = SHAKE128_RATE).
|
|
||||||
*
|
|
||||||
* Arguments: - uint8_t *out: pointer to output blocks
|
|
||||||
* - size_t nblocks: number of blocks to be squeezed (written to output)
|
|
||||||
* - keccak_state *s: pointer to input/output Keccak state
|
|
||||||
**************************************************/
|
|
||||||
void shake128_squeezeblocks(uint8_t *out, size_t nblocks, keccak_state *state)
|
|
||||||
{
|
|
||||||
keccak_squeezeblocks(out, nblocks, state->s, SHAKE128_RATE);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: shake256_init
|
|
||||||
*
|
|
||||||
* Description: Initilizes Keccak state for use as SHAKE256 XOF
|
|
||||||
*
|
|
||||||
* Arguments: - keccak_state *state: pointer to (uninitialized) Keccak state
|
|
||||||
**************************************************/
|
|
||||||
void shake256_init(keccak_state *state)
|
|
||||||
{
|
|
||||||
keccak_init(state->s);
|
|
||||||
state->pos = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: shake256_absorb
|
|
||||||
*
|
|
||||||
* Description: Absorb step of the SHAKE256 XOF; incremental.
|
|
||||||
*
|
|
||||||
* Arguments: - keccak_state *state: pointer to (initialized) output Keccak state
|
|
||||||
* - const uint8_t *in: pointer to input to be absorbed into s
|
|
||||||
* - size_t inlen: length of input in bytes
|
|
||||||
**************************************************/
|
|
||||||
void shake256_absorb(keccak_state *state, const uint8_t *in, size_t inlen)
|
|
||||||
{
|
|
||||||
state->pos = keccak_absorb(state->s, state->pos, SHAKE256_RATE, in, inlen);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: shake256_finalize
|
|
||||||
*
|
|
||||||
* Description: Finalize absorb step of the SHAKE256 XOF.
|
|
||||||
*
|
|
||||||
* Arguments: - keccak_state *state: pointer to Keccak state
|
|
||||||
**************************************************/
|
|
||||||
void shake256_finalize(keccak_state *state)
|
|
||||||
{
|
|
||||||
keccak_finalize(state->s, state->pos, SHAKE256_RATE, 0x1F);
|
|
||||||
state->pos = SHAKE256_RATE;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: shake256_squeeze
|
|
||||||
*
|
|
||||||
* Description: Squeeze step of SHAKE256 XOF. Squeezes arbitraily many
|
|
||||||
* bytes. Can be called multiple times to keep squeezing.
|
|
||||||
*
|
|
||||||
* Arguments: - uint8_t *out: pointer to output blocks
|
|
||||||
* - size_t outlen : number of bytes to be squeezed (written to output)
|
|
||||||
* - keccak_state *s: pointer to input/output Keccak state
|
|
||||||
**************************************************/
|
|
||||||
void shake256_squeeze(uint8_t *out, size_t outlen, keccak_state *state)
|
|
||||||
{
|
|
||||||
state->pos = keccak_squeeze(out, outlen, state->s, state->pos, SHAKE256_RATE);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: shake256_absorb_once
|
|
||||||
*
|
|
||||||
* Description: Initialize, absorb into and finalize SHAKE256 XOF; non-incremental.
|
|
||||||
*
|
|
||||||
* Arguments: - keccak_state *state: pointer to (uninitialized) output Keccak state
|
|
||||||
* - const uint8_t *in: pointer to input to be absorbed into s
|
|
||||||
* - size_t inlen: length of input in bytes
|
|
||||||
**************************************************/
|
|
||||||
void shake256_absorb_once(keccak_state *state, const uint8_t *in, size_t inlen)
|
|
||||||
{
|
|
||||||
keccak_absorb_once(state->s, SHAKE256_RATE, in, inlen, 0x1F);
|
|
||||||
state->pos = SHAKE256_RATE;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: shake256_squeezeblocks
|
|
||||||
*
|
|
||||||
* Description: Squeeze step of SHAKE256 XOF. Squeezes full blocks of
|
|
||||||
* SHAKE256_RATE bytes each. Can be called multiple times
|
|
||||||
* to keep squeezing. Assumes next block has not yet been
|
|
||||||
* started (state->pos = SHAKE256_RATE).
|
|
||||||
*
|
|
||||||
* Arguments: - uint8_t *out: pointer to output blocks
|
|
||||||
* - size_t nblocks: number of blocks to be squeezed (written to output)
|
|
||||||
* - keccak_state *s: pointer to input/output Keccak state
|
|
||||||
**************************************************/
|
|
||||||
void shake256_squeezeblocks(uint8_t *out, size_t nblocks, keccak_state *state)
|
|
||||||
{
|
|
||||||
keccak_squeezeblocks(out, nblocks, state->s, SHAKE256_RATE);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: shake128
|
|
||||||
*
|
|
||||||
* Description: SHAKE128 XOF with non-incremental API
|
|
||||||
*
|
|
||||||
* Arguments: - uint8_t *out: pointer to output
|
|
||||||
* - size_t outlen: requested output length in bytes
|
|
||||||
* - const uint8_t *in: pointer to input
|
|
||||||
* - size_t inlen: length of input in bytes
|
|
||||||
**************************************************/
|
|
||||||
void shake128(uint8_t *out, size_t outlen, const uint8_t *in, size_t inlen)
|
|
||||||
{
|
|
||||||
size_t nblocks;
|
|
||||||
keccak_state state;
|
|
||||||
|
|
||||||
shake128_absorb_once(&state, in, inlen);
|
|
||||||
nblocks = outlen/SHAKE128_RATE;
|
|
||||||
shake128_squeezeblocks(out, nblocks, &state);
|
|
||||||
outlen -= nblocks*SHAKE128_RATE;
|
|
||||||
out += nblocks*SHAKE128_RATE;
|
|
||||||
shake128_squeeze(out, outlen, &state);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: shake256
|
|
||||||
*
|
|
||||||
* Description: SHAKE256 XOF with non-incremental API
|
|
||||||
*
|
|
||||||
* Arguments: - uint8_t *out: pointer to output
|
|
||||||
* - size_t outlen: requested output length in bytes
|
|
||||||
* - const uint8_t *in: pointer to input
|
|
||||||
* - size_t inlen: length of input in bytes
|
|
||||||
**************************************************/
|
|
||||||
void shake256(uint8_t *out, size_t outlen, const uint8_t *in, size_t inlen)
|
|
||||||
{
|
|
||||||
size_t nblocks;
|
|
||||||
keccak_state state;
|
|
||||||
|
|
||||||
shake256_absorb_once(&state, in, inlen);
|
|
||||||
nblocks = outlen/SHAKE256_RATE;
|
|
||||||
shake256_squeezeblocks(out, nblocks, &state);
|
|
||||||
outlen -= nblocks*SHAKE256_RATE;
|
|
||||||
out += nblocks*SHAKE256_RATE;
|
|
||||||
shake256_squeeze(out, outlen, &state);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: sha3_256
|
|
||||||
*
|
|
||||||
* Description: SHA3-256 with non-incremental API
|
|
||||||
*
|
|
||||||
* Arguments: - uint8_t *h: pointer to output (32 bytes)
|
|
||||||
* - const uint8_t *in: pointer to input
|
|
||||||
* - size_t inlen: length of input in bytes
|
|
||||||
**************************************************/
|
|
||||||
void sha3_256(uint8_t h[32], const uint8_t *in, size_t inlen)
|
|
||||||
{
|
|
||||||
unsigned int i;
|
|
||||||
uint64_t s[25];
|
|
||||||
|
|
||||||
keccak_absorb_once(s, SHA3_256_RATE, in, inlen, 0x06);
|
|
||||||
KeccakF1600_StatePermute(s);
|
|
||||||
for(i=0;i<4;i++)
|
|
||||||
store64(h+8*i,s[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: sha3_512
|
|
||||||
*
|
|
||||||
* Description: SHA3-512 with non-incremental API
|
|
||||||
*
|
|
||||||
* Arguments: - uint8_t *h: pointer to output (64 bytes)
|
|
||||||
* - const uint8_t *in: pointer to input
|
|
||||||
* - size_t inlen: length of input in bytes
|
|
||||||
**************************************************/
|
|
||||||
void sha3_512(uint8_t h[64], const uint8_t *in, size_t inlen)
|
|
||||||
{
|
|
||||||
unsigned int i;
|
|
||||||
uint64_t s[25];
|
|
||||||
|
|
||||||
keccak_absorb_once(s, SHA3_512_RATE, in, inlen, 0x06);
|
|
||||||
KeccakF1600_StatePermute(s);
|
|
||||||
for(i=0;i<8;i++)
|
|
||||||
store64(h+8*i,s[i]);
|
|
||||||
}
|
|
@ -1,54 +0,0 @@
|
|||||||
#ifndef FIPS202_H
|
|
||||||
#define FIPS202_H
|
|
||||||
|
|
||||||
#include <stddef.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
|
|
||||||
#define SHAKE128_RATE 168
|
|
||||||
#define SHAKE256_RATE 136
|
|
||||||
#define SHA3_256_RATE 136
|
|
||||||
#define SHA3_512_RATE 72
|
|
||||||
|
|
||||||
#define FIPS202_NAMESPACE(s) pqcrystals_kyber_fips202_avx2_##s
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
uint64_t s[25];
|
|
||||||
unsigned int pos;
|
|
||||||
} keccak_state;
|
|
||||||
|
|
||||||
#define shake128_init FIPS202_NAMESPACE(shake128_init)
|
|
||||||
void shake128_init(keccak_state *state);
|
|
||||||
#define shake128_absorb FIPS202_NAMESPACE(shake128_absorb)
|
|
||||||
void shake128_absorb(keccak_state *state, const uint8_t *in, size_t inlen);
|
|
||||||
#define shake128_finalize FIPS202_NAMESPACE(shake128_finalize)
|
|
||||||
void shake128_finalize(keccak_state *state);
|
|
||||||
#define shake128_squeeze FIPS202_NAMESPACE(shake128_squeeze)
|
|
||||||
void shake128_squeeze(uint8_t *out, size_t outlen, keccak_state *state);
|
|
||||||
#define shake128_absorb_once FIPS202_NAMESPACE(shake128_absorb_once)
|
|
||||||
void shake128_absorb_once(keccak_state *state, const uint8_t *in, size_t inlen);
|
|
||||||
#define shake128_squeezeblocks FIPS202_NAMESPACE(shake128_squeezeblocks)
|
|
||||||
void shake128_squeezeblocks(uint8_t *out, size_t nblocks, keccak_state *state);
|
|
||||||
|
|
||||||
#define shake256_init FIPS202_NAMESPACE(shake256_init)
|
|
||||||
void shake256_init(keccak_state *state);
|
|
||||||
#define shake256_absorb FIPS202_NAMESPACE(shake256_absorb)
|
|
||||||
void shake256_absorb(keccak_state *state, const uint8_t *in, size_t inlen);
|
|
||||||
#define shake256_finalize FIPS202_NAMESPACE(shake256_finalize)
|
|
||||||
void shake256_finalize(keccak_state *state);
|
|
||||||
#define shake256_squeeze FIPS202_NAMESPACE(shake256_squeeze)
|
|
||||||
void shake256_squeeze(uint8_t *out, size_t outlen, keccak_state *state);
|
|
||||||
#define shake256_absorb_once FIPS202_NAMESPACE(shake256_absorb_once)
|
|
||||||
void shake256_absorb_once(keccak_state *state, const uint8_t *in, size_t inlen);
|
|
||||||
#define shake256_squeezeblocks FIPS202_NAMESPACE(shake256_squeezeblocks)
|
|
||||||
void shake256_squeezeblocks(uint8_t *out, size_t nblocks, keccak_state *state);
|
|
||||||
|
|
||||||
#define shake128 FIPS202_NAMESPACE(shake128)
|
|
||||||
void shake128(uint8_t *out, size_t outlen, const uint8_t *in, size_t inlen);
|
|
||||||
#define shake256 FIPS202_NAMESPACE(shake256)
|
|
||||||
void shake256(uint8_t *out, size_t outlen, const uint8_t *in, size_t inlen);
|
|
||||||
#define sha3_256 FIPS202_NAMESPACE(sha3_256)
|
|
||||||
void sha3_256(uint8_t h[32], const uint8_t *in, size_t inlen);
|
|
||||||
#define sha3_512 FIPS202_NAMESPACE(sha3_512)
|
|
||||||
void sha3_512(uint8_t h[64], const uint8_t *in, size_t inlen);
|
|
||||||
|
|
||||||
#endif
|
|
@ -1,200 +0,0 @@
|
|||||||
#include <stddef.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <immintrin.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include "fips202.h"
|
|
||||||
#include "fips202x4.h"
|
|
||||||
|
|
||||||
/* Use implementation from the Keccak Code Package */
|
|
||||||
#define KeccakF1600_StatePermute4x FIPS202X4_NAMESPACE(KeccakP1600times4_PermuteAll_24rounds)
|
|
||||||
extern void KeccakF1600_StatePermute4x(__m256i *s);
|
|
||||||
|
|
||||||
static void keccakx4_absorb_once(__m256i s[25],
|
|
||||||
unsigned int r,
|
|
||||||
const uint8_t *in0,
|
|
||||||
const uint8_t *in1,
|
|
||||||
const uint8_t *in2,
|
|
||||||
const uint8_t *in3,
|
|
||||||
size_t inlen,
|
|
||||||
uint8_t p)
|
|
||||||
{
|
|
||||||
size_t i;
|
|
||||||
uint64_t pos = 0;
|
|
||||||
__m256i t, idx;
|
|
||||||
|
|
||||||
for(i = 0; i < 25; ++i)
|
|
||||||
s[i] = _mm256_setzero_si256();
|
|
||||||
|
|
||||||
idx = _mm256_set_epi64x((long long)in3, (long long)in2, (long long)in1, (long long)in0);
|
|
||||||
while(inlen >= r) {
|
|
||||||
for(i = 0; i < r/8; ++i) {
|
|
||||||
t = _mm256_i64gather_epi64((long long *)pos, idx, 1);
|
|
||||||
s[i] = _mm256_xor_si256(s[i], t);
|
|
||||||
pos += 8;
|
|
||||||
}
|
|
||||||
inlen -= r;
|
|
||||||
|
|
||||||
KeccakF1600_StatePermute4x(s);
|
|
||||||
}
|
|
||||||
|
|
||||||
for(i = 0; i < inlen/8; ++i) {
|
|
||||||
t = _mm256_i64gather_epi64((long long *)pos, idx, 1);
|
|
||||||
s[i] = _mm256_xor_si256(s[i], t);
|
|
||||||
pos += 8;
|
|
||||||
}
|
|
||||||
inlen -= 8*i;
|
|
||||||
|
|
||||||
if(inlen) {
|
|
||||||
t = _mm256_i64gather_epi64((long long *)pos, idx, 1);
|
|
||||||
idx = _mm256_set1_epi64x((1ULL << (8*inlen)) - 1);
|
|
||||||
t = _mm256_and_si256(t, idx);
|
|
||||||
s[i] = _mm256_xor_si256(s[i], t);
|
|
||||||
}
|
|
||||||
|
|
||||||
t = _mm256_set1_epi64x((uint64_t)p << 8*inlen);
|
|
||||||
s[i] = _mm256_xor_si256(s[i], t);
|
|
||||||
t = _mm256_set1_epi64x(1ULL << 63);
|
|
||||||
s[r/8 - 1] = _mm256_xor_si256(s[r/8 - 1], t);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void keccakx4_squeezeblocks(uint8_t *out0,
|
|
||||||
uint8_t *out1,
|
|
||||||
uint8_t *out2,
|
|
||||||
uint8_t *out3,
|
|
||||||
size_t nblocks,
|
|
||||||
unsigned int r,
|
|
||||||
__m256i s[25])
|
|
||||||
{
|
|
||||||
unsigned int i;
|
|
||||||
__m128d t;
|
|
||||||
|
|
||||||
while(nblocks > 0) {
|
|
||||||
KeccakF1600_StatePermute4x(s);
|
|
||||||
for(i=0; i < r/8; ++i) {
|
|
||||||
t = _mm_castsi128_pd(_mm256_castsi256_si128(s[i]));
|
|
||||||
_mm_storel_pd((__attribute__((__may_alias__)) double *)&out0[8*i], t);
|
|
||||||
_mm_storeh_pd((__attribute__((__may_alias__)) double *)&out1[8*i], t);
|
|
||||||
t = _mm_castsi128_pd(_mm256_extracti128_si256(s[i],1));
|
|
||||||
_mm_storel_pd((__attribute__((__may_alias__)) double *)&out2[8*i], t);
|
|
||||||
_mm_storeh_pd((__attribute__((__may_alias__)) double *)&out3[8*i], t);
|
|
||||||
}
|
|
||||||
|
|
||||||
out0 += r;
|
|
||||||
out1 += r;
|
|
||||||
out2 += r;
|
|
||||||
out3 += r;
|
|
||||||
--nblocks;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void shake128x4_absorb_once(keccakx4_state *state,
|
|
||||||
const uint8_t *in0,
|
|
||||||
const uint8_t *in1,
|
|
||||||
const uint8_t *in2,
|
|
||||||
const uint8_t *in3,
|
|
||||||
size_t inlen)
|
|
||||||
{
|
|
||||||
keccakx4_absorb_once(state->s, SHAKE128_RATE, in0, in1, in2, in3, inlen, 0x1F);
|
|
||||||
}
|
|
||||||
|
|
||||||
void shake128x4_squeezeblocks(uint8_t *out0,
|
|
||||||
uint8_t *out1,
|
|
||||||
uint8_t *out2,
|
|
||||||
uint8_t *out3,
|
|
||||||
size_t nblocks,
|
|
||||||
keccakx4_state *state)
|
|
||||||
{
|
|
||||||
keccakx4_squeezeblocks(out0, out1, out2, out3, nblocks, SHAKE128_RATE, state->s);
|
|
||||||
}
|
|
||||||
|
|
||||||
void shake256x4_absorb_once(keccakx4_state *state,
|
|
||||||
const uint8_t *in0,
|
|
||||||
const uint8_t *in1,
|
|
||||||
const uint8_t *in2,
|
|
||||||
const uint8_t *in3,
|
|
||||||
size_t inlen)
|
|
||||||
{
|
|
||||||
keccakx4_absorb_once(state->s, SHAKE256_RATE, in0, in1, in2, in3, inlen, 0x1F);
|
|
||||||
}
|
|
||||||
|
|
||||||
void shake256x4_squeezeblocks(uint8_t *out0,
|
|
||||||
uint8_t *out1,
|
|
||||||
uint8_t *out2,
|
|
||||||
uint8_t *out3,
|
|
||||||
size_t nblocks,
|
|
||||||
keccakx4_state *state)
|
|
||||||
{
|
|
||||||
keccakx4_squeezeblocks(out0, out1, out2, out3, nblocks, SHAKE256_RATE, state->s);
|
|
||||||
}
|
|
||||||
|
|
||||||
void shake128x4(uint8_t *out0,
|
|
||||||
uint8_t *out1,
|
|
||||||
uint8_t *out2,
|
|
||||||
uint8_t *out3,
|
|
||||||
size_t outlen,
|
|
||||||
const uint8_t *in0,
|
|
||||||
const uint8_t *in1,
|
|
||||||
const uint8_t *in2,
|
|
||||||
const uint8_t *in3,
|
|
||||||
size_t inlen)
|
|
||||||
{
|
|
||||||
unsigned int i;
|
|
||||||
size_t nblocks = outlen/SHAKE128_RATE;
|
|
||||||
uint8_t t[4][SHAKE128_RATE];
|
|
||||||
keccakx4_state state;
|
|
||||||
|
|
||||||
shake128x4_absorb_once(&state, in0, in1, in2, in3, inlen);
|
|
||||||
shake128x4_squeezeblocks(out0, out1, out2, out3, nblocks, &state);
|
|
||||||
|
|
||||||
out0 += nblocks*SHAKE128_RATE;
|
|
||||||
out1 += nblocks*SHAKE128_RATE;
|
|
||||||
out2 += nblocks*SHAKE128_RATE;
|
|
||||||
out3 += nblocks*SHAKE128_RATE;
|
|
||||||
outlen -= nblocks*SHAKE128_RATE;
|
|
||||||
|
|
||||||
if(outlen) {
|
|
||||||
shake128x4_squeezeblocks(t[0], t[1], t[2], t[3], 1, &state);
|
|
||||||
for(i = 0; i < outlen; ++i) {
|
|
||||||
out0[i] = t[0][i];
|
|
||||||
out1[i] = t[1][i];
|
|
||||||
out2[i] = t[2][i];
|
|
||||||
out3[i] = t[3][i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void shake256x4(uint8_t *out0,
|
|
||||||
uint8_t *out1,
|
|
||||||
uint8_t *out2,
|
|
||||||
uint8_t *out3,
|
|
||||||
size_t outlen,
|
|
||||||
const uint8_t *in0,
|
|
||||||
const uint8_t *in1,
|
|
||||||
const uint8_t *in2,
|
|
||||||
const uint8_t *in3,
|
|
||||||
size_t inlen)
|
|
||||||
{
|
|
||||||
unsigned int i;
|
|
||||||
size_t nblocks = outlen/SHAKE256_RATE;
|
|
||||||
uint8_t t[4][SHAKE256_RATE];
|
|
||||||
keccakx4_state state;
|
|
||||||
|
|
||||||
shake256x4_absorb_once(&state, in0, in1, in2, in3, inlen);
|
|
||||||
shake256x4_squeezeblocks(out0, out1, out2, out3, nblocks, &state);
|
|
||||||
|
|
||||||
out0 += nblocks*SHAKE256_RATE;
|
|
||||||
out1 += nblocks*SHAKE256_RATE;
|
|
||||||
out2 += nblocks*SHAKE256_RATE;
|
|
||||||
out3 += nblocks*SHAKE256_RATE;
|
|
||||||
outlen -= nblocks*SHAKE256_RATE;
|
|
||||||
|
|
||||||
if(outlen) {
|
|
||||||
shake256x4_squeezeblocks(t[0], t[1], t[2], t[3], 1, &state);
|
|
||||||
for(i = 0; i < outlen; ++i) {
|
|
||||||
out0[i] = t[0][i];
|
|
||||||
out1[i] = t[1][i];
|
|
||||||
out2[i] = t[2][i];
|
|
||||||
out3[i] = t[3][i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,70 +0,0 @@
|
|||||||
#ifndef FIPS202X4_H
|
|
||||||
#define FIPS202X4_H
|
|
||||||
|
|
||||||
#include <stddef.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <immintrin.h>
|
|
||||||
|
|
||||||
#define FIPS202X4_NAMESPACE(s) pqcrystals_kyber_fips202x4_avx2_##s
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
__m256i s[25];
|
|
||||||
} keccakx4_state;
|
|
||||||
|
|
||||||
#define shake128x4_absorb_once FIPS202X4_NAMESPACE(shake128x4_absorb_once)
|
|
||||||
void shake128x4_absorb_once(keccakx4_state *state,
|
|
||||||
const uint8_t *in0,
|
|
||||||
const uint8_t *in1,
|
|
||||||
const uint8_t *in2,
|
|
||||||
const uint8_t *in3,
|
|
||||||
size_t inlen);
|
|
||||||
|
|
||||||
#define shake128x4_squeezeblocks FIPS202X4_NAMESPACE(shake128x4_squeezeblocks)
|
|
||||||
void shake128x4_squeezeblocks(uint8_t *out0,
|
|
||||||
uint8_t *out1,
|
|
||||||
uint8_t *out2,
|
|
||||||
uint8_t *out3,
|
|
||||||
size_t nblocks,
|
|
||||||
keccakx4_state *state);
|
|
||||||
|
|
||||||
#define shake256x4_absorb_once FIPS202X4_NAMESPACE(shake256x4_absorb_once)
|
|
||||||
void shake256x4_absorb_once(keccakx4_state *state,
|
|
||||||
const uint8_t *in0,
|
|
||||||
const uint8_t *in1,
|
|
||||||
const uint8_t *in2,
|
|
||||||
const uint8_t *in3,
|
|
||||||
size_t inlen);
|
|
||||||
|
|
||||||
#define shake256x4_squeezeblocks FIPS202X4_NAMESPACE(shake256x4_squeezeblocks)
|
|
||||||
void shake256x4_squeezeblocks(uint8_t *out0,
|
|
||||||
uint8_t *out1,
|
|
||||||
uint8_t *out2,
|
|
||||||
uint8_t *out3,
|
|
||||||
size_t nblocks,
|
|
||||||
keccakx4_state *state);
|
|
||||||
|
|
||||||
#define shake128x4 FIPS202X4_NAMESPACE(shake128x4)
|
|
||||||
void shake128x4(uint8_t *out0,
|
|
||||||
uint8_t *out1,
|
|
||||||
uint8_t *out2,
|
|
||||||
uint8_t *out3,
|
|
||||||
size_t outlen,
|
|
||||||
const uint8_t *in0,
|
|
||||||
const uint8_t *in1,
|
|
||||||
const uint8_t *in2,
|
|
||||||
const uint8_t *in3,
|
|
||||||
size_t inlen);
|
|
||||||
|
|
||||||
#define shake256x4 FIPS202X4_NAMESPACE(shake256x4)
|
|
||||||
void shake256x4(uint8_t *out0,
|
|
||||||
uint8_t *out1,
|
|
||||||
uint8_t *out2,
|
|
||||||
uint8_t *out3,
|
|
||||||
size_t outlen,
|
|
||||||
const uint8_t *in0,
|
|
||||||
const uint8_t *in1,
|
|
||||||
const uint8_t *in2,
|
|
||||||
const uint8_t *in3,
|
|
||||||
size_t inlen);
|
|
||||||
|
|
||||||
#endif
|
|
@ -1,70 +0,0 @@
|
|||||||
#ifndef FIPS202X4_H
|
|
||||||
#define FIPS202X4_H
|
|
||||||
|
|
||||||
#include <stddef.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <immintrin.h>
|
|
||||||
|
|
||||||
#define FIPS202X4_NAMESPACE(s) pqcrystals_kyber_fips202x4_avx2_##s
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
__m256i s[25];
|
|
||||||
} keccakx4_state;
|
|
||||||
|
|
||||||
#define shake128x4_absorb_once FIPS202X4_NAMESPACE(shake128x4_absorb_once)
|
|
||||||
void shake128x4_absorb_once(keccakx4_state *state,
|
|
||||||
const uint8_t *in0,
|
|
||||||
const uint8_t *in1,
|
|
||||||
const uint8_t *in2,
|
|
||||||
const uint8_t *in3,
|
|
||||||
size_t inlen);
|
|
||||||
|
|
||||||
#define shake128x4_squeezeblocks FIPS202X4_NAMESPACE(shake128x4_squeezeblocks)
|
|
||||||
void shake128x4_squeezeblocks(uint8_t *out0,
|
|
||||||
uint8_t *out1,
|
|
||||||
uint8_t *out2,
|
|
||||||
uint8_t *out3,
|
|
||||||
size_t nblocks,
|
|
||||||
keccakx4_state *state);
|
|
||||||
|
|
||||||
#define shake256x4_absorb_once FIPS202X4_NAMESPACE(shake256x4_absorb_once)
|
|
||||||
void shake256x4_absorb_once(keccakx4_state *state,
|
|
||||||
const uint8_t *in0,
|
|
||||||
const uint8_t *in1,
|
|
||||||
const uint8_t *in2,
|
|
||||||
const uint8_t *in3,
|
|
||||||
size_t inlen);
|
|
||||||
|
|
||||||
#define shake256x4_squeezeblocks FIPS202X4_NAMESPACE(shake256x4_squeezeblocks)
|
|
||||||
void shake256x4_squeezeblocks(uint8_t *out0,
|
|
||||||
uint8_t *out1,
|
|
||||||
uint8_t *out2,
|
|
||||||
uint8_t *out3,
|
|
||||||
size_t nblocks,
|
|
||||||
keccakx4_state *state);
|
|
||||||
|
|
||||||
#define shake128x4 FIPS202X4_NAMESPACE(shake128x4)
|
|
||||||
void shake128x4(uint8_t *out0,
|
|
||||||
uint8_t *out1,
|
|
||||||
uint8_t *out2,
|
|
||||||
uint8_t *out3,
|
|
||||||
size_t outlen,
|
|
||||||
const uint8_t *in0,
|
|
||||||
const uint8_t *in1,
|
|
||||||
const uint8_t *in2,
|
|
||||||
const uint8_t *in3,
|
|
||||||
size_t inlen);
|
|
||||||
|
|
||||||
#define shake256x4 FIPS202X4_NAMESPACE(shake256x4)
|
|
||||||
void shake256x4(uint8_t *out0,
|
|
||||||
uint8_t *out1,
|
|
||||||
uint8_t *out2,
|
|
||||||
uint8_t *out3,
|
|
||||||
size_t outlen,
|
|
||||||
const uint8_t *in0,
|
|
||||||
const uint8_t *in1,
|
|
||||||
const uint8_t *in2,
|
|
||||||
const uint8_t *in3,
|
|
||||||
size_t inlen);
|
|
||||||
|
|
||||||
#endif
|
|
File diff suppressed because it is too large
Load Diff
@ -1,65 +0,0 @@
|
|||||||
/*
|
|
||||||
Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
|
|
||||||
Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
|
|
||||||
denoted as "the implementer".
|
|
||||||
|
|
||||||
For more information, feedback or questions, please refer to our websites:
|
|
||||||
http://keccak.noekeon.org/
|
|
||||||
http://keyak.noekeon.org/
|
|
||||||
http://ketje.noekeon.org/
|
|
||||||
|
|
||||||
To the extent possible under law, the implementer has waived all copyright
|
|
||||||
and related or neighboring rights to the source code in this file.
|
|
||||||
http://creativecommons.org/publicdomain/zero/1.0/
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef _KeccakP_1600_times4_SnP_h_
|
|
||||||
#define _KeccakP_1600_times4_SnP_h_
|
|
||||||
|
|
||||||
/** For the documentation, see PlSnP-documentation.h.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "KeccakP-SIMD256-config.h"
|
|
||||||
#include "../fips202x4.h"
|
|
||||||
|
|
||||||
#define KeccakP1600times4_implementation "256-bit SIMD implementation (" KeccakP1600times4_implementation_config ")"
|
|
||||||
#define KeccakP1600times4_statesSizeInBytes 800
|
|
||||||
#define KeccakP1600times4_statesAlignment 32
|
|
||||||
#define KeccakF1600times4_FastLoop_supported
|
|
||||||
#define KeccakP1600times4_12rounds_FastLoop_supported
|
|
||||||
|
|
||||||
#include <stddef.h>
|
|
||||||
|
|
||||||
#define KeccakP1600times4_StaticInitialize()
|
|
||||||
#define KeccakP1600times4_InitializeAll FIPS202X4_NAMESPACE(KeccakP1600times4_InitializeAll)
|
|
||||||
void KeccakP1600times4_InitializeAll(void *states);
|
|
||||||
#define KeccakP1600times4_AddByte(states, instanceIndex, byte, offset) \
|
|
||||||
((unsigned char*)(states))[(instanceIndex)*8 + ((offset)/8)*4*8 + (offset)%8] ^= (byte)
|
|
||||||
#define KeccakP1600times4_AddBytes FIPS202X4_NAMESPACE(KeccakP1600times4_AddBytes)
|
|
||||||
void KeccakP1600times4_AddBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length);
|
|
||||||
#define KeccakP1600times4_AddLanesAll FIPS202X4_NAMESPACE(KeccakP1600times4_AddLanesAll)
|
|
||||||
void KeccakP1600times4_AddLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset);
|
|
||||||
#define KeccakP1600times4_OverwriteBytes FIPS202X4_NAMESPACE(KeccakP1600times4_OverwriteBytes)
|
|
||||||
void KeccakP1600times4_OverwriteBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length);
|
|
||||||
#define KeccakP1600times4_OverwriteLanesAll FIPS202X4_NAMESPACE(KeccakP1600times4_OverwriteLanesAll)
|
|
||||||
void KeccakP1600times4_OverwriteLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset);
|
|
||||||
#define KeccakP1600times4_OverwriteWithZeroes FIPS202X4_NAMESPACE(KeccakP1600times4_OverwriteWithZeroes)
|
|
||||||
void KeccakP1600times4_OverwriteWithZeroes(void *states, unsigned int instanceIndex, unsigned int byteCount);
|
|
||||||
#define KeccakP1600times4_PermuteAll_12rounds FIPS202X4_NAMESPACE(KeccakP1600times4_PermuteAll_12rounds)
|
|
||||||
void KeccakP1600times4_PermuteAll_12rounds(void *states);
|
|
||||||
#define KeccakP1600times4_PermuteAll_24rounds FIPS202X4_NAMESPACE(KeccakP1600times4_PermuteAll_24rounds)
|
|
||||||
void KeccakP1600times4_PermuteAll_24rounds(void *states);
|
|
||||||
#define KeccakP1600times4_ExtractBytes FIPS202X4_NAMESPACE(KeccakP1600times4_ExtractBytes)
|
|
||||||
void KeccakP1600times4_ExtractBytes(const void *states, unsigned int instanceIndex, unsigned char *data, unsigned int offset, unsigned int length);
|
|
||||||
#define KeccakP1600times4_ExtractLanesAll FIPS202X4_NAMESPACE(KeccakP1600times4_ExtractLanesAll)
|
|
||||||
void KeccakP1600times4_ExtractLanesAll(const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset);
|
|
||||||
#define KeccakP1600times4_ExtractAndAddBytes FIPS202X4_NAMESPACE(KeccakP1600times4_ExtractAndAddBytes)
|
|
||||||
void KeccakP1600times4_ExtractAndAddBytes(const void *states, unsigned int instanceIndex, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length);
|
|
||||||
#define KeccakP1600times4_ExtractAndAddLanesAll FIPS202X4_NAMESPACE(KeccakP1600times4_ExtractAndAddLanesAll)
|
|
||||||
void KeccakP1600times4_ExtractAndAddLanesAll(const void *states, const unsigned char *input, unsigned char *output, unsigned int laneCount, unsigned int laneOffset);
|
|
||||||
#define KeccakF1600times4_FastLoop_Absorb FIPS202X4_NAMESPACE(KeccakF1600times4_FastLoop_Absorb)
|
|
||||||
size_t KeccakF1600times4_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen);
|
|
||||||
#define KeccakP1600times4_12rounds_FastLoop_Absorb FIPS202X4_NAMESPACE(KeccakP1600times4_12rounds_FastLoop_Absorb)
|
|
||||||
size_t KeccakP1600times4_12rounds_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen);
|
|
||||||
|
|
||||||
#endif
|
|
@ -1,198 +0,0 @@
|
|||||||
/*
|
|
||||||
Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
|
|
||||||
Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
|
|
||||||
denoted as "the implementer".
|
|
||||||
|
|
||||||
For more information, feedback or questions, please refer to our websites:
|
|
||||||
http://keccak.noekeon.org/
|
|
||||||
http://keyak.noekeon.org/
|
|
||||||
http://ketje.noekeon.org/
|
|
||||||
|
|
||||||
To the extent possible under law, the implementer has waived all copyright
|
|
||||||
and related or neighboring rights to the source code in this file.
|
|
||||||
http://creativecommons.org/publicdomain/zero/1.0/
|
|
||||||
*/
|
|
||||||
|
|
||||||
#if (defined(FullUnrolling))
|
|
||||||
#define rounds24 \
|
|
||||||
prepareTheta \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta( 0, A, E) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta( 1, E, A) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta( 2, A, E) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta( 3, E, A) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta( 4, A, E) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta( 5, E, A) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta( 6, A, E) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta( 7, E, A) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta( 8, A, E) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta( 9, E, A) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(10, A, E) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(11, E, A) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(12, A, E) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(13, E, A) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(14, A, E) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(15, E, A) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(16, A, E) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(17, E, A) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(18, A, E) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(19, E, A) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(20, A, E) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(21, E, A) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(22, A, E) \
|
|
||||||
thetaRhoPiChiIota(23, E, A) \
|
|
||||||
|
|
||||||
#define rounds12 \
|
|
||||||
prepareTheta \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(12, A, E) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(13, E, A) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(14, A, E) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(15, E, A) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(16, A, E) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(17, E, A) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(18, A, E) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(19, E, A) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(20, A, E) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(21, E, A) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(22, A, E) \
|
|
||||||
thetaRhoPiChiIota(23, E, A) \
|
|
||||||
|
|
||||||
#elif (Unrolling == 12)
|
|
||||||
#define rounds24 \
|
|
||||||
prepareTheta \
|
|
||||||
for(i=0; i<24; i+=12) { \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i+ 1, E, A) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i+ 2, A, E) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i+ 3, E, A) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i+ 4, A, E) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i+ 5, E, A) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i+ 6, A, E) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i+ 7, E, A) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i+ 8, A, E) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i+ 9, E, A) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i+10, A, E) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i+11, E, A) \
|
|
||||||
} \
|
|
||||||
|
|
||||||
#define rounds12 \
|
|
||||||
prepareTheta \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(12, A, E) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(13, E, A) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(14, A, E) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(15, E, A) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(16, A, E) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(17, E, A) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(18, A, E) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(19, E, A) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(20, A, E) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(21, E, A) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(22, A, E) \
|
|
||||||
thetaRhoPiChiIota(23, E, A) \
|
|
||||||
|
|
||||||
#elif (Unrolling == 6)
|
|
||||||
#define rounds24 \
|
|
||||||
prepareTheta \
|
|
||||||
for(i=0; i<24; i+=6) { \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i+4, A, E) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \
|
|
||||||
} \
|
|
||||||
|
|
||||||
#define rounds12 \
|
|
||||||
prepareTheta \
|
|
||||||
for(i=12; i<24; i+=6) { \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i+4, A, E) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \
|
|
||||||
} \
|
|
||||||
|
|
||||||
#elif (Unrolling == 4)
|
|
||||||
#define rounds24 \
|
|
||||||
prepareTheta \
|
|
||||||
for(i=0; i<24; i+=4) { \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \
|
|
||||||
} \
|
|
||||||
|
|
||||||
#define rounds12 \
|
|
||||||
prepareTheta \
|
|
||||||
for(i=12; i<24; i+=4) { \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \
|
|
||||||
} \
|
|
||||||
|
|
||||||
#elif (Unrolling == 3)
|
|
||||||
#define rounds24 \
|
|
||||||
prepareTheta \
|
|
||||||
for(i=0; i<24; i+=3) { \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
|
|
||||||
copyStateVariables(A, E) \
|
|
||||||
} \
|
|
||||||
|
|
||||||
#define rounds12 \
|
|
||||||
prepareTheta \
|
|
||||||
for(i=12; i<24; i+=3) { \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
|
|
||||||
copyStateVariables(A, E) \
|
|
||||||
} \
|
|
||||||
|
|
||||||
#elif (Unrolling == 2)
|
|
||||||
#define rounds24 \
|
|
||||||
prepareTheta \
|
|
||||||
for(i=0; i<24; i+=2) { \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
|
|
||||||
} \
|
|
||||||
|
|
||||||
#define rounds12 \
|
|
||||||
prepareTheta \
|
|
||||||
for(i=12; i<24; i+=2) { \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
|
|
||||||
} \
|
|
||||||
|
|
||||||
#elif (Unrolling == 1)
|
|
||||||
#define rounds24 \
|
|
||||||
prepareTheta \
|
|
||||||
for(i=0; i<24; i++) { \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
|
|
||||||
copyStateVariables(A, E) \
|
|
||||||
} \
|
|
||||||
|
|
||||||
#define rounds12 \
|
|
||||||
prepareTheta \
|
|
||||||
for(i=12; i<24; i++) { \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
|
|
||||||
copyStateVariables(A, E) \
|
|
||||||
} \
|
|
||||||
|
|
||||||
#else
|
|
||||||
#error "Unrolling is not correctly specified!"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define roundsN(__nrounds) \
|
|
||||||
prepareTheta \
|
|
||||||
i = 24 - (__nrounds); \
|
|
||||||
if ((i&1) != 0) { \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i, A, E) \
|
|
||||||
copyStateVariables(A, E) \
|
|
||||||
++i; \
|
|
||||||
} \
|
|
||||||
for( /* empty */; i<24; i+=2) { \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
|
|
||||||
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
|
|
||||||
}
|
|
@ -1,3 +0,0 @@
|
|||||||
#define KeccakP1600times4_implementation_config "AVX2, all rounds unrolled"
|
|
||||||
#define KeccakP1600times4_fullUnrolling
|
|
||||||
#define KeccakP1600times4_useAVX2
|
|
@ -1,34 +0,0 @@
|
|||||||
/*
|
|
||||||
Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
|
|
||||||
Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
|
|
||||||
denoted as "the implementer".
|
|
||||||
|
|
||||||
For more information, feedback or questions, please refer to our websites:
|
|
||||||
http://keccak.noekeon.org/
|
|
||||||
http://keyak.noekeon.org/
|
|
||||||
http://ketje.noekeon.org/
|
|
||||||
|
|
||||||
To the extent possible under law, the implementer has waived all copyright
|
|
||||||
and related or neighboring rights to the source code in this file.
|
|
||||||
http://creativecommons.org/publicdomain/zero/1.0/
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef _keccakp_align_h_
|
|
||||||
#define _keccakp_align_h_
|
|
||||||
|
|
||||||
/* on Mac OS-X and possibly others, ALIGN(x) is defined in param.h, and -Werror chokes on the redef. */
|
|
||||||
#ifdef ALIGN
|
|
||||||
#undef ALIGN
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(__GNUC__)
|
|
||||||
#define ALIGN(x) __attribute__ ((aligned(x)))
|
|
||||||
#elif defined(_MSC_VER)
|
|
||||||
#define ALIGN(x) __declspec(align(x))
|
|
||||||
#elif defined(__ARMCC_VERSION)
|
|
||||||
#define ALIGN(x) __align(x)
|
|
||||||
#else
|
|
||||||
#define ALIGN(x)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif
|
|
@ -1,142 +0,0 @@
|
|||||||
/*
|
|
||||||
---------------------------------------------------------------------------
|
|
||||||
Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
|
|
||||||
|
|
||||||
LICENSE TERMS
|
|
||||||
|
|
||||||
The redistribution and use of this software (with or without changes)
|
|
||||||
is allowed without the payment of fees or royalties provided that:
|
|
||||||
|
|
||||||
1. source code distributions include the above copyright notice, this
|
|
||||||
list of conditions and the following disclaimer;
|
|
||||||
|
|
||||||
2. binary distributions include the above copyright notice, this list
|
|
||||||
of conditions and the following disclaimer in their documentation;
|
|
||||||
|
|
||||||
3. the name of the copyright holder is not used to endorse products
|
|
||||||
built using this software without specific written permission.
|
|
||||||
|
|
||||||
DISCLAIMER
|
|
||||||
|
|
||||||
This software is provided 'as is' with no explicit or implied warranties
|
|
||||||
in respect of its properties, including, but not limited to, correctness
|
|
||||||
and/or fitness for purpose.
|
|
||||||
---------------------------------------------------------------------------
|
|
||||||
Issue Date: 20/12/2007
|
|
||||||
Changes for ARM 9/9/2010
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef _KECCAKP_BRG_ENDIAN_H
|
|
||||||
#define _KECCAKP_BRG_ENDIAN_H
|
|
||||||
|
|
||||||
#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */
|
|
||||||
#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */
|
|
||||||
|
|
||||||
#if 0
|
|
||||||
/* Include files where endian defines and byteswap functions may reside */
|
|
||||||
#if defined( __sun )
|
|
||||||
# include <sys/isa_defs.h>
|
|
||||||
#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ )
|
|
||||||
# include <sys/endian.h>
|
|
||||||
#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \
|
|
||||||
defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ )
|
|
||||||
# include <machine/endian.h>
|
|
||||||
#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
|
|
||||||
# if !defined( __MINGW32__ ) && !defined( _AIX )
|
|
||||||
# include <endian.h>
|
|
||||||
# if !defined( __BEOS__ )
|
|
||||||
# include <byteswap.h>
|
|
||||||
# endif
|
|
||||||
# endif
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Now attempt to set the define for platform byte order using any */
|
|
||||||
/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */
|
|
||||||
/* seem to encompass most endian symbol definitions */
|
|
||||||
|
|
||||||
#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN )
|
|
||||||
# if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN
|
|
||||||
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
|
|
||||||
# elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN
|
|
||||||
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
|
|
||||||
# endif
|
|
||||||
#elif defined( BIG_ENDIAN )
|
|
||||||
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
|
|
||||||
#elif defined( LITTLE_ENDIAN )
|
|
||||||
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN )
|
|
||||||
# if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN
|
|
||||||
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
|
|
||||||
# elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN
|
|
||||||
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
|
|
||||||
# endif
|
|
||||||
#elif defined( _BIG_ENDIAN )
|
|
||||||
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
|
|
||||||
#elif defined( _LITTLE_ENDIAN )
|
|
||||||
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined( __BIG_ENDIAN ) && defined( __LITTLE_ENDIAN )
|
|
||||||
# if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN
|
|
||||||
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
|
|
||||||
# elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN
|
|
||||||
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
|
|
||||||
# endif
|
|
||||||
#elif defined( __BIG_ENDIAN )
|
|
||||||
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
|
|
||||||
#elif defined( __LITTLE_ENDIAN )
|
|
||||||
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ )
|
|
||||||
# if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__
|
|
||||||
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
|
|
||||||
# elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__
|
|
||||||
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
|
|
||||||
# endif
|
|
||||||
#elif defined( __BIG_ENDIAN__ )
|
|
||||||
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
|
|
||||||
#elif defined( __LITTLE_ENDIAN__ )
|
|
||||||
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* if the platform byte order could not be determined, then try to */
|
|
||||||
/* set this define using common machine defines */
|
|
||||||
#if !defined(PLATFORM_BYTE_ORDER)
|
|
||||||
|
|
||||||
#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \
|
|
||||||
defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \
|
|
||||||
defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \
|
|
||||||
defined( vax ) || defined( vms ) || defined( VMS ) || \
|
|
||||||
defined( __VMS ) || defined( _M_X64 )
|
|
||||||
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
|
|
||||||
|
|
||||||
#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \
|
|
||||||
defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \
|
|
||||||
defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \
|
|
||||||
defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \
|
|
||||||
defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \
|
|
||||||
defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \
|
|
||||||
defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX )
|
|
||||||
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
|
|
||||||
|
|
||||||
#elif defined(__arm__)
|
|
||||||
# ifdef __BIG_ENDIAN
|
|
||||||
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
|
|
||||||
# else
|
|
||||||
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
|
|
||||||
# endif
|
|
||||||
#elif 1 /* **** EDIT HERE IF NECESSARY **** */
|
|
||||||
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
|
|
||||||
#elif 0 /* **** EDIT HERE IF NECESSARY **** */
|
|
||||||
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
|
|
||||||
#else
|
|
||||||
# error Please edit lines 132 or 134 in brg_endian.h to set the platform byte order
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif
|
|
@ -1,907 +0,0 @@
|
|||||||
/* Taken from Bas Westerbaan's new 4-way SHAKE implementation
|
|
||||||
* for Sphincs+ (https://github.com/sphincs/sphincsplus/pull/14/),
|
|
||||||
* but uses vpshufb for byte-granular rotations as in the Keccak Code Package. */
|
|
||||||
|
|
||||||
#include "fips202x4.h"
|
|
||||||
|
|
||||||
.data
|
|
||||||
.p2align 5
|
|
||||||
rho8:
|
|
||||||
.byte 7,0,1,2,3,4,5,6,15,8,9,10,11,12,13,14,7,0,1,2,3,4,5,6,15,8,9,10,11,12,13,14
|
|
||||||
rho56:
|
|
||||||
.byte 1,2,3,4,5,6,7,0,9,10,11,12,13,14,15,8,1,2,3,4,5,6,7,0,9,10,11,12,13,14,15,8
|
|
||||||
|
|
||||||
.text
|
|
||||||
.global cdecl(f1600x4)
|
|
||||||
cdecl(f1600x4):
|
|
||||||
vmovdqa rho8(%rip), %ymm0
|
|
||||||
movq $6, %rax
|
|
||||||
looptop:
|
|
||||||
vmovdqa 0(%rdi), %ymm8
|
|
||||||
vmovdqa 32(%rdi), %ymm9
|
|
||||||
vmovdqa 64(%rdi), %ymm10
|
|
||||||
vmovdqa 96(%rdi), %ymm11
|
|
||||||
vmovdqa 128(%rdi), %ymm12
|
|
||||||
vpxor 160(%rdi), %ymm8, %ymm8
|
|
||||||
vpxor 192(%rdi), %ymm9, %ymm9
|
|
||||||
vpxor 224(%rdi), %ymm10, %ymm10
|
|
||||||
vpxor 256(%rdi), %ymm11, %ymm11
|
|
||||||
vpxor 288(%rdi), %ymm12, %ymm12
|
|
||||||
vpxor 320(%rdi), %ymm8, %ymm8
|
|
||||||
vpxor 352(%rdi), %ymm9, %ymm9
|
|
||||||
vpxor 384(%rdi), %ymm10, %ymm10
|
|
||||||
vpxor 416(%rdi), %ymm11, %ymm11
|
|
||||||
vpxor 448(%rdi), %ymm12, %ymm12
|
|
||||||
vpxor 480(%rdi), %ymm8, %ymm8
|
|
||||||
vpxor 512(%rdi), %ymm9, %ymm9
|
|
||||||
vpxor 544(%rdi), %ymm10, %ymm10
|
|
||||||
vpxor 576(%rdi), %ymm11, %ymm11
|
|
||||||
vpxor 608(%rdi), %ymm12, %ymm12
|
|
||||||
vpxor 640(%rdi), %ymm8, %ymm8
|
|
||||||
vpxor 672(%rdi), %ymm9, %ymm9
|
|
||||||
vpxor 704(%rdi), %ymm10, %ymm10
|
|
||||||
vpxor 736(%rdi), %ymm11, %ymm11
|
|
||||||
vpxor 768(%rdi), %ymm12, %ymm12
|
|
||||||
vpsllq $1, %ymm9, %ymm13
|
|
||||||
vpsllq $1, %ymm10, %ymm14
|
|
||||||
vpsllq $1, %ymm11, %ymm15
|
|
||||||
vpsllq $1, %ymm12, %ymm7
|
|
||||||
vpsllq $1, %ymm8, %ymm6
|
|
||||||
vpsrlq $63, %ymm9, %ymm5
|
|
||||||
vpsrlq $63, %ymm10, %ymm4
|
|
||||||
vpsrlq $63, %ymm11, %ymm3
|
|
||||||
vpsrlq $63, %ymm12, %ymm2
|
|
||||||
vpsrlq $63, %ymm8, %ymm1
|
|
||||||
vpor %ymm13, %ymm5, %ymm5
|
|
||||||
vpor %ymm14, %ymm4, %ymm4
|
|
||||||
vpor %ymm15, %ymm3, %ymm3
|
|
||||||
vpor %ymm7, %ymm2, %ymm2
|
|
||||||
vpor %ymm6, %ymm1, %ymm1
|
|
||||||
vpxor %ymm5, %ymm12, %ymm5
|
|
||||||
vpxor %ymm4, %ymm8, %ymm4
|
|
||||||
vpxor %ymm3, %ymm9, %ymm3
|
|
||||||
vpxor %ymm2, %ymm10, %ymm2
|
|
||||||
vpxor %ymm1, %ymm11, %ymm1
|
|
||||||
vpxor 0(%rdi), %ymm5, %ymm8
|
|
||||||
vpxor 192(%rdi), %ymm4, %ymm9
|
|
||||||
vpxor 384(%rdi), %ymm3, %ymm10
|
|
||||||
vpxor 576(%rdi), %ymm2, %ymm11
|
|
||||||
vpxor 768(%rdi), %ymm1, %ymm12
|
|
||||||
vpsllq $44, %ymm9, %ymm14
|
|
||||||
vpsllq $43, %ymm10, %ymm15
|
|
||||||
vpsllq $21, %ymm11, %ymm7
|
|
||||||
vpsllq $14, %ymm12, %ymm6
|
|
||||||
vpsrlq $20, %ymm9, %ymm9
|
|
||||||
vpsrlq $21, %ymm10, %ymm10
|
|
||||||
vpsrlq $43, %ymm11, %ymm11
|
|
||||||
vpsrlq $50, %ymm12, %ymm12
|
|
||||||
vpor %ymm14, %ymm9, %ymm9
|
|
||||||
vpor %ymm15, %ymm10, %ymm10
|
|
||||||
vpor %ymm7, %ymm11, %ymm11
|
|
||||||
vpor %ymm6, %ymm12, %ymm12
|
|
||||||
vpandn %ymm10, %ymm9, %ymm13
|
|
||||||
vpandn %ymm11, %ymm10, %ymm14
|
|
||||||
vpandn %ymm12, %ymm11, %ymm15
|
|
||||||
vpandn %ymm8, %ymm12, %ymm7
|
|
||||||
vpandn %ymm9, %ymm8, %ymm6
|
|
||||||
vpxor %ymm8, %ymm13, %ymm13
|
|
||||||
vpxor %ymm9, %ymm14, %ymm14
|
|
||||||
vpxor %ymm10, %ymm15, %ymm15
|
|
||||||
vpxor %ymm11, %ymm7, %ymm7
|
|
||||||
vpxor %ymm12, %ymm6, %ymm6
|
|
||||||
vpbroadcastq 0(%rsi), %ymm8
|
|
||||||
vpxor %ymm8, %ymm13, %ymm13
|
|
||||||
vmovdqa %ymm13, 0(%rdi)
|
|
||||||
vmovdqa %ymm14, 192(%rdi)
|
|
||||||
vmovdqa %ymm15, 384(%rdi)
|
|
||||||
vmovdqa %ymm7, 576(%rdi)
|
|
||||||
vmovdqa %ymm6, 768(%rdi)
|
|
||||||
vpxor 96(%rdi), %ymm2, %ymm8
|
|
||||||
vpxor 288(%rdi), %ymm1, %ymm9
|
|
||||||
vpxor 320(%rdi), %ymm5, %ymm10
|
|
||||||
vpxor 512(%rdi), %ymm4, %ymm11
|
|
||||||
vpxor 704(%rdi), %ymm3, %ymm12
|
|
||||||
vpsllq $28, %ymm8, %ymm13
|
|
||||||
vpsllq $20, %ymm9, %ymm14
|
|
||||||
vpsllq $3, %ymm10, %ymm15
|
|
||||||
vpsllq $45, %ymm11, %ymm7
|
|
||||||
vpsllq $61, %ymm12, %ymm6
|
|
||||||
vpsrlq $36, %ymm8, %ymm8
|
|
||||||
vpsrlq $44, %ymm9, %ymm9
|
|
||||||
vpsrlq $61, %ymm10, %ymm10
|
|
||||||
vpsrlq $19, %ymm11, %ymm11
|
|
||||||
vpsrlq $3, %ymm12, %ymm12
|
|
||||||
vpor %ymm13, %ymm8, %ymm8
|
|
||||||
vpor %ymm14, %ymm9, %ymm9
|
|
||||||
vpor %ymm15, %ymm10, %ymm10
|
|
||||||
vpor %ymm7, %ymm11, %ymm11
|
|
||||||
vpor %ymm6, %ymm12, %ymm12
|
|
||||||
vpandn %ymm10, %ymm9, %ymm13
|
|
||||||
vpandn %ymm11, %ymm10, %ymm14
|
|
||||||
vpandn %ymm12, %ymm11, %ymm15
|
|
||||||
vpandn %ymm8, %ymm12, %ymm7
|
|
||||||
vpandn %ymm9, %ymm8, %ymm6
|
|
||||||
vpxor %ymm8, %ymm13, %ymm13
|
|
||||||
vpxor %ymm9, %ymm14, %ymm14
|
|
||||||
vpxor %ymm10, %ymm15, %ymm15
|
|
||||||
vpxor %ymm11, %ymm7, %ymm7
|
|
||||||
vpxor %ymm12, %ymm6, %ymm6
|
|
||||||
vmovdqa %ymm13, 320(%rdi)
|
|
||||||
vmovdqa %ymm14, 512(%rdi)
|
|
||||||
vmovdqa %ymm15, 704(%rdi)
|
|
||||||
vmovdqa %ymm7, 96(%rdi)
|
|
||||||
vmovdqa %ymm6, 288(%rdi)
|
|
||||||
vpxor 32(%rdi), %ymm4, %ymm8
|
|
||||||
vpxor 224(%rdi), %ymm3, %ymm9
|
|
||||||
vpxor 416(%rdi), %ymm2, %ymm10
|
|
||||||
vpxor 608(%rdi), %ymm1, %ymm11
|
|
||||||
vpxor 640(%rdi), %ymm5, %ymm12
|
|
||||||
vpsllq $1, %ymm8, %ymm13
|
|
||||||
vpsllq $6, %ymm9, %ymm14
|
|
||||||
vpsllq $25, %ymm10, %ymm15
|
|
||||||
#vpsllq $8, %ymm11, %ymm7
|
|
||||||
vpsllq $18, %ymm12, %ymm6
|
|
||||||
vpsrlq $63, %ymm8, %ymm8
|
|
||||||
vpsrlq $58, %ymm9, %ymm9
|
|
||||||
vpsrlq $39, %ymm10, %ymm10
|
|
||||||
#vpsrlq $56, %ymm11, %ymm11
|
|
||||||
vpsrlq $46, %ymm12, %ymm12
|
|
||||||
vpor %ymm13, %ymm8, %ymm8
|
|
||||||
vpor %ymm14, %ymm9, %ymm9
|
|
||||||
vpor %ymm15, %ymm10, %ymm10
|
|
||||||
#vpor %ymm7, %ymm11, %ymm11
|
|
||||||
vpshufb %ymm0, %ymm11, %ymm11
|
|
||||||
vpor %ymm6, %ymm12, %ymm12
|
|
||||||
vpandn %ymm10, %ymm9, %ymm13
|
|
||||||
vpandn %ymm11, %ymm10, %ymm14
|
|
||||||
vpandn %ymm12, %ymm11, %ymm15
|
|
||||||
vpandn %ymm8, %ymm12, %ymm7
|
|
||||||
vpandn %ymm9, %ymm8, %ymm6
|
|
||||||
vpxor %ymm8, %ymm13, %ymm13
|
|
||||||
vpxor %ymm9, %ymm14, %ymm14
|
|
||||||
vpxor %ymm10, %ymm15, %ymm15
|
|
||||||
vpxor %ymm11, %ymm7, %ymm7
|
|
||||||
vpxor %ymm12, %ymm6, %ymm6
|
|
||||||
vmovdqa %ymm13, 640(%rdi)
|
|
||||||
vmovdqa %ymm14, 32(%rdi)
|
|
||||||
vmovdqa %ymm15, 224(%rdi)
|
|
||||||
vmovdqa %ymm7, 416(%rdi)
|
|
||||||
vmovdqa %ymm6, 608(%rdi)
|
|
||||||
vpxor 128(%rdi), %ymm1, %ymm8
|
|
||||||
vpxor 160(%rdi), %ymm5, %ymm9
|
|
||||||
vpxor 352(%rdi), %ymm4, %ymm10
|
|
||||||
vpxor 544(%rdi), %ymm3, %ymm11
|
|
||||||
vpxor 736(%rdi), %ymm2, %ymm12
|
|
||||||
vpsllq $27, %ymm8, %ymm13
|
|
||||||
vpsllq $36, %ymm9, %ymm14
|
|
||||||
vpsllq $10, %ymm10, %ymm15
|
|
||||||
vpsllq $15, %ymm11, %ymm7
|
|
||||||
#vpsllq $56, %ymm12, %ymm6
|
|
||||||
vpsrlq $37, %ymm8, %ymm8
|
|
||||||
vpsrlq $28, %ymm9, %ymm9
|
|
||||||
vpsrlq $54, %ymm10, %ymm10
|
|
||||||
vpsrlq $49, %ymm11, %ymm11
|
|
||||||
#vpsrlq $8, %ymm12, %ymm12
|
|
||||||
vpor %ymm13, %ymm8, %ymm8
|
|
||||||
vpor %ymm14, %ymm9, %ymm9
|
|
||||||
vpor %ymm15, %ymm10, %ymm10
|
|
||||||
vpor %ymm7, %ymm11, %ymm11
|
|
||||||
#vpor %ymm6, %ymm12, %ymm12
|
|
||||||
vpshufb rho56(%rip), %ymm12, %ymm12
|
|
||||||
vpandn %ymm10, %ymm9, %ymm13
|
|
||||||
vpandn %ymm11, %ymm10, %ymm14
|
|
||||||
vpandn %ymm12, %ymm11, %ymm15
|
|
||||||
vpandn %ymm8, %ymm12, %ymm7
|
|
||||||
vpandn %ymm9, %ymm8, %ymm6
|
|
||||||
vpxor %ymm8, %ymm13, %ymm13
|
|
||||||
vpxor %ymm9, %ymm14, %ymm14
|
|
||||||
vpxor %ymm10, %ymm15, %ymm15
|
|
||||||
vpxor %ymm11, %ymm7, %ymm7
|
|
||||||
vpxor %ymm12, %ymm6, %ymm6
|
|
||||||
vmovdqa %ymm13, 160(%rdi)
|
|
||||||
vmovdqa %ymm14, 352(%rdi)
|
|
||||||
vmovdqa %ymm15, 544(%rdi)
|
|
||||||
vmovdqa %ymm7, 736(%rdi)
|
|
||||||
vmovdqa %ymm6, 128(%rdi)
|
|
||||||
vpxor 64(%rdi), %ymm3, %ymm8
|
|
||||||
vpxor 256(%rdi), %ymm2, %ymm9
|
|
||||||
vpxor 448(%rdi), %ymm1, %ymm10
|
|
||||||
vpxor 480(%rdi), %ymm5, %ymm11
|
|
||||||
vpxor 672(%rdi), %ymm4, %ymm12
|
|
||||||
vpsllq $62, %ymm8, %ymm13
|
|
||||||
vpsllq $55, %ymm9, %ymm14
|
|
||||||
vpsllq $39, %ymm10, %ymm15
|
|
||||||
vpsllq $41, %ymm11, %ymm7
|
|
||||||
vpsllq $2, %ymm12, %ymm6
|
|
||||||
vpsrlq $2, %ymm8, %ymm8
|
|
||||||
vpsrlq $9, %ymm9, %ymm9
|
|
||||||
vpsrlq $25, %ymm10, %ymm10
|
|
||||||
vpsrlq $23, %ymm11, %ymm11
|
|
||||||
vpsrlq $62, %ymm12, %ymm12
|
|
||||||
vpor %ymm13, %ymm8, %ymm8
|
|
||||||
vpor %ymm14, %ymm9, %ymm9
|
|
||||||
vpor %ymm15, %ymm10, %ymm10
|
|
||||||
vpor %ymm7, %ymm11, %ymm11
|
|
||||||
vpor %ymm6, %ymm12, %ymm12
|
|
||||||
vpandn %ymm10, %ymm9, %ymm13
|
|
||||||
vpandn %ymm11, %ymm10, %ymm14
|
|
||||||
vpandn %ymm12, %ymm11, %ymm15
|
|
||||||
vpandn %ymm8, %ymm12, %ymm7
|
|
||||||
vpandn %ymm9, %ymm8, %ymm6
|
|
||||||
vpxor %ymm8, %ymm13, %ymm13
|
|
||||||
vpxor %ymm9, %ymm14, %ymm14
|
|
||||||
vpxor %ymm10, %ymm15, %ymm15
|
|
||||||
vpxor %ymm11, %ymm7, %ymm7
|
|
||||||
vpxor %ymm12, %ymm6, %ymm6
|
|
||||||
vmovdqa %ymm13, 480(%rdi)
|
|
||||||
vmovdqa %ymm14, 672(%rdi)
|
|
||||||
vmovdqa %ymm15, 64(%rdi)
|
|
||||||
vmovdqa %ymm7, 256(%rdi)
|
|
||||||
vmovdqa %ymm6, 448(%rdi)
|
|
||||||
vmovdqa 0(%rdi), %ymm8
|
|
||||||
vmovdqa 32(%rdi), %ymm9
|
|
||||||
vmovdqa 64(%rdi), %ymm10
|
|
||||||
vmovdqa 96(%rdi), %ymm11
|
|
||||||
vmovdqa 128(%rdi), %ymm12
|
|
||||||
vpxor 160(%rdi), %ymm8, %ymm8
|
|
||||||
vpxor 192(%rdi), %ymm9, %ymm9
|
|
||||||
vpxor 224(%rdi), %ymm10, %ymm10
|
|
||||||
vpxor 256(%rdi), %ymm11, %ymm11
|
|
||||||
vpxor 288(%rdi), %ymm12, %ymm12
|
|
||||||
vpxor 320(%rdi), %ymm8, %ymm8
|
|
||||||
vpxor 352(%rdi), %ymm9, %ymm9
|
|
||||||
vpxor 384(%rdi), %ymm10, %ymm10
|
|
||||||
vpxor 416(%rdi), %ymm11, %ymm11
|
|
||||||
vpxor 448(%rdi), %ymm12, %ymm12
|
|
||||||
vpxor 480(%rdi), %ymm8, %ymm8
|
|
||||||
vpxor 512(%rdi), %ymm9, %ymm9
|
|
||||||
vpxor 544(%rdi), %ymm10, %ymm10
|
|
||||||
vpxor 576(%rdi), %ymm11, %ymm11
|
|
||||||
vpxor 608(%rdi), %ymm12, %ymm12
|
|
||||||
vpxor 640(%rdi), %ymm8, %ymm8
|
|
||||||
vpxor 672(%rdi), %ymm9, %ymm9
|
|
||||||
vpxor 704(%rdi), %ymm10, %ymm10
|
|
||||||
vpxor 736(%rdi), %ymm11, %ymm11
|
|
||||||
vpxor 768(%rdi), %ymm12, %ymm12
|
|
||||||
vpsllq $1, %ymm9, %ymm13
|
|
||||||
vpsllq $1, %ymm10, %ymm14
|
|
||||||
vpsllq $1, %ymm11, %ymm15
|
|
||||||
vpsllq $1, %ymm12, %ymm7
|
|
||||||
vpsllq $1, %ymm8, %ymm6
|
|
||||||
vpsrlq $63, %ymm9, %ymm5
|
|
||||||
vpsrlq $63, %ymm10, %ymm4
|
|
||||||
vpsrlq $63, %ymm11, %ymm3
|
|
||||||
vpsrlq $63, %ymm12, %ymm2
|
|
||||||
vpsrlq $63, %ymm8, %ymm1
|
|
||||||
vpor %ymm13, %ymm5, %ymm5
|
|
||||||
vpor %ymm14, %ymm4, %ymm4
|
|
||||||
vpor %ymm15, %ymm3, %ymm3
|
|
||||||
vpor %ymm7, %ymm2, %ymm2
|
|
||||||
vpor %ymm6, %ymm1, %ymm1
|
|
||||||
vpxor %ymm5, %ymm12, %ymm5
|
|
||||||
vpxor %ymm4, %ymm8, %ymm4
|
|
||||||
vpxor %ymm3, %ymm9, %ymm3
|
|
||||||
vpxor %ymm2, %ymm10, %ymm2
|
|
||||||
vpxor %ymm1, %ymm11, %ymm1
|
|
||||||
vpxor 0(%rdi), %ymm5, %ymm8
|
|
||||||
vpxor 512(%rdi), %ymm4, %ymm9
|
|
||||||
vpxor 224(%rdi), %ymm3, %ymm10
|
|
||||||
vpxor 736(%rdi), %ymm2, %ymm11
|
|
||||||
vpxor 448(%rdi), %ymm1, %ymm12
|
|
||||||
vpsllq $44, %ymm9, %ymm14
|
|
||||||
vpsllq $43, %ymm10, %ymm15
|
|
||||||
vpsllq $21, %ymm11, %ymm7
|
|
||||||
vpsllq $14, %ymm12, %ymm6
|
|
||||||
vpsrlq $20, %ymm9, %ymm9
|
|
||||||
vpsrlq $21, %ymm10, %ymm10
|
|
||||||
vpsrlq $43, %ymm11, %ymm11
|
|
||||||
vpsrlq $50, %ymm12, %ymm12
|
|
||||||
vpor %ymm14, %ymm9, %ymm9
|
|
||||||
vpor %ymm15, %ymm10, %ymm10
|
|
||||||
vpor %ymm7, %ymm11, %ymm11
|
|
||||||
vpor %ymm6, %ymm12, %ymm12
|
|
||||||
vpandn %ymm10, %ymm9, %ymm13
|
|
||||||
vpandn %ymm11, %ymm10, %ymm14
|
|
||||||
vpandn %ymm12, %ymm11, %ymm15
|
|
||||||
vpandn %ymm8, %ymm12, %ymm7
|
|
||||||
vpandn %ymm9, %ymm8, %ymm6
|
|
||||||
vpxor %ymm8, %ymm13, %ymm13
|
|
||||||
vpxor %ymm9, %ymm14, %ymm14
|
|
||||||
vpxor %ymm10, %ymm15, %ymm15
|
|
||||||
vpxor %ymm11, %ymm7, %ymm7
|
|
||||||
vpxor %ymm12, %ymm6, %ymm6
|
|
||||||
vpbroadcastq 8(%rsi), %ymm8
|
|
||||||
vpxor %ymm8, %ymm13, %ymm13
|
|
||||||
vmovdqa %ymm13, 0(%rdi)
|
|
||||||
vmovdqa %ymm14, 512(%rdi)
|
|
||||||
vmovdqa %ymm15, 224(%rdi)
|
|
||||||
vmovdqa %ymm7, 736(%rdi)
|
|
||||||
vmovdqa %ymm6, 448(%rdi)
|
|
||||||
vpxor 576(%rdi), %ymm2, %ymm8
|
|
||||||
vpxor 288(%rdi), %ymm1, %ymm9
|
|
||||||
vpxor 640(%rdi), %ymm5, %ymm10
|
|
||||||
vpxor 352(%rdi), %ymm4, %ymm11
|
|
||||||
vpxor 64(%rdi), %ymm3, %ymm12
|
|
||||||
vpsllq $28, %ymm8, %ymm13
|
|
||||||
vpsllq $20, %ymm9, %ymm14
|
|
||||||
vpsllq $3, %ymm10, %ymm15
|
|
||||||
vpsllq $45, %ymm11, %ymm7
|
|
||||||
vpsllq $61, %ymm12, %ymm6
|
|
||||||
vpsrlq $36, %ymm8, %ymm8
|
|
||||||
vpsrlq $44, %ymm9, %ymm9
|
|
||||||
vpsrlq $61, %ymm10, %ymm10
|
|
||||||
vpsrlq $19, %ymm11, %ymm11
|
|
||||||
vpsrlq $3, %ymm12, %ymm12
|
|
||||||
vpor %ymm13, %ymm8, %ymm8
|
|
||||||
vpor %ymm14, %ymm9, %ymm9
|
|
||||||
vpor %ymm15, %ymm10, %ymm10
|
|
||||||
vpor %ymm7, %ymm11, %ymm11
|
|
||||||
vpor %ymm6, %ymm12, %ymm12
|
|
||||||
vpandn %ymm10, %ymm9, %ymm13
|
|
||||||
vpandn %ymm11, %ymm10, %ymm14
|
|
||||||
vpandn %ymm12, %ymm11, %ymm15
|
|
||||||
vpandn %ymm8, %ymm12, %ymm7
|
|
||||||
vpandn %ymm9, %ymm8, %ymm6
|
|
||||||
vpxor %ymm8, %ymm13, %ymm13
|
|
||||||
vpxor %ymm9, %ymm14, %ymm14
|
|
||||||
vpxor %ymm10, %ymm15, %ymm15
|
|
||||||
vpxor %ymm11, %ymm7, %ymm7
|
|
||||||
vpxor %ymm12, %ymm6, %ymm6
|
|
||||||
vmovdqa %ymm13, 640(%rdi)
|
|
||||||
vmovdqa %ymm14, 352(%rdi)
|
|
||||||
vmovdqa %ymm15, 64(%rdi)
|
|
||||||
vmovdqa %ymm7, 576(%rdi)
|
|
||||||
vmovdqa %ymm6, 288(%rdi)
|
|
||||||
vpxor 192(%rdi), %ymm4, %ymm8
|
|
||||||
vpxor 704(%rdi), %ymm3, %ymm9
|
|
||||||
vpxor 416(%rdi), %ymm2, %ymm10
|
|
||||||
vpxor 128(%rdi), %ymm1, %ymm11
|
|
||||||
vpxor 480(%rdi), %ymm5, %ymm12
|
|
||||||
vpsllq $1, %ymm8, %ymm13
|
|
||||||
vpsllq $6, %ymm9, %ymm14
|
|
||||||
vpsllq $25, %ymm10, %ymm15
|
|
||||||
#vpsllq $8, %ymm11, %ymm7
|
|
||||||
vpsllq $18, %ymm12, %ymm6
|
|
||||||
vpsrlq $63, %ymm8, %ymm8
|
|
||||||
vpsrlq $58, %ymm9, %ymm9
|
|
||||||
vpsrlq $39, %ymm10, %ymm10
|
|
||||||
#vpsrlq $56, %ymm11, %ymm11
|
|
||||||
vpsrlq $46, %ymm12, %ymm12
|
|
||||||
vpor %ymm13, %ymm8, %ymm8
|
|
||||||
vpor %ymm14, %ymm9, %ymm9
|
|
||||||
vpor %ymm15, %ymm10, %ymm10
|
|
||||||
#vpor %ymm7, %ymm11, %ymm11
|
|
||||||
vpshufb %ymm0, %ymm11, %ymm11
|
|
||||||
vpor %ymm6, %ymm12, %ymm12
|
|
||||||
vpandn %ymm10, %ymm9, %ymm13
|
|
||||||
vpandn %ymm11, %ymm10, %ymm14
|
|
||||||
vpandn %ymm12, %ymm11, %ymm15
|
|
||||||
vpandn %ymm8, %ymm12, %ymm7
|
|
||||||
vpandn %ymm9, %ymm8, %ymm6
|
|
||||||
vpxor %ymm8, %ymm13, %ymm13
|
|
||||||
vpxor %ymm9, %ymm14, %ymm14
|
|
||||||
vpxor %ymm10, %ymm15, %ymm15
|
|
||||||
vpxor %ymm11, %ymm7, %ymm7
|
|
||||||
vpxor %ymm12, %ymm6, %ymm6
|
|
||||||
vmovdqa %ymm13, 480(%rdi)
|
|
||||||
vmovdqa %ymm14, 192(%rdi)
|
|
||||||
vmovdqa %ymm15, 704(%rdi)
|
|
||||||
vmovdqa %ymm7, 416(%rdi)
|
|
||||||
vmovdqa %ymm6, 128(%rdi)
|
|
||||||
vpxor 768(%rdi), %ymm1, %ymm8
|
|
||||||
vpxor 320(%rdi), %ymm5, %ymm9
|
|
||||||
vpxor 32(%rdi), %ymm4, %ymm10
|
|
||||||
vpxor 544(%rdi), %ymm3, %ymm11
|
|
||||||
vpxor 256(%rdi), %ymm2, %ymm12
|
|
||||||
vpsllq $27, %ymm8, %ymm13
|
|
||||||
vpsllq $36, %ymm9, %ymm14
|
|
||||||
vpsllq $10, %ymm10, %ymm15
|
|
||||||
vpsllq $15, %ymm11, %ymm7
|
|
||||||
#vpsllq $56, %ymm12, %ymm6
|
|
||||||
vpsrlq $37, %ymm8, %ymm8
|
|
||||||
vpsrlq $28, %ymm9, %ymm9
|
|
||||||
vpsrlq $54, %ymm10, %ymm10
|
|
||||||
vpsrlq $49, %ymm11, %ymm11
|
|
||||||
#vpsrlq $8, %ymm12, %ymm12
|
|
||||||
vpor %ymm13, %ymm8, %ymm8
|
|
||||||
vpor %ymm14, %ymm9, %ymm9
|
|
||||||
vpor %ymm15, %ymm10, %ymm10
|
|
||||||
vpor %ymm7, %ymm11, %ymm11
|
|
||||||
#vpor %ymm6, %ymm12, %ymm12
|
|
||||||
vpshufb rho56(%rip), %ymm12, %ymm12
|
|
||||||
vpandn %ymm10, %ymm9, %ymm13
|
|
||||||
vpandn %ymm11, %ymm10, %ymm14
|
|
||||||
vpandn %ymm12, %ymm11, %ymm15
|
|
||||||
vpandn %ymm8, %ymm12, %ymm7
|
|
||||||
vpandn %ymm9, %ymm8, %ymm6
|
|
||||||
vpxor %ymm8, %ymm13, %ymm13
|
|
||||||
vpxor %ymm9, %ymm14, %ymm14
|
|
||||||
vpxor %ymm10, %ymm15, %ymm15
|
|
||||||
vpxor %ymm11, %ymm7, %ymm7
|
|
||||||
vpxor %ymm12, %ymm6, %ymm6
|
|
||||||
vmovdqa %ymm13, 320(%rdi)
|
|
||||||
vmovdqa %ymm14, 32(%rdi)
|
|
||||||
vmovdqa %ymm15, 544(%rdi)
|
|
||||||
vmovdqa %ymm7, 256(%rdi)
|
|
||||||
vmovdqa %ymm6, 768(%rdi)
|
|
||||||
vpxor 384(%rdi), %ymm3, %ymm8
|
|
||||||
vpxor 96(%rdi), %ymm2, %ymm9
|
|
||||||
vpxor 608(%rdi), %ymm1, %ymm10
|
|
||||||
vpxor 160(%rdi), %ymm5, %ymm11
|
|
||||||
vpxor 672(%rdi), %ymm4, %ymm12
|
|
||||||
vpsllq $62, %ymm8, %ymm13
|
|
||||||
vpsllq $55, %ymm9, %ymm14
|
|
||||||
vpsllq $39, %ymm10, %ymm15
|
|
||||||
vpsllq $41, %ymm11, %ymm7
|
|
||||||
vpsllq $2, %ymm12, %ymm6
|
|
||||||
vpsrlq $2, %ymm8, %ymm8
|
|
||||||
vpsrlq $9, %ymm9, %ymm9
|
|
||||||
vpsrlq $25, %ymm10, %ymm10
|
|
||||||
vpsrlq $23, %ymm11, %ymm11
|
|
||||||
vpsrlq $62, %ymm12, %ymm12
|
|
||||||
vpor %ymm13, %ymm8, %ymm8
|
|
||||||
vpor %ymm14, %ymm9, %ymm9
|
|
||||||
vpor %ymm15, %ymm10, %ymm10
|
|
||||||
vpor %ymm7, %ymm11, %ymm11
|
|
||||||
vpor %ymm6, %ymm12, %ymm12
|
|
||||||
vpandn %ymm10, %ymm9, %ymm13
|
|
||||||
vpandn %ymm11, %ymm10, %ymm14
|
|
||||||
vpandn %ymm12, %ymm11, %ymm15
|
|
||||||
vpandn %ymm8, %ymm12, %ymm7
|
|
||||||
vpandn %ymm9, %ymm8, %ymm6
|
|
||||||
vpxor %ymm8, %ymm13, %ymm13
|
|
||||||
vpxor %ymm9, %ymm14, %ymm14
|
|
||||||
vpxor %ymm10, %ymm15, %ymm15
|
|
||||||
vpxor %ymm11, %ymm7, %ymm7
|
|
||||||
vpxor %ymm12, %ymm6, %ymm6
|
|
||||||
vmovdqa %ymm13, 160(%rdi)
|
|
||||||
vmovdqa %ymm14, 672(%rdi)
|
|
||||||
vmovdqa %ymm15, 384(%rdi)
|
|
||||||
vmovdqa %ymm7, 96(%rdi)
|
|
||||||
vmovdqa %ymm6, 608(%rdi)
|
|
||||||
vmovdqa 0(%rdi), %ymm8
|
|
||||||
vmovdqa 32(%rdi), %ymm9
|
|
||||||
vmovdqa 64(%rdi), %ymm10
|
|
||||||
vmovdqa 96(%rdi), %ymm11
|
|
||||||
vmovdqa 128(%rdi), %ymm12
|
|
||||||
vpxor 160(%rdi), %ymm8, %ymm8
|
|
||||||
vpxor 192(%rdi), %ymm9, %ymm9
|
|
||||||
vpxor 224(%rdi), %ymm10, %ymm10
|
|
||||||
vpxor 256(%rdi), %ymm11, %ymm11
|
|
||||||
vpxor 288(%rdi), %ymm12, %ymm12
|
|
||||||
vpxor 320(%rdi), %ymm8, %ymm8
|
|
||||||
vpxor 352(%rdi), %ymm9, %ymm9
|
|
||||||
vpxor 384(%rdi), %ymm10, %ymm10
|
|
||||||
vpxor 416(%rdi), %ymm11, %ymm11
|
|
||||||
vpxor 448(%rdi), %ymm12, %ymm12
|
|
||||||
vpxor 480(%rdi), %ymm8, %ymm8
|
|
||||||
vpxor 512(%rdi), %ymm9, %ymm9
|
|
||||||
vpxor 544(%rdi), %ymm10, %ymm10
|
|
||||||
vpxor 576(%rdi), %ymm11, %ymm11
|
|
||||||
vpxor 608(%rdi), %ymm12, %ymm12
|
|
||||||
vpxor 640(%rdi), %ymm8, %ymm8
|
|
||||||
vpxor 672(%rdi), %ymm9, %ymm9
|
|
||||||
vpxor 704(%rdi), %ymm10, %ymm10
|
|
||||||
vpxor 736(%rdi), %ymm11, %ymm11
|
|
||||||
vpxor 768(%rdi), %ymm12, %ymm12
|
|
||||||
vpsllq $1, %ymm9, %ymm13
|
|
||||||
vpsllq $1, %ymm10, %ymm14
|
|
||||||
vpsllq $1, %ymm11, %ymm15
|
|
||||||
vpsllq $1, %ymm12, %ymm7
|
|
||||||
vpsllq $1, %ymm8, %ymm6
|
|
||||||
vpsrlq $63, %ymm9, %ymm5
|
|
||||||
vpsrlq $63, %ymm10, %ymm4
|
|
||||||
vpsrlq $63, %ymm11, %ymm3
|
|
||||||
vpsrlq $63, %ymm12, %ymm2
|
|
||||||
vpsrlq $63, %ymm8, %ymm1
|
|
||||||
vpor %ymm13, %ymm5, %ymm5
|
|
||||||
vpor %ymm14, %ymm4, %ymm4
|
|
||||||
vpor %ymm15, %ymm3, %ymm3
|
|
||||||
vpor %ymm7, %ymm2, %ymm2
|
|
||||||
vpor %ymm6, %ymm1, %ymm1
|
|
||||||
vpxor %ymm5, %ymm12, %ymm5
|
|
||||||
vpxor %ymm4, %ymm8, %ymm4
|
|
||||||
vpxor %ymm3, %ymm9, %ymm3
|
|
||||||
vpxor %ymm2, %ymm10, %ymm2
|
|
||||||
vpxor %ymm1, %ymm11, %ymm1
|
|
||||||
vpxor 0(%rdi), %ymm5, %ymm8
|
|
||||||
vpxor 352(%rdi), %ymm4, %ymm9
|
|
||||||
vpxor 704(%rdi), %ymm3, %ymm10
|
|
||||||
vpxor 256(%rdi), %ymm2, %ymm11
|
|
||||||
vpxor 608(%rdi), %ymm1, %ymm12
|
|
||||||
vpsllq $44, %ymm9, %ymm14
|
|
||||||
vpsllq $43, %ymm10, %ymm15
|
|
||||||
vpsllq $21, %ymm11, %ymm7
|
|
||||||
vpsllq $14, %ymm12, %ymm6
|
|
||||||
vpsrlq $20, %ymm9, %ymm9
|
|
||||||
vpsrlq $21, %ymm10, %ymm10
|
|
||||||
vpsrlq $43, %ymm11, %ymm11
|
|
||||||
vpsrlq $50, %ymm12, %ymm12
|
|
||||||
vpor %ymm14, %ymm9, %ymm9
|
|
||||||
vpor %ymm15, %ymm10, %ymm10
|
|
||||||
vpor %ymm7, %ymm11, %ymm11
|
|
||||||
vpor %ymm6, %ymm12, %ymm12
|
|
||||||
vpandn %ymm10, %ymm9, %ymm13
|
|
||||||
vpandn %ymm11, %ymm10, %ymm14
|
|
||||||
vpandn %ymm12, %ymm11, %ymm15
|
|
||||||
vpandn %ymm8, %ymm12, %ymm7
|
|
||||||
vpandn %ymm9, %ymm8, %ymm6
|
|
||||||
vpxor %ymm8, %ymm13, %ymm13
|
|
||||||
vpxor %ymm9, %ymm14, %ymm14
|
|
||||||
vpxor %ymm10, %ymm15, %ymm15
|
|
||||||
vpxor %ymm11, %ymm7, %ymm7
|
|
||||||
vpxor %ymm12, %ymm6, %ymm6
|
|
||||||
vpbroadcastq 16(%rsi), %ymm8
|
|
||||||
vpxor %ymm8, %ymm13, %ymm13
|
|
||||||
vmovdqa %ymm13, 0(%rdi)
|
|
||||||
vmovdqa %ymm14, 352(%rdi)
|
|
||||||
vmovdqa %ymm15, 704(%rdi)
|
|
||||||
vmovdqa %ymm7, 256(%rdi)
|
|
||||||
vmovdqa %ymm6, 608(%rdi)
|
|
||||||
vpxor 736(%rdi), %ymm2, %ymm8
|
|
||||||
vpxor 288(%rdi), %ymm1, %ymm9
|
|
||||||
vpxor 480(%rdi), %ymm5, %ymm10
|
|
||||||
vpxor 32(%rdi), %ymm4, %ymm11
|
|
||||||
vpxor 384(%rdi), %ymm3, %ymm12
|
|
||||||
vpsllq $28, %ymm8, %ymm13
|
|
||||||
vpsllq $20, %ymm9, %ymm14
|
|
||||||
vpsllq $3, %ymm10, %ymm15
|
|
||||||
vpsllq $45, %ymm11, %ymm7
|
|
||||||
vpsllq $61, %ymm12, %ymm6
|
|
||||||
vpsrlq $36, %ymm8, %ymm8
|
|
||||||
vpsrlq $44, %ymm9, %ymm9
|
|
||||||
vpsrlq $61, %ymm10, %ymm10
|
|
||||||
vpsrlq $19, %ymm11, %ymm11
|
|
||||||
vpsrlq $3, %ymm12, %ymm12
|
|
||||||
vpor %ymm13, %ymm8, %ymm8
|
|
||||||
vpor %ymm14, %ymm9, %ymm9
|
|
||||||
vpor %ymm15, %ymm10, %ymm10
|
|
||||||
vpor %ymm7, %ymm11, %ymm11
|
|
||||||
vpor %ymm6, %ymm12, %ymm12
|
|
||||||
vpandn %ymm10, %ymm9, %ymm13
|
|
||||||
vpandn %ymm11, %ymm10, %ymm14
|
|
||||||
vpandn %ymm12, %ymm11, %ymm15
|
|
||||||
vpandn %ymm8, %ymm12, %ymm7
|
|
||||||
vpandn %ymm9, %ymm8, %ymm6
|
|
||||||
vpxor %ymm8, %ymm13, %ymm13
|
|
||||||
vpxor %ymm9, %ymm14, %ymm14
|
|
||||||
vpxor %ymm10, %ymm15, %ymm15
|
|
||||||
vpxor %ymm11, %ymm7, %ymm7
|
|
||||||
vpxor %ymm12, %ymm6, %ymm6
|
|
||||||
vmovdqa %ymm13, 480(%rdi)
|
|
||||||
vmovdqa %ymm14, 32(%rdi)
|
|
||||||
vmovdqa %ymm15, 384(%rdi)
|
|
||||||
vmovdqa %ymm7, 736(%rdi)
|
|
||||||
vmovdqa %ymm6, 288(%rdi)
|
|
||||||
vpxor 512(%rdi), %ymm4, %ymm8
|
|
||||||
vpxor 64(%rdi), %ymm3, %ymm9
|
|
||||||
vpxor 416(%rdi), %ymm2, %ymm10
|
|
||||||
vpxor 768(%rdi), %ymm1, %ymm11
|
|
||||||
vpxor 160(%rdi), %ymm5, %ymm12
|
|
||||||
vpsllq $1, %ymm8, %ymm13
|
|
||||||
vpsllq $6, %ymm9, %ymm14
|
|
||||||
vpsllq $25, %ymm10, %ymm15
|
|
||||||
#vpsllq $8, %ymm11, %ymm7
|
|
||||||
vpsllq $18, %ymm12, %ymm6
|
|
||||||
vpsrlq $63, %ymm8, %ymm8
|
|
||||||
vpsrlq $58, %ymm9, %ymm9
|
|
||||||
vpsrlq $39, %ymm10, %ymm10
|
|
||||||
#vpsrlq $56, %ymm11, %ymm11
|
|
||||||
vpsrlq $46, %ymm12, %ymm12
|
|
||||||
vpor %ymm13, %ymm8, %ymm8
|
|
||||||
vpor %ymm14, %ymm9, %ymm9
|
|
||||||
vpor %ymm15, %ymm10, %ymm10
|
|
||||||
#vpor %ymm7, %ymm11, %ymm11
|
|
||||||
vpshufb %ymm0, %ymm11, %ymm11
|
|
||||||
vpor %ymm6, %ymm12, %ymm12
|
|
||||||
vpandn %ymm10, %ymm9, %ymm13
|
|
||||||
vpandn %ymm11, %ymm10, %ymm14
|
|
||||||
vpandn %ymm12, %ymm11, %ymm15
|
|
||||||
vpandn %ymm8, %ymm12, %ymm7
|
|
||||||
vpandn %ymm9, %ymm8, %ymm6
|
|
||||||
vpxor %ymm8, %ymm13, %ymm13
|
|
||||||
vpxor %ymm9, %ymm14, %ymm14
|
|
||||||
vpxor %ymm10, %ymm15, %ymm15
|
|
||||||
vpxor %ymm11, %ymm7, %ymm7
|
|
||||||
vpxor %ymm12, %ymm6, %ymm6
|
|
||||||
vmovdqa %ymm13, 160(%rdi)
|
|
||||||
vmovdqa %ymm14, 512(%rdi)
|
|
||||||
vmovdqa %ymm15, 64(%rdi)
|
|
||||||
vmovdqa %ymm7, 416(%rdi)
|
|
||||||
vmovdqa %ymm6, 768(%rdi)
|
|
||||||
vpxor 448(%rdi), %ymm1, %ymm8
|
|
||||||
vpxor 640(%rdi), %ymm5, %ymm9
|
|
||||||
vpxor 192(%rdi), %ymm4, %ymm10
|
|
||||||
vpxor 544(%rdi), %ymm3, %ymm11
|
|
||||||
vpxor 96(%rdi), %ymm2, %ymm12
|
|
||||||
vpsllq $27, %ymm8, %ymm13
|
|
||||||
vpsllq $36, %ymm9, %ymm14
|
|
||||||
vpsllq $10, %ymm10, %ymm15
|
|
||||||
vpsllq $15, %ymm11, %ymm7
|
|
||||||
#vpsllq $56, %ymm12, %ymm6
|
|
||||||
vpsrlq $37, %ymm8, %ymm8
|
|
||||||
vpsrlq $28, %ymm9, %ymm9
|
|
||||||
vpsrlq $54, %ymm10, %ymm10
|
|
||||||
vpsrlq $49, %ymm11, %ymm11
|
|
||||||
#vpsrlq $8, %ymm12, %ymm12
|
|
||||||
vpor %ymm13, %ymm8, %ymm8
|
|
||||||
vpor %ymm14, %ymm9, %ymm9
|
|
||||||
vpor %ymm15, %ymm10, %ymm10
|
|
||||||
vpor %ymm7, %ymm11, %ymm11
|
|
||||||
#vpor %ymm6, %ymm12, %ymm12
|
|
||||||
vpshufb rho56(%rip), %ymm12, %ymm12
|
|
||||||
vpandn %ymm10, %ymm9, %ymm13
|
|
||||||
vpandn %ymm11, %ymm10, %ymm14
|
|
||||||
vpandn %ymm12, %ymm11, %ymm15
|
|
||||||
vpandn %ymm8, %ymm12, %ymm7
|
|
||||||
vpandn %ymm9, %ymm8, %ymm6
|
|
||||||
vpxor %ymm8, %ymm13, %ymm13
|
|
||||||
vpxor %ymm9, %ymm14, %ymm14
|
|
||||||
vpxor %ymm10, %ymm15, %ymm15
|
|
||||||
vpxor %ymm11, %ymm7, %ymm7
|
|
||||||
vpxor %ymm12, %ymm6, %ymm6
|
|
||||||
vmovdqa %ymm13, 640(%rdi)
|
|
||||||
vmovdqa %ymm14, 192(%rdi)
|
|
||||||
vmovdqa %ymm15, 544(%rdi)
|
|
||||||
vmovdqa %ymm7, 96(%rdi)
|
|
||||||
vmovdqa %ymm6, 448(%rdi)
|
|
||||||
vpxor 224(%rdi), %ymm3, %ymm8
|
|
||||||
vpxor 576(%rdi), %ymm2, %ymm9
|
|
||||||
vpxor 128(%rdi), %ymm1, %ymm10
|
|
||||||
vpxor 320(%rdi), %ymm5, %ymm11
|
|
||||||
vpxor 672(%rdi), %ymm4, %ymm12
|
|
||||||
vpsllq $62, %ymm8, %ymm13
|
|
||||||
vpsllq $55, %ymm9, %ymm14
|
|
||||||
vpsllq $39, %ymm10, %ymm15
|
|
||||||
vpsllq $41, %ymm11, %ymm7
|
|
||||||
vpsllq $2, %ymm12, %ymm6
|
|
||||||
vpsrlq $2, %ymm8, %ymm8
|
|
||||||
vpsrlq $9, %ymm9, %ymm9
|
|
||||||
vpsrlq $25, %ymm10, %ymm10
|
|
||||||
vpsrlq $23, %ymm11, %ymm11
|
|
||||||
vpsrlq $62, %ymm12, %ymm12
|
|
||||||
vpor %ymm13, %ymm8, %ymm8
|
|
||||||
vpor %ymm14, %ymm9, %ymm9
|
|
||||||
vpor %ymm15, %ymm10, %ymm10
|
|
||||||
vpor %ymm7, %ymm11, %ymm11
|
|
||||||
vpor %ymm6, %ymm12, %ymm12
|
|
||||||
vpandn %ymm10, %ymm9, %ymm13
|
|
||||||
vpandn %ymm11, %ymm10, %ymm14
|
|
||||||
vpandn %ymm12, %ymm11, %ymm15
|
|
||||||
vpandn %ymm8, %ymm12, %ymm7
|
|
||||||
vpandn %ymm9, %ymm8, %ymm6
|
|
||||||
vpxor %ymm8, %ymm13, %ymm13
|
|
||||||
vpxor %ymm9, %ymm14, %ymm14
|
|
||||||
vpxor %ymm10, %ymm15, %ymm15
|
|
||||||
vpxor %ymm11, %ymm7, %ymm7
|
|
||||||
vpxor %ymm12, %ymm6, %ymm6
|
|
||||||
vmovdqa %ymm13, 320(%rdi)
|
|
||||||
vmovdqa %ymm14, 672(%rdi)
|
|
||||||
vmovdqa %ymm15, 224(%rdi)
|
|
||||||
vmovdqa %ymm7, 576(%rdi)
|
|
||||||
vmovdqa %ymm6, 128(%rdi)
|
|
||||||
vmovdqa 0(%rdi), %ymm8
|
|
||||||
vmovdqa 32(%rdi), %ymm9
|
|
||||||
vmovdqa 64(%rdi), %ymm10
|
|
||||||
vmovdqa 96(%rdi), %ymm11
|
|
||||||
vmovdqa 128(%rdi), %ymm12
|
|
||||||
vpxor 160(%rdi), %ymm8, %ymm8
|
|
||||||
vpxor 192(%rdi), %ymm9, %ymm9
|
|
||||||
vpxor 224(%rdi), %ymm10, %ymm10
|
|
||||||
vpxor 256(%rdi), %ymm11, %ymm11
|
|
||||||
vpxor 288(%rdi), %ymm12, %ymm12
|
|
||||||
vpxor 320(%rdi), %ymm8, %ymm8
|
|
||||||
vpxor 352(%rdi), %ymm9, %ymm9
|
|
||||||
vpxor 384(%rdi), %ymm10, %ymm10
|
|
||||||
vpxor 416(%rdi), %ymm11, %ymm11
|
|
||||||
vpxor 448(%rdi), %ymm12, %ymm12
|
|
||||||
vpxor 480(%rdi), %ymm8, %ymm8
|
|
||||||
vpxor 512(%rdi), %ymm9, %ymm9
|
|
||||||
vpxor 544(%rdi), %ymm10, %ymm10
|
|
||||||
vpxor 576(%rdi), %ymm11, %ymm11
|
|
||||||
vpxor 608(%rdi), %ymm12, %ymm12
|
|
||||||
vpxor 640(%rdi), %ymm8, %ymm8
|
|
||||||
vpxor 672(%rdi), %ymm9, %ymm9
|
|
||||||
vpxor 704(%rdi), %ymm10, %ymm10
|
|
||||||
vpxor 736(%rdi), %ymm11, %ymm11
|
|
||||||
vpxor 768(%rdi), %ymm12, %ymm12
|
|
||||||
vpsllq $1, %ymm9, %ymm13
|
|
||||||
vpsllq $1, %ymm10, %ymm14
|
|
||||||
vpsllq $1, %ymm11, %ymm15
|
|
||||||
vpsllq $1, %ymm12, %ymm7
|
|
||||||
vpsllq $1, %ymm8, %ymm6
|
|
||||||
vpsrlq $63, %ymm9, %ymm5
|
|
||||||
vpsrlq $63, %ymm10, %ymm4
|
|
||||||
vpsrlq $63, %ymm11, %ymm3
|
|
||||||
vpsrlq $63, %ymm12, %ymm2
|
|
||||||
vpsrlq $63, %ymm8, %ymm1
|
|
||||||
vpor %ymm13, %ymm5, %ymm5
|
|
||||||
vpor %ymm14, %ymm4, %ymm4
|
|
||||||
vpor %ymm15, %ymm3, %ymm3
|
|
||||||
vpor %ymm7, %ymm2, %ymm2
|
|
||||||
vpor %ymm6, %ymm1, %ymm1
|
|
||||||
vpxor %ymm5, %ymm12, %ymm5
|
|
||||||
vpxor %ymm4, %ymm8, %ymm4
|
|
||||||
vpxor %ymm3, %ymm9, %ymm3
|
|
||||||
vpxor %ymm2, %ymm10, %ymm2
|
|
||||||
vpxor %ymm1, %ymm11, %ymm1
|
|
||||||
vpxor 0(%rdi), %ymm5, %ymm8
|
|
||||||
vpxor 32(%rdi), %ymm4, %ymm9
|
|
||||||
vpxor 64(%rdi), %ymm3, %ymm10
|
|
||||||
vpxor 96(%rdi), %ymm2, %ymm11
|
|
||||||
vpxor 128(%rdi), %ymm1, %ymm12
|
|
||||||
vpsllq $44, %ymm9, %ymm14
|
|
||||||
vpsllq $43, %ymm10, %ymm15
|
|
||||||
vpsllq $21, %ymm11, %ymm7
|
|
||||||
vpsllq $14, %ymm12, %ymm6
|
|
||||||
vpsrlq $20, %ymm9, %ymm9
|
|
||||||
vpsrlq $21, %ymm10, %ymm10
|
|
||||||
vpsrlq $43, %ymm11, %ymm11
|
|
||||||
vpsrlq $50, %ymm12, %ymm12
|
|
||||||
vpor %ymm14, %ymm9, %ymm9
|
|
||||||
vpor %ymm15, %ymm10, %ymm10
|
|
||||||
vpor %ymm7, %ymm11, %ymm11
|
|
||||||
vpor %ymm6, %ymm12, %ymm12
|
|
||||||
vpandn %ymm10, %ymm9, %ymm13
|
|
||||||
vpandn %ymm11, %ymm10, %ymm14
|
|
||||||
vpandn %ymm12, %ymm11, %ymm15
|
|
||||||
vpandn %ymm8, %ymm12, %ymm7
|
|
||||||
vpandn %ymm9, %ymm8, %ymm6
|
|
||||||
vpxor %ymm8, %ymm13, %ymm13
|
|
||||||
vpxor %ymm9, %ymm14, %ymm14
|
|
||||||
vpxor %ymm10, %ymm15, %ymm15
|
|
||||||
vpxor %ymm11, %ymm7, %ymm7
|
|
||||||
vpxor %ymm12, %ymm6, %ymm6
|
|
||||||
vpbroadcastq 24(%rsi), %ymm8
|
|
||||||
vpxor %ymm8, %ymm13, %ymm13
|
|
||||||
vmovdqa %ymm13, 0(%rdi)
|
|
||||||
vmovdqa %ymm14, 32(%rdi)
|
|
||||||
vmovdqa %ymm15, 64(%rdi)
|
|
||||||
vmovdqa %ymm7, 96(%rdi)
|
|
||||||
vmovdqa %ymm6, 128(%rdi)
|
|
||||||
vpxor 256(%rdi), %ymm2, %ymm8
|
|
||||||
vpxor 288(%rdi), %ymm1, %ymm9
|
|
||||||
vpxor 160(%rdi), %ymm5, %ymm10
|
|
||||||
vpxor 192(%rdi), %ymm4, %ymm11
|
|
||||||
vpxor 224(%rdi), %ymm3, %ymm12
|
|
||||||
vpsllq $28, %ymm8, %ymm13
|
|
||||||
vpsllq $20, %ymm9, %ymm14
|
|
||||||
vpsllq $3, %ymm10, %ymm15
|
|
||||||
vpsllq $45, %ymm11, %ymm7
|
|
||||||
vpsllq $61, %ymm12, %ymm6
|
|
||||||
vpsrlq $36, %ymm8, %ymm8
|
|
||||||
vpsrlq $44, %ymm9, %ymm9
|
|
||||||
vpsrlq $61, %ymm10, %ymm10
|
|
||||||
vpsrlq $19, %ymm11, %ymm11
|
|
||||||
vpsrlq $3, %ymm12, %ymm12
|
|
||||||
vpor %ymm13, %ymm8, %ymm8
|
|
||||||
vpor %ymm14, %ymm9, %ymm9
|
|
||||||
vpor %ymm15, %ymm10, %ymm10
|
|
||||||
vpor %ymm7, %ymm11, %ymm11
|
|
||||||
vpor %ymm6, %ymm12, %ymm12
|
|
||||||
vpandn %ymm10, %ymm9, %ymm13
|
|
||||||
vpandn %ymm11, %ymm10, %ymm14
|
|
||||||
vpandn %ymm12, %ymm11, %ymm15
|
|
||||||
vpandn %ymm8, %ymm12, %ymm7
|
|
||||||
vpandn %ymm9, %ymm8, %ymm6
|
|
||||||
vpxor %ymm8, %ymm13, %ymm13
|
|
||||||
vpxor %ymm9, %ymm14, %ymm14
|
|
||||||
vpxor %ymm10, %ymm15, %ymm15
|
|
||||||
vpxor %ymm11, %ymm7, %ymm7
|
|
||||||
vpxor %ymm12, %ymm6, %ymm6
|
|
||||||
vmovdqa %ymm13, 160(%rdi)
|
|
||||||
vmovdqa %ymm14, 192(%rdi)
|
|
||||||
vmovdqa %ymm15, 224(%rdi)
|
|
||||||
vmovdqa %ymm7, 256(%rdi)
|
|
||||||
vmovdqa %ymm6, 288(%rdi)
|
|
||||||
vpxor 352(%rdi), %ymm4, %ymm8
|
|
||||||
vpxor 384(%rdi), %ymm3, %ymm9
|
|
||||||
vpxor 416(%rdi), %ymm2, %ymm10
|
|
||||||
vpxor 448(%rdi), %ymm1, %ymm11
|
|
||||||
vpxor 320(%rdi), %ymm5, %ymm12
|
|
||||||
vpsllq $1, %ymm8, %ymm13
|
|
||||||
vpsllq $6, %ymm9, %ymm14
|
|
||||||
vpsllq $25, %ymm10, %ymm15
|
|
||||||
#vpsllq $8, %ymm11, %ymm7
|
|
||||||
vpsllq $18, %ymm12, %ymm6
|
|
||||||
vpsrlq $63, %ymm8, %ymm8
|
|
||||||
vpsrlq $58, %ymm9, %ymm9
|
|
||||||
vpsrlq $39, %ymm10, %ymm10
|
|
||||||
#vpsrlq $56, %ymm11, %ymm11
|
|
||||||
vpsrlq $46, %ymm12, %ymm12
|
|
||||||
vpor %ymm13, %ymm8, %ymm8
|
|
||||||
vpor %ymm14, %ymm9, %ymm9
|
|
||||||
vpor %ymm15, %ymm10, %ymm10
|
|
||||||
#vpor %ymm7, %ymm11, %ymm11
|
|
||||||
vpshufb %ymm0, %ymm11, %ymm11
|
|
||||||
vpor %ymm6, %ymm12, %ymm12
|
|
||||||
vpandn %ymm10, %ymm9, %ymm13
|
|
||||||
vpandn %ymm11, %ymm10, %ymm14
|
|
||||||
vpandn %ymm12, %ymm11, %ymm15
|
|
||||||
vpandn %ymm8, %ymm12, %ymm7
|
|
||||||
vpandn %ymm9, %ymm8, %ymm6
|
|
||||||
vpxor %ymm8, %ymm13, %ymm13
|
|
||||||
vpxor %ymm9, %ymm14, %ymm14
|
|
||||||
vpxor %ymm10, %ymm15, %ymm15
|
|
||||||
vpxor %ymm11, %ymm7, %ymm7
|
|
||||||
vpxor %ymm12, %ymm6, %ymm6
|
|
||||||
vmovdqa %ymm13, 320(%rdi)
|
|
||||||
vmovdqa %ymm14, 352(%rdi)
|
|
||||||
vmovdqa %ymm15, 384(%rdi)
|
|
||||||
vmovdqa %ymm7, 416(%rdi)
|
|
||||||
vmovdqa %ymm6, 448(%rdi)
|
|
||||||
vpxor 608(%rdi), %ymm1, %ymm8
|
|
||||||
vpxor 480(%rdi), %ymm5, %ymm9
|
|
||||||
vpxor 512(%rdi), %ymm4, %ymm10
|
|
||||||
vpxor 544(%rdi), %ymm3, %ymm11
|
|
||||||
vpxor 576(%rdi), %ymm2, %ymm12
|
|
||||||
vpsllq $27, %ymm8, %ymm13
|
|
||||||
vpsllq $36, %ymm9, %ymm14
|
|
||||||
vpsllq $10, %ymm10, %ymm15
|
|
||||||
vpsllq $15, %ymm11, %ymm7
|
|
||||||
#vpsllq $56, %ymm12, %ymm6
|
|
||||||
vpsrlq $37, %ymm8, %ymm8
|
|
||||||
vpsrlq $28, %ymm9, %ymm9
|
|
||||||
vpsrlq $54, %ymm10, %ymm10
|
|
||||||
vpsrlq $49, %ymm11, %ymm11
|
|
||||||
#vpsrlq $8, %ymm12, %ymm12
|
|
||||||
vpor %ymm13, %ymm8, %ymm8
|
|
||||||
vpor %ymm14, %ymm9, %ymm9
|
|
||||||
vpor %ymm15, %ymm10, %ymm10
|
|
||||||
vpor %ymm7, %ymm11, %ymm11
|
|
||||||
#vpor %ymm6, %ymm12, %ymm12
|
|
||||||
vpshufb rho56(%rip), %ymm12, %ymm12
|
|
||||||
vpandn %ymm10, %ymm9, %ymm13
|
|
||||||
vpandn %ymm11, %ymm10, %ymm14
|
|
||||||
vpandn %ymm12, %ymm11, %ymm15
|
|
||||||
vpandn %ymm8, %ymm12, %ymm7
|
|
||||||
vpandn %ymm9, %ymm8, %ymm6
|
|
||||||
vpxor %ymm8, %ymm13, %ymm13
|
|
||||||
vpxor %ymm9, %ymm14, %ymm14
|
|
||||||
vpxor %ymm10, %ymm15, %ymm15
|
|
||||||
vpxor %ymm11, %ymm7, %ymm7
|
|
||||||
vpxor %ymm12, %ymm6, %ymm6
|
|
||||||
vmovdqa %ymm13, 480(%rdi)
|
|
||||||
vmovdqa %ymm14, 512(%rdi)
|
|
||||||
vmovdqa %ymm15, 544(%rdi)
|
|
||||||
vmovdqa %ymm7, 576(%rdi)
|
|
||||||
vmovdqa %ymm6, 608(%rdi)
|
|
||||||
vpxor 704(%rdi), %ymm3, %ymm8
|
|
||||||
vpxor 736(%rdi), %ymm2, %ymm9
|
|
||||||
vpxor 768(%rdi), %ymm1, %ymm10
|
|
||||||
vpxor 640(%rdi), %ymm5, %ymm11
|
|
||||||
vpxor 672(%rdi), %ymm4, %ymm12
|
|
||||||
vpsllq $62, %ymm8, %ymm13
|
|
||||||
vpsllq $55, %ymm9, %ymm14
|
|
||||||
vpsllq $39, %ymm10, %ymm15
|
|
||||||
vpsllq $41, %ymm11, %ymm7
|
|
||||||
vpsllq $2, %ymm12, %ymm6
|
|
||||||
vpsrlq $2, %ymm8, %ymm8
|
|
||||||
vpsrlq $9, %ymm9, %ymm9
|
|
||||||
vpsrlq $25, %ymm10, %ymm10
|
|
||||||
vpsrlq $23, %ymm11, %ymm11
|
|
||||||
vpsrlq $62, %ymm12, %ymm12
|
|
||||||
vpor %ymm13, %ymm8, %ymm8
|
|
||||||
vpor %ymm14, %ymm9, %ymm9
|
|
||||||
vpor %ymm15, %ymm10, %ymm10
|
|
||||||
vpor %ymm7, %ymm11, %ymm11
|
|
||||||
vpor %ymm6, %ymm12, %ymm12
|
|
||||||
vpandn %ymm10, %ymm9, %ymm13
|
|
||||||
vpandn %ymm11, %ymm10, %ymm14
|
|
||||||
vpandn %ymm12, %ymm11, %ymm15
|
|
||||||
vpandn %ymm8, %ymm12, %ymm7
|
|
||||||
vpandn %ymm9, %ymm8, %ymm6
|
|
||||||
vpxor %ymm8, %ymm13, %ymm13
|
|
||||||
vpxor %ymm9, %ymm14, %ymm14
|
|
||||||
vpxor %ymm10, %ymm15, %ymm15
|
|
||||||
vpxor %ymm11, %ymm7, %ymm7
|
|
||||||
vpxor %ymm12, %ymm6, %ymm6
|
|
||||||
vmovdqa %ymm13, 640(%rdi)
|
|
||||||
vmovdqa %ymm14, 672(%rdi)
|
|
||||||
vmovdqa %ymm15, 704(%rdi)
|
|
||||||
vmovdqa %ymm7, 736(%rdi)
|
|
||||||
vmovdqa %ymm6, 768(%rdi)
|
|
||||||
addq $32, %rsi
|
|
||||||
subq $1, %rax
|
|
||||||
jnz looptop
|
|
||||||
ret
|
|
@ -1,774 +0,0 @@
|
|||||||
/* Based on the public domain implementation in crypto_hash/keccakc512/simple/ from
|
|
||||||
* http://bench.cr.yp.to/supercop.html by Ronny Van Keer and the public domain "TweetFips202"
|
|
||||||
* implementation from https://twitter.com/tweetfips202 by Gilles Van Assche, Daniel J. Bernstein,
|
|
||||||
* and Peter Schwabe */
|
|
||||||
|
|
||||||
#include <stddef.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
#include "fips202.h"
|
|
||||||
|
|
||||||
#define NROUNDS 24
|
|
||||||
#define ROL(a, offset) ((a << offset) ^ (a >> (64-offset)))
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: load64
|
|
||||||
*
|
|
||||||
* Description: Load 8 bytes into uint64_t in little-endian order
|
|
||||||
*
|
|
||||||
* Arguments: - const uint8_t *x: pointer to input byte array
|
|
||||||
*
|
|
||||||
* Returns the loaded 64-bit unsigned integer
|
|
||||||
**************************************************/
|
|
||||||
static uint64_t load64(const uint8_t x[8]) {
|
|
||||||
unsigned int i;
|
|
||||||
uint64_t r = 0;
|
|
||||||
|
|
||||||
for(i=0;i<8;i++)
|
|
||||||
r |= (uint64_t)x[i] << 8*i;
|
|
||||||
|
|
||||||
return r;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: store64
|
|
||||||
*
|
|
||||||
* Description: Store a 64-bit integer to array of 8 bytes in little-endian order
|
|
||||||
*
|
|
||||||
* Arguments: - uint8_t *x: pointer to the output byte array (allocated)
|
|
||||||
* - uint64_t u: input 64-bit unsigned integer
|
|
||||||
**************************************************/
|
|
||||||
static void store64(uint8_t x[8], uint64_t u) {
|
|
||||||
unsigned int i;
|
|
||||||
|
|
||||||
for(i=0;i<8;i++)
|
|
||||||
x[i] = u >> 8*i;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Keccak round constants */
|
|
||||||
const uint64_t KeccakF_RoundConstants[NROUNDS] = {
|
|
||||||
(uint64_t)0x0000000000000001ULL,
|
|
||||||
(uint64_t)0x0000000000008082ULL,
|
|
||||||
(uint64_t)0x800000000000808aULL,
|
|
||||||
(uint64_t)0x8000000080008000ULL,
|
|
||||||
(uint64_t)0x000000000000808bULL,
|
|
||||||
(uint64_t)0x0000000080000001ULL,
|
|
||||||
(uint64_t)0x8000000080008081ULL,
|
|
||||||
(uint64_t)0x8000000000008009ULL,
|
|
||||||
(uint64_t)0x000000000000008aULL,
|
|
||||||
(uint64_t)0x0000000000000088ULL,
|
|
||||||
(uint64_t)0x0000000080008009ULL,
|
|
||||||
(uint64_t)0x000000008000000aULL,
|
|
||||||
(uint64_t)0x000000008000808bULL,
|
|
||||||
(uint64_t)0x800000000000008bULL,
|
|
||||||
(uint64_t)0x8000000000008089ULL,
|
|
||||||
(uint64_t)0x8000000000008003ULL,
|
|
||||||
(uint64_t)0x8000000000008002ULL,
|
|
||||||
(uint64_t)0x8000000000000080ULL,
|
|
||||||
(uint64_t)0x000000000000800aULL,
|
|
||||||
(uint64_t)0x800000008000000aULL,
|
|
||||||
(uint64_t)0x8000000080008081ULL,
|
|
||||||
(uint64_t)0x8000000000008080ULL,
|
|
||||||
(uint64_t)0x0000000080000001ULL,
|
|
||||||
(uint64_t)0x8000000080008008ULL
|
|
||||||
};
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: KeccakF1600_StatePermute
|
|
||||||
*
|
|
||||||
* Description: The Keccak F1600 Permutation
|
|
||||||
*
|
|
||||||
* Arguments: - uint64_t *state: pointer to input/output Keccak state
|
|
||||||
**************************************************/
|
|
||||||
static void KeccakF1600_StatePermute(uint64_t state[25])
|
|
||||||
{
|
|
||||||
int round;
|
|
||||||
|
|
||||||
uint64_t Aba, Abe, Abi, Abo, Abu;
|
|
||||||
uint64_t Aga, Age, Agi, Ago, Agu;
|
|
||||||
uint64_t Aka, Ake, Aki, Ako, Aku;
|
|
||||||
uint64_t Ama, Ame, Ami, Amo, Amu;
|
|
||||||
uint64_t Asa, Ase, Asi, Aso, Asu;
|
|
||||||
uint64_t BCa, BCe, BCi, BCo, BCu;
|
|
||||||
uint64_t Da, De, Di, Do, Du;
|
|
||||||
uint64_t Eba, Ebe, Ebi, Ebo, Ebu;
|
|
||||||
uint64_t Ega, Ege, Egi, Ego, Egu;
|
|
||||||
uint64_t Eka, Eke, Eki, Eko, Eku;
|
|
||||||
uint64_t Ema, Eme, Emi, Emo, Emu;
|
|
||||||
uint64_t Esa, Ese, Esi, Eso, Esu;
|
|
||||||
|
|
||||||
//copyFromState(A, state)
|
|
||||||
Aba = state[ 0];
|
|
||||||
Abe = state[ 1];
|
|
||||||
Abi = state[ 2];
|
|
||||||
Abo = state[ 3];
|
|
||||||
Abu = state[ 4];
|
|
||||||
Aga = state[ 5];
|
|
||||||
Age = state[ 6];
|
|
||||||
Agi = state[ 7];
|
|
||||||
Ago = state[ 8];
|
|
||||||
Agu = state[ 9];
|
|
||||||
Aka = state[10];
|
|
||||||
Ake = state[11];
|
|
||||||
Aki = state[12];
|
|
||||||
Ako = state[13];
|
|
||||||
Aku = state[14];
|
|
||||||
Ama = state[15];
|
|
||||||
Ame = state[16];
|
|
||||||
Ami = state[17];
|
|
||||||
Amo = state[18];
|
|
||||||
Amu = state[19];
|
|
||||||
Asa = state[20];
|
|
||||||
Ase = state[21];
|
|
||||||
Asi = state[22];
|
|
||||||
Aso = state[23];
|
|
||||||
Asu = state[24];
|
|
||||||
|
|
||||||
for(round = 0; round < NROUNDS; round += 2) {
|
|
||||||
// prepareTheta
|
|
||||||
BCa = Aba^Aga^Aka^Ama^Asa;
|
|
||||||
BCe = Abe^Age^Ake^Ame^Ase;
|
|
||||||
BCi = Abi^Agi^Aki^Ami^Asi;
|
|
||||||
BCo = Abo^Ago^Ako^Amo^Aso;
|
|
||||||
BCu = Abu^Agu^Aku^Amu^Asu;
|
|
||||||
|
|
||||||
//thetaRhoPiChiIotaPrepareTheta(round, A, E)
|
|
||||||
Da = BCu^ROL(BCe, 1);
|
|
||||||
De = BCa^ROL(BCi, 1);
|
|
||||||
Di = BCe^ROL(BCo, 1);
|
|
||||||
Do = BCi^ROL(BCu, 1);
|
|
||||||
Du = BCo^ROL(BCa, 1);
|
|
||||||
|
|
||||||
Aba ^= Da;
|
|
||||||
BCa = Aba;
|
|
||||||
Age ^= De;
|
|
||||||
BCe = ROL(Age, 44);
|
|
||||||
Aki ^= Di;
|
|
||||||
BCi = ROL(Aki, 43);
|
|
||||||
Amo ^= Do;
|
|
||||||
BCo = ROL(Amo, 21);
|
|
||||||
Asu ^= Du;
|
|
||||||
BCu = ROL(Asu, 14);
|
|
||||||
Eba = BCa ^((~BCe)& BCi );
|
|
||||||
Eba ^= (uint64_t)KeccakF_RoundConstants[round];
|
|
||||||
Ebe = BCe ^((~BCi)& BCo );
|
|
||||||
Ebi = BCi ^((~BCo)& BCu );
|
|
||||||
Ebo = BCo ^((~BCu)& BCa );
|
|
||||||
Ebu = BCu ^((~BCa)& BCe );
|
|
||||||
|
|
||||||
Abo ^= Do;
|
|
||||||
BCa = ROL(Abo, 28);
|
|
||||||
Agu ^= Du;
|
|
||||||
BCe = ROL(Agu, 20);
|
|
||||||
Aka ^= Da;
|
|
||||||
BCi = ROL(Aka, 3);
|
|
||||||
Ame ^= De;
|
|
||||||
BCo = ROL(Ame, 45);
|
|
||||||
Asi ^= Di;
|
|
||||||
BCu = ROL(Asi, 61);
|
|
||||||
Ega = BCa ^((~BCe)& BCi );
|
|
||||||
Ege = BCe ^((~BCi)& BCo );
|
|
||||||
Egi = BCi ^((~BCo)& BCu );
|
|
||||||
Ego = BCo ^((~BCu)& BCa );
|
|
||||||
Egu = BCu ^((~BCa)& BCe );
|
|
||||||
|
|
||||||
Abe ^= De;
|
|
||||||
BCa = ROL(Abe, 1);
|
|
||||||
Agi ^= Di;
|
|
||||||
BCe = ROL(Agi, 6);
|
|
||||||
Ako ^= Do;
|
|
||||||
BCi = ROL(Ako, 25);
|
|
||||||
Amu ^= Du;
|
|
||||||
BCo = ROL(Amu, 8);
|
|
||||||
Asa ^= Da;
|
|
||||||
BCu = ROL(Asa, 18);
|
|
||||||
Eka = BCa ^((~BCe)& BCi );
|
|
||||||
Eke = BCe ^((~BCi)& BCo );
|
|
||||||
Eki = BCi ^((~BCo)& BCu );
|
|
||||||
Eko = BCo ^((~BCu)& BCa );
|
|
||||||
Eku = BCu ^((~BCa)& BCe );
|
|
||||||
|
|
||||||
Abu ^= Du;
|
|
||||||
BCa = ROL(Abu, 27);
|
|
||||||
Aga ^= Da;
|
|
||||||
BCe = ROL(Aga, 36);
|
|
||||||
Ake ^= De;
|
|
||||||
BCi = ROL(Ake, 10);
|
|
||||||
Ami ^= Di;
|
|
||||||
BCo = ROL(Ami, 15);
|
|
||||||
Aso ^= Do;
|
|
||||||
BCu = ROL(Aso, 56);
|
|
||||||
Ema = BCa ^((~BCe)& BCi );
|
|
||||||
Eme = BCe ^((~BCi)& BCo );
|
|
||||||
Emi = BCi ^((~BCo)& BCu );
|
|
||||||
Emo = BCo ^((~BCu)& BCa );
|
|
||||||
Emu = BCu ^((~BCa)& BCe );
|
|
||||||
|
|
||||||
Abi ^= Di;
|
|
||||||
BCa = ROL(Abi, 62);
|
|
||||||
Ago ^= Do;
|
|
||||||
BCe = ROL(Ago, 55);
|
|
||||||
Aku ^= Du;
|
|
||||||
BCi = ROL(Aku, 39);
|
|
||||||
Ama ^= Da;
|
|
||||||
BCo = ROL(Ama, 41);
|
|
||||||
Ase ^= De;
|
|
||||||
BCu = ROL(Ase, 2);
|
|
||||||
Esa = BCa ^((~BCe)& BCi );
|
|
||||||
Ese = BCe ^((~BCi)& BCo );
|
|
||||||
Esi = BCi ^((~BCo)& BCu );
|
|
||||||
Eso = BCo ^((~BCu)& BCa );
|
|
||||||
Esu = BCu ^((~BCa)& BCe );
|
|
||||||
|
|
||||||
// prepareTheta
|
|
||||||
BCa = Eba^Ega^Eka^Ema^Esa;
|
|
||||||
BCe = Ebe^Ege^Eke^Eme^Ese;
|
|
||||||
BCi = Ebi^Egi^Eki^Emi^Esi;
|
|
||||||
BCo = Ebo^Ego^Eko^Emo^Eso;
|
|
||||||
BCu = Ebu^Egu^Eku^Emu^Esu;
|
|
||||||
|
|
||||||
//thetaRhoPiChiIotaPrepareTheta(round+1, E, A)
|
|
||||||
Da = BCu^ROL(BCe, 1);
|
|
||||||
De = BCa^ROL(BCi, 1);
|
|
||||||
Di = BCe^ROL(BCo, 1);
|
|
||||||
Do = BCi^ROL(BCu, 1);
|
|
||||||
Du = BCo^ROL(BCa, 1);
|
|
||||||
|
|
||||||
Eba ^= Da;
|
|
||||||
BCa = Eba;
|
|
||||||
Ege ^= De;
|
|
||||||
BCe = ROL(Ege, 44);
|
|
||||||
Eki ^= Di;
|
|
||||||
BCi = ROL(Eki, 43);
|
|
||||||
Emo ^= Do;
|
|
||||||
BCo = ROL(Emo, 21);
|
|
||||||
Esu ^= Du;
|
|
||||||
BCu = ROL(Esu, 14);
|
|
||||||
Aba = BCa ^((~BCe)& BCi );
|
|
||||||
Aba ^= (uint64_t)KeccakF_RoundConstants[round+1];
|
|
||||||
Abe = BCe ^((~BCi)& BCo );
|
|
||||||
Abi = BCi ^((~BCo)& BCu );
|
|
||||||
Abo = BCo ^((~BCu)& BCa );
|
|
||||||
Abu = BCu ^((~BCa)& BCe );
|
|
||||||
|
|
||||||
Ebo ^= Do;
|
|
||||||
BCa = ROL(Ebo, 28);
|
|
||||||
Egu ^= Du;
|
|
||||||
BCe = ROL(Egu, 20);
|
|
||||||
Eka ^= Da;
|
|
||||||
BCi = ROL(Eka, 3);
|
|
||||||
Eme ^= De;
|
|
||||||
BCo = ROL(Eme, 45);
|
|
||||||
Esi ^= Di;
|
|
||||||
BCu = ROL(Esi, 61);
|
|
||||||
Aga = BCa ^((~BCe)& BCi );
|
|
||||||
Age = BCe ^((~BCi)& BCo );
|
|
||||||
Agi = BCi ^((~BCo)& BCu );
|
|
||||||
Ago = BCo ^((~BCu)& BCa );
|
|
||||||
Agu = BCu ^((~BCa)& BCe );
|
|
||||||
|
|
||||||
Ebe ^= De;
|
|
||||||
BCa = ROL(Ebe, 1);
|
|
||||||
Egi ^= Di;
|
|
||||||
BCe = ROL(Egi, 6);
|
|
||||||
Eko ^= Do;
|
|
||||||
BCi = ROL(Eko, 25);
|
|
||||||
Emu ^= Du;
|
|
||||||
BCo = ROL(Emu, 8);
|
|
||||||
Esa ^= Da;
|
|
||||||
BCu = ROL(Esa, 18);
|
|
||||||
Aka = BCa ^((~BCe)& BCi );
|
|
||||||
Ake = BCe ^((~BCi)& BCo );
|
|
||||||
Aki = BCi ^((~BCo)& BCu );
|
|
||||||
Ako = BCo ^((~BCu)& BCa );
|
|
||||||
Aku = BCu ^((~BCa)& BCe );
|
|
||||||
|
|
||||||
Ebu ^= Du;
|
|
||||||
BCa = ROL(Ebu, 27);
|
|
||||||
Ega ^= Da;
|
|
||||||
BCe = ROL(Ega, 36);
|
|
||||||
Eke ^= De;
|
|
||||||
BCi = ROL(Eke, 10);
|
|
||||||
Emi ^= Di;
|
|
||||||
BCo = ROL(Emi, 15);
|
|
||||||
Eso ^= Do;
|
|
||||||
BCu = ROL(Eso, 56);
|
|
||||||
Ama = BCa ^((~BCe)& BCi );
|
|
||||||
Ame = BCe ^((~BCi)& BCo );
|
|
||||||
Ami = BCi ^((~BCo)& BCu );
|
|
||||||
Amo = BCo ^((~BCu)& BCa );
|
|
||||||
Amu = BCu ^((~BCa)& BCe );
|
|
||||||
|
|
||||||
Ebi ^= Di;
|
|
||||||
BCa = ROL(Ebi, 62);
|
|
||||||
Ego ^= Do;
|
|
||||||
BCe = ROL(Ego, 55);
|
|
||||||
Eku ^= Du;
|
|
||||||
BCi = ROL(Eku, 39);
|
|
||||||
Ema ^= Da;
|
|
||||||
BCo = ROL(Ema, 41);
|
|
||||||
Ese ^= De;
|
|
||||||
BCu = ROL(Ese, 2);
|
|
||||||
Asa = BCa ^((~BCe)& BCi );
|
|
||||||
Ase = BCe ^((~BCi)& BCo );
|
|
||||||
Asi = BCi ^((~BCo)& BCu );
|
|
||||||
Aso = BCo ^((~BCu)& BCa );
|
|
||||||
Asu = BCu ^((~BCa)& BCe );
|
|
||||||
}
|
|
||||||
|
|
||||||
//copyToState(state, A)
|
|
||||||
state[ 0] = Aba;
|
|
||||||
state[ 1] = Abe;
|
|
||||||
state[ 2] = Abi;
|
|
||||||
state[ 3] = Abo;
|
|
||||||
state[ 4] = Abu;
|
|
||||||
state[ 5] = Aga;
|
|
||||||
state[ 6] = Age;
|
|
||||||
state[ 7] = Agi;
|
|
||||||
state[ 8] = Ago;
|
|
||||||
state[ 9] = Agu;
|
|
||||||
state[10] = Aka;
|
|
||||||
state[11] = Ake;
|
|
||||||
state[12] = Aki;
|
|
||||||
state[13] = Ako;
|
|
||||||
state[14] = Aku;
|
|
||||||
state[15] = Ama;
|
|
||||||
state[16] = Ame;
|
|
||||||
state[17] = Ami;
|
|
||||||
state[18] = Amo;
|
|
||||||
state[19] = Amu;
|
|
||||||
state[20] = Asa;
|
|
||||||
state[21] = Ase;
|
|
||||||
state[22] = Asi;
|
|
||||||
state[23] = Aso;
|
|
||||||
state[24] = Asu;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: keccak_init
|
|
||||||
*
|
|
||||||
* Description: Initializes the Keccak state.
|
|
||||||
*
|
|
||||||
* Arguments: - uint64_t *s: pointer to Keccak state
|
|
||||||
**************************************************/
|
|
||||||
static void keccak_init(uint64_t s[25])
|
|
||||||
{
|
|
||||||
unsigned int i;
|
|
||||||
for(i=0;i<25;i++)
|
|
||||||
s[i] = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: keccak_absorb
|
|
||||||
*
|
|
||||||
* Description: Absorb step of Keccak; incremental.
|
|
||||||
*
|
|
||||||
* Arguments: - uint64_t *s: pointer to Keccak state
|
|
||||||
* - unsigned int pos: position in current block to be absorbed
|
|
||||||
* - unsigned int r: rate in bytes (e.g., 168 for SHAKE128)
|
|
||||||
* - const uint8_t *in: pointer to input to be absorbed into s
|
|
||||||
* - size_t inlen: length of input in bytes
|
|
||||||
*
|
|
||||||
* Returns new position pos in current block
|
|
||||||
**************************************************/
|
|
||||||
static unsigned int keccak_absorb(uint64_t s[25],
|
|
||||||
unsigned int pos,
|
|
||||||
unsigned int r,
|
|
||||||
const uint8_t *in,
|
|
||||||
size_t inlen)
|
|
||||||
{
|
|
||||||
unsigned int i;
|
|
||||||
|
|
||||||
while(pos+inlen >= r) {
|
|
||||||
for(i=pos;i<r;i++)
|
|
||||||
s[i/8] ^= (uint64_t)*in++ << 8*(i%8);
|
|
||||||
inlen -= r-pos;
|
|
||||||
KeccakF1600_StatePermute(s);
|
|
||||||
pos = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
for(i=pos;i<pos+inlen;i++)
|
|
||||||
s[i/8] ^= (uint64_t)*in++ << 8*(i%8);
|
|
||||||
|
|
||||||
return i;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: keccak_finalize
|
|
||||||
*
|
|
||||||
* Description: Finalize absorb step.
|
|
||||||
*
|
|
||||||
* Arguments: - uint64_t *s: pointer to Keccak state
|
|
||||||
* - unsigned int pos: position in current block to be absorbed
|
|
||||||
* - unsigned int r: rate in bytes (e.g., 168 for SHAKE128)
|
|
||||||
* - uint8_t p: domain separation byte
|
|
||||||
**************************************************/
|
|
||||||
static void keccak_finalize(uint64_t s[25], unsigned int pos, unsigned int r, uint8_t p)
|
|
||||||
{
|
|
||||||
s[pos/8] ^= (uint64_t)p << 8*(pos%8);
|
|
||||||
s[r/8-1] ^= 1ULL << 63;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: keccak_squeeze
|
|
||||||
*
|
|
||||||
* Description: Squeeze step of Keccak. Squeezes arbitratrily many bytes.
|
|
||||||
* Modifies the state. Can be called multiple times to keep
|
|
||||||
* squeezing, i.e., is incremental.
|
|
||||||
*
|
|
||||||
* Arguments: - uint8_t *out: pointer to output
|
|
||||||
* - size_t outlen: number of bytes to be squeezed (written to out)
|
|
||||||
* - uint64_t *s: pointer to input/output Keccak state
|
|
||||||
* - unsigned int pos: number of bytes in current block already squeezed
|
|
||||||
* - unsigned int r: rate in bytes (e.g., 168 for SHAKE128)
|
|
||||||
*
|
|
||||||
* Returns new position pos in current block
|
|
||||||
**************************************************/
|
|
||||||
static unsigned int keccak_squeeze(uint8_t *out,
|
|
||||||
size_t outlen,
|
|
||||||
uint64_t s[25],
|
|
||||||
unsigned int pos,
|
|
||||||
unsigned int r)
|
|
||||||
{
|
|
||||||
unsigned int i;
|
|
||||||
|
|
||||||
while(outlen) {
|
|
||||||
if(pos == r) {
|
|
||||||
KeccakF1600_StatePermute(s);
|
|
||||||
pos = 0;
|
|
||||||
}
|
|
||||||
for(i=pos;i < r && i < pos+outlen; i++)
|
|
||||||
*out++ = s[i/8] >> 8*(i%8);
|
|
||||||
outlen -= i-pos;
|
|
||||||
pos = i;
|
|
||||||
}
|
|
||||||
|
|
||||||
return pos;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: keccak_absorb_once
|
|
||||||
*
|
|
||||||
* Description: Absorb step of Keccak;
|
|
||||||
* non-incremental, starts by zeroeing the state.
|
|
||||||
*
|
|
||||||
* Arguments: - uint64_t *s: pointer to (uninitialized) output Keccak state
|
|
||||||
* - unsigned int r: rate in bytes (e.g., 168 for SHAKE128)
|
|
||||||
* - const uint8_t *in: pointer to input to be absorbed into s
|
|
||||||
* - size_t inlen: length of input in bytes
|
|
||||||
* - uint8_t p: domain-separation byte for different Keccak-derived functions
|
|
||||||
**************************************************/
|
|
||||||
static void keccak_absorb_once(uint64_t s[25],
|
|
||||||
unsigned int r,
|
|
||||||
const uint8_t *in,
|
|
||||||
size_t inlen,
|
|
||||||
uint8_t p)
|
|
||||||
{
|
|
||||||
unsigned int i;
|
|
||||||
|
|
||||||
for(i=0;i<25;i++)
|
|
||||||
s[i] = 0;
|
|
||||||
|
|
||||||
while(inlen >= r) {
|
|
||||||
for(i=0;i<r/8;i++)
|
|
||||||
s[i] ^= load64(in+8*i);
|
|
||||||
in += r;
|
|
||||||
inlen -= r;
|
|
||||||
KeccakF1600_StatePermute(s);
|
|
||||||
}
|
|
||||||
|
|
||||||
for(i=0;i<inlen;i++)
|
|
||||||
s[i/8] ^= (uint64_t)in[i] << 8*(i%8);
|
|
||||||
|
|
||||||
s[i/8] ^= (uint64_t)p << 8*(i%8);
|
|
||||||
s[(r-1)/8] ^= 1ULL << 63;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: keccak_squeezeblocks
|
|
||||||
*
|
|
||||||
* Description: Squeeze step of Keccak. Squeezes full blocks of r bytes each.
|
|
||||||
* Modifies the state. Can be called multiple times to keep
|
|
||||||
* squeezing, i.e., is incremental. Assumes zero bytes of current
|
|
||||||
* block have already been squeezed.
|
|
||||||
*
|
|
||||||
* Arguments: - uint8_t *out: pointer to output blocks
|
|
||||||
* - size_t nblocks: number of blocks to be squeezed (written to out)
|
|
||||||
* - uint64_t *s: pointer to input/output Keccak state
|
|
||||||
* - unsigned int r: rate in bytes (e.g., 168 for SHAKE128)
|
|
||||||
**************************************************/
|
|
||||||
static void keccak_squeezeblocks(uint8_t *out,
|
|
||||||
size_t nblocks,
|
|
||||||
uint64_t s[25],
|
|
||||||
unsigned int r)
|
|
||||||
{
|
|
||||||
unsigned int i;
|
|
||||||
|
|
||||||
while(nblocks) {
|
|
||||||
KeccakF1600_StatePermute(s);
|
|
||||||
for(i=0;i<r/8;i++)
|
|
||||||
store64(out+8*i, s[i]);
|
|
||||||
out += r;
|
|
||||||
nblocks -= 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: shake128_init
|
|
||||||
*
|
|
||||||
* Description: Initilizes Keccak state for use as SHAKE128 XOF
|
|
||||||
*
|
|
||||||
* Arguments: - keccak_state *state: pointer to (uninitialized) Keccak state
|
|
||||||
**************************************************/
|
|
||||||
void shake128_init(keccak_state *state)
|
|
||||||
{
|
|
||||||
keccak_init(state->s);
|
|
||||||
state->pos = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: shake128_absorb
|
|
||||||
*
|
|
||||||
* Description: Absorb step of the SHAKE128 XOF; incremental.
|
|
||||||
*
|
|
||||||
* Arguments: - keccak_state *state: pointer to (initialized) output Keccak state
|
|
||||||
* - const uint8_t *in: pointer to input to be absorbed into s
|
|
||||||
* - size_t inlen: length of input in bytes
|
|
||||||
**************************************************/
|
|
||||||
void shake128_absorb(keccak_state *state, const uint8_t *in, size_t inlen)
|
|
||||||
{
|
|
||||||
state->pos = keccak_absorb(state->s, state->pos, SHAKE128_RATE, in, inlen);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: shake128_finalize
|
|
||||||
*
|
|
||||||
* Description: Finalize absorb step of the SHAKE128 XOF.
|
|
||||||
*
|
|
||||||
* Arguments: - keccak_state *state: pointer to Keccak state
|
|
||||||
**************************************************/
|
|
||||||
void shake128_finalize(keccak_state *state)
|
|
||||||
{
|
|
||||||
keccak_finalize(state->s, state->pos, SHAKE128_RATE, 0x1F);
|
|
||||||
state->pos = SHAKE128_RATE;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: shake128_squeeze
|
|
||||||
*
|
|
||||||
* Description: Squeeze step of SHAKE128 XOF. Squeezes arbitraily many
|
|
||||||
* bytes. Can be called multiple times to keep squeezing.
|
|
||||||
*
|
|
||||||
* Arguments: - uint8_t *out: pointer to output blocks
|
|
||||||
* - size_t outlen : number of bytes to be squeezed (written to output)
|
|
||||||
* - keccak_state *s: pointer to input/output Keccak state
|
|
||||||
**************************************************/
|
|
||||||
void shake128_squeeze(uint8_t *out, size_t outlen, keccak_state *state)
|
|
||||||
{
|
|
||||||
state->pos = keccak_squeeze(out, outlen, state->s, state->pos, SHAKE128_RATE);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: shake128_absorb_once
|
|
||||||
*
|
|
||||||
* Description: Initialize, absorb into and finalize SHAKE128 XOF; non-incremental.
|
|
||||||
*
|
|
||||||
* Arguments: - keccak_state *state: pointer to (uninitialized) output Keccak state
|
|
||||||
* - const uint8_t *in: pointer to input to be absorbed into s
|
|
||||||
* - size_t inlen: length of input in bytes
|
|
||||||
**************************************************/
|
|
||||||
void shake128_absorb_once(keccak_state *state, const uint8_t *in, size_t inlen)
|
|
||||||
{
|
|
||||||
keccak_absorb_once(state->s, SHAKE128_RATE, in, inlen, 0x1F);
|
|
||||||
state->pos = SHAKE128_RATE;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: shake128_squeezeblocks
|
|
||||||
*
|
|
||||||
* Description: Squeeze step of SHAKE128 XOF. Squeezes full blocks of
|
|
||||||
* SHAKE128_RATE bytes each. Can be called multiple times
|
|
||||||
* to keep squeezing. Assumes new block has not yet been
|
|
||||||
* started (state->pos = SHAKE128_RATE).
|
|
||||||
*
|
|
||||||
* Arguments: - uint8_t *out: pointer to output blocks
|
|
||||||
* - size_t nblocks: number of blocks to be squeezed (written to output)
|
|
||||||
* - keccak_state *s: pointer to input/output Keccak state
|
|
||||||
**************************************************/
|
|
||||||
void shake128_squeezeblocks(uint8_t *out, size_t nblocks, keccak_state *state)
|
|
||||||
{
|
|
||||||
keccak_squeezeblocks(out, nblocks, state->s, SHAKE128_RATE);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: shake256_init
|
|
||||||
*
|
|
||||||
* Description: Initilizes Keccak state for use as SHAKE256 XOF
|
|
||||||
*
|
|
||||||
* Arguments: - keccak_state *state: pointer to (uninitialized) Keccak state
|
|
||||||
**************************************************/
|
|
||||||
void shake256_init(keccak_state *state)
|
|
||||||
{
|
|
||||||
keccak_init(state->s);
|
|
||||||
state->pos = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: shake256_absorb
|
|
||||||
*
|
|
||||||
* Description: Absorb step of the SHAKE256 XOF; incremental.
|
|
||||||
*
|
|
||||||
* Arguments: - keccak_state *state: pointer to (initialized) output Keccak state
|
|
||||||
* - const uint8_t *in: pointer to input to be absorbed into s
|
|
||||||
* - size_t inlen: length of input in bytes
|
|
||||||
**************************************************/
|
|
||||||
void shake256_absorb(keccak_state *state, const uint8_t *in, size_t inlen)
|
|
||||||
{
|
|
||||||
state->pos = keccak_absorb(state->s, state->pos, SHAKE256_RATE, in, inlen);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: shake256_finalize
|
|
||||||
*
|
|
||||||
* Description: Finalize absorb step of the SHAKE256 XOF.
|
|
||||||
*
|
|
||||||
* Arguments: - keccak_state *state: pointer to Keccak state
|
|
||||||
**************************************************/
|
|
||||||
void shake256_finalize(keccak_state *state)
|
|
||||||
{
|
|
||||||
keccak_finalize(state->s, state->pos, SHAKE256_RATE, 0x1F);
|
|
||||||
state->pos = SHAKE256_RATE;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: shake256_squeeze
|
|
||||||
*
|
|
||||||
* Description: Squeeze step of SHAKE256 XOF. Squeezes arbitraily many
|
|
||||||
* bytes. Can be called multiple times to keep squeezing.
|
|
||||||
*
|
|
||||||
* Arguments: - uint8_t *out: pointer to output blocks
|
|
||||||
* - size_t outlen : number of bytes to be squeezed (written to output)
|
|
||||||
* - keccak_state *s: pointer to input/output Keccak state
|
|
||||||
**************************************************/
|
|
||||||
void shake256_squeeze(uint8_t *out, size_t outlen, keccak_state *state)
|
|
||||||
{
|
|
||||||
state->pos = keccak_squeeze(out, outlen, state->s, state->pos, SHAKE256_RATE);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: shake256_absorb_once
|
|
||||||
*
|
|
||||||
* Description: Initialize, absorb into and finalize SHAKE256 XOF; non-incremental.
|
|
||||||
*
|
|
||||||
* Arguments: - keccak_state *state: pointer to (uninitialized) output Keccak state
|
|
||||||
* - const uint8_t *in: pointer to input to be absorbed into s
|
|
||||||
* - size_t inlen: length of input in bytes
|
|
||||||
**************************************************/
|
|
||||||
void shake256_absorb_once(keccak_state *state, const uint8_t *in, size_t inlen)
|
|
||||||
{
|
|
||||||
keccak_absorb_once(state->s, SHAKE256_RATE, in, inlen, 0x1F);
|
|
||||||
state->pos = SHAKE256_RATE;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: shake256_squeezeblocks
|
|
||||||
*
|
|
||||||
* Description: Squeeze step of SHAKE256 XOF. Squeezes full blocks of
|
|
||||||
* SHAKE256_RATE bytes each. Can be called multiple times
|
|
||||||
* to keep squeezing. Assumes next block has not yet been
|
|
||||||
* started (state->pos = SHAKE256_RATE).
|
|
||||||
*
|
|
||||||
* Arguments: - uint8_t *out: pointer to output blocks
|
|
||||||
* - size_t nblocks: number of blocks to be squeezed (written to output)
|
|
||||||
* - keccak_state *s: pointer to input/output Keccak state
|
|
||||||
**************************************************/
|
|
||||||
void shake256_squeezeblocks(uint8_t *out, size_t nblocks, keccak_state *state)
|
|
||||||
{
|
|
||||||
keccak_squeezeblocks(out, nblocks, state->s, SHAKE256_RATE);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: shake128
|
|
||||||
*
|
|
||||||
* Description: SHAKE128 XOF with non-incremental API
|
|
||||||
*
|
|
||||||
* Arguments: - uint8_t *out: pointer to output
|
|
||||||
* - size_t outlen: requested output length in bytes
|
|
||||||
* - const uint8_t *in: pointer to input
|
|
||||||
* - size_t inlen: length of input in bytes
|
|
||||||
**************************************************/
|
|
||||||
void shake128(uint8_t *out, size_t outlen, const uint8_t *in, size_t inlen)
|
|
||||||
{
|
|
||||||
size_t nblocks;
|
|
||||||
keccak_state state;
|
|
||||||
|
|
||||||
shake128_absorb_once(&state, in, inlen);
|
|
||||||
nblocks = outlen/SHAKE128_RATE;
|
|
||||||
shake128_squeezeblocks(out, nblocks, &state);
|
|
||||||
outlen -= nblocks*SHAKE128_RATE;
|
|
||||||
out += nblocks*SHAKE128_RATE;
|
|
||||||
shake128_squeeze(out, outlen, &state);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: shake256
|
|
||||||
*
|
|
||||||
* Description: SHAKE256 XOF with non-incremental API
|
|
||||||
*
|
|
||||||
* Arguments: - uint8_t *out: pointer to output
|
|
||||||
* - size_t outlen: requested output length in bytes
|
|
||||||
* - const uint8_t *in: pointer to input
|
|
||||||
* - size_t inlen: length of input in bytes
|
|
||||||
**************************************************/
|
|
||||||
void shake256(uint8_t *out, size_t outlen, const uint8_t *in, size_t inlen)
|
|
||||||
{
|
|
||||||
size_t nblocks;
|
|
||||||
keccak_state state;
|
|
||||||
|
|
||||||
shake256_absorb_once(&state, in, inlen);
|
|
||||||
nblocks = outlen/SHAKE256_RATE;
|
|
||||||
shake256_squeezeblocks(out, nblocks, &state);
|
|
||||||
outlen -= nblocks*SHAKE256_RATE;
|
|
||||||
out += nblocks*SHAKE256_RATE;
|
|
||||||
shake256_squeeze(out, outlen, &state);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: sha3_256
|
|
||||||
*
|
|
||||||
* Description: SHA3-256 with non-incremental API
|
|
||||||
*
|
|
||||||
* Arguments: - uint8_t *h: pointer to output (32 bytes)
|
|
||||||
* - const uint8_t *in: pointer to input
|
|
||||||
* - size_t inlen: length of input in bytes
|
|
||||||
**************************************************/
|
|
||||||
void sha3_256(uint8_t h[32], const uint8_t *in, size_t inlen)
|
|
||||||
{
|
|
||||||
unsigned int i;
|
|
||||||
uint64_t s[25];
|
|
||||||
|
|
||||||
keccak_absorb_once(s, SHA3_256_RATE, in, inlen, 0x06);
|
|
||||||
KeccakF1600_StatePermute(s);
|
|
||||||
for(i=0;i<4;i++)
|
|
||||||
store64(h+8*i,s[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Name: sha3_512
|
|
||||||
*
|
|
||||||
* Description: SHA3-512 with non-incremental API
|
|
||||||
*
|
|
||||||
* Arguments: - uint8_t *h: pointer to output (64 bytes)
|
|
||||||
* - const uint8_t *in: pointer to input
|
|
||||||
* - size_t inlen: length of input in bytes
|
|
||||||
**************************************************/
|
|
||||||
void sha3_512(uint8_t h[64], const uint8_t *in, size_t inlen)
|
|
||||||
{
|
|
||||||
unsigned int i;
|
|
||||||
uint64_t s[25];
|
|
||||||
|
|
||||||
keccak_absorb_once(s, SHA3_512_RATE, in, inlen, 0x06);
|
|
||||||
KeccakF1600_StatePermute(s);
|
|
||||||
for(i=0;i<8;i++)
|
|
||||||
store64(h+8*i,s[i]);
|
|
||||||
}
|
|
@ -1,57 +0,0 @@
|
|||||||
#ifndef FIPS202_H
|
|
||||||
#define FIPS202_H
|
|
||||||
|
|
||||||
#include <stddef.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
|
|
||||||
#define SHAKE128_RATE 168
|
|
||||||
#define SHAKE256_RATE 136
|
|
||||||
#define SHA3_256_RATE 136
|
|
||||||
#define SHA3_512_RATE 72
|
|
||||||
|
|
||||||
#define FIPS202_NAMESPACE(s) pqcrystals_dilithium_fips202_avx2_##s
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
uint64_t s[25];
|
|
||||||
unsigned int pos;
|
|
||||||
} keccak_state;
|
|
||||||
|
|
||||||
#define KeccakF_RoundConstants FIPS202_NAMESPACE(KeccakF_RoundConstants)
|
|
||||||
extern const uint64_t KeccakF_RoundConstants[];
|
|
||||||
|
|
||||||
#define shake128_init FIPS202_NAMESPACE(shake128_init)
|
|
||||||
void shake128_init(keccak_state *state);
|
|
||||||
#define shake128_absorb FIPS202_NAMESPACE(shake128_absorb)
|
|
||||||
void shake128_absorb(keccak_state *state, const uint8_t *in, size_t inlen);
|
|
||||||
#define shake128_finalize FIPS202_NAMESPACE(shake128_finalize)
|
|
||||||
void shake128_finalize(keccak_state *state);
|
|
||||||
#define shake128_squeeze FIPS202_NAMESPACE(shake128_squeeze)
|
|
||||||
void shake128_squeeze(uint8_t *out, size_t outlen, keccak_state *state);
|
|
||||||
#define shake128_absorb_once FIPS202_NAMESPACE(shake128_absorb_once)
|
|
||||||
void shake128_absorb_once(keccak_state *state, const uint8_t *in, size_t inlen);
|
|
||||||
#define shake128_squeezeblocks FIPS202_NAMESPACE(shake128_squeezeblocks)
|
|
||||||
void shake128_squeezeblocks(uint8_t *out, size_t nblocks, keccak_state *state);
|
|
||||||
|
|
||||||
#define shake256_init FIPS202_NAMESPACE(shake256_init)
|
|
||||||
void shake256_init(keccak_state *state);
|
|
||||||
#define shake256_absorb FIPS202_NAMESPACE(shake256_absorb)
|
|
||||||
void shake256_absorb(keccak_state *state, const uint8_t *in, size_t inlen);
|
|
||||||
#define shake256_finalize FIPS202_NAMESPACE(shake256_finalize)
|
|
||||||
void shake256_finalize(keccak_state *state);
|
|
||||||
#define shake256_squeeze FIPS202_NAMESPACE(shake256_squeeze)
|
|
||||||
void shake256_squeeze(uint8_t *out, size_t outlen, keccak_state *state);
|
|
||||||
#define shake256_absorb_once FIPS202_NAMESPACE(shake256_absorb_once)
|
|
||||||
void shake256_absorb_once(keccak_state *state, const uint8_t *in, size_t inlen);
|
|
||||||
#define shake256_squeezeblocks FIPS202_NAMESPACE(shake256_squeezeblocks)
|
|
||||||
void shake256_squeezeblocks(uint8_t *out, size_t nblocks, keccak_state *state);
|
|
||||||
|
|
||||||
#define shake128 FIPS202_NAMESPACE(shake128)
|
|
||||||
void shake128(uint8_t *out, size_t outlen, const uint8_t *in, size_t inlen);
|
|
||||||
#define shake256 FIPS202_NAMESPACE(shake256)
|
|
||||||
void shake256(uint8_t *out, size_t outlen, const uint8_t *in, size_t inlen);
|
|
||||||
#define sha3_256 FIPS202_NAMESPACE(sha3_256)
|
|
||||||
void sha3_256(uint8_t h[32], const uint8_t *in, size_t inlen);
|
|
||||||
#define sha3_512 FIPS202_NAMESPACE(sha3_512)
|
|
||||||
void sha3_512(uint8_t h[64], const uint8_t *in, size_t inlen);
|
|
||||||
|
|
||||||
#endif
|
|
@ -1,196 +0,0 @@
|
|||||||
#include <stddef.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <immintrin.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include "fips202.h"
|
|
||||||
#include "fips202x4.h"
|
|
||||||
|
|
||||||
static void keccakx4_absorb_once(__m256i s[25],
|
|
||||||
unsigned int r,
|
|
||||||
const uint8_t *in0,
|
|
||||||
const uint8_t *in1,
|
|
||||||
const uint8_t *in2,
|
|
||||||
const uint8_t *in3,
|
|
||||||
size_t inlen,
|
|
||||||
uint8_t p)
|
|
||||||
{
|
|
||||||
size_t i;
|
|
||||||
uint64_t pos = 0;
|
|
||||||
__m256i t, idx;
|
|
||||||
|
|
||||||
for(i = 0; i < 25; ++i)
|
|
||||||
s[i] = _mm256_setzero_si256();
|
|
||||||
|
|
||||||
idx = _mm256_set_epi64x((long long)in3, (long long)in2, (long long)in1, (long long)in0);
|
|
||||||
while(inlen >= r) {
|
|
||||||
for(i = 0; i < r/8; ++i) {
|
|
||||||
t = _mm256_i64gather_epi64((long long *)pos, idx, 1);
|
|
||||||
s[i] = _mm256_xor_si256(s[i], t);
|
|
||||||
pos += 8;
|
|
||||||
}
|
|
||||||
inlen -= r;
|
|
||||||
|
|
||||||
f1600x4(s, KeccakF_RoundConstants);
|
|
||||||
}
|
|
||||||
|
|
||||||
for(i = 0; i < inlen/8; ++i) {
|
|
||||||
t = _mm256_i64gather_epi64((long long *)pos, idx, 1);
|
|
||||||
s[i] = _mm256_xor_si256(s[i], t);
|
|
||||||
pos += 8;
|
|
||||||
}
|
|
||||||
inlen -= 8*i;
|
|
||||||
|
|
||||||
if(inlen) {
|
|
||||||
t = _mm256_i64gather_epi64((long long *)pos, idx, 1);
|
|
||||||
idx = _mm256_set1_epi64x((1ULL << (8*inlen)) - 1);
|
|
||||||
t = _mm256_and_si256(t, idx);
|
|
||||||
s[i] = _mm256_xor_si256(s[i], t);
|
|
||||||
}
|
|
||||||
|
|
||||||
t = _mm256_set1_epi64x((uint64_t)p << 8*inlen);
|
|
||||||
s[i] = _mm256_xor_si256(s[i], t);
|
|
||||||
t = _mm256_set1_epi64x(1ULL << 63);
|
|
||||||
s[r/8 - 1] = _mm256_xor_si256(s[r/8 - 1], t);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void keccakx4_squeezeblocks(uint8_t *out0,
|
|
||||||
uint8_t *out1,
|
|
||||||
uint8_t *out2,
|
|
||||||
uint8_t *out3,
|
|
||||||
size_t nblocks,
|
|
||||||
unsigned int r,
|
|
||||||
__m256i s[25])
|
|
||||||
{
|
|
||||||
unsigned int i;
|
|
||||||
__m128d t;
|
|
||||||
|
|
||||||
while(nblocks > 0) {
|
|
||||||
f1600x4(s, KeccakF_RoundConstants);
|
|
||||||
for(i=0; i < r/8; ++i) {
|
|
||||||
t = _mm_castsi128_pd(_mm256_castsi256_si128(s[i]));
|
|
||||||
_mm_storel_pd((__attribute__((__may_alias__)) double *)&out0[8*i], t);
|
|
||||||
_mm_storeh_pd((__attribute__((__may_alias__)) double *)&out1[8*i], t);
|
|
||||||
t = _mm_castsi128_pd(_mm256_extracti128_si256(s[i],1));
|
|
||||||
_mm_storel_pd((__attribute__((__may_alias__)) double *)&out2[8*i], t);
|
|
||||||
_mm_storeh_pd((__attribute__((__may_alias__)) double *)&out3[8*i], t);
|
|
||||||
}
|
|
||||||
|
|
||||||
out0 += r;
|
|
||||||
out1 += r;
|
|
||||||
out2 += r;
|
|
||||||
out3 += r;
|
|
||||||
--nblocks;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void shake128x4_absorb_once(keccakx4_state *state,
|
|
||||||
const uint8_t *in0,
|
|
||||||
const uint8_t *in1,
|
|
||||||
const uint8_t *in2,
|
|
||||||
const uint8_t *in3,
|
|
||||||
size_t inlen)
|
|
||||||
{
|
|
||||||
keccakx4_absorb_once(state->s, SHAKE128_RATE, in0, in1, in2, in3, inlen, 0x1F);
|
|
||||||
}
|
|
||||||
|
|
||||||
void shake128x4_squeezeblocks(uint8_t *out0,
|
|
||||||
uint8_t *out1,
|
|
||||||
uint8_t *out2,
|
|
||||||
uint8_t *out3,
|
|
||||||
size_t nblocks,
|
|
||||||
keccakx4_state *state)
|
|
||||||
{
|
|
||||||
keccakx4_squeezeblocks(out0, out1, out2, out3, nblocks, SHAKE128_RATE, state->s);
|
|
||||||
}
|
|
||||||
|
|
||||||
void shake256x4_absorb_once(keccakx4_state *state,
|
|
||||||
const uint8_t *in0,
|
|
||||||
const uint8_t *in1,
|
|
||||||
const uint8_t *in2,
|
|
||||||
const uint8_t *in3,
|
|
||||||
size_t inlen)
|
|
||||||
{
|
|
||||||
keccakx4_absorb_once(state->s, SHAKE256_RATE, in0, in1, in2, in3, inlen, 0x1F);
|
|
||||||
}
|
|
||||||
|
|
||||||
void shake256x4_squeezeblocks(uint8_t *out0,
|
|
||||||
uint8_t *out1,
|
|
||||||
uint8_t *out2,
|
|
||||||
uint8_t *out3,
|
|
||||||
size_t nblocks,
|
|
||||||
keccakx4_state *state)
|
|
||||||
{
|
|
||||||
keccakx4_squeezeblocks(out0, out1, out2, out3, nblocks, SHAKE256_RATE, state->s);
|
|
||||||
}
|
|
||||||
|
|
||||||
void shake128x4(uint8_t *out0,
|
|
||||||
uint8_t *out1,
|
|
||||||
uint8_t *out2,
|
|
||||||
uint8_t *out3,
|
|
||||||
size_t outlen,
|
|
||||||
const uint8_t *in0,
|
|
||||||
const uint8_t *in1,
|
|
||||||
const uint8_t *in2,
|
|
||||||
const uint8_t *in3,
|
|
||||||
size_t inlen)
|
|
||||||
{
|
|
||||||
unsigned int i;
|
|
||||||
size_t nblocks = outlen/SHAKE128_RATE;
|
|
||||||
uint8_t t[4][SHAKE128_RATE];
|
|
||||||
keccakx4_state state;
|
|
||||||
|
|
||||||
shake128x4_absorb_once(&state, in0, in1, in2, in3, inlen);
|
|
||||||
shake128x4_squeezeblocks(out0, out1, out2, out3, nblocks, &state);
|
|
||||||
|
|
||||||
out0 += nblocks*SHAKE128_RATE;
|
|
||||||
out1 += nblocks*SHAKE128_RATE;
|
|
||||||
out2 += nblocks*SHAKE128_RATE;
|
|
||||||
out3 += nblocks*SHAKE128_RATE;
|
|
||||||
outlen -= nblocks*SHAKE128_RATE;
|
|
||||||
|
|
||||||
if(outlen) {
|
|
||||||
shake128x4_squeezeblocks(t[0], t[1], t[2], t[3], 1, &state);
|
|
||||||
for(i = 0; i < outlen; ++i) {
|
|
||||||
out0[i] = t[0][i];
|
|
||||||
out1[i] = t[1][i];
|
|
||||||
out2[i] = t[2][i];
|
|
||||||
out3[i] = t[3][i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void shake256x4(uint8_t *out0,
|
|
||||||
uint8_t *out1,
|
|
||||||
uint8_t *out2,
|
|
||||||
uint8_t *out3,
|
|
||||||
size_t outlen,
|
|
||||||
const uint8_t *in0,
|
|
||||||
const uint8_t *in1,
|
|
||||||
const uint8_t *in2,
|
|
||||||
const uint8_t *in3,
|
|
||||||
size_t inlen)
|
|
||||||
{
|
|
||||||
unsigned int i;
|
|
||||||
size_t nblocks = outlen/SHAKE256_RATE;
|
|
||||||
uint8_t t[4][SHAKE256_RATE];
|
|
||||||
keccakx4_state state;
|
|
||||||
|
|
||||||
shake256x4_absorb_once(&state, in0, in1, in2, in3, inlen);
|
|
||||||
shake256x4_squeezeblocks(out0, out1, out2, out3, nblocks, &state);
|
|
||||||
|
|
||||||
out0 += nblocks*SHAKE256_RATE;
|
|
||||||
out1 += nblocks*SHAKE256_RATE;
|
|
||||||
out2 += nblocks*SHAKE256_RATE;
|
|
||||||
out3 += nblocks*SHAKE256_RATE;
|
|
||||||
outlen -= nblocks*SHAKE256_RATE;
|
|
||||||
|
|
||||||
if(outlen) {
|
|
||||||
shake256x4_squeezeblocks(t[0], t[1], t[2], t[3], 1, &state);
|
|
||||||
for(i = 0; i < outlen; ++i) {
|
|
||||||
out0[i] = t[0][i];
|
|
||||||
out1[i] = t[1][i];
|
|
||||||
out2[i] = t[2][i];
|
|
||||||
out3[i] = t[3][i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,91 +0,0 @@
|
|||||||
#ifndef FIPS202X4_H
|
|
||||||
#define FIPS202X4_H
|
|
||||||
|
|
||||||
#define FIPS202X4_NAMESPACE(s) pqcrystals_dilithium_fips202x4_avx2_##s
|
|
||||||
|
|
||||||
#ifdef __ASSEMBLER__
|
|
||||||
/* The C ABI on MacOS exports all symbols with a leading
|
|
||||||
* underscore. This means that any symbols we refer to from
|
|
||||||
* C files (functions) can't be found, and all symbols we
|
|
||||||
* refer to from ASM also can't be found.
|
|
||||||
*
|
|
||||||
* This define helps us get around this
|
|
||||||
*/
|
|
||||||
#if defined(__WIN32__) || defined(__APPLE__)
|
|
||||||
#define decorate(s) _##s
|
|
||||||
#define _cdecl(s) decorate(s)
|
|
||||||
#define cdecl(s) _cdecl(FIPS202X4_NAMESPACE(##s))
|
|
||||||
#else
|
|
||||||
#define cdecl(s) FIPS202X4_NAMESPACE(##s)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#else
|
|
||||||
#include <stddef.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <immintrin.h>
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
__m256i s[25];
|
|
||||||
} keccakx4_state;
|
|
||||||
|
|
||||||
#define f1600x4 FIPS202X4_NAMESPACE(f1600x4)
|
|
||||||
void f1600x4(__m256i *s, const uint64_t *rc);
|
|
||||||
|
|
||||||
#define shake128x4_absorb_once FIPS202X4_NAMESPACE(shake128x4_absorb_once)
|
|
||||||
void shake128x4_absorb_once(keccakx4_state *state,
|
|
||||||
const uint8_t *in0,
|
|
||||||
const uint8_t *in1,
|
|
||||||
const uint8_t *in2,
|
|
||||||
const uint8_t *in3,
|
|
||||||
size_t inlen);
|
|
||||||
|
|
||||||
#define shake128x4_squeezeblocks FIPS202X4_NAMESPACE(shake128x4_squeezeblocks)
|
|
||||||
void shake128x4_squeezeblocks(uint8_t *out0,
|
|
||||||
uint8_t *out1,
|
|
||||||
uint8_t *out2,
|
|
||||||
uint8_t *out3,
|
|
||||||
size_t nblocks,
|
|
||||||
keccakx4_state *state);
|
|
||||||
|
|
||||||
#define shake256x4_absorb_once FIPS202X4_NAMESPACE(shake256x4_absorb_once)
|
|
||||||
void shake256x4_absorb_once(keccakx4_state *state,
|
|
||||||
const uint8_t *in0,
|
|
||||||
const uint8_t *in1,
|
|
||||||
const uint8_t *in2,
|
|
||||||
const uint8_t *in3,
|
|
||||||
size_t inlen);
|
|
||||||
|
|
||||||
#define shake256x4_squeezeblocks FIPS202X4_NAMESPACE(shake256x4_squeezeblocks)
|
|
||||||
void shake256x4_squeezeblocks(uint8_t *out0,
|
|
||||||
uint8_t *out1,
|
|
||||||
uint8_t *out2,
|
|
||||||
uint8_t *out3,
|
|
||||||
size_t nblocks,
|
|
||||||
keccakx4_state *state);
|
|
||||||
|
|
||||||
#define shake128x4 FIPS202X4_NAMESPACE(shake128x4)
|
|
||||||
void shake128x4(uint8_t *out0,
|
|
||||||
uint8_t *out1,
|
|
||||||
uint8_t *out2,
|
|
||||||
uint8_t *out3,
|
|
||||||
size_t outlen,
|
|
||||||
const uint8_t *in0,
|
|
||||||
const uint8_t *in1,
|
|
||||||
const uint8_t *in2,
|
|
||||||
const uint8_t *in3,
|
|
||||||
size_t inlen);
|
|
||||||
|
|
||||||
#define shake256x4 FIPS202X4_NAMESPACE(shake256x4)
|
|
||||||
void shake256x4(uint8_t *out0,
|
|
||||||
uint8_t *out1,
|
|
||||||
uint8_t *out2,
|
|
||||||
uint8_t *out3,
|
|
||||||
size_t outlen,
|
|
||||||
const uint8_t *in0,
|
|
||||||
const uint8_t *in1,
|
|
||||||
const uint8_t *in2,
|
|
||||||
const uint8_t *in3,
|
|
||||||
size_t inlen);
|
|
||||||
|
|
||||||
#endif
|
|
||||||
#endif
|
|
Loading…
x
Reference in New Issue
Block a user