Manually remove fips202 files left by copy_from_upstream (#958)

This commit is contained in:
John Schanck 2021-04-02 13:08:30 -04:00 committed by GitHub
parent 7a5001fe12
commit 7f3088232a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 0 additions and 4665 deletions

View File

@ -1,774 +0,0 @@
/* Based on the public domain implementation in crypto_hash/keccakc512/simple/ from
* http://bench.cr.yp.to/supercop.html by Ronny Van Keer and the public domain "TweetFips202"
* implementation from https://twitter.com/tweetfips202 by Gilles Van Assche, Daniel J. Bernstein,
* and Peter Schwabe */
#include <stddef.h>
#include <stdint.h>
#include "fips202.h"
#define NROUNDS 24
/* Rotate the 64-bit value `a` left by `offset` bits (intended for uint64_t
 * operands). `offset` must satisfy 0 < offset < 64: with offset == 0 the
 * right shift would be by 64 bits, which is undefined for a 64-bit operand.
 * Both arguments are fully parenthesized so compound expressions such as
 * ROL(x ^ y, n) expand with the intended precedence. Each argument is
 * evaluated twice, so do not pass expressions with side effects. */
#define ROL(a, offset) (((a) << (offset)) ^ ((a) >> (64-(offset))))
/*************************************************
* Name: load64
*
* Description: Assemble a uint64_t from 8 bytes stored in little-endian
* order (x[0] is the least significant byte).
*
* Arguments: - const uint8_t *x: pointer to the 8 input bytes
*
* Returns the assembled 64-bit unsigned integer
**************************************************/
static uint64_t load64(const uint8_t x[8]) {
  uint64_t word = 0;
  unsigned int k = 8;

  /* Walk from the most significant byte down, shifting earlier bytes up. */
  while(k-- > 0) {
    word = (word << 8) | x[k];
  }
  return word;
}
/*************************************************
* Name: store64
*
* Description: Write a 64-bit integer as 8 bytes in little-endian order
* (the least significant byte goes to x[0]).
*
* Arguments: - uint8_t *x: pointer to the 8-byte output array (allocated)
* - uint64_t u: 64-bit unsigned integer to serialize
**************************************************/
static void store64(uint8_t x[8], uint64_t u) {
  uint8_t *dst = x;
  uint8_t *const end = x + 8;

  /* Emit the low byte, then shift the next byte into place. */
  for(; dst != end; ++dst) {
    *dst = (uint8_t)(u & 0xFF);
    u >>= 8;
  }
}
/* Keccak round constants: entry i is XORed into the state during round i
 * of KeccakF1600_StatePermute. */
static const uint64_t KeccakF_RoundConstants[NROUNDS] = {
  0x0000000000000001ULL, /* round  0 */
  0x0000000000008082ULL, /* round  1 */
  0x800000000000808aULL, /* round  2 */
  0x8000000080008000ULL, /* round  3 */
  0x000000000000808bULL, /* round  4 */
  0x0000000080000001ULL, /* round  5 */
  0x8000000080008081ULL, /* round  6 */
  0x8000000000008009ULL, /* round  7 */
  0x000000000000008aULL, /* round  8 */
  0x0000000000000088ULL, /* round  9 */
  0x0000000080008009ULL, /* round 10 */
  0x000000008000000aULL, /* round 11 */
  0x000000008000808bULL, /* round 12 */
  0x800000000000008bULL, /* round 13 */
  0x8000000000008089ULL, /* round 14 */
  0x8000000000008003ULL, /* round 15 */
  0x8000000000008002ULL, /* round 16 */
  0x8000000000000080ULL, /* round 17 */
  0x000000000000800aULL, /* round 18 */
  0x800000008000000aULL, /* round 19 */
  0x8000000080008081ULL, /* round 20 */
  0x8000000000008080ULL, /* round 21 */
  0x0000000080000001ULL, /* round 22 */
  0x8000000080008008ULL  /* round 23 */
};
/*************************************************
* Name: KeccakF1600_StatePermute
*
* Description: The Keccak F1600 Permutation. Applies NROUNDS rounds to the
* 25-lane state in place. The state is copied into local
* variables, processed two rounds per loop iteration
* (A -> E with constant [round], then E -> A with constant
* [round+1]), and written back to the array at the end.
*
* Arguments: - uint64_t *state: pointer to input/output Keccak state
**************************************************/
static void KeccakF1600_StatePermute(uint64_t state[25])
{
int round;
// A* hold the working state. The second letter (b,g,k,m,s) selects a group
// of five consecutive lanes of state[], the third letter (a,e,i,o,u) the
// position inside that group (see the copy below).
uint64_t Aba, Abe, Abi, Abo, Abu;
uint64_t Aga, Age, Agi, Ago, Agu;
uint64_t Aka, Ake, Aki, Ako, Aku;
uint64_t Ama, Ame, Ami, Amo, Amu;
uint64_t Asa, Ase, Asi, Aso, Asu;
// BC* are scratch lanes: first the five column parities, then the five
// rotated lanes that feed each output group.
uint64_t BCa, BCe, BCi, BCo, BCu;
// D* are the per-column values XORed into every lane of that column.
uint64_t Da, De, Di, Do, Du;
// E* hold the state after the first round of each pair.
uint64_t Eba, Ebe, Ebi, Ebo, Ebu;
uint64_t Ega, Ege, Egi, Ego, Egu;
uint64_t Eka, Eke, Eki, Eko, Eku;
uint64_t Ema, Eme, Emi, Emo, Emu;
uint64_t Esa, Ese, Esi, Eso, Esu;
//copyFromState(A, state)
Aba = state[ 0];
Abe = state[ 1];
Abi = state[ 2];
Abo = state[ 3];
Abu = state[ 4];
Aga = state[ 5];
Age = state[ 6];
Agi = state[ 7];
Ago = state[ 8];
Agu = state[ 9];
Aka = state[10];
Ake = state[11];
Aki = state[12];
Ako = state[13];
Aku = state[14];
Ama = state[15];
Ame = state[16];
Ami = state[17];
Amo = state[18];
Amu = state[19];
Asa = state[20];
Ase = state[21];
Asi = state[22];
Aso = state[23];
Asu = state[24];
// Two rounds per iteration, so the loop body runs NROUNDS/2 times.
for(round = 0; round < NROUNDS; round += 2) {
// prepareTheta
// Column parity: XOR of the five lanes sharing the same last letter.
BCa = Aba^Aga^Aka^Ama^Asa;
BCe = Abe^Age^Ake^Ame^Ase;
BCi = Abi^Agi^Aki^Ami^Asi;
BCo = Abo^Ago^Ako^Amo^Aso;
BCu = Abu^Agu^Aku^Amu^Asu;
//thetaRhoPiChiIotaPrepareTheta(round, A, E)
// Each D value combines the parity of one neighbouring column with the
// other neighbour's parity rotated left by one bit.
Da = BCu^ROL(BCe, 1);
De = BCa^ROL(BCi, 1);
Di = BCe^ROL(BCo, 1);
Do = BCi^ROL(BCu, 1);
Du = BCo^ROL(BCa, 1);
// Output group Eb*: pick five A lanes, apply D and a fixed rotation, then
// combine them with x ^ (~y & z). Aba is the only lane that is not rotated.
Aba ^= Da;
BCa = Aba;
Age ^= De;
BCe = ROL(Age, 44);
Aki ^= Di;
BCi = ROL(Aki, 43);
Amo ^= Do;
BCo = ROL(Amo, 21);
Asu ^= Du;
BCu = ROL(Asu, 14);
Eba = BCa ^((~BCe)& BCi );
// The round constant is XORed into the first lane only.
Eba ^= (uint64_t)KeccakF_RoundConstants[round];
Ebe = BCe ^((~BCi)& BCo );
Ebi = BCi ^((~BCo)& BCu );
Ebo = BCo ^((~BCu)& BCa );
Ebu = BCu ^((~BCa)& BCe );
// Output group Eg*.
Abo ^= Do;
BCa = ROL(Abo, 28);
Agu ^= Du;
BCe = ROL(Agu, 20);
Aka ^= Da;
BCi = ROL(Aka, 3);
Ame ^= De;
BCo = ROL(Ame, 45);
Asi ^= Di;
BCu = ROL(Asi, 61);
Ega = BCa ^((~BCe)& BCi );
Ege = BCe ^((~BCi)& BCo );
Egi = BCi ^((~BCo)& BCu );
Ego = BCo ^((~BCu)& BCa );
Egu = BCu ^((~BCa)& BCe );
// Output group Ek*.
Abe ^= De;
BCa = ROL(Abe, 1);
Agi ^= Di;
BCe = ROL(Agi, 6);
Ako ^= Do;
BCi = ROL(Ako, 25);
Amu ^= Du;
BCo = ROL(Amu, 8);
Asa ^= Da;
BCu = ROL(Asa, 18);
Eka = BCa ^((~BCe)& BCi );
Eke = BCe ^((~BCi)& BCo );
Eki = BCi ^((~BCo)& BCu );
Eko = BCo ^((~BCu)& BCa );
Eku = BCu ^((~BCa)& BCe );
// Output group Em*.
Abu ^= Du;
BCa = ROL(Abu, 27);
Aga ^= Da;
BCe = ROL(Aga, 36);
Ake ^= De;
BCi = ROL(Ake, 10);
Ami ^= Di;
BCo = ROL(Ami, 15);
Aso ^= Do;
BCu = ROL(Aso, 56);
Ema = BCa ^((~BCe)& BCi );
Eme = BCe ^((~BCi)& BCo );
Emi = BCi ^((~BCo)& BCu );
Emo = BCo ^((~BCu)& BCa );
Emu = BCu ^((~BCa)& BCe );
// Output group Es*.
Abi ^= Di;
BCa = ROL(Abi, 62);
Ago ^= Do;
BCe = ROL(Ago, 55);
Aku ^= Du;
BCi = ROL(Aku, 39);
Ama ^= Da;
BCo = ROL(Ama, 41);
Ase ^= De;
BCu = ROL(Ase, 2);
Esa = BCa ^((~BCe)& BCi );
Ese = BCe ^((~BCi)& BCo );
Esi = BCi ^((~BCo)& BCu );
Eso = BCo ^((~BCu)& BCa );
Esu = BCu ^((~BCa)& BCe );
// Second round of the pair: same steps with the roles of A and E swapped.
// prepareTheta
BCa = Eba^Ega^Eka^Ema^Esa;
BCe = Ebe^Ege^Eke^Eme^Ese;
BCi = Ebi^Egi^Eki^Emi^Esi;
BCo = Ebo^Ego^Eko^Emo^Eso;
BCu = Ebu^Egu^Eku^Emu^Esu;
//thetaRhoPiChiIotaPrepareTheta(round+1, E, A)
Da = BCu^ROL(BCe, 1);
De = BCa^ROL(BCi, 1);
Di = BCe^ROL(BCo, 1);
Do = BCi^ROL(BCu, 1);
Du = BCo^ROL(BCa, 1);
// Output group Ab*.
Eba ^= Da;
BCa = Eba;
Ege ^= De;
BCe = ROL(Ege, 44);
Eki ^= Di;
BCi = ROL(Eki, 43);
Emo ^= Do;
BCo = ROL(Emo, 21);
Esu ^= Du;
BCu = ROL(Esu, 14);
Aba = BCa ^((~BCe)& BCi );
Aba ^= (uint64_t)KeccakF_RoundConstants[round+1];
Abe = BCe ^((~BCi)& BCo );
Abi = BCi ^((~BCo)& BCu );
Abo = BCo ^((~BCu)& BCa );
Abu = BCu ^((~BCa)& BCe );
// Output group Ag*.
Ebo ^= Do;
BCa = ROL(Ebo, 28);
Egu ^= Du;
BCe = ROL(Egu, 20);
Eka ^= Da;
BCi = ROL(Eka, 3);
Eme ^= De;
BCo = ROL(Eme, 45);
Esi ^= Di;
BCu = ROL(Esi, 61);
Aga = BCa ^((~BCe)& BCi );
Age = BCe ^((~BCi)& BCo );
Agi = BCi ^((~BCo)& BCu );
Ago = BCo ^((~BCu)& BCa );
Agu = BCu ^((~BCa)& BCe );
// Output group Ak*.
Ebe ^= De;
BCa = ROL(Ebe, 1);
Egi ^= Di;
BCe = ROL(Egi, 6);
Eko ^= Do;
BCi = ROL(Eko, 25);
Emu ^= Du;
BCo = ROL(Emu, 8);
Esa ^= Da;
BCu = ROL(Esa, 18);
Aka = BCa ^((~BCe)& BCi );
Ake = BCe ^((~BCi)& BCo );
Aki = BCi ^((~BCo)& BCu );
Ako = BCo ^((~BCu)& BCa );
Aku = BCu ^((~BCa)& BCe );
// Output group Am*.
Ebu ^= Du;
BCa = ROL(Ebu, 27);
Ega ^= Da;
BCe = ROL(Ega, 36);
Eke ^= De;
BCi = ROL(Eke, 10);
Emi ^= Di;
BCo = ROL(Emi, 15);
Eso ^= Do;
BCu = ROL(Eso, 56);
Ama = BCa ^((~BCe)& BCi );
Ame = BCe ^((~BCi)& BCo );
Ami = BCi ^((~BCo)& BCu );
Amo = BCo ^((~BCu)& BCa );
Amu = BCu ^((~BCa)& BCe );
// Output group As*.
Ebi ^= Di;
BCa = ROL(Ebi, 62);
Ego ^= Do;
BCe = ROL(Ego, 55);
Eku ^= Du;
BCi = ROL(Eku, 39);
Ema ^= Da;
BCo = ROL(Ema, 41);
Ese ^= De;
BCu = ROL(Ese, 2);
Asa = BCa ^((~BCe)& BCi );
Ase = BCe ^((~BCi)& BCo );
Asi = BCi ^((~BCo)& BCu );
Aso = BCo ^((~BCu)& BCa );
Asu = BCu ^((~BCa)& BCe );
}
//copyToState(state, A)
state[ 0] = Aba;
state[ 1] = Abe;
state[ 2] = Abi;
state[ 3] = Abo;
state[ 4] = Abu;
state[ 5] = Aga;
state[ 6] = Age;
state[ 7] = Agi;
state[ 8] = Ago;
state[ 9] = Agu;
state[10] = Aka;
state[11] = Ake;
state[12] = Aki;
state[13] = Ako;
state[14] = Aku;
state[15] = Ama;
state[16] = Ame;
state[17] = Ami;
state[18] = Amo;
state[19] = Amu;
state[20] = Asa;
state[21] = Ase;
state[22] = Asi;
state[23] = Aso;
state[24] = Asu;
}
/*************************************************
* Name: keccak_init
*
* Description: Sets all 25 lanes of the Keccak state to zero.
*
* Arguments: - uint64_t *s: pointer to Keccak state
**************************************************/
static void keccak_init(uint64_t s[25])
{
  uint64_t *lane = s;
  uint64_t *const end = s + 25;

  while(lane != end) {
    *lane++ = 0;
  }
}
/*************************************************
* Name: keccak_absorb
*
* Description: Incremental absorb step of Keccak. XORs the input bytes into
* the state starting at byte position pos of the current block
* and runs the permutation each time a block of r bytes has
* been filled.
*
* Arguments: - uint64_t *s: pointer to Keccak state
* - unsigned int pos: position in current block to be absorbed
* - unsigned int r: rate in bytes (e.g., 168 for SHAKE128)
* - const uint8_t *in: pointer to input to be absorbed into s
* - size_t inlen: length of input in bytes
*
* Returns new position pos in current block
**************************************************/
static unsigned int keccak_absorb(uint64_t s[25],
                                  unsigned int pos,
                                  unsigned int r,
                                  const uint8_t *in,
                                  size_t inlen)
{
  unsigned int idx;

  /* As long as the input reaches the end of the current block: fill the
   * block, permute, and restart at position 0. */
  for(; pos + inlen >= r; pos = 0) {
    for(idx = pos; idx < r; idx++) {
      s[idx / 8] ^= (uint64_t)(*in) << (8 * (idx % 8));
      in++;
    }
    inlen -= r - pos;
    KeccakF1600_StatePermute(s);
  }

  /* Remaining bytes only partially fill the block; no permutation yet. */
  for(idx = pos; idx < pos + inlen; idx++) {
    s[idx / 8] ^= (uint64_t)(*in) << (8 * (idx % 8));
    in++;
  }

  return idx;
}
/*************************************************
* Name: keccak_finalize
*
* Description: Finalize absorb step: XORs the domain separation byte into
* the state at byte position pos and flips the most
* significant bit of lane r/8-1.
*
* Arguments: - uint64_t *s: pointer to Keccak state
* - unsigned int pos: position in current block to be absorbed
* - unsigned int r: rate in bytes (e.g., 168 for SHAKE128)
* - uint8_t p: domain separation byte
**************************************************/
static void keccak_finalize(uint64_t s[25], unsigned int pos, unsigned int r, uint8_t p)
{
  const unsigned int pad_lane = pos / 8;
  const unsigned int pad_shift = 8 * (pos % 8);
  const unsigned int last_lane = r / 8 - 1;

  s[pad_lane] ^= (uint64_t)p << pad_shift;
  s[last_lane] ^= 1ULL << 63;
}
/*************************************************
* Name: keccak_squeeze
*
* Description: Squeeze step of Keccak. Squeezes arbitrarily many bytes.
* Modifies the state. Can be called multiple times to keep
* squeezing, i.e., is incremental. The permutation is run
* whenever the current block has been used up (pos == r).
*
* Arguments: - uint8_t *out: pointer to output
* - size_t outlen: number of bytes to be squeezed (written to out)
* - uint64_t *s: pointer to input/output Keccak state
* - unsigned int pos: number of bytes in current block already squeezed
* - unsigned int r: rate in bytes (e.g., 168 for SHAKE128)
*
* Returns new position pos in current block
**************************************************/
static unsigned int keccak_squeeze(uint8_t *out,
                                   size_t outlen,
                                   uint64_t s[25],
                                   unsigned int pos,
                                   unsigned int r)
{
  unsigned int idx;

  while(outlen != 0) {
    if(pos == r) {
      /* Current block exhausted: produce a fresh one. */
      KeccakF1600_StatePermute(s);
      pos = 0;
    }

    /* Copy bytes until the block ends or the request is satisfied. */
    idx = pos;
    while(idx < r && idx < pos + outlen) {
      *out = s[idx / 8] >> (8 * (idx % 8));
      out++;
      idx++;
    }

    outlen -= idx - pos;
    pos = idx;
  }

  return pos;
}
/*************************************************
* Name: keccak_absorb_once
*
* Description: Non-incremental absorb step of Keccak. Zeroes the state,
* absorbs every full block of r bytes (permuting after each),
* XORs in the remaining bytes, then applies the domain
* separation byte directly after the input and flips the
* most significant bit of the lane holding byte r-1.
*
* Arguments: - uint64_t *s: pointer to (uninitialized) output Keccak state
* - unsigned int r: rate in bytes (e.g., 168 for SHAKE128)
* - const uint8_t *in: pointer to input to be absorbed into s
* - size_t inlen: length of input in bytes
* - uint8_t p: domain-separation byte for different Keccak-derived functions
**************************************************/
static void keccak_absorb_once(uint64_t s[25],
                               unsigned int r,
                               const uint8_t *in,
                               size_t inlen,
                               uint8_t p)
{
  unsigned int k;
  const unsigned int lanes = r / 8;

  k = 0;
  while(k < 25) {
    s[k] = 0;
    k++;
  }

  /* Full blocks: XOR in lane by lane, then permute. */
  for(; inlen >= r; inlen -= r, in += r) {
    for(k = 0; k < lanes; k++) {
      s[k] ^= load64(&in[8 * k]);
    }
    KeccakF1600_StatePermute(s);
  }

  /* Trailing partial block, byte by byte. */
  for(k = 0; k < inlen; k++) {
    s[k / 8] ^= (uint64_t)in[k] << (8 * (k % 8));
  }

  /* k now equals the number of trailing bytes, i.e. the padding position. */
  s[k / 8] ^= (uint64_t)p << (8 * (k % 8));
  s[(r - 1) / 8] ^= 1ULL << 63;
}
/*************************************************
* Name: keccak_squeezeblocks
*
* Description: Squeeze step of Keccak. Squeezes full blocks of r bytes each.
* Modifies the state. Can be called multiple times to keep
* squeezing, i.e., is incremental. Assumes zero bytes of current
* block have already been squeezed. The permutation is run
* before each block is written out.
*
* Arguments: - uint8_t *out: pointer to output blocks
* - size_t nblocks: number of blocks to be squeezed (written to out)
* - uint64_t *s: pointer to input/output Keccak state
* - unsigned int r: rate in bytes (e.g., 168 for SHAKE128)
**************************************************/
static void keccak_squeezeblocks(uint8_t *out,
                                 size_t nblocks,
                                 uint64_t s[25],
                                 unsigned int r)
{
  unsigned int lane;
  const unsigned int lanes = r / 8;

  for(; nblocks != 0; nblocks--, out += r) {
    KeccakF1600_StatePermute(s);
    for(lane = 0; lane < lanes; lane++) {
      store64(&out[8 * lane], s[lane]);
    }
  }
}
/*************************************************
* Name: shake128_init
*
* Description: Initializes Keccak state for use as SHAKE128 XOF: zeroes the
* lanes and resets the block position to 0.
*
* Arguments: - keccak_state *state: pointer to (uninitialized) Keccak state
**************************************************/
void shake128_init(keccak_state *state)
{
  state->pos = 0;
  keccak_init(state->s);
}
/*************************************************
* Name: shake128_absorb
*
* Description: Absorb step of the SHAKE128 XOF; incremental. Stores the
* updated block position back into the state.
*
* Arguments: - keccak_state *state: pointer to (initialized) output Keccak state
* - const uint8_t *in: pointer to input to be absorbed into s
* - size_t inlen: length of input in bytes
**************************************************/
void shake128_absorb(keccak_state *state, const uint8_t *in, size_t inlen)
{
  const unsigned int next = keccak_absorb(state->s, state->pos, SHAKE128_RATE, in, inlen);

  state->pos = next;
}
/*************************************************
* Name: shake128_finalize
*
* Description: Finalize absorb step of the SHAKE128 XOF. Applies the 0x1F
* domain separation byte and sets the block position to
* SHAKE128_RATE, so the next squeeze starts with a permutation.
*
* Arguments: - keccak_state *state: pointer to Keccak state
**************************************************/
void shake128_finalize(keccak_state *state)
{
  const unsigned int used = state->pos;

  keccak_finalize(state->s, used, SHAKE128_RATE, 0x1F);
  state->pos = SHAKE128_RATE;
}
/*************************************************
* Name: shake128_squeeze
*
* Description: Squeeze step of SHAKE128 XOF. Squeezes arbitrarily many
* bytes. Can be called multiple times to keep squeezing.
*
* Arguments: - uint8_t *out: pointer to output
* - size_t outlen : number of bytes to be squeezed (written to output)
* - keccak_state *state: pointer to input/output Keccak state
**************************************************/
void shake128_squeeze(uint8_t *out, size_t outlen, keccak_state *state)
{
  const unsigned int next = keccak_squeeze(out, outlen, state->s, state->pos, SHAKE128_RATE);

  state->pos = next;
}
/*************************************************
* Name: shake128_absorb_once
*
* Description: Initialize, absorb into and finalize SHAKE128 XOF; non-incremental.
* Leaves the block position at SHAKE128_RATE.
*
* Arguments: - keccak_state *state: pointer to (uninitialized) output Keccak state
* - const uint8_t *in: pointer to input to be absorbed into s
* - size_t inlen: length of input in bytes
**************************************************/
void shake128_absorb_once(keccak_state *state, const uint8_t *in, size_t inlen)
{
  uint64_t *lanes = state->s;

  keccak_absorb_once(lanes, SHAKE128_RATE, in, inlen, 0x1F);
  state->pos = SHAKE128_RATE;
}
/*************************************************
* Name: shake128_squeezeblocks
*
* Description: Squeeze step of SHAKE128 XOF. Squeezes full blocks of
* SHAKE128_RATE bytes each. Can be called multiple times
* to keep squeezing. Assumes new block has not yet been
* started (state->pos = SHAKE128_RATE). Does not modify
* state->pos.
*
* Arguments: - uint8_t *out: pointer to output blocks
* - size_t nblocks: number of blocks to be squeezed (written to output)
* - keccak_state *state: pointer to input/output Keccak state
**************************************************/
void shake128_squeezeblocks(uint8_t *out, size_t nblocks, keccak_state *state)
{
  uint64_t *lanes = state->s;

  keccak_squeezeblocks(out, nblocks, lanes, SHAKE128_RATE);
}
/*************************************************
* Name: shake256_init
*
* Description: Initializes Keccak state for use as SHAKE256 XOF: zeroes the
* lanes and resets the block position to 0.
*
* Arguments: - keccak_state *state: pointer to (uninitialized) Keccak state
**************************************************/
void shake256_init(keccak_state *state)
{
  state->pos = 0;
  keccak_init(state->s);
}
/*************************************************
* Name: shake256_absorb
*
* Description: Absorb step of the SHAKE256 XOF; incremental. Stores the
* updated block position back into the state.
*
* Arguments: - keccak_state *state: pointer to (initialized) output Keccak state
* - const uint8_t *in: pointer to input to be absorbed into s
* - size_t inlen: length of input in bytes
**************************************************/
void shake256_absorb(keccak_state *state, const uint8_t *in, size_t inlen)
{
  const unsigned int next = keccak_absorb(state->s, state->pos, SHAKE256_RATE, in, inlen);

  state->pos = next;
}
/*************************************************
* Name: shake256_finalize
*
* Description: Finalize absorb step of the SHAKE256 XOF. Applies the 0x1F
* domain separation byte and sets the block position to
* SHAKE256_RATE, so the next squeeze starts with a permutation.
*
* Arguments: - keccak_state *state: pointer to Keccak state
**************************************************/
void shake256_finalize(keccak_state *state)
{
  const unsigned int used = state->pos;

  keccak_finalize(state->s, used, SHAKE256_RATE, 0x1F);
  state->pos = SHAKE256_RATE;
}
/*************************************************
* Name: shake256_squeeze
*
* Description: Squeeze step of SHAKE256 XOF. Squeezes arbitrarily many
* bytes. Can be called multiple times to keep squeezing.
*
* Arguments: - uint8_t *out: pointer to output
* - size_t outlen : number of bytes to be squeezed (written to output)
* - keccak_state *state: pointer to input/output Keccak state
**************************************************/
void shake256_squeeze(uint8_t *out, size_t outlen, keccak_state *state)
{
  const unsigned int next = keccak_squeeze(out, outlen, state->s, state->pos, SHAKE256_RATE);

  state->pos = next;
}
/*************************************************
* Name: shake256_absorb_once
*
* Description: Initialize, absorb into and finalize SHAKE256 XOF; non-incremental.
* Leaves the block position at SHAKE256_RATE.
*
* Arguments: - keccak_state *state: pointer to (uninitialized) output Keccak state
* - const uint8_t *in: pointer to input to be absorbed into s
* - size_t inlen: length of input in bytes
**************************************************/
void shake256_absorb_once(keccak_state *state, const uint8_t *in, size_t inlen)
{
  uint64_t *lanes = state->s;

  keccak_absorb_once(lanes, SHAKE256_RATE, in, inlen, 0x1F);
  state->pos = SHAKE256_RATE;
}
/*************************************************
* Name: shake256_squeezeblocks
*
* Description: Squeeze step of SHAKE256 XOF. Squeezes full blocks of
* SHAKE256_RATE bytes each. Can be called multiple times
* to keep squeezing. Assumes next block has not yet been
* started (state->pos = SHAKE256_RATE). Does not modify
* state->pos.
*
* Arguments: - uint8_t *out: pointer to output blocks
* - size_t nblocks: number of blocks to be squeezed (written to output)
* - keccak_state *state: pointer to input/output Keccak state
**************************************************/
void shake256_squeezeblocks(uint8_t *out, size_t nblocks, keccak_state *state)
{
  uint64_t *lanes = state->s;

  keccak_squeezeblocks(out, nblocks, lanes, SHAKE256_RATE);
}
/*************************************************
* Name: shake128
*
* Description: SHAKE128 XOF with non-incremental API. Absorbs the whole
* input, writes all full output blocks directly, then squeezes
* the remaining outlen % SHAKE128_RATE bytes.
*
* Arguments: - uint8_t *out: pointer to output
* - size_t outlen: requested output length in bytes
* - const uint8_t *in: pointer to input
* - size_t inlen: length of input in bytes
**************************************************/
void shake128(uint8_t *out, size_t outlen, const uint8_t *in, size_t inlen)
{
  keccak_state state;
  const size_t full_blocks = outlen / SHAKE128_RATE;
  const size_t full_bytes = full_blocks * SHAKE128_RATE;

  shake128_absorb_once(&state, in, inlen);
  shake128_squeezeblocks(out, full_blocks, &state);
  shake128_squeeze(out + full_bytes, outlen - full_bytes, &state);
}
/*************************************************
* Name: shake256
*
* Description: SHAKE256 XOF with non-incremental API. Absorbs the whole
* input, writes all full output blocks directly, then squeezes
* the remaining outlen % SHAKE256_RATE bytes.
*
* Arguments: - uint8_t *out: pointer to output
* - size_t outlen: requested output length in bytes
* - const uint8_t *in: pointer to input
* - size_t inlen: length of input in bytes
**************************************************/
void shake256(uint8_t *out, size_t outlen, const uint8_t *in, size_t inlen)
{
  keccak_state state;
  const size_t full_blocks = outlen / SHAKE256_RATE;
  const size_t full_bytes = full_blocks * SHAKE256_RATE;

  shake256_absorb_once(&state, in, inlen);
  shake256_squeezeblocks(out, full_blocks, &state);
  shake256_squeeze(out + full_bytes, outlen - full_bytes, &state);
}
/*************************************************
* Name: sha3_256
*
* Description: SHA3-256 with non-incremental API. Absorbs the input with
* domain separation byte 0x06, permutes once, and outputs the
* first four state lanes in little-endian byte order.
*
* Arguments: - uint8_t *h: pointer to output (32 bytes)
* - const uint8_t *in: pointer to input
* - size_t inlen: length of input in bytes
**************************************************/
void sha3_256(uint8_t h[32], const uint8_t *in, size_t inlen)
{
  uint64_t st[25];
  unsigned int lane = 0;

  keccak_absorb_once(st, SHA3_256_RATE, in, inlen, 0x06);
  KeccakF1600_StatePermute(st);

  while(lane < 4) {
    store64(&h[8 * lane], st[lane]);
    lane++;
  }
}
/*************************************************
* Name: sha3_512
*
* Description: SHA3-512 with non-incremental API. Absorbs the input with
* domain separation byte 0x06, permutes once, and outputs the
* first eight state lanes in little-endian byte order.
*
* Arguments: - uint8_t *h: pointer to output (64 bytes)
* - const uint8_t *in: pointer to input
* - size_t inlen: length of input in bytes
**************************************************/
void sha3_512(uint8_t h[64], const uint8_t *in, size_t inlen)
{
  uint64_t st[25];
  unsigned int lane = 0;

  keccak_absorb_once(st, SHA3_512_RATE, in, inlen, 0x06);
  KeccakF1600_StatePermute(st);

  while(lane < 8) {
    store64(&h[8 * lane], st[lane]);
    lane++;
  }
}

View File

@ -1,54 +0,0 @@
/* Public interface of the scalar Keccak-based functions: SHAKE128/SHAKE256
 * (incremental and one-shot) and SHA3-256/SHA3-512 (one-shot). */
#ifndef FIPS202_H
#define FIPS202_H
#include <stddef.h>
#include <stdint.h>
/* Rates in bytes, i.e. the number of bytes absorbed or squeezed per
 * permutation call for each function. */
#define SHAKE128_RATE 168
#define SHAKE256_RATE 136
#define SHA3_256_RATE 136
#define SHA3_512_RATE 72
/* Prefixes every exported symbol; the per-function #defines below map the
 * short names used in the code onto the prefixed linker names. */
#define FIPS202_NAMESPACE(s) pqcrystals_kyber_fips202_avx2_##s
/* Incremental sponge state. */
typedef struct {
uint64_t s[25]; /* the 25 64-bit lanes of the Keccak state */
unsigned int pos; /* byte position inside the current block */
} keccak_state;
/* Zero the state and reset pos to 0. */
#define shake128_init FIPS202_NAMESPACE(shake128_init)
void shake128_init(keccak_state *state);
/* Absorb inlen bytes from in; may be called repeatedly. */
#define shake128_absorb FIPS202_NAMESPACE(shake128_absorb)
void shake128_absorb(keccak_state *state, const uint8_t *in, size_t inlen);
/* Apply padding and switch the state to squeezing (pos = SHAKE128_RATE). */
#define shake128_finalize FIPS202_NAMESPACE(shake128_finalize)
void shake128_finalize(keccak_state *state);
/* Write outlen output bytes to out; may be called repeatedly. */
#define shake128_squeeze FIPS202_NAMESPACE(shake128_squeeze)
void shake128_squeeze(uint8_t *out, size_t outlen, keccak_state *state);
/* One-shot init + absorb + finalize; state may be uninitialized on entry. */
#define shake128_absorb_once FIPS202_NAMESPACE(shake128_absorb_once)
void shake128_absorb_once(keccak_state *state, const uint8_t *in, size_t inlen);
/* Write nblocks full blocks of SHAKE128_RATE bytes each to out. */
#define shake128_squeezeblocks FIPS202_NAMESPACE(shake128_squeezeblocks)
void shake128_squeezeblocks(uint8_t *out, size_t nblocks, keccak_state *state);
/* SHAKE256 counterparts of the functions above, using SHAKE256_RATE. */
#define shake256_init FIPS202_NAMESPACE(shake256_init)
void shake256_init(keccak_state *state);
#define shake256_absorb FIPS202_NAMESPACE(shake256_absorb)
void shake256_absorb(keccak_state *state, const uint8_t *in, size_t inlen);
#define shake256_finalize FIPS202_NAMESPACE(shake256_finalize)
void shake256_finalize(keccak_state *state);
#define shake256_squeeze FIPS202_NAMESPACE(shake256_squeeze)
void shake256_squeeze(uint8_t *out, size_t outlen, keccak_state *state);
#define shake256_absorb_once FIPS202_NAMESPACE(shake256_absorb_once)
void shake256_absorb_once(keccak_state *state, const uint8_t *in, size_t inlen);
#define shake256_squeezeblocks FIPS202_NAMESPACE(shake256_squeezeblocks)
void shake256_squeezeblocks(uint8_t *out, size_t nblocks, keccak_state *state);
/* One-shot XOFs: write outlen output bytes derived from in[0..inlen). */
#define shake128 FIPS202_NAMESPACE(shake128)
void shake128(uint8_t *out, size_t outlen, const uint8_t *in, size_t inlen);
#define shake256 FIPS202_NAMESPACE(shake256)
void shake256(uint8_t *out, size_t outlen, const uint8_t *in, size_t inlen);
/* One-shot hashes: h receives the 32-byte (resp. 64-byte) digest. */
#define sha3_256 FIPS202_NAMESPACE(sha3_256)
void sha3_256(uint8_t h[32], const uint8_t *in, size_t inlen);
#define sha3_512 FIPS202_NAMESPACE(sha3_512)
void sha3_512(uint8_t h[64], const uint8_t *in, size_t inlen);
#endif

View File

@ -1,200 +0,0 @@
#include <stddef.h>
#include <stdint.h>
#include <immintrin.h>
#include <string.h>
#include "fips202.h"
#include "fips202x4.h"
/* Use implementation from the Keccak Code Package */
/* KeccakF1600_StatePermute4x is a local alias for the namespaced symbol
 * KeccakP1600times4_PermuteAll_24rounds. It is only declared here; the
 * definition lives in another translation unit. */
#define KeccakF1600_StatePermute4x FIPS202X4_NAMESPACE(KeccakP1600times4_PermuteAll_24rounds)
extern void KeccakF1600_StatePermute4x(__m256i *s);
/*
 * Non-incremental absorb for four Keccak instances in parallel.
 *
 * Each s[i] holds lane i of all four instances: 64-bit element 0 belongs to
 * in0, element 1 to in1, element 2 to in2 and element 3 to in3. The function
 * zeroes the state, absorbs every full block of r bytes (permuting after
 * each), XORs in the remaining bytes, then applies the domain separation
 * byte p directly after the input and flips the top bit of lane r/8 - 1.
 *
 * Arguments: - __m256i *s:       pointer to (uninitialized) 4-way state
 *            - unsigned int r:   rate in bytes (e.g., 168 for SHAKE128)
 *            - const uint8_t *in0..in3: the four inputs, each inlen bytes
 *            - size_t inlen:     length of every input in bytes
 *            - uint8_t p:        domain separation byte
 *
 * Full 8-byte lanes are fetched with a gather whose "indices" are the four
 * input pointers and whose base is the running byte offset pos. The final
 * partial lane (1..7 bytes) is copied with memcpy instead of a gather: a
 * gather always loads 8 bytes per element and would read up to 7 bytes past
 * the end of each input buffer.
 */
static void keccakx4_absorb_once(__m256i s[25],
                                 unsigned int r,
                                 const uint8_t *in0,
                                 const uint8_t *in1,
                                 const uint8_t *in2,
                                 const uint8_t *in3,
                                 size_t inlen,
                                 uint8_t p)
{
  size_t i;
  uint64_t pos = 0;
  __m256i t, idx;

  for(i = 0; i < 25; ++i)
    s[i] = _mm256_setzero_si256();

  /* The four input addresses act as gather indices (scale 1). */
  idx = _mm256_set_epi64x((long long)in3, (long long)in2, (long long)in1, (long long)in0);

  /* Full blocks. */
  while(inlen >= r) {
    for(i = 0; i < r/8; ++i) {
      t = _mm256_i64gather_epi64((long long *)pos, idx, 1);
      s[i] = _mm256_xor_si256(s[i], t);
      pos += 8;
    }
    inlen -= r;
    KeccakF1600_StatePermute4x(s);
  }

  /* Full lanes of the trailing partial block. */
  for(i = 0; i < inlen/8; ++i) {
    t = _mm256_i64gather_epi64((long long *)pos, idx, 1);
    s[i] = _mm256_xor_si256(s[i], t);
    pos += 8;
  }
  inlen -= 8*i;

  /* Last 1..7 bytes: bounded copy into zeroed lanes. On the little-endian
   * targets that provide these intrinsics, byte k lands in bits 8k..8k+7,
   * matching what the masked 8-byte load produced. */
  if(inlen) {
    uint64_t tail[4] = {0, 0, 0, 0};
    memcpy(&tail[0], in0 + pos, inlen);
    memcpy(&tail[1], in1 + pos, inlen);
    memcpy(&tail[2], in2 + pos, inlen);
    memcpy(&tail[3], in3 + pos, inlen);
    t = _mm256_set_epi64x((long long)tail[3], (long long)tail[2],
                          (long long)tail[1], (long long)tail[0]);
    s[i] = _mm256_xor_si256(s[i], t);
  }

  /* Domain separation byte right after the input, in all four instances. */
  t = _mm256_set1_epi64x((uint64_t)p << 8*inlen);
  s[i] = _mm256_xor_si256(s[i], t);
  /* Final padding bit: top bit of the last lane of the rate. */
  t = _mm256_set1_epi64x(1ULL << 63);
  s[r/8 - 1] = _mm256_xor_si256(s[r/8 - 1], t);
}
/*
 * Squeeze nblocks full blocks of r bytes from the 4-way state. Before each
 * block the permutation is applied to all four instances; then, for every
 * lane of the rate, the four 64-bit elements of s[lane] are written to
 * out0, out1, out2 and out3 respectively.
 */
static void keccakx4_squeezeblocks(uint8_t *out0,
                                   uint8_t *out1,
                                   uint8_t *out2,
                                   uint8_t *out3,
                                   size_t nblocks,
                                   unsigned int r,
                                   __m256i s[25])
{
  unsigned int lane;
  __m128d half;

  for(; nblocks > 0; --nblocks) {
    KeccakF1600_StatePermute4x(s);

    for(lane = 0; lane < r/8; ++lane) {
      /* Low 128 bits: elements 0 and 1. */
      half = _mm_castsi128_pd(_mm256_castsi256_si128(s[lane]));
      _mm_storel_pd((__attribute__((__may_alias__)) double *)&out0[8*lane], half);
      _mm_storeh_pd((__attribute__((__may_alias__)) double *)&out1[8*lane], half);
      /* High 128 bits: elements 2 and 3. */
      half = _mm_castsi128_pd(_mm256_extracti128_si256(s[lane],1));
      _mm_storel_pd((__attribute__((__may_alias__)) double *)&out2[8*lane], half);
      _mm_storeh_pd((__attribute__((__may_alias__)) double *)&out3[8*lane], half);
    }

    out0 += r;
    out1 += r;
    out2 += r;
    out3 += r;
  }
}
/* 4-way SHAKE128: initialize the state, absorb four inputs of inlen bytes
 * each and finalize with domain separation byte 0x1F; non-incremental. */
void shake128x4_absorb_once(keccakx4_state *state,
                            const uint8_t *in0,
                            const uint8_t *in1,
                            const uint8_t *in2,
                            const uint8_t *in3,
                            size_t inlen)
{
  __m256i *lanes = state->s;

  keccakx4_absorb_once(lanes, SHAKE128_RATE, in0, in1, in2, in3, inlen, 0x1F);
}
/* 4-way SHAKE128: write nblocks full blocks of SHAKE128_RATE bytes to each
 * of the four output buffers. */
void shake128x4_squeezeblocks(uint8_t *out0,
                              uint8_t *out1,
                              uint8_t *out2,
                              uint8_t *out3,
                              size_t nblocks,
                              keccakx4_state *state)
{
  __m256i *lanes = state->s;

  keccakx4_squeezeblocks(out0, out1, out2, out3, nblocks, SHAKE128_RATE, lanes);
}
/* 4-way SHAKE256: initialize the state, absorb four inputs of inlen bytes
 * each and finalize with domain separation byte 0x1F; non-incremental. */
void shake256x4_absorb_once(keccakx4_state *state,
                            const uint8_t *in0,
                            const uint8_t *in1,
                            const uint8_t *in2,
                            const uint8_t *in3,
                            size_t inlen)
{
  __m256i *lanes = state->s;

  keccakx4_absorb_once(lanes, SHAKE256_RATE, in0, in1, in2, in3, inlen, 0x1F);
}
/* 4-way SHAKE256: write nblocks full blocks of SHAKE256_RATE bytes to each
 * of the four output buffers. */
void shake256x4_squeezeblocks(uint8_t *out0,
                              uint8_t *out1,
                              uint8_t *out2,
                              uint8_t *out3,
                              size_t nblocks,
                              keccakx4_state *state)
{
  __m256i *lanes = state->s;

  keccakx4_squeezeblocks(out0, out1, out2, out3, nblocks, SHAKE256_RATE, lanes);
}
/* 4-way SHAKE128 with non-incremental API: produces outlen bytes in each of
 * out0..out3 from the four inlen-byte inputs in0..in3. Full blocks are
 * written straight to the outputs; a trailing partial block is squeezed
 * into a temporary buffer and only its first bytes are copied out. */
void shake128x4(uint8_t *out0,
                uint8_t *out1,
                uint8_t *out2,
                uint8_t *out3,
                size_t outlen,
                const uint8_t *in0,
                const uint8_t *in1,
                const uint8_t *in2,
                const uint8_t *in3,
                size_t inlen)
{
  keccakx4_state state;
  uint8_t tail[4][SHAKE128_RATE];
  const size_t nblocks = outlen/SHAKE128_RATE;
  const size_t whole = nblocks*SHAKE128_RATE;
  const size_t rest = outlen - whole;

  shake128x4_absorb_once(&state, in0, in1, in2, in3, inlen);
  shake128x4_squeezeblocks(out0, out1, out2, out3, nblocks, &state);

  if(rest == 0)
    return;

  /* One extra block, of which only the first `rest` bytes are wanted. */
  shake128x4_squeezeblocks(tail[0], tail[1], tail[2], tail[3], 1, &state);
  memcpy(out0 + whole, tail[0], rest);
  memcpy(out1 + whole, tail[1], rest);
  memcpy(out2 + whole, tail[2], rest);
  memcpy(out3 + whole, tail[3], rest);
}
/* 4-way SHAKE256 with non-incremental API: produces outlen bytes in each of
 * out0..out3 from the four inlen-byte inputs in0..in3. Full blocks are
 * written straight to the outputs; a trailing partial block is squeezed
 * into a temporary buffer and only its first bytes are copied out. */
void shake256x4(uint8_t *out0,
                uint8_t *out1,
                uint8_t *out2,
                uint8_t *out3,
                size_t outlen,
                const uint8_t *in0,
                const uint8_t *in1,
                const uint8_t *in2,
                const uint8_t *in3,
                size_t inlen)
{
  keccakx4_state state;
  uint8_t tail[4][SHAKE256_RATE];
  const size_t nblocks = outlen/SHAKE256_RATE;
  const size_t whole = nblocks*SHAKE256_RATE;
  const size_t rest = outlen - whole;

  shake256x4_absorb_once(&state, in0, in1, in2, in3, inlen);
  shake256x4_squeezeblocks(out0, out1, out2, out3, nblocks, &state);

  if(rest == 0)
    return;

  /* One extra block, of which only the first `rest` bytes are wanted. */
  shake256x4_squeezeblocks(tail[0], tail[1], tail[2], tail[3], 1, &state);
  memcpy(out0 + whole, tail[0], rest);
  memcpy(out1 + whole, tail[1], rest);
  memcpy(out2 + whole, tail[2], rest);
  memcpy(out3 + whole, tail[3], rest);
}

View File

@ -1,70 +0,0 @@
/* Public interface of the 4-way parallel SHAKE128/SHAKE256 functions built
 * on 256-bit vector types. */
#ifndef FIPS202X4_H
#define FIPS202X4_H
#include <stddef.h>
#include <stdint.h>
#include <immintrin.h>
/* Prefixes every exported symbol; the per-function #defines below map the
 * short names used in the code onto the prefixed linker names. */
#define FIPS202X4_NAMESPACE(s) pqcrystals_kyber_fips202x4_avx2_##s
/* State of four Keccak instances: s[i] holds lane i of all four instances,
 * one 64-bit element per instance. */
typedef struct {
__m256i s[25];
} keccakx4_state;
/* Initialize the state, absorb four inputs of inlen bytes each and
 * finalize; non-incremental. */
#define shake128x4_absorb_once FIPS202X4_NAMESPACE(shake128x4_absorb_once)
void shake128x4_absorb_once(keccakx4_state *state,
const uint8_t *in0,
const uint8_t *in1,
const uint8_t *in2,
const uint8_t *in3,
size_t inlen);
/* Write nblocks full SHAKE128 blocks to each of the four outputs. */
#define shake128x4_squeezeblocks FIPS202X4_NAMESPACE(shake128x4_squeezeblocks)
void shake128x4_squeezeblocks(uint8_t *out0,
uint8_t *out1,
uint8_t *out2,
uint8_t *out3,
size_t nblocks,
keccakx4_state *state);
/* SHAKE256 counterpart of shake128x4_absorb_once. */
#define shake256x4_absorb_once FIPS202X4_NAMESPACE(shake256x4_absorb_once)
void shake256x4_absorb_once(keccakx4_state *state,
const uint8_t *in0,
const uint8_t *in1,
const uint8_t *in2,
const uint8_t *in3,
size_t inlen);
/* Write nblocks full SHAKE256 blocks to each of the four outputs. */
#define shake256x4_squeezeblocks FIPS202X4_NAMESPACE(shake256x4_squeezeblocks)
void shake256x4_squeezeblocks(uint8_t *out0,
uint8_t *out1,
uint8_t *out2,
uint8_t *out3,
size_t nblocks,
keccakx4_state *state);
/* One-shot 4-way SHAKE128: outlen output bytes per instance, computed from
 * four inputs of inlen bytes each. */
#define shake128x4 FIPS202X4_NAMESPACE(shake128x4)
void shake128x4(uint8_t *out0,
uint8_t *out1,
uint8_t *out2,
uint8_t *out3,
size_t outlen,
const uint8_t *in0,
const uint8_t *in1,
const uint8_t *in2,
const uint8_t *in3,
size_t inlen);
/* One-shot 4-way SHAKE256; same argument layout as shake128x4. */
#define shake256x4 FIPS202X4_NAMESPACE(shake256x4)
void shake256x4(uint8_t *out0,
uint8_t *out1,
uint8_t *out2,
uint8_t *out3,
size_t outlen,
const uint8_t *in0,
const uint8_t *in1,
const uint8_t *in2,
const uint8_t *in3,
size_t inlen);
#endif

View File

@ -1,70 +0,0 @@
/* Public interface of the 4-way parallel SHAKE128/SHAKE256 functions built
 * on 256-bit vector types. */
#ifndef FIPS202X4_H
#define FIPS202X4_H
#include <stddef.h>
#include <stdint.h>
#include <immintrin.h>
/* Prefixes every exported symbol; the per-function #defines below map the
 * short names used in the code onto the prefixed linker names. */
#define FIPS202X4_NAMESPACE(s) pqcrystals_kyber_fips202x4_avx2_##s
/* State of four Keccak instances: one __m256i per lane index, i.e. four
 * 64-bit elements per lane. */
typedef struct {
__m256i s[25];
} keccakx4_state;
/* Initialize the state, absorb four inputs of inlen bytes each and
 * finalize; non-incremental. */
#define shake128x4_absorb_once FIPS202X4_NAMESPACE(shake128x4_absorb_once)
void shake128x4_absorb_once(keccakx4_state *state,
const uint8_t *in0,
const uint8_t *in1,
const uint8_t *in2,
const uint8_t *in3,
size_t inlen);
/* Write nblocks full SHAKE128 blocks to each of the four outputs. */
#define shake128x4_squeezeblocks FIPS202X4_NAMESPACE(shake128x4_squeezeblocks)
void shake128x4_squeezeblocks(uint8_t *out0,
uint8_t *out1,
uint8_t *out2,
uint8_t *out3,
size_t nblocks,
keccakx4_state *state);
/* SHAKE256 counterpart of shake128x4_absorb_once. */
#define shake256x4_absorb_once FIPS202X4_NAMESPACE(shake256x4_absorb_once)
void shake256x4_absorb_once(keccakx4_state *state,
const uint8_t *in0,
const uint8_t *in1,
const uint8_t *in2,
const uint8_t *in3,
size_t inlen);
/* Write nblocks full SHAKE256 blocks to each of the four outputs. */
#define shake256x4_squeezeblocks FIPS202X4_NAMESPACE(shake256x4_squeezeblocks)
void shake256x4_squeezeblocks(uint8_t *out0,
uint8_t *out1,
uint8_t *out2,
uint8_t *out3,
size_t nblocks,
keccakx4_state *state);
/* One-shot 4-way SHAKE128: outlen output bytes per instance, computed from
 * four inputs of inlen bytes each. */
#define shake128x4 FIPS202X4_NAMESPACE(shake128x4)
void shake128x4(uint8_t *out0,
uint8_t *out1,
uint8_t *out2,
uint8_t *out3,
size_t outlen,
const uint8_t *in0,
const uint8_t *in1,
const uint8_t *in2,
const uint8_t *in3,
size_t inlen);
/* One-shot 4-way SHAKE256; same argument layout as shake128x4. */
#define shake256x4 FIPS202X4_NAMESPACE(shake256x4)
void shake256x4(uint8_t *out0,
uint8_t *out1,
uint8_t *out2,
uint8_t *out3,
size_t outlen,
const uint8_t *in0,
const uint8_t *in1,
const uint8_t *in2,
const uint8_t *in3,
size_t inlen);
#endif

View File

@ -1,65 +0,0 @@
/*
Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
denoted as "the implementer".
For more information, feedback or questions, please refer to our websites:
http://keccak.noekeon.org/
http://keyak.noekeon.org/
http://ketje.noekeon.org/
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#ifndef _KeccakP_1600_times4_SnP_h_
#define _KeccakP_1600_times4_SnP_h_
/** For the documentation, see PlSnP-documentation.h.
*/
#include "KeccakP-SIMD256-config.h"
#include "../fips202x4.h"
#define KeccakP1600times4_implementation "256-bit SIMD implementation (" KeccakP1600times4_implementation_config ")"
#define KeccakP1600times4_statesSizeInBytes 800
#define KeccakP1600times4_statesAlignment 32
#define KeccakF1600times4_FastLoop_supported
#define KeccakP1600times4_12rounds_FastLoop_supported
#include <stddef.h>
/* Expands to nothing: this back end needs no one-time static initialization. */
#define KeccakP1600times4_StaticInitialize()
/* Namespaced via FIPS202X4_NAMESPACE; takes the untyped state buffer.
 * NOTE(review): presumably resets all four states to zero -- the definition is
 * not visible here; see PlSnP-documentation.h / the implementation. */
#define KeccakP1600times4_InitializeAll FIPS202X4_NAMESPACE(KeccakP1600times4_InitializeAll)
void KeccakP1600times4_InitializeAll(void *states);
/* XOR a single byte into the state of one instance.
 *
 *   states        - base address of the combined state buffer
 *   instanceIndex - which of the interleaved instances to touch
 *   byte          - value XORed into the selected state byte
 *   offset        - byte offset inside that instance's own state
 *
 * Layout implied by the index expression: lane number offset/8 of every
 * instance lives in one 4*8 = 32-byte row; inside the row, instance
 * `instanceIndex` owns 8 consecutive bytes starting at instanceIndex*8, and
 * offset%8 selects the byte within that lane.
 *
 * The whole expansion is parenthesized so the macro is a single expression in
 * any context (without it, `MACRO(...) + 1` would XOR `byte + 1` into the
 * state). `offset` is evaluated twice: do not pass an expression with side
 * effects. */
#define KeccakP1600times4_AddByte(states, instanceIndex, byte, offset) \
    (((unsigned char*)(states))[(instanceIndex)*8 + ((offset)/8)*4*8 + (offset)%8] ^= (byte))
/* "Add" entry points. Each short name is mapped onto its
 * FIPS202X4_NAMESPACE-prefixed symbol before the prototype is declared.
 *
 * AddBytes addresses one instance (instanceIndex) with a byte offset/length;
 * AddLanesAll takes no instance index and is expressed in lanes
 * (laneCount, laneOffset).
 * NOTE(review): by analogy with the AddByte macro these presumably XOR `data`
 * into the state(s); exact semantics are in PlSnP-documentation.h -- confirm. */
#define KeccakP1600times4_AddBytes FIPS202X4_NAMESPACE(KeccakP1600times4_AddBytes)
void KeccakP1600times4_AddBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length);
#define KeccakP1600times4_AddLanesAll FIPS202X4_NAMESPACE(KeccakP1600times4_AddLanesAll)
void KeccakP1600times4_AddLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset);
/* "Overwrite" entry points. Each short name is mapped onto its
 * FIPS202X4_NAMESPACE-prefixed symbol before the prototype is declared.
 *
 * OverwriteBytes and OverwriteWithZeroes address one instance
 * (instanceIndex); OverwriteLanesAll takes no instance index and is expressed
 * in lanes (laneCount, laneOffset).
 * NOTE(review): presumably these replace state bytes with `data` (or with
 * zeroes) instead of XORing; exact semantics are in PlSnP-documentation.h --
 * confirm. */
#define KeccakP1600times4_OverwriteBytes FIPS202X4_NAMESPACE(KeccakP1600times4_OverwriteBytes)
void KeccakP1600times4_OverwriteBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length);
#define KeccakP1600times4_OverwriteLanesAll FIPS202X4_NAMESPACE(KeccakP1600times4_OverwriteLanesAll)
void KeccakP1600times4_OverwriteLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset);
#define KeccakP1600times4_OverwriteWithZeroes FIPS202X4_NAMESPACE(KeccakP1600times4_OverwriteWithZeroes)
void KeccakP1600times4_OverwriteWithZeroes(void *states, unsigned int instanceIndex, unsigned int byteCount);
/* Permutation entry points, namespaced via FIPS202X4_NAMESPACE. Both take
 * only the combined state buffer.
 * NOTE(review): going by the names, these presumably apply the 12-round and
 * 24-round Keccak-p[1600] permutation to all instances in place -- the
 * definitions are not visible here; confirm. */
#define KeccakP1600times4_PermuteAll_12rounds FIPS202X4_NAMESPACE(KeccakP1600times4_PermuteAll_12rounds)
void KeccakP1600times4_PermuteAll_12rounds(void *states);
#define KeccakP1600times4_PermuteAll_24rounds FIPS202X4_NAMESPACE(KeccakP1600times4_PermuteAll_24rounds)
void KeccakP1600times4_PermuteAll_24rounds(void *states);
/* "Extract" entry points, namespaced via FIPS202X4_NAMESPACE. All of them
 * take the state as `const void *`, i.e. they do not modify it through this
 * pointer.
 *
 * ExtractBytes / ExtractAndAddBytes address one instance (instanceIndex) with
 * a byte offset/length; the *LanesAll variants take no instance index and are
 * expressed in lanes (laneCount, laneOffset). The ExtractAndAdd* variants
 * additionally take a read-only `input` buffer next to the `output` buffer.
 * NOTE(review): presumably ExtractAndAdd* write (state XOR input) to output;
 * exact semantics are in PlSnP-documentation.h -- confirm. */
#define KeccakP1600times4_ExtractBytes FIPS202X4_NAMESPACE(KeccakP1600times4_ExtractBytes)
void KeccakP1600times4_ExtractBytes(const void *states, unsigned int instanceIndex, unsigned char *data, unsigned int offset, unsigned int length);
#define KeccakP1600times4_ExtractLanesAll FIPS202X4_NAMESPACE(KeccakP1600times4_ExtractLanesAll)
void KeccakP1600times4_ExtractLanesAll(const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset);
#define KeccakP1600times4_ExtractAndAddBytes FIPS202X4_NAMESPACE(KeccakP1600times4_ExtractAndAddBytes)
void KeccakP1600times4_ExtractAndAddBytes(const void *states, unsigned int instanceIndex, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length);
#define KeccakP1600times4_ExtractAndAddLanesAll FIPS202X4_NAMESPACE(KeccakP1600times4_ExtractAndAddLanesAll)
void KeccakP1600times4_ExtractAndAddLanesAll(const void *states, const unsigned char *input, unsigned char *output, unsigned int laneCount, unsigned int laneOffset);
/* Bulk-absorb entry points, namespaced via FIPS202X4_NAMESPACE. Both share
 * one signature: state buffer, a lane count, two lane offsets (parallel and
 * serial), the input buffer and its length in bytes; both return a size_t.
 * NOTE(review): the return value is presumably the number of input bytes
 * consumed, and the two offsets presumably describe the stride between the
 * parallel instances and between successive blocks -- confirm against
 * PlSnP-documentation.h. */
#define KeccakF1600times4_FastLoop_Absorb FIPS202X4_NAMESPACE(KeccakF1600times4_FastLoop_Absorb)
size_t KeccakF1600times4_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen);
#define KeccakP1600times4_12rounds_FastLoop_Absorb FIPS202X4_NAMESPACE(KeccakP1600times4_12rounds_FastLoop_Absorb)
size_t KeccakP1600times4_12rounds_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen);
#endif

View File

@ -1,198 +0,0 @@
/*
Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
denoted as "the implementer".
For more information, feedback or questions, please refer to our websites:
http://keccak.noekeon.org/
http://keyak.noekeon.org/
http://ketje.noekeon.org/
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
/*
 * Round schedules rounds24 / rounds12, selected by FullUnrolling or by the
 * numeric value of Unrolling (12, 6, 4, 3, 2 or 1).
 *
 * In every variant rounds24 covers round indices 0..23 and rounds12 covers
 * 12..23. Consecutive rounds alternate between (A -> E) and (E -> A); when a
 * loop body holds an odd number of rounds (Unrolling 3 and 1) it finishes
 * with copyStateVariables(A, E) so the next iteration starts from A again.
 *
 * The including file must provide prepareTheta, thetaRhoPiChiIota,
 * thetaRhoPiChiIotaPrepareTheta and copyStateVariables, and -- for the looped
 * variants -- a loop variable named `i`.
 *
 * The last line of each macro deliberately carries NO trailing backslash: a
 * continuation there would splice the following preprocessor directive into
 * the macro body and break the #if/#elif chain.
 */
#if (defined(FullUnrolling))

#define rounds24 \
    prepareTheta \
    thetaRhoPiChiIotaPrepareTheta( 0, A, E) \
    thetaRhoPiChiIotaPrepareTheta( 1, E, A) \
    thetaRhoPiChiIotaPrepareTheta( 2, A, E) \
    thetaRhoPiChiIotaPrepareTheta( 3, E, A) \
    thetaRhoPiChiIotaPrepareTheta( 4, A, E) \
    thetaRhoPiChiIotaPrepareTheta( 5, E, A) \
    thetaRhoPiChiIotaPrepareTheta( 6, A, E) \
    thetaRhoPiChiIotaPrepareTheta( 7, E, A) \
    thetaRhoPiChiIotaPrepareTheta( 8, A, E) \
    thetaRhoPiChiIotaPrepareTheta( 9, E, A) \
    thetaRhoPiChiIotaPrepareTheta(10, A, E) \
    thetaRhoPiChiIotaPrepareTheta(11, E, A) \
    thetaRhoPiChiIotaPrepareTheta(12, A, E) \
    thetaRhoPiChiIotaPrepareTheta(13, E, A) \
    thetaRhoPiChiIotaPrepareTheta(14, A, E) \
    thetaRhoPiChiIotaPrepareTheta(15, E, A) \
    thetaRhoPiChiIotaPrepareTheta(16, A, E) \
    thetaRhoPiChiIotaPrepareTheta(17, E, A) \
    thetaRhoPiChiIotaPrepareTheta(18, A, E) \
    thetaRhoPiChiIotaPrepareTheta(19, E, A) \
    thetaRhoPiChiIotaPrepareTheta(20, A, E) \
    thetaRhoPiChiIotaPrepareTheta(21, E, A) \
    thetaRhoPiChiIotaPrepareTheta(22, A, E) \
    thetaRhoPiChiIota(23, E, A)

#define rounds12 \
    prepareTheta \
    thetaRhoPiChiIotaPrepareTheta(12, A, E) \
    thetaRhoPiChiIotaPrepareTheta(13, E, A) \
    thetaRhoPiChiIotaPrepareTheta(14, A, E) \
    thetaRhoPiChiIotaPrepareTheta(15, E, A) \
    thetaRhoPiChiIotaPrepareTheta(16, A, E) \
    thetaRhoPiChiIotaPrepareTheta(17, E, A) \
    thetaRhoPiChiIotaPrepareTheta(18, A, E) \
    thetaRhoPiChiIotaPrepareTheta(19, E, A) \
    thetaRhoPiChiIotaPrepareTheta(20, A, E) \
    thetaRhoPiChiIotaPrepareTheta(21, E, A) \
    thetaRhoPiChiIotaPrepareTheta(22, A, E) \
    thetaRhoPiChiIota(23, E, A)

#elif (Unrolling == 12)

#define rounds24 \
    prepareTheta \
    for(i=0; i<24; i+=12) { \
        thetaRhoPiChiIotaPrepareTheta(i   , A, E) \
        thetaRhoPiChiIotaPrepareTheta(i+ 1, E, A) \
        thetaRhoPiChiIotaPrepareTheta(i+ 2, A, E) \
        thetaRhoPiChiIotaPrepareTheta(i+ 3, E, A) \
        thetaRhoPiChiIotaPrepareTheta(i+ 4, A, E) \
        thetaRhoPiChiIotaPrepareTheta(i+ 5, E, A) \
        thetaRhoPiChiIotaPrepareTheta(i+ 6, A, E) \
        thetaRhoPiChiIotaPrepareTheta(i+ 7, E, A) \
        thetaRhoPiChiIotaPrepareTheta(i+ 8, A, E) \
        thetaRhoPiChiIotaPrepareTheta(i+ 9, E, A) \
        thetaRhoPiChiIotaPrepareTheta(i+10, A, E) \
        thetaRhoPiChiIotaPrepareTheta(i+11, E, A) \
    }

#define rounds12 \
    prepareTheta \
    thetaRhoPiChiIotaPrepareTheta(12, A, E) \
    thetaRhoPiChiIotaPrepareTheta(13, E, A) \
    thetaRhoPiChiIotaPrepareTheta(14, A, E) \
    thetaRhoPiChiIotaPrepareTheta(15, E, A) \
    thetaRhoPiChiIotaPrepareTheta(16, A, E) \
    thetaRhoPiChiIotaPrepareTheta(17, E, A) \
    thetaRhoPiChiIotaPrepareTheta(18, A, E) \
    thetaRhoPiChiIotaPrepareTheta(19, E, A) \
    thetaRhoPiChiIotaPrepareTheta(20, A, E) \
    thetaRhoPiChiIotaPrepareTheta(21, E, A) \
    thetaRhoPiChiIotaPrepareTheta(22, A, E) \
    thetaRhoPiChiIota(23, E, A)

#elif (Unrolling == 6)

#define rounds24 \
    prepareTheta \
    for(i=0; i<24; i+=6) { \
        thetaRhoPiChiIotaPrepareTheta(i  , A, E) \
        thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
        thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
        thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \
        thetaRhoPiChiIotaPrepareTheta(i+4, A, E) \
        thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \
    }

#define rounds12 \
    prepareTheta \
    for(i=12; i<24; i+=6) { \
        thetaRhoPiChiIotaPrepareTheta(i  , A, E) \
        thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
        thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
        thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \
        thetaRhoPiChiIotaPrepareTheta(i+4, A, E) \
        thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \
    }

#elif (Unrolling == 4)

#define rounds24 \
    prepareTheta \
    for(i=0; i<24; i+=4) { \
        thetaRhoPiChiIotaPrepareTheta(i  , A, E) \
        thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
        thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
        thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \
    }

#define rounds12 \
    prepareTheta \
    for(i=12; i<24; i+=4) { \
        thetaRhoPiChiIotaPrepareTheta(i  , A, E) \
        thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
        thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
        thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \
    }

#elif (Unrolling == 3)

#define rounds24 \
    prepareTheta \
    for(i=0; i<24; i+=3) { \
        thetaRhoPiChiIotaPrepareTheta(i  , A, E) \
        thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
        thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
        copyStateVariables(A, E) \
    }

#define rounds12 \
    prepareTheta \
    for(i=12; i<24; i+=3) { \
        thetaRhoPiChiIotaPrepareTheta(i  , A, E) \
        thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
        thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
        copyStateVariables(A, E) \
    }

#elif (Unrolling == 2)

#define rounds24 \
    prepareTheta \
    for(i=0; i<24; i+=2) { \
        thetaRhoPiChiIotaPrepareTheta(i  , A, E) \
        thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
    }

#define rounds12 \
    prepareTheta \
    for(i=12; i<24; i+=2) { \
        thetaRhoPiChiIotaPrepareTheta(i  , A, E) \
        thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
    }

#elif (Unrolling == 1)

#define rounds24 \
    prepareTheta \
    for(i=0; i<24; i++) { \
        thetaRhoPiChiIotaPrepareTheta(i  , A, E) \
        copyStateVariables(A, E) \
    }

#define rounds12 \
    prepareTheta \
    for(i=12; i<24; i++) { \
        thetaRhoPiChiIotaPrepareTheta(i  , A, E) \
        copyStateVariables(A, E) \
    }

#else
#error "Unrolling is not correctly specified!"
#endif
/*
 * roundsN(n): run the final n rounds, i.e. round indices 24-n .. 23.
 *
 * The main loop executes rounds in (A -> E, E -> A) pairs, so when the first
 * index is odd one extra round is peeled off up front and followed by
 * copyStateVariables(A, E) before `i` is advanced to the next (even) index.
 * Expects the including code to provide the loop variable `i` and the
 * prepareTheta / thetaRhoPiChiIotaPrepareTheta / copyStateVariables macros.
 */
#define roundsN(nrounds_) \
    prepareTheta \
    i = 24 - (nrounds_); \
    if ((i & 1) != 0) { \
        /* odd start: single round, then restore A for the pair loop */ \
        thetaRhoPiChiIotaPrepareTheta(i, A, E) \
        copyStateVariables(A, E) \
        ++i; \
    } \
    for ( ; i < 24; i += 2) { \
        thetaRhoPiChiIotaPrepareTheta(i    , A, E) \
        thetaRhoPiChiIotaPrepareTheta(i + 1, E, A) \
    }

View File

@ -1,3 +0,0 @@
/* Build-time configuration of the four-way Keccak-p[1600] implementation. */
/* String fragment describing the chosen configuration. */
#define KeccakP1600times4_implementation_config "AVX2, all rounds unrolled"
/* Value-less switches.
 * NOTE(review): presumably tested by the SIMD256 implementation to select the
 * fully unrolled round schedule and the AVX2 code path -- the consumer is not
 * visible here; confirm. */
#define KeccakP1600times4_fullUnrolling
#define KeccakP1600times4_useAVX2

View File

@ -1,34 +0,0 @@
/*
Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
denoted as "the implementer".
For more information, feedback or questions, please refer to our websites:
http://keccak.noekeon.org/
http://keyak.noekeon.org/
http://ketje.noekeon.org/
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#ifndef _keccakp_align_h_
#define _keccakp_align_h_
/*
 * ALIGN(x): compiler-specific spelling of "align this object to x bytes".
 *
 * Any earlier definition is dropped first: on Mac OS X (and possibly other
 * systems) param.h already defines ALIGN(x), and a redefinition trips
 * -Werror. #undef on a name that is not defined is a no-op, so no guard is
 * needed. Compilers matching none of the known families get an empty
 * ALIGN(x), i.e. no alignment request is emitted.
 */
#undef ALIGN
#if !defined(__GNUC__) && !defined(_MSC_VER) && !defined(__ARMCC_VERSION)
#  define ALIGN(x)
#elif defined(__GNUC__)
#  define ALIGN(x) __attribute__((aligned(x)))
#elif defined(_MSC_VER)
#  define ALIGN(x) __declspec(align(x))
#else
#  define ALIGN(x) __align(x)
#endif
#endif

View File

@ -1,142 +0,0 @@
/*
---------------------------------------------------------------------------
Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
LICENSE TERMS
The redistribution and use of this software (with or without changes)
is allowed without the payment of fees or royalties provided that:
1. source code distributions include the above copyright notice, this
list of conditions and the following disclaimer;
2. binary distributions include the above copyright notice, this list
of conditions and the following disclaimer in their documentation;
3. the name of the copyright holder is not used to endorse products
built using this software without specific written permission.
DISCLAIMER
This software is provided 'as is' with no explicit or implied warranties
in respect of its properties, including, but not limited to, correctness
and/or fitness for purpose.
---------------------------------------------------------------------------
Issue Date: 20/12/2007
Changes for ARM 9/9/2010
*/
#ifndef _KECCAKP_BRG_ENDIAN_H
#define _KECCAKP_BRG_ENDIAN_H
/* Tag values that PLATFORM_BYTE_ORDER is set to further down. */
#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */
#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */
/* The system-header probe below is compiled out by `#if 0`: none of these
 * headers is included from here, so the detection that follows only sees
 * endianness macros that are already defined when this file is included. */
#if 0
/* Include files where endian defines and byteswap functions may reside */
#if defined( __sun )
# include <sys/isa_defs.h>
#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ )
# include <sys/endian.h>
#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \
defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ )
# include <machine/endian.h>
#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
# if !defined( __MINGW32__ ) && !defined( _AIX )
# include <endian.h>
# if !defined( __BEOS__ )
# include <byteswap.h>
# endif
# endif
#endif
#endif
/* Now attempt to set the define for platform byte order using any */
/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */
/* seem to encompass most endian symbol definitions */
/* Each of the four groups below applies the same rule to one spelling:     */
/*  - both *BIG_ENDIAN and *LITTLE_ENDIAN defined: they are constants, so   */
/*    the matching *BYTE_ORDER symbol is compared against them; if it is    */
/*    missing or equals neither, the group defines nothing;                 */
/*  - only one of the two defined: its mere presence selects the order.     */
/* The groups are not guarded against each other, so more than one of them  */
/* may define PLATFORM_BYTE_ORDER.                                          */
/* Form 1: SYMBOL */
#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN )
# if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
# elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
# endif
#elif defined( BIG_ENDIAN )
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif defined( LITTLE_ENDIAN )
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#endif
/* Form 2: _SYMBOL */
#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN )
# if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
# elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
# endif
#elif defined( _BIG_ENDIAN )
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif defined( _LITTLE_ENDIAN )
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#endif
/* Form 3: __SYMBOL */
#if defined( __BIG_ENDIAN ) && defined( __LITTLE_ENDIAN )
# if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
# elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
# endif
#elif defined( __BIG_ENDIAN )
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif defined( __LITTLE_ENDIAN )
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#endif
/* Form 4: __SYMBOL__ */
#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ )
# if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
# elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
# endif
#elif defined( __BIG_ENDIAN__ )
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif defined( __LITTLE_ENDIAN__ )
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#endif
/* if the platform byte order could not be determined, then try to */
/* set this define using common machine defines */
#if !defined(PLATFORM_BYTE_ORDER)
/* First ask the compiler itself: GCC and Clang predefine __BYTE_ORDER__   */
/* together with __ORDER_BIG_ENDIAN__ / __ORDER_LITTLE_ENDIAN__. This keeps */
/* big-endian targets that are missing from the machine list below from    */
/* silently falling through to the little-endian default. Any other value  */
/* (e.g. PDP order) matches neither test and continues with the list.      */
#if defined( __BYTE_ORDER__ ) && defined( __ORDER_BIG_ENDIAN__ ) && \
    ( __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ )
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif defined( __BYTE_ORDER__ ) && defined( __ORDER_LITTLE_ENDIAN__ ) && \
    ( __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ )
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
/* Machines/compilers known to be little-endian */
#elif defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \
    defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \
    defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \
    defined( vax ) || defined( vms ) || defined( VMS ) || \
    defined( __VMS ) || defined( _M_X64 )
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
/* Machines/compilers known to be big-endian */
#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \
    defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \
    defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \
    defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \
    defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \
    defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \
    defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX )
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
/* ARM can run in either order; __BIG_ENDIAN being defined selects big */
#elif defined(__arm__)
# ifdef __BIG_ENDIAN
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
# else
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
# endif
/* Default for anything still unknown: little-endian. Swap the 1 and 0 of  */
/* the next two tests to default to big-endian instead.                     */
#elif 1 /* **** EDIT HERE IF NECESSARY **** */
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#elif 0 /* **** EDIT HERE IF NECESSARY **** */
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#else
# error Cannot determine the platform byte order - edit the EDIT HERE branches in brg_endian.h
#endif
#endif
#endif

View File

@ -1,907 +0,0 @@
/* Taken from Bas Westerbaan's new 4-way SHAKE implementation
* for Sphincs+ (https://github.com/sphincs/sphincsplus/pull/14/),
* but uses vpshufb for byte-granular rotations as in the Keccak Code Package. */
#include "fips202x4.h"
.data
/* 2^5 = 32-byte alignment; rho8 is fetched with an aligned 256-bit load
 * (vmovdqa) at the top of f1600x4. */
.p2align 5
/* vpshufb byte-selection masks, one 16-byte pattern repeated for both halves
 * of a ymm register. Within every 64-bit word, output byte k takes input
 * byte (k-1) mod 8, i.e. the word is rotated left by 8 bits -- this replaces
 * the commented-out vpsllq $8 / vpsrlq $56 / vpor sequence in the round code. */
rho8:
.byte 7,0,1,2,3,4,5,6,15,8,9,10,11,12,13,14,7,0,1,2,3,4,5,6,15,8,9,10,11,12,13,14
/* Same idea with output byte k taking input byte (k+1) mod 8: each 64-bit
 * word is rotated left by 56 bits (right by 8), replacing the commented-out
 * vpsllq $56 / vpsrlq $8 / vpor sequence. */
rho56:
.byte 1,2,3,4,5,6,7,0,9,10,11,12,13,14,15,8,1,2,3,4,5,6,7,0,9,10,11,12,13,14,15,8
.text
.global cdecl(f1600x4)
cdecl(f1600x4):
vmovdqa rho8(%rip), %ymm0
movq $6, %rax
looptop:
vmovdqa 0(%rdi), %ymm8
vmovdqa 32(%rdi), %ymm9
vmovdqa 64(%rdi), %ymm10
vmovdqa 96(%rdi), %ymm11
vmovdqa 128(%rdi), %ymm12
vpxor 160(%rdi), %ymm8, %ymm8
vpxor 192(%rdi), %ymm9, %ymm9
vpxor 224(%rdi), %ymm10, %ymm10
vpxor 256(%rdi), %ymm11, %ymm11
vpxor 288(%rdi), %ymm12, %ymm12
vpxor 320(%rdi), %ymm8, %ymm8
vpxor 352(%rdi), %ymm9, %ymm9
vpxor 384(%rdi), %ymm10, %ymm10
vpxor 416(%rdi), %ymm11, %ymm11
vpxor 448(%rdi), %ymm12, %ymm12
vpxor 480(%rdi), %ymm8, %ymm8
vpxor 512(%rdi), %ymm9, %ymm9
vpxor 544(%rdi), %ymm10, %ymm10
vpxor 576(%rdi), %ymm11, %ymm11
vpxor 608(%rdi), %ymm12, %ymm12
vpxor 640(%rdi), %ymm8, %ymm8
vpxor 672(%rdi), %ymm9, %ymm9
vpxor 704(%rdi), %ymm10, %ymm10
vpxor 736(%rdi), %ymm11, %ymm11
vpxor 768(%rdi), %ymm12, %ymm12
vpsllq $1, %ymm9, %ymm13
vpsllq $1, %ymm10, %ymm14
vpsllq $1, %ymm11, %ymm15
vpsllq $1, %ymm12, %ymm7
vpsllq $1, %ymm8, %ymm6
vpsrlq $63, %ymm9, %ymm5
vpsrlq $63, %ymm10, %ymm4
vpsrlq $63, %ymm11, %ymm3
vpsrlq $63, %ymm12, %ymm2
vpsrlq $63, %ymm8, %ymm1
vpor %ymm13, %ymm5, %ymm5
vpor %ymm14, %ymm4, %ymm4
vpor %ymm15, %ymm3, %ymm3
vpor %ymm7, %ymm2, %ymm2
vpor %ymm6, %ymm1, %ymm1
vpxor %ymm5, %ymm12, %ymm5
vpxor %ymm4, %ymm8, %ymm4
vpxor %ymm3, %ymm9, %ymm3
vpxor %ymm2, %ymm10, %ymm2
vpxor %ymm1, %ymm11, %ymm1
vpxor 0(%rdi), %ymm5, %ymm8
vpxor 192(%rdi), %ymm4, %ymm9
vpxor 384(%rdi), %ymm3, %ymm10
vpxor 576(%rdi), %ymm2, %ymm11
vpxor 768(%rdi), %ymm1, %ymm12
vpsllq $44, %ymm9, %ymm14
vpsllq $43, %ymm10, %ymm15
vpsllq $21, %ymm11, %ymm7
vpsllq $14, %ymm12, %ymm6
vpsrlq $20, %ymm9, %ymm9
vpsrlq $21, %ymm10, %ymm10
vpsrlq $43, %ymm11, %ymm11
vpsrlq $50, %ymm12, %ymm12
vpor %ymm14, %ymm9, %ymm9
vpor %ymm15, %ymm10, %ymm10
vpor %ymm7, %ymm11, %ymm11
vpor %ymm6, %ymm12, %ymm12
vpandn %ymm10, %ymm9, %ymm13
vpandn %ymm11, %ymm10, %ymm14
vpandn %ymm12, %ymm11, %ymm15
vpandn %ymm8, %ymm12, %ymm7
vpandn %ymm9, %ymm8, %ymm6
vpxor %ymm8, %ymm13, %ymm13
vpxor %ymm9, %ymm14, %ymm14
vpxor %ymm10, %ymm15, %ymm15
vpxor %ymm11, %ymm7, %ymm7
vpxor %ymm12, %ymm6, %ymm6
vpbroadcastq 0(%rsi), %ymm8
vpxor %ymm8, %ymm13, %ymm13
vmovdqa %ymm13, 0(%rdi)
vmovdqa %ymm14, 192(%rdi)
vmovdqa %ymm15, 384(%rdi)
vmovdqa %ymm7, 576(%rdi)
vmovdqa %ymm6, 768(%rdi)
vpxor 96(%rdi), %ymm2, %ymm8
vpxor 288(%rdi), %ymm1, %ymm9
vpxor 320(%rdi), %ymm5, %ymm10
vpxor 512(%rdi), %ymm4, %ymm11
vpxor 704(%rdi), %ymm3, %ymm12
vpsllq $28, %ymm8, %ymm13
vpsllq $20, %ymm9, %ymm14
vpsllq $3, %ymm10, %ymm15
vpsllq $45, %ymm11, %ymm7
vpsllq $61, %ymm12, %ymm6
vpsrlq $36, %ymm8, %ymm8
vpsrlq $44, %ymm9, %ymm9
vpsrlq $61, %ymm10, %ymm10
vpsrlq $19, %ymm11, %ymm11
vpsrlq $3, %ymm12, %ymm12
vpor %ymm13, %ymm8, %ymm8
vpor %ymm14, %ymm9, %ymm9
vpor %ymm15, %ymm10, %ymm10
vpor %ymm7, %ymm11, %ymm11
vpor %ymm6, %ymm12, %ymm12
vpandn %ymm10, %ymm9, %ymm13
vpandn %ymm11, %ymm10, %ymm14
vpandn %ymm12, %ymm11, %ymm15
vpandn %ymm8, %ymm12, %ymm7
vpandn %ymm9, %ymm8, %ymm6
vpxor %ymm8, %ymm13, %ymm13
vpxor %ymm9, %ymm14, %ymm14
vpxor %ymm10, %ymm15, %ymm15
vpxor %ymm11, %ymm7, %ymm7
vpxor %ymm12, %ymm6, %ymm6
vmovdqa %ymm13, 320(%rdi)
vmovdqa %ymm14, 512(%rdi)
vmovdqa %ymm15, 704(%rdi)
vmovdqa %ymm7, 96(%rdi)
vmovdqa %ymm6, 288(%rdi)
vpxor 32(%rdi), %ymm4, %ymm8
vpxor 224(%rdi), %ymm3, %ymm9
vpxor 416(%rdi), %ymm2, %ymm10
vpxor 608(%rdi), %ymm1, %ymm11
vpxor 640(%rdi), %ymm5, %ymm12
vpsllq $1, %ymm8, %ymm13
vpsllq $6, %ymm9, %ymm14
vpsllq $25, %ymm10, %ymm15
#vpsllq $8, %ymm11, %ymm7
vpsllq $18, %ymm12, %ymm6
vpsrlq $63, %ymm8, %ymm8
vpsrlq $58, %ymm9, %ymm9
vpsrlq $39, %ymm10, %ymm10
#vpsrlq $56, %ymm11, %ymm11
vpsrlq $46, %ymm12, %ymm12
vpor %ymm13, %ymm8, %ymm8
vpor %ymm14, %ymm9, %ymm9
vpor %ymm15, %ymm10, %ymm10
#vpor %ymm7, %ymm11, %ymm11
vpshufb %ymm0, %ymm11, %ymm11
vpor %ymm6, %ymm12, %ymm12
vpandn %ymm10, %ymm9, %ymm13
vpandn %ymm11, %ymm10, %ymm14
vpandn %ymm12, %ymm11, %ymm15
vpandn %ymm8, %ymm12, %ymm7
vpandn %ymm9, %ymm8, %ymm6
vpxor %ymm8, %ymm13, %ymm13
vpxor %ymm9, %ymm14, %ymm14
vpxor %ymm10, %ymm15, %ymm15
vpxor %ymm11, %ymm7, %ymm7
vpxor %ymm12, %ymm6, %ymm6
vmovdqa %ymm13, 640(%rdi)
vmovdqa %ymm14, 32(%rdi)
vmovdqa %ymm15, 224(%rdi)
vmovdqa %ymm7, 416(%rdi)
vmovdqa %ymm6, 608(%rdi)
vpxor 128(%rdi), %ymm1, %ymm8
vpxor 160(%rdi), %ymm5, %ymm9
vpxor 352(%rdi), %ymm4, %ymm10
vpxor 544(%rdi), %ymm3, %ymm11
vpxor 736(%rdi), %ymm2, %ymm12
vpsllq $27, %ymm8, %ymm13
vpsllq $36, %ymm9, %ymm14
vpsllq $10, %ymm10, %ymm15
vpsllq $15, %ymm11, %ymm7
#vpsllq $56, %ymm12, %ymm6
vpsrlq $37, %ymm8, %ymm8
vpsrlq $28, %ymm9, %ymm9
vpsrlq $54, %ymm10, %ymm10
vpsrlq $49, %ymm11, %ymm11
#vpsrlq $8, %ymm12, %ymm12
vpor %ymm13, %ymm8, %ymm8
vpor %ymm14, %ymm9, %ymm9
vpor %ymm15, %ymm10, %ymm10
vpor %ymm7, %ymm11, %ymm11
#vpor %ymm6, %ymm12, %ymm12
vpshufb rho56(%rip), %ymm12, %ymm12
vpandn %ymm10, %ymm9, %ymm13
vpandn %ymm11, %ymm10, %ymm14
vpandn %ymm12, %ymm11, %ymm15
vpandn %ymm8, %ymm12, %ymm7
vpandn %ymm9, %ymm8, %ymm6
vpxor %ymm8, %ymm13, %ymm13
vpxor %ymm9, %ymm14, %ymm14
vpxor %ymm10, %ymm15, %ymm15
vpxor %ymm11, %ymm7, %ymm7
vpxor %ymm12, %ymm6, %ymm6
vmovdqa %ymm13, 160(%rdi)
vmovdqa %ymm14, 352(%rdi)
vmovdqa %ymm15, 544(%rdi)
vmovdqa %ymm7, 736(%rdi)
vmovdqa %ymm6, 128(%rdi)
vpxor 64(%rdi), %ymm3, %ymm8
vpxor 256(%rdi), %ymm2, %ymm9
vpxor 448(%rdi), %ymm1, %ymm10
vpxor 480(%rdi), %ymm5, %ymm11
vpxor 672(%rdi), %ymm4, %ymm12
vpsllq $62, %ymm8, %ymm13
vpsllq $55, %ymm9, %ymm14
vpsllq $39, %ymm10, %ymm15
vpsllq $41, %ymm11, %ymm7
vpsllq $2, %ymm12, %ymm6
vpsrlq $2, %ymm8, %ymm8
vpsrlq $9, %ymm9, %ymm9
vpsrlq $25, %ymm10, %ymm10
vpsrlq $23, %ymm11, %ymm11
vpsrlq $62, %ymm12, %ymm12
vpor %ymm13, %ymm8, %ymm8
vpor %ymm14, %ymm9, %ymm9
vpor %ymm15, %ymm10, %ymm10
vpor %ymm7, %ymm11, %ymm11
vpor %ymm6, %ymm12, %ymm12
vpandn %ymm10, %ymm9, %ymm13
vpandn %ymm11, %ymm10, %ymm14
vpandn %ymm12, %ymm11, %ymm15
vpandn %ymm8, %ymm12, %ymm7
vpandn %ymm9, %ymm8, %ymm6
vpxor %ymm8, %ymm13, %ymm13
vpxor %ymm9, %ymm14, %ymm14
vpxor %ymm10, %ymm15, %ymm15
vpxor %ymm11, %ymm7, %ymm7
vpxor %ymm12, %ymm6, %ymm6
vmovdqa %ymm13, 480(%rdi)
vmovdqa %ymm14, 672(%rdi)
vmovdqa %ymm15, 64(%rdi)
vmovdqa %ymm7, 256(%rdi)
vmovdqa %ymm6, 448(%rdi)
vmovdqa 0(%rdi), %ymm8
vmovdqa 32(%rdi), %ymm9
vmovdqa 64(%rdi), %ymm10
vmovdqa 96(%rdi), %ymm11
vmovdqa 128(%rdi), %ymm12
vpxor 160(%rdi), %ymm8, %ymm8
vpxor 192(%rdi), %ymm9, %ymm9
vpxor 224(%rdi), %ymm10, %ymm10
vpxor 256(%rdi), %ymm11, %ymm11
vpxor 288(%rdi), %ymm12, %ymm12
vpxor 320(%rdi), %ymm8, %ymm8
vpxor 352(%rdi), %ymm9, %ymm9
vpxor 384(%rdi), %ymm10, %ymm10
vpxor 416(%rdi), %ymm11, %ymm11
vpxor 448(%rdi), %ymm12, %ymm12
vpxor 480(%rdi), %ymm8, %ymm8
vpxor 512(%rdi), %ymm9, %ymm9
vpxor 544(%rdi), %ymm10, %ymm10
vpxor 576(%rdi), %ymm11, %ymm11
vpxor 608(%rdi), %ymm12, %ymm12
vpxor 640(%rdi), %ymm8, %ymm8
vpxor 672(%rdi), %ymm9, %ymm9
vpxor 704(%rdi), %ymm10, %ymm10
vpxor 736(%rdi), %ymm11, %ymm11
vpxor 768(%rdi), %ymm12, %ymm12
vpsllq $1, %ymm9, %ymm13
vpsllq $1, %ymm10, %ymm14
vpsllq $1, %ymm11, %ymm15
vpsllq $1, %ymm12, %ymm7
vpsllq $1, %ymm8, %ymm6
vpsrlq $63, %ymm9, %ymm5
vpsrlq $63, %ymm10, %ymm4
vpsrlq $63, %ymm11, %ymm3
vpsrlq $63, %ymm12, %ymm2
vpsrlq $63, %ymm8, %ymm1
vpor %ymm13, %ymm5, %ymm5
vpor %ymm14, %ymm4, %ymm4
vpor %ymm15, %ymm3, %ymm3
vpor %ymm7, %ymm2, %ymm2
vpor %ymm6, %ymm1, %ymm1
vpxor %ymm5, %ymm12, %ymm5
vpxor %ymm4, %ymm8, %ymm4
vpxor %ymm3, %ymm9, %ymm3
vpxor %ymm2, %ymm10, %ymm2
vpxor %ymm1, %ymm11, %ymm1
vpxor 0(%rdi), %ymm5, %ymm8
vpxor 512(%rdi), %ymm4, %ymm9
vpxor 224(%rdi), %ymm3, %ymm10
vpxor 736(%rdi), %ymm2, %ymm11
vpxor 448(%rdi), %ymm1, %ymm12
vpsllq $44, %ymm9, %ymm14
vpsllq $43, %ymm10, %ymm15
vpsllq $21, %ymm11, %ymm7
vpsllq $14, %ymm12, %ymm6
vpsrlq $20, %ymm9, %ymm9
vpsrlq $21, %ymm10, %ymm10
vpsrlq $43, %ymm11, %ymm11
vpsrlq $50, %ymm12, %ymm12
vpor %ymm14, %ymm9, %ymm9
vpor %ymm15, %ymm10, %ymm10
vpor %ymm7, %ymm11, %ymm11
vpor %ymm6, %ymm12, %ymm12
vpandn %ymm10, %ymm9, %ymm13
vpandn %ymm11, %ymm10, %ymm14
vpandn %ymm12, %ymm11, %ymm15
vpandn %ymm8, %ymm12, %ymm7
vpandn %ymm9, %ymm8, %ymm6
vpxor %ymm8, %ymm13, %ymm13
vpxor %ymm9, %ymm14, %ymm14
vpxor %ymm10, %ymm15, %ymm15
vpxor %ymm11, %ymm7, %ymm7
vpxor %ymm12, %ymm6, %ymm6
vpbroadcastq 8(%rsi), %ymm8
vpxor %ymm8, %ymm13, %ymm13
vmovdqa %ymm13, 0(%rdi)
vmovdqa %ymm14, 512(%rdi)
vmovdqa %ymm15, 224(%rdi)
vmovdqa %ymm7, 736(%rdi)
vmovdqa %ymm6, 448(%rdi)
vpxor 576(%rdi), %ymm2, %ymm8
vpxor 288(%rdi), %ymm1, %ymm9
vpxor 640(%rdi), %ymm5, %ymm10
vpxor 352(%rdi), %ymm4, %ymm11
vpxor 64(%rdi), %ymm3, %ymm12
vpsllq $28, %ymm8, %ymm13
vpsllq $20, %ymm9, %ymm14
vpsllq $3, %ymm10, %ymm15
vpsllq $45, %ymm11, %ymm7
vpsllq $61, %ymm12, %ymm6
vpsrlq $36, %ymm8, %ymm8
vpsrlq $44, %ymm9, %ymm9
vpsrlq $61, %ymm10, %ymm10
vpsrlq $19, %ymm11, %ymm11
vpsrlq $3, %ymm12, %ymm12
vpor %ymm13, %ymm8, %ymm8
vpor %ymm14, %ymm9, %ymm9
vpor %ymm15, %ymm10, %ymm10
vpor %ymm7, %ymm11, %ymm11
vpor %ymm6, %ymm12, %ymm12
vpandn %ymm10, %ymm9, %ymm13
vpandn %ymm11, %ymm10, %ymm14
vpandn %ymm12, %ymm11, %ymm15
vpandn %ymm8, %ymm12, %ymm7
vpandn %ymm9, %ymm8, %ymm6
vpxor %ymm8, %ymm13, %ymm13
vpxor %ymm9, %ymm14, %ymm14
vpxor %ymm10, %ymm15, %ymm15
vpxor %ymm11, %ymm7, %ymm7
vpxor %ymm12, %ymm6, %ymm6
vmovdqa %ymm13, 640(%rdi)
vmovdqa %ymm14, 352(%rdi)
vmovdqa %ymm15, 64(%rdi)
vmovdqa %ymm7, 576(%rdi)
vmovdqa %ymm6, 288(%rdi)
vpxor 192(%rdi), %ymm4, %ymm8
vpxor 704(%rdi), %ymm3, %ymm9
vpxor 416(%rdi), %ymm2, %ymm10
vpxor 128(%rdi), %ymm1, %ymm11
vpxor 480(%rdi), %ymm5, %ymm12
vpsllq $1, %ymm8, %ymm13
vpsllq $6, %ymm9, %ymm14
vpsllq $25, %ymm10, %ymm15
#vpsllq $8, %ymm11, %ymm7
vpsllq $18, %ymm12, %ymm6
vpsrlq $63, %ymm8, %ymm8
vpsrlq $58, %ymm9, %ymm9
vpsrlq $39, %ymm10, %ymm10
#vpsrlq $56, %ymm11, %ymm11
vpsrlq $46, %ymm12, %ymm12
vpor %ymm13, %ymm8, %ymm8
vpor %ymm14, %ymm9, %ymm9
vpor %ymm15, %ymm10, %ymm10
#vpor %ymm7, %ymm11, %ymm11
vpshufb %ymm0, %ymm11, %ymm11
vpor %ymm6, %ymm12, %ymm12
vpandn %ymm10, %ymm9, %ymm13
vpandn %ymm11, %ymm10, %ymm14
vpandn %ymm12, %ymm11, %ymm15
vpandn %ymm8, %ymm12, %ymm7
vpandn %ymm9, %ymm8, %ymm6
vpxor %ymm8, %ymm13, %ymm13
vpxor %ymm9, %ymm14, %ymm14
vpxor %ymm10, %ymm15, %ymm15
vpxor %ymm11, %ymm7, %ymm7
vpxor %ymm12, %ymm6, %ymm6
vmovdqa %ymm13, 480(%rdi)
vmovdqa %ymm14, 192(%rdi)
vmovdqa %ymm15, 704(%rdi)
vmovdqa %ymm7, 416(%rdi)
vmovdqa %ymm6, 128(%rdi)
vpxor 768(%rdi), %ymm1, %ymm8
vpxor 320(%rdi), %ymm5, %ymm9
vpxor 32(%rdi), %ymm4, %ymm10
vpxor 544(%rdi), %ymm3, %ymm11
vpxor 256(%rdi), %ymm2, %ymm12
vpsllq $27, %ymm8, %ymm13
vpsllq $36, %ymm9, %ymm14
vpsllq $10, %ymm10, %ymm15
vpsllq $15, %ymm11, %ymm7
#vpsllq $56, %ymm12, %ymm6
vpsrlq $37, %ymm8, %ymm8
vpsrlq $28, %ymm9, %ymm9
vpsrlq $54, %ymm10, %ymm10
vpsrlq $49, %ymm11, %ymm11
#vpsrlq $8, %ymm12, %ymm12
vpor %ymm13, %ymm8, %ymm8
vpor %ymm14, %ymm9, %ymm9
vpor %ymm15, %ymm10, %ymm10
vpor %ymm7, %ymm11, %ymm11
#vpor %ymm6, %ymm12, %ymm12
vpshufb rho56(%rip), %ymm12, %ymm12
vpandn %ymm10, %ymm9, %ymm13
vpandn %ymm11, %ymm10, %ymm14
vpandn %ymm12, %ymm11, %ymm15
vpandn %ymm8, %ymm12, %ymm7
vpandn %ymm9, %ymm8, %ymm6
vpxor %ymm8, %ymm13, %ymm13
vpxor %ymm9, %ymm14, %ymm14
vpxor %ymm10, %ymm15, %ymm15
vpxor %ymm11, %ymm7, %ymm7
vpxor %ymm12, %ymm6, %ymm6
vmovdqa %ymm13, 320(%rdi)
vmovdqa %ymm14, 32(%rdi)
vmovdqa %ymm15, 544(%rdi)
vmovdqa %ymm7, 256(%rdi)
vmovdqa %ymm6, 768(%rdi)
vpxor 384(%rdi), %ymm3, %ymm8
vpxor 96(%rdi), %ymm2, %ymm9
vpxor 608(%rdi), %ymm1, %ymm10
vpxor 160(%rdi), %ymm5, %ymm11
vpxor 672(%rdi), %ymm4, %ymm12
vpsllq $62, %ymm8, %ymm13
vpsllq $55, %ymm9, %ymm14
vpsllq $39, %ymm10, %ymm15
vpsllq $41, %ymm11, %ymm7
vpsllq $2, %ymm12, %ymm6
vpsrlq $2, %ymm8, %ymm8
vpsrlq $9, %ymm9, %ymm9
vpsrlq $25, %ymm10, %ymm10
vpsrlq $23, %ymm11, %ymm11
vpsrlq $62, %ymm12, %ymm12
vpor %ymm13, %ymm8, %ymm8
vpor %ymm14, %ymm9, %ymm9
vpor %ymm15, %ymm10, %ymm10
vpor %ymm7, %ymm11, %ymm11
vpor %ymm6, %ymm12, %ymm12
vpandn %ymm10, %ymm9, %ymm13
vpandn %ymm11, %ymm10, %ymm14
vpandn %ymm12, %ymm11, %ymm15
vpandn %ymm8, %ymm12, %ymm7
vpandn %ymm9, %ymm8, %ymm6
vpxor %ymm8, %ymm13, %ymm13
vpxor %ymm9, %ymm14, %ymm14
vpxor %ymm10, %ymm15, %ymm15
vpxor %ymm11, %ymm7, %ymm7
vpxor %ymm12, %ymm6, %ymm6
vmovdqa %ymm13, 160(%rdi)
vmovdqa %ymm14, 672(%rdi)
vmovdqa %ymm15, 384(%rdi)
vmovdqa %ymm7, 96(%rdi)
vmovdqa %ymm6, 608(%rdi)
vmovdqa 0(%rdi), %ymm8
vmovdqa 32(%rdi), %ymm9
vmovdqa 64(%rdi), %ymm10
vmovdqa 96(%rdi), %ymm11
vmovdqa 128(%rdi), %ymm12
vpxor 160(%rdi), %ymm8, %ymm8
vpxor 192(%rdi), %ymm9, %ymm9
vpxor 224(%rdi), %ymm10, %ymm10
vpxor 256(%rdi), %ymm11, %ymm11
vpxor 288(%rdi), %ymm12, %ymm12
vpxor 320(%rdi), %ymm8, %ymm8
vpxor 352(%rdi), %ymm9, %ymm9
vpxor 384(%rdi), %ymm10, %ymm10
vpxor 416(%rdi), %ymm11, %ymm11
vpxor 448(%rdi), %ymm12, %ymm12
vpxor 480(%rdi), %ymm8, %ymm8
vpxor 512(%rdi), %ymm9, %ymm9
vpxor 544(%rdi), %ymm10, %ymm10
vpxor 576(%rdi), %ymm11, %ymm11
vpxor 608(%rdi), %ymm12, %ymm12
vpxor 640(%rdi), %ymm8, %ymm8
vpxor 672(%rdi), %ymm9, %ymm9
vpxor 704(%rdi), %ymm10, %ymm10
vpxor 736(%rdi), %ymm11, %ymm11
vpxor 768(%rdi), %ymm12, %ymm12
vpsllq $1, %ymm9, %ymm13
vpsllq $1, %ymm10, %ymm14
vpsllq $1, %ymm11, %ymm15
vpsllq $1, %ymm12, %ymm7
vpsllq $1, %ymm8, %ymm6
vpsrlq $63, %ymm9, %ymm5
vpsrlq $63, %ymm10, %ymm4
vpsrlq $63, %ymm11, %ymm3
vpsrlq $63, %ymm12, %ymm2
vpsrlq $63, %ymm8, %ymm1
vpor %ymm13, %ymm5, %ymm5
vpor %ymm14, %ymm4, %ymm4
vpor %ymm15, %ymm3, %ymm3
vpor %ymm7, %ymm2, %ymm2
vpor %ymm6, %ymm1, %ymm1
vpxor %ymm5, %ymm12, %ymm5
vpxor %ymm4, %ymm8, %ymm4
vpxor %ymm3, %ymm9, %ymm3
vpxor %ymm2, %ymm10, %ymm2
vpxor %ymm1, %ymm11, %ymm1
vpxor 0(%rdi), %ymm5, %ymm8
vpxor 352(%rdi), %ymm4, %ymm9
vpxor 704(%rdi), %ymm3, %ymm10
vpxor 256(%rdi), %ymm2, %ymm11
vpxor 608(%rdi), %ymm1, %ymm12
vpsllq $44, %ymm9, %ymm14
vpsllq $43, %ymm10, %ymm15
vpsllq $21, %ymm11, %ymm7
vpsllq $14, %ymm12, %ymm6
vpsrlq $20, %ymm9, %ymm9
vpsrlq $21, %ymm10, %ymm10
vpsrlq $43, %ymm11, %ymm11
vpsrlq $50, %ymm12, %ymm12
vpor %ymm14, %ymm9, %ymm9
vpor %ymm15, %ymm10, %ymm10
vpor %ymm7, %ymm11, %ymm11
vpor %ymm6, %ymm12, %ymm12
vpandn %ymm10, %ymm9, %ymm13
vpandn %ymm11, %ymm10, %ymm14
vpandn %ymm12, %ymm11, %ymm15
vpandn %ymm8, %ymm12, %ymm7
vpandn %ymm9, %ymm8, %ymm6
vpxor %ymm8, %ymm13, %ymm13
vpxor %ymm9, %ymm14, %ymm14
vpxor %ymm10, %ymm15, %ymm15
vpxor %ymm11, %ymm7, %ymm7
vpxor %ymm12, %ymm6, %ymm6
vpbroadcastq 16(%rsi), %ymm8
vpxor %ymm8, %ymm13, %ymm13
vmovdqa %ymm13, 0(%rdi)
vmovdqa %ymm14, 352(%rdi)
vmovdqa %ymm15, 704(%rdi)
vmovdqa %ymm7, 256(%rdi)
vmovdqa %ymm6, 608(%rdi)
vpxor 736(%rdi), %ymm2, %ymm8
vpxor 288(%rdi), %ymm1, %ymm9
vpxor 480(%rdi), %ymm5, %ymm10
vpxor 32(%rdi), %ymm4, %ymm11
vpxor 384(%rdi), %ymm3, %ymm12
vpsllq $28, %ymm8, %ymm13
vpsllq $20, %ymm9, %ymm14
vpsllq $3, %ymm10, %ymm15
vpsllq $45, %ymm11, %ymm7
vpsllq $61, %ymm12, %ymm6
vpsrlq $36, %ymm8, %ymm8
vpsrlq $44, %ymm9, %ymm9
vpsrlq $61, %ymm10, %ymm10
vpsrlq $19, %ymm11, %ymm11
vpsrlq $3, %ymm12, %ymm12
vpor %ymm13, %ymm8, %ymm8
vpor %ymm14, %ymm9, %ymm9
vpor %ymm15, %ymm10, %ymm10
vpor %ymm7, %ymm11, %ymm11
vpor %ymm6, %ymm12, %ymm12
vpandn %ymm10, %ymm9, %ymm13
vpandn %ymm11, %ymm10, %ymm14
vpandn %ymm12, %ymm11, %ymm15
vpandn %ymm8, %ymm12, %ymm7
vpandn %ymm9, %ymm8, %ymm6
vpxor %ymm8, %ymm13, %ymm13
vpxor %ymm9, %ymm14, %ymm14
vpxor %ymm10, %ymm15, %ymm15
vpxor %ymm11, %ymm7, %ymm7
vpxor %ymm12, %ymm6, %ymm6
vmovdqa %ymm13, 480(%rdi)
vmovdqa %ymm14, 32(%rdi)
vmovdqa %ymm15, 384(%rdi)
vmovdqa %ymm7, 736(%rdi)
vmovdqa %ymm6, 288(%rdi)
vpxor 512(%rdi), %ymm4, %ymm8
vpxor 64(%rdi), %ymm3, %ymm9
vpxor 416(%rdi), %ymm2, %ymm10
vpxor 768(%rdi), %ymm1, %ymm11
vpxor 160(%rdi), %ymm5, %ymm12
vpsllq $1, %ymm8, %ymm13
vpsllq $6, %ymm9, %ymm14
vpsllq $25, %ymm10, %ymm15
#vpsllq $8, %ymm11, %ymm7
vpsllq $18, %ymm12, %ymm6
vpsrlq $63, %ymm8, %ymm8
vpsrlq $58, %ymm9, %ymm9
vpsrlq $39, %ymm10, %ymm10
#vpsrlq $56, %ymm11, %ymm11
vpsrlq $46, %ymm12, %ymm12
vpor %ymm13, %ymm8, %ymm8
vpor %ymm14, %ymm9, %ymm9
vpor %ymm15, %ymm10, %ymm10
#vpor %ymm7, %ymm11, %ymm11
vpshufb %ymm0, %ymm11, %ymm11
vpor %ymm6, %ymm12, %ymm12
vpandn %ymm10, %ymm9, %ymm13
vpandn %ymm11, %ymm10, %ymm14
vpandn %ymm12, %ymm11, %ymm15
vpandn %ymm8, %ymm12, %ymm7
vpandn %ymm9, %ymm8, %ymm6
vpxor %ymm8, %ymm13, %ymm13
vpxor %ymm9, %ymm14, %ymm14
vpxor %ymm10, %ymm15, %ymm15
vpxor %ymm11, %ymm7, %ymm7
vpxor %ymm12, %ymm6, %ymm6
vmovdqa %ymm13, 160(%rdi)
vmovdqa %ymm14, 512(%rdi)
vmovdqa %ymm15, 64(%rdi)
vmovdqa %ymm7, 416(%rdi)
vmovdqa %ymm6, 768(%rdi)
vpxor 448(%rdi), %ymm1, %ymm8
vpxor 640(%rdi), %ymm5, %ymm9
vpxor 192(%rdi), %ymm4, %ymm10
vpxor 544(%rdi), %ymm3, %ymm11
vpxor 96(%rdi), %ymm2, %ymm12
vpsllq $27, %ymm8, %ymm13
vpsllq $36, %ymm9, %ymm14
vpsllq $10, %ymm10, %ymm15
vpsllq $15, %ymm11, %ymm7
#vpsllq $56, %ymm12, %ymm6
vpsrlq $37, %ymm8, %ymm8
vpsrlq $28, %ymm9, %ymm9
vpsrlq $54, %ymm10, %ymm10
vpsrlq $49, %ymm11, %ymm11
#vpsrlq $8, %ymm12, %ymm12
vpor %ymm13, %ymm8, %ymm8
vpor %ymm14, %ymm9, %ymm9
vpor %ymm15, %ymm10, %ymm10
vpor %ymm7, %ymm11, %ymm11
#vpor %ymm6, %ymm12, %ymm12
vpshufb rho56(%rip), %ymm12, %ymm12
vpandn %ymm10, %ymm9, %ymm13
vpandn %ymm11, %ymm10, %ymm14
vpandn %ymm12, %ymm11, %ymm15
vpandn %ymm8, %ymm12, %ymm7
vpandn %ymm9, %ymm8, %ymm6
vpxor %ymm8, %ymm13, %ymm13
vpxor %ymm9, %ymm14, %ymm14
vpxor %ymm10, %ymm15, %ymm15
vpxor %ymm11, %ymm7, %ymm7
vpxor %ymm12, %ymm6, %ymm6
vmovdqa %ymm13, 640(%rdi)
vmovdqa %ymm14, 192(%rdi)
vmovdqa %ymm15, 544(%rdi)
vmovdqa %ymm7, 96(%rdi)
vmovdqa %ymm6, 448(%rdi)
vpxor 224(%rdi), %ymm3, %ymm8
vpxor 576(%rdi), %ymm2, %ymm9
vpxor 128(%rdi), %ymm1, %ymm10
vpxor 320(%rdi), %ymm5, %ymm11
vpxor 672(%rdi), %ymm4, %ymm12
vpsllq $62, %ymm8, %ymm13
vpsllq $55, %ymm9, %ymm14
vpsllq $39, %ymm10, %ymm15
vpsllq $41, %ymm11, %ymm7
vpsllq $2, %ymm12, %ymm6
vpsrlq $2, %ymm8, %ymm8
vpsrlq $9, %ymm9, %ymm9
vpsrlq $25, %ymm10, %ymm10
vpsrlq $23, %ymm11, %ymm11
vpsrlq $62, %ymm12, %ymm12
vpor %ymm13, %ymm8, %ymm8
vpor %ymm14, %ymm9, %ymm9
vpor %ymm15, %ymm10, %ymm10
vpor %ymm7, %ymm11, %ymm11
vpor %ymm6, %ymm12, %ymm12
vpandn %ymm10, %ymm9, %ymm13
vpandn %ymm11, %ymm10, %ymm14
vpandn %ymm12, %ymm11, %ymm15
vpandn %ymm8, %ymm12, %ymm7
vpandn %ymm9, %ymm8, %ymm6
vpxor %ymm8, %ymm13, %ymm13
vpxor %ymm9, %ymm14, %ymm14
vpxor %ymm10, %ymm15, %ymm15
vpxor %ymm11, %ymm7, %ymm7
vpxor %ymm12, %ymm6, %ymm6
vmovdqa %ymm13, 320(%rdi)
vmovdqa %ymm14, 672(%rdi)
vmovdqa %ymm15, 224(%rdi)
vmovdqa %ymm7, 576(%rdi)
vmovdqa %ymm6, 128(%rdi)
vmovdqa 0(%rdi), %ymm8
vmovdqa 32(%rdi), %ymm9
vmovdqa 64(%rdi), %ymm10
vmovdqa 96(%rdi), %ymm11
vmovdqa 128(%rdi), %ymm12
vpxor 160(%rdi), %ymm8, %ymm8
vpxor 192(%rdi), %ymm9, %ymm9
vpxor 224(%rdi), %ymm10, %ymm10
vpxor 256(%rdi), %ymm11, %ymm11
vpxor 288(%rdi), %ymm12, %ymm12
vpxor 320(%rdi), %ymm8, %ymm8
vpxor 352(%rdi), %ymm9, %ymm9
vpxor 384(%rdi), %ymm10, %ymm10
vpxor 416(%rdi), %ymm11, %ymm11
vpxor 448(%rdi), %ymm12, %ymm12
vpxor 480(%rdi), %ymm8, %ymm8
vpxor 512(%rdi), %ymm9, %ymm9
vpxor 544(%rdi), %ymm10, %ymm10
vpxor 576(%rdi), %ymm11, %ymm11
vpxor 608(%rdi), %ymm12, %ymm12
vpxor 640(%rdi), %ymm8, %ymm8
vpxor 672(%rdi), %ymm9, %ymm9
vpxor 704(%rdi), %ymm10, %ymm10
vpxor 736(%rdi), %ymm11, %ymm11
vpxor 768(%rdi), %ymm12, %ymm12
vpsllq $1, %ymm9, %ymm13
vpsllq $1, %ymm10, %ymm14
vpsllq $1, %ymm11, %ymm15
vpsllq $1, %ymm12, %ymm7
vpsllq $1, %ymm8, %ymm6
vpsrlq $63, %ymm9, %ymm5
vpsrlq $63, %ymm10, %ymm4
vpsrlq $63, %ymm11, %ymm3
vpsrlq $63, %ymm12, %ymm2
vpsrlq $63, %ymm8, %ymm1
vpor %ymm13, %ymm5, %ymm5
vpor %ymm14, %ymm4, %ymm4
vpor %ymm15, %ymm3, %ymm3
vpor %ymm7, %ymm2, %ymm2
vpor %ymm6, %ymm1, %ymm1
vpxor %ymm5, %ymm12, %ymm5
vpxor %ymm4, %ymm8, %ymm4
vpxor %ymm3, %ymm9, %ymm3
vpxor %ymm2, %ymm10, %ymm2
vpxor %ymm1, %ymm11, %ymm1
vpxor 0(%rdi), %ymm5, %ymm8
vpxor 32(%rdi), %ymm4, %ymm9
vpxor 64(%rdi), %ymm3, %ymm10
vpxor 96(%rdi), %ymm2, %ymm11
vpxor 128(%rdi), %ymm1, %ymm12
vpsllq $44, %ymm9, %ymm14
vpsllq $43, %ymm10, %ymm15
vpsllq $21, %ymm11, %ymm7
vpsllq $14, %ymm12, %ymm6
vpsrlq $20, %ymm9, %ymm9
vpsrlq $21, %ymm10, %ymm10
vpsrlq $43, %ymm11, %ymm11
vpsrlq $50, %ymm12, %ymm12
vpor %ymm14, %ymm9, %ymm9
vpor %ymm15, %ymm10, %ymm10
vpor %ymm7, %ymm11, %ymm11
vpor %ymm6, %ymm12, %ymm12
vpandn %ymm10, %ymm9, %ymm13
vpandn %ymm11, %ymm10, %ymm14
vpandn %ymm12, %ymm11, %ymm15
vpandn %ymm8, %ymm12, %ymm7
vpandn %ymm9, %ymm8, %ymm6
vpxor %ymm8, %ymm13, %ymm13
vpxor %ymm9, %ymm14, %ymm14
vpxor %ymm10, %ymm15, %ymm15
vpxor %ymm11, %ymm7, %ymm7
vpxor %ymm12, %ymm6, %ymm6
vpbroadcastq 24(%rsi), %ymm8
vpxor %ymm8, %ymm13, %ymm13
vmovdqa %ymm13, 0(%rdi)
vmovdqa %ymm14, 32(%rdi)
vmovdqa %ymm15, 64(%rdi)
vmovdqa %ymm7, 96(%rdi)
vmovdqa %ymm6, 128(%rdi)
vpxor 256(%rdi), %ymm2, %ymm8
vpxor 288(%rdi), %ymm1, %ymm9
vpxor 160(%rdi), %ymm5, %ymm10
vpxor 192(%rdi), %ymm4, %ymm11
vpxor 224(%rdi), %ymm3, %ymm12
vpsllq $28, %ymm8, %ymm13
vpsllq $20, %ymm9, %ymm14
vpsllq $3, %ymm10, %ymm15
vpsllq $45, %ymm11, %ymm7
vpsllq $61, %ymm12, %ymm6
vpsrlq $36, %ymm8, %ymm8
vpsrlq $44, %ymm9, %ymm9
vpsrlq $61, %ymm10, %ymm10
vpsrlq $19, %ymm11, %ymm11
vpsrlq $3, %ymm12, %ymm12
vpor %ymm13, %ymm8, %ymm8
vpor %ymm14, %ymm9, %ymm9
vpor %ymm15, %ymm10, %ymm10
vpor %ymm7, %ymm11, %ymm11
vpor %ymm6, %ymm12, %ymm12
vpandn %ymm10, %ymm9, %ymm13
vpandn %ymm11, %ymm10, %ymm14
vpandn %ymm12, %ymm11, %ymm15
vpandn %ymm8, %ymm12, %ymm7
vpandn %ymm9, %ymm8, %ymm6
vpxor %ymm8, %ymm13, %ymm13
vpxor %ymm9, %ymm14, %ymm14
vpxor %ymm10, %ymm15, %ymm15
vpxor %ymm11, %ymm7, %ymm7
vpxor %ymm12, %ymm6, %ymm6
vmovdqa %ymm13, 160(%rdi)
vmovdqa %ymm14, 192(%rdi)
vmovdqa %ymm15, 224(%rdi)
vmovdqa %ymm7, 256(%rdi)
vmovdqa %ymm6, 288(%rdi)
vpxor 352(%rdi), %ymm4, %ymm8
vpxor 384(%rdi), %ymm3, %ymm9
vpxor 416(%rdi), %ymm2, %ymm10
vpxor 448(%rdi), %ymm1, %ymm11
vpxor 320(%rdi), %ymm5, %ymm12
vpsllq $1, %ymm8, %ymm13
vpsllq $6, %ymm9, %ymm14
vpsllq $25, %ymm10, %ymm15
#vpsllq $8, %ymm11, %ymm7
vpsllq $18, %ymm12, %ymm6
vpsrlq $63, %ymm8, %ymm8
vpsrlq $58, %ymm9, %ymm9
vpsrlq $39, %ymm10, %ymm10
#vpsrlq $56, %ymm11, %ymm11
vpsrlq $46, %ymm12, %ymm12
vpor %ymm13, %ymm8, %ymm8
vpor %ymm14, %ymm9, %ymm9
vpor %ymm15, %ymm10, %ymm10
#vpor %ymm7, %ymm11, %ymm11
vpshufb %ymm0, %ymm11, %ymm11
vpor %ymm6, %ymm12, %ymm12
vpandn %ymm10, %ymm9, %ymm13
vpandn %ymm11, %ymm10, %ymm14
vpandn %ymm12, %ymm11, %ymm15
vpandn %ymm8, %ymm12, %ymm7
vpandn %ymm9, %ymm8, %ymm6
vpxor %ymm8, %ymm13, %ymm13
vpxor %ymm9, %ymm14, %ymm14
vpxor %ymm10, %ymm15, %ymm15
vpxor %ymm11, %ymm7, %ymm7
vpxor %ymm12, %ymm6, %ymm6
vmovdqa %ymm13, 320(%rdi)
vmovdqa %ymm14, 352(%rdi)
vmovdqa %ymm15, 384(%rdi)
vmovdqa %ymm7, 416(%rdi)
vmovdqa %ymm6, 448(%rdi)
vpxor 608(%rdi), %ymm1, %ymm8
vpxor 480(%rdi), %ymm5, %ymm9
vpxor 512(%rdi), %ymm4, %ymm10
vpxor 544(%rdi), %ymm3, %ymm11
vpxor 576(%rdi), %ymm2, %ymm12
vpsllq $27, %ymm8, %ymm13
vpsllq $36, %ymm9, %ymm14
vpsllq $10, %ymm10, %ymm15
vpsllq $15, %ymm11, %ymm7
#vpsllq $56, %ymm12, %ymm6
vpsrlq $37, %ymm8, %ymm8
vpsrlq $28, %ymm9, %ymm9
vpsrlq $54, %ymm10, %ymm10
vpsrlq $49, %ymm11, %ymm11
#vpsrlq $8, %ymm12, %ymm12
vpor %ymm13, %ymm8, %ymm8
vpor %ymm14, %ymm9, %ymm9
vpor %ymm15, %ymm10, %ymm10
vpor %ymm7, %ymm11, %ymm11
#vpor %ymm6, %ymm12, %ymm12
vpshufb rho56(%rip), %ymm12, %ymm12
vpandn %ymm10, %ymm9, %ymm13
vpandn %ymm11, %ymm10, %ymm14
vpandn %ymm12, %ymm11, %ymm15
vpandn %ymm8, %ymm12, %ymm7
vpandn %ymm9, %ymm8, %ymm6
vpxor %ymm8, %ymm13, %ymm13
vpxor %ymm9, %ymm14, %ymm14
vpxor %ymm10, %ymm15, %ymm15
vpxor %ymm11, %ymm7, %ymm7
vpxor %ymm12, %ymm6, %ymm6
vmovdqa %ymm13, 480(%rdi)
vmovdqa %ymm14, 512(%rdi)
vmovdqa %ymm15, 544(%rdi)
vmovdqa %ymm7, 576(%rdi)
vmovdqa %ymm6, 608(%rdi)
vpxor 704(%rdi), %ymm3, %ymm8
vpxor 736(%rdi), %ymm2, %ymm9
vpxor 768(%rdi), %ymm1, %ymm10
vpxor 640(%rdi), %ymm5, %ymm11
vpxor 672(%rdi), %ymm4, %ymm12
vpsllq $62, %ymm8, %ymm13
vpsllq $55, %ymm9, %ymm14
vpsllq $39, %ymm10, %ymm15
vpsllq $41, %ymm11, %ymm7
vpsllq $2, %ymm12, %ymm6
vpsrlq $2, %ymm8, %ymm8
vpsrlq $9, %ymm9, %ymm9
vpsrlq $25, %ymm10, %ymm10
vpsrlq $23, %ymm11, %ymm11
vpsrlq $62, %ymm12, %ymm12
vpor %ymm13, %ymm8, %ymm8
vpor %ymm14, %ymm9, %ymm9
vpor %ymm15, %ymm10, %ymm10
vpor %ymm7, %ymm11, %ymm11
vpor %ymm6, %ymm12, %ymm12
vpandn %ymm10, %ymm9, %ymm13
vpandn %ymm11, %ymm10, %ymm14
vpandn %ymm12, %ymm11, %ymm15
vpandn %ymm8, %ymm12, %ymm7
vpandn %ymm9, %ymm8, %ymm6
vpxor %ymm8, %ymm13, %ymm13
vpxor %ymm9, %ymm14, %ymm14
vpxor %ymm10, %ymm15, %ymm15
vpxor %ymm11, %ymm7, %ymm7
vpxor %ymm12, %ymm6, %ymm6
vmovdqa %ymm13, 640(%rdi)
vmovdqa %ymm14, 672(%rdi)
vmovdqa %ymm15, 704(%rdi)
vmovdqa %ymm7, 736(%rdi)
vmovdqa %ymm6, 768(%rdi)
addq $32, %rsi
subq $1, %rax
jnz looptop
ret

View File

@ -1,774 +0,0 @@
/* Based on the public domain implementation in crypto_hash/keccakc512/simple/ from
* http://bench.cr.yp.to/supercop.html by Ronny Van Keer and the public domain "TweetFips202"
* implementation from https://twitter.com/tweetfips202 by Gilles Van Assche, Daniel J. Bernstein,
* and Peter Schwabe */
#include <stddef.h>
#include <stdint.h>
#include "fips202.h"
/* Number of rounds of the Keccak-f[1600] permutation. */
#define NROUNDS 24
/* Rotate the 64-bit value a left by offset bits. offset must be in 1..63:
 * an offset of 0 would shift right by 64, which is undefined for a 64-bit
 * operand. Both arguments are fully parenthesized so that compound
 * expressions such as ROL(x ^ y, n + 1) expand as intended. Each argument
 * is still evaluated twice, so do not pass expressions with side effects. */
#define ROL(a, offset) (((a) << (offset)) ^ ((a) >> (64 - (offset))))
/*************************************************
* Name:        load64
*
* Description: Assemble a 64-bit word from 8 consecutive bytes read in
*              little-endian order (x[0] is the least significant byte).
*
* Arguments:   - const uint8_t *x: pointer to the 8 input bytes
*
* Returns the assembled 64-bit unsigned integer
**************************************************/
static uint64_t load64(const uint8_t x[8]) {
  uint64_t word = 0;
  unsigned int k = 8;

  /* Start with the most significant byte and push earlier bytes upwards. */
  while(k-- > 0)
    word = (word << 8) | x[k];

  return word;
}
/*************************************************
* Name:        store64
*
* Description: Write a 64-bit integer as 8 bytes in little-endian order
*              (the least significant byte goes to x[0]).
*
* Arguments:   - uint8_t *x: pointer to the output byte array (8 bytes,
*                            allocated by the caller)
*              - uint64_t u: 64-bit unsigned integer to serialize
**************************************************/
static void store64(uint8_t x[8], uint64_t u) {
  uint8_t *dst = x;
  uint8_t *const end = x + 8;

  /* Emit the low byte, then shift the next byte into place. */
  for(; dst != end; ++dst) {
    *dst = (uint8_t)(u & 0xFF);
    u >>= 8;
  }
}
/* Keccak round constants: one 64-bit value per round, XORed into the first
 * state lane by the permutation. The table has external linkage; it is
 * declared extern in fips202.h. */
const uint64_t KeccakF_RoundConstants[NROUNDS] = {
  0x0000000000000001ULL, 0x0000000000008082ULL,
  0x800000000000808aULL, 0x8000000080008000ULL,
  0x000000000000808bULL, 0x0000000080000001ULL,
  0x8000000080008081ULL, 0x8000000000008009ULL,
  0x000000000000008aULL, 0x0000000000000088ULL,
  0x0000000080008009ULL, 0x000000008000000aULL,
  0x000000008000808bULL, 0x800000000000008bULL,
  0x8000000000008089ULL, 0x8000000000008003ULL,
  0x8000000000008002ULL, 0x8000000000000080ULL,
  0x000000000000800aULL, 0x800000008000000aULL,
  0x8000000080008081ULL, 0x8000000000008080ULL,
  0x0000000080000001ULL, 0x8000000080008008ULL
};
/*************************************************
* Name: KeccakF1600_StatePermute
*
* Description: The Keccak F1600 Permutation. Applies all NROUNDS rounds to
* the state in place. The loop body handles two rounds per
* iteration: the first half reads the A* lanes and writes the
* E* lanes, the second half reads E* and writes A*, so the
* result is back in A* whenever the loop exits.
*
* Arguments: - uint64_t *state: pointer to input/output Keccak state
* (25 lanes, read at entry, overwritten at exit)
**************************************************/
static void KeccakF1600_StatePermute(uint64_t state[25])
{
int round;
// A* and E* are two working copies of the 25 state lanes.
uint64_t Aba, Abe, Abi, Abo, Abu;
uint64_t Aga, Age, Agi, Ago, Agu;
uint64_t Aka, Ake, Aki, Ako, Aku;
uint64_t Ama, Ame, Ami, Amo, Amu;
uint64_t Asa, Ase, Asi, Aso, Asu;
// BC*: five-lane scratch (column parities, then the rotated row being mixed).
uint64_t BCa, BCe, BCi, BCo, BCu;
// D*: per-column value XORed into every lane whose name ends in that vowel.
uint64_t Da, De, Di, Do, Du;
uint64_t Eba, Ebe, Ebi, Ebo, Ebu;
uint64_t Ega, Ege, Egi, Ego, Egu;
uint64_t Eka, Eke, Eki, Eko, Eku;
uint64_t Ema, Eme, Emi, Emo, Emu;
uint64_t Esa, Ese, Esi, Eso, Esu;
//copyFromState(A, state)
Aba = state[ 0];
Abe = state[ 1];
Abi = state[ 2];
Abo = state[ 3];
Abu = state[ 4];
Aga = state[ 5];
Age = state[ 6];
Agi = state[ 7];
Ago = state[ 8];
Agu = state[ 9];
Aka = state[10];
Ake = state[11];
Aki = state[12];
Ako = state[13];
Aku = state[14];
Ama = state[15];
Ame = state[16];
Ami = state[17];
Amo = state[18];
Amu = state[19];
Asa = state[20];
Ase = state[21];
Asi = state[22];
Aso = state[23];
Asu = state[24];
// Two rounds per iteration: round (A -> E) and round+1 (E -> A).
for(round = 0; round < NROUNDS; round += 2) {
// prepareTheta
// XOR of the five lanes that share a final vowel (column parity).
BCa = Aba^Aga^Aka^Ama^Asa;
BCe = Abe^Age^Ake^Ame^Ase;
BCi = Abi^Agi^Aki^Ami^Asi;
BCo = Abo^Ago^Ako^Amo^Aso;
BCu = Abu^Agu^Aku^Amu^Asu;
//thetaRhoPiChiIotaPrepareTheta(round, A, E)
Da = BCu^ROL(BCe, 1);
De = BCa^ROL(BCi, 1);
Di = BCe^ROL(BCo, 1);
Do = BCi^ROL(BCu, 1);
Du = BCo^ROL(BCa, 1);
// Each group of five below: XOR D into five A lanes, rotate them into
// BC*, then combine BC* as x ^ (~y & z) into one row of E.
Aba ^= Da;
BCa = Aba;
Age ^= De;
BCe = ROL(Age, 44);
Aki ^= Di;
BCi = ROL(Aki, 43);
Amo ^= Do;
BCo = ROL(Amo, 21);
Asu ^= Du;
BCu = ROL(Asu, 14);
Eba = BCa ^((~BCe)& BCi );
// The round constant is applied to the first lane only.
Eba ^= (uint64_t)KeccakF_RoundConstants[round];
Ebe = BCe ^((~BCi)& BCo );
Ebi = BCi ^((~BCo)& BCu );
Ebo = BCo ^((~BCu)& BCa );
Ebu = BCu ^((~BCa)& BCe );
Abo ^= Do;
BCa = ROL(Abo, 28);
Agu ^= Du;
BCe = ROL(Agu, 20);
Aka ^= Da;
BCi = ROL(Aka, 3);
Ame ^= De;
BCo = ROL(Ame, 45);
Asi ^= Di;
BCu = ROL(Asi, 61);
Ega = BCa ^((~BCe)& BCi );
Ege = BCe ^((~BCi)& BCo );
Egi = BCi ^((~BCo)& BCu );
Ego = BCo ^((~BCu)& BCa );
Egu = BCu ^((~BCa)& BCe );
Abe ^= De;
BCa = ROL(Abe, 1);
Agi ^= Di;
BCe = ROL(Agi, 6);
Ako ^= Do;
BCi = ROL(Ako, 25);
Amu ^= Du;
BCo = ROL(Amu, 8);
Asa ^= Da;
BCu = ROL(Asa, 18);
Eka = BCa ^((~BCe)& BCi );
Eke = BCe ^((~BCi)& BCo );
Eki = BCi ^((~BCo)& BCu );
Eko = BCo ^((~BCu)& BCa );
Eku = BCu ^((~BCa)& BCe );
Abu ^= Du;
BCa = ROL(Abu, 27);
Aga ^= Da;
BCe = ROL(Aga, 36);
Ake ^= De;
BCi = ROL(Ake, 10);
Ami ^= Di;
BCo = ROL(Ami, 15);
Aso ^= Do;
BCu = ROL(Aso, 56);
Ema = BCa ^((~BCe)& BCi );
Eme = BCe ^((~BCi)& BCo );
Emi = BCi ^((~BCo)& BCu );
Emo = BCo ^((~BCu)& BCa );
Emu = BCu ^((~BCa)& BCe );
Abi ^= Di;
BCa = ROL(Abi, 62);
Ago ^= Do;
BCe = ROL(Ago, 55);
Aku ^= Du;
BCi = ROL(Aku, 39);
Ama ^= Da;
BCo = ROL(Ama, 41);
Ase ^= De;
BCu = ROL(Ase, 2);
Esa = BCa ^((~BCe)& BCi );
Ese = BCe ^((~BCi)& BCo );
Esi = BCi ^((~BCo)& BCu );
Eso = BCo ^((~BCu)& BCa );
Esu = BCu ^((~BCa)& BCe );
// prepareTheta
// Same computation as above with the roles of A and E swapped.
BCa = Eba^Ega^Eka^Ema^Esa;
BCe = Ebe^Ege^Eke^Eme^Ese;
BCi = Ebi^Egi^Eki^Emi^Esi;
BCo = Ebo^Ego^Eko^Emo^Eso;
BCu = Ebu^Egu^Eku^Emu^Esu;
//thetaRhoPiChiIotaPrepareTheta(round+1, E, A)
Da = BCu^ROL(BCe, 1);
De = BCa^ROL(BCi, 1);
Di = BCe^ROL(BCo, 1);
Do = BCi^ROL(BCu, 1);
Du = BCo^ROL(BCa, 1);
Eba ^= Da;
BCa = Eba;
Ege ^= De;
BCe = ROL(Ege, 44);
Eki ^= Di;
BCi = ROL(Eki, 43);
Emo ^= Do;
BCo = ROL(Emo, 21);
Esu ^= Du;
BCu = ROL(Esu, 14);
Aba = BCa ^((~BCe)& BCi );
// Second round of the pair uses the next round constant.
Aba ^= (uint64_t)KeccakF_RoundConstants[round+1];
Abe = BCe ^((~BCi)& BCo );
Abi = BCi ^((~BCo)& BCu );
Abo = BCo ^((~BCu)& BCa );
Abu = BCu ^((~BCa)& BCe );
Ebo ^= Do;
BCa = ROL(Ebo, 28);
Egu ^= Du;
BCe = ROL(Egu, 20);
Eka ^= Da;
BCi = ROL(Eka, 3);
Eme ^= De;
BCo = ROL(Eme, 45);
Esi ^= Di;
BCu = ROL(Esi, 61);
Aga = BCa ^((~BCe)& BCi );
Age = BCe ^((~BCi)& BCo );
Agi = BCi ^((~BCo)& BCu );
Ago = BCo ^((~BCu)& BCa );
Agu = BCu ^((~BCa)& BCe );
Ebe ^= De;
BCa = ROL(Ebe, 1);
Egi ^= Di;
BCe = ROL(Egi, 6);
Eko ^= Do;
BCi = ROL(Eko, 25);
Emu ^= Du;
BCo = ROL(Emu, 8);
Esa ^= Da;
BCu = ROL(Esa, 18);
Aka = BCa ^((~BCe)& BCi );
Ake = BCe ^((~BCi)& BCo );
Aki = BCi ^((~BCo)& BCu );
Ako = BCo ^((~BCu)& BCa );
Aku = BCu ^((~BCa)& BCe );
Ebu ^= Du;
BCa = ROL(Ebu, 27);
Ega ^= Da;
BCe = ROL(Ega, 36);
Eke ^= De;
BCi = ROL(Eke, 10);
Emi ^= Di;
BCo = ROL(Emi, 15);
Eso ^= Do;
BCu = ROL(Eso, 56);
Ama = BCa ^((~BCe)& BCi );
Ame = BCe ^((~BCi)& BCo );
Ami = BCi ^((~BCo)& BCu );
Amo = BCo ^((~BCu)& BCa );
Amu = BCu ^((~BCa)& BCe );
Ebi ^= Di;
BCa = ROL(Ebi, 62);
Ego ^= Do;
BCe = ROL(Ego, 55);
Eku ^= Du;
BCi = ROL(Eku, 39);
Ema ^= Da;
BCo = ROL(Ema, 41);
Ese ^= De;
BCu = ROL(Ese, 2);
Asa = BCa ^((~BCe)& BCi );
Ase = BCe ^((~BCi)& BCo );
Asi = BCi ^((~BCo)& BCu );
Aso = BCo ^((~BCu)& BCa );
Asu = BCu ^((~BCa)& BCe );
}
//copyToState(state, A)
state[ 0] = Aba;
state[ 1] = Abe;
state[ 2] = Abi;
state[ 3] = Abo;
state[ 4] = Abu;
state[ 5] = Aga;
state[ 6] = Age;
state[ 7] = Agi;
state[ 8] = Ago;
state[ 9] = Agu;
state[10] = Aka;
state[11] = Ake;
state[12] = Aki;
state[13] = Ako;
state[14] = Aku;
state[15] = Ama;
state[16] = Ame;
state[17] = Ami;
state[18] = Amo;
state[19] = Amu;
state[20] = Asa;
state[21] = Ase;
state[22] = Asi;
state[23] = Aso;
state[24] = Asu;
}
/*************************************************
* Name:        keccak_init
*
* Description: Initializes the Keccak state by clearing all 25 lanes.
*
* Arguments:   - uint64_t *s: pointer to Keccak state
**************************************************/
static void keccak_init(uint64_t s[25])
{
  uint64_t *lane = s;
  uint64_t *const end = s + 25;

  while(lane != end)
    *lane++ = 0;
}
/*************************************************
* Name:        keccak_absorb
*
* Description: Absorb step of Keccak; incremental. Input bytes are XORed
*              into the state starting at byte offset pos; every time a
*              block of r bytes is complete the state is permuted.
*
* Arguments:   - uint64_t *s: pointer to Keccak state
*              - unsigned int pos: position in current block to be absorbed
*              - unsigned int r: rate in bytes (e.g., 168 for SHAKE128)
*              - const uint8_t *in: pointer to input to be absorbed into s
*              - size_t inlen: length of input in bytes
*
* Returns new position pos in current block
**************************************************/
static unsigned int keccak_absorb(uint64_t s[25],
                                  unsigned int pos,
                                  unsigned int r,
                                  const uint8_t *in,
                                  size_t inlen)
{
  unsigned int idx;

  /* As long as the pending input completes the current block: fill it,
   * permute, and restart at the beginning of a fresh block. */
  while(pos + inlen >= r) {
    for(idx = pos; idx < r; ++idx) {
      s[idx / 8] ^= (uint64_t)(*in) << (8 * (idx % 8));
      ++in;
    }
    inlen -= r - pos;
    KeccakF1600_StatePermute(s);
    pos = 0;
  }

  /* Fewer than a block's worth of bytes remain; XOR them in without
   * permuting. */
  for(idx = pos; idx < pos + inlen; ++idx) {
    s[idx / 8] ^= (uint64_t)(*in) << (8 * (idx % 8));
    ++in;
  }

  return idx;
}
/*************************************************
* Name:        keccak_finalize
*
* Description: Finalize absorb step: XOR the domain separation byte into
*              the state at byte offset pos and set the top bit of the
*              last lane of the rate portion.
*
* Arguments:   - uint64_t *s: pointer to Keccak state
*              - unsigned int pos: position in current block to be absorbed
*              - unsigned int r: rate in bytes (e.g., 168 for SHAKE128)
*              - uint8_t p: domain separation byte
**************************************************/
static void keccak_finalize(uint64_t s[25], unsigned int pos, unsigned int r, uint8_t p)
{
  const unsigned int lane = pos / 8;
  const unsigned int shift = 8 * (pos % 8);
  const unsigned int last = r / 8 - 1;

  s[lane] ^= (uint64_t)p << shift;
  s[last] ^= (uint64_t)1 << 63;
}
/*************************************************
* Name:        keccak_squeeze
*
* Description: Squeeze step of Keccak. Squeezes arbitrarily many bytes.
*              Modifies the state. Can be called multiple times to keep
*              squeezing, i.e., is incremental. The state is permuted
*              whenever the current block has been fully consumed
*              (pos == r) and more output is still requested.
*
* Arguments:   - uint8_t *out: pointer to output
*              - size_t outlen: number of bytes to be squeezed (written to out)
*              - uint64_t *s: pointer to input/output Keccak state
*              - unsigned int pos: number of bytes in current block already squeezed
*              - unsigned int r: rate in bytes (e.g., 168 for SHAKE128)
*
* Returns new position pos in current block
**************************************************/
static unsigned int keccak_squeeze(uint8_t *out,
                                   size_t outlen,
                                   uint64_t s[25],
                                   unsigned int pos,
                                   unsigned int r)
{
  unsigned int idx;

  while(outlen != 0) {
    /* Current block exhausted: generate the next one. */
    if(pos == r) {
      KeccakF1600_StatePermute(s);
      pos = 0;
    }

    /* Copy bytes out until the block ends or the request is satisfied. */
    idx = pos;
    while(idx < r && idx < pos + outlen) {
      *out = s[idx / 8] >> (8 * (idx % 8));
      ++out;
      ++idx;
    }

    outlen -= idx - pos;
    pos = idx;
  }

  return pos;
}
/*************************************************
* Name:        keccak_absorb_once
*
* Description: Absorb step of Keccak;
*              non-incremental, starts by zeroing the state. Full blocks
*              are absorbed lane-wise and permuted; the remaining bytes are
*              absorbed byte-wise and followed by the padding.
*
* Arguments:   - uint64_t *s: pointer to (uninitialized) output Keccak state
*              - unsigned int r: rate in bytes (e.g., 168 for SHAKE128)
*              - const uint8_t *in: pointer to input to be absorbed into s
*              - size_t inlen: length of input in bytes
*              - uint8_t p: domain-separation byte for different Keccak-derived functions
**************************************************/
static void keccak_absorb_once(uint64_t s[25],
                               unsigned int r,
                               const uint8_t *in,
                               size_t inlen,
                               uint8_t p)
{
  unsigned int k;

  for(k = 0; k < 25; ++k)
    s[k] = 0;

  /* Whole blocks: XOR r/8 little-endian lanes, then permute. */
  for(; inlen >= r; in += r, inlen -= r) {
    for(k = 0; k < r / 8; ++k)
      s[k] ^= load64(&in[8 * k]);
    KeccakF1600_StatePermute(s);
  }

  /* Trailing partial block, one byte at a time. */
  for(k = 0; k < inlen; ++k)
    s[k / 8] ^= (uint64_t)in[k] << (8 * (k % 8));

  /* k now equals the number of trailing bytes: the domain-separation byte
   * goes right after them, the final padding bit into the last rate lane. */
  s[k / 8] ^= (uint64_t)p << (8 * (k % 8));
  s[(r - 1) / 8] ^= (uint64_t)1 << 63;
}
/*************************************************
* Name:        keccak_squeezeblocks
*
* Description: Squeeze step of Keccak. Squeezes full blocks of r bytes each.
*              Modifies the state. Can be called multiple times to keep
*              squeezing, i.e., is incremental. Assumes zero bytes of current
*              block have already been squeezed. The state is permuted
*              before each block is written.
*
* Arguments:   - uint8_t *out: pointer to output blocks
*              - size_t nblocks: number of blocks to be squeezed (written to out)
*              - uint64_t *s: pointer to input/output Keccak state
*              - unsigned int r: rate in bytes (e.g., 168 for SHAKE128)
**************************************************/
static void keccak_squeezeblocks(uint8_t *out,
                                 size_t nblocks,
                                 uint64_t s[25],
                                 unsigned int r)
{
  size_t remaining;
  unsigned int lane;

  for(remaining = nblocks; remaining != 0; --remaining) {
    KeccakF1600_StatePermute(s);
    for(lane = 0; lane < r / 8; ++lane)
      store64(&out[8 * lane], s[lane]);
    out += r;
  }
}
/*************************************************
* Name: shake128_init
*
* Description: Initializes Keccak state for use as SHAKE128 XOF:
* clears all state lanes and resets the block position to 0.
*
* Arguments: - keccak_state *state: pointer to (uninitialized) Keccak state
**************************************************/
void shake128_init(keccak_state *state)
{
keccak_init(state->s);
state->pos = 0;
}
/*************************************************
* Name: shake128_absorb
*
* Description: Absorb step of the SHAKE128 XOF; incremental. Forwards to
* keccak_absorb with rate SHAKE128_RATE and stores the
* returned block position back into state->pos.
*
* Arguments: - keccak_state *state: pointer to (initialized) output Keccak state
* - const uint8_t *in: pointer to input to be absorbed into s
* - size_t inlen: length of input in bytes
**************************************************/
void shake128_absorb(keccak_state *state, const uint8_t *in, size_t inlen)
{
state->pos = keccak_absorb(state->s, state->pos, SHAKE128_RATE, in, inlen);
}
/*************************************************
* Name: shake128_finalize
*
* Description: Finalize absorb step of the SHAKE128 XOF. Applies the padding
* with domain separation byte 0x1F.
*
* Arguments: - keccak_state *state: pointer to Keccak state
**************************************************/
void shake128_finalize(keccak_state *state)
{
keccak_finalize(state->s, state->pos, SHAKE128_RATE, 0x1F);
// Mark the block as fully consumed so that the first squeeze permutes
// the state before producing any output.
state->pos = SHAKE128_RATE;
}
/*************************************************
* Name: shake128_squeeze
*
* Description: Squeeze step of SHAKE128 XOF. Squeezes arbitrarily many
* bytes. Can be called multiple times to keep squeezing; the
* position within the current block is kept in state->pos.
*
* Arguments: - uint8_t *out: pointer to output
* - size_t outlen : number of bytes to be squeezed (written to output)
* - keccak_state *state: pointer to input/output Keccak state
**************************************************/
void shake128_squeeze(uint8_t *out, size_t outlen, keccak_state *state)
{
state->pos = keccak_squeeze(out, outlen, state->s, state->pos, SHAKE128_RATE);
}
/*************************************************
* Name: shake128_absorb_once
*
* Description: Initialize, absorb into and finalize SHAKE128 XOF; non-incremental.
* Uses domain separation byte 0x1F.
*
* Arguments: - keccak_state *state: pointer to (uninitialized) output Keccak state
* - const uint8_t *in: pointer to input to be absorbed into s
* - size_t inlen: length of input in bytes
**************************************************/
void shake128_absorb_once(keccak_state *state, const uint8_t *in, size_t inlen)
{
keccak_absorb_once(state->s, SHAKE128_RATE, in, inlen, 0x1F);
// No output block has been generated yet: a following squeeze must permute first.
state->pos = SHAKE128_RATE;
}
/*************************************************
* Name: shake128_squeezeblocks
*
* Description: Squeeze step of SHAKE128 XOF. Squeezes full blocks of
* SHAKE128_RATE bytes each. Can be called multiple times
* to keep squeezing. Assumes new block has not yet been
* started (state->pos = SHAKE128_RATE). state->pos is neither
* read nor modified by this function.
*
* Arguments: - uint8_t *out: pointer to output blocks
* - size_t nblocks: number of blocks to be squeezed (written to output)
* - keccak_state *state: pointer to input/output Keccak state
**************************************************/
void shake128_squeezeblocks(uint8_t *out, size_t nblocks, keccak_state *state)
{
keccak_squeezeblocks(out, nblocks, state->s, SHAKE128_RATE);
}
/*************************************************
* Name: shake256_init
*
* Description: Initializes Keccak state for use as SHAKE256 XOF:
* clears all state lanes and resets the block position to 0.
*
* Arguments: - keccak_state *state: pointer to (uninitialized) Keccak state
**************************************************/
void shake256_init(keccak_state *state)
{
keccak_init(state->s);
state->pos = 0;
}
/*************************************************
* Name: shake256_absorb
*
* Description: Absorb step of the SHAKE256 XOF; incremental. Forwards to
* keccak_absorb with rate SHAKE256_RATE and stores the
* returned block position back into state->pos.
*
* Arguments: - keccak_state *state: pointer to (initialized) output Keccak state
* - const uint8_t *in: pointer to input to be absorbed into s
* - size_t inlen: length of input in bytes
**************************************************/
void shake256_absorb(keccak_state *state, const uint8_t *in, size_t inlen)
{
state->pos = keccak_absorb(state->s, state->pos, SHAKE256_RATE, in, inlen);
}
/*************************************************
* Name: shake256_finalize
*
* Description: Finalize absorb step of the SHAKE256 XOF. Applies the padding
* with domain separation byte 0x1F.
*
* Arguments: - keccak_state *state: pointer to Keccak state
**************************************************/
void shake256_finalize(keccak_state *state)
{
keccak_finalize(state->s, state->pos, SHAKE256_RATE, 0x1F);
// Mark the block as fully consumed so that the first squeeze permutes
// the state before producing any output.
state->pos = SHAKE256_RATE;
}
/*************************************************
* Name: shake256_squeeze
*
* Description: Squeeze step of SHAKE256 XOF. Squeezes arbitrarily many
* bytes. Can be called multiple times to keep squeezing; the
* position within the current block is kept in state->pos.
*
* Arguments: - uint8_t *out: pointer to output
* - size_t outlen : number of bytes to be squeezed (written to output)
* - keccak_state *state: pointer to input/output Keccak state
**************************************************/
void shake256_squeeze(uint8_t *out, size_t outlen, keccak_state *state)
{
state->pos = keccak_squeeze(out, outlen, state->s, state->pos, SHAKE256_RATE);
}
/*************************************************
* Name: shake256_absorb_once
*
* Description: Initialize, absorb into and finalize SHAKE256 XOF; non-incremental.
* Uses domain separation byte 0x1F.
*
* Arguments: - keccak_state *state: pointer to (uninitialized) output Keccak state
* - const uint8_t *in: pointer to input to be absorbed into s
* - size_t inlen: length of input in bytes
**************************************************/
void shake256_absorb_once(keccak_state *state, const uint8_t *in, size_t inlen)
{
keccak_absorb_once(state->s, SHAKE256_RATE, in, inlen, 0x1F);
// No output block has been generated yet: a following squeeze must permute first.
state->pos = SHAKE256_RATE;
}
/*************************************************
* Name: shake256_squeezeblocks
*
* Description: Squeeze step of SHAKE256 XOF. Squeezes full blocks of
* SHAKE256_RATE bytes each. Can be called multiple times
* to keep squeezing. Assumes next block has not yet been
* started (state->pos = SHAKE256_RATE). state->pos is neither
* read nor modified by this function.
*
* Arguments: - uint8_t *out: pointer to output blocks
* - size_t nblocks: number of blocks to be squeezed (written to output)
* - keccak_state *state: pointer to input/output Keccak state
**************************************************/
void shake256_squeezeblocks(uint8_t *out, size_t nblocks, keccak_state *state)
{
keccak_squeezeblocks(out, nblocks, state->s, SHAKE256_RATE);
}
/*************************************************
* Name:        shake128
*
* Description: SHAKE128 XOF with non-incremental API. Absorbs the whole
*              input, writes as many full SHAKE128_RATE-byte blocks as fit
*              into the output, then squeezes the remaining bytes.
*
* Arguments:   - uint8_t *out: pointer to output
*              - size_t outlen: requested output length in bytes
*              - const uint8_t *in: pointer to input
*              - size_t inlen: length of input in bytes
**************************************************/
void shake128(uint8_t *out, size_t outlen, const uint8_t *in, size_t inlen)
{
  keccak_state st;
  const size_t full = outlen / SHAKE128_RATE;
  const size_t bulk = full * SHAKE128_RATE;

  shake128_absorb_once(&st, in, inlen);
  shake128_squeezeblocks(out, full, &st);
  shake128_squeeze(out + bulk, outlen - bulk, &st);
}
/*************************************************
* Name:        shake256
*
* Description: SHAKE256 XOF with non-incremental API. Absorbs the whole
*              input, writes as many full SHAKE256_RATE-byte blocks as fit
*              into the output, then squeezes the remaining bytes.
*
* Arguments:   - uint8_t *out: pointer to output
*              - size_t outlen: requested output length in bytes
*              - const uint8_t *in: pointer to input
*              - size_t inlen: length of input in bytes
**************************************************/
void shake256(uint8_t *out, size_t outlen, const uint8_t *in, size_t inlen)
{
  keccak_state st;
  const size_t full = outlen / SHAKE256_RATE;
  const size_t bulk = full * SHAKE256_RATE;

  shake256_absorb_once(&st, in, inlen);
  shake256_squeezeblocks(out, full, &st);
  shake256_squeeze(out + bulk, outlen - bulk, &st);
}
/*************************************************
* Name:        sha3_256
*
* Description: SHA3-256 with non-incremental API. Absorbs the input with
*              domain separation byte 0x06, permutes once and outputs the
*              first four state lanes in little-endian byte order.
*
* Arguments:   - uint8_t *h: pointer to output (32 bytes)
*              - const uint8_t *in: pointer to input
*              - size_t inlen: length of input in bytes
**************************************************/
void sha3_256(uint8_t h[32], const uint8_t *in, size_t inlen)
{
  uint64_t st[25];
  unsigned int lane;

  keccak_absorb_once(st, SHA3_256_RATE, in, inlen, 0x06);
  KeccakF1600_StatePermute(st);

  for(lane = 0; lane < 32 / 8; ++lane)
    store64(&h[8 * lane], st[lane]);
}
/*************************************************
* Name:        sha3_512
*
* Description: SHA3-512 with non-incremental API. Absorbs the input with
*              domain separation byte 0x06, permutes once and outputs the
*              first eight state lanes in little-endian byte order.
*
* Arguments:   - uint8_t *h: pointer to output (64 bytes)
*              - const uint8_t *in: pointer to input
*              - size_t inlen: length of input in bytes
**************************************************/
void sha3_512(uint8_t h[64], const uint8_t *in, size_t inlen)
{
  uint64_t st[25];
  unsigned int lane;

  keccak_absorb_once(st, SHA3_512_RATE, in, inlen, 0x06);
  KeccakF1600_StatePermute(st);

  for(lane = 0; lane < 64 / 8; ++lane)
    store64(&h[8 * lane], st[lane]);
}

View File

@ -1,57 +0,0 @@
/* Declarations for the SHAKE128/SHAKE256 XOFs and the SHA3-256/SHA3-512
 * hash functions, together with the Keccak state type they operate on. */
#ifndef FIPS202_H
#define FIPS202_H
#include <stddef.h>
#include <stdint.h>
/* Rates (block sizes) in bytes of the individual functions. */
#define SHAKE128_RATE 168
#define SHAKE256_RATE 136
#define SHA3_256_RATE 136
#define SHA3_512_RATE 72
/* Every externally visible symbol below is renamed with this prefix via the
 * #define that precedes its declaration. */
#define FIPS202_NAMESPACE(s) pqcrystals_dilithium_fips202_avx2_##s
/* Keccak sponge state used by the incremental API. */
typedef struct {
uint64_t s[25]; /* the 25 64-bit lanes of the Keccak state */
unsigned int pos; /* byte position within the current block */
} keccak_state;
/* Round-constant table; declared here so other translation units can use it. */
#define KeccakF_RoundConstants FIPS202_NAMESPACE(KeccakF_RoundConstants)
extern const uint64_t KeccakF_RoundConstants[];
/* SHAKE128, incremental API: init, absorb (repeatable), finalize, squeeze. */
#define shake128_init FIPS202_NAMESPACE(shake128_init)
void shake128_init(keccak_state *state);
#define shake128_absorb FIPS202_NAMESPACE(shake128_absorb)
void shake128_absorb(keccak_state *state, const uint8_t *in, size_t inlen);
#define shake128_finalize FIPS202_NAMESPACE(shake128_finalize)
void shake128_finalize(keccak_state *state);
#define shake128_squeeze FIPS202_NAMESPACE(shake128_squeeze)
void shake128_squeeze(uint8_t *out, size_t outlen, keccak_state *state);
/* SHAKE128, one-shot absorb followed by squeezing whole blocks. */
#define shake128_absorb_once FIPS202_NAMESPACE(shake128_absorb_once)
void shake128_absorb_once(keccak_state *state, const uint8_t *in, size_t inlen);
#define shake128_squeezeblocks FIPS202_NAMESPACE(shake128_squeezeblocks)
void shake128_squeezeblocks(uint8_t *out, size_t nblocks, keccak_state *state);
/* SHAKE256, incremental API: init, absorb (repeatable), finalize, squeeze. */
#define shake256_init FIPS202_NAMESPACE(shake256_init)
void shake256_init(keccak_state *state);
#define shake256_absorb FIPS202_NAMESPACE(shake256_absorb)
void shake256_absorb(keccak_state *state, const uint8_t *in, size_t inlen);
#define shake256_finalize FIPS202_NAMESPACE(shake256_finalize)
void shake256_finalize(keccak_state *state);
#define shake256_squeeze FIPS202_NAMESPACE(shake256_squeeze)
void shake256_squeeze(uint8_t *out, size_t outlen, keccak_state *state);
/* SHAKE256, one-shot absorb followed by squeezing whole blocks. */
#define shake256_absorb_once FIPS202_NAMESPACE(shake256_absorb_once)
void shake256_absorb_once(keccak_state *state, const uint8_t *in, size_t inlen);
#define shake256_squeezeblocks FIPS202_NAMESPACE(shake256_squeezeblocks)
void shake256_squeezeblocks(uint8_t *out, size_t nblocks, keccak_state *state);
/* Non-incremental XOFs: outlen output bytes from inlen input bytes. */
#define shake128 FIPS202_NAMESPACE(shake128)
void shake128(uint8_t *out, size_t outlen, const uint8_t *in, size_t inlen);
#define shake256 FIPS202_NAMESPACE(shake256)
void shake256(uint8_t *out, size_t outlen, const uint8_t *in, size_t inlen);
/* Non-incremental fixed-length hashes (32- and 64-byte digests). */
#define sha3_256 FIPS202_NAMESPACE(sha3_256)
void sha3_256(uint8_t h[32], const uint8_t *in, size_t inlen);
#define sha3_512 FIPS202_NAMESPACE(sha3_512)
void sha3_512(uint8_t h[64], const uint8_t *in, size_t inlen);
#endif

View File

@ -1,196 +0,0 @@
#include <stddef.h>
#include <stdint.h>
#include <immintrin.h>
#include <string.h>
#include "fips202.h"
#include "fips202x4.h"
/*************************************************
* Name:        keccakx4_absorb_once
*
* Description: Four-way parallel, non-incremental Keccak absorb. Zeroes the
*              state, absorbs four inputs of identical length (input k
*              occupies 64-bit element k of every __m256i state word),
*              permutes after every full block and finally applies the
*              padding. Exactly inlen bytes are read from each input.
*
* Arguments:   - __m256i *s: pointer to (uninitialized) output state, 25 words
*              - unsigned int r: rate in bytes (e.g., 168 for SHAKE128)
*              - const uint8_t *in0..in3: pointers to the four inputs
*              - size_t inlen: length of each input in bytes
*              - uint8_t p: domain-separation byte
**************************************************/
static void keccakx4_absorb_once(__m256i s[25],
                                 unsigned int r,
                                 const uint8_t *in0,
                                 const uint8_t *in1,
                                 const uint8_t *in2,
                                 const uint8_t *in3,
                                 size_t inlen,
                                 uint8_t p)
{
  size_t i;
  uint64_t pos = 0;
  uint64_t tail[4] = {0, 0, 0, 0};
  __m256i t, idx;

  for(i = 0; i < 25; ++i)
    s[i] = _mm256_setzero_si256();

  /* The four input addresses serve as gather indices (scale 1); the running
   * byte offset pos is passed as the gather base, so each gather loads
   * 8 bytes from in0+pos, in1+pos, in2+pos and in3+pos. */
  idx = _mm256_set_epi64x((long long)in3, (long long)in2, (long long)in1, (long long)in0);

  /* Full blocks. */
  while(inlen >= r) {
    for(i = 0; i < r/8; ++i) {
      t = _mm256_i64gather_epi64((long long *)pos, idx, 1);
      s[i] = _mm256_xor_si256(s[i], t);
      pos += 8;
    }
    inlen -= r;
    f1600x4(s, KeccakF_RoundConstants);
  }

  /* Complete 8-byte lanes of the final partial block. */
  for(i = 0; i < inlen/8; ++i) {
    t = _mm256_i64gather_epi64((long long *)pos, idx, 1);
    s[i] = _mm256_xor_si256(s[i], t);
    pos += 8;
  }
  inlen -= 8*i;

  /* Remaining 1..7 bytes. A gather would load a full 8 bytes per input and
   * thus read past the end of each buffer, so copy exactly inlen bytes into
   * zeroed lanes instead (same value on the little-endian targets that
   * provide these intrinsics). */
  if(inlen) {
    memcpy(&tail[0], in0 + pos, inlen);
    memcpy(&tail[1], in1 + pos, inlen);
    memcpy(&tail[2], in2 + pos, inlen);
    memcpy(&tail[3], in3 + pos, inlen);
    t = _mm256_set_epi64x((long long)tail[3], (long long)tail[2],
                          (long long)tail[1], (long long)tail[0]);
    s[i] = _mm256_xor_si256(s[i], t);
  }

  /* Domain-separation byte directly after the last input byte ... */
  t = _mm256_set1_epi64x((uint64_t)p << 8*inlen);
  s[i] = _mm256_xor_si256(s[i], t);
  /* ... and the final padding bit in the last lane of the rate. */
  t = _mm256_set1_epi64x(1ULL << 63);
  s[r/8 - 1] = _mm256_xor_si256(s[r/8 - 1], t);
}
/*************************************************
* Name:        keccakx4_squeezeblocks
*
* Description: Produce nblocks blocks of r bytes for each of the four
*              interleaved instances. The permutation is applied before
*              every block is written out.
*
* Arguments:   - uint8_t *out0..out3: one output buffer per instance, each
*                receiving nblocks*r bytes
*              - size_t nblocks: number of blocks to write per instance
*              - unsigned int r: rate in bytes (r/8 lanes are stored)
*              - __m256i s[25]: interleaved state; updated in place
**************************************************/
static void keccakx4_squeezeblocks(uint8_t *out0,
                                   uint8_t *out1,
                                   uint8_t *out2,
                                   uint8_t *out3,
                                   size_t nblocks,
                                   unsigned int r,
                                   __m256i s[25])
{
  unsigned int lane;
  size_t off;
  __m128d half;

  for(; nblocks != 0; nblocks--) {
    f1600x4(s, KeccakF_RoundConstants);

    for(lane = 0, off = 0; lane < r/8; lane++, off += 8) {
      /* Lower 128 bits: 64-bit words of instances 0 (low) and 1 (high). */
      half = _mm_castsi128_pd(_mm256_castsi256_si128(s[lane]));
      _mm_storel_pd((__attribute__((__may_alias__)) double *)(out0 + off), half);
      _mm_storeh_pd((__attribute__((__may_alias__)) double *)(out1 + off), half);

      /* Upper 128 bits: 64-bit words of instances 2 (low) and 3 (high). */
      half = _mm_castsi128_pd(_mm256_extracti128_si256(s[lane], 1));
      _mm_storel_pd((__attribute__((__may_alias__)) double *)(out2 + off), half);
      _mm_storeh_pd((__attribute__((__may_alias__)) double *)(out3 + off), half);
    }

    out0 += r;
    out1 += r;
    out2 += r;
    out3 += r;
  }
}
/*************************************************
* Name:        shake128x4_absorb_once
*
* Description: Initialise a 4-way state from four equal-length inputs by
*              delegating to keccakx4_absorb_once with rate SHAKE128_RATE
*              and 0x1F as the byte appended after the message.
*
* Arguments:   - keccakx4_state *state: state to initialise
*              - const uint8_t *in0..in3: the four inputs
*              - size_t inlen: length of each input in bytes
**************************************************/
void shake128x4_absorb_once(keccakx4_state *state,
                            const uint8_t *in0,
                            const uint8_t *in1,
                            const uint8_t *in2,
                            const uint8_t *in3,
                            size_t inlen)
{
  __m256i *lanes = state->s;

  keccakx4_absorb_once(lanes, SHAKE128_RATE,
                       in0, in1, in2, in3,
                       inlen, 0x1F);
}
/*************************************************
* Name:        shake128x4_squeezeblocks
*
* Description: Write nblocks blocks of SHAKE128_RATE bytes to each of the
*              four outputs by delegating to keccakx4_squeezeblocks.
*
* Arguments:   - uint8_t *out0..out3: one output buffer per instance
*              - size_t nblocks: number of blocks to write per instance
*              - keccakx4_state *state: state to squeeze from; updated
**************************************************/
void shake128x4_squeezeblocks(uint8_t *out0,
                              uint8_t *out1,
                              uint8_t *out2,
                              uint8_t *out3,
                              size_t nblocks,
                              keccakx4_state *state)
{
  __m256i *lanes = state->s;

  keccakx4_squeezeblocks(out0, out1, out2, out3,
                         nblocks, SHAKE128_RATE, lanes);
}
/*************************************************
* Name:        shake256x4_absorb_once
*
* Description: Initialise a 4-way state from four equal-length inputs by
*              delegating to keccakx4_absorb_once with rate SHAKE256_RATE
*              and 0x1F as the byte appended after the message.
*
* Arguments:   - keccakx4_state *state: state to initialise
*              - const uint8_t *in0..in3: the four inputs
*              - size_t inlen: length of each input in bytes
**************************************************/
void shake256x4_absorb_once(keccakx4_state *state,
                            const uint8_t *in0,
                            const uint8_t *in1,
                            const uint8_t *in2,
                            const uint8_t *in3,
                            size_t inlen)
{
  __m256i *lanes = state->s;

  keccakx4_absorb_once(lanes, SHAKE256_RATE,
                       in0, in1, in2, in3,
                       inlen, 0x1F);
}
/*************************************************
* Name:        shake256x4_squeezeblocks
*
* Description: Write nblocks blocks of SHAKE256_RATE bytes to each of the
*              four outputs by delegating to keccakx4_squeezeblocks.
*
* Arguments:   - uint8_t *out0..out3: one output buffer per instance
*              - size_t nblocks: number of blocks to write per instance
*              - keccakx4_state *state: state to squeeze from; updated
**************************************************/
void shake256x4_squeezeblocks(uint8_t *out0,
                              uint8_t *out1,
                              uint8_t *out2,
                              uint8_t *out3,
                              size_t nblocks,
                              keccakx4_state *state)
{
  __m256i *lanes = state->s;

  keccakx4_squeezeblocks(out0, out1, out2, out3,
                         nblocks, SHAKE256_RATE, lanes);
}
/*************************************************
* Name:        shake128x4
*
* Description: One-shot 4-way SHAKE128: absorb four equal-length inputs and
*              write outlen bytes to each of the four outputs. Whole blocks
*              are squeezed straight into the outputs; a trailing partial
*              block is squeezed into a local buffer and copied.
*
* Arguments:   - uint8_t *out0..out3: output buffers, outlen bytes each
*              - size_t outlen: requested output length per instance
*              - const uint8_t *in0..in3: input buffers, inlen bytes each
*              - size_t inlen: input length per instance
**************************************************/
void shake128x4(uint8_t *out0,
                uint8_t *out1,
                uint8_t *out2,
                uint8_t *out3,
                size_t outlen,
                const uint8_t *in0,
                const uint8_t *in1,
                const uint8_t *in2,
                const uint8_t *in3,
                size_t inlen)
{
  keccakx4_state st;
  uint8_t last[4][SHAKE128_RATE];
  const size_t whole = outlen/SHAKE128_RATE;
  const size_t written = whole*SHAKE128_RATE;
  const size_t rest = outlen - written;
  size_t k;

  shake128x4_absorb_once(&st, in0, in1, in2, in3, inlen);
  shake128x4_squeezeblocks(out0, out1, out2, out3, whole, &st);

  if(rest == 0)
    return;

  /* One extra block goes to scratch space; only its first `rest` bytes are
   * handed to the caller. */
  shake128x4_squeezeblocks(last[0], last[1], last[2], last[3], 1, &st);
  for(k = 0; k < rest; ++k) {
    out0[written + k] = last[0][k];
    out1[written + k] = last[1][k];
    out2[written + k] = last[2][k];
    out3[written + k] = last[3][k];
  }
}
/*************************************************
* Name:        shake256x4
*
* Description: One-shot 4-way SHAKE256: absorb four equal-length inputs and
*              write outlen bytes to each of the four outputs. Whole blocks
*              are squeezed straight into the outputs; a trailing partial
*              block is squeezed into a local buffer and copied.
*
* Arguments:   - uint8_t *out0..out3: output buffers, outlen bytes each
*              - size_t outlen: requested output length per instance
*              - const uint8_t *in0..in3: input buffers, inlen bytes each
*              - size_t inlen: input length per instance
**************************************************/
void shake256x4(uint8_t *out0,
                uint8_t *out1,
                uint8_t *out2,
                uint8_t *out3,
                size_t outlen,
                const uint8_t *in0,
                const uint8_t *in1,
                const uint8_t *in2,
                const uint8_t *in3,
                size_t inlen)
{
  keccakx4_state st;
  uint8_t last[4][SHAKE256_RATE];
  const size_t whole = outlen/SHAKE256_RATE;
  const size_t written = whole*SHAKE256_RATE;
  const size_t rest = outlen - written;
  size_t k;

  shake256x4_absorb_once(&st, in0, in1, in2, in3, inlen);
  shake256x4_squeezeblocks(out0, out1, out2, out3, whole, &st);

  if(rest == 0)
    return;

  /* One extra block goes to scratch space; only its first `rest` bytes are
   * handed to the caller. */
  shake256x4_squeezeblocks(last[0], last[1], last[2], last[3], 1, &st);
  for(k = 0; k < rest; ++k) {
    out0[written + k] = last[0][k];
    out1[written + k] = last[1][k];
    out2[written + k] = last[2][k];
    out3[written + k] = last[3][k];
  }
}

View File

@ -1,91 +0,0 @@
/* Interface of the 4-way (AVX2) SHAKE implementation.
 *
 * The header has two modes: when preprocessed for assembly (__ASSEMBLER__
 * defined) it only provides the cdecl() symbol-naming helper; otherwise it
 * provides the C type and prototypes. */
#ifndef FIPS202X4_H
#define FIPS202X4_H
/* Prefix pasted onto every symbol of this module. */
#define FIPS202X4_NAMESPACE(s) pqcrystals_dilithium_fips202x4_avx2_##s
#ifdef __ASSEMBLER__
/* On the platforms selected below, the C ABI gives every exported symbol a
 * leading underscore. Assembly therefore has to spell that underscore
 * explicitly, otherwise symbols shared between C and assembly do not match
 * at link time.
 *
 * cdecl(s) yields the namespaced symbol name, with the underscore added
 * where it is needed.
 *
 * NOTE(review): the `##s` inside the macro argument is only ever expanded
 * under __ASSEMBLER__; leave it as is unless checked against the assembler's
 * preprocessor.
 */
#if defined(__WIN32__) || defined(__APPLE__)
#define decorate(s) _##s
#define _cdecl(s) decorate(s)
#define cdecl(s) _cdecl(FIPS202X4_NAMESPACE(##s))
#else
#define cdecl(s) FIPS202X4_NAMESPACE(##s)
#endif
#else
#include <stddef.h>
#include <stdint.h>
#include <immintrin.h>
/* State of four Keccak instances processed together: 25 vectors of 256 bits.
 * fips202x4.c fills and reads each vector as four 64-bit values, one per
 * instance (instance 0 in the lowest 64 bits). */
typedef struct {
__m256i s[25];
} keccakx4_state;
/* Permutation over the interleaved state, taking a round-constant table.
 * Not defined in the C file of this module -- presumably implemented in
 * assembly, given the __ASSEMBLER__ branch above (TODO confirm). */
#define f1600x4 FIPS202X4_NAMESPACE(f1600x4)
void f1600x4(__m256i *s, const uint64_t *rc);
/* Initialise *state from four inputs of inlen bytes each (SHAKE128 rate). */
#define shake128x4_absorb_once FIPS202X4_NAMESPACE(shake128x4_absorb_once)
void shake128x4_absorb_once(keccakx4_state *state,
const uint8_t *in0,
const uint8_t *in1,
const uint8_t *in2,
const uint8_t *in3,
size_t inlen);
/* Write nblocks blocks of SHAKE128_RATE bytes to each output. */
#define shake128x4_squeezeblocks FIPS202X4_NAMESPACE(shake128x4_squeezeblocks)
void shake128x4_squeezeblocks(uint8_t *out0,
uint8_t *out1,
uint8_t *out2,
uint8_t *out3,
size_t nblocks,
keccakx4_state *state);
/* Initialise *state from four inputs of inlen bytes each (SHAKE256 rate). */
#define shake256x4_absorb_once FIPS202X4_NAMESPACE(shake256x4_absorb_once)
void shake256x4_absorb_once(keccakx4_state *state,
const uint8_t *in0,
const uint8_t *in1,
const uint8_t *in2,
const uint8_t *in3,
size_t inlen);
/* Write nblocks blocks of SHAKE256_RATE bytes to each output. */
#define shake256x4_squeezeblocks FIPS202X4_NAMESPACE(shake256x4_squeezeblocks)
void shake256x4_squeezeblocks(uint8_t *out0,
uint8_t *out1,
uint8_t *out2,
uint8_t *out3,
size_t nblocks,
keccakx4_state *state);
/* One-shot: absorb four inputs of inlen bytes, write outlen bytes to each
 * of the four outputs. */
#define shake128x4 FIPS202X4_NAMESPACE(shake128x4)
void shake128x4(uint8_t *out0,
uint8_t *out1,
uint8_t *out2,
uint8_t *out3,
size_t outlen,
const uint8_t *in0,
const uint8_t *in1,
const uint8_t *in2,
const uint8_t *in3,
size_t inlen);
/* One-shot SHAKE256 counterpart of shake128x4. */
#define shake256x4 FIPS202X4_NAMESPACE(shake256x4)
void shake256x4(uint8_t *out0,
uint8_t *out1,
uint8_t *out2,
uint8_t *out3,
size_t outlen,
const uint8_t *in0,
const uint8_t *in1,
const uint8_t *in2,
const uint8_t *in3,
size_t inlen);
#endif
#endif