Integrated Picnic round2.

This commit is contained in:
Christian Paquin 2019-05-02 22:07:21 -04:00
parent d55c59d087
commit 8198072ce7
115 changed files with 10913 additions and 5463 deletions

View File

@ -12,7 +12,7 @@ RET=0
# We need to temporarily remove bash fail-on-error for the last command, because grep returns with error code 1 when there are no lines found
set +e
FREE=$(find src -name '*.c' | grep -v upstream | xargs grep '[^_]free' | grep "free(" | grep -v 'IGNORE free-check')
FREE=$(find src -name '*.c' | grep -v upstream | grep -v 'picnic/external' | xargs grep '[^_]free' | grep "free(" | grep -v 'IGNORE free-check')
ERROR_CODE=$?
set -e

View File

@ -72,3 +72,12 @@ EXPORTS
OQS_SIG_picnic_L5_UR_keypair
OQS_SIG_picnic_L5_UR_sign
OQS_SIG_picnic_L5_UR_verify
OQS_SIG_picnic2_L1_FS_keypair
OQS_SIG_picnic2_L1_FS_sign
OQS_SIG_picnic2_L1_FS_verify
OQS_SIG_picnic2_L3_FS_keypair
OQS_SIG_picnic2_L3_FS_sign
OQS_SIG_picnic2_L3_FS_verify
OQS_SIG_picnic2_L5_FS_keypair
OQS_SIG_picnic2_L5_FS_sign
OQS_SIG_picnic2_L5_FS_verify

View File

@ -1,4 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="DebugDLL|Win32">
@ -69,8 +69,16 @@
<ClInclude Include="..\..\src\sig\picnic\external\mpc_lowmc.h" />
<ClInclude Include="..\..\src\sig\picnic\external\mzd_additional.h" />
<ClInclude Include="..\..\src\sig\picnic\external\picnic.h" />
<ClInclude Include="..\..\src\sig\picnic\external\picnic2_impl.h" />
<ClInclude Include="..\..\src\sig\picnic\external\picnic2_simulate.h" />
<ClInclude Include="..\..\src\sig\picnic\external\picnic2_simulate_mul.h" />
<ClInclude Include="..\..\src\sig\picnic\external\picnic2_tree.h" />
<ClInclude Include="..\..\src\sig\picnic\external\picnic2_types.h" />
<ClInclude Include="..\..\src\sig\picnic\external\picnic_impl.h" />
<ClInclude Include="..\..\src\sig\picnic\external\sha3\KeccakHashtimes4.h" />
<ClInclude Include="..\..\src\sig\picnic\external\sha3\KeccakSpongeWidth1600.h" />
<ClInclude Include="..\..\src\sig\picnic\external\sha3\KeccakSpongeWidth1600times4.h" />
<ClInclude Include="..\..\src\sig\picnic\external\sha3\opt64\KeccakP-1600-SnP.h" />
<ClInclude Include="..\..\src\sig\picnic\external\simd.h" />
<ClInclude Include="..\..\src\sig\picnic\sig_picnic.h" />
<ClInclude Include="..\..\src\sig\sig.h" />
@ -116,10 +124,18 @@
<ClCompile Include="..\..\src\sig\picnic\external\mpc_lowmc.c" />
<ClCompile Include="..\..\src\sig\picnic\external\mzd_additional.c" />
<ClCompile Include="..\..\src\sig\picnic\external\picnic.c" />
<ClCompile Include="..\..\src\sig\picnic\external\picnic2_impl.c" />
<ClCompile Include="..\..\src\sig\picnic\external\picnic2_simulate.c" />
<ClCompile Include="..\..\src\sig\picnic\external\picnic2_simulate_mul.c" />
<ClCompile Include="..\..\src\sig\picnic\external\picnic2_tree.c" />
<ClCompile Include="..\..\src\sig\picnic\external\picnic2_types.c" />
<ClCompile Include="..\..\src\sig\picnic\external\picnic_impl.c" />
<ClCompile Include="..\..\src\sig\picnic\external\sha3\KeccakHash.c" />
<ClCompile Include="..\..\src\sig\picnic\external\sha3\KeccakHashtimes4.c" />
<ClCompile Include="..\..\src\sig\picnic\external\sha3\KeccakSpongeWidth1600.c" />
<ClCompile Include="..\..\src\sig\picnic\external\sha3\KeccakSpongeWidth1600times4.c" />
<ClCompile Include="..\..\src\sig\picnic\external\sha3\opt64\KeccakP-1600-opt64.c" />
<ClCompile Include="..\..\src\sig\picnic\external\sha3\opt64\KeccakP-1600-times4-on1.c" />
<ClCompile Include="..\..\src\sig\picnic\sig_picnic.c" />
<ClCompile Include="..\..\src\sig\sig.c" />
</ItemGroup>
@ -560,4 +576,4 @@ copy "$(SolutionDir)..\src\sig\qtesla\sig_qtesla.h" "$(SolutionDir)include\oqs\"
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>

View File

@ -136,6 +136,30 @@
<ClCompile Include="..\..\src\sig\picnic\external\sha3\opt64\KeccakP-1600-opt64.c">
<Filter>picnic</Filter>
</ClCompile>
<ClCompile Include="..\..\src\sig\picnic\external\sha3\KeccakHashtimes4.c">
<Filter>picnic</Filter>
</ClCompile>
<ClCompile Include="..\..\src\sig\picnic\external\sha3\KeccakSpongeWidth1600times4.c">
<Filter>picnic</Filter>
</ClCompile>
<ClCompile Include="..\..\src\sig\picnic\external\picnic2_impl.c">
<Filter>picnic</Filter>
</ClCompile>
<ClCompile Include="..\..\src\sig\picnic\external\picnic2_simulate.c">
<Filter>picnic</Filter>
</ClCompile>
<ClCompile Include="..\..\src\sig\picnic\external\picnic2_simulate_mul.c">
<Filter>picnic</Filter>
</ClCompile>
<ClCompile Include="..\..\src\sig\picnic\external\picnic2_tree.c">
<Filter>picnic</Filter>
</ClCompile>
<ClCompile Include="..\..\src\sig\picnic\external\picnic2_types.c">
<Filter>picnic</Filter>
</ClCompile>
<ClCompile Include="..\..\src\sig\picnic\external\sha3\opt64\KeccakP-1600-times4-on1.c">
<Filter>picnic</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\..\src\oqs.h" />
@ -254,6 +278,30 @@
<ClInclude Include="..\..\src\sig\picnic\external\sha3\KeccakSpongeWidth1600.h">
<Filter>picnic</Filter>
</ClInclude>
<ClInclude Include="..\..\src\sig\picnic\external\sha3\KeccakHashtimes4.h">
<Filter>picnic</Filter>
</ClInclude>
<ClInclude Include="..\..\src\sig\picnic\external\sha3\KeccakSpongeWidth1600times4.h">
<Filter>picnic</Filter>
</ClInclude>
<ClInclude Include="..\..\src\sig\picnic\external\picnic2_impl.h">
<Filter>picnic</Filter>
</ClInclude>
<ClInclude Include="..\..\src\sig\picnic\external\picnic2_simulate.h">
<Filter>picnic</Filter>
</ClInclude>
<ClInclude Include="..\..\src\sig\picnic\external\picnic2_simulate_mul.h">
<Filter>picnic</Filter>
</ClInclude>
<ClInclude Include="..\..\src\sig\picnic\external\picnic2_tree.h">
<Filter>picnic</Filter>
</ClInclude>
<ClInclude Include="..\..\src\sig\picnic\external\picnic2_types.h">
<Filter>picnic</Filter>
</ClInclude>
<ClInclude Include="..\..\src\sig\picnic\external\sha3\opt64\KeccakP-1600-SnP.h">
<Filter>picnic</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="dll.def" />

View File

@ -21,5 +21,8 @@
#define OQS_ENABLE_SIG_picnic_L3_UR
#define OQS_ENABLE_SIG_picnic_L5_FS
#define OQS_ENABLE_SIG_picnic_L5_UR
#define OQS_ENABLE_SIG_picnic2_L1_FS
#define OQS_ENABLE_SIG_picnic2_L3_FS
#define OQS_ENABLE_SIG_picnic2_L5_FS
#endif

View File

@ -106,6 +106,9 @@ AC_DEFUN([CONFIG_FEATURES],
AC_DEFINE(OQS_ENABLE_SIG_picnic_L3_UR, 1, "Define to 1 when picnic-L3-UR enabled")
AC_DEFINE(OQS_ENABLE_SIG_picnic_L5_FS, 1, "Define to 1 when picnic-L5-FS enabled")
AC_DEFINE(OQS_ENABLE_SIG_picnic_L5_UR, 1, "Define to 1 when picnic-L5-UR enabled")
AC_DEFINE(OQS_ENABLE_SIG_picnic2_L1_FS, 1, "Define to 1 when picnic2-L1-FS enabled")
AC_DEFINE(OQS_ENABLE_SIG_picnic2_L3_FS, 1, "Define to 1 when picnic2-L3-FS enabled")
AC_DEFINE(OQS_ENABLE_SIG_picnic2_L5_FS, 1, "Define to 1 when picnic2-L5-FS enabled")
])
AM_COND_IF([ENABLE_KEM_NEWHOPE], [

View File

@ -7,8 +7,8 @@ Summary
- **Name**: Picnic
- **Algorithm type**: signature
- **Main cryptographic assumption**: hash function security (ROM/QROM), key recovery attacks on the lowMC block cipher
- **NIST submission URL**: https://csrc.nist.gov/CSRC/media/Projects/Post-Quantum-Cryptography/documents/round-1/submissions/Picnic.zip
- **Submitters (to NIST competition)**: Greg Zaverucha, Melissa Chase, David Derler, Steven Goldfeder, Claudio Orlandi, Sebastian Ramacher, Christian Rechberger, Daniel Slamanig
- **NIST submission URL**: https://csrc.nist.gov/CSRC/media/Projects/Post-Quantum-Cryptography/documents/round-2/submissions/Picnic-Round2.zip
- **Submitters (to NIST competition)**: Greg Zaverucha, Melissa Chase, David Derler, Steven Goldfeder, Claudio Orlandi, Sebastian Ramacher, Christian Rechberger, Daniel Slamanig, Jonathan Katz, Xiao Wang, Vladimir Kolesnikov
- **Submitters' website**: https://microsoft.github.io/Picnic/
- **Added to liboqs by**: Christian Paquin
@ -17,18 +17,21 @@ Parameter sets
| Parameter set | Security model | Claimed NIST security level | Public key size (bytes) | Secret key size (bytes) | Signature size (bytes) |
|-----------------|:--------------:|:---------------------------:|:-----------------------:|:-----------------------:|:----------------------:|
| picnic_L1_FS | EUF-CMA | 1 | 33 | 49 | 34020 |
| picnic_L1_UR | EUF-CMA | 1 | 33 | 49 | 53949 |
| picnic_L3_FS | EUF-CMA | 3 | 49 | 73 | 76768 |
| picnic_L3_UR | EUF-CMA | 3 | 49 | 73 | 121841 |
| picnic_L1_FS | EUF-CMA | 1 | 33 | 49 | 34038 |
| picnic_L1_UR | EUF-CMA | 1 | 33 | 49 | 53965 |
| picnic_L3_FS | EUF-CMA | 3 | 49 | 73 | 76776 |
| picnic_L3_UR | EUF-CMA | 3 | 49 | 73 | 121849 |
| picnic_L5_FS | EUF-CMA | 5 | 65 | 97 | 132860 |
| picnic_L5_UR | EUF-CMA | 5 | 65 | 97 | 209510 |
| picnic2_L1_FS | EUF-CMA | 1 | 33 | 49 | 13806 |
| picnic2_L3_FS | EUF-CMA | 3 | 49 | 73 | 29754 |
| picnic2_L5_FS | EUF-CMA | 5 | 65 | 97 | 54736 |
Implementation
--------------
- **Source of implementation:** https://github.com/IAIK/Picnic
- **Implementation version:** https://github.com/IAIK/Picnic/tree/v1.3.1
- **Implementation version:** https://github.com/IAIK/Picnic/tree/v2.0
- **License:** MIT License
- **Language:** C
- **Constant-time:** Yes

View File

@ -1,7 +1,7 @@
AUTOMAKE_OPTIONS = foreign
noinst_LTLIBRARIES = libpicnic_i.la
libpicnic_i_la_SOURCES = sig_picnic.c external/aligned_alloc.c external/bitstream.c external/cpu.c external/io.c external/lowmc.c external/lowmc_pars.c external/lowmc_128_128_20.c external/lowmc_192_192_30.c external/lowmc_256_256_38.c external/mpc_lowmc.c external/mzd_additional.c external/picnic.c external/picnic_impl.c external/sha3/KeccakHash.c external/sha3/KeccakSpongeWidth1600.c external/sha3/opt64/KeccakP-1600-opt64.c
libpicnic_i_la_SOURCES = sig_picnic.c external/aligned_alloc.c external/bitstream.c external/cpu.c external/io.c external/lowmc.c external/lowmc_pars.c external/lowmc_128_128_20.c external/lowmc_128_128_182.c external/lowmc_192_192_284.c external/lowmc_192_192_30.c external/lowmc_256_256_38.c external/lowmc_256_256_363.c external/mpc_lowmc.c external/mzd_additional.c external/picnic.c external/picnic_impl.c external/picnic2_impl.c external/picnic2_simulate.c external/picnic2_simulate_mul.c external/picnic2_tree.c external/picnic2_types.c external/sha3/KeccakHash.c external/sha3/KeccakSpongeWidth1600.c external/sha3/KeccakHashtimes4.c external/sha3/KeccakSpongeWidth1600times4.c external/sha3/opt64/KeccakP-1600-opt64.c external/sha3/opt64/KeccakP-1600-times4-on1.c
libpicnic_i_la_CFLAGS = -Iexternal -Iexternal/sha3 -Iexternal/sha3/opt64 -DPICNIC_STATIC -DOPTIMIZED_LINEAR_LAYER_EVALUATION -DREDUCED_ROUND_KEY_COMPUTATION -DWITH_LOWMC_128_128_20 -DWITH_LOWMC_192_192_30 -DWITH_LOWMC_256_256_38 -DWITH_OPT -DWITH_POPCNT

View File

@ -3,12 +3,10 @@ Picnic: Post-Quantum Signatures
The Picnic signature scheme is a family of digital signature schemes secure against attacks by quantum computers. This repository contains an optimized implementation of these schemes. The scheme and parameter sets are specified in the [Picnic Specification Document](https://github.com/Microsoft/Picnic/blob/master/spec.pdf). The public API of the library and the serialization format is compatible with the [reference implementation](https://github.com/Microsoft/Picnic).
A research paper describing the signature scheme is also available:
Research papers describing the signature scheme and the optimizations are also available:
* **Post-Quantum Zero-Knowledge and Signatures from Symmetric-Key Primitives** Melissa Chase and David Derler and Steven Goldfeder and Claudio Orlandi and Sebastian Ramacher and Christian Rechberger and Daniel Slamanig and Greg Zaverucha. *In Proceedings of ACM CCS 2017*. *[Cryptology ePrint Archive: Report 2017/279](http://eprint.iacr.org/2017/279)*
Preprints describing the LowMC optimizations are available too:
* **Improvements to the Linear Operations of LowMC: A Faster Picnic** Daniel Kales and Léo Perrin and Angela Promitzer and Sebastian Ramacher and Christian Rechberger. *[Cryptology ePrint Archive: Report 2017/1148](http://eprint.iacr.org/2017/1148)*
* **Linear Equivalence of Block Ciphers with Partial Non-Linear Layers: Application to LowMC** Itai Dinur. *[Cryptology ePrint Archive: Report 2018/772](http://eprint.iacr.org/2018/772)*
* **Improved Non-Interactive Zero Knowledge with Applications to Post-Quantum Signatures** Jonathan Katz and Vladimir Kolesnikov and Xiao Wang. *In Proceedings of ACM CCS 2018*. *[Cryptology ePrint Archive: Report 2018/475](http://eprint.iacr.org/2018/475)*
* **Linear Equivalence of Block Ciphers with Partial Non-Linear Layers: Application to LowMC** Itai Dinur and Daniel Kales and Angela Promitzer and Sebastian Ramacher and Christian Rechberger. *In Proceedings of Eurocrypt 2019*. *[Cryptology ePrint Archive: Report 2018/772](http://eprint.iacr.org/2018/772)*
Packages
--------

View File

@ -7,11 +7,15 @@
* SPDX-License-Identifier: MIT
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#else
/* define HAVE_* for more known good configurations */
#if !defined(HAVE_POSIX_MEMALIGN) && \
((defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 200112L) || defined(__APPLE__))
/* defined in POSIX and available on OS X */
#define HAVE_POSIX_MEMALIGN
#endif
#if !defined(HAVE_MEMALIGN) && defined(__linux__)
/* always available on Linux */
@ -19,7 +23,6 @@
#endif
#endif
#include <oqs/common.h>
#include "compat.h"
#if !defined(HAVE_ALIGNED_ALLOC)
#include <errno.h>
@ -63,7 +66,7 @@ void* aligned_alloc(size_t alignment, size_t size) {
void aligned_free(void* ptr) {
#if defined(HAVE_POSIX_MEMALIGN) || defined(HAVE_MEMALIGN)
OQS_MEM_insecure_free(ptr);
free(ptr);
#elif defined(__MINGW32__) || defined(__MINGW64__)
__mingw_aligned_free(ptr);
#elif defined(_MSC_VER)

View File

@ -7,10 +7,14 @@
* SPDX-License-Identifier: MIT
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "bitstream.h"
#include "macros.h"
uint64_t oqs_sig_picnic_bitstream_get_bits(bitstream_t* bs, unsigned int num_bits) {
uint64_t bitstream_get_bits(bitstream_t* bs, unsigned int num_bits) {
ASSUME(1 <= num_bits && num_bits <= 64);
const uint8_t* p = &bs->buffer.r[bs->position / 8];
@ -36,7 +40,7 @@ uint64_t oqs_sig_picnic_bitstream_get_bits(bitstream_t* bs, unsigned int num_bit
return ret;
}
uint8_t oqs_sig_picnic_bitstream_get_bits_8(bitstream_t* bs, unsigned int num_bits) {
uint8_t bitstream_get_bits_8(bitstream_t* bs, unsigned int num_bits) {
ASSUME(1 <= num_bits && num_bits <= 8);
const uint8_t* p = &bs->buffer.r[bs->position / 8];
@ -58,7 +62,7 @@ uint8_t oqs_sig_picnic_bitstream_get_bits_8(bitstream_t* bs, unsigned int num_bi
return ret;
}
uint32_t oqs_sig_picnic_bitstream_get_bits_32(bitstream_t* bs, unsigned int num_bits) {
uint32_t bitstream_get_bits_32(bitstream_t* bs, unsigned int num_bits) {
ASSUME(1 <= num_bits && num_bits <= 32);
const uint8_t* p = &bs->buffer.r[bs->position / 8];
@ -84,7 +88,7 @@ uint32_t oqs_sig_picnic_bitstream_get_bits_32(bitstream_t* bs, unsigned int num_
return ret;
}
void oqs_sig_picnic_bitstream_put_bits(bitstream_t* bs, uint64_t value, unsigned int num_bits) {
void bitstream_put_bits(bitstream_t* bs, uint64_t value, unsigned int num_bits) {
ASSUME(1 <= num_bits && num_bits <= 64);
const unsigned int skip_bits = bs->position % 8;
@ -109,7 +113,7 @@ void oqs_sig_picnic_bitstream_put_bits(bitstream_t* bs, uint64_t value, unsigned
}
}
void oqs_sig_picnic_bitstream_put_bits_8(bitstream_t* bs, uint8_t value, unsigned int num_bits) {
void bitstream_put_bits_8(bitstream_t* bs, uint8_t value, unsigned int num_bits) {
ASSUME(1 <= num_bits && num_bits <= 8);
const unsigned int skip_bits = bs->position % 8;
@ -130,7 +134,7 @@ void oqs_sig_picnic_bitstream_put_bits_8(bitstream_t* bs, uint8_t value, unsigne
}
}
void oqs_sig_picnic_bitstream_put_bits_32(bitstream_t* bs, uint32_t value, unsigned int num_bits) {
void bitstream_put_bits_32(bitstream_t* bs, uint32_t value, unsigned int num_bits) {
ASSUME(1 <= num_bits && num_bits <= 32);
const unsigned int skip_bits = bs->position % 8;

View File

@ -11,6 +11,7 @@
#include <stddef.h>
#include <stdint.h>
#include "oqs_picnic_macros.h"
typedef struct {
union {
@ -20,11 +21,11 @@ typedef struct {
size_t position;
} bitstream_t;
uint64_t oqs_sig_picnic_bitstream_get_bits(bitstream_t* bs, unsigned int num_bits);
uint8_t oqs_sig_picnic_bitstream_get_bits_8(bitstream_t* bs, unsigned int num_bits);
uint32_t oqs_sig_picnic_bitstream_get_bits_32(bitstream_t* bs, unsigned int num_bits);
void oqs_sig_picnic_bitstream_put_bits(bitstream_t* bs, uint64_t value, unsigned int num_bits);
void oqs_sig_picnic_bitstream_put_bits_8(bitstream_t* bs, uint8_t value, unsigned int num_bits);
void oqs_sig_picnic_bitstream_put_bits_32(bitstream_t* bs, uint32_t value, unsigned int num_bits);
uint64_t bitstream_get_bits(bitstream_t* bs, unsigned int num_bits);
uint8_t bitstream_get_bits_8(bitstream_t* bs, unsigned int num_bits);
uint32_t bitstream_get_bits_32(bitstream_t* bs, unsigned int num_bits);
void bitstream_put_bits(bitstream_t* bs, uint64_t value, unsigned int num_bits);
void bitstream_put_bits_8(bitstream_t* bs, uint8_t value, unsigned int num_bits);
void bitstream_put_bits_32(bitstream_t* bs, uint32_t value, unsigned int num_bits);
#endif

View File

@ -10,12 +10,16 @@
#ifndef PICNIC_COMPAT_H
#define PICNIC_COMPAT_H
#if defined(HAVE_CONFIG_H)
#include <config.h>
#else
/* in case cmake checks were not run, define HAVE_* for known good configurations */
#if !defined(HAVE_ALIGNED_ALLOC) && !defined(__APPLE__) && !defined(__MINGW32__) && \
!defined(__MINGW64__) && \
(defined(_ISOC11_SOURCE) || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L))
#define HAVE_ALIGNED_ALLOC
#endif
#endif
#if defined(HAVE_ALIGNED_ALLOC)
#include <stdlib.h>

View File

@ -7,6 +7,10 @@
* SPDX-License-Identifier: MIT
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "cpu.h"
/* If cmake checks were not run, define some known values. */

View File

@ -59,17 +59,11 @@ static inline uint64_t AATR_CONST bswap64(uint64_t x) {
#endif
#endif
/* OS X */
#if defined(__APPLE__)
/* OS X / OpenBSD */
#if defined(__APPLE__) || defined(__OpenBSD__)
#include <machine/endian.h>
#endif
/* OpenBSD */
#if defined(__OpenBSD__)
#include <machine/endian.h>
#define HAVE_HOSTSWAP
#endif
/* other BSDs */
#if defined(__FreeBSD__) || defined(__NETBSD__) || defined(__NetBSD__)
#include <sys/endian.h>

View File

@ -7,12 +7,16 @@
* SPDX-License-Identifier: MIT
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "io.h"
#include <string.h>
#include "compat.h"
void oqs_sig_picnic_mzd_to_char_array(uint8_t* dst, const mzd_local_t* data, size_t len) {
void mzd_to_char_array(uint8_t* dst, const mzd_local_t* data, size_t len) {
const size_t word_count = len / sizeof(uint64_t);
const block_t* block = CONST_BLOCK(data, 0);
@ -22,7 +26,7 @@ void oqs_sig_picnic_mzd_to_char_array(uint8_t* dst, const mzd_local_t* data, siz
}
}
void oqs_sig_picnic_mzd_from_char_array(mzd_local_t* result, const uint8_t* data, size_t len) {
void mzd_from_char_array(mzd_local_t* result, const uint8_t* data, size_t len) {
const size_t word_count = len / sizeof(uint64_t);
block_t* block = BLOCK(result, 0);
@ -33,10 +37,4 @@ void oqs_sig_picnic_mzd_from_char_array(mzd_local_t* result, const uint8_t* data
}
}
/* unused
void print_hex(FILE* out, const uint8_t* data, size_t len) {
for (size_t i = len; i; --i, ++data) {
fprintf(out, "%02X", *data);
}
}
*/
/* cropped unused print_hex */

View File

@ -12,11 +12,12 @@
#include <stdint.h>
#include <stdio.h>
#include "oqs_picnic_macros.h"
#include "mzd_additional.h"
void oqs_sig_picnic_mzd_to_char_array(uint8_t* dst, const mzd_local_t* data, size_t numbytes);
void oqs_sig_picnic_mzd_from_char_array(mzd_local_t* result, const uint8_t* data, size_t len);
void mzd_to_char_array(uint8_t* dst, const mzd_local_t* data, size_t numbytes);
void mzd_from_char_array(mzd_local_t* result, const uint8_t* data, size_t len);
/* unused
void print_hex(FILE* out, const uint8_t* data, size_t len);

View File

@ -30,6 +30,8 @@
#else
#include <libkeccak.a.headers/KeccakHash.h>
#endif
// this is not in SUPERCOP, so we ship it ourselves
#include "sha3/KeccakHashtimes4.h"
#include "picnic_impl.h"
@ -38,10 +40,8 @@ typedef Keccak_HashInstance hash_context;
static inline void hash_init(hash_context* ctx, const picnic_instance_t* pp) {
if (pp->digest_size == 32) {
Keccak_HashInitialize_SHAKE128(ctx);
// #defined as Keccak_HashInitialize(hashInstance, 1344, 256, 0, 0x1F)
} else {
Keccak_HashInitialize_SHAKE256(ctx);
// #defined as Keccak_HashInitialize(hashInstance, 1088, 512, 0, 0x1F)
}
}
@ -72,4 +72,42 @@ typedef Keccak_HashInstance kdf_shake_t;
#define kdf_shake_get_randomness(ctx, dst, count) hash_squeeze((ctx), (dst), (count))
#define kdf_shake_clear(ctx)
// Instances that work with 4 states in parallel
typedef Keccak_HashInstancetimes4 hash_context_x4;
// Initialize the 4-way hash context `ctx` for the parameter set `pp`.
// A digest size of 32 selects the SHAKE128 initializer; any other digest
// size selects the SHAKE256 initializer.
static inline void hash_init_x4(hash_context_x4* ctx, const picnic_instance_t* pp) {
if (pp->digest_size == 32) {
Keccak_HashInitializetimes4_SHAKE128(ctx);
} else {
Keccak_HashInitializetimes4_SHAKE256(ctx);
}
}
// Feed `size` units of input from the buffers in `data` into the 4-way
// hash context `ctx`.
// NOTE(review): `size << 3` multiplies by 8, so `size` is presumably a byte
// count and Keccak_HashUpdatetimes4 presumably takes a bit length -- confirm
// against KeccakHashtimes4.h. `data` is presumably an array of four input
// pointers, one per parallel state.
static inline void hash_update_x4(hash_context_x4* ctx, const uint8_t** data, size_t size) {
Keccak_HashUpdatetimes4(ctx, data, size << 3);
}
// Initialize the 4-way hash context `ctx` for `pp` and then absorb the
// single byte `prefix` through hash_update_x4, supplying the same byte for
// each of the four input pointers.
static inline void hash_init_prefix_x4(hash_context_x4* ctx, const picnic_instance_t* pp,
const uint8_t prefix) {
hash_init_x4(ctx, pp);
// All four entries point at the same local copy of the prefix byte.
const uint8_t* prefixes[] = {&prefix, &prefix, &prefix, &prefix};
hash_update_x4(ctx, prefixes, sizeof(prefix));
}
// Finalize the 4-way hash context `ctx`. NULL is passed as the second
// argument of Keccak_HashFinaltimes4; output is read separately through
// hash_squeeze_x4.
static inline void hash_final_x4(hash_context_x4* ctx) {
Keccak_HashFinaltimes4(ctx, NULL);
}
// Squeeze output from the 4-way hash context `ctx` into the buffers in
// `buffer`.
// NOTE(review): `buflen << 3` multiplies by 8, so `buflen` is presumably a
// byte count and Keccak_HashSqueezetimes4 presumably takes a bit length --
// confirm against KeccakHashtimes4.h. `buffer` is presumably an array of
// four output pointers, one per parallel state.
static inline void hash_squeeze_x4(hash_context_x4* ctx, uint8_t** buffer, size_t buflen) {
Keccak_HashSqueezetimes4(ctx, buffer, buflen << 3);
}
typedef Keccak_HashInstancetimes4 kdf_shake_x4_t;
#define kdf_shake_x4_init(ctx, pp) hash_init_x4((ctx), (pp))
#define kdf_shake_x4_init_prefix(ctx, pp, prefix) hash_init_prefix_x4((ctx), (pp), (prefix))
#define kdf_shake_x4_update_key(ctx, key, keylen) hash_update_x4((ctx), (key), (keylen))
#define kdf_shake_x4_finalize_key(ctx) hash_final_x4((ctx))
#define kdf_shake_x4_get_randomness(ctx, dst, count) hash_squeeze_x4((ctx), (dst), (count))
#define kdf_shake_x4_clear(ctx)
#endif

View File

@ -7,10 +7,15 @@
* SPDX-License-Identifier: MIT
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "io.h"
#include "lowmc.h"
#include "lowmc_pars.h"
#include "mzd_additional.h"
#include "picnic2_impl.h"
#if defined(WITH_OPT)
#include "simd.h"
@ -89,130 +94,20 @@ static void sbox_layer_1_uint64(uint64_t* d) {
#endif
// uint64 based implementation
#define ADDMUL SELECT_V_VL(oqs_sig_picnic_mzd_addmul_v_uint64_128, oqs_sig_picnic_mzd_addmul_vl_uint64_128)
#define MUL SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_uint64_128, oqs_sig_picnic_mzd_mul_vl_uint64_128)
#define XOR oqs_sig_picnic_mzd_xor_uint64_128
#define SHUFFLE oqs_sig_picnic_mzd_shuffle_128
#define MUL_MC_1 SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_uint64_128_576, oqs_sig_picnic_mzd_mul_vl_uint64_128_576)
#define MUL_MC_10 SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_uint64_128_640, oqs_sig_picnic_mzd_mul_vl_uint64_128_640)
#define MUL_R_1 oqs_sig_picnic_mzd_addmul_v_uint64_3_128
#define MUL_R_10 oqs_sig_picnic_mzd_addmul_v_uint64_30_128
#define MUL_Z_1 oqs_sig_picnic_mzd_mul_v_parity_uint64_128_3
#define MUL_Z_10 oqs_sig_picnic_mzd_mul_v_parity_uint64_128_30
#define XOR_MC_1 oqs_sig_picnic_mzd_xor_uint64_576
#define XOR_MC_10 oqs_sig_picnic_mzd_xor_uint64_640
#define LOWMC_N LOWMC_L1_N
#define LOWMC_R_10 LOWMC_L1_R
#define LOWMC_R_1 LOWMC_L1_1_R
#if defined(WITH_LOWMC_128_128_20)
#define LOWMC_INSTANCE_10 lowmc_128_128_20
#endif
#if defined(WITH_LOWMC_128_128_182)
#define LOWMC_INSTANCE_1 lowmc_128_128_182
#endif
#include "lowmc_fns_uint64_L1.h"
#define LOWMC lowmc_uint64_128
#include "lowmc.c.i"
#undef ADDMUL
#undef MUL
#undef SHUFFLE
#undef XOR
#define ADDMUL SELECT_V_VL(oqs_sig_picnic_mzd_addmul_v_uint64_192, oqs_sig_picnic_mzd_addmul_vl_uint64_192)
#define MUL SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_uint64_192, oqs_sig_picnic_mzd_mul_vl_uint64_192)
#define SHUFFLE oqs_sig_picnic_mzd_shuffle_192
#define XOR oqs_sig_picnic_mzd_xor_uint64_192
#undef MUL_MC_1
#undef MUL_MC_10
#undef MUL_R_1
#undef MUL_R_10
#undef MUL_Z_1
#undef MUL_Z_10
#undef XOR_MC_1
#undef XOR_MC_10
#define MUL_MC_1 SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_uint64_192_896, oqs_sig_picnic_mzd_mul_vl_uint64_192_896)
#define MUL_MC_10 SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_uint64_192_960, oqs_sig_picnic_mzd_mul_vl_uint64_192_960)
#define MUL_R_1 oqs_sig_picnic_mzd_addmul_v_uint64_3_192
#define MUL_R_10 oqs_sig_picnic_mzd_addmul_v_uint64_30_192
#define MUL_Z_1 oqs_sig_picnic_mzd_mul_v_parity_uint64_192_3
#define MUL_Z_10 oqs_sig_picnic_mzd_mul_v_parity_uint64_192_30
#define XOR_MC_1 oqs_sig_picnic_mzd_xor_uint64_896
#define XOR_MC_10 oqs_sig_picnic_mzd_xor_uint64_960
#include "lowmc_fns_uint64_L3.h"
#undef LOWMC
#undef LOWMC_N
#undef LOWMC_R_10
#undef LOWMC_R_1
#undef LOWMC_INSTANCE_10
#undef LOWMC_INSTANCE_1
#define LOWMC_N LOWMC_L3_N
#define LOWMC_R_10 LOWMC_L3_R
#define LOWMC_R_1 LOWMC_L3_1_R
#if defined(WITH_LOWMC_192_192_30)
#define LOWMC_INSTANCE_10 lowmc_192_192_30
#endif
#if defined(WITH_LOWMC_192_192_284)
#define LOWMC_INSTANCE_1 lowmc_192_192_284
#endif
#define LOWMC lowmc_uint64_192
#include "lowmc.c.i"
#undef ADDMUL
#undef MUL
#undef SHUFFLE
#undef XOR
#define ADDMUL SELECT_V_VL(oqs_sig_picnic_mzd_addmul_v_uint64_256, oqs_sig_picnic_mzd_addmul_vl_uint64_256)
#define MUL SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_uint64_256, oqs_sig_picnic_mzd_mul_vl_uint64_256)
#define SHUFFLE oqs_sig_picnic_mzd_shuffle_256
#define XOR oqs_sig_picnic_mzd_xor_uint64_256
#undef MUL_MC_1
#undef MUL_MC_10
#undef MUL_R_1
#undef MUL_R_10
#undef MUL_Z_1
#undef MUL_Z_10
#undef XOR_MC_1
#undef XOR_MC_10
#define MUL_MC_1 SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_uint64_256_1152, oqs_sig_picnic_mzd_mul_vl_uint64_256_1152)
#define MUL_MC_10 SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_uint64_256_1216, oqs_sig_picnic_mzd_mul_vl_uint64_256_1216)
#define MUL_R_1 oqs_sig_picnic_mzd_addmul_v_uint64_3_256
#define MUL_R_10 oqs_sig_picnic_mzd_addmul_v_uint64_30_256
#define MUL_Z_1 oqs_sig_picnic_mzd_mul_v_parity_uint64_256_3
#define MUL_Z_10 oqs_sig_picnic_mzd_mul_v_parity_uint64_256_30
#define XOR_MC_1 oqs_sig_picnic_mzd_xor_uint64_1152
#define XOR_MC_10 oqs_sig_picnic_mzd_xor_uint64_1216
#include "lowmc_fns_uint64_L5.h"
#undef LOWMC
#undef LOWMC_N
#undef LOWMC_R_10
#undef LOWMC_R_1
#undef LOWMC_INSTANCE_10
#undef LOWMC_INSTANCE_1
#define LOWMC_N LOWMC_L5_N
#define LOWMC_R_10 LOWMC_L5_R
#define LOWMC_R_1 LOWMC_L5_1_R
#if defined(WITH_LOWMC_256_256_38)
#define LOWMC_INSTANCE_10 lowmc_256_256_38
#endif
#if defined(WITH_LOWMC_256_256_363)
#define LOWMC_INSTANCE_1 lowmc_256_256_363
#endif
#define LOWMC lowmc_uint64_256
#include "lowmc.c.i"
#undef LOWMC
#undef LOWMC_N
#undef LOWMC_R_10
#undef LOWMC_R_1
#undef LOWMC_INSTANCE_10
#undef LOWMC_INSTANCE_1
#if defined(WITH_OPT)
#if defined(WITH_SSE2) || defined(WITH_NEON)
#if defined(WITH_SSE2)
@ -220,137 +115,19 @@ static void sbox_layer_1_uint64(uint64_t* d) {
#endif
// L1 using SSE2/NEON
#undef ADDMUL
#undef MUL
#undef SHUFFLE
#undef XOR
#define ADDMUL SELECT_V_VL(oqs_sig_picnic_mzd_addmul_v_s128_128, oqs_sig_picnic_mzd_addmul_vl_s128_128)
#define MUL SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_s128_128, oqs_sig_picnic_mzd_mul_vl_s128_128)
#define SHUFFLE oqs_sig_picnic_mzd_shuffle_128
#define XOR oqs_sig_picnic_mzd_xor_s128_128
#undef LOWMC_INSTANCE_1
#undef LOWMC_INSTANCE_10
#undef LOWMC_N
#undef LOWMC_R_1
#undef LOWMC_R_10
#if defined(WITH_LOWMC_128_128_20)
#define LOWMC_INSTANCE_10 lowmc_128_128_20
#endif
#if defined(WITH_LOWMC_128_128_182)
#define LOWMC_INSTANCE_1 lowmc_128_128_182
#endif
#define LOWMC_N LOWMC_L1_N
#define LOWMC_R_10 LOWMC_L1_R
#define LOWMC_R_1 LOWMC_L1_1_R
#undef MUL_MC_1
#undef MUL_MC_10
#undef MUL_R_1
#undef MUL_R_10
#undef MUL_Z_1
#undef MUL_Z_10
#undef XOR_MC_1
#undef XOR_MC_10
#define MUL_MC_1 SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_s128_128_640, oqs_sig_picnic_mzd_mul_vl_s128_128_640)
#define MUL_MC_10 SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_s128_128_640, oqs_sig_picnic_mzd_mul_vl_s128_128_640)
#define MUL_R_1 oqs_sig_picnic_mzd_addmul_v_s128_3_128
#define MUL_R_10 oqs_sig_picnic_mzd_addmul_v_s128_30_128
#define MUL_Z_1 oqs_sig_picnic_mzd_mul_v_parity_uint64_128_3
#define MUL_Z_10 oqs_sig_picnic_mzd_mul_v_parity_uint64_128_30
#define XOR_MC_1 oqs_sig_picnic_mzd_xor_s128_640
#define XOR_MC_10 oqs_sig_picnic_mzd_xor_s128_640
#include "lowmc_fns_s128_L1.h"
#undef LOWMC
#define LOWMC lowmc_s128_128
#include "lowmc.c.i"
// L3 using SSE2/NEON
#undef ADDMUL
#undef MUL
#undef SHUFFLE
#undef XOR
#define ADDMUL SELECT_V_VL(oqs_sig_picnic_mzd_addmul_v_s128_192, oqs_sig_picnic_mzd_addmul_vl_s128_192)
#define MUL SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_s128_192, oqs_sig_picnic_mzd_mul_vl_s128_192)
#define SHUFFLE oqs_sig_picnic_mzd_shuffle_192
#define XOR oqs_sig_picnic_mzd_xor_s128_256
#undef LOWMC_INSTANCE_1
#undef LOWMC_INSTANCE_10
#undef LOWMC_N
#undef LOWMC_R_1
#undef LOWMC_R_10
#if defined(WITH_LOWMC_192_192_30)
#define LOWMC_INSTANCE_10 lowmc_192_192_30
#endif
#if defined(WITH_LOWMC_192_192_284)
#define LOWMC_INSTANCE_1 lowmc_192_192_284
#endif
#define LOWMC_N LOWMC_L3_N
#define LOWMC_R_10 LOWMC_L3_R
#define LOWMC_R_1 LOWMC_L3_1_R
#undef MUL_MC_1
#undef MUL_MC_10
#undef MUL_R_1
#undef MUL_R_10
#undef MUL_Z_1
#undef MUL_Z_10
#undef XOR_MC_1
#undef XOR_MC_10
#define MUL_MC_1 SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_s128_192_896, oqs_sig_picnic_mzd_mul_vl_s128_192_896)
#define MUL_MC_10 SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_s128_192_1024, oqs_sig_picnic_mzd_mul_vl_s128_192_1024)
#define MUL_R_1 oqs_sig_picnic_mzd_addmul_v_s128_3_192
#define MUL_R_10 oqs_sig_picnic_mzd_addmul_v_s128_30_192
#define MUL_Z_1 oqs_sig_picnic_mzd_mul_v_parity_uint64_192_3
#define MUL_Z_10 oqs_sig_picnic_mzd_mul_v_parity_uint64_192_30
#define XOR_MC_1 oqs_sig_picnic_mzd_xor_s128_896
#define XOR_MC_10 oqs_sig_picnic_mzd_xor_s128_1024
#include "lowmc_fns_s128_L3.h"
#undef LOWMC
#define LOWMC lowmc_s128_192
#include "lowmc.c.i"
// L5 using SSE2/NEON
#undef ADDMUL
#undef MUL
#undef SHUFFLE
#define ADDMUL SELECT_V_VL(oqs_sig_picnic_mzd_addmul_v_s128_256, oqs_sig_picnic_mzd_addmul_vl_s128_256)
#define MUL SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_s128_256, oqs_sig_picnic_mzd_mul_vl_s128_256)
#define SHUFFLE oqs_sig_picnic_mzd_shuffle_256
#undef LOWMC_INSTANCE_1
#undef LOWMC_INSTANCE_10
#undef LOWMC_N
#undef LOWMC_R_1
#undef LOWMC_R_10
#if defined(WITH_LOWMC_256_256_38)
#define LOWMC_INSTANCE_10 lowmc_256_256_38
#endif
#if defined(WITH_LOWMC_256_256_363)
#define LOWMC_INSTANCE_1 lowmc_256_256_363
#endif
#define LOWMC_N LOWMC_L5_N
#define LOWMC_R_10 LOWMC_L5_R
#define LOWMC_R_1 LOWMC_L5_1_R
#undef MUL_MC_1
#undef MUL_MC_10
#undef MUL_R_1
#undef MUL_R_10
#undef MUL_Z_1
#undef MUL_Z_10
#undef XOR_MC_1
#undef XOR_MC_10
#define MUL_MC_1 SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_s128_256_1152, oqs_sig_picnic_mzd_mul_vl_s128_256_1152)
#define MUL_MC_10 SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_s128_256_1280, oqs_sig_picnic_mzd_mul_vl_s128_256_1280)
#define MUL_R_1 oqs_sig_picnic_mzd_addmul_v_s128_3_256
#define MUL_R_10 oqs_sig_picnic_mzd_addmul_v_s128_30_256
#define MUL_Z_1 oqs_sig_picnic_mzd_mul_v_parity_uint64_256_3
#define MUL_Z_10 oqs_sig_picnic_mzd_mul_v_parity_uint64_256_30
#define XOR_MC_1 oqs_sig_picnic_mzd_xor_s128_1152
#define XOR_MC_10 oqs_sig_picnic_mzd_xor_s128_1280
#include "lowmc_fns_s128_L5.h"
#undef LOWMC
#define LOWMC lowmc_s128_256
#include "lowmc.c.i"
@ -358,437 +135,33 @@ static void sbox_layer_1_uint64(uint64_t* d) {
#undef FN_ATTR
#endif
#if defined(WITH_SSE2) && defined(WITH_POPCNT)
#define FN_ATTR ATTR_TARGET("sse2,popcnt")
// L1 using SSE2 and POPCNT
#undef ADDMUL
#undef MUL
#undef SHUFFLE
#undef XOR
#define ADDMUL SELECT_V_VL(oqs_sig_picnic_mzd_addmul_v_s128_128, oqs_sig_picnic_mzd_addmul_vl_s128_128)
#define MUL SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_s128_128, oqs_sig_picnic_mzd_mul_vl_s128_128)
#define SHUFFLE oqs_sig_picnic_mzd_shuffle_128
#define XOR oqs_sig_picnic_mzd_xor_s128_128
#undef LOWMC_INSTANCE_1
#undef LOWMC_INSTANCE_10
#undef LOWMC_N
#undef LOWMC_R_1
#undef LOWMC_R_10
#if defined(WITH_LOWMC_128_128_20)
#define LOWMC_INSTANCE_10 lowmc_128_128_20
#endif
#if defined(WITH_LOWMC_128_128_182)
#define LOWMC_INSTANCE_1 lowmc_128_128_182
#endif
#define LOWMC_N LOWMC_L1_N
#define LOWMC_R_10 LOWMC_L1_R
#define LOWMC_R_1 LOWMC_L1_1_R
#undef MUL_MC_1
#undef MUL_MC_10
#undef MUL_R_1
#undef MUL_R_10
#undef MUL_Z_1
#undef MUL_Z_10
#undef XOR_MC_1
#undef XOR_MC_10
#define MUL_MC_1 SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_s128_128_640, oqs_sig_picnic_mzd_mul_vl_s128_128_640)
#define MUL_MC_10 SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_s128_128_640, oqs_sig_picnic_mzd_mul_vl_s128_128_640)
#define MUL_R_1 oqs_sig_picnic_mzd_addmul_v_s128_3_128
#define MUL_R_10 oqs_sig_picnic_mzd_addmul_v_s128_30_128
#define MUL_Z_1 oqs_sig_picnic_mzd_mul_v_parity_popcnt_128_3
#define MUL_Z_10 oqs_sig_picnic_mzd_mul_v_parity_popcnt_128_30
#define XOR_MC_1 oqs_sig_picnic_mzd_xor_s128_640
#define XOR_MC_10 oqs_sig_picnic_mzd_xor_s128_640
#undef LOWMC
#define LOWMC lowmc_s128_popcnt_128
#include "lowmc.c.i"
// L3 using SSE2 and POPCNT
#undef ADDMUL
#undef MUL
#undef SHUFFLE
#undef XOR
#define ADDMUL SELECT_V_VL(oqs_sig_picnic_mzd_addmul_v_s128_192, oqs_sig_picnic_mzd_addmul_vl_s128_192)
#define MUL SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_s128_192, oqs_sig_picnic_mzd_mul_vl_s128_192)
#define SHUFFLE oqs_sig_picnic_mzd_shuffle_192
#define XOR oqs_sig_picnic_mzd_xor_s128_256
#undef LOWMC_INSTANCE_1
#undef LOWMC_INSTANCE_10
#undef LOWMC_N
#undef LOWMC_R_1
#undef LOWMC_R_10
#if defined(WITH_LOWMC_192_192_30)
#define LOWMC_INSTANCE_10 lowmc_192_192_30
#endif
#if defined(WITH_LOWMC_192_192_284)
#define LOWMC_INSTANCE_1 lowmc_192_192_284
#endif
#define LOWMC_N LOWMC_L3_N
#define LOWMC_R_10 LOWMC_L3_R
#define LOWMC_R_1 LOWMC_L3_1_R
#undef MUL_MC_1
#undef MUL_MC_10
#undef MUL_R_1
#undef MUL_R_10
#undef MUL_Z_1
#undef MUL_Z_10
#undef XOR_MC_1
#undef XOR_MC_10
#define MUL_MC_1 SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_s128_192_896, oqs_sig_picnic_mzd_mul_vl_s128_192_896)
#define MUL_MC_10 SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_s128_192_1024, oqs_sig_picnic_mzd_mul_vl_s128_192_1024)
#define MUL_R_1 oqs_sig_picnic_mzd_addmul_v_s128_3_192
#define MUL_R_10 oqs_sig_picnic_mzd_addmul_v_s128_30_192
#define MUL_Z_1 oqs_sig_picnic_mzd_mul_v_parity_popcnt_192_3
#define MUL_Z_10 oqs_sig_picnic_mzd_mul_v_parity_popcnt_192_30
#define XOR_MC_1 oqs_sig_picnic_mzd_xor_s128_896
#define XOR_MC_10 oqs_sig_picnic_mzd_xor_s128_1024
#undef LOWMC
#define LOWMC lowmc_s128_popcnt_192
#include "lowmc.c.i"
// L5 using SSE2 and POPCNT
#undef ADDMUL
#undef MUL
#undef SHUFFLE
#define ADDMUL SELECT_V_VL(oqs_sig_picnic_mzd_addmul_v_s128_256, oqs_sig_picnic_mzd_addmul_vl_s128_256)
#define MUL SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_s128_256, oqs_sig_picnic_mzd_mul_vl_s128_256)
#define SHUFFLE oqs_sig_picnic_mzd_shuffle_256
#undef LOWMC_INSTANCE_1
#undef LOWMC_INSTANCE_10
#undef LOWMC_N
#undef LOWMC_R_1
#undef LOWMC_R_10
#if defined(WITH_LOWMC_256_256_38)
#define LOWMC_INSTANCE_10 lowmc_256_256_38
#endif
#if defined(WITH_LOWMC_256_256_363)
#define LOWMC_INSTANCE_1 lowmc_256_256_363
#endif
#define LOWMC_N LOWMC_L5_N
#define LOWMC_R_10 LOWMC_L5_R
#define LOWMC_R_1 LOWMC_L5_1_R
#undef MUL_MC_1
#undef MUL_MC_10
#undef MUL_R_1
#undef MUL_R_10
#undef MUL_Z_1
#undef MUL_Z_10
#undef XOR_MC_1
#undef XOR_MC_10
#define MUL_MC_1 SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_s128_256_1152, oqs_sig_picnic_mzd_mul_vl_s128_256_1152)
#define MUL_MC_10 SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_s128_256_1280, oqs_sig_picnic_mzd_mul_vl_s128_256_1280)
#define MUL_R_1 oqs_sig_picnic_mzd_addmul_v_s128_3_256
#define MUL_R_10 oqs_sig_picnic_mzd_addmul_v_s128_30_256
#define MUL_Z_1 oqs_sig_picnic_mzd_mul_v_parity_popcnt_256_3
#define MUL_Z_10 oqs_sig_picnic_mzd_mul_v_parity_popcnt_256_30
#define XOR_MC_1 oqs_sig_picnic_mzd_xor_s128_1152
#define XOR_MC_10 oqs_sig_picnic_mzd_xor_s128_1280
#undef LOWMC
#define LOWMC lowmc_s128_popcnt_256
#include "lowmc.c.i"
#undef FN_ATTR
#endif
#if defined(WITH_AVX2)
#define FN_ATTR ATTR_TARGET_AVX2
// L1 using AVX2
#undef ADDMUL
#undef MUL
#undef SHUFFLE
#undef XOR
#define ADDMUL SELECT_V_VL(oqs_sig_picnic_mzd_addmul_v_s256_128, oqs_sig_picnic_mzd_addmul_vl_s256_128)
#define MUL SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_s256_128, oqs_sig_picnic_mzd_mul_vl_s256_128)
#define SHUFFLE oqs_sig_picnic_mzd_shuffle_pext_128
#define XOR oqs_sig_picnic_mzd_xor_s256_128
#undef LOWMC_INSTANCE_1
#undef LOWMC_INSTANCE_10
#undef LOWMC_N
#undef LOWMC_R_1
#undef LOWMC_R_10
#if defined(WITH_LOWMC_128_128_20)
#define LOWMC_INSTANCE_10 lowmc_128_128_20
#endif
#if defined(WITH_LOWMC_128_128_182)
#define LOWMC_INSTANCE_1 lowmc_128_128_182
#endif
#define LOWMC_N LOWMC_L1_N
#define LOWMC_R_10 LOWMC_L1_R
#define LOWMC_R_1 LOWMC_L1_1_R
#undef MUL_MC_1
#undef MUL_MC_10
#undef MUL_R_1
#undef MUL_R_10
#undef MUL_Z_1
#undef MUL_Z_10
#undef XOR_MC_1
#undef XOR_MC_10
#define MUL_MC_1 SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_s256_128_768, oqs_sig_picnic_mzd_mul_vl_s256_128_768)
#define MUL_MC_10 SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_s256_128_768, oqs_sig_picnic_mzd_mul_vl_s256_128_768)
#define MUL_R_1 oqs_sig_picnic_mzd_addmul_v_s256_3_128
#define MUL_R_10 oqs_sig_picnic_mzd_addmul_v_s256_30_128
#define MUL_Z_1 oqs_sig_picnic_mzd_mul_v_parity_uint64_128_3
#define MUL_Z_10 oqs_sig_picnic_mzd_mul_v_parity_uint64_128_30
#define XOR_MC_1 oqs_sig_picnic_mzd_xor_s256_768
#define XOR_MC_10 oqs_sig_picnic_mzd_xor_s256_768
// AVX2 instantiation of the L1 instance: bind the generic macro names through the
// s256 function table, consistent with the s256 headers used by the L3 and L5 AVX2
// instantiations, before expanding lowmc.c.i under the name lowmc_s256_128.
#include "lowmc_fns_s256_L1.h"
#undef LOWMC
#define LOWMC lowmc_s256_128
#include "lowmc.c.i"
// L3 using AVX2
#undef ADDMUL
#undef MUL
#undef SHUFFLE
#undef XOR
#define ADDMUL SELECT_V_VL(oqs_sig_picnic_mzd_addmul_v_s256_192, oqs_sig_picnic_mzd_addmul_vl_s256_192)
#define MUL SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_s256_192, oqs_sig_picnic_mzd_mul_vl_s256_192)
#define SHUFFLE oqs_sig_picnic_mzd_shuffle_pext_192
#define XOR oqs_sig_picnic_mzd_xor_s256_256
#undef LOWMC_INSTANCE_1
#undef LOWMC_INSTANCE_10
#undef LOWMC_N
#undef LOWMC_R_1
#undef LOWMC_R_10
#if defined(WITH_LOWMC_192_192_30)
#define LOWMC_INSTANCE_10 lowmc_192_192_30
#endif
#if defined(WITH_LOWMC_192_192_284)
#define LOWMC_INSTANCE_1 lowmc_192_192_284
#endif
#define LOWMC_N LOWMC_L3_N
#define LOWMC_R_10 LOWMC_L3_R
#define LOWMC_R_1 LOWMC_L3_1_R
#undef MUL_MC_1
#undef MUL_MC_10
#undef MUL_R_1
#undef MUL_R_10
#undef MUL_Z_1
#undef MUL_Z_10
#undef XOR_MC_1
#undef XOR_MC_10
#define MUL_MC_1 SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_s256_192_1024, oqs_sig_picnic_mzd_mul_vl_s256_192_1024)
#define MUL_MC_10 SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_s256_192_1024, oqs_sig_picnic_mzd_mul_vl_s256_192_1024)
#define MUL_R_1 oqs_sig_picnic_mzd_addmul_v_s256_3_192
#define MUL_R_10 oqs_sig_picnic_mzd_addmul_v_s256_30_192
#define MUL_Z_1 oqs_sig_picnic_mzd_mul_v_parity_uint64_192_3
#define MUL_Z_10 oqs_sig_picnic_mzd_mul_v_parity_uint64_192_30
#define XOR_MC_1 oqs_sig_picnic_mzd_xor_s256_1024
#define XOR_MC_10 oqs_sig_picnic_mzd_xor_s256_1024
#include "lowmc_fns_s256_L3.h"
#undef LOWMC
#define LOWMC lowmc_s256_192
#include "lowmc.c.i"
// L5 using AVX2
#undef ADDMUL
#undef MUL
#undef SHUFFLE
#define ADDMUL SELECT_V_VL(oqs_sig_picnic_mzd_addmul_v_s256_256, oqs_sig_picnic_mzd_addmul_vl_s256_256)
#define MUL SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_s256_256, oqs_sig_picnic_mzd_mul_vl_s256_256)
#define SHUFFLE oqs_sig_picnic_mzd_shuffle_pext_256
#undef LOWMC_INSTANCE_1
#undef LOWMC_INSTANCE_10
#undef LOWMC_N
#undef LOWMC_R_1
#undef LOWMC_R_10
#if defined(WITH_LOWMC_256_256_38)
#define LOWMC_INSTANCE_10 lowmc_256_256_38
#endif
#if defined(WITH_LOWMC_256_256_363)
#define LOWMC_INSTANCE_1 lowmc_256_256_363
#endif
#define LOWMC_N LOWMC_L5_N
#define LOWMC_R_10 LOWMC_L5_R
#define LOWMC_R_1 LOWMC_L5_1_R
#undef MUL_MC_1
#undef MUL_MC_10
#undef MUL_R_1
#undef MUL_R_10
#undef MUL_Z_1
#undef MUL_Z_10
#undef XOR_MC_1
#undef XOR_MC_10
#define MUL_MC_1 SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_s256_256_1280, oqs_sig_picnic_mzd_mul_vl_s256_256_1280)
#define MUL_MC_10 SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_s256_256_1280, oqs_sig_picnic_mzd_mul_vl_s256_256_1280)
#define MUL_R_1 oqs_sig_picnic_mzd_addmul_v_s256_3_256
#define MUL_R_10 oqs_sig_picnic_mzd_addmul_v_s256_30_256
#define MUL_Z_1 oqs_sig_picnic_mzd_mul_v_parity_uint64_256_3
#define MUL_Z_10 oqs_sig_picnic_mzd_mul_v_parity_uint64_256_30
#define XOR_MC_1 oqs_sig_picnic_mzd_xor_s256_1280
#define XOR_MC_10 oqs_sig_picnic_mzd_xor_s256_1280
#include "lowmc_fns_s256_L5.h"
#undef LOWMC
#define LOWMC lowmc_s256_256
#include "lowmc.c.i"
#undef FN_ATTR
#if defined(WITH_POPCNT)
#define FN_ATTR ATTR_TARGET("avx2,bmi2,popcnt")
// L1 using AVX2
#undef ADDMUL
#undef MUL
#undef SHUFFLE
#undef XOR
#define ADDMUL SELECT_V_VL(oqs_sig_picnic_mzd_addmul_v_s256_128, oqs_sig_picnic_mzd_addmul_vl_s256_128)
#define MUL SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_s256_128, oqs_sig_picnic_mzd_mul_vl_s256_128)
#define SHUFFLE oqs_sig_picnic_mzd_shuffle_pext_128
#define XOR oqs_sig_picnic_mzd_xor_s256_128
#undef LOWMC_INSTANCE_1
#undef LOWMC_INSTANCE_10
#undef LOWMC_N
#undef LOWMC_R_1
#undef LOWMC_R_10
#if defined(WITH_LOWMC_128_128_20)
#define LOWMC_INSTANCE_10 lowmc_128_128_20
#endif
#if defined(WITH_LOWMC_128_128_182)
#define LOWMC_INSTANCE_1 lowmc_128_128_182
#endif
#define LOWMC_N LOWMC_L1_N
#define LOWMC_R_10 LOWMC_L1_R
#define LOWMC_R_1 LOWMC_L1_1_R
#undef MUL_MC_1
#undef MUL_MC_10
#undef MUL_R_1
#undef MUL_R_10
#undef MUL_Z_1
#undef MUL_Z_10
#undef XOR_MC_1
#undef XOR_MC_10
#define MUL_MC_1 SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_s256_128_768, oqs_sig_picnic_mzd_mul_vl_s256_128_768)
#define MUL_MC_10 SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_s256_128_768, oqs_sig_picnic_mzd_mul_vl_s256_128_768)
#define MUL_R_1 oqs_sig_picnic_mzd_addmul_v_s256_3_128
#define MUL_R_10 oqs_sig_picnic_mzd_addmul_v_s256_30_128
#define MUL_Z_1 oqs_sig_picnic_mzd_mul_v_parity_popcnt_128_3
#define MUL_Z_10 oqs_sig_picnic_mzd_mul_v_parity_popcnt_128_30
#define XOR_MC_1 oqs_sig_picnic_mzd_xor_s256_768
#define XOR_MC_10 oqs_sig_picnic_mzd_xor_s256_768
#undef LOWMC
#define LOWMC lowmc_s256_popcnt_128
#include "lowmc.c.i"
// L3 using AVX2
#undef ADDMUL
#undef MUL
#undef SHUFFLE
#undef XOR
#define ADDMUL SELECT_V_VL(oqs_sig_picnic_mzd_addmul_v_s256_192, oqs_sig_picnic_mzd_addmul_vl_s256_192)
#define MUL SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_s256_192, oqs_sig_picnic_mzd_mul_vl_s256_192)
#define SHUFFLE oqs_sig_picnic_mzd_shuffle_pext_192
#define XOR oqs_sig_picnic_mzd_xor_s256_256
#undef LOWMC_INSTANCE_1
#undef LOWMC_INSTANCE_10
#undef LOWMC_N
#undef LOWMC_R_1
#undef LOWMC_R_10
#if defined(WITH_LOWMC_192_192_30)
#define LOWMC_INSTANCE_10 lowmc_192_192_30
#endif
#if defined(WITH_LOWMC_192_192_284)
#define LOWMC_INSTANCE_1 lowmc_192_192_284
#endif
#define LOWMC_N LOWMC_L3_N
#define LOWMC_R_10 LOWMC_L3_R
#define LOWMC_R_1 LOWMC_L3_1_R
#undef MUL_MC_1
#undef MUL_MC_10
#undef MUL_R_1
#undef MUL_R_10
#undef MUL_Z_1
#undef MUL_Z_10
#undef XOR_MC_1
#undef XOR_MC_10
#define MUL_MC_1 SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_s256_192_1024, oqs_sig_picnic_mzd_mul_vl_s256_192_1024)
#define MUL_MC_10 SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_s256_192_1024, oqs_sig_picnic_mzd_mul_vl_s256_192_1024)
#define MUL_R_1 oqs_sig_picnic_mzd_addmul_v_s256_3_192
#define MUL_R_10 oqs_sig_picnic_mzd_addmul_v_s256_30_192
#define MUL_Z_1 oqs_sig_picnic_mzd_mul_v_parity_popcnt_192_3
#define MUL_Z_10 oqs_sig_picnic_mzd_mul_v_parity_popcnt_192_30
#define XOR_MC_1 oqs_sig_picnic_mzd_xor_s256_1024
#define XOR_MC_10 oqs_sig_picnic_mzd_xor_s256_1024
#undef LOWMC
#define LOWMC lowmc_s256_popcnt_192
#include "lowmc.c.i"
// L5 using AVX2
#undef ADDMUL
#undef MUL
#undef SHUFFLE
#define ADDMUL SELECT_V_VL(oqs_sig_picnic_mzd_addmul_v_s256_256, oqs_sig_picnic_mzd_addmul_vl_s256_256)
#define MUL SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_s256_256, oqs_sig_picnic_mzd_mul_vl_s256_256)
#define SHUFFLE oqs_sig_picnic_mzd_shuffle_pext_256
#undef LOWMC_INSTANCE_1
#undef LOWMC_INSTANCE_10
#undef LOWMC_N
#undef LOWMC_R_1
#undef LOWMC_R_10
#if defined(WITH_LOWMC_256_256_38)
#define LOWMC_INSTANCE_10 lowmc_256_256_38
#endif
#if defined(WITH_LOWMC_256_256_363)
#define LOWMC_INSTANCE_1 lowmc_256_256_363
#endif
#define LOWMC_N LOWMC_L5_N
#define LOWMC_R_10 LOWMC_L5_R
#define LOWMC_R_1 LOWMC_L5_1_R
#undef MUL_MC_1
#undef MUL_MC_10
#undef MUL_R_1
#undef MUL_R_10
#undef MUL_Z_1
#undef MUL_Z_10
#undef XOR_MC_1
#undef XOR_MC_10
#define MUL_MC_1 SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_s256_256_1280, oqs_sig_picnic_mzd_mul_vl_s256_256_1280)
#define MUL_MC_10 SELECT_V_VL(oqs_sig_picnic_mzd_mul_v_s256_256_1280, oqs_sig_picnic_mzd_mul_vl_s256_256_1280)
#define MUL_R_1 oqs_sig_picnic_mzd_addmul_v_s256_3_256
#define MUL_R_10 oqs_sig_picnic_mzd_addmul_v_s256_30_256
#define MUL_Z_1 oqs_sig_picnic_mzd_mul_v_parity_popcnt_256_3
#define MUL_Z_10 oqs_sig_picnic_mzd_mul_v_parity_popcnt_256_30
#define XOR_MC_1 oqs_sig_picnic_mzd_xor_s256_1280
#define XOR_MC_10 oqs_sig_picnic_mzd_xor_s256_1280
#undef LOWMC
#define LOWMC lowmc_s256_popcnt_256
#include "lowmc.c.i"
#undef FN_ATTR
#endif
#undef SHUFFLE
#define SHUFFLE oqs_sig_picnic_mzd_shuffle
#endif
#endif
lowmc_implementation_f oqs_sig_picnic_lowmc_get_implementation(const lowmc_t* lowmc) {
lowmc_implementation_f lowmc_get_implementation(const lowmc_t* lowmc) {
ASSUME(lowmc->m == 10 || lowmc->m == 1);
ASSUME(lowmc->n == 128 || lowmc->n == 192 || lowmc->n == 256);
@ -796,24 +169,6 @@ lowmc_implementation_f oqs_sig_picnic_lowmc_get_implementation(const lowmc_t* lo
#if defined(WITH_AVX2)
if (CPU_SUPPORTS_AVX2) {
if (lowmc->m == 10) {
#if defined(WITH_POPCNT)
if (CPU_SUPPORTS_POPCNT) {
switch (lowmc->n) {
#if defined(WITH_LOWMC_128_128_20)
case 128:
return lowmc_s256_popcnt_128_10;
#endif
#if defined(WITH_LOWMC_192_192_30)
case 192:
return lowmc_s256_popcnt_192_10;
#endif
#if defined(WITH_LOWMC_256_256_38)
case 256:
return lowmc_s256_popcnt_256_10;
#endif
}
}
#endif
switch (lowmc->n) {
#if defined(WITH_LOWMC_128_128_20)
case 128:
@ -831,24 +186,6 @@ lowmc_implementation_f oqs_sig_picnic_lowmc_get_implementation(const lowmc_t* lo
}
#if defined(WITH_LOWMC_M1)
if (lowmc->m == 1) {
#if defined(WITH_POPCNT)
if (CPU_SUPPORTS_POPCNT) {
switch (lowmc->n) {
#if defined(WITH_LOWMC_128_128_182)
case 128:
return lowmc_s256_popcnt_128_1;
#endif
#if defined(WITH_LOWMC_192_192_284)
case 192:
return lowmc_s256_popcnt_192_1;
#endif
#if defined(WITH_LOWMC_256_256_363)
case 256:
return lowmc_s256_popcnt_256_1;
#endif
}
}
#endif
switch (lowmc->n) {
#if defined(WITH_LOWMC_128_128_182)
case 128:
@ -870,24 +207,6 @@ lowmc_implementation_f oqs_sig_picnic_lowmc_get_implementation(const lowmc_t* lo
#if defined(WITH_SSE2) || defined(WITH_NEON)
if (CPU_SUPPORTS_SSE2 || CPU_SUPPORTS_NEON) {
if (lowmc->m == 10) {
#if defined(WITH_POPCNT)
if (CPU_SUPPORTS_POPCNT) {
switch (lowmc->n) {
#if defined(WITH_LOWMC_128_128_20)
case 128:
return lowmc_s128_popcnt_128_10;
#endif
#if defined(WITH_LOWMC_192_192_30)
case 192:
return lowmc_s128_popcnt_192_10;
#endif
#if defined(WITH_LOWMC_256_256_38)
case 256:
return lowmc_s128_popcnt_256_10;
#endif
}
}
#endif
switch (lowmc->n) {
#if defined(WITH_LOWMC_128_128_20)
case 128:
@ -905,24 +224,6 @@ lowmc_implementation_f oqs_sig_picnic_lowmc_get_implementation(const lowmc_t* lo
}
#if defined(WITH_LOWMC_M1)
if (lowmc->m == 1) {
#if defined(WITH_POPCNT)
if (CPU_SUPPORTS_POPCNT) {
switch (lowmc->n) {
#if defined(WITH_LOWMC_128_128_182)
case 128:
return lowmc_s128_popcnt_128_1;
#endif
#if defined(WITH_LOWMC_192_192_284)
case 192:
return lowmc_s128_popcnt_192_1;
#endif
#if defined(WITH_LOWMC_256_256_363)
case 256:
return lowmc_s128_popcnt_256_1;
#endif
}
}
#endif
switch (lowmc->n) {
#if defined(WITH_LOWMC_128_128_182)
case 128:
@ -982,7 +283,7 @@ lowmc_implementation_f oqs_sig_picnic_lowmc_get_implementation(const lowmc_t* lo
return NULL;
}
lowmc_store_implementation_f oqs_sig_picnic_lowmc_store_get_implementation(const lowmc_t* lowmc) {
lowmc_store_implementation_f lowmc_store_get_implementation(const lowmc_t* lowmc) {
ASSUME(lowmc->m == 10 || lowmc->m == 1);
ASSUME(lowmc->n == 128 || lowmc->n == 192 || lowmc->n == 256);
@ -990,24 +291,6 @@ lowmc_store_implementation_f oqs_sig_picnic_lowmc_store_get_implementation(const
#if defined(WITH_AVX2)
if (CPU_SUPPORTS_AVX2) {
if (lowmc->m == 10) {
#if defined(WITH_POPCNT)
if (CPU_SUPPORTS_POPCNT) {
switch (lowmc->n) {
#if defined(WITH_LOWMC_128_128_20)
case 128:
return lowmc_s256_popcnt_128_store_10;
#endif
#if defined(WITH_LOWMC_192_192_30)
case 192:
return lowmc_s256_popcnt_192_store_10;
#endif
#if defined(WITH_LOWMC_256_256_38)
case 256:
return lowmc_s256_popcnt_256_store_10;
#endif
}
}
#endif
switch (lowmc->n) {
#if defined(WITH_LOWMC_128_128_20)
case 128:
@ -1025,24 +308,6 @@ lowmc_store_implementation_f oqs_sig_picnic_lowmc_store_get_implementation(const
}
#if defined(WITH_LOWMC_M1)
if (lowmc->m == 1) {
#if defined(WITH_POPCNT)
if (CPU_SUPPORTS_POPCNT) {
switch (lowmc->n) {
#if defined(WITH_LOWMC_128_128_182)
case 128:
return lowmc_s256_popcnt_128_store_1;
#endif
#if defined(WITH_LOWMC_192_192_284)
case 192:
return lowmc_s256_popcnt_192_store_1;
#endif
#if defined(WITH_LOWMC_256_256_363)
case 256:
return lowmc_s256_popcnt_256_store_1;
#endif
}
}
#endif
switch (lowmc->n) {
#if defined(WITH_LOWMC_128_128_182)
case 128:
@ -1064,24 +329,6 @@ lowmc_store_implementation_f oqs_sig_picnic_lowmc_store_get_implementation(const
#if defined(WITH_SSE2) || defined(WITH_NEON)
if (CPU_SUPPORTS_SSE2 || CPU_SUPPORTS_NEON) {
if (lowmc->m == 10) {
#if defined(WITH_POPCNT)
if (CPU_SUPPORTS_POPCNT) {
switch (lowmc->n) {
#if defined(WITH_LOWMC_128_128_20)
case 128:
return lowmc_s128_popcnt_128_store_10;
#endif
#if defined(WITH_LOWMC_192_192_30)
case 192:
return lowmc_s128_popcnt_192_store_10;
#endif
#if defined(WITH_LOWMC_256_256_38)
case 256:
return lowmc_s128_popcnt_256_store_10;
#endif
}
}
#endif
switch (lowmc->n) {
#if defined(WITH_LOWMC_128_128_20)
case 128:
@ -1099,24 +346,6 @@ lowmc_store_implementation_f oqs_sig_picnic_lowmc_store_get_implementation(const
}
#if defined(WITH_LOWMC_M1)
if (lowmc->m == 1) {
#if defined(WITH_POPCNT)
if (CPU_SUPPORTS_POPCNT) {
switch (lowmc->n) {
#if defined(WITH_LOWMC_128_128_182)
case 128:
return lowmc_s128_popcnt_128_store_1;
#endif
#if defined(WITH_LOWMC_192_192_284)
case 192:
return lowmc_s128_popcnt_192_store_1;
#endif
#if defined(WITH_LOWMC_256_256_363)
case 256:
return lowmc_s128_popcnt_256_store_1;
#endif
}
}
#endif
switch (lowmc->n) {
#if defined(WITH_LOWMC_128_128_182)
case 128:
@ -1175,3 +404,69 @@ lowmc_store_implementation_f oqs_sig_picnic_lowmc_store_get_implementation(const
return NULL;
}
/**
 * Select the compute_aux routine matching the given LowMC instance.
 *
 * Candidates are tried in this order: the s256 variants when AVX2 is compiled in
 * and reported by the CPU, then the s128 variants when SSE2 or NEON is available,
 * and finally the plain uint64 variants. Only instances with m == 10 are served;
 * a parameter set that was not enabled at build time has no candidate.
 *
 * @param lowmc LowMC instance description; its m and n fields drive the selection
 * @return the matching implementation, or NULL when none applies
 */
lowmc_compute_aux_implementation_f lowmc_compute_aux_get_implementation(const lowmc_t* lowmc) {
  ASSUME(lowmc->m == 10);
  ASSUME(lowmc->n == 128 || lowmc->n == 192 || lowmc->n == 256);

#if defined(WITH_OPT)
#if defined(WITH_AVX2)
  /* s256 variants */
  if ((CPU_SUPPORTS_AVX2) && lowmc->m == 10) {
#if defined(WITH_LOWMC_128_128_20)
    if (lowmc->n == 128) {
      return lowmc_s256_128_compute_aux_10;
    }
#endif
#if defined(WITH_LOWMC_192_192_30)
    if (lowmc->n == 192) {
      return lowmc_s256_192_compute_aux_10;
    }
#endif
#if defined(WITH_LOWMC_256_256_38)
    if (lowmc->n == 256) {
      return lowmc_s256_256_compute_aux_10;
    }
#endif
  }
#endif
#if defined(WITH_SSE2) || defined(WITH_NEON)
  /* s128 variants */
  if ((CPU_SUPPORTS_SSE2 || CPU_SUPPORTS_NEON) && lowmc->m == 10) {
#if defined(WITH_LOWMC_128_128_20)
    if (lowmc->n == 128) {
      return lowmc_s128_128_compute_aux_10;
    }
#endif
#if defined(WITH_LOWMC_192_192_30)
    if (lowmc->n == 192) {
      return lowmc_s128_192_compute_aux_10;
    }
#endif
#if defined(WITH_LOWMC_256_256_38)
    if (lowmc->n == 256) {
      return lowmc_s128_256_compute_aux_10;
    }
#endif
  }
#endif
#endif

  /* uint64 variants, used when no SIMD candidate was returned above */
  if (lowmc->m == 10) {
#if defined(WITH_LOWMC_128_128_20)
    if (lowmc->n == 128) {
      return lowmc_uint64_128_compute_aux_10;
    }
#endif
#if defined(WITH_LOWMC_192_192_30)
    if (lowmc->n == 192) {
      return lowmc_uint64_192_compute_aux_10;
    }
#endif
#if defined(WITH_LOWMC_256_256_38)
    if (lowmc->n == 256) {
      return lowmc_uint64_256_compute_aux_10;
    }
#endif
  }

  return NULL;
}

View File

@ -12,7 +12,7 @@
#define LOWMC_M 10
#define LOWMC_R LOWMC_R_10
#define MUL_MC MUL_MC_10
#define MUL_R MUL_R_10
#define ADDMUL_R ADDMUL_R_10
#define MUL_Z MUL_Z_10
#define MZD_SHUFFLE CONCAT(SHUFFLE, 30)
#define M_FIXED_10
@ -26,16 +26,26 @@
#define RECORD_STATE
#include "lowmc_impl.c.i"
#undef N_LOWMC
#undef RECORD_STATE
#undef SBOX
#define SBOX(x, tapes) \
sbox_layer_10_uint64_aux(&BLOCK(x, 0)->w64[(LOWMC_N / (sizeof(word) * 8)) - 1], tapes)
#define N_LOWMC CONCAT(LOWMC, compute_aux_10)
#define PICNIC2_AUX_COMPUTATION
#include "lowmc_impl.c.i"
#undef LOWMC_INSTANCE
#undef LOWMC_M
#undef LOWMC_R
#undef MUL_MC
#undef MUL_R
#undef ADDMUL_R
#undef MUL_Z
#undef MZD_SHUFFLE
#undef M_FIXED_10
#undef N_LOWMC
#undef RECORD_STATE
#undef PICNIC2_AUX_COMPUTATION
#undef SBOX
#undef XOR_MC
#endif
@ -45,7 +55,7 @@
#define LOWMC_M 1
#define LOWMC_R LOWMC_R_1
#define MUL_MC MUL_MC_1
#define MUL_R MUL_R_1
#define ADDMUL_R ADDMUL_R_1
#define MUL_Z MUL_Z_1
#define MZD_SHUFFLE CONCAT(SHUFFLE, 3)
#define M_FIXED_1
@ -63,12 +73,13 @@
#undef LOWMC_M
#undef LOWMC_R
#undef MUL_MC
#undef MUL_R
#undef ADDMUL_R
#undef MUL_Z
#undef MZD_SHUFFLE
#undef M_FIXED_1
#undef N_LOWMC
#undef RECORD_STATE
#undef PICNIC2_AUX_COMPUTATION
#undef SBOX
#undef XOR_MC
#endif

View File

@ -16,11 +16,16 @@ typedef struct {
mzd_local_t** state;
} recorded_state_t;
// forward declaration of the type from picnic2_types.h, since we get some cyclic dependencies otherwise
typedef struct randomTape_t randomTape_t;
typedef mzd_local_t* (*lowmc_implementation_f)(lowmc_key_t const*, mzd_local_t const*);
typedef void (*lowmc_store_implementation_f)(lowmc_key_t const*, mzd_local_t const*,
recorded_state_t* state);
typedef void (*lowmc_compute_aux_implementation_f)(lowmc_key_t const*, randomTape_t* tapes);
lowmc_implementation_f oqs_sig_picnic_lowmc_get_implementation(const lowmc_t* lowmc);
lowmc_store_implementation_f oqs_sig_picnic_lowmc_store_get_implementation(const lowmc_t* lowmc);
lowmc_implementation_f lowmc_get_implementation(const lowmc_t* lowmc);
lowmc_store_implementation_f lowmc_store_get_implementation(const lowmc_t* lowmc);
lowmc_compute_aux_implementation_f lowmc_compute_aux_get_implementation(const lowmc_t* lowmc);
#endif

View File

@ -1,3 +1,7 @@
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <stddef.h>
#include "lowmc_128_128_182.h"

View File

@ -1,3 +1,7 @@
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <stddef.h>
#include "lowmc_128_128_20.h"

View File

@ -1,3 +1,7 @@
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <stddef.h>
#include "lowmc_192_192_284.h"

View File

@ -1,3 +1,7 @@
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <stddef.h>
#include "lowmc_192_192_30.h"

View File

@ -1,3 +1,7 @@
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <stddef.h>
#include "lowmc_256_256_363.h"

View File

@ -1,3 +1,7 @@
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <stddef.h>
#include "lowmc_256_256_38.h"

View File

@ -0,0 +1,35 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/
// Macro table binding the generic LowMC operation names to the s128 routines for the
// L1 parameter level. The including file expands lowmc.c.i afterwards.
// NOTE(review): lowmc_fns_undef.h presumably #undefs every macro defined below - confirm.
#include "lowmc_fns_undef.h"
// Generic operations on the 128-wide state.
#define ADDMUL SELECT_V_VL(mzd_addmul_v_s128_128, mzd_addmul_vl_s128_128)
#define MUL SELECT_V_VL(mzd_mul_v_s128_128, mzd_mul_vl_s128_128)
#define SHUFFLE mzd_shuffle_128
#define XOR mzd_xor_s128_128
#define COPY mzd_copy_s128_128
// Per-variant operations: lowmc.c.i maps the *_1 names to the m=1 instance and the
// *_10 names to the m=10 instance.
#define MUL_MC_1 SELECT_V_VL(mzd_mul_v_s128_128_640, mzd_mul_vl_s128_128_640)
#define MUL_MC_10 SELECT_V_VL(mzd_mul_v_s128_128_640, mzd_mul_vl_s128_128_640)
#define ADDMUL_R_1 mzd_addmul_v_s128_3_128
#define ADDMUL_R_10 mzd_addmul_v_s128_30_128
#define MUL_Z_1 mzd_mul_v_parity_uint64_128_3
#define MUL_Z_10 mzd_mul_v_parity_uint64_128_30
#define XOR_MC_1 mzd_xor_s128_640
#define XOR_MC_10 mzd_xor_s128_640
// Instance objects; each is only named when its parameter set is enabled at build time.
#if defined(WITH_LOWMC_128_128_20)
#define LOWMC_INSTANCE_10 lowmc_128_128_20
#endif
#if defined(WITH_LOWMC_128_128_182)
#define LOWMC_INSTANCE_1 lowmc_128_128_182
#endif
// L1 constants (presumably state size and round counts - defined elsewhere).
#define LOWMC_N LOWMC_L1_N
#define LOWMC_R_10 LOWMC_L1_R
#define LOWMC_R_1 LOWMC_L1_1_R

View File

@ -0,0 +1,35 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/
// Macro table binding the generic LowMC operation names to the s128 routines for the
// L3 parameter level. The including file expands lowmc.c.i afterwards.
// NOTE(review): lowmc_fns_undef.h presumably #undefs every macro defined below - confirm.
#include "lowmc_fns_undef.h"
// Generic operations on the 192-wide state; XOR and COPY use the 256-wide routines.
#define ADDMUL SELECT_V_VL(mzd_addmul_v_s128_192, mzd_addmul_vl_s128_192)
#define MUL SELECT_V_VL(mzd_mul_v_s128_192, mzd_mul_vl_s128_192)
#define SHUFFLE mzd_shuffle_192
#define XOR mzd_xor_s128_256
#define COPY mzd_copy_s128_256
// Per-variant operations: lowmc.c.i maps the *_1 names to the m=1 instance and the
// *_10 names to the m=10 instance.
#define MUL_MC_1 SELECT_V_VL(mzd_mul_v_s128_192_896, mzd_mul_vl_s128_192_896)
#define MUL_MC_10 SELECT_V_VL(mzd_mul_v_s128_192_1024, mzd_mul_vl_s128_192_1024)
#define ADDMUL_R_1 mzd_addmul_v_s128_3_192
#define ADDMUL_R_10 mzd_addmul_v_s128_30_192
#define MUL_Z_1 mzd_mul_v_parity_uint64_192_3
#define MUL_Z_10 mzd_mul_v_parity_uint64_192_30
#define XOR_MC_1 mzd_xor_s128_896
#define XOR_MC_10 mzd_xor_s128_1024
// Instance objects; each is only named when its parameter set is enabled at build time.
#if defined(WITH_LOWMC_192_192_30)
#define LOWMC_INSTANCE_10 lowmc_192_192_30
#endif
#if defined(WITH_LOWMC_192_192_284)
#define LOWMC_INSTANCE_1 lowmc_192_192_284
#endif
// L3 constants (presumably state size and round counts - defined elsewhere).
#define LOWMC_N LOWMC_L3_N
#define LOWMC_R_10 LOWMC_L3_R
#define LOWMC_R_1 LOWMC_L3_1_R

View File

@ -0,0 +1,35 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/
// Macro table binding the generic LowMC operation names to the s128 routines for the
// L5 parameter level. The including file expands lowmc.c.i afterwards.
// NOTE(review): lowmc_fns_undef.h presumably #undefs every macro defined below - confirm.
#include "lowmc_fns_undef.h"
// Generic operations on the 256-wide state.
#define ADDMUL SELECT_V_VL(mzd_addmul_v_s128_256, mzd_addmul_vl_s128_256)
#define MUL SELECT_V_VL(mzd_mul_v_s128_256, mzd_mul_vl_s128_256)
#define SHUFFLE mzd_shuffle_256
#define XOR mzd_xor_s128_256
#define COPY mzd_copy_s128_256
// Per-variant operations: lowmc.c.i maps the *_1 names to the m=1 instance and the
// *_10 names to the m=10 instance.
#define MUL_MC_1 SELECT_V_VL(mzd_mul_v_s128_256_1152, mzd_mul_vl_s128_256_1152)
#define MUL_MC_10 SELECT_V_VL(mzd_mul_v_s128_256_1280, mzd_mul_vl_s128_256_1280)
#define ADDMUL_R_1 mzd_addmul_v_s128_3_256
#define ADDMUL_R_10 mzd_addmul_v_s128_30_256
#define MUL_Z_1 mzd_mul_v_parity_uint64_256_3
#define MUL_Z_10 mzd_mul_v_parity_uint64_256_30
#define XOR_MC_1 mzd_xor_s128_1152
#define XOR_MC_10 mzd_xor_s128_1280
// Instance objects; each is only named when its parameter set is enabled at build time.
#if defined(WITH_LOWMC_256_256_38)
#define LOWMC_INSTANCE_10 lowmc_256_256_38
#endif
#if defined(WITH_LOWMC_256_256_363)
#define LOWMC_INSTANCE_1 lowmc_256_256_363
#endif
// L5 constants (presumably state size and round counts - defined elsewhere).
#define LOWMC_N LOWMC_L5_N
#define LOWMC_R_10 LOWMC_L5_R
#define LOWMC_R_1 LOWMC_L5_1_R

View File

@ -0,0 +1,35 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/
// Macro table binding the generic LowMC operation names to the s256 routines for the
// L1 parameter level. The including file expands lowmc.c.i afterwards.
// NOTE(review): lowmc_fns_undef.h presumably #undefs every macro defined below - confirm.
#include "lowmc_fns_undef.h"
// Generic operations on the 128-wide state; SHUFFLE uses the pext-based routine.
#define ADDMUL SELECT_V_VL(mzd_addmul_v_s256_128, mzd_addmul_vl_s256_128)
#define MUL SELECT_V_VL(mzd_mul_v_s256_128, mzd_mul_vl_s256_128)
#define SHUFFLE mzd_shuffle_pext_128
#define XOR mzd_xor_s256_128
#define COPY mzd_copy_s256_128
// Per-variant operations: lowmc.c.i maps the *_1 names to the m=1 instance and the
// *_10 names to the m=10 instance.
#define MUL_MC_1 SELECT_V_VL(mzd_mul_v_s256_128_768, mzd_mul_vl_s256_128_768)
#define MUL_MC_10 SELECT_V_VL(mzd_mul_v_s256_128_768, mzd_mul_vl_s256_128_768)
#define ADDMUL_R_1 mzd_addmul_v_s256_3_128
#define ADDMUL_R_10 mzd_addmul_v_s256_30_128
#define MUL_Z_1 mzd_mul_v_parity_uint64_128_3
#define MUL_Z_10 mzd_mul_v_parity_uint64_128_30
#define XOR_MC_1 mzd_xor_s256_768
#define XOR_MC_10 mzd_xor_s256_768
// Instance objects; each is only named when its parameter set is enabled at build time.
#if defined(WITH_LOWMC_128_128_20)
#define LOWMC_INSTANCE_10 lowmc_128_128_20
#endif
#if defined(WITH_LOWMC_128_128_182)
#define LOWMC_INSTANCE_1 lowmc_128_128_182
#endif
// L1 constants (presumably state size and round counts - defined elsewhere).
#define LOWMC_N LOWMC_L1_N
#define LOWMC_R_10 LOWMC_L1_R
#define LOWMC_R_1 LOWMC_L1_1_R

View File

@ -0,0 +1,35 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/
// Macro table binding the generic LowMC operation names to the s256 routines for the
// L3 parameter level. The including file expands lowmc.c.i afterwards.
// NOTE(review): lowmc_fns_undef.h presumably #undefs every macro defined below - confirm.
#include "lowmc_fns_undef.h"
// Generic operations on the 192-wide state; XOR and COPY use the 256-wide routines and
// SHUFFLE uses the pext-based routine.
#define ADDMUL SELECT_V_VL(mzd_addmul_v_s256_192, mzd_addmul_vl_s256_192)
#define MUL SELECT_V_VL(mzd_mul_v_s256_192, mzd_mul_vl_s256_192)
#define SHUFFLE mzd_shuffle_pext_192
#define XOR mzd_xor_s256_256
#define COPY mzd_copy_s256_256
// Per-variant operations: lowmc.c.i maps the *_1 names to the m=1 instance and the
// *_10 names to the m=10 instance.
#define MUL_MC_1 SELECT_V_VL(mzd_mul_v_s256_192_1024, mzd_mul_vl_s256_192_1024)
#define MUL_MC_10 SELECT_V_VL(mzd_mul_v_s256_192_1024, mzd_mul_vl_s256_192_1024)
#define ADDMUL_R_1 mzd_addmul_v_s256_3_192
#define ADDMUL_R_10 mzd_addmul_v_s256_30_192
#define MUL_Z_1 mzd_mul_v_parity_uint64_192_3
#define MUL_Z_10 mzd_mul_v_parity_uint64_192_30
#define XOR_MC_1 mzd_xor_s256_1024
#define XOR_MC_10 mzd_xor_s256_1024
// Instance objects; each is only named when its parameter set is enabled at build time.
#if defined(WITH_LOWMC_192_192_30)
#define LOWMC_INSTANCE_10 lowmc_192_192_30
#endif
#if defined(WITH_LOWMC_192_192_284)
#define LOWMC_INSTANCE_1 lowmc_192_192_284
#endif
// L3 constants (presumably state size and round counts - defined elsewhere).
#define LOWMC_N LOWMC_L3_N
#define LOWMC_R_10 LOWMC_L3_R
#define LOWMC_R_1 LOWMC_L3_1_R

View File

@ -0,0 +1,35 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/
// Macro table binding the generic LowMC operation names to the s256 routines for the
// L5 parameter level. The including file expands lowmc.c.i afterwards.
// NOTE(review): lowmc_fns_undef.h presumably #undefs every macro defined below - confirm.
#include "lowmc_fns_undef.h"
// Generic operations on the 256-wide state; SHUFFLE uses the pext-based routine.
#define ADDMUL SELECT_V_VL(mzd_addmul_v_s256_256, mzd_addmul_vl_s256_256)
#define MUL SELECT_V_VL(mzd_mul_v_s256_256, mzd_mul_vl_s256_256)
#define SHUFFLE mzd_shuffle_pext_256
#define XOR mzd_xor_s256_256
#define COPY mzd_copy_s256_256
// Per-variant operations: lowmc.c.i maps the *_1 names to the m=1 instance and the
// *_10 names to the m=10 instance.
#define MUL_MC_1 SELECT_V_VL(mzd_mul_v_s256_256_1280, mzd_mul_vl_s256_256_1280)
#define MUL_MC_10 SELECT_V_VL(mzd_mul_v_s256_256_1280, mzd_mul_vl_s256_256_1280)
#define ADDMUL_R_1 mzd_addmul_v_s256_3_256
#define ADDMUL_R_10 mzd_addmul_v_s256_30_256
#define MUL_Z_1 mzd_mul_v_parity_uint64_256_3
#define MUL_Z_10 mzd_mul_v_parity_uint64_256_30
#define XOR_MC_1 mzd_xor_s256_1280
#define XOR_MC_10 mzd_xor_s256_1280
// Instance objects; each is only named when its parameter set is enabled at build time.
#if defined(WITH_LOWMC_256_256_38)
#define LOWMC_INSTANCE_10 lowmc_256_256_38
#endif
#if defined(WITH_LOWMC_256_256_363)
#define LOWMC_INSTANCE_1 lowmc_256_256_363
#endif
// L5 constants (presumably state size and round counts - defined elsewhere).
#define LOWMC_N LOWMC_L5_N
#define LOWMC_R_10 LOWMC_L5_R
#define LOWMC_R_1 LOWMC_L5_1_R

View File

@ -0,0 +1,35 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/
// Macro table binding the generic LowMC operation names to the uint64 routines for the
// L1 parameter level. The including file expands lowmc.c.i afterwards.
// NOTE(review): lowmc_fns_undef.h presumably #undefs every macro defined below - confirm.
#include "lowmc_fns_undef.h"
// Generic operations on the 128-wide state.
#define ADDMUL SELECT_V_VL(mzd_addmul_v_uint64_128, mzd_addmul_vl_uint64_128)
#define MUL SELECT_V_VL(mzd_mul_v_uint64_128, mzd_mul_vl_uint64_128)
#define XOR mzd_xor_uint64_128
#define SHUFFLE mzd_shuffle_128
#define COPY mzd_copy_uint64_128
// Per-variant operations: lowmc.c.i maps the *_1 names to the m=1 instance and the
// *_10 names to the m=10 instance.
#define MUL_MC_1 SELECT_V_VL(mzd_mul_v_uint64_128_576, mzd_mul_vl_uint64_128_576)
#define MUL_MC_10 SELECT_V_VL(mzd_mul_v_uint64_128_640, mzd_mul_vl_uint64_128_640)
#define ADDMUL_R_1 mzd_addmul_v_uint64_3_128
#define ADDMUL_R_10 mzd_addmul_v_uint64_30_128
#define MUL_Z_1 mzd_mul_v_parity_uint64_128_3
#define MUL_Z_10 mzd_mul_v_parity_uint64_128_30
#define XOR_MC_1 mzd_xor_uint64_576
#define XOR_MC_10 mzd_xor_uint64_640
// L1 constants (presumably state size and round counts - defined elsewhere).
#define LOWMC_N LOWMC_L1_N
#define LOWMC_R_10 LOWMC_L1_R
#define LOWMC_R_1 LOWMC_L1_1_R
// Instance objects; each is only named when its parameter set is enabled at build time.
#if defined(WITH_LOWMC_128_128_20)
#define LOWMC_INSTANCE_10 lowmc_128_128_20
#endif
#if defined(WITH_LOWMC_128_128_182)
#define LOWMC_INSTANCE_1 lowmc_128_128_182
#endif

View File

@ -0,0 +1,35 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/
// Macro table binding the generic LowMC operation names to the uint64 routines for the
// L3 parameter level. The including file expands lowmc.c.i afterwards.
// NOTE(review): lowmc_fns_undef.h presumably #undefs every macro defined below - confirm.
#include "lowmc_fns_undef.h"
// Generic operations on the 192-wide state.
#define ADDMUL SELECT_V_VL(mzd_addmul_v_uint64_192, mzd_addmul_vl_uint64_192)
#define MUL SELECT_V_VL(mzd_mul_v_uint64_192, mzd_mul_vl_uint64_192)
#define SHUFFLE mzd_shuffle_192
#define XOR mzd_xor_uint64_192
#define COPY mzd_copy_uint64_192
// Per-variant operations: lowmc.c.i maps the *_1 names to the m=1 instance and the
// *_10 names to the m=10 instance.
#define MUL_MC_1 SELECT_V_VL(mzd_mul_v_uint64_192_896, mzd_mul_vl_uint64_192_896)
#define MUL_MC_10 SELECT_V_VL(mzd_mul_v_uint64_192_960, mzd_mul_vl_uint64_192_960)
#define ADDMUL_R_1 mzd_addmul_v_uint64_3_192
#define ADDMUL_R_10 mzd_addmul_v_uint64_30_192
#define MUL_Z_1 mzd_mul_v_parity_uint64_192_3
#define MUL_Z_10 mzd_mul_v_parity_uint64_192_30
#define XOR_MC_1 mzd_xor_uint64_896
#define XOR_MC_10 mzd_xor_uint64_960
// L3 constants (presumably state size and round counts - defined elsewhere).
#define LOWMC_N LOWMC_L3_N
#define LOWMC_R_10 LOWMC_L3_R
#define LOWMC_R_1 LOWMC_L3_1_R
// Instance objects; each is only named when its parameter set is enabled at build time.
#if defined(WITH_LOWMC_192_192_30)
#define LOWMC_INSTANCE_10 lowmc_192_192_30
#endif
#if defined(WITH_LOWMC_192_192_284)
#define LOWMC_INSTANCE_1 lowmc_192_192_284
#endif

View File

@ -0,0 +1,35 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/
/* Bindings of the generic LowMC operation names to the plain uint64
 * implementations for the L5 parameter set (256-bit vectors).
 *
 * The _1 / _10 suffixes appear to distinguish the LowMC variants with 1 and
 * 10 S-boxes (cf. M_FIXED_1 / M_FIXED_10 in the implementation templates).
 */

/* Drop any bindings left over from a previously included selection. */
#include "lowmc_fns_undef.h"

/* LowMC parameters */
#define LOWMC_N LOWMC_L5_N
#define LOWMC_R_1 LOWMC_L5_1_R
#define LOWMC_R_10 LOWMC_L5_R

/* LowMC instances; each is bound only if it has been compiled in */
#if defined(WITH_LOWMC_256_256_363)
#define LOWMC_INSTANCE_1 lowmc_256_256_363
#endif
#if defined(WITH_LOWMC_256_256_38)
#define LOWMC_INSTANCE_10 lowmc_256_256_38
#endif

/* Operations on full-width vectors */
#define COPY mzd_copy_uint64_256
#define XOR mzd_xor_uint64_256
#define SHUFFLE mzd_shuffle_256
#define MUL SELECT_V_VL(mzd_mul_v_uint64_256, mzd_mul_vl_uint64_256)
#define ADDMUL SELECT_V_VL(mzd_addmul_v_uint64_256, mzd_addmul_vl_uint64_256)

/* Operations bound for the _1 variant */
#define MUL_MC_1 SELECT_V_VL(mzd_mul_v_uint64_256_1152, mzd_mul_vl_uint64_256_1152)
#define XOR_MC_1 mzd_xor_uint64_1152
#define MUL_Z_1 mzd_mul_v_parity_uint64_256_3
#define ADDMUL_R_1 mzd_addmul_v_uint64_3_256

/* Operations bound for the _10 variant */
#define MUL_MC_10 SELECT_V_VL(mzd_mul_v_uint64_256_1216, mzd_mul_vl_uint64_256_1216)
#define XOR_MC_10 mzd_xor_uint64_1216
#define MUL_Z_10 mzd_mul_v_parity_uint64_256_30
#define ADDMUL_R_10 mzd_addmul_v_uint64_30_256

View File

@ -0,0 +1,27 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/
/* Reset every name that a LowMC function-binding header defines, so that the
 * header including this file can bind them afresh without redefinition
 * warnings. Undefining a name that is not currently defined is harmless.
 */

/* LowMC parameters and instances */
#undef LOWMC_N
#undef LOWMC_R_1
#undef LOWMC_R_10
#undef LOWMC_INSTANCE_1
#undef LOWMC_INSTANCE_10

/* Operations on full-width vectors */
#undef COPY
#undef XOR
#undef SHUFFLE
#undef MUL
#undef ADDMUL

/* Operations bound per variant (_1 / _10) */
#undef MUL_MC_1
#undef MUL_MC_10
#undef XOR_MC_1
#undef XOR_MC_10
#undef MUL_Z_1
#undef MUL_Z_10
#undef ADDMUL_R_1
#undef ADDMUL_R_10

View File

@ -11,39 +11,52 @@
#error "OLLE is only implemented for 1 or 10 Sboxes"
#endif
#define copy(d, s) memcpy(BLOCK(d, 0), CONST_BLOCK(s, 0), LOWMC_N / 8)
// TODO: fix PICNIC2_AUX_COMPUTATION for OFF & ORKC
#if defined(FN_ATTR)
FN_ATTR
#endif
#if defined(PICNIC2_AUX_COMPUTATION)
static void N_LOWMC(lowmc_key_t const* lowmc_key, randomTape_t* tapes) {
#else
#if defined(RECORD_STATE)
static void N_LOWMC(lowmc_key_t const* lowmc_key, mzd_local_t const* p, recorded_state_t* state) {
#else
static mzd_local_t* N_LOWMC(lowmc_key_t const* lowmc_key, mzd_local_t const* p) {
#endif
#endif
mzd_local_t x[((LOWMC_N) + 255) / 256];
mzd_local_t y[((LOWMC_N) + 255) / 256];
#if defined(REDUCED_ROUND_KEY_COMPUTATION)
mzd_local_t* x = oqs_sig_picnic_mzd_local_init_ex(1, LOWMC_N, false);
mzd_local_t* y = oqs_sig_picnic_mzd_local_init_ex(1, LOWMC_N, false);
#if defined(M_FIXED_10)
mzd_local_t nl_part[(LOWMC_R * 32 + 255) / 256];
#elif defined(M_FIXED_1)
mzd_local_t nl_part[(((LOWMC_R + 20) / 21) * 64 + 255) / 256];
#endif
#if defined(OPTIMIZED_LINEAR_LAYER_EVALUATION)
#if defined(OPTIMIZED_LINEAR_LAYER_EVALUATION) // LOWMC_OPT=OLLE
#if defined(PICNIC2_AUX_COMPUTATION)
MUL(x, lowmc_key, CONCAT(LOWMC_INSTANCE.k0, matrix_postfix));
MUL_MC(nl_part, lowmc_key, CONCAT(LOWMC_INSTANCE.precomputed_non_linear_part, matrix_postfix));
#else
XOR(x, p, LOWMC_INSTANCE.precomputed_constant_linear);
ADDMUL(x, lowmc_key, CONCAT(LOWMC_INSTANCE.k0, matrix_postfix));
MUL_MC(nl_part, lowmc_key, CONCAT(LOWMC_INSTANCE.precomputed_non_linear_part, matrix_postfix));
XOR_MC(nl_part, nl_part, LOWMC_INSTANCE.precomputed_constant_non_linear);
#endif
//multiply non-linear part of state with Z0 matrix
// multiply non-linear part of state with Z0 matrix
lowmc_round_t const* round = LOWMC_INSTANCE.rounds;
for (unsigned i = 0; i < LOWMC_R-1; ++i, ++round) {
for (unsigned i = 0; i < LOWMC_R - 1; ++i, ++round) {
#if defined(RECORD_STATE)
copy(state->state[i], x);
COPY(state->state[i], x);
#endif
#if defined(PICNIC2_AUX_COMPUTATION)
SBOX(x, tapes);
#else
SBOX(x);
#endif
#if defined(M_FIXED_10)
const word nl = CONST_BLOCK(nl_part, i >> 3)->w64[(i & 0x7) >> 1];
@ -51,28 +64,34 @@ static mzd_local_t* N_LOWMC(lowmc_key_t const* lowmc_key, mzd_local_t const* p)
(nl << (1 - (i & 1)) * 32) & WORD_C(0xFFFFFFFF00000000);
#elif defined(M_FIXED_1)
const word nl = CONST_BLOCK(nl_part, i / (4 * 21))->w64[(i % (4 * 21)) / 21];
BLOCK(x, 0)->w64[(LOWMC_N) / (sizeof(word) * 8) - 1] ^= (nl << ((20-(i%21))*3)) & WORD_C(0xE000000000000000);
BLOCK(x, 0)->w64[(LOWMC_N) / (sizeof(word) * 8) - 1] ^=
(nl << ((20 - (i % 21)) * 3)) & WORD_C(0xE000000000000000);
#endif
MUL_Z(y, x, CONCAT(round->z, matrix_postfix));
MUL_Z(y, x, round->z_matrix);
MZD_SHUFFLE(x, round->r_mask);
MUL_R(y, x, CONCAT(round->r, matrix_postfix));
ADDMUL_R(y, x, round->r_matrix);
#if defined(M_FIXED_10)
BLOCK(x, 0)->w64[(LOWMC_N) / (sizeof(word) * 8) - 1] &= WORD_C(0x00000003FFFFFFFF); //clear nl part
BLOCK(x, 0)->w64[(LOWMC_N) / (sizeof(word) * 8) - 1] &=
WORD_C(0x00000003FFFFFFFF); // clear nl part
#elif defined(M_FIXED_1)
BLOCK(x, 0)->w64[(LOWMC_N) / (sizeof(word) * 8) - 1] &= WORD_C(0x1FFFFFFFFFFFFFFF); //clear nl part
BLOCK(x, 0)->w64[(LOWMC_N) / (sizeof(word) * 8) - 1] &=
WORD_C(0x1FFFFFFFFFFFFFFF); // clear nl part
#endif
XOR(x, y, x);
}
#if defined(RECORD_STATE)
copy(state->state[LOWMC_R-1], x);
COPY(state->state[LOWMC_R - 1], x);
#endif
#if defined(PICNIC2_AUX_COMPUTATION)
SBOX(x, tapes);
#else
SBOX(x);
unsigned i = (LOWMC_R-1);
unsigned int i = (LOWMC_R - 1);
#if defined(M_FIXED_10)
const word nl = CONST_BLOCK(nl_part, i >> 3)->w64[(i & 0x7) >> 1];
const word nl = CONST_BLOCK(nl_part, i >> 3)->w64[(i & 0x7) >> 1];
BLOCK(x, 0)->w64[(LOWMC_N) / (sizeof(word) * 8) - 1] ^=
(nl << (1 - (i & 1)) * 32) & WORD_C(0xFFFFFFFF00000000);
#elif defined(M_FIXED_1)
@ -81,21 +100,29 @@ static mzd_local_t* N_LOWMC(lowmc_key_t const* lowmc_key, mzd_local_t const* p)
(nl << ((20 - (i % 21)) * 3)) & WORD_C(0xE000000000000000);
#endif
MUL(y, x, CONCAT(LOWMC_INSTANCE.zr, matrix_postfix));
mzd_local_t* t = x;
x = y;
y = t;
COPY(x, y);
#endif
#else // LOWMC_OPT=ORKC
#if defined(PICNIC2_AUX_COMPUTATION)
MUL(x, lowmc_key, CONCAT(LOWMC_INSTANCE.k0, matrix_postfix));
MUL_MC(nl_part, lowmc_key, CONCAT(LOWMC_INSTANCE.precomputed_non_linear_part, matrix_postfix));
#else
XOR(x, p, LOWMC_INSTANCE.precomputed_constant_linear);
ADDMUL(x, lowmc_key, CONCAT(LOWMC_INSTANCE.k0, matrix_postfix));
MUL_MC(nl_part, lowmc_key, CONCAT(LOWMC_INSTANCE.precomputed_non_linear_part, matrix_postfix));
XOR_MC(nl_part, nl_part, LOWMC_INSTANCE.precomputed_constant_non_linear);
#endif
lowmc_round_t const* round = LOWMC_INSTANCE.rounds;
for (unsigned i = 0; i < LOWMC_R; ++i, ++round) {
#if defined(RECORD_STATE)
copy(state->state[i], x);
COPY(state->state[i], x);
#endif
#if defined(PICNIC2_AUX_COMPUTATION)
SBOX(x, tapes);
#else
SBOX(x);
#endif
#if defined(M_FIXED_10)
const word nl = CONST_BLOCK(nl_part, i >> 3)->w64[(i & 0x7) >> 1];
@ -107,48 +134,47 @@ static mzd_local_t* N_LOWMC(lowmc_key_t const* lowmc_key, mzd_local_t const* p)
(nl << ((20 - (i % 21)) * 3)) & WORD_C(0xE000000000000000);
#endif
MUL(y, x, CONCAT(round->l, matrix_postfix));
// swap x and y
mzd_local_t* t = x;
x = y;
y = t;
COPY(x, y);
}
#endif
oqs_sig_picnic_mzd_local_free(y);
#if defined(RECORD_STATE)
copy(state->state[LOWMC_R], x);
oqs_sig_picnic_mzd_local_free(x);
#else // LOWMC_OPT=OFF
#if defined(PICNIC2_AUX_COMPUTATION)
MUL(x, lowmc_key, CONCAT(LOWMC_INSTANCE.k0, matrix_postfix));
#else
return x;
#endif
#else
mzd_local_t* x = oqs_sig_picnic_mzd_local_init_ex(1, LOWMC_N, false);
mzd_local_t* y = oqs_sig_picnic_mzd_local_init_ex(1, LOWMC_N, false);
copy(x, p);
COPY(x, p);
ADDMUL(x, lowmc_key, CONCAT(LOWMC_INSTANCE.k0, matrix_postfix));
#endif
lowmc_round_t const* round = LOWMC_INSTANCE.rounds;
for (unsigned i = 0; i < LOWMC_R; ++i, ++round) {
#if defined(RECORD_STATE)
copy(state->state[i], x);
COPY(state->state[i], x);
#endif
#if defined(PICNIC2_AUX_COMPUTATION)
SBOX(x, tapes);
#else
SBOX(x);
#endif
MUL(y, x, CONCAT(round->l, matrix_postfix));
#if !defined(PICNIC2_AUX_COMPUTATION)
XOR(x, y, round->constant);
#else
COPY(x, y);
#endif
ADDMUL(x, lowmc_key, CONCAT(round->k, matrix_postfix));
}
#endif
oqs_sig_picnic_mzd_local_free(y);
#if !defined(PICNIC2_AUX_COMPUTATION)
#if defined(RECORD_STATE)
copy(state->state[LOWMC_R], x);
oqs_sig_picnic_mzd_local_free(x);
COPY(state->state[LOWMC_R], x);
#else
return x;
mzd_local_t* res = mzd_local_init_ex(1, LOWMC_N, false);
COPY(res, x);
return res;
#endif
#endif
}
#undef copy
// vim: ft=c

View File

@ -7,13 +7,17 @@
* SPDX-License-Identifier: MIT
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "lowmc_pars.h"
#include "macros.h"
#include "mzd_additional.h"
#if defined(MUL_M4RI)
bool oqs_sig_picnic_lowmc_init(lowmc_t* lowmc) {
bool lowmc_init(lowmc_t* lowmc) {
if (!lowmc) {
return false;
}
@ -22,16 +26,18 @@ bool oqs_sig_picnic_lowmc_init(lowmc_t* lowmc) {
return false;
}
lowmc->k0_lookup = oqs_sig_picnic_mzd_precompute_matrix_lookup(lowmc->k0_matrix, lowmc->n, lowmc->n);
lowmc->k0_lookup = mzd_precompute_matrix_lookup(lowmc->k0_matrix, lowmc->n, lowmc->n);
#if defined(REDUCED_ROUND_KEY_COMPUTATION)
const unsigned int cols = lowmc->m == 1 ? ((lowmc->r + 20) / 21) * 64 : lowmc->r * 32;
lowmc->precomputed_non_linear_part_lookup =
oqs_sig_picnic_mzd_precompute_matrix_lookup(lowmc->precomputed_non_linear_part_matrix, lowmc->n, cols);
mzd_precompute_matrix_lookup(lowmc->precomputed_non_linear_part_matrix, lowmc->n, cols);
#endif
for (unsigned int i = 0; i < lowmc->r; ++i) {
lowmc->rounds[i].l_lookup = oqs_sig_picnic_mzd_precompute_matrix_lookup(lowmc->rounds[i].l_matrix, lowmc->n, lowmc->n);
lowmc->rounds[i].l_lookup =
mzd_precompute_matrix_lookup(lowmc->rounds[i].l_matrix, lowmc->n, lowmc->n);
#if !defined(REDUCED_ROUND_KEY_COMPUTATION)
lowmc->rounds[i].k_lookup = oqs_sig_picnic_mzd_precompute_matrix_lookup(lowmc->rounds[i].k_matrix, lowmc->n, lowmc->n);
lowmc->rounds[i].k_lookup =
mzd_precompute_matrix_lookup(lowmc->rounds[i].k_matrix, lowmc->n, lowmc->n);
#endif
}
@ -39,19 +45,19 @@ bool oqs_sig_picnic_lowmc_init(lowmc_t* lowmc) {
}
#endif
void oqs_sig_picnic_lowmc_clear(lowmc_t* lowmc) {
void lowmc_clear(lowmc_t* lowmc) {
for (unsigned int i = 0; i < lowmc->r; ++i) {
#if defined(MUL_M4RI)
#if !defined(REDUCED_ROUND_KEY_COMPUTATION)
oqs_sig_picnic_mzd_local_free(lowmc->rounds[i].k_lookup);
mzd_local_free(lowmc->rounds[i].k_lookup);
#endif
oqs_sig_picnic_mzd_local_free(lowmc->rounds[i].l_lookup);
mzd_local_free(lowmc->rounds[i].l_lookup);
#endif
}
#if defined(MUL_M4RI)
oqs_sig_picnic_mzd_local_free(lowmc->k0_lookup);
mzd_local_free(lowmc->k0_lookup);
#if defined(REDUCED_ROUND_KEY_COMPUTATION)
oqs_sig_picnic_mzd_local_free(lowmc->precomputed_non_linear_part_lookup);
mzd_local_free(lowmc->precomputed_non_linear_part_lookup);
#endif
#endif
}

View File

@ -74,7 +74,7 @@ typedef struct {
#else
const mzd_local_t* z_matrix;
const mzd_local_t* r_matrix;
const word r_mask;
const word r_mask;
#endif
#if !defined(REDUCED_ROUND_KEY_COMPUTATION)
const mzd_local_t* constant;
@ -127,7 +127,7 @@ typedef struct {
*
* \return parameters defining a LowMC instance
*/
bool oqs_sig_picnic_lowmc_init(lowmc_t* lowmc);
bool lowmc_init(lowmc_t* lowmc);
#endif
/**
@ -135,7 +135,6 @@ bool oqs_sig_picnic_lowmc_init(lowmc_t* lowmc);
*
* \param lowmc the LowMC parameters to be cleared
*/
void oqs_sig_picnic_lowmc_clear(lowmc_t* lowmc);
void lowmc_clear(lowmc_t* lowmc);
#endif

View File

@ -10,6 +10,16 @@
#ifndef PICNIC_MACROS_H
#define PICNIC_MACROS_H
#include "oqs_picnic_macros.h"
/* __FUNCTION__ generates a warning on Linux with -Wpedantic and newer versions
* of GCC (tested with 5.4). So we use __func__ in all source and define it on
* Windows.
*/
#if defined(__WINDOWS__)
#define __func__ __FUNCTION__
#endif
/* compatibility with clang and other compilers */
#ifndef __has_attribute
#define __has_attribute(a) 0
@ -37,7 +47,9 @@
/* assume */
#if GNUC_CHECK(4, 5) || __has_builtin(__builtin_unreachable)
#define ASSUME(p) if (!(p)) __builtin_unreachable()
#define ASSUME(p) \
if (!(p)) \
__builtin_unreachable()
#elif defined(_MSC_VER)
#define ASSUME(p) __assume(p)
#else
@ -170,21 +182,80 @@ static inline bool sub_overflow_size_t(const size_t x, const size_t y, size_t* d
#include <stdint.h>
/* parity */
/* helper functions for parity computations */
#if GNUC_CHECK(4, 9) || __has_builtin(__builtin_parity)
ATTR_CONST
static inline uint8_t parity64_uint8(uint8_t in) {
return __builtin_parity(in);
}
ATTR_CONST
static inline uint64_t parity64_uint64(uint64_t in) {
return __builtin_parityll(in);
}
#else
ATTR_CONST
static inline uint8_t parity64_uint8(uint8_t in) {
/* byte parity from: https://graphics.stanford.edu/~seander/bithacks.html#ParityWith64Bits */
return (((in * UINT64_C(0x0101010101010101)) & UINT64_C(0x8040201008040201)) % 0x1FF) & 1;
}
ATTR_CONST
static inline uint64_t parity64_uint64(uint64_t in) {
in ^= in >> 32;
in ^= in >> 16;
in ^= in >> 8;
in ^= in >> 4;
return (0x6996 >> (in & 0xf)) & 1;
in ^= in >> 1;
in ^= in >> 2;
in = (in & 0x1111111111111111) * 0x1111111111111111;
return (in >> 60) & 1;
}
#endif
/* helper functions to compute number of leading zeroes */
#if GNUC_CHECK(4, 7) || __has_builtin(__builtin_clz)
/* Number of leading zero bits of x, with clz(0) == 32.
 *
 * Uses the compiler builtin; the result of __builtin_clz is undefined for an
 * argument of 0, so that case is answered before calling it.
 */
ATTR_CONST
static inline uint32_t clz(uint32_t x) {
  if (x == 0) {
    return 32;
  }
  return __builtin_clz(x);
}
#else
/* Number of leading zero bits of x, with clz(0) == 32.
 *
 * Portable fallback: a binary search over the position of the highest set
 * bit, in the spirit of nlz() from
 * H.S. Warren, *Hacker's Delight*, Pearson Education, 2003.
 * http://www.hackersdelight.org/hdcodetxt/nlz.c.txt
 */
ATTR_CONST
static inline uint32_t clz(uint32_t x) {
  if (x == 0) {
    return 32;
  }

  uint32_t zeros = 0;
  /* Test the top 16, 8, 4, 2 and finally 1 bit(s); whenever they are all
   * zero, count them and shift them out so the next step inspects the
   * following group. */
  for (uint32_t width = 16; width != 0; width >>= 1) {
    if ((x >> (32 - width)) == 0) {
      zeros += width;
      x <<= width;
    }
  }
  return zeros;
}
#endif
/* Ceiling of the base-2 logarithm of x, i.e. the smallest k with 2^k >= x.
 * Both ceil_log2(0) and ceil_log2(1) evaluate to 0.
 */
ATTR_CONST
static inline uint32_t ceil_log2(uint32_t x) {
  /* For x >= 1, the bit length of (x - 1) is exactly ceil(log2(x)); clz(0) is
   * 32, so x == 1 yields 0 as well. */
  return (x == 0) ? 0 : 32 - clz(x - 1);
}
#endif

File diff suppressed because it is too large Load Diff

View File

@ -12,14 +12,14 @@
#define N_SIGN CONCAT(SIGN, 10)
#define N_VERIFY CONCAT(VERIFY, 10)
#define MZD_SHUFFLE CONCAT(SHUFFLE, 30)
#define MUL_R MUL_R_10
#define ADDMUL_R ADDMUL_R_10
#define MUL_Z MUL_Z_10
#define XOR_MC XOR_MC_10
#define MUL_MC MUL_MC_10
#define LOWMC_R LOWMC_R_10
#define LOWMC_INSTANCE LOWMC_INSTANCE_10
#include "mpc_lowmc_impl.c.i"
#undef MUL_R
#undef ADDMUL_R
#undef MUL_Z
#undef LOWMC_R
#undef LOWMC_INSTANCE
@ -34,14 +34,14 @@
#define N_SIGN CONCAT(SIGN, 1)
#define N_VERIFY CONCAT(VERIFY, 1)
#define MZD_SHUFFLE CONCAT(SHUFFLE, 3)
#define MUL_R MUL_R_1
#define ADDMUL_R ADDMUL_R_1
#define MUL_Z MUL_Z_1
#define XOR_MC XOR_MC_1
#define MUL_MC MUL_MC_1
#define LOWMC_R LOWMC_R_1
#define LOWMC_INSTANCE LOWMC_INSTANCE_1
#include "mpc_lowmc_impl.c.i"
#undef MUL_R
#undef ADDMUL_R
#undef MUL_Z
#undef LOWMC_R
#undef LOWMC_INSTANCE

View File

@ -35,10 +35,11 @@ typedef void (*zkbpp_lowmc_implementation_f)(mpc_lowmc_key_t const*, mzd_local_t
in_out_shares_t*, rvec_t*, recorded_state_t*);
typedef void (*zkbpp_lowmc_verify_implementation_f)(mzd_local_t const*, view_t*, in_out_shares_t*,
rvec_t*, unsigned int);
typedef void (*zkbpp_share_implementation_f)(mzd_local_t*, const mzd_local_t*, const mzd_local_t*, const mzd_local_t*);
typedef void (*zkbpp_share_implementation_f)(mzd_local_t*, const mzd_local_t*, const mzd_local_t*,
const mzd_local_t*);
zkbpp_lowmc_implementation_f oqs_sig_picnic_get_zkbpp_lowmc_implementation(const lowmc_t* lowmc);
zkbpp_lowmc_verify_implementation_f oqs_sig_picnic_get_zkbpp_lowmc_verify_implementation(const lowmc_t* lowmc);
zkbpp_share_implementation_f oqs_sig_picnic_get_zkbpp_share_implentation(const lowmc_t* lowmc);
zkbpp_lowmc_implementation_f get_zkbpp_lowmc_implementation(const lowmc_t* lowmc);
zkbpp_lowmc_verify_implementation_f get_zkbpp_lowmc_verify_implementation(const lowmc_t* lowmc);
zkbpp_share_implementation_f get_zkbpp_share_implentation(const lowmc_t* lowmc);
#endif

View File

@ -26,8 +26,6 @@
#define SBOX_VERIFY mpc_sbox_layer_bitsliced_verify_uint64_1
#endif
#define copy(d, s) memcpy(BLOCK(d, 0), CONST_BLOCK(s, 0), LOWMC_N / 8)
#if defined(FN_ATTR)
FN_ATTR
#endif
@ -43,7 +41,7 @@ static void N_SIGN(mpc_lowmc_key_t const* lowmc_key, mzd_local_t const* p, view_
#define shares SC_PROOF
#define sbox SBOX_SIGN
MPC_LOOP_SHARED_1(copy, in_out_shares->s, lowmc_key, SC_PROOF);
MPC_LOOP_SHARED_1(COPY, in_out_shares->s, lowmc_key, SC_PROOF);
++in_out_shares;
mzd_local_t x[SC_PROOF][((LOWMC_N) + 255) / 256];
@ -54,7 +52,7 @@ static void N_SIGN(mpc_lowmc_key_t const* lowmc_key, mzd_local_t const* p, view_
#include "mpc_lowmc_loop.c.i"
MPC_LOOP_SHARED_1(copy, in_out_shares->s, x, SC_PROOF);
MPC_LOOP_SHARED_1(COPY, in_out_shares->s, x, SC_PROOF);
#undef reduced_shares
#undef RECOVER_FROM_STATE
@ -87,7 +85,7 @@ static void N_VERIFY(mzd_local_t const* p, view_t* views, in_out_shares_t* in_ou
#include "mpc_lowmc_loop.c.i"
MPC_LOOP_SHARED_1(copy, in_out_shares->s, x, SC_VERIFY);
MPC_LOOP_SHARED_1(COPY, in_out_shares->s, x, SC_VERIFY);
#undef sbox
#undef reduced_shares
@ -104,6 +102,5 @@ static void N_VERIFY(mzd_local_t const* p, view_t* views, in_out_shares_t* in_ou
#undef RANDTAPE
#undef SBOX
#undef LOWMC_M
#undef copy
// vim: ft=c

View File

@ -45,7 +45,7 @@ lowmc_round_t const* round = LOWMC_INSTANCE.rounds;
MZD_SHUFFLE(y[k], round->r_mask);
}
MPC_LOOP_CONST(MUL_R, x, y, CONCAT(round->r, matrix_postfix), reduced_shares);
MPC_LOOP_CONST(ADDMUL_R, x, y, CONCAT(round->r, matrix_postfix), reduced_shares);
for(unsigned int k = 0; k < reduced_shares; ++k) {
#if defined(M_FIXED_10)
BLOCK(y[k], 0)->w64[(LOWMC_N) / (sizeof(word) * 8) - 1] &= WORD_C(0x00000003FFFFFFFF); //clear nl part

File diff suppressed because it is too large Load Diff

View File

@ -47,196 +47,280 @@ typedef union {
*/
typedef block_t mzd_local_t;
mzd_local_t* oqs_sig_picnic_mzd_local_init_ex(uint32_t r, uint32_t c, bool clear) ATTR_ASSUME_ALIGNED(32);
mzd_local_t* mzd_local_init_ex(uint32_t r, uint32_t c, bool clear) ATTR_ASSUME_ALIGNED(32);
#define mzd_local_init(r, c) oqs_sig_picnic_mzd_local_init_ex(r, c, true)
#define mzd_local_init(r, c) mzd_local_init_ex(r, c, true)
void oqs_sig_picnic_mzd_local_free(mzd_local_t* v);
void mzd_local_free(mzd_local_t* v);
void oqs_sig_picnic_mzd_local_init_multiple_ex(mzd_local_t** dst, size_t n, uint32_t r, uint32_t c, bool clear)
void mzd_local_init_multiple_ex(mzd_local_t** dst, size_t n, uint32_t r, uint32_t c, bool clear)
ATTR_NONNULL_ARG(1);
#define mzd_local_init_multiple(dst, n, r, c) oqs_sig_picnic_mzd_local_init_multiple_ex(dst, n, r, c, true)
#define mzd_local_init_multiple(dst, n, r, c) mzd_local_init_multiple_ex(dst, n, r, c, true)
/**
* mzd_local_free for mzd_local_init_multiple.
*/
void oqs_sig_picnic_mzd_local_free_multiple(mzd_local_t** vs);
void mzd_local_free_multiple(mzd_local_t** vs);
void oqs_sig_picnic_mzd_xor_uint64_128(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) ATTR_NONNULL;
void oqs_sig_picnic_mzd_xor_uint64_192(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) ATTR_NONNULL;
void oqs_sig_picnic_mzd_xor_uint64_256(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) ATTR_NONNULL;
void oqs_sig_picnic_mzd_xor_uint64_576(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) ATTR_NONNULL;
void oqs_sig_picnic_mzd_xor_uint64_640(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) ATTR_NONNULL;
void oqs_sig_picnic_mzd_xor_uint64_896(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) ATTR_NONNULL;
void oqs_sig_picnic_mzd_xor_uint64_960(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) ATTR_NONNULL;
void oqs_sig_picnic_mzd_xor_uint64_1152(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) ATTR_NONNULL;
void oqs_sig_picnic_mzd_xor_uint64_1216(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) ATTR_NONNULL;
void oqs_sig_picnic_mzd_xor_s128_128(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) ATTR_NONNULL;
void oqs_sig_picnic_mzd_xor_s128_256(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) ATTR_NONNULL;
void oqs_sig_picnic_mzd_xor_s128_640(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) ATTR_NONNULL;
void oqs_sig_picnic_mzd_xor_s128_896(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) ATTR_NONNULL;
void oqs_sig_picnic_mzd_xor_s128_1024(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) ATTR_NONNULL;
void oqs_sig_picnic_mzd_xor_s128_1152(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) ATTR_NONNULL;
void oqs_sig_picnic_mzd_xor_s128_1280(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) ATTR_NONNULL;
void oqs_sig_picnic_mzd_xor_s256_128(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) ATTR_NONNULL;
void oqs_sig_picnic_mzd_xor_s256_256(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) ATTR_NONNULL;
void oqs_sig_picnic_mzd_xor_s256_768(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) ATTR_NONNULL;
void oqs_sig_picnic_mzd_xor_s256_1024(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) ATTR_NONNULL;
void oqs_sig_picnic_mzd_xor_s256_1280(mzd_local_t* res, mzd_local_t const* first, mzd_local_t const* second) ATTR_NONNULL;
void mzd_copy_uint64_128(mzd_local_t* dst, mzd_local_t const* src) ATTR_NONNULL;
void mzd_copy_uint64_192(mzd_local_t* dst, mzd_local_t const* src) ATTR_NONNULL;
void mzd_copy_uint64_256(mzd_local_t* dst, mzd_local_t const* src) ATTR_NONNULL;
void mzd_copy_s128_128(mzd_local_t* dst, mzd_local_t const* src) ATTR_NONNULL;
void mzd_copy_s128_256(mzd_local_t* dst, mzd_local_t const* src) ATTR_NONNULL;
void mzd_copy_s256_128(mzd_local_t* dst, mzd_local_t const* src) ATTR_NONNULL;
void mzd_copy_s256_256(mzd_local_t* dst, mzd_local_t const* src) ATTR_NONNULL;
void mzd_xor_uint64_128(mzd_local_t* res, mzd_local_t const* first,
mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_uint64_192(mzd_local_t* res, mzd_local_t const* first,
mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_uint64_256(mzd_local_t* res, mzd_local_t const* first,
mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_uint64_576(mzd_local_t* res, mzd_local_t const* first,
mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_uint64_640(mzd_local_t* res, mzd_local_t const* first,
mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_uint64_896(mzd_local_t* res, mzd_local_t const* first,
mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_uint64_960(mzd_local_t* res, mzd_local_t const* first,
mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_uint64_1152(mzd_local_t* res, mzd_local_t const* first,
mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_uint64_1216(mzd_local_t* res, mzd_local_t const* first,
mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_s128_128(mzd_local_t* res, mzd_local_t const* first,
mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_s128_256(mzd_local_t* res, mzd_local_t const* first,
mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_s128_640(mzd_local_t* res, mzd_local_t const* first,
mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_s128_896(mzd_local_t* res, mzd_local_t const* first,
mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_s128_1024(mzd_local_t* res, mzd_local_t const* first,
mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_s128_1152(mzd_local_t* res, mzd_local_t const* first,
mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_s128_1280(mzd_local_t* res, mzd_local_t const* first,
mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_s256_128(mzd_local_t* res, mzd_local_t const* first,
mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_s256_256(mzd_local_t* res, mzd_local_t const* first,
mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_s256_768(mzd_local_t* res, mzd_local_t const* first,
mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_s256_1024(mzd_local_t* res, mzd_local_t const* first,
mzd_local_t const* second) ATTR_NONNULL;
void mzd_xor_s256_1280(mzd_local_t* res, mzd_local_t const* first,
mzd_local_t const* second) ATTR_NONNULL;
/**
* Compute v * A optimized for v being a vector.
*/
void oqs_sig_picnic_mzd_mul_v_uint64_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_v_uint64_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_v_uint64_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_v_uint64_128_576(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_v_uint64_128_640(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_v_uint64_192_896(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_v_uint64_192_960(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_v_uint64_256_1152(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_v_uint64_256_1216(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_v_s128_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_v_s128_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_v_s128_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_v_s128_128_640(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_v_s128_192_896(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_v_s128_192_1024(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_v_s128_256_1152(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_v_s128_256_1280(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_v_s256_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_v_s256_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_v_s256_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_v_s256_128_768(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_v_s256_192_1024(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_v_s256_256_1280(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) ATTR_NONNULL;
void mzd_mul_v_uint64_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) ATTR_NONNULL;
void mzd_mul_v_uint64_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) ATTR_NONNULL;
void mzd_mul_v_uint64_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* At) ATTR_NONNULL;
void mzd_mul_v_uint64_128_576(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* At) ATTR_NONNULL;
void mzd_mul_v_uint64_128_640(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* At) ATTR_NONNULL;
void mzd_mul_v_uint64_192_896(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* At) ATTR_NONNULL;
void mzd_mul_v_uint64_192_960(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* At) ATTR_NONNULL;
void mzd_mul_v_uint64_256_1152(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* At) ATTR_NONNULL;
void mzd_mul_v_uint64_256_1216(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* At) ATTR_NONNULL;
void mzd_mul_v_s128_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_v_s128_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_v_s128_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_v_s128_128_640(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_v_s128_192_896(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_v_s128_192_1024(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_v_s128_256_1152(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_v_s128_256_1280(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_v_s256_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_v_s256_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_v_s256_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_v_s256_128_768(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* At) ATTR_NONNULL;
void mzd_mul_v_s256_192_1024(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* At) ATTR_NONNULL;
void mzd_mul_v_s256_256_1280(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* At) ATTR_NONNULL;
/**
* Compute v * A optimized for v being a vector, for specific sizes depending on instance
* Only works for specific sizes and the RLL_NEXT algorithm using uint64 operations
*/
void oqs_sig_picnic_mzd_addmul_v_uint64_30_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_addmul_v_uint64_30_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_addmul_v_uint64_30_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_addmul_v_uint64_3_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_addmul_v_uint64_3_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_addmul_v_uint64_3_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_uint64_30_128(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_uint64_30_192(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_uint64_30_256(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_uint64_3_128(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_uint64_3_192(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_uint64_3_256(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
/**
* Use SSE2 or NEON
*/
void oqs_sig_picnic_mzd_addmul_v_s128_30_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_addmul_v_s128_30_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_addmul_v_s128_30_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_addmul_v_s128_3_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_addmul_v_s128_3_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_addmul_v_s128_3_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_s128_30_128(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_s128_30_192(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_s128_30_256(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_s128_3_128(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_s128_3_192(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_s128_3_256(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
/**
* Use AVX2
*/
void oqs_sig_picnic_mzd_addmul_v_s256_30_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_addmul_v_s256_30_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_addmul_v_s256_30_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_addmul_v_s256_3_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_addmul_v_s256_3_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_addmul_v_s256_3_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_s256_30_128(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_s256_30_192(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_s256_30_256(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_s256_3_128(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_s256_3_192(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_s256_3_256(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
/**
* Compute using parity based algorithm
* */
void oqs_sig_picnic_mzd_mul_v_parity_uint64_128_30(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_v_parity_uint64_192_30(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_v_parity_uint64_256_30(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_v_parity_uint64_128_3(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_v_parity_uint64_192_3(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_v_parity_uint64_256_3(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
/**
* Using popcnt
*/
void oqs_sig_picnic_mzd_mul_v_parity_popcnt_128_30(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_v_parity_popcnt_192_30(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_v_parity_popcnt_256_30(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_v_parity_popcnt_128_3(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_v_parity_popcnt_192_3(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_v_parity_popcnt_256_3(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_v_parity_uint64_128_30(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_v_parity_uint64_192_30(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_v_parity_uint64_256_30(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_v_parity_uint64_128_3(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_v_parity_uint64_192_3(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_v_parity_uint64_256_3(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
/**
* Compute c + v * A optimized for c and v being vectors.
*/
void oqs_sig_picnic_mzd_addmul_v_uint64_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_addmul_v_uint64_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_addmul_v_uint64_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_addmul_v_s128_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_addmul_v_s128_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_addmul_v_s128_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_addmul_v_s256_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_addmul_v_s256_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_addmul_v_s256_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_uint64_128(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_uint64_192(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_uint64_256(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_s128_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_s128_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_s128_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_s256_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_s256_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_v_s256_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
#if defined(MUL_M4RI)
/**
* Compute v * A optimized for v being a vector.
*/
void oqs_sig_picnic_mzd_mul_vl_uint64_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_vl_uint64_128_576(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_vl_uint64_128_640(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_vl_uint64_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_vl_uint64_192_896(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_vl_uint64_192_960(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_vl_uint64_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_vl_uint64_256_1152(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_vl_uint64_256_1216(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_vl_s128_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_vl_s128_128_640(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_vl_s128_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_vl_s128_192_896(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_vl_s128_192_1024(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_vl_s128_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_vl_s128_256_1152(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_vl_s128_256_1280(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_vl_s256_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_vl_s256_128_768(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_vl_s256_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_vl_s256_192_1024(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_vl_s256_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_mul_vl_s256_256_1280(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_vl_uint64_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_vl_uint64_128_576(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_vl_uint64_128_640(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_vl_uint64_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_vl_uint64_192_896(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_vl_uint64_192_960(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_vl_uint64_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_vl_uint64_256_1152(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_vl_uint64_256_1216(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_vl_s128_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_vl_s128_128_640(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_vl_s128_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_vl_s128_192_896(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_vl_s128_192_1024(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_vl_s128_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_vl_s128_256_1152(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_vl_s128_256_1280(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_vl_s256_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_vl_s256_128_768(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_vl_s256_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_vl_s256_192_1024(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_vl_s256_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_mul_vl_s256_256_1280(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
/**
* Compute c + v * A optimized for c and v being vectors.
*/
void oqs_sig_picnic_mzd_addmul_vl_uint64_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_addmul_vl_uint64_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_addmul_vl_uint64_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_addmul_vl_s128_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_addmul_vl_s128_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_addmul_vl_s128_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_addmul_vl_s256_128(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_addmul_vl_s256_192(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void oqs_sig_picnic_mzd_addmul_vl_s256_256(mzd_local_t* c, mzd_local_t const* v, mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_vl_uint64_128(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_vl_uint64_192(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_vl_uint64_256(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_vl_s128_128(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_vl_s128_192(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_vl_s128_256(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_vl_s256_128(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_vl_s256_192(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
void mzd_addmul_vl_s256_256(mzd_local_t* c, mzd_local_t const* v,
mzd_local_t const* A) ATTR_NONNULL;
/**
* Pre-compute matrices for mzd_{add,}mul_vl computations.
*/
mzd_local_t* oqs_sig_picnic_mzd_precompute_matrix_lookup(mzd_local_t const* A, unsigned int r, unsigned int c) ATTR_NONNULL;
mzd_local_t* mzd_precompute_matrix_lookup(mzd_local_t const* A, unsigned int r,
unsigned int c) ATTR_NONNULL;
#endif
/**
* Shuffle vector x according to info in mask. Needed for OLLE optimizations.
*/
void oqs_sig_picnic_mzd_shuffle_128_30(mzd_local_t* x, const word mask) ATTR_NONNULL;
void oqs_sig_picnic_mzd_shuffle_128_3(mzd_local_t* x, const word mask) ATTR_NONNULL;
void oqs_sig_picnic_mzd_shuffle_192_30(mzd_local_t* x, const word mask) ATTR_NONNULL;
void oqs_sig_picnic_mzd_shuffle_192_3(mzd_local_t* x, const word mask) ATTR_NONNULL;
void oqs_sig_picnic_mzd_shuffle_256_30(mzd_local_t* x, const word mask) ATTR_NONNULL;
void oqs_sig_picnic_mzd_shuffle_256_3(mzd_local_t* x, const word mask) ATTR_NONNULL;
void oqs_sig_picnic_mzd_shuffle_pext_128_30(mzd_local_t* x, const word mask) ATTR_NONNULL;
void oqs_sig_picnic_mzd_shuffle_pext_128_3(mzd_local_t* x, const word mask) ATTR_NONNULL;
void oqs_sig_picnic_mzd_shuffle_pext_192_30(mzd_local_t* x, const word mask) ATTR_NONNULL;
void oqs_sig_picnic_mzd_shuffle_pext_192_3(mzd_local_t* x, const word mask) ATTR_NONNULL;
void oqs_sig_picnic_mzd_shuffle_pext_256_30(mzd_local_t* x, const word mask) ATTR_NONNULL;
void oqs_sig_picnic_mzd_shuffle_pext_256_3(mzd_local_t* x, const word mask) ATTR_NONNULL;
void mzd_shuffle_128_30(mzd_local_t* x, const word mask) ATTR_NONNULL;
void mzd_shuffle_128_3(mzd_local_t* x, const word mask) ATTR_NONNULL;
void mzd_shuffle_192_30(mzd_local_t* x, const word mask) ATTR_NONNULL;
void mzd_shuffle_192_3(mzd_local_t* x, const word mask) ATTR_NONNULL;
void mzd_shuffle_256_30(mzd_local_t* x, const word mask) ATTR_NONNULL;
void mzd_shuffle_256_3(mzd_local_t* x, const word mask) ATTR_NONNULL;
void mzd_shuffle_pext_128_30(mzd_local_t* x, const word mask) ATTR_NONNULL;
void mzd_shuffle_pext_128_3(mzd_local_t* x, const word mask) ATTR_NONNULL;
void mzd_shuffle_pext_192_30(mzd_local_t* x, const word mask) ATTR_NONNULL;
void mzd_shuffle_pext_192_3(mzd_local_t* x, const word mask) ATTR_NONNULL;
void mzd_shuffle_pext_256_30(mzd_local_t* x, const word mask) ATTR_NONNULL;
void mzd_shuffle_pext_256_3(mzd_local_t* x, const word mask) ATTR_NONNULL;
/* Typed view of element b of array v: takes the address of v[b], passes it
 * through ASSUME_ALIGNED with an alignment argument of 32, and casts the
 * result to block_t*.
 * NOTE(review): presumably ASSUME_ALIGNED is only a compiler alignment hint
 * and performs no runtime check -- confirm at its definition. */
#define BLOCK(v, b) ((block_t*)ASSUME_ALIGNED(&(v)[(b)], 32))
/* Same as BLOCK, but the result is cast to a pointer to const block_t. */
#define CONST_BLOCK(v, b) ((const block_t*)ASSUME_ALIGNED(&(v)[(b)], 32))

View File

@ -0,0 +1,211 @@
#ifndef OQS_PICNIC_MACROS_H
#define OQS_PICNIC_MACROS_H
#include <oqs/common.h>
/* Avoid printing debug output: make sure NDEBUG is defined for every source
 * that includes this header. The definition is guarded because a build that
 * already passes -DNDEBUG defines the macro as 1; re-defining it here with an
 * empty replacement list would be an incompatible redefinition and triggers a
 * diagnostic (fatal under -Werror). */
#ifndef NDEBUG
#define NDEBUG
#endif
/* Use OQS's free function: after this point every use of the identifier
 * `free` in the including translation unit expands to OQS_MEM_insecure_free. */
#define free OQS_MEM_insecure_free
/* add oqs_sig_picnic_ prefix to publicly exposed functions */
#define addMerkleNodes oqs_sig_picnic_addMerkleNodes
#define allocateCommitments oqs_sig_picnic_allocateCommitments
#define allocateCommitments2 oqs_sig_picnic_allocateCommitments2
#define allocateInputs oqs_sig_picnic_allocateInputs
#define allocateMsgs oqs_sig_picnic_allocateMsgs
#define allocateProof2 oqs_sig_picnic_allocateProof2
#define allocateRandomTape oqs_sig_picnic_allocateRandomTape
#define allocateShares oqs_sig_picnic_allocateShares
#define allocateSignature2 oqs_sig_picnic_allocateSignature2
#define bitstream_get_bits oqs_sig_picnic_bitstream_get_bits
#define bitstream_get_bits_32 oqs_sig_picnic_bitstream_get_bits_32
#define bitstream_get_bits_8 oqs_sig_picnic_bitstream_get_bits_8
#define bitstream_put_bits oqs_sig_picnic_bitstream_put_bits
#define bitstream_put_bits_32 oqs_sig_picnic_bitstream_put_bits_32
#define bitstream_put_bits_8 oqs_sig_picnic_bitstream_put_bits_8
#define buildMerkleTree oqs_sig_picnic_buildMerkleTree
#define copyShares oqs_sig_picnic_copyShares
#define createTree oqs_sig_picnic_createTree
#define freeCommitments oqs_sig_picnic_freeCommitments
#define freeCommitments2 oqs_sig_picnic_freeCommitments2
#define freeInputs oqs_sig_picnic_freeInputs
#define freeMsgs oqs_sig_picnic_freeMsgs
#define freeProof2 oqs_sig_picnic_freeProof2
#define freeRandomTape oqs_sig_picnic_freeRandomTape
#define freeShares oqs_sig_picnic_freeShares
#define freeSignature2 oqs_sig_picnic_freeSignature2
#define freeTree oqs_sig_picnic_freeTree
#define generateSeeds oqs_sig_picnic_generateSeeds
#define getBit oqs_sig_picnic_getBit
#define getLeaf oqs_sig_picnic_getLeaf
#define getLeaves oqs_sig_picnic_getLeaves
#define get_zkbpp_lowmc_implementation oqs_sig_picnic_get_zkbpp_lowmc_implementation
#define get_zkbpp_lowmc_verify_implementation oqs_sig_picnic_get_zkbpp_lowmc_verify_implementation
#define get_zkbpp_share_implentation oqs_sig_picnic_get_zkbpp_share_implentation
#define impl_sign oqs_sig_picnic_impl_sign
#define impl_sign_picnic2 oqs_sig_picnic_impl_sign_picnic2
#define impl_verify oqs_sig_picnic_impl_verify
#define impl_verify_picnic2 oqs_sig_picnic_impl_verify_picnic2
#define instance_get oqs_sig_picnic_instance_get
#define lowmc_clear oqs_sig_picnic_lowmc_clear
#define lowmc_compute_aux_get_implementation oqs_sig_picnic_lowmc_compute_aux_get_implementation
#define lowmc_get_implementation oqs_sig_picnic_lowmc_get_implementation
#define lowmc_simulate_online_get_implementation oqs_sig_picnic_lowmc_simulate_online_get_implementation
#define lowmc_store_get_implementation oqs_sig_picnic_lowmc_store_get_implementation
#define mpc_matrix_addmul_r_s128_128 oqs_sig_picnic_mpc_matrix_addmul_r_s128_128
#define mpc_matrix_addmul_r_s128_192 oqs_sig_picnic_mpc_matrix_addmul_r_s128_192
#define mpc_matrix_addmul_r_s128_256 oqs_sig_picnic_mpc_matrix_addmul_r_s128_256
#define mpc_matrix_addmul_r_s256_128 oqs_sig_picnic_mpc_matrix_addmul_r_s256_128
#define mpc_matrix_addmul_r_s256_192 oqs_sig_picnic_mpc_matrix_addmul_r_s256_192
#define mpc_matrix_addmul_r_s256_256 oqs_sig_picnic_mpc_matrix_addmul_r_s256_256
#define mpc_matrix_addmul_r_uint64_128 oqs_sig_picnic_mpc_matrix_addmul_r_uint64_128
#define mpc_matrix_addmul_r_uint64_192 oqs_sig_picnic_mpc_matrix_addmul_r_uint64_192
#define mpc_matrix_addmul_r_uint64_256 oqs_sig_picnic_mpc_matrix_addmul_r_uint64_256
#define mpc_matrix_mul_nl_part_s128_128 oqs_sig_picnic_mpc_matrix_mul_nl_part_s128_128
#define mpc_matrix_mul_nl_part_s128_192 oqs_sig_picnic_mpc_matrix_mul_nl_part_s128_192
#define mpc_matrix_mul_nl_part_s128_256 oqs_sig_picnic_mpc_matrix_mul_nl_part_s128_256
#define mpc_matrix_mul_nl_part_s256_128 oqs_sig_picnic_mpc_matrix_mul_nl_part_s256_128
#define mpc_matrix_mul_nl_part_s256_192 oqs_sig_picnic_mpc_matrix_mul_nl_part_s256_192
#define mpc_matrix_mul_nl_part_s256_256 oqs_sig_picnic_mpc_matrix_mul_nl_part_s256_256
#define mpc_matrix_mul_nl_part_uint64_128 oqs_sig_picnic_mpc_matrix_mul_nl_part_uint64_128
#define mpc_matrix_mul_nl_part_uint64_192 oqs_sig_picnic_mpc_matrix_mul_nl_part_uint64_192
#define mpc_matrix_mul_nl_part_uint64_256 oqs_sig_picnic_mpc_matrix_mul_nl_part_uint64_256
#define mpc_matrix_mul_s128_128 oqs_sig_picnic_mpc_matrix_mul_s128_128
#define mpc_matrix_mul_s128_192 oqs_sig_picnic_mpc_matrix_mul_s128_192
#define mpc_matrix_mul_s128_256 oqs_sig_picnic_mpc_matrix_mul_s128_256
#define mpc_matrix_mul_s256_128 oqs_sig_picnic_mpc_matrix_mul_s256_128
#define mpc_matrix_mul_s256_192 oqs_sig_picnic_mpc_matrix_mul_s256_192
#define mpc_matrix_mul_s256_256 oqs_sig_picnic_mpc_matrix_mul_s256_256
#define mpc_matrix_mul_uint64_128 oqs_sig_picnic_mpc_matrix_mul_uint64_128
#define mpc_matrix_mul_uint64_192 oqs_sig_picnic_mpc_matrix_mul_uint64_192
#define mpc_matrix_mul_uint64_256 oqs_sig_picnic_mpc_matrix_mul_uint64_256
#define mpc_matrix_mul_z_s128_128 oqs_sig_picnic_mpc_matrix_mul_z_s128_128
#define mpc_matrix_mul_z_s128_192 oqs_sig_picnic_mpc_matrix_mul_z_s128_192
#define mpc_matrix_mul_z_s128_256 oqs_sig_picnic_mpc_matrix_mul_z_s128_256
#define mpc_matrix_mul_z_s256_128 oqs_sig_picnic_mpc_matrix_mul_z_s256_128
#define mpc_matrix_mul_z_s256_192 oqs_sig_picnic_mpc_matrix_mul_z_s256_192
#define mpc_matrix_mul_z_s256_256 oqs_sig_picnic_mpc_matrix_mul_z_s256_256
#define mpc_matrix_mul_z_uint64_128 oqs_sig_picnic_mpc_matrix_mul_z_uint64_128
#define mpc_matrix_mul_z_uint64_192 oqs_sig_picnic_mpc_matrix_mul_z_uint64_192
#define mpc_matrix_mul_z_uint64_256 oqs_sig_picnic_mpc_matrix_mul_z_uint64_256
#define mzd_addmul_v_s128_128 oqs_sig_picnic_mzd_addmul_v_s128_128
#define mzd_addmul_v_s128_192 oqs_sig_picnic_mzd_addmul_v_s128_192
#define mzd_addmul_v_s128_256 oqs_sig_picnic_mzd_addmul_v_s128_256
#define mzd_addmul_v_s128_30_128 oqs_sig_picnic_mzd_addmul_v_s128_30_128
#define mzd_addmul_v_s128_30_192 oqs_sig_picnic_mzd_addmul_v_s128_30_192
#define mzd_addmul_v_s128_30_256 oqs_sig_picnic_mzd_addmul_v_s128_30_256
#define mzd_addmul_v_s128_3_128 oqs_sig_picnic_mzd_addmul_v_s128_3_128
#define mzd_addmul_v_s128_3_192 oqs_sig_picnic_mzd_addmul_v_s128_3_192
#define mzd_addmul_v_s128_3_256 oqs_sig_picnic_mzd_addmul_v_s128_3_256
#define mzd_addmul_v_s256_128 oqs_sig_picnic_mzd_addmul_v_s256_128
#define mzd_addmul_v_s256_192 oqs_sig_picnic_mzd_addmul_v_s256_192
#define mzd_addmul_v_s256_256 oqs_sig_picnic_mzd_addmul_v_s256_256
#define mzd_addmul_v_s256_30_128 oqs_sig_picnic_mzd_addmul_v_s256_30_128
#define mzd_addmul_v_s256_30_192 oqs_sig_picnic_mzd_addmul_v_s256_30_192
#define mzd_addmul_v_s256_30_256 oqs_sig_picnic_mzd_addmul_v_s256_30_256
#define mzd_addmul_v_s256_3_128 oqs_sig_picnic_mzd_addmul_v_s256_3_128
#define mzd_addmul_v_s256_3_192 oqs_sig_picnic_mzd_addmul_v_s256_3_192
#define mzd_addmul_v_s256_3_256 oqs_sig_picnic_mzd_addmul_v_s256_3_256
#define mzd_addmul_v_uint64_128 oqs_sig_picnic_mzd_addmul_v_uint64_128
#define mzd_addmul_v_uint64_192 oqs_sig_picnic_mzd_addmul_v_uint64_192
#define mzd_addmul_v_uint64_256 oqs_sig_picnic_mzd_addmul_v_uint64_256
#define mzd_addmul_v_uint64_30_128 oqs_sig_picnic_mzd_addmul_v_uint64_30_128
#define mzd_addmul_v_uint64_30_192 oqs_sig_picnic_mzd_addmul_v_uint64_30_192
#define mzd_addmul_v_uint64_30_256 oqs_sig_picnic_mzd_addmul_v_uint64_30_256
#define mzd_addmul_v_uint64_3_128 oqs_sig_picnic_mzd_addmul_v_uint64_3_128
#define mzd_addmul_v_uint64_3_192 oqs_sig_picnic_mzd_addmul_v_uint64_3_192
#define mzd_addmul_v_uint64_3_256 oqs_sig_picnic_mzd_addmul_v_uint64_3_256
#define mzd_copy_s128_128 oqs_sig_picnic_mzd_copy_s128_128
#define mzd_copy_s128_256 oqs_sig_picnic_mzd_copy_s128_256
#define mzd_copy_s256_128 oqs_sig_picnic_mzd_copy_s256_128
#define mzd_copy_s256_256 oqs_sig_picnic_mzd_copy_s256_256
#define mzd_copy_uint64_128 oqs_sig_picnic_mzd_copy_uint64_128
#define mzd_copy_uint64_192 oqs_sig_picnic_mzd_copy_uint64_192
#define mzd_copy_uint64_256 oqs_sig_picnic_mzd_copy_uint64_256
#define mzd_from_char_array oqs_sig_picnic_mzd_from_char_array
#define mzd_local_free oqs_sig_picnic_mzd_local_free
#define mzd_local_free_multiple oqs_sig_picnic_mzd_local_free_multiple
#define mzd_local_init_ex oqs_sig_picnic_mzd_local_init_ex
#define mzd_local_init_multiple_ex oqs_sig_picnic_mzd_local_init_multiple_ex
#define mzd_mul_v_parity_uint64_128_3 oqs_sig_picnic_mzd_mul_v_parity_uint64_128_3
#define mzd_mul_v_parity_uint64_128_30 oqs_sig_picnic_mzd_mul_v_parity_uint64_128_30
#define mzd_mul_v_parity_uint64_192_3 oqs_sig_picnic_mzd_mul_v_parity_uint64_192_3
#define mzd_mul_v_parity_uint64_192_30 oqs_sig_picnic_mzd_mul_v_parity_uint64_192_30
#define mzd_mul_v_parity_uint64_256_3 oqs_sig_picnic_mzd_mul_v_parity_uint64_256_3
#define mzd_mul_v_parity_uint64_256_30 oqs_sig_picnic_mzd_mul_v_parity_uint64_256_30
#define mzd_mul_v_s128_128 oqs_sig_picnic_mzd_mul_v_s128_128
#define mzd_mul_v_s128_128_640 oqs_sig_picnic_mzd_mul_v_s128_128_640
#define mzd_mul_v_s128_192 oqs_sig_picnic_mzd_mul_v_s128_192
#define mzd_mul_v_s128_192_1024 oqs_sig_picnic_mzd_mul_v_s128_192_1024
#define mzd_mul_v_s128_192_896 oqs_sig_picnic_mzd_mul_v_s128_192_896
#define mzd_mul_v_s128_256 oqs_sig_picnic_mzd_mul_v_s128_256
#define mzd_mul_v_s128_256_1152 oqs_sig_picnic_mzd_mul_v_s128_256_1152
#define mzd_mul_v_s128_256_1280 oqs_sig_picnic_mzd_mul_v_s128_256_1280
#define mzd_mul_v_s256_128 oqs_sig_picnic_mzd_mul_v_s256_128
#define mzd_mul_v_s256_128_768 oqs_sig_picnic_mzd_mul_v_s256_128_768
#define mzd_mul_v_s256_192 oqs_sig_picnic_mzd_mul_v_s256_192
#define mzd_mul_v_s256_192_1024 oqs_sig_picnic_mzd_mul_v_s256_192_1024
#define mzd_mul_v_s256_256 oqs_sig_picnic_mzd_mul_v_s256_256
#define mzd_mul_v_s256_256_1280 oqs_sig_picnic_mzd_mul_v_s256_256_1280
#define mzd_mul_v_uint64_128 oqs_sig_picnic_mzd_mul_v_uint64_128
#define mzd_mul_v_uint64_128_576 oqs_sig_picnic_mzd_mul_v_uint64_128_576
#define mzd_mul_v_uint64_128_640 oqs_sig_picnic_mzd_mul_v_uint64_128_640
#define mzd_mul_v_uint64_192 oqs_sig_picnic_mzd_mul_v_uint64_192
#define mzd_mul_v_uint64_192_896 oqs_sig_picnic_mzd_mul_v_uint64_192_896
#define mzd_mul_v_uint64_192_960 oqs_sig_picnic_mzd_mul_v_uint64_192_960
#define mzd_mul_v_uint64_256 oqs_sig_picnic_mzd_mul_v_uint64_256
#define mzd_mul_v_uint64_256_1152 oqs_sig_picnic_mzd_mul_v_uint64_256_1152
#define mzd_mul_v_uint64_256_1216 oqs_sig_picnic_mzd_mul_v_uint64_256_1216
#define mzd_shuffle_128_3 oqs_sig_picnic_mzd_shuffle_128_3
#define mzd_shuffle_128_30 oqs_sig_picnic_mzd_shuffle_128_30
#define mzd_shuffle_192_3 oqs_sig_picnic_mzd_shuffle_192_3
#define mzd_shuffle_192_30 oqs_sig_picnic_mzd_shuffle_192_30
#define mzd_shuffle_256_3 oqs_sig_picnic_mzd_shuffle_256_3
#define mzd_shuffle_256_30 oqs_sig_picnic_mzd_shuffle_256_30
#define mzd_shuffle_pext_128_3 oqs_sig_picnic_mzd_shuffle_pext_128_3
#define mzd_shuffle_pext_128_30 oqs_sig_picnic_mzd_shuffle_pext_128_30
#define mzd_shuffle_pext_192_3 oqs_sig_picnic_mzd_shuffle_pext_192_3
#define mzd_shuffle_pext_192_30 oqs_sig_picnic_mzd_shuffle_pext_192_30
#define mzd_shuffle_pext_256_3 oqs_sig_picnic_mzd_shuffle_pext_256_3
#define mzd_shuffle_pext_256_30 oqs_sig_picnic_mzd_shuffle_pext_256_30
#define mzd_to_char_array oqs_sig_picnic_mzd_to_char_array
#define mzd_xor_s128_1024 oqs_sig_picnic_mzd_xor_s128_1024
#define mzd_xor_s128_1152 oqs_sig_picnic_mzd_xor_s128_1152
#define mzd_xor_s128_128 oqs_sig_picnic_mzd_xor_s128_128
#define mzd_xor_s128_1280 oqs_sig_picnic_mzd_xor_s128_1280
#define mzd_xor_s128_256 oqs_sig_picnic_mzd_xor_s128_256
#define mzd_xor_s128_640 oqs_sig_picnic_mzd_xor_s128_640
#define mzd_xor_s128_896 oqs_sig_picnic_mzd_xor_s128_896
#define mzd_xor_s128_blocks oqs_sig_picnic_mzd_xor_s128_blocks
#define mzd_xor_s256_1024 oqs_sig_picnic_mzd_xor_s256_1024
#define mzd_xor_s256_128 oqs_sig_picnic_mzd_xor_s256_128
#define mzd_xor_s256_1280 oqs_sig_picnic_mzd_xor_s256_1280
#define mzd_xor_s256_256 oqs_sig_picnic_mzd_xor_s256_256
#define mzd_xor_s256_768 oqs_sig_picnic_mzd_xor_s256_768
#define mzd_xor_s256_blocks oqs_sig_picnic_mzd_xor_s256_blocks
#define mzd_xor_uint64_1152 oqs_sig_picnic_mzd_xor_uint64_1152
#define mzd_xor_uint64_1216 oqs_sig_picnic_mzd_xor_uint64_1216
#define mzd_xor_uint64_128 oqs_sig_picnic_mzd_xor_uint64_128
#define mzd_xor_uint64_192 oqs_sig_picnic_mzd_xor_uint64_192
#define mzd_xor_uint64_256 oqs_sig_picnic_mzd_xor_uint64_256
#define mzd_xor_uint64_576 oqs_sig_picnic_mzd_xor_uint64_576
#define mzd_xor_uint64_640 oqs_sig_picnic_mzd_xor_uint64_640
#define mzd_xor_uint64_896 oqs_sig_picnic_mzd_xor_uint64_896
#define mzd_xor_uint64_960 oqs_sig_picnic_mzd_xor_uint64_960
#define openMerkleTree oqs_sig_picnic_openMerkleTree
#define openMerkleTreeSize oqs_sig_picnic_openMerkleTreeSize
#define revealSeeds oqs_sig_picnic_revealSeeds
#define revealSeedsSize oqs_sig_picnic_revealSeedsSize
#define sbox_layer_10_uint64_aux oqs_sig_picnic_sbox_layer_10_uint64_aux
#define setBit oqs_sig_picnic_setBit
#define sign_picnic2 oqs_sig_picnic_sign_picnic2
#define tapesToWord oqs_sig_picnic_tapesToWord
#define transpose_64_64 oqs_sig_picnic_transpose_64_64
#define verifyMerkleTree oqs_sig_picnic_verifyMerkleTree
#define verify_picnic2 oqs_sig_picnic_verify_picnic2
#define xor_array_RC oqs_sig_picnic_xor_array_RC
#define xor_word_array oqs_sig_picnic_xor_word_array
#endif

View File

@ -7,15 +7,20 @@
* SPDX-License-Identifier: MIT
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "picnic.h"
#include <stdlib.h>
#include <oqs/rand.h>
#include <string.h>
#include "io.h"
#include "lowmc.h"
#include "picnic_impl.h"
#include "picnic2_impl.h"
#include <oqs/rand.h>
// Public and private keys are serialized as follows:
// - public key: instance || C || p
@ -30,7 +35,7 @@
#define PK_PT(pk) &(pk)->data[1 + output_size]
size_t PICNIC_CALLING_CONVENTION picnic_get_lowmc_block_size(picnic_params_t param) {
const picnic_instance_t* instance = oqs_sig_picnic_instance_get(param);
const picnic_instance_t* instance = picnic_instance_get(param);
if (!instance) {
return 0;
}
@ -39,7 +44,7 @@ size_t PICNIC_CALLING_CONVENTION picnic_get_lowmc_block_size(picnic_params_t par
}
size_t PICNIC_CALLING_CONVENTION picnic_signature_size(picnic_params_t param) {
const picnic_instance_t* instance = oqs_sig_picnic_instance_get(param);
const picnic_instance_t* instance = picnic_instance_get(param);
if (!instance) {
return 0;
}
@ -48,7 +53,7 @@ size_t PICNIC_CALLING_CONVENTION picnic_signature_size(picnic_params_t param) {
}
size_t PICNIC_CALLING_CONVENTION picnic_get_private_key_size(picnic_params_t param) {
const picnic_instance_t* instance = oqs_sig_picnic_instance_get(param);
const picnic_instance_t* instance = picnic_instance_get(param);
if (!instance) {
return 0;
}
@ -57,7 +62,7 @@ size_t PICNIC_CALLING_CONVENTION picnic_get_private_key_size(picnic_params_t par
}
size_t PICNIC_CALLING_CONVENTION picnic_get_public_key_size(picnic_params_t param) {
const picnic_instance_t* instance = oqs_sig_picnic_instance_get(param);
const picnic_instance_t* instance = picnic_instance_get(param);
if (!instance) {
return 0;
}
@ -72,7 +77,7 @@ int PICNIC_CALLING_CONVENTION picnic_keygen(picnic_params_t param, picnic_public
return -1;
}
const picnic_instance_t* instance = oqs_sig_picnic_instance_get(param);
const picnic_instance_t* instance = picnic_instance_get(param);
if (!instance) {
return -1;
}
@ -108,7 +113,7 @@ int PICNIC_CALLING_CONVENTION picnic_sk_to_pk(const picnic_privatekey_t* sk,
}
const picnic_params_t param = sk->data[0];
const picnic_instance_t* instance = oqs_sig_picnic_instance_get(param);
const picnic_instance_t* instance = picnic_instance_get(param);
if (!instance) {
return -1;
}
@ -122,22 +127,22 @@ int PICNIC_CALLING_CONVENTION picnic_sk_to_pk(const picnic_privatekey_t* sk,
uint8_t* pk_pt = PK_PT(pk);
const uint8_t* sk_pt = SK_PT(sk);
mzd_local_t* plaintext = oqs_sig_picnic_mzd_local_init_ex(1, lowmc->n, false);
mzd_local_t* privkey = oqs_sig_picnic_mzd_local_init_ex(1, lowmc->k, false);
mzd_local_t* plaintext = mzd_local_init_ex(1, lowmc->n, false);
mzd_local_t* privkey = mzd_local_init_ex(1, lowmc->k, false);
oqs_sig_picnic_mzd_from_char_array(plaintext, sk_pt, output_size);
oqs_sig_picnic_mzd_from_char_array(privkey, sk_sk, input_size);
mzd_from_char_array(plaintext, sk_pt, output_size);
mzd_from_char_array(privkey, sk_sk, input_size);
// compute public key
mzd_local_t* ciphertext = instance->impls.lowmc(privkey, plaintext);
pk->data[0] = param;
memcpy(pk_pt, sk_pt, output_size);
oqs_sig_picnic_mzd_to_char_array(pk_c, ciphertext, output_size);
mzd_to_char_array(pk_c, ciphertext, output_size);
oqs_sig_picnic_mzd_local_free(ciphertext);
oqs_sig_picnic_mzd_local_free(privkey);
oqs_sig_picnic_mzd_local_free(plaintext);
mzd_local_free(ciphertext);
mzd_local_free(privkey);
mzd_local_free(plaintext);
return 0;
}
@ -149,7 +154,7 @@ int PICNIC_CALLING_CONVENTION picnic_validate_keypair(const picnic_privatekey_t*
}
const picnic_params_t param = sk->data[0];
const picnic_instance_t* instance = oqs_sig_picnic_instance_get(param);
const picnic_instance_t* instance = picnic_instance_get(param);
if (!instance) {
return -1;
}
@ -169,21 +174,21 @@ int PICNIC_CALLING_CONVENTION picnic_validate_keypair(const picnic_privatekey_t*
return -1;
}
mzd_local_t* plaintext = oqs_sig_picnic_mzd_local_init_ex(1, lowmc->n, false);
mzd_local_t* privkey = oqs_sig_picnic_mzd_local_init_ex(1, lowmc->k, false);
mzd_local_t* plaintext = mzd_local_init_ex(1, lowmc->n, false);
mzd_local_t* privkey = mzd_local_init_ex(1, lowmc->k, false);
oqs_sig_picnic_mzd_from_char_array(plaintext, sk_pt, instance->output_size);
oqs_sig_picnic_mzd_from_char_array(privkey, sk_sk, instance->input_size);
mzd_from_char_array(plaintext, sk_pt, instance->output_size);
mzd_from_char_array(privkey, sk_sk, instance->input_size);
// compute public key
mzd_local_t* ciphertext = instance->impls.lowmc(privkey, plaintext);
uint8_t buffer[MAX_LOWMC_BLOCK_SIZE];
oqs_sig_picnic_mzd_to_char_array(buffer, ciphertext, output_size);
mzd_to_char_array(buffer, ciphertext, output_size);
oqs_sig_picnic_mzd_local_free(ciphertext);
oqs_sig_picnic_mzd_local_free(privkey);
oqs_sig_picnic_mzd_local_free(plaintext);
mzd_local_free(ciphertext);
mzd_local_free(privkey);
mzd_local_free(plaintext);
return memcmp(buffer, pk_c, output_size);
}
@ -196,7 +201,7 @@ int PICNIC_CALLING_CONVENTION picnic_sign(const picnic_privatekey_t* sk, const u
}
const picnic_params_t param = sk->data[0];
const picnic_instance_t* instance = oqs_sig_picnic_instance_get(param);
const picnic_instance_t* instance = picnic_instance_get(param);
if (!instance) {
return -1;
}
@ -208,7 +213,11 @@ int PICNIC_CALLING_CONVENTION picnic_sign(const picnic_privatekey_t* sk, const u
const uint8_t* sk_c = SK_C(sk);
const uint8_t* sk_pt = SK_PT(sk);
return oqs_sig_picnic_impl_sign(instance, sk_pt, sk_sk, sk_c, message, message_len, signature, signature_len);
if (param == Picnic2_L1_FS || param == Picnic2_L3_FS || param == Picnic2_L5_FS)
return impl_sign_picnic2(instance, sk_pt, sk_sk, sk_c, message, message_len, signature,
signature_len);
else
return impl_sign(instance, sk_pt, sk_sk, sk_c, message, message_len, signature, signature_len);
}
int PICNIC_CALLING_CONVENTION picnic_verify(const picnic_publickey_t* pk, const uint8_t* message,
@ -219,7 +228,7 @@ int PICNIC_CALLING_CONVENTION picnic_verify(const picnic_publickey_t* pk, const
}
const picnic_params_t param = pk->data[0];
const picnic_instance_t* instance = oqs_sig_picnic_instance_get(param);
const picnic_instance_t* instance = picnic_instance_get(param);
if (!instance) {
return -1;
}
@ -229,7 +238,11 @@ int PICNIC_CALLING_CONVENTION picnic_verify(const picnic_publickey_t* pk, const
const uint8_t* pk_c = PK_C(pk);
const uint8_t* pk_pt = PK_PT(pk);
return oqs_sig_picnic_impl_verify(instance, pk_pt, pk_c, message, message_len, signature, signature_len);
if (param == Picnic2_L1_FS || param == Picnic2_L3_FS || param == Picnic2_L5_FS)
return impl_verify_picnic2(instance, pk_pt, pk_c, message, message_len, signature,
signature_len);
else
return impl_verify(instance, pk_pt, pk_c, message, message_len, signature, signature_len);
}
const char* PICNIC_CALLING_CONVENTION picnic_get_param_name(picnic_params_t parameters) {
@ -258,6 +271,12 @@ const char* PICNIC_CALLING_CONVENTION picnic_get_param_name(picnic_params_t para
return "Picnic_L5_FS";
case Picnic_L5_UR:
return "Picnic_L5_UR";
case Picnic2_L1_FS:
return "Picnic2_L1_FS";
case Picnic2_L3_FS:
return "Picnic2_L3_FS";
case Picnic2_L5_FS:
return "Picnic2_L5_FS";
default:
return "Unknown parameter set";
}
@ -270,7 +289,7 @@ int PICNIC_CALLING_CONVENTION picnic_write_public_key(const picnic_publickey_t*
}
const picnic_params_t param = key->data[0];
const picnic_instance_t* instance = oqs_sig_picnic_instance_get(param);
const picnic_instance_t* instance = picnic_instance_get(param);
if (!instance) {
return -1;
}
@ -292,7 +311,7 @@ int PICNIC_CALLING_CONVENTION picnic_read_public_key(picnic_publickey_t* key, co
}
const picnic_params_t param = buf[0];
const picnic_instance_t* instance = oqs_sig_picnic_instance_get(param);
const picnic_instance_t* instance = picnic_instance_get(param);
if (!instance) {
return -1;
}
@ -314,7 +333,7 @@ int PICNIC_CALLING_CONVENTION picnic_write_private_key(const picnic_privatekey_t
}
const picnic_params_t param = key->data[0];
const picnic_instance_t* instance = oqs_sig_picnic_instance_get(param);
const picnic_instance_t* instance = picnic_instance_get(param);
if (!instance) {
return -1;
}
@ -337,7 +356,7 @@ int PICNIC_CALLING_CONVENTION picnic_read_private_key(picnic_privatekey_t* key,
}
const picnic_params_t param = buf[0];
const picnic_instance_t* instance = oqs_sig_picnic_instance_get(param);
const picnic_instance_t* instance = picnic_instance_get(param);
if (!instance) {
return -1;
}
@ -352,3 +371,5 @@ int PICNIC_CALLING_CONVENTION picnic_read_private_key(picnic_privatekey_t* key,
memcpy(key->data, buf, bytes_required);
return 0;
}
/* cropped unused picnic_visualize_keys */

View File

@ -41,9 +41,13 @@ extern "C" {
#define LOWMC_BLOCK_SIZE_Picnic_L3_UR 24
#define LOWMC_BLOCK_SIZE_Picnic_L5_FS 32
#define LOWMC_BLOCK_SIZE_Picnic_L5_UR 32
#define LOWMC_BLOCK_SIZE_Picnic2_L1_FS 16
#define LOWMC_BLOCK_SIZE_Picnic2_L3_FS 24
#define LOWMC_BLOCK_SIZE_Picnic2_L5_FS 32
#define LOWMC_BLOCK_SIZE(p) PICNIC_CONCAT(LOWMC_BLOCK_SIZE, p)
#define SALT_SIZE 32
#define MAX_LOWMC_ROUNDS 38
#define MAX_LOWMC_SBOXES 10
#define MAX_ROUNDS 438
@ -51,16 +55,19 @@ extern "C" {
#define PICNIC_PRIVATE_KEY_SIZE(p) (1 + 3 * LOWMC_BLOCK_SIZE(p))
#define PICNIC_PUBLIC_KEY_SIZE(p) (1 + 2 * LOWMC_BLOCK_SIZE(p))
#define PICNIC_SIGNATURE_SIZE_Picnic_L1_FS 34016
#define PICNIC_SIGNATURE_SIZE_Picnic_L1_UR 53945
#define PICNIC_SIGNATURE_SIZE_Picnic_L3_FS 76724
#define PICNIC_SIGNATURE_SIZE_Picnic_L3_UR 121837
#define PICNIC_SIGNATURE_SIZE_Picnic_L1_FS 34032
#define PICNIC_SIGNATURE_SIZE_Picnic_L1_UR 53961
#define PICNIC_SIGNATURE_SIZE_Picnic_L3_FS 76732
#define PICNIC_SIGNATURE_SIZE_Picnic_L3_UR 121845
#define PICNIC_SIGNATURE_SIZE_Picnic_L5_FS 132856
#define PICNIC_SIGNATURE_SIZE_Picnic_L5_UR 209506
#define PICNIC_SIGNATURE_SIZE_Picnic_L1_1_FS 32702
#define PICNIC_SIGNATURE_SIZE_Picnic_L1_1_UR 51755
#define PICNIC_SIGNATURE_SIZE_Picnic_L3_1_FS 74790
#define PICNIC_SIGNATURE_SIZE_Picnic_L3_1_UR 117889
#define PICNIC_SIGNATURE_SIZE_Picnic2_L1_FS 13802
#define PICNIC_SIGNATURE_SIZE_Picnic2_L3_FS 29750
#define PICNIC_SIGNATURE_SIZE_Picnic2_L5_FS 54732
#define PICNIC_SIGNATURE_SIZE_Picnic_L1_1_FS 32728
#define PICNIC_SIGNATURE_SIZE_Picnic_L1_1_UR 51771
#define PICNIC_SIGNATURE_SIZE_Picnic_L3_1_FS 74798
#define PICNIC_SIGNATURE_SIZE_Picnic_L3_1_UR 117897
#define PICNIC_SIGNATURE_SIZE_Picnic_L5_1_FS 130228
#define PICNIC_SIGNATURE_SIZE_Picnic_L5_1_UR 204250
@ -75,19 +82,22 @@ extern "C" {
typedef enum {
PARAMETER_SET_INVALID,
/* Instances from the Picnic parameter set with LowMC m=10 */
Picnic_L1_FS, // 1
Picnic_L1_UR, // 2
Picnic_L3_FS, // 3
Picnic_L3_UR, // 4
Picnic_L5_FS, // 5
Picnic_L5_UR, // 6
Picnic_L1_FS, // 1
Picnic_L1_UR, // 2
Picnic_L3_FS, // 3
Picnic_L3_UR, // 4
Picnic_L5_FS, // 5
Picnic_L5_UR, // 6
Picnic2_L1_FS, // 7
Picnic2_L3_FS, // 8
Picnic2_L5_FS, // 9
/* Instances with LowMC m=1 */
Picnic_L1_1_FS, // 7
Picnic_L1_1_UR, // 8
Picnic_L3_1_FS, // 9
Picnic_L3_1_UR, // 10
Picnic_L5_1_FS, // 11
Picnic_L5_1_UR, // 12
Picnic_L1_1_FS, // 10
Picnic_L1_1_UR, // 11
Picnic_L3_1_FS, // 12
Picnic_L3_1_UR, // 13
Picnic_L5_1_FS, // 14
Picnic_L5_1_UR, // 15
PARAMETER_SET_MAX_INDEX
} picnic_params_t;

View File

@ -0,0 +1,15 @@
/* NIST/SUPERCOP-style signing API constants and prototypes for Picnic2_L1_FS. */
#ifndef PICNIC2_L1_FS_API_H
#define PICNIC2_L1_FS_API_H
/* 16 is the LowMC block size in bytes for this parameter set
 * (LOWMC_BLOCK_SIZE_Picnic2_L1_FS in picnic.h). The key sizes equal
 * PICNIC_PRIVATE_KEY_SIZE (1 + 3 * block) and PICNIC_PUBLIC_KEY_SIZE (1 + 2 * block). */
#define CRYPTO_SECRETKEYBYTES (1 + 2 * 16 + 16)
#define CRYPTO_PUBLICKEYBYTES (1 + 2 * 16)
/* 13802 is PICNIC_SIGNATURE_SIZE_Picnic2_L1_FS.
 * NOTE(review): the extra 4 bytes are presumably a length prefix added by sign.c.i - confirm. */
#define CRYPTO_BYTES (4 + 13802)
#define CRYPTO_ALGNAME "picnic2l1fs"
int crypto_sign_keypair(unsigned char* pk, unsigned char* sk);
int crypto_sign(unsigned char* sm, unsigned long long* smlen, const unsigned char* m,
unsigned long long mlen, const unsigned char* sk);
int crypto_sign_open(unsigned char* m, unsigned long long* mlen, const unsigned char* sm,
unsigned long long smlen, const unsigned char* pk);
#endif

View File

@ -0,0 +1,8 @@
#ifdef SUPERCOP
#include "crypto_sign.h"
#else
#include "api.h"
#endif
#define PICNIC_INSTANCE Picnic2_L1_FS
#include "sign.c.i"

View File

@ -0,0 +1,15 @@
/* NIST/SUPERCOP-style signing API constants and prototypes for Picnic2_L3_FS. */
#ifndef PICNIC2_L3_FS_API_H
#define PICNIC2_L3_FS_API_H
/* 24 is the LowMC block size in bytes for this parameter set
 * (LOWMC_BLOCK_SIZE_Picnic2_L3_FS in picnic.h). The key sizes equal
 * PICNIC_PRIVATE_KEY_SIZE (1 + 3 * block) and PICNIC_PUBLIC_KEY_SIZE (1 + 2 * block). */
#define CRYPTO_SECRETKEYBYTES (1 + 2 * 24 + 24)
#define CRYPTO_PUBLICKEYBYTES (1 + 2 * 24)
/* 29750 is PICNIC_SIGNATURE_SIZE_Picnic2_L3_FS.
 * NOTE(review): the extra 4 bytes are presumably a length prefix added by sign.c.i - confirm. */
#define CRYPTO_BYTES (4 + 29750)
#define CRYPTO_ALGNAME "picnic2l3fs"
int crypto_sign_keypair(unsigned char* pk, unsigned char* sk);
int crypto_sign(unsigned char* sm, unsigned long long* smlen, const unsigned char* m,
unsigned long long mlen, const unsigned char* sk);
int crypto_sign_open(unsigned char* m, unsigned long long* mlen, const unsigned char* sm,
unsigned long long smlen, const unsigned char* pk);
#endif

View File

@ -0,0 +1,8 @@
#ifdef SUPERCOP
#include "crypto_sign.h"
#else
#include "api.h"
#endif
#define PICNIC_INSTANCE Picnic2_L3_FS
#include "sign.c.i"

View File

@ -0,0 +1,15 @@
/* NIST/SUPERCOP-style signing API constants and prototypes for Picnic2_L5_FS. */
#ifndef PICNIC2_L5_FS_API_H
#define PICNIC2_L5_FS_API_H
/* 32 is the LowMC block size in bytes for this parameter set
 * (LOWMC_BLOCK_SIZE_Picnic2_L5_FS in picnic.h). The key sizes equal
 * PICNIC_PRIVATE_KEY_SIZE (1 + 3 * block) and PICNIC_PUBLIC_KEY_SIZE (1 + 2 * block). */
#define CRYPTO_SECRETKEYBYTES (1 + 2 * 32 + 32)
#define CRYPTO_PUBLICKEYBYTES (1 + 2 * 32)
/* 54732 is PICNIC_SIGNATURE_SIZE_Picnic2_L5_FS.
 * NOTE(review): the extra 4 bytes are presumably a length prefix added by sign.c.i - confirm. */
#define CRYPTO_BYTES (4 + 54732)
#define CRYPTO_ALGNAME "picnic2l5fs"
int crypto_sign_keypair(unsigned char* pk, unsigned char* sk);
int crypto_sign(unsigned char* sm, unsigned long long* smlen, const unsigned char* m,
unsigned long long mlen, const unsigned char* sk);
int crypto_sign_open(unsigned char* m, unsigned long long* mlen, const unsigned char* sm,
unsigned long long smlen, const unsigned char* pk);
#endif

View File

@ -0,0 +1,8 @@
#ifdef SUPERCOP
#include "crypto_sign.h"
#else
#include "api.h"
#endif
#define PICNIC_INSTANCE Picnic2_L5_FS
#include "sign.c.i"

1039
src/sig/picnic/external/picnic2_impl.c vendored Normal file

File diff suppressed because it is too large Load Diff

61
src/sig/picnic/external/picnic2_impl.h vendored Normal file
View File

@ -0,0 +1,61 @@
/*! @file picnic2_impl.h
* @brief This is the main implementation file of the signature scheme for
* the Picnic2 parameter sets.
*
* This file is part of the reference implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/
#ifndef PICNIC2_IMPL_H
#define PICNIC2_IMPL_H
#include <stdint.h>
#include <stddef.h>
#include "picnic_impl.h"
/* Per-execution proof data carried inside a Picnic2 signature (see signature2_t.proofs). */
typedef struct proof2_t {
uint16_t unOpenedIndex; // P[t], index of the party that is not opened.
uint8_t* seedInfo; // Information required to compute the tree with the seeds of all opened parties
size_t seedInfoLen; // Length of seedInfo buffer
uint8_t* aux; // Last party's correction bits; NULL if P[t] == N-1
uint8_t* C; // Commitment to preprocessing step of unopened party
uint8_t* input; // Masked input used in online execution
uint8_t* msgs; // Broadcast messages of unopened party P[t]
} proof2_t;
/* In-memory (deserialized) form of a Picnic2 signature.
 * The pointer members are set up by allocateSignature2() and released by freeSignature2(). */
typedef struct signature2_t {
uint8_t salt[SALT_SIZE]; // SALT_SIZE bytes of salt (32 in picnic.h)
uint8_t* iSeedInfo; // Info required to recompute the tree of all initial seeds
size_t iSeedInfoLen; // Length of iSeedInfo buffer
uint8_t* cvInfo; // Info required to check commitments to views (reconstruct Merkle tree)
size_t cvInfoLen; // Length of cvInfo buffer
uint16_t* challengeC; // NOTE(review): presumably the challenged execution indices - confirm in picnic2_impl.c
uint16_t* challengeP; // NOTE(review): presumably the unopened party per challenged execution - confirm
proof2_t* proofs; // One proof for each online execution the verifier checks
} signature2_t;
int impl_sign_picnic2(const picnic_instance_t* pp, const uint8_t* plaintext,
const uint8_t* private_key, const uint8_t* public_key, const uint8_t* msg,
size_t msglen, uint8_t* sig, size_t* siglen);
int impl_verify_picnic2(const picnic_instance_t* instance, const uint8_t* plaintext,
const uint8_t* public_key, const uint8_t* msg, size_t msglen,
const uint8_t* signature, size_t signature_len);
int sign_picnic2(uint32_t* privateKey, uint32_t* pubKey, uint32_t* plaintext,
const uint8_t* message, size_t messageByteLength, signature2_t* sig,
const picnic_instance_t* params);
int verify_picnic2(signature2_t* sig, const uint32_t* pubKey, const uint32_t* plaintext,
const uint8_t* message, size_t messageByteLength,
const picnic_instance_t* params);
void allocateSignature2(signature2_t* sig, const picnic_instance_t* params);
void freeSignature2(signature2_t* sig, const picnic_instance_t* params);
/* Helper functions */
void sbox_layer_10_uint64_aux(uint64_t* d, randomTape_t* tapes);
#endif /* PICNIC2_IMPL_H */

View File

@ -0,0 +1,455 @@
/*! @file picnic2_simulate.c
* @brief Simulation of the online phase of the MPC protocol (LowMC evaluation)
* for the Picnic2 parameter sets.
*
* This file is part of the reference implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "io.h"
#include "picnic2_simulate.h"
#include "picnic2_simulate_mul.h"
/* Append one 64-bit word to the message buffers without bit-transposing it.
 * Word number `pos` lands in 64-bit slot pos / 64 of buffer pos % 64, after
 * which the write position advances by one. */
static void wordToMsgsNoTranspose(uint64_t w, msgs_t* msgs) {
  uint64_t* row = (uint64_t*)msgs->msgs[msgs->pos % 64];

  row[msgs->pos / 64] = w;
  msgs->pos += 1;
}
/* Bit-transpose, in place, the words previously stored with wordToMsgsNoTranspose().
 * For every 64-bit slot index, one word is gathered from each of the 64 message
 * buffers, the resulting 64x64 bit matrix is transposed with transpose_64_64(),
 * and the rows are scattered back to the same slot of each buffer. */
static void msgsTranspose(msgs_t* msgs) {
uint64_t buffer_in[64];
uint64_t buffer_out[64];
size_t pos;
/* Full groups: slots that received a word for all 64 buffers. */
for (pos = 0; pos < msgs->pos / 64; pos++) {
for (size_t i = 0; i < 64; i++) {
/* i / 8 * 8 + 7 - i % 8 reverses the order inside each group of eight rows. */
buffer_in[i / 8 * 8 + 7 - i % 8] = ((uint64_t*)msgs->msgs[i])[pos];
}
transpose_64_64(buffer_in, buffer_out);
for (size_t i = 0; i < 64; i++) {
((uint64_t*)msgs->msgs[i])[pos] = buffer_out[(i) / 8 * 8 + 7 - (i) % 8];
}
}
/* Trailing partial group: only the first msgs->pos % 64 buffers hold a word in
 * slot `pos`; the remaining inputs are taken as zero. */
memset(&buffer_in, 0, 64 * sizeof(uint64_t));
for (size_t i = 0; i < msgs->pos % 64; i++) {
buffer_in[i / 8 * 8 + 7 - i % 8] = ((uint64_t*)msgs->msgs[i])[pos];
}
transpose_64_64(buffer_in, buffer_out);
/* NOTE(review): this write-back touches slot `pos` of all 64 buffers, and it runs
 * even when msgs->pos is a multiple of 64 - confirm the buffers are sized for it. */
for (size_t i = 0; i < 64; i++) {
((uint64_t*)msgs->msgs[i])[pos] = buffer_out[(i) / 8 * 8 + 7 - (i) % 8];
}
}
/* Broadcast every share word: each word of `shares` is appended, in order, to
 * the message streams (bit i of a word is player i's share). */
static void broadcast(shares_t* shares, msgs_t* msgs) {
  size_t idx = 0;

  while (idx < shares->numWords) {
    wordToMsgsNoTranspose(shares->shares[idx], msgs);
    ++idx;
  }
}
/* Spread a single bit over a whole word: 0 -> 0, 1 -> all ones.
 * Unsigned negation is used, which equals ~(bit - 1) for every uint64_t value. */
static inline uint64_t extend(uint64_t bit) {
  return (uint64_t)0 - bit;
}
/* Simulate one AND gate on the masked bits a and b.
 *
 * mask_a / mask_b hold the parties' shares of the input masks. Two words are
 * drawn from the tapes, in this order: the fresh output mask (returned through
 * `out`) and the helper value prepared for this gate. During verification
 * (msgs->unopened >= 0) the unopened party's share is taken from unopened_msg
 * instead of being computed. The share word is appended to msgs.
 *
 * Returns the masked output bit. */
static uint8_t mpc_AND(uint8_t a, uint8_t b, uint64_t mask_a, uint64_t mask_b, randomTape_t* tapes,
                       msgs_t* msgs, uint64_t* out, uint8_t* unopened_msg) {
  /* Tape reads are order-sensitive: output mask first, helper second. */
  const uint64_t fresh_mask = tapesToWord(tapes);
  *out = fresh_mask;
  const uint64_t helper = tapesToWord(tapes);

  uint64_t shares = helper ^ fresh_mask;
  shares ^= extend(a) & mask_b;
  shares ^= extend(b) & mask_a;

  if (msgs->unopened >= 0) {
    /* Verification: splice in the share that the unopened party broadcast. */
    uint8_t received = getBit(unopened_msg, msgs->pos);
    setBit((uint8_t*)&shares, msgs->unopened, received);
  }

  wordToMsgsNoTranspose(shares, msgs);
  return (uint8_t)(parity64_uint64(shares) ^ (a & b));
}
/* Apply the S-box layer to the masked state: params->lowmc->m S-boxes, each on
 * three consecutive bits starting at bit 0. The masked bits in `state` and the
 * mask shares in `state_masks` are updated in lock-step; every S-box costs
 * three mpc_AND calls, issued in the order ab, bc, ca. */
static void mpc_sbox(uint32_t* state, shares_t* state_masks, randomTape_t* tapes, msgs_t* msgs,
                     uint8_t* unopenened_msg, const picnic_instance_t* params) {
  uint8_t* bits = (uint8_t*)state;

  for (size_t base = 0; base < params->lowmc->m * 3; base += 3) {
    /* Load the three masked bits and their mask shares. */
    const uint8_t a = getBit(bits, base + 2);
    const uint8_t b = getBit(bits, base + 1);
    const uint8_t c = getBit(bits, base);
    const uint64_t mask_a = state_masks->shares[base + 2];
    const uint64_t mask_b = state_masks->shares[base + 1];
    const uint64_t mask_c = state_masks->shares[base];

    /* Fresh output masks produced by the three AND gates. */
    uint64_t ab_mask, bc_mask, ca_mask;
    const uint8_t ab = mpc_AND(a, b, mask_a, mask_b, tapes, msgs, &ab_mask, unopenened_msg);
    const uint8_t bc = mpc_AND(b, c, mask_b, mask_c, tapes, msgs, &bc_mask, unopenened_msg);
    const uint8_t ca = mpc_AND(c, a, mask_c, mask_a, tapes, msgs, &ca_mask, unopenened_msg);

    /* Masked output bits. */
    setBit(bits, base + 2, a ^ bc);
    setBit(bits, base + 1, a ^ b ^ ca);
    setBit(bits, base, a ^ b ^ c ^ ab);

    /* Matching output masks. */
    state_masks->shares[base + 2] = mask_a ^ bc_mask;
    state_masks->shares[base + 1] = mask_b ^ mask_a ^ ca_mask;
    state_masks->shares[base]     = mask_a ^ mask_b ^ mask_c ^ ab_mask;
  }
}
#if defined(REDUCED_ROUND_KEY_COMPUTATION)
/* out[i] = a[i] ^ b[index + num - 1 - i] for i in [0, num): XOR `num` mask
 * words of `a` with a window of `b` that is read in reverse order. */
static void mpc_xor_masks_nl(shares_t* out, const shares_t* a, const shares_t* b, size_t index,
                             size_t num) {
  size_t src = index + num; /* one past the highest word of b that is read */

  for (size_t dst = 0; dst < num; dst++) {
    src--;
    out->shares[dst] = a->shares[dst] ^ b->shares[src];
  }
}
/* XOR the precomputed non-linear part into the state.
 * Values: the 4 bytes of `y` that contain bit `index` are XORed with the first
 * 4 bytes of `x` into `output`. Masks: handled by mpc_xor_masks_nl(). */
static void mpc_xor2_nl(uint32_t* output, shares_t* output_masks, const uint32_t* x,
                        const shares_t* x_masks, const uint32_t* y, const shares_t* y_masks,
                        size_t index, size_t num) {
  const uint32_t* y_word = y + index / 32;

  xor_array_RC((uint8_t*)output, (uint8_t*)x, (uint8_t*)y_word, 4);

  // xor masks
  mpc_xor_masks_nl(output_masks, x_masks, y_masks, index, num);
}
#endif
#if defined(OPTIMIZED_LINEAR_LAYER_EVALUATION)
/* Permute the first 64 state bits and their mask-share words according to r_mask.
 * Bit b of r_mask corresponds to entry 63 - b. Each pass moves a selected entry
 * (bit set in r_mask) towards the front by a chain of adjacent swaps, and
 * r_mask is updated to match. The loop stops early once r_mask equals
 * 0xFFFFFFFC00000000, i.e. its 30 highest bits are set, which corresponds to
 * entries 0..29. */
static void mpc_shuffle(uint8_t* state, shares_t* mask_shares, uint64_t r_mask) {
for (int i = 63; i >= 0 && r_mask != UINT64_C(0xFFFFFFFC00000000); i--) {
if (!((r_mask >> i) & 1)) { // bit is not set
// find next 1 and swap all entries until then
for (int j = i - 1; j >= 0; j--) {
if ((r_mask >> j) & 1) {
/* Bubble entry 63 - j down to position 63 - i; the entries in between
 * each move up by one position. */
for (int k = j; k < i; k++) {
uint64_t t = mask_shares->shares[63 - k];
mask_shares->shares[63 - k] = mask_shares->shares[63 - k - 1];
mask_shares->shares[63 - k - 1] = t;
/* Apply the same swap to the masked state bits. */
uint8_t bit = getBit(state, 63 - k);
setBit(state, 63 - k, getBit(state, 63 - k - 1));
setBit(state, 63 - k - 1, bit);
}
r_mask |= (UINT64_C(1) << i); // set bit i
r_mask &= ~(UINT64_C(1) << j); // clear bit j
break;
}
}
}
}
}
#endif
#if !defined(REDUCED_ROUND_KEY_COMPUTATION) || defined(OPTIMIZED_LINEAR_LAYER_EVALUATION)
/* Word-wise XOR of two sets of mask shares: out = a ^ b.
 * All three share sets must have the same number of words (asserted). */
static void mpc_xor_masks(shares_t* out, const shares_t* a, const shares_t* b) {
  assert(out->numWords == a->numWords && a->numWords == b->numWords);

  size_t w = 0;
  while (w < out->numWords) {
    out->shares[w] = a->shares[w] ^ b->shares[w];
    ++w;
  }
}
/* XOR two masked values: output = x ^ y on the masked words, and
 * output_masks = x_masks ^ y_masks on the mask shares. */
static void mpc_xor2(uint32_t* output, shares_t* output_masks, const uint32_t* x,
const shares_t* x_masks, const uint32_t* y, const shares_t* y_masks,
const picnic_instance_t* params) {
/* input_size / 4 is passed as the word count to xor_word_array
 * (presumably input_size is in bytes - confirm against picnic_instance_t). */
xor_word_array(output, x, y, (params->input_size / 4));
mpc_xor_masks(output_masks, x_masks, y_masks);
}
#endif
/* PICNIC2_L1_FS */
#define MPC_MUL mpc_matrix_mul_uint64_128
#define MPC_MUL_MC mpc_matrix_mul_nl_part_uint64_128
#define MPC_ADDMUL_R mpc_matrix_addmul_r_uint64_128
#define MPC_MUL_Z mpc_matrix_mul_z_uint64_128
#define LOWMC_N LOWMC_L1_N
#define LOWMC_R LOWMC_L1_R
#if defined(WITH_LOWMC_128_128_20)
#include "lowmc_128_128_20.h"
#define LOWMC_INSTANCE lowmc_128_128_20
#define SIM_ONLINE lowmc_simulate_online_uint64_128_10
#include "picnic2_simulate.c.i"
#endif
#undef MPC_MUL
#undef MPC_MUL_MC
#undef MPC_ADDMUL_R
#undef MPC_MUL_Z
#undef LOWMC_N
#undef LOWMC_R
#undef LOWMC_INSTANCE
#undef SIM_ONLINE
/* PICNIC2_L3_FS */
#define MPC_MUL mpc_matrix_mul_uint64_192
#define MPC_MUL_MC mpc_matrix_mul_nl_part_uint64_192
#define MPC_ADDMUL_R mpc_matrix_addmul_r_uint64_192
#define MPC_MUL_Z mpc_matrix_mul_z_uint64_192
#define LOWMC_N LOWMC_L3_N
#define LOWMC_R LOWMC_L3_R
#if defined(WITH_LOWMC_192_192_30)
#include "lowmc_192_192_30.h"
#define LOWMC_INSTANCE lowmc_192_192_30
#define SIM_ONLINE lowmc_simulate_online_uint64_192_10
#include "picnic2_simulate.c.i"
#endif
#undef MPC_MUL
#undef MPC_MUL_MC
#undef MPC_ADDMUL_R
#undef MPC_MUL_Z
#undef LOWMC_N
#undef LOWMC_R
#undef LOWMC_INSTANCE
#undef SIM_ONLINE
/* PICNIC2_L5_FS */
#define MPC_MUL mpc_matrix_mul_uint64_256
#define MPC_MUL_MC mpc_matrix_mul_nl_part_uint64_256
#define MPC_ADDMUL_R mpc_matrix_addmul_r_uint64_256
#define MPC_MUL_Z mpc_matrix_mul_z_uint64_256
#define LOWMC_N LOWMC_L5_N
#define LOWMC_R LOWMC_L5_R
#if defined(WITH_LOWMC_256_256_38)
#include "lowmc_256_256_38.h"
#define LOWMC_INSTANCE lowmc_256_256_38
#define SIM_ONLINE lowmc_simulate_online_uint64_256_10
#include "picnic2_simulate.c.i"
#endif
#undef MPC_MUL
#undef MPC_MUL_MC
#undef MPC_ADDMUL_R
#undef MPC_MUL_Z
#undef LOWMC_N
#undef LOWMC_R
#undef LOWMC_INSTANCE
#undef SIM_ONLINE
#if defined(WITH_OPT)
#if defined(WITH_SSE2) || defined(WITH_NEON)
#if defined(WITH_SSE2)
#define FN_ATTR ATTR_TARGET_SSE2
#endif
/* PICNIC2_L1_FS */
#define MPC_MUL mpc_matrix_mul_s128_128
#define MPC_MUL_MC mpc_matrix_mul_nl_part_s128_128
#define MPC_ADDMUL_R mpc_matrix_addmul_r_s128_128
#define MPC_MUL_Z mpc_matrix_mul_z_s128_128
#define LOWMC_N LOWMC_L1_N
#define LOWMC_R LOWMC_L1_R
#if defined(WITH_LOWMC_128_128_20)
#include "lowmc_128_128_20.h"
#define LOWMC_INSTANCE lowmc_128_128_20
#define SIM_ONLINE lowmc_simulate_online_s128_128_10
#include "picnic2_simulate.c.i"
#endif
#undef MPC_MUL
#undef MPC_MUL_MC
#undef MPC_ADDMUL_R
#undef MPC_MUL_Z
#undef LOWMC_N
#undef LOWMC_R
#undef LOWMC_INSTANCE
#undef SIM_ONLINE
/* PICNIC2_L3_FS */
#define MPC_MUL mpc_matrix_mul_s128_192
#define MPC_MUL_MC mpc_matrix_mul_nl_part_s128_192
#define MPC_ADDMUL_R mpc_matrix_addmul_r_s128_192
#define MPC_MUL_Z mpc_matrix_mul_z_s128_192
#define LOWMC_N LOWMC_L3_N
#define LOWMC_R LOWMC_L3_R
#if defined(WITH_LOWMC_192_192_30)
#include "lowmc_192_192_30.h"
#define LOWMC_INSTANCE lowmc_192_192_30
#define SIM_ONLINE lowmc_simulate_online_s128_192_10
#include "picnic2_simulate.c.i"
#endif
#undef MPC_MUL
#undef MPC_MUL_MC
#undef MPC_ADDMUL_R
#undef MPC_MUL_Z
#undef LOWMC_N
#undef LOWMC_R
#undef LOWMC_INSTANCE
#undef SIM_ONLINE
/* PICNIC2_L5_FS */
#define MPC_MUL mpc_matrix_mul_s128_256
#define MPC_MUL_MC mpc_matrix_mul_nl_part_s128_256
#define MPC_ADDMUL_R mpc_matrix_addmul_r_s128_256
#define MPC_MUL_Z mpc_matrix_mul_z_s128_256
#define LOWMC_N LOWMC_L5_N
#define LOWMC_R LOWMC_L5_R
#if defined(WITH_LOWMC_256_256_38)
#include "lowmc_256_256_38.h"
#define LOWMC_INSTANCE lowmc_256_256_38
#define SIM_ONLINE lowmc_simulate_online_s128_256_10
#include "picnic2_simulate.c.i"
#endif
#undef MPC_MUL
#undef MPC_MUL_MC
#undef MPC_ADDMUL_R
#undef MPC_MUL_Z
#undef LOWMC_N
#undef LOWMC_R
#undef LOWMC_INSTANCE
#undef SIM_ONLINE
#undef FN_ATTR
#endif // SSE/NEON
#if defined(WITH_AVX2)
#define FN_ATTR ATTR_TARGET_AVX2
/* PICNIC2_L1_FS */
#define MPC_MUL mpc_matrix_mul_s256_128
#define MPC_MUL_MC mpc_matrix_mul_nl_part_s256_128
#define MPC_ADDMUL_R mpc_matrix_addmul_r_s256_128
#define MPC_MUL_Z mpc_matrix_mul_z_s256_128
#define LOWMC_N LOWMC_L1_N
#define LOWMC_R LOWMC_L1_R
#if defined(WITH_LOWMC_128_128_20)
#include "lowmc_128_128_20.h"
#define LOWMC_INSTANCE lowmc_128_128_20
#define SIM_ONLINE lowmc_simulate_online_s256_128_10
#include "picnic2_simulate.c.i"
#endif
#undef MPC_MUL
#undef MPC_MUL_MC
#undef MPC_ADDMUL_R
#undef MPC_MUL_Z
#undef LOWMC_N
#undef LOWMC_R
#undef LOWMC_INSTANCE
#undef SIM_ONLINE
/* PICNIC2_L3_FS */
#define MPC_MUL mpc_matrix_mul_s256_192
#define MPC_MUL_MC mpc_matrix_mul_nl_part_s256_192
#define MPC_ADDMUL_R mpc_matrix_addmul_r_s256_192
#define MPC_MUL_Z mpc_matrix_mul_z_s256_192
#define LOWMC_N LOWMC_L3_N
#define LOWMC_R LOWMC_L3_R
#if defined(WITH_LOWMC_192_192_30)
#include "lowmc_192_192_30.h"
#define LOWMC_INSTANCE lowmc_192_192_30
#define SIM_ONLINE lowmc_simulate_online_s256_192_10
#include "picnic2_simulate.c.i"
#endif
#undef MPC_MUL
#undef MPC_MUL_MC
#undef MPC_ADDMUL_R
#undef MPC_MUL_Z
#undef LOWMC_N
#undef LOWMC_R
#undef LOWMC_INSTANCE
#undef SIM_ONLINE
/* PICNIC2_L5_FS */
#define MPC_MUL mpc_matrix_mul_s256_256
#define MPC_MUL_MC mpc_matrix_mul_nl_part_s256_256
#define MPC_ADDMUL_R mpc_matrix_addmul_r_s256_256
#define MPC_MUL_Z mpc_matrix_mul_z_s256_256
#define LOWMC_N LOWMC_L5_N
#define LOWMC_R LOWMC_L5_R
#if defined(WITH_LOWMC_256_256_38)
#include "lowmc_256_256_38.h"
#define LOWMC_INSTANCE lowmc_256_256_38
#define SIM_ONLINE lowmc_simulate_online_s256_256_10
#include "picnic2_simulate.c.i"
#endif
#undef MPC_MUL
#undef MPC_MUL_MC
#undef MPC_ADDMUL_R
#undef MPC_MUL_Z
#undef LOWMC_N
#undef LOWMC_R
#undef LOWMC_INSTANCE
#undef SIM_ONLINE
#undef FN_ATTR
#endif // AVX2
#endif // WITH_OPT
/* Select the online-simulation routine for the given LowMC instance.
 *
 * Candidates are tried from most to least specialized: AVX2 (s256), then
 * SSE2/NEON (s128), then the portable uint64 version. A tier is considered only
 * if it was compiled in (WITH_OPT plus WITH_AVX2 / WITH_SSE2 / WITH_NEON) and
 * the CPU reports support at run time. Within a tier, a block size whose
 * WITH_LOWMC_* macro is undefined has no case label, so the switch does not
 * match and control falls through to the next tier.
 *
 * Returns NULL when no implementation matches. Only m == 10 has
 * implementations here, so m == 1 (although allowed by the ASSUME) returns NULL. */
lowmc_simulate_online_f lowmc_simulate_online_get_implementation(const lowmc_t* lowmc) {
ASSUME(lowmc->m == 10 || lowmc->m == 1);
ASSUME(lowmc->n == 128 || lowmc->n == 192 || lowmc->n == 256);
#if defined(WITH_OPT)
#if defined(WITH_AVX2)
/* Tier 1: 256-bit SIMD variants. */
if (CPU_SUPPORTS_AVX2) {
if (lowmc->m == 10) {
switch (lowmc->n) {
#if defined(WITH_LOWMC_128_128_20)
case 128:
return lowmc_simulate_online_s256_128_10;
#endif
#if defined(WITH_LOWMC_192_192_30)
case 192:
return lowmc_simulate_online_s256_192_10;
#endif
#if defined(WITH_LOWMC_256_256_38)
case 256:
return lowmc_simulate_online_s256_256_10;
#endif
}
}
}
#endif
#if defined(WITH_SSE2) || defined(WITH_NEON)
/* Tier 2: 128-bit SIMD variants. */
if (CPU_SUPPORTS_SSE2 || CPU_SUPPORTS_NEON) {
if (lowmc->m == 10) {
switch (lowmc->n) {
#if defined(WITH_LOWMC_128_128_20)
case 128:
return lowmc_simulate_online_s128_128_10;
#endif
#if defined(WITH_LOWMC_192_192_30)
case 192:
return lowmc_simulate_online_s128_192_10;
#endif
#if defined(WITH_LOWMC_256_256_38)
case 256:
return lowmc_simulate_online_s128_256_10;
#endif
}
}
}
#endif
#endif
/* Tier 3: portable 64-bit variants, always compiled. */
if (lowmc->m == 10) {
switch (lowmc->n) {
#if defined(WITH_LOWMC_128_128_20)
case 128:
return lowmc_simulate_online_uint64_128_10;
#endif
#if defined(WITH_LOWMC_192_192_30)
case 192:
return lowmc_simulate_online_uint64_192_10;
#endif
#if defined(WITH_LOWMC_256_256_38)
case 256:
return lowmc_simulate_online_uint64_256_10;
#endif
}
}
return NULL;
}

View File

@ -0,0 +1,152 @@
/*! @file picnic2_simulate.c.i
* @brief Template for the online-phase simulation routine of the Picnic2
* parameter sets; it is included once per LowMC instance and word size.
*
* This file is part of the reference implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "kdf_shake.h"
#include "macros.h"
#include "picnic_impl.h"
#include "picnic2_impl.h"
#include "picnic.h"
#include "picnic2_types.h"
#include "picnic2_tree.h"
#include "io.h"
/* Template body of the online-phase simulation. The including file defines
 * SIM_ONLINE (function name), LOWMC_N / LOWMC_R (block size in bits / number of
 * rounds), LOWMC_INSTANCE (precomputed matrices and constants) and the MPC_*
 * matrix-multiplication routines before including this file.
 *
 * maskedKey    masked LowMC key
 * mask_shares  parties' shares of the key mask on entry; used as the running
 *              state mask and therefore modified
 * tapes        random tapes consumed by the AND gates
 * msgs         broadcast messages; if msgs->unopened >= 0 (verification) the
 *              unopened party's messages are read from it rather than computed
 * plaintext    LowMC plaintext
 * pubKey       expected LowMC output
 * params       Picnic instance parameters
 *
 * Returns 0 if the reconstructed output equals pubKey, -1 otherwise. All
 * scratch buffers are released on both paths. */
#if defined(FN_ATTR)
FN_ATTR
#endif
static int SIM_ONLINE(uint32_t* maskedKey, shares_t* mask_shares, randomTape_t* tapes, msgs_t* msgs,
                      const uint32_t* plaintext, const uint32_t* pubKey,
                      const picnic_instance_t* params) {
  int ret = 0;
  uint32_t* roundKey = malloc(LOWMC_N / 8);
  uint32_t* state = malloc(LOWMC_N / 8);
  uint32_t* state2 = malloc(LOWMC_N / 8);
  uint32_t* nl_part = malloc(LOWMC_R * sizeof(uint32_t));
  shares_t* nl_part_masks = allocateShares(LOWMC_R * 32);
  shares_t* key_masks = allocateShares(LOWMC_N); // Make a copy to use when computing each round key
  shares_t* mask2_shares = allocateShares(LOWMC_N);
  uint8_t* unopened_msgs = NULL;

  if (msgs->unopened >= 0) { // We are in verify, save the unopened party's msgs
    unopened_msgs = malloc(params->view_size + params->input_size);
    memcpy(unopened_msgs, msgs->msgs[msgs->unopened], params->view_size + params->input_size);
  }

  copyShares(key_masks, mask_shares);

#if defined(REDUCED_ROUND_KEY_COMPUTATION)
  MPC_MUL(state, maskedKey, LOWMC_INSTANCE.k0_matrix->w64,
          mask_shares); // roundKey = maskedKey * KMatrix[0]
  xor_word_array(state, state, plaintext, (LOWMC_N / 32)); // state = plaintext + roundKey
  xor_array_RC((uint8_t*)state, (uint8_t*)state,
               (uint8_t*)LOWMC_INSTANCE.precomputed_constant_linear,
               LOWMC_N / 8); // state = state + precomp_const
  MPC_MUL_MC(nl_part, maskedKey, LOWMC_INSTANCE.precomputed_non_linear_part_matrix->w64,
             LOWMC_INSTANCE.precomputed_constant_non_linear->w64, nl_part_masks, key_masks);

#if defined(OPTIMIZED_LINEAR_LAYER_EVALUATION)
  for (uint32_t r = 0; r < LOWMC_R - 1; r++) {
    mpc_sbox(state, mask_shares, tapes, msgs, unopened_msgs, params);
    mpc_xor2_nl(state, mask_shares, state, mask_shares, nl_part, nl_part_masks, r * 32 + 2,
                30); // state += roundKey
    MPC_MUL_Z(state2, state, mask2_shares, mask_shares, LOWMC_INSTANCE.rounds[r].z_matrix->w64);
    mpc_shuffle((uint8_t*)state, mask_shares, LOWMC_INSTANCE.rounds[r].r_mask);
    MPC_ADDMUL_R(state2, state, mask2_shares, mask_shares, LOWMC_INSTANCE.rounds[r].r_matrix->w64);
    for (uint32_t i = 0; i < 30; i++) {
      mask_shares->shares[i] = 0;
      setBit((uint8_t*)state, i, 0);
    }
    mpc_xor2(state, mask_shares, state, mask_shares, state2, mask2_shares, params);
  }
  mpc_sbox(state, mask_shares, tapes, msgs, unopened_msgs, params);
  mpc_xor2_nl(state, mask_shares, state, mask_shares, nl_part, nl_part_masks,
              (LOWMC_R - 1) * 32 + 2, 30); // state += roundKey
  MPC_MUL(state, state, LOWMC_INSTANCE.zr_matrix->w64,
          mask_shares); // state = state * LMatrix (r-1)
#else
  for (uint32_t r = 0; r < LOWMC_R; r++) {
    mpc_sbox(state, mask_shares, tapes, msgs, unopened_msgs, params);
    mpc_xor2_nl(state, mask_shares, state, mask_shares, nl_part, nl_part_masks, r * 32 + 2,
                30); // state += roundKey
    MPC_MUL(state, state, LOWMC_INSTANCE.rounds[r].l_matrix->w64,
            mask_shares); // state = state * LMatrix (r-1)
  }
#endif
#else
  MPC_MUL(roundKey, maskedKey, LOWMC_INSTANCE.k0_matrix->w64,
          mask_shares); // roundKey = maskedKey * KMatrix[0]
  xor_word_array(state, roundKey, plaintext, (LOWMC_N / 32)); // state = plaintext + roundKey

  shares_t* round_key_masks = allocateShares(mask_shares->numWords);
  for (uint32_t r = 0; r < LOWMC_R; r++) {
    copyShares(round_key_masks, key_masks);
    MPC_MUL(roundKey, maskedKey, LOWMC_INSTANCE.rounds[r].k_matrix->w64, round_key_masks);

    mpc_sbox(state, mask_shares, tapes, msgs, unopened_msgs, params);
    MPC_MUL(state, state, LOWMC_INSTANCE.rounds[r].l_matrix->w64,
            mask_shares); // state = state * LMatrix (r-1)
    xor_array_RC((uint8_t*)state, (uint8_t*)state,
                 (const uint8_t*)(LOWMC_INSTANCE.rounds[r].constant->w64),
                 LOWMC_N / 8); // state += RConstant
    mpc_xor2(state, mask_shares, roundKey, round_key_masks, state, mask_shares,
             params); // state += roundKey
  }
  freeShares(round_key_masks);
#endif

  /* Unmask the output, and check that it's correct */
  if (msgs->unopened >= 0) {
    /* During signature verification we have the shares of the output for
     * the unopened party already in msgs, but not in mask_shares. */
    for (size_t i = 0; i < LOWMC_N; i++) {
      uint8_t share = getBit(unopened_msgs, msgs->pos + i);
      setBit((uint8_t*)&mask_shares->shares[i], msgs->unopened, share);
    }
  }
  uint32_t output[LOWMC_N / 8];
  reconstructShares(output, mask_shares);
  xor_word_array(output, output, state, (LOWMC_N / 32));

  if (memcmp(output, pubKey, LOWMC_N / 8) != 0) {
#if !defined(NDEBUG)
    printf("%s: output does not match pubKey\n", __func__);
    printf("pubKey: ");
    print_hex(stdout, (uint8_t*)pubKey, LOWMC_N / 8);
    printf("\noutput: ");
    print_hex(stdout, (uint8_t*)output, LOWMC_N / 8);
    printf("\n");
#endif
    ret = -1;
    goto Exit; /* skip the broadcast, but still release the buffers below */
  }

  broadcast(mask_shares, msgs);
  msgsTranspose(msgs);

Exit:
  /* Cleanup is shared by the success and the mismatch path. The label used to
   * sit after these calls, so every failed check leaked all of the buffers. */
  free(unopened_msgs); /* NULL when signing; free(NULL) is a no-op */
  free(state);
  free(state2);
  free(roundKey);
  free(nl_part);
  freeShares(key_masks);
  freeShares(mask2_shares);
  freeShares(nl_part_masks);

  return ret;
}

View File

@ -0,0 +1,26 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/
#ifndef PICNIC2_SIMULATE_H
#define PICNIC2_SIMULATE_H
#include "lowmc_pars.h"
/* Forward declarations only: this header uses these types through pointers,
 * so their full definitions are not required here. */
typedef struct randomTape_t randomTape_t;
typedef struct shares_t shares_t;
typedef struct msgs_t msgs_t;
typedef struct picnic_instance_t picnic_instance_t;
/* Function-pointer type for an online-phase LowMC simulation routine.
 * It receives the masked key, the mask shares, the parties' random tapes, the
 * message buffers, the plaintext, the public key and the parameter set, and
 * returns an int status.
 * NOTE(review): presumably 0 means success and non-zero means the simulated
 * output did not match pubKey -- confirm against the implementations. */
typedef int (*lowmc_simulate_online_f)(uint32_t* maskedKey, shares_t* mask_shares,
randomTape_t* tapes, msgs_t* msgs, const uint32_t* plaintext,
const uint32_t* pubKey, const picnic_instance_t* params);
/* Returns the simulation routine to use for the given LowMC instance.
 * NOTE(review): selection criteria (block size, CPU features) are not visible
 * from this header -- confirm in the implementation file. */
lowmc_simulate_online_f lowmc_simulate_online_get_implementation(const lowmc_t* lowmc);
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,119 @@
/*
* This file is part of the optimized implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/
#ifndef PICNIC2_SIMULATE_MUL_H
#define PICNIC2_SIMULATE_MUL_H
#include "picnic2_types.h"
void mpc_matrix_mul_uint64_128(uint32_t* output, const uint32_t* vec, const uint64_t* matrix,
shares_t* mask_shares);
void mpc_matrix_mul_uint64_192(uint32_t* output, const uint32_t* vec, const uint64_t* matrix,
shares_t* mask_shares);
void mpc_matrix_mul_uint64_256(uint32_t* output, const uint32_t* vec, const uint64_t* matrix,
shares_t* mask_shares);
void mpc_matrix_mul_s128_128(uint32_t* output, const uint32_t* vec, const uint64_t* matrix,
shares_t* mask_shares);
void mpc_matrix_mul_s128_192(uint32_t* output, const uint32_t* vec, const uint64_t* matrix,
shares_t* mask_shares);
void mpc_matrix_mul_s128_256(uint32_t* output, const uint32_t* vec, const uint64_t* matrix,
shares_t* mask_shares);
void mpc_matrix_mul_s256_128(uint32_t* output, const uint32_t* vec, const uint64_t* matrix,
shares_t* mask_shares);
void mpc_matrix_mul_s256_192(uint32_t* output, const uint32_t* vec, const uint64_t* matrix,
shares_t* mask_shares);
void mpc_matrix_mul_s256_256(uint32_t* output, const uint32_t* vec, const uint64_t* matrix,
shares_t* mask_shares);
void mpc_matrix_mul_z_uint64_128(uint32_t* state2, const uint32_t* state, shares_t* mask2_shares,
const shares_t* mask_shares, const uint64_t* matrix);
void mpc_matrix_mul_z_uint64_192(uint32_t* state2, const uint32_t* state, shares_t* mask2_shares,
const shares_t* mask_shares, const uint64_t* matrix);
void mpc_matrix_mul_z_uint64_256(uint32_t* state2, const uint32_t* state, shares_t* mask2_shares,
const shares_t* mask_shares, const uint64_t* matrix);
void mpc_matrix_mul_z_s128_128(uint32_t* state2, const uint32_t* state, shares_t* mask2_shares,
const shares_t* mask_shares, const uint64_t* matrix);
void mpc_matrix_mul_z_s128_192(uint32_t* state2, const uint32_t* state, shares_t* mask2_shares,
const shares_t* mask_shares, const uint64_t* matrix);
void mpc_matrix_mul_z_s128_256(uint32_t* state2, const uint32_t* state, shares_t* mask2_shares,
const shares_t* mask_shares, const uint64_t* matrix);
void mpc_matrix_mul_z_s256_128(uint32_t* state2, const uint32_t* state, shares_t* mask2_shares,
const shares_t* mask_shares, const uint64_t* matrix);
void mpc_matrix_mul_z_s256_192(uint32_t* state2, const uint32_t* state, shares_t* mask2_shares,
const shares_t* mask_shares, const uint64_t* matrix);
void mpc_matrix_mul_z_s256_256(uint32_t* state2, const uint32_t* state, shares_t* mask2_shares,
const shares_t* mask_shares, const uint64_t* matrix);
void mpc_matrix_addmul_r_uint64_128(uint32_t* state2, const uint32_t* state, shares_t* mask2_shares,
shares_t* mask_shares, const uint64_t* matrix);
void mpc_matrix_addmul_r_uint64_192(uint32_t* state2, const uint32_t* state, shares_t* mask2_shares,
shares_t* mask_shares, const uint64_t* matrix);
void mpc_matrix_addmul_r_uint64_256(uint32_t* state2, const uint32_t* state, shares_t* mask2_shares,
shares_t* mask_shares, const uint64_t* matrix);
void mpc_matrix_addmul_r_s128_128(uint32_t* state2, const uint32_t* state, shares_t* mask2_shares,
shares_t* mask_shares, const uint64_t* matrix);
void mpc_matrix_addmul_r_s128_192(uint32_t* state2, const uint32_t* state, shares_t* mask2_shares,
shares_t* mask_shares, const uint64_t* matrix);
void mpc_matrix_addmul_r_s128_256(uint32_t* state2, const uint32_t* state, shares_t* mask2_shares,
shares_t* mask_shares, const uint64_t* matrix);
void mpc_matrix_addmul_r_s256_128(uint32_t* state2, const uint32_t* state, shares_t* mask2_shares,
shares_t* mask_shares, const uint64_t* matrix);
void mpc_matrix_addmul_r_s256_192(uint32_t* state2, const uint32_t* state, shares_t* mask2_shares,
shares_t* mask_shares, const uint64_t* matrix);
void mpc_matrix_addmul_r_s256_256(uint32_t* state2, const uint32_t* state, shares_t* mask2_shares,
shares_t* mask_shares, const uint64_t* matrix);
void mpc_matrix_mul_nl_part_uint64_128(uint32_t* nl_part, const uint32_t* key,
const uint64_t* precomputed_nl_matrix,
const uint64_t* precomputed_constant_nl,
shares_t* nl_part_masks, const shares_t* key_masks);
void mpc_matrix_mul_nl_part_uint64_192(uint32_t* nl_part, const uint32_t* key,
const uint64_t* precomputed_nl_matrix,
const uint64_t* precomputed_constant_nl,
shares_t* nl_part_masks, const shares_t* key_masks);
void mpc_matrix_mul_nl_part_uint64_256(uint32_t* nl_part, const uint32_t* key,
const uint64_t* precomputed_nl_matrix,
const uint64_t* precomputed_constant_nl,
shares_t* nl_part_masks, const shares_t* key_masks);
void mpc_matrix_mul_nl_part_s128_128(uint32_t* nl_part, const uint32_t* key,
const uint64_t* precomputed_nl_matrix,
const uint64_t* precomputed_constant_nl,
shares_t* nl_part_masks, const shares_t* key_masks);
void mpc_matrix_mul_nl_part_s128_192(uint32_t* nl_part, const uint32_t* key,
const uint64_t* precomputed_nl_matrix,
const uint64_t* precomputed_constant_nl,
shares_t* nl_part_masks, const shares_t* key_masks);
void mpc_matrix_mul_nl_part_s128_256(uint32_t* nl_part, const uint32_t* key,
const uint64_t* precomputed_nl_matrix,
const uint64_t* precomputed_constant_nl,
shares_t* nl_part_masks, const shares_t* key_masks);
void mpc_matrix_mul_nl_part_s256_128(uint32_t* nl_part, const uint32_t* key,
const uint64_t* precomputed_nl_matrix,
const uint64_t* precomputed_constant_nl,
shares_t* nl_part_masks, const shares_t* key_masks);
void mpc_matrix_mul_nl_part_s256_192(uint32_t* nl_part, const uint32_t* key,
const uint64_t* precomputed_nl_matrix,
const uint64_t* precomputed_constant_nl,
shares_t* nl_part_masks, const shares_t* key_masks);
void mpc_matrix_mul_nl_part_s256_256(uint32_t* nl_part, const uint32_t* key,
const uint64_t* precomputed_nl_matrix,
const uint64_t* precomputed_constant_nl,
shares_t* nl_part_masks, const shares_t* key_masks);
/* helper functions */
/* NOTE(review): the one-line descriptions below are inferred from the names
 * and call sites only; confirm each against its definition. */
/* Presumably copies the share words of src into dst. */
void copyShares(shares_t* dst, shares_t* src);
/* Presumably returns bit number bitNumber of array as 0 or 1 (bit order: TODO confirm). */
uint8_t getBit(const uint8_t* array, uint32_t bitNumber);
/* Presumably sets bit number bitNumber of bytes to val (bit order: TODO confirm). */
void setBit(uint8_t* bytes, uint32_t bitNumber, uint8_t val);
/* Presumably out[i] = in1[i] ^ in2[i] for `length` 32-bit words. */
void xor_word_array(uint32_t* out, const uint32_t* in1, const uint32_t* in2, uint32_t length);
/* Presumably XORs a round constant (in2) into in1 over `length` bytes; any
 * byte-order handling is not visible here. */
void xor_array_RC(uint8_t* out, const uint8_t* in1, const uint8_t* in2, uint32_t length);
/* Presumably reads the next bit of every party's tape and packs them into one word. */
uint64_t tapesToWord(randomTape_t* tapes);
/* Presumably combines the per-party shares into the unshared value written to output. */
void reconstructShares(uint32_t* output, shares_t* shares);
/* Presumably transposes a 64x64 bit matrix stored as 64 uint64_t rows. */
void transpose_64_64(const uint64_t* in, uint64_t* out);
#endif

547
src/sig/picnic/external/picnic2_tree.c vendored Normal file
View File

@ -0,0 +1,547 @@
/*! @file picnic2_tree.c
* @brief This file has the tree implementation used to generate random seeds
* and commit to multiple values with a Merkle tree.
*
* This file is part of the reference implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/
#include <assert.h>
#include <stdlib.h>
#include <limits.h>
#include "endian_compat.h"
#include "kdf_shake.h"
#include "picnic.h"
#include "picnic_impl.h"
#include "picnic2_tree.h"
#include "picnic2_types.h"
/* Linear membership test: returns 1 when `value` appears among the first
 * `len` entries of `list`, and 0 otherwise (including when len is 0). */
static int contains(size_t* list, size_t len, size_t value) {
  size_t idx = 0;
  while (idx < len) {
    if (list[idx] == value) {
      return 1;
    }
    idx++;
  }
  return 0;
}
/* Returns 1 when index i is inside the node array and is flagged in
 * tree->exists, and 0 otherwise. Out-of-range indices are reported as
 * non-existent rather than read. */
static int exists(tree_t* tree, size_t i) {
  return (i < tree->numNodes && tree->exists[i]) ? 1 : 0;
}
/* Allocates a (nearly) complete binary tree with numLeaves leaves, where each
 * node owns dataSize zero-initialised bytes.
 *
 * All node buffers live in one contiguous slab; tree->nodes[i] points into it
 * and tree->nodes[0] is the slab base (freeTree relies on this).
 * tree->exists marks which indices are real nodes, since the last level may be
 * incomplete; tree->haveNode starts all-zero.
 *
 * Returns the new tree, or NULL if any allocation fails (nothing is leaked in
 * that case). Release with freeTree(). */
tree_t* createTree(size_t numLeaves, size_t dataSize) {
  tree_t* tree = malloc(sizeof(tree_t));
  if (tree == NULL) {
    return NULL;
  }

  tree->depth = ceil_log2(numLeaves) + 1;
  /* Num nodes in complete - number of missing leaves.
   * The shifts are done in size_t so they cannot overflow a signed int. */
  tree->numNodes = (((size_t)1 << tree->depth) - 1) -
                   (((size_t)1 << (tree->depth - 1)) - numLeaves);
  tree->numLeaves = numLeaves;
  tree->dataSize  = dataSize;

  tree->nodes    = malloc(tree->numNodes * sizeof(uint8_t*));
  uint8_t* slab  = calloc(tree->numNodes, dataSize);
  tree->haveNode = calloc(tree->numNodes, 1);
  /* Depending on the number of leaves, the tree may not be complete */
  tree->exists = calloc(tree->numNodes, 1);

  /* A NULL result is only an error for a non-zero request size. */
  if (tree->numNodes != 0 &&
      (tree->nodes == NULL || tree->haveNode == NULL || tree->exists == NULL ||
       (dataSize != 0 && slab == NULL))) {
    free(slab);
    free(tree->exists);
    free(tree->haveNode);
    free(tree->nodes);
    free(tree);
    return NULL;
  }

  for (size_t i = 0; i < tree->numNodes; i++) {
    tree->nodes[i] = slab;
    slab += dataSize;
  }

  const size_t firstLeaf = tree->numNodes - tree->numLeaves;
  memset(tree->exists + firstLeaf, 1, tree->numLeaves); /* Set leaves */
  /* Walk towards the root: a node exists if at least one child exists. */
  for (size_t i = firstLeaf; i > 0; i--) {
    if (exists(tree, 2 * i + 1) || exists(tree, 2 * i + 2)) {
      tree->exists[i] = 1;
    }
  }
  tree->exists[0] = 1;
  return tree;
}
/* Releases a tree and everything it owns. Passing NULL is a no-op. */
void freeTree(tree_t* tree) {
  if (tree == NULL) {
    return;
  }
  /* nodes[0] is freed first, then the pointer table itself. */
  free(tree->nodes[0]);
  free(tree->nodes);
  free(tree->haveNode);
  free(tree->exists);
  free(tree);
}
/* Returns 1 if `node` has an odd index (left children sit at 2k + 1), else 0.
 * The root (index 0) has no parent, so calling this with 0 trips an assert. */
static int isLeftChild(size_t node) {
  assert(node != 0);
  return (node & 1u) != 0;
}
/* Returns non-zero when index 2*node + 2 lies inside the node array and
 * `node` itself exists.
 * NOTE(review): the existence test is applied to `node`, not to the right
 * child index. Callers' outputs (revealed seeds, Merkle hashes) depend on this
 * exact behaviour, so it is preserved as-is. */
static int hasRightChild(tree_t* tree, size_t node) {
  const size_t rightIdx = 2 * node + 2;
  if (rightIdx >= tree->numNodes) {
    return 0;
  }
  return exists(tree, node);
}
/* Returns the index of the parent of `node`. Must not be called on the root
 * (index 0); this is enforced by an assert. */
static size_t getParent(size_t node) {
  assert(node != 0);
  /* Left children are 2p + 1 and right children are 2p + 2, so remove the
   * matching offset before halving. */
  const size_t offset = isLeftChild(node) ? 1 : 2;
  return (node - offset) / 2;
}
/* Returns a pointer to the first leaf entry of tree->nodes; the numLeaves
 * leaf pointers follow it contiguously. */
uint8_t** getLeaves(tree_t* tree) {
  const size_t firstLeaf = tree->numNodes - tree->numLeaves;
  return tree->nodes + firstLeaf;
}
/* Returns the data buffer of leaf number leafIndex.
 * leafIndex must be below tree->numLeaves (checked by assert). */
uint8_t* getLeaf(tree_t* tree, size_t leafIndex) {
  assert(leafIndex < tree->numLeaves);
  uint8_t** leaves = &tree->nodes[tree->numNodes - tree->numLeaves];
  return leaves[leafIndex];
}
/* Derives the two child seeds of a node in one hash call.
 *
 * Absorbs, in this order: inputSeed (params->seed_size bytes), salt
 * (SALT_SIZE bytes), repIndex and nodeIndex. The hash is initialised with
 * hashPrefix via hash_init_prefix. Writes 2 * params->seed_size bytes to
 * digest, so digest must be at least that large.
 *
 * Both indices are narrowed to uint16_t before hashing, so only their low 16
 * bits contribute. */
static void hashSeed(uint8_t* digest, const uint8_t* inputSeed, uint8_t* salt, uint8_t hashPrefix,
size_t repIndex, size_t nodeIndex, const picnic_instance_t* params) {
Keccak_HashInstance ctx;
hash_init_prefix(&ctx, params, hashPrefix);
hash_update(&ctx, inputSeed, params->seed_size);
hash_update(&ctx, salt, SALT_SIZE);
/* htole16 gives the indices a fixed (little-endian) byte order before they
 * are absorbed, so the digest does not depend on host endianness. */
uint16_t repIndexLE = htole16((uint16_t)repIndex);
hash_update(&ctx, (uint8_t*)&repIndexLE, sizeof(uint16_t));
uint16_t nodeIndexLE = htole16((uint16_t)nodeIndex);
hash_update(&ctx, (uint8_t*)&nodeIndexLE, sizeof(uint16_t));
hash_final(&ctx);
hash_squeeze(&ctx, digest, 2 * params->seed_size);
}
/* Fills in every seed that can be derived from the seeds already present.
 *
 * Visits nodes in increasing index order (parents before children). For each
 * node whose seed is known (haveNode set), hashes it with hashSeed and copies
 * the first seed_size bytes to the left child and the next seed_size bytes to
 * the right child. Children that already have a seed are left untouched. */
static void expandSeeds(tree_t* tree, uint8_t* salt, size_t repIndex,
const picnic_instance_t* params) {
/* Holds both child seeds; assumes params->seed_size <= MAX_SEED_SIZE_BYTES
 * -- TODO confirm for all parameter sets. */
uint8_t tmp[2 * MAX_SEED_SIZE_BYTES];
/* Walk the tree, expanding seeds where possible. Compute children of
* non-leaf nodes. */
size_t lastNonLeaf = getParent(tree->numNodes - 1);
for (size_t i = 0; i <= lastNonLeaf; i++) {
if (!tree->haveNode[i]) {
continue;
}
hashSeed(tmp, tree->nodes[i], salt, HASH_PREFIX_1, repIndex, i, params);
if (!tree->haveNode[2 * i + 1]) {
/* left child = H_left(seed_i || salt || t || i) */
memcpy(tree->nodes[2 * i + 1], tmp, params->seed_size);
tree->haveNode[2 * i + 1] = 1;
}
/* The last non-leaf node will only have a left child when there are an odd number of leaves */
if (exists(tree, 2 * i + 2) && !tree->haveNode[2 * i + 2]) {
/* right child = H_right(seed_i || salt || t || i) */
memcpy(tree->nodes[2 * i + 2], tmp + params->seed_size, params->seed_size);
tree->haveNode[2 * i + 2] = 1;
}
}
}
/* Builds a seed tree with nSeeds leaves: the root is set to rootSeed
 * (params->seed_size bytes) and every other node is derived from it by
 * expandSeeds using salt and repIndex. The caller owns the returned tree and
 * releases it with freeTree(). */
tree_t* generateSeeds(size_t nSeeds, uint8_t* rootSeed, uint8_t* salt, size_t repIndex,
                      const picnic_instance_t* params) {
  tree_t* seedTree = createTree(nSeeds, params->seed_size);

  /* Install the root seed and mark it as known so expansion can start. */
  memcpy(seedTree->nodes[0], rootSeed, params->seed_size);
  seedTree->haveNode[0] = 1;

  expandSeeds(seedTree, salt, repIndex, params);
  return seedTree;
}
/* Returns 1 when `node` has no left child slot inside the node array
 * (index 2*node + 1 is out of range), and 0 otherwise. */
static int isLeafNode(tree_t* tree, size_t node) {
  const size_t leftIdx = 2 * node + 1;
  return leftIdx >= tree->numNodes;
}
/* Returns 1 if `node` exists and has a sibling, 0 otherwise. Only a left child
 * is checked for a missing neighbour (index node + 1); a right child is
 * always treated as having a sibling. */
static int hasSibling(tree_t* tree, size_t node) {
  if (!exists(tree, node)) {
    return 0;
  }
  const int loneLeftChild = isLeftChild(node) && !exists(tree, node + 1);
  return loneLeftChild ? 0 : 1;
}
/* Returns the index of the sibling of `node`: node + 1 for a left child,
 * node - 1 for a right child. The caller must ensure the node is in range, is
 * not the root and has a sibling; these are asserted. With asserts compiled
 * out, a left child whose sibling index is out of range yields 0. */
static size_t getSibling(tree_t* tree, size_t node) {
  assert(node < tree->numNodes);
  assert(node != 0);
  assert(hasSibling(tree, node));

  if (!isLeftChild(node)) {
    return node - 1;
  }
  if (node + 1 < tree->numNodes) {
    return node + 1;
  }
  assert(!"getSibling: request for node with not sibling");
  return 0;
}
/* Computes which tree nodes must be revealed so that every leaf seed except
 * those in hideList can be re-derived.
 *
 * hideList holds hideListSize leaf indices (not node indices).
 * Returns a malloc'ed array of node indices that the caller must free; the
 * number of valid entries is stored in *outputSize. The array has capacity
 * for tree->numLeaves entries. */
static size_t* getRevealedNodes(tree_t* tree, uint16_t* hideList, size_t hideListSize,
size_t* outputSize) {
/* Compute paths up from hideList to root, store as sets of nodes */
size_t pathLen = tree->depth - 1;
/* pathSets[i][0...hideListSize] stores the nodes in the path at depth i
* for each of the leaf nodes in hideListSize */
size_t** pathSets = malloc(pathLen * sizeof(size_t*));
/* One contiguous slab backs all rows; pathSets[0] is its base. */
size_t* slab = malloc(hideListSize * pathLen * sizeof(size_t));
for (size_t i = 0; i < pathLen; i++) {
pathSets[i] = slab;
slab += hideListSize;
}
/* Compute the paths back to the root */
for (size_t i = 0; i < hideListSize; i++) {
size_t pos = 0;
size_t node =
hideList[i] +
(tree->numNodes - tree->numLeaves); /* input lists leaf indexes, translate to nodes */
pathSets[pos][i] = node;
pos++;
/* The root itself (index 0) is never stored in the path. */
while ((node = getParent(node)) != 0) {
pathSets[pos][i] = node;
pos++;
}
}
/* Determine seeds to reveal */
size_t* revealed = malloc(tree->numLeaves * sizeof(size_t));
size_t revealedPos = 0;
for (size_t d = 0; d < pathLen; d++) {
for (size_t i = 0; i < hideListSize; i++) {
if (!hasSibling(tree, pathSets[d][i])) {
continue;
}
size_t sibling = getSibling(tree, pathSets[d][i]);
/* A sibling that lies on another hidden path must stay hidden too. */
if (!contains(pathSets[d], hideListSize, sibling)) {
// Determine the seed to reveal
while (!hasRightChild(tree, sibling) && !isLeafNode(tree, sibling)) {
sibling = 2 * sibling + 1; // sibling = leftChild(sibling)
}
// Only reveal if we haven't already
if (!contains(revealed, revealedPos, sibling)) {
revealed[revealedPos] = sibling;
revealedPos++;
}
}
}
}
/* Frees the slab (via its base pointer) and then the row table. */
free(pathSets[0]);
free(pathSets);
*outputSize = revealedPos;
return revealed;
}
/* Returns the number of bytes revealSeeds would write for a tree with
 * numNodes leaves and the given hide list: one seed (params->seed_size bytes)
 * per revealed node. A temporary tree is built only to compute the count. */
size_t revealSeedsSize(size_t numNodes, uint16_t* hideList, size_t hideListSize,
                       const picnic_instance_t* params) {
  size_t revealedCount = 0;
  tree_t* scratch      = createTree(numNodes, params->seed_size);
  size_t* nodeList     = getRevealedNodes(scratch, hideList, hideListSize, &revealedCount);

  freeTree(scratch);
  free(nodeList);

  return revealedCount * params->seed_size;
}
/* Serialises the seeds a verifier needs to rebuild every leaf seed except
 * those listed in hideList.
 *
 * tree         seed tree holding the seeds to copy from
 * hideList     leaf indices that must stay hidden
 * hideListSize number of entries in hideList
 * output       destination buffer
 * outputSize   capacity of output in bytes
 *
 * Returns the number of bytes written to output. Returns 0 on failure: when
 * outputSize exceeds INT_MAX or when the buffer is too small for all revealed
 * seeds. (The oversized-buffer case used to return (size_t)-1, which a caller
 * storing the result as a length would have read as SIZE_MAX.) */
size_t revealSeeds(tree_t* tree, uint16_t* hideList, size_t hideListSize, uint8_t* output,
                   size_t outputSize, const picnic_instance_t* params) {
  uint8_t* outputBase = output;
  size_t revealedSize = 0;

  if (outputSize > INT_MAX) {
    return 0;
  }
  /* Signed so that running out of space shows up as a negative remainder. */
  int outLen = (int)outputSize;

  size_t* revealed = getRevealedNodes(tree, hideList, hideListSize, &revealedSize);
  for (size_t i = 0; i < revealedSize; i++) {
    outLen -= params->seed_size;
    if (outLen < 0) {
      assert(!"Insufficient sized buffer provided to revealSeeds");
      free(revealed);
      return 0;
    }
    memcpy(output, tree->nodes[revealed[i]], params->seed_size);
    output += params->seed_size;
  }

  free(revealed);
  return output - outputBase;
}
/* Verifier side of revealSeeds: loads the revealed seeds from input into the
 * tree and re-derives every seed that follows from them.
 *
 * The set and order of revealed nodes is recomputed from hideList with
 * getRevealedNodes, so input is consumed in that same order, one
 * params->seed_size chunk per node.
 *
 * Returns 0 on success, -1 if inputLen exceeds INT_MAX or input is too short.
 * Trailing input bytes beyond the last revealed seed are not treated as an
 * error here. */
int reconstructSeeds(tree_t* tree, uint16_t* hideList, size_t hideListSize, uint8_t* input,
size_t inputLen, uint8_t* salt, size_t repIndex,
const picnic_instance_t* params) {
int ret = 0;
if (inputLen > INT_MAX) {
return -1;
}
/* Signed so that running out of input shows up as a negative remainder. */
int inLen = (int)inputLen;
size_t revealedSize = 0;
size_t* revealed = getRevealedNodes(tree, hideList, hideListSize, &revealedSize);
for (size_t i = 0; i < revealedSize; i++) {
inLen -= params->seed_size;
if (inLen < 0) {
ret = -1;
goto Exit;
}
memcpy(tree->nodes[revealed[i]], input, params->seed_size);
tree->haveNode[revealed[i]] = 1;
input += params->seed_size;
}
expandSeeds(tree, salt, repIndex, params);
Exit:
free(revealed);
return ret;
}
/* Computes the hash of the parent of `child`, if it is not already known and
 * all of its existing children are available.
 *
 * Does nothing when `child` does not exist (this includes out-of-range
 * indices), when the parent already has data, or when a required child is
 * still missing. On success the parent's buffer is filled with
 * params->digest_size bytes and its haveNode flag is set. */
static void computeParentHash(tree_t* tree, size_t child, uint8_t* salt,
const picnic_instance_t* params) {
if (!exists(tree, child)) {
return;
}
size_t parent = getParent(child);
if (tree->haveNode[parent]) {
return;
}
/* Compute the hash for parent, if we have everything */
if (!tree->haveNode[2 * parent + 1]) {
return;
}
if (exists(tree, 2 * parent + 2) && !tree->haveNode[2 * parent + 2]) {
return;
}
/* Compute parent data = H(left child data || [right child data] || salt || parent idx) */
Keccak_HashInstance ctx;
hash_init_prefix(&ctx, params, HASH_PREFIX_3);
hash_update(&ctx, tree->nodes[2 * parent + 1], params->digest_size);
if (hasRightChild(tree, parent)) {
/* One node may not have a right child when there's an odd number of leaves */
hash_update(&ctx, tree->nodes[2 * parent + 2], params->digest_size);
}
hash_update(&ctx, salt, SALT_SIZE);
/* The parent index is narrowed to 16 bits and passed through htole16 before
 * being absorbed. */
uint16_t parentLE = htole16((uint16_t)parent);
hash_update(&ctx, (uint8_t*)&parentLE, sizeof(uint16_t));
hash_final(&ctx);
hash_squeeze(&ctx, tree->nodes[parent], params->digest_size);
tree->haveNode[parent] = 1;
}
/* Create a Merkle tree by hashing up all nodes.
 * leafData is read at indices [0, tree->numLeaves); entries may be NULL, in
 * which case the corresponding leaf is left without data. Each non-NULL entry
 * must point to tree->dataSize bytes. */
void buildMerkleTree(tree_t* tree, uint8_t** leafData, uint8_t* salt,
                     const picnic_instance_t* params) {
  uint8_t** leafNodes = &tree->nodes[tree->numNodes - tree->numLeaves];
  uint8_t* leafFlags  = &tree->haveNode[tree->numNodes - tree->numLeaves];

  /* Load the leaves. The values committed to were already hashed by the
   * caller, according to the spec, so they are copied verbatim. */
  for (size_t leaf = 0; leaf < tree->numLeaves; leaf++) {
    if (leafData[leaf] == NULL) {
      continue;
    }
    memcpy(leafNodes[leaf], leafData[leaf], tree->dataSize);
    leafFlags[leaf] = 1;
  }

  /* Sweep from the highest index down to 1 so children are processed before
   * their parents. The first index (numNodes) is out of range and is ignored
   * by computeParentHash. */
  for (int child = (int)tree->numNodes; child > 0; child--) {
    computeParentHash(tree, child, salt, params);
  }
}
/* Computes which Merkle nodes the signer must send so the verifier can
 * recompute the root without the leaves listed in missingLeaves.
 *
 * missingLeaves holds missingLeavesSize leaf indices (not node indices).
 * Returns a malloc'ed array of node indices that the caller must free; the
 * number of valid entries is stored in *outputSize. The array has capacity
 * for tree->numLeaves entries.
 * Note that we never output the root node */
static size_t* getRevealedMerkleNodes(tree_t* tree, uint16_t* missingLeaves,
size_t missingLeavesSize, size_t* outputSize) {
size_t firstLeaf = tree->numNodes - tree->numLeaves;
uint8_t* missingNodes = calloc(tree->numNodes, 1);
/* Mark leaves that are missing */
for (size_t i = 0; i < missingLeavesSize; i++) {
missingNodes[firstLeaf + missingLeaves[i]] = 1;
}
/* For the nonleaf nodes, if all existing children are missing, mark it as missing too.
 * Index 0 (the root) is not visited by this loop. */
int lastNonLeaf = getParent(tree->numNodes - 1);
for (int i = lastNonLeaf; i > 0; i--) {
if (!exists(tree, i)) {
continue;
}
if (exists(tree, 2 * i + 2)) {
if (missingNodes[2 * i + 1] && missingNodes[2 * i + 2]) {
missingNodes[i] = 1;
}
} else {
/* Only a left child exists, so it alone decides. */
if (missingNodes[2 * i + 1]) {
missingNodes[i] = 1;
}
}
}
/* For each missing leaf node, add the highest missing node on the path
* back to the root to the set to be revealed */
size_t* revealed = malloc(tree->numLeaves * sizeof(size_t));
size_t pos = 0;
for (size_t i = 0; i < missingLeavesSize; i++) {
size_t node = missingLeaves[i] + firstLeaf; /* input is leaf indexes, translate to nodes */
do {
/* Stop at the first node whose parent is not missing; that node is the
 * top of the missing subtree. Several leaves can share it, hence the
 * duplicate check. */
if (!missingNodes[getParent(node)]) {
if (!contains(revealed, pos, node)) {
revealed[pos] = node;
pos++;
}
break;
}
} while ((node = getParent(node)) != 0);
}
free(missingNodes);
*outputSize = pos;
return revealed;
}
/* Returns the number of bytes openMerkleTree would produce for a tree with
 * numNodes leaves and the given missing-leaf list: one digest
 * (params->digest_size bytes) per revealed node. A temporary tree is built
 * only to compute the count. */
size_t openMerkleTreeSize(size_t numNodes, uint16_t* missingLeaves, size_t missingLeavesSize,
                          const picnic_instance_t* params) {
  size_t revealedCount = 0;
  tree_t* scratch      = createTree(numNodes, params->digest_size);
  size_t* nodeList =
      getRevealedMerkleNodes(scratch, missingLeaves, missingLeavesSize, &revealedCount);

  freeTree(scratch);
  free(nodeList);

  return revealedCount * params->digest_size;
}
/* Serialize the missing nodes that the verifier will require to check
 * commitments for non-missing leaves.
 * Returns a malloc'ed buffer holding the revealed nodes' data back to back
 * (tree->dataSize bytes each) and stores its length in *outputSizeBytes. The
 * caller owns the buffer. */
uint8_t* openMerkleTree(tree_t* tree, uint16_t* missingLeaves, size_t missingLeavesSize,
                        size_t* outputSizeBytes) {
  size_t nodeCount = 0;
  size_t* nodeList = getRevealedMerkleNodes(tree, missingLeaves, missingLeavesSize, &nodeCount);

  /* Serialize output */
  *outputSizeBytes = nodeCount * tree->dataSize;
  uint8_t* buffer  = malloc(*outputSizeBytes);
  for (size_t k = 0; k < nodeCount; k++) {
    memcpy(buffer + k * tree->dataSize, tree->nodes[nodeList[k]], tree->dataSize);
  }

  free(nodeList);
  return buffer;
}
/* addMerkleNodes: deserialize and add the data for nodes provided by the committer
 *
 * The set and order of expected nodes is recomputed from missingLeaves with
 * getRevealedMerkleNodes; input must contain exactly one tree->dataSize chunk
 * per expected node, in that order.
 *
 * Returns 0 on success, -1 if inputSize exceeds INT_MAX, is too short, or has
 * bytes left over after the last expected node. */
int addMerkleNodes(tree_t* tree, uint16_t* missingLeaves, size_t missingLeavesSize, uint8_t* input,
size_t inputSize) {
int ret = 0;
/* Debug-only check; with NDEBUG defined this is not enforced. */
assert(missingLeavesSize < tree->numLeaves);
if (inputSize > INT_MAX) {
return -1;
}
/* Signed so that running out of input shows up as a negative remainder. */
int intLen = (int)inputSize;
size_t revealedSize = 0;
size_t* revealed = getRevealedMerkleNodes(tree, missingLeaves, missingLeavesSize, &revealedSize);
/* The root must never be supplied by the committer. */
assert(!contains(revealed, revealedSize, 0));
/* Deserialize input */
for (size_t i = 0; i < revealedSize; i++) {
intLen -= tree->dataSize;
if (intLen < 0) {
ret = -1;
goto Exit;
}
memcpy(tree->nodes[revealed[i]], input, tree->dataSize);
input += tree->dataSize;
tree->haveNode[revealed[i]] = 1;
}
/* Reject inputs that are longer than required. */
if (intLen != 0) {
ret = -1;
goto Exit;
}
Exit:
free(revealed);
return ret;
}
/* verifyMerkleTree: verify for each leaf that is set.
 * Copies the verifier's recomputed leaves (non-NULL entries of leafData,
 * tree->dataSize bytes each) into the tree, then hashes upward.
 * Returns 0 if the root could be computed. Returns -1 if a recomputed leaf
 * collides with a node already supplied by the prover, or if the root could
 * not be computed. */
int verifyMerkleTree(tree_t* tree, /* uint16_t* missingLeaves, size_t missingLeavesSize, */
                     uint8_t** leafData, uint8_t* salt, const picnic_instance_t* params) {
  const size_t leafBase = tree->numNodes - tree->numLeaves;

  /* Copy the leaf data, where we have it. The actual data being committed to
   * has already been hashed, according to the spec. */
  for (size_t leaf = 0; leaf < tree->numLeaves; leaf++) {
    const size_t node = leafBase + leaf;
    if (leafData[leaf] == NULL) {
      continue;
    }
    if (tree->haveNode[node] == 1) {
      return -1; /* A leaf was assigned from the prover for a node we've recomputed */
    }
    memcpy(tree->nodes[node], leafData[leaf], tree->dataSize);
    tree->haveNode[node] = 1;
  }

  /* The tree now holds some leaves and some intermediate nodes. Work up the
   * tree, computing every node that is still missing. The first index
   * (numNodes) is out of range and is ignored by computeParentHash. */
  for (int child = (int)tree->numNodes; child > 0; child--) {
    computeParentHash(tree, child, salt, params);
  }

  /* Fail if the root was not computed. */
  return tree->haveNode[0] ? 0 : -1;
}

80
src/sig/picnic/external/picnic2_tree.h vendored Normal file
View File

@ -0,0 +1,80 @@
/*! @file picnic2_tree.h
* @brief This file has part of the tree implementation used to generate
* random seeds and commit to multiple values with a Merkle tree.
*
* This file is part of the reference implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/
#ifndef PICNIC2_TREE_H
#define PICNIC2_TREE_H
/*
* Represents a (nearly) complete binary tree, stored in memory as an array.
* The root is at nodes[0], the left child of node k is at 2k + 1, and the
* right child is at 2k + 2.
*/
typedef struct tree_t {
size_t depth; /* The depth of the tree */
uint8_t** nodes; /* The data for each node */
size_t dataSize; /* The size of the data at each node, in bytes */
uint8_t* haveNode; /* If we have the data (seed or hash) for node i, haveNode[i] is 1 */
uint8_t* exists; /* Since the tree is not always complete, nodes marked 0 don't exist */
size_t numNodes; /* The total number of nodes in the tree */
size_t numLeaves; /* The total number of leaves in the tree */
} tree_t;
/* The largest seed size is 256 bits, for the Picnic2-L5-FS parameter set. */
#define MAX_SEED_SIZE_BYTES (32)
tree_t* createTree(size_t numLeaves, size_t dataSize);
void freeTree(tree_t* tree);
uint8_t** getLeaves(tree_t* tree);
/* Get one leaf, leafIndex must be in [0, tree->numLeaves -1] */
uint8_t* getLeaf(tree_t* tree, size_t leafIndex);
/* Functions for trees used to derive seeds.
* Signer's usage: generateSeeds -> revealSeeds -> freeTree
* Verifier's usage: createTree -> reconstructSeeds -> freeTree
*/
/* revealSeeds returns the number of bytes written to output. A safe number of bytes for
* callers to allocate is numLeaves * params->seed_size, or call revealSeedsSize. */
tree_t* generateSeeds(size_t nSeeds, uint8_t* rootSeed, uint8_t* salt, size_t repIndex,
const picnic_instance_t* params);
size_t revealSeeds(tree_t* tree, uint16_t* hideList, size_t hideListSize, uint8_t* output,
size_t outputLen, const picnic_instance_t* params);
size_t revealSeedsSize(size_t numNodes, uint16_t* hideList, size_t hideListSize,
const picnic_instance_t* params);
int reconstructSeeds(tree_t* tree, uint16_t* hideList, size_t hideListSize, uint8_t* input,
size_t inputLen, uint8_t* salt, size_t repIndex,
const picnic_instance_t* params);
/* Functions for Merkle hash trees used for commitments.
*
* Signer call sequence:
* 1. createTree
* 2. buildMerkleTree with all commitments as leaf nodes
* 3. openMerkleTree with missingLeaves - list of commitments the verifier won't recompute
* 4. freeTree
* Verifier call sequence
* 1. createTree
* 2. addMerkleNodes with the output of the signer
* 3. verifyMerkleTree Checks that all leaf nodes present are correct commitments
* 4. freeTree
*/
void buildMerkleTree(tree_t* tree, uint8_t** leafData, uint8_t* salt,
const picnic_instance_t* params);
uint8_t* openMerkleTree(tree_t* tree, uint16_t* missingLeaves, size_t missingLeavesSize,
size_t* outputSizeBytes);
/* Size in bytes of the output of openMerkleTree for a tree with numNodes
 * leaves. missingLeaves lists the leaves the verifier will NOT recompute,
 * matching the definition and the sibling prototypes. */
size_t openMerkleTreeSize(size_t numNodes, uint16_t* missingLeaves, size_t missingLeavesSize,
                          const picnic_instance_t* params);
int addMerkleNodes(tree_t* tree, uint16_t* missingLeaves, size_t missingLeavesSize, uint8_t* input,
size_t inputSize);
int verifyMerkleTree(tree_t* tree, uint8_t** leafData, uint8_t* salt,
const picnic_instance_t* params);
#endif

187
src/sig/picnic/external/picnic2_types.c vendored Normal file
View File

@ -0,0 +1,187 @@
/*! @file picnic2_types.c
* @brief Functions to allocate/free data types used in the Picnic signature
* scheme implementation.
*
* This file is part of the reference implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/
#include "picnic2_types.h"
#include "compat.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
/* Allocates a shares_t holding `count` zero-initialised 64-bit words, with the
 * word array requested at 32-byte alignment.
 *
 * Returns the new object, or NULL if an allocation fails (nothing is leaked).
 * Release with freeShares().
 * NOTE(review): aligned_alloc/aligned_free are whatever compat.h provides;
 * if that maps to C11 aligned_alloc, the size is not always a multiple of the
 * alignment -- confirm the wrapper tolerates this. */
shares_t* allocateShares(size_t count) {
  shares_t* shares = malloc(sizeof(shares_t));
  if (shares == NULL) {
    return NULL;
  }

  shares->shares = aligned_alloc(32, count * sizeof(uint64_t));
  if (shares->shares == NULL) {
    /* A NULL result for a zero-sized request is not a failure. */
    if (count != 0) {
      free(shares);
      return NULL;
    }
  } else {
    memset(shares->shares, 0, count * sizeof(uint64_t));
  }

  shares->numWords = count;
  return shares;
}
/* Releases a shares_t created by allocateShares. Passing NULL is a no-op,
 * matching freeTree, freeRandomTape and freeCommitments2. */
void freeShares(shares_t* shares) {
  if (shares == NULL) {
    return;
  }
  /* The word array came from aligned_alloc, so it needs the matching free. */
  aligned_free(shares->shares);
  free(shares);
}
/* Initialises `tape` with one zero-filled random tape per MPC party.
 *
 * Each tape has room for 2 * view_size + input_size bytes, rounded up to a
 * multiple of 8. All tapes live in one contiguous slab; tape->tape[0] is the
 * slab base (freeRandomTape relies on this). tape->pos is reset to 0. */
void allocateRandomTape(randomTape_t* tape, const picnic_instance_t* params) {
  tape->nTapes = params->num_MPC_parties;
  tape->tape   = malloc(tape->nTapes * sizeof(uint8_t*));

  size_t tapeSizeBytes = 2 * params->view_size + params->input_size;
  tapeSizeBytes        = ((tapeSizeBytes + 7) / 8) * 8;

  uint8_t* slab = calloc(1, tape->nTapes * tapeSizeBytes);
  /* The index has the same type as nTapes: a uint8_t counter would wrap and
   * never terminate once nTapes reaches 256. */
  for (size_t i = 0; i < tape->nTapes; i++) {
    tape->tape[i] = slab;
    slab += tapeSizeBytes;
  }
  tape->pos = 0;
}
/* Releases the buffers owned by `tape`; the randomTape_t object itself is not
 * freed. Passing NULL is a no-op. */
void freeRandomTape(randomTape_t* tape) {
  if (tape == NULL) {
    return;
  }
  /* tape[0] is freed first, then the pointer table. */
  free(tape->tape[0]);
  free(tape->tape);
}
/* Resets *proof and allocates its fixed-size buffers: C (digest_size bytes),
 * input (input_size), aux (view_size) and msgs (view_size + input_size).
 * seedInfo is left NULL with length 0; the sign/verify code sets it.
 * Release with freeProof2(). */
void allocateProof2(proof2_t* proof, const picnic_instance_t* params) {
  memset(proof, 0, sizeof(proof2_t));

  /* Explicit initial values for the fields not allocated here. */
  proof->unOpenedIndex = 0;
  proof->seedInfoLen   = 0;
  proof->seedInfo      = NULL; // Sign/verify code sets it

  proof->C     = malloc(params->digest_size);
  proof->input = malloc(params->input_size);
  proof->aux   = malloc(params->view_size);
  proof->msgs  = malloc(params->view_size + params->input_size);
}
/* Frees every buffer owned by *proof (seedInfo, C, input, aux, msgs); the
 * proof2_t object itself is not freed. Fields that are NULL are harmless
 * because free(NULL) is a no-op. */
void freeProof2(proof2_t* proof) {
  free(proof->msgs);
  free(proof->aux);
  free(proof->input);
  free(proof->C);
  free(proof->seedInfo);
}
/* Prepares *sig: allocates the two challenge lists (num_opened_rounds entries
 * each) and a zeroed array of num_rounds proofs. iSeedInfo and cvInfo start
 * as NULL with length 0. Release with freeSignature2(). */
void allocateSignature2(signature2_t* sig, const picnic_instance_t* params) {
  sig->iSeedInfoLen = 0;
  sig->iSeedInfo    = NULL;
  sig->cvInfoLen    = 0;
  sig->cvInfo       = NULL; // Sign/verify code sets it

  sig->challengeC = (uint16_t*)malloc(params->num_opened_rounds * sizeof(uint16_t));
  sig->challengeP = (uint16_t*)malloc(params->num_opened_rounds * sizeof(uint16_t));

  // Individual proofs are allocated during signature generation, only for rounds when needed
  sig->proofs = calloc(params->num_rounds, sizeof(proof2_t));
}
/* Frees everything owned by *sig: the buffers of each of the num_rounds
 * proofs, the proof array, both challenge lists, iSeedInfo and cvInfo. The
 * signature2_t object itself is not freed. */
void freeSignature2(signature2_t* sig, const picnic_instance_t* params) {
  /* Release the per-round proof buffers before the array that holds them. */
  for (size_t round = 0; round < params->num_rounds; round++) {
    freeProof2(sig->proofs + round);
  }
  free(sig->proofs);

  free(sig->challengeP);
  free(sig->challengeC);
  free(sig->cvInfo);
  free(sig->iSeedInfo);
}
/* Allocates an array of params->num_rounds commitments_t objects, each with
 * room for nCommitments digests of params->digest_size bytes.
 *
 * numCommitments == 0 selects the default of params->num_MPC_parties.
 * All pointer tables and digests share one slab; element 0's `hashes` pointer
 * is the slab base (freeCommitments relies on this).
 * Every element's nCommitments field is initialised (previously only element
 * 0 was set, leaving the others indeterminate). */
commitments_t* allocateCommitments(const picnic_instance_t* params, size_t numCommitments) {
  commitments_t* commitments = malloc(params->num_rounds * sizeof(commitments_t));
  const size_t nCommitments  = (numCommitments) ? numCommitments : params->num_MPC_parties;

  uint8_t* slab = malloc(params->num_rounds * (nCommitments * params->digest_size +
                                               nCommitments * sizeof(uint8_t*)));

  for (uint32_t i = 0; i < params->num_rounds; i++) {
    commitments[i].nCommitments = nCommitments;
    /* Per round: pointer table first, then the digests it points to. */
    commitments[i].hashes = (uint8_t**)slab;
    slab += nCommitments * sizeof(uint8_t*);

    for (uint32_t j = 0; j < nCommitments; j++) {
      commitments[i].hashes[j] = slab;
      slab += params->digest_size;
    }
  }

  return commitments;
}
/* Releases an array obtained from allocateCommitments: first the `hashes`
 * pointer of element 0, then the array itself. */
void freeCommitments(commitments_t* commitments) {
  uint8_t** slabBase = commitments[0].hashes;
  free(slabBase);
  free(commitments);
}
/* Initialise a single, caller-provided commitments_t with capacity for
 * numCommitments digests of params->digest_size bytes each. The pointer table
 * and the digests share one allocation, released by freeCommitments2(). */
void allocateCommitments2(commitments_t* commitments, const picnic_instance_t* params,
                          size_t numCommitments) {
  commitments->nCommitments = numCommitments;

  uint8_t* slab = malloc(numCommitments * params->digest_size + numCommitments * sizeof(uint8_t*));

  /* Layout: [pointer table][digest 0][digest 1]... */
  commitments->hashes = (uint8_t**)slab;
  uint8_t* digest     = slab + numCommitments * sizeof(uint8_t*);
  for (size_t k = 0; k < numCommitments; k++) {
    commitments->hashes[k] = digest;
    digest += params->digest_size;
  }
}
/* Releases the allocation made by allocateCommitments2; the commitments_t
 * object itself is not freed. Passing NULL is a no-op. */
void freeCommitments2(commitments_t* commitments) {
  if (commitments == NULL) {
    return;
  }
  free(commitments->hashes);
}
/* Allocates one zero-filled input buffer of params->input_size bytes per
 * round. The returned pointer table and all buffers share one allocation, so
 * the whole thing is released with a single freeInputs() call. */
inputs_t allocateInputs(const picnic_instance_t* params) {
  uint8_t* slab = calloc(1, params->num_rounds * (params->input_size + sizeof(uint8_t*)));

  /* Layout: [pointer table][input 0][input 1]... */
  inputs_t table  = (uint8_t**)slab;
  uint8_t* cursor = slab + params->num_rounds * sizeof(uint8_t*);

  for (uint32_t round = 0; round < params->num_rounds; round++) {
    table[round] = (uint8_t*)cursor;
    cursor += params->input_size;
  }

  return table;
}
/* Releases an inputs_t with a single free() of the table pointer. */
void freeInputs(inputs_t inputs) {
  void* block = inputs;
  free(block);
}
msgs_t* allocateMsgs(const picnic_instance_t* params) {
msgs_t* msgs = malloc(params->num_rounds * sizeof(msgs_t));
uint8_t* slab =
calloc(1, params->num_rounds * (params->num_MPC_parties *
((params->view_size + params->input_size + 7) / 8 * 8) +
params->num_MPC_parties * sizeof(uint8_t*)));
for (uint32_t i = 0; i < params->num_rounds; i++) {
msgs[i].pos = 0;
msgs[i].unopened = -1;
msgs[i].msgs = (uint8_t**)slab;
slab += params->num_MPC_parties * sizeof(uint8_t*);
for (uint32_t j = 0; j < params->num_MPC_parties; j++) {
msgs[i].msgs[j] = slab;
slab += (params->view_size + params->input_size + 7) / 8 * 8;
}
}
return msgs;
}
/* Releases an array obtained from allocateMsgs: first the `msgs` pointer of
 * element 0, then the array itself. */
void freeMsgs(msgs_t* msgs) {
  uint8_t** slabBase = msgs[0].msgs;
  free(slabBase);
  free(msgs);
}

68
src/sig/picnic/external/picnic2_types.h vendored Normal file
View File

@ -0,0 +1,68 @@
/*! @file picnic2_types.h
* @brief Functions to allocate/free data types used in the Picnic signature
* scheme implementation.
*
* This file is part of the reference implementation of the Picnic signature scheme.
* See the accompanying documentation for complete details.
*
* The code is provided under the MIT license, see LICENSE for
* more details.
* SPDX-License-Identifier: MIT
*/
/* Include guard named after this file (picnic2_types.h), consistent with
 * PICNIC2_TREE_H and PICNIC2_SIMULATE_H, so it cannot collide with a guard
 * belonging to a header named picnic_types.h. */
#ifndef PICNIC2_TYPES_H
#define PICNIC2_TYPES_H

#include "picnic2_impl.h"

/* Type definitions */

/* Random tapes of all parties for one round. */
typedef struct randomTape_t {
  uint8_t** tape;      /* One tape per party; tape[0] is the base of the shared slab */
  uint32_t pos;        /* Current read position; reset to 0 by allocateRandomTape */
  size_t nTapes;       /* Number of tapes (set to params->num_MPC_parties) */
  uint64_t buffer[64]; /* Scratch buffer; NOTE(review): usage not visible here */
} randomTape_t;

/* A list of commitment digests. */
typedef struct commitments_t {
  uint8_t** hashes;    /* hashes[j] points to digest j */
  size_t nCommitments; /* Number of entries in hashes */
} commitments_t;

/* One input buffer per round; see allocateInputs. */
typedef uint8_t** inputs_t;

typedef struct msgs_t {
  uint8_t** msgs; // One for each player
  size_t pos;
  int unopened; // Index of the unopened party, or -1 if all parties opened (when signing)
} msgs_t;

/* An array of 64-bit share words. */
typedef struct shares_t {
  uint64_t* shares; /* numWords words, allocated with 32-byte alignment requested */
  size_t numWords;  /* Number of entries in shares */
} shares_t;

#define UNUSED_PARAMETER(x) (void)(x)

/* Each allocate* function has a matching free* function below it. */
void allocateRandomTape(randomTape_t* tape, const picnic_instance_t* params);
void freeRandomTape(randomTape_t* tape);

void allocateProof2(proof2_t* proof, const picnic_instance_t* params);
void freeProof2(proof2_t* proof);

commitments_t* allocateCommitments(const picnic_instance_t* params, size_t nCommitments);
void freeCommitments(commitments_t* commitments);

void allocateCommitments2(commitments_t* commitments, const picnic_instance_t* params,
                          size_t nCommitments);
void freeCommitments2(commitments_t* commitments);

inputs_t allocateInputs(const picnic_instance_t* params);
void freeInputs(inputs_t inputs);

msgs_t* allocateMsgs(const picnic_instance_t* params);
void freeMsgs(msgs_t* msgs);

shares_t* allocateShares(size_t count);
void freeShares(shares_t* shares);

#endif /* PICNIC2_TYPES_H */

View File

@ -1,9 +1,9 @@
#ifndef L1_FS_API_H
#define L1_FS_API_H
#ifndef PICNIC_L1_FS_API_H
#define PICNIC_L1_FS_API_H
#define CRYPTO_SECRETKEYBYTES (1 + 2 * 16 + 16)
#define CRYPTO_PUBLICKEYBYTES (1 + 2 * 16)
#define CRYPTO_BYTES (4 + 34016)
#define CRYPTO_BYTES (4 + 34032)
#define CRYPTO_ALGNAME "picnicl1fs"
int crypto_sign_keypair(unsigned char* pk, unsigned char* sk);

View File

@ -1,9 +1,9 @@
#ifndef L1_UR_API_H
#define L1_UR_API_H
#ifndef PICNIC_L1_UR_API_H
#define PICNIC_L1_UR_API_H
#define CRYPTO_SECRETKEYBYTES (1 + 2 * 16 + 16)
#define CRYPTO_PUBLICKEYBYTES (1 + 2 * 16)
#define CRYPTO_BYTES (4 + 53945)
#define CRYPTO_BYTES (4 + 53961)
#define CRYPTO_ALGNAME "picnicl1ur"
int crypto_sign_keypair(unsigned char* pk, unsigned char* sk);

View File

@ -1,9 +1,9 @@
#ifndef L3_FS_API_H
#define L3_FS_API_H
#ifndef PICNIC_L3_FS_API_H
#define PICNIC_L3_FS_API_H
#define CRYPTO_SECRETKEYBYTES (1 + 2 * 24 + 24)
#define CRYPTO_PUBLICKEYBYTES (1 + 2 * 24)
#define CRYPTO_BYTES (4 + 76764)
#define CRYPTO_BYTES (4 + 76772)
#define CRYPTO_ALGNAME "picnicl3fs"
int crypto_sign_keypair(unsigned char* pk, unsigned char* sk);

View File

@ -1,9 +1,9 @@
#ifndef L3_UR_API_H
#define L3_UR_API_H
#ifndef PICNIC_L3_UR_API_H
#define PICNIC_L3_UR_API_H
#define CRYPTO_SECRETKEYBYTES (1 + 2 * 24 + 24)
#define CRYPTO_PUBLICKEYBYTES (1 + 2 * 24)
#define CRYPTO_BYTES (4 + 121837)
#define CRYPTO_BYTES (4 + 121845)
#define CRYPTO_ALGNAME "picnicl3ur"
int crypto_sign_keypair(unsigned char* pk, unsigned char* sk);

View File

@ -1,5 +1,5 @@
#ifndef L5_FS_API_H
#define L5_FS_API_H
#ifndef PICNIC_L5_FS_API_H
#define PICNIC_L5_FS_API_H
#define CRYPTO_SECRETKEYBYTES (1 + 2 * 32 + 32)
#define CRYPTO_PUBLICKEYBYTES (1 + 2 * 32)

View File

@ -1,5 +1,5 @@
#ifndef L5_UR_API_H
#define L5_UR_API_H
#ifndef PICNIC_L5_UR_API_H
#define PICNIC_L5_UR_API_H
#define CRYPTO_SECRETKEYBYTES (1 + 2 * 32 + 32)
#define CRYPTO_PUBLICKEYBYTES (1 + 2 * 32)

File diff suppressed because it is too large Load Diff

View File

@ -12,6 +12,7 @@
#include "lowmc.h"
#include "mpc_lowmc.h"
#include "picnic2_simulate.h"
#include "picnic.h"
#define MAX_DIGEST_SIZE 64
@ -19,12 +20,14 @@
typedef enum { TRANSFORM_FS, TRANSFORM_UR } transform_t;
typedef struct {
typedef struct picnic_instance_t {
const lowmc_t* lowmc;
uint32_t digest_size; /* bytes */
uint32_t seed_size; /* bytes */
uint32_t num_rounds;
uint32_t digest_size; /* bytes */
uint32_t seed_size; /* bytes */
uint32_t num_rounds; // T
uint32_t num_opened_rounds; // u
uint32_t num_MPC_parties; // N
uint32_t input_size; /* bytes */
uint32_t output_size; /* bytes */
@ -45,16 +48,18 @@ typedef struct {
zkbpp_lowmc_implementation_f zkbpp_lowmc;
zkbpp_lowmc_verify_implementation_f zkbpp_lowmc_verify;
zkbpp_share_implementation_f mzd_share;
lowmc_compute_aux_implementation_f lowmc_aux;
lowmc_simulate_online_f lowmc_simulate_online;
} impls;
} picnic_instance_t;
const picnic_instance_t* oqs_sig_picnic_instance_get(picnic_params_t param);
const picnic_instance_t* picnic_instance_get(picnic_params_t param);
int oqs_sig_picnic_impl_sign(const picnic_instance_t* pp, const uint8_t* plaintext, const uint8_t* private_key,
int impl_sign(const picnic_instance_t* pp, const uint8_t* plaintext, const uint8_t* private_key,
const uint8_t* public_key, const uint8_t* msg, size_t msglen, uint8_t* sig,
size_t* siglen);
int oqs_sig_picnic_impl_verify(const picnic_instance_t* pp, const uint8_t* plaintext, const uint8_t* public_key,
int impl_verify(const picnic_instance_t* pp, const uint8_t* plaintext, const uint8_t* public_key,
const uint8_t* msg, size_t msglen, const uint8_t* sig, size_t siglen);
PICNIC_EXPORT size_t PICNIC_CALLING_CONVENTION picnic_get_lowmc_block_size(picnic_params_t param);
@ -70,6 +75,14 @@ PICNIC_EXPORT size_t PICNIC_CALLING_CONVENTION picnic_get_public_key_size(picnic
PICNIC_EXPORT int PICNIC_CALLING_CONVENTION picnic_sk_to_pk(const picnic_privatekey_t* sk,
picnic_publickey_t* pk);
// Prefix values for domain separation
extern const uint8_t HASH_PREFIX_0; // = 0
extern const uint8_t HASH_PREFIX_1; // = 1
extern const uint8_t HASH_PREFIX_2; // = 2
extern const uint8_t HASH_PREFIX_3; // = 3
extern const uint8_t HASH_PREFIX_4; // = 4
extern const uint8_t HASH_PREFIX_5; // = 5
#if defined(PICNIC_STATIC)
void visualize_signature(FILE* out, const picnic_instance_t* pp, const uint8_t* msg, size_t msglen,
const uint8_t* sig, size_t siglen);

View File

@ -0,0 +1,60 @@
/*
Implementation by the Keccak Team, namely, Guido Bertoni, Joan Daemen,
Michaël Peeters, Gilles Van Assche and Ronny Van Keer,
hereby denoted as "the implementer".
For more information, feedback or questions, please refer to our website:
https://keccak.team/
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#include <string.h>
#include "KeccakHashtimes4.h"
/* ---------------------------------------------------------------- */
/*
 * Set up a 4-way Keccak hash instance.
 *
 * Rejects a zero delimitedSuffix, then initialises the underlying 4-way
 * sponge with the given rate and capacity. The output length and suffix
 * are recorded in the instance only when the sponge initialisation
 * succeeds.
 *
 * Returns FAIL for a zero suffix, the sponge's status if it is not
 * SUCCESS, and SUCCESS otherwise.
 */
HashReturn Keccak_HashInitializetimes4(Keccak_HashInstancetimes4 *instance, unsigned int rate, unsigned int capacity, unsigned int hashbitlen, unsigned char delimitedSuffix)
{
    HashReturn status;

    if (delimitedSuffix == 0) {
        return FAIL;
    }

    status = (HashReturn)KeccakWidth1600times4_SpongeInitialize(&instance->sponge, rate, capacity);
    if (status == SUCCESS) {
        instance->fixedOutputLength = hashbitlen;
        instance->delimitedSuffix = delimitedSuffix;
    }
    return status;
}
/* ---------------------------------------------------------------- */
/*
 * Absorb databitlen bits from each of the input streams in `data`.
 *
 * Only whole bytes are supported: a bit length that is not a multiple of 8
 * yields FAIL without touching the instance. Otherwise the call is
 * forwarded to the 4-way sponge absorb with the length converted to bytes.
 */
HashReturn Keccak_HashUpdatetimes4(Keccak_HashInstancetimes4 *instance, const BitSequence **data, BitLength databitlen)
{
    if ((databitlen % 8) == 0) {
        return (HashReturn)KeccakWidth1600times4_SpongeAbsorb(&instance->sponge, data, databitlen / 8);
    }
    return FAIL;
}
/* ---------------------------------------------------------------- */
/*
 * Finish absorbing and produce the fixed-length output.
 *
 * Absorbs the instance's delimited suffix (which switches the sponge to
 * squeezing) and, if that succeeds, squeezes fixedOutputLength/8 bytes into
 * each buffer referenced by `hashval`. Any non-SUCCESS status from the
 * suffix step is returned unchanged and nothing is squeezed.
 */
HashReturn Keccak_HashFinaltimes4(Keccak_HashInstancetimes4 *instance, BitSequence **hashval)
{
    HashReturn status = (HashReturn)KeccakWidth1600times4_SpongeAbsorbLastFewBits(&instance->sponge, instance->delimitedSuffix);

    if (status != SUCCESS) {
        return status;
    }
    return (HashReturn)KeccakWidth1600times4_SpongeSqueeze(&instance->sponge, hashval, instance->fixedOutputLength / 8);
}
/* ---------------------------------------------------------------- */
/*
 * Squeeze databitlen bits of output into each buffer referenced by `data`.
 *
 * Only whole bytes are supported: a bit length that is not a multiple of 8
 * yields FAIL. Otherwise the call is forwarded to the 4-way sponge squeeze
 * with the length converted to bytes.
 */
HashReturn Keccak_HashSqueezetimes4(Keccak_HashInstancetimes4 *instance, BitSequence **data, BitLength databitlen)
{
    if ((databitlen % 8) == 0) {
        return (HashReturn)KeccakWidth1600times4_SpongeSqueeze(&instance->sponge, data, databitlen / 8);
    }
    return FAIL;
}

View File

@ -0,0 +1,105 @@
/*
Implementation by the Keccak Team, namely, Guido Bertoni, Joan Daemen,
Michaël Peeters, Gilles Van Assche and Ronny Van Keer,
hereby denoted as "the implementer".
For more information, feedback or questions, please refer to our website:
https://keccak.team/
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#ifndef _KeccakHashInterfacetimes4_h_
#define _KeccakHashInterfacetimes4_h_
#ifndef KeccakP1600times4_excluded
#include "KeccakHash.h"
#include "KeccakSpongeWidth1600times4.h"
/* State of one 4-way parallel Keccak hash computation. */
typedef struct {
/* Underlying 4-way sponge (state, rate, I/O index, phase flag). */
KeccakWidth1600times4_SpongeInstance sponge;
/* Output length in bits as passed to Keccak_HashInitializetimes4();
 * Keccak_HashFinaltimes4() squeezes fixedOutputLength/8 bytes. */
unsigned int fixedOutputLength;
/* Suffix byte absorbed by Keccak_HashFinaltimes4(); never 0, since
 * initialization rejects a zero suffix. */
unsigned char delimitedSuffix;
} Keccak_HashInstancetimes4;
/**
* Function to initialize the Keccak[r, c] sponge function instance used in sequential hashing mode.
* @param hashInstance Pointer to the hash instance to be initialized.
* @param rate The value of the rate r.
* @param capacity The value of the capacity c.
* @param hashbitlen The desired number of output bits,
* or 0 for an arbitrarily-long output.
* @param delimitedSuffix Bits that will be automatically appended to the end
* of the input message, as in domain separation.
* This is a byte containing from 0 to 7 bits
* formatted like the @a delimitedData parameter of
* the Keccak_SpongeAbsorbLastFewBits() function.
* @pre One must have r+c=1600 and the rate a multiple of 8 bits in this implementation.
* @return SUCCESS if successful, FAIL otherwise.
*/
HashReturn Keccak_HashInitializetimes4(Keccak_HashInstancetimes4 *hashInstance, unsigned int rate, unsigned int capacity, unsigned int hashbitlen, unsigned char delimitedSuffix);
/** Macro to initialize a SHAKE128 instance as specified in the FIPS 202 standard.
*/
#define Keccak_HashInitializetimes4_SHAKE128(hashInstance) Keccak_HashInitializetimes4(hashInstance, 1344, 256, 0, 0x1F)
/** Macro to initialize a SHAKE256 instance as specified in the FIPS 202 standard.
*/
#define Keccak_HashInitializetimes4_SHAKE256(hashInstance) Keccak_HashInitializetimes4(hashInstance, 1088, 512, 0, 0x1F)
/** Macro to initialize a SHA3-224 instance as specified in the FIPS 202 standard.
*/
#define Keccak_HashInitializetimes4_SHA3_224(hashInstance) Keccak_HashInitializetimes4(hashInstance, 1152, 448, 224, 0x06)
/** Macro to initialize a SHA3-256 instance as specified in the FIPS 202 standard.
*/
#define Keccak_HashInitializetimes4_SHA3_256(hashInstance) Keccak_HashInitializetimes4(hashInstance, 1088, 512, 256, 0x06)
/** Macro to initialize a SHA3-384 instance as specified in the FIPS 202 standard.
*/
#define Keccak_HashInitializetimes4_SHA3_384(hashInstance) Keccak_HashInitializetimes4(hashInstance, 832, 768, 384, 0x06)
/** Macro to initialize a SHA3-512 instance as specified in the FIPS 202 standard.
*/
#define Keccak_HashInitializetimes4_SHA3_512(hashInstance) Keccak_HashInitializetimes4(hashInstance, 576, 1024, 512, 0x06)
/**
* Function to give input data to be absorbed.
* @param hashInstance Pointer to the hash instance initialized by Keccak_HashInitialize().
* @param data Array of 4 pointers to the input data.
* @param databitlen The number of input bits provided in the input data, must be a multiple of 8.
* @pre @a databitlen is a multiple of 8.
* @return SUCCESS if successful, FAIL otherwise.
*/
HashReturn Keccak_HashUpdatetimes4(Keccak_HashInstancetimes4 *hashInstance, const BitSequence **data, BitLength databitlen);
/**
* Function to call after all input blocks have been input and to get
* output bits if the length was specified when calling Keccak_HashInitialize().
* @param hashInstance Pointer to the hash instance initialized by Keccak_HashInitialize().
* If @a hashbitlen was not 0 in the call to Keccak_HashInitialize(), the number of
* output bits is equal to @a hashbitlen.
* If @a hashbitlen was 0 in the call to Keccak_HashInitialize(), the output bits
* must be extracted using the Keccak_HashSqueeze() function.
* @param hashval Array of 4 pointers to the buffers where to store the output data.
* @return SUCCESS if successful, FAIL otherwise.
*/
HashReturn Keccak_HashFinaltimes4(Keccak_HashInstancetimes4 *hashInstance, BitSequence **hashval);
/**
* Function to squeeze output data.
* @param hashInstance Pointer to the hash instance initialized by Keccak_HashInitialize().
* @param data Array of 4 pointers to the buffers where to store the output data.
* @param databitlen The number of output bits desired (must be a multiple of 8).
* @pre Keccak_HashFinal() must have been already called.
* @pre @a databitlen is a multiple of 8.
* @return SUCCESS if successful, FAIL otherwise.
*/
HashReturn Keccak_HashSqueezetimes4(Keccak_HashInstancetimes4 *hashInstance, BitSequence **data, BitLength databitlen);
#endif
#endif

View File

@ -1,160 +0,0 @@
/*
Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
denoted as "the implementer".
For more information, feedback or questions, please refer to our websites:
http://keccak.noekeon.org/
http://keyak.noekeon.org/
http://ketje.noekeon.org/
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#ifndef _KeccakSponge_h_
#define _KeccakSponge_h_
/** General information
*
* The following type and functions are not actually implemented. Their
* documentation is generic, with the prefix Prefix replaced by
* - KeccakWidth200 for a sponge function based on Keccak-f[200]
* - KeccakWidth400 for a sponge function based on Keccak-f[400]
* - KeccakWidth800 for a sponge function based on Keccak-f[800]
* - KeccakWidth1600 for a sponge function based on Keccak-f[1600]
*
* In all these functions, the rate and capacity must sum to the width of the
* chosen permutation. For instance, to use the sponge function
* Keccak[r=1344, c=256], one must use KeccakWidth1600_Sponge() or a combination
* of KeccakWidth1600_SpongeInitialize(), KeccakWidth1600_SpongeAbsorb(),
* KeccakWidth1600_SpongeAbsorbLastFewBits() and
* KeccakWidth1600_SpongeSqueeze().
*
* The Prefix_SpongeInstance contains the sponge instance attributes for use
* with the Prefix_Sponge* functions.
* It gathers the state processed by the permutation as well as the rate,
* the position of input/output bytes in the state and the phase
* (absorbing or squeezing).
*/
#ifdef DontReallyInclude_DocumentationOnly
/** Function to evaluate the sponge function Keccak[r, c] in a single call.
* @param rate The value of the rate r.
* @param capacity The value of the capacity c.
* @param input Pointer to the input message (before the suffix).
* @param inputByteLen The length of the input message in bytes.
* @param suffix Byte containing from 0 to 7 suffix bits
* that must be absorbed after @a input.
* These <i>n</i> bits must be in the least significant bit positions.
* These bits must be delimited with a bit 1 at position <i>n</i>
* (counting from 0=LSB to 7=MSB) and followed by bits 0
* from position <i>n</i>+1 to position 7.
* Some examples:
* - If no bits are to be absorbed, then @a suffix must be 0x01.
* - If the 2-bit sequence 0,0 is to be absorbed, @a suffix must be 0x04.
* - If the 5-bit sequence 0,1,0,0,1 is to be absorbed, @a suffix must be 0x32.
* - If the 7-bit sequence 1,1,0,1,0,0,0 is to be absorbed, @a suffix must be 0x8B.
* .
* @param output Pointer to the output buffer.
* @param outputByteLen The desired number of output bytes.
* @pre One must have r+c equal to the supported width of this implementation
* and the rate a multiple of 8 bits (one byte) in this implementation.
* @pre @a suffix != 0x00
* @return Zero if successful, 1 otherwise.
*/
int Prefix_Sponge(unsigned int rate, unsigned int capacity, const unsigned char *input, size_t inputByteLen, unsigned char suffix, unsigned char *output, size_t outputByteLen);
/**
* Function to initialize the state of the Keccak[r, c] sponge function.
* The phase of the sponge function is set to absorbing.
* @param spongeInstance Pointer to the sponge instance to be initialized.
* @param rate The value of the rate r.
* @param capacity The value of the capacity c.
* @pre One must have r+c equal to the supported width of this implementation
* and the rate a multiple of 8 bits (one byte) in this implementation.
* @return Zero if successful, 1 otherwise.
*/
int Prefix_SpongeInitialize(Prefix_SpongeInstance *spongeInstance, unsigned int rate, unsigned int capacity);
/**
* Function to give input data bytes for the sponge function to absorb.
* @param spongeInstance Pointer to the sponge instance initialized by Prefix_SpongeInitialize().
* @param data Pointer to the input data.
* @param dataByteLen The number of input bytes provided in the input data.
* @pre The sponge function must be in the absorbing phase,
* i.e., Prefix_SpongeSqueeze() or Prefix_SpongeAbsorbLastFewBits()
* must not have been called before.
* @return Zero if successful, 1 otherwise.
*/
int Prefix_SpongeAbsorb(Prefix_SpongeInstance *spongeInstance, const unsigned char *data, size_t dataByteLen);
/**
* Function to give input data bits for the sponge function to absorb
* and then to switch to the squeezing phase.
* @param spongeInstance Pointer to the sponge instance initialized by Prefix_SpongeInitialize().
* @param delimitedData Byte containing from 0 to 7 trailing bits
* that must be absorbed.
* These <i>n</i> bits must be in the least significant bit positions.
* These bits must be delimited with a bit 1 at position <i>n</i>
* (counting from 0=LSB to 7=MSB) and followed by bits 0
* from position <i>n</i>+1 to position 7.
* Some examples:
* - If no bits are to be absorbed, then @a delimitedData must be 0x01.
* - If the 2-bit sequence 0,0 is to be absorbed, @a delimitedData must be 0x04.
* - If the 5-bit sequence 0,1,0,0,1 is to be absorbed, @a delimitedData must be 0x32.
* - If the 7-bit sequence 1,1,0,1,0,0,0 is to be absorbed, @a delimitedData must be 0x8B.
* .
* @pre The sponge function must be in the absorbing phase,
* i.e., Prefix_SpongeSqueeze() or Prefix_SpongeAbsorbLastFewBits()
* must not have been called before.
* @pre @a delimitedData != 0x00
* @return Zero if successful, 1 otherwise.
*/
int Prefix_SpongeAbsorbLastFewBits(Prefix_SpongeInstance *spongeInstance, unsigned char delimitedData);
/**
* Function to squeeze output data from the sponge function.
* If the sponge function was in the absorbing phase, this function
* switches it to the squeezing phase
* as if Prefix_SpongeAbsorbLastFewBits(spongeInstance, 0x01) was called.
* @param spongeInstance Pointer to the sponge instance initialized by Prefix_SpongeInitialize().
* @param data Pointer to the buffer where to store the output data.
* @param dataByteLen The number of output bytes desired.
* @return Zero if successful, 1 otherwise.
*/
int Prefix_SpongeSqueeze(Prefix_SpongeInstance *spongeInstance, unsigned char *data, size_t dataByteLen);
#endif
#include <string.h>
#include "align.h"
/* Declares the type <prefix>_SpongeInstance: a state buffer of `size` bytes
 * aligned to `alignment`, the rate, the current byte position within the
 * rate portion, and a flag telling whether squeezing has started. */
#define KCP_DeclareSpongeStructure(prefix, size, alignment) \
ALIGN(alignment) typedef struct prefix##_SpongeInstanceStruct { \
unsigned char state[size]; \
unsigned int rate; \
unsigned int byteIOIndex; \
int squeezing; \
} prefix##_SpongeInstance;
/* Declares the five sponge entry points for the given prefix: the one-shot
 * <prefix>_Sponge plus Initialize / Absorb / AbsorbLastFewBits / Squeeze
 * operating on a <prefix>_SpongeInstance. All return int. */
#define KCP_DeclareSpongeFunctions(prefix) \
int prefix##_Sponge(unsigned int rate, unsigned int capacity, const unsigned char *input, size_t inputByteLen, unsigned char suffix, unsigned char *output, size_t outputByteLen); \
int prefix##_SpongeInitialize(prefix##_SpongeInstance *spongeInstance, unsigned int rate, unsigned int capacity); \
int prefix##_SpongeAbsorb(prefix##_SpongeInstance *spongeInstance, const unsigned char *data, size_t dataByteLen); \
int prefix##_SpongeAbsorbLastFewBits(prefix##_SpongeInstance *spongeInstance, unsigned char delimitedData); \
int prefix##_SpongeSqueeze(prefix##_SpongeInstance *spongeInstance, unsigned char *data, size_t dataByteLen);
#ifndef KeccakP1600_excluded
#include "KeccakP-1600-SnP.h"
KCP_DeclareSpongeStructure(KeccakWidth1600, KeccakP1600_stateSizeInBytes, KeccakP1600_stateAlignment)
KCP_DeclareSpongeFunctions(KeccakWidth1600)
#endif
#ifndef KeccakP1600_excluded
#include "KeccakP-1600-SnP.h"
KCP_DeclareSpongeStructure(KeccakWidth1600_12rounds, KeccakP1600_stateSizeInBytes, KeccakP1600_stateAlignment)
KCP_DeclareSpongeFunctions(KeccakWidth1600_12rounds)
#endif
#endif

View File

@ -0,0 +1,34 @@
/*
Implementation by the Keccak Team, namely, Guido Bertoni, Joan Daemen,
Michaël Peeters, Gilles Van Assche and Ronny Van Keer,
hereby denoted as "the implementer".
For more information, feedback or questions, please refer to our website:
https://keccak.team/
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#include "KeccakSpongeWidth1600times4.h"
/* Instantiate the generic 4-way sponge template for Keccak-p[1600]x4.
 * Everything is skipped when KeccakP1600times4_excluded is defined. */
#ifndef KeccakP1600times4_excluded
#include "KeccakP-1600-times4-SnP.h"
/* Template parameters consumed by KeccakSpongetimes4.inc: the prefix of the
 * generated sponge functions, the prefix of the parallel state primitives,
 * the permutation width in bits, and the permutation to apply. */
#define prefix KeccakWidth1600times4
#define PlSnP KeccakP1600times4
#define PlSnP_width 1600
#define PlSnP_Permute KeccakP1600times4_PermuteAll_24rounds
/* The fast-loop absorb hook is intentionally left undefined here (the
 * #define below is commented out), so the template compiles only its
 * generic block-by-block absorb path. */
#if defined(KeccakF1600times4_FastLoop_supported)
//can we enable fastloop absorb?
//#define PlSnP_FastLoop_Absorb KeccakF1600times4_FastLoop_Absorb
#endif
#include "KeccakSpongetimes4.inc"
/* Remove the template parameters again; #undef of a name that was never
 * defined (PlSnP_FastLoop_Absorb) is harmless. */
#undef prefix
#undef PlSnP
#undef PlSnP_width
#undef PlSnP_Permute
#undef PlSnP_FastLoop_Absorb
#endif

View File

@ -0,0 +1,40 @@
/*
Implementation by the Keccak Team, namely, Guido Bertoni, Joan Daemen,
Michaël Peeters, Gilles Van Assche and Ronny Van Keer,
hereby denoted as "the implementer".
For more information, feedback or questions, please refer to our website:
https://keccak.team/
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#ifndef _KeccakSpongeWidth1600times4_h_
#define _KeccakSpongeWidth1600times4_h_
#include <string.h>
#include "align.h"
/* Declares the type <prefix>_SpongeInstance for the 4-way sponge: a state
 * buffer of `size` bytes aligned to `alignment`, plus a single rate, byte
 * I/O index and squeezing flag for the whole instance. */
#define KCP_DeclareSpongeStructuretimes4(prefix, size, alignment) \
ALIGN(alignment) typedef struct prefix##_SpongeInstanceStruct { \
unsigned char state[size]; \
unsigned int rate; \
unsigned int byteIOIndex; \
int squeezing; \
} prefix##_SpongeInstance;
/* Declares the 4-way sponge entry points for the given prefix. Absorb and
 * Squeeze take an array of data pointers (`**data`) instead of a single
 * buffer; no one-shot <prefix>_Sponge function is declared. */
#define KCP_DeclareSpongeFunctionstimes4(prefix) \
int prefix##_SpongeInitialize(prefix##_SpongeInstance *spongeInstance, unsigned int rate, unsigned int capacity); \
int prefix##_SpongeAbsorb(prefix##_SpongeInstance *spongeInstance, const unsigned char **data, size_t dataByteLen); \
int prefix##_SpongeAbsorbLastFewBits(prefix##_SpongeInstance *spongeInstance, unsigned char delimitedData); \
int prefix##_SpongeSqueeze(prefix##_SpongeInstance *spongeInstance, unsigned char **data, size_t dataByteLen);
#ifndef KeccakP1600times4_excluded
#include "KeccakP-1600-times4-SnP.h"
KCP_DeclareSpongeStructuretimes4(KeccakWidth1600times4, KeccakP1600times4_statesSizeInBytes, KeccakP1600times4_statesAlignment)
KCP_DeclareSpongeFunctionstimes4(KeccakWidth1600times4)
#endif
#endif

View File

@ -0,0 +1,211 @@
/*
Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
denoted as "the implementer".
For more information, feedback or questions, please refer to our websites:
http://keccak.noekeon.org/
http://keyak.noekeon.org/
http://ketje.noekeon.org/
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
/* Two-level token pasting so that `prefix` / `PlSnP` are macro-expanded
 * before being concatenated with the suffix. */
#define JOIN0(a, b) a ## b
#define JOIN(a, b) JOIN0(a, b)
/* Names of the sponge type and functions generated by this template,
 * derived from the includer-supplied `prefix`. */
#define Sponge JOIN(prefix, _Sponge)
#define SpongeInstance JOIN(prefix, _SpongeInstance)
#define SpongeInitialize JOIN(prefix, _SpongeInitialize)
#define SpongeAbsorb JOIN(prefix, _SpongeAbsorb)
#define SpongeAbsorbLastFewBits JOIN(prefix, _SpongeAbsorbLastFewBits)
#define SpongeSqueeze JOIN(prefix, _SpongeSqueeze)
/* Names of the parallel state primitives used by this template, derived
 * from the includer-supplied `PlSnP`. */
#define PlSnP_statesSizeInBytes JOIN(PlSnP, _statesSizeInBytes)
#define PlSnP_statesAlignment JOIN(PlSnP, _statesAlignment)
#define PlSnP_StaticInitialize JOIN(PlSnP, _StaticInitialize)
#define PlSnP_InitializeAll JOIN(PlSnP, _InitializeAll)
#define PlSnP_AddByte JOIN(PlSnP, _AddByte)
#define PlSnP_AddBytes JOIN(PlSnP, _AddBytes)
#define PlSnP_ExtractBytes JOIN(PlSnP, _ExtractBytes)
/* ---------------------------------------------------------------- */
/* ---------------------------------------------------------------- */
/* ---------------------------------------------------------------- */
/*
 * Reset a 4-way sponge instance to the start of the absorbing phase.
 *
 * The parameters are accepted only if rate + capacity equals the
 * permutation width and the rate is a non-zero multiple of 8 that does not
 * exceed the width. On success all states are initialised, the rate is
 * stored, and the byte index and squeezing flag are cleared.
 *
 * Returns 0 on success, 1 if the parameters are rejected (the instance is
 * left untouched in that case).
 */
int SpongeInitialize(SpongeInstance *instance, unsigned int rate, unsigned int capacity)
{
    const int widthMismatch = (rate + capacity != PlSnP_width);
    const int rateInvalid = (rate == 0) || (rate > PlSnP_width) || ((rate % 8) != 0);

    if (widthMismatch || rateInvalid) {
        return 1;
    }

    PlSnP_StaticInitialize();
    PlSnP_InitializeAll(instance->state);

    instance->squeezing = 0;
    instance->byteIOIndex = 0;
    instance->rate = rate;
    return 0;
}
/* ---------------------------------------------------------------- */
/*
 * Absorb dataByteLen bytes from each of the four input streams data[0..3]
 * into the corresponding state of the 4-way sponge.
 *
 * All four streams advance in lock step: the same number of bytes is taken
 * from each, and the permutation is applied to all states together whenever
 * a rate-sized block has been filled.
 *
 * Returns 1 if the instance is already squeezing (nothing is absorbed),
 * 0 otherwise. With dataByteLen == 0 the `data` array is not read.
 */
int SpongeAbsorb(SpongeInstance *instance, const unsigned char **data, size_t dataByteLen)
{
size_t i, j;
unsigned int partialBlock;
const unsigned char *curData[4];
unsigned int rateInBytes = instance->rate/8;
if (instance->squeezing)
return 1; /* Too late for additional input */
i = 0;
/* Local cursors into the four streams; only set up when there is input. */
if(dataByteLen > 0) {
for (unsigned int instanceIndex = 0; instanceIndex < 4; instanceIndex++) {
curData[instanceIndex] = data[instanceIndex];
}
}
while(i < dataByteLen) {
/* Block-aligned and at least one full block left: absorb whole blocks. */
if ((instance->byteIOIndex == 0) && (dataByteLen >= (i + rateInBytes))) {
#ifdef PlSnP_FastLoop_Absorb
/* processing full blocks first */
/* NOTE(review): this branch is compiled only when the includer defines
 * PlSnP_FastLoop_Absorb. It calls the hook once per stream on the shared
 * state and advances i by the last call's return value only — confirm this
 * matches the hook's contract before enabling it. */
if ((rateInBytes % (PlSnP_width/200)) == 0) {
/* fast lane: whole lane rate */
for(unsigned int instanceIndex = 0; instanceIndex < 4; instanceIndex++) {
j = PlSnP_FastLoop_Absorb(instance->state, rateInBytes/(PlSnP_width/200), curData[instanceIndex], dataByteLen - i);
curData[instanceIndex] += j;
}
i += j;
}
else {
#endif
/* j counts the bytes still to absorb; each iteration adds one full block
 * per stream at offset 0 and then permutes all states. */
for(j=dataByteLen-i; j>=rateInBytes; j-=rateInBytes) {
for(unsigned int instanceIndex = 0; instanceIndex < 4; instanceIndex++) {
PlSnP_AddBytes(instance->state, instanceIndex, curData[instanceIndex], 0, rateInBytes);
curData[instanceIndex]+=rateInBytes;
}
PlSnP_Permute(instance->state);
}
/* j is now the leftover (< rateInBytes), so i is the amount consumed. */
i = dataByteLen - j;
#ifdef PlSnP_FastLoop_Absorb
}
#endif
}
else {
/* normal lane: using the message queue */
partialBlock = (unsigned int)(dataByteLen - i);
/* Never write past the end of the current block. */
if (partialBlock+instance->byteIOIndex > rateInBytes)
partialBlock = rateInBytes-instance->byteIOIndex;
i += partialBlock;
for(unsigned int instanceIndex = 0; instanceIndex < 4; instanceIndex++) {
PlSnP_AddBytes(instance->state, instanceIndex, curData[instanceIndex], instance->byteIOIndex, partialBlock);
curData[instanceIndex] += partialBlock;
}
instance->byteIOIndex += partialBlock;
/* Block complete: permute and start the next block at offset 0. */
if (instance->byteIOIndex == rateInBytes) {
PlSnP_Permute(instance->state);
instance->byteIOIndex = 0;
}
}
}
return 0;
}
/* ---------------------------------------------------------------- */
/*
 * Absorb the final delimited bits into all four states, apply the padding,
 * and switch the instance to the squeezing phase.
 *
 * delimitedData is added at the current byte position of every state. If
 * its delimiter bit is the top bit (value >= 0x80) and the current position
 * is the last byte of the block, an extra permutation is performed so the
 * closing padding bit lands in a fresh block. The closing 0x80 is then
 * added at the last byte of the rate, the states are permuted, and the byte
 * index is reset.
 *
 * Returns 1 if delimitedData is 0 or the instance is already squeezing
 * (nothing is modified), 0 on success.
 */
int SpongeAbsorbLastFewBits(SpongeInstance *instance, unsigned char delimitedData)
{
    const unsigned int lastByte = instance->rate / 8 - 1;
    unsigned int lane;

    if ((delimitedData == 0) || instance->squeezing) {
        return 1; /* invalid suffix, or too late for additional input */
    }

    /* The delimiter bit doubles as the first bit of the padding. */
    for (lane = 0; lane < 4; ++lane) {
        PlSnP_AddByte(instance->state, lane, delimitedData, instance->byteIOIndex);
    }

    /* First padding bit sits at position rate-1: the second one needs a new block. */
    if ((instance->byteIOIndex == lastByte) && (delimitedData >= 0x80)) {
        PlSnP_Permute(instance->state);
    }

    /* Second (closing) bit of the padding. */
    for (lane = 0; lane < 4; ++lane) {
        PlSnP_AddByte(instance->state, lane, 0x80, lastByte);
    }
    PlSnP_Permute(instance->state);

    instance->squeezing = 1;
    instance->byteIOIndex = 0;
    return 0;
}
/* ---------------------------------------------------------------- */
/*
 * Squeeze dataByteLen bytes from each of the four states into the output
 * buffers data[0..3].
 *
 * If the instance is still absorbing, it is first switched to squeezing by
 * calling SpongeAbsorbLastFewBits(instance, 0x01); the return value of that
 * call is not checked. All four outputs advance in lock step.
 *
 * Always returns 0. With dataByteLen == 0 the `data` array is not read.
 */
int SpongeSqueeze(SpongeInstance *instance, unsigned char **data, size_t dataByteLen)
{
size_t i, j;
unsigned int partialBlock;
unsigned int rateInBytes = instance->rate/8;
unsigned char *curData[4] = { NULL, NULL, NULL, NULL};
if (!instance->squeezing)
SpongeAbsorbLastFewBits(instance, 0x01);
i = 0;
/* Local cursors into the four output buffers. */
if(dataByteLen > 0) {
for (unsigned int instanceIndex = 0; instanceIndex < 4; instanceIndex++) {
curData[instanceIndex] = data[instanceIndex];
}
}
while(i < dataByteLen) {
/* Current block fully consumed and at least one full block requested:
 * permute, then extract whole blocks from offset 0. byteIOIndex is left
 * equal to rateInBytes by this path, i.e. "block exhausted". */
if ((instance->byteIOIndex == rateInBytes) && (dataByteLen >= (i + rateInBytes))) {
for(j=dataByteLen-i; j>=rateInBytes; j-=rateInBytes) {
PlSnP_Permute(instance->state);
for(unsigned int instanceIndex = 0; instanceIndex < 4; instanceIndex++) {
PlSnP_ExtractBytes(instance->state, instanceIndex, curData[instanceIndex], 0, rateInBytes);
curData[instanceIndex]+=rateInBytes;
}
}
/* j is now the leftover (< rateInBytes), so i is the amount produced. */
i = dataByteLen - j;
}
else {
/* normal lane: using the message queue */
/* Refill by permuting once the current block has been used up. */
if (instance->byteIOIndex == rateInBytes) {
PlSnP_Permute(instance->state);
instance->byteIOIndex = 0;
}
partialBlock = (unsigned int)(dataByteLen - i);
/* Never read past the end of the current block. */
if (partialBlock+instance->byteIOIndex > rateInBytes)
partialBlock = rateInBytes-instance->byteIOIndex;
i += partialBlock;
for(unsigned int instanceIndex = 0; instanceIndex < 4; instanceIndex++) {
PlSnP_ExtractBytes(instance->state, instanceIndex, curData[instanceIndex], instance->byteIOIndex, partialBlock);
curData[instanceIndex] += partialBlock;
}
instance->byteIOIndex += partialBlock;
}
}
return 0;
}
/* ---------------------------------------------------------------- */
/* Remove every helper name introduced at the top of this template so the
 * file can be included again with a different prefix / PlSnP. JOIN and
 * JOIN0 are left defined. */
#undef Sponge
#undef SpongeInstance
#undef SpongeInitialize
#undef SpongeAbsorb
#undef SpongeAbsorbLastFewBits
#undef SpongeSqueeze
#undef PlSnP_statesSizeInBytes
#undef PlSnP_statesAlignment
#undef PlSnP_StaticInitialize
#undef PlSnP_InitializeAll
#undef PlSnP_AddByte
#undef PlSnP_AddBytes
#undef PlSnP_ExtractBytes

View File

@ -1,26 +0,0 @@
/*
Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
denoted as "the implementer".
For more information, feedback or questions, please refer to our websites:
http://keccak.noekeon.org/
http://keyak.noekeon.org/
http://ketje.noekeon.org/
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#ifndef _Phases_h_
#define _Phases_h_
/* Phase labels for a Keccak instance, listed in increasing enumerator
 * value: NOT_INITIALIZED (0), ABSORBING, FINAL, SQUEEZING.
 * NOTE(review): the intended transitions between these phases are not
 * visible in this header — confirm against the code that uses it. */
typedef enum {
NOT_INITIALIZED,
ABSORBING,
FINAL,
SQUEEZING
} KCP_Phases;
#endif

View File

@ -0,0 +1,45 @@
/*
Implementation by the Keccak Team, namely, Guido Bertoni, Joan Daemen,
Michaël Peeters, Gilles Van Assche and Ronny Van Keer,
hereby denoted as "the implementer".
For more information, feedback or questions, please refer to our website:
https://keccak.team/
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
---
Please refer to PlSnP-documentation.h for more details.
*/
#ifndef _KeccakP_1600_times4_SnP_h_
#define _KeccakP_1600_times4_SnP_h_
#include "KeccakP-1600-SnP.h"
/* Human-readable description: this variant wraps the serial
 * implementation named by KeccakP1600_implementation. */
#define KeccakP1600times4_implementation "fallback on serial implementation (" KeccakP1600_implementation ")"
/* Size of the combined buffer: one serial state rounded up to a multiple
 * of the serial alignment, times four. */
#define KeccakP1600times4_statesSizeInBytes (((KeccakP1600_stateSizeInBytes+(KeccakP1600_stateAlignment-1))/KeccakP1600_stateAlignment)*KeccakP1600_stateAlignment*4)
/* The combined buffer uses the same alignment as a single serial state. */
#define KeccakP1600times4_statesAlignment KeccakP1600_stateAlignment
/* Marker that this times4 interface is the fallback variant. */
#define KeccakP1600times4_isFallback
/* Entry points of the 4-way Keccak-p[1600] state interface (see
 * PlSnP-documentation.h for the full contract). Every function except
 * StaticInitialize takes the combined `states` buffer. Functions with an
 * `instanceIndex` parameter address a single instance; the *All functions
 * take no instance index. */
void KeccakP1600times4_StaticInitialize( void );
void KeccakP1600times4_InitializeAll(void *states);
void KeccakP1600times4_AddByte(void *states, unsigned int instanceIndex, unsigned char data, unsigned int offset);
void KeccakP1600times4_AddBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length);
void KeccakP1600times4_AddLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset);
void KeccakP1600times4_OverwriteBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length);
void KeccakP1600times4_OverwriteLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset);
void KeccakP1600times4_OverwriteWithZeroes(void *states, unsigned int instanceIndex, unsigned int byteCount);
/* Permutation variants, named after the number of rounds. */
void KeccakP1600times4_PermuteAll_4rounds(void *states);
void KeccakP1600times4_PermuteAll_6rounds(void *states);
void KeccakP1600times4_PermuteAll_12rounds(void *states);
void KeccakP1600times4_PermuteAll_24rounds(void *states);
/* Read-only accessors: `states` is const in all of the following. */
void KeccakP1600times4_ExtractBytes(const void *states, unsigned int instanceIndex, unsigned char *data, unsigned int offset, unsigned int length);
void KeccakP1600times4_ExtractLanesAll(const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset);
void KeccakP1600times4_ExtractAndAddBytes(const void *states, unsigned int instanceIndex, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length);
void KeccakP1600times4_ExtractAndAddLanesAll(const void *states, const unsigned char *input, unsigned char *output, unsigned int laneCount, unsigned int laneOffset);
#endif

View File

@ -0,0 +1,37 @@
/*
Implementation by the Keccak Team, namely, Guido Bertoni, Joan Daemen,
Michaël Peeters, Gilles Van Assche and Ronny Van Keer,
hereby denoted as "the implementer".
For more information, feedback or questions, please refer to our website:
https://keccak.team/
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
---
This file implements Keccak-p[1600]×4 in a PlSnP-compatible way.
Please refer to PlSnP-documentation.h for more details.
This implementation comes with KeccakP-1600-times4-SnP.h in the same folder.
Please refer to LowLevel.build for the exact list of other files it must be combined with.
*/
#include "KeccakP-1600-SnP.h"
/* The macros below are the parameters consumed by PlSnP-Fallback.inc (included at the end),
   which generates the function definitions from them. */
/* Prefix pasted in front of every generated function name (e.g. KeccakP1600times4_AddBytes). */
#define prefix KeccakP1600times4
/* Parallelism of the implementation being wrapped: 1 selects the single-state code paths. */
#define PlSnP_baseParallelism 1
/* Parallelism exposed by the generated functions; target/base gives the number of wrapped states. */
#define PlSnP_targetParallelism 4
/* Size of one lane in bytes; used to convert lane counts and lane offsets into byte counts. */
#define SnP_laneLengthInBytes 8
/* Prefix of the wrapped single-state functions (KeccakP1600_AddBytes, ...). */
#define SnP KeccakP1600
/* Single-state permutations called by the generated PermuteAll variants. */
#define SnP_Permute KeccakP1600_Permute_24rounds
#define SnP_Permute_12rounds KeccakP1600_Permute_12rounds
#define SnP_Permute_Nrounds KeccakP1600_Permute_Nrounds
/* Names given to the generated PermuteAll variants. */
#define PlSnP_PermuteAll KeccakP1600times4_PermuteAll_24rounds
#define PlSnP_PermuteAll_12rounds KeccakP1600times4_PermuteAll_12rounds
#define PlSnP_PermuteAll_6rounds KeccakP1600times4_PermuteAll_6rounds
#define PlSnP_PermuteAll_4rounds KeccakP1600times4_PermuteAll_4rounds
#include "PlSnP-Fallback.inc"

View File

@ -0,0 +1,287 @@
/*
Implementation by Gilles Van Assche, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to our website:
https://keccak.team/
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
---
This file contains macros that help make a PlSnP-compatible implementation by
serially falling back on a SnP-compatible implementation or on a PlSnP-compatible
implementation of lower parallelism degree.
Please refer to PlSnP-documentation.h for more details.
*/
/* expect PlSnP_baseParallelism, PlSnP_targetParallelism */
/* expect SnP_stateSizeInBytes, SnP_stateAlignment */
/* expect prefix */
/* expect SnP_* */
/* Two-level token pasting so that macro arguments (prefix, SnP) are expanded before being joined. */
#define JOIN0(a, b) a ## b
#define JOIN(a, b) JOIN0(a, b)
/* Names of the functions defined by this file: <prefix>_<operation>. */
#define PlSnP_StaticInitialize JOIN(prefix, _StaticInitialize)
#define PlSnP_InitializeAll JOIN(prefix, _InitializeAll)
#define PlSnP_AddByte JOIN(prefix, _AddByte)
#define PlSnP_AddBytes JOIN(prefix, _AddBytes)
#define PlSnP_AddLanesAll JOIN(prefix, _AddLanesAll)
#define PlSnP_OverwriteBytes JOIN(prefix, _OverwriteBytes)
#define PlSnP_OverwriteLanesAll JOIN(prefix, _OverwriteLanesAll)
#define PlSnP_OverwriteWithZeroes JOIN(prefix, _OverwriteWithZeroes)
#define PlSnP_ExtractBytes JOIN(prefix, _ExtractBytes)
#define PlSnP_ExtractLanesAll JOIN(prefix, _ExtractLanesAll)
#define PlSnP_ExtractAndAddBytes JOIN(prefix, _ExtractAndAddBytes)
#define PlSnP_ExtractAndAddLanesAll JOIN(prefix, _ExtractAndAddLanesAll)
/* Size and alignment of one underlying unit: a single state when the base parallelism is 1
   (<SnP>_stateSizeInBytes), otherwise a group of states (<SnP>_statesSizeInBytes). */
#if (PlSnP_baseParallelism == 1)
#define SnP_stateSizeInBytes JOIN(SnP, _stateSizeInBytes)
#define SnP_stateAlignment JOIN(SnP, _stateAlignment)
#else
#define SnP_stateSizeInBytes JOIN(SnP, _statesSizeInBytes)
#define SnP_stateAlignment JOIN(SnP, _statesAlignment)
#endif
/* Number of underlying units that make up the target parallelism. */
#define PlSnP_factor ((PlSnP_targetParallelism)/(PlSnP_baseParallelism))
/* Distance in bytes between consecutive units: the unit size rounded up to a multiple of its alignment. */
#define SnP_stateOffset (((SnP_stateSizeInBytes+(SnP_stateAlignment-1))/SnP_stateAlignment)*SnP_stateAlignment)
/* Address of unit i. Note: refers to a variable named `states` in the enclosing function. */
#define stateWithIndex(i) ((unsigned char *)states+((i)*SnP_stateOffset))
/* Names of the underlying functions being wrapped: <SnP>_<operation>. */
#define SnP_StaticInitialize JOIN(SnP, _StaticInitialize)
#define SnP_Initialize JOIN(SnP, _Initialize)
#define SnP_InitializeAll JOIN(SnP, _InitializeAll)
#define SnP_AddByte JOIN(SnP, _AddByte)
#define SnP_AddBytes JOIN(SnP, _AddBytes)
#define SnP_AddLanesAll JOIN(SnP, _AddLanesAll)
#define SnP_OverwriteBytes JOIN(SnP, _OverwriteBytes)
#define SnP_OverwriteLanesAll JOIN(SnP, _OverwriteLanesAll)
#define SnP_OverwriteWithZeroes JOIN(SnP, _OverwriteWithZeroes)
#define SnP_ExtractBytes JOIN(SnP, _ExtractBytes)
#define SnP_ExtractLanesAll JOIN(SnP, _ExtractLanesAll)
#define SnP_ExtractAndAddBytes JOIN(SnP, _ExtractAndAddBytes)
#define SnP_ExtractAndAddLanesAll JOIN(SnP, _ExtractAndAddLanesAll)
/* One-time static initialization: simply forwarded to the underlying implementation. */
void PlSnP_StaticInitialize(void)
{
    SnP_StaticInitialize();
}
/* Initialize every underlying unit (single state or group of states) stored in `states`. */
void PlSnP_InitializeAll(void *states)
{
    unsigned int unit;

    for (unit = 0; unit < PlSnP_factor; ++unit) {
#if (PlSnP_baseParallelism != 1)
        SnP_InitializeAll(stateWithIndex(unit));
#else
        SnP_Initialize(stateWithIndex(unit));
#endif
    }
}
/*
 * Add one byte at byte position `offset` of instance `instanceIndex`.
 * With a base parallelism above 1, the instance index is split into the index of
 * the underlying group and the index of the instance inside that group.
 */
void PlSnP_AddByte(void *states, unsigned int instanceIndex, unsigned char byte, unsigned int offset)
{
#if (PlSnP_baseParallelism != 1)
    SnP_AddByte(stateWithIndex(instanceIndex/PlSnP_baseParallelism), instanceIndex%PlSnP_baseParallelism, byte, offset);
#else
    SnP_AddByte(stateWithIndex(instanceIndex), byte, offset);
#endif
}
/*
 * Add `length` bytes from `data` at byte position `offset` of instance `instanceIndex`.
 * With a base parallelism above 1, the instance index is split into the index of
 * the underlying group and the index of the instance inside that group.
 */
void PlSnP_AddBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length)
{
#if (PlSnP_baseParallelism != 1)
    SnP_AddBytes(stateWithIndex(instanceIndex/PlSnP_baseParallelism), instanceIndex%PlSnP_baseParallelism, data, offset, length);
#else
    SnP_AddBytes(stateWithIndex(instanceIndex), data, offset, length);
#endif
}
/*
 * Add `laneCount` lanes to every instance. After each underlying unit has been
 * processed, `data` advances by PlSnP_baseParallelism*laneOffset lanes.
 */
void PlSnP_AddLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
{
    /* Byte distance between the input of two consecutive underlying units. */
    const unsigned int stride = PlSnP_baseParallelism*laneOffset*SnP_laneLengthInBytes;
    unsigned int unit;

    for (unit = 0; unit < PlSnP_factor; ++unit, data += stride) {
#if (PlSnP_baseParallelism != 1)
        SnP_AddLanesAll(stateWithIndex(unit), data, laneCount, laneOffset);
#else
        SnP_AddBytes(stateWithIndex(unit), data, 0, laneCount*SnP_laneLengthInBytes);
#endif
    }
}
/*
 * Overwrite `length` bytes at byte position `offset` of instance `instanceIndex`
 * with the bytes from `data`.
 * With a base parallelism above 1, the instance index is split into the index of
 * the underlying group and the index of the instance inside that group.
 */
void PlSnP_OverwriteBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length)
{
#if (PlSnP_baseParallelism != 1)
    SnP_OverwriteBytes(stateWithIndex(instanceIndex/PlSnP_baseParallelism), instanceIndex%PlSnP_baseParallelism, data, offset, length);
#else
    SnP_OverwriteBytes(stateWithIndex(instanceIndex), data, offset, length);
#endif
}
/*
 * Overwrite the first `laneCount` lanes of every instance. After each underlying
 * unit has been processed, `data` advances by PlSnP_baseParallelism*laneOffset lanes.
 */
void PlSnP_OverwriteLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
{
    /* Byte distance between the input of two consecutive underlying units. */
    const unsigned int stride = PlSnP_baseParallelism*laneOffset*SnP_laneLengthInBytes;
    unsigned int unit;

    for (unit = 0; unit < PlSnP_factor; ++unit, data += stride) {
#if (PlSnP_baseParallelism != 1)
        SnP_OverwriteLanesAll(stateWithIndex(unit), data, laneCount, laneOffset);
#else
        SnP_OverwriteBytes(stateWithIndex(unit), data, 0, laneCount*SnP_laneLengthInBytes);
#endif
    }
}
/*
 * Forward a request to zero `byteCount` bytes of instance `instanceIndex`.
 * With a base parallelism above 1, the instance index is split into the index of
 * the underlying group and the index of the instance inside that group.
 */
void PlSnP_OverwriteWithZeroes(void *states, unsigned int instanceIndex, unsigned int byteCount)
{
#if (PlSnP_baseParallelism != 1)
    SnP_OverwriteWithZeroes(stateWithIndex(instanceIndex/PlSnP_baseParallelism), instanceIndex%PlSnP_baseParallelism, byteCount);
#else
    SnP_OverwriteWithZeroes(stateWithIndex(instanceIndex), byteCount);
#endif
}
/* Apply the default permutation (SnP_Permute / SnP_PermuteAll) to every underlying unit in turn. */
void PlSnP_PermuteAll(void *states)
{
    unsigned int unit;

    for (unit = 0; unit < PlSnP_factor; ++unit) {
#if (PlSnP_baseParallelism != 1)
        SnP_PermuteAll(stateWithIndex(unit));
#else
        SnP_Permute(stateWithIndex(unit));
#endif
    }
}
/* Only generated when the includer provides a 12-round permutation to fall back on. */
#if (defined(SnP_Permute_12rounds) || defined(SnP_PermuteAll_12rounds))
/* Apply the 12-round permutation to every underlying unit in turn. */
void PlSnP_PermuteAll_12rounds(void *states)
{
    unsigned int unit;

    for (unit = 0; unit < PlSnP_factor; ++unit) {
#if (PlSnP_baseParallelism != 1)
        SnP_PermuteAll_12rounds(stateWithIndex(unit));
#else
        SnP_Permute_12rounds(stateWithIndex(unit));
#endif
    }
}
#endif
/* Only generated when the includer provides an N-round or a 6-round permutation to fall back on. */
#if (defined(SnP_Permute_Nrounds) || defined(SnP_PermuteAll_6rounds))
/* Apply the 6-round permutation to every underlying unit in turn. */
void PlSnP_PermuteAll_6rounds(void *states)
{
    unsigned int unit;

    for (unit = 0; unit < PlSnP_factor; ++unit) {
#if (PlSnP_baseParallelism != 1)
        SnP_PermuteAll_6rounds(stateWithIndex(unit));
#else
        SnP_Permute_Nrounds(stateWithIndex(unit), 6);
#endif
    }
}
#endif
/* Only generated when the includer provides an N-round or a 4-round permutation to fall back on. */
#if (defined(SnP_Permute_Nrounds) || defined(SnP_PermuteAll_4rounds))
/* Apply the 4-round permutation to every underlying unit in turn. */
void PlSnP_PermuteAll_4rounds(void *states)
{
    unsigned int unit;

    for (unit = 0; unit < PlSnP_factor; ++unit) {
#if (PlSnP_baseParallelism != 1)
        SnP_PermuteAll_4rounds(stateWithIndex(unit));
#else
        SnP_Permute_Nrounds(stateWithIndex(unit), 4);
#endif
    }
}
#endif
/*
 * Copy `length` bytes, starting at byte position `offset`, out of instance
 * `instanceIndex` into `data`.
 *
 * `states` is taken as a pointer to const so that this definition matches the
 * prototype published in the times4 SnP header (const void *states) and the
 * sibling ExtractLanesAll functions; a definition and a declaration with
 * different parameter types are incompatible. Callers passing a plain `void *`
 * are unaffected. The stateWithIndex() macro casts to `unsigned char *`
 * explicitly; the underlying extract function is only expected to read.
 */
void PlSnP_ExtractBytes(const void *states, unsigned int instanceIndex, unsigned char *data, unsigned int offset, unsigned int length)
{
#if (PlSnP_baseParallelism == 1)
    SnP_ExtractBytes(stateWithIndex(instanceIndex), data, offset, length);
#else
    /* Split the instance index into (group index, index inside the group). */
    SnP_ExtractBytes(stateWithIndex(instanceIndex/PlSnP_baseParallelism), instanceIndex%PlSnP_baseParallelism, data, offset, length);
#endif
}
/*
 * Extract the first `laneCount` lanes of every instance into `data`. After each
 * underlying unit has been processed, `data` advances by
 * PlSnP_baseParallelism*laneOffset lanes.
 */
void PlSnP_ExtractLanesAll(const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset)
{
    /* Byte distance between the output of two consecutive underlying units. */
    const unsigned int stride = laneOffset*SnP_laneLengthInBytes*PlSnP_baseParallelism;
    unsigned int unit;

    for (unit = 0; unit < PlSnP_factor; ++unit, data += stride) {
#if (PlSnP_baseParallelism != 1)
        SnP_ExtractLanesAll(stateWithIndex(unit), data, laneCount, laneOffset);
#else
        SnP_ExtractBytes(stateWithIndex(unit), data, 0, laneCount*SnP_laneLengthInBytes);
#endif
    }
}
/*
 * Forward an extract-and-add request for `length` bytes at byte position `offset`
 * of instance `instanceIndex`: `input` and the state bytes are combined into `output`
 * by the underlying implementation.
 *
 * `states` is taken as a pointer to const so that this definition matches the
 * prototype published in the times4 SnP header (const void *states) and the
 * sibling ExtractAndAddLanesAll function. Callers passing a plain `void *`
 * are unaffected.
 */
void PlSnP_ExtractAndAddBytes(const void *states, unsigned int instanceIndex, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length)
{
#if (PlSnP_baseParallelism == 1)
    SnP_ExtractAndAddBytes(stateWithIndex(instanceIndex), input, output, offset, length);
#else
    /* Split the instance index into (group index, index inside the group). */
    SnP_ExtractAndAddBytes(stateWithIndex(instanceIndex/PlSnP_baseParallelism), instanceIndex%PlSnP_baseParallelism, input, output, offset, length);
#endif
}
/*
 * Extract-and-add on the first `laneCount` lanes of every instance. After each
 * underlying unit has been processed, both `input` and `output` advance by
 * PlSnP_baseParallelism*laneOffset lanes.
 */
void PlSnP_ExtractAndAddLanesAll(const void *states, const unsigned char *input, unsigned char *output, unsigned int laneCount, unsigned int laneOffset)
{
    /* Byte distance between the buffers of two consecutive underlying units. */
    const unsigned int stride = laneOffset*SnP_laneLengthInBytes*PlSnP_baseParallelism;
    unsigned int unit;

    for (unit = 0; unit < PlSnP_factor; ++unit, input += stride, output += stride) {
#if (PlSnP_baseParallelism != 1)
        SnP_ExtractAndAddLanesAll(stateWithIndex(unit), input, output, laneCount, laneOffset);
#else
        SnP_ExtractAndAddBytes(stateWithIndex(unit), input, output, 0, laneCount*SnP_laneLengthInBytes);
#endif
    }
}
/* Clean up the helper macros so that they do not leak into the including file. */

/* Token-pasting and addressing helpers. */
#undef JOIN0
#undef JOIN
#undef stateWithIndex
#undef SnP_stateOffset
#undef PlSnP_factor
#undef SnP_stateSizeInBytes
#undef SnP_stateAlignment

/* Names of the generated functions. */
#undef PlSnP_StaticInitialize
#undef PlSnP_InitializeAll
#undef PlSnP_AddByte
#undef PlSnP_AddBytes
#undef PlSnP_AddLanesAll
#undef PlSnP_OverwriteBytes
#undef PlSnP_OverwriteLanesAll
#undef PlSnP_OverwriteWithZeroes
#undef PlSnP_PermuteAll
#undef PlSnP_ExtractBytes
#undef PlSnP_ExtractLanesAll
#undef PlSnP_ExtractAndAddBytes
#undef PlSnP_ExtractAndAddLanesAll

/* Names of the wrapped functions. */
#undef SnP_StaticInitialize
#undef SnP_Initialize
#undef SnP_InitializeAll
#undef SnP_AddByte
#undef SnP_AddBytes
#undef SnP_AddLanesAll
#undef SnP_OverwriteBytes
#undef SnP_OverwriteLanesAll
#undef SnP_OverwriteWithZeroes
#undef SnP_ExtractBytes
#undef SnP_ExtractLanesAll
#undef SnP_ExtractAndAddBytes
#undef SnP_ExtractAndAddLanesAll

View File

@ -1,753 +0,0 @@
/*
Implementation by Vladimir Sedach, hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
////////////////////////////////////////////////////////////////////////////////
// Important: "state" parameter must be SnP_align byte aligned and SnP_stateSizeInBytes long.
// Compile with either -mavx2 or /arch:AVX and -O2 or /O2 options.
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#ifdef __GNUC__
#include <x86intrin.h>
#else
#include <immintrin.h>
#endif
#include "KeccakP-1600-AVX2.h"
// GCC: request optimization level 2 for the code in this file.
#ifdef __GNUC__
#pragma GCC optimize("2")
#endif
// MSVC: silence the warning raised when a macro is invoked with an empty argument
// (this file does so, e.g. KECCAK_PERMUTE()).
#ifdef _MSC_VER
#pragma warning(disable: 4003) //not enough actual parameters for macro
#endif
// Portable spelling of "align this object on x bytes".
#ifdef __GNUC__
#define __ALIGN(x) __attribute__((aligned(x)))
#else
#define __ALIGN(x) __declspec(align(x))
#endif
// 64-bit lane types used throughout this file.
typedef unsigned long long UINT64;
typedef long long INT64;
// In-memory layout of the state used by this implementation (7 vectors of 4 x 64 bits).
// The member order matters: KeccakP1600_ExtractBytes reads a44 by indexing one element
// past the end of c4, so a44 must directly follow c4.
//*******************
typedef struct
//*******************
{
__m256i a0, a1, a2, a3, a4; //a[row, 0..3] rows
__m256i c4; //a[0..3, 4] column
__m256i a44; //a[4, 4]
} keccak_state_t;
// Build a vector from four 64-bit values; i0 goes to element 0 (lowest), i3 to element 3.
#define SET(i0, i1, i2, i3) _mm256_setr_epi64x(i0, i1, i2, i3)
#define XOR(a, b) _mm256_xor_si256(a, b)
// Reorder the 64-bit elements of a: result element k is a[ik].
#define PERMUTE(a, i0, i1, i2, i3) _mm256_permute4x64_epi64(a, _MM_SHUFFLE(i3, i2, i1, i0))
// Per 64-bit element select: flag ik == 1 takes element k from b, 0 takes it from a.
// Each flag is multiplied by 3 so that it covers the two 32-bit halves of the element.
#define BLEND(a, b, i0, i1, i2, i3) _mm256_blend_epi32(a, b, _MM_SHUFFLE(3*(i3), 3*(i2), 3*(i1), 3*(i0)))
// Masked load/store of 64-bit elements: flag ik == 1 sets the top bit of mask element k,
// which enables the transfer of element k.
#define MASKLOAD(p, i0, i1, i2, i3) _mm256_maskload_epi64((const INT64 *)(p), \
SET((UINT64)(i0) << 63, (UINT64)(i1) << 63, (UINT64)(i2) << 63, (UINT64)(i3) << 63))
#define MASKSTORE(p, i0, i1, i2, i3, a) _mm256_maskstore_epi64((INT64 *)(p), \
SET((UINT64)(i0) << 63, (UINT64)(i1) << 63, (UINT64)(i2) << 63, (UINT64)(i3) << 63), a)
// Unaligned 256-bit load/store.
#define LOAD(p) _mm256_loadu_si256((const __m256i *)(p))
#define STORE(p, a) _mm256_storeu_si256((__m256i *)(p), a)
// STOREk: store only 64-bit element k of a to p.
#define STORE0(p, a) _mm_storel_epi64((__m128i *)(p), _mm256_castsi256_si128(a))
#define STORE1(p, a) _mm_storeh_pd((double *)(p), _mm_castsi128_pd(_mm256_castsi256_si128(a)))
#define STORE2(p, a) _mm_storel_epi64((__m128i *)(p), _mm256_extracti128_si256(a, 1))
#define STORE3(p, a) _mm_storeh_pd((double *)(p), _mm_castsi128_pd(_mm256_extracti128_si256(a, 1)))
// Vector whose element 0 is taken from *p (accessed as a __m128i) with element 1 cleared.
#define LOAD0(p) _mm256_castsi128_si256(_mm_move_epi64(*(__m128i *)(p)))
// Both constant flavours currently use the same plain vector type.
#define ROLV_TYPE __m256i
#define _ROLV_TYPE __m256i
// Declare the pair of per-element shift-count vectors (left counts, and 64 minus them)
// that ROLV(a, name) uses to rotate each element of a by its own amount.
#define ROLV_CONST(name, i0, i1, i2, i3) \
ROLV_TYPE SLLV##name = SET(i0, i1, i2, i3); \
ROLV_TYPE SRLV##name = SET(64 - i0, 64 - i1, 64 - i2, 64 - i3);
#define _ROLV_CONST(name, i0, i1, i2, i3) \
_ROLV_TYPE SLLV##name = SET(i0, i1, i2, i3); \
_ROLV_TYPE SRLV##name = SET(64 - i0, 64 - i1, 64 - i2, 64 - i3);
// Rotate every 64-bit element of a left by the per-element counts declared under `name`.
#define ROLV(a, name) \
XOR(_mm256_sllv_epi64(a, SLLV##name), \
_mm256_srlv_epi64(a, SRLV##name))
// Rotate every 64-bit element of a left by the same immediate count i.
#define ROL(a, i) \
XOR(_mm256_slli_epi64(a, i), \
_mm256_srli_epi64(a, 64 - i))
/*
#define ROLV(a, i0, i1, i2, i3) \
XOR(_mm256_sllv_epi64(a, SET(i0, i1, i2, i3)), \
_mm256_srlv_epi64(a, SET(64 - i0, 64 - i1, 64 - i2, 64 - i3)))
*/
/**************************/\
#define KECCAK_PERMUTE_VARS \
/**************************/\
__m256i a0, a1, a2, a3, a4, c4; \
__m256i a04, a14, a24, a34, a44; \
__m256i b0, b1, b2, b3, b4; \
__m256i b04, b14, b24, b34, b44; \
__m256i r0, r1, r2, r3; \
/* Rotation constants w/o "volatile" attribute. */ \
ROLV_CONST(A0, 0, 1, 62, 28) \
ROLV_CONST(A1, 36, 44, 6, 55) \
ROLV_CONST(A2, 3, 10, 43, 25) \
ROLV_CONST(A3, 41, 45, 15, 21) \
ROLV_CONST(A4, 18, 2, 61, 56) \
ROLV_CONST(C4, 27, 20, 39, 8) \
/* Rotation constants with "volatile" attribute (GC only). */ \
_ROLV_CONST(_A0, 0, 1, 62, 28) \
_ROLV_CONST(_A1, 36, 44, 6, 55) \
_ROLV_CONST(_A2, 3, 10, 43, 25) \
_ROLV_CONST(_A3, 41, 45, 15, 21) \
_ROLV_CONST(_A4, 18, 2, 61, 56) \
_ROLV_CONST(_C4, 27, 20, 39, 8) \
\
keccak_state_t *s = (keccak_state_t *)state; \
ptrdiff_t round_i;
/* KECCAK_LOAD: copy the seven state vectors from *s into the local working variables
   declared by KECCAK_PERMUTE_VARS. */
/******************/\
#define KECCAK_LOAD \
/******************/\
a0 = LOAD(&s->a0); \
a1 = LOAD(&s->a1); \
a2 = LOAD(&s->a2); \
a3 = LOAD(&s->a3); \
a4 = LOAD(&s->a4); \
c4 = LOAD(&s->c4); \
a44 = LOAD(&s->a44);
/* KECCAK_STORE: write the seven local working variables back into *s
   (the counterpart of KECCAK_LOAD). */
/*******************/\
#define KECCAK_STORE \
/*******************/\
STORE(&s->a0, a0); \
STORE(&s->a1, a1); \
STORE(&s->a2, a2); \
STORE(&s->a3, a3); \
STORE(&s->a4, a4); \
STORE(&s->c4, c4); \
STORE(&s->a44, a44);
#define KECCAK_NO_ASM // !!!
#if defined(KECCAK_NO_ASM) || !(defined(__x86_64__) || defined(__X86_64__) || defined(__LP64__) || \
defined(_M_X64) || defined(_M_AMD64) || defined(_WIN64)) || \
!defined(__GNUC__)
// KECCAK_PERMUTE_LOOP (intrinsics variant): runs nrRounds rounds on the working variables
// declared by KECCAK_PERMUTE_VARS. round_i counts down from nrRounds-1 to 0 and indexes
// keccak_rc, which is stored in reverse round order, so the rounds executed are always the
// last nrRounds rounds of the 24-round permutation. nrRounds must be between 1 and 24:
// an unsigned 0 makes nrRounds-1 wrap around, and values above 24 index past keccak_rc.
// const_pref: "_" or "" to choose the ROLV_CONST rotation constants with or w/o "volatile".
/*********************************/\
#define KECCAK_PERMUTE_LOOP(const_pref, nrRounds) \
/*********************************/\
for (round_i = (nrRounds-1); round_i >= 0; round_i--) \
{ \
/* a0..a4 are rows a[row, 0..3], c4 is column a[0..3, 4], and a44 is element a[4, 4]. */ \
/* C[x] = A[0, x] ^ A[1, x] ^ A[2, x] ^ A[3, x] ^ A[4, x] */ \
r0 = XOR(a0, a1); \
r0 = XOR(r0, a2); \
r0 = XOR(r0, a3); \
r0 = XOR(r0, a4); /*C[0, 1, 2, 3]*/ \
\
r1 = XOR(c4, _mm256_permute2x128_si256(c4, c4, 0x11)); \
r1 = XOR(r1, _mm256_unpackhi_epi64(r1, r1)); \
r1 = XOR(r1, a44); /*C[4]*/ \
\
/* D[x] = C[x - 1] ^ rot(C[x + 1], 1) */ \
\
/* (b0, b04) = C[4, 0, 1, 2, 3]. */ \
b0 = PERMUTE(r0, 3, 0, 1, 2); /*C[3, 0, 1, 2]*/ \
b04 = b0; /*C[3]*/ \
b0 = BLEND(b0, r1, 1, 0, 0, 0); /*C[4, 0, 1, 2]*/ \
\
r0 = ROL(r0, 1); /*rot(C[0, 1, 2, 3])*/ \
r1 = ROL(r1, 1); /*rot(C[4])*/ \
\
/* (r1, r0) = rot(C[1, 2, 3, 4, 0]). */ \
r1 = BLEND(r0, r1, 1, 0, 0, 0); /*rot(C[4, 1, 2, 3])*/ \
r1 = PERMUTE(r1, 1, 2, 3, 0); /*rot(C[1, 2, 3, 4])*/ \
\
/* (b0, b04) = D[0, 1, 2, 3, 4]. */ \
b0 = XOR(b0, r1); \
b04 = XOR(b04, r0); \
\
/* A[y, x] = A[y, x] ^ D[x] */ \
a0 = XOR(a0, b0); \
a1 = XOR(a1, b0); \
a2 = XOR(a2, b0); \
a3 = XOR(a3, b0); \
a4 = XOR(a4, b0); \
\
a44 = XOR(a44, b04); \
c4 = XOR(c4, _mm256_broadcastq_epi64(_mm256_castsi256_si128(b04))); \
\
/* B[2*x + 3*y, y] = rot(A[y, x], R[y, x]) */ \
/* After this, y-rows of A become y-columns of B. */ \
\
/* b0..b4 are rows a[row, 0..3], c4 is column a[0..3, 4], and a44 is element a[4, 4]. */ \
b0 = ROLV(a0, const_pref##A0); \
b1 = ROLV(a1, const_pref##A1); \
b2 = ROLV(a2, const_pref##A2); \
b3 = ROLV(a3, const_pref##A3); \
b4 = ROLV(a4, const_pref##A4); \
c4 = ROLV(c4, const_pref##C4); \
\
/* c4 = PERMUTE(c4, 2, 1, 3, 0); //to avoid r1 calc below; makes slower other parts */ \
a44 = ROL(a44, 14); \
\
/* Now b0..b4 are columns a[0..3, col], b04..b44 are last elements a[4, 0..4] of those columns. */ \
r0 = PERMUTE(b0, 0, 3, 1, 0); \
r1 = _mm256_broadcastq_epi64(_mm256_castsi256_si128(c4)); \
b04 = _mm256_permute2x128_si256(b0, b0, 0x11); \
b0 = BLEND(r0, r1, 0, 0, 0, 1); \
\
r0 = PERMUTE(b1, 1, 3, 2, 0); \
r1 = _mm256_unpackhi_epi64(c4, c4); \
b14 = PERMUTE(b1, 3, 3, 3, 3); \
/* b14 = _mm256_unpackhi_epi64(r0, r0); */ \
b1 = BLEND(r0, r1, 0, 1, 0, 0); \
\
b2 = PERMUTE(b2, 2, 0, 3, 1); \
b24 = _mm256_permute2x128_si256(c4, c4, 0x11); \
\
r0 = PERMUTE(b3, 3, 1, 0, 2); \
r1 = PERMUTE(c4, 3, 3, 3, 3); \
b34 = b3; \
b3 = BLEND(r0, r1, 0, 0, 1, 0); \
\
r0 = PERMUTE(b4, 1, 2, 0, 3); \
r1 = _mm256_broadcastq_epi64(_mm256_castsi256_si128(a44)); \
b44 = _mm256_unpackhi_epi64(b4, b4); \
/* b44 = r0; */ \
b4 = BLEND(r0, r1, 1, 0, 0, 0); \
\
/* A[y, x] = B[y, x] ^ (~B[y, x + 1] & B[y, x + 2]) */ \
/* A[0, 0] = A[0, 0] ^ RC */ \
\
/* a0..a3, c4 are columns a[0..3, col]. */ \
a0 = XOR(b0, _mm256_andnot_si256(b1, b2)); \
a0 = XOR(a0, *(__m256i *)(keccak_rc + round_i)); \
\
a1 = XOR(b1, _mm256_andnot_si256(b2, b3)); \
a2 = XOR(b2, _mm256_andnot_si256(b3, b4)); \
a3 = XOR(b3, _mm256_andnot_si256(b4, b0)); \
c4 = XOR(b4, _mm256_andnot_si256(b0, b1)); \
\
/* Transpose A[] so that a0..a4 are rows again. */ \
r0 = _mm256_unpacklo_epi64(a0, a1); \
r1 = _mm256_unpackhi_epi64(a0, a1); \
r2 = _mm256_unpacklo_epi64(a2, a3); \
r3 = _mm256_unpackhi_epi64(a2, a3); \
a0 = _mm256_permute2x128_si256(r0, r2, 0x20); \
a1 = _mm256_permute2x128_si256(r1, r3, 0x20); \
a2 = _mm256_permute2x128_si256(r2, r0, 0x13); \
a3 = _mm256_permute2x128_si256(r3, r1, 0x13); \
\
a04 = XOR(b04, _mm256_andnot_si256(b14, b24)); \
a14 = XOR(b14, _mm256_andnot_si256(b24, b34)); \
a24 = XOR(b24, _mm256_andnot_si256(b34, b44)); \
a34 = XOR(b34, _mm256_andnot_si256(b44, b04)); \
a44 = XOR(b44, _mm256_andnot_si256(b04, b14)); \
\
r0 = _mm256_unpacklo_epi64(a04, a14); \
r1 = _mm256_unpacklo_epi64(a24, a34); \
a4 = _mm256_permute2x128_si256(r0, r1, 0x20); \
} //for (round_i
// Fixed round-count wrappers around the loop above.
#define KECCAK_PERMUTE(const_pref) KECCAK_PERMUTE_LOOP(const_pref, 24)
#define KECCAK_PERMUTE_12rounds(const_pref) KECCAK_PERMUTE_LOOP(const_pref, 12)
#else
/* KECCAK_PERMUTE_LOOP (inline-assembly variant).
   This branch is not compiled while KECCAK_NO_ASM is defined above.
   rax starts at 8*4*nrRounds (operand %7, an immediate, so nrRounds must be a compile-time
   constant here) and is decremented by 32, the size of one keccak_rc entry, once per round;
   the round constant is read from (rdx + rax) with rdx holding keccak_rc, and the loop ends
   when rax reaches zero. The rotation counts are read from the SLLV_xx / SRLV_xx variables
   declared by KECCAK_PERMUTE_VARS; const_pref is not used by this variant. */
/*********************************/\
#define KECCAK_PERMUTE_LOOP(const_pref, nrRounds) \
/*********************************/\
__asm volatile \
( \
"movq %7, %%rax\n" \
"1:\n" \
"vpxor %1, %0, %%ymm9\n" \
"vpxor %2, %%ymm9, %%ymm9\n" \
"vpxor %3, %%ymm9, %%ymm9\n" \
"vpxor %4, %%ymm9, %%ymm9\n" \
"vpermq $147, %%ymm9, %%ymm8\n" \
"vpsrlq $63, %%ymm9, %%ymm7\n" \
"vpsllq $1, %%ymm9, %%ymm9\n" \
"vperm2i128 $17, %5, %5, %%ymm0\n" \
"vpxor %%ymm0, %5, %%ymm0\n" \
"vpunpckhqdq %%ymm0, %%ymm0, %%ymm6\n" \
"vpxor %%ymm6, %%ymm0, %%ymm6\n" \
"vpxor %6, %%ymm6, %%ymm6\n" \
"vpxor %%ymm7, %%ymm9, %%ymm7\n" \
"vpblendd $3, %%ymm6, %%ymm8, %%ymm0\n" \
"vpsrlq $63, %%ymm6, %%ymm9\n" \
"vpsllq $1, %%ymm6, %%ymm6\n" \
"vpxor %%ymm9, %%ymm6, %%ymm6\n" \
"vpblendd $3, %%ymm6, %%ymm7, %%ymm6\n" \
"vpxor %%ymm7, %%ymm8, %%ymm7\n" \
"vpxor %%ymm7, %6, %%ymm9\n" \
"vpermq $57, %%ymm6, %%ymm6\n" \
"vpxor %%ymm6, %%ymm0, %%ymm0\n" \
"vpxor %%ymm0, %0, %0\n" \
"vpxor %%ymm0, %1, %1\n" \
"vpbroadcastq %%xmm7, %%ymm7\n" \
"vpxor %%ymm7, %5, %5\n" \
"vpxor %%ymm0, %2, %2\n" \
"vpxor %%ymm0, %3, %3\n" \
"vpxor %%ymm0, %4, %4\n" \
"vmovdqa %[SRLV_A0], %%ymm6\n" \
"vmovdqa %[SLLV_A0], %6\n" \
"vmovdqa %[SRLV_A1], %%ymm7\n" \
"vpsrlvq %%ymm6, %0, %%ymm6\n" \
"vpsllvq %6, %0, %0\n" \
"vmovdqa %[SLLV_A1], %6\n" \
"vmovdqa %[SRLV_A2], %%ymm0\n" \
"vpsrlvq %%ymm7, %1, %%ymm7\n" \
"vpsllvq %6, %1, %1\n" \
"vmovdqa %[SLLV_A2], %6\n" \
"vmovdqa %[SRLV_A3], %%ymm10\n" \
"vpsrlvq %%ymm0, %2, %%ymm0\n" \
"vpsllvq %6, %2, %2\n" \
"vpxor %%ymm7, %1, %%ymm7\n" \
"vpxor %%ymm6, %0, %%ymm6\n" \
"vpermq $28, %%ymm6, %1\n" \
"vperm2i128 $17, %%ymm6, %%ymm6, %%ymm6\n" \
"vpxor %%ymm0, %2, %2\n" \
"vpsrlvq %%ymm10, %3, %%ymm0\n" \
"vpermq $114, %2, %2\n" \
"vmovdqa %[SLLV_A3], %%ymm10\n" \
"vpsllvq %%ymm10, %3, %3\n" \
"vpxor %%ymm0, %3, %%ymm10\n" \
"vpermq $135, %%ymm10, %3\n" \
"vmovdqa %[SRLV_A4], %%ymm0\n" \
"vpsrlvq %%ymm0, %4, %6\n" \
"vmovdqa %[SLLV_A4], %%ymm0\n" \
"vpsllvq %%ymm0, %4, %4\n" \
"vpxor %6, %4, %%ymm0\n" \
"vpermq $201, %%ymm0, %0\n" \
"vpunpckhqdq %%ymm0, %%ymm0, %%ymm0\n" \
"vmovdqa %[SRLV_C4], %4\n" \
"vpsrlvq %4, %5, %6\n" \
"vmovdqa %[SLLV_C4], %4\n" \
"vpsllvq %4, %5, %5\n" \
"vpxor %6, %5, %4\n" \
"vpsrlq $50, %%ymm9, %5\n" \
"vpsllq $14, %%ymm9, %6\n" \
"vperm2i128 $17, %4, %4, %%ymm8\n" \
"vpxor %5, %6, %%ymm9\n" \
"vmovdqa %x4, %x6\n" \
"vpunpckhqdq %4, %4, %5\n" \
"vpbroadcastq %%xmm9, %%ymm9\n" \
"vpbroadcastq %x6, %6\n" \
"vpermq $255, %4, %4\n" \
"vpblendd $48, %4, %3, %4\n" \
"vpblendd $3, %%ymm9, %0, %3\n" \
"vpblendd $192, %6, %1, %1\n" \
"vpermq $45, %%ymm7, %6\n" \
"vpblendd $12, %5, %6, %5\n" \
"vpermq $255, %%ymm7, %%ymm7\n" \
"vpandn %2, %5, %%ymm9\n" \
"subq $32, %%rax\n" \
"vpxor %1, %%ymm9, %%ymm9\n" \
"vpandn %4, %2, %6\n" \
"vpandn %3, %4, %0\n" \
"vpxor (%%rdx, %%rax), %%ymm9, %%ymm9\n" \
"vpxor %0, %2, %0\n" \
"vpxor %5, %6, %6\n" \
"vpandn %1, %3, %2\n" \
"vpandn %5, %1, %5\n" \
"vpxor %4, %2, %4\n" \
"vpxor %3, %5, %5\n" \
"vpunpcklqdq %6, %%ymm9, %2\n" \
"vpunpckhqdq %6, %%ymm9, %6\n" \
"vpunpcklqdq %4, %0, %%ymm9\n" \
"vpunpckhqdq %4, %0, %4\n" \
"vperm2i128 $32, %%ymm9, %2, %0\n" \
"vperm2i128 $32, %4, %6, %1\n" \
"vperm2i128 $19, %6, %4, %3\n" \
"vperm2i128 $19, %2, %%ymm9, %2\n" \
"vpandn %%ymm10, %%ymm8, %4\n" \
"vpandn %%ymm0, %%ymm10, %6\n" \
"vpandn %%ymm8, %%ymm7, %%ymm9\n" \
"vpxor %4, %%ymm7, %4\n" \
"vpxor %6, %%ymm8, %%ymm8\n" \
"vpxor %%ymm9, %%ymm6, %%ymm9\n" \
"vpandn %%ymm6, %%ymm0, %6\n" \
"vpxor %6, %%ymm10, %%ymm10\n" \
"vpandn %%ymm7, %%ymm6, %6\n" \
"vpunpcklqdq %4, %%ymm9, %%ymm9\n" \
"vpunpcklqdq %%ymm10, %%ymm8, %%ymm8\n" \
"vperm2i128 $32, %%ymm8, %%ymm9, %4\n" \
"vpxor %6, %%ymm0, %6\n" \
"jnz 1b\n" \
\
: "+x"(a0), "+x"(a1), "+x"(a2), "+x"(a3), "+x"(a4), "+x"(c4), "+x"(a44) \
: "i"(8*4*nrRounds), "d"(keccak_rc), \
[SLLV_A0] "m"(SLLV_A0), [SRLV_A0] "m"(SRLV_A0), \
[SLLV_A1] "m"(SLLV_A1), [SRLV_A1] "m"(SRLV_A1), \
[SLLV_A2] "m"(SLLV_A2), [SRLV_A2] "m"(SRLV_A2), \
[SLLV_A3] "m"(SLLV_A3), [SRLV_A3] "m"(SRLV_A3), \
[SLLV_A4] "m"(SLLV_A4), [SRLV_A4] "m"(SRLV_A4), \
[SLLV_C4] "m"(SLLV_C4), [SRLV_C4] "m"(SRLV_C4) \
: "rax", "xmm0", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10" \
);
/* Fixed round-count wrappers around the loop above. */
#define KECCAK_PERMUTE(const_pref) KECCAK_PERMUTE_LOOP(const_pref, 24)
#define KECCAK_PERMUTE_12rounds(const_pref) KECCAK_PERMUTE_LOOP(const_pref, 12)
#endif //__X64 && __GNUC__
// One table entry is a full 256-bit vector (4 x UINT64) so that it can be XORed into a0
// directly. Each initializer sets only the first element; the remaining three are
// zero-initialized, so only element 0 of a0 is modified by the round constant.
typedef UINT64 keccak_rc_t[4];
// Reverse order.
// Entry [0] holds the constant of round 23 and entry [23] the constant of round 0; the
// permutation loop counts its index down, so the rounds are still applied in increasing order.
// Aligned on 32 bytes because entries are accessed through __m256i pointers.
__ALIGN(32) keccak_rc_t keccak_rc[24] =
{
{0x8000000080008008ull}, //round 23
{0x0000000080000001ull},
{0x8000000000008080ull},
{0x8000000080008081ull},
{0x800000008000000Aull},
{0x000000000000800Aull},
{0x8000000000000080ull},
{0x8000000000008002ull},
{0x8000000000008003ull},
{0x8000000000008089ull},
{0x800000000000008Bull},
{0x000000008000808Bull},
{0x000000008000000Aull},
{0x0000000080008009ull},
{0x0000000000000088ull},
{0x000000000000008Aull},
{0x8000000000008009ull},
{0x8000000080008081ull},
{0x0000000080000001ull},
{0x000000000000808Bull},
{0x8000000080008000ull},
{0x800000000000808Aull},
{0x0000000000008082ull},
{0x0000000000000001ull}, //round 0
};
//*****************************
// Static initialization hook: this implementation has nothing to set up.
void KeccakP1600_StaticInitialize(void)
//*****************************
{
    /* intentionally empty */
}
//******************************
void KeccakP1600_Initialize(void *state)
//******************************
{ memset(state, 0, sizeof(keccak_state_t));}
//__KeccakP1600_AddByte
//*****************************************************************************
// XOR a single byte into the state at byte position `offset`.
// Implemented as a one-byte call to KeccakP1600_AddBytes (TODO: optimize this).
void KeccakP1600_AddByte(void *state, UINT8 byte, size_t offset)
//*****************************************************************************
{
    KeccakP1600_AddBytes(state, &byte, offset, 1);
}
//__KeccakP1600_AddBytes
//*****************************************************************************
// XOR `length` bytes from `data` into the state, starting at byte position `offset`
// of the standard (row-major, 25-lane) state representation.
//
// The state is first flattened into a 25-lane scratch array, the bytes are XORed in,
// and the array is then repacked into the vector layout of keccak_state_t.
//
// The XOR is done byte by byte: `data` has no alignment guarantee and `offset` can be
// any byte position, so accessing either through a UINT64 pointer would be a misaligned,
// aliasing-violating access (and would cast away the const of `data`).
//
// The caller must ensure offset + length <= 200 (the size of the state).
void KeccakP1600_AddBytes(void *state, const UINT8 *data, size_t offset, size_t length)
//*****************************************************************************
{
    keccak_state_t *s = (keccak_state_t *)state;
    UINT64 t[25];
    UINT8 *flat = (UINT8 *)t;
    size_t i;

    KeccakP1600_ExtractBytes(state, flat, 0, sizeof(t));

    for (i = 0; i < length; i++)
        flat[offset + i] ^= data[i];

    // Repack: a0..a4 take lanes 0..3 of each row, c4 takes lane 4 of rows 0..3,
    // and a44 takes the last lane (broadcast to all four elements).
    s->a0 = LOAD(t + 0*5);
    s->a1 = LOAD(t + 1*5);
    s->a2 = LOAD(t + 2*5);
    s->a3 = LOAD(t + 3*5);
    s->a4 = LOAD(t + 4*5);
    s->c4 = SET(t[0*5 + 4], t[1*5 + 4], t[2*5 + 4], t[3*5 + 4]);
    s->a44 = _mm256_set1_epi64x(t[4*5 + 4]);
} //KeccakP1600_AddBytes
//***********************************************************************************
void KeccakP1600_OverwriteBytes(void *state, const UINT8 *data, size_t offset, size_t length)
//***********************************************************************************
{
keccak_state_t *s = (keccak_state_t *)state;
UINT64 *d = (UINT64 *)data;
UINT8 *t1, *d1;
UINT64 t[25];
UINT64 *t0;
ptrdiff_t lane_n = length / sizeof(UINT64);
ptrdiff_t byte_n = length % sizeof(UINT64);
ptrdiff_t i;
KeccakP1600_ExtractBytes(state, (UINT8 *)t, 0, sizeof(t));
t0 = (UINT64 *)((UINT8 *)t + offset);
for (i = 0; i < lane_n; i++)
t0[i] = d[i];
if (byte_n)
{
t1 = (UINT8 *)(t0 + i);
d1 = (UINT8 *)(d + i);
for (i = 0; i < byte_n; i++)
t1[i] = d1[i];
}
s->a0 = LOAD(t + 0*5);
s->a1 = LOAD(t + 1*5);
s->a2 = LOAD(t + 2*5);
s->a3 = LOAD(t + 3*5);
s->a4 = LOAD(t + 4*5);
s->c4 = SET(t[0*5 + 4], t[1*5 + 4], t[2*5 + 4], t[3*5 + 4]);
s->a44 = _mm256_set1_epi64x(t[4*5 + 4]);
} //KeccakP1600_OverwriteBytes
//*********************************************************
// Clear the first `byteCount` bytes of the state (standard row-major representation).
// The state is flattened into a 25-lane scratch array, its leading bytes are zeroed,
// and the array is repacked into the vector layout of keccak_state_t.
void KeccakP1600_OverwriteWithZeroes(void *state, size_t byteCount)
//*********************************************************
{
    UINT64 lanes[25];
    keccak_state_t *st = (keccak_state_t *)state;

    KeccakP1600_ExtractBytes(state, (UINT8 *)lanes, 0, sizeof(lanes));
    memset(lanes, 0, byteCount);

    // Rows: lanes 0..3 of each group of five.
    st->a0 = LOAD(&lanes[0]);
    st->a1 = LOAD(&lanes[5]);
    st->a2 = LOAD(&lanes[10]);
    st->a3 = LOAD(&lanes[15]);
    st->a4 = LOAD(&lanes[20]);
    // Last column of rows 0..3, then the final lane broadcast to all elements.
    st->c4 = SET(lanes[4], lanes[9], lanes[14], lanes[19]);
    st->a44 = _mm256_set1_epi64x(lanes[24]);
} //KeccakP1600_OverwriteWithZeroes
//__KeccakP1600_ExtractBytes
//*********************************************************************************
// Copy `length` bytes of the state, starting at byte position `offset` of the standard
// (row-major, 25-lane) representation, into `data`.
//
// The vector layout is first flattened into a local 25-lane array and the requested
// range is then copied out with memcpy, so that:
//  - `data` is never written through a UINT64 pointer (it has no alignment guarantee);
//  - a44 is read through its own member instead of indexing one past the end of c4;
//  - a request reaching beyond the 200-byte state is truncated instead of reading
//    past the local array (at most 200 - offset bytes are written to `data`).
void KeccakP1600_ExtractBytes(const void *state, UINT8 *data, size_t offset, size_t length)
//*********************************************************************************
{
    const keccak_state_t *s = (const keccak_state_t *)state;
    const UINT64 *c4 = (const UINT64 *)&s->c4;
    const UINT64 *a44 = (const UINT64 *)&s->a44;
    UINT64 t[25];

    // Each row vector provides lanes 0..3 of its row; lane 4 of rows 0..3 comes
    // from c4 and the very last lane from a44.
    STORE(t + 0*5, s->a0);
    STORE(t + 1*5, s->a1);
    STORE(t + 2*5, s->a2);
    STORE(t + 3*5, s->a3);
    STORE(t + 4*5, s->a4);
    t[0*5 + 4] = c4[0];
    t[1*5 + 4] = c4[1];
    t[2*5 + 4] = c4[2];
    t[3*5 + 4] = c4[3];
    t[4*5 + 4] = a44[0];

    if (offset >= sizeof(t))
        return;
    if (length > sizeof(t) - offset)
        length = sizeof(t) - offset;
    memcpy(data, (const UINT8 *)t + offset, length);
} //KeccakP1600_ExtractBytes
//***************************************************************************************
// output[i] = input[i] XOR (state byte at offset + i), for i in [0, length).
//
// The state is flattened into a local 25-lane array and combined with `input` byte by
// byte: `input` and `output` have no alignment guarantee and `offset` can be any byte
// position, so accessing them through UINT64 pointers would be a misaligned,
// aliasing-violating access (and would cast away the const of `input`).
// `input` and `output` may be the same buffer.
//
// The caller must ensure offset + length <= 200 (the size of the state).
void KeccakP1600_ExtractAndAddBytes(const void *state, const UINT8 *input, UINT8 *output, size_t offset, size_t length)
//***************************************************************************************
{
    UINT64 t[25];
    const UINT8 *flat = (const UINT8 *)t;
    size_t i;

    KeccakP1600_ExtractBytes(state, (UINT8 *)t, 0, sizeof(t));

    for (i = 0; i < length; i++)
        output[i] = (UINT8)(input[i] ^ flat[offset + i]);
} //KeccakP1600_ExtractAndAddBytes
//***************************
void KeccakP1600_Permute_Nrounds(void *state, unsigned int nrounds)
//***************************
{
KECCAK_PERMUTE_VARS
KECCAK_LOAD
KECCAK_PERMUTE_LOOP(, nrounds)
KECCAK_STORE
}
//***************************
void KeccakP1600_Permute_24rounds(void *state)
//***************************
{
KECCAK_PERMUTE_VARS
KECCAK_LOAD
KECCAK_PERMUTE()
KECCAK_STORE
} //KeccakP1600_Permute_24rounds
//***************************
void KeccakP1600_Permute_12rounds(void *state)
//***************************
{
KECCAK_PERMUTE_VARS
KECCAK_LOAD
KECCAK_PERMUTE_12rounds()
KECCAK_STORE
} //KeccakP1600_Permute_12rounds
//__KeccakF1600_FastLoop_Absorb
//**************************************************************************************************************
// Absorb as many complete blocks of `laneCount` lanes as `data` contains: each block is
// XORed into the state and followed by the 24-round permutation.
// Returns the number of bytes consumed (a multiple of laneCount * 8); any trailing
// partial block is left untouched for the caller.
// The state is kept in local vectors across the whole loop and written back at the end.
// NOTE(review): laneCount == 0 would never advance `di` - callers presumably never pass it.
size_t KeccakF1600_FastLoop_Absorb(void *state, size_t laneCount, const UINT8 *data, size_t dataByteLen)
//**************************************************************************************************************
{
KECCAK_PERMUTE_VARS
const UINT64 *d;
ptrdiff_t di;
KECCAK_LOAD
// `di` is the index (in lanes) of the current block. When fewer than laneCount lanes are
// available the unsigned subtraction wraps; the cast to ptrdiff_t is relied upon to make
// the bound negative so that the loop body is skipped.
for (di = 0; di <= (ptrdiff_t)(dataByteLen / sizeof(UINT64) - laneCount); di += laneCount)
{
d = (UINT64 *)data + di;
// Specialized XOR for the listed block sizes (the trailing comments give the size in
// bits). Input lane 5*y + x belongs to row y: lanes with x = 0..3 go to a<y>, lanes with
// x = 4 go to element y of c4 (or to a44 for the very last lane).
switch (laneCount)
{
case 9: //576
a0 = XOR(a0, LOAD(d + 0*5));
a1 = XOR(a1, LOAD(d + 1*5));
c4 = XOR(c4, MASKLOAD(d + 0*5 + 4, 1, 0, 0, 0));
break;
case 13: //832
a0 = XOR(a0, LOAD(d + 0*5));
a1 = XOR(a1, LOAD(d + 1*5));
a2 = XOR(a2, MASKLOAD(d + 2*5, 1, 1, 1, 0));
c4 = XOR(c4, SET(d[0*5 + 4], d[1*5 + 4], 0, 0));
break;
case 16: //1024
a0 = XOR(a0, LOAD(d + 0*5));
a1 = XOR(a1, LOAD(d + 1*5));
a2 = XOR(a2, LOAD(d + 2*5));
a3 = XOR(a3, MASKLOAD(d + 3*5, 1, 0, 0, 0));
c4 = XOR(c4, SET(d[0*5 + 4], d[1*5 + 4], d[2*5 + 4], 0));
break;
case 17: //1088
a0 = XOR(a0, LOAD(d + 0*5));
a1 = XOR(a1, LOAD(d + 1*5));
a2 = XOR(a2, LOAD(d + 2*5));
a3 = XOR(a3, MASKLOAD(d + 3*5, 1, 1, 0, 0));
c4 = XOR(c4, SET(d[0*5 + 4], d[1*5 + 4], d[2*5 + 4], 0));
break;
case 18: //1152
a0 = XOR(a0, LOAD(d + 0*5));
a1 = XOR(a1, LOAD(d + 1*5));
a2 = XOR(a2, LOAD(d + 2*5));
a3 = XOR(a3, MASKLOAD(d + 3*5, 1, 1, 1, 0));
c4 = XOR(c4, SET(d[0*5 + 4], d[1*5 + 4], d[2*5 + 4], 0));
break;
case 21: //1344
a0 = XOR(a0, LOAD(d + 0*5));
a1 = XOR(a1, LOAD(d + 1*5));
a2 = XOR(a2, LOAD(d + 2*5));
a3 = XOR(a3, LOAD(d + 3*5));
a4 = XOR(a4, MASKLOAD(d + 4*5, 1, 0, 0, 0));
c4 = XOR(c4, SET(d[0*5 + 4], d[1*5 + 4], d[2*5 + 4], d[3*5 + 4]));
break;
case 25: //1600
a0 = XOR(a0, LOAD(d + 0*5));
a1 = XOR(a1, LOAD(d + 1*5));
a2 = XOR(a2, LOAD(d + 2*5));
a3 = XOR(a3, LOAD(d + 3*5));
a4 = XOR(a4, LOAD(d + 4*5));
c4 = XOR(c4, SET(d[0*5 + 4], d[1*5 + 4], d[2*5 + 4], d[3*5 + 4]));
a44 = XOR(a44, LOAD0(d + 4*5 + 4));
break;
// Any other block size: write the local vectors back, let the generic AddBytes do the
// XOR on the stored state, then reload the local vectors.
default:
KECCAK_STORE
KeccakP1600_AddBytes(state, (UINT8 *)d, 0, laneCount * sizeof(UINT64));
KECCAK_LOAD
} //switch (laneCount)
KECCAK_PERMUTE(_)
} //for (di
KECCAK_STORE
return di * sizeof(UINT64);
} //KeccakF1600_FastLoop_Absorb

View File

@ -1,38 +0,0 @@
/*
Implementation by Vladimir Sedach, hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#ifndef _KeccakP_1600_AVX2_h_
#define _KeccakP_1600_AVX2_h_
#include <stddef.h>
typedef unsigned char UINT8;
#ifdef __cplusplus
extern "C" {
#endif
void KeccakP1600_StaticInitialize (void);
void KeccakP1600_Initialize (void *state);
void KeccakP1600_AddByte (void *state, UINT8 byte, size_t offset);
void KeccakP1600_AddBytes (void *state, const UINT8 *data, size_t offset, size_t length);
void KeccakP1600_OverwriteBytes (void *state, const UINT8 *data, size_t offset, size_t length);
void KeccakP1600_OverwriteWithZeroes (void *state, size_t byteCount);
void KeccakP1600_Permute_Nrounds (void *state, unsigned int nrounds);
void KeccakP1600_Permute_24rounds (void *state);
void KeccakP1600_Permute_12rounds (void *state);
void KeccakP1600_ExtractBytes (const void *state, UINT8 *data, size_t offset, size_t length);
void KeccakP1600_ExtractAndAddBytes (const void *state, const UINT8 *input, UINT8 *output, size_t offset, size_t length);
size_t KeccakF1600_FastLoop_Absorb (void *state, size_t laneCount, const UINT8 *data, size_t dataByteLen);
#ifdef __cplusplus
}
#endif
#endif /* _KeccakP_1600_AVX2_h_ */

View File

@ -0,0 +1,998 @@
# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
# Copyright (c) 2017 Ronny Van Keer
# All rights reserved.
#
# The source code in this file is licensed under the CRYPTOGAMS license.
# For further details see http://www.openssl.org/~appro/cryptogams/.
#
# Notes:
# The code for the permutation (__KeccakF1600) was generated with
# Andy Polyakov's keccak1600-avx2.pl from the CRYPTOGAMS project
# (https://github.com/dot-asm/cryptogams/blob/master/x86_64/keccak1600-avx2.pl).
# The rest of the code was written by Ronny Van Keer.
.text
# -----------------------------------------------------------------------------
#
# void KeccakP1600_Initialize(void *state);
# %rdi
#
# Sets the whole 200-byte state to zero: six 32-byte vector stores cover
# bytes 0..191 and one 8-byte store covers bytes 192..199.
# The stores are unaligned (vmovdqu), like the state loads/stores of the
# permutation entry points in this file, so a state buffer that is not
# 32-byte aligned does not raise a fault here.
# Clobbers %ymm0.
#
.globl KeccakP1600_Initialize
.type KeccakP1600_Initialize,@function
.align 32
KeccakP1600_Initialize:
vpxor %ymm0,%ymm0,%ymm0
vmovdqu %ymm0,0*32(%rdi)
vmovdqu %ymm0,1*32(%rdi)
vmovdqu %ymm0,2*32(%rdi)
vmovdqu %ymm0,3*32(%rdi)
vmovdqu %ymm0,4*32(%rdi)
vmovdqu %ymm0,5*32(%rdi)
movq $0,6*32(%rdi) # last 8 bytes (192..199)
vzeroupper # do not return with dirty upper YMM halves
ret
.size KeccakP1600_Initialize,.-KeccakP1600_Initialize
# -----------------------------------------------------------------------------
#
# void KeccakP1600_AddByte(void *state, unsigned char data, unsigned int offset);
# %rdi %rsi %rdx
#
# XORs the byte `data` into the state at logical byte position `offset`.
# offset & ~7 (= 8 * lane index) indexes the mapState table, which gives the
# byte offset of that lane inside the state; offset & 7 is the byte position
# inside the lane.
# Clobbers %rax, %rcx, %rdx, %rdi.
#
.globl KeccakP1600_AddByte
.type KeccakP1600_AddByte,@function
.align 32
KeccakP1600_AddByte:
mov %edx, %eax
and $7, %eax # rax = byte position inside the lane
and $0xFFFFFFF8, %edx # rdx = 8 * lane index = byte offset into mapState
lea mapState(%rip), %rcx
add (%rcx, %rdx), %rdi # rdi = address of the lane inside the state
xorb %sil, (%rdi, %rax)
ret
.size KeccakP1600_AddByte,.-KeccakP1600_AddByte
# -----------------------------------------------------------------------------
#
# void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
# %rdi %rsi %rdx %rcx
#
# XORs `length` bytes from `data` into the state, starting at logical byte
# position `offset`. Lanes are located through the mapState table.
# Three phases: (1) bytes up to the next lane boundary when `offset` is not a
# multiple of 8, (2) whole 8-byte lanes, (3) the remaining bytes of a last
# partial lane.
# `length` is a 32-bit argument but is used as a 64-bit counter below, so it
# is zero-extended first (the calling convention does not define bits 63..32
# of a register that carries a 32-bit argument).
# Clobbers %rax, %rcx, %rdx, %rsi, %r8, %r9.
#
.globl KeccakP1600_AddBytes
.type KeccakP1600_AddBytes,@function
.align 32
KeccakP1600_AddBytes:
mov %ecx, %ecx # zero-extend the 32-bit length
cmp $0, %rcx
jz KeccakP1600_AddBytes_Exit
mov %rdx, %rax # rax offset in lane
and $0xFFFFFFF8, %edx # rdx pointer into state index mapper
lea mapState(%rip), %r9
add %r9, %rdx
and $7, %rax
jz KeccakP1600_AddBytes_LaneAlignedCheck
mov $8, %r9 # r9 is (max) length of incomplete lane
sub %rax, %r9
cmp %rcx, %r9
cmovae %rcx, %r9 # r9 = min(8 - offset in lane, length)
sub %r9, %rcx # length -= length of incomplete lane
add (%rdx), %rax # rax = pointer to state lane
add $8, %rdx
add %rdi, %rax
KeccakP1600_AddBytes_NotAlignedLoop:
mov (%rsi), %r8b
inc %rsi
xorb %r8b, (%rax)
inc %rax
dec %r9
jnz KeccakP1600_AddBytes_NotAlignedLoop
jmp KeccakP1600_AddBytes_LaneAlignedCheck
KeccakP1600_AddBytes_LaneAlignedLoop:
mov (%rsi), %r8
add $8, %rsi
mov (%rdx), %rax
add $8, %rdx
add %rdi, %rax
xor %r8, (%rax)
KeccakP1600_AddBytes_LaneAlignedCheck:
sub $8, %rcx
jnc KeccakP1600_AddBytes_LaneAlignedLoop # no borrow: at least 8 bytes were left
KeccakP1600_AddBytes_LastIncompleteLane:
add $8, %rcx # undo the last subtraction: rcx = 0..7 bytes left
jz KeccakP1600_AddBytes_Exit
mov (%rdx), %rax
add %rdi, %rax
KeccakP1600_AddBytes_LastIncompleteLaneLoop:
mov (%rsi), %r8b
inc %rsi
xor %r8b, (%rax)
inc %rax
dec %rcx
jnz KeccakP1600_AddBytes_LastIncompleteLaneLoop
KeccakP1600_AddBytes_Exit:
ret
.size KeccakP1600_AddBytes,.-KeccakP1600_AddBytes
# -----------------------------------------------------------------------------
#
# void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
# %rdi %rsi %rdx %rcx
#
# Copies `length` bytes from `data` over the state bytes starting at logical
# byte position `offset`. Lanes are located through the mapState table.
# Three phases: (1) bytes up to the next lane boundary when `offset` is not a
# multiple of 8, (2) whole 8-byte lanes, (3) the remaining bytes of a last
# partial lane.
# `length` is a 32-bit argument but is used as a 64-bit counter below, so it
# is zero-extended first (the calling convention does not define bits 63..32
# of a register that carries a 32-bit argument).
# Clobbers %rax, %rcx, %rdx, %rsi, %r8, %r9.
#
.globl KeccakP1600_OverwriteBytes
.type KeccakP1600_OverwriteBytes,@function
.align 32
KeccakP1600_OverwriteBytes:
mov %ecx, %ecx # zero-extend the 32-bit length
cmp $0, %rcx
jz KeccakP1600_OverwriteBytes_Exit
mov %rdx, %rax # rax offset in lane
and $0xFFFFFFF8, %edx # rdx pointer into state index mapper
lea mapState(%rip), %r9
add %r9, %rdx
and $7, %rax
jz KeccakP1600_OverwriteBytes_LaneAlignedCheck
mov $8, %r9 # r9 is (max) length of incomplete lane
sub %rax, %r9
cmp %rcx, %r9
cmovae %rcx, %r9 # r9 = min(8 - offset in lane, length)
sub %r9, %rcx # length -= length of incomplete lane
add (%rdx), %rax # rax = pointer to state lane
add $8, %rdx
add %rdi, %rax
KeccakP1600_OverwriteBytes_NotAlignedLoop:
mov (%rsi), %r8b
inc %rsi
mov %r8b, (%rax)
inc %rax
dec %r9
jnz KeccakP1600_OverwriteBytes_NotAlignedLoop
jmp KeccakP1600_OverwriteBytes_LaneAlignedCheck
KeccakP1600_OverwriteBytes_LaneAlignedLoop:
mov (%rsi), %r8
add $8, %rsi
mov (%rdx), %rax
add $8, %rdx
add %rdi, %rax
mov %r8, (%rax)
KeccakP1600_OverwriteBytes_LaneAlignedCheck:
sub $8, %rcx
jnc KeccakP1600_OverwriteBytes_LaneAlignedLoop # no borrow: at least 8 bytes were left
KeccakP1600_OverwriteBytes_LastIncompleteLane:
add $8, %rcx # undo the last subtraction: rcx = 0..7 bytes left
jz KeccakP1600_OverwriteBytes_Exit
mov (%rdx), %rax
add %rdi, %rax
KeccakP1600_OverwriteBytes_LastIncompleteLaneLoop:
mov (%rsi), %r8b
inc %rsi
mov %r8b, (%rax)
inc %rax
dec %rcx
jnz KeccakP1600_OverwriteBytes_LastIncompleteLaneLoop
KeccakP1600_OverwriteBytes_Exit:
ret
.size KeccakP1600_OverwriteBytes,.-KeccakP1600_OverwriteBytes
# -----------------------------------------------------------------------------
#
# void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount);
# %rdi %rsi
#
# Zeroes the first `byteCount` logical bytes of the state: whole lanes with
# 8-byte stores, then the leading bytes of one last partial lane. Lanes are
# located through the mapState table, starting at lane 0.
# `byteCount` is a 32-bit argument but is used as a 64-bit counter below, so
# it is zero-extended first (the calling convention does not define bits
# 63..32 of a register that carries a 32-bit argument).
# Clobbers %rax, %rdx, %rsi.
#
.globl KeccakP1600_OverwriteWithZeroes
.type KeccakP1600_OverwriteWithZeroes,@function
.align 32
KeccakP1600_OverwriteWithZeroes:
mov %esi, %esi # zero-extend the 32-bit byte count
cmp $0, %rsi
jz KeccakP1600_OverwriteWithZeroes_Exit
lea mapState(%rip), %rdx # rdx pointer into state index mapper
jmp KeccakP1600_OverwriteWithZeroes_LaneAlignedCheck
KeccakP1600_OverwriteWithZeroes_LaneAlignedLoop:
mov (%rdx), %rax
add $8, %rdx
add %rdi, %rax
movq $0, (%rax)
KeccakP1600_OverwriteWithZeroes_LaneAlignedCheck:
sub $8, %rsi
jnc KeccakP1600_OverwriteWithZeroes_LaneAlignedLoop # no borrow: at least 8 bytes were left
KeccakP1600_OverwriteWithZeroes_LastIncompleteLane:
add $8, %rsi # undo the last subtraction: rsi = 0..7 bytes left
jz KeccakP1600_OverwriteWithZeroes_Exit
mov (%rdx), %rax
add %rdi, %rax
KeccakP1600_OverwriteWithZeroes_LastIncompleteLaneLoop:
movb $0, (%rax)
inc %rax
dec %rsi
jnz KeccakP1600_OverwriteWithZeroes_LastIncompleteLaneLoop
KeccakP1600_OverwriteWithZeroes_Exit:
ret
.size KeccakP1600_OverwriteWithZeroes,.-KeccakP1600_OverwriteWithZeroes
# -----------------------------------------------------------------------------
#
# void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length);
# %rdi %rsi %rdx %rcx
#
# Copies `length` bytes of the state, starting at logical byte position
# `offset`, to `data`. Lanes are located through the mapState table.
# Three phases: (1) bytes up to the next lane boundary when `offset` is not a
# multiple of 8, (2) whole 8-byte lanes, (3) the leading bytes of a last
# partial lane, peeled off a 64-bit copy of that lane by shifting.
# `length` is a 32-bit argument but is used as a 64-bit counter below, so it
# is zero-extended first (the calling convention does not define bits 63..32
# of a register that carries a 32-bit argument).
# %rbx is saved and restored; clobbers %rax, %rcx, %rdx, %rsi, %r8, %r9.
#
.globl KeccakP1600_ExtractBytes
.type KeccakP1600_ExtractBytes,@function
.align 32
KeccakP1600_ExtractBytes:
push %rbx
mov %ecx, %ecx # zero-extend the 32-bit length
cmp $0, %rcx
jz KeccakP1600_ExtractBytes_Exit
mov %rdx, %rax # rax offset in lane
and $0xFFFFFFF8, %edx # rdx pointer into state index mapper
lea mapState(%rip), %r9
add %r9, %rdx
and $7, %rax
jz KeccakP1600_ExtractBytes_LaneAlignedCheck
mov $8, %rbx # rbx is (max) length of incomplete lane
sub %rax, %rbx
cmp %rcx, %rbx
cmovae %rcx, %rbx # rbx = min(8 - offset in lane, length)
sub %rbx, %rcx # length -= length of incomplete lane
mov (%rdx), %r9
add $8, %rdx
add %rdi, %r9
add %rax, %r9 # r9 = pointer to first state byte to read
KeccakP1600_ExtractBytes_NotAlignedLoop:
mov (%r9), %r8b
inc %r9
mov %r8b, (%rsi)
inc %rsi
dec %rbx
jnz KeccakP1600_ExtractBytes_NotAlignedLoop
jmp KeccakP1600_ExtractBytes_LaneAlignedCheck
KeccakP1600_ExtractBytes_LaneAlignedLoop:
mov (%rdx), %rax
add $8, %rdx
add %rdi, %rax
mov (%rax), %r8
mov %r8, (%rsi)
add $8, %rsi
KeccakP1600_ExtractBytes_LaneAlignedCheck:
sub $8, %rcx
jnc KeccakP1600_ExtractBytes_LaneAlignedLoop # no borrow: at least 8 bytes were left
KeccakP1600_ExtractBytes_LastIncompleteLane:
add $8, %rcx # undo the last subtraction: rcx = 0..7 bytes left
jz KeccakP1600_ExtractBytes_Exit
mov (%rdx), %rax
add %rdi, %rax
mov (%rax), %r8 # r8 = whole last lane, emitted low byte first
KeccakP1600_ExtractBytes_LastIncompleteLaneLoop:
mov %r8b, (%rsi)
shr $8, %r8
inc %rsi
dec %rcx
jnz KeccakP1600_ExtractBytes_LastIncompleteLaneLoop
KeccakP1600_ExtractBytes_Exit:
pop %rbx
ret
.size KeccakP1600_ExtractBytes,.-KeccakP1600_ExtractBytes
# -----------------------------------------------------------------------------
#
# void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length);
# %rdi %rsi %rdx %rcx %r8
#
# For `length` bytes starting at logical state byte position `offset`, writes
# output[i] = state byte XOR input[i]. Lanes are located through the mapState
# table.
# Three phases: (1) bytes up to the next lane boundary when `offset` is not a
# multiple of 8, (2) whole 8-byte lanes, (3) the leading bytes of a last
# partial lane, peeled off a 64-bit copy of that lane by shifting.
# `length` is a 32-bit argument but is used as a 64-bit counter below, so it
# is zero-extended first (the calling convention does not define bits 63..32
# of a register that carries a 32-bit argument).
# %rbx and %r10 are saved and restored; clobbers %rax, %rcx, %rdx, %rsi,
# %r8, %r9.
#
.globl KeccakP1600_ExtractAndAddBytes
.type KeccakP1600_ExtractAndAddBytes,@function
.align 32
KeccakP1600_ExtractAndAddBytes:
push %rbx
push %r10
mov %r8d, %r8d # zero-extend the 32-bit length
cmp $0, %r8
jz KeccakP1600_ExtractAndAddBytes_Exit
mov %rcx, %rax # rax offset in lane
and $0xFFFFFFF8, %ecx # rcx pointer into state index mapper
lea mapState(%rip), %r9
add %r9, %rcx
and $7, %rax
jz KeccakP1600_ExtractAndAddBytes_LaneAlignedCheck
mov $8, %rbx # rbx is (max) length of incomplete lane
sub %rax, %rbx
cmp %r8, %rbx
cmovae %r8, %rbx # rbx = min(8 - offset in lane, length)
sub %rbx, %r8 # length -= length of incomplete lane
mov (%rcx), %r9
add $8, %rcx
add %rdi, %r9
add %rax, %r9 # r9 = pointer to first state byte to read
KeccakP1600_ExtractAndAddBytes_NotAlignedLoop:
mov (%r9), %r10b
inc %r9
xor (%rsi), %r10b
inc %rsi
mov %r10b, (%rdx)
inc %rdx
dec %rbx
jnz KeccakP1600_ExtractAndAddBytes_NotAlignedLoop
jmp KeccakP1600_ExtractAndAddBytes_LaneAlignedCheck
KeccakP1600_ExtractAndAddBytes_LaneAlignedLoop:
mov (%rcx), %rax
add $8, %rcx
add %rdi, %rax
mov (%rax), %r10
xor (%rsi), %r10
add $8, %rsi
mov %r10, (%rdx)
add $8, %rdx
KeccakP1600_ExtractAndAddBytes_LaneAlignedCheck:
sub $8, %r8
jnc KeccakP1600_ExtractAndAddBytes_LaneAlignedLoop # no borrow: at least 8 bytes were left
KeccakP1600_ExtractAndAddBytes_LastIncompleteLane:
add $8, %r8 # undo the last subtraction: r8 = 0..7 bytes left
jz KeccakP1600_ExtractAndAddBytes_Exit
mov (%rcx), %rax
add %rdi, %rax
mov (%rax), %r10 # r10 = whole last lane, consumed low byte first
KeccakP1600_ExtractAndAddBytes_LastIncompleteLaneLoop:
xor (%rsi), %r10b
inc %rsi
mov %r10b, (%rdx)
inc %rdx
shr $8, %r10
dec %r8
jnz KeccakP1600_ExtractAndAddBytes_LastIncompleteLaneLoop
KeccakP1600_ExtractAndAddBytes_Exit:
pop %r10
pop %rbx
ret
.size KeccakP1600_ExtractAndAddBytes,.-KeccakP1600_ExtractAndAddBytes
# -----------------------------------------------------------------------------
#
# internal
#
# Round loop of the permutation. Not exported (no .globl) and not callable
# from C: all inputs and outputs are passed in registers.
#
# On entry:
#   %ymm0..%ymm6  the state, as loaded by the Permute/FastLoop entry points
#   %r8           address of rhotates_left  + 96 (the table is read at k*32-96(%r8))
#   %r9           address of rhotates_right + 96 (the table is read at k*32-96(%r9))
#   %r10          address of the 32-byte iotas entry for the first round to run
#   %eax          number of rounds; must be >= 1 because the counter is only
#                 tested after a round has been executed
# On exit:
#   %ymm0..%ymm6  the permuted state
#   %r10          advanced by 32 bytes per executed round
#   %eax          0
# %ymm7..%ymm15 are used as temporaries; %r8 and %r9 are not modified.
#
.type __KeccakF1600,@function
.align 32
__KeccakF1600:
.Loop_avx2:
######################################### Theta
vpshufd $0b01001110,%ymm2,%ymm13
vpxor %ymm3,%ymm5,%ymm12
vpxor %ymm6,%ymm4,%ymm9
vpxor %ymm1,%ymm12,%ymm12
vpxor %ymm9,%ymm12,%ymm12 # C[1..4]
vpermq $0b10010011,%ymm12,%ymm11
vpxor %ymm2,%ymm13,%ymm13
vpermq $0b01001110,%ymm13,%ymm7
vpsrlq $63,%ymm12,%ymm8
vpaddq %ymm12,%ymm12,%ymm9
vpor %ymm9,%ymm8,%ymm8 # ROL64(C[1..4],1)
vpermq $0b00111001,%ymm8,%ymm15
vpxor %ymm11,%ymm8,%ymm14
vpermq $0b00000000,%ymm14,%ymm14 # D[0..0] = ROL64(C[1],1) ^ C[4]
vpxor %ymm0,%ymm13,%ymm13
vpxor %ymm7,%ymm13,%ymm13 # C[0..0]
vpsrlq $63,%ymm13,%ymm7
vpaddq %ymm13,%ymm13,%ymm8
vpor %ymm7,%ymm8,%ymm8 # ROL64(C[0..0],1)
vpxor %ymm14,%ymm2,%ymm2 # ^= D[0..0]
vpxor %ymm14,%ymm0,%ymm0 # ^= D[0..0]
vpblendd $0b11000000,%ymm8,%ymm15,%ymm15
vpblendd $0b00000011,%ymm13,%ymm11,%ymm11
vpxor %ymm11,%ymm15,%ymm15 # D[1..4] = ROL64(C[2..4,0),1) ^ C[0..3]
######################################### Rho + Pi + pre-Chi shuffle
# Each 64-bit rotation is built from a variable left shift (counts from the
# table at %r8), a variable right shift (counts from the table at %r9) and
# an OR of the two results.
vpsllvq 0*32-96(%r8),%ymm2,%ymm10
vpsrlvq 0*32-96(%r9),%ymm2,%ymm2
vpor %ymm10,%ymm2,%ymm2
vpxor %ymm15,%ymm3,%ymm3 # ^= D[1..4] from Theta
vpsllvq 2*32-96(%r8),%ymm3,%ymm11
vpsrlvq 2*32-96(%r9),%ymm3,%ymm3
vpor %ymm11,%ymm3,%ymm3
vpxor %ymm15,%ymm4,%ymm4 # ^= D[1..4] from Theta
vpsllvq 3*32-96(%r8),%ymm4,%ymm12
vpsrlvq 3*32-96(%r9),%ymm4,%ymm4
vpor %ymm12,%ymm4,%ymm4
vpxor %ymm15,%ymm5,%ymm5 # ^= D[1..4] from Theta
vpsllvq 4*32-96(%r8),%ymm5,%ymm13
vpsrlvq 4*32-96(%r9),%ymm5,%ymm5
vpor %ymm13,%ymm5,%ymm5
vpxor %ymm15,%ymm6,%ymm6 # ^= D[1..4] from Theta
vpermq $0b10001101,%ymm2,%ymm10 # %ymm2 -> future %ymm3
vpermq $0b10001101,%ymm3,%ymm11 # %ymm3 -> future %ymm4
vpsllvq 5*32-96(%r8),%ymm6,%ymm14
vpsrlvq 5*32-96(%r9),%ymm6,%ymm8
vpor %ymm14,%ymm8,%ymm8 # %ymm6 -> future %ymm1
vpxor %ymm15,%ymm1,%ymm1 # ^= D[1..4] from Theta
vpermq $0b00011011,%ymm4,%ymm12 # %ymm4 -> future %ymm5
vpermq $0b01110010,%ymm5,%ymm13 # %ymm5 -> future %ymm6
vpsllvq 1*32-96(%r8),%ymm1,%ymm15
vpsrlvq 1*32-96(%r9),%ymm1,%ymm9
vpor %ymm15,%ymm9,%ymm9 # %ymm1 -> future %ymm2
######################################### Chi
vpsrldq $8,%ymm8,%ymm14
vpandn %ymm14,%ymm8,%ymm7 # tgting [0][0] [0][0] [0][0] [0][0]
vpblendd $0b00001100,%ymm13,%ymm9,%ymm3 # [4][4] [2][0]
vpblendd $0b00001100,%ymm9,%ymm11,%ymm15 # [4][0] [2][1]
vpblendd $0b00001100,%ymm11,%ymm10,%ymm5 # [4][2] [2][4]
vpblendd $0b00001100,%ymm10,%ymm9,%ymm14 # [4][3] [2][0]
vpblendd $0b00110000,%ymm11,%ymm3,%ymm3 # [1][3] [4][4] [2][0]
vpblendd $0b00110000,%ymm12,%ymm15,%ymm15 # [1][4] [4][0] [2][1]
vpblendd $0b00110000,%ymm9,%ymm5,%ymm5 # [1][0] [4][2] [2][4]
vpblendd $0b00110000,%ymm13,%ymm14,%ymm14 # [1][1] [4][3] [2][0]
vpblendd $0b11000000,%ymm12,%ymm3,%ymm3 # [3][2] [1][3] [4][4] [2][0]
vpblendd $0b11000000,%ymm13,%ymm15,%ymm15 # [3][3] [1][4] [4][0] [2][1]
vpblendd $0b11000000,%ymm13,%ymm5,%ymm5 # [3][3] [1][0] [4][2] [2][4]
vpblendd $0b11000000,%ymm11,%ymm14,%ymm14 # [3][4] [1][1] [4][3] [2][0]
vpandn %ymm15,%ymm3,%ymm3 # tgting [3][1] [1][2] [4][3] [2][4]
vpandn %ymm14,%ymm5,%ymm5 # tgting [3][2] [1][4] [4][1] [2][3]
vpblendd $0b00001100,%ymm9,%ymm12,%ymm6 # [4][0] [2][3]
vpblendd $0b00001100,%ymm12,%ymm10,%ymm15 # [4][1] [2][4]
vpxor %ymm10,%ymm3,%ymm3
vpblendd $0b00110000,%ymm10,%ymm6,%ymm6 # [1][2] [4][0] [2][3]
vpblendd $0b00110000,%ymm11,%ymm15,%ymm15 # [1][3] [4][1] [2][4]
vpxor %ymm12,%ymm5,%ymm5
vpblendd $0b11000000,%ymm11,%ymm6,%ymm6 # [3][4] [1][2] [4][0] [2][3]
vpblendd $0b11000000,%ymm9,%ymm15,%ymm15 # [3][0] [1][3] [4][1] [2][4]
vpandn %ymm15,%ymm6,%ymm6 # tgting [3][3] [1][1] [4][4] [2][2]
vpxor %ymm13,%ymm6,%ymm6
vpermq $0b00011110,%ymm8,%ymm4 # [0][1] [0][2] [0][4] [0][3]
vpblendd $0b00110000,%ymm0,%ymm4,%ymm15 # [0][1] [0][0] [0][4] [0][3]
vpermq $0b00111001,%ymm8,%ymm1 # [0][1] [0][4] [0][3] [0][2]
vpblendd $0b11000000,%ymm0,%ymm1,%ymm1 # [0][0] [0][4] [0][3] [0][2]
vpandn %ymm15,%ymm1,%ymm1 # tgting [0][4] [0][3] [0][2] [0][1]
vpblendd $0b00001100,%ymm12,%ymm11,%ymm2 # [4][1] [2][1]
vpblendd $0b00001100,%ymm11,%ymm13,%ymm14 # [4][2] [2][2]
vpblendd $0b00110000,%ymm13,%ymm2,%ymm2 # [1][1] [4][1] [2][1]
vpblendd $0b00110000,%ymm10,%ymm14,%ymm14 # [1][2] [4][2] [2][2]
vpblendd $0b11000000,%ymm10,%ymm2,%ymm2 # [3][1] [1][1] [4][1] [2][1]
vpblendd $0b11000000,%ymm12,%ymm14,%ymm14 # [3][2] [1][2] [4][2] [2][2]
vpandn %ymm14,%ymm2,%ymm2 # tgting [3][0] [1][0] [4][0] [2][0]
vpxor %ymm9,%ymm2,%ymm2
vpermq $0b00000000,%ymm7,%ymm7 # [0][0] [0][0] [0][0] [0][0]
vpermq $0b00011011,%ymm3,%ymm3 # post-Chi shuffle
vpermq $0b10001101,%ymm5,%ymm5
vpermq $0b01110010,%ymm6,%ymm6
vpblendd $0b00001100,%ymm10,%ymm13,%ymm4 # [4][3] [2][2]
vpblendd $0b00001100,%ymm13,%ymm12,%ymm14 # [4][4] [2][3]
vpblendd $0b00110000,%ymm12,%ymm4,%ymm4 # [1][4] [4][3] [2][2]
vpblendd $0b00110000,%ymm9,%ymm14,%ymm14 # [1][0] [4][4] [2][3]
vpblendd $0b11000000,%ymm9,%ymm4,%ymm4 # [3][0] [1][4] [4][3] [2][2]
vpblendd $0b11000000,%ymm10,%ymm14,%ymm14 # [3][1] [1][0] [4][4] [2][3]
vpandn %ymm14,%ymm4,%ymm4 # tgting [3][4] [1][3] [4][2] [2][1]
vpxor %ymm7,%ymm0,%ymm0
vpxor %ymm8,%ymm1,%ymm1
vpxor %ymm11,%ymm4,%ymm4
######################################### Iota
vpxor (%r10),%ymm0,%ymm0 # round constant, replicated in all four elements
lea 32(%r10),%r10 # next round constant (lea leaves the flags alone)
dec %eax # one round done
jnz .Loop_avx2
ret
.size __KeccakF1600,.-__KeccakF1600
# -----------------------------------------------------------------------------
#
# void KeccakP1600_Permute_24rounds(void *state);
# %rdi
#
# Runs 24 rounds of __KeccakF1600 on the state, starting at the first entry
# of the iotas table. The state is loaded into %ymm0..%ymm6 with unaligned
# loads, permuted, and stored back.
# On return %rdi equals state + 96 (it is biased so that the state is
# addressed with displacements relative to -96). Clobbers %rax, %r8, %r9,
# %r10 and %ymm0..%ymm15.
#
.globl KeccakP1600_Permute_24rounds
.type KeccakP1600_Permute_24rounds,@function
.align 32
KeccakP1600_Permute_24rounds:
lea rhotates_left+96(%rip),%r8
lea rhotates_right+96(%rip),%r9
lea iotas(%rip),%r10
mov $24,%eax
lea 96(%rdi),%rdi
vzeroupper
vpbroadcastq -96(%rdi),%ymm0 # first 8 state bytes, replicated in all four elements
vmovdqu 8+32*0-96(%rdi),%ymm1
vmovdqu 8+32*1-96(%rdi),%ymm2
vmovdqu 8+32*2-96(%rdi),%ymm3
vmovdqu 8+32*3-96(%rdi),%ymm4
vmovdqu 8+32*4-96(%rdi),%ymm5
vmovdqu 8+32*5-96(%rdi),%ymm6
call __KeccakF1600
vmovq %xmm0,-96(%rdi) # only the low 64 bits of %ymm0 are stored back
vmovdqu %ymm1,8+32*0-96(%rdi)
vmovdqu %ymm2,8+32*1-96(%rdi)
vmovdqu %ymm3,8+32*2-96(%rdi)
vmovdqu %ymm4,8+32*3-96(%rdi)
vmovdqu %ymm5,8+32*4-96(%rdi)
vmovdqu %ymm6,8+32*5-96(%rdi)
vzeroupper
ret
.size KeccakP1600_Permute_24rounds,.-KeccakP1600_Permute_24rounds
# -----------------------------------------------------------------------------
#
# void KeccakP1600_Permute_12rounds(void *state);
# %rdi
#
# Runs 12 rounds of __KeccakF1600 on the state, starting at entry 12 of the
# iotas table (each entry is 4*8 bytes), i.e. using the last 12 of the 24
# round constants. The state is loaded into %ymm0..%ymm6 with unaligned
# loads, permuted, and stored back.
# On return %rdi equals state + 96 (it is biased so that the state is
# addressed with displacements relative to -96). Clobbers %rax, %r8, %r9,
# %r10 and %ymm0..%ymm15.
#
.globl KeccakP1600_Permute_12rounds
.type KeccakP1600_Permute_12rounds,@function
.align 32
KeccakP1600_Permute_12rounds:
lea rhotates_left+96(%rip),%r8
lea rhotates_right+96(%rip),%r9
lea iotas+12*4*8(%rip),%r10
mov $12,%eax
lea 96(%rdi),%rdi
vzeroupper
vpbroadcastq -96(%rdi),%ymm0 # first 8 state bytes, replicated in all four elements
vmovdqu 8+32*0-96(%rdi),%ymm1
vmovdqu 8+32*1-96(%rdi),%ymm2
vmovdqu 8+32*2-96(%rdi),%ymm3
vmovdqu 8+32*3-96(%rdi),%ymm4
vmovdqu 8+32*4-96(%rdi),%ymm5
vmovdqu 8+32*5-96(%rdi),%ymm6
call __KeccakF1600
vmovq %xmm0,-96(%rdi) # only the low 64 bits of %ymm0 are stored back
vmovdqu %ymm1,8+32*0-96(%rdi)
vmovdqu %ymm2,8+32*1-96(%rdi)
vmovdqu %ymm3,8+32*2-96(%rdi)
vmovdqu %ymm4,8+32*3-96(%rdi)
vmovdqu %ymm5,8+32*4-96(%rdi)
vmovdqu %ymm6,8+32*5-96(%rdi)
vzeroupper
ret
.size KeccakP1600_Permute_12rounds,.-KeccakP1600_Permute_12rounds
# -----------------------------------------------------------------------------
#
# void KeccakP1600_Permute_Nrounds(void *state, unsigned int nrounds);
# %rdi %rsi
#
# Runs the last `nrounds` rounds of the 24-round schedule on the state: the
# round-constant pointer starts `nrounds` entries (of 4*8 bytes each) before
# the end of the iotas table.
# nrounds == 0 returns immediately with the state untouched; the round loop
# only tests its counter after a round has run, so it must never be entered
# with a zero count.
# `nrounds` is a 32-bit argument and is zero-extended before it is used in
# 64-bit arithmetic.
# The caller must pass nrounds <= 24: larger values make the constant pointer
# start before the iotas table (not checked here).
# When rounds are run, %rdi equals state + 96 on return. Clobbers %rax, %rsi,
# %r8, %r9, %r10 and %ymm0..%ymm15.
#
.globl KeccakP1600_Permute_Nrounds
.type KeccakP1600_Permute_Nrounds,@function
.align 32
KeccakP1600_Permute_Nrounds:
mov %esi,%eax # eax = round counter; the 32-bit move also clears bits 63..32 of rax
test %eax,%eax
jz KeccakP1600_Permute_Nrounds_Exit # zero rounds: nothing to do
lea rhotates_left+96(%rip),%r8
lea rhotates_right+96(%rip),%r9
lea iotas+24*4*8(%rip),%r10 # one past the last round constant
mov %rax,%rsi
shl $2+3,%rsi # rsi = nrounds * 32 bytes
sub %rsi, %r10 # r10 = constant of the first round to run
lea 96(%rdi),%rdi
vzeroupper
vpbroadcastq -96(%rdi),%ymm0 # first 8 state bytes, replicated in all four elements
vmovdqu 8+32*0-96(%rdi),%ymm1
vmovdqu 8+32*1-96(%rdi),%ymm2
vmovdqu 8+32*2-96(%rdi),%ymm3
vmovdqu 8+32*3-96(%rdi),%ymm4
vmovdqu 8+32*4-96(%rdi),%ymm5
vmovdqu 8+32*5-96(%rdi),%ymm6
call __KeccakF1600
vmovq %xmm0,-96(%rdi) # only the low 64 bits of %ymm0 are stored back
vmovdqu %ymm1,8+32*0-96(%rdi)
vmovdqu %ymm2,8+32*1-96(%rdi)
vmovdqu %ymm3,8+32*2-96(%rdi)
vmovdqu %ymm4,8+32*3-96(%rdi)
vmovdqu %ymm5,8+32*4-96(%rdi)
vmovdqu %ymm6,8+32*5-96(%rdi)
vzeroupper
KeccakP1600_Permute_Nrounds_Exit:
ret
.size KeccakP1600_Permute_Nrounds,.-KeccakP1600_Permute_Nrounds
# -----------------------------------------------------------------------------
#
# size_t KeccakF1600_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen);
# %rdi %rsi %rdx %rcx
#
# Absorbs as many whole blocks of `laneCount` 8-byte lanes as `data` holds:
# each block is XORed into the state and followed by a 24-round permutation.
# Returns the number of bytes consumed (final data pointer minus initial data
# pointer); a trailing partial block is left to the caller.
#
# laneCount == 21 and laneCount == 17 keep the state in %ymm0..%ymm6 across
# blocks and fetch the input with masked gathers. Any other lane count XORs
# the lanes in memory through the mapState table and calls
# KeccakP1600_Permute_24rounds for every block.
#
# laneCount == 0 returns 0 immediately (the lane-add loop tests its counter
# only after the first lane, so it must not be entered with a zero count).
# `laneCount` is a 32-bit argument and is zero-extended before it is used in
# 64-bit comparisons. The caller must pass laneCount <= 25: mapState has 25
# entries (not checked here).
#
.globl KeccakF1600_FastLoop_Absorb
.type KeccakF1600_FastLoop_Absorb,@function
.align 32
KeccakF1600_FastLoop_Absorb:
push %rbx
push %r10
mov %esi, %esi # zero-extend the 32-bit laneCount
shr $3, %rcx # rcx = data length in lanes
mov %rdx, %rbx # rbx = initial data pointer
test %rsi, %rsi
jz KeccakF1600_FastLoop_Absorb_Exit # laneCount == 0: nothing can be absorbed
cmp %rsi, %rcx
jb KeccakF1600_FastLoop_Absorb_Exit # less than one whole block
vzeroupper
cmp $21, %rsi
jnz KeccakF1600_FastLoop_Absorb_Not21Lanes
sub $21, %rcx
lea rhotates_left+96(%rip),%r8
lea rhotates_right+96(%rip),%r9
lea 96(%rdi),%rdi
vpbroadcastq -96(%rdi),%ymm0 # first 8 state bytes, replicated in all four elements
vmovdqu 8+32*0-96(%rdi),%ymm1
vmovdqu 8+32*1-96(%rdi),%ymm2
vmovdqu 8+32*2-96(%rdi),%ymm3
vmovdqu 8+32*3-96(%rdi),%ymm4
vmovdqu 8+32*4-96(%rdi),%ymm5
vmovdqu 8+32*5-96(%rdi),%ymm6
KeccakF1600_FastLoop_Absorb_Loop21Lanes:
vpbroadcastq (%rdx),%ymm7 # input lane 0
vmovdqu 8(%rdx),%ymm8 # input lanes 1..4
# Each gather clears its mask register, so the mask is reloaded every time.
# A masked-off element keeps the destination value, hence the zeroing.
vmovdqa map2(%rip), %xmm15
vpcmpeqq %ymm14, %ymm14, %ymm14 # all-ones mask: all four lanes of map2 are < 21
vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm9
vmovdqa mask3_21(%rip), %ymm14
vpxor %ymm10, %ymm10, %ymm10
vmovdqa map3(%rip), %xmm15
vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm10
vmovdqa mask4_21(%rip), %ymm14
vpxor %ymm11, %ymm11, %ymm11
vmovdqa map4(%rip), %xmm15
vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm11
vmovdqa mask5_21(%rip), %ymm14
vpxor %ymm12, %ymm12, %ymm12
vmovdqa map5(%rip), %xmm15
vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm12
vmovdqa mask6_21(%rip), %ymm14
vpxor %ymm13, %ymm13, %ymm13
vmovdqa map6(%rip), %xmm15
vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm13
vpxor %ymm7,%ymm0,%ymm0
vpxor %ymm8,%ymm1,%ymm1
vpxor %ymm9,%ymm2,%ymm2
vpxor %ymm10,%ymm3,%ymm3
vpxor %ymm11,%ymm4,%ymm4
vpxor %ymm12,%ymm5,%ymm5
vpxor %ymm13,%ymm6,%ymm6
add $21*8, %rdx
lea iotas(%rip),%r10
mov $24,%eax
call __KeccakF1600
sub $21, %rcx
jnc KeccakF1600_FastLoop_Absorb_Loop21Lanes # no borrow: another whole block is available
KeccakF1600_FastLoop_Absorb_SaveAndExit:
vmovq %xmm0,-96(%rdi)
vmovdqu %ymm1,8+32*0-96(%rdi)
vmovdqu %ymm2,8+32*1-96(%rdi)
vmovdqu %ymm3,8+32*2-96(%rdi)
vmovdqu %ymm4,8+32*3-96(%rdi)
vmovdqu %ymm5,8+32*4-96(%rdi)
vmovdqu %ymm6,8+32*5-96(%rdi)
KeccakF1600_FastLoop_Absorb_Exit:
vzeroupper
mov %rdx, %rax # return number of bytes processed
sub %rbx, %rax
pop %r10
pop %rbx
ret
KeccakF1600_FastLoop_Absorb_Not21Lanes:
cmp $17, %rsi
jnz KeccakF1600_FastLoop_Absorb_Not17Lanes
sub $17, %rcx
lea rhotates_left+96(%rip),%r8
lea rhotates_right+96(%rip),%r9
lea 96(%rdi),%rdi
vpbroadcastq -96(%rdi),%ymm0 # first 8 state bytes, replicated in all four elements
vmovdqu 8+32*0-96(%rdi),%ymm1
vmovdqu 8+32*1-96(%rdi),%ymm2
vmovdqu 8+32*2-96(%rdi),%ymm3
vmovdqu 8+32*3-96(%rdi),%ymm4
vmovdqu 8+32*4-96(%rdi),%ymm5
vmovdqu 8+32*5-96(%rdi),%ymm6
KeccakF1600_FastLoop_Absorb_Loop17Lanes:
vpbroadcastq (%rdx),%ymm7 # input lane 0
vmovdqu 8(%rdx),%ymm8 # input lanes 1..4
vmovdqa mask2_17(%rip), %ymm14
vpxor %ymm9, %ymm9, %ymm9
vmovdqa map2(%rip), %xmm15
vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm9
vmovdqa mask3_17(%rip), %ymm14
vpxor %ymm10, %ymm10, %ymm10
vmovdqa map3(%rip), %xmm15
vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm10
vmovdqa mask4_17(%rip), %ymm14
vpxor %ymm11, %ymm11, %ymm11
vmovdqa map4(%rip), %xmm15
vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm11
vmovdqa mask5_17(%rip), %ymm14
vpxor %ymm12, %ymm12, %ymm12
vmovdqa map5(%rip), %xmm15
vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm12
vmovdqa mask6_17(%rip), %ymm14
vpxor %ymm13, %ymm13, %ymm13
vmovdqa map6(%rip), %xmm15
vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm13
vpxor %ymm7,%ymm0,%ymm0
vpxor %ymm8,%ymm1,%ymm1
vpxor %ymm9,%ymm2,%ymm2
vpxor %ymm10,%ymm3,%ymm3
vpxor %ymm11,%ymm4,%ymm4
vpxor %ymm12,%ymm5,%ymm5
vpxor %ymm13,%ymm6,%ymm6
add $17*8, %rdx
lea iotas(%rip),%r10
mov $24,%eax
call __KeccakF1600
sub $17, %rcx
jnc KeccakF1600_FastLoop_Absorb_Loop17Lanes # no borrow: another whole block is available
jmp KeccakF1600_FastLoop_Absorb_SaveAndExit
KeccakF1600_FastLoop_Absorb_Not17Lanes:
lea mapState(%rip), %r9
mov %rsi, %rax # rax = lanes left to add for this block
KeccakF1600_FastLoop_Absorb_LanesAddLoop:
mov (%rdx), %r8
add $8, %rdx
mov (%r9), %r10 # r10 = byte offset of this lane inside the state
add $8, %r9
add %rdi, %r10
xor %r8, (%r10)
sub $1, %rax
jnz KeccakF1600_FastLoop_Absorb_LanesAddLoop
sub %rsi, %rcx
push %rdi
push %rsi
push %rdx
push %rcx
sub $8, %rsp # six pushes since entry leave rsp at 8 mod 16; realign to 16 for the call
call KeccakP1600_Permute_24rounds@PLT
add $8, %rsp
pop %rcx
pop %rdx
pop %rsi
pop %rdi
cmp %rsi, %rcx
jae KeccakF1600_FastLoop_Absorb_Not17Lanes
jmp KeccakF1600_FastLoop_Absorb_Exit
.size KeccakF1600_FastLoop_Absorb,.-KeccakF1600_FastLoop_Absorb
# -----------------------------------------------------------------------------
#
# size_t KeccakP1600_12rounds_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen);
# %rdi %rsi %rdx %rcx
#
# Absorbs as many whole blocks of `laneCount` 8-byte lanes as `data` holds:
# each block is XORed into the state and followed by a 12-round permutation
# (round constants start at entry 12 of the iotas table).
# Returns the number of bytes consumed (final data pointer minus initial data
# pointer); a trailing partial block is left to the caller.
#
# laneCount == 21 and laneCount == 17 keep the state in %ymm0..%ymm6 across
# blocks and fetch the input with masked gathers. Any other lane count XORs
# the lanes in memory through the mapState table and calls
# KeccakP1600_Permute_12rounds for every block.
#
# laneCount == 0 returns 0 immediately (the lane-add loop tests its counter
# only after the first lane, so it must not be entered with a zero count).
# `laneCount` is a 32-bit argument and is zero-extended before it is used in
# 64-bit comparisons. The caller must pass laneCount <= 25: mapState has 25
# entries (not checked here).
#
.globl KeccakP1600_12rounds_FastLoop_Absorb
.type KeccakP1600_12rounds_FastLoop_Absorb,@function
.align 32
KeccakP1600_12rounds_FastLoop_Absorb:
push %rbx
push %r10
mov %esi, %esi # zero-extend the 32-bit laneCount
shr $3, %rcx # rcx = data length in lanes
mov %rdx, %rbx # rbx = initial data pointer
test %rsi, %rsi
jz KeccakP1600_12rounds_FastLoop_Absorb_Exit # laneCount == 0: nothing can be absorbed
cmp %rsi, %rcx
jb KeccakP1600_12rounds_FastLoop_Absorb_Exit # less than one whole block
vzeroupper
cmp $21, %rsi
jnz KeccakP1600_12rounds_FastLoop_Absorb_Not21Lanes
sub $21, %rcx
lea rhotates_left+96(%rip),%r8
lea rhotates_right+96(%rip),%r9
lea 96(%rdi),%rdi
vpbroadcastq -96(%rdi),%ymm0 # first 8 state bytes, replicated in all four elements
vmovdqu 8+32*0-96(%rdi),%ymm1
vmovdqu 8+32*1-96(%rdi),%ymm2
vmovdqu 8+32*2-96(%rdi),%ymm3
vmovdqu 8+32*3-96(%rdi),%ymm4
vmovdqu 8+32*4-96(%rdi),%ymm5
vmovdqu 8+32*5-96(%rdi),%ymm6
KeccakP1600_12rounds_FastLoop_Absorb_Loop21Lanes:
vpbroadcastq (%rdx),%ymm7 # input lane 0
vmovdqu 8(%rdx),%ymm8 # input lanes 1..4
# Each gather clears its mask register, so the mask is reloaded every time.
# A masked-off element keeps the destination value, hence the zeroing.
vmovdqa map2(%rip), %xmm15
vpcmpeqq %ymm14, %ymm14, %ymm14 # all-ones mask: all four lanes of map2 are < 21
vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm9
vmovdqa mask3_21(%rip), %ymm14
vpxor %ymm10, %ymm10, %ymm10
vmovdqa map3(%rip), %xmm15
vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm10
vmovdqa mask4_21(%rip), %ymm14
vpxor %ymm11, %ymm11, %ymm11
vmovdqa map4(%rip), %xmm15
vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm11
vmovdqa mask5_21(%rip), %ymm14
vpxor %ymm12, %ymm12, %ymm12
vmovdqa map5(%rip), %xmm15
vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm12
vmovdqa mask6_21(%rip), %ymm14
vpxor %ymm13, %ymm13, %ymm13
vmovdqa map6(%rip), %xmm15
vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm13
vpxor %ymm7,%ymm0,%ymm0
vpxor %ymm8,%ymm1,%ymm1
vpxor %ymm9,%ymm2,%ymm2
vpxor %ymm10,%ymm3,%ymm3
vpxor %ymm11,%ymm4,%ymm4
vpxor %ymm12,%ymm5,%ymm5
vpxor %ymm13,%ymm6,%ymm6
add $21*8, %rdx
lea iotas+12*4*8(%rip),%r10
mov $12,%eax
call __KeccakF1600
sub $21, %rcx
jnc KeccakP1600_12rounds_FastLoop_Absorb_Loop21Lanes # no borrow: another whole block is available
KeccakP1600_12rounds_FastLoop_Absorb_SaveAndExit:
vmovq %xmm0,-96(%rdi)
vmovdqu %ymm1,8+32*0-96(%rdi)
vmovdqu %ymm2,8+32*1-96(%rdi)
vmovdqu %ymm3,8+32*2-96(%rdi)
vmovdqu %ymm4,8+32*3-96(%rdi)
vmovdqu %ymm5,8+32*4-96(%rdi)
vmovdqu %ymm6,8+32*5-96(%rdi)
KeccakP1600_12rounds_FastLoop_Absorb_Exit:
vzeroupper
mov %rdx, %rax # return number of bytes processed
sub %rbx, %rax
pop %r10
pop %rbx
ret
KeccakP1600_12rounds_FastLoop_Absorb_Not21Lanes:
cmp $17, %rsi
jnz KeccakP1600_12rounds_FastLoop_Absorb_Not17Lanes
sub $17, %rcx
lea rhotates_left+96(%rip),%r8
lea rhotates_right+96(%rip),%r9
lea 96(%rdi),%rdi
vpbroadcastq -96(%rdi),%ymm0 # first 8 state bytes, replicated in all four elements
vmovdqu 8+32*0-96(%rdi),%ymm1
vmovdqu 8+32*1-96(%rdi),%ymm2
vmovdqu 8+32*2-96(%rdi),%ymm3
vmovdqu 8+32*3-96(%rdi),%ymm4
vmovdqu 8+32*4-96(%rdi),%ymm5
vmovdqu 8+32*5-96(%rdi),%ymm6
KeccakP1600_12rounds_FastLoop_Absorb_Loop17Lanes:
vpbroadcastq (%rdx),%ymm7 # input lane 0
vmovdqu 8(%rdx),%ymm8 # input lanes 1..4
vmovdqa mask2_17(%rip), %ymm14
vpxor %ymm9, %ymm9, %ymm9
vmovdqa map2(%rip), %xmm15
vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm9
vmovdqa mask3_17(%rip), %ymm14
vpxor %ymm10, %ymm10, %ymm10
vmovdqa map3(%rip), %xmm15
vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm10
vmovdqa mask4_17(%rip), %ymm14
vpxor %ymm11, %ymm11, %ymm11
vmovdqa map4(%rip), %xmm15
vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm11
vmovdqa mask5_17(%rip), %ymm14
vpxor %ymm12, %ymm12, %ymm12
vmovdqa map5(%rip), %xmm15
vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm12
vmovdqa mask6_17(%rip), %ymm14
vpxor %ymm13, %ymm13, %ymm13
vmovdqa map6(%rip), %xmm15
vpgatherdq %ymm14, (%rdx, %xmm15, 1), %ymm13
vpxor %ymm7,%ymm0,%ymm0
vpxor %ymm8,%ymm1,%ymm1
vpxor %ymm9,%ymm2,%ymm2
vpxor %ymm10,%ymm3,%ymm3
vpxor %ymm11,%ymm4,%ymm4
vpxor %ymm12,%ymm5,%ymm5
vpxor %ymm13,%ymm6,%ymm6
add $17*8, %rdx
lea iotas+12*4*8(%rip),%r10
mov $12,%eax
call __KeccakF1600
sub $17, %rcx
jnc KeccakP1600_12rounds_FastLoop_Absorb_Loop17Lanes # no borrow: another whole block is available
jmp KeccakP1600_12rounds_FastLoop_Absorb_SaveAndExit
KeccakP1600_12rounds_FastLoop_Absorb_Not17Lanes:
lea mapState(%rip), %r9
mov %rsi, %rax # rax = lanes left to add for this block
KeccakP1600_12rounds_FastLoop_Absorb_LanesAddLoop:
mov (%rdx), %r8
add $8, %rdx
mov (%r9), %r10 # r10 = byte offset of this lane inside the state
add $8, %r9
add %rdi, %r10
xor %r8, (%r10)
sub $1, %rax
jnz KeccakP1600_12rounds_FastLoop_Absorb_LanesAddLoop
sub %rsi, %rcx
push %rdi
push %rsi
push %rdx
push %rcx
sub $8, %rsp # six pushes since entry leave rsp at 8 mod 16; realign to 16 for the call
call KeccakP1600_Permute_12rounds@PLT
add $8, %rsp
pop %rcx
pop %rdx
pop %rsi
pop %rdi
cmp %rsi, %rcx
jae KeccakP1600_12rounds_FastLoop_Absorb_Not17Lanes
jmp KeccakP1600_12rounds_FastLoop_Absorb_Exit
.size KeccakP1600_12rounds_FastLoop_Absorb,.-KeccakP1600_12rounds_FastLoop_Absorb
# All-ones 64-bit value, used by the gather masks defined further down.
.equ ALLON, 0xFFFFFFFFFFFFFFFF
.align 64
# Left-shift counts for the Rho rotations: six rows of four 64-bit counts.
# __KeccakF1600 reads row k at k*32-96(%r8), with %r8 = rhotates_left + 96.
# Row 0 is applied to %ymm2, row 1 to %ymm1, row 2 to %ymm3, row 3 to %ymm4,
# row 4 to %ymm5 and row 5 to %ymm6.
rhotates_left:
.quad 3, 18, 36, 41 # [2][0] [4][0] [1][0] [3][0]
.quad 1, 62, 28, 27 # [0][1] [0][2] [0][3] [0][4]
.quad 45, 6, 56, 39 # [3][1] [1][2] [4][3] [2][4]
.quad 10, 61, 55, 8 # [2][1] [4][2] [1][3] [3][4]
.quad 2, 15, 25, 20 # [4][1] [3][2] [2][3] [1][4]
.quad 44, 43, 21, 14 # [1][1] [2][2] [3][3] [4][4]
# Matching right-shift counts (64 - left count), same row/element order;
# read at k*32-96(%r9). The left- and right-shifted values are ORed together
# to form each rotation.
rhotates_right:
.quad 64-3, 64-18, 64-36, 64-41
.quad 64-1, 64-62, 64-28, 64-27
.quad 64-45, 64-6, 64-56, 64-39
.quad 64-10, 64-61, 64-55, 64-8
.quad 64-2, 64-15, 64-25, 64-20
.quad 64-44, 64-43, 64-21, 64-14
# Round constants for Iota: 24 entries, one per round. Each constant is
# stored four times (32 bytes per entry) because it is XORed into all four
# 64-bit elements of %ymm0. Entry i is at iotas + i*4*8.
iotas:
.quad 0x0000000000000001, 0x0000000000000001, 0x0000000000000001, 0x0000000000000001
.quad 0x0000000000008082, 0x0000000000008082, 0x0000000000008082, 0x0000000000008082
.quad 0x800000000000808a, 0x800000000000808a, 0x800000000000808a, 0x800000000000808a
.quad 0x8000000080008000, 0x8000000080008000, 0x8000000080008000, 0x8000000080008000
.quad 0x000000000000808b, 0x000000000000808b, 0x000000000000808b, 0x000000000000808b
.quad 0x0000000080000001, 0x0000000080000001, 0x0000000080000001, 0x0000000080000001
.quad 0x8000000080008081, 0x8000000080008081, 0x8000000080008081, 0x8000000080008081
.quad 0x8000000000008009, 0x8000000000008009, 0x8000000000008009, 0x8000000000008009
.quad 0x000000000000008a, 0x000000000000008a, 0x000000000000008a, 0x000000000000008a
.quad 0x0000000000000088, 0x0000000000000088, 0x0000000000000088, 0x0000000000000088
.quad 0x0000000080008009, 0x0000000080008009, 0x0000000080008009, 0x0000000080008009
.quad 0x000000008000000a, 0x000000008000000a, 0x000000008000000a, 0x000000008000000a
.quad 0x000000008000808b, 0x000000008000808b, 0x000000008000808b, 0x000000008000808b
.quad 0x800000000000008b, 0x800000000000008b, 0x800000000000008b, 0x800000000000008b
.quad 0x8000000000008089, 0x8000000000008089, 0x8000000000008089, 0x8000000000008089
.quad 0x8000000000008003, 0x8000000000008003, 0x8000000000008003, 0x8000000000008003
.quad 0x8000000000008002, 0x8000000000008002, 0x8000000000008002, 0x8000000000008002
.quad 0x8000000000000080, 0x8000000000000080, 0x8000000000000080, 0x8000000000000080
.quad 0x000000000000800a, 0x000000000000800a, 0x000000000000800a, 0x000000000000800a
.quad 0x800000008000000a, 0x800000008000000a, 0x800000008000000a, 0x800000008000000a
.quad 0x8000000080008081, 0x8000000080008081, 0x8000000080008081, 0x8000000080008081
.quad 0x8000000000008080, 0x8000000000008080, 0x8000000000008080, 0x8000000000008080
.quad 0x0000000080000001, 0x0000000080000001, 0x0000000080000001, 0x0000000080000001
.quad 0x8000000080008008, 0x8000000080008008, 0x8000000080008008, 0x8000000080008008
# Lane-index to state-offset table: entry i (25 entries of 8 bytes) is the
# byte offset inside the in-memory state at which logical lane i is stored.
# The byte-oriented routines index it with (offset & ~7), i.e. 8 * lane index.
mapState:
.quad 0*8, 1*8, 2*8, 3*8, 4*8
.quad 7*8, 21*8, 10*8, 15*8, 20*8
.quad 5*8, 13*8, 22*8, 19*8, 12*8
.quad 8*8, 9*8, 18*8, 23*8, 16*8
.quad 6*8, 17*8, 14*8, 11*8, 24*8
# Gather index vectors for the FastLoop routines: mapN holds, for each of the
# four elements of %ymm(N+7) (later XORed into %ymmN), the byte offset in the
# input block of the lane to fetch. Loaded with vmovdqa into an xmm register,
# hence the 16-byte alignment (each table is exactly 16 bytes long).
.align 16
map2:
.long 10*8, 20*8, 5*8, 15*8
map3:
.long 16*8, 7*8, 23*8, 14*8
map4:
.long 11*8, 22*8, 8*8, 19*8
map5:
.long 21*8, 17*8, 13*8, 9*8
map6:
.long 6*8, 12*8, 18*8, 24*8
# Gather masks for the FastLoop routines, element-for-element with the map
# tables above: ALLON fetches the lane, 0 skips it because that lane index is
# not part of a 21-lane (maskN_21) or 17-lane (maskN_17) block. Loaded with
# vmovdqa into a ymm register, hence the 32-byte alignment (each mask is
# exactly 32 bytes long).
.align 32
mask3_21:
.quad ALLON, ALLON, 0, ALLON
mask4_21:
.quad ALLON, 0, ALLON, ALLON
mask5_21:
.quad 0, ALLON, ALLON, ALLON
mask6_21:
.quad ALLON, ALLON, ALLON, 0
mask2_17:
.quad ALLON, 0, ALLON, ALLON
mask3_17:
.quad ALLON, ALLON, 0, ALLON
mask4_17:
.quad ALLON, 0, ALLON, 0
mask5_17:
.quad 0, 0, ALLON, ALLON
mask6_17:
.quad ALLON, ALLON, 0, 0
.asciz "Keccak-1600 for AVX2, CRYPTOGAMS by <appro@openssl.org>"

View File

@ -1,23 +1,41 @@
/*
Implementation by Vladimir Sedach, hereby denoted as "the implementer".
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to our website:
https://keccak.team/
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
---
Please refer to SnP-documentation.h for more details.
*/
/*
 * Declarations for a single-state Keccak-p[1600] implementation: a set of
 * descriptive macros followed by the prototypes of the state-manipulation
 * functions. Every function receives the state as an untyped pointer.
 *
 * NOTE(review): this excerpt contains two definitions of
 * KeccakP1600_stateSizeInBytes with different values and includes
 * <stddef.h> twice. It looks like the removed and the added lines of a
 * diff were merged together - confirm which lines belong to the current
 * version of the header before relying on it.
 */
#ifndef _KeccakP_1600_SnP_h_
#define _KeccakP_1600_SnP_h_
/** For the documentation, see SnP-documentation.h.
*/
#include <stddef.h>
/* Human-readable name of this implementation. */
#define KeccakP1600_implementation "AVX2 optimized implementation"
/* State size in bytes: (7 * 4 * 8) evaluates to 224. */
#define KeccakP1600_stateSizeInBytes (7 * 4 * 8)
/* State size in bytes: 200 bytes = 1600 bits. Conflicts with the line above. */
#define KeccakP1600_stateSizeInBytes 200
/* Alignment, in bytes, advertised for the state buffer. */
#define KeccakP1600_stateAlignment 32
/* Feature flags announcing the two FastLoop_Absorb functions declared below. */
#define KeccakF1600_FastLoop_supported
#define KeccakP1600_12rounds_FastLoop_supported
#include <stddef.h>
#include "KeccakP-1600-AVX2.h"
/* Expands to nothing: no static initialisation step is performed. */
#define KeccakP1600_StaticInitialize()
void KeccakP1600_Initialize(void *state);
/*
 * State input functions. Judging by the parameter names, offset, length
 * and byteCount are byte quantities - see SnP-documentation.h for the
 * authoritative contract.
 */
void KeccakP1600_AddByte(void *state, unsigned char data, unsigned int offset);
void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount);
/* Permutations: a caller-chosen round count, or a fixed 12 or 24 rounds. */
void KeccakP1600_Permute_Nrounds(void *state, unsigned int nrounds);
void KeccakP1600_Permute_12rounds(void *state);
void KeccakP1600_Permute_24rounds(void *state);
/* State output functions; the state is taken as const. */
void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length);
void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length);
/*
 * Fast absorb loops for the 24-round (KeccakF1600) and 12-round variants.
 * NOTE(review): the size_t return value is presumably the number of input
 * bytes consumed - confirm in SnP-documentation.h.
 */
size_t KeccakF1600_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen);
size_t KeccakP1600_12rounds_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen);
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,53 @@
/*
Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer".
For more information, feedback or questions, please refer to our website:
https://keccak.team/
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
---
Please refer to PlSnP-documentation.h for more details.
*/
/*
 * Declarations for the four-instance ("times4") Keccak-p[1600]
 * implementation: descriptive macros, one inline byte-XOR macro and the
 * prototypes of the functions operating on a block of four states.
 * Functions with an instanceIndex parameter address one instance; the
 * "...All" functions take no instance index.
 */
#ifndef _KeccakP_1600_times4_SnP_h_
#define _KeccakP_1600_times4_SnP_h_
#include <stdint.h>
#include "SIMD256-config.h"
/* Implementation name; the configuration string comes from SIMD256-config.h. */
#define KeccakP1600times4_implementation "256-bit SIMD implementation (" KeccakP1600times4_implementation_config ")"
/* Size in bytes of the combined states: 800 = 4 * 200. */
#define KeccakP1600times4_statesSizeInBytes 800
/* Alignment, in bytes, advertised for the states buffer. */
#define KeccakP1600times4_statesAlignment 32
/* Feature flags announcing the optional fast-loop / Kravatte entry points. */
#define KeccakF1600times4_FastLoop_supported
#define KeccakP1600times4_12rounds_FastLoop_supported
#define KeccakF1600times4_FastKravatte_supported
#include <stddef.h>
/* Expands to nothing: no static initialisation step is performed. */
#define KeccakP1600times4_StaticInitialize()
void KeccakP1600times4_InitializeAll(void *states);
/*
 * XORs `byte` into byte `offset` of instance `instanceIndex`.
 * The index expression documents the interleaved layout: lane number
 * (offset)/8 selects a group of 4*8 bytes, instanceIndex selects the
 * 8-byte slot inside that group, and (offset)%8 selects the byte inside
 * the slot. `offset` is evaluated twice, so it must not have side effects.
 */
#define KeccakP1600times4_AddByte(states, instanceIndex, byte, offset) \
((unsigned char*)(states))[(instanceIndex)*8 + ((offset)/8)*4*8 + (offset)%8] ^= (byte)
/*
 * State input functions. Parameter names indicate byte-granular access
 * (offset/length/byteCount) for the per-instance functions and
 * lane-granular access (laneCount/laneOffset) for the "...LanesAll"
 * functions - see PlSnP-documentation.h for the authoritative contract.
 */
void KeccakP1600times4_AddBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length);
void KeccakP1600times4_AddLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset);
void KeccakP1600times4_OverwriteBytes(void *states, unsigned int instanceIndex, const unsigned char *data, unsigned int offset, unsigned int length);
void KeccakP1600times4_OverwriteLanesAll(void *states, const unsigned char *data, unsigned int laneCount, unsigned int laneOffset);
void KeccakP1600times4_OverwriteWithZeroes(void *states, unsigned int instanceIndex, unsigned int byteCount);
/* Permutations with fixed round counts of 4, 6, 12 and 24. */
void KeccakP1600times4_PermuteAll_4rounds(void *states);
void KeccakP1600times4_PermuteAll_6rounds(void *states);
void KeccakP1600times4_PermuteAll_12rounds(void *states);
void KeccakP1600times4_PermuteAll_24rounds(void *states);
/* State output functions; the states are taken as const. */
void KeccakP1600times4_ExtractBytes(const void *states, unsigned int instanceIndex, unsigned char *data, unsigned int offset, unsigned int length);
void KeccakP1600times4_ExtractLanesAll(const void *states, unsigned char *data, unsigned int laneCount, unsigned int laneOffset);
void KeccakP1600times4_ExtractAndAddBytes(const void *states, unsigned int instanceIndex, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length);
void KeccakP1600times4_ExtractAndAddLanesAll(const void *states, const unsigned char *input, unsigned char *output, unsigned int laneCount, unsigned int laneOffset);
/*
 * Fast absorb loops and Kravatte helpers.
 * NOTE(review): the size_t return values are presumably byte counts
 * processed - confirm in PlSnP-documentation.h.
 */
size_t KeccakF1600times4_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen);
size_t KeccakP1600times4_12rounds_FastLoop_Absorb(void *states, unsigned int laneCount, unsigned int laneOffsetParallel, unsigned int laneOffsetSerial, const unsigned char *data, size_t dataByteLen);
size_t KeccakP1600times4_KravatteCompress(uint64_t *xAccu, uint64_t *kRoll, const unsigned char *input, size_t inputByteLen);
size_t KeccakP1600times4_KravatteExpand(uint64_t *yAccu, const uint64_t *kRoll, unsigned char *output, size_t outputByteLen);
#endif

View File

@ -0,0 +1,302 @@
/*
Implementation by the Keccak Team, namely, Guido Bertoni, Joan Daemen,
Michaël Peeters, Gilles Van Assche and Ronny Van Keer,
hereby denoted as "the implementer".
For more information, feedback or questions, please refer to our website:
https://keccak.team/
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#if (defined(FullUnrolling))
#define rounds24 \
prepareTheta \
thetaRhoPiChiIotaPrepareTheta( 0, A, E) \
thetaRhoPiChiIotaPrepareTheta( 1, E, A) \
thetaRhoPiChiIotaPrepareTheta( 2, A, E) \
thetaRhoPiChiIotaPrepareTheta( 3, E, A) \
thetaRhoPiChiIotaPrepareTheta( 4, A, E) \
thetaRhoPiChiIotaPrepareTheta( 5, E, A) \
thetaRhoPiChiIotaPrepareTheta( 6, A, E) \
thetaRhoPiChiIotaPrepareTheta( 7, E, A) \
thetaRhoPiChiIotaPrepareTheta( 8, A, E) \
thetaRhoPiChiIotaPrepareTheta( 9, E, A) \
thetaRhoPiChiIotaPrepareTheta(10, A, E) \
thetaRhoPiChiIotaPrepareTheta(11, E, A) \
thetaRhoPiChiIotaPrepareTheta(12, A, E) \
thetaRhoPiChiIotaPrepareTheta(13, E, A) \
thetaRhoPiChiIotaPrepareTheta(14, A, E) \
thetaRhoPiChiIotaPrepareTheta(15, E, A) \
thetaRhoPiChiIotaPrepareTheta(16, A, E) \
thetaRhoPiChiIotaPrepareTheta(17, E, A) \
thetaRhoPiChiIotaPrepareTheta(18, A, E) \
thetaRhoPiChiIotaPrepareTheta(19, E, A) \
thetaRhoPiChiIotaPrepareTheta(20, A, E) \
thetaRhoPiChiIotaPrepareTheta(21, E, A) \
thetaRhoPiChiIotaPrepareTheta(22, A, E) \
thetaRhoPiChiIota(23, E, A) \
#define rounds12 \
prepareTheta \
thetaRhoPiChiIotaPrepareTheta(12, A, E) \
thetaRhoPiChiIotaPrepareTheta(13, E, A) \
thetaRhoPiChiIotaPrepareTheta(14, A, E) \
thetaRhoPiChiIotaPrepareTheta(15, E, A) \
thetaRhoPiChiIotaPrepareTheta(16, A, E) \
thetaRhoPiChiIotaPrepareTheta(17, E, A) \
thetaRhoPiChiIotaPrepareTheta(18, A, E) \
thetaRhoPiChiIotaPrepareTheta(19, E, A) \
thetaRhoPiChiIotaPrepareTheta(20, A, E) \
thetaRhoPiChiIotaPrepareTheta(21, E, A) \
thetaRhoPiChiIotaPrepareTheta(22, A, E) \
thetaRhoPiChiIota(23, E, A) \
#define rounds6 \
prepareTheta \
thetaRhoPiChiIotaPrepareTheta(18, A, E) \
thetaRhoPiChiIotaPrepareTheta(19, E, A) \
thetaRhoPiChiIotaPrepareTheta(20, A, E) \
thetaRhoPiChiIotaPrepareTheta(21, E, A) \
thetaRhoPiChiIotaPrepareTheta(22, A, E) \
thetaRhoPiChiIota(23, E, A) \
#define rounds4 \
prepareTheta \
thetaRhoPiChiIotaPrepareTheta(20, A, E) \
thetaRhoPiChiIotaPrepareTheta(21, E, A) \
thetaRhoPiChiIotaPrepareTheta(22, A, E) \
thetaRhoPiChiIota(23, E, A) \
#elif (Unrolling == 12)
#define rounds24 \
prepareTheta \
for(i=0; i<24; i+=12) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+ 1, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+ 2, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+ 3, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+ 4, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+ 5, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+ 6, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+ 7, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+ 8, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+ 9, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+10, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+11, E, A) \
} \
#define rounds12 \
prepareTheta \
thetaRhoPiChiIotaPrepareTheta(12, A, E) \
thetaRhoPiChiIotaPrepareTheta(13, E, A) \
thetaRhoPiChiIotaPrepareTheta(14, A, E) \
thetaRhoPiChiIotaPrepareTheta(15, E, A) \
thetaRhoPiChiIotaPrepareTheta(16, A, E) \
thetaRhoPiChiIotaPrepareTheta(17, E, A) \
thetaRhoPiChiIotaPrepareTheta(18, A, E) \
thetaRhoPiChiIotaPrepareTheta(19, E, A) \
thetaRhoPiChiIotaPrepareTheta(20, A, E) \
thetaRhoPiChiIotaPrepareTheta(21, E, A) \
thetaRhoPiChiIotaPrepareTheta(22, A, E) \
thetaRhoPiChiIota(23, E, A) \
#define rounds6 \
prepareTheta \
thetaRhoPiChiIotaPrepareTheta(18, A, E) \
thetaRhoPiChiIotaPrepareTheta(19, E, A) \
thetaRhoPiChiIotaPrepareTheta(20, A, E) \
thetaRhoPiChiIotaPrepareTheta(21, E, A) \
thetaRhoPiChiIotaPrepareTheta(22, A, E) \
thetaRhoPiChiIota(23, E, A) \
#define rounds4 \
prepareTheta \
thetaRhoPiChiIotaPrepareTheta(20, A, E) \
thetaRhoPiChiIotaPrepareTheta(21, E, A) \
thetaRhoPiChiIotaPrepareTheta(22, A, E) \
thetaRhoPiChiIota(23, E, A) \
#elif (Unrolling == 6)
#define rounds24 \
prepareTheta \
for(i=0; i<24; i+=6) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+4, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \
} \
#define rounds12 \
prepareTheta \
for(i=12; i<24; i+=6) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+4, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \
} \
#define rounds6 \
prepareTheta \
thetaRhoPiChiIotaPrepareTheta(18, A, E) \
thetaRhoPiChiIotaPrepareTheta(19, E, A) \
thetaRhoPiChiIotaPrepareTheta(20, A, E) \
thetaRhoPiChiIotaPrepareTheta(21, E, A) \
thetaRhoPiChiIotaPrepareTheta(22, A, E) \
thetaRhoPiChiIota(23, E, A) \
#define rounds4 \
prepareTheta \
thetaRhoPiChiIotaPrepareTheta(20, A, E) \
thetaRhoPiChiIotaPrepareTheta(21, E, A) \
thetaRhoPiChiIotaPrepareTheta(22, A, E) \
thetaRhoPiChiIota(23, E, A) \
#elif (Unrolling == 4)
#define rounds24 \
prepareTheta \
for(i=0; i<24; i+=4) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \
} \
#define rounds12 \
prepareTheta \
for(i=12; i<24; i+=4) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \
} \
#define rounds6 \
prepareTheta \
for(i=18; i<24; i+=2) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
} \
#define rounds4 \
prepareTheta \
thetaRhoPiChiIotaPrepareTheta(20, A, E) \
thetaRhoPiChiIotaPrepareTheta(21, E, A) \
thetaRhoPiChiIotaPrepareTheta(22, A, E) \
thetaRhoPiChiIota(23, E, A) \
#elif (Unrolling == 3)
#define rounds24 \
prepareTheta \
for(i=0; i<24; i+=3) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
copyStateVariables(A, E) \
} \
#define rounds12 \
prepareTheta \
for(i=12; i<24; i+=3) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
copyStateVariables(A, E) \
} \
#define rounds6 \
prepareTheta \
for(i=18; i<24; i+=3) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
copyStateVariables(A, E) \
} \
#define rounds4 \
prepareTheta \
for(i=20; i<24; i+=2) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
} \
#elif (Unrolling == 2)
#define rounds24 \
prepareTheta \
for(i=0; i<24; i+=2) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
} \
#define rounds12 \
prepareTheta \
for(i=12; i<24; i+=2) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
} \
#define rounds6 \
prepareTheta \
for(i=18; i<24; i+=2) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
} \
#define rounds4 \
prepareTheta \
for(i=20; i<24; i+=2) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
} \
#elif (Unrolling == 1)
#define rounds24 \
prepareTheta \
for(i=0; i<24; i++) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
copyStateVariables(A, E) \
} \
#define rounds12 \
prepareTheta \
for(i=12; i<24; i++) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
copyStateVariables(A, E) \
} \
#define rounds6 \
prepareTheta \
for(i=18; i<24; i++) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
copyStateVariables(A, E) \
} \
#define rounds4 \
prepareTheta \
for(i=20; i<24; i++) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
copyStateVariables(A, E) \
} \
#else
#error "Unrolling is not correctly specified!"
#endif
/*
 * roundsN(__nrounds): run a caller-chosen number of rounds, ending at
 * round index 23.
 *
 * Expands to `prepareTheta`, then sets i = 24 - __nrounds as the first
 * round index. If that index is odd, a single round is issued with
 * arguments (A, E), followed by copyStateVariables(A, E), and i is
 * incremented so that it becomes even. The remaining rounds are issued in
 * pairs - (i, A, E) then (i+1, E, A) - until i reaches 24.
 *
 * The expansion is a sequence of statements, not a single expression, and
 * it relies on the including file to provide the variable `i`, the
 * variable sets A and E and the macros prepareTheta,
 * thetaRhoPiChiIotaPrepareTheta and copyStateVariables.
 * NOTE(review): copyStateVariables(A, E) presumably copies E back into A
 * so that the paired loop can start from A - confirm at its definition.
 */
#define roundsN(__nrounds) \
prepareTheta \
i = 24 - (__nrounds); \
if ((i&1) != 0) { \
thetaRhoPiChiIotaPrepareTheta(i, A, E) \
copyStateVariables(A, E) \
++i; \
} \
for( /* empty */; i<24; i+=2) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
}

View File

@ -0,0 +1,18 @@
/*
This file defines some parameters of the implementation in the parent directory.
*/
/* Feature switches selected by this configuration. */
#define KeccakP1600times4_useAVX2
#define KeccakP1600times4_fullUnrolling

/* Text describing this configuration. */
#define KeccakP1600times4_implementation_config "AVX2, all rounds unrolled"

/*
 * ATTRIBUTE_TARGET_AVX2
 *
 * Expands to the "avx2" target attribute when the compiler defines
 * __GNUC__ or reports support for the `target` attribute through
 * __has_attribute; expands to nothing otherwise.
 */

/* Compilers that lack __has_attribute get a stand-in that always yields 0. */
#if !defined(__has_attribute)
#define __has_attribute(a) 0
#endif

#if !defined(__GNUC__) && !__has_attribute(target)
#define ATTRIBUTE_TARGET_AVX2
#else
#define ATTRIBUTE_TARGET_AVX2 __attribute__((target(("avx2"))))
#endif

Some files were not shown because too many files have changed in this diff Show More