From 30fbfd826fd26ff4cdda378a3a79d4e3dfbfd63a Mon Sep 17 00:00:00 2001 From: Douglas Stebila Date: Sun, 1 Aug 2021 17:02:22 -0400 Subject: [PATCH] Sync with PQClean (#1061) * Sync with PQClean 6c1ea921ee4a06a6b50c742ca540bb9b5e51aee2 * Update documentation YML from PQClean * Update documentation from PQClean --- docs/algorithms/kem/classic_mceliece.md | 2 +- docs/algorithms/kem/classic_mceliece.yml | 3 +- docs/algorithms/kem/hqc.md | 2 +- docs/algorithms/kem/hqc.yml | 2 +- docs/algorithms/kem/ntru.md | 2 +- docs/algorithms/kem/ntru.yml | 2 +- docs/algorithms/kem/ntruprime.md | 2 +- docs/algorithms/kem/ntruprime.yml | 2 +- docs/algorithms/kem/saber.md | 2 +- docs/algorithms/kem/saber.yml | 2 +- docs/algorithms/sig/falcon.md | 2 +- docs/algorithms/sig/falcon.yml | 2 +- docs/algorithms/sig/rainbow.md | 2 +- docs/algorithms/sig/rainbow.yml | 2 +- docs/algorithms/sig/sphincs.md | 2 +- docs/algorithms/sig/sphincs.yml | 2 +- .../copy_from_upstream/copy_from_upstream.yml | 2 +- src/sig/falcon/CMakeLists.txt | 4 +- .../falcon/pqclean_falcon-1024_avx2/sign.c | 4 +- .../falcon/pqclean_falcon-1024_clean/fpr.c | 260 +++++++++++++++- .../falcon/pqclean_falcon-1024_clean/fpr.h | 280 ++---------------- .../falcon/pqclean_falcon-1024_clean/inner.c | 70 +++++ .../falcon/pqclean_falcon-1024_clean/inner.h | 52 +--- .../falcon/pqclean_falcon-1024_clean/sign.c | 4 +- src/sig/falcon/pqclean_falcon-512_avx2/sign.c | 4 +- src/sig/falcon/pqclean_falcon-512_clean/fpr.c | 260 +++++++++++++++- src/sig/falcon/pqclean_falcon-512_clean/fpr.h | 280 ++---------------- .../falcon/pqclean_falcon-512_clean/inner.c | 70 +++++ .../falcon/pqclean_falcon-512_clean/inner.h | 49 +-- .../falcon/pqclean_falcon-512_clean/sign.c | 4 +- 30 files changed, 756 insertions(+), 620 deletions(-) create mode 100755 src/sig/falcon/pqclean_falcon-1024_clean/inner.c create mode 100755 src/sig/falcon/pqclean_falcon-512_clean/inner.c diff --git a/docs/algorithms/kem/classic_mceliece.md 
b/docs/algorithms/kem/classic_mceliece.md index eda912836..2f7c0842f 100644 --- a/docs/algorithms/kem/classic_mceliece.md +++ b/docs/algorithms/kem/classic_mceliece.md @@ -5,7 +5,7 @@ - **Principal submitters**: Daniel J. Bernstein, Tung Chou, Tanja Lange, Ingo von Maurich, Rafael Misoczki, Ruben Niederhagen, Edoardo Persichetti, Christiane Peters, Peter Schwabe, Nicolas Sendrier, Jakub Szefer, Wen Wang. - **Authors' website**: https://classic.mceliece.org - **Specification version**: SUPERCOP-20191221. -- **Implementation source**: https://github.com/PQClean/PQClean/commit/89d34613364deca88659f6c2dd38708279c6bd24, which takes it from: +- **Implementation source**: https://github.com/PQClean/PQClean/commit/6c1ea921ee4a06a6b50c742ca540bb9b5e51aee2, which takes it from: - SUPERCOP-20191221 "vec" and "avx" implementations - **Implementation license (SPDX-Identifier)**: Public domain. diff --git a/docs/algorithms/kem/classic_mceliece.yml b/docs/algorithms/kem/classic_mceliece.yml index e03e353d9..9efe580f7 100644 --- a/docs/algorithms/kem/classic_mceliece.yml +++ b/docs/algorithms/kem/classic_mceliece.yml @@ -19,7 +19,7 @@ website: https://classic.mceliece.org nist-round: 3 spec-version: SUPERCOP-20191221 spdx-license-identifier: Public domain -upstream: https://github.com/PQClean/PQClean/commit/89d34613364deca88659f6c2dd38708279c6bd24 +upstream: https://github.com/PQClean/PQClean/commit/6c1ea921ee4a06a6b50c742ca540bb9b5e51aee2 upstream-ancestors: - SUPERCOP-20191221 "vec" and "avx" implementations parameter-sets: @@ -348,3 +348,4 @@ parameter-sets: no-secret-dependent-branching-claimed: false no-secret-dependent-branching-checked-by-valgrind: true large-stack-usage: true +auxiliary-submitters: [] diff --git a/docs/algorithms/kem/hqc.md b/docs/algorithms/kem/hqc.md index 861ec9dc6..e1ef17a05 100644 --- a/docs/algorithms/kem/hqc.md +++ b/docs/algorithms/kem/hqc.md @@ -5,7 +5,7 @@ - **Principal submitters**: Carlos Aguilar Melchor, Nicolas Aragon, Slim Bettaieb, Olivier 
Blazy, Jurjen Bos, Jean-Christophe Deneuville, Philippe Gaborit, Edoardo Persichetti, Jean-Marc Robert, Pascal Véron, Gilles Zémor, Loïc Bidoux. - **Authors' website**: https://pqc-hqc.org/ - **Specification version**: NIST Round 3 submission. -- **Implementation source**: https://github.com/PQClean/PQClean/commit/89d34613364deca88659f6c2dd38708279c6bd24, which takes it from: +- **Implementation source**: https://github.com/PQClean/PQClean/commit/6c1ea921ee4a06a6b50c742ca540bb9b5e51aee2, which takes it from: - https://github.com/jschanck/package-pqclean/tree/29f79e72/hqc, which takes it from: - submission 2020-10-01 at https://pqc-hqc.org/implementation.html - **Implementation license (SPDX-Identifier)**: Public domain. diff --git a/docs/algorithms/kem/hqc.yml b/docs/algorithms/kem/hqc.yml index 5a78f15d9..f87df5a5c 100644 --- a/docs/algorithms/kem/hqc.yml +++ b/docs/algorithms/kem/hqc.yml @@ -18,7 +18,7 @@ website: https://pqc-hqc.org/ nist-round: 3 spec-version: NIST Round 3 submission spdx-license-identifier: Public domain -upstream: https://github.com/PQClean/PQClean/commit/89d34613364deca88659f6c2dd38708279c6bd24 +upstream: https://github.com/PQClean/PQClean/commit/6c1ea921ee4a06a6b50c742ca540bb9b5e51aee2 upstream-ancestors: - https://github.com/jschanck/package-pqclean/tree/29f79e72/hqc - submission 2020-10-01 at https://pqc-hqc.org/implementation.html diff --git a/docs/algorithms/kem/ntru.md b/docs/algorithms/kem/ntru.md index a5d7d53dc..42eefff3b 100644 --- a/docs/algorithms/kem/ntru.md +++ b/docs/algorithms/kem/ntru.md @@ -6,7 +6,7 @@ - **Auxiliary submitters**: Cong Chen, Oussama Danba, Jeffrey Hoffstein, Andreas Hülsing, Joost Rijneveld, Tsunekazu Saito, Peter Schwabe, William Whyte, Keita Xagawa, Takashi Yamakawa, Zhenfei Zhang. - **Authors' website**: https://ntru.org/ - **Specification version**: NIST Round 3 submission. 
-- **Implementation source**: https://github.com/PQClean/PQClean/commit/89d34613364deca88659f6c2dd38708279c6bd24, which takes it from: +- **Implementation source**: https://github.com/PQClean/PQClean/commit/6c1ea921ee4a06a6b50c742ca540bb9b5e51aee2, which takes it from: - https://github.com/jschanck/ntru/tree/a43a4457 - **Implementation license (SPDX-Identifier)**: CC0-1.0. diff --git a/docs/algorithms/kem/ntru.yml b/docs/algorithms/kem/ntru.yml index 19578a4da..c4ac02067 100644 --- a/docs/algorithms/kem/ntru.yml +++ b/docs/algorithms/kem/ntru.yml @@ -19,7 +19,7 @@ website: https://ntru.org/ nist-round: 3 spec-version: NIST Round 3 submission spdx-license-identifier: CC0-1.0 -upstream: https://github.com/PQClean/PQClean/commit/89d34613364deca88659f6c2dd38708279c6bd24 +upstream: https://github.com/PQClean/PQClean/commit/6c1ea921ee4a06a6b50c742ca540bb9b5e51aee2 upstream-ancestors: - https://github.com/jschanck/ntru/tree/a43a4457 parameter-sets: diff --git a/docs/algorithms/kem/ntruprime.md b/docs/algorithms/kem/ntruprime.md index 1c1d5aa98..a25add8fb 100644 --- a/docs/algorithms/kem/ntruprime.md +++ b/docs/algorithms/kem/ntruprime.md @@ -5,7 +5,7 @@ - **Principal submitters**: Daniel J. Bernstein, Chitchanok Chuengsatiansup, Tanja Lange, Christine van Vredendaal. - **Authors' website**: https://ntruprime.cr.yp.to - **Specification version**: supercop-20200826. -- **Implementation source**: https://github.com/PQClean/PQClean/commit/89d34613364deca88659f6c2dd38708279c6bd24, which takes it from: +- **Implementation source**: https://github.com/PQClean/PQClean/commit/6c1ea921ee4a06a6b50c742ca540bb9b5e51aee2, which takes it from: - https://github.com/jschanck/package-pqclean/tree/4d9f08c3/ntruprime, which takes it from: - supercop-20210604 - **Implementation license (SPDX-Identifier)**: Public domain. 
diff --git a/docs/algorithms/kem/ntruprime.yml b/docs/algorithms/kem/ntruprime.yml index 6a26cbc71..e702eed76 100644 --- a/docs/algorithms/kem/ntruprime.yml +++ b/docs/algorithms/kem/ntruprime.yml @@ -10,7 +10,7 @@ website: https://ntruprime.cr.yp.to nist-round: 3 spec-version: supercop-20200826 spdx-license-identifier: Public domain -upstream: https://github.com/PQClean/PQClean/commit/89d34613364deca88659f6c2dd38708279c6bd24 +upstream: https://github.com/PQClean/PQClean/commit/6c1ea921ee4a06a6b50c742ca540bb9b5e51aee2 upstream-ancestors: - https://github.com/jschanck/package-pqclean/tree/4d9f08c3/ntruprime - supercop-20210604 diff --git a/docs/algorithms/kem/saber.md b/docs/algorithms/kem/saber.md index 329178b92..fb4560470 100644 --- a/docs/algorithms/kem/saber.md +++ b/docs/algorithms/kem/saber.md @@ -5,7 +5,7 @@ - **Principal submitters**: Jan-Pieter D'Anvers, Angshuman Karmakar, Sujoy Sinha Roy, Frederik Vercauteren. - **Authors' website**: https://www.esat.kuleuven.be/cosic/pqcrypto/saber/ - **Specification version**: NIST Round 3 submission. -- **Implementation source**: https://github.com/PQClean/PQClean/commit/89d34613364deca88659f6c2dd38708279c6bd24, which takes it from: +- **Implementation source**: https://github.com/PQClean/PQClean/commit/6c1ea921ee4a06a6b50c742ca540bb9b5e51aee2, which takes it from: - https://github.com/jschanck/package-pqclean/tree/1ae84c3c/saber, which takes it from: - https://github.com/KULeuven-COSIC/SABER/tree/509cc5ec3a7e12a751ccdd2ef5bd6e54e00bd350 - **Implementation license (SPDX-Identifier)**: Public domain. 
diff --git a/docs/algorithms/kem/saber.yml b/docs/algorithms/kem/saber.yml index 96437f771..50f4877fe 100644 --- a/docs/algorithms/kem/saber.yml +++ b/docs/algorithms/kem/saber.yml @@ -10,7 +10,7 @@ website: https://www.esat.kuleuven.be/cosic/pqcrypto/saber/ nist-round: 3 spec-version: NIST Round 3 submission spdx-license-identifier: Public domain -upstream: https://github.com/PQClean/PQClean/commit/89d34613364deca88659f6c2dd38708279c6bd24 +upstream: https://github.com/PQClean/PQClean/commit/6c1ea921ee4a06a6b50c742ca540bb9b5e51aee2 upstream-ancestors: - https://github.com/jschanck/package-pqclean/tree/1ae84c3c/saber - https://github.com/KULeuven-COSIC/SABER/tree/509cc5ec3a7e12a751ccdd2ef5bd6e54e00bd350 diff --git a/docs/algorithms/sig/falcon.md b/docs/algorithms/sig/falcon.md index 064beab3e..7823734c9 100644 --- a/docs/algorithms/sig/falcon.md +++ b/docs/algorithms/sig/falcon.md @@ -6,7 +6,7 @@ - **Auxiliary submitters**: Pierre-Alain Fouque, Jeffrey Hoffstein, Paul Kirchner, Vadim Lyubashevsky, Thomas Pornin, Thomas Ricosset, Gregor Seiler, William Whyte, Zhenfei Zhang. - **Authors' website**: https://falcon-sign.info - **Specification version**: v1.2. -- **Implementation source**: https://github.com/PQClean/PQClean/commit/89d34613364deca88659f6c2dd38708279c6bd24, which takes it from: +- **Implementation source**: https://github.com/PQClean/PQClean/commit/6c1ea921ee4a06a6b50c742ca540bb9b5e51aee2, which takes it from: - https://github.com/jschanck/package-pqclean/tree/cea1fa5a/falcon, which takes it from: - supercop-20201018 - **Implementation license (SPDX-Identifier)**: CC0-1.0. 
diff --git a/docs/algorithms/sig/falcon.yml b/docs/algorithms/sig/falcon.yml index 3414de2fd..5dfa1fdbe 100644 --- a/docs/algorithms/sig/falcon.yml +++ b/docs/algorithms/sig/falcon.yml @@ -17,7 +17,7 @@ website: https://falcon-sign.info nist-round: 3 spec-version: v1.2 spdx-license-identifier: CC0-1.0 -upstream: https://github.com/PQClean/PQClean/commit/89d34613364deca88659f6c2dd38708279c6bd24 +upstream: https://github.com/PQClean/PQClean/commit/6c1ea921ee4a06a6b50c742ca540bb9b5e51aee2 upstream-ancestors: - https://github.com/jschanck/package-pqclean/tree/cea1fa5a/falcon - supercop-20201018 diff --git a/docs/algorithms/sig/rainbow.md b/docs/algorithms/sig/rainbow.md index 204ce8d80..a720a6308 100644 --- a/docs/algorithms/sig/rainbow.md +++ b/docs/algorithms/sig/rainbow.md @@ -6,7 +6,7 @@ - **Auxiliary submitters**: Ming-Shing Chen, Matthias Kannwischer, Jacques Patarin, Albrecht Petzoldt, Dieter Schmidt, Bo-Yin Yang. - **Authors' website**: https://www.pqcrainbow.org/ - **Specification version**: NIST Round 3 submission. -- **Implementation source**: https://github.com/PQClean/PQClean/commit/89d34613364deca88659f6c2dd38708279c6bd24, which takes it from: +- **Implementation source**: https://github.com/PQClean/PQClean/commit/6c1ea921ee4a06a6b50c742ca540bb9b5e51aee2, which takes it from: - https://github.com/fast-crypto-lab/rainbow-submission-round2/commit/173ada0e077e1b9dbd8e4a78994f87acc0c92263 - **Implementation license (SPDX-Identifier)**: CC0-1.0. 
diff --git a/docs/algorithms/sig/rainbow.yml b/docs/algorithms/sig/rainbow.yml index da9ec8dfc..6a8ab8fcd 100644 --- a/docs/algorithms/sig/rainbow.yml +++ b/docs/algorithms/sig/rainbow.yml @@ -14,7 +14,7 @@ website: https://www.pqcrainbow.org/ nist-round: 3 spec-version: NIST Round 3 submission spdx-license-identifier: CC0-1.0 -upstream: https://github.com/PQClean/PQClean/commit/89d34613364deca88659f6c2dd38708279c6bd24 +upstream: https://github.com/PQClean/PQClean/commit/6c1ea921ee4a06a6b50c742ca540bb9b5e51aee2 upstream-ancestors: - https://github.com/fast-crypto-lab/rainbow-submission-round2/commit/173ada0e077e1b9dbd8e4a78994f87acc0c92263 parameter-sets: diff --git a/docs/algorithms/sig/sphincs.md b/docs/algorithms/sig/sphincs.md index 3002461c9..d949dd467 100644 --- a/docs/algorithms/sig/sphincs.md +++ b/docs/algorithms/sig/sphincs.md @@ -6,7 +6,7 @@ - **Auxiliary submitters**: Jean-Philippe Aumasson, Daniel J. Bernstein,, Christoph Dobraunig, Maria Eichlseder, Scott Fluhrer, Stefan-Lukas Gazdag, Panos Kampanakis, Stefan Kölbl, Tanja Lange, Martin M. Lauridsen, Florian Mendel, Ruben Niederhagen, Christian Rechberger, Joost Rijneveld, Peter Schwabe. - **Authors' website**: https://sphincs.org/ - **Specification version**: NIST Round 3 submission. -- **Implementation source**: https://github.com/PQClean/PQClean/commit/89d34613364deca88659f6c2dd38708279c6bd24, which takes it from: +- **Implementation source**: https://github.com/PQClean/PQClean/commit/6c1ea921ee4a06a6b50c742ca540bb9b5e51aee2, which takes it from: - https://github.com/sphincs/sphincsplus - **Implementation license (SPDX-Identifier)**: CC0-1.0. 
diff --git a/docs/algorithms/sig/sphincs.yml b/docs/algorithms/sig/sphincs.yml index 011ac2031..563975ae5 100644 --- a/docs/algorithms/sig/sphincs.yml +++ b/docs/algorithms/sig/sphincs.yml @@ -23,7 +23,7 @@ website: https://sphincs.org/ nist-round: 3 spec-version: NIST Round 3 submission spdx-license-identifier: CC0-1.0 -upstream: https://github.com/PQClean/PQClean/commit/89d34613364deca88659f6c2dd38708279c6bd24 +upstream: https://github.com/PQClean/PQClean/commit/6c1ea921ee4a06a6b50c742ca540bb9b5e51aee2 upstream-ancestors: - https://github.com/sphincs/sphincsplus parameter-sets: diff --git a/scripts/copy_from_upstream/copy_from_upstream.yml b/scripts/copy_from_upstream/copy_from_upstream.yml index a46be50e2..f9edbca8d 100644 --- a/scripts/copy_from_upstream/copy_from_upstream.yml +++ b/scripts/copy_from_upstream/copy_from_upstream.yml @@ -3,7 +3,7 @@ upstreams: name: pqclean git_url: https://github.com/PQClean/PQClean.git git_branch: master - git_commit: 89d34613364deca88659f6c2dd38708279c6bd24 + git_commit: 6c1ea921ee4a06a6b50c742ca540bb9b5e51aee2 kem_meta_path: 'crypto_kem/{pqclean_scheme}/META.yml' sig_meta_path: 'crypto_sign/{pqclean_scheme}/META.yml' kem_scheme_path: 'crypto_kem/{pqclean_scheme}' diff --git a/src/sig/falcon/CMakeLists.txt b/src/sig/falcon/CMakeLists.txt index 9479bcb64..170bdf8c6 100644 --- a/src/sig/falcon/CMakeLists.txt +++ b/src/sig/falcon/CMakeLists.txt @@ -6,7 +6,7 @@ set(_FALCON_OBJS "") if(OQS_ENABLE_SIG_falcon_512) - add_library(falcon_512_clean OBJECT sig_falcon_512.c pqclean_falcon-512_clean/codec.c pqclean_falcon-512_clean/common.c pqclean_falcon-512_clean/fft.c pqclean_falcon-512_clean/fpr.c pqclean_falcon-512_clean/keygen.c pqclean_falcon-512_clean/pqclean.c pqclean_falcon-512_clean/rng.c pqclean_falcon-512_clean/sign.c pqclean_falcon-512_clean/vrfy.c) + add_library(falcon_512_clean OBJECT sig_falcon_512.c pqclean_falcon-512_clean/codec.c pqclean_falcon-512_clean/common.c pqclean_falcon-512_clean/fft.c 
pqclean_falcon-512_clean/fpr.c pqclean_falcon-512_clean/inner.c pqclean_falcon-512_clean/keygen.c pqclean_falcon-512_clean/pqclean.c pqclean_falcon-512_clean/rng.c pqclean_falcon-512_clean/sign.c pqclean_falcon-512_clean/vrfy.c) target_include_directories(falcon_512_clean PRIVATE ${CMAKE_CURRENT_LIST_DIR}/pqclean_falcon-512_clean) target_include_directories(falcon_512_clean PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims) set(_FALCON_OBJS ${_FALCON_OBJS} $<TARGET_OBJECTS:falcon_512_clean>) @@ -21,7 +21,7 @@ if(OQS_ENABLE_SIG_falcon_512_avx2) endif() if(OQS_ENABLE_SIG_falcon_1024) - add_library(falcon_1024_clean OBJECT sig_falcon_1024.c pqclean_falcon-1024_clean/codec.c pqclean_falcon-1024_clean/common.c pqclean_falcon-1024_clean/fft.c pqclean_falcon-1024_clean/fpr.c pqclean_falcon-1024_clean/keygen.c pqclean_falcon-1024_clean/pqclean.c pqclean_falcon-1024_clean/rng.c pqclean_falcon-1024_clean/sign.c pqclean_falcon-1024_clean/vrfy.c) + add_library(falcon_1024_clean OBJECT sig_falcon_1024.c pqclean_falcon-1024_clean/codec.c pqclean_falcon-1024_clean/common.c pqclean_falcon-1024_clean/fft.c pqclean_falcon-1024_clean/fpr.c pqclean_falcon-1024_clean/inner.c pqclean_falcon-1024_clean/keygen.c pqclean_falcon-1024_clean/pqclean.c pqclean_falcon-1024_clean/rng.c pqclean_falcon-1024_clean/sign.c pqclean_falcon-1024_clean/vrfy.c) target_include_directories(falcon_1024_clean PRIVATE ${CMAKE_CURRENT_LIST_DIR}/pqclean_falcon-1024_clean) target_include_directories(falcon_1024_clean PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims) set(_FALCON_OBJS ${_FALCON_OBJS} $<TARGET_OBJECTS:falcon_1024_clean>) diff --git a/src/sig/falcon/pqclean_falcon-1024_avx2/sign.c b/src/sig/falcon/pqclean_falcon-1024_avx2/sign.c index 8ef93bf83..6888fe00a 100644 --- a/src/sig/falcon/pqclean_falcon-1024_avx2/sign.c +++ b/src/sig/falcon/pqclean_falcon-1024_avx2/sign.c @@ -267,7 +267,7 @@ PQCLEAN_FALCON1024_AVX2_expand_privkey(fpr *expanded_key, PQCLEAN_FALCON1024_AVX2_poly_neg(rF, logn); /* - * The Gram matrix is G = B·B*. 
Formulas are: + * The Gram matrix is G = B x B*. Formulas are: * g00 = b00*adj(b00) + b01*adj(b01) * g01 = b00*adj(b10) + b01*adj(b11) * g10 = b10*adj(b00) + b11*adj(b01) @@ -781,7 +781,7 @@ do_sign_dyn(samplerZ samp, void *samp_ctx, int16_t *s2, PQCLEAN_FALCON1024_AVX2_poly_neg(b11, logn); /* - * Compute the Gram matrix G = B·B*. Formulas are: + * Compute the Gram matrix G = B x B*. Formulas are: * g00 = b00*adj(b00) + b01*adj(b01) * g01 = b00*adj(b10) + b01*adj(b11) * g10 = b10*adj(b00) + b11*adj(b01) diff --git a/src/sig/falcon/pqclean_falcon-1024_clean/fpr.c b/src/sig/falcon/pqclean_falcon-1024_clean/fpr.c index 091462a71..669c825ee 100644 --- a/src/sig/falcon/pqclean_falcon-1024_clean/fpr.c +++ b/src/sig/falcon/pqclean_falcon-1024_clean/fpr.c @@ -78,6 +78,66 @@ (e) += (int)(nt); \ } while (0) +uint64_t +fpr_ursh(uint64_t x, int n) { + x ^= (x ^ (x >> 32)) & -(uint64_t)(n >> 5); + return x >> (n & 31); +} + +int64_t +fpr_irsh(int64_t x, int n) { + x ^= (x ^ (x >> 32)) & -(int64_t)(n >> 5); + return x >> (n & 31); +} + +uint64_t +fpr_ulsh(uint64_t x, int n) { + x ^= (x ^ (x << 32)) & -(uint64_t)(n >> 5); + return x << (n & 31); +} + +fpr +FPR(int s, int e, uint64_t m) { + fpr x; + uint32_t t; + unsigned f; + + /* + * If e >= -1076, then the value is "normal"; otherwise, it + * should be a subnormal, which we clamp down to zero. + */ + e += 1076; + t = (uint32_t)e >> 31; + m &= (uint64_t)t - 1; + + /* + * If m = 0 then we want a zero; make e = 0 too, but conserve + * the sign. + */ + t = (uint32_t)(m >> 54); + e &= -(int)t; + + /* + * The 52 mantissa bits come from m. Value m has its top bit set + * (unless it is a zero); we leave it "as is": the top bit will + * increment the exponent by 1, except when m = 0, which is + * exactly what we want. 
+ */ + x = (((uint64_t)s << 63) | (m >> 2)) + ((uint64_t)(uint32_t)e << 52); + + /* + * Rounding: if the low three bits of m are 011, 110 or 111, + * then the value should be incremented to get the next + * representable value. This implements the usual + * round-to-nearest rule (with preference to even values in case + * of a tie). Note that the increment may make a carry spill + * into the exponent field, which is again exactly what we want + * in that case. + */ + f = (unsigned)m & 7U; + x += (0xC8U >> f) & 1; + return x; +} fpr fpr_scaled(int64_t i, int sc) { @@ -134,7 +194,131 @@ fpr_scaled(int64_t i, int sc) { return FPR(s, e, m); } +fpr +fpr_of(int64_t i) { + return fpr_scaled(i, 0); +} +int64_t +fpr_rint(fpr x) { + uint64_t m, d; + int e; + uint32_t s, dd, f; + + /* + * We assume that the value fits in -(2^63-1)..+(2^63-1). We can + * thus extract the mantissa as a 63-bit integer, then right-shift + * it as needed. + */ + m = ((x << 10) | ((uint64_t)1 << 62)) & (((uint64_t)1 << 63) - 1); + e = 1085 - ((int)(x >> 52) & 0x7FF); + + /* + * If a shift of more than 63 bits is needed, then simply set m + * to zero. This also covers the case of an input operand equal + * to zero. + */ + m &= -(uint64_t)((uint32_t)(e - 64) >> 31); + e &= 63; + + /* + * Right-shift m as needed. Shift count is e. Proper rounding + * mandates that: + * - If the highest dropped bit is zero, then round low. + * - If the highest dropped bit is one, and at least one of the + * other dropped bits is one, then round up. + * - If the highest dropped bit is one, and all other dropped + * bits are zero, then round up if the lowest kept bit is 1, + * or low otherwise (i.e. ties are broken by "rounding to even"). + * + * We thus first extract a word consisting of all the dropped bit + * AND the lowest kept bit; then we shrink it down to three bits, + * the lowest being "sticky". 
+ */ + d = fpr_ulsh(m, 63 - e); + dd = (uint32_t)d | ((uint32_t)(d >> 32) & 0x1FFFFFFF); + f = (uint32_t)(d >> 61) | ((dd | -dd) >> 31); + m = fpr_ursh(m, e) + (uint64_t)((0xC8U >> f) & 1U); + + /* + * Apply the sign bit. + */ + s = (uint32_t)(x >> 63); + return ((int64_t)m ^ -(int64_t)s) + (int64_t)s; +} + +int64_t +fpr_floor(fpr x) { + uint64_t t; + int64_t xi; + int e, cc; + + /* + * We extract the integer as a _signed_ 64-bit integer with + * a scaling factor. Since we assume that the value fits + * in the -(2^63-1)..+(2^63-1) range, we can left-shift the + * absolute value to make it in the 2^62..2^63-1 range: we + * will only need a right-shift afterwards. + */ + e = (int)(x >> 52) & 0x7FF; + t = x >> 63; + xi = (int64_t)(((x << 10) | ((uint64_t)1 << 62)) + & (((uint64_t)1 << 63) - 1)); + xi = (xi ^ -(int64_t)t) + (int64_t)t; + cc = 1085 - e; + + /* + * We perform an arithmetic right-shift on the value. This + * applies floor() semantics on both positive and negative values + * (rounding toward minus infinity). + */ + xi = fpr_irsh(xi, cc & 63); + + /* + * If the true shift count was 64 or more, then we should instead + * replace xi with 0 (if nonnegative) or -1 (if negative). Edge + * case: -0 will be floored to -1, not 0 (whether this is correct + * is debatable; in any case, the other functions normalize zero + * to +0). + * + * For an input of zero, the non-shifted xi was incorrect (we used + * a top implicit bit of value 1, not 0), but this does not matter + * since this operation will clamp it down. + */ + xi ^= (xi ^ -(int64_t)t) & -(int64_t)((uint32_t)(63 - cc) >> 31); + return xi; +} + +int64_t +fpr_trunc(fpr x) { + uint64_t t, xu; + int e, cc; + + /* + * Extract the absolute value. Since we assume that the value + * fits in the -(2^63-1)..+(2^63-1) range, we can left-shift + * the absolute value into the 2^62..2^63-1 range, and then + * do a right shift afterwards. 
+ */ + e = (int)(x >> 52) & 0x7FF; + xu = ((x << 10) | ((uint64_t)1 << 62)) & (((uint64_t)1 << 63) - 1); + cc = 1085 - e; + xu = fpr_ursh(xu, cc & 63); + + /* + * If the exponent is too low (cc > 63), then the shift was wrong + * and we must clamp the value to 0. This also covers the case + * of an input equal to zero. + */ + xu &= -(uint64_t)((uint32_t)(cc - 64) >> 31); + + /* + * Apply back the sign, if the source value is negative. + */ + t = x >> 63; + xu = (xu ^ -t) + t; + return *(int64_t *)&xu; +} fpr fpr_add(fpr x, fpr y) { @@ -243,7 +427,42 @@ fpr_add(fpr x, fpr y) { return FPR(sx, ex, xu); } +fpr +fpr_sub(fpr x, fpr y) { + y ^= (uint64_t)1 << 63; + return fpr_add(x, y); +} +fpr +fpr_neg(fpr x) { + x ^= (uint64_t)1 << 63; + return x; +} + +fpr +fpr_half(fpr x) { + /* + * To divide a value by 2, we just have to subtract 1 from its + * exponent, but we have to take care of zero. + */ + uint32_t t; + + x -= (uint64_t)1 << 52; + t = (((uint32_t)(x >> 52) & 0x7FF) + 1) >> 11; + x &= (uint64_t)t - 1; + return x; +} + +fpr +fpr_double(fpr x) { + /* + * To double a value, we just increment by one the exponent. We + * don't care about infinites or NaNs; however, 0 is a + * special case. + */ + x += (uint64_t)((((unsigned)(x >> 52) & 0x7FFU) + 0x7FFU) >> 11) << 52; + return x; +} fpr fpr_mul(fpr x, fpr y) { @@ -340,7 +559,10 @@ fpr_mul(fpr x, fpr y) { return FPR(s, e, zu); } - +fpr +fpr_sqr(fpr x) { + return fpr_mul(x, x); +} fpr fpr_div(fpr x, fpr y) { @@ -428,7 +650,10 @@ fpr_div(fpr x, fpr y) { return FPR(s, e, q); } - +fpr +fpr_inv(fpr x) { + return fpr_div(4607182418800017408u, x); +} fpr fpr_sqrt(fpr x) { @@ -506,6 +731,37 @@ fpr_sqrt(fpr x) { return FPR(0, e, q); } +int +fpr_lt(fpr x, fpr y) { + /* + * If both x and y are positive, then a signed comparison yields + * the proper result: + * - For positive values, the order is preserved. + * - The sign bit is at the same place as in integers, so + * sign is preserved. 
+ * Moreover, we can compute [x < y] as sgn(x-y) and the computation + * of x-y will not overflow. + * + * If the signs differ, then sgn(x) gives the proper result. + * + * If both x and y are negative, then the order is reversed. + * Hence [x < y] = sgn(y-x). We must compute this separately from + * sgn(x-y); simply inverting sgn(x-y) would not handle the edge + * case x = y properly. + */ + int cc0, cc1; + int64_t sx; + int64_t sy; + + sx = *(int64_t *)&x; + sy = *(int64_t *)&y; + sy &= ~((sx ^ sy) >> 63); /* set sy=0 if signs differ */ + + cc0 = (int)((sx - sy) >> 63) & 1; /* Neither subtraction overflows when */ + cc1 = (int)((sy - sx) >> 63) & 1; /* the signs are the same. */ + + return cc0 ^ ((cc0 ^ cc1) & (int)((x & y) >> 63)); +} uint64_t fpr_expm_p63(fpr x, fpr ccs) { diff --git a/src/sig/falcon/pqclean_falcon-1024_clean/fpr.h b/src/sig/falcon/pqclean_falcon-1024_clean/fpr.h index dd7e15c22..a1c122275 100644 --- a/src/sig/falcon/pqclean_falcon-1024_clean/fpr.h +++ b/src/sig/falcon/pqclean_falcon-1024_clean/fpr.h @@ -126,11 +126,8 @@ typedef uint64_t fpr; * * Shift count n MUST be in the 0..63 range. */ -static inline uint64_t -fpr_ursh(uint64_t x, int n) { - x ^= (x ^ (x >> 32)) & -(uint64_t)(n >> 5); - return x >> (n & 31); -} +#define fpr_ursh PQCLEAN_FALCON1024_CLEAN_fpr_ursh +uint64_t fpr_ursh(uint64_t x, int n); /* * Right-shift a 64-bit signed value by a possibly secret shift count @@ -138,11 +135,8 @@ fpr_ursh(uint64_t x, int n) { * * Shift count n MUST be in the 0..63 range. */ -static inline int64_t -fpr_irsh(int64_t x, int n) { - x ^= (x ^ (x >> 32)) & -(int64_t)(n >> 5); - return x >> (n & 31); -} +#define fpr_irsh PQCLEAN_FALCON1024_CLEAN_fpr_irsh +int64_t fpr_irsh(int64_t x, int n); /* * Left-shift a 64-bit unsigned value by a possibly secret shift count @@ -150,11 +144,8 @@ fpr_irsh(int64_t x, int n) { * * Shift count n MUST be in the 0..63 range. 
*/ -static inline uint64_t -fpr_ulsh(uint64_t x, int n) { - x ^= (x ^ (x << 32)) & -(uint64_t)(n >> 5); - return x << (n & 31); -} +#define fpr_ulsh PQCLEAN_FALCON1024_CLEAN_fpr_ulsh +uint64_t fpr_ulsh(uint64_t x, int n); /* * Expectations: @@ -171,56 +162,15 @@ fpr_ulsh(uint64_t x, int n) { * If e >= -1076 and e != 0, m must be within the expected range * (2^54 to 2^55-1). */ -static inline fpr -FPR(int s, int e, uint64_t m) { - fpr x; - uint32_t t; - unsigned f; +#define FPR PQCLEAN_FALCON1024_CLEAN_FPR +fpr FPR(int s, int e, uint64_t m); - /* - * If e >= -1076, then the value is "normal"; otherwise, it - * should be a subnormal, which we clamp down to zero. - */ - e += 1076; - t = (uint32_t)e >> 31; - m &= (uint64_t)t - 1; - - /* - * If m = 0 then we want a zero; make e = 0 too, but conserve - * the sign. - */ - t = (uint32_t)(m >> 54); - e &= -(int)t; - - /* - * The 52 mantissa bits come from m. Value m has its top bit set - * (unless it is a zero); we leave it "as is": the top bit will - * increment the exponent by 1, except when m = 0, which is - * exactly what we want. - */ - x = (((uint64_t)s << 63) | (m >> 2)) + ((uint64_t)(uint32_t)e << 52); - - /* - * Rounding: if the low three bits of m are 011, 110 or 111, - * then the value should be incremented to get the next - * representable value. This implements the usual - * round-to-nearest rule (with preference to even values in case - * of a tie). Note that the increment may make a carry spill - * into the exponent field, which is again exactly what we want - * in that case. 
- */ - f = (unsigned)m & 7U; - x += (0xC8U >> f) & 1; - return x; -} #define fpr_scaled PQCLEAN_FALCON1024_CLEAN_fpr_scaled fpr fpr_scaled(int64_t i, int sc); -static inline fpr -fpr_of(int64_t i) { - return fpr_scaled(i, 0); -} +#define fpr_of PQCLEAN_FALCON1024_CLEAN_fpr_of +fpr fpr_of(int64_t i); static const fpr fpr_q = 4667981563525332992; static const fpr fpr_inverse_of_q = 4545632735260551042; @@ -244,217 +194,47 @@ static const fpr fpr_ptwo63m1 = 4890909195324358656; static const fpr fpr_mtwo63m1 = 14114281232179134464U; static const fpr fpr_ptwo63 = 4890909195324358656; -static inline int64_t -fpr_rint(fpr x) { - uint64_t m, d; - int e; - uint32_t s, dd, f; +#define fpr_rint PQCLEAN_FALCON1024_CLEAN_fpr_rint +int64_t fpr_rint(fpr x); - /* - * We assume that the value fits in -(2^63-1)..+(2^63-1). We can - * thus extract the mantissa as a 63-bit integer, then right-shift - * it as needed. - */ - m = ((x << 10) | ((uint64_t)1 << 62)) & (((uint64_t)1 << 63) - 1); - e = 1085 - ((int)(x >> 52) & 0x7FF); +#define fpr_floor PQCLEAN_FALCON1024_CLEAN_fpr_floor +int64_t fpr_floor(fpr x); - /* - * If a shift of more than 63 bits is needed, then simply set m - * to zero. This also covers the case of an input operand equal - * to zero. - */ - m &= -(uint64_t)((uint32_t)(e - 64) >> 31); - e &= 63; - - /* - * Right-shift m as needed. Shift count is e. Proper rounding - * mandates that: - * - If the highest dropped bit is zero, then round low. - * - If the highest dropped bit is one, and at least one of the - * other dropped bits is one, then round up. - * - If the highest dropped bit is one, and all other dropped - * bits are zero, then round up if the lowest kept bit is 1, - * or low otherwise (i.e. ties are broken by "rounding to even"). - * - * We thus first extract a word consisting of all the dropped bit - * AND the lowest kept bit; then we shrink it down to three bits, - * the lowest being "sticky". 
- */ - d = fpr_ulsh(m, 63 - e); - dd = (uint32_t)d | ((uint32_t)(d >> 32) & 0x1FFFFFFF); - f = (uint32_t)(d >> 61) | ((dd | -dd) >> 31); - m = fpr_ursh(m, e) + (uint64_t)((0xC8U >> f) & 1U); - - /* - * Apply the sign bit. - */ - s = (uint32_t)(x >> 63); - return ((int64_t)m ^ -(int64_t)s) + (int64_t)s; -} - -static inline int64_t -fpr_floor(fpr x) { - uint64_t t; - int64_t xi; - int e, cc; - - /* - * We extract the integer as a _signed_ 64-bit integer with - * a scaling factor. Since we assume that the value fits - * in the -(2^63-1)..+(2^63-1) range, we can left-shift the - * absolute value to make it in the 2^62..2^63-1 range: we - * will only need a right-shift afterwards. - */ - e = (int)(x >> 52) & 0x7FF; - t = x >> 63; - xi = (int64_t)(((x << 10) | ((uint64_t)1 << 62)) - & (((uint64_t)1 << 63) - 1)); - xi = (xi ^ -(int64_t)t) + (int64_t)t; - cc = 1085 - e; - - /* - * We perform an arithmetic right-shift on the value. This - * applies floor() semantics on both positive and negative values - * (rounding toward minus infinity). - */ - xi = fpr_irsh(xi, cc & 63); - - /* - * If the true shift count was 64 or more, then we should instead - * replace xi with 0 (if nonnegative) or -1 (if negative). Edge - * case: -0 will be floored to -1, not 0 (whether this is correct - * is debatable; in any case, the other functions normalize zero - * to +0). - * - * For an input of zero, the non-shifted xi was incorrect (we used - * a top implicit bit of value 1, not 0), but this does not matter - * since this operation will clamp it down. - */ - xi ^= (xi ^ -(int64_t)t) & -(int64_t)((uint32_t)(63 - cc) >> 31); - return xi; -} - -static inline int64_t -fpr_trunc(fpr x) { - uint64_t t, xu; - int e, cc; - - /* - * Extract the absolute value. Since we assume that the value - * fits in the -(2^63-1)..+(2^63-1) range, we can left-shift - * the absolute value into the 2^62..2^63-1 range, and then - * do a right shift afterwards. 
- */ - e = (int)(x >> 52) & 0x7FF; - xu = ((x << 10) | ((uint64_t)1 << 62)) & (((uint64_t)1 << 63) - 1); - cc = 1085 - e; - xu = fpr_ursh(xu, cc & 63); - - /* - * If the exponent is too low (cc > 63), then the shift was wrong - * and we must clamp the value to 0. This also covers the case - * of an input equal to zero. - */ - xu &= -(uint64_t)((uint32_t)(cc - 64) >> 31); - - /* - * Apply back the sign, if the source value is negative. - */ - t = x >> 63; - xu = (xu ^ -t) + t; - return *(int64_t *)&xu; -} +#define fpr_trunc PQCLEAN_FALCON1024_CLEAN_fpr_trunc +int64_t fpr_trunc(fpr x); #define fpr_add PQCLEAN_FALCON1024_CLEAN_fpr_add fpr fpr_add(fpr x, fpr y); -static inline fpr -fpr_sub(fpr x, fpr y) { - y ^= (uint64_t)1 << 63; - return fpr_add(x, y); -} +#define fpr_sub PQCLEAN_FALCON1024_CLEAN_fpr_sub +fpr fpr_sub(fpr x, fpr y); -static inline fpr -fpr_neg(fpr x) { - x ^= (uint64_t)1 << 63; - return x; -} +#define fpr_neg PQCLEAN_FALCON1024_CLEAN_fpr_neg +fpr fpr_neg(fpr x); -static inline fpr -fpr_half(fpr x) { - /* - * To divide a value by 2, we just have to subtract 1 from its - * exponent, but we have to take care of zero. - */ - uint32_t t; +#define fpr_half PQCLEAN_FALCON1024_CLEAN_fpr_half +fpr fpr_half(fpr x); - x -= (uint64_t)1 << 52; - t = (((uint32_t)(x >> 52) & 0x7FF) + 1) >> 11; - x &= (uint64_t)t - 1; - return x; -} - -static inline fpr -fpr_double(fpr x) { - /* - * To double a value, we just increment by one the exponent. We - * don't care about infinites or NaNs; however, 0 is a - * special case. 
- */ - x += (uint64_t)((((unsigned)(x >> 52) & 0x7FFU) + 0x7FFU) >> 11) << 52; - return x; -} +#define fpr_double PQCLEAN_FALCON1024_CLEAN_fpr_double +fpr fpr_double(fpr x); #define fpr_mul PQCLEAN_FALCON1024_CLEAN_fpr_mul fpr fpr_mul(fpr x, fpr y); -static inline fpr -fpr_sqr(fpr x) { - return fpr_mul(x, x); -} +#define fpr_sqr PQCLEAN_FALCON1024_CLEAN_fpr_sqr +fpr fpr_sqr(fpr x); #define fpr_div PQCLEAN_FALCON1024_CLEAN_fpr_div fpr fpr_div(fpr x, fpr y); -static inline fpr -fpr_inv(fpr x) { - return fpr_div(4607182418800017408u, x); -} +#define fpr_inv PQCLEAN_FALCON1024_CLEAN_fpr_inv +fpr fpr_inv(fpr x); #define fpr_sqrt PQCLEAN_FALCON1024_CLEAN_fpr_sqrt fpr fpr_sqrt(fpr x); -static inline int -fpr_lt(fpr x, fpr y) { - /* - * If both x and y are positive, then a signed comparison yields - * the proper result: - * - For positive values, the order is preserved. - * - The sign bit is at the same place as in integers, so - * sign is preserved. - * Moreover, we can compute [x < y] as sgn(x-y) and the computation - * of x-y will not overflow. - * - * If the signs differ, then sgn(x) gives the proper result. - * - * If both x and y are negative, then the order is reversed. - * Hence [x < y] = sgn(y-x). We must compute this separately from - * sgn(x-y); simply inverting sgn(x-y) would not handle the edge - * case x = y properly. - */ - int cc0, cc1; - int64_t sx; - int64_t sy; - - sx = *(int64_t *)&x; - sy = *(int64_t *)&y; - sy &= ~((sx ^ sy) >> 63); /* set sy=0 if signs differ */ - - cc0 = (int)((sx - sy) >> 63) & 1; /* Neither subtraction overflows when */ - cc1 = (int)((sy - sx) >> 63) & 1; /* the signs are the same. */ - - return cc0 ^ ((cc0 ^ cc1) & (int)((x & y) >> 63)); -} +#define fpr_lt PQCLEAN_FALCON1024_CLEAN_fpr_lt +int fpr_lt(fpr x, fpr y); /* * Compute exp(x) for x such that |x| <= ln 2. 
We want a precision of 50 diff --git a/src/sig/falcon/pqclean_falcon-1024_clean/inner.c b/src/sig/falcon/pqclean_falcon-1024_clean/inner.c new file mode 100755 index 000000000..f5c269eda --- /dev/null +++ b/src/sig/falcon/pqclean_falcon-1024_clean/inner.c @@ -0,0 +1,70 @@ +#include "inner.h" + +/* + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2017-2019 Falcon Project + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + */ + +unsigned set_fpu_cw(unsigned x) { + return x; +} + + +uint64_t prng_get_u64(prng *p) { + size_t u; + + /* + * If there are less than 9 bytes in the buffer, we refill it. + * This means that we may drop the last few bytes, but this allows + * for faster extraction code. Also, it means that we never leave + * an empty buffer. 
+ */ + u = p->ptr; + if (u >= (sizeof p->buf.d) - 9) { + PQCLEAN_FALCON1024_CLEAN_prng_refill(p); + u = 0; + } + p->ptr = u + 8; + + return (uint64_t)p->buf.d[u + 0] + | ((uint64_t)p->buf.d[u + 1] << 8) + | ((uint64_t)p->buf.d[u + 2] << 16) + | ((uint64_t)p->buf.d[u + 3] << 24) + | ((uint64_t)p->buf.d[u + 4] << 32) + | ((uint64_t)p->buf.d[u + 5] << 40) + | ((uint64_t)p->buf.d[u + 6] << 48) + | ((uint64_t)p->buf.d[u + 7] << 56); +} + + +unsigned prng_get_u8(prng *p) { + unsigned v; + + v = p->buf.d[p->ptr ++]; + if (p->ptr == sizeof p->buf.d) { + PQCLEAN_FALCON1024_CLEAN_prng_refill(p); + } + return v; +} diff --git a/src/sig/falcon/pqclean_falcon-1024_clean/inner.h b/src/sig/falcon/pqclean_falcon-1024_clean/inner.h index 5b0477ac1..886f51a67 100644 --- a/src/sig/falcon/pqclean_falcon-1024_clean/inner.h +++ b/src/sig/falcon/pqclean_falcon-1024_clean/inner.h @@ -99,12 +99,8 @@ * targets other than 32-bit x86, or when the native 'double' type is * not used, the set_fpu_cw() function does nothing at all. */ -static inline unsigned -set_fpu_cw(unsigned x) { - return x; -} - - +#define set_fpu_cw PQCLEAN_FALCON1024_CLEAN_set_fpu_cw +unsigned set_fpu_cw(unsigned x); /* ==================================================================== */ @@ -496,50 +492,14 @@ void PQCLEAN_FALCON1024_CLEAN_prng_get_bytes(prng *p, void *dst, size_t len); /* * Get a 64-bit random value from a PRNG. */ -static inline uint64_t -prng_get_u64(prng *p) { - size_t u; - - /* - * If there are less than 9 bytes in the buffer, we refill it. - * This means that we may drop the last few bytes, but this allows - * for faster extraction code. Also, it means that we never leave - * an empty buffer. - */ - u = p->ptr; - if (u >= (sizeof p->buf.d) - 9) { - PQCLEAN_FALCON1024_CLEAN_prng_refill(p); - u = 0; - } - p->ptr = u + 8; - - /* - * On systems that use little-endian encoding and allow - * unaligned accesses, we can simply read the data where it is. 
- */ - return (uint64_t)p->buf.d[u + 0] - | ((uint64_t)p->buf.d[u + 1] << 8) - | ((uint64_t)p->buf.d[u + 2] << 16) - | ((uint64_t)p->buf.d[u + 3] << 24) - | ((uint64_t)p->buf.d[u + 4] << 32) - | ((uint64_t)p->buf.d[u + 5] << 40) - | ((uint64_t)p->buf.d[u + 6] << 48) - | ((uint64_t)p->buf.d[u + 7] << 56); -} +#define prng_get_u64 PQCLEAN_FALCON1024_CLEAN_prng_get_u64 +uint64_t prng_get_u64(prng *p); /* * Get an 8-bit random value from a PRNG. */ -static inline unsigned -prng_get_u8(prng *p) { - unsigned v; - - v = p->buf.d[p->ptr ++]; - if (p->ptr == sizeof p->buf.d) { - PQCLEAN_FALCON1024_CLEAN_prng_refill(p); - } - return v; -} +#define prng_get_u8 PQCLEAN_FALCON1024_CLEAN_prng_get_u8 +unsigned prng_get_u8(prng *p); /* ==================================================================== */ /* diff --git a/src/sig/falcon/pqclean_falcon-1024_clean/sign.c b/src/sig/falcon/pqclean_falcon-1024_clean/sign.c index fb05cdad0..0baa9148e 100644 --- a/src/sig/falcon/pqclean_falcon-1024_clean/sign.c +++ b/src/sig/falcon/pqclean_falcon-1024_clean/sign.c @@ -267,7 +267,7 @@ PQCLEAN_FALCON1024_CLEAN_expand_privkey(fpr *expanded_key, PQCLEAN_FALCON1024_CLEAN_poly_neg(rF, logn); /* - * The Gram matrix is G = B·B*. Formulas are: + * The Gram matrix is G = B x B*. Formulas are: * g00 = b00*adj(b00) + b01*adj(b01) * g01 = b00*adj(b10) + b01*adj(b11) * g10 = b10*adj(b00) + b11*adj(b01) @@ -788,7 +788,7 @@ do_sign_dyn(samplerZ samp, void *samp_ctx, int16_t *s2, PQCLEAN_FALCON1024_CLEAN_poly_neg(b11, logn); /* - * Compute the Gram matrix G = B·B*. Formulas are: + * Compute the Gram matrix G = B x B*. 
Formulas are: * g00 = b00*adj(b00) + b01*adj(b01) * g01 = b00*adj(b10) + b01*adj(b11) * g10 = b10*adj(b00) + b11*adj(b01) diff --git a/src/sig/falcon/pqclean_falcon-512_avx2/sign.c b/src/sig/falcon/pqclean_falcon-512_avx2/sign.c index 1b6cad3f2..623f618ec 100644 --- a/src/sig/falcon/pqclean_falcon-512_avx2/sign.c +++ b/src/sig/falcon/pqclean_falcon-512_avx2/sign.c @@ -267,7 +267,7 @@ PQCLEAN_FALCON512_AVX2_expand_privkey(fpr *expanded_key, PQCLEAN_FALCON512_AVX2_poly_neg(rF, logn); /* - * The Gram matrix is G = B·B*. Formulas are: + * The Gram matrix is G = B x B*. Formulas are: * g00 = b00*adj(b00) + b01*adj(b01) * g01 = b00*adj(b10) + b01*adj(b11) * g10 = b10*adj(b00) + b11*adj(b01) @@ -781,7 +781,7 @@ do_sign_dyn(samplerZ samp, void *samp_ctx, int16_t *s2, PQCLEAN_FALCON512_AVX2_poly_neg(b11, logn); /* - * Compute the Gram matrix G = B·B*. Formulas are: + * Compute the Gram matrix G = B x B*. Formulas are: * g00 = b00*adj(b00) + b01*adj(b01) * g01 = b00*adj(b10) + b01*adj(b11) * g10 = b10*adj(b00) + b11*adj(b01) diff --git a/src/sig/falcon/pqclean_falcon-512_clean/fpr.c b/src/sig/falcon/pqclean_falcon-512_clean/fpr.c index 091462a71..669c825ee 100644 --- a/src/sig/falcon/pqclean_falcon-512_clean/fpr.c +++ b/src/sig/falcon/pqclean_falcon-512_clean/fpr.c @@ -78,6 +78,66 @@ (e) += (int)(nt); \ } while (0) +uint64_t +fpr_ursh(uint64_t x, int n) { + x ^= (x ^ (x >> 32)) & -(uint64_t)(n >> 5); + return x >> (n & 31); +} + +int64_t +fpr_irsh(int64_t x, int n) { + x ^= (x ^ (x >> 32)) & -(int64_t)(n >> 5); + return x >> (n & 31); +} + +uint64_t +fpr_ulsh(uint64_t x, int n) { + x ^= (x ^ (x << 32)) & -(uint64_t)(n >> 5); + return x << (n & 31); +} + +fpr +FPR(int s, int e, uint64_t m) { + fpr x; + uint32_t t; + unsigned f; + + /* + * If e >= -1076, then the value is "normal"; otherwise, it + * should be a subnormal, which we clamp down to zero. 
+ */ + e += 1076; + t = (uint32_t)e >> 31; + m &= (uint64_t)t - 1; + + /* + * If m = 0 then we want a zero; make e = 0 too, but conserve + * the sign. + */ + t = (uint32_t)(m >> 54); + e &= -(int)t; + + /* + * The 52 mantissa bits come from m. Value m has its top bit set + * (unless it is a zero); we leave it "as is": the top bit will + * increment the exponent by 1, except when m = 0, which is + * exactly what we want. + */ + x = (((uint64_t)s << 63) | (m >> 2)) + ((uint64_t)(uint32_t)e << 52); + + /* + * Rounding: if the low three bits of m are 011, 110 or 111, + * then the value should be incremented to get the next + * representable value. This implements the usual + * round-to-nearest rule (with preference to even values in case + * of a tie). Note that the increment may make a carry spill + * into the exponent field, which is again exactly what we want + * in that case. + */ + f = (unsigned)m & 7U; + x += (0xC8U >> f) & 1; + return x; +} fpr fpr_scaled(int64_t i, int sc) { @@ -134,7 +194,131 @@ fpr_scaled(int64_t i, int sc) { return FPR(s, e, m); } +fpr +fpr_of(int64_t i) { + return fpr_scaled(i, 0); +} +int64_t +fpr_rint(fpr x) { + uint64_t m, d; + int e; + uint32_t s, dd, f; + + /* + * We assume that the value fits in -(2^63-1)..+(2^63-1). We can + * thus extract the mantissa as a 63-bit integer, then right-shift + * it as needed. + */ + m = ((x << 10) | ((uint64_t)1 << 62)) & (((uint64_t)1 << 63) - 1); + e = 1085 - ((int)(x >> 52) & 0x7FF); + + /* + * If a shift of more than 63 bits is needed, then simply set m + * to zero. This also covers the case of an input operand equal + * to zero. + */ + m &= -(uint64_t)((uint32_t)(e - 64) >> 31); + e &= 63; + + /* + * Right-shift m as needed. Shift count is e. Proper rounding + * mandates that: + * - If the highest dropped bit is zero, then round low. + * - If the highest dropped bit is one, and at least one of the + * other dropped bits is one, then round up. 
+ * - If the highest dropped bit is one, and all other dropped + * bits are zero, then round up if the lowest kept bit is 1, + * or low otherwise (i.e. ties are broken by "rounding to even"). + * + * We thus first extract a word consisting of all the dropped bit + * AND the lowest kept bit; then we shrink it down to three bits, + * the lowest being "sticky". + */ + d = fpr_ulsh(m, 63 - e); + dd = (uint32_t)d | ((uint32_t)(d >> 32) & 0x1FFFFFFF); + f = (uint32_t)(d >> 61) | ((dd | -dd) >> 31); + m = fpr_ursh(m, e) + (uint64_t)((0xC8U >> f) & 1U); + + /* + * Apply the sign bit. + */ + s = (uint32_t)(x >> 63); + return ((int64_t)m ^ -(int64_t)s) + (int64_t)s; +} + +int64_t +fpr_floor(fpr x) { + uint64_t t; + int64_t xi; + int e, cc; + + /* + * We extract the integer as a _signed_ 64-bit integer with + * a scaling factor. Since we assume that the value fits + * in the -(2^63-1)..+(2^63-1) range, we can left-shift the + * absolute value to make it in the 2^62..2^63-1 range: we + * will only need a right-shift afterwards. + */ + e = (int)(x >> 52) & 0x7FF; + t = x >> 63; + xi = (int64_t)(((x << 10) | ((uint64_t)1 << 62)) + & (((uint64_t)1 << 63) - 1)); + xi = (xi ^ -(int64_t)t) + (int64_t)t; + cc = 1085 - e; + + /* + * We perform an arithmetic right-shift on the value. This + * applies floor() semantics on both positive and negative values + * (rounding toward minus infinity). + */ + xi = fpr_irsh(xi, cc & 63); + + /* + * If the true shift count was 64 or more, then we should instead + * replace xi with 0 (if nonnegative) or -1 (if negative). Edge + * case: -0 will be floored to -1, not 0 (whether this is correct + * is debatable; in any case, the other functions normalize zero + * to +0). + * + * For an input of zero, the non-shifted xi was incorrect (we used + * a top implicit bit of value 1, not 0), but this does not matter + * since this operation will clamp it down. 
+ */ + xi ^= (xi ^ -(int64_t)t) & -(int64_t)((uint32_t)(63 - cc) >> 31); + return xi; +} + +int64_t +fpr_trunc(fpr x) { + uint64_t t, xu; + int e, cc; + + /* + * Extract the absolute value. Since we assume that the value + * fits in the -(2^63-1)..+(2^63-1) range, we can left-shift + * the absolute value into the 2^62..2^63-1 range, and then + * do a right shift afterwards. + */ + e = (int)(x >> 52) & 0x7FF; + xu = ((x << 10) | ((uint64_t)1 << 62)) & (((uint64_t)1 << 63) - 1); + cc = 1085 - e; + xu = fpr_ursh(xu, cc & 63); + + /* + * If the exponent is too low (cc > 63), then the shift was wrong + * and we must clamp the value to 0. This also covers the case + * of an input equal to zero. + */ + xu &= -(uint64_t)((uint32_t)(cc - 64) >> 31); + + /* + * Apply back the sign, if the source value is negative. + */ + t = x >> 63; + xu = (xu ^ -t) + t; + return *(int64_t *)&xu; +} fpr fpr_add(fpr x, fpr y) { @@ -243,7 +427,42 @@ fpr_add(fpr x, fpr y) { return FPR(sx, ex, xu); } +fpr +fpr_sub(fpr x, fpr y) { + y ^= (uint64_t)1 << 63; + return fpr_add(x, y); +} +fpr +fpr_neg(fpr x) { + x ^= (uint64_t)1 << 63; + return x; +} + +fpr +fpr_half(fpr x) { + /* + * To divide a value by 2, we just have to subtract 1 from its + * exponent, but we have to take care of zero. + */ + uint32_t t; + + x -= (uint64_t)1 << 52; + t = (((uint32_t)(x >> 52) & 0x7FF) + 1) >> 11; + x &= (uint64_t)t - 1; + return x; +} + +fpr +fpr_double(fpr x) { + /* + * To double a value, we just increment by one the exponent. We + * don't care about infinites or NaNs; however, 0 is a + * special case. 
+ */ + x += (uint64_t)((((unsigned)(x >> 52) & 0x7FFU) + 0x7FFU) >> 11) << 52; + return x; +} fpr fpr_mul(fpr x, fpr y) { @@ -340,7 +559,10 @@ fpr_mul(fpr x, fpr y) { return FPR(s, e, zu); } - +fpr +fpr_sqr(fpr x) { + return fpr_mul(x, x); +} fpr fpr_div(fpr x, fpr y) { @@ -428,7 +650,10 @@ fpr_div(fpr x, fpr y) { return FPR(s, e, q); } - +fpr +fpr_inv(fpr x) { + return fpr_div(4607182418800017408u, x); +} fpr fpr_sqrt(fpr x) { @@ -506,6 +731,37 @@ fpr_sqrt(fpr x) { return FPR(0, e, q); } +int +fpr_lt(fpr x, fpr y) { + /* + * If both x and y are positive, then a signed comparison yields + * the proper result: + * - For positive values, the order is preserved. + * - The sign bit is at the same place as in integers, so + * sign is preserved. + * Moreover, we can compute [x < y] as sgn(x-y) and the computation + * of x-y will not overflow. + * + * If the signs differ, then sgn(x) gives the proper result. + * + * If both x and y are negative, then the order is reversed. + * Hence [x < y] = sgn(y-x). We must compute this separately from + * sgn(x-y); simply inverting sgn(x-y) would not handle the edge + * case x = y properly. + */ + int cc0, cc1; + int64_t sx; + int64_t sy; + + sx = *(int64_t *)&x; + sy = *(int64_t *)&y; + sy &= ~((sx ^ sy) >> 63); /* set sy=0 if signs differ */ + + cc0 = (int)((sx - sy) >> 63) & 1; /* Neither subtraction overflows when */ + cc1 = (int)((sy - sx) >> 63) & 1; /* the signs are the same. */ + + return cc0 ^ ((cc0 ^ cc1) & (int)((x & y) >> 63)); +} uint64_t fpr_expm_p63(fpr x, fpr ccs) { diff --git a/src/sig/falcon/pqclean_falcon-512_clean/fpr.h b/src/sig/falcon/pqclean_falcon-512_clean/fpr.h index f88595e2c..fb6830e71 100644 --- a/src/sig/falcon/pqclean_falcon-512_clean/fpr.h +++ b/src/sig/falcon/pqclean_falcon-512_clean/fpr.h @@ -126,11 +126,8 @@ typedef uint64_t fpr; * * Shift count n MUST be in the 0..63 range. 
*/ -static inline uint64_t -fpr_ursh(uint64_t x, int n) { - x ^= (x ^ (x >> 32)) & -(uint64_t)(n >> 5); - return x >> (n & 31); -} +#define fpr_ursh PQCLEAN_FALCON512_CLEAN_fpr_ursh +uint64_t fpr_ursh(uint64_t x, int n); /* * Right-shift a 64-bit signed value by a possibly secret shift count @@ -138,11 +135,8 @@ fpr_ursh(uint64_t x, int n) { * * Shift count n MUST be in the 0..63 range. */ -static inline int64_t -fpr_irsh(int64_t x, int n) { - x ^= (x ^ (x >> 32)) & -(int64_t)(n >> 5); - return x >> (n & 31); -} +#define fpr_irsh PQCLEAN_FALCON512_CLEAN_fpr_irsh +int64_t fpr_irsh(int64_t x, int n); /* * Left-shift a 64-bit unsigned value by a possibly secret shift count @@ -150,11 +144,8 @@ fpr_irsh(int64_t x, int n) { * * Shift count n MUST be in the 0..63 range. */ -static inline uint64_t -fpr_ulsh(uint64_t x, int n) { - x ^= (x ^ (x << 32)) & -(uint64_t)(n >> 5); - return x << (n & 31); -} +#define fpr_ulsh PQCLEAN_FALCON512_CLEAN_fpr_ulsh +uint64_t fpr_ulsh(uint64_t x, int n); /* * Expectations: @@ -171,56 +162,15 @@ fpr_ulsh(uint64_t x, int n) { * If e >= -1076 and e != 0, m must be within the expected range * (2^54 to 2^55-1). */ -static inline fpr -FPR(int s, int e, uint64_t m) { - fpr x; - uint32_t t; - unsigned f; +#define FPR PQCLEAN_FALCON512_CLEAN_FPR +fpr FPR(int s, int e, uint64_t m); - /* - * If e >= -1076, then the value is "normal"; otherwise, it - * should be a subnormal, which we clamp down to zero. - */ - e += 1076; - t = (uint32_t)e >> 31; - m &= (uint64_t)t - 1; - - /* - * If m = 0 then we want a zero; make e = 0 too, but conserve - * the sign. - */ - t = (uint32_t)(m >> 54); - e &= -(int)t; - - /* - * The 52 mantissa bits come from m. Value m has its top bit set - * (unless it is a zero); we leave it "as is": the top bit will - * increment the exponent by 1, except when m = 0, which is - * exactly what we want. 
- */ - x = (((uint64_t)s << 63) | (m >> 2)) + ((uint64_t)(uint32_t)e << 52); - - /* - * Rounding: if the low three bits of m are 011, 110 or 111, - * then the value should be incremented to get the next - * representable value. This implements the usual - * round-to-nearest rule (with preference to even values in case - * of a tie). Note that the increment may make a carry spill - * into the exponent field, which is again exactly what we want - * in that case. - */ - f = (unsigned)m & 7U; - x += (0xC8U >> f) & 1; - return x; -} #define fpr_scaled PQCLEAN_FALCON512_CLEAN_fpr_scaled fpr fpr_scaled(int64_t i, int sc); -static inline fpr -fpr_of(int64_t i) { - return fpr_scaled(i, 0); -} +#define fpr_of PQCLEAN_FALCON512_CLEAN_fpr_of +fpr fpr_of(int64_t i); static const fpr fpr_q = 4667981563525332992; static const fpr fpr_inverse_of_q = 4545632735260551042; @@ -244,217 +194,47 @@ static const fpr fpr_ptwo63m1 = 4890909195324358656; static const fpr fpr_mtwo63m1 = 14114281232179134464U; static const fpr fpr_ptwo63 = 4890909195324358656; -static inline int64_t -fpr_rint(fpr x) { - uint64_t m, d; - int e; - uint32_t s, dd, f; +#define fpr_rint PQCLEAN_FALCON512_CLEAN_fpr_rint +int64_t fpr_rint(fpr x); - /* - * We assume that the value fits in -(2^63-1)..+(2^63-1). We can - * thus extract the mantissa as a 63-bit integer, then right-shift - * it as needed. - */ - m = ((x << 10) | ((uint64_t)1 << 62)) & (((uint64_t)1 << 63) - 1); - e = 1085 - ((int)(x >> 52) & 0x7FF); +#define fpr_floor PQCLEAN_FALCON512_CLEAN_fpr_floor +int64_t fpr_floor(fpr x); - /* - * If a shift of more than 63 bits is needed, then simply set m - * to zero. This also covers the case of an input operand equal - * to zero. - */ - m &= -(uint64_t)((uint32_t)(e - 64) >> 31); - e &= 63; - - /* - * Right-shift m as needed. Shift count is e. Proper rounding - * mandates that: - * - If the highest dropped bit is zero, then round low. 
- * - If the highest dropped bit is one, and at least one of the - * other dropped bits is one, then round up. - * - If the highest dropped bit is one, and all other dropped - * bits are zero, then round up if the lowest kept bit is 1, - * or low otherwise (i.e. ties are broken by "rounding to even"). - * - * We thus first extract a word consisting of all the dropped bit - * AND the lowest kept bit; then we shrink it down to three bits, - * the lowest being "sticky". - */ - d = fpr_ulsh(m, 63 - e); - dd = (uint32_t)d | ((uint32_t)(d >> 32) & 0x1FFFFFFF); - f = (uint32_t)(d >> 61) | ((dd | -dd) >> 31); - m = fpr_ursh(m, e) + (uint64_t)((0xC8U >> f) & 1U); - - /* - * Apply the sign bit. - */ - s = (uint32_t)(x >> 63); - return ((int64_t)m ^ -(int64_t)s) + (int64_t)s; -} - -static inline int64_t -fpr_floor(fpr x) { - uint64_t t; - int64_t xi; - int e, cc; - - /* - * We extract the integer as a _signed_ 64-bit integer with - * a scaling factor. Since we assume that the value fits - * in the -(2^63-1)..+(2^63-1) range, we can left-shift the - * absolute value to make it in the 2^62..2^63-1 range: we - * will only need a right-shift afterwards. - */ - e = (int)(x >> 52) & 0x7FF; - t = x >> 63; - xi = (int64_t)(((x << 10) | ((uint64_t)1 << 62)) - & (((uint64_t)1 << 63) - 1)); - xi = (xi ^ -(int64_t)t) + (int64_t)t; - cc = 1085 - e; - - /* - * We perform an arithmetic right-shift on the value. This - * applies floor() semantics on both positive and negative values - * (rounding toward minus infinity). - */ - xi = fpr_irsh(xi, cc & 63); - - /* - * If the true shift count was 64 or more, then we should instead - * replace xi with 0 (if nonnegative) or -1 (if negative). Edge - * case: -0 will be floored to -1, not 0 (whether this is correct - * is debatable; in any case, the other functions normalize zero - * to +0). 
- * - * For an input of zero, the non-shifted xi was incorrect (we used - * a top implicit bit of value 1, not 0), but this does not matter - * since this operation will clamp it down. - */ - xi ^= (xi ^ -(int64_t)t) & -(int64_t)((uint32_t)(63 - cc) >> 31); - return xi; -} - -static inline int64_t -fpr_trunc(fpr x) { - uint64_t t, xu; - int e, cc; - - /* - * Extract the absolute value. Since we assume that the value - * fits in the -(2^63-1)..+(2^63-1) range, we can left-shift - * the absolute value into the 2^62..2^63-1 range, and then - * do a right shift afterwards. - */ - e = (int)(x >> 52) & 0x7FF; - xu = ((x << 10) | ((uint64_t)1 << 62)) & (((uint64_t)1 << 63) - 1); - cc = 1085 - e; - xu = fpr_ursh(xu, cc & 63); - - /* - * If the exponent is too low (cc > 63), then the shift was wrong - * and we must clamp the value to 0. This also covers the case - * of an input equal to zero. - */ - xu &= -(uint64_t)((uint32_t)(cc - 64) >> 31); - - /* - * Apply back the sign, if the source value is negative. - */ - t = x >> 63; - xu = (xu ^ -t) + t; - return *(int64_t *)&xu; -} +#define fpr_trunc PQCLEAN_FALCON512_CLEAN_fpr_trunc +int64_t fpr_trunc(fpr x); #define fpr_add PQCLEAN_FALCON512_CLEAN_fpr_add fpr fpr_add(fpr x, fpr y); -static inline fpr -fpr_sub(fpr x, fpr y) { - y ^= (uint64_t)1 << 63; - return fpr_add(x, y); -} +#define fpr_sub PQCLEAN_FALCON512_CLEAN_fpr_sub +fpr fpr_sub(fpr x, fpr y); -static inline fpr -fpr_neg(fpr x) { - x ^= (uint64_t)1 << 63; - return x; -} +#define fpr_neg PQCLEAN_FALCON512_CLEAN_fpr_neg +fpr fpr_neg(fpr x); -static inline fpr -fpr_half(fpr x) { - /* - * To divide a value by 2, we just have to subtract 1 from its - * exponent, but we have to take care of zero. 
- */ - uint32_t t; +#define fpr_half PQCLEAN_FALCON512_CLEAN_fpr_half +fpr fpr_half(fpr x); - x -= (uint64_t)1 << 52; - t = (((uint32_t)(x >> 52) & 0x7FF) + 1) >> 11; - x &= (uint64_t)t - 1; - return x; -} - -static inline fpr -fpr_double(fpr x) { - /* - * To double a value, we just increment by one the exponent. We - * don't care about infinites or NaNs; however, 0 is a - * special case. - */ - x += (uint64_t)((((unsigned)(x >> 52) & 0x7FFU) + 0x7FFU) >> 11) << 52; - return x; -} +#define fpr_double PQCLEAN_FALCON512_CLEAN_fpr_double +fpr fpr_double(fpr x); #define fpr_mul PQCLEAN_FALCON512_CLEAN_fpr_mul fpr fpr_mul(fpr x, fpr y); -static inline fpr -fpr_sqr(fpr x) { - return fpr_mul(x, x); -} +#define fpr_sqr PQCLEAN_FALCON512_CLEAN_fpr_sqr +fpr fpr_sqr(fpr x); #define fpr_div PQCLEAN_FALCON512_CLEAN_fpr_div fpr fpr_div(fpr x, fpr y); -static inline fpr -fpr_inv(fpr x) { - return fpr_div(4607182418800017408u, x); -} +#define fpr_inv PQCLEAN_FALCON512_CLEAN_fpr_inv +fpr fpr_inv(fpr x); #define fpr_sqrt PQCLEAN_FALCON512_CLEAN_fpr_sqrt fpr fpr_sqrt(fpr x); -static inline int -fpr_lt(fpr x, fpr y) { - /* - * If both x and y are positive, then a signed comparison yields - * the proper result: - * - For positive values, the order is preserved. - * - The sign bit is at the same place as in integers, so - * sign is preserved. - * Moreover, we can compute [x < y] as sgn(x-y) and the computation - * of x-y will not overflow. - * - * If the signs differ, then sgn(x) gives the proper result. - * - * If both x and y are negative, then the order is reversed. - * Hence [x < y] = sgn(y-x). We must compute this separately from - * sgn(x-y); simply inverting sgn(x-y) would not handle the edge - * case x = y properly. 
- */ - int cc0, cc1; - int64_t sx; - int64_t sy; - - sx = *(int64_t *)&x; - sy = *(int64_t *)&y; - sy &= ~((sx ^ sy) >> 63); /* set sy=0 if signs differ */ - - cc0 = (int)((sx - sy) >> 63) & 1; /* Neither subtraction overflows when */ - cc1 = (int)((sy - sx) >> 63) & 1; /* the signs are the same. */ - - return cc0 ^ ((cc0 ^ cc1) & (int)((x & y) >> 63)); -} +#define fpr_lt PQCLEAN_FALCON512_CLEAN_fpr_lt +int fpr_lt(fpr x, fpr y); /* * Compute exp(x) for x such that |x| <= ln 2. We want a precision of 50 diff --git a/src/sig/falcon/pqclean_falcon-512_clean/inner.c b/src/sig/falcon/pqclean_falcon-512_clean/inner.c new file mode 100755 index 000000000..dd90bd57e --- /dev/null +++ b/src/sig/falcon/pqclean_falcon-512_clean/inner.c @@ -0,0 +1,70 @@ +#include "inner.h" + +/* + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2017-2019 Falcon Project + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * ===========================(LICENSE END)============================= + */ + +unsigned set_fpu_cw(unsigned x) { + return x; +} + + +uint64_t prng_get_u64(prng *p) { + size_t u; + + /* + * If there are less than 9 bytes in the buffer, we refill it. + * This means that we may drop the last few bytes, but this allows + * for faster extraction code. Also, it means that we never leave + * an empty buffer. + */ + u = p->ptr; + if (u >= (sizeof p->buf.d) - 9) { + PQCLEAN_FALCON512_CLEAN_prng_refill(p); + u = 0; + } + p->ptr = u + 8; + + return (uint64_t)p->buf.d[u + 0] + | ((uint64_t)p->buf.d[u + 1] << 8) + | ((uint64_t)p->buf.d[u + 2] << 16) + | ((uint64_t)p->buf.d[u + 3] << 24) + | ((uint64_t)p->buf.d[u + 4] << 32) + | ((uint64_t)p->buf.d[u + 5] << 40) + | ((uint64_t)p->buf.d[u + 6] << 48) + | ((uint64_t)p->buf.d[u + 7] << 56); +} + + +unsigned prng_get_u8(prng *p) { + unsigned v; + + v = p->buf.d[p->ptr ++]; + if (p->ptr == sizeof p->buf.d) { + PQCLEAN_FALCON512_CLEAN_prng_refill(p); + } + return v; +} diff --git a/src/sig/falcon/pqclean_falcon-512_clean/inner.h b/src/sig/falcon/pqclean_falcon-512_clean/inner.h index b81197f1c..d469c9237 100644 --- a/src/sig/falcon/pqclean_falcon-512_clean/inner.h +++ b/src/sig/falcon/pqclean_falcon-512_clean/inner.h @@ -99,13 +99,8 @@ * targets other than 32-bit x86, or when the native 'double' type is * not used, the set_fpu_cw() function does nothing at all. */ -static inline unsigned -set_fpu_cw(unsigned x) { - return x; -} - - - +#define set_fpu_cw PQCLEAN_FALCON512_CLEAN_set_fpu_cw +unsigned set_fpu_cw(unsigned x); /* ==================================================================== */ /* @@ -496,46 +491,14 @@ void PQCLEAN_FALCON512_CLEAN_prng_get_bytes(prng *p, void *dst, size_t len); /* * Get a 64-bit random value from a PRNG. */ -static inline uint64_t -prng_get_u64(prng *p) { - size_t u; - - /* - * If there are less than 9 bytes in the buffer, we refill it. 
- * This means that we may drop the last few bytes, but this allows - * for faster extraction code. Also, it means that we never leave - * an empty buffer. - */ - u = p->ptr; - if (u >= (sizeof p->buf.d) - 9) { - PQCLEAN_FALCON512_CLEAN_prng_refill(p); - u = 0; - } - p->ptr = u + 8; - - return (uint64_t)p->buf.d[u + 0] - | ((uint64_t)p->buf.d[u + 1] << 8) - | ((uint64_t)p->buf.d[u + 2] << 16) - | ((uint64_t)p->buf.d[u + 3] << 24) - | ((uint64_t)p->buf.d[u + 4] << 32) - | ((uint64_t)p->buf.d[u + 5] << 40) - | ((uint64_t)p->buf.d[u + 6] << 48) - | ((uint64_t)p->buf.d[u + 7] << 56); -} +#define prng_get_u64 PQCLEAN_FALCON512_CLEAN_prng_get_u64 +uint64_t prng_get_u64(prng *p); /* * Get an 8-bit random value from a PRNG. */ -static inline unsigned -prng_get_u8(prng *p) { - unsigned v; - - v = p->buf.d[p->ptr ++]; - if (p->ptr == sizeof p->buf.d) { - PQCLEAN_FALCON512_CLEAN_prng_refill(p); - } - return v; -} +#define prng_get_u8 PQCLEAN_FALCON512_CLEAN_prng_get_u8 +unsigned prng_get_u8(prng *p); /* ==================================================================== */ /* diff --git a/src/sig/falcon/pqclean_falcon-512_clean/sign.c b/src/sig/falcon/pqclean_falcon-512_clean/sign.c index 87566d985..469ae3b42 100644 --- a/src/sig/falcon/pqclean_falcon-512_clean/sign.c +++ b/src/sig/falcon/pqclean_falcon-512_clean/sign.c @@ -267,7 +267,7 @@ PQCLEAN_FALCON512_CLEAN_expand_privkey(fpr *expanded_key, PQCLEAN_FALCON512_CLEAN_poly_neg(rF, logn); /* - * The Gram matrix is G = B·B*. Formulas are: + * The Gram matrix is G = B x B*. Formulas are: * g00 = b00*adj(b00) + b01*adj(b01) * g01 = b00*adj(b10) + b01*adj(b11) * g10 = b10*adj(b00) + b11*adj(b01) @@ -788,7 +788,7 @@ do_sign_dyn(samplerZ samp, void *samp_ctx, int16_t *s2, PQCLEAN_FALCON512_CLEAN_poly_neg(b11, logn); /* - * Compute the Gram matrix G = B·B*. Formulas are: + * Compute the Gram matrix G = B x B*. Formulas are: * g00 = b00*adj(b00) + b01*adj(b01) * g01 = b00*adj(b10) + b01*adj(b11) * g10 = b10*adj(b00) + b11*adj(b01)