mirror of
https://github.com/open-quantum-safe/liboqs.git
synced 2025-12-07 00:02:12 -05:00
Sync with PQClean (#846)
* Change BMI1 and add PCLMUL * Copy from PQClean Commit b4078aae55f9efbc7b3ab7b3c702be376f7a7987, excluding HQC * Update algorithm datasheets * Prettyprint * Update version to 0.5.0-dev
This commit is contained in:
parent
993c1d757b
commit
1a923c79e0
@ -136,15 +136,15 @@ cmake_dependent_option(OQS_ENABLE_KEM_saber_firesaber "" ON "OQS_ENABLE_KEM_SABE
|
||||
|
||||
option(OQS_ENABLE_SIG_DILITHIUM "" ON)
|
||||
cmake_dependent_option(OQS_ENABLE_SIG_dilithium_2 "" ON "OQS_ENABLE_SIG_DILITHIUM" OFF)
|
||||
if(ARCH STREQUAL "x86_64" AND CMAKE_SYSTEM_NAME MATCHES "Darwin|Linux" AND OQS_USE_AVX2_INSTRUCTIONS AND OQS_USE_AES_INSTRUCTIONS AND OQS_USE_BMI_INSTRUCTIONS AND OQS_USE_POPCNT_INSTRUCTIONS)
|
||||
if(ARCH STREQUAL "x86_64" AND CMAKE_SYSTEM_NAME MATCHES "Darwin|Linux" AND OQS_USE_AVX2_INSTRUCTIONS AND OQS_USE_AES_INSTRUCTIONS AND OQS_USE_BMI1_INSTRUCTIONS AND OQS_USE_POPCNT_INSTRUCTIONS)
|
||||
cmake_dependent_option(OQS_ENABLE_SIG_dilithium_2_avx2 "" ON "OQS_ENABLE_SIG_dilithium_2" OFF)
|
||||
endif()
|
||||
cmake_dependent_option(OQS_ENABLE_SIG_dilithium_3 "" ON "OQS_ENABLE_SIG_DILITHIUM" OFF)
|
||||
if(ARCH STREQUAL "x86_64" AND CMAKE_SYSTEM_NAME MATCHES "Darwin|Linux" AND OQS_USE_AVX2_INSTRUCTIONS AND OQS_USE_AES_INSTRUCTIONS AND OQS_USE_BMI_INSTRUCTIONS AND OQS_USE_POPCNT_INSTRUCTIONS)
|
||||
if(ARCH STREQUAL "x86_64" AND CMAKE_SYSTEM_NAME MATCHES "Darwin|Linux" AND OQS_USE_AVX2_INSTRUCTIONS AND OQS_USE_AES_INSTRUCTIONS AND OQS_USE_BMI1_INSTRUCTIONS AND OQS_USE_POPCNT_INSTRUCTIONS)
|
||||
cmake_dependent_option(OQS_ENABLE_SIG_dilithium_3_avx2 "" ON "OQS_ENABLE_SIG_dilithium_3" OFF)
|
||||
endif()
|
||||
cmake_dependent_option(OQS_ENABLE_SIG_dilithium_4 "" ON "OQS_ENABLE_SIG_DILITHIUM" OFF)
|
||||
if(ARCH STREQUAL "x86_64" AND CMAKE_SYSTEM_NAME MATCHES "Darwin|Linux" AND OQS_USE_AVX2_INSTRUCTIONS AND OQS_USE_BMI_INSTRUCTIONS AND OQS_USE_AES_INSTRUCTIONS AND OQS_USE_POPCNT_INSTRUCTIONS)
|
||||
if(ARCH STREQUAL "x86_64" AND CMAKE_SYSTEM_NAME MATCHES "Darwin|Linux" AND OQS_USE_AVX2_INSTRUCTIONS AND OQS_USE_BMI1_INSTRUCTIONS AND OQS_USE_AES_INSTRUCTIONS AND OQS_USE_POPCNT_INSTRUCTIONS)
|
||||
cmake_dependent_option(OQS_ENABLE_SIG_dilithium_4_avx2 "" ON "OQS_ENABLE_SIG_dilithium_4" OFF)
|
||||
endif()
|
||||
|
||||
|
||||
@ -22,7 +22,7 @@ int main(void) {
|
||||
printf("AVX512F;");
|
||||
#endif
|
||||
#if defined(__BMI__)
|
||||
printf("BMI;");
|
||||
printf("BMI1;");
|
||||
#endif
|
||||
#if defined(__BMI2__)
|
||||
printf("BMI2;");
|
||||
@ -30,6 +30,9 @@ int main(void) {
|
||||
#if defined(__FMA__)
|
||||
printf("FMA;");
|
||||
#endif
|
||||
#if defined(__PCLMUL__)
|
||||
printf("PCLMUL;");
|
||||
#endif
|
||||
#if defined(__POPCNT__)
|
||||
printf("POPCNT;");
|
||||
#endif
|
||||
|
||||
@ -18,7 +18,7 @@ set(CMAKE_C_STANDARD 11)
|
||||
set(CMAKE_C_STANDARD_REQUIRED ON)
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
set(CMAKE_C_VISIBILITY_PRESET hidden)
|
||||
set(OQS_VERSION_TEXT "0.4.0")
|
||||
set(OQS_VERSION_TEXT "0.5.0-dev")
|
||||
set(OQS_COMPILE_BUILD_TARGET "${CMAKE_SYSTEM_PROCESSOR}-${CMAKE_HOST_SYSTEM}")
|
||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||
|
||||
|
||||
@ -11,7 +11,7 @@ Implementation
|
||||
--------------
|
||||
|
||||
- **Source of implementation**: SUPERCOP-20191221, "vec" implementation
|
||||
- **Implementation version**: https://github.com/PQClean/PQClean/commit/c50bc64a40ce83639cff54419fce60483a30c430
|
||||
- **Implementation version**: https://github.com/PQClean/PQClean/commit/b4078aae55f9efbc7b3ab7b3c702be376f7a7987
|
||||
- **License**: Public domain
|
||||
- **Constant-time**: Yes
|
||||
- **Optimizations**: Portable C
|
||||
|
||||
@ -11,7 +11,7 @@ Implementation
|
||||
--------------
|
||||
|
||||
- **Source of implementation**: https://github.com/pq-crystals/kyber
|
||||
- **Implementation version**: https://github.com/PQClean/PQClean/commit/c50bc64a40ce83639cff54419fce60483a30c430
|
||||
- **Implementation version**: https://github.com/PQClean/PQClean/commit/b4078aae55f9efbc7b3ab7b3c702be376f7a7987
|
||||
- **License**: Public domain
|
||||
- **Constant-time**: Yes
|
||||
- **Optimizations**: Portable C with AVX2 instructions (if available at run-time)
|
||||
|
||||
@ -11,7 +11,7 @@ Implementation
|
||||
--------------
|
||||
|
||||
- **Source of implementation**: NIST Round 2 submission
|
||||
- **Implementation version**: https://github.com/jschanck/ntru/tree/ff3c84e1
|
||||
- **Implementation version**: https://github.com/jschanck/ntru/tree/6d1f44f5
|
||||
- **License**: Public domain
|
||||
- **Constant-time**: Yes
|
||||
- **Optimizations**: Portable C, AVX2
|
||||
|
||||
@ -11,7 +11,7 @@ Implementation
|
||||
--------------
|
||||
|
||||
- **Source of implementation**: https://github.com/KULeuven-COSIC/SABER
|
||||
- **Implementation version**: https://github.com/PQClean/PQClean/commit/c50bc64a40ce83639cff54419fce60483a30c430
|
||||
- **Implementation version**: https://github.com/PQClean/PQClean/commit/b4078aae55f9efbc7b3ab7b3c702be376f7a7987
|
||||
- **License**: Public domain
|
||||
- **Constant-time**: Yes
|
||||
- **Optimizations**: Portable C
|
||||
|
||||
@ -11,7 +11,7 @@ Implementation
|
||||
--------------
|
||||
|
||||
- **Source of implementation**: https://falcon-sign.info/impl/falcon.h.html
|
||||
- **Implementation version**: https://github.com/PQClean/PQClean/commit/c50bc64a40ce83639cff54419fce60483a30c430
|
||||
- **Implementation version**: https://github.com/PQClean/PQClean/commit/b4078aae55f9efbc7b3ab7b3c702be376f7a7987
|
||||
- **License**: CC0 1.0 Universal
|
||||
- **Constant-time**: Yes
|
||||
- **Optimizations**: Portable C
|
||||
|
||||
@ -10,7 +10,7 @@ Implementation
|
||||
--------------
|
||||
|
||||
- **Source of implementation**: https://github.com/fast-crypto-lab/rainbow-submission-round2
|
||||
- **Implementation version**: https://github.com/PQClean/PQClean/commit/c50bc64a40ce83639cff54419fce60483a30c430
|
||||
- **Implementation version**: https://github.com/PQClean/PQClean/commit/b4078aae55f9efbc7b3ab7b3c702be376f7a7987
|
||||
- **License**: CC0 1.0
|
||||
- **Constant-time**: Yes
|
||||
- **Optimizations**: Portable C
|
||||
|
||||
@ -11,7 +11,7 @@ Implementation
|
||||
--------------
|
||||
|
||||
- **Source of implementation**: https://github.com/sphincs/sphincsplus
|
||||
- **Implementation version**: https://github.com/PQClean/PQClean/commit/c50bc64a40ce83639cff54419fce60483a30c430
|
||||
- **Implementation version**: https://github.com/PQClean/PQClean/commit/b4078aae55f9efbc7b3ab7b3c702be376f7a7987
|
||||
- **License**: CC0 1.0 Universal
|
||||
- **Constant-time**: Yes
|
||||
- **Optimizations**: Portable C with AESNI and AVX2 instructions (if available at run-time)
|
||||
|
||||
@ -124,7 +124,7 @@ def load_instructions():
|
||||
# are not properly specified.
|
||||
if scheme['pretty_name_full'].startswith('DILITHIUM_'):
|
||||
scheme['metadata']['implementations'][1]['supported_platforms'][0]['operating_systems'] = ['Linux']
|
||||
scheme['metadata']['implementations'][1]['supported_platforms'][0]['required_flags'] = ['avx2', 'bmi', 'popcnt']
|
||||
scheme['metadata']['implementations'][1]['supported_platforms'][0]['required_flags'] = ['avx2', 'bmi1', 'popcnt']
|
||||
|
||||
scheme['metadata']['euf_cma'] = 'true'
|
||||
scheme['pqclean_scheme_c'] = scheme['pqclean_scheme'].replace('-', '')
|
||||
|
||||
@ -41,8 +41,8 @@ const char *const X64_EXTENSIONS_NAMES[] = {
|
||||
#else
|
||||
"",
|
||||
#endif
|
||||
#ifdef OQS_USE_BMI_INSTRUCTIONS
|
||||
"BMI",
|
||||
#ifdef OQS_USE_BMI1_INSTRUCTIONS
|
||||
"BMI1",
|
||||
#else
|
||||
"",
|
||||
#endif
|
||||
@ -51,6 +51,11 @@ const char *const X64_EXTENSIONS_NAMES[] = {
|
||||
#else
|
||||
"",
|
||||
#endif
|
||||
#ifdef OQS_USE_PCLMUL_INSTRUCTIONS
|
||||
"PCLMUL",
|
||||
#else
|
||||
"",
|
||||
#endif
|
||||
#ifdef OQS_USE_POPCNT_INSTRUCTIONS
|
||||
"POPCNT",
|
||||
#else
|
||||
@ -98,8 +103,9 @@ static void set_available_cpu_extensions_x86_64(void) {
|
||||
available_cpu_extensions.AVX_ENABLED = is_bit_set(leaf_1.ecx, 28);
|
||||
available_cpu_extensions.AVX2_ENABLED = is_bit_set(leaf_7.ebx, 5);
|
||||
}
|
||||
available_cpu_extensions.PCLMUL_ENABLED = is_bit_set(leaf_1.ecx, 1);
|
||||
available_cpu_extensions.POPCNT_ENABLED = is_bit_set(leaf_1.ecx, 23);
|
||||
available_cpu_extensions.BMI_ENABLED = is_bit_set(leaf_7.ebx, 3);
|
||||
available_cpu_extensions.BMI1_ENABLED = is_bit_set(leaf_7.ebx, 3);
|
||||
available_cpu_extensions.BMI2_ENABLED = is_bit_set(leaf_7.ebx, 8);
|
||||
|
||||
if (has_mask(xcr0_eax, MASK_XMM)) {
|
||||
|
||||
@ -118,8 +118,9 @@ typedef struct {
|
||||
unsigned int AVX_ENABLED;
|
||||
unsigned int AVX2_ENABLED;
|
||||
unsigned int AVX512_ENABLED;
|
||||
unsigned int BMI_ENABLED;
|
||||
unsigned int BMI1_ENABLED;
|
||||
unsigned int BMI2_ENABLED;
|
||||
unsigned int PCLMUL_ENABLED;
|
||||
unsigned int POPCNT_ENABLED;
|
||||
unsigned int SSE_ENABLED;
|
||||
unsigned int SSE2_ENABLED;
|
||||
|
||||
@ -56,7 +56,7 @@ if(OQS_ENABLE_KEM_ntru_hrss701)
|
||||
endif()
|
||||
|
||||
if(OQS_ENABLE_KEM_ntru_hrss701_avx2)
|
||||
add_library(ntru_hrss701_avx2 OBJECT pqclean_ntruhrss701_avx2/cmov.c pqclean_ntruhrss701_avx2/kem.c pqclean_ntruhrss701_avx2/owcpa.c pqclean_ntruhrss701_avx2/pack3.c pqclean_ntruhrss701_avx2/packq.c pqclean_ntruhrss701_avx2/poly.c pqclean_ntruhrss701_avx2/poly_lift.s pqclean_ntruhrss701_avx2/poly_mod_3_Phi_n.s pqclean_ntruhrss701_avx2/poly_mod_q_Phi_n.s pqclean_ntruhrss701_avx2/poly_r2_inv.c pqclean_ntruhrss701_avx2/poly_r2_mul.s pqclean_ntruhrss701_avx2/poly_rq_mul.s pqclean_ntruhrss701_avx2/poly_rq_to_s3.s pqclean_ntruhrss701_avx2/poly_s3_inv.s pqclean_ntruhrss701_avx2/sample.c pqclean_ntruhrss701_avx2/sample_iid.c pqclean_ntruhrss701_avx2/square_12_701_shufbytes.s pqclean_ntruhrss701_avx2/square_15_701_shufbytes.s pqclean_ntruhrss701_avx2/square_168_701_shufbytes.s pqclean_ntruhrss701_avx2/square_1_701_patience.s pqclean_ntruhrss701_avx2/square_27_701_shufbytes.s pqclean_ntruhrss701_avx2/square_336_701_shufbytes.s pqclean_ntruhrss701_avx2/square_3_701_patience.s pqclean_ntruhrss701_avx2/square_42_701_shufbytes.s pqclean_ntruhrss701_avx2/square_6_701_patience.s pqclean_ntruhrss701_avx2/square_84_701_shufbytes.s pqclean_ntruhrss701_avx2/vec32_sample_iid.s)
|
||||
add_library(ntru_hrss701_avx2 OBJECT pqclean_ntruhrss701_avx2/cmov.c pqclean_ntruhrss701_avx2/kem.c pqclean_ntruhrss701_avx2/owcpa.c pqclean_ntruhrss701_avx2/pack3.c pqclean_ntruhrss701_avx2/packq.c pqclean_ntruhrss701_avx2/poly.c pqclean_ntruhrss701_avx2/poly_lift.s pqclean_ntruhrss701_avx2/poly_mod_3_Phi_n.s pqclean_ntruhrss701_avx2/poly_mod_q_Phi_n.s pqclean_ntruhrss701_avx2/poly_r2_inv.c pqclean_ntruhrss701_avx2/poly_r2_mul.s pqclean_ntruhrss701_avx2/poly_rq_mul.s pqclean_ntruhrss701_avx2/poly_rq_to_s3.s pqclean_ntruhrss701_avx2/poly_s3_inv.c pqclean_ntruhrss701_avx2/sample.c pqclean_ntruhrss701_avx2/sample_iid.c pqclean_ntruhrss701_avx2/square_12_701_shufbytes.s pqclean_ntruhrss701_avx2/square_15_701_shufbytes.s pqclean_ntruhrss701_avx2/square_168_701_shufbytes.s pqclean_ntruhrss701_avx2/square_1_701_patience.s pqclean_ntruhrss701_avx2/square_27_701_shufbytes.s pqclean_ntruhrss701_avx2/square_336_701_shufbytes.s pqclean_ntruhrss701_avx2/square_3_701_patience.s pqclean_ntruhrss701_avx2/square_42_701_shufbytes.s pqclean_ntruhrss701_avx2/square_6_701_patience.s pqclean_ntruhrss701_avx2/square_84_701_shufbytes.s pqclean_ntruhrss701_avx2/vec32_sample_iid.s)
|
||||
target_include_directories(ntru_hrss701_avx2 PRIVATE ${CMAKE_CURRENT_LIST_DIR}/pqclean_ntruhrss701_avx2)
|
||||
target_include_directories(ntru_hrss701_avx2 PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims)
|
||||
target_compile_options(ntru_hrss701_avx2 PRIVATE -mavx2 -mbmi2)
|
||||
|
||||
@ -13,7 +13,7 @@ OQS_KEM *OQS_KEM_ntru_hps2048509_new() {
|
||||
return NULL;
|
||||
}
|
||||
kem->method_name = OQS_KEM_alg_ntru_hps2048509;
|
||||
kem->alg_version = "https://github.com/jschanck/ntru/tree/ff3c84e1 reference implementation";
|
||||
kem->alg_version = "https://github.com/jschanck/ntru/tree/6d1f44f5 reference implementation";
|
||||
|
||||
kem->claimed_nist_level = 1;
|
||||
kem->ind_cca = true;
|
||||
|
||||
@ -13,7 +13,7 @@ OQS_KEM *OQS_KEM_ntru_hps2048677_new() {
|
||||
return NULL;
|
||||
}
|
||||
kem->method_name = OQS_KEM_alg_ntru_hps2048677;
|
||||
kem->alg_version = "https://github.com/jschanck/ntru/tree/ff3c84e1 reference implementation";
|
||||
kem->alg_version = "https://github.com/jschanck/ntru/tree/6d1f44f5 reference implementation";
|
||||
|
||||
kem->claimed_nist_level = 3;
|
||||
kem->ind_cca = true;
|
||||
|
||||
@ -13,7 +13,7 @@ OQS_KEM *OQS_KEM_ntru_hps4096821_new() {
|
||||
return NULL;
|
||||
}
|
||||
kem->method_name = OQS_KEM_alg_ntru_hps4096821;
|
||||
kem->alg_version = "https://github.com/jschanck/ntru/tree/ff3c84e1 reference implementation";
|
||||
kem->alg_version = "https://github.com/jschanck/ntru/tree/6d1f44f5 reference implementation";
|
||||
|
||||
kem->claimed_nist_level = 5;
|
||||
kem->ind_cca = true;
|
||||
|
||||
@ -13,7 +13,7 @@ OQS_KEM *OQS_KEM_ntru_hrss701_new() {
|
||||
return NULL;
|
||||
}
|
||||
kem->method_name = OQS_KEM_alg_ntru_hrss701;
|
||||
kem->alg_version = "https://github.com/jschanck/ntru/tree/ff3c84e1 reference implementation";
|
||||
kem->alg_version = "https://github.com/jschanck/ntru/tree/6d1f44f5 reference implementation";
|
||||
|
||||
kem->claimed_nist_level = 3;
|
||||
kem->ind_cca = true;
|
||||
|
||||
@ -469,8 +469,11 @@ static void int32_sort_2power(int32 *x, size_t n, int flagdown) {
|
||||
}
|
||||
|
||||
q = n >> 3;
|
||||
flip = (p << 1 == q);
|
||||
flipflip = !flip;
|
||||
flip = 0;
|
||||
if (p << 1 == q) {
|
||||
flip = 1;
|
||||
}
|
||||
flipflip = 1 - flip;
|
||||
for (j = 0; j < q; j += p + p) {
|
||||
for (k = j; k < j + p + p; k += p) {
|
||||
for (i = k; i < k + p; i += 8) {
|
||||
|
||||
@ -42,12 +42,7 @@ int PQCLEAN_NTRUHPS2048509_AVX2_crypto_kem_dec(uint8_t *k, const uint8_t *c, con
|
||||
uint8_t rm[NTRU_OWCPA_MSGBYTES];
|
||||
uint8_t buf[NTRU_PRFKEYBYTES + NTRU_CIPHERTEXTBYTES];
|
||||
|
||||
fail = 0;
|
||||
|
||||
/* Check that unused bits of last byte of ciphertext are zero */
|
||||
fail |= c[NTRU_CIPHERTEXTBYTES - 1] & (0xff << (8 - (7 & (NTRU_LOGQ * NTRU_PACK_DEG))));
|
||||
|
||||
fail |= PQCLEAN_NTRUHPS2048509_AVX2_owcpa_dec(rm, c, sk);
|
||||
fail = PQCLEAN_NTRUHPS2048509_AVX2_owcpa_dec(rm, c, sk);
|
||||
/* If fail = 0 then c = Enc(h, rm). There is no need to re-encapsulate. */
|
||||
/* See comment in PQCLEAN_NTRUHPS2048509_AVX2_owcpa_dec for details. */
|
||||
|
||||
|
||||
@ -2,40 +2,59 @@
|
||||
#include "poly.h"
|
||||
#include "sample.h"
|
||||
|
||||
static int owcpa_check_ciphertext(const unsigned char *ciphertext) {
|
||||
/* A ciphertext is log2(q)*(n-1) bits packed into bytes. */
|
||||
/* Check that any unused bits of the final byte are zero. */
|
||||
|
||||
uint16_t t = 0;
|
||||
|
||||
t = ciphertext[NTRU_CIPHERTEXTBYTES - 1];
|
||||
t &= 0xff << (8 - (7 & (NTRU_LOGQ * NTRU_PACK_DEG)));
|
||||
|
||||
/* We have 0 <= t < 256 */
|
||||
/* Return 0 on success (t=0), 1 on failure */
|
||||
return (int) (1 & ((~t + 1) >> 15));
|
||||
}
|
||||
|
||||
static int owcpa_check_r(const poly *r) {
|
||||
/* Check that r is in message space. */
|
||||
/* Note: Assumes that r has coefficients in {0, 1, ..., q-1} */
|
||||
/* A valid r has coefficients in {0,1,q-1} and has r[N-1] = 0 */
|
||||
/* Note: We may assume that 0 <= r[i] <= q-1 for all i */
|
||||
|
||||
int i;
|
||||
uint64_t t = 0;
|
||||
uint32_t t = 0;
|
||||
uint16_t c;
|
||||
for (i = 0; i < NTRU_N; i++) {
|
||||
c = MODQ(r->coeffs[i] + 1);
|
||||
t |= c & (NTRU_Q - 4); /* 0 if c is in {0,1,2,3} */
|
||||
t |= (c + 1) & 0x4; /* 0 if c is in {0,1,2} */
|
||||
for (i = 0; i < NTRU_N - 1; i++) {
|
||||
c = r->coeffs[i];
|
||||
t |= (c + 1) & (NTRU_Q - 4); /* 0 iff c is in {-1,0,1,2} */
|
||||
t |= (c + 2) & 4; /* 1 if c = 2, 0 if c is in {-1,0,1} */
|
||||
}
|
||||
t |= MODQ(r->coeffs[NTRU_N - 1]); /* Coefficient n-1 must be zero */
|
||||
t = (~t + 1); // two's complement
|
||||
t >>= 63;
|
||||
return (int) t;
|
||||
t |= r->coeffs[NTRU_N - 1]; /* Coefficient n-1 must be zero */
|
||||
|
||||
/* We have 0 <= t < 2^16. */
|
||||
/* Return 0 on success (t=0), 1 on failure */
|
||||
return (int) (1 & ((~t + 1) >> 31));
|
||||
}
|
||||
|
||||
static int owcpa_check_m(const poly *m) {
|
||||
/* Check that m is in message space. */
|
||||
/* Note: Assumes that m has coefficients in {0,1,2}. */
|
||||
/* Check that m is in message space, i.e. */
|
||||
/* (1) |{i : m[i] = 1}| = |{i : m[i] = 2}|, and */
|
||||
/* (2) |{i : m[i] != 0}| = NTRU_WEIGHT. */
|
||||
/* Note: We may assume that m has coefficients in {0,1,2}. */
|
||||
|
||||
int i;
|
||||
uint64_t t = 0;
|
||||
uint16_t p1 = 0;
|
||||
uint16_t m1 = 0;
|
||||
uint32_t t = 0;
|
||||
uint16_t ps = 0;
|
||||
uint16_t ms = 0;
|
||||
for (i = 0; i < NTRU_N; i++) {
|
||||
p1 += m->coeffs[i] & 0x01;
|
||||
m1 += (m->coeffs[i] & 0x02) >> 1;
|
||||
ps += m->coeffs[i] & 1;
|
||||
ms += m->coeffs[i] & 2;
|
||||
}
|
||||
/* Need p1 = m1 and p1 + m1 = NTRU_WEIGHT */
|
||||
t |= p1 ^ m1;
|
||||
t |= (p1 + m1) ^ NTRU_WEIGHT;
|
||||
t = (~t + 1); // two's complement
|
||||
t >>= 63;
|
||||
return (int) t;
|
||||
t |= ps ^ (ms >> 1); /* 0 if (1) holds */
|
||||
t |= ms ^ NTRU_WEIGHT; /* 0 if (1) and (2) hold */
|
||||
|
||||
/* We have 0 <= t < 2^16. */
|
||||
/* Return 0 on success (t=0), 1 on failure */
|
||||
return (int) (1 & ((~t + 1) >> 31));
|
||||
}
|
||||
|
||||
void PQCLEAN_NTRUHPS2048509_AVX2_owcpa_keypair(unsigned char *pk,
|
||||
@ -123,11 +142,15 @@ int PQCLEAN_NTRUHPS2048509_AVX2_owcpa_dec(unsigned char *rm,
|
||||
PQCLEAN_NTRUHPS2048509_AVX2_poly_S3_mul(m, mf, finv3);
|
||||
PQCLEAN_NTRUHPS2048509_AVX2_poly_S3_tobytes(rm + NTRU_PACK_TRINARY_BYTES, m);
|
||||
|
||||
/* NOTE: For the IND-CCA2 KEM we must ensure that c = Enc(h, (r,m)). */
|
||||
fail = 0;
|
||||
|
||||
/* Check that the unused bits of the last byte of the ciphertext are zero */
|
||||
fail |= owcpa_check_ciphertext(ciphertext);
|
||||
|
||||
/* For the IND-CCA2 KEM we must ensure that c = Enc(h, (r,m)). */
|
||||
/* We can avoid re-computing r*h + Lift(m) as long as we check that */
|
||||
/* r (defined as b/h mod (q, Phi_n)) and m are in the message space. */
|
||||
/* (m can take any value in S3 in NTRU_HRSS) */
|
||||
fail = 0;
|
||||
fail |= owcpa_check_m(m);
|
||||
|
||||
/* b = c - Lift(m) mod (q, x^n - 1) */
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,4 +1,3 @@
|
||||
#include "crypto_sort_int32.h"
|
||||
#include "sample.h"
|
||||
|
||||
void PQCLEAN_NTRUHPS2048509_AVX2_sample_fg(poly *f, poly *g, const unsigned char uniformbytes[NTRU_SAMPLE_FG_BYTES]) {
|
||||
@ -25,7 +24,7 @@ void PQCLEAN_NTRUHPS2048509_AVX2_sample_fixed_type(poly *r, const unsigned char
|
||||
s[4 * i + 0] = (u[15 * i + 0] << 2) + (u[15 * i + 1] << 10) + (u[15 * i + 2] << 18) + ((uint32_t) u[15 * i + 3] << 26);
|
||||
s[4 * i + 1] = ((u[15 * i + 3] & 0xc0) >> 4) + (u[15 * i + 4] << 4) + (u[15 * i + 5] << 12) + (u[15 * i + 6] << 20) + ((uint32_t) u[15 * i + 7] << 28);
|
||||
s[4 * i + 2] = ((u[15 * i + 7] & 0xf0) >> 2) + (u[15 * i + 8] << 6) + (u[15 * i + 9] << 14) + (u[15 * i + 10] << 22) + ((uint32_t) u[15 * i + 11] << 30);
|
||||
s[4 * i + 3] = (u[15 * i + 11] & 0xfc) + (u[15 * i + 12] << 8) + (u[15 * i + 13] << 15) + ((uint32_t) u[15 * i + 14] << 24);
|
||||
s[4 * i + 3] = (u[15 * i + 11] & 0xfc) + (u[15 * i + 12] << 8) + (u[15 * i + 13] << 16) + ((uint32_t) u[15 * i + 14] << 24);
|
||||
}
|
||||
|
||||
for (i = 0; i < NTRU_WEIGHT / 2; i++) {
|
||||
|
||||
@ -4,6 +4,8 @@
|
||||
#include "params.h"
|
||||
#include "poly.h"
|
||||
|
||||
#include "crypto_sort_int32.h"
|
||||
|
||||
void PQCLEAN_NTRUHPS2048509_AVX2_sample_fg(poly *f, poly *g, const unsigned char uniformbytes[NTRU_SAMPLE_FG_BYTES]);
|
||||
void PQCLEAN_NTRUHPS2048509_AVX2_sample_rm(poly *r, poly *m, const unsigned char uniformbytes[NTRU_SAMPLE_RM_BYTES]);
|
||||
|
||||
|
||||
@ -42,12 +42,7 @@ int PQCLEAN_NTRUHPS2048509_CLEAN_crypto_kem_dec(uint8_t *k, const uint8_t *c, co
|
||||
uint8_t rm[NTRU_OWCPA_MSGBYTES];
|
||||
uint8_t buf[NTRU_PRFKEYBYTES + NTRU_CIPHERTEXTBYTES];
|
||||
|
||||
fail = 0;
|
||||
|
||||
/* Check that unused bits of last byte of ciphertext are zero */
|
||||
fail |= c[NTRU_CIPHERTEXTBYTES - 1] & (0xff << (8 - (7 & (NTRU_LOGQ * NTRU_PACK_DEG))));
|
||||
|
||||
fail |= PQCLEAN_NTRUHPS2048509_CLEAN_owcpa_dec(rm, c, sk);
|
||||
fail = PQCLEAN_NTRUHPS2048509_CLEAN_owcpa_dec(rm, c, sk);
|
||||
/* If fail = 0 then c = Enc(h, rm). There is no need to re-encapsulate. */
|
||||
/* See comment in PQCLEAN_NTRUHPS2048509_CLEAN_owcpa_dec for details. */
|
||||
|
||||
|
||||
@ -2,40 +2,59 @@
|
||||
#include "poly.h"
|
||||
#include "sample.h"
|
||||
|
||||
static int owcpa_check_ciphertext(const unsigned char *ciphertext) {
|
||||
/* A ciphertext is log2(q)*(n-1) bits packed into bytes. */
|
||||
/* Check that any unused bits of the final byte are zero. */
|
||||
|
||||
uint16_t t = 0;
|
||||
|
||||
t = ciphertext[NTRU_CIPHERTEXTBYTES - 1];
|
||||
t &= 0xff << (8 - (7 & (NTRU_LOGQ * NTRU_PACK_DEG)));
|
||||
|
||||
/* We have 0 <= t < 256 */
|
||||
/* Return 0 on success (t=0), 1 on failure */
|
||||
return (int) (1 & ((~t + 1) >> 15));
|
||||
}
|
||||
|
||||
static int owcpa_check_r(const poly *r) {
|
||||
/* Check that r is in message space. */
|
||||
/* Note: Assumes that r has coefficients in {0, 1, ..., q-1} */
|
||||
/* A valid r has coefficients in {0,1,q-1} and has r[N-1] = 0 */
|
||||
/* Note: We may assume that 0 <= r[i] <= q-1 for all i */
|
||||
|
||||
int i;
|
||||
uint64_t t = 0;
|
||||
uint32_t t = 0;
|
||||
uint16_t c;
|
||||
for (i = 0; i < NTRU_N; i++) {
|
||||
c = MODQ(r->coeffs[i] + 1);
|
||||
t |= c & (NTRU_Q - 4); /* 0 if c is in {0,1,2,3} */
|
||||
t |= (c + 1) & 0x4; /* 0 if c is in {0,1,2} */
|
||||
for (i = 0; i < NTRU_N - 1; i++) {
|
||||
c = r->coeffs[i];
|
||||
t |= (c + 1) & (NTRU_Q - 4); /* 0 iff c is in {-1,0,1,2} */
|
||||
t |= (c + 2) & 4; /* 1 if c = 2, 0 if c is in {-1,0,1} */
|
||||
}
|
||||
t |= MODQ(r->coeffs[NTRU_N - 1]); /* Coefficient n-1 must be zero */
|
||||
t = (~t + 1); // two's complement
|
||||
t >>= 63;
|
||||
return (int) t;
|
||||
t |= r->coeffs[NTRU_N - 1]; /* Coefficient n-1 must be zero */
|
||||
|
||||
/* We have 0 <= t < 2^16. */
|
||||
/* Return 0 on success (t=0), 1 on failure */
|
||||
return (int) (1 & ((~t + 1) >> 31));
|
||||
}
|
||||
|
||||
static int owcpa_check_m(const poly *m) {
|
||||
/* Check that m is in message space. */
|
||||
/* Note: Assumes that m has coefficients in {0,1,2}. */
|
||||
/* Check that m is in message space, i.e. */
|
||||
/* (1) |{i : m[i] = 1}| = |{i : m[i] = 2}|, and */
|
||||
/* (2) |{i : m[i] != 0}| = NTRU_WEIGHT. */
|
||||
/* Note: We may assume that m has coefficients in {0,1,2}. */
|
||||
|
||||
int i;
|
||||
uint64_t t = 0;
|
||||
uint16_t p1 = 0;
|
||||
uint16_t m1 = 0;
|
||||
uint32_t t = 0;
|
||||
uint16_t ps = 0;
|
||||
uint16_t ms = 0;
|
||||
for (i = 0; i < NTRU_N; i++) {
|
||||
p1 += m->coeffs[i] & 0x01;
|
||||
m1 += (m->coeffs[i] & 0x02) >> 1;
|
||||
ps += m->coeffs[i] & 1;
|
||||
ms += m->coeffs[i] & 2;
|
||||
}
|
||||
/* Need p1 = m1 and p1 + m1 = NTRU_WEIGHT */
|
||||
t |= p1 ^ m1;
|
||||
t |= (p1 + m1) ^ NTRU_WEIGHT;
|
||||
t = (~t + 1); // two's complement
|
||||
t >>= 63;
|
||||
return (int) t;
|
||||
t |= ps ^ (ms >> 1); /* 0 if (1) holds */
|
||||
t |= ms ^ NTRU_WEIGHT; /* 0 if (1) and (2) hold */
|
||||
|
||||
/* We have 0 <= t < 2^16. */
|
||||
/* Return 0 on success (t=0), 1 on failure */
|
||||
return (int) (1 & ((~t + 1) >> 31));
|
||||
}
|
||||
|
||||
void PQCLEAN_NTRUHPS2048509_CLEAN_owcpa_keypair(unsigned char *pk,
|
||||
@ -123,11 +142,15 @@ int PQCLEAN_NTRUHPS2048509_CLEAN_owcpa_dec(unsigned char *rm,
|
||||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_mul(m, mf, finv3);
|
||||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_tobytes(rm + NTRU_PACK_TRINARY_BYTES, m);
|
||||
|
||||
/* NOTE: For the IND-CCA2 KEM we must ensure that c = Enc(h, (r,m)). */
|
||||
fail = 0;
|
||||
|
||||
/* Check that the unused bits of the last byte of the ciphertext are zero */
|
||||
fail |= owcpa_check_ciphertext(ciphertext);
|
||||
|
||||
/* For the IND-CCA2 KEM we must ensure that c = Enc(h, (r,m)). */
|
||||
/* We can avoid re-computing r*h + Lift(m) as long as we check that */
|
||||
/* r (defined as b/h mod (q, Phi_n)) and m are in the message space. */
|
||||
/* (m can take any value in S3 in NTRU_HRSS) */
|
||||
fail = 0;
|
||||
fail |= owcpa_check_m(m);
|
||||
|
||||
/* b = c - Lift(m) mod (q, x^n - 1) */
|
||||
|
||||
@ -3,6 +3,7 @@
|
||||
|
||||
#include "params.h"
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#define MODQ(X) ((X) & (NTRU_Q-1))
|
||||
|
||||
@ -30,14 +30,22 @@ void PQCLEAN_NTRUHPS2048509_CLEAN_poly_mod_q_Phi_n(poly *r) {
|
||||
}
|
||||
|
||||
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_Rq_to_S3(poly *r, const poly *a) {
|
||||
/* NOTE: Assumes input is in [0,Q-1]^N */
|
||||
/* Produces output in {0,1,2}^N */
|
||||
int i;
|
||||
uint16_t flag;
|
||||
|
||||
/* Center coeffs around 3Q: [0, Q-1] -> [3Q - Q/2, 3Q + Q/2) */
|
||||
/* The coefficients of a are stored as non-negative integers. */
|
||||
/* We must translate to representatives in [-q/2, q/2) before */
|
||||
/* reduction mod 3. */
|
||||
for (i = 0; i < NTRU_N; i++) {
|
||||
r->coeffs[i] = ((MODQ(a->coeffs[i]) >> (NTRU_LOGQ - 1)) ^ 3) << NTRU_LOGQ;
|
||||
r->coeffs[i] += MODQ(a->coeffs[i]);
|
||||
/* Need an explicit reduction mod q here */
|
||||
r->coeffs[i] = MODQ(a->coeffs[i]);
|
||||
|
||||
/* flag = 1 if r[i] >= q/2 else 0 */
|
||||
flag = r->coeffs[i] >> (NTRU_LOGQ - 1);
|
||||
|
||||
/* Now we will add (-q) mod 3 if r[i] >= q/2 */
|
||||
/* Note (-q) mod 3=(-2^k) mod 3=1<<(1-(k&1)) */
|
||||
r->coeffs[i] += flag << (1 - (NTRU_LOGQ & 1));
|
||||
}
|
||||
|
||||
PQCLEAN_NTRUHPS2048509_CLEAN_poly_mod_3_Phi_n(r);
|
||||
|
||||
@ -3,14 +3,14 @@
|
||||
#include "poly.h"
|
||||
|
||||
/* return -1 if x<0 and y<0; otherwise return 0 */
|
||||
static inline int both_negative_mask(int x, int y) {
|
||||
static inline int16_t both_negative_mask(int16_t x, int16_t y) {
|
||||
return (x & y) >> 15;
|
||||
}
|
||||
|
||||
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_R2_inv(poly *r, const poly *a) {
|
||||
poly f, g, v, w;
|
||||
int i, loop, delta;
|
||||
int sign, swap, t;
|
||||
size_t i, loop;
|
||||
int16_t delta, sign, swap, t;
|
||||
|
||||
for (i = 0; i < NTRU_N; ++i) {
|
||||
v.coeffs[i] = 0;
|
||||
@ -37,7 +37,7 @@ void PQCLEAN_NTRUHPS2048509_CLEAN_poly_R2_inv(poly *r, const poly *a) {
|
||||
v.coeffs[0] = 0;
|
||||
|
||||
sign = g.coeffs[0] & f.coeffs[0];
|
||||
swap = both_negative_mask(-delta, -(int) g.coeffs[0]);
|
||||
swap = both_negative_mask(-delta, -(int16_t) g.coeffs[0]);
|
||||
delta ^= swap & (delta ^ -delta);
|
||||
delta += 1;
|
||||
|
||||
|
||||
@ -11,14 +11,14 @@ static inline uint8_t mod3(uint8_t a) { /* a between 0 and 9 */
|
||||
}
|
||||
|
||||
/* return -1 if x<0 and y<0; otherwise return 0 */
|
||||
static inline int both_negative_mask(int x, int y) {
|
||||
static inline int16_t both_negative_mask(int16_t x, int16_t y) {
|
||||
return (x & y) >> 15;
|
||||
}
|
||||
|
||||
void PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_inv(poly *r, const poly *a) {
|
||||
poly f, g, v, w;
|
||||
int i, loop, delta;
|
||||
int sign, swap, t;
|
||||
size_t i, loop;
|
||||
int16_t delta, sign, swap, t;
|
||||
|
||||
for (i = 0; i < NTRU_N; ++i) {
|
||||
v.coeffs[i] = 0;
|
||||
@ -45,7 +45,7 @@ void PQCLEAN_NTRUHPS2048509_CLEAN_poly_S3_inv(poly *r, const poly *a) {
|
||||
v.coeffs[0] = 0;
|
||||
|
||||
sign = mod3((uint8_t) (2 * g.coeffs[0] * f.coeffs[0]));
|
||||
swap = both_negative_mask(-delta, -(int) g.coeffs[0]);
|
||||
swap = both_negative_mask(-delta, -(int16_t) g.coeffs[0]);
|
||||
delta ^= swap & (delta ^ -delta);
|
||||
delta += 1;
|
||||
|
||||
|
||||
@ -1,4 +1,3 @@
|
||||
#include "crypto_sort_int32.h"
|
||||
#include "sample.h"
|
||||
|
||||
void PQCLEAN_NTRUHPS2048509_CLEAN_sample_fg(poly *f, poly *g, const unsigned char uniformbytes[NTRU_SAMPLE_FG_BYTES]) {
|
||||
@ -25,7 +24,7 @@ void PQCLEAN_NTRUHPS2048509_CLEAN_sample_fixed_type(poly *r, const unsigned char
|
||||
s[4 * i + 0] = (u[15 * i + 0] << 2) + (u[15 * i + 1] << 10) + (u[15 * i + 2] << 18) + ((uint32_t) u[15 * i + 3] << 26);
|
||||
s[4 * i + 1] = ((u[15 * i + 3] & 0xc0) >> 4) + (u[15 * i + 4] << 4) + (u[15 * i + 5] << 12) + (u[15 * i + 6] << 20) + ((uint32_t) u[15 * i + 7] << 28);
|
||||
s[4 * i + 2] = ((u[15 * i + 7] & 0xf0) >> 2) + (u[15 * i + 8] << 6) + (u[15 * i + 9] << 14) + (u[15 * i + 10] << 22) + ((uint32_t) u[15 * i + 11] << 30);
|
||||
s[4 * i + 3] = (u[15 * i + 11] & 0xfc) + (u[15 * i + 12] << 8) + (u[15 * i + 13] << 15) + ((uint32_t) u[15 * i + 14] << 24);
|
||||
s[4 * i + 3] = (u[15 * i + 11] & 0xfc) + (u[15 * i + 12] << 8) + (u[15 * i + 13] << 16) + ((uint32_t) u[15 * i + 14] << 24);
|
||||
}
|
||||
|
||||
for (i = 0; i < NTRU_WEIGHT / 2; i++) {
|
||||
|
||||
@ -4,6 +4,8 @@
|
||||
#include "params.h"
|
||||
#include "poly.h"
|
||||
|
||||
#include "crypto_sort_int32.h"
|
||||
|
||||
void PQCLEAN_NTRUHPS2048509_CLEAN_sample_fg(poly *f, poly *g, const unsigned char uniformbytes[NTRU_SAMPLE_FG_BYTES]);
|
||||
void PQCLEAN_NTRUHPS2048509_CLEAN_sample_rm(poly *r, poly *m, const unsigned char uniformbytes[NTRU_SAMPLE_RM_BYTES]);
|
||||
|
||||
|
||||
@ -469,8 +469,11 @@ static void int32_sort_2power(int32 *x, size_t n, int flagdown) {
|
||||
}
|
||||
|
||||
q = n >> 3;
|
||||
flip = (p << 1 == q);
|
||||
flipflip = !flip;
|
||||
flip = 0;
|
||||
if (p << 1 == q) {
|
||||
flip = 1;
|
||||
}
|
||||
flipflip = 1 - flip;
|
||||
for (j = 0; j < q; j += p + p) {
|
||||
for (k = j; k < j + p + p; k += p) {
|
||||
for (i = k; i < k + p; i += 8) {
|
||||
|
||||
@ -42,12 +42,7 @@ int PQCLEAN_NTRUHPS2048677_AVX2_crypto_kem_dec(uint8_t *k, const uint8_t *c, con
|
||||
uint8_t rm[NTRU_OWCPA_MSGBYTES];
|
||||
uint8_t buf[NTRU_PRFKEYBYTES + NTRU_CIPHERTEXTBYTES];
|
||||
|
||||
fail = 0;
|
||||
|
||||
/* Check that unused bits of last byte of ciphertext are zero */
|
||||
fail |= c[NTRU_CIPHERTEXTBYTES - 1] & (0xff << (8 - (7 & (NTRU_LOGQ * NTRU_PACK_DEG))));
|
||||
|
||||
fail |= PQCLEAN_NTRUHPS2048677_AVX2_owcpa_dec(rm, c, sk);
|
||||
fail = PQCLEAN_NTRUHPS2048677_AVX2_owcpa_dec(rm, c, sk);
|
||||
/* If fail = 0 then c = Enc(h, rm). There is no need to re-encapsulate. */
|
||||
/* See comment in PQCLEAN_NTRUHPS2048677_AVX2_owcpa_dec for details. */
|
||||
|
||||
|
||||
@ -2,40 +2,59 @@
|
||||
#include "poly.h"
|
||||
#include "sample.h"
|
||||
|
||||
static int owcpa_check_ciphertext(const unsigned char *ciphertext) {
|
||||
/* A ciphertext is log2(q)*(n-1) bits packed into bytes. */
|
||||
/* Check that any unused bits of the final byte are zero. */
|
||||
|
||||
uint16_t t = 0;
|
||||
|
||||
t = ciphertext[NTRU_CIPHERTEXTBYTES - 1];
|
||||
t &= 0xff << (8 - (7 & (NTRU_LOGQ * NTRU_PACK_DEG)));
|
||||
|
||||
/* We have 0 <= t < 256 */
|
||||
/* Return 0 on success (t=0), 1 on failure */
|
||||
return (int) (1 & ((~t + 1) >> 15));
|
||||
}
|
||||
|
||||
static int owcpa_check_r(const poly *r) {
|
||||
/* Check that r is in message space. */
|
||||
/* Note: Assumes that r has coefficients in {0, 1, ..., q-1} */
|
||||
/* A valid r has coefficients in {0,1,q-1} and has r[N-1] = 0 */
|
||||
/* Note: We may assume that 0 <= r[i] <= q-1 for all i */
|
||||
|
||||
int i;
|
||||
uint64_t t = 0;
|
||||
uint32_t t = 0;
|
||||
uint16_t c;
|
||||
for (i = 0; i < NTRU_N; i++) {
|
||||
c = MODQ(r->coeffs[i] + 1);
|
||||
t |= c & (NTRU_Q - 4); /* 0 if c is in {0,1,2,3} */
|
||||
t |= (c + 1) & 0x4; /* 0 if c is in {0,1,2} */
|
||||
for (i = 0; i < NTRU_N - 1; i++) {
|
||||
c = r->coeffs[i];
|
||||
t |= (c + 1) & (NTRU_Q - 4); /* 0 iff c is in {-1,0,1,2} */
|
||||
t |= (c + 2) & 4; /* 1 if c = 2, 0 if c is in {-1,0,1} */
|
||||
}
|
||||
t |= MODQ(r->coeffs[NTRU_N - 1]); /* Coefficient n-1 must be zero */
|
||||
t = (~t + 1); // two's complement
|
||||
t >>= 63;
|
||||
return (int) t;
|
||||
t |= r->coeffs[NTRU_N - 1]; /* Coefficient n-1 must be zero */
|
||||
|
||||
/* We have 0 <= t < 2^16. */
|
||||
/* Return 0 on success (t=0), 1 on failure */
|
||||
return (int) (1 & ((~t + 1) >> 31));
|
||||
}
|
||||
|
||||
static int owcpa_check_m(const poly *m) {
|
||||
/* Check that m is in message space. */
|
||||
/* Note: Assumes that m has coefficients in {0,1,2}. */
|
||||
/* Check that m is in message space, i.e. */
|
||||
/* (1) |{i : m[i] = 1}| = |{i : m[i] = 2}|, and */
|
||||
/* (2) |{i : m[i] != 0}| = NTRU_WEIGHT. */
|
||||
/* Note: We may assume that m has coefficients in {0,1,2}. */
|
||||
|
||||
int i;
|
||||
uint64_t t = 0;
|
||||
uint16_t p1 = 0;
|
||||
uint16_t m1 = 0;
|
||||
uint32_t t = 0;
|
||||
uint16_t ps = 0;
|
||||
uint16_t ms = 0;
|
||||
for (i = 0; i < NTRU_N; i++) {
|
||||
p1 += m->coeffs[i] & 0x01;
|
||||
m1 += (m->coeffs[i] & 0x02) >> 1;
|
||||
ps += m->coeffs[i] & 1;
|
||||
ms += m->coeffs[i] & 2;
|
||||
}
|
||||
/* Need p1 = m1 and p1 + m1 = NTRU_WEIGHT */
|
||||
t |= p1 ^ m1;
|
||||
t |= (p1 + m1) ^ NTRU_WEIGHT;
|
||||
t = (~t + 1); // two's complement
|
||||
t >>= 63;
|
||||
return (int) t;
|
||||
t |= ps ^ (ms >> 1); /* 0 if (1) holds */
|
||||
t |= ms ^ NTRU_WEIGHT; /* 0 if (1) and (2) hold */
|
||||
|
||||
/* We have 0 <= t < 2^16. */
|
||||
/* Return 0 on success (t=0), 1 on failure */
|
||||
return (int) (1 & ((~t + 1) >> 31));
|
||||
}
|
||||
|
||||
void PQCLEAN_NTRUHPS2048677_AVX2_owcpa_keypair(unsigned char *pk,
|
||||
@ -123,11 +142,15 @@ int PQCLEAN_NTRUHPS2048677_AVX2_owcpa_dec(unsigned char *rm,
|
||||
PQCLEAN_NTRUHPS2048677_AVX2_poly_S3_mul(m, mf, finv3);
|
||||
PQCLEAN_NTRUHPS2048677_AVX2_poly_S3_tobytes(rm + NTRU_PACK_TRINARY_BYTES, m);
|
||||
|
||||
/* NOTE: For the IND-CCA2 KEM we must ensure that c = Enc(h, (r,m)). */
|
||||
fail = 0;
|
||||
|
||||
/* Check that the unused bits of the last byte of the ciphertext are zero */
|
||||
fail |= owcpa_check_ciphertext(ciphertext);
|
||||
|
||||
/* For the IND-CCA2 KEM we must ensure that c = Enc(h, (r,m)). */
|
||||
/* We can avoid re-computing r*h + Lift(m) as long as we check that */
|
||||
/* r (defined as b/h mod (q, Phi_n)) and m are in the message space. */
|
||||
/* (m can take any value in S3 in NTRU_HRSS) */
|
||||
fail = 0;
|
||||
fail |= owcpa_check_m(m);
|
||||
|
||||
/* b = c - Lift(m) mod (q, x^n - 1) */
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,4 +1,3 @@
|
||||
#include "crypto_sort_int32.h"
|
||||
#include "sample.h"
|
||||
|
||||
void PQCLEAN_NTRUHPS2048677_AVX2_sample_fg(poly *f, poly *g, const unsigned char uniformbytes[NTRU_SAMPLE_FG_BYTES]) {
|
||||
@ -25,7 +24,7 @@ void PQCLEAN_NTRUHPS2048677_AVX2_sample_fixed_type(poly *r, const unsigned char
|
||||
s[4 * i + 0] = (u[15 * i + 0] << 2) + (u[15 * i + 1] << 10) + (u[15 * i + 2] << 18) + ((uint32_t) u[15 * i + 3] << 26);
|
||||
s[4 * i + 1] = ((u[15 * i + 3] & 0xc0) >> 4) + (u[15 * i + 4] << 4) + (u[15 * i + 5] << 12) + (u[15 * i + 6] << 20) + ((uint32_t) u[15 * i + 7] << 28);
|
||||
s[4 * i + 2] = ((u[15 * i + 7] & 0xf0) >> 2) + (u[15 * i + 8] << 6) + (u[15 * i + 9] << 14) + (u[15 * i + 10] << 22) + ((uint32_t) u[15 * i + 11] << 30);
|
||||
s[4 * i + 3] = (u[15 * i + 11] & 0xfc) + (u[15 * i + 12] << 8) + (u[15 * i + 13] << 15) + ((uint32_t) u[15 * i + 14] << 24);
|
||||
s[4 * i + 3] = (u[15 * i + 11] & 0xfc) + (u[15 * i + 12] << 8) + (u[15 * i + 13] << 16) + ((uint32_t) u[15 * i + 14] << 24);
|
||||
}
|
||||
|
||||
for (i = 0; i < NTRU_WEIGHT / 2; i++) {
|
||||
|
||||
@ -4,6 +4,8 @@
|
||||
#include "params.h"
|
||||
#include "poly.h"
|
||||
|
||||
#include "crypto_sort_int32.h"
|
||||
|
||||
void PQCLEAN_NTRUHPS2048677_AVX2_sample_fg(poly *f, poly *g, const unsigned char uniformbytes[NTRU_SAMPLE_FG_BYTES]);
|
||||
void PQCLEAN_NTRUHPS2048677_AVX2_sample_rm(poly *r, poly *m, const unsigned char uniformbytes[NTRU_SAMPLE_RM_BYTES]);
|
||||
|
||||
|
||||
@ -42,12 +42,7 @@ int PQCLEAN_NTRUHPS2048677_CLEAN_crypto_kem_dec(uint8_t *k, const uint8_t *c, co
|
||||
uint8_t rm[NTRU_OWCPA_MSGBYTES];
|
||||
uint8_t buf[NTRU_PRFKEYBYTES + NTRU_CIPHERTEXTBYTES];
|
||||
|
||||
fail = 0;
|
||||
|
||||
/* Check that unused bits of last byte of ciphertext are zero */
|
||||
fail |= c[NTRU_CIPHERTEXTBYTES - 1] & (0xff << (8 - (7 & (NTRU_LOGQ * NTRU_PACK_DEG))));
|
||||
|
||||
fail |= PQCLEAN_NTRUHPS2048677_CLEAN_owcpa_dec(rm, c, sk);
|
||||
fail = PQCLEAN_NTRUHPS2048677_CLEAN_owcpa_dec(rm, c, sk);
|
||||
/* If fail = 0 then c = Enc(h, rm). There is no need to re-encapsulate. */
|
||||
/* See comment in PQCLEAN_NTRUHPS2048677_CLEAN_owcpa_dec for details. */
|
||||
|
||||
|
||||
@ -2,40 +2,59 @@
|
||||
#include "poly.h"
|
||||
#include "sample.h"
|
||||
|
||||
static int owcpa_check_ciphertext(const unsigned char *ciphertext) {
|
||||
/* A ciphertext is log2(q)*(n-1) bits packed into bytes. */
|
||||
/* Check that any unused bits of the final byte are zero. */
|
||||
|
||||
uint16_t t = 0;
|
||||
|
||||
t = ciphertext[NTRU_CIPHERTEXTBYTES - 1];
|
||||
t &= 0xff << (8 - (7 & (NTRU_LOGQ * NTRU_PACK_DEG)));
|
||||
|
||||
/* We have 0 <= t < 256 */
|
||||
/* Return 0 on success (t=0), 1 on failure */
|
||||
return (int) (1 & ((~t + 1) >> 15));
|
||||
}
|
||||
|
||||
static int owcpa_check_r(const poly *r) {
|
||||
/* Check that r is in message space. */
|
||||
/* Note: Assumes that r has coefficients in {0, 1, ..., q-1} */
|
||||
/* A valid r has coefficients in {0,1,q-1} and has r[N-1] = 0 */
|
||||
/* Note: We may assume that 0 <= r[i] <= q-1 for all i */
|
||||
|
||||
int i;
|
||||
uint64_t t = 0;
|
||||
uint32_t t = 0;
|
||||
uint16_t c;
|
||||
for (i = 0; i < NTRU_N; i++) {
|
||||
c = MODQ(r->coeffs[i] + 1);
|
||||
t |= c & (NTRU_Q - 4); /* 0 if c is in {0,1,2,3} */
|
||||
t |= (c + 1) & 0x4; /* 0 if c is in {0,1,2} */
|
||||
for (i = 0; i < NTRU_N - 1; i++) {
|
||||
c = r->coeffs[i];
|
||||
t |= (c + 1) & (NTRU_Q - 4); /* 0 iff c is in {-1,0,1,2} */
|
||||
t |= (c + 2) & 4; /* 1 if c = 2, 0 if c is in {-1,0,1} */
|
||||
}
|
||||
t |= MODQ(r->coeffs[NTRU_N - 1]); /* Coefficient n-1 must be zero */
|
||||
t = (~t + 1); // two's complement
|
||||
t >>= 63;
|
||||
return (int) t;
|
||||
t |= r->coeffs[NTRU_N - 1]; /* Coefficient n-1 must be zero */
|
||||
|
||||
/* We have 0 <= t < 2^16. */
|
||||
/* Return 0 on success (t=0), 1 on failure */
|
||||
return (int) (1 & ((~t + 1) >> 31));
|
||||
}
|
||||
|
||||
static int owcpa_check_m(const poly *m) {
|
||||
/* Check that m is in message space. */
|
||||
/* Note: Assumes that m has coefficients in {0,1,2}. */
|
||||
/* Check that m is in message space, i.e. */
|
||||
/* (1) |{i : m[i] = 1}| = |{i : m[i] = 2}|, and */
|
||||
/* (2) |{i : m[i] != 0}| = NTRU_WEIGHT. */
|
||||
/* Note: We may assume that m has coefficients in {0,1,2}. */
|
||||
|
||||
int i;
|
||||
uint64_t t = 0;
|
||||
uint16_t p1 = 0;
|
||||
uint16_t m1 = 0;
|
||||
uint32_t t = 0;
|
||||
uint16_t ps = 0;
|
||||
uint16_t ms = 0;
|
||||
for (i = 0; i < NTRU_N; i++) {
|
||||
p1 += m->coeffs[i] & 0x01;
|
||||
m1 += (m->coeffs[i] & 0x02) >> 1;
|
||||
ps += m->coeffs[i] & 1;
|
||||
ms += m->coeffs[i] & 2;
|
||||
}
|
||||
/* Need p1 = m1 and p1 + m1 = NTRU_WEIGHT */
|
||||
t |= p1 ^ m1;
|
||||
t |= (p1 + m1) ^ NTRU_WEIGHT;
|
||||
t = (~t + 1); // two's complement
|
||||
t >>= 63;
|
||||
return (int) t;
|
||||
t |= ps ^ (ms >> 1); /* 0 if (1) holds */
|
||||
t |= ms ^ NTRU_WEIGHT; /* 0 if (1) and (2) hold */
|
||||
|
||||
/* We have 0 <= t < 2^16. */
|
||||
/* Return 0 on success (t=0), 1 on failure */
|
||||
return (int) (1 & ((~t + 1) >> 31));
|
||||
}
|
||||
|
||||
void PQCLEAN_NTRUHPS2048677_CLEAN_owcpa_keypair(unsigned char *pk,
|
||||
@ -123,11 +142,15 @@ int PQCLEAN_NTRUHPS2048677_CLEAN_owcpa_dec(unsigned char *rm,
|
||||
PQCLEAN_NTRUHPS2048677_CLEAN_poly_S3_mul(m, mf, finv3);
|
||||
PQCLEAN_NTRUHPS2048677_CLEAN_poly_S3_tobytes(rm + NTRU_PACK_TRINARY_BYTES, m);
|
||||
|
||||
/* NOTE: For the IND-CCA2 KEM we must ensure that c = Enc(h, (r,m)). */
|
||||
fail = 0;
|
||||
|
||||
/* Check that the unused bits of the last byte of the ciphertext are zero */
|
||||
fail |= owcpa_check_ciphertext(ciphertext);
|
||||
|
||||
/* For the IND-CCA2 KEM we must ensure that c = Enc(h, (r,m)). */
|
||||
/* We can avoid re-computing r*h + Lift(m) as long as we check that */
|
||||
/* r (defined as b/h mod (q, Phi_n)) and m are in the message space. */
|
||||
/* (m can take any value in S3 in NTRU_HRSS) */
|
||||
fail = 0;
|
||||
fail |= owcpa_check_m(m);
|
||||
|
||||
/* b = c - Lift(m) mod (q, x^n - 1) */
|
||||
|
||||
@ -3,6 +3,7 @@
|
||||
|
||||
#include "params.h"
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#define MODQ(X) ((X) & (NTRU_Q-1))
|
||||
|
||||
@ -30,14 +30,22 @@ void PQCLEAN_NTRUHPS2048677_CLEAN_poly_mod_q_Phi_n(poly *r) {
|
||||
}
|
||||
|
||||
void PQCLEAN_NTRUHPS2048677_CLEAN_poly_Rq_to_S3(poly *r, const poly *a) {
|
||||
/* NOTE: Assumes input is in [0,Q-1]^N */
|
||||
/* Produces output in {0,1,2}^N */
|
||||
int i;
|
||||
uint16_t flag;
|
||||
|
||||
/* Center coeffs around 3Q: [0, Q-1] -> [3Q - Q/2, 3Q + Q/2) */
|
||||
/* The coefficients of a are stored as non-negative integers. */
|
||||
/* We must translate to representatives in [-q/2, q/2) before */
|
||||
/* reduction mod 3. */
|
||||
for (i = 0; i < NTRU_N; i++) {
|
||||
r->coeffs[i] = ((MODQ(a->coeffs[i]) >> (NTRU_LOGQ - 1)) ^ 3) << NTRU_LOGQ;
|
||||
r->coeffs[i] += MODQ(a->coeffs[i]);
|
||||
/* Need an explicit reduction mod q here */
|
||||
r->coeffs[i] = MODQ(a->coeffs[i]);
|
||||
|
||||
/* flag = 1 if r[i] >= q/2 else 0 */
|
||||
flag = r->coeffs[i] >> (NTRU_LOGQ - 1);
|
||||
|
||||
/* Now we will add (-q) mod 3 if r[i] >= q/2 */
|
||||
/* Note (-q) mod 3=(-2^k) mod 3=1<<(1-(k&1)) */
|
||||
r->coeffs[i] += flag << (1 - (NTRU_LOGQ & 1));
|
||||
}
|
||||
|
||||
PQCLEAN_NTRUHPS2048677_CLEAN_poly_mod_3_Phi_n(r);
|
||||
|
||||
@ -3,14 +3,14 @@
|
||||
#include "poly.h"
|
||||
|
||||
/* return -1 if x<0 and y<0; otherwise return 0 */
|
||||
static inline int both_negative_mask(int x, int y) {
|
||||
static inline int16_t both_negative_mask(int16_t x, int16_t y) {
|
||||
return (x & y) >> 15;
|
||||
}
|
||||
|
||||
void PQCLEAN_NTRUHPS2048677_CLEAN_poly_R2_inv(poly *r, const poly *a) {
|
||||
poly f, g, v, w;
|
||||
int i, loop, delta;
|
||||
int sign, swap, t;
|
||||
size_t i, loop;
|
||||
int16_t delta, sign, swap, t;
|
||||
|
||||
for (i = 0; i < NTRU_N; ++i) {
|
||||
v.coeffs[i] = 0;
|
||||
@ -37,7 +37,7 @@ void PQCLEAN_NTRUHPS2048677_CLEAN_poly_R2_inv(poly *r, const poly *a) {
|
||||
v.coeffs[0] = 0;
|
||||
|
||||
sign = g.coeffs[0] & f.coeffs[0];
|
||||
swap = both_negative_mask(-delta, -(int) g.coeffs[0]);
|
||||
swap = both_negative_mask(-delta, -(int16_t) g.coeffs[0]);
|
||||
delta ^= swap & (delta ^ -delta);
|
||||
delta += 1;
|
||||
|
||||
|
||||
@ -11,14 +11,14 @@ static inline uint8_t mod3(uint8_t a) { /* a between 0 and 9 */
|
||||
}
|
||||
|
||||
/* return -1 if x<0 and y<0; otherwise return 0 */
|
||||
static inline int both_negative_mask(int x, int y) {
|
||||
static inline int16_t both_negative_mask(int16_t x, int16_t y) {
|
||||
return (x & y) >> 15;
|
||||
}
|
||||
|
||||
void PQCLEAN_NTRUHPS2048677_CLEAN_poly_S3_inv(poly *r, const poly *a) {
|
||||
poly f, g, v, w;
|
||||
int i, loop, delta;
|
||||
int sign, swap, t;
|
||||
size_t i, loop;
|
||||
int16_t delta, sign, swap, t;
|
||||
|
||||
for (i = 0; i < NTRU_N; ++i) {
|
||||
v.coeffs[i] = 0;
|
||||
@ -45,7 +45,7 @@ void PQCLEAN_NTRUHPS2048677_CLEAN_poly_S3_inv(poly *r, const poly *a) {
|
||||
v.coeffs[0] = 0;
|
||||
|
||||
sign = mod3((uint8_t) (2 * g.coeffs[0] * f.coeffs[0]));
|
||||
swap = both_negative_mask(-delta, -(int) g.coeffs[0]);
|
||||
swap = both_negative_mask(-delta, -(int16_t) g.coeffs[0]);
|
||||
delta ^= swap & (delta ^ -delta);
|
||||
delta += 1;
|
||||
|
||||
|
||||
@ -1,4 +1,3 @@
|
||||
#include "crypto_sort_int32.h"
|
||||
#include "sample.h"
|
||||
|
||||
void PQCLEAN_NTRUHPS2048677_CLEAN_sample_fg(poly *f, poly *g, const unsigned char uniformbytes[NTRU_SAMPLE_FG_BYTES]) {
|
||||
@ -25,7 +24,7 @@ void PQCLEAN_NTRUHPS2048677_CLEAN_sample_fixed_type(poly *r, const unsigned char
|
||||
s[4 * i + 0] = (u[15 * i + 0] << 2) + (u[15 * i + 1] << 10) + (u[15 * i + 2] << 18) + ((uint32_t) u[15 * i + 3] << 26);
|
||||
s[4 * i + 1] = ((u[15 * i + 3] & 0xc0) >> 4) + (u[15 * i + 4] << 4) + (u[15 * i + 5] << 12) + (u[15 * i + 6] << 20) + ((uint32_t) u[15 * i + 7] << 28);
|
||||
s[4 * i + 2] = ((u[15 * i + 7] & 0xf0) >> 2) + (u[15 * i + 8] << 6) + (u[15 * i + 9] << 14) + (u[15 * i + 10] << 22) + ((uint32_t) u[15 * i + 11] << 30);
|
||||
s[4 * i + 3] = (u[15 * i + 11] & 0xfc) + (u[15 * i + 12] << 8) + (u[15 * i + 13] << 15) + ((uint32_t) u[15 * i + 14] << 24);
|
||||
s[4 * i + 3] = (u[15 * i + 11] & 0xfc) + (u[15 * i + 12] << 8) + (u[15 * i + 13] << 16) + ((uint32_t) u[15 * i + 14] << 24);
|
||||
}
|
||||
|
||||
for (i = 0; i < NTRU_WEIGHT / 2; i++) {
|
||||
|
||||
@ -4,6 +4,8 @@
|
||||
#include "params.h"
|
||||
#include "poly.h"
|
||||
|
||||
#include "crypto_sort_int32.h"
|
||||
|
||||
void PQCLEAN_NTRUHPS2048677_CLEAN_sample_fg(poly *f, poly *g, const unsigned char uniformbytes[NTRU_SAMPLE_FG_BYTES]);
|
||||
void PQCLEAN_NTRUHPS2048677_CLEAN_sample_rm(poly *r, poly *m, const unsigned char uniformbytes[NTRU_SAMPLE_RM_BYTES]);
|
||||
|
||||
|
||||
@ -469,8 +469,11 @@ static void int32_sort_2power(int32 *x, size_t n, int flagdown) {
|
||||
}
|
||||
|
||||
q = n >> 3;
|
||||
flip = (p << 1 == q);
|
||||
flipflip = !flip;
|
||||
flip = 0;
|
||||
if (p << 1 == q) {
|
||||
flip = 1;
|
||||
}
|
||||
flipflip = 1 - flip;
|
||||
for (j = 0; j < q; j += p + p) {
|
||||
for (k = j; k < j + p + p; k += p) {
|
||||
for (i = k; i < k + p; i += 8) {
|
||||
|
||||
@ -42,12 +42,7 @@ int PQCLEAN_NTRUHPS4096821_AVX2_crypto_kem_dec(uint8_t *k, const uint8_t *c, con
|
||||
uint8_t rm[NTRU_OWCPA_MSGBYTES];
|
||||
uint8_t buf[NTRU_PRFKEYBYTES + NTRU_CIPHERTEXTBYTES];
|
||||
|
||||
fail = 0;
|
||||
|
||||
/* Check that unused bits of last byte of ciphertext are zero */
|
||||
fail |= c[NTRU_CIPHERTEXTBYTES - 1] & (0xff << (8 - (7 & (NTRU_LOGQ * NTRU_PACK_DEG))));
|
||||
|
||||
fail |= PQCLEAN_NTRUHPS4096821_AVX2_owcpa_dec(rm, c, sk);
|
||||
fail = PQCLEAN_NTRUHPS4096821_AVX2_owcpa_dec(rm, c, sk);
|
||||
/* If fail = 0 then c = Enc(h, rm). There is no need to re-encapsulate. */
|
||||
/* See comment in PQCLEAN_NTRUHPS4096821_AVX2_owcpa_dec for details. */
|
||||
|
||||
|
||||
@ -2,40 +2,59 @@
|
||||
#include "poly.h"
|
||||
#include "sample.h"
|
||||
|
||||
static int owcpa_check_ciphertext(const unsigned char *ciphertext) {
|
||||
/* A ciphertext is log2(q)*(n-1) bits packed into bytes. */
|
||||
/* Check that any unused bits of the final byte are zero. */
|
||||
|
||||
uint16_t t = 0;
|
||||
|
||||
t = ciphertext[NTRU_CIPHERTEXTBYTES - 1];
|
||||
t &= 0xff << (8 - (7 & (NTRU_LOGQ * NTRU_PACK_DEG)));
|
||||
|
||||
/* We have 0 <= t < 256 */
|
||||
/* Return 0 on success (t=0), 1 on failure */
|
||||
return (int) (1 & ((~t + 1) >> 15));
|
||||
}
|
||||
|
||||
static int owcpa_check_r(const poly *r) {
|
||||
/* Check that r is in message space. */
|
||||
/* Note: Assumes that r has coefficients in {0, 1, ..., q-1} */
|
||||
/* A valid r has coefficients in {0,1,q-1} and has r[N-1] = 0 */
|
||||
/* Note: We may assume that 0 <= r[i] <= q-1 for all i */
|
||||
|
||||
int i;
|
||||
uint64_t t = 0;
|
||||
uint32_t t = 0;
|
||||
uint16_t c;
|
||||
for (i = 0; i < NTRU_N; i++) {
|
||||
c = MODQ(r->coeffs[i] + 1);
|
||||
t |= c & (NTRU_Q - 4); /* 0 if c is in {0,1,2,3} */
|
||||
t |= (c + 1) & 0x4; /* 0 if c is in {0,1,2} */
|
||||
for (i = 0; i < NTRU_N - 1; i++) {
|
||||
c = r->coeffs[i];
|
||||
t |= (c + 1) & (NTRU_Q - 4); /* 0 iff c is in {-1,0,1,2} */
|
||||
t |= (c + 2) & 4; /* 1 if c = 2, 0 if c is in {-1,0,1} */
|
||||
}
|
||||
t |= MODQ(r->coeffs[NTRU_N - 1]); /* Coefficient n-1 must be zero */
|
||||
t = (~t + 1); // two's complement
|
||||
t >>= 63;
|
||||
return (int) t;
|
||||
t |= r->coeffs[NTRU_N - 1]; /* Coefficient n-1 must be zero */
|
||||
|
||||
/* We have 0 <= t < 2^16. */
|
||||
/* Return 0 on success (t=0), 1 on failure */
|
||||
return (int) (1 & ((~t + 1) >> 31));
|
||||
}
|
||||
|
||||
static int owcpa_check_m(const poly *m) {
|
||||
/* Check that m is in message space. */
|
||||
/* Note: Assumes that m has coefficients in {0,1,2}. */
|
||||
/* Check that m is in message space, i.e. */
|
||||
/* (1) |{i : m[i] = 1}| = |{i : m[i] = 2}|, and */
|
||||
/* (2) |{i : m[i] != 0}| = NTRU_WEIGHT. */
|
||||
/* Note: We may assume that m has coefficients in {0,1,2}. */
|
||||
|
||||
int i;
|
||||
uint64_t t = 0;
|
||||
uint16_t p1 = 0;
|
||||
uint16_t m1 = 0;
|
||||
uint32_t t = 0;
|
||||
uint16_t ps = 0;
|
||||
uint16_t ms = 0;
|
||||
for (i = 0; i < NTRU_N; i++) {
|
||||
p1 += m->coeffs[i] & 0x01;
|
||||
m1 += (m->coeffs[i] & 0x02) >> 1;
|
||||
ps += m->coeffs[i] & 1;
|
||||
ms += m->coeffs[i] & 2;
|
||||
}
|
||||
/* Need p1 = m1 and p1 + m1 = NTRU_WEIGHT */
|
||||
t |= p1 ^ m1;
|
||||
t |= (p1 + m1) ^ NTRU_WEIGHT;
|
||||
t = (~t + 1); // two's complement
|
||||
t >>= 63;
|
||||
return (int) t;
|
||||
t |= ps ^ (ms >> 1); /* 0 if (1) holds */
|
||||
t |= ms ^ NTRU_WEIGHT; /* 0 if (1) and (2) hold */
|
||||
|
||||
/* We have 0 <= t < 2^16. */
|
||||
/* Return 0 on success (t=0), 1 on failure */
|
||||
return (int) (1 & ((~t + 1) >> 31));
|
||||
}
|
||||
|
||||
void PQCLEAN_NTRUHPS4096821_AVX2_owcpa_keypair(unsigned char *pk,
|
||||
@ -123,11 +142,15 @@ int PQCLEAN_NTRUHPS4096821_AVX2_owcpa_dec(unsigned char *rm,
|
||||
PQCLEAN_NTRUHPS4096821_AVX2_poly_S3_mul(m, mf, finv3);
|
||||
PQCLEAN_NTRUHPS4096821_AVX2_poly_S3_tobytes(rm + NTRU_PACK_TRINARY_BYTES, m);
|
||||
|
||||
/* NOTE: For the IND-CCA2 KEM we must ensure that c = Enc(h, (r,m)). */
|
||||
fail = 0;
|
||||
|
||||
/* Check that the unused bits of the last byte of the ciphertext are zero */
|
||||
fail |= owcpa_check_ciphertext(ciphertext);
|
||||
|
||||
/* For the IND-CCA2 KEM we must ensure that c = Enc(h, (r,m)). */
|
||||
/* We can avoid re-computing r*h + Lift(m) as long as we check that */
|
||||
/* r (defined as b/h mod (q, Phi_n)) and m are in the message space. */
|
||||
/* (m can take any value in S3 in NTRU_HRSS) */
|
||||
fail = 0;
|
||||
fail |= owcpa_check_m(m);
|
||||
|
||||
/* b = c - Lift(m) mod (q, x^n - 1) */
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,4 +1,3 @@
|
||||
#include "crypto_sort_int32.h"
|
||||
#include "sample.h"
|
||||
|
||||
void PQCLEAN_NTRUHPS4096821_AVX2_sample_fg(poly *f, poly *g, const unsigned char uniformbytes[NTRU_SAMPLE_FG_BYTES]) {
|
||||
@ -25,7 +24,7 @@ void PQCLEAN_NTRUHPS4096821_AVX2_sample_fixed_type(poly *r, const unsigned char
|
||||
s[4 * i + 0] = (u[15 * i + 0] << 2) + (u[15 * i + 1] << 10) + (u[15 * i + 2] << 18) + ((uint32_t) u[15 * i + 3] << 26);
|
||||
s[4 * i + 1] = ((u[15 * i + 3] & 0xc0) >> 4) + (u[15 * i + 4] << 4) + (u[15 * i + 5] << 12) + (u[15 * i + 6] << 20) + ((uint32_t) u[15 * i + 7] << 28);
|
||||
s[4 * i + 2] = ((u[15 * i + 7] & 0xf0) >> 2) + (u[15 * i + 8] << 6) + (u[15 * i + 9] << 14) + (u[15 * i + 10] << 22) + ((uint32_t) u[15 * i + 11] << 30);
|
||||
s[4 * i + 3] = (u[15 * i + 11] & 0xfc) + (u[15 * i + 12] << 8) + (u[15 * i + 13] << 15) + ((uint32_t) u[15 * i + 14] << 24);
|
||||
s[4 * i + 3] = (u[15 * i + 11] & 0xfc) + (u[15 * i + 12] << 8) + (u[15 * i + 13] << 16) + ((uint32_t) u[15 * i + 14] << 24);
|
||||
}
|
||||
|
||||
for (i = 0; i < NTRU_WEIGHT / 2; i++) {
|
||||
|
||||
@ -4,6 +4,8 @@
|
||||
#include "params.h"
|
||||
#include "poly.h"
|
||||
|
||||
#include "crypto_sort_int32.h"
|
||||
|
||||
void PQCLEAN_NTRUHPS4096821_AVX2_sample_fg(poly *f, poly *g, const unsigned char uniformbytes[NTRU_SAMPLE_FG_BYTES]);
|
||||
void PQCLEAN_NTRUHPS4096821_AVX2_sample_rm(poly *r, poly *m, const unsigned char uniformbytes[NTRU_SAMPLE_RM_BYTES]);
|
||||
|
||||
|
||||
@ -42,12 +42,7 @@ int PQCLEAN_NTRUHPS4096821_CLEAN_crypto_kem_dec(uint8_t *k, const uint8_t *c, co
|
||||
uint8_t rm[NTRU_OWCPA_MSGBYTES];
|
||||
uint8_t buf[NTRU_PRFKEYBYTES + NTRU_CIPHERTEXTBYTES];
|
||||
|
||||
fail = 0;
|
||||
|
||||
/* Check that unused bits of last byte of ciphertext are zero */
|
||||
fail |= c[NTRU_CIPHERTEXTBYTES - 1] & (0xff << (8 - (7 & (NTRU_LOGQ * NTRU_PACK_DEG))));
|
||||
|
||||
fail |= PQCLEAN_NTRUHPS4096821_CLEAN_owcpa_dec(rm, c, sk);
|
||||
fail = PQCLEAN_NTRUHPS4096821_CLEAN_owcpa_dec(rm, c, sk);
|
||||
/* If fail = 0 then c = Enc(h, rm). There is no need to re-encapsulate. */
|
||||
/* See comment in PQCLEAN_NTRUHPS4096821_CLEAN_owcpa_dec for details. */
|
||||
|
||||
|
||||
@ -2,40 +2,59 @@
|
||||
#include "poly.h"
|
||||
#include "sample.h"
|
||||
|
||||
static int owcpa_check_ciphertext(const unsigned char *ciphertext) {
|
||||
/* A ciphertext is log2(q)*(n-1) bits packed into bytes. */
|
||||
/* Check that any unused bits of the final byte are zero. */
|
||||
|
||||
uint16_t t = 0;
|
||||
|
||||
t = ciphertext[NTRU_CIPHERTEXTBYTES - 1];
|
||||
t &= 0xff << (8 - (7 & (NTRU_LOGQ * NTRU_PACK_DEG)));
|
||||
|
||||
/* We have 0 <= t < 256 */
|
||||
/* Return 0 on success (t=0), 1 on failure */
|
||||
return (int) (1 & ((~t + 1) >> 15));
|
||||
}
|
||||
|
||||
static int owcpa_check_r(const poly *r) {
|
||||
/* Check that r is in message space. */
|
||||
/* Note: Assumes that r has coefficients in {0, 1, ..., q-1} */
|
||||
/* A valid r has coefficients in {0,1,q-1} and has r[N-1] = 0 */
|
||||
/* Note: We may assume that 0 <= r[i] <= q-1 for all i */
|
||||
|
||||
int i;
|
||||
uint64_t t = 0;
|
||||
uint32_t t = 0;
|
||||
uint16_t c;
|
||||
for (i = 0; i < NTRU_N; i++) {
|
||||
c = MODQ(r->coeffs[i] + 1);
|
||||
t |= c & (NTRU_Q - 4); /* 0 if c is in {0,1,2,3} */
|
||||
t |= (c + 1) & 0x4; /* 0 if c is in {0,1,2} */
|
||||
for (i = 0; i < NTRU_N - 1; i++) {
|
||||
c = r->coeffs[i];
|
||||
t |= (c + 1) & (NTRU_Q - 4); /* 0 iff c is in {-1,0,1,2} */
|
||||
t |= (c + 2) & 4; /* 1 if c = 2, 0 if c is in {-1,0,1} */
|
||||
}
|
||||
t |= MODQ(r->coeffs[NTRU_N - 1]); /* Coefficient n-1 must be zero */
|
||||
t = (~t + 1); // two's complement
|
||||
t >>= 63;
|
||||
return (int) t;
|
||||
t |= r->coeffs[NTRU_N - 1]; /* Coefficient n-1 must be zero */
|
||||
|
||||
/* We have 0 <= t < 2^16. */
|
||||
/* Return 0 on success (t=0), 1 on failure */
|
||||
return (int) (1 & ((~t + 1) >> 31));
|
||||
}
|
||||
|
||||
static int owcpa_check_m(const poly *m) {
|
||||
/* Check that m is in message space. */
|
||||
/* Note: Assumes that m has coefficients in {0,1,2}. */
|
||||
/* Check that m is in message space, i.e. */
|
||||
/* (1) |{i : m[i] = 1}| = |{i : m[i] = 2}|, and */
|
||||
/* (2) |{i : m[i] != 0}| = NTRU_WEIGHT. */
|
||||
/* Note: We may assume that m has coefficients in {0,1,2}. */
|
||||
|
||||
int i;
|
||||
uint64_t t = 0;
|
||||
uint16_t p1 = 0;
|
||||
uint16_t m1 = 0;
|
||||
uint32_t t = 0;
|
||||
uint16_t ps = 0;
|
||||
uint16_t ms = 0;
|
||||
for (i = 0; i < NTRU_N; i++) {
|
||||
p1 += m->coeffs[i] & 0x01;
|
||||
m1 += (m->coeffs[i] & 0x02) >> 1;
|
||||
ps += m->coeffs[i] & 1;
|
||||
ms += m->coeffs[i] & 2;
|
||||
}
|
||||
/* Need p1 = m1 and p1 + m1 = NTRU_WEIGHT */
|
||||
t |= p1 ^ m1;
|
||||
t |= (p1 + m1) ^ NTRU_WEIGHT;
|
||||
t = (~t + 1); // two's complement
|
||||
t >>= 63;
|
||||
return (int) t;
|
||||
t |= ps ^ (ms >> 1); /* 0 if (1) holds */
|
||||
t |= ms ^ NTRU_WEIGHT; /* 0 if (1) and (2) hold */
|
||||
|
||||
/* We have 0 <= t < 2^16. */
|
||||
/* Return 0 on success (t=0), 1 on failure */
|
||||
return (int) (1 & ((~t + 1) >> 31));
|
||||
}
|
||||
|
||||
void PQCLEAN_NTRUHPS4096821_CLEAN_owcpa_keypair(unsigned char *pk,
|
||||
@ -123,11 +142,15 @@ int PQCLEAN_NTRUHPS4096821_CLEAN_owcpa_dec(unsigned char *rm,
|
||||
PQCLEAN_NTRUHPS4096821_CLEAN_poly_S3_mul(m, mf, finv3);
|
||||
PQCLEAN_NTRUHPS4096821_CLEAN_poly_S3_tobytes(rm + NTRU_PACK_TRINARY_BYTES, m);
|
||||
|
||||
/* NOTE: For the IND-CCA2 KEM we must ensure that c = Enc(h, (r,m)). */
|
||||
fail = 0;
|
||||
|
||||
/* Check that the unused bits of the last byte of the ciphertext are zero */
|
||||
fail |= owcpa_check_ciphertext(ciphertext);
|
||||
|
||||
/* For the IND-CCA2 KEM we must ensure that c = Enc(h, (r,m)). */
|
||||
/* We can avoid re-computing r*h + Lift(m) as long as we check that */
|
||||
/* r (defined as b/h mod (q, Phi_n)) and m are in the message space. */
|
||||
/* (m can take any value in S3 in NTRU_HRSS) */
|
||||
fail = 0;
|
||||
fail |= owcpa_check_m(m);
|
||||
|
||||
/* b = c - Lift(m) mod (q, x^n - 1) */
|
||||
|
||||
@ -3,6 +3,7 @@
|
||||
|
||||
#include "params.h"
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#define MODQ(X) ((X) & (NTRU_Q-1))
|
||||
|
||||
@ -30,14 +30,22 @@ void PQCLEAN_NTRUHPS4096821_CLEAN_poly_mod_q_Phi_n(poly *r) {
|
||||
}
|
||||
|
||||
void PQCLEAN_NTRUHPS4096821_CLEAN_poly_Rq_to_S3(poly *r, const poly *a) {
|
||||
/* NOTE: Assumes input is in [0,Q-1]^N */
|
||||
/* Produces output in {0,1,2}^N */
|
||||
int i;
|
||||
uint16_t flag;
|
||||
|
||||
/* Center coeffs around 3Q: [0, Q-1] -> [3Q - Q/2, 3Q + Q/2) */
|
||||
/* The coefficients of a are stored as non-negative integers. */
|
||||
/* We must translate to representatives in [-q/2, q/2) before */
|
||||
/* reduction mod 3. */
|
||||
for (i = 0; i < NTRU_N; i++) {
|
||||
r->coeffs[i] = ((MODQ(a->coeffs[i]) >> (NTRU_LOGQ - 1)) ^ 3) << NTRU_LOGQ;
|
||||
r->coeffs[i] += MODQ(a->coeffs[i]);
|
||||
/* Need an explicit reduction mod q here */
|
||||
r->coeffs[i] = MODQ(a->coeffs[i]);
|
||||
|
||||
/* flag = 1 if r[i] >= q/2 else 0 */
|
||||
flag = r->coeffs[i] >> (NTRU_LOGQ - 1);
|
||||
|
||||
/* Now we will add (-q) mod 3 if r[i] >= q/2 */
|
||||
/* Note (-q) mod 3=(-2^k) mod 3=1<<(1-(k&1)) */
|
||||
r->coeffs[i] += flag << (1 - (NTRU_LOGQ & 1));
|
||||
}
|
||||
|
||||
PQCLEAN_NTRUHPS4096821_CLEAN_poly_mod_3_Phi_n(r);
|
||||
|
||||
@ -3,14 +3,14 @@
|
||||
#include "poly.h"
|
||||
|
||||
/* return -1 if x<0 and y<0; otherwise return 0 */
|
||||
static inline int both_negative_mask(int x, int y) {
|
||||
static inline int16_t both_negative_mask(int16_t x, int16_t y) {
|
||||
return (x & y) >> 15;
|
||||
}
|
||||
|
||||
void PQCLEAN_NTRUHPS4096821_CLEAN_poly_R2_inv(poly *r, const poly *a) {
|
||||
poly f, g, v, w;
|
||||
int i, loop, delta;
|
||||
int sign, swap, t;
|
||||
size_t i, loop;
|
||||
int16_t delta, sign, swap, t;
|
||||
|
||||
for (i = 0; i < NTRU_N; ++i) {
|
||||
v.coeffs[i] = 0;
|
||||
@ -37,7 +37,7 @@ void PQCLEAN_NTRUHPS4096821_CLEAN_poly_R2_inv(poly *r, const poly *a) {
|
||||
v.coeffs[0] = 0;
|
||||
|
||||
sign = g.coeffs[0] & f.coeffs[0];
|
||||
swap = both_negative_mask(-delta, -(int) g.coeffs[0]);
|
||||
swap = both_negative_mask(-delta, -(int16_t) g.coeffs[0]);
|
||||
delta ^= swap & (delta ^ -delta);
|
||||
delta += 1;
|
||||
|
||||
|
||||
@ -11,14 +11,14 @@ static inline uint8_t mod3(uint8_t a) { /* a between 0 and 9 */
|
||||
}
|
||||
|
||||
/* return -1 if x<0 and y<0; otherwise return 0 */
|
||||
static inline int both_negative_mask(int x, int y) {
|
||||
static inline int16_t both_negative_mask(int16_t x, int16_t y) {
|
||||
return (x & y) >> 15;
|
||||
}
|
||||
|
||||
void PQCLEAN_NTRUHPS4096821_CLEAN_poly_S3_inv(poly *r, const poly *a) {
|
||||
poly f, g, v, w;
|
||||
int i, loop, delta;
|
||||
int sign, swap, t;
|
||||
size_t i, loop;
|
||||
int16_t delta, sign, swap, t;
|
||||
|
||||
for (i = 0; i < NTRU_N; ++i) {
|
||||
v.coeffs[i] = 0;
|
||||
@ -45,7 +45,7 @@ void PQCLEAN_NTRUHPS4096821_CLEAN_poly_S3_inv(poly *r, const poly *a) {
|
||||
v.coeffs[0] = 0;
|
||||
|
||||
sign = mod3((uint8_t) (2 * g.coeffs[0] * f.coeffs[0]));
|
||||
swap = both_negative_mask(-delta, -(int) g.coeffs[0]);
|
||||
swap = both_negative_mask(-delta, -(int16_t) g.coeffs[0]);
|
||||
delta ^= swap & (delta ^ -delta);
|
||||
delta += 1;
|
||||
|
||||
|
||||
@ -1,4 +1,3 @@
|
||||
#include "crypto_sort_int32.h"
|
||||
#include "sample.h"
|
||||
|
||||
void PQCLEAN_NTRUHPS4096821_CLEAN_sample_fg(poly *f, poly *g, const unsigned char uniformbytes[NTRU_SAMPLE_FG_BYTES]) {
|
||||
@ -25,7 +24,7 @@ void PQCLEAN_NTRUHPS4096821_CLEAN_sample_fixed_type(poly *r, const unsigned char
|
||||
s[4 * i + 0] = (u[15 * i + 0] << 2) + (u[15 * i + 1] << 10) + (u[15 * i + 2] << 18) + ((uint32_t) u[15 * i + 3] << 26);
|
||||
s[4 * i + 1] = ((u[15 * i + 3] & 0xc0) >> 4) + (u[15 * i + 4] << 4) + (u[15 * i + 5] << 12) + (u[15 * i + 6] << 20) + ((uint32_t) u[15 * i + 7] << 28);
|
||||
s[4 * i + 2] = ((u[15 * i + 7] & 0xf0) >> 2) + (u[15 * i + 8] << 6) + (u[15 * i + 9] << 14) + (u[15 * i + 10] << 22) + ((uint32_t) u[15 * i + 11] << 30);
|
||||
s[4 * i + 3] = (u[15 * i + 11] & 0xfc) + (u[15 * i + 12] << 8) + (u[15 * i + 13] << 15) + ((uint32_t) u[15 * i + 14] << 24);
|
||||
s[4 * i + 3] = (u[15 * i + 11] & 0xfc) + (u[15 * i + 12] << 8) + (u[15 * i + 13] << 16) + ((uint32_t) u[15 * i + 14] << 24);
|
||||
}
|
||||
|
||||
for (i = 0; i < NTRU_WEIGHT / 2; i++) {
|
||||
|
||||
@ -4,6 +4,8 @@
|
||||
#include "params.h"
|
||||
#include "poly.h"
|
||||
|
||||
#include "crypto_sort_int32.h"
|
||||
|
||||
void PQCLEAN_NTRUHPS4096821_CLEAN_sample_fg(poly *f, poly *g, const unsigned char uniformbytes[NTRU_SAMPLE_FG_BYTES]);
|
||||
void PQCLEAN_NTRUHPS4096821_CLEAN_sample_rm(poly *r, poly *m, const unsigned char uniformbytes[NTRU_SAMPLE_RM_BYTES]);
|
||||
|
||||
|
||||
@ -42,12 +42,7 @@ int PQCLEAN_NTRUHRSS701_AVX2_crypto_kem_dec(uint8_t *k, const uint8_t *c, const
|
||||
uint8_t rm[NTRU_OWCPA_MSGBYTES];
|
||||
uint8_t buf[NTRU_PRFKEYBYTES + NTRU_CIPHERTEXTBYTES];
|
||||
|
||||
fail = 0;
|
||||
|
||||
/* Check that unused bits of last byte of ciphertext are zero */
|
||||
fail |= c[NTRU_CIPHERTEXTBYTES - 1] & (0xff << (8 - (7 & (NTRU_LOGQ * NTRU_PACK_DEG))));
|
||||
|
||||
fail |= PQCLEAN_NTRUHRSS701_AVX2_owcpa_dec(rm, c, sk);
|
||||
fail = PQCLEAN_NTRUHRSS701_AVX2_owcpa_dec(rm, c, sk);
|
||||
/* If fail = 0 then c = Enc(h, rm). There is no need to re-encapsulate. */
|
||||
/* See comment in PQCLEAN_NTRUHRSS701_AVX2_owcpa_dec for details. */
|
||||
|
||||
|
||||
@ -2,21 +2,37 @@
|
||||
#include "poly.h"
|
||||
#include "sample.h"
|
||||
|
||||
static int owcpa_check_ciphertext(const unsigned char *ciphertext) {
|
||||
/* A ciphertext is log2(q)*(n-1) bits packed into bytes. */
|
||||
/* Check that any unused bits of the final byte are zero. */
|
||||
|
||||
uint16_t t = 0;
|
||||
|
||||
t = ciphertext[NTRU_CIPHERTEXTBYTES - 1];
|
||||
t &= 0xff << (8 - (7 & (NTRU_LOGQ * NTRU_PACK_DEG)));
|
||||
|
||||
/* We have 0 <= t < 256 */
|
||||
/* Return 0 on success (t=0), 1 on failure */
|
||||
return (int) (1 & ((~t + 1) >> 15));
|
||||
}
|
||||
|
||||
static int owcpa_check_r(const poly *r) {
|
||||
/* Check that r is in message space. */
|
||||
/* Note: Assumes that r has coefficients in {0, 1, ..., q-1} */
|
||||
/* A valid r has coefficients in {0,1,q-1} and has r[N-1] = 0 */
|
||||
/* Note: We may assume that 0 <= r[i] <= q-1 for all i */
|
||||
|
||||
int i;
|
||||
uint64_t t = 0;
|
||||
uint32_t t = 0;
|
||||
uint16_t c;
|
||||
for (i = 0; i < NTRU_N; i++) {
|
||||
c = MODQ(r->coeffs[i] + 1);
|
||||
t |= c & (NTRU_Q - 4); /* 0 if c is in {0,1,2,3} */
|
||||
t |= (c + 1) & 0x4; /* 0 if c is in {0,1,2} */
|
||||
for (i = 0; i < NTRU_N - 1; i++) {
|
||||
c = r->coeffs[i];
|
||||
t |= (c + 1) & (NTRU_Q - 4); /* 0 iff c is in {-1,0,1,2} */
|
||||
t |= (c + 2) & 4; /* 1 if c = 2, 0 if c is in {-1,0,1} */
|
||||
}
|
||||
t |= MODQ(r->coeffs[NTRU_N - 1]); /* Coefficient n-1 must be zero */
|
||||
t = (~t + 1); // two's complement
|
||||
t >>= 63;
|
||||
return (int) t;
|
||||
t |= r->coeffs[NTRU_N - 1]; /* Coefficient n-1 must be zero */
|
||||
|
||||
/* We have 0 <= t < 2^16. */
|
||||
/* Return 0 on success (t=0), 1 on failure */
|
||||
return (int) (1 & ((~t + 1) >> 31));
|
||||
}
|
||||
|
||||
|
||||
@ -106,11 +122,15 @@ int PQCLEAN_NTRUHRSS701_AVX2_owcpa_dec(unsigned char *rm,
|
||||
PQCLEAN_NTRUHRSS701_AVX2_poly_S3_mul(m, mf, finv3);
|
||||
PQCLEAN_NTRUHRSS701_AVX2_poly_S3_tobytes(rm + NTRU_PACK_TRINARY_BYTES, m);
|
||||
|
||||
/* NOTE: For the IND-CCA2 KEM we must ensure that c = Enc(h, (r,m)). */
|
||||
fail = 0;
|
||||
|
||||
/* Check that the unused bits of the last byte of the ciphertext are zero */
|
||||
fail |= owcpa_check_ciphertext(ciphertext);
|
||||
|
||||
/* For the IND-CCA2 KEM we must ensure that c = Enc(h, (r,m)). */
|
||||
/* We can avoid re-computing r*h + Lift(m) as long as we check that */
|
||||
/* r (defined as b/h mod (q, Phi_n)) and m are in the message space. */
|
||||
/* (m can take any value in S3 in NTRU_HRSS) */
|
||||
fail = 0;
|
||||
|
||||
/* b = c - Lift(m) mod (q, x^n - 1) */
|
||||
PQCLEAN_NTRUHRSS701_AVX2_poly_lift(liftm, m);
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
569
src/kem/ntru/pqclean_ntruhrss701_avx2/poly_s3_inv.c
Normal file
569
src/kem/ntru/pqclean_ntruhrss701_avx2/poly_s3_inv.c
Normal file
@ -0,0 +1,569 @@
|
||||
#include "poly.h"
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
typedef signed char small;
|
||||
|
||||
#define p 700
|
||||
#define ppad 768
|
||||
#define numvec 3
|
||||
|
||||
typedef __m256i vec256;
|
||||
|
||||
/*
|
||||
This code stores 768-coeff poly as vec256[3].
|
||||
Order of 256 coefficients in each vec256
|
||||
is optimized in light of costs of vector instructions:
|
||||
0,4,...,252 in 64-bit word;
|
||||
1,5,...,253 in 64-bit word;
|
||||
2,6,...,254 in 64-bit word;
|
||||
3,7,...,255 in 64-bit word.
|
||||
*/
|
||||
|
||||
/*
 * Pack bits, supplied one per byte, into the interleaved vec256 layout.
 *
 * v: output array of numvec vectors.
 * b: input array of numvec * 256 bytes. Each byte is assumed to be 0 or 1
 *    (vec256_init supplies such values); the shift-and-OR steps below would
 *    collide if other bits were set.
 *
 * Every group of 256 input bytes becomes one vec256. The TRANSPOSE macro is
 * defined here and is also used by vec256_tobits.
 */
static inline void vec256_frombits(vec256 *v, const small *b) {
    int i;

    for (i = 0; i < numvec; ++i) {
        /* Eight unaligned 32-byte loads cover this group's 256 bytes. */
        vec256 b0 = _mm256_loadu_si256((const vec256 *) (b + 0));   /* 0,1,...,31 */
        vec256 b1 = _mm256_loadu_si256((const vec256 *) (b + 32));  /* 32,33,... */
        vec256 b2 = _mm256_loadu_si256((const vec256 *) (b + 64));
        vec256 b3 = _mm256_loadu_si256((const vec256 *) (b + 96));
        vec256 b4 = _mm256_loadu_si256((const vec256 *) (b + 128));
        vec256 b5 = _mm256_loadu_si256((const vec256 *) (b + 160));
        vec256 b6 = _mm256_loadu_si256((const vec256 *) (b + 192));
        vec256 b7 = _mm256_loadu_si256((const vec256 *) (b + 224));

        vec256 c0 = _mm256_unpacklo_epi32(b0, b1); /* 0 1 2 3 32 33 34 35 4 5 6 7 36 37 38 39 ... 55 */
        vec256 c1 = _mm256_unpackhi_epi32(b0, b1); /* 8 9 10 11 40 41 42 43 ... 63 */
        vec256 c2 = _mm256_unpacklo_epi32(b2, b3);
        vec256 c3 = _mm256_unpackhi_epi32(b2, b3);
        vec256 c4 = _mm256_unpacklo_epi32(b4, b5);
        vec256 c5 = _mm256_unpackhi_epi32(b4, b5);
        vec256 c6 = _mm256_unpacklo_epi32(b6, b7);
        vec256 c7 = _mm256_unpackhi_epi32(b6, b7);

        /* Two input bits per byte: bit 0 and bit 2. */
        vec256 d0 = _mm256_or_si256(c0, _mm256_slli_epi32(c1, 2)); /* 0 8, 1 9, 2 10, 3 11, 32 40, 33 41, ..., 55 63 */
        vec256 d2 = _mm256_or_si256(c2, _mm256_slli_epi32(c3, 2));
        vec256 d4 = _mm256_or_si256(c4, _mm256_slli_epi32(c5, 2));
        vec256 d6 = _mm256_or_si256(c6, _mm256_slli_epi32(c7, 2));

        vec256 e0 = _mm256_unpacklo_epi64(d0, d2);
        vec256 e2 = _mm256_unpackhi_epi64(d0, d2);
        vec256 e4 = _mm256_unpacklo_epi64(d4, d6);
        vec256 e6 = _mm256_unpackhi_epi64(d4, d6);

        /* Four input bits per byte: bits 0..3. */
        vec256 f0 = _mm256_or_si256(e0, _mm256_slli_epi32(e2, 1));
        vec256 f4 = _mm256_or_si256(e4, _mm256_slli_epi32(e6, 1));

        vec256 g0 = _mm256_permute2x128_si256(f0, f4, 0x20);
        vec256 g4 = _mm256_permute2x128_si256(f0, f4, 0x31);

        /* Eight input bits per byte: every bit of h now carries data. */
        vec256 h = _mm256_or_si256(g0, _mm256_slli_epi32(g4, 4));

#define TRANSPOSE _mm256_set_epi8( 31,27,23,19, 30,26,22,18, 29,25,21,17, 28,24,20,16, 15,11,7,3, 14,10,6,2, 13,9,5,1, 12,8,4,0 )
        /* Final byte/word shuffles into the layout described in the file's
         * header comment. */
        h = _mm256_shuffle_epi8(h, TRANSPOSE);
        h = _mm256_permute4x64_epi64(h, 0xd8);
        h = _mm256_shuffle_epi32(h, 0xd8);

        v[i] = h;
        b += 256;
    }
}
|
||||
|
||||
/*
 * Unpack the interleaved vec256 layout into one bit per byte.
 *
 * v: input array of numvec vectors.
 * b: output array of numvec * 256 bytes; every byte written is 0 or 1.
 *
 * The steps mirror vec256_frombits in reverse order. TRANSPOSE is the macro
 * defined inside vec256_frombits.
 */
static inline void vec256_tobits(const vec256 *v, small *b) {
    const vec256 nibble_mask = _mm256_set1_epi8(15);
    const vec256 pair_mask = _mm256_set1_epi8(5);
    const vec256 bit_mask = _mm256_set1_epi8(1);
    int i;

    for (i = 0; i < numvec; ++i) {
        small *dst = b + 256 * i;
        vec256 h = v[i];

        h = _mm256_shuffle_epi32(h, 0xd8);
        h = _mm256_permute4x64_epi64(h, 0xd8);
        h = _mm256_shuffle_epi8(h, TRANSPOSE);

        /* Split each byte into its low and high nibble. */
        vec256 g0 = _mm256_and_si256(h, nibble_mask);
        vec256 g4 = _mm256_and_si256(_mm256_srli_epi32(h, 4), nibble_mask);

        vec256 f0 = _mm256_permute2x128_si256(g0, g4, 0x20);
        vec256 f4 = _mm256_permute2x128_si256(g0, g4, 0x31);

        /* Split each nibble into bit pairs {0,2} and {1,3}. */
        vec256 e0 = _mm256_and_si256(f0, pair_mask);
        vec256 e2 = _mm256_and_si256(_mm256_srli_epi32(f0, 1), pair_mask);
        vec256 e4 = _mm256_and_si256(f4, pair_mask);
        vec256 e6 = _mm256_and_si256(_mm256_srli_epi32(f4, 1), pair_mask);

        vec256 d0 = _mm256_unpacklo_epi32(e0, e2);
        vec256 d2 = _mm256_unpackhi_epi32(e0, e2);
        vec256 d4 = _mm256_unpacklo_epi32(e4, e6);
        vec256 d6 = _mm256_unpackhi_epi32(e4, e6);

        /* Split each pair into single bits. */
        vec256 c0 = _mm256_and_si256(d0, bit_mask);
        vec256 c1 = _mm256_and_si256(_mm256_srli_epi32(d0, 2), bit_mask);
        vec256 c2 = _mm256_and_si256(d2, bit_mask);
        vec256 c3 = _mm256_and_si256(_mm256_srli_epi32(d2, 2), bit_mask);
        vec256 c4 = _mm256_and_si256(d4, bit_mask);
        vec256 c5 = _mm256_and_si256(_mm256_srli_epi32(d4, 2), bit_mask);
        vec256 c6 = _mm256_and_si256(d6, bit_mask);
        vec256 c7 = _mm256_and_si256(_mm256_srli_epi32(d6, 2), bit_mask);

        /* Eight unaligned 32-byte stores emit this group's 256 bytes. */
        _mm256_storeu_si256((vec256 *) (dst + 0), _mm256_unpacklo_epi64(c0, c1));
        _mm256_storeu_si256((vec256 *) (dst + 32), _mm256_unpackhi_epi64(c0, c1));
        _mm256_storeu_si256((vec256 *) (dst + 64), _mm256_unpacklo_epi64(c2, c3));
        _mm256_storeu_si256((vec256 *) (dst + 96), _mm256_unpackhi_epi64(c2, c3));
        _mm256_storeu_si256((vec256 *) (dst + 128), _mm256_unpacklo_epi64(c4, c5));
        _mm256_storeu_si256((vec256 *) (dst + 160), _mm256_unpackhi_epi64(c4, c5));
        _mm256_storeu_si256((vec256 *) (dst + 192), _mm256_unpacklo_epi64(c6, c7));
        _mm256_storeu_si256((vec256 *) (dst + 224), _mm256_unpackhi_epi64(c6, c7));
    }
}
|
||||
|
||||
/*
 * Load a polynomial with p coefficients into the two-bitplane vector form.
 *
 * s:  p input coefficients; only s[0..p-1] is read.
 * G0: receives bit 0 of every coefficient.
 * G1: receives ((coefficient >> 1) & bit 0) of every coefficient.
 *
 * The coefficients are stored in reversed order (slot i holds s[p-1-i]) and
 * slots p..ppad-1 are zero.
 */
static void vec256_init(vec256 *G0, vec256 *G1, const small *s) {
    small low_bits[ppad];
    small high_bits[ppad];
    int i;

    /* Reversed copy of the p real coefficients, split into two bitplanes. */
    for (i = 0; i < p; ++i) {
        const small coeff = s[p - 1 - i];

        low_bits[i] = coeff & 1;
        high_bits[i] = (coeff >> 1) & low_bits[i];
    }

    /* Zero padding up to the vector length. */
    for (i = p; i < ppad; ++i) {
        low_bits[i] = 0;
        high_bits[i] = 0;
    }

    vec256_frombits(G0, low_bits);
    vec256_frombits(G1, high_bits);
}
|
||||
|
||||
/*
 * Convert the two-bitplane vector form back into p signed coefficients.
 *
 * V0, V1: input bitplanes (numvec vectors each).
 * out:    receives p coefficients; out[i] is taken from slot p-1-i, undoing
 *         the reversal applied by vec256_init.
 *
 * A slot with bits (v0, v1) decodes to v0 + 2*v1 - 4*(v0 & v1), i.e.
 * (0,0) -> 0, (1,0) -> 1, (1,1) -> -1.
 */
static void vec256_final(small *out, const vec256 *V0, const vec256 *V1) {
    small low_bits[ppad];
    small high_bits[ppad];
    int i;

    vec256_tobits(V0, low_bits);
    vec256_tobits(V1, high_bits);

    for (i = 0; i < p; ++i) {
        const small lo = low_bits[p - 1 - i];
        const small hi = high_bits[p - 1 - i];

        out[i] = lo + 2 * hi - 4 * (lo & hi);
    }
}
|
||||
|
||||
/*
 * Return -1 (all bits set) if x is negative and 0 otherwise, without
 * branching.
 *
 * The sign bit is extracted with an unsigned shift, because right-shifting a
 * negative signed value is implementation-defined in C. Like the shift count
 * it replaces, this assumes a 32-bit int.
 */
static inline int negative_mask(int x) {
    return -(int) ((unsigned int) x >> 31);
}
|
||||
|
||||
/*
 * Conditionally exchange the first len vectors of f and g.
 *
 * Bits of f[i] and g[i] are exchanged wherever the corresponding bit of mask
 * is set; an all-ones mask swaps everything and an all-zero mask leaves both
 * arrays unchanged. No branch depends on mask.
 */
static inline void vec256_swap(vec256 *f, vec256 *g, int len, vec256 mask) {
    int i;

    for (i = 0; i < len; ++i) {
        const vec256 diff = _mm256_and_si256(mask, _mm256_xor_si256(f[i], g[i]));

        f[i] = _mm256_xor_si256(f[i], diff);
        g[i] = _mm256_xor_si256(g[i], diff);
    }
}
|
||||
|
||||
/*
 * Scale the two-bitplane polynomial (f0, f1) in place by the constant whose
 * bitplanes are broadcast in (c0, c1), across all numvec vectors.
 *
 * Per bit: f0 <- f0 & c0, then f1 <- (f1 ^ c1) & f0.
 */
static inline void vec256_scale(vec256 *f0, vec256 *f1, const vec256 c0, const vec256 c1) {
    int i;

    for (i = 0; i < numvec; ++i) {
        const vec256 low = _mm256_and_si256(f0[i], c0);
        const vec256 high = _mm256_and_si256(_mm256_xor_si256(f1[i], c1), low);

        f0[i] = low;
        f1[i] = high;
    }
}
|
||||
|
||||
/*
 * Update (g0, g1) in place from (f0, f1) scaled by the broadcast constant
 * (c0, c1), over the first len vectors. f0 and f1 are only read.
 *
 * Per bit, with sf0 = f0 & c0, sf1 = (f1 ^ c1) & sf0 and t = g0 ^ sf0:
 *   g0 <- t | (g1 ^ sf1)
 *   g1 <- (g1 ^ sf0) & (sf1 ^ t)
 */
static inline void vec256_eliminate(vec256 *f0, vec256 *f1, vec256 *g0, vec256 *g1, int len, const vec256 c0, const vec256 c1) {
    int i;

    for (i = 0; i < len; ++i) {
        /* Scaled copy of f; the same transform as vec256_scale, not stored. */
        const vec256 sf0 = _mm256_and_si256(f0[i], c0);
        const vec256 sf1 = _mm256_and_si256(_mm256_xor_si256(f1[i], c1), sf0);

        /* Read both planes of g before either is overwritten. */
        const vec256 old_g0 = g0[i];
        const vec256 old_g1 = g1[i];
        const vec256 t = _mm256_xor_si256(old_g0, sf0);

        g0[i] = _mm256_or_si256(t, _mm256_xor_si256(old_g1, sf1));
        g1[i] = _mm256_and_si256(_mm256_xor_si256(old_g1, sf0), _mm256_xor_si256(sf1, t));
    }
}
|
||||
|
||||
/*
 * Return -1 if the lowest bit of f[0] is set and 0 otherwise.
 */
static inline int vec256_bit0mask(vec256 *f) {
    const int low_word = _mm_cvtsi128_si32(_mm256_castsi256_si128(f[0]));

    return -(low_word & 1);
}
|
||||
|
||||
/*
 * Divide a one-vector polynomial by x: every coefficient moves down one
 * slot, the slot-0 coefficient is dropped and the top slot becomes zero.
 *
 * In this file's layout that is a one-bit right shift of the lowest 64-bit
 * word followed by a rotation of the four 64-bit words.
 */
static inline void vec256_divx_1(vec256 *f) {
    const vec256 cur = f[0];
    unsigned long long w = _mm_cvtsi128_si64(_mm256_castsi256_si128(cur));

    w >>= 1;

    /* Put the shifted word back into the low 64 bits, then rotate the words. */
    f[0] = _mm256_permute4x64_epi64(
               _mm256_blend_epi32(cur, _mm256_set_epi64x(0, 0, 0, w), 0x3), 0x39);
}
|
||||
|
||||
/*
 * Divide a two-vector polynomial by x: every coefficient moves down one
 * slot, the slot-0 coefficient is dropped and the top slot becomes zero.
 *
 * The lowest 64-bit word of each vector is shifted right by one bit, with
 * bit 0 of f[1]'s word carried into bit 63 of f[0]'s word; then each vector
 * has its four 64-bit words rotated.
 */
static inline void vec256_divx_2(vec256 *f) {
    unsigned long long w[2];
    int k;

    for (k = 0; k < 2; ++k) {
        w[k] = _mm_cvtsi128_si64(_mm256_castsi256_si128(f[k]));
    }

    w[0] = (w[0] >> 1) | (w[1] << 63);
    w[1] = w[1] >> 1;

    for (k = 0; k < 2; ++k) {
        const vec256 patched = _mm256_blend_epi32(f[k], _mm256_set_epi64x(0, 0, 0, w[k]), 0x3);

        f[k] = _mm256_permute4x64_epi64(patched, 0x39);
    }
}
|
||||
|
||||
/*
 * Divide a three-vector polynomial by x: every coefficient moves down one
 * slot, the slot-0 coefficient is dropped and the top slot becomes zero.
 *
 * The lowest 64-bit word of each vector is shifted right by one bit, with
 * bit 0 of the next vector's word carried into bit 63; then each vector has
 * its four 64-bit words rotated.
 */
static inline void vec256_divx_3(vec256 *f) {
    unsigned long long w[3];
    int k;

    for (k = 0; k < 3; ++k) {
        w[k] = _mm_cvtsi128_si64(_mm256_castsi256_si128(f[k]));
    }

    w[0] = (w[0] >> 1) | (w[1] << 63);
    w[1] = (w[1] >> 1) | (w[2] << 63);
    w[2] = w[2] >> 1;

    for (k = 0; k < 3; ++k) {
        const vec256 patched = _mm256_blend_epi32(f[k], _mm256_set_epi64x(0, 0, 0, w[k]), 0x3);

        f[k] = _mm256_permute4x64_epi64(patched, 0x39);
    }
}
|
||||
|
||||
/*
 * Multiply a one-vector polynomial by x: every coefficient moves up one
 * slot, slot 0 becomes zero and the top coefficient is dropped.
 *
 * In this file's layout that is a rotation of the four 64-bit words followed
 * by a one-bit left shift of the lowest word.
 */
static inline void vec256_timesx_1(vec256 *f) {
    const vec256 rotated = _mm256_permute4x64_epi64(f[0], 0x93);
    unsigned long long w = _mm_cvtsi128_si64(_mm256_castsi256_si128(rotated));

    w <<= 1;

    /* Put the shifted word back into the low 64 bits. */
    f[0] = _mm256_blend_epi32(rotated, _mm256_set_epi64x(0, 0, 0, w), 0x3);
}
|
||||
|
||||
/*
 * Multiply a two-vector polynomial by x: every coefficient moves up one
 * slot, slot 0 becomes zero and the top coefficient is dropped.
 *
 * Each vector has its four 64-bit words rotated; the lowest word of each is
 * then shifted left by one bit, with bit 63 of f[0]'s word carried into
 * bit 0 of f[1]'s word.
 */
static inline void vec256_timesx_2(vec256 *f) {
    const vec256 r0 = _mm256_permute4x64_epi64(f[0], 0x93);
    const vec256 r1 = _mm256_permute4x64_epi64(f[1], 0x93);
    unsigned long long w0 = _mm_cvtsi128_si64(_mm256_castsi256_si128(r0));
    unsigned long long w1 = _mm_cvtsi128_si64(_mm256_castsi256_si128(r1));

    /* Update the higher limb first: it needs the unshifted w0. */
    w1 = (w1 << 1) | (w0 >> 63);
    w0 = w0 << 1;

    f[0] = _mm256_blend_epi32(r0, _mm256_set_epi64x(0, 0, 0, w0), 0x3);
    f[1] = _mm256_blend_epi32(r1, _mm256_set_epi64x(0, 0, 0, w1), 0x3);
}
|
||||
|
||||
/*
 * Multiply a three-vector polynomial by x: every coefficient moves up one
 * slot, slot 0 becomes zero and the top coefficient is dropped.
 *
 * Each vector has its four 64-bit words rotated; the lowest word of each is
 * then shifted left by one bit, with bit 63 carried into bit 0 of the next
 * vector's lowest word.
 *
 * The low words are read and written with intrinsics, exactly as in
 * vec256_timesx_1/_2 and vec256_divx_*, rather than by casting the address
 * of a vector to an integer pointer.
 */
static inline void vec256_timesx_3(vec256 *f) {
    vec256 f0 = _mm256_permute4x64_epi64(f[0], 0x93);
    vec256 f1 = _mm256_permute4x64_epi64(f[1], 0x93);
    vec256 f2 = _mm256_permute4x64_epi64(f[2], 0x93);

    unsigned long long low0 = _mm_cvtsi128_si64(_mm256_castsi256_si128(f0));
    unsigned long long low1 = _mm_cvtsi128_si64(_mm256_castsi256_si128(f1));
    unsigned long long low2 = _mm_cvtsi128_si64(_mm256_castsi256_si128(f2));

    /* Highest limb first: each update needs the unshifted limb below it. */
    low2 = (low2 << 1) | (low1 >> 63);
    low1 = (low1 << 1) | (low0 >> 63);
    low0 = low0 << 1;

    /* Blend mask 0x3 replaces only the low 64 bits of each vector. */
    f0 = _mm256_blend_epi32(f0, _mm256_set_epi64x(0, 0, 0, low0), 0x3);
    f1 = _mm256_blend_epi32(f1, _mm256_set_epi64x(0, 0, 0, low1), 0x3);
    f2 = _mm256_blend_epi32(f2, _mm256_set_epi64x(0, 0, 0, low2), 0x3);

    f[0] = f0;
    f[1] = f1;
    f[2] = f2;
}
|
||||
|
||||
|
||||
static int __poly_S3_inv(unsigned char *outbytes, const unsigned char *inbytes) {
|
||||
small *out = (void *) outbytes;
|
||||
small *in = (void *) inbytes;
|
||||
vec256 F0[numvec];
|
||||
vec256 F1[numvec];
|
||||
vec256 G0[numvec];
|
||||
vec256 G1[numvec];
|
||||
vec256 V0[numvec];
|
||||
vec256 V1[numvec];
|
||||
vec256 R0[numvec];
|
||||
vec256 R1[numvec];
|
||||
vec256 c0vec, c1vec;
|
||||
int loop;
|
||||
int c0, c1;
|
||||
int minusdelta = -1;
|
||||
int swapmask;
|
||||
vec256 swapvec;
|
||||
|
||||
vec256_init(G0, G1, in);
|
||||
F0[0] = _mm256_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1);
|
||||
F0[1] = _mm256_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1);
|
||||
F0[2] = _mm256_set_epi32(32767, -1, 32767, -1, 32767, -1, 65535, -1);
|
||||
F1[0] = _mm256_set1_epi32(0);
|
||||
F1[1] = _mm256_set1_epi32(0);
|
||||
F1[2] = _mm256_set1_epi32(0);
|
||||
|
||||
V0[0] = _mm256_set1_epi32(0);
|
||||
V1[0] = _mm256_set1_epi32(0);
|
||||
V0[1] = _mm256_set1_epi32(0);
|
||||
V1[1] = _mm256_set1_epi32(0);
|
||||
V0[2] = _mm256_set1_epi32(0);
|
||||
V1[2] = _mm256_set1_epi32(0);
|
||||
|
||||
R0[0] = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, 1);
|
||||
R1[0] = _mm256_set1_epi32(0);
|
||||
R0[1] = _mm256_set1_epi32(0);
|
||||
R1[1] = _mm256_set1_epi32(0);
|
||||
R0[2] = _mm256_set1_epi32(0);
|
||||
R1[2] = _mm256_set1_epi32(0);
|
||||
|
||||
for (loop = 256; loop > 0; --loop) {
|
||||
vec256_timesx_1(V0);
|
||||
vec256_timesx_1(V1);
|
||||
swapmask = negative_mask(minusdelta) & vec256_bit0mask(G0);
|
||||
|
||||
c0 = vec256_bit0mask(F0) & vec256_bit0mask(G0);
|
||||
c1 = vec256_bit0mask(F1) ^ vec256_bit0mask(G1);
|
||||
c1 &= c0;
|
||||
|
||||
minusdelta ^= swapmask & (minusdelta ^ -minusdelta);
|
||||
minusdelta -= 1;
|
||||
|
||||
swapvec = _mm256_set1_epi32(swapmask);
|
||||
vec256_swap(F0, G0, 3, swapvec);
|
||||
vec256_swap(F1, G1, 3, swapvec);
|
||||
|
||||
c0vec = _mm256_set1_epi32(c0);
|
||||
c1vec = _mm256_set1_epi32(c1);
|
||||
|
||||
vec256_eliminate(F0, F1, G0, G1, 3, c0vec, c1vec);
|
||||
vec256_divx_3(G0);
|
||||
vec256_divx_3(G1);
|
||||
|
||||
vec256_swap(V0, R0, 1, swapvec);
|
||||
vec256_swap(V1, R1, 1, swapvec);
|
||||
vec256_eliminate(V0, V1, R0, R1, 1, c0vec, c1vec);
|
||||
}
|
||||
|
||||
for (loop = 256; loop > 0; --loop) {
|
||||
vec256_timesx_2(V0);
|
||||
vec256_timesx_2(V1);
|
||||
swapmask = negative_mask(minusdelta) & vec256_bit0mask(G0);
|
||||
|
||||
c0 = vec256_bit0mask(F0) & vec256_bit0mask(G0);
|
||||
c1 = vec256_bit0mask(F1) ^ vec256_bit0mask(G1);
|
||||
c1 &= c0;
|
||||
|
||||
minusdelta ^= swapmask & (minusdelta ^ -minusdelta);
|
||||
minusdelta -= 1;
|
||||
|
||||
swapvec = _mm256_set1_epi32(swapmask);
|
||||
vec256_swap(F0, G0, 3, swapvec);
|
||||
vec256_swap(F1, G1, 3, swapvec);
|
||||
|
||||
c0vec = _mm256_set1_epi32(c0);
|
||||
c1vec = _mm256_set1_epi32(c1);
|
||||
|
||||
vec256_eliminate(F0, F1, G0, G1, 3, c0vec, c1vec);
|
||||
vec256_divx_3(G0);
|
||||
vec256_divx_3(G1);
|
||||
|
||||
vec256_swap(V0, R0, 2, swapvec);
|
||||
vec256_swap(V1, R1, 2, swapvec);
|
||||
vec256_eliminate(V0, V1, R0, R1, 2, c0vec, c1vec);
|
||||
}
|
||||
|
||||
for (loop = 375; loop > 0; --loop) {
|
||||
vec256_timesx_3(V0);
|
||||
vec256_timesx_3(V1);
|
||||
swapmask = negative_mask(minusdelta) & vec256_bit0mask(G0);
|
||||
|
||||
c0 = vec256_bit0mask(F0) & vec256_bit0mask(G0);
|
||||
c1 = vec256_bit0mask(F1) ^ vec256_bit0mask(G1);
|
||||
c1 &= c0;
|
||||
|
||||
minusdelta ^= swapmask & (minusdelta ^ -minusdelta);
|
||||
minusdelta -= 1;
|
||||
|
||||
swapvec = _mm256_set1_epi32(swapmask);
|
||||
vec256_swap(F0, G0, 3, swapvec);
|
||||
vec256_swap(F1, G1, 3, swapvec);
|
||||
|
||||
c0vec = _mm256_set1_epi32(c0);
|
||||
c1vec = _mm256_set1_epi32(c1);
|
||||
|
||||
vec256_eliminate(F0, F1, G0, G1, 3, c0vec, c1vec);
|
||||
vec256_divx_3(G0);
|
||||
vec256_divx_3(G1);
|
||||
|
||||
vec256_swap(V0, R0, 3, swapvec);
|
||||
vec256_swap(V1, R1, 3, swapvec);
|
||||
vec256_eliminate(V0, V1, R0, R1, 3, c0vec, c1vec);
|
||||
}
|
||||
|
||||
for (loop = 256; loop > 0; --loop) {
|
||||
vec256_timesx_3(V0);
|
||||
vec256_timesx_3(V1);
|
||||
swapmask = negative_mask(minusdelta) & vec256_bit0mask(G0);
|
||||
|
||||
c0 = vec256_bit0mask(F0) & vec256_bit0mask(G0);
|
||||
c1 = vec256_bit0mask(F1) ^ vec256_bit0mask(G1);
|
||||
c1 &= c0;
|
||||
|
||||
minusdelta ^= swapmask & (minusdelta ^ -minusdelta);
|
||||
minusdelta -= 1;
|
||||
|
||||
swapvec = _mm256_set1_epi32(swapmask);
|
||||
vec256_swap(F0, G0, 2, swapvec);
|
||||
vec256_swap(F1, G1, 2, swapvec);
|
||||
|
||||
c0vec = _mm256_set1_epi32(c0);
|
||||
c1vec = _mm256_set1_epi32(c1);
|
||||
|
||||
vec256_eliminate(F0, F1, G0, G1, 2, c0vec, c1vec);
|
||||
vec256_divx_2(G0);
|
||||
vec256_divx_2(G1);
|
||||
|
||||
vec256_swap(V0, R0, 3, swapvec);
|
||||
vec256_swap(V1, R1, 3, swapvec);
|
||||
vec256_eliminate(V0, V1, R0, R1, 3, c0vec, c1vec);
|
||||
}
|
||||
|
||||
for (loop = 256; loop > 0; --loop) {
|
||||
vec256_timesx_3(V0);
|
||||
vec256_timesx_3(V1);
|
||||
swapmask = negative_mask(minusdelta) & vec256_bit0mask(G0);
|
||||
|
||||
c0 = vec256_bit0mask(F0) & vec256_bit0mask(G0);
|
||||
c1 = vec256_bit0mask(F1) ^ vec256_bit0mask(G1);
|
||||
c1 &= c0;
|
||||
|
||||
minusdelta ^= swapmask & (minusdelta ^ -minusdelta);
|
||||
minusdelta -= 1;
|
||||
|
||||
swapvec = _mm256_set1_epi32(swapmask);
|
||||
vec256_swap(F0, G0, 1, swapvec);
|
||||
vec256_swap(F1, G1, 1, swapvec);
|
||||
|
||||
c0vec = _mm256_set1_epi32(c0);
|
||||
c1vec = _mm256_set1_epi32(c1);
|
||||
|
||||
vec256_eliminate(F0, F1, G0, G1, 1, c0vec, c1vec);
|
||||
vec256_divx_1(G0);
|
||||
vec256_divx_1(G1);
|
||||
|
||||
vec256_swap(V0, R0, 3, swapvec);
|
||||
vec256_swap(V1, R1, 3, swapvec);
|
||||
vec256_eliminate(V0, V1, R0, R1, 3, c0vec, c1vec);
|
||||
}
|
||||
|
||||
c0vec = _mm256_set1_epi32(vec256_bit0mask(F0));
|
||||
c1vec = _mm256_set1_epi32(vec256_bit0mask(F1));
|
||||
vec256_scale(V0, V1, c0vec, c1vec);
|
||||
|
||||
vec256_final(out, V0, V1);
|
||||
out[p] = negative_mask(minusdelta);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// This code is based on crypto_core/invhrss701/faster from SUPERCOP. The code was written as a case study
|
||||
// for the paper "Fast constant-time gcd computation and modular inversion" by Daniel J. Bernstein and Bo-Yin Yang.
|
||||
/*
 * Compute the S3 inverse of a into r_out.
 *
 * Only the first p coefficients of a are read and only the first p
 * coefficients of r_out are written. Each coefficient is accessed through
 * two consecutive bytes: the first carries the value, the second is written
 * as zero on output.
 *
 * Input values are reduced to two bits and mapped 0 -> 0, 1 -> 1, 2 -> -1,
 * 3 -> -1 before inversion; results -1, 0, 1 are written back as 2, 0, 1.
 */
void PQCLEAN_NTRUHRSS701_AVX2_poly_S3_inv(poly *r_out, const poly *a) {
    const unsigned char *src = (const unsigned char *) a;
    unsigned char *dst = (unsigned char *) r_out;
    small unpacked[ppad];
    small inverse[ppad];
    int k;

    /* XXX: obviously input/output format should be packed into bytes */

    for (k = 0; k < p; ++k) {
        small c = src[2 * k] & 3;   /* 0 1 2 3 */

        c += 1;                     /* 1 2 3 4: the value offset by 1 */
        c &= (c - 3) >> 5;          /* 1 2 0 0: anything >= 3 is cleared */
        unpacked[k] = c - 1;        /* 0 1 -1 -1 */
    }
    /* XXX: merge with vec256_init */

    __poly_S3_inv((unsigned char *) inverse, (unsigned char *) unpacked);

    for (k = 0; k < p; ++k) {
        const int two_bits = 3 & inverse[k];   /* -1 -> 3, 0 -> 0, 1 -> 1 */

        dst[2 * k] = two_bits ^ (two_bits >> 1);   /* 3 -> 2, 0 -> 0, 1 -> 1 */
        dst[2 * k + 1] = 0;
    }
}
|
||||
File diff suppressed because it is too large
Load Diff
@ -4,6 +4,7 @@
|
||||
#include "params.h"
|
||||
#include "poly.h"
|
||||
|
||||
|
||||
void PQCLEAN_NTRUHRSS701_AVX2_sample_fg(poly *f, poly *g, const unsigned char uniformbytes[NTRU_SAMPLE_FG_BYTES]);
|
||||
void PQCLEAN_NTRUHRSS701_AVX2_sample_rm(poly *r, poly *m, const unsigned char uniformbytes[NTRU_SAMPLE_RM_BYTES]);
|
||||
|
||||
|
||||
@ -42,12 +42,7 @@ int PQCLEAN_NTRUHRSS701_CLEAN_crypto_kem_dec(uint8_t *k, const uint8_t *c, const
|
||||
uint8_t rm[NTRU_OWCPA_MSGBYTES];
|
||||
uint8_t buf[NTRU_PRFKEYBYTES + NTRU_CIPHERTEXTBYTES];
|
||||
|
||||
fail = 0;
|
||||
|
||||
/* Check that unused bits of last byte of ciphertext are zero */
|
||||
fail |= c[NTRU_CIPHERTEXTBYTES - 1] & (0xff << (8 - (7 & (NTRU_LOGQ * NTRU_PACK_DEG))));
|
||||
|
||||
fail |= PQCLEAN_NTRUHRSS701_CLEAN_owcpa_dec(rm, c, sk);
|
||||
fail = PQCLEAN_NTRUHRSS701_CLEAN_owcpa_dec(rm, c, sk);
|
||||
/* If fail = 0 then c = Enc(h, rm). There is no need to re-encapsulate. */
|
||||
/* See comment in PQCLEAN_NTRUHRSS701_CLEAN_owcpa_dec for details. */
|
||||
|
||||
|
||||
@ -2,21 +2,37 @@
|
||||
#include "poly.h"
|
||||
#include "sample.h"
|
||||
|
||||
static int owcpa_check_ciphertext(const unsigned char *ciphertext) {
|
||||
/* A ciphertext is log2(q)*(n-1) bits packed into bytes. */
|
||||
/* Check that any unused bits of the final byte are zero. */
|
||||
|
||||
uint16_t t = 0;
|
||||
|
||||
t = ciphertext[NTRU_CIPHERTEXTBYTES - 1];
|
||||
t &= 0xff << (8 - (7 & (NTRU_LOGQ * NTRU_PACK_DEG)));
|
||||
|
||||
/* We have 0 <= t < 256 */
|
||||
/* Return 0 on success (t=0), 1 on failure */
|
||||
return (int) (1 & ((~t + 1) >> 15));
|
||||
}
|
||||
|
||||
static int owcpa_check_r(const poly *r) {
|
||||
/* Check that r is in message space. */
|
||||
/* Note: Assumes that r has coefficients in {0, 1, ..., q-1} */
|
||||
/* A valid r has coefficients in {0,1,q-1} and has r[N-1] = 0 */
|
||||
/* Note: We may assume that 0 <= r[i] <= q-1 for all i */
|
||||
|
||||
int i;
|
||||
uint64_t t = 0;
|
||||
uint32_t t = 0;
|
||||
uint16_t c;
|
||||
for (i = 0; i < NTRU_N; i++) {
|
||||
c = MODQ(r->coeffs[i] + 1);
|
||||
t |= c & (NTRU_Q - 4); /* 0 if c is in {0,1,2,3} */
|
||||
t |= (c + 1) & 0x4; /* 0 if c is in {0,1,2} */
|
||||
for (i = 0; i < NTRU_N - 1; i++) {
|
||||
c = r->coeffs[i];
|
||||
t |= (c + 1) & (NTRU_Q - 4); /* 0 iff c is in {-1,0,1,2} */
|
||||
t |= (c + 2) & 4; /* 1 if c = 2, 0 if c is in {-1,0,1} */
|
||||
}
|
||||
t |= MODQ(r->coeffs[NTRU_N - 1]); /* Coefficient n-1 must be zero */
|
||||
t = (~t + 1); // two's complement
|
||||
t >>= 63;
|
||||
return (int) t;
|
||||
t |= r->coeffs[NTRU_N - 1]; /* Coefficient n-1 must be zero */
|
||||
|
||||
/* We have 0 <= t < 2^16. */
|
||||
/* Return 0 on success (t=0), 1 on failure */
|
||||
return (int) (1 & ((~t + 1) >> 31));
|
||||
}
|
||||
|
||||
|
||||
@ -106,11 +122,15 @@ int PQCLEAN_NTRUHRSS701_CLEAN_owcpa_dec(unsigned char *rm,
|
||||
PQCLEAN_NTRUHRSS701_CLEAN_poly_S3_mul(m, mf, finv3);
|
||||
PQCLEAN_NTRUHRSS701_CLEAN_poly_S3_tobytes(rm + NTRU_PACK_TRINARY_BYTES, m);
|
||||
|
||||
/* NOTE: For the IND-CCA2 KEM we must ensure that c = Enc(h, (r,m)). */
|
||||
fail = 0;
|
||||
|
||||
/* Check that the unused bits of the last byte of the ciphertext are zero */
|
||||
fail |= owcpa_check_ciphertext(ciphertext);
|
||||
|
||||
/* For the IND-CCA2 KEM we must ensure that c = Enc(h, (r,m)). */
|
||||
/* We can avoid re-computing r*h + Lift(m) as long as we check that */
|
||||
/* r (defined as b/h mod (q, Phi_n)) and m are in the message space. */
|
||||
/* (m can take any value in S3 in NTRU_HRSS) */
|
||||
fail = 0;
|
||||
|
||||
/* b = c - Lift(m) mod (q, x^n - 1) */
|
||||
PQCLEAN_NTRUHRSS701_CLEAN_poly_lift(liftm, m);
|
||||
|
||||
@ -3,6 +3,7 @@
|
||||
|
||||
#include "params.h"
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#define MODQ(X) ((X) & (NTRU_Q-1))
|
||||
|
||||
@ -30,14 +30,22 @@ void PQCLEAN_NTRUHRSS701_CLEAN_poly_mod_q_Phi_n(poly *r) {
|
||||
}
|
||||
|
||||
void PQCLEAN_NTRUHRSS701_CLEAN_poly_Rq_to_S3(poly *r, const poly *a) {
|
||||
/* NOTE: Assumes input is in [0,Q-1]^N */
|
||||
/* Produces output in {0,1,2}^N */
|
||||
int i;
|
||||
uint16_t flag;
|
||||
|
||||
/* Center coeffs around 3Q: [0, Q-1] -> [3Q - Q/2, 3Q + Q/2) */
|
||||
/* The coefficients of a are stored as non-negative integers. */
|
||||
/* We must translate to representatives in [-q/2, q/2) before */
|
||||
/* reduction mod 3. */
|
||||
for (i = 0; i < NTRU_N; i++) {
|
||||
r->coeffs[i] = ((MODQ(a->coeffs[i]) >> (NTRU_LOGQ - 1)) ^ 3) << NTRU_LOGQ;
|
||||
r->coeffs[i] += MODQ(a->coeffs[i]);
|
||||
/* Need an explicit reduction mod q here */
|
||||
r->coeffs[i] = MODQ(a->coeffs[i]);
|
||||
|
||||
/* flag = 1 if r[i] >= q/2 else 0 */
|
||||
flag = r->coeffs[i] >> (NTRU_LOGQ - 1);
|
||||
|
||||
/* Now we will add (-q) mod 3 if r[i] >= q/2 */
|
||||
/* Note (-q) mod 3=(-2^k) mod 3=1<<(1-(k&1)) */
|
||||
r->coeffs[i] += flag << (1 - (NTRU_LOGQ & 1));
|
||||
}
|
||||
|
||||
PQCLEAN_NTRUHRSS701_CLEAN_poly_mod_3_Phi_n(r);
|
||||
|
||||
@ -3,14 +3,14 @@
|
||||
#include "poly.h"
|
||||
|
||||
/* return -1 if x<0 and y<0; otherwise return 0 */
|
||||
static inline int both_negative_mask(int x, int y) {
|
||||
static inline int16_t both_negative_mask(int16_t x, int16_t y) {
|
||||
return (x & y) >> 15;
|
||||
}
|
||||
|
||||
void PQCLEAN_NTRUHRSS701_CLEAN_poly_R2_inv(poly *r, const poly *a) {
|
||||
poly f, g, v, w;
|
||||
int i, loop, delta;
|
||||
int sign, swap, t;
|
||||
size_t i, loop;
|
||||
int16_t delta, sign, swap, t;
|
||||
|
||||
for (i = 0; i < NTRU_N; ++i) {
|
||||
v.coeffs[i] = 0;
|
||||
@ -37,7 +37,7 @@ void PQCLEAN_NTRUHRSS701_CLEAN_poly_R2_inv(poly *r, const poly *a) {
|
||||
v.coeffs[0] = 0;
|
||||
|
||||
sign = g.coeffs[0] & f.coeffs[0];
|
||||
swap = both_negative_mask(-delta, -(int) g.coeffs[0]);
|
||||
swap = both_negative_mask(-delta, -(int16_t) g.coeffs[0]);
|
||||
delta ^= swap & (delta ^ -delta);
|
||||
delta += 1;
|
||||
|
||||
|
||||
@ -11,14 +11,14 @@ static inline uint8_t mod3(uint8_t a) { /* a between 0 and 9 */
|
||||
}
|
||||
|
||||
/* return -1 if x<0 and y<0; otherwise return 0 */
|
||||
static inline int both_negative_mask(int x, int y) {
|
||||
static inline int16_t both_negative_mask(int16_t x, int16_t y) {
|
||||
return (x & y) >> 15;
|
||||
}
|
||||
|
||||
void PQCLEAN_NTRUHRSS701_CLEAN_poly_S3_inv(poly *r, const poly *a) {
|
||||
poly f, g, v, w;
|
||||
int i, loop, delta;
|
||||
int sign, swap, t;
|
||||
size_t i, loop;
|
||||
int16_t delta, sign, swap, t;
|
||||
|
||||
for (i = 0; i < NTRU_N; ++i) {
|
||||
v.coeffs[i] = 0;
|
||||
@ -45,7 +45,7 @@ void PQCLEAN_NTRUHRSS701_CLEAN_poly_S3_inv(poly *r, const poly *a) {
|
||||
v.coeffs[0] = 0;
|
||||
|
||||
sign = mod3((uint8_t) (2 * g.coeffs[0] * f.coeffs[0]));
|
||||
swap = both_negative_mask(-delta, -(int) g.coeffs[0]);
|
||||
swap = both_negative_mask(-delta, -(int16_t) g.coeffs[0]);
|
||||
delta ^= swap & (delta ^ -delta);
|
||||
delta += 1;
|
||||
|
||||
|
||||
@ -4,6 +4,7 @@
|
||||
#include "params.h"
|
||||
#include "poly.h"
|
||||
|
||||
|
||||
void PQCLEAN_NTRUHRSS701_CLEAN_sample_fg(poly *f, poly *g, const unsigned char uniformbytes[NTRU_SAMPLE_FG_BYTES]);
|
||||
void PQCLEAN_NTRUHRSS701_CLEAN_sample_rm(poly *r, poly *m, const unsigned char uniformbytes[NTRU_SAMPLE_RM_BYTES]);
|
||||
|
||||
|
||||
@ -19,8 +19,9 @@
|
||||
#cmakedefine OQS_USE_AVX_INSTRUCTIONS 1
|
||||
#cmakedefine OQS_USE_AVX2_INSTRUCTIONS 1
|
||||
#cmakedefine OQS_USE_AVX512_INSTRUCTIONS 1
|
||||
#cmakedefine OQS_USE_BMI_INSTRUCTIONS 1
|
||||
#cmakedefine OQS_USE_BMI1_INSTRUCTIONS 1
|
||||
#cmakedefine OQS_USE_BMI2_INSTRUCTIONS 1
|
||||
#cmakedefine OQS_USE_PCLMUL_INSTRUCTIONS 1
|
||||
#cmakedefine OQS_USE_POPCNT_INSTRUCTIONS 1
|
||||
#cmakedefine OQS_USE_SSE_INSTRUCTIONS 1
|
||||
#cmakedefine OQS_USE_SSE2_INSTRUCTIONS 1
|
||||
|
||||
@ -43,7 +43,7 @@ OQS_API OQS_STATUS OQS_SIG_dilithium_2_keypair(uint8_t *public_key, uint8_t *sec
|
||||
#if defined(OQS_ENABLE_SIG_dilithium_2_avx2)
|
||||
#if defined(OQS_PORTABLE_BUILD)
|
||||
OQS_CPU_EXTENSIONS available_cpu_extensions = OQS_get_available_CPU_extensions();
|
||||
if (available_cpu_extensions.AVX2_ENABLED && available_cpu_extensions.AES_ENABLED && available_cpu_extensions.BMI_ENABLED && available_cpu_extensions.POPCNT_ENABLED) {
|
||||
if (available_cpu_extensions.AVX2_ENABLED && available_cpu_extensions.AES_ENABLED && available_cpu_extensions.BMI1_ENABLED && available_cpu_extensions.POPCNT_ENABLED) {
|
||||
#endif /* OQS_PORTABLE_BUILD */
|
||||
return (OQS_STATUS) pqcrystals_dilithium2_avx2_keypair(public_key, secret_key);
|
||||
#if defined(OQS_PORTABLE_BUILD)
|
||||
@ -60,7 +60,7 @@ OQS_API OQS_STATUS OQS_SIG_dilithium_2_sign(uint8_t *signature, size_t *signatur
|
||||
#if defined(OQS_ENABLE_SIG_dilithium_2_avx2)
|
||||
#if defined(OQS_PORTABLE_BUILD)
|
||||
OQS_CPU_EXTENSIONS available_cpu_extensions = OQS_get_available_CPU_extensions();
|
||||
if (available_cpu_extensions.AVX2_ENABLED && available_cpu_extensions.AES_ENABLED && available_cpu_extensions.BMI_ENABLED && available_cpu_extensions.POPCNT_ENABLED) {
|
||||
if (available_cpu_extensions.AVX2_ENABLED && available_cpu_extensions.AES_ENABLED && available_cpu_extensions.BMI1_ENABLED && available_cpu_extensions.POPCNT_ENABLED) {
|
||||
#endif /* OQS_PORTABLE_BUILD */
|
||||
return (OQS_STATUS) pqcrystals_dilithium2_avx2_signature(signature, signature_len, message, message_len, secret_key);
|
||||
#if defined(OQS_PORTABLE_BUILD)
|
||||
@ -77,7 +77,7 @@ OQS_API OQS_STATUS OQS_SIG_dilithium_2_verify(const uint8_t *message, size_t mes
|
||||
#if defined(OQS_ENABLE_SIG_dilithium_2_avx2)
|
||||
#if defined(OQS_PORTABLE_BUILD)
|
||||
OQS_CPU_EXTENSIONS available_cpu_extensions = OQS_get_available_CPU_extensions();
|
||||
if (available_cpu_extensions.AVX2_ENABLED && available_cpu_extensions.AES_ENABLED && available_cpu_extensions.BMI_ENABLED && available_cpu_extensions.POPCNT_ENABLED) {
|
||||
if (available_cpu_extensions.AVX2_ENABLED && available_cpu_extensions.AES_ENABLED && available_cpu_extensions.BMI1_ENABLED && available_cpu_extensions.POPCNT_ENABLED) {
|
||||
#endif /* OQS_PORTABLE_BUILD */
|
||||
return (OQS_STATUS) pqcrystals_dilithium2_avx2_verify(signature, signature_len, message, message_len, public_key);
|
||||
#if defined(OQS_PORTABLE_BUILD)
|
||||
|
||||
@ -43,7 +43,7 @@ OQS_API OQS_STATUS OQS_SIG_dilithium_3_keypair(uint8_t *public_key, uint8_t *sec
|
||||
#if defined(OQS_ENABLE_SIG_dilithium_3_avx2)
|
||||
#if defined(OQS_PORTABLE_BUILD)
|
||||
OQS_CPU_EXTENSIONS available_cpu_extensions = OQS_get_available_CPU_extensions();
|
||||
if (available_cpu_extensions.AVX2_ENABLED && available_cpu_extensions.AES_ENABLED && available_cpu_extensions.BMI_ENABLED && available_cpu_extensions.POPCNT_ENABLED) {
|
||||
if (available_cpu_extensions.AVX2_ENABLED && available_cpu_extensions.AES_ENABLED && available_cpu_extensions.BMI1_ENABLED && available_cpu_extensions.POPCNT_ENABLED) {
|
||||
#endif /* OQS_PORTABLE_BUILD */
|
||||
return (OQS_STATUS) pqcrystals_dilithium3_avx2_keypair(public_key, secret_key);
|
||||
#if defined(OQS_PORTABLE_BUILD)
|
||||
@ -60,7 +60,7 @@ OQS_API OQS_STATUS OQS_SIG_dilithium_3_sign(uint8_t *signature, size_t *signatur
|
||||
#if defined(OQS_ENABLE_SIG_dilithium_3_avx2)
|
||||
#if defined(OQS_PORTABLE_BUILD)
|
||||
OQS_CPU_EXTENSIONS available_cpu_extensions = OQS_get_available_CPU_extensions();
|
||||
if (available_cpu_extensions.AVX2_ENABLED && available_cpu_extensions.AES_ENABLED && available_cpu_extensions.BMI_ENABLED && available_cpu_extensions.POPCNT_ENABLED) {
|
||||
if (available_cpu_extensions.AVX2_ENABLED && available_cpu_extensions.AES_ENABLED && available_cpu_extensions.BMI1_ENABLED && available_cpu_extensions.POPCNT_ENABLED) {
|
||||
#endif /* OQS_PORTABLE_BUILD */
|
||||
return (OQS_STATUS) pqcrystals_dilithium3_avx2_signature(signature, signature_len, message, message_len, secret_key);
|
||||
#if defined(OQS_PORTABLE_BUILD)
|
||||
@ -77,7 +77,7 @@ OQS_API OQS_STATUS OQS_SIG_dilithium_3_verify(const uint8_t *message, size_t mes
|
||||
#if defined(OQS_ENABLE_SIG_dilithium_3_avx2)
|
||||
#if defined(OQS_PORTABLE_BUILD)
|
||||
OQS_CPU_EXTENSIONS available_cpu_extensions = OQS_get_available_CPU_extensions();
|
||||
if (available_cpu_extensions.AVX2_ENABLED && available_cpu_extensions.AES_ENABLED && available_cpu_extensions.BMI_ENABLED && available_cpu_extensions.POPCNT_ENABLED) {
|
||||
if (available_cpu_extensions.AVX2_ENABLED && available_cpu_extensions.AES_ENABLED && available_cpu_extensions.BMI1_ENABLED && available_cpu_extensions.POPCNT_ENABLED) {
|
||||
#endif /* OQS_PORTABLE_BUILD */
|
||||
return (OQS_STATUS) pqcrystals_dilithium3_avx2_verify(signature, signature_len, message, message_len, public_key);
|
||||
#if defined(OQS_PORTABLE_BUILD)
|
||||
|
||||
@ -43,7 +43,7 @@ OQS_API OQS_STATUS OQS_SIG_dilithium_4_keypair(uint8_t *public_key, uint8_t *sec
|
||||
#if defined(OQS_ENABLE_SIG_dilithium_4_avx2)
|
||||
#if defined(OQS_PORTABLE_BUILD)
|
||||
OQS_CPU_EXTENSIONS available_cpu_extensions = OQS_get_available_CPU_extensions();
|
||||
if (available_cpu_extensions.AVX2_ENABLED && available_cpu_extensions.BMI_ENABLED && available_cpu_extensions.AES_ENABLED && available_cpu_extensions.POPCNT_ENABLED) {
|
||||
if (available_cpu_extensions.AVX2_ENABLED && available_cpu_extensions.BMI1_ENABLED && available_cpu_extensions.AES_ENABLED && available_cpu_extensions.POPCNT_ENABLED) {
|
||||
#endif /* OQS_PORTABLE_BUILD */
|
||||
return (OQS_STATUS) pqcrystals_dilithium4_avx2_keypair(public_key, secret_key);
|
||||
#if defined(OQS_PORTABLE_BUILD)
|
||||
@ -60,7 +60,7 @@ OQS_API OQS_STATUS OQS_SIG_dilithium_4_sign(uint8_t *signature, size_t *signatur
|
||||
#if defined(OQS_ENABLE_SIG_dilithium_4_avx2)
|
||||
#if defined(OQS_PORTABLE_BUILD)
|
||||
OQS_CPU_EXTENSIONS available_cpu_extensions = OQS_get_available_CPU_extensions();
|
||||
if (available_cpu_extensions.AVX2_ENABLED && available_cpu_extensions.BMI_ENABLED && available_cpu_extensions.AES_ENABLED && available_cpu_extensions.POPCNT_ENABLED) {
|
||||
if (available_cpu_extensions.AVX2_ENABLED && available_cpu_extensions.BMI1_ENABLED && available_cpu_extensions.AES_ENABLED && available_cpu_extensions.POPCNT_ENABLED) {
|
||||
#endif /* OQS_PORTABLE_BUILD */
|
||||
return (OQS_STATUS) pqcrystals_dilithium4_avx2_signature(signature, signature_len, message, message_len, secret_key);
|
||||
#if defined(OQS_PORTABLE_BUILD)
|
||||
@ -77,7 +77,7 @@ OQS_API OQS_STATUS OQS_SIG_dilithium_4_verify(const uint8_t *message, size_t mes
|
||||
#if defined(OQS_ENABLE_SIG_dilithium_4_avx2)
|
||||
#if defined(OQS_PORTABLE_BUILD)
|
||||
OQS_CPU_EXTENSIONS available_cpu_extensions = OQS_get_available_CPU_extensions();
|
||||
if (available_cpu_extensions.AVX2_ENABLED && available_cpu_extensions.BMI_ENABLED && available_cpu_extensions.AES_ENABLED && available_cpu_extensions.POPCNT_ENABLED) {
|
||||
if (available_cpu_extensions.AVX2_ENABLED && available_cpu_extensions.BMI1_ENABLED && available_cpu_extensions.AES_ENABLED && available_cpu_extensions.POPCNT_ENABLED) {
|
||||
#endif /* OQS_PORTABLE_BUILD */
|
||||
return (OQS_STATUS) pqcrystals_dilithium4_avx2_verify(signature, signature_len, message, message_len, public_key);
|
||||
#if defined(OQS_PORTABLE_BUILD)
|
||||
|
||||
@ -443,7 +443,10 @@ PQCLEAN_FALCON1024_CLEAN_comp_decode(
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
x[u] = (int16_t)(s ? -(int)m : (int)m);
|
||||
x[u] = (int16_t) m;
|
||||
if (s) {
|
||||
x[u] = (int16_t) - x[u];
|
||||
}
|
||||
}
|
||||
return v;
|
||||
}
|
||||
|
||||
@ -424,20 +424,32 @@ fpr fpr_sqrt(fpr x);
|
||||
static inline int
|
||||
fpr_lt(fpr x, fpr y) {
|
||||
/*
|
||||
* If x >= 0 or y >= 0, a signed comparison yields the proper
|
||||
* result:
|
||||
* If both x and y are positive, then a signed comparison yields
|
||||
* the proper result:
|
||||
* - For positive values, the order is preserved.
|
||||
* - The sign bit is at the same place as in integers, so
|
||||
* sign is preserved.
|
||||
* Moreover, we can compute [x < y] as sgn(x-y) and the computation
|
||||
* of x-y will not overflow.
|
||||
*
|
||||
* If the signs differ, then sgn(x) gives the proper result.
|
||||
*
|
||||
* If both x and y are negative, then the order is reversed.
|
||||
* We cannot simply invert the comparison result in that case
|
||||
* because it would not handle the edge case x = y properly.
|
||||
* Hence [x < y] = sgn(y-x). We must compute this separately from
|
||||
* sgn(x-y); simply inverting sgn(x-y) would not handle the edge
|
||||
* case x = y properly.
|
||||
*/
|
||||
int cc0, cc1;
|
||||
int64_t sx;
|
||||
int64_t sy;
|
||||
|
||||
sx = *(int64_t *)&x;
|
||||
sy = *(int64_t *)&y;
|
||||
sy &= ~((sx ^ sy) >> 63); /* set sy=0 if signs differ */
|
||||
|
||||
cc0 = (int)((sx - sy) >> 63) & 1; /* Neither subtraction overflows when */
|
||||
cc1 = (int)((sy - sx) >> 63) & 1; /* the signs are the same. */
|
||||
|
||||
cc0 = *(int64_t *)&x < *(int64_t *)&y;
|
||||
cc1 = *(int64_t *)&x > *(int64_t *)&y;
|
||||
return cc0 ^ ((cc0 ^ cc1) & (int)((x & y) >> 63));
|
||||
}
|
||||
|
||||
|
||||
@ -1902,7 +1902,11 @@ zint_add_scaled_mul_small(uint32_t *x, size_t xlen,
|
||||
* Get the next word of y (scaled).
|
||||
*/
|
||||
v = u - sch;
|
||||
wy = v < ylen ? y[v] : ysign;
|
||||
if (v < ylen) {
|
||||
wy = y[v];
|
||||
} else {
|
||||
wy = ysign;
|
||||
}
|
||||
wys = ((wy << scl) & 0x7FFFFFFF) | tw;
|
||||
tw = wy >> (31 - scl);
|
||||
|
||||
@ -1960,7 +1964,11 @@ zint_sub_scaled(uint32_t *x, size_t xlen,
|
||||
* Get the next word of y (scaled).
|
||||
*/
|
||||
v = u - sch;
|
||||
wy = v < ylen ? y[v] : ysign;
|
||||
if (v < ylen) {
|
||||
wy = y[v];
|
||||
} else {
|
||||
wy = ysign;
|
||||
}
|
||||
wys = ((wy << scl) & 0x7FFFFFFF) | tw;
|
||||
tw = wy >> (31 - scl);
|
||||
|
||||
@ -2648,10 +2656,18 @@ make_fg(uint32_t *data, const int8_t *f, const int8_t *g,
|
||||
return;
|
||||
}
|
||||
|
||||
for (d = 0; d < depth; d ++) {
|
||||
make_fg_step(data, logn - d, d,
|
||||
d != 0, (d + 1) < depth || out_ntt);
|
||||
if (depth == 0) {
|
||||
return;
|
||||
}
|
||||
if (depth == 1) {
|
||||
make_fg_step(data, logn, 0, 0, out_ntt);
|
||||
return;
|
||||
}
|
||||
make_fg_step(data, logn, 0, 0, 1);
|
||||
for (d = 1; d + 1 < depth; d ++) {
|
||||
make_fg_step(data, logn - d, d, 1, 1);
|
||||
}
|
||||
make_fg_step(data, logn - depth + 1, depth - 1, 1, out_ntt);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -3028,7 +3044,10 @@ solve_NTRU_intermediate(unsigned logn_top,
|
||||
* computed so that average maximum length will fall in the
|
||||
* middle or the upper half of these top 10 words.
|
||||
*/
|
||||
rlen = (slen > 10) ? 10 : slen;
|
||||
rlen = slen;
|
||||
if (rlen > 10) {
|
||||
rlen = 10;
|
||||
}
|
||||
poly_big_to_fp(rt3, ft + slen - rlen, rlen, slen, logn);
|
||||
poly_big_to_fp(rt4, gt + slen - rlen, rlen, slen, logn);
|
||||
|
||||
@ -3102,7 +3121,10 @@ solve_NTRU_intermediate(unsigned logn_top,
|
||||
* Convert current F and G into floating-point. We apply
|
||||
* scaling if the current length is more than 10 words.
|
||||
*/
|
||||
rlen = (FGlen > 10) ? 10 : FGlen;
|
||||
rlen = FGlen;
|
||||
if (rlen > 10) {
|
||||
rlen = 10;
|
||||
}
|
||||
scale_FG = 31 * (int)(FGlen - rlen);
|
||||
poly_big_to_fp(rt1, Ft + FGlen - rlen, rlen, llen, logn);
|
||||
poly_big_to_fp(rt2, Gt + FGlen - rlen, rlen, llen, logn);
|
||||
|
||||
@ -1189,9 +1189,11 @@ PQCLEAN_FALCON1024_CLEAN_sign_tree(int16_t *sig, inner_shake256_context *rng,
|
||||
* Normal sampling. We use a fast PRNG seeded from our
|
||||
* SHAKE context ('rng').
|
||||
*/
|
||||
spc.sigma_min = (logn == 10)
|
||||
? fpr_sigma_min_10
|
||||
: fpr_sigma_min_9;
|
||||
if (logn == 10) {
|
||||
spc.sigma_min = fpr_sigma_min_10;
|
||||
} else {
|
||||
spc.sigma_min = fpr_sigma_min_9;
|
||||
}
|
||||
PQCLEAN_FALCON1024_CLEAN_prng_init(&spc.p, rng);
|
||||
samp = PQCLEAN_FALCON1024_CLEAN_sampler;
|
||||
samp_ctx = &spc;
|
||||
@ -1234,9 +1236,11 @@ PQCLEAN_FALCON1024_CLEAN_sign_dyn(int16_t *sig, inner_shake256_context *rng,
|
||||
* Normal sampling. We use a fast PRNG seeded from our
|
||||
* SHAKE context ('rng').
|
||||
*/
|
||||
spc.sigma_min = (logn == 10)
|
||||
? fpr_sigma_min_10
|
||||
: fpr_sigma_min_9;
|
||||
if (logn == 10) {
|
||||
spc.sigma_min = fpr_sigma_min_10;
|
||||
} else {
|
||||
spc.sigma_min = fpr_sigma_min_9;
|
||||
}
|
||||
PQCLEAN_FALCON1024_CLEAN_prng_init(&spc.p, rng);
|
||||
samp = PQCLEAN_FALCON1024_CLEAN_sampler;
|
||||
samp_ctx = &spc;
|
||||
|
||||
@ -443,7 +443,10 @@ PQCLEAN_FALCON512_CLEAN_comp_decode(
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
x[u] = (int16_t)(s ? -(int)m : (int)m);
|
||||
x[u] = (int16_t) m;
|
||||
if (s) {
|
||||
x[u] = (int16_t) - x[u];
|
||||
}
|
||||
}
|
||||
return v;
|
||||
}
|
||||
|
||||
@ -424,20 +424,32 @@ fpr fpr_sqrt(fpr x);
|
||||
static inline int
|
||||
fpr_lt(fpr x, fpr y) {
|
||||
/*
|
||||
* If x >= 0 or y >= 0, a signed comparison yields the proper
|
||||
* result:
|
||||
* If both x and y are positive, then a signed comparison yields
|
||||
* the proper result:
|
||||
* - For positive values, the order is preserved.
|
||||
* - The sign bit is at the same place as in integers, so
|
||||
* sign is preserved.
|
||||
* Moreover, we can compute [x < y] as sgn(x-y) and the computation
|
||||
* of x-y will not overflow.
|
||||
*
|
||||
* If the signs differ, then sgn(x) gives the proper result.
|
||||
*
|
||||
* If both x and y are negative, then the order is reversed.
|
||||
* We cannot simply invert the comparison result in that case
|
||||
* because it would not handle the edge case x = y properly.
|
||||
* Hence [x < y] = sgn(y-x). We must compute this separately from
|
||||
* sgn(x-y); simply inverting sgn(x-y) would not handle the edge
|
||||
* case x = y properly.
|
||||
*/
|
||||
int cc0, cc1;
|
||||
int64_t sx;
|
||||
int64_t sy;
|
||||
|
||||
sx = *(int64_t *)&x;
|
||||
sy = *(int64_t *)&y;
|
||||
sy &= ~((sx ^ sy) >> 63); /* set sy=0 if signs differ */
|
||||
|
||||
cc0 = (int)((sx - sy) >> 63) & 1; /* Neither subtraction overflows when */
|
||||
cc1 = (int)((sy - sx) >> 63) & 1; /* the signs are the same. */
|
||||
|
||||
cc0 = *(int64_t *)&x < *(int64_t *)&y;
|
||||
cc1 = *(int64_t *)&x > *(int64_t *)&y;
|
||||
return cc0 ^ ((cc0 ^ cc1) & (int)((x & y) >> 63));
|
||||
}
|
||||
|
||||
|
||||
@ -1902,7 +1902,11 @@ zint_add_scaled_mul_small(uint32_t *x, size_t xlen,
|
||||
* Get the next word of y (scaled).
|
||||
*/
|
||||
v = u - sch;
|
||||
wy = v < ylen ? y[v] : ysign;
|
||||
if (v < ylen) {
|
||||
wy = y[v];
|
||||
} else {
|
||||
wy = ysign;
|
||||
}
|
||||
wys = ((wy << scl) & 0x7FFFFFFF) | tw;
|
||||
tw = wy >> (31 - scl);
|
||||
|
||||
@ -1960,7 +1964,11 @@ zint_sub_scaled(uint32_t *x, size_t xlen,
|
||||
* Get the next word of y (scaled).
|
||||
*/
|
||||
v = u - sch;
|
||||
wy = v < ylen ? y[v] : ysign;
|
||||
if (v < ylen) {
|
||||
wy = y[v];
|
||||
} else {
|
||||
wy = ysign;
|
||||
}
|
||||
wys = ((wy << scl) & 0x7FFFFFFF) | tw;
|
||||
tw = wy >> (31 - scl);
|
||||
|
||||
@ -2648,10 +2656,18 @@ make_fg(uint32_t *data, const int8_t *f, const int8_t *g,
|
||||
return;
|
||||
}
|
||||
|
||||
for (d = 0; d < depth; d ++) {
|
||||
make_fg_step(data, logn - d, d,
|
||||
d != 0, (d + 1) < depth || out_ntt);
|
||||
if (depth == 0) {
|
||||
return;
|
||||
}
|
||||
if (depth == 1) {
|
||||
make_fg_step(data, logn, 0, 0, out_ntt);
|
||||
return;
|
||||
}
|
||||
make_fg_step(data, logn, 0, 0, 1);
|
||||
for (d = 1; d + 1 < depth; d ++) {
|
||||
make_fg_step(data, logn - d, d, 1, 1);
|
||||
}
|
||||
make_fg_step(data, logn - depth + 1, depth - 1, 1, out_ntt);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -3028,7 +3044,10 @@ solve_NTRU_intermediate(unsigned logn_top,
|
||||
* computed so that average maximum length will fall in the
|
||||
* middle or the upper half of these top 10 words.
|
||||
*/
|
||||
rlen = (slen > 10) ? 10 : slen;
|
||||
rlen = slen;
|
||||
if (rlen > 10) {
|
||||
rlen = 10;
|
||||
}
|
||||
poly_big_to_fp(rt3, ft + slen - rlen, rlen, slen, logn);
|
||||
poly_big_to_fp(rt4, gt + slen - rlen, rlen, slen, logn);
|
||||
|
||||
@ -3102,7 +3121,10 @@ solve_NTRU_intermediate(unsigned logn_top,
|
||||
* Convert current F and G into floating-point. We apply
|
||||
* scaling if the current length is more than 10 words.
|
||||
*/
|
||||
rlen = (FGlen > 10) ? 10 : FGlen;
|
||||
rlen = FGlen;
|
||||
if (rlen > 10) {
|
||||
rlen = 10;
|
||||
}
|
||||
scale_FG = 31 * (int)(FGlen - rlen);
|
||||
poly_big_to_fp(rt1, Ft + FGlen - rlen, rlen, llen, logn);
|
||||
poly_big_to_fp(rt2, Gt + FGlen - rlen, rlen, llen, logn);
|
||||
|
||||
@ -1189,9 +1189,11 @@ PQCLEAN_FALCON512_CLEAN_sign_tree(int16_t *sig, inner_shake256_context *rng,
|
||||
* Normal sampling. We use a fast PRNG seeded from our
|
||||
* SHAKE context ('rng').
|
||||
*/
|
||||
spc.sigma_min = (logn == 10)
|
||||
? fpr_sigma_min_10
|
||||
: fpr_sigma_min_9;
|
||||
if (logn == 10) {
|
||||
spc.sigma_min = fpr_sigma_min_10;
|
||||
} else {
|
||||
spc.sigma_min = fpr_sigma_min_9;
|
||||
}
|
||||
PQCLEAN_FALCON512_CLEAN_prng_init(&spc.p, rng);
|
||||
samp = PQCLEAN_FALCON512_CLEAN_sampler;
|
||||
samp_ctx = &spc;
|
||||
@ -1234,9 +1236,11 @@ PQCLEAN_FALCON512_CLEAN_sign_dyn(int16_t *sig, inner_shake256_context *rng,
|
||||
* Normal sampling. We use a fast PRNG seeded from our
|
||||
* SHAKE context ('rng').
|
||||
*/
|
||||
spc.sigma_min = (logn == 10)
|
||||
? fpr_sigma_min_10
|
||||
: fpr_sigma_min_9;
|
||||
if (logn == 10) {
|
||||
spc.sigma_min = fpr_sigma_min_10;
|
||||
} else {
|
||||
spc.sigma_min = fpr_sigma_min_9;
|
||||
}
|
||||
PQCLEAN_FALCON512_CLEAN_prng_init(&spc.p, rng);
|
||||
samp = PQCLEAN_FALCON512_CLEAN_sampler;
|
||||
samp_ctx = &spc;
|
||||
|
||||
@ -72,7 +72,7 @@ static unsigned int gf256mat_gauss_elim_ref(uint8_t *mat, unsigned int h, unsign
|
||||
|
||||
for (unsigned int j = i + 1; j < h; j++) {
|
||||
uint8_t *aj = mat + w * j;
|
||||
PQCLEAN_RAINBOWIIICCLASSIC_CLEAN_gf256v_predicated_add(ai + skip_len_align4, !PQCLEAN_RAINBOWIIICCLASSIC_CLEAN_gf256_is_nonzero(ai[i]), aj + skip_len_align4, w - skip_len_align4);
|
||||
PQCLEAN_RAINBOWIIICCLASSIC_CLEAN_gf256v_predicated_add(ai + skip_len_align4, 1 ^ PQCLEAN_RAINBOWIIICCLASSIC_CLEAN_gf256_is_nonzero(ai[i]), aj + skip_len_align4, w - skip_len_align4);
|
||||
}
|
||||
r8 &= PQCLEAN_RAINBOWIIICCLASSIC_CLEAN_gf256_is_nonzero(ai[i]);
|
||||
uint8_t pivot = ai[i];
|
||||
|
||||
@ -72,7 +72,7 @@ static unsigned int gf256mat_gauss_elim_ref(uint8_t *mat, unsigned int h, unsign
|
||||
|
||||
for (unsigned int j = i + 1; j < h; j++) {
|
||||
uint8_t *aj = mat + w * j;
|
||||
PQCLEAN_RAINBOWIIICCYCLICCOMPRESSED_CLEAN_gf256v_predicated_add(ai + skip_len_align4, !PQCLEAN_RAINBOWIIICCYCLICCOMPRESSED_CLEAN_gf256_is_nonzero(ai[i]), aj + skip_len_align4, w - skip_len_align4);
|
||||
PQCLEAN_RAINBOWIIICCYCLICCOMPRESSED_CLEAN_gf256v_predicated_add(ai + skip_len_align4, 1 ^ PQCLEAN_RAINBOWIIICCYCLICCOMPRESSED_CLEAN_gf256_is_nonzero(ai[i]), aj + skip_len_align4, w - skip_len_align4);
|
||||
}
|
||||
r8 &= PQCLEAN_RAINBOWIIICCYCLICCOMPRESSED_CLEAN_gf256_is_nonzero(ai[i]);
|
||||
uint8_t pivot = ai[i];
|
||||
|
||||
@ -72,7 +72,7 @@ static unsigned int gf256mat_gauss_elim_ref(uint8_t *mat, unsigned int h, unsign
|
||||
|
||||
for (unsigned int j = i + 1; j < h; j++) {
|
||||
uint8_t *aj = mat + w * j;
|
||||
PQCLEAN_RAINBOWIIICCYCLIC_CLEAN_gf256v_predicated_add(ai + skip_len_align4, !PQCLEAN_RAINBOWIIICCYCLIC_CLEAN_gf256_is_nonzero(ai[i]), aj + skip_len_align4, w - skip_len_align4);
|
||||
PQCLEAN_RAINBOWIIICCYCLIC_CLEAN_gf256v_predicated_add(ai + skip_len_align4, 1 ^ PQCLEAN_RAINBOWIIICCYCLIC_CLEAN_gf256_is_nonzero(ai[i]), aj + skip_len_align4, w - skip_len_align4);
|
||||
}
|
||||
r8 &= PQCLEAN_RAINBOWIIICCYCLIC_CLEAN_gf256_is_nonzero(ai[i]);
|
||||
uint8_t pivot = ai[i];
|
||||
|
||||
@ -74,7 +74,7 @@ static unsigned int gf16mat_gauss_elim_ref(uint8_t *mat, unsigned int h, unsigne
|
||||
uint8_t *ai = mat + n_w_byte * i;
|
||||
for (unsigned int j = i + 1; j < h; j++) {
|
||||
uint8_t *aj = mat + n_w_byte * j;
|
||||
PQCLEAN_RAINBOWIACLASSIC_CLEAN_gf256v_predicated_add(ai + offset_byte, !PQCLEAN_RAINBOWIACLASSIC_CLEAN_gf16_is_nonzero(PQCLEAN_RAINBOWIACLASSIC_CLEAN_gf16v_get_ele(ai, i)), aj + offset_byte, n_w_byte - offset_byte);
|
||||
PQCLEAN_RAINBOWIACLASSIC_CLEAN_gf256v_predicated_add(ai + offset_byte, 1 ^ PQCLEAN_RAINBOWIACLASSIC_CLEAN_gf16_is_nonzero(PQCLEAN_RAINBOWIACLASSIC_CLEAN_gf16v_get_ele(ai, i)), aj + offset_byte, n_w_byte - offset_byte);
|
||||
}
|
||||
uint8_t pivot = PQCLEAN_RAINBOWIACLASSIC_CLEAN_gf16v_get_ele(ai, i);
|
||||
r8 &= PQCLEAN_RAINBOWIACLASSIC_CLEAN_gf16_is_nonzero(pivot);
|
||||
|
||||
@ -74,7 +74,7 @@ static unsigned int gf16mat_gauss_elim_ref(uint8_t *mat, unsigned int h, unsigne
|
||||
uint8_t *ai = mat + n_w_byte * i;
|
||||
for (unsigned int j = i + 1; j < h; j++) {
|
||||
uint8_t *aj = mat + n_w_byte * j;
|
||||
PQCLEAN_RAINBOWIACYCLICCOMPRESSED_CLEAN_gf256v_predicated_add(ai + offset_byte, !PQCLEAN_RAINBOWIACYCLICCOMPRESSED_CLEAN_gf16_is_nonzero(PQCLEAN_RAINBOWIACYCLICCOMPRESSED_CLEAN_gf16v_get_ele(ai, i)), aj + offset_byte, n_w_byte - offset_byte);
|
||||
PQCLEAN_RAINBOWIACYCLICCOMPRESSED_CLEAN_gf256v_predicated_add(ai + offset_byte, 1 ^ PQCLEAN_RAINBOWIACYCLICCOMPRESSED_CLEAN_gf16_is_nonzero(PQCLEAN_RAINBOWIACYCLICCOMPRESSED_CLEAN_gf16v_get_ele(ai, i)), aj + offset_byte, n_w_byte - offset_byte);
|
||||
}
|
||||
uint8_t pivot = PQCLEAN_RAINBOWIACYCLICCOMPRESSED_CLEAN_gf16v_get_ele(ai, i);
|
||||
r8 &= PQCLEAN_RAINBOWIACYCLICCOMPRESSED_CLEAN_gf16_is_nonzero(pivot);
|
||||
|
||||
@ -74,7 +74,7 @@ static unsigned int gf16mat_gauss_elim_ref(uint8_t *mat, unsigned int h, unsigne
|
||||
uint8_t *ai = mat + n_w_byte * i;
|
||||
for (unsigned int j = i + 1; j < h; j++) {
|
||||
uint8_t *aj = mat + n_w_byte * j;
|
||||
PQCLEAN_RAINBOWIACYCLIC_CLEAN_gf256v_predicated_add(ai + offset_byte, !PQCLEAN_RAINBOWIACYCLIC_CLEAN_gf16_is_nonzero(PQCLEAN_RAINBOWIACYCLIC_CLEAN_gf16v_get_ele(ai, i)), aj + offset_byte, n_w_byte - offset_byte);
|
||||
PQCLEAN_RAINBOWIACYCLIC_CLEAN_gf256v_predicated_add(ai + offset_byte, 1 ^ PQCLEAN_RAINBOWIACYCLIC_CLEAN_gf16_is_nonzero(PQCLEAN_RAINBOWIACYCLIC_CLEAN_gf16v_get_ele(ai, i)), aj + offset_byte, n_w_byte - offset_byte);
|
||||
}
|
||||
uint8_t pivot = PQCLEAN_RAINBOWIACYCLIC_CLEAN_gf16v_get_ele(ai, i);
|
||||
r8 &= PQCLEAN_RAINBOWIACYCLIC_CLEAN_gf16_is_nonzero(pivot);
|
||||
|
||||
@ -72,7 +72,7 @@ static unsigned int gf256mat_gauss_elim_ref(uint8_t *mat, unsigned int h, unsign
|
||||
|
||||
for (unsigned int j = i + 1; j < h; j++) {
|
||||
uint8_t *aj = mat + w * j;
|
||||
PQCLEAN_RAINBOWVCCLASSIC_CLEAN_gf256v_predicated_add(ai + skip_len_align4, !PQCLEAN_RAINBOWVCCLASSIC_CLEAN_gf256_is_nonzero(ai[i]), aj + skip_len_align4, w - skip_len_align4);
|
||||
PQCLEAN_RAINBOWVCCLASSIC_CLEAN_gf256v_predicated_add(ai + skip_len_align4, 1 ^ PQCLEAN_RAINBOWVCCLASSIC_CLEAN_gf256_is_nonzero(ai[i]), aj + skip_len_align4, w - skip_len_align4);
|
||||
}
|
||||
r8 &= PQCLEAN_RAINBOWVCCLASSIC_CLEAN_gf256_is_nonzero(ai[i]);
|
||||
uint8_t pivot = ai[i];
|
||||
|
||||
@ -72,7 +72,7 @@ static unsigned int gf256mat_gauss_elim_ref(uint8_t *mat, unsigned int h, unsign
|
||||
|
||||
for (unsigned int j = i + 1; j < h; j++) {
|
||||
uint8_t *aj = mat + w * j;
|
||||
PQCLEAN_RAINBOWVCCYCLICCOMPRESSED_CLEAN_gf256v_predicated_add(ai + skip_len_align4, !PQCLEAN_RAINBOWVCCYCLICCOMPRESSED_CLEAN_gf256_is_nonzero(ai[i]), aj + skip_len_align4, w - skip_len_align4);
|
||||
PQCLEAN_RAINBOWVCCYCLICCOMPRESSED_CLEAN_gf256v_predicated_add(ai + skip_len_align4, 1 ^ PQCLEAN_RAINBOWVCCYCLICCOMPRESSED_CLEAN_gf256_is_nonzero(ai[i]), aj + skip_len_align4, w - skip_len_align4);
|
||||
}
|
||||
r8 &= PQCLEAN_RAINBOWVCCYCLICCOMPRESSED_CLEAN_gf256_is_nonzero(ai[i]);
|
||||
uint8_t pivot = ai[i];
|
||||
|
||||
@ -72,7 +72,7 @@ static unsigned int gf256mat_gauss_elim_ref(uint8_t *mat, unsigned int h, unsign
|
||||
|
||||
for (unsigned int j = i + 1; j < h; j++) {
|
||||
uint8_t *aj = mat + w * j;
|
||||
PQCLEAN_RAINBOWVCCYCLIC_CLEAN_gf256v_predicated_add(ai + skip_len_align4, !PQCLEAN_RAINBOWVCCYCLIC_CLEAN_gf256_is_nonzero(ai[i]), aj + skip_len_align4, w - skip_len_align4);
|
||||
PQCLEAN_RAINBOWVCCYCLIC_CLEAN_gf256v_predicated_add(ai + skip_len_align4, 1 ^ PQCLEAN_RAINBOWVCCYCLIC_CLEAN_gf256_is_nonzero(ai[i]), aj + skip_len_align4, w - skip_len_align4);
|
||||
}
|
||||
r8 &= PQCLEAN_RAINBOWVCCYCLIC_CLEAN_gf256_is_nonzero(ai[i]);
|
||||
uint8_t pivot = ai[i];
|
||||
|
||||
@ -62,10 +62,14 @@ void PQCLEAN_SPHINCSSHA256128FROBUST_AVX2_sha256_update8x(sha256ctxx8 *ctx,
|
||||
const unsigned char *d6,
|
||||
const unsigned char *d7,
|
||||
unsigned long long len) {
|
||||
unsigned long long i = 0;
|
||||
size_t i = 0;
|
||||
size_t bytes_to_copy;
|
||||
|
||||
while (i < len) {
|
||||
unsigned long long bytes_to_copy = (len - i) > 64 ? 64 : (len - i);
|
||||
bytes_to_copy = (size_t)len - i;
|
||||
if (bytes_to_copy > 64) {
|
||||
bytes_to_copy = 64;
|
||||
}
|
||||
memcpy(&ctx->msgblocks[64 * 0], d0 + i, bytes_to_copy);
|
||||
memcpy(&ctx->msgblocks[64 * 1], d1 + i, bytes_to_copy);
|
||||
memcpy(&ctx->msgblocks[64 * 2], d2 + i, bytes_to_copy);
|
||||
|
||||
@ -62,10 +62,14 @@ void PQCLEAN_SPHINCSSHA256128FSIMPLE_AVX2_sha256_update8x(sha256ctxx8 *ctx,
|
||||
const unsigned char *d6,
|
||||
const unsigned char *d7,
|
||||
unsigned long long len) {
|
||||
unsigned long long i = 0;
|
||||
size_t i = 0;
|
||||
size_t bytes_to_copy;
|
||||
|
||||
while (i < len) {
|
||||
unsigned long long bytes_to_copy = (len - i) > 64 ? 64 : (len - i);
|
||||
bytes_to_copy = (size_t)len - i;
|
||||
if (bytes_to_copy > 64) {
|
||||
bytes_to_copy = 64;
|
||||
}
|
||||
memcpy(&ctx->msgblocks[64 * 0], d0 + i, bytes_to_copy);
|
||||
memcpy(&ctx->msgblocks[64 * 1], d1 + i, bytes_to_copy);
|
||||
memcpy(&ctx->msgblocks[64 * 2], d2 + i, bytes_to_copy);
|
||||
|
||||
@ -62,10 +62,14 @@ void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_sha256_update8x(sha256ctxx8 *ctx,
|
||||
const unsigned char *d6,
|
||||
const unsigned char *d7,
|
||||
unsigned long long len) {
|
||||
unsigned long long i = 0;
|
||||
size_t i = 0;
|
||||
size_t bytes_to_copy;
|
||||
|
||||
while (i < len) {
|
||||
unsigned long long bytes_to_copy = (len - i) > 64 ? 64 : (len - i);
|
||||
bytes_to_copy = (size_t)len - i;
|
||||
if (bytes_to_copy > 64) {
|
||||
bytes_to_copy = 64;
|
||||
}
|
||||
memcpy(&ctx->msgblocks[64 * 0], d0 + i, bytes_to_copy);
|
||||
memcpy(&ctx->msgblocks[64 * 1], d1 + i, bytes_to_copy);
|
||||
memcpy(&ctx->msgblocks[64 * 2], d2 + i, bytes_to_copy);
|
||||
|
||||
@ -62,10 +62,14 @@ void PQCLEAN_SPHINCSSHA256128SSIMPLE_AVX2_sha256_update8x(sha256ctxx8 *ctx,
|
||||
const unsigned char *d6,
|
||||
const unsigned char *d7,
|
||||
unsigned long long len) {
|
||||
unsigned long long i = 0;
|
||||
size_t i = 0;
|
||||
size_t bytes_to_copy;
|
||||
|
||||
while (i < len) {
|
||||
unsigned long long bytes_to_copy = (len - i) > 64 ? 64 : (len - i);
|
||||
bytes_to_copy = (size_t)len - i;
|
||||
if (bytes_to_copy > 64) {
|
||||
bytes_to_copy = 64;
|
||||
}
|
||||
memcpy(&ctx->msgblocks[64 * 0], d0 + i, bytes_to_copy);
|
||||
memcpy(&ctx->msgblocks[64 * 1], d1 + i, bytes_to_copy);
|
||||
memcpy(&ctx->msgblocks[64 * 2], d2 + i, bytes_to_copy);
|
||||
|
||||
@ -62,10 +62,14 @@ void PQCLEAN_SPHINCSSHA256192FROBUST_AVX2_sha256_update8x(sha256ctxx8 *ctx,
|
||||
const unsigned char *d6,
|
||||
const unsigned char *d7,
|
||||
unsigned long long len) {
|
||||
unsigned long long i = 0;
|
||||
size_t i = 0;
|
||||
size_t bytes_to_copy;
|
||||
|
||||
while (i < len) {
|
||||
unsigned long long bytes_to_copy = (len - i) > 64 ? 64 : (len - i);
|
||||
bytes_to_copy = (size_t)len - i;
|
||||
if (bytes_to_copy > 64) {
|
||||
bytes_to_copy = 64;
|
||||
}
|
||||
memcpy(&ctx->msgblocks[64 * 0], d0 + i, bytes_to_copy);
|
||||
memcpy(&ctx->msgblocks[64 * 1], d1 + i, bytes_to_copy);
|
||||
memcpy(&ctx->msgblocks[64 * 2], d2 + i, bytes_to_copy);
|
||||
|
||||
@ -62,10 +62,14 @@ void PQCLEAN_SPHINCSSHA256192FSIMPLE_AVX2_sha256_update8x(sha256ctxx8 *ctx,
|
||||
const unsigned char *d6,
|
||||
const unsigned char *d7,
|
||||
unsigned long long len) {
|
||||
unsigned long long i = 0;
|
||||
size_t i = 0;
|
||||
size_t bytes_to_copy;
|
||||
|
||||
while (i < len) {
|
||||
unsigned long long bytes_to_copy = (len - i) > 64 ? 64 : (len - i);
|
||||
bytes_to_copy = (size_t)len - i;
|
||||
if (bytes_to_copy > 64) {
|
||||
bytes_to_copy = 64;
|
||||
}
|
||||
memcpy(&ctx->msgblocks[64 * 0], d0 + i, bytes_to_copy);
|
||||
memcpy(&ctx->msgblocks[64 * 1], d1 + i, bytes_to_copy);
|
||||
memcpy(&ctx->msgblocks[64 * 2], d2 + i, bytes_to_copy);
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user