mirror of
https://github.com/open-quantum-safe/liboqs.git
synced 2025-10-06 00:03:35 -04:00
Merge pull request #632 from christianpaquin/cp-enable-sike-fast-mode-with-cmake
Updated SIKE implementation with latest changes.
This commit is contained in:
commit
82e1828f4c
@ -29,7 +29,7 @@ Implementation
|
||||
--------------
|
||||
|
||||
- **Source of implementation:** https://github.com/Microsoft/PQCrypto-SIDH
|
||||
- **Implementation version:** v3.2 (https://github.com/microsoft/PQCrypto-SIDH/tree/v3.2)
|
||||
- **Implementation version:** v3.2 (https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311)
|
||||
- **License:** MIT License
|
||||
- **Language:** C
|
||||
- **Constant-time:** Yes
|
||||
|
@ -30,13 +30,8 @@ Implementation
|
||||
--------------
|
||||
|
||||
- **Source of implementation:** https://github.com/Microsoft/PQCrypto-SIDH
|
||||
- **Implementation version:** v3.2 (https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa)
|
||||
- **Implementation version:** v3.2 (https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311)
|
||||
- **License:** MIT License
|
||||
- **Language:** C
|
||||
- **Constant-time:** Yes
|
||||
- **Architectures supported in liboqs master branch**: x86, x64
|
||||
|
||||
Additional comments
|
||||
-------------------
|
||||
|
||||
The original Sike implementation includes optimizations that are not currently being built in liboqs. See src/kem/sike/upstream/README for details.
|
||||
|
@ -1,22 +1,33 @@
|
||||
set(SRCS kem_sike.c
|
||||
P434/P434.c
|
||||
P434/P434_compressed.c
|
||||
P503/P503.c P503/P503_compressed.c
|
||||
P610/P610.c
|
||||
P610/P610_compressed.c
|
||||
P751/P751.c
|
||||
P751/P751_compressed.c)
|
||||
P434/P434.c
|
||||
P434/P434_compressed.c
|
||||
P503/P503.c P503/P503_compressed.c
|
||||
P610/P610.c
|
||||
P610/P610_compressed.c
|
||||
P751/P751.c
|
||||
P751/P751_compressed.c)
|
||||
|
||||
add_library(sike OBJECT ${SRCS})
|
||||
|
||||
# FIXMEOQS: enable FAST mode, assembly
|
||||
target_compile_definitions(sike PRIVATE _GENERIC_)
|
||||
if(ARCH STREQUAL "x86")
|
||||
target_compile_definitions(sike PRIVATE _X86_)
|
||||
elseif(ARCH STREQUAL "x86_64")
|
||||
target_compile_definitions(sike PRIVATE _AMD64_)
|
||||
elseif(ARCH STREQUAL "arm")
|
||||
target_compile_definitions(sike PRIVATE _ARM_)
|
||||
elseif(ARCH STREQUAL "arm64")
|
||||
target_compile_definitions(sike PRIVATE _ARM64_)
|
||||
if(${ARCH} STREQUAL "x86")
|
||||
target_compile_definitions(sike PRIVATE _GENERIC_ _X86_)
|
||||
elseif(${ARCH} STREQUAL "x86_64")
|
||||
target_compile_definitions(sike PRIVATE _AMD64_)
|
||||
if(${CMAKE_HOST_SYSTEM_NAME} STREQUAL "Linux")
|
||||
set(SRCS ${SRCS} P503/AMD64/fp_x64_asm.S
|
||||
P751/AMD64/fp_x64_asm.S)
|
||||
if(USE_BMI2_INSTRUCTIONS)
|
||||
target_compile_definitions(sike PRIVATE _MULX_ _ADX_)
|
||||
set(SRCS ${SRCS} P434/AMD64/fp_x64_asm.S
|
||||
P610/AMD64/fp_x64_asm.S)
|
||||
endif()
|
||||
endif()
|
||||
elseif(${ARCH} STREQUAL "arm")
|
||||
target_compile_definitions(sike PRIVATE _GENERIC_ _ARM_)
|
||||
elseif(${ARCH} STREQUAL "arm64")
|
||||
target_compile_definitions(sike PRIVATE _ARM64_)
|
||||
if(${CMAKE_HOST_SYSTEM_NAME} STREQUAL "Linux")
|
||||
set(SRCS ${SRCS} P503/AMD64/fp_x64_asm.S
|
||||
P751/AMD64/fp_x64_asm.S)
|
||||
endif()
|
||||
endif()
|
||||
|
@ -17,7 +17,7 @@ __inline void fpadd434(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
|
||||
// Inputs: a, b in [0, 2*p434-1]
|
||||
// Output: c in [0, 2*p434-1]
|
||||
|
||||
#if (OS_TARGET == OS_WIN)
|
||||
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
|
||||
unsigned int i, carry = 0;
|
||||
digit_t mask;
|
||||
|
||||
@ -38,7 +38,7 @@ __inline void fpadd434(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
|
||||
|
||||
#elif (OS_TARGET == OS_LINUX)
|
||||
|
||||
fpadd434_asm(a, b, c);
|
||||
oqs_kem_sike_fpadd434_asm(a, b, c);
|
||||
|
||||
#endif
|
||||
}
|
||||
@ -47,7 +47,7 @@ __inline void fpsub434(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
|
||||
// Inputs: a, b in [0, 2*p434-1]
|
||||
// Output: c in [0, 2*p434-1]
|
||||
|
||||
#if (OS_TARGET == OS_WIN)
|
||||
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
|
||||
unsigned int i, borrow = 0;
|
||||
digit_t mask;
|
||||
|
||||
@ -63,7 +63,7 @@ __inline void fpsub434(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
|
||||
|
||||
#elif (OS_TARGET == OS_LINUX)
|
||||
|
||||
fpsub434_asm(a, b, c);
|
||||
oqs_kem_sike_fpsub434_asm(a, b, c);
|
||||
|
||||
#endif
|
||||
}
|
||||
@ -110,7 +110,7 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
|
||||
|
||||
UNREFERENCED_PARAMETER(nwords);
|
||||
|
||||
#if (OS_TARGET == OS_WIN)
|
||||
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
|
||||
digit_t t = 0;
|
||||
uint128_t uv = {0};
|
||||
unsigned int carry = 0;
|
||||
@ -276,17 +276,17 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
|
||||
|
||||
#elif (OS_TARGET == OS_LINUX)
|
||||
|
||||
mul434_asm(a, b, c);
|
||||
oqs_kem_sike_mul434_asm(a, b, c);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
void rdc_mont(const digit_t *ma, digit_t *mc) { // Montgomery reduction exploiting special form of the prime.
|
||||
void rdc_mont(digit_t *ma, digit_t *mc) { // Montgomery reduction exploiting special form of the prime.
|
||||
// mc = ma*R^-1 mod p434x2, where R = 2^448.
|
||||
// If ma < 2^448*p434, the output mc is in the range [0, 2*p434-1].
|
||||
// ma is assumed to be in Montgomery representation.
|
||||
|
||||
#if (OS_TARGET == OS_WIN)
|
||||
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
|
||||
unsigned int carry;
|
||||
digit_t t = 0;
|
||||
uint128_t uv = {0};
|
||||
@ -423,7 +423,7 @@ void rdc_mont(const digit_t *ma, digit_t *mc) { // Montgomery reduction exploiti
|
||||
|
||||
#elif (OS_TARGET == OS_LINUX)
|
||||
|
||||
rdc434_asm(ma, mc);
|
||||
oqs_kem_sike_rdc434_asm(ma, mc);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
@ -36,8 +36,8 @@
|
||||
// Field addition
|
||||
// Operation: c [reg_p3] = a [reg_p1] + b [reg_p2]
|
||||
//***********************************************************************
|
||||
.global fpadd434_asm
|
||||
fpadd434_asm:
|
||||
.global oqs_kem_sike_fpadd434_asm
|
||||
oqs_kem_sike_fpadd434_asm:
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
@ -111,8 +111,8 @@ fpadd434_asm:
|
||||
// Field subtraction
|
||||
// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2]
|
||||
//***********************************************************************
|
||||
.global fpsub434_asm
|
||||
fpsub434_asm:
|
||||
.global oqs_kem_sike_fpsub434_asm
|
||||
oqs_kem_sike_fpsub434_asm:
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
@ -410,8 +410,8 @@ fpsub434_asm:
|
||||
//*****************************************************************************
|
||||
// 434-bit multiplication using Karatsuba (one level), schoolbook (one level)
|
||||
//*****************************************************************************
|
||||
.global mul434_asm
|
||||
mul434_asm:
|
||||
.global oqs_kem_sike_mul434_asm
|
||||
oqs_kem_sike_mul434_asm:
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
@ -562,8 +562,8 @@ mul434_asm:
|
||||
// Operation: c [reg_p3] = a [reg_p1] * b [reg_p2]
|
||||
// NOTE: a=c or b=c are not allowed
|
||||
//***********************************************************************
|
||||
.global mul434_asm
|
||||
mul434_asm:
|
||||
.global oqs_kem_sike_mul434_asm
|
||||
oqs_kem_sike_mul434_asm:
|
||||
|
||||
ret
|
||||
|
||||
@ -660,8 +660,8 @@ mul434_asm:
|
||||
// Operation: c [reg_p2] = a [reg_p1]
|
||||
// NOTE: a=c is not allowed
|
||||
//**************************************************************************************
|
||||
.global rdc434_asm
|
||||
rdc434_asm:
|
||||
.global oqs_kem_sike_rdc434_asm
|
||||
oqs_kem_sike_rdc434_asm:
|
||||
push r12
|
||||
push r13
|
||||
|
||||
@ -767,8 +767,8 @@ rdc434_asm:
|
||||
// Operation: c [reg_p2] = a [reg_p1]
|
||||
// NOTE: a=c is not allowed
|
||||
//***********************************************************************
|
||||
.global rdc434_asm
|
||||
rdc434_asm:
|
||||
.global oqs_kem_sike_rdc434_asm
|
||||
oqs_kem_sike_rdc434_asm:
|
||||
|
||||
ret
|
||||
|
||||
@ -781,8 +781,8 @@ rdc434_asm:
|
||||
// 434-bit multiprecision addition
|
||||
// Operation: c [reg_p3] = a [reg_p1] + b [reg_p2]
|
||||
//***********************************************************************
|
||||
.global mp_add434_asm
|
||||
mp_add434_asm:
|
||||
.global oqs_kem_sike_mp_add434_asm
|
||||
oqs_kem_sike_mp_add434_asm:
|
||||
mov r8, [reg_p1]
|
||||
mov r9, [reg_p1+8]
|
||||
mov r10, [reg_p1+16]
|
||||
@ -812,8 +812,8 @@ mp_add434_asm:
|
||||
// 2x434-bit multiprecision subtraction/addition
|
||||
// Operation: c [x2] = a [x0] - b [x1]. If c < 0, add p434*2^448
|
||||
//***********************************************************************
|
||||
.global mp_subadd434x2_asm
|
||||
mp_subadd434x2_asm:
|
||||
.global oqs_kem_sike_mp_subadd434x2_asm
|
||||
oqs_kem_sike_mp_subadd434x2_asm:
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
@ -897,8 +897,8 @@ mp_subadd434x2_asm:
|
||||
// Double 2x434-bit multiprecision subtraction
|
||||
// Operation: c [reg_p3] = c [reg_p3] - a [reg_p1] - b [reg_p2]
|
||||
//***********************************************************************
|
||||
.global mp_dblsub434x2_asm
|
||||
mp_dblsub434x2_asm:
|
||||
.global oqs_kem_sike_mp_dblsub434x2_asm
|
||||
oqs_kem_sike_mp_dblsub434x2_asm:
|
||||
push r12
|
||||
push r13
|
||||
|
||||
@ -966,4 +966,4 @@ mp_dblsub434x2_asm:
|
||||
|
||||
pop r13
|
||||
pop r12
|
||||
ret
|
||||
ret
|
||||
|
@ -31,15 +31,15 @@
|
||||
// Curve isogeny system "SIDHp434". Base curve: Montgomery curve By^2 = Cx^3 + Ax^2 + Cx defined over GF(p434^2), where A=6, B=1, C=1 and p434 = 2^216*3^137-1
|
||||
//
|
||||
|
||||
static const uint64_t p434[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFDC1767AE2FFFFFF,
|
||||
0x7BC65C783158AEA3, 0x6CFC5FD681C52056, 0x0002341F27177344
|
||||
};
|
||||
static const uint64_t p434p1[NWORDS64_FIELD] = {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xFDC1767AE3000000,
|
||||
0x7BC65C783158AEA3, 0x6CFC5FD681C52056, 0x0002341F27177344
|
||||
};
|
||||
static const uint64_t p434x2[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFB82ECF5C5FFFFFF,
|
||||
0xF78CB8F062B15D47, 0xD9F8BFAD038A40AC, 0x0004683E4E2EE688
|
||||
};
|
||||
const uint64_t p434[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFDC1767AE2FFFFFF,
|
||||
0x7BC65C783158AEA3, 0x6CFC5FD681C52056, 0x0002341F27177344
|
||||
};
|
||||
const uint64_t p434p1[NWORDS64_FIELD] = {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xFDC1767AE3000000,
|
||||
0x7BC65C783158AEA3, 0x6CFC5FD681C52056, 0x0002341F27177344
|
||||
};
|
||||
const uint64_t p434x2[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFB82ECF5C5FFFFFF,
|
||||
0xF78CB8F062B15D47, 0xD9F8BFAD038A40AC, 0x0004683E4E2EE688
|
||||
};
|
||||
// Order of Alice's subgroup
|
||||
static const uint64_t Alice_order[NWORDS64_ORDER] = {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000001000000};
|
||||
// Order of Bob's subgroup
|
||||
@ -119,9 +119,9 @@ static const unsigned int strat_Bob[MAX_Bob - 1] = {
|
||||
#define fp2inv_mont fp2inv434_mont
|
||||
#define fp2inv_mont_bingcd fp2inv434_mont_bingcd
|
||||
#define fpequal_non_constant_time fpequal434_non_constant_time
|
||||
#define mp_add_asm mp_add434_asm
|
||||
#define mp_subaddx2_asm mp_subadd434x2_asm
|
||||
#define mp_dblsubx2_asm mp_dblsub434x2_asm
|
||||
#define mp_add_asm oqs_kem_sike_mp_add434_asm
|
||||
#define mp_subaddx2_asm oqs_kem_sike_mp_subadd434x2_asm
|
||||
#define mp_dblsubx2_asm oqs_kem_sike_mp_dblsub434x2_asm
|
||||
#define crypto_kem_keypair OQS_KEM_sike_p434_keypair
|
||||
#define crypto_kem_enc OQS_KEM_sike_p434_encaps
|
||||
#define crypto_kem_dec OQS_KEM_sike_p434_decaps
|
||||
@ -131,10 +131,12 @@ static const unsigned int strat_Bob[MAX_Bob - 1] = {
|
||||
#define EphemeralKeyGeneration_B oqs_kem_sidh_p434_EphemeralKeyGeneration_B
|
||||
#define EphemeralSecretAgreement_A oqs_kem_sidh_p434_EphemeralSecretAgreement_A
|
||||
#define EphemeralSecretAgreement_B oqs_kem_sidh_p434_EphemeralSecretAgreement_B
|
||||
#ifdef USE_SIKEP434_ASM
|
||||
#define USE_SIKE_ASM
|
||||
#endif
|
||||
|
||||
#if defined(X86_64)
|
||||
#include "AMD64/fp_x64.c"
|
||||
// #include "AMD64/fp_x64_asm.S" FIXMEOQS
|
||||
#else
|
||||
#include "generic/fp_generic.c"
|
||||
#endif
|
||||
|
@ -11,6 +11,9 @@
|
||||
|
||||
// OQS note: size #defines moved to P434.c to avoid redefinitions across parameters
|
||||
|
||||
// Algorithm name
|
||||
#define CRYPTO_ALGNAME "SIKEp434"
|
||||
|
||||
// SIKE's key generation
|
||||
// It produces a private key sk and computes the public key pk.
|
||||
// Outputs: secret key sk (CRYPTO_SECRETKEYBYTES = 374 bytes)
|
||||
|
@ -381,9 +381,9 @@ static const uint64_t v_3_torsion[20][2 * NWORDS64_FIELD] = {
|
||||
#define fp2inv_mont fp2inv434_mont
|
||||
#define fp2inv_mont_bingcd fp2inv434_mont_bingcd
|
||||
#define fpequal_non_constant_time fpequal434_non_constant_time
|
||||
#define mp_add_asm mp_add434_asm
|
||||
#define mp_subaddx2_asm mp_subadd434x2_asm
|
||||
#define mp_dblsubx2_asm mp_dblsub434x2_asm
|
||||
#define mp_add_asm oqs_kem_sike_mp_add434_asm
|
||||
#define mp_subaddx2_asm oqs_kem_sike_mp_subadd434x2_asm
|
||||
#define mp_dblsubx2_asm oqs_kem_sike_mp_dblsub434x2_asm
|
||||
#define crypto_kem_keypair OQS_KEM_sike_p434_compressed_keypair
|
||||
#define crypto_kem_enc OQS_KEM_sike_p434_compressed_encaps
|
||||
#define crypto_kem_dec OQS_KEM_sike_p434_compressed_decaps
|
||||
@ -393,10 +393,12 @@ static const uint64_t v_3_torsion[20][2 * NWORDS64_FIELD] = {
|
||||
#define EphemeralKeyGeneration_B oqs_kem_sidh_p434_compressed_EphemeralKeyGeneration_B
|
||||
#define EphemeralSecretAgreement_A oqs_kem_sidh_p434_compressed_EphemeralSecretAgreement_A
|
||||
#define EphemeralSecretAgreement_B oqs_kem_sidh_p434_compressed_EphemeralSecretAgreement_B
|
||||
#ifdef USE_SIKEP434_ASM
|
||||
#define USE_SIKE_ASM
|
||||
#endif
|
||||
|
||||
#if defined(X86_64)
|
||||
#include "AMD64/fp_x64.c"
|
||||
// #include "AMD64/fp_x64_asm.S" FIXMEOQS
|
||||
#else
|
||||
#include "generic/fp_generic.c"
|
||||
#endif
|
||||
|
@ -121,17 +121,17 @@ static unsigned int mp_add(const digit_t *a, const digit_t *b, digit_t *c, const
|
||||
|
||||
// 434-bit multiprecision addition, c = a+b
|
||||
static void mp_add434(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
static void mp_add434_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
void oqs_kem_sike_mp_add434_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
|
||||
// Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = nwords. Returns the borrow bit
|
||||
static unsigned int mp_sub(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords);
|
||||
|
||||
// 2x434-bit multiprecision subtraction followed by addition with p434*2^448, c = a-b+(p434*2^448) if a-b < 0, otherwise c=a-b
|
||||
static void mp_subaddx2_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
static void mp_subadd434x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
void oqs_kem_sike_mp_subaddx2_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
void oqs_kem_sike_mp_subadd434x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
|
||||
// Double 2x434-bit multiprecision subtraction, c = c-a-b, where c > a and c > b
|
||||
static void mp_dblsub434x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
void oqs_kem_sike_mp_dblsub434x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
|
||||
// Multiprecision left shift
|
||||
static void mp_shiftleft(digit_t *x, unsigned int shift, const unsigned int nwords);
|
||||
@ -161,11 +161,11 @@ static bool fpequal434_non_constant_time(const digit_t *a, const digit_t *b);
|
||||
|
||||
// Modular addition, c = a+b mod p434
|
||||
extern void fpadd434(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
extern void fpadd434_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
extern void oqs_kem_sike_fpadd434_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
|
||||
// Modular subtraction, c = a-b mod p434
|
||||
extern void fpsub434(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
extern void fpsub434_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
extern void oqs_kem_sike_fpsub434_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
|
||||
// Modular negation, a = -a mod p434
|
||||
extern void fpneg434(digit_t *a);
|
||||
@ -177,12 +177,12 @@ static void fpdiv2_434(const digit_t *a, digit_t *c);
|
||||
static void fpcorrection434(digit_t *a);
|
||||
|
||||
// 434-bit Montgomery reduction, c = a mod p
|
||||
static void rdc_mont(const digit_t *a, digit_t *c);
|
||||
static void rdc_mont(digit_t *a, digit_t *c);
|
||||
void oqs_kem_sike_rdc434_asm(digit_t *ma, digit_t *mc);
|
||||
|
||||
// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p434, where R=2^768
|
||||
static void fpmul434_mont(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
static void mul434_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
static void rdc434_asm(const digit_t *ma, digit_t *mc);
|
||||
void oqs_kem_sike_mul434_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
|
||||
// Field squaring using Montgomery arithmetic, c = a*b*R^-1 mod p434, where R=2^768
|
||||
static void fpsqr434_mont(const digit_t *ma, digit_t *mc);
|
||||
|
@ -155,7 +155,7 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
|
||||
c[2 * nwords - 1] = v;
|
||||
}
|
||||
|
||||
void rdc_mont(const digit_t *ma, digit_t *mc) { // Efficient Montgomery reduction using comba and exploiting the special form of the prime p434.
|
||||
void rdc_mont(digit_t *ma, digit_t *mc) { // Efficient Montgomery reduction using comba and exploiting the special form of the prime p434.
|
||||
// mc = ma*R^-1 mod p434x2, where R = 2^448.
|
||||
// If ma < 2^448*p434, the output mc is in the range [0, 2*p434-1].
|
||||
// ma is assumed to be in Montgomery representation.
|
||||
|
@ -17,7 +17,7 @@ __inline void fpadd503(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
|
||||
// Inputs: a, b in [0, 2*p503-1]
|
||||
// Output: c in [0, 2*p503-1]
|
||||
|
||||
#if (OS_TARGET == OS_WIN)
|
||||
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
|
||||
unsigned int i, carry = 0;
|
||||
digit_t mask;
|
||||
|
||||
@ -38,7 +38,7 @@ __inline void fpadd503(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
|
||||
|
||||
#elif (OS_TARGET == OS_LINUX)
|
||||
|
||||
fpadd503_asm(a, b, c);
|
||||
oqs_kem_sike_fpadd503_asm(a, b, c);
|
||||
|
||||
#endif
|
||||
}
|
||||
@ -47,7 +47,7 @@ __inline void fpsub503(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
|
||||
// Inputs: a, b in [0, 2*p503-1]
|
||||
// Output: c in [0, 2*p503-1]
|
||||
|
||||
#if (OS_TARGET == OS_WIN)
|
||||
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
|
||||
unsigned int i, borrow = 0;
|
||||
digit_t mask;
|
||||
|
||||
@ -63,7 +63,7 @@ __inline void fpsub503(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
|
||||
|
||||
#elif (OS_TARGET == OS_LINUX)
|
||||
|
||||
fpsub503_asm(a, b, c);
|
||||
oqs_kem_sike_fpsub503_asm(a, b, c);
|
||||
|
||||
#endif
|
||||
}
|
||||
@ -110,7 +110,7 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
|
||||
|
||||
UNREFERENCED_PARAMETER(nwords);
|
||||
|
||||
#if (OS_TARGET == OS_WIN)
|
||||
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
|
||||
digit_t t = 0;
|
||||
uint128_t uv = {0};
|
||||
unsigned int carry = 0;
|
||||
@ -316,17 +316,17 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
|
||||
|
||||
#elif (OS_TARGET == OS_LINUX)
|
||||
|
||||
mul503_asm(a, b, c);
|
||||
oqs_kem_sike_mul503_asm(a, b, c);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
void rdc_mont(const digit_t *ma, digit_t *mc) { // Montgomery reduction exploiting special form of the prime.
|
||||
void rdc_mont(digit_t *ma, digit_t *mc) { // Montgomery reduction exploiting special form of the prime.
|
||||
// mc = ma*R^-1 mod p503x2, where R = 2^512.
|
||||
// If ma < 2^512*p503, the output mc is in the range [0, 2*p503-1].
|
||||
// ma is assumed to be in Montgomery representation.
|
||||
|
||||
#if (OS_TARGET == OS_WIN)
|
||||
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
|
||||
unsigned int carry;
|
||||
digit_t t = 0;
|
||||
uint128_t uv = {0};
|
||||
@ -504,7 +504,7 @@ void rdc_mont(const digit_t *ma, digit_t *mc) { // Montgomery reduction exploiti
|
||||
|
||||
#elif (OS_TARGET == OS_LINUX)
|
||||
|
||||
rdc503_asm(ma, mc);
|
||||
oqs_kem_sike_rdc503_asm(ma, mc);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -16,14 +16,14 @@ __inline void fpadd503(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
|
||||
// Inputs: a, b in [0, 2*p503-1]
|
||||
// Output: c in [0, 2*p503-1]
|
||||
|
||||
fpadd503_asm(a, b, c);
|
||||
oqs_kem_sike_fpadd503_asm(a, b, c);
|
||||
}
|
||||
|
||||
__inline void fpsub503(const digit_t *a, const digit_t *b, digit_t *c) { // Modular subtraction, c = a-b mod p503.
|
||||
// Inputs: a, b in [0, 2*p503-1]
|
||||
// Output: c in [0, 2*p503-1]
|
||||
|
||||
fpsub503_asm(a, b, c);
|
||||
oqs_kem_sike_fpsub503_asm(a, b, c);
|
||||
}
|
||||
|
||||
__inline void fpneg503(digit_t *a) { // Modular negation, a = -a mod p503.
|
||||
@ -68,13 +68,13 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
|
||||
|
||||
UNREFERENCED_PARAMETER(nwords);
|
||||
|
||||
mul503_asm(a, b, c);
|
||||
oqs_kem_sike_mul503_asm(a, b, c);
|
||||
}
|
||||
|
||||
void rdc_mont(const digit_t *ma, digit_t *mc) { // Montgomery reduction exploiting special form of the prime.
|
||||
void rdc_mont(digit_t *ma, digit_t *mc) { // Montgomery reduction exploiting special form of the prime.
|
||||
// mc = ma*R^-1 mod p503x2, where R = 2^512.
|
||||
// If ma < 2^512*p503, the output mc is in the range [0, 2*p503-1].
|
||||
// ma is assumed to be in Montgomery representation.
|
||||
|
||||
rdc503_asm(ma, mc);
|
||||
oqs_kem_sike_rdc503_asm(ma, mc);
|
||||
}
|
||||
|
@ -1,168 +1,168 @@
|
||||
//*******************************************************************************************
|
||||
// SIDH: an efficient supersingular isogeny cryptography library
|
||||
//
|
||||
// Abstract: field arithmetic in 64-bit ARMv8 assembly for P503 on Linux
|
||||
//*******************************************************************************************
|
||||
|
||||
.text
|
||||
|
||||
// p503
|
||||
p503:
|
||||
.quad 0xFFFFFFFFFFFFFFFF
|
||||
.quad 0xABFFFFFFFFFFFFFF
|
||||
.quad 0x13085BDA2211E7A0
|
||||
.quad 0x1B9BF6C87B7E7DAF
|
||||
.quad 0x6045C6BDDA77A4D0
|
||||
.quad 0x004066F541811E1E
|
||||
|
||||
// p503 + 1
|
||||
p503p1:
|
||||
.quad 0xAC00000000000000
|
||||
.quad 0x13085BDA2211E7A0
|
||||
.quad 0x1B9BF6C87B7E7DAF
|
||||
.quad 0x6045C6BDDA77A4D0
|
||||
.quad 0x004066F541811E1E
|
||||
|
||||
// 2 * p503
|
||||
p503x2:
|
||||
.quad 0xFFFFFFFFFFFFFFFE
|
||||
.quad 0xFFFFFFFFFFFFFFFF
|
||||
.quad 0x57FFFFFFFFFFFFFF
|
||||
.quad 0x2610B7B44423CF41
|
||||
.quad 0x3737ED90F6FCFB5E
|
||||
.quad 0xC08B8D7BB4EF49A0
|
||||
//*******************************************************************************************
|
||||
// SIDH: an efficient supersingular isogeny cryptography library
|
||||
//
|
||||
// Abstract: field arithmetic in 64-bit ARMv8 assembly for P503 on Linux
|
||||
//*******************************************************************************************
|
||||
|
||||
.text
|
||||
|
||||
// p503
|
||||
p503:
|
||||
.quad 0xFFFFFFFFFFFFFFFF
|
||||
.quad 0xABFFFFFFFFFFFFFF
|
||||
.quad 0x13085BDA2211E7A0
|
||||
.quad 0x1B9BF6C87B7E7DAF
|
||||
.quad 0x6045C6BDDA77A4D0
|
||||
.quad 0x004066F541811E1E
|
||||
|
||||
// p503 + 1
|
||||
p503p1:
|
||||
.quad 0xAC00000000000000
|
||||
.quad 0x13085BDA2211E7A0
|
||||
.quad 0x1B9BF6C87B7E7DAF
|
||||
.quad 0x6045C6BDDA77A4D0
|
||||
.quad 0x004066F541811E1E
|
||||
|
||||
// 2 * p503
|
||||
p503x2:
|
||||
.quad 0xFFFFFFFFFFFFFFFE
|
||||
.quad 0xFFFFFFFFFFFFFFFF
|
||||
.quad 0x57FFFFFFFFFFFFFF
|
||||
.quad 0x2610B7B44423CF41
|
||||
.quad 0x3737ED90F6FCFB5E
|
||||
.quad 0xC08B8D7BB4EF49A0
|
||||
.quad 0x0080CDEA83023C3C
|
||||
|
||||
p503p1_nz_s8:
|
||||
.quad 0x85BDA2211E7A0AC
|
||||
.quad 0x9BF6C87B7E7DAF13
|
||||
.quad 0x45C6BDDA77A4D01B
|
||||
.quad 0x4066F541811E1E60
|
||||
|
||||
|
||||
//***********************************************************************
|
||||
// Field addition
|
||||
// Operation: c [x2] = a [x0] + b [x1]
|
||||
//***********************************************************************
|
||||
.global fpadd503_asm
|
||||
fpadd503_asm:
|
||||
ldp x3, x4, [x0,#0]
|
||||
ldp x5, x6, [x0,#16]
|
||||
ldp x11, x12, [x1,#0]
|
||||
ldp x13, x14, [x1,#16]
|
||||
|
||||
// Add a + b
|
||||
adds x3, x3, x11
|
||||
adcs x4, x4, x12
|
||||
adcs x5, x5, x13
|
||||
adcs x6, x6, x14
|
||||
ldp x7, x8, [x0,#32]
|
||||
ldp x9, x10, [x0,#48]
|
||||
ldp x15, x16, [x1,#32]
|
||||
ldp x17, x18, [x1,#48]
|
||||
adcs x7, x7, x15
|
||||
adcs x8, x8, x16
|
||||
adcs x9, x9, x17
|
||||
adc x10, x10, x18
|
||||
|
||||
// Subtract 2xp503
|
||||
ldr x11, p503x2
|
||||
ldr x12, p503x2 + 8
|
||||
ldr x13, p503x2 + 16
|
||||
ldr x14, p503x2 + 24
|
||||
subs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x12
|
||||
sbcs x6, x6, x13
|
||||
sbcs x7, x7, x14
|
||||
ldr x15, p503x2 + 32
|
||||
ldr x16, p503x2 + 40
|
||||
ldr x17, p503x2 + 48
|
||||
sbcs x8, x8, x15
|
||||
sbcs x9, x9, x16
|
||||
sbcs x10, x10, x17
|
||||
sbc x18, xzr, xzr
|
||||
|
||||
// Add 2xp503 anded with the mask in x18
|
||||
and x11, x11, x18
|
||||
and x12, x12, x18
|
||||
and x13, x13, x18
|
||||
and x14, x14, x18
|
||||
and x15, x15, x18
|
||||
and x16, x16, x18
|
||||
and x17, x17, x18
|
||||
|
||||
adds x3, x3, x11
|
||||
adcs x4, x4, x12
|
||||
adcs x5, x5, x12
|
||||
adcs x6, x6, x13
|
||||
adcs x7, x7, x14
|
||||
adcs x8, x8, x15
|
||||
adcs x9, x9, x16
|
||||
adc x10, x10, x17
|
||||
|
||||
stp x3, x4, [x2,#0]
|
||||
stp x5, x6, [x2,#16]
|
||||
stp x7, x8, [x2,#32]
|
||||
stp x9, x10, [x2,#48]
|
||||
ret
|
||||
|
||||
|
||||
//***********************************************************************
|
||||
// Field subtraction
|
||||
// Operation: c [x2] = a [x0] - b [x1]
|
||||
//***********************************************************************
|
||||
.global fpsub503_asm
|
||||
fpsub503_asm:
|
||||
ldp x3, x4, [x0,#0]
|
||||
ldp x5, x6, [x0,#16]
|
||||
ldp x11, x12, [x1,#0]
|
||||
ldp x13, x14, [x1,#16]
|
||||
|
||||
// Subtract a - b
|
||||
subs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x13
|
||||
sbcs x6, x6, x14
|
||||
ldp x7, x8, [x0,#32]
|
||||
ldp x9, x10, [x0,#48]
|
||||
ldp x15, x16, [x1,#32]
|
||||
ldp x17, x18, [x1,#48]
|
||||
sbcs x7, x7, x15
|
||||
sbcs x8, x8, x16
|
||||
sbcs x9, x9, x17
|
||||
sbcs x10, x10, x18
|
||||
sbc x18, xzr, xzr
|
||||
|
||||
// Add 2xp503 anded with the mask in x18
|
||||
ldr x11, p503x2
|
||||
ldr x12, p503x2 + 8
|
||||
ldr x13, p503x2 + 16
|
||||
ldr x14, p503x2 + 24
|
||||
and x11, x11, x18
|
||||
and x12, x12, x18
|
||||
and x13, x13, x18
|
||||
and x14, x14, x18
|
||||
ldr x15, p503x2 + 32
|
||||
ldr x16, p503x2 + 40
|
||||
ldr x17, p503x2 + 48
|
||||
and x15, x15, x18
|
||||
and x16, x16, x18
|
||||
and x17, x17, x18
|
||||
|
||||
adds x3, x3, x11
|
||||
adcs x4, x4, x12
|
||||
adcs x5, x5, x12
|
||||
adcs x6, x6, x13
|
||||
adcs x7, x7, x14
|
||||
adcs x8, x8, x15
|
||||
adcs x9, x9, x16
|
||||
adc x10, x10, x17
|
||||
|
||||
stp x3, x4, [x2,#0]
|
||||
stp x5, x6, [x2,#16]
|
||||
stp x7, x8, [x2,#32]
|
||||
stp x9, x10, [x2,#48]
|
||||
p503p1_nz_s8:
|
||||
.quad 0x85BDA2211E7A0AC
|
||||
.quad 0x9BF6C87B7E7DAF13
|
||||
.quad 0x45C6BDDA77A4D01B
|
||||
.quad 0x4066F541811E1E60
|
||||
|
||||
|
||||
//***********************************************************************
|
||||
// Field addition
|
||||
// Operation: c [x2] = a [x0] + b [x1]
|
||||
//***********************************************************************
|
||||
.global oqs_kem_sike_fpadd503_asm
|
||||
oqs_kem_sike_fpadd503_asm:
|
||||
ldp x3, x4, [x0,#0]
|
||||
ldp x5, x6, [x0,#16]
|
||||
ldp x11, x12, [x1,#0]
|
||||
ldp x13, x14, [x1,#16]
|
||||
|
||||
// Add a + b
|
||||
adds x3, x3, x11
|
||||
adcs x4, x4, x12
|
||||
adcs x5, x5, x13
|
||||
adcs x6, x6, x14
|
||||
ldp x7, x8, [x0,#32]
|
||||
ldp x9, x10, [x0,#48]
|
||||
ldp x15, x16, [x1,#32]
|
||||
ldp x17, x18, [x1,#48]
|
||||
adcs x7, x7, x15
|
||||
adcs x8, x8, x16
|
||||
adcs x9, x9, x17
|
||||
adc x10, x10, x18
|
||||
|
||||
// Subtract 2xp503
|
||||
ldr x11, p503x2
|
||||
ldr x12, p503x2 + 8
|
||||
ldr x13, p503x2 + 16
|
||||
ldr x14, p503x2 + 24
|
||||
subs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x12
|
||||
sbcs x6, x6, x13
|
||||
sbcs x7, x7, x14
|
||||
ldr x15, p503x2 + 32
|
||||
ldr x16, p503x2 + 40
|
||||
ldr x17, p503x2 + 48
|
||||
sbcs x8, x8, x15
|
||||
sbcs x9, x9, x16
|
||||
sbcs x10, x10, x17
|
||||
sbc x18, xzr, xzr
|
||||
|
||||
// Add 2xp503 anded with the mask in x18
|
||||
and x11, x11, x18
|
||||
and x12, x12, x18
|
||||
and x13, x13, x18
|
||||
and x14, x14, x18
|
||||
and x15, x15, x18
|
||||
and x16, x16, x18
|
||||
and x17, x17, x18
|
||||
|
||||
adds x3, x3, x11
|
||||
adcs x4, x4, x12
|
||||
adcs x5, x5, x12
|
||||
adcs x6, x6, x13
|
||||
adcs x7, x7, x14
|
||||
adcs x8, x8, x15
|
||||
adcs x9, x9, x16
|
||||
adc x10, x10, x17
|
||||
|
||||
stp x3, x4, [x2,#0]
|
||||
stp x5, x6, [x2,#16]
|
||||
stp x7, x8, [x2,#32]
|
||||
stp x9, x10, [x2,#48]
|
||||
ret
|
||||
|
||||
|
||||
//***********************************************************************
|
||||
// Field subtraction
|
||||
// Operation: c [x2] = a [x0] - b [x1]
|
||||
//***********************************************************************
|
||||
.global oqs_kem_sike_fpsub503_asm
|
||||
oqs_kem_sike_fpsub503_asm:
|
||||
ldp x3, x4, [x0,#0]
|
||||
ldp x5, x6, [x0,#16]
|
||||
ldp x11, x12, [x1,#0]
|
||||
ldp x13, x14, [x1,#16]
|
||||
|
||||
// Subtract a - b
|
||||
subs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
sbcs x5, x5, x13
|
||||
sbcs x6, x6, x14
|
||||
ldp x7, x8, [x0,#32]
|
||||
ldp x9, x10, [x0,#48]
|
||||
ldp x15, x16, [x1,#32]
|
||||
ldp x17, x18, [x1,#48]
|
||||
sbcs x7, x7, x15
|
||||
sbcs x8, x8, x16
|
||||
sbcs x9, x9, x17
|
||||
sbcs x10, x10, x18
|
||||
sbc x18, xzr, xzr
|
||||
|
||||
// Add 2xp503 anded with the mask in x18
|
||||
ldr x11, p503x2
|
||||
ldr x12, p503x2 + 8
|
||||
ldr x13, p503x2 + 16
|
||||
ldr x14, p503x2 + 24
|
||||
and x11, x11, x18
|
||||
and x12, x12, x18
|
||||
and x13, x13, x18
|
||||
and x14, x14, x18
|
||||
ldr x15, p503x2 + 32
|
||||
ldr x16, p503x2 + 40
|
||||
ldr x17, p503x2 + 48
|
||||
and x15, x15, x18
|
||||
and x16, x16, x18
|
||||
and x17, x17, x18
|
||||
|
||||
adds x3, x3, x11
|
||||
adcs x4, x4, x12
|
||||
adcs x5, x5, x12
|
||||
adcs x6, x6, x13
|
||||
adcs x7, x7, x14
|
||||
adcs x8, x8, x15
|
||||
adcs x9, x9, x16
|
||||
adc x10, x10, x17
|
||||
|
||||
stp x3, x4, [x2,#0]
|
||||
stp x5, x6, [x2,#16]
|
||||
stp x7, x8, [x2,#32]
|
||||
stp x9, x10, [x2,#48]
|
||||
ret
|
||||
|
||||
|
||||
@ -263,12 +263,12 @@ fpsub503_asm:
|
||||
.endm
|
||||
|
||||
|
||||
//***********************************************************************************
|
||||
// 512-bit integer multiplication using Karatsuba (two levels), Comba (lower level)
|
||||
//***********************************************************************************
|
||||
// 512-bit integer multiplication using Karatsuba (two levels), Comba (lower level)
|
||||
// Operation: c [x2] = a [x0] * b [x1]
|
||||
//***********************************************************************************
|
||||
.global mul503_asm
|
||||
mul503_asm:
|
||||
.global oqs_kem_sike_mul503_asm
|
||||
oqs_kem_sike_mul503_asm:
|
||||
sub sp, sp, #96
|
||||
stp x19, x20, [sp,#0]
|
||||
stp x21, x22, [sp,#16]
|
||||
@ -448,35 +448,35 @@ mul503_asm:
|
||||
.endm
|
||||
|
||||
|
||||
//**************************************************************************************
|
||||
// Montgomery reduction
|
||||
// Based on method described in Faz-Hernandez et al. https://eprint.iacr.org/2017/1015
|
||||
// Operation: mc [x1] = ma [x0]
|
||||
// NOTE: ma=mc is not allowed
|
||||
//**************************************************************************************
|
||||
// Montgomery reduction
|
||||
// Based on method described in Faz-Hernandez et al. https://eprint.iacr.org/2017/1015
|
||||
// Operation: mc [x1] = ma [x0]
|
||||
// NOTE: ma=mc is not allowed
|
||||
//**************************************************************************************
|
||||
.global rdc503_asm
|
||||
rdc503_asm:
|
||||
sub sp, sp, #96
|
||||
stp x19, x20, [sp]
|
||||
stp x21, x22, [sp, #16]
|
||||
stp x23, x24, [sp, #32]
|
||||
stp x25, x26, [sp, #48]
|
||||
stp x27, x28, [sp, #64]
|
||||
.global oqs_kem_sike_rdc503_asm
|
||||
oqs_kem_sike_rdc503_asm:
|
||||
sub sp, sp, #96
|
||||
stp x19, x20, [sp]
|
||||
stp x21, x22, [sp, #16]
|
||||
stp x23, x24, [sp, #32]
|
||||
stp x25, x26, [sp, #48]
|
||||
stp x27, x28, [sp, #64]
|
||||
stp x29, x30, [sp, #80]
|
||||
|
||||
ldp x2, x3, [x0,#0] // a[0-1]
|
||||
|
||||
// Load the prime constant
|
||||
ldr x24, p503p1_nz_s8 + 0
|
||||
ldr x25, p503p1_nz_s8 + 8
|
||||
ldr x26, p503p1_nz_s8 + 16
|
||||
ldr x27, p503p1_nz_s8 + 24
|
||||
|
||||
ldp x2, x3, [x0,#0] // a[0-1]
|
||||
|
||||
// Load the prime constant
|
||||
ldr x24, p503p1_nz_s8 + 0
|
||||
ldr x25, p503p1_nz_s8 + 8
|
||||
ldr x26, p503p1_nz_s8 + 16
|
||||
ldr x27, p503p1_nz_s8 + 24
|
||||
|
||||
// a[0-1] x p503p1_nz_s8 --> result: x4:x9
|
||||
mul x4, x2, x24 // a[0] x p503p1_nz_s8[0]
|
||||
umulh x7, x2, x24
|
||||
mul x5, x2, x25 // a[0] x p503p1_nz_s8[1]
|
||||
umulh x6, x2, x25
|
||||
umulh x6, x2, x25
|
||||
MUL128x256_COMBA_CUT x2, x3, x24, x25, x26, x27, x4, x5, x6, x7, x8, x9, x28, x29, x30, x10
|
||||
|
||||
ldp x3, x11, [x0,#16] // a[2]
|
||||
@ -516,9 +516,9 @@ rdc503_asm:
|
||||
adcs x20, xzr, x20
|
||||
adcs x21, xzr, x21
|
||||
adcs x22, xzr, x22
|
||||
adc x23, xzr, x23
|
||||
|
||||
// a[2-3] x p503p1_nz_s8 --> result: x4:x9
|
||||
adc x23, xzr, x23
|
||||
|
||||
// a[2-3] x p503p1_nz_s8 --> result: x4:x9
|
||||
MUL128x256_COMBA_CUT x3, x11, x24, x25, x26, x27, x4, x5, x6, x7, x8, x9, x28, x29, x30, x10
|
||||
|
||||
orr x10, xzr, x9, lsr #8
|
||||
@ -548,9 +548,9 @@ rdc503_asm:
|
||||
umulh x6, x12, x25
|
||||
adcs x21, xzr, x21
|
||||
adcs x22, xzr, x22
|
||||
adc x23, xzr, x23
|
||||
|
||||
// a[4-5] x p503p1_nz_s8 --> result: x4:x9
|
||||
adc x23, xzr, x23
|
||||
|
||||
// a[4-5] x p503p1_nz_s8 --> result: x4:x9
|
||||
MUL128x256_COMBA_CUT x12, x13, x24, x25, x26, x27, x4, x5, x6, x7, x8, x9, x28, x29, x30, x10
|
||||
|
||||
orr x10, xzr, x9, lsr #8
|
||||
@ -578,9 +578,9 @@ rdc503_asm:
|
||||
mul x5, x14, x25 // a[6] x p503p1_nz_s8[1]
|
||||
umulh x6, x14, x25
|
||||
adcs x22, xzr, x22
|
||||
adc x23, xzr, x23
|
||||
|
||||
// a[6-7] x p503p1_nz_s8 --> result: x4:x9
|
||||
adc x23, xzr, x23
|
||||
|
||||
// a[6-7] x p503p1_nz_s8 --> result: x4:x9
|
||||
MUL128x256_COMBA_CUT x14, x15, x24, x25, x26, x27, x4, x5, x6, x7, x8, x9, x28, x29, x30, x10
|
||||
|
||||
orr x10, xzr, x9, lsr #8
|
||||
@ -607,251 +607,251 @@ rdc503_asm:
|
||||
adc x23, x10, x23
|
||||
stp x20, x21, [x1,#32]
|
||||
stp x22, x23, [x1,#48]
|
||||
|
||||
ldp x19, x20, [sp]
|
||||
ldp x21, x22, [sp, #16]
|
||||
ldp x23, x24, [sp, #32]
|
||||
ldp x25, x26, [sp, #48]
|
||||
ldp x27, x28, [sp, #64]
|
||||
|
||||
ldp x19, x20, [sp]
|
||||
ldp x21, x22, [sp, #16]
|
||||
ldp x23, x24, [sp, #32]
|
||||
ldp x25, x26, [sp, #48]
|
||||
ldp x27, x28, [sp, #64]
|
||||
ldp x29, x30, [sp, #80]
|
||||
add sp, sp, #96
|
||||
ret
|
||||
|
||||
|
||||
//***********************************************************************
|
||||
// 503-bit multiprecision addition
|
||||
// Operation: c [x2] = a [x0] + b [x1]
|
||||
//***********************************************************************
|
||||
// Adds the eight 64-bit words at [x0] to the eight words at [x1] with carry
// propagation and writes the eight-word sum to [x2]. No modular reduction is done.
//   x0 = a (read), x1 = b (read), x2 = c (written)
// Scratch registers: x3-x18.
// NOTE(review): x18 is reserved as the platform register by some AArch64 ABIs -
// confirm it may be clobbered on every target.
.global mp_add503_asm
|
||||
mp_add503_asm:
|
||||
// Words 0-3 of a go to x3-x6, words 0-3 of b go to x11-x14
ldp x3, x4, [x0,#0]
|
||||
ldp x5, x6, [x0,#16]
|
||||
ldp x11, x12, [x1,#0]
|
||||
ldp x13, x14, [x1,#16]
|
||||
|
||||
adds x3, x3, x11
|
||||
adcs x4, x4, x12
|
||||
adcs x5, x5, x13
|
||||
adcs x6, x6, x14
|
||||
// Words 4-7 of a go to x7-x10, words 4-7 of b go to x15-x18 (loads leave the carry flag intact)
ldp x7, x8, [x0,#32]
|
||||
ldp x9, x10, [x0,#48]
|
||||
ldp x15, x16, [x1,#32]
|
||||
ldp x17, x18, [x1,#48]
|
||||
adcs x7, x7, x15
|
||||
adcs x8, x8, x16
|
||||
adcs x9, x9, x17
|
||||
// Final adc (not adcs): the carry out of the top word is not recorded
adc x10, x10, x18
|
||||
|
||||
// Store the eight result words to c
stp x3, x4, [x2,#0]
|
||||
stp x5, x6, [x2,#16]
|
||||
stp x7, x8, [x2,#32]
|
||||
stp x9, x10, [x2,#48]
|
||||
ret
|
||||
|
||||
|
||||
//***********************************************************************
|
||||
// 2x503-bit multiprecision addition
|
||||
// Operation: c [x2] = a [x0] + b [x1]
|
||||
//***********************************************************************
|
||||
// Adds the sixteen 64-bit words at [x0] to the sixteen words at [x1] as a single
// carry chain and writes the sixteen-word sum to [x2]. The work is done in two
// halves of eight words; the carry flag links the halves.
//   x0 = a (read), x1 = b (read), x2 = c (written)
// Scratch registers: x3-x18.
// NOTE(review): x18 is reserved as the platform register by some AArch64 ABIs -
// confirm it may be clobbered on every target.
.global mp_add503x2_asm
|
||||
mp_add503x2_asm:
|
||||
// Low half: words 0-7
ldp x3, x4, [x0,#0]
|
||||
ldp x5, x6, [x0,#16]
|
||||
ldp x11, x12, [x1,#0]
|
||||
ldp x13, x14, [x1,#16]
|
||||
adds x3, x3, x11
|
||||
adcs x4, x4, x12
|
||||
adcs x5, x5, x13
|
||||
adcs x6, x6, x14
|
||||
ldp x7, x8, [x0,#32]
|
||||
ldp x9, x10, [x0,#48]
|
||||
ldp x15, x16, [x1,#32]
|
||||
ldp x17, x18, [x1,#48]
|
||||
adcs x7, x7, x15
|
||||
adcs x8, x8, x16
|
||||
adcs x9, x9, x17
|
||||
// adcs here (not adc): the carry out of word 7 must reach word 8 below
adcs x10, x10, x18
|
||||
|
||||
// Stores and loads do not change the flags, so the carry survives until the next adcs
stp x3, x4, [x2,#0]
|
||||
stp x5, x6, [x2,#16]
|
||||
stp x7, x8, [x2,#32]
|
||||
stp x9, x10, [x2,#48]
|
||||
|
||||
// High half: words 8-15, continuing the same carry chain
ldp x3, x4, [x0,#64]
|
||||
ldp x5, x6, [x0,#80]
|
||||
ldp x11, x12, [x1,#64]
|
||||
ldp x13, x14, [x1,#80]
|
||||
adcs x3, x3, x11
|
||||
adcs x4, x4, x12
|
||||
adcs x5, x5, x13
|
||||
adcs x6, x6, x14
|
||||
ldp x7, x8, [x0,#96]
|
||||
ldp x9, x10, [x0,#112]
|
||||
ldp x15, x16, [x1,#96]
|
||||
ldp x17, x18, [x1,#112]
|
||||
adcs x7, x7, x15
|
||||
adcs x8, x8, x16
|
||||
adcs x9, x9, x17
|
||||
// Final adc (not adcs): the carry out of the top word is not recorded
adc x10, x10, x18
|
||||
|
||||
stp x3, x4, [x2,#64]
|
||||
stp x5, x6, [x2,#80]
|
||||
stp x7, x8, [x2,#96]
|
||||
stp x9, x10, [x2,#112]
|
||||
ret
|
||||
|
||||
|
||||
//***********************************************************************
|
||||
// 2x503-bit multiprecision subtraction/addition
|
||||
// Operation: c [x2] = a [x0] - b [x1]. If c < 0, add p503*2^512
|
||||
//***********************************************************************
|
||||
// Subtracts the sixteen 64-bit words at [x1] from the sixteen words at [x0] as one
// borrow chain. The low eight result words are stored to [x2] as they are produced.
// The high eight words then get the p503 constant ANDed with a mask added to them
// (mask = all-ones if the full subtraction borrowed, zero otherwise) before being
// stored to [x2,#64..]. No branches are used.
//   x0 = a (read, then reused as the mask), x1 = b (read), x2 = c (written)
// Scratch registers: x3-x16.
.global mp_subadd503x2_asm
|
||||
mp_subadd503x2_asm:
|
||||
// Low half: words 0-7. Loads and stores are interleaved with the sbcs chain;
// neither changes the flags, so the borrow is preserved.
ldp x3, x4, [x0,#0]
|
||||
ldp x11, x12, [x1,#0]
|
||||
ldp x5, x6, [x0,#16]
|
||||
ldp x13, x14, [x1,#16]
|
||||
subs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
stp x3, x4, [x2,#0]
|
||||
ldp x7, x8, [x0,#32]
|
||||
ldp x11, x12, [x1,#32]
|
||||
sbcs x5, x5, x13
|
||||
sbcs x6, x6, x14
|
||||
stp x5, x6, [x2,#16]
|
||||
ldp x9, x10, [x0,#48]
|
||||
ldp x13, x14, [x1,#48]
|
||||
sbcs x7, x7, x11
|
||||
sbcs x8, x8, x12
|
||||
stp x7, x8, [x2,#32]
|
||||
sbcs x9, x9, x13
|
||||
sbcs x10, x10, x14
|
||||
stp x9, x10, [x2,#48]
|
||||
|
||||
// High half: words 8-15, continuing the same borrow chain; results stay in x3-x10
ldp x3, x4, [x0,#64]
|
||||
ldp x11, x12, [x1,#64]
|
||||
ldp x5, x6, [x0,#80]
|
||||
ldp x13, x14, [x1,#80]
|
||||
sbcs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
ldp x7, x8, [x0,#96]
|
||||
ldp x11, x12, [x1,#96]
|
||||
sbcs x5, x5, x13
|
||||
sbcs x6, x6, x14
|
||||
ldp x9, x10, [x0,#112]
|
||||
ldp x13, x14, [x1,#112]
|
||||
// Each of x11-x14 is reloaded with a p503 table word right after its last use as a b word
sbcs x7, x7, x11
|
||||
ldr x11, p503
|
||||
sbcs x8, x8, x12
|
||||
ldr x12, p503 + 8
|
||||
sbcs x9, x9, x13
|
||||
ldr x13, p503 + 16
|
||||
sbcs x10, x10, x14
|
||||
ldr x14, p503 + 24
|
||||
// x0 = 0 - 0 - borrow: all-ones if a < b, zero otherwise (pointer a is no longer needed)
sbc x0, xzr, xzr
|
||||
|
||||
// Add p503 anded with the mask in x0
|
||||
ldr x15, p503 + 32
|
||||
ldr x16, p503 + 40
|
||||
and x11, x11, x0
|
||||
and x12, x12, x0
|
||||
and x13, x13, x0
|
||||
and x14, x14, x0
|
||||
and x15, x15, x0
|
||||
and x16, x16, x0
|
||||
// Only six table words are loaded for eight result words: x11 is added to words
// 8, 9 and 10. Presumably the p503 table in this file stores the repeated
// 0xFFFFFFFFFFFFFFFF word once - confirm against the table definition.
adds x3, x3, x11
|
||||
adcs x4, x4, x11
|
||||
stp x3, x4, [x2,#64]
|
||||
adcs x5, x5, x11
|
||||
adcs x6, x6, x12
|
||||
stp x5, x6, [x2,#80]
|
||||
adcs x7, x7, x13
|
||||
adcs x8, x8, x14
|
||||
stp x7, x8, [x2,#96]
|
||||
adcs x9, x9, x15
|
||||
// Final adc (not adcs): the carry out of the top word is not recorded
adc x10, x10, x16
|
||||
stp x9, x10, [x2,#112]
|
||||
ret
|
||||
|
||||
|
||||
//***********************************************************************
|
||||
// Double 2x503-bit multiprecision subtraction
|
||||
// Operation: c [x2] = c [x2] - a [x0] - b [x1]
|
||||
//***********************************************************************
|
||||
// Loads the sixteen 64-bit words at [x2], subtracts the sixteen words at [x0] and
// then the sixteen words at [x1] (two separate borrow chains), and stores the
// result back to [x2].
//   x0 = a (read), x1 = b (read), x2 = c (read and written)
// x27-x30 are used as temporaries and are saved/restored on the stack; x30 is the
// link register, so it must be restored before ret. Other scratch: x3-x18.
// NOTE(review): x18 is reserved as the platform register by some AArch64 ABIs -
// confirm it may be clobbered on every target.
.global mp_dblsub503x2_asm
|
||||
mp_dblsub503x2_asm:
|
||||
// Reserve 32 bytes of stack and save x27-x30
sub sp, sp, #32
|
||||
stp x27, x28, [sp, #0]
|
||||
stp x29, x30, [sp, #16]
|
||||
// Load all sixteen words of c into x3-x18
ldp x3, x4, [x2,#0]
|
||||
ldp x5, x6, [x2,#16]
|
||||
ldp x7, x8, [x2,#32]
|
||||
ldp x9, x10, [x2,#48]
|
||||
ldp x11, x12, [x2,#64]
|
||||
ldp x13, x14, [x2,#80]
|
||||
ldp x15, x16, [x2,#96]
|
||||
ldp x17, x18, [x2,#112]
|
||||
|
||||
// First pass: c -= a, four words of a at a time through x27-x30
ldp x27, x28, [x0,#0]
|
||||
ldp x29, x30, [x0,#16]
|
||||
subs x3, x3, x27
|
||||
sbcs x4, x4, x28
|
||||
sbcs x5, x5, x29
|
||||
sbcs x6, x6, x30
|
||||
ldp x27, x28, [x0,#32]
|
||||
ldp x29, x30, [x0,#48]
|
||||
sbcs x7, x7, x27
|
||||
sbcs x8, x8, x28
|
||||
sbcs x9, x9, x29
|
||||
sbcs x10, x10, x30
|
||||
ldp x27, x28, [x0,#64]
|
||||
ldp x29, x30, [x0,#80]
|
||||
sbcs x11, x11, x27
|
||||
sbcs x12, x12, x28
|
||||
sbcs x13, x13, x29
|
||||
sbcs x14, x14, x30
|
||||
ldp x27, x28, [x0,#96]
|
||||
ldp x29, x30, [x0,#112]
|
||||
sbcs x15, x15, x27
|
||||
sbcs x16, x16, x28
|
||||
sbcs x17, x17, x29
|
||||
// Final sbc (not sbcs): the borrow out of the top word is not recorded
sbc x18, x18, x30
|
||||
|
||||
// Second pass: c -= b, starting a fresh borrow chain with subs
ldp x27, x28, [x1,#0]
|
||||
ldp x29, x30, [x1,#16]
|
||||
subs x3, x3, x27
|
||||
sbcs x4, x4, x28
|
||||
sbcs x5, x5, x29
|
||||
sbcs x6, x6, x30
|
||||
ldp x27, x28, [x1,#32]
|
||||
ldp x29, x30, [x1,#48]
|
||||
sbcs x7, x7, x27
|
||||
sbcs x8, x8, x28
|
||||
sbcs x9, x9, x29
|
||||
sbcs x10, x10, x30
|
||||
ldp x27, x28, [x1,#64]
|
||||
ldp x29, x30, [x1,#80]
|
||||
sbcs x11, x11, x27
|
||||
sbcs x12, x12, x28
|
||||
sbcs x13, x13, x29
|
||||
sbcs x14, x14, x30
|
||||
ldp x27, x28, [x1,#96]
|
||||
ldp x29, x30, [x1,#112]
|
||||
sbcs x15, x15, x27
|
||||
sbcs x16, x16, x28
|
||||
sbcs x17, x17, x29
|
||||
sbc x18, x18, x30
|
||||
|
||||
// Write the sixteen result words back to c
stp x3, x4, [x2,#0]
|
||||
stp x5, x6, [x2,#16]
|
||||
stp x7, x8, [x2,#32]
|
||||
stp x9, x10, [x2,#48]
|
||||
stp x11, x12, [x2,#64]
|
||||
stp x13, x14, [x2,#80]
|
||||
stp x15, x16, [x2,#96]
|
||||
stp x17, x18, [x2,#112]
|
||||
|
||||
// Restore x27-x30 (including the return address in x30) and release the stack
ldp x27, x28, [sp, #0]
|
||||
ldp x29, x30, [sp, #16]
|
||||
add sp, sp, #32
|
||||
ret
|
||||
ret
|
||||
|
||||
|
||||
//***********************************************************************
|
||||
// 503-bit multiprecision addition
|
||||
// Operation: c [x2] = a [x0] + b [x1]
|
||||
//***********************************************************************
|
||||
// Adds the eight 64-bit words at [x0] to the eight words at [x1] with carry
// propagation and writes the eight-word sum to [x2]. No modular reduction is done.
//   x0 = a (read), x1 = b (read), x2 = c (written)
// Scratch registers: x3-x18.
// NOTE(review): x18 is reserved as the platform register by some AArch64 ABIs -
// confirm it may be clobbered on every target.
.global oqs_kem_sike_mp_add503_asm
|
||||
oqs_kem_sike_mp_add503_asm:
|
||||
// Words 0-3 of a go to x3-x6, words 0-3 of b go to x11-x14
ldp x3, x4, [x0,#0]
|
||||
ldp x5, x6, [x0,#16]
|
||||
ldp x11, x12, [x1,#0]
|
||||
ldp x13, x14, [x1,#16]
|
||||
|
||||
adds x3, x3, x11
|
||||
adcs x4, x4, x12
|
||||
adcs x5, x5, x13
|
||||
adcs x6, x6, x14
|
||||
// Words 4-7 of a go to x7-x10, words 4-7 of b go to x15-x18 (loads leave the carry flag intact)
ldp x7, x8, [x0,#32]
|
||||
ldp x9, x10, [x0,#48]
|
||||
ldp x15, x16, [x1,#32]
|
||||
ldp x17, x18, [x1,#48]
|
||||
adcs x7, x7, x15
|
||||
adcs x8, x8, x16
|
||||
adcs x9, x9, x17
|
||||
// Final adc (not adcs): the carry out of the top word is not recorded
adc x10, x10, x18
|
||||
|
||||
// Store the eight result words to c
stp x3, x4, [x2,#0]
|
||||
stp x5, x6, [x2,#16]
|
||||
stp x7, x8, [x2,#32]
|
||||
stp x9, x10, [x2,#48]
|
||||
ret
|
||||
|
||||
|
||||
//***********************************************************************
|
||||
// 2x503-bit multiprecision addition
|
||||
// Operation: c [x2] = a [x0] + b [x1]
|
||||
//***********************************************************************
|
||||
// Adds the sixteen 64-bit words at [x0] to the sixteen words at [x1] as a single
// carry chain and writes the sixteen-word sum to [x2]. The work is done in two
// halves of eight words; the carry flag links the halves.
//   x0 = a (read), x1 = b (read), x2 = c (written)
// Scratch registers: x3-x18.
// NOTE(review): x18 is reserved as the platform register by some AArch64 ABIs -
// confirm it may be clobbered on every target.
.global oqs_kem_sike_mp_add503x2_asm
|
||||
oqs_kem_sike_mp_add503x2_asm:
|
||||
// Low half: words 0-7
ldp x3, x4, [x0,#0]
|
||||
ldp x5, x6, [x0,#16]
|
||||
ldp x11, x12, [x1,#0]
|
||||
ldp x13, x14, [x1,#16]
|
||||
adds x3, x3, x11
|
||||
adcs x4, x4, x12
|
||||
adcs x5, x5, x13
|
||||
adcs x6, x6, x14
|
||||
ldp x7, x8, [x0,#32]
|
||||
ldp x9, x10, [x0,#48]
|
||||
ldp x15, x16, [x1,#32]
|
||||
ldp x17, x18, [x1,#48]
|
||||
adcs x7, x7, x15
|
||||
adcs x8, x8, x16
|
||||
adcs x9, x9, x17
|
||||
// adcs here (not adc): the carry out of word 7 must reach word 8 below
adcs x10, x10, x18
|
||||
|
||||
// Stores and loads do not change the flags, so the carry survives until the next adcs
stp x3, x4, [x2,#0]
|
||||
stp x5, x6, [x2,#16]
|
||||
stp x7, x8, [x2,#32]
|
||||
stp x9, x10, [x2,#48]
|
||||
|
||||
// High half: words 8-15, continuing the same carry chain
ldp x3, x4, [x0,#64]
|
||||
ldp x5, x6, [x0,#80]
|
||||
ldp x11, x12, [x1,#64]
|
||||
ldp x13, x14, [x1,#80]
|
||||
adcs x3, x3, x11
|
||||
adcs x4, x4, x12
|
||||
adcs x5, x5, x13
|
||||
adcs x6, x6, x14
|
||||
ldp x7, x8, [x0,#96]
|
||||
ldp x9, x10, [x0,#112]
|
||||
ldp x15, x16, [x1,#96]
|
||||
ldp x17, x18, [x1,#112]
|
||||
adcs x7, x7, x15
|
||||
adcs x8, x8, x16
|
||||
adcs x9, x9, x17
|
||||
// Final adc (not adcs): the carry out of the top word is not recorded
adc x10, x10, x18
|
||||
|
||||
stp x3, x4, [x2,#64]
|
||||
stp x5, x6, [x2,#80]
|
||||
stp x7, x8, [x2,#96]
|
||||
stp x9, x10, [x2,#112]
|
||||
ret
|
||||
|
||||
|
||||
//***********************************************************************
|
||||
// 2x503-bit multiprecision subtraction/addition
|
||||
// Operation: c [x2] = a [x0] - b [x1]. If c < 0, add p503*2^512
|
||||
//***********************************************************************
|
||||
// Subtracts the sixteen 64-bit words at [x1] from the sixteen words at [x0] as one
// borrow chain. The low eight result words are stored to [x2] as they are produced.
// The high eight words then get the p503 constant ANDed with a mask added to them
// (mask = all-ones if the full subtraction borrowed, zero otherwise) before being
// stored to [x2,#64..]. No branches are used.
//   x0 = a (read, then reused as the mask), x1 = b (read), x2 = c (written)
// Scratch registers: x3-x16.
.global oqs_kem_sike_mp_subadd503x2_asm
|
||||
oqs_kem_sike_mp_subadd503x2_asm:
|
||||
// Low half: words 0-7. Loads and stores are interleaved with the sbcs chain;
// neither changes the flags, so the borrow is preserved.
ldp x3, x4, [x0,#0]
|
||||
ldp x11, x12, [x1,#0]
|
||||
ldp x5, x6, [x0,#16]
|
||||
ldp x13, x14, [x1,#16]
|
||||
subs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
stp x3, x4, [x2,#0]
|
||||
ldp x7, x8, [x0,#32]
|
||||
ldp x11, x12, [x1,#32]
|
||||
sbcs x5, x5, x13
|
||||
sbcs x6, x6, x14
|
||||
stp x5, x6, [x2,#16]
|
||||
ldp x9, x10, [x0,#48]
|
||||
ldp x13, x14, [x1,#48]
|
||||
sbcs x7, x7, x11
|
||||
sbcs x8, x8, x12
|
||||
stp x7, x8, [x2,#32]
|
||||
sbcs x9, x9, x13
|
||||
sbcs x10, x10, x14
|
||||
stp x9, x10, [x2,#48]
|
||||
|
||||
// High half: words 8-15, continuing the same borrow chain; results stay in x3-x10
ldp x3, x4, [x0,#64]
|
||||
ldp x11, x12, [x1,#64]
|
||||
ldp x5, x6, [x0,#80]
|
||||
ldp x13, x14, [x1,#80]
|
||||
sbcs x3, x3, x11
|
||||
sbcs x4, x4, x12
|
||||
ldp x7, x8, [x0,#96]
|
||||
ldp x11, x12, [x1,#96]
|
||||
sbcs x5, x5, x13
|
||||
sbcs x6, x6, x14
|
||||
ldp x9, x10, [x0,#112]
|
||||
ldp x13, x14, [x1,#112]
|
||||
// Each of x11-x14 is reloaded with a p503 table word right after its last use as a b word
sbcs x7, x7, x11
|
||||
ldr x11, p503
|
||||
sbcs x8, x8, x12
|
||||
ldr x12, p503 + 8
|
||||
sbcs x9, x9, x13
|
||||
ldr x13, p503 + 16
|
||||
sbcs x10, x10, x14
|
||||
ldr x14, p503 + 24
|
||||
// x0 = 0 - 0 - borrow: all-ones if a < b, zero otherwise (pointer a is no longer needed)
sbc x0, xzr, xzr
|
||||
|
||||
// Add p503 anded with the mask in x0
|
||||
ldr x15, p503 + 32
|
||||
ldr x16, p503 + 40
|
||||
and x11, x11, x0
|
||||
and x12, x12, x0
|
||||
and x13, x13, x0
|
||||
and x14, x14, x0
|
||||
and x15, x15, x0
|
||||
and x16, x16, x0
|
||||
// Only six table words are loaded for eight result words: x11 is added to words
// 8, 9 and 10. Presumably the p503 table in this file stores the repeated
// 0xFFFFFFFFFFFFFFFF word once - confirm against the table definition.
adds x3, x3, x11
|
||||
adcs x4, x4, x11
|
||||
stp x3, x4, [x2,#64]
|
||||
adcs x5, x5, x11
|
||||
adcs x6, x6, x12
|
||||
stp x5, x6, [x2,#80]
|
||||
adcs x7, x7, x13
|
||||
adcs x8, x8, x14
|
||||
stp x7, x8, [x2,#96]
|
||||
adcs x9, x9, x15
|
||||
// Final adc (not adcs): the carry out of the top word is not recorded
adc x10, x10, x16
|
||||
stp x9, x10, [x2,#112]
|
||||
ret
|
||||
|
||||
|
||||
//***********************************************************************
|
||||
// Double 2x503-bit multiprecision subtraction
|
||||
// Operation: c [x2] = c [x2] - a [x0] - b [x1]
|
||||
//***********************************************************************
|
||||
// Loads the sixteen 64-bit words at [x2], subtracts the sixteen words at [x0] and
// then the sixteen words at [x1] (two separate borrow chains), and stores the
// result back to [x2].
//   x0 = a (read), x1 = b (read), x2 = c (read and written)
// x27-x30 are used as temporaries and are saved/restored on the stack; x30 is the
// link register, so it must be restored before ret. Other scratch: x3-x18.
// NOTE(review): x18 is reserved as the platform register by some AArch64 ABIs -
// confirm it may be clobbered on every target.
.global oqs_kem_sike_mp_dblsub503x2_asm
|
||||
oqs_kem_sike_mp_dblsub503x2_asm:
|
||||
// Reserve 32 bytes of stack and save x27-x30
sub sp, sp, #32
|
||||
stp x27, x28, [sp, #0]
|
||||
stp x29, x30, [sp, #16]
|
||||
// Load all sixteen words of c into x3-x18
ldp x3, x4, [x2,#0]
|
||||
ldp x5, x6, [x2,#16]
|
||||
ldp x7, x8, [x2,#32]
|
||||
ldp x9, x10, [x2,#48]
|
||||
ldp x11, x12, [x2,#64]
|
||||
ldp x13, x14, [x2,#80]
|
||||
ldp x15, x16, [x2,#96]
|
||||
ldp x17, x18, [x2,#112]
|
||||
|
||||
// First pass: c -= a, four words of a at a time through x27-x30
ldp x27, x28, [x0,#0]
|
||||
ldp x29, x30, [x0,#16]
|
||||
subs x3, x3, x27
|
||||
sbcs x4, x4, x28
|
||||
sbcs x5, x5, x29
|
||||
sbcs x6, x6, x30
|
||||
ldp x27, x28, [x0,#32]
|
||||
ldp x29, x30, [x0,#48]
|
||||
sbcs x7, x7, x27
|
||||
sbcs x8, x8, x28
|
||||
sbcs x9, x9, x29
|
||||
sbcs x10, x10, x30
|
||||
ldp x27, x28, [x0,#64]
|
||||
ldp x29, x30, [x0,#80]
|
||||
sbcs x11, x11, x27
|
||||
sbcs x12, x12, x28
|
||||
sbcs x13, x13, x29
|
||||
sbcs x14, x14, x30
|
||||
ldp x27, x28, [x0,#96]
|
||||
ldp x29, x30, [x0,#112]
|
||||
sbcs x15, x15, x27
|
||||
sbcs x16, x16, x28
|
||||
sbcs x17, x17, x29
|
||||
// Final sbc (not sbcs): the borrow out of the top word is not recorded
sbc x18, x18, x30
|
||||
|
||||
// Second pass: c -= b, starting a fresh borrow chain with subs
ldp x27, x28, [x1,#0]
|
||||
ldp x29, x30, [x1,#16]
|
||||
subs x3, x3, x27
|
||||
sbcs x4, x4, x28
|
||||
sbcs x5, x5, x29
|
||||
sbcs x6, x6, x30
|
||||
ldp x27, x28, [x1,#32]
|
||||
ldp x29, x30, [x1,#48]
|
||||
sbcs x7, x7, x27
|
||||
sbcs x8, x8, x28
|
||||
sbcs x9, x9, x29
|
||||
sbcs x10, x10, x30
|
||||
ldp x27, x28, [x1,#64]
|
||||
ldp x29, x30, [x1,#80]
|
||||
sbcs x11, x11, x27
|
||||
sbcs x12, x12, x28
|
||||
sbcs x13, x13, x29
|
||||
sbcs x14, x14, x30
|
||||
ldp x27, x28, [x1,#96]
|
||||
ldp x29, x30, [x1,#112]
|
||||
sbcs x15, x15, x27
|
||||
sbcs x16, x16, x28
|
||||
sbcs x17, x17, x29
|
||||
sbc x18, x18, x30
|
||||
|
||||
// Write the sixteen result words back to c
stp x3, x4, [x2,#0]
|
||||
stp x5, x6, [x2,#16]
|
||||
stp x7, x8, [x2,#32]
|
||||
stp x9, x10, [x2,#48]
|
||||
stp x11, x12, [x2,#64]
|
||||
stp x13, x14, [x2,#80]
|
||||
stp x15, x16, [x2,#96]
|
||||
stp x17, x18, [x2,#112]
|
||||
|
||||
// Restore x27-x30 (including the return address in x30) and release the stack
ldp x27, x28, [sp, #0]
|
||||
ldp x29, x30, [sp, #16]
|
||||
add sp, sp, #32
|
||||
ret
|
||||
|
@ -31,15 +31,15 @@
|
||||
// Curve isogeny system "SIDHp503". Base curve: Montgomery curve By^2 = Cx^3 + Ax^2 + Cx defined over GF(p503^2), where A=6, B=1, C=1 and p503 = 2^250*3^159-1
|
||||
//
|
||||
|
||||
static const uint64_t p503[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xABFFFFFFFFFFFFFF,
|
||||
0x13085BDA2211E7A0, 0x1B9BF6C87B7E7DAF, 0x6045C6BDDA77A4D0, 0x004066F541811E1E
|
||||
};
|
||||
static const uint64_t p503p1[NWORDS64_FIELD] = {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xAC00000000000000,
|
||||
0x13085BDA2211E7A0, 0x1B9BF6C87B7E7DAF, 0x6045C6BDDA77A4D0, 0x004066F541811E1E
|
||||
};
|
||||
static const uint64_t p503x2[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x57FFFFFFFFFFFFFF,
|
||||
0x2610B7B44423CF41, 0x3737ED90F6FCFB5E, 0xC08B8D7BB4EF49A0, 0x0080CDEA83023C3C
|
||||
};
|
||||
const uint64_t p503[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xABFFFFFFFFFFFFFF,
|
||||
0x13085BDA2211E7A0, 0x1B9BF6C87B7E7DAF, 0x6045C6BDDA77A4D0, 0x004066F541811E1E
|
||||
};
|
||||
const uint64_t p503p1[NWORDS64_FIELD] = {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xAC00000000000000,
|
||||
0x13085BDA2211E7A0, 0x1B9BF6C87B7E7DAF, 0x6045C6BDDA77A4D0, 0x004066F541811E1E
|
||||
};
|
||||
const uint64_t p503x2[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x57FFFFFFFFFFFFFF,
|
||||
0x2610B7B44423CF41, 0x3737ED90F6FCFB5E, 0xC08B8D7BB4EF49A0, 0x0080CDEA83023C3C
|
||||
};
|
||||
// Order of Alice's subgroup
|
||||
static const uint64_t Alice_order[NWORDS64_ORDER] = {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0400000000000000};
|
||||
// Order of Bob's subgroup
|
||||
@ -125,9 +125,9 @@ static const unsigned int strat_Bob[MAX_Bob - 1] = {
|
||||
#define fp2inv_mont fp2inv503_mont
|
||||
#define fp2inv_mont_bingcd fp2inv503_mont_bingcd
|
||||
#define fpequal_non_constant_time fpequal503_non_constant_time
|
||||
#define mp_add_asm mp_add503_asm
|
||||
#define mp_subaddx2_asm mp_subadd503x2_asm
|
||||
#define mp_dblsubx2_asm mp_dblsub503x2_asm
|
||||
#define mp_add_asm oqs_kem_sike_mp_add503_asm
|
||||
#define mp_subaddx2_asm oqs_kem_sike_mp_subadd503x2_asm
|
||||
#define mp_dblsubx2_asm oqs_kem_sike_mp_dblsub503x2_asm
|
||||
#define crypto_kem_keypair OQS_KEM_sike_p503_keypair
|
||||
#define crypto_kem_enc OQS_KEM_sike_p503_encaps
|
||||
#define crypto_kem_dec OQS_KEM_sike_p503_decaps
|
||||
@ -137,13 +137,14 @@ static const unsigned int strat_Bob[MAX_Bob - 1] = {
|
||||
#define EphemeralKeyGeneration_B oqs_kem_sidh_p503_EphemeralKeyGeneration_B
|
||||
#define EphemeralSecretAgreement_A oqs_kem_sidh_p503_EphemeralSecretAgreement_A
|
||||
#define EphemeralSecretAgreement_B oqs_kem_sidh_p503_EphemeralSecretAgreement_B
|
||||
#ifdef USE_SIKEP503_ASM
|
||||
#define USE_SIKE_ASM
|
||||
#endif
|
||||
|
||||
#if defined(X86_64)
|
||||
#include "AMD64/fp_x64.c"
|
||||
// #include "AMD64/fp_x64_asm.S" FIXMEOQS
|
||||
#elif defined(ARM64)
|
||||
#include "ARM64/fp_arm64.c"
|
||||
// #include "ARM64/fp_arm64_asm.S" FIXMEOQS
|
||||
#else
|
||||
#include "generic/fp_generic.c"
|
||||
#endif
|
||||
|
@ -384,9 +384,9 @@ static const uint64_t v_3_torsion[20][2 * NWORDS64_FIELD] = {
|
||||
#define fp2inv_mont fp2inv503_mont
|
||||
#define fp2inv_mont_bingcd fp2inv503_mont_bingcd
|
||||
#define fpequal_non_constant_time fpequal503_non_constant_time
|
||||
#define mp_add_asm mp_add503_asm
|
||||
#define mp_subaddx2_asm mp_subadd503x2_asm
|
||||
#define mp_dblsubx2_asm mp_dblsub503x2_asm
|
||||
#define mp_add_asm oqs_kem_sike_mp_add503_asm
|
||||
#define mp_subaddx2_asm oqs_kem_sike_mp_subadd503x2_asm
|
||||
#define mp_dblsubx2_asm oqs_kem_sike_mp_dblsub503x2_asm
|
||||
#define crypto_kem_keypair OQS_KEM_sike_p503_compressed_keypair
|
||||
#define crypto_kem_enc OQS_KEM_sike_p503_compressed_encaps
|
||||
#define crypto_kem_dec OQS_KEM_sike_p503_compressed_decaps
|
||||
@ -396,13 +396,14 @@ static const uint64_t v_3_torsion[20][2 * NWORDS64_FIELD] = {
|
||||
#define EphemeralKeyGeneration_B oqs_kem_sidh_p503_compressed_EphemeralKeyGeneration_B
|
||||
#define EphemeralSecretAgreement_A oqs_kem_sidh_p503_compressed_EphemeralSecretAgreement_A
|
||||
#define EphemeralSecretAgreement_B oqs_kem_sidh_p503_compressed_EphemeralSecretAgreement_B
|
||||
#ifdef USE_SIKEP503_ASM
|
||||
#define USE_SIKE_ASM
|
||||
#endif
|
||||
|
||||
#if defined(X86_64)
|
||||
#include "AMD64/fp_x64.c"
|
||||
// #include "AMD64/fp_x64_asm.S" FIXMEOQS
|
||||
#elif defined(ARM64)
|
||||
#include "ARM64/fp_arm64.c"
|
||||
// #include "ARM64/fp_arm64_asm.S" FIXMEOQS
|
||||
#else
|
||||
#include "generic/fp_generic.c"
|
||||
#endif
|
||||
|
@ -121,17 +121,17 @@ static unsigned int mp_add(const digit_t *a, const digit_t *b, digit_t *c, const
|
||||
|
||||
// 503-bit multiprecision addition, c = a+b
|
||||
static void mp_add503(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
static void mp_add503_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
void oqs_kem_sike_mp_add503_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
|
||||
// Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = nwords. Returns the borrow bit
|
||||
static unsigned int mp_sub(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords);
|
||||
|
||||
// 2x503-bit multiprecision subtraction followed by addition with p503*2^512, c = a-b+(p503*2^512) if a-b < 0, otherwise c=a-b
|
||||
static void mp_subaddx2_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
static void mp_subadd503x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
void oqs_kem_sike_mp_subaddx2_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
void oqs_kem_sike_mp_subadd503x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
|
||||
// Double 2x503-bit multiprecision subtraction, c = c-a-b, where c > a and c > b
|
||||
static void mp_dblsub503x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
void oqs_kem_sike_mp_dblsub503x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
|
||||
// Multiprecision left shift
|
||||
static void mp_shiftleft(digit_t *x, unsigned int shift, const unsigned int nwords);
|
||||
@ -161,13 +161,13 @@ static bool fpequal503_non_constant_time(const digit_t *a, const digit_t *b);
|
||||
|
||||
// Modular addition, c = a+b mod p503
|
||||
extern void fpadd503(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
extern void fpadd503_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
static void fpadd503_inline_asm(const digit_t *a, const digit_t *b, const digit_t *p, digit_t *c);
|
||||
extern void oqs_kem_sike_fpadd503_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
void oqs_kem_sike_fpadd503_inline_asm(const digit_t *a, const digit_t *b, const digit_t *p, digit_t *c);
|
||||
|
||||
// Modular subtraction, c = a-b mod p503
|
||||
extern void fpsub503(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
extern void fpsub503_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
static void fpsub503_inline_asm(const digit_t *a, const digit_t *b, const digit_t *p, digit_t *c);
|
||||
extern void oqs_kem_sike_fpsub503_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
void oqs_kem_sike_fpsub503_inline_asm(const digit_t *a, const digit_t *b, const digit_t *p, digit_t *c);
|
||||
|
||||
// Modular negation, a = -a mod p503
|
||||
extern void fpneg503(digit_t *a);
|
||||
@ -179,14 +179,13 @@ static void fpdiv2_503(const digit_t *a, digit_t *c);
|
||||
static void fpcorrection503(digit_t *a);
|
||||
|
||||
// 503-bit Montgomery reduction, c = a mod p
|
||||
static void rdc_mont(const digit_t *a, digit_t *c);
|
||||
static void rdc503_asm(const digit_t *ma, digit_t *mc);
|
||||
static void rdc503_inline_asm(const felm_t ma, felm_t mb, felm_t mc);
|
||||
static void rdc_mont(digit_t *a, digit_t *c);
|
||||
void oqs_kem_sike_rdc503_asm(digit_t *ma, digit_t *mc);
|
||||
|
||||
// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p503, where R=2^512
|
||||
static void fpmul503_mont(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
static void mul503_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
static void mul503_inline_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
void oqs_kem_sike_mul503_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
void oqs_kem_sike_mul503_inline_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
|
||||
// Field squaring using Montgomery arithmetic, c = a^2*R^-1 mod p503, where R=2^512
|
||||
static void fpsqr503_mont(const digit_t *ma, digit_t *mc);
|
||||
|
@ -155,7 +155,7 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
|
||||
c[2 * nwords - 1] = v;
|
||||
}
|
||||
|
||||
void rdc_mont(const digit_t *ma, digit_t *mc) { // Efficient Montgomery reduction using comba and exploiting the special form of the prime p503.
|
||||
void rdc_mont(digit_t *ma, digit_t *mc) { // Efficient Montgomery reduction using comba and exploiting the special form of the prime p503.
|
||||
// mc = ma*R^-1 mod p503x2, where R = 2^512.
|
||||
// If ma < 2^512*p503, the output mc is in the range [0, 2*p503-1].
|
||||
// ma is assumed to be in Montgomery representation.
|
||||
|
@ -17,7 +17,7 @@ __inline void fpadd610(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
|
||||
// Inputs: a, b in [0, 2*p610-1]
|
||||
// Output: c in [0, 2*p610-1]
|
||||
|
||||
#if (OS_TARGET == OS_WIN)
|
||||
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
|
||||
unsigned int i, carry = 0;
|
||||
digit_t mask;
|
||||
|
||||
@ -38,7 +38,7 @@ __inline void fpadd610(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
|
||||
|
||||
#elif (OS_TARGET == OS_LINUX)
|
||||
|
||||
fpadd610_asm(a, b, c);
|
||||
oqs_kem_sike_fpadd610_asm(a, b, c);
|
||||
|
||||
#endif
|
||||
}
|
||||
@ -47,7 +47,7 @@ __inline void fpsub610(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
|
||||
// Inputs: a, b in [0, 2*p610-1]
|
||||
// Output: c in [0, 2*p610-1]
|
||||
|
||||
#if (OS_TARGET == OS_WIN)
|
||||
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
|
||||
unsigned int i, borrow = 0;
|
||||
digit_t mask;
|
||||
|
||||
@ -63,7 +63,7 @@ __inline void fpsub610(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
|
||||
|
||||
#elif (OS_TARGET == OS_LINUX)
|
||||
|
||||
fpsub610_asm(a, b, c);
|
||||
oqs_kem_sike_fpsub610_asm(a, b, c);
|
||||
|
||||
#endif
|
||||
}
|
||||
@ -110,7 +110,7 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
|
||||
|
||||
UNREFERENCED_PARAMETER(nwords);
|
||||
|
||||
#if (OS_TARGET == OS_WIN)
|
||||
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
|
||||
digit_t t = 0;
|
||||
uint128_t uv = {0};
|
||||
unsigned int carry = 0;
|
||||
@ -408,17 +408,17 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
|
||||
|
||||
#elif (OS_TARGET == OS_LINUX)
|
||||
|
||||
mul610_asm(a, b, c);
|
||||
oqs_kem_sike_mul610_asm(a, b, c);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
void rdc_mont(const digit_t *ma, digit_t *mc) { // Montgomery reduction exploiting special form of the prime.
|
||||
void rdc_mont(digit_t *ma, digit_t *mc) { // Montgomery reduction exploiting special form of the prime.
|
||||
// mc = ma*R^-1 mod p610x2, where R = 2^640.
|
||||
// If ma < 2^640*p610, the output mc is in the range [0, 2*p610-1].
|
||||
// ma is assumed to be in Montgomery representation.
|
||||
|
||||
#if (OS_TARGET == OS_WIN)
|
||||
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
|
||||
unsigned int carry;
|
||||
digit_t t = 0;
|
||||
uint128_t uv = {0};
|
||||
@ -661,7 +661,7 @@ void rdc_mont(const digit_t *ma, digit_t *mc) { // Montgomery reduction exploiti
|
||||
|
||||
#elif (OS_TARGET == OS_LINUX)
|
||||
|
||||
rdc610_asm(ma, mc);
|
||||
oqs_kem_sike_rdc610_asm(ma, mc);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
@ -6,6 +6,7 @@
|
||||
|
||||
.intel_syntax noprefix
|
||||
|
||||
|
||||
// Registers that are used for parameter passing:
|
||||
#define reg_p1 rdi
|
||||
#define reg_p2 rsi
|
||||
@ -17,8 +18,8 @@
|
||||
// Field addition
|
||||
// Operation: c [reg_p3] = a [reg_p1] + b [reg_p2]
|
||||
//***********************************************************************
|
||||
.global fpadd610_asm
|
||||
fpadd610_asm:
|
||||
.global oqs_kem_sike_fpadd610_asm
|
||||
oqs_kem_sike_fpadd610_asm:
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
@ -123,8 +124,8 @@ fpadd610_asm:
|
||||
// Field subtraction
|
||||
// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2]
|
||||
//***********************************************************************
|
||||
.global fpsub610_asm
|
||||
fpsub610_asm:
|
||||
.global oqs_kem_sike_fpsub610_asm
|
||||
oqs_kem_sike_fpsub610_asm:
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
@ -426,8 +427,8 @@ fpsub610_asm:
|
||||
//*****************************************************************************
|
||||
// 610-bit multiplication using Karatsuba (one level), schoolbook (two levels)
|
||||
//*****************************************************************************
|
||||
.global mul610_asm
|
||||
mul610_asm:
|
||||
.global oqs_kem_sike_mul610_asm
|
||||
oqs_kem_sike_mul610_asm:
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
@ -621,8 +622,8 @@ mul610_asm:
|
||||
// Operation: c [reg_p3] = a [reg_p1] * b [reg_p2]
|
||||
// NOTE: a=c or b=c are not allowed
|
||||
//***********************************************************************
|
||||
.global mul610_asm
|
||||
mul610_asm:
|
||||
.global oqs_kem_sike_mul610_asm
|
||||
oqs_kem_sike_mul610_asm:
|
||||
|
||||
ret
|
||||
|
||||
@ -757,8 +758,8 @@ mul610_asm:
|
||||
// Operation: c [reg_p2] = a [reg_p1]
|
||||
// NOTE: a=c is not allowed
|
||||
//**************************************************************************************
|
||||
.global rdc610_asm
|
||||
rdc610_asm:
|
||||
.global oqs_kem_sike_rdc610_asm
|
||||
oqs_kem_sike_rdc610_asm:
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
@ -939,8 +940,8 @@ rdc610_asm:
|
||||
// Operation: c [reg_p2] = a [reg_p1]
|
||||
// NOTE: a=c is not allowed
|
||||
//***********************************************************************
|
||||
.global rdc610_asm
|
||||
rdc610_asm:
|
||||
.global oqs_kem_sike_rdc610_asm
|
||||
oqs_kem_sike_rdc610_asm:
|
||||
|
||||
ret
|
||||
|
||||
@ -953,8 +954,8 @@ rdc610_asm:
|
||||
// 610-bit multiprecision addition
|
||||
// Operation: c [reg_p3] = a [reg_p1] + b [reg_p2]
|
||||
//***********************************************************************
|
||||
.global mp_add610_asm
|
||||
mp_add610_asm:
|
||||
.global oqs_kem_sike_mp_add610_asm
|
||||
oqs_kem_sike_mp_add610_asm:
|
||||
mov r8, [reg_p1]
|
||||
mov r9, [reg_p1+8]
|
||||
mov r10, [reg_p1+16]
|
||||
@ -993,8 +994,8 @@ mp_add610_asm:
|
||||
// 2x610-bit multiprecision subtraction/addition
|
||||
// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2]. If c < 0, add p610*2^640
|
||||
//***********************************************************************
|
||||
.global mp_subadd610x2_asm
|
||||
mp_subadd610x2_asm:
|
||||
.global oqs_kem_sike_mp_subadd610x2_asm
|
||||
oqs_kem_sike_mp_subadd610x2_asm:
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
@ -1112,8 +1113,8 @@ mp_subadd610x2_asm:
|
||||
// Double 2x610-bit multiprecision subtraction
|
||||
// Operation: c [reg_p3] = c [reg_p3] - a [reg_p1] - b [reg_p2]
|
||||
//***********************************************************************
|
||||
.global mp_dblsub610x2_asm
|
||||
mp_dblsub610x2_asm:
|
||||
.global oqs_kem_sike_mp_dblsub610x2_asm
|
||||
oqs_kem_sike_mp_dblsub610x2_asm:
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
@ -1211,4 +1212,4 @@ mp_dblsub610x2_asm:
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
ret
|
||||
ret
|
||||
|
@ -31,15 +31,15 @@
|
||||
// Curve isogeny system "SIDHp610". Base curve: Montgomery curve By^2 = Cx^3 + Ax^2 + Cx defined over GF(p610^2), where A=6, B=1, C=1 and p610 = 2^305*3^192-1
|
||||
//
|
||||
|
||||
static const uint64_t p610[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x6E01FFFFFFFFFFFF,
|
||||
0xB1784DE8AA5AB02E, 0x9AE7BF45048FF9AB, 0xB255B2FA10C4252A, 0x819010C251E7D88C, 0x000000027BF6A768
|
||||
};
|
||||
static const uint64_t p610p1[NWORDS64_FIELD] = {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x6E02000000000000,
|
||||
0xB1784DE8AA5AB02E, 0x9AE7BF45048FF9AB, 0xB255B2FA10C4252A, 0x819010C251E7D88C, 0x000000027BF6A768
|
||||
};
|
||||
static const uint64_t p610x2[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xDC03FFFFFFFFFFFF,
|
||||
0x62F09BD154B5605C, 0x35CF7E8A091FF357, 0x64AB65F421884A55, 0x03202184A3CFB119, 0x00000004F7ED4ED1
|
||||
};
|
||||
const uint64_t p610[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x6E01FFFFFFFFFFFF,
|
||||
0xB1784DE8AA5AB02E, 0x9AE7BF45048FF9AB, 0xB255B2FA10C4252A, 0x819010C251E7D88C, 0x000000027BF6A768
|
||||
};
|
||||
const uint64_t p610p1[NWORDS64_FIELD] = {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x6E02000000000000,
|
||||
0xB1784DE8AA5AB02E, 0x9AE7BF45048FF9AB, 0xB255B2FA10C4252A, 0x819010C251E7D88C, 0x000000027BF6A768
|
||||
};
|
||||
const uint64_t p610x2[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xDC03FFFFFFFFFFFF,
|
||||
0x62F09BD154B5605C, 0x35CF7E8A091FF357, 0x64AB65F421884A55, 0x03202184A3CFB119, 0x00000004F7ED4ED1
|
||||
};
|
||||
// Order of Alice's subgroup
|
||||
static const uint64_t Alice_order[NWORDS64_ORDER] = {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0002000000000000};
|
||||
// Order of Bob's subgroup
|
||||
@ -127,9 +127,9 @@ static const unsigned int strat_Bob[MAX_Bob - 1] = {
|
||||
#define fp2inv_mont fp2inv610_mont
|
||||
#define fp2inv_mont_bingcd fp2inv610_mont_bingcd
|
||||
#define fpequal_non_constant_time fpequal610_non_constant_time
|
||||
#define mp_add_asm mp_add610_asm
|
||||
#define mp_subaddx2_asm mp_subadd610x2_asm
|
||||
#define mp_dblsubx2_asm mp_dblsub610x2_asm
|
||||
#define mp_add_asm oqs_kem_sike_mp_add610_asm
|
||||
#define mp_subaddx2_asm oqs_kem_sike_mp_subadd610x2_asm
|
||||
#define mp_dblsubx2_asm oqs_kem_sike_mp_dblsub610x2_asm
|
||||
#define crypto_kem_keypair OQS_KEM_sike_p610_keypair
|
||||
#define crypto_kem_enc OQS_KEM_sike_p610_encaps
|
||||
#define crypto_kem_dec OQS_KEM_sike_p610_decaps
|
||||
@ -139,10 +139,12 @@ static const unsigned int strat_Bob[MAX_Bob - 1] = {
|
||||
#define EphemeralKeyGeneration_B oqs_kem_sidh_p610_EphemeralKeyGeneration_B
|
||||
#define EphemeralSecretAgreement_A oqs_kem_sidh_p610_EphemeralSecretAgreement_A
|
||||
#define EphemeralSecretAgreement_B oqs_kem_sidh_p610_EphemeralSecretAgreement_B
|
||||
#ifdef USE_SIKEP610_ASM
|
||||
#define USE_SIKE_ASM
|
||||
#endif
|
||||
|
||||
#if defined(X86_64)
|
||||
#include "AMD64/fp_x64.c"
|
||||
// #include "AMD64/fp_x64_asm.S" FIXMEOQS
|
||||
#else
|
||||
#include "generic/fp_generic.c"
|
||||
#endif
|
||||
|
@ -387,9 +387,9 @@ static const uint64_t v_3_torsion[20][2 * NWORDS64_FIELD] = {
|
||||
#define fp2inv_mont fp2inv610_mont
|
||||
#define fp2inv_mont_bingcd fp2inv610_mont_bingcd
|
||||
#define fpequal_non_constant_time fpequal610_non_constant_time
|
||||
#define mp_add_asm mp_add610_asm
|
||||
#define mp_subaddx2_asm mp_subadd610x2_asm
|
||||
#define mp_dblsubx2_asm mp_dblsub610x2_asm
|
||||
#define mp_add_asm oqs_kem_sike_mp_add610_asm
|
||||
#define mp_subaddx2_asm oqs_kem_sike_mp_subadd610x2_asm
|
||||
#define mp_dblsubx2_asm oqs_kem_sike_mp_dblsub610x2_asm
|
||||
#define crypto_kem_keypair OQS_KEM_sike_p610_compressed_keypair
|
||||
#define crypto_kem_enc OQS_KEM_sike_p610_compressed_encaps
|
||||
#define crypto_kem_dec OQS_KEM_sike_p610_compressed_decaps
|
||||
@ -399,10 +399,12 @@ static const uint64_t v_3_torsion[20][2 * NWORDS64_FIELD] = {
|
||||
#define EphemeralKeyGeneration_B oqs_kem_sidh_p610_compressed_EphemeralKeyGeneration_B
|
||||
#define EphemeralSecretAgreement_A oqs_kem_sidh_p610_compressed_EphemeralSecretAgreement_A
|
||||
#define EphemeralSecretAgreement_B oqs_kem_sidh_p610_compressed_EphemeralSecretAgreement_B
|
||||
#ifdef USE_SIKEP610_ASM
|
||||
#define USE_SIKE_ASM
|
||||
#endif
|
||||
|
||||
#if defined(X86_64)
|
||||
#include "AMD64/fp_x64.c"
|
||||
// #include "AMD64/fp_x64_asm.S" FIXMEOQS
|
||||
#else
|
||||
#include "generic/fp_generic.c"
|
||||
#endif
|
||||
|
@ -121,17 +121,17 @@ static unsigned int mp_add(const digit_t *a, const digit_t *b, digit_t *c, const
|
||||
|
||||
// 610-bit multiprecision addition, c = a+b
|
||||
static void mp_add610(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
static void mp_add610_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
void oqs_kem_sike_mp_add610_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
|
||||
// Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = nwords. Returns the borrow bit
|
||||
static unsigned int mp_sub(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords);
|
||||
|
||||
// 2x610-bit multiprecision subtraction followed by addition with p610*2^640, c = a-b+(p610*2^640) if a-b < 0, otherwise c=a-b
|
||||
static void mp_subaddx2_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
static void mp_subadd610x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
void oqs_kem_sike_mp_subaddx2_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
void oqs_kem_sike_mp_subadd610x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
|
||||
// Double 2x610-bit multiprecision subtraction, c = c-a-b, where c > a and c > b
|
||||
static void mp_dblsub610x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
void oqs_kem_sike_mp_dblsub610x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
|
||||
// Multiprecision left shift
|
||||
static void mp_shiftleft(digit_t *x, unsigned int shift, const unsigned int nwords);
|
||||
@ -161,11 +161,11 @@ static bool fpequal610_non_constant_time(const digit_t *a, const digit_t *b);
|
||||
|
||||
// Modular addition, c = a+b mod p610
|
||||
extern void fpadd610(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
extern void fpadd610_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
extern void oqs_kem_sike_fpadd610_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
|
||||
// Modular subtraction, c = a-b mod p610
|
||||
extern void fpsub610(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
extern void fpsub610_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
extern void oqs_kem_sike_fpsub610_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
|
||||
// Modular negation, a = -a mod p610
|
||||
extern void fpneg610(digit_t *a);
|
||||
@ -177,12 +177,12 @@ static void fpdiv2_610(const digit_t *a, digit_t *c);
|
||||
static void fpcorrection610(digit_t *a);
|
||||
|
||||
// 610-bit Montgomery reduction, c = a*R^-1 mod p610, where R=2^640
|
||||
static void rdc_mont(const digit_t *a, digit_t *c);
|
||||
static void rdc_mont(digit_t *a, digit_t *c);
|
||||
void oqs_kem_sike_rdc610_asm(digit_t *a, digit_t *c);
|
||||
|
||||
// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p610, where R=2^640
|
||||
static void fpmul610_mont(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
static void mul610_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
static void rdc610_asm(const digit_t *ma, digit_t *mc);
|
||||
void oqs_kem_sike_mul610_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
|
||||
// Field squaring using Montgomery arithmetic, c = a^2*R^-1 mod p610, where R=2^640
|
||||
static void fpsqr610_mont(const digit_t *ma, digit_t *mc);
|
||||
|
@ -155,7 +155,7 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
|
||||
c[2 * nwords - 1] = v;
|
||||
}
|
||||
|
||||
void rdc_mont(const digit_t *ma, digit_t *mc) { // Efficient Montgomery reduction using comba and exploiting the special form of the prime p610.
|
||||
void rdc_mont(digit_t *ma, digit_t *mc) { // Efficient Montgomery reduction using comba and exploiting the special form of the prime p610.
|
||||
// mc = ma*R^-1 mod p610x2, where R = 2^640.
|
||||
// If ma < 2^640*p610, the output mc is in the range [0, 2*p610-1].
|
||||
// ma is assumed to be in Montgomery representation.
|
||||
|
@ -17,7 +17,7 @@ __inline void fpadd751(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
|
||||
// Inputs: a, b in [0, 2*p751-1]
|
||||
// Output: c in [0, 2*p751-1]
|
||||
|
||||
#if (OS_TARGET == OS_WIN)
|
||||
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
|
||||
unsigned int i, carry = 0;
|
||||
digit_t mask;
|
||||
|
||||
@ -38,7 +38,7 @@ __inline void fpadd751(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
|
||||
|
||||
#elif (OS_TARGET == OS_LINUX)
|
||||
|
||||
fpadd751_asm(a, b, c);
|
||||
oqs_kem_sike_fpadd751_asm(a, b, c);
|
||||
|
||||
#endif
|
||||
}
|
||||
@ -47,7 +47,7 @@ __inline void fpsub751(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
|
||||
// Inputs: a, b in [0, 2*p751-1]
|
||||
// Output: c in [0, 2*p751-1]
|
||||
|
||||
#if (OS_TARGET == OS_WIN)
|
||||
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
|
||||
unsigned int i, borrow = 0;
|
||||
digit_t mask;
|
||||
|
||||
@ -63,7 +63,7 @@ __inline void fpsub751(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
|
||||
|
||||
#elif (OS_TARGET == OS_LINUX)
|
||||
|
||||
fpsub751_asm(a, b, c);
|
||||
oqs_kem_sike_fpsub751_asm(a, b, c);
|
||||
|
||||
#endif
|
||||
}
|
||||
@ -110,7 +110,7 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
|
||||
|
||||
UNREFERENCED_PARAMETER(nwords);
|
||||
|
||||
#if (OS_TARGET == OS_WIN)
|
||||
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
|
||||
digit_t t = 0;
|
||||
uint128_t uv = {0};
|
||||
unsigned int carry = 0;
|
||||
@ -516,17 +516,17 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
|
||||
|
||||
#elif (OS_TARGET == OS_LINUX)
|
||||
|
||||
mul751_asm(a, b, c);
|
||||
oqs_kem_sike_mul751_asm(a, b, c);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
void rdc_mont(const digit_t *ma, digit_t *mc) { // Montgomery reduction exploiting special form of the prime.
|
||||
void rdc_mont(digit_t *ma, digit_t *mc) { // Montgomery reduction exploiting special form of the prime.
|
||||
// mc = ma*R^-1 mod p751x2, where R = 2^768.
|
||||
// If ma < 2^768*p751, the output mc is in the range [0, 2*p751-1].
|
||||
// ma is assumed to be in Montgomery representation.
|
||||
|
||||
#if (OS_TARGET == OS_WIN)
|
||||
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
|
||||
unsigned int carry;
|
||||
digit_t t = 0;
|
||||
uint128_t uv = {0};
|
||||
@ -842,7 +842,7 @@ void rdc_mont(const digit_t *ma, digit_t *mc) { // Montgomery reduction exploiti
|
||||
|
||||
#elif (OS_TARGET == OS_LINUX)
|
||||
|
||||
rdc751_asm(ma, mc);
|
||||
oqs_kem_sike_rdc751_asm(ma, mc);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
@ -6,6 +6,7 @@
|
||||
|
||||
.intel_syntax noprefix
|
||||
|
||||
|
||||
// Registers that are used for parameter passing:
|
||||
#define reg_p1 rdi
|
||||
#define reg_p2 rsi
|
||||
@ -17,8 +18,8 @@
|
||||
// Field addition
|
||||
// Operation: c [reg_p3] = a [reg_p1] + b [reg_p2]
|
||||
//***********************************************************************
|
||||
.global fpadd751_asm
|
||||
fpadd751_asm:
|
||||
.global oqs_kem_sike_fpadd751_asm
|
||||
oqs_kem_sike_fpadd751_asm:
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
@ -150,8 +151,8 @@ fpadd751_asm:
|
||||
// Field subtraction
|
||||
// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2]
|
||||
//***********************************************************************
|
||||
.global fpsub751_asm
|
||||
fpsub751_asm:
|
||||
.global oqs_kem_sike_fpsub751_asm
|
||||
oqs_kem_sike_fpsub751_asm:
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
@ -567,8 +568,8 @@ fpsub751_asm:
|
||||
//*****************************************************************************
|
||||
// 751-bit multiplication using Karatsuba (one level), schoolbook (two levels)
|
||||
//*****************************************************************************
|
||||
.global mul751_asm
|
||||
mul751_asm:
|
||||
.global oqs_kem_sike_mul751_asm
|
||||
oqs_kem_sike_mul751_asm:
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
@ -791,8 +792,8 @@ mul751_asm:
|
||||
// Operation: c [reg_p3] = a [reg_p1] * b [reg_p2]
|
||||
// NOTE: a=c or b=c are not allowed
|
||||
//***********************************************************************
|
||||
.global mul751_asm
|
||||
mul751_asm:
|
||||
.global oqs_kem_sike_mul751_asm
|
||||
oqs_kem_sike_mul751_asm:
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
@ -1957,8 +1958,8 @@ mul751_asm:
|
||||
// Operation: c [reg_p2] = a [reg_p1]
|
||||
// NOTE: a=c is not allowed
|
||||
//**************************************************************************************
|
||||
.global rdc751_asm
|
||||
rdc751_asm:
|
||||
.global oqs_kem_sike_rdc751_asm
|
||||
oqs_kem_sike_rdc751_asm:
|
||||
push rbx
|
||||
push rbp
|
||||
push r12
|
||||
@ -2106,8 +2107,8 @@ rdc751_asm:
|
||||
// Operation: c [reg_p2] = a [reg_p1]
|
||||
// NOTE: a=c is not allowed
|
||||
//***********************************************************************
|
||||
.global rdc751_asm
|
||||
rdc751_asm:
|
||||
.global oqs_kem_sike_rdc751_asm
|
||||
oqs_kem_sike_rdc751_asm:
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
@ -2727,8 +2728,8 @@ rdc751_asm:
|
||||
// 751-bit multiprecision addition
|
||||
// Operation: c [reg_p3] = a [reg_p1] + b [reg_p2]
|
||||
//***********************************************************************
|
||||
.global mp_add751_asm
|
||||
mp_add751_asm:
|
||||
.global oqs_kem_sike_mp_add751_asm
|
||||
oqs_kem_sike_mp_add751_asm:
|
||||
mov r8, [reg_p1]
|
||||
mov r9, [reg_p1+8]
|
||||
mov r10, [reg_p1+16]
|
||||
@ -2773,8 +2774,8 @@ mp_add751_asm:
|
||||
// 2x751-bit multiprecision subtraction/addition
|
||||
// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2]. If c < 0, add p751*2^768
|
||||
//***********************************************************************
|
||||
.global mp_subadd751x2_asm
|
||||
mp_subadd751x2_asm:
|
||||
.global oqs_kem_sike_mp_subadd751x2_asm
|
||||
oqs_kem_sike_mp_subadd751x2_asm:
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
@ -2916,8 +2917,8 @@ mp_subadd751x2_asm:
|
||||
// Double 2x751-bit multiprecision subtraction
|
||||
// Operation: c [reg_p3] = c [reg_p3] - a [reg_p1] - b [reg_p2]
|
||||
//***********************************************************************
|
||||
.global mp_dblsub751x2_asm
|
||||
mp_dblsub751x2_asm:
|
||||
.global oqs_kem_sike_mp_dblsub751x2_asm
|
||||
oqs_kem_sike_mp_dblsub751x2_asm:
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
@ -3034,4 +3035,4 @@ mp_dblsub751x2_asm:
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
ret
|
||||
ret
|
||||
|
@ -16,14 +16,14 @@ __inline void fpadd751(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
|
||||
// Inputs: a, b in [0, 2*p751-1]
|
||||
// Output: c in [0, 2*p751-1]
|
||||
|
||||
fpadd751_asm(a, b, c);
|
||||
oqs_kem_sike_fpadd751_asm(a, b, c);
|
||||
}
|
||||
|
||||
__inline void fpsub751(const digit_t *a, const digit_t *b, digit_t *c) { // Modular subtraction, c = a-b mod p751.
|
||||
// Inputs: a, b in [0, 2*p751-1]
|
||||
// Output: c in [0, 2*p751-1]
|
||||
|
||||
fpsub751_asm(a, b, c);
|
||||
oqs_kem_sike_fpsub751_asm(a, b, c);
|
||||
}
|
||||
|
||||
__inline void fpneg751(digit_t *a) { // Modular negation, a = -a mod p751.
|
||||
@ -68,13 +68,13 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
|
||||
|
||||
UNREFERENCED_PARAMETER(nwords);
|
||||
|
||||
mul751_asm(a, b, c);
|
||||
oqs_kem_sike_mul751_asm(a, b, c);
|
||||
}
|
||||
|
||||
void rdc_mont(const digit_t *ma, digit_t *mc) { // Montgomery reduction exploiting special form of the prime.
|
||||
void rdc_mont(digit_t *ma, digit_t *mc) { // Montgomery reduction exploiting special form of the prime.
|
||||
// mc = ma*R^-1 mod p751x2, where R = 2^768.
|
||||
// If ma < 2^768*p751, the output mc is in the range [0, 2*p751-1].
|
||||
// ma is assumed to be in Montgomery representation.
|
||||
|
||||
rdc751_asm(ma, mc);
|
||||
oqs_kem_sike_rdc751_asm(ma, mc);
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -31,15 +31,15 @@
|
||||
// Curve isogeny system "SIDHp751". Base curve: Montgomery curve By^2 = Cx^3 + Ax^2 + Cx defined over GF(p751^2), where A=6, B=1, C=1 and p751 = 2^372*3^239-1
|
||||
//
|
||||
|
||||
static const uint64_t p751[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xEEAFFFFFFFFFFFFF,
|
||||
0xE3EC968549F878A8, 0xDA959B1A13F7CC76, 0x084E9867D6EBE876, 0x8562B5045CB25748, 0x0E12909F97BADC66, 0x00006FE5D541F71C
|
||||
};
|
||||
static const uint64_t p751p1[NWORDS64_FIELD] = {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xEEB0000000000000,
|
||||
0xE3EC968549F878A8, 0xDA959B1A13F7CC76, 0x084E9867D6EBE876, 0x8562B5045CB25748, 0x0E12909F97BADC66, 0x00006FE5D541F71C
|
||||
};
|
||||
static const uint64_t p751x2[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xDD5FFFFFFFFFFFFF,
|
||||
0xC7D92D0A93F0F151, 0xB52B363427EF98ED, 0x109D30CFADD7D0ED, 0x0AC56A08B964AE90, 0x1C25213F2F75B8CD, 0x0000DFCBAA83EE38
|
||||
};
|
||||
const uint64_t p751[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xEEAFFFFFFFFFFFFF,
|
||||
0xE3EC968549F878A8, 0xDA959B1A13F7CC76, 0x084E9867D6EBE876, 0x8562B5045CB25748, 0x0E12909F97BADC66, 0x00006FE5D541F71C
|
||||
};
|
||||
const uint64_t p751p1[NWORDS64_FIELD] = {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xEEB0000000000000,
|
||||
0xE3EC968549F878A8, 0xDA959B1A13F7CC76, 0x084E9867D6EBE876, 0x8562B5045CB25748, 0x0E12909F97BADC66, 0x00006FE5D541F71C
|
||||
};
|
||||
const uint64_t p751x2[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xDD5FFFFFFFFFFFFF,
|
||||
0xC7D92D0A93F0F151, 0xB52B363427EF98ED, 0x109D30CFADD7D0ED, 0x0AC56A08B964AE90, 0x1C25213F2F75B8CD, 0x0000DFCBAA83EE38
|
||||
};
|
||||
// Order of Alice's subgroup
|
||||
static const uint64_t Alice_order[NWORDS64_ORDER] = {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0010000000000000};
|
||||
// Order of Bob's subgroup
|
||||
@ -129,9 +129,9 @@ static const unsigned int strat_Bob[MAX_Bob - 1] = {
|
||||
#define fp2inv_mont fp2inv751_mont
|
||||
#define fp2inv_mont_bingcd fp2inv751_mont_bingcd
|
||||
#define fpequal_non_constant_time fpequal751_non_constant_time
|
||||
#define mp_add_asm mp_add751_asm
|
||||
#define mp_subaddx2_asm mp_subadd751x2_asm
|
||||
#define mp_dblsubx2_asm mp_dblsub751x2_asm
|
||||
#define mp_add_asm oqs_kem_sike_mp_add751_asm
|
||||
#define mp_subaddx2_asm oqs_kem_sike_mp_subadd751x2_asm
|
||||
#define mp_dblsubx2_asm oqs_kem_sike_mp_dblsub751x2_asm
|
||||
#define crypto_kem_keypair OQS_KEM_sike_p751_keypair
|
||||
#define crypto_kem_enc OQS_KEM_sike_p751_encaps
|
||||
#define crypto_kem_dec OQS_KEM_sike_p751_decaps
|
||||
@ -141,10 +141,12 @@ static const unsigned int strat_Bob[MAX_Bob - 1] = {
|
||||
#define EphemeralKeyGeneration_B oqs_kem_sidh_p751_EphemeralKeyGeneration_B
|
||||
#define EphemeralSecretAgreement_A oqs_kem_sidh_p751_EphemeralSecretAgreement_A
|
||||
#define EphemeralSecretAgreement_B oqs_kem_sidh_p751_EphemeralSecretAgreement_B
|
||||
#ifdef USE_SIKEP751_ASM
|
||||
#define USE_SIKE_ASM
|
||||
#endif
|
||||
|
||||
#if defined(X86_64)
|
||||
#include "AMD64/fp_x64.c"
|
||||
// #include "AMD64/fp_x64_asm.S" FIXMEOQS
|
||||
#elif defined(ARM64)
|
||||
#include "ARM64/fp_arm64.c"
|
||||
#else
|
||||
|
@ -396,9 +396,9 @@ static const uint64_t v_3_torsion[20][2 * NWORDS64_FIELD] = {
|
||||
#define fp2inv_mont fp2inv751_mont
|
||||
#define fp2inv_mont_bingcd fp2inv751_mont_bingcd
|
||||
#define fpequal_non_constant_time fpequal751_non_constant_time
|
||||
#define mp_add_asm mp_add751_asm
|
||||
#define mp_subaddx2_asm mp_subadd751x2_asm
|
||||
#define mp_dblsubx2_asm mp_dblsub751x2_asm
|
||||
#define mp_add_asm oqs_kem_sike_mp_add751_asm
|
||||
#define mp_subaddx2_asm oqs_kem_sike_mp_subadd751x2_asm
|
||||
#define mp_dblsubx2_asm oqs_kem_sike_mp_dblsub751x2_asm
|
||||
#define crypto_kem_keypair OQS_KEM_sike_p751_compressed_keypair
|
||||
#define crypto_kem_enc OQS_KEM_sike_p751_compressed_encaps
|
||||
#define crypto_kem_dec OQS_KEM_sike_p751_compressed_decaps
|
||||
@ -408,10 +408,12 @@ static const uint64_t v_3_torsion[20][2 * NWORDS64_FIELD] = {
|
||||
#define EphemeralKeyGeneration_B oqs_kem_sidh_p751_compressed_EphemeralKeyGeneration_B
|
||||
#define EphemeralSecretAgreement_A oqs_kem_sidh_p751_compressed_EphemeralSecretAgreement_A
|
||||
#define EphemeralSecretAgreement_B oqs_kem_sidh_p751_compressed_EphemeralSecretAgreement_B
|
||||
#ifdef USE_SIKEP751_ASM
|
||||
#define USE_SIKE_ASM
|
||||
#endif
|
||||
|
||||
#if defined(X86_64)
|
||||
#include "AMD64/fp_x64.c"
|
||||
// #include "AMD64/fp_x64_asm.S" FIXMEOQS
|
||||
#elif defined(ARM64)
|
||||
#include "ARM64/fp_arm64.c"
|
||||
#else
|
||||
|
@ -121,17 +121,17 @@ static unsigned int mp_add(const digit_t *a, const digit_t *b, digit_t *c, const
|
||||
|
||||
// 751-bit multiprecision addition, c = a+b
|
||||
static void mp_add751(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
static void mp_add751_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
void oqs_kem_sike_mp_add751_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
|
||||
// Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = nwords. Returns the borrow bit
|
||||
static unsigned int mp_sub(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords);
|
||||
|
||||
// 2x751-bit multiprecision subtraction followed by addition with p751*2^768, c = a-b+(p751*2^768) if a-b < 0, otherwise c=a-b
|
||||
static void mp_subaddx2_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
static void mp_subadd751x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
void oqs_kem_sike_mp_subaddx2_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
void oqs_kem_sike_mp_subadd751x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
|
||||
// Double 2x751-bit multiprecision subtraction, c = c-a-b, where c > a and c > b
|
||||
static void mp_dblsub751x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
void oqs_kem_sike_mp_dblsub751x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
|
||||
// Multiprecision left shift
|
||||
static void mp_shiftleft(digit_t *x, unsigned int shift, const unsigned int nwords);
|
||||
@ -161,11 +161,11 @@ static bool fpequal751_non_constant_time(const digit_t *a, const digit_t *b);
|
||||
|
||||
// Modular addition, c = a+b mod p751
|
||||
extern void fpadd751(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
extern void fpadd751_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
extern void oqs_kem_sike_fpadd751_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
|
||||
// Modular subtraction, c = a-b mod p751
|
||||
extern void fpsub751(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
extern void fpsub751_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
extern void oqs_kem_sike_fpsub751_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
|
||||
// Modular negation, a = -a mod p751
|
||||
extern void fpneg751(digit_t *a);
|
||||
@ -177,12 +177,13 @@ static void fpdiv2_751(const digit_t *a, digit_t *c);
|
||||
static void fpcorrection751(digit_t *a);
|
||||
|
||||
// 751-bit Montgomery reduction, c = a*R^-1 mod p751, where R=2^768
|
||||
static void rdc_mont(const digit_t *a, digit_t *c);
|
||||
static void rdc_mont(digit_t *a, digit_t *c);
|
||||
void oqs_kem_sike_rdc751_asm(digit_t *ma, digit_t *mc);
|
||||
|
||||
// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p751, where R=2^768
|
||||
static void fpmul751_mont(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
static void mul751_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
static void rdc751_asm(const digit_t *ma, digit_t *mc);
|
||||
void oqs_kem_sike_mul751_asm(const digit_t *a, const digit_t *b, digit_t *c);
|
||||
|
||||
|
||||
// Field squaring using Montgomery arithmetic, c = a^2*R^-1 mod p751, where R=2^768
|
||||
static void fpsqr751_mont(const digit_t *ma, digit_t *mc);
|
||||
|
@ -155,7 +155,7 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
|
||||
c[2 * nwords - 1] = v;
|
||||
}
|
||||
|
||||
void rdc_mont(const digit_t *ma, digit_t *mc) { // Efficient Montgomery reduction using comba and exploiting the special form of the prime p751.
|
||||
void rdc_mont(digit_t *ma, digit_t *mc) { // Efficient Montgomery reduction using comba and exploiting the special form of the prime p751.
|
||||
// mc = ma*R^-1 mod p751x2, where R = 2^768.
|
||||
// If ma < 2^768*p751, the output mc is in the range [0, 2*p751-1].
|
||||
// ma is assumed to be in Montgomery representation.
|
||||
|
@ -15,9 +15,15 @@
|
||||
|
||||
#define OS_WIN 1
|
||||
#define OS_LINUX 2
|
||||
#define OS_DARWIN 3
|
||||
|
||||
#if defined(_WIN32) // Microsoft Windows OS
|
||||
#define OS_TARGET OS_WIN
|
||||
#elif defined(__APPLE__) // darwin
|
||||
#define OS_TARGET OS_DARWIN
|
||||
#ifndef _GENERIC_ // default to generic implementation on darwin for now (FIXMEOQS: still needed?)
|
||||
#define _GENERIC_
|
||||
#endif
|
||||
#else
|
||||
#define OS_TARGET OS_LINUX // default to Linux
|
||||
#endif
|
||||
|
@ -138,14 +138,10 @@ static void fp2correction(f2elm_t a) { // Modular correction, a = a in GF(p^2).
|
||||
}
|
||||
|
||||
__inline static void mp_addfast(const digit_t *a, const digit_t *b, digit_t *c) { // Multiprecision addition, c = a+b.
|
||||
#if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && (NBITS_FIELD == 434 || NBITS_FIELD == 610))
|
||||
|
||||
mp_add(a, b, c, NWORDS_FIELD);
|
||||
|
||||
#elif (OS_TARGET == OS_LINUX)
|
||||
|
||||
#if USE_SIKE_ASM
|
||||
mp_add_asm(a, b, c);
|
||||
|
||||
#else
|
||||
mp_add(a, b, c, NWORDS_FIELD);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -172,7 +168,9 @@ __inline unsigned int mp_sub(const digit_t *a, const digit_t *b, digit_t *c, con
|
||||
}
|
||||
|
||||
__inline static void mp_subaddfast(const digit_t *a, const digit_t *b, digit_t *c) { // Multiprecision subtraction followed by addition with p*2^MAXBITS_FIELD, c = a-b+(p*2^MAXBITS_FIELD) if a-b < 0, otherwise c=a-b.
|
||||
#if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && (NBITS_FIELD == 434 || NBITS_FIELD == 610))
|
||||
#if USE_SIKE_ASM
|
||||
mp_subaddx2_asm(a, b, c);
|
||||
#else
|
||||
felm_t t1;
|
||||
|
||||
digit_t mask = 0 - (digit_t) mp_sub(a, b, c, 2 * NWORDS_FIELD);
|
||||
@ -180,24 +178,15 @@ __inline static void mp_subaddfast(const digit_t *a, const digit_t *b, digit_t *
|
||||
t1[i] = ((digit_t *) PRIME)[i] & mask;
|
||||
}
|
||||
mp_addfast((digit_t *) &c[NWORDS_FIELD], t1, (digit_t *) &c[NWORDS_FIELD]);
|
||||
|
||||
#elif (OS_TARGET == OS_LINUX)
|
||||
|
||||
mp_subaddx2_asm(a, b, c);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
__inline static void mp_dblsubfast(const digit_t *a, const digit_t *b, digit_t *c) { // Multiprecision subtraction, c = c-a-b, where lng(a) = lng(b) = 2*NWORDS_FIELD.
|
||||
#if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && (NBITS_FIELD == 434 || NBITS_FIELD == 610))
|
||||
|
||||
#if USE_SIKE_ASM
|
||||
mp_dblsubx2_asm(a, b, c);
|
||||
#else
|
||||
mp_sub(c, a, c, 2 * NWORDS_FIELD);
|
||||
mp_sub(c, b, c, 2 * NWORDS_FIELD);
|
||||
|
||||
#elif (OS_TARGET == OS_LINUX)
|
||||
|
||||
mp_dblsubx2_asm(a, b, c);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -13,7 +13,7 @@ OQS_KEM *OQS_KEM_sike_p434_new() {
|
||||
return NULL;
|
||||
}
|
||||
kem->method_name = OQS_KEM_alg_sike_p434;
|
||||
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
|
||||
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
|
||||
|
||||
kem->claimed_nist_level = 1;
|
||||
kem->ind_cca = true;
|
||||
@ -43,7 +43,7 @@ OQS_KEM *OQS_KEM_sike_p434_compressed_new() {
|
||||
return NULL;
|
||||
}
|
||||
kem->method_name = OQS_KEM_alg_sike_p434_compressed;
|
||||
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
|
||||
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
|
||||
|
||||
kem->claimed_nist_level = 1;
|
||||
kem->ind_cca = true;
|
||||
@ -73,7 +73,7 @@ OQS_KEM *OQS_KEM_sike_p503_new() {
|
||||
return NULL;
|
||||
}
|
||||
kem->method_name = OQS_KEM_alg_sike_p503;
|
||||
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
|
||||
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
|
||||
|
||||
kem->claimed_nist_level = 2;
|
||||
kem->ind_cca = true;
|
||||
@ -103,7 +103,7 @@ OQS_KEM *OQS_KEM_sike_p503_compressed_new() {
|
||||
return NULL;
|
||||
}
|
||||
kem->method_name = OQS_KEM_alg_sike_p503_compressed;
|
||||
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
|
||||
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
|
||||
|
||||
kem->claimed_nist_level = 2;
|
||||
kem->ind_cca = true;
|
||||
@ -133,7 +133,7 @@ OQS_KEM *OQS_KEM_sike_p610_new() {
|
||||
return NULL;
|
||||
}
|
||||
kem->method_name = OQS_KEM_alg_sike_p610;
|
||||
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
|
||||
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
|
||||
|
||||
kem->claimed_nist_level = 3;
|
||||
kem->ind_cca = true;
|
||||
@ -163,7 +163,7 @@ OQS_KEM *OQS_KEM_sike_p610_compressed_new() {
|
||||
return NULL;
|
||||
}
|
||||
kem->method_name = OQS_KEM_alg_sike_p610_compressed;
|
||||
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
|
||||
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
|
||||
|
||||
kem->claimed_nist_level = 3;
|
||||
kem->ind_cca = true;
|
||||
@ -193,7 +193,7 @@ OQS_KEM *OQS_KEM_sike_p751_new() {
|
||||
return NULL;
|
||||
}
|
||||
kem->method_name = OQS_KEM_alg_sike_p751;
|
||||
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
|
||||
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
|
||||
|
||||
kem->claimed_nist_level = 5;
|
||||
kem->ind_cca = true;
|
||||
@ -223,7 +223,7 @@ OQS_KEM *OQS_KEM_sike_p751_compressed_new() {
|
||||
return NULL;
|
||||
}
|
||||
kem->method_name = OQS_KEM_alg_sike_p751_compressed;
|
||||
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
|
||||
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
|
||||
|
||||
kem->claimed_nist_level = 5;
|
||||
kem->ind_cca = true;
|
||||
@ -253,7 +253,7 @@ OQS_KEM *OQS_KEM_sidh_p434_new() {
|
||||
return NULL;
|
||||
}
|
||||
kem->method_name = OQS_KEM_alg_sidh_p434;
|
||||
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
|
||||
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
|
||||
|
||||
kem->claimed_nist_level = 1;
|
||||
kem->ind_cca = false;
|
||||
@ -323,7 +323,7 @@ OQS_KEM *OQS_KEM_sidh_p434_compressed_new() {
|
||||
return NULL;
|
||||
}
|
||||
kem->method_name = OQS_KEM_alg_sidh_p434_compressed;
|
||||
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
|
||||
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
|
||||
|
||||
kem->claimed_nist_level = 1;
|
||||
kem->ind_cca = false;
|
||||
@ -393,7 +393,7 @@ OQS_KEM *OQS_KEM_sidh_p503_new() {
|
||||
return NULL;
|
||||
}
|
||||
kem->method_name = OQS_KEM_alg_sidh_p503;
|
||||
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
|
||||
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
|
||||
|
||||
kem->claimed_nist_level = 2;
|
||||
kem->ind_cca = false;
|
||||
@ -463,7 +463,7 @@ OQS_KEM *OQS_KEM_sidh_p503_compressed_new() {
|
||||
return NULL;
|
||||
}
|
||||
kem->method_name = OQS_KEM_alg_sidh_p503_compressed;
|
||||
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
|
||||
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
|
||||
|
||||
kem->claimed_nist_level = 2;
|
||||
kem->ind_cca = false;
|
||||
@ -533,7 +533,7 @@ OQS_KEM *OQS_KEM_sidh_p610_new() {
|
||||
return NULL;
|
||||
}
|
||||
kem->method_name = OQS_KEM_alg_sidh_p610;
|
||||
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
|
||||
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
|
||||
|
||||
kem->claimed_nist_level = 3;
|
||||
kem->ind_cca = false;
|
||||
@ -603,7 +603,7 @@ OQS_KEM *OQS_KEM_sidh_p610_compressed_new() {
|
||||
return NULL;
|
||||
}
|
||||
kem->method_name = OQS_KEM_alg_sidh_p610_compressed;
|
||||
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
|
||||
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
|
||||
|
||||
kem->claimed_nist_level = 3;
|
||||
kem->ind_cca = false;
|
||||
@ -673,7 +673,7 @@ OQS_KEM *OQS_KEM_sidh_p751_new() {
|
||||
return NULL;
|
||||
}
|
||||
kem->method_name = OQS_KEM_alg_sidh_p751;
|
||||
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
|
||||
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
|
||||
|
||||
kem->claimed_nist_level = 5;
|
||||
kem->ind_cca = false;
|
||||
@ -743,7 +743,7 @@ OQS_KEM *OQS_KEM_sidh_p751_compressed_new() {
|
||||
return NULL;
|
||||
}
|
||||
kem->method_name = OQS_KEM_alg_sidh_p751_compressed;
|
||||
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
|
||||
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
|
||||
|
||||
kem->claimed_nist_level = 5;
|
||||
kem->ind_cca = false;
|
||||
|
@ -1,15 +0,0 @@
|
||||
/*
|
||||
* undefines SIDH symbols included in both P503 and P751
|
||||
* to avoid Visual Studio errors
|
||||
*/
|
||||
|
||||
#if defined(_WIN32)
|
||||
#undef OQS_SIDH_MSR_CRYPTO_SECRETKEYBYTES
|
||||
#undef OQS_SIDH_MSR_CRYPTO_PUBLICKEYBYTES
|
||||
#undef OQS_SIDH_MSR_CRYPTO_BYTES
|
||||
#undef OQS_SIDH_MSR_CRYPTO_CIPHERTEXTBYTES
|
||||
#undef OQS_SIDH_MSR_CRYPTO_ALGNAME
|
||||
#undef SIDH_SECRETKEYBYTES
|
||||
#undef SIDH_PUBLICKEYBYTES
|
||||
#undef SIDH_BYTES
|
||||
#endif
|
Loading…
x
Reference in New Issue
Block a user