Merge pull request #632 from christianpaquin/cp-enable-sike-fast-mode-with-cmake

Updated SIKE implementation with latest changes.
This commit is contained in:
Christian Paquin 2020-03-04 16:42:39 -05:00 committed by GitHub
commit 82e1828f4c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
36 changed files with 3049 additions and 3044 deletions

View File

@ -29,7 +29,7 @@ Implementation
--------------
- **Source of implementation:** https://github.com/Microsoft/PQCrypto-SIDH
- **Implementation version:** v3.2 (https://github.com/microsoft/PQCrypto-SIDH/tree/v3.2)
- **Implementation version:** v3.2 (https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311)
- **License:** MIT License
- **Language:** C
- **Constant-time:** Yes

View File

@ -30,13 +30,8 @@ Implementation
--------------
- **Source of implementation:** https://github.com/Microsoft/PQCrypto-SIDH
- **Implementation version:** v3.2 (https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa)
- **Implementation version:** v3.2 (https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311)
- **License:** MIT License
- **Language:** C
- **Constant-time:** Yes
- **Architectures supported in liboqs master branch**: x86, x64
Additional comments
-------------------
The original SIKE implementation includes optimizations that are not currently being built in liboqs. See src/kem/sike/upstream/README for details.

View File

@ -1,22 +1,33 @@
set(SRCS kem_sike.c
P434/P434.c
P434/P434_compressed.c
P503/P503.c P503/P503_compressed.c
P610/P610.c
P610/P610_compressed.c
P751/P751.c
P751/P751_compressed.c)
P434/P434.c
P434/P434_compressed.c
P503/P503.c P503/P503_compressed.c
P610/P610.c
P610/P610_compressed.c
P751/P751.c
P751/P751_compressed.c)
add_library(sike OBJECT ${SRCS})
# FIXMEOQS: enable FAST mode, assembly
target_compile_definitions(sike PRIVATE _GENERIC_)
if(ARCH STREQUAL "x86")
target_compile_definitions(sike PRIVATE _X86_)
elseif(ARCH STREQUAL "x86_64")
target_compile_definitions(sike PRIVATE _AMD64_)
elseif(ARCH STREQUAL "arm")
target_compile_definitions(sike PRIVATE _ARM_)
elseif(ARCH STREQUAL "arm64")
target_compile_definitions(sike PRIVATE _ARM64_)
# Select the per-architecture compile definitions for the `sike` object
# library and, on Linux targets, attach the hand-written assembly field
# arithmetic.
#
# * Assembly files are attached with target_sources(): the `sike` target
#   already exists at this point (the target_compile_definitions() calls
#   below require it), so appending to the SRCS variable here would never
#   reach the target.
# * ARCH is referenced by name rather than expanded with ${}, so an unset or
#   empty ARCH simply matches no branch instead of producing a malformed
#   if() expression.
# * CMAKE_SYSTEM_NAME (the target OS) is tested instead of the host OS so a
#   cross-compiled build selects assembly for the system the code runs on.
# * arm64 uses the ARM64 assembly; the AMD64 files cannot be assembled for
#   that architecture.
#
# NOTE(review): the P434 sources key USE_SIKE_ASM off USE_SIKEP434_ASM, which
# is not defined here -- confirm it is supplied elsewhere in the build.
if(ARCH STREQUAL "x86")
    target_compile_definitions(sike PRIVATE _GENERIC_ _X86_)
elseif(ARCH STREQUAL "x86_64")
    target_compile_definitions(sike PRIVATE _AMD64_)
    if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
        target_sources(sike PRIVATE
            P503/AMD64/fp_x64_asm.S
            P751/AMD64/fp_x64_asm.S)
        # The P434 and P610 assembly is only built when BMI2 is enabled,
        # together with the _MULX_/_ADX_ definitions.
        if(USE_BMI2_INSTRUCTIONS)
            target_compile_definitions(sike PRIVATE _MULX_ _ADX_)
            target_sources(sike PRIVATE
                P434/AMD64/fp_x64_asm.S
                P610/AMD64/fp_x64_asm.S)
        endif()
    endif()
elseif(ARCH STREQUAL "arm")
    target_compile_definitions(sike PRIVATE _GENERIC_ _ARM_)
elseif(ARCH STREQUAL "arm64")
    target_compile_definitions(sike PRIVATE _ARM64_)
    if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
        target_sources(sike PRIVATE
            P503/ARM64/fp_arm64_asm.S
            P751/ARM64/fp_arm64_asm.S)
    endif()
endif()

View File

@ -17,7 +17,7 @@ __inline void fpadd434(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
// Inputs: a, b in [0, 2*p434-1]
// Output: c in [0, 2*p434-1]
#if (OS_TARGET == OS_WIN)
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
unsigned int i, carry = 0;
digit_t mask;
@ -38,7 +38,7 @@ __inline void fpadd434(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
#elif (OS_TARGET == OS_LINUX)
fpadd434_asm(a, b, c);
oqs_kem_sike_fpadd434_asm(a, b, c);
#endif
}
@ -47,7 +47,7 @@ __inline void fpsub434(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
// Inputs: a, b in [0, 2*p434-1]
// Output: c in [0, 2*p434-1]
#if (OS_TARGET == OS_WIN)
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
unsigned int i, borrow = 0;
digit_t mask;
@ -63,7 +63,7 @@ __inline void fpsub434(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
#elif (OS_TARGET == OS_LINUX)
fpsub434_asm(a, b, c);
oqs_kem_sike_fpsub434_asm(a, b, c);
#endif
}
@ -110,7 +110,7 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
UNREFERENCED_PARAMETER(nwords);
#if (OS_TARGET == OS_WIN)
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
digit_t t = 0;
uint128_t uv = {0};
unsigned int carry = 0;
@ -276,17 +276,17 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
#elif (OS_TARGET == OS_LINUX)
mul434_asm(a, b, c);
oqs_kem_sike_mul434_asm(a, b, c);
#endif
}
void rdc_mont(const digit_t *ma, digit_t *mc) { // Montgomery reduction exploiting special form of the prime.
void rdc_mont(digit_t *ma, digit_t *mc) { // Montgomery reduction exploiting special form of the prime.
// mc = ma*R^-1 mod p434x2, where R = 2^448.
// If ma < 2^448*p434, the output mc is in the range [0, 2*p434-1].
// ma is assumed to be in Montgomery representation.
#if (OS_TARGET == OS_WIN)
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
unsigned int carry;
digit_t t = 0;
uint128_t uv = {0};
@ -423,7 +423,7 @@ void rdc_mont(const digit_t *ma, digit_t *mc) { // Montgomery reduction exploiti
#elif (OS_TARGET == OS_LINUX)
rdc434_asm(ma, mc);
oqs_kem_sike_rdc434_asm(ma, mc);
#endif
}

View File

@ -36,8 +36,8 @@
// Field addition
// Operation: c [reg_p3] = a [reg_p1] + b [reg_p2]
//***********************************************************************
.global fpadd434_asm
fpadd434_asm:
.global oqs_kem_sike_fpadd434_asm
oqs_kem_sike_fpadd434_asm:
push r12
push r13
push r14
@ -111,8 +111,8 @@ fpadd434_asm:
// Field subtraction
// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2]
//***********************************************************************
.global fpsub434_asm
fpsub434_asm:
.global oqs_kem_sike_fpsub434_asm
oqs_kem_sike_fpsub434_asm:
push r12
push r13
push r14
@ -410,8 +410,8 @@ fpsub434_asm:
//*****************************************************************************
// 434-bit multiplication using Karatsuba (one level), schoolbook (one level)
//*****************************************************************************
.global mul434_asm
mul434_asm:
.global oqs_kem_sike_mul434_asm
oqs_kem_sike_mul434_asm:
push r12
push r13
push r14
@ -562,8 +562,8 @@ mul434_asm:
// Operation: c [reg_p3] = a [reg_p1] * b [reg_p2]
// NOTE: a=c or b=c are not allowed
//***********************************************************************
.global mul434_asm
mul434_asm:
.global oqs_kem_sike_mul434_asm
oqs_kem_sike_mul434_asm:
ret
@ -660,8 +660,8 @@ mul434_asm:
// Operation: c [reg_p2] = a [reg_p1]
// NOTE: a=c is not allowed
//**************************************************************************************
.global rdc434_asm
rdc434_asm:
.global oqs_kem_sike_rdc434_asm
oqs_kem_sike_rdc434_asm:
push r12
push r13
@ -767,8 +767,8 @@ rdc434_asm:
// Operation: c [reg_p2] = a [reg_p1]
// NOTE: a=c is not allowed
//***********************************************************************
.global rdc434_asm
rdc434_asm:
.global oqs_kem_sike_rdc434_asm
oqs_kem_sike_rdc434_asm:
ret
@ -781,8 +781,8 @@ rdc434_asm:
// 434-bit multiprecision addition
// Operation: c [reg_p3] = a [reg_p1] + b [reg_p2]
//***********************************************************************
.global mp_add434_asm
mp_add434_asm:
.global oqs_kem_sike_mp_add434_asm
oqs_kem_sike_mp_add434_asm:
mov r8, [reg_p1]
mov r9, [reg_p1+8]
mov r10, [reg_p1+16]
@ -812,8 +812,8 @@ mp_add434_asm:
// 2x434-bit multiprecision subtraction/addition
// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2]. If c < 0, add p434*2^448
//***********************************************************************
.global mp_subadd434x2_asm
mp_subadd434x2_asm:
.global oqs_kem_sike_mp_subadd434x2_asm
oqs_kem_sike_mp_subadd434x2_asm:
push r12
push r13
push r14
@ -897,8 +897,8 @@ mp_subadd434x2_asm:
// Double 2x434-bit multiprecision subtraction
// Operation: c [reg_p3] = c [reg_p3] - a [reg_p1] - b [reg_p2]
//***********************************************************************
.global mp_dblsub434x2_asm
mp_dblsub434x2_asm:
.global oqs_kem_sike_mp_dblsub434x2_asm
oqs_kem_sike_mp_dblsub434x2_asm:
push r12
push r13
@ -966,4 +966,4 @@ mp_dblsub434x2_asm:
pop r13
pop r12
ret
ret

View File

@ -31,15 +31,15 @@
// Curve isogeny system "SIDHp434". Base curve: Montgomery curve By^2 = Cx^3 + Ax^2 + Cx defined over GF(p434^2), where A=6, B=1, C=1 and p434 = 2^216*3^137-1
//
static const uint64_t p434[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFDC1767AE2FFFFFF,
0x7BC65C783158AEA3, 0x6CFC5FD681C52056, 0x0002341F27177344
};
static const uint64_t p434p1[NWORDS64_FIELD] = {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xFDC1767AE3000000,
0x7BC65C783158AEA3, 0x6CFC5FD681C52056, 0x0002341F27177344
};
static const uint64_t p434x2[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFB82ECF5C5FFFFFF,
0xF78CB8F062B15D47, 0xD9F8BFAD038A40AC, 0x0004683E4E2EE688
};
const uint64_t p434[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFDC1767AE2FFFFFF,
0x7BC65C783158AEA3, 0x6CFC5FD681C52056, 0x0002341F27177344
};
const uint64_t p434p1[NWORDS64_FIELD] = {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xFDC1767AE3000000,
0x7BC65C783158AEA3, 0x6CFC5FD681C52056, 0x0002341F27177344
};
const uint64_t p434x2[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFB82ECF5C5FFFFFF,
0xF78CB8F062B15D47, 0xD9F8BFAD038A40AC, 0x0004683E4E2EE688
};
// Order of Alice's subgroup
static const uint64_t Alice_order[NWORDS64_ORDER] = {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000001000000};
// Order of Bob's subgroup
@ -119,9 +119,9 @@ static const unsigned int strat_Bob[MAX_Bob - 1] = {
#define fp2inv_mont fp2inv434_mont
#define fp2inv_mont_bingcd fp2inv434_mont_bingcd
#define fpequal_non_constant_time fpequal434_non_constant_time
#define mp_add_asm mp_add434_asm
#define mp_subaddx2_asm mp_subadd434x2_asm
#define mp_dblsubx2_asm mp_dblsub434x2_asm
#define mp_add_asm oqs_kem_sike_mp_add434_asm
#define mp_subaddx2_asm oqs_kem_sike_mp_subadd434x2_asm
#define mp_dblsubx2_asm oqs_kem_sike_mp_dblsub434x2_asm
#define crypto_kem_keypair OQS_KEM_sike_p434_keypair
#define crypto_kem_enc OQS_KEM_sike_p434_encaps
#define crypto_kem_dec OQS_KEM_sike_p434_decaps
@ -131,10 +131,12 @@ static const unsigned int strat_Bob[MAX_Bob - 1] = {
#define EphemeralKeyGeneration_B oqs_kem_sidh_p434_EphemeralKeyGeneration_B
#define EphemeralSecretAgreement_A oqs_kem_sidh_p434_EphemeralSecretAgreement_A
#define EphemeralSecretAgreement_B oqs_kem_sidh_p434_EphemeralSecretAgreement_B
#ifdef USE_SIKEP434_ASM
#define USE_SIKE_ASM
#endif
#if defined(X86_64)
#include "AMD64/fp_x64.c"
// #include "AMD64/fp_x64_asm.S" FIXMEOQS
#else
#include "generic/fp_generic.c"
#endif

View File

@ -11,6 +11,9 @@
// OQS note: size #defines moved to P434.c to avoid redefinitions across parameters
// Algorithm name
#define CRYPTO_ALGNAME "SIKEp434"
// SIKE's key generation
// It produces a private key sk and computes the public key pk.
// Outputs: secret key sk (CRYPTO_SECRETKEYBYTES = 374 bytes)

View File

@ -381,9 +381,9 @@ static const uint64_t v_3_torsion[20][2 * NWORDS64_FIELD] = {
#define fp2inv_mont fp2inv434_mont
#define fp2inv_mont_bingcd fp2inv434_mont_bingcd
#define fpequal_non_constant_time fpequal434_non_constant_time
#define mp_add_asm mp_add434_asm
#define mp_subaddx2_asm mp_subadd434x2_asm
#define mp_dblsubx2_asm mp_dblsub434x2_asm
#define mp_add_asm oqs_kem_sike_mp_add434_asm
#define mp_subaddx2_asm oqs_kem_sike_mp_subadd434x2_asm
#define mp_dblsubx2_asm oqs_kem_sike_mp_dblsub434x2_asm
#define crypto_kem_keypair OQS_KEM_sike_p434_compressed_keypair
#define crypto_kem_enc OQS_KEM_sike_p434_compressed_encaps
#define crypto_kem_dec OQS_KEM_sike_p434_compressed_decaps
@ -393,10 +393,12 @@ static const uint64_t v_3_torsion[20][2 * NWORDS64_FIELD] = {
#define EphemeralKeyGeneration_B oqs_kem_sidh_p434_compressed_EphemeralKeyGeneration_B
#define EphemeralSecretAgreement_A oqs_kem_sidh_p434_compressed_EphemeralSecretAgreement_A
#define EphemeralSecretAgreement_B oqs_kem_sidh_p434_compressed_EphemeralSecretAgreement_B
#ifdef USE_SIKEP434_ASM
#define USE_SIKE_ASM
#endif
#if defined(X86_64)
#include "AMD64/fp_x64.c"
// #include "AMD64/fp_x64_asm.S" FIXMEOQS
#else
#include "generic/fp_generic.c"
#endif

View File

@ -121,17 +121,17 @@ static unsigned int mp_add(const digit_t *a, const digit_t *b, digit_t *c, const
// 434-bit multiprecision addition, c = a+b
static void mp_add434(const digit_t *a, const digit_t *b, digit_t *c);
static void mp_add434_asm(const digit_t *a, const digit_t *b, digit_t *c);
void oqs_kem_sike_mp_add434_asm(const digit_t *a, const digit_t *b, digit_t *c);
// Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = nwords. Returns the borrow bit
static unsigned int mp_sub(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords);
// 2x434-bit multiprecision subtraction followed by addition with p434*2^448, c = a-b+(p434*2^448) if a-b < 0, otherwise c=a-b
static void mp_subaddx2_asm(const digit_t *a, const digit_t *b, digit_t *c);
static void mp_subadd434x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
void oqs_kem_sike_mp_subaddx2_asm(const digit_t *a, const digit_t *b, digit_t *c);
void oqs_kem_sike_mp_subadd434x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
// Double 2x434-bit multiprecision subtraction, c = c-a-b, where c > a and c > b
static void mp_dblsub434x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
void oqs_kem_sike_mp_dblsub434x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
// Multiprecision left shift
static void mp_shiftleft(digit_t *x, unsigned int shift, const unsigned int nwords);
@ -161,11 +161,11 @@ static bool fpequal434_non_constant_time(const digit_t *a, const digit_t *b);
// Modular addition, c = a+b mod p434
extern void fpadd434(const digit_t *a, const digit_t *b, digit_t *c);
extern void fpadd434_asm(const digit_t *a, const digit_t *b, digit_t *c);
extern void oqs_kem_sike_fpadd434_asm(const digit_t *a, const digit_t *b, digit_t *c);
// Modular subtraction, c = a-b mod p434
extern void fpsub434(const digit_t *a, const digit_t *b, digit_t *c);
extern void fpsub434_asm(const digit_t *a, const digit_t *b, digit_t *c);
extern void oqs_kem_sike_fpsub434_asm(const digit_t *a, const digit_t *b, digit_t *c);
// Modular negation, a = -a mod p434
extern void fpneg434(digit_t *a);
@ -177,12 +177,12 @@ static void fpdiv2_434(const digit_t *a, digit_t *c);
static void fpcorrection434(digit_t *a);
// 434-bit Montgomery reduction, c = a mod p
static void rdc_mont(const digit_t *a, digit_t *c);
static void rdc_mont(digit_t *a, digit_t *c);
void oqs_kem_sike_rdc434_asm(digit_t *ma, digit_t *mc);
// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p434, where R=2^448
static void fpmul434_mont(const digit_t *a, const digit_t *b, digit_t *c);
static void mul434_asm(const digit_t *a, const digit_t *b, digit_t *c);
static void rdc434_asm(const digit_t *ma, digit_t *mc);
void oqs_kem_sike_mul434_asm(const digit_t *a, const digit_t *b, digit_t *c);
// Field squaring using Montgomery arithmetic, c = a^2*R^-1 mod p434, where R=2^448
static void fpsqr434_mont(const digit_t *ma, digit_t *mc);

View File

@ -155,7 +155,7 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
c[2 * nwords - 1] = v;
}
void rdc_mont(const digit_t *ma, digit_t *mc) { // Efficient Montgomery reduction using comba and exploiting the special form of the prime p434.
void rdc_mont(digit_t *ma, digit_t *mc) { // Efficient Montgomery reduction using comba and exploiting the special form of the prime p434.
// mc = ma*R^-1 mod p434x2, where R = 2^448.
// If ma < 2^448*p434, the output mc is in the range [0, 2*p434-1].
// ma is assumed to be in Montgomery representation.

View File

@ -17,7 +17,7 @@ __inline void fpadd503(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
// Inputs: a, b in [0, 2*p503-1]
// Output: c in [0, 2*p503-1]
#if (OS_TARGET == OS_WIN)
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
unsigned int i, carry = 0;
digit_t mask;
@ -38,7 +38,7 @@ __inline void fpadd503(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
#elif (OS_TARGET == OS_LINUX)
fpadd503_asm(a, b, c);
oqs_kem_sike_fpadd503_asm(a, b, c);
#endif
}
@ -47,7 +47,7 @@ __inline void fpsub503(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
// Inputs: a, b in [0, 2*p503-1]
// Output: c in [0, 2*p503-1]
#if (OS_TARGET == OS_WIN)
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
unsigned int i, borrow = 0;
digit_t mask;
@ -63,7 +63,7 @@ __inline void fpsub503(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
#elif (OS_TARGET == OS_LINUX)
fpsub503_asm(a, b, c);
oqs_kem_sike_fpsub503_asm(a, b, c);
#endif
}
@ -110,7 +110,7 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
UNREFERENCED_PARAMETER(nwords);
#if (OS_TARGET == OS_WIN)
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
digit_t t = 0;
uint128_t uv = {0};
unsigned int carry = 0;
@ -316,17 +316,17 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
#elif (OS_TARGET == OS_LINUX)
mul503_asm(a, b, c);
oqs_kem_sike_mul503_asm(a, b, c);
#endif
}
void rdc_mont(const digit_t *ma, digit_t *mc) { // Montgomery reduction exploiting special form of the prime.
void rdc_mont(digit_t *ma, digit_t *mc) { // Montgomery reduction exploiting special form of the prime.
// mc = ma*R^-1 mod p503x2, where R = 2^512.
// If ma < 2^512*p503, the output mc is in the range [0, 2*p503-1].
// ma is assumed to be in Montgomery representation.
#if (OS_TARGET == OS_WIN)
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
unsigned int carry;
digit_t t = 0;
uint128_t uv = {0};
@ -504,7 +504,7 @@ void rdc_mont(const digit_t *ma, digit_t *mc) { // Montgomery reduction exploiti
#elif (OS_TARGET == OS_LINUX)
rdc503_asm(ma, mc);
oqs_kem_sike_rdc503_asm(ma, mc);
#endif
}

File diff suppressed because it is too large Load Diff

View File

@ -16,14 +16,14 @@ __inline void fpadd503(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
// Inputs: a, b in [0, 2*p503-1]
// Output: c in [0, 2*p503-1]
fpadd503_asm(a, b, c);
oqs_kem_sike_fpadd503_asm(a, b, c);
}
__inline void fpsub503(const digit_t *a, const digit_t *b, digit_t *c) { // Modular subtraction, c = a-b mod p503.
// Inputs: a, b in [0, 2*p503-1]
// Output: c in [0, 2*p503-1]
fpsub503_asm(a, b, c);
oqs_kem_sike_fpsub503_asm(a, b, c);
}
__inline void fpneg503(digit_t *a) { // Modular negation, a = -a mod p503.
@ -68,13 +68,13 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
UNREFERENCED_PARAMETER(nwords);
mul503_asm(a, b, c);
oqs_kem_sike_mul503_asm(a, b, c);
}
void rdc_mont(const digit_t *ma, digit_t *mc) { // Montgomery reduction exploiting special form of the prime.
void rdc_mont(digit_t *ma, digit_t *mc) { // Montgomery reduction exploiting special form of the prime.
// mc = ma*R^-1 mod p503x2, where R = 2^512.
// If ma < 2^512*p503, the output mc is in the range [0, 2*p503-1].
// ma is assumed to be in Montgomery representation.
rdc503_asm(ma, mc);
oqs_kem_sike_rdc503_asm(ma, mc);
}

View File

@ -1,168 +1,168 @@
//*******************************************************************************************
// SIDH: an efficient supersingular isogeny cryptography library
//
// Abstract: field arithmetic in 64-bit ARMv8 assembly for P503 on Linux
//*******************************************************************************************
.text
// p503
p503:
.quad 0xFFFFFFFFFFFFFFFF
.quad 0xABFFFFFFFFFFFFFF
.quad 0x13085BDA2211E7A0
.quad 0x1B9BF6C87B7E7DAF
.quad 0x6045C6BDDA77A4D0
.quad 0x004066F541811E1E
// p503 + 1
p503p1:
.quad 0xAC00000000000000
.quad 0x13085BDA2211E7A0
.quad 0x1B9BF6C87B7E7DAF
.quad 0x6045C6BDDA77A4D0
.quad 0x004066F541811E1E
// 2 * p503
p503x2:
.quad 0xFFFFFFFFFFFFFFFE
.quad 0xFFFFFFFFFFFFFFFF
.quad 0x57FFFFFFFFFFFFFF
.quad 0x2610B7B44423CF41
.quad 0x3737ED90F6FCFB5E
.quad 0xC08B8D7BB4EF49A0
//*******************************************************************************************
// SIDH: an efficient supersingular isogeny cryptography library
//
// Abstract: field arithmetic in 64-bit ARMv8 assembly for P503 on Linux
//*******************************************************************************************
.text
// p503
p503:
.quad 0xFFFFFFFFFFFFFFFF
.quad 0xABFFFFFFFFFFFFFF
.quad 0x13085BDA2211E7A0
.quad 0x1B9BF6C87B7E7DAF
.quad 0x6045C6BDDA77A4D0
.quad 0x004066F541811E1E
// p503 + 1
p503p1:
.quad 0xAC00000000000000
.quad 0x13085BDA2211E7A0
.quad 0x1B9BF6C87B7E7DAF
.quad 0x6045C6BDDA77A4D0
.quad 0x004066F541811E1E
// 2 * p503
p503x2:
.quad 0xFFFFFFFFFFFFFFFE
.quad 0xFFFFFFFFFFFFFFFF
.quad 0x57FFFFFFFFFFFFFF
.quad 0x2610B7B44423CF41
.quad 0x3737ED90F6FCFB5E
.quad 0xC08B8D7BB4EF49A0
.quad 0x0080CDEA83023C3C
p503p1_nz_s8:
.quad 0x85BDA2211E7A0AC
.quad 0x9BF6C87B7E7DAF13
.quad 0x45C6BDDA77A4D01B
.quad 0x4066F541811E1E60
//***********************************************************************
// Field addition
// Operation: c [x2] = a [x0] + b [x1]
//***********************************************************************
.global fpadd503_asm
fpadd503_asm:
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
// Add a + b
adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x13
adcs x6, x6, x14
ldp x7, x8, [x0,#32]
ldp x9, x10, [x0,#48]
ldp x15, x16, [x1,#32]
ldp x17, x18, [x1,#48]
adcs x7, x7, x15
adcs x8, x8, x16
adcs x9, x9, x17
adc x10, x10, x18
// Subtract 2xp503
ldr x11, p503x2
ldr x12, p503x2 + 8
ldr x13, p503x2 + 16
ldr x14, p503x2 + 24
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x12
sbcs x6, x6, x13
sbcs x7, x7, x14
ldr x15, p503x2 + 32
ldr x16, p503x2 + 40
ldr x17, p503x2 + 48
sbcs x8, x8, x15
sbcs x9, x9, x16
sbcs x10, x10, x17
sbc x18, xzr, xzr
// Add 2xp503 anded with the mask in x18
and x11, x11, x18
and x12, x12, x18
and x13, x13, x18
and x14, x14, x18
and x15, x15, x18
and x16, x16, x18
and x17, x17, x18
adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x12
adcs x6, x6, x13
adcs x7, x7, x14
adcs x8, x8, x15
adcs x9, x9, x16
adc x10, x10, x17
stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
stp x9, x10, [x2,#48]
ret
//***********************************************************************
// Field subtraction
// Operation: c [x2] = a [x0] - b [x1]
//***********************************************************************
.global fpsub503_asm
fpsub503_asm:
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
// Subtract a - b
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
ldp x7, x8, [x0,#32]
ldp x9, x10, [x0,#48]
ldp x15, x16, [x1,#32]
ldp x17, x18, [x1,#48]
sbcs x7, x7, x15
sbcs x8, x8, x16
sbcs x9, x9, x17
sbcs x10, x10, x18
sbc x18, xzr, xzr
// Add 2xp503 anded with the mask in x18
ldr x11, p503x2
ldr x12, p503x2 + 8
ldr x13, p503x2 + 16
ldr x14, p503x2 + 24
and x11, x11, x18
and x12, x12, x18
and x13, x13, x18
and x14, x14, x18
ldr x15, p503x2 + 32
ldr x16, p503x2 + 40
ldr x17, p503x2 + 48
and x15, x15, x18
and x16, x16, x18
and x17, x17, x18
adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x12
adcs x6, x6, x13
adcs x7, x7, x14
adcs x8, x8, x15
adcs x9, x9, x16
adc x10, x10, x17
stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
stp x9, x10, [x2,#48]
p503p1_nz_s8:
.quad 0x85BDA2211E7A0AC
.quad 0x9BF6C87B7E7DAF13
.quad 0x45C6BDDA77A4D01B
.quad 0x4066F541811E1E60
//***********************************************************************
// Field addition
// Operation: c [x2] = a [x0] + b [x1]
//
// Reads eight 64-bit words from each of a and b, adds them, subtracts
// the constant at p503x2, and if that subtraction borrows adds the same
// constant back. Eight words are written to c.
// Only x3-x18 are used as scratch; nothing is pushed on the stack.
// NOTE(review): x18 is used as a scratch/mask register. AAPCS64 treats
// x18 as the platform register - confirm this is acceptable on every OS
// this file is assembled for.
//***********************************************************************
.global oqs_kem_sike_fpadd503_asm
oqs_kem_sike_fpadd503_asm:
// Load words 0-3 of a and b
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
// Add a + b
adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x13
adcs x6, x6, x14
// Load words 4-7; ldp leaves the carry flag from the chain above intact
ldp x7, x8, [x0,#32]
ldp x9, x10, [x0,#48]
ldp x15, x16, [x1,#32]
ldp x17, x18, [x1,#48]
adcs x7, x7, x15
adcs x8, x8, x16
adcs x9, x9, x17
// Last word: plain adc, the carry out of the top word is not kept
adc x10, x10, x18
// Subtract 2xp503
ldr x11, p503x2
ldr x12, p503x2 + 8
ldr x13, p503x2 + 16
ldr x14, p503x2 + 24
subs x3, x3, x11
sbcs x4, x4, x12
// x12 is used for two consecutive words: the p503x2 table holds seven
// quads for the eight-word operand (presumably words 1 and 2 are equal)
sbcs x5, x5, x12
sbcs x6, x6, x13
sbcs x7, x7, x14
ldr x15, p503x2 + 32
ldr x16, p503x2 + 40
ldr x17, p503x2 + 48
sbcs x8, x8, x15
sbcs x9, x9, x16
sbcs x10, x10, x17
// x18 = 0 - 0 - borrow: all ones if the subtraction borrowed, else zero
sbc x18, xzr, xzr
// Add 2xp503 anded with the mask in x18
and x11, x11, x18
and x12, x12, x18
and x13, x13, x18
and x14, x14, x18
and x15, x15, x18
and x16, x16, x18
and x17, x17, x18
adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x12
adcs x6, x6, x13
adcs x7, x7, x14
adcs x8, x8, x15
adcs x9, x9, x16
adc x10, x10, x17
// Store the eight result words to c
stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
stp x9, x10, [x2,#48]
ret
//***********************************************************************
// Field subtraction
// Operation: c [x2] = a [x0] - b [x1]
//
// Reads eight 64-bit words from each of a and b, computes a - b with a
// borrow chain, and if the result borrowed adds the constant at p503x2
// back. Eight words are written to c.
// Only x3-x18 are used as scratch; nothing is pushed on the stack.
// NOTE(review): x18 is used as a scratch/mask register. AAPCS64 treats
// x18 as the platform register - confirm this is acceptable on every OS
// this file is assembled for.
//***********************************************************************
.global oqs_kem_sike_fpsub503_asm
oqs_kem_sike_fpsub503_asm:
// Load words 0-3 of a and b
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
// Subtract a - b
subs x3, x3, x11
sbcs x4, x4, x12
sbcs x5, x5, x13
sbcs x6, x6, x14
// Load words 4-7; ldp leaves the borrow state from the chain above intact
ldp x7, x8, [x0,#32]
ldp x9, x10, [x0,#48]
ldp x15, x16, [x1,#32]
ldp x17, x18, [x1,#48]
sbcs x7, x7, x15
sbcs x8, x8, x16
sbcs x9, x9, x17
sbcs x10, x10, x18
// x18 = 0 - 0 - borrow: all ones if a - b borrowed, else zero
sbc x18, xzr, xzr
// Add 2xp503 anded with the mask in x18
ldr x11, p503x2
ldr x12, p503x2 + 8
ldr x13, p503x2 + 16
ldr x14, p503x2 + 24
and x11, x11, x18
and x12, x12, x18
and x13, x13, x18
and x14, x14, x18
ldr x15, p503x2 + 32
ldr x16, p503x2 + 40
ldr x17, p503x2 + 48
and x15, x15, x18
and x16, x16, x18
and x17, x17, x18
adds x3, x3, x11
adcs x4, x4, x12
// x12 is used for two consecutive words: the p503x2 table holds seven
// quads for the eight-word operand (presumably words 1 and 2 are equal)
adcs x5, x5, x12
adcs x6, x6, x13
adcs x7, x7, x14
adcs x8, x8, x15
adcs x9, x9, x16
adc x10, x10, x17
// Store the eight result words to c
stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
stp x9, x10, [x2,#48]
ret
@ -263,12 +263,12 @@ fpsub503_asm:
.endm
//***********************************************************************************
// 512-bit integer multiplication using Karatsuba (two levels), Comba (lower level)
//***********************************************************************************
// 512-bit integer multiplication using Karatsuba (two levels), Comba (lower level)
// Operation: c [x2] = a [x0] * b [x1]
//***********************************************************************************
.global mul503_asm
mul503_asm:
.global oqs_kem_sike_mul503_asm
oqs_kem_sike_mul503_asm:
sub sp, sp, #96
stp x19, x20, [sp,#0]
stp x21, x22, [sp,#16]
@ -448,35 +448,35 @@ mul503_asm:
.endm
//**************************************************************************************
// Montgomery reduction
// Based on method described in Faz-Hernandez et al. https://eprint.iacr.org/2017/1015
// Operation: mc [x1] = ma [x0]
// NOTE: ma=mc is not allowed
//**************************************************************************************
// Montgomery reduction
// Based on method described in Faz-Hernandez et al. https://eprint.iacr.org/2017/1015
// Operation: mc [x1] = ma [x0]
// NOTE: ma=mc is not allowed
//**************************************************************************************
.global rdc503_asm
rdc503_asm:
sub sp, sp, #96
stp x19, x20, [sp]
stp x21, x22, [sp, #16]
stp x23, x24, [sp, #32]
stp x25, x26, [sp, #48]
stp x27, x28, [sp, #64]
.global oqs_kem_sike_rdc503_asm
oqs_kem_sike_rdc503_asm:
sub sp, sp, #96
stp x19, x20, [sp]
stp x21, x22, [sp, #16]
stp x23, x24, [sp, #32]
stp x25, x26, [sp, #48]
stp x27, x28, [sp, #64]
stp x29, x30, [sp, #80]
ldp x2, x3, [x0,#0] // a[0-1]
// Load the prime constant
ldr x24, p503p1_nz_s8 + 0
ldr x25, p503p1_nz_s8 + 8
ldr x26, p503p1_nz_s8 + 16
ldr x27, p503p1_nz_s8 + 24
ldp x2, x3, [x0,#0] // a[0-1]
// Load the prime constant
ldr x24, p503p1_nz_s8 + 0
ldr x25, p503p1_nz_s8 + 8
ldr x26, p503p1_nz_s8 + 16
ldr x27, p503p1_nz_s8 + 24
// a[0-1] x p503p1_nz_s8 --> result: x4:x9
mul x4, x2, x24 // a[0] x p503p1_nz_s8[0]
umulh x7, x2, x24
mul x5, x2, x25 // a[0] x p503p1_nz_s8[1]
umulh x6, x2, x25
umulh x6, x2, x25
MUL128x256_COMBA_CUT x2, x3, x24, x25, x26, x27, x4, x5, x6, x7, x8, x9, x28, x29, x30, x10
ldp x3, x11, [x0,#16] // a[2]
@ -516,9 +516,9 @@ rdc503_asm:
adcs x20, xzr, x20
adcs x21, xzr, x21
adcs x22, xzr, x22
adc x23, xzr, x23
// a[2-3] x p503p1_nz_s8 --> result: x4:x9
adc x23, xzr, x23
// a[2-3] x p503p1_nz_s8 --> result: x4:x9
MUL128x256_COMBA_CUT x3, x11, x24, x25, x26, x27, x4, x5, x6, x7, x8, x9, x28, x29, x30, x10
orr x10, xzr, x9, lsr #8
@ -548,9 +548,9 @@ rdc503_asm:
umulh x6, x12, x25
adcs x21, xzr, x21
adcs x22, xzr, x22
adc x23, xzr, x23
// a[4-5] x p503p1_nz_s8 --> result: x4:x9
adc x23, xzr, x23
// a[4-5] x p503p1_nz_s8 --> result: x4:x9
MUL128x256_COMBA_CUT x12, x13, x24, x25, x26, x27, x4, x5, x6, x7, x8, x9, x28, x29, x30, x10
orr x10, xzr, x9, lsr #8
@ -578,9 +578,9 @@ rdc503_asm:
mul x5, x14, x25 // a[6] x p503p1_nz_s8[1]
umulh x6, x14, x25
adcs x22, xzr, x22
adc x23, xzr, x23
// a[6-7] x p503p1_nz_s8 --> result: x4:x9
adc x23, xzr, x23
// a[6-7] x p503p1_nz_s8 --> result: x4:x9
MUL128x256_COMBA_CUT x14, x15, x24, x25, x26, x27, x4, x5, x6, x7, x8, x9, x28, x29, x30, x10
orr x10, xzr, x9, lsr #8
@ -607,251 +607,251 @@ rdc503_asm:
adc x23, x10, x23
stp x20, x21, [x1,#32]
stp x22, x23, [x1,#48]
ldp x19, x20, [sp]
ldp x21, x22, [sp, #16]
ldp x23, x24, [sp, #32]
ldp x25, x26, [sp, #48]
ldp x27, x28, [sp, #64]
ldp x19, x20, [sp]
ldp x21, x22, [sp, #16]
ldp x23, x24, [sp, #32]
ldp x25, x26, [sp, #48]
ldp x27, x28, [sp, #64]
ldp x29, x30, [sp, #80]
add sp, sp, #96
ret
//***********************************************************************
// 503-bit multiprecision addition
// Operation: c [x2] = a [x0] + b [x1]
//
// Plain eight-word addition with carry propagation; no modular reduction
// is performed here. The carry out of the top word is not kept and no
// return value is set. Uses x3-x18 as scratch, no stack.
//***********************************************************************
.global mp_add503_asm
mp_add503_asm:
// Load words 0-3 of a and b
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x13
adcs x6, x6, x14
// Load words 4-7; ldp does not touch the carry flag
ldp x7, x8, [x0,#32]
ldp x9, x10, [x0,#48]
ldp x15, x16, [x1,#32]
ldp x17, x18, [x1,#48]
adcs x7, x7, x15
adcs x8, x8, x16
adcs x9, x9, x17
adc x10, x10, x18
// Store the eight result words to c
stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
stp x9, x10, [x2,#48]
ret
//***********************************************************************
// 2x503-bit multiprecision addition
// Operation: c [x2] = a [x0] + b [x1]
//
// Sixteen-word addition done as two eight-word halves. The carry out of
// the low half is carried into the high half; the carry out of the top
// word is not kept. Uses x3-x18 as scratch, no stack.
//***********************************************************************
.global mp_add503x2_asm
mp_add503x2_asm:
// Low half: words 0-7
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x13
adcs x6, x6, x14
ldp x7, x8, [x0,#32]
ldp x9, x10, [x0,#48]
ldp x15, x16, [x1,#32]
ldp x17, x18, [x1,#48]
adcs x7, x7, x15
adcs x8, x8, x16
adcs x9, x9, x17
// adcs (not adc): the carry must survive into the high half below
adcs x10, x10, x18
stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
stp x9, x10, [x2,#48]
// High half: words 8-15; loads/stores above leave the carry flag intact
ldp x3, x4, [x0,#64]
ldp x5, x6, [x0,#80]
ldp x11, x12, [x1,#64]
ldp x13, x14, [x1,#80]
adcs x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x13
adcs x6, x6, x14
ldp x7, x8, [x0,#96]
ldp x9, x10, [x0,#112]
ldp x15, x16, [x1,#96]
ldp x17, x18, [x1,#112]
adcs x7, x7, x15
adcs x8, x8, x16
adcs x9, x9, x17
adc x10, x10, x18
stp x3, x4, [x2,#64]
stp x5, x6, [x2,#80]
stp x7, x8, [x2,#96]
stp x9, x10, [x2,#112]
ret
//***********************************************************************
// 2x503-bit multiprecision subtraction/addition
// Operation: c [x2] = a [x0] - b [x1]. If c < 0, add p503*2^512
//
// Sixteen-word subtraction. The low eight result words are stored to c
// as they are produced; the high eight words stay in x3-x10 until the
// conditional correction has been applied. The correction adds the
// constant at p503, masked by the final borrow, to the high eight words
// only (offsets 64-112 of c).
// Uses x3-x16 as scratch and overwrites x0 with the mask; no stack.
//***********************************************************************
.global mp_subadd503x2_asm
mp_subadd503x2_asm:
// Low half (words 0-7): subtract and store directly
ldp x3, x4, [x0,#0]
ldp x11, x12, [x1,#0]
ldp x5, x6, [x0,#16]
ldp x13, x14, [x1,#16]
subs x3, x3, x11
sbcs x4, x4, x12
stp x3, x4, [x2,#0]
ldp x7, x8, [x0,#32]
ldp x11, x12, [x1,#32]
sbcs x5, x5, x13
sbcs x6, x6, x14
stp x5, x6, [x2,#16]
ldp x9, x10, [x0,#48]
ldp x13, x14, [x1,#48]
sbcs x7, x7, x11
sbcs x8, x8, x12
stp x7, x8, [x2,#32]
sbcs x9, x9, x13
sbcs x10, x10, x14
stp x9, x10, [x2,#48]
// High half (words 8-15): subtract, keep the result in x3-x10
ldp x3, x4, [x0,#64]
ldp x11, x12, [x1,#64]
ldp x5, x6, [x0,#80]
ldp x13, x14, [x1,#80]
sbcs x3, x3, x11
sbcs x4, x4, x12
ldp x7, x8, [x0,#96]
ldp x11, x12, [x1,#96]
sbcs x5, x5, x13
sbcs x6, x6, x14
ldp x9, x10, [x0,#112]
ldp x13, x14, [x1,#112]
// Each b word is consumed by its sbcs before the register is reloaded
// with a word of p503; ldr does not disturb the borrow chain
sbcs x7, x7, x11
ldr x11, p503
sbcs x8, x8, x12
ldr x12, p503 + 8
sbcs x9, x9, x13
ldr x13, p503 + 16
sbcs x10, x10, x14
ldr x14, p503 + 24
// x0 = 0 - 0 - borrow: all ones if a - b borrowed, else zero.
// The pointer to a in x0 is no longer needed at this point.
sbc x0, xzr, xzr
// Add p503 anded with the mask in x0
ldr x15, p503 + 32
ldr x16, p503 + 40
and x11, x11, x0
and x12, x12, x0
and x13, x13, x0
and x14, x14, x0
and x15, x15, x0
and x16, x16, x0
// x11 is used for three consecutive words: the p503 table holds six
// quads for the eight-word operand (presumably words 0-2 are equal)
adds x3, x3, x11
adcs x4, x4, x11
stp x3, x4, [x2,#64]
adcs x5, x5, x11
adcs x6, x6, x12
stp x5, x6, [x2,#80]
adcs x7, x7, x13
adcs x8, x8, x14
stp x7, x8, [x2,#96]
adcs x9, x9, x15
adc x10, x10, x16
stp x9, x10, [x2,#112]
ret
//***********************************************************************
// Double 2x503-bit multiprecision subtraction
// Operation: c [x2] = c [x2] - a [x0] - b [x1]
//
// Inputs:  x0 = pointer to a, x1 = pointer to b (sixteen 64-bit words each)
// In/out:  x2 = pointer to c (sixteen 64-bit words, updated in place)
// Scratch: x3-x18, plus x27-x30 which are saved on entry and restored
//          before returning (32 bytes of stack).
// The two subtractions run as two independent borrow chains over the
// register-resident value; the borrow out of the top word of either pass
// is discarded.
// NOTE(review): x18 is the platform register in AAPCS64 and is reserved on
// some operating systems -- confirm this is acceptable for every target.
//***********************************************************************
.global mp_dblsub503x2_asm
mp_dblsub503x2_asm:
// Save the registers used as temporaries; x30 is the link register, so it
// must hold its entry value again when ret executes.
sub sp, sp, #32
stp x27, x28, [sp, #0]
stp x29, x30, [sp, #16]
// Load all sixteen words of c into x3-x18
ldp x3, x4, [x2,#0]
ldp x5, x6, [x2,#16]
ldp x7, x8, [x2,#32]
ldp x9, x10, [x2,#48]
ldp x11, x12, [x2,#64]
ldp x13, x14, [x2,#80]
ldp x15, x16, [x2,#96]
ldp x17, x18, [x2,#112]
// ---- Pass 1: c -= a, four words at a time through x27-x30 ----
ldp x27, x28, [x0,#0]
ldp x29, x30, [x0,#16]
subs x3, x3, x27
sbcs x4, x4, x28
sbcs x5, x5, x29
sbcs x6, x6, x30
ldp x27, x28, [x0,#32]
ldp x29, x30, [x0,#48]
sbcs x7, x7, x27
sbcs x8, x8, x28
sbcs x9, x9, x29
sbcs x10, x10, x30
ldp x27, x28, [x0,#64]
ldp x29, x30, [x0,#80]
sbcs x11, x11, x27
sbcs x12, x12, x28
sbcs x13, x13, x29
sbcs x14, x14, x30
ldp x27, x28, [x0,#96]
ldp x29, x30, [x0,#112]
sbcs x15, x15, x27
sbcs x16, x16, x28
sbcs x17, x17, x29
// Last word: sbc (no flag update), the borrow out is dropped
sbc x18, x18, x30
// ---- Pass 2: c -= b, a fresh borrow chain started by subs ----
ldp x27, x28, [x1,#0]
ldp x29, x30, [x1,#16]
subs x3, x3, x27
sbcs x4, x4, x28
sbcs x5, x5, x29
sbcs x6, x6, x30
ldp x27, x28, [x1,#32]
ldp x29, x30, [x1,#48]
sbcs x7, x7, x27
sbcs x8, x8, x28
sbcs x9, x9, x29
sbcs x10, x10, x30
ldp x27, x28, [x1,#64]
ldp x29, x30, [x1,#80]
sbcs x11, x11, x27
sbcs x12, x12, x28
sbcs x13, x13, x29
sbcs x14, x14, x30
ldp x27, x28, [x1,#96]
ldp x29, x30, [x1,#112]
sbcs x15, x15, x27
sbcs x16, x16, x28
sbcs x17, x17, x29
sbc x18, x18, x30
// Write the result back over c
stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
stp x9, x10, [x2,#48]
stp x11, x12, [x2,#64]
stp x13, x14, [x2,#80]
stp x15, x16, [x2,#96]
stp x17, x18, [x2,#112]
// Restore the saved registers (including the return address) and release
// the stack frame
ldp x27, x28, [sp, #0]
ldp x29, x30, [sp, #16]
add sp, sp, #32
ret
//***********************************************************************
// 503-bit multiprecision addition
// Operation: c [x2] = a [x0] + b [x1]
//
// x0, x1: source operands, eight 64-bit words each
// x2:     destination, eight 64-bit words
// Uses x3-x18 as temporaries and does not touch the stack.
// No carry is returned: the top word is added with adc, which leaves the
// flags alone, so any carry out of word 7 is lost.
// NOTE(review): x18 is the AAPCS64 platform register; some operating
// systems reserve it -- confirm for the platforms this is assembled for.
//***********************************************************************
.global oqs_kem_sike_mp_add503_asm
oqs_kem_sike_mp_add503_asm:
// a[0..3] -> x3-x6, b[0..3] -> x11-x14
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
// Start of the carry chain (adds), continued by adcs
adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x13
adcs x6, x6, x14
// a[4..7] -> x7-x10, b[4..7] -> x15-x18; ldp does not alter the carry flag
ldp x7, x8, [x0,#32]
ldp x9, x10, [x0,#48]
ldp x15, x16, [x1,#32]
ldp x17, x18, [x1,#48]
adcs x7, x7, x15
adcs x8, x8, x16
adcs x9, x9, x17
// Final word, flags not updated
adc x10, x10, x18
// Store c[0..7]
stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
stp x9, x10, [x2,#48]
ret
//***********************************************************************
// 2x503-bit multiprecision addition
// Operation: c [x2] = a [x0] + b [x1]
//
// x0, x1: source operands, sixteen 64-bit words each
// x2:     destination, sixteen 64-bit words
// Uses x3-x18 as temporaries and does not touch the stack.
// The sum is formed in two 8-word batches on a single carry chain. No carry
// is returned: the top word uses adc, so a carry out of word 15 is lost.
// NOTE(review): x18 is the AAPCS64 platform register; some operating
// systems reserve it -- confirm for the platforms this is assembled for.
//***********************************************************************
.global oqs_kem_sike_mp_add503x2_asm
oqs_kem_sike_mp_add503x2_asm:
// ---- Batch 1: words 0-7 ----
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x11, x12, [x1,#0]
ldp x13, x14, [x1,#16]
// Start of the carry chain
adds x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x13
adcs x6, x6, x14
ldp x7, x8, [x0,#32]
ldp x9, x10, [x0,#48]
ldp x15, x16, [x1,#32]
ldp x17, x18, [x1,#48]
adcs x7, x7, x15
adcs x8, x8, x16
adcs x9, x9, x17
// Flags are updated here so the carry propagates into batch 2
adcs x10, x10, x18
stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
stp x9, x10, [x2,#48]
// ---- Batch 2: words 8-15 (registers recycled after the stores) ----
// stp/ldp leave the carry flag intact, so the chain simply continues.
ldp x3, x4, [x0,#64]
ldp x5, x6, [x0,#80]
ldp x11, x12, [x1,#64]
ldp x13, x14, [x1,#80]
adcs x3, x3, x11
adcs x4, x4, x12
adcs x5, x5, x13
adcs x6, x6, x14
ldp x7, x8, [x0,#96]
ldp x9, x10, [x0,#112]
ldp x15, x16, [x1,#96]
ldp x17, x18, [x1,#112]
adcs x7, x7, x15
adcs x8, x8, x16
adcs x9, x9, x17
// Final word, flags not updated
adc x10, x10, x18
stp x3, x4, [x2,#64]
stp x5, x6, [x2,#80]
stp x7, x8, [x2,#96]
stp x9, x10, [x2,#112]
ret
//***********************************************************************
// 2x503-bit multiprecision subtraction/addition
// Operation: c [x2] = a [x0] - b [x1]. If c < 0, add p503*2^512
//
// x0, x1: source operands, sixteen 64-bit words each
// x2:     destination, sixteen 64-bit words
// Uses x3-x16 as temporaries; x0 ends up holding the borrow mask, so the
// pointer to a is not preserved. The stack is not used.
// There is no conditional branch: the prime words are ANDed with a mask
// derived from the final borrow and then always added to words 8-15.
// Prime words come from PC-relative loads at label p503, which is defined
// elsewhere in the assembly file.
//***********************************************************************
.global oqs_kem_sike_mp_subadd503x2_asm
oqs_kem_sike_mp_subadd503x2_asm:
// ---- Words 0-7: subtract and write straight to c ----
ldp x3, x4, [x0,#0]
ldp x11, x12, [x1,#0]
ldp x5, x6, [x0,#16]
ldp x13, x14, [x1,#16]
// Borrow chain begins (on AArch64 a cleared C flag signals a borrow)
subs x3, x3, x11
sbcs x4, x4, x12
stp x3, x4, [x2,#0]
ldp x7, x8, [x0,#32]
ldp x11, x12, [x1,#32]
sbcs x5, x5, x13
sbcs x6, x6, x14
stp x5, x6, [x2,#16]
ldp x9, x10, [x0,#48]
ldp x13, x14, [x1,#48]
sbcs x7, x7, x11
sbcs x8, x8, x12
stp x7, x8, [x2,#32]
sbcs x9, x9, x13
sbcs x10, x10, x14
stp x9, x10, [x2,#48]
// ---- Words 8-15: subtract, hold the result in x3-x10 ----
ldp x3, x4, [x0,#64]
ldp x11, x12, [x1,#64]
ldp x5, x6, [x0,#80]
ldp x13, x14, [x1,#80]
sbcs x3, x3, x11
sbcs x4, x4, x12
ldp x7, x8, [x0,#96]
ldp x11, x12, [x1,#96]
sbcs x5, x5, x13
sbcs x6, x6, x14
ldp x9, x10, [x0,#112]
ldp x13, x14, [x1,#112]
// Each ldr below reuses a register whose b word was consumed by the sbcs
// just before it.
sbcs x7, x7, x11
ldr x11, p503
sbcs x8, x8, x12
ldr x12, p503 + 8
sbcs x9, x9, x13
ldr x13, p503 + 16
sbcs x10, x10, x14
ldr x14, p503 + 24
// Mask: x0 = 0 - 0 - borrow, i.e. all ones when a < b and zero otherwise
sbc x0, xzr, xzr
// Add p503 anded with the mask in x0
ldr x15, p503 + 32
ldr x16, p503 + 40
and x11, x11, x0
and x12, x12, x0
and x13, x13, x0
and x14, x14, x0
and x15, x15, x0
and x16, x16, x0
// The first table word (x11) is applied to result words 8, 9 and 10.
// NOTE(review): this assumes the table at p503 holds the prime's repeated
// low word once, followed by the five remaining words -- verify against
// the table definition.
adds x3, x3, x11
adcs x4, x4, x11
stp x3, x4, [x2,#64]
adcs x5, x5, x11
adcs x6, x6, x12
stp x5, x6, [x2,#80]
adcs x7, x7, x13
adcs x8, x8, x14
stp x7, x8, [x2,#96]
adcs x9, x9, x15
// Final word, flags not updated; carry out is ignored
adc x10, x10, x16
stp x9, x10, [x2,#112]
ret
//***********************************************************************
// Double 2x503-bit multiprecision subtraction
// Operation: c [x2] = c [x2] - a [x0] - b [x1]
//
// x0, x1: source operands, sixteen 64-bit words each
// x2:     sixteen-word value that is both read and overwritten
// Uses x3-x18 as the accumulator and x27-x30 as operand temporaries;
// x27-x30 are spilled to a 32-byte stack frame and reloaded before ret.
// a and b are subtracted in two separate passes, each with its own borrow
// chain; neither pass reports the borrow out of the top word.
// NOTE(review): x18 is the AAPCS64 platform register; some operating
// systems reserve it -- confirm for the platforms this is assembled for.
//***********************************************************************
.global oqs_kem_sike_mp_dblsub503x2_asm
oqs_kem_sike_mp_dblsub503x2_asm:
// Prologue: spill x27-x30 (x30 holds the return address used by ret)
sub sp, sp, #32
stp x27, x28, [sp, #0]
stp x29, x30, [sp, #16]
// c[0..15] -> x3-x18
ldp x3, x4, [x2,#0]
ldp x5, x6, [x2,#16]
ldp x7, x8, [x2,#32]
ldp x9, x10, [x2,#48]
ldp x11, x12, [x2,#64]
ldp x13, x14, [x2,#80]
ldp x15, x16, [x2,#96]
ldp x17, x18, [x2,#112]
// ---- First pass: subtract a, fetched four words at a time ----
ldp x27, x28, [x0,#0]
ldp x29, x30, [x0,#16]
subs x3, x3, x27
sbcs x4, x4, x28
sbcs x5, x5, x29
sbcs x6, x6, x30
ldp x27, x28, [x0,#32]
ldp x29, x30, [x0,#48]
sbcs x7, x7, x27
sbcs x8, x8, x28
sbcs x9, x9, x29
sbcs x10, x10, x30
ldp x27, x28, [x0,#64]
ldp x29, x30, [x0,#80]
sbcs x11, x11, x27
sbcs x12, x12, x28
sbcs x13, x13, x29
sbcs x14, x14, x30
ldp x27, x28, [x0,#96]
ldp x29, x30, [x0,#112]
sbcs x15, x15, x27
sbcs x16, x16, x28
sbcs x17, x17, x29
// Top word: sbc leaves the flags alone, ending the first borrow chain
sbc x18, x18, x30
// ---- Second pass: subtract b; subs opens a new borrow chain ----
ldp x27, x28, [x1,#0]
ldp x29, x30, [x1,#16]
subs x3, x3, x27
sbcs x4, x4, x28
sbcs x5, x5, x29
sbcs x6, x6, x30
ldp x27, x28, [x1,#32]
ldp x29, x30, [x1,#48]
sbcs x7, x7, x27
sbcs x8, x8, x28
sbcs x9, x9, x29
sbcs x10, x10, x30
ldp x27, x28, [x1,#64]
ldp x29, x30, [x1,#80]
sbcs x11, x11, x27
sbcs x12, x12, x28
sbcs x13, x13, x29
sbcs x14, x14, x30
ldp x27, x28, [x1,#96]
ldp x29, x30, [x1,#112]
sbcs x15, x15, x27
sbcs x16, x16, x28
sbcs x17, x17, x29
sbc x18, x18, x30
// Store the sixteen result words back to c
stp x3, x4, [x2,#0]
stp x5, x6, [x2,#16]
stp x7, x8, [x2,#32]
stp x9, x10, [x2,#48]
stp x11, x12, [x2,#64]
stp x13, x14, [x2,#80]
stp x15, x16, [x2,#96]
stp x17, x18, [x2,#112]
// Epilogue: reload x27-x30 and pop the frame
ldp x27, x28, [sp, #0]
ldp x29, x30, [sp, #16]
add sp, sp, #32
ret

View File

@ -31,15 +31,15 @@
// Curve isogeny system "SIDHp503". Base curve: Montgomery curve By^2 = Cx^3 + Ax^2 + Cx defined over GF(p503^2), where A=6, B=1, C=1 and p503 = 2^250*3^159-1
//
static const uint64_t p503[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xABFFFFFFFFFFFFFF,
0x13085BDA2211E7A0, 0x1B9BF6C87B7E7DAF, 0x6045C6BDDA77A4D0, 0x004066F541811E1E
};
static const uint64_t p503p1[NWORDS64_FIELD] = {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xAC00000000000000,
0x13085BDA2211E7A0, 0x1B9BF6C87B7E7DAF, 0x6045C6BDDA77A4D0, 0x004066F541811E1E
};
static const uint64_t p503x2[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x57FFFFFFFFFFFFFF,
0x2610B7B44423CF41, 0x3737ED90F6FCFB5E, 0xC08B8D7BB4EF49A0, 0x0080CDEA83023C3C
};
const uint64_t p503[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xABFFFFFFFFFFFFFF,
0x13085BDA2211E7A0, 0x1B9BF6C87B7E7DAF, 0x6045C6BDDA77A4D0, 0x004066F541811E1E
};
const uint64_t p503p1[NWORDS64_FIELD] = {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xAC00000000000000,
0x13085BDA2211E7A0, 0x1B9BF6C87B7E7DAF, 0x6045C6BDDA77A4D0, 0x004066F541811E1E
};
const uint64_t p503x2[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x57FFFFFFFFFFFFFF,
0x2610B7B44423CF41, 0x3737ED90F6FCFB5E, 0xC08B8D7BB4EF49A0, 0x0080CDEA83023C3C
};
// Order of Alice's subgroup
static const uint64_t Alice_order[NWORDS64_ORDER] = {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0400000000000000};
// Order of Bob's subgroup
@ -125,9 +125,9 @@ static const unsigned int strat_Bob[MAX_Bob - 1] = {
#define fp2inv_mont fp2inv503_mont
#define fp2inv_mont_bingcd fp2inv503_mont_bingcd
#define fpequal_non_constant_time fpequal503_non_constant_time
#define mp_add_asm mp_add503_asm
#define mp_subaddx2_asm mp_subadd503x2_asm
#define mp_dblsubx2_asm mp_dblsub503x2_asm
#define mp_add_asm oqs_kem_sike_mp_add503_asm
#define mp_subaddx2_asm oqs_kem_sike_mp_subadd503x2_asm
#define mp_dblsubx2_asm oqs_kem_sike_mp_dblsub503x2_asm
#define crypto_kem_keypair OQS_KEM_sike_p503_keypair
#define crypto_kem_enc OQS_KEM_sike_p503_encaps
#define crypto_kem_dec OQS_KEM_sike_p503_decaps
@ -137,13 +137,14 @@ static const unsigned int strat_Bob[MAX_Bob - 1] = {
#define EphemeralKeyGeneration_B oqs_kem_sidh_p503_EphemeralKeyGeneration_B
#define EphemeralSecretAgreement_A oqs_kem_sidh_p503_EphemeralSecretAgreement_A
#define EphemeralSecretAgreement_B oqs_kem_sidh_p503_EphemeralSecretAgreement_B
#ifdef USE_SIKEP503_ASM
#define USE_SIKE_ASM
#endif
#if defined(X86_64)
#include "AMD64/fp_x64.c"
// #include "AMD64/fp_x64_asm.S" FIXMEOQS
#elif defined(ARM64)
#include "ARM64/fp_arm64.c"
// #include "ARM64/fp_arm64_asm.S" FIXMEOQS
#else
#include "generic/fp_generic.c"
#endif

View File

@ -384,9 +384,9 @@ static const uint64_t v_3_torsion[20][2 * NWORDS64_FIELD] = {
#define fp2inv_mont fp2inv503_mont
#define fp2inv_mont_bingcd fp2inv503_mont_bingcd
#define fpequal_non_constant_time fpequal503_non_constant_time
#define mp_add_asm mp_add503_asm
#define mp_subaddx2_asm mp_subadd503x2_asm
#define mp_dblsubx2_asm mp_dblsub503x2_asm
#define mp_add_asm oqs_kem_sike_mp_add503_asm
#define mp_subaddx2_asm oqs_kem_sike_mp_subadd503x2_asm
#define mp_dblsubx2_asm oqs_kem_sike_mp_dblsub503x2_asm
#define crypto_kem_keypair OQS_KEM_sike_p503_compressed_keypair
#define crypto_kem_enc OQS_KEM_sike_p503_compressed_encaps
#define crypto_kem_dec OQS_KEM_sike_p503_compressed_decaps
@ -396,13 +396,14 @@ static const uint64_t v_3_torsion[20][2 * NWORDS64_FIELD] = {
#define EphemeralKeyGeneration_B oqs_kem_sidh_p503_compressed_EphemeralKeyGeneration_B
#define EphemeralSecretAgreement_A oqs_kem_sidh_p503_compressed_EphemeralSecretAgreement_A
#define EphemeralSecretAgreement_B oqs_kem_sidh_p503_compressed_EphemeralSecretAgreement_B
#ifdef USE_SIKEP503_ASM
#define USE_SIKE_ASM
#endif
#if defined(X86_64)
#include "AMD64/fp_x64.c"
// #include "AMD64/fp_x64_asm.S" FIXMEOQS
#elif defined(ARM64)
#include "ARM64/fp_arm64.c"
// #include "ARM64/fp_arm64_asm.S" FIXMEOQS
#else
#include "generic/fp_generic.c"
#endif

View File

@ -121,17 +121,17 @@ static unsigned int mp_add(const digit_t *a, const digit_t *b, digit_t *c, const
// 503-bit multiprecision addition, c = a+b
static void mp_add503(const digit_t *a, const digit_t *b, digit_t *c);
static void mp_add503_asm(const digit_t *a, const digit_t *b, digit_t *c);
void oqs_kem_sike_mp_add503_asm(const digit_t *a, const digit_t *b, digit_t *c);
// Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = nwords. Returns the borrow bit
static unsigned int mp_sub(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords);
// 2x503-bit multiprecision subtraction followed by addition with p503*2^512, c = a-b+(p503*2^512) if a-b < 0, otherwise c=a-b
static void mp_subaddx2_asm(const digit_t *a, const digit_t *b, digit_t *c);
static void mp_subadd503x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
void oqs_kem_sike_mp_subaddx2_asm(const digit_t *a, const digit_t *b, digit_t *c);
void oqs_kem_sike_mp_subadd503x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
// Double 2x503-bit multiprecision subtraction, c = c-a-b, where c > a and c > b
static void mp_dblsub503x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
void oqs_kem_sike_mp_dblsub503x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
// Multiprecision left shift
static void mp_shiftleft(digit_t *x, unsigned int shift, const unsigned int nwords);
@ -161,13 +161,13 @@ static bool fpequal503_non_constant_time(const digit_t *a, const digit_t *b);
// Modular addition, c = a+b mod p503
extern void fpadd503(const digit_t *a, const digit_t *b, digit_t *c);
extern void fpadd503_asm(const digit_t *a, const digit_t *b, digit_t *c);
static void fpadd503_inline_asm(const digit_t *a, const digit_t *b, const digit_t *p, digit_t *c);
extern void oqs_kem_sike_fpadd503_asm(const digit_t *a, const digit_t *b, digit_t *c);
void oqs_kem_sike_fpadd503_inline_asm(const digit_t *a, const digit_t *b, const digit_t *p, digit_t *c);
// Modular subtraction, c = a-b mod p503
extern void fpsub503(const digit_t *a, const digit_t *b, digit_t *c);
extern void fpsub503_asm(const digit_t *a, const digit_t *b, digit_t *c);
static void fpsub503_inline_asm(const digit_t *a, const digit_t *b, const digit_t *p, digit_t *c);
extern void oqs_kem_sike_fpsub503_asm(const digit_t *a, const digit_t *b, digit_t *c);
void oqs_kem_sike_fpsub503_inline_asm(const digit_t *a, const digit_t *b, const digit_t *p, digit_t *c);
// Modular negation, a = -a mod p503
extern void fpneg503(digit_t *a);
@ -179,14 +179,13 @@ static void fpdiv2_503(const digit_t *a, digit_t *c);
static void fpcorrection503(digit_t *a);
// 503-bit Montgomery reduction, c = a mod p
static void rdc_mont(const digit_t *a, digit_t *c);
static void rdc503_asm(const digit_t *ma, digit_t *mc);
static void rdc503_inline_asm(const felm_t ma, felm_t mb, felm_t mc);
static void rdc_mont(digit_t *a, digit_t *c);
void oqs_kem_sike_rdc503_asm(digit_t *ma, digit_t *mc);
// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p503, where R=2^512
static void fpmul503_mont(const digit_t *a, const digit_t *b, digit_t *c);
static void mul503_asm(const digit_t *a, const digit_t *b, digit_t *c);
static void mul503_inline_asm(const digit_t *a, const digit_t *b, digit_t *c);
void oqs_kem_sike_mul503_asm(const digit_t *a, const digit_t *b, digit_t *c);
void oqs_kem_sike_mul503_inline_asm(const digit_t *a, const digit_t *b, digit_t *c);
// Field squaring using Montgomery arithmetic, c = a^2*R^-1 mod p503, where R=2^512
static void fpsqr503_mont(const digit_t *ma, digit_t *mc);

View File

@ -155,7 +155,7 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
c[2 * nwords - 1] = v;
}
void rdc_mont(const digit_t *ma, digit_t *mc) { // Efficient Montgomery reduction using comba and exploiting the special form of the prime p503.
void rdc_mont(digit_t *ma, digit_t *mc) { // Efficient Montgomery reduction using comba and exploiting the special form of the prime p503.
// mc = ma*R^-1 mod p503x2, where R = 2^512.
// If ma < 2^512*p503, the output mc is in the range [0, 2*p503-1].
// ma is assumed to be in Montgomery representation.

View File

@ -17,7 +17,7 @@ __inline void fpadd610(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
// Inputs: a, b in [0, 2*p610-1]
// Output: c in [0, 2*p610-1]
#if (OS_TARGET == OS_WIN)
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
unsigned int i, carry = 0;
digit_t mask;
@ -38,7 +38,7 @@ __inline void fpadd610(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
#elif (OS_TARGET == OS_LINUX)
fpadd610_asm(a, b, c);
oqs_kem_sike_fpadd610_asm(a, b, c);
#endif
}
@ -47,7 +47,7 @@ __inline void fpsub610(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
// Inputs: a, b in [0, 2*p610-1]
// Output: c in [0, 2*p610-1]
#if (OS_TARGET == OS_WIN)
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
unsigned int i, borrow = 0;
digit_t mask;
@ -63,7 +63,7 @@ __inline void fpsub610(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
#elif (OS_TARGET == OS_LINUX)
fpsub610_asm(a, b, c);
oqs_kem_sike_fpsub610_asm(a, b, c);
#endif
}
@ -110,7 +110,7 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
UNREFERENCED_PARAMETER(nwords);
#if (OS_TARGET == OS_WIN)
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
digit_t t = 0;
uint128_t uv = {0};
unsigned int carry = 0;
@ -408,17 +408,17 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
#elif (OS_TARGET == OS_LINUX)
mul610_asm(a, b, c);
oqs_kem_sike_mul610_asm(a, b, c);
#endif
}
void rdc_mont(const digit_t *ma, digit_t *mc) { // Montgomery reduction exploiting special form of the prime.
void rdc_mont(digit_t *ma, digit_t *mc) { // Montgomery reduction exploiting special form of the prime.
// mc = ma*R^-1 mod p610x2, where R = 2^640.
// If ma < 2^640*p610, the output mc is in the range [0, 2*p610-1].
// ma is assumed to be in Montgomery representation.
#if (OS_TARGET == OS_WIN)
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
unsigned int carry;
digit_t t = 0;
uint128_t uv = {0};
@ -661,7 +661,7 @@ void rdc_mont(const digit_t *ma, digit_t *mc) { // Montgomery reduction exploiti
#elif (OS_TARGET == OS_LINUX)
rdc610_asm(ma, mc);
oqs_kem_sike_rdc610_asm(ma, mc);
#endif
}

View File

@ -6,6 +6,7 @@
.intel_syntax noprefix
// Registers that are used for parameter passing:
#define reg_p1 rdi
#define reg_p2 rsi
@ -17,8 +18,8 @@
// Field addition
// Operation: c [reg_p3] = a [reg_p1] + b [reg_p2]
//***********************************************************************
.global fpadd610_asm
fpadd610_asm:
.global oqs_kem_sike_fpadd610_asm
oqs_kem_sike_fpadd610_asm:
push r12
push r13
push r14
@ -123,8 +124,8 @@ fpadd610_asm:
// Field subtraction
// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2]
//***********************************************************************
.global fpsub610_asm
fpsub610_asm:
.global oqs_kem_sike_fpsub610_asm
oqs_kem_sike_fpsub610_asm:
push r12
push r13
push r14
@ -426,8 +427,8 @@ fpsub610_asm:
//*****************************************************************************
// 610-bit multiplication using Karatsuba (one level), schoolbook (two levels)
//*****************************************************************************
.global mul610_asm
mul610_asm:
.global oqs_kem_sike_mul610_asm
oqs_kem_sike_mul610_asm:
push r12
push r13
push r14
@ -621,8 +622,8 @@ mul610_asm:
// Operation: c [reg_p3] = a [reg_p1] * b [reg_p2]
// NOTE: a=c or b=c are not allowed
//***********************************************************************
.global mul610_asm
mul610_asm:
.global oqs_kem_sike_mul610_asm
oqs_kem_sike_mul610_asm:
ret
@ -757,8 +758,8 @@ mul610_asm:
// Operation: c [reg_p2] = a [reg_p1]
// NOTE: a=c is not allowed
//**************************************************************************************
.global rdc610_asm
rdc610_asm:
.global oqs_kem_sike_rdc610_asm
oqs_kem_sike_rdc610_asm:
push r12
push r13
push r14
@ -939,8 +940,8 @@ rdc610_asm:
// Operation: c [reg_p2] = a [reg_p1]
// NOTE: a=c is not allowed
//***********************************************************************
.global rdc610_asm
rdc610_asm:
.global oqs_kem_sike_rdc610_asm
oqs_kem_sike_rdc610_asm:
ret
@ -953,8 +954,8 @@ rdc610_asm:
// 610-bit multiprecision addition
// Operation: c [reg_p3] = a [reg_p1] + b [reg_p2]
//***********************************************************************
.global mp_add610_asm
mp_add610_asm:
.global oqs_kem_sike_mp_add610_asm
oqs_kem_sike_mp_add610_asm:
mov r8, [reg_p1]
mov r9, [reg_p1+8]
mov r10, [reg_p1+16]
@ -993,8 +994,8 @@ mp_add610_asm:
// 2x610-bit multiprecision subtraction/addition
// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2]. If c < 0, add p610*2^640
//***********************************************************************
.global mp_subadd610x2_asm
mp_subadd610x2_asm:
.global oqs_kem_sike_mp_subadd610x2_asm
oqs_kem_sike_mp_subadd610x2_asm:
push r12
push r13
push r14
@ -1112,8 +1113,8 @@ mp_subadd610x2_asm:
// Double 2x610-bit multiprecision subtraction
// Operation: c [reg_p3] = c [reg_p3] - a [reg_p1] - b [reg_p2]
//***********************************************************************
.global mp_dblsub610x2_asm
mp_dblsub610x2_asm:
.global oqs_kem_sike_mp_dblsub610x2_asm
oqs_kem_sike_mp_dblsub610x2_asm:
push r12
push r13
push r14
@ -1211,4 +1212,4 @@ mp_dblsub610x2_asm:
pop r14
pop r13
pop r12
ret
ret

View File

@ -31,15 +31,15 @@
// Curve isogeny system "SIDHp610". Base curve: Montgomery curve By^2 = Cx^3 + Ax^2 + Cx defined over GF(p610^2), where A=6, B=1, C=1 and p610 = 2^305*3^192-1
//
static const uint64_t p610[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x6E01FFFFFFFFFFFF,
0xB1784DE8AA5AB02E, 0x9AE7BF45048FF9AB, 0xB255B2FA10C4252A, 0x819010C251E7D88C, 0x000000027BF6A768
};
static const uint64_t p610p1[NWORDS64_FIELD] = {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x6E02000000000000,
0xB1784DE8AA5AB02E, 0x9AE7BF45048FF9AB, 0xB255B2FA10C4252A, 0x819010C251E7D88C, 0x000000027BF6A768
};
static const uint64_t p610x2[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xDC03FFFFFFFFFFFF,
0x62F09BD154B5605C, 0x35CF7E8A091FF357, 0x64AB65F421884A55, 0x03202184A3CFB119, 0x00000004F7ED4ED1
};
const uint64_t p610[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x6E01FFFFFFFFFFFF,
0xB1784DE8AA5AB02E, 0x9AE7BF45048FF9AB, 0xB255B2FA10C4252A, 0x819010C251E7D88C, 0x000000027BF6A768
};
const uint64_t p610p1[NWORDS64_FIELD] = {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x6E02000000000000,
0xB1784DE8AA5AB02E, 0x9AE7BF45048FF9AB, 0xB255B2FA10C4252A, 0x819010C251E7D88C, 0x000000027BF6A768
};
const uint64_t p610x2[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xDC03FFFFFFFFFFFF,
0x62F09BD154B5605C, 0x35CF7E8A091FF357, 0x64AB65F421884A55, 0x03202184A3CFB119, 0x00000004F7ED4ED1
};
// Order of Alice's subgroup
static const uint64_t Alice_order[NWORDS64_ORDER] = {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0002000000000000};
// Order of Bob's subgroup
@ -127,9 +127,9 @@ static const unsigned int strat_Bob[MAX_Bob - 1] = {
#define fp2inv_mont fp2inv610_mont
#define fp2inv_mont_bingcd fp2inv610_mont_bingcd
#define fpequal_non_constant_time fpequal610_non_constant_time
#define mp_add_asm mp_add610_asm
#define mp_subaddx2_asm mp_subadd610x2_asm
#define mp_dblsubx2_asm mp_dblsub610x2_asm
#define mp_add_asm oqs_kem_sike_mp_add610_asm
#define mp_subaddx2_asm oqs_kem_sike_mp_subadd610x2_asm
#define mp_dblsubx2_asm oqs_kem_sike_mp_dblsub610x2_asm
#define crypto_kem_keypair OQS_KEM_sike_p610_keypair
#define crypto_kem_enc OQS_KEM_sike_p610_encaps
#define crypto_kem_dec OQS_KEM_sike_p610_decaps
@ -139,10 +139,12 @@ static const unsigned int strat_Bob[MAX_Bob - 1] = {
#define EphemeralKeyGeneration_B oqs_kem_sidh_p610_EphemeralKeyGeneration_B
#define EphemeralSecretAgreement_A oqs_kem_sidh_p610_EphemeralSecretAgreement_A
#define EphemeralSecretAgreement_B oqs_kem_sidh_p610_EphemeralSecretAgreement_B
#ifdef USE_SIKEP610_ASM
#define USE_SIKE_ASM
#endif
#if defined(X86_64)
#include "AMD64/fp_x64.c"
// #include "AMD64/fp_x64_asm.S" FIXMEOQS
#else
#include "generic/fp_generic.c"
#endif

View File

@ -387,9 +387,9 @@ static const uint64_t v_3_torsion[20][2 * NWORDS64_FIELD] = {
#define fp2inv_mont fp2inv610_mont
#define fp2inv_mont_bingcd fp2inv610_mont_bingcd
#define fpequal_non_constant_time fpequal610_non_constant_time
#define mp_add_asm mp_add610_asm
#define mp_subaddx2_asm mp_subadd610x2_asm
#define mp_dblsubx2_asm mp_dblsub610x2_asm
#define mp_add_asm oqs_kem_sike_mp_add610_asm
#define mp_subaddx2_asm oqs_kem_sike_mp_subadd610x2_asm
#define mp_dblsubx2_asm oqs_kem_sike_mp_dblsub610x2_asm
#define crypto_kem_keypair OQS_KEM_sike_p610_compressed_keypair
#define crypto_kem_enc OQS_KEM_sike_p610_compressed_encaps
#define crypto_kem_dec OQS_KEM_sike_p610_compressed_decaps
@ -399,10 +399,12 @@ static const uint64_t v_3_torsion[20][2 * NWORDS64_FIELD] = {
#define EphemeralKeyGeneration_B oqs_kem_sidh_p610_compressed_EphemeralKeyGeneration_B
#define EphemeralSecretAgreement_A oqs_kem_sidh_p610_compressed_EphemeralSecretAgreement_A
#define EphemeralSecretAgreement_B oqs_kem_sidh_p610_compressed_EphemeralSecretAgreement_B
#ifdef USE_SIKEP610_ASM
#define USE_SIKE_ASM
#endif
#if defined(X86_64)
#include "AMD64/fp_x64.c"
// #include "AMD64/fp_x64_asm.S" FIXMEOQS
#else
#include "generic/fp_generic.c"
#endif

View File

@ -121,17 +121,17 @@ static unsigned int mp_add(const digit_t *a, const digit_t *b, digit_t *c, const
// 610-bit multiprecision addition, c = a+b
static void mp_add610(const digit_t *a, const digit_t *b, digit_t *c);
static void mp_add610_asm(const digit_t *a, const digit_t *b, digit_t *c);
void oqs_kem_sike_mp_add610_asm(const digit_t *a, const digit_t *b, digit_t *c);
// Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = nwords. Returns the borrow bit
static unsigned int mp_sub(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords);
// 2x610-bit multiprecision subtraction followed by addition with p610*2^640, c = a-b+(p610*2^640) if a-b < 0, otherwise c=a-b
static void mp_subaddx2_asm(const digit_t *a, const digit_t *b, digit_t *c);
static void mp_subadd610x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
void oqs_kem_sike_mp_subaddx2_asm(const digit_t *a, const digit_t *b, digit_t *c);
void oqs_kem_sike_mp_subadd610x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
// Double 2x610-bit multiprecision subtraction, c = c-a-b, where c > a and c > b
static void mp_dblsub610x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
void oqs_kem_sike_mp_dblsub610x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
// Multiprecision left shift
static void mp_shiftleft(digit_t *x, unsigned int shift, const unsigned int nwords);
@ -161,11 +161,11 @@ static bool fpequal610_non_constant_time(const digit_t *a, const digit_t *b);
// Modular addition, c = a+b mod p610
extern void fpadd610(const digit_t *a, const digit_t *b, digit_t *c);
extern void fpadd610_asm(const digit_t *a, const digit_t *b, digit_t *c);
extern void oqs_kem_sike_fpadd610_asm(const digit_t *a, const digit_t *b, digit_t *c);
// Modular subtraction, c = a-b mod p610
extern void fpsub610(const digit_t *a, const digit_t *b, digit_t *c);
extern void fpsub610_asm(const digit_t *a, const digit_t *b, digit_t *c);
extern void oqs_kem_sike_fpsub610_asm(const digit_t *a, const digit_t *b, digit_t *c);
// Modular negation, a = -a mod p610
extern void fpneg610(digit_t *a);
@ -177,12 +177,12 @@ static void fpdiv2_610(const digit_t *a, digit_t *c);
static void fpcorrection610(digit_t *a);
// 610-bit Montgomery reduction, c = a mod p
static void rdc_mont(const digit_t *a, digit_t *c);
static void rdc_mont(digit_t *a, digit_t *c);
void oqs_kem_sike_rdc610_asm(digit_t *a, digit_t *c);
// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p610, where R=2^640
static void fpmul610_mont(const digit_t *a, const digit_t *b, digit_t *c);
static void mul610_asm(const digit_t *a, const digit_t *b, digit_t *c);
static void rdc610_asm(const digit_t *ma, digit_t *mc);
void oqs_kem_sike_mul610_asm(const digit_t *a, const digit_t *b, digit_t *c);
// Field squaring using Montgomery arithmetic, c = a^2*R^-1 mod p610, where R=2^640
static void fpsqr610_mont(const digit_t *ma, digit_t *mc);

View File

@ -155,7 +155,7 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
c[2 * nwords - 1] = v;
}
void rdc_mont(const digit_t *ma, digit_t *mc) { // Efficient Montgomery reduction using comba and exploiting the special form of the prime p610.
void rdc_mont(digit_t *ma, digit_t *mc) { // Efficient Montgomery reduction using comba and exploiting the special form of the prime p610.
// mc = ma*R^-1 mod p610x2, where R = 2^640.
// If ma < 2^640*p610, the output mc is in the range [0, 2*p610-1].
// ma is assumed to be in Montgomery representation.

View File

@ -17,7 +17,7 @@ __inline void fpadd751(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
// Inputs: a, b in [0, 2*p751-1]
// Output: c in [0, 2*p751-1]
#if (OS_TARGET == OS_WIN)
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
unsigned int i, carry = 0;
digit_t mask;
@ -38,7 +38,7 @@ __inline void fpadd751(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
#elif (OS_TARGET == OS_LINUX)
fpadd751_asm(a, b, c);
oqs_kem_sike_fpadd751_asm(a, b, c);
#endif
}
@ -47,7 +47,7 @@ __inline void fpsub751(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
// Inputs: a, b in [0, 2*p751-1]
// Output: c in [0, 2*p751-1]
#if (OS_TARGET == OS_WIN)
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
unsigned int i, borrow = 0;
digit_t mask;
@ -63,7 +63,7 @@ __inline void fpsub751(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
#elif (OS_TARGET == OS_LINUX)
fpsub751_asm(a, b, c);
oqs_kem_sike_fpsub751_asm(a, b, c);
#endif
}
@ -110,7 +110,7 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
UNREFERENCED_PARAMETER(nwords);
#if (OS_TARGET == OS_WIN)
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
digit_t t = 0;
uint128_t uv = {0};
unsigned int carry = 0;
@ -516,17 +516,17 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
#elif (OS_TARGET == OS_LINUX)
mul751_asm(a, b, c);
oqs_kem_sike_mul751_asm(a, b, c);
#endif
}
void rdc_mont(const digit_t *ma, digit_t *mc) { // Montgomery reduction exploiting special form of the prime.
void rdc_mont(digit_t *ma, digit_t *mc) { // Montgomery reduction exploiting special form of the prime.
// mc = ma*R^-1 mod p751x2, where R = 2^768.
// If ma < 2^768*p751, the output mc is in the range [0, 2*p751-1].
// ma is assumed to be in Montgomery representation.
#if (OS_TARGET == OS_WIN)
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
unsigned int carry;
digit_t t = 0;
uint128_t uv = {0};
@ -842,7 +842,7 @@ void rdc_mont(const digit_t *ma, digit_t *mc) { // Montgomery reduction exploiti
#elif (OS_TARGET == OS_LINUX)
rdc751_asm(ma, mc);
oqs_kem_sike_rdc751_asm(ma, mc);
#endif
}

View File

@ -6,6 +6,7 @@
.intel_syntax noprefix
// Registers that are used for parameter passing:
#define reg_p1 rdi
#define reg_p2 rsi
@ -17,8 +18,8 @@
// Field addition
// Operation: c [reg_p3] = a [reg_p1] + b [reg_p2]
//***********************************************************************
.global fpadd751_asm
fpadd751_asm:
.global oqs_kem_sike_fpadd751_asm
oqs_kem_sike_fpadd751_asm:
push r12
push r13
push r14
@ -150,8 +151,8 @@ fpadd751_asm:
// Field subtraction
// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2]
//***********************************************************************
.global fpsub751_asm
fpsub751_asm:
.global oqs_kem_sike_fpsub751_asm
oqs_kem_sike_fpsub751_asm:
push r12
push r13
push r14
@ -567,8 +568,8 @@ fpsub751_asm:
//*****************************************************************************
// 751-bit multiplication using Karatsuba (one level), schoolbook (two levels)
//*****************************************************************************
.global mul751_asm
mul751_asm:
.global oqs_kem_sike_mul751_asm
oqs_kem_sike_mul751_asm:
push r12
push r13
push r14
@ -791,8 +792,8 @@ mul751_asm:
// Operation: c [reg_p3] = a [reg_p1] * b [reg_p2]
// NOTE: a=c or b=c are not allowed
//***********************************************************************
.global mul751_asm
mul751_asm:
.global oqs_kem_sike_mul751_asm
oqs_kem_sike_mul751_asm:
push r12
push r13
push r14
@ -1957,8 +1958,8 @@ mul751_asm:
// Operation: c [reg_p2] = a [reg_p1]
// NOTE: a=c is not allowed
//**************************************************************************************
.global rdc751_asm
rdc751_asm:
.global oqs_kem_sike_rdc751_asm
oqs_kem_sike_rdc751_asm:
push rbx
push rbp
push r12
@ -2106,8 +2107,8 @@ rdc751_asm:
// Operation: c [reg_p2] = a [reg_p1]
// NOTE: a=c is not allowed
//***********************************************************************
.global rdc751_asm
rdc751_asm:
.global oqs_kem_sike_rdc751_asm
oqs_kem_sike_rdc751_asm:
push r12
push r13
push r14
@ -2727,8 +2728,8 @@ rdc751_asm:
// 751-bit multiprecision addition
// Operation: c [reg_p3] = a [reg_p1] + b [reg_p2]
//***********************************************************************
.global mp_add751_asm
mp_add751_asm:
.global oqs_kem_sike_mp_add751_asm
oqs_kem_sike_mp_add751_asm:
mov r8, [reg_p1]
mov r9, [reg_p1+8]
mov r10, [reg_p1+16]
@ -2773,8 +2774,8 @@ mp_add751_asm:
// 2x751-bit multiprecision subtraction/addition
// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2]. If c < 0, add p751*2^768
//***********************************************************************
.global mp_subadd751x2_asm
mp_subadd751x2_asm:
.global oqs_kem_sike_mp_subadd751x2_asm
oqs_kem_sike_mp_subadd751x2_asm:
push r12
push r13
push r14
@ -2916,8 +2917,8 @@ mp_subadd751x2_asm:
// Double 2x751-bit multiprecision subtraction
// Operation: c [reg_p3] = c [reg_p3] - a [reg_p1] - b [reg_p2]
//***********************************************************************
.global mp_dblsub751x2_asm
mp_dblsub751x2_asm:
.global oqs_kem_sike_mp_dblsub751x2_asm
oqs_kem_sike_mp_dblsub751x2_asm:
push r12
push r13
push r14
@ -3034,4 +3035,4 @@ mp_dblsub751x2_asm:
pop r14
pop r13
pop r12
ret
ret

View File

@ -16,14 +16,14 @@ __inline void fpadd751(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
// Inputs: a, b in [0, 2*p751-1]
// Output: c in [0, 2*p751-1]
fpadd751_asm(a, b, c);
oqs_kem_sike_fpadd751_asm(a, b, c);
}
__inline void fpsub751(const digit_t *a, const digit_t *b, digit_t *c) { // Modular subtraction, c = a-b mod p751.
// Inputs: a, b in [0, 2*p751-1]
// Output: c in [0, 2*p751-1]
fpsub751_asm(a, b, c);
oqs_kem_sike_fpsub751_asm(a, b, c);
}
__inline void fpneg751(digit_t *a) { // Modular negation, a = -a mod p751.
@ -68,13 +68,13 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
UNREFERENCED_PARAMETER(nwords);
mul751_asm(a, b, c);
oqs_kem_sike_mul751_asm(a, b, c);
}
void rdc_mont(const digit_t *ma, digit_t *mc) { // Montgomery reduction exploiting special form of the prime.
void rdc_mont(digit_t *ma, digit_t *mc) { // Montgomery reduction exploiting special form of the prime.
// mc = ma*R^-1 mod p751x2, where R = 2^768.
// If ma < 2^768*p751, the output mc is in the range [0, 2*p751-1].
// ma is assumed to be in Montgomery representation.
rdc751_asm(ma, mc);
oqs_kem_sike_rdc751_asm(ma, mc);
}

File diff suppressed because it is too large Load Diff

View File

@ -31,15 +31,15 @@
// Curve isogeny system "SIDHp751". Base curve: Montgomery curve By^2 = Cx^3 + Ax^2 + Cx defined over GF(p751^2), where A=6, B=1, C=1 and p751 = 2^372*3^239-1
//
static const uint64_t p751[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xEEAFFFFFFFFFFFFF,
0xE3EC968549F878A8, 0xDA959B1A13F7CC76, 0x084E9867D6EBE876, 0x8562B5045CB25748, 0x0E12909F97BADC66, 0x00006FE5D541F71C
};
static const uint64_t p751p1[NWORDS64_FIELD] = {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xEEB0000000000000,
0xE3EC968549F878A8, 0xDA959B1A13F7CC76, 0x084E9867D6EBE876, 0x8562B5045CB25748, 0x0E12909F97BADC66, 0x00006FE5D541F71C
};
static const uint64_t p751x2[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xDD5FFFFFFFFFFFFF,
0xC7D92D0A93F0F151, 0xB52B363427EF98ED, 0x109D30CFADD7D0ED, 0x0AC56A08B964AE90, 0x1C25213F2F75B8CD, 0x0000DFCBAA83EE38
};
const uint64_t p751[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xEEAFFFFFFFFFFFFF,
0xE3EC968549F878A8, 0xDA959B1A13F7CC76, 0x084E9867D6EBE876, 0x8562B5045CB25748, 0x0E12909F97BADC66, 0x00006FE5D541F71C
};
const uint64_t p751p1[NWORDS64_FIELD] = {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xEEB0000000000000,
0xE3EC968549F878A8, 0xDA959B1A13F7CC76, 0x084E9867D6EBE876, 0x8562B5045CB25748, 0x0E12909F97BADC66, 0x00006FE5D541F71C
};
const uint64_t p751x2[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xDD5FFFFFFFFFFFFF,
0xC7D92D0A93F0F151, 0xB52B363427EF98ED, 0x109D30CFADD7D0ED, 0x0AC56A08B964AE90, 0x1C25213F2F75B8CD, 0x0000DFCBAA83EE38
};
// Order of Alice's subgroup
static const uint64_t Alice_order[NWORDS64_ORDER] = {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0010000000000000};
// Order of Bob's subgroup
@ -129,9 +129,9 @@ static const unsigned int strat_Bob[MAX_Bob - 1] = {
#define fp2inv_mont fp2inv751_mont
#define fp2inv_mont_bingcd fp2inv751_mont_bingcd
#define fpequal_non_constant_time fpequal751_non_constant_time
#define mp_add_asm mp_add751_asm
#define mp_subaddx2_asm mp_subadd751x2_asm
#define mp_dblsubx2_asm mp_dblsub751x2_asm
#define mp_add_asm oqs_kem_sike_mp_add751_asm
#define mp_subaddx2_asm oqs_kem_sike_mp_subadd751x2_asm
#define mp_dblsubx2_asm oqs_kem_sike_mp_dblsub751x2_asm
#define crypto_kem_keypair OQS_KEM_sike_p751_keypair
#define crypto_kem_enc OQS_KEM_sike_p751_encaps
#define crypto_kem_dec OQS_KEM_sike_p751_decaps
@ -141,10 +141,12 @@ static const unsigned int strat_Bob[MAX_Bob - 1] = {
#define EphemeralKeyGeneration_B oqs_kem_sidh_p751_EphemeralKeyGeneration_B
#define EphemeralSecretAgreement_A oqs_kem_sidh_p751_EphemeralSecretAgreement_A
#define EphemeralSecretAgreement_B oqs_kem_sidh_p751_EphemeralSecretAgreement_B
#ifdef USE_SIKEP751_ASM
#define USE_SIKE_ASM
#endif
#if defined(X86_64)
#include "AMD64/fp_x64.c"
// #include "AMD64/fp_x64_asm.S" FIXMEOQS
#elif defined(ARM64)
#include "ARM64/fp_arm64.c"
#else

View File

@ -396,9 +396,9 @@ static const uint64_t v_3_torsion[20][2 * NWORDS64_FIELD] = {
#define fp2inv_mont fp2inv751_mont
#define fp2inv_mont_bingcd fp2inv751_mont_bingcd
#define fpequal_non_constant_time fpequal751_non_constant_time
#define mp_add_asm mp_add751_asm
#define mp_subaddx2_asm mp_subadd751x2_asm
#define mp_dblsubx2_asm mp_dblsub751x2_asm
#define mp_add_asm oqs_kem_sike_mp_add751_asm
#define mp_subaddx2_asm oqs_kem_sike_mp_subadd751x2_asm
#define mp_dblsubx2_asm oqs_kem_sike_mp_dblsub751x2_asm
#define crypto_kem_keypair OQS_KEM_sike_p751_compressed_keypair
#define crypto_kem_enc OQS_KEM_sike_p751_compressed_encaps
#define crypto_kem_dec OQS_KEM_sike_p751_compressed_decaps
@ -408,10 +408,12 @@ static const uint64_t v_3_torsion[20][2 * NWORDS64_FIELD] = {
#define EphemeralKeyGeneration_B oqs_kem_sidh_p751_compressed_EphemeralKeyGeneration_B
#define EphemeralSecretAgreement_A oqs_kem_sidh_p751_compressed_EphemeralSecretAgreement_A
#define EphemeralSecretAgreement_B oqs_kem_sidh_p751_compressed_EphemeralSecretAgreement_B
#ifdef USE_SIKEP751_ASM
#define USE_SIKE_ASM
#endif
#if defined(X86_64)
#include "AMD64/fp_x64.c"
// #include "AMD64/fp_x64_asm.S" FIXMEOQS
#elif defined(ARM64)
#include "ARM64/fp_arm64.c"
#else

View File

@ -121,17 +121,17 @@ static unsigned int mp_add(const digit_t *a, const digit_t *b, digit_t *c, const
// 751-bit multiprecision addition, c = a+b
static void mp_add751(const digit_t *a, const digit_t *b, digit_t *c);
static void mp_add751_asm(const digit_t *a, const digit_t *b, digit_t *c);
void oqs_kem_sike_mp_add751_asm(const digit_t *a, const digit_t *b, digit_t *c);
// Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = nwords. Returns the borrow bit
static unsigned int mp_sub(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords);
// 2x751-bit multiprecision subtraction followed by addition with p751*2^768, c = a-b+(p751*2^768) if a-b < 0, otherwise c=a-b
static void mp_subaddx2_asm(const digit_t *a, const digit_t *b, digit_t *c);
static void mp_subadd751x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
void oqs_kem_sike_mp_subaddx2_asm(const digit_t *a, const digit_t *b, digit_t *c);
void oqs_kem_sike_mp_subadd751x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
// Double 2x751-bit multiprecision subtraction, c = c-a-b, where c > a and c > b
static void mp_dblsub751x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
void oqs_kem_sike_mp_dblsub751x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
// Multiprecision left shift
static void mp_shiftleft(digit_t *x, unsigned int shift, const unsigned int nwords);
@ -161,11 +161,11 @@ static bool fpequal751_non_constant_time(const digit_t *a, const digit_t *b);
// Modular addition, c = a+b mod p751
extern void fpadd751(const digit_t *a, const digit_t *b, digit_t *c);
extern void fpadd751_asm(const digit_t *a, const digit_t *b, digit_t *c);
extern void oqs_kem_sike_fpadd751_asm(const digit_t *a, const digit_t *b, digit_t *c);
// Modular subtraction, c = a-b mod p751
extern void fpsub751(const digit_t *a, const digit_t *b, digit_t *c);
extern void fpsub751_asm(const digit_t *a, const digit_t *b, digit_t *c);
extern void oqs_kem_sike_fpsub751_asm(const digit_t *a, const digit_t *b, digit_t *c);
// Modular negation, a = -a mod p751
extern void fpneg751(digit_t *a);
@ -177,12 +177,13 @@ static void fpdiv2_751(const digit_t *a, digit_t *c);
static void fpcorrection751(digit_t *a);
// 751-bit Montgomery reduction, c = a*R^-1 mod p751x2, where R = 2^768
static void rdc_mont(const digit_t *a, digit_t *c);
static void rdc_mont(digit_t *a, digit_t *c);
void oqs_kem_sike_rdc751_asm(digit_t *ma, digit_t *mc);
// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p751, where R=2^768
static void fpmul751_mont(const digit_t *a, const digit_t *b, digit_t *c);
static void mul751_asm(const digit_t *a, const digit_t *b, digit_t *c);
static void rdc751_asm(const digit_t *ma, digit_t *mc);
void oqs_kem_sike_mul751_asm(const digit_t *a, const digit_t *b, digit_t *c);
// Field squaring using Montgomery arithmetic, c = a^2*R^-1 mod p751, where R=2^768
static void fpsqr751_mont(const digit_t *ma, digit_t *mc);

View File

@ -155,7 +155,7 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
c[2 * nwords - 1] = v;
}
void rdc_mont(const digit_t *ma, digit_t *mc) { // Efficient Montgomery reduction using comba and exploiting the special form of the prime p751.
void rdc_mont(digit_t *ma, digit_t *mc) { // Efficient Montgomery reduction using comba and exploiting the special form of the prime p751.
// mc = ma*R^-1 mod p751x2, where R = 2^768.
// If ma < 2^768*p751, the output mc is in the range [0, 2*p751-1].
// ma is assumed to be in Montgomery representation.

View File

@ -15,9 +15,15 @@
// Numeric identifiers for the supported operating systems. OS_TARGET is set to
// exactly one of these below so that other code can compare it in #if expressions.
#define OS_WIN 1
#define OS_LINUX 2
#define OS_DARWIN 3
// Select OS_TARGET from the platform macros: _WIN32 -> OS_WIN, __APPLE__ -> OS_DARWIN,
// anything else falls through to OS_LINUX.
#if defined(_WIN32) // Microsoft Windows OS
#define OS_TARGET OS_WIN
#elif defined(__APPLE__) // darwin
#define OS_TARGET OS_DARWIN
// On darwin, _GENERIC_ is defined here unless the build already defined it.
#ifndef _GENERIC_ // default to generic implementation on darwin for now (FIXMEOQS: still needed?)
#define _GENERIC_
#endif
#else
#define OS_TARGET OS_LINUX // default to Linux
#endif

View File

@ -138,14 +138,10 @@ static void fp2correction(f2elm_t a) { // Modular correction, a = a in GF(p^2).
}
__inline static void mp_addfast(const digit_t *a, const digit_t *b, digit_t *c) { // Multiprecision addition, c = a+b.
#if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && (NBITS_FIELD == 434 || NBITS_FIELD == 610))
mp_add(a, b, c, NWORDS_FIELD);
#elif (OS_TARGET == OS_LINUX)
#if USE_SIKE_ASM
mp_add_asm(a, b, c);
#else
mp_add(a, b, c, NWORDS_FIELD);
#endif
}
@ -172,7 +168,9 @@ __inline unsigned int mp_sub(const digit_t *a, const digit_t *b, digit_t *c, con
}
__inline static void mp_subaddfast(const digit_t *a, const digit_t *b, digit_t *c) { // Multiprecision subtraction followed by addition with p*2^MAXBITS_FIELD, c = a-b+(p*2^MAXBITS_FIELD) if a-b < 0, otherwise c=a-b.
#if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && (NBITS_FIELD == 434 || NBITS_FIELD == 610))
#if USE_SIKE_ASM
mp_subaddx2_asm(a, b, c);
#else
felm_t t1;
digit_t mask = 0 - (digit_t) mp_sub(a, b, c, 2 * NWORDS_FIELD);
@ -180,24 +178,15 @@ __inline static void mp_subaddfast(const digit_t *a, const digit_t *b, digit_t *
t1[i] = ((digit_t *) PRIME)[i] & mask;
}
mp_addfast((digit_t *) &c[NWORDS_FIELD], t1, (digit_t *) &c[NWORDS_FIELD]);
#elif (OS_TARGET == OS_LINUX)
mp_subaddx2_asm(a, b, c);
#endif
}
__inline static void mp_dblsubfast(const digit_t *a, const digit_t *b, digit_t *c) { // Multiprecision subtraction, c = c-a-b, where lng(a) = lng(b) = 2*NWORDS_FIELD.
#if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && (NBITS_FIELD == 434 || NBITS_FIELD == 610))
#if USE_SIKE_ASM
mp_dblsubx2_asm(a, b, c);
#else
mp_sub(c, a, c, 2 * NWORDS_FIELD);
mp_sub(c, b, c, 2 * NWORDS_FIELD);
#elif (OS_TARGET == OS_LINUX)
mp_dblsubx2_asm(a, b, c);
#endif
}

View File

@ -13,7 +13,7 @@ OQS_KEM *OQS_KEM_sike_p434_new() {
return NULL;
}
kem->method_name = OQS_KEM_alg_sike_p434;
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
kem->claimed_nist_level = 1;
kem->ind_cca = true;
@ -43,7 +43,7 @@ OQS_KEM *OQS_KEM_sike_p434_compressed_new() {
return NULL;
}
kem->method_name = OQS_KEM_alg_sike_p434_compressed;
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
kem->claimed_nist_level = 1;
kem->ind_cca = true;
@ -73,7 +73,7 @@ OQS_KEM *OQS_KEM_sike_p503_new() {
return NULL;
}
kem->method_name = OQS_KEM_alg_sike_p503;
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
kem->claimed_nist_level = 2;
kem->ind_cca = true;
@ -103,7 +103,7 @@ OQS_KEM *OQS_KEM_sike_p503_compressed_new() {
return NULL;
}
kem->method_name = OQS_KEM_alg_sike_p503_compressed;
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
kem->claimed_nist_level = 2;
kem->ind_cca = true;
@ -133,7 +133,7 @@ OQS_KEM *OQS_KEM_sike_p610_new() {
return NULL;
}
kem->method_name = OQS_KEM_alg_sike_p610;
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
kem->claimed_nist_level = 3;
kem->ind_cca = true;
@ -163,7 +163,7 @@ OQS_KEM *OQS_KEM_sike_p610_compressed_new() {
return NULL;
}
kem->method_name = OQS_KEM_alg_sike_p610_compressed;
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
kem->claimed_nist_level = 3;
kem->ind_cca = true;
@ -193,7 +193,7 @@ OQS_KEM *OQS_KEM_sike_p751_new() {
return NULL;
}
kem->method_name = OQS_KEM_alg_sike_p751;
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
kem->claimed_nist_level = 5;
kem->ind_cca = true;
@ -223,7 +223,7 @@ OQS_KEM *OQS_KEM_sike_p751_compressed_new() {
return NULL;
}
kem->method_name = OQS_KEM_alg_sike_p751_compressed;
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
kem->claimed_nist_level = 5;
kem->ind_cca = true;
@ -253,7 +253,7 @@ OQS_KEM *OQS_KEM_sidh_p434_new() {
return NULL;
}
kem->method_name = OQS_KEM_alg_sidh_p434;
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
kem->claimed_nist_level = 1;
kem->ind_cca = false;
@ -323,7 +323,7 @@ OQS_KEM *OQS_KEM_sidh_p434_compressed_new() {
return NULL;
}
kem->method_name = OQS_KEM_alg_sidh_p434_compressed;
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
kem->claimed_nist_level = 1;
kem->ind_cca = false;
@ -393,7 +393,7 @@ OQS_KEM *OQS_KEM_sidh_p503_new() {
return NULL;
}
kem->method_name = OQS_KEM_alg_sidh_p503;
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
kem->claimed_nist_level = 2;
kem->ind_cca = false;
@ -463,7 +463,7 @@ OQS_KEM *OQS_KEM_sidh_p503_compressed_new() {
return NULL;
}
kem->method_name = OQS_KEM_alg_sidh_p503_compressed;
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
kem->claimed_nist_level = 2;
kem->ind_cca = false;
@ -533,7 +533,7 @@ OQS_KEM *OQS_KEM_sidh_p610_new() {
return NULL;
}
kem->method_name = OQS_KEM_alg_sidh_p610;
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
kem->claimed_nist_level = 3;
kem->ind_cca = false;
@ -603,7 +603,7 @@ OQS_KEM *OQS_KEM_sidh_p610_compressed_new() {
return NULL;
}
kem->method_name = OQS_KEM_alg_sidh_p610_compressed;
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
kem->claimed_nist_level = 3;
kem->ind_cca = false;
@ -673,7 +673,7 @@ OQS_KEM *OQS_KEM_sidh_p751_new() {
return NULL;
}
kem->method_name = OQS_KEM_alg_sidh_p751;
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
kem->claimed_nist_level = 5;
kem->ind_cca = false;
@ -743,7 +743,7 @@ OQS_KEM *OQS_KEM_sidh_p751_compressed_new() {
return NULL;
}
kem->method_name = OQS_KEM_alg_sidh_p751_compressed;
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
kem->claimed_nist_level = 5;
kem->ind_cca = false;

View File

@ -1,15 +0,0 @@
/*
* undefines SIDH symbols included in both P503 and P751
* to avoid Visual Studio errors
*/
#if defined(_WIN32)
#undef OQS_SIDH_MSR_CRYPTO_SECRETKEYBYTES
#undef OQS_SIDH_MSR_CRYPTO_PUBLICKEYBYTES
#undef OQS_SIDH_MSR_CRYPTO_BYTES
#undef OQS_SIDH_MSR_CRYPTO_CIPHERTEXTBYTES
#undef OQS_SIDH_MSR_CRYPTO_ALGNAME
#undef SIDH_SECRETKEYBYTES
#undef SIDH_PUBLICKEYBYTES
#undef SIDH_BYTES
#endif