Merge pull request #632 from christianpaquin/cp-enable-sike-fast-mode-with-cmake

Updated SIKE implementation with latest changes.
This commit is contained in:
Christian Paquin 2020-03-04 16:42:39 -05:00 committed by GitHub
commit 82e1828f4c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
36 changed files with 3049 additions and 3044 deletions

View File

@ -29,7 +29,7 @@ Implementation
--------------
- **Source of implementation:** https://github.com/Microsoft/PQCrypto-SIDH
- **Implementation version:** v3.2 (https://github.com/microsoft/PQCrypto-SIDH/tree/v3.2)
- **Implementation version:** v3.2 (https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311)
- **License:** MIT License
- **Language:** C
- **Constant-time:** Yes

View File

@ -30,13 +30,8 @@ Implementation
--------------
- **Source of implementation:** https://github.com/Microsoft/PQCrypto-SIDH
- **Implementation version:** v3.2 (https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa)
- **Implementation version:** v3.2 (https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311)
- **License:** MIT License
- **Language:** C
- **Constant-time:** Yes
- **Architectures supported in liboqs master branch**: x86, x64
Additional comments
-------------------
The original Sike implementation includes optimizations that are not currently being built in liboqs. See src/kem/sike/upstream/README for details.

View File

@ -1,22 +1,33 @@
set(SRCS kem_sike.c
P434/P434.c
P434/P434_compressed.c
P503/P503.c P503/P503_compressed.c
P610/P610.c
P610/P610_compressed.c
P751/P751.c
P751/P751_compressed.c)
P434/P434.c
P434/P434_compressed.c
P503/P503.c P503/P503_compressed.c
P610/P610.c
P610/P610_compressed.c
P751/P751.c
P751/P751_compressed.c)
add_library(sike OBJECT ${SRCS})
# FIXMEOQS: enable FAST mode, assembly
target_compile_definitions(sike PRIVATE _GENERIC_)
if(ARCH STREQUAL "x86")
target_compile_definitions(sike PRIVATE _X86_)
elseif(ARCH STREQUAL "x86_64")
target_compile_definitions(sike PRIVATE _AMD64_)
elseif(ARCH STREQUAL "arm")
target_compile_definitions(sike PRIVATE _ARM_)
elseif(ARCH STREQUAL "arm64")
target_compile_definitions(sike PRIVATE _ARM64_)
if(${ARCH} STREQUAL "x86")
target_compile_definitions(sike PRIVATE _GENERIC_ _X86_)
elseif(${ARCH} STREQUAL "x86_64")
target_compile_definitions(sike PRIVATE _AMD64_)
if(${CMAKE_HOST_SYSTEM_NAME} STREQUAL "Linux")
set(SRCS ${SRCS} P503/AMD64/fp_x64_asm.S
P751/AMD64/fp_x64_asm.S)
if(USE_BMI2_INSTRUCTIONS)
target_compile_definitions(sike PRIVATE _MULX_ _ADX_)
set(SRCS ${SRCS} P434/AMD64/fp_x64_asm.S
P610/AMD64/fp_x64_asm.S)
endif()
endif()
elseif(${ARCH} STREQUAL "arm")
target_compile_definitions(sike PRIVATE _GENERIC_ _ARM_)
elseif(${ARCH} STREQUAL "arm64")
target_compile_definitions(sike PRIVATE _ARM64_)
if(${CMAKE_HOST_SYSTEM_NAME} STREQUAL "Linux")
set(SRCS ${SRCS} P503/AMD64/fp_x64_asm.S
P751/AMD64/fp_x64_asm.S)
endif()
endif()

View File

@ -17,7 +17,7 @@ __inline void fpadd434(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
// Inputs: a, b in [0, 2*p434-1]
// Output: c in [0, 2*p434-1]
#if (OS_TARGET == OS_WIN)
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
unsigned int i, carry = 0;
digit_t mask;
@ -38,7 +38,7 @@ __inline void fpadd434(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
#elif (OS_TARGET == OS_LINUX)
fpadd434_asm(a, b, c);
oqs_kem_sike_fpadd434_asm(a, b, c);
#endif
}
@ -47,7 +47,7 @@ __inline void fpsub434(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
// Inputs: a, b in [0, 2*p434-1]
// Output: c in [0, 2*p434-1]
#if (OS_TARGET == OS_WIN)
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
unsigned int i, borrow = 0;
digit_t mask;
@ -63,7 +63,7 @@ __inline void fpsub434(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
#elif (OS_TARGET == OS_LINUX)
fpsub434_asm(a, b, c);
oqs_kem_sike_fpsub434_asm(a, b, c);
#endif
}
@ -110,7 +110,7 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
UNREFERENCED_PARAMETER(nwords);
#if (OS_TARGET == OS_WIN)
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
digit_t t = 0;
uint128_t uv = {0};
unsigned int carry = 0;
@ -276,17 +276,17 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
#elif (OS_TARGET == OS_LINUX)
mul434_asm(a, b, c);
oqs_kem_sike_mul434_asm(a, b, c);
#endif
}
void rdc_mont(const digit_t *ma, digit_t *mc) { // Montgomery reduction exploiting special form of the prime.
void rdc_mont(digit_t *ma, digit_t *mc) { // Montgomery reduction exploiting special form of the prime.
// mc = ma*R^-1 mod p434x2, where R = 2^448.
// If ma < 2^448*p434, the output mc is in the range [0, 2*p434-1].
// ma is assumed to be in Montgomery representation.
#if (OS_TARGET == OS_WIN)
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
unsigned int carry;
digit_t t = 0;
uint128_t uv = {0};
@ -423,7 +423,7 @@ void rdc_mont(const digit_t *ma, digit_t *mc) { // Montgomery reduction exploiti
#elif (OS_TARGET == OS_LINUX)
rdc434_asm(ma, mc);
oqs_kem_sike_rdc434_asm(ma, mc);
#endif
}

View File

@ -36,8 +36,8 @@
// Field addition
// Operation: c [reg_p3] = a [reg_p1] + b [reg_p2]
//***********************************************************************
.global fpadd434_asm
fpadd434_asm:
.global oqs_kem_sike_fpadd434_asm
oqs_kem_sike_fpadd434_asm:
push r12
push r13
push r14
@ -111,8 +111,8 @@ fpadd434_asm:
// Field subtraction
// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2]
//***********************************************************************
.global fpsub434_asm
fpsub434_asm:
.global oqs_kem_sike_fpsub434_asm
oqs_kem_sike_fpsub434_asm:
push r12
push r13
push r14
@ -410,8 +410,8 @@ fpsub434_asm:
//*****************************************************************************
// 434-bit multiplication using Karatsuba (one level), schoolbook (one level)
//*****************************************************************************
.global mul434_asm
mul434_asm:
.global oqs_kem_sike_mul434_asm
oqs_kem_sike_mul434_asm:
push r12
push r13
push r14
@ -562,8 +562,8 @@ mul434_asm:
// Operation: c [reg_p3] = a [reg_p1] * b [reg_p2]
// NOTE: a=c or b=c are not allowed
//***********************************************************************
.global mul434_asm
mul434_asm:
.global oqs_kem_sike_mul434_asm
oqs_kem_sike_mul434_asm:
ret
@ -660,8 +660,8 @@ mul434_asm:
// Operation: c [reg_p2] = a [reg_p1]
// NOTE: a=c is not allowed
//**************************************************************************************
.global rdc434_asm
rdc434_asm:
.global oqs_kem_sike_rdc434_asm
oqs_kem_sike_rdc434_asm:
push r12
push r13
@ -767,8 +767,8 @@ rdc434_asm:
// Operation: c [reg_p2] = a [reg_p1]
// NOTE: a=c is not allowed
//***********************************************************************
.global rdc434_asm
rdc434_asm:
.global oqs_kem_sike_rdc434_asm
oqs_kem_sike_rdc434_asm:
ret
@ -781,8 +781,8 @@ rdc434_asm:
// 434-bit multiprecision addition
// Operation: c [reg_p3] = a [reg_p1] + b [reg_p2]
//***********************************************************************
.global mp_add434_asm
mp_add434_asm:
.global oqs_kem_sike_mp_add434_asm
oqs_kem_sike_mp_add434_asm:
mov r8, [reg_p1]
mov r9, [reg_p1+8]
mov r10, [reg_p1+16]
@ -812,8 +812,8 @@ mp_add434_asm:
// 2x434-bit multiprecision subtraction/addition
// Operation: c [x2] = a [x0] - b [x1]. If c < 0, add p434*2^448
//***********************************************************************
.global mp_subadd434x2_asm
mp_subadd434x2_asm:
.global oqs_kem_sike_mp_subadd434x2_asm
oqs_kem_sike_mp_subadd434x2_asm:
push r12
push r13
push r14
@ -897,8 +897,8 @@ mp_subadd434x2_asm:
// Double 2x434-bit multiprecision subtraction
// Operation: c [reg_p3] = c [reg_p3] - a [reg_p1] - b [reg_p2]
//***********************************************************************
.global mp_dblsub434x2_asm
mp_dblsub434x2_asm:
.global oqs_kem_sike_mp_dblsub434x2_asm
oqs_kem_sike_mp_dblsub434x2_asm:
push r12
push r13

View File

@ -31,15 +31,15 @@
// Curve isogeny system "SIDHp434". Base curve: Montgomery curve By^2 = Cx^3 + Ax^2 + Cx defined over GF(p434^2), where A=6, B=1, C=1 and p434 = 2^216*3^137-1
//
static const uint64_t p434[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFDC1767AE2FFFFFF,
0x7BC65C783158AEA3, 0x6CFC5FD681C52056, 0x0002341F27177344
};
static const uint64_t p434p1[NWORDS64_FIELD] = {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xFDC1767AE3000000,
0x7BC65C783158AEA3, 0x6CFC5FD681C52056, 0x0002341F27177344
};
static const uint64_t p434x2[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFB82ECF5C5FFFFFF,
0xF78CB8F062B15D47, 0xD9F8BFAD038A40AC, 0x0004683E4E2EE688
};
const uint64_t p434[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFDC1767AE2FFFFFF,
0x7BC65C783158AEA3, 0x6CFC5FD681C52056, 0x0002341F27177344
};
const uint64_t p434p1[NWORDS64_FIELD] = {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xFDC1767AE3000000,
0x7BC65C783158AEA3, 0x6CFC5FD681C52056, 0x0002341F27177344
};
const uint64_t p434x2[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFB82ECF5C5FFFFFF,
0xF78CB8F062B15D47, 0xD9F8BFAD038A40AC, 0x0004683E4E2EE688
};
// Order of Alice's subgroup
static const uint64_t Alice_order[NWORDS64_ORDER] = {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000001000000};
// Order of Bob's subgroup
@ -119,9 +119,9 @@ static const unsigned int strat_Bob[MAX_Bob - 1] = {
#define fp2inv_mont fp2inv434_mont
#define fp2inv_mont_bingcd fp2inv434_mont_bingcd
#define fpequal_non_constant_time fpequal434_non_constant_time
#define mp_add_asm mp_add434_asm
#define mp_subaddx2_asm mp_subadd434x2_asm
#define mp_dblsubx2_asm mp_dblsub434x2_asm
#define mp_add_asm oqs_kem_sike_mp_add434_asm
#define mp_subaddx2_asm oqs_kem_sike_mp_subadd434x2_asm
#define mp_dblsubx2_asm oqs_kem_sike_mp_dblsub434x2_asm
#define crypto_kem_keypair OQS_KEM_sike_p434_keypair
#define crypto_kem_enc OQS_KEM_sike_p434_encaps
#define crypto_kem_dec OQS_KEM_sike_p434_decaps
@ -131,10 +131,12 @@ static const unsigned int strat_Bob[MAX_Bob - 1] = {
#define EphemeralKeyGeneration_B oqs_kem_sidh_p434_EphemeralKeyGeneration_B
#define EphemeralSecretAgreement_A oqs_kem_sidh_p434_EphemeralSecretAgreement_A
#define EphemeralSecretAgreement_B oqs_kem_sidh_p434_EphemeralSecretAgreement_B
#ifdef USE_SIKEP434_ASM
#define USE_SIKE_ASM
#endif
#if defined(X86_64)
#include "AMD64/fp_x64.c"
// #include "AMD64/fp_x64_asm.S" FIXMEOQS
#else
#include "generic/fp_generic.c"
#endif

View File

@ -11,6 +11,9 @@
// OQS note: size #defines moved to P434.c to avoid redefinitions across parameters
// Algorithm name
#define CRYPTO_ALGNAME "SIKEp434"
// SIKE's key generation
// It produces a private key sk and computes the public key pk.
// Outputs: secret key sk (CRYPTO_SECRETKEYBYTES = 374 bytes)

View File

@ -381,9 +381,9 @@ static const uint64_t v_3_torsion[20][2 * NWORDS64_FIELD] = {
#define fp2inv_mont fp2inv434_mont
#define fp2inv_mont_bingcd fp2inv434_mont_bingcd
#define fpequal_non_constant_time fpequal434_non_constant_time
#define mp_add_asm mp_add434_asm
#define mp_subaddx2_asm mp_subadd434x2_asm
#define mp_dblsubx2_asm mp_dblsub434x2_asm
#define mp_add_asm oqs_kem_sike_mp_add434_asm
#define mp_subaddx2_asm oqs_kem_sike_mp_subadd434x2_asm
#define mp_dblsubx2_asm oqs_kem_sike_mp_dblsub434x2_asm
#define crypto_kem_keypair OQS_KEM_sike_p434_compressed_keypair
#define crypto_kem_enc OQS_KEM_sike_p434_compressed_encaps
#define crypto_kem_dec OQS_KEM_sike_p434_compressed_decaps
@ -393,10 +393,12 @@ static const uint64_t v_3_torsion[20][2 * NWORDS64_FIELD] = {
#define EphemeralKeyGeneration_B oqs_kem_sidh_p434_compressed_EphemeralKeyGeneration_B
#define EphemeralSecretAgreement_A oqs_kem_sidh_p434_compressed_EphemeralSecretAgreement_A
#define EphemeralSecretAgreement_B oqs_kem_sidh_p434_compressed_EphemeralSecretAgreement_B
#ifdef USE_SIKEP434_ASM
#define USE_SIKE_ASM
#endif
#if defined(X86_64)
#include "AMD64/fp_x64.c"
// #include "AMD64/fp_x64_asm.S" FIXMEOQS
#else
#include "generic/fp_generic.c"
#endif

View File

@ -121,17 +121,17 @@ static unsigned int mp_add(const digit_t *a, const digit_t *b, digit_t *c, const
// 434-bit multiprecision addition, c = a+b
static void mp_add434(const digit_t *a, const digit_t *b, digit_t *c);
static void mp_add434_asm(const digit_t *a, const digit_t *b, digit_t *c);
void oqs_kem_sike_mp_add434_asm(const digit_t *a, const digit_t *b, digit_t *c);
// Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = nwords. Returns the borrow bit
static unsigned int mp_sub(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords);
// 2x434-bit multiprecision subtraction followed by addition with p434*2^448, c = a-b+(p434*2^448) if a-b < 0, otherwise c=a-b
static void mp_subaddx2_asm(const digit_t *a, const digit_t *b, digit_t *c);
static void mp_subadd434x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
void oqs_kem_sike_mp_subaddx2_asm(const digit_t *a, const digit_t *b, digit_t *c);
void oqs_kem_sike_mp_subadd434x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
// Double 2x434-bit multiprecision subtraction, c = c-a-b, where c > a and c > b
static void mp_dblsub434x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
void oqs_kem_sike_mp_dblsub434x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
// Multiprecision left shift
static void mp_shiftleft(digit_t *x, unsigned int shift, const unsigned int nwords);
@ -161,11 +161,11 @@ static bool fpequal434_non_constant_time(const digit_t *a, const digit_t *b);
// Modular addition, c = a+b mod p434
extern void fpadd434(const digit_t *a, const digit_t *b, digit_t *c);
extern void fpadd434_asm(const digit_t *a, const digit_t *b, digit_t *c);
extern void oqs_kem_sike_fpadd434_asm(const digit_t *a, const digit_t *b, digit_t *c);
// Modular subtraction, c = a-b mod p434
extern void fpsub434(const digit_t *a, const digit_t *b, digit_t *c);
extern void fpsub434_asm(const digit_t *a, const digit_t *b, digit_t *c);
extern void oqs_kem_sike_fpsub434_asm(const digit_t *a, const digit_t *b, digit_t *c);
// Modular negation, a = -a mod p434
extern void fpneg434(digit_t *a);
@ -177,12 +177,12 @@ static void fpdiv2_434(const digit_t *a, digit_t *c);
static void fpcorrection434(digit_t *a);
// 434-bit Montgomery reduction, c = a mod p
static void rdc_mont(const digit_t *a, digit_t *c);
static void rdc_mont(digit_t *a, digit_t *c);
void oqs_kem_sike_rdc434_asm(digit_t *ma, digit_t *mc);
// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p434, where R=2^768
static void fpmul434_mont(const digit_t *a, const digit_t *b, digit_t *c);
static void mul434_asm(const digit_t *a, const digit_t *b, digit_t *c);
static void rdc434_asm(const digit_t *ma, digit_t *mc);
void oqs_kem_sike_mul434_asm(const digit_t *a, const digit_t *b, digit_t *c);
// Field squaring using Montgomery arithmetic, c = a*b*R^-1 mod p434, where R=2^768
static void fpsqr434_mont(const digit_t *ma, digit_t *mc);

View File

@ -155,7 +155,7 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
c[2 * nwords - 1] = v;
}
void rdc_mont(const digit_t *ma, digit_t *mc) { // Efficient Montgomery reduction using comba and exploiting the special form of the prime p434.
void rdc_mont(digit_t *ma, digit_t *mc) { // Efficient Montgomery reduction using comba and exploiting the special form of the prime p434.
// mc = ma*R^-1 mod p434x2, where R = 2^448.
// If ma < 2^448*p434, the output mc is in the range [0, 2*p434-1].
// ma is assumed to be in Montgomery representation.

View File

@ -17,7 +17,7 @@ __inline void fpadd503(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
// Inputs: a, b in [0, 2*p503-1]
// Output: c in [0, 2*p503-1]
#if (OS_TARGET == OS_WIN)
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
unsigned int i, carry = 0;
digit_t mask;
@ -38,7 +38,7 @@ __inline void fpadd503(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
#elif (OS_TARGET == OS_LINUX)
fpadd503_asm(a, b, c);
oqs_kem_sike_fpadd503_asm(a, b, c);
#endif
}
@ -47,7 +47,7 @@ __inline void fpsub503(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
// Inputs: a, b in [0, 2*p503-1]
// Output: c in [0, 2*p503-1]
#if (OS_TARGET == OS_WIN)
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
unsigned int i, borrow = 0;
digit_t mask;
@ -63,7 +63,7 @@ __inline void fpsub503(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
#elif (OS_TARGET == OS_LINUX)
fpsub503_asm(a, b, c);
oqs_kem_sike_fpsub503_asm(a, b, c);
#endif
}
@ -110,7 +110,7 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
UNREFERENCED_PARAMETER(nwords);
#if (OS_TARGET == OS_WIN)
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
digit_t t = 0;
uint128_t uv = {0};
unsigned int carry = 0;
@ -316,17 +316,17 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
#elif (OS_TARGET == OS_LINUX)
mul503_asm(a, b, c);
oqs_kem_sike_mul503_asm(a, b, c);
#endif
}
void rdc_mont(const digit_t *ma, digit_t *mc) { // Montgomery reduction exploiting special form of the prime.
void rdc_mont(digit_t *ma, digit_t *mc) { // Montgomery reduction exploiting special form of the prime.
// mc = ma*R^-1 mod p503x2, where R = 2^512.
// If ma < 2^512*p503, the output mc is in the range [0, 2*p503-1].
// ma is assumed to be in Montgomery representation.
#if (OS_TARGET == OS_WIN)
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
unsigned int carry;
digit_t t = 0;
uint128_t uv = {0};
@ -504,7 +504,7 @@ void rdc_mont(const digit_t *ma, digit_t *mc) { // Montgomery reduction exploiti
#elif (OS_TARGET == OS_LINUX)
rdc503_asm(ma, mc);
oqs_kem_sike_rdc503_asm(ma, mc);
#endif
}

View File

@ -36,8 +36,8 @@
// Field addition
// Operation: c [reg_p3] = a [reg_p1] + b [reg_p2]
//***********************************************************************
.global fpadd503_asm
fpadd503_asm:
.global oqs_kem_sike_fpadd503_asm
oqs_kem_sike_fpadd503_asm:
push r12
push r13
push r14
@ -125,8 +125,8 @@ fpadd503_asm:
// Field subtraction
// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2]
//***********************************************************************
.global fpsub503_asm
fpsub503_asm:
.global oqs_kem_sike_fpsub503_asm
oqs_kem_sike_fpsub503_asm:
push r12
push r13
push r14
@ -350,8 +350,8 @@ fpsub503_asm:
//*****************************************************************************
// 503-bit multiplication using Karatsuba (one level), schoolbook (one level)
//*****************************************************************************
.global mul503_asm
mul503_asm:
.global oqs_kem_sike_mul503_asm
oqs_kem_sike_mul503_asm:
push r12
push r13
push r14
@ -512,8 +512,8 @@ mul503_asm:
// Operation: c [reg_p3] = a [reg_p1] * b [reg_p2]
// NOTE: a=c or b=c are not allowed
//***********************************************************************
.global mul503_asm
mul503_asm:
.global oqs_kem_sike_mul503_asm
oqs_kem_sike_mul503_asm:
push r12
push r13
push r14
@ -1029,8 +1029,8 @@ mul503_asm:
// Operation: c [reg_p2] = a [reg_p1]
// NOTE: a=c is not allowed
//**************************************************************************************
.global rdc503_asm
rdc503_asm:
.global oqs_kem_sike_rdc503_asm
oqs_kem_sike_rdc503_asm:
push rbx
push r12
push r13
@ -1161,8 +1161,8 @@ rdc503_asm:
// Operation: c [reg_p2] = a [reg_p1]
// NOTE: a=c is not allowed
//***********************************************************************
.global rdc503_asm
rdc503_asm:
.global oqs_kem_sike_rdc503_asm
oqs_kem_sike_rdc503_asm:
push r12
push r13
push r14
@ -1484,8 +1484,8 @@ rdc503_asm:
// 503-bit multiprecision addition
// Operation: c [reg_p3] = a [reg_p1] + b [reg_p2]
//***********************************************************************
.global mp_add503_asm
mp_add503_asm:
.global oqs_kem_sike_mp_add503_asm
oqs_kem_sike_mp_add503_asm:
mov r8, [reg_p1]
mov r9, [reg_p1+8]
mov r10, [reg_p1+16]
@ -1518,8 +1518,8 @@ mp_add503_asm:
// 2x503-bit multiprecision subtraction/addition
// Operation: c [x2] = a [x0] - b [x1]. If c < 0, add p503*2^512
//***********************************************************************
.global mp_subadd503x2_asm
mp_subadd503x2_asm:
.global oqs_kem_sike_mp_subadd503x2_asm
oqs_kem_sike_mp_subadd503x2_asm:
push r12
push r13
push r14
@ -1619,8 +1619,8 @@ mp_subadd503x2_asm:
// Double 2x503-bit multiprecision subtraction
// Operation: c [reg_p3] = c [reg_p3] - a [reg_p1] - b [reg_p2]
//***********************************************************************
.global mp_dblsub503x2_asm
mp_dblsub503x2_asm:
.global oqs_kem_sike_mp_dblsub503x2_asm
oqs_kem_sike_mp_dblsub503x2_asm:
push r12
push r13
push r14

View File

@ -16,14 +16,14 @@ __inline void fpadd503(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
// Inputs: a, b in [0, 2*p503-1]
// Output: c in [0, 2*p503-1]
fpadd503_asm(a, b, c);
oqs_kem_sike_fpadd503_asm(a, b, c);
}
__inline void fpsub503(const digit_t *a, const digit_t *b, digit_t *c) { // Modular subtraction, c = a-b mod p503.
// Inputs: a, b in [0, 2*p503-1]
// Output: c in [0, 2*p503-1]
fpsub503_asm(a, b, c);
oqs_kem_sike_fpsub503_asm(a, b, c);
}
__inline void fpneg503(digit_t *a) { // Modular negation, a = -a mod p503.
@ -68,13 +68,13 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
UNREFERENCED_PARAMETER(nwords);
mul503_asm(a, b, c);
oqs_kem_sike_mul503_asm(a, b, c);
}
void rdc_mont(const digit_t *ma, digit_t *mc) { // Montgomery reduction exploiting special form of the prime.
void rdc_mont(digit_t *ma, digit_t *mc) { // Montgomery reduction exploiting special form of the prime.
// mc = ma*R^-1 mod p503x2, where R = 2^512.
// If ma < 2^512*p503, the output mc is in the range [0, 2*p503-1].
// ma is assumed to be in Montgomery representation.
rdc503_asm(ma, mc);
oqs_kem_sike_rdc503_asm(ma, mc);
}

View File

@ -44,8 +44,8 @@ p503p1_nz_s8:
// Field addition
// Operation: c [x2] = a [x0] + b [x1]
//***********************************************************************
.global fpadd503_asm
fpadd503_asm:
.global oqs_kem_sike_fpadd503_asm
oqs_kem_sike_fpadd503_asm:
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x11, x12, [x1,#0]
@ -112,8 +112,8 @@ fpadd503_asm:
// Field subtraction
// Operation: c [x2] = a [x0] - b [x1]
//***********************************************************************
.global fpsub503_asm
fpsub503_asm:
.global oqs_kem_sike_fpsub503_asm
oqs_kem_sike_fpsub503_asm:
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x11, x12, [x1,#0]
@ -267,8 +267,8 @@ fpsub503_asm:
// 512-bit integer multiplication using Karatsuba (two levels), Comba (lower level)
// Operation: c [x2] = a [x0] * b [x1]
//***********************************************************************************
.global mul503_asm
mul503_asm:
.global oqs_kem_sike_mul503_asm
oqs_kem_sike_mul503_asm:
sub sp, sp, #96
stp x19, x20, [sp,#0]
stp x21, x22, [sp,#16]
@ -454,8 +454,8 @@ mul503_asm:
// Operation: mc [x1] = ma [x0]
// NOTE: ma=mc is not allowed
//**************************************************************************************
.global rdc503_asm
rdc503_asm:
.global oqs_kem_sike_rdc503_asm
oqs_kem_sike_rdc503_asm:
sub sp, sp, #96
stp x19, x20, [sp]
stp x21, x22, [sp, #16]
@ -622,8 +622,8 @@ rdc503_asm:
// 503-bit multiprecision addition
// Operation: c [x2] = a [x0] + b [x1]
//***********************************************************************
.global mp_add503_asm
mp_add503_asm:
.global oqs_kem_sike_mp_add503_asm
oqs_kem_sike_mp_add503_asm:
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x11, x12, [x1,#0]
@ -653,8 +653,8 @@ mp_add503_asm:
// 2x503-bit multiprecision addition
// Operation: c [x2] = a [x0] + b [x1]
//***********************************************************************
.global mp_add503x2_asm
mp_add503x2_asm:
.global oqs_kem_sike_mp_add503x2_asm
oqs_kem_sike_mp_add503x2_asm:
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x11, x12, [x1,#0]
@ -705,8 +705,8 @@ mp_add503x2_asm:
// 2x503-bit multiprecision subtraction/addition
// Operation: c [x2] = a [x0] - b [x1]. If c < 0, add p503*2^512
//***********************************************************************
.global mp_subadd503x2_asm
mp_subadd503x2_asm:
.global oqs_kem_sike_mp_subadd503x2_asm
oqs_kem_sike_mp_subadd503x2_asm:
ldp x3, x4, [x0,#0]
ldp x11, x12, [x1,#0]
ldp x5, x6, [x0,#16]
@ -778,8 +778,8 @@ mp_subadd503x2_asm:
// Double 2x503-bit multiprecision subtraction
// Operation: c [x2] = c [x2] - a [x0] - b [x1]
//***********************************************************************
.global mp_dblsub503x2_asm
mp_dblsub503x2_asm:
.global oqs_kem_sike_mp_dblsub503x2_asm
oqs_kem_sike_mp_dblsub503x2_asm:
sub sp, sp, #32
stp x27, x28, [sp, #0]
stp x29, x30, [sp, #16]

View File

@ -31,15 +31,15 @@
// Curve isogeny system "SIDHp503". Base curve: Montgomery curve By^2 = Cx^3 + Ax^2 + Cx defined over GF(p503^2), where A=6, B=1, C=1 and p503 = 2^250*3^159-1
//
static const uint64_t p503[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xABFFFFFFFFFFFFFF,
0x13085BDA2211E7A0, 0x1B9BF6C87B7E7DAF, 0x6045C6BDDA77A4D0, 0x004066F541811E1E
};
static const uint64_t p503p1[NWORDS64_FIELD] = {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xAC00000000000000,
0x13085BDA2211E7A0, 0x1B9BF6C87B7E7DAF, 0x6045C6BDDA77A4D0, 0x004066F541811E1E
};
static const uint64_t p503x2[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x57FFFFFFFFFFFFFF,
0x2610B7B44423CF41, 0x3737ED90F6FCFB5E, 0xC08B8D7BB4EF49A0, 0x0080CDEA83023C3C
};
const uint64_t p503[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xABFFFFFFFFFFFFFF,
0x13085BDA2211E7A0, 0x1B9BF6C87B7E7DAF, 0x6045C6BDDA77A4D0, 0x004066F541811E1E
};
const uint64_t p503p1[NWORDS64_FIELD] = {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xAC00000000000000,
0x13085BDA2211E7A0, 0x1B9BF6C87B7E7DAF, 0x6045C6BDDA77A4D0, 0x004066F541811E1E
};
const uint64_t p503x2[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x57FFFFFFFFFFFFFF,
0x2610B7B44423CF41, 0x3737ED90F6FCFB5E, 0xC08B8D7BB4EF49A0, 0x0080CDEA83023C3C
};
// Order of Alice's subgroup
static const uint64_t Alice_order[NWORDS64_ORDER] = {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0400000000000000};
// Order of Bob's subgroup
@ -125,9 +125,9 @@ static const unsigned int strat_Bob[MAX_Bob - 1] = {
#define fp2inv_mont fp2inv503_mont
#define fp2inv_mont_bingcd fp2inv503_mont_bingcd
#define fpequal_non_constant_time fpequal503_non_constant_time
#define mp_add_asm mp_add503_asm
#define mp_subaddx2_asm mp_subadd503x2_asm
#define mp_dblsubx2_asm mp_dblsub503x2_asm
#define mp_add_asm oqs_kem_sike_mp_add503_asm
#define mp_subaddx2_asm oqs_kem_sike_mp_subadd503x2_asm
#define mp_dblsubx2_asm oqs_kem_sike_mp_dblsub503x2_asm
#define crypto_kem_keypair OQS_KEM_sike_p503_keypair
#define crypto_kem_enc OQS_KEM_sike_p503_encaps
#define crypto_kem_dec OQS_KEM_sike_p503_decaps
@ -137,13 +137,14 @@ static const unsigned int strat_Bob[MAX_Bob - 1] = {
#define EphemeralKeyGeneration_B oqs_kem_sidh_p503_EphemeralKeyGeneration_B
#define EphemeralSecretAgreement_A oqs_kem_sidh_p503_EphemeralSecretAgreement_A
#define EphemeralSecretAgreement_B oqs_kem_sidh_p503_EphemeralSecretAgreement_B
#ifdef USE_SIKEP503_ASM
#define USE_SIKE_ASM
#endif
#if defined(X86_64)
#include "AMD64/fp_x64.c"
// #include "AMD64/fp_x64_asm.S" FIXMEOQS
#elif defined(ARM64)
#include "ARM64/fp_arm64.c"
// #include "ARM64/fp_arm64_asm.S" FIXMEOQS
#else
#include "generic/fp_generic.c"
#endif

View File

@ -384,9 +384,9 @@ static const uint64_t v_3_torsion[20][2 * NWORDS64_FIELD] = {
#define fp2inv_mont fp2inv503_mont
#define fp2inv_mont_bingcd fp2inv503_mont_bingcd
#define fpequal_non_constant_time fpequal503_non_constant_time
#define mp_add_asm mp_add503_asm
#define mp_subaddx2_asm mp_subadd503x2_asm
#define mp_dblsubx2_asm mp_dblsub503x2_asm
#define mp_add_asm oqs_kem_sike_mp_add503_asm
#define mp_subaddx2_asm oqs_kem_sike_mp_subadd503x2_asm
#define mp_dblsubx2_asm oqs_kem_sike_mp_dblsub503x2_asm
#define crypto_kem_keypair OQS_KEM_sike_p503_compressed_keypair
#define crypto_kem_enc OQS_KEM_sike_p503_compressed_encaps
#define crypto_kem_dec OQS_KEM_sike_p503_compressed_decaps
@ -396,13 +396,14 @@ static const uint64_t v_3_torsion[20][2 * NWORDS64_FIELD] = {
#define EphemeralKeyGeneration_B oqs_kem_sidh_p503_compressed_EphemeralKeyGeneration_B
#define EphemeralSecretAgreement_A oqs_kem_sidh_p503_compressed_EphemeralSecretAgreement_A
#define EphemeralSecretAgreement_B oqs_kem_sidh_p503_compressed_EphemeralSecretAgreement_B
#ifdef USE_SIKEP503_ASM
#define USE_SIKE_ASM
#endif
#if defined(X86_64)
#include "AMD64/fp_x64.c"
// #include "AMD64/fp_x64_asm.S" FIXMEOQS
#elif defined(ARM64)
#include "ARM64/fp_arm64.c"
// #include "ARM64/fp_arm64_asm.S" FIXMEOQS
#else
#include "generic/fp_generic.c"
#endif

View File

@ -121,17 +121,17 @@ static unsigned int mp_add(const digit_t *a, const digit_t *b, digit_t *c, const
// 503-bit multiprecision addition, c = a+b
static void mp_add503(const digit_t *a, const digit_t *b, digit_t *c);
static void mp_add503_asm(const digit_t *a, const digit_t *b, digit_t *c);
void oqs_kem_sike_mp_add503_asm(const digit_t *a, const digit_t *b, digit_t *c);
// Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = nwords. Returns the borrow bit
static unsigned int mp_sub(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords);
// 2x503-bit multiprecision subtraction followed by addition with p503*2^512, c = a-b+(p503*2^512) if a-b < 0, otherwise c=a-b
static void mp_subaddx2_asm(const digit_t *a, const digit_t *b, digit_t *c);
static void mp_subadd503x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
void oqs_kem_sike_mp_subaddx2_asm(const digit_t *a, const digit_t *b, digit_t *c);
void oqs_kem_sike_mp_subadd503x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
// Double 2x503-bit multiprecision subtraction, c = c-a-b, where c > a and c > b
static void mp_dblsub503x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
void oqs_kem_sike_mp_dblsub503x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
// Multiprecision left shift
static void mp_shiftleft(digit_t *x, unsigned int shift, const unsigned int nwords);
@ -161,13 +161,13 @@ static bool fpequal503_non_constant_time(const digit_t *a, const digit_t *b);
// Modular addition, c = a+b mod p503
extern void fpadd503(const digit_t *a, const digit_t *b, digit_t *c);
extern void fpadd503_asm(const digit_t *a, const digit_t *b, digit_t *c);
static void fpadd503_inline_asm(const digit_t *a, const digit_t *b, const digit_t *p, digit_t *c);
extern void oqs_kem_sike_fpadd503_asm(const digit_t *a, const digit_t *b, digit_t *c);
void oqs_kem_sike_fpadd503_inline_asm(const digit_t *a, const digit_t *b, const digit_t *p, digit_t *c);
// Modular subtraction, c = a-b mod p503
extern void fpsub503(const digit_t *a, const digit_t *b, digit_t *c);
extern void fpsub503_asm(const digit_t *a, const digit_t *b, digit_t *c);
static void fpsub503_inline_asm(const digit_t *a, const digit_t *b, const digit_t *p, digit_t *c);
extern void oqs_kem_sike_fpsub503_asm(const digit_t *a, const digit_t *b, digit_t *c);
void oqs_kem_sike_fpsub503_inline_asm(const digit_t *a, const digit_t *b, const digit_t *p, digit_t *c);
// Modular negation, a = -a mod p503
extern void fpneg503(digit_t *a);
@ -179,14 +179,13 @@ static void fpdiv2_503(const digit_t *a, digit_t *c);
static void fpcorrection503(digit_t *a);
// 503-bit Montgomery reduction, c = a mod p
static void rdc_mont(const digit_t *a, digit_t *c);
static void rdc503_asm(const digit_t *ma, digit_t *mc);
static void rdc503_inline_asm(const felm_t ma, felm_t mb, felm_t mc);
static void rdc_mont(digit_t *a, digit_t *c);
void oqs_kem_sike_rdc503_asm(digit_t *ma, digit_t *mc);
// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p503, where R=2^768
static void fpmul503_mont(const digit_t *a, const digit_t *b, digit_t *c);
static void mul503_asm(const digit_t *a, const digit_t *b, digit_t *c);
static void mul503_inline_asm(const digit_t *a, const digit_t *b, digit_t *c);
void oqs_kem_sike_mul503_asm(const digit_t *a, const digit_t *b, digit_t *c);
void oqs_kem_sike_mul503_inline_asm(const digit_t *a, const digit_t *b, digit_t *c);
// Field squaring using Montgomery arithmetic, c = a*b*R^-1 mod p503, where R=2^768
static void fpsqr503_mont(const digit_t *ma, digit_t *mc);

View File

@ -155,7 +155,7 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
c[2 * nwords - 1] = v;
}
void rdc_mont(const digit_t *ma, digit_t *mc) { // Efficient Montgomery reduction using comba and exploiting the special form of the prime p503.
void rdc_mont(digit_t *ma, digit_t *mc) { // Efficient Montgomery reduction using comba and exploiting the special form of the prime p503.
// mc = ma*R^-1 mod p503x2, where R = 2^512.
// If ma < 2^512*p503, the output mc is in the range [0, 2*p503-1].
// ma is assumed to be in Montgomery representation.

View File

@ -17,7 +17,7 @@ __inline void fpadd610(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
// Inputs: a, b in [0, 2*p610-1]
// Output: c in [0, 2*p610-1]
#if (OS_TARGET == OS_WIN)
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
unsigned int i, carry = 0;
digit_t mask;
@ -38,7 +38,7 @@ __inline void fpadd610(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
#elif (OS_TARGET == OS_LINUX)
fpadd610_asm(a, b, c);
oqs_kem_sike_fpadd610_asm(a, b, c);
#endif
}
@ -47,7 +47,7 @@ __inline void fpsub610(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
// Inputs: a, b in [0, 2*p610-1]
// Output: c in [0, 2*p610-1]
#if (OS_TARGET == OS_WIN)
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
unsigned int i, borrow = 0;
digit_t mask;
@ -63,7 +63,7 @@ __inline void fpsub610(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
#elif (OS_TARGET == OS_LINUX)
fpsub610_asm(a, b, c);
oqs_kem_sike_fpsub610_asm(a, b, c);
#endif
}
@ -110,7 +110,7 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
UNREFERENCED_PARAMETER(nwords);
#if (OS_TARGET == OS_WIN)
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
digit_t t = 0;
uint128_t uv = {0};
unsigned int carry = 0;
@ -408,17 +408,17 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
#elif (OS_TARGET == OS_LINUX)
mul610_asm(a, b, c);
oqs_kem_sike_mul610_asm(a, b, c);
#endif
}
void rdc_mont(const digit_t *ma, digit_t *mc) { // Montgomery reduction exploiting special form of the prime.
void rdc_mont(digit_t *ma, digit_t *mc) { // Montgomery reduction exploiting special form of the prime.
// mc = ma*R^-1 mod p610x2, where R = 2^640.
// If ma < 2^640*p610, the output mc is in the range [0, 2*p610-1].
// ma is assumed to be in Montgomery representation.
#if (OS_TARGET == OS_WIN)
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
unsigned int carry;
digit_t t = 0;
uint128_t uv = {0};
@ -661,7 +661,7 @@ void rdc_mont(const digit_t *ma, digit_t *mc) { // Montgomery reduction exploiti
#elif (OS_TARGET == OS_LINUX)
rdc610_asm(ma, mc);
oqs_kem_sike_rdc610_asm(ma, mc);
#endif
}

View File

@ -6,6 +6,7 @@
.intel_syntax noprefix
// Registers that are used for parameter passing:
#define reg_p1 rdi
#define reg_p2 rsi
@ -17,8 +18,8 @@
// Field addition
// Operation: c [reg_p3] = a [reg_p1] + b [reg_p2]
//***********************************************************************
.global fpadd610_asm
fpadd610_asm:
.global oqs_kem_sike_fpadd610_asm
oqs_kem_sike_fpadd610_asm:
push r12
push r13
push r14
@ -123,8 +124,8 @@ fpadd610_asm:
// Field subtraction
// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2]
//***********************************************************************
.global fpsub610_asm
fpsub610_asm:
.global oqs_kem_sike_fpsub610_asm
oqs_kem_sike_fpsub610_asm:
push r12
push r13
push r14
@ -426,8 +427,8 @@ fpsub610_asm:
//*****************************************************************************
// 610-bit multiplication using Karatsuba (one level), schoolbook (two levels)
//*****************************************************************************
.global mul610_asm
mul610_asm:
.global oqs_kem_sike_mul610_asm
oqs_kem_sike_mul610_asm:
push r12
push r13
push r14
@ -621,8 +622,8 @@ mul610_asm:
// Operation: c [reg_p3] = a [reg_p1] * b [reg_p2]
// NOTE: a=c or b=c are not allowed
//***********************************************************************
.global mul610_asm
mul610_asm:
.global oqs_kem_sike_mul610_asm
oqs_kem_sike_mul610_asm:
ret
@ -757,8 +758,8 @@ mul610_asm:
// Operation: c [reg_p2] = a [reg_p1]
// NOTE: a=c is not allowed
//**************************************************************************************
.global rdc610_asm
rdc610_asm:
.global oqs_kem_sike_rdc610_asm
oqs_kem_sike_rdc610_asm:
push r12
push r13
push r14
@ -939,8 +940,8 @@ rdc610_asm:
// Operation: c [reg_p2] = a [reg_p1]
// NOTE: a=c is not allowed
//***********************************************************************
.global rdc610_asm
rdc610_asm:
.global oqs_kem_sike_rdc610_asm
oqs_kem_sike_rdc610_asm:
ret
@ -953,8 +954,8 @@ rdc610_asm:
// 610-bit multiprecision addition
// Operation: c [reg_p3] = a [reg_p1] + b [reg_p2]
//***********************************************************************
.global mp_add610_asm
mp_add610_asm:
.global oqs_kem_sike_mp_add610_asm
oqs_kem_sike_mp_add610_asm:
mov r8, [reg_p1]
mov r9, [reg_p1+8]
mov r10, [reg_p1+16]
@ -993,8 +994,8 @@ mp_add610_asm:
// 2x610-bit multiprecision subtraction/addition
// Operation: c [x2] = a [x0] - b [x1]. If c < 0, add p610*2^640
//***********************************************************************
.global mp_subadd610x2_asm
mp_subadd610x2_asm:
.global oqs_kem_sike_mp_subadd610x2_asm
oqs_kem_sike_mp_subadd610x2_asm:
push r12
push r13
push r14
@ -1112,8 +1113,8 @@ mp_subadd610x2_asm:
// Double 2x610-bit multiprecision subtraction
// Operation: c [reg_p3] = c [reg_p3] - a [reg_p1] - b [reg_p2]
//***********************************************************************
.global mp_dblsub610x2_asm
mp_dblsub610x2_asm:
.global oqs_kem_sike_mp_dblsub610x2_asm
oqs_kem_sike_mp_dblsub610x2_asm:
push r12
push r13
push r14

View File

@ -31,15 +31,15 @@
// Curve isogeny system "SIDHp610". Base curve: Montgomery curve By^2 = Cx^3 + Ax^2 + Cx defined over GF(p610^2), where A=6, B=1, C=1 and p610 = 2^305*3^192-1
//
static const uint64_t p610[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x6E01FFFFFFFFFFFF,
0xB1784DE8AA5AB02E, 0x9AE7BF45048FF9AB, 0xB255B2FA10C4252A, 0x819010C251E7D88C, 0x000000027BF6A768
};
static const uint64_t p610p1[NWORDS64_FIELD] = {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x6E02000000000000,
0xB1784DE8AA5AB02E, 0x9AE7BF45048FF9AB, 0xB255B2FA10C4252A, 0x819010C251E7D88C, 0x000000027BF6A768
};
static const uint64_t p610x2[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xDC03FFFFFFFFFFFF,
0x62F09BD154B5605C, 0x35CF7E8A091FF357, 0x64AB65F421884A55, 0x03202184A3CFB119, 0x00000004F7ED4ED1
};
const uint64_t p610[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x6E01FFFFFFFFFFFF,
0xB1784DE8AA5AB02E, 0x9AE7BF45048FF9AB, 0xB255B2FA10C4252A, 0x819010C251E7D88C, 0x000000027BF6A768
};
const uint64_t p610p1[NWORDS64_FIELD] = {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x6E02000000000000,
0xB1784DE8AA5AB02E, 0x9AE7BF45048FF9AB, 0xB255B2FA10C4252A, 0x819010C251E7D88C, 0x000000027BF6A768
};
const uint64_t p610x2[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xDC03FFFFFFFFFFFF,
0x62F09BD154B5605C, 0x35CF7E8A091FF357, 0x64AB65F421884A55, 0x03202184A3CFB119, 0x00000004F7ED4ED1
};
// Order of Alice's subgroup
static const uint64_t Alice_order[NWORDS64_ORDER] = {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0002000000000000};
// Order of Bob's subgroup
@ -127,9 +127,9 @@ static const unsigned int strat_Bob[MAX_Bob - 1] = {
#define fp2inv_mont fp2inv610_mont
#define fp2inv_mont_bingcd fp2inv610_mont_bingcd
#define fpequal_non_constant_time fpequal610_non_constant_time
#define mp_add_asm mp_add610_asm
#define mp_subaddx2_asm mp_subadd610x2_asm
#define mp_dblsubx2_asm mp_dblsub610x2_asm
#define mp_add_asm oqs_kem_sike_mp_add610_asm
#define mp_subaddx2_asm oqs_kem_sike_mp_subadd610x2_asm
#define mp_dblsubx2_asm oqs_kem_sike_mp_dblsub610x2_asm
#define crypto_kem_keypair OQS_KEM_sike_p610_keypair
#define crypto_kem_enc OQS_KEM_sike_p610_encaps
#define crypto_kem_dec OQS_KEM_sike_p610_decaps
@ -139,10 +139,12 @@ static const unsigned int strat_Bob[MAX_Bob - 1] = {
#define EphemeralKeyGeneration_B oqs_kem_sidh_p610_EphemeralKeyGeneration_B
#define EphemeralSecretAgreement_A oqs_kem_sidh_p610_EphemeralSecretAgreement_A
#define EphemeralSecretAgreement_B oqs_kem_sidh_p610_EphemeralSecretAgreement_B
#ifdef USE_SIKEP610_ASM
#define USE_SIKE_ASM
#endif
#if defined(X86_64)
#include "AMD64/fp_x64.c"
// #include "AMD64/fp_x64_asm.S" FIXMEOQS
#else
#include "generic/fp_generic.c"
#endif

View File

@ -387,9 +387,9 @@ static const uint64_t v_3_torsion[20][2 * NWORDS64_FIELD] = {
#define fp2inv_mont fp2inv610_mont
#define fp2inv_mont_bingcd fp2inv610_mont_bingcd
#define fpequal_non_constant_time fpequal610_non_constant_time
#define mp_add_asm mp_add610_asm
#define mp_subaddx2_asm mp_subadd610x2_asm
#define mp_dblsubx2_asm mp_dblsub610x2_asm
#define mp_add_asm oqs_kem_sike_mp_add610_asm
#define mp_subaddx2_asm oqs_kem_sike_mp_subadd610x2_asm
#define mp_dblsubx2_asm oqs_kem_sike_mp_dblsub610x2_asm
#define crypto_kem_keypair OQS_KEM_sike_p610_compressed_keypair
#define crypto_kem_enc OQS_KEM_sike_p610_compressed_encaps
#define crypto_kem_dec OQS_KEM_sike_p610_compressed_decaps
@ -399,10 +399,12 @@ static const uint64_t v_3_torsion[20][2 * NWORDS64_FIELD] = {
#define EphemeralKeyGeneration_B oqs_kem_sidh_p610_compressed_EphemeralKeyGeneration_B
#define EphemeralSecretAgreement_A oqs_kem_sidh_p610_compressed_EphemeralSecretAgreement_A
#define EphemeralSecretAgreement_B oqs_kem_sidh_p610_compressed_EphemeralSecretAgreement_B
#ifdef USE_SIKEP610_ASM
#define USE_SIKE_ASM
#endif
#if defined(X86_64)
#include "AMD64/fp_x64.c"
// #include "AMD64/fp_x64_asm.S" FIXMEOQS
#else
#include "generic/fp_generic.c"
#endif

View File

@ -121,17 +121,17 @@ static unsigned int mp_add(const digit_t *a, const digit_t *b, digit_t *c, const
// 610-bit multiprecision addition, c = a+b
static void mp_add610(const digit_t *a, const digit_t *b, digit_t *c);
static void mp_add610_asm(const digit_t *a, const digit_t *b, digit_t *c);
void oqs_kem_sike_mp_add610_asm(const digit_t *a, const digit_t *b, digit_t *c);
// Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = nwords. Returns the borrow bit
static unsigned int mp_sub(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords);
// 2x610-bit multiprecision subtraction followed by addition with p610*2^640, c = a-b+(p610*2^640) if a-b < 0, otherwise c=a-b
static void mp_subaddx2_asm(const digit_t *a, const digit_t *b, digit_t *c);
static void mp_subadd610x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
void oqs_kem_sike_mp_subaddx2_asm(const digit_t *a, const digit_t *b, digit_t *c);
void oqs_kem_sike_mp_subadd610x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
// Double 2x610-bit multiprecision subtraction, c = c-a-b, where c > a and c > b
static void mp_dblsub610x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
void oqs_kem_sike_mp_dblsub610x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
// Multiprecision left shift
static void mp_shiftleft(digit_t *x, unsigned int shift, const unsigned int nwords);
@ -161,11 +161,11 @@ static bool fpequal610_non_constant_time(const digit_t *a, const digit_t *b);
// Modular addition, c = a+b mod p610
extern void fpadd610(const digit_t *a, const digit_t *b, digit_t *c);
extern void fpadd610_asm(const digit_t *a, const digit_t *b, digit_t *c);
extern void oqs_kem_sike_fpadd610_asm(const digit_t *a, const digit_t *b, digit_t *c);
// Modular subtraction, c = a-b mod p610
extern void fpsub610(const digit_t *a, const digit_t *b, digit_t *c);
extern void fpsub610_asm(const digit_t *a, const digit_t *b, digit_t *c);
extern void oqs_kem_sike_fpsub610_asm(const digit_t *a, const digit_t *b, digit_t *c);
// Modular negation, a = -a mod p610
extern void fpneg610(digit_t *a);
@ -177,12 +177,12 @@ static void fpdiv2_610(const digit_t *a, digit_t *c);
static void fpcorrection610(digit_t *a);
// 610-bit Montgomery reduction, c = a mod p
static void rdc_mont(const digit_t *a, digit_t *c);
static void rdc_mont(digit_t *a, digit_t *c);
void oqs_kem_sike_rdc610_asm(digit_t *a, digit_t *c);
// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p610, where R=2^640
static void fpmul610_mont(const digit_t *a, const digit_t *b, digit_t *c);
static void mul610_asm(const digit_t *a, const digit_t *b, digit_t *c);
static void rdc610_asm(const digit_t *ma, digit_t *mc);
void oqs_kem_sike_mul610_asm(const digit_t *a, const digit_t *b, digit_t *c);
// Field squaring using Montgomery arithmetic, c = a*b*R^-1 mod p610, where R=2^640
static void fpsqr610_mont(const digit_t *ma, digit_t *mc);

View File

@ -155,7 +155,7 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
c[2 * nwords - 1] = v;
}
void rdc_mont(const digit_t *ma, digit_t *mc) { // Efficient Montgomery reduction using comba and exploiting the special form of the prime p610.
void rdc_mont(digit_t *ma, digit_t *mc) { // Efficient Montgomery reduction using comba and exploiting the special form of the prime p610.
// mc = ma*R^-1 mod p610x2, where R = 2^768.
// If ma < 2^768*p610, the output mc is in the range [0, 2*p610-1].
// ma is assumed to be in Montgomery representation.

View File

@ -17,7 +17,7 @@ __inline void fpadd751(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
// Inputs: a, b in [0, 2*p751-1]
// Output: c in [0, 2*p751-1]
#if (OS_TARGET == OS_WIN)
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
unsigned int i, carry = 0;
digit_t mask;
@ -38,7 +38,7 @@ __inline void fpadd751(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
#elif (OS_TARGET == OS_LINUX)
fpadd751_asm(a, b, c);
oqs_kem_sike_fpadd751_asm(a, b, c);
#endif
}
@ -47,7 +47,7 @@ __inline void fpsub751(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
// Inputs: a, b in [0, 2*p751-1]
// Output: c in [0, 2*p751-1]
#if (OS_TARGET == OS_WIN)
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
unsigned int i, borrow = 0;
digit_t mask;
@ -63,7 +63,7 @@ __inline void fpsub751(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
#elif (OS_TARGET == OS_LINUX)
fpsub751_asm(a, b, c);
oqs_kem_sike_fpsub751_asm(a, b, c);
#endif
}
@ -110,7 +110,7 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
UNREFERENCED_PARAMETER(nwords);
#if (OS_TARGET == OS_WIN)
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
digit_t t = 0;
uint128_t uv = {0};
unsigned int carry = 0;
@ -516,17 +516,17 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
#elif (OS_TARGET == OS_LINUX)
mul751_asm(a, b, c);
oqs_kem_sike_mul751_asm(a, b, c);
#endif
}
void rdc_mont(const digit_t *ma, digit_t *mc) { // Montgomery reduction exploiting special form of the prime.
void rdc_mont(digit_t *ma, digit_t *mc) { // Montgomery reduction exploiting special form of the prime.
// mc = ma*R^-1 mod p751x2, where R = 2^768.
// If ma < 2^768*p751, the output mc is in the range [0, 2*p751-1].
// ma is assumed to be in Montgomery representation.
#if (OS_TARGET == OS_WIN)
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
unsigned int carry;
digit_t t = 0;
uint128_t uv = {0};
@ -842,7 +842,7 @@ void rdc_mont(const digit_t *ma, digit_t *mc) { // Montgomery reduction exploiti
#elif (OS_TARGET == OS_LINUX)
rdc751_asm(ma, mc);
oqs_kem_sike_rdc751_asm(ma, mc);
#endif
}

View File

@ -6,6 +6,7 @@
.intel_syntax noprefix
// Registers that are used for parameter passing:
#define reg_p1 rdi
#define reg_p2 rsi
@ -17,8 +18,8 @@
// Field addition
// Operation: c [reg_p3] = a [reg_p1] + b [reg_p2]
//***********************************************************************
.global fpadd751_asm
fpadd751_asm:
.global oqs_kem_sike_fpadd751_asm
oqs_kem_sike_fpadd751_asm:
push r12
push r13
push r14
@ -150,8 +151,8 @@ fpadd751_asm:
// Field subtraction
// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2]
//***********************************************************************
.global fpsub751_asm
fpsub751_asm:
.global oqs_kem_sike_fpsub751_asm
oqs_kem_sike_fpsub751_asm:
push r12
push r13
push r14
@ -567,8 +568,8 @@ fpsub751_asm:
//*****************************************************************************
// 751-bit multiplication using Karatsuba (one level), schoolbook (two levels)
//*****************************************************************************
.global mul751_asm
mul751_asm:
.global oqs_kem_sike_mul751_asm
oqs_kem_sike_mul751_asm:
push r12
push r13
push r14
@ -791,8 +792,8 @@ mul751_asm:
// Operation: c [reg_p3] = a [reg_p1] * b [reg_p2]
// NOTE: a=c or b=c are not allowed
//***********************************************************************
.global mul751_asm
mul751_asm:
.global oqs_kem_sike_mul751_asm
oqs_kem_sike_mul751_asm:
push r12
push r13
push r14
@ -1957,8 +1958,8 @@ mul751_asm:
// Operation: c [reg_p2] = a [reg_p1]
// NOTE: a=c is not allowed
//**************************************************************************************
.global rdc751_asm
rdc751_asm:
.global oqs_kem_sike_rdc751_asm
oqs_kem_sike_rdc751_asm:
push rbx
push rbp
push r12
@ -2106,8 +2107,8 @@ rdc751_asm:
// Operation: c [reg_p2] = a [reg_p1]
// NOTE: a=c is not allowed
//***********************************************************************
.global rdc751_asm
rdc751_asm:
.global oqs_kem_sike_rdc751_asm
oqs_kem_sike_rdc751_asm:
push r12
push r13
push r14
@ -2727,8 +2728,8 @@ rdc751_asm:
// 751-bit multiprecision addition
// Operation: c [reg_p3] = a [reg_p1] + b [reg_p2]
//***********************************************************************
.global mp_add751_asm
mp_add751_asm:
.global oqs_kem_sike_mp_add751_asm
oqs_kem_sike_mp_add751_asm:
mov r8, [reg_p1]
mov r9, [reg_p1+8]
mov r10, [reg_p1+16]
@ -2773,8 +2774,8 @@ mp_add751_asm:
// 2x751-bit multiprecision subtraction/addition
// Operation: c [x2] = a [x0] - b [x1]. If c < 0, add p751*2^768
//***********************************************************************
.global mp_subadd751x2_asm
mp_subadd751x2_asm:
.global oqs_kem_sike_mp_subadd751x2_asm
oqs_kem_sike_mp_subadd751x2_asm:
push r12
push r13
push r14
@ -2916,8 +2917,8 @@ mp_subadd751x2_asm:
// Double 2x751-bit multiprecision subtraction
// Operation: c [reg_p3] = c [reg_p3] - a [reg_p1] - b [reg_p2]
//***********************************************************************
.global mp_dblsub751x2_asm
mp_dblsub751x2_asm:
.global oqs_kem_sike_mp_dblsub751x2_asm
oqs_kem_sike_mp_dblsub751x2_asm:
push r12
push r13
push r14

View File

@ -16,14 +16,14 @@ __inline void fpadd751(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
// Inputs: a, b in [0, 2*p751-1]
// Output: c in [0, 2*p751-1]
fpadd751_asm(a, b, c);
oqs_kem_sike_fpadd751_asm(a, b, c);
}
__inline void fpsub751(const digit_t *a, const digit_t *b, digit_t *c) { // Modular subtraction, c = a-b mod p751.
// Inputs: a, b in [0, 2*p751-1]
// Output: c in [0, 2*p751-1]
fpsub751_asm(a, b, c);
oqs_kem_sike_fpsub751_asm(a, b, c);
}
__inline void fpneg751(digit_t *a) { // Modular negation, a = -a mod p751.
@ -68,13 +68,13 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
UNREFERENCED_PARAMETER(nwords);
mul751_asm(a, b, c);
oqs_kem_sike_mul751_asm(a, b, c);
}
void rdc_mont(const digit_t *ma, digit_t *mc) { // Montgomery reduction exploiting special form of the prime.
void rdc_mont(digit_t *ma, digit_t *mc) { // Montgomery reduction exploiting special form of the prime.
// mc = ma*R^-1 mod p751x2, where R = 2^768.
// If ma < 2^768*p751, the output mc is in the range [0, 2*p751-1].
// ma is assumed to be in Montgomery representation.
rdc751_asm(ma, mc);
oqs_kem_sike_rdc751_asm(ma, mc);
}

View File

@ -43,8 +43,8 @@ p751p1_nz_s32:
// Field addition
// Operation: c [x2] = a [x0] + b [x1]
//***********************************************************************
.global fpadd751_asm
fpadd751_asm:
.global oqs_kem_sike_fpadd751_asm
oqs_kem_sike_fpadd751_asm:
sub sp, sp, #16
stp x19, x20, [sp]
@ -141,8 +141,8 @@ fpadd751_asm:
// Field subtraction
// Operation: c [x2] = a [x0] - b [x1]
//***********************************************************************
.global fpsub751_asm
fpsub751_asm:
.global oqs_kem_sike_fpsub751_asm
oqs_kem_sike_fpsub751_asm:
sub sp, sp, #16
str x19, [sp]
@ -472,8 +472,8 @@ fpsub751_asm:
// 768-bit integer multiplication using Karatsuba (two levels), Comba (lower level)
// Operation: c [x2] = a [x0] * b [x1]
//***********************************************************************************
.global mul751_asm
mul751_asm:
.global oqs_kem_sike_mul751_asm
oqs_kem_sike_mul751_asm:
sub sp, sp, #96
stp x19, x20, [sp,#0]
stp x21, x22, [sp,#16]
@ -827,8 +827,8 @@ mul751_asm:
// Operation: mc [x1] = ma [x0]
// NOTE: ma=mc is not allowed
//**************************************************************************************
.global rdc751_asm
rdc751_asm:
.global oqs_kem_sike_rdc751_asm
oqs_kem_sike_rdc751_asm:
sub sp, sp, #96
stp x19, x20, [sp]
stp x21, x22, [sp,#16]
@ -1019,8 +1019,8 @@ rdc751_asm:
// 751-bit multiprecision addition
// Operation: c [x2] = a [x0] + b [x1]
//***********************************************************************
.global mp_add751_asm
mp_add751_asm:
.global oqs_kem_sike_mp_add751_asm
oqs_kem_sike_mp_add751_asm:
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x7, x8, [x0,#32]
@ -1060,8 +1060,8 @@ mp_add751_asm:
// 2x751-bit multiprecision addition
// Operation: c [x2] = a [x0] + b [x1]
//***********************************************************************
.global mp_add751x2_asm
mp_add751x2_asm:
.global oqs_kem_sike_mp_add751x2_asm
oqs_kem_sike_mp_add751x2_asm:
ldp x3, x4, [x0,#0]
ldp x5, x6, [x0,#16]
ldp x7, x8, [x0,#32]
@ -1134,8 +1134,8 @@ mp_add751x2_asm:
// 2x751-bit multiprecision subtraction/addition
// Operation: c [x2] = a [x0] - b [x1]. If c < 0, add p751*2^768
//***********************************************************************
.global mp_subadd751x2_asm
mp_subadd751x2_asm:
.global oqs_kem_sike_mp_subadd751x2_asm
oqs_kem_sike_mp_subadd751x2_asm:
sub sp, sp, #16
stp x19, x20, [sp]
@ -1242,8 +1242,8 @@ mp_subadd751x2_asm:
// Double 2x751-bit multiprecision subtraction
// Operation: c [x2] = c [x2] - a [x0] - b [x1]
//***********************************************************************
.global mp_dblsub751x2_asm
mp_dblsub751x2_asm:
.global oqs_kem_sike_mp_dblsub751x2_asm
oqs_kem_sike_mp_dblsub751x2_asm:
sub sp, sp, #96
stp x19, x20, [sp]
stp x21, x22, [sp, #16]

View File

@ -31,15 +31,15 @@
// Curve isogeny system "SIDHp751". Base curve: Montgomery curve By^2 = Cx^3 + Ax^2 + Cx defined over GF(p751^2), where A=6, B=1, C=1 and p751 = 2^372*3^239-1
//
static const uint64_t p751[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xEEAFFFFFFFFFFFFF,
0xE3EC968549F878A8, 0xDA959B1A13F7CC76, 0x084E9867D6EBE876, 0x8562B5045CB25748, 0x0E12909F97BADC66, 0x00006FE5D541F71C
};
static const uint64_t p751p1[NWORDS64_FIELD] = {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xEEB0000000000000,
0xE3EC968549F878A8, 0xDA959B1A13F7CC76, 0x084E9867D6EBE876, 0x8562B5045CB25748, 0x0E12909F97BADC66, 0x00006FE5D541F71C
};
static const uint64_t p751x2[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xDD5FFFFFFFFFFFFF,
0xC7D92D0A93F0F151, 0xB52B363427EF98ED, 0x109D30CFADD7D0ED, 0x0AC56A08B964AE90, 0x1C25213F2F75B8CD, 0x0000DFCBAA83EE38
};
const uint64_t p751[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xEEAFFFFFFFFFFFFF,
0xE3EC968549F878A8, 0xDA959B1A13F7CC76, 0x084E9867D6EBE876, 0x8562B5045CB25748, 0x0E12909F97BADC66, 0x00006FE5D541F71C
};
const uint64_t p751p1[NWORDS64_FIELD] = {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xEEB0000000000000,
0xE3EC968549F878A8, 0xDA959B1A13F7CC76, 0x084E9867D6EBE876, 0x8562B5045CB25748, 0x0E12909F97BADC66, 0x00006FE5D541F71C
};
const uint64_t p751x2[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xDD5FFFFFFFFFFFFF,
0xC7D92D0A93F0F151, 0xB52B363427EF98ED, 0x109D30CFADD7D0ED, 0x0AC56A08B964AE90, 0x1C25213F2F75B8CD, 0x0000DFCBAA83EE38
};
// Order of Alice's subgroup
static const uint64_t Alice_order[NWORDS64_ORDER] = {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0010000000000000};
// Order of Bob's subgroup
@ -129,9 +129,9 @@ static const unsigned int strat_Bob[MAX_Bob - 1] = {
#define fp2inv_mont fp2inv751_mont
#define fp2inv_mont_bingcd fp2inv751_mont_bingcd
#define fpequal_non_constant_time fpequal751_non_constant_time
#define mp_add_asm mp_add751_asm
#define mp_subaddx2_asm mp_subadd751x2_asm
#define mp_dblsubx2_asm mp_dblsub751x2_asm
#define mp_add_asm oqs_kem_sike_mp_add751_asm
#define mp_subaddx2_asm oqs_kem_sike_mp_subadd751x2_asm
#define mp_dblsubx2_asm oqs_kem_sike_mp_dblsub751x2_asm
#define crypto_kem_keypair OQS_KEM_sike_p751_keypair
#define crypto_kem_enc OQS_KEM_sike_p751_encaps
#define crypto_kem_dec OQS_KEM_sike_p751_decaps
@ -141,10 +141,12 @@ static const unsigned int strat_Bob[MAX_Bob - 1] = {
#define EphemeralKeyGeneration_B oqs_kem_sidh_p751_EphemeralKeyGeneration_B
#define EphemeralSecretAgreement_A oqs_kem_sidh_p751_EphemeralSecretAgreement_A
#define EphemeralSecretAgreement_B oqs_kem_sidh_p751_EphemeralSecretAgreement_B
#ifdef USE_SIKEP751_ASM
#define USE_SIKE_ASM
#endif
#if defined(X86_64)
#include "AMD64/fp_x64.c"
// #include "AMD64/fp_x64_asm.S" FIXMEOQS
#elif defined(ARM64)
#include "ARM64/fp_arm64.c"
#else

View File

@ -396,9 +396,9 @@ static const uint64_t v_3_torsion[20][2 * NWORDS64_FIELD] = {
#define fp2inv_mont fp2inv751_mont
#define fp2inv_mont_bingcd fp2inv751_mont_bingcd
#define fpequal_non_constant_time fpequal751_non_constant_time
#define mp_add_asm mp_add751_asm
#define mp_subaddx2_asm mp_subadd751x2_asm
#define mp_dblsubx2_asm mp_dblsub751x2_asm
#define mp_add_asm oqs_kem_sike_mp_add751_asm
#define mp_subaddx2_asm oqs_kem_sike_mp_subadd751x2_asm
#define mp_dblsubx2_asm oqs_kem_sike_mp_dblsub751x2_asm
#define crypto_kem_keypair OQS_KEM_sike_p751_compressed_keypair
#define crypto_kem_enc OQS_KEM_sike_p751_compressed_encaps
#define crypto_kem_dec OQS_KEM_sike_p751_compressed_decaps
@ -408,10 +408,12 @@ static const uint64_t v_3_torsion[20][2 * NWORDS64_FIELD] = {
#define EphemeralKeyGeneration_B oqs_kem_sidh_p751_compressed_EphemeralKeyGeneration_B
#define EphemeralSecretAgreement_A oqs_kem_sidh_p751_compressed_EphemeralSecretAgreement_A
#define EphemeralSecretAgreement_B oqs_kem_sidh_p751_compressed_EphemeralSecretAgreement_B
#ifdef USE_SIKEP751_ASM
#define USE_SIKE_ASM
#endif
#if defined(X86_64)
#include "AMD64/fp_x64.c"
// #include "AMD64/fp_x64_asm.S" FIXMEOQS
#elif defined(ARM64)
#include "ARM64/fp_arm64.c"
#else

View File

@ -121,17 +121,17 @@ static unsigned int mp_add(const digit_t *a, const digit_t *b, digit_t *c, const
// 751-bit multiprecision addition, c = a+b
static void mp_add751(const digit_t *a, const digit_t *b, digit_t *c);
static void mp_add751_asm(const digit_t *a, const digit_t *b, digit_t *c);
void oqs_kem_sike_mp_add751_asm(const digit_t *a, const digit_t *b, digit_t *c);
// Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = nwords. Returns the borrow bit
static unsigned int mp_sub(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords);
// 2x751-bit multiprecision subtraction followed by addition with p751*2^768, c = a-b+(p751*2^768) if a-b < 0, otherwise c=a-b
static void mp_subaddx2_asm(const digit_t *a, const digit_t *b, digit_t *c);
static void mp_subadd751x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
void oqs_kem_sike_mp_subaddx2_asm(const digit_t *a, const digit_t *b, digit_t *c);
void oqs_kem_sike_mp_subadd751x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
// Double 2x751-bit multiprecision subtraction, c = c-a-b, where c > a and c > b
static void mp_dblsub751x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
void oqs_kem_sike_mp_dblsub751x2_asm(const digit_t *a, const digit_t *b, digit_t *c);
// Multiprecision left shift
static void mp_shiftleft(digit_t *x, unsigned int shift, const unsigned int nwords);
@ -161,11 +161,11 @@ static bool fpequal751_non_constant_time(const digit_t *a, const digit_t *b);
// Modular addition, c = a+b mod p751
extern void fpadd751(const digit_t *a, const digit_t *b, digit_t *c);
extern void fpadd751_asm(const digit_t *a, const digit_t *b, digit_t *c);
extern void oqs_kem_sike_fpadd751_asm(const digit_t *a, const digit_t *b, digit_t *c);
// Modular subtraction, c = a-b mod p751
extern void fpsub751(const digit_t *a, const digit_t *b, digit_t *c);
extern void fpsub751_asm(const digit_t *a, const digit_t *b, digit_t *c);
extern void oqs_kem_sike_fpsub751_asm(const digit_t *a, const digit_t *b, digit_t *c);
// Modular negation, a = -a mod p751
extern void fpneg751(digit_t *a);
@ -177,12 +177,13 @@ static void fpdiv2_751(const digit_t *a, digit_t *c);
static void fpcorrection751(digit_t *a);
// 751-bit Montgomery reduction, c = a mod p
static void rdc_mont(const digit_t *a, digit_t *c);
static void rdc_mont(digit_t *a, digit_t *c);
void oqs_kem_sike_rdc751_asm(digit_t *ma, digit_t *mc);
// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p751, where R=2^768
static void fpmul751_mont(const digit_t *a, const digit_t *b, digit_t *c);
static void mul751_asm(const digit_t *a, const digit_t *b, digit_t *c);
static void rdc751_asm(const digit_t *ma, digit_t *mc);
void oqs_kem_sike_mul751_asm(const digit_t *a, const digit_t *b, digit_t *c);
// Field squaring using Montgomery arithmetic, c = a*b*R^-1 mod p751, where R=2^768
static void fpsqr751_mont(const digit_t *ma, digit_t *mc);

View File

@ -155,7 +155,7 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
c[2 * nwords - 1] = v;
}
void rdc_mont(const digit_t *ma, digit_t *mc) { // Efficient Montgomery reduction using comba and exploiting the special form of the prime p751.
void rdc_mont(digit_t *ma, digit_t *mc) { // Efficient Montgomery reduction using comba and exploiting the special form of the prime p751.
// mc = ma*R^-1 mod p751x2, where R = 2^768.
// If ma < 2^768*p751, the output mc is in the range [0, 2*p751-1].
// ma is assumed to be in Montgomery representation.

View File

@ -15,9 +15,15 @@
#define OS_WIN 1
#define OS_LINUX 2
#define OS_DARWIN 3
#if defined(_WIN32) // Microsoft Windows OS
#define OS_TARGET OS_WIN
#elif defined(__APPLE__) // darwin
#define OS_TARGET OS_DARWIN
#ifndef _GENERIC_ // default to generic implementation on darwin for now (FIXMEOQS: still needed?)
#define _GENERIC_
#endif
#else
#define OS_TARGET OS_LINUX // default to Linux
#endif

View File

@ -138,14 +138,10 @@ static void fp2correction(f2elm_t a) { // Modular correction, a = a in GF(p^2).
}
__inline static void mp_addfast(const digit_t *a, const digit_t *b, digit_t *c) { // Multiprecision addition, c = a+b.
#if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && (NBITS_FIELD == 434 || NBITS_FIELD == 610))
mp_add(a, b, c, NWORDS_FIELD);
#elif (OS_TARGET == OS_LINUX)
#if USE_SIKE_ASM
mp_add_asm(a, b, c);
#else
mp_add(a, b, c, NWORDS_FIELD);
#endif
}
@ -172,7 +168,9 @@ __inline unsigned int mp_sub(const digit_t *a, const digit_t *b, digit_t *c, con
}
__inline static void mp_subaddfast(const digit_t *a, const digit_t *b, digit_t *c) { // Multiprecision subtraction followed by addition with p*2^MAXBITS_FIELD, c = a-b+(p*2^MAXBITS_FIELD) if a-b < 0, otherwise c=a-b.
#if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && (NBITS_FIELD == 434 || NBITS_FIELD == 610))
#if USE_SIKE_ASM
mp_subaddx2_asm(a, b, c);
#else
felm_t t1;
digit_t mask = 0 - (digit_t) mp_sub(a, b, c, 2 * NWORDS_FIELD);
@ -180,24 +178,15 @@ __inline static void mp_subaddfast(const digit_t *a, const digit_t *b, digit_t *
t1[i] = ((digit_t *) PRIME)[i] & mask;
}
mp_addfast((digit_t *) &c[NWORDS_FIELD], t1, (digit_t *) &c[NWORDS_FIELD]);
#elif (OS_TARGET == OS_LINUX)
mp_subaddx2_asm(a, b, c);
#endif
}
__inline static void mp_dblsubfast(const digit_t *a, const digit_t *b, digit_t *c) { // Multiprecision subtraction, c = c-a-b, where lng(a) = lng(b) = 2*NWORDS_FIELD.
#if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && (NBITS_FIELD == 434 || NBITS_FIELD == 610))
#if USE_SIKE_ASM
mp_dblsubx2_asm(a, b, c);
#else
mp_sub(c, a, c, 2 * NWORDS_FIELD);
mp_sub(c, b, c, 2 * NWORDS_FIELD);
#elif (OS_TARGET == OS_LINUX)
mp_dblsubx2_asm(a, b, c);
#endif
}

View File

@ -13,7 +13,7 @@ OQS_KEM *OQS_KEM_sike_p434_new() {
return NULL;
}
kem->method_name = OQS_KEM_alg_sike_p434;
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
kem->claimed_nist_level = 1;
kem->ind_cca = true;
@ -43,7 +43,7 @@ OQS_KEM *OQS_KEM_sike_p434_compressed_new() {
return NULL;
}
kem->method_name = OQS_KEM_alg_sike_p434_compressed;
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
kem->claimed_nist_level = 1;
kem->ind_cca = true;
@ -73,7 +73,7 @@ OQS_KEM *OQS_KEM_sike_p503_new() {
return NULL;
}
kem->method_name = OQS_KEM_alg_sike_p503;
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
kem->claimed_nist_level = 2;
kem->ind_cca = true;
@ -103,7 +103,7 @@ OQS_KEM *OQS_KEM_sike_p503_compressed_new() {
return NULL;
}
kem->method_name = OQS_KEM_alg_sike_p503_compressed;
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
kem->claimed_nist_level = 2;
kem->ind_cca = true;
@ -133,7 +133,7 @@ OQS_KEM *OQS_KEM_sike_p610_new() {
return NULL;
}
kem->method_name = OQS_KEM_alg_sike_p610;
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
kem->claimed_nist_level = 3;
kem->ind_cca = true;
@ -163,7 +163,7 @@ OQS_KEM *OQS_KEM_sike_p610_compressed_new() {
return NULL;
}
kem->method_name = OQS_KEM_alg_sike_p610_compressed;
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
kem->claimed_nist_level = 3;
kem->ind_cca = true;
@ -193,7 +193,7 @@ OQS_KEM *OQS_KEM_sike_p751_new() {
return NULL;
}
kem->method_name = OQS_KEM_alg_sike_p751;
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
kem->claimed_nist_level = 5;
kem->ind_cca = true;
@ -223,7 +223,7 @@ OQS_KEM *OQS_KEM_sike_p751_compressed_new() {
return NULL;
}
kem->method_name = OQS_KEM_alg_sike_p751_compressed;
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
kem->claimed_nist_level = 5;
kem->ind_cca = true;
@ -253,7 +253,7 @@ OQS_KEM *OQS_KEM_sidh_p434_new() {
return NULL;
}
kem->method_name = OQS_KEM_alg_sidh_p434;
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
kem->claimed_nist_level = 1;
kem->ind_cca = false;
@ -323,7 +323,7 @@ OQS_KEM *OQS_KEM_sidh_p434_compressed_new() {
return NULL;
}
kem->method_name = OQS_KEM_alg_sidh_p434_compressed;
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
kem->claimed_nist_level = 1;
kem->ind_cca = false;
@ -393,7 +393,7 @@ OQS_KEM *OQS_KEM_sidh_p503_new() {
return NULL;
}
kem->method_name = OQS_KEM_alg_sidh_p503;
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
kem->claimed_nist_level = 2;
kem->ind_cca = false;
@ -463,7 +463,7 @@ OQS_KEM *OQS_KEM_sidh_p503_compressed_new() {
return NULL;
}
kem->method_name = OQS_KEM_alg_sidh_p503_compressed;
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
kem->claimed_nist_level = 2;
kem->ind_cca = false;
@ -533,7 +533,7 @@ OQS_KEM *OQS_KEM_sidh_p610_new() {
return NULL;
}
kem->method_name = OQS_KEM_alg_sidh_p610;
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
kem->claimed_nist_level = 3;
kem->ind_cca = false;
@ -603,7 +603,7 @@ OQS_KEM *OQS_KEM_sidh_p610_compressed_new() {
return NULL;
}
kem->method_name = OQS_KEM_alg_sidh_p610_compressed;
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
kem->claimed_nist_level = 3;
kem->ind_cca = false;
@ -673,7 +673,7 @@ OQS_KEM *OQS_KEM_sidh_p751_new() {
return NULL;
}
kem->method_name = OQS_KEM_alg_sidh_p751;
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
kem->claimed_nist_level = 5;
kem->ind_cca = false;
@ -743,7 +743,7 @@ OQS_KEM *OQS_KEM_sidh_p751_compressed_new() {
return NULL;
}
kem->method_name = OQS_KEM_alg_sidh_p751_compressed;
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/ebd1c80a8ac35e9ca2ef9680291a8a43b95a3bfa";
kem->alg_version = "https://github.com/microsoft/PQCrypto-SIDH/commit/fde210a7cf03e835cceb1d5ff34ccf58625d0311";
kem->claimed_nist_level = 5;
kem->ind_cca = false;

View File

@ -1,15 +0,0 @@
/*
* undefines SIDH symbols included in both P503 and P751
* to avoid Visual Studio errors
*/
#if defined(_WIN32)
#undef OQS_SIDH_MSR_CRYPTO_SECRETKEYBYTES
#undef OQS_SIDH_MSR_CRYPTO_PUBLICKEYBYTES
#undef OQS_SIDH_MSR_CRYPTO_BYTES
#undef OQS_SIDH_MSR_CRYPTO_CIPHERTEXTBYTES
#undef OQS_SIDH_MSR_CRYPTO_ALGNAME
#undef SIDH_SECRETKEYBYTES
#undef SIDH_PUBLICKEYBYTES
#undef SIDH_BYTES
#endif