Enabled optimizations on macOS

This commit is contained in:
Christian Paquin 2020-06-19 14:34:23 -04:00
parent 5a3b238139
commit 8288420afb
5 changed files with 48 additions and 51 deletions

View File

@ -16,7 +16,7 @@ extern const uint64_t p434x4[NWORDS_FIELD];
__inline void mp_sub434_p2(const digit_t* a, const digit_t* b, digit_t* c)
{ // Multiprecision subtraction with correction with 2*p, c = a-b+2p.
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && NBITS_FIELD == 610)
#if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && NBITS_FIELD == 610)
// FIXMEOQS: verify this condition -- the ARM64 clause tests NBITS_FIELD == 610 although this is the p434 code; confirm that is intended
unsigned int i, borrow = 0;
@ -29,7 +29,7 @@ __inline void mp_sub434_p2(const digit_t* a, const digit_t* b, digit_t* c)
ADDC(borrow, c[i], ((digit_t*)p434x2)[i], borrow, c[i]);
}
#elif (OS_TARGET == OS_NIX)
#elif (OS_TARGET == OS_NIX || OS_TARGET == OS_DARWIN)
mp_sub434_p2_asm(a, b, c);
@ -39,7 +39,7 @@ __inline void mp_sub434_p2(const digit_t* a, const digit_t* b, digit_t* c)
__inline void mp_sub434_p4(const digit_t* a, const digit_t* b, digit_t* c)
{ // Multiprecision subtraction with correction with 4*p, c = a-b+4p.
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && NBITS_FIELD == 610)
#if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && NBITS_FIELD == 610)
// FIXMEOQS: verify this condition -- the ARM64 clause tests NBITS_FIELD == 610 although this is the p434 code; confirm that is intended
unsigned int i, borrow = 0;
@ -52,7 +52,7 @@ __inline void mp_sub434_p4(const digit_t* a, const digit_t* b, digit_t* c)
ADDC(borrow, c[i], ((digit_t*)p434x4)[i], borrow, c[i]);
}
#elif (OS_TARGET == OS_NIX)
#elif (OS_TARGET == OS_NIX || OS_TARGET == OS_DARWIN)
mp_sub434_p4_asm(a, b, c);
@ -63,7 +63,7 @@ __inline void fpadd434(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
// Inputs: a, b in [0, 2*p434-1]
// Output: c in [0, 2*p434-1]
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
#if (OS_TARGET == OS_WIN)
unsigned int i, carry = 0;
digit_t mask;
@ -82,7 +82,7 @@ __inline void fpadd434(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
ADDC(carry, c[i], ((digit_t *) p434x2)[i] & mask, carry, c[i]);
}
#elif (OS_TARGET == OS_NIX)
#elif (OS_TARGET == OS_NIX || OS_TARGET == OS_DARWIN)
oqs_kem_sike_fpadd434_asm(a, b, c);
@ -93,7 +93,7 @@ __inline void fpsub434(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
// Inputs: a, b in [0, 2*p434-1]
// Output: c in [0, 2*p434-1]
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
#if (OS_TARGET == OS_WIN)
unsigned int i, borrow = 0;
digit_t mask;
@ -107,7 +107,7 @@ __inline void fpsub434(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
ADDC(borrow, c[i], ((digit_t *) p434x2)[i] & mask, borrow, c[i]);
}
#elif (OS_TARGET == OS_NIX)
#elif (OS_TARGET == OS_NIX || OS_TARGET == OS_DARWIN)
oqs_kem_sike_fpsub434_asm(a, b, c);
@ -156,7 +156,7 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
UNREFERENCED_PARAMETER(nwords);
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
#if (OS_TARGET == OS_WIN)
digit_t t = 0;
uint128_t uv = {0};
unsigned int carry = 0;
@ -320,7 +320,7 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
c[12] = uv[0];
c[13] = uv[1];
#elif (OS_TARGET == OS_NIX)
#elif (OS_TARGET == OS_NIX || OS_TARGET == OS_DARWIN)
oqs_kem_sike_mul434_asm(a, b, c);
@ -332,7 +332,7 @@ void rdc_mont(digit_t *ma, digit_t *mc) { // Montgomery reduction exploiting spe
// If ma < 2^448*p434, the output mc is in the range [0, 2*p434-1].
// ma is assumed to be in Montgomery representation.
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
#if (OS_TARGET == OS_WIN)
unsigned int carry;
digit_t t = 0;
uint128_t uv = {0};
@ -467,7 +467,7 @@ void rdc_mont(digit_t *ma, digit_t *mc) { // Montgomery reduction exploiting spe
ADDC(0, uv[0], ma[12], carry, mc[5]);
ADDC(carry, uv[1], ma[13], carry, mc[6]);
#elif (OS_TARGET == OS_NIX)
#elif (OS_TARGET == OS_NIX || OS_TARGET == OS_DARWIN)
oqs_kem_sike_rdc434_asm(ma, mc);

View File

@ -16,7 +16,7 @@ extern const uint64_t p503x4[NWORDS_FIELD];
__inline void mp_sub503_p2(const digit_t* a, const digit_t* b, digit_t* c)
{ // Multiprecision subtraction with correction with 2*p, c = a-b+2p.
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && NBITS_FIELD == 610)
#if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && NBITS_FIELD == 610)
unsigned int i, borrow = 0;
for (i = 0; i < NWORDS_FIELD; i++) {
@ -28,7 +28,7 @@ __inline void mp_sub503_p2(const digit_t* a, const digit_t* b, digit_t* c)
ADDC(borrow, c[i], ((digit_t*)p503x2)[i], borrow, c[i]);
}
#elif (OS_TARGET == OS_NIX)
#elif (OS_TARGET == OS_NIX || OS_TARGET == OS_DARWIN)
mp_sub503_p2_asm(a, b, c);
@ -38,7 +38,7 @@ __inline void mp_sub503_p2(const digit_t* a, const digit_t* b, digit_t* c)
__inline void mp_sub503_p4(const digit_t* a, const digit_t* b, digit_t* c)
{ // Multiprecision subtraction with correction with 4*p, c = a-b+4p.
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && NBITS_FIELD == 610)
#if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && NBITS_FIELD == 610)
unsigned int i, borrow = 0;
for (i = 0; i < NWORDS_FIELD; i++) {
@ -50,7 +50,7 @@ __inline void mp_sub503_p4(const digit_t* a, const digit_t* b, digit_t* c)
ADDC(borrow, c[i], ((digit_t*)p503x4)[i], borrow, c[i]);
}
#elif (OS_TARGET == OS_NIX)
#elif (OS_TARGET == OS_NIX || OS_TARGET == OS_DARWIN)
mp_sub503_p4_asm(a, b, c);
@ -61,7 +61,7 @@ __inline void fpadd503(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
// Inputs: a, b in [0, 2*p503-1]
// Output: c in [0, 2*p503-1]
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
#if (OS_TARGET == OS_WIN)
unsigned int i, carry = 0;
digit_t mask;
@ -80,7 +80,7 @@ __inline void fpadd503(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
ADDC(carry, c[i], ((digit_t *) p503x2)[i] & mask, carry, c[i]);
}
#elif (OS_TARGET == OS_NIX)
#elif (OS_TARGET == OS_NIX || OS_TARGET == OS_DARWIN)
oqs_kem_sike_fpadd503_asm(a, b, c);
@ -91,7 +91,7 @@ __inline void fpsub503(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
// Inputs: a, b in [0, 2*p503-1]
// Output: c in [0, 2*p503-1]
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
#if (OS_TARGET == OS_WIN)
unsigned int i, borrow = 0;
digit_t mask;
@ -105,7 +105,7 @@ __inline void fpsub503(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
ADDC(borrow, c[i], ((digit_t *) p503x2)[i] & mask, borrow, c[i]);
}
#elif (OS_TARGET == OS_NIX)
#elif (OS_TARGET == OS_NIX || OS_TARGET == OS_DARWIN)
oqs_kem_sike_fpsub503_asm(a, b, c);
@ -154,7 +154,7 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
UNREFERENCED_PARAMETER(nwords);
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
#if (OS_TARGET == OS_WIN)
digit_t t = 0;
uint128_t uv = {0};
unsigned int carry = 0;
@ -358,7 +358,7 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
c[14] = uv[0];
c[15] = uv[1];
#elif (OS_TARGET == OS_NIX)
#elif (OS_TARGET == OS_NIX || OS_TARGET == OS_DARWIN)
oqs_kem_sike_mul503_asm(a, b, c);
@ -370,7 +370,7 @@ void rdc_mont(digit_t *ma, digit_t *mc) { // Montgomery reduction exploiting spe
// If ma < 2^512*p503, the output mc is in the range [0, 2*p503-1].
// ma is assumed to be in Montgomery representation.
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
#if (OS_TARGET == OS_WIN)
unsigned int carry;
digit_t t = 0;
uint128_t uv = {0};
@ -546,7 +546,7 @@ void rdc_mont(digit_t *ma, digit_t *mc) { // Montgomery reduction exploiting spe
ADDC(carry, uv[1], 0, carry, uv[1]);
ADDC(0, uv[1], ma[15], carry, mc[7]);
#elif (OS_TARGET == OS_NIX)
#elif (OS_TARGET == OS_NIX || OS_TARGET == OS_DARWIN)
oqs_kem_sike_rdc503_asm(ma, mc);

View File

@ -16,7 +16,7 @@ extern const uint64_t p610x4[NWORDS_FIELD];
__inline void mp_sub610_p2(const digit_t* a, const digit_t* b, digit_t* c)
{ // Multiprecision subtraction with correction with 2*p, c = a-b+2p.
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && NBITS_FIELD == 610)
#if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && NBITS_FIELD == 610)
unsigned int i, borrow = 0;
for (i = 0; i < NWORDS_FIELD; i++) {
@ -28,7 +28,7 @@ __inline void mp_sub610_p2(const digit_t* a, const digit_t* b, digit_t* c)
ADDC(borrow, c[i], ((digit_t*)p610x2)[i], borrow, c[i]);
}
#elif (OS_TARGET == OS_NIX)
#elif (OS_TARGET == OS_NIX || OS_TARGET == OS_DARWIN)
mp_sub610_p2_asm(a, b, c);
@ -38,7 +38,7 @@ __inline void mp_sub610_p2(const digit_t* a, const digit_t* b, digit_t* c)
__inline void mp_sub610_p4(const digit_t* a, const digit_t* b, digit_t* c)
{ // Multiprecision subtraction with correction with 4*p, c = a-b+4p.
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && NBITS_FIELD == 610)
#if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && NBITS_FIELD == 610)
unsigned int i, borrow = 0;
for (i = 0; i < NWORDS_FIELD; i++) {
@ -50,7 +50,7 @@ __inline void mp_sub610_p4(const digit_t* a, const digit_t* b, digit_t* c)
ADDC(borrow, c[i], ((digit_t*)p610x4)[i], borrow, c[i]);
}
#elif (OS_TARGET == OS_NIX)
#elif (OS_TARGET == OS_NIX || OS_TARGET == OS_DARWIN)
mp_sub610_p4_asm(a, b, c);
@ -61,7 +61,7 @@ __inline void fpadd610(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
// Inputs: a, b in [0, 2*p610-1]
// Output: c in [0, 2*p610-1]
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
#if (OS_TARGET == OS_WIN)
unsigned int i, carry = 0;
digit_t mask;
@ -80,7 +80,7 @@ __inline void fpadd610(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
ADDC(carry, c[i], ((digit_t *) p610x2)[i] & mask, carry, c[i]);
}
#elif (OS_TARGET == OS_NIX)
#elif (OS_TARGET == OS_NIX || OS_TARGET == OS_DARWIN)
oqs_kem_sike_fpadd610_asm(a, b, c);
@ -91,7 +91,7 @@ __inline void fpsub610(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
// Inputs: a, b in [0, 2*p610-1]
// Output: c in [0, 2*p610-1]
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
#if (OS_TARGET == OS_WIN)
unsigned int i, borrow = 0;
digit_t mask;
@ -105,7 +105,7 @@ __inline void fpsub610(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
ADDC(borrow, c[i], ((digit_t *) p610x2)[i] & mask, borrow, c[i]);
}
#elif (OS_TARGET == OS_NIX)
#elif (OS_TARGET == OS_NIX || OS_TARGET == OS_DARWIN)
oqs_kem_sike_fpsub610_asm(a, b, c);
@ -154,7 +154,7 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
UNREFERENCED_PARAMETER(nwords);
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
#if (OS_TARGET == OS_WIN)
digit_t t = 0;
uint128_t uv = {0};
unsigned int carry = 0;
@ -450,7 +450,7 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
c[18] = uv[0];
c[19] = uv[1];
#elif (OS_TARGET == OS_NIX)
#elif (OS_TARGET == OS_NIX || OS_TARGET == OS_DARWIN)
oqs_kem_sike_mul610_asm(a, b, c);
@ -462,7 +462,7 @@ void rdc_mont(digit_t *ma, digit_t *mc) { // Montgomery reduction exploiting spe
// If ma < 2^640*p610, the output mc is in the range [0, 2*p610-1].
// ma is assumed to be in Montgomery representation.
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
#if (OS_TARGET == OS_WIN)
unsigned int carry;
digit_t t = 0;
uint128_t uv = {0};
@ -703,7 +703,7 @@ void rdc_mont(digit_t *ma, digit_t *mc) { // Montgomery reduction exploiting spe
ADDC(carry, uv[1], 0, carry, uv[1]);
ADDC(0, uv[1], ma[19], carry, mc[9]);
#elif (OS_TARGET == OS_NIX)
#elif (OS_TARGET == OS_NIX || OS_TARGET == OS_DARWIN)
oqs_kem_sike_rdc610_asm(ma, mc);

View File

@ -16,7 +16,7 @@ extern const uint64_t p751x4[NWORDS_FIELD];
__inline void mp_sub751_p2(const digit_t* a, const digit_t* b, digit_t* c)
{ // Multiprecision subtraction with correction with 2*p, c = a-b+2p.
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && NBITS_FIELD == 751)
#if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && NBITS_FIELD == 751)
unsigned int i, borrow = 0;
for (i = 0; i < NWORDS_FIELD; i++) {
@ -28,7 +28,7 @@ __inline void mp_sub751_p2(const digit_t* a, const digit_t* b, digit_t* c)
ADDC(borrow, c[i], ((digit_t*)p751x2)[i], borrow, c[i]);
}
#elif (OS_TARGET == OS_NIX)
#elif (OS_TARGET == OS_NIX || OS_TARGET == OS_DARWIN)
mp_sub751_p2_asm(a, b, c);
@ -38,7 +38,7 @@ __inline void mp_sub751_p2(const digit_t* a, const digit_t* b, digit_t* c)
__inline void mp_sub751_p4(const digit_t* a, const digit_t* b, digit_t* c)
{ // Multiprecision subtraction with correction with 4*p, c = a-b+4p.
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && NBITS_FIELD == 751)
#if (OS_TARGET == OS_WIN) || defined(GENERIC_IMPLEMENTATION) || (TARGET == TARGET_ARM) || (TARGET == TARGET_ARM64 && NBITS_FIELD == 751)
unsigned int i, borrow = 0;
for (i = 0; i < NWORDS_FIELD; i++) {
@ -50,7 +50,7 @@ __inline void mp_sub751_p4(const digit_t* a, const digit_t* b, digit_t* c)
ADDC(borrow, c[i], ((digit_t*)p751x4)[i], borrow, c[i]);
}
#elif (OS_TARGET == OS_NIX)
#elif (OS_TARGET == OS_NIX || OS_TARGET == OS_DARWIN)
mp_sub751_p4_asm(a, b, c);
@ -61,7 +61,7 @@ __inline void fpadd751(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
// Inputs: a, b in [0, 2*p751-1]
// Output: c in [0, 2*p751-1]
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
#if (OS_TARGET == OS_WIN)
unsigned int i, carry = 0;
digit_t mask;
@ -80,7 +80,7 @@ __inline void fpadd751(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
ADDC(carry, c[i], ((digit_t *) p751x2)[i] & mask, carry, c[i]);
}
#elif (OS_TARGET == OS_NIX)
#elif (OS_TARGET == OS_NIX || OS_TARGET == OS_DARWIN)
oqs_kem_sike_fpadd751_asm(a, b, c);
@ -91,7 +91,7 @@ __inline void fpsub751(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
// Inputs: a, b in [0, 2*p751-1]
// Output: c in [0, 2*p751-1]
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
#if (OS_TARGET == OS_WIN)
unsigned int i, borrow = 0;
digit_t mask;
@ -105,7 +105,7 @@ __inline void fpsub751(const digit_t *a, const digit_t *b, digit_t *c) { // Modu
ADDC(borrow, c[i], ((digit_t *) p751x2)[i] & mask, borrow, c[i]);
}
#elif (OS_TARGET == OS_NIX)
#elif (OS_TARGET == OS_NIX || OS_TARGET == OS_DARWIN)
oqs_kem_sike_fpsub751_asm(a, b, c);
@ -154,7 +154,7 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
UNREFERENCED_PARAMETER(nwords);
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
#if (OS_TARGET == OS_WIN)
digit_t t = 0;
uint128_t uv = {0};
unsigned int carry = 0;
@ -558,7 +558,7 @@ void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int n
c[22] = uv[0];
c[23] = uv[1];
#elif (OS_TARGET == OS_NIX)
#elif (OS_TARGET == OS_NIX || OS_TARGET == OS_DARWIN)
oqs_kem_sike_mul751_asm(a, b, c);
@ -570,7 +570,7 @@ void rdc_mont(digit_t *ma, digit_t *mc) { // Montgomery reduction exploiting spe
// If ma < 2^768*p751, the output mc is in the range [0, 2*p751-1].
// ma is assumed to be in Montgomery representation.
#if (OS_TARGET == OS_WIN || OS_TARGET == OS_DARWIN)
#if (OS_TARGET == OS_WIN)
unsigned int carry;
digit_t t = 0;
uint128_t uv = {0};
@ -884,7 +884,7 @@ void rdc_mont(digit_t *ma, digit_t *mc) { // Montgomery reduction exploiting spe
ADDC(carry, uv[1], 0, carry, uv[1]);
ADDC(0, uv[1], ma[23], carry, mc[11]);
#elif (OS_TARGET == OS_NIX)
#elif (OS_TARGET == OS_NIX || OS_TARGET == OS_DARWIN)
oqs_kem_sike_rdc751_asm(ma, mc);

View File

@ -21,9 +21,6 @@
#define OS_TARGET OS_WIN
#elif defined(__APPLE__) // darwin
#define OS_TARGET OS_DARWIN
#ifndef _GENERIC_ // default to generic implementation on darwin for now
#define _GENERIC_
#endif
#else
#define OS_TARGET OS_NIX // default to Linux
#endif