From 76a3480df5d4bf68f6daca0d68f5b48d00ba75ba Mon Sep 17 00:00:00 2001 From: Douglas Stebila Date: Wed, 23 Nov 2016 12:35:55 -0500 Subject: [PATCH 1/5] Update README.md. --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 1f1f41eaf..d5dac66c4 100644 --- a/README.md +++ b/README.md @@ -103,6 +103,7 @@ License liboqs is licensed under the MIT License; see [LICENSE.txt](https://github.com/open-quantum-safe/liboqs/blob/master/LICENSE.txt) for details. liboqs includes some third party libraries or modules that are licensed differently; the corresponding subfolder contains the license that applies in that case. In particular: +- `src/aes/aes.c`: public domain - `src/kex_rlwe_bcns15`: public domain ([Unlicense](http://unlicense.org)) - `src/kex_rlwe_msrln16`: MIT License - `src/kex_rlwe_msrln16/external`: public domain ([CC0](http://creativecommons.org/publicdomain/zero/1.0/)) From 7babc31f8b5ec9dd399bd9afa5de826f5fafa4fe Mon Sep 17 00:00:00 2001 From: Christian Paquin Date: Thu, 24 Nov 2016 16:13:50 -0500 Subject: [PATCH 2/5] Integrates MSR's SIDH library into OQS (#59) --- Makefile | 8 +- README.md | 6 +- VisualStudio/oqs/oqs.vcxproj | 22 +- VisualStudio/oqs/oqs.vcxproj.filters | 33 + src/kex/kex.c | 3 + src/kex/kex.h | 1 + src/kex/test_kex.c | 8 +- src/kex_sidh_cln16/AMD64/fp_x64.c | 865 +++++++++++ src/kex_sidh_cln16/AMD64/fp_x64_asm.S | 1864 +++++++++++++++++++++++ src/kex_sidh_cln16/License.txt | 21 + src/kex_sidh_cln16/README.txt | 46 + src/kex_sidh_cln16/SIDH.c | 117 ++ src/kex_sidh_cln16/SIDH.h | 245 +++ src/kex_sidh_cln16/SIDH_internal.h | 444 ++++++ src/kex_sidh_cln16/SIDH_setup.c | 245 +++ src/kex_sidh_cln16/ec_isogeny.c | 586 +++++++ src/kex_sidh_cln16/fpx.c | 611 ++++++++ src/kex_sidh_cln16/generic/fp_generic.c | 251 +++ src/kex_sidh_cln16/kex_sidh_cln16.c | 183 +++ src/kex_sidh_cln16/kex_sidh_cln16.h | 24 + src/kex_sidh_cln16/sidh_kex.c | 392 +++++ src/kex_sidh_cln16/validate.c | 220 +++ 22 files changed, 6187 
insertions(+), 8 deletions(-) create mode 100644 src/kex_sidh_cln16/AMD64/fp_x64.c create mode 100644 src/kex_sidh_cln16/AMD64/fp_x64_asm.S create mode 100644 src/kex_sidh_cln16/License.txt create mode 100644 src/kex_sidh_cln16/README.txt create mode 100644 src/kex_sidh_cln16/SIDH.c create mode 100644 src/kex_sidh_cln16/SIDH.h create mode 100644 src/kex_sidh_cln16/SIDH_internal.h create mode 100644 src/kex_sidh_cln16/SIDH_setup.c create mode 100644 src/kex_sidh_cln16/ec_isogeny.c create mode 100644 src/kex_sidh_cln16/fpx.c create mode 100644 src/kex_sidh_cln16/generic/fp_generic.c create mode 100644 src/kex_sidh_cln16/kex_sidh_cln16.c create mode 100644 src/kex_sidh_cln16/kex_sidh_cln16.h create mode 100644 src/kex_sidh_cln16/sidh_kex.c create mode 100644 src/kex_sidh_cln16/validate.c diff --git a/Makefile b/Makefile index 950bf8683..4b5d5823a 100644 --- a/Makefile +++ b/Makefile @@ -60,6 +60,7 @@ links: $(LN) ../../src/kex_rlwe_newhope/kex_rlwe_newhope.h include/oqs $(LN) ../../src/kex_rlwe_msrln16/kex_rlwe_msrln16.h include/oqs $(LN) ../../src/kex_lwe_frodo/kex_lwe_frodo.h include/oqs + $(LN) ../../src/kex_sidh_cln16/kex_sidh_cln16.h include/oqs $(LN) ../../src/rand/rand.h include/oqs $(LN) ../../src/rand_urandom_chacha20/rand_urandom_chacha20.h include/oqs $(LN) ../../src/rand_urandom_aesctr/rand_urandom_aesctr.h include/oqs @@ -96,6 +97,11 @@ KEX_LWE_FRODO_OBJS := $(addprefix objs/kex_lwe_frodo/, lwe.o kex_lwe_frodo.o lwe KEX_LWE_FRODO_HEADERS := $(addprefix src/kex_lwe_frodo/, kex_lwe_frodo.h local.h) $(KEX_LWE_FRODO_OBJS): $(KEX_LWE_FRODO_HEADERS) +# KEX_SIDH_CLN16 +KEX_SIDH_CLN16_OBJS := $(addprefix objs/kex_sidh_cln16/, ec_isogeny.o fpx.o kex_sidh_cln16.o SIDH.o sidh_kex.o SIDH_setup.o validate.o) +KEX_SIDH_CLN16_HEADERS := $(addprefix src/kex_sidh_cln16/, kex_sidh_cln16.h SIDH.h) +$(KEX_SIDH_CLN16_OBJS): $(KEX_SIDH_CLN16_HEADERS) + # AES AES_OBJS := $(addprefix objs/aes/, aes.o aes_c.o aes_ni.o) AES_HEADERS := $(addprefix src/aes/, aes.h) @@ -115,7 +121,7 
@@ objs/kex/kex.o: src/kex/kex.h RAND_OBJS := $(RAND_URANDOM_AESCTR_OBJS) $(RAND_URANDOM_CHACHA_OBJS) -lib: $(RAND_OBJS) $(KEX_RLWE_BCNS15_OBJS) $(KEX_RLWE_NEWHOPE_OBJS) $(KEX_LWE_FRODO_OBJS) $(KEX_RLWE_MSRLN16_OBJS) objs/rand/rand.o objs/kex/kex.o $(AES_OBJS) $(COMMON_OBJS) +lib: $(RAND_OBJS) $(KEX_RLWE_BCNS15_OBJS) $(KEX_RLWE_NEWHOPE_OBJS) $(KEX_RLWE_MSRLN16_OBJS) $(KEX_LWE_FRODO_OBJS) $(KEX_SIDH_CLN16_OBJS) objs/rand/rand.o objs/kex/kex.o $(AES_OBJS) $(COMMON_OBJS) rm -f liboqs.a $(AR) liboqs.a $^ $(RANLIB) liboqs.a diff --git a/README.md b/README.md index d5dac66c4..520e295c4 100644 --- a/README.md +++ b/README.md @@ -23,11 +23,11 @@ Contents liboqs currently contains: -- `rand_urandom_chacha20`: pseudorandom number generator seeded from /dev/urandom and expanded using the ChaCha20 stream cipher - `kex_rlwe_bcns15`: key exchange from the ring learning with errors problem (Bos, Costello, Naehrig, Stebila, *IEEE Symposium on Security & Privacy 2015*, [https://eprint.iacr.org/2014/599](https://eprint.iacr.org/2014/599)) - `kex_rlwe_newhope`: "NewHope": key exchange from the ring learning with errors problem (Alkim, Ducas, Pöppelmann, Schwabe, *USENIX Security 2016*, [https://eprint.iacr.org/2015/1092](https://eprint.iacr.org/2015/1092)) (using the reference C implementation of NewHope from [https://github.com/tpoeppelmann/newhope](https://github.com/tpoeppelmann/newhope)) - `kex_rlwe_msrln16`: Microsoft Research implementation of Peikert's ring-LWE key exchange (Longa, Naehrig, *CANS 2016*, [https://eprint.iacr.org/2016/504](https://eprint.iacr.org/2016/504)) (based on the implementation of Alkim, Ducas, Pöppelmann, and Schwabe, with improvements from Longa and Naehrig, see [https://www.microsoft.com/en-us/research/project/lattice-cryptography-library/](https://www.microsoft.com/en-us/research/project/lattice-cryptography-library/)) -- `kex_lwe_frodo`: key exchange from the learning with errors problem (Bos, Costello, Ducas, Mironov, Naehrig, Nikolaenko, 
Raghunathan, Stebila, *ACM Conference on Computer and Communications Security 2016*, [http://eprint.iacr.org/2016/659](http://eprint.iacr.org/2016/659)) +- `kex_lwe_frodo`: "Frodo": key exchange from the learning with errors problem (Bos, Costello, Ducas, Mironov, Naehrig, Nikolaenko, Raghunathan, Stebila, *ACM Conference on Computer and Communications Security 2016*, [http://eprint.iacr.org/2016/659](http://eprint.iacr.org/2016/659)) +- `kex_sidh_cln16`: key exchange from the supersingular isogeny Diffie-Hellman problem (Costello, Naehrig, Longa, *CRYPTO 2016*, [https://eprint.iacr.org/2016/413](https://eprint.iacr.org/2016/413)), using the implementation of Microsoft Research [https://www.microsoft.com/en-us/research/project/sidh-library/](https://www.microsoft.com/en-us/research/project/sidh-library/) Building and Running -------------------- @@ -93,6 +93,7 @@ Since our initial launch, we have made the following updates: - Use of travis continuous integration system for testing - `kex_rlwe_newhope` wrapper - `kex_rlwe_msrln16` implementation contributed by Christian Paquin (Microsoft Research) +- `kex_sidh_cln16` implementation contributed by Christian Paquin (Microsoft Research) Our plans for the next few months can be found in [Milestone 1 - Key exchange](https://github.com/open-quantum-safe/liboqs/projects/2). 
@@ -108,6 +109,7 @@ liboqs is licensed under the MIT License; see [LICENSE.txt](https://github.com/o - `src/kex_rlwe_msrln16`: MIT License - `src/kex_rlwe_msrln16/external`: public domain ([CC0](http://creativecommons.org/publicdomain/zero/1.0/)) - `src/kex_rlwe_newhope`: public domain +- `src/kex_sidh_cln16`: MIT License - `src/rand_urandom_chacha20/external`: public domain Team diff --git a/VisualStudio/oqs/oqs.vcxproj b/VisualStudio/oqs/oqs.vcxproj index c881e5206..ce6f20509 100644 --- a/VisualStudio/oqs/oqs.vcxproj +++ b/VisualStudio/oqs/oqs.vcxproj @@ -32,6 +32,9 @@ + + + @@ -53,6 +56,13 @@ + + + + + + + @@ -141,7 +151,8 @@ copy "$(SolutionDir)..\src\aes\aes.h" "$(SolutionDir)include\oqs\" copy "$(SolutionDir)..\src\kex_rlwe_bcns15\kex_rlwe_bcns15.h" "$(SolutionDir)include\oqs\" copy "$(SolutionDir)..\src\kex_rlwe_newhope\kex_rlwe_newhope.h" "$(SolutionDir)include\oqs\" copy "$(SolutionDir)..\src\kex_rlwe_msrln16\kex_rlwe_msrln16.h" "$(SolutionDir)include\oqs\" -copy "$(SolutionDir)..\src\kex_lwe_frodo\kex_lwe_frodo.h" "$(SolutionDir)include\oqs\" +copy "$(SolutionDir)..\src\kex_lwe_frodo\kex_lwe_frodo.h" "$(SolutionDir)include\oqs\" +copy "$(SolutionDir)..\src\kex_sidh_cln16\kex_sidh_cln16.h" "$(SolutionDir)include\oqs\" @@ -169,7 +180,8 @@ copy "$(SolutionDir)..\src\aes\aes.h" "$(SolutionDir)include\oqs\" copy "$(SolutionDir)..\src\kex_rlwe_bcns15\kex_rlwe_bcns15.h" "$(SolutionDir)include\oqs\" copy "$(SolutionDir)..\src\kex_rlwe_newhope\kex_rlwe_newhope.h" "$(SolutionDir)include\oqs\" copy "$(SolutionDir)..\src\kex_rlwe_msrln16\kex_rlwe_msrln16.h" "$(SolutionDir)include\oqs\" -copy "$(SolutionDir)..\src\kex_lwe_frodo\kex_lwe_frodo.h" "$(SolutionDir)include\oqs\" +copy "$(SolutionDir)..\src\kex_lwe_frodo\kex_lwe_frodo.h" "$(SolutionDir)include\oqs\" +copy "$(SolutionDir)..\src\kex_sidh_cln16\kex_sidh_cln16.h" "$(SolutionDir)include\oqs\" @@ -201,7 +213,8 @@ copy "$(SolutionDir)..\src\aes\aes.h" "$(SolutionDir)include\oqs\" copy 
"$(SolutionDir)..\src\kex_rlwe_bcns15\kex_rlwe_bcns15.h" "$(SolutionDir)include\oqs\" copy "$(SolutionDir)..\src\kex_rlwe_newhope\kex_rlwe_newhope.h" "$(SolutionDir)include\oqs\" copy "$(SolutionDir)..\src\kex_rlwe_msrln16\kex_rlwe_msrln16.h" "$(SolutionDir)include\oqs\" -copy "$(SolutionDir)..\src\kex_lwe_frodo\kex_lwe_frodo.h" "$(SolutionDir)include\oqs\" +copy "$(SolutionDir)..\src\kex_lwe_frodo\kex_lwe_frodo.h" "$(SolutionDir)include\oqs\" +copy "$(SolutionDir)..\src\kex_sidh_cln16\kex_sidh_cln16.h" "$(SolutionDir)include\oqs\" @@ -233,7 +246,8 @@ copy "$(SolutionDir)..\src\aes\aes.h" "$(SolutionDir)include\oqs\" copy "$(SolutionDir)..\src\kex_rlwe_bcns15\kex_rlwe_bcns15.h" "$(SolutionDir)include\oqs\" copy "$(SolutionDir)..\src\kex_rlwe_newhope\kex_rlwe_newhope.h" "$(SolutionDir)include\oqs\" copy "$(SolutionDir)..\src\kex_rlwe_msrln16\kex_rlwe_msrln16.h" "$(SolutionDir)include\oqs\" -copy "$(SolutionDir)..\src\kex_lwe_frodo\kex_lwe_frodo.h" "$(SolutionDir)include\oqs\" +copy "$(SolutionDir)..\src\kex_lwe_frodo\kex_lwe_frodo.h" "$(SolutionDir)include\oqs\" +copy "$(SolutionDir)..\src\kex_sidh_cln16\kex_sidh_cln16.h" "$(SolutionDir)include\oqs\" diff --git a/VisualStudio/oqs/oqs.vcxproj.filters b/VisualStudio/oqs/oqs.vcxproj.filters index d8bdc128b..a6ca7ce0d 100644 --- a/VisualStudio/oqs/oqs.vcxproj.filters +++ b/VisualStudio/oqs/oqs.vcxproj.filters @@ -56,6 +56,27 @@ BCNS15 + + SIDH CLN16 + + + SIDH CLN16 + + + SIDH CLN16 + + + SIDH CLN16 + + + SIDH CLN16 + + + SIDH CLN16 + + + SIDH CLN16 + @@ -104,6 +125,15 @@ BCNS15 + + SIDH CLN16 + + + SIDH CLN16 + + + SIDH CLN16 + @@ -124,5 +154,8 @@ {9f5ed87f-ed1e-47b4-b7e7-1d6648cb88fd} + + {3e550d03-8fd0-4307-ad38-832effa40102} + \ No newline at end of file diff --git a/src/kex/kex.c b/src/kex/kex.c index aa3a717c9..658422129 100644 --- a/src/kex/kex.c +++ b/src/kex/kex.c @@ -5,6 +5,7 @@ #include #include #include +#include OQS_KEX *OQS_KEX_new(OQS_RAND *rand, enum OQS_KEX_alg_name alg_name, const uint8_t *seed, const 
size_t seed_len, const char *named_parameters) { switch (alg_name) { @@ -18,6 +19,8 @@ OQS_KEX *OQS_KEX_new(OQS_RAND *rand, enum OQS_KEX_alg_name alg_name, const uint8 return OQS_KEX_rlwe_newhope_new(rand); case OQS_KEX_alg_lwe_frodo: return OQS_KEX_lwe_frodo_new(rand, seed, seed_len, named_parameters); + case OQS_KEX_alg_sidh_cln16: + return OQS_KEX_sidh_cln16_new(rand); default: assert(0); return NULL; diff --git a/src/kex/kex.h b/src/kex/kex.h index 9dcaf0291..70d0b8a02 100644 --- a/src/kex/kex.h +++ b/src/kex/kex.h @@ -17,6 +17,7 @@ enum OQS_KEX_alg_name { OQS_KEX_alg_rlwe_newhope, OQS_KEX_alg_rlwe_msrln16, OQS_KEX_alg_lwe_frodo, + OQS_KEX_alg_sidh_cln16, }; typedef struct OQS_KEX OQS_KEX; diff --git a/src/kex/test_kex.c b/src/kex/test_kex.c index 136dfa783..50a523cbf 100644 --- a/src/kex/test_kex.c +++ b/src/kex/test_kex.c @@ -23,6 +23,7 @@ struct kex_testcase kex_testcases[] = { { OQS_KEX_alg_rlwe_newhope, NULL, 0, NULL, "rlwe_newhope", 0 }, { OQS_KEX_alg_rlwe_msrln16, NULL, 0, NULL, "rlwe_msrln16", 0 }, { OQS_KEX_alg_lwe_frodo, (unsigned char *) "01234567890123456", 16, "recommended", "lwe_frodo_recommended", 0 }, + { OQS_KEX_alg_sidh_cln16, NULL, 0, NULL, "sidh_cln16", 0 }, }; #define KEX_TEST_ITERATIONS 100 @@ -280,7 +281,12 @@ int main(int argc, char **argv) { for (size_t i = 0; i < kex_testcases_len; i++) { if (run_all || kex_testcases[i].run == 1) { - success = kex_test_correctness_wrapper(rand, kex_testcases[i].alg_name, kex_testcases[i].seed, kex_testcases[i].seed_len, kex_testcases[i].named_parameters, KEX_TEST_ITERATIONS); + int num_iter = KEX_TEST_ITERATIONS; + if (kex_testcases[i].alg_name == OQS_KEX_alg_sidh_cln16) { + // SIDH is slower than the other schemes, so we reduce the number of runs + num_iter = KEX_TEST_ITERATIONS / 10; + } + success = kex_test_correctness_wrapper(rand, kex_testcases[i].alg_name, kex_testcases[i].seed, kex_testcases[i].seed_len, kex_testcases[i].named_parameters, num_iter); } if (success != 1) { goto err; diff --git 
a/src/kex_sidh_cln16/AMD64/fp_x64.c b/src/kex_sidh_cln16/AMD64/fp_x64.c new file mode 100644 index 000000000..8b841c175 --- /dev/null +++ b/src/kex_sidh_cln16/AMD64/fp_x64.c @@ -0,0 +1,865 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny-based cryptography library for Diffie-Hellman key +* exchange providing 128 bits of quantum security and 192 bits of classical security. +* +* Copyright (c) Microsoft Corporation. All rights reserved. +* +* +* Abstract: modular arithmetic optimized for x64 platforms +* +*********************************************************************************************/ + +#include "../SIDH_internal.h" + + +// Global constants +extern const uint64_t p751[NWORDS_FIELD]; +extern const uint64_t p751p1[NWORDS_FIELD]; +extern const uint64_t p751x2[NWORDS_FIELD]; + + +__inline void oqs_sidh_cln16_fpadd751(digit_t* a, digit_t* b, digit_t* c) +{ // Modular addition, c = a+b mod p751. + // Inputs: a, b in [0, 2*p751-1] + // Output: c in [0, 2*p751-1] + +#if (OS_TARGET == OS_WIN) + unsigned int i, carry = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], b[i], carry, c[i]); + } + + carry = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(carry, c[i], ((digit_t*)p751x2)[i], carry, c[i]); + } + mask = 0 - (digit_t)carry; + + carry = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, c[i], ((digit_t*)p751x2)[i] & mask, carry, c[i]); + } + +#elif (OS_TARGET == OS_LINUX) + + oqs_sidh_cln16_fpadd751_asm(a, b, c); + +#endif +} + + +__inline void oqs_sidh_cln16_fpsub751(digit_t* a, digit_t* b, digit_t* c) +{ // Modular subtraction, c = a-b mod p751. 
+ // Inputs: a, b in [0, 2*p751-1] + // Output: c in [0, 2*p751-1] + +#if (OS_TARGET == OS_WIN) + unsigned int i, borrow = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + mask = 0 - (digit_t)borrow; + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t*)p751x2)[i] & mask, borrow, c[i]); + } + +#elif (OS_TARGET == OS_LINUX) + + oqs_sidh_cln16_fpsub751_asm(a, b, c); + +#endif +} + + +__inline void oqs_sidh_cln16_fpneg751(digit_t* a) +{ // Modular negation, a = -a mod p751. + // Input/output: a in [0, 2*p751-1] + unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, ((digit_t*)p751x2)[i], a[i], borrow, a[i]); + } +} + + +void oqs_sidh_cln16_fpdiv2_751(digit_t* a, digit_t* c) +{ // Modular division by two, c = a/2 mod p751. + // Input : a in [0, 2*p751-1] + // Output: c in [0, 2*p751-1] + unsigned int i, carry = 0; + digit_t mask; + + mask = 0 - (digit_t)(a[0] & 1); // If a is odd compute a+p751 + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], ((digit_t*)p751)[i] & mask, carry, c[i]); + } + + oqs_sidh_cln16_mp_shiftr1(c, NWORDS_FIELD); +} + + +void oqs_sidh_cln16_fpcorrection751(digit_t* a) +{ // Modular correction to reduce field element a in [0, 2*p751-1] to [0, p751-1]. + unsigned int i, borrow = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], ((digit_t*)p751)[i], borrow, a[i]); + } + mask = 0 - (digit_t)borrow; + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, a[i], ((digit_t*)p751)[i] & mask, borrow, a[i]); + } +} + + +void oqs_sidh_cln16_mp_mul(digit_t* a, digit_t* b, digit_t* c, unsigned int nwords) +{ // Multiprecision multiply, c = a*b, where lng(a) = lng(b) = nwords. 
+ + UNREFERENCED_PARAMETER(nwords); + +#if (OS_TARGET == OS_WIN) + digit_t t = 0; + uint128_t uv = {0}; + unsigned int carry = 0; + + MULADD128(a[0], b[0], uv, carry, uv); + t += carry; + c[0] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[1], uv, carry, uv); + t += carry; + MULADD128(a[1], b[0], uv, carry, uv); + t += carry; + c[1] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[2], uv, carry, uv); + t += carry; + MULADD128(a[1], b[1], uv, carry, uv); + t += carry; + MULADD128(a[2], b[0], uv, carry, uv); + t += carry; + c[2] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[3], uv, carry, uv); + t += carry; + MULADD128(a[2], b[1], uv, carry, uv); + t += carry; + MULADD128(a[1], b[2], uv, carry, uv); + t += carry; + MULADD128(a[3], b[0], uv, carry, uv); + t += carry; + c[3] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[4], uv, carry, uv); + t += carry; + MULADD128(a[3], b[1], uv, carry, uv); + t += carry; + MULADD128(a[2], b[2], uv, carry, uv); + t += carry; + MULADD128(a[1], b[3], uv, carry, uv); + t += carry; + MULADD128(a[4], b[0], uv, carry, uv); + t += carry; + c[4] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[5], uv, carry, uv); + t += carry; + MULADD128(a[4], b[1], uv, carry, uv); + t += carry; + MULADD128(a[3], b[2], uv, carry, uv); + t += carry; + MULADD128(a[2], b[3], uv, carry, uv); + t += carry; + MULADD128(a[1], b[4], uv, carry, uv); + t += carry; + MULADD128(a[5], b[0], uv, carry, uv); + t += carry; + c[5] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[6], uv, carry, uv); + t += carry; + MULADD128(a[5], b[1], uv, carry, uv); + t += carry; + MULADD128(a[4], b[2], uv, carry, uv); + t += carry; + MULADD128(a[3], b[3], uv, carry, uv); + t += carry; + MULADD128(a[2], b[4], uv, carry, uv); + t += carry; + MULADD128(a[1], b[5], uv, carry, uv); + t += carry; + MULADD128(a[6], b[0], uv, carry, uv); + t += carry; + c[6] = 
uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[7], uv, carry, uv); + t += carry; + MULADD128(a[6], b[1], uv, carry, uv); + t += carry; + MULADD128(a[5], b[2], uv, carry, uv); + t += carry; + MULADD128(a[4], b[3], uv, carry, uv); + t += carry; + MULADD128(a[3], b[4], uv, carry, uv); + t += carry; + MULADD128(a[2], b[5], uv, carry, uv); + t += carry; + MULADD128(a[1], b[6], uv, carry, uv); + t += carry; + MULADD128(a[7], b[0], uv, carry, uv); + t += carry; + c[7] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[8], uv, carry, uv); + t += carry; + MULADD128(a[7], b[1], uv, carry, uv); + t += carry; + MULADD128(a[6], b[2], uv, carry, uv); + t += carry; + MULADD128(a[5], b[3], uv, carry, uv); + t += carry; + MULADD128(a[4], b[4], uv, carry, uv); + t += carry; + MULADD128(a[3], b[5], uv, carry, uv); + t += carry; + MULADD128(a[2], b[6], uv, carry, uv); + t += carry; + MULADD128(a[1], b[7], uv, carry, uv); + t += carry; + MULADD128(a[8], b[0], uv, carry, uv); + t += carry; + c[8] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[9], uv, carry, uv); + t += carry; + MULADD128(a[8], b[1], uv, carry, uv); + t += carry; + MULADD128(a[7], b[2], uv, carry, uv); + t += carry; + MULADD128(a[6], b[3], uv, carry, uv); + t += carry; + MULADD128(a[5], b[4], uv, carry, uv); + t += carry; + MULADD128(a[4], b[5], uv, carry, uv); + t += carry; + MULADD128(a[3], b[6], uv, carry, uv); + t += carry; + MULADD128(a[2], b[7], uv, carry, uv); + t += carry; + MULADD128(a[1], b[8], uv, carry, uv); + t += carry; + MULADD128(a[9], b[0], uv, carry, uv); + t += carry; + c[9] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[10], uv, carry, uv); + t += carry; + MULADD128(a[9], b[1], uv, carry, uv); + t += carry; + MULADD128(a[8], b[2], uv, carry, uv); + t += carry; + MULADD128(a[7], b[3], uv, carry, uv); + t += carry; + MULADD128(a[6], b[4], uv, carry, uv); + t += carry; + MULADD128(a[5], b[5], uv, carry, uv); + t += 
carry; + MULADD128(a[4], b[6], uv, carry, uv); + t += carry; + MULADD128(a[3], b[7], uv, carry, uv); + t += carry; + MULADD128(a[2], b[8], uv, carry, uv); + t += carry; + MULADD128(a[1], b[9], uv, carry, uv); + t += carry; + MULADD128(a[10], b[0], uv, carry, uv); + t += carry; + c[10] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[0], b[11], uv, carry, uv); + t += carry; + MULADD128(a[10], b[1], uv, carry, uv); + t += carry; + MULADD128(a[9], b[2], uv, carry, uv); + t += carry; + MULADD128(a[8], b[3], uv, carry, uv); + t += carry; + MULADD128(a[7], b[4], uv, carry, uv); + t += carry; + MULADD128(a[6], b[5], uv, carry, uv); + t += carry; + MULADD128(a[5], b[6], uv, carry, uv); + t += carry; + MULADD128(a[4], b[7], uv, carry, uv); + t += carry; + MULADD128(a[3], b[8], uv, carry, uv); + t += carry; + MULADD128(a[2], b[9], uv, carry, uv); + t += carry; + MULADD128(a[1], b[10], uv, carry, uv); + t += carry; + MULADD128(a[11], b[0], uv, carry, uv); + t += carry; + c[11] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[1], b[11], uv, carry, uv); + t += carry; + MULADD128(a[10], b[2], uv, carry, uv); + t += carry; + MULADD128(a[9], b[3], uv, carry, uv); + t += carry; + MULADD128(a[8], b[4], uv, carry, uv); + t += carry; + MULADD128(a[7], b[5], uv, carry, uv); + t += carry; + MULADD128(a[6], b[6], uv, carry, uv); + t += carry; + MULADD128(a[5], b[7], uv, carry, uv); + t += carry; + MULADD128(a[4], b[8], uv, carry, uv); + t += carry; + MULADD128(a[3], b[9], uv, carry, uv); + t += carry; + MULADD128(a[2], b[10], uv, carry, uv); + t += carry; + MULADD128(a[11], b[1], uv, carry, uv); + t += carry; + c[12] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[11], b[2], uv, carry, uv); + t += carry; + MULADD128(a[10], b[3], uv, carry, uv); + t += carry; + MULADD128(a[9], b[4], uv, carry, uv); + t += carry; + MULADD128(a[8], b[5], uv, carry, uv); + t += carry; + MULADD128(a[7], b[6], uv, carry, uv); + t += carry; + MULADD128(a[6], b[7], 
uv, carry, uv); + t += carry; + MULADD128(a[5], b[8], uv, carry, uv); + t += carry; + MULADD128(a[4], b[9], uv, carry, uv); + t += carry; + MULADD128(a[3], b[10], uv, carry, uv); + t += carry; + MULADD128(a[2], b[11], uv, carry, uv); + t += carry; + c[13] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[11], b[3], uv, carry, uv); + t += carry; + MULADD128(a[10], b[4], uv, carry, uv); + t += carry; + MULADD128(a[9], b[5], uv, carry, uv); + t += carry; + MULADD128(a[8], b[6], uv, carry, uv); + t += carry; + MULADD128(a[7], b[7], uv, carry, uv); + t += carry; + MULADD128(a[6], b[8], uv, carry, uv); + t += carry; + MULADD128(a[5], b[9], uv, carry, uv); + t += carry; + MULADD128(a[4], b[10], uv, carry, uv); + t += carry; + MULADD128(a[3], b[11], uv, carry, uv); + t += carry; + c[14] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[11], b[4], uv, carry, uv); + t += carry; + MULADD128(a[10], b[5], uv, carry, uv); + t += carry; + MULADD128(a[9], b[6], uv, carry, uv); + t += carry; + MULADD128(a[8], b[7], uv, carry, uv); + t += carry; + MULADD128(a[7], b[8], uv, carry, uv); + t += carry; + MULADD128(a[6], b[9], uv, carry, uv); + t += carry; + MULADD128(a[5], b[10], uv, carry, uv); + t += carry; + MULADD128(a[4], b[11], uv, carry, uv); + t += carry; + c[15] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[11], b[5], uv, carry, uv); + t += carry; + MULADD128(a[10], b[6], uv, carry, uv); + t += carry; + MULADD128(a[9], b[7], uv, carry, uv); + t += carry; + MULADD128(a[8], b[8], uv, carry, uv); + t += carry; + MULADD128(a[7], b[9], uv, carry, uv); + t += carry; + MULADD128(a[6], b[10], uv, carry, uv); + t += carry; + MULADD128(a[5], b[11], uv, carry, uv); + t += carry; + c[16] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[11], b[6], uv, carry, uv); + t += carry; + MULADD128(a[10], b[7], uv, carry, uv); + t += carry; + MULADD128(a[9], b[8], uv, carry, uv); + t += carry; + MULADD128(a[8], b[9], uv, carry, uv); + t += 
carry; + MULADD128(a[7], b[10], uv, carry, uv); + t += carry; + MULADD128(a[6], b[11], uv, carry, uv); + t += carry; + c[17] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[11], b[7], uv, carry, uv); + t += carry; + MULADD128(a[10], b[8], uv, carry, uv); + t += carry; + MULADD128(a[9], b[9], uv, carry, uv); + t += carry; + MULADD128(a[8], b[10], uv, carry, uv); + t += carry; + MULADD128(a[7], b[11], uv, carry, uv); + t += carry; + c[18] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[11], b[8], uv, carry, uv); + t += carry; + MULADD128(a[10], b[9], uv, carry, uv); + t += carry; + MULADD128(a[9], b[10], uv, carry, uv); + t += carry; + MULADD128(a[8], b[11], uv, carry, uv); + t += carry; + c[19] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[11], b[9], uv, carry, uv); + t += carry; + MULADD128(a[10], b[10], uv, carry, uv); + t += carry; + MULADD128(a[9], b[11], uv, carry, uv); + t += carry; + c[20] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(a[11], b[10], uv, carry, uv); + t += carry; + MULADD128(a[10], b[11], uv, carry, uv); + t += carry; + c[21] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + + MULADD128(a[11], b[11], uv, carry, uv); + c[22] = uv[0]; + c[23] = uv[1]; + +#elif (OS_TARGET == OS_LINUX) + + oqs_sidh_cln16_mul751_asm(a, b, c); + +#endif +} + + +void oqs_sidh_cln16_rdc_mont(oqs_sidh_cln16_dfelm_t ma, oqs_sidh_cln16_felm_t mc) +{ // Optimized Montgomery reduction using comba and exploiting the special form of the prime p751. + // mc = ma*mb*R^-1 mod p751, where ma,mb,mc in [0, 2*p751-1] and R = 2^768. + // ma and mb are assumed to be in Montgomery representation. 
+ +#if (OS_TARGET == OS_WIN) + unsigned int carry; + digit_t t = 0; + uint128_t uv = {0}; + + mc[0] = ma[0]; + mc[1] = ma[1]; + mc[2] = ma[2]; + mc[3] = ma[3]; + mc[4] = ma[4]; + MUL128(mc[0], ((digit_t*)p751p1)[5], uv); + ADDC(0, uv[0], ma[5], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + mc[5] = uv[0]; + uv[0] = uv[1]; + uv[1] = 0; + + MULADD128(mc[0], ((digit_t*)p751p1)[6], uv, carry, uv); + MULADD128(mc[1], ((digit_t*)p751p1)[5], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[6], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[6] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[0], ((digit_t*)p751p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[1], ((digit_t*)p751p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[2], ((digit_t*)p751p1)[5], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[7], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[7] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[0], ((digit_t*)p751p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[1], ((digit_t*)p751p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[2], ((digit_t*)p751p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[3], ((digit_t*)p751p1)[5], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[8], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[8] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[0], ((digit_t*)p751p1)[9], uv, carry, uv); + t += carry; + MULADD128(mc[1], ((digit_t*)p751p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[2], ((digit_t*)p751p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[3], ((digit_t*)p751p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[4], ((digit_t*)p751p1)[5], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[9], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[9] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[0], ((digit_t*)p751p1)[10], uv, carry, 
uv); + t += carry; + MULADD128(mc[1], ((digit_t*)p751p1)[9], uv, carry, uv); + t += carry; + MULADD128(mc[2], ((digit_t*)p751p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[3], ((digit_t*)p751p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[4], ((digit_t*)p751p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[5], ((digit_t*)p751p1)[5], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[10], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[10] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[0], ((digit_t*)p751p1)[11], uv, carry, uv); + t += carry; + MULADD128(mc[1], ((digit_t*)p751p1)[10], uv, carry, uv); + t += carry; + MULADD128(mc[2], ((digit_t*)p751p1)[9], uv, carry, uv); + t += carry; + MULADD128(mc[3], ((digit_t*)p751p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[4], ((digit_t*)p751p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[5], ((digit_t*)p751p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[6], ((digit_t*)p751p1)[5], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[11], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[11] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[1], ((digit_t*)p751p1)[11], uv, carry, uv); + t += carry; + MULADD128(mc[2], ((digit_t*)p751p1)[10], uv, carry, uv); + t += carry; + MULADD128(mc[3], ((digit_t*)p751p1)[9], uv, carry, uv); + t += carry; + MULADD128(mc[4], ((digit_t*)p751p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[5], ((digit_t*)p751p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[6], ((digit_t*)p751p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[7], ((digit_t*)p751p1)[5], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[12], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[0] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[2], ((digit_t*)p751p1)[11], uv, carry, uv); + t += carry; + MULADD128(mc[3], ((digit_t*)p751p1)[10], uv, carry, uv); + t += carry; + 
MULADD128(mc[4], ((digit_t*)p751p1)[9], uv, carry, uv); + t += carry; + MULADD128(mc[5], ((digit_t*)p751p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[6], ((digit_t*)p751p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[7], ((digit_t*)p751p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[8], ((digit_t*)p751p1)[5], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[13], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[1] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[3], ((digit_t*)p751p1)[11], uv, carry, uv); + t += carry; + MULADD128(mc[4], ((digit_t*)p751p1)[10], uv, carry, uv); + t += carry; + MULADD128(mc[5], ((digit_t*)p751p1)[9], uv, carry, uv); + t += carry; + MULADD128(mc[6], ((digit_t*)p751p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[7], ((digit_t*)p751p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[8], ((digit_t*)p751p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[9], ((digit_t*)p751p1)[5], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[14], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[2] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[4], ((digit_t*)p751p1)[11], uv, carry, uv); + t += carry; + MULADD128(mc[5], ((digit_t*)p751p1)[10], uv, carry, uv); + t += carry; + MULADD128(mc[6], ((digit_t*)p751p1)[9], uv, carry, uv); + t += carry; + MULADD128(mc[7], ((digit_t*)p751p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[8], ((digit_t*)p751p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[9], ((digit_t*)p751p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[10], ((digit_t*)p751p1)[5], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[15], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[3] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[5], ((digit_t*)p751p1)[11], uv, carry, uv); + t += carry; + MULADD128(mc[6], ((digit_t*)p751p1)[10], uv, carry, uv); + t += carry; + MULADD128(mc[7], 
((digit_t*)p751p1)[9], uv, carry, uv); + t += carry; + MULADD128(mc[8], ((digit_t*)p751p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[9], ((digit_t*)p751p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[10], ((digit_t*)p751p1)[6], uv, carry, uv); + t += carry; + MULADD128(mc[11], ((digit_t*)p751p1)[5], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[16], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[4] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[6], ((digit_t*)p751p1)[11], uv, carry, uv); + t += carry; + MULADD128(mc[7], ((digit_t*)p751p1)[10], uv, carry, uv); + t += carry; + MULADD128(mc[8], ((digit_t*)p751p1)[9], uv, carry, uv); + t += carry; + MULADD128(mc[9], ((digit_t*)p751p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[10], ((digit_t*)p751p1)[7], uv, carry, uv); + t += carry; + MULADD128(mc[11], ((digit_t*)p751p1)[6], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[17], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[5] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[7], ((digit_t*)p751p1)[11], uv, carry, uv); + t += carry; + MULADD128(mc[8], ((digit_t*)p751p1)[10], uv, carry, uv); + t += carry; + MULADD128(mc[9], ((digit_t*)p751p1)[9], uv, carry, uv); + t += carry; + MULADD128(mc[10], ((digit_t*)p751p1)[8], uv, carry, uv); + t += carry; + MULADD128(mc[11], ((digit_t*)p751p1)[7], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[18], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[6] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[8], ((digit_t*)p751p1)[11], uv, carry, uv); + t += carry; + MULADD128(mc[9], ((digit_t*)p751p1)[10], uv, carry, uv); + t += carry; + MULADD128(mc[10], ((digit_t*)p751p1)[9], uv, carry, uv); + t += carry; + MULADD128(mc[11], ((digit_t*)p751p1)[8], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[19], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[7] = uv[0]; + 
uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[9], ((digit_t*)p751p1)[11], uv, carry, uv); + t += carry; + MULADD128(mc[10], ((digit_t*)p751p1)[10], uv, carry, uv); + t += carry; + MULADD128(mc[11], ((digit_t*)p751p1)[9], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[20], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[8] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[10], ((digit_t*)p751p1)[11], uv, carry, uv); + t += carry; + MULADD128(mc[11], ((digit_t*)p751p1)[10], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[21], carry, uv[0]); + ADDC(carry, uv[1], 0, carry, uv[1]); + t += carry; + mc[9] = uv[0]; + uv[0] = uv[1]; + uv[1] = t; + t = 0; + + MULADD128(mc[11], ((digit_t*)p751p1)[11], uv, carry, uv); + t += carry; + ADDC(0, uv[0], ma[22], carry, mc[10]); + ADDC(carry, uv[1], 0, carry, uv[1]); + ADDC(0, uv[1], ma[23], carry, mc[11]); + +#elif (OS_TARGET == OS_LINUX) + + oqs_sidh_cln16_rdc751_asm(ma, mc); + +#endif +} \ No newline at end of file diff --git a/src/kex_sidh_cln16/AMD64/fp_x64_asm.S b/src/kex_sidh_cln16/AMD64/fp_x64_asm.S new file mode 100644 index 000000000..056fada6b --- /dev/null +++ b/src/kex_sidh_cln16/AMD64/fp_x64_asm.S @@ -0,0 +1,1864 @@ +//******************************************************************************************* +// SIDH: an efficient supersingular isogeny-based cryptography library for Diffie-Hellman key +// exchange providing 128 bits of quantum security and 192 bits of classical security. +// +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// +// +// Abstract: field arithmetic in x64 assembly for Linux +// +//******************************************************************************************* + +.intel_syntax noprefix + +// Registers that are used for parameter passing: +#define reg_p1 rdi +#define reg_p2 rsi +#define reg_p3 rdx + +// p751 + 1 +#define p751p1_5 0xEEB0000000000000 +#define p751p1_6 0xE3EC968549F878A8 +#define p751p1_7 0xDA959B1A13F7CC76 +#define p751p1_8 0x084E9867D6EBE876 +#define p751p1_9 0x8562B5045CB25748 +#define p751p1_10 0x0E12909F97BADC66 +#define p751p1_11 0x00006FE5D541F71C + +#define p751_0 0xFFFFFFFFFFFFFFFF +#define p751_5 0xEEAFFFFFFFFFFFFF +#define p751_6 0xE3EC968549F878A8 +#define p751_7 0xDA959B1A13F7CC76 +#define p751_8 0x084E9867D6EBE876 +#define p751_9 0x8562B5045CB25748 +#define p751_10 0x0E12909F97BADC66 +#define p751_11 0x00006FE5D541F71C + +#define p751x2_0 0xFFFFFFFFFFFFFFFE +#define p751x2_1 0xFFFFFFFFFFFFFFFF +#define p751x2_5 0xDD5FFFFFFFFFFFFF +#define p751x2_6 0xC7D92D0A93F0F151 +#define p751x2_7 0xB52B363427EF98ED +#define p751x2_8 0x109D30CFADD7D0ED +#define p751x2_9 0x0AC56A08B964AE90 +#define p751x2_10 0x1C25213F2F75B8CD +#define p751x2_11 0x0000DFCBAA83EE38 + + +.text +//*********************************************************************** +// Field addition +// Operation: c [reg_p3] = a [reg_p1] + b [reg_p2] +// Each operand is read or written as 12 64-bit words at byte offsets 0 to 88. +// Step 1: add a and b word by word, carrying between words. +// Step 2: subtract the p751x2_* constant (twice p751, going by the constants defined above); words 1 to 4 all use p751x2_1. +// Step 3: turn the final borrow into a mask (rax = 0 - borrow) and add back (p751x2_* AND mask), so the correction is applied without a branch. +// Registers: rax, rcx, rsi, r8-r11 and the flags are overwritten; r12-r15 are pushed on entry and popped before ret. +// NOTE(review): the symbols exported here are unprefixed (fpadd751_asm and friends), while the C wrapper in fp_x64.c calls oqs_sidh_cln16_rdc751_asm - confirm how those names are reconciled when linking. +//*********************************************************************** +.global fpadd751_asm +fpadd751_asm: + push r12 + push r13 + push r14 + push r15 + + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + mov r12, [reg_p1+32] + mov r13, [reg_p1+40] + mov r14, [reg_p1+48] + mov r15, [reg_p1+56] + mov rcx, [reg_p1+64] + add r8, [reg_p2] + adc r9, [reg_p2+8] + adc r10, [reg_p2+16] + adc r11, [reg_p2+24] + adc r12, [reg_p2+32] + adc r13, [reg_p2+40] + adc r14, [reg_p2+48] + adc r15, [reg_p2+56] + adc rcx, [reg_p2+64] + mov rax, [reg_p1+72] + adc rax, [reg_p2+72] + mov [reg_p3+72], rax + mov rax, 
[reg_p1+80] + adc rax, [reg_p2+80] + mov [reg_p3+80], rax + mov rax, [reg_p1+88] + adc rax, [reg_p2+88] + mov [reg_p3+88], rax + + movq rax, p751x2_0 + sub r8, rax + movq rax, p751x2_1 + sbb r9, rax + sbb r10, rax + sbb r11, rax + sbb r12, rax + movq rax, p751x2_5 + sbb r13, rax + movq rax, p751x2_6 + sbb r14, rax + movq rax, p751x2_7 + sbb r15, rax + movq rax, p751x2_8 + sbb rcx, rax + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + mov [reg_p3+32], r12 + mov [reg_p3+40], r13 + mov [reg_p3+48], r14 + mov [reg_p3+56], r15 + mov [reg_p3+64], rcx + mov r8, [reg_p3+72] + mov r9, [reg_p3+80] + mov r10, [reg_p3+88] + movq rax, p751x2_9 + sbb r8, rax + movq rax, p751x2_10 + sbb r9, rax + movq rax, p751x2_11 + sbb r10, rax + mov [reg_p3+72], r8 + mov [reg_p3+80], r9 + mov [reg_p3+88], r10 + movq rax, 0 + sbb rax, 0 + + mov rsi, p751x2_0 + and rsi, rax + mov r8, p751x2_1 + and r8, rax + movq r9, p751x2_5 + and r9, rax + movq r10, p751x2_6 + and r10, rax + movq r11, p751x2_7 + and r11, rax + movq r12, p751x2_8 + and r12, rax + movq r13, p751x2_9 + and r13, rax + movq r14, p751x2_10 + and r14, rax + movq r15, p751x2_11 + and r15, rax + + mov rax, [reg_p3] + add rax, rsi + mov [reg_p3], rax + mov rax, [reg_p3+8] + adc rax, r8 + mov [reg_p3+8], rax + mov rax, [reg_p3+16] + adc rax, r8 + mov [reg_p3+16], rax + mov rax, [reg_p3+24] + adc rax, r8 + mov [reg_p3+24], rax + mov rax, [reg_p3+32] + adc rax, r8 + mov [reg_p3+32], rax + mov rax, [reg_p3+40] + adc rax, r9 + mov [reg_p3+40], rax + mov rax, [reg_p3+48] + adc rax, r10 + mov [reg_p3+48], rax + mov rax, [reg_p3+56] + adc rax, r11 + mov [reg_p3+56], rax + mov rax, [reg_p3+64] + adc rax, r12 + mov [reg_p3+64], rax + mov rax, [reg_p3+72] + adc rax, r13 + mov [reg_p3+72], rax + mov rax, [reg_p3+80] + adc rax, r14 + mov [reg_p3+80], rax + mov rax, [reg_p3+88] + adc rax, r15 + mov [reg_p3+88], rax + + pop r15 + pop r14 + pop r13 + pop r12 + ret + + 
+//*********************************************************************** +// Field subtraction +// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] +//*********************************************************************** +.global fpsub751_asm +fpsub751_asm: + push r12 + push r13 + push r14 + push r15 + + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + mov r12, [reg_p1+32] + mov r13, [reg_p1+40] + mov r14, [reg_p1+48] + mov r15, [reg_p1+56] + mov rcx, [reg_p1+64] + sub r8, [reg_p2] + sbb r9, [reg_p2+8] + sbb r10, [reg_p2+16] + sbb r11, [reg_p2+24] + sbb r12, [reg_p2+32] + sbb r13, [reg_p2+40] + sbb r14, [reg_p2+48] + sbb r15, [reg_p2+56] + sbb rcx, [reg_p2+64] + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + mov [reg_p3+32], r12 + mov [reg_p3+40], r13 + mov [reg_p3+48], r14 + mov [reg_p3+56], r15 + mov [reg_p3+64], rcx + mov rax, [reg_p1+72] + sbb rax, [reg_p2+72] + mov [reg_p3+72], rax + mov rax, [reg_p1+80] + sbb rax, [reg_p2+80] + mov [reg_p3+80], rax + mov rax, [reg_p1+88] + sbb rax, [reg_p2+88] + mov [reg_p3+88], rax + movq rax, 0 + sbb rax, 0 + + mov rsi, p751x2_0 + and rsi, rax + mov r8, p751x2_1 + and r8, rax + movq r9, p751x2_5 + and r9, rax + movq r10, p751x2_6 + and r10, rax + movq r11, p751x2_7 + and r11, rax + movq r12, p751x2_8 + and r12, rax + movq r13, p751x2_9 + and r13, rax + movq r14, p751x2_10 + and r14, rax + movq r15, p751x2_11 + and r15, rax + + mov rax, [reg_p3] + add rax, rsi + mov [reg_p3], rax + mov rax, [reg_p3+8] + adc rax, r8 + mov [reg_p3+8], rax + mov rax, [reg_p3+16] + adc rax, r8 + mov [reg_p3+16], rax + mov rax, [reg_p3+24] + adc rax, r8 + mov [reg_p3+24], rax + mov rax, [reg_p3+32] + adc rax, r8 + mov [reg_p3+32], rax + mov rax, [reg_p3+40] + adc rax, r9 + mov [reg_p3+40], rax + mov rax, [reg_p3+48] + adc rax, r10 + mov [reg_p3+48], rax + mov rax, [reg_p3+56] + adc rax, r11 + mov [reg_p3+56], rax + mov rax, [reg_p3+64] + adc rax, r12 + mov [reg_p3+64], 
rax + mov rax, [reg_p3+72] + adc rax, r13 + mov [reg_p3+72], rax + mov rax, [reg_p3+80] + adc rax, r14 + mov [reg_p3+80], rax + mov rax, [reg_p3+88] + adc rax, r15 + mov [reg_p3+88], rax + + pop r15 + pop r14 + pop r13 + pop r12 + ret + + +//*********************************************************************** +// Integer multiplication +// Based on Karatsuba method +// Operation: c [reg_p3] = a [reg_p1] * b [reg_p2] +// NOTE: a=c or b=c are not allowed +//*********************************************************************** +.global mul751_asm +mul751_asm: + push r12 + push r13 + push r14 + mov rcx, reg_p3 + + // rcx[0-5] <- AH+AL + xor rax, rax + mov r8, [reg_p1+48] + mov r9, [reg_p1+56] + mov r10, [reg_p1+64] + mov r11, [reg_p1+72] + mov r12, [reg_p1+80] + mov r13, [reg_p1+88] + add r8, [reg_p1] + adc r9, [reg_p1+8] + adc r10, [reg_p1+16] + adc r11, [reg_p1+24] + adc r12, [reg_p1+32] + adc r13, [reg_p1+40] + push r15 + mov [rcx], r8 + mov [rcx+8], r9 + mov [rcx+16], r10 + mov [rcx+24], r11 + mov [rcx+32], r12 + mov [rcx+40], r13 + sbb rax, 0 + sub rsp, 96 // Allocating space in stack + + // rcx[6-11] <- BH+BL + xor rdx, rdx + mov r8, [reg_p2+48] + mov r9, [reg_p2+56] + mov r10, [reg_p2+64] + mov r11, [reg_p2+72] + mov r12, [reg_p2+80] + mov r13, [reg_p2+88] + add r8, [reg_p2] + adc r9, [reg_p2+8] + adc r10, [reg_p2+16] + adc r11, [reg_p2+24] + adc r12, [reg_p2+32] + adc r13, [reg_p2+40] + mov [rcx+48], r8 + mov [rcx+56], r9 + mov [rcx+64], r10 + mov [rcx+72], r11 + mov [rcx+80], r12 + mov [rcx+88], r13 + sbb rdx, 0 + mov [rsp+80], rax + mov [rsp+88], rdx + + // (rsp[0-8],r10,r8,r9) <- (AH+AL)*(BH+BL) + mov r11, [rcx] + mov rax, r8 + mul r11 + mov [rsp], rax // c0 + mov r14, rdx + + xor r15, r15 + mov rax, r9 + mul r11 + xor r9, r9 + add r14, rax + adc r9, rdx + + mov r12, [rcx+8] + mov rax, r8 + mul r12 + add r14, rax + mov [rsp+8], r14 // c1 + adc r9, rdx + adc r15, 0 + + xor r8, r8 + mov rax, r10 + mul r11 + add r9, rax + mov r13, [rcx+48] + adc r15, rdx + 
adc r8, 0 + + mov rax, [rcx+16] + mul r13 + add r9, rax + adc r15, rdx + mov rax, [rcx+56] + adc r8, 0 + + mul r12 + add r9, rax + mov [rsp+16], r9 // c2 + adc r15, rdx + adc r8, 0 + + xor r9, r9 + mov rax, [rcx+72] + mul r11 + add r15, rax + adc r8, rdx + adc r9, 0 + + mov rax, [rcx+24] + mul r13 + add r15, rax + adc r8, rdx + adc r9, 0 + + mov rax, r10 + mul r12 + add r15, rax + adc r8, rdx + adc r9, 0 + + mov r14, [rcx+16] + mov rax, [rcx+56] + mul r14 + add r15, rax + mov [rsp+24], r15 // c3 + adc r8, rdx + adc r9, 0 + + xor r10, r10 + mov rax, [rcx+80] + mul r11 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [rcx+64] + mul r14 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov r15, [rcx+48] + mov rax, [rcx+32] + mul r15 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [rcx+72] + mul r12 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov r13, [rcx+24] + mov rax, [rcx+56] + mul r13 + add r8, rax + mov [rsp+32], r8 // c4 + adc r9, rdx + adc r10, 0 + + xor r8, r8 + mov rax, [rcx+88] + mul r11 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [rcx+64] + mul r13 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [rcx+72] + mul r14 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [rcx+40] + mul r15 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [rcx+80] + mul r12 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov r15, [rcx+32] + mov rax, [rcx+56] + mul r15 + add r9, rax + mov [rsp+40], r9 // c5 + adc r10, rdx + adc r8, 0 + + xor r9, r9 + mov rax, [rcx+64] + mul r15 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [rcx+88] + mul r12 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [rcx+80] + mul r14 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov r11, [rcx+40] + mov rax, [rcx+56] + mul r11 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [rcx+72] + mul r13 + add r10, rax + mov [rsp+48], r10 // c6 + adc r8, rdx + adc r9, 0 + + xor r10, r10 + mov rax, [rcx+88] + mul r14 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, 
[rcx+64] + mul r11 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [rcx+80] + mul r13 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [rcx+72] + mul r15 + add r8, rax + mov [rsp+56], r8 // c7 + adc r9, rdx + adc r10, 0 + + xor r8, r8 + mov rax, [rcx+72] + mul r11 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [rcx+80] + mul r15 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [rcx+88] + mul r13 + add r9, rax + mov [rsp+64], r9 // c8 + adc r10, rdx + adc r8, 0 + + xor r9, r9 + mov rax, [rcx+88] + mul r15 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [rcx+80] + mul r11 + add r10, rax // c9 + adc r8, rdx + adc r9, 0 + + mov rax, [rcx+88] + mul r11 + add r8, rax // c10 + adc r9, rdx // c11 + + mov rax, [rsp+88] + mov rdx, [rcx] + and r12, rax + and r14, rax + and rdx, rax + and r13, rax + and r15, rax + and r11, rax + mov rax, [rsp+48] + add rdx, rax + mov rax, [rsp+56] + adc r12, rax + mov rax, [rsp+64] + adc r14, rax + adc r13, r10 + adc r15, r8 + adc r11, r9 + mov rax, [rsp+80] + mov [rsp+48], rdx + mov [rsp+56], r12 + mov [rsp+64], r14 + mov [rsp+72], r13 + mov [rsp+80], r15 + mov [rsp+88], r11 + + mov r8, [rcx+48] + mov r9, [rcx+56] + mov r10, [rcx+64] + mov r11, [rcx+72] + mov r12, [rcx+80] + mov r13, [rcx+88] + and r8, rax + and r9, rax + and r10, rax + and r11, rax + and r12, rax + and r13, rax + mov rax, [rsp+48] + add r8, rax + mov rax, [rsp+56] + adc r9, rax + mov rax, [rsp+64] + adc r10, rax + mov rax, [rsp+72] + adc r11, rax + mov rax, [rsp+80] + adc r12, rax + mov rax, [rsp+88] + adc r13, rax + mov [rsp+48], r8 + mov [rsp+56], r9 + mov [rsp+72], r11 + + // rcx[0-11] <- AL*BL + mov r11, [reg_p1] + mov rax, [reg_p2] + mul r11 + xor r9, r9 + mov [rcx], rax // c0 + mov [rsp+64], r10 + mov r8, rdx + + mov rax, [reg_p2+8] + mul r11 + xor r10, r10 + add r8, rax + mov [rsp+80], r12 + adc r9, rdx + + mov r12, [reg_p1+8] + mov rax, [reg_p2] + mul r12 + add r8, rax + mov [rcx+8], r8 // c1 + adc r9, rdx + mov [rsp+88], r13 + adc r10, 0 
+ + xor r8, r8 + mov rax, [reg_p2+16] + mul r11 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov r13, [reg_p2] + mov rax, [reg_p1+16] + mul r13 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+8] + mul r12 + add r9, rax + mov [rcx+16], r9 // c2 + adc r10, rdx + adc r8, 0 + + xor r9, r9 + mov rax, [reg_p2+24] + mul r11 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p1+24] + mul r13 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p2+16] + mul r12 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov r14, [reg_p1+16] + mov rax, [reg_p2+8] + mul r14 + add r10, rax + mov [rcx+24], r10 // c3 + adc r8, rdx + adc r9, 0 + + xor r10, r10 + mov rax, [reg_p2+32] + mul r11 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [reg_p2+16] + mul r14 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [reg_p1+32] + mul r13 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [reg_p2+24] + mul r12 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov r13, [reg_p1+24] + mov rax, [reg_p2+8] + mul r13 + add r8, rax + mov [rcx+32], r8 // c4 + adc r9, rdx + adc r10, 0 + + xor r8, r8 + mov rax, [reg_p2+40] + mul r11 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+16] + mul r13 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+24] + mul r14 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov r11, [reg_p1+40] + mov rax, [reg_p2] + mul r11 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+32] + mul r12 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov r15, [reg_p1+32] + mov rax, [reg_p2+8] + mul r15 + add r9, rax + mov [rcx+40], r9 // c5 + adc r10, rdx + adc r8, 0 + + xor r9, r9 + mov rax, [reg_p2+16] + mul r15 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p2+40] + mul r12 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p2+32] + mul r14 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p2+8] + mul r11 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p2+24] + mul r13 + add r10, rax + mov 
[rcx+48], r10 // c6 + adc r8, rdx + adc r9, 0 + + xor r10, r10 + mov rax, [reg_p2+40] + mul r14 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [reg_p2+16] + mul r11 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [reg_p2+32] + mul r13 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [reg_p2+24] + mul r15 + add r8, rax + mov [rcx+56], r8 // c7 + adc r9, rdx + adc r10, 0 + + xor r8, r8 + mov rax, [reg_p2+24] + mul r11 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+32] + mul r15 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+40] + mul r13 + add r9, rax + mov [rcx+64], r9 // c8 + adc r10, rdx + adc r8, 0 + + xor r9, r9 + mov rax, [reg_p2+40] + mul r15 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p2+32] + mul r11 + add r10, rax + mov [rcx+72], r10 // c9 + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p2+40] + mul r11 + add r8, rax + mov [rcx+80], r8 // c10 + adc r9, rdx + mov [rcx+88], r9 // c11 + + // rcx[12-23] <- AH*BH + mov r11, [reg_p1+48] + mov rax, [reg_p2+48] + mul r11 + xor r9, r9 + mov [rcx+96], rax // c0 + mov r8, rdx + + mov rax, [reg_p2+56] + mul r11 + xor r10, r10 + add r8, rax + adc r9, rdx + + mov r12, [reg_p1+56] + mov rax, [reg_p2+48] + mul r12 + add r8, rax + mov [rcx+104], r8 // c1 + adc r9, rdx + adc r10, 0 + + xor r8, r8 + mov rax, [reg_p2+64] + mul r11 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov r13, [reg_p2+48] + mov rax, [reg_p1+64] + mul r13 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+56] + mul r12 + add r9, rax + mov [rcx+112], r9 // c2 + adc r10, rdx + adc r8, 0 + + xor r9, r9 + mov rax, [reg_p2+72] + mul r11 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p1+72] + mul r13 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p2+64] + mul r12 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov r14, [reg_p1+64] + mov rax, [reg_p2+56] + mul r14 + add r10, rax + mov [rcx+120], r10 // c3 + adc r8, rdx + adc r9, 0 + + xor r10, r10 + mov rax, [reg_p2+80] + 
mul r11 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [reg_p2+64] + mul r14 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov r15, [reg_p1+80] + mov rax, r13 + mul r15 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [reg_p2+72] + mul r12 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov r13, [reg_p1+72] + mov rax, [reg_p2+56] + mul r13 + add r8, rax + mov [rcx+128], r8 // c4 + adc r9, rdx + adc r10, 0 + + xor r8, r8 + mov rax, [reg_p2+88] + mul r11 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+64] + mul r13 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+72] + mul r14 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov r11, [reg_p1+88] + mov rax, [reg_p2+48] + mul r11 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+80] + mul r12 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+56] + mul r15 + add r9, rax + mov [rcx+136], r9 // c5 + adc r10, rdx + adc r8, 0 + + xor r9, r9 + mov rax, [reg_p2+64] + mul r15 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p2+88] + mul r12 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p2+80] + mul r14 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p2+56] + mul r11 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov rax, [reg_p2+72] + mul r13 + add r10, rax + mov [rcx+144], r10 // c6 + adc r8, rdx + adc r9, 0 + + xor r10, r10 + mov rax, [reg_p2+88] + mul r14 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [reg_p2+64] + mul r11 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [reg_p2+80] + mul r13 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov rax, [reg_p2+72] + mul r15 + add r8, rax + mov [rcx+152], r8 // c7 + adc r9, rdx + adc r10, 0 + + xor r8, r8 + mov rax, [reg_p2+72] + mul r11 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+80] + mul r15 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+88] + mul r13 + add r9, rax + mov [rcx+160], r9 // c8 + adc r10, rdx + adc r8, 0 + + mov rax, [reg_p2+88] + mul 
r15 + add r10, rax + adc r8, rdx + + mov rax, [reg_p2+80] + mul r11 + add r10, rax + mov [rcx+168], r10 // c9 + adc r8, rdx + + mov rax, [reg_p2+88] + mul r11 + add r8, rax + mov [rcx+176], r8 // c10 + adc rdx, 0 + mov [rcx+184], rdx // c11 + + // [r8-r15,rax,rdx,rdi,[rsp]] <- (AH+AL)*(BH+BL) - AL*BL + mov r8, [rsp] + sub r8, [rcx] + mov r9, [rsp+8] + sbb r9, [rcx+8] + mov r10, [rsp+16] + sbb r10, [rcx+16] + mov r11, [rsp+24] + sbb r11, [rcx+24] + mov r12, [rsp+32] + sbb r12, [rcx+32] + mov r13, [rsp+40] + sbb r13, [rcx+40] + mov r14, [rsp+48] + sbb r14, [rcx+48] + mov r15, [rsp+56] + sbb r15, [rcx+56] + mov rax, [rsp+64] + sbb rax, [rcx+64] + mov rdx, [rsp+72] + sbb rdx, [rcx+72] + mov rdi, [rsp+80] + sbb rdi, [rcx+80] + mov rsi, [rsp+88] + sbb rsi, [rcx+88] + mov [rsp], rsi + + // [r8-r15,rax,rdx,rdi,[rsp]] <- (AH+AL)*(BH+BL) - AL*BL - AH*BH + mov rsi, [rcx+96] + sub r8, rsi + mov rsi, [rcx+104] + sbb r9, rsi + mov rsi, [rcx+112] + sbb r10, rsi + mov rsi, [rcx+120] + sbb r11, rsi + mov rsi, [rcx+128] + sbb r12, rsi + mov rsi, [rcx+136] + sbb r13, rsi + mov rsi, [rcx+144] + sbb r14, rsi + mov rsi, [rcx+152] + sbb r15, rsi + mov rsi, [rcx+160] + sbb rax, rsi + mov rsi, [rcx+168] + sbb rdx, rsi + mov rsi, [rcx+176] + sbb rdi, rsi + mov rsi, [rsp] + sbb rsi, [rcx+184] + + // Final result + add r8, [rcx+48] + mov [rcx+48], r8 + adc r9, [rcx+56] + mov [rcx+56], r9 + adc r10, [rcx+64] + mov [rcx+64], r10 + adc r11, [rcx+72] + mov [rcx+72], r11 + adc r12, [rcx+80] + mov [rcx+80], r12 + adc r13, [rcx+88] + mov [rcx+88], r13 + adc r14, [rcx+96] + mov [rcx+96], r14 + adc r15, [rcx+104] + mov [rcx+104], r15 + adc rax, [rcx+112] + mov [rcx+112], rax + adc rdx, [rcx+120] + mov [rcx+120], rdx + adc rdi, [rcx+128] + mov [rcx+128], rdi + adc rsi, [rcx+136] + mov [rcx+136], rsi + mov rax, [rcx+144] + adc rax, 0 + mov [rcx+144], rax + mov rax, [rcx+152] + adc rax, 0 + mov [rcx+152], rax + mov rax, [rcx+160] + adc rax, 0 + mov [rcx+160], rax + mov rax, [rcx+168] + adc rax, 0 + mov 
[rcx+168], rax + mov rax, [rcx+176] + adc rax, 0 + mov [rcx+176], rax + mov rax, [rcx+184] + adc rax, 0 + mov [rcx+184], rax + + add rsp, 96 // Restoring space in stack + pop r15 + pop r14 + pop r13 + pop r12 + ret + + +//*********************************************************************** +// Montgomery reduction +// Based on comba method +// Operation: c [reg_p2] = a [reg_p1] +// NOTE: a=c is not allowed +//*********************************************************************** +.global rdc751_asm +rdc751_asm: + push r12 + push r13 + push r14 + push r15 + + mov r11, [reg_p1] + movq rax, p751p1_5 + mul r11 + xor r8, r8 + add rax, [reg_p1+40] + mov [reg_p2+40], rax // z5 + adc r8, rdx + + xor r9, r9 + movq rax, p751p1_6 + mul r11 + xor r10, r10 + add r8, rax + adc r9, rdx + + mov r12, [reg_p1+8] + movq rax, p751p1_5 + mul r12 + add r8, rax + adc r9, rdx + adc r10, 0 + add r8, [reg_p1+48] + mov [reg_p2+48], r8 // z6 + adc r9, 0 + adc r10, 0 + + xor r8, r8 + movq rax, p751p1_7 + mul r11 + add r9, rax + adc r10, rdx + adc r8, 0 + + movq rax, p751p1_6 + mul r12 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov r13, [reg_p1+16] + movq rax, p751p1_5 + mul r13 + add r9, rax + adc r10, rdx + adc r8, 0 + add r9, [reg_p1+56] + mov [reg_p2+56], r9 // z7 + adc r10, 0 + adc r8, 0 + + xor r9, r9 + movq rax, p751p1_8 + mul r11 + add r10, rax + adc r8, rdx + adc r9, 0 + + movq rax, p751p1_7 + mul r12 + add r10, rax + adc r8, rdx + adc r9, 0 + + movq rax, p751p1_6 + mul r13 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov r14, [reg_p1+24] + movq rax, p751p1_5 + mul r14 + add r10, rax + adc r8, rdx + adc r9, 0 + add r10, [reg_p1+64] + mov [reg_p2+64], r10 // z8 + adc r8, 0 + adc r9, 0 + + xor r10, r10 + movq rax, p751p1_9 + mul r11 + add r8, rax + adc r9, rdx + adc r10, 0 + + movq rax, p751p1_8 + mul r12 + add r8, rax + adc r9, rdx + adc r10, 0 + + movq rax, p751p1_7 + mul r13 + add r8, rax + adc r9, rdx + adc r10, 0 + + movq rax, p751p1_6 + mul r14 + add r8, rax + adc r9, rdx + 
adc r10, 0 + + mov r15, [reg_p1+32] + movq rax, p751p1_5 + mul r15 + add r8, rax + adc r9, rdx + adc r10, 0 + add r8, [reg_p1+72] + mov [reg_p2+72], r8 // z9 + adc r9, 0 + adc r10, 0 + + xor r8, r8 + movq rax, p751p1_10 + mul r11 + add r9, rax + adc r10, rdx + adc r8, 0 + + movq rax, p751p1_9 + mul r12 + add r9, rax + adc r10, rdx + adc r8, 0 + + movq rax, p751p1_8 + mul r13 + add r9, rax + adc r10, rdx + adc r8, 0 + + movq rax, p751p1_7 + mul r14 + add r9, rax + adc r10, rdx + adc r8, 0 + + movq rax, p751p1_6 + mul r15 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rcx, [reg_p2+40] + movq rax, p751p1_5 + mul rcx + add r9, rax + adc r10, rdx + adc r8, 0 + add r9, [reg_p1+80] + mov [reg_p2+80], r9 // z10 + adc r10, 0 + adc r8, 0 + + xor r9, r9 + movq rax, p751p1_11 + mul r11 + add r10, rax + adc r8, rdx + adc r9, 0 + + movq rax, p751p1_10 + mul r12 + add r10, rax + adc r8, rdx + adc r9, 0 + + movq rax, p751p1_9 + mul r13 + add r10, rax + adc r8, rdx + adc r9, 0 + + movq rax, p751p1_8 + mul r14 + add r10, rax + adc r8, rdx + adc r9, 0 + + movq rax, p751p1_7 + mul r15 + add r10, rax + adc r8, rdx + adc r9, 0 + + movq rax, p751p1_6 + mul rcx + add r10, rax + adc r8, rdx + adc r9, 0 + + mov r11, [reg_p2+48] + movq rax, p751p1_5 + mul r11 + add r10, rax + adc r8, rdx + adc r9, 0 + add r10, [reg_p1+88] + mov [reg_p2+88], r10 // z11 + adc r8, 0 + adc r9, 0 + + xor r10, r10 + movq rax, p751p1_11 + mul r12 + add r8, rax + adc r9, rdx + adc r10, 0 + + movq rax, p751p1_10 + mul r13 + add r8, rax + adc r9, rdx + adc r10, 0 + + movq rax, p751p1_9 + mul r14 + add r8, rax + adc r9, rdx + adc r10, 0 + + movq rax, p751p1_8 + mul r15 + add r8, rax + adc r9, rdx + adc r10, 0 + + movq rax, p751p1_7 + mul rcx + add r8, rax + adc r9, rdx + adc r10, 0 + + movq rax, p751p1_6 + mul r11 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov r12, [reg_p2+56] + movq rax, p751p1_5 + mul r12 + add r8, rax + adc r9, rdx + adc r10, 0 + add r8, [reg_p1+96] + mov [reg_p2], r8 // z0 + adc r9, 0 + adc 
r10, 0 + + xor r8, r8 + movq rax, p751p1_11 + mul r13 + add r9, rax + adc r10, rdx + adc r8, 0 + + movq rax, p751p1_10 + mul r14 + add r9, rax + adc r10, rdx + adc r8, 0 + + movq rax, p751p1_9 + mul r15 + add r9, rax + adc r10, rdx + adc r8, 0 + + movq rax, p751p1_8 + mul rcx + add r9, rax + adc r10, rdx + adc r8, 0 + + movq rax, p751p1_7 + mul r11 + add r9, rax + adc r10, rdx + adc r8, 0 + + movq rax, p751p1_6 + mul r12 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov r13, [reg_p2+64] + movq rax, p751p1_5 + mul r13 + add r9, rax + adc r10, rdx + adc r8, 0 + add r9, [reg_p1+104] + mov [reg_p2+8], r9 // z1 + adc r10, 0 + adc r8, 0 + + xor r9, r9 + movq rax, p751p1_11 + mul r14 + add r10, rax + adc r8, rdx + adc r9, 0 + + movq rax, p751p1_10 + mul r15 + add r10, rax + adc r8, rdx + adc r9, 0 + + movq rax, p751p1_9 + mul rcx + add r10, rax + adc r8, rdx + adc r9, 0 + + movq rax, p751p1_8 + mul r11 + add r10, rax + adc r8, rdx + adc r9, 0 + + movq rax, p751p1_7 + mul r12 + add r10, rax + adc r8, rdx + adc r9, 0 + + movq rax, p751p1_6 + mul r13 + add r10, rax + adc r8, rdx + adc r9, 0 + + mov r14, [reg_p2+72] + movq rax, p751p1_5 + mul r14 + add r10, rax + adc r8, rdx + adc r9, 0 + add r10, [reg_p1+112] + mov [reg_p2+16], r10 // z2 + adc r8, 0 + adc r9, 0 + + xor r10, r10 + movq rax, p751p1_11 + mul r15 + add r8, rax + adc r9, rdx + adc r10, 0 + + movq rax, p751p1_10 + mul rcx + add r8, rax + adc r9, rdx + adc r10, 0 + + movq rax, p751p1_9 + mul r11 + add r8, rax + adc r9, rdx + adc r10, 0 + + movq rax, p751p1_8 + mul r12 + add r8, rax + adc r9, rdx + adc r10, 0 + + movq rax, p751p1_7 + mul r13 + add r8, rax + adc r9, rdx + adc r10, 0 + + movq rax, p751p1_6 + mul r14 + add r8, rax + adc r9, rdx + adc r10, 0 + + mov r15, [reg_p2+80] + movq rax, p751p1_5 + mul r15 + add r8, rax + adc r9, rdx + adc r10, 0 + add r8, [reg_p1+120] + mov [reg_p2+24], r8 // z3 + adc r9, 0 + adc r10, 0 + + xor r8, r8 + movq rax, p751p1_11 + mul rcx + add r9, rax + adc r10, rdx + adc r8, 0 + + 
movq rax, p751p1_10 + mul r11 + add r9, rax + adc r10, rdx + adc r8, 0 + + movq rax, p751p1_9 + mul r12 + add r9, rax + adc r10, rdx + adc r8, 0 + + movq rax, p751p1_8 + mul r13 + add r9, rax + adc r10, rdx + adc r8, 0 + + movq rax, p751p1_7 + mul r14 + add r9, rax + adc r10, rdx + adc r8, 0 + + movq rax, p751p1_6 + mul r15 + add r9, rax + adc r10, rdx + adc r8, 0 + + mov rcx, [reg_p2+88] + movq rax, p751p1_5 + mul rcx + add r9, rax + adc r10, rdx + adc r8, 0 + add r9, [reg_p1+128] + mov [reg_p2+32], r9 // z4 + adc r10, 0 + adc r8, 0 + + xor r9, r9 + movq rax, p751p1_11 + mul r11 + add r10, rax + adc r8, rdx + adc r9, 0 + + movq rax, p751p1_10 + mul r12 + add r10, rax + adc r8, rdx + adc r9, 0 + + movq rax, p751p1_9 + mul r13 + add r10, rax + adc r8, rdx + adc r9, 0 + + movq rax, p751p1_8 + mul r14 + add r10, rax + adc r8, rdx + adc r9, 0 + + movq rax, p751p1_7 + mul r15 + add r10, rax + adc r8, rdx + adc r9, 0 + + movq rax, p751p1_6 + mul rcx + add r10, rax + adc r8, rdx + adc r9, 0 + add r10, [reg_p1+136] + mov [reg_p2+40], r10 // z5 + adc r8, 0 + adc r9, 0 + + xor r10, r10 + movq rax, p751p1_11 + mul r12 + add r8, rax + adc r9, rdx + adc r10, 0 + + movq rax, p751p1_10 + mul r13 + add r8, rax + adc r9, rdx + adc r10, 0 + + movq rax, p751p1_9 + mul r14 + add r8, rax + adc r9, rdx + adc r10, 0 + + movq rax, p751p1_8 + mul r15 + add r8, rax + adc r9, rdx + adc r10, 0 + + movq rax, p751p1_7 + mul rcx + add r8, rax + adc r9, rdx + adc r10, 0 + add r8, [reg_p1+144] + mov [reg_p2+48], r8 // z6 + adc r9, 0 + adc r10, 0 + + xor r8, r8 + movq rax, p751p1_11 + mul r13 + add r9, rax + adc r10, rdx + adc r8, 0 + + movq rax, p751p1_10 + mul r14 + add r9, rax + adc r10, rdx + adc r8, 0 + + movq rax, p751p1_9 + mul r15 + add r9, rax + adc r10, rdx + adc r8, 0 + + movq rax, p751p1_8 + mul rcx + add r9, rax + adc r10, rdx + adc r8, 0 + add r9, [reg_p1+152] + mov [reg_p2+56], r9 // z7 + adc r10, 0 + adc r8, 0 + + xor r9, r9 + movq rax, p751p1_11 + mul r14 + add r10, rax + adc r8, 
rdx + adc r9, 0 + + movq rax, p751p1_10 + mul r15 + add r10, rax + adc r8, rdx + adc r9, 0 + + movq rax, p751p1_9 + mul rcx + add r10, rax + adc r8, rdx + adc r9, 0 + add r10, [reg_p1+160] + mov [reg_p2+64], r10 // z8 + adc r8, 0 + adc r9, 0 + + xor r10, r10 + movq rax, p751p1_11 + mul r15 + add r8, rax + adc r9, rdx + adc r10, 0 + + movq rax, p751p1_10 + mul rcx + add r8, rax + adc r9, rdx + adc r10, 0 + add r8, [reg_p1+168] // z9 + mov [reg_p2+72], r8 // z9 + adc r9, 0 + adc r10, 0 + + movq rax, p751p1_11 + mul rcx + add r9, rax + adc r10, rdx + add r9, [reg_p1+176] // z10 + mov [reg_p2+80], r9 // z10 + adc r10, 0 + add r10, [reg_p1+184] // z11 + mov [reg_p2+88], r10 // z11 + + pop r15 + pop r14 + pop r13 + pop r12 + ret diff --git a/src/kex_sidh_cln16/License.txt b/src/kex_sidh_cln16/License.txt new file mode 100644 index 000000000..3b4ffccb0 --- /dev/null +++ b/src/kex_sidh_cln16/License.txt @@ -0,0 +1,21 @@ +SIDH Library + +Copyright (c) Microsoft Corporation +All rights reserved. + +MIT License + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and +associated documentation files (the ""Software""), to deal in the Software without restriction, +including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT +LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/src/kex_sidh_cln16/README.txt b/src/kex_sidh_cln16/README.txt new file mode 100644 index 000000000..27470c68d --- /dev/null +++ b/src/kex_sidh_cln16/README.txt @@ -0,0 +1,46 @@ + SIDH v1.1 (C Edition) + ======================= + +The SIDH v1.1 library (C Edition) is a supersingular isogeny-based cryptography library that implements a +new suite of algorithms for a post-quantum resistant Diffie-Hellman key exchange scheme [2]. This scheme +provides approximately 128 bits of quantum security and 192 bits of classical security. + +The library was developed by Microsoft Research for experimentation purposes. + +*** THE ORIGINAL README HAS BEEN TRIMMED LEAVING ONLY THE INFO RELEVANT FOR THE OQS INTEGRATION *** + +1. CONTENTS: + -------- + +/ - Library C and header files +AMD64/ - Optimized implementation of the field arithmetic for x64 platforms +generic/ - Implementation of the field arithmetic in portable C +README.txt - This readme file + + +2. MAIN FEATURES: + ------------- + +- Support key exchange providing 128 bits of quantum security and 192 bits of classical security. +- Support a peace-of-mind hybrid key exchange mode that adds a classical elliptic curve Diffie-Hellman + key exchange on a high-security Montgomery curve providing 384 bits of classical ECDH security. +- Protected against timing and cache-timing attacks through regular, constant-time implementation of + all operations on secret key material. +- Support for public key validation in static key exchange when private keys are used more than once. +- Basic implementation of the underlying arithmetic functions using portable C to enable support on + a wide range of platforms including x64, x86 and ARM. 
+- Optimized implementation of the underlying arithmetic functions for x64 platforms with optional, + high-performance x64 assembly for Linux. + + +REFERENCES: +---------- + +[1] Craig Costello, Patrick Longa, and Michael Naehrig. + Efficient algorithms for supersingular isogeny Diffie-Hellman. + Advances in Cryptology - CRYPTO 2016 (to appear), 2016. + Extended version available at: http://eprint.iacr.org/2016/413. + +[2] David Jao and Luca DeFeo. + Towards quantum-resistant cryptosystems from supersingular elliptic curve isogenies. + PQCrypto 2011, LNCS 7071, pp. 19-34, 2011. \ No newline at end of file diff --git a/src/kex_sidh_cln16/SIDH.c b/src/kex_sidh_cln16/SIDH.c new file mode 100644 index 000000000..81950bd63 --- /dev/null +++ b/src/kex_sidh_cln16/SIDH.c @@ -0,0 +1,117 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny-based cryptography library for Diffie-Hellman key +* exchange providing 128 bits of quantum security and 192 bits of classical security. +* +* Copyright (c) Microsoft Corporation. All rights reserved. +* +* +* Abstract: supersingular elliptic curve isogeny parameters +* +*********************************************************************************************/ + +#include "SIDH_internal.h" + + +// Encoding of field elements, elements over Z_order, elements over GF(p^2) and elliptic curve points: +// -------------------------------------------------------------------------------------------------- +// Elements over GF(p) and Z_order are encoded with the least significant octet (and digit) located +// at the leftmost position (i.e., little endian format). +// Elements (a+b*i) over GF(p^2), where a and b are defined over GF(p), are encoded as {b, a}, with b +// in the least significant position. +// Elliptic curve points P = (x,y) are encoded as {x, y}, with x in the least significant position. + +// +// Curve isogeny system "SIDHp751". 
Base curve: Montgomery curve By^2 = Cx^3 + Ax^2 + Cx defined over GF(p751^2), where A=0, B=1 and C=1 +// + +CurveIsogenyStaticData CurveIsogeny_SIDHp751 = { + "SIDHp751", 768, 384, // Curve isogeny system ID, smallest multiple of 32 larger than the prime bitlength and smallest multiple of 32 larger than the order bitlength + 751, // Bitlength of the prime + // Prime p751 = 2^372*3^239-1 + { + 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xEEAFFFFFFFFFFFFF, + 0xE3EC968549F878A8, 0xDA959B1A13F7CC76, 0x084E9867D6EBE876, 0x8562B5045CB25748, 0x0E12909F97BADC66, 0x00006FE5D541F71C + }, + // Base curve parameter "A" + { 0 }, + // Base curve parameter "C" + { 1 }, + // Order bitlength for Alice + 372, + // Order of Alice's subgroup + { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0010000000000000 }, + // Order bitlength for Bob + 379, + // Power of Bob's subgroup order + 239, + // Order of Bob's subgroup + { 0xC968549F878A8EEB, 0x59B1A13F7CC76E3E, 0xE9867D6EBE876DA9, 0x2B5045CB25748084, 0x2909F97BADC66856, 0x06FE5D541F71C0E1 }, + // Alice's generator PA = (XPA,YPA), where XPA and YPA are defined over GF(p751) + { + 0x4B0346F5CCE233E9, 0x632646086CE3ACD5, 0x5661D14AB7347693, 0xA58A20449AF1F133, 0xB9AC2F40C56D6FA4, 0x8E561E008FA0E3F3, + 0x6CAE096D5DB822C9, 0x83FDB7A4AD3E83E8, 0xB1317AD904386217, 0x3FA23F89F6BE06D2, 0x429C8D36FF46BCC9, 0x00003E82027A38E9, + 0x12E0D620BFB341D5, 0x0F8EEA7370893430, 0x5A99EBEC3B5B8B00, 0x236C7FAC9E69F7FD, 0x0F147EF3BD0CFEC5, 0x8ED5950D80325A8D, + 0x1E911F50BF3F721A, 0x163A7421DFA8378D, 0xC331B043DA010E6A, 0x5E15915A755883B7, 0xB6236F5F598D56EB, 0x00003BBF8DCD4E7E + }, + // Bob's generator PB = (XPB,YPB), where XPB and YPB are defined over GF(p751) + { + 0x76ED2325DCC93103, 0xD9E1DF566C1D26D3, 0x76AECB94B919AEED, 0xD3785AAAA4D646C5, 0xCB610E30288A7770, 0x9BD3778659023B9E, + 0xD5E69CF26DF23742, 0xA3AD8E17B9F9238C, 0xE145FE2D525160E0, 0xF8D5BCE859ED725D, 0x960A01AB8FF409A2, 0x00002F1D80EF06EF, + 
0x91479226A0687894, 0xBBC6BAF5F6BA40BB, 0x15B529122CFE3CA6, 0x7D12754F00E898A3, 0x76EBA0C8419745E9, 0x0A94F06CDFB3EADE, + 0x399A6EDB2EEB2F9B, 0xE302C5129C049EEB, 0xC35892123951D4B6, 0x15445287ED1CC55D, 0x1ACAF351F09AB55A, 0x00000127A46D082A + }, + // BigMont's curve parameter A24 = (A+2)/4 + 156113, + // BigMont's order, where BigMont is defined by y^2=x^3+A*x^2+x + { + 0xA59B73D250E58055, 0xCB063593D0BE10E1, 0xF6515CCB5D076CBB, 0x66880747EDDF5E20, 0xBA515248A6BFD4AB, 0x3B8EF00DDDDC789D, + 0xB8FB25A1527E1E2A, 0xB6A566C684FDF31D, 0x0213A619F5BAFA1D, 0xA158AD41172C95D2, 0x0384A427E5EEB719, 0x00001BF975507DC7 + }, + // Montgomery constant Montgomery_R2 = (2^768)^2 mod p751 + { + 0x233046449DAD4058, 0xDB010161A696452A, 0x5E36941472E3FD8E, 0xF40BFE2082A2E706, 0x4932CCA8904F8751 , 0x1F735F1F1EE7FC81, + 0xA24F4D80C1048E18, 0xB56C383CCDB607C5, 0x441DD47B735F9C90, 0x5673ED2C6A6AC82A, 0x06C905261132294B, 0x000041AD830F1F35 + }, + // Montgomery constant -p751^-1 mod 2^768 + { + 0x0000000000000001, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xEEB0000000000000, + 0xE3EC968549F878A8, 0xDA959B1A13F7CC76, 0x084E9867D6EBE876, 0x8562B5045CB25748, 0x0E12909F97BADC66, 0x258C28E5D541F71C + }, + // Value one in Montgomery representation + { + 0x00000000000249ad, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x8310000000000000, + 0x5527b1e4375c6c66, 0x697797bf3f4f24d0, 0xc89db7b2ac5c4e2e, 0x4ca4b439d2076956, 0x10f7926c7512c7e9, 0x00002d5b24bce5e2 + } +}; + + +// Fixed parameters for isogeny tree computation + +const unsigned int splits_Alice[SIDH_MAX_Alice] = { + 0, 1, 1, 2, 2, 2, 3, 4, 4, 4, 4, 5, 5, 6, 7, 8, 8, 9, 9, 9, 9, 9, 9, 9, 12, + 11, 12, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 17, 17, 18, 18, 17, 21, 17, + 18, 21, 20, 21, 21, 21, 21, 21, 22, 25, 25, 25, 26, 27, 28, 28, 29, 30, 31, + 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 35, 36, 36, 33, 36, 35, 36, 36, 35, + 36, 36, 37, 38, 38, 39, 40, 41, 42, 38, 39, 40, 
41, 42, 40, 46, 42, 43, 46, + 46, 46, 46, 48, 48, 48, 48, 49, 49, 48, 53, 54, 51, 52, 53, 54, 55, 56, 57, + 58, 59, 59, 60, 62, 62, 63, 64, 64, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, + 65, 66, 67, 65, 66, 67, 66, 69, 70, 66, 67, 66, 69, 70, 69, 70, 70, 71, 72, + 71, 72, 72, 74, 74, 75, 72, 72, 74, 74, 75, 72, 72, 74, 75, 75, 72, 72, 74, + 75, 75, 77, 77, 79, 80, 80, 82 +}; + +const unsigned int splits_Bob[SIDH_MAX_Bob] = { + 0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 7, 8, 8, 8, 8, 9, 9, 9, 9, 9, + 10, 12, 12, 12, 12, 12, 12, 13, 14, 14, 15, 16, 16, 16, 16, 16, 17, 16, 16, + 17, 19, 19, 20, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 24, 24, 25, 27, + 27, 28, 28, 29, 28, 29, 28, 28, 28, 30, 28, 28, 28, 29, 30, 33, 33, 33, 33, + 34, 35, 37, 37, 37, 37, 38, 38, 37, 38, 38, 38, 38, 38, 39, 43, 38, 38, 38, + 38, 43, 40, 41, 42, 43, 48, 45, 46, 47, 47, 48, 49, 49, 49, 50, 51, 50, 49, + 49, 49, 49, 51, 49, 53, 50, 51, 50, 51, 51, 51, 52, 55, 55, 55, 56, 56, 56, + 56, 56, 58, 58, 61, 61, 61, 63, 63, 63, 64, 65, 65, 65, 65, 66, 66, 65, 65, + 66, 66, 66, 66, 66, 66, 66, 71, 66, 73, 66, 66, 71, 66, 73, 66, 66, 71, 66, + 73, 68, 68, 71, 71, 73, 73, 73, 75, 75, 78, 78, 78, 80, 80, 80, 81, 81, 82, + 83, 84, 85, 86, 86, 86, 86, 86, 87, 86, 88, 86, 86, 86, 86, 88, 86, 88, 86, + 86, 86, 88, 88, 86, 86, 86, 93, 90, 90, 92, 92, 92, 93, 93, 93, 93, 93, 97, + 97, 97, 97, 97, 97 +}; diff --git a/src/kex_sidh_cln16/SIDH.h b/src/kex_sidh_cln16/SIDH.h new file mode 100644 index 000000000..21c634c70 --- /dev/null +++ b/src/kex_sidh_cln16/SIDH.h @@ -0,0 +1,245 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny-based cryptography library for Diffie-Hellman key +* exchange providing 128 bits of quantum security and 192 bits of classical security. +* +* Copyright (c) Microsoft Corporation. All rights reserved. 
+* +* +* Abstract: main header file +* +*********************************************************************************************/ + +#ifndef __SIDH_H__ +#define __SIDH_H__ + + +// For C++ +#ifdef __cplusplus +extern "C" { +#endif + + +#include +#include +#include +#include + +// Definition of operating system + +#if (defined(__x86_64__) || defined(__x86_64) || defined(__arch64__) || defined(_M_AMD64) || defined(_M_X64) || defined(_WIN64) || !defined(__LP64__)) +#define TARGET TARGET_AMD64 +#define RADIX 64 +typedef uint64_t digit_t; // Unsigned 64-bit digit +typedef int64_t sdigit_t; // Signed 64-bit digit +#define NWORDS_FIELD 12 // Number of words of a 751-bit field element +#define p751_ZERO_WORDS 5 // Number of "0" digits in the least significant part of p751 - 1 +#else +#define TARGET TARGET_x86 +#define TARGET TARGET_ARM +#define RADIX 32 +typedef uint32_t digit_t; // Unsigned 32-bit digit +typedef int32_t sdigit_t; // Signed 32-bit digit +#define NWORDS_FIELD 24 +#define p751_ZERO_WORDS 11 +#endif + +// Extended datatype support +#if defined(SIDH_ASM) +#if (TARGET == TARGET_AMD64 && OS_TARGET == OS_WIN) +#define SCALAR_INTRIN_SUPPORT +typedef uint64_t uint128_t[2]; +#elif (TARGET == TARGET_AMD64 && OS_TARGET == OS_LINUX) +#define UINT128_SUPPORT +typedef unsigned uint128_t __attribute__((mode(TI))); +#endif +#else /* generic implementation */ +typedef uint64_t uint128_t[2]; +#endif + +// Basic constants + +#define SIDH_NBITS_FIELD 751 +#define SIDH_MAXBITS_FIELD 768 +#define SIDH_MAXWORDS_FIELD ((SIDH_MAXBITS_FIELD+RADIX-1)/RADIX) // Max. number of words to represent field elements +#define SIDH_NWORDS64_FIELD ((SIDH_NBITS_FIELD+63)/64) // Number of 64-bit words of a 751-bit field element +#define SIDH_NBITS_ORDER 384 +#define SIDH_NWORDS_ORDER ((SIDH_NBITS_ORDER+RADIX-1)/RADIX) // Number of words of oA and oB, where oA and oB are the subgroup orders of Alice and Bob, resp. 
+#define SIDH_MAXBITS_ORDER SIDH_NBITS_ORDER +#define SIDH_MAXWORDS_ORDER ((SIDH_MAXBITS_ORDER+RADIX-1)/RADIX) // Max. number of words to represent elements in [1, oA-1] or [1, oB]. + +// Basic constants for elliptic curve BigMont + +#define BIGMONT_NBITS_ORDER 749 +#define BIGMONT_MAXBITS_ORDER 768 +#define BIGMONT_NWORDS_ORDER ((BIGMONT_NBITS_ORDER+RADIX-1)/RADIX) // Number of words of BigMont's subgroup order. +#define BIGMONT_MAXWORDS_ORDER ((BIGMONT_MAXBITS_ORDER+RADIX-1)/RADIX) // Max. number of words to represent elements in [1, BigMont_order]. + +// Size of SIDH secret key = (CurveIsogeny_SIDHp751.owordbits + 7)/8 +// Number of bytes in an element in [1, order] +#define SIDH_SECRETKEY_LEN 48 +// Number of bytes in a field element +// PBYTES_SIDHp751 ((CurveIsogeny_SIDHp751.pwordbits + 7)/8) +// Size of SIDH public key = 3*2*PBYTES_SIDHp751 +#define SIDH_PUBKEY_LEN 576 +// Size of SIDH shared key = 2*PBYTES_SIDHp751 +#define SIDH_SHAREDKEY_LEN 192 + +// Definitions of the error-handling type and error codes + +typedef enum { + SIDH_CRYPTO_SUCCESS, // 0x00 + SIDH_CRYPTO_ERROR, // 0x01 + SIDH_CRYPTO_ERROR_INVALID_PARAMETER, // 0x02 + SIDH_CRYPTO_ERROR_PUBLIC_KEY_VALIDATION, // 0x03 + SIDH_CRYPTO_ERROR_TOO_MANY_ITERATIONS, // 0x04 + SIDH_CRYPTO_ERROR_END_OF_LIST +} SIDH_CRYPTO_STATUS; + +// Definition of type for curve isogeny system identifiers. 
Currently valid value is "SIDHp751" (see SIDH.h) +typedef char CurveIsogeny_ID[10]; + + +// Supersingular elliptic curve isogeny structures: + +// This data struct contains the static curve isogeny data +typedef struct { + CurveIsogeny_ID CurveIsogeny; // Curve isogeny system identifier, base curve defined over GF(p^2) + unsigned int pwordbits; // Smallest multiple of 32 larger than the prime bitlength + unsigned int owordbits; // Smallest multiple of 32 larger than the order bitlength + unsigned int pbits; // Bitlength of the prime p + uint64_t prime[SIDH_MAXWORDS_FIELD]; // Prime p + uint64_t A[SIDH_MAXWORDS_FIELD]; // Base curve parameter "A" + uint64_t C[SIDH_MAXWORDS_FIELD]; // Base curve parameter "C" + unsigned int oAbits; // Order bitlength for Alice + uint64_t Aorder[SIDH_MAXWORDS_ORDER]; // Order of Alice's (sub)group + unsigned int oBbits; // Order bitlength for Bob + unsigned int eB; // Power of Bob's subgroup order (i.e., oB = 3^eB) + uint64_t Border[SIDH_MAXWORDS_ORDER]; // Order of Bob's (sub)group + uint64_t PA[2 * SIDH_MAXWORDS_FIELD]; // Alice's generator PA = (XPA,YPA), where XPA and YPA are defined over GF(p) + uint64_t PB[2 * SIDH_MAXWORDS_FIELD]; // Bob's generator PB = (XPB,YPB), where XPB and YPB are defined over GF(p) + unsigned int BigMont_A24; // BigMont's curve parameter A24 = (A+2)/4 + uint64_t BigMont_order[BIGMONT_MAXWORDS_ORDER]; // BigMont's subgroup order + uint64_t Montgomery_R2[SIDH_MAXWORDS_FIELD]; // Montgomery constant (2^W)^2 mod p, using a suitable value W + uint64_t Montgomery_pp[SIDH_MAXWORDS_FIELD]; // Montgomery constant -p^-1 mod 2^W, using a suitable value W + uint64_t Montgomery_one[SIDH_MAXWORDS_FIELD]; // Value one in Montgomery representation +} CurveIsogenyStaticData, *PCurveIsogenyStaticData; + + +// This data struct is initialized with the targeted curve isogeny system during setup +typedef struct { + CurveIsogeny_ID CurveIsogeny; // Curve isogeny system identifier, base curve defined over GF(p^2) + unsigned int 
pwordbits; // Smallest multiple of 32 larger than the prime bitlength + unsigned int owordbits; // Smallest multiple of 32 larger than the order bitlength + unsigned int pbits; // Bitlength of the prime p + digit_t *prime; // Prime p + digit_t *A; // Base curve parameter "A" + digit_t *C; // Base curve parameter "C" + unsigned int oAbits; // Order bitlength for Alice + digit_t *Aorder; // Order of Alice's (sub)group + unsigned int oBbits; // Order bitlength for Bob + unsigned int eB; // Power of Bob's subgroup order (i.e., oB = 3^eB) + digit_t *Border; // Order of Bob's (sub)group + digit_t *PA; // Alice's generator PA = (XPA,YPA), where XPA and YPA are defined over GF(p) + digit_t *PB; // Bob's generator PB = (XPB,YPB), where XPB and YPB are defined over GF(p) + unsigned int BigMont_A24; // BigMont's curve parameter A24 = (A+2)/4 + digit_t *BigMont_order; // BigMont's subgroup order + digit_t *Montgomery_R2; // Montgomery constant (2^W)^2 mod p, using a suitable value W + digit_t *Montgomery_pp; // Montgomery constant -p^-1 mod 2^W, using a suitable value W + digit_t *Montgomery_one; // Value one in Montgomery representation +} CurveIsogenyStruct, *PCurveIsogenyStruct; + + +// Supported curve isogeny systems: + +// "SIDHp751", base curve: supersingular elliptic curve E: y^2 = x^3 + x +extern CurveIsogenyStaticData CurveIsogeny_SIDHp751; + + +/******************** Function prototypes ***********************/ +/*************** Setup/initialization functions *****************/ + +// Dynamic allocation of memory for curve isogeny structure. +// Returns NULL on error. +PCurveIsogenyStruct oqs_sidh_cln16_curve_allocate(PCurveIsogenyStaticData CurveData); + +// Initialize curve isogeny structure pCurveIsogeny with static data extracted from pCurveIsogenyData. +// This needs to be called after allocating memory for "pCurveIsogeny" using oqs_sidh_cln16_curve_allocate(). 
+SIDH_CRYPTO_STATUS oqs_sidh_cln16_curve_initialize(PCurveIsogenyStruct pCurveIsogeny, OQS_RAND *rand, PCurveIsogenyStaticData pCurveIsogenyData); + +// Free memory for curve isogeny structure +void oqs_sidh_cln16_curve_free(PCurveIsogenyStruct pCurveIsogeny); + +// Output random values in the range [1, order-1] in little endian format that can be used as private keys. +SIDH_CRYPTO_STATUS oqs_sidh_cln16_random_mod_order(digit_t *random_digits, unsigned int AliceOrBob, PCurveIsogenyStruct pCurveIsogeny, OQS_RAND *rand); + +// Output random values in the range [1, BigMont_order-1] in little endian format that can be used as private keys +// to compute scalar multiplications using the elliptic curve BigMont. +SIDH_CRYPTO_STATUS oqs_sidh_cln16_random_BigMont_mod_order(digit_t *random_digits, PCurveIsogenyStruct pCurveIsogeny, OQS_RAND *rand); + +// Clear "nwords" digits from memory +void oqs_sidh_cln16_clear_words(void *mem, digit_t nwords); + +/*********************** Key exchange API ***********************/ + +// Alice's key-pair generation +// It produces a private key pPrivateKeyA and computes the public key pPublicKeyA. +// The private key is an even integer in the range [2, oA-2], where oA = 2^372 (i.e., 372 bits in total). +// The public key consists of 3 elements in GF(p751^2), i.e., 564 bytes. +// CurveIsogeny must be set up in advance using oqs_sidh_cln16_curve_initialize(). +SIDH_CRYPTO_STATUS oqs_sidh_cln16_KeyGeneration_A(unsigned char *pPrivateKeyA, unsigned char *pPublicKeyA, PCurveIsogenyStruct CurveIsogeny, OQS_RAND *rand); + +// Bob's key-pair generation +// It produces a private key pPrivateKeyB and computes the public key pPublicKeyB. +// The private key is an integer in the range [1, oB-1], where oB = 3^239 (i.e., 379 bits in total). +// The public key consists of 3 elements in GF(p751^2), i.e., 564 bytes. +// CurveIsogeny must be set up in advance using oqs_sidh_cln16_curve_initialize(). 
+SIDH_CRYPTO_STATUS oqs_sidh_cln16_KeyGeneration_B(unsigned char *pPrivateKeyB, unsigned char *pPublicKeyB, PCurveIsogenyStruct CurveIsogeny, OQS_RAND *rand); + +// Alice's shared secret generation +// It produces a shared secret key pSharedSecretA using her secret key pPrivateKeyA and Bob's public key pPublicKeyB +// Inputs: Alice's pPrivateKeyA is an even integer in the range [2, oA-2], where oA = 2^372 (i.e., 372 bits in total). +// Bob's pPublicKeyB consists of 3 elements in GF(p751^2), i.e., 564 bytes. +// "validate" flag that indicates if Alice must validate Bob's public key. +// Output: a shared secret pSharedSecretA that consists of one element in GF(p751^2), i.e., 1502 bits in total. +// CurveIsogeny must be set up in advance using oqs_sidh_cln16_curve_initialize(). +SIDH_CRYPTO_STATUS oqs_sidh_cln16_SecretAgreement_A(unsigned char *pPrivateKeyA, unsigned char *pPublicKeyB, unsigned char *pSharedSecretA, bool validate, PCurveIsogenyStruct CurveIsogeny, OQS_RAND *rand); + +// Bob's shared secret generation +// It produces a shared secret key pSharedSecretB using his secret key pPrivateKeyB and Alice's public key pPublicKeyA +// Inputs: Bob's pPrivateKeyB is an integer in the range [1, oB-1], where oB = 3^239 (i.e., 379 bits in total). +// Alice's pPublicKeyA consists of 3 elements in GF(p751^2), i.e., 564 bytes. +// "validate" flag that indicates if Bob must validate Alice's public key. +// Output: a shared secret pSharedSecretB that consists of one element in GF(p751^2), i.e., 1502 bits in total. +// CurveIsogeny must be set up in advance using oqs_sidh_cln16_curve_initialize(). 
+SIDH_CRYPTO_STATUS oqs_sidh_cln16_SecretAgreement_B(unsigned char *pPrivateKeyB, unsigned char *pPublicKeyA, unsigned char *pSharedSecretB, bool validate, PCurveIsogenyStruct CurveIsogeny, OQS_RAND *rand); + +/*********************** Scalar multiplication API using BigMont ***********************/ + +// BigMont's scalar multiplication using the Montgomery ladder +// Inputs: x, the affine x-coordinate of a point P on BigMont: y^2=x^3+A*x^2+x, +// scalar m. +// Output: xout, the affine x-coordinate of m*(x:1) +// CurveIsogeny must be set up in advance using oqs_sidh_cln16_curve_initialize(). +SIDH_CRYPTO_STATUS oqs_sidh_cln16_BigMont_ladder(unsigned char *x, digit_t *m, unsigned char *xout, PCurveIsogenyStruct CurveIsogeny); + + +// Encoding of keys for isogeny system "SIDHp751" (wire format): +// ------------------------------------------------------------ +// Elements over GF(p751) are encoded in 96 octets in little endian format (i.e., the least significant octet located at the leftmost position). +// Elements (a+b*i) over GF(p751^2), where a and b are defined over GF(p751), are encoded as {b, a}, with b in the least significant position. +// Elements over Z_oA and Z_oB are encoded in 48 octets in little endian format. +// +// Private keys pPrivateKeyA and pPrivateKeyB are defined in Z_oA and Z_oB (resp.) and can have values in the range [2, 2^372-2] and [1, 3^239-1], resp. +// In the key exchange API, they are encoded in 48 octets in little endian format. +// Public keys pPublicKeyA and pPublicKeyB consist of three elements in GF(p751^2). In the key exchange API, they are encoded in 576 octets in little +// endian format. +// Shared keys pSharedSecretA and pSharedSecretB consist of one element in GF(p751^2). In the key exchange API, they are encoded in 192 octets in little +// endian format. 
+ + +#ifdef __cplusplus +} +#endif + + +#endif diff --git a/src/kex_sidh_cln16/SIDH_internal.h b/src/kex_sidh_cln16/SIDH_internal.h new file mode 100644 index 000000000..471e8e60f --- /dev/null +++ b/src/kex_sidh_cln16/SIDH_internal.h @@ -0,0 +1,444 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny-based cryptography library for Diffie-Hellman key +* exchange providing 128 bits of quantum security and 192 bits of classical security. +* +* Copyright (c) Microsoft Corporation. All rights reserved. +* +* +* Abstract: internal header file +* +*********************************************************************************************/ + +#ifndef __SIDH_INTERNAL_H__ +#define __SIDH_INTERNAL_H__ + + +// For C++ +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(WINDOWS) +#define UNUSED +#else +#define UNUSED __attribute__ ((unused)) +#endif + +#include "SIDH.h" + + +// Basic constants + +#define SIDH_ALICE 0 +#define SIDH_BOB 1 +#define SIDH_MAX_INT_POINTS_ALICE 8 +// Fixed parameters for isogeny tree computation +#define SIDH_MAX_INT_POINTS_BOB 10 +#define SIDH_MAX_Alice 185 +#define SIDH_MAX_Bob 239 + + +// SIDH's basic element definitions and point representations + +typedef digit_t oqs_sidh_cln16_felm_t[NWORDS_FIELD]; // Datatype for representing 751-bit field elements (768-bit max.) +typedef digit_t oqs_sidh_cln16_dfelm_t[2 * NWORDS_FIELD]; // Datatype for representing double-precision 2x751-bit field elements (2x768-bit max.) 
+typedef oqs_sidh_cln16_felm_t oqs_sidh_cln16_f2elm_t[2]; // Datatype for representing quadratic extension field elements GF(p751^2) +typedef oqs_sidh_cln16_f2elm_t oqs_sidh_cln16_publickey_t[3]; // Datatype for representing public keys equivalent to three GF(p751^2) elements + +typedef struct { + oqs_sidh_cln16_f2elm_t x; + oqs_sidh_cln16_f2elm_t y; +} oqs_sidh_cln16_point_affine; // Point representation in affine coordinates on Montgomery curve. +typedef oqs_sidh_cln16_point_affine oqs_sidh_cln16_point_t[1]; + +typedef struct { + oqs_sidh_cln16_f2elm_t X; + oqs_sidh_cln16_f2elm_t Z; +} oqs_sidh_cln16_point_proj; // Point representation in projective XZ Montgomery coordinates. +typedef oqs_sidh_cln16_point_proj oqs_sidh_cln16_point_proj_t[1]; +#define oqs_sidh_cln16_point_proj_t_EMPTY { { { {0}, {0} }, { {0}, {0} } } } + +typedef struct { + oqs_sidh_cln16_felm_t x; + oqs_sidh_cln16_felm_t y; +} oqs_sidh_cln16_point_basefield_affine; // Point representation in affine coordinates on Montgomery curve over the base field. +typedef oqs_sidh_cln16_point_basefield_affine oqs_sidh_cln16_point_basefield_t[1]; + +typedef struct { + oqs_sidh_cln16_felm_t X; + oqs_sidh_cln16_felm_t Z; +} oqs_sidh_cln16_point_basefield_proj; // Point representation in projective XZ Montgomery coordinates over the base field. 
+typedef oqs_sidh_cln16_point_basefield_proj oqs_sidh_cln16_point_basefield_proj_t[1]; + + +// Macro definitions + +#define NBITS_TO_NBYTES(nbits) (((nbits)+7)/8) // Conversion macro from number of bits to number of bytes +#define NBITS_TO_NWORDS(nbits) (((nbits)+(sizeof(digit_t)*8)-1)/(sizeof(digit_t)*8)) // Conversion macro from number of bits to number of computer words +#define NBYTES_TO_NWORDS(nbytes) (((nbytes)+sizeof(digit_t)-1)/sizeof(digit_t)) // Conversion macro from number of bytes to number of computer words + +// Macro to avoid compiler warnings when detecting unreferenced parameters +#define UNREFERENCED_PARAMETER(PAR) (PAR) + + +/********************** Constant-time unsigned comparisons ***********************/ + +// The following functions return 1 (TRUE) if condition is true, 0 (FALSE) otherwise + +// Is x != 0? +#define is_digit_nonzero_ct(x) \ + ((unsigned int)(((x) | (0 - (x))) >> (RADIX - 1))) + +// Is x = 0? +#define is_digit_zero_ct(x) \ + ((unsigned int)(1 ^ is_digit_nonzero_ct((x)))) + +// Is x < y? 
+#define is_digit_lessthan_ct(x, y) \ + ((unsigned int)(((x) ^ (((x) ^ (y)) | (((x) - (y)) ^ (y)))) >> (RADIX - 1))) + + +/********************** Macros for platform-dependent operations **********************/ + +#if !defined(SIDH_ASM) + +// Digit multiplication +#define MUL(multiplier, multiplicand, hi, lo) \ + oqs_sidh_cln16_digit_x_digit((multiplier), (multiplicand), &(lo)); + +// Digit addition with carry +#define ADDC(carryIn, addend1, addend2, carryOut, sumOut) \ + { digit_t tempReg = (addend1) + (digit_t)(carryIn); \ + (sumOut) = (addend2) + tempReg; \ + (carryOut) = (is_digit_lessthan_ct(tempReg, (digit_t)(carryIn)) | is_digit_lessthan_ct((sumOut), tempReg)); } + +// Digit subtraction with borrow +#define SUBC(borrowIn, minuend, subtrahend, borrowOut, differenceOut) \ + { digit_t tempReg = (minuend) - (subtrahend); \ + unsigned int borrowReg = (is_digit_lessthan_ct((minuend), (subtrahend)) | ((borrowIn) & is_digit_zero_ct(tempReg))); \ + (differenceOut) = tempReg - (digit_t)(borrowIn); \ + (borrowOut) = borrowReg; } + +// Shift right with flexible datatype +#define SHIFTR(highIn, lowIn, shift, shiftOut, DigitSize) \ + (shiftOut) = ((lowIn) >> (shift)) ^ ((highIn) << (DigitSize - (shift))); + +// Shift left with flexible datatype +#define SHIFTL(highIn, lowIn, shift, shiftOut, DigitSize) \ + (shiftOut) = ((highIn) << (shift)) ^ ((lowIn) >> (DigitSize - (shift))); + +// 64x64-bit multiplication +#define MUL128(multiplier, multiplicand, product) \ + oqs_sidh_cln16_mp_mul((digit_t*)&(multiplier), (digit_t*)&(multiplicand), (digit_t*)&(product), NWORDS_FIELD/2); + +// 128-bit addition, inputs < 2^127 +#define ADD128(addend1, addend2, addition) \ + oqs_sidh_cln16_mp_add((digit_t*)(addend1), (digit_t*)(addend2), (digit_t*)(addition), NWORDS_FIELD); + +// 128-bit addition with output carry +#define ADC128(addend1, addend2, carry, addition) \ + (carry) = oqs_sidh_cln16_mp_add((digit_t*)(addend1), (digit_t*)(addend2), (digit_t*)(addition), NWORDS_FIELD); + +#elif 
(TARGET == TARGET_AMD64 && defined(WINDOWS)) + +// Digit multiplication +#define MUL(multiplier, multiplicand, hi, lo) \ + (lo) = _umul128((multiplier), (multiplicand), (hi)); + +// Digit addition with carry +#define ADDC(carryIn, addend1, addend2, carryOut, sumOut) \ + (carryOut) = _addcarry_u64((carryIn), (addend1), (addend2), &(sumOut)); + +// Digit subtraction with borrow +#define SUBC(borrowIn, minuend, subtrahend, borrowOut, differenceOut) \ + (borrowOut) = _subborrow_u64((borrowIn), (minuend), (subtrahend), &(differenceOut)); + +// Digit shift right +#define SHIFTR(highIn, lowIn, shift, shiftOut, DigitSize) \ + (shiftOut) = __shiftright128((lowIn), (highIn), (shift)); + +// Digit shift left +#define SHIFTL(highIn, lowIn, shift, shiftOut, DigitSize) \ + (shiftOut) = __shiftleft128((lowIn), (highIn), (shift)); + +// 64x64-bit multiplication +#define MUL128(multiplier, multiplicand, product) \ + (product)[0] = _umul128((multiplier), (multiplicand), &(product)[1]); + +// 128-bit addition, inputs < 2^127 +#define ADD128(addend1, addend2, addition) \ + { unsigned char carry = _addcarry_u64(0, (addend1)[0], (addend2)[0], &(addition)[0]); \ + _addcarry_u64(carry, (addend1)[1], (addend2)[1], &(addition)[1]); } + +// 128-bit addition with output carry +#define ADC128(addend1, addend2, carry, addition) \ + (carry) = _addcarry_u64(0, (addend1)[0], (addend2)[0], &(addition)[0]); \ + (carry) = _addcarry_u64((carry), (addend1)[1], (addend2)[1], &(addition)[1]); + +// 128-bit subtraction, subtrahend < 2^127 +#define SUB128(minuend, subtrahend, difference) \ + { unsigned char borrow = _subborrow_u64(0, (minuend)[0], (subtrahend)[0], &(difference)[0]); \ + _subborrow_u64(borrow, (minuend)[1], (subtrahend)[1], &(difference)[1]); } + +// 128-bit right shift, max. shift value is 64 +#define SHIFTR128(Input, shift, shiftOut) \ + (shiftOut)[0] = __shiftright128((Input)[0], (Input)[1], (shift)); \ + (shiftOut)[1] = (Input)[1] >> (shift); + +// 128-bit left shift, max. 
shift value is 64 +#define SHIFTL128(Input, shift, shiftOut) \ + (shiftOut)[1] = __shiftleft128((Input)[0], (Input)[1], (shift)); \ + (shiftOut)[0] = (Input)[0] << (shift); + +#define MULADD128(multiplier, multiplicand, addend, carry, result); \ + { uint128_t product; \ + MUL128(multiplier, multiplicand, product); \ + ADC128(addend, product, carry, result); } + +#elif (TARGET == TARGET_AMD64) + +// Digit multiplication +#define MUL(multiplier, multiplicand, hi, lo) \ + { uint128_t tempReg = (uint128_t)(multiplier) * (uint128_t)(multiplicand); \ + *(hi) = (digit_t)(tempReg >> RADIX); \ + (lo) = (digit_t)tempReg; } + +// Digit addition with carry +#define ADDC(carryIn, addend1, addend2, carryOut, sumOut) \ + { uint128_t tempReg = (uint128_t)(addend1) + (uint128_t)(addend2) + (uint128_t)(carryIn); \ + (carryOut) = (digit_t)(tempReg >> RADIX); \ + (sumOut) = (digit_t)tempReg; } + +// Digit subtraction with borrow +#define SUBC(borrowIn, minuend, subtrahend, borrowOut, differenceOut) \ + { uint128_t tempReg = (uint128_t)(minuend) - (uint128_t)(subtrahend) - (uint128_t)(borrowIn); \ + (borrowOut) = (digit_t)(tempReg >> (sizeof(uint128_t)*8 - 1)); \ + (differenceOut) = (digit_t)tempReg; } + +// Digit shift right +#define SHIFTR(highIn, lowIn, shift, shiftOut, DigitSize) \ + (shiftOut) = ((lowIn) >> (shift)) ^ ((highIn) << (RADIX - (shift))); + +// Digit shift left +#define SHIFTL(highIn, lowIn, shift, shiftOut, DigitSize) \ + (shiftOut) = ((highIn) << (shift)) ^ ((lowIn) >> (RADIX - (shift))); + +#endif + + +// Multiprecision multiplication selection +#if !defined(SIDH_ASM) && (TARGET == TARGET_AMD64) +#define oqs_sidh_cln16_mp_mul_comba oqs_sidh_cln16_mp_mul +#else +#define oqs_sidh_cln16_mp_mul_schoolbook oqs_sidh_cln16_mp_mul +#endif + + + +/**************** Function prototypes ****************/ +/************* Multiprecision functions **************/ + +// Copy wordsize digits, c = a, where lng(a) = nwords +void oqs_sidh_cln16_copy_words(digit_t *a, digit_t *c, 
unsigned int nwords); + +// Multiprecision addition, c = a+b, where lng(a) = lng(b) = nwords. Returns the carry bit +extern unsigned int oqs_sidh_cln16_mp_add(digit_t *a, digit_t *b, digit_t *c, unsigned int nwords); + +// Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = nwords. Returns the borrow bit +extern unsigned int oqs_sidh_cln16_mp_sub(digit_t *a, digit_t *b, digit_t *c, unsigned int nwords); + +// Multiprecision right shift by one +void oqs_sidh_cln16_mp_shiftr1(digit_t *x, unsigned int nwords); + +// Multiprecision left shift by one +void oqs_sidh_cln16_mp_shiftl1(digit_t *x, unsigned int nwords); + +// Digit multiplication, digit * digit -> 2-digit result +void oqs_sidh_cln16_digit_x_digit(digit_t a, digit_t b, digit_t *c); + +// Multiprecision schoolbook multiply, c = a*b, where lng(a) = lng(b) = nwords. +void oqs_sidh_cln16_mp_mul_schoolbook(digit_t *a, digit_t *b, digit_t *c, unsigned int nwords); + +// Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = nwords. +void oqs_sidh_cln16_mp_mul_comba(digit_t *a, digit_t *b, digit_t *c, unsigned int nwords); + +/************ Field arithmetic functions *************/ + +// Copy of a field element, c = a +void oqs_sidh_cln16_fpcopy751(oqs_sidh_cln16_felm_t a, oqs_sidh_cln16_felm_t c); + +// Zeroing a field element, a = 0 +void oqs_sidh_cln16_fpzero751(oqs_sidh_cln16_felm_t a); + +// Modular addition, c = a+b mod p751 +extern void oqs_sidh_cln16_fpadd751(digit_t *a, digit_t *b, digit_t *c); +extern void oqs_sidh_cln16_fpadd751_asm(digit_t *a, digit_t *b, digit_t *c); + +// Modular subtraction, c = a-b mod p751 +extern void oqs_sidh_cln16_fpsub751(digit_t *a, digit_t *b, digit_t *c); +extern void oqs_sidh_cln16_fpsub751_asm(digit_t *a, digit_t *b, digit_t *c); + +// Modular negation, a = -a mod p751 +extern void oqs_sidh_cln16_fpneg751(digit_t *a); + +// Modular division by two, c = a/2 mod p751.
+void oqs_sidh_cln16_fpdiv2_751(digit_t *a, digit_t *c); + +// Modular correction to reduce field element a in [0, 2*p751-1] to [0, p751-1]. +void oqs_sidh_cln16_fpcorrection751(digit_t *a); + +// 751-bit Montgomery reduction, c = a mod p +void oqs_sidh_cln16_rdc_mont(digit_t *a, digit_t *c); + +// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p751, where R=2^768 +void oqs_sidh_cln16_fpmul751_mont(oqs_sidh_cln16_felm_t a, oqs_sidh_cln16_felm_t b, oqs_sidh_cln16_felm_t c); +void oqs_sidh_cln16_mul751_asm(oqs_sidh_cln16_felm_t a, oqs_sidh_cln16_felm_t b, oqs_sidh_cln16_dfelm_t c); +void oqs_sidh_cln16_rdc751_asm(oqs_sidh_cln16_dfelm_t ma, oqs_sidh_cln16_dfelm_t mc); + +// Field squaring using Montgomery arithmetic, mc = ma^2*R^-1 mod p751, where R=2^768 +void oqs_sidh_cln16_fpsqr751_mont(oqs_sidh_cln16_felm_t ma, oqs_sidh_cln16_felm_t mc); + +// Conversion to Montgomery representation +void oqs_sidh_cln16_to_mont(oqs_sidh_cln16_felm_t a, oqs_sidh_cln16_felm_t mc); + +// Conversion from Montgomery representation to standard representation +void oqs_sidh_cln16_from_mont(oqs_sidh_cln16_felm_t ma, oqs_sidh_cln16_felm_t c); + +// Field inversion, a = a^-1 in GF(p751) +void oqs_sidh_cln16_fpinv751_mont(oqs_sidh_cln16_felm_t a); + +/************ GF(p^2) arithmetic functions *************/ + +// Copy of a GF(p751^2) element, c = a +void oqs_sidh_cln16_fp2copy751(oqs_sidh_cln16_f2elm_t a, oqs_sidh_cln16_f2elm_t c); + +// Zeroing a GF(p751^2) element, a = 0 +void oqs_sidh_cln16_fp2zero751(oqs_sidh_cln16_f2elm_t a); + +// GF(p751^2) negation, a = -a in GF(p751^2) +void oqs_sidh_cln16_fp2neg751(oqs_sidh_cln16_f2elm_t a); + +// GF(p751^2) addition, c = a+b in GF(p751^2) +extern void oqs_sidh_cln16_fp2add751(oqs_sidh_cln16_f2elm_t a, oqs_sidh_cln16_f2elm_t b, oqs_sidh_cln16_f2elm_t c); + +// GF(p751^2) subtraction, c = a-b in GF(p751^2) +extern void oqs_sidh_cln16_fp2sub751(oqs_sidh_cln16_f2elm_t a, oqs_sidh_cln16_f2elm_t b, oqs_sidh_cln16_f2elm_t c); + +//
GF(p751^2) division by two, c = a/2 in GF(p751^2) +void oqs_sidh_cln16_fp2div2_751(oqs_sidh_cln16_f2elm_t a, oqs_sidh_cln16_f2elm_t c); + +// Modular correction, a = a in GF(p751^2) +void oqs_sidh_cln16_fp2correction751(oqs_sidh_cln16_f2elm_t a); + +// GF(p751^2) squaring using Montgomery arithmetic, c = a^2 in GF(p751^2) +void oqs_sidh_cln16_fp2sqr751_mont(oqs_sidh_cln16_f2elm_t a, oqs_sidh_cln16_f2elm_t c); + +// GF(p751^2) multiplication using Montgomery arithmetic, c = a*b in GF(p751^2) +void oqs_sidh_cln16_fp2mul751_mont(oqs_sidh_cln16_f2elm_t a, oqs_sidh_cln16_f2elm_t b, oqs_sidh_cln16_f2elm_t c); + +// Conversion of a GF(p751^2) element to Montgomery representation +void oqs_sidh_cln16_to_fp2mont(oqs_sidh_cln16_f2elm_t a, oqs_sidh_cln16_f2elm_t mc); + +// Conversion of a GF(p751^2) element from Montgomery representation to standard representation +void oqs_sidh_cln16_from_fp2mont(oqs_sidh_cln16_f2elm_t ma, oqs_sidh_cln16_f2elm_t c); + +// GF(p751^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2) +void oqs_sidh_cln16_fp2inv751_mont(oqs_sidh_cln16_f2elm_t a); + +// Select either x or y depending on value of option +void oqs_sidh_cln16_select_f2elm(oqs_sidh_cln16_f2elm_t x, oqs_sidh_cln16_f2elm_t y, oqs_sidh_cln16_f2elm_t z, digit_t option); + +/************ Elliptic curve and isogeny functions *************/ + +// Check if curve isogeny structure is NULL +bool oqs_sidh_cln16_is_CurveIsogenyStruct_null(PCurveIsogenyStruct pCurveIsogeny); + +// Swap points over the base field +void oqs_sidh_cln16_swap_points_basefield(oqs_sidh_cln16_point_basefield_proj_t P, oqs_sidh_cln16_point_basefield_proj_t Q, digit_t option); + +// Swap points +void oqs_sidh_cln16_swap_points(oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, digit_t option); + +// Computes the j-invariant of a Montgomery curve with projective constant. 
+void oqs_sidh_cln16_j_inv(oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t C, oqs_sidh_cln16_f2elm_t jinv); + +// Simultaneous doubling and differential addition. +void oqs_sidh_cln16_xDBLADD(oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, oqs_sidh_cln16_f2elm_t xPQ, oqs_sidh_cln16_f2elm_t A24); + +// Doubling of a Montgomery point in projective coordinates (X:Z). +void oqs_sidh_cln16_xDBL(oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, oqs_sidh_cln16_f2elm_t A24, oqs_sidh_cln16_f2elm_t C24); + +// Computes [2^e](X:Z) on Montgomery curve with projective constant via e repeated doublings. +void oqs_sidh_cln16_xDBLe(oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t C, int e); + +// Computes [2^e](X:Z) on Montgomery curve with projective constant via e repeated doublings and collects a few intermediate multiples. +void oqs_sidh_cln16_xDBLe_collect(oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t C, unsigned int left_bound, const unsigned int right_bound, const unsigned int *col, oqs_sidh_cln16_point_proj_t *pts, unsigned int *pts_index, unsigned int *npts); + +// Differential addition. +void oqs_sidh_cln16_xADD(oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, oqs_sidh_cln16_f2elm_t xPQ); + +// Doubling of a Montgomery point in projective coordinates (X:Z) over the base field. +void oqs_sidh_cln16_xDBL_basefield(oqs_sidh_cln16_point_basefield_proj_t P, oqs_sidh_cln16_point_basefield_proj_t Q); + +// Simultaneous doubling and differential addition over the base field. 
+void oqs_sidh_cln16_xDBLADD_basefield(oqs_sidh_cln16_point_basefield_proj_t P, oqs_sidh_cln16_point_basefield_proj_t Q, oqs_sidh_cln16_felm_t xPQ, oqs_sidh_cln16_felm_t A24); + +// The Montgomery ladder +void oqs_sidh_cln16_ladder(oqs_sidh_cln16_felm_t x, digit_t *m, oqs_sidh_cln16_point_basefield_proj_t P, oqs_sidh_cln16_point_basefield_proj_t Q, oqs_sidh_cln16_felm_t A24, unsigned int order_bits, unsigned int order_fullbits, PCurveIsogenyStruct CurveIsogeny); + +// Computes key generation entirely in the base field +SIDH_CRYPTO_STATUS oqs_sidh_cln16_secret_pt(oqs_sidh_cln16_point_basefield_t P, digit_t *m, unsigned int AliceOrBob, oqs_sidh_cln16_point_proj_t R, PCurveIsogenyStruct CurveIsogeny); + +// Computes P+[m]Q via x-only arithmetic. +SIDH_CRYPTO_STATUS oqs_sidh_cln16_ladder_3_pt(oqs_sidh_cln16_f2elm_t xP, oqs_sidh_cln16_f2elm_t xQ, oqs_sidh_cln16_f2elm_t xPQ, digit_t *m, unsigned int AliceOrBob, oqs_sidh_cln16_point_proj_t W, oqs_sidh_cln16_f2elm_t A, PCurveIsogenyStruct CurveIsogeny); + +// Computes the corresponding 4-isogeny of a projective Montgomery point (X4:Z4) of order 4. +void oqs_sidh_cln16_get_4_isog(oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t C, oqs_sidh_cln16_f2elm_t *coeff); + +// Evaluates the isogeny at the point (X:Z) in the domain of the isogeny +void oqs_sidh_cln16_eval_4_isog(oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_f2elm_t *coeff); + +// Computes first 4-isogeny computed by Alice. +void oqs_sidh_cln16_first_4_isog(oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t Aout, oqs_sidh_cln16_f2elm_t Cout, PCurveIsogenyStruct CurveIsogeny); + +// Tripling of a Montgomery point in projective coordinates (X:Z). +void oqs_sidh_cln16_xTPL(oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, oqs_sidh_cln16_f2elm_t A24, oqs_sidh_cln16_f2elm_t C24); + +// Computes [3^e](X:Z) on Montgomery curve with projective constant via e repeated triplings. 
+void oqs_sidh_cln16_xTPLe(oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t C, int e); + +// Computes [3^e](X:Z) on Montgomery curve with projective constant via e repeated triplings and collects a few intermediate multiples. +void oqs_sidh_cln16_xTPLe_collect(oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t C, unsigned int left_bound, const unsigned int right_bound, const unsigned int *col, oqs_sidh_cln16_point_proj_t *pts, unsigned int *pts_index, unsigned int *npts); + +// Computes the corresponding 3-isogeny of a projective Montgomery point (X3:Z3) of order 3. +void oqs_sidh_cln16_get_3_isog(oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t C); + +// Computes the 3-isogeny R=phi(X:Z), given projective point (X3:Z3) of order 3 on a Montgomery curve and a point P = (X:Z). +void oqs_sidh_cln16_eval_3_isog(oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q); + +// 3-way simultaneous inversion +void oqs_sidh_cln16_inv_3_way(oqs_sidh_cln16_f2elm_t z1, oqs_sidh_cln16_f2elm_t z2, oqs_sidh_cln16_f2elm_t z3); + +// Computing the point D = (x(Q-P),z(Q-P)) +void oqs_sidh_cln16_distort_and_diff(oqs_sidh_cln16_felm_t xP, oqs_sidh_cln16_point_proj_t d, PCurveIsogenyStruct CurveIsogeny); + +// Given the x-coordinates of P, Q, and R, returns the value A corresponding to the Montgomery curve E_A: y^2=x^3+A*x^2+x such that R=Q-P on E_A. +void oqs_sidh_cln16_get_A(oqs_sidh_cln16_f2elm_t xP, oqs_sidh_cln16_f2elm_t xQ, oqs_sidh_cln16_f2elm_t xR, oqs_sidh_cln16_f2elm_t A, PCurveIsogenyStruct CurveIsogeny); + +/************ Public key validation functions *************/ + +// Validation of Alice's public key (ran by Bob) +// CurveIsogeny must be set up in advance using oqs_sidh_cln16_curve_initialize(). 
+SIDH_CRYPTO_STATUS oqs_sidh_cln16_Validate_PKA(oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_publickey_t PKA, bool *valid, PCurveIsogenyStruct CurveIsogeny, OQS_RAND *rand); + +// Validation of Bob's public key (ran by Alice) +// CurveIsogeny must be set up in advance using oqs_sidh_cln16_curve_initialize(). +SIDH_CRYPTO_STATUS oqs_sidh_cln16_Validate_PKB(oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_publickey_t PKB, bool *valid, PCurveIsogenyStruct CurveIsogeny, OQS_RAND *rand); + + +#ifdef __cplusplus +} +#endif + + +#endif diff --git a/src/kex_sidh_cln16/SIDH_setup.c b/src/kex_sidh_cln16/SIDH_setup.c new file mode 100644 index 000000000..621cfa2ef --- /dev/null +++ b/src/kex_sidh_cln16/SIDH_setup.c @@ -0,0 +1,245 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny-based cryptography library for Diffie-Hellman key +* exchange providing 128 bits of quantum security and 192 bits of classical security. +* +* Copyright (c) Microsoft Corporation. All rights reserved. +* +* +* Abstract: functions for initialization and getting randomness +* +*********************************************************************************************/ + +#include +#include +#include "SIDH_internal.h" + + +SIDH_CRYPTO_STATUS oqs_sidh_cln16_curve_initialize(PCurveIsogenyStruct pCurveIsogeny, UNUSED OQS_RAND *rand, PCurveIsogenyStaticData pCurveIsogenyData) { + // Initialize curve isogeny structure pCurveIsogeny with static data extracted from pCurveIsogenyData. + // This needs to be called after allocating memory for "pCurveIsogeny" using oqs_sidh_cln16_curve_allocate(). 
+ unsigned int i, pwords, owords; + + if (oqs_sidh_cln16_is_CurveIsogenyStruct_null(pCurveIsogeny)) { + return SIDH_CRYPTO_ERROR_INVALID_PARAMETER; + } + + for (i = 0; i < 8; i++) { // Copy 8-character identifier + pCurveIsogeny->CurveIsogeny[i] = pCurveIsogenyData->CurveIsogeny[i]; + } + pCurveIsogeny->pwordbits = pCurveIsogenyData->pwordbits; + pCurveIsogeny->owordbits = pCurveIsogenyData->owordbits; + pCurveIsogeny->pbits = pCurveIsogenyData->pbits; + pCurveIsogeny->oAbits = pCurveIsogenyData->oAbits; + pCurveIsogeny->oBbits = pCurveIsogenyData->oBbits; + pCurveIsogeny->eB = pCurveIsogenyData->eB; + pCurveIsogeny->BigMont_A24 = pCurveIsogenyData->BigMont_A24; + + pwords = (pCurveIsogeny->pwordbits + RADIX - 1) / RADIX; + owords = (pCurveIsogeny->owordbits + RADIX - 1) / RADIX; + oqs_sidh_cln16_copy_words((digit_t *)pCurveIsogenyData->prime, pCurveIsogeny->prime, pwords); + oqs_sidh_cln16_copy_words((digit_t *)pCurveIsogenyData->A, pCurveIsogeny->A, pwords); + oqs_sidh_cln16_copy_words((digit_t *)pCurveIsogenyData->C, pCurveIsogeny->C, pwords); + oqs_sidh_cln16_copy_words((digit_t *)pCurveIsogenyData->Aorder, pCurveIsogeny->Aorder, owords); + oqs_sidh_cln16_copy_words((digit_t *)pCurveIsogenyData->Border, pCurveIsogeny->Border, owords); + oqs_sidh_cln16_copy_words((digit_t *)pCurveIsogenyData->PA, pCurveIsogeny->PA, 2 * pwords); + oqs_sidh_cln16_copy_words((digit_t *)pCurveIsogenyData->PB, pCurveIsogeny->PB, 2 * pwords); + oqs_sidh_cln16_copy_words((digit_t *)pCurveIsogenyData->BigMont_order, pCurveIsogeny->BigMont_order, pwords); + oqs_sidh_cln16_copy_words((digit_t *)pCurveIsogenyData->Montgomery_R2, pCurveIsogeny->Montgomery_R2, pwords); + oqs_sidh_cln16_copy_words((digit_t *)pCurveIsogenyData->Montgomery_pp, pCurveIsogeny->Montgomery_pp, pwords); + oqs_sidh_cln16_copy_words((digit_t *)pCurveIsogenyData->Montgomery_one, pCurveIsogeny->Montgomery_one, pwords); + + return SIDH_CRYPTO_SUCCESS; +} + + +PCurveIsogenyStruct 
oqs_sidh_cln16_curve_allocate(PCurveIsogenyStaticData CurveData) { + // Dynamic allocation of memory for curve isogeny structure. + // Returns NULL on error. + digit_t pbytes = (CurveData->pwordbits + 7) / 8; + digit_t obytes = (CurveData->owordbits + 7) / 8; + PCurveIsogenyStruct pCurveIsogeny = (PCurveIsogenyStruct)calloc(1, sizeof(CurveIsogenyStruct)); + if (!pCurveIsogeny) { + return NULL; + } + pCurveIsogeny->prime = (digit_t *)calloc(1, pbytes); + pCurveIsogeny->A = (digit_t *)calloc(1, pbytes); + pCurveIsogeny->C = (digit_t *)calloc(1, pbytes); + pCurveIsogeny->Aorder = (digit_t *)calloc(1, obytes); + pCurveIsogeny->Border = (digit_t *)calloc(1, obytes); + pCurveIsogeny->PA = (digit_t *)calloc(1, 2 * pbytes); + pCurveIsogeny->PB = (digit_t *)calloc(1, 2 * pbytes); + pCurveIsogeny->BigMont_order = (digit_t *)calloc(1, pbytes); + pCurveIsogeny->Montgomery_R2 = (digit_t *)calloc(1, pbytes); + pCurveIsogeny->Montgomery_pp = (digit_t *)calloc(1, pbytes); + pCurveIsogeny->Montgomery_one = (digit_t *)calloc(1, pbytes); + if (oqs_sidh_cln16_is_CurveIsogenyStruct_null(pCurveIsogeny)) { + return NULL; + } + return pCurveIsogeny; +} + + +void oqs_sidh_cln16_curve_free(PCurveIsogenyStruct pCurveIsogeny) { + // Free memory for curve isogeny structure + + if (pCurveIsogeny != NULL) { + if (pCurveIsogeny->prime != NULL) { + free(pCurveIsogeny->prime); + } + if (pCurveIsogeny->A != NULL) { + free(pCurveIsogeny->A); + } + if (pCurveIsogeny->C != NULL) { + free(pCurveIsogeny->C); + } + if (pCurveIsogeny->Aorder != NULL) { + free(pCurveIsogeny->Aorder); + } + if (pCurveIsogeny->Border != NULL) { + free(pCurveIsogeny->Border); + } + if (pCurveIsogeny->PA != NULL) { + free(pCurveIsogeny->PA); + } + if (pCurveIsogeny->PB != NULL) { + free(pCurveIsogeny->PB); + } + if (pCurveIsogeny->BigMont_order != NULL) { + free(pCurveIsogeny->BigMont_order); + } + if (pCurveIsogeny->Montgomery_R2 != NULL) { + free(pCurveIsogeny->Montgomery_R2); + } + if (pCurveIsogeny->Montgomery_pp != NULL) { 
+ free(pCurveIsogeny->Montgomery_pp); + } + if (pCurveIsogeny->Montgomery_one != NULL) { + free(pCurveIsogeny->Montgomery_one); + } + + free(pCurveIsogeny); + } +} + + +bool oqs_sidh_cln16_is_CurveIsogenyStruct_null(PCurveIsogenyStruct pCurveIsogeny) { + // Check if curve isogeny structure is NULL + + if (pCurveIsogeny == NULL || pCurveIsogeny->prime == NULL || pCurveIsogeny->A == NULL || pCurveIsogeny->C == NULL || pCurveIsogeny->Aorder == NULL || pCurveIsogeny->Border == NULL || + pCurveIsogeny->PA == NULL || pCurveIsogeny->PB == NULL || pCurveIsogeny->BigMont_order == NULL || pCurveIsogeny->Montgomery_R2 == NULL || pCurveIsogeny->Montgomery_pp == NULL || + pCurveIsogeny->Montgomery_one == NULL) { + return true; + } + return false; +} + +const uint64_t Border_div3[SIDH_NWORDS_ORDER] = { 0xEDCD718A828384F9, 0x733B35BFD4427A14, 0xF88229CF94D7CF38, 0x63C56C990C7C2AD6, 0xB858A87E8F4222C7, 0x254C9C6B525EAF5 }; + + +SIDH_CRYPTO_STATUS oqs_sidh_cln16_random_mod_order(digit_t *random_digits, unsigned int AliceOrBob, PCurveIsogenyStruct pCurveIsogeny, OQS_RAND *rand) { + // Output random values in the range [1, order-1] in little endian format that can be used as private keys. + // It makes requests of random values with length "oAbits" (when AliceOrBob = 0) or "oBbits" (when AliceOrBob = 1). + // The process repeats until random value is in [0, Aorder-2] ([0, Border-2], resp.). + // If successful, the output is given in "random_digits" in the range [1, Aorder-1] ([1, Border-1], resp.). 
+ unsigned int ntry = 0, nbytes, nwords; + digit_t t1[SIDH_MAXWORDS_ORDER] = {0}, order2[SIDH_MAXWORDS_ORDER] = {0}; + unsigned char mask; + SIDH_CRYPTO_STATUS Status = SIDH_CRYPTO_SUCCESS; + + if (random_digits == NULL || oqs_sidh_cln16_is_CurveIsogenyStruct_null(pCurveIsogeny) || AliceOrBob > 1) { + return SIDH_CRYPTO_ERROR_INVALID_PARAMETER; + } + + oqs_sidh_cln16_clear_words((void *)random_digits, SIDH_MAXWORDS_ORDER); + t1[0] = 2; + if (AliceOrBob == SIDH_ALICE) { + nbytes = (pCurveIsogeny->oAbits + 7) / 8; // Number of random bytes to be requested + nwords = NBITS_TO_NWORDS(pCurveIsogeny->oAbits); + mask = 0x07; // Value for masking last random byte + oqs_sidh_cln16_copy_words(pCurveIsogeny->Aorder, order2, nwords); + oqs_sidh_cln16_mp_shiftr1(order2, nwords); // order/2 + oqs_sidh_cln16_mp_sub(order2, t1, order2, nwords); // order2 = order/2-2 + } else { + nbytes = (pCurveIsogeny->oBbits + 7) / 8; + nwords = NBITS_TO_NWORDS(pCurveIsogeny->oBbits); + mask = 0x03; // Value for masking last random byte + oqs_sidh_cln16_mp_sub((digit_t *)Border_div3, t1, order2, nwords); // order2 = order/3-2 + } + + do { + ntry++; + if (ntry > 100) { // Max. 
100 iterations to obtain random value in [0, order-2] + return SIDH_CRYPTO_ERROR_TOO_MANY_ITERATIONS; + } + + rand->rand_n(rand, (uint8_t *) random_digits, nbytes); + ((unsigned char *)random_digits)[nbytes - 1] &= mask; // Masking last byte + } while (oqs_sidh_cln16_mp_sub(order2, random_digits, t1, nwords) == 1); + + oqs_sidh_cln16_clear_words((void *)t1, SIDH_MAXWORDS_ORDER); + t1[0] = 1; + oqs_sidh_cln16_mp_add(random_digits, t1, random_digits, nwords); + oqs_sidh_cln16_copy_words(random_digits, t1, nwords); + oqs_sidh_cln16_mp_shiftl1(random_digits, nwords); // Alice's output in the range [2, order-2] + if (AliceOrBob == SIDH_BOB) { + oqs_sidh_cln16_mp_add(random_digits, t1, random_digits, nwords); // Bob's output in the range [3, order-3] + } + + return Status; +} + + +SIDH_CRYPTO_STATUS oqs_sidh_cln16_random_BigMont_mod_order(digit_t *random_digits, PCurveIsogenyStruct pCurveIsogeny, OQS_RAND *rand) { + // Output random values in the range [1, BigMont_order-1] in little endian format that can be used as private keys to compute scalar multiplications + // using the elliptic curve BigMont. + // It makes requests of random values with length "BIGMONT_NBITS_ORDER". + // The process repeats until random value is in [0, BigMont_order-2] + // If successful, the output is given in "random_digits" in the range [1, BigMont_order-1]. 
+ unsigned int ntry = 0, nbytes = (BIGMONT_NBITS_ORDER + 7) / 8, nwords = NBITS_TO_NWORDS(BIGMONT_NBITS_ORDER); + digit_t t1[BIGMONT_MAXWORDS_ORDER] = {0}, order2[BIGMONT_MAXWORDS_ORDER] = {0}; + unsigned char mask; + SIDH_CRYPTO_STATUS Status = SIDH_CRYPTO_SUCCESS; + + if (random_digits == NULL || oqs_sidh_cln16_is_CurveIsogenyStruct_null(pCurveIsogeny)) { + return SIDH_CRYPTO_ERROR_INVALID_PARAMETER; + } + + oqs_sidh_cln16_clear_words((void *)random_digits, BIGMONT_MAXWORDS_ORDER); + t1[0] = 2; + mask = (unsigned char)(8 * nbytes - BIGMONT_NBITS_ORDER); + oqs_sidh_cln16_mp_sub(pCurveIsogeny->BigMont_order, t1, order2, nwords); // order2 = order-2 + mask = ((unsigned char) - 1 >> mask); // Value for masking last random byte + + do { + ntry++; + if (ntry > 100) { // Max. 100 iterations to obtain random value in [0, order-2] + return SIDH_CRYPTO_ERROR_TOO_MANY_ITERATIONS; + } + rand->rand_n(rand, (uint8_t *)random_digits, nbytes); + ((unsigned char *)random_digits)[nbytes - 1] &= mask; // Masking last byte + } while (oqs_sidh_cln16_mp_sub(order2, random_digits, t1, nwords) == 1); + + oqs_sidh_cln16_clear_words((void *)t1, BIGMONT_MAXWORDS_ORDER); + t1[0] = 1; + oqs_sidh_cln16_mp_add(random_digits, t1, random_digits, nwords); // Output in the range [1, order-1] + + return Status; +} + + +void oqs_sidh_cln16_clear_words(void *mem, digit_t nwords) { + // Clear digits from memory. "nwords" indicates the number of digits to be zeroed. + // This function uses the volatile type qualifier to inform the compiler not to optimize out the memory clearing. 
+ unsigned int i; + volatile digit_t *v = mem; + + for (i = 0; i < nwords; i++) { + v[i] = 0; + } +} + + + + + + diff --git a/src/kex_sidh_cln16/ec_isogeny.c b/src/kex_sidh_cln16/ec_isogeny.c new file mode 100644 index 000000000..235663189 --- /dev/null +++ b/src/kex_sidh_cln16/ec_isogeny.c @@ -0,0 +1,586 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny-based cryptography library for Diffie-Hellman key +* exchange providing 128 bits of quantum security and 192 bits of classical security. +* +* Copyright (c) Microsoft Corporation. All rights reserved. +* +* +* Abstract: elliptic curve and isogeny functions +* +*********************************************************************************************/ + +#include "SIDH_internal.h" + + +void oqs_sidh_cln16_j_inv(oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t C, oqs_sidh_cln16_f2elm_t jinv) { + // Computes the j-invariant of a Montgomery curve with projective constant. + // Input: A,C in GF(p^2). + // Output: j=256*(A^2-3*C^2)^3/(C^4*(A^2-4*C^2)), which is j-invariant of Montgomery curve B*y^2=x^3+(A/C)*x^2+x or (equivalently) j-invariant of B'*y^2=C*x^3+A*x^2+C*x. 
+ oqs_sidh_cln16_f2elm_t t0, t1; + + oqs_sidh_cln16_fp2sqr751_mont(A, jinv); // jinv = A^2 + oqs_sidh_cln16_fp2sqr751_mont(C, t1); // t1 = C^2 + oqs_sidh_cln16_fp2add751(t1, t1, t0); // t0 = t1+t1 + oqs_sidh_cln16_fp2sub751(jinv, t0, t0); // t0 = jinv-t0 + oqs_sidh_cln16_fp2sub751(t0, t1, t0); // t0 = t0-t1 + oqs_sidh_cln16_fp2sub751(t0, t1, jinv); // jinv = t0-t1 + oqs_sidh_cln16_fp2sqr751_mont(t1, t1); // t1 = t1^2 + oqs_sidh_cln16_fp2mul751_mont(jinv, t1, jinv); // jinv = jinv*t1 + oqs_sidh_cln16_fp2add751(t0, t0, t0); // t0 = t0+t0 + oqs_sidh_cln16_fp2add751(t0, t0, t0); // t0 = t0+t0 + oqs_sidh_cln16_fp2sqr751_mont(t0, t1); // t1 = t0^2 + oqs_sidh_cln16_fp2mul751_mont(t0, t1, t0); // t0 = t0*t1 + oqs_sidh_cln16_fp2add751(t0, t0, t0); // t0 = t0+t0 + oqs_sidh_cln16_fp2add751(t0, t0, t0); // t0 = t0+t0 + oqs_sidh_cln16_fp2inv751_mont(jinv); // jinv = 1/jinv + oqs_sidh_cln16_fp2mul751_mont(jinv, t0, jinv); // jinv = t0*jinv +} + + +void oqs_sidh_cln16_xDBLADD(oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, oqs_sidh_cln16_f2elm_t xPQ, oqs_sidh_cln16_f2elm_t A24) { + // Simultaneous doubling and differential addition. + // Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, affine difference xPQ=x(P-Q) and Montgomery curve constant A24=(A+2)/4. + // Output: projective Montgomery points P <- 2*P = (X2P:Z2P) such that x(2P)=X2P/Z2P, and Q <- P+Q = (XQP:ZQP) such that = x(Q+P)=XQP/ZQP. 
+ oqs_sidh_cln16_f2elm_t t0, t1, t2; + + oqs_sidh_cln16_fp2add751(P->X, P->Z, t0); // t0 = XP+ZP + oqs_sidh_cln16_fp2sub751(P->X, P->Z, t1); // t1 = XP-ZP + oqs_sidh_cln16_fp2sqr751_mont(t0, P->X); // XP = (XP+ZP)^2 + oqs_sidh_cln16_fp2sub751(Q->X, Q->Z, t2); // t2 = XQ-ZQ + oqs_sidh_cln16_fp2add751(Q->X, Q->Z, Q->X); // XQ = XQ+ZQ + oqs_sidh_cln16_fp2mul751_mont(t0, t2, t0); // t0 = (XP+ZP)*(XQ-ZQ) + oqs_sidh_cln16_fp2sqr751_mont(t1, P->Z); // ZP = (XP-ZP)^2 + oqs_sidh_cln16_fp2mul751_mont(t1, Q->X, t1); // t1 = (XP-ZP)*(XQ+ZQ) + oqs_sidh_cln16_fp2sub751(P->X, P->Z, t2); // t2 = (XP+ZP)^2-(XP-ZP)^2 + oqs_sidh_cln16_fp2mul751_mont(P->X, P->Z, P->X); // XP = (XP+ZP)^2*(XP-ZP)^2 + oqs_sidh_cln16_fp2mul751_mont(t2, A24, Q->X); // XQ = A24*[(XP+ZP)^2-(XP-ZP)^2] + oqs_sidh_cln16_fp2sub751(t0, t1, Q->Z); // ZQ = (XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ) + oqs_sidh_cln16_fp2add751(Q->X, P->Z, P->Z); // ZP = A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2 + oqs_sidh_cln16_fp2add751(t0, t1, Q->X); // XQ = (XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ) + oqs_sidh_cln16_fp2mul751_mont(P->Z, t2, P->Z); // ZP = [A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2]*[(XP+ZP)^2-(XP-ZP)^2] + oqs_sidh_cln16_fp2sqr751_mont(Q->Z, Q->Z); // ZQ = [(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2 + oqs_sidh_cln16_fp2sqr751_mont(Q->X, Q->X); // XQ = [(XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)]^2 + oqs_sidh_cln16_fp2mul751_mont(Q->Z, xPQ, Q->Z); // ZQ = xPQ*[(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2 +} + + +void oqs_sidh_cln16_xDBL(oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, oqs_sidh_cln16_f2elm_t A24, oqs_sidh_cln16_f2elm_t C24) { + // Doubling of a Montgomery point in projective coordinates (X:Z). + // Input: projective Montgomery x-coordinates P = (X1:Z1), where x1=X1/Z1 and Montgomery curve constant A24/C24=(A/C+2)/4. + // Output: projective Montgomery x-coordinates Q = 2*P = (X2:Z2). 
+ oqs_sidh_cln16_f2elm_t t0, t1; + + oqs_sidh_cln16_fp2sub751(P->X, P->Z, t0); // t0 = X1-Z1 + oqs_sidh_cln16_fp2add751(P->X, P->Z, t1); // t1 = X1+Z1 + oqs_sidh_cln16_fp2sqr751_mont(t0, t0); // t0 = (X1-Z1)^2 + oqs_sidh_cln16_fp2sqr751_mont(t1, t1); // t1 = (X1+Z1)^2 + oqs_sidh_cln16_fp2mul751_mont(C24, t0, Q->Z); // Z2 = C24*(X1-Z1)^2 + oqs_sidh_cln16_fp2mul751_mont(t1, Q->Z, Q->X); // X2 = C24*(X1-Z1)^2*(X1+Z1)^2 + oqs_sidh_cln16_fp2sub751(t1, t0, t1); // t1 = (X1+Z1)^2-(X1-Z1)^2 + oqs_sidh_cln16_fp2mul751_mont(A24, t1, t0); // t0 = A24*[(X1+Z1)^2-(X1-Z1)^2] + oqs_sidh_cln16_fp2add751(Q->Z, t0, Q->Z); // Z2 = A24*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2 + oqs_sidh_cln16_fp2mul751_mont(Q->Z, t1, Q->Z); // Z2 = [A24*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2]*[(X1+Z1)^2-(X1-Z1)^2] +} + + +void oqs_sidh_cln16_xDBLe(oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t C, int e) { + // Computes [2^e](X:Z) on Montgomery curve with projective constant via e repeated doublings. + // Input: projective Montgomery x-coordinates P = (XP:ZP), such that xP=XP/ZP and Montgomery curve constant A/C. + // Output: projective Montgomery x-coordinates Q <- (2^e)*P. + oqs_sidh_cln16_f2elm_t A24num, A24den; + int i; + + oqs_sidh_cln16_fp2add751(C, C, A24num); // A24num = 2*C + oqs_sidh_cln16_fp2add751(A24num, A24num, A24den); // A24den = 4*C + oqs_sidh_cln16_fp2add751(A24num, A, A24num); // A24num = A+2*C + oqs_sidh_cln16_copy_words((digit_t *)P, (digit_t *)Q, 2 * 2 * NWORDS_FIELD); // Q = P + + for (i = 0; i < e; i++) { + oqs_sidh_cln16_xDBL(Q, Q, A24num, A24den); // Q <- 2*Q, in place + } +} + + +void oqs_sidh_cln16_xADD(oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, oqs_sidh_cln16_f2elm_t xPQ) { + // Differential addition. + // Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, and affine difference xPQ=x(P-Q). + // Output: projective Montgomery point P <- P+Q = (XQP:ZQP) such that x(Q+P)=XQP/ZQP.
+ oqs_sidh_cln16_f2elm_t t0, t1; + + oqs_sidh_cln16_fp2add751(P->X, P->Z, t0); // t0 = XP+ZP + oqs_sidh_cln16_fp2sub751(P->X, P->Z, t1); // t1 = XP-ZP + oqs_sidh_cln16_fp2sub751(Q->X, Q->Z, P->X); // XP = XQ-ZQ + oqs_sidh_cln16_fp2add751(Q->X, Q->Z, P->Z); // ZP = XQ+ZQ + oqs_sidh_cln16_fp2mul751_mont(t0, P->X, t0); // t0 = (XP+ZP)*(XQ-ZQ) + oqs_sidh_cln16_fp2mul751_mont(t1, P->Z, t1); // t1 = (XP-ZP)*(XQ+ZQ) + oqs_sidh_cln16_fp2sub751(t0, t1, P->Z); // ZP = (XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ) + oqs_sidh_cln16_fp2add751(t0, t1, P->X); // XP = (XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ) + oqs_sidh_cln16_fp2sqr751_mont(P->Z, P->Z); // ZP = [(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2 + oqs_sidh_cln16_fp2sqr751_mont(P->X, P->X); // XP = [(XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)]^2 + oqs_sidh_cln16_fp2mul751_mont(P->Z, xPQ, P->Z); // ZP = xPQ*[(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2 +} + + +void oqs_sidh_cln16_xDBL_basefield(oqs_sidh_cln16_point_basefield_proj_t P, oqs_sidh_cln16_point_basefield_proj_t Q) { + // Doubling of a Montgomery point in projective coordinates (X:Z) over the base field. + // Input: projective Montgomery x-coordinates P = (X1:Z1), where x1=X1/Z1 and Montgomery curve constant A24/C24=(A/C+2)/4. + // Output: projective Montgomery x-coordinates Q = 2*P = (X2:Z2). 
+ oqs_sidh_cln16_felm_t t0, t1; + + // NOTE: this function is fixed for A24=1, C24=2 + + oqs_sidh_cln16_fpsub751(P->X, P->Z, t0); // t0 = X1-Z1 + oqs_sidh_cln16_fpadd751(P->X, P->Z, t1); // t1 = X1+Z1 + oqs_sidh_cln16_fpsqr751_mont(t0, t0); // t0 = (X1-Z1)^2 + oqs_sidh_cln16_fpsqr751_mont(t1, t1); // t1 = (X1+Z1)^2 + oqs_sidh_cln16_fpadd751(t0, t0, Q->Z); // Z2 = C24*(X1-Z1)^2 + oqs_sidh_cln16_fpmul751_mont(t1, Q->Z, Q->X); // X2 = C24*(X1-Z1)^2*(X1+Z1)^2 + oqs_sidh_cln16_fpsub751(t1, t0, t1); // t1 = (X1+Z1)^2-(X1-Z1)^2 + oqs_sidh_cln16_fpadd751(Q->Z, t1, Q->Z); // Z2 = A24*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2 + oqs_sidh_cln16_fpmul751_mont(Q->Z, t1, Q->Z); // Z2 = [A24*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2]*[(X1+Z1)^2-(X1-Z1)^2] +} + + +void oqs_sidh_cln16_xDBLADD_basefield(oqs_sidh_cln16_point_basefield_proj_t P, oqs_sidh_cln16_point_basefield_proj_t Q, oqs_sidh_cln16_felm_t xPQ, oqs_sidh_cln16_felm_t A24) { + // Simultaneous doubling and differential addition over the base field. + // Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, affine difference xPQ=x(P-Q) and Montgomery curve constant A24=(A+2)/4. + // Output: projective Montgomery points P <- 2*P = (X2P:Z2P) such that x(2P)=X2P/Z2P, and Q <- P+Q = (XQP:ZQP) such that = x(Q+P)=XQP/ZQP. 
+ oqs_sidh_cln16_felm_t t0, t1, t2; + + // NOTE: this function is fixed for C24=2 + + oqs_sidh_cln16_fpadd751(P->X, P->Z, t0); // t0 = XP+ZP + oqs_sidh_cln16_fpsub751(P->X, P->Z, t1); // t1 = XP-ZP + oqs_sidh_cln16_fpsqr751_mont(t0, P->X); // XP = (XP+ZP)^2 + oqs_sidh_cln16_fpsub751(Q->X, Q->Z, t2); // t2 = XQ-ZQ + oqs_sidh_cln16_fpadd751(Q->X, Q->Z, Q->X); // XQ = XQ+ZQ + oqs_sidh_cln16_fpmul751_mont(t0, t2, t0); // t0 = (XP+ZP)*(XQ-ZQ) + oqs_sidh_cln16_fpsqr751_mont(t1, P->Z); // ZP = (XP-ZP)^2 + oqs_sidh_cln16_fpmul751_mont(t1, Q->X, t1); // t1 = (XP-ZP)*(XQ+ZQ) + oqs_sidh_cln16_fpsub751(P->X, P->Z, t2); // t2 = (XP+ZP)^2-(XP-ZP)^2 + + if (A24[0] == 1) { + oqs_sidh_cln16_fpadd751(P->Z, P->Z, P->Z); // ZP = C24*(XP-ZP)^2 + oqs_sidh_cln16_fpmul751_mont(P->X, P->Z, P->X); // XP = C24*(XP+ZP)^2*(XP-ZP)^2 + oqs_sidh_cln16_fpadd751(t2, P->Z, P->Z); // ZP = A24*[(XP+ZP)^2-(XP-ZP)^2]+C24*(XP-ZP)^2 + } else { + oqs_sidh_cln16_fpmul751_mont(P->X, P->Z, P->X); // XP = (XP+ZP)^2*(XP-ZP)^2 + oqs_sidh_cln16_fpmul751_mont(A24, t2, Q->X); // XQ = A24*[(XP+ZP)^2-(XP-ZP)^2] + oqs_sidh_cln16_fpadd751(P->Z, Q->X, P->Z); // ZP = A24*[(XP+ZP)^2-(XP-ZP)^2]+C24*(XP-ZP)^2 + } + + oqs_sidh_cln16_fpsub751(t0, t1, Q->Z); // ZQ = (XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ) + oqs_sidh_cln16_fpadd751(t0, t1, Q->X); // XQ = (XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ) + oqs_sidh_cln16_fpmul751_mont(P->Z, t2, P->Z); // ZP = [A24*[(XP+ZP)^2-(XP-ZP)^2]+C24*(XP-ZP)^2]*[(XP+ZP)^2-(XP-ZP)^2] + oqs_sidh_cln16_fpsqr751_mont(Q->Z, Q->Z); // ZQ = [(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2 + oqs_sidh_cln16_fpsqr751_mont(Q->X, Q->X); // XQ = [(XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)]^2 + oqs_sidh_cln16_fpmul751_mont(Q->Z, xPQ, Q->Z); // ZQ = xPQ*[(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2 +} + + +void oqs_sidh_cln16_ladder(oqs_sidh_cln16_felm_t x, digit_t *m, oqs_sidh_cln16_point_basefield_proj_t P, oqs_sidh_cln16_point_basefield_proj_t Q, oqs_sidh_cln16_felm_t A24, unsigned int order_bits, unsigned int order_fullbits, PCurveIsogenyStruct CurveIsogeny) 
{ + // The Montgomery ladder + // Inputs: the affine x-coordinate of a point P on E: B*y^2=x^3+A*x^2+x, + // scalar m + // curve constant A24 = (A+2)/4 + // order_bits = subgroup order bitlength + // order_fullbits = smallest multiple of 32 larger than the order bitlength + // Output: P = m*(x:1) and Q = (m+1)*(x:1) + // CurveIsogeny must be set up in advance using oqs_sidh_cln16_curve_initialize(). + unsigned int bit = 0, owords = NBITS_TO_NWORDS(order_fullbits); + digit_t mask; + int i; + + // Initializing with the points (1:0) and (x:1) + oqs_sidh_cln16_fpcopy751((digit_t *)CurveIsogeny->Montgomery_one, (digit_t *)P->X); + oqs_sidh_cln16_fpzero751(P->Z); + oqs_sidh_cln16_fpcopy751(x, Q->X); + oqs_sidh_cln16_fpcopy751((digit_t *)CurveIsogeny->Montgomery_one, (digit_t *)Q->Z); + + for (i = order_fullbits - order_bits; i > 0; i--) { + oqs_sidh_cln16_mp_shiftl1(m, owords); + } + + for (i = order_bits; i > 0; i--) { + bit = (unsigned int)(m[owords - 1] >> (RADIX - 1)); + oqs_sidh_cln16_mp_shiftl1(m, owords); + mask = 0 - (digit_t)bit; + + oqs_sidh_cln16_swap_points_basefield(P, Q, mask); + oqs_sidh_cln16_xDBLADD_basefield(P, Q, x, A24); // If bit=0 then P <- 2*P and Q <- P+Q, + oqs_sidh_cln16_swap_points_basefield(P, Q, mask); // else if bit=1 then Q <- 2*Q and P <- P+Q + } +} + + +SIDH_CRYPTO_STATUS oqs_sidh_cln16_BigMont_ladder(unsigned char *x, digit_t *m, unsigned char *xout, PCurveIsogenyStruct CurveIsogeny) { + // BigMont's scalar multiplication using the Montgomery ladder + // Inputs: x, the affine x-coordinate of a point P on BigMont: y^2=x^3+A*x^2+x, + // scalar m. + // Output: xout, the affine x-coordinate of m*(x:1) + // CurveIsogeny must be set up in advance using oqs_sidh_cln16_curve_initialize().
+ oqs_sidh_cln16_point_basefield_proj_t P1, P2; + digit_t scalar[BIGMONT_NWORDS_ORDER]; + oqs_sidh_cln16_felm_t X, A24 = {0}; + + A24[0] = (digit_t)CurveIsogeny->BigMont_A24; + oqs_sidh_cln16_to_mont(A24, A24); // Conversion to Montgomery representation + oqs_sidh_cln16_to_mont((digit_t *)x, X); + + oqs_sidh_cln16_copy_words(m, scalar, BIGMONT_NWORDS_ORDER); + oqs_sidh_cln16_ladder(X, scalar, P1, P2, A24, BIGMONT_NBITS_ORDER, BIGMONT_MAXBITS_ORDER, CurveIsogeny); + + oqs_sidh_cln16_fpinv751_mont(P1->Z); + oqs_sidh_cln16_fpmul751_mont(P1->X, P1->Z, (digit_t *)xout); + oqs_sidh_cln16_from_mont((digit_t *)xout, (digit_t *)xout); // Conversion to standard representation + + return SIDH_CRYPTO_SUCCESS; +} + + +SIDH_CRYPTO_STATUS oqs_sidh_cln16_secret_pt(oqs_sidh_cln16_point_basefield_t P, digit_t *m, unsigned int AliceOrBob, oqs_sidh_cln16_point_proj_t R, PCurveIsogenyStruct CurveIsogeny) { + // Computes key generation entirely in the base field by exploiting a 1-dimensional Montgomery ladder in the trace zero subgroup and + // recovering the y-coordinate for the addition. All operations in the base field GF(p). + // Input: The scalar m, point P = (x,y) on E in the base field subgroup and Q = (x1,y1*i) on E in the trace-zero subgroup. + // x,y,x1,y1 are all in the base field. + // Output: R = (RX0+RX1*i)/RZ0 (the x-coordinate of P+[m]Q). 
+ unsigned int nbits; + oqs_sidh_cln16_point_basefield_t Q; + oqs_sidh_cln16_point_basefield_proj_t S, T; + digit_t *X0 = (digit_t *)S->X, *Z0 = (digit_t *)S->Z, *X1 = (digit_t *)T->X, *Z1 = (digit_t *)T->Z; + digit_t *x = (digit_t *)P->x, *y = (digit_t *)P->y, *x1 = (digit_t *)Q->x, *y1 = (digit_t *)Q->y; + digit_t scalar[SIDH_NWORDS_ORDER]; + oqs_sidh_cln16_felm_t t0, t1, t2, A24 = {0}; + digit_t *RX0 = (digit_t *)R->X[0], *RX1 = (digit_t *)R->X[1], *RZ0 = (digit_t *)R->Z[0], *RZ1 = (digit_t *)R->Z[1]; + + oqs_sidh_cln16_fpcopy751(P->x, Q->x); // Q = (-XP,YP) + oqs_sidh_cln16_fpcopy751(P->y, Q->y); + oqs_sidh_cln16_fpneg751(Q->x); + + if (AliceOrBob == SIDH_ALICE) { + nbits = CurveIsogeny->oAbits; + } else if (AliceOrBob == SIDH_BOB) { + nbits = CurveIsogeny->oBbits; + } else { + return SIDH_CRYPTO_ERROR_INVALID_PARAMETER; + } + + // Setting curve constant to one (in standard representation), used in oqs_sidh_cln16_xDBLADD_basefield() in the ladder computation + A24[0] = 1; + oqs_sidh_cln16_copy_words(m, scalar, SIDH_NWORDS_ORDER); + oqs_sidh_cln16_ladder(Q->x, scalar, S, T, A24, nbits, CurveIsogeny->owordbits, CurveIsogeny); + + //RX0 := (2*y*y1*Z0^2*Z1 + Z1*(X0*x1+Z0)*(X0+x1*Z0) - X1*(X0-x1*Z0)^2)*(2*y*y1*Z0^2*Z1 - Z1*(X0*x1+Z0)*(X0+x1*Z0) + X1*(X0-x1*Z0)^2) - 4*y1^2*Z0*Z1^2*(X0+x*Z0)*(X0-x*Z0)^2; + //RX1 := 4*y*y1*Z0^2*Z1*(Z1*(X0*x1+Z0)*(X0+x1*Z0) - X1*(X0-x1*Z0)^2); + //RZ0 := 4*y1^2*Z0^2*Z1^2*(X0-x*Z0)^2; + + oqs_sidh_cln16_fpmul751_mont(x1, Z0, RX1); + oqs_sidh_cln16_fpmul751_mont(X0, x1, RX0); + oqs_sidh_cln16_fpsub751(X0, RX1, t0); + oqs_sidh_cln16_fpadd751(X0, RX1, RX1); + oqs_sidh_cln16_fpsqr751_mont(t0, t0); + oqs_sidh_cln16_fpadd751(RX0, Z0, RX0); + oqs_sidh_cln16_fpmul751_mont(t0, X1, t0); + oqs_sidh_cln16_fpmul751_mont(RX0, RX1, RX0); + oqs_sidh_cln16_fpmul751_mont(y1, Z1, t2); + oqs_sidh_cln16_fpmul751_mont(y, Z0, t1); + oqs_sidh_cln16_fpadd751(t2, t2, t2); + oqs_sidh_cln16_fpmul751_mont(t2, Z0, RX1); + oqs_sidh_cln16_fpmul751_mont(RX0, Z1, RX0); + 
oqs_sidh_cln16_fpsub751(RX0, t0, RX0); + oqs_sidh_cln16_fpmul751_mont(t1, RX1, t1); + oqs_sidh_cln16_fpsqr751_mont(RX1, t0); + oqs_sidh_cln16_fpmul751_mont(t2, RX1, t2); + oqs_sidh_cln16_fpmul751_mont(t1, RX0, RX1); + oqs_sidh_cln16_fpadd751(t1, RX0, RZ0); + oqs_sidh_cln16_fpadd751(RX1, RX1, RX1); + oqs_sidh_cln16_fpsub751(t1, RX0, t1); + oqs_sidh_cln16_fpmul751_mont(x, Z0, RX0); + oqs_sidh_cln16_fpmul751_mont(t1, RZ0, t1); + oqs_sidh_cln16_fpsub751(X0, RX0, RZ0); + oqs_sidh_cln16_fpadd751(X0, RX0, RX0); + oqs_sidh_cln16_fpsqr751_mont(RZ0, RZ0); + oqs_sidh_cln16_fpmul751_mont(t2, RX0, t2); + oqs_sidh_cln16_fpmul751_mont(t2, RZ0, t2); + oqs_sidh_cln16_fpmul751_mont(RZ0, t0, RZ0); + oqs_sidh_cln16_fpsub751(t1, t2, RX0); + oqs_sidh_cln16_fpzero751(RZ1); + + return SIDH_CRYPTO_SUCCESS; +} + + +SIDH_CRYPTO_STATUS oqs_sidh_cln16_ladder_3_pt(oqs_sidh_cln16_f2elm_t xP, oqs_sidh_cln16_f2elm_t xQ, oqs_sidh_cln16_f2elm_t xPQ, digit_t *m, unsigned int AliceOrBob, oqs_sidh_cln16_point_proj_t W, oqs_sidh_cln16_f2elm_t A, PCurveIsogenyStruct CurveIsogeny) { + // Computes P+[m]Q via x-only arithmetic. Algorithm by De Feo, Jao and Plut. + // Input: three affine points xP,xQ,xPQ and Montgomery constant A. 
+ // Output: projective Montgomery x-coordinates of x(P+[m]Q)=WX/WZ + oqs_sidh_cln16_point_proj_t U = oqs_sidh_cln16_point_proj_t_EMPTY, V = oqs_sidh_cln16_point_proj_t_EMPTY; + oqs_sidh_cln16_f2elm_t A24, A24num, constant1 = { {0} }, constant2; + oqs_sidh_cln16_felm_t temp_scalar; + unsigned int bit = 0, nbits, fullbits = CurveIsogeny->owordbits; + digit_t mask; + int i; + + if (AliceOrBob == SIDH_ALICE) { + nbits = CurveIsogeny->oAbits; + } else if (AliceOrBob == SIDH_BOB) { + nbits = CurveIsogeny->oBbits; + } else { + return SIDH_CRYPTO_ERROR_INVALID_PARAMETER; + } + + oqs_sidh_cln16_fpcopy751((digit_t *)CurveIsogeny->Montgomery_one, constant1[0]); + oqs_sidh_cln16_fp2add751(constant1, constant1, constant1); // constant = 2 + oqs_sidh_cln16_fp2add751(A, constant1, A24num); + oqs_sidh_cln16_fp2div2_751(A24num, A24); + oqs_sidh_cln16_fp2div2_751(A24, A24); + + // Initializing with the points (1:0), (xQ:1) and (xP:1) + oqs_sidh_cln16_fpcopy751((digit_t *)CurveIsogeny->Montgomery_one, (digit_t *)U->X); + oqs_sidh_cln16_fp2copy751(xQ, V->X); + oqs_sidh_cln16_fpcopy751((digit_t *)CurveIsogeny->Montgomery_one, (digit_t *)V->Z); + oqs_sidh_cln16_fp2copy751(xP, W->X); + oqs_sidh_cln16_fpcopy751((digit_t *)CurveIsogeny->Montgomery_one, (digit_t *)W->Z); + oqs_sidh_cln16_fpzero751(W->Z[1]); + oqs_sidh_cln16_fpcopy751(m, temp_scalar); + + for (i = fullbits - nbits; i > 0; i--) { + oqs_sidh_cln16_mp_shiftl1(temp_scalar, SIDH_NWORDS_ORDER); + } + + for (i = nbits; i > 0; i--) { + bit = (unsigned int)(temp_scalar[SIDH_NWORDS_ORDER - 1] >> (RADIX - 1)); + oqs_sidh_cln16_mp_shiftl1(temp_scalar, SIDH_NWORDS_ORDER); + mask = 0 - (digit_t)bit; + + oqs_sidh_cln16_swap_points(W, U, mask); + oqs_sidh_cln16_swap_points(U, V, mask); + oqs_sidh_cln16_select_f2elm(xP, xQ, constant1, mask); + oqs_sidh_cln16_select_f2elm(xQ, xPQ, constant2, mask); + oqs_sidh_cln16_xADD(W, U, constant1); // If bit=0 then W <- W+U, U <- 2*U and V <- U+V, + oqs_sidh_cln16_xDBLADD(U, V, constant2, A24); // else 
if bit=1 then U <- U+V, V <- 2*V and W <- V+W + oqs_sidh_cln16_swap_points(U, V, mask); + oqs_sidh_cln16_swap_points(W, U, mask); + } + + return SIDH_CRYPTO_SUCCESS; +} + + +void oqs_sidh_cln16_get_4_isog(oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t C, oqs_sidh_cln16_f2elm_t *coeff) { + // Computes the corresponding 4-isogeny of a projective Montgomery point (X4:Z4) of order 4. + // Input: projective point of order four P = (X4:Z4). + // Output: the 4-isogenous Montgomery curve with projective coefficient A/C and the 5 coefficients + // that are used to evaluate the isogeny at a point in oqs_sidh_cln16_eval_4_isog(). + + oqs_sidh_cln16_fp2add751(P->X, P->Z, coeff[0]); // coeff[0] = X4+Z4 + oqs_sidh_cln16_fp2sqr751_mont(P->X, coeff[3]); // coeff[3] = X4^2 + oqs_sidh_cln16_fp2sqr751_mont(P->Z, coeff[4]); // coeff[4] = Z4^2 + oqs_sidh_cln16_fp2sqr751_mont(coeff[0], coeff[0]); // coeff[0] = (X4+Z4)^2 + oqs_sidh_cln16_fp2add751(coeff[3], coeff[4], coeff[1]); // coeff[1] = X4^2+Z4^2 + oqs_sidh_cln16_fp2sub751(coeff[3], coeff[4], coeff[2]); // coeff[2] = X4^2-Z4^2 + oqs_sidh_cln16_fp2sqr751_mont(coeff[3], coeff[3]); // coeff[3] = X4^4 + oqs_sidh_cln16_fp2sqr751_mont(coeff[4], coeff[4]); // coeff[4] = Z4^4 + oqs_sidh_cln16_fp2add751(coeff[3], coeff[3], A); // A = 2*X4^4 + oqs_sidh_cln16_fp2sub751(coeff[0], coeff[1], coeff[0]); // coeff[0] = 2*X4*Z4 = (X4+Z4)^2 - (X4^2+Z4^2) + oqs_sidh_cln16_fp2sub751(A, coeff[4], A); // A = 2*X4^4-Z4^4 + oqs_sidh_cln16_fp2copy751(coeff[4], C); // C = Z4^4 + oqs_sidh_cln16_fp2add751(A, A, A); // A = 2(2*X4^4-Z4^4) +} + + +void oqs_sidh_cln16_eval_4_isog(oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_f2elm_t *coeff) { + // Evaluates the isogeny at the point (X:Z) in the domain of the isogeny, given a 4-isogeny phi defined + // by the 5 coefficients in coeff (computed in the function oqs_sidh_cln16_get_4_isog()). + // Inputs: the coefficients defining the isogeny, and the projective point P = (X:Z).
+ // Output: the projective point P = phi(P) = (X:Z) in the codomain. + oqs_sidh_cln16_f2elm_t t0, t1; + + oqs_sidh_cln16_fp2mul751_mont(P->X, coeff[0], P->X); // X = coeff[0]*X + oqs_sidh_cln16_fp2mul751_mont(P->Z, coeff[1], t0); // t0 = coeff[1]*Z + oqs_sidh_cln16_fp2sub751(P->X, t0, P->X); // X = X-t0 + oqs_sidh_cln16_fp2mul751_mont(P->Z, coeff[2], P->Z); // Z = coeff[2]*Z + oqs_sidh_cln16_fp2sub751(P->X, P->Z, t0); // t0 = X-Z + oqs_sidh_cln16_fp2mul751_mont(P->Z, P->X, P->Z); // Z = X*Z + oqs_sidh_cln16_fp2sqr751_mont(t0, t0); // t0 = t0^2 + oqs_sidh_cln16_fp2add751(P->Z, P->Z, P->Z); // Z = Z+Z + oqs_sidh_cln16_fp2add751(P->Z, P->Z, P->Z); // Z = Z+Z + oqs_sidh_cln16_fp2add751(P->Z, t0, P->X); // X = t0+Z + oqs_sidh_cln16_fp2mul751_mont(P->Z, t0, P->Z); // Z = t0*Z + oqs_sidh_cln16_fp2mul751_mont(P->Z, coeff[4], P->Z); // Z = coeff[4]*Z + oqs_sidh_cln16_fp2mul751_mont(t0, coeff[4], t0); // t0 = t0*coeff[4] + oqs_sidh_cln16_fp2mul751_mont(P->X, coeff[3], t1); // t1 = X*coeff[3] + oqs_sidh_cln16_fp2sub751(t0, t1, t0); // t0 = t0-t1 + oqs_sidh_cln16_fp2mul751_mont(P->X, t0, P->X); // X = X*t0 +} + + +void oqs_sidh_cln16_first_4_isog(oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t Aout, oqs_sidh_cln16_f2elm_t Cout, PCurveIsogenyStruct CurveIsogeny) { + // Computes first 4-isogeny computed by Alice. + // Inputs: projective point P = (X4:Z4) and curve constant A. + // Output: the projective point P = (X4:Z4) in the codomain and isogenous curve constant Aout/Cout. 
+ oqs_sidh_cln16_f2elm_t t0 = { {0} }, t1, t2; + + oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, t0[0]); + oqs_sidh_cln16_fpadd751(t0[0], t0[0], t0[0]); // t0 = 2 (in Montgomery domain) + oqs_sidh_cln16_fp2sub751(A, t0, Cout); // Cout = A-2 + oqs_sidh_cln16_fpadd751(t0[0], t0[0], t1[0]); + oqs_sidh_cln16_fpadd751(t0[0], t1[0], t0[0]); // t0 = 6 (in Montgomery domain) + oqs_sidh_cln16_fp2add751(P->X, P->Z, t1); // t1 = X+Z + oqs_sidh_cln16_fp2sub751(P->X, P->Z, t2); // t2 = X-Z + oqs_sidh_cln16_fp2sqr751_mont(t1, t1); // t1 = (X+Z)^2 + oqs_sidh_cln16_fp2add751(A, t0, Aout); // A = A+6 + oqs_sidh_cln16_fp2mul751_mont(P->X, P->Z, P->Z); // Z = X*Z + oqs_sidh_cln16_fp2neg751(P->Z); // Z = -X*Z + oqs_sidh_cln16_fp2sqr751_mont(t2, t2); // t2 = (X-Z)^2 + oqs_sidh_cln16_fp2mul751_mont(P->Z, Cout, P->Z); // Z = -C*X*Z + oqs_sidh_cln16_fp2add751(Aout, Aout, Aout); // Aout = 2*A+12 + oqs_sidh_cln16_fp2sub751(t1, P->Z, P->X); // X = (X+Z)^2+C*X*Z + oqs_sidh_cln16_fp2mul751_mont(P->Z, t2, P->Z); // Z = -C*X*Z*(X-Z)^2 + oqs_sidh_cln16_fp2mul751_mont(P->X, t1, P->X); // X = (X+Z)^2*[(X+Z)^2+C*X*Z] +} + + +void oqs_sidh_cln16_xTPL(oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, oqs_sidh_cln16_f2elm_t A24, oqs_sidh_cln16_f2elm_t C24) { + // Tripling of a Montgomery point in projective coordinates (X:Z). + // Input: projective Montgomery x-coordinates P = (X:Z), where x=X/Z and Montgomery curve constant A/C. + // Output: projective Montgomery x-coordinates Q = 3*P = (X3:Z3). 
+ oqs_sidh_cln16_f2elm_t t0, t1, t2, t3, t4, t5; + + oqs_sidh_cln16_fp2sub751(P->X, P->Z, t2); // t2 = X-Z + oqs_sidh_cln16_fp2add751(P->X, P->Z, t3); // t3 = X+Z + oqs_sidh_cln16_fp2sqr751_mont(t2, t0); // t0 = t2^2 + oqs_sidh_cln16_fp2sqr751_mont(t3, t1); // t1 = t3^2 + oqs_sidh_cln16_fp2mul751_mont(t0, C24, t4); // t4 = C24*t0 + oqs_sidh_cln16_fp2mul751_mont(t1, t4, t5); // t5 = t4*t1 + oqs_sidh_cln16_fp2sub751(t1, t0, t1); // t1 = t1-t0 + oqs_sidh_cln16_fp2mul751_mont(A24, t1, t0); // t0 = A24*t1 + oqs_sidh_cln16_fp2add751(t4, t0, t4); // t4 = t4+t0 + oqs_sidh_cln16_fp2mul751_mont(t1, t4, t4); // t4 = t4*t1 + oqs_sidh_cln16_fp2add751(t5, t4, t0); // t0 = t5+t4 + oqs_sidh_cln16_fp2sub751(t5, t4, t1); // t1 = t5-t4 + oqs_sidh_cln16_fp2mul751_mont(t0, t2, t0); // t0 = t2*t0 + oqs_sidh_cln16_fp2mul751_mont(t1, t3, t1); // t1 = t3*t1 + oqs_sidh_cln16_fp2sub751(t0, t1, t4); // t4 = t0-t1 + oqs_sidh_cln16_fp2add751(t0, t1, t5); // t5 = t0+t1 + oqs_sidh_cln16_fp2sqr751_mont(t4, t4); // t4 = t4^2 + oqs_sidh_cln16_fp2sqr751_mont(t5, t5); // t5 = t5^2 + oqs_sidh_cln16_fp2mul751_mont(P->X, t4, t4); // t4 = X*t4 + oqs_sidh_cln16_fp2mul751_mont(P->Z, t5, Q->X); // X3 = Z*t5 + oqs_sidh_cln16_fp2copy751(t4, Q->Z); // Z3 = t4 +} + + +void oqs_sidh_cln16_xTPLe(oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t C, int e) { + // Computes [3^e](X:Z) on Montgomery curve with projective constant via e repeated triplings. + // Input: projective Montgomery x-coordinates P = (XP:ZP), such that xP=XP/ZP and Montgomery curve constant A/C. + // Output: projective Montgomery x-coordinates Q <- (3^e)*P.
+ oqs_sidh_cln16_f2elm_t A24, C24; + int i; + + oqs_sidh_cln16_fp2add751(C, C, A24); + oqs_sidh_cln16_fp2add751(A24, A24, C24); + oqs_sidh_cln16_fp2add751(A24, A, A24); + oqs_sidh_cln16_copy_words((digit_t *)P, (digit_t *)Q, 2 * 2 * NWORDS_FIELD); + + for (i = 0; i < e; i++) { + oqs_sidh_cln16_xTPL(Q, Q, A24, C24); + } +} + + +void oqs_sidh_cln16_get_3_isog(oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t C) { + // Computes the corresponding 3-isogeny of a projective Montgomery point (X3:Z3) of order 3. + // Input: projective point of order three P = (X3:Z3). + // Output: the 3-isogenous Montgomery curve with projective coefficient A/C. + oqs_sidh_cln16_f2elm_t t0, t1; + + oqs_sidh_cln16_fp2sqr751_mont(P->X, t0); // t0 = X^2 + oqs_sidh_cln16_fp2add751(t0, t0, t1); // t1 = 2*t0 + oqs_sidh_cln16_fp2add751(t0, t1, t0); // t0 = t0+t1 + oqs_sidh_cln16_fp2sqr751_mont(P->Z, t1); // t1 = Z^2 + oqs_sidh_cln16_fp2sqr751_mont(t1, A); // A = t1^2 + oqs_sidh_cln16_fp2add751(t1, t1, t1); // t1 = 2*t1 + oqs_sidh_cln16_fp2add751(t1, t1, C); // C = 2*t1 + oqs_sidh_cln16_fp2sub751(t0, t1, t1); // t1 = t0-t1 + oqs_sidh_cln16_fp2mul751_mont(t0, t1, t1); // t1 = t0*t1 + oqs_sidh_cln16_fp2sub751(A, t1, A); // A = A-t1 + oqs_sidh_cln16_fp2sub751(A, t1, A); // A = A-t1 + oqs_sidh_cln16_fp2sub751(A, t1, A); // A = A-t1 + oqs_sidh_cln16_fp2mul751_mont(P->X, P->Z, t1); // t1 = X*Z // ms trade-off possible (1 mul for 1sqr + 1add + 2sub) + oqs_sidh_cln16_fp2mul751_mont(C, t1, C); // C = C*t1 +} + + +void oqs_sidh_cln16_eval_3_isog(oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q) { + // Computes the 3-isogeny R=phi(X:Z), given projective point (X3:Z3) of order 3 on a Montgomery curve and a point Q = (X:Z). + // Inputs: projective points P = (X3:Z3) and Q = (X:Z). + // Output: the projective point R = phi(Q) = (XX:ZZ), written back into Q.
+ oqs_sidh_cln16_f2elm_t t0, t1, t2; + + oqs_sidh_cln16_fp2mul751_mont(P->X, Q->X, t0); // t0 = X3*X + oqs_sidh_cln16_fp2mul751_mont(P->Z, Q->X, t1); // t1 = Z3*X + oqs_sidh_cln16_fp2mul751_mont(P->Z, Q->Z, t2); // t2 = Z3*Z + oqs_sidh_cln16_fp2sub751(t0, t2, t0); // t0 = X3*X-Z3*Z + oqs_sidh_cln16_fp2mul751_mont(P->X, Q->Z, t2); // t2 = X3*Z + oqs_sidh_cln16_fp2sub751(t1, t2, t1); // t1 = Z3*X-X3*Z + oqs_sidh_cln16_fp2sqr751_mont(t0, t0); // t0 = (X3*X-Z3*Z)^2 + oqs_sidh_cln16_fp2sqr751_mont(t1, t1); // t1 = (Z3*X-X3*Z)^2 + oqs_sidh_cln16_fp2mul751_mont(Q->X, t0, Q->X); // X = X*(X3*X-Z3*Z)^2 + oqs_sidh_cln16_fp2mul751_mont(Q->Z, t1, Q->Z); // Z = Z*(Z3*X-X3*Z)^2 +} + + +void oqs_sidh_cln16_inv_3_way(oqs_sidh_cln16_f2elm_t z1, oqs_sidh_cln16_f2elm_t z2, oqs_sidh_cln16_f2elm_t z3) { + // 3-way simultaneous inversion + // Input: z1,z2,z3 + // Output: 1/z1,1/z2,1/z3 (override inputs). + oqs_sidh_cln16_f2elm_t t0, t1, t2, t3; + + oqs_sidh_cln16_fp2mul751_mont(z1, z2, t0); // t0 = z1*z2 + oqs_sidh_cln16_fp2mul751_mont(z3, t0, t1); // t1 = z1*z2*z3 + oqs_sidh_cln16_fp2inv751_mont(t1); // t1 = 1/(z1*z2*z3) + oqs_sidh_cln16_fp2mul751_mont(z3, t1, t2); // t2 = 1/(z1*z2) + oqs_sidh_cln16_fp2mul751_mont(t2, z2, t3); // t3 = 1/z1 + oqs_sidh_cln16_fp2mul751_mont(t2, z1, z2); // z2 = 1/z2 + oqs_sidh_cln16_fp2mul751_mont(t0, t1, z3); // z3 = 1/z3 + oqs_sidh_cln16_fp2copy751(t3, z1); // z1 = 1/z1 +} + + +void oqs_sidh_cln16_distort_and_diff(oqs_sidh_cln16_felm_t xP, oqs_sidh_cln16_point_proj_t D, PCurveIsogenyStruct CurveIsogeny) { + // Computing the point (x(Q-P),z(Q-P)) + // Input: coordinate xP of point P=(xP,yP) + // Output: the point D = (x(Q-P),z(Q-P)), where Q=tau(P). 
+ oqs_sidh_cln16_felm_t one; + + oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, one); + oqs_sidh_cln16_fpsqr751_mont(xP, D->X[0]); // XD = xP^2 + oqs_sidh_cln16_fpadd751(D->X[0], one, D->X[0]); // XD = XD+1 + oqs_sidh_cln16_fpcopy751(D->X[0], D->X[1]); // XD = XD*i + oqs_sidh_cln16_fpzero751(D->X[0]); + oqs_sidh_cln16_fpadd751(xP, xP, D->Z[0]); // ZD = xP+xP +} + + +void oqs_sidh_cln16_get_A(oqs_sidh_cln16_f2elm_t xP, oqs_sidh_cln16_f2elm_t xQ, oqs_sidh_cln16_f2elm_t xR, oqs_sidh_cln16_f2elm_t A, PCurveIsogenyStruct CurveIsogeny) { + // Given the x-coordinates of P, Q, and R, returns the value A corresponding to the Montgomery curve E_A: y^2=x^3+A*x^2+x such that R=Q-P on E_A. + // Input: the x-coordinates xP, xQ, and xR of the points P, Q and R. + // Output: the coefficient A corresponding to the curve E_A: y^2=x^3+A*x^2+x. + oqs_sidh_cln16_f2elm_t t0, t1, one = { {0} }; + + oqs_sidh_cln16_fpcopy751(CurveIsogeny->Montgomery_one, one[0]); + oqs_sidh_cln16_fp2add751(xP, xQ, t1); // t1 = xP+xQ + oqs_sidh_cln16_fp2mul751_mont(xP, xQ, t0); // t0 = xP*xQ + oqs_sidh_cln16_fp2mul751_mont(xR, t1, A); // A = xR*t1 + oqs_sidh_cln16_fp2add751(t0, A, A); // A = A+t0 + oqs_sidh_cln16_fp2mul751_mont(t0, xR, t0); // t0 = t0*xR + oqs_sidh_cln16_fp2sub751(A, one, A); // A = A-1 + oqs_sidh_cln16_fp2add751(t0, t0, t0); // t0 = t0+t0 + oqs_sidh_cln16_fp2add751(t1, xR, t1); // t1 = t1+xR + oqs_sidh_cln16_fp2add751(t0, t0, t0); // t0 = t0+t0 + oqs_sidh_cln16_fp2sqr751_mont(A, A); // A = A^2 + oqs_sidh_cln16_fp2inv751_mont(t0); // t0 = 1/t0 + oqs_sidh_cln16_fp2mul751_mont(A, t0, A); // A = A*t0 + oqs_sidh_cln16_fp2sub751(A, t1, A); // Afinal = A-t1 +} diff --git a/src/kex_sidh_cln16/fpx.c b/src/kex_sidh_cln16/fpx.c new file mode 100644 index 000000000..e3c50c67b --- /dev/null +++ b/src/kex_sidh_cln16/fpx.c @@ -0,0 +1,611 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny-based cryptography 
library for Diffie-Hellman key +* exchange providing 128 bits of quantum security and 192 bits of classical security. +* +* Copyright (c) Microsoft Corporation. All rights reserved. +* +* +* Abstract: core functions over GF(p751^2) and field operations over the prime p751 +* +*********************************************************************************************/ + +#include "SIDH_internal.h" + + +// Global constants +const uint64_t p751[NWORDS_FIELD] = { 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xEEAFFFFFFFFFFFFF, + 0xE3EC968549F878A8, 0xDA959B1A13F7CC76, 0x084E9867D6EBE876, 0x8562B5045CB25748, 0x0E12909F97BADC66, 0x00006FE5D541F71C + }; +const uint64_t p751p1[NWORDS_FIELD] = { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xEEB0000000000000, + 0xE3EC968549F878A8, 0xDA959B1A13F7CC76, 0x084E9867D6EBE876, 0x8562B5045CB25748, 0x0E12909F97BADC66, 0x00006FE5D541F71C + }; +const uint64_t p751x2[NWORDS_FIELD] = { 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xDD5FFFFFFFFFFFFF, + 0xC7D92D0A93F0F151, 0xB52B363427EF98ED, 0x109D30CFADD7D0ED, 0x0AC56A08B964AE90, 0x1C25213F2F75B8CD, 0x0000DFCBAA83EE38 + }; +const uint64_t Montgomery_R2[NWORDS_FIELD] = { 0x233046449DAD4058, 0xDB010161A696452A, 0x5E36941472E3FD8E, 0xF40BFE2082A2E706, 0x4932CCA8904F8751 , 0x1F735F1F1EE7FC81, + 0xA24F4D80C1048E18, 0xB56C383CCDB607C5, 0x441DD47B735F9C90, 0x5673ED2C6A6AC82A, 0x06C905261132294B, 0x000041AD830F1F35 + }; + + +/*******************************************************/ +/************* Field arithmetic functions **************/ + +__inline void oqs_sidh_cln16_fpcopy751(oqs_sidh_cln16_felm_t a, oqs_sidh_cln16_felm_t c) { + // Copy of a field element, c = a + unsigned int i; + + for (i = 0; i < NWORDS_FIELD; i++) { + c[i] = a[i]; + } +} + + +__inline void oqs_sidh_cln16_fpzero751(oqs_sidh_cln16_felm_t a) { + // Zeroing a 
field element, a = 0 + unsigned int i; + + for (i = 0; i < NWORDS_FIELD; i++) { + a[i] = 0; + } +} + + +void oqs_sidh_cln16_to_mont(oqs_sidh_cln16_felm_t a, oqs_sidh_cln16_felm_t mc) { + // Conversion to Montgomery representation + // mc = a*R^2*R^-1 mod p751 = a*R mod p751, where a in [0, p751-1] + // The Montgomery constant R^2 mod p751 is the global value "Montgomery_R2". + + oqs_sidh_cln16_fpmul751_mont(a, (digit_t *)&Montgomery_R2, mc); +} + + +void oqs_sidh_cln16_from_mont(oqs_sidh_cln16_felm_t ma, oqs_sidh_cln16_felm_t c) { + // Conversion from Montgomery representation to standard representation + // c = ma*R^-1 mod p751 = a mod p751, where ma in [0, p751-1]. + digit_t one[NWORDS_FIELD] = {0}; + + one[0] = 1; + oqs_sidh_cln16_fpmul751_mont(ma, one, c); + oqs_sidh_cln16_fpcorrection751(c); +} + + +UNUSED static __inline unsigned int is_felm_zero(oqs_sidh_cln16_felm_t x) { + // Is x = 0? return 1 (TRUE) if condition is true, 0 (FALSE) otherwise + // NOTE: this function does not run in constant-time so it can only be used in functions + // incorporating countermeasures such as projective randomization. + unsigned int i; + + for (i = 0; i < NWORDS_FIELD; i++) { + if (x[i] != 0) { + return false; + } + } + return true; +} + + +UNUSED static __inline unsigned int is_felm_even(oqs_sidh_cln16_felm_t x) { + // Is x even? return 1 (TRUE) if condition is true, 0 (FALSE) otherwise + return (unsigned int)((x[0] & 1) ^ 1); +} + + +UNUSED static __inline unsigned int is_felm_lt(oqs_sidh_cln16_felm_t x, oqs_sidh_cln16_felm_t y) { + // Is x < y? return 1 (TRUE) if condition is true, 0 (FALSE) otherwise + // NOTE: this function does not run in constant-time so it can only be used in functions + // incorporating countermeasures such as projective randomization. 
+ int i; + + for (i = NWORDS_FIELD - 1; i >= 0; i--) { + if (x[i] < y[i]) { + return true; + } else if (x[i] > y[i]) { + return false; + } + } + return false; +} + + +void oqs_sidh_cln16_copy_words(digit_t *a, digit_t *c, unsigned int nwords) { + // Copy wordsize digits, c = a, where lng(a) = nwords + unsigned int i; + + for (i = 0; i < nwords; i++) { + c[i] = a[i]; + } +} + + +__inline unsigned int oqs_sidh_cln16_mp_sub(digit_t *a, digit_t *b, digit_t *c, unsigned int nwords) { + // Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = nwords. Returns the borrow bit + unsigned int i, borrow = 0; + + for (i = 0; i < nwords; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + + return borrow; +} + + +__inline unsigned int oqs_sidh_cln16_mp_add(digit_t *a, digit_t *b, digit_t *c, unsigned int nwords) { + // Multiprecision addition, c = a+b, where lng(a) = lng(b) = nwords. Returns the carry bit + unsigned int i, carry = 0; + + for (i = 0; i < nwords; i++) { + ADDC(carry, a[i], b[i], carry, c[i]); + } + + return carry; +} + + +void oqs_sidh_cln16_mp_shiftr1(digit_t *x, unsigned int nwords) { + // Multiprecision right shift by one + unsigned int i; + + for (i = 0; i < nwords - 1; i++) { + SHIFTR(x[i + 1], x[i], 1, x[i], RADIX); + } + x[nwords - 1] >>= 1; +} + + +void oqs_sidh_cln16_mp_shiftl1(digit_t *x, unsigned int nwords) { + // Multiprecision left shift by one + int i; + + for (i = nwords - 1; i > 0; i--) { + SHIFTL(x[i], x[i - 1], 1, x[i], RADIX); + } + x[0] <<= 1; +} + + +void oqs_sidh_cln16_fpmul751_mont(oqs_sidh_cln16_felm_t ma, oqs_sidh_cln16_felm_t mb, oqs_sidh_cln16_felm_t mc) { + // 751-bit Comba multi-precision multiplication, c = a*b mod p751 + oqs_sidh_cln16_dfelm_t temp = {0}; + + oqs_sidh_cln16_mp_mul(ma, mb, temp, NWORDS_FIELD); + oqs_sidh_cln16_rdc_mont(temp, mc); +} + + +void oqs_sidh_cln16_fpsqr751_mont(oqs_sidh_cln16_felm_t ma, oqs_sidh_cln16_felm_t mc) { + // 751-bit Comba multi-precision squaring, c = a^2 mod p751 +
oqs_sidh_cln16_dfelm_t temp = {0}; + + oqs_sidh_cln16_mp_mul(ma, ma, temp, NWORDS_FIELD); + oqs_sidh_cln16_rdc_mont(temp, mc); +} + + +void oqs_sidh_cln16_fpinv751_mont(oqs_sidh_cln16_felm_t a) { + // Field inversion using Montgomery arithmetic, a = a^-1*R mod p751 + oqs_sidh_cln16_felm_t t[27], tt; + unsigned int i, j; + + // Precomputed table + oqs_sidh_cln16_fpsqr751_mont(a, tt); + oqs_sidh_cln16_fpmul751_mont(a, tt, t[0]); + oqs_sidh_cln16_fpmul751_mont(t[0], tt, t[1]); + oqs_sidh_cln16_fpmul751_mont(t[1], tt, t[2]); + oqs_sidh_cln16_fpmul751_mont(t[2], tt, t[3]); + oqs_sidh_cln16_fpmul751_mont(t[3], tt, t[3]); + for (i = 3; i <= 8; i++) { + oqs_sidh_cln16_fpmul751_mont(t[i], tt, t[i + 1]); + } + oqs_sidh_cln16_fpmul751_mont(t[9], tt, t[9]); + for (i = 9; i <= 20; i++) { + oqs_sidh_cln16_fpmul751_mont(t[i], tt, t[i + 1]); + } + oqs_sidh_cln16_fpmul751_mont(t[21], tt, t[21]); + for (i = 21; i <= 24; i++) { + oqs_sidh_cln16_fpmul751_mont(t[i], tt, t[i + 1]); + } + oqs_sidh_cln16_fpmul751_mont(t[25], tt, t[25]); + oqs_sidh_cln16_fpmul751_mont(t[25], tt, t[26]); + + oqs_sidh_cln16_fpcopy751(a, tt); + for (i = 0; i < 6; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[20], tt, tt); + for (i = 0; i < 6; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[24], tt, tt); + for (i = 0; i < 6; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[11], tt, tt); + for (i = 0; i < 6; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[8], tt, tt); + for (i = 0; i < 8; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[2], tt, tt); + for (i = 0; i < 6; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[23], tt, tt); + for (i = 0; i < 6; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[2], tt, tt); + for (i = 0; i < 9; i++) { + 
oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[2], tt, tt); + for (i = 0; i < 10; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[15], tt, tt); + for (i = 0; i < 8; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[13], tt, tt); + for (i = 0; i < 8; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[26], tt, tt); + for (i = 0; i < 8; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[20], tt, tt); + for (i = 0; i < 6; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[11], tt, tt); + for (i = 0; i < 6; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[10], tt, tt); + for (i = 0; i < 6; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[14], tt, tt); + for (i = 0; i < 6; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[4], tt, tt); + for (i = 0; i < 10; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[18], tt, tt); + for (i = 0; i < 6; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[1], tt, tt); + for (i = 0; i < 7; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[22], tt, tt); + for (i = 0; i < 10; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[6], tt, tt); + for (i = 0; i < 7; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[24], tt, tt); + for (i = 0; i < 6; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[9], tt, tt); + for (i = 0; i < 8; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[18], tt, tt); + for (i = 0; i < 6; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[17], tt, tt); + for (i 
= 0; i < 8; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(a, tt, tt); + for (i = 0; i < 10; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[16], tt, tt); + for (i = 0; i < 6; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[7], tt, tt); + for (i = 0; i < 6; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[0], tt, tt); + for (i = 0; i < 7; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[12], tt, tt); + for (i = 0; i < 7; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[19], tt, tt); + for (i = 0; i < 6; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[22], tt, tt); + for (i = 0; i < 6; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[25], tt, tt); + for (i = 0; i < 7; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[2], tt, tt); + for (i = 0; i < 6; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[10], tt, tt); + for (i = 0; i < 7; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[22], tt, tt); + for (i = 0; i < 8; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[18], tt, tt); + for (i = 0; i < 6; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[4], tt, tt); + for (i = 0; i < 6; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[14], tt, tt); + for (i = 0; i < 7; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[13], tt, tt); + for (i = 0; i < 6; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[5], tt, tt); + for (i = 0; i < 6; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[23], 
tt, tt); + for (i = 0; i < 6; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[21], tt, tt); + for (i = 0; i < 6; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[2], tt, tt); + for (i = 0; i < 7; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[23], tt, tt); + for (i = 0; i < 8; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[12], tt, tt); + for (i = 0; i < 6; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[9], tt, tt); + for (i = 0; i < 6; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[3], tt, tt); + for (i = 0; i < 7; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[13], tt, tt); + for (i = 0; i < 7; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[17], tt, tt); + for (i = 0; i < 8; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[26], tt, tt); + for (i = 0; i < 8; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[5], tt, tt); + for (i = 0; i < 8; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[8], tt, tt); + for (i = 0; i < 8; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[11], tt, tt); + for (i = 0; i < 6; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + oqs_sidh_cln16_fpmul751_mont(t[22], tt, tt); + for (i = 0; i < 7; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + for (j = 0; j < 61; j++) { + oqs_sidh_cln16_fpmul751_mont(t[26], tt, tt); + for (i = 0; i < 6; i++) { + oqs_sidh_cln16_fpsqr751_mont(tt, tt); + } + } + oqs_sidh_cln16_fpmul751_mont(t[25], tt, a); +} + + +/***********************************************/ +/************* GF(p^2) FUNCTIONS ***************/ + +void oqs_sidh_cln16_fp2copy751(oqs_sidh_cln16_f2elm_t a, 
oqs_sidh_cln16_f2elm_t c) { + // Copy of a GF(p751^2) element, c = a + oqs_sidh_cln16_fpcopy751(a[0], c[0]); + oqs_sidh_cln16_fpcopy751(a[1], c[1]); +} + + +void oqs_sidh_cln16_fp2zero751(oqs_sidh_cln16_f2elm_t a) { + // Zeroing a GF(p751^2) element, a = 0 + oqs_sidh_cln16_fpzero751(a[0]); + oqs_sidh_cln16_fpzero751(a[1]); +} + + +void oqs_sidh_cln16_fp2neg751(oqs_sidh_cln16_f2elm_t a) { + // GF(p751^2) negation, a = -a in GF(p751^2) + oqs_sidh_cln16_fpneg751(a[0]); + oqs_sidh_cln16_fpneg751(a[1]); +} + + +__inline void oqs_sidh_cln16_fp2add751(oqs_sidh_cln16_f2elm_t a, oqs_sidh_cln16_f2elm_t b, oqs_sidh_cln16_f2elm_t c) { + // GF(p751^2) addition, c = a+b in GF(p751^2) + oqs_sidh_cln16_fpadd751(a[0], b[0], c[0]); + oqs_sidh_cln16_fpadd751(a[1], b[1], c[1]); +} + + +__inline void oqs_sidh_cln16_fp2sub751(oqs_sidh_cln16_f2elm_t a, oqs_sidh_cln16_f2elm_t b, oqs_sidh_cln16_f2elm_t c) { + // GF(p751^2) subtraction, c = a-b in GF(p751^2) + oqs_sidh_cln16_fpsub751(a[0], b[0], c[0]); + oqs_sidh_cln16_fpsub751(a[1], b[1], c[1]); +} + + +void oqs_sidh_cln16_fp2div2_751(oqs_sidh_cln16_f2elm_t a, oqs_sidh_cln16_f2elm_t c) { + // GF(p751^2) division by two, c = a/2 in GF(p751^2) + oqs_sidh_cln16_fpdiv2_751(a[0], c[0]); + oqs_sidh_cln16_fpdiv2_751(a[1], c[1]); +} + + +void oqs_sidh_cln16_fp2correction751(oqs_sidh_cln16_f2elm_t a) { + // Modular correction, a = a in GF(p751^2) + oqs_sidh_cln16_fpcorrection751(a[0]); + oqs_sidh_cln16_fpcorrection751(a[1]); +} + + +void oqs_sidh_cln16_fp2sqr751_mont(oqs_sidh_cln16_f2elm_t a, oqs_sidh_cln16_f2elm_t c) { + // GF(p751^2) squaring using Montgomery arithmetic, c = a^2 in GF(p751^2) + oqs_sidh_cln16_felm_t t1, t2, t3; + + oqs_sidh_cln16_mp_add(a[0], a[1], t1, NWORDS_FIELD); // t1 = a0+a1 + oqs_sidh_cln16_fpsub751(a[0], a[1], t2); // t2 = a0-a1 + oqs_sidh_cln16_mp_add(a[0], a[0], t3, NWORDS_FIELD); // t3 = 2a0 + oqs_sidh_cln16_fpmul751_mont(t1, t2, c[0]); // c0 = (a0+a1)(a0-a1) + oqs_sidh_cln16_fpmul751_mont(t3, a[1], c[1]); // c1 = 
2a0*a1 +} + + +void oqs_sidh_cln16_fp2mul751_mont(oqs_sidh_cln16_f2elm_t a, oqs_sidh_cln16_f2elm_t b, oqs_sidh_cln16_f2elm_t c) { + // GF(p751^2) multiplication using Montgomery arithmetic, c = a*b in GF(p751^2) + oqs_sidh_cln16_felm_t t1, t2; + oqs_sidh_cln16_dfelm_t tt1, tt2, tt3; + digit_t mask; + unsigned int i, borrow; + + oqs_sidh_cln16_mp_mul(a[0], b[0], tt1, NWORDS_FIELD); // tt1 = a0*b0 + oqs_sidh_cln16_mp_mul(a[1], b[1], tt2, NWORDS_FIELD); // tt2 = a1*b1 + oqs_sidh_cln16_mp_add(a[0], a[1], t1, NWORDS_FIELD); // t1 = a0+a1 + oqs_sidh_cln16_mp_add(b[0], b[1], t2, NWORDS_FIELD); // t2 = b0+b1 + borrow = oqs_sidh_cln16_mp_sub(tt1, tt2, tt3, 2 * NWORDS_FIELD); // tt3 = a0*b0 - a1*b1 + mask = 0 - (digit_t)borrow; // if tt3 < 0 then mask = 0xFF..F, else if tt3 >= 0 then mask = 0x00..0 + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, tt3[NWORDS_FIELD + i], ((digit_t *)p751)[i] & mask, borrow, tt3[NWORDS_FIELD + i]); + } + oqs_sidh_cln16_rdc_mont(tt3, c[0]); // c[0] = a0*b0 - a1*b1 + oqs_sidh_cln16_mp_add(tt1, tt2, tt1, 2 * NWORDS_FIELD); // tt1 = a0*b0 + a1*b1 + oqs_sidh_cln16_mp_mul(t1, t2, tt2, NWORDS_FIELD); // tt2 = (a0+a1)*(b0+b1) + oqs_sidh_cln16_mp_sub(tt2, tt1, tt2, 2 * NWORDS_FIELD); // tt2 = (a0+a1)*(b0+b1) - a0*b0 - a1*b1 + oqs_sidh_cln16_rdc_mont(tt2, c[1]); // c[1] = (a0+a1)*(b0+b1) - a0*b0 - a1*b1 +} + + +void oqs_sidh_cln16_to_fp2mont(oqs_sidh_cln16_f2elm_t a, oqs_sidh_cln16_f2elm_t mc) { + // Conversion of a GF(p751^2) element to Montgomery representation + // mc_i = a_i*R^2*R^-1 = a_i*R in GF(p751^2). + + oqs_sidh_cln16_to_mont(a[0], mc[0]); + oqs_sidh_cln16_to_mont(a[1], mc[1]); +} + + +void oqs_sidh_cln16_from_fp2mont(oqs_sidh_cln16_f2elm_t ma, oqs_sidh_cln16_f2elm_t c) { + // Conversion of a GF(p751^2) element from Montgomery representation to standard representation + // c_i = ma_i*R^-1 = a_i in GF(p751^2). 
+ + oqs_sidh_cln16_from_mont(ma[0], c[0]); + oqs_sidh_cln16_from_mont(ma[1], c[1]); +} + + +void oqs_sidh_cln16_fp2inv751_mont(oqs_sidh_cln16_f2elm_t a) { + // GF(p751^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2) + oqs_sidh_cln16_f2elm_t t1; + + oqs_sidh_cln16_fpsqr751_mont(a[0], t1[0]); // t10 = a0^2 + oqs_sidh_cln16_fpsqr751_mont(a[1], t1[1]); // t11 = a1^2 + oqs_sidh_cln16_fpadd751(t1[0], t1[1], t1[0]); // t10 = a0^2+a1^2 + oqs_sidh_cln16_fpinv751_mont(t1[0]); // t10 = (a0^2+a1^2)^-1 + oqs_sidh_cln16_fpneg751(a[1]); // a = a0-i*a1 + oqs_sidh_cln16_fpmul751_mont(a[0], t1[0], a[0]); + oqs_sidh_cln16_fpmul751_mont(a[1], t1[0], a[1]); // a = (a0-i*a1)*(a0^2+a1^2)^-1 +} + + +void oqs_sidh_cln16_swap_points_basefield(oqs_sidh_cln16_point_basefield_proj_t P, oqs_sidh_cln16_point_basefield_proj_t Q, digit_t option) { + // Swap points over the base field + // If option = 0 then P <- P and Q <- Q, else if option = 0xFF...FF then P <- Q and Q <- P + digit_t temp; + unsigned int i; + + for (i = 0; i < NWORDS_FIELD; i++) { + temp = option & (P->X[i] ^ Q->X[i]); + P->X[i] = temp ^ P->X[i]; + Q->X[i] = temp ^ Q->X[i]; + temp = option & (P->Z[i] ^ Q->Z[i]); + P->Z[i] = temp ^ P->Z[i]; + Q->Z[i] = temp ^ Q->Z[i]; + } +} + + +void oqs_sidh_cln16_swap_points(oqs_sidh_cln16_point_proj_t P, oqs_sidh_cln16_point_proj_t Q, digit_t option) { + // Swap points + // If option = 0 then P <- P and Q <- Q, else if option = 0xFF...FF then P <- Q and Q <- P + digit_t temp; + unsigned int i; + + for (i = 0; i < NWORDS_FIELD; i++) { + temp = option & (P->X[0][i] ^ Q->X[0][i]); + P->X[0][i] = temp ^ P->X[0][i]; + Q->X[0][i] = temp ^ Q->X[0][i]; + temp = option & (P->Z[0][i] ^ Q->Z[0][i]); + P->Z[0][i] = temp ^ P->Z[0][i]; + Q->Z[0][i] = temp ^ Q->Z[0][i]; + temp = option & (P->X[1][i] ^ Q->X[1][i]); + P->X[1][i] = temp ^ P->X[1][i]; + Q->X[1][i] = temp ^ Q->X[1][i]; + temp = option & (P->Z[1][i] ^ Q->Z[1][i]); + P->Z[1][i] = temp ^ P->Z[1][i]; + Q->Z[1][i] = temp ^ 
Q->Z[1][i]; + } +} + + +void oqs_sidh_cln16_select_f2elm(oqs_sidh_cln16_f2elm_t x, oqs_sidh_cln16_f2elm_t y, oqs_sidh_cln16_f2elm_t z, digit_t option) { + // Select either x or y depending on value of option + // If option = 0 then z <- x, else if option = 0xFF...FF then z <- y + unsigned int i; + + for (i = 0; i < NWORDS_FIELD; i++) { + z[0][i] = (option & (x[0][i] ^ y[0][i])) ^ x[0][i]; + z[1][i] = (option & (x[1][i] ^ y[1][i])) ^ x[1][i]; + } +} diff --git a/src/kex_sidh_cln16/generic/fp_generic.c b/src/kex_sidh_cln16/generic/fp_generic.c new file mode 100644 index 000000000..cc2685d11 --- /dev/null +++ b/src/kex_sidh_cln16/generic/fp_generic.c @@ -0,0 +1,251 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny-based cryptography library for Diffie-Hellman key +* exchange providing 128 bits of quantum security and 192 bits of classical security. +* +* Copyright (c) Microsoft Corporation. All rights reserved. +* +* +* Abstract: portable modular arithmetic +* +*********************************************************************************************/ + +#include "../SIDH_internal.h" + + +// Global constants +extern const uint64_t p751[NWORDS_FIELD]; +extern const uint64_t p751p1[NWORDS_FIELD]; +extern const uint64_t p751x2[NWORDS_FIELD]; + + +__inline void oqs_sidh_cln16_fpadd751(digit_t* a, digit_t* b, digit_t* c) +{ // Modular addition, c = a+b mod p751. 
+ // Inputs: a, b in [0, 2*p751-1] + // Output: c in [0, 2*p751-1] + unsigned int i, carry = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], b[i], carry, c[i]); + } + + carry = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(carry, c[i], ((digit_t*)p751x2)[i], carry, c[i]); + } + mask = 0 - (digit_t)carry; + + carry = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, c[i], ((digit_t*)p751x2)[i] & mask, carry, c[i]); + } +} + + +__inline void oqs_sidh_cln16_fpsub751(digit_t* a, digit_t* b, digit_t* c) +{ // Modular subtraction, c = a-b mod p751. + // Inputs: a, b in [0, 2*p751-1] + // Output: c in [0, 2*p751-1] + unsigned int i, borrow = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + mask = 0 - (digit_t)borrow; + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((digit_t*)p751x2)[i] & mask, borrow, c[i]); + } +} + + +__inline void oqs_sidh_cln16_fpneg751(digit_t* a) +{ // Modular negation, a = -a mod p751. + // Input/output: a in [0, 2*p751-1] + unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, ((digit_t*)p751x2)[i], a[i], borrow, a[i]); + } +} + + +void oqs_sidh_cln16_fpdiv2_751(digit_t* a, digit_t* c) +{ // Modular division by two, c = a/2 mod p751. + // Input : a in [0, 2*p751-1] + // Output: c in [0, 2*p751-1] + unsigned int i, carry = 0; + digit_t mask; + + mask = 0 - (digit_t)(a[0] & 1); // If a is odd, mask is all ones and the loop computes a+p751; if a is even, mask is zero and a is copied unchanged + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], ((digit_t*)p751)[i] & mask, carry, c[i]); + } + + oqs_sidh_cln16_mp_shiftr1(c, NWORDS_FIELD); +} + + +void oqs_sidh_cln16_fpcorrection751(digit_t* a) +{ // Modular correction to reduce field element a in [0, 2*p751-1] to [0, p751-1].
+ unsigned int i, borrow = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], ((digit_t*)p751)[i], borrow, a[i]); + } + mask = 0 - (digit_t)borrow; + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, a[i], ((digit_t*)p751)[i] & mask, borrow, a[i]); + } +} + + +void oqs_sidh_cln16_digit_x_digit(digit_t a, digit_t b, digit_t* c) +{ // Digit multiplication, digit * digit -> 2-digit result + register digit_t al, ah, bl, bh, temp; + digit_t albl, albh, ahbl, ahbh, res1, res2, res3, carry; + digit_t mask_low = (digit_t)(-1) >> (sizeof(digit_t)*4), mask_high = (digit_t)(-1) << (sizeof(digit_t)*4); + + al = a & mask_low; // Low part + ah = a >> (sizeof(digit_t) * 4); // High part + bl = b & mask_low; + bh = b >> (sizeof(digit_t) * 4); + + albl = al*bl; + albh = al*bh; + ahbl = ah*bl; + ahbh = ah*bh; + c[0] = albl & mask_low; // C00 + + res1 = albl >> (sizeof(digit_t) * 4); + res2 = ahbl & mask_low; + res3 = albh & mask_low; + temp = res1 + res2 + res3; + carry = temp >> (sizeof(digit_t) * 4); + c[0] ^= temp << (sizeof(digit_t) * 4); // C01 + + res1 = ahbl >> (sizeof(digit_t) * 4); + res2 = albh >> (sizeof(digit_t) * 4); + res3 = ahbh & mask_low; + temp = res1 + res2 + res3 + carry; + c[1] = temp & mask_low; // C10 + carry = temp & mask_high; + c[1] ^= (ahbh & mask_high) + carry; // C11 +} + + +void oqs_sidh_cln16_mp_mul_schoolbook(digit_t* a, digit_t* b, digit_t* c, unsigned int nwords) +{ // Multiprecision schoolbook multiply, c = a*b, where lng(a) = lng(b) = nwords. 
+ unsigned int i, j; + digit_t u, v, UV[2]; + unsigned int carry = 0; + + for (i = 0; i < (2*nwords); i++) c[i] = 0; + + for (i = 0; i < nwords; i++) { + u = 0; + for (j = 0; j < nwords; j++) { + MUL(a[i], b[j], UV+1, UV[0]); + ADDC(0, UV[0], u, carry, v); + u = UV[1] + carry; + ADDC(0, c[i+j], v, carry, v); + u = u + carry; + c[i+j] = v; + } + c[nwords+i] = u; + } +} + + +void oqs_sidh_cln16_mp_mul_comba(digit_t* a, digit_t* b, digit_t* c, unsigned int nwords) +{ // Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = nwords. + unsigned int i, j; + digit_t t = 0, u = 0, v = 0, UV[2]; + unsigned int carry = 0; + + for (i = 0; i < nwords; i++) { + for (j = 0; j <= i; j++) { + MUL(a[j], b[i-j], UV+1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + c[i] = v; + v = u; + u = t; + t = 0; + } + + for (i = nwords; i < 2*nwords-1; i++) { + for (j = i-nwords+1; j < nwords; j++) { + MUL(a[j], b[i-j], UV+1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + c[i] = v; + v = u; + u = t; + t = 0; + } + c[2*nwords-1] = v; +} + + +void oqs_sidh_cln16_rdc_mont(oqs_sidh_cln16_dfelm_t ma, oqs_sidh_cln16_felm_t mc) +{ // Optimized Montgomery reduction using comba and exploiting the special form of the prime p751. + // mc = ma*mb*R^-1 mod p751, where ma,mb,mc in [0, p751-1] and R = 2^768. + // ma and mb are assumed to be in Montgomery representation. 
+ unsigned int i, j, carry, count = p751_ZERO_WORDS; + digit_t UV[2], t = 0, u = 0, v = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + mc[i] = 0; + } + + for (i = 0; i < NWORDS_FIELD; i++) { + for (j = 0; j < i; j++) { + if (j < (i-p751_ZERO_WORDS+1)) { + MUL(mc[j], ((digit_t*)p751p1)[i-j], UV+1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + } + ADDC(0, v, ma[i], carry, v); + ADDC(carry, u, 0, carry, u); + t += carry; + mc[i] = v; + v = u; + u = t; + t = 0; + } + + for (i = NWORDS_FIELD; i < 2*NWORDS_FIELD-1; i++) { + if (count > 0) { + count -= 1; + } + for (j = i-NWORDS_FIELD+1; j < NWORDS_FIELD; j++) { + if (j < (NWORDS_FIELD-count)) { + MUL(mc[j], ((digit_t*)p751p1)[i-j], UV+1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + } + ADDC(0, v, ma[i], carry, v); + ADDC(carry, u, 0, carry, u); + t += carry; + mc[i-NWORDS_FIELD] = v; + v = u; + u = t; + t = 0; + } + ADDC(0, v, ma[2*NWORDS_FIELD-1], carry, v); + mc[NWORDS_FIELD-1] = v; +} diff --git a/src/kex_sidh_cln16/kex_sidh_cln16.c b/src/kex_sidh_cln16/kex_sidh_cln16.c new file mode 100644 index 000000000..439fd8db7 --- /dev/null +++ b/src/kex_sidh_cln16/kex_sidh_cln16.c @@ -0,0 +1,183 @@ +#if defined(WINDOWS) +#define UNUSED +#else +#define UNUSED __attribute__ ((unused)) +#endif + +#include +#include +#if !defined(WINDOWS) +#include +#include +#endif + +#include +#include + +#include "kex_sidh_cln16.h" +#include "SIDH.h" + +/* Creates an OQS_KEX for SIDH CLN16 together with its p751 curve context. + * Returns NULL when an allocation or the curve initialization fails; + * everything allocated up to that point is released before returning. + */ +OQS_KEX *OQS_KEX_sidh_cln16_new(OQS_RAND *rand) { + + OQS_KEX *k = malloc(sizeof(OQS_KEX)); + if (k == NULL) { + return NULL; + } + + // Curve isogeny system initialization + PCurveIsogenyStruct curveIsogeny = oqs_sidh_cln16_curve_allocate(&CurveIsogeny_SIDHp751); + if (curveIsogeny == NULL) { + free(k); + return NULL; + } + if (oqs_sidh_cln16_curve_initialize(curveIsogeny, rand, &CurveIsogeny_SIDHp751) != SIDH_CRYPTO_SUCCESS) { + oqs_sidh_cln16_curve_free(curveIsogeny); + free(k); + return NULL; + } + k->ctx =
curveIsogeny; + k->method_name = strdup("SIDH CLN16"); + if (k->method_name == NULL) { + oqs_sidh_cln16_curve_free(curveIsogeny); + free(k); + return NULL; + } + k->estimated_classical_security = 192; + k->estimated_quantum_security = 128; + k->seed = NULL; + k->seed_len = 0; + k->named_parameters = NULL; // TODO: create param p751 when we have more curves + k->rand = rand; + k->params = NULL; + k->alice_0 = &OQS_KEX_sidh_cln16_alice_0; + k->bob = &OQS_KEX_sidh_cln16_bob; + k->alice_1 = &OQS_KEX_sidh_cln16_alice_1; + k->alice_priv_free = &OQS_KEX_sidh_cln16_alice_priv_free; + k->free = &OQS_KEX_sidh_cln16_free; + + return k; +} + +/* Alice's first step: allocates *alice_priv and *alice_msg and fills them with a fresh key pair. + * Returns 1 on success. Returns 0 on failure, in which case both out-pointers are NULL. + */ +int OQS_KEX_sidh_cln16_alice_0(OQS_KEX *k, void **alice_priv, uint8_t **alice_msg, size_t *alice_msg_len) { + + int ret; + + *alice_priv = NULL; + /* alice_msg is alice's public key */ + *alice_msg = NULL; + + *alice_msg = malloc(SIDH_PUBKEY_LEN); + if (*alice_msg == NULL) { + goto err; + } + *alice_priv = malloc(SIDH_SECRETKEY_LEN); + if (*alice_priv == NULL) { + goto err; + } + + if (oqs_sidh_cln16_KeyGeneration_A((unsigned char *) *alice_priv, (unsigned char *) *alice_msg, k->ctx, k->rand) != SIDH_CRYPTO_SUCCESS) { + goto err; + } + *alice_msg_len = SIDH_PUBKEY_LEN; + + ret = 1; + goto cleanup; + +err: + ret = 0; + /* release partial results and clear the out-pointers so the caller cannot free them again */ + free(*alice_msg); + *alice_msg = NULL; + free(*alice_priv); + *alice_priv = NULL; + +cleanup: + return ret; +} + +int OQS_KEX_sidh_cln16_bob(OQS_KEX *k, const uint8_t *alice_msg, const size_t alice_msg_len, uint8_t **bob_msg, size_t *bob_msg_len, uint8_t **key, size_t *key_len) { + + int ret; + uint8_t *bob_priv = NULL; + *bob_msg = NULL; + *key = NULL; + + if (alice_msg_len != SIDH_PUBKEY_LEN) { + goto err; + } + bob_priv = malloc(SIDH_SECRETKEY_LEN); + if (bob_priv == NULL) { + goto err; + } + *bob_msg = malloc(SIDH_PUBKEY_LEN); + if (*bob_msg == NULL) { + goto err; + } + *key = malloc(SIDH_SHAREDKEY_LEN); + if (*key == NULL) { + goto err; + } + + if (oqs_sidh_cln16_KeyGeneration_B((unsigned char *) bob_priv, (unsigned char *) *bob_msg, k->ctx, k->rand) != SIDH_CRYPTO_SUCCESS) { + goto err; + } + if (oqs_sidh_cln16_SecretAgreement_B((unsigned char *) bob_priv,
(unsigned char *) alice_msg, (unsigned char *) *key, false, k->ctx, k->rand) != SIDH_CRYPTO_SUCCESS) { + goto err; + } + + *key_len = SIDH_SHAREDKEY_LEN; + *bob_msg_len = SIDH_PUBKEY_LEN; + + ret = 1; + goto cleanup; + +err: + ret = 0; + /* release partial results and clear the out-pointers so the caller cannot free them again */ + free(*bob_msg); + *bob_msg = NULL; + free(*key); + *key = NULL; + +cleanup: + + free(bob_priv); + return ret; +} + +/* Alice's final step: allocates *key and derives the shared secret from alice_priv and bob_msg. + * bob_msg_len must equal SIDH_PUBKEY_LEN. Returns 1 on success. + * Returns 0 on failure, in which case *key is NULL. + */ +int OQS_KEX_sidh_cln16_alice_1(OQS_KEX *k, const void *alice_priv, const uint8_t *bob_msg, const size_t bob_msg_len, uint8_t **key, size_t *key_len) { + + int ret; + + *key = NULL; + + if (bob_msg_len != SIDH_PUBKEY_LEN) { + goto err; + } + + *key = malloc(SIDH_SHAREDKEY_LEN); + if (*key == NULL) { + goto err; + } + + if (oqs_sidh_cln16_SecretAgreement_A((unsigned char *) alice_priv, (unsigned char *) bob_msg, (unsigned char *) *key, false, k->ctx, k->rand) != SIDH_CRYPTO_SUCCESS) { + goto err; + } + + *key_len = SIDH_SHAREDKEY_LEN; + + ret = 1; + goto cleanup; + +err: + ret = 0; + /* clear the out-pointer after freeing so the caller cannot free it again */ + free(*key); + *key = NULL; + +cleanup: + + return ret; +} + +void OQS_KEX_sidh_cln16_alice_priv_free(UNUSED OQS_KEX *k, void *alice_priv) { + if (alice_priv) { + free(alice_priv); + } +} + +void OQS_KEX_sidh_cln16_free(OQS_KEX *k) { + if (!k) { + return; + } + oqs_sidh_cln16_curve_free((PCurveIsogenyStruct) k->ctx); + k->ctx = NULL; + free(k->method_name); + k->method_name = NULL; + free(k); +} diff --git a/src/kex_sidh_cln16/kex_sidh_cln16.h b/src/kex_sidh_cln16/kex_sidh_cln16.h new file mode 100644 index 000000000..60076faef --- /dev/null +++ b/src/kex_sidh_cln16/kex_sidh_cln16.h @@ -0,0 +1,24 @@ +/** + * \file kex_sidh_cln16.h + * \brief Header for SIDH key exchange protocol from the Microsoft SIDH library + */ + +#ifndef __OQS_KEX_SIDH_CLN16_H +#define __OQS_KEX_SIDH_CLN16_H + +#include +#include + +#include +#include + +OQS_KEX *OQS_KEX_sidh_cln16_new(OQS_RAND *rand); + +int OQS_KEX_sidh_cln16_alice_0(OQS_KEX *k, void **alice_priv, uint8_t **alice_msg, size_t *alice_msg_len); +int OQS_KEX_sidh_cln16_bob(OQS_KEX *k, const uint8_t *alice_msg, const size_t alice_msg_len, uint8_t **bob_msg,
size_t *bob_msg_len, uint8_t **key, size_t *key_len); +int OQS_KEX_sidh_cln16_alice_1(OQS_KEX *k, const void *alice_priv, const uint8_t *bob_msg, const size_t bob_msg_len, uint8_t **key, size_t *key_len); + +void OQS_KEX_sidh_cln16_alice_priv_free(OQS_KEX *k, void *alice_priv); +void OQS_KEX_sidh_cln16_free(OQS_KEX *k); + +#endif diff --git a/src/kex_sidh_cln16/sidh_kex.c b/src/kex_sidh_cln16/sidh_kex.c new file mode 100644 index 000000000..d3d904ba0 --- /dev/null +++ b/src/kex_sidh_cln16/sidh_kex.c @@ -0,0 +1,392 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny-based cryptography library for Diffie-Hellman key +* exchange providing 128 bits of quantum security and 192 bits of classical security. +* +* Copyright (c) Microsoft Corporation. All rights reserved. +* +* +* Abstract: isogeny-based key exchange +* +*********************************************************************************************/ + +#include "SIDH_internal.h" + +extern const unsigned int splits_Alice[SIDH_MAX_Alice]; +extern const unsigned int splits_Bob[SIDH_MAX_Bob]; + +#ifdef SIDH_ASM +#include "AMD64/fp_x64.c" +#else +#include "generic/fp_generic.c" +#endif + +SIDH_CRYPTO_STATUS oqs_sidh_cln16_KeyGeneration_A(unsigned char *pPrivateKeyA, unsigned char *pPublicKeyA, PCurveIsogenyStruct CurveIsogeny, OQS_RAND *rand) { + // Alice's key-pair generation + // It produces a private key pPrivateKeyA and computes the public key pPublicKeyA. + // The private key is an even integer in the range [2, oA-2], where oA = 2^372 (i.e., 372 bits in total). + // The public key consists of 3 elements in GF(p751^2), i.e., 564 bytes. + // CurveIsogeny must be set up in advance using oqs_sidh_cln16_curve_initialize(). 
+ unsigned int owords = NBITS_TO_NWORDS(CurveIsogeny->owordbits), pwords = NBITS_TO_NWORDS(CurveIsogeny->pwordbits); + oqs_sidh_cln16_point_basefield_t P; + oqs_sidh_cln16_point_proj_t R, phiP = oqs_sidh_cln16_point_proj_t_EMPTY, phiQ = oqs_sidh_cln16_point_proj_t_EMPTY, phiD = oqs_sidh_cln16_point_proj_t_EMPTY, pts[SIDH_MAX_INT_POINTS_ALICE]; + oqs_sidh_cln16_publickey_t *PublicKeyA = (oqs_sidh_cln16_publickey_t *)pPublicKeyA; + unsigned int i, row, m, index = 0, pts_index[SIDH_MAX_INT_POINTS_ALICE], npts = 0; + oqs_sidh_cln16_f2elm_t coeff[5], A = { {0} }, C = { {0} }, Aout, Cout; + SIDH_CRYPTO_STATUS Status; + + if (pPrivateKeyA == NULL || pPublicKeyA == NULL || oqs_sidh_cln16_is_CurveIsogenyStruct_null(CurveIsogeny)) { + return SIDH_CRYPTO_ERROR_INVALID_PARAMETER; + } + + // Choose a random even number in the range [2, oA-2] as secret key for Alice + Status = oqs_sidh_cln16_random_mod_order((digit_t *)pPrivateKeyA, SIDH_ALICE, CurveIsogeny, rand); + if (Status != SIDH_CRYPTO_SUCCESS) { + oqs_sidh_cln16_clear_words((void *)pPrivateKeyA, owords); + return Status; + } + + oqs_sidh_cln16_to_mont((digit_t *)CurveIsogeny->PA, (digit_t *)P); // Conversion of Alice's generators to Montgomery representation + oqs_sidh_cln16_to_mont(((digit_t *)CurveIsogeny->PA) + NWORDS_FIELD, ((digit_t *)P) + NWORDS_FIELD); + + Status = oqs_sidh_cln16_secret_pt(P, (digit_t *)pPrivateKeyA, SIDH_ALICE, R, CurveIsogeny); + if (Status != SIDH_CRYPTO_SUCCESS) { + oqs_sidh_cln16_clear_words((void *)pPrivateKeyA, owords); + return Status; + } + + oqs_sidh_cln16_copy_words((digit_t *)CurveIsogeny->PB, (digit_t *)phiP, pwords); // Copy X-coordinates from Bob's public parameters, set Z <- 1 + oqs_sidh_cln16_fpcopy751((digit_t *)CurveIsogeny->Montgomery_one, (digit_t *)phiP->Z); + oqs_sidh_cln16_to_mont((digit_t *)phiP, (digit_t *)phiP); + oqs_sidh_cln16_copy_words((digit_t *)phiP, (digit_t *)phiQ, pwords); // QB = (-XPB:1) + oqs_sidh_cln16_fpneg751(phiQ->X[0]); + 
oqs_sidh_cln16_fpcopy751((digit_t *)CurveIsogeny->Montgomery_one, (digit_t *)phiQ->Z); + oqs_sidh_cln16_distort_and_diff(phiP->X[0], phiD, CurveIsogeny); // DB = (x(QB-PB),z(QB-PB)) + + oqs_sidh_cln16_fpcopy751(CurveIsogeny->A, A[0]); // Extracting curve parameters A and C + oqs_sidh_cln16_fpcopy751(CurveIsogeny->C, C[0]); + oqs_sidh_cln16_to_mont(A[0], A[0]); + oqs_sidh_cln16_to_mont(C[0], C[0]); + + oqs_sidh_cln16_first_4_isog(phiP, A, Aout, Cout, CurveIsogeny); + oqs_sidh_cln16_first_4_isog(phiQ, A, Aout, Cout, CurveIsogeny); + oqs_sidh_cln16_first_4_isog(phiD, A, Aout, Cout, CurveIsogeny); + oqs_sidh_cln16_first_4_isog(R, A, A, C, CurveIsogeny); + + index = 0; + for (row = 1; row < SIDH_MAX_Alice; row++) { + while (index < SIDH_MAX_Alice - row) { + oqs_sidh_cln16_fp2copy751(R->X, pts[npts]->X); + oqs_sidh_cln16_fp2copy751(R->Z, pts[npts]->Z); + pts_index[npts] = index; + npts += 1; + m = splits_Alice[SIDH_MAX_Alice - index - row]; + oqs_sidh_cln16_xDBLe(R, R, A, C, (int)(2 * m)); + index += m; + } + oqs_sidh_cln16_get_4_isog(R, A, C, coeff); + + for (i = 0; i < npts; i++) { + oqs_sidh_cln16_eval_4_isog(pts[i], coeff); + } + oqs_sidh_cln16_eval_4_isog(phiP, coeff); + oqs_sidh_cln16_eval_4_isog(phiQ, coeff); + oqs_sidh_cln16_eval_4_isog(phiD, coeff); + + oqs_sidh_cln16_fp2copy751(pts[npts - 1]->X, R->X); + oqs_sidh_cln16_fp2copy751(pts[npts - 1]->Z, R->Z); + index = pts_index[npts - 1]; + npts -= 1; + } + + oqs_sidh_cln16_get_4_isog(R, A, C, coeff); + oqs_sidh_cln16_eval_4_isog(phiP, coeff); + oqs_sidh_cln16_eval_4_isog(phiQ, coeff); + oqs_sidh_cln16_eval_4_isog(phiD, coeff); + + oqs_sidh_cln16_inv_3_way(phiP->Z, phiQ->Z, phiD->Z); + oqs_sidh_cln16_fp2mul751_mont(phiP->X, phiP->Z, phiP->X); + oqs_sidh_cln16_fp2mul751_mont(phiQ->X, phiQ->Z, phiQ->X); + oqs_sidh_cln16_fp2mul751_mont(phiD->X, phiD->Z, phiD->X); + + oqs_sidh_cln16_from_fp2mont(phiP->X, ((oqs_sidh_cln16_f2elm_t *)PublicKeyA)[0]); // Converting back to standard representation + 
oqs_sidh_cln16_from_fp2mont(phiQ->X, ((oqs_sidh_cln16_f2elm_t *)PublicKeyA)[1]); + oqs_sidh_cln16_from_fp2mont(phiD->X, ((oqs_sidh_cln16_f2elm_t *)PublicKeyA)[2]); + +// Cleanup: + oqs_sidh_cln16_clear_words((void *)R, 2 * 2 * pwords); + oqs_sidh_cln16_clear_words((void *)phiP, 2 * 2 * pwords); + oqs_sidh_cln16_clear_words((void *)phiQ, 2 * 2 * pwords); + oqs_sidh_cln16_clear_words((void *)phiD, 2 * 2 * pwords); + oqs_sidh_cln16_clear_words((void *)pts, SIDH_MAX_INT_POINTS_ALICE * 2 * 2 * pwords); + oqs_sidh_cln16_clear_words((void *)A, 2 * pwords); + oqs_sidh_cln16_clear_words((void *)C, 2 * pwords); + oqs_sidh_cln16_clear_words((void *)coeff, 5 * 2 * pwords); + + return Status; +} + + +SIDH_CRYPTO_STATUS oqs_sidh_cln16_KeyGeneration_B(unsigned char *pPrivateKeyB, unsigned char *pPublicKeyB, PCurveIsogenyStruct CurveIsogeny, OQS_RAND *rand) { + // Bob's key-pair generation + // It produces a private key pPrivateKeyB and computes the public key pPublicKeyB. + // The private key is an integer in the range [1, oB-1], where oB = 3^239 (i.e., 379 bits in total). + // The public key consists of 3 elements in GF(p751^2), i.e., 564 bytes. + // CurveIsogeny must be set up in advance using oqs_sidh_cln16_curve_initialize().
+ unsigned int owords = NBITS_TO_NWORDS(CurveIsogeny->owordbits), pwords = NBITS_TO_NWORDS(CurveIsogeny->pwordbits); + oqs_sidh_cln16_point_basefield_t P; + oqs_sidh_cln16_point_proj_t R, phiP = oqs_sidh_cln16_point_proj_t_EMPTY, phiQ = oqs_sidh_cln16_point_proj_t_EMPTY, phiD = oqs_sidh_cln16_point_proj_t_EMPTY, pts[SIDH_MAX_INT_POINTS_BOB]; + oqs_sidh_cln16_publickey_t *PublicKeyB = (oqs_sidh_cln16_publickey_t *)pPublicKeyB; + unsigned int i, row, m, index = 0, pts_index[SIDH_MAX_INT_POINTS_BOB], npts = 0; + oqs_sidh_cln16_f2elm_t A = { {0} }, C = { {0} }; + SIDH_CRYPTO_STATUS Status; + + if (pPrivateKeyB == NULL || pPublicKeyB == NULL || oqs_sidh_cln16_is_CurveIsogenyStruct_null(CurveIsogeny)) { + return SIDH_CRYPTO_ERROR_INVALID_PARAMETER; + } + + // Choose a random number equivalent to 0 (mod 3) in the range [3, oB-3] as secret key for Bob + Status = oqs_sidh_cln16_random_mod_order((digit_t *)pPrivateKeyB, SIDH_BOB, CurveIsogeny, rand); + if (Status != SIDH_CRYPTO_SUCCESS) { + oqs_sidh_cln16_clear_words((void *)pPrivateKeyB, owords); + return Status; + } + + oqs_sidh_cln16_to_mont((digit_t *)CurveIsogeny->PB, (digit_t *)P); // Conversion of Bob's generators to Montgomery representation + oqs_sidh_cln16_to_mont(((digit_t *)CurveIsogeny->PB) + NWORDS_FIELD, ((digit_t *)P) + NWORDS_FIELD); + + Status = oqs_sidh_cln16_secret_pt(P, (digit_t *)pPrivateKeyB, SIDH_BOB, R, CurveIsogeny); + if (Status != SIDH_CRYPTO_SUCCESS) { + oqs_sidh_cln16_clear_words((void *)pPrivateKeyB, owords); + return Status; + } + + oqs_sidh_cln16_copy_words((digit_t *)CurveIsogeny->PA, (digit_t *)phiP, pwords); // Copy X-coordinates from Alice's public parameters, set Z <- 1 + oqs_sidh_cln16_fpcopy751((digit_t *)CurveIsogeny->Montgomery_one, (digit_t *)phiP->Z); + oqs_sidh_cln16_to_mont((digit_t *)phiP, (digit_t *)phiP); // Conversion to Montgomery representation + oqs_sidh_cln16_copy_words((digit_t *)phiP, (digit_t *)phiQ, pwords); // QA = (-XPA:1) + oqs_sidh_cln16_fpneg751(phiQ->X[0]); + 
oqs_sidh_cln16_fpcopy751((digit_t *)CurveIsogeny->Montgomery_one, (digit_t *)phiQ->Z); + oqs_sidh_cln16_distort_and_diff(phiP->X[0], phiD, CurveIsogeny); // DA = (x(QA-PA),z(QA-PA)) + + oqs_sidh_cln16_fpcopy751(CurveIsogeny->A, A[0]); // Extracting curve parameters A and C + oqs_sidh_cln16_fpcopy751(CurveIsogeny->C, C[0]); + oqs_sidh_cln16_to_mont(A[0], A[0]); + oqs_sidh_cln16_to_mont(C[0], C[0]); + + index = 0; + for (row = 1; row < SIDH_MAX_Bob; row++) { + while (index < SIDH_MAX_Bob - row) { + oqs_sidh_cln16_fp2copy751(R->X, pts[npts]->X); + oqs_sidh_cln16_fp2copy751(R->Z, pts[npts]->Z); + pts_index[npts] = index; + npts += 1; + m = splits_Bob[SIDH_MAX_Bob - index - row]; + oqs_sidh_cln16_xTPLe(R, R, A, C, (int)m); + index += m; + } + oqs_sidh_cln16_get_3_isog(R, A, C); + + for (i = 0; i < npts; i++) { + oqs_sidh_cln16_eval_3_isog(R, pts[i]); + } + oqs_sidh_cln16_eval_3_isog(R, phiP); + oqs_sidh_cln16_eval_3_isog(R, phiQ); + oqs_sidh_cln16_eval_3_isog(R, phiD); + + oqs_sidh_cln16_fp2copy751(pts[npts - 1]->X, R->X); + oqs_sidh_cln16_fp2copy751(pts[npts - 1]->Z, R->Z); + index = pts_index[npts - 1]; + npts -= 1; + } + + oqs_sidh_cln16_get_3_isog(R, A, C); + oqs_sidh_cln16_eval_3_isog(R, phiP); + oqs_sidh_cln16_eval_3_isog(R, phiQ); + oqs_sidh_cln16_eval_3_isog(R, phiD); + + oqs_sidh_cln16_inv_3_way(phiP->Z, phiQ->Z, phiD->Z); + oqs_sidh_cln16_fp2mul751_mont(phiP->X, phiP->Z, phiP->X); + oqs_sidh_cln16_fp2mul751_mont(phiQ->X, phiQ->Z, phiQ->X); + oqs_sidh_cln16_fp2mul751_mont(phiD->X, phiD->Z, phiD->X); + + oqs_sidh_cln16_from_fp2mont(phiP->X, ((oqs_sidh_cln16_f2elm_t *)PublicKeyB)[0]); // Converting back to standard representation + oqs_sidh_cln16_from_fp2mont(phiQ->X, ((oqs_sidh_cln16_f2elm_t *)PublicKeyB)[1]); + oqs_sidh_cln16_from_fp2mont(phiD->X, ((oqs_sidh_cln16_f2elm_t *)PublicKeyB)[2]); + +// Cleanup: + oqs_sidh_cln16_clear_words((void *)R, 2 * 2 * pwords); + oqs_sidh_cln16_clear_words((void *)phiP, 2 * 2 * pwords); + oqs_sidh_cln16_clear_words((void 
*)phiQ, 2 * 2 * pwords); + oqs_sidh_cln16_clear_words((void *)phiD, 2 * 2 * pwords); + oqs_sidh_cln16_clear_words((void *)pts, SIDH_MAX_INT_POINTS_BOB * 2 * 2 * pwords); + oqs_sidh_cln16_clear_words((void *)A, 2 * pwords); + oqs_sidh_cln16_clear_words((void *)C, 2 * pwords); + + return Status; +} + + +SIDH_CRYPTO_STATUS oqs_sidh_cln16_SecretAgreement_A(unsigned char *pPrivateKeyA, unsigned char *pPublicKeyB, unsigned char *pSharedSecretA, bool validate, PCurveIsogenyStruct CurveIsogeny, OQS_RAND *rand) { + // Alice's shared secret generation + // It produces a shared secret key pSharedSecretA using her secret key pPrivateKeyA and Bob's public key pPublicKeyB + // Inputs: Alice's pPrivateKeyA is an even integer in the range [2, oA-2], where oA = 2^372 (i.e., 372 bits in total). + // Bob's pPublicKeyB consists of 3 elements in GF(p751^2), i.e., 564 bytes. + // "validate" flag that indicates if Alice must validate Bob's public key. + // Output: a shared secret pSharedSecretA that consists of one element in GF(p751^2), i.e., 1502 bits in total. + // CurveIsogeny must be set up in advance using oqs_sidh_cln16_curve_initialize(). 
+ unsigned int pwords = NBITS_TO_NWORDS(CurveIsogeny->pwordbits); + unsigned int i, row, m, index = 0, pts_index[SIDH_MAX_INT_POINTS_ALICE], npts = 0; + oqs_sidh_cln16_point_proj_t R, pts[SIDH_MAX_INT_POINTS_ALICE]; + oqs_sidh_cln16_publickey_t *PublicKeyB = (oqs_sidh_cln16_publickey_t *)pPublicKeyB; + oqs_sidh_cln16_f2elm_t jinv, coeff[5], PKB[3], A, C = { {0} }; + bool valid_PublicKey = false; + SIDH_CRYPTO_STATUS Status; + + if (pPrivateKeyA == NULL || pPublicKeyB == NULL || pSharedSecretA == NULL || oqs_sidh_cln16_is_CurveIsogenyStruct_null(CurveIsogeny)) { + return SIDH_CRYPTO_ERROR_INVALID_PARAMETER; + } + + oqs_sidh_cln16_to_fp2mont(((oqs_sidh_cln16_f2elm_t *)PublicKeyB)[0], PKB[0]); // Extracting and converting Bob's public curve parameters to Montgomery representation + oqs_sidh_cln16_to_fp2mont(((oqs_sidh_cln16_f2elm_t *)PublicKeyB)[1], PKB[1]); + oqs_sidh_cln16_to_fp2mont(((oqs_sidh_cln16_f2elm_t *)PublicKeyB)[2], PKB[2]); + + oqs_sidh_cln16_get_A(PKB[0], PKB[1], PKB[2], A, CurveIsogeny); + oqs_sidh_cln16_fpcopy751(CurveIsogeny->C, C[0]); + oqs_sidh_cln16_to_mont(C[0], C[0]); + + if (validate == true) { // Alice validating Bob's public key + Status = oqs_sidh_cln16_Validate_PKB(A, &PKB[0], &valid_PublicKey, CurveIsogeny, rand); + if (Status != SIDH_CRYPTO_SUCCESS) { + return Status; + } + if (valid_PublicKey != true) { + Status = SIDH_CRYPTO_ERROR_PUBLIC_KEY_VALIDATION; + return Status; + } + } + + Status = oqs_sidh_cln16_ladder_3_pt(PKB[0], PKB[1], PKB[2], (digit_t *)pPrivateKeyA, SIDH_ALICE, R, A, CurveIsogeny); + if (Status != SIDH_CRYPTO_SUCCESS) { + return Status; + } + oqs_sidh_cln16_first_4_isog(R, A, A, C, CurveIsogeny); + + index = 0; + for (row = 1; row < SIDH_MAX_Alice; row++) { + while (index < SIDH_MAX_Alice - row) { + oqs_sidh_cln16_fp2copy751(R->X, pts[npts]->X); + oqs_sidh_cln16_fp2copy751(R->Z, pts[npts]->Z); + pts_index[npts] = index; + npts += 1; + m = splits_Alice[SIDH_MAX_Alice - index - row]; + oqs_sidh_cln16_xDBLe(R, R, A, C, 
(int)(2 * m)); + index += m; + } + oqs_sidh_cln16_get_4_isog(R, A, C, coeff); + + for (i = 0; i < npts; i++) { + oqs_sidh_cln16_eval_4_isog(pts[i], coeff); + } + + oqs_sidh_cln16_fp2copy751(pts[npts - 1]->X, R->X); + oqs_sidh_cln16_fp2copy751(pts[npts - 1]->Z, R->Z); + index = pts_index[npts - 1]; + npts -= 1; + } + + oqs_sidh_cln16_get_4_isog(R, A, C, coeff); + oqs_sidh_cln16_j_inv(A, C, jinv); + oqs_sidh_cln16_from_fp2mont(jinv, (oqs_sidh_cln16_felm_t *)pSharedSecretA); // Converting back to standard representation + +// Cleanup: + oqs_sidh_cln16_clear_words((void *)R, 2 * 2 * pwords); + oqs_sidh_cln16_clear_words((void *)pts, SIDH_MAX_INT_POINTS_ALICE * 2 * 2 * pwords); + oqs_sidh_cln16_clear_words((void *)A, 2 * pwords); + oqs_sidh_cln16_clear_words((void *)C, 2 * pwords); + oqs_sidh_cln16_clear_words((void *)jinv, 2 * pwords); + oqs_sidh_cln16_clear_words((void *)coeff, 5 * 2 * pwords); + + return Status; +} + + +SIDH_CRYPTO_STATUS oqs_sidh_cln16_SecretAgreement_B(unsigned char *pPrivateKeyB, unsigned char *pPublicKeyA, unsigned char *pSharedSecretB, bool validate, PCurveIsogenyStruct CurveIsogeny, OQS_RAND *rand) { + // Bob's shared secret generation + // It produces a shared secret key pSharedSecretB using his secret key pPrivateKeyB and Alice's public key pPublicKeyA + // Inputs: Bob's pPrivateKeyB is an integer in the range [1, oB-1], where oB = 3^239 (i.e., 379 bits in total). + // Alice's pPublicKeyA consists of 3 elements in GF(p751^2), i.e., 564 bytes. + // "validate" flag that indicates if Bob must validate Alice's public key. + // Output: a shared secret pSharedSecretB that consists of one element in GF(p751^2), i.e., 1502 bits in total. + // CurveIsogeny must be set up in advance using oqs_sidh_cln16_curve_initialize().
+ unsigned int pwords = NBITS_TO_NWORDS(CurveIsogeny->pwordbits); + unsigned int i, row, m, index = 0, pts_index[SIDH_MAX_INT_POINTS_BOB], npts = 0; + oqs_sidh_cln16_point_proj_t R, pts[SIDH_MAX_INT_POINTS_BOB]; + oqs_sidh_cln16_publickey_t *PublicKeyA = (oqs_sidh_cln16_publickey_t *)pPublicKeyA; + oqs_sidh_cln16_f2elm_t jinv, A, PKA[3], C = { {0} }; + bool valid_PublicKey = false; + SIDH_CRYPTO_STATUS Status; + + if (pPrivateKeyB == NULL || pPublicKeyA == NULL || pSharedSecretB == NULL || oqs_sidh_cln16_is_CurveIsogenyStruct_null(CurveIsogeny)) { + return SIDH_CRYPTO_ERROR_INVALID_PARAMETER; + } + + oqs_sidh_cln16_to_fp2mont(((oqs_sidh_cln16_f2elm_t *)PublicKeyA)[0], PKA[0]); // Extracting and converting Alice's public curve parameters to Montgomery representation + oqs_sidh_cln16_to_fp2mont(((oqs_sidh_cln16_f2elm_t *)PublicKeyA)[1], PKA[1]); + oqs_sidh_cln16_to_fp2mont(((oqs_sidh_cln16_f2elm_t *)PublicKeyA)[2], PKA[2]); + + oqs_sidh_cln16_get_A(PKA[0], PKA[1], PKA[2], A, CurveIsogeny); + oqs_sidh_cln16_fpcopy751(CurveIsogeny->C, C[0]); + oqs_sidh_cln16_to_mont(C[0], C[0]); + + if (validate == true) { // Bob validating Alice's public key + Status = oqs_sidh_cln16_Validate_PKA(A, &PKA[0], &valid_PublicKey, CurveIsogeny, rand); + if (Status != SIDH_CRYPTO_SUCCESS) { + return Status; + } + if (valid_PublicKey != true) { + Status = SIDH_CRYPTO_ERROR_PUBLIC_KEY_VALIDATION; + return Status; + } + } + + Status = oqs_sidh_cln16_ladder_3_pt(PKA[0], PKA[1], PKA[2], (digit_t *)pPrivateKeyB, SIDH_BOB, R, A, CurveIsogeny); + if (Status != SIDH_CRYPTO_SUCCESS) { + return Status; + } + + index = 0; + for (row = 1; row < SIDH_MAX_Bob; row++) { + while (index < SIDH_MAX_Bob - row) { + oqs_sidh_cln16_fp2copy751(R->X, pts[npts]->X); + oqs_sidh_cln16_fp2copy751(R->Z, pts[npts]->Z); + pts_index[npts] = index; + npts += 1; + m = splits_Bob[SIDH_MAX_Bob - index - row]; + oqs_sidh_cln16_xTPLe(R, R, A, C, (int)m); + index += m; + } + oqs_sidh_cln16_get_3_isog(R, A, C); + + for (i = 0; i < 
npts; i++) { + oqs_sidh_cln16_eval_3_isog(R, pts[i]); + } + + oqs_sidh_cln16_fp2copy751(pts[npts - 1]->X, R->X); + oqs_sidh_cln16_fp2copy751(pts[npts - 1]->Z, R->Z); + index = pts_index[npts - 1]; + npts -= 1; + } + + oqs_sidh_cln16_get_3_isog(R, A, C); + oqs_sidh_cln16_j_inv(A, C, jinv); + oqs_sidh_cln16_from_fp2mont(jinv, (oqs_sidh_cln16_felm_t *)pSharedSecretB); // Converting back to standard representation + +// Cleanup: + oqs_sidh_cln16_clear_words((void *)R, 2 * 2 * pwords); + oqs_sidh_cln16_clear_words((void *)pts, SIDH_MAX_INT_POINTS_BOB * 2 * 2 * pwords); + oqs_sidh_cln16_clear_words((void *)A, 2 * pwords); + oqs_sidh_cln16_clear_words((void *)C, 2 * pwords); + oqs_sidh_cln16_clear_words((void *)jinv, 2 * pwords); + + return Status; +} diff --git a/src/kex_sidh_cln16/validate.c b/src/kex_sidh_cln16/validate.c new file mode 100644 index 000000000..27a8a4ffb --- /dev/null +++ b/src/kex_sidh_cln16/validate.c @@ -0,0 +1,220 @@ +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny-based cryptography library for Diffie-Hellman key +* exchange providing 128 bits of quantum security and 192 bits of classical security. +* +* Copyright (c) Microsoft Corporation. All rights reserved. +* +* +* Abstract: functions for validation of public keys +* +* SECURITY NOTE: these functions run in variable time because it is assumed that they are +* used over public data. +* +*********************************************************************************************/ + +#include <oqs/rand.h> +#include "SIDH_internal.h" + +static bool is_equal_fp(oqs_sidh_cln16_felm_t a, oqs_sidh_cln16_felm_t b) { + // Return true if a = b in GF(p751). Otherwise, return false + unsigned int i; + + for (i = 0; i < NWORDS_FIELD; i++) { + if (a[i] != b[i]) { + return false; + } + } + + return true; +} + + +static bool is_equal_fp2(oqs_sidh_cln16_f2elm_t a, oqs_sidh_cln16_f2elm_t b) { + // Return true if a = b in GF(p751^2).
Otherwise, return false + + return (is_equal_fp(a[0], b[0]) && is_equal_fp(a[1], b[1])); +} + + +SIDH_CRYPTO_STATUS oqs_sidh_cln16_random_fp2(oqs_sidh_cln16_f2elm_t f2value, PCurveIsogenyStruct pCurveIsogeny, OQS_RAND *rand) { + // Output random value in GF(p751^2). It makes requests of random values to the "rand_n" function of the supplied OQS_RAND. + // If successful, the output is given in "f2value". + unsigned int ntry = 0, nbytes; + oqs_sidh_cln16_felm_t t1, p751; + unsigned char mask; + oqs_sidh_cln16_clear_words((void *)f2value, 2 * NWORDS_FIELD); + oqs_sidh_cln16_fpcopy751(pCurveIsogeny->prime, p751); + nbytes = (pCurveIsogeny->pbits + 7) / 8; // Number of random bytes to be requested + mask = (unsigned char)(8 * nbytes - pCurveIsogeny->pbits); + mask = ((unsigned char) - 1 >> mask); // Value for masking last random byte + + do { + ntry++; + if (ntry > 100) { // Max. 100 iterations to obtain random value in [0, p751-1] + return SIDH_CRYPTO_ERROR_TOO_MANY_ITERATIONS; + } + + rand->rand_n(rand, (uint8_t *)&f2value[0], nbytes); + ((unsigned char *)&f2value[0])[nbytes - 1] &= mask; // Masking last byte + } while (oqs_sidh_cln16_mp_sub(p751, f2value[0], t1, NWORDS_FIELD) == 1); + + ntry = 0; + do { + ntry++; + if (ntry > 100) { // Max. 100 iterations to obtain random value in [0, p751-1] + return SIDH_CRYPTO_ERROR_TOO_MANY_ITERATIONS; + } + + rand->rand_n(rand, (uint8_t *)&f2value[1], nbytes); + ((unsigned char *)&f2value[1])[nbytes - 1] &= mask; // Masking last byte + } while (oqs_sidh_cln16_mp_sub(p751, f2value[1], t1, NWORDS_FIELD) == 1); + +// Cleanup + oqs_sidh_cln16_clear_words((void *)t1, NWORDS_FIELD); + + return SIDH_CRYPTO_SUCCESS; +} + + +static bool test_curve(oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_f2elm_t rvalue, PCurveIsogenyStruct CurveIsogeny) { + // This function checks that the curve is in the correct supersingular isogeny class via Sutherland's Monte Carlo algorithm. + // It also checks that the curve is not a subfield curve.
Both Alice and Bob call this same function in their respective validation procedures below. + // Inputs: the curve constant A, corresponding to E_A: y^2=x^3+A*x^2+x, + // a random value "rvalue" in Fp2. + // Output: returns "true" if curve is valid, "false" otherwise. + oqs_sidh_cln16_f2elm_t t0, t1, one = { {0} }, zero = { {0} }; + oqs_sidh_cln16_point_proj_t rP, P1; + bool valid_curve; + + oqs_sidh_cln16_fpcopy751((digit_t *)CurveIsogeny->Montgomery_one, one[0]); + + // Test j invariant in Fp2\Fp + oqs_sidh_cln16_fp2sqr751_mont(A, t0); // t0 = a^2 + oqs_sidh_cln16_fp2sub751(t0, one, t0); + oqs_sidh_cln16_fp2sub751(t0, one, t0); + oqs_sidh_cln16_fp2sub751(t0, one, t0); // t0 = t0-3 + oqs_sidh_cln16_fp2sqr751_mont(t0, t1); // t1 = t0^2 + oqs_sidh_cln16_fp2mul751_mont(t0, t1, t1); // t1 = t1*t0 + oqs_sidh_cln16_fp2sub751(t0, one, t0); // t0 = t0-1 + oqs_sidh_cln16_fpmul751_mont(t1[0], t0[1], t1[0]); + oqs_sidh_cln16_fpmul751_mont(t1[1], t0[0], t1[1]); + oqs_sidh_cln16_fp2correction751(t1); + + valid_curve = !is_equal_fp(t1[0], t1[1]); + + // Test supersingular + oqs_sidh_cln16_fp2copy751(rvalue, rP->X); + oqs_sidh_cln16_fp2copy751(one, rP->Z); + + oqs_sidh_cln16_xDBLe(rP, rP, A, one, 1); + oqs_sidh_cln16_xDBLe(rP, P1, A, one, 371); + oqs_sidh_cln16_xTPLe(P1, P1, A, one, 239); + oqs_sidh_cln16_fp2mul751_mont(rP->X, P1->Z, rP->X); // X = X*Z1 + oqs_sidh_cln16_fp2mul751_mont(rP->Z, P1->X, rP->Z); // Z = Z*X1 + oqs_sidh_cln16_fp2sub751(rP->X, rP->Z, rP->X); // X = X-Z + oqs_sidh_cln16_fp2mul751_mont(rP->X, P1->Z, rP->X); // X = X*Z1 + oqs_sidh_cln16_fp2correction751(rP->X); + + return (valid_curve && is_equal_fp2(rP->X, zero)); +} + + +SIDH_CRYPTO_STATUS oqs_sidh_cln16_Validate_PKA(oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_publickey_t PKA, bool *valid, PCurveIsogenyStruct CurveIsogeny, OQS_RAND *rand) { + // Bob validating Alice's public key + // Inputs: Alice's public key [A,xP,xQ,xQP], where xP,xQ and xQP are contained in PKA, + // the exponent eB (=239 for our curve) 
for Miller's algorithm. + // Output: valid = "true" if key is valid, "false" otherwise. + // CurveIsogeny must be set up in advance using oqs_sidh_cln16_curve_initialize(). + unsigned int eB1 = CurveIsogeny->eB - 1; // eB1 = eB-1 + oqs_sidh_cln16_f2elm_t t0, t1, rvalue, one = { {0} }, zero = { {0} }; + oqs_sidh_cln16_point_proj_t P = oqs_sidh_cln16_point_proj_t_EMPTY, Q = oqs_sidh_cln16_point_proj_t_EMPTY; + SIDH_CRYPTO_STATUS Status; + + // Choose a random element in GF(p751^2) for Sutherland's algorithm. Assume that it is in Montgomery representation + Status = oqs_sidh_cln16_random_fp2(rvalue, CurveIsogeny, rand); + if (Status != SIDH_CRYPTO_SUCCESS) { + oqs_sidh_cln16_clear_words((void *)rvalue, 2 * NWORDS_FIELD); + return Status; + } + + oqs_sidh_cln16_fpcopy751((digit_t *)CurveIsogeny->Montgomery_one, one[0]); + oqs_sidh_cln16_fp2copy751(PKA[0], P->X); + oqs_sidh_cln16_fpcopy751((digit_t *)CurveIsogeny->Montgomery_one, P->Z[0]); + oqs_sidh_cln16_fp2copy751(PKA[1], Q->X); + oqs_sidh_cln16_fpcopy751((digit_t *)CurveIsogeny->Montgomery_one, Q->Z[0]); + + oqs_sidh_cln16_xTPLe(P, P, A, one, eB1); + oqs_sidh_cln16_xTPLe(Q, Q, A, one, eB1); + oqs_sidh_cln16_fp2mul751_mont(P->X, Q->Z, t0); // t0 = XP*ZQ + oqs_sidh_cln16_fp2mul751_mont(Q->X, P->Z, t1); // t1 = XQ*ZP + oqs_sidh_cln16_fp2sub751(t0, t1, t0); // t0 = t0-t1 + oqs_sidh_cln16_fp2mul751_mont(P->Z, t0, t0); // t0 = ZP*t0 + oqs_sidh_cln16_fp2mul751_mont(Q->Z, t0, t0); // t0 = ZQ*t0 + oqs_sidh_cln16_fp2correction751(t0); + *valid = !is_equal_fp2(t0, zero); // Checks that ZP*ZQ*(XQ*ZP-XP*ZQ) != 0, i.e., that 3^(e-1)*P != 3^(e-1)*Q and neither P nor Q has order 3^(e-1) + + oqs_sidh_cln16_xTPLe(P, P, A, one, 1); + oqs_sidh_cln16_xTPLe(Q, Q, A, one, 1); + oqs_sidh_cln16_fp2correction751(P->Z); + oqs_sidh_cln16_fp2correction751(Q->Z); + *valid = *valid & is_equal_fp2(P->Z, zero); // Checks that 3^e*P = 0 + *valid = *valid & is_equal_fp2(Q->Z, zero); // Checks that 3^e*Q = 0 + *valid = *valid & test_curve(A, rvalue, 
CurveIsogeny); // Tests curve via Sutherland's algorithm + + return SIDH_CRYPTO_SUCCESS; +} + + +SIDH_CRYPTO_STATUS oqs_sidh_cln16_Validate_PKB(oqs_sidh_cln16_f2elm_t A, oqs_sidh_cln16_publickey_t PKB, bool *valid, PCurveIsogenyStruct CurveIsogeny, OQS_RAND *rand) { + // Alice validating Bob's public key + // Inputs: Bob's public key [A,xP,xQ,xQP], where xP,xQ and xQP are contained in PKB, + // the exponent eA (=372 for our curve) for Miller's algorithm. + // Output: valid = "true" if key is valid, "false" otherwise. + // CurveIsogeny must be set up in advance using oqs_sidh_cln16_curve_initialize(). + unsigned int oAbits2 = CurveIsogeny->oAbits - 2; // oAbits2 = oAbits-2 + oqs_sidh_cln16_f2elm_t t0, t1, two, four, rvalue, one = { {0} }, zero = { {0} }; + oqs_sidh_cln16_point_proj_t P = oqs_sidh_cln16_point_proj_t_EMPTY, Q = oqs_sidh_cln16_point_proj_t_EMPTY; + SIDH_CRYPTO_STATUS Status; + + // Choose a random element in GF(p751^2) for Sutherland's algorithm. Assume that it is in Montgomery representation + Status = oqs_sidh_cln16_random_fp2(rvalue, CurveIsogeny, rand); + if (Status != SIDH_CRYPTO_SUCCESS) { + oqs_sidh_cln16_clear_words((void *)rvalue, 2 * NWORDS_FIELD); + return Status; + } + + oqs_sidh_cln16_fpcopy751((digit_t *)CurveIsogeny->Montgomery_one, one[0]); + oqs_sidh_cln16_fp2copy751(PKB[0], P->X); + oqs_sidh_cln16_fpcopy751((digit_t *)CurveIsogeny->Montgomery_one, P->Z[0]); + oqs_sidh_cln16_fp2copy751(PKB[1], Q->X); + oqs_sidh_cln16_fpcopy751((digit_t *)CurveIsogeny->Montgomery_one, Q->Z[0]); + + oqs_sidh_cln16_fp2add751(one, one, two); + oqs_sidh_cln16_fp2add751(two, two, four); // four = 4 + oqs_sidh_cln16_xDBLe(P, P, A, one, oAbits2); + oqs_sidh_cln16_xDBLe(Q, Q, A, one, oAbits2); + oqs_sidh_cln16_fp2mul751_mont(P->X, Q->Z, t0); // t0 = XP*ZQ + oqs_sidh_cln16_fp2mul751_mont(Q->X, P->Z, t1); // t1 = XQ*ZP + oqs_sidh_cln16_fp2sub751(t0, t1, t0); // t0 = t0-t1 + oqs_sidh_cln16_fp2mul751_mont(P->Z, t0, t0); // t0 = ZP*t0 + 
oqs_sidh_cln16_fp2mul751_mont(Q->Z, t0, t0); // t0 = ZQ*t0 + oqs_sidh_cln16_fp2correction751(t0); + *valid = !is_equal_fp2(t0, zero); // Checks that ZP*ZQ*(XQ*ZP-XP*ZQ) != 0, i.e., that 2^(e-2)*P != 2^(e-2)*Q and neither P nor Q has order 2^(e-2) + + oqs_sidh_cln16_fp2add751(A, two, t0); // t0 = A+2, the curve constant passed to every xDBL below + oqs_sidh_cln16_xDBL(P, P, t0, four); + oqs_sidh_cln16_xDBL(Q, Q, t0, four); + oqs_sidh_cln16_fp2mul751_mont(P->Z, Q->Z, t1); // t1 = ZP*ZQ (kept out of t0 so that t0 still holds A+2) + oqs_sidh_cln16_fp2correction751(t1); + *valid = *valid & !is_equal_fp2(t1, zero); // Checks that 2^(e-1)*P != 0 and 2^(e-1)*Q != 0 + + oqs_sidh_cln16_xDBL(P, P, t0, four); // t0 = A+2 (unchanged) + oqs_sidh_cln16_xDBL(Q, Q, t0, four); + oqs_sidh_cln16_fp2correction751(P->Z); + oqs_sidh_cln16_fp2correction751(Q->Z); + *valid = *valid & is_equal_fp2(P->Z, zero); // Checks that 2^e*P = 0 + *valid = *valid & is_equal_fp2(Q->Z, zero); // Checks that 2^e*Q = 0 + *valid = *valid & test_curve(A, rvalue, CurveIsogeny); // Tests curve via Sutherland's algorithm + + return SIDH_CRYPTO_SUCCESS; +} From d94b54e99f6c4d7c138deddd57b613a9e3961da3 Mon Sep 17 00:00:00 2001 From: Douglas Stebila Date: Thu, 24 Nov 2016 16:15:46 -0500 Subject: [PATCH 3/5] Rename file. --- src/kex_sidh_cln16/{License.txt => LICENSE.txt} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/kex_sidh_cln16/{License.txt => LICENSE.txt} (100%) diff --git a/src/kex_sidh_cln16/License.txt b/src/kex_sidh_cln16/LICENSE.txt similarity index 100% rename from src/kex_sidh_cln16/License.txt rename to src/kex_sidh_cln16/LICENSE.txt From fcbc38097078415e429878ced74b2a2ea2b48933 Mon Sep 17 00:00:00 2001 From: Douglas Stebila Date: Thu, 24 Nov 2016 16:16:04 -0500 Subject: [PATCH 4/5] Rename file.
--- src/kex_rlwe_msrln16/{License.txt => LICENSE.txt} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/kex_rlwe_msrln16/{License.txt => LICENSE.txt} (100%) diff --git a/src/kex_rlwe_msrln16/License.txt b/src/kex_rlwe_msrln16/LICENSE.txt similarity index 100% rename from src/kex_rlwe_msrln16/License.txt rename to src/kex_rlwe_msrln16/LICENSE.txt From d23f8d3eddf3e72a5971983562721ba9b9c63df5 Mon Sep 17 00:00:00 2001 From: Alex Parent Date: Fri, 25 Nov 2016 14:06:48 -0500 Subject: [PATCH 5/5] Fix possible memory leaks. --- src/kex_sidh_cln16/SIDH_setup.c | 1 + src/kex_sidh_cln16/kex_sidh_cln16.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/src/kex_sidh_cln16/SIDH_setup.c b/src/kex_sidh_cln16/SIDH_setup.c index 621cfa2ef..8865629a8 100644 --- a/src/kex_sidh_cln16/SIDH_setup.c +++ b/src/kex_sidh_cln16/SIDH_setup.c @@ -73,6 +73,7 @@ PCurveIsogenyStruct oqs_sidh_cln16_curve_allocate(PCurveIsogenyStaticData CurveD pCurveIsogeny->Montgomery_pp = (digit_t *)calloc(1, pbytes); pCurveIsogeny->Montgomery_one = (digit_t *)calloc(1, pbytes); if (oqs_sidh_cln16_is_CurveIsogenyStruct_null(pCurveIsogeny)) { + oqs_sidh_cln16_curve_free(pCurveIsogeny); return NULL; } return pCurveIsogeny; diff --git a/src/kex_sidh_cln16/kex_sidh_cln16.c b/src/kex_sidh_cln16/kex_sidh_cln16.c index 439fd8db7..94708a647 100644 --- a/src/kex_sidh_cln16/kex_sidh_cln16.c +++ b/src/kex_sidh_cln16/kex_sidh_cln16.c @@ -27,9 +27,11 @@ OQS_KEX *OQS_KEX_sidh_cln16_new(OQS_RAND *rand) { // Curve isogeny system initialization PCurveIsogenyStruct curveIsogeny = oqs_sidh_cln16_curve_allocate(&CurveIsogeny_SIDHp751); if (curveIsogeny == NULL) { + free(k); return NULL; } if (oqs_sidh_cln16_curve_initialize(curveIsogeny, rand, &CurveIsogeny_SIDHp751) != SIDH_CRYPTO_SUCCESS) { + free(k); oqs_sidh_cln16_curve_free(curveIsogeny); return NULL; }