Integrates MSR's RLWE LatticeCrypto library into OQS. (#58)

This commit is contained in:
Christian Paquin 2016-11-23 11:06:54 -05:00 committed by Douglas Stebila
parent 04b841ca2c
commit 7fcf29d37f
22 changed files with 3270 additions and 129 deletions

View File

@ -58,6 +58,7 @@ links:
$(LN) ../../src/kex/kex.h include/oqs
$(LN) ../../src/kex_rlwe_bcns15/kex_rlwe_bcns15.h include/oqs
$(LN) ../../src/kex_rlwe_newhope/kex_rlwe_newhope.h include/oqs
$(LN) ../../src/kex_rlwe_msrln16/kex_rlwe_msrln16.h include/oqs
$(LN) ../../src/kex_lwe_frodo/kex_lwe_frodo.h include/oqs
$(LN) ../../src/rand/rand.h include/oqs
$(LN) ../../src/rand_urandom_chacha20/rand_urandom_chacha20.h include/oqs
@ -85,6 +86,11 @@ KEX_RLWE_NEWHOPE_OBJS := $(addprefix objs/kex_rlwe_newhope/, kex_rlwe_newhope.o)
KEX_RLWE_NEWHOPE_HEADERS := $(addprefix src/kex_rlwe_newhope/, kex_rlwe_newhope.h fips202.c newhope.c params.h poly.c precomp.c)
$(KEX_RLWE_NEWHOPE_OBJS): $(KEX_RLWE_NEWHOPE_HEADERS)
# KEX_RLWE_MSRLN16
# Object files for the MSR LN16 RLWE key exchange, placed under objs/kex_rlwe_msrln16/.
# Every object lists all of the module's headers as prerequisites, so touching
# any one header rebuilds all of these objects.
KEX_RLWE_MSRLN16_OBJS := $(addprefix objs/kex_rlwe_msrln16/, kex_rlwe_msrln16.o LatticeCrypto_kex.o ntt_constants.o)
KEX_RLWE_MSRLN16_HEADERS := $(addprefix src/kex_rlwe_msrln16/, LatticeCrypto.h LatticeCrypto_priv.h kex_rlwe_msrln16.h )
$(KEX_RLWE_MSRLN16_OBJS): $(KEX_RLWE_MSRLN16_HEADERS)
# KEX_LWE_FRODO
KEX_LWE_FRODO_OBJS := $(addprefix objs/kex_lwe_frodo/, lwe.o kex_lwe_frodo.o lwe_noise.o)
KEX_LWE_FRODO_HEADERS := $(addprefix src/kex_lwe_frodo/, kex_lwe_frodo.h local.h)
@ -106,9 +112,10 @@ objs/kex/kex.o: src/kex/kex.h
# LIB
RAND_OBJS := $(RAND_URANDOM_AESCTR_OBJS) $(RAND_URANDOM_CHACHA_OBJS)
lib: $(RAND_OBJS) $(KEX_RLWE_BCNS15_OBJS) $(KEX_RLWE_NEWHOPE_OBJS) $(KEX_LWE_FRODO_OBJS) objs/rand/rand.o objs/kex/kex.o $(AES_OBJS) $(COMMON_OBJS)
lib: $(RAND_OBJS) $(KEX_RLWE_BCNS15_OBJS) $(KEX_RLWE_NEWHOPE_OBJS) $(KEX_LWE_FRODO_OBJS) $(KEX_RLWE_MSRLN16_OBJS) objs/rand/rand.o objs/kex/kex.o $(AES_OBJS) $(COMMON_OBJS)
rm -f liboqs.a
$(AR) liboqs.a $^
$(RANLIB) liboqs.a

View File

@ -26,6 +26,7 @@ liboqs currently contains:
- `rand_urandom_chacha20`: pseudorandom number generator seeded from /dev/urandom and expanded using the ChaCha20 stream cipher
- `kex_rlwe_bcns15`: key exchange from the ring learning with errors problem (Bos, Costello, Naehrig, Stebila, *IEEE Symposium on Security & Privacy 2015*, [https://eprint.iacr.org/2014/599](https://eprint.iacr.org/2014/599))
- `kex_rlwe_newhope`: "NewHope": key exchange from the ring learning with errors problem (Alkim, Ducas, Pöppelmann, Schwabe, *USENIX Security 2016*, [https://eprint.iacr.org/2015/1092](https://eprint.iacr.org/2015/1092)) (using the reference C implementation of NewHope from [https://github.com/tpoeppelmann/newhope](https://github.com/tpoeppelmann/newhope))
- `kex_rlwe_msrln16`: Microsoft Research (MSR) implementation of Peikert's ring learning with errors key exchange, based on the implementation of Alkim, Ducas, Pöppelmann, and Schwabe, with improvements from Longa and Naehrig ([https://www.microsoft.com/en-us/research/project/lattice-cryptography-library/](https://www.microsoft.com/en-us/research/project/lattice-cryptography-library/))
- `kex_lwe_frodo`: key exchange from the learning with errors problem (Bos, Costello, Ducas, Mironov, Naehrig, Nikolaenko, Raghunathan, Stebila, *ACM Conference on Computer and Communications Security 2016*, [http://eprint.iacr.org/2016/659](http://eprint.iacr.org/2016/659))
Building and Running
@ -114,11 +115,12 @@ In the long term, we are also interested in including post-quantum signature sch
License
-------
liboqs is licensed under the MIT License; see [https://github.com/open-quantum-safe/liboqs/blob/master/LICENSE.txt](LICENSE.txt) for details. liboqs includes some third party libraries or modules that are licensed differently; the corresponding subfolder contains the license that applies in that case. In particular:
liboqs is licensed under the MIT License; see [LICENSE.txt](https://github.com/open-quantum-safe/liboqs/blob/master/LICENSE.txt) for details. liboqs includes some third party libraries or modules that are licensed differently; the corresponding subfolder contains the license that applies in that case. In particular:
- `src/kex_rlwe_bcns15`: public domain ([http://unlicense.org](http://unlicense.org))
- `src/rand_urandom_chacha20/external`: public domain
- `src/kex_rlwe_bcns15`: public domain ([Unlicense](http://unlicense.org))
- `src/kex_rlwe_msrln16/external`: public domain ([CC0](http://creativecommons.org/publicdomain/zero/1.0/))
- `src/kex_rlwe_newhope`: public domain
- `src/rand_urandom_chacha20/external`: public domain
Team
----

View File

@ -29,6 +29,9 @@
<ClInclude Include="..\..\src\kex_rlwe_bcns15\rlwe_table.h" />
<ClInclude Include="..\..\src\kex_rlwe_newhope\kex_rlwe_newhope.h" />
<ClInclude Include="..\..\src\kex_rlwe_newhope\params.h" />
<ClInclude Include="..\..\src\kex_rlwe_msrln16\kex_rlwe_msrln16.h" />
<ClInclude Include="..\..\src\kex_rlwe_msrln16\LatticeCrypto.h" />
<ClInclude Include="..\..\src\kex_rlwe_msrln16\LatticeCrypto_priv.h" />
<ClInclude Include="..\..\src\rand\rand.h" />
<ClInclude Include="..\..\src\rand_urandom_aesctr\rand_urandom_aesctr.h" />
<ClInclude Include="..\..\src\rand_urandom_chacha20\rand_urandom_chacha20.h" />
@ -47,6 +50,9 @@
<ClCompile Include="..\..\src\kex_rlwe_bcns15\rlwe_kex.c" />
<ClCompile Include="..\..\src\kex_rlwe_newhope\kex_rlwe_newhope.c" />
<ClCompile Include="..\..\src\kex_rlwe_newhope\newhope.c" />
<ClCompile Include="..\..\src\kex_rlwe_msrln16\kex_rlwe_msrln16.c" />
<ClCompile Include="..\..\src\kex_rlwe_msrln16\LatticeCrypto_kex.c" />
<ClCompile Include="..\..\src\kex_rlwe_msrln16\ntt_constants.c" />
<ClCompile Include="..\..\src\rand\rand.c" />
<ClCompile Include="..\..\src\rand_urandom_aesctr\rand_urandom_aesctr.c" />
<ClCompile Include="..\..\src\rand_urandom_chacha20\rand_urandom_chacha20.c" />
@ -134,6 +140,7 @@ copy "$(SolutionDir)..\src\kex\kex.h" "$(SolutionDir)include\oqs\"
copy "$(SolutionDir)..\src\aes\aes.h" "$(SolutionDir)include\oqs\"
copy "$(SolutionDir)..\src\kex_rlwe_bcns15\kex_rlwe_bcns15.h" "$(SolutionDir)include\oqs\"
copy "$(SolutionDir)..\src\kex_rlwe_newhope\kex_rlwe_newhope.h" "$(SolutionDir)include\oqs\"
copy "$(SolutionDir)..\src\kex_rlwe_msrln16\kex_rlwe_msrln16.h" "$(SolutionDir)include\oqs\"
copy "$(SolutionDir)..\src\kex_lwe_frodo\kex_lwe_frodo.h" "$(SolutionDir)include\oqs\"</Command>
</PreBuildEvent>
</ItemDefinitionGroup>
@ -161,6 +168,7 @@ copy "$(SolutionDir)..\src\kex\kex.h" "$(SolutionDir)include\oqs\"
copy "$(SolutionDir)..\src\aes\aes.h" "$(SolutionDir)include\oqs\"
copy "$(SolutionDir)..\src\kex_rlwe_bcns15\kex_rlwe_bcns15.h" "$(SolutionDir)include\oqs\"
copy "$(SolutionDir)..\src\kex_rlwe_newhope\kex_rlwe_newhope.h" "$(SolutionDir)include\oqs\"
copy "$(SolutionDir)..\src\kex_rlwe_msrln16\kex_rlwe_msrln16.h" "$(SolutionDir)include\oqs\"
copy "$(SolutionDir)..\src\kex_lwe_frodo\kex_lwe_frodo.h" "$(SolutionDir)include\oqs\"</Command>
</PreBuildEvent>
</ItemDefinitionGroup>
@ -192,6 +200,7 @@ copy "$(SolutionDir)..\src\kex\kex.h" "$(SolutionDir)include\oqs\"
copy "$(SolutionDir)..\src\aes\aes.h" "$(SolutionDir)include\oqs\"
copy "$(SolutionDir)..\src\kex_rlwe_bcns15\kex_rlwe_bcns15.h" "$(SolutionDir)include\oqs\"
copy "$(SolutionDir)..\src\kex_rlwe_newhope\kex_rlwe_newhope.h" "$(SolutionDir)include\oqs\"
copy "$(SolutionDir)..\src\kex_rlwe_msrln16\kex_rlwe_msrln16.h" "$(SolutionDir)include\oqs\"
copy "$(SolutionDir)..\src\kex_lwe_frodo\kex_lwe_frodo.h" "$(SolutionDir)include\oqs\"</Command>
</PreBuildEvent>
</ItemDefinitionGroup>
@ -223,6 +232,7 @@ copy "$(SolutionDir)..\src\kex\kex.h" "$(SolutionDir)include\oqs\"
copy "$(SolutionDir)..\src\aes\aes.h" "$(SolutionDir)include\oqs\"
copy "$(SolutionDir)..\src\kex_rlwe_bcns15\kex_rlwe_bcns15.h" "$(SolutionDir)include\oqs\"
copy "$(SolutionDir)..\src\kex_rlwe_newhope\kex_rlwe_newhope.h" "$(SolutionDir)include\oqs\"
copy "$(SolutionDir)..\src\kex_rlwe_msrln16\kex_rlwe_msrln16.h" "$(SolutionDir)include\oqs\"
copy "$(SolutionDir)..\src\kex_lwe_frodo\kex_lwe_frodo.h" "$(SolutionDir)include\oqs\"</Command>
</PreBuildEvent>
</ItemDefinitionGroup>

View File

@ -1,138 +1,128 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<Filter Include="Source Files">
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
<Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
</Filter>
<Filter Include="Header Files">
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
<Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
</Filter>
<Filter Include="Resource Files">
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
</Filter>
<Filter Include="Source Files\BCNS15">
<UniqueIdentifier>{178cf83e-52eb-4ead-a1ca-33558ffd988e}</UniqueIdentifier>
</Filter>
<Filter Include="Source Files\rand">
<UniqueIdentifier>{15505124-034e-4100-a6cb-a32309738b25}</UniqueIdentifier>
</Filter>
<Filter Include="Header Files\BCNS15">
<UniqueIdentifier>{768db348-be19-4ce5-a6bc-22d81c2c59d3}</UniqueIdentifier>
</Filter>
<Filter Include="Header Files\rand">
<UniqueIdentifier>{65721ee8-a809-4e67-88a8-ad33d6a6a800}</UniqueIdentifier>
</Filter>
<Filter Include="Header Files\Newhope">
<UniqueIdentifier>{d26b84bb-a12a-4229-b619-8ddfdaca9928}</UniqueIdentifier>
</Filter>
<Filter Include="Source Files\Newhope">
<UniqueIdentifier>{9445c3c6-bd2e-4448-8cba-a986ef929d45}</UniqueIdentifier>
</Filter>
<Filter Include="Header Files\Frodo">
<UniqueIdentifier>{9e41843f-5671-4bbf-916a-2688ac3ceaff}</UniqueIdentifier>
</Filter>
<Filter Include="Source Files\Frodo">
<UniqueIdentifier>{fb1ab057-5f38-445f-9bfa-2486aa8200e5}</UniqueIdentifier>
</Filter>
<Filter Include="Header Files\AES">
<UniqueIdentifier>{a6bb776b-ac51-4243-aec8-396a52ed9560}</UniqueIdentifier>
</Filter>
<Filter Include="Source Files\AES">
<UniqueIdentifier>{379cec65-7e0e-4eb2-9be1-414b154a911f}</UniqueIdentifier>
</Filter>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\..\src\kex\kex.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\..\src\kex_rlwe_bcns15\kex_rlwe_bcns15.h">
<Filter>Header Files\BCNS15</Filter>
</ClInclude>
<ClInclude Include="..\..\src\rand\rand.h">
<Filter>Header Files\rand</Filter>
</ClInclude>
<ClInclude Include="..\..\src\rand_urandom_chacha20\rand_urandom_chacha20.h">
<Filter>Header Files\rand</Filter>
</ClInclude>
<ClInclude Include="..\..\src\kex_rlwe_bcns15\rlwe_a.h">
<Filter>Header Files\BCNS15</Filter>
</ClInclude>
<ClInclude Include="..\..\src\kex_rlwe_bcns15\rlwe_table.h">
<Filter>Header Files\BCNS15</Filter>
</ClInclude>
<ClInclude Include="..\..\src\kex_rlwe_bcns15\local.h">
<Filter>Header Files\BCNS15</Filter>
</ClInclude>
<ClInclude Include="..\..\src\kex_rlwe_newhope\kex_rlwe_newhope.h">
<Filter>Header Files\Newhope</Filter>
</ClInclude>
<ClInclude Include="..\..\src\kex_rlwe_newhope\params.h">
<Filter>Header Files\Newhope</Filter>
</ClInclude>
<ClInclude Include="..\..\src\kex_lwe_frodo\kex_lwe_frodo.h">
<Filter>Header Files\Frodo</Filter>
</ClInclude>
<ClInclude Include="..\..\src\kex_lwe_frodo\local.h">
<Filter>Header Files\Frodo</Filter>
</ClInclude>
<ClInclude Include="..\..\src\aes\aes.h">
<Filter>Header Files\AES</Filter>
</ClInclude>
<ClInclude Include="..\..\src\rand_urandom_aesctr\rand_urandom_aesctr.h">
<Filter>Header Files\rand</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\..\src\kex\kex.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\..\src\kex_rlwe_bcns15\fft.c">
<Filter>Source Files\BCNS15</Filter>
</ClCompile>
<ClCompile Include="..\..\src\kex_rlwe_bcns15\kex_rlwe_bcns15.c">
<Filter>Source Files\BCNS15</Filter>
</ClCompile>
<ClCompile Include="..\..\src\rand\rand.c">
<Filter>Source Files\rand</Filter>
</ClCompile>
<ClCompile Include="..\..\src\rand_urandom_chacha20\rand_urandom_chacha20.c">
<Filter>Source Files\rand</Filter>
</ClCompile>
<ClCompile Include="..\..\src\kex_rlwe_bcns15\rlwe_kex.c">
<Filter>Source Files\BCNS15</Filter>
</ClCompile>
<ClCompile Include="..\..\src\kex_rlwe_bcns15\rlwe.c">
<Filter>Source Files\BCNS15</Filter>
</ClCompile>
<ClCompile Include="..\..\src\kex_rlwe_newhope\kex_rlwe_newhope.c">
<Filter>Source Files\Newhope</Filter>
</ClCompile>
<ClCompile Include="..\..\src\kex_rlwe_newhope\newhope.c">
<Filter>Source Files\Newhope</Filter>
</ClCompile>
<ClCompile Include="..\..\src\kex_lwe_frodo\kex_lwe_frodo.c">
<Filter>Source Files\Frodo</Filter>
</ClCompile>
<ClCompile Include="..\..\src\kex_lwe_frodo\lwe.c">
<Filter>Source Files\Frodo</Filter>
</ClCompile>
<ClCompile Include="..\..\src\kex_lwe_frodo\lwe_noise.c">
<Filter>Source Files\Frodo</Filter>
</ClCompile>
<ClCompile Include="..\..\src\kex\kex.c" />
<ClCompile Include="..\..\src\aes\aes.c">
<Filter>Source Files\AES</Filter>
<Filter>AES</Filter>
</ClCompile>
<ClCompile Include="..\..\src\aes\aes_c.c">
<Filter>Source Files\AES</Filter>
<Filter>AES</Filter>
</ClCompile>
<ClCompile Include="..\..\src\aes\aes_ni.c">
<Filter>Source Files\AES</Filter>
<Filter>AES</Filter>
</ClCompile>
<ClCompile Include="..\..\src\kex_rlwe_bcns15\fft.c">
<Filter>BCNS15</Filter>
</ClCompile>
<ClCompile Include="..\..\src\kex_lwe_frodo\kex_lwe_frodo.c">
<Filter>Frodo</Filter>
</ClCompile>
<ClCompile Include="..\..\src\kex_rlwe_bcns15\kex_rlwe_bcns15.c">
<Filter>BCNS15</Filter>
</ClCompile>
<ClCompile Include="..\..\src\kex_rlwe_msrln16\kex_rlwe_msrln16.c">
<Filter>MSR LN16</Filter>
</ClCompile>
<ClCompile Include="..\..\src\kex_rlwe_newhope\kex_rlwe_newhope.c">
<Filter>NewHope</Filter>
</ClCompile>
<ClCompile Include="..\..\src\kex_rlwe_msrln16\LatticeCrypto_kex.c">
<Filter>MSR LN16</Filter>
</ClCompile>
<ClCompile Include="..\..\src\kex_lwe_frodo\lwe.c">
<Filter>Frodo</Filter>
</ClCompile>
<ClCompile Include="..\..\src\kex_lwe_frodo\lwe_noise.c">
<Filter>Frodo</Filter>
</ClCompile>
<ClCompile Include="..\..\src\kex_rlwe_newhope\newhope.c">
<Filter>NewHope</Filter>
</ClCompile>
<ClCompile Include="..\..\src\kex_rlwe_msrln16\ntt_constants.c">
<Filter>MSR LN16</Filter>
</ClCompile>
<ClCompile Include="..\..\src\rand\rand.c">
<Filter>Rand</Filter>
</ClCompile>
<ClCompile Include="..\..\src\rand_urandom_aesctr\rand_urandom_aesctr.c">
<Filter>Source Files\rand</Filter>
<Filter>Rand</Filter>
</ClCompile>
<ClCompile Include="..\..\src\rand_urandom_chacha20\rand_urandom_chacha20.c">
<Filter>Rand</Filter>
</ClCompile>
<ClCompile Include="..\..\src\kex_rlwe_bcns15\rlwe.c">
<Filter>BCNS15</Filter>
</ClCompile>
<ClCompile Include="..\..\src\kex_rlwe_bcns15\rlwe_kex.c">
<Filter>BCNS15</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\..\src\kex\kex.h" />
<ClInclude Include="..\..\src\aes\aes.h">
<Filter>AES</Filter>
</ClInclude>
<ClInclude Include="..\..\src\kex_lwe_frodo\kex_lwe_frodo.h">
<Filter>Frodo</Filter>
</ClInclude>
<ClInclude Include="..\..\src\kex_rlwe_bcns15\kex_rlwe_bcns15.h">
<Filter>BCNS15</Filter>
</ClInclude>
<ClInclude Include="..\..\src\kex_rlwe_msrln16\kex_rlwe_msrln16.h">
<Filter>MSR LN16</Filter>
</ClInclude>
<ClInclude Include="..\..\src\kex_rlwe_newhope\kex_rlwe_newhope.h">
<Filter>NewHope</Filter>
</ClInclude>
<ClInclude Include="..\..\src\kex_rlwe_msrln16\LatticeCrypto.h">
<Filter>MSR LN16</Filter>
</ClInclude>
<ClInclude Include="..\..\src\kex_rlwe_msrln16\LatticeCrypto_priv.h">
<Filter>MSR LN16</Filter>
</ClInclude>
<ClInclude Include="..\..\src\kex_rlwe_bcns15\local.h">
<Filter>BCNS15</Filter>
</ClInclude>
<ClInclude Include="..\..\src\kex_lwe_frodo\local.h">
<Filter>Frodo</Filter>
</ClInclude>
<ClInclude Include="..\..\src\kex_rlwe_newhope\params.h">
<Filter>NewHope</Filter>
</ClInclude>
<ClInclude Include="..\..\src\rand\rand.h">
<Filter>Rand</Filter>
</ClInclude>
<ClInclude Include="..\..\src\rand_urandom_aesctr\rand_urandom_aesctr.h">
<Filter>Rand</Filter>
</ClInclude>
<ClInclude Include="..\..\src\rand_urandom_chacha20\rand_urandom_chacha20.h">
<Filter>Rand</Filter>
</ClInclude>
<ClInclude Include="..\..\src\kex_rlwe_bcns15\rlwe_table.h">
<Filter>BCNS15</Filter>
</ClInclude>
<ClInclude Include="..\..\src\kex_rlwe_bcns15\rlwe_a.h">
<Filter>BCNS15</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<Filter Include="AES">
<UniqueIdentifier>{71c917ec-9181-4b88-bdfc-9611ee1abe9a}</UniqueIdentifier>
</Filter>
<Filter Include="Frodo">
<UniqueIdentifier>{6bfff158-3e78-402f-ba16-e8d315089de8}</UniqueIdentifier>
</Filter>
<Filter Include="NewHope">
<UniqueIdentifier>{d0291785-4232-4264-b1bd-08b7e3f8df5e}</UniqueIdentifier>
</Filter>
<Filter Include="BCNS15">
<UniqueIdentifier>{ab581356-2a96-4211-99e3-f5cecd92eda3}</UniqueIdentifier>
</Filter>
<Filter Include="Rand">
<UniqueIdentifier>{fd44eb34-2f81-411e-a55f-f279c4b101de}</UniqueIdentifier>
</Filter>
<Filter Include="MSR LN16">
<UniqueIdentifier>{9f5ed87f-ed1e-47b4-b7e7-1d6648cb88fd}</UniqueIdentifier>
</Filter>
</ItemGroup>
</Project>

View File

@ -3,6 +3,7 @@
#include <oqs/kex.h>
#include <oqs/kex_rlwe_bcns15.h>
#include <oqs/kex_rlwe_newhope.h>
#include <oqs/kex_rlwe_msrln16.h>
#include <oqs/kex_lwe_frodo.h>
OQS_KEX *OQS_KEX_new(OQS_RAND *rand, enum OQS_KEX_alg_name alg_name, const uint8_t *seed, const size_t seed_len, const char *named_parameters) {
@ -11,6 +12,8 @@ OQS_KEX *OQS_KEX_new(OQS_RAND *rand, enum OQS_KEX_alg_name alg_name, const uint8
return OQS_KEX_rlwe_bcns15_new(rand);
case OQS_KEX_alg_rlwe_bcns15:
return OQS_KEX_rlwe_bcns15_new(rand);
case OQS_KEX_alg_rlwe_msrln16:
return OQS_KEX_rlwe_msrln16_new(rand);
case OQS_KEX_alg_rlwe_newhope:
return OQS_KEX_rlwe_newhope_new(rand);
case OQS_KEX_alg_lwe_frodo:

View File

@ -15,6 +15,7 @@ enum OQS_KEX_alg_name {
OQS_KEX_alg_default,
OQS_KEX_alg_rlwe_bcns15,
OQS_KEX_alg_rlwe_newhope,
OQS_KEX_alg_rlwe_msrln16,
OQS_KEX_alg_lwe_frodo,
};

View File

@ -21,6 +21,7 @@ struct kex_testcase {
/* Table of key-exchange configurations, one entry per algorithm variant.
 * Each entry starts with the OQS_KEX_alg_* identifier; the remaining fields
 * follow the layout of struct kex_testcase (declared above this table). */
struct kex_testcase kex_testcases[] = {
{ OQS_KEX_alg_rlwe_bcns15, NULL, 0, NULL, "rlwe_bcns15", 0 },
{ OQS_KEX_alg_rlwe_newhope, NULL, 0, NULL, "rlwe_newhope", 0 },
{ OQS_KEX_alg_rlwe_msrln16, NULL, 0, NULL, "rlwe_msrln16", 0 },
/* NOTE(review): the string literal below is 17 characters long while the
 * number next to it is 16 - presumably only the first 16 bytes are meant to
 * be used as the seed; confirm against struct kex_testcase and its users. */
{ OQS_KEX_alg_lwe_frodo, (unsigned char *) "01234567890123456", 16, "recommended", "lwe_frodo_recommended", 0 },
};

View File

@ -0,0 +1,40 @@
/****************************************************************************************
* LatticeCrypto: an efficient post-quantum Ring-Learning With Errors cryptography library
*
* Copyright (c) Microsoft Corporation. All rights reserved.
*
*
* Abstract: constants for the x64 assembly implementation
*
*****************************************************************************************/
#include "../LatticeCrypto_priv.h"
#include <stdint.h>
/* Constant tables referenced by name from the x64 assembly implementation.
 * Tables with eight 32-bit entries, four 64-bit entries or thirty-two 8-bit
 * entries are each 32 bytes long. */

/* Eight copies of the modulus parameter Q. */
uint32_t PRIME8x[8] = {OQS_RLWE_MSRLN16_PARAMETER_Q, OQS_RLWE_MSRLN16_PARAMETER_Q, OQS_RLWE_MSRLN16_PARAMETER_Q, OQS_RLWE_MSRLN16_PARAMETER_Q, OQS_RLWE_MSRLN16_PARAMETER_Q, OQS_RLWE_MSRLN16_PARAMETER_Q, OQS_RLWE_MSRLN16_PARAMETER_Q, OQS_RLWE_MSRLN16_PARAMETER_Q};
/* The value 1 in every byte. */
uint8_t ONE32x[32] = {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
/* Low-12-bit mask in every 32-bit entry. */
uint32_t MASK12x8[8] = {0xfff,0xfff,0xfff,0xfff,0xfff,0xfff,0xfff,0xfff};
/* Index tables; presumably operands for lane-permute instructions - confirm
 * against the assembly that loads them. */
uint32_t PERM0246[4] = {0,2,4,6};
uint32_t PERM00224466[8] = {0,0,2,2,4,4,6,6};
uint32_t PERM02134657[8] = {0,2,1,3,4,6,5,7};
uint64_t PERM0145[4] = {0,1,4,5};
uint64_t PERM2367[4] = {2,3,6,7};
/* Bit masks stored in 64-bit entries 0 and 2; entries 1 and 3 are zero. */
uint64_t MASK32[4] = {0xffffffff,0,0xffffffff,0};
/* NOTE(review): MASK42 and MASK14_3 below hold identical values. */
uint64_t MASK42[4] = {0x3fff0000000,0,0x3fff0000000,0};
/* Four 14-bit-wide masks at bit offsets 0, 14, 28 and 42. */
uint64_t MASK14_1[4] = {0x3fff,0,0x3fff,0};
uint64_t MASK14_2[4] = {0xFFFC000,0,0xFFFC000,0};
uint64_t MASK14_3[4] = {0x3FFF0000000,0,0x3FFF0000000,0};
uint64_t MASK14_4[4] = {0xFFFC0000000000,0,0xFFFC0000000000,0};
/* Small integers replicated into every 32-bit entry. */
uint32_t ONE8x[8] = {1,1,1,1,1,1,1,1};
uint32_t THREE8x[8] = {3,3,3,3,3,3,3,3};
uint32_t FOUR8x[8] = {4,4,4,4,4,4,4,4};
/* Replicated thresholds. Going by the names these look like Q/4, 3Q/4, 5Q/4,
 * 7Q/4, Q/2 and 3Q/2 rounded up for Q = 12289 - TODO confirm against
 * OQS_RLWE_MSRLN16_PARAMETER_Q. */
uint32_t PARAM_Q4x8[8] = {3073,3073,3073,3073,3073,3073,3073,3073};
uint32_t PARAM_3Q4x8[8] = {9217,9217,9217,9217,9217,9217,9217,9217};
uint32_t PARAM_5Q4x8[8] = {15362,15362,15362,15362,15362,15362,15362,15362};
uint32_t PARAM_7Q4x8[8] = {21506,21506,21506,21506,21506,21506,21506,21506};
uint32_t PARAM_Q2x8[8] = {6145,6145,6145,6145,6145,6145,6145,6145};
uint32_t PARAM_3Q2x8[8] = {18434,18434,18434,18434,18434,18434,18434,18434};

View File

@ -0,0 +1,436 @@
//****************************************************************************************
// LatticeCrypto: an efficient post-quantum Ring-Learning With Errors cryptography library
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
//
// Abstract: functions for error sampling and reconciliation in x64 assembly using AVX2
// vector instructions for Linux
//
//****************************************************************************************
.intel_syntax noprefix
// Registers that are used for parameter passing:
#define reg_p1 rdi
#define reg_p2 rsi
#define reg_p3 rdx
#define reg_p4 rcx
#define reg_p5 r8
.text
//***********************************************************************
// Error sampling from psi_12
// Operation: c [reg_p2] <- sampling(a) [reg_p1]
//
// reg_p1 (rdi): input bytes. The outer loop runs 16 times (rax = 0, 24, ...,
// 360; addresses use 4*rax) and reads 96 bytes per iteration.
// reg_p2 (rsi): output, written as sign-extended 32-bit values. Each of the
// two inner passes per outer iteration stores 32 values.
//
// Each output value is the byte-wise difference of two sums of 12 one-bit
// terms.
//
// Register roles: ymm7 = ONE32x bit mask (must stay intact for the whole
// routine), ymm0/ymm2/ymm4 = input vectors, ymm1/ymm5 = accumulators,
// ymm3 = scratch, ymm6/ymm10/ymm8/ymm9 = sign-extended results.
//***********************************************************************
.global oqs_rlwe_msrln16_error_sampling_asm
oqs_rlwe_msrln16_error_sampling_asm:
vmovdqu ymm7, ONE32x
movq r11, 384
movq r10, 32
movq r8, 24
xor rax, rax
xor rcx, rcx
loop1:
vmovdqu ymm0, YMMWORD PTR [reg_p1+4*rax] // sample
vmovdqu ymm2, YMMWORD PTR [reg_p1+4*rax+32] // sample
vmovdqu ymm4, YMMWORD PTR [reg_p1+4*rax+64] // sample
movq r9, 2
// NOTE(review): every pass shifts ymm0, ymm2 and ymm4 right by a total of
// 8 bit positions per 16-bit word (7 shifts inside the pass plus 1 at its
// end), so the second pass appears to see only what was the upper byte of
// each word. Confirm that this matches the intended use of the input bits.
loop1b:
vpand ymm1, ymm0, ymm7 // Collecting 8 bits for first sample
vpsrlw ymm0, ymm0, 1
vpand ymm3, ymm0, ymm7
vpaddb ymm1, ymm1, ymm3
vpsrlw ymm0, ymm0, 1
vpand ymm3, ymm0, ymm7
vpaddb ymm1, ymm1, ymm3
vpsrlw ymm0, ymm0, 1
vpand ymm3, ymm0, ymm7
vpaddb ymm1, ymm1, ymm3
vpsrlw ymm0, ymm0, 1
vpand ymm3, ymm0, ymm7
vpaddb ymm1, ymm1, ymm3
vpsrlw ymm0, ymm0, 1
vpand ymm3, ymm0, ymm7
vpaddb ymm1, ymm1, ymm3
vpsrlw ymm0, ymm0, 1
vpand ymm3, ymm0, ymm7
vpaddb ymm1, ymm1, ymm3
vpsrlw ymm0, ymm0, 1
vpand ymm3, ymm0, ymm7
vpaddb ymm1, ymm1, ymm3
vpand ymm3, ymm2, ymm7 // Adding next 4 bits
vpaddb ymm1, ymm1, ymm3
vpsrlw ymm2, ymm2, 1
vpand ymm3, ymm2, ymm7
vpaddb ymm1, ymm1, ymm3
vpsrlw ymm2, ymm2, 1
vpand ymm3, ymm2, ymm7
vpaddb ymm1, ymm1, ymm3
vpsrlw ymm2, ymm2, 1
vpand ymm3, ymm2, ymm7
vpaddb ymm1, ymm1, ymm3
vpsrlw ymm2, ymm2, 1 // Collecting 4-bits for second sample
vpand ymm5, ymm2, ymm7
vpsrlw ymm2, ymm2, 1
vpand ymm3, ymm2, ymm7
vpaddb ymm5, ymm5, ymm3
vpsrlw ymm2, ymm2, 1
vpand ymm3, ymm2, ymm7
vpaddb ymm5, ymm5, ymm3
vpsrlw ymm2, ymm2, 1
vpand ymm3, ymm2, ymm7
vpaddb ymm5, ymm5, ymm3
vpand ymm3, ymm4, ymm7 // Adding next 8 bits
vpaddb ymm5, ymm5, ymm3
vpsrlw ymm4, ymm4, 1
vpand ymm3, ymm4, ymm7
vpaddb ymm5, ymm5, ymm3
vpsrlw ymm4, ymm4, 1
vpand ymm3, ymm4, ymm7
vpaddb ymm5, ymm5, ymm3
vpsrlw ymm4, ymm4, 1
vpand ymm3, ymm4, ymm7
vpaddb ymm5, ymm5, ymm3
vpsrlw ymm4, ymm4, 1
vpand ymm3, ymm4, ymm7
vpaddb ymm5, ymm5, ymm3
vpsrlw ymm4, ymm4, 1
vpand ymm3, ymm4, ymm7
vpaddb ymm5, ymm5, ymm3
vpsrlw ymm4, ymm4, 1
vpand ymm3, ymm4, ymm7
vpaddb ymm5, ymm5, ymm3
vpsrlw ymm4, ymm4, 1
vpand ymm3, ymm4, ymm7
vpaddb ymm5, ymm5, ymm3
vpsubb ymm5, ymm1, ymm5 // Byte-wise difference of the two sums
vpermq ymm3, ymm5, 0x0e // Upper 16 result bytes moved to the low half of ymm3
vpmovsxbd ymm6, xmm5 // Sign-extend result bytes 0-7 to 32 bits
vpsrldq ymm5, ymm5, 8
// Result bytes 8-15 go to ymm10. The original wrote them to ymm7, which
// destroyed the ONE32x mask needed by every later pass.
vpmovsxbd ymm10, xmm5
vpmovsxbd ymm8, xmm3 // Result bytes 16-23
vpsrldq ymm3, ymm3, 8
vpmovsxbd ymm9, xmm3 // Result bytes 24-31
vmovdqu YMMWORD PTR [reg_p2+4*rcx], ymm6
vmovdqu YMMWORD PTR [reg_p2+4*rcx+32], ymm10
vmovdqu YMMWORD PTR [reg_p2+4*rcx+64], ymm8
vmovdqu YMMWORD PTR [reg_p2+4*rcx+96], ymm9
add rcx, r10 // i+32
vpsrlw ymm0, ymm0, 1
vpsrlw ymm2, ymm2, 1
vpsrlw ymm4, ymm4, 1
dec r9
jnz loop1b
add rax, r8 // j+24
cmp rax, r11
jl loop1
ret
//***********************************************************************
// Reconciliation helper function
// Operation: c [reg_p2] <- function(a) [reg_p1]
// [reg_p3] points to random bits
//
// reg_p1 (rdi): input, read as four blocks of 256 32-bit values
// (x, x+256, x+512, x+768).
// reg_p2 (rsi): output, written with the same four-block layout; every
// stored value is masked to its low 2 bits.
// reg_p3 (rdx): 32 bytes of random bits, loaded once into ymm4. One bit per
// 32-bit lane is consumed in each of the 32 loop iterations.
//
// Register roles inside the loop: ymm0-ymm3 = inputs, ymm4 = random bits,
// ymm5 = current random bit per lane, ymm6 = scratch, ymm7-ymm10 = v0[0..3],
// ymm11-ymm14 = v1[0..3], ymm15 = current constant. All 16 ymm registers are
// in use, so the ONE8x mask is reloaded on every iteration.
//***********************************************************************
.global oqs_rlwe_msrln16_helprec_asm
oqs_rlwe_msrln16_helprec_asm:
movq r11, 256
movq r10, 8
xor rax, rax
vmovdqu ymm4, YMMWORD PTR [reg_p3] // rbits
loop2:
// Reload the single-bit mask here: ymm8 is reused as v0[1] further down, so
// a value loaded once before the loop would only be valid for the first
// iteration.
vmovdqu ymm8, ONE8x
vmovdqu ymm0, YMMWORD PTR [reg_p1+4*rax] // x
vmovdqu ymm1, YMMWORD PTR [reg_p1+4*rax+4*256] // x+256
vmovdqu ymm2, YMMWORD PTR [reg_p1+4*rax+4*512] // x+512
vmovdqu ymm3, YMMWORD PTR [reg_p1+4*rax+4*768] // x+768
vpand ymm5, ymm4, ymm8 // Collecting 8 random bits
vpslld ymm0, ymm0, 1 // 2*x - rbits
vpslld ymm1, ymm1, 1
vpslld ymm2, ymm2, 1
vpslld ymm3, ymm3, 1
vpsubd ymm0, ymm0, ymm5
vpsubd ymm1, ymm1, ymm5
vpsubd ymm2, ymm2, ymm5
vpsubd ymm3, ymm3, ymm5
// v0[i] starts at 4 and is decremented once for each of the four thresholds
// (PARAM_Q4x8, PARAM_3Q4x8, PARAM_5Q4x8, PARAM_7Q4x8) that the value is
// below; "below" is detected via the sign bit of (value - threshold).
vmovdqu ymm15, PARAM_Q4x8
vmovdqu ymm7, FOUR8x
vmovdqu ymm8, ymm7
vmovdqu ymm9, ymm7
vmovdqu ymm10, ymm7
vpsubd ymm6, ymm0, ymm15
vpsrld ymm6, ymm6, 31
vpsubd ymm7, ymm7, ymm6
vpsubd ymm6, ymm1, ymm15
vpsrld ymm6, ymm6, 31
vpsubd ymm8, ymm8, ymm6
vpsubd ymm6, ymm2, ymm15
vpsrld ymm6, ymm6, 31
vpsubd ymm9, ymm9, ymm6
vpsubd ymm6, ymm3, ymm15
vpsrld ymm6, ymm6, 31
vpsubd ymm10, ymm10, ymm6
vmovdqu ymm15, PARAM_3Q4x8
vpsubd ymm6, ymm0, ymm15
vpsrld ymm6, ymm6, 31
vpsubd ymm7, ymm7, ymm6
vpsubd ymm6, ymm1, ymm15
vpsrld ymm6, ymm6, 31
vpsubd ymm8, ymm8, ymm6
vpsubd ymm6, ymm2, ymm15
vpsrld ymm6, ymm6, 31
vpsubd ymm9, ymm9, ymm6
vpsubd ymm6, ymm3, ymm15
vpsrld ymm6, ymm6, 31
vpsubd ymm10, ymm10, ymm6
vmovdqu ymm15, PARAM_5Q4x8
vpsubd ymm6, ymm0, ymm15
vpsrld ymm6, ymm6, 31
vpsubd ymm7, ymm7, ymm6
vpsubd ymm6, ymm1, ymm15
vpsrld ymm6, ymm6, 31
vpsubd ymm8, ymm8, ymm6
vpsubd ymm6, ymm2, ymm15
vpsrld ymm6, ymm6, 31
vpsubd ymm9, ymm9, ymm6
vpsubd ymm6, ymm3, ymm15
vpsrld ymm6, ymm6, 31
vpsubd ymm10, ymm10, ymm6
vmovdqu ymm15, PARAM_7Q4x8
vpsubd ymm6, ymm0, ymm15
vpsrld ymm6, ymm6, 31
vpsubd ymm7, ymm7, ymm6 // v0[0]
vpsubd ymm6, ymm1, ymm15
vpsrld ymm6, ymm6, 31
vpsubd ymm8, ymm8, ymm6 // v0[1]
vpsubd ymm6, ymm2, ymm15
vpsrld ymm6, ymm6, 31
vpsubd ymm9, ymm9, ymm6 // v0[2]
vpsubd ymm6, ymm3, ymm15
vpsrld ymm6, ymm6, 31
vpsubd ymm10, ymm10, ymm6 // v0[3]
// v1[i] starts at 3 and is decremented once for each of the three thresholds
// (PARAM_Q2x8, PARAM_3Q2x8, PRIME8x) that the value is below.
vmovdqu ymm15, PARAM_Q2x8
vmovdqu ymm11, THREE8x
vmovdqu ymm12, ymm11
vmovdqu ymm13, ymm11
vmovdqu ymm14, ymm11
vpsubd ymm6, ymm0, ymm15
vpsrld ymm6, ymm6, 31
vpsubd ymm11, ymm11, ymm6
vpsubd ymm6, ymm1, ymm15
vpsrld ymm6, ymm6, 31
vpsubd ymm12, ymm12, ymm6
vpsubd ymm6, ymm2, ymm15
vpsrld ymm6, ymm6, 31
vpsubd ymm13, ymm13, ymm6
vpsubd ymm6, ymm3, ymm15
vpsrld ymm6, ymm6, 31
vpsubd ymm14, ymm14, ymm6
vmovdqu ymm15, PARAM_3Q2x8
vpsubd ymm6, ymm0, ymm15
vpsrld ymm6, ymm6, 31
vpsubd ymm11, ymm11, ymm6
vpsubd ymm6, ymm1, ymm15
vpsrld ymm6, ymm6, 31
vpsubd ymm12, ymm12, ymm6
vpsubd ymm6, ymm2, ymm15
vpsrld ymm6, ymm6, 31
vpsubd ymm13, ymm13, ymm6
vpsubd ymm6, ymm3, ymm15
vpsrld ymm6, ymm6, 31
vpsubd ymm14, ymm14, ymm6
vmovdqu ymm15, PRIME8x
vpsubd ymm6, ymm0, ymm15
vpsrld ymm6, ymm6, 31
vpsubd ymm11, ymm11, ymm6 // v1[0]
vpsubd ymm6, ymm1, ymm15
vpsrld ymm6, ymm6, 31
vpsubd ymm12, ymm12, ymm6 // v1[1]
vpsubd ymm6, ymm2, ymm15
vpsrld ymm6, ymm6, 31
vpsubd ymm13, ymm13, ymm6 // v1[2]
vpsubd ymm6, ymm3, ymm15
vpsrld ymm6, ymm6, 31
vpsubd ymm14, ymm14, ymm6 // v1[3]
// norm = sum over the four lanes of |2*value - v0[i]*PRIME8x|
vpmulld ymm6, ymm7, ymm15
vpslld ymm0, ymm0, 1
vpsubd ymm0, ymm0, ymm6
vpabsd ymm0, ymm0
vpmulld ymm6, ymm8, ymm15
vpslld ymm1, ymm1, 1
vpsubd ymm1, ymm1, ymm6
vpabsd ymm1, ymm1
vpaddd ymm0, ymm0, ymm1
vpmulld ymm6, ymm9, ymm15
vpslld ymm2, ymm2, 1
vpsubd ymm2, ymm2, ymm6
vpabsd ymm2, ymm2
vpaddd ymm0, ymm0, ymm2
vpmulld ymm6, ymm10, ymm15
vpslld ymm3, ymm3, 1
vpsubd ymm3, ymm3, ymm6
vpabsd ymm3, ymm3
vpaddd ymm0, ymm0, ymm3 // norm
vpsubd ymm0, ymm0, ymm15
vpsrad ymm0, ymm0, 31 // If norm < q then norm = 0xff...ff, else norm = 0
vpxor ymm7, ymm7, ymm11 // v0[i] = (norm & (v0[i] ^ v1[i])) ^ v1[i]
vpand ymm7, ymm7, ymm0
vpxor ymm7, ymm7, ymm11
vpxor ymm8, ymm8, ymm12
vpand ymm8, ymm8, ymm0
vpxor ymm8, ymm8, ymm12
vpxor ymm9, ymm9, ymm13
vpand ymm9, ymm9, ymm0
vpxor ymm9, ymm9, ymm13
vpxor ymm10, ymm10, ymm14
vpand ymm10, ymm10, ymm0
vpxor ymm10, ymm10, ymm14
// Outputs: (v0[i] - v0[3]) & 3 for i = 0..2, and
// (2*v0[3] + (1 if norm >= q else 0)) & 3 for the fourth block.
vmovdqu ymm15, THREE8x
vmovdqu ymm14, ONE8x
vpsubd ymm7, ymm7, ymm10
vpand ymm7, ymm7, ymm15
vpsubd ymm8, ymm8, ymm10
vpand ymm8, ymm8, ymm15
vpsubd ymm9, ymm9, ymm10
vpand ymm9, ymm9, ymm15
vpslld ymm10, ymm10, 1
vpxor ymm0, ymm0, ymm14
vpand ymm0, ymm0, ymm14
vpaddd ymm10, ymm0, ymm10
vpand ymm10, ymm10, ymm15
vpsrld ymm4, ymm4, 1 // Expose the next random bit in every lane
vmovdqu YMMWORD PTR [reg_p2+4*rax], ymm7
vmovdqu YMMWORD PTR [reg_p2+4*rax+4*256], ymm8
vmovdqu YMMWORD PTR [reg_p2+4*rax+4*512], ymm9
vmovdqu YMMWORD PTR [reg_p2+4*rax+4*768], ymm10
add rax, r10 // j+8
cmp rax, r11
jl loop2
ret
//***********************************************************************
// Reconciliation function
// Operation: c [reg_p3] <- function(a [reg_p1], b [reg_p2])
//
// reg_p1 (rdi): input a, read as four blocks of 256 32-bit values
// (x, x+256, x+512, x+768).
// reg_p2 (rsi): input b (rvec), read with the same four-block layout.
// reg_p3 (rdx): output; one byte is stored per loop iteration, 32 iterations
// in total (rax = 0, 8, ..., 248).
//
// Loop-invariant registers: ymm15 = PRIME8x (Q in every lane), ymm14 = 4*Q,
// ymm13 = 8*Q, ymm12 = -8*Q, ymm11 = (8*Q) ^ (-8*Q), ymm10 = ONE8x.
//***********************************************************************
.global oqs_rlwe_msrln16_rec_asm
oqs_rlwe_msrln16_rec_asm:
vpxor ymm12, ymm12, ymm12
vmovdqu ymm15, PRIME8x
vpslld ymm14, ymm15, 2 // 4*Q
vpslld ymm13, ymm15, 3 // 8*Q
vpsubd ymm12, ymm12, ymm13 // -8*Q
vpxor ymm11, ymm12, ymm13 // 8*Q ^ -8*Q
vmovdqu ymm10, ONE8x
movq r11, 256
movq r10, 8
xor rax, rax
xor rcx, rcx
loop3:
vmovdqu ymm0, YMMWORD PTR [reg_p1+4*rax] // x
vmovdqu ymm1, YMMWORD PTR [reg_p1+4*rax+4*256] // x+256
vmovdqu ymm2, YMMWORD PTR [reg_p1+4*rax+4*512] // x+512
vmovdqu ymm3, YMMWORD PTR [reg_p1+4*rax+4*768] // x+768
vmovdqu ymm4, YMMWORD PTR [reg_p2+4*rax] // rvec
vmovdqu ymm5, YMMWORD PTR [reg_p2+4*rax+4*256] // rvec+256
vmovdqu ymm6, YMMWORD PTR [reg_p2+4*rax+4*512] // rvec+512
vmovdqu ymm7, YMMWORD PTR [reg_p2+4*rax+4*768] // rvec+768
// For the first three blocks: 2*rvec[i] + rvec[3]; the fourth block keeps
// rvec[3] as is. Each is then multiplied by Q.
vpslld ymm8, ymm4, 1 // 2*rvec + rvec
vpaddd ymm4, ymm7, ymm8
vpslld ymm8, ymm5, 1
vpaddd ymm5, ymm7, ymm8
vpslld ymm8, ymm6, 1
vpaddd ymm6, ymm7, ymm8
vpmulld ymm4, ymm4, ymm15
vpmulld ymm5, ymm5, ymm15
vpmulld ymm6, ymm6, ymm15
vpmulld ymm7, ymm7, ymm15
vpslld ymm0, ymm0, 3 // 8*x
vpslld ymm1, ymm1, 3
vpslld ymm2, ymm2, 3
vpslld ymm3, ymm3, 3
vpsubd ymm0, ymm0, ymm4 // t[i]
vpsubd ymm1, ymm1, ymm5
vpsubd ymm2, ymm2, ymm6
vpsubd ymm3, ymm3, ymm7
// For each t[i]: mask1 is all-ones when t[i] is negative, mask2 is all-ones
// when |t[i]| exceeds 4*Q. When mask2 is set, 8*Q is added (negative t[i]) or
// subtracted (non-negative t[i]); the absolute value of the result is
// accumulated into the norm in ymm0.
vpsrad ymm8, ymm0, 31 // mask1
vpabsd ymm4, ymm0
vpsubd ymm4, ymm14, ymm4
vpsrad ymm4, ymm4, 31 // mask2
vpand ymm8, ymm8, ymm11 // (mask1 & (8*PARAMETER_Q ^ -8*PARAMETER_Q)) ^ -8*PARAMETER_Q
vpxor ymm8, ymm8, ymm12
vpand ymm4, ymm4, ymm8
vpaddd ymm0, ymm0, ymm4
vpabsd ymm0, ymm0
vpsrad ymm8, ymm1, 31 // mask1
vpabsd ymm4, ymm1
vpsubd ymm4, ymm14, ymm4
vpsrad ymm4, ymm4, 31 // mask2
vpand ymm8, ymm8, ymm11 // (mask1 & (8*PARAMETER_Q ^ -8*PARAMETER_Q)) ^ -8*PARAMETER_Q
vpxor ymm8, ymm8, ymm12
vpand ymm4, ymm4, ymm8
vpaddd ymm1, ymm1, ymm4
vpabsd ymm1, ymm1
vpaddd ymm0, ymm0, ymm1
vpsrad ymm8, ymm2, 31 // mask1
vpabsd ymm4, ymm2
vpsubd ymm4, ymm14, ymm4
vpsrad ymm4, ymm4, 31 // mask2
vpand ymm8, ymm8, ymm11 // (mask1 & (8*PARAMETER_Q ^ -8*PARAMETER_Q)) ^ -8*PARAMETER_Q
vpxor ymm8, ymm8, ymm12
vpand ymm4, ymm4, ymm8
vpaddd ymm2, ymm2, ymm4
vpabsd ymm2, ymm2
vpaddd ymm0, ymm0, ymm2
vpsrad ymm8, ymm3, 31 // mask1
vpabsd ymm4, ymm3
vpsubd ymm4, ymm14, ymm4
vpsrad ymm4, ymm4, 31 // mask2
vpand ymm8, ymm8, ymm11 // (mask1 & (8*PARAMETER_Q ^ -8*PARAMETER_Q)) ^ -8*PARAMETER_Q
vpxor ymm8, ymm8, ymm12
vpand ymm4, ymm4, ymm8
vpaddd ymm3, ymm3, ymm4
vpabsd ymm3, ymm3
vpaddd ymm0, ymm0, ymm3 // norm
// NOTE(review): the comparison below is against ymm13 = 8*Q (all values were
// scaled by 8 above), and the sign bit of (8*Q - norm) is inverted, so the
// lane result is 1 when norm <= 8*Q. The wording of the next comment is
// presumably in unscaled terms - confirm.
vpsubd ymm0, ymm13, ymm0 // If norm < PARAMETER_Q then result = 1, else result = 0
vpsrld ymm0, ymm0, 31
vpxor ymm0, ymm0, ymm10
// Gather the eight 0/1 lane results into the low byte of ymm0 with shifts,
// ORs and lane moves, then store that byte at c[rcx].
vpsrlq ymm1, ymm0, 31
vpor ymm1, ymm0, ymm1
vpsllq ymm2, ymm1, 2
vpsrldq ymm2, ymm2, 8
vpor ymm1, ymm2, ymm1
vpsllq ymm2, ymm1, 4
vpermq ymm2, ymm2, 0x56
vpor ymm0, ymm1, ymm2
vmovq r9, xmm0
mov BYTE PTR [reg_p3+rcx], r9b
add rax, r10 // j+8
inc rcx
cmp rax, r11
jl loop3
ret

View File

@ -0,0 +1,65 @@
/****************************************************************************************
* LatticeCrypto: an efficient post-quantum Ring-Learning With Errors cryptography library
*
* Copyright (c) Microsoft Corporation. All rights reserved.
*
*
* Abstract: NTT functions and other low-level operations
*
*****************************************************************************************/
#include "../LatticeCrypto_priv.h"
void oqs_rlwe_msrln16_NTT_CT_std2rev_12289(int32_t *a, const int32_t *psi_rev, unsigned int N) {
	// Forward NTT entry point for the AVX2 build: a is transformed in place.
	// All work is delegated to the assembly routine; psi_rev is the table it reads
	// and N is the number of coefficients.
	oqs_rlwe_msrln16_NTT_CT_std2rev_12289_asm(a, psi_rev, N);
}
void oqs_rlwe_msrln16_INTT_GS_rev2std_12289(int32_t *a, const int32_t *omegainv_rev, const int32_t omegainv1N_rev, const int32_t Ninv, unsigned int N) {
	// Inverse NTT entry point for the AVX2 build: a is transformed in place.
	// omegainv_rev is the table read by the assembly routine; omegainv1N_rev and
	// Ninv are the two scalars it uses in its final scaling step (passed by value).
	oqs_rlwe_msrln16_INTT_GS_rev2std_12289_asm(a, omegainv_rev, omegainv1N_rev, Ninv, N);
}
void oqs_rlwe_msrln16_two_reduce12289(int32_t *a, unsigned int N) {
	// Applies two consecutive reductions to each of the N coefficients of a, in place,
	// by forwarding to the AVX2 assembly routine.
	oqs_rlwe_msrln16_two_reduce12289_asm(a, N);
}
void oqs_rlwe_msrln16_pmul(int32_t *a, int32_t *b, int32_t *c, unsigned int N) {
	// Component-wise multiplication c <- a * b over N coefficients,
	// carried out by the AVX2 assembly routine.
	oqs_rlwe_msrln16_pmul_asm(a, b, c, N);
}
void oqs_rlwe_msrln16_pmuladd(int32_t *a, int32_t *b, int32_t *c, int32_t *d, unsigned int N) {
	// Component-wise multiply-add d <- a * b + c over N coefficients,
	// carried out by the AVX2 assembly routine.
	oqs_rlwe_msrln16_pmuladd_asm(a, b, c, d, N);
}
void oqs_rlwe_msrln16_smul(int32_t *a, int32_t scalar, unsigned int N) {
	// Multiplies each of the N coefficients of a by scalar, in place.
	// Plain 32-bit multiplication: no modular reduction is performed here.
	int32_t *coeff = a;
	unsigned int remaining = N;

	while (remaining > 0) {
		*coeff = *coeff * scalar;
		coeff++;
		remaining--;
	}
}
void oqs_rlwe_msrln16_correction(int32_t *a, int32_t p, unsigned int N) {
	// Final correction: brings every coefficient of a into [0, p-1], in place.
	//   a: array of N coefficients, each expected in [-p, 2p)
	//   p: the modulus
	//   N: number of coefficients
	// Branch-free: each step builds an all-ones/all-zeros mask from the sign of the
	// current value and uses it to add p conditionally.
	//
	// The mask is taken from the sign bit (bit 31). The previous shift amount,
	// 4*sizeof(int32_t) - 1 = 15, only yielded a proper 0 / -1 mask while
	// |a[i]| < 2^15; results for inputs in [-p, 2p) with p < 2^15 are unchanged.
	// As before, this relies on >> of a negative int32_t being an arithmetic shift.
	const unsigned int sign_shift = 8 * sizeof(int32_t) - 1;
	unsigned int i;
	int32_t mask;

	for (i = 0; i < N; i++) {
		mask = a[i] >> sign_shift; // -1 if a[i] < 0, else 0
		a[i] += (p & mask) - p;    // subtract p unless a[i] was negative
		mask = a[i] >> sign_shift;
		a[i] += (p & mask);        // add p back if the result is negative
	}
}

View File

@ -0,0 +1,979 @@
//****************************************************************************************
// LatticeCrypto: an efficient post-quantum Ring-Learning With Errors cryptography library
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
//
// Abstract: NTT functions in x64 assembly using AVX2 vector instructions for Linux
//
//****************************************************************************************
.intel_syntax noprefix
// Registers that are used for parameter passing:
#define reg_p1 rdi
#define reg_p2 rsi
#define reg_p3 rdx
#define reg_p4 rcx
#define reg_p5 r8
.text
//***********************************************************************
// Forward NTT
// Operation: a [reg_p1] <- NTT(a) [reg_p1],
// [reg_p2] points to table and
// reg_p3 contains parameter n
//
// Notes:
// - The transform is done in place on 32-bit coefficients at [reg_p1].
// - Although n is a parameter, the initial shift count (rcx = 11) and the
//   table offsets 4*64, 4*128, 4*256 and 4*512 used by the later stages are
//   hard-coded, so the routine is only self-consistent for n = 1024.
// - reg_p3 (rdx) is copied to rax/r12 and then reused as the index i.
// - r12, r13 and r14 are saved on entry and restored before returning.
// - Reduction pattern used throughout: c0 = x & MASK12x8, c1 = x >> 12,
//   result = 3*c0 - c1. Since 12289 = 3*2^12 + 1 this is congruent to
//   3*x mod 12289, assuming MASK12x8 selects the low 12 bits of each lane
//   (MASK12x8 and the PERM* constants are defined outside this file -- confirm).
//***********************************************************************
.global oqs_rlwe_msrln16_NTT_CT_std2rev_12289_asm
oqs_rlwe_msrln16_NTT_CT_std2rev_12289_asm:
push r12
push r13
push r14
// Stages m=1 -> m=32
// loop1 iterates over stages (m doubles, k halves), loop2 over the m twiddle
// factors of a stage, loop3 over the k coefficients of one butterfly group,
// 16 coefficients (4 vectors of 4) per pass.
mov r9, 1 // m = 1
mov rax, reg_p3
mov r12, reg_p3
shr r12, 4 // n/16
vmovdqu ymm14, MASK12x8
vmovdqu ymm12, PERM0246
mov r14, 16
mov rcx, 11
loop1:
shr rax, 1 // k = k/2
dec rcx
xor rdx, rdx // i = 0
loop2:
mov r10, rdx
mov r11, rax
dec r11
shl r10, cl // j1
add r11, r10 // j2
mov r13, r9
add r13, rdx // m+i
vbroadcastss ymm11, DWORD PTR [reg_p2+4*r13] // S
loop3:
mov r13, r10
add r13, rax // j+k
vpmovsxdq ymm1, XMMWORD PTR [reg_p1+4*r13] // a[j+k]
vpmovsxdq ymm3, XMMWORD PTR [reg_p1+4*r13+16] // a[j+k]
vpmovsxdq ymm5, XMMWORD PTR [reg_p1+4*r13+32] // a[j+k]
vpmovsxdq ymm7, XMMWORD PTR [reg_p1+4*r13+48] // a[j+k]
vpmuldq ymm1, ymm1, ymm11 // a[j+k].S
vpmuldq ymm3, ymm3, ymm11
vpmuldq ymm5, ymm5, ymm11
vpmuldq ymm7, ymm7, ymm11
vpmovsxdq ymm0, XMMWORD PTR [reg_p1+4*r10] // U = a[j]
vmovdqu ymm13, ymm1
vpand ymm1, ymm14, ymm1 // c0
vpsrlq ymm13, ymm13, 12 // c1
vpslld ymm15, ymm1, 1 // 2*c0
vpsubd ymm13, ymm1, ymm13 // c0-c1
vpaddd ymm13, ymm13, ymm15 // V = 3*c0-c1
vpsubd ymm1, ymm0, ymm13 // a[j+k] = U - V
vpaddd ymm0, ymm0, ymm13 // a[j] = U + V
// vpermd with PERM0246 packs the four results into the low 128 bits for the xmm store
vpermd ymm1, ymm12, ymm1
vpermd ymm0, ymm12, ymm0
vpmovsxdq ymm2, XMMWORD PTR [reg_p1+4*r10+16] // U = a[j]
vmovdqu ymm13, ymm3
vpand ymm3, ymm14, ymm3 // c0
vpsrlq ymm13, ymm13, 12 // c1
vpslld ymm15, ymm3, 1 // 2*c0
vpsubd ymm13, ymm3, ymm13 // c0-c1
vpaddd ymm13, ymm13, ymm15 // V = 3*c0-c1
vpsubd ymm3, ymm2, ymm13 // a[j+k] = U - V
vpaddd ymm2, ymm2, ymm13 // a[j] = U + V
vmovdqu XMMWORD PTR [reg_p1+4*r10], xmm0
vmovdqu XMMWORD PTR [reg_p1+4*r13], xmm1
vpermd ymm3, ymm12, ymm3
vpermd ymm2, ymm12, ymm2
vpmovsxdq ymm4, XMMWORD PTR [reg_p1+4*r10+32] // U = a[j]
vmovdqu ymm13, ymm5
vpand ymm5, ymm14, ymm5 // c0
vpsrlq ymm13, ymm13, 12 // c1
vpslld ymm15, ymm5, 1 // 2*c0
vpsubd ymm13, ymm5, ymm13 // c0-c1
vpaddd ymm13, ymm13, ymm15 // V = 3*c0-c1
vpsubd ymm5, ymm4, ymm13 // a[j+k] = U - V
vpaddd ymm4, ymm4, ymm13 // a[j] = U + V
vmovdqu XMMWORD PTR [reg_p1+4*r10+16], xmm2
vmovdqu XMMWORD PTR [reg_p1+4*r13+16], xmm3
vpermd ymm5, ymm12, ymm5
vpermd ymm4, ymm12, ymm4
vpmovsxdq ymm6, XMMWORD PTR [reg_p1+4*r10+48] // U = a[j]
vmovdqu ymm13, ymm7
vpand ymm7, ymm14, ymm7 // c0
vpsrlq ymm13, ymm13, 12 // c1
vpslld ymm15, ymm7, 1 // 2*c0
vpsubd ymm13, ymm7, ymm13 // c0-c1
vpaddd ymm13, ymm13, ymm15 // V = 3*c0-c1
vpsubd ymm7, ymm6, ymm13 // a[j+k] = U - V
vpaddd ymm6, ymm6, ymm13 // a[j] = U + V
vmovdqu XMMWORD PTR [reg_p1+4*r10+32], xmm4
vmovdqu XMMWORD PTR [reg_p1+4*r13+32], xmm5
vpermd ymm6, ymm12, ymm6
vpermd ymm7, ymm12, ymm7
vmovdqu XMMWORD PTR [reg_p1+4*r13+48], xmm7
vmovdqu XMMWORD PTR [reg_p1+4*r10+48], xmm6
add r10, r14
cmp r10, r11
jl loop3
inc rdx
cmp rdx, r9
jl loop2
shl r9, 1
cmp r9, r12
jl loop1
// Stage m=64
// One twiddle factor per 16 coefficients: a[j..j+7] is paired with a[j+8..j+15].
xor rdx, rdx // i = 0
xor r10, r10 // j1 = 0
loop4:
vbroadcastss ymm11, DWORD PTR [reg_p2+4*rdx+4*64] // S
vpmovsxdq ymm1, XMMWORD PTR [reg_p1+4*r10+32] // a[j+k]
vpmovsxdq ymm3, XMMWORD PTR [reg_p1+4*r10+48] // a[j+k]
vpmovsxdq ymm0, XMMWORD PTR [reg_p1+4*r10] // U = a[j]
vpmovsxdq ymm2, XMMWORD PTR [reg_p1+4*r10+16] // U = a[j]
vpmuldq ymm1, ymm1, ymm11 // a[j+k].S
vpmuldq ymm3, ymm3, ymm11 // a[j+k].S
vmovdqu ymm13, ymm1
vpand ymm1, ymm14, ymm1 // c0
vpsrlq ymm13, ymm13, 12 // c1
vpslld ymm15, ymm1, 1 // 2*c0
vpsubd ymm13, ymm1, ymm13 // c0-c1
vpaddd ymm13, ymm13, ymm15 // V = 3*c0-c1
vmovdqu ymm10, ymm3
vpand ymm3, ymm14, ymm3 // c0
vpsrlq ymm10, ymm10, 12 // c1
vpslld ymm15, ymm3, 1 // 2*c0
vpsubd ymm10, ymm3, ymm10 // c0-c1
vpaddd ymm10, ymm10, ymm15 // V = 3*c0-c1
vpsubd ymm1, ymm0, ymm13 // a[j+k] = U - V
vpaddd ymm0, ymm0, ymm13 // a[j] = U + V
vpsubd ymm3, ymm2, ymm10 // a[j+k] = U - V
vpaddd ymm2, ymm2, ymm10 // a[j] = U + V
vpermd ymm0, ymm12, ymm0
vpermd ymm1, ymm12, ymm1
vpermd ymm2, ymm12, ymm2
vpermd ymm3, ymm12, ymm3
vmovdqu XMMWORD PTR [reg_p1+4*r10], xmm0
vmovdqu XMMWORD PTR [reg_p1+4*r10+32], xmm1
vmovdqu XMMWORD PTR [reg_p1+4*r10+16], xmm2
vmovdqu XMMWORD PTR [reg_p1+4*r10+48], xmm3
add r10, r14 // j+16
inc rdx // i+1
cmp rdx, r9
jl loop4
// Stage m=128
// One twiddle factor per 8 coefficients. In this stage U receives an extra
// reduction before the butterfly and the product is reduced using three
// limbs (c0, c1, c2).
shl r9, 1
xor rdx, rdx // i = 0
xor r10, r10 // j1 = 0
mov r13, 8
loop6:
vbroadcastss ymm2, DWORD PTR [reg_p2+4*rdx+4*128] // S
vpmovsxdq ymm1, XMMWORD PTR [reg_p1+4*r10+16] // a[j+k]
vpmovsxdq ymm0, XMMWORD PTR [reg_p1+4*r10] // U = a[j]
vpmuldq ymm1, ymm1, ymm2 // a[j+k].S
vmovdqu ymm3, ymm0
vpand ymm0, ymm14, ymm0 // c0
vpsrad ymm3, ymm3, 12 // c1
vpslld ymm4, ymm0, 1 // 2*c0
vpsubd ymm3, ymm0, ymm3 // c0-c1
vpaddd ymm0, ymm3, ymm4 // U = 3*c0-c1
vmovdqu ymm3, ymm1
vpand ymm1, ymm14, ymm1 // c0
vpsrlq ymm4, ymm3, 24 // c2
vpsrad ymm3, ymm3, 12 // xc1
vpand ymm3, ymm14, ymm3 // c1
vpslld ymm5, ymm1, 3 // 8*c0
vpaddd ymm4, ymm1, ymm4 // c0+c2
vpaddd ymm4, ymm4, ymm5 // 9*c0+c2
vpslld ymm5, ymm3, 1 // 2*c1
vpaddd ymm1, ymm0, ymm3 // U+c1
vpsubd ymm0, ymm0, ymm3 // U-c1
vpsubd ymm4, ymm4, ymm5 // 9*c0-2*c1+c2
vpaddd ymm0, ymm0, ymm4 // U+(9*c0-3*c1+c2)
vpsubd ymm1, ymm1, ymm4 // U-(9*c0-3*c1+c2)
vpermd ymm0, ymm12, ymm0
vpermd ymm1, ymm12, ymm1
vmovdqu XMMWORD PTR [reg_p1+4*r10], xmm0
vmovdqu XMMWORD PTR [reg_p1+4*r10+16], xmm1
add r10, r13 // j+8
inc rdx // i+1
cmp rdx, r9
jl loop6
// Stage m=256
// Each pass loads 8 twiddle factors (two groups of 4) and processes 32
// coefficients in four unrolled 8-coefficient blocks.
vmovdqu ymm9, PERM02134657
shl r9, 1
xor rdx, rdx // i = 0
xor r10, r10 // j1 = 0
mov r14, 32
loop7:
vpmovsxdq ymm2, XMMWORD PTR [reg_p2+4*rdx+4*256] // S = psi[m+i]->psi[m+i+3]
vpermq ymm8, ymm2, 0x50
vpmovsxdq ymm0, XMMWORD PTR [reg_p1+4*r10] // U = a[j]->a[j+3]
vpmovsxdq ymm1, XMMWORD PTR [reg_p1+4*r10+16] // a[j+k]->a[j+k+3]
vpermq ymm3, ymm0, 0x4e
vinserti128 ymm0, ymm0, xmm1, 1 // U
vpblendd ymm1, ymm1, ymm3, 15
vpmuldq ymm3, ymm1, ymm8 // a[j+k].S
vmovdqu ymm4, ymm3
vpand ymm3, ymm14, ymm3 // c0
vpsrlq ymm4, ymm4, 12 // c1
vpslld ymm5, ymm3, 1 // 2*c0
vpsubd ymm4, ymm3, ymm4 // c0-c1
vpaddd ymm4, ymm4, ymm5 // V = 3*c0-c1
vpsubd ymm1, ymm0, ymm4 // a[j+k] = U - V
vpaddd ymm0, ymm0, ymm4 // a[j] = U + V
vpslldq ymm1, ymm1, 4
vpblendd ymm0, ymm0, ymm1, 0xaa
vpermd ymm0, ymm9, ymm0
vmovdqu YMMWORD PTR [reg_p1+4*r10], ymm0
vpermq ymm8, ymm2, 0xfa
vpmovsxdq ymm0, XMMWORD PTR [reg_p1+4*r10+32] // U = a[j]->a[j+3]
vpmovsxdq ymm1, XMMWORD PTR [reg_p1+4*r10+48] // a[j+k]->a[j+k+3]
vpermq ymm3, ymm0, 0x4e
vinserti128 ymm0, ymm0, xmm1, 1 // U
vpblendd ymm1, ymm1, ymm3, 15
vpmuldq ymm3, ymm1, ymm8 // a[j+k].S
vmovdqu ymm4, ymm3
vpand ymm3, ymm14, ymm3 // c0
vpsrlq ymm4, ymm4, 12 // c1
vpslld ymm5, ymm3, 1 // 2*c0
vpsubd ymm4, ymm3, ymm4 // c0-c1
vpaddd ymm4, ymm4, ymm5 // V = 3*c0-c1
vpsubd ymm1, ymm0, ymm4 // a[j+k] = U - V
vpaddd ymm0, ymm0, ymm4 // a[j] = U + V
vpslldq ymm1, ymm1, 4
vpblendd ymm0, ymm0, ymm1, 0xaa
vpermd ymm0, ymm9, ymm0
vmovdqu YMMWORD PTR [reg_p1+4*r10+32], ymm0
vpmovsxdq ymm2, XMMWORD PTR [reg_p2+4*rdx+4*256+16] // S = psi[m+i]->psi[m+i+3]
vpermq ymm8, ymm2, 0x50
vpmovsxdq ymm0, XMMWORD PTR [reg_p1+4*r10+64] // U = a[j]->a[j+3]
vpmovsxdq ymm1, XMMWORD PTR [reg_p1+4*r10+80] // a[j+k]->a[j+k+3]
vpermq ymm3, ymm0, 0x4e
vinserti128 ymm0, ymm0, xmm1, 1 // U
vpblendd ymm1, ymm1, ymm3, 15
vpmuldq ymm3, ymm1, ymm8 // a[j+k].S
vmovdqu ymm4, ymm3
vpand ymm3, ymm14, ymm3 // c0
vpsrlq ymm4, ymm4, 12 // c1
vpslld ymm5, ymm3, 1 // 2*c0
vpsubd ymm4, ymm3, ymm4 // c0-c1
vpaddd ymm4, ymm4, ymm5 // V = 3*c0-c1
vpsubd ymm1, ymm0, ymm4 // a[j+k] = U - V
vpaddd ymm0, ymm0, ymm4 // a[j] = U + V
vpslldq ymm1, ymm1, 4
vpblendd ymm0, ymm0, ymm1, 0xaa
vpermd ymm0, ymm9, ymm0
vmovdqu YMMWORD PTR [reg_p1+4*r10+64], ymm0
vpermq ymm8, ymm2, 0xfa
vpmovsxdq ymm0, XMMWORD PTR [reg_p1+4*r10+96] // U = a[j]->a[j+3]
vpmovsxdq ymm1, XMMWORD PTR [reg_p1+4*r10+112] // a[j+k]->a[j+k+3]
vpermq ymm3, ymm0, 0x4e
vinserti128 ymm0, ymm0, xmm1, 1 // U
vpblendd ymm1, ymm1, ymm3, 15
vpmuldq ymm3, ymm1, ymm8 // a[j+k].S
vmovdqu ymm4, ymm3
vpand ymm3, ymm14, ymm3 // c0
vpsrlq ymm4, ymm4, 12 // c1
vpslld ymm5, ymm3, 1 // 2*c0
vpsubd ymm4, ymm3, ymm4 // c0-c1
vpaddd ymm4, ymm4, ymm5 // V = 3*c0-c1
vpsubd ymm1, ymm0, ymm4 // a[j+k] = U - V
vpaddd ymm0, ymm0, ymm4 // a[j] = U + V
vpslldq ymm1, ymm1, 4
vpblendd ymm0, ymm0, ymm1, 0xaa
vpermd ymm0, ymm9, ymm0
vmovdqu YMMWORD PTR [reg_p1+4*r10+96], ymm0
add r10, r14 // j+32
add rdx, r13 // i+8
cmp rdx, r9
jl loop7
// Stage m=512
// Butterflies between adjacent coefficients: 4 twiddle factors and 8
// coefficients per pass. The second load is offset by one coefficient
// (+4 bytes) so that a[j+1] lines up with a[j] in the low dword of each qword.
vmovdqu ymm9, PERM00224466
shl r9, 1 // m = n/2
xor rdx, rdx // i = 0
xor r10, r10 // j1 = 0
mov r14, 4
loop8:
vpmovsxdq ymm2, XMMWORD PTR [reg_p2+4*rdx+4*512] // S
vmovdqu ymm0, YMMWORD PTR [reg_p1+4*r10] // U = a[j]
vmovdqu ymm1, YMMWORD PTR [reg_p1+4*r10+4] // a[j+k]
vpmuldq ymm3, ymm1, ymm2 // a[j+k].S
vmovdqu ymm4, ymm3
vpand ymm3, ymm14, ymm3 // c0
vpsrlq ymm4, ymm4, 12 // c1
vpslld ymm5, ymm3, 1 // 2*c0
vpsubd ymm4, ymm3, ymm4 // c0-c1
vpaddd ymm4, ymm4, ymm5 // V = 3*c0-c1
vpsubd ymm1, ymm0, ymm4 // a[j+k] = U - V
vpaddd ymm0, ymm0, ymm4 // a[j] = U + V
vpermd ymm1, ymm9, ymm1
vpblendd ymm0, ymm0, ymm1, 0xaa
vmovdqu YMMWORD PTR [reg_p1+4*r10], ymm0
add r10, r13 // j+8
add rdx, r14 // i+4
cmp rdx, r9
jl loop8
// Restore the callee-saved registers pushed on entry (reverse order)
pop r14
pop r13
pop r12
ret
//***********************************************************************
// Inverse NTT
// Operation: a [reg_p1] <- INTT(a) [reg_p1],
// [reg_p2] points to table
// reg_p3 and reg_p4 hold the two scaling constants by value
// (omegainv1N_rev and Ninv, see the scaling step) and
// reg_p5 contains parameter n
//
// Notes:
// - The transform is done in place on 32-bit coefficients at [reg_p1].
// - Although n is a parameter, the table offsets 4*512, 4*256, 4*128, 4*64,
//   the five iterations of loop5b and the a[j+512] addressing of the scaling
//   step are hard-coded, so the routine is only self-consistent for n = 1024.
// - reg_p5 (r8) is copied to r12 on entry and r8 is later reused as j1.
// - r12-r15 and rbx are saved on entry and restored before returning.
// - Reduction pattern used throughout: c0 = x & MASK12x8, c1 = x >> 12,
//   result = 3*c0 - c1 (congruent to 3*x mod 12289 = 3*2^12 + 1, assuming
//   MASK12x8 selects the low 12 bits of each lane; MASK12x8 and the PERM*
//   constants are defined outside this file -- confirm).
//***********************************************************************
.global oqs_rlwe_msrln16_INTT_GS_rev2std_12289_asm
oqs_rlwe_msrln16_INTT_GS_rev2std_12289_asm:
push r12
push r13
push r14
push r15
push rbx
// Stage m=1024
// Butterflies between adjacent coefficients: 4 twiddle factors and 8
// coefficients per pass; the +4 byte load lines a[j+1] up with a[j].
vmovdqu ymm9, PERM00224466
vmovdqu ymm14, MASK12x8
mov r12, reg_p5
shr r12, 1 // n/2 = 512
xor r15, r15 // i = 0
xor r10, r10 // j1 = 0
mov r13, 8
mov r14, 4
loop1b:
vmovdqu ymm1, YMMWORD PTR [reg_p1+4*r10+4] // V = a[j+k]
vmovdqu ymm0, YMMWORD PTR [reg_p1+4*r10] // U = a[j]
vpmovsxdq ymm2, XMMWORD PTR [reg_p2+4*r15+4*512] // S
vpsubd ymm3, ymm0, ymm1 // U - V
vpaddd ymm0, ymm0, ymm1 // U + V
vpmuldq ymm3, ymm3, ymm2 // (U - V).S
vmovdqu ymm4, ymm3
vpand ymm3, ymm14, ymm3 // c0
vpsrlq ymm4, ymm4, 12 // c1
vpslld ymm5, ymm3, 1 // 2*c0
vpsubd ymm4, ymm3, ymm4 // c0-c1
vpaddd ymm1, ymm4, ymm5 // 3*c0-c1
vpermd ymm1, ymm9, ymm1
vpblendd ymm0, ymm0, ymm1, 0xaa
vmovdqu YMMWORD PTR [reg_p1+4*r10], ymm0
add r10, r13 // j+8
add r15, r14 // i+4
cmp r15, r12
jl loop1b
// Stage m=512
// Each pass loads 8 twiddle factors (two groups of 4) and processes 32
// coefficients in four unrolled 8-coefficient blocks.
vmovdqu ymm9, PERM02134657
vmovdqu ymm13, PERM0145
vmovdqu ymm15, PERM2367
shr r12, 1 // n/4 = 256
xor r15, r15 // i = 0
xor r10, r10 // j1 = 0
mov r14, 32
loop2b:
vpmovsxdq ymm2, XMMWORD PTR [reg_p2+4*r15+4*256] // S = psi[m+i]->psi[m+i+3]
vpermq ymm8, ymm2, 0x50
vmovdqu ymm0, YMMWORD PTR [reg_p1+4*r10] // U = a[j]->a[j+7]
vpermd ymm1, ymm15, ymm0
vpermd ymm0, ymm13, ymm0
vpsubd ymm3, ymm0, ymm1 // U - V
vpaddd ymm0, ymm0, ymm1 // U + V
vpmuldq ymm3, ymm3, ymm8 // (U - V).S
vmovdqu ymm4, ymm3
vpand ymm3, ymm14, ymm3 // c0
vpsrlq ymm4, ymm4, 12 // c1
vpslld ymm5, ymm3, 1 // 2*c0
vpsubd ymm4, ymm3, ymm4 // c0-c1
vpaddd ymm1, ymm4, ymm5 // 3*c0-c1
vpslldq ymm1, ymm1, 4
vpblendd ymm0, ymm0, ymm1, 0xaa
vpermd ymm0, ymm9, ymm0
vmovdqu YMMWORD PTR [reg_p1+4*r10], ymm0
vpermq ymm8, ymm2, 0xfa
vmovdqu ymm0, YMMWORD PTR [reg_p1+4*r10+32] // U = a[j]->a[j+7]
vpermd ymm1, ymm15, ymm0
vpermd ymm0, ymm13, ymm0
vpsubd ymm3, ymm0, ymm1 // U - V
vpaddd ymm0, ymm0, ymm1 // U + V
vpmuldq ymm3, ymm3, ymm8 // (U - V).S
vmovdqu ymm4, ymm3
vpand ymm3, ymm14, ymm3 // c0
vpsrlq ymm4, ymm4, 12 // c1
vpslld ymm5, ymm3, 1 // 2*c0
vpsubd ymm4, ymm3, ymm4 // c0-c1
vpaddd ymm1, ymm4, ymm5 // 3*c0-c1
vpslldq ymm1, ymm1, 4
vpblendd ymm0, ymm0, ymm1, 0xaa
vpermd ymm0, ymm9, ymm0
vmovdqu YMMWORD PTR [reg_p1+4*r10+32], ymm0
vpmovsxdq ymm2, XMMWORD PTR [reg_p2+4*r15+4*256+16]// S = psi[m+i]->psi[m+i+3]
vpermq ymm8, ymm2, 0x50
vmovdqu ymm0, YMMWORD PTR [reg_p1+4*r10+64] // U = a[j]->a[j+7]
vpermd ymm1, ymm15, ymm0
vpermd ymm0, ymm13, ymm0
vpsubd ymm3, ymm0, ymm1 // U - V
vpaddd ymm0, ymm0, ymm1 // U + V
vpmuldq ymm3, ymm3, ymm8 // (U - V).S
vmovdqu ymm4, ymm3
vpand ymm3, ymm14, ymm3 // c0
vpsrlq ymm4, ymm4, 12 // c1
vpslld ymm5, ymm3, 1 // 2*c0
vpsubd ymm4, ymm3, ymm4 // c0-c1
vpaddd ymm1, ymm4, ymm5 // 3*c0-c1
vpslldq ymm1, ymm1, 4
vpblendd ymm0, ymm0, ymm1, 0xaa
vpermd ymm0, ymm9, ymm0
vmovdqu YMMWORD PTR [reg_p1+4*r10+64], ymm0
vpermq ymm8, ymm2, 0xfa
vmovdqu ymm0, YMMWORD PTR [reg_p1+4*r10+96] // U = a[j]->a[j+7]
vpermd ymm1, ymm15, ymm0
vpermd ymm0, ymm13, ymm0
vpsubd ymm3, ymm0, ymm1 // U - V
vpaddd ymm0, ymm0, ymm1 // U + V
vpmuldq ymm3, ymm3, ymm8 // (U - V).S
vmovdqu ymm4, ymm3
vpand ymm3, ymm14, ymm3 // c0
vpsrlq ymm4, ymm4, 12 // c1
vpslld ymm5, ymm3, 1 // 2*c0
vpsubd ymm4, ymm3, ymm4 // c0-c1
vpaddd ymm1, ymm4, ymm5 // 3*c0-c1
vpslldq ymm1, ymm1, 4
vpblendd ymm0, ymm0, ymm1, 0xaa
vpermd ymm0, ymm9, ymm0
vmovdqu YMMWORD PTR [reg_p1+4*r10+96], ymm0
add r10, r14 // j+32
add r15, r13 // i+8
cmp r15, r12
jl loop2b
// Stage m=256
// One twiddle factor per 8 coefficients. ymm12 (PERM0246) loaded here stays
// live through the remaining stages and the scaling step.
vmovdqu ymm12, PERM0246
shr r12, 1 // n/8 = 128
xor r15, r15 // i = 0
xor r10, r10 // j1 = 0
loop3b:
vbroadcastss ymm2, DWORD PTR [reg_p2+4*r15+4*128] // S
vpmovsxdq ymm1, XMMWORD PTR [reg_p1+4*r10+16] // V = a[j+k]
vpmovsxdq ymm0, XMMWORD PTR [reg_p1+4*r10] // U = a[j]
vpsubd ymm3, ymm0, ymm1 // U - V
vpaddd ymm0, ymm0, ymm1 // U + V
vpmuldq ymm3, ymm3, ymm2 // (U - V).S
vmovdqu ymm4, ymm3
vpand ymm3, ymm14, ymm3 // c0
vpsrlq ymm4, ymm4, 12 // c1
vpslld ymm5, ymm3, 1 // 2*c0
vpsubd ymm4, ymm3, ymm4 // c0-c1
vpaddd ymm1, ymm4, ymm5 // 3*c0-c1
vpermd ymm0, ymm12, ymm0
vpermd ymm1, ymm12, ymm1
vmovdqu XMMWORD PTR [reg_p1+4*r10], xmm0
vmovdqu XMMWORD PTR [reg_p1+4*r10+16], xmm1
add r10, r13 // j+8
inc r15 // i+1
cmp r15, r12
jl loop3b
// Stage m=128
// One twiddle factor per 16 coefficients: a[j..j+7] is paired with a[j+8..j+15].
shr r12, 1 // n/16 = 64
xor r15, r15 // i = 0
xor r10, r10 // j1 = 0
mov r14, 16
loop4b:
vbroadcastss ymm11, DWORD PTR [reg_p2+4*r15+4*64] // S
vpmovsxdq ymm13, XMMWORD PTR [reg_p1+4*r10+32] // V = a[j+k]
vpmovsxdq ymm15, XMMWORD PTR [reg_p1+4*r10+48] // V = a[j+k]
vpmovsxdq ymm0, XMMWORD PTR [reg_p1+4*r10] // U = a[j]
vpmovsxdq ymm2, XMMWORD PTR [reg_p1+4*r10+16] // U = a[j]
vpsubd ymm1, ymm0, ymm13 // U - V
vpaddd ymm0, ymm0, ymm13 // U + V
vpsubd ymm3, ymm2, ymm15 // U - V
vpaddd ymm2, ymm2, ymm15 // U + V
vpmuldq ymm1, ymm1, ymm11 // (U - V).S
vpmuldq ymm3, ymm3, ymm11 // (U - V).S
vmovdqu ymm13, ymm1
vpand ymm1, ymm14, ymm1 // c0
vpsrlq ymm13, ymm13, 12 // c1
vpslld ymm15, ymm1, 1 // 2*c0
vpsubd ymm13, ymm1, ymm13 // c0-c1
vpaddd ymm1, ymm13, ymm15 // 3*c0-c1
vmovdqu ymm13, ymm3
vpand ymm3, ymm14, ymm3 // c0
vpsrlq ymm13, ymm13, 12 // c1
vpslld ymm15, ymm3, 1 // 2*c0
vpsubd ymm13, ymm3, ymm13 // c0-c1
vpaddd ymm3, ymm13, ymm15 // 3*c0-c1
vpermd ymm0, ymm12, ymm0
vpermd ymm1, ymm12, ymm1
vpermd ymm2, ymm12, ymm2
vpermd ymm3, ymm12, ymm3
vmovdqu XMMWORD PTR [reg_p1+4*r10], xmm0
vmovdqu XMMWORD PTR [reg_p1+4*r10+32], xmm1
vmovdqu XMMWORD PTR [reg_p1+4*r10+16], xmm2
vmovdqu XMMWORD PTR [reg_p1+4*r10+48], xmm3
add r10, r14 // j+16
inc r15 // i+1
cmp r15, r12
jl loop4b
// Stages m=64 -> m=4
// loop5b iterates over the five stages (r9 counts 5 down to 1, k doubles,
// r12 halves), loop6b over the twiddle factors of a stage and loop7b over
// one butterfly group, 16 coefficients per pass.
// Inside loop7b rbx holds 4, so the "cmp r9, rbx / jne skipN" blocks run only
// in the stage where r9 == 4: there both U + V and the reduced product get
// one additional reduction (using vpsrad).
mov r9, 5 // 5 iterations
mov rax, 8
loop5b:
shl rax, 1 // k = 2*k
shr r12, 1 // m/2
xor r15, r15 // i = 0
xor r8, r8
loop6b:
mov r10, r8 // Load j1
mov r11, rax
dec r11
add r11, r10 // j2
mov r13, r12
add r13, r15 // m/2+i
vbroadcastss ymm9, DWORD PTR [reg_p2+4*r13] // S
mov rbx, 4
loop7b:
mov r13, r10
add r13, rax // j+k
vpmovsxdq ymm10, XMMWORD PTR [reg_p1+4*r13] // V = a[j+k]
vpmovsxdq ymm11, XMMWORD PTR [reg_p1+4*r13+16] // V = a[j+k]
vpmovsxdq ymm13, XMMWORD PTR [reg_p1+4*r13+32] // V = a[j+k]
vpmovsxdq ymm15, XMMWORD PTR [reg_p1+4*r13+48] // V = a[j+k]
vpmovsxdq ymm0, XMMWORD PTR [reg_p1+4*r10] // U = a[j]
vpmovsxdq ymm2, XMMWORD PTR [reg_p1+4*r10+16] // U = a[j]
vpmovsxdq ymm4, XMMWORD PTR [reg_p1+4*r10+32] // U = a[j]
vpmovsxdq ymm6, XMMWORD PTR [reg_p1+4*r10+48] // U = a[j]
vpsubd ymm1, ymm0, ymm10 // U - V
vpaddd ymm0, ymm0, ymm10 // U + V
vpsubd ymm3, ymm2, ymm11 // U - V
vpaddd ymm2, ymm2, ymm11 // U + V
vpsubd ymm5, ymm4, ymm13 // U - V
vpaddd ymm4, ymm4, ymm13 // U + V
vpsubd ymm7, ymm6, ymm15 // U - V
vpaddd ymm6, ymm6, ymm15 // U + V
vpmuldq ymm1, ymm1, ymm9 // (U - V).S
vpmuldq ymm3, ymm3, ymm9
vpmuldq ymm5, ymm5, ymm9
vpmuldq ymm7, ymm7, ymm9
vmovdqu ymm13, ymm1
vpand ymm1, ymm14, ymm1 // c0
vpsrlq ymm13, ymm13, 12 // c1
vpslld ymm15, ymm1, 1 // 2*c0
vpsubd ymm13, ymm1, ymm13 // c0-c1
vpaddd ymm1, ymm13, ymm15 // 3*c0-c1
cmp r9, rbx
jne skip1
vmovdqu ymm13, ymm0
vpand ymm0, ymm14, ymm0 // c0
vpsrad ymm13, ymm13, 12 // c1
vpslld ymm15, ymm0, 1 // 2*c0
vpsubd ymm13, ymm0, ymm13 // c0-c1
vpaddd ymm0, ymm13, ymm15 // 3*c0-c1
vmovdqu ymm13, ymm1
vpand ymm1, ymm14, ymm1 // c0
vpsrad ymm13, ymm13, 12 // c1
vpslld ymm15, ymm1, 1 // 2*c0
vpsubd ymm13, ymm1, ymm13 // c0-c1
vpaddd ymm1, ymm13, ymm15 // 3*c0-c1
skip1:
vpermd ymm1, ymm12, ymm1
vpermd ymm0, ymm12, ymm0
vmovdqu ymm13, ymm3
vpand ymm3, ymm14, ymm3 // c0
vpsrlq ymm13, ymm13, 12 // c1
vpslld ymm15, ymm3, 1 // 2*c0
vpsubd ymm13, ymm3, ymm13 // c0-c1
vpaddd ymm3, ymm13, ymm15 // 3*c0-c1
vmovdqu XMMWORD PTR [reg_p1+4*r10], xmm0
vmovdqu XMMWORD PTR [reg_p1+4*r13], xmm1
cmp r9, rbx
jne skip2
vmovdqu ymm13, ymm2
vpand ymm2, ymm14, ymm2 // c0
vpsrad ymm13, ymm13, 12 // c1
vpslld ymm15, ymm2, 1 // 2*c0
vpsubd ymm13, ymm2, ymm13 // c0-c1
vpaddd ymm2, ymm13, ymm15 // 3*c0-c1
vmovdqu ymm13, ymm3
vpand ymm3, ymm14, ymm3 // c0
vpsrad ymm13, ymm13, 12 // c1
vpslld ymm15, ymm3, 1 // 2*c0
vpsubd ymm13, ymm3, ymm13 // c0-c1
vpaddd ymm3, ymm13, ymm15 // 3*c0-c1
skip2:
vpermd ymm3, ymm12, ymm3
vpermd ymm2, ymm12, ymm2
vmovdqu ymm13, ymm5
vpand ymm5, ymm14, ymm5 // c0
vpsrlq ymm13, ymm13, 12 // c1
vpslld ymm15, ymm5, 1 // 2*c0
vpsubd ymm13, ymm5, ymm13 // c0-c1
vpaddd ymm5, ymm13, ymm15 // 3*c0-c1
vmovdqu XMMWORD PTR [reg_p1+4*r10+16], xmm2
vmovdqu XMMWORD PTR [reg_p1+4*r13+16], xmm3
cmp r9, rbx
jne skip3
vmovdqu ymm13, ymm4
vpand ymm4, ymm14, ymm4 // c0
vpsrad ymm13, ymm13, 12 // c1
vpslld ymm15, ymm4, 1 // 2*c0
vpsubd ymm13, ymm4, ymm13 // c0-c1
vpaddd ymm4, ymm13, ymm15 // 3*c0-c1
vmovdqu ymm13, ymm5
vpand ymm5, ymm14, ymm5 // c0
vpsrad ymm13, ymm13, 12 // c1
vpslld ymm15, ymm5, 1 // 2*c0
vpsubd ymm13, ymm5, ymm13 // c0-c1
vpaddd ymm5, ymm13, ymm15 // 3*c0-c1
skip3:
vpermd ymm5, ymm12, ymm5
vpermd ymm4, ymm12, ymm4
vmovdqu ymm13, ymm7
vpand ymm7, ymm14, ymm7 // c0
vpsrlq ymm13, ymm13, 12 // c1
vpslld ymm15, ymm7, 1 // 2*c0
vpsubd ymm13, ymm7, ymm13 // c0-c1
vpaddd ymm7, ymm13, ymm15 // 3*c0-c1
vmovdqu XMMWORD PTR [reg_p1+4*r10+32], xmm4
vmovdqu XMMWORD PTR [reg_p1+4*r13+32], xmm5
cmp r9, rbx
jne skip4
vmovdqu ymm13, ymm6
vpand ymm6, ymm14, ymm6 // c0
vpsrad ymm13, ymm13, 12 // c1
vpslld ymm15, ymm6, 1 // 2*c0
vpsubd ymm13, ymm6, ymm13 // c0-c1
vpaddd ymm6, ymm13, ymm15 // 3*c0-c1
vmovdqu ymm13, ymm7
vpand ymm7, ymm14, ymm7 // c0
vpsrad ymm13, ymm13, 12 // c1
vpslld ymm15, ymm7, 1 // 2*c0
vpsubd ymm13, ymm7, ymm13 // c0-c1
vpaddd ymm7, ymm13, ymm15 // 3*c0-c1
skip4:
vpermd ymm7, ymm12, ymm7
vpermd ymm6, ymm12, ymm6
vmovdqu XMMWORD PTR [reg_p1+4*r13+48], xmm7
vmovdqu XMMWORD PTR [reg_p1+4*r10+48], xmm6
add r10, r14
cmp r10, r11
jl loop7b
// rbx is reused as scratch here; it is reloaded with 4 at the top of loop6b
mov rbx, rax
shl rbx, 1 // 2*k
add r8, rbx // j1+2*k
inc r15
cmp r15, r12
jl loop6b
dec r9
jnz loop5b
// Scaling step
// Last butterfly between a[j] and a[j+512], fused with the final scaling:
// (U - V) is multiplied by S (from reg_p3) and (U + V) by T (from reg_p4).
// reg_p3/reg_p4 are moved as 64-bit values; vpmuldq only reads the low dword
// of each qword lane.
shl rax, 1 // k = 2*k = 512
xor r10, r10 // j = 0
mov r14, 4
movq xmm0, reg_p3
vbroadcastsd ymm10, xmm0 // S = omegainv1N_rev
movq xmm0, reg_p4
vbroadcastsd ymm11, xmm0 // T = Ninv
loop8b:
vpmovsxdq ymm13, XMMWORD PTR [reg_p1+4*r10+4*512] // V = a[j+k]
vpmovsxdq ymm0, XMMWORD PTR [reg_p1+4*r10] // U = a[j]
vpsubd ymm1, ymm0, ymm13 // U - V
vpaddd ymm0, ymm0, ymm13 // U + V
vpmuldq ymm1, ymm1, ymm10 // (U - V).S
vpmuldq ymm0, ymm0, ymm11 // (U + V).T
vmovdqu ymm13, ymm0
vpand ymm0, ymm14, ymm0 // c0
vpsrlq ymm13, ymm13, 12 // c1
vpslld ymm15, ymm0, 1 // 2*c0
vpsubd ymm13, ymm0, ymm13 // c0-c1
vpaddd ymm0, ymm13, ymm15 // 3*c0-c1
vmovdqu ymm13, ymm1
vpand ymm1, ymm14, ymm1 // c0
vpsrlq ymm13, ymm13, 12 // c1
vpslld ymm15, ymm1, 1 // 2*c0
vpsubd ymm13, ymm1, ymm13 // c0-c1
vpaddd ymm1, ymm13, ymm15 // 3*c0-c1
vpermd ymm0, ymm12, ymm0
vpermd ymm1, ymm12, ymm1
vmovdqu XMMWORD PTR [reg_p1+4*r10], xmm0
vmovdqu XMMWORD PTR [reg_p1+4*r10+4*512], xmm1
add r10, r14 // j+4
cmp r10, rax
jl loop8b
// loop9b is not the target of any jump in this routine; it only marks the epilogue
loop9b:
pop rbx
pop r15
pop r14
pop r13
pop r12
ret
//***********************************************************************
// Component-wise multiplication and addition
// Operation: d [reg_p4] <- a [reg_p1] * b [reg_p2] + c [reg_p3]
// reg_p5 contains parameter n
//
// Notes:
// - Four coefficients are processed per iteration (index rax advances by 4),
//   and the loop body runs before n is tested, so at least four coefficients
//   are always processed.
// - The 64-bit value a*b + c goes through two reduction rounds of the form
//   3*c0 - c1 before the four 32-bit results are stored.
// - PERM0246 and MASK12x8 are defined outside this file.
// - Uses only rax, r11 and ymm0-ymm6; nothing is pushed on the stack.
//***********************************************************************
.global oqs_rlwe_msrln16_pmuladd_asm
oqs_rlwe_msrln16_pmuladd_asm:
vmovdqu ymm5, PERM0246
vmovdqu ymm6, MASK12x8
xor rax, rax
movq r11, 4
lazo2:
// Sign-extend four 32-bit inputs of each operand to 64-bit lanes
vpmovsxdq ymm0, XMMWORD PTR [reg_p1+4*rax] // a
vpmovsxdq ymm1, XMMWORD PTR [reg_p2+4*rax] // b
vpmovsxdq ymm2, XMMWORD PTR [reg_p3+4*rax] // c
vpmuldq ymm0, ymm1, ymm0
vpaddq ymm0, ymm2, ymm0
// First reduction round (64-bit logical shift for c1)
vmovdqu ymm3, ymm0
vpand ymm0, ymm6, ymm0 // c0
vpsrlq ymm3, ymm3, 12 // c1
vpslld ymm4, ymm0, 1 // 2*c0
vpsubd ymm3, ymm0, ymm3 // c0-c1
vpaddd ymm0, ymm3, ymm4 // 3*c0-c1
// Second reduction round (32-bit arithmetic shift for c1)
vmovdqu ymm3, ymm0
vpand ymm0, ymm6, ymm0 // c0
vpsrad ymm3, ymm3, 12 // c1
vpslld ymm4, ymm0, 1 // 2*c0
vpsubd ymm3, ymm0, ymm3 // c0-c1
vpaddd ymm0, ymm3, ymm4 // 3*c0-c1
// Pack the four results into the low 128 bits and store them to d
vpermd ymm0, ymm5, ymm0
vmovdqu XMMWORD PTR [reg_p4+4*rax], xmm0
add rax, r11 // j+4
cmp rax, reg_p5
jl lazo2
ret
//***********************************************************************
// Component-wise multiplication
// Operation: c [reg_p3] <- a [reg_p1] * b [reg_p2]
// reg_p4 contains parameter n
//
// Notes:
// - Four coefficients are processed per iteration (index rax advances by 4),
//   and the loop body runs before n is tested, so at least four coefficients
//   are always processed.
// - The 64-bit product goes through two reduction rounds of the form
//   3*c0 - c1 before the four 32-bit results are stored.
// - PERM0246 and MASK12x8 are defined outside this file.
// - Uses only rax, r11 and ymm0-ymm6; nothing is pushed on the stack.
//***********************************************************************
.global oqs_rlwe_msrln16_pmul_asm
oqs_rlwe_msrln16_pmul_asm:
vmovdqu ymm5, PERM0246
vmovdqu ymm6, MASK12x8
xor rax, rax
movq r11, 4
lazo3:
// Sign-extend four 32-bit inputs of each operand to 64-bit lanes
vpmovsxdq ymm0, XMMWORD PTR [reg_p1+4*rax] // a
vpmovsxdq ymm1, XMMWORD PTR [reg_p2+4*rax] // b
vpmuldq ymm0, ymm1, ymm0
// First reduction round (64-bit logical shift for c1)
vmovdqu ymm3, ymm0
vpand ymm0, ymm6, ymm0 // c0
vpsrlq ymm3, ymm3, 12 // c1
vpslld ymm4, ymm0, 1 // 2*c0
vpsubd ymm3, ymm0, ymm3 // c0-c1
vpaddd ymm0, ymm3, ymm4 // 3*c0-c1
// Second reduction round (32-bit arithmetic shift for c1)
vmovdqu ymm3, ymm0
vpand ymm0, ymm6, ymm0 // c0
vpsrad ymm3, ymm3, 12 // c1
vpslld ymm4, ymm0, 1 // 2*c0
vpsubd ymm3, ymm0, ymm3 // c0-c1
vpaddd ymm0, ymm3, ymm4 // 3*c0-c1
// Pack the four results into the low 128 bits and store them to c
vpermd ymm0, ymm5, ymm0
vmovdqu XMMWORD PTR [reg_p3+4*rax], xmm0
add rax, r11 // j+4
cmp rax, reg_p4
jl lazo3
ret
//***********************************************************************
// Two consecutive reductions
// Operation: c [reg_p1] <- a [reg_p1]
// reg_p2 contains parameter n
//
// Notes:
// - Works in place on eight 32-bit coefficients per iteration (index rax
//   advances by 8); the loop body runs before n is tested.
// - After the two 3*c0 - c1 rounds, the value is adjusted with three masked
//   steps: add PRIME8x if negative, subtract PRIME8x, add PRIME8x if negative.
// - MASK12x8 and PRIME8x are defined outside this file (PRIME8x is presumably
//   the modulus 12289 in every lane -- confirm).
//***********************************************************************
.global oqs_rlwe_msrln16_two_reduce12289_asm
oqs_rlwe_msrln16_two_reduce12289_asm:
vmovdqu ymm6, MASK12x8
vmovdqu ymm7, PRIME8x
xor rax, rax
movq r11, 8
lazo4:
vmovdqu ymm0, YMMWORD PTR [reg_p1+4*rax] // a
// First reduction round
vmovdqu ymm3, ymm0
vpand ymm0, ymm6, ymm0 // c0
vpsrad ymm3, ymm3, 12 // c1
vpslld ymm4, ymm0, 1 // 2*c0
vpsubd ymm3, ymm0, ymm3 // c0-c1
vpaddd ymm0, ymm3, ymm4 // 3*c0-c1
// Second reduction round
vmovdqu ymm3, ymm0
vpand ymm0, ymm6, ymm0 // c0
vpsrad ymm3, ymm3, 12 // c1
vpslld ymm4, ymm0, 1 // 2*c0
vpsubd ymm3, ymm0, ymm3 // c0-c1
vpaddd ymm0, ymm3, ymm4 // 3*c0-c1
// Branch-free correction: vpsrad by 31 yields an all-ones mask for negative lanes
vpsrad ymm2, ymm0, 31
vpand ymm2, ymm7, ymm2
vpaddd ymm2, ymm0, ymm2
vpsubd ymm0, ymm2, ymm7
vpsrad ymm2, ymm0, 31
vpand ymm2, ymm7, ymm2
vpaddd ymm0, ymm0, ymm2
vmovdqu YMMWORD PTR [reg_p1+4*rax], ymm0
add rax, r11 // j+8
cmp rax, reg_p2
jl lazo4
ret
//***********************************************************************
// Encoding
// Operation: c [reg_p2] <- a [reg_p1]
//
// Notes:
// - The coefficient count is hard-coded to 1024 (r9); there is no length
//   parameter.
// - Each iteration reads eight 32-bit coefficients and advances the output
//   offset by 14 bytes, i.e. 14 bits per coefficient and 1792 output bytes
//   in total.
// - Each iteration issues two 16-byte stores, at offsets r10 and r10+7, so it
//   touches 23 bytes. The final iteration (r10 = 1778) therefore writes up to
//   byte 1800, and the output buffer must be at least 1801 bytes long.
// - MASK32 and MASK42 are defined outside this file.
//***********************************************************************
.global oqs_rlwe_msrln16_encode_asm
oqs_rlwe_msrln16_encode_asm:
vmovdqu ymm6, MASK32
vmovdqu ymm7, MASK42
mov r9, 1024
xor rax, rax
xor r10, r10
mov r11, 14
mov rcx, 8
lazo5:
vmovdqu ymm0, YMMWORD PTR [reg_p1+4*rax] // a
// Shift/mask the coefficients of each 128-bit half into one contiguous bit string
vpsrlq ymm1, ymm0, 18
vpsllq ymm2, ymm0, 4
vpand ymm0, ymm0, ymm6
vpsrldq ymm2, ymm2, 5
vpsrlq ymm3, ymm1, 4
vpand ymm1, ymm1, ymm6
vpand ymm2, ymm2, ymm7
vpsrldq ymm3, ymm3, 4
vpor ymm0, ymm0, ymm1
vpor ymm0, ymm0, ymm2
vpor ymm0, ymm0, ymm3
// Bring the upper 128-bit half down so it can be stored 7 bytes after the lower half
vpermq ymm1, ymm0, 0x0e
vmovdqu XMMWORD PTR [reg_p2+r10], xmm0
vmovdqu XMMWORD PTR [reg_p2+r10+7], xmm1
add r10, r11
add rax, rcx // j+8
cmp rax, r9
jl lazo5
ret
//***********************************************************************
// Decoding
// Operation: c [reg_p2] <- a [reg_p1]
//
// Notes:
// - The coefficient count is hard-coded to 1024 (r9); there is no length
//   parameter.
// - Each iteration advances the input offset by 14 bytes and writes eight
//   32-bit coefficients to the output.
// - Each iteration issues two 16-byte loads, at offsets r10 and r10+7. The
//   final iteration (r10 = 1778) therefore reads up to byte 1800, and the
//   input buffer must be at least 1801 bytes long.
// - MASK14_1 .. MASK14_4 are defined outside this file.
//***********************************************************************
.global oqs_rlwe_msrln16_decode_asm
oqs_rlwe_msrln16_decode_asm:
vmovdqu ymm6, MASK14_1
vmovdqu ymm7, MASK14_2
vmovdqu ymm8, MASK14_3
vmovdqu ymm9, MASK14_4
mov r9, 1024
xor rax, rax
xor r10, r10
mov r11, 14
mov rcx, 8
lazo6:
// Load two overlapping 16-byte windows, 7 bytes apart, into the two halves of ymm0
vmovdqu xmm0, XMMWORD PTR [reg_p1+r10]
vmovdqu xmm1, XMMWORD PTR [reg_p1+r10+7]
vinserti128 ymm0, ymm0, xmm1, 1
// Isolate four bit fields with the four masks, then shift each into place
vpand ymm1, ymm0, ymm6
vpand ymm2, ymm0, ymm7
vpand ymm3, ymm0, ymm8
vpand ymm4, ymm0, ymm9
vpsllq ymm2, ymm2, 18
vpsllq ymm3, ymm3, 4
vpslldq ymm3, ymm3, 4
vpsrlq ymm4, ymm4, 2
vpslldq ymm4, ymm4, 7
vpor ymm1, ymm1, ymm2
vpor ymm1, ymm1, ymm3
vpor ymm1, ymm1, ymm4
vmovdqu YMMWORD PTR [reg_p2+4*rax], ymm1
add r10, r11
add rax, rcx // j+8
cmp rax, r9
jl lazo6
ret

View File

@ -0,0 +1,94 @@
/***************************************************************************************
* LatticeCrypto: an efficient post-quantum Ring-Learning With Errors cryptography library
*
* Copyright (c) Microsoft Corporation. All rights reserved.
*
*
* Abstract: main header file
*
****************************************************************************************/
#ifndef __LatticeCrypt_H__
#define __LatticeCrypt_H__
// For C++
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <stdbool.h>
#include <stddef.h>
#include <oqs/rand.h>
// Selection of the machine word ("digit") type used by oqs_rlwe_msrln16_clear_words.
// NOTE: probably a better way to do this.
// NOTE(review): because of the trailing !defined(__LP64__) term, the 64-bit branch is
// taken on every target that does not define __LP64__; the 32-bit #else branch is only
// reached when __LP64__ is defined and none of the other listed macros are. Confirm
// that this is the intended selection.
#if (defined(__x86_64__) || defined(__x86_64) || defined(__arch64__) || defined(_M_AMD64) || defined(_M_X64) || defined(_WIN64) || !defined(__LP64__))
#define RADIX 64
typedef uint64_t digit_t; // Unsigned 64-bit digit
typedef int64_t sdigit_t; // Signed 64-bit digit
#else
#define RADIX 32
typedef uint32_t digit_t; // Unsigned 32-bit digit
typedef int32_t sdigit_t; // Signed 32-bit digit
#endif
// Status codes returned by the key-exchange functions.
// Values are spelled out explicitly; the final entry is a sentinel that
// follows the last real code and is not itself a status.
typedef enum {
	CRYPTO_SUCCESS = 0x00,
	CRYPTO_ERROR = 0x01,
	CRYPTO_ERROR_DURING_TEST = 0x02,
	CRYPTO_ERROR_UNKNOWN = 0x03,
	CRYPTO_ERROR_NOT_IMPLEMENTED = 0x04,
	CRYPTO_ERROR_NO_MEMORY = 0x05,
	CRYPTO_ERROR_INVALID_PARAMETER = 0x06,
	CRYPTO_ERROR_SHARED_KEY = 0x07,
	CRYPTO_ERROR_TOO_MANY_ITERATIONS = 0x08,
	CRYPTO_ERROR_END_OF_LIST // sentinel: one past the last status code
} CRYPTO_STATUS;
// Number of status codes (value of the sentinel)
#define CRYPTO_STATUS_TYPE_SIZE (CRYPTO_ERROR_END_OF_LIST)
// Basic key-exchange constants (sizes in bytes)
// Alice's message is 1024 coefficients packed at 14 bits each (1792 bytes)
// followed by a 32-byte seed: 1792 + 32 = 1824.
// NOTE(review): Bob's 2048 bytes presumably are the same 1792-byte packing plus
// 256 bytes of reconciliation data -- confirm against oqs_rlwe_msrln16_encode_B.
#define OQS_RLWE_MSRLN16_PKA_BYTES 1824 // Alice's public key size
#define OQS_RLWE_MSRLN16_PKB_BYTES 2048 // Bob's public key size
#define OQS_RLWE_MSRLN16_SHAREDKEY_BYTES 32 // Shared key size
/******************** Function prototypes *******************/
// Clear digits from memory. "nwords" indicates the number of digits to be zeroed.
// "nwords" counts digit_t-sized words, not bytes.
extern void oqs_rlwe_msrln16_clear_words(void *mem, digit_t nwords);
/*********************** Key exchange API ***********************/
// All three functions below report their outcome as a CRYPTO_STATUS value.
// "rand" is the OQS random source declared in <oqs/rand.h>.

// Alice's key generation
// It produces a private key SecretKeyA and computes the public key PublicKeyA.
// Outputs: the private key SecretKeyA that consists of a 32-bit signed 1024-element array (4096 bytes in total)
// the public key PublicKeyA that occupies 1824 bytes
CRYPTO_STATUS oqs_rlwe_msrln16_KeyGeneration_A(int32_t *SecretKeyA, unsigned char *PublicKeyA, OQS_RAND *rand);
// Bob's key generation and shared secret computation
// It produces a private key and computes the public key PublicKeyB. In combination with Alice's public key PublicKeyA, it computes
// the shared secret SharedSecretB.
// Input: Alice's public key PublicKeyA that consists of 1824 bytes
// Outputs: the public key PublicKeyB that occupies 2048 bytes.
// the 256-bit shared secret SharedSecretB.
CRYPTO_STATUS oqs_rlwe_msrln16_SecretAgreement_B(unsigned char *PublicKeyA, unsigned char *SharedSecretB, unsigned char *PublicKeyB, OQS_RAND *rand);
// Alice's shared secret computation
// It computes the shared secret SharedSecretA using Bob's public key PublicKeyB and Alice's private key SecretKeyA.
// Inputs: Bob's public key PublicKeyB that consists of 2048 bytes
// the private key SecretKeyA that consists of a 32-bit signed 1024-element array (4096 bytes in total)
// Output: the 256-bit shared secret SharedSecretA.
CRYPTO_STATUS oqs_rlwe_msrln16_SecretAgreement_A(unsigned char *PublicKeyB, int32_t *SecretKeyA, unsigned char *SharedSecretA);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -0,0 +1,452 @@
/****************************************************************************************
* LatticeCrypto: an efficient post-quantum Ring-Learning With Errors cryptography library
*
* Copyright (c) Microsoft Corporation. All rights reserved.
*
*
* Abstract: Ring-LWE key exchange
* The implementation is based on the instantiation of Peikert's key exchange [1]
* due to Alkim, Ducas, Poppelmann and Schwabe [2].
*
* [1] C. Peikert, "Lattice cryptography for the internet", in Post-Quantum Cryptography -
* 6th International Workshop (PQCrypto 2014), LNCS 8772, pp. 197-219. Springer, 2014.
* [2] E. Alkim, L. Ducas, T. Pöppelmann and P. Schwabe, "Post-quantum key exchange - a new
* hope", IACR Cryptology ePrint Archive, Report 2015/1092, 2015.
*
******************************************************************************************/
#include "LatticeCrypto_priv.h"
#include "oqs/rand.h"
#include "external/shake128.h"
extern const int32_t psi_rev_ntt1024_12289[1024];
extern const int32_t omegainv_rev_ntt1024_12289[1024];
extern const int32_t omegainv10N_rev_ntt1024_12289;
extern const int32_t Ninv11_ntt1024_12289;
// import external code
#include "external/shake128.c"
#ifdef RLWE_ASM_AVX2
#include "AMD64/consts.c"
#include "AMD64/ntt_x64.c"
#else
#include "generic/ntt.c"
#endif
__inline void oqs_rlwe_msrln16_clear_words(void *mem, digit_t nwords) {
	// Clear digits from memory. "nwords" indicates the number of digits to be zeroed.
	//   mem:    start of the region to wipe; accessed as an array of digit_t
	//   nwords: number of digit_t-sized words (not bytes) to set to zero
	// This function uses the volatile type qualifier to inform the compiler not to optimize out the memory clearing.
	//
	// The counter has the same type as nwords: with a narrower "unsigned int"
	// counter and a 64-bit digit_t, a count above UINT_MAX would make the
	// counter wrap around and the loop never terminate.
	digit_t i;
	volatile digit_t *v = mem;

	for (i = 0; i < nwords; i++) {
		v[i] = 0;
	}
}
void oqs_rlwe_msrln16_encode_A(const uint32_t *pk, const unsigned char *seed, unsigned char *m) {
	// Alice's message encoding.
	//   pk   : 1024 coefficients; the low 14 bits of each are packed, four coefficients per 7 bytes.
	//   seed : 32 bytes appended unchanged after the packed coefficients.
	//   m    : output buffer, 1792 packed bytes followed by the 32 seed bytes (1824 bytes written).
	unsigned int k;
	unsigned char *out = m;

#if defined(RLWE_ASM_AVX2)
	oqs_rlwe_msrln16_encode_asm(pk, m);
	out += 1792;
#else
	const uint32_t *c;

	// Walk the coefficients in groups of four, emitting seven bytes per group.
	for (c = pk; c != pk + 1024; c += 4, out += 7) {
		out[0] = (unsigned char)(c[0] & 0xFF);
		out[1] = (unsigned char)((c[0] >> 8) | ((c[1] & 0x03) << 6));
		out[2] = (unsigned char)((c[1] >> 2) & 0xFF);
		out[3] = (unsigned char)((c[1] >> 10) | ((c[2] & 0x0F) << 4));
		out[4] = (unsigned char)((c[2] >> 4) & 0xFF);
		out[5] = (unsigned char)((c[2] >> 12) | ((c[3] & 0x3F) << 2));
		out[6] = (unsigned char)(c[3] >> 6);
	}
#endif

	// "out" now points just past the packed coefficients (offset 1792).
	for (k = 0; k < 32; k++) {
		out[k] = seed[k];
	}
}
void oqs_rlwe_msrln16_decode_A(const unsigned char *m, uint32_t *pk, unsigned char *seed) {
	// Alice's message decoding (inverse of the packing done by oqs_rlwe_msrln16_encode_A).
	//   m    : input message, 1792 packed bytes followed by 32 seed bytes.
	//   pk   : receives 1024 coefficients of 14 bits each.
	//   seed : receives the trailing 32 bytes of the message.
	unsigned int k;
	const unsigned char *in = m;

#if defined(RLWE_ASM_AVX2)
	oqs_rlwe_msrln16_decode_asm(m, pk);
	in += 1792;
#else
	uint32_t *out;

	// Every 7 input bytes expand into four 14-bit coefficients.
	for (out = pk; out != pk + 1024; out += 4, in += 7) {
		out[0] = ((uint32_t)in[0] | (((uint32_t)in[1] & 0x3F) << 8));
		out[1] = (((uint32_t)in[1] >> 6) | ((uint32_t)in[2] << 2) | (((uint32_t)in[3] & 0x0F) << 10));
		out[2] = (((uint32_t)in[3] >> 4) | ((uint32_t)in[4] << 4) | (((uint32_t)in[5] & 0x03) << 12));
		out[3] = (((uint32_t)in[5] >> 2) | ((uint32_t)in[6] << 6));
	}
#endif

	// "in" now points at offset 1792, where the seed starts.
	for (k = 0; k < 32; k++) {
		seed[k] = in[k];
	}
}
void oqs_rlwe_msrln16_encode_B(const uint32_t *pk, const uint32_t *rvec, unsigned char *m) {
	// Bob's message encoding.
	//   pk   : 1024 coefficients; the low 14 bits of each are packed, four coefficients per 7 bytes.
	//   rvec : 1024 values; four consecutive entries are OR-ed into one byte at bit offsets 0, 2, 4 and 6.
	//   m    : output buffer, 1792 bytes of packed coefficients followed by 256 bytes of packed rvec (2048 bytes).
	unsigned int k;
	unsigned char *hint = m + 1792;

#if defined(RLWE_ASM_AVX2)
	oqs_rlwe_msrln16_encode_asm(pk, m);
#else
	unsigned char *out = m;
	const uint32_t *c;

	for (c = pk; c != pk + 1024; c += 4, out += 7) {
		out[0] = (unsigned char)(c[0] & 0xFF);
		out[1] = (unsigned char)((c[0] >> 8) | ((c[1] & 0x03) << 6));
		out[2] = (unsigned char)((c[1] >> 2) & 0xFF);
		out[3] = (unsigned char)((c[1] >> 10) | ((c[2] & 0x0F) << 4));
		out[4] = (unsigned char)((c[2] >> 4) & 0xFF);
		out[5] = (unsigned char)((c[2] >> 12) | ((c[3] & 0x3F) << 2));
		out[6] = (unsigned char)(c[3] >> 6);
	}
#endif

	// One output byte per group of four consecutive rvec entries.
	for (k = 0; k < 1024 / 4; k++) {
		const uint32_t *q = rvec + 4 * k;
		hint[k] = (unsigned char)(q[0] | (q[1] << 2) | (q[2] << 4) | (q[3] << 6));
	}
}
void oqs_rlwe_msrln16_decode_B(unsigned char *m, uint32_t *pk, uint32_t *rvec) {
	// Bob's message decoding (inverse of the packing done by oqs_rlwe_msrln16_encode_B).
	//   m    : input message, 1792 bytes of packed coefficients followed by 256 bytes of packed rvec.
	//   pk   : receives 1024 coefficients of 14 bits each.
	//   rvec : receives 1024 two-bit values, four per input byte (lowest bit pair first).
	unsigned int k;
	const unsigned char *hint = m + 1792;

#if defined(RLWE_ASM_AVX2)
	oqs_rlwe_msrln16_decode_asm(m, pk);
#else
	const unsigned char *in = m;
	uint32_t *out;

	for (out = pk; out != pk + 1024; out += 4, in += 7) {
		out[0] = ((uint32_t)in[0] | (((uint32_t)in[1] & 0x3F) << 8));
		out[1] = (((uint32_t)in[1] >> 6) | ((uint32_t)in[2] << 2) | (((uint32_t)in[3] & 0x0F) << 10));
		out[2] = (((uint32_t)in[3] >> 4) | ((uint32_t)in[4] << 4) | (((uint32_t)in[5] & 0x03) << 12));
		out[3] = (((uint32_t)in[5] >> 2) | ((uint32_t)in[6] << 6));
	}
#endif

	// Split every byte of the trailing section into four 2-bit entries.
	for (k = 0; k < 1024 / 4; k++) {
		uint32_t *q = rvec + 4 * k;
		q[0] = (uint32_t)(hint[k] & 0x03);
		q[1] = (uint32_t)((hint[k] >> 2) & 0x03);
		q[2] = (uint32_t)((hint[k] >> 4) & 0x03);
		q[3] = (uint32_t)(hint[k] >> 6);
	}
}
static __inline uint32_t Abs(int32_t value) {
	// Branch-free absolute value.
	// "sign" is the result of shifting the sign bit across the whole word
	// (all ones for a negative input, zero otherwise); XOR-ing with it and then
	// subtracting it negates the input exactly when the mask is all ones.
	const uint32_t sign = (uint32_t)(value >> 31);
	const uint32_t flipped = sign ^ (uint32_t)value;

	return flipped - sign;
}
CRYPTO_STATUS oqs_rlwe_msrln16_HelpRec(const uint32_t *x, uint32_t *rvec, OQS_RAND *rand) {
// Reconciliation helper
// Inputs:  x    - 1024 values, read as four groups of 256 (offsets 0, 256, 512 and 768)
//          rand - randomness source; 32 bytes (256 bits, one bit per index i) are drawn from it
// Output:  rvec - 1024 entries; each final entry is masked with 0x03, i.e. lies in 0..3
// Returns: always CRYPTO_SUCCESS.
// The portable path below contains no data-dependent branches: every comparison is expressed
// through the top bit of an unsigned subtraction, and selections are done with masks.
unsigned int i, j, norm;
unsigned char bit, random_bits[32];
uint32_t v0[4], v1[4];
// OQS integration note: call to aux API replaced with direct call to OQS_RAND
rand->rand_n(rand, random_bits, 32);
#if defined(RLWE_ASM_AVX2)
oqs_rlwe_msrln16_helprec_asm(x, rvec, random_bits);
#else
for (i = 0; i < 256; i++) {
// Bit i of the 256 random bits; the same bit is used for all four groups at index i
bit = 1 & (random_bits[i >> 3] >> (i & 0x07));
// rvec temporarily holds 2*x - bit for each of the four groups
rvec[i] = (x[i] << 1) - bit;
rvec[i + 256] = (x[i + 256] << 1) - bit;
rvec[i + 512] = (x[i + 512] << 1) - bit;
rvec[i + 768] = (x[i + 768] << 1) - bit;
norm = 0;
v0[0] = 4;
v0[1] = 4;
v0[2] = 4;
v0[3] = 4;
v1[0] = 3;
v1[1] = 3;
v1[2] = 3;
v1[3] = 3;
for (j = 0; j < 4; j++) {
// (a - T) >> 31 is 1 when the unsigned subtraction wraps (top bit set), so each line
// subtracts 1 from the counter when the value is below the threshold T
v0[j] -= (rvec[i + 256 * j] - OQS_RLWE_MSRLN16_PARAMETER_Q4 ) >> 31;
v0[j] -= (rvec[i + 256 * j] - OQS_RLWE_MSRLN16_PARAMETER_3Q4) >> 31;
v0[j] -= (rvec[i + 256 * j] - OQS_RLWE_MSRLN16_PARAMETER_5Q4) >> 31;
v0[j] -= (rvec[i + 256 * j] - OQS_RLWE_MSRLN16_PARAMETER_7Q4) >> 31;
v1[j] -= (rvec[i + 256 * j] - OQS_RLWE_MSRLN16_PARAMETER_Q2 ) >> 31;
v1[j] -= (rvec[i + 256 * j] - OQS_RLWE_MSRLN16_PARAMETER_Q ) >> 31;
v1[j] -= (rvec[i + 256 * j] - OQS_RLWE_MSRLN16_PARAMETER_3Q2) >> 31;
// Accumulate |2*rvec - q*v0[j]| over the four groups
norm += Abs(2 * rvec[i + 256 * j] - OQS_RLWE_MSRLN16_PARAMETER_Q * v0[j]);
}
norm = (uint32_t)((int32_t)(norm - OQS_RLWE_MSRLN16_PARAMETER_Q) >> 31); // If norm < q then norm = 0xff...ff, else norm = 0
// Masked select: keep v0[k] when norm is all ones, otherwise replace it with v1[k]
v0[0] = (norm & (v0[0] ^ v1[0])) ^ v1[0];
v0[1] = (norm & (v0[1] ^ v1[1])) ^ v1[1];
v0[2] = (norm & (v0[2] ^ v1[2])) ^ v1[2];
v0[3] = (norm & (v0[3] ^ v1[3])) ^ v1[3];
// Final outputs, each reduced to two bits; the last one also carries (1 & ~norm),
// i.e. a 1 in its lowest bit exactly when the v1 candidates were selected
rvec[i] = (v0[0] - v0[3]) & 0x03;
rvec[i + 256] = (v0[1] - v0[3]) & 0x03;
rvec[i + 512] = (v0[2] - v0[3]) & 0x03;
rvec[i + 768] = ((v0[3] << 1) + (1 & ~norm)) & 0x03;
}
#endif
return CRYPTO_SUCCESS;
}
static __inline uint32_t LDDecode(int32_t *t) {
// Low-density decoding
// Input:   t - four signed values
// Returns: 1 or 0, decided by comparing an accumulated absolute-value sum ("norm") with 8*q.
// Written without data-dependent branches: all decisions are taken through sign-bit masks.
unsigned int i, norm = 0;
uint32_t mask1, mask2, value;
int32_t cneg = -8 * OQS_RLWE_MSRLN16_PARAMETER_Q;
for (i = 0; i < 4; i++) {
mask1 = t[i] >> 31; // If t[i] < 0 then mask1 = 0xff...ff, else mask1 = 0
mask2 = (4 * OQS_RLWE_MSRLN16_PARAMETER_Q - (int32_t)Abs(t[i])) >> 31; // If 4*PARAMETER_Q >= Abs(t[i]) then mask2 = 0, else mask2 = 0xff...ff
// value = +8*q when t[i] is negative (mask1 all ones), -8*q otherwise
value = ((mask1 & (8 * OQS_RLWE_MSRLN16_PARAMETER_Q ^ cneg)) ^ cneg);
// value is only added when mask2 is set, i.e. when |t[i]| exceeds 4*q
norm += Abs(t[i] + (mask2 & value));
}
return ((8 * OQS_RLWE_MSRLN16_PARAMETER_Q - norm) >> 31) ^ 1; // If norm <= 8*PARAMETER_Q (top bit of the difference clear) then return 1, else return 0
}
void oqs_rlwe_msrln16_Rec(const uint32_t *x, const uint32_t *rvec, unsigned char *key) {
	// Reconciliation
	//   x    : 1024 values, read as four groups of 256 (offsets 0, 256, 512, 768).
	//   rvec : 1024 reconciliation values laid out in the same four groups.
	//   key  : 32-byte output; bit (idx & 7) of byte (idx >> 3) receives the decision for index idx.
#if defined(RLWE_ASM_AVX2)
	oqs_rlwe_msrln16_rec_asm(x, rvec, key);
#else
	uint32_t t[4];
	unsigned int idx, lane;

	// Start from an all-zero key; decision bits are OR-ed in below.
	for (idx = 0; idx < 32; idx++) {
		key[idx] = 0;
	}

	for (idx = 0; idx < 256; idx++) {
		// The first three lanes combine their own rvec entry with the one of the last group ...
		for (lane = 0; lane < 3; lane++) {
			t[lane] = 8 * x[idx + 256 * lane] - (2 * rvec[idx + 256 * lane] + rvec[idx + 768]) * OQS_RLWE_MSRLN16_PARAMETER_Q;
		}
		// ... while the last lane only uses its own entry.
		t[3] = 8 * x[idx + 768] - (rvec[idx + 768]) * OQS_RLWE_MSRLN16_PARAMETER_Q;

		key[idx >> 3] |= (unsigned char)LDDecode((int32_t *)t) << (idx & 0x07);
	}
#endif
}
CRYPTO_STATUS oqs_rlwe_msrln16_get_error(int32_t *e, OQS_RAND *rand) {
	// Error sampling
	// Draws 3*N random bytes from "rand" and turns them into N small signed values.
	//   e    : output array of OQS_RLWE_MSRLN16_PARAMETER_N entries.
	//   rand : randomness source (rand_n is called once, for 3*N bytes).
	// Returns: always CRYPTO_SUCCESS.
	//
	// The random buffer is declared as uint32_t words because the portable path reads it
	// word by word: reading an "unsigned char" array through a uint32_t pointer is not
	// guaranteed to be suitably aligned and breaks the effective-type rules, whereas
	// handing a word array to byte-oriented callees is always valid.
	uint32_t stream[3 * OQS_RLWE_MSRLN16_PARAMETER_N / 4];
	volatile uint32_t *wipe = stream;
	uint32_t acc1, acc2, temp;
	uint8_t *pacc1 = (uint8_t *)&acc1, *pacc2 = (uint8_t *)&acc2;
	unsigned int i, j;

	// OQS integration note: call to aux API replaced with direct call to OQS_RAND
	rand->rand_n(rand, (unsigned char *)stream, 3 * OQS_RLWE_MSRLN16_PARAMETER_N);

#if defined(RLWE_ASM_AVX2)
	oqs_rlwe_msrln16_error_sampling_asm((unsigned char *)stream, e);
#else
	for (i = 0; i < OQS_RLWE_MSRLN16_PARAMETER_N / 4; i++) {
		acc1 = 0;
		acc2 = 0;
		// Per-byte bit counts: each of the four bytes of acc1/acc2 accumulates the set bits
		// of the corresponding byte of a word from the first/second third of the stream.
		for (j = 0; j < 8; j++) {
			acc1 += (stream[i] >> j) & 0x01010101;
			acc2 += (stream[i + OQS_RLWE_MSRLN16_PARAMETER_N / 4] >> j) & 0x01010101;
		}
		// The last third of the stream contributes its low nibbles to acc1 and its high nibbles to acc2.
		for (j = 0; j < 4; j++) {
			temp = stream[i + 2 * OQS_RLWE_MSRLN16_PARAMETER_N / 4] >> j;
			acc1 += temp & 0x01010101;
			acc2 += (temp >> 4) & 0x01010101;
		}
		// Each output is the difference of two neighbouring byte counters.
		e[2 * i] = pacc1[0] - pacc1[1];
		e[2 * i + 1] = pacc1[2] - pacc1[3];
		e[2 * i + OQS_RLWE_MSRLN16_PARAMETER_N / 2] = pacc2[0] - pacc2[1];
		e[2 * i + OQS_RLWE_MSRLN16_PARAMETER_N / 2 + 1] = pacc2[2] - pacc2[3];
	}
#endif

	// The sampled values are a deterministic function of "stream", so the random bytes are
	// wiped before returning; the volatile pointer keeps the compiler from dropping the stores.
	for (i = 0; i < 3 * OQS_RLWE_MSRLN16_PARAMETER_N / 4; i++) {
		wipe[i] = 0;
	}
	return CRYPTO_SUCCESS;
}
CRYPTO_STATUS oqs_rlwe_msrln16_generate_a(uint32_t *a, const unsigned char *seed) {
	// Generation of parameter a
	// Expands "seed" with SHAKE128 and fills "a" by rejection sampling: every two output
	// bytes form a 14-bit little-endian candidate that is kept only when it is below q.
	//   a    : output array of OQS_RLWE_MSRLN16_PARAMETER_N entries.
	//   seed : OQS_RLWE_MSRLN16_SEED_BYTES input bytes.
	// Returns: always CRYPTO_SUCCESS.
	// OQS integration note: call to aux API replaced with direct call to shake128
	unsigned char sponge[SHAKE128_STATE_SIZE] = { 0 };
	uint8_t pool[SHAKE128_RATE * 16];
	unsigned int blocks = 16;     // number of SHAKE128 blocks currently held in "pool"
	unsigned int offset = 0;      // read position inside "pool"
	unsigned int filled = 0;      // coefficients accepted so far
	uint16_t candidate;

	FIPS202_SHAKE128_Absorb(seed, OQS_RLWE_MSRLN16_SEED_BYTES, sponge, sizeof(sponge));
	FIPS202_SHAKE128_Squeeze(sponge, (unsigned char *)pool, blocks * SHAKE128_RATE);

	while (filled < OQS_RLWE_MSRLN16_PARAMETER_N) {
		candidate = (pool[offset] | ((uint16_t)pool[offset + 1] << 8)) & 0x3fff;
		if (candidate < OQS_RLWE_MSRLN16_PARAMETER_Q) {
			a[filled++] = candidate;
		}
		offset += 2;

		// Pool exhausted: continue the output stream one block at a time.
		if (offset > SHAKE128_RATE * blocks - 2) {
			blocks = 1;
			FIPS202_SHAKE128_Squeeze(sponge, (unsigned char *)pool, blocks * SHAKE128_RATE);
			offset = 0;
		}
	}
	return CRYPTO_SUCCESS;
}
CRYPTO_STATUS oqs_rlwe_msrln16_KeyGeneration_A(int32_t *SecretKeyA, unsigned char *PublicKeyA, OQS_RAND *rand) {
	// Alice's key generation
	// It produces a private key SecretKeyA and computes the public key PublicKeyA.
	// Input:   rand, the randomness source used for the seed and for both error samplings
	// Outputs: the private key SecretKeyA that consists of a 32-bit signed 1024-element array (4096 bytes in total)
	//          the public key PublicKeyA that occupies 1824 bytes
	// Returns: the status of the first failing step, or CRYPTO_SUCCESS.
	uint32_t a[OQS_RLWE_MSRLN16_PARAMETER_N];
	int32_t e[OQS_RLWE_MSRLN16_PARAMETER_N];
	unsigned char seed[OQS_RLWE_MSRLN16_SEED_BYTES];
	CRYPTO_STATUS Status;

	rand->rand_n(rand, seed, OQS_RLWE_MSRLN16_SEED_BYTES);

	// Each step only runs when all previous ones succeeded.
	Status = oqs_rlwe_msrln16_generate_a(a, seed);
	if (Status == CRYPTO_SUCCESS) {
		Status = oqs_rlwe_msrln16_get_error(SecretKeyA, rand);
	}
	if (Status == CRYPTO_SUCCESS) {
		Status = oqs_rlwe_msrln16_get_error(e, rand);
	}
	if (Status == CRYPTO_SUCCESS) {
		// Transform both samples, then compute a = a*SecretKeyA + 3*e (component-wise) in place.
		oqs_rlwe_msrln16_NTT_CT_std2rev_12289(SecretKeyA, psi_rev_ntt1024_12289, OQS_RLWE_MSRLN16_PARAMETER_N);
		oqs_rlwe_msrln16_NTT_CT_std2rev_12289(e, psi_rev_ntt1024_12289, OQS_RLWE_MSRLN16_PARAMETER_N);
		oqs_rlwe_msrln16_smul(e, 3, OQS_RLWE_MSRLN16_PARAMETER_N);
		oqs_rlwe_msrln16_pmuladd((int32_t *)a, SecretKeyA, e, (int32_t *)a, OQS_RLWE_MSRLN16_PARAMETER_N);
		oqs_rlwe_msrln16_correction((int32_t *)a, OQS_RLWE_MSRLN16_PARAMETER_Q, OQS_RLWE_MSRLN16_PARAMETER_N);
		oqs_rlwe_msrln16_encode_A(a, seed, PublicKeyA);
	}

	// The error polynomial is wiped on every path, success or failure.
	oqs_rlwe_msrln16_clear_words((void *)e, OQS_RLWE_MSRLN16_NBYTES_TO_NWORDS(4 * OQS_RLWE_MSRLN16_PARAMETER_N));
	return Status;
}
CRYPTO_STATUS oqs_rlwe_msrln16_SecretAgreement_B(unsigned char *PublicKeyA, unsigned char *SharedSecretB, unsigned char *PublicKeyB, OQS_RAND *rand) {
	// Bob's key generation and shared secret computation
	// It produces a private key and computes the public key PublicKeyB. In combination with Alice's public key PublicKeyA, it computes
	// the shared secret SharedSecretB.
	// Inputs:  Alice's public key PublicKeyA that consists of 1824 bytes
	//          rand, the randomness source used for the error samplings and the reconciliation helper
	// Outputs: the public key PublicKeyB that occupies 2048 bytes.
	//          the 256-bit shared secret SharedSecretB.
	// Returns: the status of the first failing step, or CRYPTO_SUCCESS.
	uint32_t pk_A[OQS_RLWE_MSRLN16_PARAMETER_N];
	uint32_t a[OQS_RLWE_MSRLN16_PARAMETER_N];
	uint32_t v[OQS_RLWE_MSRLN16_PARAMETER_N];
	uint32_t r[OQS_RLWE_MSRLN16_PARAMETER_N];
	int32_t sk_B[OQS_RLWE_MSRLN16_PARAMETER_N];
	int32_t e[OQS_RLWE_MSRLN16_PARAMETER_N];
	unsigned char seed[OQS_RLWE_MSRLN16_SEED_BYTES];
	CRYPTO_STATUS Status;

	oqs_rlwe_msrln16_decode_A(PublicKeyA, pk_A, seed);

	// Each stage only runs when all previous ones succeeded.
	Status = oqs_rlwe_msrln16_generate_a(a, seed);
	if (Status == CRYPTO_SUCCESS) {
		Status = oqs_rlwe_msrln16_get_error(sk_B, rand);
	}
	if (Status == CRYPTO_SUCCESS) {
		Status = oqs_rlwe_msrln16_get_error(e, rand);
	}
	if (Status == CRYPTO_SUCCESS) {
		// Public key part: a = a*sk_B + 3*e (component-wise), computed in place.
		oqs_rlwe_msrln16_NTT_CT_std2rev_12289(sk_B, psi_rev_ntt1024_12289, OQS_RLWE_MSRLN16_PARAMETER_N);
		oqs_rlwe_msrln16_NTT_CT_std2rev_12289(e, psi_rev_ntt1024_12289, OQS_RLWE_MSRLN16_PARAMETER_N);
		oqs_rlwe_msrln16_smul(e, 3, OQS_RLWE_MSRLN16_PARAMETER_N);
		oqs_rlwe_msrln16_pmuladd((int32_t *)a, sk_B, e, (int32_t *)a, OQS_RLWE_MSRLN16_PARAMETER_N);
		oqs_rlwe_msrln16_correction((int32_t *)a, OQS_RLWE_MSRLN16_PARAMETER_Q, OQS_RLWE_MSRLN16_PARAMETER_N);
		// A fresh error sample is drawn for the shared-value computation.
		Status = oqs_rlwe_msrln16_get_error(e, rand);
	}
	if (Status == CRYPTO_SUCCESS) {
		// Shared value: v = pk_A*sk_B + 81*e, brought back with the inverse NTT and reduced.
		oqs_rlwe_msrln16_NTT_CT_std2rev_12289(e, psi_rev_ntt1024_12289, OQS_RLWE_MSRLN16_PARAMETER_N);
		oqs_rlwe_msrln16_smul(e, 81, OQS_RLWE_MSRLN16_PARAMETER_N);
		oqs_rlwe_msrln16_pmuladd((int32_t *)pk_A, sk_B, e, (int32_t *)v, OQS_RLWE_MSRLN16_PARAMETER_N);
		oqs_rlwe_msrln16_INTT_GS_rev2std_12289((int32_t *)v, omegainv_rev_ntt1024_12289, omegainv10N_rev_ntt1024_12289, Ninv11_ntt1024_12289, OQS_RLWE_MSRLN16_PARAMETER_N);
		oqs_rlwe_msrln16_two_reduce12289((int32_t *)v, OQS_RLWE_MSRLN16_PARAMETER_N);
#if !defined(RLWE_ASM_AVX2)
		oqs_rlwe_msrln16_correction((int32_t *)v, OQS_RLWE_MSRLN16_PARAMETER_Q, OQS_RLWE_MSRLN16_PARAMETER_N);
#endif
		Status = oqs_rlwe_msrln16_HelpRec(v, r, rand);
	}
	if (Status == CRYPTO_SUCCESS) {
		oqs_rlwe_msrln16_Rec(v, r, SharedSecretB);
		oqs_rlwe_msrln16_encode_B(a, r, PublicKeyB);
	}

	// Working buffers are wiped on every path, success or failure.
	oqs_rlwe_msrln16_clear_words((void *)sk_B, OQS_RLWE_MSRLN16_NBYTES_TO_NWORDS(4 * OQS_RLWE_MSRLN16_PARAMETER_N));
	oqs_rlwe_msrln16_clear_words((void *)e, OQS_RLWE_MSRLN16_NBYTES_TO_NWORDS(4 * OQS_RLWE_MSRLN16_PARAMETER_N));
	oqs_rlwe_msrln16_clear_words((void *)a, OQS_RLWE_MSRLN16_NBYTES_TO_NWORDS(4 * OQS_RLWE_MSRLN16_PARAMETER_N));
	oqs_rlwe_msrln16_clear_words((void *)v, OQS_RLWE_MSRLN16_NBYTES_TO_NWORDS(4 * OQS_RLWE_MSRLN16_PARAMETER_N));
	oqs_rlwe_msrln16_clear_words((void *)r, OQS_RLWE_MSRLN16_NBYTES_TO_NWORDS(4 * OQS_RLWE_MSRLN16_PARAMETER_N));
	return Status;
}
CRYPTO_STATUS oqs_rlwe_msrln16_SecretAgreement_A(unsigned char *PublicKeyB, int32_t *SecretKeyA, unsigned char *SharedSecretA) {
	// Alice's shared secret computation
	// It computes the shared secret SharedSecretA using Bob's public key PublicKeyB and Alice's private key SecretKeyA.
	// Inputs:  Bob's public key PublicKeyB that consists of 2048 bytes
	//          the private key SecretKeyA that consists of a 32-bit signed 1024-element array (4096 bytes in total)
	// Output:  the 256-bit shared secret SharedSecretA.
	// Returns: always CRYPTO_SUCCESS.
	uint32_t u[OQS_RLWE_MSRLN16_PARAMETER_N];
	uint32_t r[OQS_RLWE_MSRLN16_PARAMETER_N];
	int32_t *const u_signed = (int32_t *)u;   // signed view of u for the arithmetic routines

	oqs_rlwe_msrln16_decode_B(PublicKeyB, u, r);

	// u = SecretKeyA * u (component-wise), then inverse NTT and reduction.
	oqs_rlwe_msrln16_pmul(SecretKeyA, u_signed, u_signed, OQS_RLWE_MSRLN16_PARAMETER_N);
	oqs_rlwe_msrln16_INTT_GS_rev2std_12289(u_signed, omegainv_rev_ntt1024_12289, omegainv10N_rev_ntt1024_12289, Ninv11_ntt1024_12289, OQS_RLWE_MSRLN16_PARAMETER_N);
	oqs_rlwe_msrln16_two_reduce12289(u_signed, OQS_RLWE_MSRLN16_PARAMETER_N);
#if !defined(RLWE_ASM_AVX2)
	oqs_rlwe_msrln16_correction(u_signed, OQS_RLWE_MSRLN16_PARAMETER_Q, OQS_RLWE_MSRLN16_PARAMETER_N);
#endif
	oqs_rlwe_msrln16_Rec(u, r, SharedSecretA);

	// Wipe the working buffers before returning.
	oqs_rlwe_msrln16_clear_words((void *)u, OQS_RLWE_MSRLN16_NBYTES_TO_NWORDS(4 * OQS_RLWE_MSRLN16_PARAMETER_N));
	oqs_rlwe_msrln16_clear_words((void *)r, OQS_RLWE_MSRLN16_NBYTES_TO_NWORDS(4 * OQS_RLWE_MSRLN16_PARAMETER_N));
	return CRYPTO_SUCCESS;
}

View File

@ -0,0 +1,122 @@
/****************************************************************************************
* LatticeCrypto: an efficient post-quantum Ring-Learning With Errors cryptography library
*
* Copyright (c) Microsoft Corporation. All rights reserved.
*
*
* Abstract: internal header file
*
*****************************************************************************************/
#ifndef __LatticeCrypto_priv_H__
#define __LatticeCrypto_priv_H__
// For C++
#ifdef __cplusplus
extern "C" {
#endif
#include <oqs/rand.h>
#include "LatticeCrypto.h"
// Basic constants
// N is the length of every coefficient array used by the key exchange; Q is the modulus.
#define OQS_RLWE_MSRLN16_PARAMETER_N 1024
#define OQS_RLWE_MSRLN16_PARAMETER_Q 12289
// Seed length in bytes (256 bits). The expression is parenthesized so the macro behaves as a
// single value inside larger expressions (e.g. x / OQS_RLWE_MSRLN16_SEED_BYTES).
#define OQS_RLWE_MSRLN16_SEED_BYTES (256/8)
// Thresholds used by the reconciliation helper: multiples of Q/4 and Q/2, rounded up.
#define OQS_RLWE_MSRLN16_PARAMETER_Q4 3073
#define OQS_RLWE_MSRLN16_PARAMETER_3Q4 9217
#define OQS_RLWE_MSRLN16_PARAMETER_5Q4 15362
#define OQS_RLWE_MSRLN16_PARAMETER_7Q4 21506
#define OQS_RLWE_MSRLN16_PARAMETER_Q2 6145
#define OQS_RLWE_MSRLN16_PARAMETER_3Q2 18434
// Macro definitions
#define OQS_RLWE_MSRLN16_NBITS_TO_NWORDS(nbits) (((nbits)+(sizeof(digit_t)*8)-1)/(sizeof(digit_t)*8)) // Conversion macro from number of bits to number of computer words
#define OQS_RLWE_MSRLN16_NBYTES_TO_NWORDS(nbytes) (((nbytes)+sizeof(digit_t)-1)/sizeof(digit_t)) // Conversion macro from number of bytes to number of computer words
// Macro to avoid compiler warnings when detecting unreferenced parameters
#define OQS_RLWE_MSRLN16_UNREFERENCED_PARAMETER(PAR) (PAR)
/******************** Function prototypes *******************/
/******************* Polynomial functions *******************/
// Forward NTT
void oqs_rlwe_msrln16_NTT_CT_std2rev_12289(int32_t *a, const int32_t *psi_rev, unsigned int N);
void oqs_rlwe_msrln16_NTT_CT_std2rev_12289_asm(int32_t *a, const int32_t *psi_rev, unsigned int N);
// Inverse NTT
void oqs_rlwe_msrln16_INTT_GS_rev2std_12289(int32_t *a, const int32_t *omegainv_rev, const int32_t omegainv1N_rev, const int32_t Ninv, unsigned int N);
void oqs_rlwe_msrln16_INTT_GS_rev2std_12289_asm(int32_t *a, const int32_t *omegainv_rev, const int32_t omegainv1N_rev, const int32_t Ninv, unsigned int N);
// Reduction modulo q
int32_t oqs_rlwe_msrln16_reduce12289(int64_t a);
// Two merged reductions modulo q
int32_t oqs_rlwe_msrln16_reduce12289_2x(int64_t a);
// Two consecutive reductions modulo q
void oqs_rlwe_msrln16_two_reduce12289(int32_t *a, unsigned int N);
void oqs_rlwe_msrln16_two_reduce12289_asm(int32_t *a, unsigned int N);
// Correction modulo q
void oqs_rlwe_msrln16_correction(int32_t *a, int32_t p, unsigned int N);
// Component-wise multiplication
void oqs_rlwe_msrln16_pmul(int32_t *a, int32_t *b, int32_t *c, unsigned int N);
void oqs_rlwe_msrln16_pmul_asm(int32_t *a, int32_t *b, int32_t *c, unsigned int N);
// Component-wise multiplication and addition
void oqs_rlwe_msrln16_pmuladd(int32_t *a, int32_t *b, int32_t *c, int32_t *d, unsigned int N);
void oqs_rlwe_msrln16_pmuladd_asm(int32_t *a, int32_t *b, int32_t *c, int32_t *d, unsigned int N);
// Component-wise multiplication with scalar
void oqs_rlwe_msrln16_smul(int32_t *a, int32_t scalar, unsigned int N);
/******************* Key exchange functions *******************/
// Alice's message encoding
void oqs_rlwe_msrln16_encode_A(const uint32_t *pk, const unsigned char *seed, unsigned char *m);
// Alice's message decoding
void oqs_rlwe_msrln16_decode_A(const unsigned char *m, uint32_t *pk, unsigned char *seed);
// Bob's message encoding
void oqs_rlwe_msrln16_encode_B(const uint32_t *pk, const uint32_t *rvec, unsigned char *m);
// Bob's message decoding
void oqs_rlwe_msrln16_decode_B(unsigned char *m, uint32_t *pk, uint32_t *rvec);
// Partial message encoding/decoding (assembly optimized)
void oqs_rlwe_msrln16_encode_asm(const uint32_t *pk, unsigned char *m);
void oqs_rlwe_msrln16_decode_asm(const unsigned char *m, uint32_t *pk);
// Reconciliation helper
CRYPTO_STATUS oqs_rlwe_msrln16_HelpRec(const uint32_t *x, uint32_t *rvec, OQS_RAND *rand);
// Partial reconciliation helper (assembly optimized)
void oqs_rlwe_msrln16_helprec_asm(const uint32_t *x, uint32_t *rvec, unsigned char *random_bits);
// Reconciliation
void oqs_rlwe_msrln16_Rec(const uint32_t *x, const uint32_t *rvec, unsigned char *key);
void oqs_rlwe_msrln16_rec_asm(const uint32_t *x, const uint32_t *rvec, unsigned char *key);
// Error sampling
CRYPTO_STATUS oqs_rlwe_msrln16_get_error(int32_t *e, OQS_RAND *rand);
// Partial error sampling (assembly optimized)
void oqs_rlwe_msrln16_error_sampling_asm(unsigned char *stream, int32_t *e);
// Generation of parameter a
CRYPTO_STATUS oqs_rlwe_msrln16_generate_a(uint32_t *a, const unsigned char *seed);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -0,0 +1,25 @@
LatticeCrypto
Copyright (c) Microsoft Corporation
All rights reserved.
MIT License
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
associated documentation files (the "Software"), to deal in the Software without restriction,
including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
The library uses the public domain implementation of SHAKE128 by the Keccak team; see the header
of shake128.c for details.

View File

@ -0,0 +1,42 @@
LatticeCrypto v1.0 (C Edition)
==============================
LatticeCrypto is a post-quantum secure cryptography library based on the Ring-Learning with Errors (R-LWE)
problem. The version 1.0 of the library implements the instantiation of Peikert's key exchange [1] due to
Alkim, Ducas, Pöppelmann and Schwabe [2], and incorporates novel techniques to provide higher performance.
The library [3] was developed by Microsoft Research for experimentation purposes.
*** THE ORIGINAL README HAS BEEN TRIMMED LEAVING ONLY THE INFO RELEVANT FOR THE OQS INTEGRATION ***
1. CONTENTS:
--------
/ - Library C and header files
AMD64/ - Optimized implementation of the NTT for x64 platforms
generic/ - Implementation of the NTT in portable C
README.txt - This readme file
2. MAIN FEATURES:
-------------
- Support arithmetic functions for computations in power-of-2 cyclotomic rings that are the basis for
implementing Ring-LWE-based cryptographic algorithms.
- Support key exchange providing at least 128 bits of quantum and classical security.
- All functions evaluating secret data have regular, constant-time execution, which provides protection
against timing and cache attacks.
- Basic implementation of the underlying arithmetic functions using portable C to enable support on
a wide range of platforms including x64, x86 and ARM.
- Optional high-performance implementation of the underlying arithmetic functions for x64 platforms on
Linux using assembly and AVX2 vector instructions.
REFERENCES
----------
[1] C. Peikert, "Lattice cryptography for the internet", in Post-Quantum Cryptography - 6th International
Workshop (PQCrypto 2014), LNCS 8772, pp. 197-219. Springer, 2014.
[2] E. Alkim, L. Ducas, T. Pöppelmann and P. Schwabe, "Post-quantum key exchange - a new hope", IACR Cryp-
tology ePrint Archive, Report 2015/1092, 2015.
[3] https://www.microsoft.com/en-us/research/project/lattice-cryptography-library/

332
src/kex_rlwe_msrln16/external/shake128.c vendored Normal file
View File

@ -0,0 +1,332 @@
#if defined(WINDOWS)
#define UNUSED
#else
#define UNUSED __attribute__ ((unused))
#endif
/*
Original implementation modified to allow splitting the absorb and squeeze
phases of Keccak.
*/
/*
Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
denoted as "the implementer".
For more information, feedback or questions, please refer to our websites:
http://keccak.noekeon.org/
http://keyak.noekeon.org/
http://ketje.noekeon.org/
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
/*
================================================================
The purpose of this source file is to demonstrate a readable and compact
implementation of all the Keccak instances approved in the FIPS 202 standard,
including the hash functions and the extendable-output functions (XOFs).
We focused on clarity and on source-code compactness,
rather than on the performance.
The advantages of this implementation are:
+ The source code is compact, after removing the comments, that is. :-)
+ There are no tables with arbitrary constants.
+ For clarity, the comments link the operations to the specifications using
the same notation as much as possible.
+ There is no restriction in cryptographic features. In particular,
the SHAKE128 and SHAKE256 XOFs can produce any output length.
+ The code does not use much RAM, as all operations are done in place.
The drawbacks of this implementation are:
- There is no message queue. The whole message must be ready in a buffer.
- It is not optimized for performance.
The implementation is even simpler on a little endian platform. Just define the
LITTLE_ENDIAN symbol in that case.
For a more complete set of implementations, please refer to
the Keccak Code Package at https://github.com/gvanas/KeccakCodePackage
For more information, please refer to:
* [Keccak Reference] http://keccak.noekeon.org/Keccak-reference-3.0.pdf
* [Keccak Specifications Summary] http://keccak.noekeon.org/specs_summary.html
This file uses UTF-8 encoding, as some comments use Greek letters.
================================================================
*/
/**
* Function to compute the Keccak[r, c] sponge function over a given input.
* @param rate The value of the rate r.
* @param capacity The value of the capacity c.
* @param input Pointer to the input message.
* @param inputByteLen The number of input bytes provided in the input message.
* @param delimitedSuffix Bits that will be automatically appended to the end
* of the input message, as in domain separation.
* This is a byte containing from 0 to 7 bits
* These <i>n</i> bits must be in the least significant bit positions
* and must be delimited with a bit 1 at position <i>n</i>
* (counting from 0=LSB to 7=MSB) and followed by bits 0
* from position <i>n</i>+1 to position 7.
* Some examples:
* - If no bits are to be appended, then @a delimitedSuffix must be 0x01.
* - If the 2-bit sequence 0,1 is to be appended (as for SHA3-*), @a delimitedSuffix must be 0x06.
* - If the 4-bit sequence 1,1,1,1 is to be appended (as for SHAKE*), @a delimitedSuffix must be 0x1F.
* - If the 7-bit sequence 1,1,0,1,0,0,0 is to be absorbed, @a delimitedSuffix must be 0x8B.
* @param output Pointer to the buffer where to store the output.
* @param outputByteLen The number of output bytes desired.
* @pre One must have r+c=1600 and the rate a multiple of 8 bits in this implementation.
*/
UNUSED static void Keccak(unsigned int rate, unsigned int capacity, const unsigned char *input, unsigned long long int inputByteLen, unsigned char delimitedSuffix, unsigned char *output, unsigned long long int outputByteLen);
/*
* Performs the Keccak absorb phase. Same parameters as the Keccak function, but a SHAKE128_STATE_SIZE-byte state must also be provided.
* The Keccak_squeeze function can be called successively to generate output.
*/
static void Keccak_absorb(unsigned int rate, unsigned int capacity, const unsigned char *input, unsigned long long int inputByteLen, unsigned char delimitedSuffix, unsigned char* state, unsigned int stateLen);
/*
* Performs the Keccak squeeze phase. Same parameters as the Keccak function, but a SHAKE128_STATE_SIZE-byte state must also be provided.
* The Keccak_absorb function must be called first.
*/
static void Keccak_squeeze(unsigned int rate, unsigned int capacity, unsigned char* state, unsigned char *output, unsigned long long int outputByteLen);
static void FIPS202_SHAKE128_Absorb(const unsigned char *input, unsigned int inputByteLen, unsigned char* state, unsigned int stateLen)
{
	/*
	 * Absorb phase with the SHAKE128 parameters: rate 1344 bits, capacity 256 bits
	 * and delimited suffix 0x1F. "state" (stateLen bytes) is reset by Keccak_absorb
	 * before "input" (inputByteLen bytes) is absorbed into it.
	 */
	const unsigned int rate = 1344;
	const unsigned int capacity = 256;
	const unsigned char suffix = 0x1F;

	Keccak_absorb(rate, capacity, input, inputByteLen, suffix, state, stateLen);
}
static void FIPS202_SHAKE128_Squeeze(unsigned char* state, unsigned char *output, int outputByteLen)
{
	/*
	 * Squeeze phase with the SHAKE128 parameters (rate 1344 bits, capacity 256 bits):
	 * writes outputByteLen bytes derived from "state" into "output".
	 * FIPS202_SHAKE128_Absorb must have been called on "state" first.
	 */
	const unsigned int rate = 1344;
	const unsigned int capacity = 256;

	Keccak_squeeze(rate, capacity, state, output, outputByteLen);
}
UNUSED static void FIPS202_SHAKE128(const unsigned char *input, unsigned int inputByteLen, unsigned char *output, int outputByteLen, unsigned int stateLen)
{
	/*
	 * One-shot SHAKE128: absorbs "input" (inputByteLen bytes) and writes
	 * outputByteLen bytes to "output".
	 *
	 * The sponge state is a local buffer of fixed size, so the size handed to the
	 * absorb phase is taken from the buffer itself. Forwarding the caller-supplied
	 * "stateLen" would let the state reset write past the end of the local array
	 * whenever stateLen exceeds the buffer size. The parameter is kept only so
	 * that existing callers continue to compile.
	 */
	unsigned char state[200] = { 0 };

	(void)stateLen;
	FIPS202_SHAKE128_Absorb(input, inputByteLen, state, (unsigned int)sizeof(state));
	FIPS202_SHAKE128_Squeeze(state, output, outputByteLen);
}
/*
================================================================
Technicalities
================================================================
*/
typedef unsigned char UINT8;
typedef unsigned long long int UINT64;
typedef UINT64 tKeccakLane;
#ifndef LITTLE_ENDIAN
/** Function to load a 64-bit value using the little-endian (LE) convention.
* On a LE platform, this could be greatly simplified using a cast.
*/
static UINT64 load64(const UINT8 *x)
{
	/* Assemble the eight bytes at x into a 64-bit value, x[0] being the least significant byte. */
	UINT64 acc = 0;
	unsigned int k = 8;

	/* Consume the bytes from most significant (x[7]) down to least significant (x[0]). */
	while (k-- > 0)
		acc = (acc << 8) | x[k];
	return acc;
}
/** Function to store a 64-bit value using the little-endian (LE) convention.
* On a LE platform, this could be greatly simplified using a cast.
*/
static void store64(UINT8 *x, UINT64 u)
{
	/* Write u to the eight bytes at x, least significant byte first. */
	unsigned int k;

	for (k = 0; k < 8; ++k)
		x[k] = (UINT8)(u >> (8 * k));
}
/** Function to XOR into a 64-bit value using the little-endian (LE) convention.
* On a LE platform, this could be greatly simplified using a cast.
*/
static void xor64(UINT8 *x, UINT64 u)
{
	/* XOR u into the eight bytes at x, least significant byte of u into x[0]. */
	unsigned int k;

	for (k = 0; k < 8; ++k)
		x[k] ^= (UINT8)(u >> (8 * k));
}
#endif
/*
================================================================
A readable and compact implementation of the Keccak-f[1600] permutation.
================================================================
*/
#define ROL64(a, offset) ((((UINT64)a) << offset) ^ (((UINT64)a) >> (64-offset)))
#define i(x, y) ((x)+5*(y))
#ifdef LITTLE_ENDIAN
#define readLane(x, y) (((tKeccakLane*)state)[i(x, y)])
#define writeLane(x, y, lane) (((tKeccakLane*)state)[i(x, y)]) = (lane)
#define XORLane(x, y, lane) (((tKeccakLane*)state)[i(x, y)]) ^= (lane)
#else
#define readLane(x, y) load64((UINT8*)state+sizeof(tKeccakLane)*i(x, y))
#define writeLane(x, y, lane) store64((UINT8*)state+sizeof(tKeccakLane)*i(x, y), lane)
#define XORLane(x, y, lane) xor64((UINT8*)state+sizeof(tKeccakLane)*i(x, y), lane)
#endif
/**
* Function that computes the linear feedback shift register (LFSR) used to
* define the round constants (see [Keccak Reference, Section 1.2]).
*/
static int LFSR86540(UINT8 *LFSR)
{
	/*
	 * Advance the 8-bit LFSR stored at *LFSR by one step and return the bit
	 * (0 or 1) that was in its least significant position before the step.
	 */
	const UINT8 before = *LFSR;
	UINT8 next = (UINT8)(before << 1);

	/* Feedback when the top bit is shifted out.
	   Primitive polynomial over GF(2): x^8+x^6+x^5+x^4+1 */
	if ((before & 0x80) != 0)
		next ^= 0x71;

	*LFSR = next;
	return (before & 0x01) != 0;
}
/**
* Function that computes the Keccak-f[1600] permutation on the given state.
*/
/*
 * Applies 24 rounds to the state in place; each round runs the θ, ρ/π, χ and ι steps in that order.
 * @param state Pointer to the state; it is only accessed through the readLane/writeLane/XORLane
 *              macros, which address 25 lanes (indices x+5*y with 0 <= x, y < 5).
 * The round-constant bits of the ι step are produced by LFSR86540, starting from LFSR state 0x01
 * and advancing seven times per round.
 */
static void KeccakF1600_StatePermute(void *state)
{
unsigned int round, x, y, j, t;
UINT8 LFSRstate = 0x01;
for(round=0; round<24; round++) {
{ // === θ step (see [Keccak Reference, Section 2.3.2]) ===
tKeccakLane C[5], D;
// Compute the parity of the columns
for(x=0; x<5; x++)
C[x] = readLane(x, 0) ^ readLane(x, 1) ^ readLane(x, 2) ^ readLane(x, 3) ^ readLane(x, 4);
for(x=0; x<5; x++) {
// Compute the θ effect for a given column
D = C[(x+4)%5] ^ ROL64(C[(x+1)%5], 1);
// Add the θ effect to the whole column
for (y=0; y<5; y++)
XORLane(x, y, D);
}
}
{ // === ρ and π steps (see [Keccak Reference, Sections 2.3.3 and 2.3.4]) ===
tKeccakLane current, temp;
// Start at coordinates (1 0)
x = 1; y = 0;
current = readLane(x, y);
// Iterate over ((0 1)(2 3))^t * (1 0) for 0 ≤ t ≤ 23
for(t=0; t<24; t++) {
// Compute the rotation constant r = (t+1)(t+2)/2
// (reduced modulo 64 so that it is a valid rotation amount for a 64-bit lane)
unsigned int r = ((t+1)*(t+2)/2)%64;
// Compute ((0 1)(2 3)) * (x y)
unsigned int Y = (2*x+3*y)%5; x = y; y = Y;
// Swap current and state(x,y), and rotate
temp = readLane(x, y);
writeLane(x, y, ROL64(current, r));
current = temp;
}
}
{ // === χ step (see [Keccak Reference, Section 2.3.1]) ===
tKeccakLane temp[5];
for(y=0; y<5; y++) {
// Take a copy of the plane
for(x=0; x<5; x++)
temp[x] = readLane(x, y);
// Compute χ on the plane
for(x=0; x<5; x++)
writeLane(x, y, temp[x] ^((~temp[(x+1)%5]) & temp[(x+2)%5]));
}
}
{ // === ι step (see [Keccak Reference, Section 2.3.5]) ===
// Only lane (0, 0) is modified; a bit is flipped at position 2^j-1 whenever the LFSR outputs 1
for(j=0; j<7; j++) {
unsigned int bitPosition = (1<<j)-1; //2^j-1
if (LFSR86540(&LFSRstate))
XORLane(0, 0, (tKeccakLane)1<<bitPosition);
}
}
}
}
/*
================================================================
A readable and compact implementation of the Keccak sponge functions
that use the Keccak-f[1600] permutation.
================================================================
*/
#include <string.h>
#define MIN(a, b) ((a) < (b) ? (a) : (b))
/*
 * Absorb phase of the Keccak sponge.
 *
 *   rate            sponge rate in bits; must be a non-zero multiple of 8, at most 1600
 *   capacity        sponge capacity in bits; rate + capacity must equal 1600
 *   input           message bytes to absorb
 *   inputByteLen    number of bytes in "input"
 *   delimitedSuffix domain-separation bits followed by the first padding bit
 *   state           sponge state; it is zeroed here and then updated in place
 *   stateLen        number of bytes of "state" that are zeroed
 *                   NOTE(review): the permutation works on 25 lanes, so this is
 *                   presumably expected to be 200 - confirm against callers.
 *
 * If the rate/capacity pair is invalid the function returns without touching
 * "state". On return from a valid call the state is padded and ready for
 * Keccak_squeeze().
 */
void Keccak_absorb(unsigned int rate, unsigned int capacity, const unsigned char *input, unsigned long long int inputByteLen, unsigned char delimitedSuffix, unsigned char* state, unsigned int stateLen)
{
    unsigned int rateInBytes = rate/8;
    unsigned int blockSize = 0;
    unsigned int i;

    // Reject invalid parameters. A zero rate must be refused explicitly: it would
    // give rateInBytes == 0, so the absorb loop below would never consume any
    // input and state[rateInBytes-1] would index far outside the state. The
    // "rate > 1600" test also keeps 1600 - rate from wrapping around.
    if ((rate == 0) || (rate > 1600) || (capacity != (1600 - rate)) || ((rate % 8) != 0))
        return;

    // === Initialize the state ===
    memset(state, 0, stateLen);

    // === Absorb all the input blocks ===
    while(inputByteLen > 0) {
        blockSize = MIN(inputByteLen, rateInBytes);
        for(i=0; i<blockSize; i++)
            state[i] ^= input[i];
        input += blockSize;
        inputByteLen -= blockSize;

        // Only full blocks trigger the permutation; a trailing partial block
        // leaves blockSize pointing at the first free byte for the padding.
        if (blockSize == rateInBytes) {
            KeccakF1600_StatePermute(state);
            blockSize = 0;
        }
    }

    // === Do the padding and switch to the squeezing phase ===
    // Absorb the last few bits and add the first bit of padding (which coincides with the delimiter in delimitedSuffix)
    state[blockSize] ^= delimitedSuffix;
    // If the first bit of padding is at position rate-1, we need a whole new block for the second bit of padding
    if (((delimitedSuffix & 0x80) != 0) && (blockSize == (rateInBytes-1)))
        KeccakF1600_StatePermute(state);
    // Add the second bit of padding
    state[rateInBytes-1] ^= 0x80;
}
/*
 * Squeeze phase of the Keccak sponge.
 *
 *   rate          sponge rate in bits; only rate/8 whole bytes are used per block
 *   capacity      unused
 *   state         sponge state left by Keccak_absorb(); permuted in place
 *   output        destination buffer for the squeezed bytes
 *   outputByteLen number of bytes to produce
 *
 * The state is permuted once up front (the switch from absorbing to squeezing)
 * and once more between consecutive output blocks. Nothing is done when the
 * rate is unusable (fewer than 8 bits or more than 1600 bits).
 */
void Keccak_squeeze(unsigned int rate, UNUSED unsigned int capacity, unsigned char* state, unsigned char *output, unsigned long long int outputByteLen)
{
    unsigned int blockSize = 0;
    unsigned int rateInBytes = rate / 8;

    // A rate below 8 bits gives rateInBytes == 0: every block would be empty and
    // the loop below would never terminate. A rate above 1600 bits would copy
    // more bytes per block than the Keccak-f[1600] state holds.
    if ((rateInBytes == 0) || (rate > 1600))
        return;

    // Switch to the squeezing phase
    KeccakF1600_StatePermute(state);

    // === Squeeze out all the output blocks ===
    while (outputByteLen > 0) {
        blockSize = MIN(outputByteLen, rateInBytes);
        memcpy(output, state, blockSize);
        output += blockSize;
        outputByteLen -= blockSize;

        // Permute only when more output is still needed
        if (outputByteLen > 0)
            KeccakF1600_StatePermute(state);
    }
}

View File

@ -0,0 +1,24 @@
#ifndef SHAKE128_H
#define SHAKE128_H
// Size of the SHAKE128 internal state and its rate.
// NOTE(review): both values are presumably in bytes (200 bytes = 1600 bits,
// 168 bytes = 1344 bits) - confirm against the implementation.
#define SHAKE128_STATE_SIZE 200
#define SHAKE128_RATE 168
/*
* Computes SHAKE128 on the array "input" of length "inputByteLen", resulting in "outputByteLen" bytes stored in "output".
* NOTE(review): "stateLen" is presumably the size of the internal state to use (SHAKE128_STATE_SIZE) - confirm.
* The function is declared static, so each translation unit including this header needs its own definition.
*/
static void FIPS202_SHAKE128(const unsigned char *input, unsigned int inputByteLen, unsigned char *output, int outputByteLen, unsigned int stateLen);
/*
* Performs the absorb phase of SHAKE128: ingests the "inputByteLen" bytes stored in "input"; storing the internal
* SHAKE128 state of length SHAKE128_STATE_SIZE in "state".
* NOTE(review): "stateLen" is presumably the size in bytes of the "state" buffer - confirm.
*/
static void FIPS202_SHAKE128_Absorb(const unsigned char *input, unsigned int inputByteLen, unsigned char* state, unsigned int stateLen);
/*
* Performs the squeeze phase of SHAKE128: generates "outputByteLen" bytes stored in "output" from the "state" of length
* SHAKE128_STATE_SIZE. Must be preceded by a call to FIPS202_SHAKE128_Absorb.
*/
static void FIPS202_SHAKE128_Squeeze(unsigned char* state, unsigned char *output, int outputByteLen);
#endif

View File

@ -0,0 +1,182 @@
/****************************************************************************************
* LatticeCrypto: an efficient post-quantum Ring-Learning With Errors cryptography library
*
* Copyright (c) Microsoft Corporation. All rights reserved.
*
*
* Abstract: NTT functions and other polynomial operations
*
*****************************************************************************************/
#include "../LatticeCrypto_priv.h"
// Mask selecting the 12 least significant bits (0xFFF).
const uint32_t mask12 = ((uint64_t)1 << 12) - 1;

int32_t oqs_rlwe_msrln16_reduce12289(int64_t a)
{ // Single reduction step modulo q = 12289.
  // Writes a = low + 2^12*high and returns 3*low - high. Because
  // 3*2^12 = 12288 = q - 1, the result is congruent to 3*a (mod q); it is a
  // shortened representative, not a value fully reduced into [0, q).
    const int32_t low  = (int32_t)(a & mask12);
    const int32_t high = (int32_t)(a >> 12);

    return 3*low - high;
}
int32_t oqs_rlwe_msrln16_reduce12289_2x(int64_t a)
{ // Two reduction steps modulo q = 12289 merged into one.
  // Writes a = low + 2^12*mid + 2^24*high and returns 9*low - 3*mid + high,
  // which is congruent to 9*a (mod q) since 3*2^12 = -1 (mod q).
    const int32_t low  = (int32_t)(a & mask12);
    const int32_t mid  = (int32_t)((a >> 12) & mask12);
    const int32_t high = (int32_t)(a >> 24);

    return 9*low - 3*mid + high;
}
static void oqs_rlwe_msrln16_ntt_ct_plain_stage(int32_t* a, const int32_t* psi_rev, unsigned int m, unsigned int k)
{ // One butterfly stage of the forward NTT with a single reduction per product:
  // m blocks of 2*k coefficients, block i using the twiddle factor psi_rev[m+i].
    unsigned int blk, idx, first, last;
    int32_t twiddle, lo, hi;

    for (blk = 0; blk < m; blk++) {
        first = 2*blk*k;
        last = first+k-1;
        twiddle = psi_rev[m+blk];
        for (idx = first; idx <= last; idx++) {
            lo = a[idx];
            hi = oqs_rlwe_msrln16_reduce12289((int64_t)a[idx+k]*twiddle);
            a[idx] = lo+hi;
            a[idx+k] = lo-hi;
        }
    }
}

void oqs_rlwe_msrln16_NTT_CT_std2rev_12289(int32_t* a, const int32_t* psi_rev, unsigned int N)
{ // Forward NTT, computed in place on a[].
  // a:       coefficient vector, overwritten with the result
  // psi_rev: table of twiddle factors, read at indices m+i for every stage m
  // N:       transform size
  // Note: the stage for m = 128 is hard-coded as 128 blocks of 8 coefficients,
  // so it touches a[0..1023] and psi_rev[128..255] whatever N is; as written the
  // routine is therefore only usable with N = 1024.
    unsigned int m, blk, idx, first, last, half = N;
    int32_t twiddle, lo, hi;

    // Stages m = 1, 2, ..., 64
    for (m = 1; m < 128; m *= 2) {
        half >>= 1;
        oqs_rlwe_msrln16_ntt_ct_plain_stage(a, psi_rev, m, half);
    }

    // Stage m = 128: both butterfly inputs get an extra reduction here
    half = 4;
    for (blk = 0; blk < 128; blk++) {
        first = 8*blk;
        last = first+3;
        twiddle = psi_rev[blk+128];
        for (idx = first; idx <= last; idx++) {
            lo = oqs_rlwe_msrln16_reduce12289((int64_t)a[idx]);
            hi = oqs_rlwe_msrln16_reduce12289_2x((int64_t)a[idx+4]*twiddle);
            a[idx] = lo+hi;
            a[idx+4] = lo-hi;
        }
    }

    // Remaining stages m = 256, 512, ... up to (but excluding) N
    for (m = 256; m < N; m *= 2) {
        half >>= 1;
        oqs_rlwe_msrln16_ntt_ct_plain_stage(a, psi_rev, m, half);
    }
}
void oqs_rlwe_msrln16_INTT_GS_rev2std_12289(int32_t* a, const int32_t* omegainv_rev, const int32_t omegainv1N_rev, const int32_t Ninv, unsigned int N)
{ // Inverse NTT, computed in place on a[].
  // a:              coefficient vector, overwritten with the result
  // omegainv_rev:   table of inverse twiddle factors, read at indices h+i with h = m/2
  // omegainv1N_rev: factor applied to the "difference" half in the last stage
  // Ninv:           factor applied to the "sum" half in the last stage
  // N:              transform size
  // The stage with m == 32 applies an extra reduction to both butterfly outputs.
    unsigned int m, groups, g, idx, first, last, dist = 1;
    int32_t twiddle, lo, hi;
    int64_t prod;

    for (m = N; m > 2; m >>= 1) {
        groups = m >> 1;
        first = 0;
        for (g = 0; g < groups; g++) {
            last = first+dist-1;
            twiddle = omegainv_rev[groups+g];
            for (idx = first; idx <= last; idx++) {
                lo = a[idx];
                hi = a[idx+dist];
                prod = (int64_t)(lo-hi)*twiddle;
                if (m == 32) {
                    a[idx] = oqs_rlwe_msrln16_reduce12289((int64_t)(lo+hi));
                    a[idx+dist] = oqs_rlwe_msrln16_reduce12289_2x(prod);
                } else {
                    a[idx] = lo+hi;
                    a[idx+dist] = oqs_rlwe_msrln16_reduce12289(prod);
                }
            }
            first += 2*dist;
        }
        dist *= 2;
    }

    // Last stage, merged with the multiplication by the final scaling factors
    for (idx = 0; idx < dist; idx++) {
        lo = a[idx];
        hi = a[idx+dist];
        a[idx] = oqs_rlwe_msrln16_reduce12289((int64_t)(lo+hi)*Ninv);
        a[idx+dist] = oqs_rlwe_msrln16_reduce12289((int64_t)(lo-hi)*omegainv1N_rev);
    }
}
void oqs_rlwe_msrln16_two_reduce12289(int32_t* a, unsigned int N)
{ // Applies the reduction modulo q twice in a row to each of the N entries of a[], in place
    int32_t* const end = a + N;
    int32_t once;

    for (; a != end; a++) {
        once = oqs_rlwe_msrln16_reduce12289((int64_t)*a);
        *a = oqs_rlwe_msrln16_reduce12289((int64_t)once);
    }
}
void oqs_rlwe_msrln16_pmul(int32_t* a, int32_t* b, int32_t* c, unsigned int N)
{ // Component-wise product: c[i] = a[i]*b[i], passed through two reductions modulo q, for i in [0, N)
    unsigned int idx = 0;
    int32_t reduced;

    while (idx < N) {
        reduced = oqs_rlwe_msrln16_reduce12289((int64_t)a[idx]*b[idx]);
        c[idx] = oqs_rlwe_msrln16_reduce12289((int64_t)reduced);
        idx++;
    }
}
void oqs_rlwe_msrln16_pmuladd(int32_t* a, int32_t* b, int32_t* c, int32_t* d, unsigned int N)
{ // Component-wise multiply-add: d[i] = a[i]*b[i] + c[i], passed through two reductions modulo q, for i in [0, N)
    unsigned int idx = 0;
    int32_t reduced;

    while (idx < N) {
        reduced = oqs_rlwe_msrln16_reduce12289((int64_t)a[idx]*b[idx] + c[idx]);
        d[idx] = oqs_rlwe_msrln16_reduce12289((int64_t)reduced);
        idx++;
    }
}
void oqs_rlwe_msrln16_smul(int32_t* a, int32_t scalar, unsigned int N)
{ // Multiplies each of the N entries of a[] by "scalar", in place (no modular reduction is applied)
    unsigned int remaining;

    for (remaining = N; remaining > 0; remaining--, a++) {
        *a = (*a)*scalar;
    }
}
void oqs_rlwe_msrln16_correction(int32_t* a, int32_t p, unsigned int N)
{ // Correction modulo q: branch-free conditional add/subtract of p on each entry of a[].
  // a: vector of N values, corrected in place
  // p: the modulus
  // N: number of entries
  // Per entry: p is subtracted when the value is non-negative, then added back
  // if the result went negative. Values in [-p, 2p) therefore end up in [0, p).
    unsigned int i;
    int32_t mask;

    for (i = 0; i < N; i++) {
        // The mask is built from the sign bit (bit 31): all ones for a negative
        // value, zero otherwise. Shifting by only 15 bits gives the same result
        // while |a[i]| < 2^15, but a stray 0/1 mask for larger values.
        mask = a[i] >> (8*sizeof(int32_t) - 1);
        a[i] += (p & mask) - p;
        mask = a[i] >> (8*sizeof(int32_t) - 1);
        a[i] += (p & mask);
    }
}

View File

@ -0,0 +1,165 @@
// UNUSED marks a function parameter as intentionally unused. It expands to the
// GCC-style "unused" attribute, except when WINDOWS is defined, where it
// expands to nothing.
#if defined(WINDOWS)
#define UNUSED
#else
#define UNUSED __attribute__ ((unused))
#endif
#include <stdlib.h>
#include <string.h>
#if !defined(WINDOWS)
#include <unistd.h>
#include <strings.h>
#endif
#include <oqs/kex.h>
#include <oqs/rand.h>
#include "kex_rlwe_msrln16.h"
#include "LatticeCrypto.h"
#include "LatticeCrypto_priv.h"
/**
 * Allocates and initialises an OQS_KEX object for the MSR LN16 ring-LWE key exchange.
 *
 * @param rand  Random number generator stored in the object (k->rand).
 * @return      The new object, or NULL if a memory allocation fails.
 *              Release it with OQS_KEX_rlwe_msrln16_free().
 */
OQS_KEX *OQS_KEX_rlwe_msrln16_new(OQS_RAND *rand) {

    OQS_KEX *k = malloc(sizeof(OQS_KEX));
    if (k == NULL) {
        return NULL;
    }

    /* The name is heap-allocated because OQS_KEX_rlwe_msrln16_free() frees it.
     * Fail cleanly rather than hand back an object with a NULL name. */
    k->method_name = strdup("RLWE MSR LN16");
    if (k->method_name == NULL) {
        free(k);
        return NULL;
    }

    k->ctx = NULL;
    k->estimated_classical_security = 128;
    k->estimated_quantum_security = 128;
    k->seed = NULL;
    k->seed_len = 0;
    k->named_parameters = NULL;
    k->rand = rand;
    k->params = NULL;

    k->alice_0 = &OQS_KEX_rlwe_msrln16_alice_0;
    k->bob = &OQS_KEX_rlwe_msrln16_bob;
    k->alice_1 = &OQS_KEX_rlwe_msrln16_alice_1;
    k->alice_priv_free = &OQS_KEX_rlwe_msrln16_alice_priv_free;
    k->free = &OQS_KEX_rlwe_msrln16_free;

    return k;
}
/**
 * Alice's first step: generates her private key and her public message.
 *
 * @param k             Key exchange object; k->rand supplies the randomness.
 * @param alice_priv    Out: newly allocated private key (1024 * sizeof(uint32_t) bytes).
 * @param alice_msg     Out: newly allocated public key of OQS_RLWE_MSRLN16_PKA_BYTES bytes.
 * @param alice_msg_len Out: length of *alice_msg; written only on success.
 * @return              1 on success, 0 on failure. On failure both output
 *                      buffers are released and their pointers set to NULL.
 */
int OQS_KEX_rlwe_msrln16_alice_0(OQS_KEX *k, void **alice_priv, uint8_t **alice_msg, size_t *alice_msg_len) {

    int ret;

    *alice_priv = NULL;

    /* alice_msg is alice's public key */
    *alice_msg = malloc(OQS_RLWE_MSRLN16_PKA_BYTES);
    if (*alice_msg == NULL) {
        goto err;
    }

    *alice_priv = malloc(1024 * sizeof(uint32_t));
    if (*alice_priv == NULL) {
        goto err;
    }

    if (oqs_rlwe_msrln16_KeyGeneration_A((int32_t *) *alice_priv, (unsigned char *) *alice_msg, k->rand) != CRYPTO_SUCCESS) {
        goto err;
    }
    *alice_msg_len = OQS_RLWE_MSRLN16_PKA_BYTES;

    ret = 1;
    goto cleanup;

err:
    ret = 0;
    /* Reset the out-parameters after freeing so the caller is never left
     * holding dangling pointers (a later free by the caller stays harmless). */
    free(*alice_msg);
    *alice_msg = NULL;
    free(*alice_priv);
    *alice_priv = NULL;

cleanup:
    return ret;
}
/**
 * Bob's step: derives the shared key from Alice's message and produces his reply.
 *
 * @param k             Key exchange object; k->rand supplies the randomness.
 * @param alice_msg     Alice's public key.
 * @param alice_msg_len Length of alice_msg; must equal OQS_RLWE_MSRLN16_PKA_BYTES.
 * @param bob_msg       Out: newly allocated reply of OQS_RLWE_MSRLN16_PKB_BYTES bytes.
 * @param bob_msg_len   Out: length of *bob_msg; written only on success.
 * @param key           Out: newly allocated shared key of OQS_RLWE_MSRLN16_SHAREDKEY_BYTES bytes.
 * @param key_len       Out: length of *key; written only on success.
 * @return              1 on success, 0 on failure. On failure both output
 *                      buffers are released and their pointers set to NULL.
 */
int OQS_KEX_rlwe_msrln16_bob(OQS_KEX *k, const uint8_t *alice_msg, const size_t alice_msg_len, uint8_t **bob_msg, size_t *bob_msg_len, uint8_t **key, size_t *key_len) {

    int ret;

    *bob_msg = NULL;
    *key = NULL;

    if (alice_msg_len != OQS_RLWE_MSRLN16_PKA_BYTES) {
        goto err;
    }

    *bob_msg = malloc(OQS_RLWE_MSRLN16_PKB_BYTES);
    if (*bob_msg == NULL) {
        goto err;
    }

    *key = malloc(OQS_RLWE_MSRLN16_SHAREDKEY_BYTES);
    if (*key == NULL) {
        goto err;
    }

    if (oqs_rlwe_msrln16_SecretAgreement_B((unsigned char *) alice_msg, (unsigned char *) *key, (unsigned char *) *bob_msg, k->rand) != CRYPTO_SUCCESS) {
        goto err;
    }

    *key_len = OQS_RLWE_MSRLN16_SHAREDKEY_BYTES;
    *bob_msg_len = OQS_RLWE_MSRLN16_PKB_BYTES;

    ret = 1;
    goto cleanup;

err:
    ret = 0;
    /* Reset the out-parameters after freeing so the caller is never left
     * holding dangling pointers (a later free by the caller stays harmless). */
    free(*bob_msg);
    *bob_msg = NULL;
    free(*key);
    *key = NULL;

cleanup:
    return ret;
}
/**
 * Alice's second step: derives the shared key from her private key and Bob's reply.
 *
 * @param k           Unused.
 * @param alice_priv  Alice's private key, as produced by OQS_KEX_rlwe_msrln16_alice_0().
 * @param bob_msg     Bob's reply.
 * @param bob_msg_len Length of bob_msg; must equal OQS_RLWE_MSRLN16_PKB_BYTES.
 * @param key         Out: newly allocated shared key of OQS_RLWE_MSRLN16_SHAREDKEY_BYTES bytes.
 * @param key_len     Out: length of *key; written only on success.
 * @return            1 on success, 0 on failure. On failure the key buffer is
 *                    released and *key is set to NULL.
 */
int OQS_KEX_rlwe_msrln16_alice_1(UNUSED OQS_KEX *k, const void *alice_priv, const uint8_t *bob_msg, const size_t bob_msg_len, uint8_t **key, size_t *key_len) {

    int ret;

    *key = NULL;

    if (bob_msg_len != OQS_RLWE_MSRLN16_PKB_BYTES) {
        goto err;
    }

    *key = malloc(OQS_RLWE_MSRLN16_SHAREDKEY_BYTES);
    if (*key == NULL) {
        goto err;
    }

    if (oqs_rlwe_msrln16_SecretAgreement_A((unsigned char *) bob_msg, (int32_t *) alice_priv, (unsigned char *) *key) != CRYPTO_SUCCESS) {
        goto err;
    }

    *key_len = OQS_RLWE_MSRLN16_SHAREDKEY_BYTES;

    ret = 1;
    goto cleanup;

err:
    ret = 0;
    /* Reset the out-parameter after freeing so the caller is never left
     * holding a dangling pointer (a later free by the caller stays harmless). */
    free(*key);
    *key = NULL;

cleanup:
    return ret;
}
/**
 * Releases Alice's private key. A NULL pointer is accepted and ignored.
 *
 * @param k          Unused.
 * @param alice_priv Private key buffer to release.
 */
void OQS_KEX_rlwe_msrln16_alice_priv_free(UNUSED OQS_KEX *k, void *alice_priv) {
    /* free(NULL) is a no-op, so no explicit NULL test is needed */
    free(alice_priv);
}
/**
 * Releases a key exchange object together with its method name.
 * A NULL pointer is accepted and ignored.
 *
 * @param k Object to release.
 */
void OQS_KEX_rlwe_msrln16_free(OQS_KEX *k) {
    if (k != NULL) {
        free(k->method_name);
        k->method_name = NULL;
        free(k);
    }
}

View File

@ -0,0 +1,24 @@
/**
* \file kex_rlwe_msrln16.h
* \brief Header for ring-LWE key exchange protocol from the Microsoft LatticeCrypto library
*/
#ifndef __OQS_KEX_RLWE_MSRLN16_H
#define __OQS_KEX_RLWE_MSRLN16_H
#include <stddef.h>
#include <stdint.h>
#include <oqs/kex.h>
#include <oqs/rand.h>
/* Creates a key exchange object using the given random source; returns NULL on allocation failure. */
OQS_KEX *OQS_KEX_rlwe_msrln16_new(OQS_RAND *rand);
/* Alice, step 0: allocates and fills her private key (*alice_priv) and public message (*alice_msg, *alice_msg_len).
 * Returns 1 on success and 0 on failure. */
int OQS_KEX_rlwe_msrln16_alice_0(OQS_KEX *k, void **alice_priv, uint8_t **alice_msg, size_t *alice_msg_len);
/* Bob: from Alice's message, allocates and fills his reply (*bob_msg, *bob_msg_len) and the shared key (*key, *key_len).
 * Returns 1 on success and 0 on failure. */
int OQS_KEX_rlwe_msrln16_bob(OQS_KEX *k, const uint8_t *alice_msg, const size_t alice_msg_len, uint8_t **bob_msg, size_t *bob_msg_len, uint8_t **key, size_t *key_len);
/* Alice, step 1: from her private key and Bob's reply, allocates and fills the shared key (*key, *key_len).
 * Returns 1 on success and 0 on failure. */
int OQS_KEX_rlwe_msrln16_alice_1(OQS_KEX *k, const void *alice_priv, const uint8_t *bob_msg, const size_t bob_msg_len, uint8_t **key, size_t *key_len);
/* Releases the private key produced by OQS_KEX_rlwe_msrln16_alice_0; NULL is accepted. */
void OQS_KEX_rlwe_msrln16_alice_priv_free(OQS_KEX *k, void *alice_priv);
/* Releases a key exchange object created by OQS_KEX_rlwe_msrln16_new; NULL is accepted. */
void OQS_KEX_rlwe_msrln16_free(OQS_KEX *k);
#endif

View File

@ -0,0 +1,145 @@
/****************************************************************************************
* LatticeCrypto: an efficient post-quantum Ring-Learning With Errors cryptography library
*
* Copyright (c) Microsoft Corporation. All rights reserved.
*
*
* Abstract: fixed constants for the Number Theoretic Transform (NTT)
*
*****************************************************************************************/
#include "LatticeCrypto_priv.h"
// Precomputed scaling constants for N = 1024 and q = 12289, all taken modulo q.
// prime_scale is 3 (3 * 2^12 = q - 1, i.e. 3 * 2^12 = -1 mod q); for example
// 8350 * 1024 * 3^8 = 1 (mod 12289).
// NOTE(review): these are presumably the Ninv / omegainv1N_rev arguments passed
// to the inverse NTT - confirm against the callers.
// N^-1 * prime_scale^-8
const int32_t Ninv8_ntt1024_12289 = 8350;
// N^-1 * prime_scale^-7 * omegainv_rev_ntt1024_12289[1]
const int32_t omegainv7N_rev_ntt1024_12289 = 795;
// N^-1 * prime_scale^-11
const int32_t Ninv11_ntt1024_12289 = 2585;
// N^-1 * prime_scale^-10 * omegainv_rev_ntt1024_12289[1]
const int32_t omegainv10N_rev_ntt1024_12289 = 10953;
// Index-reversed matrices containing powers of psi (psi_rev_nttxxx_yyy) and inverse powers of omega (omegainv_rev_nttxxx_yyy),
// where xxx is parameter N and yyy is the prime q.
const int32_t psi_rev_ntt1024_12289[1024] = {
8193, 493, 6845, 9908, 1378, 10377, 7952, 435, 10146, 1065, 404, 7644, 1207, 3248, 11121, 5277, 2437, 3646, 2987, 6022, 9867, 6250, 10102, 9723, 1002, 7278, 4284, 7201,
875, 3780, 1607, 4976, 8146, 4714, 242, 1537, 3704, 9611, 5019, 545, 5084, 10657, 4885, 11272, 3066, 12262, 3763, 10849, 2912, 5698, 11935, 4861, 7277, 9808, 11244, 2859,
7188, 1067, 2401, 11847, 390, 11516, 8511, 3833, 2780, 7094, 4895, 1484, 2305, 5042, 8236, 2645, 7875, 9442, 2174, 7917, 1689, 3364, 4057, 3271, 10863, 4654, 1777, 10626,
3636, 7351, 9585, 6998, 160, 3149, 4437, 12286, 10123, 3915, 7370, 12176, 4048, 2249, 2884, 1153, 9103, 6882, 2126, 10659, 3510, 5332, 2865, 9919, 9320, 8311, 9603, 9042,
3016, 12046, 9289, 11618, 7098, 3136, 9890, 3400, 2178, 1544, 5559, 420, 8304, 4905, 476, 3531, 9326, 4896, 9923, 3051, 3091, 81, 1000, 4320, 1177, 8034, 9521, 10654, 11563,
7678, 10436, 12149, 3014, 9088, 5086, 1326, 11119, 2319, 11334, 790, 2747, 7443, 3135, 3712, 1062, 9995, 7484, 8736, 9283, 2744, 11726, 2975, 9664, 949, 7468, 9650, 7266,
5828, 6561, 7698, 3328, 6512, 1351, 7311, 8155, 5736, 722, 10984, 4043, 7143, 10810, 1, 8668, 2545, 3504, 8747, 11077, 1646, 9094, 5860, 1759, 8582, 3694, 7110, 8907, 11934,
8058, 9741, 9558, 3932, 5911, 4890, 3637, 8830, 5542, 12144, 5755, 7657, 7901, 11029, 11955, 9863, 10861, 1696, 3284, 2881, 7197, 2089, 9000, 2013, 729, 9048, 11809, 2842,
11267, 9, 6498, 544, 2468, 339, 1381, 2525, 8112, 3584, 6958, 4989, 10616, 8011, 5374, 9452, 12159, 4354, 9893, 7837, 3296, 8340, 7222, 2197, 118, 2476, 5767, 827, 8541,
11336, 3434, 3529, 2908, 12071, 2361, 1843, 3030, 8174, 6147, 9842, 8326, 576, 10335, 10238, 10484, 9407, 11836, 5908, 418, 3772, 7515, 5429, 7552, 10996, 12133, 2767, 3969,
8298, 6413, 10008, 2031, 5333, 10800, 9789, 10706, 5942, 1263, 49, 5915, 10806, 11939, 10777, 1815, 5383, 3202, 4493, 6920, 10232, 1975, 8532, 2925, 347, 4754, 1858, 11863,
8974, 9551, 5868, 9634, 5735, 11566, 12115, 10596, 3009, 6190, 11994, 6523, 652, 3762, 9370, 4016, 4077, 8561, 4049, 5990, 11130, 11143, 948, 325, 1404, 6992, 6119, 8333,
10929, 1200, 5184, 2555, 6122, 1594, 10327, 7183, 5961, 2692, 12121, 4298, 3329, 5919, 4433, 8455, 7032, 1747, 3123, 3054, 6803, 5782, 10723, 9341, 2503, 683, 2459, 3656,
64, 4240, 3570, 835, 6065, 4046, 11580, 10970, 3150, 10331, 4322, 2078, 1112, 4079, 11231, 441, 922, 1050, 4536, 6844, 8429, 2683, 11099, 3818, 6171, 8500, 12142, 6833, 4449,
4749, 6752, 7500, 7822, 8214, 6974, 7965, 7373, 2169, 522, 5079, 3262, 10316, 6715, 1278, 9945, 3514, 11248, 11271, 5925, 468, 3988, 382, 11973, 5339, 6843, 6196, 8579, 2033,
8291, 1922, 3879, 11035, 973, 6854, 10930, 5206, 6760, 3199, 56, 3565, 654, 1702, 10302, 5862, 6153, 5415, 8646, 11889, 10561, 7341, 6152, 7232, 4698, 8844, 4780, 10240, 4912,
1321, 12097, 7048, 2920, 3127, 4169, 11502, 3482, 11279, 5468, 5874, 11612, 6055, 8953, 52, 3174, 10966, 9523, 151, 2127, 3957, 2839, 9784, 6383, 1579, 431, 7507, 5886, 3029,
6695, 4213, 504, 11684, 2302, 8689, 9026, 4624, 6212, 11868, 4080, 6221, 8687, 1003, 8757, 241, 58, 5009, 10333, 885, 6281, 3438, 9445, 11314, 8077, 6608, 3477, 142, 1105,
8841, 343, 4538, 1908, 1208, 4727, 7078, 10423, 10125, 6873, 11573, 10179, 416, 814, 1705, 2450, 8700, 717, 9307, 1373, 8186, 2429, 10568, 10753, 7228, 11071, 438, 8774, 5993,
3278, 4209, 6877, 3449, 1136, 3708, 3238, 2926, 1826, 4489, 3171, 8024, 8611, 1928, 464, 3205, 8930, 7080, 1092, 10900, 10221, 11943, 4404, 9126, 4032, 7449, 6127, 8067, 10763,
125, 540, 8921, 8062, 612, 8051, 12229, 9572, 9089, 10754, 10029, 68, 6453, 7723, 4781, 4924, 1014, 448, 3942, 5232, 1327, 8682, 3744, 7326, 3056, 9761, 5845, 5588, 412, 7187,
3975, 4883, 3087, 6454, 2257, 7784, 5676, 1417, 8400, 11710, 5596, 5987, 9175, 2769, 5966, 212, 6555, 11113, 5508, 11014, 1125, 4860, 10844, 1131, 4267, 6636, 2275, 9828, 5063,
4176, 3765, 1518, 8794, 4564, 10224, 5826, 3534, 3961, 4145, 10533, 506, 11034, 6505, 10897, 2674, 10077, 3338, 9013, 3511, 6811, 11111, 2776, 1165, 2575, 8881, 10347, 377,
4578, 11914, 10669, 10104, 392, 10453, 425, 9489, 193, 2231, 6197, 1038, 11366, 6204, 8122, 2894, 3654, 10975, 10545, 6599, 2455, 11951, 3947, 20, 5002, 5163, 4608, 8946, 8170,
10138, 1522, 8665, 10397, 3344, 5598, 10964, 6565, 11260, 1945, 11041, 9847, 7174, 4939, 2148, 6330, 3959, 5797, 4913, 3528, 8054, 3825, 8914, 9998, 4335, 8896, 9342, 3982,
6680, 11653, 7790, 6617, 1737, 622, 10485, 10886, 6195, 7100, 1687, 406, 12143, 5268, 9389, 12050, 994, 7735, 5464, 7383, 4670, 512, 364, 9929, 3028, 5216, 5518, 1226, 7550,
8038, 7043, 7814, 11053, 3017, 3121, 7584, 2600, 11232, 6780, 12085, 5219, 1409, 9600, 4605, 8151, 12109, 463, 8882, 8308, 10821, 9247, 10945, 9806, 2054, 6203, 6643, 3120,
6105, 8348, 8536, 6919, 8753, 11007, 8717, 9457, 2021, 9060, 4730, 3929, 10583, 3723, 845, 1936, 7, 5054, 3154, 3285, 4360, 3805, 11522, 2213, 4153, 12239, 12073, 5526, 769,
4099, 3944, 5604, 5530, 11024, 9282, 2171, 3480, 7434, 8520, 3232, 11996, 9656, 1406, 2945, 5349, 7207, 4590, 11607, 11309, 5202, 844, 7082, 4050, 8016, 9068, 9694, 8452, 7000,
5662, 567, 2941, 8619, 3808, 4987, 2373, 5135, 63, 7605, 3360, 11839, 10345, 578, 6921, 7628, 510, 5386, 2622, 7806, 5703, 10783, 9224, 11379, 5900, 4719, 11538, 3502, 5789,
10631, 5618, 826, 5043, 3090, 10891, 9951, 7596, 2293, 11872, 6151, 3469, 4443, 8871, 1555, 1802, 5103, 1891, 1223, 2334, 7878, 1590, 881, 365, 1927, 11274, 4510, 9652, 2946,
6828, 1280, 614, 10918, 12265, 7250, 6742, 9804, 11385, 2276, 11307, 2593, 879, 7899, 8071, 3454, 8531, 3795, 9021, 5776, 1849, 7766, 7988, 457, 8, 530, 9663, 7785, 11511, 3578,
7592, 10588, 3466, 8972, 9757, 3332, 139, 2046, 2940, 10808, 9332, 874, 2301, 5650, 12119, 150, 648, 8000, 9982, 9416, 2827, 2434, 11498, 6481, 12268, 9754, 11169, 11823, 11259,
3821, 10608, 2929, 6263, 4649, 6320, 9687, 10388, 502, 5118, 8496, 6226, 10716, 8443, 7624, 6883, 9269, 6616, 8620, 5287, 944, 7519, 6125, 1882, 11249, 10254, 5410, 1251, 1790,
5275, 8449, 10447, 4113, 72, 2828, 4352, 7455, 2712, 11048, 7911, 3451, 4094, 6508, 3045, 11194, 2643, 1783, 7211, 4974, 7724, 9811, 9449, 3019, 4194, 2730, 6878, 10421, 2253,
4518, 9195, 7469, 11129, 9173, 12100, 1763, 2209, 9617, 5170, 865, 1279, 1694, 10759, 8420, 4423, 10555, 3815, 5832, 10939
};
const int32_t omegainv_rev_ntt1024_12289[1024] = {
8193, 11796, 2381, 5444, 11854, 4337, 1912, 10911, 7012, 1168, 9041, 11082, 4645, 11885, 11224, 2143, 7313, 10682, 8509, 11414, 5088, 8005, 5011, 11287, 2566, 2187, 6039, 2422,
6267, 9302, 8643, 9852, 8456, 3778, 773, 11899, 442, 9888, 11222, 5101, 9430, 1045, 2481, 5012, 7428, 354, 6591, 9377, 1440, 8526, 27, 9223, 1017, 7404, 1632, 7205, 11744, 7270,
2678, 8585, 10752, 12047, 7575, 4143, 8758, 11813, 7384, 3985, 11869, 6730, 10745, 10111, 8889, 2399, 9153, 5191, 671, 3000, 243, 9273, 3247, 2686, 3978, 2969, 2370, 9424, 6957,
8779, 1630, 10163, 5407, 3186, 11136, 9405, 10040, 8241, 113, 4919, 8374, 2166, 3, 7852, 9140, 12129, 5291, 2704, 4938, 8653, 1663, 10512, 7635, 1426, 9018, 8232, 8925, 10600, 4372,
10115, 2847, 4414, 9644, 4053, 7247, 9984, 10805, 7394, 5195, 9509, 953, 3748, 11462, 6522, 9813, 12171, 10092, 5067, 3949, 8993, 4452, 2396, 7935, 130, 2837, 6915, 4278, 1673, 7300,
5331, 8705, 4177, 9764, 10908, 11950, 9821, 11745, 5791, 12280, 1022, 9447, 480, 3241, 11560, 10276, 3289, 10200, 5092, 9408, 9005, 10593, 1428, 2426, 334, 1260, 4388, 4632, 6534,
145, 6747, 3459, 8652, 7399, 6378, 8357, 2731, 2548, 4231, 355, 3382, 5179, 8595, 3707, 10530, 6429, 3195, 10643, 1212, 3542, 8785, 9744, 3621, 12288, 1479, 5146, 8246, 1305, 11567,
6553, 4134, 4978, 10938, 5777, 8961, 4591, 5728, 6461, 5023, 2639, 4821, 11340, 2625, 9314, 563, 9545, 3006, 3553, 4805, 2294, 11227, 8577, 9154, 4846, 9542, 11499, 955, 9970, 1170,
10963, 7203, 3201, 9275, 140, 1853, 4611, 726, 1635, 2768, 4255, 11112, 7969, 11289, 12208, 9198, 9238, 2366, 7393, 2963, 11184, 12147, 8812, 5681, 4212, 975, 2844, 8851, 6008, 11404,
1956, 7280, 12231, 12048, 3532, 11286, 3602, 6068, 8209, 421, 6077, 7665, 3263, 3600, 9987, 605, 11785, 8076, 5594, 9260, 6403, 4782, 11858, 10710, 5906, 2505, 9450, 8332, 10162,
12138, 2766, 1323, 9115, 12237, 3336, 6234, 677, 6415, 6821, 1010, 8807, 787, 8120, 9162, 9369, 5241, 192, 10968, 7377, 2049, 7509, 3445, 7591, 5057, 6137, 4948, 1728, 400, 3643,
6874, 6136, 6427, 1987, 10587, 11635, 8724, 12233, 9090, 5529, 7083, 1359, 5435, 11316, 1254, 8410, 10367, 3998, 10256, 3710, 6093, 5446, 6950, 316, 11907, 8301, 11821, 6364, 1018,
1041, 8775, 2344, 11011, 5574, 1973, 9027, 7210, 11767, 10120, 4916, 4324, 5315, 4075, 4467, 4789, 5537, 7540, 7840, 5456, 147, 3789, 6118, 8471, 1190, 9606, 3860, 5445, 7753, 11239,
11367, 11848, 1058, 8210, 11177, 10211, 7967, 1958, 9139, 1319, 709, 8243, 6224, 11454, 8719, 8049, 12225, 8633, 9830, 11606, 9786, 2948, 1566, 6507, 5486, 9235, 9166, 10542, 5257,
3834, 7856, 6370, 8960, 7991, 168, 9597, 6328, 5106, 1962, 10695, 6167, 9734, 7105, 11089, 1360, 3956, 6170, 5297, 10885, 11964, 11341, 1146, 1159, 6299, 8240, 3728, 8212, 8273, 2919,
8527, 11637, 5766, 295, 6099, 9280, 1693, 174, 723, 6554, 2655, 6421, 2738, 3315, 426, 10431, 7535, 11942, 9364, 3757, 10314, 2057, 5369, 7796, 9087, 6906, 10474, 1512, 350, 1483,
6374, 12240, 11026, 6347, 1583, 2500, 1489, 6956, 10258, 2281, 5876, 3991, 8320, 9522, 156, 1293, 4737, 6860, 4774, 8517, 11871, 6381, 453, 2882, 1805, 2051, 1954, 11713, 3963, 2447,
6142, 4115, 9259, 10446, 9928, 218, 9381, 8760, 8855, 1350, 6457, 8474, 1734, 7866, 3869, 1530, 10595, 11010, 11424, 7119, 2672, 10080, 10526, 189, 3116, 1160, 4820, 3094, 7771, 10036,
1868, 5411, 9559, 8095, 9270, 2840, 2478, 4565, 7315, 5078, 10506, 9646, 1095, 9244, 5781, 8195, 8838, 4378, 1241, 9577, 4834, 7937, 9461, 12217, 8176, 1842, 3840, 7014, 10499, 11038,
6879, 2035, 1040, 10407, 6164, 4770, 11345, 7002, 3669, 5673, 3020, 5406, 4665, 3846, 1573, 6063, 3793, 7171, 11787, 1901, 2602, 5969, 7640, 6026, 9360, 1681, 8468, 1030, 466, 1120,
2535, 21, 5808, 791, 9855, 9462, 2873, 2307, 4289, 11641, 12139, 170, 6639, 9988, 11415, 2957, 1481, 9349, 10243, 12150, 8957, 2532, 3317, 8823, 1701, 4697, 8711, 778, 4504, 2626,
11759, 12281, 11832, 4301, 4523, 10440, 6513, 3268, 8494, 3758, 8835, 4218, 4390, 11410, 9696, 982, 10013, 904, 2485, 5547, 5039, 24, 1371, 11675, 11009, 5461, 9343, 2637, 7779, 1015,
10362, 11924, 11408, 10699, 4411, 9955, 11066, 10398, 7186, 10487, 10734, 3418, 7846, 8820, 6138, 417, 9996, 4693, 2338, 1398, 9199, 7246, 11463, 6671, 1658, 6500, 8787, 751, 7570,
6389, 910, 3065, 1506, 6586, 4483, 9667, 6903, 11779, 4661, 5368, 11711, 1944, 450, 8929, 4684, 12226, 7154, 9916, 7302, 8481, 3670, 9348, 11722, 6627, 5289, 3837, 2595, 3221, 4273,
8239, 5207, 11445, 7087, 980, 682, 7699, 5082, 6940, 9344, 10883, 2633, 293, 9057, 3769, 4855, 8809, 10118, 3007, 1265, 6759, 6685, 8345, 8190, 11520, 6763, 216, 50, 8136, 10076, 767,
8484, 7929, 9004, 9135, 7235, 12282, 10353, 11444, 8566, 1706, 8360, 7559, 3229, 10268, 2832, 3572, 1282, 3536, 5370, 3753, 3941, 6184, 9169, 5646, 6086, 10235, 2483, 1344, 3042, 1468,
3981, 3407, 11826, 180, 4138, 7684, 2689, 10880, 7070, 204, 5509, 1057, 9689, 4705, 9168, 9272, 1236, 4475, 5246, 4251, 4739, 11063, 6771, 7073, 9261, 2360, 11925, 11777, 7619, 4906,
6825, 4554, 11295, 239, 2900, 7021, 146, 11883, 10602, 5189, 6094, 1403, 1804, 11667, 10552, 5672, 4499, 636, 5609, 8307, 2947, 3393, 7954, 2291, 3375, 8464, 4235, 8761, 7376, 6492,
8330, 5959, 10141, 7350, 5115, 2442, 1248, 10344, 1029, 5724, 1325, 6691, 8945, 1892, 3624, 10767, 2151, 4119, 3343, 7681, 7126, 7287, 12269, 8342, 338, 9834, 5690, 1744, 1314, 8635,
9395, 4167, 6085, 923, 11251, 6092, 10058, 12096, 2800, 11864, 1836, 11897, 2185, 1620, 375, 7711, 11912, 1942, 3408, 9714, 11124, 9513, 1178, 5478, 8778, 3276, 8951, 2212, 9615, 1392,
5784, 1255, 11783, 1756, 8144, 8328, 8755, 6463, 2065, 7725, 3495, 10771, 8524, 8113, 7226, 2461, 10014, 5653, 8022, 11158, 1445, 7429, 11164, 1275, 6781, 1176, 5734, 12077, 6323, 9520,
3114, 6302, 6693, 579, 3889, 10872, 6613, 4505, 10032, 5835, 9202, 7406, 8314, 5102, 11877, 6701, 6444, 2528, 9233, 4963, 8545, 3607, 10962, 7057, 8347, 11841, 11275, 7365, 7508, 4566,
5836, 12221, 2260, 1535, 3200, 2717, 60, 4238, 11677, 4227, 3368, 11749, 12164, 1526, 4222, 6162, 4840, 8257, 3163, 7885, 346, 2068, 1389, 11197, 5209, 3359, 9084, 11825, 10361, 3678,
4265, 9118, 7800, 10463, 9363, 9051, 8581, 11153, 8840, 5412, 8080, 9011, 6296, 3515, 11851, 1218, 5061, 1536, 1721, 9860, 4103, 10916, 2982, 11572, 3589, 9839, 10584, 11475, 11873,
2110, 716, 5416, 2164, 1866, 5211, 7562, 11081, 10381, 7751, 11946, 3448
};
const int32_t psi_rev_ntt512_12289[512] = {
8193, 493, 6845, 9908, 1378, 10377, 7952, 435, 10146, 1065, 404, 7644, 1207, 3248, 11121, 5277, 2437, 3646, 2987, 6022, 9867, 6250, 10102, 9723, 1002, 7278, 4284, 7201, 875, 3780, 1607,
4976, 8146, 4714, 242, 1537, 3704, 9611, 5019, 545, 5084, 10657, 4885, 11272, 3066, 12262, 3763, 10849, 2912, 5698, 11935, 4861, 7277, 9808, 11244, 2859, 7188, 1067, 2401, 11847, 390,
11516, 8511, 3833, 2780, 7094, 4895, 1484, 2305, 5042, 8236, 2645, 7875, 9442, 2174, 7917, 1689, 3364, 4057, 3271, 10863, 4654, 1777, 10626, 3636, 7351, 9585, 6998, 160, 3149, 4437,
12286, 10123, 3915, 7370, 12176, 4048, 2249, 2884, 1153, 9103, 6882, 2126, 10659, 3510, 5332, 2865, 9919, 9320, 8311, 9603, 9042, 3016, 12046, 9289, 11618, 7098, 3136, 9890, 3400, 2178,
1544, 5559, 420, 8304, 4905, 476, 3531, 9326, 4896, 9923, 3051, 3091, 81, 1000, 4320, 1177, 8034, 9521, 10654, 11563, 7678, 10436, 12149, 3014, 9088, 5086, 1326, 11119, 2319, 11334, 790,
2747, 7443, 3135, 3712, 1062, 9995, 7484, 8736, 9283, 2744, 11726, 2975, 9664, 949, 7468, 9650, 7266, 5828, 6561, 7698, 3328, 6512, 1351, 7311, 8155, 5736, 722, 10984, 4043, 7143, 10810,
1, 8668, 2545, 3504, 8747, 11077, 1646, 9094, 5860, 1759, 8582, 3694, 7110, 8907, 11934, 8058, 9741, 9558, 3932, 5911, 4890, 3637, 8830, 5542, 12144, 5755, 7657, 7901, 11029, 11955, 9863,
10861, 1696, 3284, 2881, 7197, 2089, 9000, 2013, 729, 9048, 11809, 2842, 11267, 9, 6498, 544, 2468, 339, 1381, 2525, 8112, 3584, 6958, 4989, 10616, 8011, 5374, 9452, 12159, 4354, 9893,
7837, 3296, 8340, 7222, 2197, 118, 2476, 5767, 827, 8541, 11336, 8855, 8760, 9381, 218, 9928, 10446, 9259, 4115, 6142, 2447, 3963, 11713, 1954, 2051, 1805, 2882, 453, 6381, 11871, 8517,
4774, 6860, 4737, 1293, 156, 9522, 8320, 3991, 5876, 2281, 10258, 6956, 1489, 2500, 1583, 6347, 11026, 12240, 6374, 1483, 350, 1512, 10474, 6906, 9087, 7796, 5369, 2057, 10314, 3757,
9364, 11942, 7535, 10431, 426, 3315, 2738, 6421, 2655, 6554, 723, 174, 1693, 9280, 6099, 295, 5766, 11637, 8527, 2919, 8273, 8212, 3728, 8240, 6299, 1159, 1146, 11341, 11964, 10885, 5297,
6170, 3956, 1360, 11089, 7105, 9734, 6167, 10695, 1962, 5106, 6328, 9597, 168, 7991, 8960, 6370, 7856, 3834, 5257, 10542, 9166, 9235, 5486, 6507, 1566, 2948, 9786, 11606, 9830, 8633,
12225, 8049, 8719, 11454, 6224, 8243, 709, 1319, 9139, 1958, 7967, 10211, 11177, 8210, 1058, 11848, 11367, 11239, 7753, 5445, 3860, 9606, 1190, 8471, 6118, 3789, 147, 5456, 7840, 7540,
5537, 4789, 4467, 4075, 5315, 4324, 4916, 10120, 11767, 7210, 9027, 1973, 5574, 11011, 2344, 8775, 1041, 1018, 6364, 11821, 8301, 11907, 316, 6950, 5446, 6093, 3710, 10256, 3998, 10367,
8410, 1254, 11316, 5435, 1359, 7083, 5529, 9090, 12233, 8724, 11635, 10587, 1987, 6427, 6136, 6874, 3643, 400, 1728, 4948, 6137, 5057, 7591, 3445, 7509, 2049, 7377, 10968, 192, 5241, 9369,
9162, 8120, 787, 8807, 1010, 6821, 6415, 677, 6234, 3336, 12237, 9115, 1323, 2766, 12138, 10162, 8332, 9450, 2505, 5906, 10710, 11858, 4782, 6403, 9260, 5594, 8076, 11785, 605, 9987, 3600,
3263, 7665, 6077, 421, 8209, 6068, 3602, 11286, 3532, 12048, 12231, 7280, 1956, 11404, 6008, 8851, 2844, 975, 4212, 5681, 8812, 12147, 11184
};
const int32_t omegainv_rev_ntt512_12289[512] = {
8193, 11796, 2381, 5444, 11854, 4337, 1912, 10911, 7012, 1168, 9041, 11082, 4645, 11885, 11224, 2143, 7313, 10682, 8509, 11414, 5088, 8005, 5011, 11287, 2566, 2187, 6039, 2422, 6267, 9302,
8643, 9852, 8456, 3778, 773, 11899, 442, 9888, 11222, 5101, 9430, 1045, 2481, 5012, 7428, 354, 6591, 9377, 1440, 8526, 27, 9223, 1017, 7404, 1632, 7205, 11744, 7270, 2678, 8585, 10752,
12047, 7575, 4143, 8758, 11813, 7384, 3985, 11869, 6730, 10745, 10111, 8889, 2399, 9153, 5191, 671, 3000, 243, 9273, 3247, 2686, 3978, 2969, 2370, 9424, 6957, 8779, 1630, 10163, 5407, 3186,
11136, 9405, 10040, 8241, 113, 4919, 8374, 2166, 3, 7852, 9140, 12129, 5291, 2704, 4938, 8653, 1663, 10512, 7635, 1426, 9018, 8232, 8925, 10600, 4372, 10115, 2847, 4414, 9644, 4053, 7247,
9984, 10805, 7394, 5195, 9509, 953, 3748, 11462, 6522, 9813, 12171, 10092, 5067, 3949, 8993, 4452, 2396, 7935, 130, 2837, 6915, 4278, 1673, 7300, 5331, 8705, 4177, 9764, 10908, 11950, 9821,
11745, 5791, 12280, 1022, 9447, 480, 3241, 11560, 10276, 3289, 10200, 5092, 9408, 9005, 10593, 1428, 2426, 334, 1260, 4388, 4632, 6534, 145, 6747, 3459, 8652, 7399, 6378, 8357, 2731, 2548,
4231, 355, 3382, 5179, 8595, 3707, 10530, 6429, 3195, 10643, 1212, 3542, 8785, 9744, 3621, 12288, 1479, 5146, 8246, 1305, 11567, 6553, 4134, 4978, 10938, 5777, 8961, 4591, 5728, 6461, 5023,
2639, 4821, 11340, 2625, 9314, 563, 9545, 3006, 3553, 4805, 2294, 11227, 8577, 9154, 4846, 9542, 11499, 955, 9970, 1170, 10963, 7203, 3201, 9275, 140, 1853, 4611, 726, 1635, 2768, 4255,
11112, 7969, 11289, 12208, 9198, 9238, 2366, 7393, 2963, 1105, 142, 3477, 6608, 8077, 11314, 9445, 3438, 6281, 885, 10333, 5009, 58, 241, 8757, 1003, 8687, 6221, 4080, 11868, 6212, 4624,
9026, 8689, 2302, 11684, 504, 4213, 6695, 3029, 5886, 7507, 431, 1579, 6383, 9784, 2839, 3957, 2127, 151, 9523, 10966, 3174, 52, 8953, 6055, 11612, 5874, 5468, 11279, 3482, 11502, 4169,
3127, 2920, 7048, 12097, 1321, 4912, 10240, 4780, 8844, 4698, 7232, 6152, 7341, 10561, 11889, 8646, 5415, 6153, 5862, 10302, 1702, 654, 3565, 56, 3199, 6760, 5206, 10930, 6854, 973, 11035,
3879, 1922, 8291, 2033, 8579, 6196, 6843, 5339, 11973, 382, 3988, 468, 5925, 11271, 11248, 3514, 9945, 1278, 6715, 10316, 3262, 5079, 522, 2169, 7373, 7965, 6974, 8214, 7822, 7500, 6752,
4749, 4449, 6833, 12142, 8500, 6171, 3818, 11099, 2683, 8429, 6844, 4536, 1050, 922, 441, 11231, 4079, 1112, 2078, 4322, 10331, 3150, 10970, 11580, 4046, 6065, 835, 3570, 4240, 64, 3656,
2459, 683, 2503, 9341, 10723, 5782, 6803, 3054, 3123, 1747, 7032, 8455, 4433, 5919, 3329, 4298, 12121, 2692, 5961, 7183, 10327, 1594, 6122, 2555, 5184, 1200, 10929, 8333, 6119, 6992, 1404,
325, 948, 11143, 11130, 5990, 4049, 8561, 4077, 4016, 9370, 3762, 652, 6523, 11994, 6190, 3009, 10596, 12115, 11566, 5735, 9634, 5868, 9551, 8974, 11863, 1858, 4754, 347, 2925, 8532, 1975,
10232, 6920, 4493, 3202, 5383, 1815, 10777, 11939, 10806, 5915, 49, 1263, 5942, 10706, 9789, 10800, 5333, 2031, 10008, 6413, 8298, 3969, 2767, 12133, 10996, 7552, 5429, 7515, 3772, 418, 5908,
11836, 9407, 10484, 10238, 10335, 576, 8326, 9842, 6147, 8174, 3030, 1843, 2361, 12071, 2908, 3529, 3434
};