mirror of
https://github.com/open-quantum-safe/liboqs.git
synced 2025-10-04 00:02:01 -04:00
NVIDIA: Adding cuPQC as a backend for ML-KEM. (#2044)
* Adding cuPQC as a backend for ML-KEM. Signed-off-by: Steven Reeves <sreeves@nvidia.com> * Fixing transposition error that left out OQS_USE_CUPQC in CMake system. Signed-off-by: Steven Reeves <sreeves@nvidia.com> * Add CMake dependent options for cupqc. Fixed formatting in kem_ml_kem_####.c and kem/family/kem_scheme.c Signed-off-by: Steven Reeves <sreeves@nvidia.com> * Move cupqc_ml-kem source files to correctly named dir Signed-off-by: Pravek Sharma <sharmapravek@gmail.com> * Stop piggybacking on pqcrystals-kyber-standard and move cupqc_ml-kem metadata to separate upstream repo Signed-off-by: Pravek Sharma <sharmapravek@gmail.com> * Update licensing information Signed-off-by: Pravek Sharma <sharmapravek@gmail.com> * Update PLATFORMS.md Signed-off-by: Pravek Sharma <sharmapravek@gmail.com> * Fix kem_family cmakelists template Signed-off-by: Pravek Sharma <sharmapravek@gmail.com> * Run copy_from_upsream.py and pull updated upstream Signed-off-by: Pravek Sharma <sharmapravek@gmail.com> * Add cupqc build test to basic.yml Signed-off-by: Pravek Sharma <sharmapravek@gmail.com> * Move cupqc build test from basic.yml to linux.yml Signed-off-by: Pravek Sharma <sharmapravek@gmail.com> * Fix error in linux.yml Signed-off-by: Pravek Sharma <sharmapravek@gmail.com> * fixup! Fix error in linux.yml Signed-off-by: Pravek Sharma <sharmapravek@gmail.com> * Redo cupqc build check Signed-off-by: Pravek Sharma <sharmapravek@gmail.com> * Supply default CUDA arch to cupqc-buildcheck configuration stage Signed-off-by: Pravek Sharma <sharmapravek@gmail.com> * Specify CUDAXX in cupqc-buildcheck Signed-off-by: Pravek Sharma <sharmapravek@gmail.com> * Make cuPQC_DIR explicit in cupqc-buildcheck Signed-off-by: Pravek Sharma <sharmapravek@gmail.com> --------- Signed-off-by: Steven Reeves <sreeves@nvidia.com> Signed-off-by: Pravek Sharma <sharmapravek@gmail.com> Co-authored-by: Pravek Sharma <sharmapravek@gmail.com>
This commit is contained in:
parent
99affa6935
commit
6a16ac68b5
@ -338,18 +338,36 @@ if(OQS_DIST_X86_64_BUILD OR (OQS_USE_AVX2_INSTRUCTIONS AND OQS_USE_BMI2_INSTRUCT
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(CMAKE_SYSTEM_NAME MATCHES "Linux|Darwin")
|
||||
if(OQS_USE_CUPQC)
|
||||
cmake_dependent_option(OQS_ENABLE_KEM_ml_kem_512_cuda "" ON "OQS_ENABLE_KEM_ml_kem_512" OFF)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(CMAKE_SYSTEM_NAME MATCHES "Linux|Darwin")
|
||||
if(OQS_DIST_X86_64_BUILD OR (OQS_USE_AVX2_INSTRUCTIONS AND OQS_USE_BMI2_INSTRUCTIONS AND OQS_USE_POPCNT_INSTRUCTIONS))
|
||||
cmake_dependent_option(OQS_ENABLE_KEM_ml_kem_768_avx2 "" ON "OQS_ENABLE_KEM_ml_kem_768" OFF)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(CMAKE_SYSTEM_NAME MATCHES "Linux|Darwin")
|
||||
if(OQS_USE_CUPQC)
|
||||
cmake_dependent_option(OQS_ENABLE_KEM_ml_kem_768_cuda "" ON "OQS_ENABLE_KEM_ml_kem_768" OFF)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(CMAKE_SYSTEM_NAME MATCHES "Linux|Darwin")
|
||||
if(OQS_DIST_X86_64_BUILD OR (OQS_USE_AVX2_INSTRUCTIONS AND OQS_USE_BMI2_INSTRUCTIONS AND OQS_USE_POPCNT_INSTRUCTIONS))
|
||||
cmake_dependent_option(OQS_ENABLE_KEM_ml_kem_1024_avx2 "" ON "OQS_ENABLE_KEM_ml_kem_1024" OFF)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(CMAKE_SYSTEM_NAME MATCHES "Linux|Darwin")
|
||||
if(OQS_USE_CUPQC)
|
||||
cmake_dependent_option(OQS_ENABLE_KEM_ml_kem_1024_cuda "" ON "OQS_ENABLE_KEM_ml_kem_1024" OFF)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
||||
if(CMAKE_SYSTEM_NAME MATCHES "Darwin|Linux")
|
||||
if(OQS_DIST_X86_64_BUILD OR (OQS_USE_AVX2_INSTRUCTIONS AND OQS_USE_POPCNT_INSTRUCTIONS))
|
||||
|
13
.github/workflows/linux.yml
vendored
13
.github/workflows/linux.yml
vendored
@ -188,6 +188,19 @@ jobs:
|
||||
--numprocesses=auto \
|
||||
--ignore=tests/test_code_conventions.py ${{ matrix.PYTEST_ARGS }}"
|
||||
|
||||
cupqc-buildcheck:
|
||||
name: Check that code builds with OQS_USE_CUPQC=ON
|
||||
runs-on: ubuntu-latest
|
||||
container: openquantumsafe/ci-ubuntu-latest:latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # pin@v4
|
||||
- name: Configure
|
||||
run: mkdir build && cd build && cuPQC_DIR=/cupqc/cupqc/cupqc-pkg-0.2.0/cmake/ CUDACXX=/usr/local/cuda-12.6/bin/nvcc cmake -GNinja -DOQS_USE_CUPQC=ON -DCMAKE_CUDA_ARCHITECTURES=80 .. && cmake -LA -N ..
|
||||
- name: Build code
|
||||
run: ninja
|
||||
working-directory: build
|
||||
|
||||
linux_cross_compile:
|
||||
runs-on: ubuntu-latest
|
||||
container: openquantumsafe/ci-ubuntu-latest:latest
|
||||
|
@ -27,6 +27,7 @@ option(OQS_LIBJADE_BUILD "Enable formally verified implementation of supported a
|
||||
option(OQS_PERMIT_UNSUPPORTED_ARCHITECTURE "Permit compilation on an an unsupported architecture." OFF)
|
||||
option(OQS_STRICT_WARNINGS "Enable all compiler warnings." OFF)
|
||||
option(OQS_EMBEDDED_BUILD "Compile liboqs for an Embedded environment without a full standard library." OFF)
|
||||
option(OQS_USE_CUPQC "Utilize cuPQC as the backend for supported PQC algorithms." OFF)
|
||||
|
||||
# Libfuzzer isn't supported on gcc
|
||||
if('${CMAKE_C_COMPILER_ID}' STREQUAL 'Clang')
|
||||
@ -140,6 +141,16 @@ else()
|
||||
message(FATAL_ERROR "Unknown or unsupported processor: " ${CMAKE_SYSTEM_PROCESSOR} ". Override by setting OQS_PERMIT_UNSUPPORTED_ARCHITECTURE=ON")
|
||||
endif()
|
||||
|
||||
if(${OQS_USE_CUPQC})
|
||||
# CMAKE's CUDA language requires CMAKE 3.18
|
||||
cmake_minimum_required (VERSION 3.18)
|
||||
enable_language(CUDA)
|
||||
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 80 90)
|
||||
endif()
|
||||
find_package(cuPQC 0.2.0 REQUIRED)
|
||||
endif()
|
||||
|
||||
if (NOT ((CMAKE_SYSTEM_NAME MATCHES "Linux|Darwin") AND (ARCH_X86_64 STREQUAL "ON")) AND (OQS_LIBJADE_BUILD STREQUAL "ON"))
|
||||
message(FATAL_ERROR "Building liboqs with libjade implementations from libjade is only supported on Linux and Darwin on x86_64.")
|
||||
endif()
|
||||
|
@ -13,6 +13,7 @@ The following options can be passed to CMake before the build file generation pr
|
||||
- [OQS_DIST_BUILD](#OQS_DIST_BUILD)
|
||||
- [OQS_USE_CPUFEATURE_INSTRUCTIONS](#OQS_USE_CPUFEATURE_INSTRUCTIONS)
|
||||
- [OQS_USE_OPENSSL](#OQS_USE_OPENSSL)
|
||||
- [OQS_USE_CUPQC](#OQS_USE_CUPQC)
|
||||
- [OQS_OPT_TARGET](#OQS_OPT_TARGET)
|
||||
- [OQS_SPEED_USE_ARM_PMU](#OQS_SPEED_USE_ARM_PMU)
|
||||
- [USE_SANITIZER](#USE_SANITIZER)
|
||||
@ -124,6 +125,13 @@ Dynamically load OpenSSL through `dlopen`. When using liboqs from other cryptogr
|
||||
|
||||
Only has an effect if the system supports `dlopen` and ELF binary format, such as Linux or BSD family.
|
||||
|
||||
### OQS_USE_CUPQC
|
||||
|
||||
Can be `ON` or `OFF`. When `ON`, use NVIDIA's cuPQC library where able (currently just ML-KEM). When this option is enabled, liboqs may not run correctly on machines that lack supported GPUs. To download cuPQC follow the instructions at (https://developer.nvidia.com/cupqc-download/). Detailed descriptions of the API, requirements, and installation guide are in the cuPQC documentation (https://docs.nvidia.com/cuda/cupqc/index.html). While the code shipped by liboqs required to use cuPQC is licensed under Apache 2.0 the cuPQC SDK comes with its own license agreement (https://docs.nvidia.com/cuda/cupqc/license.html).
|
||||
|
||||
**Default**: `OFF`
|
||||
|
||||
|
||||
## Stateful Hash Based Signatures
|
||||
|
||||
XMSS and LMS are the two supported Hash-Based Signatures schemes.
|
||||
|
@ -63,3 +63,4 @@ In this policy, the words "must" and "must not" specify absolute requirements th
|
||||
- ppc641e for Ubuntu (Focal)
|
||||
- s390x for Ubuntu (Focal)
|
||||
- loongarch64 for Debian Linux (trixie)
|
||||
- NVIDIA GPU architectures 70, 75, 80, 86, 89, and 90 with a x86_64 CPU for Linux
|
||||
|
@ -9,6 +9,10 @@
|
||||
- **Primary Source**<a name="primary-source"></a>:
|
||||
- **Source**: https://github.com/pq-crystals/kyber/commit/10b478fc3cc4ff6215eb0b6a11bd758bf0929cbd with copy_from_upstream patches
|
||||
- **Implementation license (SPDX-Identifier)**: CC0-1.0 or Apache-2.0
|
||||
- **Optimized Implementation sources**: https://github.com/pq-crystals/kyber/commit/10b478fc3cc4ff6215eb0b6a11bd758bf0929cbd with copy_from_upstream patches
|
||||
- **cupqc-cuda**:<a name="cupqc-cuda"></a>
|
||||
- **Source**: https://github.com/praveksharma/cupqc-mlkem/commit/b026f4e5475cd9c20c2082c7d9bad80e5b0ba89e
|
||||
- **Implementation license (SPDX-Identifier)**: Apache-2.0
|
||||
|
||||
|
||||
## Parameter set summary
|
||||
@ -25,6 +29,7 @@
|
||||
|:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:----------------------|
|
||||
| [Primary Source](#primary-source) | ref | All | All | None | True | True | False |
|
||||
| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,BMI2,POPCNT | True | True | False |
|
||||
| [cupqc-cuda](#cupqc-cuda) | cuda | CUDA | Linux,Darwin | None | False | False | False |
|
||||
|
||||
Are implementations chosen based on runtime CPU feature detection? **Yes**.
|
||||
|
||||
@ -36,6 +41,7 @@ Are implementations chosen based on runtime CPU feature detection? **Yes**.
|
||||
|:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:---------------------|
|
||||
| [Primary Source](#primary-source) | ref | All | All | None | True | True | False |
|
||||
| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,BMI2,POPCNT | True | True | False |
|
||||
| [cupqc-cuda](#cupqc-cuda) | cuda | CUDA | Linux,Darwin | None | False | False | False |
|
||||
|
||||
Are implementations chosen based on runtime CPU feature detection? **Yes**.
|
||||
|
||||
@ -45,6 +51,7 @@ Are implementations chosen based on runtime CPU feature detection? **Yes**.
|
||||
|:---------------------------------:|:-------------------------|:----------------------------|:--------------------------------|:------------------------|:-----------------------------------|:-----------------------------------------------|:---------------------|
|
||||
| [Primary Source](#primary-source) | ref | All | All | None | True | True | False |
|
||||
| [Primary Source](#primary-source) | avx2 | x86\_64 | Linux,Darwin | AVX2,BMI2,POPCNT | True | True | False |
|
||||
| [cupqc-cuda](#cupqc-cuda) | cuda | CUDA | Linux,Darwin | None | False | False | False |
|
||||
|
||||
Are implementations chosen based on runtime CPU feature detection? **Yes**.
|
||||
|
||||
|
@ -20,6 +20,10 @@ primary-upstream:
|
||||
source: https://github.com/pq-crystals/kyber/commit/10b478fc3cc4ff6215eb0b6a11bd758bf0929cbd
|
||||
with copy_from_upstream patches
|
||||
spdx-license-identifier: CC0-1.0 or Apache-2.0
|
||||
optimized-upstreams:
|
||||
cupqc-cuda:
|
||||
source: https://github.com/praveksharma/cupqc-mlkem/commit/b026f4e5475cd9c20c2082c7d9bad80e5b0ba89e
|
||||
spdx-license-identifier: Apache-2.0
|
||||
parameter-sets:
|
||||
- name: ML-KEM-512
|
||||
claimed-nist-level: 1
|
||||
@ -54,6 +58,16 @@ parameter-sets:
|
||||
no-secret-dependent-branching-claimed: true
|
||||
no-secret-dependent-branching-checked-by-valgrind: true
|
||||
large-stack-usage: false
|
||||
- upstream: cupqc-cuda
|
||||
upstream-id: cuda
|
||||
supported-platforms:
|
||||
- architecture: CUDA
|
||||
operating_systems:
|
||||
- Linux
|
||||
- Darwin
|
||||
no-secret-dependent-branching-claimed: false
|
||||
no-secret-dependent-branching-checked-by-valgrind: false
|
||||
large-stack-usage: false
|
||||
- name: ML-KEM-768
|
||||
claimed-nist-level: 3
|
||||
claimed-security: IND-CCA2
|
||||
@ -87,6 +101,16 @@ parameter-sets:
|
||||
no-secret-dependent-branching-claimed: true
|
||||
no-secret-dependent-branching-checked-by-valgrind: true
|
||||
large-stack-usage: false
|
||||
- upstream: cupqc-cuda
|
||||
upstream-id: cuda
|
||||
supported-platforms:
|
||||
- architecture: CUDA
|
||||
operating_systems:
|
||||
- Linux
|
||||
- Darwin
|
||||
no-secret-dependent-branching-claimed: false
|
||||
no-secret-dependent-branching-checked-by-valgrind: false
|
||||
large-stack-usage: false
|
||||
- name: ML-KEM-1024
|
||||
claimed-nist-level: 5
|
||||
claimed-security: IND-CCA2
|
||||
@ -120,3 +144,13 @@ parameter-sets:
|
||||
no-secret-dependent-branching-claimed: true
|
||||
no-secret-dependent-branching-checked-by-valgrind: true
|
||||
large-stack-usage: false
|
||||
- upstream: cupqc-cuda
|
||||
upstream-id: cuda
|
||||
supported-platforms:
|
||||
- architecture: CUDA
|
||||
operating_systems:
|
||||
- Linux
|
||||
- Darwin
|
||||
no-secret-dependent-branching-claimed: false
|
||||
no-secret-dependent-branching-checked-by-valgrind: false
|
||||
large-stack-usage: false
|
||||
|
@ -11,6 +11,18 @@ if(OQS_DIST_X86_64_BUILD OR ({% for flag in platform['required_flags'] -%} OQS_U
|
||||
{%- endif %}
|
||||
endif()
|
||||
{% if platform['operating_systems'] %}endif()
|
||||
{% endif -%}
|
||||
{%- endfor -%}
|
||||
{%- for platform in impl['supported_platforms'] if platform['architecture'] == 'CUDA' %}
|
||||
{% if platform['operating_systems'] %}if(CMAKE_SYSTEM_NAME MATCHES "{{ platform['operating_systems']|join('|') }}")
|
||||
{% endif -%}
|
||||
if(OQS_USE_CUPQC)
|
||||
cmake_dependent_option(OQS_ENABLE_KEM_{{ family['name'] }}_{{ scheme['scheme'] }}_{{ impl['name'] }} "" ON "OQS_ENABLE_KEM_{{ family['name'] }}_{{ scheme['scheme'] }}" OFF)
|
||||
{%- if 'alias_scheme' in scheme %}
|
||||
cmake_dependent_option(OQS_ENABLE_KEM_{{ family['name'] }}_{{ scheme['alias_scheme'] }}_{{ impl['name'] }} "" ON "OQS_ENABLE_KEM_{{ family['name'] }}_{{ scheme['alias_scheme'] }}" OFF)
|
||||
{%- endif %}
|
||||
endif()
|
||||
{% if platform['operating_systems'] %}endif()
|
||||
{% endif -%}
|
||||
{%- endfor -%}
|
||||
{%- for platform in impl['supported_platforms'] if platform['architecture'] == 'ARM64_V8' %}
|
||||
|
@ -495,14 +495,15 @@ def handle_implementation(impl, family, scheme, dst_basedir):
|
||||
else:
|
||||
# determine list of files to copy:
|
||||
if 'sources' in i:
|
||||
srcs = i['sources'].split(" ")
|
||||
for s in srcs:
|
||||
# Copy recursively only in case of directories not with plain files to avoid copying over symbolic links
|
||||
if os.path.isfile(os.path.join(origfolder, s)):
|
||||
subprocess.run(['cp', os.path.join(origfolder, s), os.path.join(srcfolder, os.path.basename(s))])
|
||||
else:
|
||||
subprocess.run(
|
||||
['cp', '-r', os.path.join(origfolder, s), os.path.join(srcfolder, os.path.basename(s))])
|
||||
if i['sources']:
|
||||
srcs = i['sources'].split(" ")
|
||||
for s in srcs:
|
||||
# Copy recursively only in case of directories not with plain files to avoid copying over symbolic links
|
||||
if os.path.isfile(os.path.join(origfolder, s)):
|
||||
subprocess.run(['cp', os.path.join(origfolder, s), os.path.join(srcfolder, os.path.basename(s))])
|
||||
else:
|
||||
subprocess.run(
|
||||
['cp', '-r', os.path.join(origfolder, s), os.path.join(srcfolder, os.path.basename(s))])
|
||||
else:
|
||||
subprocess.run(['cp', '-pr', os.path.join(origfolder, '.'), srcfolder])
|
||||
# raise Exception("Malformed YML file: No sources listed to copy. Check upstream YML file." )
|
||||
@ -598,14 +599,15 @@ def process_families(instructions, basedir, with_kat, with_generator, with_libja
|
||||
# when provided to the compiler; OQS uses the term ARM_NEON
|
||||
if req['architecture'] == 'arm_8':
|
||||
req['architecture'] = 'ARM64_V8'
|
||||
if req['architecture'] == 'ARM64_V8' and 'asimd' in req['required_flags']:
|
||||
req['required_flags'].remove('asimd')
|
||||
req['required_flags'].append('arm_neon')
|
||||
if req['architecture'] == 'ARM64_V8' and 'sha3' in req['required_flags']:
|
||||
req['required_flags'].remove('sha3')
|
||||
req['required_flags'].append('arm_sha3')
|
||||
impl['required_flags'] = req['required_flags']
|
||||
family['all_required_flags'].update(req['required_flags'])
|
||||
if 'required_flags' in req:
|
||||
if req['architecture'] == 'ARM64_V8' and 'asimd' in req['required_flags']:
|
||||
req['required_flags'].remove('asimd')
|
||||
req['required_flags'].append('arm_neon')
|
||||
if req['architecture'] == 'ARM64_V8' and 'sha3' in req['required_flags']:
|
||||
req['required_flags'].remove('sha3')
|
||||
req['required_flags'].append('arm_sha3')
|
||||
impl['required_flags'] = req['required_flags']
|
||||
family['all_required_flags'].update(req['required_flags'])
|
||||
except KeyError as ke:
|
||||
if (impl['name'] != family['default_implementation']):
|
||||
print("No required flags found for %s (KeyError %s on impl %s)" % (
|
||||
|
@ -38,6 +38,14 @@ upstreams:
|
||||
kem_meta_path: '{pretty_name_full}_META.yml'
|
||||
kem_scheme_path: '.'
|
||||
patches: [pqcrystals-ml_kem.patch]
|
||||
-
|
||||
name: cupqc
|
||||
git_url: https://github.com/praveksharma/cupqc-mlkem.git
|
||||
git_branch: main
|
||||
git_commit: b026f4e5475cd9c20c2082c7d9bad80e5b0ba89e
|
||||
kem_meta_path: '{pretty_name_full}_META.yml'
|
||||
kem_scheme_path: '.'
|
||||
patches: []
|
||||
-
|
||||
name: pqcrystals-dilithium
|
||||
git_url: https://github.com/pq-crystals/dilithium.git
|
||||
@ -166,6 +174,10 @@ kems:
|
||||
-
|
||||
name: ml_kem
|
||||
default_implementation: ref
|
||||
arch_specific_implementations:
|
||||
cuda: cuda
|
||||
arch_specific_upstream_locations:
|
||||
cuda: cupqc
|
||||
upstream_location: pqcrystals-kyber-standard
|
||||
schemes:
|
||||
-
|
||||
|
@ -33,11 +33,19 @@ if(OQS_ENABLE_KEM_{{ family }}_{{ scheme['scheme_c'] }}{%- if 'alias_scheme' in
|
||||
target_compile_options({{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }} PUBLIC {{ impl['compile_opts'] }})
|
||||
{%- endif -%}
|
||||
|
||||
{%- elif impl['name'] == 'cuda' %}
|
||||
|
||||
if(OQS_ENABLE_KEM_{{ family }}_{{ scheme['scheme_c'] }}_{{ impl['name'] }}{%- if 'alias_scheme' in scheme %} OR OQS_ENABLE_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_{{ impl['name'] }}{%- endif %})
|
||||
add_library({{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }} OBJECT {{ impl['upstream']['name'] }}_{{ scheme['pqclean_scheme'] }}_{{ impl['name'] }}/cupqc_ml-kem.cu)
|
||||
target_link_libraries({{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }} cupqc)
|
||||
set_property(TARGET {{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }} PROPERTY CUDA_ARCHITECTURES OFF)
|
||||
target_compile_options({{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }} PRIVATE {{ impl['compile_opts'] }})
|
||||
{%- else %}
|
||||
|
||||
if(OQS_ENABLE_KEM_{{ family }}_{{ scheme['scheme_c'] }}_{{ impl['name'] }}{%- if 'alias_scheme' in scheme %} OR OQS_ENABLE_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_{{ impl['name'] }}{%- endif %})
|
||||
add_library({{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }} OBJECT {% for source_file in impl['sources']|sort -%}{{ impl['upstream']['name'] }}_{{ scheme['pqclean_scheme'] }}_{{ impl['name'] }}/{{ source_file }}{%- if not loop.last %} {% endif -%}{%- endfor -%})
|
||||
{%- endif %}
|
||||
{%- if impl['name'] != 'cuda' %}
|
||||
target_include_directories({{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }} PRIVATE ${CMAKE_CURRENT_LIST_DIR}/{{ impl['upstream']['name'] }}_{{ scheme['pqclean_scheme'] }}_{{ impl['name'] }})
|
||||
target_include_directories({{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }} PRIVATE ${PROJECT_SOURCE_DIR}/src/common/pqclean_shims)
|
||||
{%- if impl['name'] != scheme['default_implementation'] and impl['required_flags'] -%}
|
||||
@ -60,6 +68,7 @@ if(OQS_ENABLE_KEM_{{ family }}_{{ scheme['scheme_c'] }}_{{ impl['name'] }}{%- if
|
||||
target_compile_definitions({{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }} PRIVATE old_gas_syntax)
|
||||
endif()
|
||||
{%- endif %}
|
||||
{%- endif %}{# cupqc #}
|
||||
set(_{{ family|upper }}_OBJS ${_{{ family|upper }}_OBJS} $<TARGET_OBJECTS:{{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }}>)
|
||||
endif()
|
||||
{%- endfor -%}
|
||||
|
@ -93,7 +93,9 @@ extern int {{ scheme['metadata']['default_dec_signature'] }}(uint8_t *ss, const
|
||||
{%- endfor %}
|
||||
|
||||
{%- for impl in scheme['metadata']['implementations'] if impl['name'] != scheme['default_implementation'] %}
|
||||
|
||||
{% if impl['name'] == 'cuda'%}
|
||||
#if defined(OQS_USE_CUPQC)
|
||||
{%- endif %}
|
||||
#if defined(OQS_ENABLE_KEM_{{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }}) {%- if 'alias_scheme' in scheme %} || defined(OQS_ENABLE_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_{{ impl['name'] }}){%- endif %}
|
||||
{%- if impl['signature_keypair'] %}
|
||||
extern int {{ impl['signature_keypair'] }}(uint8_t *pk, uint8_t *sk);
|
||||
@ -113,6 +115,9 @@ extern int {{ impl['signature_dec'] }}(uint8_t *ss, const uint8_t *ct, const uin
|
||||
extern int PQCLEAN_{{ scheme['pqclean_scheme_c']|upper }}_{{ impl['name']|upper }}_crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);
|
||||
{%- endif %}
|
||||
#endif
|
||||
{%- if impl['name'] == 'cuda'%}
|
||||
#endif /* OQS_USE_CUPQC */
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
|
||||
{%- if libjade_implementation is defined and scheme['libjade_implementation'] %}
|
||||
@ -166,7 +171,12 @@ OQS_API OQS_STATUS OQS_KEM_{{ family }}_{{ scheme['scheme'] }}_keypair(uint8_t *
|
||||
{% endfor -%}
|
||||
#else /*OQS_LIBJADE_BUILD && (OQS_ENABLE_LIBJADE_KEM_{{ family }}_{{ scheme['scheme'] }} {%- if 'alias_scheme' in scheme %} || OQS_ENABLE_LIBJADE_KEM_{{ family }}_{{ scheme['alias_scheme'] }}{%- endif %})*/
|
||||
{%- endif %}
|
||||
{%- for impl in scheme['metadata']['implementations'] if impl['name'] != scheme['default_implementation'] %}
|
||||
{%- for impl in scheme['metadata']['implementations'] if impl['name'] == 'cuda' %}
|
||||
#if defined(OQS_USE_CUPQC) && defined(OQS_ENABLE_KEM_{{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }})
|
||||
return (OQS_STATUS) {{ impl['signature_keypair'] }}(public_key, secret_key);
|
||||
#endif /* OQS_USE_CUPQC && OQS_ENABLE_KEM_{{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }} */
|
||||
{%- endfor %}
|
||||
{%- for impl in scheme['metadata']['implementations'] if (impl['name'] != scheme['default_implementation'] and impl['name'] != 'cuda') %}
|
||||
{%- if loop.first %}
|
||||
#if defined(OQS_ENABLE_KEM_{{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }}) {%- if 'alias_scheme' in scheme %} || defined(OQS_ENABLE_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_{{ impl['name'] }}){%- endif %}
|
||||
{%- else %}
|
||||
@ -240,7 +250,12 @@ OQS_API OQS_STATUS OQS_KEM_{{ family }}_{{ scheme['scheme'] }}_encaps(uint8_t *c
|
||||
{% endfor -%}
|
||||
#else /*OQS_LIBJADE_BUILD && (OQS_ENABLE_LIBJADE_KEM_{{ family }}_{{ scheme['scheme'] }} {%- if 'alias_scheme' in scheme %} || OQS_ENABLE_LIBJADE_KEM_{{ family }}_{{ scheme['alias_scheme'] }}{%- endif %})*/
|
||||
{%- endif %}
|
||||
{%- for impl in scheme['metadata']['implementations'] if impl['name'] != scheme['default_implementation'] %}
|
||||
{%- for impl in scheme['metadata']['implementations'] if impl['name'] == 'cuda' %}
|
||||
#if defined(OQS_USE_CUPQC) && defined(OQS_ENABLE_KEM_{{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }})
|
||||
return (OQS_STATUS) {{ impl['signature_enc'] }}(ciphertext, shared_secret, public_key);
|
||||
#endif /* OQS_USE_CUPQC && OQS_ENABLE_KEM_{{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }} */
|
||||
{%- endfor %}
|
||||
{%- for impl in scheme['metadata']['implementations'] if (impl['name'] != scheme['default_implementation'] and impl['name'] != 'cuda') %}
|
||||
{%- if loop.first %}
|
||||
#if defined(OQS_ENABLE_KEM_{{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }}) {%- if 'alias_scheme' in scheme %} || defined(OQS_ENABLE_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_{{ impl['name'] }}){%- endif %}
|
||||
{%- else %}
|
||||
@ -314,7 +329,12 @@ OQS_API OQS_STATUS OQS_KEM_{{ family }}_{{ scheme['scheme'] }}_decaps(uint8_t *s
|
||||
{% endfor -%}
|
||||
#else /*OQS_LIBJADE_BUILD && (OQS_ENABLE_LIBJADE_KEM_{{ family }}_{{ scheme['scheme'] }} {%- if 'alias_scheme' in scheme %} || OQS_ENABLE_LIBJADE_KEM_{{ family }}_{{ scheme['alias_scheme'] }}{%- endif %})*/
|
||||
{%- endif %}
|
||||
{%- for impl in scheme['metadata']['implementations'] if impl['name'] != scheme['default_implementation'] %}
|
||||
{%- for impl in scheme['metadata']['implementations'] if impl['name'] == 'cuda' %}
|
||||
#if defined(OQS_USE_CUPQC) && defined(OQS_ENABLE_KEM_{{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }})
|
||||
return (OQS_STATUS) {{ impl['signature_dec'] }}(shared_secret, ciphertext, secret_key);
|
||||
#endif /* OQS_USE_CUPQC && OQS_ENABLE_KEM_{{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }} */
|
||||
{%- endfor %}
|
||||
{%- for impl in scheme['metadata']['implementations'] if (impl['name'] != scheme['default_implementation'] and impl['name'] != 'cuda') %}
|
||||
{%- if loop.first %}
|
||||
#if defined(OQS_ENABLE_KEM_{{ family }}_{{ scheme['scheme'] }}_{{ impl['name'] }}) {%- if 'alias_scheme' in scheme %} || defined(OQS_ENABLE_KEM_{{ family }}_{{ scheme['alias_scheme'] }}_{{ impl['name'] }}){%- endif %}
|
||||
{%- else %}
|
||||
|
@ -95,8 +95,43 @@ def update_upstream_kem_alg_docs(liboqs_root, kems, upstream_info, write_changes
|
||||
oqs_yaml_path = os.path.join(liboqs_root, 'docs', 'algorithms', 'kem', '{}.yml'.format(kem['name']))
|
||||
if os.path.isfile(oqs_yaml_path):
|
||||
oqs_yaml = load_yaml(oqs_yaml_path)
|
||||
|
||||
upstream_base_url = ui['git_url'][:-len(".git")]
|
||||
# upstream is special: We will take the upstream git commit information
|
||||
# (possibly with added patch comment) as it is what drove the update
|
||||
|
||||
# Need to check if yml is of old format. If so, update to new format
|
||||
if 'primary-upstream' not in oqs_yaml:
|
||||
print("Updating format of {}. Please double check ordering of yaml file".format(scheme['pretty_name_full']))
|
||||
lhs = oqs_yaml['upstream']
|
||||
oqs_yaml['primary-upstream'] = dict()
|
||||
oqs_yaml['primary-upstream']['spdx-license-identifier'] = oqs_yaml['spdx-license-identifier']
|
||||
for i in range(len(oqs_yaml['parameter-sets'])):
|
||||
for j in range(len(oqs_yaml['parameter-sets'][i]['implementations'])):
|
||||
oqs_yaml['parameter-sets'][i]['implementations'][j]['upstream'] = 'primary-upstream'
|
||||
else:
|
||||
continue
|
||||
lhs = oqs_yaml['primary-upstream']['source']
|
||||
oqs_yaml['primary-upstream']['source'] = rhs_if_not_equal(lhs, ("{}/commit/{}"+patches_done).format(upstream_base_url, ui['git_commit']), "primary-upstream")
|
||||
if 'upstream' in oqs_yaml:
|
||||
del oqs_yaml['upstream']
|
||||
del oqs_yaml['spdx-license-identifier']
|
||||
|
||||
if ouis:
|
||||
for upstream in ouis:
|
||||
optimized_upstream_base_url = ouis[upstream]['git_url'][:-len(".git")]
|
||||
optimized_patches_done=""
|
||||
if 'patches' in ouis[upstream]:
|
||||
for patchfilename in ouis[upstream]['patches']:
|
||||
if kem['name'] in patchfilename:
|
||||
optimized_patches_done=" with copy_from_upstream patches"
|
||||
if 'optimized-upstreams' in oqs_yaml and upstream in oqs_yaml['optimized-upstreams']:
|
||||
lhs = oqs_yaml['optimized-upstreams'][upstream]['source']
|
||||
else:
|
||||
lhs = ''
|
||||
oqs_yaml['optimized-upstreams'] = oqs_yaml.get('optimized-upstreams', dict())
|
||||
oqs_yaml['optimized-upstreams'][upstream] = oqs_yaml['optimized-upstreams'].get(upstream, dict())
|
||||
git_commit = ouis[upstream]['git_commit']
|
||||
oqs_yaml['optimized-upstreams'][upstream]['source'] = rhs_if_not_equal(lhs, ("{}/commit/{}"+optimized_patches_done).format(optimized_upstream_base_url, git_commit), "optimized-upstreams")
|
||||
|
||||
# We cannot assume that the ordering of "parameter-sets"
|
||||
# in the OQS YAML files matches that of copy_from_upstream.yml
|
||||
@ -111,45 +146,6 @@ def update_upstream_kem_alg_docs(liboqs_root, kems, upstream_info, write_changes
|
||||
oqs_yaml['type'] = rhs_if_not_equal(oqs_yaml['type'], upstream_yaml['type'], "type")
|
||||
oqs_yaml['principal-submitters'] = rhs_if_not_equal(oqs_yaml['principal-submitters'], upstream_yaml['principal-submitters'], "principal-submitters")
|
||||
|
||||
upstream_base_url = ui['git_url'][:-len(".git")]
|
||||
# upstream is special: We will take the upstream git commit information
|
||||
# (possibly with added patch comment) as it is what drove the update
|
||||
|
||||
# Need to check if yml is of old format. If so, update to new format
|
||||
if 'primary-upstream' not in oqs_yaml:
|
||||
print("Updating format of {}. Please double check ordering of yaml file".format(scheme['pretty_name_full']))
|
||||
lhs = oqs_yaml['upstream']
|
||||
oqs_yaml['primary-upstream'] = dict()
|
||||
oqs_yaml['primary-upstream']['spdx-license-identifier'] = oqs_yaml['spdx-license-identifier']
|
||||
for i in range(len(oqs_yaml['parameter-sets'])):
|
||||
for j in range(len(oqs_yaml['parameter-sets'][i]['implementations'])):
|
||||
oqs_yaml['parameter-sets'][i]['implementations'][j]['upstream'] = 'primary-upstream'
|
||||
else:
|
||||
lhs = oqs_yaml['primary-upstream']['source']
|
||||
oqs_yaml['primary-upstream']['source'] = rhs_if_not_equal(lhs, ("{}/commit/{}"+patches_done).format(upstream_base_url, ui['git_commit']), "primary-upstream")
|
||||
if 'upstream' in oqs_yaml:
|
||||
del oqs_yaml['upstream']
|
||||
del oqs_yaml['spdx-license-identifier']
|
||||
|
||||
if ouis:
|
||||
for upstream in ouis:
|
||||
optimized_upstream_base_url = ouis[upstream]['git_url'][:-len(".git")]
|
||||
for patchfilename in ouis[upstream]['patches']:
|
||||
if kem['name'] in patchfilename:
|
||||
patches_done=" with copy_from_upstream patches"
|
||||
patches_done=""
|
||||
if 'patches' in ouis[upstream]:
|
||||
for patchfilename in ouis[upstream]['patches']:
|
||||
if kem['name'] in patchfilename:
|
||||
patches_done=" with copy_from_upstream patches"
|
||||
if 'optimized-upstreams' in oqs_yaml and upstream in oqs_yaml['optimized-upstreams']:
|
||||
lhs = oqs_yaml['optimized-upstreams'][upstream]['source']
|
||||
else:
|
||||
lhs = ''
|
||||
git_commit = ouis[upstream]['git_commit']
|
||||
oqs_yaml['optimized-upstreams'][upstream]['source'] = rhs_if_not_equal(lhs, ("{}/commit/{}"+patches_done).format(optimized_upstream_base_url, git_commit), "optimized-upstreams")
|
||||
|
||||
|
||||
if 'auxiliary-submitters' in upstream_yaml:
|
||||
oqs_yaml['auxiliary-submitters'] = rhs_if_not_equal(oqs_yaml['auxiliary-submitters'] if 'auxiliary-submitters' in oqs_yaml else '', upstream_yaml['auxiliary-submitters'], "auxiliary-submitters")
|
||||
|
||||
@ -204,7 +200,7 @@ def update_upstream_kem_alg_docs(liboqs_root, kems, upstream_info, write_changes
|
||||
upstream_impl['supported_platforms'][i]['architecture'] = 'ARM64_V8'
|
||||
if 'asimd' in upstream_impl['supported_platforms'][i]['required_flags']:
|
||||
upstream_impl['supported_platforms'][i]['required_flags'].remove('asimd')
|
||||
if not upstream_impl['supported_platforms'][i]['required_flags']:
|
||||
if 'required_flags' in upstream_impl['supported_platforms'][i] and not upstream_impl['supported_platforms'][i]['required_flags']:
|
||||
del upstream_impl['supported_platforms'][i]['required_flags']
|
||||
|
||||
impl['supported-platforms'] = rhs_if_not_equal(impl['supported-platforms'], upstream_impl['supported_platforms'], "supported-platforms")
|
||||
|
@ -99,6 +99,11 @@ if(${OQS_USE_OPENSSL})
|
||||
target_link_libraries(oqs-internal PRIVATE ${OPENSSL_CRYPTO_LIBRARY})
|
||||
endif()
|
||||
endif()
|
||||
if(${OQS_USE_CUPQC})
|
||||
set_property(TARGET oqs PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
|
||||
target_link_libraries(oqs PRIVATE cupqc)
|
||||
target_link_options(oqs PRIVATE $<DEVICE_LINK: -dlto>)
|
||||
endif()
|
||||
|
||||
target_include_directories(oqs
|
||||
PUBLIC
|
||||
|
@ -23,6 +23,14 @@ if(OQS_ENABLE_KEM_ml_kem_512_avx2)
|
||||
set(_ML_KEM_OBJS ${_ML_KEM_OBJS} $<TARGET_OBJECTS:ml_kem_512_avx2>)
|
||||
endif()
|
||||
|
||||
if(OQS_ENABLE_KEM_ml_kem_512_cuda)
|
||||
add_library(ml_kem_512_cuda OBJECT cupqc_ml-kem-512_cuda/cupqc_ml-kem.cu)
|
||||
target_link_libraries(ml_kem_512_cuda cupqc)
|
||||
set_property(TARGET ml_kem_512_cuda PROPERTY CUDA_ARCHITECTURES OFF)
|
||||
target_compile_options(ml_kem_512_cuda PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-rdc=true -dlto -arch=compute_70>)
|
||||
set(_ML_KEM_OBJS ${_ML_KEM_OBJS} $<TARGET_OBJECTS:ml_kem_512_cuda>)
|
||||
endif()
|
||||
|
||||
if(OQS_ENABLE_KEM_ml_kem_768)
|
||||
add_library(ml_kem_768_ref OBJECT kem_ml_kem_768.c pqcrystals-kyber-standard_ml-kem-768_ref/cbd.c pqcrystals-kyber-standard_ml-kem-768_ref/indcpa.c pqcrystals-kyber-standard_ml-kem-768_ref/kem.c pqcrystals-kyber-standard_ml-kem-768_ref/ntt.c pqcrystals-kyber-standard_ml-kem-768_ref/poly.c pqcrystals-kyber-standard_ml-kem-768_ref/polyvec.c pqcrystals-kyber-standard_ml-kem-768_ref/reduce.c pqcrystals-kyber-standard_ml-kem-768_ref/symmetric-shake.c pqcrystals-kyber-standard_ml-kem-768_ref/verify.c)
|
||||
target_compile_options(ml_kem_768_ref PUBLIC -DKYBER_K=3)
|
||||
@ -41,6 +49,14 @@ if(OQS_ENABLE_KEM_ml_kem_768_avx2)
|
||||
set(_ML_KEM_OBJS ${_ML_KEM_OBJS} $<TARGET_OBJECTS:ml_kem_768_avx2>)
|
||||
endif()
|
||||
|
||||
if(OQS_ENABLE_KEM_ml_kem_768_cuda)
|
||||
add_library(ml_kem_768_cuda OBJECT cupqc_ml-kem-768_cuda/cupqc_ml-kem.cu)
|
||||
target_link_libraries(ml_kem_768_cuda cupqc)
|
||||
set_property(TARGET ml_kem_768_cuda PROPERTY CUDA_ARCHITECTURES OFF)
|
||||
target_compile_options(ml_kem_768_cuda PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-rdc=true -dlto -arch=compute_70>)
|
||||
set(_ML_KEM_OBJS ${_ML_KEM_OBJS} $<TARGET_OBJECTS:ml_kem_768_cuda>)
|
||||
endif()
|
||||
|
||||
if(OQS_ENABLE_KEM_ml_kem_1024)
|
||||
add_library(ml_kem_1024_ref OBJECT kem_ml_kem_1024.c pqcrystals-kyber-standard_ml-kem-1024_ref/cbd.c pqcrystals-kyber-standard_ml-kem-1024_ref/indcpa.c pqcrystals-kyber-standard_ml-kem-1024_ref/kem.c pqcrystals-kyber-standard_ml-kem-1024_ref/ntt.c pqcrystals-kyber-standard_ml-kem-1024_ref/poly.c pqcrystals-kyber-standard_ml-kem-1024_ref/polyvec.c pqcrystals-kyber-standard_ml-kem-1024_ref/reduce.c pqcrystals-kyber-standard_ml-kem-1024_ref/symmetric-shake.c pqcrystals-kyber-standard_ml-kem-1024_ref/verify.c)
|
||||
target_compile_options(ml_kem_1024_ref PUBLIC -DKYBER_K=4)
|
||||
@ -59,4 +75,12 @@ if(OQS_ENABLE_KEM_ml_kem_1024_avx2)
|
||||
set(_ML_KEM_OBJS ${_ML_KEM_OBJS} $<TARGET_OBJECTS:ml_kem_1024_avx2>)
|
||||
endif()
|
||||
|
||||
if(OQS_ENABLE_KEM_ml_kem_1024_cuda)
|
||||
add_library(ml_kem_1024_cuda OBJECT cupqc_ml-kem-1024_cuda/cupqc_ml-kem.cu)
|
||||
target_link_libraries(ml_kem_1024_cuda cupqc)
|
||||
set_property(TARGET ml_kem_1024_cuda PROPERTY CUDA_ARCHITECTURES OFF)
|
||||
target_compile_options(ml_kem_1024_cuda PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-rdc=true -dlto -arch=compute_70>)
|
||||
set(_ML_KEM_OBJS ${_ML_KEM_OBJS} $<TARGET_OBJECTS:ml_kem_1024_cuda>)
|
||||
endif()
|
||||
|
||||
set(ML_KEM_OBJS ${_ML_KEM_OBJS} PARENT_SCOPE)
|
||||
|
172
src/kem/ml_kem/cupqc_ml-kem-1024_cuda/cupqc_ml-kem.cu
Normal file
172
src/kem/ml_kem/cupqc_ml-kem-1024_cuda/cupqc_ml-kem.cu
Normal file
@ -0,0 +1,172 @@
|
||||
/*
|
||||
* Copyright 2025 Nvidia Corporation
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
**/
|
||||
|
||||
#include <cupqc.hpp>
|
||||
#include <stdexcept>
|
||||
#include <oqs/oqsconfig.h>
|
||||
|
||||
using namespace cupqc;
|
||||
|
||||
// Checks the return value from a CUDA API function
|
||||
#define CUDA_CHECK(err) \
|
||||
if (err != cudaSuccess) { failure = true; goto cleanup; }
|
||||
|
||||
template<class MLKEM_Keygen>
|
||||
__global__ void keygen_kernel(uint8_t *pk, uint8_t *sk, uint8_t *workspace, uint8_t *randombytes) {
|
||||
__shared__ uint8_t smem_ptr[MLKEM_Keygen::shared_memory_size];
|
||||
MLKEM_Keygen().execute(pk, sk, randombytes, workspace, smem_ptr);
|
||||
}
|
||||
|
||||
template<class MLKEM_Base>
|
||||
int keypair(uint8_t *pk, uint8_t *sk) {
|
||||
using MLKEM_Keygen = decltype(MLKEM_Base() + Function<function::Keygen>());
|
||||
|
||||
bool failure = false;
|
||||
uint8_t *workspace = nullptr, *randombytes=nullptr;
|
||||
uint8_t *d_pk = nullptr, *d_sk = nullptr;
|
||||
|
||||
// Allocate device workspaces
|
||||
try {
|
||||
workspace = make_workspace<MLKEM_Keygen>(1);
|
||||
randombytes = get_entropy<MLKEM_Keygen>(1);
|
||||
} catch (const std::runtime_error& ex) {
|
||||
failure = true;
|
||||
goto cleanup;
|
||||
}
|
||||
CUDA_CHECK(cudaMalloc((void**)&d_pk, MLKEM_Keygen::public_key_size));
|
||||
CUDA_CHECK(cudaMalloc((void**)&d_sk, MLKEM_Keygen::secret_key_size));
|
||||
|
||||
// Run routine
|
||||
keygen_kernel<MLKEM_Keygen><<<1, MLKEM_Keygen::BlockDim>>>(d_pk, d_sk, workspace, randombytes);
|
||||
|
||||
// Copy data back to the host
|
||||
CUDA_CHECK(cudaMemcpy(pk, d_pk, MLKEM_Keygen::public_key_size, cudaMemcpyDefault));
|
||||
CUDA_CHECK(cudaMemcpy(sk, d_sk, MLKEM_Keygen::secret_key_size, cudaMemcpyDefault));
|
||||
|
||||
cleanup:
|
||||
// Free device memory
|
||||
if (d_pk != nullptr) cudaFree(d_pk);
|
||||
if (d_sk != nullptr) cudaFree(d_sk);
|
||||
if (workspace != nullptr) destroy_workspace(workspace);
|
||||
if (randombytes != nullptr) release_entropy(randombytes);
|
||||
|
||||
return failure ? -1 : 0;
|
||||
}
|
||||
|
||||
template<class MLKEM_Encaps>
|
||||
__global__ void encaps_kernel(uint8_t *ct, uint8_t *ss, const uint8_t *pk, uint8_t *workspace, uint8_t *randombytes) {
|
||||
__shared__ uint8_t smem_ptr[MLKEM_Encaps::shared_memory_size];
|
||||
MLKEM_Encaps().execute(ct, ss, pk, randombytes, workspace, smem_ptr);
|
||||
}
|
||||
|
||||
template<class MLKEM_Base>
|
||||
int encaps(uint8_t *ct, uint8_t *ss, const uint8_t *pk) {
|
||||
using MLKEM_Encaps = decltype(MLKEM_Base() + Function<function::Encaps>());
|
||||
|
||||
bool failure = false;
|
||||
uint8_t *workspace = nullptr, *randombytes=nullptr;
|
||||
uint8_t *d_ct = nullptr, *d_ss = nullptr, *d_pk = nullptr;
|
||||
|
||||
// Allocate device workspaces
|
||||
try {
|
||||
workspace = make_workspace<MLKEM_Encaps>(1);
|
||||
randombytes = get_entropy<MLKEM_Encaps>(1);
|
||||
} catch (const std::runtime_error& ex) {
|
||||
failure = true;
|
||||
goto cleanup;
|
||||
}
|
||||
CUDA_CHECK(cudaMalloc((void**)&d_ct, MLKEM_Encaps::ciphertext_size));
|
||||
CUDA_CHECK(cudaMalloc((void**)&d_ss, MLKEM_Encaps::shared_secret_size));
|
||||
CUDA_CHECK(cudaMalloc((void**)&d_pk, MLKEM_Encaps::public_key_size));
|
||||
|
||||
// Copy data to GPU
|
||||
CUDA_CHECK(cudaMemcpy(d_pk, pk, MLKEM_Encaps::public_key_size, cudaMemcpyDefault));
|
||||
|
||||
// Run routine
|
||||
encaps_kernel<MLKEM_Encaps><<<1, MLKEM_Encaps::BlockDim>>>(d_ct, d_ss, d_pk, workspace, randombytes);
|
||||
|
||||
// Copy data back to the host
|
||||
CUDA_CHECK(cudaMemcpy(ct, d_ct, MLKEM_Encaps::ciphertext_size, cudaMemcpyDefault));
|
||||
CUDA_CHECK(cudaMemcpy(ss, d_ss, MLKEM_Encaps::shared_secret_size, cudaMemcpyDefault));
|
||||
|
||||
cleanup:
|
||||
// Free device memory
|
||||
if (d_ct != nullptr) cudaFree(d_ct);
|
||||
if (d_ss != nullptr) cudaFree(d_ss);
|
||||
if (d_pk != nullptr) cudaFree(d_pk);
|
||||
if (workspace != nullptr) destroy_workspace(workspace);
|
||||
if (randombytes != nullptr) release_entropy(randombytes);
|
||||
|
||||
return failure ? -1 : 0;
|
||||
}
|
||||
|
||||
template<class MLKEM_Decaps>
|
||||
__global__ void decaps_kernel(uint8_t *ss, const uint8_t *ct, const uint8_t *sk, uint8_t *workspace) {
|
||||
__shared__ uint8_t smem_ptr[MLKEM_Decaps::shared_memory_size];
|
||||
MLKEM_Decaps().execute(ss, ct, sk, workspace, smem_ptr);
|
||||
}
|
||||
|
||||
template<class MLKEM_Base>
|
||||
int decaps(uint8_t *ss, const uint8_t *ct, const uint8_t *sk) {
|
||||
using MLKEM_Decaps = decltype(MLKEM_Base() + Function<function::Decaps>());
|
||||
|
||||
bool failure = false;
|
||||
uint8_t *workspace = nullptr;
|
||||
uint8_t *d_ct = nullptr, *d_ss = nullptr, *d_sk = nullptr;
|
||||
|
||||
// Allocate device workspaces
|
||||
try {
|
||||
workspace = make_workspace<MLKEM_Decaps>(1);
|
||||
} catch (const std::runtime_error& ex) {
|
||||
failure = true;
|
||||
goto cleanup;
|
||||
}
|
||||
CUDA_CHECK(cudaMalloc((void**)&d_ct, MLKEM_Decaps::ciphertext_size));
|
||||
CUDA_CHECK(cudaMalloc((void**)&d_ss, MLKEM_Decaps::shared_secret_size));
|
||||
CUDA_CHECK(cudaMalloc((void**)&d_sk, MLKEM_Decaps::secret_key_size));
|
||||
|
||||
// Copy data to GPU
|
||||
CUDA_CHECK(cudaMemcpy(d_sk, sk, MLKEM_Decaps::secret_key_size, cudaMemcpyDefault));
|
||||
CUDA_CHECK(cudaMemcpy(d_ct, ct, MLKEM_Decaps::ciphertext_size, cudaMemcpyDefault));
|
||||
|
||||
// Run routine
|
||||
decaps_kernel<MLKEM_Decaps><<<1, MLKEM_Decaps::BlockDim>>>(d_ss, d_ct, d_sk, workspace);
|
||||
|
||||
// Copy data back to the host
|
||||
CUDA_CHECK(cudaMemcpy(ss, d_ss, MLKEM_Decaps::shared_secret_size, cudaMemcpyDefault));
|
||||
|
||||
cleanup:
|
||||
// Free device memory
|
||||
if (d_ct != nullptr) cudaFree(d_ct);
|
||||
if (d_ss != nullptr) cudaFree(d_ss);
|
||||
if (d_sk != nullptr) cudaFree(d_sk);
|
||||
if (workspace != nullptr) destroy_workspace(workspace);
|
||||
|
||||
return failure ? -1 : 0;
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
using KEM_1024 = decltype(ML_KEM_1024() + Block());
|
||||
|
||||
#if defined(OQS_ENABLE_KEM_ml_kem_1024_cuda)
|
||||
int cupqc_ml_kem_1024_keypair(uint8_t *pk, uint8_t *sk) {
|
||||
return keypair<KEM_1024>(pk, sk);
|
||||
}
|
||||
int cupqc_ml_kem_1024_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk) {
|
||||
return encaps<KEM_1024>(ct, ss, pk);
|
||||
}
|
||||
int cupqc_ml_kem_1024_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk) {
|
||||
return decaps<KEM_1024>(ss, ct, sk);
|
||||
}
|
||||
#endif
|
||||
}
|
172
src/kem/ml_kem/cupqc_ml-kem-512_cuda/cupqc_ml-kem.cu
Normal file
172
src/kem/ml_kem/cupqc_ml-kem-512_cuda/cupqc_ml-kem.cu
Normal file
@ -0,0 +1,172 @@
|
||||
/*
|
||||
* Copyright 2025 Nvidia Corporation
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
**/
|
||||
|
||||
#include <cupqc.hpp>
|
||||
#include <stdexcept>
|
||||
#include <oqs/oqsconfig.h>
|
||||
|
||||
using namespace cupqc;
|
||||
|
||||
// Checks the return value from a CUDA API function
|
||||
#define CUDA_CHECK(err) \
|
||||
if (err != cudaSuccess) { failure = true; goto cleanup; }
|
||||
|
||||
template<class MLKEM_Keygen>
|
||||
__global__ void keygen_kernel(uint8_t *pk, uint8_t *sk, uint8_t *workspace, uint8_t *randombytes) {
|
||||
__shared__ uint8_t smem_ptr[MLKEM_Keygen::shared_memory_size];
|
||||
MLKEM_Keygen().execute(pk, sk, randombytes, workspace, smem_ptr);
|
||||
}
|
||||
|
||||
template<class MLKEM_Base>
|
||||
int keypair(uint8_t *pk, uint8_t *sk) {
|
||||
using MLKEM_Keygen = decltype(MLKEM_Base() + Function<function::Keygen>());
|
||||
|
||||
bool failure = false;
|
||||
uint8_t *workspace = nullptr, *randombytes=nullptr;
|
||||
uint8_t *d_pk = nullptr, *d_sk = nullptr;
|
||||
|
||||
// Allocate device workspaces
|
||||
try {
|
||||
workspace = make_workspace<MLKEM_Keygen>(1);
|
||||
randombytes = get_entropy<MLKEM_Keygen>(1);
|
||||
} catch (const std::runtime_error& ex) {
|
||||
failure = true;
|
||||
goto cleanup;
|
||||
}
|
||||
CUDA_CHECK(cudaMalloc((void**)&d_pk, MLKEM_Keygen::public_key_size));
|
||||
CUDA_CHECK(cudaMalloc((void**)&d_sk, MLKEM_Keygen::secret_key_size));
|
||||
|
||||
// Run routine
|
||||
keygen_kernel<MLKEM_Keygen><<<1, MLKEM_Keygen::BlockDim>>>(d_pk, d_sk, workspace, randombytes);
|
||||
|
||||
// Copy data back to the host
|
||||
CUDA_CHECK(cudaMemcpy(pk, d_pk, MLKEM_Keygen::public_key_size, cudaMemcpyDefault));
|
||||
CUDA_CHECK(cudaMemcpy(sk, d_sk, MLKEM_Keygen::secret_key_size, cudaMemcpyDefault));
|
||||
|
||||
cleanup:
|
||||
// Free device memory
|
||||
if (d_pk != nullptr) cudaFree(d_pk);
|
||||
if (d_sk != nullptr) cudaFree(d_sk);
|
||||
if (workspace != nullptr) destroy_workspace(workspace);
|
||||
if (randombytes != nullptr) release_entropy(randombytes);
|
||||
|
||||
return failure ? -1 : 0;
|
||||
}
|
||||
|
||||
template<class MLKEM_Encaps>
|
||||
__global__ void encaps_kernel(uint8_t *ct, uint8_t *ss, const uint8_t *pk, uint8_t *workspace, uint8_t *randombytes) {
|
||||
__shared__ uint8_t smem_ptr[MLKEM_Encaps::shared_memory_size];
|
||||
MLKEM_Encaps().execute(ct, ss, pk, randombytes, workspace, smem_ptr);
|
||||
}
|
||||
|
||||
template<class MLKEM_Base>
|
||||
int encaps(uint8_t *ct, uint8_t *ss, const uint8_t *pk) {
|
||||
using MLKEM_Encaps = decltype(MLKEM_Base() + Function<function::Encaps>());
|
||||
|
||||
bool failure = false;
|
||||
uint8_t *workspace = nullptr, *randombytes=nullptr;
|
||||
uint8_t *d_ct = nullptr, *d_ss = nullptr, *d_pk = nullptr;
|
||||
|
||||
// Allocate device workspaces
|
||||
try {
|
||||
workspace = make_workspace<MLKEM_Encaps>(1);
|
||||
randombytes = get_entropy<MLKEM_Encaps>(1);
|
||||
} catch (const std::runtime_error& ex) {
|
||||
failure = true;
|
||||
goto cleanup;
|
||||
}
|
||||
CUDA_CHECK(cudaMalloc((void**)&d_ct, MLKEM_Encaps::ciphertext_size));
|
||||
CUDA_CHECK(cudaMalloc((void**)&d_ss, MLKEM_Encaps::shared_secret_size));
|
||||
CUDA_CHECK(cudaMalloc((void**)&d_pk, MLKEM_Encaps::public_key_size));
|
||||
|
||||
// Copy data to GPU
|
||||
CUDA_CHECK(cudaMemcpy(d_pk, pk, MLKEM_Encaps::public_key_size, cudaMemcpyDefault));
|
||||
|
||||
// Run routine
|
||||
encaps_kernel<MLKEM_Encaps><<<1, MLKEM_Encaps::BlockDim>>>(d_ct, d_ss, d_pk, workspace, randombytes);
|
||||
|
||||
// Copy data back to the host
|
||||
CUDA_CHECK(cudaMemcpy(ct, d_ct, MLKEM_Encaps::ciphertext_size, cudaMemcpyDefault));
|
||||
CUDA_CHECK(cudaMemcpy(ss, d_ss, MLKEM_Encaps::shared_secret_size, cudaMemcpyDefault));
|
||||
|
||||
cleanup:
|
||||
// Free device memory
|
||||
if (d_ct != nullptr) cudaFree(d_ct);
|
||||
if (d_ss != nullptr) cudaFree(d_ss);
|
||||
if (d_pk != nullptr) cudaFree(d_pk);
|
||||
if (workspace != nullptr) destroy_workspace(workspace);
|
||||
if (randombytes != nullptr) release_entropy(randombytes);
|
||||
|
||||
return failure ? -1 : 0;
|
||||
}
|
||||
|
||||
template<class MLKEM_Decaps>
|
||||
__global__ void decaps_kernel(uint8_t *ss, const uint8_t *ct, const uint8_t *sk, uint8_t *workspace) {
|
||||
__shared__ uint8_t smem_ptr[MLKEM_Decaps::shared_memory_size];
|
||||
MLKEM_Decaps().execute(ss, ct, sk, workspace, smem_ptr);
|
||||
}
|
||||
|
||||
template<class MLKEM_Base>
|
||||
int decaps(uint8_t *ss, const uint8_t *ct, const uint8_t *sk) {
|
||||
using MLKEM_Decaps = decltype(MLKEM_Base() + Function<function::Decaps>());
|
||||
|
||||
bool failure = false;
|
||||
uint8_t *workspace = nullptr;
|
||||
uint8_t *d_ct = nullptr, *d_ss = nullptr, *d_sk = nullptr;
|
||||
|
||||
// Allocate device workspaces
|
||||
try {
|
||||
workspace = make_workspace<MLKEM_Decaps>(1);
|
||||
} catch (const std::runtime_error& ex) {
|
||||
failure = true;
|
||||
goto cleanup;
|
||||
}
|
||||
CUDA_CHECK(cudaMalloc((void**)&d_ct, MLKEM_Decaps::ciphertext_size));
|
||||
CUDA_CHECK(cudaMalloc((void**)&d_ss, MLKEM_Decaps::shared_secret_size));
|
||||
CUDA_CHECK(cudaMalloc((void**)&d_sk, MLKEM_Decaps::secret_key_size));
|
||||
|
||||
// Copy data to GPU
|
||||
CUDA_CHECK(cudaMemcpy(d_sk, sk, MLKEM_Decaps::secret_key_size, cudaMemcpyDefault));
|
||||
CUDA_CHECK(cudaMemcpy(d_ct, ct, MLKEM_Decaps::ciphertext_size, cudaMemcpyDefault));
|
||||
|
||||
// Run routine
|
||||
decaps_kernel<MLKEM_Decaps><<<1, MLKEM_Decaps::BlockDim>>>(d_ss, d_ct, d_sk, workspace);
|
||||
|
||||
// Copy data back to the host
|
||||
CUDA_CHECK(cudaMemcpy(ss, d_ss, MLKEM_Decaps::shared_secret_size, cudaMemcpyDefault));
|
||||
|
||||
cleanup:
|
||||
// Free device memory
|
||||
if (d_ct != nullptr) cudaFree(d_ct);
|
||||
if (d_ss != nullptr) cudaFree(d_ss);
|
||||
if (d_sk != nullptr) cudaFree(d_sk);
|
||||
if (workspace != nullptr) destroy_workspace(workspace);
|
||||
|
||||
return failure ? -1 : 0;
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
using KEM_512 = decltype(ML_KEM_512() + Block());
|
||||
|
||||
#if defined(OQS_ENABLE_KEM_ml_kem_512_cuda)
|
||||
int cupqc_ml_kem_512_keypair(uint8_t *pk, uint8_t *sk) {
|
||||
return keypair<KEM_512>(pk, sk);
|
||||
}
|
||||
int cupqc_ml_kem_512_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk) {
|
||||
return encaps<KEM_512>(ct, ss, pk);
|
||||
}
|
||||
int cupqc_ml_kem_512_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk) {
|
||||
return decaps<KEM_512>(ss, ct, sk);
|
||||
}
|
||||
#endif
|
||||
}
|
172
src/kem/ml_kem/cupqc_ml-kem-768_cuda/cupqc_ml-kem.cu
Normal file
172
src/kem/ml_kem/cupqc_ml-kem-768_cuda/cupqc_ml-kem.cu
Normal file
@ -0,0 +1,172 @@
|
||||
/*
|
||||
* Copyright 2025 Nvidia Corporation
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
**/
|
||||
|
||||
#include <cupqc.hpp>
|
||||
#include <stdexcept>
|
||||
#include <oqs/oqsconfig.h>
|
||||
|
||||
using namespace cupqc;
|
||||
|
||||
// Checks the return value from a CUDA API function
|
||||
#define CUDA_CHECK(err) \
|
||||
if (err != cudaSuccess) { failure = true; goto cleanup; }
|
||||
|
||||
template<class MLKEM_Keygen>
|
||||
__global__ void keygen_kernel(uint8_t *pk, uint8_t *sk, uint8_t *workspace, uint8_t *randombytes) {
|
||||
__shared__ uint8_t smem_ptr[MLKEM_Keygen::shared_memory_size];
|
||||
MLKEM_Keygen().execute(pk, sk, randombytes, workspace, smem_ptr);
|
||||
}
|
||||
|
||||
template<class MLKEM_Base>
|
||||
int keypair(uint8_t *pk, uint8_t *sk) {
|
||||
using MLKEM_Keygen = decltype(MLKEM_Base() + Function<function::Keygen>());
|
||||
|
||||
bool failure = false;
|
||||
uint8_t *workspace = nullptr, *randombytes=nullptr;
|
||||
uint8_t *d_pk = nullptr, *d_sk = nullptr;
|
||||
|
||||
// Allocate device workspaces
|
||||
try {
|
||||
workspace = make_workspace<MLKEM_Keygen>(1);
|
||||
randombytes = get_entropy<MLKEM_Keygen>(1);
|
||||
} catch (const std::runtime_error& ex) {
|
||||
failure = true;
|
||||
goto cleanup;
|
||||
}
|
||||
CUDA_CHECK(cudaMalloc((void**)&d_pk, MLKEM_Keygen::public_key_size));
|
||||
CUDA_CHECK(cudaMalloc((void**)&d_sk, MLKEM_Keygen::secret_key_size));
|
||||
|
||||
// Run routine
|
||||
keygen_kernel<MLKEM_Keygen><<<1, MLKEM_Keygen::BlockDim>>>(d_pk, d_sk, workspace, randombytes);
|
||||
|
||||
// Copy data back to the host
|
||||
CUDA_CHECK(cudaMemcpy(pk, d_pk, MLKEM_Keygen::public_key_size, cudaMemcpyDefault));
|
||||
CUDA_CHECK(cudaMemcpy(sk, d_sk, MLKEM_Keygen::secret_key_size, cudaMemcpyDefault));
|
||||
|
||||
cleanup:
|
||||
// Free device memory
|
||||
if (d_pk != nullptr) cudaFree(d_pk);
|
||||
if (d_sk != nullptr) cudaFree(d_sk);
|
||||
if (workspace != nullptr) destroy_workspace(workspace);
|
||||
if (randombytes != nullptr) release_entropy(randombytes);
|
||||
|
||||
return failure ? -1 : 0;
|
||||
}
|
||||
|
||||
template<class MLKEM_Encaps>
|
||||
__global__ void encaps_kernel(uint8_t *ct, uint8_t *ss, const uint8_t *pk, uint8_t *workspace, uint8_t *randombytes) {
|
||||
__shared__ uint8_t smem_ptr[MLKEM_Encaps::shared_memory_size];
|
||||
MLKEM_Encaps().execute(ct, ss, pk, randombytes, workspace, smem_ptr);
|
||||
}
|
||||
|
||||
template<class MLKEM_Base>
|
||||
int encaps(uint8_t *ct, uint8_t *ss, const uint8_t *pk) {
|
||||
using MLKEM_Encaps = decltype(MLKEM_Base() + Function<function::Encaps>());
|
||||
|
||||
bool failure = false;
|
||||
uint8_t *workspace = nullptr, *randombytes=nullptr;
|
||||
uint8_t *d_ct = nullptr, *d_ss = nullptr, *d_pk = nullptr;
|
||||
|
||||
// Allocate device workspaces
|
||||
try {
|
||||
workspace = make_workspace<MLKEM_Encaps>(1);
|
||||
randombytes = get_entropy<MLKEM_Encaps>(1);
|
||||
} catch (const std::runtime_error& ex) {
|
||||
failure = true;
|
||||
goto cleanup;
|
||||
}
|
||||
CUDA_CHECK(cudaMalloc((void**)&d_ct, MLKEM_Encaps::ciphertext_size));
|
||||
CUDA_CHECK(cudaMalloc((void**)&d_ss, MLKEM_Encaps::shared_secret_size));
|
||||
CUDA_CHECK(cudaMalloc((void**)&d_pk, MLKEM_Encaps::public_key_size));
|
||||
|
||||
// Copy data to GPU
|
||||
CUDA_CHECK(cudaMemcpy(d_pk, pk, MLKEM_Encaps::public_key_size, cudaMemcpyDefault));
|
||||
|
||||
// Run routine
|
||||
encaps_kernel<MLKEM_Encaps><<<1, MLKEM_Encaps::BlockDim>>>(d_ct, d_ss, d_pk, workspace, randombytes);
|
||||
|
||||
// Copy data back to the host
|
||||
CUDA_CHECK(cudaMemcpy(ct, d_ct, MLKEM_Encaps::ciphertext_size, cudaMemcpyDefault));
|
||||
CUDA_CHECK(cudaMemcpy(ss, d_ss, MLKEM_Encaps::shared_secret_size, cudaMemcpyDefault));
|
||||
|
||||
cleanup:
|
||||
// Free device memory
|
||||
if (d_ct != nullptr) cudaFree(d_ct);
|
||||
if (d_ss != nullptr) cudaFree(d_ss);
|
||||
if (d_pk != nullptr) cudaFree(d_pk);
|
||||
if (workspace != nullptr) destroy_workspace(workspace);
|
||||
if (randombytes != nullptr) release_entropy(randombytes);
|
||||
|
||||
return failure ? -1 : 0;
|
||||
}
|
||||
|
||||
template<class MLKEM_Decaps>
|
||||
__global__ void decaps_kernel(uint8_t *ss, const uint8_t *ct, const uint8_t *sk, uint8_t *workspace) {
|
||||
__shared__ uint8_t smem_ptr[MLKEM_Decaps::shared_memory_size];
|
||||
MLKEM_Decaps().execute(ss, ct, sk, workspace, smem_ptr);
|
||||
}
|
||||
|
||||
template<class MLKEM_Base>
|
||||
int decaps(uint8_t *ss, const uint8_t *ct, const uint8_t *sk) {
|
||||
using MLKEM_Decaps = decltype(MLKEM_Base() + Function<function::Decaps>());
|
||||
|
||||
bool failure = false;
|
||||
uint8_t *workspace = nullptr;
|
||||
uint8_t *d_ct = nullptr, *d_ss = nullptr, *d_sk = nullptr;
|
||||
|
||||
// Allocate device workspaces
|
||||
try {
|
||||
workspace = make_workspace<MLKEM_Decaps>(1);
|
||||
} catch (const std::runtime_error& ex) {
|
||||
failure = true;
|
||||
goto cleanup;
|
||||
}
|
||||
CUDA_CHECK(cudaMalloc((void**)&d_ct, MLKEM_Decaps::ciphertext_size));
|
||||
CUDA_CHECK(cudaMalloc((void**)&d_ss, MLKEM_Decaps::shared_secret_size));
|
||||
CUDA_CHECK(cudaMalloc((void**)&d_sk, MLKEM_Decaps::secret_key_size));
|
||||
|
||||
// Copy data to GPU
|
||||
CUDA_CHECK(cudaMemcpy(d_sk, sk, MLKEM_Decaps::secret_key_size, cudaMemcpyDefault));
|
||||
CUDA_CHECK(cudaMemcpy(d_ct, ct, MLKEM_Decaps::ciphertext_size, cudaMemcpyDefault));
|
||||
|
||||
// Run routine
|
||||
decaps_kernel<MLKEM_Decaps><<<1, MLKEM_Decaps::BlockDim>>>(d_ss, d_ct, d_sk, workspace);
|
||||
|
||||
// Copy data back to the host
|
||||
CUDA_CHECK(cudaMemcpy(ss, d_ss, MLKEM_Decaps::shared_secret_size, cudaMemcpyDefault));
|
||||
|
||||
cleanup:
|
||||
// Free device memory
|
||||
if (d_ct != nullptr) cudaFree(d_ct);
|
||||
if (d_ss != nullptr) cudaFree(d_ss);
|
||||
if (d_sk != nullptr) cudaFree(d_sk);
|
||||
if (workspace != nullptr) destroy_workspace(workspace);
|
||||
|
||||
return failure ? -1 : 0;
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
using KEM_768 = decltype(ML_KEM_768() + Block());
|
||||
|
||||
#if defined(OQS_ENABLE_KEM_ml_kem_768_cuda)
|
||||
int cupqc_ml_kem_768_keypair(uint8_t *pk, uint8_t *sk) {
|
||||
return keypair<KEM_768>(pk, sk);
|
||||
}
|
||||
int cupqc_ml_kem_768_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk) {
|
||||
return encaps<KEM_768>(ct, ss, pk);
|
||||
}
|
||||
int cupqc_ml_kem_768_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk) {
|
||||
return decaps<KEM_768>(ss, ct, sk);
|
||||
}
|
||||
#endif
|
||||
}
|
@ -40,7 +40,18 @@ extern int pqcrystals_ml_kem_1024_avx2_enc(uint8_t *ct, uint8_t *ss, const uint8
|
||||
extern int pqcrystals_ml_kem_1024_avx2_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);
|
||||
#endif
|
||||
|
||||
#if defined(OQS_USE_CUPQC)
|
||||
#if defined(OQS_ENABLE_KEM_ml_kem_1024_cuda)
|
||||
extern int cupqc_ml_kem_1024_keypair(uint8_t *pk, uint8_t *sk);
|
||||
extern int cupqc_ml_kem_1024_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk);
|
||||
extern int cupqc_ml_kem_1024_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);
|
||||
#endif
|
||||
#endif /* OQS_USE_CUPQC */
|
||||
|
||||
OQS_API OQS_STATUS OQS_KEM_ml_kem_1024_keypair(uint8_t *public_key, uint8_t *secret_key) {
|
||||
#if defined(OQS_USE_CUPQC) && defined(OQS_ENABLE_KEM_ml_kem_1024_cuda)
|
||||
return (OQS_STATUS) cupqc_ml_kem_1024_keypair(public_key, secret_key);
|
||||
#endif /* OQS_USE_CUPQC && OQS_ENABLE_KEM_ml_kem_1024_cuda */
|
||||
#if defined(OQS_ENABLE_KEM_ml_kem_1024_avx2)
|
||||
#if defined(OQS_DIST_BUILD)
|
||||
if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) {
|
||||
@ -57,6 +68,9 @@ OQS_API OQS_STATUS OQS_KEM_ml_kem_1024_keypair(uint8_t *public_key, uint8_t *sec
|
||||
}
|
||||
|
||||
OQS_API OQS_STATUS OQS_KEM_ml_kem_1024_encaps(uint8_t *ciphertext, uint8_t *shared_secret, const uint8_t *public_key) {
|
||||
#if defined(OQS_USE_CUPQC) && defined(OQS_ENABLE_KEM_ml_kem_1024_cuda)
|
||||
return (OQS_STATUS) cupqc_ml_kem_1024_enc(ciphertext, shared_secret, public_key);
|
||||
#endif /* OQS_USE_CUPQC && OQS_ENABLE_KEM_ml_kem_1024_cuda */
|
||||
#if defined(OQS_ENABLE_KEM_ml_kem_1024_avx2)
|
||||
#if defined(OQS_DIST_BUILD)
|
||||
if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) {
|
||||
@ -73,6 +87,9 @@ OQS_API OQS_STATUS OQS_KEM_ml_kem_1024_encaps(uint8_t *ciphertext, uint8_t *shar
|
||||
}
|
||||
|
||||
OQS_API OQS_STATUS OQS_KEM_ml_kem_1024_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key) {
|
||||
#if defined(OQS_USE_CUPQC) && defined(OQS_ENABLE_KEM_ml_kem_1024_cuda)
|
||||
return (OQS_STATUS) cupqc_ml_kem_1024_dec(shared_secret, ciphertext, secret_key);
|
||||
#endif /* OQS_USE_CUPQC && OQS_ENABLE_KEM_ml_kem_1024_cuda */
|
||||
#if defined(OQS_ENABLE_KEM_ml_kem_1024_avx2)
|
||||
#if defined(OQS_DIST_BUILD)
|
||||
if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) {
|
||||
|
@ -40,7 +40,18 @@ extern int pqcrystals_ml_kem_512_avx2_enc(uint8_t *ct, uint8_t *ss, const uint8_
|
||||
extern int pqcrystals_ml_kem_512_avx2_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);
|
||||
#endif
|
||||
|
||||
#if defined(OQS_USE_CUPQC)
|
||||
#if defined(OQS_ENABLE_KEM_ml_kem_512_cuda)
|
||||
extern int cupqc_ml_kem_512_keypair(uint8_t *pk, uint8_t *sk);
|
||||
extern int cupqc_ml_kem_512_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk);
|
||||
extern int cupqc_ml_kem_512_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);
|
||||
#endif
|
||||
#endif /* OQS_USE_CUPQC */
|
||||
|
||||
OQS_API OQS_STATUS OQS_KEM_ml_kem_512_keypair(uint8_t *public_key, uint8_t *secret_key) {
|
||||
#if defined(OQS_USE_CUPQC) && defined(OQS_ENABLE_KEM_ml_kem_512_cuda)
|
||||
return (OQS_STATUS) cupqc_ml_kem_512_keypair(public_key, secret_key);
|
||||
#endif /* OQS_USE_CUPQC && OQS_ENABLE_KEM_ml_kem_512_cuda */
|
||||
#if defined(OQS_ENABLE_KEM_ml_kem_512_avx2)
|
||||
#if defined(OQS_DIST_BUILD)
|
||||
if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) {
|
||||
@ -57,6 +68,9 @@ OQS_API OQS_STATUS OQS_KEM_ml_kem_512_keypair(uint8_t *public_key, uint8_t *secr
|
||||
}
|
||||
|
||||
OQS_API OQS_STATUS OQS_KEM_ml_kem_512_encaps(uint8_t *ciphertext, uint8_t *shared_secret, const uint8_t *public_key) {
|
||||
#if defined(OQS_USE_CUPQC) && defined(OQS_ENABLE_KEM_ml_kem_512_cuda)
|
||||
return (OQS_STATUS) cupqc_ml_kem_512_enc(ciphertext, shared_secret, public_key);
|
||||
#endif /* OQS_USE_CUPQC && OQS_ENABLE_KEM_ml_kem_512_cuda */
|
||||
#if defined(OQS_ENABLE_KEM_ml_kem_512_avx2)
|
||||
#if defined(OQS_DIST_BUILD)
|
||||
if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) {
|
||||
@ -73,6 +87,9 @@ OQS_API OQS_STATUS OQS_KEM_ml_kem_512_encaps(uint8_t *ciphertext, uint8_t *share
|
||||
}
|
||||
|
||||
OQS_API OQS_STATUS OQS_KEM_ml_kem_512_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key) {
|
||||
#if defined(OQS_USE_CUPQC) && defined(OQS_ENABLE_KEM_ml_kem_512_cuda)
|
||||
return (OQS_STATUS) cupqc_ml_kem_512_dec(shared_secret, ciphertext, secret_key);
|
||||
#endif /* OQS_USE_CUPQC && OQS_ENABLE_KEM_ml_kem_512_cuda */
|
||||
#if defined(OQS_ENABLE_KEM_ml_kem_512_avx2)
|
||||
#if defined(OQS_DIST_BUILD)
|
||||
if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) {
|
||||
|
@ -40,7 +40,18 @@ extern int pqcrystals_ml_kem_768_avx2_enc(uint8_t *ct, uint8_t *ss, const uint8_
|
||||
extern int pqcrystals_ml_kem_768_avx2_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);
|
||||
#endif
|
||||
|
||||
#if defined(OQS_USE_CUPQC)
|
||||
#if defined(OQS_ENABLE_KEM_ml_kem_768_cuda)
|
||||
extern int cupqc_ml_kem_768_keypair(uint8_t *pk, uint8_t *sk);
|
||||
extern int cupqc_ml_kem_768_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk);
|
||||
extern int cupqc_ml_kem_768_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk);
|
||||
#endif
|
||||
#endif /* OQS_USE_CUPQC */
|
||||
|
||||
OQS_API OQS_STATUS OQS_KEM_ml_kem_768_keypair(uint8_t *public_key, uint8_t *secret_key) {
|
||||
#if defined(OQS_USE_CUPQC) && defined(OQS_ENABLE_KEM_ml_kem_768_cuda)
|
||||
return (OQS_STATUS) cupqc_ml_kem_768_keypair(public_key, secret_key);
|
||||
#endif /* OQS_USE_CUPQC && OQS_ENABLE_KEM_ml_kem_768_cuda */
|
||||
#if defined(OQS_ENABLE_KEM_ml_kem_768_avx2)
|
||||
#if defined(OQS_DIST_BUILD)
|
||||
if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) {
|
||||
@ -57,6 +68,9 @@ OQS_API OQS_STATUS OQS_KEM_ml_kem_768_keypair(uint8_t *public_key, uint8_t *secr
|
||||
}
|
||||
|
||||
OQS_API OQS_STATUS OQS_KEM_ml_kem_768_encaps(uint8_t *ciphertext, uint8_t *shared_secret, const uint8_t *public_key) {
|
||||
#if defined(OQS_USE_CUPQC) && defined(OQS_ENABLE_KEM_ml_kem_768_cuda)
|
||||
return (OQS_STATUS) cupqc_ml_kem_768_enc(ciphertext, shared_secret, public_key);
|
||||
#endif /* OQS_USE_CUPQC && OQS_ENABLE_KEM_ml_kem_768_cuda */
|
||||
#if defined(OQS_ENABLE_KEM_ml_kem_768_avx2)
|
||||
#if defined(OQS_DIST_BUILD)
|
||||
if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) {
|
||||
@ -73,6 +87,9 @@ OQS_API OQS_STATUS OQS_KEM_ml_kem_768_encaps(uint8_t *ciphertext, uint8_t *share
|
||||
}
|
||||
|
||||
OQS_API OQS_STATUS OQS_KEM_ml_kem_768_decaps(uint8_t *shared_secret, const uint8_t *ciphertext, const uint8_t *secret_key) {
|
||||
#if defined(OQS_USE_CUPQC) && defined(OQS_ENABLE_KEM_ml_kem_768_cuda)
|
||||
return (OQS_STATUS) cupqc_ml_kem_768_dec(shared_secret, ciphertext, secret_key);
|
||||
#endif /* OQS_USE_CUPQC && OQS_ENABLE_KEM_ml_kem_768_cuda */
|
||||
#if defined(OQS_ENABLE_KEM_ml_kem_768_avx2)
|
||||
#if defined(OQS_DIST_BUILD)
|
||||
if (OQS_CPU_has_extension(OQS_CPU_EXT_AVX2) && OQS_CPU_has_extension(OQS_CPU_EXT_BMI2) && OQS_CPU_has_extension(OQS_CPU_EXT_POPCNT)) {
|
||||
|
@ -69,6 +69,8 @@
|
||||
|
||||
#cmakedefine OQS_ENABLE_SHA3_xkcp_low_avx2 1
|
||||
|
||||
#cmakedefine01 OQS_USE_CUPQC
|
||||
|
||||
#cmakedefine OQS_ENABLE_KEM_BIKE 1
|
||||
#cmakedefine OQS_ENABLE_KEM_bike_l1 1
|
||||
#cmakedefine OQS_ENABLE_KEM_bike_l3 1
|
||||
@ -129,10 +131,13 @@
|
||||
#cmakedefine OQS_ENABLE_KEM_ML_KEM 1
|
||||
#cmakedefine OQS_ENABLE_KEM_ml_kem_512 1
|
||||
#cmakedefine OQS_ENABLE_KEM_ml_kem_512_avx2 1
|
||||
#cmakedefine OQS_ENABLE_KEM_ml_kem_512_cuda 1
|
||||
#cmakedefine OQS_ENABLE_KEM_ml_kem_768 1
|
||||
#cmakedefine OQS_ENABLE_KEM_ml_kem_768_avx2 1
|
||||
#cmakedefine OQS_ENABLE_KEM_ml_kem_768_cuda 1
|
||||
#cmakedefine OQS_ENABLE_KEM_ml_kem_1024 1
|
||||
#cmakedefine OQS_ENABLE_KEM_ml_kem_1024_avx2 1
|
||||
#cmakedefine OQS_ENABLE_KEM_ml_kem_1024_cuda 1
|
||||
|
||||
#cmakedefine OQS_ENABLE_SIG_DILITHIUM 1
|
||||
#cmakedefine OQS_ENABLE_SIG_dilithium_2 1
|
||||
|
Loading…
x
Reference in New Issue
Block a user