From 707750e03488d985938dd119bf3914ba1d065d98 Mon Sep 17 00:00:00 2001 From: John Schanck Date: Thu, 11 Mar 2021 21:31:26 -0500 Subject: [PATCH] Fix OQS_PORTABLE_BUILD logic for Frodo (#927) * Fix OQS_PORTABLE_BUILD logic for Frodo * Refine 'OQS_PORTABLE_BUILD' by general architecture, e.g. x86_64 * Use CMake to define ARCH_X86_64 and ARCH_ARM_ANY used in common.{c,h} * Avoid triggering portable build on ARM * Set OQS_USE_CPU_EXTENSIONS even on MSVC * Compile AES-NI code in portable x86_64 builds --- .CMake/compiler_opts.cmake | 2 - CMakeLists.txt | 16 +++-- src/CMakeLists.txt | 5 +- src/common/CMakeLists.txt | 2 +- src/common/common.h | 9 --- src/kem/frodokem/CMakeLists.txt | 30 +++++---- .../external/frodo_macrify_optimized.c | 67 +++++++------------ src/oqsconfig.h.cmake | 3 + 8 files changed, 61 insertions(+), 73 deletions(-) diff --git a/.CMake/compiler_opts.cmake b/.CMake/compiler_opts.cmake index 66ea44861..f43acb38a 100644 --- a/.CMake/compiler_opts.cmake +++ b/.CMake/compiler_opts.cmake @@ -13,7 +13,6 @@ if(CMAKE_C_COMPILER_ID MATCHES "Clang") set(OQS_USE_PTHREADS_IN_TESTS 1) endif() - option(OQS_USE_CPU_EXTENSIONS "Enable compile and run-time support for CPU extensions such as AVX2, SSE, etc." ON) if(OQS_USE_CPU_EXTENSIONS) include(${CMAKE_CURRENT_LIST_DIR}/gcc_clang_intrinsics.cmake) endif() @@ -68,7 +67,6 @@ elseif(CMAKE_C_COMPILER_ID STREQUAL "GNU") set(OQS_USE_PTHREADS_IN_TESTS 1) endif() - option(OQS_USE_CPU_EXTENSIONS "Enable compile and run-time support for CPU extensions such as AVX2, SSE, etc." ON) if(OQS_USE_CPU_EXTENSIONS) include(${CMAKE_CURRENT_LIST_DIR}/gcc_clang_intrinsics.cmake) endif() diff --git a/CMakeLists.txt b/CMakeLists.txt index b50bad261..1032e398e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,6 +14,11 @@ endif() project(liboqs C ASM) +option(OQS_USE_CPU_EXTENSIONS "Enable compile and run-time support for CPU extensions such as AVX2, SSE, etc." ON) +option(OQS_PORTABLE_BUILD "Ensure the resulting library is portable. This implies having run-time checks for CPU extensions." ON) +option(OQS_BUILD_ONLY_LIB "Build only liboqs and do not expose build targets for tests, documentation, and pretty-printing available." OFF) +option(OQS_MINIMAL_BUILD "Only build the default KEM and Signature schemes." OFF) + set(CMAKE_C_STANDARD 11) set(CMAKE_C_STANDARD_REQUIRED ON) set(CMAKE_POSITION_INDEPENDENT_CODE ON) @@ -24,14 +29,21 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON) if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64|AMD64") set(ARCH "x86_64") + set(ARCH_X86_64 ON) elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64|aarch64") set(ARCH "arm64") + set(ARCH_ARM_ANY ON) elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm") set(ARCH "arm") + set(ARCH_ARM_ANY ON) else() message(FATAL_ERROR "Unknown or unsupported processor: " ${CMAKE_SYSTEM_PROCESSOR}) endif() +if(OQS_PORTABLE_BUILD AND ARCH_X86_64) + set(OQS_PORTABLE_X86_64_BUILD ON) +endif() + if(CMAKE_BUILD_TYPE STREQUAL "Debug" OR CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo") set(OQS_DEBUG_BUILD ON) else() @@ -42,10 +54,6 @@ if(WIN32) set(CMAKE_GENERATOR_CC cl) endif() -option(OQS_PORTABLE_BUILD "Ensure the resulting library is portable. This implies having run-time checks for CPU extensions." ON) -option(OQS_BUILD_ONLY_LIB "Build only liboqs and do not expose build targets for tests, documentation, and pretty-printing available." OFF) -option(OQS_MINIMAL_BUILD "Only build the default KEM and Signature schemes." OFF) - include(.CMake/compiler_opts.cmake) include(.CMake/alg_support.cmake) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a2ffd4a36..e7fe451bf 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -8,10 +8,7 @@ if(OQS_ENABLE_KEM_BIKE) endif() if(OQS_ENABLE_KEM_FRODOKEM) add_subdirectory(kem/frodokem) - set(KEM_OBJS ${KEM_OBJS} $) - if(OQS_USE_AVX2_INSTRUCTIONS) - set(KEM_OBJS ${KEM_OBJS} $) - endif() + set(KEM_OBJS ${KEM_OBJS} ${FRODOKEM_OBJS}) endif() if(OQS_ENABLE_KEM_SIKE OR OQS_ENABLE_KEM_SIDH) add_subdirectory(kem/sike) diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 7126e478d..c0d8e2523 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -14,7 +14,7 @@ if(OQS_USE_AES_OPENSSL) set(AES_IMPL aes/aes_ossl.c) else() set(AES_IMPL aes/aes.c aes/aes_c.c) - if (OQS_USE_AES_INSTRUCTIONS) + if (OQS_PORTABLE_X86_64_BUILD OR OQS_USE_AES_INSTRUCTIONS) set(AES_IMPL ${AES_IMPL} aes/aes128_ni.c) set(AES_IMPL ${AES_IMPL} aes/aes256_ni.c) set_source_files_properties(aes/aes128_ni.c PROPERTIES COMPILE_FLAGS -maes) diff --git a/src/common/common.h b/src/common/common.h index d5a47f82b..6d72a7ad0 100644 --- a/src/common/common.h +++ b/src/common/common.h @@ -100,15 +100,6 @@ typedef enum { #if defined(OQS_USE_CPU_EXTENSIONS) -/** - * Architecture macros. - */ -#if (defined(_M_X64) || defined(__x86_64__)) -#define ARCH_X86_64 -#elif (defined(__arm__) || defined(_M_ARM) || defined(__aarch64__)) -#define ARCH_ARM_ANY -#endif - /** * CPU runtime detection flags */ diff --git a/src/kem/frodokem/CMakeLists.txt b/src/kem/frodokem/CMakeLists.txt index e2534fa41..f73d6ede4 100644 --- a/src/kem/frodokem/CMakeLists.txt +++ b/src/kem/frodokem/CMakeLists.txt @@ -1,5 +1,7 @@ # SPDX-License-Identifier: MIT +set(_FRODOKEM_OBJS "") + if(OQS_ENABLE_KEM_frodokem_640_aes) set(SRCS kem_frodokem640aes.c external/frodo640aes.c) endif() @@ -22,19 +24,20 @@ if(OQS_ENABLE_KEM_frodokem_1344_shake) endif() add_library(frodokem OBJECT ${SRCS}) +set(_FRODOKEM_OBJS ${_FRODOKEM_OBJS} $) -if(OQS_USE_AVX2_INSTRUCTIONS) - if(OQS_USE_AES_INSTRUCTIONS) - if(OQS_ENABLE_KEM_frodokem_640_aes) - set(SRCS_AVX2 external/frodo640aes_avx2.c) - endif() - if(OQS_ENABLE_KEM_frodokem_976_aes) - set(SRCS_AVX2 ${SRCS_AVX2} external/frodo976aes_avx2.c) - endif() - if(OQS_ENABLE_KEM_frodokem_1344_aes) - set(SRCS_AVX2 ${SRCS_AVX2} external/frodo1344aes_avx2.c) - endif() +set(SRCS_AVX2 "") +if(OQS_PORTABLE_X86_64_BUILD OR OQS_USE_AVX2_INSTRUCTIONS) + if(OQS_ENABLE_KEM_frodokem_640_aes) + set(SRCS_AVX2 ${SRCS_AVX2} external/frodo640aes_avx2.c) endif() + if(OQS_ENABLE_KEM_frodokem_976_aes) + set(SRCS_AVX2 ${SRCS_AVX2} external/frodo976aes_avx2.c) + endif() + if(OQS_ENABLE_KEM_frodokem_1344_aes) + set(SRCS_AVX2 ${SRCS_AVX2} external/frodo1344aes_avx2.c) + endif() + if(OQS_ENABLE_KEM_frodokem_640_shake) set(SRCS_AVX2 ${SRCS_AVX2} external/frodo640shake_avx2.c) endif() @@ -46,5 +49,8 @@ if(OQS_USE_AVX2_INSTRUCTIONS) endif() add_library(frodokem_avx2 OBJECT ${SRCS_AVX2}) - target_compile_options(frodokem_avx2 PRIVATE -maes -mavx2) + target_compile_options(frodokem_avx2 PRIVATE -mavx2) + set(_FRODOKEM_OBJS ${_FRODOKEM_OBJS} $) endif() + +set(FRODOKEM_OBJS ${_FRODOKEM_OBJS} PARENT_SCOPE) diff --git a/src/kem/frodokem/external/frodo_macrify_optimized.c b/src/kem/frodokem/external/frodo_macrify_optimized.c index 437e692d8..e03db5e5f 100644 --- a/src/kem/frodokem/external/frodo_macrify_optimized.c +++ b/src/kem/frodokem/external/frodo_macrify_optimized.c @@ -16,76 +16,61 @@ #define frodo_mul_add_as_plus_e_actual frodo_mul_add_as_plus_e_portable #include "frodo_macrify_as_plus_e.c" -#if defined(OQS_USE_AVX2_INSTRUCTIONS) + int frodo_mul_add_as_plus_e_avx2(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A); -#endif +int frodo_mul_add_sa_plus_e_aes_avx2(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A); +int frodo_mul_add_sa_plus_e_aes_portable(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A); +int frodo_mul_add_sa_plus_e_shake_avx2(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A); +int frodo_mul_add_sa_plus_e_shake_portable(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A); int frodo_mul_add_as_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A) { // Generate-and-multiply: generate matrix A (N x N) row-wise, multiply by s on the right. // Inputs: s, e (N x N_BAR) // Output: out = A*s + e (N x N_BAR) -#if defined(OQS_USE_AVX2_INSTRUCTIONS) - #if defined(OQS_PORTABLE_BUILD) +#if defined(OQS_PORTABLE_X86_64_BUILD) OQS_CPU_EXTENSIONS available_cpu_extensions = OQS_get_available_CPU_extensions(); if (available_cpu_extensions.AVX2_ENABLED) { return frodo_mul_add_as_plus_e_avx2(out, s, e, seed_A); } else { return frodo_mul_add_as_plus_e_portable(out, s, e, seed_A); } - #else // OQS_USE_AVX2_INSTRUCTIONS && !(OQS_PORTABLE_BUILD) - return frodo_mul_add_as_plus_e_avx2(out, s, e, seed_A); - #endif -#else // !(OQS_USE_AVX2_INSTRUCTIONS) +#elif defined(OQS_USE_AVX2_INSTRUCTIONS) + return frodo_mul_add_as_plus_e_avx2(out, s, e, seed_A); +#else return frodo_mul_add_as_plus_e_portable(out, s, e, seed_A); #endif } -#if defined(USE_AES128_FOR_A) -int frodo_mul_add_sa_plus_e_aes_portable(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A); -#if defined(OQS_USE_AES_INSTRUCTIONS) && defined(OQS_USE_AVX2_INSTRUCTIONS) -int frodo_mul_add_sa_plus_e_aes_avx2(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A); -#endif -#elif defined(USE_SHAKE128_FOR_A) -int frodo_mul_add_sa_plus_e_shake_portable(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A); -#if defined(OQS_USE_AVX2_INSTRUCTIONS) -int frodo_mul_add_sa_plus_e_shake_avx2(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A); -#endif -#endif - int frodo_mul_add_sa_plus_e(uint16_t *out, const uint16_t *s, const uint16_t *e, const uint8_t *seed_A) { // Generate-and-multiply: generate matrix A (N x N) column-wise, multiply by s' on the left. // Inputs: s', e' (N_BAR x N) // Output: out = s'*A + e' (N_BAR x N) #if defined(USE_AES128_FOR_A) - #if defined(OQS_USE_AES_INSTRUCTIONS) && defined(OQS_USE_AVX2_INSTRUCTIONS) - #if defined(OQS_PORTABLE_BUILD) + #if defined(OQS_PORTABLE_X86_64_BUILD) OQS_CPU_EXTENSIONS available_cpu_extensions = OQS_get_available_CPU_extensions(); - if (available_cpu_extensions.AES_ENABLED && available_cpu_extensions.AVX2_ENABLED) { + if (available_cpu_extensions.AVX2_ENABLED) { return frodo_mul_add_sa_plus_e_aes_avx2(out, s, e, seed_A); } else { return frodo_mul_add_sa_plus_e_aes_portable(out, s, e, seed_A); } - #else // OQS_USE_AES_INSTRUCTIONS && OQS_USE_AVX2_INSTRUCTIONS && !(OQS_PORTABLE_BUILD) + #elif defined(OQS_USE_AVX2_INSTRUCTIONS) return frodo_mul_add_sa_plus_e_aes_avx2(out, s, e, seed_A); - #endif - #else // !(OQS_USE_AES_INSTRUCTIONS && OQS_USE_AVX2_INSTRUCTIONS) + #else return frodo_mul_add_sa_plus_e_aes_portable(out, s, e, seed_A); - #endif -#else // USE_SHAKE128_FOR_A - #if defined(OQS_USE_AVX2_INSTRUCTIONS) - #if defined(OQS_PORTABLE_BUILD) - OQS_CPU_EXTENSIONS available_cpu_extensions = OQS_get_available_CPU_extensions(); - if (available_cpu_extensions.AVX2_ENABLED) { - return frodo_mul_add_sa_plus_e_shake_avx2(out, s, e, seed_A); - } else { - return frodo_mul_add_sa_plus_e_shake_portable(out, s, e, seed_A); - } - #else // OQS_USE_AVX2_INSTRUCTIONS && !(OQS_PORTABLE_BUILD) - return frodo_mul_add_sa_plus_e_shake_avx2(out, s, e, seed_A); #endif - #else // !(OQS_USE_AVX2_INSTRUCTIONS) - return frodo_mul_add_sa_plus_e_shake_portable(out, s, e, seed_A); - #endif +#elif defined(USE_SHAKE128_FOR_A) + #if defined(OQS_PORTABLE_X86_64_BUILD) + OQS_CPU_EXTENSIONS available_cpu_extensions = OQS_get_available_CPU_extensions(); + if (available_cpu_extensions.AVX2_ENABLED) { + return frodo_mul_add_sa_plus_e_shake_avx2(out, s, e, seed_A); + } else { + return frodo_mul_add_sa_plus_e_shake_portable(out, s, e, seed_A); + } + #elif defined(OQS_USE_AVX2_INSTRUCTIONS) + return frodo_mul_add_sa_plus_e_shake_avx2(out, s, e, seed_A); + #else + return frodo_mul_add_sa_plus_e_shake_portable(out, s, e, seed_A); + #endif #endif } diff --git a/src/oqsconfig.h.cmake b/src/oqsconfig.h.cmake index 4f02d99b0..e9df9f99f 100644 --- a/src/oqsconfig.h.cmake +++ b/src/oqsconfig.h.cmake @@ -3,7 +3,10 @@ #cmakedefine OQS_VERSION_TEXT "@OQS_VERSION_TEXT@" #cmakedefine OQS_COMPILE_BUILD_TARGET "@OQS_COMPILE_BUILD_TARGET@" #cmakedefine OQS_PORTABLE_BUILD 1 +#cmakedefine OQS_PORTABLE_X86_64_BUILD 1 #cmakedefine OQS_DEBUG_BUILD 1 +#cmakedefine ARCH_X86_64 1 +#cmakedefine ARCH_ARM_ANY 1 #cmakedefine OQS_KEM_DEFAULT @OQS_KEM_DEFAULT@ #cmakedefine OQS_SIG_DEFAULT @OQS_SIG_DEFAULT@