Newer CPU feature detection

This commit is contained in:
Douglas Stebila 2019-07-08 20:05:45 -04:00
parent cf469d4156
commit dce10891f3
6 changed files with 206 additions and 79 deletions

View File

@ -0,0 +1,77 @@
# ===========================================================================
# https://www.gnu.org/software/autoconf-archive/ax_check_x86_features.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_CHECK_X86_FEATURES([ACTION-IF-FOUND],[ACTION-IF-NOT-FOUND])
#
# DESCRIPTION
#
# Checks if the host cpu supports various x86 instruction sets. The
# instruction sets that get tested are "mmx, popcnt, sse, sse2, sse3,
# sse4.1, sse4.2, sse4a, avx, avx2, avx512f, fma, fma4, bmi, bmi2, aes".
# If an instruction set is supported by the host cpu, the C preprocessor
# macro USE_XXX_INSTRUCTIONS is set to 1, where XXX is the up-cased
# instruction set name with the dot replaced by an underscore. For
# example, the test for "sse4.2" would export USE_SSE4_2_INSTRUCTIONS=1.
# Also the compiler flag "-msse4.2" would be added to the
# X86_FEATURE_CFLAGS variable, which can be obtained in Makefile.am using
# @X86_FEATURE_CFLAGS@.
#
# Once all of the instruction set tests have run, the configure script
# runs ACTION-IF-FOUND if it is specified; otherwise it appends
# X86_FEATURE_CFLAGS to CFLAGS. ACTION-IF-NOT-FOUND, if specified, is
# expanded after that, regardless of the test results.
#
# This macro relies on the gcc extended builtin functions
# "__builtin_cpu_init" and "__builtin_cpu_supports" to detect the cpu
# features. If the compiler doesn't have these builtins, the test program
# cannot be built and the instruction set is reported as unsupported.
#
# See also AX_X86_CPU_SUPPORTS, which is the actual macro that performs
# the checks for the instruction sets.
#
# LICENSE
#
# Copyright (c) 2016 Felix Chern <idryman@gmail.com>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <https://www.gnu.org/licenses/>.
#
# As a special exception, the respective Autoconf Macro's copyright owner
# gives unlimited permission to copy, distribute and modify the configure
# scripts that are the output of Autoconf when processing the Macro. You
# need not follow the terms of the GNU General Public License when using
# or distributing such scripts, even though portions of the text of the
# Macro appear in them. The GNU General Public License (GPL) does govern
# all other use of the material that constitutes the Autoconf Macro.
#
# This special exception to the GPL applies to versions of the Autoconf
# Macro released by the Autoconf Archive. When you make and distribute a
# modified version of the Autoconf Macro, you may extend this special
# exception to the GPL to apply to your modified version as well.
#serial 2
# AX_CHECK_X86_FEATURES([ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
#
# Runs AX_X86_CPU_SUPPORTS once per instruction set in the list below and,
# for every set that is reported as supported, appends the matching
# "-m<set>" option to X86_FEATURE_CFLAGS. The variable is then substituted
# with AC_SUBST. Afterwards ACTION-IF-FOUND is expanded when it is
# non-empty, otherwise X86_FEATURE_CFLAGS is appended to CFLAGS; finally
# ACTION-IF-NOT-FOUND is expanded.
AC_DEFUN([AX_CHECK_X86_FEATURES],
[m4_foreach_w([ax_x86_feature],
  [mmx popcnt sse sse2 sse3 sse4.1 sse4.2 sse4a avx avx2 avx512f fma fma4 bmi bmi2 aes],
  [AX_X86_CPU_SUPPORTS(ax_x86_feature,
    [X86_FEATURE_CFLAGS="$X86_FEATURE_CFLAGS -m[]ax_x86_feature"],
    [])
  ])
AC_SUBST([X86_FEATURE_CFLAGS])
m4_default([$1],
  [CFLAGS="$CFLAGS $X86_FEATURE_CFLAGS"])
$2
])

View File

@ -0,0 +1,98 @@
# ============================================================================
# adapted from
# https://www.gnu.org/software/autoconf-archive/ax_gcc_x86_cpu_supports.html
# ============================================================================
#
# SYNOPSIS
#
# AX_X86_CPU_SUPPORTS(X86-INSTRUCTION-SET,
# [ACTION-IF-FOUND],[ACTION-IF-NOT-FOUND])
#
# DESCRIPTION
#
# Checks if the host cpu supports X86-INSTRUCTION-SET. The instruction
# sets that can be tested are "mmx, popcnt, sse, sse2, sse3, sse4.1,
# sse4.2, sse4a, avx, avx2, avx512f, fma, fma4, bmi, bmi2, aes". If the
# instruction set is supported by the host cpu, the C preprocessor macro
# USE_XXX_INSTRUCTIONS is set to 1, where XXX is the up-cased instruction
# set name with the dot replaced by an underscore. For example, the test
# for "sse4.2" would export USE_SSE4_2_INSTRUCTIONS=1. This macro requires
# the gcc extended builtin function "__builtin_cpu_supports" to detect the
# cpu features.
#
# If the test for the instruction set succeeded, the hook ACTION-IF-FOUND
# would run. Otherwise the hook ACTION-IF-NOT-FOUND would run if
# specified.
#
# See also AX_CHECK_X86_FEATURES, which checks all of the supported
# instruction sets and exports the corresponding CFLAGS.
#
# LICENSE
#
# Copyright (c) 2016 Felix Chern <idryman@gmail.com>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <https://www.gnu.org/licenses/>.
#
# As a special exception, the respective Autoconf Macro's copyright owner
# gives unlimited permission to copy, distribute and modify the configure
# scripts that are the output of Autoconf when processing the Macro. You
# need not follow the terms of the GNU General Public License when using
# or distributing such scripts, even though portions of the text of the
# Macro appear in them. The GNU General Public License (GPL) does govern
# all other use of the material that constitutes the Autoconf Macro.
#
# This special exception to the GPL applies to versions of the Autoconf
# Macro released by the Autoconf Archive. When you make and distribute a
# modified version of the Autoconf Macro, you may extend this special
# exception to the GPL to apply to your modified version as well.
#serial 3
# AX_X86_CPU_SUPPORTS(X86-INSTRUCTION-SET, [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
#
# When the X86_64 Automake conditional is true, builds and runs a small C
# program that exits 0 if __builtin_cpu_supports reports the requested
# instruction set, and caches the yes/no answer in the shell variable
# ax_cv_x86_cpu_supports_<set>. When cross compiling, the program cannot be
# run, so the answer defaults to "no" instead of aborting configure.
#
# The cached answer then drives the USE_<SET>_INSTRUCTIONS Automake
# conditional and, when it is "yes", the USE_<SET>_INSTRUCTIONS C define
# followed by ACTION-IF-FOUND; otherwise ACTION-IF-NOT-FOUND is expanded.
# When X86_64 is false the check is skipped, the cache variable is left
# as it was, and the "not found" path is taken unless it already holds
# "yes".
AC_DEFUN([AX_X86_CPU_SUPPORTS],[
AC_REQUIRE([AC_PROG_CC])
AC_LANG_PUSH([C])
AS_VAR_PUSHDEF([x86_feature], [AS_TR_SH([ax_cv_x86_cpu_supports_$1])])
AM_COND_IF([X86_64],[
  AC_CACHE_CHECK([for x86 $1 instruction support],
    [x86_feature],
    [AC_RUN_IFELSE(
      [AC_LANG_PROGRAM( [#include <stdlib.h> ],
        [ #if defined(__GNUC__) && !defined(__clang__)
          __builtin_cpu_init();
          #endif
          if (__builtin_cpu_supports("$1"))
            return 0;
          return 1;
        ]
      )],
      [x86_feature=yes],
      [x86_feature=no],
      [x86_feature=no]
    )]
  )
])
AC_LANG_POP([C])
AM_CONDITIONAL(AS_TR_SH(m4_toupper([USE_$1_INSTRUCTIONS])), [test x$x86_feature = xyes]) # ADDED BY OQS
AS_VAR_IF([x86_feature],
  [yes],
  [
    AC_DEFINE(
      AS_TR_CPP([USE_$1_INSTRUCTIONS]),
      [1],
      [Define if $1 instructions are supported]
    )
    $2
  ],
  [$3]
)
AS_VAR_POPDEF([x86_feature])
])

View File

@ -1,54 +0,0 @@
# AX_X86_CPU_SUPPORTS(X86-INSTRUCTION-SET, [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
#
# When the X86_64 Automake conditional is true, builds and runs a small C
# program that exits 0 if __builtin_cpu_supports reports the requested
# instruction set, caches the yes/no answer in the shell variable
# ax_cv_x86_cpu_supports_<set>, and uses it for the USE_<SET>_INSTRUCTIONS
# Automake conditional, the C define of the same name and the two action
# hooks. When cross compiling, the program cannot be run, so the answer
# defaults to "no" instead of aborting configure.
#
# When X86_64 is false, only the Automake conditional is defined (as
# false); neither action hook is expanded.
#
# The conditional name goes through AS_TR_SH so that a dotted set name
# such as "sse4.1" still yields a valid identifier.
AC_DEFUN([AX_X86_CPU_SUPPORTS],
[AC_REQUIRE([AC_PROG_CC])
AM_COND_IF([X86_64],[
  AC_LANG_PUSH([C])
  AS_VAR_PUSHDEF([x86_feature], [AS_TR_SH([ax_cv_x86_cpu_supports_$1])])
  AC_CACHE_CHECK([for x86 $1 instruction support],
    [x86_feature],
    [AC_RUN_IFELSE(
      [AC_LANG_PROGRAM( [#include <stdlib.h> ],
        [ #if defined(__GNUC__) && !defined(__clang__)
          __builtin_cpu_init ();
          #endif
          if (__builtin_cpu_supports("$1"))
          {
            return 0;
          }
          return 1;
        ])],
      [x86_feature=yes],
      [x86_feature=no],
      [x86_feature=no]
    )]
  )
  AC_LANG_POP([C])
  AM_CONDITIONAL(AS_TR_SH(m4_toupper([USE_$1_INSTRUCTIONS])), [test x$x86_feature = xyes])
  AS_VAR_IF([x86_feature],[yes],
    [AC_DEFINE(
      AS_TR_CPP([USE_$1_INSTRUCTIONS]),
      [1],
      [Define if $1 instructions are supported])
    $2],
    [$3]
  )
  AS_VAR_POPDEF([x86_feature])
  ],
  [AM_CONDITIONAL(AS_TR_SH(m4_toupper([USE_$1_INSTRUCTIONS])), [false])]
)
])
# AX_CHECK_X86_FEATURES([ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
#
# Runs AX_X86_CPU_SUPPORTS once per instruction set in the list below and,
# for every set that is reported as supported, appends the matching
# "-m<set>" option to X86_FEATURE_CFLAGS. The variable is then substituted
# with AC_SUBST. Afterwards ACTION-IF-FOUND is expanded when it is
# non-empty, otherwise X86_FEATURE_CFLAGS is appended to CFLAGS; finally
# ACTION-IF-NOT-FOUND is expanded.
AC_DEFUN([AX_CHECK_X86_FEATURES],
[m4_foreach_w([ax_x86_feature],
  [mmx popcnt sse sse2 sse3 sse4a sse5 avx avx2 avx512f fma fma4 bmi bmi2
   avx512bw avx512cd avx512dq avx512er avx512ifma avx512pf avx512vbmi aes],
  [AX_X86_CPU_SUPPORTS(ax_x86_feature,
    [X86_FEATURE_CFLAGS="$X86_FEATURE_CFLAGS -m[]ax_x86_feature"],
    [])
  ])
AC_SUBST([X86_FEATURE_CFLAGS])
m4_default([$1],
  [CFLAGS="$CFLAGS $X86_FEATURE_CFLAGS"])
$2
])

View File

@ -30,28 +30,25 @@ AC_DEFUN([DETECT_HOST_AND_CPU], [
AM_CONDITIONAL([ON_OPENBSD], [test "x$openbsd" = xtrue])
# Enable assembly optimizations here
# Apparently asm optimizations do not work well with darwin
AM_COND_IF([ON_LINUX], [
case $host_cpu in
x86_64* )
AM_CPPFLAGS=${AM_CPPFLAGS}" -DSIDH_ASM -march=x86-64"
x86_64=true
;;
aarch64* )
AM_CPPFLAGS=${AM_CPPFLAGS}" -DSIDH_ASM -march=armv8-a+crc"
arm64=true
;;
arm* )
AM_CPPFLAGS=${AM_CPPFLAGS}" -DARM"
arm=true
;;
*)
#Default Case
AC_MSG_ERROR([Your CPU is not currently supported])
;;
esac
])
case $host_cpu in
x86_64* )
AM_CPPFLAGS=${AM_CPPFLAGS}" -DSIDH_ASM -march=x86-64"
x86_64=true
;;
aarch64* )
AM_CPPFLAGS=${AM_CPPFLAGS}" -DSIDH_ASM -march=armv8-a+crc"
arm64=true
;;
arm* )
AM_CPPFLAGS=${AM_CPPFLAGS}" -DARM"
arm=true
;;
*)
#Default Case
AC_MSG_ERROR([Your CPU is not currently supported])
;;
esac
AM_CONDITIONAL([X86_64], [test "x$x86_64" = xtrue])
AM_CONDITIONAL([ARM64], [test "x$arm64" = xtrue])
AM_CONDITIONAL([ARM], [test "x$arm" = xtrue])
@ -70,7 +67,7 @@ AC_DEFUN([DETECT_HOST_AND_CPU], [
[gcc_cv_compiler=false]
)]
)
#Check if further x86 optimizations are available (e.g., avx/avx2/bmi).
AX_CHECK_X86_FEATURES
])

View File

@ -67,7 +67,7 @@ AC_DEFUN([CONFIG_FEATURES],
AC_DEFINE(OQS_ENABLE_KEM_bike3_l5, 1, "Define to 1 when BIKE3-L5 enabled")
AM_COND_IF([USE_AES_INSTRUCTIONS],
[AM_COND_IF([USE_AVX2_INSTRUCTIONS],
[AM_COND_IF([USE_AVX512F_INSTRUCTIONS],
[
AM_CONDITIONAL([BIKE_ADDITIONAL_IMPL], [test x$gcc_cv_compiler = xtrue])
AM_COND_IF([BIKE_ADDITIONAL_IMPL], [AC_DEFINE(OQS_KEM_BIKE_ADDITIONAL_IMPLEMENTATION, 1, "Define to 1 when BIKE uses the additional implementation")])

View File

@ -29,3 +29,12 @@ test_aes_LDFLAGS = -L../src/crypto/aes/.libs -laes # required since OQS_A
test_aes_LDFLAGS += ${commonflags}
test_sha3_LDFLAGS = -L../src/crypto/sha3/.libs -lsha3 # required since OQS_SHA3 symbols are not part of OQS public API
test_sha3_LDFLAGS += ${commonflags}
# Explicit prerequisites: each program listed below is rebuilt whenever
# ../liboqs.la changes.
# NOTE(review): setting <prog>_DEPENDENCIES replaces the dependency list
# that Automake would otherwise derive from <prog>_LDADD -- confirm that no
# other prerequisite is lost.
example_kem_DEPENDENCIES = ../liboqs.la
example_sig_DEPENDENCIES = ../liboqs.la
speed_kem_DEPENDENCIES = ../liboqs.la
speed_sig_DEPENDENCIES = ../liboqs.la
test_kem_DEPENDENCIES = ../liboqs.la
test_sig_DEPENDENCIES = ../liboqs.la
test_aes_DEPENDENCIES = ../liboqs.la
test_sha3_DEPENDENCIES = ../liboqs.la