diff --git a/src/common/aes/aes.c b/src/common/aes/aes.c index e40bf336c..b5a124f6c 100644 --- a/src/common/aes/aes.c +++ b/src/common/aes/aes.c @@ -31,7 +31,7 @@ void OQS_AES128_ECB_load_schedule(const uint8_t *key, void **_schedule) { C_OR_NI_OR_ARM( oqs_aes128_load_schedule_c(key, _schedule), oqs_aes128_load_schedule_ni(key, _schedule), - oqs_aes128_load_schedule_armv8(key, _schedule) + oqs_aes128_load_schedule_no_bitslice(key, _schedule) ) } @@ -39,7 +39,7 @@ void OQS_AES128_free_schedule(void *schedule) { C_OR_NI_OR_ARM( oqs_aes128_free_schedule_c(schedule), oqs_aes128_free_schedule_ni(schedule), - oqs_aes128_free_schedule_armv8(schedule) + oqs_aes128_free_schedule_no_bitslice(schedule) ) } @@ -47,7 +47,7 @@ void OQS_AES256_ECB_load_schedule(const uint8_t *key, void **_schedule) { C_OR_NI_OR_ARM( oqs_aes256_load_schedule_c(key, _schedule), oqs_aes256_load_schedule_ni(key, _schedule), - oqs_aes256_load_schedule_armv8(key, _schedule) + oqs_aes256_load_schedule_no_bitslice(key, _schedule) ) } @@ -59,7 +59,7 @@ void OQS_AES256_free_schedule(void *schedule) { C_OR_NI_OR_ARM( oqs_aes256_free_schedule_c(schedule), oqs_aes256_free_schedule_ni(schedule), - oqs_aes256_free_schedule_armv8(schedule) + oqs_aes256_free_schedule_no_bitslice(schedule) ) } diff --git a/src/common/aes/aes128_armv8.c b/src/common/aes/aes128_armv8.c index 6ea381816..b5003ad01 100644 --- a/src/common/aes/aes128_armv8.c +++ b/src/common/aes/aes128_armv8.c @@ -12,160 +12,6 @@ typedef struct { uint64_t sk_exp[PQC_AES128_STATESIZE]; } aes128ctx; - -#define FSbData \ - { \ - 0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, \ - 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76, \ - 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, \ - 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0, \ - 0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, \ - 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15, \ - 0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, \ - 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75, \ - 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, \ - 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84, \ - 0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, \ - 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF, \ - 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, \ - 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8, \ - 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, \ - 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2, \ - 0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, \ - 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73, \ - 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, \ - 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB, \ - 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, \ - 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79, \ - 0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, \ - 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08, \ - 0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, \ - 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A, \ - 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, \ - 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E, \ - 0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, \ - 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF, \ - 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, \ - 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16 \ - } - -static unsigned int FSb[256] = FSbData; -#undef FSbData - -#define f_FSb_32__1(x) ((FSb[((x) >> 24) &0xFF] << 24) ^ \ - (FSb[((x) >> 16) &0xFF] << 16)) - -#define f_FSb_32__2(x) ((FSb[((x) >> 8) &0xFF] << 8 ) ^ \ - (FSb[((x) ) &0xFF] & 0xFF)) - - - -static inline unsigned int rotr(const unsigned int x, const unsigned int n) { - unsigned int r; - r = ((x >> n) | (x << (32 - n))); - return r; -} -static inline unsigned int rotl(const unsigned int x, const unsigned int n) { - unsigned int r; - r = ((x << n) | (x >> (32 - n))); - return r; -} - -// From crypto_core/aes128encrypt/dolbeau/armv8crypto -static inline void aes128_armv8_keysched(const unsigned int key[], unsigned int *aes_edrk) { - unsigned int i = 0; - unsigned int rotl_aes_edrk; - unsigned int tmp8, tmp9, tmp10, tmp11; - unsigned int temp_lds; -#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) - unsigned int round = 0x01000000; -#else - unsigned int round = 0x00000001; -#endif - - tmp8 = (key[0]); - aes_edrk[0] = tmp8; - tmp9 = (key[1]); - aes_edrk[1] = tmp9; - tmp10 = (key[2]); - aes_edrk[2] = tmp10; - tmp11 = (key[3]); - aes_edrk[3] = tmp11; - - for ( i = 4; i < 36; /* i += 4 */ ) { -#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) - rotl_aes_edrk = rotl(tmp11, 8); -#else - rotl_aes_edrk = rotr(tmp11, 8); -#endif - temp_lds = f_FSb_32__1(rotl_aes_edrk) ^ f_FSb_32__2(rotl_aes_edrk); - - tmp8 = tmp8 ^ round ^ temp_lds; - round = round << 1; - - aes_edrk[i++] = tmp8; - tmp9 = tmp9 ^ tmp8; - aes_edrk[i++] = tmp9; - tmp10 = tmp10 ^ tmp9; - aes_edrk[i++] = tmp10; - tmp11 = tmp11 ^ tmp10; - aes_edrk[i++] = tmp11; - } - -#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) - round = 0x1B000000; - rotl_aes_edrk = rotl(tmp11, 8); -#else - round = 0x0000001B; - rotl_aes_edrk = rotr(tmp11, 8); -#endif - temp_lds = f_FSb_32__1(rotl_aes_edrk) ^ f_FSb_32__2(rotl_aes_edrk); - - tmp8 = tmp8 ^ round ^ temp_lds; - - aes_edrk[i++] = tmp8; - tmp9 = tmp9 ^ tmp8; - aes_edrk[i++] = tmp9; - tmp10 = tmp10 ^ tmp9; - aes_edrk[i++] = tmp10; - tmp11 = tmp11 ^ tmp10; - aes_edrk[i++] = tmp11; - -#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) - round = 0x36000000; - rotl_aes_edrk = rotl(tmp11, 8); -#else - round = 0x00000036; - rotl_aes_edrk = rotr(tmp11, 8); -#endif - temp_lds = f_FSb_32__1(rotl_aes_edrk) ^ f_FSb_32__2(rotl_aes_edrk); - - tmp8 = tmp8 ^ round ^ temp_lds; - - aes_edrk[i++] = tmp8; - tmp9 = tmp9 ^ tmp8; - aes_edrk[i++] = tmp9; - tmp10 = tmp10 ^ tmp9; - aes_edrk[i++] = tmp10; - tmp11 = tmp11 ^ tmp10; - aes_edrk[i++] = tmp11; -} - - -void oqs_aes128_load_schedule_armv8(const uint8_t *key, void **_schedule) { - *_schedule = malloc(44 * sizeof(int)); - assert(*_schedule != NULL); - unsigned int *schedule = (unsigned int *) *_schedule; - aes128_armv8_keysched((const unsigned int *) key, schedule); -} - -void oqs_aes128_free_schedule_armv8(void *schedule) { - if (schedule != NULL) { - OQS_MEM_secure_free(schedule, 44 * sizeof(int)); - } -} - - // From crypto_core/aes128encrypt/dolbeau/armv8crypto static inline void aes128_armv8_encrypt(const unsigned char *rkeys, const unsigned char *n, unsigned char *out) { uint8x16_t temp = vld1q_u8(n); diff --git a/src/common/aes/aes256_armv8.c b/src/common/aes/aes256_armv8.c index e2937557f..2b9616773 100644 --- a/src/common/aes/aes256_armv8.c +++ b/src/common/aes/aes256_armv8.c @@ -7,160 +7,6 @@ #include - -static inline unsigned int rotr(const unsigned int x, const unsigned int n) { - unsigned int r; - r = ((x >> n) | (x << (32 - n))); - return r; -} -static inline unsigned int rotl(const unsigned int x, const unsigned int n) { - unsigned int r; - r = ((x << n) | (x >> (32 - n))); - return r; -} - -#define FSbData \ - { \ - 0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, \ - 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76, \ - 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, \ - 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0, \ - 0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, \ - 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15, \ - 0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, \ - 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75, \ - 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, \ - 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84, \ - 0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, \ - 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF, \ - 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, \ - 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8, \ - 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, \ - 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2, \ - 0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, \ - 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73, \ - 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, \ - 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB, \ - 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, \ - 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79, \ - 0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, \ - 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08, \ - 0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, \ - 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A, \ - 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, \ - 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E, \ - 0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, \ - 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF, \ - 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, \ - 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16 \ - } - -static unsigned int FSb[256] = FSbData; -#undef FSbData - -#define f_FSb_32__1(x) ((FSb[((x) >> 24) &0xFF] << 24) ^ \ - (FSb[((x) >> 16) &0xFF] << 16)) - -#define f_FSb_32__2(x) ((FSb[((x) >> 8) &0xFF] << 8 ) ^ \ - (FSb[((x) ) &0xFF] & 0xFF)) - - - -static inline void aes256_armv8_keysched(const unsigned int key[], unsigned int *aes_edrk) { - unsigned int i = 0; - unsigned int rotl_aes_edrk; - unsigned int tmp8, tmp9, tmp10, tmp11; - unsigned int tmp12, tmp13, tmp14, tmp15; - unsigned int temp_lds; - -#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - unsigned int round = 0x01000000; -#else - unsigned int round = 0x00000001; -#endif - - tmp8 = (key[0]); - aes_edrk[0] = tmp8; - tmp9 = (key[1]); - aes_edrk[1] = tmp9; - tmp10 = (key[2]); - aes_edrk[2] = tmp10; - tmp11 = (key[3]); - aes_edrk[3] = tmp11; - tmp12 = (key[4]); - aes_edrk[4] = tmp12; - tmp13 = (key[5]); - aes_edrk[5] = tmp13; - tmp14 = (key[6]); - aes_edrk[6] = tmp14; - tmp15 = (key[7]); - aes_edrk[7] = tmp15; - - for ( i = 8; i < 56; /* i+=8 */) { - -#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - rotl_aes_edrk = rotl(tmp15, 8); -#else - rotl_aes_edrk = rotr(tmp15, 8); -#endif - temp_lds = f_FSb_32__1(rotl_aes_edrk) ^ f_FSb_32__2(rotl_aes_edrk); - - tmp8 = tmp8 ^ round ^ temp_lds; - round = round << 1; - - aes_edrk[i++] = tmp8; - tmp9 = tmp9 ^ tmp8; - aes_edrk[i++] = tmp9; - tmp10 = tmp10 ^ tmp9; - aes_edrk[i++] = tmp10; - tmp11 = tmp11 ^ tmp10; - aes_edrk[i++] = tmp11; - - temp_lds = f_FSb_32__1(tmp11) ^ f_FSb_32__2(tmp11); - - tmp12 = tmp12 ^ temp_lds; - aes_edrk[i++] = tmp12; - tmp13 = tmp13 ^ tmp12; - aes_edrk[i++] = tmp13; - tmp14 = tmp14 ^ tmp13; - aes_edrk[i++] = tmp14; - tmp15 = tmp15 ^ tmp14; - aes_edrk[i++] = tmp15; - } - -#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - rotl_aes_edrk = rotl(tmp15, 8); -#else - rotl_aes_edrk = rotr(tmp15, 8); -#endif - - temp_lds = f_FSb_32__1(rotl_aes_edrk) ^ f_FSb_32__2(rotl_aes_edrk); - - tmp8 = tmp8 ^ round ^ temp_lds; - round = round << 1; - - aes_edrk[i++] = tmp8; - tmp9 = tmp9 ^ tmp8; - aes_edrk[i++] = tmp9; - tmp10 = tmp10 ^ tmp9; - aes_edrk[i++] = tmp10; - tmp11 = tmp11 ^ tmp10; - aes_edrk[i++] = tmp11; -} - -void oqs_aes256_load_schedule_armv8(const uint8_t *key, void **_schedule) { - *_schedule = malloc(60 * sizeof(int)); - assert(*_schedule != NULL); - unsigned int *schedule = (unsigned int *) *_schedule; - aes256_armv8_keysched((const unsigned int *) key, schedule); -} - -void oqs_aes256_free_schedule_armv8(void *schedule) { - if (schedule != NULL) { - OQS_MEM_secure_free(schedule, 60 * sizeof(int)); - } -} - // From crypto_core/aes256encrypt/dolbeau/armv8crypto static inline void aes256_armv8_encrypt(const unsigned char *rkeys, const unsigned char *n, unsigned char *out) { uint8x16_t temp = vld1q_u8(n); diff --git a/src/common/aes/aes_c.c b/src/common/aes/aes_c.c index 643562e2b..daf30bf60 100644 --- a/src/common/aes/aes_c.c +++ b/src/common/aes/aes_c.c @@ -408,6 +408,31 @@ static void br_aes_ct64_keysched(uint64_t *comp_skey, const unsigned char *key, } } +static void aes_keysched_no_bitslice(uint32_t *skey, const unsigned char *key, unsigned int key_len) { + unsigned int i, j, k, nk, nkf; + uint32_t tmp; + unsigned nrounds = 10 + ((key_len - 16) >> 2); + + nk = (key_len >> 2); + nkf = ((nrounds + 1) << 2); + br_range_dec32le(skey, (key_len >> 2), key); + tmp = skey[(key_len >> 2) - 1]; + for (i = nk, j = 0, k = 0; i < nkf; i ++) { + if (j == 0) { + tmp = (tmp << 24) | (tmp >> 8); + tmp = sub_word(tmp) ^ Rcon[k]; + } else if (nk > 6 && j == 4) { + tmp = sub_word(tmp); + } + tmp ^= skey[i - nk]; + skey[i] = tmp; + if (++ j == nk) { + j = 0; + k ++; + } + } +} + static void br_aes_ct64_skey_expand(uint64_t *skey, const uint64_t *comp_skey, unsigned int nrounds) { unsigned u, v, n; @@ -616,6 +641,20 @@ void oqs_aes256_load_schedule_c(const uint8_t *key, void **_schedule) { br_aes_ct64_skey_expand(ctx->sk_exp, skey, 14); } +void oqs_aes128_load_schedule_no_bitslice(const uint8_t *key, void **_schedule) { + *_schedule = malloc(44 * sizeof(int)); + assert(*_schedule != NULL); + uint32_t *schedule = (uint32_t *) *_schedule; + aes_keysched_no_bitslice(schedule, (const unsigned char *) key, 16); +} + +void oqs_aes256_load_schedule_no_bitslice(const uint8_t *key, void **_schedule) { + *_schedule = malloc(60 * sizeof(int)); + assert(*_schedule != NULL); + uint32_t *schedule = (uint32_t *) *_schedule; + aes_keysched_no_bitslice(schedule, (const unsigned char *) key, 32); +} + void oqs_aes128_ecb_enc_sch_c(const uint8_t *plaintext, const size_t plaintext_len, const void *schedule, uint8_t *ciphertext) { assert(plaintext_len % 16 == 0); const aes128ctx *ctx = (const aes128ctx *) schedule; @@ -645,3 +684,16 @@ void oqs_aes256_free_schedule_c(void *schedule) { OQS_MEM_secure_free(ctx, sizeof(aes256ctx)); } } + +void oqs_aes128_free_schedule_no_bitslice(void *schedule) { + if (schedule != NULL) { + OQS_MEM_secure_free(schedule, 44 * sizeof(int)); + } +} + +void oqs_aes256_free_schedule_no_bitslice(void *schedule) { + if (schedule != NULL) { + OQS_MEM_secure_free(schedule, 60 * sizeof(int)); + } +} + diff --git a/src/common/aes/aes_local.h b/src/common/aes/aes_local.h index 1548dddfa..aae734d4d 100644 --- a/src/common/aes/aes_local.h +++ b/src/common/aes/aes_local.h @@ -11,8 +11,8 @@ void oqs_aes128_load_schedule_c(const uint8_t *key, void **_schedule); void oqs_aes128_free_schedule_c(void *schedule); void oqs_aes128_ecb_enc_sch_c(const uint8_t *plaintext, const size_t plaintext_len, const void *schedule, uint8_t *ciphertext); -void oqs_aes128_load_schedule_armv8(const uint8_t *key, void **_schedule); -void oqs_aes128_free_schedule_armv8(void *schedule); +void oqs_aes128_load_schedule_no_bitslice(const uint8_t *key, void **_schedule); +void oqs_aes128_free_schedule_no_bitslice(void *schedule); void oqs_aes128_enc_sch_block_armv8(const uint8_t *plaintext, const void *_schedule, uint8_t *ciphertext); void oqs_aes128_ecb_enc_sch_armv8(const uint8_t *plaintext, const size_t plaintext_len, const void *schedule, uint8_t *ciphertext); @@ -27,8 +27,8 @@ void oqs_aes256_free_schedule_c(void *schedule); void oqs_aes256_ecb_enc_sch_c(const uint8_t *plaintext, const size_t plaintext_len, const void *schedule, uint8_t *ciphertext); void oqs_aes256_ctr_enc_sch_c(const uint8_t *iv, const size_t iv_len, const void *schedule, uint8_t *out, size_t out_len); -void oqs_aes256_free_schedule_armv8(void *schedule); -void oqs_aes256_load_schedule_armv8(const uint8_t *key, void **_schedule); +void oqs_aes256_load_schedule_no_bitslice(const uint8_t *key, void **_schedule); +void oqs_aes256_free_schedule_no_bitslice(void *schedule); void oqs_aes256_enc_sch_block_armv8(const uint8_t *plaintext, const void *_schedule, uint8_t *ciphertext); void oqs_aes256_ecb_enc_sch_armv8(const uint8_t *plaintext, const size_t plaintext_len, const void *schedule, uint8_t *ciphertext);