Change 64-bit add to 32-bit add to wrap the 32-bit counter in the AES-CTR AES-NI implementation (#2252)

Signed-off-by: Brandon Luo <sel4@disroot.org>
Co-authored-by: Brandon Luo <brandon.luo@ll.mit.edu>
max-p-log-p 2025-09-09 16:55:19 -04:00 committed by GitHub
parent d566dd236a
commit c7ae760162
2 changed files with 20 additions and 20 deletions
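
The fix hinges on the lane width of the SSE add: when the 32-bit counter lane sits at 0xFFFFFFFF, _mm_add_epi64 carries past it into the neighbouring 32 bits, while _mm_add_epi32 wraps the lane back to zero. The following is a minimal standalone sketch of that difference (illustrative only, not code from this commit); it places the counter in the same lane that the _mm_set_epi64x(n, 0) increments in the diff below target.

#include <stdio.h>
#include <stdint.h>
#include <emmintrin.h>

/* Print the four 32-bit lanes of an __m128i, highest lane first. */
static void print_lanes(const char *label, __m128i v) {
    uint32_t w[4];
    _mm_storeu_si128((__m128i *)w, v);
    printf("%s: %08x %08x %08x %08x\n", label, w[3], w[2], w[1], w[0]);
}

int main(void) {
    /* 0xFFFFFFFF in the 32-bit lane that _mm_set_epi64x(1, 0) increments
     * (the low half of the high 64-bit element). */
    __m128i iv  = _mm_set_epi32(0, -1, 0, 0);
    __m128i one = _mm_set_epi64x(1, 0);

    /* 64-bit add: the carry escapes the 32-bit counter lane. */
    print_lanes("epi64", _mm_add_epi64(iv, one)); /* 00000001 00000000 00000000 00000000 */

    /* 32-bit add: the lane wraps to zero, as a 32-bit CTR counter should. */
    print_lanes("epi32", _mm_add_epi32(iv, one)); /* 00000000 00000000 00000000 00000000 */
    return 0;
}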


@@ -150,18 +150,18 @@ void oqs_aes128_ctr_enc_sch_upd_blks_ni(void *schedule, uint8_t *out, size_t out
     while (out_blks >= 4) {
         __m128i nv0 = _mm_shuffle_epi8(ctx->iv, mask);
-        __m128i nv1 = _mm_shuffle_epi8(_mm_add_epi64(ctx->iv, _mm_set_epi64x(1, 0)), mask);
-        __m128i nv2 = _mm_shuffle_epi8(_mm_add_epi64(ctx->iv, _mm_set_epi64x(2, 0)), mask);
-        __m128i nv3 = _mm_shuffle_epi8(_mm_add_epi64(ctx->iv, _mm_set_epi64x(3, 0)), mask);
+        __m128i nv1 = _mm_shuffle_epi8(_mm_add_epi32(ctx->iv, _mm_set_epi64x(1, 0)), mask);
+        __m128i nv2 = _mm_shuffle_epi8(_mm_add_epi32(ctx->iv, _mm_set_epi64x(2, 0)), mask);
+        __m128i nv3 = _mm_shuffle_epi8(_mm_add_epi32(ctx->iv, _mm_set_epi64x(3, 0)), mask);
         aes128ni_encrypt_x4(schedule, nv0, nv1, nv2, nv3, out);
-        ctx->iv = _mm_add_epi64(ctx->iv, _mm_set_epi64x(4, 0));
+        ctx->iv = _mm_add_epi32(ctx->iv, _mm_set_epi64x(4, 0));
         out += 64;
         out_blks -= 4;
     }
     while (out_blks >= 1) {
         __m128i nv0 = _mm_shuffle_epi8(ctx->iv, mask);
         aes128ni_encrypt(schedule, nv0, out);
-        ctx->iv = _mm_add_epi64(ctx->iv, _mm_set_epi64x(1, 0));
+        ctx->iv = _mm_add_epi32(ctx->iv, _mm_set_epi64x(1, 0));
         out += 16;
         out_blks--;
     }
@@ -181,11 +181,11 @@ void oqs_aes128_ctr_enc_sch_ni(const uint8_t *iv, const size_t iv_len, const voi
     while (out_len >= 64) {
         __m128i nv0 = block;
-        __m128i nv1 = _mm_shuffle_epi8(_mm_add_epi64(_mm_shuffle_epi8(block, mask), _mm_set_epi64x(1, 0)), mask);
-        __m128i nv2 = _mm_shuffle_epi8(_mm_add_epi64(_mm_shuffle_epi8(block, mask), _mm_set_epi64x(2, 0)), mask);
-        __m128i nv3 = _mm_shuffle_epi8(_mm_add_epi64(_mm_shuffle_epi8(block, mask), _mm_set_epi64x(3, 0)), mask);
+        __m128i nv1 = _mm_shuffle_epi8(_mm_add_epi32(_mm_shuffle_epi8(block, mask), _mm_set_epi64x(1, 0)), mask);
+        __m128i nv2 = _mm_shuffle_epi8(_mm_add_epi32(_mm_shuffle_epi8(block, mask), _mm_set_epi64x(2, 0)), mask);
+        __m128i nv3 = _mm_shuffle_epi8(_mm_add_epi32(_mm_shuffle_epi8(block, mask), _mm_set_epi64x(3, 0)), mask);
         aes128ni_encrypt_x4(schedule, nv0, nv1, nv2, nv3, out);
-        block = _mm_shuffle_epi8(_mm_add_epi64(_mm_shuffle_epi8(block, mask), _mm_set_epi64x(4, 0)), mask);
+        block = _mm_shuffle_epi8(_mm_add_epi32(_mm_shuffle_epi8(block, mask), _mm_set_epi64x(4, 0)), mask);
         out += 64;
         out_len -= 64;
     }
@@ -193,7 +193,7 @@ void oqs_aes128_ctr_enc_sch_ni(const uint8_t *iv, const size_t iv_len, const voi
         aes128ni_encrypt(schedule, block, out);
         out += 16;
         out_len -= 16;
-        block = _mm_shuffle_epi8(_mm_add_epi64(_mm_shuffle_epi8(block, mask), _mm_set_epi64x(1, 0)), mask);
+        block = _mm_shuffle_epi8(_mm_add_epi32(_mm_shuffle_epi8(block, mask), _mm_set_epi64x(1, 0)), mask);
     }
     if (out_len > 0) {
         uint8_t tmp[16];


@@ -184,18 +184,18 @@ void oqs_aes256_ctr_enc_sch_upd_blks_ni(void *schedule, uint8_t *out, size_t out
     while (out_blks >= 4) {
         __m128i nv0 = _mm_shuffle_epi8(ctx->iv, mask);
-        __m128i nv1 = _mm_shuffle_epi8(_mm_add_epi64(ctx->iv, _mm_set_epi64x(1, 0)), mask);
-        __m128i nv2 = _mm_shuffle_epi8(_mm_add_epi64(ctx->iv, _mm_set_epi64x(2, 0)), mask);
-        __m128i nv3 = _mm_shuffle_epi8(_mm_add_epi64(ctx->iv, _mm_set_epi64x(3, 0)), mask);
+        __m128i nv1 = _mm_shuffle_epi8(_mm_add_epi32(ctx->iv, _mm_set_epi64x(1, 0)), mask);
+        __m128i nv2 = _mm_shuffle_epi8(_mm_add_epi32(ctx->iv, _mm_set_epi64x(2, 0)), mask);
+        __m128i nv3 = _mm_shuffle_epi8(_mm_add_epi32(ctx->iv, _mm_set_epi64x(3, 0)), mask);
         aes256ni_encrypt_x4(schedule, nv0, nv1, nv2, nv3, out);
-        ctx->iv = _mm_add_epi64(ctx->iv, _mm_set_epi64x(4, 0));
+        ctx->iv = _mm_add_epi32(ctx->iv, _mm_set_epi64x(4, 0));
         out += 64;
         out_blks -= 4;
     }
     while (out_blks >= 1) {
         __m128i nv0 = _mm_shuffle_epi8(ctx->iv, mask);
         aes256ni_encrypt(schedule, nv0, out);
-        ctx->iv = _mm_add_epi64(ctx->iv, _mm_set_epi64x(1, 0));
+        ctx->iv = _mm_add_epi32(ctx->iv, _mm_set_epi64x(1, 0));
         out += 16;
         out_blks--;
     }
@@ -215,11 +215,11 @@ void oqs_aes256_ctr_enc_sch_ni(const uint8_t *iv, const size_t iv_len, const voi
     while (out_len >= 64) {
         __m128i nv0 = block;
-        __m128i nv1 = _mm_shuffle_epi8(_mm_add_epi64(_mm_shuffle_epi8(block, mask), _mm_set_epi64x(1, 0)), mask);
-        __m128i nv2 = _mm_shuffle_epi8(_mm_add_epi64(_mm_shuffle_epi8(block, mask), _mm_set_epi64x(2, 0)), mask);
-        __m128i nv3 = _mm_shuffle_epi8(_mm_add_epi64(_mm_shuffle_epi8(block, mask), _mm_set_epi64x(3, 0)), mask);
+        __m128i nv1 = _mm_shuffle_epi8(_mm_add_epi32(_mm_shuffle_epi8(block, mask), _mm_set_epi64x(1, 0)), mask);
+        __m128i nv2 = _mm_shuffle_epi8(_mm_add_epi32(_mm_shuffle_epi8(block, mask), _mm_set_epi64x(2, 0)), mask);
+        __m128i nv3 = _mm_shuffle_epi8(_mm_add_epi32(_mm_shuffle_epi8(block, mask), _mm_set_epi64x(3, 0)), mask);
         aes256ni_encrypt_x4(schedule, nv0, nv1, nv2, nv3, out);
-        block = _mm_shuffle_epi8(_mm_add_epi64(_mm_shuffle_epi8(block, mask), _mm_set_epi64x(4, 0)), mask);
+        block = _mm_shuffle_epi8(_mm_add_epi32(_mm_shuffle_epi8(block, mask), _mm_set_epi64x(4, 0)), mask);
         out += 64;
         out_len -= 64;
     }
@@ -227,7 +227,7 @@ void oqs_aes256_ctr_enc_sch_ni(const uint8_t *iv, const size_t iv_len, const voi
         aes256ni_encrypt(schedule, block, out);
         out += 16;
         out_len -= 16;
-        block = _mm_shuffle_epi8(_mm_add_epi64(_mm_shuffle_epi8(block, mask), _mm_set_epi64x(1, 0)), mask);
+        block = _mm_shuffle_epi8(_mm_add_epi32(_mm_shuffle_epi8(block, mask), _mm_set_epi64x(1, 0)), mask);
     }
     if (out_len > 0) {
         uint8_t tmp[16];
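
For reference, the counter semantics the change targets can be written out in scalar form, assuming the conventional CTR layout of a big-endian 32-bit counter in the last four bytes of the 16-byte counter block. This is an illustrative sketch, not a liboqs function; with the old 64-bit add, a counter at 0xFFFFFFFF would let the carry propagate past the 32-bit counter into adjacent IV bytes instead of wrapping as below.

#include <stdint.h>

/* Hypothetical helper: increment the big-endian 32-bit counter held in the
 * last four bytes of a 16-byte counter block, wrapping modulo 2^32 and never
 * carrying into the leading 12 nonce/IV bytes. */
static void ctr32_inc_be(uint8_t block[16]) {
    uint32_t ctr = ((uint32_t)block[12] << 24) | ((uint32_t)block[13] << 16) |
                   ((uint32_t)block[14] << 8)  |  (uint32_t)block[15];
    ctr += 1; /* unsigned arithmetic wraps from 0xFFFFFFFF to 0 */
    block[12] = (uint8_t)(ctr >> 24);
    block[13] = (uint8_t)(ctr >> 16);
    block[14] = (uint8_t)(ctr >> 8);
    block[15] = (uint8_t)ctr;
}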