summaryrefslogtreecommitdiffstats
path: root/crypto/aegis128-neon-inner.c
diff options
context:
space:
mode:
Diffstat (limited to 'crypto/aegis128-neon-inner.c')
-rw-r--r--  crypto/aegis128-neon-inner.c  |  65
1 files changed, 65 insertions, 0 deletions
diff --git a/crypto/aegis128-neon-inner.c b/crypto/aegis128-neon-inner.c
index 3d8043c4832b..ed55568afd1b 100644
--- a/crypto/aegis128-neon-inner.c
+++ b/crypto/aegis128-neon-inner.c
@@ -17,6 +17,8 @@
17 17
18#include <stddef.h> 18#include <stddef.h>
19 19
20extern int aegis128_have_aes_insn;
21
20void *memcpy(void *dest, const void *src, size_t n); 22void *memcpy(void *dest, const void *src, size_t n);
21void *memset(void *s, int c, size_t n); 23void *memset(void *s, int c, size_t n);
22 24
@@ -24,6 +26,8 @@ struct aegis128_state {
24 uint8x16_t v[5]; 26 uint8x16_t v[5];
25}; 27};
26 28
29extern const uint8x16x4_t crypto_aes_sbox[];
30
27static struct aegis128_state aegis128_load_state_neon(const void *state) 31static struct aegis128_state aegis128_load_state_neon(const void *state)
28{ 32{
29 return (struct aegis128_state){ { 33 return (struct aegis128_state){ {
@@ -49,6 +53,46 @@ uint8x16_t aegis_aes_round(uint8x16_t w)
49{ 53{
50 uint8x16_t z = {}; 54 uint8x16_t z = {};
51 55
56#ifdef CONFIG_ARM64
57 if (!__builtin_expect(aegis128_have_aes_insn, 1)) {
58 static const uint8x16_t shift_rows = {
59 0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3,
60 0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb,
61 };
62 static const uint8x16_t ror32by8 = {
63 0x1, 0x2, 0x3, 0x0, 0x5, 0x6, 0x7, 0x4,
64 0x9, 0xa, 0xb, 0x8, 0xd, 0xe, 0xf, 0xc,
65 };
66 uint8x16_t v;
67
68 // shift rows
69 w = vqtbl1q_u8(w, shift_rows);
70
71 // sub bytes
72 if (!IS_ENABLED(CONFIG_CC_IS_GCC)) {
73 v = vqtbl4q_u8(crypto_aes_sbox[0], w);
74 v = vqtbx4q_u8(v, crypto_aes_sbox[1], w - 0x40);
75 v = vqtbx4q_u8(v, crypto_aes_sbox[2], w - 0x80);
76 v = vqtbx4q_u8(v, crypto_aes_sbox[3], w - 0xc0);
77 } else {
78 asm("tbl %0.16b, {v16.16b-v19.16b}, %1.16b" : "=w"(v) : "w"(w));
79 w -= 0x40;
80 asm("tbx %0.16b, {v20.16b-v23.16b}, %1.16b" : "+w"(v) : "w"(w));
81 w -= 0x40;
82 asm("tbx %0.16b, {v24.16b-v27.16b}, %1.16b" : "+w"(v) : "w"(w));
83 w -= 0x40;
84 asm("tbx %0.16b, {v28.16b-v31.16b}, %1.16b" : "+w"(v) : "w"(w));
85 }
86
87 // mix columns
88 w = (v << 1) ^ (uint8x16_t)(((int8x16_t)v >> 7) & 0x1b);
89 w ^= (uint8x16_t)vrev32q_u16((uint16x8_t)v);
90 w ^= vqtbl1q_u8(v ^ w, ror32by8);
91
92 return w;
93 }
94#endif
95
52 /* 96 /*
53 * We use inline asm here instead of the vaeseq_u8/vaesmcq_u8 intrinsics 97 * We use inline asm here instead of the vaeseq_u8/vaesmcq_u8 intrinsics
54 * to force the compiler to issue the aese/aesmc instructions in pairs. 98 * to force the compiler to issue the aese/aesmc instructions in pairs.
@@ -73,10 +117,27 @@ struct aegis128_state aegis128_update_neon(struct aegis128_state st,
73 return st; 117 return st;
74} 118}
75 119
/*
 * Preload the AES S-box lookup tables into NEON registers v16-v31.
 *
 * The GCC build of the non-crypto-extensions fallback in aegis_aes_round()
 * performs the SubBytes step with hand-written tbl/tbx asm that names
 * v16-v31 directly (vqtbl4q_u8 on a fixed register set is not expressible
 * with GCC's intrinsics).  This helper loads the four 64-byte quarters of
 * crypto_aes_sbox into exactly those registers so that asm can consume them.
 *
 * Must be always_inline: the loads only help if they execute in the same
 * function (and register allocation scope) as the asm in aegis_aes_round()
 * that reads v16-v31 — an out-of-line call could let the compiler clobber
 * the registers in between.  For the same reason the asm deliberately
 * declares no clobbers for v16-v31; presumably the surrounding kernel-mode
 * NEON context guarantees nothing else uses them — TODO confirm.
 *
 * No-op unless all of the following hold: arm64 build, GCC compiler, and
 * the CPU lacks the AES instructions (aegis128_have_aes_insn == 0, treated
 * as the unlikely case).
 */
static inline __attribute__((always_inline))
void preload_sbox(void)
{
	if (!IS_ENABLED(CONFIG_ARM64) ||
	    !IS_ENABLED(CONFIG_CC_IS_GCC) ||
	    __builtin_expect(aegis128_have_aes_insn, 1))
		return;

	/*
	 * NOTE(review): the post-indexed ld1 writes back to %0, but %0 is
	 * declared as a plain input operand ("r") — modifying an input is
	 * outside the inline-asm contract; a "+r" temporary copy of the
	 * pointer would be strictly correct.  Verify against upstream.
	 */
	asm("ld1	{v16.16b-v19.16b}, [%0], #64	\n\t"
	    "ld1	{v20.16b-v23.16b}, [%0], #64	\n\t"
	    "ld1	{v24.16b-v27.16b}, [%0], #64	\n\t"
	    "ld1	{v28.16b-v31.16b}, [%0]		\n\t"
	    :: "r"(crypto_aes_sbox));
}
134
76void crypto_aegis128_update_neon(void *state, const void *msg) 135void crypto_aegis128_update_neon(void *state, const void *msg)
77{ 136{
78 struct aegis128_state st = aegis128_load_state_neon(state); 137 struct aegis128_state st = aegis128_load_state_neon(state);
79 138
139 preload_sbox();
140
80 st = aegis128_update_neon(st, vld1q_u8(msg)); 141 st = aegis128_update_neon(st, vld1q_u8(msg));
81 142
82 aegis128_save_state_neon(st, state); 143 aegis128_save_state_neon(st, state);
@@ -88,6 +149,8 @@ void crypto_aegis128_encrypt_chunk_neon(void *state, void *dst, const void *src,
88 struct aegis128_state st = aegis128_load_state_neon(state); 149 struct aegis128_state st = aegis128_load_state_neon(state);
89 uint8x16_t msg; 150 uint8x16_t msg;
90 151
152 preload_sbox();
153
91 while (size >= AEGIS_BLOCK_SIZE) { 154 while (size >= AEGIS_BLOCK_SIZE) {
92 uint8x16_t s = st.v[1] ^ (st.v[2] & st.v[3]) ^ st.v[4]; 155 uint8x16_t s = st.v[1] ^ (st.v[2] & st.v[3]) ^ st.v[4];
93 156
@@ -120,6 +183,8 @@ void crypto_aegis128_decrypt_chunk_neon(void *state, void *dst, const void *src,
120 struct aegis128_state st = aegis128_load_state_neon(state); 183 struct aegis128_state st = aegis128_load_state_neon(state);
121 uint8x16_t msg; 184 uint8x16_t msg;
122 185
186 preload_sbox();
187
123 while (size >= AEGIS_BLOCK_SIZE) { 188 while (size >= AEGIS_BLOCK_SIZE) {
124 msg = vld1q_u8(src) ^ st.v[1] ^ (st.v[2] & st.v[3]) ^ st.v[4]; 189 msg = vld1q_u8(src) ^ st.v[1] ^ (st.v[2] & st.v[3]) ^ st.v[4];
125 st = aegis128_update_neon(st, msg); 190 st = aegis128_update_neon(st, msg);