diff options
Diffstat (limited to 'crypto/aegis128-neon-inner.c')
-rw-r--r-- | crypto/aegis128-neon-inner.c | 65 |
1 files changed, 65 insertions, 0 deletions
diff --git a/crypto/aegis128-neon-inner.c b/crypto/aegis128-neon-inner.c index 3d8043c4832b..ed55568afd1b 100644 --- a/crypto/aegis128-neon-inner.c +++ b/crypto/aegis128-neon-inner.c | |||
@@ -17,6 +17,8 @@ | |||
17 | 17 | ||
18 | #include <stddef.h> | 18 | #include <stddef.h> |
19 | 19 | ||
20 | extern int aegis128_have_aes_insn; | ||
21 | |||
20 | void *memcpy(void *dest, const void *src, size_t n); | 22 | void *memcpy(void *dest, const void *src, size_t n); |
21 | void *memset(void *s, int c, size_t n); | 23 | void *memset(void *s, int c, size_t n); |
22 | 24 | ||
@@ -24,6 +26,8 @@ struct aegis128_state { | |||
24 | uint8x16_t v[5]; | 26 | uint8x16_t v[5]; |
25 | }; | 27 | }; |
26 | 28 | ||
29 | extern const uint8x16x4_t crypto_aes_sbox[]; | ||
30 | |||
27 | static struct aegis128_state aegis128_load_state_neon(const void *state) | 31 | static struct aegis128_state aegis128_load_state_neon(const void *state) |
28 | { | 32 | { |
29 | return (struct aegis128_state){ { | 33 | return (struct aegis128_state){ { |
@@ -49,6 +53,46 @@ uint8x16_t aegis_aes_round(uint8x16_t w) | |||
49 | { | 53 | { |
50 | uint8x16_t z = {}; | 54 | uint8x16_t z = {}; |
51 | 55 | ||
56 | #ifdef CONFIG_ARM64 | ||
57 | if (!__builtin_expect(aegis128_have_aes_insn, 1)) { | ||
58 | static const uint8x16_t shift_rows = { | ||
59 | 0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3, | ||
60 | 0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb, | ||
61 | }; | ||
62 | static const uint8x16_t ror32by8 = { | ||
63 | 0x1, 0x2, 0x3, 0x0, 0x5, 0x6, 0x7, 0x4, | ||
64 | 0x9, 0xa, 0xb, 0x8, 0xd, 0xe, 0xf, 0xc, | ||
65 | }; | ||
66 | uint8x16_t v; | ||
67 | |||
68 | // shift rows | ||
69 | w = vqtbl1q_u8(w, shift_rows); | ||
70 | |||
71 | // sub bytes | ||
72 | if (!IS_ENABLED(CONFIG_CC_IS_GCC)) { | ||
73 | v = vqtbl4q_u8(crypto_aes_sbox[0], w); | ||
74 | v = vqtbx4q_u8(v, crypto_aes_sbox[1], w - 0x40); | ||
75 | v = vqtbx4q_u8(v, crypto_aes_sbox[2], w - 0x80); | ||
76 | v = vqtbx4q_u8(v, crypto_aes_sbox[3], w - 0xc0); | ||
77 | } else { | ||
78 | asm("tbl %0.16b, {v16.16b-v19.16b}, %1.16b" : "=w"(v) : "w"(w)); | ||
79 | w -= 0x40; | ||
80 | asm("tbx %0.16b, {v20.16b-v23.16b}, %1.16b" : "+w"(v) : "w"(w)); | ||
81 | w -= 0x40; | ||
82 | asm("tbx %0.16b, {v24.16b-v27.16b}, %1.16b" : "+w"(v) : "w"(w)); | ||
83 | w -= 0x40; | ||
84 | asm("tbx %0.16b, {v28.16b-v31.16b}, %1.16b" : "+w"(v) : "w"(w)); | ||
85 | } | ||
86 | |||
87 | // mix columns | ||
88 | w = (v << 1) ^ (uint8x16_t)(((int8x16_t)v >> 7) & 0x1b); | ||
89 | w ^= (uint8x16_t)vrev32q_u16((uint16x8_t)v); | ||
90 | w ^= vqtbl1q_u8(v ^ w, ror32by8); | ||
91 | |||
92 | return w; | ||
93 | } | ||
94 | #endif | ||
95 | |||
52 | /* | 96 | /* |
53 | * We use inline asm here instead of the vaeseq_u8/vaesmcq_u8 intrinsics | 97 | * We use inline asm here instead of the vaeseq_u8/vaesmcq_u8 intrinsics |
54 | * to force the compiler to issue the aese/aesmc instructions in pairs. | 98 | * to force the compiler to issue the aese/aesmc instructions in pairs. |
@@ -73,10 +117,27 @@ struct aegis128_state aegis128_update_neon(struct aegis128_state st, | |||
73 | return st; | 117 | return st; |
74 | } | 118 | } |
75 | 119 | ||
120 | static inline __attribute__((always_inline)) | ||
121 | void preload_sbox(void) | ||
122 | { | ||
123 | if (!IS_ENABLED(CONFIG_ARM64) || | ||
124 | !IS_ENABLED(CONFIG_CC_IS_GCC) || | ||
125 | __builtin_expect(aegis128_have_aes_insn, 1)) | ||
126 | return; | ||
127 | |||
128 | asm("ld1 {v16.16b-v19.16b}, [%0], #64 \n\t" | ||
129 | "ld1 {v20.16b-v23.16b}, [%0], #64 \n\t" | ||
130 | "ld1 {v24.16b-v27.16b}, [%0], #64 \n\t" | ||
131 | "ld1 {v28.16b-v31.16b}, [%0] \n\t" | ||
132 | :: "r"(crypto_aes_sbox)); | ||
133 | } | ||
134 | |||
76 | void crypto_aegis128_update_neon(void *state, const void *msg) | 135 | void crypto_aegis128_update_neon(void *state, const void *msg) |
77 | { | 136 | { |
78 | struct aegis128_state st = aegis128_load_state_neon(state); | 137 | struct aegis128_state st = aegis128_load_state_neon(state); |
79 | 138 | ||
139 | preload_sbox(); | ||
140 | |||
80 | st = aegis128_update_neon(st, vld1q_u8(msg)); | 141 | st = aegis128_update_neon(st, vld1q_u8(msg)); |
81 | 142 | ||
82 | aegis128_save_state_neon(st, state); | 143 | aegis128_save_state_neon(st, state); |
@@ -88,6 +149,8 @@ void crypto_aegis128_encrypt_chunk_neon(void *state, void *dst, const void *src, | |||
88 | struct aegis128_state st = aegis128_load_state_neon(state); | 149 | struct aegis128_state st = aegis128_load_state_neon(state); |
89 | uint8x16_t msg; | 150 | uint8x16_t msg; |
90 | 151 | ||
152 | preload_sbox(); | ||
153 | |||
91 | while (size >= AEGIS_BLOCK_SIZE) { | 154 | while (size >= AEGIS_BLOCK_SIZE) { |
92 | uint8x16_t s = st.v[1] ^ (st.v[2] & st.v[3]) ^ st.v[4]; | 155 | uint8x16_t s = st.v[1] ^ (st.v[2] & st.v[3]) ^ st.v[4]; |
93 | 156 | ||
@@ -120,6 +183,8 @@ void crypto_aegis128_decrypt_chunk_neon(void *state, void *dst, const void *src, | |||
120 | struct aegis128_state st = aegis128_load_state_neon(state); | 183 | struct aegis128_state st = aegis128_load_state_neon(state); |
121 | uint8x16_t msg; | 184 | uint8x16_t msg; |
122 | 185 | ||
186 | preload_sbox(); | ||
187 | |||
123 | while (size >= AEGIS_BLOCK_SIZE) { | 188 | while (size >= AEGIS_BLOCK_SIZE) { |
124 | msg = vld1q_u8(src) ^ st.v[1] ^ (st.v[2] & st.v[3]) ^ st.v[4]; | 189 | msg = vld1q_u8(src) ^ st.v[1] ^ (st.v[2] & st.v[3]) ^ st.v[4]; |
125 | st = aegis128_update_neon(st, msg); | 190 | st = aegis128_update_neon(st, msg); |