 crypto/Kconfig               |   5
 crypto/Makefile              |  11
 crypto/aegis128-neon-inner.c | 149
 crypto/aegis128-neon.c       |  43
 4 files changed, 208 insertions(+), 0 deletions(-)
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 559494bbc0db..2e7f08ba0675 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -306,6 +306,11 @@ config CRYPTO_AEGIS128
 	help
 	  Support for the AEGIS-128 dedicated AEAD algorithm.
 
+config CRYPTO_AEGIS128_SIMD
+	bool "Support SIMD acceleration for AEGIS-128"
+	depends on CRYPTO_AEGIS128 && ((ARM || ARM64) && KERNEL_MODE_NEON)
+	default y
+
 config CRYPTO_AEGIS128_AESNI_SSE2
 	tristate "AEGIS-128 AEAD algorithm (x86_64 AESNI+SSE2 implementation)"
 	depends on X86 && 64BIT
diff --git a/crypto/Makefile b/crypto/Makefile
index 362a36f0bd2f..b3e16b4fb414 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -91,6 +91,17 @@ obj-$(CONFIG_CRYPTO_CCM) += ccm.o
 obj-$(CONFIG_CRYPTO_CHACHA20POLY1305) += chacha20poly1305.o
 obj-$(CONFIG_CRYPTO_AEGIS128) += aegis128.o
 aegis128-y := aegis128-core.o
+
+ifeq ($(ARCH),arm)
+CFLAGS_aegis128-neon-inner.o += -ffreestanding -march=armv7-a -mfloat-abi=softfp -mfpu=crypto-neon-fp-armv8
+aegis128-$(CONFIG_CRYPTO_AEGIS128_SIMD) += aegis128-neon.o aegis128-neon-inner.o
+endif
+ifeq ($(ARCH),arm64)
+CFLAGS_aegis128-neon-inner.o += -ffreestanding -mcpu=generic+crypto
+CFLAGS_REMOVE_aegis128-neon-inner.o += -mgeneral-regs-only
+aegis128-$(CONFIG_CRYPTO_AEGIS128_SIMD) += aegis128-neon.o aegis128-neon-inner.o
+endif
+
 obj-$(CONFIG_CRYPTO_PCRYPT) += pcrypt.o
 obj-$(CONFIG_CRYPTO_CRYPTD) += cryptd.o
 obj-$(CONFIG_CRYPTO_DES) += des_generic.o
diff --git a/crypto/aegis128-neon-inner.c b/crypto/aegis128-neon-inner.c
new file mode 100644
index 000000000000..26e9450a5833
--- /dev/null
+++ b/crypto/aegis128-neon-inner.c
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ */
+
+#ifdef CONFIG_ARM64
+#include <asm/neon-intrinsics.h>
+
+#define AES_ROUND	"aese %0.16b, %1.16b \n\t aesmc %0.16b, %0.16b"
+#else
+#include <arm_neon.h>
+
+#define AES_ROUND	"aese.8 %q0, %q1 \n\t aesmc.8 %q0, %q0"
+#endif
+
+#define AEGIS_BLOCK_SIZE	16
+
+#include <stddef.h>
+
+void *memcpy(void *dest, const void *src, size_t n);
+void *memset(void *s, int c, size_t n);
+
+struct aegis128_state {
+	uint8x16_t v[5];
+};
+
+static struct aegis128_state aegis128_load_state_neon(const void *state)
+{
+	return (struct aegis128_state){ {
+		vld1q_u8(state),
+		vld1q_u8(state + 16),
+		vld1q_u8(state + 32),
+		vld1q_u8(state + 48),
+		vld1q_u8(state + 64)
+	} };
+}
+
+static void aegis128_save_state_neon(struct aegis128_state st, void *state)
+{
+	vst1q_u8(state, st.v[0]);
+	vst1q_u8(state + 16, st.v[1]);
+	vst1q_u8(state + 32, st.v[2]);
+	vst1q_u8(state + 48, st.v[3]);
+	vst1q_u8(state + 64, st.v[4]);
+}
+
+static uint8x16_t aegis_aes_round(uint8x16_t w)
+{
+	uint8x16_t z = {};
+
+	/*
+	 * We use inline asm here instead of the vaeseq_u8/vaesmcq_u8 intrinsics
+	 * to force the compiler to issue the aese/aesmc instructions in pairs.
+	 * This is much faster on many cores, where the instruction pair can
+	 * execute in a single cycle.
+	 */
+	asm(AES_ROUND : "+w"(w) : "w"(z));
+	return w;
+}
+
+static struct aegis128_state aegis128_update_neon(struct aegis128_state st,
+						  uint8x16_t m)
+{
+	uint8x16_t t;
+
+	t = aegis_aes_round(st.v[3]);
+	st.v[3] ^= aegis_aes_round(st.v[2]);
+	st.v[2] ^= aegis_aes_round(st.v[1]);
+	st.v[1] ^= aegis_aes_round(st.v[0]);
+	st.v[0] ^= aegis_aes_round(st.v[4]) ^ m;
+	st.v[4] ^= t;
+
+	return st;
+}
+
+void crypto_aegis128_update_neon(void *state, const void *msg)
+{
+	struct aegis128_state st = aegis128_load_state_neon(state);
+
+	st = aegis128_update_neon(st, vld1q_u8(msg));
+
+	aegis128_save_state_neon(st, state);
+}
+
+void crypto_aegis128_encrypt_chunk_neon(void *state, void *dst, const void *src,
+					unsigned int size)
+{
+	struct aegis128_state st = aegis128_load_state_neon(state);
+	uint8x16_t tmp;
+
+	while (size >= AEGIS_BLOCK_SIZE) {
+		uint8x16_t s = vld1q_u8(src);
+
+		tmp = s ^ st.v[1] ^ (st.v[2] & st.v[3]) ^ st.v[4];
+		st = aegis128_update_neon(st, s);
+		vst1q_u8(dst, tmp);
+
+		size -= AEGIS_BLOCK_SIZE;
+		src += AEGIS_BLOCK_SIZE;
+		dst += AEGIS_BLOCK_SIZE;
+	}
+
+	if (size > 0) {
+		uint8_t buf[AEGIS_BLOCK_SIZE] = {};
+		uint8x16_t msg;
+
+		memcpy(buf, src, size);
+		msg = vld1q_u8(buf);
+		tmp = msg ^ st.v[1] ^ (st.v[2] & st.v[3]) ^ st.v[4];
+		st = aegis128_update_neon(st, msg);
+		vst1q_u8(buf, tmp);
+		memcpy(dst, buf, size);
+	}
+
+	aegis128_save_state_neon(st, state);
+}
+
+void crypto_aegis128_decrypt_chunk_neon(void *state, void *dst, const void *src,
+					unsigned int size)
+{
+	struct aegis128_state st = aegis128_load_state_neon(state);
+	uint8x16_t tmp;
+
+	while (size >= AEGIS_BLOCK_SIZE) {
+		tmp = vld1q_u8(src) ^ st.v[1] ^ (st.v[2] & st.v[3]) ^ st.v[4];
+		st = aegis128_update_neon(st, tmp);
+		vst1q_u8(dst, tmp);
+
+		size -= AEGIS_BLOCK_SIZE;
+		src += AEGIS_BLOCK_SIZE;
+		dst += AEGIS_BLOCK_SIZE;
+	}
+
+	if (size > 0) {
+		uint8_t buf[AEGIS_BLOCK_SIZE] = {};
+		uint8x16_t msg;
+
+		memcpy(buf, src, size);
+		msg = vld1q_u8(buf) ^ st.v[1] ^ (st.v[2] & st.v[3]) ^ st.v[4];
+		vst1q_u8(buf, msg);
+		memcpy(dst, buf, size);
+
+		memset(buf + size, 0, AEGIS_BLOCK_SIZE - size);
+		msg = vld1q_u8(buf);
+		st = aegis128_update_neon(st, msg);
+	}
+
+	aegis128_save_state_neon(st, state);
+}
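
Note: for comparison with the inline-asm aegis_aes_round() above, the same AES round could be written with the AArch64 Crypto Extension intrinsics alone. The sketch below is illustrative only and is not part of the patch; the patch deliberately avoids this form because the compiler may separate the aese/aesmc instructions and lose the pairing benefit described in the comment.

#include <arm_neon.h>

/*
 * Illustrative intrinsics-only form of one AEGIS AES round on AArch64.
 * aese performs AddRoundKey + SubBytes + ShiftRows (here with an all-zero
 * round key, since AEGIS mixes in its own key material separately), and
 * aesmc performs MixColumns.
 */
static uint8x16_t aegis_aes_round_intrinsics(uint8x16_t w)
{
	uint8x16_t z = vdupq_n_u8(0);

	return vaesmcq_u8(vaeseq_u8(w, z));
}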
diff --git a/crypto/aegis128-neon.c b/crypto/aegis128-neon.c
new file mode 100644
index 000000000000..c1c0a1686f67
--- /dev/null
+++ b/crypto/aegis128-neon.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2019 Linaro Ltd <ard.biesheuvel@linaro.org>
+ */
+
+#include <asm/cpufeature.h>
+#include <asm/neon.h>
+
+#include "aegis.h"
+
+void crypto_aegis128_update_neon(void *state, const void *msg);
+void crypto_aegis128_encrypt_chunk_neon(void *state, void *dst, const void *src,
+					unsigned int size);
+void crypto_aegis128_decrypt_chunk_neon(void *state, void *dst, const void *src,
+					unsigned int size);
+
+bool crypto_aegis128_have_simd(void)
+{
+	return cpu_have_feature(cpu_feature(AES));
+}
+
+void crypto_aegis128_update_simd(union aegis_block *state, const void *msg)
+{
+	kernel_neon_begin();
+	crypto_aegis128_update_neon(state, msg);
+	kernel_neon_end();
+}
+
+void crypto_aegis128_encrypt_chunk_simd(union aegis_block *state, u8 *dst,
+					const u8 *src, unsigned int size)
+{
+	kernel_neon_begin();
+	crypto_aegis128_encrypt_chunk_neon(state, dst, src, size);
+	kernel_neon_end();
+}
+
+void crypto_aegis128_decrypt_chunk_simd(union aegis_block *state, u8 *dst,
+					const u8 *src, unsigned int size)
+{
+	kernel_neon_begin();
+	crypto_aegis128_decrypt_chunk_neon(state, dst, src, size);
+	kernel_neon_end();
+}
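
Note: the *_simd wrappers above are the SIMD entry points consumed by the generic aegis128 core. A minimal caller-side sketch is shown below; it is hypothetical and not part of this patch, and the crypto_aegis128_encrypt_chunk_generic() helper and the have_simd flag are names assumed purely for illustration. The idea is to probe for SIMD support once (e.g. at module init, via crypto_aegis128_have_simd()) and dispatch per chunk.

/*
 * Hypothetical dispatch sketch: use the NEON path when the option is
 * enabled and the CPU advertises the AES instructions, otherwise fall
 * back to the portable C implementation.
 */
static bool have_simd;

static void crypto_aegis128_encrypt_chunk(union aegis_block *state, u8 *dst,
					  const u8 *src, unsigned int size)
{
	if (IS_ENABLED(CONFIG_CRYPTO_AEGIS128_SIMD) && have_simd)
		crypto_aegis128_encrypt_chunk_simd(state, dst, src, size);
	else
		crypto_aegis128_encrypt_chunk_generic(state, dst, src, size);
}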