diff options
author | Ard Biesheuvel <ard.biesheuvel@linaro.org> | 2017-07-24 06:28:10 -0400 |
---|---|---|
committer | Herbert Xu <herbert@gondor.apana.org.au> | 2017-08-03 21:27:19 -0400 |
commit | f402e3115e20b345bd6fbfcf463a506d958c7bf6 (patch) | |
tree | 701bf0f90557307d554f48df36a41224762aa3b6 | |
parent | da1793312f7693787e0ed22aa121261c3e0e15c0 (diff) |
crypto: arm64/aes-ce-cipher - match round key endianness with generic code
In order to be able to reuse the generic AES code as a fallback for
situations where the NEON unit may not be used, update the key handling
to match the byte order of the generic code: it stores round keys
as sequences of 32-bit quantities rather than streams of bytes, so
our code needs to be updated to reflect that.
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
-rw-r--r-- | arch/arm64/crypto/aes-ce-ccm-core.S | 30 | ||||
-rw-r--r-- | arch/arm64/crypto/aes-ce-cipher.c | 35 | ||||
-rw-r--r-- | arch/arm64/crypto/aes-ce.S | 12 |
3 files changed, 37 insertions, 40 deletions
diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S index 3363560c79b7..e3a375c4cb83 100644 --- a/arch/arm64/crypto/aes-ce-ccm-core.S +++ b/arch/arm64/crypto/aes-ce-ccm-core.S | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions | 2 | * aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions |
3 | * | 3 | * |
4 | * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org> | 4 | * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org> |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or modify | 6 | * This program is free software; you can redistribute it and/or modify |
7 | * it under the terms of the GNU General Public License version 2 as | 7 | * it under the terms of the GNU General Public License version 2 as |
@@ -32,7 +32,7 @@ ENTRY(ce_aes_ccm_auth_data) | |||
32 | beq 8f /* out of input? */ | 32 | beq 8f /* out of input? */ |
33 | cbnz w8, 0b | 33 | cbnz w8, 0b |
34 | eor v0.16b, v0.16b, v1.16b | 34 | eor v0.16b, v0.16b, v1.16b |
35 | 1: ld1 {v3.16b}, [x4] /* load first round key */ | 35 | 1: ld1 {v3.4s}, [x4] /* load first round key */ |
36 | prfm pldl1strm, [x1] | 36 | prfm pldl1strm, [x1] |
37 | cmp w5, #12 /* which key size? */ | 37 | cmp w5, #12 /* which key size? */ |
38 | add x6, x4, #16 | 38 | add x6, x4, #16 |
@@ -42,17 +42,17 @@ ENTRY(ce_aes_ccm_auth_data) | |||
42 | mov v5.16b, v3.16b | 42 | mov v5.16b, v3.16b |
43 | b 4f | 43 | b 4f |
44 | 2: mov v4.16b, v3.16b | 44 | 2: mov v4.16b, v3.16b |
45 | ld1 {v5.16b}, [x6], #16 /* load 2nd round key */ | 45 | ld1 {v5.4s}, [x6], #16 /* load 2nd round key */ |
46 | 3: aese v0.16b, v4.16b | 46 | 3: aese v0.16b, v4.16b |
47 | aesmc v0.16b, v0.16b | 47 | aesmc v0.16b, v0.16b |
48 | 4: ld1 {v3.16b}, [x6], #16 /* load next round key */ | 48 | 4: ld1 {v3.4s}, [x6], #16 /* load next round key */ |
49 | aese v0.16b, v5.16b | 49 | aese v0.16b, v5.16b |
50 | aesmc v0.16b, v0.16b | 50 | aesmc v0.16b, v0.16b |
51 | 5: ld1 {v4.16b}, [x6], #16 /* load next round key */ | 51 | 5: ld1 {v4.4s}, [x6], #16 /* load next round key */ |
52 | subs w7, w7, #3 | 52 | subs w7, w7, #3 |
53 | aese v0.16b, v3.16b | 53 | aese v0.16b, v3.16b |
54 | aesmc v0.16b, v0.16b | 54 | aesmc v0.16b, v0.16b |
55 | ld1 {v5.16b}, [x6], #16 /* load next round key */ | 55 | ld1 {v5.4s}, [x6], #16 /* load next round key */ |
56 | bpl 3b | 56 | bpl 3b |
57 | aese v0.16b, v4.16b | 57 | aese v0.16b, v4.16b |
58 | subs w2, w2, #16 /* last data? */ | 58 | subs w2, w2, #16 /* last data? */ |
@@ -90,7 +90,7 @@ ENDPROC(ce_aes_ccm_auth_data) | |||
90 | * u32 rounds); | 90 | * u32 rounds); |
91 | */ | 91 | */ |
92 | ENTRY(ce_aes_ccm_final) | 92 | ENTRY(ce_aes_ccm_final) |
93 | ld1 {v3.16b}, [x2], #16 /* load first round key */ | 93 | ld1 {v3.4s}, [x2], #16 /* load first round key */ |
94 | ld1 {v0.16b}, [x0] /* load mac */ | 94 | ld1 {v0.16b}, [x0] /* load mac */ |
95 | cmp w3, #12 /* which key size? */ | 95 | cmp w3, #12 /* which key size? */ |
96 | sub w3, w3, #2 /* modified # of rounds */ | 96 | sub w3, w3, #2 /* modified # of rounds */ |
@@ -100,17 +100,17 @@ ENTRY(ce_aes_ccm_final) | |||
100 | mov v5.16b, v3.16b | 100 | mov v5.16b, v3.16b |
101 | b 2f | 101 | b 2f |
102 | 0: mov v4.16b, v3.16b | 102 | 0: mov v4.16b, v3.16b |
103 | 1: ld1 {v5.16b}, [x2], #16 /* load next round key */ | 103 | 1: ld1 {v5.4s}, [x2], #16 /* load next round key */ |
104 | aese v0.16b, v4.16b | 104 | aese v0.16b, v4.16b |
105 | aesmc v0.16b, v0.16b | 105 | aesmc v0.16b, v0.16b |
106 | aese v1.16b, v4.16b | 106 | aese v1.16b, v4.16b |
107 | aesmc v1.16b, v1.16b | 107 | aesmc v1.16b, v1.16b |
108 | 2: ld1 {v3.16b}, [x2], #16 /* load next round key */ | 108 | 2: ld1 {v3.4s}, [x2], #16 /* load next round key */ |
109 | aese v0.16b, v5.16b | 109 | aese v0.16b, v5.16b |
110 | aesmc v0.16b, v0.16b | 110 | aesmc v0.16b, v0.16b |
111 | aese v1.16b, v5.16b | 111 | aese v1.16b, v5.16b |
112 | aesmc v1.16b, v1.16b | 112 | aesmc v1.16b, v1.16b |
113 | 3: ld1 {v4.16b}, [x2], #16 /* load next round key */ | 113 | 3: ld1 {v4.4s}, [x2], #16 /* load next round key */ |
114 | subs w3, w3, #3 | 114 | subs w3, w3, #3 |
115 | aese v0.16b, v3.16b | 115 | aese v0.16b, v3.16b |
116 | aesmc v0.16b, v0.16b | 116 | aesmc v0.16b, v0.16b |
@@ -137,31 +137,31 @@ CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */ | |||
137 | cmp w4, #12 /* which key size? */ | 137 | cmp w4, #12 /* which key size? */ |
138 | sub w7, w4, #2 /* get modified # of rounds */ | 138 | sub w7, w4, #2 /* get modified # of rounds */ |
139 | ins v1.d[1], x9 /* no carry in lower ctr */ | 139 | ins v1.d[1], x9 /* no carry in lower ctr */ |
140 | ld1 {v3.16b}, [x3] /* load first round key */ | 140 | ld1 {v3.4s}, [x3] /* load first round key */ |
141 | add x10, x3, #16 | 141 | add x10, x3, #16 |
142 | bmi 1f | 142 | bmi 1f |
143 | bne 4f | 143 | bne 4f |
144 | mov v5.16b, v3.16b | 144 | mov v5.16b, v3.16b |
145 | b 3f | 145 | b 3f |
146 | 1: mov v4.16b, v3.16b | 146 | 1: mov v4.16b, v3.16b |
147 | ld1 {v5.16b}, [x10], #16 /* load 2nd round key */ | 147 | ld1 {v5.4s}, [x10], #16 /* load 2nd round key */ |
148 | 2: /* inner loop: 3 rounds, 2x interleaved */ | 148 | 2: /* inner loop: 3 rounds, 2x interleaved */ |
149 | aese v0.16b, v4.16b | 149 | aese v0.16b, v4.16b |
150 | aesmc v0.16b, v0.16b | 150 | aesmc v0.16b, v0.16b |
151 | aese v1.16b, v4.16b | 151 | aese v1.16b, v4.16b |
152 | aesmc v1.16b, v1.16b | 152 | aesmc v1.16b, v1.16b |
153 | 3: ld1 {v3.16b}, [x10], #16 /* load next round key */ | 153 | 3: ld1 {v3.4s}, [x10], #16 /* load next round key */ |
154 | aese v0.16b, v5.16b | 154 | aese v0.16b, v5.16b |
155 | aesmc v0.16b, v0.16b | 155 | aesmc v0.16b, v0.16b |
156 | aese v1.16b, v5.16b | 156 | aese v1.16b, v5.16b |
157 | aesmc v1.16b, v1.16b | 157 | aesmc v1.16b, v1.16b |
158 | 4: ld1 {v4.16b}, [x10], #16 /* load next round key */ | 158 | 4: ld1 {v4.4s}, [x10], #16 /* load next round key */ |
159 | subs w7, w7, #3 | 159 | subs w7, w7, #3 |
160 | aese v0.16b, v3.16b | 160 | aese v0.16b, v3.16b |
161 | aesmc v0.16b, v0.16b | 161 | aesmc v0.16b, v0.16b |
162 | aese v1.16b, v3.16b | 162 | aese v1.16b, v3.16b |
163 | aesmc v1.16b, v1.16b | 163 | aesmc v1.16b, v1.16b |
164 | ld1 {v5.16b}, [x10], #16 /* load next round key */ | 164 | ld1 {v5.4s}, [x10], #16 /* load next round key */ |
165 | bpl 2b | 165 | bpl 2b |
166 | aese v0.16b, v4.16b | 166 | aese v0.16b, v4.16b |
167 | aese v1.16b, v4.16b | 167 | aese v1.16b, v4.16b |
diff --git a/arch/arm64/crypto/aes-ce-cipher.c b/arch/arm64/crypto/aes-ce-cipher.c index 50d9fe11d0c8..a0a0e5e3a8b5 100644 --- a/arch/arm64/crypto/aes-ce-cipher.c +++ b/arch/arm64/crypto/aes-ce-cipher.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * aes-ce-cipher.c - core AES cipher using ARMv8 Crypto Extensions | 2 | * aes-ce-cipher.c - core AES cipher using ARMv8 Crypto Extensions |
3 | * | 3 | * |
4 | * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org> | 4 | * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org> |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or modify | 6 | * This program is free software; you can redistribute it and/or modify |
7 | * it under the terms of the GNU General Public License version 2 as | 7 | * it under the terms of the GNU General Public License version 2 as |
@@ -9,6 +9,7 @@ | |||
9 | */ | 9 | */ |
10 | 10 | ||
11 | #include <asm/neon.h> | 11 | #include <asm/neon.h> |
12 | #include <asm/unaligned.h> | ||
12 | #include <crypto/aes.h> | 13 | #include <crypto/aes.h> |
13 | #include <linux/cpufeature.h> | 14 | #include <linux/cpufeature.h> |
14 | #include <linux/crypto.h> | 15 | #include <linux/crypto.h> |
@@ -47,24 +48,24 @@ static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[]) | |||
47 | kernel_neon_begin_partial(4); | 48 | kernel_neon_begin_partial(4); |
48 | 49 | ||
49 | __asm__(" ld1 {v0.16b}, %[in] ;" | 50 | __asm__(" ld1 {v0.16b}, %[in] ;" |
50 | " ld1 {v1.16b}, [%[key]], #16 ;" | 51 | " ld1 {v1.4s}, [%[key]], #16 ;" |
51 | " cmp %w[rounds], #10 ;" | 52 | " cmp %w[rounds], #10 ;" |
52 | " bmi 0f ;" | 53 | " bmi 0f ;" |
53 | " bne 3f ;" | 54 | " bne 3f ;" |
54 | " mov v3.16b, v1.16b ;" | 55 | " mov v3.16b, v1.16b ;" |
55 | " b 2f ;" | 56 | " b 2f ;" |
56 | "0: mov v2.16b, v1.16b ;" | 57 | "0: mov v2.16b, v1.16b ;" |
57 | " ld1 {v3.16b}, [%[key]], #16 ;" | 58 | " ld1 {v3.4s}, [%[key]], #16 ;" |
58 | "1: aese v0.16b, v2.16b ;" | 59 | "1: aese v0.16b, v2.16b ;" |
59 | " aesmc v0.16b, v0.16b ;" | 60 | " aesmc v0.16b, v0.16b ;" |
60 | "2: ld1 {v1.16b}, [%[key]], #16 ;" | 61 | "2: ld1 {v1.4s}, [%[key]], #16 ;" |
61 | " aese v0.16b, v3.16b ;" | 62 | " aese v0.16b, v3.16b ;" |
62 | " aesmc v0.16b, v0.16b ;" | 63 | " aesmc v0.16b, v0.16b ;" |
63 | "3: ld1 {v2.16b}, [%[key]], #16 ;" | 64 | "3: ld1 {v2.4s}, [%[key]], #16 ;" |
64 | " subs %w[rounds], %w[rounds], #3 ;" | 65 | " subs %w[rounds], %w[rounds], #3 ;" |
65 | " aese v0.16b, v1.16b ;" | 66 | " aese v0.16b, v1.16b ;" |
66 | " aesmc v0.16b, v0.16b ;" | 67 | " aesmc v0.16b, v0.16b ;" |
67 | " ld1 {v3.16b}, [%[key]], #16 ;" | 68 | " ld1 {v3.4s}, [%[key]], #16 ;" |
68 | " bpl 1b ;" | 69 | " bpl 1b ;" |
69 | " aese v0.16b, v2.16b ;" | 70 | " aese v0.16b, v2.16b ;" |
70 | " eor v0.16b, v0.16b, v3.16b ;" | 71 | " eor v0.16b, v0.16b, v3.16b ;" |
@@ -92,24 +93,24 @@ static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[]) | |||
92 | kernel_neon_begin_partial(4); | 93 | kernel_neon_begin_partial(4); |
93 | 94 | ||
94 | __asm__(" ld1 {v0.16b}, %[in] ;" | 95 | __asm__(" ld1 {v0.16b}, %[in] ;" |
95 | " ld1 {v1.16b}, [%[key]], #16 ;" | 96 | " ld1 {v1.4s}, [%[key]], #16 ;" |
96 | " cmp %w[rounds], #10 ;" | 97 | " cmp %w[rounds], #10 ;" |
97 | " bmi 0f ;" | 98 | " bmi 0f ;" |
98 | " bne 3f ;" | 99 | " bne 3f ;" |
99 | " mov v3.16b, v1.16b ;" | 100 | " mov v3.16b, v1.16b ;" |
100 | " b 2f ;" | 101 | " b 2f ;" |
101 | "0: mov v2.16b, v1.16b ;" | 102 | "0: mov v2.16b, v1.16b ;" |
102 | " ld1 {v3.16b}, [%[key]], #16 ;" | 103 | " ld1 {v3.4s}, [%[key]], #16 ;" |
103 | "1: aesd v0.16b, v2.16b ;" | 104 | "1: aesd v0.16b, v2.16b ;" |
104 | " aesimc v0.16b, v0.16b ;" | 105 | " aesimc v0.16b, v0.16b ;" |
105 | "2: ld1 {v1.16b}, [%[key]], #16 ;" | 106 | "2: ld1 {v1.4s}, [%[key]], #16 ;" |
106 | " aesd v0.16b, v3.16b ;" | 107 | " aesd v0.16b, v3.16b ;" |
107 | " aesimc v0.16b, v0.16b ;" | 108 | " aesimc v0.16b, v0.16b ;" |
108 | "3: ld1 {v2.16b}, [%[key]], #16 ;" | 109 | "3: ld1 {v2.4s}, [%[key]], #16 ;" |
109 | " subs %w[rounds], %w[rounds], #3 ;" | 110 | " subs %w[rounds], %w[rounds], #3 ;" |
110 | " aesd v0.16b, v1.16b ;" | 111 | " aesd v0.16b, v1.16b ;" |
111 | " aesimc v0.16b, v0.16b ;" | 112 | " aesimc v0.16b, v0.16b ;" |
112 | " ld1 {v3.16b}, [%[key]], #16 ;" | 113 | " ld1 {v3.4s}, [%[key]], #16 ;" |
113 | " bpl 1b ;" | 114 | " bpl 1b ;" |
114 | " aesd v0.16b, v2.16b ;" | 115 | " aesd v0.16b, v2.16b ;" |
115 | " eor v0.16b, v0.16b, v3.16b ;" | 116 | " eor v0.16b, v0.16b, v3.16b ;" |
@@ -165,20 +166,16 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key, | |||
165 | key_len != AES_KEYSIZE_256) | 166 | key_len != AES_KEYSIZE_256) |
166 | return -EINVAL; | 167 | return -EINVAL; |
167 | 168 | ||
168 | memcpy(ctx->key_enc, in_key, key_len); | ||
169 | ctx->key_length = key_len; | 169 | ctx->key_length = key_len; |
170 | for (i = 0; i < kwords; i++) | ||
171 | ctx->key_enc[i] = get_unaligned_le32(in_key + i * sizeof(u32)); | ||
170 | 172 | ||
171 | kernel_neon_begin_partial(2); | 173 | kernel_neon_begin_partial(2); |
172 | for (i = 0; i < sizeof(rcon); i++) { | 174 | for (i = 0; i < sizeof(rcon); i++) { |
173 | u32 *rki = ctx->key_enc + (i * kwords); | 175 | u32 *rki = ctx->key_enc + (i * kwords); |
174 | u32 *rko = rki + kwords; | 176 | u32 *rko = rki + kwords; |
175 | 177 | ||
176 | #ifndef CONFIG_CPU_BIG_ENDIAN | ||
177 | rko[0] = ror32(aes_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0]; | 178 | rko[0] = ror32(aes_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0]; |
178 | #else | ||
179 | rko[0] = rol32(aes_sub(rki[kwords - 1]), 8) ^ (rcon[i] << 24) ^ | ||
180 | rki[0]; | ||
181 | #endif | ||
182 | rko[1] = rko[0] ^ rki[1]; | 179 | rko[1] = rko[0] ^ rki[1]; |
183 | rko[2] = rko[1] ^ rki[2]; | 180 | rko[2] = rko[1] ^ rki[2]; |
184 | rko[3] = rko[2] ^ rki[3]; | 181 | rko[3] = rko[2] ^ rki[3]; |
@@ -210,9 +207,9 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key, | |||
210 | 207 | ||
211 | key_dec[0] = key_enc[j]; | 208 | key_dec[0] = key_enc[j]; |
212 | for (i = 1, j--; j > 0; i++, j--) | 209 | for (i = 1, j--; j > 0; i++, j--) |
213 | __asm__("ld1 {v0.16b}, %[in] ;" | 210 | __asm__("ld1 {v0.4s}, %[in] ;" |
214 | "aesimc v1.16b, v0.16b ;" | 211 | "aesimc v1.16b, v0.16b ;" |
215 | "st1 {v1.16b}, %[out] ;" | 212 | "st1 {v1.4s}, %[out] ;" |
216 | 213 | ||
217 | : [out] "=Q"(key_dec[i]) | 214 | : [out] "=Q"(key_dec[i]) |
218 | : [in] "Q"(key_enc[j]) | 215 | : [in] "Q"(key_enc[j]) |
diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S index b46093d567e5..50330f5c3adc 100644 --- a/arch/arm64/crypto/aes-ce.S +++ b/arch/arm64/crypto/aes-ce.S | |||
@@ -2,7 +2,7 @@ | |||
2 | * linux/arch/arm64/crypto/aes-ce.S - AES cipher for ARMv8 with | 2 | * linux/arch/arm64/crypto/aes-ce.S - AES cipher for ARMv8 with |
3 | * Crypto Extensions | 3 | * Crypto Extensions |
4 | * | 4 | * |
5 | * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org> | 5 | * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org> |
6 | * | 6 | * |
7 | * This program is free software; you can redistribute it and/or modify | 7 | * This program is free software; you can redistribute it and/or modify |
8 | * it under the terms of the GNU General Public License version 2 as | 8 | * it under the terms of the GNU General Public License version 2 as |
@@ -22,11 +22,11 @@ | |||
22 | cmp \rounds, #12 | 22 | cmp \rounds, #12 |
23 | blo 2222f /* 128 bits */ | 23 | blo 2222f /* 128 bits */ |
24 | beq 1111f /* 192 bits */ | 24 | beq 1111f /* 192 bits */ |
25 | ld1 {v17.16b-v18.16b}, [\rk], #32 | 25 | ld1 {v17.4s-v18.4s}, [\rk], #32 |
26 | 1111: ld1 {v19.16b-v20.16b}, [\rk], #32 | 26 | 1111: ld1 {v19.4s-v20.4s}, [\rk], #32 |
27 | 2222: ld1 {v21.16b-v24.16b}, [\rk], #64 | 27 | 2222: ld1 {v21.4s-v24.4s}, [\rk], #64 |
28 | ld1 {v25.16b-v28.16b}, [\rk], #64 | 28 | ld1 {v25.4s-v28.4s}, [\rk], #64 |
29 | ld1 {v29.16b-v31.16b}, [\rk] | 29 | ld1 {v29.4s-v31.4s}, [\rk] |
30 | .endm | 30 | .endm |
31 | 31 | ||
32 | /* prepare for encryption with key in rk[] */ | 32 | /* prepare for encryption with key in rk[] */ |