author    Ard Biesheuvel <ard.biesheuvel@linaro.org>  2017-07-24 06:28:10 -0400
committer Herbert Xu <herbert@gondor.apana.org.au>    2017-08-03 21:27:19 -0400
commit    f402e3115e20b345bd6fbfcf463a506d958c7bf6 (patch)
tree      701bf0f90557307d554f48df36a41224762aa3b6
parent    da1793312f7693787e0ed22aa121261c3e0e15c0 (diff)
crypto: arm64/aes-ce-cipher - match round key endianness with generic code
In order to be able to reuse the generic AES code as a fallback for
situations where the NEON may not be used, update the key handling
to match the byte order of the generic code: it stores round keys as
sequences of 32-bit quantities rather than streams of bytes, and so
our code needs to be updated to reflect that.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
-rw-r--r--  arch/arm64/crypto/aes-ce-ccm-core.S  30
-rw-r--r--  arch/arm64/crypto/aes-ce-cipher.c    35
-rw-r--r--  arch/arm64/crypto/aes-ce.S           12
3 files changed, 37 insertions(+), 40 deletions(-)
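For context only (not part of the patch): the layout difference the commit
message describes can be sketched in plain C. This is a minimal user-space
illustration; key_as_bytes()/key_as_words() are hypothetical helpers, and
get_le32() stands in for the kernel's get_unaligned_le32() from
<asm/unaligned.h>. The point is that the generic AES code keeps each
round-key word as a host-order u32 decoded from little-endian input, so on a
big-endian kernel the bytes within each word are swapped relative to a raw
byte copy; loading such a schedule with ld1 {v.4s} (instead of {v.16b}) lets
the CPU swab each 32-bit lane and end up with identical register contents on
either endianness.

	#include <stdint.h>
	#include <string.h>

	/* stand-in for the kernel's get_unaligned_le32() */
	static uint32_t get_le32(const uint8_t *p)
	{
		return (uint32_t)p[0] | ((uint32_t)p[1] << 8) |
		       ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
	}

	/* byte-stream layout: round-key bytes kept exactly as supplied */
	void key_as_bytes(uint8_t rk[16], const uint8_t in_key[16])
	{
		memcpy(rk, in_key, 16);
	}

	/* 32-bit-word layout used by the generic AES code: each word is a
	 * host-order u32 decoded from little-endian input, so a big-endian
	 * CPU stores the bytes within each word swapped */
	void key_as_words(uint32_t rk[4], const uint8_t in_key[16])
	{
		int i;

		for (i = 0; i < 4; i++)
			rk[i] = get_le32(in_key + 4 * i);
	}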
diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S
index 3363560c79b7..e3a375c4cb83 100644
--- a/arch/arm64/crypto/aes-ce-ccm-core.S
+++ b/arch/arm64/crypto/aes-ce-ccm-core.S
@@ -1,7 +1,7 @@
 /*
  * aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
  *
- * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -32,7 +32,7 @@ ENTRY(ce_aes_ccm_auth_data)
 	beq	8f			/* out of input? */
 	cbnz	w8, 0b
 	eor	v0.16b, v0.16b, v1.16b
-1:	ld1	{v3.16b}, [x4]			/* load first round key */
+1:	ld1	{v3.4s}, [x4]			/* load first round key */
 	prfm	pldl1strm, [x1]
 	cmp	w5, #12			/* which key size? */
 	add	x6, x4, #16
@@ -42,17 +42,17 @@ ENTRY(ce_aes_ccm_auth_data)
 	mov	v5.16b, v3.16b
 	b	4f
 2:	mov	v4.16b, v3.16b
-	ld1	{v5.16b}, [x6], #16	/* load 2nd round key */
+	ld1	{v5.4s}, [x6], #16	/* load 2nd round key */
 3:	aese	v0.16b, v4.16b
 	aesmc	v0.16b, v0.16b
-4:	ld1	{v3.16b}, [x6], #16	/* load next round key */
+4:	ld1	{v3.4s}, [x6], #16	/* load next round key */
 	aese	v0.16b, v5.16b
 	aesmc	v0.16b, v0.16b
-5:	ld1	{v4.16b}, [x6], #16	/* load next round key */
+5:	ld1	{v4.4s}, [x6], #16	/* load next round key */
 	subs	w7, w7, #3
 	aese	v0.16b, v3.16b
 	aesmc	v0.16b, v0.16b
-	ld1	{v5.16b}, [x6], #16	/* load next round key */
+	ld1	{v5.4s}, [x6], #16	/* load next round key */
 	bpl	3b
 	aese	v0.16b, v4.16b
 	subs	w2, w2, #16		/* last data? */
@@ -90,7 +90,7 @@ ENDPROC(ce_aes_ccm_auth_data)
  *			 u32 rounds);
  */
 ENTRY(ce_aes_ccm_final)
-	ld1	{v3.16b}, [x2], #16		/* load first round key */
+	ld1	{v3.4s}, [x2], #16		/* load first round key */
 	ld1	{v0.16b}, [x0]			/* load mac */
 	cmp	w3, #12			/* which key size? */
 	sub	w3, w3, #2		/* modified # of rounds */
@@ -100,17 +100,17 @@ ENTRY(ce_aes_ccm_final)
 	mov	v5.16b, v3.16b
 	b	2f
 0:	mov	v4.16b, v3.16b
-1:	ld1	{v5.16b}, [x2], #16	/* load next round key */
+1:	ld1	{v5.4s}, [x2], #16	/* load next round key */
 	aese	v0.16b, v4.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v4.16b
 	aesmc	v1.16b, v1.16b
-2:	ld1	{v3.16b}, [x2], #16	/* load next round key */
+2:	ld1	{v3.4s}, [x2], #16	/* load next round key */
 	aese	v0.16b, v5.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v5.16b
 	aesmc	v1.16b, v1.16b
-3:	ld1	{v4.16b}, [x2], #16	/* load next round key */
+3:	ld1	{v4.4s}, [x2], #16	/* load next round key */
 	subs	w3, w3, #3
 	aese	v0.16b, v3.16b
 	aesmc	v0.16b, v0.16b
@@ -137,31 +137,31 @@ CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */
 	cmp	w4, #12			/* which key size? */
 	sub	w7, w4, #2		/* get modified # of rounds */
 	ins	v1.d[1], x9		/* no carry in lower ctr */
-	ld1	{v3.16b}, [x3]			/* load first round key */
+	ld1	{v3.4s}, [x3]			/* load first round key */
 	add	x10, x3, #16
 	bmi	1f
 	bne	4f
 	mov	v5.16b, v3.16b
 	b	3f
 1:	mov	v4.16b, v3.16b
-	ld1	{v5.16b}, [x10], #16		/* load 2nd round key */
+	ld1	{v5.4s}, [x10], #16		/* load 2nd round key */
 2:	/* inner loop: 3 rounds, 2x interleaved */
 	aese	v0.16b, v4.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v4.16b
 	aesmc	v1.16b, v1.16b
-3:	ld1	{v3.16b}, [x10], #16		/* load next round key */
+3:	ld1	{v3.4s}, [x10], #16		/* load next round key */
 	aese	v0.16b, v5.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v5.16b
 	aesmc	v1.16b, v1.16b
-4:	ld1	{v4.16b}, [x10], #16		/* load next round key */
+4:	ld1	{v4.4s}, [x10], #16		/* load next round key */
 	subs	w7, w7, #3
 	aese	v0.16b, v3.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v3.16b
 	aesmc	v1.16b, v1.16b
-	ld1	{v5.16b}, [x10], #16		/* load next round key */
+	ld1	{v5.4s}, [x10], #16		/* load next round key */
 	bpl	2b
 	aese	v0.16b, v4.16b
 	aese	v1.16b, v4.16b
diff --git a/arch/arm64/crypto/aes-ce-cipher.c b/arch/arm64/crypto/aes-ce-cipher.c
index 50d9fe11d0c8..a0a0e5e3a8b5 100644
--- a/arch/arm64/crypto/aes-ce-cipher.c
+++ b/arch/arm64/crypto/aes-ce-cipher.c
@@ -1,7 +1,7 @@
 /*
  * aes-ce-cipher.c - core AES cipher using ARMv8 Crypto Extensions
  *
- * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -9,6 +9,7 @@
  */
 
 #include <asm/neon.h>
+#include <asm/unaligned.h>
 #include <crypto/aes.h>
 #include <linux/cpufeature.h>
 #include <linux/crypto.h>
@@ -47,24 +48,24 @@ static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
 	kernel_neon_begin_partial(4);
 
 	__asm__("	ld1	{v0.16b}, %[in]			;"
-		"	ld1	{v1.16b}, [%[key]], #16		;"
+		"	ld1	{v1.4s}, [%[key]], #16		;"
 		"	cmp	%w[rounds], #10			;"
 		"	bmi	0f				;"
 		"	bne	3f				;"
 		"	mov	v3.16b, v1.16b			;"
 		"	b	2f				;"
 		"0:	mov	v2.16b, v1.16b			;"
-		"	ld1	{v3.16b}, [%[key]], #16		;"
+		"	ld1	{v3.4s}, [%[key]], #16		;"
 		"1:	aese	v0.16b, v2.16b			;"
 		"	aesmc	v0.16b, v0.16b			;"
-		"2:	ld1	{v1.16b}, [%[key]], #16		;"
+		"2:	ld1	{v1.4s}, [%[key]], #16		;"
 		"	aese	v0.16b, v3.16b			;"
 		"	aesmc	v0.16b, v0.16b			;"
-		"3:	ld1	{v2.16b}, [%[key]], #16		;"
+		"3:	ld1	{v2.4s}, [%[key]], #16		;"
 		"	subs	%w[rounds], %w[rounds], #3	;"
 		"	aese	v0.16b, v1.16b			;"
 		"	aesmc	v0.16b, v0.16b			;"
-		"	ld1	{v3.16b}, [%[key]], #16		;"
+		"	ld1	{v3.4s}, [%[key]], #16		;"
 		"	bpl	1b				;"
 		"	aese	v0.16b, v2.16b			;"
 		"	eor	v0.16b, v0.16b, v3.16b		;"
@@ -92,24 +93,24 @@ static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
 	kernel_neon_begin_partial(4);
 
 	__asm__("	ld1	{v0.16b}, %[in]			;"
-		"	ld1	{v1.16b}, [%[key]], #16		;"
+		"	ld1	{v1.4s}, [%[key]], #16		;"
 		"	cmp	%w[rounds], #10			;"
 		"	bmi	0f				;"
 		"	bne	3f				;"
 		"	mov	v3.16b, v1.16b			;"
 		"	b	2f				;"
 		"0:	mov	v2.16b, v1.16b			;"
-		"	ld1	{v3.16b}, [%[key]], #16		;"
+		"	ld1	{v3.4s}, [%[key]], #16		;"
 		"1:	aesd	v0.16b, v2.16b			;"
 		"	aesimc	v0.16b, v0.16b			;"
-		"2:	ld1	{v1.16b}, [%[key]], #16		;"
+		"2:	ld1	{v1.4s}, [%[key]], #16		;"
 		"	aesd	v0.16b, v3.16b			;"
 		"	aesimc	v0.16b, v0.16b			;"
-		"3:	ld1	{v2.16b}, [%[key]], #16		;"
+		"3:	ld1	{v2.4s}, [%[key]], #16		;"
 		"	subs	%w[rounds], %w[rounds], #3	;"
 		"	aesd	v0.16b, v1.16b			;"
 		"	aesimc	v0.16b, v0.16b			;"
-		"	ld1	{v3.16b}, [%[key]], #16		;"
+		"	ld1	{v3.4s}, [%[key]], #16		;"
 		"	bpl	1b				;"
 		"	aesd	v0.16b, v2.16b			;"
 		"	eor	v0.16b, v0.16b, v3.16b		;"
@@ -165,20 +166,16 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
 	    key_len != AES_KEYSIZE_256)
 		return -EINVAL;
 
-	memcpy(ctx->key_enc, in_key, key_len);
 	ctx->key_length = key_len;
+	for (i = 0; i < kwords; i++)
+		ctx->key_enc[i] = get_unaligned_le32(in_key + i * sizeof(u32));
 
 	kernel_neon_begin_partial(2);
 	for (i = 0; i < sizeof(rcon); i++) {
 		u32 *rki = ctx->key_enc + (i * kwords);
 		u32 *rko = rki + kwords;
 
-#ifndef CONFIG_CPU_BIG_ENDIAN
 		rko[0] = ror32(aes_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0];
-#else
-		rko[0] = rol32(aes_sub(rki[kwords - 1]), 8) ^ (rcon[i] << 24) ^
-			 rki[0];
-#endif
 		rko[1] = rko[0] ^ rki[1];
 		rko[2] = rko[1] ^ rki[2];
 		rko[3] = rko[2] ^ rki[3];
@@ -210,9 +207,9 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
 
 	key_dec[0] = key_enc[j];
 	for (i = 1, j--; j > 0; i++, j--)
-		__asm__("ld1	{v0.16b}, %[in]		;"
+		__asm__("ld1	{v0.4s}, %[in]		;"
 			"aesimc	v1.16b, v0.16b	;"
-			"st1	{v1.16b}, %[out]	;"
+			"st1	{v1.4s}, %[out]	;"
 
 		: [out]	"=Q"(key_dec[i])
 		: [in]	"Q"(key_enc[j])
diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S
index b46093d567e5..50330f5c3adc 100644
--- a/arch/arm64/crypto/aes-ce.S
+++ b/arch/arm64/crypto/aes-ce.S
@@ -2,7 +2,7 @@
  * linux/arch/arm64/crypto/aes-ce.S - AES cipher for ARMv8 with
  * Crypto Extensions
  *
- * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -22,11 +22,11 @@
 	cmp	\rounds, #12
 	blo	2222f		/* 128 bits */
 	beq	1111f		/* 192 bits */
-	ld1	{v17.16b-v18.16b}, [\rk], #32
-1111:	ld1	{v19.16b-v20.16b}, [\rk], #32
-2222:	ld1	{v21.16b-v24.16b}, [\rk], #64
-	ld1	{v25.16b-v28.16b}, [\rk], #64
-	ld1	{v29.16b-v31.16b}, [\rk]
+	ld1	{v17.4s-v18.4s}, [\rk], #32
+1111:	ld1	{v19.4s-v20.4s}, [\rk], #32
+2222:	ld1	{v21.4s-v24.4s}, [\rk], #64
+	ld1	{v25.4s-v28.4s}, [\rk], #64
+	ld1	{v29.4s-v31.4s}, [\rk]
 	.endm
 
 	/* prepare for encryption with key in rk[] */