path: root/crypto/aes_ti.c
author     Ard Biesheuvel <ard.biesheuvel@linaro.org>   2017-02-02 11:37:40 -0500
committer  Herbert Xu <herbert@gondor.apana.org.au>     2017-02-11 04:50:43 -0500
commit     b5e0b032b6c31c052ee0132ee70b155c22cf7b28 (patch)
tree       cea293543e163a375ca0275278cca5f63b8077d2 /crypto/aes_ti.c
parent     ec38a9376163f9f7cb671e49b7667129c7bb8f8b (diff)
crypto: aes - add generic time invariant AES cipher
Lookup table based AES is sensitive to timing attacks, which is due to the fact that such table lookups are data dependent, and the fact that 8 KB worth of tables covers a significant number of cachelines on any architecture, resulting in an exploitable correlation between the key and the processing time for known plaintexts.

For network facing algorithms such as CTR, CCM or GCM, this presents a security risk, which is why arch specific AES ports are typically time invariant, either through the use of special instructions, or by using SIMD algorithms that don't rely on table lookups.

For generic code, this is difficult to achieve without losing too much performance, but we can improve the situation significantly by switching to an implementation that only needs 256 bytes of table data (the actual S-box itself), which can be prefetched at the start of each block to eliminate data dependent latencies.

This code encrypts at ~25 cycles per byte on ARM Cortex-A57 (while the ordinary generic AES driver manages 18 cycles per byte on this hardware). Decryption is substantially slower.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
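For context, in-kernel users would normally reach this cipher through the crypto API rather than by driver name: it registers under "aes" at priority 100 + 1, so it is only selected when no higher-priority AES implementation is available. A minimal sketch of single block use via the cipher API (hypothetical helper name, error handling trimmed) could look like:

#include <crypto/aes.h>
#include <linux/crypto.h>
#include <linux/err.h>

static int aes_one_block_demo(const u8 key[AES_KEYSIZE_128],
                              const u8 in[AES_BLOCK_SIZE],
                              u8 out[AES_BLOCK_SIZE])
{
        struct crypto_cipher *tfm;
        int err;

        /* "aes" resolves to the highest-priority implementation;
         * request "aes-fixed-time" explicitly to force this one. */
        tfm = crypto_alloc_cipher("aes", 0, 0);
        if (IS_ERR(tfm))
                return PTR_ERR(tfm);

        err = crypto_cipher_setkey(tfm, key, AES_KEYSIZE_128);
        if (!err)
                crypto_cipher_encrypt_one(tfm, out, in);

        crypto_free_cipher(tfm);
        return err;
}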
Diffstat (limited to 'crypto/aes_ti.c')
-rw-r--r--   crypto/aes_ti.c   375
1 file changed, 375 insertions, 0 deletions
diff --git a/crypto/aes_ti.c b/crypto/aes_ti.c
new file mode 100644
index 000000000000..92644fd1ac19
--- /dev/null
+++ b/crypto/aes_ti.c
@@ -0,0 +1,375 @@
/*
 * Scalar fixed time AES core transform
 *
 * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <crypto/aes.h>
#include <linux/crypto.h>
#include <linux/module.h>
#include <asm/unaligned.h>

/*
 * Emit the sbox as volatile const to prevent the compiler from doing
 * constant folding on sbox references involving fixed indexes.
 */
static volatile const u8 __cacheline_aligned __aesti_sbox[] = {
        0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5,
        0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
        0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
        0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
        0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc,
        0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
        0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a,
        0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
        0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
        0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
        0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b,
        0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
        0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85,
        0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
        0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
        0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
        0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17,
        0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
        0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88,
        0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
        0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
        0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
        0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9,
        0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
        0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6,
        0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
        0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
        0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
        0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94,
        0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
        0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68,
        0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16,
};

static volatile const u8 __cacheline_aligned __aesti_inv_sbox[] = {
        0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38,
        0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
        0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87,
        0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
        0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d,
        0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
        0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2,
        0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
        0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16,
        0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
        0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda,
        0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
        0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a,
        0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
        0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02,
        0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
        0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea,
        0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
        0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85,
        0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
        0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89,
        0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
        0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20,
        0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
        0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31,
        0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
        0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d,
        0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
        0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0,
        0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
        0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26,
        0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d,
};

static u32 mul_by_x(u32 w)
{
        u32 x = w & 0x7f7f7f7f;
        u32 y = w & 0x80808080;

        /* multiply by polynomial 'x' (0b10) in GF(2^8) */
        return (x << 1) ^ (y >> 7) * 0x1b;
}

static u32 mul_by_x2(u32 w)
{
        u32 x = w & 0x3f3f3f3f;
        u32 y = w & 0x80808080;
        u32 z = w & 0x40404040;

        /* multiply by polynomial 'x^2' (0b100) in GF(2^8) */
        return (x << 2) ^ (y >> 7) * 0x36 ^ (z >> 6) * 0x1b;
}
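
/*
 * For illustration: both helpers multiply all four bytes of a word at
 * once. A byte 0x80 (x^7) becomes 0x1b under mul_by_x(), since
 * x^8 == x^4 + x^3 + x + 1 modulo the AES polynomial, and 0x36 under
 * mul_by_x2(); a byte 0x40 (x^6) becomes 0x1b under mul_by_x2().
 */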

static u32 mix_columns(u32 x)
{
        /*
         * Perform the following matrix multiplication in GF(2^8)
         *
         * | 0x2 0x3 0x1 0x1 |   | x[0] |
         * | 0x1 0x2 0x3 0x1 |   | x[1] |
         * | 0x1 0x1 0x2 0x3 | x | x[2] |
         * | 0x3 0x1 0x1 0x2 |   | x[3] |
         */
        u32 y = mul_by_x(x) ^ ror32(x, 16);

        return y ^ ror32(x ^ y, 8);
}
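
/*
 * Worked example: the FIPS-197 AES-128 cipher example feeds the column
 * (0xd4, 0xbf, 0x5d, 0x30) into MixColumns in round 1 and gets
 * (0x04, 0x66, 0x81, 0xe5) back. With the little-endian packing used
 * here, that is mix_columns(0x305dbfd4) == 0xe5816604.
 */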

static u32 inv_mix_columns(u32 x)
{
        /*
         * Perform the following matrix multiplication in GF(2^8)
         *
         * | 0xe 0xb 0xd 0x9 |   | x[0] |
         * | 0x9 0xe 0xb 0xd |   | x[1] |
         * | 0xd 0x9 0xe 0xb | x | x[2] |
         * | 0xb 0xd 0x9 0xe |   | x[3] |
         *
         * which can conveniently be reduced to
         *
         * | 0x2 0x3 0x1 0x1 |   | 0x5 0x0 0x4 0x0 |   | x[0] |
         * | 0x1 0x2 0x3 0x1 |   | 0x0 0x5 0x0 0x4 |   | x[1] |
         * | 0x1 0x1 0x2 0x3 | x | 0x4 0x0 0x5 0x0 | x | x[2] |
         * | 0x3 0x1 0x1 0x2 |   | 0x0 0x4 0x0 0x5 |   | x[3] |
         */
        u32 y = mul_by_x2(x);

        return mix_columns(x ^ y ^ ror32(y, 16));
}
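
/*
 * Sanity check on the factorisation above: the top-left entry of the
 * product is 0x2*0x5 ^ 0x1*0x4 = 0xa ^ 0x4 = 0xe, and the entry next
 * to it is 0x3*0x5 ^ 0x1*0x4 = 0xf ^ 0x4 = 0xb, matching the first row
 * of the InvMixColumns matrix. The right hand matrix is exactly what
 * x ^ y ^ ror32(y, 16) computes: 0x5*b equals b ^ 0x4*b, and rotating
 * by 16 bits pairs each byte with the one two positions along.
 */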

static __always_inline u32 subshift(u32 in[], int pos)
{
        return (__aesti_sbox[in[pos] & 0xff]) ^
               (__aesti_sbox[(in[(pos + 1) % 4] >> 8) & 0xff] << 8) ^
               (__aesti_sbox[(in[(pos + 2) % 4] >> 16) & 0xff] << 16) ^
               (__aesti_sbox[(in[(pos + 3) % 4] >> 24) & 0xff] << 24);
}

static __always_inline u32 inv_subshift(u32 in[], int pos)
{
        return (__aesti_inv_sbox[in[pos] & 0xff]) ^
               (__aesti_inv_sbox[(in[(pos + 3) % 4] >> 8) & 0xff] << 8) ^
               (__aesti_inv_sbox[(in[(pos + 2) % 4] >> 16) & 0xff] << 16) ^
               (__aesti_inv_sbox[(in[(pos + 1) % 4] >> 24) & 0xff] << 24);
}

static u32 subw(u32 in)
{
        return (__aesti_sbox[in & 0xff]) ^
               (__aesti_sbox[(in >> 8) & 0xff] << 8) ^
               (__aesti_sbox[(in >> 16) & 0xff] << 16) ^
               (__aesti_sbox[(in >> 24) & 0xff] << 24);
}

static int aesti_expand_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
                            unsigned int key_len)
{
        u32 kwords = key_len / sizeof(u32);
        u32 rc, i, j;

        if (key_len != AES_KEYSIZE_128 &&
            key_len != AES_KEYSIZE_192 &&
            key_len != AES_KEYSIZE_256)
                return -EINVAL;

        ctx->key_length = key_len;

        for (i = 0; i < kwords; i++)
                ctx->key_enc[i] = get_unaligned_le32(in_key + i * sizeof(u32));

        for (i = 0, rc = 1; i < 10; i++, rc = mul_by_x(rc)) {
                u32 *rki = ctx->key_enc + (i * kwords);
                u32 *rko = rki + kwords;

                rko[0] = ror32(subw(rki[kwords - 1]), 8) ^ rc ^ rki[0];
                rko[1] = rko[0] ^ rki[1];
                rko[2] = rko[1] ^ rki[2];
                rko[3] = rko[2] ^ rki[3];

                if (key_len == 24) {
                        if (i >= 7)
                                break;
                        rko[4] = rko[3] ^ rki[4];
                        rko[5] = rko[4] ^ rki[5];
                } else if (key_len == 32) {
                        if (i >= 6)
                                break;
                        rko[4] = subw(rko[3]) ^ rki[4];
                        rko[5] = rko[4] ^ rki[5];
                        rko[6] = rko[5] ^ rki[6];
                        rko[7] = rko[6] ^ rki[7];
                }
        }

        /*
         * Generate the decryption keys for the Equivalent Inverse Cipher.
         * This involves reversing the order of the round keys, and applying
         * the Inverse Mix Columns transformation to all but the first and
         * the last one.
         */
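        /*
         * The last round key starts at 32-bit word 4 * rounds of key_enc,
         * which works out to key_len + 24 for all three key sizes: word 40
         * for AES-128 (10 rounds), 48 for AES-192 (12 rounds) and 56 for
         * AES-256 (14 rounds).
         */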
        ctx->key_dec[0] = ctx->key_enc[key_len + 24];
        ctx->key_dec[1] = ctx->key_enc[key_len + 25];
        ctx->key_dec[2] = ctx->key_enc[key_len + 26];
        ctx->key_dec[3] = ctx->key_enc[key_len + 27];

        for (i = 4, j = key_len + 20; j > 0; i += 4, j -= 4) {
                ctx->key_dec[i] = inv_mix_columns(ctx->key_enc[j]);
                ctx->key_dec[i + 1] = inv_mix_columns(ctx->key_enc[j + 1]);
                ctx->key_dec[i + 2] = inv_mix_columns(ctx->key_enc[j + 2]);
                ctx->key_dec[i + 3] = inv_mix_columns(ctx->key_enc[j + 3]);
        }

        ctx->key_dec[i] = ctx->key_enc[0];
        ctx->key_dec[i + 1] = ctx->key_enc[1];
        ctx->key_dec[i + 2] = ctx->key_enc[2];
        ctx->key_dec[i + 3] = ctx->key_enc[3];

        return 0;
}

static int aesti_set_key(struct crypto_tfm *tfm, const u8 *in_key,
                         unsigned int key_len)
{
        struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
        int err;

        err = aesti_expand_key(ctx, in_key, key_len);
        if (err)
                return err;

        /*
         * In order to force the compiler to emit data independent Sbox lookups
         * at the start of each block, xor the first round key with values at
         * fixed indexes in the Sbox. This will need to be repeated each time
         * the key is used, which will pull the entire Sbox into the D-cache
         * before any data dependent Sbox lookups are performed.
         */
        ctx->key_enc[0] ^= __aesti_sbox[ 0] ^ __aesti_sbox[128];
        ctx->key_enc[1] ^= __aesti_sbox[32] ^ __aesti_sbox[160];
        ctx->key_enc[2] ^= __aesti_sbox[64] ^ __aesti_sbox[192];
        ctx->key_enc[3] ^= __aesti_sbox[96] ^ __aesti_sbox[224];

        ctx->key_dec[0] ^= __aesti_inv_sbox[ 0] ^ __aesti_inv_sbox[128];
        ctx->key_dec[1] ^= __aesti_inv_sbox[32] ^ __aesti_inv_sbox[160];
        ctx->key_dec[2] ^= __aesti_inv_sbox[64] ^ __aesti_inv_sbox[192];
        ctx->key_dec[3] ^= __aesti_inv_sbox[96] ^ __aesti_inv_sbox[224];

        return 0;
}

static void aesti_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
        const struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
        const u32 *rkp = ctx->key_enc + 4;
        int rounds = 6 + ctx->key_length / 4;
        u32 st0[4], st1[4];
        int round;

        st0[0] = ctx->key_enc[0] ^ get_unaligned_le32(in);
        st0[1] = ctx->key_enc[1] ^ get_unaligned_le32(in + 4);
        st0[2] = ctx->key_enc[2] ^ get_unaligned_le32(in + 8);
        st0[3] = ctx->key_enc[3] ^ get_unaligned_le32(in + 12);

        st0[0] ^= __aesti_sbox[ 0] ^ __aesti_sbox[128];
        st0[1] ^= __aesti_sbox[32] ^ __aesti_sbox[160];
        st0[2] ^= __aesti_sbox[64] ^ __aesti_sbox[192];
        st0[3] ^= __aesti_sbox[96] ^ __aesti_sbox[224];

        for (round = 0;; round += 2, rkp += 8) {
                st1[0] = mix_columns(subshift(st0, 0)) ^ rkp[0];
                st1[1] = mix_columns(subshift(st0, 1)) ^ rkp[1];
                st1[2] = mix_columns(subshift(st0, 2)) ^ rkp[2];
                st1[3] = mix_columns(subshift(st0, 3)) ^ rkp[3];

                if (round == rounds - 2)
                        break;

                st0[0] = mix_columns(subshift(st1, 0)) ^ rkp[4];
                st0[1] = mix_columns(subshift(st1, 1)) ^ rkp[5];
                st0[2] = mix_columns(subshift(st1, 2)) ^ rkp[6];
                st0[3] = mix_columns(subshift(st1, 3)) ^ rkp[7];
        }

        put_unaligned_le32(subshift(st1, 0) ^ rkp[4], out);
        put_unaligned_le32(subshift(st1, 1) ^ rkp[5], out + 4);
        put_unaligned_le32(subshift(st1, 2) ^ rkp[6], out + 8);
        put_unaligned_le32(subshift(st1, 3) ^ rkp[7], out + 12);
}

static void aesti_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
        const struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
        const u32 *rkp = ctx->key_dec + 4;
        int rounds = 6 + ctx->key_length / 4;
        u32 st0[4], st1[4];
        int round;

        st0[0] = ctx->key_dec[0] ^ get_unaligned_le32(in);
        st0[1] = ctx->key_dec[1] ^ get_unaligned_le32(in + 4);
        st0[2] = ctx->key_dec[2] ^ get_unaligned_le32(in + 8);
        st0[3] = ctx->key_dec[3] ^ get_unaligned_le32(in + 12);

        st0[0] ^= __aesti_inv_sbox[ 0] ^ __aesti_inv_sbox[128];
        st0[1] ^= __aesti_inv_sbox[32] ^ __aesti_inv_sbox[160];
        st0[2] ^= __aesti_inv_sbox[64] ^ __aesti_inv_sbox[192];
        st0[3] ^= __aesti_inv_sbox[96] ^ __aesti_inv_sbox[224];

        for (round = 0;; round += 2, rkp += 8) {
                st1[0] = inv_mix_columns(inv_subshift(st0, 0)) ^ rkp[0];
                st1[1] = inv_mix_columns(inv_subshift(st0, 1)) ^ rkp[1];
                st1[2] = inv_mix_columns(inv_subshift(st0, 2)) ^ rkp[2];
                st1[3] = inv_mix_columns(inv_subshift(st0, 3)) ^ rkp[3];

                if (round == rounds - 2)
                        break;

                st0[0] = inv_mix_columns(inv_subshift(st1, 0)) ^ rkp[4];
                st0[1] = inv_mix_columns(inv_subshift(st1, 1)) ^ rkp[5];
                st0[2] = inv_mix_columns(inv_subshift(st1, 2)) ^ rkp[6];
                st0[3] = inv_mix_columns(inv_subshift(st1, 3)) ^ rkp[7];
        }

        put_unaligned_le32(inv_subshift(st1, 0) ^ rkp[4], out);
        put_unaligned_le32(inv_subshift(st1, 1) ^ rkp[5], out + 4);
        put_unaligned_le32(inv_subshift(st1, 2) ^ rkp[6], out + 8);
        put_unaligned_le32(inv_subshift(st1, 3) ^ rkp[7], out + 12);
}

static struct crypto_alg aes_alg = {
        .cra_name                       = "aes",
        .cra_driver_name                = "aes-fixed-time",
        .cra_priority                   = 100 + 1,
        .cra_flags                      = CRYPTO_ALG_TYPE_CIPHER,
        .cra_blocksize                  = AES_BLOCK_SIZE,
        .cra_ctxsize                    = sizeof(struct crypto_aes_ctx),
        .cra_module                     = THIS_MODULE,

        .cra_cipher.cia_min_keysize     = AES_MIN_KEY_SIZE,
        .cra_cipher.cia_max_keysize     = AES_MAX_KEY_SIZE,
        .cra_cipher.cia_setkey          = aesti_set_key,
        .cra_cipher.cia_encrypt         = aesti_encrypt,
        .cra_cipher.cia_decrypt         = aesti_decrypt
};

static int __init aes_init(void)
{
        return crypto_register_alg(&aes_alg);
}

static void __exit aes_fini(void)
{
        crypto_unregister_alg(&aes_alg);
}

module_init(aes_init);
module_exit(aes_fini);

MODULE_DESCRIPTION("Generic fixed time AES");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
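
Once registered, the algorithm is visible from user space through /proc/crypto. Assuming the object builds as aes_ti.ko, the entry would be expected to look roughly like the following (field layout as produced by the crypto API's procfs listing, abbreviated):

name         : aes
driver       : aes-fixed-time
module       : aes_ti
priority     : 101
type         : cipher
blocksize    : 16
min keysize  : 16
max keysize  : 32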