aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86_64/crypto/aes.c
diff options
context:
space:
mode:
authorAndreas Steinmetz <ast@domdv.de>2005-07-06 16:55:00 -0400
committerDavid S. Miller <davem@davemloft.net>2005-07-06 16:55:00 -0400
commita2a892a236d03a6e985471a7e57d1c863de144c8 (patch)
tree33b52c87bdecf0f24936b952a565a445ce03c616 /arch/x86_64/crypto/aes.c
parenta61cc44812ff94793987bf43b70a3d9bc64a6820 (diff)
[CRYPTO] Add x86_64 asm AES
Implementation: =============== The encrypt/decrypt code is based on an x86 implementation I did a while ago which I never published. This unpublished implementation does include an assembler based key schedule and precomputed tables. For simplicity and best acceptance, however, I took Gladman's in-kernel code for table generation and key schedule for the kernel port of my assembler code and modified this code to produce the key schedule as required by my assembler implementation. File locations and Kconfig are kept similar to the i586 AES assembler implementation. It may seem a little bit strange to use 32 bit I/O and registers in the assembler implementation but this gives the best code size. My implementation takes one instruction more per round compared to Gladman's x86 assembler but it doesn't require any stack for local variables or saved registers and it is less serialized than Gladman's code. Note that all comparisons to Gladman's code were done after my code was implemented. I did only use FIPS PUB 197 for the implementation so my implementation is independent work. If anybody has a better assembler solution for x86_64 I'll be pleased to have my code replaced with the better solution. Testing: ======== The implementation passes the in-kernel crypto testing module and I'm running it without any problems on my laptop where it is mainly used for dm-crypt. Microbenchmark: =============== The microbenchmark was done in userspace with similar compile flags as used during kernel compile. Encrypt/decrypt is about 35% faster than the generic C implementation. As the generic C as well as my assembler implementation are both table based, I don't really expect that there is much room for further improvements though I'll be glad to be corrected here. The key schedule is about 5% slower than the generic C implementation. This is due to the fact that some more work has to be done in the key schedule routine to fit the schedule to the assembler implementation.
Code Size: ========== Encrypt and decrypt are together about 2.1 Kbytes smaller than the generic C implementation which is important with regard to L1 cache usage. The key schedule routine is about 100 bytes larger than the generic C implementation. Data Size: ========== There's no difference in data size requirements between the assembler implementation and the generic C implementation. License: ======== Gladman's code is dual BSD/GPL whereas my assembler code is GPLv2 only (I'm not going to change the license for my code). So I had to change the module license for the x86_64 aes module from 'Dual BSD/GPL' to 'GPL' to reflect the most restrictive license within the module. Signed-off-by: Andreas Steinmetz <ast@domdv.de> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch/x86_64/crypto/aes.c')
-rw-r--r--arch/x86_64/crypto/aes.c324
1 files changed, 324 insertions, 0 deletions
diff --git a/arch/x86_64/crypto/aes.c b/arch/x86_64/crypto/aes.c
new file mode 100644
index 000000000000..2b5c4010ce38
--- /dev/null
+++ b/arch/x86_64/crypto/aes.c
@@ -0,0 +1,324 @@
1/*
2 * Cryptographic API.
3 *
4 * AES Cipher Algorithm.
5 *
6 * Based on Brian Gladman's code.
7 *
8 * Linux developers:
9 * Alexander Kjeldaas <astor@fast.no>
10 * Herbert Valerio Riedel <hvr@hvrlab.org>
11 * Kyle McMartin <kyle@debian.org>
12 * Adam J. Richter <adam@yggdrasil.com> (conversion to 2.5 API).
13 * Andreas Steinmetz <ast@domdv.de> (adapted to x86_64 assembler)
14 *
15 * This program is free software; you can redistribute it and/or modify
16 * it under the terms of the GNU General Public License as published by
17 * the Free Software Foundation; either version 2 of the License, or
18 * (at your option) any later version.
19 *
20 * ---------------------------------------------------------------------------
21 * Copyright (c) 2002, Dr Brian Gladman <brg@gladman.me.uk>, Worcester, UK.
22 * All rights reserved.
23 *
24 * LICENSE TERMS
25 *
26 * The free distribution and use of this software in both source and binary
27 * form is allowed (with or without changes) provided that:
28 *
29 * 1. distributions of this source code include the above copyright
30 * notice, this list of conditions and the following disclaimer;
31 *
32 * 2. distributions in binary form include the above copyright
33 * notice, this list of conditions and the following disclaimer
34 * in the documentation and/or other associated materials;
35 *
36 * 3. the copyright holder's name is not used to endorse products
37 * built using this software without specific written permission.
38 *
39 * ALTERNATIVELY, provided that this notice is retained in full, this product
40 * may be distributed under the terms of the GNU General Public License (GPL),
41 * in which case the provisions of the GPL apply INSTEAD OF those given above.
42 *
43 * DISCLAIMER
44 *
45 * This software is provided 'as is' with no explicit or implied warranties
46 * in respect of its properties, including, but not limited to, correctness
47 * and/or fitness for purpose.
48 * ---------------------------------------------------------------------------
49 */
50
51/* Some changes from the Gladman version:
52 s/RIJNDAEL(e_key)/E_KEY/g
53 s/RIJNDAEL(d_key)/D_KEY/g
54*/
55
56#include <asm/byteorder.h>
57#include <linux/bitops.h>
58#include <linux/crypto.h>
59#include <linux/errno.h>
60#include <linux/init.h>
61#include <linux/module.h>
62#include <linux/types.h>
63
/* Smallest and largest key sizes, in bytes, advertised to the crypto API. */
#define AES_MIN_KEY_SIZE 16
#define AES_MAX_KEY_SIZE 32

/* Cipher block size in bytes, reported as cra_blocksize. */
#define AES_BLOCK_SIZE 16
68
69/*
70 * #define byte(x, nr) ((unsigned char)((x) >> (nr*8)))
71 */
72static inline u8 byte(const u32 x, const unsigned n)
73{
74 return x >> (n << 3);
75}
76
/*
 * Load a 32-bit word stored in little-endian byte order at address p and
 * return it in CPU byte order.
 */
#define u32_in(p) le32_to_cpu(*(const __le32 *)(p))
78
79struct aes_ctx
80{
81 u32 key_length;
82 u32 E[60];
83 u32 D[60];
84};
85
/*
 * Shorthands for the two key schedules; both expect a `struct aes_ctx *ctx`
 * to be in scope at the point of use.
 */
#define E_KEY (ctx->E)
#define D_KEY (ctx->D)
88
/*
 * Scratch tables filled and consumed by gen_tabs() (and f_mult()) only;
 * they are marked __initdata:
 *   pow_tab / log_tab - power and log tables for GF(2**8)
 *   sbx_tab / isb_tab - byte substitution table and its inverse mapping
 */
static u8 pow_tab[256] __initdata;
static u8 log_tab[256] __initdata;
static u8 sbx_tab[256] __initdata;
static u8 isb_tab[256] __initdata;
/* Round constants XOR-ed in by the loop4/loop6/loop8 key expansion steps. */
static u32 rco_tab[10];
/*
 * Round tables built by gen_tabs(); entry [k][i] is entry [0][i] rotated
 * left by 8*k bits.  aes_ft_tab is derived from sbx_tab (multipliers 2,1,1,3)
 * and aes_it_tab from isb_tab (multipliers 14,9,13,11).
 * NOTE(review): these are not static - presumably they are referenced by the
 * external aes_encrypt()/aes_decrypt() implementation; confirm.
 */
u32 aes_ft_tab[4][256];
u32 aes_it_tab[4][256];

/*
 * Tables holding the plain sbx_tab (fl) / isb_tab (il) byte in each of the
 * four byte positions; aes_fl_tab is what ls_box() looks up.
 */
u32 aes_fl_tab[4][256];
u32 aes_il_tab[4][256];
99
100static inline u8 f_mult(u8 a, u8 b)
101{
102 u8 aa = log_tab[a], cc = aa + log_tab[b];
103
104 return pow_tab[cc + (cc < aa ? 1 : 0)];
105}
106
/*
 * GF(2**8) product of a and b; yields 0 when either operand is 0, otherwise
 * defers to f_mult().  Arguments are parenthesized so that expressions can be
 * passed safely, but note that each one is still evaluated twice.
 */
#define ff_mult(a, b) ((a) && (b) ? f_mult((a), (b)) : 0)
108
/*
 * Substitute each of the four bytes of word w through aes_fl_tab, keeping
 * every byte in its original position (table k handles byte k).
 * The argument is evaluated four times.
 */
#define ls_box(w)				\
	(aes_fl_tab[0][byte(w, 0)] ^		\
	 aes_fl_tab[1][byte(w, 1)] ^		\
	 aes_fl_tab[2][byte(w, 2)] ^		\
	 aes_fl_tab[3][byte(w, 3)])
114
115static void __init gen_tabs(void)
116{
117 u32 i, t;
118 u8 p, q;
119
120 /* log and power tables for GF(2**8) finite field with
121 0x011b as modular polynomial - the simplest primitive
122 root is 0x03, used here to generate the tables */
123
124 for (i = 0, p = 1; i < 256; ++i) {
125 pow_tab[i] = (u8)p;
126 log_tab[p] = (u8)i;
127
128 p ^= (p << 1) ^ (p & 0x80 ? 0x01b : 0);
129 }
130
131 log_tab[1] = 0;
132
133 for (i = 0, p = 1; i < 10; ++i) {
134 rco_tab[i] = p;
135
136 p = (p << 1) ^ (p & 0x80 ? 0x01b : 0);
137 }
138
139 for (i = 0; i < 256; ++i) {
140 p = (i ? pow_tab[255 - log_tab[i]] : 0);
141 q = ((p >> 7) | (p << 1)) ^ ((p >> 6) | (p << 2));
142 p ^= 0x63 ^ q ^ ((q >> 6) | (q << 2));
143 sbx_tab[i] = p;
144 isb_tab[p] = (u8)i;
145 }
146
147 for (i = 0; i < 256; ++i) {
148 p = sbx_tab[i];
149
150 t = p;
151 aes_fl_tab[0][i] = t;
152 aes_fl_tab[1][i] = rol32(t, 8);
153 aes_fl_tab[2][i] = rol32(t, 16);
154 aes_fl_tab[3][i] = rol32(t, 24);
155
156 t = ((u32)ff_mult(2, p)) |
157 ((u32)p << 8) |
158 ((u32)p << 16) | ((u32)ff_mult(3, p) << 24);
159
160 aes_ft_tab[0][i] = t;
161 aes_ft_tab[1][i] = rol32(t, 8);
162 aes_ft_tab[2][i] = rol32(t, 16);
163 aes_ft_tab[3][i] = rol32(t, 24);
164
165 p = isb_tab[i];
166
167 t = p;
168 aes_il_tab[0][i] = t;
169 aes_il_tab[1][i] = rol32(t, 8);
170 aes_il_tab[2][i] = rol32(t, 16);
171 aes_il_tab[3][i] = rol32(t, 24);
172
173 t = ((u32)ff_mult(14, p)) |
174 ((u32)ff_mult(9, p) << 8) |
175 ((u32)ff_mult(13, p) << 16) |
176 ((u32)ff_mult(11, p) << 24);
177
178 aes_it_tab[0][i] = t;
179 aes_it_tab[1][i] = rol32(t, 8);
180 aes_it_tab[2][i] = rol32(t, 16);
181 aes_it_tab[3][i] = rol32(t, 24);
182 }
183}
184
/*
 * Double each of the four bytes packed in x in GF(2**8): shift the low seven
 * bits of every byte left by one and XOR 0x1b into each byte whose top bit
 * was set.  The whole expansion is parenthesized so it can be used inside
 * larger expressions; x is evaluated twice.
 */
#define star_x(x) ((((x) & 0x7f7f7f7f) << 1) ^ ((((x) & 0x80808080) >> 7) * 0x1b))
186
/*
 * Apply the inverse column mix to the 32-bit word x and store the result in
 * y (this is what converts an encryption round key into a decryption one).
 *
 * Uses u32 temporaries u, v, w and t that must be declared by the caller.
 * x is evaluated more than once; y is assigned exactly once.  Wrapped in
 * do { } while (0) so the macro behaves as a single statement.
 */
#define imix_col(y, x)				\
do {						\
	u = star_x(x);				\
	v = star_x(u);				\
	w = star_x(v);				\
	t = w ^ (x);				\
	(y) = u ^ v ^ w ^			\
	      ror32(u ^ t, 8) ^			\
	      ror32(v ^ t, 16) ^		\
	      ror32(t, 24);			\
} while (0)
196
197/* initialise the key schedule from the user supplied key */
198
/*
 * Key expansion step i for 16-byte keys: derives E_KEY[4*i+4 .. 4*i+7] from
 * E_KEY[4*i .. 4*i+3].
 *
 * Needs `ctx` (via E_KEY) and a u32 `t` in the caller's scope; t must hold
 * the last schedule word produced so far on entry and holds E_KEY[4*i+7] on
 * exit.  The index argument is parenthesized but evaluated several times.
 */
#define loop4(i)						\
do {								\
	t = ror32(t, 8);					\
	t = ls_box(t) ^ rco_tab[(i)];				\
	t ^= E_KEY[4 * (i)];     E_KEY[4 * (i) + 4] = t;	\
	t ^= E_KEY[4 * (i) + 1]; E_KEY[4 * (i) + 5] = t;	\
	t ^= E_KEY[4 * (i) + 2]; E_KEY[4 * (i) + 6] = t;	\
	t ^= E_KEY[4 * (i) + 3]; E_KEY[4 * (i) + 7] = t;	\
} while (0)
207
/*
 * Key expansion step i for 24-byte keys: derives E_KEY[6*i+6 .. 6*i+11] from
 * E_KEY[6*i .. 6*i+5].
 *
 * Needs `ctx` (via E_KEY) and a u32 `t` in the caller's scope; t must hold
 * the last schedule word produced so far on entry and holds E_KEY[6*i+11] on
 * exit.  The index argument is parenthesized but evaluated several times.
 */
#define loop6(i)						\
do {								\
	t = ror32(t, 8);					\
	t = ls_box(t) ^ rco_tab[(i)];				\
	t ^= E_KEY[6 * (i)];     E_KEY[6 * (i) + 6] = t;	\
	t ^= E_KEY[6 * (i) + 1]; E_KEY[6 * (i) + 7] = t;	\
	t ^= E_KEY[6 * (i) + 2]; E_KEY[6 * (i) + 8] = t;	\
	t ^= E_KEY[6 * (i) + 3]; E_KEY[6 * (i) + 9] = t;	\
	t ^= E_KEY[6 * (i) + 4]; E_KEY[6 * (i) + 10] = t;	\
	t ^= E_KEY[6 * (i) + 5]; E_KEY[6 * (i) + 11] = t;	\
} while (0)
218
/*
 * Key expansion step i for 32-byte keys: derives E_KEY[8*i+8 .. 8*i+15] from
 * E_KEY[8*i .. 8*i+7].  The first four words use the rotated, substituted
 * word XOR-ed with a round constant; the second four start from a
 * substitution without rotation or round constant.
 *
 * Needs `ctx` (via E_KEY) and a u32 `t` in the caller's scope; t must hold
 * the last schedule word produced so far on entry and holds E_KEY[8*i+15] on
 * exit.  The index argument is parenthesized but evaluated several times.
 * Note that step i writes up to index 8*i+15, so the caller must make sure
 * that stays inside the schedule array.
 */
#define loop8(i)						\
do {								\
	t = ror32(t, 8);					\
	t = ls_box(t) ^ rco_tab[(i)];				\
	t ^= E_KEY[8 * (i)];     E_KEY[8 * (i) + 8] = t;	\
	t ^= E_KEY[8 * (i) + 1]; E_KEY[8 * (i) + 9] = t;	\
	t ^= E_KEY[8 * (i) + 2]; E_KEY[8 * (i) + 10] = t;	\
	t ^= E_KEY[8 * (i) + 3]; E_KEY[8 * (i) + 11] = t;	\
	t = E_KEY[8 * (i) + 4] ^ ls_box(t);			\
	E_KEY[8 * (i) + 12] = t;				\
	t ^= E_KEY[8 * (i) + 5]; E_KEY[8 * (i) + 13] = t;	\
	t ^= E_KEY[8 * (i) + 6]; E_KEY[8 * (i) + 14] = t;	\
	t ^= E_KEY[8 * (i) + 7]; E_KEY[8 * (i) + 15] = t;	\
} while (0)
232
233static int aes_set_key(void *ctx_arg, const u8 *in_key, unsigned int key_len,
234 u32 *flags)
235{
236 struct aes_ctx *ctx = ctx_arg;
237 u32 i, j, t, u, v, w;
238
239 if (key_len != 16 && key_len != 24 && key_len != 32) {
240 *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
241 return -EINVAL;
242 }
243
244 ctx->key_length = key_len;
245
246 D_KEY[key_len + 24] = E_KEY[0] = u32_in(in_key);
247 D_KEY[key_len + 25] = E_KEY[1] = u32_in(in_key + 4);
248 D_KEY[key_len + 26] = E_KEY[2] = u32_in(in_key + 8);
249 D_KEY[key_len + 27] = E_KEY[3] = u32_in(in_key + 12);
250
251 switch (key_len) {
252 case 16:
253 t = E_KEY[3];
254 for (i = 0; i < 10; ++i)
255 loop4(i);
256 break;
257
258 case 24:
259 E_KEY[4] = u32_in(in_key + 16);
260 t = E_KEY[5] = u32_in(in_key + 20);
261 for (i = 0; i < 8; ++i)
262 loop6 (i);
263 break;
264
265 case 32:
266 E_KEY[4] = u32_in(in_key + 16);
267 E_KEY[5] = u32_in(in_key + 20);
268 E_KEY[6] = u32_in(in_key + 24);
269 t = E_KEY[7] = u32_in(in_key + 28);
270 for (i = 0; i < 7; ++i)
271 loop8(i);
272 break;
273 }
274
275 D_KEY[0] = E_KEY[key_len + 24];
276 D_KEY[1] = E_KEY[key_len + 25];
277 D_KEY[2] = E_KEY[key_len + 26];
278 D_KEY[3] = E_KEY[key_len + 27];
279
280 for (i = 4; i < key_len + 24; ++i) {
281 j = key_len + 24 - (i & ~3) + (i & 3);
282 imix_col(D_KEY[j], E_KEY[i]);
283 }
284
285 return 0;
286}
287
/*
 * Single-block cipher entry points, defined outside this file and installed
 * as cia_encrypt/cia_decrypt below.  ctx_arg is the context that
 * aes_set_key() filled in.
 * NOTE(review): presumably implemented in the accompanying x86_64 assembler
 * source and processing one AES_BLOCK_SIZE block from in to out - confirm.
 */
extern void aes_encrypt(void *ctx_arg, u8 *out, const u8 *in);
extern void aes_decrypt(void *ctx_arg, u8 *out, const u8 *in);
290
/*
 * Algorithm descriptor registered with the crypto API by aes_init() and
 * removed again by aes_fini().
 */
static struct crypto_alg aes_alg = {
	.cra_name		=	"aes",
	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
	.cra_blocksize		=	AES_BLOCK_SIZE,
	/* size of the per-transform context handed to the callbacks below */
	.cra_ctxsize		=	sizeof(struct aes_ctx),
	.cra_module		=	THIS_MODULE,
	.cra_list		=	LIST_HEAD_INIT(aes_alg.cra_list),
	.cra_u			=	{
		.cipher = {
			.cia_min_keysize	=	AES_MIN_KEY_SIZE,
			.cia_max_keysize	=	AES_MAX_KEY_SIZE,
			/* key schedule is built in C in this file ... */
			.cia_setkey		=	aes_set_key,
			/* ... while the block operations are external */
			.cia_encrypt		=	aes_encrypt,
			.cia_decrypt		=	aes_decrypt
		}
	}
};
308
309static int __init aes_init(void)
310{
311 gen_tabs();
312 return crypto_register_alg(&aes_alg);
313}
314
/* Module exit point: remove the algorithm registered by aes_init(). */
static void __exit aes_fini(void)
{
	crypto_unregister_alg(&aes_alg);
}
319
320module_init(aes_init);
321module_exit(aes_fini);
322
323MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm");
324MODULE_LICENSE("GPL");