aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndreas Steinmetz <ast@domdv.de>2005-07-06 16:55:00 -0400
committerDavid S. Miller <davem@davemloft.net>2005-07-06 16:55:00 -0400
commita2a892a236d03a6e985471a7e57d1c863de144c8 (patch)
tree33b52c87bdecf0f24936b952a565a445ce03c616
parenta61cc44812ff94793987bf43b70a3d9bc64a6820 (diff)
[CRYPTO] Add x86_64 asm AES
Implementation: =============== The encrypt/decrypt code is based on an x86 implementation I did a while ago which I never published. This unpublished implementation does include an assembler based key schedule and precomputed tables. For simplicity and best acceptance, however, I took Gladman's in-kernel code for table generation and key schedule for the kernel port of my assembler code and modified this code to produce the key schedule as required by my assembler implementation. File locations and Kconfig are kept similar to the i586 AES assembler implementation. It may seem a little bit strange to use 32 bit I/O and registers in the assembler implementation but this gives the best code size. My implementation takes one instruction more per round compared to Gladman's x86 assembler but it doesn't require any stack for local variables or saved registers and it is less serialized than Gladman's code. Note that all comparisons to Gladman's code were done after my code was implemented. I did only use FIPS PUB 197 for the implementation so my implementation is independent work. If anybody has a better assembler solution for x86_64 I'll be pleased to have my code replaced with the better solution. Testing: ======== The implementation passes the in-kernel crypto testing module and I'm running it without any problems on my laptop where it is mainly used for dm-crypt. Microbenchmark: =============== The microbenchmark was done in userspace with similar compile flags as used during kernel compile. Encrypt/decrypt is about 35% faster than the generic C implementation. As the generic C as well as my assembler implementation are both table based, I don't really expect that there is much room for further improvements though I'll be glad to be corrected here. The key schedule is about 5% slower than the generic C implementation. This is due to the fact that some more work has to be done in the key schedule routine to fit the schedule to the assembler implementation. 
Code Size: ========== Encrypt and decrypt are together about 2.1 Kbytes smaller than the generic C implementation which is important with regard to L1 cache usage. The key schedule routine is about 100 bytes larger than the generic C implementation. Data Size: ========== There's no difference in data size requirements between the assembler implementation and the generic C implementation. License: ======== Gladman's code is dual BSD/GPL whereas my assembler code is GPLv2 only (I'm not going to change the license for my code). So I had to change the module license for the x86_64 aes module from 'Dual BSD/GPL' to 'GPL' to reflect the most restrictive license within the module. Signed-off-by: Andreas Steinmetz <ast@domdv.de> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--arch/x86_64/Makefile4
-rw-r--r--arch/x86_64/crypto/Makefile9
-rw-r--r--arch/x86_64/crypto/aes-x86_64-asm.S186
-rw-r--r--arch/x86_64/crypto/aes.c324
-rw-r--r--crypto/Kconfig22
5 files changed, 543 insertions, 2 deletions
diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile
index 8a73794f9b90..428915697675 100644
--- a/arch/x86_64/Makefile
+++ b/arch/x86_64/Makefile
@@ -65,7 +65,9 @@ CFLAGS += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,)
65head-y := arch/x86_64/kernel/head.o arch/x86_64/kernel/head64.o arch/x86_64/kernel/init_task.o 65head-y := arch/x86_64/kernel/head.o arch/x86_64/kernel/head64.o arch/x86_64/kernel/init_task.o
66 66
67libs-y += arch/x86_64/lib/ 67libs-y += arch/x86_64/lib/
68core-y += arch/x86_64/kernel/ arch/x86_64/mm/ 68core-y += arch/x86_64/kernel/ \
69 arch/x86_64/mm/ \
70 arch/x86_64/crypto/
69core-$(CONFIG_IA32_EMULATION) += arch/x86_64/ia32/ 71core-$(CONFIG_IA32_EMULATION) += arch/x86_64/ia32/
70drivers-$(CONFIG_PCI) += arch/x86_64/pci/ 72drivers-$(CONFIG_PCI) += arch/x86_64/pci/
71drivers-$(CONFIG_OPROFILE) += arch/x86_64/oprofile/ 73drivers-$(CONFIG_OPROFILE) += arch/x86_64/oprofile/
diff --git a/arch/x86_64/crypto/Makefile b/arch/x86_64/crypto/Makefile
new file mode 100644
index 000000000000..426d20f4b72e
--- /dev/null
+++ b/arch/x86_64/crypto/Makefile
@@ -0,0 +1,9 @@
1#
2# x86_64/crypto/Makefile
3#
4# Arch-specific CryptoAPI modules.
5#
6
7obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
8
9aes-x86_64-y := aes-x86_64-asm.o aes.o
diff --git a/arch/x86_64/crypto/aes-x86_64-asm.S b/arch/x86_64/crypto/aes-x86_64-asm.S
new file mode 100644
index 000000000000..483cbb23ab8d
--- /dev/null
+++ b/arch/x86_64/crypto/aes-x86_64-asm.S
@@ -0,0 +1,186 @@
1/* AES (Rijndael) implementation (FIPS PUB 197) for x86_64
2 *
3 * Copyright (C) 2005 Andreas Steinmetz, <ast@domdv.de>
4 *
5 * License:
6 * This code can be distributed under the terms of the GNU General Public
7 * License (GPL) Version 2 provided that the above header down to and
8 * including this sentence is retained in full.
9 */
10
11.extern aes_ft_tab
12.extern aes_it_tab
13.extern aes_fl_tab
14.extern aes_il_tab
15
16.text
17
/*
 * Register aliases.  R1-R4 name %rax/%rbx/%rcx/%rdx, with the suffixes E, X,
 * H and L selecting the 32-bit, 16-bit, high-byte and low-byte views.  R5-R7
 * only have 64-bit and 32-bit (E) forms and R8-R11 only the 64-bit form.
 * The macros below build the sized names by token-pasting a suffix onto the
 * Rn name they were given (e.g. r5 ## E).
 */
18#define R1 %rax
19#define R1E %eax
20#define R1X %ax
21#define R1H %ah
22#define R1L %al
23#define R2 %rbx
24#define R2E %ebx
25#define R2X %bx
26#define R2H %bh
27#define R2L %bl
28#define R3 %rcx
29#define R3E %ecx
30#define R3X %cx
31#define R3H %ch
32#define R3L %cl
33#define R4 %rdx
34#define R4E %edx
35#define R4X %dx
36#define R4H %dh
37#define R4L %dl
38#define R5 %rsi
39#define R5E %esi
40#define R6 %rdi
41#define R6E %edi
42#define R7 %rbp
43#define R7E %ebp
44#define R8 %r8
45#define R9 %r9
46#define R10 %r10
47#define R11 %r11
48
/*
 * Common entry sequence for both cipher routines.  It emits the global
 * function label FUNC and then, in terms of its register arguments:
 *  - copies r1 to r2 and r3 to r4 with 64-bit moves (entry() uses this to
 *    park %rbx and %rbp in %r8/%r9 rather than on the stack),
 *  - points r9 at byte offset BASE+52 from r8,
 *  - copies r10 to r11 (entry() passes the same register that epilogue()
 *    later uses as the store address),
 *  - loads four 32-bit words from (r7) and the 32-bit value at (r8),
 *  - XORs the four words at -48(r9)..-36(r9), i.e. byte offsets BASE+4 to
 *    BASE+16 from r8, into the freshly loaded words,
 *  - compares the value read from (r8) with 24.
 * A value below 24 branches to B128.  Otherwise r9 is advanced by 32 and a
 * value equal to 24 branches to B192; anything larger advances r9 by a
 * further 32 and falls through.  leaq does not modify the flags, so the je
 * still tests the result of the cmpl.
 */
49#define prologue(FUNC,BASE,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \
50 .global FUNC; \
51 .type FUNC,@function; \
52 .align 8; \
53FUNC: movq r1,r2; \
54 movq r3,r4; \
55 leaq BASE+52(r8),r9; \
56 movq r10,r11; \
57 movl (r7),r5 ## E; \
58 movl 4(r7),r1 ## E; \
59 movl 8(r7),r6 ## E; \
60 movl 12(r7),r7 ## E; \
61 movl (r8),r10 ## E; \
62 xorl -48(r9),r5 ## E; \
63 xorl -44(r9),r1 ## E; \
64 xorl -40(r9),r6 ## E; \
65 xorl -36(r9),r7 ## E; \
66 cmpl $24,r10 ## E; \
67 jb B128; \
68 leaq 32(r9),r9; \
69 je B192; \
70 leaq 32(r9),r9;
71
/*
 * Common exit sequence: copy r1 to r2 and r3 to r4 with 64-bit moves (the
 * "return" macro below uses this to put back the registers that entry()
 * parked), store the 32-bit views of r5, r6, r7 and r8 as four consecutive
 * words starting at (r9), then return to the caller.
 */
72#define epilogue(r1,r2,r3,r4,r5,r6,r7,r8,r9) \
73 movq r1,r2; \
74 movq r3,r4; \
75 movl r5 ## E,(r9); \
76 movl r6 ## E,4(r9); \
77 movl r7 ## E,8(r9); \
78 movl r8 ## E,12(r9); \
79 ret;
80
/*
 * One table-driven cipher round on four 32-bit state words.
 *
 * r1-r4 hold the incoming state and r8 points into the key schedule.  Every
 * state byte is isolated with movzbl (through the H/L byte views, combined
 * with 16-bit moves, rotates and shifts to reach the upper halves) and used
 * as an index, scaled by 4, into one of the four sub-tables located at TAB,
 * TAB+1024, TAB+2048 and TAB+3072 -- sixteen lookups in total.  The lookups
 * are XOR-accumulated into r5, r6, r3 and r4.  The four round-key words at
 * OFFSET(r8)..OFFSET+12(r8) are XORed into ra, rb, rc and rd; the wrappers
 * below pass those same four result registers for ra-rd.
 *
 * r1, r2 and r7 are overwritten along the way; r7 is pure scratch.  The
 * statement order interleaves independent loads and is relied upon: each
 * input register is only reused as an output after its last byte has been
 * extracted.
 */
81#define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \
82 movzbl r2 ## H,r5 ## E; \
83 movzbl r2 ## L,r6 ## E; \
84 movl TAB+1024(,r5,4),r5 ## E;\
85 movw r4 ## X,r2 ## X; \
86 movl TAB(,r6,4),r6 ## E; \
87 roll $16,r2 ## E; \
88 shrl $16,r4 ## E; \
89 movzbl r4 ## H,r7 ## E; \
90 movzbl r4 ## L,r4 ## E; \
91 xorl OFFSET(r8),ra ## E; \
92 xorl OFFSET+4(r8),rb ## E; \
93 xorl TAB+3072(,r7,4),r5 ## E;\
94 xorl TAB+2048(,r4,4),r6 ## E;\
95 movzbl r1 ## L,r7 ## E; \
96 movzbl r1 ## H,r4 ## E; \
97 movl TAB+1024(,r4,4),r4 ## E;\
98 movw r3 ## X,r1 ## X; \
99 roll $16,r1 ## E; \
100 shrl $16,r3 ## E; \
101 xorl TAB(,r7,4),r5 ## E; \
102 movzbl r3 ## H,r7 ## E; \
103 movzbl r3 ## L,r3 ## E; \
104 xorl TAB+3072(,r7,4),r4 ## E;\
105 xorl TAB+2048(,r3,4),r5 ## E;\
106 movzbl r1 ## H,r7 ## E; \
107 movzbl r1 ## L,r3 ## E; \
108 shrl $16,r1 ## E; \
109 xorl TAB+3072(,r7,4),r6 ## E;\
110 movl TAB+2048(,r3,4),r3 ## E;\
111 movzbl r1 ## H,r7 ## E; \
112 movzbl r1 ## L,r1 ## E; \
113 xorl TAB+1024(,r7,4),r6 ## E;\
114 xorl TAB(,r1,4),r3 ## E; \
115 movzbl r2 ## H,r1 ## E; \
116 movzbl r2 ## L,r7 ## E; \
117 shrl $16,r2 ## E; \
118 xorl TAB+3072(,r1,4),r3 ## E;\
119 xorl TAB+2048(,r7,4),r4 ## E;\
120 movzbl r2 ## H,r1 ## E; \
121 movzbl r2 ## L,r2 ## E; \
122 xorl OFFSET+8(r8),rc ## E; \
123 xorl OFFSET+12(r8),rd ## E; \
124 xorl TAB+1024(,r1,4),r3 ## E;\
125 xorl TAB(,r2,4),r4 ## E;
126
/* Copy the 32-bit value of r3 into r1 and of r4 into r2. */
127#define move_regs(r1,r2,r3,r4) \
128 movl r3 ## E,r1 ## E; \
129 movl r4 ## E,r2 ## E;
130
/*
 * Bind prologue() to concrete registers: %rbx is parked in %r8 and %rbp in
 * %r9, %r10 becomes the key-schedule pointer derived from %rdi, the incoming
 * %rsi is kept in %r11, and the four input words are loaded from (%rdx) into
 * %eax, %ebx, %ecx and %edx.  The value read from (%rdi) lands in %esi.
 */
131#define entry(FUNC,BASE,B128,B192) \
132 prologue(FUNC,BASE,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11)
133
/*
 * Restore %rbx/%rbp from %r8/%r9, store %esi, %edi, %ecx and %edx to (%r11)
 * and return.  Note that this redefines the token "return" for the rest of
 * the file.
 */
134#define return epilogue(R8,R2,R9,R7,R5,R6,R3,R4,R11)
135
/*
 * A full round reading its key at OFFSET(%r10), followed by copying R5/R6
 * back into R1/R2 so that the state again sits in R1-R4 for the next round.
 */
136#define encrypt_round(TAB,OFFSET) \
137 round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \
138 move_regs(R1,R2,R5,R6)
139
/* Last round: same as encrypt_round() but the result stays in R5,R6,R3,R4. */
140#define encrypt_final(TAB,OFFSET) \
141 round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4)
142
/*
 * Decryption variants: identical to the encryption wrappers except that the
 * register pairs R1/R2, R3/R4 and R5/R6 are swapped in round()'s first six
 * register arguments.
 */
143#define decrypt_round(TAB,OFFSET) \
144 round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4) \
145 move_regs(R1,R2,R5,R6)
146
147#define decrypt_final(TAB,OFFSET) \
148 round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4)
149
150/* void aes_encrypt(void *ctx, u8 *out, const u8 *in) */
151
/*
 * Fourteen unrolled rounds with two extra entry labels.  entry() falls
 * through to the first round when the length word read from the context is
 * above 24, jumps to enc192 when it equals 24 and to enc128 when it is below
 * 24, leaving 14, 12 or 10 rounds to execute.  The key pointer is advanced
 * by 32 bytes for each pair of additional rounds, so the first round that
 * actually runs always reads its key from the same place in the schedule
 * (byte offset 20 from the context with BASE 0).  The final round uses
 * aes_fl_tab instead of aes_ft_tab; "return" expands to epilogue().
 */
152 entry(aes_encrypt,0,enc128,enc192)
153 encrypt_round(aes_ft_tab,-96)
154 encrypt_round(aes_ft_tab,-80)
155enc192: encrypt_round(aes_ft_tab,-64)
156 encrypt_round(aes_ft_tab,-48)
157enc128: encrypt_round(aes_ft_tab,-32)
158 encrypt_round(aes_ft_tab,-16)
159 encrypt_round(aes_ft_tab, 0)
160 encrypt_round(aes_ft_tab, 16)
161 encrypt_round(aes_ft_tab, 32)
162 encrypt_round(aes_ft_tab, 48)
163 encrypt_round(aes_ft_tab, 64)
164 encrypt_round(aes_ft_tab, 80)
165 encrypt_round(aes_ft_tab, 96)
166 encrypt_final(aes_fl_tab,112)
167 return
168
169/* void aes_decrypt(void *ctx, u8 *out, const u8 *in) */
170
/*
 * Same layout as aes_encrypt: fourteen unrolled rounds, entered at the top,
 * at dec192 or at dec128 depending on the length word read from the context.
 * BASE is 240 here, which moves every key access 240 bytes further into the
 * context than in aes_encrypt, and the inverse tables aes_it_tab/aes_il_tab
 * are used.
 */
171 entry(aes_decrypt,240,dec128,dec192)
172 decrypt_round(aes_it_tab,-96)
173 decrypt_round(aes_it_tab,-80)
174dec192: decrypt_round(aes_it_tab,-64)
175 decrypt_round(aes_it_tab,-48)
176dec128: decrypt_round(aes_it_tab,-32)
177 decrypt_round(aes_it_tab,-16)
178 decrypt_round(aes_it_tab, 0)
179 decrypt_round(aes_it_tab, 16)
180 decrypt_round(aes_it_tab, 32)
181 decrypt_round(aes_it_tab, 48)
182 decrypt_round(aes_it_tab, 64)
183 decrypt_round(aes_it_tab, 80)
184 decrypt_round(aes_it_tab, 96)
185 decrypt_final(aes_il_tab,112)
186 return
diff --git a/arch/x86_64/crypto/aes.c b/arch/x86_64/crypto/aes.c
new file mode 100644
index 000000000000..2b5c4010ce38
--- /dev/null
+++ b/arch/x86_64/crypto/aes.c
@@ -0,0 +1,324 @@
1/*
2 * Cryptographic API.
3 *
4 * AES Cipher Algorithm.
5 *
6 * Based on Brian Gladman's code.
7 *
8 * Linux developers:
9 * Alexander Kjeldaas <astor@fast.no>
10 * Herbert Valerio Riedel <hvr@hvrlab.org>
11 * Kyle McMartin <kyle@debian.org>
12 * Adam J. Richter <adam@yggdrasil.com> (conversion to 2.5 API).
13 * Andreas Steinmetz <ast@domdv.de> (adapted to x86_64 assembler)
14 *
15 * This program is free software; you can redistribute it and/or modify
16 * it under the terms of the GNU General Public License as published by
17 * the Free Software Foundation; either version 2 of the License, or
18 * (at your option) any later version.
19 *
20 * ---------------------------------------------------------------------------
21 * Copyright (c) 2002, Dr Brian Gladman <brg@gladman.me.uk>, Worcester, UK.
22 * All rights reserved.
23 *
24 * LICENSE TERMS
25 *
26 * The free distribution and use of this software in both source and binary
27 * form is allowed (with or without changes) provided that:
28 *
29 * 1. distributions of this source code include the above copyright
30 * notice, this list of conditions and the following disclaimer;
31 *
32 * 2. distributions in binary form include the above copyright
33 * notice, this list of conditions and the following disclaimer
34 * in the documentation and/or other associated materials;
35 *
36 * 3. the copyright holder's name is not used to endorse products
37 * built using this software without specific written permission.
38 *
39 * ALTERNATIVELY, provided that this notice is retained in full, this product
40 * may be distributed under the terms of the GNU General Public License (GPL),
41 * in which case the provisions of the GPL apply INSTEAD OF those given above.
42 *
43 * DISCLAIMER
44 *
45 * This software is provided 'as is' with no explicit or implied warranties
46 * in respect of its properties, including, but not limited to, correctness
47 * and/or fitness for purpose.
48 * ---------------------------------------------------------------------------
49 */
50
51/* Some changes from the Gladman version:
52 s/RIJNDAEL(e_key)/E_KEY/g
53 s/RIJNDAEL(d_key)/D_KEY/g
54*/
55
56#include <asm/byteorder.h>
57#include <linux/bitops.h>
58#include <linux/crypto.h>
59#include <linux/errno.h>
60#include <linux/init.h>
61#include <linux/module.h>
62#include <linux/types.h>
63
64#define AES_MIN_KEY_SIZE 16
65#define AES_MAX_KEY_SIZE 32
66
67#define AES_BLOCK_SIZE 16
68
69/*
70 * #define byte(x, nr) ((unsigned char)((x) >> (nr*8)))
71 */
72static inline u8 byte(const u32 x, const unsigned n)
73{
74 return x >> (n << 3);
75}
76
77#define u32_in(x) le32_to_cpu(*(const __le32 *)(x))
78
/*
 * Per-key cipher context.
 *
 * The assembler entry points address this structure with hard-coded byte
 * offsets: the length word at offset 0 and the round keys starting at
 * offset 4 (E) or 4 + 240 (D).  Member order, types and array sizes must
 * therefore stay exactly as they are.
 */
79struct aes_ctx
80{
81 u32 key_length; /* key size in bytes as accepted by aes_set_key(): 16, 24 or 32 */
82 u32 E[60]; /* encryption key schedule */
83 u32 D[60]; /* decryption key schedule, derived from E in aes_set_key() */
84};
85
86#define E_KEY ctx->E
87#define D_KEY ctx->D
88
/*
 * Lookup tables, all filled in by gen_tabs().  The four byte tables are only
 * read while the other tables are being generated and are marked __initdata.
 */
89static u8 pow_tab[256] __initdata;
90static u8 log_tab[256] __initdata;
91static u8 sbx_tab[256] __initdata;
92static u8 isb_tab[256] __initdata;
/* Round constants consumed by the loop4/loop6/loop8 key-schedule macros. */
93static u32 rco_tab[10];
/*
 * The four tables below are deliberately not static: the assembler file
 * references them by name (.extern) and indexes them as four consecutive
 * 1024-byte sub-tables.
 */
94u32 aes_ft_tab[4][256];
95u32 aes_it_tab[4][256];
96
97u32 aes_fl_tab[4][256];
98u32 aes_il_tab[4][256];
99
100static inline u8 f_mult(u8 a, u8 b)
101{
102 u8 aa = log_tab[a], cc = aa + log_tab[b];
103
104 return pow_tab[cc + (cc < aa ? 1 : 0)];
105}
106
/*
 * GF(2**8) multiplication that also accepts zero operands: the product is 0
 * if either factor is 0, otherwise f_mult() computes it.  Both arguments are
 * parenthesised so that expressions containing low-precedence operators are
 * tested as a whole.  Each argument may still be evaluated twice, so do not
 * pass expressions with side effects.
 */
#define ff_mult(a, b)	((a) && (b) ? f_mult((a), (b)) : 0)
108
/*
 * Look up each of the four bytes of x in the matching aes_fl_tab sub-table
 * and XOR the results.  gen_tabs() fills sub-table k with the S-box value
 * rotated left by 8*k bits, so every byte of x is replaced in place by its
 * S-box value.  x is evaluated four times.
 */
109#define ls_box(x) \
110 (aes_fl_tab[0][byte(x, 0)] ^ \
111 aes_fl_tab[1][byte(x, 1)] ^ \
112 aes_fl_tab[2][byte(x, 2)] ^ \
113 aes_fl_tab[3][byte(x, 3)])
114
115static void __init gen_tabs(void)
116{
117 u32 i, t;
118 u8 p, q;
119
120 /* log and power tables for GF(2**8) finite field with
121 0x011b as modular polynomial - the simplest primitive
122 root is 0x03, used here to generate the tables */
123
124 for (i = 0, p = 1; i < 256; ++i) {
125 pow_tab[i] = (u8)p;
126 log_tab[p] = (u8)i;
127
128 p ^= (p << 1) ^ (p & 0x80 ? 0x01b : 0);
129 }
130
131 log_tab[1] = 0;
132
133 for (i = 0, p = 1; i < 10; ++i) {
134 rco_tab[i] = p;
135
136 p = (p << 1) ^ (p & 0x80 ? 0x01b : 0);
137 }
138
139 for (i = 0; i < 256; ++i) {
140 p = (i ? pow_tab[255 - log_tab[i]] : 0);
141 q = ((p >> 7) | (p << 1)) ^ ((p >> 6) | (p << 2));
142 p ^= 0x63 ^ q ^ ((q >> 6) | (q << 2));
143 sbx_tab[i] = p;
144 isb_tab[p] = (u8)i;
145 }
146
147 for (i = 0; i < 256; ++i) {
148 p = sbx_tab[i];
149
150 t = p;
151 aes_fl_tab[0][i] = t;
152 aes_fl_tab[1][i] = rol32(t, 8);
153 aes_fl_tab[2][i] = rol32(t, 16);
154 aes_fl_tab[3][i] = rol32(t, 24);
155
156 t = ((u32)ff_mult(2, p)) |
157 ((u32)p << 8) |
158 ((u32)p << 16) | ((u32)ff_mult(3, p) << 24);
159
160 aes_ft_tab[0][i] = t;
161 aes_ft_tab[1][i] = rol32(t, 8);
162 aes_ft_tab[2][i] = rol32(t, 16);
163 aes_ft_tab[3][i] = rol32(t, 24);
164
165 p = isb_tab[i];
166
167 t = p;
168 aes_il_tab[0][i] = t;
169 aes_il_tab[1][i] = rol32(t, 8);
170 aes_il_tab[2][i] = rol32(t, 16);
171 aes_il_tab[3][i] = rol32(t, 24);
172
173 t = ((u32)ff_mult(14, p)) |
174 ((u32)ff_mult(9, p) << 8) |
175 ((u32)ff_mult(13, p) << 16) |
176 ((u32)ff_mult(11, p) << 24);
177
178 aes_it_tab[0][i] = t;
179 aes_it_tab[1][i] = rol32(t, 8);
180 aes_it_tab[2][i] = rol32(t, 16);
181 aes_it_tab[3][i] = rol32(t, 24);
182 }
183}
184
/*
 * Double each of the four bytes packed in x independently: the low seven
 * bits of every byte are shifted left by one, and 0x1b is XORed into every
 * byte whose top bit was set.  The expansion is wrapped in parentheses so it
 * can be combined with other operators without precedence surprises.
 */
#define star_x(x) \
	((((x) & 0x7f7f7f7f) << 1) ^ ((((x) & 0x80808080) >> 7) * 0x1b))
186
/*
 * Compute y from the 32-bit column x using three successive star_x()
 * doublings and byte rotations; applied to a byte value b in the low byte
 * this yields b times 14, 9, 13 and 11 in the four result bytes.
 *
 * The macro uses the caller's u32 locals u, v, w and t as scratch and
 * evaluates x more than once.  It is wrapped in do { } while (0) so that it
 * behaves as a single statement, e.g. under an unbraced if/else.
 */
#define imix_col(y, x) \
do { \
	u = star_x(x); \
	v = star_x(u); \
	w = star_x(v); \
	t = w ^ (x); \
	(y) = u ^ v ^ w; \
	(y) ^= ror32(u ^ t, 8) ^ \
	       ror32(v ^ t, 16) ^ \
	       ror32(t, 24); \
} while (0)
196
197/* initialise the key schedule from the user supplied key */
198
/*
 * One key-expansion step for 16-byte keys: derive E_KEY[4*i+4 .. 4*i+7]
 * from E_KEY[4*i .. 4*i+3].  Uses the caller's running word t and its ctx
 * (through E_KEY).  The index argument is parenthesised and the body is a
 * single do { } while (0) statement.
 */
#define loop4(i) \
do { \
	t = ror32(t, 8); \
	t = ls_box(t) ^ rco_tab[(i)]; \
	t ^= E_KEY[4 * (i)];     E_KEY[4 * (i) + 4] = t; \
	t ^= E_KEY[4 * (i) + 1]; E_KEY[4 * (i) + 5] = t; \
	t ^= E_KEY[4 * (i) + 2]; E_KEY[4 * (i) + 6] = t; \
	t ^= E_KEY[4 * (i) + 3]; E_KEY[4 * (i) + 7] = t; \
} while (0)
207
/*
 * One key-expansion step for 24-byte keys: derive E_KEY[6*i+6 .. 6*i+11]
 * from E_KEY[6*i .. 6*i+5].  Uses the caller's running word t and its ctx
 * (through E_KEY).  The index argument is parenthesised and the body is a
 * single do { } while (0) statement.
 */
#define loop6(i) \
do { \
	t = ror32(t, 8); \
	t = ls_box(t) ^ rco_tab[(i)]; \
	t ^= E_KEY[6 * (i)];     E_KEY[6 * (i) + 6] = t; \
	t ^= E_KEY[6 * (i) + 1]; E_KEY[6 * (i) + 7] = t; \
	t ^= E_KEY[6 * (i) + 2]; E_KEY[6 * (i) + 8] = t; \
	t ^= E_KEY[6 * (i) + 3]; E_KEY[6 * (i) + 9] = t; \
	t ^= E_KEY[6 * (i) + 4]; E_KEY[6 * (i) + 10] = t; \
	t ^= E_KEY[6 * (i) + 5]; E_KEY[6 * (i) + 11] = t; \
} while (0)
218
/*
 * One key-expansion step for 32-byte keys: derive E_KEY[8*i+8 .. 8*i+15]
 * from E_KEY[8*i .. 8*i+7].  The first four words use the rotated,
 * substituted word plus a round constant; the second four start from a
 * substituted word without rotation or constant.  Uses the caller's running
 * word t and its ctx (through E_KEY).
 *
 * Note that the second half stores up to index 8*i+15.  The index argument
 * is parenthesised and the body is a single do { } while (0) statement.
 */
#define loop8(i) \
do { \
	t = ror32(t, 8); \
	t = ls_box(t) ^ rco_tab[(i)]; \
	t ^= E_KEY[8 * (i)];     E_KEY[8 * (i) + 8] = t; \
	t ^= E_KEY[8 * (i) + 1]; E_KEY[8 * (i) + 9] = t; \
	t ^= E_KEY[8 * (i) + 2]; E_KEY[8 * (i) + 10] = t; \
	t ^= E_KEY[8 * (i) + 3]; E_KEY[8 * (i) + 11] = t; \
	t = E_KEY[8 * (i) + 4] ^ ls_box(t); \
	E_KEY[8 * (i) + 12] = t; \
	t ^= E_KEY[8 * (i) + 5]; E_KEY[8 * (i) + 13] = t; \
	t ^= E_KEY[8 * (i) + 6]; E_KEY[8 * (i) + 14] = t; \
	t ^= E_KEY[8 * (i) + 7]; E_KEY[8 * (i) + 15] = t; \
} while (0)
232
233static int aes_set_key(void *ctx_arg, const u8 *in_key, unsigned int key_len,
234 u32 *flags)
235{
236 struct aes_ctx *ctx = ctx_arg;
237 u32 i, j, t, u, v, w;
238
239 if (key_len != 16 && key_len != 24 && key_len != 32) {
240 *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
241 return -EINVAL;
242 }
243
244 ctx->key_length = key_len;
245
246 D_KEY[key_len + 24] = E_KEY[0] = u32_in(in_key);
247 D_KEY[key_len + 25] = E_KEY[1] = u32_in(in_key + 4);
248 D_KEY[key_len + 26] = E_KEY[2] = u32_in(in_key + 8);
249 D_KEY[key_len + 27] = E_KEY[3] = u32_in(in_key + 12);
250
251 switch (key_len) {
252 case 16:
253 t = E_KEY[3];
254 for (i = 0; i < 10; ++i)
255 loop4(i);
256 break;
257
258 case 24:
259 E_KEY[4] = u32_in(in_key + 16);
260 t = E_KEY[5] = u32_in(in_key + 20);
261 for (i = 0; i < 8; ++i)
262 loop6 (i);
263 break;
264
265 case 32:
266 E_KEY[4] = u32_in(in_key + 16);
267 E_KEY[5] = u32_in(in_key + 20);
268 E_KEY[6] = u32_in(in_key + 24);
269 t = E_KEY[7] = u32_in(in_key + 28);
270 for (i = 0; i < 7; ++i)
271 loop8(i);
272 break;
273 }
274
275 D_KEY[0] = E_KEY[key_len + 24];
276 D_KEY[1] = E_KEY[key_len + 25];
277 D_KEY[2] = E_KEY[key_len + 26];
278 D_KEY[3] = E_KEY[key_len + 27];
279
280 for (i = 4; i < key_len + 24; ++i) {
281 j = key_len + 24 - (i & ~3) + (i & 3);
282 imix_col(D_KEY[j], E_KEY[i]);
283 }
284
285 return 0;
286}
287
288extern void aes_encrypt(void *ctx_arg, u8 *out, const u8 *in);
289extern void aes_decrypt(void *ctx_arg, u8 *out, const u8 *in);
290
/*
 * Algorithm descriptor handed to crypto_register_alg() in aes_init(): a
 * cipher named "aes" with a 16-byte block and 16 to 32 byte keys.  Key setup
 * is done in C by aes_set_key(); the encrypt and decrypt handlers are the
 * routines declared extern just above.
 */
291static struct crypto_alg aes_alg = {
292 .cra_name = "aes",
293 .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
294 .cra_blocksize = AES_BLOCK_SIZE,
295 .cra_ctxsize = sizeof(struct aes_ctx),
296 .cra_module = THIS_MODULE,
297 .cra_list = LIST_HEAD_INIT(aes_alg.cra_list),
298 .cra_u = {
299 .cipher = {
300 .cia_min_keysize = AES_MIN_KEY_SIZE,
301 .cia_max_keysize = AES_MAX_KEY_SIZE,
302 .cia_setkey = aes_set_key,
303 .cia_encrypt = aes_encrypt,
304 .cia_decrypt = aes_decrypt
305 }
306 }
307};
308
309static int __init aes_init(void)
310{
311 gen_tabs();
312 return crypto_register_alg(&aes_alg);
313}
314
/* Module exit point: remove the cipher registered by aes_init(). */
315static void __exit aes_fini(void)
316{
317 crypto_unregister_alg(&aes_alg);
318}
319
320module_init(aes_init);
321module_exit(aes_fini);
322
323MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm");
324MODULE_LICENSE("GPL");
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 90d6089d60ed..256c0b1fed10 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -146,7 +146,7 @@ config CRYPTO_SERPENT
146 146
147config CRYPTO_AES 147config CRYPTO_AES
148 tristate "AES cipher algorithms" 148 tristate "AES cipher algorithms"
149 depends on CRYPTO && !((X86 || UML_X86) && !64BIT) 149 depends on CRYPTO && !(X86 || UML_X86)
150 help 150 help
151 AES cipher algorithms (FIPS-197). AES uses the Rijndael 151 AES cipher algorithms (FIPS-197). AES uses the Rijndael
152 algorithm. 152 algorithm.
@@ -184,6 +184,26 @@ config CRYPTO_AES_586
184 184
185 See <http://csrc.nist.gov/encryption/aes/> for more information. 185 See <http://csrc.nist.gov/encryption/aes/> for more information.
186 186
187config CRYPTO_AES_X86_64
188 tristate "AES cipher algorithms (x86_64)"
189 depends on CRYPTO && ((X86 || UML_X86) && 64BIT)
190 help
191 AES cipher algorithms (FIPS-197). AES uses the Rijndael
192 algorithm.
193
194 Rijndael appears to be consistently a very good performer in
195 both hardware and software across a wide range of computing
196 environments regardless of its use in feedback or non-feedback
197 modes. Its key setup time is excellent, and its key agility is
198 good. Rijndael's very low memory requirements make it very well
199 suited for restricted-space environments, in which it also
200 demonstrates excellent performance. Rijndael's operations are
201 among the easiest to defend against power and timing attacks.
202
203 The AES specifies three key sizes: 128, 192 and 256 bits
204
205 See <http://csrc.nist.gov/encryption/aes/> for more information.
206
187config CRYPTO_CAST5 207config CRYPTO_CAST5
188 tristate "CAST5 (CAST-128) cipher algorithm" 208 tristate "CAST5 (CAST-128) cipher algorithm"
189 depends on CRYPTO 209 depends on CRYPTO