aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/crypto
diff options
context:
space:
mode:
authorThomas Gleixner <tglx@linutronix.de>2007-10-11 05:16:21 -0400
committerThomas Gleixner <tglx@linutronix.de>2007-10-11 05:16:21 -0400
commit9c2019421511a1bc646981d55528334ae46464c0 (patch)
tree12bd39b5201d0afc74dccd8e06464233d3058e58 /arch/x86/crypto
parentaf49d41e8c0e6649b3966470aa6319585144f8e8 (diff)
i386: move crypto
Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/crypto')
-rw-r--r--arch/x86/crypto/Makefile5
-rw-r--r--arch/x86/crypto/Makefile_3212
-rw-r--r--arch/x86/crypto/aes-i586-asm_32.S373
-rw-r--r--arch/x86/crypto/aes_32.c515
-rw-r--r--arch/x86/crypto/twofish-i586-asm_32.S335
-rw-r--r--arch/x86/crypto/twofish_32.c97
6 files changed, 1337 insertions, 0 deletions
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
new file mode 100644
index 000000000000..b1bcf7c63028
--- /dev/null
+++ b/arch/x86/crypto/Makefile
@@ -0,0 +1,5 @@
# Select the word-size specific crypto Makefile: the 32-bit one sits next to
# this file, the 64-bit one is pulled in from arch/x86_64/crypto.
1ifeq ($(CONFIG_X86_32),y)
2include ${srctree}/arch/x86/crypto/Makefile_32
3else
4include ${srctree}/arch/x86_64/crypto/Makefile_64
5endif
diff --git a/arch/x86/crypto/Makefile_32 b/arch/x86/crypto/Makefile_32
new file mode 100644
index 000000000000..2d873a2388ed
--- /dev/null
+++ b/arch/x86/crypto/Makefile_32
@@ -0,0 +1,12 @@
1#
2# x86/crypto/Makefile
3#
4# Arch-specific CryptoAPI modules.
5#
6
# One object per cipher, built only when its CONFIG_CRYPTO_* symbol is set.
7obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
8obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
9
# Each cipher object is linked from an assembler core plus its C glue file.
10aes-i586-y := aes-i586-asm_32.o aes_32.o
11twofish-i586-y := twofish-i586-asm_32.o twofish_32.o
12
diff --git a/arch/x86/crypto/aes-i586-asm_32.S b/arch/x86/crypto/aes-i586-asm_32.S
new file mode 100644
index 000000000000..f942f0c8f630
--- /dev/null
+++ b/arch/x86/crypto/aes-i586-asm_32.S
@@ -0,0 +1,373 @@
1// -------------------------------------------------------------------------
2// Copyright (c) 2001, Dr Brian Gladman < >, Worcester, UK.
3// All rights reserved.
4//
5// LICENSE TERMS
6//
7// The free distribution and use of this software in both source and binary
8// form is allowed (with or without changes) provided that:
9//
10// 1. distributions of this source code include the above copyright
11// notice, this list of conditions and the following disclaimer;
12//
13// 2. distributions in binary form include the above copyright
14// notice, this list of conditions and the following disclaimer
15// in the documentation and/or other associated materials;
16//
17// 3. the copyright holder's name is not used to endorse products
18// built using this software without specific written permission.
19//
20//
21// ALTERNATIVELY, provided that this notice is retained in full, this product
22// may be distributed under the terms of the GNU General Public License (GPL),
23// in which case the provisions of the GPL apply INSTEAD OF those given above.
24//
25// Copyright (c) 2004 Linus Torvalds <torvalds@osdl.org>
26// Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
27
28// DISCLAIMER
29//
30// This software is provided 'as is' with no explicit or implied warranties
31// in respect of its properties including, but not limited to, correctness
32// and fitness for purpose.
33// -------------------------------------------------------------------------
34// Issue Date: 29/07/2002
35
36.file "aes-i586-asm.S"
37.text
38
39#include <asm/asm-offsets.h>
40
41#define tlen 1024 // byte length of each of 4 'xor' arrays (256 32-bit words)
42
// Both entry points start with a single "push %ebp", so at that moment the
// saved register is at 0(%esp), the return address at 4(%esp) and the first
// C argument at 8(%esp). Later pushes are compensated at the use site
// (e.g. in_blk+4, out_blk+12).
43/* offsets to parameters with one register pushed onto stack */
44#define tfm 8
45#define out_blk 12
46#define in_blk 16
47
// The cipher context starts crypto_tfm_ctx_offset bytes into the tfm. These
// three offsets place the encryption schedule first (256 bytes), then the
// 32-bit round count, then the decryption schedule.
48/* offsets in crypto_tfm structure */
49#define ekey (crypto_tfm_ctx_offset + 0)
50#define nrnd (crypto_tfm_ctx_offset + 256)
51#define dkey (crypto_tfm_ctx_offset + 260)
52
53// register mapping for encrypt and decrypt subroutines
54
// r0..r3 are the four registers that have byte sub-registers; r4/r5 do not
// get low/high aliases below, so only r0..r3 can be passed as idx or tmp
// to the column macros.
55#define r0 eax
56#define r1 ebx
57#define r2 ecx
58#define r3 edx
59#define r4 esi
60#define r5 edi
61
// Name -> byte-register aliases used by l() and h() after token pasting.
62#define eaxl al
63#define eaxh ah
64#define ebxl bl
65#define ebxh bh
66#define ecxl cl
67#define ecxh ch
68#define edxl dl
69#define edxh dh
70
// h(reg)/l(reg) give the high/low byte register of reg. The extra macro
// level makes the preprocessor expand the argument (r0 -> eax) before the
// ## paste, so h(r0) becomes eaxh and then ah.
71#define _h(reg) reg##h
72#define h(reg) _h(reg)
73
74#define _l(reg) reg##l
75#define l(reg) _l(reg)
76
77// This macro takes a 32-bit word representing a column and uses
78// each of its four bytes to index into four tables of 256 32-bit
79// words to obtain values that are then xored into the four
80// output registers a1, a2, a3 and a4.
81
82// Parameters:
83// table table base address (four consecutive tables, tlen bytes apart)
84// a1 out_state[0], receives the lookup for byte 0 of idx
85// a2 out_state[1], receives the lookup for byte 1 of idx
86// a3 out_state[2], receives the lookup for byte 2 of idx
87// a4 out_state[3], receives the lookup for byte 3 of idx
88// idx input register for the round (destroyed)
89// tmp scratch register for the round
90// sched key schedule (taken by do_fcol and do_icol only)
91
92#define do_col(table, a1,a2,a3,a4, idx, tmp) \
93 movzx %l(idx),%tmp; \
94 xor table(,%tmp,4),%a1; \
95 movzx %h(idx),%tmp; \
96 shr $16,%idx; \
97 xor table+tlen(,%tmp,4),%a2; \
98 movzx %l(idx),%tmp; \
99 movzx %h(idx),%idx; \
100 xor table+2*tlen(,%tmp,4),%a3; \
101 xor table+3*tlen(,%idx,4),%a4;
102
// First column step of a forward round: same table lookups as do_col, but
// each output register is first loaded from the round key at "sched"
// instead of being accumulated into.
//   a1 <- 0 sched, a2 <- 12 sched, a4 <- 4 sched, a3 <- 8 sched
// a3 is loaded last because its previous contents are first moved into idx.
103// initialise output registers from the key schedule
104// NB1: original value of a3 is in idx on exit
105// NB2: original values of a1,a2,a4 aren't used
106#define do_fcol(table, a1,a2,a3,a4, idx, tmp, sched) \
107 mov 0 sched,%a1; \
108 movzx %l(idx),%tmp; \
109 mov 12 sched,%a2; \
110 xor table(,%tmp,4),%a1; \
111 mov 4 sched,%a4; \
112 movzx %h(idx),%tmp; \
113 shr $16,%idx; \
114 xor table+tlen(,%tmp,4),%a2; \
115 movzx %l(idx),%tmp; \
116 movzx %h(idx),%idx; \
117 xor table+3*tlen(,%idx,4),%a4; \
118 mov %a3,%idx; \
119 mov 8 sched,%a3; \
120 xor table+2*tlen(,%tmp,4),%a3;
121
// First column step of an inverse round. Identical to do_fcol except for
// which round-key words seed a2 and a4:
//   a1 <- 0 sched, a2 <- 4 sched, a4 <- 12 sched, a3 <- 8 sched
122// initialise output registers from the key schedule
123// NB1: original value of a3 is in idx on exit
124// NB2: original values of a1,a2,a4 aren't used
125#define do_icol(table, a1,a2,a3,a4, idx, tmp, sched) \
126 mov 0 sched,%a1; \
127 movzx %l(idx),%tmp; \
128 mov 4 sched,%a2; \
129 xor table(,%tmp,4),%a1; \
130 mov 12 sched,%a4; \
131 movzx %h(idx),%tmp; \
132 shr $16,%idx; \
133 xor table+tlen(,%tmp,4),%a2; \
134 movzx %l(idx),%tmp; \
135 movzx %h(idx),%idx; \
136 xor table+3*tlen(,%idx,4),%a4; \
137 mov %a3,%idx; \
138 mov 8 sched,%a3; \
139 xor table+2*tlen(,%tmp,4),%a3;
140
141
// save(slot, reg) stores reg into 32-bit stack slot number "slot";
// restore(reg, slot) loads it back. Note the argument order is reversed
// between the two. The slots live at 0(%esp) and 4(%esp), which the
// callers reserve with "sub $8,%esp".
142// original Gladman had conditional saves to MMX regs.
143#define save(a1, a2) \
144 mov %a2,4*a1(%esp)
145
146#define restore(a1, a2) \
147 mov 4*a2(%esp),%a1
148
149// These macros perform a forward encryption cycle. A fwd_rnd1/fwd_rnd2
150// pair is entered with the previous round column values in r0,r1,r4,r5
151// and exits with the final values in the same registers, using stack
152// for temporary storage.
153
// fwd_rnd1(arg, table): one forward round. "arg" is the memory operand of
// the round key, "table" the lookup table base. r1 and r5 are parked in
// stack slots 0 and 1 because they are overwritten as outputs before they
// have been consumed as inputs; r0 is reused as the idx register for all
// four columns and r3 is the scratch register.
154// round column values
155// on entry: r0,r1,r4,r5
156// on exit: r2,r1,r4,r5
157#define fwd_rnd1(arg, table) \
158 save (0,r1); \
159 save (1,r5); \
160 \
161 /* compute new column values */ \
162 do_fcol(table, r2,r5,r4,r1, r0,r3, arg); /* idx=r0 */ \
163 do_col (table, r4,r1,r2,r5, r0,r3); /* idx=r4 */ \
164 restore(r0,0); \
165 do_col (table, r1,r2,r5,r4, r0,r3); /* idx=r1 */ \
166 restore(r0,1); \
167 do_col (table, r5,r4,r1,r2, r0,r3); /* idx=r5 */
168
// fwd_rnd2(arg, table): same round as fwd_rnd1 with the roles of r0 and r2
// exchanged, so that it consumes fwd_rnd1's output (first column in r2)
// and leaves the first column back in r0.
169// round column values
170// on entry: r2,r1,r4,r5
171// on exit: r0,r1,r4,r5
172#define fwd_rnd2(arg, table) \
173 save (0,r1); \
174 save (1,r5); \
175 \
176 /* compute new column values */ \
177 do_fcol(table, r0,r5,r4,r1, r2,r3, arg); /* idx=r2 */ \
178 do_col (table, r4,r1,r0,r5, r2,r3); /* idx=r4 */ \
179 restore(r2,0); \
180 do_col (table, r1,r0,r5,r4, r2,r3); /* idx=r1 */ \
181 restore(r2,1); \
182 do_col (table, r5,r4,r1,r0, r2,r3); /* idx=r5 */
183
184// These macros perform an inverse encryption cycle. An inv_rnd1/inv_rnd2
185// pair is entered with the previous round column values in r0,r1,r4,r5
186// and exits with the final values in the same registers, using stack
187// for temporary storage
188
// inv_rnd1(arg, table): one inverse round. Same structure as fwd_rnd1 but
// built on do_icol and with the output registers passed in the opposite
// rotation order.
189// round column values
190// on entry: r0,r1,r4,r5
191// on exit: r2,r1,r4,r5
192#define inv_rnd1(arg, table) \
193 save (0,r1); \
194 save (1,r5); \
195 \
196 /* compute new column values */ \
197 do_icol(table, r2,r1,r4,r5, r0,r3, arg); /* idx=r0 */ \
198 do_col (table, r4,r5,r2,r1, r0,r3); /* idx=r4 */ \
199 restore(r0,0); \
200 do_col (table, r1,r4,r5,r2, r0,r3); /* idx=r1 */ \
201 restore(r0,1); \
202 do_col (table, r5,r2,r1,r4, r0,r3); /* idx=r5 */
203
// inv_rnd2(arg, table): inv_rnd1 with r0 and r2 exchanged, so it consumes
// inv_rnd1's output and leaves the first column back in r0.
204// round column values
205// on entry: r2,r1,r4,r5
206// on exit: r0,r1,r4,r5
207#define inv_rnd2(arg, table) \
208 save (0,r1); \
209 save (1,r5); \
210 \
211 /* compute new column values */ \
212 do_icol(table, r0,r1,r4,r5, r2,r3, arg); /* idx=r2 */ \
213 do_col (table, r4,r5,r0,r1, r2,r3); /* idx=r4 */ \
214 restore(r2,0); \
215 do_col (table, r1,r4,r5,r0, r2,r3); /* idx=r1 */ \
216 restore(r2,1); \
217 do_col (table, r5,r0,r1,r4, r2,r3); /* idx=r5 */
218
219// AES (Rijndael) Encryption Subroutine
220/* void aes_enc_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */
// Encrypts one 16-byte block. All three arguments are read from the stack.
// %ebx, %esi, %edi and %ebp are saved and restored; %eax, %ecx and %edx
// are clobbered. %eax is set to 1 before returning even though the C
// prototype is void.
221
222.global aes_enc_blk
223
224.extern ft_tab
225.extern fl_tab
226
227.align 4
228
229aes_enc_blk:
230 push %ebp
231 mov tfm(%esp),%ebp
232
233// CAUTION: the order and the values used in these assigns
234// rely on the register mappings
235
// in_blk is read after the second push, hence the +4.
2361: push %ebx
237 mov in_blk+4(%esp),%r2
238 push %esi
239 mov nrnd(%ebp),%r3 // number of rounds
240 push %edi
241#if ekey != 0
242 lea ekey(%ebp),%ebp // key pointer
243#endif
244
245// input four columns and xor in first round key
246
247 mov (%r2),%r0
248 mov 4(%r2),%r1
249 mov 8(%r2),%r4
250 mov 12(%r2),%r5
251 xor (%ebp),%r0
252 xor 4(%ebp),%r1
253 xor 8(%ebp),%r4
254 xor 12(%ebp),%r5
255
// %ebp is biased so that the ten rounds common to every key size always use
// offsets 0..+144; the extra rounds of longer keys use negative offsets.
// Both conditional jumps test the single cmp below: lea does not modify
// the flags.
256 sub $8,%esp // space for register saves on stack
257 add $16,%ebp // increment to next round key
258 cmp $12,%r3
259 jb 4f // 10 rounds for 128-bit key
260 lea 32(%ebp),%ebp
261 je 3f // 12 rounds for 192-bit key
262 lea 32(%ebp),%ebp
263
2642: fwd_rnd1( -64(%ebp) ,ft_tab) // 14 rounds for 256-bit key
265 fwd_rnd2( -48(%ebp) ,ft_tab)
2663: fwd_rnd1( -32(%ebp) ,ft_tab) // 12 rounds for 192-bit key
267 fwd_rnd2( -16(%ebp) ,ft_tab)
2684: fwd_rnd1( (%ebp) ,ft_tab) // 10 rounds for 128-bit key
269 fwd_rnd2( +16(%ebp) ,ft_tab)
270 fwd_rnd1( +32(%ebp) ,ft_tab)
271 fwd_rnd2( +48(%ebp) ,ft_tab)
272 fwd_rnd1( +64(%ebp) ,ft_tab)
273 fwd_rnd2( +80(%ebp) ,ft_tab)
274 fwd_rnd1( +96(%ebp) ,ft_tab)
275 fwd_rnd2(+112(%ebp) ,ft_tab)
276 fwd_rnd1(+128(%ebp) ,ft_tab)
277 fwd_rnd2(+144(%ebp) ,fl_tab) // last round uses a different table
278
279// move final values to the output array. CAUTION: the
280// order of these assigns rely on the register mappings
281
// out_blk is read while %ebx, %esi and %edi are still pushed, hence the +12.
// The stores are interleaved with the pops because r4/r5 are %esi/%edi and
// r1 is %ebx: each value must be written out before its register is
// restored.
282 add $8,%esp
283 mov out_blk+12(%esp),%ebp
284 mov %r5,12(%ebp)
285 pop %edi
286 mov %r4,8(%ebp)
287 pop %esi
288 mov %r1,4(%ebp)
289 pop %ebx
290 mov %r0,(%ebp)
291 pop %ebp
292 mov $1,%eax
293 ret
294
295// AES (Rijndael) Decryption Subroutine
296/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */
// Decrypts one 16-byte block. Mirror image of aes_enc_blk: it starts at the
// last round key of the decryption schedule and walks towards its start.
// %ebx, %esi, %edi and %ebp are saved and restored; %eax, %ecx and %edx
// are clobbered. %eax is set to 1 before returning even though the C
// prototype is void.
297
298.global aes_dec_blk
299
300.extern it_tab
301.extern il_tab
302
303.align 4
304
305aes_dec_blk:
306 push %ebp
307 mov tfm(%esp),%ebp
308
309// CAUTION: the order and the values used in these assigns
310// rely on the register mappings
311
// in_blk is read after the second push, hence the +4.
3121: push %ebx
313 mov in_blk+4(%esp),%r2
314 push %esi
315 mov nrnd(%ebp),%r3 // number of rounds
316 push %edi
317#if dkey != 0
318 lea dkey(%ebp),%ebp // key pointer
319#endif
// %ebp = dkey + 16 * rounds, i.e. the last 16-byte round key.
320 mov %r3,%r0
321 shl $4,%r0
322 add %r0,%ebp
323
324// input four columns and xor in first round key
325
326 mov (%r2),%r0
327 mov 4(%r2),%r1
328 mov 8(%r2),%r4
329 mov 12(%r2),%r5
330 xor (%ebp),%r0
331 xor 4(%ebp),%r1
332 xor 8(%ebp),%r4
333 xor 12(%ebp),%r5
334
// %ebp is biased so that the ten rounds common to every key size always use
// offsets 0..-144; the extra rounds of longer keys use positive offsets.
// Both conditional jumps test the single cmp below: lea does not modify
// the flags.
335 sub $8,%esp // space for register saves on stack
336 sub $16,%ebp // step back to the next round key
337 cmp $12,%r3
338 jb 4f // 10 rounds for 128-bit key
339 lea -32(%ebp),%ebp
340 je 3f // 12 rounds for 192-bit key
341 lea -32(%ebp),%ebp
342
3432: inv_rnd1( +64(%ebp), it_tab) // 14 rounds for 256-bit key
344 inv_rnd2( +48(%ebp), it_tab)
3453: inv_rnd1( +32(%ebp), it_tab) // 12 rounds for 192-bit key
346 inv_rnd2( +16(%ebp), it_tab)
3474: inv_rnd1( (%ebp), it_tab) // 10 rounds for 128-bit key
348 inv_rnd2( -16(%ebp), it_tab)
349 inv_rnd1( -32(%ebp), it_tab)
350 inv_rnd2( -48(%ebp), it_tab)
351 inv_rnd1( -64(%ebp), it_tab)
352 inv_rnd2( -80(%ebp), it_tab)
353 inv_rnd1( -96(%ebp), it_tab)
354 inv_rnd2(-112(%ebp), it_tab)
355 inv_rnd1(-128(%ebp), it_tab)
356 inv_rnd2(-144(%ebp), il_tab) // last round uses a different table
357
358// move final values to the output array. CAUTION: the
359// order of these assigns rely on the register mappings
360
// out_blk is read while %ebx, %esi and %edi are still pushed, hence the +12.
// Each value is stored before the register holding it is popped.
361 add $8,%esp
362 mov out_blk+12(%esp),%ebp
363 mov %r5,12(%ebp)
364 pop %edi
365 mov %r4,8(%ebp)
366 pop %esi
367 mov %r1,4(%ebp)
368 pop %ebx
369 mov %r0,(%ebp)
370 pop %ebp
371 mov $1,%eax
372 ret
373
diff --git a/arch/x86/crypto/aes_32.c b/arch/x86/crypto/aes_32.c
new file mode 100644
index 000000000000..49aad9397f10
--- /dev/null
+++ b/arch/x86/crypto/aes_32.c
@@ -0,0 +1,515 @@
1/*
2 *
3 * Glue Code for optimized 586 assembler version of AES
4 *
5 * Copyright (c) 2002, Dr Brian Gladman <>, Worcester, UK.
6 * All rights reserved.
7 *
8 * LICENSE TERMS
9 *
10 * The free distribution and use of this software in both source and binary
11 * form is allowed (with or without changes) provided that:
12 *
13 * 1. distributions of this source code include the above copyright
14 * notice, this list of conditions and the following disclaimer;
15 *
16 * 2. distributions in binary form include the above copyright
17 * notice, this list of conditions and the following disclaimer
18 * in the documentation and/or other associated materials;
19 *
20 * 3. the copyright holder's name is not used to endorse products
21 * built using this software without specific written permission.
22 *
23 * ALTERNATIVELY, provided that this notice is retained in full, this product
24 * may be distributed under the terms of the GNU General Public License (GPL),
25 * in which case the provisions of the GPL apply INSTEAD OF those given above.
26 *
27 * DISCLAIMER
28 *
29 * This software is provided 'as is' with no explicit or implied warranties
30 * in respect of its properties, including, but not limited to, correctness
31 * and/or fitness for purpose.
32 *
33 * Copyright (c) 2003, Adam J. Richter <adam@yggdrasil.com> (conversion to
34 * 2.5 API).
35 * Copyright (c) 2003, 2004 Fruhwirth Clemens <clemens@endorphin.org>
36 * Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
37 *
38 */
39
40#include <asm/byteorder.h>
41#include <linux/kernel.h>
42#include <linux/module.h>
43#include <linux/init.h>
44#include <linux/types.h>
45#include <linux/crypto.h>
46#include <linux/linkage.h>
47
48asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
49asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
50
/* Key sizes and block size are in bytes. */
#define AES_MIN_KEY_SIZE 16
#define AES_MAX_KEY_SIZE 32
#define AES_BLOCK_SIZE 16
/*
 * Length of one key schedule in 32-bit words. The expansion is
 * parenthesised so the macro is safe inside larger expressions
 * (e.g. x / AES_KS_LENGTH); its value is unchanged.
 */
#define AES_KS_LENGTH (4 * AES_BLOCK_SIZE)
/* Number of round constants generated into rcon_tab. */
#define RC_LENGTH 29
56
/*
 * Per-transform AES state: encryption key schedule, round count, and
 * decryption key schedule.
 *
 * The member order and sizes are relied upon by aes-i586-asm_32.S, which
 * addresses ekey, the round count and dkey at fixed byte offsets 0, 256
 * and 260 from the start of the context. Do not reorder or resize.
 */
57struct aes_ctx {
58 u32 ekey[AES_KS_LENGTH];
59 u32 rounds;
60 u32 dkey[AES_KS_LENGTH];
61};
62
/* Modular polynomial used by gen_tabs() when building the GF(2^8) tables. */
63#define WPOLY 0x011b
/* Pack four bytes into a 32-bit word, b0 in the least significant byte. */
64#define bytes2word(b0, b1, b2, b3) \
65 (((u32)(b3) << 24) | ((u32)(b2) << 16) | ((u32)(b1) << 8) | (b0))
66
/*
 * Each fN(x) multiplies x by the constant N in the field by adding a fixed
 * offset to log[x] and looking the sum up in pow[]; zero has no logarithm
 * and is special-cased. fi(x) is the multiplicative inverse (with 0 -> 0).
 * The macros use the local arrays "pow" and "log" of the calling function
 * and evaluate x more than once.
 */
67/* define the finite field multiplies required for Rijndael */
68#define f2(x) ((x) ? pow[log[x] + 0x19] : 0)
69#define f3(x) ((x) ? pow[log[x] + 0x01] : 0)
70#define f9(x) ((x) ? pow[log[x] + 0xc7] : 0)
71#define fb(x) ((x) ? pow[log[x] + 0x68] : 0)
72#define fd(x) ((x) ? pow[log[x] + 0xee] : 0)
73#define fe(x) ((x) ? pow[log[x] + 0xdf] : 0)
74#define fi(x) ((x) ? pow[255 - log[x]]: 0)
75
/*
 * upr - rotate a 32-bit word left by whole bytes.
 * @x: word to rotate
 * @n: number of byte positions
 *
 * The shift count is reduced modulo 32 bits, and a zero count returns @x
 * unchanged. This avoids the undefined "x >> 32" the plain rotate
 * expression would perform for n == 0. Results for n = 1, 2, 3 (the only
 * values used when building the tables) are the plain rotate-left.
 */
static inline u32 upr(u32 x, int n)
{
	unsigned int shift = (8 * (unsigned int)n) & 31;

	if (!shift)
		return x;

	return (x << shift) | (x >> (32 - shift));
}
80
/*
 * bval - extract one byte of a 32-bit word.
 * @x: source word
 * @n: byte index, 0 = least significant byte
 *
 * The index is reduced modulo 4 so the shift count is always below the
 * width of @x; an out-of-range index can no longer cause an undefined
 * shift. Results for n = 0..3 are unchanged.
 */
static inline u8 bval(u32 x, int n)
{
	unsigned int byte = (unsigned int)n & 3;

	return (u8)(x >> (8 * byte));
}
85
/*
 * Both macros are comma expressions that use a variable named "w" from the
 * calling scope as scratch (it is overwritten) and yield the transformed
 * byte as their value. The argument is not parenthesised in the cast, so
 * pass a parenthesised or primary expression.
 */
86/* The forward and inverse affine transformations used in the S-box */
87#define fwd_affine(x) \
88 (w = (u32)x, w ^= (w<<1)^(w<<2)^(w<<3)^(w<<4), 0x63^(u8)(w^(w>>8)))
89
90#define inv_affine(x) \
91 (w = (u32)x, w = (w<<1)^(w<<3)^(w<<6), 0x05^(u8)(w^(w>>8)))
92
/*
 * Lookup tables, all filled at module init by gen_tabs().
 *
 * rcon_tab and im_tab are only used by the key schedule in this file and
 * are static. ft_tab, fl_tab, it_tab and il_tab have external linkage
 * because aes-i586-asm_32.S references them by name (.extern) and indexes
 * them as four consecutive 1024-byte tables.
 */
93static u32 rcon_tab[RC_LENGTH];
94
95u32 ft_tab[4][256];
96u32 fl_tab[4][256];
97static u32 im_tab[4][256];
98u32 il_tab[4][256];
99u32 it_tab[4][256];
100
/*
 * gen_tabs - compute every AES lookup table used by this module.
 *
 * Fills rcon_tab, ft_tab, fl_tab, im_tab, it_tab and il_tab. Takes no
 * arguments and returns nothing; called once from aes_init().
 *
 * The fN()/fi() and *_affine() macros used below read the local pow[] and
 * log[] arrays and overwrite the local "w", so the order of statements
 * inside the main loop matters.
 */
101static void gen_tabs(void)
102{
103 u32 i, w;
104 u8 pow[512], log[256];
105
106 /*
107 * log and power tables for GF(2^8) finite field with
108 * WPOLY as modular polynomial - the simplest primitive
109 * root is 0x03, used here to generate the tables.
110 */
111 i = 0; w = 1;
112
	/*
	 * Each power is stored twice, 255 entries apart, so that the
	 * fN() macros can index pow[] with a sum of two logarithms
	 * without reducing it modulo 255.
	 */
113 do {
114 pow[i] = (u8)w;
115 pow[i + 255] = (u8)w;
116 log[w] = (u8)i++;
117 w ^= (w << 1) ^ (w & 0x80 ? WPOLY : 0);
118 } while (w != 1);
119
	/* round constants: 1, then repeatedly doubled in the field */
120 for(i = 0, w = 1; i < RC_LENGTH; ++i) {
121 rcon_tab[i] = bytes2word(w, 0, 0, 0);
122 w = f2(w);
123 }
124
125 for(i = 0; i < 256; ++i) {
126 u8 b;
127
	/* b = forward S-box value of i */
128 b = fwd_affine(fi((u8)i));
129 w = bytes2word(f2(b), b, b, f3(b));
130
131 /* tables for a normal encryption round */
132 ft_tab[0][i] = w;
133 ft_tab[1][i] = upr(w, 1);
134 ft_tab[2][i] = upr(w, 2);
135 ft_tab[3][i] = upr(w, 3);
136 w = bytes2word(b, 0, 0, 0);
137
138 /*
139 * tables for last encryption round
140 * (may also be used in the key schedule)
141 */
142 fl_tab[0][i] = w;
143 fl_tab[1][i] = upr(w, 1);
144 fl_tab[2][i] = upr(w, 2);
145 fl_tab[3][i] = upr(w, 3);
146
	/* b = inverse S-box value of i */
147 b = fi(inv_affine((u8)i));
148 w = bytes2word(fe(b), f9(b), fd(b), fb(b));
149
	/* note: im_tab is indexed by b, it_tab below by i */
150 /* tables for the inverse mix column operation */
151 im_tab[0][b] = w;
152 im_tab[1][b] = upr(w, 1);
153 im_tab[2][b] = upr(w, 2);
154 im_tab[3][b] = upr(w, 3);
155
156 /* tables for a normal decryption round */
157 it_tab[0][i] = w;
158 it_tab[1][i] = upr(w,1);
159 it_tab[2][i] = upr(w,2);
160 it_tab[3][i] = upr(w,3);
161
162 w = bytes2word(b, 0, 0, 0);
163
164 /* tables for last decryption round */
165 il_tab[0][i] = w;
166 il_tab[1][i] = upr(w,1);
167 il_tab[2][i] = upr(w,2);
168 il_tab[3][i] = upr(w,3);
169 }
170}
171
/*
 * four_tables(x, tab, vf, rf, c) XORs one entry from each of tab[0..3].
 * For table r the index is byte rf(r,c) of the word vf(x,r,c).
 * x is expanded four times, so it must not have side effects.
 */
172#define four_tables(x,tab,vf,rf,c) \
173( tab[0][bval(vf(x,0,c),rf(0,c))] ^ \
174 tab[1][bval(vf(x,1,c),rf(1,c))] ^ \
175 tab[2][bval(vf(x,2,c),rf(2,c))] ^ \
176 tab[3][bval(vf(x,3,c),rf(3,c))] \
177)
178
/* vf1: use the word unchanged. rf1: byte r. rf2: byte (r - c) mod 4. */
179#define vf1(x,r,c) (x)
180#define rf1(r,c) (r)
181#define rf2(r,c) ((r-c)&3)
182
/* inv_mcol: im_tab lookup on bytes 0..3 of x, in order. */
/* ls_box: fl_tab lookup with the byte selection offset by c. */
183#define inv_mcol(x) four_tables(x,im_tab,vf1,rf1,0)
184#define ls_box(x,c) four_tables(x,fl_tab,vf1,rf2,c)
185
186#define ff(x) inv_mcol(x)
187
/*
 * Encryption key-schedule steps. Every macro in this family:
 *  - takes k, the schedule array being filled, and i, the step index
 *    (which also selects rcon_tab[i]);
 *  - reads and updates the array "ss" of the calling function, which
 *    carries the most recently generated key words;
 *  - expands to a brace block, not a do { } while (0), so it must not be
 *    used as the unbraced body of an if that has an else.
 *
 * keN generates N words per step (N = 4, 6, 8 key words). kelN is the
 * final step; for N = 6 and 8 it generates only the first four words.
 */
188#define ke4(k,i) \
189{ \
190 k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[i]; \
191 k[4*(i)+5] = ss[1] ^= ss[0]; \
192 k[4*(i)+6] = ss[2] ^= ss[1]; \
193 k[4*(i)+7] = ss[3] ^= ss[2]; \
194}
195
/* same computation as ke4 */
196#define kel4(k,i) \
197{ \
198 k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[i]; \
199 k[4*(i)+5] = ss[1] ^= ss[0]; \
200 k[4*(i)+6] = ss[2] ^= ss[1]; k[4*(i)+7] = ss[3] ^= ss[2]; \
201}
202
203#define ke6(k,i) \
204{ \
205 k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \
206 k[6*(i)+ 7] = ss[1] ^= ss[0]; \
207 k[6*(i)+ 8] = ss[2] ^= ss[1]; \
208 k[6*(i)+ 9] = ss[3] ^= ss[2]; \
209 k[6*(i)+10] = ss[4] ^= ss[3]; \
210 k[6*(i)+11] = ss[5] ^= ss[4]; \
211}
212
213#define kel6(k,i) \
214{ \
215 k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \
216 k[6*(i)+ 7] = ss[1] ^= ss[0]; \
217 k[6*(i)+ 8] = ss[2] ^= ss[1]; \
218 k[6*(i)+ 9] = ss[3] ^= ss[2]; \
219}
220
/* the fifth word of each 8-word step gets an extra ls_box(…,0) */
221#define ke8(k,i) \
222{ \
223 k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \
224 k[8*(i)+ 9] = ss[1] ^= ss[0]; \
225 k[8*(i)+10] = ss[2] ^= ss[1]; \
226 k[8*(i)+11] = ss[3] ^= ss[2]; \
227 k[8*(i)+12] = ss[4] ^= ls_box(ss[3],0); \
228 k[8*(i)+13] = ss[5] ^= ss[4]; \
229 k[8*(i)+14] = ss[6] ^= ss[5]; \
230 k[8*(i)+15] = ss[7] ^= ss[6]; \
231}
232
233#define kel8(k,i) \
234{ \
235 k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \
236 k[8*(i)+ 9] = ss[1] ^= ss[0]; \
237 k[8*(i)+10] = ss[2] ^= ss[1]; \
238 k[8*(i)+11] = ss[3] ^= ss[2]; \
239}
240
/*
 * Decryption key-schedule steps for 128-bit keys. aes_set_key() uses kdf4
 * for step 0, kd4 for steps 1..8 and kdl4 for step 9.
 *
 * All three read and update the caller's "ss" array and use ss[4] as
 * scratch; i is not parenthesised inside the subscripts, so pass a plain
 * variable or constant. kdf4 and kd4 store words that have been passed
 * through ff(); kdl4 stores its words without ff(). Each expands to a
 * brace block rather than do { } while (0).
 */
241#define kdf4(k,i) \
242{ \
243 ss[0] = ss[0] ^ ss[2] ^ ss[1] ^ ss[3]; \
244 ss[1] = ss[1] ^ ss[3]; \
245 ss[2] = ss[2] ^ ss[3]; \
246 ss[3] = ss[3]; \
247 ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i]; \
248 ss[i % 4] ^= ss[4]; \
249 ss[4] ^= k[4*(i)]; \
250 k[4*(i)+4] = ff(ss[4]); \
251 ss[4] ^= k[4*(i)+1]; \
252 k[4*(i)+5] = ff(ss[4]); \
253 ss[4] ^= k[4*(i)+2]; \
254 k[4*(i)+6] = ff(ss[4]); \
255 ss[4] ^= k[4*(i)+3]; \
256 k[4*(i)+7] = ff(ss[4]); \
257}
258
/* middle step: new words are derived from the previous four in k */
259#define kd4(k,i) \
260{ \
261 ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i]; \
262 ss[i % 4] ^= ss[4]; \
263 ss[4] = ff(ss[4]); \
264 k[4*(i)+4] = ss[4] ^= k[4*(i)]; \
265 k[4*(i)+5] = ss[4] ^= k[4*(i)+1]; \
266 k[4*(i)+6] = ss[4] ^= k[4*(i)+2]; \
267 k[4*(i)+7] = ss[4] ^= k[4*(i)+3]; \
268}
269
/* last step: words are rebuilt from ss only, no ff() applied */
270#define kdl4(k,i) \
271{ \
272 ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i]; \
273 ss[i % 4] ^= ss[4]; \
274 k[4*(i)+4] = (ss[0] ^= ss[1]) ^ ss[2] ^ ss[3]; \
275 k[4*(i)+5] = ss[1] ^ ss[3]; \
276 k[4*(i)+6] = ss[0]; \
277 k[4*(i)+7] = ss[1]; \
278}
279
/*
 * Decryption key-schedule steps for 192-bit keys. aes_set_key() uses kdf6
 * for step 0, kd6 for steps 1..6 and kdl6 for step 7.
 *
 * All three read and update the caller's "ss" array (ss[0..5] hold the
 * running key words; kd6 also uses ss[6] as scratch). kdf6 and kd6 store
 * ff()-transformed words, six per step; kdl6 stores four untransformed
 * words. Each expands to a brace block rather than do { } while (0).
 */
280#define kdf6(k,i) \
281{ \
282 ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \
283 k[6*(i)+ 6] = ff(ss[0]); \
284 ss[1] ^= ss[0]; \
285 k[6*(i)+ 7] = ff(ss[1]); \
286 ss[2] ^= ss[1]; \
287 k[6*(i)+ 8] = ff(ss[2]); \
288 ss[3] ^= ss[2]; \
289 k[6*(i)+ 9] = ff(ss[3]); \
290 ss[4] ^= ss[3]; \
291 k[6*(i)+10] = ff(ss[4]); \
292 ss[5] ^= ss[4]; \
293 k[6*(i)+11] = ff(ss[5]); \
294}
295
/* middle step: ff() is applied once, then chained through the previous six words of k */
296#define kd6(k,i) \
297{ \
298 ss[6] = ls_box(ss[5],3) ^ rcon_tab[i]; \
299 ss[0] ^= ss[6]; ss[6] = ff(ss[6]); \
300 k[6*(i)+ 6] = ss[6] ^= k[6*(i)]; \
301 ss[1] ^= ss[0]; \
302 k[6*(i)+ 7] = ss[6] ^= k[6*(i)+ 1]; \
303 ss[2] ^= ss[1]; \
304 k[6*(i)+ 8] = ss[6] ^= k[6*(i)+ 2]; \
305 ss[3] ^= ss[2]; \
306 k[6*(i)+ 9] = ss[6] ^= k[6*(i)+ 3]; \
307 ss[4] ^= ss[3]; \
308 k[6*(i)+10] = ss[6] ^= k[6*(i)+ 4]; \
309 ss[5] ^= ss[4]; \
310 k[6*(i)+11] = ss[6] ^= k[6*(i)+ 5]; \
311}
312
/* last step: four words, stored without ff() */
313#define kdl6(k,i) \
314{ \
315 ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \
316 k[6*(i)+ 6] = ss[0]; \
317 ss[1] ^= ss[0]; \
318 k[6*(i)+ 7] = ss[1]; \
319 ss[2] ^= ss[1]; \
320 k[6*(i)+ 8] = ss[2]; \
321 ss[3] ^= ss[2]; \
322 k[6*(i)+ 9] = ss[3]; \
323}
324
/*
 * Decryption key-schedule steps for 256-bit keys. aes_set_key() uses kdf8
 * for step 0, kd8 for steps 1..5 and kdl8 for step 6.
 *
 * All three read and update the caller's "ss" array (ss[0..7] hold the
 * running key words). kdf8 and kd8 store ff()-transformed words, eight per
 * step; kdl8 stores four untransformed words. kd8 declares its own scratch
 * variable inside the block. Each expands to a brace block rather than
 * do { } while (0).
 */
325#define kdf8(k,i) \
326{ \
327 ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \
328 k[8*(i)+ 8] = ff(ss[0]); \
329 ss[1] ^= ss[0]; \
330 k[8*(i)+ 9] = ff(ss[1]); \
331 ss[2] ^= ss[1]; \
332 k[8*(i)+10] = ff(ss[2]); \
333 ss[3] ^= ss[2]; \
334 k[8*(i)+11] = ff(ss[3]); \
335 ss[4] ^= ls_box(ss[3],0); \
336 k[8*(i)+12] = ff(ss[4]); \
337 ss[5] ^= ss[4]; \
338 k[8*(i)+13] = ff(ss[5]); \
339 ss[6] ^= ss[5]; \
340 k[8*(i)+14] = ff(ss[6]); \
341 ss[7] ^= ss[6]; \
342 k[8*(i)+15] = ff(ss[7]); \
343}
344
/* middle step: two ff() applications, each chained through four previous words of k */
345#define kd8(k,i) \
346{ \
347 u32 __g = ls_box(ss[7],3) ^ rcon_tab[i]; \
348 ss[0] ^= __g; \
349 __g = ff(__g); \
350 k[8*(i)+ 8] = __g ^= k[8*(i)]; \
351 ss[1] ^= ss[0]; \
352 k[8*(i)+ 9] = __g ^= k[8*(i)+ 1]; \
353 ss[2] ^= ss[1]; \
354 k[8*(i)+10] = __g ^= k[8*(i)+ 2]; \
355 ss[3] ^= ss[2]; \
356 k[8*(i)+11] = __g ^= k[8*(i)+ 3]; \
357 __g = ls_box(ss[3],0); \
358 ss[4] ^= __g; \
359 __g = ff(__g); \
360 k[8*(i)+12] = __g ^= k[8*(i)+ 4]; \
361 ss[5] ^= ss[4]; \
362 k[8*(i)+13] = __g ^= k[8*(i)+ 5]; \
363 ss[6] ^= ss[5]; \
364 k[8*(i)+14] = __g ^= k[8*(i)+ 6]; \
365 ss[7] ^= ss[6]; \
366 k[8*(i)+15] = __g ^= k[8*(i)+ 7]; \
367}
368
/* last step: four words, stored without ff() */
369#define kdl8(k,i) \
370{ \
371 ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \
372 k[8*(i)+ 8] = ss[0]; \
373 ss[1] ^= ss[0]; \
374 k[8*(i)+ 9] = ss[1]; \
375 ss[2] ^= ss[1]; \
376 k[8*(i)+10] = ss[2]; \
377 ss[3] ^= ss[2]; \
378 k[8*(i)+11] = ss[3]; \
379}
380
381static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
382 unsigned int key_len)
383{
384 int i;
385 u32 ss[8];
386 struct aes_ctx *ctx = crypto_tfm_ctx(tfm);
387 const __le32 *key = (const __le32 *)in_key;
388 u32 *flags = &tfm->crt_flags;
389
390 /* encryption schedule */
391
392 ctx->ekey[0] = ss[0] = le32_to_cpu(key[0]);
393 ctx->ekey[1] = ss[1] = le32_to_cpu(key[1]);
394 ctx->ekey[2] = ss[2] = le32_to_cpu(key[2]);
395 ctx->ekey[3] = ss[3] = le32_to_cpu(key[3]);
396
397 switch(key_len) {
398 case 16:
399 for (i = 0; i < 9; i++)
400 ke4(ctx->ekey, i);
401 kel4(ctx->ekey, 9);
402 ctx->rounds = 10;
403 break;
404
405 case 24:
406 ctx->ekey[4] = ss[4] = le32_to_cpu(key[4]);
407 ctx->ekey[5] = ss[5] = le32_to_cpu(key[5]);
408 for (i = 0; i < 7; i++)
409 ke6(ctx->ekey, i);
410 kel6(ctx->ekey, 7);
411 ctx->rounds = 12;
412 break;
413
414 case 32:
415 ctx->ekey[4] = ss[4] = le32_to_cpu(key[4]);
416 ctx->ekey[5] = ss[5] = le32_to_cpu(key[5]);
417 ctx->ekey[6] = ss[6] = le32_to_cpu(key[6]);
418 ctx->ekey[7] = ss[7] = le32_to_cpu(key[7]);
419 for (i = 0; i < 6; i++)
420 ke8(ctx->ekey, i);
421 kel8(ctx->ekey, 6);
422 ctx->rounds = 14;
423 break;
424
425 default:
426 *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
427 return -EINVAL;
428 }
429
430 /* decryption schedule */
431
432 ctx->dkey[0] = ss[0] = le32_to_cpu(key[0]);
433 ctx->dkey[1] = ss[1] = le32_to_cpu(key[1]);
434 ctx->dkey[2] = ss[2] = le32_to_cpu(key[2]);
435 ctx->dkey[3] = ss[3] = le32_to_cpu(key[3]);
436
437 switch (key_len) {
438 case 16:
439 kdf4(ctx->dkey, 0);
440 for (i = 1; i < 9; i++)
441 kd4(ctx->dkey, i);
442 kdl4(ctx->dkey, 9);
443 break;
444
445 case 24:
446 ctx->dkey[4] = ff(ss[4] = le32_to_cpu(key[4]));
447 ctx->dkey[5] = ff(ss[5] = le32_to_cpu(key[5]));
448 kdf6(ctx->dkey, 0);
449 for (i = 1; i < 7; i++)
450 kd6(ctx->dkey, i);
451 kdl6(ctx->dkey, 7);
452 break;
453
454 case 32:
455 ctx->dkey[4] = ff(ss[4] = le32_to_cpu(key[4]));
456 ctx->dkey[5] = ff(ss[5] = le32_to_cpu(key[5]));
457 ctx->dkey[6] = ff(ss[6] = le32_to_cpu(key[6]));
458 ctx->dkey[7] = ff(ss[7] = le32_to_cpu(key[7]));
459 kdf8(ctx->dkey, 0);
460 for (i = 1; i < 6; i++)
461 kd8(ctx->dkey, i);
462 kdl8(ctx->dkey, 6);
463 break;
464 }
465 return 0;
466}
467
/*
 * aes_encrypt - single-block encrypt hook installed as .cia_encrypt.
 * Forwards its arguments unchanged to the assembler routine aes_enc_blk().
 */
468static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
469{
470 aes_enc_blk(tfm, dst, src);
471}
472
/*
 * aes_decrypt - single-block decrypt hook installed as .cia_decrypt.
 * Forwards its arguments unchanged to the assembler routine aes_dec_blk().
 */
473static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
474{
475 aes_dec_blk(tfm, dst, src);
476}
477
/*
 * Algorithm descriptor registered by aes_init(): a single-block cipher
 * named "aes" (driver name "aes-i586", priority 200) with a 16-byte block,
 * 16..32 byte keys, and struct aes_ctx as its per-transform context.
 */
478static struct crypto_alg aes_alg = {
479 .cra_name = "aes",
480 .cra_driver_name = "aes-i586",
481 .cra_priority = 200,
482 .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
483 .cra_blocksize = AES_BLOCK_SIZE,
484 .cra_ctxsize = sizeof(struct aes_ctx),
485 .cra_module = THIS_MODULE,
486 .cra_list = LIST_HEAD_INIT(aes_alg.cra_list),
487 .cra_u = {
488 .cipher = {
489 .cia_min_keysize = AES_MIN_KEY_SIZE,
490 .cia_max_keysize = AES_MAX_KEY_SIZE,
491 .cia_setkey = aes_set_key,
492 .cia_encrypt = aes_encrypt,
493 .cia_decrypt = aes_decrypt
494 }
495 }
496};
497
/*
 * aes_init - module entry point.
 * Builds the lookup tables first, so they are ready before the algorithm
 * becomes visible, then registers aes_alg. Returns the result of
 * crypto_register_alg().
 */
498static int __init aes_init(void)
499{
500 gen_tabs();
501 return crypto_register_alg(&aes_alg);
502}
503
/* aes_fini - module exit point; unregisters aes_alg. */
504static void __exit aes_fini(void)
505{
506 crypto_unregister_alg(&aes_alg);
507}
508
/* Module load/unload hooks. */
509module_init(aes_init);
510module_exit(aes_fini);
511
/* Module metadata; the licence string matches the dual terms in the file header. */
512MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, i586 asm optimized");
513MODULE_LICENSE("Dual BSD/GPL");
514MODULE_AUTHOR("Fruhwirth Clemens, James Morris, Brian Gladman, Adam Richter");
515MODULE_ALIAS("aes");
diff --git a/arch/x86/crypto/twofish-i586-asm_32.S b/arch/x86/crypto/twofish-i586-asm_32.S
new file mode 100644
index 000000000000..39b98ed2c1b9
--- /dev/null
+++ b/arch/x86/crypto/twofish-i586-asm_32.S
@@ -0,0 +1,335 @@
1/***************************************************************************
2* Copyright (C) 2006 by Joachim Fritschi, <jfritschi@freenet.de> *
3* *
4* This program is free software; you can redistribute it and/or modify *
5* it under the terms of the GNU General Public License as published by *
6* the Free Software Foundation; either version 2 of the License, or *
7* (at your option) any later version. *
8* *
9* This program is distributed in the hope that it will be useful, *
10* but WITHOUT ANY WARRANTY; without even the implied warranty of *
11* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
12* GNU General Public License for more details. *
13* *
14* You should have received a copy of the GNU General Public License *
15* along with this program; if not, write to the *
16* Free Software Foundation, Inc., *
17* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
18***************************************************************************/
19
20.file "twofish-i586-asm.S"
21.text
22
23#include <asm/asm-offsets.h>
24
/* Stack offsets of the C arguments at function entry, before any push;
 * the entry code adds 16 after saving four registers. */
25/* return address at 0 */
26
27#define in_blk 12 /* input byte array address parameter*/
28#define out_blk 8 /* output byte array address parameter*/
29#define tfm 4 /* crypto_tfm pointer; the Twofish context is crypto_tfm_ctx_offset bytes into it */
30
/* Byte offsets of the four 32-bit words a, b, c, d within a block and
 * within each group of whitening keys. */
31#define a_offset 0
32#define b_offset 4
33#define c_offset 8
34#define d_offset 12
35
36/* Structure of the crypto context struct*/
/* All values are byte offsets from the start of the context. */
37
38#define s0 0 /* S0 Array 256 Words each */
39#define s1 1024 /* S1 Array */
40#define s2 2048 /* S2 Array */
41#define s3 3072 /* S3 Array */
42#define w 4096 /* 8 whitening keys (word) */
43#define k 4128 /* key 1-32 ( word ) */
44
/* RnD is the 32-bit register, RnB its low byte and RnH its second byte.
 * The round macros paste D, B or H onto the name Rn they are given. */
45/* define a few register aliases to allow macro substitution */
46
47#define R0D %eax
48#define R0B %al
49#define R0H %ah
50
51#define R1D %ebx
52#define R1B %bl
53#define R1H %bh
54
55#define R2D %ecx
56#define R2B %cl
57#define R2H %ch
58
59#define R3D %edx
60#define R3B %dl
61#define R3H %dh
62
63
64/* performs input whitening: xors src with the whitening word at w+offset */
65#define input_whitening(src,context,offset)\
66 xor w+offset(context), src;
67
68/* performs output whitening: same, using the second group of four words (w+16) */
69#define output_whitening(src,context,offset)\
70 xor w+16+offset(context), src;
71
/*
 * encrypt_round(a,b,c,d,round): one encryption round.
 * a..d are register names (R0..R3); round is the byte offset of this
 * round's two subkeys inside the k array. %ebp must point to the context.
 * The old value of d is kept on the stack while d's register accumulates
 * the table lookups driven by b; %esi accumulates those driven by a.
 * Clobbers %esi and %edi.
 */
72/*
73 * a input register containing a (rotated 16)
74 * b input register containing b
75 * c input register containing c
76 * d input register containing d (already rol $1)
77 * operations on a and b are interleaved to increase performance
78 */
79#define encrypt_round(a,b,c,d,round)\
80 push d ## D;\
81 movzx b ## B, %edi;\
82 mov s1(%ebp,%edi,4),d ## D;\
83 movzx a ## B, %edi;\
84 mov s2(%ebp,%edi,4),%esi;\
85 movzx b ## H, %edi;\
86 ror $16, b ## D;\
87 xor s2(%ebp,%edi,4),d ## D;\
88 movzx a ## H, %edi;\
89 ror $16, a ## D;\
90 xor s3(%ebp,%edi,4),%esi;\
91 movzx b ## B, %edi;\
92 xor s3(%ebp,%edi,4),d ## D;\
93 movzx a ## B, %edi;\
94 xor (%ebp,%edi,4), %esi;\
95 movzx b ## H, %edi;\
96 ror $15, b ## D;\
97 xor (%ebp,%edi,4), d ## D;\
98 movzx a ## H, %edi;\
99 xor s1(%ebp,%edi,4),%esi;\
100 pop %edi;\
101 add d ## D, %esi;\
102 add %esi, d ## D;\
103 add k+round(%ebp), %esi;\
104 xor %esi, c ## D;\
105 rol $15, c ## D;\
106 add k+4+round(%ebp),d ## D;\
107 xor %edi, d ## D;
108
/*
 * encrypt_last_round(a,b,c,d,round): same computation as encrypt_round
 * with two rotations changed: b is rotated by 16 instead of 15 after its
 * lookups, and c ends with "ror $1" instead of "rol $15".
 * Clobbers %esi and %edi.
 */
109/*
110 * a input register containing a (rotated 16)
111 * b input register containing b
112 * c input register containing c
113 * d input register containing d (already rol $1)
114 * operations on a and b are interleaved to increase performance
115 * last round has different rotations for the output preparation
116 */
117#define encrypt_last_round(a,b,c,d,round)\
118 push d ## D;\
119 movzx b ## B, %edi;\
120 mov s1(%ebp,%edi,4),d ## D;\
121 movzx a ## B, %edi;\
122 mov s2(%ebp,%edi,4),%esi;\
123 movzx b ## H, %edi;\
124 ror $16, b ## D;\
125 xor s2(%ebp,%edi,4),d ## D;\
126 movzx a ## H, %edi;\
127 ror $16, a ## D;\
128 xor s3(%ebp,%edi,4),%esi;\
129 movzx b ## B, %edi;\
130 xor s3(%ebp,%edi,4),d ## D;\
131 movzx a ## B, %edi;\
132 xor (%ebp,%edi,4), %esi;\
133 movzx b ## H, %edi;\
134 ror $16, b ## D;\
135 xor (%ebp,%edi,4), d ## D;\
136 movzx a ## H, %edi;\
137 xor s1(%ebp,%edi,4),%esi;\
138 pop %edi;\
139 add d ## D, %esi;\
140 add %esi, d ## D;\
141 add k+round(%ebp), %esi;\
142 xor %esi, c ## D;\
143 ror $1, c ## D;\
144 add k+4+round(%ebp),d ## D;\
145 xor %edi, d ## D;
146
/*
 * decrypt_round(a,b,c,d,round): one decryption round.
 * a..d are register names (R0..R3); round is the byte offset of this
 * round's two subkeys inside the k array. %ebp must point to the context.
 * The old value of c is kept on the stack while c's register accumulates
 * the table lookups driven by a; %esi accumulates those driven by b.
 * Clobbers %esi and %edi.
 * NOTE(review): the "d ... (already rol $1)" line below is worded exactly
 * as in the encrypt macros; confirm it also describes the decrypt path.
 */
147/*
148 * a input register containing a
149 * b input register containing b (rotated 16)
150 * c input register containing c
151 * d input register containing d (already rol $1)
152 * operations on a and b are interleaved to increase performance
153 */
154#define decrypt_round(a,b,c,d,round)\
155 push c ## D;\
156 movzx a ## B, %edi;\
157 mov (%ebp,%edi,4), c ## D;\
158 movzx b ## B, %edi;\
159 mov s3(%ebp,%edi,4),%esi;\
160 movzx a ## H, %edi;\
161 ror $16, a ## D;\
162 xor s1(%ebp,%edi,4),c ## D;\
163 movzx b ## H, %edi;\
164 ror $16, b ## D;\
165 xor (%ebp,%edi,4), %esi;\
166 movzx a ## B, %edi;\
167 xor s2(%ebp,%edi,4),c ## D;\
168 movzx b ## B, %edi;\
169 xor s1(%ebp,%edi,4),%esi;\
170 movzx a ## H, %edi;\
171 ror $15, a ## D;\
172 xor s3(%ebp,%edi,4),c ## D;\
173 movzx b ## H, %edi;\
174 xor s2(%ebp,%edi,4),%esi;\
175 pop %edi;\
176 add %esi, c ## D;\
177 add c ## D, %esi;\
178 add k+round(%ebp), c ## D;\
179 xor %edi, c ## D;\
180 add k+4+round(%ebp),%esi;\
181 xor %esi, d ## D;\
182 rol $15, d ## D;
183
184/*
185 * a input register containing a; unchanged on exit (ror $16 is applied twice)
186 * b input register containing b (rotated 16); its single ror $16 leaves it un-rotated
187 * c input register containing c (already rol $1); becomes c ^ (t0 + t1 + word at k+round)
188 * d input register containing d; becomes (d ^ (t0 + 2*t1 + word at k+4+round)) ror 1
189 * t0 (c) and t1 (%esi) each xor four table lookups taken from a resp. b, interleaved to increase performance; old c waits on the stack and is popped into %edi
190 * last round has different rotations for the output preparation; %esi and %edi are clobbered
191 */
192#define decrypt_last_round(a,b,c,d,round)\
193 push c ## D;\
194 movzx a ## B, %edi;\
195 mov (%ebp,%edi,4), c ## D;\
196 movzx b ## B, %edi;\
197 mov s3(%ebp,%edi,4),%esi;\
198 movzx a ## H, %edi;\
199 ror $16, a ## D;\
200 xor s1(%ebp,%edi,4),c ## D;\
201 movzx b ## H, %edi;\
202 ror $16, b ## D;\
203 xor (%ebp,%edi,4), %esi;\
204 movzx a ## B, %edi;\
205 xor s2(%ebp,%edi,4),c ## D;\
206 movzx b ## B, %edi;\
207 xor s1(%ebp,%edi,4),%esi;\
208 movzx a ## H, %edi;\
209 ror $16, a ## D;\
210 xor s3(%ebp,%edi,4),c ## D;\
211 movzx b ## H, %edi;\
212 xor s2(%ebp,%edi,4),%esi;\
213 pop %edi;\
214 add %esi, c ## D;\
215 add c ## D, %esi;\
216 add k+round(%ebp), c ## D;\
217 xor %edi, c ## D;\
218 add k+4+round(%ebp),%esi;\
219 xor %esi, d ## D;\
220 ror $1, d ## D;
221
222.align 4
223.global twofish_enc_blk
224.global twofish_dec_blk
225/* twofish_enc_blk: encrypt one block of four 32-bit words read from in_blk into out_blk, using the context at tfm + crypto_tfm_ctx_offset; leaves 1 in %eax */
226twofish_enc_blk:
227 push %ebp /* save registers according to calling convention */
228 push %ebx
229 push %esi
230 push %edi
231
232 mov tfm + 16(%esp), %ebp /* abuse the base pointer: set new base pointer to the crypto tfm; +16 skips the four registers pushed above */
233 add $crypto_tfm_ctx_offset, %ebp /* ctx address */
234 mov in_blk+16(%esp),%edi /* input address in edi */
235
236 mov (%edi), %eax
237 mov b_offset(%edi), %ebx
238 mov c_offset(%edi), %ecx
239 mov d_offset(%edi), %edx
240 input_whitening(%eax,%ebp,a_offset)
241 ror $16, %eax /* a is handed to the rounds rotated by 16 */
242 input_whitening(%ebx,%ebp,b_offset)
243 input_whitening(%ecx,%ebp,c_offset)
244 input_whitening(%edx,%ebp,d_offset)
245 rol $1, %edx /* d is handed to the rounds already rol $1 */
246
247 encrypt_round(R0,R1,R2,R3,0); /* 16 rounds: the register pairs swap roles every round, the last argument is the byte offset of the round's two key words */
248 encrypt_round(R2,R3,R0,R1,8);
249 encrypt_round(R0,R1,R2,R3,2*8);
250 encrypt_round(R2,R3,R0,R1,3*8);
251 encrypt_round(R0,R1,R2,R3,4*8);
252 encrypt_round(R2,R3,R0,R1,5*8);
253 encrypt_round(R0,R1,R2,R3,6*8);
254 encrypt_round(R2,R3,R0,R1,7*8);
255 encrypt_round(R0,R1,R2,R3,8*8);
256 encrypt_round(R2,R3,R0,R1,9*8);
257 encrypt_round(R0,R1,R2,R3,10*8);
258 encrypt_round(R2,R3,R0,R1,11*8);
259 encrypt_round(R0,R1,R2,R3,12*8);
260 encrypt_round(R2,R3,R0,R1,13*8);
261 encrypt_round(R0,R1,R2,R3,14*8);
262 encrypt_last_round(R2,R3,R0,R1,15*8);
263
264 output_whitening(%eax,%ebp,c_offset) /* word pairs leave swapped: eax/ebx use the c/d slots, ecx/edx the a/b slots */
265 output_whitening(%ebx,%ebp,d_offset)
266 output_whitening(%ecx,%ebp,a_offset)
267 output_whitening(%edx,%ebp,b_offset)
268 mov out_blk+16(%esp),%edi; /* output address in edi */
269 mov %eax, c_offset(%edi)
270 mov %ebx, d_offset(%edi)
271 mov %ecx, (%edi)
272 mov %edx, b_offset(%edi)
273
274 pop %edi /* restore the saved registers in reverse push order */
275 pop %esi
276 pop %ebx
277 pop %ebp
278 mov $1, %eax /* value left in the return register */
279 ret
280/* twofish_dec_blk: decrypt one block of four 32-bit words read from in_blk into out_blk, using the context at tfm + crypto_tfm_ctx_offset; leaves 1 in %eax */
281twofish_dec_blk:
282 push %ebp /* save registers according to calling convention */
283 push %ebx
284 push %esi
285 push %edi
286
287
288 mov tfm + 16(%esp), %ebp /* abuse the base pointer: set new base pointer to the crypto tfm; +16 skips the four registers pushed above */
289 add $crypto_tfm_ctx_offset, %ebp /* ctx address */
290 mov in_blk+16(%esp),%edi /* input address in edi */
291
292 mov (%edi), %eax
293 mov b_offset(%edi), %ebx
294 mov c_offset(%edi), %ecx
295 mov d_offset(%edi), %edx
296 output_whitening(%eax,%ebp,a_offset) /* decryption applies the output whitening first and the input whitening last */
297 output_whitening(%ebx,%ebp,b_offset)
298 ror $16, %ebx /* b is handed to the rounds rotated by 16 */
299 output_whitening(%ecx,%ebp,c_offset)
300 output_whitening(%edx,%ebp,d_offset)
301 rol $1, %ecx /* c is handed to the rounds already rol $1 */
302
303 decrypt_round(R0,R1,R2,R3,15*8); /* 16 rounds with the key offsets walked downwards from 15*8 to 0; the register pairs swap roles every round */
304 decrypt_round(R2,R3,R0,R1,14*8);
305 decrypt_round(R0,R1,R2,R3,13*8);
306 decrypt_round(R2,R3,R0,R1,12*8);
307 decrypt_round(R0,R1,R2,R3,11*8);
308 decrypt_round(R2,R3,R0,R1,10*8);
309 decrypt_round(R0,R1,R2,R3,9*8);
310 decrypt_round(R2,R3,R0,R1,8*8);
311 decrypt_round(R0,R1,R2,R3,7*8);
312 decrypt_round(R2,R3,R0,R1,6*8);
313 decrypt_round(R0,R1,R2,R3,5*8);
314 decrypt_round(R2,R3,R0,R1,4*8);
315 decrypt_round(R0,R1,R2,R3,3*8);
316 decrypt_round(R2,R3,R0,R1,2*8);
317 decrypt_round(R0,R1,R2,R3,1*8);
318 decrypt_last_round(R2,R3,R0,R1,0);
319
320 input_whitening(%eax,%ebp,c_offset) /* word pairs leave swapped: eax/ebx use the c/d slots, ecx/edx the a/b slots */
321 input_whitening(%ebx,%ebp,d_offset)
322 input_whitening(%ecx,%ebp,a_offset)
323 input_whitening(%edx,%ebp,b_offset)
324 mov out_blk+16(%esp),%edi; /* output address in edi */
325 mov %eax, c_offset(%edi)
326 mov %ebx, d_offset(%edi)
327 mov %ecx, (%edi)
328 mov %edx, b_offset(%edi)
329
330 pop %edi /* restore the saved registers in reverse push order */
331 pop %esi
332 pop %ebx
333 pop %ebp
334 mov $1, %eax /* value left in the return register */
335 ret
diff --git a/arch/x86/crypto/twofish_32.c b/arch/x86/crypto/twofish_32.c
new file mode 100644
index 000000000000..e3004dfe9c7a
--- /dev/null
+++ b/arch/x86/crypto/twofish_32.c
@@ -0,0 +1,97 @@
1/*
2 * Glue Code for optimized 586 assembler version of TWOFISH
3 *
4 * Originally Twofish for GPG
5 * By Matthew Skala <mskala@ansuz.sooke.bc.ca>, July 26, 1998
6 * 256-bit key length added March 20, 1999
7 * Some modifications to reduce the text size by Werner Koch, April, 1998
8 * Ported to the kerneli patch by Marc Mutz <Marc@Mutz.com>
9 * Ported to CryptoAPI by Colin Slater <hoho@tacomeat.net>
10 *
11 * The original author has disclaimed all copyright interest in this
12 * code and thus put it in the public domain. The subsequent authors
13 * have put this under the GNU General Public License.
14 *
15 * This program is free software; you can redistribute it and/or modify
16 * it under the terms of the GNU General Public License as published by
17 * the Free Software Foundation; either version 2 of the License, or
18 * (at your option) any later version.
19 *
20 * This program is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 * GNU General Public License for more details.
24 *
25 * You should have received a copy of the GNU General Public License
26 * along with this program; if not, write to the Free Software
27 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
28 * USA
29 *
30 * This code is a "clean room" implementation, written from the paper
31 * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey,
32 * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available
33 * through http://www.counterpane.com/twofish.html
34 *
35 * For background information on multiplication in finite fields, used for
36 * the matrix operations in the key schedule, see the book _Contemporary
37 * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the
38 * Third Edition.
39 */
40
41#include <crypto/twofish.h>
42#include <linux/crypto.h>
43#include <linux/init.h>
44#include <linux/module.h>
45#include <linux/types.h>
46
47/* Single-block routines provided by the assembler part of this module; declared void here although the assembler code loads 1 into %eax before returning. */
48asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
49asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
50/* Encrypt hook installed as .cia_encrypt: passes tfm, dst and src unchanged to the assembler routine. */
51static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
52{
53 twofish_enc_blk(tfm, dst, src);
54}
55/* Decrypt hook installed as .cia_decrypt: passes tfm, dst and src unchanged to the assembler routine. */
56static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
57{
58 twofish_dec_blk(tfm, dst, src);
59}
60/* Registration record for this cipher: algorithm name "twofish", driver name "twofish-i586". */
61static struct crypto_alg alg = {
62 .cra_name = "twofish",
63 .cra_driver_name = "twofish-i586",
64 .cra_priority = 200,
65 .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
66 .cra_blocksize = TF_BLOCK_SIZE,
67 .cra_ctxsize = sizeof(struct twofish_ctx), /* per-tfm context; the assembler code finds it at tfm + crypto_tfm_ctx_offset */
68 .cra_alignmask = 3, /* NOTE(review): presumably requests 4-byte aligned buffers for the 32-bit loads/stores in the assembler code - confirm */
69 .cra_module = THIS_MODULE,
70 .cra_list = LIST_HEAD_INIT(alg.cra_list),
71 .cra_u = {
72 .cipher = {
73 .cia_min_keysize = TF_MIN_KEY_SIZE,
74 .cia_max_keysize = TF_MAX_KEY_SIZE,
75 .cia_setkey = twofish_setkey, /* not defined in this file; presumably declared by <crypto/twofish.h> - confirm */
76 .cia_encrypt = twofish_encrypt,
77 .cia_decrypt = twofish_decrypt
78 }
79 }
80};
81/* Module init hook: registers alg and returns crypto_register_alg()'s result unchanged. */
82static int __init init(void)
83{
84 return crypto_register_alg(&alg);
85}
86/* Module exit hook: unregisters alg; the result of crypto_unregister_alg() is not checked. */
87static void __exit fini(void)
88{
89 crypto_unregister_alg(&alg);
90}
91/* Hook init()/fini() up as the module's entry and exit points, then declare the module metadata. */
92module_init(init);
93module_exit(fini);
94
95MODULE_LICENSE("GPL");
96MODULE_DESCRIPTION ("Twofish Cipher Algorithm, i586 asm optimized");
97MODULE_ALIAS("twofish"); /* NOTE(review): presumably lets a request for "twofish" load this module - confirm */