diff options
Diffstat (limited to 'arch/x86/crypto')
-rw-r--r-- | arch/x86/crypto/Makefile | 5 | ||||
-rw-r--r-- | arch/x86/crypto/Makefile_32 | 12 | ||||
-rw-r--r-- | arch/x86/crypto/aes-i586-asm_32.S | 373 | ||||
-rw-r--r-- | arch/x86/crypto/aes_32.c | 515 | ||||
-rw-r--r-- | arch/x86/crypto/twofish-i586-asm_32.S | 335 | ||||
-rw-r--r-- | arch/x86/crypto/twofish_32.c | 97 |
6 files changed, 1337 insertions, 0 deletions
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile new file mode 100644 index 000000000000..b1bcf7c63028 --- /dev/null +++ b/arch/x86/crypto/Makefile | |||
@@ -0,0 +1,5 @@ | |||
1 | ifeq ($(CONFIG_X86_32),y) | ||
2 | include ${srctree}/arch/x86/crypto/Makefile_32 | ||
3 | else | ||
4 | include ${srctree}/arch/x86_64/crypto/Makefile_64 | ||
5 | endif | ||
diff --git a/arch/x86/crypto/Makefile_32 b/arch/x86/crypto/Makefile_32 new file mode 100644 index 000000000000..2d873a2388ed --- /dev/null +++ b/arch/x86/crypto/Makefile_32 | |||
@@ -0,0 +1,12 @@ | |||
1 | # | ||
2 | # x86/crypto/Makefile | ||
3 | # | ||
4 | # Arch-specific CryptoAPI modules. | ||
5 | # | ||
6 | |||
7 | obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o | ||
8 | obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o | ||
9 | |||
10 | aes-i586-y := aes-i586-asm_32.o aes_32.o | ||
11 | twofish-i586-y := twofish-i586-asm_32.o twofish_32.o | ||
12 | |||
diff --git a/arch/x86/crypto/aes-i586-asm_32.S b/arch/x86/crypto/aes-i586-asm_32.S new file mode 100644 index 000000000000..f942f0c8f630 --- /dev/null +++ b/arch/x86/crypto/aes-i586-asm_32.S | |||
@@ -0,0 +1,373 @@ | |||
1 | // ------------------------------------------------------------------------- | ||
2 | // Copyright (c) 2001, Dr Brian Gladman < >, Worcester, UK. | ||
3 | // All rights reserved. | ||
4 | // | ||
5 | // LICENSE TERMS | ||
6 | // | ||
7 | // The free distribution and use of this software in both source and binary | ||
8 | // form is allowed (with or without changes) provided that: | ||
9 | // | ||
10 | // 1. distributions of this source code include the above copyright | ||
11 | // notice, this list of conditions and the following disclaimer// | ||
12 | // | ||
13 | // 2. distributions in binary form include the above copyright | ||
14 | // notice, this list of conditions and the following disclaimer | ||
15 | // in the documentation and/or other associated materials// | ||
16 | // | ||
17 | // 3. the copyright holder's name is not used to endorse products | ||
18 | // built using this software without specific written permission. | ||
19 | // | ||
20 | // | ||
21 | // ALTERNATIVELY, provided that this notice is retained in full, this product | ||
22 | // may be distributed under the terms of the GNU General Public License (GPL), | ||
23 | // in which case the provisions of the GPL apply INSTEAD OF those given above. | ||
24 | // | ||
25 | // Copyright (c) 2004 Linus Torvalds <torvalds@osdl.org> | ||
26 | // Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com> | ||
27 | |||
28 | // DISCLAIMER | ||
29 | // | ||
30 | // This software is provided 'as is' with no explicit or implied warranties | ||
31 | // in respect of its properties including, but not limited to, correctness | ||
32 | // and fitness for purpose. | ||
33 | // ------------------------------------------------------------------------- | ||
34 | // Issue Date: 29/07/2002 | ||
35 | |||
36 | .file "aes-i586-asm.S" | ||
37 | .text | ||
38 | |||
39 | #include <asm/asm-offsets.h> | ||
40 | |||
41 | #define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words) | ||
42 | |||
43 | /* offsets to parameters with one register pushed onto stack */ | ||
44 | #define tfm 8 | ||
45 | #define out_blk 12 | ||
46 | #define in_blk 16 | ||
47 | |||
48 | /* offsets in crypto_tfm structure */ | ||
49 | #define ekey (crypto_tfm_ctx_offset + 0) | ||
50 | #define nrnd (crypto_tfm_ctx_offset + 256) | ||
51 | #define dkey (crypto_tfm_ctx_offset + 260) | ||
52 | |||
53 | // register mapping for encrypt and decrypt subroutines | ||
54 | |||
55 | #define r0 eax | ||
56 | #define r1 ebx | ||
57 | #define r2 ecx | ||
58 | #define r3 edx | ||
59 | #define r4 esi | ||
60 | #define r5 edi | ||
61 | |||
62 | #define eaxl al | ||
63 | #define eaxh ah | ||
64 | #define ebxl bl | ||
65 | #define ebxh bh | ||
66 | #define ecxl cl | ||
67 | #define ecxh ch | ||
68 | #define edxl dl | ||
69 | #define edxh dh | ||
70 | |||
71 | #define _h(reg) reg##h | ||
72 | #define h(reg) _h(reg) | ||
73 | |||
74 | #define _l(reg) reg##l | ||
75 | #define l(reg) _l(reg) | ||
76 | |||
77 | // This macro takes a 32-bit word representing a column and uses | ||
78 | // each of its four bytes to index into four tables of 256 32-bit | ||
79 | // words to obtain values that are then xored into the appropriate | ||
80 | // output registers r0, r1, r4 or r5. | ||
81 | |||
82 | // Parameters: | ||
83 | // table table base address | ||
84 | // a1 out_state[0] | ||
85 | // a2 out_state[1] | ||
86 | // a3 out_state[2] | ||
87 | // a4 out_state[3] | ||
88 | // idx input register for the round (destroyed) | ||
89 | // tmp scratch register for the round | ||
90 | // sched key schedule (do_fcol and do_icol only) | ||
91 | |||
92 | #define do_col(table, a1,a2,a3,a4, idx, tmp) \ | ||
93 | movzx %l(idx),%tmp; \ | ||
94 | xor table(,%tmp,4),%a1; \ | ||
95 | movzx %h(idx),%tmp; \ | ||
96 | shr $16,%idx; \ | ||
97 | xor table+tlen(,%tmp,4),%a2; \ | ||
98 | movzx %l(idx),%tmp; \ | ||
99 | movzx %h(idx),%idx; \ | ||
100 | xor table+2*tlen(,%tmp,4),%a3; \ | ||
101 | xor table+3*tlen(,%idx,4),%a4; | ||
102 | |||
103 | // initialise output registers from the key schedule | ||
104 | // NB1: original value of a3 is in idx on exit | ||
105 | // NB2: original values of a1,a2,a4 aren't used | ||
106 | #define do_fcol(table, a1,a2,a3,a4, idx, tmp, sched) \ | ||
107 | mov 0 sched,%a1; \ | ||
108 | movzx %l(idx),%tmp; \ | ||
109 | mov 12 sched,%a2; \ | ||
110 | xor table(,%tmp,4),%a1; \ | ||
111 | mov 4 sched,%a4; \ | ||
112 | movzx %h(idx),%tmp; \ | ||
113 | shr $16,%idx; \ | ||
114 | xor table+tlen(,%tmp,4),%a2; \ | ||
115 | movzx %l(idx),%tmp; \ | ||
116 | movzx %h(idx),%idx; \ | ||
117 | xor table+3*tlen(,%idx,4),%a4; \ | ||
118 | mov %a3,%idx; \ | ||
119 | mov 8 sched,%a3; \ | ||
120 | xor table+2*tlen(,%tmp,4),%a3; | ||
121 | |||
122 | // initialise output registers from the key schedule | ||
123 | // NB1: original value of a3 is in idx on exit | ||
124 | // NB2: original values of a1,a2,a4 aren't used | ||
125 | #define do_icol(table, a1,a2,a3,a4, idx, tmp, sched) \ | ||
126 | mov 0 sched,%a1; \ | ||
127 | movzx %l(idx),%tmp; \ | ||
128 | mov 4 sched,%a2; \ | ||
129 | xor table(,%tmp,4),%a1; \ | ||
130 | mov 12 sched,%a4; \ | ||
131 | movzx %h(idx),%tmp; \ | ||
132 | shr $16,%idx; \ | ||
133 | xor table+tlen(,%tmp,4),%a2; \ | ||
134 | movzx %l(idx),%tmp; \ | ||
135 | movzx %h(idx),%idx; \ | ||
136 | xor table+3*tlen(,%idx,4),%a4; \ | ||
137 | mov %a3,%idx; \ | ||
138 | mov 8 sched,%a3; \ | ||
139 | xor table+2*tlen(,%tmp,4),%a3; | ||
140 | |||
141 | |||
142 | // original Gladman had conditional saves to MMX regs. | ||
143 | #define save(a1, a2) \ | ||
144 | mov %a2,4*a1(%esp) | ||
145 | |||
146 | #define restore(a1, a2) \ | ||
147 | mov 4*a2(%esp),%a1 | ||
148 | |||
149 | // These macros perform a forward encryption cycle. A fwd_rnd1/fwd_rnd2 pair is | ||
150 | // entered with the previous round's column values in r0,r1,r4,r5 and | ||
151 | // exits with the final values in the same registers, using the stack | ||
152 | // for temporary storage. | ||
153 | |||
154 | // round column values | ||
155 | // on entry: r0,r1,r4,r5 | ||
156 | // on exit: r2,r1,r4,r5 | ||
157 | #define fwd_rnd1(arg, table) \ | ||
158 | save (0,r1); \ | ||
159 | save (1,r5); \ | ||
160 | \ | ||
161 | /* compute new column values */ \ | ||
162 | do_fcol(table, r2,r5,r4,r1, r0,r3, arg); /* idx=r0 */ \ | ||
163 | do_col (table, r4,r1,r2,r5, r0,r3); /* idx=r4 */ \ | ||
164 | restore(r0,0); \ | ||
165 | do_col (table, r1,r2,r5,r4, r0,r3); /* idx=r1 */ \ | ||
166 | restore(r0,1); \ | ||
167 | do_col (table, r5,r4,r1,r2, r0,r3); /* idx=r5 */ | ||
168 | |||
169 | // round column values | ||
170 | // on entry: r2,r1,r4,r5 | ||
171 | // on exit: r0,r1,r4,r5 | ||
172 | #define fwd_rnd2(arg, table) \ | ||
173 | save (0,r1); \ | ||
174 | save (1,r5); \ | ||
175 | \ | ||
176 | /* compute new column values */ \ | ||
177 | do_fcol(table, r0,r5,r4,r1, r2,r3, arg); /* idx=r2 */ \ | ||
178 | do_col (table, r4,r1,r0,r5, r2,r3); /* idx=r4 */ \ | ||
179 | restore(r2,0); \ | ||
180 | do_col (table, r1,r0,r5,r4, r2,r3); /* idx=r1 */ \ | ||
181 | restore(r2,1); \ | ||
182 | do_col (table, r5,r4,r1,r0, r2,r3); /* idx=r5 */ | ||
183 | |||
184 | // These macros perform an inverse encryption cycle. An inv_rnd1/inv_rnd2 pair is | ||
185 | // entered with the previous round's column values in r0,r1,r4,r5 and | ||
186 | // exits with the final values in the same registers, using the stack | ||
187 | // for temporary storage. | ||
188 | |||
189 | // round column values | ||
190 | // on entry: r0,r1,r4,r5 | ||
191 | // on exit: r2,r1,r4,r5 | ||
192 | #define inv_rnd1(arg, table) \ | ||
193 | save (0,r1); \ | ||
194 | save (1,r5); \ | ||
195 | \ | ||
196 | /* compute new column values */ \ | ||
197 | do_icol(table, r2,r1,r4,r5, r0,r3, arg); /* idx=r0 */ \ | ||
198 | do_col (table, r4,r5,r2,r1, r0,r3); /* idx=r4 */ \ | ||
199 | restore(r0,0); \ | ||
200 | do_col (table, r1,r4,r5,r2, r0,r3); /* idx=r1 */ \ | ||
201 | restore(r0,1); \ | ||
202 | do_col (table, r5,r2,r1,r4, r0,r3); /* idx=r5 */ | ||
203 | |||
204 | // round column values | ||
205 | // on entry: r2,r1,r4,r5 | ||
206 | // on exit: r0,r1,r4,r5 | ||
207 | #define inv_rnd2(arg, table) \ | ||
208 | save (0,r1); \ | ||
209 | save (1,r5); \ | ||
210 | \ | ||
211 | /* compute new column values */ \ | ||
212 | do_icol(table, r0,r1,r4,r5, r2,r3, arg); /* idx=r2 */ \ | ||
213 | do_col (table, r4,r5,r0,r1, r2,r3); /* idx=r4 */ \ | ||
214 | restore(r2,0); \ | ||
215 | do_col (table, r1,r4,r5,r0, r2,r3); /* idx=r1 */ \ | ||
216 | restore(r2,1); \ | ||
217 | do_col (table, r5,r0,r1,r4, r2,r3); /* idx=r5 */ | ||
218 | |||
219 | // AES (Rijndael) Encryption Subroutine | ||
220 | /* void aes_enc_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */ | ||
221 | |||
222 | .global aes_enc_blk | ||
223 | |||
224 | .extern ft_tab | ||
225 | .extern fl_tab | ||
226 | |||
227 | .align 4 | ||
228 | |||
229 | aes_enc_blk: | ||
230 | push %ebp | ||
231 | mov tfm(%esp),%ebp | ||
232 | |||
233 | // CAUTION: the order and the values used in these assigns | ||
234 | // rely on the register mappings | ||
235 | |||
236 | 1: push %ebx | ||
237 | mov in_blk+4(%esp),%r2 | ||
238 | push %esi | ||
239 | mov nrnd(%ebp),%r3 // number of rounds | ||
240 | push %edi | ||
241 | #if ekey != 0 | ||
242 | lea ekey(%ebp),%ebp // key pointer | ||
243 | #endif | ||
244 | |||
245 | // input four columns and xor in first round key | ||
246 | |||
247 | mov (%r2),%r0 | ||
248 | mov 4(%r2),%r1 | ||
249 | mov 8(%r2),%r4 | ||
250 | mov 12(%r2),%r5 | ||
251 | xor (%ebp),%r0 | ||
252 | xor 4(%ebp),%r1 | ||
253 | xor 8(%ebp),%r4 | ||
254 | xor 12(%ebp),%r5 | ||
255 | |||
256 | sub $8,%esp // space for register saves on stack | ||
257 | add $16,%ebp // increment to next round key | ||
258 | cmp $12,%r3 | ||
259 | jb 4f // 10 rounds for 128-bit key | ||
260 | lea 32(%ebp),%ebp | ||
261 | je 3f // 12 rounds for 192-bit key | ||
262 | lea 32(%ebp),%ebp | ||
263 | |||
264 | 2: fwd_rnd1( -64(%ebp) ,ft_tab) // 14 rounds for 256-bit key | ||
265 | fwd_rnd2( -48(%ebp) ,ft_tab) | ||
266 | 3: fwd_rnd1( -32(%ebp) ,ft_tab) // 12 rounds for 192-bit key | ||
267 | fwd_rnd2( -16(%ebp) ,ft_tab) | ||
268 | 4: fwd_rnd1( (%ebp) ,ft_tab) // 10 rounds for 128-bit key | ||
269 | fwd_rnd2( +16(%ebp) ,ft_tab) | ||
270 | fwd_rnd1( +32(%ebp) ,ft_tab) | ||
271 | fwd_rnd2( +48(%ebp) ,ft_tab) | ||
272 | fwd_rnd1( +64(%ebp) ,ft_tab) | ||
273 | fwd_rnd2( +80(%ebp) ,ft_tab) | ||
274 | fwd_rnd1( +96(%ebp) ,ft_tab) | ||
275 | fwd_rnd2(+112(%ebp) ,ft_tab) | ||
276 | fwd_rnd1(+128(%ebp) ,ft_tab) | ||
277 | fwd_rnd2(+144(%ebp) ,fl_tab) // last round uses a different table | ||
278 | |||
279 | // move final values to the output array. CAUTION: the | ||
280 | // order of these assignments relies on the register mappings | ||
281 | |||
282 | add $8,%esp | ||
283 | mov out_blk+12(%esp),%ebp | ||
284 | mov %r5,12(%ebp) | ||
285 | pop %edi | ||
286 | mov %r4,8(%ebp) | ||
287 | pop %esi | ||
288 | mov %r1,4(%ebp) | ||
289 | pop %ebx | ||
290 | mov %r0,(%ebp) | ||
291 | pop %ebp | ||
292 | mov $1,%eax | ||
293 | ret | ||
294 | |||
295 | // AES (Rijndael) Decryption Subroutine | ||
296 | /* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */ | ||
297 | |||
298 | .global aes_dec_blk | ||
299 | |||
300 | .extern it_tab | ||
301 | .extern il_tab | ||
302 | |||
303 | .align 4 | ||
304 | |||
305 | aes_dec_blk: | ||
306 | push %ebp | ||
307 | mov tfm(%esp),%ebp // ebp = tfm argument | ||
308 | |||
309 | // CAUTION: the order and the values used in these assignments | ||
310 | // rely on the register mappings | ||
311 | |||
312 | 1: push %ebx | ||
313 | mov in_blk+4(%esp),%r2 // r2 = in_blk (offset adjusted for the extra push) | ||
314 | push %esi | ||
315 | mov nrnd(%ebp),%r3 // number of rounds | ||
316 | push %edi | ||
317 | #if dkey != 0 | ||
318 | lea dkey(%ebp),%ebp // key pointer | ||
319 | #endif | ||
320 | mov %r3,%r0 | ||
321 | shl $4,%r0 // r0 = 16 * rounds | ||
322 | add %r0,%ebp // ebp = dkey + 16 * rounds | ||
323 | |||
324 | // input four columns and xor in first round key | ||
325 | |||
326 | mov (%r2),%r0 | ||
327 | mov 4(%r2),%r1 | ||
328 | mov 8(%r2),%r4 | ||
329 | mov 12(%r2),%r5 | ||
330 | xor (%ebp),%r0 | ||
331 | xor 4(%ebp),%r1 | ||
332 | xor 8(%ebp),%r4 | ||
333 | xor 12(%ebp),%r5 | ||
334 | |||
335 | sub $8,%esp // space for register saves on stack | ||
336 | sub $16,%ebp // step down to next round key (the schedule is walked backwards) | ||
337 | cmp $12,%r3 | ||
338 | jb 4f // 10 rounds for 128-bit key | ||
339 | lea -32(%ebp),%ebp // lea leaves the flags from cmp untouched for the je below | ||
340 | je 3f // 12 rounds for 192-bit key | ||
341 | lea -32(%ebp),%ebp | ||
342 | |||
343 | 2: inv_rnd1( +64(%ebp), it_tab) // 14 rounds for 256-bit key | ||
344 | inv_rnd2( +48(%ebp), it_tab) | ||
345 | 3: inv_rnd1( +32(%ebp), it_tab) // 12 rounds for 192-bit key | ||
346 | inv_rnd2( +16(%ebp), it_tab) | ||
347 | 4: inv_rnd1( (%ebp), it_tab) // 10 rounds for 128-bit key | ||
348 | inv_rnd2( -16(%ebp), it_tab) | ||
349 | inv_rnd1( -32(%ebp), it_tab) | ||
350 | inv_rnd2( -48(%ebp), it_tab) | ||
351 | inv_rnd1( -64(%ebp), it_tab) | ||
352 | inv_rnd2( -80(%ebp), it_tab) | ||
353 | inv_rnd1( -96(%ebp), it_tab) | ||
354 | inv_rnd2(-112(%ebp), it_tab) | ||
355 | inv_rnd1(-128(%ebp), it_tab) | ||
356 | inv_rnd2(-144(%ebp), il_tab) // last round uses a different table | ||
357 | |||
358 | // move final values to the output array. CAUTION: the | ||
359 | // order of these assignments relies on the register mappings | ||
360 | |||
361 | add $8,%esp // drop the register save area | ||
362 | mov out_blk+12(%esp),%ebp // ebp = out_blk (offset adjusted for the three extra pushes) | ||
363 | mov %r5,12(%ebp) | ||
364 | pop %edi | ||
365 | mov %r4,8(%ebp) | ||
366 | pop %esi | ||
367 | mov %r1,4(%ebp) | ||
368 | pop %ebx | ||
369 | mov %r0,(%ebp) | ||
370 | pop %ebp | ||
371 | mov $1,%eax // NOTE(review): eax is set to 1 although the prototype comment above says void - confirm no caller relies on it | ||
372 | ret | ||
373 | |||
diff --git a/arch/x86/crypto/aes_32.c b/arch/x86/crypto/aes_32.c new file mode 100644 index 000000000000..49aad9397f10 --- /dev/null +++ b/arch/x86/crypto/aes_32.c | |||
@@ -0,0 +1,515 @@ | |||
1 | /* | ||
2 | * | ||
3 | * Glue Code for optimized 586 assembler version of AES | ||
4 | * | ||
5 | * Copyright (c) 2002, Dr Brian Gladman <>, Worcester, UK. | ||
6 | * All rights reserved. | ||
7 | * | ||
8 | * LICENSE TERMS | ||
9 | * | ||
10 | * The free distribution and use of this software in both source and binary | ||
11 | * form is allowed (with or without changes) provided that: | ||
12 | * | ||
13 | * 1. distributions of this source code include the above copyright | ||
14 | * notice, this list of conditions and the following disclaimer; | ||
15 | * | ||
16 | * 2. distributions in binary form include the above copyright | ||
17 | * notice, this list of conditions and the following disclaimer | ||
18 | * in the documentation and/or other associated materials; | ||
19 | * | ||
20 | * 3. the copyright holder's name is not used to endorse products | ||
21 | * built using this software without specific written permission. | ||
22 | * | ||
23 | * ALTERNATIVELY, provided that this notice is retained in full, this product | ||
24 | * may be distributed under the terms of the GNU General Public License (GPL), | ||
25 | * in which case the provisions of the GPL apply INSTEAD OF those given above. | ||
26 | * | ||
27 | * DISCLAIMER | ||
28 | * | ||
29 | * This software is provided 'as is' with no explicit or implied warranties | ||
30 | * in respect of its properties, including, but not limited to, correctness | ||
31 | * and/or fitness for purpose. | ||
32 | * | ||
33 | * Copyright (c) 2003, Adam J. Richter <adam@yggdrasil.com> (conversion to | ||
34 | * 2.5 API). | ||
35 | * Copyright (c) 2003, 2004 Fruhwirth Clemens <clemens@endorphin.org> | ||
36 | * Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com> | ||
37 | * | ||
38 | */ | ||
39 | |||
40 | #include <asm/byteorder.h> | ||
41 | #include <linux/kernel.h> | ||
42 | #include <linux/module.h> | ||
43 | #include <linux/init.h> | ||
44 | #include <linux/types.h> | ||
45 | #include <linux/crypto.h> | ||
46 | #include <linux/linkage.h> | ||
47 | |||
48 | asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); | ||
49 | asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); | ||
50 | |||
51 | #define AES_MIN_KEY_SIZE 16 /* bytes */ | ||
52 | #define AES_MAX_KEY_SIZE 32 /* bytes */ | ||
53 | #define AES_BLOCK_SIZE 16 /* bytes */ | ||
54 | #define AES_KS_LENGTH (4 * AES_BLOCK_SIZE) /* key schedule length in 32-bit words; parenthesized so it expands safely inside larger expressions */ | ||
55 | #define RC_LENGTH 29 /* number of entries in rcon_tab */ | ||
56 | |||
57 | struct aes_ctx { /* layout is hard-coded in aes-i586-asm_32.S (ekey at +0, nrnd at +256, dkey at +260 from the context base): do not reorder or resize */ | ||
58 | u32 ekey[AES_KS_LENGTH]; /* encryption key schedule, written by aes_set_key() */ | ||
59 | u32 rounds; /* 10, 12 or 14, set by aes_set_key() from the key length */ | ||
60 | u32 dkey[AES_KS_LENGTH]; /* decryption key schedule, written by aes_set_key() */ | ||
61 | }; | ||
62 | |||
63 | #define WPOLY 0x011b | ||
64 | #define bytes2word(b0, b1, b2, b3) \ | ||
65 | (((u32)(b3) << 24) | ((u32)(b2) << 16) | ((u32)(b1) << 8) | (b0)) | ||
66 | |||
67 | /* define the finite field multiplies required for Rijndael */ | ||
68 | #define f2(x) ((x) ? pow[log[x] + 0x19] : 0) | ||
69 | #define f3(x) ((x) ? pow[log[x] + 0x01] : 0) | ||
70 | #define f9(x) ((x) ? pow[log[x] + 0xc7] : 0) | ||
71 | #define fb(x) ((x) ? pow[log[x] + 0x68] : 0) | ||
72 | #define fd(x) ((x) ? pow[log[x] + 0xee] : 0) | ||
73 | #define fe(x) ((x) ? pow[log[x] + 0xdf] : 0) | ||
74 | #define fi(x) ((x) ? pow[255 - log[x]]: 0) | ||
75 | |||
76 | static inline u32 upr(u32 x, int n) /* rotate x left by n bytes, n in 0..3 */ | ||
77 | { | ||
78 | return n ? (x << 8 * n) | (x >> (32 - 8 * n)) : x; /* n == 0 must not reach the 32-bit right shift, which is undefined in C */ | ||
79 | } | ||
80 | |||
81 | static inline u8 bval(u32 x, int n) /* extract byte n of x; n = 0 is the least significant byte */ | ||
82 | { | ||
83 | return x >> 8 * n; /* the u8 return type discards everything above the selected byte */ | ||
84 | } | ||
85 | |||
86 | /* The forward and inverse affine transformations used in the S-box */ | ||
87 | #define fwd_affine(x) \ | ||
88 | (w = (u32)x, w ^= (w<<1)^(w<<2)^(w<<3)^(w<<4), 0x63^(u8)(w^(w>>8))) | ||
89 | |||
90 | #define inv_affine(x) \ | ||
91 | (w = (u32)x, w = (w<<1)^(w<<3)^(w<<6), 0x05^(u8)(w^(w>>8))) | ||
92 | |||
93 | static u32 rcon_tab[RC_LENGTH]; | ||
94 | |||
95 | u32 ft_tab[4][256]; | ||
96 | u32 fl_tab[4][256]; | ||
97 | static u32 im_tab[4][256]; | ||
98 | u32 il_tab[4][256]; | ||
99 | u32 it_tab[4][256]; | ||
100 | |||
101 | static void gen_tabs(void) | ||
102 | { | ||
103 | u32 i, w; | ||
104 | u8 pow[512], log[256]; | ||
105 | |||
106 | /* | ||
107 | * log and power tables for GF(2^8) finite field with | ||
108 | * WPOLY as modular polynomial - the simplest primitive | ||
109 | * root is 0x03, used here to generate the tables. | ||
110 | */ | ||
111 | i = 0; w = 1; | ||
112 | |||
113 | do { | ||
114 | pow[i] = (u8)w; | ||
115 | pow[i + 255] = (u8)w; | ||
116 | log[w] = (u8)i++; | ||
117 | w ^= (w << 1) ^ (w & 0x80 ? WPOLY : 0); | ||
118 | } while (w != 1); | ||
119 | |||
120 | for(i = 0, w = 1; i < RC_LENGTH; ++i) { | ||
121 | rcon_tab[i] = bytes2word(w, 0, 0, 0); | ||
122 | w = f2(w); | ||
123 | } | ||
124 | |||
125 | for(i = 0; i < 256; ++i) { | ||
126 | u8 b; | ||
127 | |||
128 | b = fwd_affine(fi((u8)i)); | ||
129 | w = bytes2word(f2(b), b, b, f3(b)); | ||
130 | |||
131 | /* tables for a normal encryption round */ | ||
132 | ft_tab[0][i] = w; | ||
133 | ft_tab[1][i] = upr(w, 1); | ||
134 | ft_tab[2][i] = upr(w, 2); | ||
135 | ft_tab[3][i] = upr(w, 3); | ||
136 | w = bytes2word(b, 0, 0, 0); | ||
137 | |||
138 | /* | ||
139 | * tables for last encryption round | ||
140 | * (may also be used in the key schedule) | ||
141 | */ | ||
142 | fl_tab[0][i] = w; | ||
143 | fl_tab[1][i] = upr(w, 1); | ||
144 | fl_tab[2][i] = upr(w, 2); | ||
145 | fl_tab[3][i] = upr(w, 3); | ||
146 | |||
147 | b = fi(inv_affine((u8)i)); | ||
148 | w = bytes2word(fe(b), f9(b), fd(b), fb(b)); | ||
149 | |||
150 | /* tables for the inverse mix column operation */ | ||
151 | im_tab[0][b] = w; | ||
152 | im_tab[1][b] = upr(w, 1); | ||
153 | im_tab[2][b] = upr(w, 2); | ||
154 | im_tab[3][b] = upr(w, 3); | ||
155 | |||
156 | /* tables for a normal decryption round */ | ||
157 | it_tab[0][i] = w; | ||
158 | it_tab[1][i] = upr(w,1); | ||
159 | it_tab[2][i] = upr(w,2); | ||
160 | it_tab[3][i] = upr(w,3); | ||
161 | |||
162 | w = bytes2word(b, 0, 0, 0); | ||
163 | |||
164 | /* tables for last decryption round */ | ||
165 | il_tab[0][i] = w; | ||
166 | il_tab[1][i] = upr(w,1); | ||
167 | il_tab[2][i] = upr(w,2); | ||
168 | il_tab[3][i] = upr(w,3); | ||
169 | } | ||
170 | } | ||
171 | |||
172 | #define four_tables(x,tab,vf,rf,c) \ | ||
173 | ( tab[0][bval(vf(x,0,c),rf(0,c))] ^ \ | ||
174 | tab[1][bval(vf(x,1,c),rf(1,c))] ^ \ | ||
175 | tab[2][bval(vf(x,2,c),rf(2,c))] ^ \ | ||
176 | tab[3][bval(vf(x,3,c),rf(3,c))] \ | ||
177 | ) | ||
178 | |||
179 | #define vf1(x,r,c) (x) | ||
180 | #define rf1(r,c) (r) | ||
181 | #define rf2(r,c) ((r-c)&3) | ||
182 | |||
183 | #define inv_mcol(x) four_tables(x,im_tab,vf1,rf1,0) | ||
184 | #define ls_box(x,c) four_tables(x,fl_tab,vf1,rf2,c) | ||
185 | |||
186 | #define ff(x) inv_mcol(x) | ||
187 | |||
188 | #define ke4(k,i) \ | ||
189 | { \ | ||
190 | k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[i]; \ | ||
191 | k[4*(i)+5] = ss[1] ^= ss[0]; \ | ||
192 | k[4*(i)+6] = ss[2] ^= ss[1]; \ | ||
193 | k[4*(i)+7] = ss[3] ^= ss[2]; \ | ||
194 | } | ||
195 | |||
196 | #define kel4(k,i) \ | ||
197 | { \ | ||
198 | k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[i]; \ | ||
199 | k[4*(i)+5] = ss[1] ^= ss[0]; \ | ||
200 | k[4*(i)+6] = ss[2] ^= ss[1]; k[4*(i)+7] = ss[3] ^= ss[2]; \ | ||
201 | } | ||
202 | |||
203 | #define ke6(k,i) \ | ||
204 | { \ | ||
205 | k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \ | ||
206 | k[6*(i)+ 7] = ss[1] ^= ss[0]; \ | ||
207 | k[6*(i)+ 8] = ss[2] ^= ss[1]; \ | ||
208 | k[6*(i)+ 9] = ss[3] ^= ss[2]; \ | ||
209 | k[6*(i)+10] = ss[4] ^= ss[3]; \ | ||
210 | k[6*(i)+11] = ss[5] ^= ss[4]; \ | ||
211 | } | ||
212 | |||
213 | #define kel6(k,i) \ | ||
214 | { \ | ||
215 | k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \ | ||
216 | k[6*(i)+ 7] = ss[1] ^= ss[0]; \ | ||
217 | k[6*(i)+ 8] = ss[2] ^= ss[1]; \ | ||
218 | k[6*(i)+ 9] = ss[3] ^= ss[2]; \ | ||
219 | } | ||
220 | |||
221 | #define ke8(k,i) \ | ||
222 | { \ | ||
223 | k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \ | ||
224 | k[8*(i)+ 9] = ss[1] ^= ss[0]; \ | ||
225 | k[8*(i)+10] = ss[2] ^= ss[1]; \ | ||
226 | k[8*(i)+11] = ss[3] ^= ss[2]; \ | ||
227 | k[8*(i)+12] = ss[4] ^= ls_box(ss[3],0); \ | ||
228 | k[8*(i)+13] = ss[5] ^= ss[4]; \ | ||
229 | k[8*(i)+14] = ss[6] ^= ss[5]; \ | ||
230 | k[8*(i)+15] = ss[7] ^= ss[6]; \ | ||
231 | } | ||
232 | |||
233 | #define kel8(k,i) \ | ||
234 | { \ | ||
235 | k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \ | ||
236 | k[8*(i)+ 9] = ss[1] ^= ss[0]; \ | ||
237 | k[8*(i)+10] = ss[2] ^= ss[1]; \ | ||
238 | k[8*(i)+11] = ss[3] ^= ss[2]; \ | ||
239 | } | ||
240 | |||
241 | #define kdf4(k,i) \ | ||
242 | { \ | ||
243 | ss[0] = ss[0] ^ ss[2] ^ ss[1] ^ ss[3]; \ | ||
244 | ss[1] = ss[1] ^ ss[3]; \ | ||
245 | ss[2] = ss[2] ^ ss[3]; \ | ||
246 | ss[3] = ss[3]; \ | ||
247 | ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i]; \ | ||
248 | ss[i % 4] ^= ss[4]; \ | ||
249 | ss[4] ^= k[4*(i)]; \ | ||
250 | k[4*(i)+4] = ff(ss[4]); \ | ||
251 | ss[4] ^= k[4*(i)+1]; \ | ||
252 | k[4*(i)+5] = ff(ss[4]); \ | ||
253 | ss[4] ^= k[4*(i)+2]; \ | ||
254 | k[4*(i)+6] = ff(ss[4]); \ | ||
255 | ss[4] ^= k[4*(i)+3]; \ | ||
256 | k[4*(i)+7] = ff(ss[4]); \ | ||
257 | } | ||
258 | |||
259 | #define kd4(k,i) \ | ||
260 | { \ | ||
261 | ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i]; \ | ||
262 | ss[i % 4] ^= ss[4]; \ | ||
263 | ss[4] = ff(ss[4]); \ | ||
264 | k[4*(i)+4] = ss[4] ^= k[4*(i)]; \ | ||
265 | k[4*(i)+5] = ss[4] ^= k[4*(i)+1]; \ | ||
266 | k[4*(i)+6] = ss[4] ^= k[4*(i)+2]; \ | ||
267 | k[4*(i)+7] = ss[4] ^= k[4*(i)+3]; \ | ||
268 | } | ||
269 | |||
270 | #define kdl4(k,i) \ | ||
271 | { \ | ||
272 | ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i]; \ | ||
273 | ss[i % 4] ^= ss[4]; \ | ||
274 | k[4*(i)+4] = (ss[0] ^= ss[1]) ^ ss[2] ^ ss[3]; \ | ||
275 | k[4*(i)+5] = ss[1] ^ ss[3]; \ | ||
276 | k[4*(i)+6] = ss[0]; \ | ||
277 | k[4*(i)+7] = ss[1]; \ | ||
278 | } | ||
279 | |||
280 | #define kdf6(k,i) \ | ||
281 | { \ | ||
282 | ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \ | ||
283 | k[6*(i)+ 6] = ff(ss[0]); \ | ||
284 | ss[1] ^= ss[0]; \ | ||
285 | k[6*(i)+ 7] = ff(ss[1]); \ | ||
286 | ss[2] ^= ss[1]; \ | ||
287 | k[6*(i)+ 8] = ff(ss[2]); \ | ||
288 | ss[3] ^= ss[2]; \ | ||
289 | k[6*(i)+ 9] = ff(ss[3]); \ | ||
290 | ss[4] ^= ss[3]; \ | ||
291 | k[6*(i)+10] = ff(ss[4]); \ | ||
292 | ss[5] ^= ss[4]; \ | ||
293 | k[6*(i)+11] = ff(ss[5]); \ | ||
294 | } | ||
295 | |||
296 | #define kd6(k,i) \ | ||
297 | { \ | ||
298 | ss[6] = ls_box(ss[5],3) ^ rcon_tab[i]; \ | ||
299 | ss[0] ^= ss[6]; ss[6] = ff(ss[6]); \ | ||
300 | k[6*(i)+ 6] = ss[6] ^= k[6*(i)]; \ | ||
301 | ss[1] ^= ss[0]; \ | ||
302 | k[6*(i)+ 7] = ss[6] ^= k[6*(i)+ 1]; \ | ||
303 | ss[2] ^= ss[1]; \ | ||
304 | k[6*(i)+ 8] = ss[6] ^= k[6*(i)+ 2]; \ | ||
305 | ss[3] ^= ss[2]; \ | ||
306 | k[6*(i)+ 9] = ss[6] ^= k[6*(i)+ 3]; \ | ||
307 | ss[4] ^= ss[3]; \ | ||
308 | k[6*(i)+10] = ss[6] ^= k[6*(i)+ 4]; \ | ||
309 | ss[5] ^= ss[4]; \ | ||
310 | k[6*(i)+11] = ss[6] ^= k[6*(i)+ 5]; \ | ||
311 | } | ||
312 | |||
313 | #define kdl6(k,i) \ | ||
314 | { \ | ||
315 | ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \ | ||
316 | k[6*(i)+ 6] = ss[0]; \ | ||
317 | ss[1] ^= ss[0]; \ | ||
318 | k[6*(i)+ 7] = ss[1]; \ | ||
319 | ss[2] ^= ss[1]; \ | ||
320 | k[6*(i)+ 8] = ss[2]; \ | ||
321 | ss[3] ^= ss[2]; \ | ||
322 | k[6*(i)+ 9] = ss[3]; \ | ||
323 | } | ||
324 | |||
325 | #define kdf8(k,i) \ | ||
326 | { \ | ||
327 | ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \ | ||
328 | k[8*(i)+ 8] = ff(ss[0]); \ | ||
329 | ss[1] ^= ss[0]; \ | ||
330 | k[8*(i)+ 9] = ff(ss[1]); \ | ||
331 | ss[2] ^= ss[1]; \ | ||
332 | k[8*(i)+10] = ff(ss[2]); \ | ||
333 | ss[3] ^= ss[2]; \ | ||
334 | k[8*(i)+11] = ff(ss[3]); \ | ||
335 | ss[4] ^= ls_box(ss[3],0); \ | ||
336 | k[8*(i)+12] = ff(ss[4]); \ | ||
337 | ss[5] ^= ss[4]; \ | ||
338 | k[8*(i)+13] = ff(ss[5]); \ | ||
339 | ss[6] ^= ss[5]; \ | ||
340 | k[8*(i)+14] = ff(ss[6]); \ | ||
341 | ss[7] ^= ss[6]; \ | ||
342 | k[8*(i)+15] = ff(ss[7]); \ | ||
343 | } | ||
344 | |||
345 | #define kd8(k,i) \ | ||
346 | { \ | ||
347 | u32 __g = ls_box(ss[7],3) ^ rcon_tab[i]; \ | ||
348 | ss[0] ^= __g; \ | ||
349 | __g = ff(__g); \ | ||
350 | k[8*(i)+ 8] = __g ^= k[8*(i)]; \ | ||
351 | ss[1] ^= ss[0]; \ | ||
352 | k[8*(i)+ 9] = __g ^= k[8*(i)+ 1]; \ | ||
353 | ss[2] ^= ss[1]; \ | ||
354 | k[8*(i)+10] = __g ^= k[8*(i)+ 2]; \ | ||
355 | ss[3] ^= ss[2]; \ | ||
356 | k[8*(i)+11] = __g ^= k[8*(i)+ 3]; \ | ||
357 | __g = ls_box(ss[3],0); \ | ||
358 | ss[4] ^= __g; \ | ||
359 | __g = ff(__g); \ | ||
360 | k[8*(i)+12] = __g ^= k[8*(i)+ 4]; \ | ||
361 | ss[5] ^= ss[4]; \ | ||
362 | k[8*(i)+13] = __g ^= k[8*(i)+ 5]; \ | ||
363 | ss[6] ^= ss[5]; \ | ||
364 | k[8*(i)+14] = __g ^= k[8*(i)+ 6]; \ | ||
365 | ss[7] ^= ss[6]; \ | ||
366 | k[8*(i)+15] = __g ^= k[8*(i)+ 7]; \ | ||
367 | } | ||
368 | |||
369 | #define kdl8(k,i) \ | ||
370 | { \ | ||
371 | ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \ | ||
372 | k[8*(i)+ 8] = ss[0]; \ | ||
373 | ss[1] ^= ss[0]; \ | ||
374 | k[8*(i)+ 9] = ss[1]; \ | ||
375 | ss[2] ^= ss[1]; \ | ||
376 | k[8*(i)+10] = ss[2]; \ | ||
377 | ss[3] ^= ss[2]; \ | ||
378 | k[8*(i)+11] = ss[3]; \ | ||
379 | } | ||
380 | |||
381 | static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, | ||
382 | unsigned int key_len) | ||
383 | { | ||
384 | int i; | ||
385 | u32 ss[8]; | ||
386 | struct aes_ctx *ctx = crypto_tfm_ctx(tfm); | ||
387 | const __le32 *key = (const __le32 *)in_key; | ||
388 | u32 *flags = &tfm->crt_flags; | ||
389 | |||
390 | /* encryption schedule */ | ||
391 | |||
392 | ctx->ekey[0] = ss[0] = le32_to_cpu(key[0]); | ||
393 | ctx->ekey[1] = ss[1] = le32_to_cpu(key[1]); | ||
394 | ctx->ekey[2] = ss[2] = le32_to_cpu(key[2]); | ||
395 | ctx->ekey[3] = ss[3] = le32_to_cpu(key[3]); | ||
396 | |||
397 | switch(key_len) { | ||
398 | case 16: | ||
399 | for (i = 0; i < 9; i++) | ||
400 | ke4(ctx->ekey, i); | ||
401 | kel4(ctx->ekey, 9); | ||
402 | ctx->rounds = 10; | ||
403 | break; | ||
404 | |||
405 | case 24: | ||
406 | ctx->ekey[4] = ss[4] = le32_to_cpu(key[4]); | ||
407 | ctx->ekey[5] = ss[5] = le32_to_cpu(key[5]); | ||
408 | for (i = 0; i < 7; i++) | ||
409 | ke6(ctx->ekey, i); | ||
410 | kel6(ctx->ekey, 7); | ||
411 | ctx->rounds = 12; | ||
412 | break; | ||
413 | |||
414 | case 32: | ||
415 | ctx->ekey[4] = ss[4] = le32_to_cpu(key[4]); | ||
416 | ctx->ekey[5] = ss[5] = le32_to_cpu(key[5]); | ||
417 | ctx->ekey[6] = ss[6] = le32_to_cpu(key[6]); | ||
418 | ctx->ekey[7] = ss[7] = le32_to_cpu(key[7]); | ||
419 | for (i = 0; i < 6; i++) | ||
420 | ke8(ctx->ekey, i); | ||
421 | kel8(ctx->ekey, 6); | ||
422 | ctx->rounds = 14; | ||
423 | break; | ||
424 | |||
425 | default: | ||
426 | *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; | ||
427 | return -EINVAL; | ||
428 | } | ||
429 | |||
430 | /* decryption schedule */ | ||
431 | |||
432 | ctx->dkey[0] = ss[0] = le32_to_cpu(key[0]); | ||
433 | ctx->dkey[1] = ss[1] = le32_to_cpu(key[1]); | ||
434 | ctx->dkey[2] = ss[2] = le32_to_cpu(key[2]); | ||
435 | ctx->dkey[3] = ss[3] = le32_to_cpu(key[3]); | ||
436 | |||
437 | switch (key_len) { | ||
438 | case 16: | ||
439 | kdf4(ctx->dkey, 0); | ||
440 | for (i = 1; i < 9; i++) | ||
441 | kd4(ctx->dkey, i); | ||
442 | kdl4(ctx->dkey, 9); | ||
443 | break; | ||
444 | |||
445 | case 24: | ||
446 | ctx->dkey[4] = ff(ss[4] = le32_to_cpu(key[4])); | ||
447 | ctx->dkey[5] = ff(ss[5] = le32_to_cpu(key[5])); | ||
448 | kdf6(ctx->dkey, 0); | ||
449 | for (i = 1; i < 7; i++) | ||
450 | kd6(ctx->dkey, i); | ||
451 | kdl6(ctx->dkey, 7); | ||
452 | break; | ||
453 | |||
454 | case 32: | ||
455 | ctx->dkey[4] = ff(ss[4] = le32_to_cpu(key[4])); | ||
456 | ctx->dkey[5] = ff(ss[5] = le32_to_cpu(key[5])); | ||
457 | ctx->dkey[6] = ff(ss[6] = le32_to_cpu(key[6])); | ||
458 | ctx->dkey[7] = ff(ss[7] = le32_to_cpu(key[7])); | ||
459 | kdf8(ctx->dkey, 0); | ||
460 | for (i = 1; i < 6; i++) | ||
461 | kd8(ctx->dkey, i); | ||
462 | kdl8(ctx->dkey, 6); | ||
463 | break; | ||
464 | } | ||
465 | return 0; | ||
466 | } | ||
467 | |||
468 | static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) /* .cia_encrypt hook of aes_alg */ | ||
469 | { | ||
470 | aes_enc_blk(tfm, dst, src); /* assembler routine: encrypts the 16-byte block at src into dst using the schedule in tfm's context */ | ||
471 | } | ||
472 | |||
473 | static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) /* .cia_decrypt hook of aes_alg */ | ||
474 | { | ||
475 | aes_dec_blk(tfm, dst, src); /* assembler routine: decrypts the 16-byte block at src into dst using the schedule in tfm's context */ | ||
476 | } | ||
477 | |||
478 | static struct crypto_alg aes_alg = { | ||
479 | .cra_name = "aes", | ||
480 | .cra_driver_name = "aes-i586", | ||
481 | .cra_priority = 200, | ||
482 | .cra_flags = CRYPTO_ALG_TYPE_CIPHER, | ||
483 | .cra_blocksize = AES_BLOCK_SIZE, | ||
484 | .cra_ctxsize = sizeof(struct aes_ctx), | ||
485 | .cra_module = THIS_MODULE, | ||
486 | .cra_list = LIST_HEAD_INIT(aes_alg.cra_list), | ||
487 | .cra_u = { | ||
488 | .cipher = { | ||
489 | .cia_min_keysize = AES_MIN_KEY_SIZE, | ||
490 | .cia_max_keysize = AES_MAX_KEY_SIZE, | ||
491 | .cia_setkey = aes_set_key, | ||
492 | .cia_encrypt = aes_encrypt, | ||
493 | .cia_decrypt = aes_decrypt | ||
494 | } | ||
495 | } | ||
496 | }; | ||
497 | |||
498 | static int __init aes_init(void) /* module entry point; returns the result of crypto_register_alg() */ | ||
499 | { | ||
500 | gen_tabs(); /* fill the lookup tables before the algorithm is registered */ | ||
501 | return crypto_register_alg(&aes_alg); | ||
502 | } | ||
503 | |||
504 | static void __exit aes_fini(void) /* module exit point */ | ||
505 | { | ||
506 | crypto_unregister_alg(&aes_alg); /* undo the registration done in aes_init() */ | ||
507 | } | ||
508 | |||
509 | module_init(aes_init); | ||
510 | module_exit(aes_fini); | ||
511 | |||
512 | MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, i586 asm optimized"); | ||
513 | MODULE_LICENSE("Dual BSD/GPL"); | ||
514 | MODULE_AUTHOR("Fruhwirth Clemens, James Morris, Brian Gladman, Adam Richter"); | ||
515 | MODULE_ALIAS("aes"); | ||
diff --git a/arch/x86/crypto/twofish-i586-asm_32.S b/arch/x86/crypto/twofish-i586-asm_32.S new file mode 100644 index 000000000000..39b98ed2c1b9 --- /dev/null +++ b/arch/x86/crypto/twofish-i586-asm_32.S | |||
@@ -0,0 +1,335 @@ | |||
1 | /*************************************************************************** | ||
2 | * Copyright (C) 2006 by Joachim Fritschi, <jfritschi@freenet.de> * | ||
3 | * * | ||
4 | * This program is free software; you can redistribute it and/or modify * | ||
5 | * it under the terms of the GNU General Public License as published by * | ||
6 | * the Free Software Foundation; either version 2 of the License, or * | ||
7 | * (at your option) any later version. * | ||
8 | * * | ||
9 | * This program is distributed in the hope that it will be useful, * | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of * | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * | ||
12 | * GNU General Public License for more details. * | ||
13 | * * | ||
14 | * You should have received a copy of the GNU General Public License * | ||
15 | * along with this program; if not, write to the * | ||
16 | * Free Software Foundation, Inc., * | ||
17 | * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * | ||
18 | ***************************************************************************/ | ||
19 | |||
20 | .file "twofish-i586-asm.S" | ||
21 | .text | ||
22 | |||
23 | #include <asm/asm-offsets.h> | ||
24 | |||
25 | /* return address at 0 */ | ||
26 | |||
27 | #define in_blk 12 /* input byte array address parameter*/ | ||
28 | #define out_blk 8 /* output byte array address parameter*/ | ||
29 | #define tfm 4 /* Twofish context structure */ | ||
30 | |||
31 | #define a_offset 0 | ||
32 | #define b_offset 4 | ||
33 | #define c_offset 8 | ||
34 | #define d_offset 12 | ||
35 | |||
36 | /* Structure of the crypto context struct*/ | ||
37 | |||
38 | #define s0 0 /* S0 Array 256 Words each */ | ||
39 | #define s1 1024 /* S1 Array */ | ||
40 | #define s2 2048 /* S2 Array */ | ||
41 | #define s3 3072 /* S3 Array */ | ||
42 | #define w 4096 /* 8 whitening keys (word) */ | ||
43 | #define k 4128 /* key 1-32 ( word ) */ | ||
44 | |||
45 | /* define a few register aliases to allow macro substitution */ | ||
46 | |||
47 | #define R0D %eax | ||
48 | #define R0B %al | ||
49 | #define R0H %ah | ||
50 | |||
51 | #define R1D %ebx | ||
52 | #define R1B %bl | ||
53 | #define R1H %bh | ||
54 | |||
55 | #define R2D %ecx | ||
56 | #define R2B %cl | ||
57 | #define R2H %ch | ||
58 | |||
59 | #define R3D %edx | ||
60 | #define R3B %dl | ||
61 | #define R3H %dh | ||
62 | |||
63 | |||
64 | /* performs input whitening */ | ||
65 | #define input_whitening(src,context,offset)\ | ||
66 | xor w+offset(context), src; | ||
67 | |||
68 | /* performs output whitening */ | ||
69 | #define output_whitening(src,context,offset)\ | ||
70 | xor w+16+offset(context), src; | ||
71 | |||
72 | /* | ||
73 | * a input register containing a (rotated 16) | ||
74 | * b input register containing b | ||
75 | * c input register containing c | ||
76 | * d input register containing d (already rol $1) | ||
77 | * operations on a and b are interleaved to increase performance | ||
78 | */ | ||
79 | #define encrypt_round(a,b,c,d,round)\ | ||
80 | push d ## D;\ | ||
81 | movzx b ## B, %edi;\ | ||
82 | mov s1(%ebp,%edi,4),d ## D;\ | ||
83 | movzx a ## B, %edi;\ | ||
84 | mov s2(%ebp,%edi,4),%esi;\ | ||
85 | movzx b ## H, %edi;\ | ||
86 | ror $16, b ## D;\ | ||
87 | xor s2(%ebp,%edi,4),d ## D;\ | ||
88 | movzx a ## H, %edi;\ | ||
89 | ror $16, a ## D;\ | ||
90 | xor s3(%ebp,%edi,4),%esi;\ | ||
91 | movzx b ## B, %edi;\ | ||
92 | xor s3(%ebp,%edi,4),d ## D;\ | ||
93 | movzx a ## B, %edi;\ | ||
94 | xor (%ebp,%edi,4), %esi;\ | ||
95 | movzx b ## H, %edi;\ | ||
96 | ror $15, b ## D;\ | ||
97 | xor (%ebp,%edi,4), d ## D;\ | ||
98 | movzx a ## H, %edi;\ | ||
99 | xor s1(%ebp,%edi,4),%esi;\ | ||
100 | pop %edi;\ | ||
101 | add d ## D, %esi;\ | ||
102 | add %esi, d ## D;\ | ||
103 | add k+round(%ebp), %esi;\ | ||
104 | xor %esi, c ## D;\ | ||
105 | rol $15, c ## D;\ | ||
106 | add k+4+round(%ebp),d ## D;\ | ||
107 | xor %edi, d ## D; | ||
108 | |||
109 | /* | ||
110 | * a input register containing a (rotated 16) | ||
111 | * b input register containing b | ||
112 | * c input register containing c | ||
113 | * d input register containing d (already rol $1) | ||
114 | * operations on a and b are interleaved to increase performance | ||
115 | * last round has different rotations for the output preparation | ||
116 | */ | ||
117 | #define encrypt_last_round(a,b,c,d,round)\ | ||
118 | push d ## D;\ | ||
119 | movzx b ## B, %edi;\ | ||
120 | mov s1(%ebp,%edi,4),d ## D;\ | ||
121 | movzx a ## B, %edi;\ | ||
122 | mov s2(%ebp,%edi,4),%esi;\ | ||
123 | movzx b ## H, %edi;\ | ||
124 | ror $16, b ## D;\ | ||
125 | xor s2(%ebp,%edi,4),d ## D;\ | ||
126 | movzx a ## H, %edi;\ | ||
127 | ror $16, a ## D;\ | ||
128 | xor s3(%ebp,%edi,4),%esi;\ | ||
129 | movzx b ## B, %edi;\ | ||
130 | xor s3(%ebp,%edi,4),d ## D;\ | ||
131 | movzx a ## B, %edi;\ | ||
132 | xor (%ebp,%edi,4), %esi;\ | ||
133 | movzx b ## H, %edi;\ | ||
134 | ror $16, b ## D;\ | ||
135 | xor (%ebp,%edi,4), d ## D;\ | ||
136 | movzx a ## H, %edi;\ | ||
137 | xor s1(%ebp,%edi,4),%esi;\ | ||
138 | pop %edi;\ | ||
139 | add d ## D, %esi;\ | ||
140 | add %esi, d ## D;\ | ||
141 | add k+round(%ebp), %esi;\ | ||
142 | xor %esi, c ## D;\ | ||
143 | ror $1, c ## D;\ | ||
144 | add k+4+round(%ebp),d ## D;\ | ||
145 | xor %edi, d ## D; | ||
146 | |||
147 | /* | ||
148 | * a input register containing a | ||
149 | * b input register containing b (rotated 16) | ||
150 | * c input register containing c (already rol $1) | ||
151 | * d input register containing d | ||
152 | * operations on a and b are interleaved to increase performance | ||
153 | */ | ||
154 | #define decrypt_round(a,b,c,d,round)\ | ||
155 | push c ## D;\ | ||
156 | movzx a ## B, %edi;\ | ||
157 | mov (%ebp,%edi,4), c ## D;\ | ||
158 | movzx b ## B, %edi;\ | ||
159 | mov s3(%ebp,%edi,4),%esi;\ | ||
160 | movzx a ## H, %edi;\ | ||
161 | ror $16, a ## D;\ | ||
162 | xor s1(%ebp,%edi,4),c ## D;\ | ||
163 | movzx b ## H, %edi;\ | ||
164 | ror $16, b ## D;\ | ||
165 | xor (%ebp,%edi,4), %esi;\ | ||
166 | movzx a ## B, %edi;\ | ||
167 | xor s2(%ebp,%edi,4),c ## D;\ | ||
168 | movzx b ## B, %edi;\ | ||
169 | xor s1(%ebp,%edi,4),%esi;\ | ||
170 | movzx a ## H, %edi;\ | ||
171 | ror $15, a ## D;\ | ||
172 | xor s3(%ebp,%edi,4),c ## D;\ | ||
173 | movzx b ## H, %edi;\ | ||
174 | xor s2(%ebp,%edi,4),%esi;\ | ||
175 | pop %edi;\ | ||
176 | add %esi, c ## D;\ | ||
177 | add c ## D, %esi;\ | ||
178 | add k+round(%ebp), c ## D;\ | ||
179 | xor %edi, c ## D;\ | ||
180 | add k+4+round(%ebp),%esi;\ | ||
181 | xor %esi, d ## D;\ | ||
182 | rol $15, d ## D; | ||
183 | |||
184 | /* | ||
185 | * a input register containing a | ||
186 | * b input register containing b (rotated 16) | ||
187 | * c input register containing c (already rol $1) | ||
188 | * d input register containing d | ||
189 | * operations on a and b are interleaved to increase performance | ||
190 | * last round has different rotations for the output preparation | ||
191 | */ | ||
192 | #define decrypt_last_round(a,b,c,d,round)\ | ||
193 | push c ## D;\ | ||
194 | movzx a ## B, %edi;\ | ||
195 | mov (%ebp,%edi,4), c ## D;\ | ||
196 | movzx b ## B, %edi;\ | ||
197 | mov s3(%ebp,%edi,4),%esi;\ | ||
198 | movzx a ## H, %edi;\ | ||
199 | ror $16, a ## D;\ | ||
200 | xor s1(%ebp,%edi,4),c ## D;\ | ||
201 | movzx b ## H, %edi;\ | ||
202 | ror $16, b ## D;\ | ||
203 | xor (%ebp,%edi,4), %esi;\ | ||
204 | movzx a ## B, %edi;\ | ||
205 | xor s2(%ebp,%edi,4),c ## D;\ | ||
206 | movzx b ## B, %edi;\ | ||
207 | xor s1(%ebp,%edi,4),%esi;\ | ||
208 | movzx a ## H, %edi;\ | ||
209 | ror $16, a ## D;\ | ||
210 | xor s3(%ebp,%edi,4),c ## D;\ | ||
211 | movzx b ## H, %edi;\ | ||
212 | xor s2(%ebp,%edi,4),%esi;\ | ||
213 | pop %edi;\ | ||
214 | add %esi, c ## D;\ | ||
215 | add c ## D, %esi;\ | ||
216 | add k+round(%ebp), c ## D;\ | ||
217 | xor %edi, c ## D;\ | ||
218 | add k+4+round(%ebp),%esi;\ | ||
219 | xor %esi, d ## D;\ | ||
220 | ror $1, d ## D; | ||
221 | |||
222 | .align 4 | ||
223 | .global twofish_enc_blk | ||
224 | .global twofish_dec_blk | ||
225 | |||
226 | twofish_enc_blk: | ||
227 | push %ebp /* save registers according to calling convention*/ | ||
228 | push %ebx | ||
229 | push %esi | ||
230 | push %edi | ||
231 | |||
232 | mov tfm + 16(%esp), %ebp /* abuse the base pointer: set new base pointer to the crypto tfm */ | ||
233 | add $crypto_tfm_ctx_offset, %ebp /* ctx address */ | ||
234 | mov in_blk+16(%esp),%edi /* input address in edi */ | ||
235 | |||
236 | mov (%edi), %eax | ||
237 | mov b_offset(%edi), %ebx | ||
238 | mov c_offset(%edi), %ecx | ||
239 | mov d_offset(%edi), %edx | ||
240 | input_whitening(%eax,%ebp,a_offset) | ||
241 | ror $16, %eax | ||
242 | input_whitening(%ebx,%ebp,b_offset) | ||
243 | input_whitening(%ecx,%ebp,c_offset) | ||
244 | input_whitening(%edx,%ebp,d_offset) | ||
245 | rol $1, %edx | ||
246 | |||
247 | encrypt_round(R0,R1,R2,R3,0); | ||
248 | encrypt_round(R2,R3,R0,R1,8); | ||
249 | encrypt_round(R0,R1,R2,R3,2*8); | ||
250 | encrypt_round(R2,R3,R0,R1,3*8); | ||
251 | encrypt_round(R0,R1,R2,R3,4*8); | ||
252 | encrypt_round(R2,R3,R0,R1,5*8); | ||
253 | encrypt_round(R0,R1,R2,R3,6*8); | ||
254 | encrypt_round(R2,R3,R0,R1,7*8); | ||
255 | encrypt_round(R0,R1,R2,R3,8*8); | ||
256 | encrypt_round(R2,R3,R0,R1,9*8); | ||
257 | encrypt_round(R0,R1,R2,R3,10*8); | ||
258 | encrypt_round(R2,R3,R0,R1,11*8); | ||
259 | encrypt_round(R0,R1,R2,R3,12*8); | ||
260 | encrypt_round(R2,R3,R0,R1,13*8); | ||
261 | encrypt_round(R0,R1,R2,R3,14*8); | ||
262 | encrypt_last_round(R2,R3,R0,R1,15*8); | ||
263 | |||
264 | output_whitening(%eax,%ebp,c_offset) | ||
265 | output_whitening(%ebx,%ebp,d_offset) | ||
266 | output_whitening(%ecx,%ebp,a_offset) | ||
267 | output_whitening(%edx,%ebp,b_offset) | ||
268 | mov out_blk+16(%esp),%edi; | ||
269 | mov %eax, c_offset(%edi) | ||
270 | mov %ebx, d_offset(%edi) | ||
271 | mov %ecx, (%edi) | ||
272 | mov %edx, b_offset(%edi) | ||
273 | |||
274 | pop %edi | ||
275 | pop %esi | ||
276 | pop %ebx | ||
277 | pop %ebp | ||
278 | mov $1, %eax | ||
279 | ret | ||
280 | |||
281 | twofish_dec_blk: | ||
282 | push %ebp /* save registers according to calling convention*/ | ||
283 | push %ebx | ||
284 | push %esi | ||
285 | push %edi | ||
286 | |||
287 | |||
288 | mov tfm + 16(%esp), %ebp /* abuse the base pointer: set new base pointer to the crypto tfm */ | ||
289 | add $crypto_tfm_ctx_offset, %ebp /* ctx address */ | ||
290 | mov in_blk+16(%esp),%edi /* input address in edi */ | ||
291 | |||
292 | mov (%edi), %eax | ||
293 | mov b_offset(%edi), %ebx | ||
294 | mov c_offset(%edi), %ecx | ||
295 | mov d_offset(%edi), %edx | ||
296 | output_whitening(%eax,%ebp,a_offset) | ||
297 | output_whitening(%ebx,%ebp,b_offset) | ||
298 | ror $16, %ebx | ||
299 | output_whitening(%ecx,%ebp,c_offset) | ||
300 | output_whitening(%edx,%ebp,d_offset) | ||
301 | rol $1, %ecx | ||
302 | |||
303 | decrypt_round(R0,R1,R2,R3,15*8); | ||
304 | decrypt_round(R2,R3,R0,R1,14*8); | ||
305 | decrypt_round(R0,R1,R2,R3,13*8); | ||
306 | decrypt_round(R2,R3,R0,R1,12*8); | ||
307 | decrypt_round(R0,R1,R2,R3,11*8); | ||
308 | decrypt_round(R2,R3,R0,R1,10*8); | ||
309 | decrypt_round(R0,R1,R2,R3,9*8); | ||
310 | decrypt_round(R2,R3,R0,R1,8*8); | ||
311 | decrypt_round(R0,R1,R2,R3,7*8); | ||
312 | decrypt_round(R2,R3,R0,R1,6*8); | ||
313 | decrypt_round(R0,R1,R2,R3,5*8); | ||
314 | decrypt_round(R2,R3,R0,R1,4*8); | ||
315 | decrypt_round(R0,R1,R2,R3,3*8); | ||
316 | decrypt_round(R2,R3,R0,R1,2*8); | ||
317 | decrypt_round(R0,R1,R2,R3,1*8); | ||
318 | decrypt_last_round(R2,R3,R0,R1,0); | ||
319 | |||
320 | input_whitening(%eax,%ebp,c_offset) | ||
321 | input_whitening(%ebx,%ebp,d_offset) | ||
322 | input_whitening(%ecx,%ebp,a_offset) | ||
323 | input_whitening(%edx,%ebp,b_offset) | ||
324 | mov out_blk+16(%esp),%edi; | ||
325 | mov %eax, c_offset(%edi) | ||
326 | mov %ebx, d_offset(%edi) | ||
327 | mov %ecx, (%edi) | ||
328 | mov %edx, b_offset(%edi) | ||
329 | |||
330 | pop %edi | ||
331 | pop %esi | ||
332 | pop %ebx | ||
333 | pop %ebp | ||
334 | mov $1, %eax | ||
335 | ret | ||
diff --git a/arch/x86/crypto/twofish_32.c b/arch/x86/crypto/twofish_32.c new file mode 100644 index 000000000000..e3004dfe9c7a --- /dev/null +++ b/arch/x86/crypto/twofish_32.c | |||
@@ -0,0 +1,97 @@ | |||
1 | /* | ||
2 | * Glue Code for optimized 586 assembler version of TWOFISH | ||
3 | * | ||
4 | * Originally Twofish for GPG | ||
5 | * By Matthew Skala <mskala@ansuz.sooke.bc.ca>, July 26, 1998 | ||
6 | * 256-bit key length added March 20, 1999 | ||
7 | * Some modifications to reduce the text size by Werner Koch, April, 1998 | ||
8 | * Ported to the kerneli patch by Marc Mutz <Marc@Mutz.com> | ||
9 | * Ported to CryptoAPI by Colin Slater <hoho@tacomeat.net> | ||
10 | * | ||
11 | * The original author has disclaimed all copyright interest in this | ||
12 | * code and thus put it in the public domain. The subsequent authors | ||
13 | * have put this under the GNU General Public License. | ||
14 | * | ||
15 | * This program is free software; you can redistribute it and/or modify | ||
16 | * it under the terms of the GNU General Public License as published by | ||
17 | * the Free Software Foundation; either version 2 of the License, or | ||
18 | * (at your option) any later version. | ||
19 | * | ||
20 | * This program is distributed in the hope that it will be useful, | ||
21 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
22 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
23 | * GNU General Public License for more details. | ||
24 | * | ||
25 | * You should have received a copy of the GNU General Public License | ||
26 | * along with this program; if not, write to the Free Software | ||
27 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
28 | * USA | ||
29 | * | ||
30 | * This code is a "clean room" implementation, written from the paper | ||
31 | * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey, | ||
32 | * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available | ||
33 | * through http://www.counterpane.com/twofish.html | ||
34 | * | ||
35 | * For background information on multiplication in finite fields, used for | ||
36 | * the matrix operations in the key schedule, see the book _Contemporary | ||
37 | * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the | ||
38 | * Third Edition. | ||
39 | */ | ||
40 | |||
41 | #include <crypto/twofish.h> | ||
42 | #include <linux/crypto.h> | ||
43 | #include <linux/init.h> | ||
44 | #include <linux/module.h> | ||
45 | #include <linux/types.h> | ||
46 | |||
47 | |||
48 | asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); | ||
49 | asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); | ||
50 | |||
51 | static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | ||
52 | { | ||
53 | twofish_enc_blk(tfm, dst, src); | ||
54 | } | ||
55 | |||
56 | static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | ||
57 | { | ||
58 | twofish_dec_blk(tfm, dst, src); | ||
59 | } | ||
60 | |||
61 | static struct crypto_alg alg = { | ||
62 | .cra_name = "twofish", | ||
63 | .cra_driver_name = "twofish-i586", | ||
64 | .cra_priority = 200, | ||
65 | .cra_flags = CRYPTO_ALG_TYPE_CIPHER, | ||
66 | .cra_blocksize = TF_BLOCK_SIZE, | ||
67 | .cra_ctxsize = sizeof(struct twofish_ctx), | ||
68 | .cra_alignmask = 3, | ||
69 | .cra_module = THIS_MODULE, | ||
70 | .cra_list = LIST_HEAD_INIT(alg.cra_list), | ||
71 | .cra_u = { | ||
72 | .cipher = { | ||
73 | .cia_min_keysize = TF_MIN_KEY_SIZE, | ||
74 | .cia_max_keysize = TF_MAX_KEY_SIZE, | ||
75 | .cia_setkey = twofish_setkey, | ||
76 | .cia_encrypt = twofish_encrypt, | ||
77 | .cia_decrypt = twofish_decrypt | ||
78 | } | ||
79 | } | ||
80 | }; | ||
81 | |||
82 | static int __init init(void) | ||
83 | { | ||
84 | return crypto_register_alg(&alg); | ||
85 | } | ||
86 | |||
87 | static void __exit fini(void) | ||
88 | { | ||
89 | crypto_unregister_alg(&alg); | ||
90 | } | ||
91 | |||
92 | module_init(init); | ||
93 | module_exit(fini); | ||
94 | |||
95 | MODULE_LICENSE("GPL"); | ||
96 | MODULE_DESCRIPTION ("Twofish Cipher Algorithm, i586 asm optimized"); | ||
97 | MODULE_ALIAS("twofish"); | ||