diff options
author | Thomas Gleixner <tglx@linutronix.de> | 2007-10-11 05:16:21 -0400 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2007-10-11 05:16:21 -0400 |
commit | 9c2019421511a1bc646981d55528334ae46464c0 (patch) | |
tree | 12bd39b5201d0afc74dccd8e06464233d3058e58 /arch/x86/crypto/aes-i586-asm_32.S | |
parent | af49d41e8c0e6649b3966470aa6319585144f8e8 (diff) |
i386: move crypto
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/crypto/aes-i586-asm_32.S')
-rw-r--r-- | arch/x86/crypto/aes-i586-asm_32.S | 373 |
1 files changed, 373 insertions, 0 deletions
diff --git a/arch/x86/crypto/aes-i586-asm_32.S b/arch/x86/crypto/aes-i586-asm_32.S new file mode 100644 index 000000000000..f942f0c8f630 --- /dev/null +++ b/arch/x86/crypto/aes-i586-asm_32.S | |||
@@ -0,0 +1,373 @@ | |||
1 | // ------------------------------------------------------------------------- | ||
2 | // Copyright (c) 2001, Dr Brian Gladman < >, Worcester, UK. | ||
3 | // All rights reserved. | ||
4 | // | ||
5 | // LICENSE TERMS | ||
6 | // | ||
7 | // The free distribution and use of this software in both source and binary | ||
8 | // form is allowed (with or without changes) provided that: | ||
9 | // | ||
10 | // 1. distributions of this source code include the above copyright | ||
11 | // notice, this list of conditions and the following disclaimer; | ||
12 | // | ||
13 | // 2. distributions in binary form include the above copyright | ||
14 | // notice, this list of conditions and the following disclaimer | ||
15 | // in the documentation and/or other associated materials; | ||
16 | // | ||
17 | // 3. the copyright holder's name is not used to endorse products | ||
18 | // built using this software without specific written permission. | ||
19 | // | ||
20 | // | ||
21 | // ALTERNATIVELY, provided that this notice is retained in full, this product | ||
22 | // may be distributed under the terms of the GNU General Public License (GPL), | ||
23 | // in which case the provisions of the GPL apply INSTEAD OF those given above. | ||
24 | // | ||
25 | // Copyright (c) 2004 Linus Torvalds <torvalds@osdl.org> | ||
26 | // Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com> | ||
27 | |||
28 | // DISCLAIMER | ||
29 | // | ||
30 | // This software is provided 'as is' with no explicit or implied warranties | ||
31 | // in respect of its properties including, but not limited to, correctness | ||
32 | // and fitness for purpose. | ||
33 | // ------------------------------------------------------------------------- | ||
34 | // Issue Date: 29/07/2002 | ||
35 | |||
36 | .file "aes-i586-asm.S" | ||
37 | .text | ||
38 | |||
39 | #include <asm/asm-offsets.h> | ||
40 | |||
41 | #define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words) | ||
42 | |||
43 | /* offsets from %esp to the stack arguments with one register pushed onto stack */ | ||
44 | #define tfm 8 /* 1st stack argument: struct crypto_tfm *tfm */ | ||
45 | #define out_blk 12 /* 2nd stack argument: u8 *out_blk */ | ||
46 | #define in_blk 16 /* 3rd stack argument: const u8 *in_blk */ | ||
47 | |||
48 | /* offsets in crypto_tfm structure, relative to crypto_tfm_ctx_offset */ | ||
49 | #define ekey (crypto_tfm_ctx_offset + 0) /* key schedule read by aes_enc_blk */ | ||
50 | #define nrnd (crypto_tfm_ctx_offset + 256) /* number of rounds, loaded as a 32-bit word */ | ||
51 | #define dkey (crypto_tfm_ctx_offset + 260) /* key schedule read by aes_dec_blk */ | ||
52 | |||
53 | // register mapping for encrypt and decrypt subroutines | ||
54 | |||
55 | #define r0 eax /* state column / idx register; alternates with r2 from round to round */ | ||
56 | #define r1 ebx /* state column */ | ||
57 | #define r2 ecx /* input pointer during load, then alternates with r0 */ | ||
58 | #define r3 edx /* round count during setup, then the tmp register of every round macro */ | ||
59 | #define r4 esi /* state column */ | ||
60 | #define r5 edi /* state column */ | ||
61 | |||
62 | #define eaxl al /* byte sub-register names; only eax..edx (r0..r3) have them here */ | ||
63 | #define eaxh ah | ||
64 | #define ebxl bl | ||
65 | #define ebxh bh | ||
66 | #define ecxl cl | ||
67 | #define ecxh ch | ||
68 | #define edxl dl | ||
69 | #define edxh dh | ||
70 | |||
71 | #define _h(reg) reg##h /* paste: eax -> eaxh, which the defines above map to ah */ | ||
72 | #define h(reg) _h(reg) /* extra level so an r0..r3 argument is expanded before pasting */ | ||
73 | |||
74 | #define _l(reg) reg##l /* paste: eax -> eaxl, which the defines above map to al */ | ||
75 | #define l(reg) _l(reg) /* extra level so an r0..r3 argument is expanded before pasting */ | ||
76 | |||
77 | // This macro takes a 32-bit word representing a column and uses | ||
78 | // each of its four bytes to index into four tables of 256 32-bit | ||
79 | // words to obtain values that are then xored into the four | ||
80 | // output registers a1..a4 (byte 0 -> a1, byte 1 -> a2, byte 2 -> a3, byte 3 -> a4). | ||
81 | |||
82 | // Parameters: | ||
83 | // table table base address (four consecutive tables, tlen bytes apart) | ||
84 | // a1 out_state[0] | ||
85 | // a2 out_state[1] | ||
86 | // a3 out_state[2] | ||
87 | // a4 out_state[3] | ||
88 | // idx input register for the round (destroyed); must have l()/h() byte names | ||
89 | // tmp scratch register for the round | ||
90 | // (do_col itself takes no key schedule argument; see do_fcol/do_icol for sched) | ||
91 | |||
92 | #define do_col(table, a1,a2,a3,a4, idx, tmp) \ | ||
93 | movzx %l(idx),%tmp; /* tmp = byte 0 of idx */ \ | ||
94 | xor table(,%tmp,4),%a1; /* a1 ^= first table[byte 0] */ \ | ||
95 | movzx %h(idx),%tmp; /* tmp = byte 1 of idx */ \ | ||
96 | shr $16,%idx; /* move bytes 2 and 3 into the low half */ \ | ||
97 | xor table+tlen(,%tmp,4),%a2; /* a2 ^= second table[byte 1] */ \ | ||
98 | movzx %l(idx),%tmp; /* tmp = byte 2 */ \ | ||
99 | movzx %h(idx),%idx; /* idx = byte 3; the original idx value is gone */ \ | ||
100 | xor table+2*tlen(,%tmp,4),%a3; /* a3 ^= third table[byte 2] */ \ | ||
101 | xor table+3*tlen(,%idx,4),%a4; /* a4 ^= fourth table[byte 3] */ | ||
102 | |||
103 | // initialise output registers from the key schedule, then xor in the table values for idx | ||
104 | // NB1: original value of a3 is in idx on exit | ||
105 | // NB2: original values of a1,a2,a4 aren't used; sched is pasted after a constant to form the memory operand | ||
106 | #define do_fcol(table, a1,a2,a3,a4, idx, tmp, sched) \ | ||
107 | mov 0 sched,%a1; /* a1 = key word at offset 0 */ \ | ||
108 | movzx %l(idx),%tmp; /* tmp = byte 0 of idx */ \ | ||
109 | mov 12 sched,%a2; /* a2 = key word at offset 12 */ \ | ||
110 | xor table(,%tmp,4),%a1; /* a1 ^= first table[byte 0] */ \ | ||
111 | mov 4 sched,%a4; /* a4 = key word at offset 4 */ \ | ||
112 | movzx %h(idx),%tmp; /* tmp = byte 1 of idx */ \ | ||
113 | shr $16,%idx; /* move bytes 2 and 3 into the low half */ \ | ||
114 | xor table+tlen(,%tmp,4),%a2; /* a2 ^= second table[byte 1] */ \ | ||
115 | movzx %l(idx),%tmp; /* tmp = byte 2 */ \ | ||
116 | movzx %h(idx),%idx; /* idx = byte 3 */ \ | ||
117 | xor table+3*tlen(,%idx,4),%a4; /* a4 ^= fourth table[byte 3] */ \ | ||
118 | mov %a3,%idx; /* keep the old a3 in idx before a3 is overwritten (NB1) */ \ | ||
119 | mov 8 sched,%a3; /* a3 = key word at offset 8 */ \ | ||
120 | xor table+2*tlen(,%tmp,4),%a3; /* a3 ^= third table[byte 2] */ | ||
121 | |||
122 | // initialise output registers from the key schedule (same as do_fcol except a2 takes key offset 4 and a4 takes offset 12) | ||
123 | // NB1: original value of a3 is in idx on exit | ||
124 | // NB2: original values of a1,a2,a4 aren't used; sched is pasted after a constant to form the memory operand | ||
125 | #define do_icol(table, a1,a2,a3,a4, idx, tmp, sched) \ | ||
126 | mov 0 sched,%a1; /* a1 = key word at offset 0 */ \ | ||
127 | movzx %l(idx),%tmp; /* tmp = byte 0 of idx */ \ | ||
128 | mov 4 sched,%a2; /* a2 = key word at offset 4 */ \ | ||
129 | xor table(,%tmp,4),%a1; /* a1 ^= first table[byte 0] */ \ | ||
130 | mov 12 sched,%a4; /* a4 = key word at offset 12 */ \ | ||
131 | movzx %h(idx),%tmp; /* tmp = byte 1 of idx */ \ | ||
132 | shr $16,%idx; /* move bytes 2 and 3 into the low half */ \ | ||
133 | xor table+tlen(,%tmp,4),%a2; /* a2 ^= second table[byte 1] */ \ | ||
134 | movzx %l(idx),%tmp; /* tmp = byte 2 */ \ | ||
135 | movzx %h(idx),%idx; /* idx = byte 3 */ \ | ||
136 | xor table+3*tlen(,%idx,4),%a4; /* a4 ^= fourth table[byte 3] */ \ | ||
137 | mov %a3,%idx; /* keep the old a3 in idx before a3 is overwritten (NB1) */ \ | ||
138 | mov 8 sched,%a3; /* a3 = key word at offset 8 */ \ | ||
139 | xor table+2*tlen(,%tmp,4),%a3; /* a3 ^= third table[byte 2] */ | ||
140 | |||
141 | |||
142 | // original Gladman had conditional saves to MMX regs; here the values go to 4-byte slots at the top of the stack. | ||
143 | #define save(a1, a2) /* stack slot a1 <- register a2 */ \ | ||
144 | mov %a2,4*a1(%esp) | ||
145 | |||
146 | #define restore(a1, a2) /* register a1 <- stack slot a2 (argument order is the reverse of save) */ \ | ||
147 | mov 4*a2(%esp),%a1 | ||
148 | |||
149 | // These macros perform a forward encryption cycle. They are entered with | ||
150 | // the previous round column values in r0,r1,r4,r5 (fwd_rnd1) or r2,r1,r4,r5 | ||
151 | // (fwd_rnd2); a fwd_rnd1/fwd_rnd2 pair exits with the final values back in | ||
152 | // r0,r1,r4,r5, using stack slots 0 and 1 for temporary storage. | ||
153 | |||
154 | // round column values | ||
155 | // on entry: r0,r1,r4,r5 | ||
156 | // on exit: r2,r1,r4,r5 | ||
157 | #define fwd_rnd1(arg, table) /* arg: round-key operand passed to do_fcol as sched; table: lookup table base */ \ | ||
158 | save (0,r1); /* slot 0 = r1, which do_fcol is about to overwrite */ \ | ||
159 | save (1,r5); /* slot 1 = r5, which do_fcol is about to overwrite */ \ | ||
160 | \ | ||
161 | /* compute new column values */ \ | ||
162 | do_fcol(table, r2,r5,r4,r1, r0,r3, arg); /* idx=r0; leaves the old r4 in r0 */ \ | ||
163 | do_col (table, r4,r1,r2,r5, r0,r3); /* idx=r4 (old value, now in r0) */ \ | ||
164 | restore(r0,0); /* r0 = saved r1 */ \ | ||
165 | do_col (table, r1,r2,r5,r4, r0,r3); /* idx=r1 (saved value) */ \ | ||
166 | restore(r0,1); /* r0 = saved r5 */ \ | ||
167 | do_col (table, r5,r4,r1,r2, r0,r3); /* idx=r5 (saved value) */ | ||
168 | |||
169 | // round column values (same as fwd_rnd1 with the roles of r0 and r2 swapped) | ||
170 | // on entry: r2,r1,r4,r5 | ||
171 | // on exit: r0,r1,r4,r5 | ||
172 | #define fwd_rnd2(arg, table) /* arg: round-key operand passed to do_fcol as sched; table: lookup table base */ \ | ||
173 | save (0,r1); /* slot 0 = r1, which do_fcol is about to overwrite */ \ | ||
174 | save (1,r5); /* slot 1 = r5, which do_fcol is about to overwrite */ \ | ||
175 | \ | ||
176 | /* compute new column values */ \ | ||
177 | do_fcol(table, r0,r5,r4,r1, r2,r3, arg); /* idx=r2; leaves the old r4 in r2 */ \ | ||
178 | do_col (table, r4,r1,r0,r5, r2,r3); /* idx=r4 (old value, now in r2) */ \ | ||
179 | restore(r2,0); /* r2 = saved r1 */ \ | ||
180 | do_col (table, r1,r0,r5,r4, r2,r3); /* idx=r1 (saved value) */ \ | ||
181 | restore(r2,1); /* r2 = saved r5 */ \ | ||
182 | do_col (table, r5,r4,r1,r0, r2,r3); /* idx=r5 (saved value) */ | ||
183 | |||
184 | // These macros perform an inverse encryption cycle. They are entered with | ||
185 | // the previous round column values in r0,r1,r4,r5 (inv_rnd1) or r2,r1,r4,r5 | ||
186 | // (inv_rnd2); an inv_rnd1/inv_rnd2 pair exits with the final values back in | ||
187 | // r0,r1,r4,r5, using stack slots 0 and 1 for temporary storage | ||
188 | |||
189 | // round column values | ||
190 | // on entry: r0,r1,r4,r5 | ||
191 | // on exit: r2,r1,r4,r5 | ||
192 | #define inv_rnd1(arg, table) /* arg: round-key operand passed to do_icol as sched; table: lookup table base */ \ | ||
193 | save (0,r1); /* slot 0 = r1, which do_icol is about to overwrite */ \ | ||
194 | save (1,r5); /* slot 1 = r5, which do_icol is about to overwrite */ \ | ||
195 | \ | ||
196 | /* compute new column values */ \ | ||
197 | do_icol(table, r2,r1,r4,r5, r0,r3, arg); /* idx=r0; leaves the old r4 in r0 */ \ | ||
198 | do_col (table, r4,r5,r2,r1, r0,r3); /* idx=r4 (old value, now in r0) */ \ | ||
199 | restore(r0,0); /* r0 = saved r1 */ \ | ||
200 | do_col (table, r1,r4,r5,r2, r0,r3); /* idx=r1 (saved value) */ \ | ||
201 | restore(r0,1); /* r0 = saved r5 */ \ | ||
202 | do_col (table, r5,r2,r1,r4, r0,r3); /* idx=r5 (saved value) */ | ||
203 | |||
204 | // round column values (same as inv_rnd1 with the roles of r0 and r2 swapped) | ||
205 | // on entry: r2,r1,r4,r5 | ||
206 | // on exit: r0,r1,r4,r5 | ||
207 | #define inv_rnd2(arg, table) /* arg: round-key operand passed to do_icol as sched; table: lookup table base */ \ | ||
208 | save (0,r1); /* slot 0 = r1, which do_icol is about to overwrite */ \ | ||
209 | save (1,r5); /* slot 1 = r5, which do_icol is about to overwrite */ \ | ||
210 | \ | ||
211 | /* compute new column values */ \ | ||
212 | do_icol(table, r0,r1,r4,r5, r2,r3, arg); /* idx=r2; leaves the old r4 in r2 */ \ | ||
213 | do_col (table, r4,r5,r0,r1, r2,r3); /* idx=r4 (old value, now in r2) */ \ | ||
214 | restore(r2,0); /* r2 = saved r1 */ \ | ||
215 | do_col (table, r1,r4,r5,r0, r2,r3); /* idx=r1 (saved value) */ \ | ||
216 | restore(r2,1); /* r2 = saved r5 */ \ | ||
217 | do_col (table, r5,r0,r1,r4, r2,r3); /* idx=r5 (saved value) */ | ||
218 | |||
219 | // AES (Rijndael) Encryption Subroutine | ||
220 | /* void aes_enc_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */ | ||
//
// Loads four 32-bit words from in_blk, xors them with the first four words of
// the key schedule at ekey inside *tfm, runs 10, 12 or 14 table-driven rounds
// (nrnd below 12, equal to 12, or above 12) and stores four words to out_blk.
// Every round uses ft_tab except the last one, which uses fl_tab.
//
// In:       all three arguments are read from the stack (tfm/out_blk/in_blk offsets)
// Out:      16 bytes written at out_blk; eax holds 1 on return
// Saved:    ebp, ebx, esi, edi are pushed on entry and popped before ret
// Clobbers: eax, ecx, edx, flags
// Stack:    four pushes plus 8 bytes of scratch for the round macros
221 | |||
222 | .global aes_enc_blk | ||
223 | |||
224 | .extern ft_tab | ||
225 | .extern fl_tab | ||
226 | |||
227 | .align 4 | ||
228 | |||
229 | aes_enc_blk: | ||
230 | push %ebp | ||
231 | mov tfm(%esp),%ebp /* ebp = tfm */ | ||
232 | |||
233 | // CAUTION: the order and the values used in these assigns | ||
234 | // rely on the register mappings | ||
235 | |||
236 | 1: push %ebx | ||
237 | mov in_blk+4(%esp),%r2 /* r2 = in_blk; +4 accounts for the second push */ | ||
238 | push %esi | ||
239 | mov nrnd(%ebp),%r3 // number of rounds | ||
240 | push %edi | ||
241 | #if ekey != 0 | ||
242 | lea ekey(%ebp),%ebp // key pointer | ||
243 | #endif | ||
244 | |||
245 | // input four columns and xor in first round key | ||
246 | |||
247 | mov (%r2),%r0 | ||
248 | mov 4(%r2),%r1 | ||
249 | mov 8(%r2),%r4 | ||
250 | mov 12(%r2),%r5 | ||
251 | xor (%ebp),%r0 | ||
252 | xor 4(%ebp),%r1 | ||
253 | xor 8(%ebp),%r4 | ||
254 | xor 12(%ebp),%r5 | ||
255 | |||
256 | sub $8,%esp // space for register saves on stack | ||
257 | add $16,%ebp // increment to next round key | ||
258 | cmp $12,%r3 /* these flags feed both the jb and the je below */ | ||
259 | jb 4f // 10 rounds for 128-bit key | ||
260 | lea 32(%ebp),%ebp /* lea rather than add so the cmp flags survive for je; bias ebp past two extra round keys */ | ||
261 | je 3f // 12 rounds for 192-bit key | ||
262 | lea 32(%ebp),%ebp /* bias ebp past two more round keys */ | ||
263 | |||
264 | 2: fwd_rnd1( -64(%ebp) ,ft_tab) // 14 rounds for 256-bit key | ||
265 | fwd_rnd2( -48(%ebp) ,ft_tab) | ||
266 | 3: fwd_rnd1( -32(%ebp) ,ft_tab) // 12 rounds for 192-bit key | ||
267 | fwd_rnd2( -16(%ebp) ,ft_tab) | ||
268 | 4: fwd_rnd1( (%ebp) ,ft_tab) // 10 rounds for 128-bit key | ||
269 | fwd_rnd2( +16(%ebp) ,ft_tab) | ||
270 | fwd_rnd1( +32(%ebp) ,ft_tab) | ||
271 | fwd_rnd2( +48(%ebp) ,ft_tab) | ||
272 | fwd_rnd1( +64(%ebp) ,ft_tab) | ||
273 | fwd_rnd2( +80(%ebp) ,ft_tab) | ||
274 | fwd_rnd1( +96(%ebp) ,ft_tab) | ||
275 | fwd_rnd2(+112(%ebp) ,ft_tab) | ||
276 | fwd_rnd1(+128(%ebp) ,ft_tab) | ||
277 | fwd_rnd2(+144(%ebp) ,fl_tab) // last round uses a different table | ||
278 | |||
279 | // move final values to the output array. CAUTION: the | ||
280 | // order of these assigns rely on the register mappings | ||
281 | |||
282 | add $8,%esp /* release the two scratch slots */ | ||
283 | mov out_blk+12(%esp),%ebp /* ebp = out_blk; +12 for the three registers still pushed above ebp */ | ||
284 | mov %r5,12(%ebp) /* r5 is edi: store it before edi is popped */ | ||
285 | pop %edi | ||
286 | mov %r4,8(%ebp) /* r4 is esi: store it before esi is popped */ | ||
287 | pop %esi | ||
288 | mov %r1,4(%ebp) /* r1 is ebx: store it before ebx is popped */ | ||
289 | pop %ebx | ||
290 | mov %r0,(%ebp) /* last use of ebp as the output pointer */ | ||
291 | pop %ebp | ||
292 | mov $1,%eax /* eax = 1 on return, although the prototype comment above says void */ | ||
293 | ret | ||
294 | |||
295 | // AES (Rijndael) Decryption Subroutine | ||
296 | /* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */ | ||
//
// Loads four 32-bit words from in_blk, xors them with the four key words at
// dkey + 16*nrnd inside *tfm, then walks the key schedule downwards through
// 10, 12 or 14 table-driven rounds (nrnd below 12, equal to 12, or above 12)
// and stores four words to out_blk. Every round uses it_tab except the last
// one, which uses il_tab.
//
// In:       all three arguments are read from the stack (tfm/out_blk/in_blk offsets)
// Out:      16 bytes written at out_blk; eax holds 1 on return
// Saved:    ebp, ebx, esi, edi are pushed on entry and popped before ret
// Clobbers: eax, ecx, edx, flags
// Stack:    four pushes plus 8 bytes of scratch for the round macros
297 | |||
298 | .global aes_dec_blk | ||
299 | |||
300 | .extern it_tab | ||
301 | .extern il_tab | ||
302 | |||
303 | .align 4 | ||
304 | |||
305 | aes_dec_blk: | ||
306 | push %ebp | ||
307 | mov tfm(%esp),%ebp /* ebp = tfm */ | ||
308 | |||
309 | // CAUTION: the order and the values used in these assigns | ||
310 | // rely on the register mappings | ||
311 | |||
312 | 1: push %ebx | ||
313 | mov in_blk+4(%esp),%r2 /* r2 = in_blk; +4 accounts for the second push */ | ||
314 | push %esi | ||
315 | mov nrnd(%ebp),%r3 // number of rounds | ||
316 | push %edi | ||
317 | #if dkey != 0 | ||
318 | lea dkey(%ebp),%ebp // key pointer | ||
319 | #endif | ||
320 | mov %r3,%r0 /* r0 = nrnd */ | ||
321 | shl $4,%r0 /* r0 = 16 * nrnd, the byte offset of round key number nrnd */ | ||
322 | add %r0,%ebp /* ebp = dkey + 16 * nrnd: start at the far end of the schedule */ | ||
323 | |||
324 | // input four columns and xor in first round key | ||
325 | |||
326 | mov (%r2),%r0 | ||
327 | mov 4(%r2),%r1 | ||
328 | mov 8(%r2),%r4 | ||
329 | mov 12(%r2),%r5 | ||
330 | xor (%ebp),%r0 | ||
331 | xor 4(%ebp),%r1 | ||
332 | xor 8(%ebp),%r4 | ||
333 | xor 12(%ebp),%r5 | ||
334 | |||
335 | sub $8,%esp // space for register saves on stack | ||
336 | sub $16,%ebp // step down to next round key (schedule is walked backwards) | ||
337 | cmp $12,%r3 /* these flags feed both the jb and the je below */ | ||
338 | jb 4f // 10 rounds for 128-bit key | ||
339 | lea -32(%ebp),%ebp /* lea rather than sub so the cmp flags survive for je; bias ebp down by two round keys */ | ||
340 | je 3f // 12 rounds for 192-bit key | ||
341 | lea -32(%ebp),%ebp /* bias ebp down by two more round keys */ | ||
342 | |||
343 | 2: inv_rnd1( +64(%ebp), it_tab) // 14 rounds for 256-bit key | ||
344 | inv_rnd2( +48(%ebp), it_tab) | ||
345 | 3: inv_rnd1( +32(%ebp), it_tab) // 12 rounds for 192-bit key | ||
346 | inv_rnd2( +16(%ebp), it_tab) | ||
347 | 4: inv_rnd1( (%ebp), it_tab) // 10 rounds for 128-bit key | ||
348 | inv_rnd2( -16(%ebp), it_tab) | ||
349 | inv_rnd1( -32(%ebp), it_tab) | ||
350 | inv_rnd2( -48(%ebp), it_tab) | ||
351 | inv_rnd1( -64(%ebp), it_tab) | ||
352 | inv_rnd2( -80(%ebp), it_tab) | ||
353 | inv_rnd1( -96(%ebp), it_tab) | ||
354 | inv_rnd2(-112(%ebp), it_tab) | ||
355 | inv_rnd1(-128(%ebp), it_tab) | ||
356 | inv_rnd2(-144(%ebp), il_tab) // last round uses a different table | ||
357 | |||
358 | // move final values to the output array. CAUTION: the | ||
359 | // order of these assigns rely on the register mappings | ||
360 | |||
361 | add $8,%esp /* release the two scratch slots */ | ||
362 | mov out_blk+12(%esp),%ebp /* ebp = out_blk; +12 for the three registers still pushed above ebp */ | ||
363 | mov %r5,12(%ebp) /* r5 is edi: store it before edi is popped */ | ||
364 | pop %edi | ||
365 | mov %r4,8(%ebp) /* r4 is esi: store it before esi is popped */ | ||
366 | pop %esi | ||
367 | mov %r1,4(%ebp) /* r1 is ebx: store it before ebx is popped */ | ||
368 | pop %ebx | ||
369 | mov %r0,(%ebp) /* last use of ebp as the output pointer */ | ||
370 | pop %ebp | ||
371 | mov $1,%eax /* eax = 1 on return, although the prototype comment above says void */ | ||
372 | ret | ||
373 | |||