diff options
Diffstat (limited to 'arch/x86/crypto/twofish-i586-asm_32.S')
-rw-r--r-- | arch/x86/crypto/twofish-i586-asm_32.S | 335 |
1 files changed, 335 insertions, 0 deletions
diff --git a/arch/x86/crypto/twofish-i586-asm_32.S b/arch/x86/crypto/twofish-i586-asm_32.S new file mode 100644 index 000000000000..39b98ed2c1b9 --- /dev/null +++ b/arch/x86/crypto/twofish-i586-asm_32.S | |||
@@ -0,0 +1,335 @@ | |||
1 | /*************************************************************************** | ||
2 | * Copyright (C) 2006 by Joachim Fritschi, <jfritschi@freenet.de> * | ||
3 | * * | ||
4 | * This program is free software; you can redistribute it and/or modify * | ||
5 | * it under the terms of the GNU General Public License as published by * | ||
6 | * the Free Software Foundation; either version 2 of the License, or * | ||
7 | * (at your option) any later version. * | ||
8 | * * | ||
9 | * This program is distributed in the hope that it will be useful, * | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of * | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * | ||
12 | * GNU General Public License for more details. * | ||
13 | * * | ||
14 | * You should have received a copy of the GNU General Public License * | ||
15 | * along with this program; if not, write to the * | ||
16 | * Free Software Foundation, Inc., * | ||
17 | * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * | ||
18 | ***************************************************************************/ | ||
19 | |||
20 | .file "twofish-i586-asm.S" | ||
21 | .text | ||
22 | |||
23 | #include <asm/asm-offsets.h> | ||
24 | |||
25 | /* stack offsets of the arguments at function entry; return address at 0 */ | ||
26 | |||
27 | #define in_blk 12 /* input byte array address parameter */ | ||
28 | #define out_blk 8 /* output byte array address parameter */ | ||
29 | #define tfm 4 /* crypto tfm pointer; the functions add crypto_tfm_ctx_offset to it to reach the Twofish context */ | ||
30 | |||
31 | #define a_offset 0 /* byte offsets of the four 32-bit words a..d; used for the data blocks and for the whitening keys */ | ||
32 | #define b_offset 4 | ||
33 | #define c_offset 8 | ||
34 | #define d_offset 12 | ||
35 | |||
36 | /* Byte offsets of the fields inside the Twofish context struct */ | ||
37 | |||
38 | #define s0 0 /* S0 array; each S array is 256 words (1024 bytes) */ | ||
39 | #define s1 1024 /* S1 array */ | ||
40 | #define s2 2048 /* S2 array */ | ||
41 | #define s3 3072 /* S3 array */ | ||
42 | #define w 4096 /* 8 whitening key words: input_whitening reads from w+offset, output_whitening from w+16+offset */ | ||
43 | #define k 4128 /* 32 round key words; each round reads the two words at k+round and k+4+round */ | ||
44 | |||
45 | /* define a few register aliases to allow macro substitution: the round macros paste D, B or H onto an Rn argument */ | ||
46 | |||
47 | #define R0D %eax /* D suffix: the full 32-bit register */ | ||
48 | #define R0B %al /* B suffix: its lowest byte (bits 0-7) */ | ||
49 | #define R0H %ah /* H suffix: its second byte (bits 8-15) */ | ||
50 | |||
51 | #define R1D %ebx | ||
52 | #define R1B %bl | ||
53 | #define R1H %bh | ||
54 | |||
55 | #define R2D %ecx | ||
56 | #define R2B %cl | ||
57 | #define R2H %ch | ||
58 | |||
59 | #define R3D %edx | ||
60 | #define R3B %dl | ||
61 | #define R3H %dh | ||
62 | |||
63 | |||
64 | /* performs input whitening: xors the whitening key word at w+offset in the context into src */ | ||
65 | #define input_whitening(src,context,offset)\ | ||
66 | xor w+offset(context), src; | ||
67 | |||
68 | /* performs output whitening: xors the whitening key word at w+16+offset in the context into src */ | ||
69 | #define output_whitening(src,context,offset)\ | ||
70 | xor w+16+offset(context), src; | ||
71 | |||
72 | /* | ||
73 | * a input register containing a (rotated 16); rotated by a further 16 on exit | ||
74 | * b input register containing b; on exit it is rol $1 of its input value (ror $16 followed by ror $15) | ||
75 | * c input register containing c; on exit it holds rol $15 of (c ^ (Ta + Tb + word at k+round)) | ||
76 | * d input register containing d (already rol $1); on exit it holds d ^ (Ta + 2 x Tb + word at k+4+round) | ||
77 | * operations on a and b are interleaved to increase performance; Ta and Tb are the xor of four table lookups indexed by the bytes of a and b; d is kept on the stack meanwhile, %esi and %edi are scratch and %ebp must point at the context | ||
78 | */ | ||
79 | #define encrypt_round(a,b,c,d,round)\ | ||
80 | push d ## D;\ | ||
81 | movzx b ## B, %edi;\ | ||
82 | mov s1(%ebp,%edi,4),d ## D;\ | ||
83 | movzx a ## B, %edi;\ | ||
84 | mov s2(%ebp,%edi,4),%esi;\ | ||
85 | movzx b ## H, %edi;\ | ||
86 | ror $16, b ## D;\ | ||
87 | xor s2(%ebp,%edi,4),d ## D;\ | ||
88 | movzx a ## H, %edi;\ | ||
89 | ror $16, a ## D;\ | ||
90 | xor s3(%ebp,%edi,4),%esi;\ | ||
91 | movzx b ## B, %edi;\ | ||
92 | xor s3(%ebp,%edi,4),d ## D;\ | ||
93 | movzx a ## B, %edi;\ | ||
94 | xor (%ebp,%edi,4), %esi;\ | ||
95 | movzx b ## H, %edi;\ | ||
96 | ror $15, b ## D;\ | ||
97 | xor (%ebp,%edi,4), d ## D;\ | ||
98 | movzx a ## H, %edi;\ | ||
99 | xor s1(%ebp,%edi,4),%esi;\ | ||
100 | pop %edi;\ | ||
101 | add d ## D, %esi;\ | ||
102 | add %esi, d ## D;\ | ||
103 | add k+round(%ebp), %esi;\ | ||
104 | xor %esi, c ## D;\ | ||
105 | rol $15, c ## D;\ | ||
106 | add k+4+round(%ebp),d ## D;\ | ||
107 | xor %edi, d ## D; | ||
108 | |||
109 | /* | ||
110 | * a input register containing a (rotated 16); rotated by a further 16 on exit | ||
111 | * b input register containing b; unchanged on exit (two ror $16) | ||
112 | * c input register containing c; on exit it holds ror $1 of (c ^ (Ta + Tb + word at k+round)) | ||
113 | * d input register containing d (already rol $1); on exit it holds d ^ (Ta + 2 x Tb + word at k+4+round) | ||
114 | * operations on a and b are interleaved to increase performance; Ta and Tb are the xor of four table lookups indexed by the bytes of a and b; %esi and %edi are scratch and %ebp must point at the context | ||
115 | * last round has different rotations for the output preparation: b gets ror $16 instead of ror $15 and c gets ror $1 instead of rol $15 | ||
116 | */ | ||
117 | #define encrypt_last_round(a,b,c,d,round)\ | ||
118 | push d ## D;\ | ||
119 | movzx b ## B, %edi;\ | ||
120 | mov s1(%ebp,%edi,4),d ## D;\ | ||
121 | movzx a ## B, %edi;\ | ||
122 | mov s2(%ebp,%edi,4),%esi;\ | ||
123 | movzx b ## H, %edi;\ | ||
124 | ror $16, b ## D;\ | ||
125 | xor s2(%ebp,%edi,4),d ## D;\ | ||
126 | movzx a ## H, %edi;\ | ||
127 | ror $16, a ## D;\ | ||
128 | xor s3(%ebp,%edi,4),%esi;\ | ||
129 | movzx b ## B, %edi;\ | ||
130 | xor s3(%ebp,%edi,4),d ## D;\ | ||
131 | movzx a ## B, %edi;\ | ||
132 | xor (%ebp,%edi,4), %esi;\ | ||
133 | movzx b ## H, %edi;\ | ||
134 | ror $16, b ## D;\ | ||
135 | xor (%ebp,%edi,4), d ## D;\ | ||
136 | movzx a ## H, %edi;\ | ||
137 | xor s1(%ebp,%edi,4),%esi;\ | ||
138 | pop %edi;\ | ||
139 | add d ## D, %esi;\ | ||
140 | add %esi, d ## D;\ | ||
141 | add k+round(%ebp), %esi;\ | ||
142 | xor %esi, c ## D;\ | ||
143 | ror $1, c ## D;\ | ||
144 | add k+4+round(%ebp),d ## D;\ | ||
145 | xor %edi, d ## D; | ||
146 | |||
147 | /* | ||
148 | * a input register containing a; on exit it is rol $1 of its input value (ror $16 followed by ror $15) | ||
149 | * b input register containing b (rotated 16); rotated by a further 16 on exit | ||
150 | * c input register containing c (already rol $1); on exit it holds c ^ (Ta + Tb + word at k+round) | ||
151 | * d input register containing d; on exit it holds rol $15 of (d ^ (Ta + 2 x Tb + word at k+4+round)) | ||
152 | * operations on a and b are interleaved to increase performance; Ta and Tb are the xor of four table lookups indexed by the bytes of a and b; c is kept on the stack meanwhile, %esi and %edi are scratch and %ebp must point at the context | ||
153 | */ | ||
154 | #define decrypt_round(a,b,c,d,round)\ | ||
155 | push c ## D;\ | ||
156 | movzx a ## B, %edi;\ | ||
157 | mov (%ebp,%edi,4), c ## D;\ | ||
158 | movzx b ## B, %edi;\ | ||
159 | mov s3(%ebp,%edi,4),%esi;\ | ||
160 | movzx a ## H, %edi;\ | ||
161 | ror $16, a ## D;\ | ||
162 | xor s1(%ebp,%edi,4),c ## D;\ | ||
163 | movzx b ## H, %edi;\ | ||
164 | ror $16, b ## D;\ | ||
165 | xor (%ebp,%edi,4), %esi;\ | ||
166 | movzx a ## B, %edi;\ | ||
167 | xor s2(%ebp,%edi,4),c ## D;\ | ||
168 | movzx b ## B, %edi;\ | ||
169 | xor s1(%ebp,%edi,4),%esi;\ | ||
170 | movzx a ## H, %edi;\ | ||
171 | ror $15, a ## D;\ | ||
172 | xor s3(%ebp,%edi,4),c ## D;\ | ||
173 | movzx b ## H, %edi;\ | ||
174 | xor s2(%ebp,%edi,4),%esi;\ | ||
175 | pop %edi;\ | ||
176 | add %esi, c ## D;\ | ||
177 | add c ## D, %esi;\ | ||
178 | add k+round(%ebp), c ## D;\ | ||
179 | xor %edi, c ## D;\ | ||
180 | add k+4+round(%ebp),%esi;\ | ||
181 | xor %esi, d ## D;\ | ||
182 | rol $15, d ## D; | ||
183 | |||
184 | /* | ||
185 | * a input register containing a; unchanged on exit (two ror $16) | ||
186 | * b input register containing b (rotated 16); rotated by a further 16 on exit | ||
187 | * c input register containing c (already rol $1); on exit it holds c ^ (Ta + Tb + word at k+round) | ||
188 | * d input register containing d; on exit it holds ror $1 of (d ^ (Ta + 2 x Tb + word at k+4+round)) | ||
189 | * operations on a and b are interleaved to increase performance; Ta and Tb are the xor of four table lookups indexed by the bytes of a and b; %esi and %edi are scratch and %ebp must point at the context | ||
190 | * last round has different rotations for the output preparation: a gets ror $16 instead of ror $15 and d gets ror $1 instead of rol $15 | ||
191 | */ | ||
192 | #define decrypt_last_round(a,b,c,d,round)\ | ||
193 | push c ## D;\ | ||
194 | movzx a ## B, %edi;\ | ||
195 | mov (%ebp,%edi,4), c ## D;\ | ||
196 | movzx b ## B, %edi;\ | ||
197 | mov s3(%ebp,%edi,4),%esi;\ | ||
198 | movzx a ## H, %edi;\ | ||
199 | ror $16, a ## D;\ | ||
200 | xor s1(%ebp,%edi,4),c ## D;\ | ||
201 | movzx b ## H, %edi;\ | ||
202 | ror $16, b ## D;\ | ||
203 | xor (%ebp,%edi,4), %esi;\ | ||
204 | movzx a ## B, %edi;\ | ||
205 | xor s2(%ebp,%edi,4),c ## D;\ | ||
206 | movzx b ## B, %edi;\ | ||
207 | xor s1(%ebp,%edi,4),%esi;\ | ||
208 | movzx a ## H, %edi;\ | ||
209 | ror $16, a ## D;\ | ||
210 | xor s3(%ebp,%edi,4),c ## D;\ | ||
211 | movzx b ## H, %edi;\ | ||
212 | xor s2(%ebp,%edi,4),%esi;\ | ||
213 | pop %edi;\ | ||
214 | add %esi, c ## D;\ | ||
215 | add c ## D, %esi;\ | ||
216 | add k+round(%ebp), c ## D;\ | ||
217 | xor %edi, c ## D;\ | ||
218 | add k+4+round(%ebp),%esi;\ | ||
219 | xor %esi, d ## D;\ | ||
220 | ror $1, d ## D; | ||
221 | |||
222 | .align 4 | ||
223 | .global twofish_enc_blk /* make both entry points visible outside this file */ | ||
224 | .global twofish_dec_blk | ||
225 | |||
226 | twofish_enc_blk: /* reads four words at in_blk, runs the 16 encrypt rounds with the context inside tfm, writes four words to out_blk; returns 1 in %eax */ | ||
227 | push %ebp /* save registers according to calling convention */ | ||
228 | push %ebx | ||
229 | push %esi | ||
230 | push %edi | ||
231 | |||
232 | mov tfm + 16(%esp), %ebp /* abuse the base pointer: set new base pointer to the crypto tfm (+16 skips the four registers pushed above) */ | ||
233 | add $crypto_tfm_ctx_offset, %ebp /* ctx address */ | ||
234 | mov in_blk+16(%esp),%edi /* input address in edi */ | ||
235 | |||
236 | mov (%edi), %eax /* load the four input words a, b, c, d */ | ||
237 | mov b_offset(%edi), %ebx | ||
238 | mov c_offset(%edi), %ecx | ||
239 | mov d_offset(%edi), %edx | ||
240 | input_whitening(%eax,%ebp,a_offset) | ||
241 | ror $16, %eax /* encrypt_round expects a rotated by 16 */ | ||
242 | input_whitening(%ebx,%ebp,b_offset) | ||
243 | input_whitening(%ecx,%ebp,c_offset) | ||
244 | input_whitening(%edx,%ebp,d_offset) | ||
245 | rol $1, %edx /* encrypt_round expects d already rotated left by 1 */ | ||
246 | |||
247 | encrypt_round(R0,R1,R2,R3,0); | ||
248 | encrypt_round(R2,R3,R0,R1,8); | ||
249 | encrypt_round(R0,R1,R2,R3,2*8); | ||
250 | encrypt_round(R2,R3,R0,R1,3*8); | ||
251 | encrypt_round(R0,R1,R2,R3,4*8); | ||
252 | encrypt_round(R2,R3,R0,R1,5*8); | ||
253 | encrypt_round(R0,R1,R2,R3,6*8); | ||
254 | encrypt_round(R2,R3,R0,R1,7*8); | ||
255 | encrypt_round(R0,R1,R2,R3,8*8); | ||
256 | encrypt_round(R2,R3,R0,R1,9*8); | ||
257 | encrypt_round(R0,R1,R2,R3,10*8); | ||
258 | encrypt_round(R2,R3,R0,R1,11*8); | ||
259 | encrypt_round(R0,R1,R2,R3,12*8); | ||
260 | encrypt_round(R2,R3,R0,R1,13*8); | ||
261 | encrypt_round(R0,R1,R2,R3,14*8); | ||
262 | encrypt_last_round(R2,R3,R0,R1,15*8); | ||
263 | |||
264 | output_whitening(%eax,%ebp,c_offset) | ||
265 | output_whitening(%ebx,%ebp,d_offset) | ||
266 | output_whitening(%ecx,%ebp,a_offset) | ||
267 | output_whitening(%edx,%ebp,b_offset) | ||
268 | mov out_blk+16(%esp),%edi; /* output address in edi */ | ||
269 | mov %eax, c_offset(%edi) /* eax and ebx are written to words c and d, ecx and edx to words a and b */ | ||
270 | mov %ebx, d_offset(%edi) | ||
271 | mov %ecx, (%edi) | ||
272 | mov %edx, b_offset(%edi) | ||
273 | |||
274 | pop %edi /* restore the saved registers in reverse push order */ | ||
275 | pop %esi | ||
276 | pop %ebx | ||
277 | pop %ebp | ||
278 | mov $1, %eax /* return value is the constant 1 */ | ||
279 | ret | ||
280 | |||
281 | twofish_dec_blk: /* reads four words at in_blk, runs the 16 decrypt rounds (keys 15*8 down to 0) with the context inside tfm, writes four words to out_blk; returns 1 in %eax */ | ||
282 | push %ebp /* save registers according to calling convention */ | ||
283 | push %ebx | ||
284 | push %esi | ||
285 | push %edi | ||
286 | |||
287 | |||
288 | mov tfm + 16(%esp), %ebp /* abuse the base pointer: set new base pointer to the crypto tfm (+16 skips the four registers pushed above) */ | ||
289 | add $crypto_tfm_ctx_offset, %ebp /* ctx address */ | ||
290 | mov in_blk+16(%esp),%edi /* input address in edi */ | ||
291 | |||
292 | mov (%edi), %eax /* load the four input words a, b, c, d */ | ||
293 | mov b_offset(%edi), %ebx | ||
294 | mov c_offset(%edi), %ecx | ||
295 | mov d_offset(%edi), %edx | ||
296 | output_whitening(%eax,%ebp,a_offset) | ||
297 | output_whitening(%ebx,%ebp,b_offset) | ||
298 | ror $16, %ebx /* decrypt_round expects b rotated by 16 */ | ||
299 | output_whitening(%ecx,%ebp,c_offset) | ||
300 | output_whitening(%edx,%ebp,d_offset) | ||
301 | rol $1, %ecx /* decrypt_round expects c already rotated left by 1 */ | ||
302 | |||
303 | decrypt_round(R0,R1,R2,R3,15*8); | ||
304 | decrypt_round(R2,R3,R0,R1,14*8); | ||
305 | decrypt_round(R0,R1,R2,R3,13*8); | ||
306 | decrypt_round(R2,R3,R0,R1,12*8); | ||
307 | decrypt_round(R0,R1,R2,R3,11*8); | ||
308 | decrypt_round(R2,R3,R0,R1,10*8); | ||
309 | decrypt_round(R0,R1,R2,R3,9*8); | ||
310 | decrypt_round(R2,R3,R0,R1,8*8); | ||
311 | decrypt_round(R0,R1,R2,R3,7*8); | ||
312 | decrypt_round(R2,R3,R0,R1,6*8); | ||
313 | decrypt_round(R0,R1,R2,R3,5*8); | ||
314 | decrypt_round(R2,R3,R0,R1,4*8); | ||
315 | decrypt_round(R0,R1,R2,R3,3*8); | ||
316 | decrypt_round(R2,R3,R0,R1,2*8); | ||
317 | decrypt_round(R0,R1,R2,R3,1*8); | ||
318 | decrypt_last_round(R2,R3,R0,R1,0); | ||
319 | |||
320 | input_whitening(%eax,%ebp,c_offset) | ||
321 | input_whitening(%ebx,%ebp,d_offset) | ||
322 | input_whitening(%ecx,%ebp,a_offset) | ||
323 | input_whitening(%edx,%ebp,b_offset) | ||
324 | mov out_blk+16(%esp),%edi; /* output address in edi */ | ||
325 | mov %eax, c_offset(%edi) /* eax and ebx are written to words c and d, ecx and edx to words a and b */ | ||
326 | mov %ebx, d_offset(%edi) | ||
327 | mov %ecx, (%edi) | ||
328 | mov %edx, b_offset(%edi) | ||
329 | |||
330 | pop %edi /* restore the saved registers in reverse push order */ | ||
331 | pop %esi | ||
332 | pop %ebx | ||
333 | pop %ebp | ||
334 | mov $1, %eax /* return value is the constant 1 */ | ||
335 | ret | ||