diff options
Diffstat (limited to 'arch/x86/crypto')
-rw-r--r-- | arch/x86/crypto/Makefile | 12 | ||||
-rw-r--r-- | arch/x86/crypto/aes-i586-asm_32.S | 89 | ||||
-rw-r--r-- | arch/x86/crypto/aes-x86_64-asm_64.S | 68 | ||||
-rw-r--r-- | arch/x86/crypto/aes_32.c | 515 | ||||
-rw-r--r-- | arch/x86/crypto/aes_64.c | 336 | ||||
-rw-r--r-- | arch/x86/crypto/aes_glue.c | 57 | ||||
-rw-r--r-- | arch/x86/crypto/salsa20-i586-asm_32.S | 1114 | ||||
-rw-r--r-- | arch/x86/crypto/salsa20-x86_64-asm_64.S | 920 | ||||
-rw-r--r-- | arch/x86/crypto/salsa20_glue.c | 129 | ||||
-rw-r--r-- | arch/x86/crypto/twofish_64.c | 97 | ||||
-rw-r--r-- | arch/x86/crypto/twofish_glue.c (renamed from arch/x86/crypto/twofish_32.c) | 8 |
11 files changed, 2309 insertions, 1036 deletions
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 46bb609e2444..3874c2de5403 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile | |||
@@ -4,12 +4,16 @@ | |||
4 | 4 | ||
5 | obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o | 5 | obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o |
6 | obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o | 6 | obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o |
7 | obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o | ||
7 | 8 | ||
8 | obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o | 9 | obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o |
9 | obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o | 10 | obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o |
11 | obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o | ||
10 | 12 | ||
11 | aes-i586-y := aes-i586-asm_32.o aes_32.o | 13 | aes-i586-y := aes-i586-asm_32.o aes_glue.o |
12 | twofish-i586-y := twofish-i586-asm_32.o twofish_32.o | 14 | twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o |
15 | salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o | ||
13 | 16 | ||
14 | aes-x86_64-y := aes-x86_64-asm_64.o aes_64.o | 17 | aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o |
15 | twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_64.o | 18 | twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o |
19 | salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o | ||
diff --git a/arch/x86/crypto/aes-i586-asm_32.S b/arch/x86/crypto/aes-i586-asm_32.S index f942f0c8f630..1093bede3e0a 100644 --- a/arch/x86/crypto/aes-i586-asm_32.S +++ b/arch/x86/crypto/aes-i586-asm_32.S | |||
@@ -46,9 +46,9 @@ | |||
46 | #define in_blk 16 | 46 | #define in_blk 16 |
47 | 47 | ||
48 | /* offsets in crypto_tfm structure */ | 48 | /* offsets in crypto_tfm structure */ |
49 | #define ekey (crypto_tfm_ctx_offset + 0) | 49 | #define klen (crypto_tfm_ctx_offset + 0) |
50 | #define nrnd (crypto_tfm_ctx_offset + 256) | 50 | #define ekey (crypto_tfm_ctx_offset + 4) |
51 | #define dkey (crypto_tfm_ctx_offset + 260) | 51 | #define dkey (crypto_tfm_ctx_offset + 244) |
52 | 52 | ||
53 | // register mapping for encrypt and decrypt subroutines | 53 | // register mapping for encrypt and decrypt subroutines |
54 | 54 | ||
@@ -221,8 +221,8 @@ | |||
221 | 221 | ||
222 | .global aes_enc_blk | 222 | .global aes_enc_blk |
223 | 223 | ||
224 | .extern ft_tab | 224 | .extern crypto_ft_tab |
225 | .extern fl_tab | 225 | .extern crypto_fl_tab |
226 | 226 | ||
227 | .align 4 | 227 | .align 4 |
228 | 228 | ||
@@ -236,7 +236,7 @@ aes_enc_blk: | |||
236 | 1: push %ebx | 236 | 1: push %ebx |
237 | mov in_blk+4(%esp),%r2 | 237 | mov in_blk+4(%esp),%r2 |
238 | push %esi | 238 | push %esi |
239 | mov nrnd(%ebp),%r3 // number of rounds | 239 | mov klen(%ebp),%r3 // key size |
240 | push %edi | 240 | push %edi |
241 | #if ekey != 0 | 241 | #if ekey != 0 |
242 | lea ekey(%ebp),%ebp // key pointer | 242 | lea ekey(%ebp),%ebp // key pointer |
@@ -255,26 +255,26 @@ aes_enc_blk: | |||
255 | 255 | ||
256 | sub $8,%esp // space for register saves on stack | 256 | sub $8,%esp // space for register saves on stack |
257 | add $16,%ebp // increment to next round key | 257 | add $16,%ebp // increment to next round key |
258 | cmp $12,%r3 | 258 | cmp $24,%r3 |
259 | jb 4f // 10 rounds for 128-bit key | 259 | jb 4f // 10 rounds for 128-bit key |
260 | lea 32(%ebp),%ebp | 260 | lea 32(%ebp),%ebp |
261 | je 3f // 12 rounds for 192-bit key | 261 | je 3f // 12 rounds for 192-bit key |
262 | lea 32(%ebp),%ebp | 262 | lea 32(%ebp),%ebp |
263 | 263 | ||
264 | 2: fwd_rnd1( -64(%ebp) ,ft_tab) // 14 rounds for 256-bit key | 264 | 2: fwd_rnd1( -64(%ebp), crypto_ft_tab) // 14 rounds for 256-bit key |
265 | fwd_rnd2( -48(%ebp) ,ft_tab) | 265 | fwd_rnd2( -48(%ebp), crypto_ft_tab) |
266 | 3: fwd_rnd1( -32(%ebp) ,ft_tab) // 12 rounds for 192-bit key | 266 | 3: fwd_rnd1( -32(%ebp), crypto_ft_tab) // 12 rounds for 192-bit key |
267 | fwd_rnd2( -16(%ebp) ,ft_tab) | 267 | fwd_rnd2( -16(%ebp), crypto_ft_tab) |
268 | 4: fwd_rnd1( (%ebp) ,ft_tab) // 10 rounds for 128-bit key | 268 | 4: fwd_rnd1( (%ebp), crypto_ft_tab) // 10 rounds for 128-bit key |
269 | fwd_rnd2( +16(%ebp) ,ft_tab) | 269 | fwd_rnd2( +16(%ebp), crypto_ft_tab) |
270 | fwd_rnd1( +32(%ebp) ,ft_tab) | 270 | fwd_rnd1( +32(%ebp), crypto_ft_tab) |
271 | fwd_rnd2( +48(%ebp) ,ft_tab) | 271 | fwd_rnd2( +48(%ebp), crypto_ft_tab) |
272 | fwd_rnd1( +64(%ebp) ,ft_tab) | 272 | fwd_rnd1( +64(%ebp), crypto_ft_tab) |
273 | fwd_rnd2( +80(%ebp) ,ft_tab) | 273 | fwd_rnd2( +80(%ebp), crypto_ft_tab) |
274 | fwd_rnd1( +96(%ebp) ,ft_tab) | 274 | fwd_rnd1( +96(%ebp), crypto_ft_tab) |
275 | fwd_rnd2(+112(%ebp) ,ft_tab) | 275 | fwd_rnd2(+112(%ebp), crypto_ft_tab) |
276 | fwd_rnd1(+128(%ebp) ,ft_tab) | 276 | fwd_rnd1(+128(%ebp), crypto_ft_tab) |
277 | fwd_rnd2(+144(%ebp) ,fl_tab) // last round uses a different table | 277 | fwd_rnd2(+144(%ebp), crypto_fl_tab) // last round uses a different table |
278 | 278 | ||
279 | // move final values to the output array. CAUTION: the | 279 | // move final values to the output array. CAUTION: the |
280 | // order of these assigns rely on the register mappings | 280 | // order of these assigns rely on the register mappings |
@@ -297,8 +297,8 @@ aes_enc_blk: | |||
297 | 297 | ||
298 | .global aes_dec_blk | 298 | .global aes_dec_blk |
299 | 299 | ||
300 | .extern it_tab | 300 | .extern crypto_it_tab |
301 | .extern il_tab | 301 | .extern crypto_il_tab |
302 | 302 | ||
303 | .align 4 | 303 | .align 4 |
304 | 304 | ||
@@ -312,14 +312,11 @@ aes_dec_blk: | |||
312 | 1: push %ebx | 312 | 1: push %ebx |
313 | mov in_blk+4(%esp),%r2 | 313 | mov in_blk+4(%esp),%r2 |
314 | push %esi | 314 | push %esi |
315 | mov nrnd(%ebp),%r3 // number of rounds | 315 | mov klen(%ebp),%r3 // key size |
316 | push %edi | 316 | push %edi |
317 | #if dkey != 0 | 317 | #if dkey != 0 |
318 | lea dkey(%ebp),%ebp // key pointer | 318 | lea dkey(%ebp),%ebp // key pointer |
319 | #endif | 319 | #endif |
320 | mov %r3,%r0 | ||
321 | shl $4,%r0 | ||
322 | add %r0,%ebp | ||
323 | 320 | ||
324 | // input four columns and xor in first round key | 321 | // input four columns and xor in first round key |
325 | 322 | ||
@@ -333,27 +330,27 @@ aes_dec_blk: | |||
333 | xor 12(%ebp),%r5 | 330 | xor 12(%ebp),%r5 |
334 | 331 | ||
335 | sub $8,%esp // space for register saves on stack | 332 | sub $8,%esp // space for register saves on stack |
336 | sub $16,%ebp // increment to next round key | 333 | add $16,%ebp // increment to next round key |
337 | cmp $12,%r3 | 334 | cmp $24,%r3 |
338 | jb 4f // 10 rounds for 128-bit key | 335 | jb 4f // 10 rounds for 128-bit key |
339 | lea -32(%ebp),%ebp | 336 | lea 32(%ebp),%ebp |
340 | je 3f // 12 rounds for 192-bit key | 337 | je 3f // 12 rounds for 192-bit key |
341 | lea -32(%ebp),%ebp | 338 | lea 32(%ebp),%ebp |
342 | 339 | ||
343 | 2: inv_rnd1( +64(%ebp), it_tab) // 14 rounds for 256-bit key | 340 | 2: inv_rnd1( -64(%ebp), crypto_it_tab) // 14 rounds for 256-bit key |
344 | inv_rnd2( +48(%ebp), it_tab) | 341 | inv_rnd2( -48(%ebp), crypto_it_tab) |
345 | 3: inv_rnd1( +32(%ebp), it_tab) // 12 rounds for 192-bit key | 342 | 3: inv_rnd1( -32(%ebp), crypto_it_tab) // 12 rounds for 192-bit key |
346 | inv_rnd2( +16(%ebp), it_tab) | 343 | inv_rnd2( -16(%ebp), crypto_it_tab) |
347 | 4: inv_rnd1( (%ebp), it_tab) // 10 rounds for 128-bit key | 344 | 4: inv_rnd1( (%ebp), crypto_it_tab) // 10 rounds for 128-bit key |
348 | inv_rnd2( -16(%ebp), it_tab) | 345 | inv_rnd2( +16(%ebp), crypto_it_tab) |
349 | inv_rnd1( -32(%ebp), it_tab) | 346 | inv_rnd1( +32(%ebp), crypto_it_tab) |
350 | inv_rnd2( -48(%ebp), it_tab) | 347 | inv_rnd2( +48(%ebp), crypto_it_tab) |
351 | inv_rnd1( -64(%ebp), it_tab) | 348 | inv_rnd1( +64(%ebp), crypto_it_tab) |
352 | inv_rnd2( -80(%ebp), it_tab) | 349 | inv_rnd2( +80(%ebp), crypto_it_tab) |
353 | inv_rnd1( -96(%ebp), it_tab) | 350 | inv_rnd1( +96(%ebp), crypto_it_tab) |
354 | inv_rnd2(-112(%ebp), it_tab) | 351 | inv_rnd2(+112(%ebp), crypto_it_tab) |
355 | inv_rnd1(-128(%ebp), it_tab) | 352 | inv_rnd1(+128(%ebp), crypto_it_tab) |
356 | inv_rnd2(-144(%ebp), il_tab) // last round uses a different table | 353 | inv_rnd2(+144(%ebp), crypto_il_tab) // last round uses a different table |
357 | 354 | ||
358 | // move final values to the output array. CAUTION: the | 355 | // move final values to the output array. CAUTION: the |
359 | // order of these assigns rely on the register mappings | 356 | // order of these assigns rely on the register mappings |
diff --git a/arch/x86/crypto/aes-x86_64-asm_64.S b/arch/x86/crypto/aes-x86_64-asm_64.S index 26b40de4d0b0..a120f526c3df 100644 --- a/arch/x86/crypto/aes-x86_64-asm_64.S +++ b/arch/x86/crypto/aes-x86_64-asm_64.S | |||
@@ -8,10 +8,10 @@ | |||
8 | * including this sentence is retained in full. | 8 | * including this sentence is retained in full. |
9 | */ | 9 | */ |
10 | 10 | ||
11 | .extern aes_ft_tab | 11 | .extern crypto_ft_tab |
12 | .extern aes_it_tab | 12 | .extern crypto_it_tab |
13 | .extern aes_fl_tab | 13 | .extern crypto_fl_tab |
14 | .extern aes_il_tab | 14 | .extern crypto_il_tab |
15 | 15 | ||
16 | .text | 16 | .text |
17 | 17 | ||
@@ -56,13 +56,13 @@ | |||
56 | .align 8; \ | 56 | .align 8; \ |
57 | FUNC: movq r1,r2; \ | 57 | FUNC: movq r1,r2; \ |
58 | movq r3,r4; \ | 58 | movq r3,r4; \ |
59 | leaq BASE+KEY+52(r8),r9; \ | 59 | leaq BASE+KEY+48+4(r8),r9; \ |
60 | movq r10,r11; \ | 60 | movq r10,r11; \ |
61 | movl (r7),r5 ## E; \ | 61 | movl (r7),r5 ## E; \ |
62 | movl 4(r7),r1 ## E; \ | 62 | movl 4(r7),r1 ## E; \ |
63 | movl 8(r7),r6 ## E; \ | 63 | movl 8(r7),r6 ## E; \ |
64 | movl 12(r7),r7 ## E; \ | 64 | movl 12(r7),r7 ## E; \ |
65 | movl BASE(r8),r10 ## E; \ | 65 | movl BASE+0(r8),r10 ## E; \ |
66 | xorl -48(r9),r5 ## E; \ | 66 | xorl -48(r9),r5 ## E; \ |
67 | xorl -44(r9),r1 ## E; \ | 67 | xorl -44(r9),r1 ## E; \ |
68 | xorl -40(r9),r6 ## E; \ | 68 | xorl -40(r9),r6 ## E; \ |
@@ -154,37 +154,37 @@ FUNC: movq r1,r2; \ | |||
154 | /* void aes_enc_blk(stuct crypto_tfm *tfm, u8 *out, const u8 *in) */ | 154 | /* void aes_enc_blk(stuct crypto_tfm *tfm, u8 *out, const u8 *in) */ |
155 | 155 | ||
156 | entry(aes_enc_blk,0,enc128,enc192) | 156 | entry(aes_enc_blk,0,enc128,enc192) |
157 | encrypt_round(aes_ft_tab,-96) | 157 | encrypt_round(crypto_ft_tab,-96) |
158 | encrypt_round(aes_ft_tab,-80) | 158 | encrypt_round(crypto_ft_tab,-80) |
159 | enc192: encrypt_round(aes_ft_tab,-64) | 159 | enc192: encrypt_round(crypto_ft_tab,-64) |
160 | encrypt_round(aes_ft_tab,-48) | 160 | encrypt_round(crypto_ft_tab,-48) |
161 | enc128: encrypt_round(aes_ft_tab,-32) | 161 | enc128: encrypt_round(crypto_ft_tab,-32) |
162 | encrypt_round(aes_ft_tab,-16) | 162 | encrypt_round(crypto_ft_tab,-16) |
163 | encrypt_round(aes_ft_tab, 0) | 163 | encrypt_round(crypto_ft_tab, 0) |
164 | encrypt_round(aes_ft_tab, 16) | 164 | encrypt_round(crypto_ft_tab, 16) |
165 | encrypt_round(aes_ft_tab, 32) | 165 | encrypt_round(crypto_ft_tab, 32) |
166 | encrypt_round(aes_ft_tab, 48) | 166 | encrypt_round(crypto_ft_tab, 48) |
167 | encrypt_round(aes_ft_tab, 64) | 167 | encrypt_round(crypto_ft_tab, 64) |
168 | encrypt_round(aes_ft_tab, 80) | 168 | encrypt_round(crypto_ft_tab, 80) |
169 | encrypt_round(aes_ft_tab, 96) | 169 | encrypt_round(crypto_ft_tab, 96) |
170 | encrypt_final(aes_fl_tab,112) | 170 | encrypt_final(crypto_fl_tab,112) |
171 | return | 171 | return |
172 | 172 | ||
173 | /* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */ | 173 | /* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */ |
174 | 174 | ||
175 | entry(aes_dec_blk,240,dec128,dec192) | 175 | entry(aes_dec_blk,240,dec128,dec192) |
176 | decrypt_round(aes_it_tab,-96) | 176 | decrypt_round(crypto_it_tab,-96) |
177 | decrypt_round(aes_it_tab,-80) | 177 | decrypt_round(crypto_it_tab,-80) |
178 | dec192: decrypt_round(aes_it_tab,-64) | 178 | dec192: decrypt_round(crypto_it_tab,-64) |
179 | decrypt_round(aes_it_tab,-48) | 179 | decrypt_round(crypto_it_tab,-48) |
180 | dec128: decrypt_round(aes_it_tab,-32) | 180 | dec128: decrypt_round(crypto_it_tab,-32) |
181 | decrypt_round(aes_it_tab,-16) | 181 | decrypt_round(crypto_it_tab,-16) |
182 | decrypt_round(aes_it_tab, 0) | 182 | decrypt_round(crypto_it_tab, 0) |
183 | decrypt_round(aes_it_tab, 16) | 183 | decrypt_round(crypto_it_tab, 16) |
184 | decrypt_round(aes_it_tab, 32) | 184 | decrypt_round(crypto_it_tab, 32) |
185 | decrypt_round(aes_it_tab, 48) | 185 | decrypt_round(crypto_it_tab, 48) |
186 | decrypt_round(aes_it_tab, 64) | 186 | decrypt_round(crypto_it_tab, 64) |
187 | decrypt_round(aes_it_tab, 80) | 187 | decrypt_round(crypto_it_tab, 80) |
188 | decrypt_round(aes_it_tab, 96) | 188 | decrypt_round(crypto_it_tab, 96) |
189 | decrypt_final(aes_il_tab,112) | 189 | decrypt_final(crypto_il_tab,112) |
190 | return | 190 | return |
diff --git a/arch/x86/crypto/aes_32.c b/arch/x86/crypto/aes_32.c deleted file mode 100644 index 49aad9397f10..000000000000 --- a/arch/x86/crypto/aes_32.c +++ /dev/null | |||
@@ -1,515 +0,0 @@ | |||
1 | /* | ||
2 | * | ||
3 | * Glue Code for optimized 586 assembler version of AES | ||
4 | * | ||
5 | * Copyright (c) 2002, Dr Brian Gladman <>, Worcester, UK. | ||
6 | * All rights reserved. | ||
7 | * | ||
8 | * LICENSE TERMS | ||
9 | * | ||
10 | * The free distribution and use of this software in both source and binary | ||
11 | * form is allowed (with or without changes) provided that: | ||
12 | * | ||
13 | * 1. distributions of this source code include the above copyright | ||
14 | * notice, this list of conditions and the following disclaimer; | ||
15 | * | ||
16 | * 2. distributions in binary form include the above copyright | ||
17 | * notice, this list of conditions and the following disclaimer | ||
18 | * in the documentation and/or other associated materials; | ||
19 | * | ||
20 | * 3. the copyright holder's name is not used to endorse products | ||
21 | * built using this software without specific written permission. | ||
22 | * | ||
23 | * ALTERNATIVELY, provided that this notice is retained in full, this product | ||
24 | * may be distributed under the terms of the GNU General Public License (GPL), | ||
25 | * in which case the provisions of the GPL apply INSTEAD OF those given above. | ||
26 | * | ||
27 | * DISCLAIMER | ||
28 | * | ||
29 | * This software is provided 'as is' with no explicit or implied warranties | ||
30 | * in respect of its properties, including, but not limited to, correctness | ||
31 | * and/or fitness for purpose. | ||
32 | * | ||
33 | * Copyright (c) 2003, Adam J. Richter <adam@yggdrasil.com> (conversion to | ||
34 | * 2.5 API). | ||
35 | * Copyright (c) 2003, 2004 Fruhwirth Clemens <clemens@endorphin.org> | ||
36 | * Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com> | ||
37 | * | ||
38 | */ | ||
39 | |||
40 | #include <asm/byteorder.h> | ||
41 | #include <linux/kernel.h> | ||
42 | #include <linux/module.h> | ||
43 | #include <linux/init.h> | ||
44 | #include <linux/types.h> | ||
45 | #include <linux/crypto.h> | ||
46 | #include <linux/linkage.h> | ||
47 | |||
48 | asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); | ||
49 | asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); | ||
50 | |||
51 | #define AES_MIN_KEY_SIZE 16 | ||
52 | #define AES_MAX_KEY_SIZE 32 | ||
53 | #define AES_BLOCK_SIZE 16 | ||
54 | #define AES_KS_LENGTH 4 * AES_BLOCK_SIZE | ||
55 | #define RC_LENGTH 29 | ||
56 | |||
57 | struct aes_ctx { | ||
58 | u32 ekey[AES_KS_LENGTH]; | ||
59 | u32 rounds; | ||
60 | u32 dkey[AES_KS_LENGTH]; | ||
61 | }; | ||
62 | |||
63 | #define WPOLY 0x011b | ||
64 | #define bytes2word(b0, b1, b2, b3) \ | ||
65 | (((u32)(b3) << 24) | ((u32)(b2) << 16) | ((u32)(b1) << 8) | (b0)) | ||
66 | |||
67 | /* define the finite field multiplies required for Rijndael */ | ||
68 | #define f2(x) ((x) ? pow[log[x] + 0x19] : 0) | ||
69 | #define f3(x) ((x) ? pow[log[x] + 0x01] : 0) | ||
70 | #define f9(x) ((x) ? pow[log[x] + 0xc7] : 0) | ||
71 | #define fb(x) ((x) ? pow[log[x] + 0x68] : 0) | ||
72 | #define fd(x) ((x) ? pow[log[x] + 0xee] : 0) | ||
73 | #define fe(x) ((x) ? pow[log[x] + 0xdf] : 0) | ||
74 | #define fi(x) ((x) ? pow[255 - log[x]]: 0) | ||
75 | |||
76 | static inline u32 upr(u32 x, int n) | ||
77 | { | ||
78 | return (x << 8 * n) | (x >> (32 - 8 * n)); | ||
79 | } | ||
80 | |||
81 | static inline u8 bval(u32 x, int n) | ||
82 | { | ||
83 | return x >> 8 * n; | ||
84 | } | ||
85 | |||
86 | /* The forward and inverse affine transformations used in the S-box */ | ||
87 | #define fwd_affine(x) \ | ||
88 | (w = (u32)x, w ^= (w<<1)^(w<<2)^(w<<3)^(w<<4), 0x63^(u8)(w^(w>>8))) | ||
89 | |||
90 | #define inv_affine(x) \ | ||
91 | (w = (u32)x, w = (w<<1)^(w<<3)^(w<<6), 0x05^(u8)(w^(w>>8))) | ||
92 | |||
93 | static u32 rcon_tab[RC_LENGTH]; | ||
94 | |||
95 | u32 ft_tab[4][256]; | ||
96 | u32 fl_tab[4][256]; | ||
97 | static u32 im_tab[4][256]; | ||
98 | u32 il_tab[4][256]; | ||
99 | u32 it_tab[4][256]; | ||
100 | |||
101 | static void gen_tabs(void) | ||
102 | { | ||
103 | u32 i, w; | ||
104 | u8 pow[512], log[256]; | ||
105 | |||
106 | /* | ||
107 | * log and power tables for GF(2^8) finite field with | ||
108 | * WPOLY as modular polynomial - the simplest primitive | ||
109 | * root is 0x03, used here to generate the tables. | ||
110 | */ | ||
111 | i = 0; w = 1; | ||
112 | |||
113 | do { | ||
114 | pow[i] = (u8)w; | ||
115 | pow[i + 255] = (u8)w; | ||
116 | log[w] = (u8)i++; | ||
117 | w ^= (w << 1) ^ (w & 0x80 ? WPOLY : 0); | ||
118 | } while (w != 1); | ||
119 | |||
120 | for(i = 0, w = 1; i < RC_LENGTH; ++i) { | ||
121 | rcon_tab[i] = bytes2word(w, 0, 0, 0); | ||
122 | w = f2(w); | ||
123 | } | ||
124 | |||
125 | for(i = 0; i < 256; ++i) { | ||
126 | u8 b; | ||
127 | |||
128 | b = fwd_affine(fi((u8)i)); | ||
129 | w = bytes2word(f2(b), b, b, f3(b)); | ||
130 | |||
131 | /* tables for a normal encryption round */ | ||
132 | ft_tab[0][i] = w; | ||
133 | ft_tab[1][i] = upr(w, 1); | ||
134 | ft_tab[2][i] = upr(w, 2); | ||
135 | ft_tab[3][i] = upr(w, 3); | ||
136 | w = bytes2word(b, 0, 0, 0); | ||
137 | |||
138 | /* | ||
139 | * tables for last encryption round | ||
140 | * (may also be used in the key schedule) | ||
141 | */ | ||
142 | fl_tab[0][i] = w; | ||
143 | fl_tab[1][i] = upr(w, 1); | ||
144 | fl_tab[2][i] = upr(w, 2); | ||
145 | fl_tab[3][i] = upr(w, 3); | ||
146 | |||
147 | b = fi(inv_affine((u8)i)); | ||
148 | w = bytes2word(fe(b), f9(b), fd(b), fb(b)); | ||
149 | |||
150 | /* tables for the inverse mix column operation */ | ||
151 | im_tab[0][b] = w; | ||
152 | im_tab[1][b] = upr(w, 1); | ||
153 | im_tab[2][b] = upr(w, 2); | ||
154 | im_tab[3][b] = upr(w, 3); | ||
155 | |||
156 | /* tables for a normal decryption round */ | ||
157 | it_tab[0][i] = w; | ||
158 | it_tab[1][i] = upr(w,1); | ||
159 | it_tab[2][i] = upr(w,2); | ||
160 | it_tab[3][i] = upr(w,3); | ||
161 | |||
162 | w = bytes2word(b, 0, 0, 0); | ||
163 | |||
164 | /* tables for last decryption round */ | ||
165 | il_tab[0][i] = w; | ||
166 | il_tab[1][i] = upr(w,1); | ||
167 | il_tab[2][i] = upr(w,2); | ||
168 | il_tab[3][i] = upr(w,3); | ||
169 | } | ||
170 | } | ||
171 | |||
172 | #define four_tables(x,tab,vf,rf,c) \ | ||
173 | ( tab[0][bval(vf(x,0,c),rf(0,c))] ^ \ | ||
174 | tab[1][bval(vf(x,1,c),rf(1,c))] ^ \ | ||
175 | tab[2][bval(vf(x,2,c),rf(2,c))] ^ \ | ||
176 | tab[3][bval(vf(x,3,c),rf(3,c))] \ | ||
177 | ) | ||
178 | |||
179 | #define vf1(x,r,c) (x) | ||
180 | #define rf1(r,c) (r) | ||
181 | #define rf2(r,c) ((r-c)&3) | ||
182 | |||
183 | #define inv_mcol(x) four_tables(x,im_tab,vf1,rf1,0) | ||
184 | #define ls_box(x,c) four_tables(x,fl_tab,vf1,rf2,c) | ||
185 | |||
186 | #define ff(x) inv_mcol(x) | ||
187 | |||
188 | #define ke4(k,i) \ | ||
189 | { \ | ||
190 | k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[i]; \ | ||
191 | k[4*(i)+5] = ss[1] ^= ss[0]; \ | ||
192 | k[4*(i)+6] = ss[2] ^= ss[1]; \ | ||
193 | k[4*(i)+7] = ss[3] ^= ss[2]; \ | ||
194 | } | ||
195 | |||
196 | #define kel4(k,i) \ | ||
197 | { \ | ||
198 | k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[i]; \ | ||
199 | k[4*(i)+5] = ss[1] ^= ss[0]; \ | ||
200 | k[4*(i)+6] = ss[2] ^= ss[1]; k[4*(i)+7] = ss[3] ^= ss[2]; \ | ||
201 | } | ||
202 | |||
203 | #define ke6(k,i) \ | ||
204 | { \ | ||
205 | k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \ | ||
206 | k[6*(i)+ 7] = ss[1] ^= ss[0]; \ | ||
207 | k[6*(i)+ 8] = ss[2] ^= ss[1]; \ | ||
208 | k[6*(i)+ 9] = ss[3] ^= ss[2]; \ | ||
209 | k[6*(i)+10] = ss[4] ^= ss[3]; \ | ||
210 | k[6*(i)+11] = ss[5] ^= ss[4]; \ | ||
211 | } | ||
212 | |||
213 | #define kel6(k,i) \ | ||
214 | { \ | ||
215 | k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \ | ||
216 | k[6*(i)+ 7] = ss[1] ^= ss[0]; \ | ||
217 | k[6*(i)+ 8] = ss[2] ^= ss[1]; \ | ||
218 | k[6*(i)+ 9] = ss[3] ^= ss[2]; \ | ||
219 | } | ||
220 | |||
221 | #define ke8(k,i) \ | ||
222 | { \ | ||
223 | k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \ | ||
224 | k[8*(i)+ 9] = ss[1] ^= ss[0]; \ | ||
225 | k[8*(i)+10] = ss[2] ^= ss[1]; \ | ||
226 | k[8*(i)+11] = ss[3] ^= ss[2]; \ | ||
227 | k[8*(i)+12] = ss[4] ^= ls_box(ss[3],0); \ | ||
228 | k[8*(i)+13] = ss[5] ^= ss[4]; \ | ||
229 | k[8*(i)+14] = ss[6] ^= ss[5]; \ | ||
230 | k[8*(i)+15] = ss[7] ^= ss[6]; \ | ||
231 | } | ||
232 | |||
233 | #define kel8(k,i) \ | ||
234 | { \ | ||
235 | k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \ | ||
236 | k[8*(i)+ 9] = ss[1] ^= ss[0]; \ | ||
237 | k[8*(i)+10] = ss[2] ^= ss[1]; \ | ||
238 | k[8*(i)+11] = ss[3] ^= ss[2]; \ | ||
239 | } | ||
240 | |||
241 | #define kdf4(k,i) \ | ||
242 | { \ | ||
243 | ss[0] = ss[0] ^ ss[2] ^ ss[1] ^ ss[3]; \ | ||
244 | ss[1] = ss[1] ^ ss[3]; \ | ||
245 | ss[2] = ss[2] ^ ss[3]; \ | ||
246 | ss[3] = ss[3]; \ | ||
247 | ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i]; \ | ||
248 | ss[i % 4] ^= ss[4]; \ | ||
249 | ss[4] ^= k[4*(i)]; \ | ||
250 | k[4*(i)+4] = ff(ss[4]); \ | ||
251 | ss[4] ^= k[4*(i)+1]; \ | ||
252 | k[4*(i)+5] = ff(ss[4]); \ | ||
253 | ss[4] ^= k[4*(i)+2]; \ | ||
254 | k[4*(i)+6] = ff(ss[4]); \ | ||
255 | ss[4] ^= k[4*(i)+3]; \ | ||
256 | k[4*(i)+7] = ff(ss[4]); \ | ||
257 | } | ||
258 | |||
259 | #define kd4(k,i) \ | ||
260 | { \ | ||
261 | ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i]; \ | ||
262 | ss[i % 4] ^= ss[4]; \ | ||
263 | ss[4] = ff(ss[4]); \ | ||
264 | k[4*(i)+4] = ss[4] ^= k[4*(i)]; \ | ||
265 | k[4*(i)+5] = ss[4] ^= k[4*(i)+1]; \ | ||
266 | k[4*(i)+6] = ss[4] ^= k[4*(i)+2]; \ | ||
267 | k[4*(i)+7] = ss[4] ^= k[4*(i)+3]; \ | ||
268 | } | ||
269 | |||
270 | #define kdl4(k,i) \ | ||
271 | { \ | ||
272 | ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i]; \ | ||
273 | ss[i % 4] ^= ss[4]; \ | ||
274 | k[4*(i)+4] = (ss[0] ^= ss[1]) ^ ss[2] ^ ss[3]; \ | ||
275 | k[4*(i)+5] = ss[1] ^ ss[3]; \ | ||
276 | k[4*(i)+6] = ss[0]; \ | ||
277 | k[4*(i)+7] = ss[1]; \ | ||
278 | } | ||
279 | |||
280 | #define kdf6(k,i) \ | ||
281 | { \ | ||
282 | ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \ | ||
283 | k[6*(i)+ 6] = ff(ss[0]); \ | ||
284 | ss[1] ^= ss[0]; \ | ||
285 | k[6*(i)+ 7] = ff(ss[1]); \ | ||
286 | ss[2] ^= ss[1]; \ | ||
287 | k[6*(i)+ 8] = ff(ss[2]); \ | ||
288 | ss[3] ^= ss[2]; \ | ||
289 | k[6*(i)+ 9] = ff(ss[3]); \ | ||
290 | ss[4] ^= ss[3]; \ | ||
291 | k[6*(i)+10] = ff(ss[4]); \ | ||
292 | ss[5] ^= ss[4]; \ | ||
293 | k[6*(i)+11] = ff(ss[5]); \ | ||
294 | } | ||
295 | |||
296 | #define kd6(k,i) \ | ||
297 | { \ | ||
298 | ss[6] = ls_box(ss[5],3) ^ rcon_tab[i]; \ | ||
299 | ss[0] ^= ss[6]; ss[6] = ff(ss[6]); \ | ||
300 | k[6*(i)+ 6] = ss[6] ^= k[6*(i)]; \ | ||
301 | ss[1] ^= ss[0]; \ | ||
302 | k[6*(i)+ 7] = ss[6] ^= k[6*(i)+ 1]; \ | ||
303 | ss[2] ^= ss[1]; \ | ||
304 | k[6*(i)+ 8] = ss[6] ^= k[6*(i)+ 2]; \ | ||
305 | ss[3] ^= ss[2]; \ | ||
306 | k[6*(i)+ 9] = ss[6] ^= k[6*(i)+ 3]; \ | ||
307 | ss[4] ^= ss[3]; \ | ||
308 | k[6*(i)+10] = ss[6] ^= k[6*(i)+ 4]; \ | ||
309 | ss[5] ^= ss[4]; \ | ||
310 | k[6*(i)+11] = ss[6] ^= k[6*(i)+ 5]; \ | ||
311 | } | ||
312 | |||
313 | #define kdl6(k,i) \ | ||
314 | { \ | ||
315 | ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \ | ||
316 | k[6*(i)+ 6] = ss[0]; \ | ||
317 | ss[1] ^= ss[0]; \ | ||
318 | k[6*(i)+ 7] = ss[1]; \ | ||
319 | ss[2] ^= ss[1]; \ | ||
320 | k[6*(i)+ 8] = ss[2]; \ | ||
321 | ss[3] ^= ss[2]; \ | ||
322 | k[6*(i)+ 9] = ss[3]; \ | ||
323 | } | ||
324 | |||
325 | #define kdf8(k,i) \ | ||
326 | { \ | ||
327 | ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \ | ||
328 | k[8*(i)+ 8] = ff(ss[0]); \ | ||
329 | ss[1] ^= ss[0]; \ | ||
330 | k[8*(i)+ 9] = ff(ss[1]); \ | ||
331 | ss[2] ^= ss[1]; \ | ||
332 | k[8*(i)+10] = ff(ss[2]); \ | ||
333 | ss[3] ^= ss[2]; \ | ||
334 | k[8*(i)+11] = ff(ss[3]); \ | ||
335 | ss[4] ^= ls_box(ss[3],0); \ | ||
336 | k[8*(i)+12] = ff(ss[4]); \ | ||
337 | ss[5] ^= ss[4]; \ | ||
338 | k[8*(i)+13] = ff(ss[5]); \ | ||
339 | ss[6] ^= ss[5]; \ | ||
340 | k[8*(i)+14] = ff(ss[6]); \ | ||
341 | ss[7] ^= ss[6]; \ | ||
342 | k[8*(i)+15] = ff(ss[7]); \ | ||
343 | } | ||
344 | |||
345 | #define kd8(k,i) \ | ||
346 | { \ | ||
347 | u32 __g = ls_box(ss[7],3) ^ rcon_tab[i]; \ | ||
348 | ss[0] ^= __g; \ | ||
349 | __g = ff(__g); \ | ||
350 | k[8*(i)+ 8] = __g ^= k[8*(i)]; \ | ||
351 | ss[1] ^= ss[0]; \ | ||
352 | k[8*(i)+ 9] = __g ^= k[8*(i)+ 1]; \ | ||
353 | ss[2] ^= ss[1]; \ | ||
354 | k[8*(i)+10] = __g ^= k[8*(i)+ 2]; \ | ||
355 | ss[3] ^= ss[2]; \ | ||
356 | k[8*(i)+11] = __g ^= k[8*(i)+ 3]; \ | ||
357 | __g = ls_box(ss[3],0); \ | ||
358 | ss[4] ^= __g; \ | ||
359 | __g = ff(__g); \ | ||
360 | k[8*(i)+12] = __g ^= k[8*(i)+ 4]; \ | ||
361 | ss[5] ^= ss[4]; \ | ||
362 | k[8*(i)+13] = __g ^= k[8*(i)+ 5]; \ | ||
363 | ss[6] ^= ss[5]; \ | ||
364 | k[8*(i)+14] = __g ^= k[8*(i)+ 6]; \ | ||
365 | ss[7] ^= ss[6]; \ | ||
366 | k[8*(i)+15] = __g ^= k[8*(i)+ 7]; \ | ||
367 | } | ||
368 | |||
369 | #define kdl8(k,i) \ | ||
370 | { \ | ||
371 | ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \ | ||
372 | k[8*(i)+ 8] = ss[0]; \ | ||
373 | ss[1] ^= ss[0]; \ | ||
374 | k[8*(i)+ 9] = ss[1]; \ | ||
375 | ss[2] ^= ss[1]; \ | ||
376 | k[8*(i)+10] = ss[2]; \ | ||
377 | ss[3] ^= ss[2]; \ | ||
378 | k[8*(i)+11] = ss[3]; \ | ||
379 | } | ||
380 | |||
381 | static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, | ||
382 | unsigned int key_len) | ||
383 | { | ||
384 | int i; | ||
385 | u32 ss[8]; | ||
386 | struct aes_ctx *ctx = crypto_tfm_ctx(tfm); | ||
387 | const __le32 *key = (const __le32 *)in_key; | ||
388 | u32 *flags = &tfm->crt_flags; | ||
389 | |||
390 | /* encryption schedule */ | ||
391 | |||
392 | ctx->ekey[0] = ss[0] = le32_to_cpu(key[0]); | ||
393 | ctx->ekey[1] = ss[1] = le32_to_cpu(key[1]); | ||
394 | ctx->ekey[2] = ss[2] = le32_to_cpu(key[2]); | ||
395 | ctx->ekey[3] = ss[3] = le32_to_cpu(key[3]); | ||
396 | |||
397 | switch(key_len) { | ||
398 | case 16: | ||
399 | for (i = 0; i < 9; i++) | ||
400 | ke4(ctx->ekey, i); | ||
401 | kel4(ctx->ekey, 9); | ||
402 | ctx->rounds = 10; | ||
403 | break; | ||
404 | |||
405 | case 24: | ||
406 | ctx->ekey[4] = ss[4] = le32_to_cpu(key[4]); | ||
407 | ctx->ekey[5] = ss[5] = le32_to_cpu(key[5]); | ||
408 | for (i = 0; i < 7; i++) | ||
409 | ke6(ctx->ekey, i); | ||
410 | kel6(ctx->ekey, 7); | ||
411 | ctx->rounds = 12; | ||
412 | break; | ||
413 | |||
414 | case 32: | ||
415 | ctx->ekey[4] = ss[4] = le32_to_cpu(key[4]); | ||
416 | ctx->ekey[5] = ss[5] = le32_to_cpu(key[5]); | ||
417 | ctx->ekey[6] = ss[6] = le32_to_cpu(key[6]); | ||
418 | ctx->ekey[7] = ss[7] = le32_to_cpu(key[7]); | ||
419 | for (i = 0; i < 6; i++) | ||
420 | ke8(ctx->ekey, i); | ||
421 | kel8(ctx->ekey, 6); | ||
422 | ctx->rounds = 14; | ||
423 | break; | ||
424 | |||
425 | default: | ||
426 | *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; | ||
427 | return -EINVAL; | ||
428 | } | ||
429 | |||
430 | /* decryption schedule */ | ||
431 | |||
432 | ctx->dkey[0] = ss[0] = le32_to_cpu(key[0]); | ||
433 | ctx->dkey[1] = ss[1] = le32_to_cpu(key[1]); | ||
434 | ctx->dkey[2] = ss[2] = le32_to_cpu(key[2]); | ||
435 | ctx->dkey[3] = ss[3] = le32_to_cpu(key[3]); | ||
436 | |||
437 | switch (key_len) { | ||
438 | case 16: | ||
439 | kdf4(ctx->dkey, 0); | ||
440 | for (i = 1; i < 9; i++) | ||
441 | kd4(ctx->dkey, i); | ||
442 | kdl4(ctx->dkey, 9); | ||
443 | break; | ||
444 | |||
445 | case 24: | ||
446 | ctx->dkey[4] = ff(ss[4] = le32_to_cpu(key[4])); | ||
447 | ctx->dkey[5] = ff(ss[5] = le32_to_cpu(key[5])); | ||
448 | kdf6(ctx->dkey, 0); | ||
449 | for (i = 1; i < 7; i++) | ||
450 | kd6(ctx->dkey, i); | ||
451 | kdl6(ctx->dkey, 7); | ||
452 | break; | ||
453 | |||
454 | case 32: | ||
455 | ctx->dkey[4] = ff(ss[4] = le32_to_cpu(key[4])); | ||
456 | ctx->dkey[5] = ff(ss[5] = le32_to_cpu(key[5])); | ||
457 | ctx->dkey[6] = ff(ss[6] = le32_to_cpu(key[6])); | ||
458 | ctx->dkey[7] = ff(ss[7] = le32_to_cpu(key[7])); | ||
459 | kdf8(ctx->dkey, 0); | ||
460 | for (i = 1; i < 6; i++) | ||
461 | kd8(ctx->dkey, i); | ||
462 | kdl8(ctx->dkey, 6); | ||
463 | break; | ||
464 | } | ||
465 | return 0; | ||
466 | } | ||
467 | |||
468 | static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | ||
469 | { | ||
470 | aes_enc_blk(tfm, dst, src); | ||
471 | } | ||
472 | |||
473 | static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | ||
474 | { | ||
475 | aes_dec_blk(tfm, dst, src); | ||
476 | } | ||
477 | |||
478 | static struct crypto_alg aes_alg = { | ||
479 | .cra_name = "aes", | ||
480 | .cra_driver_name = "aes-i586", | ||
481 | .cra_priority = 200, | ||
482 | .cra_flags = CRYPTO_ALG_TYPE_CIPHER, | ||
483 | .cra_blocksize = AES_BLOCK_SIZE, | ||
484 | .cra_ctxsize = sizeof(struct aes_ctx), | ||
485 | .cra_module = THIS_MODULE, | ||
486 | .cra_list = LIST_HEAD_INIT(aes_alg.cra_list), | ||
487 | .cra_u = { | ||
488 | .cipher = { | ||
489 | .cia_min_keysize = AES_MIN_KEY_SIZE, | ||
490 | .cia_max_keysize = AES_MAX_KEY_SIZE, | ||
491 | .cia_setkey = aes_set_key, | ||
492 | .cia_encrypt = aes_encrypt, | ||
493 | .cia_decrypt = aes_decrypt | ||
494 | } | ||
495 | } | ||
496 | }; | ||
497 | |||
498 | static int __init aes_init(void) | ||
499 | { | ||
500 | gen_tabs(); | ||
501 | return crypto_register_alg(&aes_alg); | ||
502 | } | ||
503 | |||
504 | static void __exit aes_fini(void) | ||
505 | { | ||
506 | crypto_unregister_alg(&aes_alg); | ||
507 | } | ||
508 | |||
509 | module_init(aes_init); | ||
510 | module_exit(aes_fini); | ||
511 | |||
512 | MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, i586 asm optimized"); | ||
513 | MODULE_LICENSE("Dual BSD/GPL"); | ||
514 | MODULE_AUTHOR("Fruhwirth Clemens, James Morris, Brian Gladman, Adam Richter"); | ||
515 | MODULE_ALIAS("aes"); | ||
diff --git a/arch/x86/crypto/aes_64.c b/arch/x86/crypto/aes_64.c deleted file mode 100644 index 5cdb13ea5cc2..000000000000 --- a/arch/x86/crypto/aes_64.c +++ /dev/null | |||
@@ -1,336 +0,0 @@ | |||
1 | /* | ||
2 | * Cryptographic API. | ||
3 | * | ||
4 | * AES Cipher Algorithm. | ||
5 | * | ||
6 | * Based on Brian Gladman's code. | ||
7 | * | ||
8 | * Linux developers: | ||
9 | * Alexander Kjeldaas <astor@fast.no> | ||
10 | * Herbert Valerio Riedel <hvr@hvrlab.org> | ||
11 | * Kyle McMartin <kyle@debian.org> | ||
12 | * Adam J. Richter <adam@yggdrasil.com> (conversion to 2.5 API). | ||
13 | * Andreas Steinmetz <ast@domdv.de> (adapted to x86_64 assembler) | ||
14 | * | ||
15 | * This program is free software; you can redistribute it and/or modify | ||
16 | * it under the terms of the GNU General Public License as published by | ||
17 | * the Free Software Foundation; either version 2 of the License, or | ||
18 | * (at your option) any later version. | ||
19 | * | ||
20 | * --------------------------------------------------------------------------- | ||
21 | * Copyright (c) 2002, Dr Brian Gladman <brg@gladman.me.uk>, Worcester, UK. | ||
22 | * All rights reserved. | ||
23 | * | ||
24 | * LICENSE TERMS | ||
25 | * | ||
26 | * The free distribution and use of this software in both source and binary | ||
27 | * form is allowed (with or without changes) provided that: | ||
28 | * | ||
29 | * 1. distributions of this source code include the above copyright | ||
30 | * notice, this list of conditions and the following disclaimer; | ||
31 | * | ||
32 | * 2. distributions in binary form include the above copyright | ||
33 | * notice, this list of conditions and the following disclaimer | ||
34 | * in the documentation and/or other associated materials; | ||
35 | * | ||
36 | * 3. the copyright holder's name is not used to endorse products | ||
37 | * built using this software without specific written permission. | ||
38 | * | ||
39 | * ALTERNATIVELY, provided that this notice is retained in full, this product | ||
40 | * may be distributed under the terms of the GNU General Public License (GPL), | ||
41 | * in which case the provisions of the GPL apply INSTEAD OF those given above. | ||
42 | * | ||
43 | * DISCLAIMER | ||
44 | * | ||
45 | * This software is provided 'as is' with no explicit or implied warranties | ||
46 | * in respect of its properties, including, but not limited to, correctness | ||
47 | * and/or fitness for purpose. | ||
48 | * --------------------------------------------------------------------------- | ||
49 | */ | ||
50 | |||
51 | /* Some changes from the Gladman version: | ||
52 | s/RIJNDAEL(e_key)/E_KEY/g | ||
53 | s/RIJNDAEL(d_key)/D_KEY/g | ||
54 | */ | ||
55 | |||
56 | #include <asm/byteorder.h> | ||
57 | #include <linux/bitops.h> | ||
58 | #include <linux/crypto.h> | ||
59 | #include <linux/errno.h> | ||
60 | #include <linux/init.h> | ||
61 | #include <linux/module.h> | ||
62 | #include <linux/types.h> | ||
63 | |||
64 | #define AES_MIN_KEY_SIZE 16 | ||
65 | #define AES_MAX_KEY_SIZE 32 | ||
66 | |||
67 | #define AES_BLOCK_SIZE 16 | ||
68 | |||
69 | /* | ||
70 | * #define byte(x, nr) ((unsigned char)((x) >> (nr*8))) | ||
71 | */ | ||
/*
 * Return byte number n of x, where n == 0 selects the least significant
 * byte.  x is shifted right by 8 * n bits and the u8 return type keeps only
 * the low eight bits of the result.
 */
72 | static inline u8 byte(const u32 x, const unsigned n) | ||
73 | { | ||
74 | return x >> (n << 3); | ||
75 | } | ||
76 | |||
/*
 * Per-transform context.  key_length stores the key_len value given to
 * aes_set_key().  buf holds both key schedules: the E_KEY macro addresses
 * buf[0] onwards and the D_KEY macro addresses buf[60] onwards.
 */
77 | struct aes_ctx | ||
78 | { | ||
79 | u32 key_length; | ||
80 | u32 buf[120]; | ||
81 | }; | ||
82 | |||
/*
 * Start of the encryption (E_KEY) and decryption (D_KEY) key schedules
 * inside ctx->buf.  Both macros require a struct aes_ctx pointer named
 * "ctx" to be in scope at the point of use.
 */
83 | #define E_KEY (&ctx->buf[0]) | ||
84 | #define D_KEY (&ctx->buf[60]) | ||
85 | |||
/*
 * Lookup tables, all filled in by gen_tabs().  The four u8 tables are
 * __initdata: gen_tabs() is the only user visible in this file.
 * rco_tab and the aes_fl_tab rows are read later by the key-schedule macros.
 * The four aes_*_tab arrays have external linkage.
 * NOTE(review): presumably they are referenced from the assembly
 * implementation -- confirm against the .S file.
 */
86 | static u8 pow_tab[256] __initdata; | ||
87 | static u8 log_tab[256] __initdata; | ||
88 | static u8 sbx_tab[256] __initdata; | ||
89 | static u8 isb_tab[256] __initdata; | ||
90 | static u32 rco_tab[10]; | ||
91 | u32 aes_ft_tab[4][256]; | ||
92 | u32 aes_it_tab[4][256]; | ||
93 | |||
94 | u32 aes_fl_tab[4][256]; | ||
95 | u32 aes_il_tab[4][256]; | ||
96 | |||
/*
 * Multiply a and b through the log/power tables: the two logarithms are
 * added in 8-bit arithmetic and, if that sum wrapped (cc < aa), 1 is added
 * back before indexing pow_tab.  Zero operands are filtered out by the
 * ff_mult() wrapper before this function is called.
 */
97 | static inline u8 f_mult(u8 a, u8 b) | ||
98 | { | ||
99 | u8 aa = log_tab[a], cc = aa + log_tab[b]; | ||
100 | |||
101 | return pow_tab[cc + (cc < aa ? 1 : 0)]; | ||
102 | } | ||
103 | |||
/*
 * Table-based product that yields 0 when either operand is 0 and otherwise
 * defers to f_mult().  The arguments are not parenthesised and may be
 * evaluated twice, so pass only simple, side-effect-free expressions.
 */
104 | #define ff_mult(a, b) (a && b ? f_mult(a, b) : 0) | ||
105 | |||
/*
 * XOR of the four aes_fl_tab rows indexed by the four bytes of x.
 * gen_tabs() stores sbx_tab[i] rotated left by 8 * k bits in
 * aes_fl_tab[k][i], so the result is x with each byte replaced by its
 * sbx_tab entry, kept in the same byte position.  x is evaluated four times.
 */
106 | #define ls_box(x) \ | ||
107 | (aes_fl_tab[0][byte(x, 0)] ^ \ | ||
108 | aes_fl_tab[1][byte(x, 1)] ^ \ | ||
109 | aes_fl_tab[2][byte(x, 2)] ^ \ | ||
110 | aes_fl_tab[3][byte(x, 3)]) | ||
111 | |||
/*
 * Fill every lookup table used by this file: pow_tab/log_tab, rco_tab,
 * sbx_tab/isb_tab and the four-row aes_fl/ft/il/it tables.  Called once
 * from aes_init() before the algorithm is registered.
 */
112 | static void __init gen_tabs(void) | ||
113 | { | ||
114 | u32 i, t; | ||
115 | u8 p, q; | ||
116 | |||
117 | /* log and power tables for GF(2**8) finite field with | ||
118 | 0x011b as modular polynomial - the simplest primitive | ||
119 | root is 0x03, used here to generate the tables */ | ||
120 | |||
121 | for (i = 0, p = 1; i < 256; ++i) { | ||
122 | pow_tab[i] = (u8)p; | ||
123 | log_tab[p] = (u8)i; | ||
124 | |||
/* p = p * 3: (p << 1) reduced by 0x1b when bit 7 was set, XORed with p. */
125 | p ^= (p << 1) ^ (p & 0x80 ? 0x01b : 0); | ||
126 | } | ||
127 | |||
/*
 * NOTE(review): the last loop iteration presumably revisits p == 1 and
 * overwrites log_tab[1]; this resets it to 0 -- confirm.
 */
128 | log_tab[1] = 0; | ||
129 | |||
/* rco_tab[i]: 1 doubled i times, with the same 0x1b reduction. */
130 | for (i = 0, p = 1; i < 10; ++i) { | ||
131 | rco_tab[i] = p; | ||
132 | |||
133 | p = (p << 1) ^ (p & 0x80 ? 0x01b : 0); | ||
134 | } | ||
135 | |||
/*
 * Substitution table and its inverse.  p starts as pow_tab[255 - log_tab[i]]
 * (0 for i == 0), is then mixed with 8-bit rotations of itself and XORed
 * with 0x63.  isb_tab records the reverse mapping.
 */
136 | for (i = 0; i < 256; ++i) { | ||
137 | p = (i ? pow_tab[255 - log_tab[i]] : 0); | ||
138 | q = ((p >> 7) | (p << 1)) ^ ((p >> 6) | (p << 2)); | ||
139 | p ^= 0x63 ^ q ^ ((q >> 6) | (q << 2)); | ||
140 | sbx_tab[i] = p; | ||
141 | isb_tab[p] = (u8)i; | ||
142 | } | ||
143 | |||
/*
 * Four-row word tables.  Row k of each table is row 0 rotated left by
 * 8 * k bits.
 */
144 | for (i = 0; i < 256; ++i) { | ||
145 | p = sbx_tab[i]; | ||
146 | |||
/* aes_fl_tab: the plain sbx_tab byte. */
147 | t = p; | ||
148 | aes_fl_tab[0][i] = t; | ||
149 | aes_fl_tab[1][i] = rol32(t, 8); | ||
150 | aes_fl_tab[2][i] = rol32(t, 16); | ||
151 | aes_fl_tab[3][i] = rol32(t, 24); | ||
152 | |||
/* aes_ft_tab: bytes (2*p, p, p, 3*p), least significant byte first. */
153 | t = ((u32)ff_mult(2, p)) | | ||
154 | ((u32)p << 8) | | ||
155 | ((u32)p << 16) | ((u32)ff_mult(3, p) << 24); | ||
156 | |||
157 | aes_ft_tab[0][i] = t; | ||
158 | aes_ft_tab[1][i] = rol32(t, 8); | ||
159 | aes_ft_tab[2][i] = rol32(t, 16); | ||
160 | aes_ft_tab[3][i] = rol32(t, 24); | ||
161 | |||
162 | p = isb_tab[i]; | ||
163 | |||
/* aes_il_tab: the plain isb_tab byte. */
164 | t = p; | ||
165 | aes_il_tab[0][i] = t; | ||
166 | aes_il_tab[1][i] = rol32(t, 8); | ||
167 | aes_il_tab[2][i] = rol32(t, 16); | ||
168 | aes_il_tab[3][i] = rol32(t, 24); | ||
169 | |||
/* aes_it_tab: bytes (14*p, 9*p, 13*p, 11*p), least significant first. */
170 | t = ((u32)ff_mult(14, p)) | | ||
171 | ((u32)ff_mult(9, p) << 8) | | ||
172 | ((u32)ff_mult(13, p) << 16) | | ||
173 | ((u32)ff_mult(11, p) << 24); | ||
174 | |||
175 | aes_it_tab[0][i] = t; | ||
176 | aes_it_tab[1][i] = rol32(t, 8); | ||
177 | aes_it_tab[2][i] = rol32(t, 16); | ||
178 | aes_it_tab[3][i] = rol32(t, 24); | ||
179 | } | ||
180 | } | ||
181 | |||
/*
 * Double each of the four bytes packed in x independently: the low seven
 * bits of every byte are shifted left by one, and every byte whose top bit
 * was set is XORed with 0x1b.
 * NOTE(review): the expansion has no outer parentheses, so it is only safe
 * as a complete right-hand side, which is how imix_col() uses it.
 */
182 | #define star_x(x) (((x) & 0x7f7f7f7f) << 1) ^ ((((x) & 0x80808080) >> 7) * 0x1b) | ||
183 | |||
/*
 * Column transform used when deriving the decryption schedule.
 * With u = 2x, v = 4x, w = 8x and t = 9x (byte-wise, via star_x), it sets
 *   y = 14x ^ ror32(11x, 8) ^ ror32(13x, 16) ^ ror32(9x, 24).
 * Expands to several statements, uses the caller's locals u, v, w and t,
 * and evaluates x more than once; use it only as a full statement inside
 * braces.
 */
184 | #define imix_col(y, x) \ | ||
185 | u = star_x(x); \ | ||
186 | v = star_x(u); \ | ||
187 | w = star_x(v); \ | ||
188 | t = w ^ (x); \ | ||
189 | (y) = u ^ v ^ w; \ | ||
190 | (y) ^= ror32(u ^ t, 8) ^ \ | ||
191 | ror32(v ^ t, 16) ^ \ | ||
192 | ror32(t, 24) | ||
193 | |||
194 | /* initialise the key schedule from the user supplied key */ | ||
195 | |||
/*
 * One expansion step for a 4-word key: derives E_KEY[4i + 4 .. 4i + 7] from
 * E_KEY[4i .. 4i + 3].  Expects the caller's local t to hold the previous
 * schedule word on entry; t holds E_KEY[4i + 7] on exit.  Uses rco_tab[i].
 */
196 | #define loop4(i) \ | ||
197 | { \ | ||
198 | t = ror32(t, 8); t = ls_box(t) ^ rco_tab[i]; \ | ||
199 | t ^= E_KEY[4 * i]; E_KEY[4 * i + 4] = t; \ | ||
200 | t ^= E_KEY[4 * i + 1]; E_KEY[4 * i + 5] = t; \ | ||
201 | t ^= E_KEY[4 * i + 2]; E_KEY[4 * i + 6] = t; \ | ||
202 | t ^= E_KEY[4 * i + 3]; E_KEY[4 * i + 7] = t; \ | ||
203 | } | ||
204 | |||
/*
 * One expansion step for a 6-word key: derives E_KEY[6i + 6 .. 6i + 11] from
 * E_KEY[6i .. 6i + 5].  Expects the caller's local t to hold the previous
 * schedule word on entry; t holds E_KEY[6i + 11] on exit.  Uses rco_tab[i].
 */
205 | #define loop6(i) \ | ||
206 | { \ | ||
207 | t = ror32(t, 8); t = ls_box(t) ^ rco_tab[i]; \ | ||
208 | t ^= E_KEY[6 * i]; E_KEY[6 * i + 6] = t; \ | ||
209 | t ^= E_KEY[6 * i + 1]; E_KEY[6 * i + 7] = t; \ | ||
210 | t ^= E_KEY[6 * i + 2]; E_KEY[6 * i + 8] = t; \ | ||
211 | t ^= E_KEY[6 * i + 3]; E_KEY[6 * i + 9] = t; \ | ||
212 | t ^= E_KEY[6 * i + 4]; E_KEY[6 * i + 10] = t; \ | ||
213 | t ^= E_KEY[6 * i + 5]; E_KEY[6 * i + 11] = t; \ | ||
214 | } | ||
215 | |||
/*
 * One expansion step for an 8-word key: derives E_KEY[8i + 8 .. 8i + 15]
 * from E_KEY[8i .. 8i + 7].  Unlike loop4/loop6, the fifth new word gets an
 * extra ls_box() with no rotation and no rco_tab term.
 * aes_set_key() runs this for i = 0..6, so the last step writes up to
 * E_KEY[63], i.e. ctx->buf[60..63], which is also D_KEY[0..3].
 * aes_set_key() assigns D_KEY[0..3] after the loop, so the overlap is
 * overwritten.
 */
216 | #define loop8(i) \ | ||
217 | { \ | ||
218 | t = ror32(t, 8); ; t = ls_box(t) ^ rco_tab[i]; \ | ||
219 | t ^= E_KEY[8 * i]; E_KEY[8 * i + 8] = t; \ | ||
220 | t ^= E_KEY[8 * i + 1]; E_KEY[8 * i + 9] = t; \ | ||
221 | t ^= E_KEY[8 * i + 2]; E_KEY[8 * i + 10] = t; \ | ||
222 | t ^= E_KEY[8 * i + 3]; E_KEY[8 * i + 11] = t; \ | ||
223 | t = E_KEY[8 * i + 4] ^ ls_box(t); \ | ||
224 | E_KEY[8 * i + 12] = t; \ | ||
225 | t ^= E_KEY[8 * i + 5]; E_KEY[8 * i + 13] = t; \ | ||
226 | t ^= E_KEY[8 * i + 6]; E_KEY[8 * i + 14] = t; \ | ||
227 | t ^= E_KEY[8 * i + 7]; E_KEY[8 * i + 15] = t; \ | ||
228 | } | ||
229 | |||
/*
 * .cia_setkey hook: build both key schedules in the transform context.
 *
 * tfm:     transform whose context (struct aes_ctx) receives the schedules
 * in_key:  raw key, read as little-endian 32-bit words
 * key_len: key length in bytes
 *
 * Returns 0 on success.  If key_len is not a multiple of 8 it sets
 * CRYPTO_TFM_RES_BAD_KEY_LEN in tfm->crt_flags and returns -EINVAL.
 */
230 | static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, | ||
231 | unsigned int key_len) | ||
232 | { | ||
233 | struct aes_ctx *ctx = crypto_tfm_ctx(tfm); | ||
234 | const __le32 *key = (const __le32 *)in_key; | ||
235 | u32 *flags = &tfm->crt_flags; | ||
236 | u32 i, j, t, u, v, w; | ||
237 | |||
238 | if (key_len % 8) { | ||
239 | *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; | ||
240 | return -EINVAL; | ||
241 | } | ||
242 | |||
243 | ctx->key_length = key_len; | ||
244 | |||
/*
 * The first four key words start the encryption schedule and are also the
 * last four-word group of the decryption schedule (word index key_len + 24).
 */
245 | D_KEY[key_len + 24] = E_KEY[0] = le32_to_cpu(key[0]); | ||
246 | D_KEY[key_len + 25] = E_KEY[1] = le32_to_cpu(key[1]); | ||
247 | D_KEY[key_len + 26] = E_KEY[2] = le32_to_cpu(key[2]); | ||
248 | D_KEY[key_len + 27] = E_KEY[3] = le32_to_cpu(key[3]); | ||
249 | |||
/*
 * Expand the encryption schedule.  Only 16, 24 and 32 are handled.
 * NOTE(review): there is no default case, so any other multiple of 8 would
 * skip expansion; presumably cia_min/max_keysize keep such lengths out --
 * confirm.
 */
250 | switch (key_len) { | ||
251 | case 16: | ||
252 | t = E_KEY[3]; | ||
253 | for (i = 0; i < 10; ++i) | ||
254 | loop4(i); | ||
255 | break; | ||
256 | |||
257 | case 24: | ||
258 | E_KEY[4] = le32_to_cpu(key[4]); | ||
259 | t = E_KEY[5] = le32_to_cpu(key[5]); | ||
260 | for (i = 0; i < 8; ++i) | ||
261 | loop6 (i); | ||
262 | break; | ||
263 | |||
264 | case 32: | ||
265 | E_KEY[4] = le32_to_cpu(key[4]); | ||
266 | E_KEY[5] = le32_to_cpu(key[5]); | ||
267 | E_KEY[6] = le32_to_cpu(key[6]); | ||
268 | t = E_KEY[7] = le32_to_cpu(key[7]); | ||
269 | for (i = 0; i < 7; ++i) | ||
270 | loop8(i); | ||
271 | break; | ||
272 | } | ||
273 | |||
/*
 * The group at E_KEY[key_len + 24 ..] becomes the first decryption group,
 * copied unchanged.  For key_len == 32 this also overwrites the words that
 * the final loop8 step spilled into D_KEY[0..3].
 */
274 | D_KEY[0] = E_KEY[key_len + 24]; | ||
275 | D_KEY[1] = E_KEY[key_len + 25]; | ||
276 | D_KEY[2] = E_KEY[key_len + 26]; | ||
277 | D_KEY[3] = E_KEY[key_len + 27]; | ||
278 | |||
/*
 * Remaining words: j reverses the order of the four-word groups while
 * keeping each word's position inside its group, and imix_col transforms
 * the word on the way.
 */
279 | for (i = 4; i < key_len + 24; ++i) { | ||
280 | j = key_len + 24 - (i & ~3) + (i & 3); | ||
281 | imix_col(D_KEY[j], E_KEY[i]); | ||
282 | } | ||
283 | |||
284 | return 0; | ||
285 | } | ||
286 | |||
/*
 * Block routines called by aes_encrypt()/aes_decrypt() below; they are only
 * declared in this file.
 * NOTE(review): presumably implemented in aes-x86_64-asm_64.S -- confirm.
 */
287 | asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in); | ||
288 | asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in); | ||
289 | |||
/*
 * .cia_encrypt hook of aes_alg: hands tfm, dst and src unchanged to the
 * asmlinkage routine aes_enc_blk() declared above.
 */
290 | static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | ||
291 | { | ||
292 | aes_enc_blk(tfm, dst, src); | ||
293 | } | ||
294 | |||
/*
 * .cia_decrypt hook of aes_alg: hands tfm, dst and src unchanged to the
 * asmlinkage routine aes_dec_blk() declared above.
 */
295 | static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | ||
296 | { | ||
297 | aes_dec_blk(tfm, dst, src); | ||
298 | } | ||
299 | |||
/*
 * Algorithm descriptor passed to crypto_register_alg() by aes_init() and to
 * crypto_unregister_alg() by aes_fini().
 */
300 | static struct crypto_alg aes_alg = { | ||
/* Generic algorithm name and the name of this particular implementation. */
301 | .cra_name = "aes", | ||
302 | .cra_driver_name = "aes-x86_64", | ||
303 | .cra_priority = 200, | ||
304 | .cra_flags = CRYPTO_ALG_TYPE_CIPHER, | ||
305 | .cra_blocksize = AES_BLOCK_SIZE, | ||
/* Per-transform context: key_length plus the 120-word schedule buffer. */
306 | .cra_ctxsize = sizeof(struct aes_ctx), | ||
307 | .cra_module = THIS_MODULE, | ||
308 | .cra_list = LIST_HEAD_INIT(aes_alg.cra_list), | ||
309 | .cra_u = { | ||
310 | .cipher = { | ||
/* Accepted key sizes are 16..32 bytes (AES_MIN/MAX_KEY_SIZE above). */
311 | .cia_min_keysize = AES_MIN_KEY_SIZE, | ||
312 | .cia_max_keysize = AES_MAX_KEY_SIZE, | ||
313 | .cia_setkey = aes_set_key, | ||
314 | .cia_encrypt = aes_encrypt, | ||
315 | .cia_decrypt = aes_decrypt | ||
316 | } | ||
317 | } | ||
318 | }; | ||
319 | |||
/*
 * Module init: fill the lookup tables with gen_tabs(), then register
 * aes_alg.  Returns whatever crypto_register_alg() returns.
 */
320 | static int __init aes_init(void) | ||
321 | { | ||
322 | gen_tabs(); | ||
323 | return crypto_register_alg(&aes_alg); | ||
324 | } | ||
325 | |||
/* Module exit: unregister aes_alg; the return value is not checked. */
326 | static void __exit aes_fini(void) | ||
327 | { | ||
328 | crypto_unregister_alg(&aes_alg); | ||
329 | } | ||
330 | |||
/*
 * Module entry/exit hooks and metadata for the x86_64 build.
 * NOTE(review): the "aes" alias presumably lets a request for the generic
 * algorithm name load this module -- confirm against the module loader docs.
 */
331 | module_init(aes_init); | ||
332 | module_exit(aes_fini); | ||
333 | |||
334 | MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm"); | ||
335 | MODULE_LICENSE("GPL"); | ||
336 | MODULE_ALIAS("aes"); | ||
diff --git a/arch/x86/crypto/aes_glue.c b/arch/x86/crypto/aes_glue.c new file mode 100644 index 000000000000..71f457827116 --- /dev/null +++ b/arch/x86/crypto/aes_glue.c | |||
@@ -0,0 +1,57 @@ | |||
1 | /* | ||
2 | * Glue Code for the asm optimized version of the AES Cipher Algorithm | ||
3 | * | ||
4 | */ | ||
5 | |||
6 | #include <crypto/aes.h> | ||
7 | |||
/*
 * Block routines called by aes_encrypt()/aes_decrypt() below; they are only
 * declared in this file.
 * NOTE(review): presumably provided by the aes-i586-asm_32.S or
 * aes-x86_64-asm_64.S object this glue file is linked with -- confirm
 * against the Makefile.
 */
8 | asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in); | ||
9 | asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in); | ||
10 | |||
/*
 * .cia_encrypt hook of aes_alg: hands tfm, dst and src unchanged to the
 * asmlinkage routine aes_enc_blk() declared above.
 */
11 | static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | ||
12 | { | ||
13 | aes_enc_blk(tfm, dst, src); | ||
14 | } | ||
15 | |||
/*
 * .cia_decrypt hook of aes_alg: hands tfm, dst and src unchanged to the
 * asmlinkage routine aes_dec_blk() declared above.
 */
16 | static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | ||
17 | { | ||
18 | aes_dec_blk(tfm, dst, src); | ||
19 | } | ||
20 | |||
/*
 * Algorithm descriptor passed to crypto_register_alg() by aes_init() and to
 * crypto_unregister_alg() by aes_fini().  This file defines no key-schedule
 * code of its own: the context type (struct crypto_aes_ctx) and the setkey
 * routine (crypto_aes_set_key) both come from outside it.
 * NOTE(review): presumably both are declared in <crypto/aes.h> -- confirm.
 */
21 | static struct crypto_alg aes_alg = { | ||
/* Generic algorithm name and the name of this particular implementation. */
22 | .cra_name = "aes", | ||
23 | .cra_driver_name = "aes-asm", | ||
24 | .cra_priority = 200, | ||
25 | .cra_flags = CRYPTO_ALG_TYPE_CIPHER, | ||
26 | .cra_blocksize = AES_BLOCK_SIZE, | ||
27 | .cra_ctxsize = sizeof(struct crypto_aes_ctx), | ||
28 | .cra_module = THIS_MODULE, | ||
29 | .cra_list = LIST_HEAD_INIT(aes_alg.cra_list), | ||
30 | .cra_u = { | ||
31 | .cipher = { | ||
/* Accepted key size range and the three cipher callbacks. */
32 | .cia_min_keysize = AES_MIN_KEY_SIZE, | ||
33 | .cia_max_keysize = AES_MAX_KEY_SIZE, | ||
34 | .cia_setkey = crypto_aes_set_key, | ||
35 | .cia_encrypt = aes_encrypt, | ||
36 | .cia_decrypt = aes_decrypt | ||
37 | } | ||
38 | } | ||
39 | }; | ||
40 | |||
/*
 * Module init: register aes_alg and return whatever crypto_register_alg()
 * returns.  No table generation happens in this file.
 */
41 | static int __init aes_init(void) | ||
42 | { | ||
43 | return crypto_register_alg(&aes_alg); | ||
44 | } | ||
45 | |||
/* Module exit: unregister aes_alg; the return value is not checked. */
46 | static void __exit aes_fini(void) | ||
47 | { | ||
48 | crypto_unregister_alg(&aes_alg); | ||
49 | } | ||
50 | |||
/*
 * Module entry/exit hooks and metadata for the shared glue code.
 * Two aliases are declared: the generic "aes" name and the driver name
 * "aes-asm" used in aes_alg.cra_driver_name.
 * NOTE(review): presumably each alias lets a request for that name load
 * this module -- confirm against the module loader docs.
 */
51 | module_init(aes_init); | ||
52 | module_exit(aes_fini); | ||
53 | |||
54 | MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, asm optimized"); | ||
55 | MODULE_LICENSE("GPL"); | ||
56 | MODULE_ALIAS("aes"); | ||
57 | MODULE_ALIAS("aes-asm"); | ||
diff --git a/arch/x86/crypto/salsa20-i586-asm_32.S b/arch/x86/crypto/salsa20-i586-asm_32.S new file mode 100644 index 000000000000..72eb306680b2 --- /dev/null +++ b/arch/x86/crypto/salsa20-i586-asm_32.S | |||
@@ -0,0 +1,1114 @@ | |||
1 | # salsa20_pm.s version 20051229 | ||
2 | # D. J. Bernstein | ||
3 | # Public domain. | ||
4 | |||
5 | # enter ECRYPT_encrypt_bytes | ||
6 | .text | ||
7 | .p2align 5 | ||
8 | .globl ECRYPT_encrypt_bytes | ||
9 | ECRYPT_encrypt_bytes: | ||
10 | mov %esp,%eax | ||
11 | and $31,%eax | ||
12 | add $256,%eax | ||
13 | sub %eax,%esp | ||
14 | # eax_stack = eax | ||
15 | movl %eax,80(%esp) | ||
16 | # ebx_stack = ebx | ||
17 | movl %ebx,84(%esp) | ||
18 | # esi_stack = esi | ||
19 | movl %esi,88(%esp) | ||
20 | # edi_stack = edi | ||
21 | movl %edi,92(%esp) | ||
22 | # ebp_stack = ebp | ||
23 | movl %ebp,96(%esp) | ||
24 | # x = arg1 | ||
25 | movl 4(%esp,%eax),%edx | ||
26 | # m = arg2 | ||
27 | movl 8(%esp,%eax),%esi | ||
28 | # out = arg3 | ||
29 | movl 12(%esp,%eax),%edi | ||
30 | # bytes = arg4 | ||
31 | movl 16(%esp,%eax),%ebx | ||
32 | # bytes -= 0 | ||
33 | sub $0,%ebx | ||
34 | # goto done if unsigned<= | ||
35 | jbe ._done | ||
36 | ._start: | ||
37 | # in0 = *(uint32 *) (x + 0) | ||
38 | movl 0(%edx),%eax | ||
39 | # in1 = *(uint32 *) (x + 4) | ||
40 | movl 4(%edx),%ecx | ||
41 | # in2 = *(uint32 *) (x + 8) | ||
42 | movl 8(%edx),%ebp | ||
43 | # j0 = in0 | ||
44 | movl %eax,164(%esp) | ||
45 | # in3 = *(uint32 *) (x + 12) | ||
46 | movl 12(%edx),%eax | ||
47 | # j1 = in1 | ||
48 | movl %ecx,168(%esp) | ||
49 | # in4 = *(uint32 *) (x + 16) | ||
50 | movl 16(%edx),%ecx | ||
51 | # j2 = in2 | ||
52 | movl %ebp,172(%esp) | ||
53 | # in5 = *(uint32 *) (x + 20) | ||
54 | movl 20(%edx),%ebp | ||
55 | # j3 = in3 | ||
56 | movl %eax,176(%esp) | ||
57 | # in6 = *(uint32 *) (x + 24) | ||
58 | movl 24(%edx),%eax | ||
59 | # j4 = in4 | ||
60 | movl %ecx,180(%esp) | ||
61 | # in7 = *(uint32 *) (x + 28) | ||
62 | movl 28(%edx),%ecx | ||
63 | # j5 = in5 | ||
64 | movl %ebp,184(%esp) | ||
65 | # in8 = *(uint32 *) (x + 32) | ||
66 | movl 32(%edx),%ebp | ||
67 | # j6 = in6 | ||
68 | movl %eax,188(%esp) | ||
69 | # in9 = *(uint32 *) (x + 36) | ||
70 | movl 36(%edx),%eax | ||
71 | # j7 = in7 | ||
72 | movl %ecx,192(%esp) | ||
73 | # in10 = *(uint32 *) (x + 40) | ||
74 | movl 40(%edx),%ecx | ||
75 | # j8 = in8 | ||
76 | movl %ebp,196(%esp) | ||
77 | # in11 = *(uint32 *) (x + 44) | ||
78 | movl 44(%edx),%ebp | ||
79 | # j9 = in9 | ||
80 | movl %eax,200(%esp) | ||
81 | # in12 = *(uint32 *) (x + 48) | ||
82 | movl 48(%edx),%eax | ||
83 | # j10 = in10 | ||
84 | movl %ecx,204(%esp) | ||
85 | # in13 = *(uint32 *) (x + 52) | ||
86 | movl 52(%edx),%ecx | ||
87 | # j11 = in11 | ||
88 | movl %ebp,208(%esp) | ||
89 | # in14 = *(uint32 *) (x + 56) | ||
90 | movl 56(%edx),%ebp | ||
91 | # j12 = in12 | ||
92 | movl %eax,212(%esp) | ||
93 | # in15 = *(uint32 *) (x + 60) | ||
94 | movl 60(%edx),%eax | ||
95 | # j13 = in13 | ||
96 | movl %ecx,216(%esp) | ||
97 | # j14 = in14 | ||
98 | movl %ebp,220(%esp) | ||
99 | # j15 = in15 | ||
100 | movl %eax,224(%esp) | ||
101 | # x_backup = x | ||
102 | movl %edx,64(%esp) | ||
103 | ._bytesatleast1: | ||
104 | # bytes - 64 | ||
105 | cmp $64,%ebx | ||
106 | # goto nocopy if unsigned>= | ||
107 | jae ._nocopy | ||
108 | # ctarget = out | ||
109 | movl %edi,228(%esp) | ||
110 | # out = &tmp | ||
111 | leal 0(%esp),%edi | ||
112 | # i = bytes | ||
113 | mov %ebx,%ecx | ||
114 | # while (i) { *out++ = *m++; --i } | ||
115 | rep movsb | ||
116 | # out = &tmp | ||
117 | leal 0(%esp),%edi | ||
118 | # m = &tmp | ||
119 | leal 0(%esp),%esi | ||
120 | ._nocopy: | ||
121 | # out_backup = out | ||
122 | movl %edi,72(%esp) | ||
123 | # m_backup = m | ||
124 | movl %esi,68(%esp) | ||
125 | # bytes_backup = bytes | ||
126 | movl %ebx,76(%esp) | ||
127 | # in0 = j0 | ||
128 | movl 164(%esp),%eax | ||
129 | # in1 = j1 | ||
130 | movl 168(%esp),%ecx | ||
131 | # in2 = j2 | ||
132 | movl 172(%esp),%edx | ||
133 | # in3 = j3 | ||
134 | movl 176(%esp),%ebx | ||
135 | # x0 = in0 | ||
136 | movl %eax,100(%esp) | ||
137 | # x1 = in1 | ||
138 | movl %ecx,104(%esp) | ||
139 | # x2 = in2 | ||
140 | movl %edx,108(%esp) | ||
141 | # x3 = in3 | ||
142 | movl %ebx,112(%esp) | ||
143 | # in4 = j4 | ||
144 | movl 180(%esp),%eax | ||
145 | # in5 = j5 | ||
146 | movl 184(%esp),%ecx | ||
147 | # in6 = j6 | ||
148 | movl 188(%esp),%edx | ||
149 | # in7 = j7 | ||
150 | movl 192(%esp),%ebx | ||
151 | # x4 = in4 | ||
152 | movl %eax,116(%esp) | ||
153 | # x5 = in5 | ||
154 | movl %ecx,120(%esp) | ||
155 | # x6 = in6 | ||
156 | movl %edx,124(%esp) | ||
157 | # x7 = in7 | ||
158 | movl %ebx,128(%esp) | ||
159 | # in8 = j8 | ||
160 | movl 196(%esp),%eax | ||
161 | # in9 = j9 | ||
162 | movl 200(%esp),%ecx | ||
163 | # in10 = j10 | ||
164 | movl 204(%esp),%edx | ||
165 | # in11 = j11 | ||
166 | movl 208(%esp),%ebx | ||
167 | # x8 = in8 | ||
168 | movl %eax,132(%esp) | ||
169 | # x9 = in9 | ||
170 | movl %ecx,136(%esp) | ||
171 | # x10 = in10 | ||
172 | movl %edx,140(%esp) | ||
173 | # x11 = in11 | ||
174 | movl %ebx,144(%esp) | ||
175 | # in12 = j12 | ||
176 | movl 212(%esp),%eax | ||
177 | # in13 = j13 | ||
178 | movl 216(%esp),%ecx | ||
179 | # in14 = j14 | ||
180 | movl 220(%esp),%edx | ||
181 | # in15 = j15 | ||
182 | movl 224(%esp),%ebx | ||
183 | # x12 = in12 | ||
184 | movl %eax,148(%esp) | ||
185 | # x13 = in13 | ||
186 | movl %ecx,152(%esp) | ||
187 | # x14 = in14 | ||
188 | movl %edx,156(%esp) | ||
189 | # x15 = in15 | ||
190 | movl %ebx,160(%esp) | ||
191 | # i = 20 | ||
192 | mov $20,%ebp | ||
193 | # p = x0 | ||
194 | movl 100(%esp),%eax | ||
195 | # s = x5 | ||
196 | movl 120(%esp),%ecx | ||
197 | # t = x10 | ||
198 | movl 140(%esp),%edx | ||
199 | # w = x15 | ||
200 | movl 160(%esp),%ebx | ||
201 | ._mainloop: | ||
202 | # x0 = p | ||
203 | movl %eax,100(%esp) | ||
204 | # x10 = t | ||
205 | movl %edx,140(%esp) | ||
206 | # p += x12 | ||
207 | addl 148(%esp),%eax | ||
208 | # x5 = s | ||
209 | movl %ecx,120(%esp) | ||
210 | # t += x6 | ||
211 | addl 124(%esp),%edx | ||
212 | # x15 = w | ||
213 | movl %ebx,160(%esp) | ||
214 | # r = x1 | ||
215 | movl 104(%esp),%esi | ||
216 | # r += s | ||
217 | add %ecx,%esi | ||
218 | # v = x11 | ||
219 | movl 144(%esp),%edi | ||
220 | # v += w | ||
221 | add %ebx,%edi | ||
222 | # p <<<= 7 | ||
223 | rol $7,%eax | ||
224 | # p ^= x4 | ||
225 | xorl 116(%esp),%eax | ||
226 | # t <<<= 7 | ||
227 | rol $7,%edx | ||
228 | # t ^= x14 | ||
229 | xorl 156(%esp),%edx | ||
230 | # r <<<= 7 | ||
231 | rol $7,%esi | ||
232 | # r ^= x9 | ||
233 | xorl 136(%esp),%esi | ||
234 | # v <<<= 7 | ||
235 | rol $7,%edi | ||
236 | # v ^= x3 | ||
237 | xorl 112(%esp),%edi | ||
238 | # x4 = p | ||
239 | movl %eax,116(%esp) | ||
240 | # x14 = t | ||
241 | movl %edx,156(%esp) | ||
242 | # p += x0 | ||
243 | addl 100(%esp),%eax | ||
244 | # x9 = r | ||
245 | movl %esi,136(%esp) | ||
246 | # t += x10 | ||
247 | addl 140(%esp),%edx | ||
248 | # x3 = v | ||
249 | movl %edi,112(%esp) | ||
250 | # p <<<= 9 | ||
251 | rol $9,%eax | ||
252 | # p ^= x8 | ||
253 | xorl 132(%esp),%eax | ||
254 | # t <<<= 9 | ||
255 | rol $9,%edx | ||
256 | # t ^= x2 | ||
257 | xorl 108(%esp),%edx | ||
258 | # s += r | ||
259 | add %esi,%ecx | ||
260 | # s <<<= 9 | ||
261 | rol $9,%ecx | ||
262 | # s ^= x13 | ||
263 | xorl 152(%esp),%ecx | ||
264 | # w += v | ||
265 | add %edi,%ebx | ||
266 | # w <<<= 9 | ||
267 | rol $9,%ebx | ||
268 | # w ^= x7 | ||
269 | xorl 128(%esp),%ebx | ||
270 | # x8 = p | ||
271 | movl %eax,132(%esp) | ||
272 | # x2 = t | ||
273 | movl %edx,108(%esp) | ||
274 | # p += x4 | ||
275 | addl 116(%esp),%eax | ||
276 | # x13 = s | ||
277 | movl %ecx,152(%esp) | ||
278 | # t += x14 | ||
279 | addl 156(%esp),%edx | ||
280 | # x7 = w | ||
281 | movl %ebx,128(%esp) | ||
282 | # p <<<= 13 | ||
283 | rol $13,%eax | ||
284 | # p ^= x12 | ||
285 | xorl 148(%esp),%eax | ||
286 | # t <<<= 13 | ||
287 | rol $13,%edx | ||
288 | # t ^= x6 | ||
289 | xorl 124(%esp),%edx | ||
290 | # r += s | ||
291 | add %ecx,%esi | ||
292 | # r <<<= 13 | ||
293 | rol $13,%esi | ||
294 | # r ^= x1 | ||
295 | xorl 104(%esp),%esi | ||
296 | # v += w | ||
297 | add %ebx,%edi | ||
298 | # v <<<= 13 | ||
299 | rol $13,%edi | ||
300 | # v ^= x11 | ||
301 | xorl 144(%esp),%edi | ||
302 | # x12 = p | ||
303 | movl %eax,148(%esp) | ||
304 | # x6 = t | ||
305 | movl %edx,124(%esp) | ||
306 | # p += x8 | ||
307 | addl 132(%esp),%eax | ||
308 | # x1 = r | ||
309 | movl %esi,104(%esp) | ||
310 | # t += x2 | ||
311 | addl 108(%esp),%edx | ||
312 | # x11 = v | ||
313 | movl %edi,144(%esp) | ||
314 | # p <<<= 18 | ||
315 | rol $18,%eax | ||
316 | # p ^= x0 | ||
317 | xorl 100(%esp),%eax | ||
318 | # t <<<= 18 | ||
319 | rol $18,%edx | ||
320 | # t ^= x10 | ||
321 | xorl 140(%esp),%edx | ||
322 | # s += r | ||
323 | add %esi,%ecx | ||
324 | # s <<<= 18 | ||
325 | rol $18,%ecx | ||
326 | # s ^= x5 | ||
327 | xorl 120(%esp),%ecx | ||
328 | # w += v | ||
329 | add %edi,%ebx | ||
330 | # w <<<= 18 | ||
331 | rol $18,%ebx | ||
332 | # w ^= x15 | ||
333 | xorl 160(%esp),%ebx | ||
334 | # x0 = p | ||
335 | movl %eax,100(%esp) | ||
336 | # x10 = t | ||
337 | movl %edx,140(%esp) | ||
338 | # p += x3 | ||
339 | addl 112(%esp),%eax | ||
340 | # p <<<= 7 | ||
341 | rol $7,%eax | ||
342 | # x5 = s | ||
343 | movl %ecx,120(%esp) | ||
344 | # t += x9 | ||
345 | addl 136(%esp),%edx | ||
346 | # x15 = w | ||
347 | movl %ebx,160(%esp) | ||
348 | # r = x4 | ||
349 | movl 116(%esp),%esi | ||
350 | # r += s | ||
351 | add %ecx,%esi | ||
352 | # v = x14 | ||
353 | movl 156(%esp),%edi | ||
354 | # v += w | ||
355 | add %ebx,%edi | ||
356 | # p ^= x1 | ||
357 | xorl 104(%esp),%eax | ||
358 | # t <<<= 7 | ||
359 | rol $7,%edx | ||
360 | # t ^= x11 | ||
361 | xorl 144(%esp),%edx | ||
362 | # r <<<= 7 | ||
363 | rol $7,%esi | ||
364 | # r ^= x6 | ||
365 | xorl 124(%esp),%esi | ||
366 | # v <<<= 7 | ||
367 | rol $7,%edi | ||
368 | # v ^= x12 | ||
369 | xorl 148(%esp),%edi | ||
370 | # x1 = p | ||
371 | movl %eax,104(%esp) | ||
372 | # x11 = t | ||
373 | movl %edx,144(%esp) | ||
374 | # p += x0 | ||
375 | addl 100(%esp),%eax | ||
376 | # x6 = r | ||
377 | movl %esi,124(%esp) | ||
378 | # t += x10 | ||
379 | addl 140(%esp),%edx | ||
380 | # x12 = v | ||
381 | movl %edi,148(%esp) | ||
382 | # p <<<= 9 | ||
383 | rol $9,%eax | ||
384 | # p ^= x2 | ||
385 | xorl 108(%esp),%eax | ||
386 | # t <<<= 9 | ||
387 | rol $9,%edx | ||
388 | # t ^= x8 | ||
389 | xorl 132(%esp),%edx | ||
390 | # s += r | ||
391 | add %esi,%ecx | ||
392 | # s <<<= 9 | ||
393 | rol $9,%ecx | ||
394 | # s ^= x7 | ||
395 | xorl 128(%esp),%ecx | ||
396 | # w += v | ||
397 | add %edi,%ebx | ||
398 | # w <<<= 9 | ||
399 | rol $9,%ebx | ||
400 | # w ^= x13 | ||
401 | xorl 152(%esp),%ebx | ||
402 | # x2 = p | ||
403 | movl %eax,108(%esp) | ||
404 | # x8 = t | ||
405 | movl %edx,132(%esp) | ||
406 | # p += x1 | ||
407 | addl 104(%esp),%eax | ||
408 | # x7 = s | ||
409 | movl %ecx,128(%esp) | ||
410 | # t += x11 | ||
411 | addl 144(%esp),%edx | ||
412 | # x13 = w | ||
413 | movl %ebx,152(%esp) | ||
414 | # p <<<= 13 | ||
415 | rol $13,%eax | ||
416 | # p ^= x3 | ||
417 | xorl 112(%esp),%eax | ||
418 | # t <<<= 13 | ||
419 | rol $13,%edx | ||
420 | # t ^= x9 | ||
421 | xorl 136(%esp),%edx | ||
422 | # r += s | ||
423 | add %ecx,%esi | ||
424 | # r <<<= 13 | ||
425 | rol $13,%esi | ||
426 | # r ^= x4 | ||
427 | xorl 116(%esp),%esi | ||
428 | # v += w | ||
429 | add %ebx,%edi | ||
430 | # v <<<= 13 | ||
431 | rol $13,%edi | ||
432 | # v ^= x14 | ||
433 | xorl 156(%esp),%edi | ||
434 | # x3 = p | ||
435 | movl %eax,112(%esp) | ||
436 | # x9 = t | ||
437 | movl %edx,136(%esp) | ||
438 | # p += x2 | ||
439 | addl 108(%esp),%eax | ||
440 | # x4 = r | ||
441 | movl %esi,116(%esp) | ||
442 | # t += x8 | ||
443 | addl 132(%esp),%edx | ||
444 | # x14 = v | ||
445 | movl %edi,156(%esp) | ||
446 | # p <<<= 18 | ||
447 | rol $18,%eax | ||
448 | # p ^= x0 | ||
449 | xorl 100(%esp),%eax | ||
450 | # t <<<= 18 | ||
451 | rol $18,%edx | ||
452 | # t ^= x10 | ||
453 | xorl 140(%esp),%edx | ||
454 | # s += r | ||
455 | add %esi,%ecx | ||
456 | # s <<<= 18 | ||
457 | rol $18,%ecx | ||
458 | # s ^= x5 | ||
459 | xorl 120(%esp),%ecx | ||
460 | # w += v | ||
461 | add %edi,%ebx | ||
462 | # w <<<= 18 | ||
463 | rol $18,%ebx | ||
464 | # w ^= x15 | ||
465 | xorl 160(%esp),%ebx | ||
466 | # x0 = p | ||
467 | movl %eax,100(%esp) | ||
468 | # x10 = t | ||
469 | movl %edx,140(%esp) | ||
470 | # p += x12 | ||
471 | addl 148(%esp),%eax | ||
472 | # x5 = s | ||
473 | movl %ecx,120(%esp) | ||
474 | # t += x6 | ||
475 | addl 124(%esp),%edx | ||
476 | # x15 = w | ||
477 | movl %ebx,160(%esp) | ||
478 | # r = x1 | ||
479 | movl 104(%esp),%esi | ||
480 | # r += s | ||
481 | add %ecx,%esi | ||
482 | # v = x11 | ||
483 | movl 144(%esp),%edi | ||
484 | # v += w | ||
485 | add %ebx,%edi | ||
486 | # p <<<= 7 | ||
487 | rol $7,%eax | ||
488 | # p ^= x4 | ||
489 | xorl 116(%esp),%eax | ||
490 | # t <<<= 7 | ||
491 | rol $7,%edx | ||
492 | # t ^= x14 | ||
493 | xorl 156(%esp),%edx | ||
494 | # r <<<= 7 | ||
495 | rol $7,%esi | ||
496 | # r ^= x9 | ||
497 | xorl 136(%esp),%esi | ||
498 | # v <<<= 7 | ||
499 | rol $7,%edi | ||
500 | # v ^= x3 | ||
501 | xorl 112(%esp),%edi | ||
502 | # x4 = p | ||
503 | movl %eax,116(%esp) | ||
504 | # x14 = t | ||
505 | movl %edx,156(%esp) | ||
506 | # p += x0 | ||
507 | addl 100(%esp),%eax | ||
508 | # x9 = r | ||
509 | movl %esi,136(%esp) | ||
510 | # t += x10 | ||
511 | addl 140(%esp),%edx | ||
512 | # x3 = v | ||
513 | movl %edi,112(%esp) | ||
514 | # p <<<= 9 | ||
515 | rol $9,%eax | ||
516 | # p ^= x8 | ||
517 | xorl 132(%esp),%eax | ||
518 | # t <<<= 9 | ||
519 | rol $9,%edx | ||
520 | # t ^= x2 | ||
521 | xorl 108(%esp),%edx | ||
522 | # s += r | ||
523 | add %esi,%ecx | ||
524 | # s <<<= 9 | ||
525 | rol $9,%ecx | ||
526 | # s ^= x13 | ||
527 | xorl 152(%esp),%ecx | ||
528 | # w += v | ||
529 | add %edi,%ebx | ||
530 | # w <<<= 9 | ||
531 | rol $9,%ebx | ||
532 | # w ^= x7 | ||
533 | xorl 128(%esp),%ebx | ||
534 | # x8 = p | ||
535 | movl %eax,132(%esp) | ||
536 | # x2 = t | ||
537 | movl %edx,108(%esp) | ||
538 | # p += x4 | ||
539 | addl 116(%esp),%eax | ||
540 | # x13 = s | ||
541 | movl %ecx,152(%esp) | ||
542 | # t += x14 | ||
543 | addl 156(%esp),%edx | ||
544 | # x7 = w | ||
545 | movl %ebx,128(%esp) | ||
546 | # p <<<= 13 | ||
547 | rol $13,%eax | ||
548 | # p ^= x12 | ||
549 | xorl 148(%esp),%eax | ||
550 | # t <<<= 13 | ||
551 | rol $13,%edx | ||
552 | # t ^= x6 | ||
553 | xorl 124(%esp),%edx | ||
554 | # r += s | ||
555 | add %ecx,%esi | ||
556 | # r <<<= 13 | ||
557 | rol $13,%esi | ||
558 | # r ^= x1 | ||
559 | xorl 104(%esp),%esi | ||
560 | # v += w | ||
561 | add %ebx,%edi | ||
562 | # v <<<= 13 | ||
563 | rol $13,%edi | ||
564 | # v ^= x11 | ||
565 | xorl 144(%esp),%edi | ||
566 | # x12 = p | ||
567 | movl %eax,148(%esp) | ||
568 | # x6 = t | ||
569 | movl %edx,124(%esp) | ||
570 | # p += x8 | ||
571 | addl 132(%esp),%eax | ||
572 | # x1 = r | ||
573 | movl %esi,104(%esp) | ||
574 | # t += x2 | ||
575 | addl 108(%esp),%edx | ||
576 | # x11 = v | ||
577 | movl %edi,144(%esp) | ||
578 | # p <<<= 18 | ||
579 | rol $18,%eax | ||
580 | # p ^= x0 | ||
581 | xorl 100(%esp),%eax | ||
582 | # t <<<= 18 | ||
583 | rol $18,%edx | ||
584 | # t ^= x10 | ||
585 | xorl 140(%esp),%edx | ||
586 | # s += r | ||
587 | add %esi,%ecx | ||
588 | # s <<<= 18 | ||
589 | rol $18,%ecx | ||
590 | # s ^= x5 | ||
591 | xorl 120(%esp),%ecx | ||
592 | # w += v | ||
593 | add %edi,%ebx | ||
594 | # w <<<= 18 | ||
595 | rol $18,%ebx | ||
596 | # w ^= x15 | ||
597 | xorl 160(%esp),%ebx | ||
598 | # x0 = p | ||
599 | movl %eax,100(%esp) | ||
600 | # x10 = t | ||
601 | movl %edx,140(%esp) | ||
602 | # p += x3 | ||
603 | addl 112(%esp),%eax | ||
604 | # p <<<= 7 | ||
605 | rol $7,%eax | ||
606 | # x5 = s | ||
607 | movl %ecx,120(%esp) | ||
608 | # t += x9 | ||
609 | addl 136(%esp),%edx | ||
610 | # x15 = w | ||
611 | movl %ebx,160(%esp) | ||
612 | # r = x4 | ||
613 | movl 116(%esp),%esi | ||
614 | # r += s | ||
615 | add %ecx,%esi | ||
616 | # v = x14 | ||
617 | movl 156(%esp),%edi | ||
618 | # v += w | ||
619 | add %ebx,%edi | ||
620 | # p ^= x1 | ||
621 | xorl 104(%esp),%eax | ||
622 | # t <<<= 7 | ||
623 | rol $7,%edx | ||
624 | # t ^= x11 | ||
625 | xorl 144(%esp),%edx | ||
626 | # r <<<= 7 | ||
627 | rol $7,%esi | ||
628 | # r ^= x6 | ||
629 | xorl 124(%esp),%esi | ||
630 | # v <<<= 7 | ||
631 | rol $7,%edi | ||
632 | # v ^= x12 | ||
633 | xorl 148(%esp),%edi | ||
634 | # x1 = p | ||
635 | movl %eax,104(%esp) | ||
636 | # x11 = t | ||
637 | movl %edx,144(%esp) | ||
638 | # p += x0 | ||
639 | addl 100(%esp),%eax | ||
640 | # x6 = r | ||
641 | movl %esi,124(%esp) | ||
642 | # t += x10 | ||
643 | addl 140(%esp),%edx | ||
644 | # x12 = v | ||
645 | movl %edi,148(%esp) | ||
646 | # p <<<= 9 | ||
647 | rol $9,%eax | ||
648 | # p ^= x2 | ||
649 | xorl 108(%esp),%eax | ||
650 | # t <<<= 9 | ||
651 | rol $9,%edx | ||
652 | # t ^= x8 | ||
653 | xorl 132(%esp),%edx | ||
654 | # s += r | ||
655 | add %esi,%ecx | ||
656 | # s <<<= 9 | ||
657 | rol $9,%ecx | ||
658 | # s ^= x7 | ||
659 | xorl 128(%esp),%ecx | ||
660 | # w += v | ||
661 | add %edi,%ebx | ||
662 | # w <<<= 9 | ||
663 | rol $9,%ebx | ||
664 | # w ^= x13 | ||
665 | xorl 152(%esp),%ebx | ||
666 | # x2 = p | ||
667 | movl %eax,108(%esp) | ||
668 | # x8 = t | ||
669 | movl %edx,132(%esp) | ||
670 | # p += x1 | ||
671 | addl 104(%esp),%eax | ||
672 | # x7 = s | ||
673 | movl %ecx,128(%esp) | ||
674 | # t += x11 | ||
675 | addl 144(%esp),%edx | ||
676 | # x13 = w | ||
677 | movl %ebx,152(%esp) | ||
678 | # p <<<= 13 | ||
679 | rol $13,%eax | ||
680 | # p ^= x3 | ||
681 | xorl 112(%esp),%eax | ||
682 | # t <<<= 13 | ||
683 | rol $13,%edx | ||
684 | # t ^= x9 | ||
685 | xorl 136(%esp),%edx | ||
686 | # r += s | ||
687 | add %ecx,%esi | ||
688 | # r <<<= 13 | ||
689 | rol $13,%esi | ||
690 | # r ^= x4 | ||
691 | xorl 116(%esp),%esi | ||
692 | # v += w | ||
693 | add %ebx,%edi | ||
694 | # v <<<= 13 | ||
695 | rol $13,%edi | ||
696 | # v ^= x14 | ||
697 | xorl 156(%esp),%edi | ||
698 | # x3 = p | ||
699 | movl %eax,112(%esp) | ||
700 | # x9 = t | ||
701 | movl %edx,136(%esp) | ||
702 | # p += x2 | ||
703 | addl 108(%esp),%eax | ||
704 | # x4 = r | ||
705 | movl %esi,116(%esp) | ||
706 | # t += x8 | ||
707 | addl 132(%esp),%edx | ||
708 | # x14 = v | ||
709 | movl %edi,156(%esp) | ||
710 | # p <<<= 18 | ||
711 | rol $18,%eax | ||
712 | # p ^= x0 | ||
713 | xorl 100(%esp),%eax | ||
714 | # t <<<= 18 | ||
715 | rol $18,%edx | ||
716 | # t ^= x10 | ||
717 | xorl 140(%esp),%edx | ||
718 | # s += r | ||
719 | add %esi,%ecx | ||
720 | # s <<<= 18 | ||
721 | rol $18,%ecx | ||
722 | # s ^= x5 | ||
723 | xorl 120(%esp),%ecx | ||
724 | # w += v | ||
725 | add %edi,%ebx | ||
726 | # w <<<= 18 | ||
727 | rol $18,%ebx | ||
728 | # w ^= x15 | ||
729 | xorl 160(%esp),%ebx | ||
730 | # i -= 4 | ||
731 | sub $4,%ebp | ||
732 | # goto mainloop if unsigned > | ||
733 | ja ._mainloop | ||
734 | # x0 = p | ||
735 | movl %eax,100(%esp) | ||
736 | # x5 = s | ||
737 | movl %ecx,120(%esp) | ||
738 | # x10 = t | ||
739 | movl %edx,140(%esp) | ||
740 | # x15 = w | ||
741 | movl %ebx,160(%esp) | ||
742 | # out = out_backup | ||
743 | movl 72(%esp),%edi | ||
744 | # m = m_backup | ||
745 | movl 68(%esp),%esi | ||
746 | # in0 = x0 | ||
747 | movl 100(%esp),%eax | ||
748 | # in1 = x1 | ||
749 | movl 104(%esp),%ecx | ||
750 | # in0 += j0 | ||
751 | addl 164(%esp),%eax | ||
752 | # in1 += j1 | ||
753 | addl 168(%esp),%ecx | ||
754 | # in0 ^= *(uint32 *) (m + 0) | ||
755 | xorl 0(%esi),%eax | ||
756 | # in1 ^= *(uint32 *) (m + 4) | ||
757 | xorl 4(%esi),%ecx | ||
758 | # *(uint32 *) (out + 0) = in0 | ||
759 | movl %eax,0(%edi) | ||
760 | # *(uint32 *) (out + 4) = in1 | ||
761 | movl %ecx,4(%edi) | ||
762 | # in2 = x2 | ||
763 | movl 108(%esp),%eax | ||
764 | # in3 = x3 | ||
765 | movl 112(%esp),%ecx | ||
766 | # in2 += j2 | ||
767 | addl 172(%esp),%eax | ||
768 | # in3 += j3 | ||
769 | addl 176(%esp),%ecx | ||
770 | # in2 ^= *(uint32 *) (m + 8) | ||
771 | xorl 8(%esi),%eax | ||
772 | # in3 ^= *(uint32 *) (m + 12) | ||
773 | xorl 12(%esi),%ecx | ||
774 | # *(uint32 *) (out + 8) = in2 | ||
775 | movl %eax,8(%edi) | ||
776 | # *(uint32 *) (out + 12) = in3 | ||
777 | movl %ecx,12(%edi) | ||
778 | # in4 = x4 | ||
779 | movl 116(%esp),%eax | ||
780 | # in5 = x5 | ||
781 | movl 120(%esp),%ecx | ||
782 | # in4 += j4 | ||
783 | addl 180(%esp),%eax | ||
784 | # in5 += j5 | ||
785 | addl 184(%esp),%ecx | ||
786 | # in4 ^= *(uint32 *) (m + 16) | ||
787 | xorl 16(%esi),%eax | ||
788 | # in5 ^= *(uint32 *) (m + 20) | ||
789 | xorl 20(%esi),%ecx | ||
790 | # *(uint32 *) (out + 16) = in4 | ||
791 | movl %eax,16(%edi) | ||
792 | # *(uint32 *) (out + 20) = in5 | ||
793 | movl %ecx,20(%edi) | ||
794 | # in6 = x6 | ||
795 | movl 124(%esp),%eax | ||
796 | # in7 = x7 | ||
797 | movl 128(%esp),%ecx | ||
798 | # in6 += j6 | ||
799 | addl 188(%esp),%eax | ||
800 | # in7 += j7 | ||
801 | addl 192(%esp),%ecx | ||
802 | # in6 ^= *(uint32 *) (m + 24) | ||
803 | xorl 24(%esi),%eax | ||
804 | # in7 ^= *(uint32 *) (m + 28) | ||
805 | xorl 28(%esi),%ecx | ||
806 | # *(uint32 *) (out + 24) = in6 | ||
807 | movl %eax,24(%edi) | ||
808 | # *(uint32 *) (out + 28) = in7 | ||
809 | movl %ecx,28(%edi) | ||
810 | # in8 = x8 | ||
811 | movl 132(%esp),%eax | ||
812 | # in9 = x9 | ||
813 | movl 136(%esp),%ecx | ||
814 | # in8 += j8 | ||
815 | addl 196(%esp),%eax | ||
816 | # in9 += j9 | ||
817 | addl 200(%esp),%ecx | ||
818 | # in8 ^= *(uint32 *) (m + 32) | ||
819 | xorl 32(%esi),%eax | ||
820 | # in9 ^= *(uint32 *) (m + 36) | ||
821 | xorl 36(%esi),%ecx | ||
822 | # *(uint32 *) (out + 32) = in8 | ||
823 | movl %eax,32(%edi) | ||
824 | # *(uint32 *) (out + 36) = in9 | ||
825 | movl %ecx,36(%edi) | ||
826 | # in10 = x10 | ||
827 | movl 140(%esp),%eax | ||
828 | # in11 = x11 | ||
829 | movl 144(%esp),%ecx | ||
830 | # in10 += j10 | ||
831 | addl 204(%esp),%eax | ||
832 | # in11 += j11 | ||
833 | addl 208(%esp),%ecx | ||
834 | # in10 ^= *(uint32 *) (m + 40) | ||
835 | xorl 40(%esi),%eax | ||
836 | # in11 ^= *(uint32 *) (m + 44) | ||
837 | xorl 44(%esi),%ecx | ||
838 | # *(uint32 *) (out + 40) = in10 | ||
839 | movl %eax,40(%edi) | ||
840 | # *(uint32 *) (out + 44) = in11 | ||
841 | movl %ecx,44(%edi) | ||
842 | # in12 = x12 | ||
843 | movl 148(%esp),%eax | ||
844 | # in13 = x13 | ||
845 | movl 152(%esp),%ecx | ||
846 | # in12 += j12 | ||
847 | addl 212(%esp),%eax | ||
848 | # in13 += j13 | ||
849 | addl 216(%esp),%ecx | ||
850 | # in12 ^= *(uint32 *) (m + 48) | ||
851 | xorl 48(%esi),%eax | ||
852 | # in13 ^= *(uint32 *) (m + 52) | ||
853 | xorl 52(%esi),%ecx | ||
854 | # *(uint32 *) (out + 48) = in12 | ||
855 | movl %eax,48(%edi) | ||
856 | # *(uint32 *) (out + 52) = in13 | ||
857 | movl %ecx,52(%edi) | ||
858 | # in14 = x14 | ||
859 | movl 156(%esp),%eax | ||
860 | # in15 = x15 | ||
861 | movl 160(%esp),%ecx | ||
862 | # in14 += j14 | ||
863 | addl 220(%esp),%eax | ||
864 | # in15 += j15 | ||
865 | addl 224(%esp),%ecx | ||
866 | # in14 ^= *(uint32 *) (m + 56) | ||
867 | xorl 56(%esi),%eax | ||
868 | # in15 ^= *(uint32 *) (m + 60) | ||
869 | xorl 60(%esi),%ecx | ||
870 | # *(uint32 *) (out + 56) = in14 | ||
871 | movl %eax,56(%edi) | ||
872 | # *(uint32 *) (out + 60) = in15 | ||
873 | movl %ecx,60(%edi) | ||
874 | # bytes = bytes_backup | ||
875 | movl 76(%esp),%ebx | ||
876 | # in8 = j8 | ||
877 | movl 196(%esp),%eax | ||
878 | # in9 = j9 | ||
879 | movl 200(%esp),%ecx | ||
880 | # in8 += 1 | ||
881 | add $1,%eax | ||
882 | # in9 += 0 + carry | ||
883 | adc $0,%ecx | ||
884 | # j8 = in8 | ||
885 | movl %eax,196(%esp) | ||
886 | # j9 = in9 | ||
887 | movl %ecx,200(%esp) | ||
888 | # bytes - 64 | ||
889 | cmp $64,%ebx | ||
890 | # goto bytesatleast65 if unsigned> | ||
891 | ja ._bytesatleast65 | ||
892 | # goto bytesatleast64 if unsigned>= | ||
893 | jae ._bytesatleast64 | ||
894 | # m = out | ||
895 | mov %edi,%esi | ||
896 | # out = ctarget | ||
897 | movl 228(%esp),%edi | ||
898 | # i = bytes | ||
899 | mov %ebx,%ecx | ||
900 | # while (i) { *out++ = *m++; --i } | ||
901 | rep movsb | ||
902 | ._bytesatleast64: | ||
903 | # x = x_backup | ||
904 | movl 64(%esp),%eax | ||
905 | # in8 = j8 | ||
906 | movl 196(%esp),%ecx | ||
907 | # in9 = j9 | ||
908 | movl 200(%esp),%edx | ||
909 | # *(uint32 *) (x + 32) = in8 | ||
910 | movl %ecx,32(%eax) | ||
911 | # *(uint32 *) (x + 36) = in9 | ||
912 | movl %edx,36(%eax) | ||
913 | ._done: | ||
914 | # eax = eax_stack | ||
915 | movl 80(%esp),%eax | ||
916 | # ebx = ebx_stack | ||
917 | movl 84(%esp),%ebx | ||
918 | # esi = esi_stack | ||
919 | movl 88(%esp),%esi | ||
920 | # edi = edi_stack | ||
921 | movl 92(%esp),%edi | ||
922 | # ebp = ebp_stack | ||
923 | movl 96(%esp),%ebp | ||
924 | # leave | ||
925 | add %eax,%esp | ||
926 | ret | ||
927 | ._bytesatleast65: | ||
928 | # bytes -= 64 | ||
929 | sub $64,%ebx | ||
930 | # out += 64 | ||
931 | add $64,%edi | ||
932 | # m += 64 | ||
933 | add $64,%esi | ||
934 | # goto bytesatleast1 | ||
935 | jmp ._bytesatleast1 | ||
936 | # enter ECRYPT_keysetup: fills state words 0..5, 10..15 of x from the key k and fixed constants; stack arguments are arg1 = x, arg2 = k, arg3 = kbits | ||
937 | .text | ||
938 | .p2align 5 | ||
939 | .globl ECRYPT_keysetup | ||
940 | ECRYPT_keysetup: | ||
941 | mov %esp,%eax | ||
942 | and $31,%eax | ||
943 | add $256,%eax | ||
944 | sub %eax,%esp | ||
945 | # eax_stack = eax (eax = (old esp & 31) + 256, the amount just subtracted from esp; kept so the exit path can add it back) | ||
946 | movl %eax,64(%esp) | ||
947 | # ebx_stack = ebx (ebx is overwritten below, restored at keysetupdone) | ||
948 | movl %ebx,68(%esp) | ||
949 | # esi_stack = esi (esi is overwritten below, restored at keysetupdone) | ||
950 | movl %esi,72(%esp) | ||
951 | # edi_stack = edi (edi is overwritten below, restored at keysetupdone) | ||
952 | movl %edi,76(%esp) | ||
953 | # ebp_stack = ebp (ebp is overwritten below, restored at keysetupdone) | ||
954 | movl %ebp,80(%esp) | ||
955 | # k = arg2 (esp + eax is the esp value at entry, so offsets 4/8/12 from it address the caller's stack arguments) | ||
956 | movl 8(%esp,%eax),%ecx | ||
957 | # kbits = arg3 | ||
958 | movl 12(%esp,%eax),%edx | ||
959 | # x = arg1 (loaded last because it overwrites eax, the index used for the argument loads above) | ||
960 | movl 4(%esp,%eax),%eax | ||
961 | # in1 = *(uint32 *) (k + 0) | ||
962 | movl 0(%ecx),%ebx | ||
963 | # in2 = *(uint32 *) (k + 4) | ||
964 | movl 4(%ecx),%esi | ||
965 | # in3 = *(uint32 *) (k + 8) | ||
966 | movl 8(%ecx),%edi | ||
967 | # in4 = *(uint32 *) (k + 12) | ||
968 | movl 12(%ecx),%ebp | ||
969 | # *(uint32 *) (x + 4) = in1 (first 16 key bytes go to state words 1..4 for either key size) | ||
970 | movl %ebx,4(%eax) | ||
971 | # *(uint32 *) (x + 8) = in2 | ||
972 | movl %esi,8(%eax) | ||
973 | # *(uint32 *) (x + 12) = in3 | ||
974 | movl %edi,12(%eax) | ||
975 | # *(uint32 *) (x + 16) = in4 | ||
976 | movl %ebp,16(%eax) | ||
977 | # kbits - 256 (unsigned compare; only the flags are used) | ||
978 | cmp $256,%edx | ||
979 | # goto kbits128 if unsigned< (every kbits value below 256 takes the kbits128 path, everything else falls through to kbits256) | ||
980 | jb ._kbits128 | ||
981 | ._kbits256: | ||
982 | # in11 = *(uint32 *) (k + 16) (kbits256: state words 11..14 come from key bytes 16..31) | ||
983 | movl 16(%ecx),%edx | ||
984 | # in12 = *(uint32 *) (k + 20) | ||
985 | movl 20(%ecx),%ebx | ||
986 | # in13 = *(uint32 *) (k + 24) | ||
987 | movl 24(%ecx),%esi | ||
988 | # in14 = *(uint32 *) (k + 28) (last read through k: the load overwrites the k pointer in ecx) | ||
989 | movl 28(%ecx),%ecx | ||
990 | # *(uint32 *) (x + 44) = in11 | ||
991 | movl %edx,44(%eax) | ||
992 | # *(uint32 *) (x + 48) = in12 | ||
993 | movl %ebx,48(%eax) | ||
994 | # *(uint32 *) (x + 52) = in13 | ||
995 | movl %esi,52(%eax) | ||
996 | # *(uint32 *) (x + 56) = in14 | ||
997 | movl %ecx,56(%eax) | ||
998 | # in0 = 1634760805 (0x61707865, little-endian ASCII "expa") | ||
999 | mov $1634760805,%ecx | ||
1000 | # in5 = 857760878 (0x3320646e, little-endian ASCII "nd 3") | ||
1001 | mov $857760878,%edx | ||
1002 | # in10 = 2036477234 (0x79622d32, little-endian ASCII "2-by") | ||
1003 | mov $2036477234,%ebx | ||
1004 | # in15 = 1797285236 (0x6b206574, little-endian ASCII "te k") | ||
1005 | mov $1797285236,%esi | ||
1006 | # *(uint32 *) (x + 0) = in0 (the four constants occupy state words 0, 5, 10 and 15) | ||
1007 | movl %ecx,0(%eax) | ||
1008 | # *(uint32 *) (x + 20) = in5 | ||
1009 | movl %edx,20(%eax) | ||
1010 | # *(uint32 *) (x + 40) = in10 | ||
1011 | movl %ebx,40(%eax) | ||
1012 | # *(uint32 *) (x + 60) = in15 | ||
1013 | movl %esi,60(%eax) | ||
1014 | # goto keysetupdone (skip the kbits128 variant) | ||
1015 | jmp ._keysetupdone | ||
1016 | ._kbits128: | ||
1017 | # in11 = *(uint32 *) (k + 0) (kbits128: key bytes 0..15 are read again and also stored to state words 11..14) | ||
1018 | movl 0(%ecx),%edx | ||
1019 | # in12 = *(uint32 *) (k + 4) | ||
1020 | movl 4(%ecx),%ebx | ||
1021 | # in13 = *(uint32 *) (k + 8) | ||
1022 | movl 8(%ecx),%esi | ||
1023 | # in14 = *(uint32 *) (k + 12) (last read through k: the load overwrites the k pointer in ecx) | ||
1024 | movl 12(%ecx),%ecx | ||
1025 | # *(uint32 *) (x + 44) = in11 | ||
1026 | movl %edx,44(%eax) | ||
1027 | # *(uint32 *) (x + 48) = in12 | ||
1028 | movl %ebx,48(%eax) | ||
1029 | # *(uint32 *) (x + 52) = in13 | ||
1030 | movl %esi,52(%eax) | ||
1031 | # *(uint32 *) (x + 56) = in14 | ||
1032 | movl %ecx,56(%eax) | ||
1033 | # in0 = 1634760805 (0x61707865, little-endian ASCII "expa") | ||
1034 | mov $1634760805,%ecx | ||
1035 | # in5 = 824206446 (0x3120646e, little-endian ASCII "nd 1"; differs from the kbits256 value) | ||
1036 | mov $824206446,%edx | ||
1037 | # in10 = 2036477238 (0x79622d36, little-endian ASCII "6-by"; differs from the kbits256 value) | ||
1038 | mov $2036477238,%ebx | ||
1039 | # in15 = 1797285236 (0x6b206574, little-endian ASCII "te k") | ||
1040 | mov $1797285236,%esi | ||
1041 | # *(uint32 *) (x + 0) = in0 (the four constants occupy state words 0, 5, 10 and 15) | ||
1042 | movl %ecx,0(%eax) | ||
1043 | # *(uint32 *) (x + 20) = in5 | ||
1044 | movl %edx,20(%eax) | ||
1045 | # *(uint32 *) (x + 40) = in10 | ||
1046 | movl %ebx,40(%eax) | ||
1047 | # *(uint32 *) (x + 60) = in15 | ||
1048 | movl %esi,60(%eax) | ||
1049 | ._keysetupdone: | ||
1050 | # eax = eax_stack (reload the frame size saved at entry) | ||
1051 | movl 64(%esp),%eax | ||
1052 | # ebx = ebx_stack (restore the caller's ebx) | ||
1053 | movl 68(%esp),%ebx | ||
1054 | # esi = esi_stack (restore the caller's esi) | ||
1055 | movl 72(%esp),%esi | ||
1056 | # edi = edi_stack (restore the caller's edi) | ||
1057 | movl 76(%esp),%edi | ||
1058 | # ebp = ebp_stack (restore the caller's ebp) | ||
1059 | movl 80(%esp),%ebp | ||
1060 | # leave: add the frame size back so esp points at the return address again, then return | ||
1061 | add %eax,%esp | ||
1062 | ret | ||
1063 | # enter ECRYPT_ivsetup: stores the 8-byte iv into state words 6..7 of x and zeroes state words 8..9; stack arguments are arg1 = x, arg2 = iv | ||
1064 | .text | ||
1065 | .p2align 5 | ||
1066 | .globl ECRYPT_ivsetup | ||
1067 | ECRYPT_ivsetup: | ||
1068 | mov %esp,%eax | ||
1069 | and $31,%eax | ||
1070 | add $256,%eax | ||
1071 | sub %eax,%esp | ||
1072 | # eax_stack = eax (eax = (old esp & 31) + 256, the amount just subtracted from esp; kept so the exit path can add it back) | ||
1073 | movl %eax,64(%esp) | ||
1074 | # ebx_stack = ebx (ebx is overwritten below, restored before returning) | ||
1075 | movl %ebx,68(%esp) | ||
1076 | # esi_stack = esi (esi is overwritten below, restored before returning) | ||
1077 | movl %esi,72(%esp) | ||
1078 | # edi_stack = edi (saved and restored although this routine does not otherwise use edi) | ||
1079 | movl %edi,76(%esp) | ||
1080 | # ebp_stack = ebp (saved and restored although this routine does not otherwise use ebp) | ||
1081 | movl %ebp,80(%esp) | ||
1082 | # iv = arg2 (esp + eax is the esp value at entry, so offsets 4/8 from it address the caller's stack arguments) | ||
1083 | movl 8(%esp,%eax),%ecx | ||
1084 | # x = arg1 (loaded last because it overwrites eax, the index used for the argument load above) | ||
1085 | movl 4(%esp,%eax),%eax | ||
1086 | # in6 = *(uint32 *) (iv + 0) | ||
1087 | movl 0(%ecx),%edx | ||
1088 | # in7 = *(uint32 *) (iv + 4) (last read through iv: the load overwrites the iv pointer in ecx) | ||
1089 | movl 4(%ecx),%ecx | ||
1090 | # in8 = 0 | ||
1091 | mov $0,%ebx | ||
1092 | # in9 = 0 | ||
1093 | mov $0,%esi | ||
1094 | # *(uint32 *) (x + 24) = in6 | ||
1095 | movl %edx,24(%eax) | ||
1096 | # *(uint32 *) (x + 28) = in7 | ||
1097 | movl %ecx,28(%eax) | ||
1098 | # *(uint32 *) (x + 32) = in8 (x + 32 and x + 36 are the words ECRYPT_encrypt_bytes in this file increments, with carry, after each 64-byte block; they start at zero here) | ||
1099 | movl %ebx,32(%eax) | ||
1100 | # *(uint32 *) (x + 36) = in9 | ||
1101 | movl %esi,36(%eax) | ||
1102 | # eax = eax_stack (reload the frame size saved at entry) | ||
1103 | movl 64(%esp),%eax | ||
1104 | # ebx = ebx_stack (restore the caller's ebx) | ||
1105 | movl 68(%esp),%ebx | ||
1106 | # esi = esi_stack (restore the caller's esi) | ||
1107 | movl 72(%esp),%esi | ||
1108 | # edi = edi_stack (restore the caller's edi) | ||
1109 | movl 76(%esp),%edi | ||
1110 | # ebp = ebp_stack (restore the caller's ebp) | ||
1111 | movl 80(%esp),%ebp | ||
1112 | # leave: add the frame size back so esp points at the return address again, then return | ||
1113 | add %eax,%esp | ||
1114 | ret | ||
diff --git a/arch/x86/crypto/salsa20-x86_64-asm_64.S b/arch/x86/crypto/salsa20-x86_64-asm_64.S new file mode 100644 index 000000000000..6214a9b09706 --- /dev/null +++ b/arch/x86/crypto/salsa20-x86_64-asm_64.S | |||
@@ -0,0 +1,920 @@ | |||
1 | # enter ECRYPT_encrypt_bytes | ||
2 | .text | ||
3 | .p2align 5 | ||
4 | .globl ECRYPT_encrypt_bytes | ||
5 | ECRYPT_encrypt_bytes: | ||
6 | mov %rsp,%r11 | ||
7 | and $31,%r11 | ||
8 | add $256,%r11 | ||
9 | sub %r11,%rsp | ||
10 | # x = arg1 | ||
11 | mov %rdi,%r8 | ||
12 | # m = arg2 | ||
13 | mov %rsi,%rsi | ||
14 | # out = arg3 | ||
15 | mov %rdx,%rdi | ||
16 | # bytes = arg4 | ||
17 | mov %rcx,%rdx | ||
18 | # unsigned>? bytes - 0 | ||
19 | cmp $0,%rdx | ||
20 | # comment:fp stack unchanged by jump | ||
21 | # goto done if !unsigned> | ||
22 | jbe ._done | ||
23 | # comment:fp stack unchanged by fallthrough | ||
24 | # start: | ||
25 | ._start: | ||
26 | # r11_stack = r11 | ||
27 | movq %r11,0(%rsp) | ||
28 | # r12_stack = r12 | ||
29 | movq %r12,8(%rsp) | ||
30 | # r13_stack = r13 | ||
31 | movq %r13,16(%rsp) | ||
32 | # r14_stack = r14 | ||
33 | movq %r14,24(%rsp) | ||
34 | # r15_stack = r15 | ||
35 | movq %r15,32(%rsp) | ||
36 | # rbx_stack = rbx | ||
37 | movq %rbx,40(%rsp) | ||
38 | # rbp_stack = rbp | ||
39 | movq %rbp,48(%rsp) | ||
40 | # in0 = *(uint64 *) (x + 0) | ||
41 | movq 0(%r8),%rcx | ||
42 | # in2 = *(uint64 *) (x + 8) | ||
43 | movq 8(%r8),%r9 | ||
44 | # in4 = *(uint64 *) (x + 16) | ||
45 | movq 16(%r8),%rax | ||
46 | # in6 = *(uint64 *) (x + 24) | ||
47 | movq 24(%r8),%r10 | ||
48 | # in8 = *(uint64 *) (x + 32) | ||
49 | movq 32(%r8),%r11 | ||
50 | # in10 = *(uint64 *) (x + 40) | ||
51 | movq 40(%r8),%r12 | ||
52 | # in12 = *(uint64 *) (x + 48) | ||
53 | movq 48(%r8),%r13 | ||
54 | # in14 = *(uint64 *) (x + 56) | ||
55 | movq 56(%r8),%r14 | ||
56 | # j0 = in0 | ||
57 | movq %rcx,56(%rsp) | ||
58 | # j2 = in2 | ||
59 | movq %r9,64(%rsp) | ||
60 | # j4 = in4 | ||
61 | movq %rax,72(%rsp) | ||
62 | # j6 = in6 | ||
63 | movq %r10,80(%rsp) | ||
64 | # j8 = in8 | ||
65 | movq %r11,88(%rsp) | ||
66 | # j10 = in10 | ||
67 | movq %r12,96(%rsp) | ||
68 | # j12 = in12 | ||
69 | movq %r13,104(%rsp) | ||
70 | # j14 = in14 | ||
71 | movq %r14,112(%rsp) | ||
72 | # x_backup = x | ||
73 | movq %r8,120(%rsp) | ||
74 | # bytesatleast1: | ||
75 | ._bytesatleast1: | ||
76 | # unsigned<? bytes - 64 | ||
77 | cmp $64,%rdx | ||
78 | # comment:fp stack unchanged by jump | ||
79 | # goto nocopy if !unsigned< | ||
80 | jae ._nocopy | ||
81 | # ctarget = out | ||
82 | movq %rdi,128(%rsp) | ||
83 | # out = &tmp | ||
84 | leaq 192(%rsp),%rdi | ||
85 | # i = bytes | ||
86 | mov %rdx,%rcx | ||
87 | # while (i) { *out++ = *m++; --i } | ||
88 | rep movsb | ||
89 | # out = &tmp | ||
90 | leaq 192(%rsp),%rdi | ||
91 | # m = &tmp | ||
92 | leaq 192(%rsp),%rsi | ||
93 | # comment:fp stack unchanged by fallthrough | ||
94 | # nocopy: | ||
95 | ._nocopy: | ||
96 | # out_backup = out | ||
97 | movq %rdi,136(%rsp) | ||
98 | # m_backup = m | ||
99 | movq %rsi,144(%rsp) | ||
100 | # bytes_backup = bytes | ||
101 | movq %rdx,152(%rsp) | ||
102 | # x1 = j0 | ||
103 | movq 56(%rsp),%rdi | ||
104 | # x0 = x1 | ||
105 | mov %rdi,%rdx | ||
106 | # (uint64) x1 >>= 32 | ||
107 | shr $32,%rdi | ||
108 | # x3 = j2 | ||
109 | movq 64(%rsp),%rsi | ||
110 | # x2 = x3 | ||
111 | mov %rsi,%rcx | ||
112 | # (uint64) x3 >>= 32 | ||
113 | shr $32,%rsi | ||
114 | # x5 = j4 | ||
115 | movq 72(%rsp),%r8 | ||
116 | # x4 = x5 | ||
117 | mov %r8,%r9 | ||
118 | # (uint64) x5 >>= 32 | ||
119 | shr $32,%r8 | ||
120 | # x5_stack = x5 | ||
121 | movq %r8,160(%rsp) | ||
122 | # x7 = j6 | ||
123 | movq 80(%rsp),%r8 | ||
124 | # x6 = x7 | ||
125 | mov %r8,%rax | ||
126 | # (uint64) x7 >>= 32 | ||
127 | shr $32,%r8 | ||
128 | # x9 = j8 | ||
129 | movq 88(%rsp),%r10 | ||
130 | # x8 = x9 | ||
131 | mov %r10,%r11 | ||
132 | # (uint64) x9 >>= 32 | ||
133 | shr $32,%r10 | ||
134 | # x11 = j10 | ||
135 | movq 96(%rsp),%r12 | ||
136 | # x10 = x11 | ||
137 | mov %r12,%r13 | ||
138 | # x10_stack = x10 | ||
139 | movq %r13,168(%rsp) | ||
140 | # (uint64) x11 >>= 32 | ||
141 | shr $32,%r12 | ||
142 | # x13 = j12 | ||
143 | movq 104(%rsp),%r13 | ||
144 | # x12 = x13 | ||
145 | mov %r13,%r14 | ||
146 | # (uint64) x13 >>= 32 | ||
147 | shr $32,%r13 | ||
148 | # x15 = j14 | ||
149 | movq 112(%rsp),%r15 | ||
150 | # x14 = x15 | ||
151 | mov %r15,%rbx | ||
152 | # (uint64) x15 >>= 32 | ||
153 | shr $32,%r15 | ||
154 | # x15_stack = x15 | ||
155 | movq %r15,176(%rsp) | ||
156 | # i = 20 | ||
157 | mov $20,%r15 | ||
158 | # mainloop: | ||
159 | ._mainloop: | ||
160 | # i_backup = i | ||
161 | movq %r15,184(%rsp) | ||
162 | # x5 = x5_stack | ||
163 | movq 160(%rsp),%r15 | ||
164 | # a = x12 + x0 | ||
165 | lea (%r14,%rdx),%rbp | ||
166 | # (uint32) a <<<= 7 | ||
167 | rol $7,%ebp | ||
168 | # x4 ^= a | ||
169 | xor %rbp,%r9 | ||
170 | # b = x1 + x5 | ||
171 | lea (%rdi,%r15),%rbp | ||
172 | # (uint32) b <<<= 7 | ||
173 | rol $7,%ebp | ||
174 | # x9 ^= b | ||
175 | xor %rbp,%r10 | ||
176 | # a = x0 + x4 | ||
177 | lea (%rdx,%r9),%rbp | ||
178 | # (uint32) a <<<= 9 | ||
179 | rol $9,%ebp | ||
180 | # x8 ^= a | ||
181 | xor %rbp,%r11 | ||
182 | # b = x5 + x9 | ||
183 | lea (%r15,%r10),%rbp | ||
184 | # (uint32) b <<<= 9 | ||
185 | rol $9,%ebp | ||
186 | # x13 ^= b | ||
187 | xor %rbp,%r13 | ||
188 | # a = x4 + x8 | ||
189 | lea (%r9,%r11),%rbp | ||
190 | # (uint32) a <<<= 13 | ||
191 | rol $13,%ebp | ||
192 | # x12 ^= a | ||
193 | xor %rbp,%r14 | ||
194 | # b = x9 + x13 | ||
195 | lea (%r10,%r13),%rbp | ||
196 | # (uint32) b <<<= 13 | ||
197 | rol $13,%ebp | ||
198 | # x1 ^= b | ||
199 | xor %rbp,%rdi | ||
200 | # a = x8 + x12 | ||
201 | lea (%r11,%r14),%rbp | ||
202 | # (uint32) a <<<= 18 | ||
203 | rol $18,%ebp | ||
204 | # x0 ^= a | ||
205 | xor %rbp,%rdx | ||
206 | # b = x13 + x1 | ||
207 | lea (%r13,%rdi),%rbp | ||
208 | # (uint32) b <<<= 18 | ||
209 | rol $18,%ebp | ||
210 | # x5 ^= b | ||
211 | xor %rbp,%r15 | ||
212 | # x10 = x10_stack | ||
213 | movq 168(%rsp),%rbp | ||
214 | # x5_stack = x5 | ||
215 | movq %r15,160(%rsp) | ||
216 | # c = x6 + x10 | ||
217 | lea (%rax,%rbp),%r15 | ||
218 | # (uint32) c <<<= 7 | ||
219 | rol $7,%r15d | ||
220 | # x14 ^= c | ||
221 | xor %r15,%rbx | ||
222 | # c = x10 + x14 | ||
223 | lea (%rbp,%rbx),%r15 | ||
224 | # (uint32) c <<<= 9 | ||
225 | rol $9,%r15d | ||
226 | # x2 ^= c | ||
227 | xor %r15,%rcx | ||
228 | # c = x14 + x2 | ||
229 | lea (%rbx,%rcx),%r15 | ||
230 | # (uint32) c <<<= 13 | ||
231 | rol $13,%r15d | ||
232 | # x6 ^= c | ||
233 | xor %r15,%rax | ||
234 | # c = x2 + x6 | ||
235 | lea (%rcx,%rax),%r15 | ||
236 | # (uint32) c <<<= 18 | ||
237 | rol $18,%r15d | ||
238 | # x10 ^= c | ||
239 | xor %r15,%rbp | ||
240 | # x15 = x15_stack | ||
241 | movq 176(%rsp),%r15 | ||
242 | # x10_stack = x10 | ||
243 | movq %rbp,168(%rsp) | ||
244 | # d = x11 + x15 | ||
245 | lea (%r12,%r15),%rbp | ||
246 | # (uint32) d <<<= 7 | ||
247 | rol $7,%ebp | ||
248 | # x3 ^= d | ||
249 | xor %rbp,%rsi | ||
250 | # d = x15 + x3 | ||
251 | lea (%r15,%rsi),%rbp | ||
252 | # (uint32) d <<<= 9 | ||
253 | rol $9,%ebp | ||
254 | # x7 ^= d | ||
255 | xor %rbp,%r8 | ||
256 | # d = x3 + x7 | ||
257 | lea (%rsi,%r8),%rbp | ||
258 | # (uint32) d <<<= 13 | ||
259 | rol $13,%ebp | ||
260 | # x11 ^= d | ||
261 | xor %rbp,%r12 | ||
262 | # d = x7 + x11 | ||
263 | lea (%r8,%r12),%rbp | ||
264 | # (uint32) d <<<= 18 | ||
265 | rol $18,%ebp | ||
266 | # x15 ^= d | ||
267 | xor %rbp,%r15 | ||
268 | # x15_stack = x15 | ||
269 | movq %r15,176(%rsp) | ||
270 | # x5 = x5_stack | ||
271 | movq 160(%rsp),%r15 | ||
272 | # a = x3 + x0 | ||
273 | lea (%rsi,%rdx),%rbp | ||
274 | # (uint32) a <<<= 7 | ||
275 | rol $7,%ebp | ||
276 | # x1 ^= a | ||
277 | xor %rbp,%rdi | ||
278 | # b = x4 + x5 | ||
279 | lea (%r9,%r15),%rbp | ||
280 | # (uint32) b <<<= 7 | ||
281 | rol $7,%ebp | ||
282 | # x6 ^= b | ||
283 | xor %rbp,%rax | ||
284 | # a = x0 + x1 | ||
285 | lea (%rdx,%rdi),%rbp | ||
286 | # (uint32) a <<<= 9 | ||
287 | rol $9,%ebp | ||
288 | # x2 ^= a | ||
289 | xor %rbp,%rcx | ||
290 | # b = x5 + x6 | ||
291 | lea (%r15,%rax),%rbp | ||
292 | # (uint32) b <<<= 9 | ||
293 | rol $9,%ebp | ||
294 | # x7 ^= b | ||
295 | xor %rbp,%r8 | ||
296 | # a = x1 + x2 | ||
297 | lea (%rdi,%rcx),%rbp | ||
298 | # (uint32) a <<<= 13 | ||
299 | rol $13,%ebp | ||
300 | # x3 ^= a | ||
301 | xor %rbp,%rsi | ||
302 | # b = x6 + x7 | ||
303 | lea (%rax,%r8),%rbp | ||
304 | # (uint32) b <<<= 13 | ||
305 | rol $13,%ebp | ||
306 | # x4 ^= b | ||
307 | xor %rbp,%r9 | ||
308 | # a = x2 + x3 | ||
309 | lea (%rcx,%rsi),%rbp | ||
310 | # (uint32) a <<<= 18 | ||
311 | rol $18,%ebp | ||
312 | # x0 ^= a | ||
313 | xor %rbp,%rdx | ||
314 | # b = x7 + x4 | ||
315 | lea (%r8,%r9),%rbp | ||
316 | # (uint32) b <<<= 18 | ||
317 | rol $18,%ebp | ||
318 | # x5 ^= b | ||
319 | xor %rbp,%r15 | ||
320 | # x10 = x10_stack | ||
321 | movq 168(%rsp),%rbp | ||
322 | # x5_stack = x5 | ||
323 | movq %r15,160(%rsp) | ||
324 | # c = x9 + x10 | ||
325 | lea (%r10,%rbp),%r15 | ||
326 | # (uint32) c <<<= 7 | ||
327 | rol $7,%r15d | ||
328 | # x11 ^= c | ||
329 | xor %r15,%r12 | ||
330 | # c = x10 + x11 | ||
331 | lea (%rbp,%r12),%r15 | ||
332 | # (uint32) c <<<= 9 | ||
333 | rol $9,%r15d | ||
334 | # x8 ^= c | ||
335 | xor %r15,%r11 | ||
336 | # c = x11 + x8 | ||
337 | lea (%r12,%r11),%r15 | ||
338 | # (uint32) c <<<= 13 | ||
339 | rol $13,%r15d | ||
340 | # x9 ^= c | ||
341 | xor %r15,%r10 | ||
342 | # c = x8 + x9 | ||
343 | lea (%r11,%r10),%r15 | ||
344 | # (uint32) c <<<= 18 | ||
345 | rol $18,%r15d | ||
346 | # x10 ^= c | ||
347 | xor %r15,%rbp | ||
348 | # x15 = x15_stack | ||
349 | movq 176(%rsp),%r15 | ||
350 | # x10_stack = x10 | ||
351 | movq %rbp,168(%rsp) | ||
352 | # d = x14 + x15 | ||
353 | lea (%rbx,%r15),%rbp | ||
354 | # (uint32) d <<<= 7 | ||
355 | rol $7,%ebp | ||
356 | # x12 ^= d | ||
357 | xor %rbp,%r14 | ||
358 | # d = x15 + x12 | ||
359 | lea (%r15,%r14),%rbp | ||
360 | # (uint32) d <<<= 9 | ||
361 | rol $9,%ebp | ||
362 | # x13 ^= d | ||
363 | xor %rbp,%r13 | ||
364 | # d = x12 + x13 | ||
365 | lea (%r14,%r13),%rbp | ||
366 | # (uint32) d <<<= 13 | ||
367 | rol $13,%ebp | ||
368 | # x14 ^= d | ||
369 | xor %rbp,%rbx | ||
370 | # d = x13 + x14 | ||
371 | lea (%r13,%rbx),%rbp | ||
372 | # (uint32) d <<<= 18 | ||
373 | rol $18,%ebp | ||
374 | # x15 ^= d | ||
375 | xor %rbp,%r15 | ||
376 | # x15_stack = x15 | ||
377 | movq %r15,176(%rsp) | ||
378 | # x5 = x5_stack | ||
379 | movq 160(%rsp),%r15 | ||
380 | # a = x12 + x0 | ||
381 | lea (%r14,%rdx),%rbp | ||
382 | # (uint32) a <<<= 7 | ||
383 | rol $7,%ebp | ||
384 | # x4 ^= a | ||
385 | xor %rbp,%r9 | ||
386 | # b = x1 + x5 | ||
387 | lea (%rdi,%r15),%rbp | ||
388 | # (uint32) b <<<= 7 | ||
389 | rol $7,%ebp | ||
390 | # x9 ^= b | ||
391 | xor %rbp,%r10 | ||
392 | # a = x0 + x4 | ||
393 | lea (%rdx,%r9),%rbp | ||
394 | # (uint32) a <<<= 9 | ||
395 | rol $9,%ebp | ||
396 | # x8 ^= a | ||
397 | xor %rbp,%r11 | ||
398 | # b = x5 + x9 | ||
399 | lea (%r15,%r10),%rbp | ||
400 | # (uint32) b <<<= 9 | ||
401 | rol $9,%ebp | ||
402 | # x13 ^= b | ||
403 | xor %rbp,%r13 | ||
404 | # a = x4 + x8 | ||
405 | lea (%r9,%r11),%rbp | ||
406 | # (uint32) a <<<= 13 | ||
407 | rol $13,%ebp | ||
408 | # x12 ^= a | ||
409 | xor %rbp,%r14 | ||
410 | # b = x9 + x13 | ||
411 | lea (%r10,%r13),%rbp | ||
412 | # (uint32) b <<<= 13 | ||
413 | rol $13,%ebp | ||
414 | # x1 ^= b | ||
415 | xor %rbp,%rdi | ||
416 | # a = x8 + x12 | ||
417 | lea (%r11,%r14),%rbp | ||
418 | # (uint32) a <<<= 18 | ||
419 | rol $18,%ebp | ||
420 | # x0 ^= a | ||
421 | xor %rbp,%rdx | ||
422 | # b = x13 + x1 | ||
423 | lea (%r13,%rdi),%rbp | ||
424 | # (uint32) b <<<= 18 | ||
425 | rol $18,%ebp | ||
426 | # x5 ^= b | ||
427 | xor %rbp,%r15 | ||
428 | # x10 = x10_stack | ||
429 | movq 168(%rsp),%rbp | ||
430 | # x5_stack = x5 | ||
431 | movq %r15,160(%rsp) | ||
432 | # c = x6 + x10 | ||
433 | lea (%rax,%rbp),%r15 | ||
434 | # (uint32) c <<<= 7 | ||
435 | rol $7,%r15d | ||
436 | # x14 ^= c | ||
437 | xor %r15,%rbx | ||
438 | # c = x10 + x14 | ||
439 | lea (%rbp,%rbx),%r15 | ||
440 | # (uint32) c <<<= 9 | ||
441 | rol $9,%r15d | ||
442 | # x2 ^= c | ||
443 | xor %r15,%rcx | ||
444 | # c = x14 + x2 | ||
445 | lea (%rbx,%rcx),%r15 | ||
446 | # (uint32) c <<<= 13 | ||
447 | rol $13,%r15d | ||
448 | # x6 ^= c | ||
449 | xor %r15,%rax | ||
450 | # c = x2 + x6 | ||
451 | lea (%rcx,%rax),%r15 | ||
452 | # (uint32) c <<<= 18 | ||
453 | rol $18,%r15d | ||
454 | # x10 ^= c | ||
455 | xor %r15,%rbp | ||
456 | # x15 = x15_stack | ||
457 | movq 176(%rsp),%r15 | ||
458 | # x10_stack = x10 | ||
459 | movq %rbp,168(%rsp) | ||
460 | # d = x11 + x15 | ||
461 | lea (%r12,%r15),%rbp | ||
462 | # (uint32) d <<<= 7 | ||
463 | rol $7,%ebp | ||
464 | # x3 ^= d | ||
465 | xor %rbp,%rsi | ||
466 | # d = x15 + x3 | ||
467 | lea (%r15,%rsi),%rbp | ||
468 | # (uint32) d <<<= 9 | ||
469 | rol $9,%ebp | ||
470 | # x7 ^= d | ||
471 | xor %rbp,%r8 | ||
472 | # d = x3 + x7 | ||
473 | lea (%rsi,%r8),%rbp | ||
474 | # (uint32) d <<<= 13 | ||
475 | rol $13,%ebp | ||
476 | # x11 ^= d | ||
477 | xor %rbp,%r12 | ||
478 | # d = x7 + x11 | ||
479 | lea (%r8,%r12),%rbp | ||
480 | # (uint32) d <<<= 18 | ||
481 | rol $18,%ebp | ||
482 | # x15 ^= d | ||
483 | xor %rbp,%r15 | ||
484 | # x15_stack = x15 | ||
485 | movq %r15,176(%rsp) | ||
486 | # x5 = x5_stack | ||
487 | movq 160(%rsp),%r15 | ||
488 | # a = x3 + x0 | ||
489 | lea (%rsi,%rdx),%rbp | ||
490 | # (uint32) a <<<= 7 | ||
491 | rol $7,%ebp | ||
492 | # x1 ^= a | ||
493 | xor %rbp,%rdi | ||
494 | # b = x4 + x5 | ||
495 | lea (%r9,%r15),%rbp | ||
496 | # (uint32) b <<<= 7 | ||
497 | rol $7,%ebp | ||
498 | # x6 ^= b | ||
499 | xor %rbp,%rax | ||
500 | # a = x0 + x1 | ||
501 | lea (%rdx,%rdi),%rbp | ||
502 | # (uint32) a <<<= 9 | ||
503 | rol $9,%ebp | ||
504 | # x2 ^= a | ||
505 | xor %rbp,%rcx | ||
506 | # b = x5 + x6 | ||
507 | lea (%r15,%rax),%rbp | ||
508 | # (uint32) b <<<= 9 | ||
509 | rol $9,%ebp | ||
510 | # x7 ^= b | ||
511 | xor %rbp,%r8 | ||
512 | # a = x1 + x2 | ||
513 | lea (%rdi,%rcx),%rbp | ||
514 | # (uint32) a <<<= 13 | ||
515 | rol $13,%ebp | ||
516 | # x3 ^= a | ||
517 | xor %rbp,%rsi | ||
518 | # b = x6 + x7 | ||
519 | lea (%rax,%r8),%rbp | ||
520 | # (uint32) b <<<= 13 | ||
521 | rol $13,%ebp | ||
522 | # x4 ^= b | ||
523 | xor %rbp,%r9 | ||
524 | # a = x2 + x3 | ||
525 | lea (%rcx,%rsi),%rbp | ||
526 | # (uint32) a <<<= 18 | ||
527 | rol $18,%ebp | ||
528 | # x0 ^= a | ||
529 | xor %rbp,%rdx | ||
530 | # b = x7 + x4 | ||
531 | lea (%r8,%r9),%rbp | ||
532 | # (uint32) b <<<= 18 | ||
533 | rol $18,%ebp | ||
534 | # x5 ^= b | ||
535 | xor %rbp,%r15 | ||
536 | # x10 = x10_stack | ||
537 | movq 168(%rsp),%rbp | ||
538 | # x5_stack = x5 | ||
539 | movq %r15,160(%rsp) | ||
540 | # c = x9 + x10 | ||
541 | lea (%r10,%rbp),%r15 | ||
542 | # (uint32) c <<<= 7 | ||
543 | rol $7,%r15d | ||
544 | # x11 ^= c | ||
545 | xor %r15,%r12 | ||
546 | # c = x10 + x11 | ||
547 | lea (%rbp,%r12),%r15 | ||
548 | # (uint32) c <<<= 9 | ||
549 | rol $9,%r15d | ||
550 | # x8 ^= c | ||
551 | xor %r15,%r11 | ||
552 | # c = x11 + x8 | ||
553 | lea (%r12,%r11),%r15 | ||
554 | # (uint32) c <<<= 13 | ||
555 | rol $13,%r15d | ||
556 | # x9 ^= c | ||
557 | xor %r15,%r10 | ||
558 | # c = x8 + x9 | ||
559 | lea (%r11,%r10),%r15 | ||
560 | # (uint32) c <<<= 18 | ||
561 | rol $18,%r15d | ||
562 | # x10 ^= c | ||
563 | xor %r15,%rbp | ||
564 | # x15 = x15_stack | ||
565 | movq 176(%rsp),%r15 | ||
566 | # x10_stack = x10 | ||
567 | movq %rbp,168(%rsp) | ||
568 | # d = x14 + x15 | ||
569 | lea (%rbx,%r15),%rbp | ||
570 | # (uint32) d <<<= 7 | ||
571 | rol $7,%ebp | ||
572 | # x12 ^= d | ||
573 | xor %rbp,%r14 | ||
574 | # d = x15 + x12 | ||
575 | lea (%r15,%r14),%rbp | ||
576 | # (uint32) d <<<= 9 | ||
577 | rol $9,%ebp | ||
578 | # x13 ^= d | ||
579 | xor %rbp,%r13 | ||
580 | # d = x12 + x13 | ||
581 | lea (%r14,%r13),%rbp | ||
582 | # (uint32) d <<<= 13 | ||
583 | rol $13,%ebp | ||
584 | # x14 ^= d | ||
585 | xor %rbp,%rbx | ||
586 | # d = x13 + x14 | ||
587 | lea (%r13,%rbx),%rbp | ||
588 | # (uint32) d <<<= 18 | ||
589 | rol $18,%ebp | ||
590 | # x15 ^= d | ||
591 | xor %rbp,%r15 | ||
592 | # x15_stack = x15 | ||
593 | movq %r15,176(%rsp) | ||
594 | # i = i_backup | ||
595 | movq 184(%rsp),%r15 | ||
596 | # unsigned>? i -= 4 | ||
597 | sub $4,%r15 | ||
598 | # comment:fp stack unchanged by jump | ||
599 | # goto mainloop if unsigned> | ||
600 | ja ._mainloop | ||
601 | # (uint32) x2 += j2 | ||
602 | addl 64(%rsp),%ecx | ||
603 | # x3 <<= 32 | ||
604 | shl $32,%rsi | ||
605 | # x3 += j2 | ||
606 | addq 64(%rsp),%rsi | ||
607 | # (uint64) x3 >>= 32 | ||
608 | shr $32,%rsi | ||
609 | # x3 <<= 32 | ||
610 | shl $32,%rsi | ||
611 | # x2 += x3 | ||
612 | add %rsi,%rcx | ||
613 | # (uint32) x6 += j6 | ||
614 | addl 80(%rsp),%eax | ||
615 | # x7 <<= 32 | ||
616 | shl $32,%r8 | ||
617 | # x7 += j6 | ||
618 | addq 80(%rsp),%r8 | ||
619 | # (uint64) x7 >>= 32 | ||
620 | shr $32,%r8 | ||
621 | # x7 <<= 32 | ||
622 | shl $32,%r8 | ||
623 | # x6 += x7 | ||
624 | add %r8,%rax | ||
625 | # (uint32) x8 += j8 | ||
626 | addl 88(%rsp),%r11d | ||
627 | # x9 <<= 32 | ||
628 | shl $32,%r10 | ||
629 | # x9 += j8 | ||
630 | addq 88(%rsp),%r10 | ||
631 | # (uint64) x9 >>= 32 | ||
632 | shr $32,%r10 | ||
633 | # x9 <<= 32 | ||
634 | shl $32,%r10 | ||
635 | # x8 += x9 | ||
636 | add %r10,%r11 | ||
637 | # (uint32) x12 += j12 | ||
638 | addl 104(%rsp),%r14d | ||
639 | # x13 <<= 32 | ||
640 | shl $32,%r13 | ||
641 | # x13 += j12 | ||
642 | addq 104(%rsp),%r13 | ||
643 | # (uint64) x13 >>= 32 | ||
644 | shr $32,%r13 | ||
645 | # x13 <<= 32 | ||
646 | shl $32,%r13 | ||
647 | # x12 += x13 | ||
648 | add %r13,%r14 | ||
649 | # (uint32) x0 += j0 | ||
650 | addl 56(%rsp),%edx | ||
651 | # x1 <<= 32 | ||
652 | shl $32,%rdi | ||
653 | # x1 += j0 | ||
654 | addq 56(%rsp),%rdi | ||
655 | # (uint64) x1 >>= 32 | ||
656 | shr $32,%rdi | ||
657 | # x1 <<= 32 | ||
658 | shl $32,%rdi | ||
659 | # x0 += x1 | ||
660 | add %rdi,%rdx | ||
661 | # x5 = x5_stack | ||
662 | movq 160(%rsp),%rdi | ||
663 | # (uint32) x4 += j4 | ||
664 | addl 72(%rsp),%r9d | ||
665 | # x5 <<= 32 | ||
666 | shl $32,%rdi | ||
667 | # x5 += j4 | ||
668 | addq 72(%rsp),%rdi | ||
669 | # (uint64) x5 >>= 32 | ||
670 | shr $32,%rdi | ||
671 | # x5 <<= 32 | ||
672 | shl $32,%rdi | ||
673 | # x4 += x5 | ||
674 | add %rdi,%r9 | ||
675 | # x10 = x10_stack | ||
676 | movq 168(%rsp),%r8 | ||
677 | # (uint32) x10 += j10 | ||
678 | addl 96(%rsp),%r8d | ||
679 | # x11 <<= 32 | ||
680 | shl $32,%r12 | ||
681 | # x11 += j10 | ||
682 | addq 96(%rsp),%r12 | ||
683 | # (uint64) x11 >>= 32 | ||
684 | shr $32,%r12 | ||
685 | # x11 <<= 32 | ||
686 | shl $32,%r12 | ||
687 | # x10 += x11 | ||
688 | add %r12,%r8 | ||
689 | # x15 = x15_stack | ||
690 | movq 176(%rsp),%rdi | ||
691 | # (uint32) x14 += j14 | ||
692 | addl 112(%rsp),%ebx | ||
693 | # x15 <<= 32 | ||
694 | shl $32,%rdi | ||
695 | # x15 += j14 | ||
696 | addq 112(%rsp),%rdi | ||
697 | # (uint64) x15 >>= 32 | ||
698 | shr $32,%rdi | ||
699 | # x15 <<= 32 | ||
700 | shl $32,%rdi | ||
701 | # x14 += x15 | ||
702 | add %rdi,%rbx | ||
703 | # out = out_backup | ||
704 | movq 136(%rsp),%rdi | ||
705 | # m = m_backup | ||
706 | movq 144(%rsp),%rsi | ||
707 | # x0 ^= *(uint64 *) (m + 0) | ||
708 | xorq 0(%rsi),%rdx | ||
709 | # *(uint64 *) (out + 0) = x0 | ||
710 | movq %rdx,0(%rdi) | ||
711 | # x2 ^= *(uint64 *) (m + 8) | ||
712 | xorq 8(%rsi),%rcx | ||
713 | # *(uint64 *) (out + 8) = x2 | ||
714 | movq %rcx,8(%rdi) | ||
715 | # x4 ^= *(uint64 *) (m + 16) | ||
716 | xorq 16(%rsi),%r9 | ||
717 | # *(uint64 *) (out + 16) = x4 | ||
718 | movq %r9,16(%rdi) | ||
719 | # x6 ^= *(uint64 *) (m + 24) | ||
720 | xorq 24(%rsi),%rax | ||
721 | # *(uint64 *) (out + 24) = x6 | ||
722 | movq %rax,24(%rdi) | ||
723 | # x8 ^= *(uint64 *) (m + 32) | ||
724 | xorq 32(%rsi),%r11 | ||
725 | # *(uint64 *) (out + 32) = x8 | ||
726 | movq %r11,32(%rdi) | ||
727 | # x10 ^= *(uint64 *) (m + 40) | ||
728 | xorq 40(%rsi),%r8 | ||
729 | # *(uint64 *) (out + 40) = x10 | ||
730 | movq %r8,40(%rdi) | ||
731 | # x12 ^= *(uint64 *) (m + 48) | ||
732 | xorq 48(%rsi),%r14 | ||
733 | # *(uint64 *) (out + 48) = x12 | ||
734 | movq %r14,48(%rdi) | ||
735 | # x14 ^= *(uint64 *) (m + 56) | ||
736 | xorq 56(%rsi),%rbx | ||
737 | # *(uint64 *) (out + 56) = x14 | ||
738 | movq %rbx,56(%rdi) | ||
739 | # bytes = bytes_backup | ||
740 | movq 152(%rsp),%rdx | ||
741 | # in8 = j8 | ||
742 | movq 88(%rsp),%rcx | ||
743 | # in8 += 1 | ||
744 | add $1,%rcx | ||
745 | # j8 = in8 | ||
746 | movq %rcx,88(%rsp) | ||
747 | # unsigned>? unsigned<? bytes - 64 | ||
748 | cmp $64,%rdx | ||
749 | # comment:fp stack unchanged by jump | ||
750 | # goto bytesatleast65 if unsigned> | ||
751 | ja ._bytesatleast65 | ||
752 | # comment:fp stack unchanged by jump | ||
753 | # goto bytesatleast64 if !unsigned< | ||
754 | jae ._bytesatleast64 | ||
755 | # m = out | ||
756 | mov %rdi,%rsi | ||
757 | # out = ctarget | ||
758 | movq 128(%rsp),%rdi | ||
759 | # i = bytes | ||
760 | mov %rdx,%rcx | ||
761 | # while (i) { *out++ = *m++; --i } | ||
762 | rep movsb | ||
763 | # comment:fp stack unchanged by fallthrough | ||
764 | # bytesatleast64: | ||
765 | ._bytesatleast64: | ||
766 | # x = x_backup | ||
767 | movq 120(%rsp),%rdi | ||
768 | # in8 = j8 | ||
769 | movq 88(%rsp),%rsi | ||
770 | # *(uint64 *) (x + 32) = in8 | ||
771 | movq %rsi,32(%rdi) | ||
772 | # r11 = r11_stack | ||
773 | movq 0(%rsp),%r11 | ||
774 | # r12 = r12_stack | ||
775 | movq 8(%rsp),%r12 | ||
776 | # r13 = r13_stack | ||
777 | movq 16(%rsp),%r13 | ||
778 | # r14 = r14_stack | ||
779 | movq 24(%rsp),%r14 | ||
780 | # r15 = r15_stack | ||
781 | movq 32(%rsp),%r15 | ||
782 | # rbx = rbx_stack | ||
783 | movq 40(%rsp),%rbx | ||
784 | # rbp = rbp_stack | ||
785 | movq 48(%rsp),%rbp | ||
786 | # comment:fp stack unchanged by fallthrough | ||
787 | # done: | ||
788 | ._done: | ||
789 | # leave | ||
790 | add %r11,%rsp | ||
791 | mov %rdi,%rax | ||
792 | mov %rsi,%rdx | ||
793 | ret | ||
794 | # bytesatleast65: | ||
795 | ._bytesatleast65: | ||
796 | # bytes -= 64 | ||
797 | sub $64,%rdx | ||
798 | # out += 64 | ||
799 | add $64,%rdi | ||
800 | # m += 64 | ||
801 | add $64,%rsi | ||
802 | # comment:fp stack unchanged by jump | ||
803 | # goto bytesatleast1 | ||
804 | jmp ._bytesatleast1 | ||
# enter ECRYPT_keysetup
#
# ECRYPT_keysetup(x, k, kbits) - load a key and its constants into the state.
#   arg1 %rdi = x      state block, written at byte offsets 0-23 and 40-63
#   arg2 %rsi = k      key bytes: 16 are read when kbits < 256, otherwise 32
#   arg3 %edx = kbits  key length in bits
# Byte offsets 24-39 of x are left untouched by this routine.
# On return %rax = x and %rdx = the last value held in %rsi.
.text
.p2align 5
.globl ECRYPT_keysetup
ECRYPT_keysetup:
	# Frame: lower %rsp by 256 + (%rsp & 31), which leaves it 32-byte
	# aligned. %r11 remembers the amount; nothing is stored in the frame.
	mov	%rsp,%r11
	and	$31,%r11
	add	$256,%r11
	sub	%r11,%rsp
	# The first 16 key bytes land at x+4 .. x+19 for either key length.
	movq	0(%rsi),%r8
	movq	8(%rsi),%r9
	movq	%r8,4(%rdi)
	movq	%r9,12(%rdi)
	# kbits is passed as a 32-bit C argument, so only %edx is tested:
	# the upper half of %rdx is not guaranteed to be zero.
	cmp	$256,%edx
	jb	._kbits128
._kbits256:
	# Long key: key bytes 16-31 fill x+44 .. x+59.
	movq	16(%rsi),%rdx
	movq	24(%rsi),%rsi
	movq	%rdx,44(%rdi)
	movq	%rsi,52(%rdi)
	# Constant words; as little-endian ASCII they read "expand 32-byte k".
	mov	$0x61707865,%rsi	# "expa"
	mov	$0x3320646e,%rdx	# "nd 3"
	mov	$0x79622d32,%rcx	# "2-by"
	mov	$0x6b206574,%r8		# "te k"
	movl	%esi,0(%rdi)
	movl	%edx,20(%rdi)
	movl	%ecx,40(%rdi)
	movl	%r8d,60(%rdi)
	jmp	._keysetupdone
._kbits128:
	# Short key: the same 16 key bytes are repeated at x+44 .. x+59.
	movq	0(%rsi),%rdx
	movq	8(%rsi),%rsi
	movq	%rdx,44(%rdi)
	movq	%rsi,52(%rdi)
	# Constant words; as little-endian ASCII they read "expand 16-byte k".
	mov	$0x61707865,%rsi	# "expa"
	mov	$0x3120646e,%rdx	# "nd 1"
	mov	$0x79622d36,%rcx	# "6-by"
	mov	$0x6b206574,%r8		# "te k"
	movl	%esi,0(%rdi)
	movl	%edx,20(%rdi)
	movl	%ecx,40(%rdi)
	movl	%r8d,60(%rdi)
._keysetupdone:
	# leave: release the frame and hand back x in %rax.
	add	%r11,%rsp
	mov	%rdi,%rax
	mov	%rsi,%rdx
	ret
# enter ECRYPT_ivsetup
#
# ECRYPT_ivsetup(x, iv) - install an 8-byte IV and restart the counter.
#   arg1 %rdi = x   state block; bytes 24-31 receive the IV and
#                   bytes 32-39 are cleared
#   arg2 %rsi = iv  8 bytes are read from this address
# The quadword at x+32 is the one the encrypt routine advances by one
# after each 64-byte block, so clearing it restarts the keystream.
# On return %rax = x and %rdx = the IV value that was loaded.
.text
.p2align 5
.globl ECRYPT_ivsetup
ECRYPT_ivsetup:
	# Frame: lower %rsp by 256 + (%rsp & 31); %r11 keeps the amount.
	# Nothing is stored in the frame.
	mov	%rsp,%r11
	and	$31,%r11
	add	$256,%r11
	sub	%r11,%rsp
	# Fetch both values first, then write them into the state.
	mov	$0,%r8
	movq	0(%rsi),%rsi
	movq	%rsi,24(%rdi)
	movq	%r8,32(%rdi)
	# leave
	add	%r11,%rsp
	mov	%rdi,%rax
	mov	%rsi,%rdx
	ret
diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c new file mode 100644 index 000000000000..bccb76d80987 --- /dev/null +++ b/arch/x86/crypto/salsa20_glue.c | |||
@@ -0,0 +1,129 @@ | |||
1 | /* | ||
2 | * Glue code for optimized assembly version of Salsa20. | ||
3 | * | ||
4 | * Copyright (c) 2007 Tan Swee Heng <thesweeheng@gmail.com> | ||
5 | * | ||
6 | * The assembly code is public domain code written by Daniel J. Bernstein | ||
7 | * <djb@cr.yp.to>. It has been modified to include indentation and to | ||
8 | * remove extraneous comments and functions that are not needed. | ||
9 | * - i586 version, renamed as salsa20-i586-asm_32.S | ||
10 | * available from <http://cr.yp.to/snuffle/salsa20/x86-pm/salsa20.s> | ||
11 | * - x86-64 version, renamed as salsa20-x86_64-asm_64.S | ||
12 | * available from <http://cr.yp.to/snuffle/salsa20/amd64-3/salsa20.s> | ||
13 | * | ||
14 | * This program is free software; you can redistribute it and/or modify it | ||
15 | * under the terms of the GNU General Public License as published by the Free | ||
16 | * Software Foundation; either version 2 of the License, or (at your option) | ||
17 | * any later version. | ||
18 | * | ||
19 | */ | ||
20 | |||
21 | #include <crypto/algapi.h> | ||
22 | #include <linux/module.h> | ||
23 | #include <linux/crypto.h> | ||
24 | |||
25 | #define SALSA20_IV_SIZE 8U | ||
26 | #define SALSA20_MIN_KEY_SIZE 16U | ||
27 | #define SALSA20_MAX_KEY_SIZE 32U | ||
28 | |||
29 | // use the ECRYPT_* function names | ||
30 | #define salsa20_keysetup ECRYPT_keysetup | ||
31 | #define salsa20_ivsetup ECRYPT_ivsetup | ||
32 | #define salsa20_encrypt_bytes ECRYPT_encrypt_bytes | ||
33 | |||
34 | struct salsa20_ctx | ||
35 | { | ||
36 | u32 input[16]; | ||
37 | }; | ||
38 | |||
39 | asmlinkage void salsa20_keysetup(struct salsa20_ctx *ctx, const u8 *k, | ||
40 | u32 keysize, u32 ivsize); | ||
41 | asmlinkage void salsa20_ivsetup(struct salsa20_ctx *ctx, const u8 *iv); | ||
42 | asmlinkage void salsa20_encrypt_bytes(struct salsa20_ctx *ctx, | ||
43 | const u8 *src, u8 *dst, u32 bytes); | ||
44 | |||
45 | static int setkey(struct crypto_tfm *tfm, const u8 *key, | ||
46 | unsigned int keysize) | ||
47 | { | ||
48 | struct salsa20_ctx *ctx = crypto_tfm_ctx(tfm); | ||
49 | salsa20_keysetup(ctx, key, keysize*8, SALSA20_IV_SIZE*8); | ||
50 | return 0; | ||
51 | } | ||
52 | |||
53 | static int encrypt(struct blkcipher_desc *desc, | ||
54 | struct scatterlist *dst, struct scatterlist *src, | ||
55 | unsigned int nbytes) | ||
56 | { | ||
57 | struct blkcipher_walk walk; | ||
58 | struct crypto_blkcipher *tfm = desc->tfm; | ||
59 | struct salsa20_ctx *ctx = crypto_blkcipher_ctx(tfm); | ||
60 | int err; | ||
61 | |||
62 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
63 | err = blkcipher_walk_virt_block(desc, &walk, 64); | ||
64 | |||
65 | salsa20_ivsetup(ctx, walk.iv); | ||
66 | |||
67 | if (likely(walk.nbytes == nbytes)) | ||
68 | { | ||
69 | salsa20_encrypt_bytes(ctx, walk.src.virt.addr, | ||
70 | walk.dst.virt.addr, nbytes); | ||
71 | return blkcipher_walk_done(desc, &walk, 0); | ||
72 | } | ||
73 | |||
74 | while (walk.nbytes >= 64) { | ||
75 | salsa20_encrypt_bytes(ctx, walk.src.virt.addr, | ||
76 | walk.dst.virt.addr, | ||
77 | walk.nbytes - (walk.nbytes % 64)); | ||
78 | err = blkcipher_walk_done(desc, &walk, walk.nbytes % 64); | ||
79 | } | ||
80 | |||
81 | if (walk.nbytes) { | ||
82 | salsa20_encrypt_bytes(ctx, walk.src.virt.addr, | ||
83 | walk.dst.virt.addr, walk.nbytes); | ||
84 | err = blkcipher_walk_done(desc, &walk, 0); | ||
85 | } | ||
86 | |||
87 | return err; | ||
88 | } | ||
89 | |||
90 | static struct crypto_alg alg = { | ||
91 | .cra_name = "salsa20", | ||
92 | .cra_driver_name = "salsa20-asm", | ||
93 | .cra_priority = 200, | ||
94 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
95 | .cra_type = &crypto_blkcipher_type, | ||
96 | .cra_blocksize = 1, | ||
97 | .cra_ctxsize = sizeof(struct salsa20_ctx), | ||
98 | .cra_alignmask = 3, | ||
99 | .cra_module = THIS_MODULE, | ||
100 | .cra_list = LIST_HEAD_INIT(alg.cra_list), | ||
101 | .cra_u = { | ||
102 | .blkcipher = { | ||
103 | .setkey = setkey, | ||
104 | .encrypt = encrypt, | ||
105 | .decrypt = encrypt, | ||
106 | .min_keysize = SALSA20_MIN_KEY_SIZE, | ||
107 | .max_keysize = SALSA20_MAX_KEY_SIZE, | ||
108 | .ivsize = SALSA20_IV_SIZE, | ||
109 | } | ||
110 | } | ||
111 | }; | ||
112 | |||
113 | static int __init init(void) | ||
114 | { | ||
115 | return crypto_register_alg(&alg); | ||
116 | } | ||
117 | |||
118 | static void __exit fini(void) | ||
119 | { | ||
120 | crypto_unregister_alg(&alg); | ||
121 | } | ||
122 | |||
123 | module_init(init); | ||
124 | module_exit(fini); | ||
125 | |||
126 | MODULE_LICENSE("GPL"); | ||
127 | MODULE_DESCRIPTION ("Salsa20 stream cipher algorithm (optimized assembly version)"); | ||
128 | MODULE_ALIAS("salsa20"); | ||
129 | MODULE_ALIAS("salsa20-asm"); | ||
diff --git a/arch/x86/crypto/twofish_64.c b/arch/x86/crypto/twofish_64.c deleted file mode 100644 index 182d91d5cfb9..000000000000 --- a/arch/x86/crypto/twofish_64.c +++ /dev/null | |||
@@ -1,97 +0,0 @@ | |||
1 | /* | ||
2 | * Glue Code for optimized x86_64 assembler version of TWOFISH | ||
3 | * | ||
4 | * Originally Twofish for GPG | ||
5 | * By Matthew Skala <mskala@ansuz.sooke.bc.ca>, July 26, 1998 | ||
6 | * 256-bit key length added March 20, 1999 | ||
7 | * Some modifications to reduce the text size by Werner Koch, April, 1998 | ||
8 | * Ported to the kerneli patch by Marc Mutz <Marc@Mutz.com> | ||
9 | * Ported to CryptoAPI by Colin Slater <hoho@tacomeat.net> | ||
10 | * | ||
11 | * The original author has disclaimed all copyright interest in this | ||
12 | * code and thus put it in the public domain. The subsequent authors | ||
13 | * have put this under the GNU General Public License. | ||
14 | * | ||
15 | * This program is free software; you can redistribute it and/or modify | ||
16 | * it under the terms of the GNU General Public License as published by | ||
17 | * the Free Software Foundation; either version 2 of the License, or | ||
18 | * (at your option) any later version. | ||
19 | * | ||
20 | * This program is distributed in the hope that it will be useful, | ||
21 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
22 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
23 | * GNU General Public License for more details. | ||
24 | * | ||
25 | * You should have received a copy of the GNU General Public License | ||
26 | * along with this program; if not, write to the Free Software | ||
27 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
28 | * USA | ||
29 | * | ||
30 | * This code is a "clean room" implementation, written from the paper | ||
31 | * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey, | ||
32 | * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available | ||
33 | * through http://www.counterpane.com/twofish.html | ||
34 | * | ||
35 | * For background information on multiplication in finite fields, used for | ||
36 | * the matrix operations in the key schedule, see the book _Contemporary | ||
37 | * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the | ||
38 | * Third Edition. | ||
39 | */ | ||
40 | |||
41 | #include <crypto/twofish.h> | ||
42 | #include <linux/crypto.h> | ||
43 | #include <linux/init.h> | ||
44 | #include <linux/kernel.h> | ||
45 | #include <linux/module.h> | ||
46 | #include <linux/types.h> | ||
47 | |||
48 | asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); | ||
49 | asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); | ||
50 | |||
51 | static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | ||
52 | { | ||
53 | twofish_enc_blk(tfm, dst, src); | ||
54 | } | ||
55 | |||
56 | static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | ||
57 | { | ||
58 | twofish_dec_blk(tfm, dst, src); | ||
59 | } | ||
60 | |||
61 | static struct crypto_alg alg = { | ||
62 | .cra_name = "twofish", | ||
63 | .cra_driver_name = "twofish-x86_64", | ||
64 | .cra_priority = 200, | ||
65 | .cra_flags = CRYPTO_ALG_TYPE_CIPHER, | ||
66 | .cra_blocksize = TF_BLOCK_SIZE, | ||
67 | .cra_ctxsize = sizeof(struct twofish_ctx), | ||
68 | .cra_alignmask = 3, | ||
69 | .cra_module = THIS_MODULE, | ||
70 | .cra_list = LIST_HEAD_INIT(alg.cra_list), | ||
71 | .cra_u = { | ||
72 | .cipher = { | ||
73 | .cia_min_keysize = TF_MIN_KEY_SIZE, | ||
74 | .cia_max_keysize = TF_MAX_KEY_SIZE, | ||
75 | .cia_setkey = twofish_setkey, | ||
76 | .cia_encrypt = twofish_encrypt, | ||
77 | .cia_decrypt = twofish_decrypt | ||
78 | } | ||
79 | } | ||
80 | }; | ||
81 | |||
82 | static int __init init(void) | ||
83 | { | ||
84 | return crypto_register_alg(&alg); | ||
85 | } | ||
86 | |||
87 | static void __exit fini(void) | ||
88 | { | ||
89 | crypto_unregister_alg(&alg); | ||
90 | } | ||
91 | |||
92 | module_init(init); | ||
93 | module_exit(fini); | ||
94 | |||
95 | MODULE_LICENSE("GPL"); | ||
96 | MODULE_DESCRIPTION ("Twofish Cipher Algorithm, x86_64 asm optimized"); | ||
97 | MODULE_ALIAS("twofish"); | ||
diff --git a/arch/x86/crypto/twofish_32.c b/arch/x86/crypto/twofish_glue.c index e3004dfe9c7a..cefaf8b9aa18 100644 --- a/arch/x86/crypto/twofish_32.c +++ b/arch/x86/crypto/twofish_glue.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Glue Code for optimized 586 assembler version of TWOFISH | 2 | * Glue Code for assembler optimized version of TWOFISH |
3 | * | 3 | * |
4 | * Originally Twofish for GPG | 4 | * Originally Twofish for GPG |
5 | * By Matthew Skala <mskala@ansuz.sooke.bc.ca>, July 26, 1998 | 5 | * By Matthew Skala <mskala@ansuz.sooke.bc.ca>, July 26, 1998 |
@@ -44,7 +44,6 @@ | |||
44 | #include <linux/module.h> | 44 | #include <linux/module.h> |
45 | #include <linux/types.h> | 45 | #include <linux/types.h> |
46 | 46 | ||
47 | |||
48 | asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); | 47 | asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); |
49 | asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); | 48 | asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); |
50 | 49 | ||
@@ -60,7 +59,7 @@ static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | |||
60 | 59 | ||
61 | static struct crypto_alg alg = { | 60 | static struct crypto_alg alg = { |
62 | .cra_name = "twofish", | 61 | .cra_name = "twofish", |
63 | .cra_driver_name = "twofish-i586", | 62 | .cra_driver_name = "twofish-asm", |
64 | .cra_priority = 200, | 63 | .cra_priority = 200, |
65 | .cra_flags = CRYPTO_ALG_TYPE_CIPHER, | 64 | .cra_flags = CRYPTO_ALG_TYPE_CIPHER, |
66 | .cra_blocksize = TF_BLOCK_SIZE, | 65 | .cra_blocksize = TF_BLOCK_SIZE, |
@@ -93,5 +92,6 @@ module_init(init); | |||
93 | module_exit(fini); | 92 | module_exit(fini); |
94 | 93 | ||
95 | MODULE_LICENSE("GPL"); | 94 | MODULE_LICENSE("GPL"); |
96 | MODULE_DESCRIPTION ("Twofish Cipher Algorithm, i586 asm optimized"); | 95 | MODULE_DESCRIPTION ("Twofish Cipher Algorithm, asm optimized"); |
97 | MODULE_ALIAS("twofish"); | 96 | MODULE_ALIAS("twofish"); |
97 | MODULE_ALIAS("twofish-asm"); | ||