aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2008-01-25 11:38:25 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2008-01-25 11:38:25 -0500
commit eba0e319c12fb098d66316a8eafbaaa9174a07c3 (patch)
tree b2703117db9e36bb3510654efd55361f61c54742 /arch/x86
parent df8dc74e8a383eaf2d9b44b80a71ec6f0e52b42e (diff)
parent 15e7b4452b72ae890f2fcb027b4c4fa63a1c9a7a (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (125 commits)
  [CRYPTO] twofish: Merge common glue code
  [CRYPTO] hifn_795x: Fixup container_of() usage
  [CRYPTO] cast6: inline bloat--
  [CRYPTO] api: Set default CRYPTO_MINALIGN to unsigned long long
  [CRYPTO] tcrypt: Make xcbc available as a standalone test
  [CRYPTO] xcbc: Remove bogus hash/cipher test
  [CRYPTO] xcbc: Fix algorithm leak when block size check fails
  [CRYPTO] tcrypt: Zero axbuf in the right function
  [CRYPTO] padlock: Only reset the key once for each CBC and ECB operation
  [CRYPTO] api: Include sched.h for cond_resched in scatterwalk.h
  [CRYPTO] salsa20-asm: Remove unnecessary dependency on CRYPTO_SALSA20
  [CRYPTO] tcrypt: Add select of AEAD
  [CRYPTO] salsa20: Add x86-64 assembly version
  [CRYPTO] salsa20_i586: Salsa20 stream cipher algorithm (i586 version)
  [CRYPTO] gcm: Introduce rfc4106
  [CRYPTO] api: Show async type
  [CRYPTO] chainiv: Avoid lock spinning where possible
  [CRYPTO] seqiv: Add select AEAD in Kconfig
  [CRYPTO] scatterwalk: Handle zero nbytes in scatterwalk_map_and_copy
  [CRYPTO] null: Allow setkey on digest_null
  ...
Diffstat (limited to 'arch/x86')
-rw-r--r-- arch/x86/crypto/Makefile 12
-rw-r--r-- arch/x86/crypto/aes-i586-asm_32.S 89
-rw-r--r-- arch/x86/crypto/aes-x86_64-asm_64.S 68
-rw-r--r-- arch/x86/crypto/aes_32.c 515
-rw-r--r-- arch/x86/crypto/aes_64.c 336
-rw-r--r-- arch/x86/crypto/aes_glue.c 57
-rw-r--r-- arch/x86/crypto/salsa20-i586-asm_32.S 1114
-rw-r--r-- arch/x86/crypto/salsa20-x86_64-asm_64.S 920
-rw-r--r-- arch/x86/crypto/salsa20_glue.c 129
-rw-r--r-- arch/x86/crypto/twofish_64.c 97
-rw-r--r-- arch/x86/crypto/twofish_glue.c (renamed from arch/x86/crypto/twofish_32.c) 8
11 files changed, 2309 insertions, 1036 deletions
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 46bb609e2444..3874c2de5403 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -4,12 +4,16 @@
4 4
5obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o 5obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
6obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o 6obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
7obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
7 8
8obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o 9obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
9obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o 10obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
11obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
10 12
11aes-i586-y := aes-i586-asm_32.o aes_32.o 13aes-i586-y := aes-i586-asm_32.o aes_glue.o
12twofish-i586-y := twofish-i586-asm_32.o twofish_32.o 14twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o
15salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o
13 16
14aes-x86_64-y := aes-x86_64-asm_64.o aes_64.o 17aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
15twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_64.o 18twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
19salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
diff --git a/arch/x86/crypto/aes-i586-asm_32.S b/arch/x86/crypto/aes-i586-asm_32.S
index f942f0c8f630..1093bede3e0a 100644
--- a/arch/x86/crypto/aes-i586-asm_32.S
+++ b/arch/x86/crypto/aes-i586-asm_32.S
@@ -46,9 +46,9 @@
46#define in_blk 16 46#define in_blk 16
47 47
48/* offsets in crypto_tfm structure */ 48/* offsets in crypto_tfm structure */
49#define ekey (crypto_tfm_ctx_offset + 0) 49#define klen (crypto_tfm_ctx_offset + 0)
50#define nrnd (crypto_tfm_ctx_offset + 256) 50#define ekey (crypto_tfm_ctx_offset + 4)
51#define dkey (crypto_tfm_ctx_offset + 260) 51#define dkey (crypto_tfm_ctx_offset + 244)
52 52
53// register mapping for encrypt and decrypt subroutines 53// register mapping for encrypt and decrypt subroutines
54 54
@@ -221,8 +221,8 @@
221 221
222.global aes_enc_blk 222.global aes_enc_blk
223 223
224.extern ft_tab 224.extern crypto_ft_tab
225.extern fl_tab 225.extern crypto_fl_tab
226 226
227.align 4 227.align 4
228 228
@@ -236,7 +236,7 @@ aes_enc_blk:
2361: push %ebx 2361: push %ebx
237 mov in_blk+4(%esp),%r2 237 mov in_blk+4(%esp),%r2
238 push %esi 238 push %esi
239 mov nrnd(%ebp),%r3 // number of rounds 239 mov klen(%ebp),%r3 // key size
240 push %edi 240 push %edi
241#if ekey != 0 241#if ekey != 0
242 lea ekey(%ebp),%ebp // key pointer 242 lea ekey(%ebp),%ebp // key pointer
@@ -255,26 +255,26 @@ aes_enc_blk:
255 255
256 sub $8,%esp // space for register saves on stack 256 sub $8,%esp // space for register saves on stack
257 add $16,%ebp // increment to next round key 257 add $16,%ebp // increment to next round key
258 cmp $12,%r3 258 cmp $24,%r3
259 jb 4f // 10 rounds for 128-bit key 259 jb 4f // 10 rounds for 128-bit key
260 lea 32(%ebp),%ebp 260 lea 32(%ebp),%ebp
261 je 3f // 12 rounds for 192-bit key 261 je 3f // 12 rounds for 192-bit key
262 lea 32(%ebp),%ebp 262 lea 32(%ebp),%ebp
263 263
2642: fwd_rnd1( -64(%ebp) ,ft_tab) // 14 rounds for 256-bit key 2642: fwd_rnd1( -64(%ebp), crypto_ft_tab) // 14 rounds for 256-bit key
265 fwd_rnd2( -48(%ebp) ,ft_tab) 265 fwd_rnd2( -48(%ebp), crypto_ft_tab)
2663: fwd_rnd1( -32(%ebp) ,ft_tab) // 12 rounds for 192-bit key 2663: fwd_rnd1( -32(%ebp), crypto_ft_tab) // 12 rounds for 192-bit key
267 fwd_rnd2( -16(%ebp) ,ft_tab) 267 fwd_rnd2( -16(%ebp), crypto_ft_tab)
2684: fwd_rnd1( (%ebp) ,ft_tab) // 10 rounds for 128-bit key 2684: fwd_rnd1( (%ebp), crypto_ft_tab) // 10 rounds for 128-bit key
269 fwd_rnd2( +16(%ebp) ,ft_tab) 269 fwd_rnd2( +16(%ebp), crypto_ft_tab)
270 fwd_rnd1( +32(%ebp) ,ft_tab) 270 fwd_rnd1( +32(%ebp), crypto_ft_tab)
271 fwd_rnd2( +48(%ebp) ,ft_tab) 271 fwd_rnd2( +48(%ebp), crypto_ft_tab)
272 fwd_rnd1( +64(%ebp) ,ft_tab) 272 fwd_rnd1( +64(%ebp), crypto_ft_tab)
273 fwd_rnd2( +80(%ebp) ,ft_tab) 273 fwd_rnd2( +80(%ebp), crypto_ft_tab)
274 fwd_rnd1( +96(%ebp) ,ft_tab) 274 fwd_rnd1( +96(%ebp), crypto_ft_tab)
275 fwd_rnd2(+112(%ebp) ,ft_tab) 275 fwd_rnd2(+112(%ebp), crypto_ft_tab)
276 fwd_rnd1(+128(%ebp) ,ft_tab) 276 fwd_rnd1(+128(%ebp), crypto_ft_tab)
277 fwd_rnd2(+144(%ebp) ,fl_tab) // last round uses a different table 277 fwd_rnd2(+144(%ebp), crypto_fl_tab) // last round uses a different table
278 278
279// move final values to the output array. CAUTION: the 279// move final values to the output array. CAUTION: the
280// order of these assignments relies on the register mappings 280// order of these assignments relies on the register mappings
@@ -297,8 +297,8 @@ aes_enc_blk:
297 297
298.global aes_dec_blk 298.global aes_dec_blk
299 299
300.extern it_tab 300.extern crypto_it_tab
301.extern il_tab 301.extern crypto_il_tab
302 302
303.align 4 303.align 4
304 304
@@ -312,14 +312,11 @@ aes_dec_blk:
3121: push %ebx 3121: push %ebx
313 mov in_blk+4(%esp),%r2 313 mov in_blk+4(%esp),%r2
314 push %esi 314 push %esi
315 mov nrnd(%ebp),%r3 // number of rounds 315 mov klen(%ebp),%r3 // key size
316 push %edi 316 push %edi
317#if dkey != 0 317#if dkey != 0
318 lea dkey(%ebp),%ebp // key pointer 318 lea dkey(%ebp),%ebp // key pointer
319#endif 319#endif
320 mov %r3,%r0
321 shl $4,%r0
322 add %r0,%ebp
323 320
324// input four columns and xor in first round key 321// input four columns and xor in first round key
325 322
@@ -333,27 +330,27 @@ aes_dec_blk:
333 xor 12(%ebp),%r5 330 xor 12(%ebp),%r5
334 331
335 sub $8,%esp // space for register saves on stack 332 sub $8,%esp // space for register saves on stack
336 sub $16,%ebp // increment to next round key 333 add $16,%ebp // increment to next round key
337 cmp $12,%r3 334 cmp $24,%r3
338 jb 4f // 10 rounds for 128-bit key 335 jb 4f // 10 rounds for 128-bit key
339 lea -32(%ebp),%ebp 336 lea 32(%ebp),%ebp
340 je 3f // 12 rounds for 192-bit key 337 je 3f // 12 rounds for 192-bit key
341 lea -32(%ebp),%ebp 338 lea 32(%ebp),%ebp
342 339
3432: inv_rnd1( +64(%ebp), it_tab) // 14 rounds for 256-bit key 3402: inv_rnd1( -64(%ebp), crypto_it_tab) // 14 rounds for 256-bit key
344 inv_rnd2( +48(%ebp), it_tab) 341 inv_rnd2( -48(%ebp), crypto_it_tab)
3453: inv_rnd1( +32(%ebp), it_tab) // 12 rounds for 192-bit key 3423: inv_rnd1( -32(%ebp), crypto_it_tab) // 12 rounds for 192-bit key
346 inv_rnd2( +16(%ebp), it_tab) 343 inv_rnd2( -16(%ebp), crypto_it_tab)
3474: inv_rnd1( (%ebp), it_tab) // 10 rounds for 128-bit key 3444: inv_rnd1( (%ebp), crypto_it_tab) // 10 rounds for 128-bit key
348 inv_rnd2( -16(%ebp), it_tab) 345 inv_rnd2( +16(%ebp), crypto_it_tab)
349 inv_rnd1( -32(%ebp), it_tab) 346 inv_rnd1( +32(%ebp), crypto_it_tab)
350 inv_rnd2( -48(%ebp), it_tab) 347 inv_rnd2( +48(%ebp), crypto_it_tab)
351 inv_rnd1( -64(%ebp), it_tab) 348 inv_rnd1( +64(%ebp), crypto_it_tab)
352 inv_rnd2( -80(%ebp), it_tab) 349 inv_rnd2( +80(%ebp), crypto_it_tab)
353 inv_rnd1( -96(%ebp), it_tab) 350 inv_rnd1( +96(%ebp), crypto_it_tab)
354 inv_rnd2(-112(%ebp), it_tab) 351 inv_rnd2(+112(%ebp), crypto_it_tab)
355 inv_rnd1(-128(%ebp), it_tab) 352 inv_rnd1(+128(%ebp), crypto_it_tab)
356 inv_rnd2(-144(%ebp), il_tab) // last round uses a different table 353 inv_rnd2(+144(%ebp), crypto_il_tab) // last round uses a different table
357 354
358// move final values to the output array. CAUTION: the 355// move final values to the output array. CAUTION: the
359// order of these assignments relies on the register mappings 356// order of these assignments relies on the register mappings
diff --git a/arch/x86/crypto/aes-x86_64-asm_64.S b/arch/x86/crypto/aes-x86_64-asm_64.S
index 26b40de4d0b0..a120f526c3df 100644
--- a/arch/x86/crypto/aes-x86_64-asm_64.S
+++ b/arch/x86/crypto/aes-x86_64-asm_64.S
@@ -8,10 +8,10 @@
8 * including this sentence is retained in full. 8 * including this sentence is retained in full.
9 */ 9 */
10 10
11.extern aes_ft_tab 11.extern crypto_ft_tab
12.extern aes_it_tab 12.extern crypto_it_tab
13.extern aes_fl_tab 13.extern crypto_fl_tab
14.extern aes_il_tab 14.extern crypto_il_tab
15 15
16.text 16.text
17 17
@@ -56,13 +56,13 @@
56 .align 8; \ 56 .align 8; \
57FUNC: movq r1,r2; \ 57FUNC: movq r1,r2; \
58 movq r3,r4; \ 58 movq r3,r4; \
59 leaq BASE+KEY+52(r8),r9; \ 59 leaq BASE+KEY+48+4(r8),r9; \
60 movq r10,r11; \ 60 movq r10,r11; \
61 movl (r7),r5 ## E; \ 61 movl (r7),r5 ## E; \
62 movl 4(r7),r1 ## E; \ 62 movl 4(r7),r1 ## E; \
63 movl 8(r7),r6 ## E; \ 63 movl 8(r7),r6 ## E; \
64 movl 12(r7),r7 ## E; \ 64 movl 12(r7),r7 ## E; \
65 movl BASE(r8),r10 ## E; \ 65 movl BASE+0(r8),r10 ## E; \
66 xorl -48(r9),r5 ## E; \ 66 xorl -48(r9),r5 ## E; \
67 xorl -44(r9),r1 ## E; \ 67 xorl -44(r9),r1 ## E; \
68 xorl -40(r9),r6 ## E; \ 68 xorl -40(r9),r6 ## E; \
@@ -154,37 +154,37 @@ FUNC: movq r1,r2; \
154/* void aes_enc_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */ 154/* void aes_enc_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */
155 155
156 entry(aes_enc_blk,0,enc128,enc192) 156 entry(aes_enc_blk,0,enc128,enc192)
157 encrypt_round(aes_ft_tab,-96) 157 encrypt_round(crypto_ft_tab,-96)
158 encrypt_round(aes_ft_tab,-80) 158 encrypt_round(crypto_ft_tab,-80)
159enc192: encrypt_round(aes_ft_tab,-64) 159enc192: encrypt_round(crypto_ft_tab,-64)
160 encrypt_round(aes_ft_tab,-48) 160 encrypt_round(crypto_ft_tab,-48)
161enc128: encrypt_round(aes_ft_tab,-32) 161enc128: encrypt_round(crypto_ft_tab,-32)
162 encrypt_round(aes_ft_tab,-16) 162 encrypt_round(crypto_ft_tab,-16)
163 encrypt_round(aes_ft_tab, 0) 163 encrypt_round(crypto_ft_tab, 0)
164 encrypt_round(aes_ft_tab, 16) 164 encrypt_round(crypto_ft_tab, 16)
165 encrypt_round(aes_ft_tab, 32) 165 encrypt_round(crypto_ft_tab, 32)
166 encrypt_round(aes_ft_tab, 48) 166 encrypt_round(crypto_ft_tab, 48)
167 encrypt_round(aes_ft_tab, 64) 167 encrypt_round(crypto_ft_tab, 64)
168 encrypt_round(aes_ft_tab, 80) 168 encrypt_round(crypto_ft_tab, 80)
169 encrypt_round(aes_ft_tab, 96) 169 encrypt_round(crypto_ft_tab, 96)
170 encrypt_final(aes_fl_tab,112) 170 encrypt_final(crypto_fl_tab,112)
171 return 171 return
172 172
173/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */ 173/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */
174 174
175 entry(aes_dec_blk,240,dec128,dec192) 175 entry(aes_dec_blk,240,dec128,dec192)
176 decrypt_round(aes_it_tab,-96) 176 decrypt_round(crypto_it_tab,-96)
177 decrypt_round(aes_it_tab,-80) 177 decrypt_round(crypto_it_tab,-80)
178dec192: decrypt_round(aes_it_tab,-64) 178dec192: decrypt_round(crypto_it_tab,-64)
179 decrypt_round(aes_it_tab,-48) 179 decrypt_round(crypto_it_tab,-48)
180dec128: decrypt_round(aes_it_tab,-32) 180dec128: decrypt_round(crypto_it_tab,-32)
181 decrypt_round(aes_it_tab,-16) 181 decrypt_round(crypto_it_tab,-16)
182 decrypt_round(aes_it_tab, 0) 182 decrypt_round(crypto_it_tab, 0)
183 decrypt_round(aes_it_tab, 16) 183 decrypt_round(crypto_it_tab, 16)
184 decrypt_round(aes_it_tab, 32) 184 decrypt_round(crypto_it_tab, 32)
185 decrypt_round(aes_it_tab, 48) 185 decrypt_round(crypto_it_tab, 48)
186 decrypt_round(aes_it_tab, 64) 186 decrypt_round(crypto_it_tab, 64)
187 decrypt_round(aes_it_tab, 80) 187 decrypt_round(crypto_it_tab, 80)
188 decrypt_round(aes_it_tab, 96) 188 decrypt_round(crypto_it_tab, 96)
189 decrypt_final(aes_il_tab,112) 189 decrypt_final(crypto_il_tab,112)
190 return 190 return
diff --git a/arch/x86/crypto/aes_32.c b/arch/x86/crypto/aes_32.c
deleted file mode 100644
index 49aad9397f10..000000000000
--- a/arch/x86/crypto/aes_32.c
+++ /dev/null
@@ -1,515 +0,0 @@
1/*
2 *
3 * Glue Code for optimized 586 assembler version of AES
4 *
5 * Copyright (c) 2002, Dr Brian Gladman <>, Worcester, UK.
6 * All rights reserved.
7 *
8 * LICENSE TERMS
9 *
10 * The free distribution and use of this software in both source and binary
11 * form is allowed (with or without changes) provided that:
12 *
13 * 1. distributions of this source code include the above copyright
14 * notice, this list of conditions and the following disclaimer;
15 *
16 * 2. distributions in binary form include the above copyright
17 * notice, this list of conditions and the following disclaimer
18 * in the documentation and/or other associated materials;
19 *
20 * 3. the copyright holder's name is not used to endorse products
21 * built using this software without specific written permission.
22 *
23 * ALTERNATIVELY, provided that this notice is retained in full, this product
24 * may be distributed under the terms of the GNU General Public License (GPL),
25 * in which case the provisions of the GPL apply INSTEAD OF those given above.
26 *
27 * DISCLAIMER
28 *
29 * This software is provided 'as is' with no explicit or implied warranties
30 * in respect of its properties, including, but not limited to, correctness
31 * and/or fitness for purpose.
32 *
33 * Copyright (c) 2003, Adam J. Richter <adam@yggdrasil.com> (conversion to
34 * 2.5 API).
35 * Copyright (c) 2003, 2004 Fruhwirth Clemens <clemens@endorphin.org>
36 * Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
37 *
38 */
39
40#include <asm/byteorder.h>
41#include <linux/kernel.h>
42#include <linux/module.h>
43#include <linux/init.h>
44#include <linux/types.h>
45#include <linux/crypto.h>
46#include <linux/linkage.h>
47
48asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
49asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
50
51#define AES_MIN_KEY_SIZE 16
52#define AES_MAX_KEY_SIZE 32
53#define AES_BLOCK_SIZE 16
54#define AES_KS_LENGTH 4 * AES_BLOCK_SIZE
55#define RC_LENGTH 29
56
57struct aes_ctx {
58 u32 ekey[AES_KS_LENGTH];
59 u32 rounds;
60 u32 dkey[AES_KS_LENGTH];
61};
62
63#define WPOLY 0x011b
64#define bytes2word(b0, b1, b2, b3) \
65 (((u32)(b3) << 24) | ((u32)(b2) << 16) | ((u32)(b1) << 8) | (b0))
66
67/* define the finite field multiplies required for Rijndael */
68#define f2(x) ((x) ? pow[log[x] + 0x19] : 0)
69#define f3(x) ((x) ? pow[log[x] + 0x01] : 0)
70#define f9(x) ((x) ? pow[log[x] + 0xc7] : 0)
71#define fb(x) ((x) ? pow[log[x] + 0x68] : 0)
72#define fd(x) ((x) ? pow[log[x] + 0xee] : 0)
73#define fe(x) ((x) ? pow[log[x] + 0xdf] : 0)
74#define fi(x) ((x) ? pow[255 - log[x]]: 0)
75
76static inline u32 upr(u32 x, int n)
77{
78 return (x << 8 * n) | (x >> (32 - 8 * n));
79}
80
81static inline u8 bval(u32 x, int n)
82{
83 return x >> 8 * n;
84}
85
86/* The forward and inverse affine transformations used in the S-box */
87#define fwd_affine(x) \
88 (w = (u32)x, w ^= (w<<1)^(w<<2)^(w<<3)^(w<<4), 0x63^(u8)(w^(w>>8)))
89
90#define inv_affine(x) \
91 (w = (u32)x, w = (w<<1)^(w<<3)^(w<<6), 0x05^(u8)(w^(w>>8)))
92
93static u32 rcon_tab[RC_LENGTH];
94
95u32 ft_tab[4][256];
96u32 fl_tab[4][256];
97static u32 im_tab[4][256];
98u32 il_tab[4][256];
99u32 it_tab[4][256];
100
101static void gen_tabs(void)
102{
103 u32 i, w;
104 u8 pow[512], log[256];
105
106 /*
107 * log and power tables for GF(2^8) finite field with
108 * WPOLY as modular polynomial - the simplest primitive
109 * root is 0x03, used here to generate the tables.
110 */
111 i = 0; w = 1;
112
113 do {
114 pow[i] = (u8)w;
115 pow[i + 255] = (u8)w;
116 log[w] = (u8)i++;
117 w ^= (w << 1) ^ (w & 0x80 ? WPOLY : 0);
118 } while (w != 1);
119
120 for(i = 0, w = 1; i < RC_LENGTH; ++i) {
121 rcon_tab[i] = bytes2word(w, 0, 0, 0);
122 w = f2(w);
123 }
124
125 for(i = 0; i < 256; ++i) {
126 u8 b;
127
128 b = fwd_affine(fi((u8)i));
129 w = bytes2word(f2(b), b, b, f3(b));
130
131 /* tables for a normal encryption round */
132 ft_tab[0][i] = w;
133 ft_tab[1][i] = upr(w, 1);
134 ft_tab[2][i] = upr(w, 2);
135 ft_tab[3][i] = upr(w, 3);
136 w = bytes2word(b, 0, 0, 0);
137
138 /*
139 * tables for last encryption round
140 * (may also be used in the key schedule)
141 */
142 fl_tab[0][i] = w;
143 fl_tab[1][i] = upr(w, 1);
144 fl_tab[2][i] = upr(w, 2);
145 fl_tab[3][i] = upr(w, 3);
146
147 b = fi(inv_affine((u8)i));
148 w = bytes2word(fe(b), f9(b), fd(b), fb(b));
149
150 /* tables for the inverse mix column operation */
151 im_tab[0][b] = w;
152 im_tab[1][b] = upr(w, 1);
153 im_tab[2][b] = upr(w, 2);
154 im_tab[3][b] = upr(w, 3);
155
156 /* tables for a normal decryption round */
157 it_tab[0][i] = w;
158 it_tab[1][i] = upr(w,1);
159 it_tab[2][i] = upr(w,2);
160 it_tab[3][i] = upr(w,3);
161
162 w = bytes2word(b, 0, 0, 0);
163
164 /* tables for last decryption round */
165 il_tab[0][i] = w;
166 il_tab[1][i] = upr(w,1);
167 il_tab[2][i] = upr(w,2);
168 il_tab[3][i] = upr(w,3);
169 }
170}
171
172#define four_tables(x,tab,vf,rf,c) \
173( tab[0][bval(vf(x,0,c),rf(0,c))] ^ \
174 tab[1][bval(vf(x,1,c),rf(1,c))] ^ \
175 tab[2][bval(vf(x,2,c),rf(2,c))] ^ \
176 tab[3][bval(vf(x,3,c),rf(3,c))] \
177)
178
179#define vf1(x,r,c) (x)
180#define rf1(r,c) (r)
181#define rf2(r,c) ((r-c)&3)
182
183#define inv_mcol(x) four_tables(x,im_tab,vf1,rf1,0)
184#define ls_box(x,c) four_tables(x,fl_tab,vf1,rf2,c)
185
186#define ff(x) inv_mcol(x)
187
188#define ke4(k,i) \
189{ \
190 k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[i]; \
191 k[4*(i)+5] = ss[1] ^= ss[0]; \
192 k[4*(i)+6] = ss[2] ^= ss[1]; \
193 k[4*(i)+7] = ss[3] ^= ss[2]; \
194}
195
196#define kel4(k,i) \
197{ \
198 k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[i]; \
199 k[4*(i)+5] = ss[1] ^= ss[0]; \
200 k[4*(i)+6] = ss[2] ^= ss[1]; k[4*(i)+7] = ss[3] ^= ss[2]; \
201}
202
203#define ke6(k,i) \
204{ \
205 k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \
206 k[6*(i)+ 7] = ss[1] ^= ss[0]; \
207 k[6*(i)+ 8] = ss[2] ^= ss[1]; \
208 k[6*(i)+ 9] = ss[3] ^= ss[2]; \
209 k[6*(i)+10] = ss[4] ^= ss[3]; \
210 k[6*(i)+11] = ss[5] ^= ss[4]; \
211}
212
213#define kel6(k,i) \
214{ \
215 k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \
216 k[6*(i)+ 7] = ss[1] ^= ss[0]; \
217 k[6*(i)+ 8] = ss[2] ^= ss[1]; \
218 k[6*(i)+ 9] = ss[3] ^= ss[2]; \
219}
220
221#define ke8(k,i) \
222{ \
223 k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \
224 k[8*(i)+ 9] = ss[1] ^= ss[0]; \
225 k[8*(i)+10] = ss[2] ^= ss[1]; \
226 k[8*(i)+11] = ss[3] ^= ss[2]; \
227 k[8*(i)+12] = ss[4] ^= ls_box(ss[3],0); \
228 k[8*(i)+13] = ss[5] ^= ss[4]; \
229 k[8*(i)+14] = ss[6] ^= ss[5]; \
230 k[8*(i)+15] = ss[7] ^= ss[6]; \
231}
232
233#define kel8(k,i) \
234{ \
235 k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \
236 k[8*(i)+ 9] = ss[1] ^= ss[0]; \
237 k[8*(i)+10] = ss[2] ^= ss[1]; \
238 k[8*(i)+11] = ss[3] ^= ss[2]; \
239}
240
241#define kdf4(k,i) \
242{ \
243 ss[0] = ss[0] ^ ss[2] ^ ss[1] ^ ss[3]; \
244 ss[1] = ss[1] ^ ss[3]; \
245 ss[2] = ss[2] ^ ss[3]; \
246 ss[3] = ss[3]; \
247 ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i]; \
248 ss[i % 4] ^= ss[4]; \
249 ss[4] ^= k[4*(i)]; \
250 k[4*(i)+4] = ff(ss[4]); \
251 ss[4] ^= k[4*(i)+1]; \
252 k[4*(i)+5] = ff(ss[4]); \
253 ss[4] ^= k[4*(i)+2]; \
254 k[4*(i)+6] = ff(ss[4]); \
255 ss[4] ^= k[4*(i)+3]; \
256 k[4*(i)+7] = ff(ss[4]); \
257}
258
259#define kd4(k,i) \
260{ \
261 ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i]; \
262 ss[i % 4] ^= ss[4]; \
263 ss[4] = ff(ss[4]); \
264 k[4*(i)+4] = ss[4] ^= k[4*(i)]; \
265 k[4*(i)+5] = ss[4] ^= k[4*(i)+1]; \
266 k[4*(i)+6] = ss[4] ^= k[4*(i)+2]; \
267 k[4*(i)+7] = ss[4] ^= k[4*(i)+3]; \
268}
269
270#define kdl4(k,i) \
271{ \
272 ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i]; \
273 ss[i % 4] ^= ss[4]; \
274 k[4*(i)+4] = (ss[0] ^= ss[1]) ^ ss[2] ^ ss[3]; \
275 k[4*(i)+5] = ss[1] ^ ss[3]; \
276 k[4*(i)+6] = ss[0]; \
277 k[4*(i)+7] = ss[1]; \
278}
279
280#define kdf6(k,i) \
281{ \
282 ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \
283 k[6*(i)+ 6] = ff(ss[0]); \
284 ss[1] ^= ss[0]; \
285 k[6*(i)+ 7] = ff(ss[1]); \
286 ss[2] ^= ss[1]; \
287 k[6*(i)+ 8] = ff(ss[2]); \
288 ss[3] ^= ss[2]; \
289 k[6*(i)+ 9] = ff(ss[3]); \
290 ss[4] ^= ss[3]; \
291 k[6*(i)+10] = ff(ss[4]); \
292 ss[5] ^= ss[4]; \
293 k[6*(i)+11] = ff(ss[5]); \
294}
295
296#define kd6(k,i) \
297{ \
298 ss[6] = ls_box(ss[5],3) ^ rcon_tab[i]; \
299 ss[0] ^= ss[6]; ss[6] = ff(ss[6]); \
300 k[6*(i)+ 6] = ss[6] ^= k[6*(i)]; \
301 ss[1] ^= ss[0]; \
302 k[6*(i)+ 7] = ss[6] ^= k[6*(i)+ 1]; \
303 ss[2] ^= ss[1]; \
304 k[6*(i)+ 8] = ss[6] ^= k[6*(i)+ 2]; \
305 ss[3] ^= ss[2]; \
306 k[6*(i)+ 9] = ss[6] ^= k[6*(i)+ 3]; \
307 ss[4] ^= ss[3]; \
308 k[6*(i)+10] = ss[6] ^= k[6*(i)+ 4]; \
309 ss[5] ^= ss[4]; \
310 k[6*(i)+11] = ss[6] ^= k[6*(i)+ 5]; \
311}
312
313#define kdl6(k,i) \
314{ \
315 ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \
316 k[6*(i)+ 6] = ss[0]; \
317 ss[1] ^= ss[0]; \
318 k[6*(i)+ 7] = ss[1]; \
319 ss[2] ^= ss[1]; \
320 k[6*(i)+ 8] = ss[2]; \
321 ss[3] ^= ss[2]; \
322 k[6*(i)+ 9] = ss[3]; \
323}
324
325#define kdf8(k,i) \
326{ \
327 ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \
328 k[8*(i)+ 8] = ff(ss[0]); \
329 ss[1] ^= ss[0]; \
330 k[8*(i)+ 9] = ff(ss[1]); \
331 ss[2] ^= ss[1]; \
332 k[8*(i)+10] = ff(ss[2]); \
333 ss[3] ^= ss[2]; \
334 k[8*(i)+11] = ff(ss[3]); \
335 ss[4] ^= ls_box(ss[3],0); \
336 k[8*(i)+12] = ff(ss[4]); \
337 ss[5] ^= ss[4]; \
338 k[8*(i)+13] = ff(ss[5]); \
339 ss[6] ^= ss[5]; \
340 k[8*(i)+14] = ff(ss[6]); \
341 ss[7] ^= ss[6]; \
342 k[8*(i)+15] = ff(ss[7]); \
343}
344
345#define kd8(k,i) \
346{ \
347 u32 __g = ls_box(ss[7],3) ^ rcon_tab[i]; \
348 ss[0] ^= __g; \
349 __g = ff(__g); \
350 k[8*(i)+ 8] = __g ^= k[8*(i)]; \
351 ss[1] ^= ss[0]; \
352 k[8*(i)+ 9] = __g ^= k[8*(i)+ 1]; \
353 ss[2] ^= ss[1]; \
354 k[8*(i)+10] = __g ^= k[8*(i)+ 2]; \
355 ss[3] ^= ss[2]; \
356 k[8*(i)+11] = __g ^= k[8*(i)+ 3]; \
357 __g = ls_box(ss[3],0); \
358 ss[4] ^= __g; \
359 __g = ff(__g); \
360 k[8*(i)+12] = __g ^= k[8*(i)+ 4]; \
361 ss[5] ^= ss[4]; \
362 k[8*(i)+13] = __g ^= k[8*(i)+ 5]; \
363 ss[6] ^= ss[5]; \
364 k[8*(i)+14] = __g ^= k[8*(i)+ 6]; \
365 ss[7] ^= ss[6]; \
366 k[8*(i)+15] = __g ^= k[8*(i)+ 7]; \
367}
368
369#define kdl8(k,i) \
370{ \
371 ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \
372 k[8*(i)+ 8] = ss[0]; \
373 ss[1] ^= ss[0]; \
374 k[8*(i)+ 9] = ss[1]; \
375 ss[2] ^= ss[1]; \
376 k[8*(i)+10] = ss[2]; \
377 ss[3] ^= ss[2]; \
378 k[8*(i)+11] = ss[3]; \
379}
380
381static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
382 unsigned int key_len)
383{
384 int i;
385 u32 ss[8];
386 struct aes_ctx *ctx = crypto_tfm_ctx(tfm);
387 const __le32 *key = (const __le32 *)in_key;
388 u32 *flags = &tfm->crt_flags;
389
390 /* encryption schedule */
391
392 ctx->ekey[0] = ss[0] = le32_to_cpu(key[0]);
393 ctx->ekey[1] = ss[1] = le32_to_cpu(key[1]);
394 ctx->ekey[2] = ss[2] = le32_to_cpu(key[2]);
395 ctx->ekey[3] = ss[3] = le32_to_cpu(key[3]);
396
397 switch(key_len) {
398 case 16:
399 for (i = 0; i < 9; i++)
400 ke4(ctx->ekey, i);
401 kel4(ctx->ekey, 9);
402 ctx->rounds = 10;
403 break;
404
405 case 24:
406 ctx->ekey[4] = ss[4] = le32_to_cpu(key[4]);
407 ctx->ekey[5] = ss[5] = le32_to_cpu(key[5]);
408 for (i = 0; i < 7; i++)
409 ke6(ctx->ekey, i);
410 kel6(ctx->ekey, 7);
411 ctx->rounds = 12;
412 break;
413
414 case 32:
415 ctx->ekey[4] = ss[4] = le32_to_cpu(key[4]);
416 ctx->ekey[5] = ss[5] = le32_to_cpu(key[5]);
417 ctx->ekey[6] = ss[6] = le32_to_cpu(key[6]);
418 ctx->ekey[7] = ss[7] = le32_to_cpu(key[7]);
419 for (i = 0; i < 6; i++)
420 ke8(ctx->ekey, i);
421 kel8(ctx->ekey, 6);
422 ctx->rounds = 14;
423 break;
424
425 default:
426 *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
427 return -EINVAL;
428 }
429
430 /* decryption schedule */
431
432 ctx->dkey[0] = ss[0] = le32_to_cpu(key[0]);
433 ctx->dkey[1] = ss[1] = le32_to_cpu(key[1]);
434 ctx->dkey[2] = ss[2] = le32_to_cpu(key[2]);
435 ctx->dkey[3] = ss[3] = le32_to_cpu(key[3]);
436
437 switch (key_len) {
438 case 16:
439 kdf4(ctx->dkey, 0);
440 for (i = 1; i < 9; i++)
441 kd4(ctx->dkey, i);
442 kdl4(ctx->dkey, 9);
443 break;
444
445 case 24:
446 ctx->dkey[4] = ff(ss[4] = le32_to_cpu(key[4]));
447 ctx->dkey[5] = ff(ss[5] = le32_to_cpu(key[5]));
448 kdf6(ctx->dkey, 0);
449 for (i = 1; i < 7; i++)
450 kd6(ctx->dkey, i);
451 kdl6(ctx->dkey, 7);
452 break;
453
454 case 32:
455 ctx->dkey[4] = ff(ss[4] = le32_to_cpu(key[4]));
456 ctx->dkey[5] = ff(ss[5] = le32_to_cpu(key[5]));
457 ctx->dkey[6] = ff(ss[6] = le32_to_cpu(key[6]));
458 ctx->dkey[7] = ff(ss[7] = le32_to_cpu(key[7]));
459 kdf8(ctx->dkey, 0);
460 for (i = 1; i < 6; i++)
461 kd8(ctx->dkey, i);
462 kdl8(ctx->dkey, 6);
463 break;
464 }
465 return 0;
466}
467
468static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
469{
470 aes_enc_blk(tfm, dst, src);
471}
472
473static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
474{
475 aes_dec_blk(tfm, dst, src);
476}
477
478static struct crypto_alg aes_alg = {
479 .cra_name = "aes",
480 .cra_driver_name = "aes-i586",
481 .cra_priority = 200,
482 .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
483 .cra_blocksize = AES_BLOCK_SIZE,
484 .cra_ctxsize = sizeof(struct aes_ctx),
485 .cra_module = THIS_MODULE,
486 .cra_list = LIST_HEAD_INIT(aes_alg.cra_list),
487 .cra_u = {
488 .cipher = {
489 .cia_min_keysize = AES_MIN_KEY_SIZE,
490 .cia_max_keysize = AES_MAX_KEY_SIZE,
491 .cia_setkey = aes_set_key,
492 .cia_encrypt = aes_encrypt,
493 .cia_decrypt = aes_decrypt
494 }
495 }
496};
497
498static int __init aes_init(void)
499{
500 gen_tabs();
501 return crypto_register_alg(&aes_alg);
502}
503
504static void __exit aes_fini(void)
505{
506 crypto_unregister_alg(&aes_alg);
507}
508
509module_init(aes_init);
510module_exit(aes_fini);
511
512MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, i586 asm optimized");
513MODULE_LICENSE("Dual BSD/GPL");
514MODULE_AUTHOR("Fruhwirth Clemens, James Morris, Brian Gladman, Adam Richter");
515MODULE_ALIAS("aes");
diff --git a/arch/x86/crypto/aes_64.c b/arch/x86/crypto/aes_64.c
deleted file mode 100644
index 5cdb13ea5cc2..000000000000
--- a/arch/x86/crypto/aes_64.c
+++ /dev/null
@@ -1,336 +0,0 @@
1/*
2 * Cryptographic API.
3 *
4 * AES Cipher Algorithm.
5 *
6 * Based on Brian Gladman's code.
7 *
8 * Linux developers:
9 * Alexander Kjeldaas <astor@fast.no>
10 * Herbert Valerio Riedel <hvr@hvrlab.org>
11 * Kyle McMartin <kyle@debian.org>
12 * Adam J. Richter <adam@yggdrasil.com> (conversion to 2.5 API).
13 * Andreas Steinmetz <ast@domdv.de> (adapted to x86_64 assembler)
14 *
15 * This program is free software; you can redistribute it and/or modify
16 * it under the terms of the GNU General Public License as published by
17 * the Free Software Foundation; either version 2 of the License, or
18 * (at your option) any later version.
19 *
20 * ---------------------------------------------------------------------------
21 * Copyright (c) 2002, Dr Brian Gladman <brg@gladman.me.uk>, Worcester, UK.
22 * All rights reserved.
23 *
24 * LICENSE TERMS
25 *
26 * The free distribution and use of this software in both source and binary
27 * form is allowed (with or without changes) provided that:
28 *
29 * 1. distributions of this source code include the above copyright
30 * notice, this list of conditions and the following disclaimer;
31 *
32 * 2. distributions in binary form include the above copyright
33 * notice, this list of conditions and the following disclaimer
34 * in the documentation and/or other associated materials;
35 *
36 * 3. the copyright holder's name is not used to endorse products
37 * built using this software without specific written permission.
38 *
39 * ALTERNATIVELY, provided that this notice is retained in full, this product
40 * may be distributed under the terms of the GNU General Public License (GPL),
41 * in which case the provisions of the GPL apply INSTEAD OF those given above.
42 *
43 * DISCLAIMER
44 *
45 * This software is provided 'as is' with no explicit or implied warranties
46 * in respect of its properties, including, but not limited to, correctness
47 * and/or fitness for purpose.
48 * ---------------------------------------------------------------------------
49 */
50
51/* Some changes from the Gladman version:
52 s/RIJNDAEL(e_key)/E_KEY/g
53 s/RIJNDAEL(d_key)/D_KEY/g
54*/
55
56#include <asm/byteorder.h>
57#include <linux/bitops.h>
58#include <linux/crypto.h>
59#include <linux/errno.h>
60#include <linux/init.h>
61#include <linux/module.h>
62#include <linux/types.h>
63
64#define AES_MIN_KEY_SIZE 16
65#define AES_MAX_KEY_SIZE 32
66
67#define AES_BLOCK_SIZE 16
68
69/*
70 * #define byte(x, nr) ((unsigned char)((x) >> (nr*8)))
71 */
72static inline u8 byte(const u32 x, const unsigned n)
73{
74 return x >> (n << 3);
75}
76
77struct aes_ctx
78{
79 u32 key_length;
80 u32 buf[120];
81};
82
83#define E_KEY (&ctx->buf[0])
84#define D_KEY (&ctx->buf[60])
85
86static u8 pow_tab[256] __initdata;
87static u8 log_tab[256] __initdata;
88static u8 sbx_tab[256] __initdata;
89static u8 isb_tab[256] __initdata;
90static u32 rco_tab[10];
91u32 aes_ft_tab[4][256];
92u32 aes_it_tab[4][256];
93
94u32 aes_fl_tab[4][256];
95u32 aes_il_tab[4][256];
96
97static inline u8 f_mult(u8 a, u8 b)
98{
99 u8 aa = log_tab[a], cc = aa + log_tab[b];
100
101 return pow_tab[cc + (cc < aa ? 1 : 0)];
102}
103
/*
 * GF(2**8) product of a and b.  A zero operand is answered here with 0 and
 * never reaches f_mult().  Arguments are parenthesized so that expression
 * arguments expand safely; each may be evaluated twice.
 */
#define ff_mult(a, b) ((a) && (b) ? f_mult((a), (b)) : 0)
105
106#define ls_box(x) \
107 (aes_fl_tab[0][byte(x, 0)] ^ \
108 aes_fl_tab[1][byte(x, 1)] ^ \
109 aes_fl_tab[2][byte(x, 2)] ^ \
110 aes_fl_tab[3][byte(x, 3)])
111
112static void __init gen_tabs(void)
113{
114 u32 i, t;
115 u8 p, q;
116
117 /* log and power tables for GF(2**8) finite field with
118 0x011b as modular polynomial - the simplest primitive
119 root is 0x03, used here to generate the tables */
120
121 for (i = 0, p = 1; i < 256; ++i) {
122 pow_tab[i] = (u8)p;
123 log_tab[p] = (u8)i;
124
125 p ^= (p << 1) ^ (p & 0x80 ? 0x01b : 0);
126 }
127
128 log_tab[1] = 0;
129
130 for (i = 0, p = 1; i < 10; ++i) {
131 rco_tab[i] = p;
132
133 p = (p << 1) ^ (p & 0x80 ? 0x01b : 0);
134 }
135
136 for (i = 0; i < 256; ++i) {
137 p = (i ? pow_tab[255 - log_tab[i]] : 0);
138 q = ((p >> 7) | (p << 1)) ^ ((p >> 6) | (p << 2));
139 p ^= 0x63 ^ q ^ ((q >> 6) | (q << 2));
140 sbx_tab[i] = p;
141 isb_tab[p] = (u8)i;
142 }
143
144 for (i = 0; i < 256; ++i) {
145 p = sbx_tab[i];
146
147 t = p;
148 aes_fl_tab[0][i] = t;
149 aes_fl_tab[1][i] = rol32(t, 8);
150 aes_fl_tab[2][i] = rol32(t, 16);
151 aes_fl_tab[3][i] = rol32(t, 24);
152
153 t = ((u32)ff_mult(2, p)) |
154 ((u32)p << 8) |
155 ((u32)p << 16) | ((u32)ff_mult(3, p) << 24);
156
157 aes_ft_tab[0][i] = t;
158 aes_ft_tab[1][i] = rol32(t, 8);
159 aes_ft_tab[2][i] = rol32(t, 16);
160 aes_ft_tab[3][i] = rol32(t, 24);
161
162 p = isb_tab[i];
163
164 t = p;
165 aes_il_tab[0][i] = t;
166 aes_il_tab[1][i] = rol32(t, 8);
167 aes_il_tab[2][i] = rol32(t, 16);
168 aes_il_tab[3][i] = rol32(t, 24);
169
170 t = ((u32)ff_mult(14, p)) |
171 ((u32)ff_mult(9, p) << 8) |
172 ((u32)ff_mult(13, p) << 16) |
173 ((u32)ff_mult(11, p) << 24);
174
175 aes_it_tab[0][i] = t;
176 aes_it_tab[1][i] = rol32(t, 8);
177 aes_it_tab[2][i] = rol32(t, 16);
178 aes_it_tab[3][i] = rol32(t, 24);
179 }
180}
181
/*
 * Double each of the four bytes packed in x in GF(2**8): shift every byte
 * left by one and XOR 0x1b into each byte whose top bit was set.  The whole
 * expansion is parenthesized so the macro can be used inside larger
 * expressions.
 */
#define star_x(x) \
	((((x) & 0x7f7f7f7f) << 1) ^ ((((x) & 0x80808080) >> 7) * 0x1b))
183
/*
 * Store in y the inverse MixColumns transform of the column word x.
 * u, v and w become x times 2, 4 and 8, and t becomes x times 9; the
 * result combines the multiples 14, 11, 13 and 9 across the byte lanes.
 * Uses the caller's locals t, u, v and w as scratch.  Wrapped in
 * do { } while (0) so it behaves as a single statement.
 */
#define imix_col(y, x)			\
do {					\
	u = star_x(x);			\
	v = star_x(u);			\
	w = star_x(v);			\
	t = w ^ (x);			\
	(y) = u ^ v ^ w;		\
	(y) ^= ror32(u ^ t, 8) ^	\
	       ror32(v ^ t, 16) ^	\
	       ror32(t, 24);		\
} while (0)
193
194/* initialise the key schedule from the user supplied key */
195
/*
 * One key-expansion step for a 4-word key: derives E_KEY[4i + 4 .. 4i + 7]
 * from the previous four words and round constant i.  Expects locals "t"
 * (the last word produced) and "ctx" in scope.
 */
#define loop4(i)						\
do {								\
	t = ror32(t, 8); t = ls_box(t) ^ rco_tab[(i)];		\
	t ^= E_KEY[4 * (i)];     E_KEY[4 * (i) + 4] = t;	\
	t ^= E_KEY[4 * (i) + 1]; E_KEY[4 * (i) + 5] = t;	\
	t ^= E_KEY[4 * (i) + 2]; E_KEY[4 * (i) + 6] = t;	\
	t ^= E_KEY[4 * (i) + 3]; E_KEY[4 * (i) + 7] = t;	\
} while (0)
204
/*
 * One key-expansion step for a 6-word key: derives E_KEY[6i + 6 .. 6i + 11]
 * from the previous six words and round constant i.  Expects locals "t"
 * (the last word produced) and "ctx" in scope.
 */
#define loop6(i)						\
do {								\
	t = ror32(t, 8); t = ls_box(t) ^ rco_tab[(i)];		\
	t ^= E_KEY[6 * (i)];     E_KEY[6 * (i) + 6] = t;	\
	t ^= E_KEY[6 * (i) + 1]; E_KEY[6 * (i) + 7] = t;	\
	t ^= E_KEY[6 * (i) + 2]; E_KEY[6 * (i) + 8] = t;	\
	t ^= E_KEY[6 * (i) + 3]; E_KEY[6 * (i) + 9] = t;	\
	t ^= E_KEY[6 * (i) + 4]; E_KEY[6 * (i) + 10] = t;	\
	t ^= E_KEY[6 * (i) + 5]; E_KEY[6 * (i) + 11] = t;	\
} while (0)
215
/*
 * One key-expansion step for an 8-word key: derives E_KEY[8i + 8 .. 8i + 15]
 * from the previous eight words and round constant i.  The fifth new word
 * gets an extra ls_box() without rotation or round constant.  Expects
 * locals "t" (the last word produced) and "ctx" in scope.
 */
#define loop8(i)						\
do {								\
	t = ror32(t, 8); t = ls_box(t) ^ rco_tab[(i)];		\
	t ^= E_KEY[8 * (i)];     E_KEY[8 * (i) + 8] = t;	\
	t ^= E_KEY[8 * (i) + 1]; E_KEY[8 * (i) + 9] = t;	\
	t ^= E_KEY[8 * (i) + 2]; E_KEY[8 * (i) + 10] = t;	\
	t ^= E_KEY[8 * (i) + 3]; E_KEY[8 * (i) + 11] = t;	\
	t  = E_KEY[8 * (i) + 4] ^ ls_box(t);			\
	E_KEY[8 * (i) + 12] = t;				\
	t ^= E_KEY[8 * (i) + 5]; E_KEY[8 * (i) + 13] = t;	\
	t ^= E_KEY[8 * (i) + 6]; E_KEY[8 * (i) + 14] = t;	\
	t ^= E_KEY[8 * (i) + 7]; E_KEY[8 * (i) + 15] = t;	\
} while (0)
229
230static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
231 unsigned int key_len)
232{
233 struct aes_ctx *ctx = crypto_tfm_ctx(tfm);
234 const __le32 *key = (const __le32 *)in_key;
235 u32 *flags = &tfm->crt_flags;
236 u32 i, j, t, u, v, w;
237
238 if (key_len % 8) {
239 *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
240 return -EINVAL;
241 }
242
243 ctx->key_length = key_len;
244
245 D_KEY[key_len + 24] = E_KEY[0] = le32_to_cpu(key[0]);
246 D_KEY[key_len + 25] = E_KEY[1] = le32_to_cpu(key[1]);
247 D_KEY[key_len + 26] = E_KEY[2] = le32_to_cpu(key[2]);
248 D_KEY[key_len + 27] = E_KEY[3] = le32_to_cpu(key[3]);
249
250 switch (key_len) {
251 case 16:
252 t = E_KEY[3];
253 for (i = 0; i < 10; ++i)
254 loop4(i);
255 break;
256
257 case 24:
258 E_KEY[4] = le32_to_cpu(key[4]);
259 t = E_KEY[5] = le32_to_cpu(key[5]);
260 for (i = 0; i < 8; ++i)
261 loop6 (i);
262 break;
263
264 case 32:
265 E_KEY[4] = le32_to_cpu(key[4]);
266 E_KEY[5] = le32_to_cpu(key[5]);
267 E_KEY[6] = le32_to_cpu(key[6]);
268 t = E_KEY[7] = le32_to_cpu(key[7]);
269 for (i = 0; i < 7; ++i)
270 loop8(i);
271 break;
272 }
273
274 D_KEY[0] = E_KEY[key_len + 24];
275 D_KEY[1] = E_KEY[key_len + 25];
276 D_KEY[2] = E_KEY[key_len + 26];
277 D_KEY[3] = E_KEY[key_len + 27];
278
279 for (i = 4; i < key_len + 24; ++i) {
280 j = key_len + 24 - (i & ~3) + (i & 3);
281 imix_col(D_KEY[j], E_KEY[i]);
282 }
283
284 return 0;
285}
286
287asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in);
288asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in);
289
290static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
291{
292 aes_enc_blk(tfm, dst, src);
293}
294
295static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
296{
297 aes_dec_blk(tfm, dst, src);
298}
299
300static struct crypto_alg aes_alg = {
301 .cra_name = "aes",
302 .cra_driver_name = "aes-x86_64",
303 .cra_priority = 200,
304 .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
305 .cra_blocksize = AES_BLOCK_SIZE,
306 .cra_ctxsize = sizeof(struct aes_ctx),
307 .cra_module = THIS_MODULE,
308 .cra_list = LIST_HEAD_INIT(aes_alg.cra_list),
309 .cra_u = {
310 .cipher = {
311 .cia_min_keysize = AES_MIN_KEY_SIZE,
312 .cia_max_keysize = AES_MAX_KEY_SIZE,
313 .cia_setkey = aes_set_key,
314 .cia_encrypt = aes_encrypt,
315 .cia_decrypt = aes_decrypt
316 }
317 }
318};
319
320static int __init aes_init(void)
321{
322 gen_tabs();
323 return crypto_register_alg(&aes_alg);
324}
325
326static void __exit aes_fini(void)
327{
328 crypto_unregister_alg(&aes_alg);
329}
330
331module_init(aes_init);
332module_exit(aes_fini);
333
334MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm");
335MODULE_LICENSE("GPL");
336MODULE_ALIAS("aes");
diff --git a/arch/x86/crypto/aes_glue.c b/arch/x86/crypto/aes_glue.c
new file mode 100644
index 000000000000..71f457827116
--- /dev/null
+++ b/arch/x86/crypto/aes_glue.c
@@ -0,0 +1,57 @@
1/*
2 * Glue Code for the asm optimized version of the AES Cipher Algorithm
3 *
4 */
5
6#include <crypto/aes.h>
7
8asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in);
9asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in);
10
11static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
12{
13 aes_enc_blk(tfm, dst, src);
14}
15
16static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
17{
18 aes_dec_blk(tfm, dst, src);
19}
20
21static struct crypto_alg aes_alg = {
22 .cra_name = "aes",
23 .cra_driver_name = "aes-asm",
24 .cra_priority = 200,
25 .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
26 .cra_blocksize = AES_BLOCK_SIZE,
27 .cra_ctxsize = sizeof(struct crypto_aes_ctx),
28 .cra_module = THIS_MODULE,
29 .cra_list = LIST_HEAD_INIT(aes_alg.cra_list),
30 .cra_u = {
31 .cipher = {
32 .cia_min_keysize = AES_MIN_KEY_SIZE,
33 .cia_max_keysize = AES_MAX_KEY_SIZE,
34 .cia_setkey = crypto_aes_set_key,
35 .cia_encrypt = aes_encrypt,
36 .cia_decrypt = aes_decrypt
37 }
38 }
39};
40
41static int __init aes_init(void)
42{
43 return crypto_register_alg(&aes_alg);
44}
45
46static void __exit aes_fini(void)
47{
48 crypto_unregister_alg(&aes_alg);
49}
50
51module_init(aes_init);
52module_exit(aes_fini);
53
54MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, asm optimized");
55MODULE_LICENSE("GPL");
56MODULE_ALIAS("aes");
57MODULE_ALIAS("aes-asm");
diff --git a/arch/x86/crypto/salsa20-i586-asm_32.S b/arch/x86/crypto/salsa20-i586-asm_32.S
new file mode 100644
index 000000000000..72eb306680b2
--- /dev/null
+++ b/arch/x86/crypto/salsa20-i586-asm_32.S
@@ -0,0 +1,1114 @@
1# salsa20_pm.s version 20051229
2# D. J. Bernstein
3# Public domain.
4
5# enter ECRYPT_encrypt_bytes
6.text
7.p2align 5
8.globl ECRYPT_encrypt_bytes
9ECRYPT_encrypt_bytes:
10 mov %esp,%eax
11 and $31,%eax
12 add $256,%eax
13 sub %eax,%esp
14 # eax_stack = eax
15 movl %eax,80(%esp)
16 # ebx_stack = ebx
17 movl %ebx,84(%esp)
18 # esi_stack = esi
19 movl %esi,88(%esp)
20 # edi_stack = edi
21 movl %edi,92(%esp)
22 # ebp_stack = ebp
23 movl %ebp,96(%esp)
24 # x = arg1
25 movl 4(%esp,%eax),%edx
26 # m = arg2
27 movl 8(%esp,%eax),%esi
28 # out = arg3
29 movl 12(%esp,%eax),%edi
30 # bytes = arg4
31 movl 16(%esp,%eax),%ebx
32 # bytes -= 0
33 sub $0,%ebx
34 # goto done if unsigned<=
35 jbe ._done
36._start:
37 # in0 = *(uint32 *) (x + 0)
38 movl 0(%edx),%eax
39 # in1 = *(uint32 *) (x + 4)
40 movl 4(%edx),%ecx
41 # in2 = *(uint32 *) (x + 8)
42 movl 8(%edx),%ebp
43 # j0 = in0
44 movl %eax,164(%esp)
45 # in3 = *(uint32 *) (x + 12)
46 movl 12(%edx),%eax
47 # j1 = in1
48 movl %ecx,168(%esp)
49 # in4 = *(uint32 *) (x + 16)
50 movl 16(%edx),%ecx
51 # j2 = in2
52 movl %ebp,172(%esp)
53 # in5 = *(uint32 *) (x + 20)
54 movl 20(%edx),%ebp
55 # j3 = in3
56 movl %eax,176(%esp)
57 # in6 = *(uint32 *) (x + 24)
58 movl 24(%edx),%eax
59 # j4 = in4
60 movl %ecx,180(%esp)
61 # in7 = *(uint32 *) (x + 28)
62 movl 28(%edx),%ecx
63 # j5 = in5
64 movl %ebp,184(%esp)
65 # in8 = *(uint32 *) (x + 32)
66 movl 32(%edx),%ebp
67 # j6 = in6
68 movl %eax,188(%esp)
69 # in9 = *(uint32 *) (x + 36)
70 movl 36(%edx),%eax
71 # j7 = in7
72 movl %ecx,192(%esp)
73 # in10 = *(uint32 *) (x + 40)
74 movl 40(%edx),%ecx
75 # j8 = in8
76 movl %ebp,196(%esp)
77 # in11 = *(uint32 *) (x + 44)
78 movl 44(%edx),%ebp
79 # j9 = in9
80 movl %eax,200(%esp)
81 # in12 = *(uint32 *) (x + 48)
82 movl 48(%edx),%eax
83 # j10 = in10
84 movl %ecx,204(%esp)
85 # in13 = *(uint32 *) (x + 52)
86 movl 52(%edx),%ecx
87 # j11 = in11
88 movl %ebp,208(%esp)
89 # in14 = *(uint32 *) (x + 56)
90 movl 56(%edx),%ebp
91 # j12 = in12
92 movl %eax,212(%esp)
93 # in15 = *(uint32 *) (x + 60)
94 movl 60(%edx),%eax
95 # j13 = in13
96 movl %ecx,216(%esp)
97 # j14 = in14
98 movl %ebp,220(%esp)
99 # j15 = in15
100 movl %eax,224(%esp)
101 # x_backup = x
102 movl %edx,64(%esp)
103._bytesatleast1:
104 # bytes - 64
105 cmp $64,%ebx
106 # goto nocopy if unsigned>=
107 jae ._nocopy
108 # ctarget = out
109 movl %edi,228(%esp)
110 # out = &tmp
111 leal 0(%esp),%edi
112 # i = bytes
113 mov %ebx,%ecx
114 # while (i) { *out++ = *m++; --i }
115 rep movsb
116 # out = &tmp
117 leal 0(%esp),%edi
118 # m = &tmp
119 leal 0(%esp),%esi
120._nocopy:
121 # out_backup = out
122 movl %edi,72(%esp)
123 # m_backup = m
124 movl %esi,68(%esp)
125 # bytes_backup = bytes
126 movl %ebx,76(%esp)
127 # in0 = j0
128 movl 164(%esp),%eax
129 # in1 = j1
130 movl 168(%esp),%ecx
131 # in2 = j2
132 movl 172(%esp),%edx
133 # in3 = j3
134 movl 176(%esp),%ebx
135 # x0 = in0
136 movl %eax,100(%esp)
137 # x1 = in1
138 movl %ecx,104(%esp)
139 # x2 = in2
140 movl %edx,108(%esp)
141 # x3 = in3
142 movl %ebx,112(%esp)
143 # in4 = j4
144 movl 180(%esp),%eax
145 # in5 = j5
146 movl 184(%esp),%ecx
147 # in6 = j6
148 movl 188(%esp),%edx
149 # in7 = j7
150 movl 192(%esp),%ebx
151 # x4 = in4
152 movl %eax,116(%esp)
153 # x5 = in5
154 movl %ecx,120(%esp)
155 # x6 = in6
156 movl %edx,124(%esp)
157 # x7 = in7
158 movl %ebx,128(%esp)
159 # in8 = j8
160 movl 196(%esp),%eax
161 # in9 = j9
162 movl 200(%esp),%ecx
163 # in10 = j10
164 movl 204(%esp),%edx
165 # in11 = j11
166 movl 208(%esp),%ebx
167 # x8 = in8
168 movl %eax,132(%esp)
169 # x9 = in9
170 movl %ecx,136(%esp)
171 # x10 = in10
172 movl %edx,140(%esp)
173 # x11 = in11
174 movl %ebx,144(%esp)
175 # in12 = j12
176 movl 212(%esp),%eax
177 # in13 = j13
178 movl 216(%esp),%ecx
179 # in14 = j14
180 movl 220(%esp),%edx
181 # in15 = j15
182 movl 224(%esp),%ebx
183 # x12 = in12
184 movl %eax,148(%esp)
185 # x13 = in13
186 movl %ecx,152(%esp)
187 # x14 = in14
188 movl %edx,156(%esp)
189 # x15 = in15
190 movl %ebx,160(%esp)
191 # i = 20
192 mov $20,%ebp
193 # p = x0
194 movl 100(%esp),%eax
195 # s = x5
196 movl 120(%esp),%ecx
197 # t = x10
198 movl 140(%esp),%edx
199 # w = x15
200 movl 160(%esp),%ebx
201._mainloop:
202 # x0 = p
203 movl %eax,100(%esp)
204 # x10 = t
205 movl %edx,140(%esp)
206 # p += x12
207 addl 148(%esp),%eax
208 # x5 = s
209 movl %ecx,120(%esp)
210 # t += x6
211 addl 124(%esp),%edx
212 # x15 = w
213 movl %ebx,160(%esp)
214 # r = x1
215 movl 104(%esp),%esi
216 # r += s
217 add %ecx,%esi
218 # v = x11
219 movl 144(%esp),%edi
220 # v += w
221 add %ebx,%edi
222 # p <<<= 7
223 rol $7,%eax
224 # p ^= x4
225 xorl 116(%esp),%eax
226 # t <<<= 7
227 rol $7,%edx
228 # t ^= x14
229 xorl 156(%esp),%edx
230 # r <<<= 7
231 rol $7,%esi
232 # r ^= x9
233 xorl 136(%esp),%esi
234 # v <<<= 7
235 rol $7,%edi
236 # v ^= x3
237 xorl 112(%esp),%edi
238 # x4 = p
239 movl %eax,116(%esp)
240 # x14 = t
241 movl %edx,156(%esp)
242 # p += x0
243 addl 100(%esp),%eax
244 # x9 = r
245 movl %esi,136(%esp)
246 # t += x10
247 addl 140(%esp),%edx
248 # x3 = v
249 movl %edi,112(%esp)
250 # p <<<= 9
251 rol $9,%eax
252 # p ^= x8
253 xorl 132(%esp),%eax
254 # t <<<= 9
255 rol $9,%edx
256 # t ^= x2
257 xorl 108(%esp),%edx
258 # s += r
259 add %esi,%ecx
260 # s <<<= 9
261 rol $9,%ecx
262 # s ^= x13
263 xorl 152(%esp),%ecx
264 # w += v
265 add %edi,%ebx
266 # w <<<= 9
267 rol $9,%ebx
268 # w ^= x7
269 xorl 128(%esp),%ebx
270 # x8 = p
271 movl %eax,132(%esp)
272 # x2 = t
273 movl %edx,108(%esp)
274 # p += x4
275 addl 116(%esp),%eax
276 # x13 = s
277 movl %ecx,152(%esp)
278 # t += x14
279 addl 156(%esp),%edx
280 # x7 = w
281 movl %ebx,128(%esp)
282 # p <<<= 13
283 rol $13,%eax
284 # p ^= x12
285 xorl 148(%esp),%eax
286 # t <<<= 13
287 rol $13,%edx
288 # t ^= x6
289 xorl 124(%esp),%edx
290 # r += s
291 add %ecx,%esi
292 # r <<<= 13
293 rol $13,%esi
294 # r ^= x1
295 xorl 104(%esp),%esi
296 # v += w
297 add %ebx,%edi
298 # v <<<= 13
299 rol $13,%edi
300 # v ^= x11
301 xorl 144(%esp),%edi
302 # x12 = p
303 movl %eax,148(%esp)
304 # x6 = t
305 movl %edx,124(%esp)
306 # p += x8
307 addl 132(%esp),%eax
308 # x1 = r
309 movl %esi,104(%esp)
310 # t += x2
311 addl 108(%esp),%edx
312 # x11 = v
313 movl %edi,144(%esp)
314 # p <<<= 18
315 rol $18,%eax
316 # p ^= x0
317 xorl 100(%esp),%eax
318 # t <<<= 18
319 rol $18,%edx
320 # t ^= x10
321 xorl 140(%esp),%edx
322 # s += r
323 add %esi,%ecx
324 # s <<<= 18
325 rol $18,%ecx
326 # s ^= x5
327 xorl 120(%esp),%ecx
328 # w += v
329 add %edi,%ebx
330 # w <<<= 18
331 rol $18,%ebx
332 # w ^= x15
333 xorl 160(%esp),%ebx
334 # x0 = p
335 movl %eax,100(%esp)
336 # x10 = t
337 movl %edx,140(%esp)
338 # p += x3
339 addl 112(%esp),%eax
340 # p <<<= 7
341 rol $7,%eax
342 # x5 = s
343 movl %ecx,120(%esp)
344 # t += x9
345 addl 136(%esp),%edx
346 # x15 = w
347 movl %ebx,160(%esp)
348 # r = x4
349 movl 116(%esp),%esi
350 # r += s
351 add %ecx,%esi
352 # v = x14
353 movl 156(%esp),%edi
354 # v += w
355 add %ebx,%edi
356 # p ^= x1
357 xorl 104(%esp),%eax
358 # t <<<= 7
359 rol $7,%edx
360 # t ^= x11
361 xorl 144(%esp),%edx
362 # r <<<= 7
363 rol $7,%esi
364 # r ^= x6
365 xorl 124(%esp),%esi
366 # v <<<= 7
367 rol $7,%edi
368 # v ^= x12
369 xorl 148(%esp),%edi
370 # x1 = p
371 movl %eax,104(%esp)
372 # x11 = t
373 movl %edx,144(%esp)
374 # p += x0
375 addl 100(%esp),%eax
376 # x6 = r
377 movl %esi,124(%esp)
378 # t += x10
379 addl 140(%esp),%edx
380 # x12 = v
381 movl %edi,148(%esp)
382 # p <<<= 9
383 rol $9,%eax
384 # p ^= x2
385 xorl 108(%esp),%eax
386 # t <<<= 9
387 rol $9,%edx
388 # t ^= x8
389 xorl 132(%esp),%edx
390 # s += r
391 add %esi,%ecx
392 # s <<<= 9
393 rol $9,%ecx
394 # s ^= x7
395 xorl 128(%esp),%ecx
396 # w += v
397 add %edi,%ebx
398 # w <<<= 9
399 rol $9,%ebx
400 # w ^= x13
401 xorl 152(%esp),%ebx
402 # x2 = p
403 movl %eax,108(%esp)
404 # x8 = t
405 movl %edx,132(%esp)
406 # p += x1
407 addl 104(%esp),%eax
408 # x7 = s
409 movl %ecx,128(%esp)
410 # t += x11
411 addl 144(%esp),%edx
412 # x13 = w
413 movl %ebx,152(%esp)
414 # p <<<= 13
415 rol $13,%eax
416 # p ^= x3
417 xorl 112(%esp),%eax
418 # t <<<= 13
419 rol $13,%edx
420 # t ^= x9
421 xorl 136(%esp),%edx
422 # r += s
423 add %ecx,%esi
424 # r <<<= 13
425 rol $13,%esi
426 # r ^= x4
427 xorl 116(%esp),%esi
428 # v += w
429 add %ebx,%edi
430 # v <<<= 13
431 rol $13,%edi
432 # v ^= x14
433 xorl 156(%esp),%edi
434 # x3 = p
435 movl %eax,112(%esp)
436 # x9 = t
437 movl %edx,136(%esp)
438 # p += x2
439 addl 108(%esp),%eax
440 # x4 = r
441 movl %esi,116(%esp)
442 # t += x8
443 addl 132(%esp),%edx
444 # x14 = v
445 movl %edi,156(%esp)
446 # p <<<= 18
447 rol $18,%eax
448 # p ^= x0
449 xorl 100(%esp),%eax
450 # t <<<= 18
451 rol $18,%edx
452 # t ^= x10
453 xorl 140(%esp),%edx
454 # s += r
455 add %esi,%ecx
456 # s <<<= 18
457 rol $18,%ecx
458 # s ^= x5
459 xorl 120(%esp),%ecx
460 # w += v
461 add %edi,%ebx
462 # w <<<= 18
463 rol $18,%ebx
464 # w ^= x15
465 xorl 160(%esp),%ebx
466 # x0 = p
467 movl %eax,100(%esp)
468 # x10 = t
469 movl %edx,140(%esp)
470 # p += x12
471 addl 148(%esp),%eax
472 # x5 = s
473 movl %ecx,120(%esp)
474 # t += x6
475 addl 124(%esp),%edx
476 # x15 = w
477 movl %ebx,160(%esp)
478 # r = x1
479 movl 104(%esp),%esi
480 # r += s
481 add %ecx,%esi
482 # v = x11
483 movl 144(%esp),%edi
484 # v += w
485 add %ebx,%edi
486 # p <<<= 7
487 rol $7,%eax
488 # p ^= x4
489 xorl 116(%esp),%eax
490 # t <<<= 7
491 rol $7,%edx
492 # t ^= x14
493 xorl 156(%esp),%edx
494 # r <<<= 7
495 rol $7,%esi
496 # r ^= x9
497 xorl 136(%esp),%esi
498 # v <<<= 7
499 rol $7,%edi
500 # v ^= x3
501 xorl 112(%esp),%edi
502 # x4 = p
503 movl %eax,116(%esp)
504 # x14 = t
505 movl %edx,156(%esp)
506 # p += x0
507 addl 100(%esp),%eax
508 # x9 = r
509 movl %esi,136(%esp)
510 # t += x10
511 addl 140(%esp),%edx
512 # x3 = v
513 movl %edi,112(%esp)
514 # p <<<= 9
515 rol $9,%eax
516 # p ^= x8
517 xorl 132(%esp),%eax
518 # t <<<= 9
519 rol $9,%edx
520 # t ^= x2
521 xorl 108(%esp),%edx
522 # s += r
523 add %esi,%ecx
524 # s <<<= 9
525 rol $9,%ecx
526 # s ^= x13
527 xorl 152(%esp),%ecx
528 # w += v
529 add %edi,%ebx
530 # w <<<= 9
531 rol $9,%ebx
532 # w ^= x7
533 xorl 128(%esp),%ebx
534 # x8 = p
535 movl %eax,132(%esp)
536 # x2 = t
537 movl %edx,108(%esp)
538 # p += x4
539 addl 116(%esp),%eax
540 # x13 = s
541 movl %ecx,152(%esp)
542 # t += x14
543 addl 156(%esp),%edx
544 # x7 = w
545 movl %ebx,128(%esp)
546 # p <<<= 13
547 rol $13,%eax
548 # p ^= x12
549 xorl 148(%esp),%eax
550 # t <<<= 13
551 rol $13,%edx
552 # t ^= x6
553 xorl 124(%esp),%edx
554 # r += s
555 add %ecx,%esi
556 # r <<<= 13
557 rol $13,%esi
558 # r ^= x1
559 xorl 104(%esp),%esi
560 # v += w
561 add %ebx,%edi
562 # v <<<= 13
563 rol $13,%edi
564 # v ^= x11
565 xorl 144(%esp),%edi
566 # x12 = p
567 movl %eax,148(%esp)
568 # x6 = t
569 movl %edx,124(%esp)
570 # p += x8
571 addl 132(%esp),%eax
572 # x1 = r
573 movl %esi,104(%esp)
574 # t += x2
575 addl 108(%esp),%edx
576 # x11 = v
577 movl %edi,144(%esp)
578 # p <<<= 18
579 rol $18,%eax
580 # p ^= x0
581 xorl 100(%esp),%eax
582 # t <<<= 18
583 rol $18,%edx
584 # t ^= x10
585 xorl 140(%esp),%edx
586 # s += r
587 add %esi,%ecx
588 # s <<<= 18
589 rol $18,%ecx
590 # s ^= x5
591 xorl 120(%esp),%ecx
592 # w += v
593 add %edi,%ebx
594 # w <<<= 18
595 rol $18,%ebx
596 # w ^= x15
597 xorl 160(%esp),%ebx
598 # x0 = p
599 movl %eax,100(%esp)
600 # x10 = t
601 movl %edx,140(%esp)
602 # p += x3
603 addl 112(%esp),%eax
604 # p <<<= 7
605 rol $7,%eax
606 # x5 = s
607 movl %ecx,120(%esp)
608 # t += x9
609 addl 136(%esp),%edx
610 # x15 = w
611 movl %ebx,160(%esp)
612 # r = x4
613 movl 116(%esp),%esi
614 # r += s
615 add %ecx,%esi
616 # v = x14
617 movl 156(%esp),%edi
618 # v += w
619 add %ebx,%edi
620 # p ^= x1
621 xorl 104(%esp),%eax
622 # t <<<= 7
623 rol $7,%edx
624 # t ^= x11
625 xorl 144(%esp),%edx
626 # r <<<= 7
627 rol $7,%esi
628 # r ^= x6
629 xorl 124(%esp),%esi
630 # v <<<= 7
631 rol $7,%edi
632 # v ^= x12
633 xorl 148(%esp),%edi
634 # x1 = p
635 movl %eax,104(%esp)
636 # x11 = t
637 movl %edx,144(%esp)
638 # p += x0
639 addl 100(%esp),%eax
640 # x6 = r
641 movl %esi,124(%esp)
642 # t += x10
643 addl 140(%esp),%edx
644 # x12 = v
645 movl %edi,148(%esp)
646 # p <<<= 9
647 rol $9,%eax
648 # p ^= x2
649 xorl 108(%esp),%eax
650 # t <<<= 9
651 rol $9,%edx
652 # t ^= x8
653 xorl 132(%esp),%edx
654 # s += r
655 add %esi,%ecx
656 # s <<<= 9
657 rol $9,%ecx
658 # s ^= x7
659 xorl 128(%esp),%ecx
660 # w += v
661 add %edi,%ebx
662 # w <<<= 9
663 rol $9,%ebx
664 # w ^= x13
665 xorl 152(%esp),%ebx
666 # x2 = p
667 movl %eax,108(%esp)
668 # x8 = t
669 movl %edx,132(%esp)
670 # p += x1
671 addl 104(%esp),%eax
672 # x7 = s
673 movl %ecx,128(%esp)
674 # t += x11
675 addl 144(%esp),%edx
676 # x13 = w
677 movl %ebx,152(%esp)
678 # p <<<= 13
679 rol $13,%eax
680 # p ^= x3
681 xorl 112(%esp),%eax
682 # t <<<= 13
683 rol $13,%edx
684 # t ^= x9
685 xorl 136(%esp),%edx
686 # r += s
687 add %ecx,%esi
688 # r <<<= 13
689 rol $13,%esi
690 # r ^= x4
691 xorl 116(%esp),%esi
692 # v += w
693 add %ebx,%edi
694 # v <<<= 13
695 rol $13,%edi
696 # v ^= x14
697 xorl 156(%esp),%edi
698 # x3 = p
699 movl %eax,112(%esp)
700 # x9 = t
701 movl %edx,136(%esp)
702 # p += x2
703 addl 108(%esp),%eax
704 # x4 = r
705 movl %esi,116(%esp)
706 # t += x8
707 addl 132(%esp),%edx
708 # x14 = v
709 movl %edi,156(%esp)
710 # p <<<= 18
711 rol $18,%eax
712 # p ^= x0
713 xorl 100(%esp),%eax
714 # t <<<= 18
715 rol $18,%edx
716 # t ^= x10
717 xorl 140(%esp),%edx
718 # s += r
719 add %esi,%ecx
720 # s <<<= 18
721 rol $18,%ecx
722 # s ^= x5
723 xorl 120(%esp),%ecx
724 # w += v
725 add %edi,%ebx
726 # w <<<= 18
727 rol $18,%ebx
728 # w ^= x15
729 xorl 160(%esp),%ebx
730 # i -= 4
731 sub $4,%ebp
732 # goto mainloop if unsigned >
733 ja ._mainloop
734 # x0 = p
735 movl %eax,100(%esp)
736 # x5 = s
737 movl %ecx,120(%esp)
738 # x10 = t
739 movl %edx,140(%esp)
740 # x15 = w
741 movl %ebx,160(%esp)
742 # out = out_backup
743 movl 72(%esp),%edi
744 # m = m_backup
745 movl 68(%esp),%esi
746 # in0 = x0
747 movl 100(%esp),%eax
748 # in1 = x1
749 movl 104(%esp),%ecx
750 # in0 += j0
751 addl 164(%esp),%eax
752 # in1 += j1
753 addl 168(%esp),%ecx
754 # in0 ^= *(uint32 *) (m + 0)
755 xorl 0(%esi),%eax
756 # in1 ^= *(uint32 *) (m + 4)
757 xorl 4(%esi),%ecx
758 # *(uint32 *) (out + 0) = in0
759 movl %eax,0(%edi)
760 # *(uint32 *) (out + 4) = in1
761 movl %ecx,4(%edi)
762 # in2 = x2
763 movl 108(%esp),%eax
764 # in3 = x3
765 movl 112(%esp),%ecx
766 # in2 += j2
767 addl 172(%esp),%eax
768 # in3 += j3
769 addl 176(%esp),%ecx
770 # in2 ^= *(uint32 *) (m + 8)
771 xorl 8(%esi),%eax
772 # in3 ^= *(uint32 *) (m + 12)
773 xorl 12(%esi),%ecx
774 # *(uint32 *) (out + 8) = in2
775 movl %eax,8(%edi)
776 # *(uint32 *) (out + 12) = in3
777 movl %ecx,12(%edi)
778 # in4 = x4
779 movl 116(%esp),%eax
780 # in5 = x5
781 movl 120(%esp),%ecx
782 # in4 += j4
783 addl 180(%esp),%eax
784 # in5 += j5
785 addl 184(%esp),%ecx
786 # in4 ^= *(uint32 *) (m + 16)
787 xorl 16(%esi),%eax
788 # in5 ^= *(uint32 *) (m + 20)
789 xorl 20(%esi),%ecx
790 # *(uint32 *) (out + 16) = in4
791 movl %eax,16(%edi)
792 # *(uint32 *) (out + 20) = in5
793 movl %ecx,20(%edi)
794 # in6 = x6
795 movl 124(%esp),%eax
796 # in7 = x7
797 movl 128(%esp),%ecx
798 # in6 += j6
799 addl 188(%esp),%eax
800 # in7 += j7
801 addl 192(%esp),%ecx
802 # in6 ^= *(uint32 *) (m + 24)
803 xorl 24(%esi),%eax
804 # in7 ^= *(uint32 *) (m + 28)
805 xorl 28(%esi),%ecx
806 # *(uint32 *) (out + 24) = in6
807 movl %eax,24(%edi)
808 # *(uint32 *) (out + 28) = in7
809 movl %ecx,28(%edi)
810 # in8 = x8
811 movl 132(%esp),%eax
812 # in9 = x9
813 movl 136(%esp),%ecx
814 # in8 += j8
815 addl 196(%esp),%eax
816 # in9 += j9
817 addl 200(%esp),%ecx
818 # in8 ^= *(uint32 *) (m + 32)
819 xorl 32(%esi),%eax
820 # in9 ^= *(uint32 *) (m + 36)
821 xorl 36(%esi),%ecx
822 # *(uint32 *) (out + 32) = in8
823 movl %eax,32(%edi)
824 # *(uint32 *) (out + 36) = in9
825 movl %ecx,36(%edi)
826 # in10 = x10
827 movl 140(%esp),%eax
828 # in11 = x11
829 movl 144(%esp),%ecx
830 # in10 += j10
831 addl 204(%esp),%eax
832 # in11 += j11
833 addl 208(%esp),%ecx
834 # in10 ^= *(uint32 *) (m + 40)
835 xorl 40(%esi),%eax
836 # in11 ^= *(uint32 *) (m + 44)
837 xorl 44(%esi),%ecx
838 # *(uint32 *) (out + 40) = in10
839 movl %eax,40(%edi)
840 # *(uint32 *) (out + 44) = in11
841 movl %ecx,44(%edi)
842 # in12 = x12
843 movl 148(%esp),%eax
844 # in13 = x13
845 movl 152(%esp),%ecx
846 # in12 += j12
847 addl 212(%esp),%eax
848 # in13 += j13
849 addl 216(%esp),%ecx
850 # in12 ^= *(uint32 *) (m + 48)
851 xorl 48(%esi),%eax
852 # in13 ^= *(uint32 *) (m + 52)
853 xorl 52(%esi),%ecx
854 # *(uint32 *) (out + 48) = in12
855 movl %eax,48(%edi)
856 # *(uint32 *) (out + 52) = in13
857 movl %ecx,52(%edi)
858 # in14 = x14
859 movl 156(%esp),%eax
860 # in15 = x15
861 movl 160(%esp),%ecx
862 # in14 += j14
863 addl 220(%esp),%eax
864 # in15 += j15
865 addl 224(%esp),%ecx
866 # in14 ^= *(uint32 *) (m + 56)
867 xorl 56(%esi),%eax
868 # in15 ^= *(uint32 *) (m + 60)
869 xorl 60(%esi),%ecx
870 # *(uint32 *) (out + 56) = in14
871 movl %eax,56(%edi)
872 # *(uint32 *) (out + 60) = in15
873 movl %ecx,60(%edi)
874 # bytes = bytes_backup
875 movl 76(%esp),%ebx
876 # in8 = j8
877 movl 196(%esp),%eax
878 # in9 = j9
879 movl 200(%esp),%ecx
880 # in8 += 1
881 add $1,%eax
882 # in9 += 0 + carry
883 adc $0,%ecx
884 # j8 = in8
885 movl %eax,196(%esp)
886 # j9 = in9
887 movl %ecx,200(%esp)
888 # bytes - 64
889 cmp $64,%ebx
890 # goto bytesatleast65 if unsigned>
891 ja ._bytesatleast65
892 # goto bytesatleast64 if unsigned>=
893 jae ._bytesatleast64
894 # m = out
895 mov %edi,%esi
896 # out = ctarget
897 movl 228(%esp),%edi
898 # i = bytes
899 mov %ebx,%ecx
900 # while (i) { *out++ = *m++; --i }
901 rep movsb
902._bytesatleast64:
903 # x = x_backup
904 movl 64(%esp),%eax
905 # in8 = j8
906 movl 196(%esp),%ecx
907 # in9 = j9
908 movl 200(%esp),%edx
909 # *(uint32 *) (x + 32) = in8
910 movl %ecx,32(%eax)
911 # *(uint32 *) (x + 36) = in9
912 movl %edx,36(%eax)
913._done:
914 # eax = eax_stack
915 movl 80(%esp),%eax
916 # ebx = ebx_stack
917 movl 84(%esp),%ebx
918 # esi = esi_stack
919 movl 88(%esp),%esi
920 # edi = edi_stack
921 movl 92(%esp),%edi
922 # ebp = ebp_stack
923 movl 96(%esp),%ebp
924 # leave
925 add %eax,%esp
926 ret
927._bytesatleast65:
928 # bytes -= 64
929 sub $64,%ebx
930 # out += 64
931 add $64,%edi
932 # m += 64
933 add $64,%esi
934 # goto bytesatleast1
935 jmp ._bytesatleast1
936# enter ECRYPT_keysetup
#
# ECRYPT_keysetup(x, k, kbits): load a key into the Salsa20 state.
#   arg1 = x      state, addressed here as 32-bit words at offsets 0..60
#   arg2 = k      key bytes
#   arg3 = kbits  key size in bits
# Key words k+0..k+12 are always stored at x+4..x+16.  When kbits >= 256,
# key words k+16..k+28 are stored at x+44..x+56; otherwise k+0..k+12 are
# stored there a second time.  x+0, x+20, x+40 and x+60 receive four
# constants which, as little-endian ASCII, spell "expand 32-byte k"
# (kbits >= 256) or "expand 16-byte k" (kbits < 256).  x+24..x+36 are not
# written by this routine.
#
937.text
938.p2align 5
939.globl ECRYPT_keysetup
940ECRYPT_keysetup:
# Reserve at least 256 bytes and round %esp down to a 32-byte boundary;
# %eax holds the total adjustment so that it can be undone on exit.
941 mov %esp,%eax
942 and $31,%eax
943 add $256,%eax
944 sub %eax,%esp
945 # eax_stack = eax
946 movl %eax,64(%esp)
947 # ebx_stack = ebx
948 movl %ebx,68(%esp)
949 # esi_stack = esi
950 movl %esi,72(%esp)
951 # edi_stack = edi
952 movl %edi,76(%esp)
953 # ebp_stack = ebp
954 movl %ebp,80(%esp)
# %esp + %eax is the stack pointer on entry, so the stack arguments are
# addressed relative to it.  x is fetched last because it overwrites %eax.
955 # k = arg2
956 movl 8(%esp,%eax),%ecx
957 # kbits = arg3
958 movl 12(%esp,%eax),%edx
959 # x = arg1
960 movl 4(%esp,%eax),%eax
961 # in1 = *(uint32 *) (k + 0)
962 movl 0(%ecx),%ebx
963 # in2 = *(uint32 *) (k + 4)
964 movl 4(%ecx),%esi
965 # in3 = *(uint32 *) (k + 8)
966 movl 8(%ecx),%edi
967 # in4 = *(uint32 *) (k + 12)
968 movl 12(%ecx),%ebp
969 # *(uint32 *) (x + 4) = in1
970 movl %ebx,4(%eax)
971 # *(uint32 *) (x + 8) = in2
972 movl %esi,8(%eax)
973 # *(uint32 *) (x + 12) = in3
974 movl %edi,12(%eax)
975 # *(uint32 *) (x + 16) = in4
976 movl %ebp,16(%eax)
# Any kbits value below 256 takes the 128-bit path.
977 # kbits - 256
978 cmp $256,%edx
979 # goto kbits128 if unsigned<
980 jb ._kbits128
981._kbits256:
982 # in11 = *(uint32 *) (k + 16)
983 movl 16(%ecx),%edx
984 # in12 = *(uint32 *) (k + 20)
985 movl 20(%ecx),%ebx
986 # in13 = *(uint32 *) (k + 24)
987 movl 24(%ecx),%esi
988 # in14 = *(uint32 *) (k + 28)
989 movl 28(%ecx),%ecx
990 # *(uint32 *) (x + 44) = in11
991 movl %edx,44(%eax)
992 # *(uint32 *) (x + 48) = in12
993 movl %ebx,48(%eax)
994 # *(uint32 *) (x + 52) = in13
995 movl %esi,52(%eax)
996 # *(uint32 *) (x + 56) = in14
997 movl %ecx,56(%eax)
# Constants for the 256-bit case: "expa", "nd 3", "2-by", "te k".
998 # in0 = 1634760805
999 mov $1634760805,%ecx
1000 # in5 = 857760878
1001 mov $857760878,%edx
1002 # in10 = 2036477234
1003 mov $2036477234,%ebx
1004 # in15 = 1797285236
1005 mov $1797285236,%esi
1006 # *(uint32 *) (x + 0) = in0
1007 movl %ecx,0(%eax)
1008 # *(uint32 *) (x + 20) = in5
1009 movl %edx,20(%eax)
1010 # *(uint32 *) (x + 40) = in10
1011 movl %ebx,40(%eax)
1012 # *(uint32 *) (x + 60) = in15
1013 movl %esi,60(%eax)
1014 # goto keysetupdone
1015 jmp ._keysetupdone
1016._kbits128:
# 128-bit key: the same four key words are used for the second key slot.
1017 # in11 = *(uint32 *) (k + 0)
1018 movl 0(%ecx),%edx
1019 # in12 = *(uint32 *) (k + 4)
1020 movl 4(%ecx),%ebx
1021 # in13 = *(uint32 *) (k + 8)
1022 movl 8(%ecx),%esi
1023 # in14 = *(uint32 *) (k + 12)
1024 movl 12(%ecx),%ecx
1025 # *(uint32 *) (x + 44) = in11
1026 movl %edx,44(%eax)
1027 # *(uint32 *) (x + 48) = in12
1028 movl %ebx,48(%eax)
1029 # *(uint32 *) (x + 52) = in13
1030 movl %esi,52(%eax)
1031 # *(uint32 *) (x + 56) = in14
1032 movl %ecx,56(%eax)
# Constants for the 128-bit case: "expa", "nd 1", "6-by", "te k".
1033 # in0 = 1634760805
1034 mov $1634760805,%ecx
1035 # in5 = 824206446
1036 mov $824206446,%edx
1037 # in10 = 2036477238
1038 mov $2036477238,%ebx
1039 # in15 = 1797285236
1040 mov $1797285236,%esi
1041 # *(uint32 *) (x + 0) = in0
1042 movl %ecx,0(%eax)
1043 # *(uint32 *) (x + 20) = in5
1044 movl %edx,20(%eax)
1045 # *(uint32 *) (x + 40) = in10
1046 movl %ebx,40(%eax)
1047 # *(uint32 *) (x + 60) = in15
1048 movl %esi,60(%eax)
1049._keysetupdone:
# Reload the registers saved on entry, then add the saved adjustment back
# to %esp to release the frame.
1050 # eax = eax_stack
1051 movl 64(%esp),%eax
1052 # ebx = ebx_stack
1053 movl 68(%esp),%ebx
1054 # esi = esi_stack
1055 movl 72(%esp),%esi
1056 # edi = edi_stack
1057 movl 76(%esp),%edi
1058 # ebp = ebp_stack
1059 movl 80(%esp),%ebp
1060 # leave
1061 add %eax,%esp
1062 ret
1063# enter ECRYPT_ivsetup
#
# ECRYPT_ivsetup(x, iv): install an 8-byte IV into the Salsa20 state.
#   arg1 = x   state, addressed here as 32-bit words
#   arg2 = iv  two 32-bit words, read from iv+0 and iv+4
# The IV words are stored at x+24 and x+28, and the words at x+32 and x+36
# are set to zero.  Those two words are the pair that ECRYPT_encrypt_bytes
# in this file increments with carry after each 64-byte block.
#
1064.text
1065.p2align 5
1066.globl ECRYPT_ivsetup
1067ECRYPT_ivsetup:
# Reserve at least 256 bytes and round %esp down to a 32-byte boundary;
# %eax holds the total adjustment so that it can be undone on exit.
1068 mov %esp,%eax
1069 and $31,%eax
1070 add $256,%eax
1071 sub %eax,%esp
1072 # eax_stack = eax
1073 movl %eax,64(%esp)
1074 # ebx_stack = ebx
1075 movl %ebx,68(%esp)
1076 # esi_stack = esi
1077 movl %esi,72(%esp)
1078 # edi_stack = edi
1079 movl %edi,76(%esp)
1080 # ebp_stack = ebp
1081 movl %ebp,80(%esp)
# %esp + %eax is the stack pointer on entry; x is fetched last because it
# overwrites %eax.
1082 # iv = arg2
1083 movl 8(%esp,%eax),%ecx
1084 # x = arg1
1085 movl 4(%esp,%eax),%eax
1086 # in6 = *(uint32 *) (iv + 0)
1087 movl 0(%ecx),%edx
1088 # in7 = *(uint32 *) (iv + 4)
1089 movl 4(%ecx),%ecx
1090 # in8 = 0
1091 mov $0,%ebx
1092 # in9 = 0
1093 mov $0,%esi
1094 # *(uint32 *) (x + 24) = in6
1095 movl %edx,24(%eax)
1096 # *(uint32 *) (x + 28) = in7
1097 movl %ecx,28(%eax)
1098 # *(uint32 *) (x + 32) = in8
1099 movl %ebx,32(%eax)
1100 # *(uint32 *) (x + 36) = in9
1101 movl %esi,36(%eax)
# Reload the registers saved on entry, then add the saved adjustment back
# to %esp to release the frame.
1102 # eax = eax_stack
1103 movl 64(%esp),%eax
1104 # ebx = ebx_stack
1105 movl 68(%esp),%ebx
1106 # esi = esi_stack
1107 movl 72(%esp),%esi
1108 # edi = edi_stack
1109 movl 76(%esp),%edi
1110 # ebp = ebp_stack
1111 movl 80(%esp),%ebp
1112 # leave
1113 add %eax,%esp
1114 ret
diff --git a/arch/x86/crypto/salsa20-x86_64-asm_64.S b/arch/x86/crypto/salsa20-x86_64-asm_64.S
new file mode 100644
index 000000000000..6214a9b09706
--- /dev/null
+++ b/arch/x86/crypto/salsa20-x86_64-asm_64.S
@@ -0,0 +1,920 @@
1# enter ECRYPT_encrypt_bytes
# ECRYPT_encrypt_bytes(x, m, out, bytes), x86_64 version.
# Arguments arrive in rdi, rsi, rdx, rcx and are moved so that, from here on,
# r8 = x (state), rsi = m (input), rdi = out (output), rdx = bytes.
# Returns immediately when bytes == 0.
2.text
3.p2align 5
4.globl ECRYPT_encrypt_bytes
5ECRYPT_encrypt_bytes:
# Frame setup: r11 = (rsp & 31) + 256 is subtracted from rsp, giving a
# 32-byte-aligned frame of at least 256 bytes. r11 (the amount to add back)
# must stay intact until it is spilled at ._start or consumed at ._done.
6 mov %rsp,%r11
7 and $31,%r11
8 add $256,%r11
9 sub %r11,%rsp
10 # x = arg1
11 mov %rdi,%r8
# The next instruction is a no-op: m already lives in rsi.
12 # m = arg2
13 mov %rsi,%rsi
14 # out = arg3
15 mov %rdx,%rdi
16 # bytes = arg4
17 mov %rcx,%rdx
# Unsigned "bytes <= 0" can only mean bytes == 0. In that case nothing has
# been saved yet and ._done simply releases the frame using r11.
18 # unsigned>? bytes - 0
19 cmp $0,%rdx
20 # comment:fp stack unchanged by jump
21 # goto done if !unsigned>
22 jbe ._done
23 # comment:fp stack unchanged by fallthrough
24# start:
# Spill the registers that must be restored before returning, then take a
# private copy of the 64-byte state.
# Frame layout established here (offsets from rsp):
#     0.. 48  saved r11 (frame size), r12, r13, r14, r15, rbx, rbp
#    56..112  j0, j2, ..., j14: the state as eight 64-bit words, each holding
#             two consecutive 32-bit state words
#   120       x_backup (pointer to the caller's state)
25._start:
26 # r11_stack = r11
27 movq %r11,0(%rsp)
28 # r12_stack = r12
29 movq %r12,8(%rsp)
30 # r13_stack = r13
31 movq %r13,16(%rsp)
32 # r14_stack = r14
33 movq %r14,24(%rsp)
34 # r15_stack = r15
35 movq %r15,32(%rsp)
36 # rbx_stack = rbx
37 movq %rbx,40(%rsp)
38 # rbp_stack = rbp
39 movq %rbp,48(%rsp)
40 # in0 = *(uint64 *) (x + 0)
41 movq 0(%r8),%rcx
42 # in2 = *(uint64 *) (x + 8)
43 movq 8(%r8),%r9
44 # in4 = *(uint64 *) (x + 16)
45 movq 16(%r8),%rax
46 # in6 = *(uint64 *) (x + 24)
47 movq 24(%r8),%r10
# r11 is reused as a scratch register from here on; the frame size was saved
# at 0(%rsp) above.
48 # in8 = *(uint64 *) (x + 32)
49 movq 32(%r8),%r11
50 # in10 = *(uint64 *) (x + 40)
51 movq 40(%r8),%r12
52 # in12 = *(uint64 *) (x + 48)
53 movq 48(%r8),%r13
54 # in14 = *(uint64 *) (x + 56)
55 movq 56(%r8),%r14
56 # j0 = in0
57 movq %rcx,56(%rsp)
58 # j2 = in2
59 movq %r9,64(%rsp)
60 # j4 = in4
61 movq %rax,72(%rsp)
62 # j6 = in6
63 movq %r10,80(%rsp)
64 # j8 = in8
65 movq %r11,88(%rsp)
66 # j10 = in10
67 movq %r12,96(%rsp)
68 # j12 = in12
69 movq %r13,104(%rsp)
70 # j14 = in14
71 movq %r14,112(%rsp)
72 # x_backup = x
73 movq %r8,120(%rsp)
74# bytesatleast1:
# Top of the per-block loop (rdx = remaining bytes, rsi = m, rdi = out).
# A full block (bytes >= 64) is processed in place. A short final block is
# first copied into the 64-byte scratch buffer tmp at 192(%rsp), and both m
# and out are redirected to tmp so the block code can always read and write
# 64 bytes. The real destination is remembered in ctarget at 128(%rsp).
75._bytesatleast1:
76 # unsigned<? bytes - 64
77 cmp $64,%rdx
78 # comment:fp stack unchanged by jump
79 # goto nocopy if !unsigned<
80 jae ._nocopy
81 # ctarget = out
82 movq %rdi,128(%rsp)
83 # out = &tmp
84 leaq 192(%rsp),%rdi
85 # i = bytes
86 mov %rdx,%rcx
# rep movsb copies rcx bytes from (rsi) to (rdi) and advances both pointers,
# which is why out and m are reloaded with &tmp afterwards.
87 # while (i) { *out++ = *m++; --i }
88 rep movsb
89 # out = &tmp
90 leaq 192(%rsp),%rdi
91 # m = &tmp
92 leaq 192(%rsp),%rsi
93 # comment:fp stack unchanged by fallthrough
94# nocopy:
# Save out, m and bytes (136, 144, 152(%rsp)), since their registers are
# needed for the working state, then unpack the eight 64-bit j words into
# sixteen working words. For each pair, the even word is a plain copy of the
# 64-bit value (only its low 32 bits are meaningful) and the odd word is the
# same value shifted right by 32.
# Resulting assignment:
#   x0=rdx  x1=rdi  x2=rcx  x3=rsi  x4=r9   x5=160(%rsp)  x6=rax  x7=r8
#   x8=r11  x9=r10  x10=168(%rsp)   x11=r12 x12=r14 x13=r13 x14=rbx
#   x15=176(%rsp)
# r15 ends up holding the round counter i = 20.
95._nocopy:
96 # out_backup = out
97 movq %rdi,136(%rsp)
98 # m_backup = m
99 movq %rsi,144(%rsp)
100 # bytes_backup = bytes
101 movq %rdx,152(%rsp)
102 # x1 = j0
103 movq 56(%rsp),%rdi
104 # x0 = x1
105 mov %rdi,%rdx
106 # (uint64) x1 >>= 32
107 shr $32,%rdi
108 # x3 = j2
109 movq 64(%rsp),%rsi
110 # x2 = x3
111 mov %rsi,%rcx
112 # (uint64) x3 >>= 32
113 shr $32,%rsi
114 # x5 = j4
115 movq 72(%rsp),%r8
116 # x4 = x5
117 mov %r8,%r9
118 # (uint64) x5 >>= 32
119 shr $32,%r8
120 # x5_stack = x5
121 movq %r8,160(%rsp)
122 # x7 = j6
123 movq 80(%rsp),%r8
124 # x6 = x7
125 mov %r8,%rax
126 # (uint64) x7 >>= 32
127 shr $32,%r8
128 # x9 = j8
129 movq 88(%rsp),%r10
130 # x8 = x9
131 mov %r10,%r11
132 # (uint64) x9 >>= 32
133 shr $32,%r10
134 # x11 = j10
135 movq 96(%rsp),%r12
136 # x10 = x11
137 mov %r12,%r13
138 # x10_stack = x10
139 movq %r13,168(%rsp)
140 # (uint64) x11 >>= 32
141 shr $32,%r12
142 # x13 = j12
143 movq 104(%rsp),%r13
144 # x12 = x13
145 mov %r13,%r14
146 # (uint64) x13 >>= 32
147 shr $32,%r13
148 # x15 = j14
149 movq 112(%rsp),%r15
150 # x14 = x15
151 mov %r15,%rbx
152 # (uint64) x15 >>= 32
153 shr $32,%r15
154 # x15_stack = x15
155 movq %r15,176(%rsp)
156 # i = 20
157 mov $20,%r15
158# mainloop:
# Core of the block function. Each pass through the loop performs four
# rounds (column, row, column, row) and then subtracts 4 from the counter,
# so with i starting at 20 the loop body runs five times.
#
# Every step has the form  t = xa + xb; t <<<= k (32-bit rotate); xc ^= t.
# The sum is formed with a 64-bit lea, but the rotate is done on the 32-bit
# register, which also clears the upper half of t. The xor therefore only
# changes the low 32 bits of xc; whatever sits in the upper half of a working
# register never reaches the low half and is discarded after the loop.
#
# x5, x10 and x15 live on the stack (160, 168, 176(%rsp)). r15 and rbp are
# shared between those values, the temporaries a/b/c/d and the loop counter,
# hence the reloads and spills between the quarter-rounds. The counter itself
# is parked in i_backup at 184(%rsp) for the duration of the body.
159._mainloop:
160 # i_backup = i
161 movq %r15,184(%rsp)
# ---- Round 1 of 4: column round.
# Quarter-rounds on (x0,x4,x8,x12) [a] and (x5,x9,x13,x1) [b], interleaved,
# then (x10,x14,x2,x6) [c], then (x15,x3,x7,x11) [d].
162 # x5 = x5_stack
163 movq 160(%rsp),%r15
164 # a = x12 + x0
165 lea (%r14,%rdx),%rbp
166 # (uint32) a <<<= 7
167 rol $7,%ebp
168 # x4 ^= a
169 xor %rbp,%r9
170 # b = x1 + x5
171 lea (%rdi,%r15),%rbp
172 # (uint32) b <<<= 7
173 rol $7,%ebp
174 # x9 ^= b
175 xor %rbp,%r10
176 # a = x0 + x4
177 lea (%rdx,%r9),%rbp
178 # (uint32) a <<<= 9
179 rol $9,%ebp
180 # x8 ^= a
181 xor %rbp,%r11
182 # b = x5 + x9
183 lea (%r15,%r10),%rbp
184 # (uint32) b <<<= 9
185 rol $9,%ebp
186 # x13 ^= b
187 xor %rbp,%r13
188 # a = x4 + x8
189 lea (%r9,%r11),%rbp
190 # (uint32) a <<<= 13
191 rol $13,%ebp
192 # x12 ^= a
193 xor %rbp,%r14
194 # b = x9 + x13
195 lea (%r10,%r13),%rbp
196 # (uint32) b <<<= 13
197 rol $13,%ebp
198 # x1 ^= b
199 xor %rbp,%rdi
200 # a = x8 + x12
201 lea (%r11,%r14),%rbp
202 # (uint32) a <<<= 18
203 rol $18,%ebp
204 # x0 ^= a
205 xor %rbp,%rdx
206 # b = x13 + x1
207 lea (%r13,%rdi),%rbp
208 # (uint32) b <<<= 18
209 rol $18,%ebp
210 # x5 ^= b
211 xor %rbp,%r15
212 # x10 = x10_stack
213 movq 168(%rsp),%rbp
214 # x5_stack = x5
215 movq %r15,160(%rsp)
216 # c = x6 + x10
217 lea (%rax,%rbp),%r15
218 # (uint32) c <<<= 7
219 rol $7,%r15d
220 # x14 ^= c
221 xor %r15,%rbx
222 # c = x10 + x14
223 lea (%rbp,%rbx),%r15
224 # (uint32) c <<<= 9
225 rol $9,%r15d
226 # x2 ^= c
227 xor %r15,%rcx
228 # c = x14 + x2
229 lea (%rbx,%rcx),%r15
230 # (uint32) c <<<= 13
231 rol $13,%r15d
232 # x6 ^= c
233 xor %r15,%rax
234 # c = x2 + x6
235 lea (%rcx,%rax),%r15
236 # (uint32) c <<<= 18
237 rol $18,%r15d
238 # x10 ^= c
239 xor %r15,%rbp
240 # x15 = x15_stack
241 movq 176(%rsp),%r15
242 # x10_stack = x10
243 movq %rbp,168(%rsp)
244 # d = x11 + x15
245 lea (%r12,%r15),%rbp
246 # (uint32) d <<<= 7
247 rol $7,%ebp
248 # x3 ^= d
249 xor %rbp,%rsi
250 # d = x15 + x3
251 lea (%r15,%rsi),%rbp
252 # (uint32) d <<<= 9
253 rol $9,%ebp
254 # x7 ^= d
255 xor %rbp,%r8
256 # d = x3 + x7
257 lea (%rsi,%r8),%rbp
258 # (uint32) d <<<= 13
259 rol $13,%ebp
260 # x11 ^= d
261 xor %rbp,%r12
262 # d = x7 + x11
263 lea (%r8,%r12),%rbp
264 # (uint32) d <<<= 18
265 rol $18,%ebp
266 # x15 ^= d
267 xor %rbp,%r15
268 # x15_stack = x15
269 movq %r15,176(%rsp)
# ---- Round 2 of 4: row round.
# Quarter-rounds on (x0,x1,x2,x3) [a] and (x5,x6,x7,x4) [b], interleaved,
# then (x10,x11,x8,x9) [c], then (x15,x12,x13,x14) [d].
270 # x5 = x5_stack
271 movq 160(%rsp),%r15
272 # a = x3 + x0
273 lea (%rsi,%rdx),%rbp
274 # (uint32) a <<<= 7
275 rol $7,%ebp
276 # x1 ^= a
277 xor %rbp,%rdi
278 # b = x4 + x5
279 lea (%r9,%r15),%rbp
280 # (uint32) b <<<= 7
281 rol $7,%ebp
282 # x6 ^= b
283 xor %rbp,%rax
284 # a = x0 + x1
285 lea (%rdx,%rdi),%rbp
286 # (uint32) a <<<= 9
287 rol $9,%ebp
288 # x2 ^= a
289 xor %rbp,%rcx
290 # b = x5 + x6
291 lea (%r15,%rax),%rbp
292 # (uint32) b <<<= 9
293 rol $9,%ebp
294 # x7 ^= b
295 xor %rbp,%r8
296 # a = x1 + x2
297 lea (%rdi,%rcx),%rbp
298 # (uint32) a <<<= 13
299 rol $13,%ebp
300 # x3 ^= a
301 xor %rbp,%rsi
302 # b = x6 + x7
303 lea (%rax,%r8),%rbp
304 # (uint32) b <<<= 13
305 rol $13,%ebp
306 # x4 ^= b
307 xor %rbp,%r9
308 # a = x2 + x3
309 lea (%rcx,%rsi),%rbp
310 # (uint32) a <<<= 18
311 rol $18,%ebp
312 # x0 ^= a
313 xor %rbp,%rdx
314 # b = x7 + x4
315 lea (%r8,%r9),%rbp
316 # (uint32) b <<<= 18
317 rol $18,%ebp
318 # x5 ^= b
319 xor %rbp,%r15
320 # x10 = x10_stack
321 movq 168(%rsp),%rbp
322 # x5_stack = x5
323 movq %r15,160(%rsp)
324 # c = x9 + x10
325 lea (%r10,%rbp),%r15
326 # (uint32) c <<<= 7
327 rol $7,%r15d
328 # x11 ^= c
329 xor %r15,%r12
330 # c = x10 + x11
331 lea (%rbp,%r12),%r15
332 # (uint32) c <<<= 9
333 rol $9,%r15d
334 # x8 ^= c
335 xor %r15,%r11
336 # c = x11 + x8
337 lea (%r12,%r11),%r15
338 # (uint32) c <<<= 13
339 rol $13,%r15d
340 # x9 ^= c
341 xor %r15,%r10
342 # c = x8 + x9
343 lea (%r11,%r10),%r15
344 # (uint32) c <<<= 18
345 rol $18,%r15d
346 # x10 ^= c
347 xor %r15,%rbp
348 # x15 = x15_stack
349 movq 176(%rsp),%r15
350 # x10_stack = x10
351 movq %rbp,168(%rsp)
352 # d = x14 + x15
353 lea (%rbx,%r15),%rbp
354 # (uint32) d <<<= 7
355 rol $7,%ebp
356 # x12 ^= d
357 xor %rbp,%r14
358 # d = x15 + x12
359 lea (%r15,%r14),%rbp
360 # (uint32) d <<<= 9
361 rol $9,%ebp
362 # x13 ^= d
363 xor %rbp,%r13
364 # d = x12 + x13
365 lea (%r14,%r13),%rbp
366 # (uint32) d <<<= 13
367 rol $13,%ebp
368 # x14 ^= d
369 xor %rbp,%rbx
370 # d = x13 + x14
371 lea (%r13,%rbx),%rbp
372 # (uint32) d <<<= 18
373 rol $18,%ebp
374 # x15 ^= d
375 xor %rbp,%r15
376 # x15_stack = x15
377 movq %r15,176(%rsp)
# ---- Round 3 of 4: column round (same instruction sequence as round 1).
378 # x5 = x5_stack
379 movq 160(%rsp),%r15
380 # a = x12 + x0
381 lea (%r14,%rdx),%rbp
382 # (uint32) a <<<= 7
383 rol $7,%ebp
384 # x4 ^= a
385 xor %rbp,%r9
386 # b = x1 + x5
387 lea (%rdi,%r15),%rbp
388 # (uint32) b <<<= 7
389 rol $7,%ebp
390 # x9 ^= b
391 xor %rbp,%r10
392 # a = x0 + x4
393 lea (%rdx,%r9),%rbp
394 # (uint32) a <<<= 9
395 rol $9,%ebp
396 # x8 ^= a
397 xor %rbp,%r11
398 # b = x5 + x9
399 lea (%r15,%r10),%rbp
400 # (uint32) b <<<= 9
401 rol $9,%ebp
402 # x13 ^= b
403 xor %rbp,%r13
404 # a = x4 + x8
405 lea (%r9,%r11),%rbp
406 # (uint32) a <<<= 13
407 rol $13,%ebp
408 # x12 ^= a
409 xor %rbp,%r14
410 # b = x9 + x13
411 lea (%r10,%r13),%rbp
412 # (uint32) b <<<= 13
413 rol $13,%ebp
414 # x1 ^= b
415 xor %rbp,%rdi
416 # a = x8 + x12
417 lea (%r11,%r14),%rbp
418 # (uint32) a <<<= 18
419 rol $18,%ebp
420 # x0 ^= a
421 xor %rbp,%rdx
422 # b = x13 + x1
423 lea (%r13,%rdi),%rbp
424 # (uint32) b <<<= 18
425 rol $18,%ebp
426 # x5 ^= b
427 xor %rbp,%r15
428 # x10 = x10_stack
429 movq 168(%rsp),%rbp
430 # x5_stack = x5
431 movq %r15,160(%rsp)
432 # c = x6 + x10
433 lea (%rax,%rbp),%r15
434 # (uint32) c <<<= 7
435 rol $7,%r15d
436 # x14 ^= c
437 xor %r15,%rbx
438 # c = x10 + x14
439 lea (%rbp,%rbx),%r15
440 # (uint32) c <<<= 9
441 rol $9,%r15d
442 # x2 ^= c
443 xor %r15,%rcx
444 # c = x14 + x2
445 lea (%rbx,%rcx),%r15
446 # (uint32) c <<<= 13
447 rol $13,%r15d
448 # x6 ^= c
449 xor %r15,%rax
450 # c = x2 + x6
451 lea (%rcx,%rax),%r15
452 # (uint32) c <<<= 18
453 rol $18,%r15d
454 # x10 ^= c
455 xor %r15,%rbp
456 # x15 = x15_stack
457 movq 176(%rsp),%r15
458 # x10_stack = x10
459 movq %rbp,168(%rsp)
460 # d = x11 + x15
461 lea (%r12,%r15),%rbp
462 # (uint32) d <<<= 7
463 rol $7,%ebp
464 # x3 ^= d
465 xor %rbp,%rsi
466 # d = x15 + x3
467 lea (%r15,%rsi),%rbp
468 # (uint32) d <<<= 9
469 rol $9,%ebp
470 # x7 ^= d
471 xor %rbp,%r8
472 # d = x3 + x7
473 lea (%rsi,%r8),%rbp
474 # (uint32) d <<<= 13
475 rol $13,%ebp
476 # x11 ^= d
477 xor %rbp,%r12
478 # d = x7 + x11
479 lea (%r8,%r12),%rbp
480 # (uint32) d <<<= 18
481 rol $18,%ebp
482 # x15 ^= d
483 xor %rbp,%r15
484 # x15_stack = x15
485 movq %r15,176(%rsp)
# ---- Round 4 of 4: row round (same instruction sequence as round 2).
486 # x5 = x5_stack
487 movq 160(%rsp),%r15
488 # a = x3 + x0
489 lea (%rsi,%rdx),%rbp
490 # (uint32) a <<<= 7
491 rol $7,%ebp
492 # x1 ^= a
493 xor %rbp,%rdi
494 # b = x4 + x5
495 lea (%r9,%r15),%rbp
496 # (uint32) b <<<= 7
497 rol $7,%ebp
498 # x6 ^= b
499 xor %rbp,%rax
500 # a = x0 + x1
501 lea (%rdx,%rdi),%rbp
502 # (uint32) a <<<= 9
503 rol $9,%ebp
504 # x2 ^= a
505 xor %rbp,%rcx
506 # b = x5 + x6
507 lea (%r15,%rax),%rbp
508 # (uint32) b <<<= 9
509 rol $9,%ebp
510 # x7 ^= b
511 xor %rbp,%r8
512 # a = x1 + x2
513 lea (%rdi,%rcx),%rbp
514 # (uint32) a <<<= 13
515 rol $13,%ebp
516 # x3 ^= a
517 xor %rbp,%rsi
518 # b = x6 + x7
519 lea (%rax,%r8),%rbp
520 # (uint32) b <<<= 13
521 rol $13,%ebp
522 # x4 ^= b
523 xor %rbp,%r9
524 # a = x2 + x3
525 lea (%rcx,%rsi),%rbp
526 # (uint32) a <<<= 18
527 rol $18,%ebp
528 # x0 ^= a
529 xor %rbp,%rdx
530 # b = x7 + x4
531 lea (%r8,%r9),%rbp
532 # (uint32) b <<<= 18
533 rol $18,%ebp
534 # x5 ^= b
535 xor %rbp,%r15
536 # x10 = x10_stack
537 movq 168(%rsp),%rbp
538 # x5_stack = x5
539 movq %r15,160(%rsp)
540 # c = x9 + x10
541 lea (%r10,%rbp),%r15
542 # (uint32) c <<<= 7
543 rol $7,%r15d
544 # x11 ^= c
545 xor %r15,%r12
546 # c = x10 + x11
547 lea (%rbp,%r12),%r15
548 # (uint32) c <<<= 9
549 rol $9,%r15d
550 # x8 ^= c
551 xor %r15,%r11
552 # c = x11 + x8
553 lea (%r12,%r11),%r15
554 # (uint32) c <<<= 13
555 rol $13,%r15d
556 # x9 ^= c
557 xor %r15,%r10
558 # c = x8 + x9
559 lea (%r11,%r10),%r15
560 # (uint32) c <<<= 18
561 rol $18,%r15d
562 # x10 ^= c
563 xor %r15,%rbp
564 # x15 = x15_stack
565 movq 176(%rsp),%r15
566 # x10_stack = x10
567 movq %rbp,168(%rsp)
568 # d = x14 + x15
569 lea (%rbx,%r15),%rbp
570 # (uint32) d <<<= 7
571 rol $7,%ebp
572 # x12 ^= d
573 xor %rbp,%r14
574 # d = x15 + x12
575 lea (%r15,%r14),%rbp
576 # (uint32) d <<<= 9
577 rol $9,%ebp
578 # x13 ^= d
579 xor %rbp,%r13
580 # d = x12 + x13
581 lea (%r14,%r13),%rbp
582 # (uint32) d <<<= 13
583 rol $13,%ebp
584 # x14 ^= d
585 xor %rbp,%rbx
586 # d = x13 + x14
587 lea (%r13,%rbx),%rbp
588 # (uint32) d <<<= 18
589 rol $18,%ebp
590 # x15 ^= d
591 xor %rbp,%r15
592 # x15_stack = x15
593 movq %r15,176(%rsp)
# ---- Loop control: reload the counter, subtract 4, repeat while it is
# still above zero (unsigned).
594 # i = i_backup
595 movq 184(%rsp),%r15
596 # unsigned>? i -= 4
597 sub $4,%r15
598 # comment:fp stack unchanged by jump
599 # goto mainloop if unsigned>
600 ja ._mainloop
# ---- Feed-forward: add the original state words back in and repack each
# pair into one 64-bit word.
# Even word: a 32-bit addl, which also clears the upper half of the register.
# Odd word:  shift it into the upper half, add the whole 64-bit j word (the
#            lower half of the shifted value is zero, so no carry can enter
#            the upper half), then shr/shl by 32 to wipe the lower half.
# The two halves are then combined with a plain add.
601 # (uint32) x2 += j2
602 addl 64(%rsp),%ecx
603 # x3 <<= 32
604 shl $32,%rsi
605 # x3 += j2
606 addq 64(%rsp),%rsi
607 # (uint64) x3 >>= 32
608 shr $32,%rsi
609 # x3 <<= 32
610 shl $32,%rsi
611 # x2 += x3
612 add %rsi,%rcx
613 # (uint32) x6 += j6
614 addl 80(%rsp),%eax
615 # x7 <<= 32
616 shl $32,%r8
617 # x7 += j6
618 addq 80(%rsp),%r8
619 # (uint64) x7 >>= 32
620 shr $32,%r8
621 # x7 <<= 32
622 shl $32,%r8
623 # x6 += x7
624 add %r8,%rax
625 # (uint32) x8 += j8
626 addl 88(%rsp),%r11d
627 # x9 <<= 32
628 shl $32,%r10
629 # x9 += j8
630 addq 88(%rsp),%r10
631 # (uint64) x9 >>= 32
632 shr $32,%r10
633 # x9 <<= 32
634 shl $32,%r10
635 # x8 += x9
636 add %r10,%r11
637 # (uint32) x12 += j12
638 addl 104(%rsp),%r14d
639 # x13 <<= 32
640 shl $32,%r13
641 # x13 += j12
642 addq 104(%rsp),%r13
643 # (uint64) x13 >>= 32
644 shr $32,%r13
645 # x13 <<= 32
646 shl $32,%r13
647 # x12 += x13
648 add %r13,%r14
649 # (uint32) x0 += j0
650 addl 56(%rsp),%edx
651 # x1 <<= 32
652 shl $32,%rdi
653 # x1 += j0
654 addq 56(%rsp),%rdi
655 # (uint64) x1 >>= 32
656 shr $32,%rdi
657 # x1 <<= 32
658 shl $32,%rdi
659 # x0 += x1
660 add %rdi,%rdx
# x1 has been folded into x0, so rdi is free to receive x5 from the stack.
661 # x5 = x5_stack
662 movq 160(%rsp),%rdi
663 # (uint32) x4 += j4
664 addl 72(%rsp),%r9d
665 # x5 <<= 32
666 shl $32,%rdi
667 # x5 += j4
668 addq 72(%rsp),%rdi
669 # (uint64) x5 >>= 32
670 shr $32,%rdi
671 # x5 <<= 32
672 shl $32,%rdi
673 # x4 += x5
674 add %rdi,%r9
# x7 has been folded into x6, so r8 is free to receive x10 from the stack.
675 # x10 = x10_stack
676 movq 168(%rsp),%r8
677 # (uint32) x10 += j10
678 addl 96(%rsp),%r8d
679 # x11 <<= 32
680 shl $32,%r12
681 # x11 += j10
682 addq 96(%rsp),%r12
683 # (uint64) x11 >>= 32
684 shr $32,%r12
685 # x11 <<= 32
686 shl $32,%r12
687 # x10 += x11
688 add %r12,%r8
689 # x15 = x15_stack
690 movq 176(%rsp),%rdi
691 # (uint32) x14 += j14
692 addl 112(%rsp),%ebx
693 # x15 <<= 32
694 shl $32,%rdi
695 # x15 += j14
696 addq 112(%rsp),%rdi
697 # (uint64) x15 >>= 32
698 shr $32,%rdi
699 # x15 <<= 32
700 shl $32,%rdi
701 # x14 += x15
702 add %rdi,%rbx
# ---- Output: XOR the 64 bytes just computed with 64 bytes of input and
# store them to out. For a short final block both pointers refer to tmp.
703 # out = out_backup
704 movq 136(%rsp),%rdi
705 # m = m_backup
706 movq 144(%rsp),%rsi
707 # x0 ^= *(uint64 *) (m + 0)
708 xorq 0(%rsi),%rdx
709 # *(uint64 *) (out + 0) = x0
710 movq %rdx,0(%rdi)
711 # x2 ^= *(uint64 *) (m + 8)
712 xorq 8(%rsi),%rcx
713 # *(uint64 *) (out + 8) = x2
714 movq %rcx,8(%rdi)
715 # x4 ^= *(uint64 *) (m + 16)
716 xorq 16(%rsi),%r9
717 # *(uint64 *) (out + 16) = x4
718 movq %r9,16(%rdi)
719 # x6 ^= *(uint64 *) (m + 24)
720 xorq 24(%rsi),%rax
721 # *(uint64 *) (out + 24) = x6
722 movq %rax,24(%rdi)
723 # x8 ^= *(uint64 *) (m + 32)
724 xorq 32(%rsi),%r11
725 # *(uint64 *) (out + 32) = x8
726 movq %r11,32(%rdi)
727 # x10 ^= *(uint64 *) (m + 40)
728 xorq 40(%rsi),%r8
729 # *(uint64 *) (out + 40) = x10
730 movq %r8,40(%rdi)
731 # x12 ^= *(uint64 *) (m + 48)
732 xorq 48(%rsi),%r14
733 # *(uint64 *) (out + 48) = x12
734 movq %r14,48(%rdi)
735 # x14 ^= *(uint64 *) (m + 56)
736 xorq 56(%rsi),%rbx
737 # *(uint64 *) (out + 56) = x14
738 movq %rbx,56(%rdi)
# ---- Advance to the next block: the 64-bit word j8 (state words 8 and 9)
# is incremented by one after every block. It is only written back to the
# caller's state at ._bytesatleast64.
739 # bytes = bytes_backup
740 movq 152(%rsp),%rdx
741 # in8 = j8
742 movq 88(%rsp),%rcx
743 # in8 += 1
744 add $1,%rcx
745 # j8 = in8
746 movq %rcx,88(%rsp)
# One compare feeds both branches: more than 64 bytes left -> step the
# pointers and loop; exactly 64 -> finished; fewer than 64 -> this was a
# short block held in tmp, so copy just `bytes` bytes to the real
# destination saved in ctarget.
747 # unsigned>? unsigned<? bytes - 64
748 cmp $64,%rdx
749 # comment:fp stack unchanged by jump
750 # goto bytesatleast65 if unsigned>
751 ja ._bytesatleast65
752 # comment:fp stack unchanged by jump
753 # goto bytesatleast64 if !unsigned<
754 jae ._bytesatleast64
755 # m = out
756 mov %rdi,%rsi
757 # out = ctarget
758 movq 128(%rsp),%rdi
759 # i = bytes
760 mov %rdx,%rcx
761 # while (i) { *out++ = *m++; --i }
762 rep movsb
763 # comment:fp stack unchanged by fallthrough
764# bytesatleast64:
# All input has been processed. Write the advanced 64-bit word j8 back to
# x+32 so the next call continues where this one stopped, then restore every
# register spilled at ._start (r11 gets the frame size back for ._done).
765._bytesatleast64:
766 # x = x_backup
767 movq 120(%rsp),%rdi
768 # in8 = j8
769 movq 88(%rsp),%rsi
770 # *(uint64 *) (x + 32) = in8
771 movq %rsi,32(%rdi)
772 # r11 = r11_stack
773 movq 0(%rsp),%r11
774 # r12 = r12_stack
775 movq 8(%rsp),%r12
776 # r13 = r13_stack
777 movq 16(%rsp),%r13
778 # r14 = r14_stack
779 movq 24(%rsp),%r14
780 # r15 = r15_stack
781 movq 32(%rsp),%r15
782 # rbx = rbx_stack
783 movq 40(%rsp),%rbx
784 # rbp = rbp_stack
785 movq 48(%rsp),%rbp
786 # comment:fp stack unchanged by fallthrough
787# done:
# Common exit, also reached directly from the entry code when bytes == 0.
# Adding r11 (the frame size) restores the caller's rsp. rax and rdx are
# loaded from whatever rdi and rsi hold at this point; the C prototype in
# salsa20_glue.c declares the function void, so no result is consumed there.
788._done:
789 # leave
790 add %r11,%rsp
791 mov %rdi,%rax
792 mov %rsi,%rdx
793 ret
794# bytesatleast65:
# More than one block remained: consume the 64 bytes just processed by
# stepping the byte count and both pointers, then handle the next block.
795._bytesatleast65:
796 # bytes -= 64
797 sub $64,%rdx
798 # out += 64
799 add $64,%rdi
800 # m += 64
801 add $64,%rsi
802 # comment:fp stack unchanged by jump
803 # goto bytesatleast1
804 jmp ._bytesatleast1
805# enter ECRYPT_keysetup
# ECRYPT_keysetup(x, k, kbits), x86_64 version: rdi = x, rsi = k, rdx = kbits.
# Fills the key and constant words of the 64-byte state at x:
#   x+4  .. x+19   key bytes 0..15
#   x+44 .. x+59   key bytes 16..31 when kbits >= 256, otherwise key bytes
#                  0..15 again
#   x+0, x+20, x+40, x+60   four 32-bit constants, chosen by key size
# The bytes at x+24 .. x+39 are not touched here.
# rcx (the fourth argument register) is only used as scratch; it is never
# read as an argument.
806.text
807.p2align 5
808.globl ECRYPT_keysetup
809ECRYPT_keysetup:
# Same aligned-frame prologue as the other entry points. Nothing is stored
# in the frame by this routine; r11 is only needed to undo the adjustment.
810 mov %rsp,%r11
811 and $31,%r11
812 add $256,%r11
813 sub %r11,%rsp
# The three moves below are no-ops: each value is already in its register.
814 # k = arg2
815 mov %rsi,%rsi
816 # kbits = arg3
817 mov %rdx,%rdx
818 # x = arg1
819 mov %rdi,%rdi
820 # in0 = *(uint64 *) (k + 0)
821 movq 0(%rsi),%r8
822 # in2 = *(uint64 *) (k + 8)
823 movq 8(%rsi),%r9
824 # *(uint64 *) (x + 4) = in0
825 movq %r8,4(%rdi)
826 # *(uint64 *) (x + 12) = in2
827 movq %r9,12(%rdi)
# Any kbits below 256 takes the 128-bit path.
# NOTE(review): this compares all 64 bits of rdx, while the C prototype in
# salsa20_glue.c declares the key size as u32 - assumes the caller passes it
# zero-extended; confirm.
828 # unsigned<? kbits - 256
829 cmp $256,%rdx
830 # comment:fp stack unchanged by jump
831 # goto kbits128 if unsigned<
832 jb ._kbits128
833# kbits256:
834._kbits256:
835 # in10 = *(uint64 *) (k + 16)
836 movq 16(%rsi),%rdx
837 # in12 = *(uint64 *) (k + 24)
838 movq 24(%rsi),%rsi
839 # *(uint64 *) (x + 44) = in10
840 movq %rdx,44(%rdi)
841 # *(uint64 *) (x + 52) = in12
842 movq %rsi,52(%rdi)
# The four constants are the little-endian 32-bit words of the ASCII string
# "expand 32-byte k": 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574.
843 # in0 = 1634760805
844 mov $1634760805,%rsi
845 # in4 = 857760878
846 mov $857760878,%rdx
847 # in10 = 2036477234
848 mov $2036477234,%rcx
849 # in14 = 1797285236
850 mov $1797285236,%r8
851 # *(uint32 *) (x + 0) = in0
852 movl %esi,0(%rdi)
853 # *(uint32 *) (x + 20) = in4
854 movl %edx,20(%rdi)
855 # *(uint32 *) (x + 40) = in10
856 movl %ecx,40(%rdi)
857 # *(uint32 *) (x + 60) = in14
858 movl %r8d,60(%rdi)
859 # comment:fp stack unchanged by jump
860 # goto keysetupdone
861 jmp ._keysetupdone
862# kbits128:
863._kbits128:
# 128-bit key: the same 16 key bytes are stored a second time at x+44.
864 # in10 = *(uint64 *) (k + 0)
865 movq 0(%rsi),%rdx
866 # in12 = *(uint64 *) (k + 8)
867 movq 8(%rsi),%rsi
868 # *(uint64 *) (x + 44) = in10
869 movq %rdx,44(%rdi)
870 # *(uint64 *) (x + 52) = in12
871 movq %rsi,52(%rdi)
# The four constants are the little-endian 32-bit words of the ASCII string
# "expand 16-byte k": 0x61707865, 0x3120646e, 0x79622d36, 0x6b206574.
872 # in0 = 1634760805
873 mov $1634760805,%rsi
874 # in4 = 824206446
875 mov $824206446,%rdx
876 # in10 = 2036477238
877 mov $2036477238,%rcx
878 # in14 = 1797285236
879 mov $1797285236,%r8
880 # *(uint32 *) (x + 0) = in0
881 movl %esi,0(%rdi)
882 # *(uint32 *) (x + 20) = in4
883 movl %edx,20(%rdi)
884 # *(uint32 *) (x + 40) = in10
885 movl %ecx,40(%rdi)
886 # *(uint32 *) (x + 60) = in14
887 movl %r8d,60(%rdi)
888# keysetupdone:
889._keysetupdone:
# Release the frame. rax and rdx are loaded from rdi and rsi; the C prototype
# in salsa20_glue.c declares the function void.
890 # leave
891 add %r11,%rsp
892 mov %rdi,%rax
893 mov %rsi,%rdx
894 ret
895# enter ECRYPT_ivsetup
# ECRYPT_ivsetup(x, iv), x86_64 version: rdi = x, rsi = iv.
# Copies the 8 bytes at iv to x+24 and stores a 64-bit zero at x+32, the
# word that ECRYPT_encrypt_bytes increments once per 64-byte block.
896.text
897.p2align 5
898.globl ECRYPT_ivsetup
899ECRYPT_ivsetup:
# Same aligned-frame prologue as the other entry points; the frame itself is
# not used by this routine.
900 mov %rsp,%r11
901 and $31,%r11
902 add $256,%r11
903 sub %r11,%rsp
# The two moves below are no-ops: each value is already in its register.
904 # iv = arg2
905 mov %rsi,%rsi
906 # x = arg1
907 mov %rdi,%rdi
908 # in6 = *(uint64 *) (iv + 0)
909 movq 0(%rsi),%rsi
910 # in8 = 0
911 mov $0,%r8
912 # *(uint64 *) (x + 24) = in6
913 movq %rsi,24(%rdi)
914 # *(uint64 *) (x + 32) = in8
915 movq %r8,32(%rdi)
916 # leave
917 add %r11,%rsp
918 mov %rdi,%rax
919 mov %rsi,%rdx
920 ret
diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c
new file mode 100644
index 000000000000..bccb76d80987
--- /dev/null
+++ b/arch/x86/crypto/salsa20_glue.c
@@ -0,0 +1,129 @@
1/*
2 * Glue code for optimized assembly version of Salsa20.
3 *
4 * Copyright (c) 2007 Tan Swee Heng <thesweeheng@gmail.com>
5 *
6 * The assembly codes are public domain assembly codes written by Daniel J.
7 * Bernstein <djb@cr.yp.to>. The codes are modified to include indentation
8 * and to remove extraneous comments and functions that are not needed.
9 * - i586 version, renamed as salsa20-i586-asm_32.S
10 * available from <http://cr.yp.to/snuffle/salsa20/x86-pm/salsa20.s>
11 * - x86-64 version, renamed as salsa20-x86_64-asm_64.S
12 * available from <http://cr.yp.to/snuffle/salsa20/amd64-3/salsa20.s>
13 *
14 * This program is free software; you can redistribute it and/or modify it
15 * under the terms of the GNU General Public License as published by the Free
16 * Software Foundation; either version 2 of the License, or (at your option)
17 * any later version.
18 *
19 */
20
21#include <crypto/algapi.h>
22#include <linux/module.h>
23#include <linux/crypto.h>
24
25#define SALSA20_IV_SIZE 8U
26#define SALSA20_MIN_KEY_SIZE 16U
27#define SALSA20_MAX_KEY_SIZE 32U
28
29// use the ECRYPT_* function names
30#define salsa20_keysetup ECRYPT_keysetup
31#define salsa20_ivsetup ECRYPT_ivsetup
32#define salsa20_encrypt_bytes ECRYPT_encrypt_bytes
33
/*
 * Per-transform Salsa20 state: sixteen 32-bit words (64 bytes). The assembly
 * routines receive a pointer to this structure as their first argument and
 * address it by byte offset.
 */
34struct salsa20_ctx
35{
36 u32 input[16];
37};
38
/*
 * Entry points implemented in the accompanying assembly files (the macros
 * above map these names onto the ECRYPT_* symbols).
 *
 * keysize is in bits. The x86_64 ECRYPT_keysetup never reads its fourth
 * argument register, so ivsize is ignored there.
 *
 * NOTE(review): the x86_64 assembly uses the full 64-bit argument registers
 * for keysize and bytes, while both are declared u32 here - assumes the
 * compiler passes them zero-extended; confirm.
 */
39asmlinkage void salsa20_keysetup(struct salsa20_ctx *ctx, const u8 *k,
40 u32 keysize, u32 ivsize);
41asmlinkage void salsa20_ivsetup(struct salsa20_ctx *ctx, const u8 *iv);
42asmlinkage void salsa20_encrypt_bytes(struct salsa20_ctx *ctx,
43 const u8 *src, u8 *dst, u32 bytes);
44
/*
 * Load @key into the transform's Salsa20 state.
 *
 * @keysize is in bytes; the assembly routine takes bit counts, hence the
 * multiplication by 8 for both the key size and the IV size. No validation
 * is done here and the function always returns 0.
 */
45static int setkey(struct crypto_tfm *tfm, const u8 *key,
46 unsigned int keysize)
47{
48 struct salsa20_ctx *ctx = crypto_tfm_ctx(tfm);
49 salsa20_keysetup(ctx, key, keysize*8, SALSA20_IV_SIZE*8);
50 return 0;
51}
52
53static int encrypt(struct blkcipher_desc *desc,
54 struct scatterlist *dst, struct scatterlist *src,
55 unsigned int nbytes)
56{
57 struct blkcipher_walk walk;
58 struct crypto_blkcipher *tfm = desc->tfm;
59 struct salsa20_ctx *ctx = crypto_blkcipher_ctx(tfm);
60 int err;
61
62 blkcipher_walk_init(&walk, dst, src, nbytes);
63 err = blkcipher_walk_virt_block(desc, &walk, 64);
64
65 salsa20_ivsetup(ctx, walk.iv);
66
67 if (likely(walk.nbytes == nbytes))
68 {
69 salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
70 walk.dst.virt.addr, nbytes);
71 return blkcipher_walk_done(desc, &walk, 0);
72 }
73
74 while (walk.nbytes >= 64) {
75 salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
76 walk.dst.virt.addr,
77 walk.nbytes - (walk.nbytes % 64));
78 err = blkcipher_walk_done(desc, &walk, walk.nbytes % 64);
79 }
80
81 if (walk.nbytes) {
82 salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
83 walk.dst.virt.addr, walk.nbytes);
84 err = blkcipher_walk_done(desc, &walk, 0);
85 }
86
87 return err;
88}
89
/*
 * Algorithm descriptor registered by init(): a blkcipher named "salsa20"
 * (driver name "salsa20-asm", priority 200) with a block size of 1, key
 * sizes from SALSA20_MIN_KEY_SIZE to SALSA20_MAX_KEY_SIZE bytes and an IV of
 * SALSA20_IV_SIZE bytes. The same encrypt() routine is installed for both
 * encryption and decryption.
 */
90static struct crypto_alg alg = {
91 .cra_name = "salsa20",
92 .cra_driver_name = "salsa20-asm",
93 .cra_priority = 200,
94 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
95 .cra_type = &crypto_blkcipher_type,
96 .cra_blocksize = 1,
97 .cra_ctxsize = sizeof(struct salsa20_ctx),
98 .cra_alignmask = 3,
99 .cra_module = THIS_MODULE,
100 .cra_list = LIST_HEAD_INIT(alg.cra_list),
101 .cra_u = {
102 .blkcipher = {
103 .setkey = setkey,
104 .encrypt = encrypt,
105 .decrypt = encrypt,
106 .min_keysize = SALSA20_MIN_KEY_SIZE,
107 .max_keysize = SALSA20_MAX_KEY_SIZE,
108 .ivsize = SALSA20_IV_SIZE,
109 }
110 }
111};
112
/* Module entry point: register the algorithm and return the result. */
113static int __init init(void)
114{
115 return crypto_register_alg(&alg);
116}
117
/* Module exit point: unregister the algorithm registered by init(). */
118static void __exit fini(void)
119{
120 crypto_unregister_alg(&alg);
121}
122
123module_init(init);
124module_exit(fini);
125
126MODULE_LICENSE("GPL");
127MODULE_DESCRIPTION ("Salsa20 stream cipher algorithm (optimized assembly version)");
128MODULE_ALIAS("salsa20");
129MODULE_ALIAS("salsa20-asm");
diff --git a/arch/x86/crypto/twofish_64.c b/arch/x86/crypto/twofish_64.c
deleted file mode 100644
index 182d91d5cfb9..000000000000
--- a/arch/x86/crypto/twofish_64.c
+++ /dev/null
@@ -1,97 +0,0 @@
1/*
2 * Glue Code for optimized x86_64 assembler version of TWOFISH
3 *
4 * Originally Twofish for GPG
5 * By Matthew Skala <mskala@ansuz.sooke.bc.ca>, July 26, 1998
6 * 256-bit key length added March 20, 1999
7 * Some modifications to reduce the text size by Werner Koch, April, 1998
8 * Ported to the kerneli patch by Marc Mutz <Marc@Mutz.com>
9 * Ported to CryptoAPI by Colin Slater <hoho@tacomeat.net>
10 *
11 * The original author has disclaimed all copyright interest in this
12 * code and thus put it in the public domain. The subsequent authors
13 * have put this under the GNU General Public License.
14 *
15 * This program is free software; you can redistribute it and/or modify
16 * it under the terms of the GNU General Public License as published by
17 * the Free Software Foundation; either version 2 of the License, or
18 * (at your option) any later version.
19 *
20 * This program is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 * GNU General Public License for more details.
24 *
25 * You should have received a copy of the GNU General Public License
26 * along with this program; if not, write to the Free Software
27 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
28 * USA
29 *
30 * This code is a "clean room" implementation, written from the paper
31 * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey,
32 * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available
33 * through http://www.counterpane.com/twofish.html
34 *
35 * For background information on multiplication in finite fields, used for
36 * the matrix operations in the key schedule, see the book _Contemporary
37 * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the
38 * Third Edition.
39 */
40
41#include <crypto/twofish.h>
42#include <linux/crypto.h>
43#include <linux/init.h>
44#include <linux/kernel.h>
45#include <linux/module.h>
46#include <linux/types.h>
47
48asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
49asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
50
/* Cipher encrypt hook: forwards its arguments unchanged to twofish_enc_blk(). */
51static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
52{
53 twofish_enc_blk(tfm, dst, src);
54}
55
/* Cipher decrypt hook: forwards its arguments unchanged to twofish_dec_blk(). */
56static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
57{
58 twofish_dec_blk(tfm, dst, src);
59}
60
/*
 * Algorithm descriptor registered by init(): a single-block cipher named
 * "twofish" (driver name "twofish-x86_64", priority 200) whose encrypt and
 * decrypt hooks are the two wrappers above.
 */
61static struct crypto_alg alg = {
62 .cra_name = "twofish",
63 .cra_driver_name = "twofish-x86_64",
64 .cra_priority = 200,
65 .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
66 .cra_blocksize = TF_BLOCK_SIZE,
67 .cra_ctxsize = sizeof(struct twofish_ctx),
68 .cra_alignmask = 3,
69 .cra_module = THIS_MODULE,
70 .cra_list = LIST_HEAD_INIT(alg.cra_list),
71 .cra_u = {
72 .cipher = {
73 .cia_min_keysize = TF_MIN_KEY_SIZE,
74 .cia_max_keysize = TF_MAX_KEY_SIZE,
75 .cia_setkey = twofish_setkey,
76 .cia_encrypt = twofish_encrypt,
77 .cia_decrypt = twofish_decrypt
78 }
79 }
80};
81
/* Module entry point: register the algorithm and return the result. */
82static int __init init(void)
83{
84 return crypto_register_alg(&alg);
85}
86
/* Module exit point: unregister the algorithm registered by init(). */
87static void __exit fini(void)
88{
89 crypto_unregister_alg(&alg);
90}
91
92module_init(init);
93module_exit(fini);
94
95MODULE_LICENSE("GPL");
96MODULE_DESCRIPTION ("Twofish Cipher Algorithm, x86_64 asm optimized");
97MODULE_ALIAS("twofish");
diff --git a/arch/x86/crypto/twofish_32.c b/arch/x86/crypto/twofish_glue.c
index e3004dfe9c7a..cefaf8b9aa18 100644
--- a/arch/x86/crypto/twofish_32.c
+++ b/arch/x86/crypto/twofish_glue.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Glue Code for optimized 586 assembler version of TWOFISH 2 * Glue Code for assembler optimized version of TWOFISH
3 * 3 *
4 * Originally Twofish for GPG 4 * Originally Twofish for GPG
5 * By Matthew Skala <mskala@ansuz.sooke.bc.ca>, July 26, 1998 5 * By Matthew Skala <mskala@ansuz.sooke.bc.ca>, July 26, 1998
@@ -44,7 +44,6 @@
44#include <linux/module.h> 44#include <linux/module.h>
45#include <linux/types.h> 45#include <linux/types.h>
46 46
47
48asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); 47asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
49asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); 48asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
50 49
@@ -60,7 +59,7 @@ static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
60 59
61static struct crypto_alg alg = { 60static struct crypto_alg alg = {
62 .cra_name = "twofish", 61 .cra_name = "twofish",
63 .cra_driver_name = "twofish-i586", 62 .cra_driver_name = "twofish-asm",
64 .cra_priority = 200, 63 .cra_priority = 200,
65 .cra_flags = CRYPTO_ALG_TYPE_CIPHER, 64 .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
66 .cra_blocksize = TF_BLOCK_SIZE, 65 .cra_blocksize = TF_BLOCK_SIZE,
@@ -93,5 +92,6 @@ module_init(init);
93module_exit(fini); 92module_exit(fini);
94 93
95MODULE_LICENSE("GPL"); 94MODULE_LICENSE("GPL");
96MODULE_DESCRIPTION ("Twofish Cipher Algorithm, i586 asm optimized"); 95MODULE_DESCRIPTION ("Twofish Cipher Algorithm, asm optimized");
97MODULE_ALIAS("twofish"); 96MODULE_ALIAS("twofish");
97MODULE_ALIAS("twofish-asm");