author     Tan Swee Heng <thesweeheng@gmail.com>     2007-12-17 11:04:40 -0500
committer  Herbert Xu <herbert@gondor.apana.org.au>  2008-01-10 16:16:57 -0500
commit     9a7dafbba47384c330779c75a1546684efaa8c1a (patch)
tree       0fde4a938ebc3c9deb0873b709dc5d2d69ab25c3 /arch
parent     974e4b752ee623854c5dc2bbfc7c7725029ce173 (diff)
[CRYPTO] salsa20: Add x86-64 assembly version
This is the x86-64 version of the Salsa20 stream cipher algorithm. The
original assembly code came from
<http://cr.yp.to/snuffle/salsa20/amd64-3/salsa20.s>. It has been
reformatted for clarity.
Signed-off-by: Tan Swee Heng <thesweeheng@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
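
For orientation when reading the assembly below: each lea/rol/xor triple is one
Salsa20 quarter-round operation b ^= (a + d) <<< k on 32-bit state words (the
code keeps pairs of words packed in 64-bit registers and spills a few to the
stack), and the "i = 20 ... sub $4" loop runs the cipher's 20 rounds as five
iterations of four rounds each. A minimal C sketch of one double-round, for
illustration only (not part of the patch; R() is a local helper macro, and the
operand order mirrors the assembly's comments):

	#include <stdint.h>

	/* 32-bit left rotation -- what `rol $k,%ebp` performs below */
	#define R(v, k) (((v) << (k)) | ((v) >> (32 - (k))))

	static void salsa20_doubleround(uint32_t x[16])
	{
		/* column round */
		x[ 4] ^= R(x[12] + x[ 0],  7); x[ 8] ^= R(x[ 0] + x[ 4],  9);
		x[12] ^= R(x[ 4] + x[ 8], 13); x[ 0] ^= R(x[ 8] + x[12], 18);
		x[ 9] ^= R(x[ 1] + x[ 5],  7); x[13] ^= R(x[ 5] + x[ 9],  9);
		x[ 1] ^= R(x[ 9] + x[13], 13); x[ 5] ^= R(x[13] + x[ 1], 18);
		x[14] ^= R(x[ 6] + x[10],  7); x[ 2] ^= R(x[10] + x[14],  9);
		x[ 6] ^= R(x[14] + x[ 2], 13); x[10] ^= R(x[ 2] + x[ 6], 18);
		x[ 3] ^= R(x[11] + x[15],  7); x[ 7] ^= R(x[15] + x[ 3],  9);
		x[11] ^= R(x[ 3] + x[ 7], 13); x[15] ^= R(x[ 7] + x[11], 18);
		/* row round */
		x[ 1] ^= R(x[ 3] + x[ 0],  7); x[ 2] ^= R(x[ 0] + x[ 1],  9);
		x[ 3] ^= R(x[ 1] + x[ 2], 13); x[ 0] ^= R(x[ 2] + x[ 3], 18);
		x[ 6] ^= R(x[ 4] + x[ 5],  7); x[ 7] ^= R(x[ 5] + x[ 6],  9);
		x[ 4] ^= R(x[ 6] + x[ 7], 13); x[ 5] ^= R(x[ 7] + x[ 4], 18);
		x[11] ^= R(x[ 9] + x[10],  7); x[ 8] ^= R(x[10] + x[11],  9);
		x[ 9] ^= R(x[11] + x[ 8], 13); x[10] ^= R(x[ 8] + x[ 9], 18);
		x[12] ^= R(x[14] + x[15],  7); x[13] ^= R(x[15] + x[12],  9);
		x[14] ^= R(x[12] + x[13], 13); x[15] ^= R(x[13] + x[14], 18);
	}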
Diffstat (limited to 'arch')
-rw-r--r--  arch/x86/crypto/Makefile                 |   2
-rw-r--r--  arch/x86/crypto/salsa20-x86_64-asm_64.S  | 920
-rw-r--r--  arch/x86/crypto/salsa20_glue.c           |   2
3 files changed, 924 insertions, 0 deletions
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 25cc8441046a..09200e12f14d 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
 
 obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
+obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
 
 aes-i586-y := aes-i586-asm_32.o aes_glue.o
 twofish-i586-y := twofish-i586-asm_32.o twofish_32.o
@@ -15,3 +16,4 @@ salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o
 
 aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
 twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_64.o
+salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
diff --git a/arch/x86/crypto/salsa20-x86_64-asm_64.S b/arch/x86/crypto/salsa20-x86_64-asm_64.S
new file mode 100644
index 000000000000..6214a9b09706
--- /dev/null
+++ b/arch/x86/crypto/salsa20-x86_64-asm_64.S
@@ -0,0 +1,920 @@
+# enter ECRYPT_encrypt_bytes
+.text
+.p2align 5
+.globl ECRYPT_encrypt_bytes
+ECRYPT_encrypt_bytes:
+	mov %rsp,%r11
+	and $31,%r11
+	add $256,%r11
+	sub %r11,%rsp
+	# x = arg1
+	mov %rdi,%r8
+	# m = arg2
+	mov %rsi,%rsi
+	# out = arg3
+	mov %rdx,%rdi
+	# bytes = arg4
+	mov %rcx,%rdx
+	# unsigned>? bytes - 0
+	cmp $0,%rdx
+	# comment:fp stack unchanged by jump
+	# goto done if !unsigned>
+	jbe ._done
+	# comment:fp stack unchanged by fallthrough
+# start:
+._start:
+	# r11_stack = r11
+	movq %r11,0(%rsp)
+	# r12_stack = r12
+	movq %r12,8(%rsp)
+	# r13_stack = r13
+	movq %r13,16(%rsp)
+	# r14_stack = r14
+	movq %r14,24(%rsp)
+	# r15_stack = r15
+	movq %r15,32(%rsp)
+	# rbx_stack = rbx
+	movq %rbx,40(%rsp)
+	# rbp_stack = rbp
+	movq %rbp,48(%rsp)
+	# in0 = *(uint64 *) (x + 0)
+	movq 0(%r8),%rcx
+	# in2 = *(uint64 *) (x + 8)
+	movq 8(%r8),%r9
+	# in4 = *(uint64 *) (x + 16)
+	movq 16(%r8),%rax
+	# in6 = *(uint64 *) (x + 24)
+	movq 24(%r8),%r10
+	# in8 = *(uint64 *) (x + 32)
+	movq 32(%r8),%r11
+	# in10 = *(uint64 *) (x + 40)
+	movq 40(%r8),%r12
+	# in12 = *(uint64 *) (x + 48)
+	movq 48(%r8),%r13
+	# in14 = *(uint64 *) (x + 56)
+	movq 56(%r8),%r14
+	# j0 = in0
+	movq %rcx,56(%rsp)
+	# j2 = in2
+	movq %r9,64(%rsp)
+	# j4 = in4
+	movq %rax,72(%rsp)
+	# j6 = in6
+	movq %r10,80(%rsp)
+	# j8 = in8
+	movq %r11,88(%rsp)
+	# j10 = in10
+	movq %r12,96(%rsp)
+	# j12 = in12
+	movq %r13,104(%rsp)
+	# j14 = in14
+	movq %r14,112(%rsp)
+	# x_backup = x
+	movq %r8,120(%rsp)
+# bytesatleast1:
+._bytesatleast1:
+	# unsigned<? bytes - 64
+	cmp $64,%rdx
+	# comment:fp stack unchanged by jump
+	# goto nocopy if !unsigned<
+	jae ._nocopy
+	# ctarget = out
+	movq %rdi,128(%rsp)
+	# out = &tmp
+	leaq 192(%rsp),%rdi
+	# i = bytes
+	mov %rdx,%rcx
+	# while (i) { *out++ = *m++; --i }
+	rep movsb
+	# out = &tmp
+	leaq 192(%rsp),%rdi
+	# m = &tmp
+	leaq 192(%rsp),%rsi
+	# comment:fp stack unchanged by fallthrough
+# nocopy:
+._nocopy:
+	# out_backup = out
+	movq %rdi,136(%rsp)
+	# m_backup = m
+	movq %rsi,144(%rsp)
+	# bytes_backup = bytes
+	movq %rdx,152(%rsp)
+	# x1 = j0
+	movq 56(%rsp),%rdi
+	# x0 = x1
+	mov %rdi,%rdx
+	# (uint64) x1 >>= 32
+	shr $32,%rdi
+	# x3 = j2
+	movq 64(%rsp),%rsi
+	# x2 = x3
+	mov %rsi,%rcx
+	# (uint64) x3 >>= 32
+	shr $32,%rsi
+	# x5 = j4
+	movq 72(%rsp),%r8
+	# x4 = x5
+	mov %r8,%r9
+	# (uint64) x5 >>= 32
+	shr $32,%r8
+	# x5_stack = x5
+	movq %r8,160(%rsp)
+	# x7 = j6
+	movq 80(%rsp),%r8
+	# x6 = x7
+	mov %r8,%rax
+	# (uint64) x7 >>= 32
+	shr $32,%r8
+	# x9 = j8
+	movq 88(%rsp),%r10
+	# x8 = x9
+	mov %r10,%r11
+	# (uint64) x9 >>= 32
+	shr $32,%r10
+	# x11 = j10
+	movq 96(%rsp),%r12
+	# x10 = x11
+	mov %r12,%r13
+	# x10_stack = x10
+	movq %r13,168(%rsp)
+	# (uint64) x11 >>= 32
+	shr $32,%r12
+	# x13 = j12
+	movq 104(%rsp),%r13
+	# x12 = x13
+	mov %r13,%r14
+	# (uint64) x13 >>= 32
+	shr $32,%r13
+	# x15 = j14
+	movq 112(%rsp),%r15
+	# x14 = x15
+	mov %r15,%rbx
+	# (uint64) x15 >>= 32
+	shr $32,%r15
+	# x15_stack = x15
+	movq %r15,176(%rsp)
+	# i = 20
+	mov $20,%r15
+# mainloop:
+._mainloop:
+	# i_backup = i
+	movq %r15,184(%rsp)
+	# x5 = x5_stack
+	movq 160(%rsp),%r15
+	# a = x12 + x0
+	lea (%r14,%rdx),%rbp
+	# (uint32) a <<<= 7
+	rol $7,%ebp
+	# x4 ^= a
+	xor %rbp,%r9
+	# b = x1 + x5
+	lea (%rdi,%r15),%rbp
+	# (uint32) b <<<= 7
+	rol $7,%ebp
+	# x9 ^= b
+	xor %rbp,%r10
+	# a = x0 + x4
+	lea (%rdx,%r9),%rbp
+	# (uint32) a <<<= 9
+	rol $9,%ebp
+	# x8 ^= a
+	xor %rbp,%r11
+	# b = x5 + x9
+	lea (%r15,%r10),%rbp
+	# (uint32) b <<<= 9
+	rol $9,%ebp
+	# x13 ^= b
+	xor %rbp,%r13
+	# a = x4 + x8
+	lea (%r9,%r11),%rbp
+	# (uint32) a <<<= 13
+	rol $13,%ebp
+	# x12 ^= a
+	xor %rbp,%r14
+	# b = x9 + x13
+	lea (%r10,%r13),%rbp
+	# (uint32) b <<<= 13
+	rol $13,%ebp
+	# x1 ^= b
+	xor %rbp,%rdi
+	# a = x8 + x12
+	lea (%r11,%r14),%rbp
+	# (uint32) a <<<= 18
+	rol $18,%ebp
+	# x0 ^= a
+	xor %rbp,%rdx
+	# b = x13 + x1
+	lea (%r13,%rdi),%rbp
+	# (uint32) b <<<= 18
+	rol $18,%ebp
+	# x5 ^= b
+	xor %rbp,%r15
+	# x10 = x10_stack
+	movq 168(%rsp),%rbp
+	# x5_stack = x5
+	movq %r15,160(%rsp)
+	# c = x6 + x10
+	lea (%rax,%rbp),%r15
+	# (uint32) c <<<= 7
+	rol $7,%r15d
+	# x14 ^= c
+	xor %r15,%rbx
+	# c = x10 + x14
+	lea (%rbp,%rbx),%r15
+	# (uint32) c <<<= 9
+	rol $9,%r15d
+	# x2 ^= c
+	xor %r15,%rcx
+	# c = x14 + x2
+	lea (%rbx,%rcx),%r15
+	# (uint32) c <<<= 13
+	rol $13,%r15d
+	# x6 ^= c
+	xor %r15,%rax
+	# c = x2 + x6
+	lea (%rcx,%rax),%r15
+	# (uint32) c <<<= 18
+	rol $18,%r15d
+	# x10 ^= c
+	xor %r15,%rbp
+	# x15 = x15_stack
+	movq 176(%rsp),%r15
+	# x10_stack = x10
+	movq %rbp,168(%rsp)
+	# d = x11 + x15
+	lea (%r12,%r15),%rbp
+	# (uint32) d <<<= 7
+	rol $7,%ebp
+	# x3 ^= d
+	xor %rbp,%rsi
+	# d = x15 + x3
+	lea (%r15,%rsi),%rbp
+	# (uint32) d <<<= 9
+	rol $9,%ebp
+	# x7 ^= d
+	xor %rbp,%r8
+	# d = x3 + x7
+	lea (%rsi,%r8),%rbp
+	# (uint32) d <<<= 13
+	rol $13,%ebp
+	# x11 ^= d
+	xor %rbp,%r12
+	# d = x7 + x11
+	lea (%r8,%r12),%rbp
+	# (uint32) d <<<= 18
+	rol $18,%ebp
+	# x15 ^= d
+	xor %rbp,%r15
+	# x15_stack = x15
+	movq %r15,176(%rsp)
+	# x5 = x5_stack
+	movq 160(%rsp),%r15
+	# a = x3 + x0
+	lea (%rsi,%rdx),%rbp
+	# (uint32) a <<<= 7
+	rol $7,%ebp
+	# x1 ^= a
+	xor %rbp,%rdi
+	# b = x4 + x5
+	lea (%r9,%r15),%rbp
+	# (uint32) b <<<= 7
+	rol $7,%ebp
+	# x6 ^= b
+	xor %rbp,%rax
+	# a = x0 + x1
+	lea (%rdx,%rdi),%rbp
+	# (uint32) a <<<= 9
+	rol $9,%ebp
+	# x2 ^= a
+	xor %rbp,%rcx
+	# b = x5 + x6
+	lea (%r15,%rax),%rbp
+	# (uint32) b <<<= 9
+	rol $9,%ebp
+	# x7 ^= b
+	xor %rbp,%r8
+	# a = x1 + x2
+	lea (%rdi,%rcx),%rbp
+	# (uint32) a <<<= 13
+	rol $13,%ebp
+	# x3 ^= a
+	xor %rbp,%rsi
+	# b = x6 + x7
+	lea (%rax,%r8),%rbp
+	# (uint32) b <<<= 13
+	rol $13,%ebp
+	# x4 ^= b
+	xor %rbp,%r9
+	# a = x2 + x3
+	lea (%rcx,%rsi),%rbp
+	# (uint32) a <<<= 18
+	rol $18,%ebp
+	# x0 ^= a
+	xor %rbp,%rdx
+	# b = x7 + x4
+	lea (%r8,%r9),%rbp
+	# (uint32) b <<<= 18
+	rol $18,%ebp
+	# x5 ^= b
+	xor %rbp,%r15
+	# x10 = x10_stack
+	movq 168(%rsp),%rbp
+	# x5_stack = x5
+	movq %r15,160(%rsp)
+	# c = x9 + x10
+	lea (%r10,%rbp),%r15
+	# (uint32) c <<<= 7
+	rol $7,%r15d
+	# x11 ^= c
+	xor %r15,%r12
+	# c = x10 + x11
+	lea (%rbp,%r12),%r15
+	# (uint32) c <<<= 9
+	rol $9,%r15d
+	# x8 ^= c
+	xor %r15,%r11
+	# c = x11 + x8
+	lea (%r12,%r11),%r15
+	# (uint32) c <<<= 13
+	rol $13,%r15d
+	# x9 ^= c
+	xor %r15,%r10
+	# c = x8 + x9
+	lea (%r11,%r10),%r15
+	# (uint32) c <<<= 18
+	rol $18,%r15d
+	# x10 ^= c
+	xor %r15,%rbp
+	# x15 = x15_stack
+	movq 176(%rsp),%r15
+	# x10_stack = x10
+	movq %rbp,168(%rsp)
+	# d = x14 + x15
+	lea (%rbx,%r15),%rbp
+	# (uint32) d <<<= 7
+	rol $7,%ebp
+	# x12 ^= d
+	xor %rbp,%r14
+	# d = x15 + x12
+	lea (%r15,%r14),%rbp
+	# (uint32) d <<<= 9
+	rol $9,%ebp
+	# x13 ^= d
+	xor %rbp,%r13
+	# d = x12 + x13
+	lea (%r14,%r13),%rbp
+	# (uint32) d <<<= 13
+	rol $13,%ebp
+	# x14 ^= d
+	xor %rbp,%rbx
+	# d = x13 + x14
+	lea (%r13,%rbx),%rbp
+	# (uint32) d <<<= 18
+	rol $18,%ebp
+	# x15 ^= d
+	xor %rbp,%r15
+	# x15_stack = x15
+	movq %r15,176(%rsp)
+	# x5 = x5_stack
+	movq 160(%rsp),%r15
+	# a = x12 + x0
+	lea (%r14,%rdx),%rbp
+	# (uint32) a <<<= 7
+	rol $7,%ebp
+	# x4 ^= a
+	xor %rbp,%r9
+	# b = x1 + x5
+	lea (%rdi,%r15),%rbp
+	# (uint32) b <<<= 7
+	rol $7,%ebp
+	# x9 ^= b
+	xor %rbp,%r10
+	# a = x0 + x4
+	lea (%rdx,%r9),%rbp
+	# (uint32) a <<<= 9
+	rol $9,%ebp
+	# x8 ^= a
+	xor %rbp,%r11
+	# b = x5 + x9
+	lea (%r15,%r10),%rbp
+	# (uint32) b <<<= 9
+	rol $9,%ebp
+	# x13 ^= b
+	xor %rbp,%r13
+	# a = x4 + x8
+	lea (%r9,%r11),%rbp
+	# (uint32) a <<<= 13
+	rol $13,%ebp
+	# x12 ^= a
+	xor %rbp,%r14
+	# b = x9 + x13
+	lea (%r10,%r13),%rbp
+	# (uint32) b <<<= 13
+	rol $13,%ebp
+	# x1 ^= b
+	xor %rbp,%rdi
+	# a = x8 + x12
+	lea (%r11,%r14),%rbp
+	# (uint32) a <<<= 18
+	rol $18,%ebp
+	# x0 ^= a
+	xor %rbp,%rdx
+	# b = x13 + x1
+	lea (%r13,%rdi),%rbp
+	# (uint32) b <<<= 18
+	rol $18,%ebp
+	# x5 ^= b
+	xor %rbp,%r15
+	# x10 = x10_stack
+	movq 168(%rsp),%rbp
+	# x5_stack = x5
+	movq %r15,160(%rsp)
+	# c = x6 + x10
+	lea (%rax,%rbp),%r15
+	# (uint32) c <<<= 7
+	rol $7,%r15d
+	# x14 ^= c
+	xor %r15,%rbx
+	# c = x10 + x14
+	lea (%rbp,%rbx),%r15
+	# (uint32) c <<<= 9
+	rol $9,%r15d
+	# x2 ^= c
+	xor %r15,%rcx
+	# c = x14 + x2
+	lea (%rbx,%rcx),%r15
+	# (uint32) c <<<= 13
+	rol $13,%r15d
+	# x6 ^= c
+	xor %r15,%rax
+	# c = x2 + x6
+	lea (%rcx,%rax),%r15
+	# (uint32) c <<<= 18
+	rol $18,%r15d
+	# x10 ^= c
+	xor %r15,%rbp
+	# x15 = x15_stack
+	movq 176(%rsp),%r15
+	# x10_stack = x10
+	movq %rbp,168(%rsp)
+	# d = x11 + x15
+	lea (%r12,%r15),%rbp
+	# (uint32) d <<<= 7
+	rol $7,%ebp
+	# x3 ^= d
+	xor %rbp,%rsi
+	# d = x15 + x3
+	lea (%r15,%rsi),%rbp
+	# (uint32) d <<<= 9
+	rol $9,%ebp
+	# x7 ^= d
+	xor %rbp,%r8
+	# d = x3 + x7
+	lea (%rsi,%r8),%rbp
+	# (uint32) d <<<= 13
+	rol $13,%ebp
+	# x11 ^= d
+	xor %rbp,%r12
+	# d = x7 + x11
+	lea (%r8,%r12),%rbp
+	# (uint32) d <<<= 18
+	rol $18,%ebp
+	# x15 ^= d
+	xor %rbp,%r15
+	# x15_stack = x15
+	movq %r15,176(%rsp)
+	# x5 = x5_stack
+	movq 160(%rsp),%r15
+	# a = x3 + x0
+	lea (%rsi,%rdx),%rbp
+	# (uint32) a <<<= 7
+	rol $7,%ebp
+	# x1 ^= a
+	xor %rbp,%rdi
+	# b = x4 + x5
+	lea (%r9,%r15),%rbp
+	# (uint32) b <<<= 7
+	rol $7,%ebp
+	# x6 ^= b
+	xor %rbp,%rax
+	# a = x0 + x1
+	lea (%rdx,%rdi),%rbp
+	# (uint32) a <<<= 9
+	rol $9,%ebp
+	# x2 ^= a
+	xor %rbp,%rcx
+	# b = x5 + x6
+	lea (%r15,%rax),%rbp
+	# (uint32) b <<<= 9
+	rol $9,%ebp
+	# x7 ^= b
+	xor %rbp,%r8
+	# a = x1 + x2
+	lea (%rdi,%rcx),%rbp
+	# (uint32) a <<<= 13
+	rol $13,%ebp
+	# x3 ^= a
+	xor %rbp,%rsi
+	# b = x6 + x7
+	lea (%rax,%r8),%rbp
+	# (uint32) b <<<= 13
+	rol $13,%ebp
+	# x4 ^= b
+	xor %rbp,%r9
+	# a = x2 + x3
+	lea (%rcx,%rsi),%rbp
+	# (uint32) a <<<= 18
+	rol $18,%ebp
+	# x0 ^= a
+	xor %rbp,%rdx
+	# b = x7 + x4
+	lea (%r8,%r9),%rbp
+	# (uint32) b <<<= 18
+	rol $18,%ebp
+	# x5 ^= b
+	xor %rbp,%r15
+	# x10 = x10_stack
+	movq 168(%rsp),%rbp
+	# x5_stack = x5
+	movq %r15,160(%rsp)
+	# c = x9 + x10
+	lea (%r10,%rbp),%r15
+	# (uint32) c <<<= 7
+	rol $7,%r15d
+	# x11 ^= c
+	xor %r15,%r12
+	# c = x10 + x11
+	lea (%rbp,%r12),%r15
+	# (uint32) c <<<= 9
+	rol $9,%r15d
+	# x8 ^= c
+	xor %r15,%r11
+	# c = x11 + x8
+	lea (%r12,%r11),%r15
+	# (uint32) c <<<= 13
+	rol $13,%r15d
+	# x9 ^= c
+	xor %r15,%r10
+	# c = x8 + x9
+	lea (%r11,%r10),%r15
+	# (uint32) c <<<= 18
+	rol $18,%r15d
+	# x10 ^= c
+	xor %r15,%rbp
+	# x15 = x15_stack
+	movq 176(%rsp),%r15
+	# x10_stack = x10
+	movq %rbp,168(%rsp)
+	# d = x14 + x15
+	lea (%rbx,%r15),%rbp
+	# (uint32) d <<<= 7
+	rol $7,%ebp
+	# x12 ^= d
+	xor %rbp,%r14
+	# d = x15 + x12
+	lea (%r15,%r14),%rbp
+	# (uint32) d <<<= 9
+	rol $9,%ebp
+	# x13 ^= d
+	xor %rbp,%r13
+	# d = x12 + x13
+	lea (%r14,%r13),%rbp
+	# (uint32) d <<<= 13
+	rol $13,%ebp
+	# x14 ^= d
+	xor %rbp,%rbx
+	# d = x13 + x14
+	lea (%r13,%rbx),%rbp
+	# (uint32) d <<<= 18
+	rol $18,%ebp
+	# x15 ^= d
+	xor %rbp,%r15
+	# x15_stack = x15
+	movq %r15,176(%rsp)
+	# i = i_backup
+	movq 184(%rsp),%r15
+	# unsigned>? i -= 4
+	sub $4,%r15
+	# comment:fp stack unchanged by jump
+	# goto mainloop if unsigned>
+	ja ._mainloop
+	# (uint32) x2 += j2
+	addl 64(%rsp),%ecx
+	# x3 <<= 32
+	shl $32,%rsi
+	# x3 += j2
+	addq 64(%rsp),%rsi
+	# (uint64) x3 >>= 32
+	shr $32,%rsi
+	# x3 <<= 32
+	shl $32,%rsi
+	# x2 += x3
+	add %rsi,%rcx
+	# (uint32) x6 += j6
+	addl 80(%rsp),%eax
+	# x7 <<= 32
+	shl $32,%r8
+	# x7 += j6
+	addq 80(%rsp),%r8
+	# (uint64) x7 >>= 32
+	shr $32,%r8
+	# x7 <<= 32
+	shl $32,%r8
+	# x6 += x7
+	add %r8,%rax
+	# (uint32) x8 += j8
+	addl 88(%rsp),%r11d
+	# x9 <<= 32
+	shl $32,%r10
+	# x9 += j8
+	addq 88(%rsp),%r10
+	# (uint64) x9 >>= 32
+	shr $32,%r10
+	# x9 <<= 32
+	shl $32,%r10
+	# x8 += x9
+	add %r10,%r11
+	# (uint32) x12 += j12
+	addl 104(%rsp),%r14d
+	# x13 <<= 32
+	shl $32,%r13
+	# x13 += j12
+	addq 104(%rsp),%r13
+	# (uint64) x13 >>= 32
+	shr $32,%r13
+	# x13 <<= 32
+	shl $32,%r13
+	# x12 += x13
+	add %r13,%r14
+	# (uint32) x0 += j0
+	addl 56(%rsp),%edx
+	# x1 <<= 32
+	shl $32,%rdi
+	# x1 += j0
+	addq 56(%rsp),%rdi
+	# (uint64) x1 >>= 32
+	shr $32,%rdi
+	# x1 <<= 32
+	shl $32,%rdi
+	# x0 += x1
+	add %rdi,%rdx
+	# x5 = x5_stack
+	movq 160(%rsp),%rdi
+	# (uint32) x4 += j4
+	addl 72(%rsp),%r9d
+	# x5 <<= 32
+	shl $32,%rdi
+	# x5 += j4
+	addq 72(%rsp),%rdi
+	# (uint64) x5 >>= 32
+	shr $32,%rdi
+	# x5 <<= 32
+	shl $32,%rdi
+	# x4 += x5
+	add %rdi,%r9
+	# x10 = x10_stack
+	movq 168(%rsp),%r8
+	# (uint32) x10 += j10
+	addl 96(%rsp),%r8d
+	# x11 <<= 32
+	shl $32,%r12
+	# x11 += j10
+	addq 96(%rsp),%r12
+	# (uint64) x11 >>= 32
+	shr $32,%r12
+	# x11 <<= 32
+	shl $32,%r12
+	# x10 += x11
+	add %r12,%r8
+	# x15 = x15_stack
+	movq 176(%rsp),%rdi
+	# (uint32) x14 += j14
+	addl 112(%rsp),%ebx
+	# x15 <<= 32
+	shl $32,%rdi
+	# x15 += j14
+	addq 112(%rsp),%rdi
+	# (uint64) x15 >>= 32
+	shr $32,%rdi
+	# x15 <<= 32
+	shl $32,%rdi
+	# x14 += x15
+	add %rdi,%rbx
+	# out = out_backup
+	movq 136(%rsp),%rdi
+	# m = m_backup
+	movq 144(%rsp),%rsi
+	# x0 ^= *(uint64 *) (m + 0)
+	xorq 0(%rsi),%rdx
+	# *(uint64 *) (out + 0) = x0
+	movq %rdx,0(%rdi)
+	# x2 ^= *(uint64 *) (m + 8)
+	xorq 8(%rsi),%rcx
+	# *(uint64 *) (out + 8) = x2
+	movq %rcx,8(%rdi)
+	# x4 ^= *(uint64 *) (m + 16)
+	xorq 16(%rsi),%r9
+	# *(uint64 *) (out + 16) = x4
+	movq %r9,16(%rdi)
+	# x6 ^= *(uint64 *) (m + 24)
+	xorq 24(%rsi),%rax
+	# *(uint64 *) (out + 24) = x6
+	movq %rax,24(%rdi)
+	# x8 ^= *(uint64 *) (m + 32)
+	xorq 32(%rsi),%r11
+	# *(uint64 *) (out + 32) = x8
+	movq %r11,32(%rdi)
+	# x10 ^= *(uint64 *) (m + 40)
+	xorq 40(%rsi),%r8
+	# *(uint64 *) (out + 40) = x10
+	movq %r8,40(%rdi)
+	# x12 ^= *(uint64 *) (m + 48)
+	xorq 48(%rsi),%r14
+	# *(uint64 *) (out + 48) = x12
+	movq %r14,48(%rdi)
+	# x14 ^= *(uint64 *) (m + 56)
+	xorq 56(%rsi),%rbx
+	# *(uint64 *) (out + 56) = x14
+	movq %rbx,56(%rdi)
+	# bytes = bytes_backup
+	movq 152(%rsp),%rdx
+	# in8 = j8
+	movq 88(%rsp),%rcx
+	# in8 += 1
+	add $1,%rcx
+	# j8 = in8
+	movq %rcx,88(%rsp)
+	# unsigned>? unsigned<? bytes - 64
+	cmp $64,%rdx
+	# comment:fp stack unchanged by jump
+	# goto bytesatleast65 if unsigned>
+	ja ._bytesatleast65
+	# comment:fp stack unchanged by jump
+	# goto bytesatleast64 if !unsigned<
+	jae ._bytesatleast64
+	# m = out
+	mov %rdi,%rsi
+	# out = ctarget
+	movq 128(%rsp),%rdi
+	# i = bytes
+	mov %rdx,%rcx
+	# while (i) { *out++ = *m++; --i }
+	rep movsb
+	# comment:fp stack unchanged by fallthrough
+# bytesatleast64:
+._bytesatleast64:
+	# x = x_backup
+	movq 120(%rsp),%rdi
+	# in8 = j8
+	movq 88(%rsp),%rsi
+	# *(uint64 *) (x + 32) = in8
+	movq %rsi,32(%rdi)
+	# r11 = r11_stack
+	movq 0(%rsp),%r11
+	# r12 = r12_stack
+	movq 8(%rsp),%r12
+	# r13 = r13_stack
+	movq 16(%rsp),%r13
+	# r14 = r14_stack
+	movq 24(%rsp),%r14
+	# r15 = r15_stack
+	movq 32(%rsp),%r15
+	# rbx = rbx_stack
+	movq 40(%rsp),%rbx
+	# rbp = rbp_stack
+	movq 48(%rsp),%rbp
+	# comment:fp stack unchanged by fallthrough
+# done:
+._done:
+	# leave
+	add %r11,%rsp
+	mov %rdi,%rax
+	mov %rsi,%rdx
+	ret
+# bytesatleast65:
+._bytesatleast65:
+	# bytes -= 64
+	sub $64,%rdx
+	# out += 64
+	add $64,%rdi
+	# m += 64
+	add $64,%rsi
+	# comment:fp stack unchanged by jump
+	# goto bytesatleast1
+	jmp ._bytesatleast1
+# enter ECRYPT_keysetup
+.text
+.p2align 5
+.globl ECRYPT_keysetup
+ECRYPT_keysetup:
+	mov %rsp,%r11
+	and $31,%r11
+	add $256,%r11
+	sub %r11,%rsp
+	# k = arg2
+	mov %rsi,%rsi
+	# kbits = arg3
+	mov %rdx,%rdx
+	# x = arg1
+	mov %rdi,%rdi
+	# in0 = *(uint64 *) (k + 0)
+	movq 0(%rsi),%r8
+	# in2 = *(uint64 *) (k + 8)
+	movq 8(%rsi),%r9
+	# *(uint64 *) (x + 4) = in0
+	movq %r8,4(%rdi)
+	# *(uint64 *) (x + 12) = in2
+	movq %r9,12(%rdi)
+	# unsigned<? kbits - 256
+	cmp $256,%rdx
+	# comment:fp stack unchanged by jump
+	# goto kbits128 if unsigned<
+	jb ._kbits128
+# kbits256:
+._kbits256:
+	# in10 = *(uint64 *) (k + 16)
+	movq 16(%rsi),%rdx
+	# in12 = *(uint64 *) (k + 24)
+	movq 24(%rsi),%rsi
+	# *(uint64 *) (x + 44) = in10
+	movq %rdx,44(%rdi)
+	# *(uint64 *) (x + 52) = in12
+	movq %rsi,52(%rdi)
+	# in0 = 1634760805
+	mov $1634760805,%rsi
+	# in4 = 857760878
+	mov $857760878,%rdx
+	# in10 = 2036477234
+	mov $2036477234,%rcx
+	# in14 = 1797285236
+	mov $1797285236,%r8
+	# *(uint32 *) (x + 0) = in0
+	movl %esi,0(%rdi)
+	# *(uint32 *) (x + 20) = in4
+	movl %edx,20(%rdi)
+	# *(uint32 *) (x + 40) = in10
+	movl %ecx,40(%rdi)
+	# *(uint32 *) (x + 60) = in14
+	movl %r8d,60(%rdi)
+	# comment:fp stack unchanged by jump
+	# goto keysetupdone
+	jmp ._keysetupdone
+# kbits128:
+._kbits128:
+	# in10 = *(uint64 *) (k + 0)
+	movq 0(%rsi),%rdx
+	# in12 = *(uint64 *) (k + 8)
+	movq 8(%rsi),%rsi
+	# *(uint64 *) (x + 44) = in10
+	movq %rdx,44(%rdi)
+	# *(uint64 *) (x + 52) = in12
+	movq %rsi,52(%rdi)
+	# in0 = 1634760805
+	mov $1634760805,%rsi
+	# in4 = 824206446
+	mov $824206446,%rdx
+	# in10 = 2036477238
+	mov $2036477238,%rcx
+	# in14 = 1797285236
+	mov $1797285236,%r8
+	# *(uint32 *) (x + 0) = in0
+	movl %esi,0(%rdi)
+	# *(uint32 *) (x + 20) = in4
+	movl %edx,20(%rdi)
+	# *(uint32 *) (x + 40) = in10
+	movl %ecx,40(%rdi)
+	# *(uint32 *) (x + 60) = in14
+	movl %r8d,60(%rdi)
+# keysetupdone:
+._keysetupdone:
+	# leave
+	add %r11,%rsp
+	mov %rdi,%rax
+	mov %rsi,%rdx
+	ret
+# enter ECRYPT_ivsetup
+.text
+.p2align 5
+.globl ECRYPT_ivsetup
+ECRYPT_ivsetup:
+	mov %rsp,%r11
+	and $31,%r11
+	add $256,%r11
+	sub %r11,%rsp
+	# iv = arg2
+	mov %rsi,%rsi
+	# x = arg1
+	mov %rdi,%rdi
+	# in6 = *(uint64 *) (iv + 0)
+	movq 0(%rsi),%rsi
+	# in8 = 0
+	mov $0,%r8
+	# *(uint64 *) (x + 24) = in6
+	movq %rsi,24(%rdi)
+	# *(uint64 *) (x + 32) = in8
+	movq %r8,32(%rdi)
+	# leave
+	add %r11,%rsp
+	mov %rdi,%rax
+	mov %rsi,%rdx
+	ret
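
For reference, the calling convention of the three exported routines can be
read off the "# x = arg1" style comments above. A C-level sketch (the struct
name is hypothetical; the layout is inferred from the byte offsets written by
ECRYPT_keysetup and ECRYPT_ivsetup):

	#include <stdint.h>

	/*
	 * 64-byte Salsa20 state, 16 little-endian 32-bit words: the four
	 * "expand ..-byte k" constants sit at byte offsets 0/20/40/60,
	 * the key at 4..19 and 44..59, the 8-byte IV at 24..31, and the
	 * 64-bit block counter at 32..39 (zeroed by ivsetup, incremented
	 * once per 64-byte block by encrypt_bytes).
	 */
	struct salsa20_state {		/* hypothetical name */
		uint32_t x[16];
	};

	/* kbits is the key size in bits; the code tests kbits < 256 */
	void ECRYPT_keysetup(struct salsa20_state *x, const uint8_t *k,
			     uint64_t kbits);
	void ECRYPT_ivsetup(struct salsa20_state *x, const uint8_t *iv);
	void ECRYPT_encrypt_bytes(struct salsa20_state *x, const uint8_t *m,
				  uint8_t *out, uint64_t bytes);

Note that bytes need not be a multiple of 64: the ._bytesatleast1 path bounces
a short tail block through the on-stack tmp buffer with rep movsb.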
diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c
index 3be443995ed6..bccb76d80987 100644
--- a/arch/x86/crypto/salsa20_glue.c
+++ b/arch/x86/crypto/salsa20_glue.c
@@ -8,6 +8,8 @@
  * and to remove extraneous comments and functions that are not needed.
  * - i586 version, renamed as salsa20-i586-asm_32.S
  *   available from <http://cr.yp.to/snuffle/salsa20/x86-pm/salsa20.s>
+ * - x86-64 version, renamed as salsa20-x86_64-asm_64.S
+ *   available from <http://cr.yp.to/snuffle/salsa20/amd64-3/salsa20.s>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the Free
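
The immediates stored on the ._kbits256 and ._kbits128 paths above are the
standard Salsa20 expansion constants: interpreted as little-endian bytes,
1634760805, 857760878, 2036477234, 1797285236 spell "expand 32-byte k", and
the 128-bit variant (1634760805, 824206446, 2036477238, 1797285236) spells
"expand 16-byte k". A throwaway check, assuming a little-endian host:

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		const uint32_t sigma[4] = { 1634760805,  857760878,
					    2036477234, 1797285236 };
		const uint32_t tau[4]   = { 1634760805,  824206446,
					    2036477238, 1797285236 };
		char s[17] = "", t[17] = "";

		memcpy(s, sigma, 16);	/* word bytes, in order */
		memcpy(t, tau, 16);
		/* prints: expand 32-byte k / expand 16-byte k */
		printf("%s\n%s\n", s, t);
		return 0;
	}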