aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorTan Swee Heng <thesweeheng@gmail.com>2007-12-10 02:52:56 -0500
committerHerbert Xu <herbert@gondor.apana.org.au>2008-01-10 16:16:57 -0500
commit974e4b752ee623854c5dc2bbfc7c7725029ce173 (patch)
treee2eb69820a90058a026def6a02a397c082811934 /arch
parentdadbc53d0bbde0e84c40b9f6bc5c50eb9eb7352a (diff)
[CRYPTO] salsa20_i586: Salsa20 stream cipher algorithm (i586 version)
This patch contains the salsa20-i586 implementation. The original assembly code came from <http://cr.yp.to/snuffle/salsa20/x86-pm/salsa20.s>. I have reformatted it (added indents) so that it matches the other algorithms in arch/x86/crypto. Signed-off-by: Tan Swee Heng <thesweeheng@gmail.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'arch')
-rw-r--r--arch/x86/crypto/Makefile2
-rw-r--r--arch/x86/crypto/salsa20-i586-asm_32.S1114
-rw-r--r--arch/x86/crypto/salsa20_glue.c127
3 files changed, 1243 insertions, 0 deletions
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index b8fbb43df6d7..25cc8441046a 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -4,12 +4,14 @@
4 4
5obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o 5obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
6obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o 6obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
7obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
7 8
8obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o 9obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
9obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o 10obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
10 11
11aes-i586-y := aes-i586-asm_32.o aes_glue.o 12aes-i586-y := aes-i586-asm_32.o aes_glue.o
12twofish-i586-y := twofish-i586-asm_32.o twofish_32.o 13twofish-i586-y := twofish-i586-asm_32.o twofish_32.o
14salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o
13 15
14aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o 16aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
15twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_64.o 17twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_64.o
diff --git a/arch/x86/crypto/salsa20-i586-asm_32.S b/arch/x86/crypto/salsa20-i586-asm_32.S
new file mode 100644
index 000000000000..72eb306680b2
--- /dev/null
+++ b/arch/x86/crypto/salsa20-i586-asm_32.S
@@ -0,0 +1,1114 @@
1# salsa20_pm.s version 20051229
2# D. J. Bernstein
3# Public domain.
4
5# enter ECRYPT_encrypt_bytes
# ECRYPT_encrypt_bytes(x, m, out, bytes): XOR a run of input bytes with
# Salsa20 keystream.  All four arguments are read from the stack of the
# caller.
#   x     - pointer to the 16-word cipher state (64 bytes are read)
#   m     - input data
#   out   - output buffer
#   bytes - byte count; a count of zero returns without touching x
# Words 8 and 9 of the state form a 64-bit block counter.  It is advanced
# once per 64-byte block (a short final block counts as one) and stored
# back to x after the last block.
#
# Scratch frame, as offsets from the realigned %esp:
#   tmp           0..63     bounce buffer for a final block under 64 bytes
#   x_backup      64
#   m_backup      68
#   out_backup    72
#   bytes_backup  76
#   eax_stack     80        amount %esp was lowered by
#   saved regs    84..96    %ebx, %esi, %edi, %ebp
#   x0..x15       100..160  working copy of the state
#   j0..j15       164..224  input state for the current block
#   ctarget       228       real destination while out points at tmp
6.text
7.p2align 5
8.globl ECRYPT_encrypt_bytes
9ECRYPT_encrypt_bytes:
10 mov %esp,%eax
11 and $31,%eax
12 add $256,%eax
13 sub %eax,%esp
14 # eax_stack = eax
15 movl %eax,80(%esp)
16 # ebx_stack = ebx
17 movl %ebx,84(%esp)
18 # esi_stack = esi
19 movl %esi,88(%esp)
20 # edi_stack = edi
21 movl %edi,92(%esp)
22 # ebp_stack = ebp
23 movl %ebp,96(%esp)
24 # x = arg1
25 movl 4(%esp,%eax),%edx
26 # m = arg2
27 movl 8(%esp,%eax),%esi
28 # out = arg3
29 movl 12(%esp,%eax),%edi
30 # bytes = arg4
31 movl 16(%esp,%eax),%ebx
32 # bytes -= 0
33 sub $0,%ebx
34 # goto done if unsigned<=
35 jbe ._done
# Copy the 16 state words from x into j0..j15 on the stack.
36._start:
37 # in0 = *(uint32 *) (x + 0)
38 movl 0(%edx),%eax
39 # in1 = *(uint32 *) (x + 4)
40 movl 4(%edx),%ecx
41 # in2 = *(uint32 *) (x + 8)
42 movl 8(%edx),%ebp
43 # j0 = in0
44 movl %eax,164(%esp)
45 # in3 = *(uint32 *) (x + 12)
46 movl 12(%edx),%eax
47 # j1 = in1
48 movl %ecx,168(%esp)
49 # in4 = *(uint32 *) (x + 16)
50 movl 16(%edx),%ecx
51 # j2 = in2
52 movl %ebp,172(%esp)
53 # in5 = *(uint32 *) (x + 20)
54 movl 20(%edx),%ebp
55 # j3 = in3
56 movl %eax,176(%esp)
57 # in6 = *(uint32 *) (x + 24)
58 movl 24(%edx),%eax
59 # j4 = in4
60 movl %ecx,180(%esp)
61 # in7 = *(uint32 *) (x + 28)
62 movl 28(%edx),%ecx
63 # j5 = in5
64 movl %ebp,184(%esp)
65 # in8 = *(uint32 *) (x + 32)
66 movl 32(%edx),%ebp
67 # j6 = in6
68 movl %eax,188(%esp)
69 # in9 = *(uint32 *) (x + 36)
70 movl 36(%edx),%eax
71 # j7 = in7
72 movl %ecx,192(%esp)
73 # in10 = *(uint32 *) (x + 40)
74 movl 40(%edx),%ecx
75 # j8 = in8
76 movl %ebp,196(%esp)
77 # in11 = *(uint32 *) (x + 44)
78 movl 44(%edx),%ebp
79 # j9 = in9
80 movl %eax,200(%esp)
81 # in12 = *(uint32 *) (x + 48)
82 movl 48(%edx),%eax
83 # j10 = in10
84 movl %ecx,204(%esp)
85 # in13 = *(uint32 *) (x + 52)
86 movl 52(%edx),%ecx
87 # j11 = in11
88 movl %ebp,208(%esp)
89 # in14 = *(uint32 *) (x + 56)
90 movl 56(%edx),%ebp
91 # j12 = in12
92 movl %eax,212(%esp)
93 # in15 = *(uint32 *) (x + 60)
94 movl 60(%edx),%eax
95 # j13 = in13
96 movl %ecx,216(%esp)
97 # j14 = in14
98 movl %ebp,220(%esp)
99 # j15 = in15
100 movl %eax,224(%esp)
101 # x_backup = x
102 movl %edx,64(%esp)
# Top of the per-block loop.  A remainder shorter than 64 bytes is copied
# into tmp and processed there, with the real destination kept in ctarget.
103._bytesatleast1:
104 # bytes - 64
105 cmp $64,%ebx
106 # goto nocopy if unsigned>=
107 jae ._nocopy
108 # ctarget = out
109 movl %edi,228(%esp)
110 # out = &tmp
111 leal 0(%esp),%edi
112 # i = bytes
113 mov %ebx,%ecx
114 # while (i) { *out++ = *m++; --i }
115 rep movsb
116 # out = &tmp
117 leal 0(%esp),%edi
118 # m = &tmp
119 leal 0(%esp),%esi
# Save the data pointers and the byte count, then seed the working state
# x0..x15 from j0..j15.  The round counter (20) lives in %ebp and the
# words x0, x5, x10, x15 are carried in registers as p, s, t, w.
120._nocopy:
121 # out_backup = out
122 movl %edi,72(%esp)
123 # m_backup = m
124 movl %esi,68(%esp)
125 # bytes_backup = bytes
126 movl %ebx,76(%esp)
127 # in0 = j0
128 movl 164(%esp),%eax
129 # in1 = j1
130 movl 168(%esp),%ecx
131 # in2 = j2
132 movl 172(%esp),%edx
133 # in3 = j3
134 movl 176(%esp),%ebx
135 # x0 = in0
136 movl %eax,100(%esp)
137 # x1 = in1
138 movl %ecx,104(%esp)
139 # x2 = in2
140 movl %edx,108(%esp)
141 # x3 = in3
142 movl %ebx,112(%esp)
143 # in4 = j4
144 movl 180(%esp),%eax
145 # in5 = j5
146 movl 184(%esp),%ecx
147 # in6 = j6
148 movl 188(%esp),%edx
149 # in7 = j7
150 movl 192(%esp),%ebx
151 # x4 = in4
152 movl %eax,116(%esp)
153 # x5 = in5
154 movl %ecx,120(%esp)
155 # x6 = in6
156 movl %edx,124(%esp)
157 # x7 = in7
158 movl %ebx,128(%esp)
159 # in8 = j8
160 movl 196(%esp),%eax
161 # in9 = j9
162 movl 200(%esp),%ecx
163 # in10 = j10
164 movl 204(%esp),%edx
165 # in11 = j11
166 movl 208(%esp),%ebx
167 # x8 = in8
168 movl %eax,132(%esp)
169 # x9 = in9
170 movl %ecx,136(%esp)
171 # x10 = in10
172 movl %edx,140(%esp)
173 # x11 = in11
174 movl %ebx,144(%esp)
175 # in12 = j12
176 movl 212(%esp),%eax
177 # in13 = j13
178 movl 216(%esp),%ecx
179 # in14 = j14
180 movl 220(%esp),%edx
181 # in15 = j15
182 movl 224(%esp),%ebx
183 # x12 = in12
184 movl %eax,148(%esp)
185 # x13 = in13
186 movl %ecx,152(%esp)
187 # x14 = in14
188 movl %edx,156(%esp)
189 # x15 = in15
190 movl %ebx,160(%esp)
191 # i = 20
192 mov $20,%ebp
193 # p = x0
194 movl 100(%esp),%eax
195 # s = x5
196 movl 120(%esp),%ecx
197 # t = x10
198 movl 140(%esp),%edx
199 # w = x15
200 movl 160(%esp),%ebx
# Every pass through the loop runs four rounds: a column round and a row
# round, twice over.  The four add-rotate-xor chains of a round are
# interleaved.  Column round, first pair:
201._mainloop:
202 # x0 = p
203 movl %eax,100(%esp)
204 # x10 = t
205 movl %edx,140(%esp)
206 # p += x12
207 addl 148(%esp),%eax
208 # x5 = s
209 movl %ecx,120(%esp)
210 # t += x6
211 addl 124(%esp),%edx
212 # x15 = w
213 movl %ebx,160(%esp)
214 # r = x1
215 movl 104(%esp),%esi
216 # r += s
217 add %ecx,%esi
218 # v = x11
219 movl 144(%esp),%edi
220 # v += w
221 add %ebx,%edi
222 # p <<<= 7
223 rol $7,%eax
224 # p ^= x4
225 xorl 116(%esp),%eax
226 # t <<<= 7
227 rol $7,%edx
228 # t ^= x14
229 xorl 156(%esp),%edx
230 # r <<<= 7
231 rol $7,%esi
232 # r ^= x9
233 xorl 136(%esp),%esi
234 # v <<<= 7
235 rol $7,%edi
236 # v ^= x3
237 xorl 112(%esp),%edi
238 # x4 = p
239 movl %eax,116(%esp)
240 # x14 = t
241 movl %edx,156(%esp)
242 # p += x0
243 addl 100(%esp),%eax
244 # x9 = r
245 movl %esi,136(%esp)
246 # t += x10
247 addl 140(%esp),%edx
248 # x3 = v
249 movl %edi,112(%esp)
250 # p <<<= 9
251 rol $9,%eax
252 # p ^= x8
253 xorl 132(%esp),%eax
254 # t <<<= 9
255 rol $9,%edx
256 # t ^= x2
257 xorl 108(%esp),%edx
258 # s += r
259 add %esi,%ecx
260 # s <<<= 9
261 rol $9,%ecx
262 # s ^= x13
263 xorl 152(%esp),%ecx
264 # w += v
265 add %edi,%ebx
266 # w <<<= 9
267 rol $9,%ebx
268 # w ^= x7
269 xorl 128(%esp),%ebx
270 # x8 = p
271 movl %eax,132(%esp)
272 # x2 = t
273 movl %edx,108(%esp)
274 # p += x4
275 addl 116(%esp),%eax
276 # x13 = s
277 movl %ecx,152(%esp)
278 # t += x14
279 addl 156(%esp),%edx
280 # x7 = w
281 movl %ebx,128(%esp)
282 # p <<<= 13
283 rol $13,%eax
284 # p ^= x12
285 xorl 148(%esp),%eax
286 # t <<<= 13
287 rol $13,%edx
288 # t ^= x6
289 xorl 124(%esp),%edx
290 # r += s
291 add %ecx,%esi
292 # r <<<= 13
293 rol $13,%esi
294 # r ^= x1
295 xorl 104(%esp),%esi
296 # v += w
297 add %ebx,%edi
298 # v <<<= 13
299 rol $13,%edi
300 # v ^= x11
301 xorl 144(%esp),%edi
302 # x12 = p
303 movl %eax,148(%esp)
304 # x6 = t
305 movl %edx,124(%esp)
306 # p += x8
307 addl 132(%esp),%eax
308 # x1 = r
309 movl %esi,104(%esp)
310 # t += x2
311 addl 108(%esp),%edx
312 # x11 = v
313 movl %edi,144(%esp)
314 # p <<<= 18
315 rol $18,%eax
316 # p ^= x0
317 xorl 100(%esp),%eax
318 # t <<<= 18
319 rol $18,%edx
320 # t ^= x10
321 xorl 140(%esp),%edx
322 # s += r
323 add %esi,%ecx
324 # s <<<= 18
325 rol $18,%ecx
326 # s ^= x5
327 xorl 120(%esp),%ecx
328 # w += v
329 add %edi,%ebx
330 # w <<<= 18
331 rol $18,%ebx
332 # w ^= x15
333 xorl 160(%esp),%ebx
# Row round, first pair.
334 # x0 = p
335 movl %eax,100(%esp)
336 # x10 = t
337 movl %edx,140(%esp)
338 # p += x3
339 addl 112(%esp),%eax
340 # p <<<= 7
341 rol $7,%eax
342 # x5 = s
343 movl %ecx,120(%esp)
344 # t += x9
345 addl 136(%esp),%edx
346 # x15 = w
347 movl %ebx,160(%esp)
348 # r = x4
349 movl 116(%esp),%esi
350 # r += s
351 add %ecx,%esi
352 # v = x14
353 movl 156(%esp),%edi
354 # v += w
355 add %ebx,%edi
356 # p ^= x1
357 xorl 104(%esp),%eax
358 # t <<<= 7
359 rol $7,%edx
360 # t ^= x11
361 xorl 144(%esp),%edx
362 # r <<<= 7
363 rol $7,%esi
364 # r ^= x6
365 xorl 124(%esp),%esi
366 # v <<<= 7
367 rol $7,%edi
368 # v ^= x12
369 xorl 148(%esp),%edi
370 # x1 = p
371 movl %eax,104(%esp)
372 # x11 = t
373 movl %edx,144(%esp)
374 # p += x0
375 addl 100(%esp),%eax
376 # x6 = r
377 movl %esi,124(%esp)
378 # t += x10
379 addl 140(%esp),%edx
380 # x12 = v
381 movl %edi,148(%esp)
382 # p <<<= 9
383 rol $9,%eax
384 # p ^= x2
385 xorl 108(%esp),%eax
386 # t <<<= 9
387 rol $9,%edx
388 # t ^= x8
389 xorl 132(%esp),%edx
390 # s += r
391 add %esi,%ecx
392 # s <<<= 9
393 rol $9,%ecx
394 # s ^= x7
395 xorl 128(%esp),%ecx
396 # w += v
397 add %edi,%ebx
398 # w <<<= 9
399 rol $9,%ebx
400 # w ^= x13
401 xorl 152(%esp),%ebx
402 # x2 = p
403 movl %eax,108(%esp)
404 # x8 = t
405 movl %edx,132(%esp)
406 # p += x1
407 addl 104(%esp),%eax
408 # x7 = s
409 movl %ecx,128(%esp)
410 # t += x11
411 addl 144(%esp),%edx
412 # x13 = w
413 movl %ebx,152(%esp)
414 # p <<<= 13
415 rol $13,%eax
416 # p ^= x3
417 xorl 112(%esp),%eax
418 # t <<<= 13
419 rol $13,%edx
420 # t ^= x9
421 xorl 136(%esp),%edx
422 # r += s
423 add %ecx,%esi
424 # r <<<= 13
425 rol $13,%esi
426 # r ^= x4
427 xorl 116(%esp),%esi
428 # v += w
429 add %ebx,%edi
430 # v <<<= 13
431 rol $13,%edi
432 # v ^= x14
433 xorl 156(%esp),%edi
434 # x3 = p
435 movl %eax,112(%esp)
436 # x9 = t
437 movl %edx,136(%esp)
438 # p += x2
439 addl 108(%esp),%eax
440 # x4 = r
441 movl %esi,116(%esp)
442 # t += x8
443 addl 132(%esp),%edx
444 # x14 = v
445 movl %edi,156(%esp)
446 # p <<<= 18
447 rol $18,%eax
448 # p ^= x0
449 xorl 100(%esp),%eax
450 # t <<<= 18
451 rol $18,%edx
452 # t ^= x10
453 xorl 140(%esp),%edx
454 # s += r
455 add %esi,%ecx
456 # s <<<= 18
457 rol $18,%ecx
458 # s ^= x5
459 xorl 120(%esp),%ecx
460 # w += v
461 add %edi,%ebx
462 # w <<<= 18
463 rol $18,%ebx
464 # w ^= x15
465 xorl 160(%esp),%ebx
# Column round, second pair.
466 # x0 = p
467 movl %eax,100(%esp)
468 # x10 = t
469 movl %edx,140(%esp)
470 # p += x12
471 addl 148(%esp),%eax
472 # x5 = s
473 movl %ecx,120(%esp)
474 # t += x6
475 addl 124(%esp),%edx
476 # x15 = w
477 movl %ebx,160(%esp)
478 # r = x1
479 movl 104(%esp),%esi
480 # r += s
481 add %ecx,%esi
482 # v = x11
483 movl 144(%esp),%edi
484 # v += w
485 add %ebx,%edi
486 # p <<<= 7
487 rol $7,%eax
488 # p ^= x4
489 xorl 116(%esp),%eax
490 # t <<<= 7
491 rol $7,%edx
492 # t ^= x14
493 xorl 156(%esp),%edx
494 # r <<<= 7
495 rol $7,%esi
496 # r ^= x9
497 xorl 136(%esp),%esi
498 # v <<<= 7
499 rol $7,%edi
500 # v ^= x3
501 xorl 112(%esp),%edi
502 # x4 = p
503 movl %eax,116(%esp)
504 # x14 = t
505 movl %edx,156(%esp)
506 # p += x0
507 addl 100(%esp),%eax
508 # x9 = r
509 movl %esi,136(%esp)
510 # t += x10
511 addl 140(%esp),%edx
512 # x3 = v
513 movl %edi,112(%esp)
514 # p <<<= 9
515 rol $9,%eax
516 # p ^= x8
517 xorl 132(%esp),%eax
518 # t <<<= 9
519 rol $9,%edx
520 # t ^= x2
521 xorl 108(%esp),%edx
522 # s += r
523 add %esi,%ecx
524 # s <<<= 9
525 rol $9,%ecx
526 # s ^= x13
527 xorl 152(%esp),%ecx
528 # w += v
529 add %edi,%ebx
530 # w <<<= 9
531 rol $9,%ebx
532 # w ^= x7
533 xorl 128(%esp),%ebx
534 # x8 = p
535 movl %eax,132(%esp)
536 # x2 = t
537 movl %edx,108(%esp)
538 # p += x4
539 addl 116(%esp),%eax
540 # x13 = s
541 movl %ecx,152(%esp)
542 # t += x14
543 addl 156(%esp),%edx
544 # x7 = w
545 movl %ebx,128(%esp)
546 # p <<<= 13
547 rol $13,%eax
548 # p ^= x12
549 xorl 148(%esp),%eax
550 # t <<<= 13
551 rol $13,%edx
552 # t ^= x6
553 xorl 124(%esp),%edx
554 # r += s
555 add %ecx,%esi
556 # r <<<= 13
557 rol $13,%esi
558 # r ^= x1
559 xorl 104(%esp),%esi
560 # v += w
561 add %ebx,%edi
562 # v <<<= 13
563 rol $13,%edi
564 # v ^= x11
565 xorl 144(%esp),%edi
566 # x12 = p
567 movl %eax,148(%esp)
568 # x6 = t
569 movl %edx,124(%esp)
570 # p += x8
571 addl 132(%esp),%eax
572 # x1 = r
573 movl %esi,104(%esp)
574 # t += x2
575 addl 108(%esp),%edx
576 # x11 = v
577 movl %edi,144(%esp)
578 # p <<<= 18
579 rol $18,%eax
580 # p ^= x0
581 xorl 100(%esp),%eax
582 # t <<<= 18
583 rol $18,%edx
584 # t ^= x10
585 xorl 140(%esp),%edx
586 # s += r
587 add %esi,%ecx
588 # s <<<= 18
589 rol $18,%ecx
590 # s ^= x5
591 xorl 120(%esp),%ecx
592 # w += v
593 add %edi,%ebx
594 # w <<<= 18
595 rol $18,%ebx
596 # w ^= x15
597 xorl 160(%esp),%ebx
# Row round, second pair.
598 # x0 = p
599 movl %eax,100(%esp)
600 # x10 = t
601 movl %edx,140(%esp)
602 # p += x3
603 addl 112(%esp),%eax
604 # p <<<= 7
605 rol $7,%eax
606 # x5 = s
607 movl %ecx,120(%esp)
608 # t += x9
609 addl 136(%esp),%edx
610 # x15 = w
611 movl %ebx,160(%esp)
612 # r = x4
613 movl 116(%esp),%esi
614 # r += s
615 add %ecx,%esi
616 # v = x14
617 movl 156(%esp),%edi
618 # v += w
619 add %ebx,%edi
620 # p ^= x1
621 xorl 104(%esp),%eax
622 # t <<<= 7
623 rol $7,%edx
624 # t ^= x11
625 xorl 144(%esp),%edx
626 # r <<<= 7
627 rol $7,%esi
628 # r ^= x6
629 xorl 124(%esp),%esi
630 # v <<<= 7
631 rol $7,%edi
632 # v ^= x12
633 xorl 148(%esp),%edi
634 # x1 = p
635 movl %eax,104(%esp)
636 # x11 = t
637 movl %edx,144(%esp)
638 # p += x0
639 addl 100(%esp),%eax
640 # x6 = r
641 movl %esi,124(%esp)
642 # t += x10
643 addl 140(%esp),%edx
644 # x12 = v
645 movl %edi,148(%esp)
646 # p <<<= 9
647 rol $9,%eax
648 # p ^= x2
649 xorl 108(%esp),%eax
650 # t <<<= 9
651 rol $9,%edx
652 # t ^= x8
653 xorl 132(%esp),%edx
654 # s += r
655 add %esi,%ecx
656 # s <<<= 9
657 rol $9,%ecx
658 # s ^= x7
659 xorl 128(%esp),%ecx
660 # w += v
661 add %edi,%ebx
662 # w <<<= 9
663 rol $9,%ebx
664 # w ^= x13
665 xorl 152(%esp),%ebx
666 # x2 = p
667 movl %eax,108(%esp)
668 # x8 = t
669 movl %edx,132(%esp)
670 # p += x1
671 addl 104(%esp),%eax
672 # x7 = s
673 movl %ecx,128(%esp)
674 # t += x11
675 addl 144(%esp),%edx
676 # x13 = w
677 movl %ebx,152(%esp)
678 # p <<<= 13
679 rol $13,%eax
680 # p ^= x3
681 xorl 112(%esp),%eax
682 # t <<<= 13
683 rol $13,%edx
684 # t ^= x9
685 xorl 136(%esp),%edx
686 # r += s
687 add %ecx,%esi
688 # r <<<= 13
689 rol $13,%esi
690 # r ^= x4
691 xorl 116(%esp),%esi
692 # v += w
693 add %ebx,%edi
694 # v <<<= 13
695 rol $13,%edi
696 # v ^= x14
697 xorl 156(%esp),%edi
698 # x3 = p
699 movl %eax,112(%esp)
700 # x9 = t
701 movl %edx,136(%esp)
702 # p += x2
703 addl 108(%esp),%eax
704 # x4 = r
705 movl %esi,116(%esp)
706 # t += x8
707 addl 132(%esp),%edx
708 # x14 = v
709 movl %edi,156(%esp)
710 # p <<<= 18
711 rol $18,%eax
712 # p ^= x0
713 xorl 100(%esp),%eax
714 # t <<<= 18
715 rol $18,%edx
716 # t ^= x10
717 xorl 140(%esp),%edx
718 # s += r
719 add %esi,%ecx
720 # s <<<= 18
721 rol $18,%ecx
722 # s ^= x5
723 xorl 120(%esp),%ecx
724 # w += v
725 add %edi,%ebx
726 # w <<<= 18
727 rol $18,%ebx
728 # w ^= x15
729 xorl 160(%esp),%ebx
730 # i -= 4
731 sub $4,%ebp
732 # goto mainloop if unsigned >
733 ja ._mainloop
# Rounds finished: write the four register-held words back to the frame,
# then for each word add the matching input word j, XOR with the input
# data at m and store the result at out.
734 # x0 = p
735 movl %eax,100(%esp)
736 # x5 = s
737 movl %ecx,120(%esp)
738 # x10 = t
739 movl %edx,140(%esp)
740 # x15 = w
741 movl %ebx,160(%esp)
742 # out = out_backup
743 movl 72(%esp),%edi
744 # m = m_backup
745 movl 68(%esp),%esi
746 # in0 = x0
747 movl 100(%esp),%eax
748 # in1 = x1
749 movl 104(%esp),%ecx
750 # in0 += j0
751 addl 164(%esp),%eax
752 # in1 += j1
753 addl 168(%esp),%ecx
754 # in0 ^= *(uint32 *) (m + 0)
755 xorl 0(%esi),%eax
756 # in1 ^= *(uint32 *) (m + 4)
757 xorl 4(%esi),%ecx
758 # *(uint32 *) (out + 0) = in0
759 movl %eax,0(%edi)
760 # *(uint32 *) (out + 4) = in1
761 movl %ecx,4(%edi)
762 # in2 = x2
763 movl 108(%esp),%eax
764 # in3 = x3
765 movl 112(%esp),%ecx
766 # in2 += j2
767 addl 172(%esp),%eax
768 # in3 += j3
769 addl 176(%esp),%ecx
770 # in2 ^= *(uint32 *) (m + 8)
771 xorl 8(%esi),%eax
772 # in3 ^= *(uint32 *) (m + 12)
773 xorl 12(%esi),%ecx
774 # *(uint32 *) (out + 8) = in2
775 movl %eax,8(%edi)
776 # *(uint32 *) (out + 12) = in3
777 movl %ecx,12(%edi)
778 # in4 = x4
779 movl 116(%esp),%eax
780 # in5 = x5
781 movl 120(%esp),%ecx
782 # in4 += j4
783 addl 180(%esp),%eax
784 # in5 += j5
785 addl 184(%esp),%ecx
786 # in4 ^= *(uint32 *) (m + 16)
787 xorl 16(%esi),%eax
788 # in5 ^= *(uint32 *) (m + 20)
789 xorl 20(%esi),%ecx
790 # *(uint32 *) (out + 16) = in4
791 movl %eax,16(%edi)
792 # *(uint32 *) (out + 20) = in5
793 movl %ecx,20(%edi)
794 # in6 = x6
795 movl 124(%esp),%eax
796 # in7 = x7
797 movl 128(%esp),%ecx
798 # in6 += j6
799 addl 188(%esp),%eax
800 # in7 += j7
801 addl 192(%esp),%ecx
802 # in6 ^= *(uint32 *) (m + 24)
803 xorl 24(%esi),%eax
804 # in7 ^= *(uint32 *) (m + 28)
805 xorl 28(%esi),%ecx
806 # *(uint32 *) (out + 24) = in6
807 movl %eax,24(%edi)
808 # *(uint32 *) (out + 28) = in7
809 movl %ecx,28(%edi)
810 # in8 = x8
811 movl 132(%esp),%eax
812 # in9 = x9
813 movl 136(%esp),%ecx
814 # in8 += j8
815 addl 196(%esp),%eax
816 # in9 += j9
817 addl 200(%esp),%ecx
818 # in8 ^= *(uint32 *) (m + 32)
819 xorl 32(%esi),%eax
820 # in9 ^= *(uint32 *) (m + 36)
821 xorl 36(%esi),%ecx
822 # *(uint32 *) (out + 32) = in8
823 movl %eax,32(%edi)
824 # *(uint32 *) (out + 36) = in9
825 movl %ecx,36(%edi)
826 # in10 = x10
827 movl 140(%esp),%eax
828 # in11 = x11
829 movl 144(%esp),%ecx
830 # in10 += j10
831 addl 204(%esp),%eax
832 # in11 += j11
833 addl 208(%esp),%ecx
834 # in10 ^= *(uint32 *) (m + 40)
835 xorl 40(%esi),%eax
836 # in11 ^= *(uint32 *) (m + 44)
837 xorl 44(%esi),%ecx
838 # *(uint32 *) (out + 40) = in10
839 movl %eax,40(%edi)
840 # *(uint32 *) (out + 44) = in11
841 movl %ecx,44(%edi)
842 # in12 = x12
843 movl 148(%esp),%eax
844 # in13 = x13
845 movl 152(%esp),%ecx
846 # in12 += j12
847 addl 212(%esp),%eax
848 # in13 += j13
849 addl 216(%esp),%ecx
850 # in12 ^= *(uint32 *) (m + 48)
851 xorl 48(%esi),%eax
852 # in13 ^= *(uint32 *) (m + 52)
853 xorl 52(%esi),%ecx
854 # *(uint32 *) (out + 48) = in12
855 movl %eax,48(%edi)
856 # *(uint32 *) (out + 52) = in13
857 movl %ecx,52(%edi)
858 # in14 = x14
859 movl 156(%esp),%eax
860 # in15 = x15
861 movl 160(%esp),%ecx
862 # in14 += j14
863 addl 220(%esp),%eax
864 # in15 += j15
865 addl 224(%esp),%ecx
866 # in14 ^= *(uint32 *) (m + 56)
867 xorl 56(%esi),%eax
868 # in15 ^= *(uint32 *) (m + 60)
869 xorl 60(%esi),%ecx
870 # *(uint32 *) (out + 56) = in14
871 movl %eax,56(%edi)
872 # *(uint32 *) (out + 60) = in15
873 movl %ecx,60(%edi)
# Advance the 64-bit block counter held in j8 (low word) and j9 (high
# word), then decide how to continue from the remaining byte count.
874 # bytes = bytes_backup
875 movl 76(%esp),%ebx
876 # in8 = j8
877 movl 196(%esp),%eax
878 # in9 = j9
879 movl 200(%esp),%ecx
880 # in8 += 1
881 add $1,%eax
882 # in9 += 0 + carry
883 adc $0,%ecx
884 # j8 = in8
885 movl %eax,196(%esp)
886 # j9 = in9
887 movl %ecx,200(%esp)
888 # bytes - 64
889 cmp $64,%ebx
890 # goto bytesatleast65 if unsigned>
891 ja ._bytesatleast65
892 # goto bytesatleast64 if unsigned>=
893 jae ._bytesatleast64
# Short final block: the result sits in tmp, so copy only the requested
# number of bytes to the real destination saved in ctarget.
894 # m = out
895 mov %edi,%esi
896 # out = ctarget
897 movl 228(%esp),%edi
898 # i = bytes
899 mov %ebx,%ecx
900 # while (i) { *out++ = *m++; --i }
901 rep movsb
# Last block done: store the updated block counter back into the state.
902._bytesatleast64:
903 # x = x_backup
904 movl 64(%esp),%eax
905 # in8 = j8
906 movl 196(%esp),%ecx
907 # in9 = j9
908 movl 200(%esp),%edx
909 # *(uint32 *) (x + 32) = in8
910 movl %ecx,32(%eax)
911 # *(uint32 *) (x + 36) = in9
912 movl %edx,36(%eax)
# Restore the saved registers and undo the stack adjustment.
913._done:
914 # eax = eax_stack
915 movl 80(%esp),%eax
916 # ebx = ebx_stack
917 movl 84(%esp),%ebx
918 # esi = esi_stack
919 movl 88(%esp),%esi
920 # edi = edi_stack
921 movl 92(%esp),%edi
922 # ebp = ebp_stack
923 movl 96(%esp),%ebp
924 # leave
925 add %eax,%esp
926 ret
# More than one block was left: step past the block just written and go
# around again.
927._bytesatleast65:
928 # bytes -= 64
929 sub $64,%ebx
930 # out += 64
931 add $64,%edi
932 # m += 64
933 add $64,%esi
934 # goto bytesatleast1
935 jmp ._bytesatleast1
936# enter ECRYPT_keysetup
# ECRYPT_keysetup(x, k, kbits): load a key into the 16-word state at x.
# Three arguments are read from the stack of the caller.
#   x     - pointer to the state; words 0-5 and 10-15 are written
#   k     - key bytes; 16 bytes are read, or 32 when kbits >= 256
#   kbits - key length in bits; any value below 256 takes the 128-bit path
# Words 1-4 always receive the first 16 key bytes.  Words 6-9 (IV and
# block counter) are not touched here.
# The prologue lowers %esp to a 32-byte boundary with at least 256 bytes
# of scratch; the adjustment is kept in eax_stack so the epilogue can
# undo it.
937.text
938.p2align 5
939.globl ECRYPT_keysetup
940ECRYPT_keysetup:
941 mov %esp,%eax
942 and $31,%eax
943 add $256,%eax
944 sub %eax,%esp
945 # eax_stack = eax
946 movl %eax,64(%esp)
947 # ebx_stack = ebx
948 movl %ebx,68(%esp)
949 # esi_stack = esi
950 movl %esi,72(%esp)
951 # edi_stack = edi
952 movl %edi,76(%esp)
953 # ebp_stack = ebp
954 movl %ebp,80(%esp)
955 # k = arg2
956 movl 8(%esp,%eax),%ecx
957 # kbits = arg3
958 movl 12(%esp,%eax),%edx
959 # x = arg1
960 movl 4(%esp,%eax),%eax
961 # in1 = *(uint32 *) (k + 0)
962 movl 0(%ecx),%ebx
963 # in2 = *(uint32 *) (k + 4)
964 movl 4(%ecx),%esi
965 # in3 = *(uint32 *) (k + 8)
966 movl 8(%ecx),%edi
967 # in4 = *(uint32 *) (k + 12)
968 movl 12(%ecx),%ebp
969 # *(uint32 *) (x + 4) = in1
970 movl %ebx,4(%eax)
971 # *(uint32 *) (x + 8) = in2
972 movl %esi,8(%eax)
973 # *(uint32 *) (x + 12) = in3
974 movl %edi,12(%eax)
975 # *(uint32 *) (x + 16) = in4
976 movl %ebp,16(%eax)
977 # kbits - 256
978 cmp $256,%edx
979 # goto kbits128 if unsigned<
980 jb ._kbits128
# 256-bit key: words 11-14 take key bytes 16..31.  The four constants
# stored in words 0, 5, 10 and 15 are the little-endian words of the
# ASCII text: expand 32-byte k
981._kbits256:
982 # in11 = *(uint32 *) (k + 16)
983 movl 16(%ecx),%edx
984 # in12 = *(uint32 *) (k + 20)
985 movl 20(%ecx),%ebx
986 # in13 = *(uint32 *) (k + 24)
987 movl 24(%ecx),%esi
988 # in14 = *(uint32 *) (k + 28)
989 movl 28(%ecx),%ecx
990 # *(uint32 *) (x + 44) = in11
991 movl %edx,44(%eax)
992 # *(uint32 *) (x + 48) = in12
993 movl %ebx,48(%eax)
994 # *(uint32 *) (x + 52) = in13
995 movl %esi,52(%eax)
996 # *(uint32 *) (x + 56) = in14
997 movl %ecx,56(%eax)
998 # in0 = 1634760805
999 mov $1634760805,%ecx
1000 # in5 = 857760878
1001 mov $857760878,%edx
1002 # in10 = 2036477234
1003 mov $2036477234,%ebx
1004 # in15 = 1797285236
1005 mov $1797285236,%esi
1006 # *(uint32 *) (x + 0) = in0
1007 movl %ecx,0(%eax)
1008 # *(uint32 *) (x + 20) = in5
1009 movl %edx,20(%eax)
1010 # *(uint32 *) (x + 40) = in10
1011 movl %ebx,40(%eax)
1012 # *(uint32 *) (x + 60) = in15
1013 movl %esi,60(%eax)
1014 # goto keysetupdone
1015 jmp ._keysetupdone
# 128-bit key: the same 16 key bytes are stored again in words 11-14.
# The constants differ in words 5 and 10 and are the little-endian words
# of the ASCII text: expand 16-byte k
1016._kbits128:
1017 # in11 = *(uint32 *) (k + 0)
1018 movl 0(%ecx),%edx
1019 # in12 = *(uint32 *) (k + 4)
1020 movl 4(%ecx),%ebx
1021 # in13 = *(uint32 *) (k + 8)
1022 movl 8(%ecx),%esi
1023 # in14 = *(uint32 *) (k + 12)
1024 movl 12(%ecx),%ecx
1025 # *(uint32 *) (x + 44) = in11
1026 movl %edx,44(%eax)
1027 # *(uint32 *) (x + 48) = in12
1028 movl %ebx,48(%eax)
1029 # *(uint32 *) (x + 52) = in13
1030 movl %esi,52(%eax)
1031 # *(uint32 *) (x + 56) = in14
1032 movl %ecx,56(%eax)
1033 # in0 = 1634760805
1034 mov $1634760805,%ecx
1035 # in5 = 824206446
1036 mov $824206446,%edx
1037 # in10 = 2036477238
1038 mov $2036477238,%ebx
1039 # in15 = 1797285236
1040 mov $1797285236,%esi
1041 # *(uint32 *) (x + 0) = in0
1042 movl %ecx,0(%eax)
1043 # *(uint32 *) (x + 20) = in5
1044 movl %edx,20(%eax)
1045 # *(uint32 *) (x + 40) = in10
1046 movl %ebx,40(%eax)
1047 # *(uint32 *) (x + 60) = in15
1048 movl %esi,60(%eax)
# Restore the saved registers and undo the stack adjustment.
1049._keysetupdone:
1050 # eax = eax_stack
1051 movl 64(%esp),%eax
1052 # ebx = ebx_stack
1053 movl 68(%esp),%ebx
1054 # esi = esi_stack
1055 movl 72(%esp),%esi
1056 # edi = edi_stack
1057 movl 76(%esp),%edi
1058 # ebp = ebp_stack
1059 movl 80(%esp),%ebp
1060 # leave
1061 add %eax,%esp
1062 ret
1063# enter ECRYPT_ivsetup
# ECRYPT_ivsetup(x, iv): store an 8-byte IV in the state and reset the
# block counter.  Both arguments are read from the stack of the caller.
#   x  - pointer to the cipher state; words 6, 7, 8 and 9 are written
#   iv - pointer to the IV; 8 bytes are read
# Words 6 and 7 receive the IV, words 8 and 9 (the block counter) are
# set to zero.
# The prologue lowers %esp to a 32-byte boundary with at least 256 bytes
# of scratch; the adjustment is kept in eax_stack so the epilogue can
# undo it.
1064.text
1065.p2align 5
1066.globl ECRYPT_ivsetup
1067ECRYPT_ivsetup:
1068 mov %esp,%eax
1069 and $31,%eax
1070 add $256,%eax
1071 sub %eax,%esp
1072 # eax_stack = eax
1073 movl %eax,64(%esp)
1074 # ebx_stack = ebx
1075 movl %ebx,68(%esp)
1076 # esi_stack = esi
1077 movl %esi,72(%esp)
1078 # edi_stack = edi
1079 movl %edi,76(%esp)
1080 # ebp_stack = ebp
1081 movl %ebp,80(%esp)
1082 # iv = arg2
1083 movl 8(%esp,%eax),%ecx
1084 # x = arg1
1085 movl 4(%esp,%eax),%eax
1086 # in6 = *(uint32 *) (iv + 0)
1087 movl 0(%ecx),%edx
1088 # in7 = *(uint32 *) (iv + 4)
1089 movl 4(%ecx),%ecx
1090 # in8 = 0
1091 mov $0,%ebx
1092 # in9 = 0
1093 mov $0,%esi
1094 # *(uint32 *) (x + 24) = in6
1095 movl %edx,24(%eax)
1096 # *(uint32 *) (x + 28) = in7
1097 movl %ecx,28(%eax)
1098 # *(uint32 *) (x + 32) = in8
1099 movl %ebx,32(%eax)
1100 # *(uint32 *) (x + 36) = in9
1101 movl %esi,36(%eax)
# Restore the saved registers and undo the stack adjustment.
1102 # eax = eax_stack
1103 movl 64(%esp),%eax
1104 # ebx = ebx_stack
1105 movl 68(%esp),%ebx
1106 # esi = esi_stack
1107 movl 72(%esp),%esi
1108 # edi = edi_stack
1109 movl 76(%esp),%edi
1110 # ebp = ebp_stack
1111 movl 80(%esp),%ebp
1112 # leave
1113 add %eax,%esp
1114 ret
diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c
new file mode 100644
index 000000000000..3be443995ed6
--- /dev/null
+++ b/arch/x86/crypto/salsa20_glue.c
@@ -0,0 +1,127 @@
1/*
2 * Glue code for optimized assembly version of Salsa20.
3 *
4 * Copyright (c) 2007 Tan Swee Heng <thesweeheng@gmail.com>
5 *
6 * The assembly code is public domain code written by Daniel J.
7 * Bernstein <djb@cr.yp.to>. The code was modified to include indentation
8 * and to remove extraneous comments and functions that are not needed.
9 * - i586 version, renamed as salsa20-i586-asm_32.S
10 * available from <http://cr.yp.to/snuffle/salsa20/x86-pm/salsa20.s>
11 *
12 * This program is free software; you can redistribute it and/or modify it
13 * under the terms of the GNU General Public License as published by the Free
14 * Software Foundation; either version 2 of the License, or (at your option)
15 * any later version.
16 *
17 */
18
#include <crypto/algapi.h>
#include <linux/crypto.h>
#include <linux/errno.h>
#include <linux/module.h>
22
23#define SALSA20_IV_SIZE 8U
24#define SALSA20_MIN_KEY_SIZE 16U
25#define SALSA20_MAX_KEY_SIZE 32U
26
27// use the ECRYPT_* function names
28#define salsa20_keysetup ECRYPT_keysetup
29#define salsa20_ivsetup ECRYPT_ivsetup
30#define salsa20_encrypt_bytes ECRYPT_encrypt_bytes
31
32struct salsa20_ctx
33{
34 u32 input[16];
35};
36
37asmlinkage void salsa20_keysetup(struct salsa20_ctx *ctx, const u8 *k,
38 u32 keysize, u32 ivsize);
39asmlinkage void salsa20_ivsetup(struct salsa20_ctx *ctx, const u8 *iv);
40asmlinkage void salsa20_encrypt_bytes(struct salsa20_ctx *ctx,
41 const u8 *src, u8 *dst, u32 bytes);
42
43static int setkey(struct crypto_tfm *tfm, const u8 *key,
44 unsigned int keysize)
45{
46 struct salsa20_ctx *ctx = crypto_tfm_ctx(tfm);
47 salsa20_keysetup(ctx, key, keysize*8, SALSA20_IV_SIZE*8);
48 return 0;
49}
50
51static int encrypt(struct blkcipher_desc *desc,
52 struct scatterlist *dst, struct scatterlist *src,
53 unsigned int nbytes)
54{
55 struct blkcipher_walk walk;
56 struct crypto_blkcipher *tfm = desc->tfm;
57 struct salsa20_ctx *ctx = crypto_blkcipher_ctx(tfm);
58 int err;
59
60 blkcipher_walk_init(&walk, dst, src, nbytes);
61 err = blkcipher_walk_virt_block(desc, &walk, 64);
62
63 salsa20_ivsetup(ctx, walk.iv);
64
65 if (likely(walk.nbytes == nbytes))
66 {
67 salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
68 walk.dst.virt.addr, nbytes);
69 return blkcipher_walk_done(desc, &walk, 0);
70 }
71
72 while (walk.nbytes >= 64) {
73 salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
74 walk.dst.virt.addr,
75 walk.nbytes - (walk.nbytes % 64));
76 err = blkcipher_walk_done(desc, &walk, walk.nbytes % 64);
77 }
78
79 if (walk.nbytes) {
80 salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
81 walk.dst.virt.addr, walk.nbytes);
82 err = blkcipher_walk_done(desc, &walk, 0);
83 }
84
85 return err;
86}
87
88static struct crypto_alg alg = {
89 .cra_name = "salsa20",
90 .cra_driver_name = "salsa20-asm",
91 .cra_priority = 200,
92 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
93 .cra_type = &crypto_blkcipher_type,
94 .cra_blocksize = 1,
95 .cra_ctxsize = sizeof(struct salsa20_ctx),
96 .cra_alignmask = 3,
97 .cra_module = THIS_MODULE,
98 .cra_list = LIST_HEAD_INIT(alg.cra_list),
99 .cra_u = {
100 .blkcipher = {
101 .setkey = setkey,
102 .encrypt = encrypt,
103 .decrypt = encrypt,
104 .min_keysize = SALSA20_MIN_KEY_SIZE,
105 .max_keysize = SALSA20_MAX_KEY_SIZE,
106 .ivsize = SALSA20_IV_SIZE,
107 }
108 }
109};
110
111static int __init init(void)
112{
113 return crypto_register_alg(&alg);
114}
115
116static void __exit fini(void)
117{
118 crypto_unregister_alg(&alg);
119}
120
121module_init(init);
122module_exit(fini);
123
124MODULE_LICENSE("GPL");
125MODULE_DESCRIPTION ("Salsa20 stream cipher algorithm (optimized assembly version)");
126MODULE_ALIAS("salsa20");
127MODULE_ALIAS("salsa20-asm");