diff options
author | Tadeusz Struk <tadeusz.struk@intel.com> | 2010-12-13 06:51:15 -0500 |
---|---|---|
committer | Herbert Xu <herbert@gondor.apana.org.au> | 2010-12-13 06:51:15 -0500 |
commit | 3c097b800816c0e4c2a34c38f8b2409427770f7a (patch) | |
tree | c4d5293dae7c0842877221fb12b5f2cf7d4a0a26 /arch/x86 | |
parent | 507cad355fc9e426f2846c46a4edca2d22d25f44 (diff) |
crypto: aesni-intel - Fixed build with binutils 2.16
This patch fixes the problem with 2.16 binutils.
Signed-off-by: Aidan O'Mahony <aidan.o.mahony@intel.com>
Signed-off-by: Adrian Hoban <adrian.hoban@intel.com>
Signed-off-by: Gabriele Paoloni <gabriele.paoloni@intel.com>
Signed-off-by: Tadeusz Struk <tadeusz.struk@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/crypto/aesni-intel_asm.S | 598 |
1 files changed, 519 insertions, 79 deletions
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index d528fde219d2..8fe2a4966b7a 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S | |||
@@ -204,9 +204,9 @@ enc: .octa 0x2 | |||
204 | * arg1, %arg2, %arg3, %r14 are used as a pointer only, not modified | 204 | * arg1, %arg2, %arg3, %r14 are used as a pointer only, not modified |
205 | */ | 205 | */ |
206 | 206 | ||
207 | .macro INITIAL_BLOCKS num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \ | ||
208 | XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation | ||
209 | 207 | ||
208 | .macro INITIAL_BLOCKS_DEC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \ | ||
209 | XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation | ||
210 | mov arg7, %r10 # %r10 = AAD | 210 | mov arg7, %r10 # %r10 = AAD |
211 | mov arg8, %r12 # %r12 = aadLen | 211 | mov arg8, %r12 # %r12 = aadLen |
212 | mov %r12, %r11 | 212 | mov %r12, %r11 |
@@ -228,19 +228,25 @@ _get_AAD_loop2\num_initial_blocks\operation: | |||
228 | cmp %r11, %r12 | 228 | cmp %r11, %r12 |
229 | jne _get_AAD_loop2\num_initial_blocks\operation | 229 | jne _get_AAD_loop2\num_initial_blocks\operation |
230 | _get_AAD_loop2_done\num_initial_blocks\operation: | 230 | _get_AAD_loop2_done\num_initial_blocks\operation: |
231 | pshufb SHUF_MASK(%rip), %xmm\i # byte-reflect the AAD data | 231 | movdqa SHUF_MASK(%rip), %xmm14 |
232 | PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data | ||
233 | |||
232 | xor %r11, %r11 # initialise the data pointer offset as zero | 234 | xor %r11, %r11 # initialise the data pointer offset as zero |
233 | 235 | ||
234 | # start AES for num_initial_blocks blocks | 236 | # start AES for num_initial_blocks blocks |
235 | 237 | ||
236 | mov %arg5, %rax # %rax = *Y0 | 238 | mov %arg5, %rax # %rax = *Y0 |
237 | movdqu (%rax), \XMM0 # XMM0 = Y0 | 239 | movdqu (%rax), \XMM0 # XMM0 = Y0 |
238 | pshufb SHUF_MASK(%rip), \XMM0 | 240 | movdqa SHUF_MASK(%rip), %xmm14 |
239 | .if \i_seq != 0 | 241 | PSHUFB_XMM %xmm14, \XMM0 |
242 | |||
243 | .if (\i == 5) || (\i == 6) || (\i == 7) | ||
240 | .irpc index, \i_seq | 244 | .irpc index, \i_seq |
241 | paddd ONE(%rip), \XMM0 # INCR Y0 | 245 | paddd ONE(%rip), \XMM0 # INCR Y0 |
242 | movdqa \XMM0, %xmm\index | 246 | movdqa \XMM0, %xmm\index |
243 | pshufb SHUF_MASK(%rip), %xmm\index # perform a 16 byte swap | 247 | movdqa SHUF_MASK(%rip), %xmm14 |
248 | PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap | ||
249 | |||
244 | .endr | 250 | .endr |
245 | .irpc index, \i_seq | 251 | .irpc index, \i_seq |
246 | pxor 16*0(%arg1), %xmm\index | 252 | pxor 16*0(%arg1), %xmm\index |
@@ -291,10 +297,11 @@ _get_AAD_loop2_done\num_initial_blocks\operation: | |||
291 | movdqu %xmm\index, (%arg2 , %r11, 1) | 297 | movdqu %xmm\index, (%arg2 , %r11, 1) |
292 | # write back plaintext/ciphertext for num_initial_blocks | 298 | # write back plaintext/ciphertext for num_initial_blocks |
293 | add $16, %r11 | 299 | add $16, %r11 |
294 | .if \operation == dec | 300 | |
295 | movdqa \TMP1, %xmm\index | 301 | movdqa \TMP1, %xmm\index |
296 | .endif | 302 | movdqa SHUF_MASK(%rip), %xmm14 |
297 | pshufb SHUF_MASK(%rip), %xmm\index | 303 | PSHUFB_XMM %xmm14, %xmm\index |
304 | |||
298 | # prepare plaintext/ciphertext for GHASH computation | 305 | # prepare plaintext/ciphertext for GHASH computation |
299 | .endr | 306 | .endr |
300 | .endif | 307 | .endif |
@@ -327,16 +334,24 @@ _get_AAD_loop2_done\num_initial_blocks\operation: | |||
327 | */ | 334 | */ |
328 | paddd ONE(%rip), \XMM0 # INCR Y0 | 335 | paddd ONE(%rip), \XMM0 # INCR Y0 |
329 | movdqa \XMM0, \XMM1 | 336 | movdqa \XMM0, \XMM1 |
330 | pshufb SHUF_MASK(%rip), \XMM1 # perform a 16 byte swap | 337 | movdqa SHUF_MASK(%rip), %xmm14 |
338 | PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap | ||
339 | |||
331 | paddd ONE(%rip), \XMM0 # INCR Y0 | 340 | paddd ONE(%rip), \XMM0 # INCR Y0 |
332 | movdqa \XMM0, \XMM2 | 341 | movdqa \XMM0, \XMM2 |
333 | pshufb SHUF_MASK(%rip), \XMM2 # perform a 16 byte swap | 342 | movdqa SHUF_MASK(%rip), %xmm14 |
343 | PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap | ||
344 | |||
334 | paddd ONE(%rip), \XMM0 # INCR Y0 | 345 | paddd ONE(%rip), \XMM0 # INCR Y0 |
335 | movdqa \XMM0, \XMM3 | 346 | movdqa \XMM0, \XMM3 |
336 | pshufb SHUF_MASK(%rip), \XMM3 # perform a 16 byte swap | 347 | movdqa SHUF_MASK(%rip), %xmm14 |
348 | PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap | ||
349 | |||
337 | paddd ONE(%rip), \XMM0 # INCR Y0 | 350 | paddd ONE(%rip), \XMM0 # INCR Y0 |
338 | movdqa \XMM0, \XMM4 | 351 | movdqa \XMM0, \XMM4 |
339 | pshufb SHUF_MASK(%rip), \XMM4 # perform a 16 byte swap | 352 | movdqa SHUF_MASK(%rip), %xmm14 |
353 | PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap | ||
354 | |||
340 | pxor 16*0(%arg1), \XMM1 | 355 | pxor 16*0(%arg1), \XMM1 |
341 | pxor 16*0(%arg1), \XMM2 | 356 | pxor 16*0(%arg1), \XMM2 |
342 | pxor 16*0(%arg1), \XMM3 | 357 | pxor 16*0(%arg1), \XMM3 |
@@ -385,41 +400,268 @@ _get_AAD_loop2_done\num_initial_blocks\operation: | |||
385 | AESENCLAST \TMP2, \XMM4 | 400 | AESENCLAST \TMP2, \XMM4 |
386 | movdqu 16*0(%arg3 , %r11 , 1), \TMP1 | 401 | movdqu 16*0(%arg3 , %r11 , 1), \TMP1 |
387 | pxor \TMP1, \XMM1 | 402 | pxor \TMP1, \XMM1 |
388 | .if \operation == dec | ||
389 | movdqu \XMM1, 16*0(%arg2 , %r11 , 1) | 403 | movdqu \XMM1, 16*0(%arg2 , %r11 , 1) |
390 | movdqa \TMP1, \XMM1 | 404 | movdqa \TMP1, \XMM1 |
391 | .endif | ||
392 | movdqu 16*1(%arg3 , %r11 , 1), \TMP1 | 405 | movdqu 16*1(%arg3 , %r11 , 1), \TMP1 |
393 | pxor \TMP1, \XMM2 | 406 | pxor \TMP1, \XMM2 |
394 | .if \operation == dec | ||
395 | movdqu \XMM2, 16*1(%arg2 , %r11 , 1) | 407 | movdqu \XMM2, 16*1(%arg2 , %r11 , 1) |
396 | movdqa \TMP1, \XMM2 | 408 | movdqa \TMP1, \XMM2 |
397 | .endif | ||
398 | movdqu 16*2(%arg3 , %r11 , 1), \TMP1 | 409 | movdqu 16*2(%arg3 , %r11 , 1), \TMP1 |
399 | pxor \TMP1, \XMM3 | 410 | pxor \TMP1, \XMM3 |
400 | .if \operation == dec | ||
401 | movdqu \XMM3, 16*2(%arg2 , %r11 , 1) | 411 | movdqu \XMM3, 16*2(%arg2 , %r11 , 1) |
402 | movdqa \TMP1, \XMM3 | 412 | movdqa \TMP1, \XMM3 |
403 | .endif | ||
404 | movdqu 16*3(%arg3 , %r11 , 1), \TMP1 | 413 | movdqu 16*3(%arg3 , %r11 , 1), \TMP1 |
405 | pxor \TMP1, \XMM4 | 414 | pxor \TMP1, \XMM4 |
406 | .if \operation == dec | ||
407 | movdqu \XMM4, 16*3(%arg2 , %r11 , 1) | 415 | movdqu \XMM4, 16*3(%arg2 , %r11 , 1) |
408 | movdqa \TMP1, \XMM4 | 416 | movdqa \TMP1, \XMM4 |
409 | .else | 417 | add $64, %r11 |
418 | movdqa SHUF_MASK(%rip), %xmm14 | ||
419 | PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap | ||
420 | pxor \XMMDst, \XMM1 | ||
421 | # combine GHASHed value with the corresponding ciphertext | ||
422 | movdqa SHUF_MASK(%rip), %xmm14 | ||
423 | PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap | ||
424 | movdqa SHUF_MASK(%rip), %xmm14 | ||
425 | PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap | ||
426 | movdqa SHUF_MASK(%rip), %xmm14 | ||
427 | PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap | ||
428 | |||
429 | _initial_blocks_done\num_initial_blocks\operation: | ||
430 | |||
431 | .endm | ||
432 | |||
433 | |||
434 | /* | ||
435 | * if a = number of total plaintext bytes | ||
436 | * b = floor(a/16) | ||
437 | * num_initial_blocks = b mod 4 | ||
438 | * encrypt the initial num_initial_blocks blocks and apply ghash on | ||
439 | * the ciphertext | ||
440 | * %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers | ||
441 | * are clobbered; %xmm14 is also overwritten (it is loaded with SHUF_MASK) | ||
442 | * %arg1, %arg2, %arg3, %r14 are used as pointers only, not modified | ||
443 | */ | ||
444 | |||
445 | |||
446 | .macro INITIAL_BLOCKS_ENC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \ | ||
447 | XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation | ||
448 | mov arg7, %r10 # %r10 = AAD | ||
449 | mov arg8, %r12 # %r12 = aadLen | ||
450 | mov %r12, %r11 | ||
451 | pxor %xmm\i, %xmm\i | ||
452 | _get_AAD_loop\num_initial_blocks\operation: | ||
453 | movd (%r10), \TMP1 | ||
454 | pslldq $12, \TMP1 | ||
455 | psrldq $4, %xmm\i | ||
456 | pxor \TMP1, %xmm\i | ||
457 | add $4, %r10 | ||
458 | sub $4, %r12 | ||
459 | jne _get_AAD_loop\num_initial_blocks\operation | ||
460 | cmp $16, %r11 | ||
461 | je _get_AAD_loop2_done\num_initial_blocks\operation | ||
462 | mov $16, %r12 | ||
463 | _get_AAD_loop2\num_initial_blocks\operation: | ||
464 | psrldq $4, %xmm\i | ||
465 | sub $4, %r12 | ||
466 | cmp %r11, %r12 | ||
467 | jne _get_AAD_loop2\num_initial_blocks\operation | ||
468 | _get_AAD_loop2_done\num_initial_blocks\operation: | ||
469 | movdqa SHUF_MASK(%rip), %xmm14 | ||
470 | PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data | ||
471 | |||
472 | xor %r11, %r11 # initialise the data pointer offset as zero | ||
473 | |||
474 | # start AES for num_initial_blocks blocks | ||
475 | |||
476 | mov %arg5, %rax # %rax = *Y0 | ||
477 | movdqu (%rax), \XMM0 # XMM0 = Y0 | ||
478 | movdqa SHUF_MASK(%rip), %xmm14 | ||
479 | PSHUFB_XMM %xmm14, \XMM0 | ||
480 | |||
481 | .if (\i == 5) || (\i == 6) || (\i == 7) | ||
482 | .irpc index, \i_seq | ||
483 | paddd ONE(%rip), \XMM0 # INCR Y0 | ||
484 | movdqa \XMM0, %xmm\index | ||
485 | movdqa SHUF_MASK(%rip), %xmm14 | ||
486 | PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap | ||
487 | |||
488 | .endr | ||
489 | .irpc index, \i_seq | ||
490 | pxor 16*0(%arg1), %xmm\index | ||
491 | .endr | ||
492 | .irpc index, \i_seq | ||
493 | movaps 0x10(%rdi), \TMP1 | ||
494 | AESENC \TMP1, %xmm\index # Round 1 | ||
495 | .endr | ||
496 | .irpc index, \i_seq | ||
497 | movaps 0x20(%arg1), \TMP1 | ||
498 | AESENC \TMP1, %xmm\index # Round 2 | ||
499 | .endr | ||
500 | .irpc index, \i_seq | ||
501 | movaps 0x30(%arg1), \TMP1 | ||
502 | AESENC \TMP1, %xmm\index # Round 3 | ||
503 | .endr | ||
504 | .irpc index, \i_seq | ||
505 | movaps 0x40(%arg1), \TMP1 | ||
506 | AESENC \TMP1, %xmm\index # Round 4 | ||
507 | .endr | ||
508 | .irpc index, \i_seq | ||
509 | movaps 0x50(%arg1), \TMP1 | ||
510 | AESENC \TMP1, %xmm\index # Round 5 | ||
511 | .endr | ||
512 | .irpc index, \i_seq | ||
513 | movaps 0x60(%arg1), \TMP1 | ||
514 | AESENC \TMP1, %xmm\index # Round 6 | ||
515 | .endr | ||
516 | .irpc index, \i_seq | ||
517 | movaps 0x70(%arg1), \TMP1 | ||
518 | AESENC \TMP1, %xmm\index # Round 7 | ||
519 | .endr | ||
520 | .irpc index, \i_seq | ||
521 | movaps 0x80(%arg1), \TMP1 | ||
522 | AESENC \TMP1, %xmm\index # Round 8 | ||
523 | .endr | ||
524 | .irpc index, \i_seq | ||
525 | movaps 0x90(%arg1), \TMP1 | ||
526 | AESENC \TMP1, %xmm\index # Round 9 | ||
527 | .endr | ||
528 | .irpc index, \i_seq | ||
529 | movaps 0xa0(%arg1), \TMP1 | ||
530 | AESENCLAST \TMP1, %xmm\index # Round 10 | ||
531 | .endr | ||
532 | .irpc index, \i_seq | ||
533 | movdqu (%arg3 , %r11, 1), \TMP1 | ||
534 | pxor \TMP1, %xmm\index | ||
535 | movdqu %xmm\index, (%arg2 , %r11, 1) | ||
536 | # write back plaintext/ciphertext for num_initial_blocks | ||
537 | add $16, %r11 | ||
538 | |||
539 | movdqa SHUF_MASK(%rip), %xmm14 | ||
540 | PSHUFB_XMM %xmm14, %xmm\index | ||
541 | |||
542 | # prepare plaintext/ciphertext for GHASH computation | ||
543 | .endr | ||
544 | .endif | ||
545 | GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 | ||
546 | # apply GHASH on num_initial_blocks blocks | ||
547 | |||
548 | .if \i == 5 | ||
549 | pxor %xmm5, %xmm6 | ||
550 | GHASH_MUL %xmm6, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 | ||
551 | pxor %xmm6, %xmm7 | ||
552 | GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 | ||
553 | pxor %xmm7, %xmm8 | ||
554 | GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 | ||
555 | .elseif \i == 6 | ||
556 | pxor %xmm6, %xmm7 | ||
557 | GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 | ||
558 | pxor %xmm7, %xmm8 | ||
559 | GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 | ||
560 | .elseif \i == 7 | ||
561 | pxor %xmm7, %xmm8 | ||
562 | GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 | ||
563 | .endif | ||
564 | cmp $64, %r13 | ||
565 | jl _initial_blocks_done\num_initial_blocks\operation | ||
566 | # no need for precomputed values | ||
567 | /* | ||
568 | * | ||
569 | * Precomputations for HashKey parallel with encryption of first 4 blocks. | ||
570 | * HashKey_i_k holds the XOR of the low and high halves of HashKey_i | ||
571 | */ | ||
572 | paddd ONE(%rip), \XMM0 # INCR Y0 | ||
573 | movdqa \XMM0, \XMM1 | ||
574 | movdqa SHUF_MASK(%rip), %xmm14 | ||
575 | PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap | ||
576 | |||
577 | paddd ONE(%rip), \XMM0 # INCR Y0 | ||
578 | movdqa \XMM0, \XMM2 | ||
579 | movdqa SHUF_MASK(%rip), %xmm14 | ||
580 | PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap | ||
581 | |||
582 | paddd ONE(%rip), \XMM0 # INCR Y0 | ||
583 | movdqa \XMM0, \XMM3 | ||
584 | movdqa SHUF_MASK(%rip), %xmm14 | ||
585 | PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap | ||
586 | |||
587 | paddd ONE(%rip), \XMM0 # INCR Y0 | ||
588 | movdqa \XMM0, \XMM4 | ||
589 | movdqa SHUF_MASK(%rip), %xmm14 | ||
590 | PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap | ||
591 | |||
592 | pxor 16*0(%arg1), \XMM1 | ||
593 | pxor 16*0(%arg1), \XMM2 | ||
594 | pxor 16*0(%arg1), \XMM3 | ||
595 | pxor 16*0(%arg1), \XMM4 | ||
596 | movdqa \TMP3, \TMP5 | ||
597 | pshufd $78, \TMP3, \TMP1 | ||
598 | pxor \TMP3, \TMP1 | ||
599 | movdqa \TMP1, HashKey_k(%rsp) | ||
600 | GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 | ||
601 | # TMP5 = HashKey^2<<1 (mod poly) | ||
602 | movdqa \TMP5, HashKey_2(%rsp) | ||
603 | # HashKey_2 = HashKey^2<<1 (mod poly) | ||
604 | pshufd $78, \TMP5, \TMP1 | ||
605 | pxor \TMP5, \TMP1 | ||
606 | movdqa \TMP1, HashKey_2_k(%rsp) | ||
607 | .irpc index, 1234 # do 4 rounds | ||
608 | movaps 0x10*\index(%arg1), \TMP1 | ||
609 | AESENC \TMP1, \XMM1 | ||
610 | AESENC \TMP1, \XMM2 | ||
611 | AESENC \TMP1, \XMM3 | ||
612 | AESENC \TMP1, \XMM4 | ||
613 | .endr | ||
614 | GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 | ||
615 | # TMP5 = HashKey^3<<1 (mod poly) | ||
616 | movdqa \TMP5, HashKey_3(%rsp) | ||
617 | pshufd $78, \TMP5, \TMP1 | ||
618 | pxor \TMP5, \TMP1 | ||
619 | movdqa \TMP1, HashKey_3_k(%rsp) | ||
620 | .irpc index, 56789 # do next 5 rounds | ||
621 | movaps 0x10*\index(%arg1), \TMP1 | ||
622 | AESENC \TMP1, \XMM1 | ||
623 | AESENC \TMP1, \XMM2 | ||
624 | AESENC \TMP1, \XMM3 | ||
625 | AESENC \TMP1, \XMM4 | ||
626 | .endr | ||
627 | GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 | ||
628 | # TMP5 = HashKey^4<<1 (mod poly) | ||
629 | movdqa \TMP5, HashKey_4(%rsp) | ||
630 | pshufd $78, \TMP5, \TMP1 | ||
631 | pxor \TMP5, \TMP1 | ||
632 | movdqa \TMP1, HashKey_4_k(%rsp) | ||
633 | movaps 0xa0(%arg1), \TMP2 | ||
634 | AESENCLAST \TMP2, \XMM1 | ||
635 | AESENCLAST \TMP2, \XMM2 | ||
636 | AESENCLAST \TMP2, \XMM3 | ||
637 | AESENCLAST \TMP2, \XMM4 | ||
638 | movdqu 16*0(%arg3 , %r11 , 1), \TMP1 | ||
639 | pxor \TMP1, \XMM1 | ||
640 | movdqu 16*1(%arg3 , %r11 , 1), \TMP1 | ||
641 | pxor \TMP1, \XMM2 | ||
642 | movdqu 16*2(%arg3 , %r11 , 1), \TMP1 | ||
643 | pxor \TMP1, \XMM3 | ||
644 | movdqu 16*3(%arg3 , %r11 , 1), \TMP1 | ||
645 | pxor \TMP1, \XMM4 | ||
410 | movdqu \XMM1, 16*0(%arg2 , %r11 , 1) | 646 | movdqu \XMM1, 16*0(%arg2 , %r11 , 1) |
411 | movdqu \XMM2, 16*1(%arg2 , %r11 , 1) | 647 | movdqu \XMM2, 16*1(%arg2 , %r11 , 1) |
412 | movdqu \XMM3, 16*2(%arg2 , %r11 , 1) | 648 | movdqu \XMM3, 16*2(%arg2 , %r11 , 1) |
413 | movdqu \XMM4, 16*3(%arg2 , %r11 , 1) | 649 | movdqu \XMM4, 16*3(%arg2 , %r11 , 1) |
414 | .endif | 650 | |
415 | add $64, %r11 | 651 | add $64, %r11 |
416 | pshufb SHUF_MASK(%rip), \XMM1 # perform a 16 byte swap | 652 | movdqa SHUF_MASK(%rip), %xmm14 |
653 | PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap | ||
417 | pxor \XMMDst, \XMM1 | 654 | pxor \XMMDst, \XMM1 |
418 | # combine GHASHed value with the corresponding ciphertext | 655 | # combine GHASHed value with the corresponding ciphertext |
419 | pshufb SHUF_MASK(%rip), \XMM2 # perform a 16 byte swap | 656 | movdqa SHUF_MASK(%rip), %xmm14 |
420 | pshufb SHUF_MASK(%rip), \XMM3 # perform a 16 byte swap | 657 | PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap |
421 | pshufb SHUF_MASK(%rip), \XMM4 # perform a 16 byte swap | 658 | movdqa SHUF_MASK(%rip), %xmm14 |
659 | PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap | ||
660 | movdqa SHUF_MASK(%rip), %xmm14 | ||
661 | PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap | ||
662 | |||
422 | _initial_blocks_done\num_initial_blocks\operation: | 663 | _initial_blocks_done\num_initial_blocks\operation: |
664 | |||
423 | .endm | 665 | .endm |
424 | 666 | ||
425 | /* | 667 | /* |
@@ -428,7 +670,199 @@ _initial_blocks_done\num_initial_blocks\operation: | |||
428 | * arg1, %arg2, %arg3 are used as pointers only, not modified | 670 | * arg1, %arg2, %arg3 are used as pointers only, not modified |
429 | * %r11 is the data offset value | 671 | * %r11 is the data offset value |
430 | */ | 672 | */ |
431 | .macro GHASH_4_ENCRYPT_4_PARALLEL TMP1 TMP2 TMP3 TMP4 TMP5 \ | 673 | .macro GHASH_4_ENCRYPT_4_PARALLEL_ENC TMP1 TMP2 TMP3 TMP4 TMP5 \ |
674 | TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation | ||
675 | |||
676 | movdqa \XMM1, \XMM5 | ||
677 | movdqa \XMM2, \XMM6 | ||
678 | movdqa \XMM3, \XMM7 | ||
679 | movdqa \XMM4, \XMM8 | ||
680 | |||
681 | movdqa SHUF_MASK(%rip), %xmm15 | ||
682 | # multiply XMM5 * HashKey_4 (loaded into TMP5) using karatsuba | ||
683 | |||
684 | movdqa \XMM5, \TMP4 | ||
685 | pshufd $78, \XMM5, \TMP6 | ||
686 | pxor \XMM5, \TMP6 | ||
687 | paddd ONE(%rip), \XMM0 # INCR CNT | ||
688 | movdqa HashKey_4(%rsp), \TMP5 | ||
689 | PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1 | ||
690 | movdqa \XMM0, \XMM1 | ||
691 | paddd ONE(%rip), \XMM0 # INCR CNT | ||
692 | movdqa \XMM0, \XMM2 | ||
693 | paddd ONE(%rip), \XMM0 # INCR CNT | ||
694 | movdqa \XMM0, \XMM3 | ||
695 | paddd ONE(%rip), \XMM0 # INCR CNT | ||
696 | movdqa \XMM0, \XMM4 | ||
697 | PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap | ||
698 | PCLMULQDQ 0x00, \TMP5, \XMM5 # XMM5 = a0*b0 | ||
699 | PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap | ||
700 | PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap | ||
701 | PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap | ||
702 | |||
703 | pxor (%arg1), \XMM1 | ||
704 | pxor (%arg1), \XMM2 | ||
705 | pxor (%arg1), \XMM3 | ||
706 | pxor (%arg1), \XMM4 | ||
707 | movdqa HashKey_4_k(%rsp), \TMP5 | ||
708 | PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0) | ||
709 | movaps 0x10(%arg1), \TMP1 | ||
710 | AESENC \TMP1, \XMM1 # Round 1 | ||
711 | AESENC \TMP1, \XMM2 | ||
712 | AESENC \TMP1, \XMM3 | ||
713 | AESENC \TMP1, \XMM4 | ||
714 | movaps 0x20(%arg1), \TMP1 | ||
715 | AESENC \TMP1, \XMM1 # Round 2 | ||
716 | AESENC \TMP1, \XMM2 | ||
717 | AESENC \TMP1, \XMM3 | ||
718 | AESENC \TMP1, \XMM4 | ||
719 | movdqa \XMM6, \TMP1 | ||
720 | pshufd $78, \XMM6, \TMP2 | ||
721 | pxor \XMM6, \TMP2 | ||
722 | movdqa HashKey_3(%rsp), \TMP5 | ||
723 | PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1 | ||
724 | movaps 0x30(%arg1), \TMP3 | ||
725 | AESENC \TMP3, \XMM1 # Round 3 | ||
726 | AESENC \TMP3, \XMM2 | ||
727 | AESENC \TMP3, \XMM3 | ||
728 | AESENC \TMP3, \XMM4 | ||
729 | PCLMULQDQ 0x00, \TMP5, \XMM6 # XMM6 = a0*b0 | ||
730 | movaps 0x40(%arg1), \TMP3 | ||
731 | AESENC \TMP3, \XMM1 # Round 4 | ||
732 | AESENC \TMP3, \XMM2 | ||
733 | AESENC \TMP3, \XMM3 | ||
734 | AESENC \TMP3, \XMM4 | ||
735 | movdqa HashKey_3_k(%rsp), \TMP5 | ||
736 | PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) | ||
737 | movaps 0x50(%arg1), \TMP3 | ||
738 | AESENC \TMP3, \XMM1 # Round 5 | ||
739 | AESENC \TMP3, \XMM2 | ||
740 | AESENC \TMP3, \XMM3 | ||
741 | AESENC \TMP3, \XMM4 | ||
742 | pxor \TMP1, \TMP4 | ||
743 | # accumulate the results in TMP4:XMM5, TMP6 holds the middle part | ||
744 | pxor \XMM6, \XMM5 | ||
745 | pxor \TMP2, \TMP6 | ||
746 | movdqa \XMM7, \TMP1 | ||
747 | pshufd $78, \XMM7, \TMP2 | ||
748 | pxor \XMM7, \TMP2 | ||
749 | movdqa HashKey_2(%rsp ), \TMP5 | ||
750 | |||
751 | # Multiply XMM7 * HashKey_2 (held in TMP5) using karatsuba | ||
752 | |||
753 | PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 | ||
754 | movaps 0x60(%arg1), \TMP3 | ||
755 | AESENC \TMP3, \XMM1 # Round 6 | ||
756 | AESENC \TMP3, \XMM2 | ||
757 | AESENC \TMP3, \XMM3 | ||
758 | AESENC \TMP3, \XMM4 | ||
759 | PCLMULQDQ 0x00, \TMP5, \XMM7 # XMM7 = a0*b0 | ||
760 | movaps 0x70(%arg1), \TMP3 | ||
761 | AESENC \TMP3, \XMM1 # Round 7 | ||
762 | AESENC \TMP3, \XMM2 | ||
763 | AESENC \TMP3, \XMM3 | ||
764 | AESENC \TMP3, \XMM4 | ||
765 | movdqa HashKey_2_k(%rsp), \TMP5 | ||
766 | PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) | ||
767 | movaps 0x80(%arg1), \TMP3 | ||
768 | AESENC \TMP3, \XMM1 # Round 8 | ||
769 | AESENC \TMP3, \XMM2 | ||
770 | AESENC \TMP3, \XMM3 | ||
771 | AESENC \TMP3, \XMM4 | ||
772 | pxor \TMP1, \TMP4 | ||
773 | # accumulate the results in TMP4:XMM5, TMP6 holds the middle part | ||
774 | pxor \XMM7, \XMM5 | ||
775 | pxor \TMP2, \TMP6 | ||
776 | |||
777 | # Multiply XMM8 * HashKey | ||
778 | # XMM8 and TMP5 hold the values for the two operands | ||
779 | |||
780 | movdqa \XMM8, \TMP1 | ||
781 | pshufd $78, \XMM8, \TMP2 | ||
782 | pxor \XMM8, \TMP2 | ||
783 | movdqa HashKey(%rsp), \TMP5 | ||
784 | PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 | ||
785 | movaps 0x90(%arg1), \TMP3 | ||
786 | AESENC \TMP3, \XMM1 # Round 9 | ||
787 | AESENC \TMP3, \XMM2 | ||
788 | AESENC \TMP3, \XMM3 | ||
789 | AESENC \TMP3, \XMM4 | ||
790 | PCLMULQDQ 0x00, \TMP5, \XMM8 # XMM8 = a0*b0 | ||
791 | movaps 0xa0(%arg1), \TMP3 | ||
792 | AESENCLAST \TMP3, \XMM1 # Round 10 | ||
793 | AESENCLAST \TMP3, \XMM2 | ||
794 | AESENCLAST \TMP3, \XMM3 | ||
795 | AESENCLAST \TMP3, \XMM4 | ||
796 | movdqa HashKey_k(%rsp), \TMP5 | ||
797 | PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) | ||
798 | movdqu (%arg3,%r11,1), \TMP3 | ||
799 | pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK | ||
800 | movdqu 16(%arg3,%r11,1), \TMP3 | ||
801 | pxor \TMP3, \XMM2 # Ciphertext/Plaintext XOR EK | ||
802 | movdqu 32(%arg3,%r11,1), \TMP3 | ||
803 | pxor \TMP3, \XMM3 # Ciphertext/Plaintext XOR EK | ||
804 | movdqu 48(%arg3,%r11,1), \TMP3 | ||
805 | pxor \TMP3, \XMM4 # Ciphertext/Plaintext XOR EK | ||
806 | movdqu \XMM1, (%arg2,%r11,1) # Write to the ciphertext buffer | ||
807 | movdqu \XMM2, 16(%arg2,%r11,1) # Write to the ciphertext buffer | ||
808 | movdqu \XMM3, 32(%arg2,%r11,1) # Write to the ciphertext buffer | ||
809 | movdqu \XMM4, 48(%arg2,%r11,1) # Write to the ciphertext buffer | ||
810 | PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap | ||
811 | PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap | ||
812 | PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap | ||
813 | PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap | ||
814 | |||
815 | pxor \TMP4, \TMP1 | ||
816 | pxor \XMM8, \XMM5 | ||
817 | pxor \TMP6, \TMP2 | ||
818 | pxor \TMP1, \TMP2 | ||
819 | pxor \XMM5, \TMP2 | ||
820 | movdqa \TMP2, \TMP3 | ||
821 | pslldq $8, \TMP3 # left shift TMP3 2 DWs | ||
822 | psrldq $8, \TMP2 # right shift TMP2 2 DWs | ||
823 | pxor \TMP3, \XMM5 | ||
824 | pxor \TMP2, \TMP1 # accumulate the results in TMP1:XMM5 | ||
825 | |||
826 | # first phase of reduction | ||
827 | |||
828 | movdqa \XMM5, \TMP2 | ||
829 | movdqa \XMM5, \TMP3 | ||
830 | movdqa \XMM5, \TMP4 | ||
831 | # move XMM5 into TMP2, TMP3, TMP4 in order to perform shifts independently | ||
832 | pslld $31, \TMP2 # packed left shift << 31 | ||
833 | pslld $30, \TMP3 # packed left shift << 30 | ||
834 | pslld $25, \TMP4 # packed left shift << 25 | ||
835 | pxor \TMP3, \TMP2 # xor the shifted versions | ||
836 | pxor \TMP4, \TMP2 | ||
837 | movdqa \TMP2, \TMP5 | ||
838 | psrldq $4, \TMP5 # right shift T5 1 DW | ||
839 | pslldq $12, \TMP2 # left shift T2 3 DWs | ||
840 | pxor \TMP2, \XMM5 | ||
841 | |||
842 | # second phase of reduction | ||
843 | |||
844 | movdqa \XMM5,\TMP2 # make 3 copies of XMM5 into TMP2, TMP3, TMP4 | ||
845 | movdqa \XMM5,\TMP3 | ||
846 | movdqa \XMM5,\TMP4 | ||
847 | psrld $1, \TMP2 # packed right shift >>1 | ||
848 | psrld $2, \TMP3 # packed right shift >>2 | ||
849 | psrld $7, \TMP4 # packed right shift >>7 | ||
850 | pxor \TMP3,\TMP2 # xor the shifted versions | ||
851 | pxor \TMP4,\TMP2 | ||
852 | pxor \TMP5, \TMP2 | ||
853 | pxor \TMP2, \XMM5 | ||
854 | pxor \TMP1, \XMM5 # result is in XMM5 | ||
855 | |||
856 | pxor \XMM5, \XMM1 | ||
857 | .endm | ||
858 | |||
859 | /* | ||
860 | * decrypt 4 blocks at a time | ||
861 | * ghash the 4 previously decrypted ciphertext blocks | ||
862 | * arg1, %arg2, %arg3 are used as pointers only, not modified | ||
863 | * %r11 is the data offset value | ||
864 | */ | ||
865 | .macro GHASH_4_ENCRYPT_4_PARALLEL_DEC TMP1 TMP2 TMP3 TMP4 TMP5 \ | ||
432 | TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation | 866 | TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation |
433 | 867 | ||
434 | movdqa \XMM1, \XMM5 | 868 | movdqa \XMM1, \XMM5 |
@@ -436,6 +870,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation | |||
436 | movdqa \XMM3, \XMM7 | 870 | movdqa \XMM3, \XMM7 |
437 | movdqa \XMM4, \XMM8 | 871 | movdqa \XMM4, \XMM8 |
438 | 872 | ||
873 | movdqa SHUF_MASK(%rip), %xmm15 | ||
439 | # multiply TMP5 * HashKey using karatsuba | 874 | # multiply TMP5 * HashKey using karatsuba |
440 | 875 | ||
441 | movdqa \XMM5, \TMP4 | 876 | movdqa \XMM5, \TMP4 |
@@ -451,11 +886,12 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation | |||
451 | movdqa \XMM0, \XMM3 | 886 | movdqa \XMM0, \XMM3 |
452 | paddd ONE(%rip), \XMM0 # INCR CNT | 887 | paddd ONE(%rip), \XMM0 # INCR CNT |
453 | movdqa \XMM0, \XMM4 | 888 | movdqa \XMM0, \XMM4 |
454 | pshufb SHUF_MASK(%rip), \XMM1 # perform a 16 byte swap | 889 | PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap |
455 | PCLMULQDQ 0x00, \TMP5, \XMM5 # XMM5 = a0*b0 | 890 | PCLMULQDQ 0x00, \TMP5, \XMM5 # XMM5 = a0*b0 |
456 | pshufb SHUF_MASK(%rip), \XMM2 # perform a 16 byte swap | 891 | PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap |
457 | pshufb SHUF_MASK(%rip), \XMM3 # perform a 16 byte swap | 892 | PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap |
458 | pshufb SHUF_MASK(%rip), \XMM4 # perform a 16 byte swap | 893 | PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap |
894 | |||
459 | pxor (%arg1), \XMM1 | 895 | pxor (%arg1), \XMM1 |
460 | pxor (%arg1), \XMM2 | 896 | pxor (%arg1), \XMM2 |
461 | pxor (%arg1), \XMM3 | 897 | pxor (%arg1), \XMM3 |
@@ -553,37 +989,24 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation | |||
553 | PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) | 989 | PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) |
554 | movdqu (%arg3,%r11,1), \TMP3 | 990 | movdqu (%arg3,%r11,1), \TMP3 |
555 | pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK | 991 | pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK |
556 | .if \operation == dec | ||
557 | movdqu \XMM1, (%arg2,%r11,1) # Write to plaintext buffer | 992 | movdqu \XMM1, (%arg2,%r11,1) # Write to plaintext buffer |
558 | movdqa \TMP3, \XMM1 | 993 | movdqa \TMP3, \XMM1 |
559 | .endif | ||
560 | movdqu 16(%arg3,%r11,1), \TMP3 | 994 | movdqu 16(%arg3,%r11,1), \TMP3 |
561 | pxor \TMP3, \XMM2 # Ciphertext/Plaintext XOR EK | 995 | pxor \TMP3, \XMM2 # Ciphertext/Plaintext XOR EK |
562 | .if \operation == dec | ||
563 | movdqu \XMM2, 16(%arg2,%r11,1) # Write to plaintext buffer | 996 | movdqu \XMM2, 16(%arg2,%r11,1) # Write to plaintext buffer |
564 | movdqa \TMP3, \XMM2 | 997 | movdqa \TMP3, \XMM2 |
565 | .endif | ||
566 | movdqu 32(%arg3,%r11,1), \TMP3 | 998 | movdqu 32(%arg3,%r11,1), \TMP3 |
567 | pxor \TMP3, \XMM3 # Ciphertext/Plaintext XOR EK | 999 | pxor \TMP3, \XMM3 # Ciphertext/Plaintext XOR EK |
568 | .if \operation == dec | ||
569 | movdqu \XMM3, 32(%arg2,%r11,1) # Write to plaintext buffer | 1000 | movdqu \XMM3, 32(%arg2,%r11,1) # Write to plaintext buffer |
570 | movdqa \TMP3, \XMM3 | 1001 | movdqa \TMP3, \XMM3 |
571 | .endif | ||
572 | movdqu 48(%arg3,%r11,1), \TMP3 | 1002 | movdqu 48(%arg3,%r11,1), \TMP3 |
573 | pxor \TMP3, \XMM4 # Ciphertext/Plaintext XOR EK | 1003 | pxor \TMP3, \XMM4 # Ciphertext/Plaintext XOR EK |
574 | .if \operation == dec | ||
575 | movdqu \XMM4, 48(%arg2,%r11,1) # Write to plaintext buffer | 1004 | movdqu \XMM4, 48(%arg2,%r11,1) # Write to plaintext buffer |
576 | movdqa \TMP3, \XMM4 | 1005 | movdqa \TMP3, \XMM4 |
577 | .else | 1006 | PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap |
578 | movdqu \XMM1, (%arg2,%r11,1) # Write to the ciphertext buffer | 1007 | PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap |
579 | movdqu \XMM2, 16(%arg2,%r11,1) # Write to the ciphertext buffer | 1008 | PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap |
580 | movdqu \XMM3, 32(%arg2,%r11,1) # Write to the ciphertext buffer | 1009 | PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap |
581 | movdqu \XMM4, 48(%arg2,%r11,1) # Write to the ciphertext buffer | ||
582 | .endif | ||
583 | pshufb SHUF_MASK(%rip), \XMM1 # perform a 16 byte swap | ||
584 | pshufb SHUF_MASK(%rip), \XMM2 # perform a 16 byte swap | ||
585 | pshufb SHUF_MASK(%rip), \XMM3 # perform a 16 byte swap | ||
586 | pshufb SHUF_MASK(%rip), \XMM4 # perform a 16 byte sway | ||
587 | 1010 | ||
588 | pxor \TMP4, \TMP1 | 1011 | pxor \TMP4, \TMP1 |
589 | pxor \XMM8, \XMM5 | 1012 | pxor \XMM8, \XMM5 |
@@ -853,7 +1276,9 @@ ENTRY(aesni_gcm_dec) | |||
853 | and $~63, %rsp # align rsp to 64 bytes | 1276 | and $~63, %rsp # align rsp to 64 bytes |
854 | mov %arg6, %r12 | 1277 | mov %arg6, %r12 |
855 | movdqu (%r12), %xmm13 # %xmm13 = HashKey | 1278 | movdqu (%r12), %xmm13 # %xmm13 = HashKey |
856 | pshufb SHUF_MASK(%rip), %xmm13 | 1279 | movdqa SHUF_MASK(%rip), %xmm2 |
1280 | PSHUFB_XMM %xmm2, %xmm13 | ||
1281 | |||
857 | 1282 | ||
858 | # Precompute HashKey<<1 (mod poly) from the hash key (required for GHASH) | 1283 | # Precompute HashKey<<1 (mod poly) from the hash key (required for GHASH) |
859 | 1284 | ||
@@ -885,22 +1310,22 @@ ENTRY(aesni_gcm_dec) | |||
885 | jb _initial_num_blocks_is_1_decrypt | 1310 | jb _initial_num_blocks_is_1_decrypt |
886 | je _initial_num_blocks_is_2_decrypt | 1311 | je _initial_num_blocks_is_2_decrypt |
887 | _initial_num_blocks_is_3_decrypt: | 1312 | _initial_num_blocks_is_3_decrypt: |
888 | INITIAL_BLOCKS 3, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ | 1313 | INITIAL_BLOCKS_DEC 3, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ |
889 | %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, dec | 1314 | %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, dec |
890 | sub $48, %r13 | 1315 | sub $48, %r13 |
891 | jmp _initial_blocks_decrypted | 1316 | jmp _initial_blocks_decrypted |
892 | _initial_num_blocks_is_2_decrypt: | 1317 | _initial_num_blocks_is_2_decrypt: |
893 | INITIAL_BLOCKS 2, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ | 1318 | INITIAL_BLOCKS_DEC 2, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ |
894 | %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, dec | 1319 | %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, dec |
895 | sub $32, %r13 | 1320 | sub $32, %r13 |
896 | jmp _initial_blocks_decrypted | 1321 | jmp _initial_blocks_decrypted |
897 | _initial_num_blocks_is_1_decrypt: | 1322 | _initial_num_blocks_is_1_decrypt: |
898 | INITIAL_BLOCKS 1, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ | 1323 | INITIAL_BLOCKS_DEC 1, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ |
899 | %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, dec | 1324 | %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, dec |
900 | sub $16, %r13 | 1325 | sub $16, %r13 |
901 | jmp _initial_blocks_decrypted | 1326 | jmp _initial_blocks_decrypted |
902 | _initial_num_blocks_is_0_decrypt: | 1327 | _initial_num_blocks_is_0_decrypt: |
903 | INITIAL_BLOCKS 0, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ | 1328 | INITIAL_BLOCKS_DEC 0, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ |
904 | %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, dec | 1329 | %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, dec |
905 | _initial_blocks_decrypted: | 1330 | _initial_blocks_decrypted: |
906 | cmp $0, %r13 | 1331 | cmp $0, %r13 |
@@ -908,7 +1333,7 @@ _initial_blocks_decrypted: | |||
908 | sub $64, %r13 | 1333 | sub $64, %r13 |
909 | je _four_cipher_left_decrypt | 1334 | je _four_cipher_left_decrypt |
910 | _decrypt_by_4: | 1335 | _decrypt_by_4: |
911 | GHASH_4_ENCRYPT_4_PARALLEL %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \ | 1336 | GHASH_4_ENCRYPT_4_PARALLEL_DEC %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \ |
912 | %xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, dec | 1337 | %xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, dec |
913 | add $64, %r11 | 1338 | add $64, %r11 |
914 | sub $64, %r13 | 1339 | sub $64, %r13 |
@@ -924,7 +1349,9 @@ _zero_cipher_left_decrypt: | |||
924 | # Handle the last <16 byte block seperately | 1349 | # Handle the last <16 byte block seperately |
925 | 1350 | ||
926 | paddd ONE(%rip), %xmm0 # increment CNT to get Yn | 1351 | paddd ONE(%rip), %xmm0 # increment CNT to get Yn |
927 | pshufb SHUF_MASK(%rip), %xmm0 | 1352 | movdqa SHUF_MASK(%rip), %xmm10 |
1353 | PSHUFB_XMM %xmm10, %xmm0 | ||
1354 | |||
928 | ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Yn) | 1355 | ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Yn) |
929 | sub $16, %r11 | 1356 | sub $16, %r11 |
930 | add %r13, %r11 | 1357 | add %r13, %r11 |
@@ -934,14 +1361,17 @@ _zero_cipher_left_decrypt: | |||
934 | # adjust the shuffle mask pointer to be able to shift 16-%r13 bytes | 1361 | # adjust the shuffle mask pointer to be able to shift 16-%r13 bytes |
935 | # (%r13 is the number of bytes in plaintext mod 16) | 1362 | # (%r13 is the number of bytes in plaintext mod 16) |
936 | movdqu (%r12), %xmm2 # get the appropriate shuffle mask | 1363 | movdqu (%r12), %xmm2 # get the appropriate shuffle mask |
937 | pshufb %xmm2, %xmm1 # right shift 16-%r13 butes | 1364 | PSHUFB_XMM %xmm2, %xmm1 # right shift 16-%r13 butes |
1365 | |||
938 | movdqa %xmm1, %xmm2 | 1366 | movdqa %xmm1, %xmm2 |
939 | pxor %xmm1, %xmm0 # Ciphertext XOR E(K, Yn) | 1367 | pxor %xmm1, %xmm0 # Ciphertext XOR E(K, Yn) |
940 | movdqu ALL_F-SHIFT_MASK(%r12), %xmm1 | 1368 | movdqu ALL_F-SHIFT_MASK(%r12), %xmm1 |
941 | # get the appropriate mask to mask out top 16-%r13 bytes of %xmm0 | 1369 | # get the appropriate mask to mask out top 16-%r13 bytes of %xmm0 |
942 | pand %xmm1, %xmm0 # mask out top 16-%r13 bytes of %xmm0 | 1370 | pand %xmm1, %xmm0 # mask out top 16-%r13 bytes of %xmm0 |
943 | pand %xmm1, %xmm2 | 1371 | pand %xmm1, %xmm2 |
944 | pshufb SHUF_MASK(%rip),%xmm2 | 1372 | movdqa SHUF_MASK(%rip), %xmm10 |
1373 | PSHUFB_XMM %xmm10 ,%xmm2 | ||
1374 | |||
945 | pxor %xmm2, %xmm8 | 1375 | pxor %xmm2, %xmm8 |
946 | GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 | 1376 | GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 |
947 | # GHASH computation for the last <16 byte block | 1377 | # GHASH computation for the last <16 byte block |
@@ -949,13 +1379,13 @@ _zero_cipher_left_decrypt: | |||
949 | add $16, %r11 | 1379 | add $16, %r11 |
950 | 1380 | ||
951 | # output %r13 bytes | 1381 | # output %r13 bytes |
952 | movq %xmm0, %rax | 1382 | MOVQ_R64_XMM %xmm0, %rax |
953 | cmp $8, %r13 | 1383 | cmp $8, %r13 |
954 | jle _less_than_8_bytes_left_decrypt | 1384 | jle _less_than_8_bytes_left_decrypt |
955 | mov %rax, (%arg2 , %r11, 1) | 1385 | mov %rax, (%arg2 , %r11, 1) |
956 | add $8, %r11 | 1386 | add $8, %r11 |
957 | psrldq $8, %xmm0 | 1387 | psrldq $8, %xmm0 |
958 | movq %xmm0, %rax | 1388 | MOVQ_R64_XMM %xmm0, %rax |
959 | sub $8, %r13 | 1389 | sub $8, %r13 |
960 | _less_than_8_bytes_left_decrypt: | 1390 | _less_than_8_bytes_left_decrypt: |
961 | mov %al, (%arg2, %r11, 1) | 1391 | mov %al, (%arg2, %r11, 1) |
@@ -968,13 +1398,15 @@ _multiple_of_16_bytes_decrypt: | |||
968 | shl $3, %r12 # convert into number of bits | 1398 | shl $3, %r12 # convert into number of bits |
969 | movd %r12d, %xmm15 # len(A) in %xmm15 | 1399 | movd %r12d, %xmm15 # len(A) in %xmm15 |
970 | shl $3, %arg4 # len(C) in bits (*128) | 1400 | shl $3, %arg4 # len(C) in bits (*128) |
971 | movq %arg4, %xmm1 | 1401 | MOVQ_R64_XMM %arg4, %xmm1 |
972 | pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000 | 1402 | pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000 |
973 | pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C) | 1403 | pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C) |
974 | pxor %xmm15, %xmm8 | 1404 | pxor %xmm15, %xmm8 |
975 | GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 | 1405 | GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 |
976 | # final GHASH computation | 1406 | # final GHASH computation |
977 | pshufb SHUF_MASK(%rip), %xmm8 | 1407 | movdqa SHUF_MASK(%rip), %xmm10 |
1408 | PSHUFB_XMM %xmm10, %xmm8 | ||
1409 | |||
978 | mov %arg5, %rax # %rax = *Y0 | 1410 | mov %arg5, %rax # %rax = *Y0 |
979 | movdqu (%rax), %xmm0 # %xmm0 = Y0 | 1411 | movdqu (%rax), %xmm0 # %xmm0 = Y0 |
980 | ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Y0) | 1412 | ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Y0) |
@@ -987,11 +1419,11 @@ _return_T_decrypt: | |||
987 | cmp $12, %r11 | 1419 | cmp $12, %r11 |
988 | je _T_12_decrypt | 1420 | je _T_12_decrypt |
989 | _T_8_decrypt: | 1421 | _T_8_decrypt: |
990 | movq %xmm0, %rax | 1422 | MOVQ_R64_XMM %xmm0, %rax |
991 | mov %rax, (%r10) | 1423 | mov %rax, (%r10) |
992 | jmp _return_T_done_decrypt | 1424 | jmp _return_T_done_decrypt |
993 | _T_12_decrypt: | 1425 | _T_12_decrypt: |
994 | movq %xmm0, %rax | 1426 | MOVQ_R64_XMM %xmm0, %rax |
995 | mov %rax, (%r10) | 1427 | mov %rax, (%r10) |
996 | psrldq $8, %xmm0 | 1428 | psrldq $8, %xmm0 |
997 | movd %xmm0, %eax | 1429 | movd %xmm0, %eax |
@@ -1103,7 +1535,9 @@ ENTRY(aesni_gcm_enc) | |||
1103 | and $~63, %rsp | 1535 | and $~63, %rsp |
1104 | mov %arg6, %r12 | 1536 | mov %arg6, %r12 |
1105 | movdqu (%r12), %xmm13 | 1537 | movdqu (%r12), %xmm13 |
1106 | pshufb SHUF_MASK(%rip), %xmm13 | 1538 | movdqa SHUF_MASK(%rip), %xmm2 |
1539 | PSHUFB_XMM %xmm2, %xmm13 | ||
1540 | |||
1107 | 1541 | ||
1108 | # precompute HashKey<<1 mod poly from the HashKey (required for GHASH) | 1542 | # precompute HashKey<<1 mod poly from the HashKey (required for GHASH) |
1109 | 1543 | ||
@@ -1134,22 +1568,22 @@ ENTRY(aesni_gcm_enc) | |||
1134 | jb _initial_num_blocks_is_1_encrypt | 1568 | jb _initial_num_blocks_is_1_encrypt |
1135 | je _initial_num_blocks_is_2_encrypt | 1569 | je _initial_num_blocks_is_2_encrypt |
1136 | _initial_num_blocks_is_3_encrypt: | 1570 | _initial_num_blocks_is_3_encrypt: |
1137 | INITIAL_BLOCKS 3, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ | 1571 | INITIAL_BLOCKS_ENC 3, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ |
1138 | %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, enc | 1572 | %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, enc |
1139 | sub $48, %r13 | 1573 | sub $48, %r13 |
1140 | jmp _initial_blocks_encrypted | 1574 | jmp _initial_blocks_encrypted |
1141 | _initial_num_blocks_is_2_encrypt: | 1575 | _initial_num_blocks_is_2_encrypt: |
1142 | INITIAL_BLOCKS 2, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ | 1576 | INITIAL_BLOCKS_ENC 2, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ |
1143 | %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, enc | 1577 | %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, enc |
1144 | sub $32, %r13 | 1578 | sub $32, %r13 |
1145 | jmp _initial_blocks_encrypted | 1579 | jmp _initial_blocks_encrypted |
1146 | _initial_num_blocks_is_1_encrypt: | 1580 | _initial_num_blocks_is_1_encrypt: |
1147 | INITIAL_BLOCKS 1, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ | 1581 | INITIAL_BLOCKS_ENC 1, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ |
1148 | %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, enc | 1582 | %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, enc |
1149 | sub $16, %r13 | 1583 | sub $16, %r13 |
1150 | jmp _initial_blocks_encrypted | 1584 | jmp _initial_blocks_encrypted |
1151 | _initial_num_blocks_is_0_encrypt: | 1585 | _initial_num_blocks_is_0_encrypt: |
1152 | INITIAL_BLOCKS 0, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ | 1586 | INITIAL_BLOCKS_ENC 0, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ |
1153 | %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, enc | 1587 | %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, enc |
1154 | _initial_blocks_encrypted: | 1588 | _initial_blocks_encrypted: |
1155 | 1589 | ||
@@ -1160,7 +1594,7 @@ _initial_blocks_encrypted: | |||
1160 | sub $64, %r13 | 1594 | sub $64, %r13 |
1161 | je _four_cipher_left_encrypt | 1595 | je _four_cipher_left_encrypt |
1162 | _encrypt_by_4_encrypt: | 1596 | _encrypt_by_4_encrypt: |
1163 | GHASH_4_ENCRYPT_4_PARALLEL %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \ | 1597 | GHASH_4_ENCRYPT_4_PARALLEL_ENC %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \ |
1164 | %xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, enc | 1598 | %xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, enc |
1165 | add $64, %r11 | 1599 | add $64, %r11 |
1166 | sub $64, %r13 | 1600 | sub $64, %r13 |
@@ -1175,7 +1609,9 @@ _zero_cipher_left_encrypt: | |||
1175 | 1609 | ||
1176 | # Handle the last <16 Byte block seperately | 1610 | # Handle the last <16 Byte block seperately |
1177 | paddd ONE(%rip), %xmm0 # INCR CNT to get Yn | 1611 | paddd ONE(%rip), %xmm0 # INCR CNT to get Yn |
1178 | pshufb SHUF_MASK(%rip), %xmm0 | 1612 | movdqa SHUF_MASK(%rip), %xmm10 |
1613 | PSHUFB_XMM %xmm10, %xmm0 | ||
1614 | |||
1179 | ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # Encrypt(K, Yn) | 1615 | ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # Encrypt(K, Yn) |
1180 | sub $16, %r11 | 1616 | sub $16, %r11 |
1181 | add %r13, %r11 | 1617 | add %r13, %r11 |
@@ -1185,29 +1621,31 @@ _zero_cipher_left_encrypt: | |||
1185 | # adjust the shuffle mask pointer to be able to shift 16-r13 bytes | 1621 | # adjust the shuffle mask pointer to be able to shift 16-r13 bytes |
1186 | # (%r13 is the number of bytes in plaintext mod 16) | 1622 | # (%r13 is the number of bytes in plaintext mod 16) |
1187 | movdqu (%r12), %xmm2 # get the appropriate shuffle mask | 1623 | movdqu (%r12), %xmm2 # get the appropriate shuffle mask |
1188 | pshufb %xmm2, %xmm1 # shift right 16-r13 byte | 1624 | PSHUFB_XMM %xmm2, %xmm1 # shift right 16-r13 byte |
1189 | pxor %xmm1, %xmm0 # Plaintext XOR Encrypt(K, Yn) | 1625 | pxor %xmm1, %xmm0 # Plaintext XOR Encrypt(K, Yn) |
1190 | movdqu ALL_F-SHIFT_MASK(%r12), %xmm1 | 1626 | movdqu ALL_F-SHIFT_MASK(%r12), %xmm1 |
1191 | # get the appropriate mask to mask out top 16-r13 bytes of xmm0 | 1627 | # get the appropriate mask to mask out top 16-r13 bytes of xmm0 |
1192 | pand %xmm1, %xmm0 # mask out top 16-r13 bytes of xmm0 | 1628 | pand %xmm1, %xmm0 # mask out top 16-r13 bytes of xmm0 |
1629 | movdqa SHUF_MASK(%rip), %xmm10 | ||
1630 | PSHUFB_XMM %xmm10,%xmm0 | ||
1193 | 1631 | ||
1194 | pshufb SHUF_MASK(%rip),%xmm0 | ||
1195 | pxor %xmm0, %xmm8 | 1632 | pxor %xmm0, %xmm8 |
1196 | GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 | 1633 | GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 |
1197 | # GHASH computation for the last <16 byte block | 1634 | # GHASH computation for the last <16 byte block |
1198 | sub %r13, %r11 | 1635 | sub %r13, %r11 |
1199 | add $16, %r11 | 1636 | add $16, %r11 |
1200 | pshufb SHUF_MASK(%rip), %xmm0 | 1637 | movdqa SHUF_MASK(%rip), %xmm10 # reload the mask: %xmm10 was handed to GHASH_MUL above and may have been overwritten |
1638 | PSHUFB_XMM %xmm10, %xmm0 # undo the byte swap on %xmm0 (not %xmm1): %xmm0 is the register stored as output below | ||
1201 | # shuffle xmm0 back to output as ciphertext | 1639 | # shuffle xmm0 back to output as ciphertext |
1202 | 1640 | ||
1203 | # Output %r13 bytes | 1641 | # Output %r13 bytes |
1204 | movq %xmm0, %rax | 1642 | MOVQ_R64_XMM %xmm0, %rax |
1205 | cmp $8, %r13 | 1643 | cmp $8, %r13 |
1206 | jle _less_than_8_bytes_left_encrypt | 1644 | jle _less_than_8_bytes_left_encrypt |
1207 | mov %rax, (%arg2 , %r11, 1) | 1645 | mov %rax, (%arg2 , %r11, 1) |
1208 | add $8, %r11 | 1646 | add $8, %r11 |
1209 | psrldq $8, %xmm0 | 1647 | psrldq $8, %xmm0 |
1210 | movq %xmm0, %rax | 1648 | MOVQ_R64_XMM %xmm0, %rax |
1211 | sub $8, %r13 | 1649 | sub $8, %r13 |
1212 | _less_than_8_bytes_left_encrypt: | 1650 | _less_than_8_bytes_left_encrypt: |
1213 | mov %al, (%arg2, %r11, 1) | 1651 | mov %al, (%arg2, %r11, 1) |
@@ -1220,14 +1658,15 @@ _multiple_of_16_bytes_encrypt: | |||
1220 | shl $3, %r12 | 1658 | shl $3, %r12 |
1221 | movd %r12d, %xmm15 # len(A) in %xmm15 | 1659 | movd %r12d, %xmm15 # len(A) in %xmm15 |
1222 | shl $3, %arg4 # len(C) in bits (*128) | 1660 | shl $3, %arg4 # len(C) in bits (*128) |
1223 | movq %arg4, %xmm1 | 1661 | MOVQ_R64_XMM %arg4, %xmm1 |
1224 | pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000 | 1662 | pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000 |
1225 | pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C) | 1663 | pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C) |
1226 | pxor %xmm15, %xmm8 | 1664 | pxor %xmm15, %xmm8 |
1227 | GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 | 1665 | GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 |
1228 | # final GHASH computation | 1666 | # final GHASH computation |
1667 | movdqa SHUF_MASK(%rip), %xmm10 | ||
1668 | PSHUFB_XMM %xmm10, %xmm8 # perform a 16 byte swap | ||
1229 | 1669 | ||
1230 | pshufb SHUF_MASK(%rip), %xmm8 # perform a 16 byte swap | ||
1231 | mov %arg5, %rax # %rax = *Y0 | 1670 | mov %arg5, %rax # %rax = *Y0 |
1232 | movdqu (%rax), %xmm0 # %xmm0 = Y0 | 1671 | movdqu (%rax), %xmm0 # %xmm0 = Y0 |
1233 | ENCRYPT_SINGLE_BLOCK %xmm0, %xmm15 # Encrypt(K, Y0) | 1672 | ENCRYPT_SINGLE_BLOCK %xmm0, %xmm15 # Encrypt(K, Y0) |
@@ -1240,11 +1679,11 @@ _return_T_encrypt: | |||
1240 | cmp $12, %r11 | 1679 | cmp $12, %r11 |
1241 | je _T_12_encrypt | 1680 | je _T_12_encrypt |
1242 | _T_8_encrypt: | 1681 | _T_8_encrypt: |
1243 | movq %xmm0, %rax | 1682 | MOVQ_R64_XMM %xmm0, %rax |
1244 | mov %rax, (%r10) | 1683 | mov %rax, (%r10) |
1245 | jmp _return_T_done_encrypt | 1684 | jmp _return_T_done_encrypt |
1246 | _T_12_encrypt: | 1685 | _T_12_encrypt: |
1247 | movq %xmm0, %rax | 1686 | MOVQ_R64_XMM %xmm0, %rax |
1248 | mov %rax, (%r10) | 1687 | mov %rax, (%r10) |
1249 | psrldq $8, %xmm0 | 1688 | psrldq $8, %xmm0 |
1250 | movd %xmm0, %eax | 1689 | movd %xmm0, %eax |
@@ -1258,6 +1697,7 @@ _return_T_done_encrypt: | |||
1258 | pop %r13 | 1697 | pop %r13 |
1259 | pop %r12 | 1698 | pop %r12 |
1260 | ret | 1699 | ret |
1700 | |||
1261 | #endif | 1701 | #endif |
1262 | 1702 | ||
1263 | 1703 | ||