diff options
author | Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | 2011-12-20 05:58:06 -0500 |
---|---|---|
committer | Herbert Xu <herbert@gondor.apana.org.au> | 2012-01-13 00:38:40 -0500 |
commit | 847cb7ef565d31484f426677e0bea081bfd2acd9 (patch) | |
tree | 7325f4ce5961e0d51ea4707119aeba80622991c3 /arch/x86/crypto | |
parent | 4c58464b8034cef4317593bf4ccbfc19d5bb3a77 (diff) |
crypto: serpent-sse2 - change transpose_4x4 to only use integer instructions
The matrix transpose macro in serpent-sse2 uses a mix of SSE2 integer and SSE
floating-point instructions, which might cause a performance penalty on some CPUs.
This patch replaces the transpose_4x4 macro with a version that uses only SSE2
integer instructions.
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'arch/x86/crypto')
-rw-r--r-- | arch/x86/crypto/serpent-sse2-i586-asm_32.S | 29 | ||||
-rw-r--r-- | arch/x86/crypto/serpent-sse2-x86_64-asm_64.S | 29 |
2 files changed, 26 insertions, 32 deletions
diff --git a/arch/x86/crypto/serpent-sse2-i586-asm_32.S b/arch/x86/crypto/serpent-sse2-i586-asm_32.S index 4e37677ca851..c00053d42f99 100644 --- a/arch/x86/crypto/serpent-sse2-i586-asm_32.S +++ b/arch/x86/crypto/serpent-sse2-i586-asm_32.S | |||
@@ -463,23 +463,20 @@ | |||
463 | pand x0, x4; \ | 463 | pand x0, x4; \ |
464 | pxor x2, x4; | 464 | pxor x2, x4; |
465 | 465 | ||
466 | #define transpose_4x4(x0, x1, x2, x3, t1, t2, t3) \ | 466 | #define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ |
467 | movdqa x2, t3; \ | ||
468 | movdqa x0, t1; \ | ||
469 | unpcklps x3, t3; \ | ||
470 | movdqa x0, t2; \ | 467 | movdqa x0, t2; \ |
471 | unpcklps x1, t1; \ | 468 | punpckldq x1, x0; \ |
472 | unpckhps x1, t2; \ | 469 | punpckhdq x1, t2; \ |
473 | movdqa t3, x1; \ | 470 | movdqa x2, t1; \ |
474 | unpckhps x3, x2; \ | 471 | punpckhdq x3, x2; \ |
475 | movdqa t1, x0; \ | 472 | punpckldq x3, t1; \ |
476 | movhlps t1, x1; \ | 473 | movdqa x0, x1; \ |
477 | movdqa t2, t1; \ | 474 | punpcklqdq t1, x0; \ |
478 | movlhps t3, x0; \ | 475 | punpckhqdq t1, x1; \ |
479 | movlhps x2, t1; \ | 476 | movdqa t2, x3; \ |
480 | movhlps t2, x2; \ | 477 | punpcklqdq x2, t2; \ |
481 | movdqa x2, x3; \ | 478 | punpckhqdq x2, x3; \ |
482 | movdqa t1, x2; | 479 | movdqa t2, x2; |
483 | 480 | ||
484 | #define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \ | 481 | #define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \ |
485 | movdqu (0*4*4)(in), x0; \ | 482 | movdqu (0*4*4)(in), x0; \ |
diff --git a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S index 7f24a1540821..3ee1ff04d3e9 100644 --- a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S +++ b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S | |||
@@ -585,23 +585,20 @@ | |||
585 | get_key(i, 1, RK1); \ | 585 | get_key(i, 1, RK1); \ |
586 | SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ | 586 | SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ |
587 | 587 | ||
588 | #define transpose_4x4(x0, x1, x2, x3, t1, t2, t3) \ | 588 | #define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ |
589 | movdqa x2, t3; \ | ||
590 | movdqa x0, t1; \ | ||
591 | unpcklps x3, t3; \ | ||
592 | movdqa x0, t2; \ | 589 | movdqa x0, t2; \ |
593 | unpcklps x1, t1; \ | 590 | punpckldq x1, x0; \ |
594 | unpckhps x1, t2; \ | 591 | punpckhdq x1, t2; \ |
595 | movdqa t3, x1; \ | 592 | movdqa x2, t1; \ |
596 | unpckhps x3, x2; \ | 593 | punpckhdq x3, x2; \ |
597 | movdqa t1, x0; \ | 594 | punpckldq x3, t1; \ |
598 | movhlps t1, x1; \ | 595 | movdqa x0, x1; \ |
599 | movdqa t2, t1; \ | 596 | punpcklqdq t1, x0; \ |
600 | movlhps t3, x0; \ | 597 | punpckhqdq t1, x1; \ |
601 | movlhps x2, t1; \ | 598 | movdqa t2, x3; \ |
602 | movhlps t2, x2; \ | 599 | punpcklqdq x2, t2; \ |
603 | movdqa x2, x3; \ | 600 | punpckhqdq x2, x3; \ |
604 | movdqa t1, x2; | 601 | movdqa t2, x2; |
605 | 602 | ||
606 | #define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \ | 603 | #define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \ |
607 | movdqu (0*4*4)(in), x0; \ | 604 | movdqu (0*4*4)(in), x0; \ |