aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDenys Vlasenko <vda.linux@googlemail.com>2007-10-26 04:22:57 -0400
committerHerbert Xu <herbert@gondor.apana.org.au>2008-01-10 16:16:06 -0500
commite2b21b5002a2bf21ca73c7448309a7288a984ddf (patch)
tree11e55173debdb2428a37655968ceef39786f2383
parentb7a30da61adc5f252ee97b2a4f3fc23c9d06a08a (diff)
[CRYPTO] twofish: Do not unroll big stuff in twofish key setup
Currently twofish cipher key setup code has unrolled loops - approximately 70-100 instructions are repeated 40 times. As a result, twofish module is the biggest module in crypto/*. Unrolling produces x2.5 more code (+18k on i386), and speeds up key setup by 7%: unrolled: twofish_setkey/sec: 41128 loop: twofish_setkey/sec: 38148 CALC_K256: ~100 insns each CALC_K192: ~90 insns CALC_K: ~70 insns Attached patch removes this unrolling. $ size */twofish_common.o text data bss dec hex filename 37920 0 0 37920 9420 crypto.org/twofish_common.o 13209 0 0 13209 3399 crypto/twofish_common.o Run tested (modprobe tcrypt reports ok). Please apply. Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
-rw-r--r--crypto/twofish_common.c96
1 files changed, 30 insertions, 66 deletions
diff --git a/crypto/twofish_common.c b/crypto/twofish_common.c
index b4b9c0c3f4ae..0af216c75d7e 100644
--- a/crypto/twofish_common.c
+++ b/crypto/twofish_common.c
@@ -655,84 +655,48 @@ int twofish_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int key_len)
655 CALC_SB256_2( i, calc_sb_tbl[j], calc_sb_tbl[k] ); 655 CALC_SB256_2( i, calc_sb_tbl[j], calc_sb_tbl[k] );
656 } 656 }
657 657
658 /* Calculate whitening and round subkeys. The constants are 658 /* CALC_K256/CALC_K192/CALC_K loops were unrolled.
659 * indices of subkeys, preprocessed through q0 and q1. */ 659 * Unrolling produced x2.5 more code (+18k on i386),
660 CALC_K256 (w, 0, 0xA9, 0x75, 0x67, 0xF3); 660 * and speeded up key setup by 7%:
661 CALC_K256 (w, 2, 0xB3, 0xC6, 0xE8, 0xF4); 661 * unrolled: twofish_setkey/sec: 41128
662 CALC_K256 (w, 4, 0x04, 0xDB, 0xFD, 0x7B); 662 * loop: twofish_setkey/sec: 38148
663 CALC_K256 (w, 6, 0xA3, 0xFB, 0x76, 0xC8); 663 * CALC_K256: ~100 insns each
664 CALC_K256 (k, 0, 0x9A, 0x4A, 0x92, 0xD3); 664 * CALC_K192: ~90 insns
665 CALC_K256 (k, 2, 0x80, 0xE6, 0x78, 0x6B); 665 * CALC_K: ~70 insns
666 CALC_K256 (k, 4, 0xE4, 0x45, 0xDD, 0x7D); 666 */
667 CALC_K256 (k, 6, 0xD1, 0xE8, 0x38, 0x4B); 667 /* Calculate whitening and round subkeys */
668 CALC_K256 (k, 8, 0x0D, 0xD6, 0xC6, 0x32); 668 for ( i = 0; i < 8; i += 2 ) {
669 CALC_K256 (k, 10, 0x35, 0xD8, 0x98, 0xFD); 669 CALC_K256 (w, i, q0[i], q1[i], q0[i+1], q1[i+1]);
670 CALC_K256 (k, 12, 0x18, 0x37, 0xF7, 0x71); 670 }
671 CALC_K256 (k, 14, 0xEC, 0xF1, 0x6C, 0xE1); 671 for ( i = 0; i < 32; i += 2 ) {
672 CALC_K256 (k, 16, 0x43, 0x30, 0x75, 0x0F); 672 CALC_K256 (k, i, q0[i+8], q1[i+8], q0[i+9], q1[i+9]);
673 CALC_K256 (k, 18, 0x37, 0xF8, 0x26, 0x1B); 673 }
674 CALC_K256 (k, 20, 0xFA, 0x87, 0x13, 0xFA);
675 CALC_K256 (k, 22, 0x94, 0x06, 0x48, 0x3F);
676 CALC_K256 (k, 24, 0xF2, 0x5E, 0xD0, 0xBA);
677 CALC_K256 (k, 26, 0x8B, 0xAE, 0x30, 0x5B);
678 CALC_K256 (k, 28, 0x84, 0x8A, 0x54, 0x00);
679 CALC_K256 (k, 30, 0xDF, 0xBC, 0x23, 0x9D);
680 } else if (key_len == 24) { /* 192-bit key */ 674 } else if (key_len == 24) { /* 192-bit key */
681 /* Compute the S-boxes. */ 675 /* Compute the S-boxes. */
682 for ( i = j = 0, k = 1; i < 256; i++, j += 2, k += 2 ) { 676 for ( i = j = 0, k = 1; i < 256; i++, j += 2, k += 2 ) {
683 CALC_SB192_2( i, calc_sb_tbl[j], calc_sb_tbl[k] ); 677 CALC_SB192_2( i, calc_sb_tbl[j], calc_sb_tbl[k] );
684 } 678 }
685 679
686 /* Calculate whitening and round subkeys. The constants are 680 /* Calculate whitening and round subkeys */
687 * indices of subkeys, preprocessed through q0 and q1. */ 681 for ( i = 0; i < 8; i += 2 ) {
688 CALC_K192 (w, 0, 0xA9, 0x75, 0x67, 0xF3); 682 CALC_K192 (w, i, q0[i], q1[i], q0[i+1], q1[i+1]);
689 CALC_K192 (w, 2, 0xB3, 0xC6, 0xE8, 0xF4); 683 }
690 CALC_K192 (w, 4, 0x04, 0xDB, 0xFD, 0x7B); 684 for ( i = 0; i < 32; i += 2 ) {
691 CALC_K192 (w, 6, 0xA3, 0xFB, 0x76, 0xC8); 685 CALC_K192 (k, i, q0[i+8], q1[i+8], q0[i+9], q1[i+9]);
692 CALC_K192 (k, 0, 0x9A, 0x4A, 0x92, 0xD3); 686 }
693 CALC_K192 (k, 2, 0x80, 0xE6, 0x78, 0x6B);
694 CALC_K192 (k, 4, 0xE4, 0x45, 0xDD, 0x7D);
695 CALC_K192 (k, 6, 0xD1, 0xE8, 0x38, 0x4B);
696 CALC_K192 (k, 8, 0x0D, 0xD6, 0xC6, 0x32);
697 CALC_K192 (k, 10, 0x35, 0xD8, 0x98, 0xFD);
698 CALC_K192 (k, 12, 0x18, 0x37, 0xF7, 0x71);
699 CALC_K192 (k, 14, 0xEC, 0xF1, 0x6C, 0xE1);
700 CALC_K192 (k, 16, 0x43, 0x30, 0x75, 0x0F);
701 CALC_K192 (k, 18, 0x37, 0xF8, 0x26, 0x1B);
702 CALC_K192 (k, 20, 0xFA, 0x87, 0x13, 0xFA);
703 CALC_K192 (k, 22, 0x94, 0x06, 0x48, 0x3F);
704 CALC_K192 (k, 24, 0xF2, 0x5E, 0xD0, 0xBA);
705 CALC_K192 (k, 26, 0x8B, 0xAE, 0x30, 0x5B);
706 CALC_K192 (k, 28, 0x84, 0x8A, 0x54, 0x00);
707 CALC_K192 (k, 30, 0xDF, 0xBC, 0x23, 0x9D);
708 } else { /* 128-bit key */ 687 } else { /* 128-bit key */
709 /* Compute the S-boxes. */ 688 /* Compute the S-boxes. */
710 for ( i = j = 0, k = 1; i < 256; i++, j += 2, k += 2 ) { 689 for ( i = j = 0, k = 1; i < 256; i++, j += 2, k += 2 ) {
711 CALC_SB_2( i, calc_sb_tbl[j], calc_sb_tbl[k] ); 690 CALC_SB_2( i, calc_sb_tbl[j], calc_sb_tbl[k] );
712 } 691 }
713 692
714 /* Calculate whitening and round subkeys. The constants are 693 /* Calculate whitening and round subkeys */
715 * indices of subkeys, preprocessed through q0 and q1. */ 694 for ( i = 0; i < 8; i += 2 ) {
716 CALC_K (w, 0, 0xA9, 0x75, 0x67, 0xF3); 695 CALC_K (w, i, q0[i], q1[i], q0[i+1], q1[i+1]);
717 CALC_K (w, 2, 0xB3, 0xC6, 0xE8, 0xF4); 696 }
718 CALC_K (w, 4, 0x04, 0xDB, 0xFD, 0x7B); 697 for ( i = 0; i < 32; i += 2 ) {
719 CALC_K (w, 6, 0xA3, 0xFB, 0x76, 0xC8); 698 CALC_K (k, i, q0[i+8], q1[i+8], q0[i+9], q1[i+9]);
720 CALC_K (k, 0, 0x9A, 0x4A, 0x92, 0xD3); 699 }
721 CALC_K (k, 2, 0x80, 0xE6, 0x78, 0x6B);
722 CALC_K (k, 4, 0xE4, 0x45, 0xDD, 0x7D);
723 CALC_K (k, 6, 0xD1, 0xE8, 0x38, 0x4B);
724 CALC_K (k, 8, 0x0D, 0xD6, 0xC6, 0x32);
725 CALC_K (k, 10, 0x35, 0xD8, 0x98, 0xFD);
726 CALC_K (k, 12, 0x18, 0x37, 0xF7, 0x71);
727 CALC_K (k, 14, 0xEC, 0xF1, 0x6C, 0xE1);
728 CALC_K (k, 16, 0x43, 0x30, 0x75, 0x0F);
729 CALC_K (k, 18, 0x37, 0xF8, 0x26, 0x1B);
730 CALC_K (k, 20, 0xFA, 0x87, 0x13, 0xFA);
731 CALC_K (k, 22, 0x94, 0x06, 0x48, 0x3F);
732 CALC_K (k, 24, 0xF2, 0x5E, 0xD0, 0xBA);
733 CALC_K (k, 26, 0x8B, 0xAE, 0x30, 0x5B);
734 CALC_K (k, 28, 0x84, 0x8A, 0x54, 0x00);
735 CALC_K (k, 30, 0xDF, 0xBC, 0x23, 0x9D);
736 } 700 }
737 701
738 return 0; 702 return 0;