diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2007-10-26 04:22:57 -0400 |
---|---|---|
committer | Herbert Xu <herbert@gondor.apana.org.au> | 2008-01-10 16:16:06 -0500 |
commit | e2b21b5002a2bf21ca73c7448309a7288a984ddf (patch) | |
tree | 11e55173debdb2428a37655968ceef39786f2383 | |
parent | b7a30da61adc5f252ee97b2a4f3fc23c9d06a08a (diff) |
[CRYPTO] twofish: Do not unroll big stuff in twofish key setup
Currently the twofish cipher key setup code
has unrolled loops: approximately 70-100
instructions are repeated 40 times.
As a result, the twofish module is the biggest module
in crypto/*.
Unrolling produces 2.5x more code (+18k on i386) and speeds up key
setup by 7%:
unrolled: twofish_setkey/sec: 41128
loop: twofish_setkey/sec: 38148
CALC_K256: ~100 insns each
CALC_K192: ~90 insns
CALC_K: ~70 insns
The attached patch removes this unrolling.
$ size */twofish_common.o
text data bss dec hex filename
37920 0 0 37920 9420 crypto.org/twofish_common.o
13209 0 0 13209 3399 crypto/twofish_common.o
Run tested (modprobe tcrypt reports ok). Please apply.
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
-rw-r--r-- | crypto/twofish_common.c | 96 |
1 files changed, 30 insertions, 66 deletions
diff --git a/crypto/twofish_common.c b/crypto/twofish_common.c index b4b9c0c3f4ae..0af216c75d7e 100644 --- a/crypto/twofish_common.c +++ b/crypto/twofish_common.c | |||
@@ -655,84 +655,48 @@ int twofish_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int key_len) | |||
655 | CALC_SB256_2( i, calc_sb_tbl[j], calc_sb_tbl[k] ); | 655 | CALC_SB256_2( i, calc_sb_tbl[j], calc_sb_tbl[k] ); |
656 | } | 656 | } |
657 | 657 | ||
658 | /* Calculate whitening and round subkeys. The constants are | 658 | /* CALC_K256/CALC_K192/CALC_K loops were unrolled. |
659 | * indices of subkeys, preprocessed through q0 and q1. */ | 659 | * Unrolling produced x2.5 more code (+18k on i386), |
660 | CALC_K256 (w, 0, 0xA9, 0x75, 0x67, 0xF3); | 660 | * and speeded up key setup by 7%: |
661 | CALC_K256 (w, 2, 0xB3, 0xC6, 0xE8, 0xF4); | 661 | * unrolled: twofish_setkey/sec: 41128 |
662 | CALC_K256 (w, 4, 0x04, 0xDB, 0xFD, 0x7B); | 662 | * loop: twofish_setkey/sec: 38148 |
663 | CALC_K256 (w, 6, 0xA3, 0xFB, 0x76, 0xC8); | 663 | * CALC_K256: ~100 insns each |
664 | CALC_K256 (k, 0, 0x9A, 0x4A, 0x92, 0xD3); | 664 | * CALC_K192: ~90 insns |
665 | CALC_K256 (k, 2, 0x80, 0xE6, 0x78, 0x6B); | 665 | * CALC_K: ~70 insns |
666 | CALC_K256 (k, 4, 0xE4, 0x45, 0xDD, 0x7D); | 666 | */ |
667 | CALC_K256 (k, 6, 0xD1, 0xE8, 0x38, 0x4B); | 667 | /* Calculate whitening and round subkeys */ |
668 | CALC_K256 (k, 8, 0x0D, 0xD6, 0xC6, 0x32); | 668 | for ( i = 0; i < 8; i += 2 ) { |
669 | CALC_K256 (k, 10, 0x35, 0xD8, 0x98, 0xFD); | 669 | CALC_K256 (w, i, q0[i], q1[i], q0[i+1], q1[i+1]); |
670 | CALC_K256 (k, 12, 0x18, 0x37, 0xF7, 0x71); | 670 | } |
671 | CALC_K256 (k, 14, 0xEC, 0xF1, 0x6C, 0xE1); | 671 | for ( i = 0; i < 32; i += 2 ) { |
672 | CALC_K256 (k, 16, 0x43, 0x30, 0x75, 0x0F); | 672 | CALC_K256 (k, i, q0[i+8], q1[i+8], q0[i+9], q1[i+9]); |
673 | CALC_K256 (k, 18, 0x37, 0xF8, 0x26, 0x1B); | 673 | } |
674 | CALC_K256 (k, 20, 0xFA, 0x87, 0x13, 0xFA); | ||
675 | CALC_K256 (k, 22, 0x94, 0x06, 0x48, 0x3F); | ||
676 | CALC_K256 (k, 24, 0xF2, 0x5E, 0xD0, 0xBA); | ||
677 | CALC_K256 (k, 26, 0x8B, 0xAE, 0x30, 0x5B); | ||
678 | CALC_K256 (k, 28, 0x84, 0x8A, 0x54, 0x00); | ||
679 | CALC_K256 (k, 30, 0xDF, 0xBC, 0x23, 0x9D); | ||
680 | } else if (key_len == 24) { /* 192-bit key */ | 674 | } else if (key_len == 24) { /* 192-bit key */ |
681 | /* Compute the S-boxes. */ | 675 | /* Compute the S-boxes. */ |
682 | for ( i = j = 0, k = 1; i < 256; i++, j += 2, k += 2 ) { | 676 | for ( i = j = 0, k = 1; i < 256; i++, j += 2, k += 2 ) { |
683 | CALC_SB192_2( i, calc_sb_tbl[j], calc_sb_tbl[k] ); | 677 | CALC_SB192_2( i, calc_sb_tbl[j], calc_sb_tbl[k] ); |
684 | } | 678 | } |
685 | 679 | ||
686 | /* Calculate whitening and round subkeys. The constants are | 680 | /* Calculate whitening and round subkeys */ |
687 | * indices of subkeys, preprocessed through q0 and q1. */ | 681 | for ( i = 0; i < 8; i += 2 ) { |
688 | CALC_K192 (w, 0, 0xA9, 0x75, 0x67, 0xF3); | 682 | CALC_K192 (w, i, q0[i], q1[i], q0[i+1], q1[i+1]); |
689 | CALC_K192 (w, 2, 0xB3, 0xC6, 0xE8, 0xF4); | 683 | } |
690 | CALC_K192 (w, 4, 0x04, 0xDB, 0xFD, 0x7B); | 684 | for ( i = 0; i < 32; i += 2 ) { |
691 | CALC_K192 (w, 6, 0xA3, 0xFB, 0x76, 0xC8); | 685 | CALC_K192 (k, i, q0[i+8], q1[i+8], q0[i+9], q1[i+9]); |
692 | CALC_K192 (k, 0, 0x9A, 0x4A, 0x92, 0xD3); | 686 | } |
693 | CALC_K192 (k, 2, 0x80, 0xE6, 0x78, 0x6B); | ||
694 | CALC_K192 (k, 4, 0xE4, 0x45, 0xDD, 0x7D); | ||
695 | CALC_K192 (k, 6, 0xD1, 0xE8, 0x38, 0x4B); | ||
696 | CALC_K192 (k, 8, 0x0D, 0xD6, 0xC6, 0x32); | ||
697 | CALC_K192 (k, 10, 0x35, 0xD8, 0x98, 0xFD); | ||
698 | CALC_K192 (k, 12, 0x18, 0x37, 0xF7, 0x71); | ||
699 | CALC_K192 (k, 14, 0xEC, 0xF1, 0x6C, 0xE1); | ||
700 | CALC_K192 (k, 16, 0x43, 0x30, 0x75, 0x0F); | ||
701 | CALC_K192 (k, 18, 0x37, 0xF8, 0x26, 0x1B); | ||
702 | CALC_K192 (k, 20, 0xFA, 0x87, 0x13, 0xFA); | ||
703 | CALC_K192 (k, 22, 0x94, 0x06, 0x48, 0x3F); | ||
704 | CALC_K192 (k, 24, 0xF2, 0x5E, 0xD0, 0xBA); | ||
705 | CALC_K192 (k, 26, 0x8B, 0xAE, 0x30, 0x5B); | ||
706 | CALC_K192 (k, 28, 0x84, 0x8A, 0x54, 0x00); | ||
707 | CALC_K192 (k, 30, 0xDF, 0xBC, 0x23, 0x9D); | ||
708 | } else { /* 128-bit key */ | 687 | } else { /* 128-bit key */ |
709 | /* Compute the S-boxes. */ | 688 | /* Compute the S-boxes. */ |
710 | for ( i = j = 0, k = 1; i < 256; i++, j += 2, k += 2 ) { | 689 | for ( i = j = 0, k = 1; i < 256; i++, j += 2, k += 2 ) { |
711 | CALC_SB_2( i, calc_sb_tbl[j], calc_sb_tbl[k] ); | 690 | CALC_SB_2( i, calc_sb_tbl[j], calc_sb_tbl[k] ); |
712 | } | 691 | } |
713 | 692 | ||
714 | /* Calculate whitening and round subkeys. The constants are | 693 | /* Calculate whitening and round subkeys */ |
715 | * indices of subkeys, preprocessed through q0 and q1. */ | 694 | for ( i = 0; i < 8; i += 2 ) { |
716 | CALC_K (w, 0, 0xA9, 0x75, 0x67, 0xF3); | 695 | CALC_K (w, i, q0[i], q1[i], q0[i+1], q1[i+1]); |
717 | CALC_K (w, 2, 0xB3, 0xC6, 0xE8, 0xF4); | 696 | } |
718 | CALC_K (w, 4, 0x04, 0xDB, 0xFD, 0x7B); | 697 | for ( i = 0; i < 32; i += 2 ) { |
719 | CALC_K (w, 6, 0xA3, 0xFB, 0x76, 0xC8); | 698 | CALC_K (k, i, q0[i+8], q1[i+8], q0[i+9], q1[i+9]); |
720 | CALC_K (k, 0, 0x9A, 0x4A, 0x92, 0xD3); | 699 | } |
721 | CALC_K (k, 2, 0x80, 0xE6, 0x78, 0x6B); | ||
722 | CALC_K (k, 4, 0xE4, 0x45, 0xDD, 0x7D); | ||
723 | CALC_K (k, 6, 0xD1, 0xE8, 0x38, 0x4B); | ||
724 | CALC_K (k, 8, 0x0D, 0xD6, 0xC6, 0x32); | ||
725 | CALC_K (k, 10, 0x35, 0xD8, 0x98, 0xFD); | ||
726 | CALC_K (k, 12, 0x18, 0x37, 0xF7, 0x71); | ||
727 | CALC_K (k, 14, 0xEC, 0xF1, 0x6C, 0xE1); | ||
728 | CALC_K (k, 16, 0x43, 0x30, 0x75, 0x0F); | ||
729 | CALC_K (k, 18, 0x37, 0xF8, 0x26, 0x1B); | ||
730 | CALC_K (k, 20, 0xFA, 0x87, 0x13, 0xFA); | ||
731 | CALC_K (k, 22, 0x94, 0x06, 0x48, 0x3F); | ||
732 | CALC_K (k, 24, 0xF2, 0x5E, 0xD0, 0xBA); | ||
733 | CALC_K (k, 26, 0x8B, 0xAE, 0x30, 0x5B); | ||
734 | CALC_K (k, 28, 0x84, 0x8A, 0x54, 0x00); | ||
735 | CALC_K (k, 30, 0xDF, 0xBC, 0x23, 0x9D); | ||
736 | } | 700 | } |
737 | 701 | ||
738 | return 0; | 702 | return 0; |