diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2007-11-06 09:15:19 -0500 |
---|---|---|
committer | Herbert Xu <herbert@gondor.apana.org.au> | 2008-01-10 16:16:08 -0500 |
commit | d3e7480572bf882dee5baa2891bccbfa3db0b1a1 (patch) | |
tree | 40281dcc543e1b76b835b4f63a6feadcc9720166 | |
parent | 1ce73e8d6d95ceb860184c34fa1a91a82e51cbb3 (diff) |
[CRYPTO] camellia: De-unrolling
Move the huge unrolled pieces of code (3 screenfuls) at the end of
the 128/256-bit key setup routines into a common camellia_setup_tail(),
and convert them to a loop there.
The loop is still unrolled six times, so the performance hit is very small,
while the code size win is big.
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Acked-by: Noriaki TAKAMIYA <takamiya@po.ntts.co.jp>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
-rw-r--r-- | crypto/camellia.c | 107 |
1 files changed, 23 insertions, 84 deletions
diff --git a/crypto/camellia.c b/crypto/camellia.c index 16529dddee72..2e129ab1a6a2 100644 --- a/crypto/camellia.c +++ b/crypto/camellia.c | |||
@@ -424,6 +424,27 @@ static const u32 camellia_sp4404[256] = { | |||
424 | #define SUBKEY_L(INDEX) (subkey[(INDEX)*2]) | 424 | #define SUBKEY_L(INDEX) (subkey[(INDEX)*2]) |
425 | #define SUBKEY_R(INDEX) (subkey[(INDEX)*2 + 1]) | 425 | #define SUBKEY_R(INDEX) (subkey[(INDEX)*2 + 1]) |
426 | 426 | ||
/*
 * camellia_setup_tail - final in-place pass over the subkey table, shared
 * by the key setup routines (the call sites label this step "apply the
 * inverse of the last half of P-function").
 *
 * @subkey: table of u32 words; entry N is the pair subkey[2*N] (SUBKEY_L)
 *          and subkey[2*N + 1] (SUBKEY_R).
 * @max:    exclusive bound on the group start index; camellia_setup128()
 *          passes 24 and camellia_setup256() passes 32.
 *
 * Entries are processed in groups of six, starting at entry 2. The index
 * then advances by 8, so entries i+6 and i+7 (8, 9, 16, 17, ...) are left
 * untouched, exactly as in the unrolled code this helper replaces.
 * Because this is a do/while loop, the first group (entries 2..7) is always
 * processed, whatever value max has.
 *
 * For each processed entry, with L and R the values on entry:
 *	dw = ROL8(L ^ R);  R = L ^ dw;  L = dw;
 * ROL8 is defined outside this hunk; presumably a 32-bit rotate left by 8
 * (TODO confirm against its definition).
 */
427 | static void camellia_setup_tail(u32 *subkey, int max) | ||
428 | { | ||
429 | u32 dw; | ||
430 | int i = 2; | ||
431 | do { | ||
432 | dw = SUBKEY_L(i + 0) ^ SUBKEY_R(i + 0); dw = ROL8(dw);/* round 1 */ | ||
433 | SUBKEY_R(i + 0) = SUBKEY_L(i + 0) ^ dw; SUBKEY_L(i + 0) = dw; | ||
434 | dw = SUBKEY_L(i + 1) ^ SUBKEY_R(i + 1); dw = ROL8(dw);/* round 2 */ | ||
435 | SUBKEY_R(i + 1) = SUBKEY_L(i + 1) ^ dw; SUBKEY_L(i + 1) = dw; | ||
436 | dw = SUBKEY_L(i + 2) ^ SUBKEY_R(i + 2); dw = ROL8(dw);/* round 3 */ | ||
437 | SUBKEY_R(i + 2) = SUBKEY_L(i + 2) ^ dw; SUBKEY_L(i + 2) = dw; | ||
438 | dw = SUBKEY_L(i + 3) ^ SUBKEY_R(i + 3); dw = ROL8(dw);/* round 4 */ | ||
439 | SUBKEY_R(i + 3) = SUBKEY_L(i + 3) ^ dw; SUBKEY_L(i + 3) = dw; | ||
440 | dw = SUBKEY_L(i + 4) ^ SUBKEY_R(i + 4); dw = ROL8(dw);/* round 5 */ | ||
441 | SUBKEY_R(i + 4) = SUBKEY_L(i + 4) ^ dw; SUBKEY_L(i + 4) = dw; | ||
442 | dw = SUBKEY_L(i + 5) ^ SUBKEY_R(i + 5); dw = ROL8(dw);/* round 6 */ | ||
443 | SUBKEY_R(i + 5) = SUBKEY_L(i + 5) ^ dw; SUBKEY_L(i + 5) = dw; | ||
/* advance past the six processed entries plus the two (i+6, i+7) that this pass skips */
444 | i += 8; | ||
445 | } while (i < max); | ||
446 | } | ||
447 | |||
427 | static void camellia_setup128(const unsigned char *key, u32 *subkey) | 448 | static void camellia_setup128(const unsigned char *key, u32 *subkey) |
428 | { | 449 | { |
429 | u32 kll, klr, krl, krr; | 450 | u32 kll, klr, krl, krr; |
@@ -650,42 +671,7 @@ static void camellia_setup128(const unsigned char *key, u32 *subkey) | |||
650 | SUBKEY_R(24) = subR[24] ^ subR[23]; | 671 | SUBKEY_R(24) = subR[24] ^ subR[23]; |
651 | 672 | ||
652 | /* apply the inverse of the last half of P-function */ | 673 | /* apply the inverse of the last half of P-function */ |
653 | dw = SUBKEY_L(2) ^ SUBKEY_R(2); dw = ROL8(dw);/* round 1 */ | 674 | camellia_setup_tail(subkey, 24); |
654 | SUBKEY_R(2) = SUBKEY_L(2) ^ dw; SUBKEY_L(2) = dw; | ||
655 | dw = SUBKEY_L(3) ^ SUBKEY_R(3); dw = ROL8(dw);/* round 2 */ | ||
656 | SUBKEY_R(3) = SUBKEY_L(3) ^ dw; SUBKEY_L(3) = dw; | ||
657 | dw = SUBKEY_L(4) ^ SUBKEY_R(4); dw = ROL8(dw);/* round 3 */ | ||
658 | SUBKEY_R(4) = SUBKEY_L(4) ^ dw; SUBKEY_L(4) = dw; | ||
659 | dw = SUBKEY_L(5) ^ SUBKEY_R(5); dw = ROL8(dw);/* round 4 */ | ||
660 | SUBKEY_R(5) = SUBKEY_L(5) ^ dw; SUBKEY_L(5) = dw; | ||
661 | dw = SUBKEY_L(6) ^ SUBKEY_R(6); dw = ROL8(dw);/* round 5 */ | ||
662 | SUBKEY_R(6) = SUBKEY_L(6) ^ dw; SUBKEY_L(6) = dw; | ||
663 | dw = SUBKEY_L(7) ^ SUBKEY_R(7); dw = ROL8(dw);/* round 6 */ | ||
664 | SUBKEY_R(7) = SUBKEY_L(7) ^ dw; SUBKEY_L(7) = dw; | ||
665 | dw = SUBKEY_L(10) ^ SUBKEY_R(10); dw = ROL8(dw);/* round 7 */ | ||
666 | SUBKEY_R(10) = SUBKEY_L(10) ^ dw; SUBKEY_L(10) = dw; | ||
667 | dw = SUBKEY_L(11) ^ SUBKEY_R(11); dw = ROL8(dw);/* round 8 */ | ||
668 | SUBKEY_R(11) = SUBKEY_L(11) ^ dw; SUBKEY_L(11) = dw; | ||
669 | dw = SUBKEY_L(12) ^ SUBKEY_R(12); dw = ROL8(dw);/* round 9 */ | ||
670 | SUBKEY_R(12) = SUBKEY_L(12) ^ dw; SUBKEY_L(12) = dw; | ||
671 | dw = SUBKEY_L(13) ^ SUBKEY_R(13); dw = ROL8(dw);/* round 10 */ | ||
672 | SUBKEY_R(13) = SUBKEY_L(13) ^ dw; SUBKEY_L(13) = dw; | ||
673 | dw = SUBKEY_L(14) ^ SUBKEY_R(14); dw = ROL8(dw);/* round 11 */ | ||
674 | SUBKEY_R(14) = SUBKEY_L(14) ^ dw; SUBKEY_L(14) = dw; | ||
675 | dw = SUBKEY_L(15) ^ SUBKEY_R(15); dw = ROL8(dw);/* round 12 */ | ||
676 | SUBKEY_R(15) = SUBKEY_L(15) ^ dw; SUBKEY_L(15) = dw; | ||
677 | dw = SUBKEY_L(18) ^ SUBKEY_R(18); dw = ROL8(dw);/* round 13 */ | ||
678 | SUBKEY_R(18) = SUBKEY_L(18) ^ dw; SUBKEY_L(18) = dw; | ||
679 | dw = SUBKEY_L(19) ^ SUBKEY_R(19); dw = ROL8(dw);/* round 14 */ | ||
680 | SUBKEY_R(19) = SUBKEY_L(19) ^ dw; SUBKEY_L(19) = dw; | ||
681 | dw = SUBKEY_L(20) ^ SUBKEY_R(20); dw = ROL8(dw);/* round 15 */ | ||
682 | SUBKEY_R(20) = SUBKEY_L(20) ^ dw; SUBKEY_L(20) = dw; | ||
683 | dw = SUBKEY_L(21) ^ SUBKEY_R(21); dw = ROL8(dw);/* round 16 */ | ||
684 | SUBKEY_R(21) = SUBKEY_L(21) ^ dw; SUBKEY_L(21) = dw; | ||
685 | dw = SUBKEY_L(22) ^ SUBKEY_R(22); dw = ROL8(dw);/* round 17 */ | ||
686 | SUBKEY_R(22) = SUBKEY_L(22) ^ dw; SUBKEY_L(22) = dw; | ||
687 | dw = SUBKEY_L(23) ^ SUBKEY_R(23); dw = ROL8(dw);/* round 18 */ | ||
688 | SUBKEY_R(23) = SUBKEY_L(23) ^ dw; SUBKEY_L(23) = dw; | ||
689 | } | 675 | } |
690 | 676 | ||
691 | static void camellia_setup256(const unsigned char *key, u32 *subkey) | 677 | static void camellia_setup256(const unsigned char *key, u32 *subkey) |
@@ -995,54 +981,7 @@ static void camellia_setup256(const unsigned char *key, u32 *subkey) | |||
995 | SUBKEY_R(32) = subR[32] ^ subR[31]; | 981 | SUBKEY_R(32) = subR[32] ^ subR[31]; |
996 | 982 | ||
997 | /* apply the inverse of the last half of P-function */ | 983 | /* apply the inverse of the last half of P-function */ |
998 | dw = SUBKEY_L(2) ^ SUBKEY_R(2); dw = ROL8(dw);/* round 1 */ | 984 | camellia_setup_tail(subkey, 32); |
999 | SUBKEY_R(2) = SUBKEY_L(2) ^ dw; SUBKEY_L(2) = dw; | ||
1000 | dw = SUBKEY_L(3) ^ SUBKEY_R(3); dw = ROL8(dw);/* round 2 */ | ||
1001 | SUBKEY_R(3) = SUBKEY_L(3) ^ dw; SUBKEY_L(3) = dw; | ||
1002 | dw = SUBKEY_L(4) ^ SUBKEY_R(4); dw = ROL8(dw);/* round 3 */ | ||
1003 | SUBKEY_R(4) = SUBKEY_L(4) ^ dw; SUBKEY_L(4) = dw; | ||
1004 | dw = SUBKEY_L(5) ^ SUBKEY_R(5); dw = ROL8(dw);/* round 4 */ | ||
1005 | SUBKEY_R(5) = SUBKEY_L(5) ^ dw; SUBKEY_L(5) = dw; | ||
1006 | dw = SUBKEY_L(6) ^ SUBKEY_R(6); dw = ROL8(dw);/* round 5 */ | ||
1007 | SUBKEY_R(6) = SUBKEY_L(6) ^ dw; SUBKEY_L(6) = dw; | ||
1008 | dw = SUBKEY_L(7) ^ SUBKEY_R(7); dw = ROL8(dw);/* round 6 */ | ||
1009 | SUBKEY_R(7) = SUBKEY_L(7) ^ dw; SUBKEY_L(7) = dw; | ||
1010 | dw = SUBKEY_L(10) ^ SUBKEY_R(10); dw = ROL8(dw);/* round 7 */ | ||
1011 | SUBKEY_R(10) = SUBKEY_L(10) ^ dw; SUBKEY_L(10) = dw; | ||
1012 | dw = SUBKEY_L(11) ^ SUBKEY_R(11); dw = ROL8(dw);/* round 8 */ | ||
1013 | SUBKEY_R(11) = SUBKEY_L(11) ^ dw; SUBKEY_L(11) = dw; | ||
1014 | dw = SUBKEY_L(12) ^ SUBKEY_R(12); dw = ROL8(dw);/* round 9 */ | ||
1015 | SUBKEY_R(12) = SUBKEY_L(12) ^ dw; SUBKEY_L(12) = dw; | ||
1016 | dw = SUBKEY_L(13) ^ SUBKEY_R(13); dw = ROL8(dw);/* round 10 */ | ||
1017 | SUBKEY_R(13) = SUBKEY_L(13) ^ dw; SUBKEY_L(13) = dw; | ||
1018 | dw = SUBKEY_L(14) ^ SUBKEY_R(14); dw = ROL8(dw);/* round 11 */ | ||
1019 | SUBKEY_R(14) = SUBKEY_L(14) ^ dw; SUBKEY_L(14) = dw; | ||
1020 | dw = SUBKEY_L(15) ^ SUBKEY_R(15); dw = ROL8(dw);/* round 12 */ | ||
1021 | SUBKEY_R(15) = SUBKEY_L(15) ^ dw; SUBKEY_L(15) = dw; | ||
1022 | dw = SUBKEY_L(18) ^ SUBKEY_R(18); dw = ROL8(dw);/* round 13 */ | ||
1023 | SUBKEY_R(18) = SUBKEY_L(18) ^ dw; SUBKEY_L(18) = dw; | ||
1024 | dw = SUBKEY_L(19) ^ SUBKEY_R(19); dw = ROL8(dw);/* round 14 */ | ||
1025 | SUBKEY_R(19) = SUBKEY_L(19) ^ dw; SUBKEY_L(19) = dw; | ||
1026 | dw = SUBKEY_L(20) ^ SUBKEY_R(20); dw = ROL8(dw);/* round 15 */ | ||
1027 | SUBKEY_R(20) = SUBKEY_L(20) ^ dw; SUBKEY_L(20) = dw; | ||
1028 | dw = SUBKEY_L(21) ^ SUBKEY_R(21); dw = ROL8(dw);/* round 16 */ | ||
1029 | SUBKEY_R(21) = SUBKEY_L(21) ^ dw; SUBKEY_L(21) = dw; | ||
1030 | dw = SUBKEY_L(22) ^ SUBKEY_R(22); dw = ROL8(dw);/* round 17 */ | ||
1031 | SUBKEY_R(22) = SUBKEY_L(22) ^ dw; SUBKEY_L(22) = dw; | ||
1032 | dw = SUBKEY_L(23) ^ SUBKEY_R(23); dw = ROL8(dw);/* round 18 */ | ||
1033 | SUBKEY_R(23) = SUBKEY_L(23) ^ dw; SUBKEY_L(23) = dw; | ||
1034 | dw = SUBKEY_L(26) ^ SUBKEY_R(26); dw = ROL8(dw);/* round 19 */ | ||
1035 | SUBKEY_R(26) = SUBKEY_L(26) ^ dw; SUBKEY_L(26) = dw; | ||
1036 | dw = SUBKEY_L(27) ^ SUBKEY_R(27); dw = ROL8(dw);/* round 20 */ | ||
1037 | SUBKEY_R(27) = SUBKEY_L(27) ^ dw; SUBKEY_L(27) = dw; | ||
1038 | dw = SUBKEY_L(28) ^ SUBKEY_R(28); dw = ROL8(dw);/* round 21 */ | ||
1039 | SUBKEY_R(28) = SUBKEY_L(28) ^ dw; SUBKEY_L(28) = dw; | ||
1040 | dw = SUBKEY_L(29) ^ SUBKEY_R(29); dw = ROL8(dw);/* round 22 */ | ||
1041 | SUBKEY_R(29) = SUBKEY_L(29) ^ dw; SUBKEY_L(29) = dw; | ||
1042 | dw = SUBKEY_L(30) ^ SUBKEY_R(30); dw = ROL8(dw);/* round 23 */ | ||
1043 | SUBKEY_R(30) = SUBKEY_L(30) ^ dw; SUBKEY_L(30) = dw; | ||
1044 | dw = SUBKEY_L(31) ^ SUBKEY_R(31); dw = ROL8(dw);/* round 24 */ | ||
1045 | SUBKEY_R(31) = SUBKEY_L(31) ^ dw; SUBKEY_L(31) = dw; | ||
1046 | } | 985 | } |
1047 | 986 | ||
1048 | static void camellia_setup192(const unsigned char *key, u32 *subkey) | 987 | static void camellia_setup192(const unsigned char *key, u32 *subkey) |