diff options
author | Nicolas Kaiser <nikai@nikai.net> | 2010-11-04 14:58:12 -0400 |
---|---|---|
committer | Herbert Xu <herbert@gondor.apana.org.au> | 2010-11-13 07:47:55 -0500 |
commit | 895be15745d59cc7ede0e1c203e3432b0abdb71c (patch) | |
tree | 38c15d799b7be1768dca4ec638d964c6f7298336 | |
parent | 90246e79af062fcbb8c3728a5f29cb19b3468f59 (diff) |
crypto: cast5 - simplify if-statements
I noticed that by factoring out common rounds from the
branches of the if-statements in the encryption and
decryption functions, the executable file size goes down
significantly, for crypto/cast5.ko from 26688 bytes
to 24336 bytes (amd64).
On my test system, I saw a slight speedup. This is the
first time I have done such a benchmark - I found a similar
one on the crypto mailing list, and I hope I did it right.
Before:
# cryptsetup create dm-test /dev/hda2 -c cast5-cbc-plain -s 128
Passsatz eingeben:
# dd if=/dev/zero of=/dev/mapper/dm-test bs=1M count=50
52428800 Bytes (52 MB) kopiert, 2,43484 s, 21,5 MB/s
# dd if=/dev/zero of=/dev/mapper/dm-test bs=1M count=50
52428800 Bytes (52 MB) kopiert, 2,4089 s, 21,8 MB/s
# dd if=/dev/zero of=/dev/mapper/dm-test bs=1M count=50
52428800 Bytes (52 MB) kopiert, 2,41091 s, 21,7 MB/s
After:
# cryptsetup create dm-test /dev/hda2 -c cast5-cbc-plain -s 128
Passsatz eingeben:
# dd if=/dev/zero of=/dev/mapper/dm-test bs=1M count=50
52428800 Bytes (52 MB) kopiert, 2,38128 s, 22,0 MB/s
# dd if=/dev/zero of=/dev/mapper/dm-test bs=1M count=50
52428800 Bytes (52 MB) kopiert, 2,29486 s, 22,8 MB/s
# dd if=/dev/zero of=/dev/mapper/dm-test bs=1M count=50
52428800 Bytes (52 MB) kopiert, 2,37162 s, 22,1 MB/s
Signed-off-by: Nicolas Kaiser <nikai@nikai.net>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
-rw-r--r-- | crypto/cast5.c | 74 |
1 file changed, 24 insertions, 50 deletions
diff --git a/crypto/cast5.c b/crypto/cast5.c index a1d2294b50ad..4a230ddec877 100644 --- a/crypto/cast5.c +++ b/crypto/cast5.c | |||
@@ -604,36 +604,23 @@ static void cast5_encrypt(struct crypto_tfm *tfm, u8 *outbuf, const u8 *inbuf) | |||
604 | * Rounds 3, 6, 9, 12, and 15 use f function Type 3. | 604 | * Rounds 3, 6, 9, 12, and 15 use f function Type 3. |
605 | */ | 605 | */ |
606 | 606 | ||
607 | t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]); | ||
608 | t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]); | ||
609 | t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]); | ||
610 | t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]); | ||
611 | t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]); | ||
612 | t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]); | ||
613 | t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]); | ||
614 | t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]); | ||
615 | t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]); | ||
616 | t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]); | ||
617 | t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]); | ||
618 | t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]); | ||
607 | if (!(c->rr)) { | 619 | if (!(c->rr)) { |
608 | t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]); | ||
609 | t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]); | ||
610 | t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]); | ||
611 | t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]); | ||
612 | t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]); | ||
613 | t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]); | ||
614 | t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]); | ||
615 | t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]); | ||
616 | t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]); | ||
617 | t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]); | ||
618 | t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]); | ||
619 | t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]); | ||
620 | t = l; l = r; r = t ^ F1(r, Km[12], Kr[12]); | 620 | t = l; l = r; r = t ^ F1(r, Km[12], Kr[12]); |
621 | t = l; l = r; r = t ^ F2(r, Km[13], Kr[13]); | 621 | t = l; l = r; r = t ^ F2(r, Km[13], Kr[13]); |
622 | t = l; l = r; r = t ^ F3(r, Km[14], Kr[14]); | 622 | t = l; l = r; r = t ^ F3(r, Km[14], Kr[14]); |
623 | t = l; l = r; r = t ^ F1(r, Km[15], Kr[15]); | 623 | t = l; l = r; r = t ^ F1(r, Km[15], Kr[15]); |
624 | } else { | ||
625 | t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]); | ||
626 | t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]); | ||
627 | t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]); | ||
628 | t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]); | ||
629 | t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]); | ||
630 | t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]); | ||
631 | t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]); | ||
632 | t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]); | ||
633 | t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]); | ||
634 | t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]); | ||
635 | t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]); | ||
636 | t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]); | ||
637 | } | 624 | } |
638 | 625 | ||
639 | /* c1...c64 <-- (R16,L16). (Exchange final blocks L16, R16 and | 626 | /* c1...c64 <-- (R16,L16). (Exchange final blocks L16, R16 and |
@@ -663,32 +650,19 @@ static void cast5_decrypt(struct crypto_tfm *tfm, u8 *outbuf, const u8 *inbuf) | |||
663 | t = l; l = r; r = t ^ F3(r, Km[14], Kr[14]); | 650 | t = l; l = r; r = t ^ F3(r, Km[14], Kr[14]); |
664 | t = l; l = r; r = t ^ F2(r, Km[13], Kr[13]); | 651 | t = l; l = r; r = t ^ F2(r, Km[13], Kr[13]); |
665 | t = l; l = r; r = t ^ F1(r, Km[12], Kr[12]); | 652 | t = l; l = r; r = t ^ F1(r, Km[12], Kr[12]); |
666 | t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]); | ||
667 | t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]); | ||
668 | t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]); | ||
669 | t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]); | ||
670 | t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]); | ||
671 | t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]); | ||
672 | t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]); | ||
673 | t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]); | ||
674 | t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]); | ||
675 | t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]); | ||
676 | t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]); | ||
677 | t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]); | ||
678 | } else { | ||
679 | t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]); | ||
680 | t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]); | ||
681 | t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]); | ||
682 | t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]); | ||
683 | t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]); | ||
684 | t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]); | ||
685 | t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]); | ||
686 | t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]); | ||
687 | t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]); | ||
688 | t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]); | ||
689 | t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]); | ||
690 | t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]); | ||
691 | } | 653 | } |
654 | t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]); | ||
655 | t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]); | ||
656 | t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]); | ||
657 | t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]); | ||
658 | t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]); | ||
659 | t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]); | ||
660 | t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]); | ||
661 | t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]); | ||
662 | t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]); | ||
663 | t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]); | ||
664 | t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]); | ||
665 | t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]); | ||
692 | 666 | ||
693 | dst[0] = cpu_to_be32(r); | 667 | dst[0] = cpu_to_be32(r); |
694 | dst[1] = cpu_to_be32(l); | 668 | dst[1] = cpu_to_be32(l); |