aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Nicolas Kaiser <nikai@nikai.net> 2010-11-04 14:58:12 -0400
committer: Herbert Xu <herbert@gondor.apana.org.au> 2010-11-13 07:47:55 -0500
commit: 895be15745d59cc7ede0e1c203e3432b0abdb71c (patch)
tree: 38c15d799b7be1768dca4ec638d964c6f7298336
parent: 90246e79af062fcbb8c3728a5f29cb19b3468f59 (diff)
crypto: cast5 - simplify if-statements
I noticed that by factoring out common rounds from the branches of the
if-statements in the encryption and decryption functions, the executable
file size goes down significantly, for crypto/cast5.ko from 26688 bytes
to 24336 bytes (amd64).

On my test system, I saw a slight speedup. This is the first time I'm
doing such a benchmark - I found a similar one on the crypto mailing list,
and I hope I did it right?

Before:
# cryptsetup create dm-test /dev/hda2 -c cast5-cbc-plain -s 128
Passsatz eingeben:
# dd if=/dev/zero of=/dev/mapper/dm-test bs=1M count=50
52428800 Bytes (52 MB) kopiert, 2,43484 s, 21,5 MB/s
# dd if=/dev/zero of=/dev/mapper/dm-test bs=1M count=50
52428800 Bytes (52 MB) kopiert, 2,4089 s, 21,8 MB/s
# dd if=/dev/zero of=/dev/mapper/dm-test bs=1M count=50
52428800 Bytes (52 MB) kopiert, 2,41091 s, 21,7 MB/s

After:
# cryptsetup create dm-test /dev/hda2 -c cast5-cbc-plain -s 128
Passsatz eingeben:
# dd if=/dev/zero of=/dev/mapper/dm-test bs=1M count=50
52428800 Bytes (52 MB) kopiert, 2,38128 s, 22,0 MB/s
# dd if=/dev/zero of=/dev/mapper/dm-test bs=1M count=50
52428800 Bytes (52 MB) kopiert, 2,29486 s, 22,8 MB/s
# dd if=/dev/zero of=/dev/mapper/dm-test bs=1M count=50
52428800 Bytes (52 MB) kopiert, 2,37162 s, 22,1 MB/s

Signed-off-by: Nicolas Kaiser <nikai@nikai.net>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
-rw-r--r-- crypto/cast5.c 74
1 files changed, 24 insertions, 50 deletions
diff --git a/crypto/cast5.c b/crypto/cast5.c
index a1d2294b50ad..4a230ddec877 100644
--- a/crypto/cast5.c
+++ b/crypto/cast5.c
@@ -604,36 +604,23 @@ static void cast5_encrypt(struct crypto_tfm *tfm, u8 *outbuf, const u8 *inbuf)
604 * Rounds 3, 6, 9, 12, and 15 use f function Type 3. 604 * Rounds 3, 6, 9, 12, and 15 use f function Type 3.
605 */ 605 */
606 606
607 t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]);
608 t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]);
609 t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]);
610 t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]);
611 t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]);
612 t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]);
613 t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]);
614 t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]);
615 t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]);
616 t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]);
617 t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]);
618 t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]);
607 if (!(c->rr)) { 619 if (!(c->rr)) {
608 t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]);
609 t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]);
610 t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]);
611 t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]);
612 t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]);
613 t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]);
614 t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]);
615 t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]);
616 t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]);
617 t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]);
618 t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]);
619 t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]);
620 t = l; l = r; r = t ^ F1(r, Km[12], Kr[12]); 620 t = l; l = r; r = t ^ F1(r, Km[12], Kr[12]);
621 t = l; l = r; r = t ^ F2(r, Km[13], Kr[13]); 621 t = l; l = r; r = t ^ F2(r, Km[13], Kr[13]);
622 t = l; l = r; r = t ^ F3(r, Km[14], Kr[14]); 622 t = l; l = r; r = t ^ F3(r, Km[14], Kr[14]);
623 t = l; l = r; r = t ^ F1(r, Km[15], Kr[15]); 623 t = l; l = r; r = t ^ F1(r, Km[15], Kr[15]);
624 } else {
625 t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]);
626 t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]);
627 t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]);
628 t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]);
629 t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]);
630 t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]);
631 t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]);
632 t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]);
633 t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]);
634 t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]);
635 t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]);
636 t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]);
637 } 624 }
638 625
639 /* c1...c64 <-- (R16,L16). (Exchange final blocks L16, R16 and 626 /* c1...c64 <-- (R16,L16). (Exchange final blocks L16, R16 and
@@ -663,32 +650,19 @@ static void cast5_decrypt(struct crypto_tfm *tfm, u8 *outbuf, const u8 *inbuf)
663 t = l; l = r; r = t ^ F3(r, Km[14], Kr[14]); 650 t = l; l = r; r = t ^ F3(r, Km[14], Kr[14]);
664 t = l; l = r; r = t ^ F2(r, Km[13], Kr[13]); 651 t = l; l = r; r = t ^ F2(r, Km[13], Kr[13]);
665 t = l; l = r; r = t ^ F1(r, Km[12], Kr[12]); 652 t = l; l = r; r = t ^ F1(r, Km[12], Kr[12]);
666 t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]);
667 t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]);
668 t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]);
669 t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]);
670 t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]);
671 t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]);
672 t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]);
673 t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]);
674 t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]);
675 t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]);
676 t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]);
677 t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]);
678 } else {
679 t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]);
680 t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]);
681 t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]);
682 t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]);
683 t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]);
684 t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]);
685 t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]);
686 t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]);
687 t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]);
688 t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]);
689 t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]);
690 t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]);
691 } 653 }
654 t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]);
655 t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]);
656 t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]);
657 t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]);
658 t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]);
659 t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]);
660 t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]);
661 t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]);
662 t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]);
663 t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]);
664 t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]);
665 t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]);
692 666
693 dst[0] = cpu_to_be32(r); 667 dst[0] = cpu_to_be32(r);
694 dst[1] = cpu_to_be32(l); 668 dst[1] = cpu_to_be32(l);