aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2010-05-19 00:11:21 -0400
committerHerbert Xu <herbert@gondor.apana.org.au>2010-05-19 00:11:21 -0400
commitbeb63da739f797519aa9990297697abf4db1ac0d (patch)
treedb40ed026c87d248e4b67e09999b4e889f8f589c
parent85c6201a80ce4464a52c58a5f5ea8de15a557a6f (diff)
crypto: tcrypt - Add speed tests for async hashing
These are invoked in the 'mode' range of 400 to 499. The cost of async vs. sync for the software algorithm implementations varies. It can be as low as 16 cycles but as much as a couple hundred. Here two runs of md5 testing, async then sync: testing speed of async md5 test 0 ( 16 byte blocks, 16 bytes per update, 1 updates): 2448 cycles/operation, 153 cycles/byte test 1 ( 64 byte blocks, 16 bytes per update, 4 updates): 4992 cycles/operation, 78 cycles/byte test 2 ( 64 byte blocks, 64 bytes per update, 1 updates): 3808 cycles/operation, 59 cycles/byte test 3 ( 256 byte blocks, 16 bytes per update, 16 updates): 14000 cycles/operation, 54 cycles/byte test 4 ( 256 byte blocks, 64 bytes per update, 4 updates): 8480 cycles/operation, 33 cycles/byte test 5 ( 256 byte blocks, 256 bytes per update, 1 updates): 7280 cycles/operation, 28 cycles/byte test 6 ( 1024 byte blocks, 16 bytes per update, 64 updates): 50016 cycles/operation, 48 cycles/byte test 7 ( 1024 byte blocks, 256 bytes per update, 4 updates): 22496 cycles/operation, 21 cycles/byte test 8 ( 1024 byte blocks, 1024 bytes per update, 1 updates): 21232 cycles/operation, 20 cycles/byte test 9 ( 2048 byte blocks, 16 bytes per update, 128 updates): 117184 cycles/operation, 57 cycles/byte test 10 ( 2048 byte blocks, 256 bytes per update, 8 updates): 43008 cycles/operation, 21 cycles/byte test 11 ( 2048 byte blocks, 1024 bytes per update, 2 updates): 40176 cycles/operation, 19 cycles/byte test 12 ( 2048 byte blocks, 2048 bytes per update, 1 updates): 39888 cycles/operation, 19 cycles/byte test 13 ( 4096 byte blocks, 16 bytes per update, 256 updates): 194176 cycles/operation, 47 cycles/byte test 14 ( 4096 byte blocks, 256 bytes per update, 16 updates): 84096 cycles/operation, 20 cycles/byte test 15 ( 4096 byte blocks, 1024 bytes per update, 4 updates): 78336 cycles/operation, 19 cycles/byte test 16 ( 4096 byte blocks, 4096 bytes per update, 1 updates): 77120 cycles/operation, 18 cycles/byte test 17 ( 8192 byte blocks, 16 bytes per update, 512 updates): 403056 cycles/operation, 49 cycles/byte test 18 ( 8192 byte blocks, 256 bytes per update, 32 updates): 166112 cycles/operation, 20 cycles/byte test 19 ( 8192 byte blocks, 1024 bytes per update, 8 updates): 154768 cycles/operation, 18 cycles/byte test 20 ( 8192 byte blocks, 4096 bytes per update, 2 updates): 151904 cycles/operation, 18 cycles/byte test 21 ( 8192 byte blocks, 8192 bytes per update, 1 updates): 155456 cycles/operation, 18 cycles/byte testing speed of md5 test 0 ( 16 byte blocks, 16 bytes per update, 1 updates): 2208 cycles/operation, 138 cycles/byte test 1 ( 64 byte blocks, 16 bytes per update, 4 updates): 5008 cycles/operation, 78 cycles/byte test 2 ( 64 byte blocks, 64 bytes per update, 1 updates): 3600 cycles/operation, 56 cycles/byte test 3 ( 256 byte blocks, 16 bytes per update, 16 updates): 14080 cycles/operation, 55 cycles/byte test 4 ( 256 byte blocks, 64 bytes per update, 4 updates): 8560 cycles/operation, 33 cycles/byte test 5 ( 256 byte blocks, 256 bytes per update, 1 updates): 7040 cycles/operation, 27 cycles/byte test 6 ( 1024 byte blocks, 16 bytes per update, 64 updates): 50592 cycles/operation, 49 cycles/byte test 7 ( 1024 byte blocks, 256 bytes per update, 4 updates): 22736 cycles/operation, 22 cycles/byte test 8 ( 1024 byte blocks, 1024 bytes per update, 1 updates): 24960 cycles/operation, 24 cycles/byte test 9 ( 2048 byte blocks, 16 bytes per update, 128 updates): 99312 cycles/operation, 48 cycles/byte test 10 ( 2048 byte blocks, 256 bytes per update, 8 updates): 43520 cycles/operation, 21 cycles/byte test 11 ( 2048 byte blocks, 1024 bytes per update, 2 updates): 40704 cycles/operation, 19 cycles/byte test 12 ( 2048 byte blocks, 2048 bytes per update, 1 updates): 39552 cycles/operation, 19 cycles/byte test 13 ( 4096 byte blocks, 16 bytes per update, 256 updates): 196720 cycles/operation, 48 cycles/byte test 14 ( 4096 byte blocks, 256 bytes per update, 16 updates): 85152 cycles/operation, 20 cycles/byte test 15 ( 4096 byte blocks, 1024 bytes per update, 4 updates): 79408 cycles/operation, 19 cycles/byte test 16 ( 4096 byte blocks, 4096 bytes per update, 1 updates): 76816 cycles/operation, 18 cycles/byte test 17 ( 8192 byte blocks, 16 bytes per update, 512 updates): 391520 cycles/operation, 47 cycles/byte test 18 ( 8192 byte blocks, 256 bytes per update, 32 updates): 168464 cycles/operation, 20 cycles/byte test 19 ( 8192 byte blocks, 1024 bytes per update, 8 updates): 156912 cycles/operation, 19 cycles/byte test 20 ( 8192 byte blocks, 4096 bytes per update, 2 updates): 154016 cycles/operation, 18 cycles/byte test 21 ( 8192 byte blocks, 8192 bytes per update, 1 updates): 153856 cycles/operation, 18 cycles/byte We can ditch the sync hash code at some point if we feel that makes sense. For now I've left it there. Signed-off-by: David S. Miller <davem@davemloft.net> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
-rw-r--r--crypto/tcrypt.c336
1 files changed, 330 insertions, 6 deletions
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index ea610ad45aa1..3ca68f9fc14d 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -394,6 +394,17 @@ out:
394 return 0; 394 return 0;
395} 395}
396 396
397static void test_hash_sg_init(struct scatterlist *sg)
398{
399 int i;
400
401 sg_init_table(sg, TVMEMSIZE);
402 for (i = 0; i < TVMEMSIZE; i++) {
403 sg_set_buf(sg + i, tvmem[i], PAGE_SIZE);
404 memset(tvmem[i], 0xff, PAGE_SIZE);
405 }
406}
407
397static void test_hash_speed(const char *algo, unsigned int sec, 408static void test_hash_speed(const char *algo, unsigned int sec,
398 struct hash_speed *speed) 409 struct hash_speed *speed)
399{ 410{
@@ -423,12 +434,7 @@ static void test_hash_speed(const char *algo, unsigned int sec,
423 goto out; 434 goto out;
424 } 435 }
425 436
426 sg_init_table(sg, TVMEMSIZE); 437 test_hash_sg_init(sg);
427 for (i = 0; i < TVMEMSIZE; i++) {
428 sg_set_buf(sg + i, tvmem[i], PAGE_SIZE);
429 memset(tvmem[i], 0xff, PAGE_SIZE);
430 }
431
432 for (i = 0; speed[i].blen != 0; i++) { 438 for (i = 0; speed[i].blen != 0; i++) {
433 if (speed[i].blen > TVMEMSIZE * PAGE_SIZE) { 439 if (speed[i].blen > TVMEMSIZE * PAGE_SIZE) {
434 printk(KERN_ERR 440 printk(KERN_ERR
@@ -461,6 +467,250 @@ out:
461 crypto_free_hash(tfm); 467 crypto_free_hash(tfm);
462} 468}
463 469
470struct tcrypt_result {
471 struct completion completion;
472 int err;
473};
474
475static void tcrypt_complete(struct crypto_async_request *req, int err)
476{
477 struct tcrypt_result *res = req->data;
478
479 if (err == -EINPROGRESS)
480 return;
481
482 res->err = err;
483 complete(&res->completion);
484}
485
486static inline int do_one_ahash_op(struct ahash_request *req, int ret)
487{
488 if (ret == -EINPROGRESS || ret == -EBUSY) {
489 struct tcrypt_result *tr = req->base.data;
490
491 ret = wait_for_completion_interruptible(&tr->completion);
492 if (!ret)
493 ret = tr->err;
494 INIT_COMPLETION(tr->completion);
495 }
496 return ret;
497}
498
499static int test_ahash_jiffies_digest(struct ahash_request *req, int blen,
500 char *out, int sec)
501{
502 unsigned long start, end;
503 int bcount;
504 int ret;
505
506 for (start = jiffies, end = start + sec * HZ, bcount = 0;
507 time_before(jiffies, end); bcount++) {
508 ret = do_one_ahash_op(req, crypto_ahash_digest(req));
509 if (ret)
510 return ret;
511 }
512
513 printk("%6u opers/sec, %9lu bytes/sec\n",
514 bcount / sec, ((long)bcount * blen) / sec);
515
516 return 0;
517}
518
519static int test_ahash_jiffies(struct ahash_request *req, int blen,
520 int plen, char *out, int sec)
521{
522 unsigned long start, end;
523 int bcount, pcount;
524 int ret;
525
526 if (plen == blen)
527 return test_ahash_jiffies_digest(req, blen, out, sec);
528
529 for (start = jiffies, end = start + sec * HZ, bcount = 0;
530 time_before(jiffies, end); bcount++) {
531 ret = crypto_ahash_init(req);
532 if (ret)
533 return ret;
534 for (pcount = 0; pcount < blen; pcount += plen) {
535 ret = do_one_ahash_op(req, crypto_ahash_update(req));
536 if (ret)
537 return ret;
538 }
539 /* we assume there is enough space in 'out' for the result */
540 ret = do_one_ahash_op(req, crypto_ahash_final(req));
541 if (ret)
542 return ret;
543 }
544
545 pr_cont("%6u opers/sec, %9lu bytes/sec\n",
546 bcount / sec, ((long)bcount * blen) / sec);
547
548 return 0;
549}
550
551static int test_ahash_cycles_digest(struct ahash_request *req, int blen,
552 char *out)
553{
554 unsigned long cycles = 0;
555 int ret, i;
556
557 /* Warm-up run. */
558 for (i = 0; i < 4; i++) {
559 ret = do_one_ahash_op(req, crypto_ahash_digest(req));
560 if (ret)
561 goto out;
562 }
563
564 /* The real thing. */
565 for (i = 0; i < 8; i++) {
566 cycles_t start, end;
567
568 start = get_cycles();
569
570 ret = do_one_ahash_op(req, crypto_ahash_digest(req));
571 if (ret)
572 goto out;
573
574 end = get_cycles();
575
576 cycles += end - start;
577 }
578
579out:
580 if (ret)
581 return ret;
582
583 pr_cont("%6lu cycles/operation, %4lu cycles/byte\n",
584 cycles / 8, cycles / (8 * blen));
585
586 return 0;
587}
588
589static int test_ahash_cycles(struct ahash_request *req, int blen,
590 int plen, char *out)
591{
592 unsigned long cycles = 0;
593 int i, pcount, ret;
594
595 if (plen == blen)
596 return test_ahash_cycles_digest(req, blen, out);
597
598 /* Warm-up run. */
599 for (i = 0; i < 4; i++) {
600 ret = crypto_ahash_init(req);
601 if (ret)
602 goto out;
603 for (pcount = 0; pcount < blen; pcount += plen) {
604 ret = do_one_ahash_op(req, crypto_ahash_update(req));
605 if (ret)
606 goto out;
607 }
608 ret = do_one_ahash_op(req, crypto_ahash_final(req));
609 if (ret)
610 goto out;
611 }
612
613 /* The real thing. */
614 for (i = 0; i < 8; i++) {
615 cycles_t start, end;
616
617 start = get_cycles();
618
619 ret = crypto_ahash_init(req);
620 if (ret)
621 goto out;
622 for (pcount = 0; pcount < blen; pcount += plen) {
623 ret = do_one_ahash_op(req, crypto_ahash_update(req));
624 if (ret)
625 goto out;
626 }
627 ret = do_one_ahash_op(req, crypto_ahash_final(req));
628 if (ret)
629 goto out;
630
631 end = get_cycles();
632
633 cycles += end - start;
634 }
635
636out:
637 if (ret)
638 return ret;
639
640 pr_cont("%6lu cycles/operation, %4lu cycles/byte\n",
641 cycles / 8, cycles / (8 * blen));
642
643 return 0;
644}
645
646static void test_ahash_speed(const char *algo, unsigned int sec,
647 struct hash_speed *speed)
648{
649 struct scatterlist sg[TVMEMSIZE];
650 struct tcrypt_result tresult;
651 struct ahash_request *req;
652 struct crypto_ahash *tfm;
653 static char output[1024];
654 int i, ret;
655
656 printk(KERN_INFO "\ntesting speed of async %s\n", algo);
657
658 tfm = crypto_alloc_ahash(algo, 0, 0);
659 if (IS_ERR(tfm)) {
660 pr_err("failed to load transform for %s: %ld\n",
661 algo, PTR_ERR(tfm));
662 return;
663 }
664
665 if (crypto_ahash_digestsize(tfm) > sizeof(output)) {
666 pr_err("digestsize(%u) > outputbuffer(%zu)\n",
667 crypto_ahash_digestsize(tfm), sizeof(output));
668 goto out;
669 }
670
671 test_hash_sg_init(sg);
672 req = ahash_request_alloc(tfm, GFP_KERNEL);
673 if (!req) {
674 pr_err("ahash request allocation failure\n");
675 goto out;
676 }
677
678 init_completion(&tresult.completion);
679 ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
680 tcrypt_complete, &tresult);
681
682 for (i = 0; speed[i].blen != 0; i++) {
683 if (speed[i].blen > TVMEMSIZE * PAGE_SIZE) {
684 pr_err("template (%u) too big for tvmem (%lu)\n",
685 speed[i].blen, TVMEMSIZE * PAGE_SIZE);
686 break;
687 }
688
689 pr_info("test%3u "
690 "(%5u byte blocks,%5u bytes per update,%4u updates): ",
691 i, speed[i].blen, speed[i].plen, speed[i].blen / speed[i].plen);
692
693 ahash_request_set_crypt(req, sg, output, speed[i].plen);
694
695 if (sec)
696 ret = test_ahash_jiffies(req, speed[i].blen,
697 speed[i].plen, output, sec);
698 else
699 ret = test_ahash_cycles(req, speed[i].blen,
700 speed[i].plen, output);
701
702 if (ret) {
703 pr_err("hashing failed ret=%d\n", ret);
704 break;
705 }
706 }
707
708 ahash_request_free(req);
709
710out:
711 crypto_free_ahash(tfm);
712}
713
464static void test_available(void) 714static void test_available(void)
465{ 715{
466 char **name = check; 716 char **name = check;
@@ -891,6 +1141,80 @@ static int do_test(int m)
891 case 399: 1141 case 399:
892 break; 1142 break;
893 1143
1144 case 400:
1145 /* fall through */
1146
1147 case 401:
1148 test_ahash_speed("md4", sec, generic_hash_speed_template);
1149 if (mode > 400 && mode < 500) break;
1150
1151 case 402:
1152 test_ahash_speed("md5", sec, generic_hash_speed_template);
1153 if (mode > 400 && mode < 500) break;
1154
1155 case 403:
1156 test_ahash_speed("sha1", sec, generic_hash_speed_template);
1157 if (mode > 400 && mode < 500) break;
1158
1159 case 404:
1160 test_ahash_speed("sha256", sec, generic_hash_speed_template);
1161 if (mode > 400 && mode < 500) break;
1162
1163 case 405:
1164 test_ahash_speed("sha384", sec, generic_hash_speed_template);
1165 if (mode > 400 && mode < 500) break;
1166
1167 case 406:
1168 test_ahash_speed("sha512", sec, generic_hash_speed_template);
1169 if (mode > 400 && mode < 500) break;
1170
1171 case 407:
1172 test_ahash_speed("wp256", sec, generic_hash_speed_template);
1173 if (mode > 400 && mode < 500) break;
1174
1175 case 408:
1176 test_ahash_speed("wp384", sec, generic_hash_speed_template);
1177 if (mode > 400 && mode < 500) break;
1178
1179 case 409:
1180 test_ahash_speed("wp512", sec, generic_hash_speed_template);
1181 if (mode > 400 && mode < 500) break;
1182
1183 case 410:
1184 test_ahash_speed("tgr128", sec, generic_hash_speed_template);
1185 if (mode > 400 && mode < 500) break;
1186
1187 case 411:
1188 test_ahash_speed("tgr160", sec, generic_hash_speed_template);
1189 if (mode > 400 && mode < 500) break;
1190
1191 case 412:
1192 test_ahash_speed("tgr192", sec, generic_hash_speed_template);
1193 if (mode > 400 && mode < 500) break;
1194
1195 case 413:
1196 test_ahash_speed("sha224", sec, generic_hash_speed_template);
1197 if (mode > 400 && mode < 500) break;
1198
1199 case 414:
1200 test_ahash_speed("rmd128", sec, generic_hash_speed_template);
1201 if (mode > 400 && mode < 500) break;
1202
1203 case 415:
1204 test_ahash_speed("rmd160", sec, generic_hash_speed_template);
1205 if (mode > 400 && mode < 500) break;
1206
1207 case 416:
1208 test_ahash_speed("rmd256", sec, generic_hash_speed_template);
1209 if (mode > 400 && mode < 500) break;
1210
1211 case 417:
1212 test_ahash_speed("rmd320", sec, generic_hash_speed_template);
1213 if (mode > 400 && mode < 500) break;
1214
1215 case 499:
1216 break;
1217
894 case 1000: 1218 case 1000:
895 test_available(); 1219 test_available();
896 break; 1220 break;