aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHuang Ying <ying.huang@intel.com>2010-03-10 05:28:55 -0500
committerHerbert Xu <herbert@gondor.apana.org.au>2010-03-10 05:28:55 -0500
commit12387a46bb150f5608de4aa9a90dfdddbf991e3f (patch)
treea840b4a5da93cc3658eeb2477e47f402d0c77e28
parent269ab459da46ae37979a0d16307d1fcaa05600b2 (diff)
crypto: aesni-intel - Add AES-NI accelerated CTR mode
To take advantage of the hardware pipeline implementation of AES-NI instructions. CTR mode cryption is implemented in ASM to schedule multiple AES-NI instructions one after another. This way, some latency of AES-NI instruction can be eliminated. Performance testing based on dm-crypt should 50% reduction of ecryption/decryption time. Signed-off-by: Huang Ying <ying.huang@intel.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
-rw-r--r--arch/x86/crypto/aesni-intel_asm.S115
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c130
2 files changed, 238 insertions, 7 deletions
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 20bb0e1ac681..822846a9eba9 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -32,6 +32,9 @@
32#define IN IN1 32#define IN IN1
33#define KEY %xmm2 33#define KEY %xmm2
34#define IV %xmm3 34#define IV %xmm3
35#define BSWAP_MASK %xmm10
36#define CTR %xmm11
37#define INC %xmm12
35 38
36#define KEYP %rdi 39#define KEYP %rdi
37#define OUTP %rsi 40#define OUTP %rsi
@@ -42,6 +45,7 @@
42#define T1 %r10 45#define T1 %r10
43#define TKEYP T1 46#define TKEYP T1
44#define T2 %r11 47#define T2 %r11
48#define TCTR_LOW T2
45 49
46_key_expansion_128: 50_key_expansion_128:
47_key_expansion_256a: 51_key_expansion_256a:
@@ -724,3 +728,114 @@ ENTRY(aesni_cbc_dec)
724 movups IV, (IVP) 728 movups IV, (IVP)
725.Lcbc_dec_just_ret: 729.Lcbc_dec_just_ret:
726 ret 730 ret
731
732.align 16
733.Lbswap_mask:
734 .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
735
736/*
737 * _aesni_inc_init: internal ABI
738 * setup registers used by _aesni_inc
739 * input:
740 * IV
741 * output:
742 * CTR: == IV, in little endian
743 * TCTR_LOW: == lower qword of CTR
744 * INC: == 1, in little endian
745 * BSWAP_MASK == endian swapping mask
746 */
747_aesni_inc_init:
748 movaps .Lbswap_mask, BSWAP_MASK
749 movaps IV, CTR
750 PSHUFB_XMM BSWAP_MASK CTR
751 mov $1, TCTR_LOW
752 movq TCTR_LOW, INC
753 movq CTR, TCTR_LOW
754 ret
755
756/*
757 * _aesni_inc: internal ABI
758 * Increase IV by 1, IV is in big endian
759 * input:
760 * IV
761 * CTR: == IV, in little endian
762 * TCTR_LOW: == lower qword of CTR
763 * INC: == 1, in little endian
764 * BSWAP_MASK == endian swapping mask
765 * output:
766 * IV: Increase by 1
767 * changed:
768 * CTR: == output IV, in little endian
769 * TCTR_LOW: == lower qword of CTR
770 */
771_aesni_inc:
772 paddq INC, CTR
773 add $1, TCTR_LOW
774 jnc .Linc_low
775 pslldq $8, INC
776 paddq INC, CTR
777 psrldq $8, INC
778.Linc_low:
779 movaps CTR, IV
780 PSHUFB_XMM BSWAP_MASK IV
781 ret
782
783/*
784 * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
785 * size_t len, u8 *iv)
786 */
787ENTRY(aesni_ctr_enc)
788 cmp $16, LEN
789 jb .Lctr_enc_just_ret
790 mov 480(KEYP), KLEN
791 movups (IVP), IV
792 call _aesni_inc_init
793 cmp $64, LEN
794 jb .Lctr_enc_loop1
795.align 4
796.Lctr_enc_loop4:
797 movaps IV, STATE1
798 call _aesni_inc
799 movups (INP), IN1
800 movaps IV, STATE2
801 call _aesni_inc
802 movups 0x10(INP), IN2
803 movaps IV, STATE3
804 call _aesni_inc
805 movups 0x20(INP), IN3
806 movaps IV, STATE4
807 call _aesni_inc
808 movups 0x30(INP), IN4
809 call _aesni_enc4
810 pxor IN1, STATE1
811 movups STATE1, (OUTP)
812 pxor IN2, STATE2
813 movups STATE2, 0x10(OUTP)
814 pxor IN3, STATE3
815 movups STATE3, 0x20(OUTP)
816 pxor IN4, STATE4
817 movups STATE4, 0x30(OUTP)
818 sub $64, LEN
819 add $64, INP
820 add $64, OUTP
821 cmp $64, LEN
822 jge .Lctr_enc_loop4
823 cmp $16, LEN
824 jb .Lctr_enc_ret
825.align 4
826.Lctr_enc_loop1:
827 movaps IV, STATE
828 call _aesni_inc
829 movups (INP), IN
830 call _aesni_enc1
831 pxor IN, STATE
832 movups STATE, (OUTP)
833 sub $16, LEN
834 add $16, INP
835 add $16, OUTP
836 cmp $16, LEN
837 jge .Lctr_enc_loop1
838.Lctr_enc_ret:
839 movups IV, (IVP)
840.Lctr_enc_just_ret:
841 ret
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 49c552c060e9..2cb3dcc4490a 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -18,6 +18,7 @@
18#include <crypto/algapi.h> 18#include <crypto/algapi.h>
19#include <crypto/aes.h> 19#include <crypto/aes.h>
20#include <crypto/cryptd.h> 20#include <crypto/cryptd.h>
21#include <crypto/ctr.h>
21#include <asm/i387.h> 22#include <asm/i387.h>
22#include <asm/aes.h> 23#include <asm/aes.h>
23 24
@@ -58,6 +59,8 @@ asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out,
58 const u8 *in, unsigned int len, u8 *iv); 59 const u8 *in, unsigned int len, u8 *iv);
59asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, 60asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
60 const u8 *in, unsigned int len, u8 *iv); 61 const u8 *in, unsigned int len, u8 *iv);
62asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out,
63 const u8 *in, unsigned int len, u8 *iv);
61 64
62static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx) 65static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx)
63{ 66{
@@ -321,6 +324,72 @@ static struct crypto_alg blk_cbc_alg = {
321 }, 324 },
322}; 325};
323 326
327static void ctr_crypt_final(struct crypto_aes_ctx *ctx,
328 struct blkcipher_walk *walk)
329{
330 u8 *ctrblk = walk->iv;
331 u8 keystream[AES_BLOCK_SIZE];
332 u8 *src = walk->src.virt.addr;
333 u8 *dst = walk->dst.virt.addr;
334 unsigned int nbytes = walk->nbytes;
335
336 aesni_enc(ctx, keystream, ctrblk);
337 crypto_xor(keystream, src, nbytes);
338 memcpy(dst, keystream, nbytes);
339 crypto_inc(ctrblk, AES_BLOCK_SIZE);
340}
341
342static int ctr_crypt(struct blkcipher_desc *desc,
343 struct scatterlist *dst, struct scatterlist *src,
344 unsigned int nbytes)
345{
346 struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
347 struct blkcipher_walk walk;
348 int err;
349
350 blkcipher_walk_init(&walk, dst, src, nbytes);
351 err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
352 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
353
354 kernel_fpu_begin();
355 while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
356 aesni_ctr_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
357 nbytes & AES_BLOCK_MASK, walk.iv);
358 nbytes &= AES_BLOCK_SIZE - 1;
359 err = blkcipher_walk_done(desc, &walk, nbytes);
360 }
361 if (walk.nbytes) {
362 ctr_crypt_final(ctx, &walk);
363 err = blkcipher_walk_done(desc, &walk, 0);
364 }
365 kernel_fpu_end();
366
367 return err;
368}
369
370static struct crypto_alg blk_ctr_alg = {
371 .cra_name = "__ctr-aes-aesni",
372 .cra_driver_name = "__driver-ctr-aes-aesni",
373 .cra_priority = 0,
374 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
375 .cra_blocksize = 1,
376 .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1,
377 .cra_alignmask = 0,
378 .cra_type = &crypto_blkcipher_type,
379 .cra_module = THIS_MODULE,
380 .cra_list = LIST_HEAD_INIT(blk_ctr_alg.cra_list),
381 .cra_u = {
382 .blkcipher = {
383 .min_keysize = AES_MIN_KEY_SIZE,
384 .max_keysize = AES_MAX_KEY_SIZE,
385 .ivsize = AES_BLOCK_SIZE,
386 .setkey = aes_set_key,
387 .encrypt = ctr_crypt,
388 .decrypt = ctr_crypt,
389 },
390 },
391};
392
324static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, 393static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
325 unsigned int key_len) 394 unsigned int key_len)
326{ 395{
@@ -467,13 +536,11 @@ static struct crypto_alg ablk_cbc_alg = {
467 }, 536 },
468}; 537};
469 538
470#ifdef HAS_CTR
471static int ablk_ctr_init(struct crypto_tfm *tfm) 539static int ablk_ctr_init(struct crypto_tfm *tfm)
472{ 540{
473 struct cryptd_ablkcipher *cryptd_tfm; 541 struct cryptd_ablkcipher *cryptd_tfm;
474 542
475 cryptd_tfm = cryptd_alloc_ablkcipher("fpu(ctr(__driver-aes-aesni))", 543 cryptd_tfm = cryptd_alloc_ablkcipher("__driver-ctr-aes-aesni", 0, 0);
476 0, 0);
477 if (IS_ERR(cryptd_tfm)) 544 if (IS_ERR(cryptd_tfm))
478 return PTR_ERR(cryptd_tfm); 545 return PTR_ERR(cryptd_tfm);
479 ablk_init_common(tfm, cryptd_tfm); 546 ablk_init_common(tfm, cryptd_tfm);
@@ -500,11 +567,50 @@ static struct crypto_alg ablk_ctr_alg = {
500 .ivsize = AES_BLOCK_SIZE, 567 .ivsize = AES_BLOCK_SIZE,
501 .setkey = ablk_set_key, 568 .setkey = ablk_set_key,
502 .encrypt = ablk_encrypt, 569 .encrypt = ablk_encrypt,
503 .decrypt = ablk_decrypt, 570 .decrypt = ablk_encrypt,
504 .geniv = "chainiv", 571 .geniv = "chainiv",
505 }, 572 },
506 }, 573 },
507}; 574};
575
576#ifdef HAS_CTR
577static int ablk_rfc3686_ctr_init(struct crypto_tfm *tfm)
578{
579 struct cryptd_ablkcipher *cryptd_tfm;
580
581 cryptd_tfm = cryptd_alloc_ablkcipher(
582 "rfc3686(__driver-ctr-aes-aesni)", 0, 0);
583 if (IS_ERR(cryptd_tfm))
584 return PTR_ERR(cryptd_tfm);
585 ablk_init_common(tfm, cryptd_tfm);
586 return 0;
587}
588
589static struct crypto_alg ablk_rfc3686_ctr_alg = {
590 .cra_name = "rfc3686(ctr(aes))",
591 .cra_driver_name = "rfc3686-ctr-aes-aesni",
592 .cra_priority = 400,
593 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
594 .cra_blocksize = 1,
595 .cra_ctxsize = sizeof(struct async_aes_ctx),
596 .cra_alignmask = 0,
597 .cra_type = &crypto_ablkcipher_type,
598 .cra_module = THIS_MODULE,
599 .cra_list = LIST_HEAD_INIT(ablk_rfc3686_ctr_alg.cra_list),
600 .cra_init = ablk_rfc3686_ctr_init,
601 .cra_exit = ablk_exit,
602 .cra_u = {
603 .ablkcipher = {
604 .min_keysize = AES_MIN_KEY_SIZE+CTR_RFC3686_NONCE_SIZE,
605 .max_keysize = AES_MAX_KEY_SIZE+CTR_RFC3686_NONCE_SIZE,
606 .ivsize = CTR_RFC3686_IV_SIZE,
607 .setkey = ablk_set_key,
608 .encrypt = ablk_encrypt,
609 .decrypt = ablk_decrypt,
610 .geniv = "seqiv",
611 },
612 },
613};
508#endif 614#endif
509 615
510#ifdef HAS_LRW 616#ifdef HAS_LRW
@@ -640,13 +746,17 @@ static int __init aesni_init(void)
640 goto blk_ecb_err; 746 goto blk_ecb_err;
641 if ((err = crypto_register_alg(&blk_cbc_alg))) 747 if ((err = crypto_register_alg(&blk_cbc_alg)))
642 goto blk_cbc_err; 748 goto blk_cbc_err;
749 if ((err = crypto_register_alg(&blk_ctr_alg)))
750 goto blk_ctr_err;
643 if ((err = crypto_register_alg(&ablk_ecb_alg))) 751 if ((err = crypto_register_alg(&ablk_ecb_alg)))
644 goto ablk_ecb_err; 752 goto ablk_ecb_err;
645 if ((err = crypto_register_alg(&ablk_cbc_alg))) 753 if ((err = crypto_register_alg(&ablk_cbc_alg)))
646 goto ablk_cbc_err; 754 goto ablk_cbc_err;
647#ifdef HAS_CTR
648 if ((err = crypto_register_alg(&ablk_ctr_alg))) 755 if ((err = crypto_register_alg(&ablk_ctr_alg)))
649 goto ablk_ctr_err; 756 goto ablk_ctr_err;
757#ifdef HAS_CTR
758 if ((err = crypto_register_alg(&ablk_rfc3686_ctr_alg)))
759 goto ablk_rfc3686_ctr_err;
650#endif 760#endif
651#ifdef HAS_LRW 761#ifdef HAS_LRW
652 if ((err = crypto_register_alg(&ablk_lrw_alg))) 762 if ((err = crypto_register_alg(&ablk_lrw_alg)))
@@ -675,13 +785,17 @@ ablk_pcbc_err:
675ablk_lrw_err: 785ablk_lrw_err:
676#endif 786#endif
677#ifdef HAS_CTR 787#ifdef HAS_CTR
788 crypto_unregister_alg(&ablk_rfc3686_ctr_alg);
789ablk_rfc3686_ctr_err:
790#endif
678 crypto_unregister_alg(&ablk_ctr_alg); 791 crypto_unregister_alg(&ablk_ctr_alg);
679ablk_ctr_err: 792ablk_ctr_err:
680#endif
681 crypto_unregister_alg(&ablk_cbc_alg); 793 crypto_unregister_alg(&ablk_cbc_alg);
682ablk_cbc_err: 794ablk_cbc_err:
683 crypto_unregister_alg(&ablk_ecb_alg); 795 crypto_unregister_alg(&ablk_ecb_alg);
684ablk_ecb_err: 796ablk_ecb_err:
797 crypto_unregister_alg(&blk_ctr_alg);
798blk_ctr_err:
685 crypto_unregister_alg(&blk_cbc_alg); 799 crypto_unregister_alg(&blk_cbc_alg);
686blk_cbc_err: 800blk_cbc_err:
687 crypto_unregister_alg(&blk_ecb_alg); 801 crypto_unregister_alg(&blk_ecb_alg);
@@ -705,10 +819,12 @@ static void __exit aesni_exit(void)
705 crypto_unregister_alg(&ablk_lrw_alg); 819 crypto_unregister_alg(&ablk_lrw_alg);
706#endif 820#endif
707#ifdef HAS_CTR 821#ifdef HAS_CTR
708 crypto_unregister_alg(&ablk_ctr_alg); 822 crypto_unregister_alg(&ablk_rfc3686_ctr_alg);
709#endif 823#endif
824 crypto_unregister_alg(&ablk_ctr_alg);
710 crypto_unregister_alg(&ablk_cbc_alg); 825 crypto_unregister_alg(&ablk_cbc_alg);
711 crypto_unregister_alg(&ablk_ecb_alg); 826 crypto_unregister_alg(&ablk_ecb_alg);
827 crypto_unregister_alg(&blk_ctr_alg);
712 crypto_unregister_alg(&blk_cbc_alg); 828 crypto_unregister_alg(&blk_cbc_alg);
713 crypto_unregister_alg(&blk_ecb_alg); 829 crypto_unregister_alg(&blk_ecb_alg);
714 crypto_unregister_alg(&__aesni_alg); 830 crypto_unregister_alg(&__aesni_alg);