aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/crypto/aesni-intel_asm.S
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/crypto/aesni-intel_asm.S')
-rw-r--r--arch/x86/crypto/aesni-intel_asm.S115
1 files changed, 115 insertions, 0 deletions
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 20bb0e1ac681..ff16756a51c1 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -32,6 +32,9 @@
32#define IN IN1 32#define IN IN1
33#define KEY %xmm2 33#define KEY %xmm2
34#define IV %xmm3 34#define IV %xmm3
/*
 * XMM registers reserved for the counter helpers _aesni_inc_init and
 * _aesni_inc:
 *   BSWAP_MASK - byte-reversal shuffle mask, loaded from .Lbswap_mask
 *   CTR        - the counter block held in little endian form
 *   INC        - the constant 1 that is added to CTR
 */
35#define BSWAP_MASK %xmm10
36#define CTR %xmm11
37#define INC %xmm12
35 38
36#define KEYP %rdi 39#define KEYP %rdi
37#define OUTP %rsi 40#define OUTP %rsi
@@ -42,6 +45,7 @@
42#define T1 %r10 45#define T1 %r10
43#define TKEYP T1 46#define TKEYP T1
44#define T2 %r11 47#define T2 %r11
/*
 * General purpose copy of the lower qword of CTR; _aesni_inc increments it
 * alongside CTR so the carry flag tells when the upper qword needs a bump.
 * Aliases T2 (%r11).
 */
48#define TCTR_LOW T2
45 49
46_key_expansion_128: 50_key_expansion_128:
47_key_expansion_256a: 51_key_expansion_256a:
@@ -724,3 +728,114 @@ ENTRY(aesni_cbc_dec)
724 movups IV, (IVP) 728 movups IV, (IVP)
725.Lcbc_dec_just_ret: 729.Lcbc_dec_just_ret:
726 ret 730 ret
731
/*
 * Shuffle control for PSHUFB: destination byte i takes source byte 15 - i,
 * i.e. the 16 bytes of the register are reversed (big <-> little endian).
 * Kept 16-byte aligned because it is loaded with movaps.
 */
	.p2align 4
.Lbswap_mask:
	.byte 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08
	.byte 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
735
/*
 * _aesni_inc_init:	internal ABI
 *	Set up the registers that _aesni_inc depends on.
 * input:
 *	IV:		initial counter block (big endian)
 * output:
 *	CTR:		== IV, byte-reversed into little endian
 *	TCTR_LOW:	== lower qword of CTR
 *	INC:		== 1, in little endian
 *	BSWAP_MASK:	== endian swapping mask (copy of .Lbswap_mask)
 * unchanged:
 *	IV
 */
_aesni_inc_init:
	/* RIP-relative load: position independent, no absolute relocation */
	movaps .Lbswap_mask(%rip), BSWAP_MASK
	movaps IV, CTR
	PSHUFB_XMM BSWAP_MASK CTR	/* CTR = byte-reversed IV */
	mov $1, TCTR_LOW
	MOVQ_R64_XMM TCTR_LOW INC	/* INC = 1 (TCTR_LOW used as scratch) */
	MOVQ_R64_XMM CTR TCTR_LOW	/* TCTR_LOW = low qword of CTR */
	ret
755
/*
 * _aesni_inc:		internal ABI
 *	Advance the 128-bit counter by one and refresh the big endian IV.
 *	The arithmetic is done on the little endian copy in CTR; IV itself
 *	is not read, only rewritten from the new CTR.
 * input:
 *	CTR:		current counter, little endian
 *	TCTR_LOW:	== lower qword of CTR
 *	INC:		== 1, in little endian
 *	BSWAP_MASK:	== endian swapping mask
 * output:
 *	IV:		previous counter + 1, big endian
 * changed:
 *	CTR:		== output IV, in little endian
 *	TCTR_LOW:	== lower qword of CTR
 * unchanged:
 *	INC (shifted temporarily on the carry path, then restored),
 *	BSWAP_MASK
 */
_aesni_inc:
	add $1, TCTR_LOW		/* CF set iff the low qword wraps */
	paddq INC, CTR			/* low qword += 1; leaves EFLAGS alone */
	jnc .Linc_no_carry
	/* propagate the carry: move the 1 into the high qword, add, move back */
	pslldq $8, INC
	paddq INC, CTR
	psrldq $8, INC
.Linc_no_carry:
	movaps CTR, IV
	PSHUFB_XMM BSWAP_MASK IV	/* back to big endian */
	ret
782
/*
 * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len, u8 *iv)
 *
 * AES in counter mode: for every complete 16-byte block, the current
 * counter block is encrypted and XORed with the input block read from INP;
 * the result is stored to OUTP.  The counter is advanced by one per block
 * through _aesni_inc.
 *
 * Blocks are handled four at a time while at least 64 bytes remain, then
 * one at a time.  A trailing partial block (len % 16 bytes) is neither read
 * nor written.  If len < 16 the function returns immediately and *iv is
 * left untouched; otherwise the next counter value is stored back to *iv.
 *
 * LEN is an unsigned byte count, so every length test uses the unsigned
 * conditions (jb / jae).
 *
 * KEYP and OUTP are %rdi and %rsi; INP, LEN, IVP and KLEN are defined
 * elsewhere in this file and are presumably the registers carrying src,
 * len, iv and the key length -- confirm against the #define table.
 * _aesni_enc1 / _aesni_enc4 are likewise defined elsewhere; this code
 * relies on them leaving IV, CTR, INC, BSWAP_MASK, TCTR_LOW and IN1..IN4
 * intact.
 */
ENTRY(aesni_ctr_enc)
	cmp $16, LEN
	jb .Lctr_enc_just_ret		/* not even one full block */
	mov 480(KEYP), KLEN		/* presumably ctx->key_length; confirm offset */
	movups (IVP), IV
	call _aesni_inc_init
	cmp $64, LEN
	jb .Lctr_enc_loop1
.align 4
.Lctr_enc_loop4:
	/* snapshot four consecutive counter blocks, interleaved with loads */
	movaps IV, STATE1
	call _aesni_inc
	movups (INP), IN1
	movaps IV, STATE2
	call _aesni_inc
	movups 0x10(INP), IN2
	movaps IV, STATE3
	call _aesni_inc
	movups 0x20(INP), IN3
	movaps IV, STATE4
	call _aesni_inc
	movups 0x30(INP), IN4
	call _aesni_enc4		/* STATE1..4 = encrypted counter blocks */
	pxor IN1, STATE1
	movups STATE1, (OUTP)
	pxor IN2, STATE2
	movups STATE2, 0x10(OUTP)
	pxor IN3, STATE3
	movups STATE3, 0x20(OUTP)
	pxor IN4, STATE4
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jae .Lctr_enc_loop4		/* unsigned: LEN is a size_t */
	cmp $16, LEN
	jb .Lctr_enc_ret
.align 4
.Lctr_enc_loop1:
	movaps IV, STATE
	call _aesni_inc
	movups (INP), IN
	call _aesni_enc1		/* STATE = encrypted counter block */
	pxor IN, STATE
	movups STATE, (OUTP)
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lctr_enc_loop1		/* unsigned: LEN is a size_t */
.Lctr_enc_ret:
	movups IV, (IVP)		/* hand the next counter value back */
.Lctr_enc_just_ret:
	ret