diff options
-rw-r--r-- | arch/x86/crypto/ghash-clmulni-intel_asm.S | 29 |
1 files changed, 10 insertions, 19 deletions
diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S
index 59584982fb75..1528dc4886cf 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_asm.S
+++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S | |||
@@ -17,7 +17,7 @@ | |||
17 | */ | 17 | */ |
18 | 18 | ||
19 | #include <linux/linkage.h> | 19 | #include <linux/linkage.h> |
20 | #include <asm/i387.h> | 20 | #include <asm/inst.h> |
21 | 21 | ||
22 | .align 16 | 22 | .align 16 |
23 | .Lbswap_mask: | 23 | .Lbswap_mask: |
@@ -56,12 +56,9 @@ __clmul_gf128mul_ble: | |||
56 | pxor DATA, T2 | 56 | pxor DATA, T2 |
57 | pxor SHASH, T3 | 57 | pxor SHASH, T3 |
58 | 58 | ||
59 | # pclmulqdq $0x00, SHASH, DATA # DATA = a0 * b0 | 59 | PCLMULQDQ 0x00 SHASH DATA # DATA = a0 * b0 |
60 | .byte 0x66, 0x0f, 0x3a, 0x44, 0xc1, 0x00 | 60 | PCLMULQDQ 0x11 SHASH T1 # T1 = a1 * b1 |
61 | # pclmulqdq $0x11, SHASH, T1 # T1 = a1 * b1 | 61 | PCLMULQDQ 0x00 T3 T2 # T2 = (a1 + a0) * (b1 + b0) |
62 | .byte 0x66, 0x0f, 0x3a, 0x44, 0xd1, 0x11 | ||
63 | # pclmulqdq $0x00, T3, T2 # T2 = (a1 + a0) * (b1 + b0) | ||
64 | .byte 0x66, 0x0f, 0x3a, 0x44, 0xdc, 0x00 | ||
65 | pxor DATA, T2 | 62 | pxor DATA, T2 |
66 | pxor T1, T2 # T2 = a0 * b1 + a1 * b0 | 63 | pxor T1, T2 # T2 = a0 * b1 + a1 * b0 |
67 | 64 | ||
@@ -101,11 +98,9 @@ ENTRY(clmul_ghash_mul) | |||
101 | movups (%rdi), DATA | 98 | movups (%rdi), DATA |
102 | movups (%rsi), SHASH | 99 | movups (%rsi), SHASH |
103 | movaps .Lbswap_mask, BSWAP | 100 | movaps .Lbswap_mask, BSWAP |
104 | # pshufb BSWAP, DATA | 101 | PSHUFB_XMM BSWAP DATA |
105 | PSHUFB_XMM5_XMM0 | ||
106 | call __clmul_gf128mul_ble | 102 | call __clmul_gf128mul_ble |
107 | # pshufb BSWAP, DATA | 103 | PSHUFB_XMM BSWAP DATA |
108 | .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 | ||
109 | movups DATA, (%rdi) | 104 | movups DATA, (%rdi) |
110 | ret | 105 | ret |
111 | 106 | ||
@@ -119,21 +114,18 @@ ENTRY(clmul_ghash_update) | |||
119 | movaps .Lbswap_mask, BSWAP | 114 | movaps .Lbswap_mask, BSWAP |
120 | movups (%rdi), DATA | 115 | movups (%rdi), DATA |
121 | movups (%rcx), SHASH | 116 | movups (%rcx), SHASH |
122 | # pshufb BSWAP, DATA | 117 | PSHUFB_XMM BSWAP DATA |
123 | PSHUFB_XMM5_XMM0 | ||
124 | .align 4 | 118 | .align 4 |
125 | .Lupdate_loop: | 119 | .Lupdate_loop: |
126 | movups (%rsi), IN1 | 120 | movups (%rsi), IN1 |
127 | # pshufb BSWAP, IN1 | 121 | PSHUFB_XMM BSWAP IN1 |
128 | PSHUFB_XMM5_XMM6 | ||
129 | pxor IN1, DATA | 122 | pxor IN1, DATA |
130 | call __clmul_gf128mul_ble | 123 | call __clmul_gf128mul_ble |
131 | sub $16, %rdx | 124 | sub $16, %rdx |
132 | add $16, %rsi | 125 | add $16, %rsi |
133 | cmp $16, %rdx | 126 | cmp $16, %rdx |
134 | jge .Lupdate_loop | 127 | jge .Lupdate_loop |
135 | # pshufb BSWAP, DATA | 128 | PSHUFB_XMM BSWAP DATA |
136 | PSHUFB_XMM5_XMM0 | ||
137 | movups DATA, (%rdi) | 129 | movups DATA, (%rdi) |
138 | .Lupdate_just_ret: | 130 | .Lupdate_just_ret: |
139 | ret | 131 | ret |
@@ -146,8 +138,7 @@ ENTRY(clmul_ghash_update) | |||
146 | ENTRY(clmul_ghash_setkey) | 138 | ENTRY(clmul_ghash_setkey) |
147 | movaps .Lbswap_mask, BSWAP | 139 | movaps .Lbswap_mask, BSWAP |
148 | movups (%rsi), %xmm0 | 140 | movups (%rsi), %xmm0 |
149 | # pshufb BSWAP, %xmm0 | 141 | PSHUFB_XMM BSWAP %xmm0 |
150 | PSHUFB_XMM5_XMM0 | ||
151 | movaps %xmm0, %xmm1 | 142 | movaps %xmm0, %xmm1 |
152 | psllq $1, %xmm0 | 143 | psllq $1, %xmm0 |
153 | psrlq $63, %xmm1 | 144 | psrlq $63, %xmm1 |