about summary refs log tree commit diff stats
path: root/arch/x86
diff options
context:
space:
mode:
author: Huang Ying <ying.huang@intel.com> 2009-11-23 06:55:22 -0500
committer: Herbert Xu <herbert@gondor.apana.org.au> 2009-11-23 06:55:22 -0500
commit564ec0ec05ac6ee409bde81f7ef27a3dadbf3a6a (patch)
treeb4bb4e29cdfc2ceb90ac10ed4da139546375faa7 /arch/x86
parentb369e521237d6ef21c453f3ac4f4b8577ec14f87 (diff)
crypto: ghash-clmulni-intel - Use gas macro for PCLMULQDQ-NI and PSHUFB
Old binutils do not support PCLMULQDQ-NI and PSHUFB; so that the kernel can still be compiled by them, .byte code was used instead of assembly instructions. But the readability and flexibility of raw .byte code are not good. So gas-macro equivalents of the corresponding assembly instructions are used instead. Signed-off-by: Huang Ying <ying.huang@intel.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/crypto/ghash-clmulni-intel_asm.S29
1 file changed, 10 insertions, 19 deletions
diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S
index 59584982fb75..1528dc4886cf 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_asm.S
+++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S
@@ -17,7 +17,7 @@
17 */ 17 */
18 18
19#include <linux/linkage.h> 19#include <linux/linkage.h>
20#include <asm/i387.h> 20#include <asm/inst.h>
21 21
22.align 16 22.align 16
23.Lbswap_mask: 23.Lbswap_mask:
@@ -56,12 +56,9 @@ __clmul_gf128mul_ble:
56 pxor DATA, T2 56 pxor DATA, T2
57 pxor SHASH, T3 57 pxor SHASH, T3
58 58
59 # pclmulqdq $0x00, SHASH, DATA # DATA = a0 * b0 59 PCLMULQDQ 0x00 SHASH DATA # DATA = a0 * b0
60 .byte 0x66, 0x0f, 0x3a, 0x44, 0xc1, 0x00 60 PCLMULQDQ 0x11 SHASH T1 # T1 = a1 * b1
61 # pclmulqdq $0x11, SHASH, T1 # T1 = a1 * b1 61 PCLMULQDQ 0x00 T3 T2 # T2 = (a1 + a0) * (b1 + b0)
62 .byte 0x66, 0x0f, 0x3a, 0x44, 0xd1, 0x11
63 # pclmulqdq $0x00, T3, T2 # T2 = (a1 + a0) * (b1 + b0)
64 .byte 0x66, 0x0f, 0x3a, 0x44, 0xdc, 0x00
65 pxor DATA, T2 62 pxor DATA, T2
66 pxor T1, T2 # T2 = a0 * b1 + a1 * b0 63 pxor T1, T2 # T2 = a0 * b1 + a1 * b0
67 64
@@ -101,11 +98,9 @@ ENTRY(clmul_ghash_mul)
101 movups (%rdi), DATA 98 movups (%rdi), DATA
102 movups (%rsi), SHASH 99 movups (%rsi), SHASH
103 movaps .Lbswap_mask, BSWAP 100 movaps .Lbswap_mask, BSWAP
104 # pshufb BSWAP, DATA 101 PSHUFB_XMM BSWAP DATA
105 PSHUFB_XMM5_XMM0
106 call __clmul_gf128mul_ble 102 call __clmul_gf128mul_ble
107 # pshufb BSWAP, DATA 103 PSHUFB_XMM BSWAP DATA
108 .byte 0x66, 0x0f, 0x38, 0x00, 0xc5
109 movups DATA, (%rdi) 104 movups DATA, (%rdi)
110 ret 105 ret
111 106
@@ -119,21 +114,18 @@ ENTRY(clmul_ghash_update)
119 movaps .Lbswap_mask, BSWAP 114 movaps .Lbswap_mask, BSWAP
120 movups (%rdi), DATA 115 movups (%rdi), DATA
121 movups (%rcx), SHASH 116 movups (%rcx), SHASH
122 # pshufb BSWAP, DATA 117 PSHUFB_XMM BSWAP DATA
123 PSHUFB_XMM5_XMM0
124.align 4 118.align 4
125.Lupdate_loop: 119.Lupdate_loop:
126 movups (%rsi), IN1 120 movups (%rsi), IN1
127 # pshufb BSWAP, IN1 121 PSHUFB_XMM BSWAP IN1
128 PSHUFB_XMM5_XMM6
129 pxor IN1, DATA 122 pxor IN1, DATA
130 call __clmul_gf128mul_ble 123 call __clmul_gf128mul_ble
131 sub $16, %rdx 124 sub $16, %rdx
132 add $16, %rsi 125 add $16, %rsi
133 cmp $16, %rdx 126 cmp $16, %rdx
134 jge .Lupdate_loop 127 jge .Lupdate_loop
135 # pshufb BSWAP, DATA 128 PSHUFB_XMM BSWAP DATA
136 PSHUFB_XMM5_XMM0
137 movups DATA, (%rdi) 129 movups DATA, (%rdi)
138.Lupdate_just_ret: 130.Lupdate_just_ret:
139 ret 131 ret
@@ -146,8 +138,7 @@ ENTRY(clmul_ghash_update)
146ENTRY(clmul_ghash_setkey) 138ENTRY(clmul_ghash_setkey)
147 movaps .Lbswap_mask, BSWAP 139 movaps .Lbswap_mask, BSWAP
148 movups (%rsi), %xmm0 140 movups (%rsi), %xmm0
149 # pshufb BSWAP, %xmm0 141 PSHUFB_XMM BSWAP %xmm0
150 PSHUFB_XMM5_XMM0
151 movaps %xmm0, %xmm1 142 movaps %xmm0, %xmm1
152 psllq $1, %xmm0 143 psllq $1, %xmm0
153 psrlq $63, %xmm1 144 psrlq $63, %xmm1