diff options
author | Huang Ying <ying.huang@intel.com> | 2009-11-23 06:55:22 -0500 |
---|---|---|
committer | Herbert Xu <herbert@gondor.apana.org.au> | 2009-11-23 06:55:22 -0500 |
commit | 564ec0ec05ac6ee409bde81f7ef27a3dadbf3a6a (patch) | |
tree | b4bb4e29cdfc2ceb90ac10ed4da139546375faa7 /arch/x86 | |
parent | b369e521237d6ef21c453f3ac4f4b8577ec14f87 (diff) |
crypto: ghash-clmulni-intel - Use gas macro for PCLMULQDQ-NI and PSHUFB
Old binutils do not support the PCLMULQDQ-NI and PSHUFB instructions, so
raw .byte encodings were used in place of the assembly instructions to keep
the kernel buildable with those toolchains. However, raw .byte code is hard
to read and inflexible.
So gas macros that look like the corresponding assembly instructions are used instead.
Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/crypto/ghash-clmulni-intel_asm.S | 29 |
1 files changed, 10 insertions, 19 deletions
diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S index 59584982fb75..1528dc4886cf 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_asm.S +++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S | |||
@@ -17,7 +17,7 @@ | |||
17 | */ | 17 | */ |
18 | 18 | ||
19 | #include <linux/linkage.h> | 19 | #include <linux/linkage.h> |
20 | #include <asm/i387.h> | 20 | #include <asm/inst.h> |
21 | 21 | ||
22 | .align 16 | 22 | .align 16 |
23 | .Lbswap_mask: | 23 | .Lbswap_mask: |
@@ -56,12 +56,9 @@ __clmul_gf128mul_ble: | |||
56 | pxor DATA, T2 | 56 | pxor DATA, T2 |
57 | pxor SHASH, T3 | 57 | pxor SHASH, T3 |
58 | 58 | ||
59 | # pclmulqdq $0x00, SHASH, DATA # DATA = a0 * b0 | 59 | PCLMULQDQ 0x00 SHASH DATA # DATA = a0 * b0 |
60 | .byte 0x66, 0x0f, 0x3a, 0x44, 0xc1, 0x00 | 60 | PCLMULQDQ 0x11 SHASH T1 # T1 = a1 * b1 |
61 | # pclmulqdq $0x11, SHASH, T1 # T1 = a1 * b1 | 61 | PCLMULQDQ 0x00 T3 T2 # T2 = (a1 + a0) * (b1 + b0) |
62 | .byte 0x66, 0x0f, 0x3a, 0x44, 0xd1, 0x11 | ||
63 | # pclmulqdq $0x00, T3, T2 # T2 = (a1 + a0) * (b1 + b0) | ||
64 | .byte 0x66, 0x0f, 0x3a, 0x44, 0xdc, 0x00 | ||
65 | pxor DATA, T2 | 62 | pxor DATA, T2 |
66 | pxor T1, T2 # T2 = a0 * b1 + a1 * b0 | 63 | pxor T1, T2 # T2 = a0 * b1 + a1 * b0 |
67 | 64 | ||
@@ -101,11 +98,9 @@ ENTRY(clmul_ghash_mul) | |||
101 | movups (%rdi), DATA | 98 | movups (%rdi), DATA |
102 | movups (%rsi), SHASH | 99 | movups (%rsi), SHASH |
103 | movaps .Lbswap_mask, BSWAP | 100 | movaps .Lbswap_mask, BSWAP |
104 | # pshufb BSWAP, DATA | 101 | PSHUFB_XMM BSWAP DATA |
105 | PSHUFB_XMM5_XMM0 | ||
106 | call __clmul_gf128mul_ble | 102 | call __clmul_gf128mul_ble |
107 | # pshufb BSWAP, DATA | 103 | PSHUFB_XMM BSWAP DATA |
108 | .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 | ||
109 | movups DATA, (%rdi) | 104 | movups DATA, (%rdi) |
110 | ret | 105 | ret |
111 | 106 | ||
@@ -119,21 +114,18 @@ ENTRY(clmul_ghash_update) | |||
119 | movaps .Lbswap_mask, BSWAP | 114 | movaps .Lbswap_mask, BSWAP |
120 | movups (%rdi), DATA | 115 | movups (%rdi), DATA |
121 | movups (%rcx), SHASH | 116 | movups (%rcx), SHASH |
122 | # pshufb BSWAP, DATA | 117 | PSHUFB_XMM BSWAP DATA |
123 | PSHUFB_XMM5_XMM0 | ||
124 | .align 4 | 118 | .align 4 |
125 | .Lupdate_loop: | 119 | .Lupdate_loop: |
126 | movups (%rsi), IN1 | 120 | movups (%rsi), IN1 |
127 | # pshufb BSWAP, IN1 | 121 | PSHUFB_XMM BSWAP IN1 |
128 | PSHUFB_XMM5_XMM6 | ||
129 | pxor IN1, DATA | 122 | pxor IN1, DATA |
130 | call __clmul_gf128mul_ble | 123 | call __clmul_gf128mul_ble |
131 | sub $16, %rdx | 124 | sub $16, %rdx |
132 | add $16, %rsi | 125 | add $16, %rsi |
133 | cmp $16, %rdx | 126 | cmp $16, %rdx |
134 | jge .Lupdate_loop | 127 | jge .Lupdate_loop |
135 | # pshufb BSWAP, DATA | 128 | PSHUFB_XMM BSWAP DATA |
136 | PSHUFB_XMM5_XMM0 | ||
137 | movups DATA, (%rdi) | 129 | movups DATA, (%rdi) |
138 | .Lupdate_just_ret: | 130 | .Lupdate_just_ret: |
139 | ret | 131 | ret |
@@ -146,8 +138,7 @@ ENTRY(clmul_ghash_update) | |||
146 | ENTRY(clmul_ghash_setkey) | 138 | ENTRY(clmul_ghash_setkey) |
147 | movaps .Lbswap_mask, BSWAP | 139 | movaps .Lbswap_mask, BSWAP |
148 | movups (%rsi), %xmm0 | 140 | movups (%rsi), %xmm0 |
149 | # pshufb BSWAP, %xmm0 | 141 | PSHUFB_XMM BSWAP %xmm0 |
150 | PSHUFB_XMM5_XMM0 | ||
151 | movaps %xmm0, %xmm1 | 142 | movaps %xmm0, %xmm1 |
152 | psllq $1, %xmm0 | 143 | psllq $1, %xmm0 |
153 | psrlq $63, %xmm1 | 144 | psrlq $63, %xmm1 |