aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2009-12-08 18:55:13 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2009-12-08 18:55:13 -0500
commite069efb6bbf8f739a2e084183709b5eb76abf90d (patch)
tree0866058fa6e1b77d9defc6f45f39d1f851afe327 /arch/x86
parent324889b6bd2a89e0d69a2f9d133d6cf24579ab6c (diff)
parenteed89d0f9d3383851cec634565a6414fae70fe91 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: hwrng: core - Prevent too-small buffer sizes hwrng: virtio-rng - Convert to new API hwrng: core - Replace u32 in driver API with byte array crypto: ansi_cprng - Move FIPS functions under CONFIG_CRYPTO_FIPS crypto: testmgr - Add ghash algorithm test before provide to users crypto: ghash-clmulni-intel - Put proper .data section in place crypto: ghash-clmulni-intel - Use gas macro for PCLMULQDQ-NI and PSHUFB crypto: aesni-intel - Use gas macro for AES-NI instructions x86: Generate .byte code for some new instructions via gas macro crypto: ghash-intel - Fix irq_fpu_usable usage crypto: ghash-intel - Add PSHUFB macros crypto: ghash-intel - Hard-code pshufb crypto: ghash-intel - Fix building failure on x86_32 crypto: testmgr - Fix warning crypto: ansi_cprng - Fix test in get_prng_bytes crypto: hash - Remove cra_u.{digest,hash} crypto: api - Remove digest case from procfs show handler crypto: hash - Remove legacy hash/digest code crypto: ansi_cprng - Add FIPS wrapper crypto: ghash - Add PCLMULQDQ accelerated implementation
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/crypto/Makefile3
-rw-r--r--arch/x86/crypto/aesni-intel_asm.S517
-rw-r--r--arch/x86/crypto/ghash-clmulni-intel_asm.S157
-rw-r--r--arch/x86/crypto/ghash-clmulni-intel_glue.c333
-rw-r--r--arch/x86/include/asm/cpufeature.h1
-rw-r--r--arch/x86/include/asm/i387.h7
-rw-r--r--arch/x86/include/asm/inst.h150
7 files changed, 824 insertions, 344 deletions
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index cfb0010fa940..1a58ad89fdf7 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -12,6 +12,7 @@ obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
12obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o 12obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
13obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o 13obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
14obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o 14obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
15obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
15 16
16obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o 17obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o
17 18
@@ -24,3 +25,5 @@ twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
24salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o 25salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
25 26
26aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o 27aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o
28
29ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index eb0566e83319..20bb0e1ac681 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -16,6 +16,7 @@
16 */ 16 */
17 17
18#include <linux/linkage.h> 18#include <linux/linkage.h>
19#include <asm/inst.h>
19 20
20.text 21.text
21 22
@@ -122,103 +123,72 @@ ENTRY(aesni_set_key)
122 movups 0x10(%rsi), %xmm2 # other user key 123 movups 0x10(%rsi), %xmm2 # other user key
123 movaps %xmm2, (%rcx) 124 movaps %xmm2, (%rcx)
124 add $0x10, %rcx 125 add $0x10, %rcx
125 # aeskeygenassist $0x1, %xmm2, %xmm1 # round 1 126 AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1
126 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x01
127 call _key_expansion_256a 127 call _key_expansion_256a
128 # aeskeygenassist $0x1, %xmm0, %xmm1 128 AESKEYGENASSIST 0x1 %xmm0 %xmm1
129 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x01
130 call _key_expansion_256b 129 call _key_expansion_256b
131 # aeskeygenassist $0x2, %xmm2, %xmm1 # round 2 130 AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2
132 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x02
133 call _key_expansion_256a 131 call _key_expansion_256a
134 # aeskeygenassist $0x2, %xmm0, %xmm1 132 AESKEYGENASSIST 0x2 %xmm0 %xmm1
135 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x02
136 call _key_expansion_256b 133 call _key_expansion_256b
137 # aeskeygenassist $0x4, %xmm2, %xmm1 # round 3 134 AESKEYGENASSIST 0x4 %xmm2 %xmm1 # round 3
138 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x04
139 call _key_expansion_256a 135 call _key_expansion_256a
140 # aeskeygenassist $0x4, %xmm0, %xmm1 136 AESKEYGENASSIST 0x4 %xmm0 %xmm1
141 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x04
142 call _key_expansion_256b 137 call _key_expansion_256b
143 # aeskeygenassist $0x8, %xmm2, %xmm1 # round 4 138 AESKEYGENASSIST 0x8 %xmm2 %xmm1 # round 4
144 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x08
145 call _key_expansion_256a 139 call _key_expansion_256a
146 # aeskeygenassist $0x8, %xmm0, %xmm1 140 AESKEYGENASSIST 0x8 %xmm0 %xmm1
147 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x08
148 call _key_expansion_256b 141 call _key_expansion_256b
149 # aeskeygenassist $0x10, %xmm2, %xmm1 # round 5 142 AESKEYGENASSIST 0x10 %xmm2 %xmm1 # round 5
150 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x10
151 call _key_expansion_256a 143 call _key_expansion_256a
152 # aeskeygenassist $0x10, %xmm0, %xmm1 144 AESKEYGENASSIST 0x10 %xmm0 %xmm1
153 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x10
154 call _key_expansion_256b 145 call _key_expansion_256b
155 # aeskeygenassist $0x20, %xmm2, %xmm1 # round 6 146 AESKEYGENASSIST 0x20 %xmm2 %xmm1 # round 6
156 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x20
157 call _key_expansion_256a 147 call _key_expansion_256a
158 # aeskeygenassist $0x20, %xmm0, %xmm1 148 AESKEYGENASSIST 0x20 %xmm0 %xmm1
159 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x20
160 call _key_expansion_256b 149 call _key_expansion_256b
161 # aeskeygenassist $0x40, %xmm2, %xmm1 # round 7 150 AESKEYGENASSIST 0x40 %xmm2 %xmm1 # round 7
162 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40
163 call _key_expansion_256a 151 call _key_expansion_256a
164 jmp .Ldec_key 152 jmp .Ldec_key
165.Lenc_key192: 153.Lenc_key192:
166 movq 0x10(%rsi), %xmm2 # other user key 154 movq 0x10(%rsi), %xmm2 # other user key
167 # aeskeygenassist $0x1, %xmm2, %xmm1 # round 1 155 AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1
168 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x01
169 call _key_expansion_192a 156 call _key_expansion_192a
170 # aeskeygenassist $0x2, %xmm2, %xmm1 # round 2 157 AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2
171 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x02
172 call _key_expansion_192b 158 call _key_expansion_192b
173 # aeskeygenassist $0x4, %xmm2, %xmm1 # round 3 159 AESKEYGENASSIST 0x4 %xmm2 %xmm1 # round 3
174 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x04
175 call _key_expansion_192a 160 call _key_expansion_192a
176 # aeskeygenassist $0x8, %xmm2, %xmm1 # round 4 161 AESKEYGENASSIST 0x8 %xmm2 %xmm1 # round 4
177 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x08
178 call _key_expansion_192b 162 call _key_expansion_192b
179 # aeskeygenassist $0x10, %xmm2, %xmm1 # round 5 163 AESKEYGENASSIST 0x10 %xmm2 %xmm1 # round 5
180 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x10
181 call _key_expansion_192a 164 call _key_expansion_192a
182 # aeskeygenassist $0x20, %xmm2, %xmm1 # round 6 165 AESKEYGENASSIST 0x20 %xmm2 %xmm1 # round 6
183 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x20
184 call _key_expansion_192b 166 call _key_expansion_192b
185 # aeskeygenassist $0x40, %xmm2, %xmm1 # round 7 167 AESKEYGENASSIST 0x40 %xmm2 %xmm1 # round 7
186 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40
187 call _key_expansion_192a 168 call _key_expansion_192a
188 # aeskeygenassist $0x80, %xmm2, %xmm1 # round 8 169 AESKEYGENASSIST 0x80 %xmm2 %xmm1 # round 8
189 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x80
190 call _key_expansion_192b 170 call _key_expansion_192b
191 jmp .Ldec_key 171 jmp .Ldec_key
192.Lenc_key128: 172.Lenc_key128:
193 # aeskeygenassist $0x1, %xmm0, %xmm1 # round 1 173 AESKEYGENASSIST 0x1 %xmm0 %xmm1 # round 1
194 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x01
195 call _key_expansion_128 174 call _key_expansion_128
196 # aeskeygenassist $0x2, %xmm0, %xmm1 # round 2 175 AESKEYGENASSIST 0x2 %xmm0 %xmm1 # round 2
197 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x02
198 call _key_expansion_128 176 call _key_expansion_128
199 # aeskeygenassist $0x4, %xmm0, %xmm1 # round 3 177 AESKEYGENASSIST 0x4 %xmm0 %xmm1 # round 3
200 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x04
201 call _key_expansion_128 178 call _key_expansion_128
202 # aeskeygenassist $0x8, %xmm0, %xmm1 # round 4 179 AESKEYGENASSIST 0x8 %xmm0 %xmm1 # round 4
203 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x08
204 call _key_expansion_128 180 call _key_expansion_128
205 # aeskeygenassist $0x10, %xmm0, %xmm1 # round 5 181 AESKEYGENASSIST 0x10 %xmm0 %xmm1 # round 5
206 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x10
207 call _key_expansion_128 182 call _key_expansion_128
208 # aeskeygenassist $0x20, %xmm0, %xmm1 # round 6 183 AESKEYGENASSIST 0x20 %xmm0 %xmm1 # round 6
209 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x20
210 call _key_expansion_128 184 call _key_expansion_128
211 # aeskeygenassist $0x40, %xmm0, %xmm1 # round 7 185 AESKEYGENASSIST 0x40 %xmm0 %xmm1 # round 7
212 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x40
213 call _key_expansion_128 186 call _key_expansion_128
214 # aeskeygenassist $0x80, %xmm0, %xmm1 # round 8 187 AESKEYGENASSIST 0x80 %xmm0 %xmm1 # round 8
215 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x80
216 call _key_expansion_128 188 call _key_expansion_128
217 # aeskeygenassist $0x1b, %xmm0, %xmm1 # round 9 189 AESKEYGENASSIST 0x1b %xmm0 %xmm1 # round 9
218 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x1b
219 call _key_expansion_128 190 call _key_expansion_128
220 # aeskeygenassist $0x36, %xmm0, %xmm1 # round 10 191 AESKEYGENASSIST 0x36 %xmm0 %xmm1 # round 10
221 .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x36
222 call _key_expansion_128 192 call _key_expansion_128
223.Ldec_key: 193.Ldec_key:
224 sub $0x10, %rcx 194 sub $0x10, %rcx
@@ -231,8 +201,7 @@ ENTRY(aesni_set_key)
231.align 4 201.align 4
232.Ldec_key_loop: 202.Ldec_key_loop:
233 movaps (%rdi), %xmm0 203 movaps (%rdi), %xmm0
234 # aesimc %xmm0, %xmm1 204 AESIMC %xmm0 %xmm1
235 .byte 0x66, 0x0f, 0x38, 0xdb, 0xc8
236 movaps %xmm1, (%rsi) 205 movaps %xmm1, (%rsi)
237 add $0x10, %rdi 206 add $0x10, %rdi
238 sub $0x10, %rsi 207 sub $0x10, %rsi
@@ -274,51 +243,37 @@ _aesni_enc1:
274 je .Lenc192 243 je .Lenc192
275 add $0x20, TKEYP 244 add $0x20, TKEYP
276 movaps -0x60(TKEYP), KEY 245 movaps -0x60(TKEYP), KEY
277 # aesenc KEY, STATE 246 AESENC KEY STATE
278 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
279 movaps -0x50(TKEYP), KEY 247 movaps -0x50(TKEYP), KEY
280 # aesenc KEY, STATE 248 AESENC KEY STATE
281 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
282.align 4 249.align 4
283.Lenc192: 250.Lenc192:
284 movaps -0x40(TKEYP), KEY 251 movaps -0x40(TKEYP), KEY
285 # aesenc KEY, STATE 252 AESENC KEY STATE
286 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
287 movaps -0x30(TKEYP), KEY 253 movaps -0x30(TKEYP), KEY
288 # aesenc KEY, STATE 254 AESENC KEY STATE
289 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
290.align 4 255.align 4
291.Lenc128: 256.Lenc128:
292 movaps -0x20(TKEYP), KEY 257 movaps -0x20(TKEYP), KEY
293 # aesenc KEY, STATE 258 AESENC KEY STATE
294 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
295 movaps -0x10(TKEYP), KEY 259 movaps -0x10(TKEYP), KEY
296 # aesenc KEY, STATE 260 AESENC KEY STATE
297 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
298 movaps (TKEYP), KEY 261 movaps (TKEYP), KEY
299 # aesenc KEY, STATE 262 AESENC KEY STATE
300 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
301 movaps 0x10(TKEYP), KEY 263 movaps 0x10(TKEYP), KEY
302 # aesenc KEY, STATE 264 AESENC KEY STATE
303 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
304 movaps 0x20(TKEYP), KEY 265 movaps 0x20(TKEYP), KEY
305 # aesenc KEY, STATE 266 AESENC KEY STATE
306 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
307 movaps 0x30(TKEYP), KEY 267 movaps 0x30(TKEYP), KEY
308 # aesenc KEY, STATE 268 AESENC KEY STATE
309 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
310 movaps 0x40(TKEYP), KEY 269 movaps 0x40(TKEYP), KEY
311 # aesenc KEY, STATE 270 AESENC KEY STATE
312 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
313 movaps 0x50(TKEYP), KEY 271 movaps 0x50(TKEYP), KEY
314 # aesenc KEY, STATE 272 AESENC KEY STATE
315 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
316 movaps 0x60(TKEYP), KEY 273 movaps 0x60(TKEYP), KEY
317 # aesenc KEY, STATE 274 AESENC KEY STATE
318 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
319 movaps 0x70(TKEYP), KEY 275 movaps 0x70(TKEYP), KEY
320 # aesenclast KEY, STATE # last round 276 AESENCLAST KEY STATE
321 .byte 0x66, 0x0f, 0x38, 0xdd, 0xc2
322 ret 277 ret
323 278
324/* 279/*
@@ -353,135 +308,79 @@ _aesni_enc4:
353 je .L4enc192 308 je .L4enc192
354 add $0x20, TKEYP 309 add $0x20, TKEYP
355 movaps -0x60(TKEYP), KEY 310 movaps -0x60(TKEYP), KEY
356 # aesenc KEY, STATE1 311 AESENC KEY STATE1
357 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 312 AESENC KEY STATE2
358 # aesenc KEY, STATE2 313 AESENC KEY STATE3
359 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 314 AESENC KEY STATE4
360 # aesenc KEY, STATE3
361 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
362 # aesenc KEY, STATE4
363 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
364 movaps -0x50(TKEYP), KEY 315 movaps -0x50(TKEYP), KEY
365 # aesenc KEY, STATE1 316 AESENC KEY STATE1
366 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 317 AESENC KEY STATE2
367 # aesenc KEY, STATE2 318 AESENC KEY STATE3
368 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 319 AESENC KEY STATE4
369 # aesenc KEY, STATE3
370 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
371 # aesenc KEY, STATE4
372 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
373#.align 4 320#.align 4
374.L4enc192: 321.L4enc192:
375 movaps -0x40(TKEYP), KEY 322 movaps -0x40(TKEYP), KEY
376 # aesenc KEY, STATE1 323 AESENC KEY STATE1
377 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 324 AESENC KEY STATE2
378 # aesenc KEY, STATE2 325 AESENC KEY STATE3
379 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 326 AESENC KEY STATE4
380 # aesenc KEY, STATE3
381 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
382 # aesenc KEY, STATE4
383 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
384 movaps -0x30(TKEYP), KEY 327 movaps -0x30(TKEYP), KEY
385 # aesenc KEY, STATE1 328 AESENC KEY STATE1
386 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 329 AESENC KEY STATE2
387 # aesenc KEY, STATE2 330 AESENC KEY STATE3
388 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 331 AESENC KEY STATE4
389 # aesenc KEY, STATE3
390 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
391 # aesenc KEY, STATE4
392 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
393#.align 4 332#.align 4
394.L4enc128: 333.L4enc128:
395 movaps -0x20(TKEYP), KEY 334 movaps -0x20(TKEYP), KEY
396 # aesenc KEY, STATE1 335 AESENC KEY STATE1
397 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 336 AESENC KEY STATE2
398 # aesenc KEY, STATE2 337 AESENC KEY STATE3
399 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 338 AESENC KEY STATE4
400 # aesenc KEY, STATE3
401 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
402 # aesenc KEY, STATE4
403 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
404 movaps -0x10(TKEYP), KEY 339 movaps -0x10(TKEYP), KEY
405 # aesenc KEY, STATE1 340 AESENC KEY STATE1
406 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 341 AESENC KEY STATE2
407 # aesenc KEY, STATE2 342 AESENC KEY STATE3
408 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 343 AESENC KEY STATE4
409 # aesenc KEY, STATE3
410 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
411 # aesenc KEY, STATE4
412 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
413 movaps (TKEYP), KEY 344 movaps (TKEYP), KEY
414 # aesenc KEY, STATE1 345 AESENC KEY STATE1
415 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 346 AESENC KEY STATE2
416 # aesenc KEY, STATE2 347 AESENC KEY STATE3
417 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 348 AESENC KEY STATE4
418 # aesenc KEY, STATE3
419 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
420 # aesenc KEY, STATE4
421 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
422 movaps 0x10(TKEYP), KEY 349 movaps 0x10(TKEYP), KEY
423 # aesenc KEY, STATE1 350 AESENC KEY STATE1
424 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 351 AESENC KEY STATE2
425 # aesenc KEY, STATE2 352 AESENC KEY STATE3
426 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 353 AESENC KEY STATE4
427 # aesenc KEY, STATE3
428 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
429 # aesenc KEY, STATE4
430 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
431 movaps 0x20(TKEYP), KEY 354 movaps 0x20(TKEYP), KEY
432 # aesenc KEY, STATE1 355 AESENC KEY STATE1
433 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 356 AESENC KEY STATE2
434 # aesenc KEY, STATE2 357 AESENC KEY STATE3
435 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 358 AESENC KEY STATE4
436 # aesenc KEY, STATE3
437 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
438 # aesenc KEY, STATE4
439 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
440 movaps 0x30(TKEYP), KEY 359 movaps 0x30(TKEYP), KEY
441 # aesenc KEY, STATE1 360 AESENC KEY STATE1
442 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 361 AESENC KEY STATE2
443 # aesenc KEY, STATE2 362 AESENC KEY STATE3
444 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 363 AESENC KEY STATE4
445 # aesenc KEY, STATE3
446 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
447 # aesenc KEY, STATE4
448 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
449 movaps 0x40(TKEYP), KEY 364 movaps 0x40(TKEYP), KEY
450 # aesenc KEY, STATE1 365 AESENC KEY STATE1
451 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 366 AESENC KEY STATE2
452 # aesenc KEY, STATE2 367 AESENC KEY STATE3
453 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 368 AESENC KEY STATE4
454 # aesenc KEY, STATE3
455 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
456 # aesenc KEY, STATE4
457 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
458 movaps 0x50(TKEYP), KEY 369 movaps 0x50(TKEYP), KEY
459 # aesenc KEY, STATE1 370 AESENC KEY STATE1
460 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 371 AESENC KEY STATE2
461 # aesenc KEY, STATE2 372 AESENC KEY STATE3
462 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 373 AESENC KEY STATE4
463 # aesenc KEY, STATE3
464 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
465 # aesenc KEY, STATE4
466 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
467 movaps 0x60(TKEYP), KEY 374 movaps 0x60(TKEYP), KEY
468 # aesenc KEY, STATE1 375 AESENC KEY STATE1
469 .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 376 AESENC KEY STATE2
470 # aesenc KEY, STATE2 377 AESENC KEY STATE3
471 .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 378 AESENC KEY STATE4
472 # aesenc KEY, STATE3
473 .byte 0x66, 0x0f, 0x38, 0xdc, 0xea
474 # aesenc KEY, STATE4
475 .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
476 movaps 0x70(TKEYP), KEY 379 movaps 0x70(TKEYP), KEY
477 # aesenclast KEY, STATE1 # last round 380 AESENCLAST KEY STATE1 # last round
478 .byte 0x66, 0x0f, 0x38, 0xdd, 0xc2 381 AESENCLAST KEY STATE2
479 # aesenclast KEY, STATE2 382 AESENCLAST KEY STATE3
480 .byte 0x66, 0x0f, 0x38, 0xdd, 0xe2 383 AESENCLAST KEY STATE4
481 # aesenclast KEY, STATE3
482 .byte 0x66, 0x0f, 0x38, 0xdd, 0xea
483 # aesenclast KEY, STATE4
484 .byte 0x66, 0x0f, 0x38, 0xdd, 0xf2
485 ret 384 ret
486 385
487/* 386/*
@@ -518,51 +417,37 @@ _aesni_dec1:
518 je .Ldec192 417 je .Ldec192
519 add $0x20, TKEYP 418 add $0x20, TKEYP
520 movaps -0x60(TKEYP), KEY 419 movaps -0x60(TKEYP), KEY
521 # aesdec KEY, STATE 420 AESDEC KEY STATE
522 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
523 movaps -0x50(TKEYP), KEY 421 movaps -0x50(TKEYP), KEY
524 # aesdec KEY, STATE 422 AESDEC KEY STATE
525 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
526.align 4 423.align 4
527.Ldec192: 424.Ldec192:
528 movaps -0x40(TKEYP), KEY 425 movaps -0x40(TKEYP), KEY
529 # aesdec KEY, STATE 426 AESDEC KEY STATE
530 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
531 movaps -0x30(TKEYP), KEY 427 movaps -0x30(TKEYP), KEY
532 # aesdec KEY, STATE 428 AESDEC KEY STATE
533 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
534.align 4 429.align 4
535.Ldec128: 430.Ldec128:
536 movaps -0x20(TKEYP), KEY 431 movaps -0x20(TKEYP), KEY
537 # aesdec KEY, STATE 432 AESDEC KEY STATE
538 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
539 movaps -0x10(TKEYP), KEY 433 movaps -0x10(TKEYP), KEY
540 # aesdec KEY, STATE 434 AESDEC KEY STATE
541 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
542 movaps (TKEYP), KEY 435 movaps (TKEYP), KEY
543 # aesdec KEY, STATE 436 AESDEC KEY STATE
544 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
545 movaps 0x10(TKEYP), KEY 437 movaps 0x10(TKEYP), KEY
546 # aesdec KEY, STATE 438 AESDEC KEY STATE
547 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
548 movaps 0x20(TKEYP), KEY 439 movaps 0x20(TKEYP), KEY
549 # aesdec KEY, STATE 440 AESDEC KEY STATE
550 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
551 movaps 0x30(TKEYP), KEY 441 movaps 0x30(TKEYP), KEY
552 # aesdec KEY, STATE 442 AESDEC KEY STATE
553 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
554 movaps 0x40(TKEYP), KEY 443 movaps 0x40(TKEYP), KEY
555 # aesdec KEY, STATE 444 AESDEC KEY STATE
556 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
557 movaps 0x50(TKEYP), KEY 445 movaps 0x50(TKEYP), KEY
558 # aesdec KEY, STATE 446 AESDEC KEY STATE
559 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
560 movaps 0x60(TKEYP), KEY 447 movaps 0x60(TKEYP), KEY
561 # aesdec KEY, STATE 448 AESDEC KEY STATE
562 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2
563 movaps 0x70(TKEYP), KEY 449 movaps 0x70(TKEYP), KEY
564 # aesdeclast KEY, STATE # last round 450 AESDECLAST KEY STATE
565 .byte 0x66, 0x0f, 0x38, 0xdf, 0xc2
566 ret 451 ret
567 452
568/* 453/*
@@ -597,135 +482,79 @@ _aesni_dec4:
597 je .L4dec192 482 je .L4dec192
598 add $0x20, TKEYP 483 add $0x20, TKEYP
599 movaps -0x60(TKEYP), KEY 484 movaps -0x60(TKEYP), KEY
600 # aesdec KEY, STATE1 485 AESDEC KEY STATE1
601 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 486 AESDEC KEY STATE2
602 # aesdec KEY, STATE2 487 AESDEC KEY STATE3
603 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 488 AESDEC KEY STATE4
604 # aesdec KEY, STATE3
605 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
606 # aesdec KEY, STATE4
607 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
608 movaps -0x50(TKEYP), KEY 489 movaps -0x50(TKEYP), KEY
609 # aesdec KEY, STATE1 490 AESDEC KEY STATE1
610 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 491 AESDEC KEY STATE2
611 # aesdec KEY, STATE2 492 AESDEC KEY STATE3
612 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 493 AESDEC KEY STATE4
613 # aesdec KEY, STATE3
614 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
615 # aesdec KEY, STATE4
616 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
617.align 4 494.align 4
618.L4dec192: 495.L4dec192:
619 movaps -0x40(TKEYP), KEY 496 movaps -0x40(TKEYP), KEY
620 # aesdec KEY, STATE1 497 AESDEC KEY STATE1
621 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 498 AESDEC KEY STATE2
622 # aesdec KEY, STATE2 499 AESDEC KEY STATE3
623 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 500 AESDEC KEY STATE4
624 # aesdec KEY, STATE3
625 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
626 # aesdec KEY, STATE4
627 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
628 movaps -0x30(TKEYP), KEY 501 movaps -0x30(TKEYP), KEY
629 # aesdec KEY, STATE1 502 AESDEC KEY STATE1
630 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 503 AESDEC KEY STATE2
631 # aesdec KEY, STATE2 504 AESDEC KEY STATE3
632 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 505 AESDEC KEY STATE4
633 # aesdec KEY, STATE3
634 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
635 # aesdec KEY, STATE4
636 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
637.align 4 506.align 4
638.L4dec128: 507.L4dec128:
639 movaps -0x20(TKEYP), KEY 508 movaps -0x20(TKEYP), KEY
640 # aesdec KEY, STATE1 509 AESDEC KEY STATE1
641 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 510 AESDEC KEY STATE2
642 # aesdec KEY, STATE2 511 AESDEC KEY STATE3
643 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 512 AESDEC KEY STATE4
644 # aesdec KEY, STATE3
645 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
646 # aesdec KEY, STATE4
647 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
648 movaps -0x10(TKEYP), KEY 513 movaps -0x10(TKEYP), KEY
649 # aesdec KEY, STATE1 514 AESDEC KEY STATE1
650 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 515 AESDEC KEY STATE2
651 # aesdec KEY, STATE2 516 AESDEC KEY STATE3
652 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 517 AESDEC KEY STATE4
653 # aesdec KEY, STATE3
654 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
655 # aesdec KEY, STATE4
656 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
657 movaps (TKEYP), KEY 518 movaps (TKEYP), KEY
658 # aesdec KEY, STATE1 519 AESDEC KEY STATE1
659 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 520 AESDEC KEY STATE2
660 # aesdec KEY, STATE2 521 AESDEC KEY STATE3
661 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 522 AESDEC KEY STATE4
662 # aesdec KEY, STATE3
663 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
664 # aesdec KEY, STATE4
665 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
666 movaps 0x10(TKEYP), KEY 523 movaps 0x10(TKEYP), KEY
667 # aesdec KEY, STATE1 524 AESDEC KEY STATE1
668 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 525 AESDEC KEY STATE2
669 # aesdec KEY, STATE2 526 AESDEC KEY STATE3
670 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 527 AESDEC KEY STATE4
671 # aesdec KEY, STATE3
672 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
673 # aesdec KEY, STATE4
674 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
675 movaps 0x20(TKEYP), KEY 528 movaps 0x20(TKEYP), KEY
676 # aesdec KEY, STATE1 529 AESDEC KEY STATE1
677 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 530 AESDEC KEY STATE2
678 # aesdec KEY, STATE2 531 AESDEC KEY STATE3
679 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 532 AESDEC KEY STATE4
680 # aesdec KEY, STATE3
681 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
682 # aesdec KEY, STATE4
683 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
684 movaps 0x30(TKEYP), KEY 533 movaps 0x30(TKEYP), KEY
685 # aesdec KEY, STATE1 534 AESDEC KEY STATE1
686 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 535 AESDEC KEY STATE2
687 # aesdec KEY, STATE2 536 AESDEC KEY STATE3
688 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 537 AESDEC KEY STATE4
689 # aesdec KEY, STATE3
690 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
691 # aesdec KEY, STATE4
692 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
693 movaps 0x40(TKEYP), KEY 538 movaps 0x40(TKEYP), KEY
694 # aesdec KEY, STATE1 539 AESDEC KEY STATE1
695 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 540 AESDEC KEY STATE2
696 # aesdec KEY, STATE2 541 AESDEC KEY STATE3
697 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 542 AESDEC KEY STATE4
698 # aesdec KEY, STATE3
699 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
700 # aesdec KEY, STATE4
701 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
702 movaps 0x50(TKEYP), KEY 543 movaps 0x50(TKEYP), KEY
703 # aesdec KEY, STATE1 544 AESDEC KEY STATE1
704 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 545 AESDEC KEY STATE2
705 # aesdec KEY, STATE2 546 AESDEC KEY STATE3
706 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 547 AESDEC KEY STATE4
707 # aesdec KEY, STATE3
708 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
709 # aesdec KEY, STATE4
710 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
711 movaps 0x60(TKEYP), KEY 548 movaps 0x60(TKEYP), KEY
712 # aesdec KEY, STATE1 549 AESDEC KEY STATE1
713 .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 550 AESDEC KEY STATE2
714 # aesdec KEY, STATE2 551 AESDEC KEY STATE3
715 .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 552 AESDEC KEY STATE4
716 # aesdec KEY, STATE3
717 .byte 0x66, 0x0f, 0x38, 0xde, 0xea
718 # aesdec KEY, STATE4
719 .byte 0x66, 0x0f, 0x38, 0xde, 0xf2
720 movaps 0x70(TKEYP), KEY 553 movaps 0x70(TKEYP), KEY
721 # aesdeclast KEY, STATE1 # last round 554 AESDECLAST KEY STATE1 # last round
722 .byte 0x66, 0x0f, 0x38, 0xdf, 0xc2 555 AESDECLAST KEY STATE2
723 # aesdeclast KEY, STATE2 556 AESDECLAST KEY STATE3
724 .byte 0x66, 0x0f, 0x38, 0xdf, 0xe2 557 AESDECLAST KEY STATE4
725 # aesdeclast KEY, STATE3
726 .byte 0x66, 0x0f, 0x38, 0xdf, 0xea
727 # aesdeclast KEY, STATE4
728 .byte 0x66, 0x0f, 0x38, 0xdf, 0xf2
729 ret 558 ret
730 559
731/* 560/*
diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S
new file mode 100644
index 000000000000..1eb7f90cb7b9
--- /dev/null
+++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S
@@ -0,0 +1,157 @@
1/*
2 * Accelerated GHASH implementation with Intel PCLMULQDQ-NI
3 * instructions. This file contains accelerated part of ghash
4 * implementation. More information about PCLMULQDQ can be found at:
5 *
6 * http://software.intel.com/en-us/articles/carry-less-multiplication-and-its-usage-for-computing-the-gcm-mode/
7 *
8 * Copyright (c) 2009 Intel Corp.
9 * Author: Huang Ying <ying.huang@intel.com>
10 * Vinodh Gopal
11 * Erdinc Ozturk
12 * Deniz Karakoyunlu
13 *
14 * This program is free software; you can redistribute it and/or modify it
15 * under the terms of the GNU General Public License version 2 as published
16 * by the Free Software Foundation.
17 */
18
19#include <linux/linkage.h>
20#include <asm/inst.h>
21
22.data
23
24.align 16
25.Lbswap_mask:
26 .octa 0x000102030405060708090a0b0c0d0e0f
27.Lpoly:
28 .octa 0xc2000000000000000000000000000001
29.Ltwo_one:
30 .octa 0x00000001000000000000000000000001
31
32#define DATA %xmm0
33#define SHASH %xmm1
34#define T1 %xmm2
35#define T2 %xmm3
36#define T3 %xmm4
37#define BSWAP %xmm5
38#define IN1 %xmm6
39
40.text
41
42/*
43 * __clmul_gf128mul_ble: internal ABI
44 * input:
45 * DATA: operand1
46 * SHASH: operand2, hash_key << 1 mod poly
47 * output:
48 * DATA: operand1 * operand2 mod poly
49 * changed:
50 * T1
51 * T2
52 * T3
53 */
54__clmul_gf128mul_ble:
55 movaps DATA, T1
56 pshufd $0b01001110, DATA, T2
57 pshufd $0b01001110, SHASH, T3
58 pxor DATA, T2
59 pxor SHASH, T3
60
61 PCLMULQDQ 0x00 SHASH DATA # DATA = a0 * b0
62 PCLMULQDQ 0x11 SHASH T1 # T1 = a1 * b1
63 PCLMULQDQ 0x00 T3 T2 # T2 = (a1 + a0) * (b1 + b0)
64 pxor DATA, T2
65 pxor T1, T2 # T2 = a0 * b1 + a1 * b0
66
67 movaps T2, T3
68 pslldq $8, T3
69 psrldq $8, T2
70 pxor T3, DATA
71 pxor T2, T1 # <T1:DATA> is result of
72 # carry-less multiplication
73
74 # first phase of the reduction
75 movaps DATA, T3
76 psllq $1, T3
77 pxor DATA, T3
78 psllq $5, T3
79 pxor DATA, T3
80 psllq $57, T3
81 movaps T3, T2
82 pslldq $8, T2
83 psrldq $8, T3
84 pxor T2, DATA
85 pxor T3, T1
86
87 # second phase of the reduction
88 movaps DATA, T2
89 psrlq $5, T2
90 pxor DATA, T2
91 psrlq $1, T2
92 pxor DATA, T2
93 psrlq $1, T2
94 pxor T2, T1
95 pxor T1, DATA
96 ret
97
98/* void clmul_ghash_mul(char *dst, const be128 *shash) */
99ENTRY(clmul_ghash_mul)
100 movups (%rdi), DATA
101 movups (%rsi), SHASH
102 movaps .Lbswap_mask, BSWAP
103 PSHUFB_XMM BSWAP DATA
104 call __clmul_gf128mul_ble
105 PSHUFB_XMM BSWAP DATA
106 movups DATA, (%rdi)
107 ret
108
109/*
110 * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
111 * const be128 *shash);
112 */
113ENTRY(clmul_ghash_update)
114 cmp $16, %rdx
115 jb .Lupdate_just_ret # check length
116 movaps .Lbswap_mask, BSWAP
117 movups (%rdi), DATA
118 movups (%rcx), SHASH
119 PSHUFB_XMM BSWAP DATA
120.align 4
121.Lupdate_loop:
122 movups (%rsi), IN1
123 PSHUFB_XMM BSWAP IN1
124 pxor IN1, DATA
125 call __clmul_gf128mul_ble
126 sub $16, %rdx
127 add $16, %rsi
128 cmp $16, %rdx
129 jge .Lupdate_loop
130 PSHUFB_XMM BSWAP DATA
131 movups DATA, (%rdi)
132.Lupdate_just_ret:
133 ret
134
135/*
136 * void clmul_ghash_setkey(be128 *shash, const u8 *key);
137 *
138 * Calculate hash_key << 1 mod poly
139 */
140ENTRY(clmul_ghash_setkey)
141 movaps .Lbswap_mask, BSWAP
142 movups (%rsi), %xmm0
143 PSHUFB_XMM BSWAP %xmm0
144 movaps %xmm0, %xmm1
145 psllq $1, %xmm0
146 psrlq $63, %xmm1
147 movaps %xmm1, %xmm2
148 pslldq $8, %xmm1
149 psrldq $8, %xmm2
150 por %xmm1, %xmm0
151 # reduction
152 pshufd $0b00100100, %xmm2, %xmm1
153 pcmpeqd .Ltwo_one, %xmm1
154 pand .Lpoly, %xmm1
155 pxor %xmm1, %xmm0
156 movups %xmm0, (%rdi)
157 ret
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
new file mode 100644
index 000000000000..cbcc8d8ea93a
--- /dev/null
+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -0,0 +1,333 @@
1/*
2 * Accelerated GHASH implementation with Intel PCLMULQDQ-NI
3 * instructions. This file contains glue code.
4 *
5 * Copyright (c) 2009 Intel Corp.
6 * Author: Huang Ying <ying.huang@intel.com>
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License version 2 as published
10 * by the Free Software Foundation.
11 */
12
13#include <linux/module.h>
14#include <linux/init.h>
15#include <linux/kernel.h>
16#include <linux/crypto.h>
17#include <crypto/algapi.h>
18#include <crypto/cryptd.h>
19#include <crypto/gf128mul.h>
20#include <crypto/internal/hash.h>
21#include <asm/i387.h>
22
23#define GHASH_BLOCK_SIZE 16
24#define GHASH_DIGEST_SIZE 16
25
26void clmul_ghash_mul(char *dst, const be128 *shash);
27
28void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
29 const be128 *shash);
30
31void clmul_ghash_setkey(be128 *shash, const u8 *key);
32
33struct ghash_async_ctx {
34 struct cryptd_ahash *cryptd_tfm;
35};
36
37struct ghash_ctx {
38 be128 shash;
39};
40
41struct ghash_desc_ctx {
42 u8 buffer[GHASH_BLOCK_SIZE];
43 u32 bytes;
44};
45
46static int ghash_init(struct shash_desc *desc)
47{
48 struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
49
50 memset(dctx, 0, sizeof(*dctx));
51
52 return 0;
53}
54
55static int ghash_setkey(struct crypto_shash *tfm,
56 const u8 *key, unsigned int keylen)
57{
58 struct ghash_ctx *ctx = crypto_shash_ctx(tfm);
59
60 if (keylen != GHASH_BLOCK_SIZE) {
61 crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
62 return -EINVAL;
63 }
64
65 clmul_ghash_setkey(&ctx->shash, key);
66
67 return 0;
68}
69
70static int ghash_update(struct shash_desc *desc,
71 const u8 *src, unsigned int srclen)
72{
73 struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
74 struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
75 u8 *dst = dctx->buffer;
76
77 kernel_fpu_begin();
78 if (dctx->bytes) {
79 int n = min(srclen, dctx->bytes);
80 u8 *pos = dst + (GHASH_BLOCK_SIZE - dctx->bytes);
81
82 dctx->bytes -= n;
83 srclen -= n;
84
85 while (n--)
86 *pos++ ^= *src++;
87
88 if (!dctx->bytes)
89 clmul_ghash_mul(dst, &ctx->shash);
90 }
91
92 clmul_ghash_update(dst, src, srclen, &ctx->shash);
93 kernel_fpu_end();
94
95 if (srclen & 0xf) {
96 src += srclen - (srclen & 0xf);
97 srclen &= 0xf;
98 dctx->bytes = GHASH_BLOCK_SIZE - srclen;
99 while (srclen--)
100 *dst++ ^= *src++;
101 }
102
103 return 0;
104}
105
106static void ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx)
107{
108 u8 *dst = dctx->buffer;
109
110 if (dctx->bytes) {
111 u8 *tmp = dst + (GHASH_BLOCK_SIZE - dctx->bytes);
112
113 while (dctx->bytes--)
114 *tmp++ ^= 0;
115
116 kernel_fpu_begin();
117 clmul_ghash_mul(dst, &ctx->shash);
118 kernel_fpu_end();
119 }
120
121 dctx->bytes = 0;
122}
123
124static int ghash_final(struct shash_desc *desc, u8 *dst)
125{
126 struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
127 struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
128 u8 *buf = dctx->buffer;
129
130 ghash_flush(ctx, dctx);
131 memcpy(dst, buf, GHASH_BLOCK_SIZE);
132
133 return 0;
134}
135
136static struct shash_alg ghash_alg = {
137 .digestsize = GHASH_DIGEST_SIZE,
138 .init = ghash_init,
139 .update = ghash_update,
140 .final = ghash_final,
141 .setkey = ghash_setkey,
142 .descsize = sizeof(struct ghash_desc_ctx),
143 .base = {
144 .cra_name = "__ghash",
145 .cra_driver_name = "__ghash-pclmulqdqni",
146 .cra_priority = 0,
147 .cra_flags = CRYPTO_ALG_TYPE_SHASH,
148 .cra_blocksize = GHASH_BLOCK_SIZE,
149 .cra_ctxsize = sizeof(struct ghash_ctx),
150 .cra_module = THIS_MODULE,
151 .cra_list = LIST_HEAD_INIT(ghash_alg.base.cra_list),
152 },
153};
154
155static int ghash_async_init(struct ahash_request *req)
156{
157 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
158 struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
159 struct ahash_request *cryptd_req = ahash_request_ctx(req);
160 struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
161
162 if (!irq_fpu_usable()) {
163 memcpy(cryptd_req, req, sizeof(*req));
164 ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
165 return crypto_ahash_init(cryptd_req);
166 } else {
167 struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
168 struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);
169
170 desc->tfm = child;
171 desc->flags = req->base.flags;
172 return crypto_shash_init(desc);
173 }
174}
175
176static int ghash_async_update(struct ahash_request *req)
177{
178 struct ahash_request *cryptd_req = ahash_request_ctx(req);
179
180 if (!irq_fpu_usable()) {
181 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
182 struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
183 struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
184
185 memcpy(cryptd_req, req, sizeof(*req));
186 ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
187 return crypto_ahash_update(cryptd_req);
188 } else {
189 struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
190 return shash_ahash_update(req, desc);
191 }
192}
193
194static int ghash_async_final(struct ahash_request *req)
195{
196 struct ahash_request *cryptd_req = ahash_request_ctx(req);
197
198 if (!irq_fpu_usable()) {
199 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
200 struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
201 struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
202
203 memcpy(cryptd_req, req, sizeof(*req));
204 ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
205 return crypto_ahash_final(cryptd_req);
206 } else {
207 struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
208 return crypto_shash_final(desc, req->result);
209 }
210}
211
212static int ghash_async_digest(struct ahash_request *req)
213{
214 struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
215 struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
216 struct ahash_request *cryptd_req = ahash_request_ctx(req);
217 struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
218
219 if (!irq_fpu_usable()) {
220 memcpy(cryptd_req, req, sizeof(*req));
221 ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
222 return crypto_ahash_digest(cryptd_req);
223 } else {
224 struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
225 struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);
226
227 desc->tfm = child;
228 desc->flags = req->base.flags;
229 return shash_ahash_digest(req, desc);
230 }
231}
232
233static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key,
234 unsigned int keylen)
235{
236 struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
237 struct crypto_ahash *child = &ctx->cryptd_tfm->base;
238 int err;
239
240 crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK);
241 crypto_ahash_set_flags(child, crypto_ahash_get_flags(tfm)
242 & CRYPTO_TFM_REQ_MASK);
243 err = crypto_ahash_setkey(child, key, keylen);
244 crypto_ahash_set_flags(tfm, crypto_ahash_get_flags(child)
245 & CRYPTO_TFM_RES_MASK);
246
247 return 0;
248}
249
250static int ghash_async_init_tfm(struct crypto_tfm *tfm)
251{
252 struct cryptd_ahash *cryptd_tfm;
253 struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
254
255 cryptd_tfm = cryptd_alloc_ahash("__ghash-pclmulqdqni", 0, 0);
256 if (IS_ERR(cryptd_tfm))
257 return PTR_ERR(cryptd_tfm);
258 ctx->cryptd_tfm = cryptd_tfm;
259 crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
260 sizeof(struct ahash_request) +
261 crypto_ahash_reqsize(&cryptd_tfm->base));
262
263 return 0;
264}
265
266static void ghash_async_exit_tfm(struct crypto_tfm *tfm)
267{
268 struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
269
270 cryptd_free_ahash(ctx->cryptd_tfm);
271}
272
273static struct ahash_alg ghash_async_alg = {
274 .init = ghash_async_init,
275 .update = ghash_async_update,
276 .final = ghash_async_final,
277 .setkey = ghash_async_setkey,
278 .digest = ghash_async_digest,
279 .halg = {
280 .digestsize = GHASH_DIGEST_SIZE,
281 .base = {
282 .cra_name = "ghash",
283 .cra_driver_name = "ghash-clmulni",
284 .cra_priority = 400,
285 .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC,
286 .cra_blocksize = GHASH_BLOCK_SIZE,
287 .cra_type = &crypto_ahash_type,
288 .cra_module = THIS_MODULE,
289 .cra_list = LIST_HEAD_INIT(ghash_async_alg.halg.base.cra_list),
290 .cra_init = ghash_async_init_tfm,
291 .cra_exit = ghash_async_exit_tfm,
292 },
293 },
294};
295
296static int __init ghash_pclmulqdqni_mod_init(void)
297{
298 int err;
299
300 if (!cpu_has_pclmulqdq) {
301 printk(KERN_INFO "Intel PCLMULQDQ-NI instructions are not"
302 " detected.\n");
303 return -ENODEV;
304 }
305
306 err = crypto_register_shash(&ghash_alg);
307 if (err)
308 goto err_out;
309 err = crypto_register_ahash(&ghash_async_alg);
310 if (err)
311 goto err_shash;
312
313 return 0;
314
315err_shash:
316 crypto_unregister_shash(&ghash_alg);
317err_out:
318 return err;
319}
320
321static void __exit ghash_pclmulqdqni_mod_exit(void)
322{
323 crypto_unregister_ahash(&ghash_async_alg);
324 crypto_unregister_shash(&ghash_alg);
325}
326
327module_init(ghash_pclmulqdqni_mod_init);
328module_exit(ghash_pclmulqdqni_mod_exit);
329
330MODULE_LICENSE("GPL");
331MODULE_DESCRIPTION("GHASH Message Digest Algorithm, "
332 "acclerated by PCLMULQDQ-NI");
333MODULE_ALIAS("ghash");
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 9cfc88b97742..613700f27a4a 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -248,6 +248,7 @@ extern const char * const x86_power_flags[32];
248#define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) 248#define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC)
249#define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) 249#define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE)
250#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) 250#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR)
251#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ)
251 252
252#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) 253#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
253# define cpu_has_invlpg 1 254# define cpu_has_invlpg 1
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 0b20bbb758f2..ebfb8a9e11f7 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -10,6 +10,8 @@
10#ifndef _ASM_X86_I387_H 10#ifndef _ASM_X86_I387_H
11#define _ASM_X86_I387_H 11#define _ASM_X86_I387_H
12 12
13#ifndef __ASSEMBLY__
14
13#include <linux/sched.h> 15#include <linux/sched.h>
14#include <linux/kernel_stat.h> 16#include <linux/kernel_stat.h>
15#include <linux/regset.h> 17#include <linux/regset.h>
@@ -411,4 +413,9 @@ static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk)
411 } 413 }
412} 414}
413 415
416#endif /* __ASSEMBLY__ */
417
418#define PSHUFB_XMM5_XMM0 .byte 0x66, 0x0f, 0x38, 0x00, 0xc5
419#define PSHUFB_XMM5_XMM6 .byte 0x66, 0x0f, 0x38, 0x00, 0xf5
420
414#endif /* _ASM_X86_I387_H */ 421#endif /* _ASM_X86_I387_H */
diff --git a/arch/x86/include/asm/inst.h b/arch/x86/include/asm/inst.h
new file mode 100644
index 000000000000..14cf526091f9
--- /dev/null
+++ b/arch/x86/include/asm/inst.h
@@ -0,0 +1,150 @@
1/*
2 * Generate .byte code for some instructions not supported by old
3 * binutils.
4 */
5#ifndef X86_ASM_INST_H
6#define X86_ASM_INST_H
7
8#ifdef __ASSEMBLY__
9
10 .macro XMM_NUM opd xmm
11 .ifc \xmm,%xmm0
12 \opd = 0
13 .endif
14 .ifc \xmm,%xmm1
15 \opd = 1
16 .endif
17 .ifc \xmm,%xmm2
18 \opd = 2
19 .endif
20 .ifc \xmm,%xmm3
21 \opd = 3
22 .endif
23 .ifc \xmm,%xmm4
24 \opd = 4
25 .endif
26 .ifc \xmm,%xmm5
27 \opd = 5
28 .endif
29 .ifc \xmm,%xmm6
30 \opd = 6
31 .endif
32 .ifc \xmm,%xmm7
33 \opd = 7
34 .endif
35 .ifc \xmm,%xmm8
36 \opd = 8
37 .endif
38 .ifc \xmm,%xmm9
39 \opd = 9
40 .endif
41 .ifc \xmm,%xmm10
42 \opd = 10
43 .endif
44 .ifc \xmm,%xmm11
45 \opd = 11
46 .endif
47 .ifc \xmm,%xmm12
48 \opd = 12
49 .endif
50 .ifc \xmm,%xmm13
51 \opd = 13
52 .endif
53 .ifc \xmm,%xmm14
54 \opd = 14
55 .endif
56 .ifc \xmm,%xmm15
57 \opd = 15
58 .endif
59 .endm
60
61 .macro PFX_OPD_SIZE
62 .byte 0x66
63 .endm
64
65 .macro PFX_REX opd1 opd2
66 .if (\opd1 | \opd2) & 8
67 .byte 0x40 | ((\opd1 & 8) >> 3) | ((\opd2 & 8) >> 1)
68 .endif
69 .endm
70
71 .macro MODRM mod opd1 opd2
72 .byte \mod | (\opd1 & 7) | ((\opd2 & 7) << 3)
73 .endm
74
75 .macro PSHUFB_XMM xmm1 xmm2
76 XMM_NUM pshufb_opd1 \xmm1
77 XMM_NUM pshufb_opd2 \xmm2
78 PFX_OPD_SIZE
79 PFX_REX pshufb_opd1 pshufb_opd2
80 .byte 0x0f, 0x38, 0x00
81 MODRM 0xc0 pshufb_opd1 pshufb_opd2
82 .endm
83
84 .macro PCLMULQDQ imm8 xmm1 xmm2
85 XMM_NUM clmul_opd1 \xmm1
86 XMM_NUM clmul_opd2 \xmm2
87 PFX_OPD_SIZE
88 PFX_REX clmul_opd1 clmul_opd2
89 .byte 0x0f, 0x3a, 0x44
90 MODRM 0xc0 clmul_opd1 clmul_opd2
91 .byte \imm8
92 .endm
93
94 .macro AESKEYGENASSIST rcon xmm1 xmm2
95 XMM_NUM aeskeygen_opd1 \xmm1
96 XMM_NUM aeskeygen_opd2 \xmm2
97 PFX_OPD_SIZE
98 PFX_REX aeskeygen_opd1 aeskeygen_opd2
99 .byte 0x0f, 0x3a, 0xdf
100 MODRM 0xc0 aeskeygen_opd1 aeskeygen_opd2
101 .byte \rcon
102 .endm
103
104 .macro AESIMC xmm1 xmm2
105 XMM_NUM aesimc_opd1 \xmm1
106 XMM_NUM aesimc_opd2 \xmm2
107 PFX_OPD_SIZE
108 PFX_REX aesimc_opd1 aesimc_opd2
109 .byte 0x0f, 0x38, 0xdb
110 MODRM 0xc0 aesimc_opd1 aesimc_opd2
111 .endm
112
113 .macro AESENC xmm1 xmm2
114 XMM_NUM aesenc_opd1 \xmm1
115 XMM_NUM aesenc_opd2 \xmm2
116 PFX_OPD_SIZE
117 PFX_REX aesenc_opd1 aesenc_opd2
118 .byte 0x0f, 0x38, 0xdc
119 MODRM 0xc0 aesenc_opd1 aesenc_opd2
120 .endm
121
122 .macro AESENCLAST xmm1 xmm2
123 XMM_NUM aesenclast_opd1 \xmm1
124 XMM_NUM aesenclast_opd2 \xmm2
125 PFX_OPD_SIZE
126 PFX_REX aesenclast_opd1 aesenclast_opd2
127 .byte 0x0f, 0x38, 0xdd
128 MODRM 0xc0 aesenclast_opd1 aesenclast_opd2
129 .endm
130
131 .macro AESDEC xmm1 xmm2
132 XMM_NUM aesdec_opd1 \xmm1
133 XMM_NUM aesdec_opd2 \xmm2
134 PFX_OPD_SIZE
135 PFX_REX aesdec_opd1 aesdec_opd2
136 .byte 0x0f, 0x38, 0xde
137 MODRM 0xc0 aesdec_opd1 aesdec_opd2
138 .endm
139
140 .macro AESDECLAST xmm1 xmm2
141 XMM_NUM aesdeclast_opd1 \xmm1
142 XMM_NUM aesdeclast_opd2 \xmm2
143 PFX_OPD_SIZE
144 PFX_REX aesdeclast_opd1 aesdeclast_opd2
145 .byte 0x0f, 0x38, 0xdf
146 MODRM 0xc0 aesdeclast_opd1 aesdeclast_opd2
147 .endm
148#endif
149
150#endif