aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/crypto
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-08-04 12:52:51 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2014-08-04 12:52:51 -0400
commit3e7a716a92a0e051f5502c7b689f8c9127c37c33 (patch)
tree2ebb892eb3a024f108e68a9577c767a53b955a4a /arch/x86/crypto
parentc2df436bd2504f52808c10ab7d7da832f61ad3f0 (diff)
parentce5481d01f67ad304908ec2113515440c0fa86eb (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto update from Herbert Xu: - CTR(AES) optimisation on x86_64 using "by8" AVX. - arm64 support to ccp - Intel QAT crypto driver - Qualcomm crypto engine driver - x86-64 assembly optimisation for 3DES - CTR(3DES) speed test - move FIPS panic from module.c so that it only triggers on crypto modules - SP800-90A Deterministic Random Bit Generator (drbg). - more test vectors for ghash. - tweak self tests to catch partial block bugs. - misc fixes. * git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (94 commits) crypto: drbg - fix failure of generating multiple of 2**16 bytes crypto: ccp - Do not sign extend input data to CCP crypto: testmgr - add missing spaces to drbg error strings crypto: atmel-tdes - Switch to managed version of kzalloc crypto: atmel-sha - Switch to managed version of kzalloc crypto: testmgr - use chunks smaller than algo block size in chunk tests crypto: qat - Fixed SKU1 dev issue crypto: qat - Use hweight for bit counting crypto: qat - Updated print outputs crypto: qat - change ae_num to ae_id crypto: qat - change slice->regions to slice->region crypto: qat - use min_t macro crypto: qat - remove unnecessary parentheses crypto: qat - remove unneeded header crypto: qat - checkpatch blank lines crypto: qat - remove unnecessary return codes crypto: Resolve shadow warnings crypto: ccp - Remove "select OF" from Kconfig crypto: caam - fix DECO RSR polling crypto: qce - Let 'DEV_QCE' depend on both HAS_DMA and HAS_IOMEM ...
Diffstat (limited to 'arch/x86/crypto')
-rw-r--r--arch/x86/crypto/Makefile4
-rw-r--r--arch/x86/crypto/aes_ctrby8_avx-x86_64.S546
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c40
-rw-r--r--arch/x86/crypto/crc32c-pcl-intel-asm_64.S281
-rw-r--r--arch/x86/crypto/des3_ede-asm_64.S805
-rw-r--r--arch/x86/crypto/des3_ede_glue.c509
6 files changed, 2040 insertions, 145 deletions
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 61d6e281898b..d551165a3159 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -14,6 +14,7 @@ obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
14obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o 14obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o
15 15
16obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o 16obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
17obj-$(CONFIG_CRYPTO_DES3_EDE_X86_64) += des3_ede-x86_64.o
17obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o 18obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
18obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o 19obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
19obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o 20obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
@@ -52,6 +53,7 @@ salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o
52serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o 53serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o
53 54
54aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o 55aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
56des3_ede-x86_64-y := des3_ede-asm_64.o des3_ede_glue.o
55camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o 57camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
56blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o 58blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
57twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o 59twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
@@ -76,7 +78,7 @@ ifeq ($(avx2_supported),yes)
76endif 78endif
77 79
78aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o 80aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
79aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o 81aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o aes_ctrby8_avx-x86_64.o
80ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o 82ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
81sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o 83sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
82ifeq ($(avx2_supported),yes) 84ifeq ($(avx2_supported),yes)
diff --git a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
new file mode 100644
index 000000000000..f091f122ed24
--- /dev/null
+++ b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
@@ -0,0 +1,546 @@
1/*
2 * Implement AES CTR mode by8 optimization with AVX instructions. (x86_64)
3 *
4 * This is AES128/192/256 CTR mode optimization implementation. It requires
5 * the support of Intel(R) AESNI and AVX instructions.
6 *
7 * This work was inspired by the AES CTR mode optimization published
8 * in Intel Optimized IPSEC Cryptographic library.
9 * Additional information on it can be found at:
10 * http://downloadcenter.intel.com/Detail_Desc.aspx?agr=Y&DwnldID=22972
11 *
12 * This file is provided under a dual BSD/GPLv2 license. When using or
13 * redistributing this file, you may do so under either license.
14 *
15 * GPL LICENSE SUMMARY
16 *
17 * Copyright(c) 2014 Intel Corporation.
18 *
19 * This program is free software; you can redistribute it and/or modify
20 * it under the terms of version 2 of the GNU General Public License as
21 * published by the Free Software Foundation.
22 *
23 * This program is distributed in the hope that it will be useful, but
24 * WITHOUT ANY WARRANTY; without even the implied warranty of
25 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 * General Public License for more details.
27 *
28 * Contact Information:
29 * James Guilford <james.guilford@intel.com>
30 * Sean Gulley <sean.m.gulley@intel.com>
31 * Chandramouli Narayanan <mouli@linux.intel.com>
32 *
33 * BSD LICENSE
34 *
35 * Copyright(c) 2014 Intel Corporation.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 *
41 * Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in
45 * the documentation and/or other materials provided with the
46 * distribution.
47 * Neither the name of Intel Corporation nor the names of its
48 * contributors may be used to endorse or promote products derived
49 * from this software without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
52 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
53 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
54 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
55 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
56 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
57 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
58 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
59 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
60 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
61 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
62 *
63 */
64
65#include <linux/linkage.h>
66#include <asm/inst.h>
67
68#define CONCAT(a,b) a##b
69#define VMOVDQ vmovdqu
70
71#define xdata0 %xmm0
72#define xdata1 %xmm1
73#define xdata2 %xmm2
74#define xdata3 %xmm3
75#define xdata4 %xmm4
76#define xdata5 %xmm5
77#define xdata6 %xmm6
78#define xdata7 %xmm7
79#define xcounter %xmm8
80#define xbyteswap %xmm9
81#define xkey0 %xmm10
82#define xkey3 %xmm11
83#define xkey6 %xmm12
84#define xkey9 %xmm13
85#define xkey4 %xmm11
86#define xkey8 %xmm12
87#define xkey12 %xmm13
88#define xkeyA %xmm14
89#define xkeyB %xmm15
90
91#define p_in %rdi
92#define p_iv %rsi
93#define p_keys %rdx
94#define p_out %rcx
95#define num_bytes %r8
96
97#define tmp %r10
98#define DDQ(i) CONCAT(ddq_add_,i)
99#define XMM(i) CONCAT(%xmm, i)
100#define DDQ_DATA 0
101#define XDATA 1
102#define KEY_128 1
103#define KEY_192 2
104#define KEY_256 3
105
106.section .rodata
107.align 16
108
109byteswap_const:
110 .octa 0x000102030405060708090A0B0C0D0E0F
111ddq_add_1:
112 .octa 0x00000000000000000000000000000001
113ddq_add_2:
114 .octa 0x00000000000000000000000000000002
115ddq_add_3:
116 .octa 0x00000000000000000000000000000003
117ddq_add_4:
118 .octa 0x00000000000000000000000000000004
119ddq_add_5:
120 .octa 0x00000000000000000000000000000005
121ddq_add_6:
122 .octa 0x00000000000000000000000000000006
123ddq_add_7:
124 .octa 0x00000000000000000000000000000007
125ddq_add_8:
126 .octa 0x00000000000000000000000000000008
127
128.text
129
130/* generate a unique variable for ddq_add_x */
131
132.macro setddq n
133 var_ddq_add = DDQ(\n)
134.endm
135
136/* generate a unique variable for xmm register */
137.macro setxdata n
138 var_xdata = XMM(\n)
139.endm
140
141/* club the numeric 'id' to the symbol 'name' */
142
143.macro club name, id
144.altmacro
145 .if \name == DDQ_DATA
146 setddq %\id
147 .elseif \name == XDATA
148 setxdata %\id
149 .endif
150.noaltmacro
151.endm
152
153/*
154 * do_aes num_in_par load_keys key_len
155 * This increments p_in, but not p_out
156 */
157.macro do_aes b, k, key_len
158 .set by, \b
159 .set load_keys, \k
160 .set klen, \key_len
161
162 .if (load_keys)
163 vmovdqa 0*16(p_keys), xkey0
164 .endif
165
166 vpshufb xbyteswap, xcounter, xdata0
167
168 .set i, 1
169 .rept (by - 1)
170 club DDQ_DATA, i
171 club XDATA, i
172 vpaddd var_ddq_add(%rip), xcounter, var_xdata
173 vpshufb xbyteswap, var_xdata, var_xdata
174 .set i, (i +1)
175 .endr
176
177 vmovdqa 1*16(p_keys), xkeyA
178
179 vpxor xkey0, xdata0, xdata0
180 club DDQ_DATA, by
181 vpaddd var_ddq_add(%rip), xcounter, xcounter
182
183 .set i, 1
184 .rept (by - 1)
185 club XDATA, i
186 vpxor xkey0, var_xdata, var_xdata
187 .set i, (i +1)
188 .endr
189
190 vmovdqa 2*16(p_keys), xkeyB
191
192 .set i, 0
193 .rept by
194 club XDATA, i
195 vaesenc xkeyA, var_xdata, var_xdata /* key 1 */
196 .set i, (i +1)
197 .endr
198
199 .if (klen == KEY_128)
200 .if (load_keys)
201 vmovdqa 3*16(p_keys), xkeyA
202 .endif
203 .else
204 vmovdqa 3*16(p_keys), xkeyA
205 .endif
206
207 .set i, 0
208 .rept by
209 club XDATA, i
210 vaesenc xkeyB, var_xdata, var_xdata /* key 2 */
211 .set i, (i +1)
212 .endr
213
214 add $(16*by), p_in
215
216 .if (klen == KEY_128)
217 vmovdqa 4*16(p_keys), xkey4
218 .else
219 .if (load_keys)
220 vmovdqa 4*16(p_keys), xkey4
221 .endif
222 .endif
223
224 .set i, 0
225 .rept by
226 club XDATA, i
227 vaesenc xkeyA, var_xdata, var_xdata /* key 3 */
228 .set i, (i +1)
229 .endr
230
231 vmovdqa 5*16(p_keys), xkeyA
232
233 .set i, 0
234 .rept by
235 club XDATA, i
236 vaesenc xkey4, var_xdata, var_xdata /* key 4 */
237 .set i, (i +1)
238 .endr
239
240 .if (klen == KEY_128)
241 .if (load_keys)
242 vmovdqa 6*16(p_keys), xkeyB
243 .endif
244 .else
245 vmovdqa 6*16(p_keys), xkeyB
246 .endif
247
248 .set i, 0
249 .rept by
250 club XDATA, i
251 vaesenc xkeyA, var_xdata, var_xdata /* key 5 */
252 .set i, (i +1)
253 .endr
254
255 vmovdqa 7*16(p_keys), xkeyA
256
257 .set i, 0
258 .rept by
259 club XDATA, i
260 vaesenc xkeyB, var_xdata, var_xdata /* key 6 */
261 .set i, (i +1)
262 .endr
263
264 .if (klen == KEY_128)
265 vmovdqa 8*16(p_keys), xkey8
266 .else
267 .if (load_keys)
268 vmovdqa 8*16(p_keys), xkey8
269 .endif
270 .endif
271
272 .set i, 0
273 .rept by
274 club XDATA, i
275 vaesenc xkeyA, var_xdata, var_xdata /* key 7 */
276 .set i, (i +1)
277 .endr
278
279 .if (klen == KEY_128)
280 .if (load_keys)
281 vmovdqa 9*16(p_keys), xkeyA
282 .endif
283 .else
284 vmovdqa 9*16(p_keys), xkeyA
285 .endif
286
287 .set i, 0
288 .rept by
289 club XDATA, i
290 vaesenc xkey8, var_xdata, var_xdata /* key 8 */
291 .set i, (i +1)
292 .endr
293
294 vmovdqa 10*16(p_keys), xkeyB
295
296 .set i, 0
297 .rept by
298 club XDATA, i
299 vaesenc xkeyA, var_xdata, var_xdata /* key 9 */
300 .set i, (i +1)
301 .endr
302
303 .if (klen != KEY_128)
304 vmovdqa 11*16(p_keys), xkeyA
305 .endif
306
307 .set i, 0
308 .rept by
309 club XDATA, i
310 /* key 10 */
311 .if (klen == KEY_128)
312 vaesenclast xkeyB, var_xdata, var_xdata
313 .else
314 vaesenc xkeyB, var_xdata, var_xdata
315 .endif
316 .set i, (i +1)
317 .endr
318
319 .if (klen != KEY_128)
320 .if (load_keys)
321 vmovdqa 12*16(p_keys), xkey12
322 .endif
323
324 .set i, 0
325 .rept by
326 club XDATA, i
327 vaesenc xkeyA, var_xdata, var_xdata /* key 11 */
328 .set i, (i +1)
329 .endr
330
331 .if (klen == KEY_256)
332 vmovdqa 13*16(p_keys), xkeyA
333 .endif
334
335 .set i, 0
336 .rept by
337 club XDATA, i
338 .if (klen == KEY_256)
339 /* key 12 */
340 vaesenc xkey12, var_xdata, var_xdata
341 .else
342 vaesenclast xkey12, var_xdata, var_xdata
343 .endif
344 .set i, (i +1)
345 .endr
346
347 .if (klen == KEY_256)
348 vmovdqa 14*16(p_keys), xkeyB
349
350 .set i, 0
351 .rept by
352 club XDATA, i
353 /* key 13 */
354 vaesenc xkeyA, var_xdata, var_xdata
355 .set i, (i +1)
356 .endr
357
358 .set i, 0
359 .rept by
360 club XDATA, i
361 /* key 14 */
362 vaesenclast xkeyB, var_xdata, var_xdata
363 .set i, (i +1)
364 .endr
365 .endif
366 .endif
367
368 .set i, 0
369 .rept (by / 2)
370 .set j, (i+1)
371 VMOVDQ (i*16 - 16*by)(p_in), xkeyA
372 VMOVDQ (j*16 - 16*by)(p_in), xkeyB
373 club XDATA, i
374 vpxor xkeyA, var_xdata, var_xdata
375 club XDATA, j
376 vpxor xkeyB, var_xdata, var_xdata
377 .set i, (i+2)
378 .endr
379
380 .if (i < by)
381 VMOVDQ (i*16 - 16*by)(p_in), xkeyA
382 club XDATA, i
383 vpxor xkeyA, var_xdata, var_xdata
384 .endif
385
386 .set i, 0
387 .rept by
388 club XDATA, i
389 VMOVDQ var_xdata, i*16(p_out)
390 .set i, (i+1)
391 .endr
392.endm
393
394.macro do_aes_load val, key_len
395 do_aes \val, 1, \key_len
396.endm
397
398.macro do_aes_noload val, key_len
399 do_aes \val, 0, \key_len
400.endm
401
402/* main body of aes ctr load */
403
404.macro do_aes_ctrmain key_len
405
406 cmp $16, num_bytes
407 jb .Ldo_return2\key_len
408
409 vmovdqa byteswap_const(%rip), xbyteswap
410 vmovdqu (p_iv), xcounter
411 vpshufb xbyteswap, xcounter, xcounter
412
413 mov num_bytes, tmp
414 and $(7*16), tmp
415 jz .Lmult_of_8_blks\key_len
416
417 /* 1 <= tmp <= 7 */
418 cmp $(4*16), tmp
419 jg .Lgt4\key_len
420 je .Leq4\key_len
421
422.Llt4\key_len:
423 cmp $(2*16), tmp
424 jg .Leq3\key_len
425 je .Leq2\key_len
426
427.Leq1\key_len:
428 do_aes_load 1, \key_len
429 add $(1*16), p_out
430 and $(~7*16), num_bytes
431 jz .Ldo_return2\key_len
432 jmp .Lmain_loop2\key_len
433
434.Leq2\key_len:
435 do_aes_load 2, \key_len
436 add $(2*16), p_out
437 and $(~7*16), num_bytes
438 jz .Ldo_return2\key_len
439 jmp .Lmain_loop2\key_len
440
441
442.Leq3\key_len:
443 do_aes_load 3, \key_len
444 add $(3*16), p_out
445 and $(~7*16), num_bytes
446 jz .Ldo_return2\key_len
447 jmp .Lmain_loop2\key_len
448
449.Leq4\key_len:
450 do_aes_load 4, \key_len
451 add $(4*16), p_out
452 and $(~7*16), num_bytes
453 jz .Ldo_return2\key_len
454 jmp .Lmain_loop2\key_len
455
456.Lgt4\key_len:
457 cmp $(6*16), tmp
458 jg .Leq7\key_len
459 je .Leq6\key_len
460
461.Leq5\key_len:
462 do_aes_load 5, \key_len
463 add $(5*16), p_out
464 and $(~7*16), num_bytes
465 jz .Ldo_return2\key_len
466 jmp .Lmain_loop2\key_len
467
468.Leq6\key_len:
469 do_aes_load 6, \key_len
470 add $(6*16), p_out
471 and $(~7*16), num_bytes
472 jz .Ldo_return2\key_len
473 jmp .Lmain_loop2\key_len
474
475.Leq7\key_len:
476 do_aes_load 7, \key_len
477 add $(7*16), p_out
478 and $(~7*16), num_bytes
479 jz .Ldo_return2\key_len
480 jmp .Lmain_loop2\key_len
481
482.Lmult_of_8_blks\key_len:
483 .if (\key_len != KEY_128)
484 vmovdqa 0*16(p_keys), xkey0
485 vmovdqa 4*16(p_keys), xkey4
486 vmovdqa 8*16(p_keys), xkey8
487 vmovdqa 12*16(p_keys), xkey12
488 .else
489 vmovdqa 0*16(p_keys), xkey0
490 vmovdqa 3*16(p_keys), xkey4
491 vmovdqa 6*16(p_keys), xkey8
492 vmovdqa 9*16(p_keys), xkey12
493 .endif
494.align 16
495.Lmain_loop2\key_len:
496 /* num_bytes is a multiple of 8 blocks (8*16 bytes) and > 0 */
497 do_aes_noload 8, \key_len
498 add $(8*16), p_out
499 sub $(8*16), num_bytes
500 jne .Lmain_loop2\key_len
501
502.Ldo_return2\key_len:
503 /* return updated IV */
504 vpshufb xbyteswap, xcounter, xcounter
505 vmovdqu xcounter, (p_iv)
506 ret
507.endm
508
509/*
510 * routine to do AES128 CTR enc/decrypt "by8"
511 * XMM registers are clobbered.
512 * Saving/restoring must be done at a higher level
513 * aes_ctr_enc_128_avx_by8(void *in, void *iv, void *keys, void *out,
514 * unsigned int num_bytes)
515 */
516ENTRY(aes_ctr_enc_128_avx_by8)
517 /* call the aes main loop */
518 do_aes_ctrmain KEY_128
519
520ENDPROC(aes_ctr_enc_128_avx_by8)
521
522/*
523 * routine to do AES192 CTR enc/decrypt "by8"
524 * XMM registers are clobbered.
525 * Saving/restoring must be done at a higher level
526 * aes_ctr_enc_192_avx_by8(void *in, void *iv, void *keys, void *out,
527 * unsigned int num_bytes)
528 */
529ENTRY(aes_ctr_enc_192_avx_by8)
530 /* call the aes main loop */
531 do_aes_ctrmain KEY_192
532
533ENDPROC(aes_ctr_enc_192_avx_by8)
534
535/*
536 * routine to do AES256 CTR enc/decrypt "by8"
537 * XMM registers are clobbered.
538 * Saving/restoring must be done at a higher level
539 * aes_ctr_enc_256_avx_by8(void *in, void *iv, void *keys, void *out,
540 * unsigned int num_bytes)
541 */
542ENTRY(aes_ctr_enc_256_avx_by8)
543 /* call the aes main loop */
544 do_aes_ctrmain KEY_256
545
546ENDPROC(aes_ctr_enc_256_avx_by8)
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 948ad0e77741..888950f29fd9 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -105,6 +105,9 @@ void crypto_fpu_exit(void);
105#define AVX_GEN4_OPTSIZE 4096 105#define AVX_GEN4_OPTSIZE 4096
106 106
107#ifdef CONFIG_X86_64 107#ifdef CONFIG_X86_64
108
109static void (*aesni_ctr_enc_tfm)(struct crypto_aes_ctx *ctx, u8 *out,
110 const u8 *in, unsigned int len, u8 *iv);
108asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, 111asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out,
109 const u8 *in, unsigned int len, u8 *iv); 112 const u8 *in, unsigned int len, u8 *iv);
110 113
@@ -155,6 +158,12 @@ asmlinkage void aesni_gcm_dec(void *ctx, u8 *out,
155 158
156 159
157#ifdef CONFIG_AS_AVX 160#ifdef CONFIG_AS_AVX
161asmlinkage void aes_ctr_enc_128_avx_by8(const u8 *in, u8 *iv,
162 void *keys, u8 *out, unsigned int num_bytes);
163asmlinkage void aes_ctr_enc_192_avx_by8(const u8 *in, u8 *iv,
164 void *keys, u8 *out, unsigned int num_bytes);
165asmlinkage void aes_ctr_enc_256_avx_by8(const u8 *in, u8 *iv,
166 void *keys, u8 *out, unsigned int num_bytes);
158/* 167/*
159 * asmlinkage void aesni_gcm_precomp_avx_gen2() 168 * asmlinkage void aesni_gcm_precomp_avx_gen2()
160 * gcm_data *my_ctx_data, context data 169 * gcm_data *my_ctx_data, context data
@@ -472,6 +481,25 @@ static void ctr_crypt_final(struct crypto_aes_ctx *ctx,
472 crypto_inc(ctrblk, AES_BLOCK_SIZE); 481 crypto_inc(ctrblk, AES_BLOCK_SIZE);
473} 482}
474 483
484#ifdef CONFIG_AS_AVX
485static void aesni_ctr_enc_avx_tfm(struct crypto_aes_ctx *ctx, u8 *out,
486 const u8 *in, unsigned int len, u8 *iv)
487{
488 /*
489 * based on key length, override with the by8 version
490 * of ctr mode encryption/decryption for improved performance
491 * aes_set_key_common() ensures that key length is one of
492 * {128,192,256}
493 */
494 if (ctx->key_length == AES_KEYSIZE_128)
495 aes_ctr_enc_128_avx_by8(in, iv, (void *)ctx, out, len);
496 else if (ctx->key_length == AES_KEYSIZE_192)
497 aes_ctr_enc_192_avx_by8(in, iv, (void *)ctx, out, len);
498 else
499 aes_ctr_enc_256_avx_by8(in, iv, (void *)ctx, out, len);
500}
501#endif
502
475static int ctr_crypt(struct blkcipher_desc *desc, 503static int ctr_crypt(struct blkcipher_desc *desc,
476 struct scatterlist *dst, struct scatterlist *src, 504 struct scatterlist *dst, struct scatterlist *src,
477 unsigned int nbytes) 505 unsigned int nbytes)
@@ -486,8 +514,8 @@ static int ctr_crypt(struct blkcipher_desc *desc,
486 514
487 kernel_fpu_begin(); 515 kernel_fpu_begin();
488 while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { 516 while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
489 aesni_ctr_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, 517 aesni_ctr_enc_tfm(ctx, walk.dst.virt.addr, walk.src.virt.addr,
490 nbytes & AES_BLOCK_MASK, walk.iv); 518 nbytes & AES_BLOCK_MASK, walk.iv);
491 nbytes &= AES_BLOCK_SIZE - 1; 519 nbytes &= AES_BLOCK_SIZE - 1;
492 err = blkcipher_walk_done(desc, &walk, nbytes); 520 err = blkcipher_walk_done(desc, &walk, nbytes);
493 } 521 }
@@ -1493,6 +1521,14 @@ static int __init aesni_init(void)
1493 aesni_gcm_enc_tfm = aesni_gcm_enc; 1521 aesni_gcm_enc_tfm = aesni_gcm_enc;
1494 aesni_gcm_dec_tfm = aesni_gcm_dec; 1522 aesni_gcm_dec_tfm = aesni_gcm_dec;
1495 } 1523 }
1524 aesni_ctr_enc_tfm = aesni_ctr_enc;
1525#ifdef CONFIG_AS_AVX
1526 if (cpu_has_avx) {
1527 /* optimize performance of ctr mode encryption transform */
1528 aesni_ctr_enc_tfm = aesni_ctr_enc_avx_tfm;
1529 pr_info("AES CTR mode by8 optimization enabled\n");
1530 }
1531#endif
1496#endif 1532#endif
1497 1533
1498 err = crypto_fpu_init(); 1534 err = crypto_fpu_init();
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
index dbc4339b5417..26d49ebae040 100644
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -72,6 +72,7 @@
72 72
73# unsigned int crc_pcl(u8 *buffer, int len, unsigned int crc_init); 73# unsigned int crc_pcl(u8 *buffer, int len, unsigned int crc_init);
74 74
75.text
75ENTRY(crc_pcl) 76ENTRY(crc_pcl)
76#define bufp %rdi 77#define bufp %rdi
77#define bufp_dw %edi 78#define bufp_dw %edi
@@ -216,15 +217,11 @@ LABEL crc_ %i
216 ## 4) Combine three results: 217 ## 4) Combine three results:
217 ################################################################ 218 ################################################################
218 219
219 lea (K_table-16)(%rip), bufp # first entry is for idx 1 220 lea (K_table-8)(%rip), bufp # first entry is for idx 1
220 shlq $3, %rax # rax *= 8 221 shlq $3, %rax # rax *= 8
221 subq %rax, tmp # tmp -= rax*8 222 pmovzxdq (bufp,%rax), %xmm0 # 2 consts: K1:K2
222 shlq $1, %rax 223 leal (%eax,%eax,2), %eax # rax *= 3 (total *24)
223 subq %rax, tmp # tmp -= rax*16 224 subq %rax, tmp # tmp -= rax*24
224 # (total tmp -= rax*24)
225 addq %rax, bufp
226
227 movdqa (bufp), %xmm0 # 2 consts: K1:K2
228 225
229 movq crc_init, %xmm1 # CRC for block 1 226 movq crc_init, %xmm1 # CRC for block 1
230 PCLMULQDQ 0x00,%xmm0,%xmm1 # Multiply by K2 227 PCLMULQDQ 0x00,%xmm0,%xmm1 # Multiply by K2
@@ -238,9 +235,9 @@ LABEL crc_ %i
238 mov crc2, crc_init 235 mov crc2, crc_init
239 crc32 %rax, crc_init 236 crc32 %rax, crc_init
240 237
241################################################################ 238 ################################################################
242## 5) Check for end: 239 ## 5) Check for end:
243################################################################ 240 ################################################################
244 241
245LABEL crc_ 0 242LABEL crc_ 0
246 mov tmp, len 243 mov tmp, len
@@ -331,136 +328,136 @@ ENDPROC(crc_pcl)
331 328
332 ################################################################ 329 ################################################################
333 ## PCLMULQDQ tables 330 ## PCLMULQDQ tables
334 ## Table is 128 entries x 2 quad words each 331 ## Table is 128 entries x 2 words (8 bytes) each
335 ################################################################ 332 ################################################################
336.data 333.section .rodata, "a", %progbits
337.align 64 334.align 8
338K_table: 335K_table:
339 .quad 0x14cd00bd6,0x105ec76f0 336 .long 0x493c7d27, 0x00000001
340 .quad 0x0ba4fc28e,0x14cd00bd6 337 .long 0xba4fc28e, 0x493c7d27
341 .quad 0x1d82c63da,0x0f20c0dfe 338 .long 0xddc0152b, 0xf20c0dfe
342 .quad 0x09e4addf8,0x0ba4fc28e 339 .long 0x9e4addf8, 0xba4fc28e
343 .quad 0x039d3b296,0x1384aa63a 340 .long 0x39d3b296, 0x3da6d0cb
344 .quad 0x102f9b8a2,0x1d82c63da 341 .long 0x0715ce53, 0xddc0152b
345 .quad 0x14237f5e6,0x01c291d04 342 .long 0x47db8317, 0x1c291d04
346 .quad 0x00d3b6092,0x09e4addf8 343 .long 0x0d3b6092, 0x9e4addf8
347 .quad 0x0c96cfdc0,0x0740eef02 344 .long 0xc96cfdc0, 0x740eef02
348 .quad 0x18266e456,0x039d3b296 345 .long 0x878a92a7, 0x39d3b296
349 .quad 0x0daece73e,0x0083a6eec 346 .long 0xdaece73e, 0x083a6eec
350 .quad 0x0ab7aff2a,0x102f9b8a2 347 .long 0xab7aff2a, 0x0715ce53
351 .quad 0x1248ea574,0x1c1733996 348 .long 0x2162d385, 0xc49f4f67
352 .quad 0x083348832,0x14237f5e6 349 .long 0x83348832, 0x47db8317
353 .quad 0x12c743124,0x02ad91c30 350 .long 0x299847d5, 0x2ad91c30
354 .quad 0x0b9e02b86,0x00d3b6092 351 .long 0xb9e02b86, 0x0d3b6092
355 .quad 0x018b33a4e,0x06992cea2 352 .long 0x18b33a4e, 0x6992cea2
356 .quad 0x1b331e26a,0x0c96cfdc0 353 .long 0xb6dd949b, 0xc96cfdc0
357 .quad 0x17d35ba46,0x07e908048 354 .long 0x78d9ccb7, 0x7e908048
358 .quad 0x1bf2e8b8a,0x18266e456 355 .long 0xbac2fd7b, 0x878a92a7
359 .quad 0x1a3e0968a,0x11ed1f9d8 356 .long 0xa60ce07b, 0x1b3d8f29
360 .quad 0x0ce7f39f4,0x0daece73e 357 .long 0xce7f39f4, 0xdaece73e
361 .quad 0x061d82e56,0x0f1d0f55e 358 .long 0x61d82e56, 0xf1d0f55e
362 .quad 0x0d270f1a2,0x0ab7aff2a 359 .long 0xd270f1a2, 0xab7aff2a
363 .quad 0x1c3f5f66c,0x0a87ab8a8 360 .long 0xc619809d, 0xa87ab8a8
364 .quad 0x12ed0daac,0x1248ea574 361 .long 0x2b3cac5d, 0x2162d385
365 .quad 0x065863b64,0x08462d800 362 .long 0x65863b64, 0x8462d800
366 .quad 0x11eef4f8e,0x083348832 363 .long 0x1b03397f, 0x83348832
367 .quad 0x1ee54f54c,0x071d111a8 364 .long 0xebb883bd, 0x71d111a8
368 .quad 0x0b3e32c28,0x12c743124 365 .long 0xb3e32c28, 0x299847d5
369 .quad 0x0064f7f26,0x0ffd852c6 366 .long 0x064f7f26, 0xffd852c6
370 .quad 0x0dd7e3b0c,0x0b9e02b86 367 .long 0xdd7e3b0c, 0xb9e02b86
371 .quad 0x0f285651c,0x0dcb17aa4 368 .long 0xf285651c, 0xdcb17aa4
372 .quad 0x010746f3c,0x018b33a4e 369 .long 0x10746f3c, 0x18b33a4e
373 .quad 0x1c24afea4,0x0f37c5aee 370 .long 0xc7a68855, 0xf37c5aee
374 .quad 0x0271d9844,0x1b331e26a 371 .long 0x271d9844, 0xb6dd949b
375 .quad 0x08e766a0c,0x06051d5a2 372 .long 0x8e766a0c, 0x6051d5a2
376 .quad 0x093a5f730,0x17d35ba46 373 .long 0x93a5f730, 0x78d9ccb7
377 .quad 0x06cb08e5c,0x11d5ca20e 374 .long 0x6cb08e5c, 0x18b0d4ff
378 .quad 0x06b749fb2,0x1bf2e8b8a 375 .long 0x6b749fb2, 0xbac2fd7b
379 .quad 0x1167f94f2,0x021f3d99c 376 .long 0x1393e203, 0x21f3d99c
380 .quad 0x0cec3662e,0x1a3e0968a 377 .long 0xcec3662e, 0xa60ce07b
381 .quad 0x19329634a,0x08f158014 378 .long 0x96c515bb, 0x8f158014
382 .quad 0x0e6fc4e6a,0x0ce7f39f4 379 .long 0xe6fc4e6a, 0xce7f39f4
383 .quad 0x08227bb8a,0x1a5e82106 380 .long 0x8227bb8a, 0xa00457f7
384 .quad 0x0b0cd4768,0x061d82e56 381 .long 0xb0cd4768, 0x61d82e56
385 .quad 0x13c2b89c4,0x188815ab2 382 .long 0x39c7ff35, 0x8d6d2c43
386 .quad 0x0d7a4825c,0x0d270f1a2 383 .long 0xd7a4825c, 0xd270f1a2
387 .quad 0x10f5ff2ba,0x105405f3e 384 .long 0x0ab3844b, 0x00ac29cf
388 .quad 0x00167d312,0x1c3f5f66c 385 .long 0x0167d312, 0xc619809d
389 .quad 0x0f6076544,0x0e9adf796 386 .long 0xf6076544, 0xe9adf796
390 .quad 0x026f6a60a,0x12ed0daac 387 .long 0x26f6a60a, 0x2b3cac5d
391 .quad 0x1a2adb74e,0x096638b34 388 .long 0xa741c1bf, 0x96638b34
392 .quad 0x19d34af3a,0x065863b64 389 .long 0x98d8d9cb, 0x65863b64
393 .quad 0x049c3cc9c,0x1e50585a0 390 .long 0x49c3cc9c, 0xe0e9f351
394 .quad 0x068bce87a,0x11eef4f8e 391 .long 0x68bce87a, 0x1b03397f
395 .quad 0x1524fa6c6,0x19f1c69dc 392 .long 0x57a3d037, 0x9af01f2d
396 .quad 0x16cba8aca,0x1ee54f54c 393 .long 0x6956fc3b, 0xebb883bd
397 .quad 0x042d98888,0x12913343e 394 .long 0x42d98888, 0x2cff42cf
398 .quad 0x1329d9f7e,0x0b3e32c28 395 .long 0x3771e98f, 0xb3e32c28
399 .quad 0x1b1c69528,0x088f25a3a 396 .long 0xb42ae3d9, 0x88f25a3a
400 .quad 0x02178513a,0x0064f7f26 397 .long 0x2178513a, 0x064f7f26
401 .quad 0x0e0ac139e,0x04e36f0b0 398 .long 0xe0ac139e, 0x4e36f0b0
402 .quad 0x0170076fa,0x0dd7e3b0c 399 .long 0x170076fa, 0xdd7e3b0c
403 .quad 0x141a1a2e2,0x0bd6f81f8 400 .long 0x444dd413, 0xbd6f81f8
404 .quad 0x16ad828b4,0x0f285651c 401 .long 0x6f345e45, 0xf285651c
405 .quad 0x041d17b64,0x19425cbba 402 .long 0x41d17b64, 0x91c9bd4b
406 .quad 0x1fae1cc66,0x010746f3c 403 .long 0xff0dba97, 0x10746f3c
407 .quad 0x1a75b4b00,0x18db37e8a 404 .long 0xa2b73df1, 0x885f087b
408 .quad 0x0f872e54c,0x1c24afea4 405 .long 0xf872e54c, 0xc7a68855
409 .quad 0x01e41e9fc,0x04c144932 406 .long 0x1e41e9fc, 0x4c144932
410 .quad 0x086d8e4d2,0x0271d9844 407 .long 0x86d8e4d2, 0x271d9844
411 .quad 0x160f7af7a,0x052148f02 408 .long 0x651bd98b, 0x52148f02
412 .quad 0x05bb8f1bc,0x08e766a0c 409 .long 0x5bb8f1bc, 0x8e766a0c
413 .quad 0x0a90fd27a,0x0a3c6f37a 410 .long 0xa90fd27a, 0xa3c6f37a
414 .quad 0x0b3af077a,0x093a5f730 411 .long 0xb3af077a, 0x93a5f730
415 .quad 0x04984d782,0x1d22c238e 412 .long 0x4984d782, 0xd7c0557f
416 .quad 0x0ca6ef3ac,0x06cb08e5c 413 .long 0xca6ef3ac, 0x6cb08e5c
417 .quad 0x0234e0b26,0x063ded06a 414 .long 0x234e0b26, 0x63ded06a
418 .quad 0x1d88abd4a,0x06b749fb2 415 .long 0xdd66cbbb, 0x6b749fb2
419 .quad 0x04597456a,0x04d56973c 416 .long 0x4597456a, 0x4d56973c
420 .quad 0x0e9e28eb4,0x1167f94f2 417 .long 0xe9e28eb4, 0x1393e203
421 .quad 0x07b3ff57a,0x19385bf2e 418 .long 0x7b3ff57a, 0x9669c9df
422 .quad 0x0c9c8b782,0x0cec3662e 419 .long 0xc9c8b782, 0xcec3662e
423 .quad 0x13a9cba9e,0x0e417f38a 420 .long 0x3f70cc6f, 0xe417f38a
424 .quad 0x093e106a4,0x19329634a 421 .long 0x93e106a4, 0x96c515bb
425 .quad 0x167001a9c,0x14e727980 422 .long 0x62ec6c6d, 0x4b9e0f71
426 .quad 0x1ddffc5d4,0x0e6fc4e6a 423 .long 0xd813b325, 0xe6fc4e6a
427 .quad 0x00df04680,0x0d104b8fc 424 .long 0x0df04680, 0xd104b8fc
428 .quad 0x02342001e,0x08227bb8a 425 .long 0x2342001e, 0x8227bb8a
429 .quad 0x00a2a8d7e,0x05b397730 426 .long 0x0a2a8d7e, 0x5b397730
430 .quad 0x168763fa6,0x0b0cd4768 427 .long 0x6d9a4957, 0xb0cd4768
431 .quad 0x1ed5a407a,0x0e78eb416 428 .long 0xe8b6368b, 0xe78eb416
432 .quad 0x0d2c3ed1a,0x13c2b89c4 429 .long 0xd2c3ed1a, 0x39c7ff35
433 .quad 0x0995a5724,0x1641378f0 430 .long 0x995a5724, 0x61ff0e01
434 .quad 0x19b1afbc4,0x0d7a4825c 431 .long 0x9ef68d35, 0xd7a4825c
435 .quad 0x109ffedc0,0x08d96551c 432 .long 0x0c139b31, 0x8d96551c
436 .quad 0x0f2271e60,0x10f5ff2ba 433 .long 0xf2271e60, 0x0ab3844b
437 .quad 0x00b0bf8ca,0x00bf80dd2 434 .long 0x0b0bf8ca, 0x0bf80dd2
438 .quad 0x123888b7a,0x00167d312 435 .long 0x2664fd8b, 0x0167d312
439 .quad 0x1e888f7dc,0x18dcddd1c 436 .long 0xed64812d, 0x8821abed
440 .quad 0x002ee03b2,0x0f6076544 437 .long 0x02ee03b2, 0xf6076544
441 .quad 0x183e8d8fe,0x06a45d2b2 438 .long 0x8604ae0f, 0x6a45d2b2
442 .quad 0x133d7a042,0x026f6a60a 439 .long 0x363bd6b3, 0x26f6a60a
443 .quad 0x116b0f50c,0x1dd3e10e8 440 .long 0x135c83fd, 0xd8d26619
444 .quad 0x05fabe670,0x1a2adb74e 441 .long 0x5fabe670, 0xa741c1bf
445 .quad 0x130004488,0x0de87806c 442 .long 0x35ec3279, 0xde87806c
446 .quad 0x000bcf5f6,0x19d34af3a 443 .long 0x00bcf5f6, 0x98d8d9cb
447 .quad 0x18f0c7078,0x014338754 444 .long 0x8ae00689, 0x14338754
448 .quad 0x017f27698,0x049c3cc9c 445 .long 0x17f27698, 0x49c3cc9c
449 .quad 0x058ca5f00,0x15e3e77ee 446 .long 0x58ca5f00, 0x5bd2011f
450 .quad 0x1af900c24,0x068bce87a 447 .long 0xaa7c7ad5, 0x68bce87a
451 .quad 0x0b5cfca28,0x0dd07448e 448 .long 0xb5cfca28, 0xdd07448e
452 .quad 0x0ded288f8,0x1524fa6c6 449 .long 0xded288f8, 0x57a3d037
453 .quad 0x059f229bc,0x1d8048348 450 .long 0x59f229bc, 0xdde8f5b9
454 .quad 0x06d390dec,0x16cba8aca 451 .long 0x6d390dec, 0x6956fc3b
455 .quad 0x037170390,0x0a3e3e02c 452 .long 0x37170390, 0xa3e3e02c
456 .quad 0x06353c1cc,0x042d98888 453 .long 0x6353c1cc, 0x42d98888
457 .quad 0x0c4584f5c,0x0d73c7bea 454 .long 0xc4584f5c, 0xd73c7bea
458 .quad 0x1f16a3418,0x1329d9f7e 455 .long 0xf48642e9, 0x3771e98f
459 .quad 0x0531377e2,0x185137662 456 .long 0x531377e2, 0x80ff0093
460 .quad 0x1d8d9ca7c,0x1b1c69528 457 .long 0xdd35bc8d, 0xb42ae3d9
461 .quad 0x0b25b29f2,0x18a08b5bc 458 .long 0xb25b29f2, 0x8fe4c34d
462 .quad 0x19fb2a8b0,0x02178513a 459 .long 0x9a5ede41, 0x2178513a
463 .quad 0x1a08fe6ac,0x1da758ae0 460 .long 0xa563905d, 0xdf99fc11
464 .quad 0x045cddf4e,0x0e0ac139e 461 .long 0x45cddf4e, 0xe0ac139e
465 .quad 0x1a91647f2,0x169cf9eb0 462 .long 0xacfa3103, 0x6c23e841
466 .quad 0x1a0f717c4,0x0170076fa 463 .long 0xa51b6135, 0x170076fa
diff --git a/arch/x86/crypto/des3_ede-asm_64.S b/arch/x86/crypto/des3_ede-asm_64.S
new file mode 100644
index 000000000000..038f6ae87c5e
--- /dev/null
+++ b/arch/x86/crypto/des3_ede-asm_64.S
@@ -0,0 +1,805 @@
1/*
2 * des3_ede-asm_64.S - x86-64 assembly implementation of 3DES cipher
3 *
4 * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 */
16
17#include <linux/linkage.h>
18
19.file "des3_ede-asm_64.S"
20.text
21
22#define s1 .L_s1
23#define s2 ((s1) + (64*8))
24#define s3 ((s2) + (64*8))
25#define s4 ((s3) + (64*8))
26#define s5 ((s4) + (64*8))
27#define s6 ((s5) + (64*8))
28#define s7 ((s6) + (64*8))
29#define s8 ((s7) + (64*8))
30
31/* register macros */
32#define CTX %rdi
33
34#define RL0 %r8
35#define RL1 %r9
36#define RL2 %r10
37
38#define RL0d %r8d
39#define RL1d %r9d
40#define RL2d %r10d
41
42#define RR0 %r11
43#define RR1 %r12
44#define RR2 %r13
45
46#define RR0d %r11d
47#define RR1d %r12d
48#define RR2d %r13d
49
50#define RW0 %rax
51#define RW1 %rbx
52#define RW2 %rcx
53
54#define RW0d %eax
55#define RW1d %ebx
56#define RW2d %ecx
57
58#define RW0bl %al
59#define RW1bl %bl
60#define RW2bl %cl
61
62#define RW0bh %ah
63#define RW1bh %bh
64#define RW2bh %ch
65
66#define RT0 %r15
67#define RT1 %rbp
68#define RT2 %r14
69#define RT3 %rdx
70
71#define RT0d %r15d
72#define RT1d %ebp
73#define RT2d %r14d
74#define RT3d %edx
75
76/***********************************************************************
77 * 1-way 3DES
78 ***********************************************************************/
79#define do_permutation(a, b, offset, mask) \
80 movl a, RT0d; \
81 shrl $(offset), RT0d; \
82 xorl b, RT0d; \
83 andl $(mask), RT0d; \
84 xorl RT0d, b; \
85 shll $(offset), RT0d; \
86 xorl RT0d, a;
87
88#define expand_to_64bits(val, mask) \
89 movl val##d, RT0d; \
90 rorl $4, RT0d; \
91 shlq $32, RT0; \
92 orq RT0, val; \
93 andq mask, val;
94
95#define compress_to_64bits(val) \
96 movq val, RT0; \
97 shrq $32, RT0; \
98 roll $4, RT0d; \
99 orl RT0d, val##d;
100
101#define initial_permutation(left, right) \
102 do_permutation(left##d, right##d, 4, 0x0f0f0f0f); \
103 do_permutation(left##d, right##d, 16, 0x0000ffff); \
104 do_permutation(right##d, left##d, 2, 0x33333333); \
105 do_permutation(right##d, left##d, 8, 0x00ff00ff); \
106 movabs $0x3f3f3f3f3f3f3f3f, RT3; \
107 movl left##d, RW0d; \
108 roll $1, right##d; \
109 xorl right##d, RW0d; \
110 andl $0xaaaaaaaa, RW0d; \
111 xorl RW0d, left##d; \
112 xorl RW0d, right##d; \
113 roll $1, left##d; \
114 expand_to_64bits(right, RT3); \
115 expand_to_64bits(left, RT3);
116
117#define final_permutation(left, right) \
118 compress_to_64bits(right); \
119 compress_to_64bits(left); \
120 movl right##d, RW0d; \
121 rorl $1, left##d; \
122 xorl left##d, RW0d; \
123 andl $0xaaaaaaaa, RW0d; \
124 xorl RW0d, right##d; \
125 xorl RW0d, left##d; \
126 rorl $1, right##d; \
127 do_permutation(right##d, left##d, 8, 0x00ff00ff); \
128 do_permutation(right##d, left##d, 2, 0x33333333); \
129 do_permutation(left##d, right##d, 16, 0x0000ffff); \
130 do_permutation(left##d, right##d, 4, 0x0f0f0f0f);
131
132#define round1(n, from, to, load_next_key) \
133 xorq from, RW0; \
134 \
135 movzbl RW0bl, RT0d; \
136 movzbl RW0bh, RT1d; \
137 shrq $16, RW0; \
138 movzbl RW0bl, RT2d; \
139 movzbl RW0bh, RT3d; \
140 shrq $16, RW0; \
141 movq s8(, RT0, 8), RT0; \
142 xorq s6(, RT1, 8), to; \
143 movzbl RW0bl, RL1d; \
144 movzbl RW0bh, RT1d; \
145 shrl $16, RW0d; \
146 xorq s4(, RT2, 8), RT0; \
147 xorq s2(, RT3, 8), to; \
148 movzbl RW0bl, RT2d; \
149 movzbl RW0bh, RT3d; \
150 xorq s7(, RL1, 8), RT0; \
151 xorq s5(, RT1, 8), to; \
152 xorq s3(, RT2, 8), RT0; \
153 load_next_key(n, RW0); \
154 xorq RT0, to; \
155 xorq s1(, RT3, 8), to; \
156
157#define load_next_key(n, RWx) \
158 movq (((n) + 1) * 8)(CTX), RWx;
159
160#define dummy2(a, b) /*_*/
161
162#define read_block(io, left, right) \
163 movl (io), left##d; \
164 movl 4(io), right##d; \
165 bswapl left##d; \
166 bswapl right##d;
167
168#define write_block(io, left, right) \
169 bswapl left##d; \
170 bswapl right##d; \
171 movl left##d, (io); \
172 movl right##d, 4(io);
173
174ENTRY(des3_ede_x86_64_crypt_blk)
175 /* input:
176 * %rdi: round keys, CTX
177 * %rsi: dst
178 * %rdx: src
179 */
180 pushq %rbp;
181 pushq %rbx;
182 pushq %r12;
183 pushq %r13;
184 pushq %r14;
185 pushq %r15;
186
187 read_block(%rdx, RL0, RR0);
188 initial_permutation(RL0, RR0);
189
190 movq (CTX), RW0;
191
192 round1(0, RR0, RL0, load_next_key);
193 round1(1, RL0, RR0, load_next_key);
194 round1(2, RR0, RL0, load_next_key);
195 round1(3, RL0, RR0, load_next_key);
196 round1(4, RR0, RL0, load_next_key);
197 round1(5, RL0, RR0, load_next_key);
198 round1(6, RR0, RL0, load_next_key);
199 round1(7, RL0, RR0, load_next_key);
200 round1(8, RR0, RL0, load_next_key);
201 round1(9, RL0, RR0, load_next_key);
202 round1(10, RR0, RL0, load_next_key);
203 round1(11, RL0, RR0, load_next_key);
204 round1(12, RR0, RL0, load_next_key);
205 round1(13, RL0, RR0, load_next_key);
206 round1(14, RR0, RL0, load_next_key);
207 round1(15, RL0, RR0, load_next_key);
208
209 round1(16+0, RL0, RR0, load_next_key);
210 round1(16+1, RR0, RL0, load_next_key);
211 round1(16+2, RL0, RR0, load_next_key);
212 round1(16+3, RR0, RL0, load_next_key);
213 round1(16+4, RL0, RR0, load_next_key);
214 round1(16+5, RR0, RL0, load_next_key);
215 round1(16+6, RL0, RR0, load_next_key);
216 round1(16+7, RR0, RL0, load_next_key);
217 round1(16+8, RL0, RR0, load_next_key);
218 round1(16+9, RR0, RL0, load_next_key);
219 round1(16+10, RL0, RR0, load_next_key);
220 round1(16+11, RR0, RL0, load_next_key);
221 round1(16+12, RL0, RR0, load_next_key);
222 round1(16+13, RR0, RL0, load_next_key);
223 round1(16+14, RL0, RR0, load_next_key);
224 round1(16+15, RR0, RL0, load_next_key);
225
226 round1(32+0, RR0, RL0, load_next_key);
227 round1(32+1, RL0, RR0, load_next_key);
228 round1(32+2, RR0, RL0, load_next_key);
229 round1(32+3, RL0, RR0, load_next_key);
230 round1(32+4, RR0, RL0, load_next_key);
231 round1(32+5, RL0, RR0, load_next_key);
232 round1(32+6, RR0, RL0, load_next_key);
233 round1(32+7, RL0, RR0, load_next_key);
234 round1(32+8, RR0, RL0, load_next_key);
235 round1(32+9, RL0, RR0, load_next_key);
236 round1(32+10, RR0, RL0, load_next_key);
237 round1(32+11, RL0, RR0, load_next_key);
238 round1(32+12, RR0, RL0, load_next_key);
239 round1(32+13, RL0, RR0, load_next_key);
240 round1(32+14, RR0, RL0, load_next_key);
241 round1(32+15, RL0, RR0, dummy2);
242
243 final_permutation(RR0, RL0);
244 write_block(%rsi, RR0, RL0);
245
246 popq %r15;
247 popq %r14;
248 popq %r13;
249 popq %r12;
250 popq %rbx;
251 popq %rbp;
252
253 ret;
254ENDPROC(des3_ede_x86_64_crypt_blk)
255
256/***********************************************************************
257 * 3-way 3DES
258 ***********************************************************************/
259#define expand_to_64bits(val, mask) \
260 movl val##d, RT0d; \
261 rorl $4, RT0d; \
262 shlq $32, RT0; \
263 orq RT0, val; \
264 andq mask, val;
265
266#define compress_to_64bits(val) \
267 movq val, RT0; \
268 shrq $32, RT0; \
269 roll $4, RT0d; \
270 orl RT0d, val##d;
271
272#define initial_permutation3(left, right) \
273 do_permutation(left##0d, right##0d, 4, 0x0f0f0f0f); \
274 do_permutation(left##0d, right##0d, 16, 0x0000ffff); \
275 do_permutation(left##1d, right##1d, 4, 0x0f0f0f0f); \
276 do_permutation(left##1d, right##1d, 16, 0x0000ffff); \
277 do_permutation(left##2d, right##2d, 4, 0x0f0f0f0f); \
278 do_permutation(left##2d, right##2d, 16, 0x0000ffff); \
279 \
280 do_permutation(right##0d, left##0d, 2, 0x33333333); \
281 do_permutation(right##0d, left##0d, 8, 0x00ff00ff); \
282 do_permutation(right##1d, left##1d, 2, 0x33333333); \
283 do_permutation(right##1d, left##1d, 8, 0x00ff00ff); \
284 do_permutation(right##2d, left##2d, 2, 0x33333333); \
285 do_permutation(right##2d, left##2d, 8, 0x00ff00ff); \
286 \
287 movabs $0x3f3f3f3f3f3f3f3f, RT3; \
288 \
289 movl left##0d, RW0d; \
290 roll $1, right##0d; \
291 xorl right##0d, RW0d; \
292 andl $0xaaaaaaaa, RW0d; \
293 xorl RW0d, left##0d; \
294 xorl RW0d, right##0d; \
295 roll $1, left##0d; \
296 expand_to_64bits(right##0, RT3); \
297 expand_to_64bits(left##0, RT3); \
298 movl left##1d, RW1d; \
299 roll $1, right##1d; \
300 xorl right##1d, RW1d; \
301 andl $0xaaaaaaaa, RW1d; \
302 xorl RW1d, left##1d; \
303 xorl RW1d, right##1d; \
304 roll $1, left##1d; \
305 expand_to_64bits(right##1, RT3); \
306 expand_to_64bits(left##1, RT3); \
307 movl left##2d, RW2d; \
308 roll $1, right##2d; \
309 xorl right##2d, RW2d; \
310 andl $0xaaaaaaaa, RW2d; \
311 xorl RW2d, left##2d; \
312 xorl RW2d, right##2d; \
313 roll $1, left##2d; \
314 expand_to_64bits(right##2, RT3); \
315 expand_to_64bits(left##2, RT3);
316
317#define final_permutation3(left, right) \
318 compress_to_64bits(right##0); \
319 compress_to_64bits(left##0); \
320 movl right##0d, RW0d; \
321 rorl $1, left##0d; \
322 xorl left##0d, RW0d; \
323 andl $0xaaaaaaaa, RW0d; \
324 xorl RW0d, right##0d; \
325 xorl RW0d, left##0d; \
326 rorl $1, right##0d; \
327 compress_to_64bits(right##1); \
328 compress_to_64bits(left##1); \
329 movl right##1d, RW1d; \
330 rorl $1, left##1d; \
331 xorl left##1d, RW1d; \
332 andl $0xaaaaaaaa, RW1d; \
333 xorl RW1d, right##1d; \
334 xorl RW1d, left##1d; \
335 rorl $1, right##1d; \
336 compress_to_64bits(right##2); \
337 compress_to_64bits(left##2); \
338 movl right##2d, RW2d; \
339 rorl $1, left##2d; \
340 xorl left##2d, RW2d; \
341 andl $0xaaaaaaaa, RW2d; \
342 xorl RW2d, right##2d; \
343 xorl RW2d, left##2d; \
344 rorl $1, right##2d; \
345 \
346 do_permutation(right##0d, left##0d, 8, 0x00ff00ff); \
347 do_permutation(right##0d, left##0d, 2, 0x33333333); \
348 do_permutation(right##1d, left##1d, 8, 0x00ff00ff); \
349 do_permutation(right##1d, left##1d, 2, 0x33333333); \
350 do_permutation(right##2d, left##2d, 8, 0x00ff00ff); \
351 do_permutation(right##2d, left##2d, 2, 0x33333333); \
352 \
353 do_permutation(left##0d, right##0d, 16, 0x0000ffff); \
354 do_permutation(left##0d, right##0d, 4, 0x0f0f0f0f); \
355 do_permutation(left##1d, right##1d, 16, 0x0000ffff); \
356 do_permutation(left##1d, right##1d, 4, 0x0f0f0f0f); \
357 do_permutation(left##2d, right##2d, 16, 0x0000ffff); \
358 do_permutation(left##2d, right##2d, 4, 0x0f0f0f0f);
359
360#define round3(n, from, to, load_next_key, do_movq) \
361 xorq from##0, RW0; \
362 movzbl RW0bl, RT3d; \
363 movzbl RW0bh, RT1d; \
364 shrq $16, RW0; \
365 xorq s8(, RT3, 8), to##0; \
366 xorq s6(, RT1, 8), to##0; \
367 movzbl RW0bl, RT3d; \
368 movzbl RW0bh, RT1d; \
369 shrq $16, RW0; \
370 xorq s4(, RT3, 8), to##0; \
371 xorq s2(, RT1, 8), to##0; \
372 movzbl RW0bl, RT3d; \
373 movzbl RW0bh, RT1d; \
374 shrl $16, RW0d; \
375 xorq s7(, RT3, 8), to##0; \
376 xorq s5(, RT1, 8), to##0; \
377 movzbl RW0bl, RT3d; \
378 movzbl RW0bh, RT1d; \
379 load_next_key(n, RW0); \
380 xorq s3(, RT3, 8), to##0; \
381 xorq s1(, RT1, 8), to##0; \
382 xorq from##1, RW1; \
383 movzbl RW1bl, RT3d; \
384 movzbl RW1bh, RT1d; \
385 shrq $16, RW1; \
386 xorq s8(, RT3, 8), to##1; \
387 xorq s6(, RT1, 8), to##1; \
388 movzbl RW1bl, RT3d; \
389 movzbl RW1bh, RT1d; \
390 shrq $16, RW1; \
391 xorq s4(, RT3, 8), to##1; \
392 xorq s2(, RT1, 8), to##1; \
393 movzbl RW1bl, RT3d; \
394 movzbl RW1bh, RT1d; \
395 shrl $16, RW1d; \
396 xorq s7(, RT3, 8), to##1; \
397 xorq s5(, RT1, 8), to##1; \
398 movzbl RW1bl, RT3d; \
399 movzbl RW1bh, RT1d; \
400 do_movq(RW0, RW1); \
401 xorq s3(, RT3, 8), to##1; \
402 xorq s1(, RT1, 8), to##1; \
403 xorq from##2, RW2; \
404 movzbl RW2bl, RT3d; \
405 movzbl RW2bh, RT1d; \
406 shrq $16, RW2; \
407 xorq s8(, RT3, 8), to##2; \
408 xorq s6(, RT1, 8), to##2; \
409 movzbl RW2bl, RT3d; \
410 movzbl RW2bh, RT1d; \
411 shrq $16, RW2; \
412 xorq s4(, RT3, 8), to##2; \
413 xorq s2(, RT1, 8), to##2; \
414 movzbl RW2bl, RT3d; \
415 movzbl RW2bh, RT1d; \
416 shrl $16, RW2d; \
417 xorq s7(, RT3, 8), to##2; \
418 xorq s5(, RT1, 8), to##2; \
419 movzbl RW2bl, RT3d; \
420 movzbl RW2bh, RT1d; \
421 do_movq(RW0, RW2); \
422 xorq s3(, RT3, 8), to##2; \
423 xorq s1(, RT1, 8), to##2;
424
425#define __movq(src, dst) \
426 movq src, dst;
427
428ENTRY(des3_ede_x86_64_crypt_blk_3way)
429 /* input:
430 * %rdi: ctx, round keys
431 * %rsi: dst (3 blocks)
432 * %rdx: src (3 blocks)
433 */
434
435 pushq %rbp;
436 pushq %rbx;
437 pushq %r12;
438 pushq %r13;
439 pushq %r14;
440 pushq %r15;
441
442 /* load input */
443 movl 0 * 4(%rdx), RL0d;
444 movl 1 * 4(%rdx), RR0d;
445 movl 2 * 4(%rdx), RL1d;
446 movl 3 * 4(%rdx), RR1d;
447 movl 4 * 4(%rdx), RL2d;
448 movl 5 * 4(%rdx), RR2d;
449
450 bswapl RL0d;
451 bswapl RR0d;
452 bswapl RL1d;
453 bswapl RR1d;
454 bswapl RL2d;
455 bswapl RR2d;
456
457 initial_permutation3(RL, RR);
458
459 movq 0(CTX), RW0;
460 movq RW0, RW1;
461 movq RW0, RW2;
462
463 round3(0, RR, RL, load_next_key, __movq);
464 round3(1, RL, RR, load_next_key, __movq);
465 round3(2, RR, RL, load_next_key, __movq);
466 round3(3, RL, RR, load_next_key, __movq);
467 round3(4, RR, RL, load_next_key, __movq);
468 round3(5, RL, RR, load_next_key, __movq);
469 round3(6, RR, RL, load_next_key, __movq);
470 round3(7, RL, RR, load_next_key, __movq);
471 round3(8, RR, RL, load_next_key, __movq);
472 round3(9, RL, RR, load_next_key, __movq);
473 round3(10, RR, RL, load_next_key, __movq);
474 round3(11, RL, RR, load_next_key, __movq);
475 round3(12, RR, RL, load_next_key, __movq);
476 round3(13, RL, RR, load_next_key, __movq);
477 round3(14, RR, RL, load_next_key, __movq);
478 round3(15, RL, RR, load_next_key, __movq);
479
480 round3(16+0, RL, RR, load_next_key, __movq);
481 round3(16+1, RR, RL, load_next_key, __movq);
482 round3(16+2, RL, RR, load_next_key, __movq);
483 round3(16+3, RR, RL, load_next_key, __movq);
484 round3(16+4, RL, RR, load_next_key, __movq);
485 round3(16+5, RR, RL, load_next_key, __movq);
486 round3(16+6, RL, RR, load_next_key, __movq);
487 round3(16+7, RR, RL, load_next_key, __movq);
488 round3(16+8, RL, RR, load_next_key, __movq);
489 round3(16+9, RR, RL, load_next_key, __movq);
490 round3(16+10, RL, RR, load_next_key, __movq);
491 round3(16+11, RR, RL, load_next_key, __movq);
492 round3(16+12, RL, RR, load_next_key, __movq);
493 round3(16+13, RR, RL, load_next_key, __movq);
494 round3(16+14, RL, RR, load_next_key, __movq);
495 round3(16+15, RR, RL, load_next_key, __movq);
496
497 round3(32+0, RR, RL, load_next_key, __movq);
498 round3(32+1, RL, RR, load_next_key, __movq);
499 round3(32+2, RR, RL, load_next_key, __movq);
500 round3(32+3, RL, RR, load_next_key, __movq);
501 round3(32+4, RR, RL, load_next_key, __movq);
502 round3(32+5, RL, RR, load_next_key, __movq);
503 round3(32+6, RR, RL, load_next_key, __movq);
504 round3(32+7, RL, RR, load_next_key, __movq);
505 round3(32+8, RR, RL, load_next_key, __movq);
506 round3(32+9, RL, RR, load_next_key, __movq);
507 round3(32+10, RR, RL, load_next_key, __movq);
508 round3(32+11, RL, RR, load_next_key, __movq);
509 round3(32+12, RR, RL, load_next_key, __movq);
510 round3(32+13, RL, RR, load_next_key, __movq);
511 round3(32+14, RR, RL, load_next_key, __movq);
512 round3(32+15, RL, RR, dummy2, dummy2);
513
514 final_permutation3(RR, RL);
515
516 bswapl RR0d;
517 bswapl RL0d;
518 bswapl RR1d;
519 bswapl RL1d;
520 bswapl RR2d;
521 bswapl RL2d;
522
523 movl RR0d, 0 * 4(%rsi);
524 movl RL0d, 1 * 4(%rsi);
525 movl RR1d, 2 * 4(%rsi);
526 movl RL1d, 3 * 4(%rsi);
527 movl RR2d, 4 * 4(%rsi);
528 movl RL2d, 5 * 4(%rsi);
529
530 popq %r15;
531 popq %r14;
532 popq %r13;
533 popq %r12;
534 popq %rbx;
535 popq %rbp;
536
537 ret;
538ENDPROC(des3_ede_x86_64_crypt_blk_3way)
539
540.data
541.align 16
542.L_s1:
543 .quad 0x0010100001010400, 0x0000000000000000
544 .quad 0x0000100000010000, 0x0010100001010404
545 .quad 0x0010100001010004, 0x0000100000010404
546 .quad 0x0000000000000004, 0x0000100000010000
547 .quad 0x0000000000000400, 0x0010100001010400
548 .quad 0x0010100001010404, 0x0000000000000400
549 .quad 0x0010000001000404, 0x0010100001010004
550 .quad 0x0010000001000000, 0x0000000000000004
551 .quad 0x0000000000000404, 0x0010000001000400
552 .quad 0x0010000001000400, 0x0000100000010400
553 .quad 0x0000100000010400, 0x0010100001010000
554 .quad 0x0010100001010000, 0x0010000001000404
555 .quad 0x0000100000010004, 0x0010000001000004
556 .quad 0x0010000001000004, 0x0000100000010004
557 .quad 0x0000000000000000, 0x0000000000000404
558 .quad 0x0000100000010404, 0x0010000001000000
559 .quad 0x0000100000010000, 0x0010100001010404
560 .quad 0x0000000000000004, 0x0010100001010000
561 .quad 0x0010100001010400, 0x0010000001000000
562 .quad 0x0010000001000000, 0x0000000000000400
563 .quad 0x0010100001010004, 0x0000100000010000
564 .quad 0x0000100000010400, 0x0010000001000004
565 .quad 0x0000000000000400, 0x0000000000000004
566 .quad 0x0010000001000404, 0x0000100000010404
567 .quad 0x0010100001010404, 0x0000100000010004
568 .quad 0x0010100001010000, 0x0010000001000404
569 .quad 0x0010000001000004, 0x0000000000000404
570 .quad 0x0000100000010404, 0x0010100001010400
571 .quad 0x0000000000000404, 0x0010000001000400
572 .quad 0x0010000001000400, 0x0000000000000000
573 .quad 0x0000100000010004, 0x0000100000010400
574 .quad 0x0000000000000000, 0x0010100001010004
575.L_s2:
576 .quad 0x0801080200100020, 0x0800080000000000
577 .quad 0x0000080000000000, 0x0001080200100020
578 .quad 0x0001000000100000, 0x0000000200000020
579 .quad 0x0801000200100020, 0x0800080200000020
580 .quad 0x0800000200000020, 0x0801080200100020
581 .quad 0x0801080000100000, 0x0800000000000000
582 .quad 0x0800080000000000, 0x0001000000100000
583 .quad 0x0000000200000020, 0x0801000200100020
584 .quad 0x0001080000100000, 0x0001000200100020
585 .quad 0x0800080200000020, 0x0000000000000000
586 .quad 0x0800000000000000, 0x0000080000000000
587 .quad 0x0001080200100020, 0x0801000000100000
588 .quad 0x0001000200100020, 0x0800000200000020
589 .quad 0x0000000000000000, 0x0001080000100000
590 .quad 0x0000080200000020, 0x0801080000100000
591 .quad 0x0801000000100000, 0x0000080200000020
592 .quad 0x0000000000000000, 0x0001080200100020
593 .quad 0x0801000200100020, 0x0001000000100000
594 .quad 0x0800080200000020, 0x0801000000100000
595 .quad 0x0801080000100000, 0x0000080000000000
596 .quad 0x0801000000100000, 0x0800080000000000
597 .quad 0x0000000200000020, 0x0801080200100020
598 .quad 0x0001080200100020, 0x0000000200000020
599 .quad 0x0000080000000000, 0x0800000000000000
600 .quad 0x0000080200000020, 0x0801080000100000
601 .quad 0x0001000000100000, 0x0800000200000020
602 .quad 0x0001000200100020, 0x0800080200000020
603 .quad 0x0800000200000020, 0x0001000200100020
604 .quad 0x0001080000100000, 0x0000000000000000
605 .quad 0x0800080000000000, 0x0000080200000020
606 .quad 0x0800000000000000, 0x0801000200100020
607 .quad 0x0801080200100020, 0x0001080000100000
608.L_s3:
609 .quad 0x0000002000000208, 0x0000202008020200
610 .quad 0x0000000000000000, 0x0000200008020008
611 .quad 0x0000002008000200, 0x0000000000000000
612 .quad 0x0000202000020208, 0x0000002008000200
613 .quad 0x0000200000020008, 0x0000000008000008
614 .quad 0x0000000008000008, 0x0000200000020000
615 .quad 0x0000202008020208, 0x0000200000020008
616 .quad 0x0000200008020000, 0x0000002000000208
617 .quad 0x0000000008000000, 0x0000000000000008
618 .quad 0x0000202008020200, 0x0000002000000200
619 .quad 0x0000202000020200, 0x0000200008020000
620 .quad 0x0000200008020008, 0x0000202000020208
621 .quad 0x0000002008000208, 0x0000202000020200
622 .quad 0x0000200000020000, 0x0000002008000208
623 .quad 0x0000000000000008, 0x0000202008020208
624 .quad 0x0000002000000200, 0x0000000008000000
625 .quad 0x0000202008020200, 0x0000000008000000
626 .quad 0x0000200000020008, 0x0000002000000208
627 .quad 0x0000200000020000, 0x0000202008020200
628 .quad 0x0000002008000200, 0x0000000000000000
629 .quad 0x0000002000000200, 0x0000200000020008
630 .quad 0x0000202008020208, 0x0000002008000200
631 .quad 0x0000000008000008, 0x0000002000000200
632 .quad 0x0000000000000000, 0x0000200008020008
633 .quad 0x0000002008000208, 0x0000200000020000
634 .quad 0x0000000008000000, 0x0000202008020208
635 .quad 0x0000000000000008, 0x0000202000020208
636 .quad 0x0000202000020200, 0x0000000008000008
637 .quad 0x0000200008020000, 0x0000002008000208
638 .quad 0x0000002000000208, 0x0000200008020000
639 .quad 0x0000202000020208, 0x0000000000000008
640 .quad 0x0000200008020008, 0x0000202000020200
641.L_s4:
642 .quad 0x1008020000002001, 0x1000020800002001
643 .quad 0x1000020800002001, 0x0000000800000000
644 .quad 0x0008020800002000, 0x1008000800000001
645 .quad 0x1008000000000001, 0x1000020000002001
646 .quad 0x0000000000000000, 0x0008020000002000
647 .quad 0x0008020000002000, 0x1008020800002001
648 .quad 0x1000000800000001, 0x0000000000000000
649 .quad 0x0008000800000000, 0x1008000000000001
650 .quad 0x1000000000000001, 0x0000020000002000
651 .quad 0x0008000000000000, 0x1008020000002001
652 .quad 0x0000000800000000, 0x0008000000000000
653 .quad 0x1000020000002001, 0x0000020800002000
654 .quad 0x1008000800000001, 0x1000000000000001
655 .quad 0x0000020800002000, 0x0008000800000000
656 .quad 0x0000020000002000, 0x0008020800002000
657 .quad 0x1008020800002001, 0x1000000800000001
658 .quad 0x0008000800000000, 0x1008000000000001
659 .quad 0x0008020000002000, 0x1008020800002001
660 .quad 0x1000000800000001, 0x0000000000000000
661 .quad 0x0000000000000000, 0x0008020000002000
662 .quad 0x0000020800002000, 0x0008000800000000
663 .quad 0x1008000800000001, 0x1000000000000001
664 .quad 0x1008020000002001, 0x1000020800002001
665 .quad 0x1000020800002001, 0x0000000800000000
666 .quad 0x1008020800002001, 0x1000000800000001
667 .quad 0x1000000000000001, 0x0000020000002000
668 .quad 0x1008000000000001, 0x1000020000002001
669 .quad 0x0008020800002000, 0x1008000800000001
670 .quad 0x1000020000002001, 0x0000020800002000
671 .quad 0x0008000000000000, 0x1008020000002001
672 .quad 0x0000000800000000, 0x0008000000000000
673 .quad 0x0000020000002000, 0x0008020800002000
674.L_s5:
675 .quad 0x0000001000000100, 0x0020001002080100
676 .quad 0x0020000002080000, 0x0420001002000100
677 .quad 0x0000000000080000, 0x0000001000000100
678 .quad 0x0400000000000000, 0x0020000002080000
679 .quad 0x0400001000080100, 0x0000000000080000
680 .quad 0x0020001002000100, 0x0400001000080100
681 .quad 0x0420001002000100, 0x0420000002080000
682 .quad 0x0000001000080100, 0x0400000000000000
683 .quad 0x0020000002000000, 0x0400000000080000
684 .quad 0x0400000000080000, 0x0000000000000000
685 .quad 0x0400001000000100, 0x0420001002080100
686 .quad 0x0420001002080100, 0x0020001002000100
687 .quad 0x0420000002080000, 0x0400001000000100
688 .quad 0x0000000000000000, 0x0420000002000000
689 .quad 0x0020001002080100, 0x0020000002000000
690 .quad 0x0420000002000000, 0x0000001000080100
691 .quad 0x0000000000080000, 0x0420001002000100
692 .quad 0x0000001000000100, 0x0020000002000000
693 .quad 0x0400000000000000, 0x0020000002080000
694 .quad 0x0420001002000100, 0x0400001000080100
695 .quad 0x0020001002000100, 0x0400000000000000
696 .quad 0x0420000002080000, 0x0020001002080100
697 .quad 0x0400001000080100, 0x0000001000000100
698 .quad 0x0020000002000000, 0x0420000002080000
699 .quad 0x0420001002080100, 0x0000001000080100
700 .quad 0x0420000002000000, 0x0420001002080100
701 .quad 0x0020000002080000, 0x0000000000000000
702 .quad 0x0400000000080000, 0x0420000002000000
703 .quad 0x0000001000080100, 0x0020001002000100
704 .quad 0x0400001000000100, 0x0000000000080000
705 .quad 0x0000000000000000, 0x0400000000080000
706 .quad 0x0020001002080100, 0x0400001000000100
707.L_s6:
708 .quad 0x0200000120000010, 0x0204000020000000
709 .quad 0x0000040000000000, 0x0204040120000010
710 .quad 0x0204000020000000, 0x0000000100000010
711 .quad 0x0204040120000010, 0x0004000000000000
712 .quad 0x0200040020000000, 0x0004040100000010
713 .quad 0x0004000000000000, 0x0200000120000010
714 .quad 0x0004000100000010, 0x0200040020000000
715 .quad 0x0200000020000000, 0x0000040100000010
716 .quad 0x0000000000000000, 0x0004000100000010
717 .quad 0x0200040120000010, 0x0000040000000000
718 .quad 0x0004040000000000, 0x0200040120000010
719 .quad 0x0000000100000010, 0x0204000120000010
720 .quad 0x0204000120000010, 0x0000000000000000
721 .quad 0x0004040100000010, 0x0204040020000000
722 .quad 0x0000040100000010, 0x0004040000000000
723 .quad 0x0204040020000000, 0x0200000020000000
724 .quad 0x0200040020000000, 0x0000000100000010
725 .quad 0x0204000120000010, 0x0004040000000000
726 .quad 0x0204040120000010, 0x0004000000000000
727 .quad 0x0000040100000010, 0x0200000120000010
728 .quad 0x0004000000000000, 0x0200040020000000
729 .quad 0x0200000020000000, 0x0000040100000010
730 .quad 0x0200000120000010, 0x0204040120000010
731 .quad 0x0004040000000000, 0x0204000020000000
732 .quad 0x0004040100000010, 0x0204040020000000
733 .quad 0x0000000000000000, 0x0204000120000010
734 .quad 0x0000000100000010, 0x0000040000000000
735 .quad 0x0204000020000000, 0x0004040100000010
736 .quad 0x0000040000000000, 0x0004000100000010
737 .quad 0x0200040120000010, 0x0000000000000000
738 .quad 0x0204040020000000, 0x0200000020000000
739 .quad 0x0004000100000010, 0x0200040120000010
740.L_s7:
741 .quad 0x0002000000200000, 0x2002000004200002
742 .quad 0x2000000004000802, 0x0000000000000000
743 .quad 0x0000000000000800, 0x2000000004000802
744 .quad 0x2002000000200802, 0x0002000004200800
745 .quad 0x2002000004200802, 0x0002000000200000
746 .quad 0x0000000000000000, 0x2000000004000002
747 .quad 0x2000000000000002, 0x0000000004000000
748 .quad 0x2002000004200002, 0x2000000000000802
749 .quad 0x0000000004000800, 0x2002000000200802
750 .quad 0x2002000000200002, 0x0000000004000800
751 .quad 0x2000000004000002, 0x0002000004200000
752 .quad 0x0002000004200800, 0x2002000000200002
753 .quad 0x0002000004200000, 0x0000000000000800
754 .quad 0x2000000000000802, 0x2002000004200802
755 .quad 0x0002000000200800, 0x2000000000000002
756 .quad 0x0000000004000000, 0x0002000000200800
757 .quad 0x0000000004000000, 0x0002000000200800
758 .quad 0x0002000000200000, 0x2000000004000802
759 .quad 0x2000000004000802, 0x2002000004200002
760 .quad 0x2002000004200002, 0x2000000000000002
761 .quad 0x2002000000200002, 0x0000000004000000
762 .quad 0x0000000004000800, 0x0002000000200000
763 .quad 0x0002000004200800, 0x2000000000000802
764 .quad 0x2002000000200802, 0x0002000004200800
765 .quad 0x2000000000000802, 0x2000000004000002
766 .quad 0x2002000004200802, 0x0002000004200000
767 .quad 0x0002000000200800, 0x0000000000000000
768 .quad 0x2000000000000002, 0x2002000004200802
769 .quad 0x0000000000000000, 0x2002000000200802
770 .quad 0x0002000004200000, 0x0000000000000800
771 .quad 0x2000000004000002, 0x0000000004000800
772 .quad 0x0000000000000800, 0x2002000000200002
773.L_s8:
774 .quad 0x0100010410001000, 0x0000010000001000
775 .quad 0x0000000000040000, 0x0100010410041000
776 .quad 0x0100000010000000, 0x0100010410001000
777 .quad 0x0000000400000000, 0x0100000010000000
778 .quad 0x0000000400040000, 0x0100000010040000
779 .quad 0x0100010410041000, 0x0000010000041000
780 .quad 0x0100010010041000, 0x0000010400041000
781 .quad 0x0000010000001000, 0x0000000400000000
782 .quad 0x0100000010040000, 0x0100000410000000
783 .quad 0x0100010010001000, 0x0000010400001000
784 .quad 0x0000010000041000, 0x0000000400040000
785 .quad 0x0100000410040000, 0x0100010010041000
786 .quad 0x0000010400001000, 0x0000000000000000
787 .quad 0x0000000000000000, 0x0100000410040000
788 .quad 0x0100000410000000, 0x0100010010001000
789 .quad 0x0000010400041000, 0x0000000000040000
790 .quad 0x0000010400041000, 0x0000000000040000
791 .quad 0x0100010010041000, 0x0000010000001000
792 .quad 0x0000000400000000, 0x0100000410040000
793 .quad 0x0000010000001000, 0x0000010400041000
794 .quad 0x0100010010001000, 0x0000000400000000
795 .quad 0x0100000410000000, 0x0100000010040000
796 .quad 0x0100000410040000, 0x0100000010000000
797 .quad 0x0000000000040000, 0x0100010410001000
798 .quad 0x0000000000000000, 0x0100010410041000
799 .quad 0x0000000400040000, 0x0100000410000000
800 .quad 0x0100000010040000, 0x0100010010001000
801 .quad 0x0100010410001000, 0x0000000000000000
802 .quad 0x0100010410041000, 0x0000010000041000
803 .quad 0x0000010000041000, 0x0000010400001000
804 .quad 0x0000010400001000, 0x0000000400040000
805 .quad 0x0100000010000000, 0x0100010010041000
diff --git a/arch/x86/crypto/des3_ede_glue.c b/arch/x86/crypto/des3_ede_glue.c
new file mode 100644
index 000000000000..0e9c0668fe4e
--- /dev/null
+++ b/arch/x86/crypto/des3_ede_glue.c
@@ -0,0 +1,509 @@
1/*
2 * Glue Code for assembler optimized version of 3DES
3 *
4 * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
5 *
6 * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
7 * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
8 * CTR part based on code (crypto/ctr.c) by:
9 * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 */
22
23#include <asm/processor.h>
24#include <crypto/des.h>
25#include <linux/crypto.h>
26#include <linux/init.h>
27#include <linux/module.h>
28#include <linux/types.h>
29#include <crypto/algapi.h>
30
/*
 * Per-tfm context: two pre-expanded 3DES key schedules, one for each
 * direction, laid out in the format the assembler routines expect.
 */
struct des3_ede_x86_ctx {
	u32 enc_expkey[DES3_EDE_EXPKEY_WORDS];
	u32 dec_expkey[DES3_EDE_EXPKEY_WORDS];
};

/* regular block cipher functions (implemented in des3_ede-asm_64.S) */
asmlinkage void des3_ede_x86_64_crypt_blk(const u32 *expkey, u8 *dst,
					  const u8 *src);

/* 3-way parallel cipher functions: process three consecutive blocks */
asmlinkage void des3_ede_x86_64_crypt_blk_3way(const u32 *expkey, u8 *dst,
					       const u8 *src);
43
44static inline void des3_ede_enc_blk(struct des3_ede_x86_ctx *ctx, u8 *dst,
45 const u8 *src)
46{
47 u32 *enc_ctx = ctx->enc_expkey;
48
49 des3_ede_x86_64_crypt_blk(enc_ctx, dst, src);
50}
51
52static inline void des3_ede_dec_blk(struct des3_ede_x86_ctx *ctx, u8 *dst,
53 const u8 *src)
54{
55 u32 *dec_ctx = ctx->dec_expkey;
56
57 des3_ede_x86_64_crypt_blk(dec_ctx, dst, src);
58}
59
60static inline void des3_ede_enc_blk_3way(struct des3_ede_x86_ctx *ctx, u8 *dst,
61 const u8 *src)
62{
63 u32 *enc_ctx = ctx->enc_expkey;
64
65 des3_ede_x86_64_crypt_blk_3way(enc_ctx, dst, src);
66}
67
68static inline void des3_ede_dec_blk_3way(struct des3_ede_x86_ctx *ctx, u8 *dst,
69 const u8 *src)
70{
71 u32 *dec_ctx = ctx->dec_expkey;
72
73 des3_ede_x86_64_crypt_blk_3way(dec_ctx, dst, src);
74}
75
76static void des3_ede_x86_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
77{
78 des3_ede_enc_blk(crypto_tfm_ctx(tfm), dst, src);
79}
80
81static void des3_ede_x86_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
82{
83 des3_ede_dec_blk(crypto_tfm_ctx(tfm), dst, src);
84}
85
86static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
87 const u32 *expkey)
88{
89 unsigned int bsize = DES3_EDE_BLOCK_SIZE;
90 unsigned int nbytes;
91 int err;
92
93 err = blkcipher_walk_virt(desc, walk);
94
95 while ((nbytes = walk->nbytes)) {
96 u8 *wsrc = walk->src.virt.addr;
97 u8 *wdst = walk->dst.virt.addr;
98
99 /* Process four block batch */
100 if (nbytes >= bsize * 3) {
101 do {
102 des3_ede_x86_64_crypt_blk_3way(expkey, wdst,
103 wsrc);
104
105 wsrc += bsize * 3;
106 wdst += bsize * 3;
107 nbytes -= bsize * 3;
108 } while (nbytes >= bsize * 3);
109
110 if (nbytes < bsize)
111 goto done;
112 }
113
114 /* Handle leftovers */
115 do {
116 des3_ede_x86_64_crypt_blk(expkey, wdst, wsrc);
117
118 wsrc += bsize;
119 wdst += bsize;
120 nbytes -= bsize;
121 } while (nbytes >= bsize);
122
123done:
124 err = blkcipher_walk_done(desc, walk, nbytes);
125 }
126
127 return err;
128}
129
130static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
131 struct scatterlist *src, unsigned int nbytes)
132{
133 struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
134 struct blkcipher_walk walk;
135
136 blkcipher_walk_init(&walk, dst, src, nbytes);
137 return ecb_crypt(desc, &walk, ctx->enc_expkey);
138}
139
140static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
141 struct scatterlist *src, unsigned int nbytes)
142{
143 struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
144 struct blkcipher_walk walk;
145
146 blkcipher_walk_init(&walk, dst, src, nbytes);
147 return ecb_crypt(desc, &walk, ctx->dec_expkey);
148}
149
150static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
151 struct blkcipher_walk *walk)
152{
153 struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
154 unsigned int bsize = DES3_EDE_BLOCK_SIZE;
155 unsigned int nbytes = walk->nbytes;
156 u64 *src = (u64 *)walk->src.virt.addr;
157 u64 *dst = (u64 *)walk->dst.virt.addr;
158 u64 *iv = (u64 *)walk->iv;
159
160 do {
161 *dst = *src ^ *iv;
162 des3_ede_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
163 iv = dst;
164
165 src += 1;
166 dst += 1;
167 nbytes -= bsize;
168 } while (nbytes >= bsize);
169
170 *(u64 *)walk->iv = *iv;
171 return nbytes;
172}
173
174static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
175 struct scatterlist *src, unsigned int nbytes)
176{
177 struct blkcipher_walk walk;
178 int err;
179
180 blkcipher_walk_init(&walk, dst, src, nbytes);
181 err = blkcipher_walk_virt(desc, &walk);
182
183 while ((nbytes = walk.nbytes)) {
184 nbytes = __cbc_encrypt(desc, &walk);
185 err = blkcipher_walk_done(desc, &walk, nbytes);
186 }
187
188 return err;
189}
190
191static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
192 struct blkcipher_walk *walk)
193{
194 struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
195 unsigned int bsize = DES3_EDE_BLOCK_SIZE;
196 unsigned int nbytes = walk->nbytes;
197 u64 *src = (u64 *)walk->src.virt.addr;
198 u64 *dst = (u64 *)walk->dst.virt.addr;
199 u64 ivs[3 - 1];
200 u64 last_iv;
201
202 /* Start of the last block. */
203 src += nbytes / bsize - 1;
204 dst += nbytes / bsize - 1;
205
206 last_iv = *src;
207
208 /* Process four block batch */
209 if (nbytes >= bsize * 3) {
210 do {
211 nbytes -= bsize * 3 - bsize;
212 src -= 3 - 1;
213 dst -= 3 - 1;
214
215 ivs[0] = src[0];
216 ivs[1] = src[1];
217
218 des3_ede_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src);
219
220 dst[1] ^= ivs[0];
221 dst[2] ^= ivs[1];
222
223 nbytes -= bsize;
224 if (nbytes < bsize)
225 goto done;
226
227 *dst ^= *(src - 1);
228 src -= 1;
229 dst -= 1;
230 } while (nbytes >= bsize * 3);
231 }
232
233 /* Handle leftovers */
234 for (;;) {
235 des3_ede_dec_blk(ctx, (u8 *)dst, (u8 *)src);
236
237 nbytes -= bsize;
238 if (nbytes < bsize)
239 break;
240
241 *dst ^= *(src - 1);
242 src -= 1;
243 dst -= 1;
244 }
245
246done:
247 *dst ^= *(u64 *)walk->iv;
248 *(u64 *)walk->iv = last_iv;
249
250 return nbytes;
251}
252
253static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
254 struct scatterlist *src, unsigned int nbytes)
255{
256 struct blkcipher_walk walk;
257 int err;
258
259 blkcipher_walk_init(&walk, dst, src, nbytes);
260 err = blkcipher_walk_virt(desc, &walk);
261
262 while ((nbytes = walk.nbytes)) {
263 nbytes = __cbc_decrypt(desc, &walk);
264 err = blkcipher_walk_done(desc, &walk, nbytes);
265 }
266
267 return err;
268}
269
270static void ctr_crypt_final(struct des3_ede_x86_ctx *ctx,
271 struct blkcipher_walk *walk)
272{
273 u8 *ctrblk = walk->iv;
274 u8 keystream[DES3_EDE_BLOCK_SIZE];
275 u8 *src = walk->src.virt.addr;
276 u8 *dst = walk->dst.virt.addr;
277 unsigned int nbytes = walk->nbytes;
278
279 des3_ede_enc_blk(ctx, keystream, ctrblk);
280 crypto_xor(keystream, src, nbytes);
281 memcpy(dst, keystream, nbytes);
282
283 crypto_inc(ctrblk, DES3_EDE_BLOCK_SIZE);
284}
285
286static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
287 struct blkcipher_walk *walk)
288{
289 struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
290 unsigned int bsize = DES3_EDE_BLOCK_SIZE;
291 unsigned int nbytes = walk->nbytes;
292 __be64 *src = (__be64 *)walk->src.virt.addr;
293 __be64 *dst = (__be64 *)walk->dst.virt.addr;
294 u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv);
295 __be64 ctrblocks[3];
296
297 /* Process four block batch */
298 if (nbytes >= bsize * 3) {
299 do {
300 /* create ctrblks for parallel encrypt */
301 ctrblocks[0] = cpu_to_be64(ctrblk++);
302 ctrblocks[1] = cpu_to_be64(ctrblk++);
303 ctrblocks[2] = cpu_to_be64(ctrblk++);
304
305 des3_ede_enc_blk_3way(ctx, (u8 *)ctrblocks,
306 (u8 *)ctrblocks);
307
308 dst[0] = src[0] ^ ctrblocks[0];
309 dst[1] = src[1] ^ ctrblocks[1];
310 dst[2] = src[2] ^ ctrblocks[2];
311
312 src += 3;
313 dst += 3;
314 } while ((nbytes -= bsize * 3) >= bsize * 3);
315
316 if (nbytes < bsize)
317 goto done;
318 }
319
320 /* Handle leftovers */
321 do {
322 ctrblocks[0] = cpu_to_be64(ctrblk++);
323
324 des3_ede_enc_blk(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks);
325
326 dst[0] = src[0] ^ ctrblocks[0];
327
328 src += 1;
329 dst += 1;
330 } while ((nbytes -= bsize) >= bsize);
331
332done:
333 *(__be64 *)walk->iv = cpu_to_be64(ctrblk);
334 return nbytes;
335}
336
337static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
338 struct scatterlist *src, unsigned int nbytes)
339{
340 struct blkcipher_walk walk;
341 int err;
342
343 blkcipher_walk_init(&walk, dst, src, nbytes);
344 err = blkcipher_walk_virt_block(desc, &walk, DES3_EDE_BLOCK_SIZE);
345
346 while ((nbytes = walk.nbytes) >= DES3_EDE_BLOCK_SIZE) {
347 nbytes = __ctr_crypt(desc, &walk);
348 err = blkcipher_walk_done(desc, &walk, nbytes);
349 }
350
351 if (walk.nbytes) {
352 ctr_crypt_final(crypto_blkcipher_ctx(desc->tfm), &walk);
353 err = blkcipher_walk_done(desc, &walk, 0);
354 }
355
356 return err;
357}
358
359static int des3_ede_x86_setkey(struct crypto_tfm *tfm, const u8 *key,
360 unsigned int keylen)
361{
362 struct des3_ede_x86_ctx *ctx = crypto_tfm_ctx(tfm);
363 u32 i, j, tmp;
364 int err;
365
366 /* Generate encryption context using generic implementation. */
367 err = __des3_ede_setkey(ctx->enc_expkey, &tfm->crt_flags, key, keylen);
368 if (err < 0)
369 return err;
370
371 /* Fix encryption context for this implementation and form decryption
372 * context. */
373 j = DES3_EDE_EXPKEY_WORDS - 2;
374 for (i = 0; i < DES3_EDE_EXPKEY_WORDS; i += 2, j -= 2) {
375 tmp = ror32(ctx->enc_expkey[i + 1], 4);
376 ctx->enc_expkey[i + 1] = tmp;
377
378 ctx->dec_expkey[j + 0] = ctx->enc_expkey[i + 0];
379 ctx->dec_expkey[j + 1] = tmp;
380 }
381
382 return 0;
383}
384
/*
 * Registered algorithm instances: bare cipher plus ECB, CBC and CTR
 * blkcipher modes, all backed by the assembler implementation.  The
 * priorities (200/300) place these above the generic C versions.
 */
static struct crypto_alg des3_ede_algs[4] = { {
	/* Single-block cipher interface. */
	.cra_name = "des3_ede",
	.cra_driver_name = "des3_ede-asm",
	.cra_priority = 200,
	.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
	.cra_blocksize = DES3_EDE_BLOCK_SIZE,
	.cra_ctxsize = sizeof(struct des3_ede_x86_ctx),
	.cra_alignmask = 0,
	.cra_module = THIS_MODULE,
	.cra_u = {
		.cipher = {
			.cia_min_keysize = DES3_EDE_KEY_SIZE,
			.cia_max_keysize = DES3_EDE_KEY_SIZE,
			.cia_setkey = des3_ede_x86_setkey,
			.cia_encrypt = des3_ede_x86_encrypt,
			.cia_decrypt = des3_ede_x86_decrypt,
		}
	}
}, {
	/* ECB mode: no IV. */
	.cra_name = "ecb(des3_ede)",
	.cra_driver_name = "ecb-des3_ede-asm",
	.cra_priority = 300,
	.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize = DES3_EDE_BLOCK_SIZE,
	.cra_ctxsize = sizeof(struct des3_ede_x86_ctx),
	.cra_alignmask = 0,
	.cra_type = &crypto_blkcipher_type,
	.cra_module = THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize = DES3_EDE_KEY_SIZE,
			.max_keysize = DES3_EDE_KEY_SIZE,
			.setkey = des3_ede_x86_setkey,
			.encrypt = ecb_encrypt,
			.decrypt = ecb_decrypt,
		},
	},
}, {
	/* CBC mode: block-sized IV. */
	.cra_name = "cbc(des3_ede)",
	.cra_driver_name = "cbc-des3_ede-asm",
	.cra_priority = 300,
	.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize = DES3_EDE_BLOCK_SIZE,
	.cra_ctxsize = sizeof(struct des3_ede_x86_ctx),
	.cra_alignmask = 0,
	.cra_type = &crypto_blkcipher_type,
	.cra_module = THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize = DES3_EDE_KEY_SIZE,
			.max_keysize = DES3_EDE_KEY_SIZE,
			.ivsize = DES3_EDE_BLOCK_SIZE,
			.setkey = des3_ede_x86_setkey,
			.encrypt = cbc_encrypt,
			.decrypt = cbc_decrypt,
		},
	},
}, {
	/* CTR mode: stream-cipher-like, blocksize 1; same routine for
	 * encrypt and decrypt. */
	.cra_name = "ctr(des3_ede)",
	.cra_driver_name = "ctr-des3_ede-asm",
	.cra_priority = 300,
	.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize = 1,
	.cra_ctxsize = sizeof(struct des3_ede_x86_ctx),
	.cra_alignmask = 0,
	.cra_type = &crypto_blkcipher_type,
	.cra_module = THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize = DES3_EDE_KEY_SIZE,
			.max_keysize = DES3_EDE_KEY_SIZE,
			.ivsize = DES3_EDE_BLOCK_SIZE,
			.setkey = des3_ede_x86_setkey,
			.encrypt = ctr_crypt,
			.decrypt = ctr_crypt,
		},
	},
} };
463
464static bool is_blacklisted_cpu(void)
465{
466 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
467 return false;
468
469 if (boot_cpu_data.x86 == 0x0f) {
470 /*
471 * On Pentium 4, des3_ede-x86_64 is slower than generic C
472 * implementation because use of 64bit rotates (which are really
473 * slow on P4). Therefore blacklist P4s.
474 */
475 return true;
476 }
477
478 return false;
479}
480
/* Module parameter: set force=1 to register the algorithms even on
 * blacklisted (slow) CPUs. */
static int force;
module_param(force, int, 0);
MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
484
485static int __init des3_ede_x86_init(void)
486{
487 if (!force && is_blacklisted_cpu()) {
488 pr_info("des3_ede-x86_64: performance on this CPU would be suboptimal: disabling des3_ede-x86_64.\n");
489 return -ENODEV;
490 }
491
492 return crypto_register_algs(des3_ede_algs, ARRAY_SIZE(des3_ede_algs));
493}
494
/* Module unload: drop all four algorithm registrations. */
static void __exit des3_ede_x86_fini(void)
{
	crypto_unregister_algs(des3_ede_algs, ARRAY_SIZE(des3_ede_algs));
}
499
module_init(des3_ede_x86_init);
module_exit(des3_ede_x86_fini);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Triple DES EDE Cipher Algorithm, asm optimized");
/* Aliases so requests for the generic algorithm names load this module. */
MODULE_ALIAS("des3_ede");
MODULE_ALIAS("des3_ede-asm");
/* NOTE(review): the "des"/"des-asm" aliases look questionable — this
 * module only registers des3_ede algorithms, not plain DES; confirm. */
MODULE_ALIAS("des");
MODULE_ALIAS("des-asm");
MODULE_AUTHOR("Jussi Kivilinna <jussi.kivilinna@iki.fi>");