author		Linus Torvalds <torvalds@linux-foundation.org>	2014-08-04 12:52:51 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-08-04 12:52:51 -0400
commit		3e7a716a92a0e051f5502c7b689f8c9127c37c33 (patch)
tree		2ebb892eb3a024f108e68a9577c767a53b955a4a /arch/x86/crypto
parent		c2df436bd2504f52808c10ab7d7da832f61ad3f0 (diff)
parent		ce5481d01f67ad304908ec2113515440c0fa86eb (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto update from Herbert Xu:
- CTR(AES) optimisation on x86_64 using "by8" AVX (a C sketch of the idea
  follows the diffstat below)
- arm64 support for ccp
- Intel QAT crypto driver
- Qualcomm crypto engine driver
- x86-64 assembly optimisation for 3DES
- CTR(3DES) speed test
- move the FIPS panic from module.c so that it only triggers on crypto
  modules
- SP800-90A Deterministic Random Bit Generator (drbg)
- more test vectors for ghash
- tweak the self-tests to catch partial-block bugs
- misc fixes
* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (94 commits)
crypto: drbg - fix failure of generating multiple of 2**16 bytes
crypto: ccp - Do not sign extend input data to CCP
crypto: testmgr - add missing spaces to drbg error strings
crypto: atmel-tdes - Switch to managed version of kzalloc
crypto: atmel-sha - Switch to managed version of kzalloc
crypto: testmgr - use chunks smaller than algo block size in chunk tests
crypto: qat - Fixed SKU1 dev issue
crypto: qat - Use hweight for bit counting
crypto: qat - Updated print outputs
crypto: qat - change ae_num to ae_id
crypto: qat - change slice->regions to slice->region
crypto: qat - use min_t macro
crypto: qat - remove unnecessary parentheses
crypto: qat - remove unneeded header
crypto: qat - checkpatch blank lines
crypto: qat - remove unnecessary return codes
crypto: Resolve shadow warnings
crypto: ccp - Remove "select OF" from Kconfig
crypto: caam - fix DECO RSR polling
crypto: qce - Let 'DEV_QCE' depend on both HAS_DMA and HAS_IOMEM
...
Diffstat (limited to 'arch/x86/crypto')
-rw-r--r--	arch/x86/crypto/Makefile                  |   4
-rw-r--r--	arch/x86/crypto/aes_ctrby8_avx-x86_64.S   | 546
-rw-r--r--	arch/x86/crypto/aesni-intel_glue.c        |  40
-rw-r--r--	arch/x86/crypto/crc32c-pcl-intel-asm_64.S | 281
-rw-r--r--	arch/x86/crypto/des3_ede-asm_64.S         | 805
-rw-r--r--	arch/x86/crypto/des3_ede_glue.c           | 509
6 files changed, 2040 insertions(+), 145 deletions(-)
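The headline "by8" CTR item encrypts eight counter blocks per loop pass so that
the AES rounds of independent blocks can overlap in the pipeline. The sketch
below is a plain-C illustration of that shape, not the kernel code: aes_block_fn
is a hypothetical stand-in for the AESNI round sequence, and the real routines
in aes_ctrby8_avx-x86_64.S keep the eight blocks in %xmm0-%xmm7.

#include <stddef.h>
#include <stdint.h>

#define AES_BLOCK 16

/* Hypothetical stand-in for one AES block encryption (10/12/14 rounds
 * depending on key size); any correct AES implementation fits here. */
typedef void (*aes_block_fn)(const void *key_schedule,
			     const uint8_t in[AES_BLOCK],
			     uint8_t out[AES_BLOCK]);

/* Big-endian increment of the 128-bit counter block. */
static void ctr128_inc(uint8_t ctr[AES_BLOCK])
{
	int i;

	for (i = AES_BLOCK - 1; i >= 0; i--)
		if (++ctr[i] != 0)
			break;
}

/* CTR mode over whole blocks, eight keystream blocks per pass. */
void ctr_crypt_by8(const void *key_schedule, aes_block_fn encrypt,
		   uint8_t ctr[AES_BLOCK], const uint8_t *in, uint8_t *out,
		   size_t nblocks)
{
	uint8_t ks[8 * AES_BLOCK];
	size_t i;
	int b;

	while (nblocks >= 8) {
		/* eight independent counters -> eight keystream blocks */
		for (b = 0; b < 8; b++) {
			encrypt(key_schedule, ctr, &ks[b * AES_BLOCK]);
			ctr128_inc(ctr);
		}
		for (i = 0; i < 8 * AES_BLOCK; i++)
			out[i] = in[i] ^ ks[i];
		in += 8 * AES_BLOCK;
		out += 8 * AES_BLOCK;
		nblocks -= 8;
	}
	/* 1..7 leftover blocks follow the same pattern, one at a time */
	for (; nblocks; nblocks--) {
		encrypt(key_schedule, ctr, ks);
		ctr128_inc(ctr);
		for (i = 0; i < AES_BLOCK; i++)
			out[i] = in[i] ^ ks[i];
		in += AES_BLOCK;
		out += AES_BLOCK;
	}
}

Decryption is the same keystream XOR, which is why the assembly only exposes
aes_ctr_enc_*_avx_by8 entry points and the glue code further down uses them for
both directions.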
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 61d6e281898b..d551165a3159 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -14,6 +14,7 @@ obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
 obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o
 
 obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
+obj-$(CONFIG_CRYPTO_DES3_EDE_X86_64) += des3_ede-x86_64.o
 obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
 obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
@@ -52,6 +53,7 @@ salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o
 serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o
 
 aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
+des3_ede-x86_64-y := des3_ede-asm_64.o des3_ede_glue.o
 camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
 blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
 twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
@@ -76,7 +78,7 @@ ifeq ($(avx2_supported),yes)
 endif
 
 aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
-aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o
+aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o aes_ctrby8_avx-x86_64.o
 ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
 sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
 ifeq ($(avx2_supported),yes)
diff --git a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
new file mode 100644
index 000000000000..f091f122ed24
--- /dev/null
+++ b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
@@ -0,0 +1,546 @@
1 | /* | ||
2 | * Implement AES CTR mode by8 optimization with AVX instructions. (x86_64) | ||
3 | * | ||
4 | * This is AES128/192/256 CTR mode optimization implementation. It requires | ||
5 | * the support of Intel(R) AESNI and AVX instructions. | ||
6 | * | ||
7 | * This work was inspired by the AES CTR mode optimization published | ||
8 | * in Intel Optimized IPSEC Cryptographic library. | ||
9 | * Additional information on it can be found at: | ||
10 | * http://downloadcenter.intel.com/Detail_Desc.aspx?agr=Y&DwnldID=22972 | ||
11 | * | ||
12 | * This file is provided under a dual BSD/GPLv2 license. When using or | ||
13 | * redistributing this file, you may do so under either license. | ||
14 | * | ||
15 | * GPL LICENSE SUMMARY | ||
16 | * | ||
17 | * Copyright(c) 2014 Intel Corporation. | ||
18 | * | ||
19 | * This program is free software; you can redistribute it and/or modify | ||
20 | * it under the terms of version 2 of the GNU General Public License as | ||
21 | * published by the Free Software Foundation. | ||
22 | * | ||
23 | * This program is distributed in the hope that it will be useful, but | ||
24 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
25 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
26 | * General Public License for more details. | ||
27 | * | ||
28 | * Contact Information: | ||
29 | * James Guilford <james.guilford@intel.com> | ||
30 | * Sean Gulley <sean.m.gulley@intel.com> | ||
31 | * Chandramouli Narayanan <mouli@linux.intel.com> | ||
32 | * | ||
33 | * BSD LICENSE | ||
34 | * | ||
35 | * Copyright(c) 2014 Intel Corporation. | ||
36 | * | ||
37 | * Redistribution and use in source and binary forms, with or without | ||
38 | * modification, are permitted provided that the following conditions | ||
39 | * are met: | ||
40 | * | ||
41 | * Redistributions of source code must retain the above copyright | ||
42 | * notice, this list of conditions and the following disclaimer. | ||
43 | * Redistributions in binary form must reproduce the above copyright | ||
44 | * notice, this list of conditions and the following disclaimer in | ||
45 | * the documentation and/or other materials provided with the | ||
46 | * distribution. | ||
47 | * Neither the name of Intel Corporation nor the names of its | ||
48 | * contributors may be used to endorse or promote products derived | ||
49 | * from this software without specific prior written permission. | ||
50 | * | ||
51 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
52 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
53 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
54 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
55 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
56 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
57 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
58 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
59 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
60 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
61 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
62 | * | ||
63 | */ | ||
64 | |||
65 | #include <linux/linkage.h> | ||
66 | #include <asm/inst.h> | ||
67 | |||
68 | #define CONCAT(a,b) a##b | ||
69 | #define VMOVDQ vmovdqu | ||
70 | |||
71 | #define xdata0 %xmm0 | ||
72 | #define xdata1 %xmm1 | ||
73 | #define xdata2 %xmm2 | ||
74 | #define xdata3 %xmm3 | ||
75 | #define xdata4 %xmm4 | ||
76 | #define xdata5 %xmm5 | ||
77 | #define xdata6 %xmm6 | ||
78 | #define xdata7 %xmm7 | ||
79 | #define xcounter %xmm8 | ||
80 | #define xbyteswap %xmm9 | ||
81 | #define xkey0 %xmm10 | ||
82 | #define xkey3 %xmm11 | ||
83 | #define xkey6 %xmm12 | ||
84 | #define xkey9 %xmm13 | ||
85 | #define xkey4 %xmm11 | ||
86 | #define xkey8 %xmm12 | ||
87 | #define xkey12 %xmm13 | ||
88 | #define xkeyA %xmm14 | ||
89 | #define xkeyB %xmm15 | ||
90 | |||
91 | #define p_in %rdi | ||
92 | #define p_iv %rsi | ||
93 | #define p_keys %rdx | ||
94 | #define p_out %rcx | ||
95 | #define num_bytes %r8 | ||
96 | |||
97 | #define tmp %r10 | ||
98 | #define DDQ(i) CONCAT(ddq_add_,i) | ||
99 | #define XMM(i) CONCAT(%xmm, i) | ||
100 | #define DDQ_DATA 0 | ||
101 | #define XDATA 1 | ||
102 | #define KEY_128 1 | ||
103 | #define KEY_192 2 | ||
104 | #define KEY_256 3 | ||
105 | |||
106 | .section .rodata | ||
107 | .align 16 | ||
108 | |||
109 | byteswap_const: | ||
110 | .octa 0x000102030405060708090A0B0C0D0E0F | ||
111 | ddq_add_1: | ||
112 | .octa 0x00000000000000000000000000000001 | ||
113 | ddq_add_2: | ||
114 | .octa 0x00000000000000000000000000000002 | ||
115 | ddq_add_3: | ||
116 | .octa 0x00000000000000000000000000000003 | ||
117 | ddq_add_4: | ||
118 | .octa 0x00000000000000000000000000000004 | ||
119 | ddq_add_5: | ||
120 | .octa 0x00000000000000000000000000000005 | ||
121 | ddq_add_6: | ||
122 | .octa 0x00000000000000000000000000000006 | ||
123 | ddq_add_7: | ||
124 | .octa 0x00000000000000000000000000000007 | ||
125 | ddq_add_8: | ||
126 | .octa 0x00000000000000000000000000000008 | ||
127 | |||
128 | .text | ||
129 | |||
130 | /* generate a unique variable for ddq_add_x */ | ||
131 | |||
132 | .macro setddq n | ||
133 | var_ddq_add = DDQ(\n) | ||
134 | .endm | ||
135 | |||
136 | /* generate a unique variable for xmm register */ | ||
137 | .macro setxdata n | ||
138 | var_xdata = XMM(\n) | ||
139 | .endm | ||
140 | |||
141 | /* club the numeric 'id' to the symbol 'name' */ | ||
142 | |||
143 | .macro club name, id | ||
144 | .altmacro | ||
145 | .if \name == DDQ_DATA | ||
146 | setddq %\id | ||
147 | .elseif \name == XDATA | ||
148 | setxdata %\id | ||
149 | .endif | ||
150 | .noaltmacro | ||
151 | .endm | ||
152 | |||
153 | /* | ||
154 | * do_aes num_in_par load_keys key_len | ||
155 | * This increments p_in, but not p_out | ||
156 | */ | ||
157 | .macro do_aes b, k, key_len | ||
158 | .set by, \b | ||
159 | .set load_keys, \k | ||
160 | .set klen, \key_len | ||
161 | |||
162 | .if (load_keys) | ||
163 | vmovdqa 0*16(p_keys), xkey0 | ||
164 | .endif | ||
165 | |||
166 | vpshufb xbyteswap, xcounter, xdata0 | ||
167 | |||
168 | .set i, 1 | ||
169 | .rept (by - 1) | ||
170 | club DDQ_DATA, i | ||
171 | club XDATA, i | ||
172 | vpaddd var_ddq_add(%rip), xcounter, var_xdata | ||
173 | vpshufb xbyteswap, var_xdata, var_xdata | ||
174 | .set i, (i +1) | ||
175 | .endr | ||
176 | |||
177 | vmovdqa 1*16(p_keys), xkeyA | ||
178 | |||
179 | vpxor xkey0, xdata0, xdata0 | ||
180 | club DDQ_DATA, by | ||
181 | vpaddd var_ddq_add(%rip), xcounter, xcounter | ||
182 | |||
183 | .set i, 1 | ||
184 | .rept (by - 1) | ||
185 | club XDATA, i | ||
186 | vpxor xkey0, var_xdata, var_xdata | ||
187 | .set i, (i +1) | ||
188 | .endr | ||
189 | |||
190 | vmovdqa 2*16(p_keys), xkeyB | ||
191 | |||
192 | .set i, 0 | ||
193 | .rept by | ||
194 | club XDATA, i | ||
195 | vaesenc xkeyA, var_xdata, var_xdata /* key 1 */ | ||
196 | .set i, (i +1) | ||
197 | .endr | ||
198 | |||
199 | .if (klen == KEY_128) | ||
200 | .if (load_keys) | ||
201 | vmovdqa 3*16(p_keys), xkeyA | ||
202 | .endif | ||
203 | .else | ||
204 | vmovdqa 3*16(p_keys), xkeyA | ||
205 | .endif | ||
206 | |||
207 | .set i, 0 | ||
208 | .rept by | ||
209 | club XDATA, i | ||
210 | vaesenc xkeyB, var_xdata, var_xdata /* key 2 */ | ||
211 | .set i, (i +1) | ||
212 | .endr | ||
213 | |||
214 | add $(16*by), p_in | ||
215 | |||
216 | .if (klen == KEY_128) | ||
217 | vmovdqa 4*16(p_keys), xkey4 | ||
218 | .else | ||
219 | .if (load_keys) | ||
220 | vmovdqa 4*16(p_keys), xkey4 | ||
221 | .endif | ||
222 | .endif | ||
223 | |||
224 | .set i, 0 | ||
225 | .rept by | ||
226 | club XDATA, i | ||
227 | vaesenc xkeyA, var_xdata, var_xdata /* key 3 */ | ||
228 | .set i, (i +1) | ||
229 | .endr | ||
230 | |||
231 | vmovdqa 5*16(p_keys), xkeyA | ||
232 | |||
233 | .set i, 0 | ||
234 | .rept by | ||
235 | club XDATA, i | ||
236 | vaesenc xkey4, var_xdata, var_xdata /* key 4 */ | ||
237 | .set i, (i +1) | ||
238 | .endr | ||
239 | |||
240 | .if (klen == KEY_128) | ||
241 | .if (load_keys) | ||
242 | vmovdqa 6*16(p_keys), xkeyB | ||
243 | .endif | ||
244 | .else | ||
245 | vmovdqa 6*16(p_keys), xkeyB | ||
246 | .endif | ||
247 | |||
248 | .set i, 0 | ||
249 | .rept by | ||
250 | club XDATA, i | ||
251 | vaesenc xkeyA, var_xdata, var_xdata /* key 5 */ | ||
252 | .set i, (i +1) | ||
253 | .endr | ||
254 | |||
255 | vmovdqa 7*16(p_keys), xkeyA | ||
256 | |||
257 | .set i, 0 | ||
258 | .rept by | ||
259 | club XDATA, i | ||
260 | vaesenc xkeyB, var_xdata, var_xdata /* key 6 */ | ||
261 | .set i, (i +1) | ||
262 | .endr | ||
263 | |||
264 | .if (klen == KEY_128) | ||
265 | vmovdqa 8*16(p_keys), xkey8 | ||
266 | .else | ||
267 | .if (load_keys) | ||
268 | vmovdqa 8*16(p_keys), xkey8 | ||
269 | .endif | ||
270 | .endif | ||
271 | |||
272 | .set i, 0 | ||
273 | .rept by | ||
274 | club XDATA, i | ||
275 | vaesenc xkeyA, var_xdata, var_xdata /* key 7 */ | ||
276 | .set i, (i +1) | ||
277 | .endr | ||
278 | |||
279 | .if (klen == KEY_128) | ||
280 | .if (load_keys) | ||
281 | vmovdqa 9*16(p_keys), xkeyA | ||
282 | .endif | ||
283 | .else | ||
284 | vmovdqa 9*16(p_keys), xkeyA | ||
285 | .endif | ||
286 | |||
287 | .set i, 0 | ||
288 | .rept by | ||
289 | club XDATA, i | ||
290 | vaesenc xkey8, var_xdata, var_xdata /* key 8 */ | ||
291 | .set i, (i +1) | ||
292 | .endr | ||
293 | |||
294 | vmovdqa 10*16(p_keys), xkeyB | ||
295 | |||
296 | .set i, 0 | ||
297 | .rept by | ||
298 | club XDATA, i | ||
299 | vaesenc xkeyA, var_xdata, var_xdata /* key 9 */ | ||
300 | .set i, (i +1) | ||
301 | .endr | ||
302 | |||
303 | .if (klen != KEY_128) | ||
304 | vmovdqa 11*16(p_keys), xkeyA | ||
305 | .endif | ||
306 | |||
307 | .set i, 0 | ||
308 | .rept by | ||
309 | club XDATA, i | ||
310 | /* key 10 */ | ||
311 | .if (klen == KEY_128) | ||
312 | vaesenclast xkeyB, var_xdata, var_xdata | ||
313 | .else | ||
314 | vaesenc xkeyB, var_xdata, var_xdata | ||
315 | .endif | ||
316 | .set i, (i +1) | ||
317 | .endr | ||
318 | |||
319 | .if (klen != KEY_128) | ||
320 | .if (load_keys) | ||
321 | vmovdqa 12*16(p_keys), xkey12 | ||
322 | .endif | ||
323 | |||
324 | .set i, 0 | ||
325 | .rept by | ||
326 | club XDATA, i | ||
327 | vaesenc xkeyA, var_xdata, var_xdata /* key 11 */ | ||
328 | .set i, (i +1) | ||
329 | .endr | ||
330 | |||
331 | .if (klen == KEY_256) | ||
332 | vmovdqa 13*16(p_keys), xkeyA | ||
333 | .endif | ||
334 | |||
335 | .set i, 0 | ||
336 | .rept by | ||
337 | club XDATA, i | ||
338 | .if (klen == KEY_256) | ||
339 | /* key 12 */ | ||
340 | vaesenc xkey12, var_xdata, var_xdata | ||
341 | .else | ||
342 | vaesenclast xkey12, var_xdata, var_xdata | ||
343 | .endif | ||
344 | .set i, (i +1) | ||
345 | .endr | ||
346 | |||
347 | .if (klen == KEY_256) | ||
348 | vmovdqa 14*16(p_keys), xkeyB | ||
349 | |||
350 | .set i, 0 | ||
351 | .rept by | ||
352 | club XDATA, i | ||
353 | /* key 13 */ | ||
354 | vaesenc xkeyA, var_xdata, var_xdata | ||
355 | .set i, (i +1) | ||
356 | .endr | ||
357 | |||
358 | .set i, 0 | ||
359 | .rept by | ||
360 | club XDATA, i | ||
361 | /* key 14 */ | ||
362 | vaesenclast xkeyB, var_xdata, var_xdata | ||
363 | .set i, (i +1) | ||
364 | .endr | ||
365 | .endif | ||
366 | .endif | ||
367 | |||
368 | .set i, 0 | ||
369 | .rept (by / 2) | ||
370 | .set j, (i+1) | ||
371 | VMOVDQ (i*16 - 16*by)(p_in), xkeyA | ||
372 | VMOVDQ (j*16 - 16*by)(p_in), xkeyB | ||
373 | club XDATA, i | ||
374 | vpxor xkeyA, var_xdata, var_xdata | ||
375 | club XDATA, j | ||
376 | vpxor xkeyB, var_xdata, var_xdata | ||
377 | .set i, (i+2) | ||
378 | .endr | ||
379 | |||
380 | .if (i < by) | ||
381 | VMOVDQ (i*16 - 16*by)(p_in), xkeyA | ||
382 | club XDATA, i | ||
383 | vpxor xkeyA, var_xdata, var_xdata | ||
384 | .endif | ||
385 | |||
386 | .set i, 0 | ||
387 | .rept by | ||
388 | club XDATA, i | ||
389 | VMOVDQ var_xdata, i*16(p_out) | ||
390 | .set i, (i+1) | ||
391 | .endr | ||
392 | .endm | ||
393 | |||
394 | .macro do_aes_load val, key_len | ||
395 | do_aes \val, 1, \key_len | ||
396 | .endm | ||
397 | |||
398 | .macro do_aes_noload val, key_len | ||
399 | do_aes \val, 0, \key_len | ||
400 | .endm | ||
401 | |||
402 | /* main body of aes ctr load */ | ||
403 | |||
404 | .macro do_aes_ctrmain key_len | ||
405 | |||
406 | cmp $16, num_bytes | ||
407 | jb .Ldo_return2\key_len | ||
408 | |||
409 | vmovdqa byteswap_const(%rip), xbyteswap | ||
410 | vmovdqu (p_iv), xcounter | ||
411 | vpshufb xbyteswap, xcounter, xcounter | ||
412 | |||
413 | mov num_bytes, tmp | ||
414 | and $(7*16), tmp | ||
415 | jz .Lmult_of_8_blks\key_len | ||
416 | |||
417 | /* 1 <= tmp <= 7 */ | ||
418 | cmp $(4*16), tmp | ||
419 | jg .Lgt4\key_len | ||
420 | je .Leq4\key_len | ||
421 | |||
422 | .Llt4\key_len: | ||
423 | cmp $(2*16), tmp | ||
424 | jg .Leq3\key_len | ||
425 | je .Leq2\key_len | ||
426 | |||
427 | .Leq1\key_len: | ||
428 | do_aes_load 1, \key_len | ||
429 | add $(1*16), p_out | ||
430 | and $(~7*16), num_bytes | ||
431 | jz .Ldo_return2\key_len | ||
432 | jmp .Lmain_loop2\key_len | ||
433 | |||
434 | .Leq2\key_len: | ||
435 | do_aes_load 2, \key_len | ||
436 | add $(2*16), p_out | ||
437 | and $(~7*16), num_bytes | ||
438 | jz .Ldo_return2\key_len | ||
439 | jmp .Lmain_loop2\key_len | ||
440 | |||
441 | |||
442 | .Leq3\key_len: | ||
443 | do_aes_load 3, \key_len | ||
444 | add $(3*16), p_out | ||
445 | and $(~7*16), num_bytes | ||
446 | jz .Ldo_return2\key_len | ||
447 | jmp .Lmain_loop2\key_len | ||
448 | |||
449 | .Leq4\key_len: | ||
450 | do_aes_load 4, \key_len | ||
451 | add $(4*16), p_out | ||
452 | and $(~7*16), num_bytes | ||
453 | jz .Ldo_return2\key_len | ||
454 | jmp .Lmain_loop2\key_len | ||
455 | |||
456 | .Lgt4\key_len: | ||
457 | cmp $(6*16), tmp | ||
458 | jg .Leq7\key_len | ||
459 | je .Leq6\key_len | ||
460 | |||
461 | .Leq5\key_len: | ||
462 | do_aes_load 5, \key_len | ||
463 | add $(5*16), p_out | ||
464 | and $(~7*16), num_bytes | ||
465 | jz .Ldo_return2\key_len | ||
466 | jmp .Lmain_loop2\key_len | ||
467 | |||
468 | .Leq6\key_len: | ||
469 | do_aes_load 6, \key_len | ||
470 | add $(6*16), p_out | ||
471 | and $(~7*16), num_bytes | ||
472 | jz .Ldo_return2\key_len | ||
473 | jmp .Lmain_loop2\key_len | ||
474 | |||
475 | .Leq7\key_len: | ||
476 | do_aes_load 7, \key_len | ||
477 | add $(7*16), p_out | ||
478 | and $(~7*16), num_bytes | ||
479 | jz .Ldo_return2\key_len | ||
480 | jmp .Lmain_loop2\key_len | ||
481 | |||
482 | .Lmult_of_8_blks\key_len: | ||
483 | .if (\key_len != KEY_128) | ||
484 | vmovdqa 0*16(p_keys), xkey0 | ||
485 | vmovdqa 4*16(p_keys), xkey4 | ||
486 | vmovdqa 8*16(p_keys), xkey8 | ||
487 | vmovdqa 12*16(p_keys), xkey12 | ||
488 | .else | ||
489 | vmovdqa 0*16(p_keys), xkey0 | ||
490 | vmovdqa 3*16(p_keys), xkey4 | ||
491 | vmovdqa 6*16(p_keys), xkey8 | ||
492 | vmovdqa 9*16(p_keys), xkey12 | ||
493 | .endif | ||
494 | .align 16 | ||
495 | .Lmain_loop2\key_len: | ||
496 | /* num_bytes is a multiple of 8 and >0 */ | ||
497 | do_aes_noload 8, \key_len | ||
498 | add $(8*16), p_out | ||
499 | sub $(8*16), num_bytes | ||
500 | jne .Lmain_loop2\key_len | ||
501 | |||
502 | .Ldo_return2\key_len: | ||
503 | /* return updated IV */ | ||
504 | vpshufb xbyteswap, xcounter, xcounter | ||
505 | vmovdqu xcounter, (p_iv) | ||
506 | ret | ||
507 | .endm | ||
508 | |||
509 | /* | ||
510 | * routine to do AES128 CTR enc/decrypt "by8" | ||
511 | * XMM registers are clobbered. | ||
512 | * Saving/restoring must be done at a higher level | ||
513 | * aes_ctr_enc_128_avx_by8(void *in, void *iv, void *keys, void *out, | ||
514 | * unsigned int num_bytes) | ||
515 | */ | ||
516 | ENTRY(aes_ctr_enc_128_avx_by8) | ||
517 | /* call the aes main loop */ | ||
518 | do_aes_ctrmain KEY_128 | ||
519 | |||
520 | ENDPROC(aes_ctr_enc_128_avx_by8) | ||
521 | |||
522 | /* | ||
523 | * routine to do AES192 CTR enc/decrypt "by8" | ||
524 | * XMM registers are clobbered. | ||
525 | * Saving/restoring must be done at a higher level | ||
526 | * aes_ctr_enc_192_avx_by8(void *in, void *iv, void *keys, void *out, | ||
527 | * unsigned int num_bytes) | ||
528 | */ | ||
529 | ENTRY(aes_ctr_enc_192_avx_by8) | ||
530 | /* call the aes main loop */ | ||
531 | do_aes_ctrmain KEY_192 | ||
532 | |||
533 | ENDPROC(aes_ctr_enc_192_avx_by8) | ||
534 | |||
535 | /* | ||
536 | * routine to do AES256 CTR enc/decrypt "by8" | ||
537 | * XMM registers are clobbered. | ||
538 | * Saving/restoring must be done at a higher level | ||
539 | * aes_ctr_enc_256_avx_by8(void *in, void *iv, void *keys, void *out, | ||
540 | * unsigned int num_bytes) | ||
541 | */ | ||
542 | ENTRY(aes_ctr_enc_256_avx_by8) | ||
543 | /* call the aes main loop */ | ||
544 | do_aes_ctrmain KEY_256 | ||
545 | |||
546 | ENDPROC(aes_ctr_enc_256_avx_by8) | ||
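The control flow of do_aes_ctrmain above is spread across the .Leq1 .. .Leq7 and
.Lmain_loop2 labels. Below is a hedged C rendering of the same structure, under
the assumption (guaranteed by the glue code) that num_bytes is a whole number of
16-byte blocks; do_aes_load()/do_aes_noload() are illustrative stand-ins for the
assembler macros of the same names.

/* encrypt nblocks counter blocks; the _load variant also loads round keys */
static void do_aes_load(int nblocks)   { (void)nblocks; }
static void do_aes_noload(int nblocks) { (void)nblocks; }

void do_aes_ctrmain_sketch(unsigned long num_bytes)
{
	unsigned long tail;

	if (num_bytes < 16)
		return;				/* .Ldo_return2: nothing to do */

	tail = num_bytes & (7 * 16);		/* "and $(7*16), tmp"          */
	if (tail) {
		/* 1..7 remainder blocks first; this also loads the keys  */
		do_aes_load(tail / 16);		/* .Leq1 .. .Leq7              */
		num_bytes &= ~(7UL * 16);	/* "and $(~7*16), num_bytes"   */
		if (!num_bytes)
			return;
	} else {
		/* .Lmult_of_8_blks: only xkey0/xkey4/xkey8/xkey12 are
		 * preloaded; the other round keys are fetched in the loop */
	}

	do {					/* .Lmain_loop2                */
		do_aes_noload(8);		/* eight blocks per iteration  */
		num_bytes -= 8 * 16;
	} while (num_bytes);
}

Handling the odd-sized remainder first is what lets the hot loop run with a fixed
eight-block body and most of the key schedule kept in registers.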
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 948ad0e77741..888950f29fd9 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -105,6 +105,9 @@ void crypto_fpu_exit(void);
 #define AVX_GEN4_OPTSIZE 4096
 
 #ifdef CONFIG_X86_64
+
+static void (*aesni_ctr_enc_tfm)(struct crypto_aes_ctx *ctx, u8 *out,
+			      const u8 *in, unsigned int len, u8 *iv);
 asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out,
 			      const u8 *in, unsigned int len, u8 *iv);
 
@@ -155,6 +158,12 @@ asmlinkage void aesni_gcm_dec(void *ctx, u8 *out,
 
 
 #ifdef CONFIG_AS_AVX
+asmlinkage void aes_ctr_enc_128_avx_by8(const u8 *in, u8 *iv,
+		void *keys, u8 *out, unsigned int num_bytes);
+asmlinkage void aes_ctr_enc_192_avx_by8(const u8 *in, u8 *iv,
+		void *keys, u8 *out, unsigned int num_bytes);
+asmlinkage void aes_ctr_enc_256_avx_by8(const u8 *in, u8 *iv,
+		void *keys, u8 *out, unsigned int num_bytes);
 /*
  * asmlinkage void aesni_gcm_precomp_avx_gen2()
  *	gcm_data *my_ctx_data, context data
@@ -472,6 +481,25 @@ static void ctr_crypt_final(struct crypto_aes_ctx *ctx,
 	crypto_inc(ctrblk, AES_BLOCK_SIZE);
 }
 
+#ifdef CONFIG_AS_AVX
+static void aesni_ctr_enc_avx_tfm(struct crypto_aes_ctx *ctx, u8 *out,
+			      const u8 *in, unsigned int len, u8 *iv)
+{
+	/*
+	 * based on key length, override with the by8 version
+	 * of ctr mode encryption/decryption for improved performance
+	 * aes_set_key_common() ensures that key length is one of
+	 * {128,192,256}
+	 */
+	if (ctx->key_length == AES_KEYSIZE_128)
+		aes_ctr_enc_128_avx_by8(in, iv, (void *)ctx, out, len);
+	else if (ctx->key_length == AES_KEYSIZE_192)
+		aes_ctr_enc_192_avx_by8(in, iv, (void *)ctx, out, len);
+	else
+		aes_ctr_enc_256_avx_by8(in, iv, (void *)ctx, out, len);
+}
+#endif
+
 static int ctr_crypt(struct blkcipher_desc *desc,
 		     struct scatterlist *dst, struct scatterlist *src,
 		     unsigned int nbytes)
@@ -486,8 +514,8 @@ static int ctr_crypt(struct blkcipher_desc *desc,
 
 	kernel_fpu_begin();
 	while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
-		aesni_ctr_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
-			      nbytes & AES_BLOCK_MASK, walk.iv);
+		aesni_ctr_enc_tfm(ctx, walk.dst.virt.addr, walk.src.virt.addr,
+			      nbytes & AES_BLOCK_MASK, walk.iv);
 		nbytes &= AES_BLOCK_SIZE - 1;
 		err = blkcipher_walk_done(desc, &walk, nbytes);
 	}
@@ -1493,6 +1521,14 @@ static int __init aesni_init(void)
 		aesni_gcm_enc_tfm = aesni_gcm_enc;
 		aesni_gcm_dec_tfm = aesni_gcm_dec;
 	}
+	aesni_ctr_enc_tfm = aesni_ctr_enc;
+#ifdef CONFIG_AS_AVX
+	if (cpu_has_avx) {
+		/* optimize performance of ctr mode encryption transform */
+		aesni_ctr_enc_tfm = aesni_ctr_enc_avx_tfm;
+		pr_info("AES CTR mode by8 optimization enabled\n");
+	}
+#endif
 #endif
 
 	err = crypto_fpu_init();
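The glue changes follow a common pattern: pick the CTR transform once at module
init, so the per-request path is a single indirect call with no CPU-feature
checks. A minimal sketch of that pattern with hypothetical names
(cpu_has_avx_probe() is a placeholder, not a kernel API):

#include <stdbool.h>
#include <stdint.h>

typedef void (*ctr_tfm_fn)(void *ctx, uint8_t *out, const uint8_t *in,
			   unsigned int len, uint8_t *iv);

/* portable fallback and AVX "by8" variant; bodies elided */
static void ctr_generic(void *ctx, uint8_t *out, const uint8_t *in,
			unsigned int len, uint8_t *iv)
{ (void)ctx; (void)out; (void)in; (void)len; (void)iv; }
static void ctr_avx_by8(void *ctx, uint8_t *out, const uint8_t *in,
			unsigned int len, uint8_t *iv)
{ (void)ctx; (void)out; (void)in; (void)len; (void)iv; }

static ctr_tfm_fn ctr_tfm = ctr_generic;	/* safe default              */

static bool cpu_has_avx_probe(void)		/* placeholder CPU check     */
{
	return false;
}

void ctr_dispatch_init(void)
{
	if (cpu_has_avx_probe())
		ctr_tfm = ctr_avx_by8;		/* mirrors aesni_init() above */
}

void ctr_crypt_one(void *ctx, uint8_t *out, const uint8_t *in,
		   unsigned int len, uint8_t *iv)
{
	ctr_tfm(ctx, out, in, len, iv);		/* one indirect call per request */
}

Inside the AVX path, aesni_ctr_enc_avx_tfm() then picks the 128/192/256 entry
point from ctx->key_length, as the hunk above shows.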
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
index dbc4339b5417..26d49ebae040 100644
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -72,6 +72,7 @@ | |||
72 | 72 | ||
73 | # unsigned int crc_pcl(u8 *buffer, int len, unsigned int crc_init); | 73 | # unsigned int crc_pcl(u8 *buffer, int len, unsigned int crc_init); |
74 | 74 | ||
75 | .text | ||
75 | ENTRY(crc_pcl) | 76 | ENTRY(crc_pcl) |
76 | #define bufp %rdi | 77 | #define bufp %rdi |
77 | #define bufp_dw %edi | 78 | #define bufp_dw %edi |
@@ -216,15 +217,11 @@ LABEL crc_ %i | |||
216 | ## 4) Combine three results: | 217 | ## 4) Combine three results: |
217 | ################################################################ | 218 | ################################################################ |
218 | 219 | ||
219 | lea (K_table-16)(%rip), bufp # first entry is for idx 1 | 220 | lea (K_table-8)(%rip), bufp # first entry is for idx 1 |
220 | shlq $3, %rax # rax *= 8 | 221 | shlq $3, %rax # rax *= 8 |
221 | subq %rax, tmp # tmp -= rax*8 | 222 | pmovzxdq (bufp,%rax), %xmm0 # 2 consts: K1:K2 |
222 | shlq $1, %rax | 223 | leal (%eax,%eax,2), %eax # rax *= 3 (total *24) |
223 | subq %rax, tmp # tmp -= rax*16 | 224 | subq %rax, tmp # tmp -= rax*24 |
224 | # (total tmp -= rax*24) | ||
225 | addq %rax, bufp | ||
226 | |||
227 | movdqa (bufp), %xmm0 # 2 consts: K1:K2 | ||
228 | 225 | ||
229 | movq crc_init, %xmm1 # CRC for block 1 | 226 | movq crc_init, %xmm1 # CRC for block 1 |
230 | PCLMULQDQ 0x00,%xmm0,%xmm1 # Multiply by K2 | 227 | PCLMULQDQ 0x00,%xmm0,%xmm1 # Multiply by K2 |
@@ -238,9 +235,9 @@ LABEL crc_ %i | |||
238 | mov crc2, crc_init | 235 | mov crc2, crc_init |
239 | crc32 %rax, crc_init | 236 | crc32 %rax, crc_init |
240 | 237 | ||
241 | ################################################################ | 238 | ################################################################ |
242 | ## 5) Check for end: | 239 | ## 5) Check for end: |
243 | ################################################################ | 240 | ################################################################ |
244 | 241 | ||
245 | LABEL crc_ 0 | 242 | LABEL crc_ 0 |
246 | mov tmp, len | 243 | mov tmp, len |
@@ -331,136 +328,136 @@ ENDPROC(crc_pcl) | |||
331 | 328 | ||
332 | ################################################################ | 329 | ################################################################ |
333 | ## PCLMULQDQ tables | 330 | ## PCLMULQDQ tables |
334 | ## Table is 128 entries x 2 quad words each | 331 | ## Table is 128 entries x 2 words (8 bytes) each |
335 | ################################################################ | 332 | ################################################################ |
336 | .data | 333 | .section .rodata, "a", %progbits |
337 | .align 64 | 334 | .align 8 |
338 | K_table: | 335 | K_table: |
339 | .quad 0x14cd00bd6,0x105ec76f0 | 336 | .long 0x493c7d27, 0x00000001 |
340 | .quad 0x0ba4fc28e,0x14cd00bd6 | 337 | .long 0xba4fc28e, 0x493c7d27 |
341 | .quad 0x1d82c63da,0x0f20c0dfe | 338 | .long 0xddc0152b, 0xf20c0dfe |
342 | .quad 0x09e4addf8,0x0ba4fc28e | 339 | .long 0x9e4addf8, 0xba4fc28e |
343 | .quad 0x039d3b296,0x1384aa63a | 340 | .long 0x39d3b296, 0x3da6d0cb |
344 | .quad 0x102f9b8a2,0x1d82c63da | 341 | .long 0x0715ce53, 0xddc0152b |
345 | .quad 0x14237f5e6,0x01c291d04 | 342 | .long 0x47db8317, 0x1c291d04 |
346 | .quad 0x00d3b6092,0x09e4addf8 | 343 | .long 0x0d3b6092, 0x9e4addf8 |
347 | .quad 0x0c96cfdc0,0x0740eef02 | 344 | .long 0xc96cfdc0, 0x740eef02 |
348 | .quad 0x18266e456,0x039d3b296 | 345 | .long 0x878a92a7, 0x39d3b296 |
349 | .quad 0x0daece73e,0x0083a6eec | 346 | .long 0xdaece73e, 0x083a6eec |
350 | .quad 0x0ab7aff2a,0x102f9b8a2 | 347 | .long 0xab7aff2a, 0x0715ce53 |
351 | .quad 0x1248ea574,0x1c1733996 | 348 | .long 0x2162d385, 0xc49f4f67 |
352 | .quad 0x083348832,0x14237f5e6 | 349 | .long 0x83348832, 0x47db8317 |
353 | .quad 0x12c743124,0x02ad91c30 | 350 | .long 0x299847d5, 0x2ad91c30 |
354 | .quad 0x0b9e02b86,0x00d3b6092 | 351 | .long 0xb9e02b86, 0x0d3b6092 |
355 | .quad 0x018b33a4e,0x06992cea2 | 352 | .long 0x18b33a4e, 0x6992cea2 |
356 | .quad 0x1b331e26a,0x0c96cfdc0 | 353 | .long 0xb6dd949b, 0xc96cfdc0 |
357 | .quad 0x17d35ba46,0x07e908048 | 354 | .long 0x78d9ccb7, 0x7e908048 |
358 | .quad 0x1bf2e8b8a,0x18266e456 | 355 | .long 0xbac2fd7b, 0x878a92a7 |
359 | .quad 0x1a3e0968a,0x11ed1f9d8 | 356 | .long 0xa60ce07b, 0x1b3d8f29 |
360 | .quad 0x0ce7f39f4,0x0daece73e | 357 | .long 0xce7f39f4, 0xdaece73e |
361 | .quad 0x061d82e56,0x0f1d0f55e | 358 | .long 0x61d82e56, 0xf1d0f55e |
362 | .quad 0x0d270f1a2,0x0ab7aff2a | 359 | .long 0xd270f1a2, 0xab7aff2a |
363 | .quad 0x1c3f5f66c,0x0a87ab8a8 | 360 | .long 0xc619809d, 0xa87ab8a8 |
364 | .quad 0x12ed0daac,0x1248ea574 | 361 | .long 0x2b3cac5d, 0x2162d385 |
365 | .quad 0x065863b64,0x08462d800 | 362 | .long 0x65863b64, 0x8462d800 |
366 | .quad 0x11eef4f8e,0x083348832 | 363 | .long 0x1b03397f, 0x83348832 |
367 | .quad 0x1ee54f54c,0x071d111a8 | 364 | .long 0xebb883bd, 0x71d111a8 |
368 | .quad 0x0b3e32c28,0x12c743124 | 365 | .long 0xb3e32c28, 0x299847d5 |
369 | .quad 0x0064f7f26,0x0ffd852c6 | 366 | .long 0x064f7f26, 0xffd852c6 |
370 | .quad 0x0dd7e3b0c,0x0b9e02b86 | 367 | .long 0xdd7e3b0c, 0xb9e02b86 |
371 | .quad 0x0f285651c,0x0dcb17aa4 | 368 | .long 0xf285651c, 0xdcb17aa4 |
372 | .quad 0x010746f3c,0x018b33a4e | 369 | .long 0x10746f3c, 0x18b33a4e |
373 | .quad 0x1c24afea4,0x0f37c5aee | 370 | .long 0xc7a68855, 0xf37c5aee |
374 | .quad 0x0271d9844,0x1b331e26a | 371 | .long 0x271d9844, 0xb6dd949b |
375 | .quad 0x08e766a0c,0x06051d5a2 | 372 | .long 0x8e766a0c, 0x6051d5a2 |
376 | .quad 0x093a5f730,0x17d35ba46 | 373 | .long 0x93a5f730, 0x78d9ccb7 |
377 | .quad 0x06cb08e5c,0x11d5ca20e | 374 | .long 0x6cb08e5c, 0x18b0d4ff |
378 | .quad 0x06b749fb2,0x1bf2e8b8a | 375 | .long 0x6b749fb2, 0xbac2fd7b |
379 | .quad 0x1167f94f2,0x021f3d99c | 376 | .long 0x1393e203, 0x21f3d99c |
380 | .quad 0x0cec3662e,0x1a3e0968a | 377 | .long 0xcec3662e, 0xa60ce07b |
381 | .quad 0x19329634a,0x08f158014 | 378 | .long 0x96c515bb, 0x8f158014 |
382 | .quad 0x0e6fc4e6a,0x0ce7f39f4 | 379 | .long 0xe6fc4e6a, 0xce7f39f4 |
383 | .quad 0x08227bb8a,0x1a5e82106 | 380 | .long 0x8227bb8a, 0xa00457f7 |
384 | .quad 0x0b0cd4768,0x061d82e56 | 381 | .long 0xb0cd4768, 0x61d82e56 |
385 | .quad 0x13c2b89c4,0x188815ab2 | 382 | .long 0x39c7ff35, 0x8d6d2c43 |
386 | .quad 0x0d7a4825c,0x0d270f1a2 | 383 | .long 0xd7a4825c, 0xd270f1a2 |
387 | .quad 0x10f5ff2ba,0x105405f3e | 384 | .long 0x0ab3844b, 0x00ac29cf |
388 | .quad 0x00167d312,0x1c3f5f66c | 385 | .long 0x0167d312, 0xc619809d |
389 | .quad 0x0f6076544,0x0e9adf796 | 386 | .long 0xf6076544, 0xe9adf796 |
390 | .quad 0x026f6a60a,0x12ed0daac | 387 | .long 0x26f6a60a, 0x2b3cac5d |
391 | .quad 0x1a2adb74e,0x096638b34 | 388 | .long 0xa741c1bf, 0x96638b34 |
392 | .quad 0x19d34af3a,0x065863b64 | 389 | .long 0x98d8d9cb, 0x65863b64 |
393 | .quad 0x049c3cc9c,0x1e50585a0 | 390 | .long 0x49c3cc9c, 0xe0e9f351 |
394 | .quad 0x068bce87a,0x11eef4f8e | 391 | .long 0x68bce87a, 0x1b03397f |
395 | .quad 0x1524fa6c6,0x19f1c69dc | 392 | .long 0x57a3d037, 0x9af01f2d |
396 | .quad 0x16cba8aca,0x1ee54f54c | 393 | .long 0x6956fc3b, 0xebb883bd |
397 | .quad 0x042d98888,0x12913343e | 394 | .long 0x42d98888, 0x2cff42cf |
398 | .quad 0x1329d9f7e,0x0b3e32c28 | 395 | .long 0x3771e98f, 0xb3e32c28 |
399 | .quad 0x1b1c69528,0x088f25a3a | 396 | .long 0xb42ae3d9, 0x88f25a3a |
400 | .quad 0x02178513a,0x0064f7f26 | 397 | .long 0x2178513a, 0x064f7f26 |
401 | .quad 0x0e0ac139e,0x04e36f0b0 | 398 | .long 0xe0ac139e, 0x4e36f0b0 |
402 | .quad 0x0170076fa,0x0dd7e3b0c | 399 | .long 0x170076fa, 0xdd7e3b0c |
403 | .quad 0x141a1a2e2,0x0bd6f81f8 | 400 | .long 0x444dd413, 0xbd6f81f8 |
404 | .quad 0x16ad828b4,0x0f285651c | 401 | .long 0x6f345e45, 0xf285651c |
405 | .quad 0x041d17b64,0x19425cbba | 402 | .long 0x41d17b64, 0x91c9bd4b |
406 | .quad 0x1fae1cc66,0x010746f3c | 403 | .long 0xff0dba97, 0x10746f3c |
407 | .quad 0x1a75b4b00,0x18db37e8a | 404 | .long 0xa2b73df1, 0x885f087b |
408 | .quad 0x0f872e54c,0x1c24afea4 | 405 | .long 0xf872e54c, 0xc7a68855 |
409 | .quad 0x01e41e9fc,0x04c144932 | 406 | .long 0x1e41e9fc, 0x4c144932 |
410 | .quad 0x086d8e4d2,0x0271d9844 | 407 | .long 0x86d8e4d2, 0x271d9844 |
411 | .quad 0x160f7af7a,0x052148f02 | 408 | .long 0x651bd98b, 0x52148f02 |
412 | .quad 0x05bb8f1bc,0x08e766a0c | 409 | .long 0x5bb8f1bc, 0x8e766a0c |
413 | .quad 0x0a90fd27a,0x0a3c6f37a | 410 | .long 0xa90fd27a, 0xa3c6f37a |
414 | .quad 0x0b3af077a,0x093a5f730 | 411 | .long 0xb3af077a, 0x93a5f730 |
415 | .quad 0x04984d782,0x1d22c238e | 412 | .long 0x4984d782, 0xd7c0557f |
416 | .quad 0x0ca6ef3ac,0x06cb08e5c | 413 | .long 0xca6ef3ac, 0x6cb08e5c |
417 | .quad 0x0234e0b26,0x063ded06a | 414 | .long 0x234e0b26, 0x63ded06a |
418 | .quad 0x1d88abd4a,0x06b749fb2 | 415 | .long 0xdd66cbbb, 0x6b749fb2 |
419 | .quad 0x04597456a,0x04d56973c | 416 | .long 0x4597456a, 0x4d56973c |
420 | .quad 0x0e9e28eb4,0x1167f94f2 | 417 | .long 0xe9e28eb4, 0x1393e203 |
421 | .quad 0x07b3ff57a,0x19385bf2e | 418 | .long 0x7b3ff57a, 0x9669c9df |
422 | .quad 0x0c9c8b782,0x0cec3662e | 419 | .long 0xc9c8b782, 0xcec3662e |
423 | .quad 0x13a9cba9e,0x0e417f38a | 420 | .long 0x3f70cc6f, 0xe417f38a |
424 | .quad 0x093e106a4,0x19329634a | 421 | .long 0x93e106a4, 0x96c515bb |
425 | .quad 0x167001a9c,0x14e727980 | 422 | .long 0x62ec6c6d, 0x4b9e0f71 |
426 | .quad 0x1ddffc5d4,0x0e6fc4e6a | 423 | .long 0xd813b325, 0xe6fc4e6a |
427 | .quad 0x00df04680,0x0d104b8fc | 424 | .long 0x0df04680, 0xd104b8fc |
428 | .quad 0x02342001e,0x08227bb8a | 425 | .long 0x2342001e, 0x8227bb8a |
429 | .quad 0x00a2a8d7e,0x05b397730 | 426 | .long 0x0a2a8d7e, 0x5b397730 |
430 | .quad 0x168763fa6,0x0b0cd4768 | 427 | .long 0x6d9a4957, 0xb0cd4768 |
431 | .quad 0x1ed5a407a,0x0e78eb416 | 428 | .long 0xe8b6368b, 0xe78eb416 |
432 | .quad 0x0d2c3ed1a,0x13c2b89c4 | 429 | .long 0xd2c3ed1a, 0x39c7ff35 |
433 | .quad 0x0995a5724,0x1641378f0 | 430 | .long 0x995a5724, 0x61ff0e01 |
434 | .quad 0x19b1afbc4,0x0d7a4825c | 431 | .long 0x9ef68d35, 0xd7a4825c |
435 | .quad 0x109ffedc0,0x08d96551c | 432 | .long 0x0c139b31, 0x8d96551c |
436 | .quad 0x0f2271e60,0x10f5ff2ba | 433 | .long 0xf2271e60, 0x0ab3844b |
437 | .quad 0x00b0bf8ca,0x00bf80dd2 | 434 | .long 0x0b0bf8ca, 0x0bf80dd2 |
438 | .quad 0x123888b7a,0x00167d312 | 435 | .long 0x2664fd8b, 0x0167d312 |
439 | .quad 0x1e888f7dc,0x18dcddd1c | 436 | .long 0xed64812d, 0x8821abed |
440 | .quad 0x002ee03b2,0x0f6076544 | 437 | .long 0x02ee03b2, 0xf6076544 |
441 | .quad 0x183e8d8fe,0x06a45d2b2 | 438 | .long 0x8604ae0f, 0x6a45d2b2 |
442 | .quad 0x133d7a042,0x026f6a60a | 439 | .long 0x363bd6b3, 0x26f6a60a |
443 | .quad 0x116b0f50c,0x1dd3e10e8 | 440 | .long 0x135c83fd, 0xd8d26619 |
444 | .quad 0x05fabe670,0x1a2adb74e | 441 | .long 0x5fabe670, 0xa741c1bf |
445 | .quad 0x130004488,0x0de87806c | 442 | .long 0x35ec3279, 0xde87806c |
446 | .quad 0x000bcf5f6,0x19d34af3a | 443 | .long 0x00bcf5f6, 0x98d8d9cb |
447 | .quad 0x18f0c7078,0x014338754 | 444 | .long 0x8ae00689, 0x14338754 |
448 | .quad 0x017f27698,0x049c3cc9c | 445 | .long 0x17f27698, 0x49c3cc9c |
449 | .quad 0x058ca5f00,0x15e3e77ee | 446 | .long 0x58ca5f00, 0x5bd2011f |
450 | .quad 0x1af900c24,0x068bce87a | 447 | .long 0xaa7c7ad5, 0x68bce87a |
451 | .quad 0x0b5cfca28,0x0dd07448e | 448 | .long 0xb5cfca28, 0xdd07448e |
452 | .quad 0x0ded288f8,0x1524fa6c6 | 449 | .long 0xded288f8, 0x57a3d037 |
453 | .quad 0x059f229bc,0x1d8048348 | 450 | .long 0x59f229bc, 0xdde8f5b9 |
454 | .quad 0x06d390dec,0x16cba8aca | 451 | .long 0x6d390dec, 0x6956fc3b |
455 | .quad 0x037170390,0x0a3e3e02c | 452 | .long 0x37170390, 0xa3e3e02c |
456 | .quad 0x06353c1cc,0x042d98888 | 453 | .long 0x6353c1cc, 0x42d98888 |
457 | .quad 0x0c4584f5c,0x0d73c7bea | 454 | .long 0xc4584f5c, 0xd73c7bea |
458 | .quad 0x1f16a3418,0x1329d9f7e | 455 | .long 0xf48642e9, 0x3771e98f |
459 | .quad 0x0531377e2,0x185137662 | 456 | .long 0x531377e2, 0x80ff0093 |
460 | .quad 0x1d8d9ca7c,0x1b1c69528 | 457 | .long 0xdd35bc8d, 0xb42ae3d9 |
461 | .quad 0x0b25b29f2,0x18a08b5bc | 458 | .long 0xb25b29f2, 0x8fe4c34d |
462 | .quad 0x19fb2a8b0,0x02178513a | 459 | .long 0x9a5ede41, 0x2178513a |
463 | .quad 0x1a08fe6ac,0x1da758ae0 | 460 | .long 0xa563905d, 0xdf99fc11 |
464 | .quad 0x045cddf4e,0x0e0ac139e | 461 | .long 0x45cddf4e, 0xe0ac139e |
465 | .quad 0x1a91647f2,0x169cf9eb0 | 462 | .long 0xacfa3103, 0x6c23e841 |
466 | .quad 0x1a0f717c4,0x0170076fa | 463 | .long 0xa51b6135, 0x170076fa |
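The K_table rework above halves each table row from two .quad constants to two
.long constants (and drops the alignment from 64 to 8), relying on PMOVZXDQ to
zero-extend the pair into the two 64-bit lanes that PCLMULQDQ consumes. A hedged
C model of the new lookup follows; the struct and function names are
illustrative, not kernel symbols:

#include <stdint.h>

struct k_pair {
	uint32_t c0, c1;	/* one 8-byte K_table row: .long C0, C1 */
};

/* Models:  lea (K_table-8)(%rip), bufp ; shlq $3, %rax ;
 *          pmovzxdq (bufp,%rax), %xmm0
 * Entry idx (first entry is for idx 1) is read, and each 32-bit
 * constant is zero-extended into its own 64-bit lane of %xmm0.     */
static void load_k_consts(const struct k_pair *k_table, unsigned int idx,
			  uint64_t xmm0[2])
{
	const struct k_pair *row = &k_table[idx - 1];	/* K_table - 8 + idx*8 */

	xmm0[0] = (uint64_t)row->c0;	/* first .long  -> low  lane */
	xmm0[1] = (uint64_t)row->c1;	/* second .long -> high lane */
}

The PCLMULQDQ immediate then selects which lane enters each carry-less multiply,
and the leal (%eax,%eax,2) / subq pair reproduces the old "tmp -= rax*24"
bookkeeping in two instructions instead of five.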
diff --git a/arch/x86/crypto/des3_ede-asm_64.S b/arch/x86/crypto/des3_ede-asm_64.S
new file mode 100644
index 000000000000..038f6ae87c5e
--- /dev/null
+++ b/arch/x86/crypto/des3_ede-asm_64.S
@@ -0,0 +1,805 @@
1 | /* | ||
2 | * des3_ede-asm_64.S - x86-64 assembly implementation of 3DES cipher | ||
3 | * | ||
4 | * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | */ | ||
16 | |||
17 | #include <linux/linkage.h> | ||
18 | |||
19 | .file "des3_ede-asm_64.S" | ||
20 | .text | ||
21 | |||
22 | #define s1 .L_s1 | ||
23 | #define s2 ((s1) + (64*8)) | ||
24 | #define s3 ((s2) + (64*8)) | ||
25 | #define s4 ((s3) + (64*8)) | ||
26 | #define s5 ((s4) + (64*8)) | ||
27 | #define s6 ((s5) + (64*8)) | ||
28 | #define s7 ((s6) + (64*8)) | ||
29 | #define s8 ((s7) + (64*8)) | ||
30 | |||
31 | /* register macros */ | ||
32 | #define CTX %rdi | ||
33 | |||
34 | #define RL0 %r8 | ||
35 | #define RL1 %r9 | ||
36 | #define RL2 %r10 | ||
37 | |||
38 | #define RL0d %r8d | ||
39 | #define RL1d %r9d | ||
40 | #define RL2d %r10d | ||
41 | |||
42 | #define RR0 %r11 | ||
43 | #define RR1 %r12 | ||
44 | #define RR2 %r13 | ||
45 | |||
46 | #define RR0d %r11d | ||
47 | #define RR1d %r12d | ||
48 | #define RR2d %r13d | ||
49 | |||
50 | #define RW0 %rax | ||
51 | #define RW1 %rbx | ||
52 | #define RW2 %rcx | ||
53 | |||
54 | #define RW0d %eax | ||
55 | #define RW1d %ebx | ||
56 | #define RW2d %ecx | ||
57 | |||
58 | #define RW0bl %al | ||
59 | #define RW1bl %bl | ||
60 | #define RW2bl %cl | ||
61 | |||
62 | #define RW0bh %ah | ||
63 | #define RW1bh %bh | ||
64 | #define RW2bh %ch | ||
65 | |||
66 | #define RT0 %r15 | ||
67 | #define RT1 %rbp | ||
68 | #define RT2 %r14 | ||
69 | #define RT3 %rdx | ||
70 | |||
71 | #define RT0d %r15d | ||
72 | #define RT1d %ebp | ||
73 | #define RT2d %r14d | ||
74 | #define RT3d %edx | ||
75 | |||
76 | /*********************************************************************** | ||
77 | * 1-way 3DES | ||
78 | ***********************************************************************/ | ||
79 | #define do_permutation(a, b, offset, mask) \ | ||
80 | movl a, RT0d; \ | ||
81 | shrl $(offset), RT0d; \ | ||
82 | xorl b, RT0d; \ | ||
83 | andl $(mask), RT0d; \ | ||
84 | xorl RT0d, b; \ | ||
85 | shll $(offset), RT0d; \ | ||
86 | xorl RT0d, a; | ||
87 | |||
88 | #define expand_to_64bits(val, mask) \ | ||
89 | movl val##d, RT0d; \ | ||
90 | rorl $4, RT0d; \ | ||
91 | shlq $32, RT0; \ | ||
92 | orq RT0, val; \ | ||
93 | andq mask, val; | ||
94 | |||
95 | #define compress_to_64bits(val) \ | ||
96 | movq val, RT0; \ | ||
97 | shrq $32, RT0; \ | ||
98 | roll $4, RT0d; \ | ||
99 | orl RT0d, val##d; | ||
100 | |||
101 | #define initial_permutation(left, right) \ | ||
102 | do_permutation(left##d, right##d, 4, 0x0f0f0f0f); \ | ||
103 | do_permutation(left##d, right##d, 16, 0x0000ffff); \ | ||
104 | do_permutation(right##d, left##d, 2, 0x33333333); \ | ||
105 | do_permutation(right##d, left##d, 8, 0x00ff00ff); \ | ||
106 | movabs $0x3f3f3f3f3f3f3f3f, RT3; \ | ||
107 | movl left##d, RW0d; \ | ||
108 | roll $1, right##d; \ | ||
109 | xorl right##d, RW0d; \ | ||
110 | andl $0xaaaaaaaa, RW0d; \ | ||
111 | xorl RW0d, left##d; \ | ||
112 | xorl RW0d, right##d; \ | ||
113 | roll $1, left##d; \ | ||
114 | expand_to_64bits(right, RT3); \ | ||
115 | expand_to_64bits(left, RT3); | ||
116 | |||
117 | #define final_permutation(left, right) \ | ||
118 | compress_to_64bits(right); \ | ||
119 | compress_to_64bits(left); \ | ||
120 | movl right##d, RW0d; \ | ||
121 | rorl $1, left##d; \ | ||
122 | xorl left##d, RW0d; \ | ||
123 | andl $0xaaaaaaaa, RW0d; \ | ||
124 | xorl RW0d, right##d; \ | ||
125 | xorl RW0d, left##d; \ | ||
126 | rorl $1, right##d; \ | ||
127 | do_permutation(right##d, left##d, 8, 0x00ff00ff); \ | ||
128 | do_permutation(right##d, left##d, 2, 0x33333333); \ | ||
129 | do_permutation(left##d, right##d, 16, 0x0000ffff); \ | ||
130 | do_permutation(left##d, right##d, 4, 0x0f0f0f0f); | ||
131 | |||
132 | #define round1(n, from, to, load_next_key) \ | ||
133 | xorq from, RW0; \ | ||
134 | \ | ||
135 | movzbl RW0bl, RT0d; \ | ||
136 | movzbl RW0bh, RT1d; \ | ||
137 | shrq $16, RW0; \ | ||
138 | movzbl RW0bl, RT2d; \ | ||
139 | movzbl RW0bh, RT3d; \ | ||
140 | shrq $16, RW0; \ | ||
141 | movq s8(, RT0, 8), RT0; \ | ||
142 | xorq s6(, RT1, 8), to; \ | ||
143 | movzbl RW0bl, RL1d; \ | ||
144 | movzbl RW0bh, RT1d; \ | ||
145 | shrl $16, RW0d; \ | ||
146 | xorq s4(, RT2, 8), RT0; \ | ||
147 | xorq s2(, RT3, 8), to; \ | ||
148 | movzbl RW0bl, RT2d; \ | ||
149 | movzbl RW0bh, RT3d; \ | ||
150 | xorq s7(, RL1, 8), RT0; \ | ||
151 | xorq s5(, RT1, 8), to; \ | ||
152 | xorq s3(, RT2, 8), RT0; \ | ||
153 | load_next_key(n, RW0); \ | ||
154 | xorq RT0, to; \ | ||
155 | xorq s1(, RT3, 8), to; \ | ||
156 | |||
157 | #define load_next_key(n, RWx) \ | ||
158 | movq (((n) + 1) * 8)(CTX), RWx; | ||
159 | |||
160 | #define dummy2(a, b) /*_*/ | ||
161 | |||
162 | #define read_block(io, left, right) \ | ||
163 | movl (io), left##d; \ | ||
164 | movl 4(io), right##d; \ | ||
165 | bswapl left##d; \ | ||
166 | bswapl right##d; | ||
167 | |||
168 | #define write_block(io, left, right) \ | ||
169 | bswapl left##d; \ | ||
170 | bswapl right##d; \ | ||
171 | movl left##d, (io); \ | ||
172 | movl right##d, 4(io); | ||
173 | |||
174 | ENTRY(des3_ede_x86_64_crypt_blk) | ||
175 | /* input: | ||
176 | * %rdi: round keys, CTX | ||
177 | * %rsi: dst | ||
178 | * %rdx: src | ||
179 | */ | ||
180 | pushq %rbp; | ||
181 | pushq %rbx; | ||
182 | pushq %r12; | ||
183 | pushq %r13; | ||
184 | pushq %r14; | ||
185 | pushq %r15; | ||
186 | |||
187 | read_block(%rdx, RL0, RR0); | ||
188 | initial_permutation(RL0, RR0); | ||
189 | |||
190 | movq (CTX), RW0; | ||
191 | |||
192 | round1(0, RR0, RL0, load_next_key); | ||
193 | round1(1, RL0, RR0, load_next_key); | ||
194 | round1(2, RR0, RL0, load_next_key); | ||
195 | round1(3, RL0, RR0, load_next_key); | ||
196 | round1(4, RR0, RL0, load_next_key); | ||
197 | round1(5, RL0, RR0, load_next_key); | ||
198 | round1(6, RR0, RL0, load_next_key); | ||
199 | round1(7, RL0, RR0, load_next_key); | ||
200 | round1(8, RR0, RL0, load_next_key); | ||
201 | round1(9, RL0, RR0, load_next_key); | ||
202 | round1(10, RR0, RL0, load_next_key); | ||
203 | round1(11, RL0, RR0, load_next_key); | ||
204 | round1(12, RR0, RL0, load_next_key); | ||
205 | round1(13, RL0, RR0, load_next_key); | ||
206 | round1(14, RR0, RL0, load_next_key); | ||
207 | round1(15, RL0, RR0, load_next_key); | ||
208 | |||
209 | round1(16+0, RL0, RR0, load_next_key); | ||
210 | round1(16+1, RR0, RL0, load_next_key); | ||
211 | round1(16+2, RL0, RR0, load_next_key); | ||
212 | round1(16+3, RR0, RL0, load_next_key); | ||
213 | round1(16+4, RL0, RR0, load_next_key); | ||
214 | round1(16+5, RR0, RL0, load_next_key); | ||
215 | round1(16+6, RL0, RR0, load_next_key); | ||
216 | round1(16+7, RR0, RL0, load_next_key); | ||
217 | round1(16+8, RL0, RR0, load_next_key); | ||
218 | round1(16+9, RR0, RL0, load_next_key); | ||
219 | round1(16+10, RL0, RR0, load_next_key); | ||
220 | round1(16+11, RR0, RL0, load_next_key); | ||
221 | round1(16+12, RL0, RR0, load_next_key); | ||
222 | round1(16+13, RR0, RL0, load_next_key); | ||
223 | round1(16+14, RL0, RR0, load_next_key); | ||
224 | round1(16+15, RR0, RL0, load_next_key); | ||
225 | |||
226 | round1(32+0, RR0, RL0, load_next_key); | ||
227 | round1(32+1, RL0, RR0, load_next_key); | ||
228 | round1(32+2, RR0, RL0, load_next_key); | ||
229 | round1(32+3, RL0, RR0, load_next_key); | ||
230 | round1(32+4, RR0, RL0, load_next_key); | ||
231 | round1(32+5, RL0, RR0, load_next_key); | ||
232 | round1(32+6, RR0, RL0, load_next_key); | ||
233 | round1(32+7, RL0, RR0, load_next_key); | ||
234 | round1(32+8, RR0, RL0, load_next_key); | ||
235 | round1(32+9, RL0, RR0, load_next_key); | ||
236 | round1(32+10, RR0, RL0, load_next_key); | ||
237 | round1(32+11, RL0, RR0, load_next_key); | ||
238 | round1(32+12, RR0, RL0, load_next_key); | ||
239 | round1(32+13, RL0, RR0, load_next_key); | ||
240 | round1(32+14, RR0, RL0, load_next_key); | ||
241 | round1(32+15, RL0, RR0, dummy2); | ||
242 | |||
243 | final_permutation(RR0, RL0); | ||
244 | write_block(%rsi, RR0, RL0); | ||
245 | |||
246 | popq %r15; | ||
247 | popq %r14; | ||
248 | popq %r13; | ||
249 | popq %r12; | ||
250 | popq %rbx; | ||
251 | popq %rbp; | ||
252 | |||
253 | ret; | ||
254 | ENDPROC(des3_ede_x86_64_crypt_blk) | ||
255 | |||
256 | /*********************************************************************** | ||
257 | * 3-way 3DES | ||
258 | ***********************************************************************/ | ||
259 | #define expand_to_64bits(val, mask) \ | ||
260 | movl val##d, RT0d; \ | ||
261 | rorl $4, RT0d; \ | ||
262 | shlq $32, RT0; \ | ||
263 | orq RT0, val; \ | ||
264 | andq mask, val; | ||
265 | |||
266 | #define compress_to_64bits(val) \ | ||
267 | movq val, RT0; \ | ||
268 | shrq $32, RT0; \ | ||
269 | roll $4, RT0d; \ | ||
270 | orl RT0d, val##d; | ||
271 | |||
272 | #define initial_permutation3(left, right) \ | ||
273 | do_permutation(left##0d, right##0d, 4, 0x0f0f0f0f); \ | ||
274 | do_permutation(left##0d, right##0d, 16, 0x0000ffff); \ | ||
275 | do_permutation(left##1d, right##1d, 4, 0x0f0f0f0f); \ | ||
276 | do_permutation(left##1d, right##1d, 16, 0x0000ffff); \ | ||
277 | do_permutation(left##2d, right##2d, 4, 0x0f0f0f0f); \ | ||
278 | do_permutation(left##2d, right##2d, 16, 0x0000ffff); \ | ||
279 | \ | ||
280 | do_permutation(right##0d, left##0d, 2, 0x33333333); \ | ||
281 | do_permutation(right##0d, left##0d, 8, 0x00ff00ff); \ | ||
282 | do_permutation(right##1d, left##1d, 2, 0x33333333); \ | ||
283 | do_permutation(right##1d, left##1d, 8, 0x00ff00ff); \ | ||
284 | do_permutation(right##2d, left##2d, 2, 0x33333333); \ | ||
285 | do_permutation(right##2d, left##2d, 8, 0x00ff00ff); \ | ||
286 | \ | ||
287 | movabs $0x3f3f3f3f3f3f3f3f, RT3; \ | ||
288 | \ | ||
289 | movl left##0d, RW0d; \ | ||
290 | roll $1, right##0d; \ | ||
291 | xorl right##0d, RW0d; \ | ||
292 | andl $0xaaaaaaaa, RW0d; \ | ||
293 | xorl RW0d, left##0d; \ | ||
294 | xorl RW0d, right##0d; \ | ||
295 | roll $1, left##0d; \ | ||
296 | expand_to_64bits(right##0, RT3); \ | ||
297 | expand_to_64bits(left##0, RT3); \ | ||
298 | movl left##1d, RW1d; \ | ||
299 | roll $1, right##1d; \ | ||
300 | xorl right##1d, RW1d; \ | ||
301 | andl $0xaaaaaaaa, RW1d; \ | ||
302 | xorl RW1d, left##1d; \ | ||
303 | xorl RW1d, right##1d; \ | ||
304 | roll $1, left##1d; \ | ||
305 | expand_to_64bits(right##1, RT3); \ | ||
306 | expand_to_64bits(left##1, RT3); \ | ||
307 | movl left##2d, RW2d; \ | ||
308 | roll $1, right##2d; \ | ||
309 | xorl right##2d, RW2d; \ | ||
310 | andl $0xaaaaaaaa, RW2d; \ | ||
311 | xorl RW2d, left##2d; \ | ||
312 | xorl RW2d, right##2d; \ | ||
313 | roll $1, left##2d; \ | ||
314 | expand_to_64bits(right##2, RT3); \ | ||
315 | expand_to_64bits(left##2, RT3); | ||
316 | |||
317 | #define final_permutation3(left, right) \ | ||
318 | compress_to_64bits(right##0); \ | ||
319 | compress_to_64bits(left##0); \ | ||
320 | movl right##0d, RW0d; \ | ||
321 | rorl $1, left##0d; \ | ||
322 | xorl left##0d, RW0d; \ | ||
323 | andl $0xaaaaaaaa, RW0d; \ | ||
324 | xorl RW0d, right##0d; \ | ||
325 | xorl RW0d, left##0d; \ | ||
326 | rorl $1, right##0d; \ | ||
327 | compress_to_64bits(right##1); \ | ||
328 | compress_to_64bits(left##1); \ | ||
329 | movl right##1d, RW1d; \ | ||
330 | rorl $1, left##1d; \ | ||
331 | xorl left##1d, RW1d; \ | ||
332 | andl $0xaaaaaaaa, RW1d; \ | ||
333 | xorl RW1d, right##1d; \ | ||
334 | xorl RW1d, left##1d; \ | ||
335 | rorl $1, right##1d; \ | ||
336 | compress_to_64bits(right##2); \ | ||
337 | compress_to_64bits(left##2); \ | ||
338 | movl right##2d, RW2d; \ | ||
339 | rorl $1, left##2d; \ | ||
340 | xorl left##2d, RW2d; \ | ||
341 | andl $0xaaaaaaaa, RW2d; \ | ||
342 | xorl RW2d, right##2d; \ | ||
343 | xorl RW2d, left##2d; \ | ||
344 | rorl $1, right##2d; \ | ||
345 | \ | ||
346 | do_permutation(right##0d, left##0d, 8, 0x00ff00ff); \ | ||
347 | do_permutation(right##0d, left##0d, 2, 0x33333333); \ | ||
348 | do_permutation(right##1d, left##1d, 8, 0x00ff00ff); \ | ||
349 | do_permutation(right##1d, left##1d, 2, 0x33333333); \ | ||
350 | do_permutation(right##2d, left##2d, 8, 0x00ff00ff); \ | ||
351 | do_permutation(right##2d, left##2d, 2, 0x33333333); \ | ||
352 | \ | ||
353 | do_permutation(left##0d, right##0d, 16, 0x0000ffff); \ | ||
354 | do_permutation(left##0d, right##0d, 4, 0x0f0f0f0f); \ | ||
355 | do_permutation(left##1d, right##1d, 16, 0x0000ffff); \ | ||
356 | do_permutation(left##1d, right##1d, 4, 0x0f0f0f0f); \ | ||
357 | do_permutation(left##2d, right##2d, 16, 0x0000ffff); \ | ||
358 | do_permutation(left##2d, right##2d, 4, 0x0f0f0f0f); | ||
359 | |||
360 | #define round3(n, from, to, load_next_key, do_movq) \ | ||
361 | xorq from##0, RW0; \ | ||
362 | movzbl RW0bl, RT3d; \ | ||
363 | movzbl RW0bh, RT1d; \ | ||
364 | shrq $16, RW0; \ | ||
365 | xorq s8(, RT3, 8), to##0; \ | ||
366 | xorq s6(, RT1, 8), to##0; \ | ||
367 | movzbl RW0bl, RT3d; \ | ||
368 | movzbl RW0bh, RT1d; \ | ||
369 | shrq $16, RW0; \ | ||
370 | xorq s4(, RT3, 8), to##0; \ | ||
371 | xorq s2(, RT1, 8), to##0; \ | ||
372 | movzbl RW0bl, RT3d; \ | ||
373 | movzbl RW0bh, RT1d; \ | ||
374 | shrl $16, RW0d; \ | ||
375 | xorq s7(, RT3, 8), to##0; \ | ||
376 | xorq s5(, RT1, 8), to##0; \ | ||
377 | movzbl RW0bl, RT3d; \ | ||
378 | movzbl RW0bh, RT1d; \ | ||
379 | load_next_key(n, RW0); \ | ||
380 | xorq s3(, RT3, 8), to##0; \ | ||
381 | xorq s1(, RT1, 8), to##0; \ | ||
382 | xorq from##1, RW1; \ | ||
383 | movzbl RW1bl, RT3d; \ | ||
384 | movzbl RW1bh, RT1d; \ | ||
385 | shrq $16, RW1; \ | ||
386 | xorq s8(, RT3, 8), to##1; \ | ||
387 | xorq s6(, RT1, 8), to##1; \ | ||
388 | movzbl RW1bl, RT3d; \ | ||
389 | movzbl RW1bh, RT1d; \ | ||
390 | shrq $16, RW1; \ | ||
391 | xorq s4(, RT3, 8), to##1; \ | ||
392 | xorq s2(, RT1, 8), to##1; \ | ||
393 | movzbl RW1bl, RT3d; \ | ||
394 | movzbl RW1bh, RT1d; \ | ||
395 | shrl $16, RW1d; \ | ||
396 | xorq s7(, RT3, 8), to##1; \ | ||
397 | xorq s5(, RT1, 8), to##1; \ | ||
398 | movzbl RW1bl, RT3d; \ | ||
399 | movzbl RW1bh, RT1d; \ | ||
400 | do_movq(RW0, RW1); \ | ||
401 | xorq s3(, RT3, 8), to##1; \ | ||
402 | xorq s1(, RT1, 8), to##1; \ | ||
403 | xorq from##2, RW2; \ | ||
404 | movzbl RW2bl, RT3d; \ | ||
405 | movzbl RW2bh, RT1d; \ | ||
406 | shrq $16, RW2; \ | ||
407 | xorq s8(, RT3, 8), to##2; \ | ||
408 | xorq s6(, RT1, 8), to##2; \ | ||
409 | movzbl RW2bl, RT3d; \ | ||
410 | movzbl RW2bh, RT1d; \ | ||
411 | shrq $16, RW2; \ | ||
412 | xorq s4(, RT3, 8), to##2; \ | ||
413 | xorq s2(, RT1, 8), to##2; \ | ||
414 | movzbl RW2bl, RT3d; \ | ||
415 | movzbl RW2bh, RT1d; \ | ||
416 | shrl $16, RW2d; \ | ||
417 | xorq s7(, RT3, 8), to##2; \ | ||
418 | xorq s5(, RT1, 8), to##2; \ | ||
419 | movzbl RW2bl, RT3d; \ | ||
420 | movzbl RW2bh, RT1d; \ | ||
421 | do_movq(RW0, RW2); \ | ||
422 | xorq s3(, RT3, 8), to##2; \ | ||
423 | xorq s1(, RT1, 8), to##2; | ||
424 | |||
425 | #define __movq(src, dst) \ | ||
426 | movq src, dst; | ||
427 | |||
428 | ENTRY(des3_ede_x86_64_crypt_blk_3way) | ||
429 | /* input: | ||
430 | * %rdi: ctx, round keys | ||
431 | * %rsi: dst (3 blocks) | ||
432 | * %rdx: src (3 blocks) | ||
433 | */ | ||
434 | |||
435 | pushq %rbp; | ||
436 | pushq %rbx; | ||
437 | pushq %r12; | ||
438 | pushq %r13; | ||
439 | pushq %r14; | ||
440 | pushq %r15; | ||
441 | |||
442 | /* load input */ | ||
443 | movl 0 * 4(%rdx), RL0d; | ||
444 | movl 1 * 4(%rdx), RR0d; | ||
445 | movl 2 * 4(%rdx), RL1d; | ||
446 | movl 3 * 4(%rdx), RR1d; | ||
447 | movl 4 * 4(%rdx), RL2d; | ||
448 | movl 5 * 4(%rdx), RR2d; | ||
449 | |||
450 | bswapl RL0d; | ||
451 | bswapl RR0d; | ||
452 | bswapl RL1d; | ||
453 | bswapl RR1d; | ||
454 | bswapl RL2d; | ||
455 | bswapl RR2d; | ||
456 | |||
457 | initial_permutation3(RL, RR); | ||
458 | |||
459 | movq 0(CTX), RW0; | ||
460 | movq RW0, RW1; | ||
461 | movq RW0, RW2; | ||
462 | |||
463 | round3(0, RR, RL, load_next_key, __movq); | ||
464 | round3(1, RL, RR, load_next_key, __movq); | ||
465 | round3(2, RR, RL, load_next_key, __movq); | ||
466 | round3(3, RL, RR, load_next_key, __movq); | ||
467 | round3(4, RR, RL, load_next_key, __movq); | ||
468 | round3(5, RL, RR, load_next_key, __movq); | ||
469 | round3(6, RR, RL, load_next_key, __movq); | ||
470 | round3(7, RL, RR, load_next_key, __movq); | ||
471 | round3(8, RR, RL, load_next_key, __movq); | ||
472 | round3(9, RL, RR, load_next_key, __movq); | ||
473 | round3(10, RR, RL, load_next_key, __movq); | ||
474 | round3(11, RL, RR, load_next_key, __movq); | ||
475 | round3(12, RR, RL, load_next_key, __movq); | ||
476 | round3(13, RL, RR, load_next_key, __movq); | ||
477 | round3(14, RR, RL, load_next_key, __movq); | ||
478 | round3(15, RL, RR, load_next_key, __movq); | ||
479 | |||
480 | round3(16+0, RL, RR, load_next_key, __movq); | ||
481 | round3(16+1, RR, RL, load_next_key, __movq); | ||
482 | round3(16+2, RL, RR, load_next_key, __movq); | ||
483 | round3(16+3, RR, RL, load_next_key, __movq); | ||
484 | round3(16+4, RL, RR, load_next_key, __movq); | ||
485 | round3(16+5, RR, RL, load_next_key, __movq); | ||
486 | round3(16+6, RL, RR, load_next_key, __movq); | ||
487 | round3(16+7, RR, RL, load_next_key, __movq); | ||
488 | round3(16+8, RL, RR, load_next_key, __movq); | ||
489 | round3(16+9, RR, RL, load_next_key, __movq); | ||
490 | round3(16+10, RL, RR, load_next_key, __movq); | ||
491 | round3(16+11, RR, RL, load_next_key, __movq); | ||
492 | round3(16+12, RL, RR, load_next_key, __movq); | ||
493 | round3(16+13, RR, RL, load_next_key, __movq); | ||
494 | round3(16+14, RL, RR, load_next_key, __movq); | ||
495 | round3(16+15, RR, RL, load_next_key, __movq); | ||
496 | |||
497 | round3(32+0, RR, RL, load_next_key, __movq); | ||
498 | round3(32+1, RL, RR, load_next_key, __movq); | ||
499 | round3(32+2, RR, RL, load_next_key, __movq); | ||
500 | round3(32+3, RL, RR, load_next_key, __movq); | ||
501 | round3(32+4, RR, RL, load_next_key, __movq); | ||
502 | round3(32+5, RL, RR, load_next_key, __movq); | ||
503 | round3(32+6, RR, RL, load_next_key, __movq); | ||
504 | round3(32+7, RL, RR, load_next_key, __movq); | ||
505 | round3(32+8, RR, RL, load_next_key, __movq); | ||
506 | round3(32+9, RL, RR, load_next_key, __movq); | ||
507 | round3(32+10, RR, RL, load_next_key, __movq); | ||
508 | round3(32+11, RL, RR, load_next_key, __movq); | ||
509 | round3(32+12, RR, RL, load_next_key, __movq); | ||
510 | round3(32+13, RL, RR, load_next_key, __movq); | ||
511 | round3(32+14, RR, RL, load_next_key, __movq); | ||
512 | round3(32+15, RL, RR, dummy2, dummy2); | ||
513 | |||
514 | final_permutation3(RR, RL); | ||
515 | |||
516 | bswapl RR0d; | ||
517 | bswapl RL0d; | ||
518 | bswapl RR1d; | ||
519 | bswapl RL1d; | ||
520 | bswapl RR2d; | ||
521 | bswapl RL2d; | ||
522 | |||
523 | movl RR0d, 0 * 4(%rsi); | ||
524 | movl RL0d, 1 * 4(%rsi); | ||
525 | movl RR1d, 2 * 4(%rsi); | ||
526 | movl RL1d, 3 * 4(%rsi); | ||
527 | movl RR2d, 4 * 4(%rsi); | ||
528 | movl RL2d, 5 * 4(%rsi); | ||
529 | |||
530 | popq %r15; | ||
531 | popq %r14; | ||
532 | popq %r13; | ||
533 | popq %r12; | ||
534 | popq %rbx; | ||
535 | popq %rbp; | ||
536 | |||
537 | ret; | ||
538 | ENDPROC(des3_ede_x86_64_crypt_blk_3way) | ||
539 | |||
540 | .data | ||
541 | .align 16 | ||
542 | .L_s1: | ||
543 | .quad 0x0010100001010400, 0x0000000000000000 | ||
544 | .quad 0x0000100000010000, 0x0010100001010404 | ||
545 | .quad 0x0010100001010004, 0x0000100000010404 | ||
546 | .quad 0x0000000000000004, 0x0000100000010000 | ||
547 | .quad 0x0000000000000400, 0x0010100001010400 | ||
548 | .quad 0x0010100001010404, 0x0000000000000400 | ||
549 | .quad 0x0010000001000404, 0x0010100001010004 | ||
550 | .quad 0x0010000001000000, 0x0000000000000004 | ||
551 | .quad 0x0000000000000404, 0x0010000001000400 | ||
552 | .quad 0x0010000001000400, 0x0000100000010400 | ||
553 | .quad 0x0000100000010400, 0x0010100001010000 | ||
554 | .quad 0x0010100001010000, 0x0010000001000404 | ||
555 | .quad 0x0000100000010004, 0x0010000001000004 | ||
556 | .quad 0x0010000001000004, 0x0000100000010004 | ||
557 | .quad 0x0000000000000000, 0x0000000000000404 | ||
558 | .quad 0x0000100000010404, 0x0010000001000000 | ||
559 | .quad 0x0000100000010000, 0x0010100001010404 | ||
560 | .quad 0x0000000000000004, 0x0010100001010000 | ||
561 | .quad 0x0010100001010400, 0x0010000001000000 | ||
562 | .quad 0x0010000001000000, 0x0000000000000400 | ||
563 | .quad 0x0010100001010004, 0x0000100000010000 | ||
564 | .quad 0x0000100000010400, 0x0010000001000004 | ||
565 | .quad 0x0000000000000400, 0x0000000000000004 | ||
566 | .quad 0x0010000001000404, 0x0000100000010404 | ||
567 | .quad 0x0010100001010404, 0x0000100000010004 | ||
568 | .quad 0x0010100001010000, 0x0010000001000404 | ||
569 | .quad 0x0010000001000004, 0x0000000000000404 | ||
570 | .quad 0x0000100000010404, 0x0010100001010400 | ||
571 | .quad 0x0000000000000404, 0x0010000001000400 | ||
572 | .quad 0x0010000001000400, 0x0000000000000000 | ||
573 | .quad 0x0000100000010004, 0x0000100000010400 | ||
574 | .quad 0x0000000000000000, 0x0010100001010004 | ||
575 | .L_s2: | ||
576 | .quad 0x0801080200100020, 0x0800080000000000 | ||
577 | .quad 0x0000080000000000, 0x0001080200100020 | ||
578 | .quad 0x0001000000100000, 0x0000000200000020 | ||
579 | .quad 0x0801000200100020, 0x0800080200000020 | ||
580 | .quad 0x0800000200000020, 0x0801080200100020 | ||
581 | .quad 0x0801080000100000, 0x0800000000000000 | ||
582 | .quad 0x0800080000000000, 0x0001000000100000 | ||
583 | .quad 0x0000000200000020, 0x0801000200100020 | ||
584 | .quad 0x0001080000100000, 0x0001000200100020 | ||
585 | .quad 0x0800080200000020, 0x0000000000000000 | ||
586 | .quad 0x0800000000000000, 0x0000080000000000 | ||
587 | .quad 0x0001080200100020, 0x0801000000100000 | ||
588 | .quad 0x0001000200100020, 0x0800000200000020 | ||
589 | .quad 0x0000000000000000, 0x0001080000100000 | ||
590 | .quad 0x0000080200000020, 0x0801080000100000 | ||
591 | .quad 0x0801000000100000, 0x0000080200000020 | ||
592 | .quad 0x0000000000000000, 0x0001080200100020 | ||
593 | .quad 0x0801000200100020, 0x0001000000100000 | ||
594 | .quad 0x0800080200000020, 0x0801000000100000 | ||
595 | .quad 0x0801080000100000, 0x0000080000000000 | ||
596 | .quad 0x0801000000100000, 0x0800080000000000 | ||
597 | .quad 0x0000000200000020, 0x0801080200100020 | ||
598 | .quad 0x0001080200100020, 0x0000000200000020 | ||
599 | .quad 0x0000080000000000, 0x0800000000000000 | ||
600 | .quad 0x0000080200000020, 0x0801080000100000 | ||
601 | .quad 0x0001000000100000, 0x0800000200000020 | ||
602 | .quad 0x0001000200100020, 0x0800080200000020 | ||
603 | .quad 0x0800000200000020, 0x0001000200100020 | ||
604 | .quad 0x0001080000100000, 0x0000000000000000 | ||
605 | .quad 0x0800080000000000, 0x0000080200000020 | ||
606 | .quad 0x0800000000000000, 0x0801000200100020 | ||
607 | .quad 0x0801080200100020, 0x0001080000100000 | ||
608 | .L_s3: | ||
609 | .quad 0x0000002000000208, 0x0000202008020200 | ||
610 | .quad 0x0000000000000000, 0x0000200008020008 | ||
611 | .quad 0x0000002008000200, 0x0000000000000000 | ||
612 | .quad 0x0000202000020208, 0x0000002008000200 | ||
613 | .quad 0x0000200000020008, 0x0000000008000008 | ||
614 | .quad 0x0000000008000008, 0x0000200000020000 | ||
615 | .quad 0x0000202008020208, 0x0000200000020008 | ||
616 | .quad 0x0000200008020000, 0x0000002000000208 | ||
617 | .quad 0x0000000008000000, 0x0000000000000008 | ||
618 | .quad 0x0000202008020200, 0x0000002000000200 | ||
619 | .quad 0x0000202000020200, 0x0000200008020000 | ||
620 | .quad 0x0000200008020008, 0x0000202000020208 | ||
621 | .quad 0x0000002008000208, 0x0000202000020200 | ||
622 | .quad 0x0000200000020000, 0x0000002008000208 | ||
623 | .quad 0x0000000000000008, 0x0000202008020208 | ||
624 | .quad 0x0000002000000200, 0x0000000008000000 | ||
625 | .quad 0x0000202008020200, 0x0000000008000000 | ||
626 | .quad 0x0000200000020008, 0x0000002000000208 | ||
627 | .quad 0x0000200000020000, 0x0000202008020200 | ||
628 | .quad 0x0000002008000200, 0x0000000000000000 | ||
629 | .quad 0x0000002000000200, 0x0000200000020008 | ||
630 | .quad 0x0000202008020208, 0x0000002008000200 | ||
631 | .quad 0x0000000008000008, 0x0000002000000200 | ||
632 | .quad 0x0000000000000000, 0x0000200008020008 | ||
633 | .quad 0x0000002008000208, 0x0000200000020000 | ||
634 | .quad 0x0000000008000000, 0x0000202008020208 | ||
635 | .quad 0x0000000000000008, 0x0000202000020208 | ||
636 | .quad 0x0000202000020200, 0x0000000008000008 | ||
637 | .quad 0x0000200008020000, 0x0000002008000208 | ||
638 | .quad 0x0000002000000208, 0x0000200008020000 | ||
639 | .quad 0x0000202000020208, 0x0000000000000008 | ||
640 | .quad 0x0000200008020008, 0x0000202000020200 | ||
641 | .L_s4: | ||
642 | .quad 0x1008020000002001, 0x1000020800002001 | ||
643 | .quad 0x1000020800002001, 0x0000000800000000 | ||
644 | .quad 0x0008020800002000, 0x1008000800000001 | ||
645 | .quad 0x1008000000000001, 0x1000020000002001 | ||
646 | .quad 0x0000000000000000, 0x0008020000002000 | ||
647 | .quad 0x0008020000002000, 0x1008020800002001 | ||
648 | .quad 0x1000000800000001, 0x0000000000000000 | ||
649 | .quad 0x0008000800000000, 0x1008000000000001 | ||
650 | .quad 0x1000000000000001, 0x0000020000002000 | ||
651 | .quad 0x0008000000000000, 0x1008020000002001 | ||
652 | .quad 0x0000000800000000, 0x0008000000000000 | ||
653 | .quad 0x1000020000002001, 0x0000020800002000 | ||
654 | .quad 0x1008000800000001, 0x1000000000000001 | ||
655 | .quad 0x0000020800002000, 0x0008000800000000 | ||
656 | .quad 0x0000020000002000, 0x0008020800002000 | ||
657 | .quad 0x1008020800002001, 0x1000000800000001 | ||
658 | .quad 0x0008000800000000, 0x1008000000000001 | ||
659 | .quad 0x0008020000002000, 0x1008020800002001 | ||
660 | .quad 0x1000000800000001, 0x0000000000000000 | ||
661 | .quad 0x0000000000000000, 0x0008020000002000 | ||
662 | .quad 0x0000020800002000, 0x0008000800000000 | ||
663 | .quad 0x1008000800000001, 0x1000000000000001 | ||
664 | .quad 0x1008020000002001, 0x1000020800002001 | ||
665 | .quad 0x1000020800002001, 0x0000000800000000 | ||
666 | .quad 0x1008020800002001, 0x1000000800000001 | ||
667 | .quad 0x1000000000000001, 0x0000020000002000 | ||
668 | .quad 0x1008000000000001, 0x1000020000002001 | ||
669 | .quad 0x0008020800002000, 0x1008000800000001 | ||
670 | .quad 0x1000020000002001, 0x0000020800002000 | ||
671 | .quad 0x0008000000000000, 0x1008020000002001 | ||
672 | .quad 0x0000000800000000, 0x0008000000000000 | ||
673 | .quad 0x0000020000002000, 0x0008020800002000 | ||
674 | .L_s5: | ||
675 | .quad 0x0000001000000100, 0x0020001002080100 | ||
676 | .quad 0x0020000002080000, 0x0420001002000100 | ||
677 | .quad 0x0000000000080000, 0x0000001000000100 | ||
678 | .quad 0x0400000000000000, 0x0020000002080000 | ||
679 | .quad 0x0400001000080100, 0x0000000000080000 | ||
680 | .quad 0x0020001002000100, 0x0400001000080100 | ||
681 | .quad 0x0420001002000100, 0x0420000002080000 | ||
682 | .quad 0x0000001000080100, 0x0400000000000000 | ||
683 | .quad 0x0020000002000000, 0x0400000000080000 | ||
684 | .quad 0x0400000000080000, 0x0000000000000000 | ||
685 | .quad 0x0400001000000100, 0x0420001002080100 | ||
686 | .quad 0x0420001002080100, 0x0020001002000100 | ||
687 | .quad 0x0420000002080000, 0x0400001000000100 | ||
688 | .quad 0x0000000000000000, 0x0420000002000000 | ||
689 | .quad 0x0020001002080100, 0x0020000002000000 | ||
690 | .quad 0x0420000002000000, 0x0000001000080100 | ||
691 | .quad 0x0000000000080000, 0x0420001002000100 | ||
692 | .quad 0x0000001000000100, 0x0020000002000000 | ||
693 | .quad 0x0400000000000000, 0x0020000002080000 | ||
694 | .quad 0x0420001002000100, 0x0400001000080100 | ||
695 | .quad 0x0020001002000100, 0x0400000000000000 | ||
696 | .quad 0x0420000002080000, 0x0020001002080100 | ||
697 | .quad 0x0400001000080100, 0x0000001000000100 | ||
698 | .quad 0x0020000002000000, 0x0420000002080000 | ||
699 | .quad 0x0420001002080100, 0x0000001000080100 | ||
700 | .quad 0x0420000002000000, 0x0420001002080100 | ||
701 | .quad 0x0020000002080000, 0x0000000000000000 | ||
702 | .quad 0x0400000000080000, 0x0420000002000000 | ||
703 | .quad 0x0000001000080100, 0x0020001002000100 | ||
704 | .quad 0x0400001000000100, 0x0000000000080000 | ||
705 | .quad 0x0000000000000000, 0x0400000000080000 | ||
706 | .quad 0x0020001002080100, 0x0400001000000100 | ||
707 | .L_s6: | ||
708 | .quad 0x0200000120000010, 0x0204000020000000 | ||
709 | .quad 0x0000040000000000, 0x0204040120000010 | ||
710 | .quad 0x0204000020000000, 0x0000000100000010 | ||
711 | .quad 0x0204040120000010, 0x0004000000000000 | ||
712 | .quad 0x0200040020000000, 0x0004040100000010 | ||
713 | .quad 0x0004000000000000, 0x0200000120000010 | ||
714 | .quad 0x0004000100000010, 0x0200040020000000 | ||
715 | .quad 0x0200000020000000, 0x0000040100000010 | ||
716 | .quad 0x0000000000000000, 0x0004000100000010 | ||
717 | .quad 0x0200040120000010, 0x0000040000000000 | ||
718 | .quad 0x0004040000000000, 0x0200040120000010 | ||
719 | .quad 0x0000000100000010, 0x0204000120000010 | ||
720 | .quad 0x0204000120000010, 0x0000000000000000 | ||
721 | .quad 0x0004040100000010, 0x0204040020000000 | ||
722 | .quad 0x0000040100000010, 0x0004040000000000 | ||
723 | .quad 0x0204040020000000, 0x0200000020000000 | ||
724 | .quad 0x0200040020000000, 0x0000000100000010 | ||
725 | .quad 0x0204000120000010, 0x0004040000000000 | ||
726 | .quad 0x0204040120000010, 0x0004000000000000 | ||
727 | .quad 0x0000040100000010, 0x0200000120000010 | ||
728 | .quad 0x0004000000000000, 0x0200040020000000 | ||
729 | .quad 0x0200000020000000, 0x0000040100000010 | ||
730 | .quad 0x0200000120000010, 0x0204040120000010 | ||
731 | .quad 0x0004040000000000, 0x0204000020000000 | ||
732 | .quad 0x0004040100000010, 0x0204040020000000 | ||
733 | .quad 0x0000000000000000, 0x0204000120000010 | ||
734 | .quad 0x0000000100000010, 0x0000040000000000 | ||
735 | .quad 0x0204000020000000, 0x0004040100000010 | ||
736 | .quad 0x0000040000000000, 0x0004000100000010 | ||
737 | .quad 0x0200040120000010, 0x0000000000000000 | ||
738 | .quad 0x0204040020000000, 0x0200000020000000 | ||
739 | .quad 0x0004000100000010, 0x0200040120000010 | ||
740 | .L_s7: | ||
741 | .quad 0x0002000000200000, 0x2002000004200002 | ||
742 | .quad 0x2000000004000802, 0x0000000000000000 | ||
743 | .quad 0x0000000000000800, 0x2000000004000802 | ||
744 | .quad 0x2002000000200802, 0x0002000004200800 | ||
745 | .quad 0x2002000004200802, 0x0002000000200000 | ||
746 | .quad 0x0000000000000000, 0x2000000004000002 | ||
747 | .quad 0x2000000000000002, 0x0000000004000000 | ||
748 | .quad 0x2002000004200002, 0x2000000000000802 | ||
749 | .quad 0x0000000004000800, 0x2002000000200802 | ||
750 | .quad 0x2002000000200002, 0x0000000004000800 | ||
751 | .quad 0x2000000004000002, 0x0002000004200000 | ||
752 | .quad 0x0002000004200800, 0x2002000000200002 | ||
753 | .quad 0x0002000004200000, 0x0000000000000800 | ||
754 | .quad 0x2000000000000802, 0x2002000004200802 | ||
755 | .quad 0x0002000000200800, 0x2000000000000002 | ||
756 | .quad 0x0000000004000000, 0x0002000000200800 | ||
757 | .quad 0x0000000004000000, 0x0002000000200800 | ||
758 | .quad 0x0002000000200000, 0x2000000004000802 | ||
759 | .quad 0x2000000004000802, 0x2002000004200002 | ||
760 | .quad 0x2002000004200002, 0x2000000000000002 | ||
761 | .quad 0x2002000000200002, 0x0000000004000000 | ||
762 | .quad 0x0000000004000800, 0x0002000000200000 | ||
763 | .quad 0x0002000004200800, 0x2000000000000802 | ||
764 | .quad 0x2002000000200802, 0x0002000004200800 | ||
765 | .quad 0x2000000000000802, 0x2000000004000002 | ||
766 | .quad 0x2002000004200802, 0x0002000004200000 | ||
767 | .quad 0x0002000000200800, 0x0000000000000000 | ||
768 | .quad 0x2000000000000002, 0x2002000004200802 | ||
769 | .quad 0x0000000000000000, 0x2002000000200802 | ||
770 | .quad 0x0002000004200000, 0x0000000000000800 | ||
771 | .quad 0x2000000004000002, 0x0000000004000800 | ||
772 | .quad 0x0000000000000800, 0x2002000000200002 | ||
773 | .L_s8: | ||
774 | .quad 0x0100010410001000, 0x0000010000001000 | ||
775 | .quad 0x0000000000040000, 0x0100010410041000 | ||
776 | .quad 0x0100000010000000, 0x0100010410001000 | ||
777 | .quad 0x0000000400000000, 0x0100000010000000 | ||
778 | .quad 0x0000000400040000, 0x0100000010040000 | ||
779 | .quad 0x0100010410041000, 0x0000010000041000 | ||
780 | .quad 0x0100010010041000, 0x0000010400041000 | ||
781 | .quad 0x0000010000001000, 0x0000000400000000 | ||
782 | .quad 0x0100000010040000, 0x0100000410000000 | ||
783 | .quad 0x0100010010001000, 0x0000010400001000 | ||
784 | .quad 0x0000010000041000, 0x0000000400040000 | ||
785 | .quad 0x0100000410040000, 0x0100010010041000 | ||
786 | .quad 0x0000010400001000, 0x0000000000000000 | ||
787 | .quad 0x0000000000000000, 0x0100000410040000 | ||
788 | .quad 0x0100000410000000, 0x0100010010001000 | ||
789 | .quad 0x0000010400041000, 0x0000000000040000 | ||
790 | .quad 0x0000010400041000, 0x0000000000040000 | ||
791 | .quad 0x0100010010041000, 0x0000010000001000 | ||
792 | .quad 0x0000000400000000, 0x0100000410040000 | ||
793 | .quad 0x0000010000001000, 0x0000010400041000 | ||
794 | .quad 0x0100010010001000, 0x0000000400000000 | ||
795 | .quad 0x0100000410000000, 0x0100000010040000 | ||
796 | .quad 0x0100000410040000, 0x0100000010000000 | ||
797 | .quad 0x0000000000040000, 0x0100010410001000 | ||
798 | .quad 0x0000000000000000, 0x0100010410041000 | ||
799 | .quad 0x0000000400040000, 0x0100000410000000 | ||
800 | .quad 0x0100000010040000, 0x0100010010001000 | ||
801 | .quad 0x0100010410001000, 0x0000000000000000 | ||
802 | .quad 0x0100010410041000, 0x0000010000041000 | ||
803 | .quad 0x0000010000041000, 0x0000010400001000 | ||
804 | .quad 0x0000010400001000, 0x0000000400040000 | ||
805 | .quad 0x0100000010000000, 0x0100010010041000 | ||
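For orientation only: the .L_s1 through .L_s8 tables above are what the round3() macro indexes a byte at a time, each movzbl pulling the next index out of the round word and the following xorq folding an 8-byte table entry into the output half. A hedged C sketch of that access pattern (not part of the patch; the real code interleaves three blocks, walks the tables in the order s8, s6, s4, s2, s7, s5, s3, s1, and all names here are hypothetical) might look like:

/*
 * Illustrative sketch of the byte-indexed lookups performed by round3().
 * The asm uses each index byte directly; it is assumed to already hold a
 * 6-bit value, which is why the tables have 64 quadword entries.
 */
#include <stdint.h>

static uint64_t round_lookup(const uint64_t sbox[8][64], uint64_t rw)
{
	uint64_t out = 0;
	int i;

	/* movzbl picks a byte; xorq sN(, idx, 8) folds in an 8-byte entry */
	for (i = 0; i < 8; i++) {
		out ^= sbox[i][rw & 0x3f];
		rw >>= 8;
	}
	return out;
}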
diff --git a/arch/x86/crypto/des3_ede_glue.c b/arch/x86/crypto/des3_ede_glue.c new file mode 100644 index 000000000000..0e9c0668fe4e --- /dev/null +++ b/arch/x86/crypto/des3_ede_glue.c | |||
@@ -0,0 +1,509 @@ | |||
1 | /* | ||
2 | * Glue code for the assembler-optimized version of 3DES | ||
3 | * | ||
4 | * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
5 | * | ||
6 | * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: | ||
7 | * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> | ||
8 | * CTR part based on code (crypto/ctr.c) by: | ||
9 | * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com> | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of the GNU General Public License as published by | ||
13 | * the Free Software Foundation; either version 2 of the License, or | ||
14 | * (at your option) any later version. | ||
15 | * | ||
16 | * This program is distributed in the hope that it will be useful, | ||
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
19 | * GNU General Public License for more details. | ||
20 | * | ||
21 | */ | ||
22 | |||
23 | #include <asm/processor.h> | ||
24 | #include <crypto/des.h> | ||
25 | #include <linux/crypto.h> | ||
26 | #include <linux/init.h> | ||
27 | #include <linux/module.h> | ||
28 | #include <linux/types.h> | ||
29 | #include <crypto/algapi.h> | ||
30 | |||
31 | struct des3_ede_x86_ctx { | ||
32 | u32 enc_expkey[DES3_EDE_EXPKEY_WORDS]; | ||
33 | u32 dec_expkey[DES3_EDE_EXPKEY_WORDS]; | ||
34 | }; | ||
35 | |||
36 | /* regular block cipher functions */ | ||
37 | asmlinkage void des3_ede_x86_64_crypt_blk(const u32 *expkey, u8 *dst, | ||
38 | const u8 *src); | ||
39 | |||
40 | /* 3-way parallel cipher functions */ | ||
41 | asmlinkage void des3_ede_x86_64_crypt_blk_3way(const u32 *expkey, u8 *dst, | ||
42 | const u8 *src); | ||
43 | |||
44 | static inline void des3_ede_enc_blk(struct des3_ede_x86_ctx *ctx, u8 *dst, | ||
45 | const u8 *src) | ||
46 | { | ||
47 | u32 *enc_ctx = ctx->enc_expkey; | ||
48 | |||
49 | des3_ede_x86_64_crypt_blk(enc_ctx, dst, src); | ||
50 | } | ||
51 | |||
52 | static inline void des3_ede_dec_blk(struct des3_ede_x86_ctx *ctx, u8 *dst, | ||
53 | const u8 *src) | ||
54 | { | ||
55 | u32 *dec_ctx = ctx->dec_expkey; | ||
56 | |||
57 | des3_ede_x86_64_crypt_blk(dec_ctx, dst, src); | ||
58 | } | ||
59 | |||
60 | static inline void des3_ede_enc_blk_3way(struct des3_ede_x86_ctx *ctx, u8 *dst, | ||
61 | const u8 *src) | ||
62 | { | ||
63 | u32 *enc_ctx = ctx->enc_expkey; | ||
64 | |||
65 | des3_ede_x86_64_crypt_blk_3way(enc_ctx, dst, src); | ||
66 | } | ||
67 | |||
68 | static inline void des3_ede_dec_blk_3way(struct des3_ede_x86_ctx *ctx, u8 *dst, | ||
69 | const u8 *src) | ||
70 | { | ||
71 | u32 *dec_ctx = ctx->dec_expkey; | ||
72 | |||
73 | des3_ede_x86_64_crypt_blk_3way(dec_ctx, dst, src); | ||
74 | } | ||
75 | |||
76 | static void des3_ede_x86_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | ||
77 | { | ||
78 | des3_ede_enc_blk(crypto_tfm_ctx(tfm), dst, src); | ||
79 | } | ||
80 | |||
81 | static void des3_ede_x86_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | ||
82 | { | ||
83 | des3_ede_dec_blk(crypto_tfm_ctx(tfm), dst, src); | ||
84 | } | ||
85 | |||
86 | static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, | ||
87 | const u32 *expkey) | ||
88 | { | ||
89 | unsigned int bsize = DES3_EDE_BLOCK_SIZE; | ||
90 | unsigned int nbytes; | ||
91 | int err; | ||
92 | |||
93 | err = blkcipher_walk_virt(desc, walk); | ||
94 | |||
95 | while ((nbytes = walk->nbytes)) { | ||
96 | u8 *wsrc = walk->src.virt.addr; | ||
97 | u8 *wdst = walk->dst.virt.addr; | ||
98 | |||
99 | /* Process three block batch */ | ||
100 | if (nbytes >= bsize * 3) { | ||
101 | do { | ||
102 | des3_ede_x86_64_crypt_blk_3way(expkey, wdst, | ||
103 | wsrc); | ||
104 | |||
105 | wsrc += bsize * 3; | ||
106 | wdst += bsize * 3; | ||
107 | nbytes -= bsize * 3; | ||
108 | } while (nbytes >= bsize * 3); | ||
109 | |||
110 | if (nbytes < bsize) | ||
111 | goto done; | ||
112 | } | ||
113 | |||
114 | /* Handle leftovers */ | ||
115 | do { | ||
116 | des3_ede_x86_64_crypt_blk(expkey, wdst, wsrc); | ||
117 | |||
118 | wsrc += bsize; | ||
119 | wdst += bsize; | ||
120 | nbytes -= bsize; | ||
121 | } while (nbytes >= bsize); | ||
122 | |||
123 | done: | ||
124 | err = blkcipher_walk_done(desc, walk, nbytes); | ||
125 | } | ||
126 | |||
127 | return err; | ||
128 | } | ||
129 | |||
130 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
131 | struct scatterlist *src, unsigned int nbytes) | ||
132 | { | ||
133 | struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
134 | struct blkcipher_walk walk; | ||
135 | |||
136 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
137 | return ecb_crypt(desc, &walk, ctx->enc_expkey); | ||
138 | } | ||
139 | |||
140 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
141 | struct scatterlist *src, unsigned int nbytes) | ||
142 | { | ||
143 | struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
144 | struct blkcipher_walk walk; | ||
145 | |||
146 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
147 | return ecb_crypt(desc, &walk, ctx->dec_expkey); | ||
148 | } | ||
149 | |||
150 | static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, | ||
151 | struct blkcipher_walk *walk) | ||
152 | { | ||
153 | struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
154 | unsigned int bsize = DES3_EDE_BLOCK_SIZE; | ||
155 | unsigned int nbytes = walk->nbytes; | ||
156 | u64 *src = (u64 *)walk->src.virt.addr; | ||
157 | u64 *dst = (u64 *)walk->dst.virt.addr; | ||
158 | u64 *iv = (u64 *)walk->iv; | ||
159 | |||
160 | do { | ||
161 | *dst = *src ^ *iv; | ||
162 | des3_ede_enc_blk(ctx, (u8 *)dst, (u8 *)dst); | ||
163 | iv = dst; | ||
164 | |||
165 | src += 1; | ||
166 | dst += 1; | ||
167 | nbytes -= bsize; | ||
168 | } while (nbytes >= bsize); | ||
169 | |||
170 | *(u64 *)walk->iv = *iv; | ||
171 | return nbytes; | ||
172 | } | ||
173 | |||
174 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
175 | struct scatterlist *src, unsigned int nbytes) | ||
176 | { | ||
177 | struct blkcipher_walk walk; | ||
178 | int err; | ||
179 | |||
180 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
181 | err = blkcipher_walk_virt(desc, &walk); | ||
182 | |||
183 | while ((nbytes = walk.nbytes)) { | ||
184 | nbytes = __cbc_encrypt(desc, &walk); | ||
185 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
186 | } | ||
187 | |||
188 | return err; | ||
189 | } | ||
190 | |||
191 | static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, | ||
192 | struct blkcipher_walk *walk) | ||
193 | { | ||
194 | struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
195 | unsigned int bsize = DES3_EDE_BLOCK_SIZE; | ||
196 | unsigned int nbytes = walk->nbytes; | ||
197 | u64 *src = (u64 *)walk->src.virt.addr; | ||
198 | u64 *dst = (u64 *)walk->dst.virt.addr; | ||
199 | u64 ivs[3 - 1]; | ||
200 | u64 last_iv; | ||
201 | |||
202 | /* Start of the last block. */ | ||
203 | src += nbytes / bsize - 1; | ||
204 | dst += nbytes / bsize - 1; | ||
205 | |||
206 | last_iv = *src; | ||
207 | |||
208 | /* Process three block batch */ | ||
209 | if (nbytes >= bsize * 3) { | ||
210 | do { | ||
211 | nbytes -= bsize * 3 - bsize; | ||
212 | src -= 3 - 1; | ||
213 | dst -= 3 - 1; | ||
214 | |||
215 | ivs[0] = src[0]; | ||
216 | ivs[1] = src[1]; | ||
217 | |||
218 | des3_ede_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src); | ||
219 | |||
220 | dst[1] ^= ivs[0]; | ||
221 | dst[2] ^= ivs[1]; | ||
222 | |||
223 | nbytes -= bsize; | ||
224 | if (nbytes < bsize) | ||
225 | goto done; | ||
226 | |||
227 | *dst ^= *(src - 1); | ||
228 | src -= 1; | ||
229 | dst -= 1; | ||
230 | } while (nbytes >= bsize * 3); | ||
231 | } | ||
232 | |||
233 | /* Handle leftovers */ | ||
234 | for (;;) { | ||
235 | des3_ede_dec_blk(ctx, (u8 *)dst, (u8 *)src); | ||
236 | |||
237 | nbytes -= bsize; | ||
238 | if (nbytes < bsize) | ||
239 | break; | ||
240 | |||
241 | *dst ^= *(src - 1); | ||
242 | src -= 1; | ||
243 | dst -= 1; | ||
244 | } | ||
245 | |||
246 | done: | ||
247 | *dst ^= *(u64 *)walk->iv; | ||
248 | *(u64 *)walk->iv = last_iv; | ||
249 | |||
250 | return nbytes; | ||
251 | } | ||
252 | |||
253 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
254 | struct scatterlist *src, unsigned int nbytes) | ||
255 | { | ||
256 | struct blkcipher_walk walk; | ||
257 | int err; | ||
258 | |||
259 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
260 | err = blkcipher_walk_virt(desc, &walk); | ||
261 | |||
262 | while ((nbytes = walk.nbytes)) { | ||
263 | nbytes = __cbc_decrypt(desc, &walk); | ||
264 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
265 | } | ||
266 | |||
267 | return err; | ||
268 | } | ||
269 | |||
270 | static void ctr_crypt_final(struct des3_ede_x86_ctx *ctx, | ||
271 | struct blkcipher_walk *walk) | ||
272 | { | ||
273 | u8 *ctrblk = walk->iv; | ||
274 | u8 keystream[DES3_EDE_BLOCK_SIZE]; | ||
275 | u8 *src = walk->src.virt.addr; | ||
276 | u8 *dst = walk->dst.virt.addr; | ||
277 | unsigned int nbytes = walk->nbytes; | ||
278 | |||
279 | des3_ede_enc_blk(ctx, keystream, ctrblk); | ||
280 | crypto_xor(keystream, src, nbytes); | ||
281 | memcpy(dst, keystream, nbytes); | ||
282 | |||
283 | crypto_inc(ctrblk, DES3_EDE_BLOCK_SIZE); | ||
284 | } | ||
285 | |||
286 | static unsigned int __ctr_crypt(struct blkcipher_desc *desc, | ||
287 | struct blkcipher_walk *walk) | ||
288 | { | ||
289 | struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
290 | unsigned int bsize = DES3_EDE_BLOCK_SIZE; | ||
291 | unsigned int nbytes = walk->nbytes; | ||
292 | __be64 *src = (__be64 *)walk->src.virt.addr; | ||
293 | __be64 *dst = (__be64 *)walk->dst.virt.addr; | ||
294 | u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv); | ||
295 | __be64 ctrblocks[3]; | ||
296 | |||
297 | /* Process three block batch */ | ||
298 | if (nbytes >= bsize * 3) { | ||
299 | do { | ||
300 | /* create ctrblks for parallel encrypt */ | ||
301 | ctrblocks[0] = cpu_to_be64(ctrblk++); | ||
302 | ctrblocks[1] = cpu_to_be64(ctrblk++); | ||
303 | ctrblocks[2] = cpu_to_be64(ctrblk++); | ||
304 | |||
305 | des3_ede_enc_blk_3way(ctx, (u8 *)ctrblocks, | ||
306 | (u8 *)ctrblocks); | ||
307 | |||
308 | dst[0] = src[0] ^ ctrblocks[0]; | ||
309 | dst[1] = src[1] ^ ctrblocks[1]; | ||
310 | dst[2] = src[2] ^ ctrblocks[2]; | ||
311 | |||
312 | src += 3; | ||
313 | dst += 3; | ||
314 | } while ((nbytes -= bsize * 3) >= bsize * 3); | ||
315 | |||
316 | if (nbytes < bsize) | ||
317 | goto done; | ||
318 | } | ||
319 | |||
320 | /* Handle leftovers */ | ||
321 | do { | ||
322 | ctrblocks[0] = cpu_to_be64(ctrblk++); | ||
323 | |||
324 | des3_ede_enc_blk(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks); | ||
325 | |||
326 | dst[0] = src[0] ^ ctrblocks[0]; | ||
327 | |||
328 | src += 1; | ||
329 | dst += 1; | ||
330 | } while ((nbytes -= bsize) >= bsize); | ||
331 | |||
332 | done: | ||
333 | *(__be64 *)walk->iv = cpu_to_be64(ctrblk); | ||
334 | return nbytes; | ||
335 | } | ||
336 | |||
337 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
338 | struct scatterlist *src, unsigned int nbytes) | ||
339 | { | ||
340 | struct blkcipher_walk walk; | ||
341 | int err; | ||
342 | |||
343 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
344 | err = blkcipher_walk_virt_block(desc, &walk, DES3_EDE_BLOCK_SIZE); | ||
345 | |||
346 | while ((nbytes = walk.nbytes) >= DES3_EDE_BLOCK_SIZE) { | ||
347 | nbytes = __ctr_crypt(desc, &walk); | ||
348 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
349 | } | ||
350 | |||
351 | if (walk.nbytes) { | ||
352 | ctr_crypt_final(crypto_blkcipher_ctx(desc->tfm), &walk); | ||
353 | err = blkcipher_walk_done(desc, &walk, 0); | ||
354 | } | ||
355 | |||
356 | return err; | ||
357 | } | ||
358 | |||
359 | static int des3_ede_x86_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
360 | unsigned int keylen) | ||
361 | { | ||
362 | struct des3_ede_x86_ctx *ctx = crypto_tfm_ctx(tfm); | ||
363 | u32 i, j, tmp; | ||
364 | int err; | ||
365 | |||
366 | /* Generate encryption context using generic implementation. */ | ||
367 | err = __des3_ede_setkey(ctx->enc_expkey, &tfm->crt_flags, key, keylen); | ||
368 | if (err < 0) | ||
369 | return err; | ||
370 | |||
371 | /* Fix encryption context for this implementation and form decryption | ||
372 | * context. */ | ||
373 | j = DES3_EDE_EXPKEY_WORDS - 2; | ||
374 | for (i = 0; i < DES3_EDE_EXPKEY_WORDS; i += 2, j -= 2) { | ||
375 | tmp = ror32(ctx->enc_expkey[i + 1], 4); | ||
376 | ctx->enc_expkey[i + 1] = tmp; | ||
377 | |||
378 | ctx->dec_expkey[j + 0] = ctx->enc_expkey[i + 0]; | ||
379 | ctx->dec_expkey[j + 1] = tmp; | ||
380 | } | ||
381 | |||
382 | return 0; | ||
383 | } | ||
384 | |||
385 | static struct crypto_alg des3_ede_algs[4] = { { | ||
386 | .cra_name = "des3_ede", | ||
387 | .cra_driver_name = "des3_ede-asm", | ||
388 | .cra_priority = 200, | ||
389 | .cra_flags = CRYPTO_ALG_TYPE_CIPHER, | ||
390 | .cra_blocksize = DES3_EDE_BLOCK_SIZE, | ||
391 | .cra_ctxsize = sizeof(struct des3_ede_x86_ctx), | ||
392 | .cra_alignmask = 0, | ||
393 | .cra_module = THIS_MODULE, | ||
394 | .cra_u = { | ||
395 | .cipher = { | ||
396 | .cia_min_keysize = DES3_EDE_KEY_SIZE, | ||
397 | .cia_max_keysize = DES3_EDE_KEY_SIZE, | ||
398 | .cia_setkey = des3_ede_x86_setkey, | ||
399 | .cia_encrypt = des3_ede_x86_encrypt, | ||
400 | .cia_decrypt = des3_ede_x86_decrypt, | ||
401 | } | ||
402 | } | ||
403 | }, { | ||
404 | .cra_name = "ecb(des3_ede)", | ||
405 | .cra_driver_name = "ecb-des3_ede-asm", | ||
406 | .cra_priority = 300, | ||
407 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
408 | .cra_blocksize = DES3_EDE_BLOCK_SIZE, | ||
409 | .cra_ctxsize = sizeof(struct des3_ede_x86_ctx), | ||
410 | .cra_alignmask = 0, | ||
411 | .cra_type = &crypto_blkcipher_type, | ||
412 | .cra_module = THIS_MODULE, | ||
413 | .cra_u = { | ||
414 | .blkcipher = { | ||
415 | .min_keysize = DES3_EDE_KEY_SIZE, | ||
416 | .max_keysize = DES3_EDE_KEY_SIZE, | ||
417 | .setkey = des3_ede_x86_setkey, | ||
418 | .encrypt = ecb_encrypt, | ||
419 | .decrypt = ecb_decrypt, | ||
420 | }, | ||
421 | }, | ||
422 | }, { | ||
423 | .cra_name = "cbc(des3_ede)", | ||
424 | .cra_driver_name = "cbc-des3_ede-asm", | ||
425 | .cra_priority = 300, | ||
426 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
427 | .cra_blocksize = DES3_EDE_BLOCK_SIZE, | ||
428 | .cra_ctxsize = sizeof(struct des3_ede_x86_ctx), | ||
429 | .cra_alignmask = 0, | ||
430 | .cra_type = &crypto_blkcipher_type, | ||
431 | .cra_module = THIS_MODULE, | ||
432 | .cra_u = { | ||
433 | .blkcipher = { | ||
434 | .min_keysize = DES3_EDE_KEY_SIZE, | ||
435 | .max_keysize = DES3_EDE_KEY_SIZE, | ||
436 | .ivsize = DES3_EDE_BLOCK_SIZE, | ||
437 | .setkey = des3_ede_x86_setkey, | ||
438 | .encrypt = cbc_encrypt, | ||
439 | .decrypt = cbc_decrypt, | ||
440 | }, | ||
441 | }, | ||
442 | }, { | ||
443 | .cra_name = "ctr(des3_ede)", | ||
444 | .cra_driver_name = "ctr-des3_ede-asm", | ||
445 | .cra_priority = 300, | ||
446 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
447 | .cra_blocksize = 1, | ||
448 | .cra_ctxsize = sizeof(struct des3_ede_x86_ctx), | ||
449 | .cra_alignmask = 0, | ||
450 | .cra_type = &crypto_blkcipher_type, | ||
451 | .cra_module = THIS_MODULE, | ||
452 | .cra_u = { | ||
453 | .blkcipher = { | ||
454 | .min_keysize = DES3_EDE_KEY_SIZE, | ||
455 | .max_keysize = DES3_EDE_KEY_SIZE, | ||
456 | .ivsize = DES3_EDE_BLOCK_SIZE, | ||
457 | .setkey = des3_ede_x86_setkey, | ||
458 | .encrypt = ctr_crypt, | ||
459 | .decrypt = ctr_crypt, | ||
460 | }, | ||
461 | }, | ||
462 | } }; | ||
463 | |||
464 | static bool is_blacklisted_cpu(void) | ||
465 | { | ||
466 | if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) | ||
467 | return false; | ||
468 | |||
469 | if (boot_cpu_data.x86 == 0x0f) { | ||
470 | /* | ||
471 | * On Pentium 4, des3_ede-x86_64 is slower than the generic C | ||
472 | * implementation because it uses 64-bit rotates (which are | ||
473 | * really slow on P4). Therefore blacklist P4s. | ||
474 | */ | ||
475 | return true; | ||
476 | } | ||
477 | |||
478 | return false; | ||
479 | } | ||
480 | |||
481 | static int force; | ||
482 | module_param(force, int, 0); | ||
483 | MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist"); | ||
484 | |||
485 | static int __init des3_ede_x86_init(void) | ||
486 | { | ||
487 | if (!force && is_blacklisted_cpu()) { | ||
488 | pr_info("des3_ede-x86_64: performance on this CPU would be suboptimal: disabling des3_ede-x86_64.\n"); | ||
489 | return -ENODEV; | ||
490 | } | ||
491 | |||
492 | return crypto_register_algs(des3_ede_algs, ARRAY_SIZE(des3_ede_algs)); | ||
493 | } | ||
494 | |||
495 | static void __exit des3_ede_x86_fini(void) | ||
496 | { | ||
497 | crypto_unregister_algs(des3_ede_algs, ARRAY_SIZE(des3_ede_algs)); | ||
498 | } | ||
499 | |||
500 | module_init(des3_ede_x86_init); | ||
501 | module_exit(des3_ede_x86_fini); | ||
502 | |||
503 | MODULE_LICENSE("GPL"); | ||
504 | MODULE_DESCRIPTION("Triple DES EDE Cipher Algorithm, asm optimized"); | ||
505 | MODULE_ALIAS("des3_ede"); | ||
506 | MODULE_ALIAS("des3_ede-asm"); | ||
507 | MODULE_ALIAS("des"); | ||
508 | MODULE_ALIAS("des-asm"); | ||
509 | MODULE_AUTHOR("Jussi Kivilinna <jussi.kivilinna@iki.fi>"); | ||
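For context, a kernel caller of this era's synchronous blkcipher interface reaches the drivers registered above simply by asking for the generic algorithm name; the higher cra_priority (300 versus the generic implementation) selects the assembler version automatically. A minimal, hedged usage sketch follows (not part of the patch; the function name and parameters are illustrative and error handling is abbreviated):

/*
 * Hedged usage sketch: encrypting a buffer with "cbc(des3_ede)" through
 * the synchronous blkcipher API.  'len' must be a multiple of
 * DES3_EDE_BLOCK_SIZE.
 */
#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/scatterlist.h>
#include <crypto/des.h>

static int example_cbc_des3_encrypt(const u8 *key, const u8 *iv,
				    u8 *buf, unsigned int len)
{
	struct crypto_blkcipher *tfm;
	struct blkcipher_desc desc;
	struct scatterlist sg;
	int err;

	/* Priority 300 makes cbc-des3_ede-asm win over the generic cipher. */
	tfm = crypto_alloc_blkcipher("cbc(des3_ede)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_blkcipher_setkey(tfm, key, DES3_EDE_KEY_SIZE);
	if (err)
		goto out;

	crypto_blkcipher_set_iv(tfm, iv, DES3_EDE_BLOCK_SIZE);

	desc.tfm = tfm;
	desc.flags = 0;
	sg_init_one(&sg, buf, len);

	err = crypto_blkcipher_encrypt(&desc, &sg, &sg, len);
out:
	crypto_free_blkcipher(tfm);
	return err;
}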