aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-01-11 01:01:27 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2012-01-11 01:01:27 -0500
commit4f58cb90bcb04cfe18f524d1c9a65edef5eb3f51 (patch)
tree8c686e8b736eed7258921909282c0955543c7d2f /arch/x86
parente7691a1ce341c80ed9504244a36b31c025217391 (diff)
parent08c70fc3a239475122e20b7a21dfae4c264c24f7 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (54 commits) crypto: gf128mul - remove leftover "(EXPERIMENTAL)" in Kconfig crypto: serpent-sse2 - remove unneeded LRW/XTS #ifdefs crypto: serpent-sse2 - select LRW and XTS crypto: twofish-x86_64-3way - remove unneeded LRW/XTS #ifdefs crypto: twofish-x86_64-3way - select LRW and XTS crypto: xts - remove dependency on EXPERIMENTAL crypto: lrw - remove dependency on EXPERIMENTAL crypto: picoxcell - fix boolean and / or confusion crypto: caam - remove DECO access initialization code crypto: caam - fix polarity of "propagate error" logic crypto: caam - more desc.h cleanups crypto: caam - desc.h - convert spaces to tabs crypto: talitos - convert talitos_error to struct device crypto: talitos - remove NO_IRQ references crypto: talitos - fix bad kfree crypto: convert drivers/crypto/* to use module_platform_driver() char: hw_random: convert drivers/char/hw_random/* to use module_platform_driver() crypto: serpent-sse2 - should select CRYPTO_CRYPTD crypto: serpent - rename serpent.c to serpent_generic.c crypto: serpent - cleanup checkpatch errors and warnings ...
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/crypto/Makefile4
-rw-r--r--arch/x86/crypto/serpent-sse2-i586-asm_32.S638
-rw-r--r--arch/x86/crypto/serpent-sse2-x86_64-asm_64.S761
-rw-r--r--arch/x86/crypto/serpent_sse2_glue.c1070
-rw-r--r--arch/x86/crypto/twofish_glue_3way.c218
-rw-r--r--arch/x86/include/asm/serpent.h63
6 files changed, 2754 insertions, 0 deletions
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 3537d4b91f74..2b0b9631474b 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -5,12 +5,14 @@
5obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o 5obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
6obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o 6obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
7obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o 7obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
8obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o
8 9
9obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o 10obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
10obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o 11obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
11obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o 12obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
12obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o 13obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
13obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o 14obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
15obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o
14obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o 16obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
15obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o 17obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
16 18
@@ -20,12 +22,14 @@ obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o
20aes-i586-y := aes-i586-asm_32.o aes_glue.o 22aes-i586-y := aes-i586-asm_32.o aes_glue.o
21twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o 23twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o
22salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o 24salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o
25serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o
23 26
24aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o 27aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
25blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o 28blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
26twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o 29twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
27twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o 30twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
28salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o 31salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
32serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o
29 33
30aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o 34aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
31 35
diff --git a/arch/x86/crypto/serpent-sse2-i586-asm_32.S b/arch/x86/crypto/serpent-sse2-i586-asm_32.S
new file mode 100644
index 000000000000..4e37677ca851
--- /dev/null
+++ b/arch/x86/crypto/serpent-sse2-i586-asm_32.S
@@ -0,0 +1,638 @@
1/*
2 * Serpent Cipher 4-way parallel algorithm (i586/SSE2)
3 *
4 * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
5 *
6 * Based on crypto/serpent.c by
7 * Copyright (C) 2002 Dag Arne Osvik <osvik@ii.uib.no>
8 * 2003 Herbert Valerio Riedel <hvr@gnu.org>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
23 * USA
24 *
25 */
26
27.file "serpent-sse2-i586-asm_32.S"
28.text
29
30#define arg_ctx 4
31#define arg_dst 8
32#define arg_src 12
33#define arg_xor 16
34
35/**********************************************************************
36 4-way SSE2 serpent
37 **********************************************************************/
38#define CTX %edx
39
40#define RA %xmm0
41#define RB %xmm1
42#define RC %xmm2
43#define RD %xmm3
44#define RE %xmm4
45
46#define RT0 %xmm5
47#define RT1 %xmm6
48
49#define RNOT %xmm7
50
51#define get_key(i, j, t) \
52 movd (4*(i)+(j))*4(CTX), t; \
53 pshufd $0, t, t;
54
55#define K(x0, x1, x2, x3, x4, i) \
56 get_key(i, 0, x4); \
57 get_key(i, 1, RT0); \
58 get_key(i, 2, RT1); \
59 pxor x4, x0; \
60 pxor RT0, x1; \
61 pxor RT1, x2; \
62 get_key(i, 3, x4); \
63 pxor x4, x3;
64
65#define LK(x0, x1, x2, x3, x4, i) \
66 movdqa x0, x4; \
67 pslld $13, x0; \
68 psrld $(32 - 13), x4; \
69 por x4, x0; \
70 pxor x0, x1; \
71 movdqa x2, x4; \
72 pslld $3, x2; \
73 psrld $(32 - 3), x4; \
74 por x4, x2; \
75 pxor x2, x1; \
76 movdqa x1, x4; \
77 pslld $1, x1; \
78 psrld $(32 - 1), x4; \
79 por x4, x1; \
80 movdqa x0, x4; \
81 pslld $3, x4; \
82 pxor x2, x3; \
83 pxor x4, x3; \
84 movdqa x3, x4; \
85 pslld $7, x3; \
86 psrld $(32 - 7), x4; \
87 por x4, x3; \
88 movdqa x1, x4; \
89 pslld $7, x4; \
90 pxor x1, x0; \
91 pxor x3, x0; \
92 pxor x3, x2; \
93 pxor x4, x2; \
94 movdqa x0, x4; \
95 get_key(i, 1, RT0); \
96 pxor RT0, x1; \
97 get_key(i, 3, RT0); \
98 pxor RT0, x3; \
99 pslld $5, x0; \
100 psrld $(32 - 5), x4; \
101 por x4, x0; \
102 movdqa x2, x4; \
103 pslld $22, x2; \
104 psrld $(32 - 22), x4; \
105 por x4, x2; \
106 get_key(i, 0, RT0); \
107 pxor RT0, x0; \
108 get_key(i, 2, RT0); \
109 pxor RT0, x2;
110
111#define KL(x0, x1, x2, x3, x4, i) \
112 K(x0, x1, x2, x3, x4, i); \
113 movdqa x0, x4; \
114 psrld $5, x0; \
115 pslld $(32 - 5), x4; \
116 por x4, x0; \
117 movdqa x2, x4; \
118 psrld $22, x2; \
119 pslld $(32 - 22), x4; \
120 por x4, x2; \
121 pxor x3, x2; \
122 pxor x3, x0; \
123 movdqa x1, x4; \
124 pslld $7, x4; \
125 pxor x1, x0; \
126 pxor x4, x2; \
127 movdqa x1, x4; \
128 psrld $1, x1; \
129 pslld $(32 - 1), x4; \
130 por x4, x1; \
131 movdqa x3, x4; \
132 psrld $7, x3; \
133 pslld $(32 - 7), x4; \
134 por x4, x3; \
135 pxor x0, x1; \
136 movdqa x0, x4; \
137 pslld $3, x4; \
138 pxor x4, x3; \
139 movdqa x0, x4; \
140 psrld $13, x0; \
141 pslld $(32 - 13), x4; \
142 por x4, x0; \
143 pxor x2, x1; \
144 pxor x2, x3; \
145 movdqa x2, x4; \
146 psrld $3, x2; \
147 pslld $(32 - 3), x4; \
148 por x4, x2;
149
150#define S0(x0, x1, x2, x3, x4) \
151 movdqa x3, x4; \
152 por x0, x3; \
153 pxor x4, x0; \
154 pxor x2, x4; \
155 pxor RNOT, x4; \
156 pxor x1, x3; \
157 pand x0, x1; \
158 pxor x4, x1; \
159 pxor x0, x2; \
160 pxor x3, x0; \
161 por x0, x4; \
162 pxor x2, x0; \
163 pand x1, x2; \
164 pxor x2, x3; \
165 pxor RNOT, x1; \
166 pxor x4, x2; \
167 pxor x2, x1;
168
169#define S1(x0, x1, x2, x3, x4) \
170 movdqa x1, x4; \
171 pxor x0, x1; \
172 pxor x3, x0; \
173 pxor RNOT, x3; \
174 pand x1, x4; \
175 por x1, x0; \
176 pxor x2, x3; \
177 pxor x3, x0; \
178 pxor x3, x1; \
179 pxor x4, x3; \
180 por x4, x1; \
181 pxor x2, x4; \
182 pand x0, x2; \
183 pxor x1, x2; \
184 por x0, x1; \
185 pxor RNOT, x0; \
186 pxor x2, x0; \
187 pxor x1, x4;
188
189#define S2(x0, x1, x2, x3, x4) \
190 pxor RNOT, x3; \
191 pxor x0, x1; \
192 movdqa x0, x4; \
193 pand x2, x0; \
194 pxor x3, x0; \
195 por x4, x3; \
196 pxor x1, x2; \
197 pxor x1, x3; \
198 pand x0, x1; \
199 pxor x2, x0; \
200 pand x3, x2; \
201 por x1, x3; \
202 pxor RNOT, x0; \
203 pxor x0, x3; \
204 pxor x0, x4; \
205 pxor x2, x0; \
206 por x2, x1;
207
208#define S3(x0, x1, x2, x3, x4) \
209 movdqa x1, x4; \
210 pxor x3, x1; \
211 por x0, x3; \
212 pand x0, x4; \
213 pxor x2, x0; \
214 pxor x1, x2; \
215 pand x3, x1; \
216 pxor x3, x2; \
217 por x4, x0; \
218 pxor x3, x4; \
219 pxor x0, x1; \
220 pand x3, x0; \
221 pand x4, x3; \
222 pxor x2, x3; \
223 por x1, x4; \
224 pand x1, x2; \
225 pxor x3, x4; \
226 pxor x3, x0; \
227 pxor x2, x3;
228
229#define S4(x0, x1, x2, x3, x4) \
230 movdqa x3, x4; \
231 pand x0, x3; \
232 pxor x4, x0; \
233 pxor x2, x3; \
234 por x4, x2; \
235 pxor x1, x0; \
236 pxor x3, x4; \
237 por x0, x2; \
238 pxor x1, x2; \
239 pand x0, x1; \
240 pxor x4, x1; \
241 pand x2, x4; \
242 pxor x3, x2; \
243 pxor x0, x4; \
244 por x1, x3; \
245 pxor RNOT, x1; \
246 pxor x0, x3;
247
248#define S5(x0, x1, x2, x3, x4) \
249 movdqa x1, x4; \
250 por x0, x1; \
251 pxor x1, x2; \
252 pxor RNOT, x3; \
253 pxor x0, x4; \
254 pxor x2, x0; \
255 pand x4, x1; \
256 por x3, x4; \
257 pxor x0, x4; \
258 pand x3, x0; \
259 pxor x3, x1; \
260 pxor x2, x3; \
261 pxor x1, x0; \
262 pand x4, x2; \
263 pxor x2, x1; \
264 pand x0, x2; \
265 pxor x2, x3;
266
267#define S6(x0, x1, x2, x3, x4) \
268 movdqa x1, x4; \
269 pxor x0, x3; \
270 pxor x2, x1; \
271 pxor x0, x2; \
272 pand x3, x0; \
273 por x3, x1; \
274 pxor RNOT, x4; \
275 pxor x1, x0; \
276 pxor x2, x1; \
277 pxor x4, x3; \
278 pxor x0, x4; \
279 pand x0, x2; \
280 pxor x1, x4; \
281 pxor x3, x2; \
282 pand x1, x3; \
283 pxor x0, x3; \
284 pxor x2, x1;
285
286#define S7(x0, x1, x2, x3, x4) \
287 pxor RNOT, x1; \
288 movdqa x1, x4; \
289 pxor RNOT, x0; \
290 pand x2, x1; \
291 pxor x3, x1; \
292 por x4, x3; \
293 pxor x2, x4; \
294 pxor x3, x2; \
295 pxor x0, x3; \
296 por x1, x0; \
297 pand x0, x2; \
298 pxor x4, x0; \
299 pxor x3, x4; \
300 pand x0, x3; \
301 pxor x1, x4; \
302 pxor x4, x2; \
303 pxor x1, x3; \
304 por x0, x4; \
305 pxor x1, x4;
306
307#define SI0(x0, x1, x2, x3, x4) \
308 movdqa x3, x4; \
309 pxor x0, x1; \
310 por x1, x3; \
311 pxor x1, x4; \
312 pxor RNOT, x0; \
313 pxor x3, x2; \
314 pxor x0, x3; \
315 pand x1, x0; \
316 pxor x2, x0; \
317 pand x3, x2; \
318 pxor x4, x3; \
319 pxor x3, x2; \
320 pxor x3, x1; \
321 pand x0, x3; \
322 pxor x0, x1; \
323 pxor x2, x0; \
324 pxor x3, x4;
325
326#define SI1(x0, x1, x2, x3, x4) \
327 pxor x3, x1; \
328 movdqa x0, x4; \
329 pxor x2, x0; \
330 pxor RNOT, x2; \
331 por x1, x4; \
332 pxor x3, x4; \
333 pand x1, x3; \
334 pxor x2, x1; \
335 pand x4, x2; \
336 pxor x1, x4; \
337 por x3, x1; \
338 pxor x0, x3; \
339 pxor x0, x2; \
340 por x4, x0; \
341 pxor x4, x2; \
342 pxor x0, x1; \
343 pxor x1, x4;
344
345#define SI2(x0, x1, x2, x3, x4) \
346 pxor x1, x2; \
347 movdqa x3, x4; \
348 pxor RNOT, x3; \
349 por x2, x3; \
350 pxor x4, x2; \
351 pxor x0, x4; \
352 pxor x1, x3; \
353 por x2, x1; \
354 pxor x0, x2; \
355 pxor x4, x1; \
356 por x3, x4; \
357 pxor x3, x2; \
358 pxor x2, x4; \
359 pand x1, x2; \
360 pxor x3, x2; \
361 pxor x4, x3; \
362 pxor x0, x4;
363
364#define SI3(x0, x1, x2, x3, x4) \
365 pxor x1, x2; \
366 movdqa x1, x4; \
367 pand x2, x1; \
368 pxor x0, x1; \
369 por x4, x0; \
370 pxor x3, x4; \
371 pxor x3, x0; \
372 por x1, x3; \
373 pxor x2, x1; \
374 pxor x3, x1; \
375 pxor x2, x0; \
376 pxor x3, x2; \
377 pand x1, x3; \
378 pxor x0, x1; \
379 pand x2, x0; \
380 pxor x3, x4; \
381 pxor x0, x3; \
382 pxor x1, x0;
383
384#define SI4(x0, x1, x2, x3, x4) \
385 pxor x3, x2; \
386 movdqa x0, x4; \
387 pand x1, x0; \
388 pxor x2, x0; \
389 por x3, x2; \
390 pxor RNOT, x4; \
391 pxor x0, x1; \
392 pxor x2, x0; \
393 pand x4, x2; \
394 pxor x0, x2; \
395 por x4, x0; \
396 pxor x3, x0; \
397 pand x2, x3; \
398 pxor x3, x4; \
399 pxor x1, x3; \
400 pand x0, x1; \
401 pxor x1, x4; \
402 pxor x3, x0;
403
404#define SI5(x0, x1, x2, x3, x4) \
405 movdqa x1, x4; \
406 por x2, x1; \
407 pxor x4, x2; \
408 pxor x3, x1; \
409 pand x4, x3; \
410 pxor x3, x2; \
411 por x0, x3; \
412 pxor RNOT, x0; \
413 pxor x2, x3; \
414 por x0, x2; \
415 pxor x1, x4; \
416 pxor x4, x2; \
417 pand x0, x4; \
418 pxor x1, x0; \
419 pxor x3, x1; \
420 pand x2, x0; \
421 pxor x3, x2; \
422 pxor x2, x0; \
423 pxor x4, x2; \
424 pxor x3, x4;
425
426#define SI6(x0, x1, x2, x3, x4) \
427 pxor x2, x0; \
428 movdqa x0, x4; \
429 pand x3, x0; \
430 pxor x3, x2; \
431 pxor x2, x0; \
432 pxor x1, x3; \
433 por x4, x2; \
434 pxor x3, x2; \
435 pand x0, x3; \
436 pxor RNOT, x0; \
437 pxor x1, x3; \
438 pand x2, x1; \
439 pxor x0, x4; \
440 pxor x4, x3; \
441 pxor x2, x4; \
442 pxor x1, x0; \
443 pxor x0, x2;
444
445#define SI7(x0, x1, x2, x3, x4) \
446 movdqa x3, x4; \
447 pand x0, x3; \
448 pxor x2, x0; \
449 por x4, x2; \
450 pxor x1, x4; \
451 pxor RNOT, x0; \
452 por x3, x1; \
453 pxor x0, x4; \
454 pand x2, x0; \
455 pxor x1, x0; \
456 pand x2, x1; \
457 pxor x2, x3; \
458 pxor x3, x4; \
459 pand x3, x2; \
460 por x0, x3; \
461 pxor x4, x1; \
462 pxor x4, x3; \
463 pand x0, x4; \
464 pxor x2, x4;
465
466#define transpose_4x4(x0, x1, x2, x3, t1, t2, t3) \
467 movdqa x2, t3; \
468 movdqa x0, t1; \
469 unpcklps x3, t3; \
470 movdqa x0, t2; \
471 unpcklps x1, t1; \
472 unpckhps x1, t2; \
473 movdqa t3, x1; \
474 unpckhps x3, x2; \
475 movdqa t1, x0; \
476 movhlps t1, x1; \
477 movdqa t2, t1; \
478 movlhps t3, x0; \
479 movlhps x2, t1; \
480 movhlps t2, x2; \
481 movdqa x2, x3; \
482 movdqa t1, x2;
483
484#define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \
485 movdqu (0*4*4)(in), x0; \
486 movdqu (1*4*4)(in), x1; \
487 movdqu (2*4*4)(in), x2; \
488 movdqu (3*4*4)(in), x3; \
489 \
490 transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
491
492#define write_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
493 transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
494 \
495 movdqu x0, (0*4*4)(out); \
496 movdqu x1, (1*4*4)(out); \
497 movdqu x2, (2*4*4)(out); \
498 movdqu x3, (3*4*4)(out);
499
500#define xor_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
501 transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
502 \
503 movdqu (0*4*4)(out), t0; \
504 pxor t0, x0; \
505 movdqu x0, (0*4*4)(out); \
506 movdqu (1*4*4)(out), t0; \
507 pxor t0, x1; \
508 movdqu x1, (1*4*4)(out); \
509 movdqu (2*4*4)(out), t0; \
510 pxor t0, x2; \
511 movdqu x2, (2*4*4)(out); \
512 movdqu (3*4*4)(out), t0; \
513 pxor t0, x3; \
514 movdqu x3, (3*4*4)(out);
515
516.align 8
517.global __serpent_enc_blk_4way
518.type __serpent_enc_blk_4way,@function;
519
520__serpent_enc_blk_4way:
521 /* input:
522 * arg_ctx(%esp): ctx, CTX
523 * arg_dst(%esp): dst
524 * arg_src(%esp): src
525 * arg_xor(%esp): bool, if true: xor output
526 */
527
528 pcmpeqd RNOT, RNOT;
529
530 movl arg_ctx(%esp), CTX;
531
532 movl arg_src(%esp), %eax;
533 read_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);
534
535 K(RA, RB, RC, RD, RE, 0);
536 S0(RA, RB, RC, RD, RE); LK(RC, RB, RD, RA, RE, 1);
537 S1(RC, RB, RD, RA, RE); LK(RE, RD, RA, RC, RB, 2);
538 S2(RE, RD, RA, RC, RB); LK(RB, RD, RE, RC, RA, 3);
539 S3(RB, RD, RE, RC, RA); LK(RC, RA, RD, RB, RE, 4);
540 S4(RC, RA, RD, RB, RE); LK(RA, RD, RB, RE, RC, 5);
541 S5(RA, RD, RB, RE, RC); LK(RC, RA, RD, RE, RB, 6);
542 S6(RC, RA, RD, RE, RB); LK(RD, RB, RA, RE, RC, 7);
543 S7(RD, RB, RA, RE, RC); LK(RC, RA, RE, RD, RB, 8);
544 S0(RC, RA, RE, RD, RB); LK(RE, RA, RD, RC, RB, 9);
545 S1(RE, RA, RD, RC, RB); LK(RB, RD, RC, RE, RA, 10);
546 S2(RB, RD, RC, RE, RA); LK(RA, RD, RB, RE, RC, 11);
547 S3(RA, RD, RB, RE, RC); LK(RE, RC, RD, RA, RB, 12);
548 S4(RE, RC, RD, RA, RB); LK(RC, RD, RA, RB, RE, 13);
549 S5(RC, RD, RA, RB, RE); LK(RE, RC, RD, RB, RA, 14);
550 S6(RE, RC, RD, RB, RA); LK(RD, RA, RC, RB, RE, 15);
551 S7(RD, RA, RC, RB, RE); LK(RE, RC, RB, RD, RA, 16);
552 S0(RE, RC, RB, RD, RA); LK(RB, RC, RD, RE, RA, 17);
553 S1(RB, RC, RD, RE, RA); LK(RA, RD, RE, RB, RC, 18);
554 S2(RA, RD, RE, RB, RC); LK(RC, RD, RA, RB, RE, 19);
555 S3(RC, RD, RA, RB, RE); LK(RB, RE, RD, RC, RA, 20);
556 S4(RB, RE, RD, RC, RA); LK(RE, RD, RC, RA, RB, 21);
557 S5(RE, RD, RC, RA, RB); LK(RB, RE, RD, RA, RC, 22);
558 S6(RB, RE, RD, RA, RC); LK(RD, RC, RE, RA, RB, 23);
559 S7(RD, RC, RE, RA, RB); LK(RB, RE, RA, RD, RC, 24);
560 S0(RB, RE, RA, RD, RC); LK(RA, RE, RD, RB, RC, 25);
561 S1(RA, RE, RD, RB, RC); LK(RC, RD, RB, RA, RE, 26);
562 S2(RC, RD, RB, RA, RE); LK(RE, RD, RC, RA, RB, 27);
563 S3(RE, RD, RC, RA, RB); LK(RA, RB, RD, RE, RC, 28);
564 S4(RA, RB, RD, RE, RC); LK(RB, RD, RE, RC, RA, 29);
565 S5(RB, RD, RE, RC, RA); LK(RA, RB, RD, RC, RE, 30);
566 S6(RA, RB, RD, RC, RE); LK(RD, RE, RB, RC, RA, 31);
567 S7(RD, RE, RB, RC, RA); K(RA, RB, RC, RD, RE, 32);
568
569 movl arg_dst(%esp), %eax;
570
571 cmpb $0, arg_xor(%esp);
572 jnz __enc_xor4;
573
574 write_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);
575
576 ret;
577
578__enc_xor4:
579 xor_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);
580
581 ret;
582
583.align 8
584.global serpent_dec_blk_4way
585.type serpent_dec_blk_4way,@function;
586
587serpent_dec_blk_4way:
588 /* input:
589 * arg_ctx(%esp): ctx, CTX
590 * arg_dst(%esp): dst
591 * arg_src(%esp): src
592 */
593
594 pcmpeqd RNOT, RNOT;
595
596 movl arg_ctx(%esp), CTX;
597
598 movl arg_src(%esp), %eax;
599 read_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);
600
601 K(RA, RB, RC, RD, RE, 32);
602 SI7(RA, RB, RC, RD, RE); KL(RB, RD, RA, RE, RC, 31);
603 SI6(RB, RD, RA, RE, RC); KL(RA, RC, RE, RB, RD, 30);
604 SI5(RA, RC, RE, RB, RD); KL(RC, RD, RA, RE, RB, 29);
605 SI4(RC, RD, RA, RE, RB); KL(RC, RA, RB, RE, RD, 28);
606 SI3(RC, RA, RB, RE, RD); KL(RB, RC, RD, RE, RA, 27);
607 SI2(RB, RC, RD, RE, RA); KL(RC, RA, RE, RD, RB, 26);
608 SI1(RC, RA, RE, RD, RB); KL(RB, RA, RE, RD, RC, 25);
609 SI0(RB, RA, RE, RD, RC); KL(RE, RC, RA, RB, RD, 24);
610 SI7(RE, RC, RA, RB, RD); KL(RC, RB, RE, RD, RA, 23);
611 SI6(RC, RB, RE, RD, RA); KL(RE, RA, RD, RC, RB, 22);
612 SI5(RE, RA, RD, RC, RB); KL(RA, RB, RE, RD, RC, 21);
613 SI4(RA, RB, RE, RD, RC); KL(RA, RE, RC, RD, RB, 20);
614 SI3(RA, RE, RC, RD, RB); KL(RC, RA, RB, RD, RE, 19);
615 SI2(RC, RA, RB, RD, RE); KL(RA, RE, RD, RB, RC, 18);
616 SI1(RA, RE, RD, RB, RC); KL(RC, RE, RD, RB, RA, 17);
617 SI0(RC, RE, RD, RB, RA); KL(RD, RA, RE, RC, RB, 16);
618 SI7(RD, RA, RE, RC, RB); KL(RA, RC, RD, RB, RE, 15);
619 SI6(RA, RC, RD, RB, RE); KL(RD, RE, RB, RA, RC, 14);
620 SI5(RD, RE, RB, RA, RC); KL(RE, RC, RD, RB, RA, 13);
621 SI4(RE, RC, RD, RB, RA); KL(RE, RD, RA, RB, RC, 12);
622 SI3(RE, RD, RA, RB, RC); KL(RA, RE, RC, RB, RD, 11);
623 SI2(RA, RE, RC, RB, RD); KL(RE, RD, RB, RC, RA, 10);
624 SI1(RE, RD, RB, RC, RA); KL(RA, RD, RB, RC, RE, 9);
625 SI0(RA, RD, RB, RC, RE); KL(RB, RE, RD, RA, RC, 8);
626 SI7(RB, RE, RD, RA, RC); KL(RE, RA, RB, RC, RD, 7);
627 SI6(RE, RA, RB, RC, RD); KL(RB, RD, RC, RE, RA, 6);
628 SI5(RB, RD, RC, RE, RA); KL(RD, RA, RB, RC, RE, 5);
629 SI4(RD, RA, RB, RC, RE); KL(RD, RB, RE, RC, RA, 4);
630 SI3(RD, RB, RE, RC, RA); KL(RE, RD, RA, RC, RB, 3);
631 SI2(RE, RD, RA, RC, RB); KL(RD, RB, RC, RA, RE, 2);
632 SI1(RD, RB, RC, RA, RE); KL(RE, RB, RC, RA, RD, 1);
633 SI0(RE, RB, RC, RA, RD); K(RC, RD, RB, RE, RA, 0);
634
635 movl arg_dst(%esp), %eax;
636 write_blocks(%eax, RC, RD, RB, RE, RT0, RT1, RA);
637
638 ret;
diff --git a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
new file mode 100644
index 000000000000..7f24a1540821
--- /dev/null
+++ b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
@@ -0,0 +1,761 @@
1/*
2 * Serpent Cipher 8-way parallel algorithm (x86_64/SSE2)
3 *
4 * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
5 *
6 * Based on crypto/serpent.c by
7 * Copyright (C) 2002 Dag Arne Osvik <osvik@ii.uib.no>
8 * 2003 Herbert Valerio Riedel <hvr@gnu.org>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
23 * USA
24 *
25 */
26
27.file "serpent-sse2-x86_64-asm_64.S"
28.text
29
30#define CTX %rdi
31
32/**********************************************************************
33 8-way SSE2 serpent
34 **********************************************************************/
35#define RA1 %xmm0
36#define RB1 %xmm1
37#define RC1 %xmm2
38#define RD1 %xmm3
39#define RE1 %xmm4
40
41#define RA2 %xmm5
42#define RB2 %xmm6
43#define RC2 %xmm7
44#define RD2 %xmm8
45#define RE2 %xmm9
46
47#define RNOT %xmm10
48
49#define RK0 %xmm11
50#define RK1 %xmm12
51#define RK2 %xmm13
52#define RK3 %xmm14
53
54#define S0_1(x0, x1, x2, x3, x4) \
55 movdqa x3, x4; \
56 por x0, x3; \
57 pxor x4, x0; \
58 pxor x2, x4; \
59 pxor RNOT, x4; \
60 pxor x1, x3; \
61 pand x0, x1; \
62 pxor x4, x1; \
63 pxor x0, x2;
64#define S0_2(x0, x1, x2, x3, x4) \
65 pxor x3, x0; \
66 por x0, x4; \
67 pxor x2, x0; \
68 pand x1, x2; \
69 pxor x2, x3; \
70 pxor RNOT, x1; \
71 pxor x4, x2; \
72 pxor x2, x1;
73
74#define S1_1(x0, x1, x2, x3, x4) \
75 movdqa x1, x4; \
76 pxor x0, x1; \
77 pxor x3, x0; \
78 pxor RNOT, x3; \
79 pand x1, x4; \
80 por x1, x0; \
81 pxor x2, x3; \
82 pxor x3, x0; \
83 pxor x3, x1;
84#define S1_2(x0, x1, x2, x3, x4) \
85 pxor x4, x3; \
86 por x4, x1; \
87 pxor x2, x4; \
88 pand x0, x2; \
89 pxor x1, x2; \
90 por x0, x1; \
91 pxor RNOT, x0; \
92 pxor x2, x0; \
93 pxor x1, x4;
94
95#define S2_1(x0, x1, x2, x3, x4) \
96 pxor RNOT, x3; \
97 pxor x0, x1; \
98 movdqa x0, x4; \
99 pand x2, x0; \
100 pxor x3, x0; \
101 por x4, x3; \
102 pxor x1, x2; \
103 pxor x1, x3; \
104 pand x0, x1;
105#define S2_2(x0, x1, x2, x3, x4) \
106 pxor x2, x0; \
107 pand x3, x2; \
108 por x1, x3; \
109 pxor RNOT, x0; \
110 pxor x0, x3; \
111 pxor x0, x4; \
112 pxor x2, x0; \
113 por x2, x1;
114
115#define S3_1(x0, x1, x2, x3, x4) \
116 movdqa x1, x4; \
117 pxor x3, x1; \
118 por x0, x3; \
119 pand x0, x4; \
120 pxor x2, x0; \
121 pxor x1, x2; \
122 pand x3, x1; \
123 pxor x3, x2; \
124 por x4, x0; \
125 pxor x3, x4;
126#define S3_2(x0, x1, x2, x3, x4) \
127 pxor x0, x1; \
128 pand x3, x0; \
129 pand x4, x3; \
130 pxor x2, x3; \
131 por x1, x4; \
132 pand x1, x2; \
133 pxor x3, x4; \
134 pxor x3, x0; \
135 pxor x2, x3;
136
137#define S4_1(x0, x1, x2, x3, x4) \
138 movdqa x3, x4; \
139 pand x0, x3; \
140 pxor x4, x0; \
141 pxor x2, x3; \
142 por x4, x2; \
143 pxor x1, x0; \
144 pxor x3, x4; \
145 por x0, x2; \
146 pxor x1, x2;
147#define S4_2(x0, x1, x2, x3, x4) \
148 pand x0, x1; \
149 pxor x4, x1; \
150 pand x2, x4; \
151 pxor x3, x2; \
152 pxor x0, x4; \
153 por x1, x3; \
154 pxor RNOT, x1; \
155 pxor x0, x3;
156
157#define S5_1(x0, x1, x2, x3, x4) \
158 movdqa x1, x4; \
159 por x0, x1; \
160 pxor x1, x2; \
161 pxor RNOT, x3; \
162 pxor x0, x4; \
163 pxor x2, x0; \
164 pand x4, x1; \
165 por x3, x4; \
166 pxor x0, x4;
167#define S5_2(x0, x1, x2, x3, x4) \
168 pand x3, x0; \
169 pxor x3, x1; \
170 pxor x2, x3; \
171 pxor x1, x0; \
172 pand x4, x2; \
173 pxor x2, x1; \
174 pand x0, x2; \
175 pxor x2, x3;
176
177#define S6_1(x0, x1, x2, x3, x4) \
178 movdqa x1, x4; \
179 pxor x0, x3; \
180 pxor x2, x1; \
181 pxor x0, x2; \
182 pand x3, x0; \
183 por x3, x1; \
184 pxor RNOT, x4; \
185 pxor x1, x0; \
186 pxor x2, x1;
187#define S6_2(x0, x1, x2, x3, x4) \
188 pxor x4, x3; \
189 pxor x0, x4; \
190 pand x0, x2; \
191 pxor x1, x4; \
192 pxor x3, x2; \
193 pand x1, x3; \
194 pxor x0, x3; \
195 pxor x2, x1;
196
197#define S7_1(x0, x1, x2, x3, x4) \
198 pxor RNOT, x1; \
199 movdqa x1, x4; \
200 pxor RNOT, x0; \
201 pand x2, x1; \
202 pxor x3, x1; \
203 por x4, x3; \
204 pxor x2, x4; \
205 pxor x3, x2; \
206 pxor x0, x3; \
207 por x1, x0;
208#define S7_2(x0, x1, x2, x3, x4) \
209 pand x0, x2; \
210 pxor x4, x0; \
211 pxor x3, x4; \
212 pand x0, x3; \
213 pxor x1, x4; \
214 pxor x4, x2; \
215 pxor x1, x3; \
216 por x0, x4; \
217 pxor x1, x4;
218
219#define SI0_1(x0, x1, x2, x3, x4) \
220 movdqa x3, x4; \
221 pxor x0, x1; \
222 por x1, x3; \
223 pxor x1, x4; \
224 pxor RNOT, x0; \
225 pxor x3, x2; \
226 pxor x0, x3; \
227 pand x1, x0; \
228 pxor x2, x0;
229#define SI0_2(x0, x1, x2, x3, x4) \
230 pand x3, x2; \
231 pxor x4, x3; \
232 pxor x3, x2; \
233 pxor x3, x1; \
234 pand x0, x3; \
235 pxor x0, x1; \
236 pxor x2, x0; \
237 pxor x3, x4;
238
239#define SI1_1(x0, x1, x2, x3, x4) \
240 pxor x3, x1; \
241 movdqa x0, x4; \
242 pxor x2, x0; \
243 pxor RNOT, x2; \
244 por x1, x4; \
245 pxor x3, x4; \
246 pand x1, x3; \
247 pxor x2, x1; \
248 pand x4, x2;
249#define SI1_2(x0, x1, x2, x3, x4) \
250 pxor x1, x4; \
251 por x3, x1; \
252 pxor x0, x3; \
253 pxor x0, x2; \
254 por x4, x0; \
255 pxor x4, x2; \
256 pxor x0, x1; \
257 pxor x1, x4;
258
259#define SI2_1(x0, x1, x2, x3, x4) \
260 pxor x1, x2; \
261 movdqa x3, x4; \
262 pxor RNOT, x3; \
263 por x2, x3; \
264 pxor x4, x2; \
265 pxor x0, x4; \
266 pxor x1, x3; \
267 por x2, x1; \
268 pxor x0, x2;
269#define SI2_2(x0, x1, x2, x3, x4) \
270 pxor x4, x1; \
271 por x3, x4; \
272 pxor x3, x2; \
273 pxor x2, x4; \
274 pand x1, x2; \
275 pxor x3, x2; \
276 pxor x4, x3; \
277 pxor x0, x4;
278
279#define SI3_1(x0, x1, x2, x3, x4) \
280 pxor x1, x2; \
281 movdqa x1, x4; \
282 pand x2, x1; \
283 pxor x0, x1; \
284 por x4, x0; \
285 pxor x3, x4; \
286 pxor x3, x0; \
287 por x1, x3; \
288 pxor x2, x1;
289#define SI3_2(x0, x1, x2, x3, x4) \
290 pxor x3, x1; \
291 pxor x2, x0; \
292 pxor x3, x2; \
293 pand x1, x3; \
294 pxor x0, x1; \
295 pand x2, x0; \
296 pxor x3, x4; \
297 pxor x0, x3; \
298 pxor x1, x0;
299
300#define SI4_1(x0, x1, x2, x3, x4) \
301 pxor x3, x2; \
302 movdqa x0, x4; \
303 pand x1, x0; \
304 pxor x2, x0; \
305 por x3, x2; \
306 pxor RNOT, x4; \
307 pxor x0, x1; \
308 pxor x2, x0; \
309 pand x4, x2;
310#define SI4_2(x0, x1, x2, x3, x4) \
311 pxor x0, x2; \
312 por x4, x0; \
313 pxor x3, x0; \
314 pand x2, x3; \
315 pxor x3, x4; \
316 pxor x1, x3; \
317 pand x0, x1; \
318 pxor x1, x4; \
319 pxor x3, x0;
320
321#define SI5_1(x0, x1, x2, x3, x4) \
322 movdqa x1, x4; \
323 por x2, x1; \
324 pxor x4, x2; \
325 pxor x3, x1; \
326 pand x4, x3; \
327 pxor x3, x2; \
328 por x0, x3; \
329 pxor RNOT, x0; \
330 pxor x2, x3; \
331 por x0, x2;
332#define SI5_2(x0, x1, x2, x3, x4) \
333 pxor x1, x4; \
334 pxor x4, x2; \
335 pand x0, x4; \
336 pxor x1, x0; \
337 pxor x3, x1; \
338 pand x2, x0; \
339 pxor x3, x2; \
340 pxor x2, x0; \
341 pxor x4, x2; \
342 pxor x3, x4;
343
344#define SI6_1(x0, x1, x2, x3, x4) \
345 pxor x2, x0; \
346 movdqa x0, x4; \
347 pand x3, x0; \
348 pxor x3, x2; \
349 pxor x2, x0; \
350 pxor x1, x3; \
351 por x4, x2; \
352 pxor x3, x2; \
353 pand x0, x3;
354#define SI6_2(x0, x1, x2, x3, x4) \
355 pxor RNOT, x0; \
356 pxor x1, x3; \
357 pand x2, x1; \
358 pxor x0, x4; \
359 pxor x4, x3; \
360 pxor x2, x4; \
361 pxor x1, x0; \
362 pxor x0, x2;
363
364#define SI7_1(x0, x1, x2, x3, x4) \
365 movdqa x3, x4; \
366 pand x0, x3; \
367 pxor x2, x0; \
368 por x4, x2; \
369 pxor x1, x4; \
370 pxor RNOT, x0; \
371 por x3, x1; \
372 pxor x0, x4; \
373 pand x2, x0; \
374 pxor x1, x0;
375#define SI7_2(x0, x1, x2, x3, x4) \
376 pand x2, x1; \
377 pxor x2, x3; \
378 pxor x3, x4; \
379 pand x3, x2; \
380 por x0, x3; \
381 pxor x4, x1; \
382 pxor x4, x3; \
383 pand x0, x4; \
384 pxor x2, x4;
385
386#define get_key(i, j, t) \
387 movd (4*(i)+(j))*4(CTX), t; \
388 pshufd $0, t, t;
389
390#define K2(x0, x1, x2, x3, x4, i) \
391 get_key(i, 0, RK0); \
392 get_key(i, 1, RK1); \
393 get_key(i, 2, RK2); \
394 get_key(i, 3, RK3); \
395 pxor RK0, x0 ## 1; \
396 pxor RK1, x1 ## 1; \
397 pxor RK2, x2 ## 1; \
398 pxor RK3, x3 ## 1; \
399 pxor RK0, x0 ## 2; \
400 pxor RK1, x1 ## 2; \
401 pxor RK2, x2 ## 2; \
402 pxor RK3, x3 ## 2;
403
404#define LK2(x0, x1, x2, x3, x4, i) \
405 movdqa x0 ## 1, x4 ## 1; \
406 pslld $13, x0 ## 1; \
407 psrld $(32 - 13), x4 ## 1; \
408 por x4 ## 1, x0 ## 1; \
409 pxor x0 ## 1, x1 ## 1; \
410 movdqa x2 ## 1, x4 ## 1; \
411 pslld $3, x2 ## 1; \
412 psrld $(32 - 3), x4 ## 1; \
413 por x4 ## 1, x2 ## 1; \
414 pxor x2 ## 1, x1 ## 1; \
415 movdqa x0 ## 2, x4 ## 2; \
416 pslld $13, x0 ## 2; \
417 psrld $(32 - 13), x4 ## 2; \
418 por x4 ## 2, x0 ## 2; \
419 pxor x0 ## 2, x1 ## 2; \
420 movdqa x2 ## 2, x4 ## 2; \
421 pslld $3, x2 ## 2; \
422 psrld $(32 - 3), x4 ## 2; \
423 por x4 ## 2, x2 ## 2; \
424 pxor x2 ## 2, x1 ## 2; \
425 movdqa x1 ## 1, x4 ## 1; \
426 pslld $1, x1 ## 1; \
427 psrld $(32 - 1), x4 ## 1; \
428 por x4 ## 1, x1 ## 1; \
429 movdqa x0 ## 1, x4 ## 1; \
430 pslld $3, x4 ## 1; \
431 pxor x2 ## 1, x3 ## 1; \
432 pxor x4 ## 1, x3 ## 1; \
433 movdqa x3 ## 1, x4 ## 1; \
434 get_key(i, 1, RK1); \
435 movdqa x1 ## 2, x4 ## 2; \
436 pslld $1, x1 ## 2; \
437 psrld $(32 - 1), x4 ## 2; \
438 por x4 ## 2, x1 ## 2; \
439 movdqa x0 ## 2, x4 ## 2; \
440 pslld $3, x4 ## 2; \
441 pxor x2 ## 2, x3 ## 2; \
442 pxor x4 ## 2, x3 ## 2; \
443 movdqa x3 ## 2, x4 ## 2; \
444 get_key(i, 3, RK3); \
445 pslld $7, x3 ## 1; \
446 psrld $(32 - 7), x4 ## 1; \
447 por x4 ## 1, x3 ## 1; \
448 movdqa x1 ## 1, x4 ## 1; \
449 pslld $7, x4 ## 1; \
450 pxor x1 ## 1, x0 ## 1; \
451 pxor x3 ## 1, x0 ## 1; \
452 pxor x3 ## 1, x2 ## 1; \
453 pxor x4 ## 1, x2 ## 1; \
454 get_key(i, 0, RK0); \
455 pslld $7, x3 ## 2; \
456 psrld $(32 - 7), x4 ## 2; \
457 por x4 ## 2, x3 ## 2; \
458 movdqa x1 ## 2, x4 ## 2; \
459 pslld $7, x4 ## 2; \
460 pxor x1 ## 2, x0 ## 2; \
461 pxor x3 ## 2, x0 ## 2; \
462 pxor x3 ## 2, x2 ## 2; \
463 pxor x4 ## 2, x2 ## 2; \
464 get_key(i, 2, RK2); \
465 pxor RK1, x1 ## 1; \
466 pxor RK3, x3 ## 1; \
467 movdqa x0 ## 1, x4 ## 1; \
468 pslld $5, x0 ## 1; \
469 psrld $(32 - 5), x4 ## 1; \
470 por x4 ## 1, x0 ## 1; \
471 movdqa x2 ## 1, x4 ## 1; \
472 pslld $22, x2 ## 1; \
473 psrld $(32 - 22), x4 ## 1; \
474 por x4 ## 1, x2 ## 1; \
475 pxor RK0, x0 ## 1; \
476 pxor RK2, x2 ## 1; \
477 pxor RK1, x1 ## 2; \
478 pxor RK3, x3 ## 2; \
479 movdqa x0 ## 2, x4 ## 2; \
480 pslld $5, x0 ## 2; \
481 psrld $(32 - 5), x4 ## 2; \
482 por x4 ## 2, x0 ## 2; \
483 movdqa x2 ## 2, x4 ## 2; \
484 pslld $22, x2 ## 2; \
485 psrld $(32 - 22), x4 ## 2; \
486 por x4 ## 2, x2 ## 2; \
487 pxor RK0, x0 ## 2; \
488 pxor RK2, x2 ## 2;
489
490#define KL2(x0, x1, x2, x3, x4, i) \
491 pxor RK0, x0 ## 1; \
492 pxor RK2, x2 ## 1; \
493 movdqa x0 ## 1, x4 ## 1; \
494 psrld $5, x0 ## 1; \
495 pslld $(32 - 5), x4 ## 1; \
496 por x4 ## 1, x0 ## 1; \
497 pxor RK3, x3 ## 1; \
498 pxor RK1, x1 ## 1; \
499 movdqa x2 ## 1, x4 ## 1; \
500 psrld $22, x2 ## 1; \
501 pslld $(32 - 22), x4 ## 1; \
502 por x4 ## 1, x2 ## 1; \
503 pxor x3 ## 1, x2 ## 1; \
504 pxor RK0, x0 ## 2; \
505 pxor RK2, x2 ## 2; \
506 movdqa x0 ## 2, x4 ## 2; \
507 psrld $5, x0 ## 2; \
508 pslld $(32 - 5), x4 ## 2; \
509 por x4 ## 2, x0 ## 2; \
510 pxor RK3, x3 ## 2; \
511 pxor RK1, x1 ## 2; \
512 movdqa x2 ## 2, x4 ## 2; \
513 psrld $22, x2 ## 2; \
514 pslld $(32 - 22), x4 ## 2; \
515 por x4 ## 2, x2 ## 2; \
516 pxor x3 ## 2, x2 ## 2; \
517 pxor x3 ## 1, x0 ## 1; \
518 movdqa x1 ## 1, x4 ## 1; \
519 pslld $7, x4 ## 1; \
520 pxor x1 ## 1, x0 ## 1; \
521 pxor x4 ## 1, x2 ## 1; \
522 movdqa x1 ## 1, x4 ## 1; \
523 psrld $1, x1 ## 1; \
524 pslld $(32 - 1), x4 ## 1; \
525 por x4 ## 1, x1 ## 1; \
526 pxor x3 ## 2, x0 ## 2; \
527 movdqa x1 ## 2, x4 ## 2; \
528 pslld $7, x4 ## 2; \
529 pxor x1 ## 2, x0 ## 2; \
530 pxor x4 ## 2, x2 ## 2; \
531 movdqa x1 ## 2, x4 ## 2; \
532 psrld $1, x1 ## 2; \
533 pslld $(32 - 1), x4 ## 2; \
534 por x4 ## 2, x1 ## 2; \
535 movdqa x3 ## 1, x4 ## 1; \
536 psrld $7, x3 ## 1; \
537 pslld $(32 - 7), x4 ## 1; \
538 por x4 ## 1, x3 ## 1; \
539 pxor x0 ## 1, x1 ## 1; \
540 movdqa x0 ## 1, x4 ## 1; \
541 pslld $3, x4 ## 1; \
542 pxor x4 ## 1, x3 ## 1; \
543 movdqa x0 ## 1, x4 ## 1; \
544 movdqa x3 ## 2, x4 ## 2; \
545 psrld $7, x3 ## 2; \
546 pslld $(32 - 7), x4 ## 2; \
547 por x4 ## 2, x3 ## 2; \
548 pxor x0 ## 2, x1 ## 2; \
549 movdqa x0 ## 2, x4 ## 2; \
550 pslld $3, x4 ## 2; \
551 pxor x4 ## 2, x3 ## 2; \
552 movdqa x0 ## 2, x4 ## 2; \
553 psrld $13, x0 ## 1; \
554 pslld $(32 - 13), x4 ## 1; \
555 por x4 ## 1, x0 ## 1; \
556 pxor x2 ## 1, x1 ## 1; \
557 pxor x2 ## 1, x3 ## 1; \
558 movdqa x2 ## 1, x4 ## 1; \
559 psrld $3, x2 ## 1; \
560 pslld $(32 - 3), x4 ## 1; \
561 por x4 ## 1, x2 ## 1; \
562 psrld $13, x0 ## 2; \
563 pslld $(32 - 13), x4 ## 2; \
564 por x4 ## 2, x0 ## 2; \
565 pxor x2 ## 2, x1 ## 2; \
566 pxor x2 ## 2, x3 ## 2; \
567 movdqa x2 ## 2, x4 ## 2; \
568 psrld $3, x2 ## 2; \
569 pslld $(32 - 3), x4 ## 2; \
570 por x4 ## 2, x2 ## 2;
571
572#define S(SBOX, x0, x1, x2, x3, x4) \
573 SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
574 SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
575 SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \
576 SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2);
577
578#define SP(SBOX, x0, x1, x2, x3, x4, i) \
579 get_key(i, 0, RK0); \
580 SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
581 get_key(i, 2, RK2); \
582 SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \
583 get_key(i, 3, RK3); \
584 SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
585 get_key(i, 1, RK1); \
586 SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \
587
588#define transpose_4x4(x0, x1, x2, x3, t1, t2, t3) \
589 movdqa x2, t3; \
590 movdqa x0, t1; \
591 unpcklps x3, t3; \
592 movdqa x0, t2; \
593 unpcklps x1, t1; \
594 unpckhps x1, t2; \
595 movdqa t3, x1; \
596 unpckhps x3, x2; \
597 movdqa t1, x0; \
598 movhlps t1, x1; \
599 movdqa t2, t1; \
600 movlhps t3, x0; \
601 movlhps x2, t1; \
602 movhlps t2, x2; \
603 movdqa x2, x3; \
604 movdqa t1, x2;
605
606#define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \
607 movdqu (0*4*4)(in), x0; \
608 movdqu (1*4*4)(in), x1; \
609 movdqu (2*4*4)(in), x2; \
610 movdqu (3*4*4)(in), x3; \
611 \
612 transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
613
614#define write_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
615 transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
616 \
617 movdqu x0, (0*4*4)(out); \
618 movdqu x1, (1*4*4)(out); \
619 movdqu x2, (2*4*4)(out); \
620 movdqu x3, (3*4*4)(out);
621
622#define xor_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
623 transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
624 \
625 movdqu (0*4*4)(out), t0; \
626 pxor t0, x0; \
627 movdqu x0, (0*4*4)(out); \
628 movdqu (1*4*4)(out), t0; \
629 pxor t0, x1; \
630 movdqu x1, (1*4*4)(out); \
631 movdqu (2*4*4)(out), t0; \
632 pxor t0, x2; \
633 movdqu x2, (2*4*4)(out); \
634 movdqu (3*4*4)(out), t0; \
635 pxor t0, x3; \
636 movdqu x3, (3*4*4)(out);
637
638.align 8
639.global __serpent_enc_blk_8way
640.type __serpent_enc_blk_8way,@function;
641
642__serpent_enc_blk_8way:
643 /* input:
644 * %rdi: ctx, CTX
645 * %rsi: dst
646 * %rdx: src
647 * %rcx: bool, if true: xor output
648 */
649
650 pcmpeqd RNOT, RNOT;
651
652 leaq (4*4*4)(%rdx), %rax;
653 read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
654 read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
655
656 K2(RA, RB, RC, RD, RE, 0);
657 S(S0, RA, RB, RC, RD, RE); LK2(RC, RB, RD, RA, RE, 1);
658 S(S1, RC, RB, RD, RA, RE); LK2(RE, RD, RA, RC, RB, 2);
659 S(S2, RE, RD, RA, RC, RB); LK2(RB, RD, RE, RC, RA, 3);
660 S(S3, RB, RD, RE, RC, RA); LK2(RC, RA, RD, RB, RE, 4);
661 S(S4, RC, RA, RD, RB, RE); LK2(RA, RD, RB, RE, RC, 5);
662 S(S5, RA, RD, RB, RE, RC); LK2(RC, RA, RD, RE, RB, 6);
663 S(S6, RC, RA, RD, RE, RB); LK2(RD, RB, RA, RE, RC, 7);
664 S(S7, RD, RB, RA, RE, RC); LK2(RC, RA, RE, RD, RB, 8);
665 S(S0, RC, RA, RE, RD, RB); LK2(RE, RA, RD, RC, RB, 9);
666 S(S1, RE, RA, RD, RC, RB); LK2(RB, RD, RC, RE, RA, 10);
667 S(S2, RB, RD, RC, RE, RA); LK2(RA, RD, RB, RE, RC, 11);
668 S(S3, RA, RD, RB, RE, RC); LK2(RE, RC, RD, RA, RB, 12);
669 S(S4, RE, RC, RD, RA, RB); LK2(RC, RD, RA, RB, RE, 13);
670 S(S5, RC, RD, RA, RB, RE); LK2(RE, RC, RD, RB, RA, 14);
671 S(S6, RE, RC, RD, RB, RA); LK2(RD, RA, RC, RB, RE, 15);
672 S(S7, RD, RA, RC, RB, RE); LK2(RE, RC, RB, RD, RA, 16);
673 S(S0, RE, RC, RB, RD, RA); LK2(RB, RC, RD, RE, RA, 17);
674 S(S1, RB, RC, RD, RE, RA); LK2(RA, RD, RE, RB, RC, 18);
675 S(S2, RA, RD, RE, RB, RC); LK2(RC, RD, RA, RB, RE, 19);
676 S(S3, RC, RD, RA, RB, RE); LK2(RB, RE, RD, RC, RA, 20);
677 S(S4, RB, RE, RD, RC, RA); LK2(RE, RD, RC, RA, RB, 21);
678 S(S5, RE, RD, RC, RA, RB); LK2(RB, RE, RD, RA, RC, 22);
679 S(S6, RB, RE, RD, RA, RC); LK2(RD, RC, RE, RA, RB, 23);
680 S(S7, RD, RC, RE, RA, RB); LK2(RB, RE, RA, RD, RC, 24);
681 S(S0, RB, RE, RA, RD, RC); LK2(RA, RE, RD, RB, RC, 25);
682 S(S1, RA, RE, RD, RB, RC); LK2(RC, RD, RB, RA, RE, 26);
683 S(S2, RC, RD, RB, RA, RE); LK2(RE, RD, RC, RA, RB, 27);
684 S(S3, RE, RD, RC, RA, RB); LK2(RA, RB, RD, RE, RC, 28);
685 S(S4, RA, RB, RD, RE, RC); LK2(RB, RD, RE, RC, RA, 29);
686 S(S5, RB, RD, RE, RC, RA); LK2(RA, RB, RD, RC, RE, 30);
687 S(S6, RA, RB, RD, RC, RE); LK2(RD, RE, RB, RC, RA, 31);
688 S(S7, RD, RE, RB, RC, RA); K2(RA, RB, RC, RD, RE, 32);
689
690 leaq (4*4*4)(%rsi), %rax;
691
692 testb %cl, %cl;
693 jnz __enc_xor8;
694
695 write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
696 write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
697
698 ret;
699
700__enc_xor8:
701 xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
702 xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
703
704 ret;
705
706.align 8
707.global serpent_dec_blk_8way
708.type serpent_dec_blk_8way,@function;
709
710serpent_dec_blk_8way:
711 /* input:
712 * %rdi: ctx, CTX
713 * %rsi: dst
714 * %rdx: src
715 */
716
717 pcmpeqd RNOT, RNOT;
718
719 leaq (4*4*4)(%rdx), %rax;
720 read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
721 read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
722
723 K2(RA, RB, RC, RD, RE, 32);
724 SP(SI7, RA, RB, RC, RD, RE, 31); KL2(RB, RD, RA, RE, RC, 31);
725 SP(SI6, RB, RD, RA, RE, RC, 30); KL2(RA, RC, RE, RB, RD, 30);
726 SP(SI5, RA, RC, RE, RB, RD, 29); KL2(RC, RD, RA, RE, RB, 29);
727 SP(SI4, RC, RD, RA, RE, RB, 28); KL2(RC, RA, RB, RE, RD, 28);
728 SP(SI3, RC, RA, RB, RE, RD, 27); KL2(RB, RC, RD, RE, RA, 27);
729 SP(SI2, RB, RC, RD, RE, RA, 26); KL2(RC, RA, RE, RD, RB, 26);
730 SP(SI1, RC, RA, RE, RD, RB, 25); KL2(RB, RA, RE, RD, RC, 25);
731 SP(SI0, RB, RA, RE, RD, RC, 24); KL2(RE, RC, RA, RB, RD, 24);
732 SP(SI7, RE, RC, RA, RB, RD, 23); KL2(RC, RB, RE, RD, RA, 23);
733 SP(SI6, RC, RB, RE, RD, RA, 22); KL2(RE, RA, RD, RC, RB, 22);
734 SP(SI5, RE, RA, RD, RC, RB, 21); KL2(RA, RB, RE, RD, RC, 21);
735 SP(SI4, RA, RB, RE, RD, RC, 20); KL2(RA, RE, RC, RD, RB, 20);
736 SP(SI3, RA, RE, RC, RD, RB, 19); KL2(RC, RA, RB, RD, RE, 19);
737 SP(SI2, RC, RA, RB, RD, RE, 18); KL2(RA, RE, RD, RB, RC, 18);
738 SP(SI1, RA, RE, RD, RB, RC, 17); KL2(RC, RE, RD, RB, RA, 17);
739 SP(SI0, RC, RE, RD, RB, RA, 16); KL2(RD, RA, RE, RC, RB, 16);
740 SP(SI7, RD, RA, RE, RC, RB, 15); KL2(RA, RC, RD, RB, RE, 15);
741 SP(SI6, RA, RC, RD, RB, RE, 14); KL2(RD, RE, RB, RA, RC, 14);
742 SP(SI5, RD, RE, RB, RA, RC, 13); KL2(RE, RC, RD, RB, RA, 13);
743 SP(SI4, RE, RC, RD, RB, RA, 12); KL2(RE, RD, RA, RB, RC, 12);
744 SP(SI3, RE, RD, RA, RB, RC, 11); KL2(RA, RE, RC, RB, RD, 11);
745 SP(SI2, RA, RE, RC, RB, RD, 10); KL2(RE, RD, RB, RC, RA, 10);
746 SP(SI1, RE, RD, RB, RC, RA, 9); KL2(RA, RD, RB, RC, RE, 9);
747 SP(SI0, RA, RD, RB, RC, RE, 8); KL2(RB, RE, RD, RA, RC, 8);
748 SP(SI7, RB, RE, RD, RA, RC, 7); KL2(RE, RA, RB, RC, RD, 7);
749 SP(SI6, RE, RA, RB, RC, RD, 6); KL2(RB, RD, RC, RE, RA, 6);
750 SP(SI5, RB, RD, RC, RE, RA, 5); KL2(RD, RA, RB, RC, RE, 5);
751 SP(SI4, RD, RA, RB, RC, RE, 4); KL2(RD, RB, RE, RC, RA, 4);
752 SP(SI3, RD, RB, RE, RC, RA, 3); KL2(RE, RD, RA, RC, RB, 3);
753 SP(SI2, RE, RD, RA, RC, RB, 2); KL2(RD, RB, RC, RA, RE, 2);
754 SP(SI1, RD, RB, RC, RA, RE, 1); KL2(RE, RB, RC, RA, RD, 1);
755 S(SI0, RE, RB, RC, RA, RD); K2(RC, RD, RB, RE, RA, 0);
756
757 leaq (4*4*4)(%rsi), %rax;
758 write_blocks(%rsi, RC1, RD1, RB1, RE1, RK0, RK1, RK2);
759 write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2);
760
761 ret;
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c
new file mode 100644
index 000000000000..7955a9b76b91
--- /dev/null
+++ b/arch/x86/crypto/serpent_sse2_glue.c
@@ -0,0 +1,1070 @@
1/*
2 * Glue Code for SSE2 assembler versions of Serpent Cipher
3 *
4 * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
5 *
6 * Glue code based on aesni-intel_glue.c by:
7 * Copyright (C) 2008, Intel Corp.
8 * Author: Huang Ying <ying.huang@intel.com>
9 *
10 * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
11 * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
12 * CTR part based on code (crypto/ctr.c) by:
13 * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
14 *
15 * This program is free software; you can redistribute it and/or modify
16 * it under the terms of the GNU General Public License as published by
17 * the Free Software Foundation; either version 2 of the License, or
18 * (at your option) any later version.
19 *
20 * This program is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 * GNU General Public License for more details.
24 *
25 * You should have received a copy of the GNU General Public License
26 * along with this program; if not, write to the Free Software
27 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
28 * USA
29 *
30 */
31
32#include <linux/module.h>
33#include <linux/hardirq.h>
34#include <linux/types.h>
35#include <linux/crypto.h>
36#include <linux/err.h>
37#include <crypto/algapi.h>
38#include <crypto/serpent.h>
39#include <crypto/cryptd.h>
40#include <crypto/b128ops.h>
41#include <crypto/ctr.h>
42#include <crypto/lrw.h>
43#include <crypto/xts.h>
44#include <asm/i387.h>
45#include <asm/serpent.h>
46#include <crypto/scatterwalk.h>
47#include <linux/workqueue.h>
48#include <linux/spinlock.h>
49
50struct async_serpent_ctx {
51 struct cryptd_ablkcipher *cryptd_tfm;
52};
53
54static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes)
55{
56 if (fpu_enabled)
57 return true;
58
59 /* SSE2 is only used when chunk to be processed is large enough, so
60 * do not enable FPU until it is necessary.
61 */
62 if (nbytes < SERPENT_BLOCK_SIZE * SERPENT_PARALLEL_BLOCKS)
63 return false;
64
65 kernel_fpu_begin();
66 return true;
67}
68
69static inline void serpent_fpu_end(bool fpu_enabled)
70{
71 if (fpu_enabled)
72 kernel_fpu_end();
73}
74
75static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
76 bool enc)
77{
78 bool fpu_enabled = false;
79 struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
80 const unsigned int bsize = SERPENT_BLOCK_SIZE;
81 unsigned int nbytes;
82 int err;
83
84 err = blkcipher_walk_virt(desc, walk);
85 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
86
87 while ((nbytes = walk->nbytes)) {
88 u8 *wsrc = walk->src.virt.addr;
89 u8 *wdst = walk->dst.virt.addr;
90
91 fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes);
92
93 /* Process multi-block batch */
94 if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) {
95 do {
96 if (enc)
97 serpent_enc_blk_xway(ctx, wdst, wsrc);
98 else
99 serpent_dec_blk_xway(ctx, wdst, wsrc);
100
101 wsrc += bsize * SERPENT_PARALLEL_BLOCKS;
102 wdst += bsize * SERPENT_PARALLEL_BLOCKS;
103 nbytes -= bsize * SERPENT_PARALLEL_BLOCKS;
104 } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS);
105
106 if (nbytes < bsize)
107 goto done;
108 }
109
110 /* Handle leftovers */
111 do {
112 if (enc)
113 __serpent_encrypt(ctx, wdst, wsrc);
114 else
115 __serpent_decrypt(ctx, wdst, wsrc);
116
117 wsrc += bsize;
118 wdst += bsize;
119 nbytes -= bsize;
120 } while (nbytes >= bsize);
121
122done:
123 err = blkcipher_walk_done(desc, walk, nbytes);
124 }
125
126 serpent_fpu_end(fpu_enabled);
127 return err;
128}
129
130static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
131 struct scatterlist *src, unsigned int nbytes)
132{
133 struct blkcipher_walk walk;
134
135 blkcipher_walk_init(&walk, dst, src, nbytes);
136 return ecb_crypt(desc, &walk, true);
137}
138
139static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
140 struct scatterlist *src, unsigned int nbytes)
141{
142 struct blkcipher_walk walk;
143
144 blkcipher_walk_init(&walk, dst, src, nbytes);
145 return ecb_crypt(desc, &walk, false);
146}
147
148static struct crypto_alg blk_ecb_alg = {
149 .cra_name = "__ecb-serpent-sse2",
150 .cra_driver_name = "__driver-ecb-serpent-sse2",
151 .cra_priority = 0,
152 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
153 .cra_blocksize = SERPENT_BLOCK_SIZE,
154 .cra_ctxsize = sizeof(struct serpent_ctx),
155 .cra_alignmask = 0,
156 .cra_type = &crypto_blkcipher_type,
157 .cra_module = THIS_MODULE,
158 .cra_list = LIST_HEAD_INIT(blk_ecb_alg.cra_list),
159 .cra_u = {
160 .blkcipher = {
161 .min_keysize = SERPENT_MIN_KEY_SIZE,
162 .max_keysize = SERPENT_MAX_KEY_SIZE,
163 .setkey = serpent_setkey,
164 .encrypt = ecb_encrypt,
165 .decrypt = ecb_decrypt,
166 },
167 },
168};
169
170static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
171 struct blkcipher_walk *walk)
172{
173 struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
174 const unsigned int bsize = SERPENT_BLOCK_SIZE;
175 unsigned int nbytes = walk->nbytes;
176 u128 *src = (u128 *)walk->src.virt.addr;
177 u128 *dst = (u128 *)walk->dst.virt.addr;
178 u128 *iv = (u128 *)walk->iv;
179
180 do {
181 u128_xor(dst, src, iv);
182 __serpent_encrypt(ctx, (u8 *)dst, (u8 *)dst);
183 iv = dst;
184
185 src += 1;
186 dst += 1;
187 nbytes -= bsize;
188 } while (nbytes >= bsize);
189
190 u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv);
191 return nbytes;
192}
193
194static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
195 struct scatterlist *src, unsigned int nbytes)
196{
197 struct blkcipher_walk walk;
198 int err;
199
200 blkcipher_walk_init(&walk, dst, src, nbytes);
201 err = blkcipher_walk_virt(desc, &walk);
202
203 while ((nbytes = walk.nbytes)) {
204 nbytes = __cbc_encrypt(desc, &walk);
205 err = blkcipher_walk_done(desc, &walk, nbytes);
206 }
207
208 return err;
209}
210
211static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
212 struct blkcipher_walk *walk)
213{
214 struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
215 const unsigned int bsize = SERPENT_BLOCK_SIZE;
216 unsigned int nbytes = walk->nbytes;
217 u128 *src = (u128 *)walk->src.virt.addr;
218 u128 *dst = (u128 *)walk->dst.virt.addr;
219 u128 ivs[SERPENT_PARALLEL_BLOCKS - 1];
220 u128 last_iv;
221 int i;
222
223 /* Start of the last block. */
224 src += nbytes / bsize - 1;
225 dst += nbytes / bsize - 1;
226
227 last_iv = *src;
228
229 /* Process multi-block batch */
230 if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) {
231 do {
232 nbytes -= bsize * (SERPENT_PARALLEL_BLOCKS - 1);
233 src -= SERPENT_PARALLEL_BLOCKS - 1;
234 dst -= SERPENT_PARALLEL_BLOCKS - 1;
235
236 for (i = 0; i < SERPENT_PARALLEL_BLOCKS - 1; i++)
237 ivs[i] = src[i];
238
239 serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);
240
241 for (i = 0; i < SERPENT_PARALLEL_BLOCKS - 1; i++)
242 u128_xor(dst + (i + 1), dst + (i + 1), ivs + i);
243
244 nbytes -= bsize;
245 if (nbytes < bsize)
246 goto done;
247
248 u128_xor(dst, dst, src - 1);
249 src -= 1;
250 dst -= 1;
251 } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS);
252
253 if (nbytes < bsize)
254 goto done;
255 }
256
257 /* Handle leftovers */
258 for (;;) {
259 __serpent_decrypt(ctx, (u8 *)dst, (u8 *)src);
260
261 nbytes -= bsize;
262 if (nbytes < bsize)
263 break;
264
265 u128_xor(dst, dst, src - 1);
266 src -= 1;
267 dst -= 1;
268 }
269
270done:
271 u128_xor(dst, dst, (u128 *)walk->iv);
272 *(u128 *)walk->iv = last_iv;
273
274 return nbytes;
275}
276
277static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
278 struct scatterlist *src, unsigned int nbytes)
279{
280 bool fpu_enabled = false;
281 struct blkcipher_walk walk;
282 int err;
283
284 blkcipher_walk_init(&walk, dst, src, nbytes);
285 err = blkcipher_walk_virt(desc, &walk);
286 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
287
288 while ((nbytes = walk.nbytes)) {
289 fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes);
290 nbytes = __cbc_decrypt(desc, &walk);
291 err = blkcipher_walk_done(desc, &walk, nbytes);
292 }
293
294 serpent_fpu_end(fpu_enabled);
295 return err;
296}
297
298static struct crypto_alg blk_cbc_alg = {
299 .cra_name = "__cbc-serpent-sse2",
300 .cra_driver_name = "__driver-cbc-serpent-sse2",
301 .cra_priority = 0,
302 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
303 .cra_blocksize = SERPENT_BLOCK_SIZE,
304 .cra_ctxsize = sizeof(struct serpent_ctx),
305 .cra_alignmask = 0,
306 .cra_type = &crypto_blkcipher_type,
307 .cra_module = THIS_MODULE,
308 .cra_list = LIST_HEAD_INIT(blk_cbc_alg.cra_list),
309 .cra_u = {
310 .blkcipher = {
311 .min_keysize = SERPENT_MIN_KEY_SIZE,
312 .max_keysize = SERPENT_MAX_KEY_SIZE,
313 .setkey = serpent_setkey,
314 .encrypt = cbc_encrypt,
315 .decrypt = cbc_decrypt,
316 },
317 },
318};
319
320static inline void u128_to_be128(be128 *dst, const u128 *src)
321{
322 dst->a = cpu_to_be64(src->a);
323 dst->b = cpu_to_be64(src->b);
324}
325
326static inline void be128_to_u128(u128 *dst, const be128 *src)
327{
328 dst->a = be64_to_cpu(src->a);
329 dst->b = be64_to_cpu(src->b);
330}
331
332static inline void u128_inc(u128 *i)
333{
334 i->b++;
335 if (!i->b)
336 i->a++;
337}
338
339static void ctr_crypt_final(struct blkcipher_desc *desc,
340 struct blkcipher_walk *walk)
341{
342 struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
343 u8 *ctrblk = walk->iv;
344 u8 keystream[SERPENT_BLOCK_SIZE];
345 u8 *src = walk->src.virt.addr;
346 u8 *dst = walk->dst.virt.addr;
347 unsigned int nbytes = walk->nbytes;
348
349 __serpent_encrypt(ctx, keystream, ctrblk);
350 crypto_xor(keystream, src, nbytes);
351 memcpy(dst, keystream, nbytes);
352
353 crypto_inc(ctrblk, SERPENT_BLOCK_SIZE);
354}
355
356static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
357 struct blkcipher_walk *walk)
358{
359 struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
360 const unsigned int bsize = SERPENT_BLOCK_SIZE;
361 unsigned int nbytes = walk->nbytes;
362 u128 *src = (u128 *)walk->src.virt.addr;
363 u128 *dst = (u128 *)walk->dst.virt.addr;
364 u128 ctrblk;
365 be128 ctrblocks[SERPENT_PARALLEL_BLOCKS];
366 int i;
367
368 be128_to_u128(&ctrblk, (be128 *)walk->iv);
369
370 /* Process multi-block batch */
371 if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) {
372 do {
373 /* create ctrblks for parallel encrypt */
374 for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) {
375 if (dst != src)
376 dst[i] = src[i];
377
378 u128_to_be128(&ctrblocks[i], &ctrblk);
379 u128_inc(&ctrblk);
380 }
381
382 serpent_enc_blk_xway_xor(ctx, (u8 *)dst,
383 (u8 *)ctrblocks);
384
385 src += SERPENT_PARALLEL_BLOCKS;
386 dst += SERPENT_PARALLEL_BLOCKS;
387 nbytes -= bsize * SERPENT_PARALLEL_BLOCKS;
388 } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS);
389
390 if (nbytes < bsize)
391 goto done;
392 }
393
394 /* Handle leftovers */
395 do {
396 if (dst != src)
397 *dst = *src;
398
399 u128_to_be128(&ctrblocks[0], &ctrblk);
400 u128_inc(&ctrblk);
401
402 __serpent_encrypt(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks);
403 u128_xor(dst, dst, (u128 *)ctrblocks);
404
405 src += 1;
406 dst += 1;
407 nbytes -= bsize;
408 } while (nbytes >= bsize);
409
410done:
411 u128_to_be128((be128 *)walk->iv, &ctrblk);
412 return nbytes;
413}
414
415static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
416 struct scatterlist *src, unsigned int nbytes)
417{
418 bool fpu_enabled = false;
419 struct blkcipher_walk walk;
420 int err;
421
422 blkcipher_walk_init(&walk, dst, src, nbytes);
423 err = blkcipher_walk_virt_block(desc, &walk, SERPENT_BLOCK_SIZE);
424 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
425
426 while ((nbytes = walk.nbytes) >= SERPENT_BLOCK_SIZE) {
427 fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes);
428 nbytes = __ctr_crypt(desc, &walk);
429 err = blkcipher_walk_done(desc, &walk, nbytes);
430 }
431
432 serpent_fpu_end(fpu_enabled);
433
434 if (walk.nbytes) {
435 ctr_crypt_final(desc, &walk);
436 err = blkcipher_walk_done(desc, &walk, 0);
437 }
438
439 return err;
440}
441
442static struct crypto_alg blk_ctr_alg = {
443 .cra_name = "__ctr-serpent-sse2",
444 .cra_driver_name = "__driver-ctr-serpent-sse2",
445 .cra_priority = 0,
446 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
447 .cra_blocksize = 1,
448 .cra_ctxsize = sizeof(struct serpent_ctx),
449 .cra_alignmask = 0,
450 .cra_type = &crypto_blkcipher_type,
451 .cra_module = THIS_MODULE,
452 .cra_list = LIST_HEAD_INIT(blk_ctr_alg.cra_list),
453 .cra_u = {
454 .blkcipher = {
455 .min_keysize = SERPENT_MIN_KEY_SIZE,
456 .max_keysize = SERPENT_MAX_KEY_SIZE,
457 .ivsize = SERPENT_BLOCK_SIZE,
458 .setkey = serpent_setkey,
459 .encrypt = ctr_crypt,
460 .decrypt = ctr_crypt,
461 },
462 },
463};
464
465struct crypt_priv {
466 struct serpent_ctx *ctx;
467 bool fpu_enabled;
468};
469
470static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
471{
472 const unsigned int bsize = SERPENT_BLOCK_SIZE;
473 struct crypt_priv *ctx = priv;
474 int i;
475
476 ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
477
478 if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
479 serpent_enc_blk_xway(ctx->ctx, srcdst, srcdst);
480 return;
481 }
482
483 for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
484 __serpent_encrypt(ctx->ctx, srcdst, srcdst);
485}
486
487static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
488{
489 const unsigned int bsize = SERPENT_BLOCK_SIZE;
490 struct crypt_priv *ctx = priv;
491 int i;
492
493 ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
494
495 if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
496 serpent_dec_blk_xway(ctx->ctx, srcdst, srcdst);
497 return;
498 }
499
500 for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
501 __serpent_decrypt(ctx->ctx, srcdst, srcdst);
502}
503
504struct serpent_lrw_ctx {
505 struct lrw_table_ctx lrw_table;
506 struct serpent_ctx serpent_ctx;
507};
508
509static int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
510 unsigned int keylen)
511{
512 struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
513 int err;
514
515 err = __serpent_setkey(&ctx->serpent_ctx, key, keylen -
516 SERPENT_BLOCK_SIZE);
517 if (err)
518 return err;
519
520 return lrw_init_table(&ctx->lrw_table, key + keylen -
521 SERPENT_BLOCK_SIZE);
522}
523
524static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
525 struct scatterlist *src, unsigned int nbytes)
526{
527 struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
528 be128 buf[SERPENT_PARALLEL_BLOCKS];
529 struct crypt_priv crypt_ctx = {
530 .ctx = &ctx->serpent_ctx,
531 .fpu_enabled = false,
532 };
533 struct lrw_crypt_req req = {
534 .tbuf = buf,
535 .tbuflen = sizeof(buf),
536
537 .table_ctx = &ctx->lrw_table,
538 .crypt_ctx = &crypt_ctx,
539 .crypt_fn = encrypt_callback,
540 };
541 int ret;
542
543 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
544 ret = lrw_crypt(desc, dst, src, nbytes, &req);
545 serpent_fpu_end(crypt_ctx.fpu_enabled);
546
547 return ret;
548}
549
550static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
551 struct scatterlist *src, unsigned int nbytes)
552{
553 struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
554 be128 buf[SERPENT_PARALLEL_BLOCKS];
555 struct crypt_priv crypt_ctx = {
556 .ctx = &ctx->serpent_ctx,
557 .fpu_enabled = false,
558 };
559 struct lrw_crypt_req req = {
560 .tbuf = buf,
561 .tbuflen = sizeof(buf),
562
563 .table_ctx = &ctx->lrw_table,
564 .crypt_ctx = &crypt_ctx,
565 .crypt_fn = decrypt_callback,
566 };
567 int ret;
568
569 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
570 ret = lrw_crypt(desc, dst, src, nbytes, &req);
571 serpent_fpu_end(crypt_ctx.fpu_enabled);
572
573 return ret;
574}
575
576static void lrw_exit_tfm(struct crypto_tfm *tfm)
577{
578 struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
579
580 lrw_free_table(&ctx->lrw_table);
581}
582
583static struct crypto_alg blk_lrw_alg = {
584 .cra_name = "__lrw-serpent-sse2",
585 .cra_driver_name = "__driver-lrw-serpent-sse2",
586 .cra_priority = 0,
587 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
588 .cra_blocksize = SERPENT_BLOCK_SIZE,
589 .cra_ctxsize = sizeof(struct serpent_lrw_ctx),
590 .cra_alignmask = 0,
591 .cra_type = &crypto_blkcipher_type,
592 .cra_module = THIS_MODULE,
593 .cra_list = LIST_HEAD_INIT(blk_lrw_alg.cra_list),
594 .cra_exit = lrw_exit_tfm,
595 .cra_u = {
596 .blkcipher = {
597 .min_keysize = SERPENT_MIN_KEY_SIZE +
598 SERPENT_BLOCK_SIZE,
599 .max_keysize = SERPENT_MAX_KEY_SIZE +
600 SERPENT_BLOCK_SIZE,
601 .ivsize = SERPENT_BLOCK_SIZE,
602 .setkey = lrw_serpent_setkey,
603 .encrypt = lrw_encrypt,
604 .decrypt = lrw_decrypt,
605 },
606 },
607};
608
609struct serpent_xts_ctx {
610 struct serpent_ctx tweak_ctx;
611 struct serpent_ctx crypt_ctx;
612};
613
614static int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
615 unsigned int keylen)
616{
617 struct serpent_xts_ctx *ctx = crypto_tfm_ctx(tfm);
618 u32 *flags = &tfm->crt_flags;
619 int err;
620
621 /* key consists of keys of equal size concatenated, therefore
622 * the length must be even
623 */
624 if (keylen % 2) {
625 *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
626 return -EINVAL;
627 }
628
629 /* first half of xts-key is for crypt */
630 err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2);
631 if (err)
632 return err;
633
634 /* second half of xts-key is for tweak */
635 return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2);
636}
637
638static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
639 struct scatterlist *src, unsigned int nbytes)
640{
641 struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
642 be128 buf[SERPENT_PARALLEL_BLOCKS];
643 struct crypt_priv crypt_ctx = {
644 .ctx = &ctx->crypt_ctx,
645 .fpu_enabled = false,
646 };
647 struct xts_crypt_req req = {
648 .tbuf = buf,
649 .tbuflen = sizeof(buf),
650
651 .tweak_ctx = &ctx->tweak_ctx,
652 .tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt),
653 .crypt_ctx = &crypt_ctx,
654 .crypt_fn = encrypt_callback,
655 };
656 int ret;
657
658 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
659 ret = xts_crypt(desc, dst, src, nbytes, &req);
660 serpent_fpu_end(crypt_ctx.fpu_enabled);
661
662 return ret;
663}
664
665static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
666 struct scatterlist *src, unsigned int nbytes)
667{
668 struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
669 be128 buf[SERPENT_PARALLEL_BLOCKS];
670 struct crypt_priv crypt_ctx = {
671 .ctx = &ctx->crypt_ctx,
672 .fpu_enabled = false,
673 };
674 struct xts_crypt_req req = {
675 .tbuf = buf,
676 .tbuflen = sizeof(buf),
677
678 .tweak_ctx = &ctx->tweak_ctx,
679 .tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt),
680 .crypt_ctx = &crypt_ctx,
681 .crypt_fn = decrypt_callback,
682 };
683 int ret;
684
685 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
686 ret = xts_crypt(desc, dst, src, nbytes, &req);
687 serpent_fpu_end(crypt_ctx.fpu_enabled);
688
689 return ret;
690}
691
692static struct crypto_alg blk_xts_alg = {
693 .cra_name = "__xts-serpent-sse2",
694 .cra_driver_name = "__driver-xts-serpent-sse2",
695 .cra_priority = 0,
696 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
697 .cra_blocksize = SERPENT_BLOCK_SIZE,
698 .cra_ctxsize = sizeof(struct serpent_xts_ctx),
699 .cra_alignmask = 0,
700 .cra_type = &crypto_blkcipher_type,
701 .cra_module = THIS_MODULE,
702 .cra_list = LIST_HEAD_INIT(blk_xts_alg.cra_list),
703 .cra_u = {
704 .blkcipher = {
705 .min_keysize = SERPENT_MIN_KEY_SIZE * 2,
706 .max_keysize = SERPENT_MAX_KEY_SIZE * 2,
707 .ivsize = SERPENT_BLOCK_SIZE,
708 .setkey = xts_serpent_setkey,
709 .encrypt = xts_encrypt,
710 .decrypt = xts_decrypt,
711 },
712 },
713};
714
715static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
716 unsigned int key_len)
717{
718 struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm);
719 struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base;
720 int err;
721
722 crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
723 crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm)
724 & CRYPTO_TFM_REQ_MASK);
725 err = crypto_ablkcipher_setkey(child, key, key_len);
726 crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child)
727 & CRYPTO_TFM_RES_MASK);
728 return err;
729}
730
731static int __ablk_encrypt(struct ablkcipher_request *req)
732{
733 struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
734 struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm);
735 struct blkcipher_desc desc;
736
737 desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
738 desc.info = req->info;
739 desc.flags = 0;
740
741 return crypto_blkcipher_crt(desc.tfm)->encrypt(
742 &desc, req->dst, req->src, req->nbytes);
743}
744
745static int ablk_encrypt(struct ablkcipher_request *req)
746{
747 struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
748 struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm);
749
750 if (!irq_fpu_usable()) {
751 struct ablkcipher_request *cryptd_req =
752 ablkcipher_request_ctx(req);
753
754 memcpy(cryptd_req, req, sizeof(*req));
755 ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
756
757 return crypto_ablkcipher_encrypt(cryptd_req);
758 } else {
759 return __ablk_encrypt(req);
760 }
761}
762
763static int ablk_decrypt(struct ablkcipher_request *req)
764{
765 struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
766 struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm);
767
768 if (!irq_fpu_usable()) {
769 struct ablkcipher_request *cryptd_req =
770 ablkcipher_request_ctx(req);
771
772 memcpy(cryptd_req, req, sizeof(*req));
773 ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
774
775 return crypto_ablkcipher_decrypt(cryptd_req);
776 } else {
777 struct blkcipher_desc desc;
778
779 desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
780 desc.info = req->info;
781 desc.flags = 0;
782
783 return crypto_blkcipher_crt(desc.tfm)->decrypt(
784 &desc, req->dst, req->src, req->nbytes);
785 }
786}
787
788static void ablk_exit(struct crypto_tfm *tfm)
789{
790 struct async_serpent_ctx *ctx = crypto_tfm_ctx(tfm);
791
792 cryptd_free_ablkcipher(ctx->cryptd_tfm);
793}
794
795static void ablk_init_common(struct crypto_tfm *tfm,
796 struct cryptd_ablkcipher *cryptd_tfm)
797{
798 struct async_serpent_ctx *ctx = crypto_tfm_ctx(tfm);
799
800 ctx->cryptd_tfm = cryptd_tfm;
801 tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) +
802 crypto_ablkcipher_reqsize(&cryptd_tfm->base);
803}
804
805static int ablk_ecb_init(struct crypto_tfm *tfm)
806{
807 struct cryptd_ablkcipher *cryptd_tfm;
808
809 cryptd_tfm = cryptd_alloc_ablkcipher("__driver-ecb-serpent-sse2", 0, 0);
810 if (IS_ERR(cryptd_tfm))
811 return PTR_ERR(cryptd_tfm);
812 ablk_init_common(tfm, cryptd_tfm);
813 return 0;
814}
815
816static struct crypto_alg ablk_ecb_alg = {
817 .cra_name = "ecb(serpent)",
818 .cra_driver_name = "ecb-serpent-sse2",
819 .cra_priority = 400,
820 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
821 .cra_blocksize = SERPENT_BLOCK_SIZE,
822 .cra_ctxsize = sizeof(struct async_serpent_ctx),
823 .cra_alignmask = 0,
824 .cra_type = &crypto_ablkcipher_type,
825 .cra_module = THIS_MODULE,
826 .cra_list = LIST_HEAD_INIT(ablk_ecb_alg.cra_list),
827 .cra_init = ablk_ecb_init,
828 .cra_exit = ablk_exit,
829 .cra_u = {
830 .ablkcipher = {
831 .min_keysize = SERPENT_MIN_KEY_SIZE,
832 .max_keysize = SERPENT_MAX_KEY_SIZE,
833 .setkey = ablk_set_key,
834 .encrypt = ablk_encrypt,
835 .decrypt = ablk_decrypt,
836 },
837 },
838};
839
840static int ablk_cbc_init(struct crypto_tfm *tfm)
841{
842 struct cryptd_ablkcipher *cryptd_tfm;
843
844 cryptd_tfm = cryptd_alloc_ablkcipher("__driver-cbc-serpent-sse2", 0, 0);
845 if (IS_ERR(cryptd_tfm))
846 return PTR_ERR(cryptd_tfm);
847 ablk_init_common(tfm, cryptd_tfm);
848 return 0;
849}
850
851static struct crypto_alg ablk_cbc_alg = {
852 .cra_name = "cbc(serpent)",
853 .cra_driver_name = "cbc-serpent-sse2",
854 .cra_priority = 400,
855 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
856 .cra_blocksize = SERPENT_BLOCK_SIZE,
857 .cra_ctxsize = sizeof(struct async_serpent_ctx),
858 .cra_alignmask = 0,
859 .cra_type = &crypto_ablkcipher_type,
860 .cra_module = THIS_MODULE,
861 .cra_list = LIST_HEAD_INIT(ablk_cbc_alg.cra_list),
862 .cra_init = ablk_cbc_init,
863 .cra_exit = ablk_exit,
864 .cra_u = {
865 .ablkcipher = {
866 .min_keysize = SERPENT_MIN_KEY_SIZE,
867 .max_keysize = SERPENT_MAX_KEY_SIZE,
868 .ivsize = SERPENT_BLOCK_SIZE,
869 .setkey = ablk_set_key,
870 .encrypt = __ablk_encrypt,
871 .decrypt = ablk_decrypt,
872 },
873 },
874};
875
876static int ablk_ctr_init(struct crypto_tfm *tfm)
877{
878 struct cryptd_ablkcipher *cryptd_tfm;
879
880 cryptd_tfm = cryptd_alloc_ablkcipher("__driver-ctr-serpent-sse2", 0, 0);
881 if (IS_ERR(cryptd_tfm))
882 return PTR_ERR(cryptd_tfm);
883 ablk_init_common(tfm, cryptd_tfm);
884 return 0;
885}
886
887static struct crypto_alg ablk_ctr_alg = {
888 .cra_name = "ctr(serpent)",
889 .cra_driver_name = "ctr-serpent-sse2",
890 .cra_priority = 400,
891 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
892 .cra_blocksize = 1,
893 .cra_ctxsize = sizeof(struct async_serpent_ctx),
894 .cra_alignmask = 0,
895 .cra_type = &crypto_ablkcipher_type,
896 .cra_module = THIS_MODULE,
897 .cra_list = LIST_HEAD_INIT(ablk_ctr_alg.cra_list),
898 .cra_init = ablk_ctr_init,
899 .cra_exit = ablk_exit,
900 .cra_u = {
901 .ablkcipher = {
902 .min_keysize = SERPENT_MIN_KEY_SIZE,
903 .max_keysize = SERPENT_MAX_KEY_SIZE,
904 .ivsize = SERPENT_BLOCK_SIZE,
905 .setkey = ablk_set_key,
906 .encrypt = ablk_encrypt,
907 .decrypt = ablk_encrypt,
908 .geniv = "chainiv",
909 },
910 },
911};
912
913static int ablk_lrw_init(struct crypto_tfm *tfm)
914{
915 struct cryptd_ablkcipher *cryptd_tfm;
916
917 cryptd_tfm = cryptd_alloc_ablkcipher("__driver-lrw-serpent-sse2", 0, 0);
918 if (IS_ERR(cryptd_tfm))
919 return PTR_ERR(cryptd_tfm);
920 ablk_init_common(tfm, cryptd_tfm);
921 return 0;
922}
923
924static struct crypto_alg ablk_lrw_alg = {
925 .cra_name = "lrw(serpent)",
926 .cra_driver_name = "lrw-serpent-sse2",
927 .cra_priority = 400,
928 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
929 .cra_blocksize = SERPENT_BLOCK_SIZE,
930 .cra_ctxsize = sizeof(struct async_serpent_ctx),
931 .cra_alignmask = 0,
932 .cra_type = &crypto_ablkcipher_type,
933 .cra_module = THIS_MODULE,
934 .cra_list = LIST_HEAD_INIT(ablk_lrw_alg.cra_list),
935 .cra_init = ablk_lrw_init,
936 .cra_exit = ablk_exit,
937 .cra_u = {
938 .ablkcipher = {
939 .min_keysize = SERPENT_MIN_KEY_SIZE +
940 SERPENT_BLOCK_SIZE,
941 .max_keysize = SERPENT_MAX_KEY_SIZE +
942 SERPENT_BLOCK_SIZE,
943 .ivsize = SERPENT_BLOCK_SIZE,
944 .setkey = ablk_set_key,
945 .encrypt = ablk_encrypt,
946 .decrypt = ablk_decrypt,
947 },
948 },
949};
950
951static int ablk_xts_init(struct crypto_tfm *tfm)
952{
953 struct cryptd_ablkcipher *cryptd_tfm;
954
955 cryptd_tfm = cryptd_alloc_ablkcipher("__driver-xts-serpent-sse2", 0, 0);
956 if (IS_ERR(cryptd_tfm))
957 return PTR_ERR(cryptd_tfm);
958 ablk_init_common(tfm, cryptd_tfm);
959 return 0;
960}
961
962static struct crypto_alg ablk_xts_alg = {
963 .cra_name = "xts(serpent)",
964 .cra_driver_name = "xts-serpent-sse2",
965 .cra_priority = 400,
966 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
967 .cra_blocksize = SERPENT_BLOCK_SIZE,
968 .cra_ctxsize = sizeof(struct async_serpent_ctx),
969 .cra_alignmask = 0,
970 .cra_type = &crypto_ablkcipher_type,
971 .cra_module = THIS_MODULE,
972 .cra_list = LIST_HEAD_INIT(ablk_xts_alg.cra_list),
973 .cra_init = ablk_xts_init,
974 .cra_exit = ablk_exit,
975 .cra_u = {
976 .ablkcipher = {
977 .min_keysize = SERPENT_MIN_KEY_SIZE * 2,
978 .max_keysize = SERPENT_MAX_KEY_SIZE * 2,
979 .ivsize = SERPENT_BLOCK_SIZE,
980 .setkey = ablk_set_key,
981 .encrypt = ablk_encrypt,
982 .decrypt = ablk_decrypt,
983 },
984 },
985};
986
987static int __init serpent_sse2_init(void)
988{
989 int err;
990
991 if (!cpu_has_xmm2) {
992 printk(KERN_INFO "SSE2 instructions are not detected.\n");
993 return -ENODEV;
994 }
995
996 err = crypto_register_alg(&blk_ecb_alg);
997 if (err)
998 goto blk_ecb_err;
999 err = crypto_register_alg(&blk_cbc_alg);
1000 if (err)
1001 goto blk_cbc_err;
1002 err = crypto_register_alg(&blk_ctr_alg);
1003 if (err)
1004 goto blk_ctr_err;
1005 err = crypto_register_alg(&ablk_ecb_alg);
1006 if (err)
1007 goto ablk_ecb_err;
1008 err = crypto_register_alg(&ablk_cbc_alg);
1009 if (err)
1010 goto ablk_cbc_err;
1011 err = crypto_register_alg(&ablk_ctr_alg);
1012 if (err)
1013 goto ablk_ctr_err;
1014 err = crypto_register_alg(&blk_lrw_alg);
1015 if (err)
1016 goto blk_lrw_err;
1017 err = crypto_register_alg(&ablk_lrw_alg);
1018 if (err)
1019 goto ablk_lrw_err;
1020 err = crypto_register_alg(&blk_xts_alg);
1021 if (err)
1022 goto blk_xts_err;
1023 err = crypto_register_alg(&ablk_xts_alg);
1024 if (err)
1025 goto ablk_xts_err;
1026 return err;
1027
1028 crypto_unregister_alg(&ablk_xts_alg);
1029ablk_xts_err:
1030 crypto_unregister_alg(&blk_xts_alg);
1031blk_xts_err:
1032 crypto_unregister_alg(&ablk_lrw_alg);
1033ablk_lrw_err:
1034 crypto_unregister_alg(&blk_lrw_alg);
1035blk_lrw_err:
1036 crypto_unregister_alg(&ablk_ctr_alg);
1037ablk_ctr_err:
1038 crypto_unregister_alg(&ablk_cbc_alg);
1039ablk_cbc_err:
1040 crypto_unregister_alg(&ablk_ecb_alg);
1041ablk_ecb_err:
1042 crypto_unregister_alg(&blk_ctr_alg);
1043blk_ctr_err:
1044 crypto_unregister_alg(&blk_cbc_alg);
1045blk_cbc_err:
1046 crypto_unregister_alg(&blk_ecb_alg);
1047blk_ecb_err:
1048 return err;
1049}
1050
1051static void __exit serpent_sse2_exit(void)
1052{
1053 crypto_unregister_alg(&ablk_xts_alg);
1054 crypto_unregister_alg(&blk_xts_alg);
1055 crypto_unregister_alg(&ablk_lrw_alg);
1056 crypto_unregister_alg(&blk_lrw_alg);
1057 crypto_unregister_alg(&ablk_ctr_alg);
1058 crypto_unregister_alg(&ablk_cbc_alg);
1059 crypto_unregister_alg(&ablk_ecb_alg);
1060 crypto_unregister_alg(&blk_ctr_alg);
1061 crypto_unregister_alg(&blk_cbc_alg);
1062 crypto_unregister_alg(&blk_ecb_alg);
1063}
1064
1065module_init(serpent_sse2_init);
1066module_exit(serpent_sse2_exit);
1067
1068MODULE_DESCRIPTION("Serpent Cipher Algorithm, SSE2 optimized");
1069MODULE_LICENSE("GPL");
1070MODULE_ALIAS("serpent");
diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c
index 5ede9c444c3e..7fee8c152f93 100644
--- a/arch/x86/crypto/twofish_glue_3way.c
+++ b/arch/x86/crypto/twofish_glue_3way.c
@@ -32,6 +32,8 @@
32#include <crypto/algapi.h> 32#include <crypto/algapi.h>
33#include <crypto/twofish.h> 33#include <crypto/twofish.h>
34#include <crypto/b128ops.h> 34#include <crypto/b128ops.h>
35#include <crypto/lrw.h>
36#include <crypto/xts.h>
35 37
36/* regular block cipher functions from twofish_x86_64 module */ 38/* regular block cipher functions from twofish_x86_64 module */
37asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst, 39asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst,
@@ -432,6 +434,209 @@ static struct crypto_alg blk_ctr_alg = {
432 }, 434 },
433}; 435};
434 436
437static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
438{
439 const unsigned int bsize = TF_BLOCK_SIZE;
440 struct twofish_ctx *ctx = priv;
441 int i;
442
443 if (nbytes == 3 * bsize) {
444 twofish_enc_blk_3way(ctx, srcdst, srcdst);
445 return;
446 }
447
448 for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
449 twofish_enc_blk(ctx, srcdst, srcdst);
450}
451
452static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
453{
454 const unsigned int bsize = TF_BLOCK_SIZE;
455 struct twofish_ctx *ctx = priv;
456 int i;
457
458 if (nbytes == 3 * bsize) {
459 twofish_dec_blk_3way(ctx, srcdst, srcdst);
460 return;
461 }
462
463 for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
464 twofish_dec_blk(ctx, srcdst, srcdst);
465}
466
467struct twofish_lrw_ctx {
468 struct lrw_table_ctx lrw_table;
469 struct twofish_ctx twofish_ctx;
470};
471
472static int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
473 unsigned int keylen)
474{
475 struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
476 int err;
477
478 err = __twofish_setkey(&ctx->twofish_ctx, key, keylen - TF_BLOCK_SIZE,
479 &tfm->crt_flags);
480 if (err)
481 return err;
482
483 return lrw_init_table(&ctx->lrw_table, key + keylen - TF_BLOCK_SIZE);
484}
485
486static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
487 struct scatterlist *src, unsigned int nbytes)
488{
489 struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
490 be128 buf[3];
491 struct lrw_crypt_req req = {
492 .tbuf = buf,
493 .tbuflen = sizeof(buf),
494
495 .table_ctx = &ctx->lrw_table,
496 .crypt_ctx = &ctx->twofish_ctx,
497 .crypt_fn = encrypt_callback,
498 };
499
500 return lrw_crypt(desc, dst, src, nbytes, &req);
501}
502
503static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
504 struct scatterlist *src, unsigned int nbytes)
505{
506 struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
507 be128 buf[3];
508 struct lrw_crypt_req req = {
509 .tbuf = buf,
510 .tbuflen = sizeof(buf),
511
512 .table_ctx = &ctx->lrw_table,
513 .crypt_ctx = &ctx->twofish_ctx,
514 .crypt_fn = decrypt_callback,
515 };
516
517 return lrw_crypt(desc, dst, src, nbytes, &req);
518}
519
520static void lrw_exit_tfm(struct crypto_tfm *tfm)
521{
522 struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
523
524 lrw_free_table(&ctx->lrw_table);
525}
526
527static struct crypto_alg blk_lrw_alg = {
528 .cra_name = "lrw(twofish)",
529 .cra_driver_name = "lrw-twofish-3way",
530 .cra_priority = 300,
531 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
532 .cra_blocksize = TF_BLOCK_SIZE,
533 .cra_ctxsize = sizeof(struct twofish_lrw_ctx),
534 .cra_alignmask = 0,
535 .cra_type = &crypto_blkcipher_type,
536 .cra_module = THIS_MODULE,
537 .cra_list = LIST_HEAD_INIT(blk_lrw_alg.cra_list),
538 .cra_exit = lrw_exit_tfm,
539 .cra_u = {
540 .blkcipher = {
541 .min_keysize = TF_MIN_KEY_SIZE + TF_BLOCK_SIZE,
542 .max_keysize = TF_MAX_KEY_SIZE + TF_BLOCK_SIZE,
543 .ivsize = TF_BLOCK_SIZE,
544 .setkey = lrw_twofish_setkey,
545 .encrypt = lrw_encrypt,
546 .decrypt = lrw_decrypt,
547 },
548 },
549};
550
551struct twofish_xts_ctx {
552 struct twofish_ctx tweak_ctx;
553 struct twofish_ctx crypt_ctx;
554};
555
556static int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
557 unsigned int keylen)
558{
559 struct twofish_xts_ctx *ctx = crypto_tfm_ctx(tfm);
560 u32 *flags = &tfm->crt_flags;
561 int err;
562
563 /* key consists of keys of equal size concatenated, therefore
564 * the length must be even
565 */
566 if (keylen % 2) {
567 *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
568 return -EINVAL;
569 }
570
571 /* first half of xts-key is for crypt */
572 err = __twofish_setkey(&ctx->crypt_ctx, key, keylen / 2, flags);
573 if (err)
574 return err;
575
576 /* second half of xts-key is for tweak */
577 return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2,
578 flags);
579}
580
581static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
582 struct scatterlist *src, unsigned int nbytes)
583{
584 struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
585 be128 buf[3];
586 struct xts_crypt_req req = {
587 .tbuf = buf,
588 .tbuflen = sizeof(buf),
589
590 .tweak_ctx = &ctx->tweak_ctx,
591 .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk),
592 .crypt_ctx = &ctx->crypt_ctx,
593 .crypt_fn = encrypt_callback,
594 };
595
596 return xts_crypt(desc, dst, src, nbytes, &req);
597}
598
599static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
600 struct scatterlist *src, unsigned int nbytes)
601{
602 struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
603 be128 buf[3];
604 struct xts_crypt_req req = {
605 .tbuf = buf,
606 .tbuflen = sizeof(buf),
607
608 .tweak_ctx = &ctx->tweak_ctx,
609 .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk),
610 .crypt_ctx = &ctx->crypt_ctx,
611 .crypt_fn = decrypt_callback,
612 };
613
614 return xts_crypt(desc, dst, src, nbytes, &req);
615}
616
617static struct crypto_alg blk_xts_alg = {
618 .cra_name = "xts(twofish)",
619 .cra_driver_name = "xts-twofish-3way",
620 .cra_priority = 300,
621 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
622 .cra_blocksize = TF_BLOCK_SIZE,
623 .cra_ctxsize = sizeof(struct twofish_xts_ctx),
624 .cra_alignmask = 0,
625 .cra_type = &crypto_blkcipher_type,
626 .cra_module = THIS_MODULE,
627 .cra_list = LIST_HEAD_INIT(blk_xts_alg.cra_list),
628 .cra_u = {
629 .blkcipher = {
630 .min_keysize = TF_MIN_KEY_SIZE * 2,
631 .max_keysize = TF_MAX_KEY_SIZE * 2,
632 .ivsize = TF_BLOCK_SIZE,
633 .setkey = xts_twofish_setkey,
634 .encrypt = xts_encrypt,
635 .decrypt = xts_decrypt,
636 },
637 },
638};
639
435int __init init(void) 640int __init init(void)
436{ 641{
437 int err; 642 int err;
@@ -445,9 +650,20 @@ int __init init(void)
445 err = crypto_register_alg(&blk_ctr_alg); 650 err = crypto_register_alg(&blk_ctr_alg);
446 if (err) 651 if (err)
447 goto ctr_err; 652 goto ctr_err;
653 err = crypto_register_alg(&blk_lrw_alg);
654 if (err)
655 goto blk_lrw_err;
656 err = crypto_register_alg(&blk_xts_alg);
657 if (err)
658 goto blk_xts_err;
448 659
449 return 0; 660 return 0;
450 661
662 crypto_unregister_alg(&blk_xts_alg);
663blk_xts_err:
664 crypto_unregister_alg(&blk_lrw_alg);
665blk_lrw_err:
666 crypto_unregister_alg(&blk_ctr_alg);
451ctr_err: 667ctr_err:
452 crypto_unregister_alg(&blk_cbc_alg); 668 crypto_unregister_alg(&blk_cbc_alg);
453cbc_err: 669cbc_err:
@@ -458,6 +674,8 @@ ecb_err:
458 674
459void __exit fini(void) 675void __exit fini(void)
460{ 676{
677 crypto_unregister_alg(&blk_xts_alg);
678 crypto_unregister_alg(&blk_lrw_alg);
461 crypto_unregister_alg(&blk_ctr_alg); 679 crypto_unregister_alg(&blk_ctr_alg);
462 crypto_unregister_alg(&blk_cbc_alg); 680 crypto_unregister_alg(&blk_cbc_alg);
463 crypto_unregister_alg(&blk_ecb_alg); 681 crypto_unregister_alg(&blk_ecb_alg);
diff --git a/arch/x86/include/asm/serpent.h b/arch/x86/include/asm/serpent.h
new file mode 100644
index 000000000000..d3ef63fe0c81
--- /dev/null
+++ b/arch/x86/include/asm/serpent.h
@@ -0,0 +1,63 @@
1#ifndef ASM_X86_SERPENT_H
2#define ASM_X86_SERPENT_H
3
4#include <linux/crypto.h>
5#include <crypto/serpent.h>
6
7#ifdef CONFIG_X86_32
8
9#define SERPENT_PARALLEL_BLOCKS 4
10
11asmlinkage void __serpent_enc_blk_4way(struct serpent_ctx *ctx, u8 *dst,
12 const u8 *src, bool xor);
13asmlinkage void serpent_dec_blk_4way(struct serpent_ctx *ctx, u8 *dst,
14 const u8 *src);
15
16static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst,
17 const u8 *src)
18{
19 __serpent_enc_blk_4way(ctx, dst, src, false);
20}
21
22static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst,
23 const u8 *src)
24{
25 __serpent_enc_blk_4way(ctx, dst, src, true);
26}
27
28static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst,
29 const u8 *src)
30{
31 serpent_dec_blk_4way(ctx, dst, src);
32}
33
34#else
35
36#define SERPENT_PARALLEL_BLOCKS 8
37
38asmlinkage void __serpent_enc_blk_8way(struct serpent_ctx *ctx, u8 *dst,
39 const u8 *src, bool xor);
40asmlinkage void serpent_dec_blk_8way(struct serpent_ctx *ctx, u8 *dst,
41 const u8 *src);
42
43static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst,
44 const u8 *src)
45{
46 __serpent_enc_blk_8way(ctx, dst, src, false);
47}
48
49static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst,
50 const u8 *src)
51{
52 __serpent_enc_blk_8way(ctx, dst, src, true);
53}
54
55static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst,
56 const u8 *src)
57{
58 serpent_dec_blk_8way(ctx, dst, src);
59}
60
61#endif
62
63#endif