diff options
-rw-r--r-- | arch/x86/crypto/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/crypto/serpent-avx2-asm_64.S | 800 | ||||
-rw-r--r-- | arch/x86/crypto/serpent_avx2_glue.c | 562 | ||||
-rw-r--r-- | arch/x86/crypto/serpent_avx_glue.c | 62 | ||||
-rw-r--r-- | arch/x86/include/asm/crypto/serpent-avx.h | 24 | ||||
-rw-r--r-- | crypto/Kconfig | 23 | ||||
-rw-r--r-- | crypto/testmgr.c | 15 |
7 files changed, 1468 insertions, 20 deletions
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 1f6e0c2e9140..a21af593ab8d 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile | |||
@@ -43,6 +43,7 @@ endif | |||
43 | # These modules require assembler to support AVX2. | 43 | # These modules require assembler to support AVX2. |
44 | ifeq ($(avx2_supported),yes) | 44 | ifeq ($(avx2_supported),yes) |
45 | obj-$(CONFIG_CRYPTO_BLOWFISH_AVX2_X86_64) += blowfish-avx2.o | 45 | obj-$(CONFIG_CRYPTO_BLOWFISH_AVX2_X86_64) += blowfish-avx2.o |
46 | obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o | ||
46 | obj-$(CONFIG_CRYPTO_TWOFISH_AVX2_X86_64) += twofish-avx2.o | 47 | obj-$(CONFIG_CRYPTO_TWOFISH_AVX2_X86_64) += twofish-avx2.o |
47 | endif | 48 | endif |
48 | 49 | ||
@@ -72,6 +73,7 @@ endif | |||
72 | 73 | ||
73 | ifeq ($(avx2_supported),yes) | 74 | ifeq ($(avx2_supported),yes) |
74 | blowfish-avx2-y := blowfish-avx2-asm_64.o blowfish_avx2_glue.o | 75 | blowfish-avx2-y := blowfish-avx2-asm_64.o blowfish_avx2_glue.o |
76 | serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o | ||
75 | twofish-avx2-y := twofish-avx2-asm_64.o twofish_avx2_glue.o | 77 | twofish-avx2-y := twofish-avx2-asm_64.o twofish_avx2_glue.o |
76 | endif | 78 | endif |
77 | 79 | ||
diff --git a/arch/x86/crypto/serpent-avx2-asm_64.S b/arch/x86/crypto/serpent-avx2-asm_64.S new file mode 100644 index 000000000000..b222085cccac --- /dev/null +++ b/arch/x86/crypto/serpent-avx2-asm_64.S | |||
@@ -0,0 +1,800 @@ | |||
1 | /* | ||
2 | * x86_64/AVX2 assembler optimized version of Serpent | ||
3 | * | ||
4 | * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
5 | * | ||
6 | * Based on AVX assembler implementation of Serpent by: | ||
7 | * Copyright © 2012 Johannes Goetzfried | ||
8 | * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; either version 2 of the License, or | ||
13 | * (at your option) any later version. | ||
14 | * | ||
15 | */ | ||
16 | |||
17 | #include <linux/linkage.h> | ||
18 | #include "glue_helper-asm-avx2.S" | ||
19 | |||
20 | .file "serpent-avx2-asm_64.S" | ||
21 | |||
22 | .data /* constant tables handed to the CTR and XTS load macros below */ | ||
23 | .align 16 | ||
24 | |||
25 | .Lbswap128_mask: /* byte indices 15..0; passed to load_ctr_16way by serpent_ctr_16way (presumably a 128-bit byte-swap shuffle mask - confirm in glue_helper-asm-avx2.S) */ | ||
26 | .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 | ||
27 | .Lxts_gf128mul_and_shl1_mask_0: /* first mask argument of load_xts_16way; NOTE(review): 0x87 looks like the GF(2^128) reduction constant for a one-bit tweak shift - confirm */ | ||
28 | .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 | ||
29 | .Lxts_gf128mul_and_shl1_mask_1: /* second mask argument of load_xts_16way; NOTE(review): presumably the same constants for a two-bit shift - confirm */ | ||
30 | .byte 0x0e, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0 | ||
31 | |||
32 | .text | ||
33 | |||
34 | #define CTX %rdi /* first argument of every entry point: the key context; get_key() reads round-key words relative to it */ | ||
35 | |||
36 | #define RNOT %ymm0 /* set to all ones by vpcmpeqd in the blk16 routines, so "vpxor RNOT, x, x" is a bitwise NOT */ | ||
37 | #define tp %ymm1 /* scratch register shared by all S-box macros */ | ||
38 | |||
39 | #define RA1 %ymm2 /* RA..RE: five working registers per set; suffix 1 and 2 name the two register sets processed side by side */ | ||
40 | #define RA2 %ymm3 | ||
41 | #define RB1 %ymm4 | ||
42 | #define RB2 %ymm5 | ||
43 | #define RC1 %ymm6 | ||
44 | #define RC2 %ymm7 | ||
45 | #define RD1 %ymm8 | ||
46 | #define RD2 %ymm9 | ||
47 | #define RE1 %ymm10 | ||
48 | #define RE2 %ymm11 | ||
49 | |||
50 | #define RK0 %ymm12 /* RK0..RK3: the four broadcast words of the current round key (filled by get_key); also used as transpose temporaries */ | ||
51 | #define RK1 %ymm13 | ||
52 | #define RK2 %ymm14 | ||
53 | #define RK3 %ymm15 | ||
54 | |||
55 | #define RK0x %xmm12 /* xmm views (low 128 bits) of RK0..RK3, passed to the CTR/XTS load macros */ | ||
56 | #define RK1x %xmm13 | ||
57 | #define RK2x %xmm14 | ||
58 | #define RK3x %xmm15 | ||
59 | |||
60 | #define S0_1(x0, x1, x2, x3, x4) /* first half of S-box S0; the S()/SP() wrappers expand S0_1 then S0_2 on the same registers; writes scratch tp */ \ | ||
61 | vpor x0, x3, tp; \ | ||
62 | vpxor x3, x0, x0; \ | ||
63 | vpxor x2, x3, x4; \ | ||
64 | vpxor RNOT, x4, x4; \ | ||
65 | vpxor x1, tp, x3; \ | ||
66 | vpand x0, x1, x1; \ | ||
67 | vpxor x4, x1, x1; \ | ||
68 | vpxor x0, x2, x2; | ||
69 | #define S0_2(x0, x1, x2, x3, x4) /* second half of S-box S0; continues from the register state left by S0_1 */ \ | ||
70 | vpxor x3, x0, x0; \ | ||
71 | vpor x0, x4, x4; \ | ||
72 | vpxor x2, x0, x0; \ | ||
73 | vpand x1, x2, x2; \ | ||
74 | vpxor x2, x3, x3; \ | ||
75 | vpxor RNOT, x1, x1; \ | ||
76 | vpxor x4, x2, x2; \ | ||
77 | vpxor x2, x1, x1; | ||
78 | |||
79 | #define S1_1(x0, x1, x2, x3, x4) /* first half of S-box S1; the S()/SP() wrappers expand S1_1 then S1_2 on the same registers; writes scratch tp */ \ | ||
80 | vpxor x0, x1, tp; \ | ||
81 | vpxor x3, x0, x0; \ | ||
82 | vpxor RNOT, x3, x3; \ | ||
83 | vpand tp, x1, x4; \ | ||
84 | vpor tp, x0, x0; \ | ||
85 | vpxor x2, x3, x3; \ | ||
86 | vpxor x3, x0, x0; \ | ||
87 | vpxor x3, tp, x1; | ||
88 | #define S1_2(x0, x1, x2, x3, x4) /* second half of S-box S1; continues from the register state left by S1_1 */ \ | ||
89 | vpxor x4, x3, x3; \ | ||
90 | vpor x4, x1, x1; \ | ||
91 | vpxor x2, x4, x4; \ | ||
92 | vpand x0, x2, x2; \ | ||
93 | vpxor x1, x2, x2; \ | ||
94 | vpor x0, x1, x1; \ | ||
95 | vpxor RNOT, x0, x0; \ | ||
96 | vpxor x2, x0, x0; \ | ||
97 | vpxor x1, x4, x4; | ||
98 | |||
99 | #define S2_1(x0, x1, x2, x3, x4) /* first half of S-box S2; the S()/SP() wrappers expand S2_1 then S2_2 on the same registers; writes scratch tp */ \ | ||
100 | vpxor RNOT, x3, x3; \ | ||
101 | vpxor x0, x1, x1; \ | ||
102 | vpand x2, x0, tp; \ | ||
103 | vpxor x3, tp, tp; \ | ||
104 | vpor x0, x3, x3; \ | ||
105 | vpxor x1, x2, x2; \ | ||
106 | vpxor x1, x3, x3; \ | ||
107 | vpand tp, x1, x1; | ||
108 | #define S2_2(x0, x1, x2, x3, x4) /* second half of S-box S2; continues from S2_1 and reads the tp value it left, so tp must not be clobbered in between */ \ | ||
109 | vpxor x2, tp, tp; \ | ||
110 | vpand x3, x2, x2; \ | ||
111 | vpor x1, x3, x3; \ | ||
112 | vpxor RNOT, tp, tp; \ | ||
113 | vpxor tp, x3, x3; \ | ||
114 | vpxor tp, x0, x4; \ | ||
115 | vpxor x2, tp, x0; \ | ||
116 | vpor x2, x1, x1; | ||
117 | |||
118 | #define S3_1(x0, x1, x2, x3, x4) /* first half of S-box S3; the S()/SP() wrappers expand S3_1 then S3_2 on the same registers; writes scratch tp */ \ | ||
119 | vpxor x3, x1, tp; \ | ||
120 | vpor x0, x3, x3; \ | ||
121 | vpand x0, x1, x4; \ | ||
122 | vpxor x2, x0, x0; \ | ||
123 | vpxor tp, x2, x2; \ | ||
124 | vpand x3, tp, x1; \ | ||
125 | vpxor x3, x2, x2; \ | ||
126 | vpor x4, x0, x0; \ | ||
127 | vpxor x3, x4, x4; | ||
128 | #define S3_2(x0, x1, x2, x3, x4) /* second half of S-box S3; continues from the register state left by S3_1 */ \ | ||
129 | vpxor x0, x1, x1; \ | ||
130 | vpand x3, x0, x0; \ | ||
131 | vpand x4, x3, x3; \ | ||
132 | vpxor x2, x3, x3; \ | ||
133 | vpor x1, x4, x4; \ | ||
134 | vpand x1, x2, x2; \ | ||
135 | vpxor x3, x4, x4; \ | ||
136 | vpxor x3, x0, x0; \ | ||
137 | vpxor x2, x3, x3; | ||
138 | |||
139 | #define S4_1(x0, x1, x2, x3, x4) /* first half of S-box S4; the S()/SP() wrappers expand S4_1 then S4_2 on the same registers; writes scratch tp */ \ | ||
140 | vpand x0, x3, tp; \ | ||
141 | vpxor x3, x0, x0; \ | ||
142 | vpxor x2, tp, tp; \ | ||
143 | vpor x3, x2, x2; \ | ||
144 | vpxor x1, x0, x0; \ | ||
145 | vpxor tp, x3, x4; \ | ||
146 | vpor x0, x2, x2; \ | ||
147 | vpxor x1, x2, x2; | ||
148 | #define S4_2(x0, x1, x2, x3, x4) /* second half of S-box S4; continues from S4_1 and reads the tp value it left, so tp must not be clobbered in between */ \ | ||
149 | vpand x0, x1, x1; \ | ||
150 | vpxor x4, x1, x1; \ | ||
151 | vpand x2, x4, x4; \ | ||
152 | vpxor tp, x2, x2; \ | ||
153 | vpxor x0, x4, x4; \ | ||
154 | vpor x1, tp, x3; \ | ||
155 | vpxor RNOT, x1, x1; \ | ||
156 | vpxor x0, x3, x3; | ||
157 | |||
158 | #define S5_1(x0, x1, x2, x3, x4) /* first half of S-box S5; the S()/SP() wrappers expand S5_1 then S5_2 on the same registers; writes scratch tp */ \ | ||
159 | vpor x0, x1, tp; \ | ||
160 | vpxor tp, x2, x2; \ | ||
161 | vpxor RNOT, x3, x3; \ | ||
162 | vpxor x0, x1, x4; \ | ||
163 | vpxor x2, x0, x0; \ | ||
164 | vpand x4, tp, x1; \ | ||
165 | vpor x3, x4, x4; \ | ||
166 | vpxor x0, x4, x4; | ||
167 | #define S5_2(x0, x1, x2, x3, x4) /* second half of S-box S5; continues from the register state left by S5_1 */ \ | ||
168 | vpand x3, x0, x0; \ | ||
169 | vpxor x3, x1, x1; \ | ||
170 | vpxor x2, x3, x3; \ | ||
171 | vpxor x1, x0, x0; \ | ||
172 | vpand x4, x2, x2; \ | ||
173 | vpxor x2, x1, x1; \ | ||
174 | vpand x0, x2, x2; \ | ||
175 | vpxor x2, x3, x3; | ||
176 | |||
177 | #define S6_1(x0, x1, x2, x3, x4) /* first half of S-box S6; the S()/SP() wrappers expand S6_1 then S6_2 on the same registers; writes scratch tp */ \ | ||
178 | vpxor x0, x3, x3; \ | ||
179 | vpxor x2, x1, tp; \ | ||
180 | vpxor x0, x2, x2; \ | ||
181 | vpand x3, x0, x0; \ | ||
182 | vpor x3, tp, tp; \ | ||
183 | vpxor RNOT, x1, x4; \ | ||
184 | vpxor tp, x0, x0; \ | ||
185 | vpxor x2, tp, x1; | ||
186 | #define S6_2(x0, x1, x2, x3, x4) /* second half of S-box S6; continues from the register state left by S6_1 */ \ | ||
187 | vpxor x4, x3, x3; \ | ||
188 | vpxor x0, x4, x4; \ | ||
189 | vpand x0, x2, x2; \ | ||
190 | vpxor x1, x4, x4; \ | ||
191 | vpxor x3, x2, x2; \ | ||
192 | vpand x1, x3, x3; \ | ||
193 | vpxor x0, x3, x3; \ | ||
194 | vpxor x2, x1, x1; | ||
195 | |||
196 | #define S7_1(x0, x1, x2, x3, x4) /* first half of S-box S7; the S()/SP() wrappers expand S7_1 then S7_2 on the same registers; writes scratch tp */ \ | ||
197 | vpxor RNOT, x1, tp; \ | ||
198 | vpxor RNOT, x0, x0; \ | ||
199 | vpand x2, tp, x1; \ | ||
200 | vpxor x3, x1, x1; \ | ||
201 | vpor tp, x3, x3; \ | ||
202 | vpxor x2, tp, x4; \ | ||
203 | vpxor x3, x2, x2; \ | ||
204 | vpxor x0, x3, x3; \ | ||
205 | vpor x1, x0, x0; | ||
206 | #define S7_2(x0, x1, x2, x3, x4) /* second half of S-box S7; continues from the register state left by S7_1 */ \ | ||
207 | vpand x0, x2, x2; \ | ||
208 | vpxor x4, x0, x0; \ | ||
209 | vpxor x3, x4, x4; \ | ||
210 | vpand x0, x3, x3; \ | ||
211 | vpxor x1, x4, x4; \ | ||
212 | vpxor x4, x2, x2; \ | ||
213 | vpxor x1, x3, x3; \ | ||
214 | vpor x0, x4, x4; \ | ||
215 | vpxor x1, x4, x4; | ||
216 | |||
217 | #define SI0_1(x0, x1, x2, x3, x4) /* first half of inverse S-box SI0 (decryption); the S()/SP() wrappers expand SI0_1 then SI0_2 on the same registers; writes scratch tp */ \ | ||
218 | vpxor x0, x1, x1; \ | ||
219 | vpor x1, x3, tp; \ | ||
220 | vpxor x1, x3, x4; \ | ||
221 | vpxor RNOT, x0, x0; \ | ||
222 | vpxor tp, x2, x2; \ | ||
223 | vpxor x0, tp, x3; \ | ||
224 | vpand x1, x0, x0; \ | ||
225 | vpxor x2, x0, x0; | ||
226 | #define SI0_2(x0, x1, x2, x3, x4) /* second half of inverse S-box SI0; continues from the register state left by SI0_1 */ \ | ||
227 | vpand x3, x2, x2; \ | ||
228 | vpxor x4, x3, x3; \ | ||
229 | vpxor x3, x2, x2; \ | ||
230 | vpxor x3, x1, x1; \ | ||
231 | vpand x0, x3, x3; \ | ||
232 | vpxor x0, x1, x1; \ | ||
233 | vpxor x2, x0, x0; \ | ||
234 | vpxor x3, x4, x4; | ||
235 | |||
236 | #define SI1_1(x0, x1, x2, x3, x4) /* first half of inverse S-box SI1 (decryption); the S()/SP() wrappers expand SI1_1 then SI1_2 on the same registers; writes scratch tp */ \ | ||
237 | vpxor x3, x1, x1; \ | ||
238 | vpxor x2, x0, tp; \ | ||
239 | vpxor RNOT, x2, x2; \ | ||
240 | vpor x1, x0, x4; \ | ||
241 | vpxor x3, x4, x4; \ | ||
242 | vpand x1, x3, x3; \ | ||
243 | vpxor x2, x1, x1; \ | ||
244 | vpand x4, x2, x2; | ||
245 | #define SI1_2(x0, x1, x2, x3, x4) /* second half of inverse S-box SI1; continues from SI1_1 and reads the tp value it left, so tp must not be clobbered in between */ \ | ||
246 | vpxor x1, x4, x4; \ | ||
247 | vpor x3, x1, x1; \ | ||
248 | vpxor tp, x3, x3; \ | ||
249 | vpxor tp, x2, x2; \ | ||
250 | vpor x4, tp, x0; \ | ||
251 | vpxor x4, x2, x2; \ | ||
252 | vpxor x0, x1, x1; \ | ||
253 | vpxor x1, x4, x4; | ||
254 | |||
255 | #define SI2_1(x0, x1, x2, x3, x4) /* first half of inverse S-box SI2 (decryption); the S()/SP() wrappers expand SI2_1 then SI2_2 on the same registers; writes scratch tp */ \ | ||
256 | vpxor x1, x2, x2; \ | ||
257 | vpxor RNOT, x3, tp; \ | ||
258 | vpor x2, tp, tp; \ | ||
259 | vpxor x3, x2, x2; \ | ||
260 | vpxor x0, x3, x4; \ | ||
261 | vpxor x1, tp, x3; \ | ||
262 | vpor x2, x1, x1; \ | ||
263 | vpxor x0, x2, x2; | ||
264 | #define SI2_2(x0, x1, x2, x3, x4) /* second half of inverse S-box SI2; continues from the register state left by SI2_1 */ \ | ||
265 | vpxor x4, x1, x1; \ | ||
266 | vpor x3, x4, x4; \ | ||
267 | vpxor x3, x2, x2; \ | ||
268 | vpxor x2, x4, x4; \ | ||
269 | vpand x1, x2, x2; \ | ||
270 | vpxor x3, x2, x2; \ | ||
271 | vpxor x4, x3, x3; \ | ||
272 | vpxor x0, x4, x4; | ||
273 | |||
274 | #define SI3_1(x0, x1, x2, x3, x4) /* first half of inverse S-box SI3 (decryption); the S()/SP() wrappers expand SI3_1 then SI3_2 on the same registers; writes scratch tp */ \ | ||
275 | vpxor x1, x2, x2; \ | ||
276 | vpand x2, x1, tp; \ | ||
277 | vpxor x0, tp, tp; \ | ||
278 | vpor x1, x0, x0; \ | ||
279 | vpxor x3, x1, x4; \ | ||
280 | vpxor x3, x0, x0; \ | ||
281 | vpor tp, x3, x3; \ | ||
282 | vpxor x2, tp, x1; | ||
283 | #define SI3_2(x0, x1, x2, x3, x4) /* second half of inverse S-box SI3; continues from the register state left by SI3_1 */ \ | ||
284 | vpxor x3, x1, x1; \ | ||
285 | vpxor x2, x0, x0; \ | ||
286 | vpxor x3, x2, x2; \ | ||
287 | vpand x1, x3, x3; \ | ||
288 | vpxor x0, x1, x1; \ | ||
289 | vpand x2, x0, x0; \ | ||
290 | vpxor x3, x4, x4; \ | ||
291 | vpxor x0, x3, x3; \ | ||
292 | vpxor x1, x0, x0; | ||
293 | |||
294 | #define SI4_1(x0, x1, x2, x3, x4) /* first half of inverse S-box SI4 (decryption); the S()/SP() wrappers expand SI4_1 then SI4_2 on the same registers; writes scratch tp */ \ | ||
295 | vpxor x3, x2, x2; \ | ||
296 | vpand x1, x0, tp; \ | ||
297 | vpxor x2, tp, tp; \ | ||
298 | vpor x3, x2, x2; \ | ||
299 | vpxor RNOT, x0, x4; \ | ||
300 | vpxor tp, x1, x1; \ | ||
301 | vpxor x2, tp, x0; \ | ||
302 | vpand x4, x2, x2; | ||
303 | #define SI4_2(x0, x1, x2, x3, x4) /* second half of inverse S-box SI4; continues from the register state left by SI4_1 */ \ | ||
304 | vpxor x0, x2, x2; \ | ||
305 | vpor x4, x0, x0; \ | ||
306 | vpxor x3, x0, x0; \ | ||
307 | vpand x2, x3, x3; \ | ||
308 | vpxor x3, x4, x4; \ | ||
309 | vpxor x1, x3, x3; \ | ||
310 | vpand x0, x1, x1; \ | ||
311 | vpxor x1, x4, x4; \ | ||
312 | vpxor x3, x0, x0; | ||
313 | |||
314 | #define SI5_1(x0, x1, x2, x3, x4) /* first half of inverse S-box SI5 (decryption); the S()/SP() wrappers expand SI5_1 then SI5_2 on the same registers; writes scratch tp */ \ | ||
315 | vpor x2, x1, tp; \ | ||
316 | vpxor x1, x2, x2; \ | ||
317 | vpxor x3, tp, tp; \ | ||
318 | vpand x1, x3, x3; \ | ||
319 | vpxor x3, x2, x2; \ | ||
320 | vpor x0, x3, x3; \ | ||
321 | vpxor RNOT, x0, x0; \ | ||
322 | vpxor x2, x3, x3; \ | ||
323 | vpor x0, x2, x2; | ||
324 | #define SI5_2(x0, x1, x2, x3, x4) /* second half of inverse S-box SI5; continues from SI5_1 and reads the tp value it left, so tp must not be clobbered in between */ \ | ||
325 | vpxor tp, x1, x4; \ | ||
326 | vpxor x4, x2, x2; \ | ||
327 | vpand x0, x4, x4; \ | ||
328 | vpxor tp, x0, x0; \ | ||
329 | vpxor x3, tp, x1; \ | ||
330 | vpand x2, x0, x0; \ | ||
331 | vpxor x3, x2, x2; \ | ||
332 | vpxor x2, x0, x0; \ | ||
333 | vpxor x4, x2, x2; \ | ||
334 | vpxor x3, x4, x4; | ||
335 | |||
336 | #define SI6_1(x0, x1, x2, x3, x4) /* first half of inverse S-box SI6 (decryption); the S()/SP() wrappers expand SI6_1 then SI6_2 on the same registers; writes scratch tp */ \ | ||
337 | vpxor x2, x0, x0; \ | ||
338 | vpand x3, x0, tp; \ | ||
339 | vpxor x3, x2, x2; \ | ||
340 | vpxor x2, tp, tp; \ | ||
341 | vpxor x1, x3, x3; \ | ||
342 | vpor x0, x2, x2; \ | ||
343 | vpxor x3, x2, x2; \ | ||
344 | vpand tp, x3, x3; | ||
345 | #define SI6_2(x0, x1, x2, x3, x4) /* second half of inverse S-box SI6; continues from SI6_1 and reads the tp value it left, so tp must not be clobbered in between */ \ | ||
346 | vpxor RNOT, tp, tp; \ | ||
347 | vpxor x1, x3, x3; \ | ||
348 | vpand x2, x1, x1; \ | ||
349 | vpxor tp, x0, x4; \ | ||
350 | vpxor x4, x3, x3; \ | ||
351 | vpxor x2, x4, x4; \ | ||
352 | vpxor x1, tp, x0; \ | ||
353 | vpxor x0, x2, x2; | ||
354 | |||
355 | #define SI7_1(x0, x1, x2, x3, x4) /* first half of inverse S-box SI7 (decryption); the S()/SP() wrappers expand SI7_1 then SI7_2 on the same registers; writes scratch tp */ \ | ||
356 | vpand x0, x3, tp; \ | ||
357 | vpxor x2, x0, x0; \ | ||
358 | vpor x3, x2, x2; \ | ||
359 | vpxor x1, x3, x4; \ | ||
360 | vpxor RNOT, x0, x0; \ | ||
361 | vpor tp, x1, x1; \ | ||
362 | vpxor x0, x4, x4; \ | ||
363 | vpand x2, x0, x0; \ | ||
364 | vpxor x1, x0, x0; | ||
365 | #define SI7_2(x0, x1, x2, x3, x4) /* second half of inverse S-box SI7; continues from SI7_1 and reads the tp value it left, so tp must not be clobbered in between */ \ | ||
366 | vpand x2, x1, x1; \ | ||
367 | vpxor x2, tp, x3; \ | ||
368 | vpxor x3, x4, x4; \ | ||
369 | vpand x3, x2, x2; \ | ||
370 | vpor x0, x3, x3; \ | ||
371 | vpxor x4, x1, x1; \ | ||
372 | vpxor x4, x3, x3; \ | ||
373 | vpand x0, x4, x4; \ | ||
374 | vpxor x2, x4, x4; | ||
375 | |||
376 | #define get_key(i,j,t) /* t = the 32-bit word at byte offset (4*i+j)*4 from CTX (word j of round key i), broadcast into every dword of t */ \ | ||
377 | vpbroadcastd (4*(i)+(j))*4(CTX), t; | ||
378 | |||
379 | #define K2(x0, x1, x2, x3, x4, i) /* key mixing only: load the four words of round key i into RK0..RK3 and XOR them into x0..x3 of both register sets (suffix 1 and 2); x4 is not used */ \ | ||
380 | get_key(i, 0, RK0); \ | ||
381 | get_key(i, 1, RK1); \ | ||
382 | get_key(i, 2, RK2); \ | ||
383 | get_key(i, 3, RK3); \ | ||
384 | vpxor RK0, x0 ## 1, x0 ## 1; \ | ||
385 | vpxor RK1, x1 ## 1, x1 ## 1; \ | ||
386 | vpxor RK2, x2 ## 1, x2 ## 1; \ | ||
387 | vpxor RK3, x3 ## 1, x3 ## 1; \ | ||
388 | vpxor RK0, x0 ## 2, x0 ## 2; \ | ||
389 | vpxor RK1, x1 ## 2, x1 ## 2; \ | ||
390 | vpxor RK2, x2 ## 2, x2 ## 2; \ | ||
391 | vpxor RK3, x3 ## 2, x3 ## 2; | ||
392 | |||
393 | #define LK2(x0, x1, x2, x3, x4, i) /* linear transformation then key mixing, on both register sets: rotate-left by 13, 3, 1, 7, 5, 22 (each built from a shift pair + vpor) plus plain shifts by 3 and 7, then XOR with round key i; x4 is scratch; the get_key loads are interleaved with the arithmetic */ \ | ||
394 | vpslld $13, x0 ## 1, x4 ## 1; \ | ||
395 | vpsrld $(32 - 13), x0 ## 1, x0 ## 1; \ | ||
396 | vpor x4 ## 1, x0 ## 1, x0 ## 1; \ | ||
397 | vpxor x0 ## 1, x1 ## 1, x1 ## 1; \ | ||
398 | vpslld $3, x2 ## 1, x4 ## 1; \ | ||
399 | vpsrld $(32 - 3), x2 ## 1, x2 ## 1; \ | ||
400 | vpor x4 ## 1, x2 ## 1, x2 ## 1; \ | ||
401 | vpxor x2 ## 1, x1 ## 1, x1 ## 1; \ | ||
402 | vpslld $13, x0 ## 2, x4 ## 2; \ | ||
403 | vpsrld $(32 - 13), x0 ## 2, x0 ## 2; \ | ||
404 | vpor x4 ## 2, x0 ## 2, x0 ## 2; \ | ||
405 | vpxor x0 ## 2, x1 ## 2, x1 ## 2; \ | ||
406 | vpslld $3, x2 ## 2, x4 ## 2; \ | ||
407 | vpsrld $(32 - 3), x2 ## 2, x2 ## 2; \ | ||
408 | vpor x4 ## 2, x2 ## 2, x2 ## 2; \ | ||
409 | vpxor x2 ## 2, x1 ## 2, x1 ## 2; \ | ||
410 | vpslld $1, x1 ## 1, x4 ## 1; \ | ||
411 | vpsrld $(32 - 1), x1 ## 1, x1 ## 1; \ | ||
412 | vpor x4 ## 1, x1 ## 1, x1 ## 1; \ | ||
413 | vpslld $3, x0 ## 1, x4 ## 1; \ | ||
414 | vpxor x2 ## 1, x3 ## 1, x3 ## 1; \ | ||
415 | vpxor x4 ## 1, x3 ## 1, x3 ## 1; \ | ||
416 | get_key(i, 1, RK1); \ | ||
417 | vpslld $1, x1 ## 2, x4 ## 2; \ | ||
418 | vpsrld $(32 - 1), x1 ## 2, x1 ## 2; \ | ||
419 | vpor x4 ## 2, x1 ## 2, x1 ## 2; \ | ||
420 | vpslld $3, x0 ## 2, x4 ## 2; \ | ||
421 | vpxor x2 ## 2, x3 ## 2, x3 ## 2; \ | ||
422 | vpxor x4 ## 2, x3 ## 2, x3 ## 2; \ | ||
423 | get_key(i, 3, RK3); \ | ||
424 | vpslld $7, x3 ## 1, x4 ## 1; \ | ||
425 | vpsrld $(32 - 7), x3 ## 1, x3 ## 1; \ | ||
426 | vpor x4 ## 1, x3 ## 1, x3 ## 1; \ | ||
427 | vpslld $7, x1 ## 1, x4 ## 1; \ | ||
428 | vpxor x1 ## 1, x0 ## 1, x0 ## 1; \ | ||
429 | vpxor x3 ## 1, x0 ## 1, x0 ## 1; \ | ||
430 | vpxor x3 ## 1, x2 ## 1, x2 ## 1; \ | ||
431 | vpxor x4 ## 1, x2 ## 1, x2 ## 1; \ | ||
432 | get_key(i, 0, RK0); \ | ||
433 | vpslld $7, x3 ## 2, x4 ## 2; \ | ||
434 | vpsrld $(32 - 7), x3 ## 2, x3 ## 2; \ | ||
435 | vpor x4 ## 2, x3 ## 2, x3 ## 2; \ | ||
436 | vpslld $7, x1 ## 2, x4 ## 2; \ | ||
437 | vpxor x1 ## 2, x0 ## 2, x0 ## 2; \ | ||
438 | vpxor x3 ## 2, x0 ## 2, x0 ## 2; \ | ||
439 | vpxor x3 ## 2, x2 ## 2, x2 ## 2; \ | ||
440 | vpxor x4 ## 2, x2 ## 2, x2 ## 2; \ | ||
441 | get_key(i, 2, RK2); \ | ||
442 | vpxor RK1, x1 ## 1, x1 ## 1; \ | ||
443 | vpxor RK3, x3 ## 1, x3 ## 1; \ | ||
444 | vpslld $5, x0 ## 1, x4 ## 1; \ | ||
445 | vpsrld $(32 - 5), x0 ## 1, x0 ## 1; \ | ||
446 | vpor x4 ## 1, x0 ## 1, x0 ## 1; \ | ||
447 | vpslld $22, x2 ## 1, x4 ## 1; \ | ||
448 | vpsrld $(32 - 22), x2 ## 1, x2 ## 1; \ | ||
449 | vpor x4 ## 1, x2 ## 1, x2 ## 1; \ | ||
450 | vpxor RK0, x0 ## 1, x0 ## 1; \ | ||
451 | vpxor RK2, x2 ## 1, x2 ## 1; \ | ||
452 | vpxor RK1, x1 ## 2, x1 ## 2; \ | ||
453 | vpxor RK3, x3 ## 2, x3 ## 2; \ | ||
454 | vpslld $5, x0 ## 2, x4 ## 2; \ | ||
455 | vpsrld $(32 - 5), x0 ## 2, x0 ## 2; \ | ||
456 | vpor x4 ## 2, x0 ## 2, x0 ## 2; \ | ||
457 | vpslld $22, x2 ## 2, x4 ## 2; \ | ||
458 | vpsrld $(32 - 22), x2 ## 2, x2 ## 2; \ | ||
459 | vpor x4 ## 2, x2 ## 2, x2 ## 2; \ | ||
460 | vpxor RK0, x0 ## 2, x0 ## 2; \ | ||
461 | vpxor RK2, x2 ## 2, x2 ## 2; | ||
462 | |||
463 | #define KL2(x0, x1, x2, x3, x4, i) /* decryption counterpart of LK2: XOR with RK0..RK3 first, then undo the linear transformation (rotate-right by 5, 22, 1, 7, 13, 3) on both register sets; x4 is scratch; i is never referenced in this body - RK0..RK3 must already hold the round key (the preceding SP() loads them) */ \ | ||
464 | vpxor RK0, x0 ## 1, x0 ## 1; \ | ||
465 | vpxor RK2, x2 ## 1, x2 ## 1; \ | ||
466 | vpsrld $5, x0 ## 1, x4 ## 1; \ | ||
467 | vpslld $(32 - 5), x0 ## 1, x0 ## 1; \ | ||
468 | vpor x4 ## 1, x0 ## 1, x0 ## 1; \ | ||
469 | vpxor RK3, x3 ## 1, x3 ## 1; \ | ||
470 | vpxor RK1, x1 ## 1, x1 ## 1; \ | ||
471 | vpsrld $22, x2 ## 1, x4 ## 1; \ | ||
472 | vpslld $(32 - 22), x2 ## 1, x2 ## 1; \ | ||
473 | vpor x4 ## 1, x2 ## 1, x2 ## 1; \ | ||
474 | vpxor x3 ## 1, x2 ## 1, x2 ## 1; \ | ||
475 | vpxor RK0, x0 ## 2, x0 ## 2; \ | ||
476 | vpxor RK2, x2 ## 2, x2 ## 2; \ | ||
477 | vpsrld $5, x0 ## 2, x4 ## 2; \ | ||
478 | vpslld $(32 - 5), x0 ## 2, x0 ## 2; \ | ||
479 | vpor x4 ## 2, x0 ## 2, x0 ## 2; \ | ||
480 | vpxor RK3, x3 ## 2, x3 ## 2; \ | ||
481 | vpxor RK1, x1 ## 2, x1 ## 2; \ | ||
482 | vpsrld $22, x2 ## 2, x4 ## 2; \ | ||
483 | vpslld $(32 - 22), x2 ## 2, x2 ## 2; \ | ||
484 | vpor x4 ## 2, x2 ## 2, x2 ## 2; \ | ||
485 | vpxor x3 ## 2, x2 ## 2, x2 ## 2; \ | ||
486 | vpxor x3 ## 1, x0 ## 1, x0 ## 1; \ | ||
487 | vpslld $7, x1 ## 1, x4 ## 1; \ | ||
488 | vpxor x1 ## 1, x0 ## 1, x0 ## 1; \ | ||
489 | vpxor x4 ## 1, x2 ## 1, x2 ## 1; \ | ||
490 | vpsrld $1, x1 ## 1, x4 ## 1; \ | ||
491 | vpslld $(32 - 1), x1 ## 1, x1 ## 1; \ | ||
492 | vpor x4 ## 1, x1 ## 1, x1 ## 1; \ | ||
493 | vpxor x3 ## 2, x0 ## 2, x0 ## 2; \ | ||
494 | vpslld $7, x1 ## 2, x4 ## 2; \ | ||
495 | vpxor x1 ## 2, x0 ## 2, x0 ## 2; \ | ||
496 | vpxor x4 ## 2, x2 ## 2, x2 ## 2; \ | ||
497 | vpsrld $1, x1 ## 2, x4 ## 2; \ | ||
498 | vpslld $(32 - 1), x1 ## 2, x1 ## 2; \ | ||
499 | vpor x4 ## 2, x1 ## 2, x1 ## 2; \ | ||
500 | vpsrld $7, x3 ## 1, x4 ## 1; \ | ||
501 | vpslld $(32 - 7), x3 ## 1, x3 ## 1; \ | ||
502 | vpor x4 ## 1, x3 ## 1, x3 ## 1; \ | ||
503 | vpxor x0 ## 1, x1 ## 1, x1 ## 1; \ | ||
504 | vpslld $3, x0 ## 1, x4 ## 1; \ | ||
505 | vpxor x4 ## 1, x3 ## 1, x3 ## 1; \ | ||
506 | vpsrld $7, x3 ## 2, x4 ## 2; \ | ||
507 | vpslld $(32 - 7), x3 ## 2, x3 ## 2; \ | ||
508 | vpor x4 ## 2, x3 ## 2, x3 ## 2; \ | ||
509 | vpxor x0 ## 2, x1 ## 2, x1 ## 2; \ | ||
510 | vpslld $3, x0 ## 2, x4 ## 2; \ | ||
511 | vpxor x4 ## 2, x3 ## 2, x3 ## 2; \ | ||
512 | vpsrld $13, x0 ## 1, x4 ## 1; \ | ||
513 | vpslld $(32 - 13), x0 ## 1, x0 ## 1; \ | ||
514 | vpor x4 ## 1, x0 ## 1, x0 ## 1; \ | ||
515 | vpxor x2 ## 1, x1 ## 1, x1 ## 1; \ | ||
516 | vpxor x2 ## 1, x3 ## 1, x3 ## 1; \ | ||
517 | vpsrld $3, x2 ## 1, x4 ## 1; \ | ||
518 | vpslld $(32 - 3), x2 ## 1, x2 ## 1; \ | ||
519 | vpor x4 ## 1, x2 ## 1, x2 ## 1; \ | ||
520 | vpsrld $13, x0 ## 2, x4 ## 2; \ | ||
521 | vpslld $(32 - 13), x0 ## 2, x0 ## 2; \ | ||
522 | vpor x4 ## 2, x0 ## 2, x0 ## 2; \ | ||
523 | vpxor x2 ## 2, x1 ## 2, x1 ## 2; \ | ||
524 | vpxor x2 ## 2, x3 ## 2, x3 ## 2; \ | ||
525 | vpsrld $3, x2 ## 2, x4 ## 2; \ | ||
526 | vpslld $(32 - 3), x2 ## 2, x2 ## 2; \ | ||
527 | vpor x4 ## 2, x2 ## 2, x2 ## 2; | ||
528 | |||
529 | #define S(SBOX, x0, x1, x2, x3, x4) /* apply S-box SBOX: both halves (_1 then _2) on register set 1, then both halves on register set 2; tp is reused by each set in turn */ \ | ||
530 | SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ | ||
531 | SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ | ||
532 | SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ | ||
533 | SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); | ||
534 | |||
535 | #define SP(SBOX, x0, x1, x2, x3, x4, i) /* same S-box sequence as S(), with the four get_key loads of round key i (into RK0, RK2, RK3, RK1) interleaved between the halves; the decryption rounds follow it with KL2, which consumes RK0..RK3 */ \ | ||
536 | get_key(i, 0, RK0); \ | ||
537 | SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ | ||
538 | get_key(i, 2, RK2); \ | ||
539 | SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ | ||
540 | get_key(i, 3, RK3); \ | ||
541 | SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ | ||
542 | get_key(i, 1, RK1); \ | ||
543 | SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ | ||
544 | |||
545 | #define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) /* 4x4 transpose of 32-bit words across x0..x3 (on ymm operands the unpack instructions work within each 128-bit lane); t0..t2 are scratch and x3 doubles as the fourth temporary */ \ | ||
546 | vpunpckldq x1, x0, t0; \ | ||
547 | vpunpckhdq x1, x0, t2; \ | ||
548 | vpunpckldq x3, x2, t1; \ | ||
549 | vpunpckhdq x3, x2, x3; \ | ||
550 | \ | ||
551 | vpunpcklqdq t1, t0, x0; \ | ||
552 | vpunpckhqdq t1, t0, x1; \ | ||
553 | vpunpcklqdq x3, t2, x2; \ | ||
554 | vpunpckhqdq x3, t2, x3; | ||
555 | |||
556 | #define read_blocks(x0, x1, x2, x3, t0, t1, t2) /* plain alias of transpose_4x4, applied on entry to the cipher cores (presumably converting loaded blocks into the word-per-register layout the S-box macros expect - confirm against load_16way) */ \ | ||
557 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) | ||
558 | |||
559 | #define write_blocks(x0, x1, x2, x3, t0, t1, t2) /* plain alias of transpose_4x4, applied on exit from the cipher cores (the same transpose as read_blocks, so it restores the layout read_blocks changed) */ \ | ||
560 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) | ||
561 | |||
562 | .align 8 | ||
563 | __serpent_enc_blk16: | ||
564 | /* Serpent encryption core for 16 blocks held in registers (file-local, reached via call). input: | ||
565 | * %rdi: ctx, CTX | ||
566 | * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: plaintext | ||
567 | * output: | ||
568 | * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: ciphertext | ||
569 | * clobbers: RE1, RE2, RK0-RK3, RNOT, tp */ | ||
570 | |||
571 | vpcmpeqd RNOT, RNOT, RNOT; /* RNOT = all ones; the S-box macros XOR with it to get a bitwise NOT */ | ||
572 | |||
573 | read_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2); | ||
574 | read_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2); | ||
575 | |||
576 | K2(RA, RB, RC, RD, RE, 0); /* mix in round key 0; each following line is one S-box plus LK2 with the next round key. The register order changes per line - presumably tracking where each S-box leaves its outputs so no moves are needed */ | ||
577 | S(S0, RA, RB, RC, RD, RE); LK2(RC, RB, RD, RA, RE, 1); | ||
578 | S(S1, RC, RB, RD, RA, RE); LK2(RE, RD, RA, RC, RB, 2); | ||
579 | S(S2, RE, RD, RA, RC, RB); LK2(RB, RD, RE, RC, RA, 3); | ||
580 | S(S3, RB, RD, RE, RC, RA); LK2(RC, RA, RD, RB, RE, 4); | ||
581 | S(S4, RC, RA, RD, RB, RE); LK2(RA, RD, RB, RE, RC, 5); | ||
582 | S(S5, RA, RD, RB, RE, RC); LK2(RC, RA, RD, RE, RB, 6); | ||
583 | S(S6, RC, RA, RD, RE, RB); LK2(RD, RB, RA, RE, RC, 7); | ||
584 | S(S7, RD, RB, RA, RE, RC); LK2(RC, RA, RE, RD, RB, 8); | ||
585 | S(S0, RC, RA, RE, RD, RB); LK2(RE, RA, RD, RC, RB, 9); | ||
586 | S(S1, RE, RA, RD, RC, RB); LK2(RB, RD, RC, RE, RA, 10); | ||
587 | S(S2, RB, RD, RC, RE, RA); LK2(RA, RD, RB, RE, RC, 11); | ||
588 | S(S3, RA, RD, RB, RE, RC); LK2(RE, RC, RD, RA, RB, 12); | ||
589 | S(S4, RE, RC, RD, RA, RB); LK2(RC, RD, RA, RB, RE, 13); | ||
590 | S(S5, RC, RD, RA, RB, RE); LK2(RE, RC, RD, RB, RA, 14); | ||
591 | S(S6, RE, RC, RD, RB, RA); LK2(RD, RA, RC, RB, RE, 15); | ||
592 | S(S7, RD, RA, RC, RB, RE); LK2(RE, RC, RB, RD, RA, 16); | ||
593 | S(S0, RE, RC, RB, RD, RA); LK2(RB, RC, RD, RE, RA, 17); | ||
594 | S(S1, RB, RC, RD, RE, RA); LK2(RA, RD, RE, RB, RC, 18); | ||
595 | S(S2, RA, RD, RE, RB, RC); LK2(RC, RD, RA, RB, RE, 19); | ||
596 | S(S3, RC, RD, RA, RB, RE); LK2(RB, RE, RD, RC, RA, 20); | ||
597 | S(S4, RB, RE, RD, RC, RA); LK2(RE, RD, RC, RA, RB, 21); | ||
598 | S(S5, RE, RD, RC, RA, RB); LK2(RB, RE, RD, RA, RC, 22); | ||
599 | S(S6, RB, RE, RD, RA, RC); LK2(RD, RC, RE, RA, RB, 23); | ||
600 | S(S7, RD, RC, RE, RA, RB); LK2(RB, RE, RA, RD, RC, 24); | ||
601 | S(S0, RB, RE, RA, RD, RC); LK2(RA, RE, RD, RB, RC, 25); | ||
602 | S(S1, RA, RE, RD, RB, RC); LK2(RC, RD, RB, RA, RE, 26); | ||
603 | S(S2, RC, RD, RB, RA, RE); LK2(RE, RD, RC, RA, RB, 27); | ||
604 | S(S3, RE, RD, RC, RA, RB); LK2(RA, RB, RD, RE, RC, 28); | ||
605 | S(S4, RA, RB, RD, RE, RC); LK2(RB, RD, RE, RC, RA, 29); | ||
606 | S(S5, RB, RD, RE, RC, RA); LK2(RA, RB, RD, RC, RE, 30); | ||
607 | S(S6, RA, RB, RD, RC, RE); LK2(RD, RE, RB, RC, RA, 31); | ||
608 | S(S7, RD, RE, RB, RC, RA); K2(RA, RB, RC, RD, RE, 32); /* last round: S7 followed by round key 32 only, no linear transformation */ | ||
609 | |||
610 | write_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2); | ||
611 | write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2); | ||
612 | |||
613 | ret; | ||
614 | ENDPROC(__serpent_enc_blk16) | ||
615 | |||
616 | .align 8 | ||
617 | __serpent_dec_blk16: | ||
618 | /* Serpent decryption core for 16 blocks held in registers (file-local, reached via call). input: | ||
619 | * %rdi: ctx, CTX | ||
620 | * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: ciphertext | ||
621 | * output (note: NOT the input registers - callers must store from these): | ||
622 | * RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2: plaintext | ||
623 | * clobbers: RA1, RA2, RK0-RK3, RNOT, tp */ | ||
624 | |||
625 | vpcmpeqd RNOT, RNOT, RNOT; /* RNOT = all ones; the S-box macros XOR with it to get a bitwise NOT */ | ||
626 | |||
627 | read_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2); | ||
628 | read_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2); | ||
629 | |||
630 | K2(RA, RB, RC, RD, RE, 32); /* start from round key 32; each following line is one inverse S-box (SP also loads round key i) plus KL2, walking the keys down from 31 to 1 */ | ||
631 | SP(SI7, RA, RB, RC, RD, RE, 31); KL2(RB, RD, RA, RE, RC, 31); | ||
632 | SP(SI6, RB, RD, RA, RE, RC, 30); KL2(RA, RC, RE, RB, RD, 30); | ||
633 | SP(SI5, RA, RC, RE, RB, RD, 29); KL2(RC, RD, RA, RE, RB, 29); | ||
634 | SP(SI4, RC, RD, RA, RE, RB, 28); KL2(RC, RA, RB, RE, RD, 28); | ||
635 | SP(SI3, RC, RA, RB, RE, RD, 27); KL2(RB, RC, RD, RE, RA, 27); | ||
636 | SP(SI2, RB, RC, RD, RE, RA, 26); KL2(RC, RA, RE, RD, RB, 26); | ||
637 | SP(SI1, RC, RA, RE, RD, RB, 25); KL2(RB, RA, RE, RD, RC, 25); | ||
638 | SP(SI0, RB, RA, RE, RD, RC, 24); KL2(RE, RC, RA, RB, RD, 24); | ||
639 | SP(SI7, RE, RC, RA, RB, RD, 23); KL2(RC, RB, RE, RD, RA, 23); | ||
640 | SP(SI6, RC, RB, RE, RD, RA, 22); KL2(RE, RA, RD, RC, RB, 22); | ||
641 | SP(SI5, RE, RA, RD, RC, RB, 21); KL2(RA, RB, RE, RD, RC, 21); | ||
642 | SP(SI4, RA, RB, RE, RD, RC, 20); KL2(RA, RE, RC, RD, RB, 20); | ||
643 | SP(SI3, RA, RE, RC, RD, RB, 19); KL2(RC, RA, RB, RD, RE, 19); | ||
644 | SP(SI2, RC, RA, RB, RD, RE, 18); KL2(RA, RE, RD, RB, RC, 18); | ||
645 | SP(SI1, RA, RE, RD, RB, RC, 17); KL2(RC, RE, RD, RB, RA, 17); | ||
646 | SP(SI0, RC, RE, RD, RB, RA, 16); KL2(RD, RA, RE, RC, RB, 16); | ||
647 | SP(SI7, RD, RA, RE, RC, RB, 15); KL2(RA, RC, RD, RB, RE, 15); | ||
648 | SP(SI6, RA, RC, RD, RB, RE, 14); KL2(RD, RE, RB, RA, RC, 14); | ||
649 | SP(SI5, RD, RE, RB, RA, RC, 13); KL2(RE, RC, RD, RB, RA, 13); | ||
650 | SP(SI4, RE, RC, RD, RB, RA, 12); KL2(RE, RD, RA, RB, RC, 12); | ||
651 | SP(SI3, RE, RD, RA, RB, RC, 11); KL2(RA, RE, RC, RB, RD, 11); | ||
652 | SP(SI2, RA, RE, RC, RB, RD, 10); KL2(RE, RD, RB, RC, RA, 10); | ||
653 | SP(SI1, RE, RD, RB, RC, RA, 9); KL2(RA, RD, RB, RC, RE, 9); | ||
654 | SP(SI0, RA, RD, RB, RC, RE, 8); KL2(RB, RE, RD, RA, RC, 8); | ||
655 | SP(SI7, RB, RE, RD, RA, RC, 7); KL2(RE, RA, RB, RC, RD, 7); | ||
656 | SP(SI6, RE, RA, RB, RC, RD, 6); KL2(RB, RD, RC, RE, RA, 6); | ||
657 | SP(SI5, RB, RD, RC, RE, RA, 5); KL2(RD, RA, RB, RC, RE, 5); | ||
658 | SP(SI4, RD, RA, RB, RC, RE, 4); KL2(RD, RB, RE, RC, RA, 4); | ||
659 | SP(SI3, RD, RB, RE, RC, RA, 3); KL2(RE, RD, RA, RC, RB, 3); | ||
660 | SP(SI2, RE, RD, RA, RC, RB, 2); KL2(RD, RB, RC, RA, RE, 2); | ||
661 | SP(SI1, RD, RB, RC, RA, RE, 1); KL2(RE, RB, RC, RA, RD, 1); | ||
662 | S(SI0, RE, RB, RC, RA, RD); K2(RC, RD, RB, RE, RA, 0); /* final step: plain S() because K2 loads round key 0 itself */ | ||
663 | |||
664 | write_blocks(RC1, RD1, RB1, RE1, RK0, RK1, RK2); | ||
665 | write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2); | ||
666 | |||
667 | ret; | ||
668 | ENDPROC(__serpent_dec_blk16) | ||
669 | |||
670 | ENTRY(serpent_ecb_enc_16way) | ||
671 | /* ECB-encrypt one 16-block batch: load from src, run __serpent_enc_blk16, store to dst. input: | ||
672 | * %rdi: ctx, CTX | ||
673 | * %rsi: dst | ||
674 | * %rdx: src | ||
675 | */ | ||
676 | |||
677 | vzeroupper; /* zero the upper halves of all ymm registers before AVX2 work */ | ||
678 | |||
679 | load_16way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); /* helper macro from glue_helper-asm-avx2.S */ | ||
680 | |||
681 | call __serpent_enc_blk16; | ||
682 | |||
683 | store_16way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | ||
684 | |||
685 | vzeroupper; /* leave no dirty upper ymm state for the caller */ | ||
686 | |||
687 | ret; | ||
688 | ENDPROC(serpent_ecb_enc_16way) | ||
689 | |||
690 | ENTRY(serpent_ecb_dec_16way) | ||
691 | /* ECB-decrypt one 16-block batch: load from src, run __serpent_dec_blk16, store to dst. input: | ||
692 | * %rdi: ctx, CTX | ||
693 | * %rsi: dst | ||
694 | * %rdx: src | ||
695 | */ | ||
696 | |||
697 | vzeroupper; /* zero the upper halves of all ymm registers before AVX2 work */ | ||
698 | |||
699 | load_16way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); /* helper macro from glue_helper-asm-avx2.S */ | ||
700 | |||
701 | call __serpent_dec_blk16; | ||
702 | |||
703 | store_16way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); /* the decrypt core returns plaintext in RC, RD, RB, RE rather than RA..RD */ | ||
704 | |||
705 | vzeroupper; /* leave no dirty upper ymm state for the caller */ | ||
706 | |||
707 | ret; | ||
708 | ENDPROC(serpent_ecb_dec_16way) | ||
709 | |||
710 | ENTRY(serpent_cbc_dec_16way) | ||
711 | /* CBC-decrypt one 16-block batch: load from src, run __serpent_dec_blk16, finish with store_cbc_16way. input: | ||
712 | * %rdi: ctx, CTX | ||
713 | * %rsi: dst | ||
714 | * %rdx: src | ||
715 | */ | ||
716 | |||
717 | vzeroupper; /* zero the upper halves of all ymm registers before AVX2 work */ | ||
718 | |||
719 | load_16way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); /* helper macro from glue_helper-asm-avx2.S */ | ||
720 | |||
721 | call __serpent_dec_blk16; | ||
722 | |||
723 | store_cbc_16way(%rdx, %rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2, | ||
724 | RK0); /* gets src as well as dst, with RK0 as an extra register - presumably to XOR in the preceding ciphertext blocks; confirm in glue_helper-asm-avx2.S */ | ||
725 | |||
726 | vzeroupper; /* leave no dirty upper ymm state for the caller */ | ||
727 | |||
728 | ret; | ||
729 | ENDPROC(serpent_cbc_dec_16way) | ||
730 | |||
731 | ENTRY(serpent_ctr_16way) | ||
732 | /* CTR mode for one 16-block batch: build the inputs from iv with load_ctr_16way, encrypt them, finish with store_ctr_16way. input: | ||
733 | * %rdi: ctx, CTX | ||
734 | * %rsi: dst (16 blocks) | ||
735 | * %rdx: src (16 blocks) | ||
736 | * %rcx: iv (little endian, 128bit) | ||
737 | */ | ||
738 | |||
739 | vzeroupper; /* zero the upper halves of all ymm registers before AVX2 work */ | ||
740 | |||
741 | load_ctr_16way(%rcx, .Lbswap128_mask, RA1, RB1, RC1, RD1, RA2, RB2, RC2, | ||
742 | RD2, RK0, RK0x, RK1, RK1x, RK2, RK2x, RK3, RK3x, RNOT, | ||
743 | tp); /* RK*, RNOT and tp are handed over here (presumably as scratch); __serpent_enc_blk16 re-initialises RNOT itself */ | ||
744 | |||
745 | call __serpent_enc_blk16; | ||
746 | |||
747 | store_ctr_16way(%rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); /* gets src and dst - presumably XORs the keystream with src; confirm in glue_helper-asm-avx2.S */ | ||
748 | |||
749 | vzeroupper; /* leave no dirty upper ymm state for the caller */ | ||
750 | |||
751 | ret; | ||
752 | ENDPROC(serpent_ctr_16way) | ||
753 | |||
754 | ENTRY(serpent_xts_enc_16way) | ||
755 | /* XTS-encrypt one 16-block batch: load_xts_16way prepares the inputs, __serpent_enc_blk16 encrypts, store_xts_16way finishes. input: | ||
756 | * %rdi: ctx, CTX | ||
757 | * %rsi: dst (16 blocks) | ||
758 | * %rdx: src (16 blocks) | ||
759 | * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) | ||
760 | */ | ||
761 | |||
762 | vzeroupper; /* zero the upper halves of all ymm registers before AVX2 work */ | ||
763 | |||
764 | load_xts_16way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, | ||
765 | RD2, RK0, RK0x, RK1, RK1x, RK2, RK2x, RK3, RK3x, RNOT, | ||
766 | .Lxts_gf128mul_and_shl1_mask_0, | ||
767 | .Lxts_gf128mul_and_shl1_mask_1); /* takes iv, src and dst; NOTE(review): dst is passed to the load step and store_xts_16way gets only dst - presumably the tweaks are parked in dst; confirm */ | ||
768 | |||
769 | call __serpent_enc_blk16; | ||
770 | |||
771 | store_xts_16way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | ||
772 | |||
773 | vzeroupper; /* leave no dirty upper ymm state for the caller */ | ||
774 | |||
775 | ret; | ||
776 | ENDPROC(serpent_xts_enc_16way) | ||
777 | |||
778 | ENTRY(serpent_xts_dec_16way) | ||
779 | /* XTS-decrypt one 16-block batch: load_xts_16way prepares the inputs, __serpent_dec_blk16 decrypts, store_xts_16way finishes. input: | ||
780 | * %rdi: ctx, CTX | ||
781 | * %rsi: dst (16 blocks) | ||
782 | * %rdx: src (16 blocks) | ||
783 | * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) | ||
784 | */ | ||
785 | |||
786 | vzeroupper; /* zero the upper halves of all ymm registers before AVX2 work */ | ||
787 | |||
788 | load_xts_16way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, | ||
789 | RD2, RK0, RK0x, RK1, RK1x, RK2, RK2x, RK3, RK3x, RNOT, | ||
790 | .Lxts_gf128mul_and_shl1_mask_0, | ||
791 | .Lxts_gf128mul_and_shl1_mask_1); /* takes iv, src and dst; NOTE(review): dst is passed to the load step and store_xts_16way gets only dst - presumably the tweaks are parked in dst; confirm */ | ||
792 | |||
793 | call __serpent_dec_blk16; | ||
794 | |||
795 | store_xts_16way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); /* the decrypt core returns plaintext in RC, RD, RB, RE rather than RA..RD */ | ||
796 | |||
797 | vzeroupper; /* leave no dirty upper ymm state for the caller */ | ||
798 | |||
799 | ret; | ||
800 | ENDPROC(serpent_xts_dec_16way) | ||
diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c new file mode 100644 index 000000000000..23aabc6c20a5 --- /dev/null +++ b/arch/x86/crypto/serpent_avx2_glue.c | |||
@@ -0,0 +1,562 @@ | |||
1 | /* | ||
2 | * Glue Code for x86_64/AVX2 assembler optimized version of Serpent | ||
3 | * | ||
4 | * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | #include <linux/module.h> | ||
14 | #include <linux/types.h> | ||
15 | #include <linux/crypto.h> | ||
16 | #include <linux/err.h> | ||
17 | #include <crypto/algapi.h> | ||
18 | #include <crypto/ctr.h> | ||
19 | #include <crypto/lrw.h> | ||
20 | #include <crypto/xts.h> | ||
21 | #include <crypto/serpent.h> | ||
22 | #include <asm/xcr.h> | ||
23 | #include <asm/xsave.h> | ||
24 | #include <asm/crypto/serpent-avx.h> | ||
25 | #include <asm/crypto/ablk_helper.h> | ||
26 | #include <asm/crypto/glue_helper.h> | ||
27 | |||
/*
 * Block count handled by one call of the 16-way routines declared below;
 * used in this file to size the LRW scratch buffer and to decide when the
 * 16-way ECB routine can be used.
 */
28 | #define SERPENT_AVX2_PARALLEL_BLOCKS 16 | ||
29 | |||
30 | /* 16-way AVX2 parallel cipher functions */ | ||
/*
 * Prototypes only; the bodies are not in this file (presumably all provided
 * by serpent-avx2-asm_64.S - confirm for the ecb/cbc entry points).
 */
31 | asmlinkage void serpent_ecb_enc_16way(struct serpent_ctx *ctx, u8 *dst, | ||
32 | const u8 *src); | ||
33 | asmlinkage void serpent_ecb_dec_16way(struct serpent_ctx *ctx, u8 *dst, | ||
34 | const u8 *src); | ||
35 | asmlinkage void serpent_cbc_dec_16way(void *ctx, u128 *dst, const u128 *src); | ||
36 | |||
/* the counter and XTS variants additionally take a pointer to the IV */
37 | asmlinkage void serpent_ctr_16way(void *ctx, u128 *dst, const u128 *src, | ||
38 | le128 *iv); | ||
39 | asmlinkage void serpent_xts_enc_16way(struct serpent_ctx *ctx, u8 *dst, | ||
40 | const u8 *src, le128 *iv); | ||
41 | asmlinkage void serpent_xts_dec_16way(struct serpent_ctx *ctx, u8 *dst, | ||
42 | const u8 *src, le128 *iv); | ||
43 | |||
/*
 * ECB encryption dispatch table passed to glue_ecb_crypt_128bit() by
 * ecb_encrypt(). Three implementations, listed from 16 blocks per call
 * down to 1: 16-way AVX2, 8-way AVX, single-block __serpent_encrypt.
 * fpu_blocks_limit is 8, the block count of the narrowest vector entry
 * (exact semantics live in the glue helper - not visible here).
 */
44 | static const struct common_glue_ctx serpent_enc = { | ||
45 | .num_funcs = 3, | ||
46 | .fpu_blocks_limit = 8, | ||
47 | |||
48 | .funcs = { { | ||
49 | .num_blocks = 16, | ||
50 | .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_enc_16way) } | ||
51 | }, { | ||
52 | .num_blocks = 8, | ||
53 | .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_enc_8way_avx) } | ||
54 | }, { | ||
55 | .num_blocks = 1, | ||
56 | .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) } | ||
57 | } } | ||
58 | }; | ||
59 | |||
/*
 * CTR dispatch table passed to glue_ctr_crypt_128bit() by ctr_crypt().
 * Entries go from 16 blocks per call down to 1: 16-way AVX2, 8-way AVX,
 * and the single-block __serpent_crypt_ctr.
 */
60 | static const struct common_glue_ctx serpent_ctr = { | ||
61 | .num_funcs = 3, | ||
62 | .fpu_blocks_limit = 8, | ||
63 | |||
64 | .funcs = { { | ||
65 | .num_blocks = 16, | ||
66 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_ctr_16way) } | ||
67 | }, { | ||
68 | .num_blocks = 8, | ||
69 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_ctr_8way_avx) } | ||
70 | }, { | ||
71 | .num_blocks = 1, | ||
72 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(__serpent_crypt_ctr) } | ||
73 | } } | ||
74 | }; | ||
75 | |||
/*
 * XTS encryption dispatch table passed to glue_xts_crypt_128bit() by
 * xts_encrypt(). Entries go from 16 blocks per call down to 1:
 * 16-way AVX2, 8-way AVX, and the single-block serpent_xts_enc.
 */
76 | static const struct common_glue_ctx serpent_enc_xts = { | ||
77 | .num_funcs = 3, | ||
78 | .fpu_blocks_limit = 8, | ||
79 | |||
80 | .funcs = { { | ||
81 | .num_blocks = 16, | ||
82 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc_16way) } | ||
83 | }, { | ||
84 | .num_blocks = 8, | ||
85 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc_8way_avx) } | ||
86 | }, { | ||
87 | .num_blocks = 1, | ||
88 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc) } | ||
89 | } } | ||
90 | }; | ||
91 | |||
/*
 * ECB decryption dispatch table passed to glue_ecb_crypt_128bit() by
 * ecb_decrypt(). Entries go from 16 blocks per call down to 1:
 * 16-way AVX2, 8-way AVX, and the single-block __serpent_decrypt.
 */
92 | static const struct common_glue_ctx serpent_dec = { | ||
93 | .num_funcs = 3, | ||
94 | .fpu_blocks_limit = 8, | ||
95 | |||
96 | .funcs = { { | ||
97 | .num_blocks = 16, | ||
98 | .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_dec_16way) } | ||
99 | }, { | ||
100 | .num_blocks = 8, | ||
101 | .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_dec_8way_avx) } | ||
102 | }, { | ||
103 | .num_blocks = 1, | ||
104 | .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) } | ||
105 | } } | ||
106 | }; | ||
107 | |||
/*
 * CBC decryption dispatch table passed to glue_cbc_decrypt_128bit() by
 * cbc_decrypt(). Entries go from 16 blocks per call down to 1; the
 * single-block entry reuses plain __serpent_decrypt through the CBC cast.
 */
108 | static const struct common_glue_ctx serpent_dec_cbc = { | ||
109 | .num_funcs = 3, | ||
110 | .fpu_blocks_limit = 8, | ||
111 | |||
112 | .funcs = { { | ||
113 | .num_blocks = 16, | ||
114 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_cbc_dec_16way) } | ||
115 | }, { | ||
116 | .num_blocks = 8, | ||
117 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_cbc_dec_8way_avx) } | ||
118 | }, { | ||
119 | .num_blocks = 1, | ||
120 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) } | ||
121 | } } | ||
122 | }; | ||
123 | |||
/*
 * XTS decryption dispatch table passed to glue_xts_crypt_128bit() by
 * xts_decrypt(). Entries go from 16 blocks per call down to 1:
 * 16-way AVX2, 8-way AVX, and the single-block serpent_xts_dec.
 */
124 | static const struct common_glue_ctx serpent_dec_xts = { | ||
125 | .num_funcs = 3, | ||
126 | .fpu_blocks_limit = 8, | ||
127 | |||
128 | .funcs = { { | ||
129 | .num_blocks = 16, | ||
130 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec_16way) } | ||
131 | }, { | ||
132 | .num_blocks = 8, | ||
133 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec_8way_avx) } | ||
134 | }, { | ||
135 | .num_blocks = 1, | ||
136 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec) } | ||
137 | } } | ||
138 | }; | ||
139 | |||
/*
 * blkcipher .encrypt handler for the internal ECB algorithm: forwards the
 * request to glue_ecb_crypt_128bit() with the serpent_enc table and
 * returns its result.
 */
140 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
141 | struct scatterlist *src, unsigned int nbytes) | ||
142 | { | ||
143 | return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes); | ||
144 | } | ||
145 | |||
/*
 * blkcipher .decrypt handler for the internal ECB algorithm: forwards the
 * request to glue_ecb_crypt_128bit() with the serpent_dec table and
 * returns its result.
 */
146 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
147 | struct scatterlist *src, unsigned int nbytes) | ||
148 | { | ||
149 | return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes); | ||
150 | } | ||
151 | |||
/*
 * blkcipher .encrypt handler for the internal CBC algorithm. Unlike the
 * other modes no dispatch table is used: only the single-block
 * __serpent_encrypt is handed to glue_cbc_encrypt_128bit().
 */
152 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
153 | struct scatterlist *src, unsigned int nbytes) | ||
154 | { | ||
155 | return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc, | ||
156 | dst, src, nbytes); | ||
157 | } | ||
158 | |||
/*
 * blkcipher .decrypt handler for the internal CBC algorithm: forwards the
 * request to glue_cbc_decrypt_128bit() with the serpent_dec_cbc table and
 * returns its result.
 */
159 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
160 | struct scatterlist *src, unsigned int nbytes) | ||
161 | { | ||
162 | return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src, | ||
163 | nbytes); | ||
164 | } | ||
165 | |||
/*
 * CTR handler, registered for both .encrypt and .decrypt of the internal
 * CTR algorithm: forwards to glue_ctr_crypt_128bit() with the serpent_ctr
 * table and returns its result.
 */
166 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
167 | struct scatterlist *src, unsigned int nbytes) | ||
168 | { | ||
169 | return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes); | ||
170 | } | ||
171 | |||
/*
 * Thin wrapper around glue_fpu_begin() with the Serpent block size and a
 * threshold of 8 blocks. Returns whatever glue_fpu_begin() returns; the
 * LRW callbacks store that value back as their fpu_enabled state.
 */
172 | static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes) | ||
173 | { | ||
174 | /* since reusing AVX functions, starts using FPU at 8 parallel blocks */ | ||
175 | return glue_fpu_begin(SERPENT_BLOCK_SIZE, 8, NULL, fpu_enabled, nbytes); | ||
176 | } | ||
177 | |||
/*
 * Counterpart of serpent_fpu_begin(): passes the tracked fpu_enabled flag
 * to glue_fpu_end(). Called once by lrw_encrypt()/lrw_decrypt() after
 * lrw_crypt() has returned.
 */
178 | static inline void serpent_fpu_end(bool fpu_enabled) | ||
179 | { | ||
180 | glue_fpu_end(fpu_enabled); | ||
181 | } | ||
182 | |||
/*
 * Per-request state handed to encrypt_callback()/decrypt_callback()
 * through their void *priv argument.
 */
183 | struct crypt_priv { | ||
/* cipher context passed to the Serpent block routines */
184 | struct serpent_ctx *ctx; | ||
/* last value returned by serpent_fpu_begin(); later given to serpent_fpu_end() */
185 | bool fpu_enabled; | ||
186 | }; | ||
187 | |||
/*
 * LRW crypt_fn callback: ECB-encrypts nbytes bytes in place at srcdst.
 *
 * @priv:   struct crypt_priv for this request
 * @srcdst: buffer that is both input and output
 * @nbytes: number of bytes to process
 *
 * Work is split into at most one 16-block chunk, then 8-block chunks, then
 * single blocks. A trailing partial block (nbytes % bsize) is not touched.
 */
188 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
189 | { | ||
190 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | ||
191 | struct crypt_priv *ctx = priv; | ||
192 | int i; | ||
193 | |||
194 | ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); | ||
195 | |||
/*
 * This is an if, not a loop, so only one 16-way pass is made per call.
 * lrw_encrypt() supplies a scratch buffer of exactly
 * SERPENT_AVX2_PARALLEL_BLOCKS blocks, which presumably caps nbytes
 * at 16 blocks per callback - confirm in lrw_crypt().
 */
196 | if (nbytes >= SERPENT_AVX2_PARALLEL_BLOCKS * bsize) { | ||
197 | serpent_ecb_enc_16way(ctx->ctx, srcdst, srcdst); | ||
198 | srcdst += bsize * SERPENT_AVX2_PARALLEL_BLOCKS; | ||
199 | nbytes -= bsize * SERPENT_AVX2_PARALLEL_BLOCKS; | ||
200 | } | ||
201 | |||
/* remaining full groups of SERPENT_PARALLEL_BLOCKS go through the 8-way AVX routine */
202 | while (nbytes >= SERPENT_PARALLEL_BLOCKS * bsize) { | ||
203 | serpent_ecb_enc_8way_avx(ctx->ctx, srcdst, srcdst); | ||
204 | srcdst += bsize * SERPENT_PARALLEL_BLOCKS; | ||
205 | nbytes -= bsize * SERPENT_PARALLEL_BLOCKS; | ||
206 | } | ||
207 | |||
/* whatever whole blocks are left are encrypted one at a time */
208 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
209 | __serpent_encrypt(ctx->ctx, srcdst, srcdst); | ||
210 | } | ||
211 | |||
/*
 * LRW crypt_fn callback: ECB-decrypts nbytes bytes in place at srcdst.
 *
 * @priv:   struct crypt_priv for this request
 * @srcdst: buffer that is both input and output
 * @nbytes: number of bytes to process
 *
 * Work is split into at most one 16-block chunk, then 8-block chunks, then
 * single blocks. A trailing partial block (nbytes % bsize) is not touched.
 */
212 | static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
213 | { | ||
214 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | ||
215 | struct crypt_priv *ctx = priv; | ||
216 | int i; | ||
217 | |||
218 | ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); | ||
219 | |||
/*
 * This is an if, not a loop, so only one 16-way pass is made per call.
 * lrw_decrypt() supplies a scratch buffer of exactly
 * SERPENT_AVX2_PARALLEL_BLOCKS blocks, which presumably caps nbytes
 * at 16 blocks per callback - confirm in lrw_crypt().
 */
220 | if (nbytes >= SERPENT_AVX2_PARALLEL_BLOCKS * bsize) { | ||
221 | serpent_ecb_dec_16way(ctx->ctx, srcdst, srcdst); | ||
222 | srcdst += bsize * SERPENT_AVX2_PARALLEL_BLOCKS; | ||
223 | nbytes -= bsize * SERPENT_AVX2_PARALLEL_BLOCKS; | ||
224 | } | ||
225 | |||
/* remaining full groups of SERPENT_PARALLEL_BLOCKS go through the 8-way AVX routine */
226 | while (nbytes >= SERPENT_PARALLEL_BLOCKS * bsize) { | ||
227 | serpent_ecb_dec_8way_avx(ctx->ctx, srcdst, srcdst); | ||
228 | srcdst += bsize * SERPENT_PARALLEL_BLOCKS; | ||
229 | nbytes -= bsize * SERPENT_PARALLEL_BLOCKS; | ||
230 | } | ||
231 | |||
/* whatever whole blocks are left are decrypted one at a time */
232 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
233 | __serpent_decrypt(ctx->ctx, srcdst, srcdst); | ||
234 | } | ||
235 | |||
/*
 * blkcipher .encrypt handler for the internal LRW algorithm.
 *
 * Builds an lrw_crypt_req on the stack (16-block scratch buffer, the
 * context's LRW table, encrypt_callback as the block routine) and runs
 * lrw_crypt(). Returns lrw_crypt()'s result.
 */
236 | static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
237 | struct scatterlist *src, unsigned int nbytes) | ||
238 | { | ||
239 | struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
240 | be128 buf[SERPENT_AVX2_PARALLEL_BLOCKS]; | ||
241 | struct crypt_priv crypt_ctx = { | ||
242 | .ctx = &ctx->serpent_ctx, | ||
243 | .fpu_enabled = false, | ||
244 | }; | ||
245 | struct lrw_crypt_req req = { | ||
246 | .tbuf = buf, | ||
247 | .tbuflen = sizeof(buf), | ||
248 | |||
249 | .table_ctx = &ctx->lrw_table, | ||
250 | .crypt_ctx = &crypt_ctx, | ||
251 | .crypt_fn = encrypt_callback, | ||
252 | }; | ||
253 | int ret; | ||
254 | |||
/*
 * NOTE(review): MAY_SLEEP is cleared before the walk, presumably because
 * the callback may leave an FPU section open between invocations - confirm.
 */
255 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
256 | ret = lrw_crypt(desc, dst, src, nbytes, &req); | ||
/* close whatever FPU state the callbacks recorded in crypt_ctx */
257 | serpent_fpu_end(crypt_ctx.fpu_enabled); | ||
258 | |||
259 | return ret; | ||
260 | } | ||
261 | |||
/*
 * blkcipher .decrypt handler for the internal LRW algorithm.
 *
 * Builds an lrw_crypt_req on the stack (16-block scratch buffer, the
 * context's LRW table, decrypt_callback as the block routine) and runs
 * lrw_crypt(). Returns lrw_crypt()'s result.
 */
262 | static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
263 | struct scatterlist *src, unsigned int nbytes) | ||
264 | { | ||
265 | struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
266 | be128 buf[SERPENT_AVX2_PARALLEL_BLOCKS]; | ||
267 | struct crypt_priv crypt_ctx = { | ||
268 | .ctx = &ctx->serpent_ctx, | ||
269 | .fpu_enabled = false, | ||
270 | }; | ||
271 | struct lrw_crypt_req req = { | ||
272 | .tbuf = buf, | ||
273 | .tbuflen = sizeof(buf), | ||
274 | |||
275 | .table_ctx = &ctx->lrw_table, | ||
276 | .crypt_ctx = &crypt_ctx, | ||
277 | .crypt_fn = decrypt_callback, | ||
278 | }; | ||
279 | int ret; | ||
280 | |||
/*
 * NOTE(review): MAY_SLEEP is cleared before the walk, presumably because
 * the callback may leave an FPU section open between invocations - confirm.
 */
281 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
282 | ret = lrw_crypt(desc, dst, src, nbytes, &req); | ||
/* close whatever FPU state the callbacks recorded in crypt_ctx */
283 | serpent_fpu_end(crypt_ctx.fpu_enabled); | ||
284 | |||
285 | return ret; | ||
286 | } | ||
287 | |||
/*
 * blkcipher .encrypt handler for the internal XTS algorithm: runs
 * glue_xts_crypt_128bit() with the serpent_enc_xts table. The tweak
 * function is __serpent_encrypt used with tweak_ctx; the data is processed
 * with crypt_ctx.
 */
288 | static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
289 | struct scatterlist *src, unsigned int nbytes) | ||
290 | { | ||
291 | struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
292 | |||
293 | return glue_xts_crypt_128bit(&serpent_enc_xts, desc, dst, src, nbytes, | ||
294 | XTS_TWEAK_CAST(__serpent_encrypt), | ||
295 | &ctx->tweak_ctx, &ctx->crypt_ctx); | ||
296 | } | ||
297 | |||
/*
 * blkcipher .decrypt handler for the internal XTS algorithm: runs
 * glue_xts_crypt_128bit() with the serpent_dec_xts table. The tweak
 * function is still __serpent_encrypt (same as in xts_encrypt); only the
 * data path uses the decryption table.
 */
298 | static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
299 | struct scatterlist *src, unsigned int nbytes) | ||
300 | { | ||
301 | struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
302 | |||
303 | return glue_xts_crypt_128bit(&serpent_dec_xts, desc, dst, src, nbytes, | ||
304 | XTS_TWEAK_CAST(__serpent_encrypt), | ||
305 | &ctx->tweak_ctx, &ctx->crypt_ctx); | ||
306 | } | ||
307 | |||
/*
 * Algorithms registered by this module (see init()/fini()).
 *
 * Entries 0-4: synchronous blkcipher implementations with "__"-prefixed
 * names and priority 0, wired to the handlers defined in this file.
 * Entries 5-9: asynchronous ablkcipher algorithms under the public names
 * ("ecb(serpent)" ...) with priority 600, implemented by the ablk_* helpers.
 * NOTE(review): presumably ablk_init binds each public entry to its
 * "__driver-" counterpart - confirm in the ablk helper.
 */
308 | static struct crypto_alg srp_algs[10] = { { | ||
/* [0] internal ECB */
309 | .cra_name = "__ecb-serpent-avx2", | ||
310 | .cra_driver_name = "__driver-ecb-serpent-avx2", | ||
311 | .cra_priority = 0, | ||
312 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
313 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
314 | .cra_ctxsize = sizeof(struct serpent_ctx), | ||
315 | .cra_alignmask = 0, | ||
316 | .cra_type = &crypto_blkcipher_type, | ||
317 | .cra_module = THIS_MODULE, | ||
318 | .cra_list = LIST_HEAD_INIT(srp_algs[0].cra_list), | ||
319 | .cra_u = { | ||
320 | .blkcipher = { | ||
321 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
322 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
323 | .setkey = serpent_setkey, | ||
324 | .encrypt = ecb_encrypt, | ||
325 | .decrypt = ecb_decrypt, | ||
326 | }, | ||
327 | }, | ||
328 | }, { | ||
/* [1] internal CBC (no ivsize is set in this entry) */
329 | .cra_name = "__cbc-serpent-avx2", | ||
330 | .cra_driver_name = "__driver-cbc-serpent-avx2", | ||
331 | .cra_priority = 0, | ||
332 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
333 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
334 | .cra_ctxsize = sizeof(struct serpent_ctx), | ||
335 | .cra_alignmask = 0, | ||
336 | .cra_type = &crypto_blkcipher_type, | ||
337 | .cra_module = THIS_MODULE, | ||
338 | .cra_list = LIST_HEAD_INIT(srp_algs[1].cra_list), | ||
339 | .cra_u = { | ||
340 | .blkcipher = { | ||
341 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
342 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
343 | .setkey = serpent_setkey, | ||
344 | .encrypt = cbc_encrypt, | ||
345 | .decrypt = cbc_decrypt, | ||
346 | }, | ||
347 | }, | ||
348 | }, { | ||
/* [2] internal CTR: block size 1, same handler for both directions */
349 | .cra_name = "__ctr-serpent-avx2", | ||
350 | .cra_driver_name = "__driver-ctr-serpent-avx2", | ||
351 | .cra_priority = 0, | ||
352 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
353 | .cra_blocksize = 1, | ||
354 | .cra_ctxsize = sizeof(struct serpent_ctx), | ||
355 | .cra_alignmask = 0, | ||
356 | .cra_type = &crypto_blkcipher_type, | ||
357 | .cra_module = THIS_MODULE, | ||
358 | .cra_list = LIST_HEAD_INIT(srp_algs[2].cra_list), | ||
359 | .cra_u = { | ||
360 | .blkcipher = { | ||
361 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
362 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
363 | .ivsize = SERPENT_BLOCK_SIZE, | ||
364 | .setkey = serpent_setkey, | ||
365 | .encrypt = ctr_crypt, | ||
366 | .decrypt = ctr_crypt, | ||
367 | }, | ||
368 | }, | ||
369 | }, { | ||
/* [3] internal LRW: key carries one extra block; has a cra_exit hook */
370 | .cra_name = "__lrw-serpent-avx2", | ||
371 | .cra_driver_name = "__driver-lrw-serpent-avx2", | ||
372 | .cra_priority = 0, | ||
373 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
374 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
375 | .cra_ctxsize = sizeof(struct serpent_lrw_ctx), | ||
376 | .cra_alignmask = 0, | ||
377 | .cra_type = &crypto_blkcipher_type, | ||
378 | .cra_module = THIS_MODULE, | ||
379 | .cra_list = LIST_HEAD_INIT(srp_algs[3].cra_list), | ||
380 | .cra_exit = lrw_serpent_exit_tfm, | ||
381 | .cra_u = { | ||
382 | .blkcipher = { | ||
383 | .min_keysize = SERPENT_MIN_KEY_SIZE + | ||
384 | SERPENT_BLOCK_SIZE, | ||
385 | .max_keysize = SERPENT_MAX_KEY_SIZE + | ||
386 | SERPENT_BLOCK_SIZE, | ||
387 | .ivsize = SERPENT_BLOCK_SIZE, | ||
388 | .setkey = lrw_serpent_setkey, | ||
389 | .encrypt = lrw_encrypt, | ||
390 | .decrypt = lrw_decrypt, | ||
391 | }, | ||
392 | }, | ||
393 | }, { | ||
/* [4] internal XTS: key sizes are doubled (two Serpent keys) */
394 | .cra_name = "__xts-serpent-avx2", | ||
395 | .cra_driver_name = "__driver-xts-serpent-avx2", | ||
396 | .cra_priority = 0, | ||
397 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
398 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
399 | .cra_ctxsize = sizeof(struct serpent_xts_ctx), | ||
400 | .cra_alignmask = 0, | ||
401 | .cra_type = &crypto_blkcipher_type, | ||
402 | .cra_module = THIS_MODULE, | ||
403 | .cra_list = LIST_HEAD_INIT(srp_algs[4].cra_list), | ||
404 | .cra_u = { | ||
405 | .blkcipher = { | ||
406 | .min_keysize = SERPENT_MIN_KEY_SIZE * 2, | ||
407 | .max_keysize = SERPENT_MAX_KEY_SIZE * 2, | ||
408 | .ivsize = SERPENT_BLOCK_SIZE, | ||
409 | .setkey = xts_serpent_setkey, | ||
410 | .encrypt = xts_encrypt, | ||
411 | .decrypt = xts_decrypt, | ||
412 | }, | ||
413 | }, | ||
414 | }, { | ||
/* [5] public async ECB */
415 | .cra_name = "ecb(serpent)", | ||
416 | .cra_driver_name = "ecb-serpent-avx2", | ||
417 | .cra_priority = 600, | ||
418 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
419 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
420 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
421 | .cra_alignmask = 0, | ||
422 | .cra_type = &crypto_ablkcipher_type, | ||
423 | .cra_module = THIS_MODULE, | ||
424 | .cra_list = LIST_HEAD_INIT(srp_algs[5].cra_list), | ||
425 | .cra_init = ablk_init, | ||
426 | .cra_exit = ablk_exit, | ||
427 | .cra_u = { | ||
428 | .ablkcipher = { | ||
429 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
430 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
431 | .setkey = ablk_set_key, | ||
432 | .encrypt = ablk_encrypt, | ||
433 | .decrypt = ablk_decrypt, | ||
434 | }, | ||
435 | }, | ||
436 | }, { | ||
/*
 * [6] public async CBC.
 * NOTE(review): .encrypt is __ablk_encrypt here while the other public
 * entries use ablk_encrypt - the difference is defined in the ablk helper,
 * confirm there before changing.
 */
437 | .cra_name = "cbc(serpent)", | ||
438 | .cra_driver_name = "cbc-serpent-avx2", | ||
439 | .cra_priority = 600, | ||
440 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
441 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
442 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
443 | .cra_alignmask = 0, | ||
444 | .cra_type = &crypto_ablkcipher_type, | ||
445 | .cra_module = THIS_MODULE, | ||
446 | .cra_list = LIST_HEAD_INIT(srp_algs[6].cra_list), | ||
447 | .cra_init = ablk_init, | ||
448 | .cra_exit = ablk_exit, | ||
449 | .cra_u = { | ||
450 | .ablkcipher = { | ||
451 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
452 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
453 | .ivsize = SERPENT_BLOCK_SIZE, | ||
454 | .setkey = ablk_set_key, | ||
455 | .encrypt = __ablk_encrypt, | ||
456 | .decrypt = ablk_decrypt, | ||
457 | }, | ||
458 | }, | ||
459 | }, { | ||
/* [7] public async CTR: block size 1, ablk_encrypt for both directions */
460 | .cra_name = "ctr(serpent)", | ||
461 | .cra_driver_name = "ctr-serpent-avx2", | ||
462 | .cra_priority = 600, | ||
463 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
464 | .cra_blocksize = 1, | ||
465 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
466 | .cra_alignmask = 0, | ||
467 | .cra_type = &crypto_ablkcipher_type, | ||
468 | .cra_module = THIS_MODULE, | ||
469 | .cra_list = LIST_HEAD_INIT(srp_algs[7].cra_list), | ||
470 | .cra_init = ablk_init, | ||
471 | .cra_exit = ablk_exit, | ||
472 | .cra_u = { | ||
473 | .ablkcipher = { | ||
474 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
475 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
476 | .ivsize = SERPENT_BLOCK_SIZE, | ||
477 | .setkey = ablk_set_key, | ||
478 | .encrypt = ablk_encrypt, | ||
479 | .decrypt = ablk_encrypt, | ||
480 | .geniv = "chainiv", | ||
481 | }, | ||
482 | }, | ||
483 | }, { | ||
/* [8] public async LRW */
484 | .cra_name = "lrw(serpent)", | ||
485 | .cra_driver_name = "lrw-serpent-avx2", | ||
486 | .cra_priority = 600, | ||
487 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
488 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
489 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
490 | .cra_alignmask = 0, | ||
491 | .cra_type = &crypto_ablkcipher_type, | ||
492 | .cra_module = THIS_MODULE, | ||
493 | .cra_list = LIST_HEAD_INIT(srp_algs[8].cra_list), | ||
494 | .cra_init = ablk_init, | ||
495 | .cra_exit = ablk_exit, | ||
496 | .cra_u = { | ||
497 | .ablkcipher = { | ||
498 | .min_keysize = SERPENT_MIN_KEY_SIZE + | ||
499 | SERPENT_BLOCK_SIZE, | ||
500 | .max_keysize = SERPENT_MAX_KEY_SIZE + | ||
501 | SERPENT_BLOCK_SIZE, | ||
502 | .ivsize = SERPENT_BLOCK_SIZE, | ||
503 | .setkey = ablk_set_key, | ||
504 | .encrypt = ablk_encrypt, | ||
505 | .decrypt = ablk_decrypt, | ||
506 | }, | ||
507 | }, | ||
508 | }, { | ||
/* [9] public async XTS */
509 | .cra_name = "xts(serpent)", | ||
510 | .cra_driver_name = "xts-serpent-avx2", | ||
511 | .cra_priority = 600, | ||
512 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
513 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
514 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
515 | .cra_alignmask = 0, | ||
516 | .cra_type = &crypto_ablkcipher_type, | ||
517 | .cra_module = THIS_MODULE, | ||
518 | .cra_list = LIST_HEAD_INIT(srp_algs[9].cra_list), | ||
519 | .cra_init = ablk_init, | ||
520 | .cra_exit = ablk_exit, | ||
521 | .cra_u = { | ||
522 | .ablkcipher = { | ||
523 | .min_keysize = SERPENT_MIN_KEY_SIZE * 2, | ||
524 | .max_keysize = SERPENT_MAX_KEY_SIZE * 2, | ||
525 | .ivsize = SERPENT_BLOCK_SIZE, | ||
526 | .setkey = ablk_set_key, | ||
527 | .encrypt = ablk_encrypt, | ||
528 | .decrypt = ablk_decrypt, | ||
529 | }, | ||
530 | }, | ||
531 | } }; | ||
532 | |||
/*
 * Module entry point.
 *
 * Refuses to load (-ENODEV) unless the CPU reports AVX2 and OSXSAVE and
 * XCR0 has both the SSE and YMM state bits set; otherwise registers every
 * entry of srp_algs and returns crypto_register_algs()'s result.
 */
533 | static int __init init(void) | ||
534 | { | ||
535 | u64 xcr0; | ||
536 | |||
/* OSXSAVE is checked here, before xgetbv is executed below */
537 | if (!cpu_has_avx2 || !cpu_has_osxsave) { | ||
538 | pr_info("AVX2 instructions are not detected.\n"); | ||
539 | return -ENODEV; | ||
540 | } | ||
541 | |||
/* both the SSE and the YMM state bits must be enabled in XCR0 */
542 | xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); | ||
543 | if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { | ||
544 | pr_info("AVX detected but unusable.\n"); | ||
545 | return -ENODEV; | ||
546 | } | ||
547 | |||
548 | return crypto_register_algs(srp_algs, ARRAY_SIZE(srp_algs)); | ||
549 | } | ||
550 | |||
/* Module exit point: unregisters every entry of srp_algs registered by init(). */
551 | static void __exit fini(void) | ||
552 | { | ||
553 | crypto_unregister_algs(srp_algs, ARRAY_SIZE(srp_algs)); | ||
554 | } | ||
555 | |||
/* hook init()/fini() up as the module's load and unload handlers */
556 | module_init(init); | ||
557 | module_exit(fini); | ||
558 | |||
559 | MODULE_LICENSE("GPL"); | ||
560 | MODULE_DESCRIPTION("Serpent Cipher Algorithm, AVX2 optimized"); | ||
/* module aliases: "serpent" and "serpent-asm" */
561 | MODULE_ALIAS("serpent"); | ||
562 | MODULE_ALIAS("serpent-asm"); | ||
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c index 0f8519cf4ac2..9ae83cf8d21e 100644 --- a/arch/x86/crypto/serpent_avx_glue.c +++ b/arch/x86/crypto/serpent_avx_glue.c | |||
@@ -41,7 +41,32 @@ | |||
41 | #include <asm/crypto/ablk_helper.h> | 41 | #include <asm/crypto/ablk_helper.h> |
42 | #include <asm/crypto/glue_helper.h> | 42 | #include <asm/crypto/glue_helper.h> |
43 | 43 | ||
44 | static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) | 44 | /* 8-way parallel cipher functions */ |
45 | asmlinkage void serpent_ecb_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst, | ||
46 | const u8 *src); | ||
47 | EXPORT_SYMBOL_GPL(serpent_ecb_enc_8way_avx); | ||
48 | |||
49 | asmlinkage void serpent_ecb_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst, | ||
50 | const u8 *src); | ||
51 | EXPORT_SYMBOL_GPL(serpent_ecb_dec_8way_avx); | ||
52 | |||
53 | asmlinkage void serpent_cbc_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst, | ||
54 | const u8 *src); | ||
55 | EXPORT_SYMBOL_GPL(serpent_cbc_dec_8way_avx); | ||
56 | |||
57 | asmlinkage void serpent_ctr_8way_avx(struct serpent_ctx *ctx, u8 *dst, | ||
58 | const u8 *src, le128 *iv); | ||
59 | EXPORT_SYMBOL_GPL(serpent_ctr_8way_avx); | ||
60 | |||
61 | asmlinkage void serpent_xts_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst, | ||
62 | const u8 *src, le128 *iv); | ||
63 | EXPORT_SYMBOL_GPL(serpent_xts_enc_8way_avx); | ||
64 | |||
65 | asmlinkage void serpent_xts_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst, | ||
66 | const u8 *src, le128 *iv); | ||
67 | EXPORT_SYMBOL_GPL(serpent_xts_dec_8way_avx); | ||
68 | |||
69 | void __serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) | ||
45 | { | 70 | { |
46 | be128 ctrblk; | 71 | be128 ctrblk; |
47 | 72 | ||
@@ -51,18 +76,22 @@ static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) | |||
51 | __serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); | 76 | __serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); |
52 | u128_xor(dst, src, (u128 *)&ctrblk); | 77 | u128_xor(dst, src, (u128 *)&ctrblk); |
53 | } | 78 | } |
79 | EXPORT_SYMBOL_GPL(__serpent_crypt_ctr); | ||
54 | 80 | ||
55 | static void serpent_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv) | 81 | void serpent_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv) |
56 | { | 82 | { |
57 | glue_xts_crypt_128bit_one(ctx, dst, src, iv, | 83 | glue_xts_crypt_128bit_one(ctx, dst, src, iv, |
58 | GLUE_FUNC_CAST(__serpent_encrypt)); | 84 | GLUE_FUNC_CAST(__serpent_encrypt)); |
59 | } | 85 | } |
86 | EXPORT_SYMBOL_GPL(serpent_xts_enc); | ||
60 | 87 | ||
61 | static void serpent_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv) | 88 | void serpent_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv) |
62 | { | 89 | { |
63 | glue_xts_crypt_128bit_one(ctx, dst, src, iv, | 90 | glue_xts_crypt_128bit_one(ctx, dst, src, iv, |
64 | GLUE_FUNC_CAST(__serpent_decrypt)); | 91 | GLUE_FUNC_CAST(__serpent_decrypt)); |
65 | } | 92 | } |
93 | EXPORT_SYMBOL_GPL(serpent_xts_dec); | ||
94 | |||
66 | 95 | ||
67 | static const struct common_glue_ctx serpent_enc = { | 96 | static const struct common_glue_ctx serpent_enc = { |
68 | .num_funcs = 2, | 97 | .num_funcs = 2, |
@@ -86,7 +115,7 @@ static const struct common_glue_ctx serpent_ctr = { | |||
86 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_ctr_8way_avx) } | 115 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_ctr_8way_avx) } |
87 | }, { | 116 | }, { |
88 | .num_blocks = 1, | 117 | .num_blocks = 1, |
89 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr) } | 118 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(__serpent_crypt_ctr) } |
90 | } } | 119 | } } |
91 | }; | 120 | }; |
92 | 121 | ||
@@ -224,13 +253,8 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | |||
224 | __serpent_decrypt(ctx->ctx, srcdst, srcdst); | 253 | __serpent_decrypt(ctx->ctx, srcdst, srcdst); |
225 | } | 254 | } |
226 | 255 | ||
227 | struct serpent_lrw_ctx { | 256 | int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, |
228 | struct lrw_table_ctx lrw_table; | 257 | unsigned int keylen) |
229 | struct serpent_ctx serpent_ctx; | ||
230 | }; | ||
231 | |||
232 | static int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
233 | unsigned int keylen) | ||
234 | { | 258 | { |
235 | struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); | 259 | struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); |
236 | int err; | 260 | int err; |
@@ -243,6 +267,7 @@ static int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, | |||
243 | return lrw_init_table(&ctx->lrw_table, key + keylen - | 267 | return lrw_init_table(&ctx->lrw_table, key + keylen - |
244 | SERPENT_BLOCK_SIZE); | 268 | SERPENT_BLOCK_SIZE); |
245 | } | 269 | } |
270 | EXPORT_SYMBOL_GPL(lrw_serpent_setkey); | ||
246 | 271 | ||
247 | static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 272 | static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
248 | struct scatterlist *src, unsigned int nbytes) | 273 | struct scatterlist *src, unsigned int nbytes) |
@@ -296,20 +321,16 @@ static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | |||
296 | return ret; | 321 | return ret; |
297 | } | 322 | } |
298 | 323 | ||
299 | static void lrw_exit_tfm(struct crypto_tfm *tfm) | 324 | void lrw_serpent_exit_tfm(struct crypto_tfm *tfm) |
300 | { | 325 | { |
301 | struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); | 326 | struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); |
302 | 327 | ||
303 | lrw_free_table(&ctx->lrw_table); | 328 | lrw_free_table(&ctx->lrw_table); |
304 | } | 329 | } |
330 | EXPORT_SYMBOL_GPL(lrw_serpent_exit_tfm); | ||
305 | 331 | ||
306 | struct serpent_xts_ctx { | 332 | int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, |
307 | struct serpent_ctx tweak_ctx; | 333 | unsigned int keylen) |
308 | struct serpent_ctx crypt_ctx; | ||
309 | }; | ||
310 | |||
311 | static int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
312 | unsigned int keylen) | ||
313 | { | 334 | { |
314 | struct serpent_xts_ctx *ctx = crypto_tfm_ctx(tfm); | 335 | struct serpent_xts_ctx *ctx = crypto_tfm_ctx(tfm); |
315 | u32 *flags = &tfm->crt_flags; | 336 | u32 *flags = &tfm->crt_flags; |
@@ -331,6 +352,7 @@ static int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, | |||
331 | /* second half of xts-key is for tweak */ | 352 | /* second half of xts-key is for tweak */ |
332 | return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2); | 353 | return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2); |
333 | } | 354 | } |
355 | EXPORT_SYMBOL_GPL(xts_serpent_setkey); | ||
334 | 356 | ||
335 | static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 357 | static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
336 | struct scatterlist *src, unsigned int nbytes) | 358 | struct scatterlist *src, unsigned int nbytes) |
@@ -420,7 +442,7 @@ static struct crypto_alg serpent_algs[10] = { { | |||
420 | .cra_alignmask = 0, | 442 | .cra_alignmask = 0, |
421 | .cra_type = &crypto_blkcipher_type, | 443 | .cra_type = &crypto_blkcipher_type, |
422 | .cra_module = THIS_MODULE, | 444 | .cra_module = THIS_MODULE, |
423 | .cra_exit = lrw_exit_tfm, | 445 | .cra_exit = lrw_serpent_exit_tfm, |
424 | .cra_u = { | 446 | .cra_u = { |
425 | .blkcipher = { | 447 | .blkcipher = { |
426 | .min_keysize = SERPENT_MIN_KEY_SIZE + | 448 | .min_keysize = SERPENT_MIN_KEY_SIZE + |
diff --git a/arch/x86/include/asm/crypto/serpent-avx.h b/arch/x86/include/asm/crypto/serpent-avx.h index 56e79cc57eaf..33c2b8a435da 100644 --- a/arch/x86/include/asm/crypto/serpent-avx.h +++ b/arch/x86/include/asm/crypto/serpent-avx.h | |||
@@ -6,6 +6,16 @@ | |||
6 | 6 | ||
7 | #define SERPENT_PARALLEL_BLOCKS 8 | 7 | #define SERPENT_PARALLEL_BLOCKS 8 |
8 | 8 | ||
9 | struct serpent_lrw_ctx { | ||
10 | struct lrw_table_ctx lrw_table; | ||
11 | struct serpent_ctx serpent_ctx; | ||
12 | }; | ||
13 | |||
14 | struct serpent_xts_ctx { | ||
15 | struct serpent_ctx tweak_ctx; | ||
16 | struct serpent_ctx crypt_ctx; | ||
17 | }; | ||
18 | |||
9 | asmlinkage void serpent_ecb_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst, | 19 | asmlinkage void serpent_ecb_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst, |
10 | const u8 *src); | 20 | const u8 *src); |
11 | asmlinkage void serpent_ecb_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst, | 21 | asmlinkage void serpent_ecb_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst, |
@@ -21,4 +31,18 @@ asmlinkage void serpent_xts_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst, | |||
21 | asmlinkage void serpent_xts_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst, | 31 | asmlinkage void serpent_xts_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst, |
22 | const u8 *src, le128 *iv); | 32 | const u8 *src, le128 *iv); |
23 | 33 | ||
34 | extern void __serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, | ||
35 | le128 *iv); | ||
36 | |||
37 | extern void serpent_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv); | ||
38 | extern void serpent_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv); | ||
39 | |||
40 | extern int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
41 | unsigned int keylen); | ||
42 | |||
43 | extern void lrw_serpent_exit_tfm(struct crypto_tfm *tfm); | ||
44 | |||
45 | extern int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
46 | unsigned int keylen); | ||
47 | |||
24 | #endif | 48 | #endif |
diff --git a/crypto/Kconfig b/crypto/Kconfig index 1ba48ddd4da1..9ad3d78c1075 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig | |||
@@ -1131,6 +1131,29 @@ config CRYPTO_SERPENT_AVX_X86_64 | |||
1131 | See also: | 1131 | See also: |
1132 | <http://www.cl.cam.ac.uk/~rja14/serpent.html> | 1132 | <http://www.cl.cam.ac.uk/~rja14/serpent.html> |
1133 | 1133 | ||
1134 | config CRYPTO_SERPENT_AVX2_X86_64 | ||
1135 | tristate "Serpent cipher algorithm (x86_64/AVX2)" | ||
1136 | depends on X86 && 64BIT | ||
1137 | select CRYPTO_ALGAPI | ||
1138 | select CRYPTO_CRYPTD | ||
1139 | select CRYPTO_ABLK_HELPER_X86 | ||
1140 | select CRYPTO_GLUE_HELPER_X86 | ||
1141 | select CRYPTO_SERPENT | ||
1142 | select CRYPTO_SERPENT_AVX_X86_64 | ||
1143 | select CRYPTO_LRW | ||
1144 | select CRYPTO_XTS | ||
1145 | help | ||
1146 | Serpent cipher algorithm, by Anderson, Biham & Knudsen. | ||
1147 | |||
1148 | Keys are allowed to be from 0 to 256 bits in length, in steps | ||
1149 | of 8 bits. | ||
1150 | |||
1151 | This module provides Serpent cipher algorithm that processes 16 | ||
1152 | blocks in parallel using the AVX2 instruction set. | ||
1153 | |||
1154 | See also: | ||
1155 | <http://www.cl.cam.ac.uk/~rja14/serpent.html> | ||
1156 | |||
1134 | config CRYPTO_TEA | 1157 | config CRYPTO_TEA |
1135 | tristate "TEA, XTEA and XETA cipher algorithms" | 1158 | tristate "TEA, XTEA and XETA cipher algorithms" |
1136 | select CRYPTO_ALGAPI | 1159 | select CRYPTO_ALGAPI |
diff --git a/crypto/testmgr.c b/crypto/testmgr.c index fea7841dd6f3..f5e13dea8cc9 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c | |||
@@ -1645,6 +1645,9 @@ static const struct alg_test_desc alg_test_descs[] = { | |||
1645 | .alg = "__cbc-serpent-avx", | 1645 | .alg = "__cbc-serpent-avx", |
1646 | .test = alg_test_null, | 1646 | .test = alg_test_null, |
1647 | }, { | 1647 | }, { |
1648 | .alg = "__cbc-serpent-avx2", | ||
1649 | .test = alg_test_null, | ||
1650 | }, { | ||
1648 | .alg = "__cbc-serpent-sse2", | 1651 | .alg = "__cbc-serpent-sse2", |
1649 | .test = alg_test_null, | 1652 | .test = alg_test_null, |
1650 | }, { | 1653 | }, { |
@@ -1673,6 +1676,9 @@ static const struct alg_test_desc alg_test_descs[] = { | |||
1673 | .alg = "__driver-cbc-serpent-avx", | 1676 | .alg = "__driver-cbc-serpent-avx", |
1674 | .test = alg_test_null, | 1677 | .test = alg_test_null, |
1675 | }, { | 1678 | }, { |
1679 | .alg = "__driver-cbc-serpent-avx2", | ||
1680 | .test = alg_test_null, | ||
1681 | }, { | ||
1676 | .alg = "__driver-cbc-serpent-sse2", | 1682 | .alg = "__driver-cbc-serpent-sse2", |
1677 | .test = alg_test_null, | 1683 | .test = alg_test_null, |
1678 | }, { | 1684 | }, { |
@@ -1701,6 +1707,9 @@ static const struct alg_test_desc alg_test_descs[] = { | |||
1701 | .alg = "__driver-ecb-serpent-avx", | 1707 | .alg = "__driver-ecb-serpent-avx", |
1702 | .test = alg_test_null, | 1708 | .test = alg_test_null, |
1703 | }, { | 1709 | }, { |
1710 | .alg = "__driver-ecb-serpent-avx2", | ||
1711 | .test = alg_test_null, | ||
1712 | }, { | ||
1704 | .alg = "__driver-ecb-serpent-sse2", | 1713 | .alg = "__driver-ecb-serpent-sse2", |
1705 | .test = alg_test_null, | 1714 | .test = alg_test_null, |
1706 | }, { | 1715 | }, { |
@@ -1969,6 +1978,9 @@ static const struct alg_test_desc alg_test_descs[] = { | |||
1969 | .alg = "cryptd(__driver-cbc-camellia-aesni)", | 1978 | .alg = "cryptd(__driver-cbc-camellia-aesni)", |
1970 | .test = alg_test_null, | 1979 | .test = alg_test_null, |
1971 | }, { | 1980 | }, { |
1981 | .alg = "cryptd(__driver-cbc-serpent-avx2)", | ||
1982 | .test = alg_test_null, | ||
1983 | }, { | ||
1972 | .alg = "cryptd(__driver-ecb-aes-aesni)", | 1984 | .alg = "cryptd(__driver-ecb-aes-aesni)", |
1973 | .test = alg_test_null, | 1985 | .test = alg_test_null, |
1974 | .fips_allowed = 1, | 1986 | .fips_allowed = 1, |
@@ -1988,6 +2000,9 @@ static const struct alg_test_desc alg_test_descs[] = { | |||
1988 | .alg = "cryptd(__driver-ecb-serpent-avx)", | 2000 | .alg = "cryptd(__driver-ecb-serpent-avx)", |
1989 | .test = alg_test_null, | 2001 | .test = alg_test_null, |
1990 | }, { | 2002 | }, { |
2003 | .alg = "cryptd(__driver-ecb-serpent-avx2)", | ||
2004 | .test = alg_test_null, | ||
2005 | }, { | ||
1991 | .alg = "cryptd(__driver-ecb-serpent-sse2)", | 2006 | .alg = "cryptd(__driver-ecb-serpent-sse2)", |
1992 | .test = alg_test_null, | 2007 | .test = alg_test_null, |
1993 | }, { | 2008 | }, { |