 arch/x86/crypto/Makefile                  |   2 +
 arch/x86/crypto/cast6-avx-x86_64-asm_64.S | 335 +++++++++++++++++++++++
 arch/x86/crypto/cast6_avx_glue.c          | 648 ++++++++++++++++++++++++++++++
 crypto/Kconfig                            |  17 +
 crypto/testmgr.c                          |  60 ++++
 5 files changed, 1062 insertions(+), 0 deletions(-)
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 565e82b00142..5bacb4a226ac 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o
 obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
 obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
 obj-$(CONFIG_CRYPTO_CAST5_AVX_X86_64) += cast5-avx-x86_64.o
+obj-$(CONFIG_CRYPTO_CAST6_AVX_X86_64) += cast6-avx-x86_64.o
 obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
@@ -34,6 +35,7 @@ serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o
 aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
 camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
 cast5-avx-x86_64-y := cast5-avx-x86_64-asm_64.o cast5_avx_glue.o
+cast6-avx-x86_64-y := cast6-avx-x86_64-asm_64.o cast6_avx_glue.o
 blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
 twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
 twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
new file mode 100644
index 000000000000..d258ce0d2e06
--- /dev/null
+++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
@@ -0,0 +1,335 @@
+/*
+ * Cast6 Cipher 8-way parallel algorithm (AVX/x86_64)
+ *
+ * Copyright (C) 2012 Johannes Goetzfried
+ *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+ *
+ */
+
+.file "cast6-avx-x86_64-asm_64.S"
+.text
+
+.extern cast6_s1
+.extern cast6_s2
+.extern cast6_s3
+.extern cast6_s4
+
+/* structure of crypto context */
+#define km 0
+#define kr (12*4*4)
+
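+/*
+ * km/kr above are byte offsets into the key schedule, matching the Km
+ * (12 rounds x 4 x u32 masking keys) and Kr (rotation keys) members of
+ * struct cast6_ctx from the generic CAST6 implementation.
+ */
+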
+/* s-boxes */
+#define s1 cast6_s1
+#define s2 cast6_s2
+#define s3 cast6_s3
+#define s4 cast6_s4
+
+/**********************************************************************
+  8-way AVX cast6
+ **********************************************************************/
+#define CTX %rdi
+
+#define RA1 %xmm0
+#define RB1 %xmm1
+#define RC1 %xmm2
+#define RD1 %xmm3
+
+#define RA2 %xmm4
+#define RB2 %xmm5
+#define RC2 %xmm6
+#define RD2 %xmm7
+
+#define RX %xmm8
+
+#define RKM %xmm9
+#define RKRF %xmm10
+#define RKRR %xmm11
+
+#define RTMP %xmm12
+#define RMASK %xmm13
+#define R32 %xmm14
+
+#define RID1 %rax
+#define RID1b %al
+#define RID2 %rbx
+#define RID2b %bl
+
+#define RGI1 %rdx
+#define RGI1bl %dl
+#define RGI1bh %dh
+#define RGI2 %rcx
+#define RGI2bl %cl
+#define RGI2bh %ch
+
+#define RFS1 %r8
+#define RFS1d %r8d
+#define RFS2 %r9
+#define RFS2d %r9d
+#define RFS3 %r10
+#define RFS3d %r10d
+
+
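+/*
+ * lookup_32bit() consumes the low 32 bits of 'src' as four s-box indices
+ * (low/high byte pairs via the bl/bh subregisters, advancing with shrq)
+ * and folds the s1..s4 lookups into 'dst' with the caller-supplied ops.
+ */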
+#define lookup_32bit(src, dst, op1, op2, op3) \
+	movb src ## bl, RID1b; \
+	movb src ## bh, RID2b; \
+	movl s1(, RID1, 4), dst ## d; \
+	op1 s2(, RID2, 4), dst ## d; \
+	shrq $16, src; \
+	movb src ## bl, RID1b; \
+	movb src ## bh, RID2b; \
+	op2 s3(, RID1, 4), dst ## d; \
+	op3 s4(, RID2, 4), dst ## d;
+
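+/*
+ * F() below is the CAST-256 round function: key-mix with Km, a
+ * data-dependent left rotate by Kr emulated as
+ * (x << Kr) | (x >> (32 - Kr)) since AVX lacks a variable vector rotate,
+ * then four s-box lookups per 32-bit word done in general-purpose
+ * registers.  F1/F2/F3 select the three op patterns of the spec.
+ */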
+#define F(a, x, op0, op1, op2, op3) \
+	op0 a, RKM, x; \
+	vpslld RKRF, x, RTMP; \
+	vpsrld RKRR, x, x; \
+	vpor RTMP, x, x; \
+	\
+	vpshufb RMASK, x, x; \
+	vmovq x, RGI1; \
+	vpsrldq $8, x, x; \
+	vmovq x, RGI2; \
+	\
+	lookup_32bit(RGI1, RFS1, op1, op2, op3); \
+	shrq $16, RGI1; \
+	lookup_32bit(RGI1, RFS2, op1, op2, op3); \
+	shlq $32, RFS2; \
+	orq RFS1, RFS2; \
+	\
+	lookup_32bit(RGI2, RFS1, op1, op2, op3); \
+	shrq $16, RGI2; \
+	lookup_32bit(RGI2, RFS3, op1, op2, op3); \
+	shlq $32, RFS3; \
+	orq RFS1, RFS3; \
+	\
+	vmovq RFS2, x; \
+	vpinsrq $1, RFS3, x, x;
+
+#define F1(b, x) F(b, x, vpaddd, xorl, subl, addl)
+#define F2(b, x) F(b, x, vpxor, subl, addl, xorl)
+#define F3(b, x) F(b, x, vpsubd, addl, xorl, subl)
+
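+/*
+ * qop() applies one f-function to both four-block register sets; Q() is
+ * the forward quad-round of the CAST-256 specification (RFC 2612) and
+ * QBAR() its inverse, each step broadcasting one 32-bit masking key to
+ * all lanes and loading one rotation-key byte (RKRR = 32 - Kr via
+ * vpsubq against the R32 constant).
+ */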
+#define qop(in, out, x, f) \
+	F ## f(in ## 1, x); \
+	vpxor out ## 1, x, out ## 1; \
+	F ## f(in ## 2, x); \
+	vpxor out ## 2, x, out ## 2; \
+
+#define Q(n) \
+	vbroadcastss (km+(4*(4*n+0)))(CTX), RKM; \
+	vpinsrb $0, (kr+(4*n+0))(CTX), RKRF, RKRF; \
+	vpsubq RKRF, R32, RKRR; \
+	qop(RD, RC, RX, 1); \
+	\
+	vbroadcastss (km+(4*(4*n+1)))(CTX), RKM; \
+	vpinsrb $0, (kr+(4*n+1))(CTX), RKRF, RKRF; \
+	vpsubq RKRF, R32, RKRR; \
+	qop(RC, RB, RX, 2); \
+	\
+	vbroadcastss (km+(4*(4*n+2)))(CTX), RKM; \
+	vpinsrb $0, (kr+(4*n+2))(CTX), RKRF, RKRF; \
+	vpsubq RKRF, R32, RKRR; \
+	qop(RB, RA, RX, 3); \
+	\
+	vbroadcastss (km+(4*(4*n+3)))(CTX), RKM; \
+	vpinsrb $0, (kr+(4*n+3))(CTX), RKRF, RKRF; \
+	vpsubq RKRF, R32, RKRR; \
+	qop(RA, RD, RX, 1);
+
+#define QBAR(n) \
+	vbroadcastss (km+(4*(4*n+3)))(CTX), RKM; \
+	vpinsrb $0, (kr+(4*n+3))(CTX), RKRF, RKRF; \
+	vpsubq RKRF, R32, RKRR; \
+	qop(RA, RD, RX, 1); \
+	\
+	vbroadcastss (km+(4*(4*n+2)))(CTX), RKM; \
+	vpinsrb $0, (kr+(4*n+2))(CTX), RKRF, RKRF; \
+	vpsubq RKRF, R32, RKRR; \
+	qop(RB, RA, RX, 3); \
+	\
+	vbroadcastss (km+(4*(4*n+1)))(CTX), RKM; \
+	vpinsrb $0, (kr+(4*n+1))(CTX), RKRF, RKRF; \
+	vpsubq RKRF, R32, RKRR; \
+	qop(RC, RB, RX, 2); \
+	\
+	vbroadcastss (km+(4*(4*n+0)))(CTX), RKM; \
+	vpinsrb $0, (kr+(4*n+0))(CTX), RKRF, RKRF; \
+	vpsubq RKRF, R32, RKRR; \
+	qop(RD, RC, RX, 1);
+
+
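+/*
+ * transpose_4x4() turns four registers holding one 16-byte block each
+ * into word-sliced form, where register i holds the i-th 32-bit word of
+ * all four blocks, so each vector instruction processes four blocks.
+ */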
+#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
+	vpunpckldq x1, x0, t0; \
+	vpunpckhdq x1, x0, t2; \
+	vpunpckldq x3, x2, t1; \
+	vpunpckhdq x3, x2, x3; \
+	\
+	vpunpcklqdq t1, t0, x0; \
+	vpunpckhqdq t1, t0, x1; \
+	vpunpcklqdq x3, t2, x2; \
+	vpunpckhqdq x3, t2, x3;
+
+#define inpack_blocks(in, x0, x1, x2, x3, t0, t1, t2) \
+	vmovdqu (0*4*4)(in), x0; \
+	vmovdqu (1*4*4)(in), x1; \
+	vmovdqu (2*4*4)(in), x2; \
+	vmovdqu (3*4*4)(in), x3; \
+	vpshufb RMASK, x0, x0; \
+	vpshufb RMASK, x1, x1; \
+	vpshufb RMASK, x2, x2; \
+	vpshufb RMASK, x3, x3; \
+	\
+	transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
+
+#define outunpack_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
+	transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
+	\
+	vpshufb RMASK, x0, x0; \
+	vpshufb RMASK, x1, x1; \
+	vpshufb RMASK, x2, x2; \
+	vpshufb RMASK, x3, x3; \
+	vmovdqu x0, (0*4*4)(out); \
+	vmovdqu x1, (1*4*4)(out); \
+	vmovdqu x2, (2*4*4)(out); \
+	vmovdqu x3, (3*4*4)(out);
+
+#define outunpack_xor_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
+	transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
+	\
+	vpshufb RMASK, x0, x0; \
+	vpshufb RMASK, x1, x1; \
+	vpshufb RMASK, x2, x2; \
+	vpshufb RMASK, x3, x3; \
+	vpxor (0*4*4)(out), x0, x0; \
+	vmovdqu x0, (0*4*4)(out); \
+	vpxor (1*4*4)(out), x1, x1; \
+	vmovdqu x1, (1*4*4)(out); \
+	vpxor (2*4*4)(out), x2, x2; \
+	vmovdqu x2, (2*4*4)(out); \
+	vpxor (3*4*4)(out), x3, x3; \
+	vmovdqu x3, (3*4*4)(out);
+
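+/*
+ * .Lbswap_mask byte-swaps each 32-bit word via vpshufb (CAST-256 works
+ * on big-endian words); .L32_mask holds the constant 32 from which the
+ * right-shift count 32 - Kr is derived in RKRR.
+ */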
+.align 16
+.Lbswap_mask:
+	.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
+.L32_mask:
+	.byte 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+
+.align 16
+.global __cast6_enc_blk_8way
+.type __cast6_enc_blk_8way,@function;
+
+__cast6_enc_blk_8way:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst
+	 *	%rdx: src
+	 *	%rcx: bool, if true: xor output
+	 */
+
+	pushq %rbx;
+	pushq %rcx;
+
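+	/* %rbx is callee-saved and %rcx (the xor flag) doubles as RGI2 in
+	 * the round macros, so both were saved above and are restored
+	 * before the output is written. */
+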
+	vmovdqu .Lbswap_mask, RMASK;
+	vmovdqu .L32_mask, R32;
+	vpxor RKRF, RKRF, RKRF;
+
+	leaq (4*4*4)(%rdx), %rax;
+	inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RTMP, RX, RKM);
+	inpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKM);
+
+	xorq RID1, RID1;
+	xorq RID2, RID2;
+
+	Q(0);
+	Q(1);
+	Q(2);
+	Q(3);
+	Q(4);
+	Q(5);
+	QBAR(6);
+	QBAR(7);
+	QBAR(8);
+	QBAR(9);
+	QBAR(10);
+	QBAR(11);
+
+	popq %rcx;
+	popq %rbx;
+
+	leaq (4*4*4)(%rsi), %rax;
+
+	testb %cl, %cl;
+	jnz __enc_xor8;
+
+	outunpack_blocks(%rsi, RA1, RB1, RC1, RD1, RTMP, RX, RKM);
+	outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKM);
+
+	ret;
+
+__enc_xor8:
+	outunpack_xor_blocks(%rsi, RA1, RB1, RC1, RD1, RTMP, RX, RKM);
+	outunpack_xor_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKM);
+
+	ret;
+
+.align 16
+.global cast6_dec_blk_8way
+.type cast6_dec_blk_8way,@function;
+
+cast6_dec_blk_8way:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst
+	 *	%rdx: src
+	 */
+
+	pushq %rbx;
+
+	vmovdqu .Lbswap_mask, RMASK;
+	vmovdqu .L32_mask, R32;
+	vpxor RKRF, RKRF, RKRF;
+
+	leaq (4*4*4)(%rdx), %rax;
+	inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RTMP, RX, RKM);
+	inpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKM);
+
+	xorq RID1, RID1;
+	xorq RID2, RID2;
+
+	Q(11);
+	Q(10);
+	Q(9);
+	Q(8);
+	Q(7);
+	Q(6);
+	QBAR(5);
+	QBAR(4);
+	QBAR(3);
+	QBAR(2);
+	QBAR(1);
+	QBAR(0);
+
+	popq %rbx;
+
+	leaq (4*4*4)(%rsi), %rax;
+	outunpack_blocks(%rsi, RA1, RB1, RC1, RD1, RTMP, RX, RKM);
+	outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKM);
+
+	ret;
diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c
new file mode 100644
index 000000000000..15e5f85a5011
--- /dev/null
+++ b/arch/x86/crypto/cast6_avx_glue.c
@@ -0,0 +1,648 @@
+/*
+ * Glue Code for the AVX assembler implementation of the Cast6 Cipher
+ *
+ * Copyright (C) 2012 Johannes Goetzfried
+ *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/hardirq.h>
+#include <linux/types.h>
+#include <linux/crypto.h>
+#include <linux/err.h>
+#include <crypto/algapi.h>
+#include <crypto/cast6.h>
+#include <crypto/cryptd.h>
+#include <crypto/b128ops.h>
+#include <crypto/ctr.h>
+#include <crypto/lrw.h>
+#include <crypto/xts.h>
+#include <asm/xcr.h>
+#include <asm/xsave.h>
+#include <asm/crypto/ablk_helper.h>
+#include <asm/crypto/glue_helper.h>
+
+#define CAST6_PARALLEL_BLOCKS 8
+
+asmlinkage void __cast6_enc_blk_8way(struct cast6_ctx *ctx, u8 *dst,
+				     const u8 *src, bool xor);
+asmlinkage void cast6_dec_blk_8way(struct cast6_ctx *ctx, u8 *dst,
+				   const u8 *src);
+
+static inline void cast6_enc_blk_xway(struct cast6_ctx *ctx, u8 *dst,
+				      const u8 *src)
+{
+	__cast6_enc_blk_8way(ctx, dst, src, false);
+}
+
+static inline void cast6_enc_blk_xway_xor(struct cast6_ctx *ctx, u8 *dst,
+					  const u8 *src)
+{
+	__cast6_enc_blk_8way(ctx, dst, src, true);
+}
+
+static inline void cast6_dec_blk_xway(struct cast6_ctx *ctx, u8 *dst,
+				      const u8 *src)
+{
+	cast6_dec_blk_8way(ctx, dst, src);
+}
+
+
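+/*
+ * CBC decryption may run in place (dst == src), so the ciphertext blocks
+ * still needed as chaining values for blocks 1..7 are copied aside before
+ * the 8-way decrypt overwrites them; the chaining value for block 0 is
+ * applied by the glue_helper caller.
+ */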
+static void cast6_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src)
+{
+	u128 ivs[CAST6_PARALLEL_BLOCKS - 1];
+	unsigned int j;
+
+	for (j = 0; j < CAST6_PARALLEL_BLOCKS - 1; j++)
+		ivs[j] = src[j];
+
+	cast6_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);
+
+	for (j = 0; j < CAST6_PARALLEL_BLOCKS - 1; j++)
+		u128_xor(dst + (j + 1), dst + (j + 1), ivs + j);
+}
+
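+/*
+ * CTR mode: the counter is kept in native byte order in 'iv' and
+ * converted to big endian per block; the 8-way variant copies the
+ * plaintext to dst first and then uses the xor-output form of the
+ * assembler routine to apply the encrypted counters in place.
+ */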
+static void cast6_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv)
+{
+	be128 ctrblk;
+
+	u128_to_be128(&ctrblk, iv);
+	u128_inc(iv);
+
+	__cast6_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
+	u128_xor(dst, src, (u128 *)&ctrblk);
+}
+
+static void cast6_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src,
+				 u128 *iv)
+{
+	be128 ctrblks[CAST6_PARALLEL_BLOCKS];
+	unsigned int i;
+
+	for (i = 0; i < CAST6_PARALLEL_BLOCKS; i++) {
+		if (dst != src)
+			dst[i] = src[i];
+
+		u128_to_be128(&ctrblks[i], iv);
+		u128_inc(iv);
+	}
+
+	cast6_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks);
+}
+
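+/*
+ * These common_glue_ctx tables drive the glue_helper dispatch: runs of
+ * at least CAST6_PARALLEL_BLOCKS blocks use the 8-way AVX routines and
+ * the generic one-block functions handle the remainder, with the FPU
+ * only claimed for requests of at least fpu_blocks_limit blocks.
+ */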
+static const struct common_glue_ctx cast6_enc = {
+	.num_funcs = 2,
+	.fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
+
+	.funcs = { {
+		.num_blocks = CAST6_PARALLEL_BLOCKS,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(cast6_enc_blk_xway) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_encrypt) }
+	} }
+};
+
+static const struct common_glue_ctx cast6_ctr = {
+	.num_funcs = 2,
+	.fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
+
+	.funcs = { {
+		.num_blocks = CAST6_PARALLEL_BLOCKS,
+		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_crypt_ctr_xway) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_crypt_ctr) }
+	} }
+};
+
+static const struct common_glue_ctx cast6_dec = {
+	.num_funcs = 2,
+	.fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
+
+	.funcs = { {
+		.num_blocks = CAST6_PARALLEL_BLOCKS,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(cast6_dec_blk_xway) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_decrypt) }
+	} }
+};
+
+static const struct common_glue_ctx cast6_dec_cbc = {
+	.num_funcs = 2,
+	.fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
+
+	.funcs = { {
+		.num_blocks = CAST6_PARALLEL_BLOCKS,
+		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(cast6_decrypt_cbc_xway) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__cast6_decrypt) }
+	} }
+};
+
+static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	return glue_ecb_crypt_128bit(&cast6_enc, desc, dst, src, nbytes);
+}
+
+static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	return glue_ecb_crypt_128bit(&cast6_dec, desc, dst, src, nbytes);
+}
+
+static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__cast6_encrypt), desc,
+				       dst, src, nbytes);
+}
+
+static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	return glue_cbc_decrypt_128bit(&cast6_dec_cbc, desc, dst, src,
+				       nbytes);
+}
+
+static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		     struct scatterlist *src, unsigned int nbytes)
+{
+	return glue_ctr_crypt_128bit(&cast6_ctr, desc, dst, src, nbytes);
+}
+
+static inline bool cast6_fpu_begin(bool fpu_enabled, unsigned int nbytes)
+{
+	return glue_fpu_begin(CAST6_BLOCK_SIZE, CAST6_PARALLEL_BLOCKS,
+			      NULL, fpu_enabled, nbytes);
+}
+
+static inline void cast6_fpu_end(bool fpu_enabled)
+{
+	glue_fpu_end(fpu_enabled);
+}
+
+struct crypt_priv {
+	struct cast6_ctx *ctx;
+	bool fpu_enabled;
+};
+
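+/*
+ * encrypt_callback/decrypt_callback below are invoked by lrw_crypt() and
+ * xts_crypt() per chunk: a full run of CAST6_PARALLEL_BLOCKS goes through
+ * the 8-way AVX path, shorter tails fall back to the generic one-block
+ * routines; fpu_enabled is threaded through struct crypt_priv so a single
+ * FPU section can span the whole walk.
+ */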
+static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+{
+	const unsigned int bsize = CAST6_BLOCK_SIZE;
+	struct crypt_priv *ctx = priv;
+	int i;
+
+	ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes);
+
+	if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) {
+		cast6_enc_blk_xway(ctx->ctx, srcdst, srcdst);
+		return;
+	}
+
+	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
+		__cast6_encrypt(ctx->ctx, srcdst, srcdst);
+}
+
+static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+{
+	const unsigned int bsize = CAST6_BLOCK_SIZE;
+	struct crypt_priv *ctx = priv;
+	int i;
+
+	ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes);
+
+	if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) {
+		cast6_dec_blk_xway(ctx->ctx, srcdst, srcdst);
+		return;
+	}
+
+	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
+		__cast6_decrypt(ctx->ctx, srcdst, srcdst);
+}
+
+struct cast6_lrw_ctx {
+	struct lrw_table_ctx lrw_table;
+	struct cast6_ctx cast6_ctx;
+};
+
+static int lrw_cast6_setkey(struct crypto_tfm *tfm, const u8 *key,
+			    unsigned int keylen)
+{
+	struct cast6_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
+	int err;
+
+	err = __cast6_setkey(&ctx->cast6_ctx, key, keylen - CAST6_BLOCK_SIZE,
+			     &tfm->crt_flags);
+	if (err)
+		return err;
+
+	return lrw_init_table(&ctx->lrw_table, key + keylen - CAST6_BLOCK_SIZE);
+}
+
+static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct cast6_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	be128 buf[CAST6_PARALLEL_BLOCKS];
+	struct crypt_priv crypt_ctx = {
+		.ctx = &ctx->cast6_ctx,
+		.fpu_enabled = false,
+	};
+	struct lrw_crypt_req req = {
+		.tbuf = buf,
+		.tbuflen = sizeof(buf),
+
+		.table_ctx = &ctx->lrw_table,
+		.crypt_ctx = &crypt_ctx,
+		.crypt_fn = encrypt_callback,
+	};
+	int ret;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	ret = lrw_crypt(desc, dst, src, nbytes, &req);
+	cast6_fpu_end(crypt_ctx.fpu_enabled);
+
+	return ret;
+}
+
+static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct cast6_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	be128 buf[CAST6_PARALLEL_BLOCKS];
+	struct crypt_priv crypt_ctx = {
+		.ctx = &ctx->cast6_ctx,
+		.fpu_enabled = false,
+	};
+	struct lrw_crypt_req req = {
+		.tbuf = buf,
+		.tbuflen = sizeof(buf),
+
+		.table_ctx = &ctx->lrw_table,
+		.crypt_ctx = &crypt_ctx,
+		.crypt_fn = decrypt_callback,
+	};
+	int ret;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	ret = lrw_crypt(desc, dst, src, nbytes, &req);
+	cast6_fpu_end(crypt_ctx.fpu_enabled);
+
+	return ret;
+}
+
+static void lrw_exit_tfm(struct crypto_tfm *tfm)
+{
+	struct cast6_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	lrw_free_table(&ctx->lrw_table);
+}
+
+struct cast6_xts_ctx {
+	struct cast6_ctx tweak_ctx;
+	struct cast6_ctx crypt_ctx;
+};
+
+static int xts_cast6_setkey(struct crypto_tfm *tfm, const u8 *key,
+			    unsigned int keylen)
+{
+	struct cast6_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+	u32 *flags = &tfm->crt_flags;
+	int err;
+
+	/* key consists of keys of equal size concatenated, therefore
+	 * the length must be even
+	 */
+	if (keylen % 2) {
+		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+		return -EINVAL;
+	}
+
+	/* first half of xts-key is for crypt */
+	err = __cast6_setkey(&ctx->crypt_ctx, key, keylen / 2, flags);
+	if (err)
+		return err;
+
+	/* second half of xts-key is for tweak */
+	return __cast6_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2,
+			      flags);
+}
+
+static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	be128 buf[CAST6_PARALLEL_BLOCKS];
+	struct crypt_priv crypt_ctx = {
+		.ctx = &ctx->crypt_ctx,
+		.fpu_enabled = false,
+	};
+	struct xts_crypt_req req = {
+		.tbuf = buf,
+		.tbuflen = sizeof(buf),
+
+		.tweak_ctx = &ctx->tweak_ctx,
+		.tweak_fn = XTS_TWEAK_CAST(__cast6_encrypt),
+		.crypt_ctx = &crypt_ctx,
+		.crypt_fn = encrypt_callback,
+	};
+	int ret;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	ret = xts_crypt(desc, dst, src, nbytes, &req);
+	cast6_fpu_end(crypt_ctx.fpu_enabled);
+
+	return ret;
+}
+
+static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	be128 buf[CAST6_PARALLEL_BLOCKS];
+	struct crypt_priv crypt_ctx = {
+		.ctx = &ctx->crypt_ctx,
+		.fpu_enabled = false,
+	};
+	struct xts_crypt_req req = {
+		.tbuf = buf,
+		.tbuflen = sizeof(buf),
+
+		.tweak_ctx = &ctx->tweak_ctx,
+		.tweak_fn = XTS_TWEAK_CAST(__cast6_encrypt),
+		.crypt_ctx = &crypt_ctx,
+		.crypt_fn = decrypt_callback,
+	};
+	int ret;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	ret = xts_crypt(desc, dst, src, nbytes, &req);
+	cast6_fpu_end(crypt_ctx.fpu_enabled);
+
+	return ret;
+}
+
+static struct crypto_alg cast6_algs[10] = { {
+	.cra_name = "__ecb-cast6-avx",
+	.cra_driver_name = "__driver-ecb-cast6-avx",
+	.cra_priority = 0,
+	.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize = CAST6_BLOCK_SIZE,
+	.cra_ctxsize = sizeof(struct cast6_ctx),
+	.cra_alignmask = 0,
+	.cra_type = &crypto_blkcipher_type,
+	.cra_module = THIS_MODULE,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize = CAST6_MIN_KEY_SIZE,
+			.max_keysize = CAST6_MAX_KEY_SIZE,
+			.setkey = cast6_setkey,
+			.encrypt = ecb_encrypt,
+			.decrypt = ecb_decrypt,
+		},
+	},
+}, {
+	.cra_name = "__cbc-cast6-avx",
+	.cra_driver_name = "__driver-cbc-cast6-avx",
+	.cra_priority = 0,
+	.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize = CAST6_BLOCK_SIZE,
+	.cra_ctxsize = sizeof(struct cast6_ctx),
+	.cra_alignmask = 0,
+	.cra_type = &crypto_blkcipher_type,
+	.cra_module = THIS_MODULE,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize = CAST6_MIN_KEY_SIZE,
+			.max_keysize = CAST6_MAX_KEY_SIZE,
+			.setkey = cast6_setkey,
+			.encrypt = cbc_encrypt,
+			.decrypt = cbc_decrypt,
+		},
+	},
+}, {
+	.cra_name = "__ctr-cast6-avx",
+	.cra_driver_name = "__driver-ctr-cast6-avx",
+	.cra_priority = 0,
+	.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize = 1,
+	.cra_ctxsize = sizeof(struct cast6_ctx),
+	.cra_alignmask = 0,
+	.cra_type = &crypto_blkcipher_type,
+	.cra_module = THIS_MODULE,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize = CAST6_MIN_KEY_SIZE,
+			.max_keysize = CAST6_MAX_KEY_SIZE,
+			.ivsize = CAST6_BLOCK_SIZE,
+			.setkey = cast6_setkey,
+			.encrypt = ctr_crypt,
+			.decrypt = ctr_crypt,
+		},
+	},
+}, {
+	.cra_name = "__lrw-cast6-avx",
+	.cra_driver_name = "__driver-lrw-cast6-avx",
+	.cra_priority = 0,
+	.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize = CAST6_BLOCK_SIZE,
+	.cra_ctxsize = sizeof(struct cast6_lrw_ctx),
+	.cra_alignmask = 0,
+	.cra_type = &crypto_blkcipher_type,
+	.cra_module = THIS_MODULE,
+	.cra_exit = lrw_exit_tfm,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize = CAST6_MIN_KEY_SIZE +
+				       CAST6_BLOCK_SIZE,
+			.max_keysize = CAST6_MAX_KEY_SIZE +
+				       CAST6_BLOCK_SIZE,
+			.ivsize = CAST6_BLOCK_SIZE,
+			.setkey = lrw_cast6_setkey,
+			.encrypt = lrw_encrypt,
+			.decrypt = lrw_decrypt,
+		},
+	},
+}, {
+	.cra_name = "__xts-cast6-avx",
+	.cra_driver_name = "__driver-xts-cast6-avx",
+	.cra_priority = 0,
+	.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize = CAST6_BLOCK_SIZE,
+	.cra_ctxsize = sizeof(struct cast6_xts_ctx),
+	.cra_alignmask = 0,
+	.cra_type = &crypto_blkcipher_type,
+	.cra_module = THIS_MODULE,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize = CAST6_MIN_KEY_SIZE * 2,
+			.max_keysize = CAST6_MAX_KEY_SIZE * 2,
+			.ivsize = CAST6_BLOCK_SIZE,
+			.setkey = xts_cast6_setkey,
+			.encrypt = xts_encrypt,
+			.decrypt = xts_decrypt,
+		},
+	},
+}, {
+	.cra_name = "ecb(cast6)",
+	.cra_driver_name = "ecb-cast6-avx",
+	.cra_priority = 200,
+	.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize = CAST6_BLOCK_SIZE,
+	.cra_ctxsize = sizeof(struct async_helper_ctx),
+	.cra_alignmask = 0,
+	.cra_type = &crypto_ablkcipher_type,
+	.cra_module = THIS_MODULE,
+	.cra_init = ablk_init,
+	.cra_exit = ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize = CAST6_MIN_KEY_SIZE,
+			.max_keysize = CAST6_MAX_KEY_SIZE,
+			.setkey = ablk_set_key,
+			.encrypt = ablk_encrypt,
+			.decrypt = ablk_decrypt,
+		},
+	},
+}, {
+	.cra_name = "cbc(cast6)",
+	.cra_driver_name = "cbc-cast6-avx",
+	.cra_priority = 200,
+	.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize = CAST6_BLOCK_SIZE,
+	.cra_ctxsize = sizeof(struct async_helper_ctx),
+	.cra_alignmask = 0,
+	.cra_type = &crypto_ablkcipher_type,
+	.cra_module = THIS_MODULE,
+	.cra_init = ablk_init,
+	.cra_exit = ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize = CAST6_MIN_KEY_SIZE,
+			.max_keysize = CAST6_MAX_KEY_SIZE,
+			.ivsize = CAST6_BLOCK_SIZE,
+			.setkey = ablk_set_key,
+			.encrypt = __ablk_encrypt,
+			.decrypt = ablk_decrypt,
+		},
+	},
+}, {
+	.cra_name = "ctr(cast6)",
+	.cra_driver_name = "ctr-cast6-avx",
+	.cra_priority = 200,
+	.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize = 1,
+	.cra_ctxsize = sizeof(struct async_helper_ctx),
+	.cra_alignmask = 0,
+	.cra_type = &crypto_ablkcipher_type,
+	.cra_module = THIS_MODULE,
+	.cra_init = ablk_init,
+	.cra_exit = ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize = CAST6_MIN_KEY_SIZE,
+			.max_keysize = CAST6_MAX_KEY_SIZE,
+			.ivsize = CAST6_BLOCK_SIZE,
+			.setkey = ablk_set_key,
+			.encrypt = ablk_encrypt,
+			.decrypt = ablk_encrypt,
+			.geniv = "chainiv",
+		},
+	},
+}, {
+	.cra_name = "lrw(cast6)",
+	.cra_driver_name = "lrw-cast6-avx",
+	.cra_priority = 200,
+	.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize = CAST6_BLOCK_SIZE,
+	.cra_ctxsize = sizeof(struct async_helper_ctx),
+	.cra_alignmask = 0,
+	.cra_type = &crypto_ablkcipher_type,
+	.cra_module = THIS_MODULE,
+	.cra_init = ablk_init,
+	.cra_exit = ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize = CAST6_MIN_KEY_SIZE +
+				       CAST6_BLOCK_SIZE,
+			.max_keysize = CAST6_MAX_KEY_SIZE +
+				       CAST6_BLOCK_SIZE,
+			.ivsize = CAST6_BLOCK_SIZE,
+			.setkey = ablk_set_key,
+			.encrypt = ablk_encrypt,
+			.decrypt = ablk_decrypt,
+		},
+	},
+}, {
+	.cra_name = "xts(cast6)",
+	.cra_driver_name = "xts-cast6-avx",
+	.cra_priority = 200,
+	.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize = CAST6_BLOCK_SIZE,
+	.cra_ctxsize = sizeof(struct async_helper_ctx),
+	.cra_alignmask = 0,
+	.cra_type = &crypto_ablkcipher_type,
+	.cra_module = THIS_MODULE,
+	.cra_init = ablk_init,
+	.cra_exit = ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize = CAST6_MIN_KEY_SIZE * 2,
+			.max_keysize = CAST6_MAX_KEY_SIZE * 2,
+			.ivsize = CAST6_BLOCK_SIZE,
+			.setkey = ablk_set_key,
+			.encrypt = ablk_encrypt,
+			.decrypt = ablk_decrypt,
+		},
+	},
+} };
+
+static int __init cast6_init(void)
+{
+	u64 xcr0;
+
+	if (!cpu_has_avx || !cpu_has_osxsave) {
+		pr_info("AVX instructions are not detected.\n");
+		return -ENODEV;
+	}
+
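+	/* The AVX and OSXSAVE CPUID bits alone are not sufficient; XGETBV
+	 * must confirm that the OS actually saves and restores both SSE
+	 * and YMM state on context switch. */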
+	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
+	if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
+		pr_info("AVX detected but unusable.\n");
+		return -ENODEV;
+	}
+
+	return crypto_register_algs(cast6_algs, ARRAY_SIZE(cast6_algs));
+}
+
+static void __exit cast6_exit(void)
+{
+	crypto_unregister_algs(cast6_algs, ARRAY_SIZE(cast6_algs));
+}
+
+module_init(cast6_init);
+module_exit(cast6_exit);
+
+MODULE_DESCRIPTION("Cast6 Cipher Algorithm, AVX optimized");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("cast6");
diff --git a/crypto/Kconfig b/crypto/Kconfig
index cda97fcaa822..fe8ed62efe2f 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -713,6 +713,23 @@ config CRYPTO_CAST6
 	  The CAST6 encryption algorithm (synonymous with CAST-256) is
 	  described in RFC2612.
 
+config CRYPTO_CAST6_AVX_X86_64
+	tristate "CAST6 (CAST-256) cipher algorithm (x86_64/AVX)"
+	depends on X86 && 64BIT
+	select CRYPTO_ALGAPI
+	select CRYPTO_CRYPTD
+	select CRYPTO_ABLK_HELPER_X86
+	select CRYPTO_GLUE_HELPER_X86
+	select CRYPTO_CAST6
+	select CRYPTO_LRW
+	select CRYPTO_XTS
+	help
+	  The CAST6 encryption algorithm (synonymous with CAST-256) is
+	  described in RFC2612.
+
+	  This module provides the Cast6 cipher algorithm that processes
+	  eight blocks in parallel using the AVX instruction set.
+
 config CRYPTO_DES
 	tristate "DES and Triple DES EDE cipher algorithms"
 	select CRYPTO_ALGAPI
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index cff3c1c3f83c..575b57c3244b 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -1549,6 +1549,21 @@ static const struct alg_test_desc alg_test_descs[] = {
 			}
 		}
 	}, {
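+		/*
+		 * The "__"-prefixed helper algorithms are internal and only
+		 * reachable through their cryptd/ablk_helper wrappers, so
+		 * they are registered with null tests.
+		 */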
+		.alg = "__cbc-cast6-avx",
+		.test = alg_test_null,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = NULL,
+					.count = 0
+				},
+				.dec = {
+					.vecs = NULL,
+					.count = 0
+				}
+			}
+		}
+	}, {
 		.alg = "__cbc-serpent-avx",
 		.test = alg_test_null,
 		.suite = {
@@ -1625,6 +1640,21 @@ static const struct alg_test_desc alg_test_descs[] = {
 			}
 		}
 	}, {
+		.alg = "__driver-cbc-cast6-avx",
+		.test = alg_test_null,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = NULL,
+					.count = 0
+				},
+				.dec = {
+					.vecs = NULL,
+					.count = 0
+				}
+			}
+		}
+	}, {
 		.alg = "__driver-cbc-serpent-avx",
 		.test = alg_test_null,
 		.suite = {
@@ -1701,6 +1731,21 @@ static const struct alg_test_desc alg_test_descs[] = {
 			}
 		}
 	}, {
+		.alg = "__driver-ecb-cast6-avx",
+		.test = alg_test_null,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = NULL,
+					.count = 0
+				},
+				.dec = {
+					.vecs = NULL,
+					.count = 0
+				}
+			}
+		}
+	}, {
 		.alg = "__driver-ecb-serpent-avx",
 		.test = alg_test_null,
 		.suite = {
@@ -2027,6 +2072,21 @@ static const struct alg_test_desc alg_test_descs[] = {
 			}
 		}
 	}, {
+		.alg = "cryptd(__driver-ecb-cast6-avx)",
+		.test = alg_test_null,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = NULL,
+					.count = 0
+				},
+				.dec = {
+					.vecs = NULL,
+					.count = 0
+				}
+			}
+		}
+	}, {
 		.alg = "cryptd(__driver-ecb-serpent-avx)",
 		.test = alg_test_null,
 		.suite = {