aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJohannes Goetzfried <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>2012-05-28 09:54:24 -0400
committerHerbert Xu <herbert@gondor.apana.org.au>2012-06-12 04:46:07 -0400
commit107778b592576c0c8e8d2ca7a2aa5415a4908223 (patch)
tree0e07f6abd2acaf69bf25efacf520584d748c860b
parent4d03c5047a07a62563e1a8fa798ea258f048bfde (diff)
crypto: twofish - add x86_64/avx assembler implementation
This patch adds an x86_64/AVX assembler implementation of the Twofish block cipher. The implementation processes eight blocks in parallel (two 4-block chunk AVX operations). The table lookups are done in general-purpose registers. For small block sizes, the 3-way parallel functions from the twofish-x86_64-3way module are called. A good performance increase is provided for block sizes greater than or equal to 128B. Patch has been tested with tcrypt and automated filesystem tests. Tcrypt benchmark results: Intel Core i5-2500 CPU (fam:6, model:42, step:7) twofish-avx-x86_64 vs. twofish-x86_64-3way 128bit key: (lrw:256bit) (xts:256bit) size ecb-enc ecb-dec cbc-enc cbc-dec ctr-enc ctr-dec lrw-enc lrw-dec xts-enc xts-dec 16B 0.96x 0.97x 1.00x 0.95x 0.97x 0.97x 0.96x 0.95x 0.95x 0.98x 64B 0.99x 0.99x 1.00x 0.99x 0.98x 0.98x 0.99x 0.98x 0.99x 0.98x 256B 1.20x 1.21x 1.00x 1.19x 1.15x 1.14x 1.19x 1.20x 1.18x 1.19x 1024B 1.29x 1.30x 1.00x 1.28x 1.23x 1.24x 1.26x 1.28x 1.26x 1.27x 8192B 1.31x 1.32x 1.00x 1.31x 1.25x 1.25x 1.28x 1.29x 1.28x 1.30x 256bit key: (lrw:384bit) (xts:512bit) size ecb-enc ecb-dec cbc-enc cbc-dec ctr-enc ctr-dec lrw-enc lrw-dec xts-enc xts-dec 16B 0.96x 0.96x 1.00x 0.96x 0.97x 0.98x 0.95x 0.95x 0.95x 0.96x 64B 1.00x 0.99x 1.00x 0.98x 0.98x 1.01x 0.98x 0.98x 0.98x 0.98x 256B 1.20x 1.21x 1.00x 1.21x 1.15x 1.15x 1.19x 1.20x 1.18x 1.19x 1024B 1.29x 1.30x 1.00x 1.28x 1.23x 1.23x 1.26x 1.27x 1.26x 1.27x 8192B 1.31x 1.33x 1.00x 1.31x 1.26x 1.26x 1.29x 1.29x 1.28x 1.30x twofish-avx-x86_64 vs aes-asm (8kB block): 128bit 256bit ecb-enc 1.19x 1.63x ecb-dec 1.18x 1.62x cbc-enc 0.75x 1.03x cbc-dec 1.23x 1.67x ctr-enc 1.24x 1.65x ctr-dec 1.24x 1.65x lrw-enc 1.15x 1.53x lrw-dec 1.14x 1.52x xts-enc 1.16x 1.56x xts-dec 1.16x 1.56x Signed-off-by: Johannes Goetzfried <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
-rw-r--r--arch/x86/crypto/Makefile2
-rw-r--r--arch/x86/crypto/twofish-avx-x86_64-asm_64.S301
-rw-r--r--arch/x86/crypto/twofish_avx_glue.c1086
-rw-r--r--arch/x86/crypto/twofish_glue_3way.c2
-rw-r--r--crypto/Kconfig24
-rw-r--r--crypto/tcrypt.c23
-rw-r--r--crypto/testmgr.c60
7 files changed, 1498 insertions, 0 deletions
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 479f95a744f7..3420feef0c70 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -12,6 +12,7 @@ obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
12obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o 12obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
13obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o 13obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
14obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o 14obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
15obj-$(CONFIG_CRYPTO_TWOFISH_AVX_X86_64) += twofish-avx-x86_64.o
15obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o 16obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
16obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o 17obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o
17obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o 18obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
@@ -30,6 +31,7 @@ camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
30blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o 31blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
31twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o 32twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
32twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o 33twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
34twofish-avx-x86_64-y := twofish-avx-x86_64-asm_64.o twofish_avx_glue.o
33salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o 35salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
34serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o 36serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o
35 37
diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
new file mode 100644
index 000000000000..fc31b89ba4c3
--- /dev/null
+++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
@@ -0,0 +1,301 @@
1/*
2 * Twofish Cipher 8-way parallel algorithm (AVX/x86_64)
3 *
4 * Copyright (C) 2012 Johannes Goetzfried
5 * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
20 * USA
21 *
22 */
23
24.file "twofish-avx-x86_64-asm_64.S"
25.text
26
27/* structure of crypto context */
/*
 * Byte offsets of the fields this file reads through CTX:
 *   s0..s3  four lookup tables, 1024 bytes apart; lookup_32bit indexes each
 *           with a single byte scaled by 4
 *   w       32 bytes read as two 16-byte vectors (w and w+4*4) and XORed
 *           into the blocks by inpack_blocks / outunpack_blocks
 *   k       32-bit round-key words; encrypt_round/decrypt_round broadcast
 *           words 2n and 2n+1 into RK1/RK2
 */
28#define s0 0
29#define s1 1024
30#define s2 2048
31#define s3 3072
32#define w 4096
33#define k 4128
34
35/**********************************************************************
36 8-way AVX twofish
37 **********************************************************************/
/*
 * Register allocation.
 *   CTX            context pointer (first argument)
 *   RA1..RD1       the four state vectors of the first group of four blocks
 *   RA2..RD2       the same for the second group of four blocks
 *   RX, RY         scratch vectors (G outputs, transpose temporaries)
 *   RK1, RK2       key vectors (whitening key or broadcast round keys)
 *   RID1, RID2     table indices; only the low byte is written by
 *                  lookup_32bit, so the callers zero the full registers first
 *   RGI1, RGI2     general-purpose copies of the two 64-bit halves of a vector
 *   RGS1..RGS3     32-bit lookup results being packed back into 64-bit halves
 * Note that RGI1 is %rdx (src argument) and RGI2 is %rcx (xor flag of the
 * encrypt entry point), so those arguments do not survive the rounds.
 */
38#define CTX %rdi
39
40#define RA1 %xmm0
41#define RB1 %xmm1
42#define RC1 %xmm2
43#define RD1 %xmm3
44
45#define RA2 %xmm4
46#define RB2 %xmm5
47#define RC2 %xmm6
48#define RD2 %xmm7
49
50#define RX %xmm8
51#define RY %xmm9
52
53#define RK1 %xmm10
54#define RK2 %xmm11
55
56#define RID1 %rax
57#define RID1b %al
58#define RID2 %rbx
59#define RID2b %bl
60
61#define RGI1 %rdx
62#define RGI1bl %dl
63#define RGI1bh %dh
64#define RGI2 %rcx
65#define RGI2bl %cl
66#define RGI2bh %ch
67
68#define RGS1 %r8
69#define RGS1d %r8d
70#define RGS2 %r9
71#define RGS2d %r9d
72#define RGS3 %r10
73#define RGS3d %r10d
74
75
/*
 * lookup_32bit(t0, t1, t2, t3, src, dst)
 *
 * Uses the four low bytes of src as indices into tables t0, t1, t2 and t3
 * (in that order, lowest byte first) and XORs the four 32-bit entries
 * together into the 32-bit register dst ## d.
 *
 * Side effects: src is shifted right by 16 bits, and the low bytes of RID1
 * and RID2 are overwritten. The upper bits of RID1/RID2 are not touched here,
 * so they must already be zero for the scaled index to be correct.
 */
76#define lookup_32bit(t0, t1, t2, t3, src, dst) \
77 movb src ## bl, RID1b; \
78 movb src ## bh, RID2b; \
79 movl t0(CTX, RID1, 4), dst ## d; \
80 xorl t1(CTX, RID2, 4), dst ## d; \
81 shrq $16, src; \
82 movb src ## bl, RID1b; \
83 movb src ## bh, RID2b; \
84 xorl t2(CTX, RID1, 4), dst ## d; \
85 xorl t3(CTX, RID2, 4), dst ## d;
86
/*
 * G(a, x, t0, t1, t2, t3)
 *
 * Applies lookup_32bit to each of the four 32-bit words of vector a and
 * leaves the four results, in the same word order, in vector x.
 *
 * The vector is split into two 64-bit halves (RGI1 = low half, RGI2 = high
 * half). Each half yields two lookups; the extra "shrq $16" between them,
 * together with the 16-bit shift done inside lookup_32bit, advances the
 * source register to its next 32-bit word. The two 32-bit results of a half
 * are packed into one 64-bit register (second result in the upper 32 bits)
 * and the halves are moved back into x.
 *
 * Clobbers: x, RGI1, RGI2, RGS1, RGS2, RGS3 and the low bytes of RID1/RID2.
 * Vector a is only read.
 */
87#define G(a, x, t0, t1, t2, t3) \
88 vmovq a, RGI1; \
89 vpsrldq $8, a, x; \
90 vmovq x, RGI2; \
91 \
92 lookup_32bit(t0, t1, t2, t3, RGI1, RGS1); \
93 shrq $16, RGI1; \
94 lookup_32bit(t0, t1, t2, t3, RGI1, RGS2); \
95 shlq $32, RGS2; \
96 orq RGS1, RGS2; \
97 \
98 lookup_32bit(t0, t1, t2, t3, RGI2, RGS1); \
99 shrq $16, RGI2; \
100 lookup_32bit(t0, t1, t2, t3, RGI2, RGS3); \
101 shlq $32, RGS3; \
102 orq RGS1, RGS3; \
103 \
104 vmovq RGS2, x; \
105 vpinsrq $1, RGS3, x, x;
106
/*
 * encround(a, b, c, d, x, y)
 *
 * One encryption round on four blocks at once (one 32-bit lane per block).
 * Expects RK1/RK2 to hold the two round-key words for this round.
 *
 *   x = G(a)                       tables s0, s1, s2, s3
 *   y = G(b)                       tables s1, s2, s3, s0
 *   x = x + y;  y = y + x          (y uses the already updated x)
 *   x += RK1;   y += RK2
 *   c = (c ^ x) rotated right by 1
 *   d = (d rotated left by 1) ^ y
 *
 * a and b are only read; c and d are updated; x and y are scratch.
 */
107#define encround(a, b, c, d, x, y) \
108 G(a, x, s0, s1, s2, s3); \
109 G(b, y, s1, s2, s3, s0); \
110 vpaddd x, y, x; \
111 vpaddd y, x, y; \
112 vpaddd x, RK1, x; \
113 vpaddd y, RK2, y; \
114 vpxor x, c, c; \
115 vpsrld $1, c, x; \
116 vpslld $(32 - 1), c, c; \
117 vpor c, x, c; \
118 vpslld $1, d, x; \
119 vpsrld $(32 - 1), d, d; \
120 vpor d, x, d; \
121 vpxor d, y, d;
122
/*
 * decround(a, b, c, d, x, y)
 *
 * One decryption round on four blocks at once; the mirror image of encround.
 * Expects RK1/RK2 to hold the two round-key words for this round.
 *
 *   x = G(a)                       tables s0, s1, s2, s3
 *   y = G(b)                       tables s1, s2, s3, s0
 *   x = x + y;  y = y + x          (y uses the already updated x)
 *   y += RK2
 *   d = (d ^ y) rotated right by 1
 *   c = c rotated left by 1        (y is reused as scratch here, which is
 *                                   why the RK1 addition is delayed)
 *   x += RK1
 *   c = c ^ x
 *
 * a and b are only read; c and d are updated; x and y are scratch.
 */
123#define decround(a, b, c, d, x, y) \
124 G(a, x, s0, s1, s2, s3); \
125 G(b, y, s1, s2, s3, s0); \
126 vpaddd x, y, x; \
127 vpaddd y, x, y; \
128 vpaddd y, RK2, y; \
129 vpxor d, y, d; \
130 vpsrld $1, d, y; \
131 vpslld $(32 - 1), d, d; \
132 vpor d, y, d; \
133 vpslld $1, c, y; \
134 vpsrld $(32 - 1), c, c; \
135 vpor c, y, c; \
136 vpaddd x, RK1, x; \
137 vpxor x, c, c;
138
/*
 * encrypt_round(n, a, b, c, d)
 *
 * Broadcasts round-key words 2n and 2n+1 of the k array into every lane of
 * RK1 and RK2, then runs encround on both four-block groups (register names
 * with suffix 1, then suffix 2). RX and RY are shared as scratch by both
 * invocations.
 */
139#define encrypt_round(n, a, b, c, d) \
140 vbroadcastss (k+4*(2*(n)))(CTX), RK1; \
141 vbroadcastss (k+4*(2*(n)+1))(CTX), RK2; \
142 encround(a ## 1, b ## 1, c ## 1, d ## 1, RX, RY); \
143 encround(a ## 2, b ## 2, c ## 2, d ## 2, RX, RY);
144
/*
 * decrypt_round(n, a, b, c, d)
 *
 * Broadcasts round-key words 2n and 2n+1 of the k array into every lane of
 * RK1 and RK2, then runs decround on both four-block groups (register names
 * with suffix 1, then suffix 2). RX and RY are shared as scratch by both
 * invocations.
 */
145#define decrypt_round(n, a, b, c, d) \
146 vbroadcastss (k+4*(2*(n)))(CTX), RK1; \
147 vbroadcastss (k+4*(2*(n)+1))(CTX), RK2; \
148 decround(a ## 1, b ## 1, c ## 1, d ## 1, RX, RY); \
149 decround(a ## 2, b ## 2, c ## 2, d ## 2, RX, RY);
150
/*
 * encrypt_cycle(n): rounds 2n and 2n+1. The second round swaps the roles of
 * the register pairs (C,D feed G and A,B are updated), so no data has to be
 * moved between rounds.
 */
151#define encrypt_cycle(n) \
152 encrypt_round((2*n), RA, RB, RC, RD); \
153 encrypt_round(((2*n) + 1), RC, RD, RA, RB);
154
/*
 * decrypt_cycle(n): undoes encrypt_cycle(n) by running round 2n+1 first and
 * round 2n second, with the same register-role assignment per round.
 */
155#define decrypt_cycle(n) \
156 decrypt_round(((2*n) + 1), RC, RD, RA, RB); \
157 decrypt_round((2*n), RA, RB, RC, RD);
158
159
/*
 * transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
 *
 * Treats x0..x3 as the rows of a 4x4 matrix of 32-bit words and transposes
 * it in place: afterwards register xi holds word i of each of the four
 * original registers. t0, t1 and t2 are clobbered as scratch.
 */
160#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
161 vpunpckldq x1, x0, t0; \
162 vpunpckhdq x1, x0, t2; \
163 vpunpckldq x3, x2, t1; \
164 vpunpckhdq x3, x2, x3; \
165 \
166 vpunpcklqdq t1, t0, x0; \
167 vpunpckhqdq t1, t0, x1; \
168 vpunpcklqdq x3, t2, x2; \
169 vpunpckhqdq x3, t2, x3;
170
/*
 * inpack_blocks(in, x0, x1, x2, x3, wkey, t0, t1, t2)
 *
 * Loads four consecutive 16-byte blocks from in, XORs each with wkey, and
 * transposes the result so that xi holds 32-bit word i of all four blocks.
 * t0..t2 are scratch for the transpose.
 */
171#define inpack_blocks(in, x0, x1, x2, x3, wkey, t0, t1, t2) \
172 vpxor (0*4*4)(in), wkey, x0; \
173 vpxor (1*4*4)(in), wkey, x1; \
174 vpxor (2*4*4)(in), wkey, x2; \
175 vpxor (3*4*4)(in), wkey, x3; \
176 \
177 transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
178
/*
 * outunpack_blocks(out, x0, x1, x2, x3, wkey, t0, t1, t2)
 *
 * Inverse of inpack_blocks: transposes x0..x3 back to one block per
 * register, XORs each block with wkey and stores the four 16-byte blocks
 * consecutively at out (unaligned stores). t0..t2 are scratch.
 */
179#define outunpack_blocks(out, x0, x1, x2, x3, wkey, t0, t1, t2) \
180 transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
181 \
182 vpxor x0, wkey, x0; \
183 vmovdqu x0, (0*4*4)(out); \
184 vpxor x1, wkey, x1; \
185 vmovdqu x1, (1*4*4)(out); \
186 vpxor x2, wkey, x2; \
187 vmovdqu x2, (2*4*4)(out); \
188 vpxor x3, wkey, x3; \
189 vmovdqu x3, (3*4*4)(out);
190
/*
 * outunpack_xor_blocks(out, x0, x1, x2, x3, wkey, t0, t1, t2)
 *
 * Same as outunpack_blocks, except that each result block is additionally
 * XORed with the 16 bytes already present at its destination before being
 * stored, i.e. out[i] ^= result[i]. t0..t2 are scratch.
 */
191#define outunpack_xor_blocks(out, x0, x1, x2, x3, wkey, t0, t1, t2) \
192 transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
193 \
194 vpxor x0, wkey, x0; \
195 vpxor (0*4*4)(out), x0, x0; \
196 vmovdqu x0, (0*4*4)(out); \
197 vpxor x1, wkey, x1; \
198 vpxor (1*4*4)(out), x1, x1; \
199 vmovdqu x1, (1*4*4)(out); \
200 vpxor x2, wkey, x2; \
201 vpxor (2*4*4)(out), x2, x2; \
202 vmovdqu x2, (2*4*4)(out); \
203 vpxor x3, wkey, x3; \
204 vpxor (3*4*4)(out), x3, x3; \
205 vmovdqu x3, (3*4*4)(out);
206
207.align 8
208.global __twofish_enc_blk_8way
209.type __twofish_enc_blk_8way,@function;
210
/*
 * Encrypts eight consecutive 16-byte blocks (128 bytes) from src as two
 * groups of four. Depending on the flag in %cl the result is either stored
 * to dst or XORed into the data already at dst.
 */
211__twofish_enc_blk_8way:
212 /* input:
213 * %rdi: ctx, CTX
214 * %rsi: dst
215 * %rdx: src
216 * %rcx: bool, if true: xor output
217 */
218
 /* %rbx is used as RID2 and %rcx (the xor flag) as RGI2, so both are
  * saved here and restored after the rounds.
  */
219 pushq %rbx;
220 pushq %rcx;
221
 /* RK1 temporarily holds the first 16 bytes at w; inpack_blocks XORs it
  * into every input block.
  */
222 vmovdqu w(CTX), RK1;
223
 /* %rax = src + 64, the second group of four blocks. Both loads happen
  * before the rounds because %rdx (RGI1) and %rax (RID1) are reused.
  */
224 leaq (4*4*4)(%rdx), %rax;
225 inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RK1, RX, RY, RK2);
226 inpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX, RY, RK2);
227
 /* Zero the full index registers: lookup_32bit only writes their low
  * byte before using them as scaled indices.
  */
228 xorq RID1, RID1;
229 xorq RID2, RID2;
230
 /* Eight cycles of two rounds each. */
231 encrypt_cycle(0);
232 encrypt_cycle(1);
233 encrypt_cycle(2);
234 encrypt_cycle(3);
235 encrypt_cycle(4);
236 encrypt_cycle(5);
237 encrypt_cycle(6);
238 encrypt_cycle(7);
239
 /* RK1 now holds the second 16 bytes at w; the outunpack macros XOR it
  * into every output block.
  */
240 vmovdqu (w+4*4)(CTX), RK1;
241
242 popq %rcx;
243 popq %rbx;
244
 /* %rax = dst + 64, destination of the second group. */
245 leaq (4*4*4)(%rsi), %rax;
 /* NOTE(review): %rdx is not read again in the code visible here, so this
  * leaq looks dead -- confirm before removing.
  */
246 leaq (4*4*4)(%rax), %rdx;
247
248 testb %cl, %cl;
249 jnz __enc_xor8;
250
 /* Output is taken in C, D, A, B register order. */
251 outunpack_blocks(%rsi, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2);
252 outunpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2);
253
254 ret;
255
 /* xor flag set: dst ^= result instead of dst = result. */
256__enc_xor8:
257 outunpack_xor_blocks(%rsi, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2);
258 outunpack_xor_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2);
259
260 ret;
261
262.align 8
263.global twofish_dec_blk_8way
264.type twofish_dec_blk_8way,@function;
265
/*
 * Decrypts eight consecutive 16-byte blocks (128 bytes) from src to dst as
 * two groups of four. Mirrors __twofish_enc_blk_8way: the whitening halves
 * are used in the opposite order, the cycles run from 7 down to 0, and the
 * C,D,A,B register order is applied on input instead of on output.
 */
266twofish_dec_blk_8way:
267 /* input:
268 * %rdi: ctx, CTX
269 * %rsi: dst
270 * %rdx: src
271 */
272
 /* %rbx is used as RID2 by lookup_32bit; preserve it for the caller. */
273 pushq %rbx;
274
 /* Second 16 bytes at w are XORed into the input blocks. */
275 vmovdqu (w+4*4)(CTX), RK1;
276
 /* %rax = src + 64; both groups are loaded before %rdx/%rax get reused
  * as RGI1/RID1.
  */
277 leaq (4*4*4)(%rdx), %rax;
278 inpack_blocks(%rdx, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2);
279 inpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2);
280
 /* Zero the full index registers: lookup_32bit only writes their low
  * byte.
  */
281 xorq RID1, RID1;
282 xorq RID2, RID2;
283
284 decrypt_cycle(7);
285 decrypt_cycle(6);
286 decrypt_cycle(5);
287 decrypt_cycle(4);
288 decrypt_cycle(3);
289 decrypt_cycle(2);
290 decrypt_cycle(1);
291 decrypt_cycle(0);
292
 /* First 16 bytes at w are XORed into the output blocks. */
293 vmovdqu (w)(CTX), RK1;
294
295 popq %rbx;
296
 /* %rax = dst + 64, destination of the second group. */
297 leaq (4*4*4)(%rsi), %rax;
298 outunpack_blocks(%rsi, RA1, RB1, RC1, RD1, RK1, RX, RY, RK2);
299 outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX, RY, RK2);
300
301 ret;
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c
new file mode 100644
index 000000000000..599f19e4bef6
--- /dev/null
+++ b/arch/x86/crypto/twofish_avx_glue.c
@@ -0,0 +1,1086 @@
1/*
2 * Glue Code for AVX assembler version of Twofish Cipher
3 *
4 * Copyright (C) 2012 Johannes Goetzfried
5 * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
6 *
7 * Glue code based on serpent_sse2_glue.c by:
8 * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
23 * USA
24 *
25 */
26
27#include <linux/module.h>
28#include <linux/hardirq.h>
29#include <linux/types.h>
30#include <linux/crypto.h>
31#include <linux/err.h>
32#include <crypto/algapi.h>
33#include <crypto/twofish.h>
34#include <crypto/cryptd.h>
35#include <crypto/b128ops.h>
36#include <crypto/ctr.h>
37#include <crypto/lrw.h>
38#include <crypto/xts.h>
39#include <asm/i387.h>
40#include <asm/xcr.h>
41#include <asm/xsave.h>
42#include <crypto/scatterwalk.h>
43#include <linux/workqueue.h>
44#include <linux/spinlock.h>
45
46
47#define TWOFISH_PARALLEL_BLOCKS 8
48
49/* regular block cipher functions from twofish_x86_64 module */
50asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst,
51 const u8 *src);
52asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst,
53 const u8 *src);
54
55/* 3-way parallel cipher functions from twofish_x86_64-3way module */
56asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
57 const u8 *src, bool xor);
58asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst,
59 const u8 *src);
60
61static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
62 const u8 *src)
63{
64 __twofish_enc_blk_3way(ctx, dst, src, false);
65}
66
67static inline void twofish_enc_blk_3way_xor(struct twofish_ctx *ctx, u8 *dst,
68 const u8 *src)
69{
70 __twofish_enc_blk_3way(ctx, dst, src, true);
71}
72
73/* 8-way parallel cipher functions */
74asmlinkage void __twofish_enc_blk_8way(struct twofish_ctx *ctx, u8 *dst,
75 const u8 *src, bool xor);
76asmlinkage void twofish_dec_blk_8way(struct twofish_ctx *ctx, u8 *dst,
77 const u8 *src);
78
79static inline void twofish_enc_blk_xway(struct twofish_ctx *ctx, u8 *dst,
80 const u8 *src)
81{
82 __twofish_enc_blk_8way(ctx, dst, src, false);
83}
84
85static inline void twofish_enc_blk_xway_xor(struct twofish_ctx *ctx, u8 *dst,
86 const u8 *src)
87{
88 __twofish_enc_blk_8way(ctx, dst, src, true);
89}
90
91static inline void twofish_dec_blk_xway(struct twofish_ctx *ctx, u8 *dst,
92 const u8 *src)
93{
94 twofish_dec_blk_8way(ctx, dst, src);
95}
96
97
98
/*
 * Context of the asynchronous wrapper transforms. cryptd_tfm is the cryptd
 * handle: ablk_set_key() forwards the key to its base tfm, and
 * ablk_encrypt()/ablk_decrypt() hand requests over to it when
 * irq_fpu_usable() reports that the FPU cannot be used.
 */
99struct async_twofish_ctx {
100 struct cryptd_ablkcipher *cryptd_tfm;
101};
102
103static inline bool twofish_fpu_begin(bool fpu_enabled, unsigned int nbytes)
104{
105 if (fpu_enabled)
106 return true;
107
108 /* AVX is only used when chunk to be processed is large enough, so
109 * do not enable FPU until it is necessary.
110 */
111 if (nbytes < TF_BLOCK_SIZE * TWOFISH_PARALLEL_BLOCKS)
112 return false;
113
114 kernel_fpu_begin();
115 return true;
116}
117
/* Release the FPU again if twofish_fpu_begin() claimed it. */
static inline void twofish_fpu_end(bool fpu_enabled)
{
	if (!fpu_enabled)
		return;

	kernel_fpu_end();
}
123
124static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
125 bool enc)
126{
127 bool fpu_enabled = false;
128 struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
129 const unsigned int bsize = TF_BLOCK_SIZE;
130 unsigned int nbytes;
131 int err;
132
133 err = blkcipher_walk_virt(desc, walk);
134 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
135
136 while ((nbytes = walk->nbytes)) {
137 u8 *wsrc = walk->src.virt.addr;
138 u8 *wdst = walk->dst.virt.addr;
139
140 fpu_enabled = twofish_fpu_begin(fpu_enabled, nbytes);
141
142 /* Process multi-block batch */
143 if (nbytes >= bsize * TWOFISH_PARALLEL_BLOCKS) {
144 do {
145 if (enc)
146 twofish_enc_blk_xway(ctx, wdst, wsrc);
147 else
148 twofish_dec_blk_xway(ctx, wdst, wsrc);
149
150 wsrc += bsize * TWOFISH_PARALLEL_BLOCKS;
151 wdst += bsize * TWOFISH_PARALLEL_BLOCKS;
152 nbytes -= bsize * TWOFISH_PARALLEL_BLOCKS;
153 } while (nbytes >= bsize * TWOFISH_PARALLEL_BLOCKS);
154
155 if (nbytes < bsize)
156 goto done;
157 }
158
159 /* Process three block batch */
160 if (nbytes >= bsize * 3) {
161 do {
162 if (enc)
163 twofish_enc_blk_3way(ctx, wdst, wsrc);
164 else
165 twofish_dec_blk_3way(ctx, wdst, wsrc);
166
167 wsrc += bsize * 3;
168 wdst += bsize * 3;
169 nbytes -= bsize * 3;
170 } while (nbytes >= bsize * 3);
171
172 if (nbytes < bsize)
173 goto done;
174 }
175
176 /* Handle leftovers */
177 do {
178 if (enc)
179 twofish_enc_blk(ctx, wdst, wsrc);
180 else
181 twofish_dec_blk(ctx, wdst, wsrc);
182
183 wsrc += bsize;
184 wdst += bsize;
185 nbytes -= bsize;
186 } while (nbytes >= bsize);
187
188done:
189 err = blkcipher_walk_done(desc, walk, nbytes);
190 }
191
192 twofish_fpu_end(fpu_enabled);
193 return err;
194}
195
196static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
197 struct scatterlist *src, unsigned int nbytes)
198{
199 struct blkcipher_walk walk;
200
201 blkcipher_walk_init(&walk, dst, src, nbytes);
202 return ecb_crypt(desc, &walk, true);
203}
204
205static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
206 struct scatterlist *src, unsigned int nbytes)
207{
208 struct blkcipher_walk walk;
209
210 blkcipher_walk_init(&walk, dst, src, nbytes);
211 return ecb_crypt(desc, &walk, false);
212}
213
214static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
215 struct blkcipher_walk *walk)
216{
217 struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
218 const unsigned int bsize = TF_BLOCK_SIZE;
219 unsigned int nbytes = walk->nbytes;
220 u128 *src = (u128 *)walk->src.virt.addr;
221 u128 *dst = (u128 *)walk->dst.virt.addr;
222 u128 *iv = (u128 *)walk->iv;
223
224 do {
225 u128_xor(dst, src, iv);
226 twofish_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
227 iv = dst;
228
229 src += 1;
230 dst += 1;
231 nbytes -= bsize;
232 } while (nbytes >= bsize);
233
234 u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv);
235 return nbytes;
236}
237
238static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
239 struct scatterlist *src, unsigned int nbytes)
240{
241 struct blkcipher_walk walk;
242 int err;
243
244 blkcipher_walk_init(&walk, dst, src, nbytes);
245 err = blkcipher_walk_virt(desc, &walk);
246
247 while ((nbytes = walk.nbytes)) {
248 nbytes = __cbc_encrypt(desc, &walk);
249 err = blkcipher_walk_done(desc, &walk, nbytes);
250 }
251
252 return err;
253}
254
/*
 * CBC-decrypt all whole blocks of the current walk chunk.
 *
 * The chunk is processed from its last block towards its first. Each batch
 * is decrypted with the 8-way, 3-way or single-block routine and then XORed
 * with the ciphertext block that precedes it. Copies of the ciphertext
 * blocks needed inside a batch are taken into ivs[] before the batch is
 * decrypted, and those copies are what gets XORed in afterwards.
 *
 * The last ciphertext block of the chunk is saved up front and written to
 * walk->iv at the end as the chaining value for the next chunk; the first
 * block of the chunk is XORed with the incoming walk->iv.
 *
 * Returns the number of bytes of the chunk left unprocessed (less than one
 * block).
 */
255static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
256 struct blkcipher_walk *walk)
257{
258 struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
259 const unsigned int bsize = TF_BLOCK_SIZE;
260 unsigned int nbytes = walk->nbytes;
261 u128 *src = (u128 *)walk->src.virt.addr;
262 u128 *dst = (u128 *)walk->dst.virt.addr;
263 u128 ivs[TWOFISH_PARALLEL_BLOCKS - 1];
264 u128 last_iv;
265 int i;
266
267 /* Start of the last block. */
268 src += nbytes / bsize - 1;
269 dst += nbytes / bsize - 1;
270
 /* Remember the final ciphertext block; it becomes the next IV. */
271 last_iv = *src;
272
273 /* Process multi-block batch */
274 if (nbytes >= bsize * TWOFISH_PARALLEL_BLOCKS) {
275 do {
 /* Step back so src/dst point at the first block of the batch. */
276 nbytes -= bsize * (TWOFISH_PARALLEL_BLOCKS - 1);
277 src -= TWOFISH_PARALLEL_BLOCKS - 1;
278 dst -= TWOFISH_PARALLEL_BLOCKS - 1;
279
 /* Save ciphertext blocks 0..6 before decrypting the batch. */
280 for (i = 0; i < TWOFISH_PARALLEL_BLOCKS - 1; i++)
281 ivs[i] = src[i];
282
283 twofish_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);
284
 /* Blocks 1..7 are chained with the saved ciphertext of 0..6. */
285 for (i = 0; i < TWOFISH_PARALLEL_BLOCKS - 1; i++)
286 u128_xor(dst + (i + 1), dst + (i + 1), ivs + i);
287
288 nbytes -= bsize;
289 if (nbytes < bsize)
290 goto done;
291
 /* First block of the batch chains with the block before it. */
292 u128_xor(dst, dst, src - 1);
293 src -= 1;
294 dst -= 1;
295 } while (nbytes >= bsize * TWOFISH_PARALLEL_BLOCKS);
296
297 if (nbytes < bsize)
298 goto done;
299 }
300
301 /* Process three block batch */
302 if (nbytes >= bsize * 3) {
303 do {
304 nbytes -= bsize * (3 - 1);
305 src -= 3 - 1;
306 dst -= 3 - 1;
307
308 ivs[0] = src[0];
309 ivs[1] = src[1];
310
311 twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src);
312
313 u128_xor(dst + 1, dst + 1, ivs + 0);
314 u128_xor(dst + 2, dst + 2, ivs + 1);
315
316 nbytes -= bsize;
317 if (nbytes < bsize)
318 goto done;
319
320 u128_xor(dst, dst, src - 1);
321 src -= 1;
322 dst -= 1;
323 } while (nbytes >= bsize * 3);
324
325 if (nbytes < bsize)
326 goto done;
327 }
328
329 /* Handle leftovers */
330 for (;;) {
331 twofish_dec_blk(ctx, (u8 *)dst, (u8 *)src);
332
333 nbytes -= bsize;
334 if (nbytes < bsize)
335 break;
336
337 u128_xor(dst, dst, src - 1);
338 src -= 1;
339 dst -= 1;
340 }
341
342done:
 /* dst now points at the first block of the chunk: chain it with the
  * incoming IV, then publish the saved last ciphertext block.
  */
343 u128_xor(dst, dst, (u128 *)walk->iv);
344 *(u128 *)walk->iv = last_iv;
345
346 return nbytes;
347}
348
349static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
350 struct scatterlist *src, unsigned int nbytes)
351{
352 bool fpu_enabled = false;
353 struct blkcipher_walk walk;
354 int err;
355
356 blkcipher_walk_init(&walk, dst, src, nbytes);
357 err = blkcipher_walk_virt(desc, &walk);
358 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
359
360 while ((nbytes = walk.nbytes)) {
361 fpu_enabled = twofish_fpu_begin(fpu_enabled, nbytes);
362 nbytes = __cbc_decrypt(desc, &walk);
363 err = blkcipher_walk_done(desc, &walk, nbytes);
364 }
365
366 twofish_fpu_end(fpu_enabled);
367 return err;
368}
369
370static inline void u128_to_be128(be128 *dst, const u128 *src)
371{
372 dst->a = cpu_to_be64(src->a);
373 dst->b = cpu_to_be64(src->b);
374}
375
376static inline void be128_to_u128(u128 *dst, const be128 *src)
377{
378 dst->a = be64_to_cpu(src->a);
379 dst->b = be64_to_cpu(src->b);
380}
381
382static inline void u128_inc(u128 *i)
383{
384 i->b++;
385 if (!i->b)
386 i->a++;
387}
388
389static void ctr_crypt_final(struct blkcipher_desc *desc,
390 struct blkcipher_walk *walk)
391{
392 struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
393 u8 *ctrblk = walk->iv;
394 u8 keystream[TF_BLOCK_SIZE];
395 u8 *src = walk->src.virt.addr;
396 u8 *dst = walk->dst.virt.addr;
397 unsigned int nbytes = walk->nbytes;
398
399 twofish_enc_blk(ctx, keystream, ctrblk);
400 crypto_xor(keystream, src, nbytes);
401 memcpy(dst, keystream, nbytes);
402
403 crypto_inc(ctrblk, TF_BLOCK_SIZE);
404}
405
/*
 * CTR-process all whole blocks of the current walk chunk.
 *
 * The counter is read from walk->iv as a big-endian 128-bit value, kept in
 * CPU byte order while counting, and written back to walk->iv on exit.
 *
 * For the 8-block and 3-block batches the source data is first copied to
 * the destination (unless the operation is in place), the counter blocks
 * are built in ctrblocks[], and the *_xor cipher variants encrypt them and
 * XOR the result into the destination. Single leftover blocks encrypt
 * ctrblocks[0] in place and XOR it into the destination explicitly.
 *
 * Returns the number of bytes of the chunk left unprocessed (less than one
 * block).
 */
406static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
407 struct blkcipher_walk *walk)
408{
409 struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
410 const unsigned int bsize = TF_BLOCK_SIZE;
411 unsigned int nbytes = walk->nbytes;
412 u128 *src = (u128 *)walk->src.virt.addr;
413 u128 *dst = (u128 *)walk->dst.virt.addr;
414 u128 ctrblk;
415 be128 ctrblocks[TWOFISH_PARALLEL_BLOCKS];
416 int i;
417
 /* Working copy of the counter in CPU byte order. */
418 be128_to_u128(&ctrblk, (be128 *)walk->iv);
419
420 /* Process multi-block batch */
421 if (nbytes >= bsize * TWOFISH_PARALLEL_BLOCKS) {
422 do {
423 /* create ctrblks for parallel encrypt */
424 for (i = 0; i < TWOFISH_PARALLEL_BLOCKS; i++) {
 /* The xor variant combines with dst, so dst must hold the input. */
425 if (dst != src)
426 dst[i] = src[i];
427
428 u128_to_be128(&ctrblocks[i], &ctrblk);
429 u128_inc(&ctrblk);
430 }
431
432 twofish_enc_blk_xway_xor(ctx, (u8 *)dst,
433 (u8 *)ctrblocks);
434
435 src += TWOFISH_PARALLEL_BLOCKS;
436 dst += TWOFISH_PARALLEL_BLOCKS;
437 nbytes -= bsize * TWOFISH_PARALLEL_BLOCKS;
438 } while (nbytes >= bsize * TWOFISH_PARALLEL_BLOCKS);
439
440 if (nbytes < bsize)
441 goto done;
442 }
443
444 /* Process three block batch */
445 if (nbytes >= bsize * 3) {
446 do {
447 if (dst != src) {
448 dst[0] = src[0];
449 dst[1] = src[1];
450 dst[2] = src[2];
451 }
452
453 /* create ctrblks for parallel encrypt */
454 u128_to_be128(&ctrblocks[0], &ctrblk);
455 u128_inc(&ctrblk);
456 u128_to_be128(&ctrblocks[1], &ctrblk);
457 u128_inc(&ctrblk);
458 u128_to_be128(&ctrblocks[2], &ctrblk);
459 u128_inc(&ctrblk);
460
461 twofish_enc_blk_3way_xor(ctx, (u8 *)dst,
462 (u8 *)ctrblocks);
463
464 src += 3;
465 dst += 3;
466 nbytes -= bsize * 3;
467 } while (nbytes >= bsize * 3);
468
469 if (nbytes < bsize)
470 goto done;
471 }
472
473 /* Handle leftovers */
474 do {
475 if (dst != src)
476 *dst = *src;
477
478 u128_to_be128(&ctrblocks[0], &ctrblk);
479 u128_inc(&ctrblk);
480
 /* Encrypt the counter block in place, then XOR it into dst by hand. */
481 twofish_enc_blk(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks);
482 u128_xor(dst, dst, (u128 *)ctrblocks);
483
484 src += 1;
485 dst += 1;
486 nbytes -= bsize;
487 } while (nbytes >= bsize);
488
489done:
 /* Publish the advanced counter for the next chunk. */
490 u128_to_be128((be128 *)walk->iv, &ctrblk);
491 return nbytes;
492}
493
494static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
495 struct scatterlist *src, unsigned int nbytes)
496{
497 bool fpu_enabled = false;
498 struct blkcipher_walk walk;
499 int err;
500
501 blkcipher_walk_init(&walk, dst, src, nbytes);
502 err = blkcipher_walk_virt_block(desc, &walk, TF_BLOCK_SIZE);
503 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
504
505 while ((nbytes = walk.nbytes) >= TF_BLOCK_SIZE) {
506 fpu_enabled = twofish_fpu_begin(fpu_enabled, nbytes);
507 nbytes = __ctr_crypt(desc, &walk);
508 err = blkcipher_walk_done(desc, &walk, nbytes);
509 }
510
511 twofish_fpu_end(fpu_enabled);
512
513 if (walk.nbytes) {
514 ctr_crypt_final(desc, &walk);
515 err = blkcipher_walk_done(desc, &walk, 0);
516 }
517
518 return err;
519}
520
/*
 * State handed to encrypt_callback()/decrypt_callback() through the LRW and
 * XTS request structures.
 */
521struct crypt_priv {
 /* Key context passed on to the Twofish cipher routines. */
522 struct twofish_ctx *ctx;
 /* Result of twofish_fpu_begin(), carried across callback invocations so
  * the caller can invoke twofish_fpu_end() once after the request.
  */
523 bool fpu_enabled;
524};
525
526static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
527{
528 const unsigned int bsize = TF_BLOCK_SIZE;
529 struct crypt_priv *ctx = priv;
530 int i;
531
532 ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes);
533
534 if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) {
535 twofish_enc_blk_xway(ctx->ctx, srcdst, srcdst);
536 return;
537 }
538
539 for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3)
540 twofish_enc_blk_3way(ctx->ctx, srcdst, srcdst);
541
542 nbytes %= bsize * 3;
543
544 for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
545 twofish_enc_blk(ctx->ctx, srcdst, srcdst);
546}
547
548static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
549{
550 const unsigned int bsize = TF_BLOCK_SIZE;
551 struct crypt_priv *ctx = priv;
552 int i;
553
554 ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes);
555
556 if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) {
557 twofish_dec_blk_xway(ctx->ctx, srcdst, srcdst);
558 return;
559 }
560
561 for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3)
562 twofish_dec_blk_3way(ctx->ctx, srcdst, srcdst);
563
564 nbytes %= bsize * 3;
565
566 for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
567 twofish_dec_blk(ctx->ctx, srcdst, srcdst);
568}
569
/*
 * Transform context for LRW mode. lrw_twofish_setkey() keys twofish_ctx with
 * all but the last TF_BLOCK_SIZE bytes of the supplied key and initialises
 * lrw_table from those last TF_BLOCK_SIZE bytes; lrw_exit_tfm() frees the
 * table.
 */
570struct twofish_lrw_ctx {
571 struct lrw_table_ctx lrw_table;
572 struct twofish_ctx twofish_ctx;
573};
574
575static int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
576 unsigned int keylen)
577{
578 struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
579 int err;
580
581 err = __twofish_setkey(&ctx->twofish_ctx, key,
582 keylen - TF_BLOCK_SIZE, &tfm->crt_flags);
583 if (err)
584 return err;
585
586 return lrw_init_table(&ctx->lrw_table, key + keylen -
587 TF_BLOCK_SIZE);
588}
589
590static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
591 struct scatterlist *src, unsigned int nbytes)
592{
593 struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
594 be128 buf[TWOFISH_PARALLEL_BLOCKS];
595 struct crypt_priv crypt_ctx = {
596 .ctx = &ctx->twofish_ctx,
597 .fpu_enabled = false,
598 };
599 struct lrw_crypt_req req = {
600 .tbuf = buf,
601 .tbuflen = sizeof(buf),
602
603 .table_ctx = &ctx->lrw_table,
604 .crypt_ctx = &crypt_ctx,
605 .crypt_fn = encrypt_callback,
606 };
607 int ret;
608
609 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
610 ret = lrw_crypt(desc, dst, src, nbytes, &req);
611 twofish_fpu_end(crypt_ctx.fpu_enabled);
612
613 return ret;
614}
615
616static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
617 struct scatterlist *src, unsigned int nbytes)
618{
619 struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
620 be128 buf[TWOFISH_PARALLEL_BLOCKS];
621 struct crypt_priv crypt_ctx = {
622 .ctx = &ctx->twofish_ctx,
623 .fpu_enabled = false,
624 };
625 struct lrw_crypt_req req = {
626 .tbuf = buf,
627 .tbuflen = sizeof(buf),
628
629 .table_ctx = &ctx->lrw_table,
630 .crypt_ctx = &crypt_ctx,
631 .crypt_fn = decrypt_callback,
632 };
633 int ret;
634
635 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
636 ret = lrw_crypt(desc, dst, src, nbytes, &req);
637 twofish_fpu_end(crypt_ctx.fpu_enabled);
638
639 return ret;
640}
641
642static void lrw_exit_tfm(struct crypto_tfm *tfm)
643{
644 struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
645
646 lrw_free_table(&ctx->lrw_table);
647}
648
/*
 * Transform context for XTS mode. xts_twofish_setkey() keys crypt_ctx with
 * the first half of the supplied key and tweak_ctx with the second half.
 */
649struct twofish_xts_ctx {
650 struct twofish_ctx tweak_ctx;
651 struct twofish_ctx crypt_ctx;
652};
653
654static int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
655 unsigned int keylen)
656{
657 struct twofish_xts_ctx *ctx = crypto_tfm_ctx(tfm);
658 u32 *flags = &tfm->crt_flags;
659 int err;
660
661 /* key consists of keys of equal size concatenated, therefore
662 * the length must be even
663 */
664 if (keylen % 2) {
665 *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
666 return -EINVAL;
667 }
668
669 /* first half of xts-key is for crypt */
670 err = __twofish_setkey(&ctx->crypt_ctx, key, keylen / 2, flags);
671 if (err)
672 return err;
673
674 /* second half of xts-key is for tweak */
675 return __twofish_setkey(&ctx->tweak_ctx,
676 key + keylen / 2, keylen / 2, flags);
677}
678
679static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
680 struct scatterlist *src, unsigned int nbytes)
681{
682 struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
683 be128 buf[TWOFISH_PARALLEL_BLOCKS];
684 struct crypt_priv crypt_ctx = {
685 .ctx = &ctx->crypt_ctx,
686 .fpu_enabled = false,
687 };
688 struct xts_crypt_req req = {
689 .tbuf = buf,
690 .tbuflen = sizeof(buf),
691
692 .tweak_ctx = &ctx->tweak_ctx,
693 .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk),
694 .crypt_ctx = &crypt_ctx,
695 .crypt_fn = encrypt_callback,
696 };
697 int ret;
698
699 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
700 ret = xts_crypt(desc, dst, src, nbytes, &req);
701 twofish_fpu_end(crypt_ctx.fpu_enabled);
702
703 return ret;
704}
705
706static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
707 struct scatterlist *src, unsigned int nbytes)
708{
709 struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
710 be128 buf[TWOFISH_PARALLEL_BLOCKS];
711 struct crypt_priv crypt_ctx = {
712 .ctx = &ctx->crypt_ctx,
713 .fpu_enabled = false,
714 };
715 struct xts_crypt_req req = {
716 .tbuf = buf,
717 .tbuflen = sizeof(buf),
718
719 .tweak_ctx = &ctx->tweak_ctx,
720 .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk),
721 .crypt_ctx = &crypt_ctx,
722 .crypt_fn = decrypt_callback,
723 };
724 int ret;
725
726 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
727 ret = xts_crypt(desc, dst, src, nbytes, &req);
728 twofish_fpu_end(crypt_ctx.fpu_enabled);
729
730 return ret;
731}
732
733static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
734 unsigned int key_len)
735{
736 struct async_twofish_ctx *ctx = crypto_ablkcipher_ctx(tfm);
737 struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base;
738 int err;
739
740 crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
741 crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm)
742 & CRYPTO_TFM_REQ_MASK);
743 err = crypto_ablkcipher_setkey(child, key, key_len);
744 crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child)
745 & CRYPTO_TFM_RES_MASK);
746 return err;
747}
748
749static int __ablk_encrypt(struct ablkcipher_request *req)
750{
751 struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
752 struct async_twofish_ctx *ctx = crypto_ablkcipher_ctx(tfm);
753 struct blkcipher_desc desc;
754
755 desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
756 desc.info = req->info;
757 desc.flags = 0;
758
759 return crypto_blkcipher_crt(desc.tfm)->encrypt(
760 &desc, req->dst, req->src, req->nbytes);
761}
762
763static int ablk_encrypt(struct ablkcipher_request *req)
764{
765 struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
766 struct async_twofish_ctx *ctx = crypto_ablkcipher_ctx(tfm);
767
768 if (!irq_fpu_usable()) {
769 struct ablkcipher_request *cryptd_req =
770 ablkcipher_request_ctx(req);
771
772 memcpy(cryptd_req, req, sizeof(*req));
773 ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
774
775 return crypto_ablkcipher_encrypt(cryptd_req);
776 } else {
777 return __ablk_encrypt(req);
778 }
779}
780
781static int ablk_decrypt(struct ablkcipher_request *req)
782{
783 struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
784 struct async_twofish_ctx *ctx = crypto_ablkcipher_ctx(tfm);
785
786 if (!irq_fpu_usable()) {
787 struct ablkcipher_request *cryptd_req =
788 ablkcipher_request_ctx(req);
789
790 memcpy(cryptd_req, req, sizeof(*req));
791 ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
792
793 return crypto_ablkcipher_decrypt(cryptd_req);
794 } else {
795 struct blkcipher_desc desc;
796
797 desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
798 desc.info = req->info;
799 desc.flags = 0;
800
801 return crypto_blkcipher_crt(desc.tfm)->decrypt(
802 &desc, req->dst, req->src, req->nbytes);
803 }
804}
805
806static void ablk_exit(struct crypto_tfm *tfm)
807{
808 struct async_twofish_ctx *ctx = crypto_tfm_ctx(tfm);
809
810 cryptd_free_ablkcipher(ctx->cryptd_tfm);
811}
812
813static int ablk_init(struct crypto_tfm *tfm)
814{
815 struct async_twofish_ctx *ctx = crypto_tfm_ctx(tfm);
816 struct cryptd_ablkcipher *cryptd_tfm;
817 char drv_name[CRYPTO_MAX_ALG_NAME];
818
819 snprintf(drv_name, sizeof(drv_name), "__driver-%s",
820 crypto_tfm_alg_driver_name(tfm));
821
822 cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0);
823 if (IS_ERR(cryptd_tfm))
824 return PTR_ERR(cryptd_tfm);
825
826 ctx->cryptd_tfm = cryptd_tfm;
827 tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) +
828 crypto_ablkcipher_reqsize(&cryptd_tfm->base);
829
830 return 0;
831}
832
833static struct crypto_alg twofish_algs[10] = { {
834 .cra_name = "__ecb-twofish-avx",
835 .cra_driver_name = "__driver-ecb-twofish-avx",
836 .cra_priority = 0,
837 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
838 .cra_blocksize = TF_BLOCK_SIZE,
839 .cra_ctxsize = sizeof(struct twofish_ctx),
840 .cra_alignmask = 0,
841 .cra_type = &crypto_blkcipher_type,
842 .cra_module = THIS_MODULE,
843 .cra_list = LIST_HEAD_INIT(twofish_algs[0].cra_list),
844 .cra_u = {
845 .blkcipher = {
846 .min_keysize = TF_MIN_KEY_SIZE,
847 .max_keysize = TF_MAX_KEY_SIZE,
848 .setkey = twofish_setkey,
849 .encrypt = ecb_encrypt,
850 .decrypt = ecb_decrypt,
851 },
852 },
853}, {
854 .cra_name = "__cbc-twofish-avx",
855 .cra_driver_name = "__driver-cbc-twofish-avx",
856 .cra_priority = 0,
857 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
858 .cra_blocksize = TF_BLOCK_SIZE,
859 .cra_ctxsize = sizeof(struct twofish_ctx),
860 .cra_alignmask = 0,
861 .cra_type = &crypto_blkcipher_type,
862 .cra_module = THIS_MODULE,
863 .cra_list = LIST_HEAD_INIT(twofish_algs[1].cra_list),
864 .cra_u = {
865 .blkcipher = {
866 .min_keysize = TF_MIN_KEY_SIZE,
867 .max_keysize = TF_MAX_KEY_SIZE,
868 .setkey = twofish_setkey,
869 .encrypt = cbc_encrypt,
870 .decrypt = cbc_decrypt,
871 },
872 },
873}, {
874 .cra_name = "__ctr-twofish-avx",
875 .cra_driver_name = "__driver-ctr-twofish-avx",
876 .cra_priority = 0,
877 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
878 .cra_blocksize = 1,
879 .cra_ctxsize = sizeof(struct twofish_ctx),
880 .cra_alignmask = 0,
881 .cra_type = &crypto_blkcipher_type,
882 .cra_module = THIS_MODULE,
883 .cra_list = LIST_HEAD_INIT(twofish_algs[2].cra_list),
884 .cra_u = {
885 .blkcipher = {
886 .min_keysize = TF_MIN_KEY_SIZE,
887 .max_keysize = TF_MAX_KEY_SIZE,
888 .ivsize = TF_BLOCK_SIZE,
889 .setkey = twofish_setkey,
890 .encrypt = ctr_crypt,
891 .decrypt = ctr_crypt,
892 },
893 },
894}, {
895 .cra_name = "__lrw-twofish-avx",
896 .cra_driver_name = "__driver-lrw-twofish-avx",
897 .cra_priority = 0,
898 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
899 .cra_blocksize = TF_BLOCK_SIZE,
900 .cra_ctxsize = sizeof(struct twofish_lrw_ctx),
901 .cra_alignmask = 0,
902 .cra_type = &crypto_blkcipher_type,
903 .cra_module = THIS_MODULE,
904 .cra_list = LIST_HEAD_INIT(twofish_algs[3].cra_list),
905 .cra_exit = lrw_exit_tfm,
906 .cra_u = {
907 .blkcipher = {
908 .min_keysize = TF_MIN_KEY_SIZE +
909 TF_BLOCK_SIZE,
910 .max_keysize = TF_MAX_KEY_SIZE +
911 TF_BLOCK_SIZE,
912 .ivsize = TF_BLOCK_SIZE,
913 .setkey = lrw_twofish_setkey,
914 .encrypt = lrw_encrypt,
915 .decrypt = lrw_decrypt,
916 },
917 },
918}, {
919 .cra_name = "__xts-twofish-avx",
920 .cra_driver_name = "__driver-xts-twofish-avx",
921 .cra_priority = 0,
922 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
923 .cra_blocksize = TF_BLOCK_SIZE,
924 .cra_ctxsize = sizeof(struct twofish_xts_ctx),
925 .cra_alignmask = 0,
926 .cra_type = &crypto_blkcipher_type,
927 .cra_module = THIS_MODULE,
928 .cra_list = LIST_HEAD_INIT(twofish_algs[4].cra_list),
929 .cra_u = {
930 .blkcipher = {
931 .min_keysize = TF_MIN_KEY_SIZE * 2,
932 .max_keysize = TF_MAX_KEY_SIZE * 2,
933 .ivsize = TF_BLOCK_SIZE,
934 .setkey = xts_twofish_setkey,
935 .encrypt = xts_encrypt,
936 .decrypt = xts_decrypt,
937 },
938 },
939}, {
940 .cra_name = "ecb(twofish)",
941 .cra_driver_name = "ecb-twofish-avx",
942 .cra_priority = 400,
943 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
944 .cra_blocksize = TF_BLOCK_SIZE,
945 .cra_ctxsize = sizeof(struct async_twofish_ctx),
946 .cra_alignmask = 0,
947 .cra_type = &crypto_ablkcipher_type,
948 .cra_module = THIS_MODULE,
949 .cra_list = LIST_HEAD_INIT(twofish_algs[5].cra_list),
950 .cra_init = ablk_init,
951 .cra_exit = ablk_exit,
952 .cra_u = {
953 .ablkcipher = {
954 .min_keysize = TF_MIN_KEY_SIZE,
955 .max_keysize = TF_MAX_KEY_SIZE,
956 .setkey = ablk_set_key,
957 .encrypt = ablk_encrypt,
958 .decrypt = ablk_decrypt,
959 },
960 },
961}, {
962 .cra_name = "cbc(twofish)",
963 .cra_driver_name = "cbc-twofish-avx",
964 .cra_priority = 400,
965 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
966 .cra_blocksize = TF_BLOCK_SIZE,
967 .cra_ctxsize = sizeof(struct async_twofish_ctx),
968 .cra_alignmask = 0,
969 .cra_type = &crypto_ablkcipher_type,
970 .cra_module = THIS_MODULE,
971 .cra_list = LIST_HEAD_INIT(twofish_algs[6].cra_list),
972 .cra_init = ablk_init,
973 .cra_exit = ablk_exit,
974 .cra_u = {
975 .ablkcipher = {
976 .min_keysize = TF_MIN_KEY_SIZE,
977 .max_keysize = TF_MAX_KEY_SIZE,
978 .ivsize = TF_BLOCK_SIZE,
979 .setkey = ablk_set_key,
980 .encrypt = __ablk_encrypt,
981 .decrypt = ablk_decrypt,
982 },
983 },
984}, {
985 .cra_name = "ctr(twofish)",
986 .cra_driver_name = "ctr-twofish-avx",
987 .cra_priority = 400,
988 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
989 .cra_blocksize = 1,
990 .cra_ctxsize = sizeof(struct async_twofish_ctx),
991 .cra_alignmask = 0,
992 .cra_type = &crypto_ablkcipher_type,
993 .cra_module = THIS_MODULE,
994 .cra_list = LIST_HEAD_INIT(twofish_algs[7].cra_list),
995 .cra_init = ablk_init,
996 .cra_exit = ablk_exit,
997 .cra_u = {
998 .ablkcipher = {
999 .min_keysize = TF_MIN_KEY_SIZE,
1000 .max_keysize = TF_MAX_KEY_SIZE,
1001 .ivsize = TF_BLOCK_SIZE,
1002 .setkey = ablk_set_key,
1003 .encrypt = ablk_encrypt,
1004 .decrypt = ablk_encrypt,
1005 .geniv = "chainiv",
1006 },
1007 },
1008}, {
1009 .cra_name = "lrw(twofish)",
1010 .cra_driver_name = "lrw-twofish-avx",
1011 .cra_priority = 400,
1012 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
1013 .cra_blocksize = TF_BLOCK_SIZE,
1014 .cra_ctxsize = sizeof(struct async_twofish_ctx),
1015 .cra_alignmask = 0,
1016 .cra_type = &crypto_ablkcipher_type,
1017 .cra_module = THIS_MODULE,
1018 .cra_list = LIST_HEAD_INIT(twofish_algs[8].cra_list),
1019 .cra_init = ablk_init,
1020 .cra_exit = ablk_exit,
1021 .cra_u = {
1022 .ablkcipher = {
1023 .min_keysize = TF_MIN_KEY_SIZE +
1024 TF_BLOCK_SIZE,
1025 .max_keysize = TF_MAX_KEY_SIZE +
1026 TF_BLOCK_SIZE,
1027 .ivsize = TF_BLOCK_SIZE,
1028 .setkey = ablk_set_key,
1029 .encrypt = ablk_encrypt,
1030 .decrypt = ablk_decrypt,
1031 },
1032 },
1033}, {
1034 .cra_name = "xts(twofish)",
1035 .cra_driver_name = "xts-twofish-avx",
1036 .cra_priority = 400,
1037 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
1038 .cra_blocksize = TF_BLOCK_SIZE,
1039 .cra_ctxsize = sizeof(struct async_twofish_ctx),
1040 .cra_alignmask = 0,
1041 .cra_type = &crypto_ablkcipher_type,
1042 .cra_module = THIS_MODULE,
1043 .cra_list = LIST_HEAD_INIT(twofish_algs[9].cra_list),
1044 .cra_init = ablk_init,
1045 .cra_exit = ablk_exit,
1046 .cra_u = {
1047 .ablkcipher = {
1048 .min_keysize = TF_MIN_KEY_SIZE * 2,
1049 .max_keysize = TF_MAX_KEY_SIZE * 2,
1050 .ivsize = TF_BLOCK_SIZE,
1051 .setkey = ablk_set_key,
1052 .encrypt = ablk_encrypt,
1053 .decrypt = ablk_decrypt,
1054 },
1055 },
1056} };
1057
1058static int __init twofish_init(void)
1059{
1060 u64 xcr0;
1061
1062 if (!cpu_has_avx || !cpu_has_osxsave) {
1063 printk(KERN_INFO "AVX instructions are not detected.\n");
1064 return -ENODEV;
1065 }
1066
1067 xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
1068 if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
1069 printk(KERN_INFO "AVX detected but unusable.\n");
1070 return -ENODEV;
1071 }
1072
1073 return crypto_register_algs(twofish_algs, ARRAY_SIZE(twofish_algs));
1074}
1075
1076static void __exit twofish_exit(void)
1077{
1078 crypto_unregister_algs(twofish_algs, ARRAY_SIZE(twofish_algs));
1079}
1080
1081module_init(twofish_init);
1082module_exit(twofish_exit);
1083
1084MODULE_DESCRIPTION("Twofish Cipher Algorithm, AVX optimized");
1085MODULE_LICENSE("GPL");
1086MODULE_ALIAS("twofish");
diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c
index 922ab24cce31..77e4e55a2660 100644
--- a/arch/x86/crypto/twofish_glue_3way.c
+++ b/arch/x86/crypto/twofish_glue_3way.c
@@ -45,8 +45,10 @@ asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst,
45/* 3-way parallel cipher functions */ 45/* 3-way parallel cipher functions */
46asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, 46asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
47 const u8 *src, bool xor); 47 const u8 *src, bool xor);
48EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way);
48asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst, 49asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst,
49 const u8 *src); 50 const u8 *src);
51EXPORT_SYMBOL_GPL(twofish_dec_blk_3way);
50 52
51static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, 53static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
52 const u8 *src) 54 const u8 *src)
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 8e84225c096b..e00a4e49e013 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -913,6 +913,30 @@ config CRYPTO_TWOFISH_X86_64_3WAY
913 See also: 913 See also:
914 <http://www.schneier.com/twofish.html> 914 <http://www.schneier.com/twofish.html>
915 915
916config CRYPTO_TWOFISH_AVX_X86_64
917 tristate "Twofish cipher algorithm (x86_64/AVX)"
918 depends on X86 && 64BIT
919 select CRYPTO_ALGAPI
920 select CRYPTO_CRYPTD
921 select CRYPTO_TWOFISH_COMMON
922 select CRYPTO_TWOFISH_X86_64
923 select CRYPTO_TWOFISH_X86_64_3WAY
924 select CRYPTO_LRW
925 select CRYPTO_XTS
926 help
927 Twofish cipher algorithm (x86_64/AVX).
928
929 Twofish was submitted as an AES (Advanced Encryption Standard)
930 candidate cipher by researchers at CounterPane Systems. It is a
931 16 round block cipher supporting key sizes of 128, 192, and 256
932 bits.
933
934 This module provides the Twofish cipher algorithm that processes
935 eight blocks in parallel using the AVX Instruction Set.
936
937 See also:
938 <http://www.schneier.com/twofish.html>
939
916comment "Compression" 940comment "Compression"
917 941
918config CRYPTO_DEFLATE 942config CRYPTO_DEFLATE
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 750cce44bad6..2af879786e75 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -1566,6 +1566,29 @@ static int do_test(int m)
1566 speed_template_32_64); 1566 speed_template_32_64);
1567 break; 1567 break;
1568 1568
1569 case 504:
1570 test_acipher_speed("ecb(twofish)", ENCRYPT, sec, NULL, 0,
1571 speed_template_16_24_32);
1572 test_acipher_speed("ecb(twofish)", DECRYPT, sec, NULL, 0,
1573 speed_template_16_24_32);
1574 test_acipher_speed("cbc(twofish)", ENCRYPT, sec, NULL, 0,
1575 speed_template_16_24_32);
1576 test_acipher_speed("cbc(twofish)", DECRYPT, sec, NULL, 0,
1577 speed_template_16_24_32);
1578 test_acipher_speed("ctr(twofish)", ENCRYPT, sec, NULL, 0,
1579 speed_template_16_24_32);
1580 test_acipher_speed("ctr(twofish)", DECRYPT, sec, NULL, 0,
1581 speed_template_16_24_32);
1582 test_acipher_speed("lrw(twofish)", ENCRYPT, sec, NULL, 0,
1583 speed_template_32_40_48);
1584 test_acipher_speed("lrw(twofish)", DECRYPT, sec, NULL, 0,
1585 speed_template_32_40_48);
1586 test_acipher_speed("xts(twofish)", ENCRYPT, sec, NULL, 0,
1587 speed_template_32_48_64);
1588 test_acipher_speed("xts(twofish)", DECRYPT, sec, NULL, 0,
1589 speed_template_32_48_64);
1590 break;
1591
1569 case 1000: 1592 case 1000:
1570 test_available(); 1593 test_available();
1571 break; 1594 break;
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index eb6d20f8ec5d..73b3ec6fe1a2 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -1549,6 +1549,21 @@ static const struct alg_test_desc alg_test_descs[] = {
1549 } 1549 }
1550 } 1550 }
1551 }, { 1551 }, {
1552 .alg = "__cbc-twofish-avx",
1553 .test = alg_test_null,
1554 .suite = {
1555 .cipher = {
1556 .enc = {
1557 .vecs = NULL,
1558 .count = 0
1559 },
1560 .dec = {
1561 .vecs = NULL,
1562 .count = 0
1563 }
1564 }
1565 }
1566 }, {
1552 .alg = "__driver-cbc-aes-aesni", 1567 .alg = "__driver-cbc-aes-aesni",
1553 .test = alg_test_null, 1568 .test = alg_test_null,
1554 .suite = { 1569 .suite = {
@@ -1579,6 +1594,21 @@ static const struct alg_test_desc alg_test_descs[] = {
1579 } 1594 }
1580 } 1595 }
1581 }, { 1596 }, {
1597 .alg = "__driver-cbc-twofish-avx",
1598 .test = alg_test_null,
1599 .suite = {
1600 .cipher = {
1601 .enc = {
1602 .vecs = NULL,
1603 .count = 0
1604 },
1605 .dec = {
1606 .vecs = NULL,
1607 .count = 0
1608 }
1609 }
1610 }
1611 }, {
1582 .alg = "__driver-ecb-aes-aesni", 1612 .alg = "__driver-ecb-aes-aesni",
1583 .test = alg_test_null, 1613 .test = alg_test_null,
1584 .suite = { 1614 .suite = {
@@ -1609,6 +1639,21 @@ static const struct alg_test_desc alg_test_descs[] = {
1609 } 1639 }
1610 } 1640 }
1611 }, { 1641 }, {
1642 .alg = "__driver-ecb-twofish-avx",
1643 .test = alg_test_null,
1644 .suite = {
1645 .cipher = {
1646 .enc = {
1647 .vecs = NULL,
1648 .count = 0
1649 },
1650 .dec = {
1651 .vecs = NULL,
1652 .count = 0
1653 }
1654 }
1655 }
1656 }, {
1612 .alg = "__ghash-pclmulqdqni", 1657 .alg = "__ghash-pclmulqdqni",
1613 .test = alg_test_null, 1658 .test = alg_test_null,
1614 .suite = { 1659 .suite = {
@@ -1806,6 +1851,21 @@ static const struct alg_test_desc alg_test_descs[] = {
1806 } 1851 }
1807 } 1852 }
1808 }, { 1853 }, {
1854 .alg = "cryptd(__driver-ecb-twofish-avx)",
1855 .test = alg_test_null,
1856 .suite = {
1857 .cipher = {
1858 .enc = {
1859 .vecs = NULL,
1860 .count = 0
1861 },
1862 .dec = {
1863 .vecs = NULL,
1864 .count = 0
1865 }
1866 }
1867 }
1868 }, {
1809 .alg = "cryptd(__ghash-pclmulqdqni)", 1869 .alg = "cryptd(__ghash-pclmulqdqni)",
1810 .test = alg_test_null, 1870 .test = alg_test_null,
1811 .suite = { 1871 .suite = {