aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/crypto
diff options
context:
space:
mode:
authorJussi Kivilinna <jussi.kivilinna@mbnet.fi>2011-09-26 09:47:25 -0400
committerHerbert Xu <herbert@gondor.apana.org.au>2011-10-21 08:23:08 -0400
commit8280daad436edb7dd9e7e06fc13bcecb6b2a885c (patch)
tree0d4cb032c6da8617bd4a2dd84bd8ef1a605fa19d /arch/x86/crypto
parent91d41f159d75d602f6001218eec64c5e761475a6 (diff)
crypto: twofish - add 3-way parallel x86_64 assembler implementation
Patch adds 3-way parallel x86_64 assembly implementation of twofish as new module. New assembler functions crypt data in three-block chunks, improving cipher performance on out-of-order CPUs. Patch has been tested with tcrypt and automated filesystem tests. Summary of the tcrypt benchmarks: Twofish 3-way-asm vs twofish asm (128bit 8kb block ECB) encrypt: 1.3x speed decrypt: 1.3x speed Twofish 3-way-asm vs twofish asm (128bit 8kb block CBC) encrypt: 1.07x speed decrypt: 1.4x speed Twofish 3-way-asm vs twofish asm (128bit 8kb block CTR) encrypt: 1.4x speed Twofish 3-way-asm vs AES asm (128bit 8kb block ECB) encrypt: 1.0x speed decrypt: 1.0x speed Twofish 3-way-asm vs AES asm (128bit 8kb block CBC) encrypt: 0.84x speed decrypt: 1.09x speed Twofish 3-way-asm vs AES asm (128bit 8kb block CTR) encrypt: 1.15x speed Full output: http://koti.mbnet.fi/axh/kernel/crypto/tcrypt-speed-twofish-3way-asm-x86_64.txt http://koti.mbnet.fi/axh/kernel/crypto/tcrypt-speed-twofish-asm-x86_64.txt http://koti.mbnet.fi/axh/kernel/crypto/tcrypt-speed-aes-asm-x86_64.txt Tests were run on: vendor_id : AuthenticAMD cpu family : 16 model : 10 model name : AMD Phenom(tm) II X6 1055T Processor Userspace tests were also run on: vendor_id : GenuineIntel cpu family : 6 model : 15 model name : Intel(R) Xeon(R) CPU E7330 @ 2.40GHz stepping : 11 Userspace test results: Encryption/decryption of twofish 3-way vs x86_64-asm on AMD Phenom II: encrypt: 1.27x decrypt: 1.25x Encryption/decryption of twofish 3-way vs x86_64-asm on Intel Xeon E7330: encrypt: 1.36x decrypt: 1.36x Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'arch/x86/crypto')
-rw-r--r--arch/x86/crypto/Makefile2
-rw-r--r--arch/x86/crypto/twofish-x86_64-asm_64-3way.S316
-rw-r--r--arch/x86/crypto/twofish_glue_3way.c472
3 files changed, 790 insertions, 0 deletions
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 725addfacf0a..3537d4b91f74 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
9obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o 9obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
10obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o 10obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
11obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o 11obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
12obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
12obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o 13obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
13obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o 14obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
14obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o 15obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
@@ -23,6 +24,7 @@ salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o
23aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o 24aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
24blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o 25blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
25twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o 26twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
27twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
26salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o 28salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
27 29
28aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o 30aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
diff --git a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
new file mode 100644
index 000000000000..5b012a2c5119
--- /dev/null
+++ b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
@@ -0,0 +1,316 @@
1/*
2 * Twofish Cipher 3-way parallel algorithm (x86_64)
3 *
4 * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
19 * USA
20 *
21 */
22
23.file "twofish-x86_64-asm-3way.S"
24.text
25
26/* structure of crypto context */
/*
 * Byte offsets applied as displacements from CTX.  s0..s3 are the bases of
 * four lookup tables spaced 1024 bytes apart (do16bit_ror indexes them with
 * a scale of 4), w is the base of the words xored into block data by
 * inpack3/outunpack3, and k is the base of the words added in
 * enc_round_end/dec_round_end.
 * NOTE(review): presumably mirrors the field layout of struct twofish_ctx;
 * confirm against <crypto/twofish.h> if that struct ever changes.
 */
27#define s0 0
28#define s1 1024
29#define s2 2048
30#define s3 3072
31#define w 4096
32#define k 4128
33
34/**********************************************************************
35 3-way twofish
36 **********************************************************************/
/*
 * Register aliases.  The numeric suffix 0/1/2 selects one of the three
 * blocks processed in parallel; the "d", "bl" and "bh" suffixes name the
 * 32-bit, low-byte and second-byte views of the same register.
 *
 * Note the deliberate overlaps: RIO and RT0 are both %rdx, RT1 is %rsi and
 * RAB2 is %rcx, so the incoming src/dst/xor arguments are destroyed once the
 * rounds start and must be consumed or saved beforehand.
 */
37#define CTX %rdi
38#define RIO %rdx
39
40#define RAB0 %rax
41#define RAB1 %rbx
42#define RAB2 %rcx
43
44#define RAB0d %eax
45#define RAB1d %ebx
46#define RAB2d %ecx
47
48#define RAB0bh %ah
49#define RAB1bh %bh
50#define RAB2bh %ch
51
52#define RAB0bl %al
53#define RAB1bl %bl
54#define RAB2bl %cl
55
56#define RCD0 %r8
57#define RCD1 %r9
58#define RCD2 %r10
59
60#define RCD0d %r8d
61#define RCD1d %r9d
62#define RCD2d %r10d
63
64#define RX0 %rbp
65#define RX1 %r11
66#define RX2 %r12
67
68#define RX0d %ebp
69#define RX1d %r11d
70#define RX2d %r12d
71
72#define RY0 %r13
73#define RY1 %r14
74#define RY2 %r15
75
76#define RY0d %r13d
77#define RY1d %r14d
78#define RY2d %r15d
79
80#define RT0 %rdx
81#define RT1 %rsi
82
83#define RT0d %edx
84#define RT1d %esi
85
/*
 * Consume the two lowest bytes of `ab`: the low byte indexes table T0 and
 * the second byte indexes table T1 (32-bit entries, both relative to CTX).
 * The T0 entry is applied to `dst` with op1 and the T1 entry with op2
 * (each is `mov` or `xor`), and `ab` is rotated right by `rot` bits to
 * expose the next bytes.  tmp1/tmp2 are clobbered; tmp2 may be the same
 * register as dst when op1 is `mov`, because the index is read before dst
 * is written.
 */
86#define do16bit_ror(rot, op1, op2, T0, T1, tmp1, tmp2, ab, dst) \
87 movzbl ab ## bl, tmp2 ## d; \
88 movzbl ab ## bh, tmp1 ## d; \
89 rorq $(rot), ab; \
90 op1##l T0(CTX, tmp2, 4), dst ## d; \
91 op2##l T1(CTX, tmp1, 4), dst ## d;
92
93/*
94 * Combined G1 & G2 function. Reordered with help of rotates to have moves
95 * at beginning.
96 */
/*
 * For each of the three blocks, build x and y from table lookups keyed by
 * the bytes of `ab`, then swap `ab` with `cd`.
 *
 * The first pass initialises x/y with a `mov` lookup (using the destination
 * register itself as the index temporary) followed by an `xor` lookup; the
 * second pass xors in two more lookups each, using RT0/RT1 as temporaries.
 * The rotate counts applied to each `ab` (32 + 48 + 32 + 16) add up to 128,
 * i.e. two full 64-bit rotations, so `ab` holds its original value again
 * when it is exchanged with `cd`.
 */
97#define g1g2_3(ab, cd, Tx0, Tx1, Tx2, Tx3, Ty0, Ty1, Ty2, Ty3, x, y) \
98 /* G1,1 && G2,1 */ \
99 do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 0, ab ## 0, x ## 0); \
100 do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 0, ab ## 0, y ## 0); \
101 \
102 do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 1, ab ## 1, x ## 1); \
103 do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 1, ab ## 1, y ## 1); \
104 \
105 do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 2, ab ## 2, x ## 2); \
106 do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 2, ab ## 2, y ## 2); \
107 \
108 /* G1,2 && G2,2 */ \
109 do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 0, x ## 0); \
110 do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 0, y ## 0); \
111 xchgq cd ## 0, ab ## 0; \
112 \
113 do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 1, x ## 1); \
114 do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 1, y ## 1); \
115 xchgq cd ## 1, ab ## 1; \
116 \
117 do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 2, x ## 2); \
118 do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 2, y ## 2); \
119 xchgq cd ## 2, ab ## 2;
120
/*
 * Finish encryption round `n` for one block.
 *
 * Computes x += y; y += x, then adds the two 32-bit words at k[2n] and
 * k[2n+1] to x and y respectively.  x is xored with the low half of `ab`
 * and rotated right by one; the high half of `ab` is rotated left by one
 * and xored with y.  The result is repacked into `ab` with the y-derived
 * word in the high half and the x-derived word in the low half.
 *
 * The final 64-bit `orq x, ab` relies on the upper 32 bits of x being zero,
 * which holds because x was last written through its 32-bit name.
 */
121#define enc_round_end(ab, x, y, n) \
122 addl y ## d, x ## d; \
123 addl x ## d, y ## d; \
124 addl k+4*(2*(n))(CTX), x ## d; \
125 xorl ab ## d, x ## d; \
126 addl k+4*(2*(n)+1)(CTX), y ## d; \
127 shrq $32, ab; \
128 roll $1, ab ## d; \
129 xorl y ## d, ab ## d; \
130 shlq $32, ab; \
131 rorl $1, x ## d; \
132 orq x, ab;
133
/*
 * Finish decryption round `n` for one block.
 *
 * Computes x += y; y += x, then adds the two 32-bit words at k[2n] and
 * k[2n+1] to x and y respectively.  y is xored with the low half of `ba`
 * and rotated right by one; the high half of `ba` is rotated left by one
 * and xored with x.  The result is repacked into `ba` with the x-derived
 * word in the high half and the y-derived word in the low half.
 *
 * The final 64-bit `orq y, ba` relies on the upper 32 bits of y being zero,
 * which holds because y was last written through its 32-bit name.
 */
134#define dec_round_end(ba, x, y, n) \
135 addl y ## d, x ## d; \
136 addl x ## d, y ## d; \
137 addl k+4*(2*(n))(CTX), x ## d; \
138 addl k+4*(2*(n)+1)(CTX), y ## d; \
139 xorl ba ## d, y ## d; \
140 shrq $32, ba; \
141 roll $1, ba ## d; \
142 xorl x ## d, ba ## d; \
143 shlq $32, ba; \
144 rorl $1, y ## d; \
145 orq y, ba;
146
/*
 * One encryption round over all three blocks: g1g2_3 fills RX/RY from the
 * s0..s3 tables and swaps ab<->cd, then enc_round_end folds RX/RY and the
 * round-n key words into each (post-swap) `ab` register.
 */
147#define encrypt_round3(ab, cd, n) \
148 g1g2_3(ab, cd, s0, s1, s2, s3, s0, s1, s2, s3, RX, RY); \
149 \
150 enc_round_end(ab ## 0, RX0, RY0, n); \
151 enc_round_end(ab ## 1, RX1, RY1, n); \
152 enc_round_end(ab ## 2, RX2, RY2, n);
153
/*
 * One decryption round over all three blocks.  Compared with
 * encrypt_round3, g1g2_3 is invoked with the table arguments rotated and
 * with RY/RX exchanged as its x/y outputs; dec_round_end then folds RX/RY
 * and the round-n key words into each (post-swap) `ba` register.
 */
154#define decrypt_round3(ba, dc, n) \
155 g1g2_3(ba, dc, s1, s2, s3, s0, s3, s0, s1, s2, RY, RX); \
156 \
157 dec_round_end(ba ## 0, RX0, RY0, n); \
158 dec_round_end(ba ## 1, RX1, RY1, n); \
159 dec_round_end(ba ## 2, RX2, RY2, n);
160
/* Cycle n of encryption = rounds 2n and 2n+1, in ascending order. */
161#define encrypt_cycle3(ab, cd, n) \
162 encrypt_round3(ab, cd, n*2); \
163 encrypt_round3(ab, cd, (n*2)+1);
164
/* Cycle n of decryption = rounds 2n+1 and 2n, in descending order. */
165#define decrypt_cycle3(ba, dc, n) \
166 decrypt_round3(ba, dc, (n*2)+1); \
167 decrypt_round3(ba, dc, (n*2));
168
/*
 * Load one 64-bit half from each of three consecutive 16-byte blocks at
 * `in` (32-bit word offset n within the block) into xy0..xy2 and xor each
 * with the 64 bits stored at w[m].  `m` is pasted unparenthesised, so pass
 * a plain literal.
 */
169#define inpack3(in, n, xy, m) \
170 movq 4*(n)(in), xy ## 0; \
171 xorq w+4*m(CTX), xy ## 0; \
172 \
173 movq 4*(4+(n))(in), xy ## 1; \
174 xorq w+4*m(CTX), xy ## 1; \
175 \
176 movq 4*(8+(n))(in), xy ## 2; \
177 xorq w+4*m(CTX), xy ## 2;
178
/*
 * Xor xy0..xy2 with the 64 bits stored at w[m] and write each to the
 * matching half (32-bit word offset n) of three consecutive 16-byte blocks
 * at `out`.  `op` selects how the store is done: `mov` overwrites the
 * destination, `xor` xors the value into what is already there.  `m` is
 * pasted unparenthesised, so pass a plain literal.
 */
179#define outunpack3(op, out, n, xy, m) \
180 xorq w+4*m(CTX), xy ## 0; \
181 op ## q xy ## 0, 4*(n)(out); \
182 \
183 xorq w+4*m(CTX), xy ## 1; \
184 op ## q xy ## 1, 4*(4+(n))(out); \
185 \
186 xorq w+4*m(CTX), xy ## 2; \
187 op ## q xy ## 2, 4*(8+(n))(out);
188
/*
 * Encryption input: first half of each block -> RAB (xored with w[0..1]),
 * second half -> RCD (xored with w[2..3]).
 */
189#define inpack_enc3() \
190 inpack3(RIO, 0, RAB, 0); \
191 inpack3(RIO, 2, RCD, 2);
192
/*
 * Encryption output: RAB (xored with w[6..7]) goes to the second half of
 * each block and RCD (xored with w[4..5]) to the first half, i.e. the two
 * halves are stored swapped relative to how inpack_enc3 loaded them.
 * `op` is forwarded to outunpack3 (mov = store, xor = xor into dst).
 */
193#define outunpack_enc3(op) \
194 outunpack3(op, RIO, 2, RAB, 6); \
195 outunpack3(op, RIO, 0, RCD, 4);
196
/*
 * Decryption input: load the block halves like inpack_enc3 but xor them
 * with w[4..5] / w[6..7], then rotate every register by 32 bits so the two
 * 32-bit words inside each half trade places.
 */
197#define inpack_dec3() \
198 inpack3(RIO, 0, RAB, 4); \
199 rorq $32, RAB0; \
200 rorq $32, RAB1; \
201 rorq $32, RAB2; \
202 inpack3(RIO, 2, RCD, 6); \
203 rorq $32, RCD0; \
204 rorq $32, RCD1; \
205 rorq $32, RCD2;
206
/*
 * Decryption output: undo the 32-bit word swap done by inpack_dec3, then
 * store RCD (xored with w[0..1]) to the first half of each block and RAB
 * (xored with w[2..3]) to the second half.  Always overwrites the
 * destination (op is fixed to mov).
 */
207#define outunpack_dec3() \
208 rorq $32, RCD0; \
209 rorq $32, RCD1; \
210 rorq $32, RCD2; \
211 outunpack3(mov, RIO, 0, RCD, 0); \
212 rorq $32, RAB0; \
213 rorq $32, RAB1; \
214 rorq $32, RAB2; \
215 outunpack3(mov, RIO, 2, RAB, 2);
216
/*
 * __twofish_enc_blk_3way - encrypt three consecutive 16-byte blocks.
 *
 * Reads 48 bytes from src, runs 8 cycles (16 rounds) on the three blocks in
 * parallel and writes 48 bytes to dst.  When the fourth argument is
 * non-zero the result is xored into dst instead of overwriting it.
 *
 * %rbx, %rbp and %r12-%r15 are saved and restored.  dst and the xor flag are
 * spilled to the stack because %rsi (RT1), %rdx (RT0) and %rcx (RAB2) are
 * all reused as working registers during the rounds.
 */
217.align 8
218.global __twofish_enc_blk_3way
219.type __twofish_enc_blk_3way,@function;
220
221__twofish_enc_blk_3way:
222 /* input:
223 * %rdi: ctx, CTX
224 * %rsi: dst
225 * %rdx: src, RIO
226 * %rcx: bool, if true: xor output
227 */
228 pushq %r15;
229 pushq %r14;
230 pushq %r13;
231 pushq %r12;
232 pushq %rbp;
233 pushq %rbx;
234
235 pushq %rcx; /* bool xor */
236 pushq %rsi; /* dst */
237
 /* last use of src: RIO (%rdx) doubles as RT0 from here on */
238 inpack_enc3();
239
240 encrypt_cycle3(RAB, RCD, 0);
241 encrypt_cycle3(RAB, RCD, 1);
242 encrypt_cycle3(RAB, RCD, 2);
243 encrypt_cycle3(RAB, RCD, 3);
244 encrypt_cycle3(RAB, RCD, 4);
245 encrypt_cycle3(RAB, RCD, 5);
246 encrypt_cycle3(RAB, RCD, 6);
247 encrypt_cycle3(RAB, RCD, 7);
248
249 popq RIO; /* dst */
250 popq %rbp; /* bool xor */
251
 /* only the low byte of the flag is tested */
252 testb %bpl, %bpl;
253 jnz __enc_xor3;
254
255 outunpack_enc3(mov);
256
257 popq %rbx;
258 popq %rbp;
259 popq %r12;
260 popq %r13;
261 popq %r14;
262 popq %r15;
263 ret;
264
 /* xor variant: same epilogue, but the result is xored into dst */
265__enc_xor3:
266 outunpack_enc3(xor);
267
268 popq %rbx;
269 popq %rbp;
270 popq %r12;
271 popq %r13;
272 popq %r14;
273 popq %r15;
274 ret;
275
/*
 * twofish_dec_blk_3way - decrypt three consecutive 16-byte blocks.
 *
 * Reads 48 bytes from src, runs the 8 cycles in descending order (7..0) on
 * the three blocks in parallel and stores 48 bytes to dst.
 *
 * %rbx, %rbp and %r12-%r15 are saved and restored.  dst is spilled to the
 * stack because %rsi (RT1) and %rdx (RT0) are reused as working registers
 * during the rounds.
 */
276.global twofish_dec_blk_3way
277.type twofish_dec_blk_3way,@function;
278
279twofish_dec_blk_3way:
280 /* input:
281 * %rdi: ctx, CTX
282 * %rsi: dst
283 * %rdx: src, RIO
284 */
285 pushq %r15;
286 pushq %r14;
287 pushq %r13;
288 pushq %r12;
289 pushq %rbp;
290 pushq %rbx;
291
292 pushq %rsi; /* dst */
293
 /* last use of src: RIO (%rdx) doubles as RT0 from here on */
294 inpack_dec3();
295
296 decrypt_cycle3(RAB, RCD, 7);
297 decrypt_cycle3(RAB, RCD, 6);
298 decrypt_cycle3(RAB, RCD, 5);
299 decrypt_cycle3(RAB, RCD, 4);
300 decrypt_cycle3(RAB, RCD, 3);
301 decrypt_cycle3(RAB, RCD, 2);
302 decrypt_cycle3(RAB, RCD, 1);
303 decrypt_cycle3(RAB, RCD, 0);
304
305 popq RIO; /* dst */
306
307 outunpack_dec3();
308
309 popq %rbx;
310 popq %rbp;
311 popq %r12;
312 popq %r13;
313 popq %r14;
314 popq %r15;
315 ret;
316
diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c
new file mode 100644
index 000000000000..0cbf9faea86a
--- /dev/null
+++ b/arch/x86/crypto/twofish_glue_3way.c
@@ -0,0 +1,472 @@
1/*
2 * Glue Code for 3-way parallel assembler optimized version of Twofish
3 *
4 * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
5 *
6 * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
7 * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
8 * CTR part based on code (crypto/ctr.c) by:
9 * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
24 * USA
25 *
26 */
27
28#include <linux/crypto.h>
29#include <linux/init.h>
30#include <linux/module.h>
31#include <linux/types.h>
32#include <crypto/algapi.h>
33#include <crypto/twofish.h>
34#include <crypto/b128ops.h>
35
36/* regular block cipher functions from twofish_x86_64 module */
/*
 * Single-block primitives, defined outside this file.  They are used here
 * for CBC encryption, for the final partial CTR block and for whatever is
 * left over after the three-block batches.
 */
37asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst,
38 const u8 *src);
39asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst,
40 const u8 *src);
41
42/* 3-way parallel cipher functions */
/*
 * Both routines process three consecutive blocks (3 * TF_BLOCK_SIZE bytes)
 * per call.  For the encrypt routine, a true @xor makes it xor the result
 * into dst instead of storing it.
 */
43asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
44 const u8 *src, bool xor);
45asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst,
46 const u8 *src);
47
48static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
49 const u8 *src)
50{
51 __twofish_enc_blk_3way(ctx, dst, src, false);
52}
53
54static inline void twofish_enc_blk_xor_3way(struct twofish_ctx *ctx, u8 *dst,
55 const u8 *src)
56{
57 __twofish_enc_blk_3way(ctx, dst, src, true);
58}
59
60static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
61 void (*fn)(struct twofish_ctx *, u8 *, const u8 *),
62 void (*fn_3way)(struct twofish_ctx *, u8 *, const u8 *))
63{
64 struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
65 unsigned int bsize = TF_BLOCK_SIZE;
66 unsigned int nbytes;
67 int err;
68
69 err = blkcipher_walk_virt(desc, walk);
70
71 while ((nbytes = walk->nbytes)) {
72 u8 *wsrc = walk->src.virt.addr;
73 u8 *wdst = walk->dst.virt.addr;
74
75 /* Process three block batch */
76 if (nbytes >= bsize * 3) {
77 do {
78 fn_3way(ctx, wdst, wsrc);
79
80 wsrc += bsize * 3;
81 wdst += bsize * 3;
82 nbytes -= bsize * 3;
83 } while (nbytes >= bsize * 3);
84
85 if (nbytes < bsize)
86 goto done;
87 }
88
89 /* Handle leftovers */
90 do {
91 fn(ctx, wdst, wsrc);
92
93 wsrc += bsize;
94 wdst += bsize;
95 nbytes -= bsize;
96 } while (nbytes >= bsize);
97
98done:
99 err = blkcipher_walk_done(desc, walk, nbytes);
100 }
101
102 return err;
103}
104
105static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
106 struct scatterlist *src, unsigned int nbytes)
107{
108 struct blkcipher_walk walk;
109
110 blkcipher_walk_init(&walk, dst, src, nbytes);
111 return ecb_crypt(desc, &walk, twofish_enc_blk, twofish_enc_blk_3way);
112}
113
114static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
115 struct scatterlist *src, unsigned int nbytes)
116{
117 struct blkcipher_walk walk;
118
119 blkcipher_walk_init(&walk, dst, src, nbytes);
120 return ecb_crypt(desc, &walk, twofish_dec_blk, twofish_dec_blk_3way);
121}
122
/*
 * Algorithm descriptor for the ECB mode provided by this module.  It is
 * registered under the generic name "ecb(twofish)" with driver name
 * "ecb-twofish-3way" and priority 300, uses struct twofish_ctx as its
 * per-tfm context and takes no IV (ivsize is left unset).
 */
123static struct crypto_alg blk_ecb_alg = {
124 .cra_name = "ecb(twofish)",
125 .cra_driver_name = "ecb-twofish-3way",
126 .cra_priority = 300,
127 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
128 .cra_blocksize = TF_BLOCK_SIZE,
129 .cra_ctxsize = sizeof(struct twofish_ctx),
130 .cra_alignmask = 0,
131 .cra_type = &crypto_blkcipher_type,
132 .cra_module = THIS_MODULE,
133 .cra_list = LIST_HEAD_INIT(blk_ecb_alg.cra_list),
134 .cra_u = {
135 .blkcipher = {
136 .min_keysize = TF_MIN_KEY_SIZE,
137 .max_keysize = TF_MAX_KEY_SIZE,
138 .setkey = twofish_setkey,
139 .encrypt = ecb_encrypt,
140 .decrypt = ecb_decrypt,
141 },
142 },
143};
144
145static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
146 struct blkcipher_walk *walk)
147{
148 struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
149 unsigned int bsize = TF_BLOCK_SIZE;
150 unsigned int nbytes = walk->nbytes;
151 u128 *src = (u128 *)walk->src.virt.addr;
152 u128 *dst = (u128 *)walk->dst.virt.addr;
153 u128 *iv = (u128 *)walk->iv;
154
155 do {
156 u128_xor(dst, src, iv);
157 twofish_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
158 iv = dst;
159
160 src += 1;
161 dst += 1;
162 nbytes -= bsize;
163 } while (nbytes >= bsize);
164
165 u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv);
166 return nbytes;
167}
168
169static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
170 struct scatterlist *src, unsigned int nbytes)
171{
172 struct blkcipher_walk walk;
173 int err;
174
175 blkcipher_walk_init(&walk, dst, src, nbytes);
176 err = blkcipher_walk_virt(desc, &walk);
177
178 while ((nbytes = walk.nbytes)) {
179 nbytes = __cbc_encrypt(desc, &walk);
180 err = blkcipher_walk_done(desc, &walk, nbytes);
181 }
182
183 return err;
184}
185
/*
 * CBC-decrypt every whole block of the current walk chunk.
 *
 * The chunk is processed backwards, starting at its last block: each block
 * is decrypted and then xored with the ciphertext block in front of it
 * (src - 1), and the very first block is xored with walk->iv at `done`.
 * Three blocks at a time are handed to twofish_dec_blk_3way() while at
 * least three remain; the rest goes through twofish_dec_blk().
 *
 * The last ciphertext block is copied to last_iv before anything is
 * written and becomes the new walk->iv on return.
 *
 * Returns the number of trailing bytes (< TF_BLOCK_SIZE) left unprocessed.
 */
186static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
187 struct blkcipher_walk *walk)
188{
189 struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
190 unsigned int bsize = TF_BLOCK_SIZE;
191 unsigned int nbytes = walk->nbytes;
192 u128 *src = (u128 *)walk->src.virt.addr;
193 u128 *dst = (u128 *)walk->dst.virt.addr;
194 u128 ivs[3 - 1];
195 u128 last_iv;
196
197 /* Start of the last block. */
198 src += nbytes / bsize - 1;
199 dst += nbytes / bsize - 1;
200
201 last_iv = *src;
202
203 /* Process three block batch */
204 if (nbytes >= bsize * 3) {
205 do {
 /* step back so src/dst address the first block of the batch */
206 nbytes -= bsize * (3 - 1);
207 src -= 3 - 1;
208 dst -= 3 - 1;
209
 /*
  * Keep copies of the first two ciphertext blocks: they are the
  * chaining values for blocks 1 and 2 and are still needed after
  * the 3-way call has written dst (presumably dst may alias src
  * for in-place requests -- confirm with callers).
  */
210 ivs[0] = src[0];
211 ivs[1] = src[1];
212
213 twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src);
214
215 u128_xor(dst + 1, dst + 1, ivs + 0);
216 u128_xor(dst + 2, dst + 2, ivs + 1);
217
218 nbytes -= bsize;
219 if (nbytes < bsize)
220 goto done;
221
 /* chain the batch's first block with the block preceding it */
222 u128_xor(dst, dst, src - 1);
223 src -= 1;
224 dst -= 1;
225 } while (nbytes >= bsize * 3);
226
227 if (nbytes < bsize)
228 goto done;
229 }
230
231 /* Handle leftovers */
232 for (;;) {
233 twofish_dec_blk(ctx, (u8 *)dst, (u8 *)src);
234
235 nbytes -= bsize;
236 if (nbytes < bsize)
237 break;
238
239 u128_xor(dst, dst, src - 1);
240 src -= 1;
241 dst -= 1;
242 }
243
244done:
 /* dst now addresses the first block of the chunk */
245 u128_xor(dst, dst, (u128 *)walk->iv);
246 *(u128 *)walk->iv = last_iv;
247
248 return nbytes;
249}
250
251static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
252 struct scatterlist *src, unsigned int nbytes)
253{
254 struct blkcipher_walk walk;
255 int err;
256
257 blkcipher_walk_init(&walk, dst, src, nbytes);
258 err = blkcipher_walk_virt(desc, &walk);
259
260 while ((nbytes = walk.nbytes)) {
261 nbytes = __cbc_decrypt(desc, &walk);
262 err = blkcipher_walk_done(desc, &walk, nbytes);
263 }
264
265 return err;
266}
267
/*
 * Algorithm descriptor for the CBC mode provided by this module.  It is
 * registered under the generic name "cbc(twofish)" with driver name
 * "cbc-twofish-3way" and priority 300; the IV is one cipher block.
 */
268static struct crypto_alg blk_cbc_alg = {
269 .cra_name = "cbc(twofish)",
270 .cra_driver_name = "cbc-twofish-3way",
271 .cra_priority = 300,
272 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
273 .cra_blocksize = TF_BLOCK_SIZE,
274 .cra_ctxsize = sizeof(struct twofish_ctx),
275 .cra_alignmask = 0,
276 .cra_type = &crypto_blkcipher_type,
277 .cra_module = THIS_MODULE,
278 .cra_list = LIST_HEAD_INIT(blk_cbc_alg.cra_list),
279 .cra_u = {
280 .blkcipher = {
281 .min_keysize = TF_MIN_KEY_SIZE,
282 .max_keysize = TF_MAX_KEY_SIZE,
283 .ivsize = TF_BLOCK_SIZE,
284 .setkey = twofish_setkey,
285 .encrypt = cbc_encrypt,
286 .decrypt = cbc_decrypt,
287 },
288 },
289};
290
291static inline void u128_to_be128(be128 *dst, const u128 *src)
292{
293 dst->a = cpu_to_be64(src->a);
294 dst->b = cpu_to_be64(src->b);
295}
296
297static inline void be128_to_u128(u128 *dst, const be128 *src)
298{
299 dst->a = be64_to_cpu(src->a);
300 dst->b = be64_to_cpu(src->b);
301}
302
303static inline void u128_inc(u128 *i)
304{
305 i->b++;
306 if (!i->b)
307 i->a++;
308}
309
310static void ctr_crypt_final(struct blkcipher_desc *desc,
311 struct blkcipher_walk *walk)
312{
313 struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
314 u8 *ctrblk = walk->iv;
315 u8 keystream[TF_BLOCK_SIZE];
316 u8 *src = walk->src.virt.addr;
317 u8 *dst = walk->dst.virt.addr;
318 unsigned int nbytes = walk->nbytes;
319
320 twofish_enc_blk(ctx, keystream, ctrblk);
321 crypto_xor(keystream, src, nbytes);
322 memcpy(dst, keystream, nbytes);
323
324 crypto_inc(ctrblk, TF_BLOCK_SIZE);
325}
326
/*
 * CTR-process every whole block of the current walk chunk.
 *
 * The big-endian counter in walk->iv is converted to CPU endianness once,
 * incremented per block with u128_inc(), and written back at `done`.
 * Three blocks at a time go through twofish_enc_blk_xor_3way() while at
 * least three remain; the rest is handled one block at a time.
 *
 * Returns the number of trailing bytes (< TF_BLOCK_SIZE) left unprocessed.
 */
327static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
328 struct blkcipher_walk *walk)
329{
330 struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
331 unsigned int bsize = TF_BLOCK_SIZE;
332 unsigned int nbytes = walk->nbytes;
333 u128 *src = (u128 *)walk->src.virt.addr;
334 u128 *dst = (u128 *)walk->dst.virt.addr;
335 u128 ctrblk;
336 be128 ctrblocks[3];
337
338 be128_to_u128(&ctrblk, (be128 *)walk->iv);
339
340 /* Process three block batch */
341 if (nbytes >= bsize * 3) {
342 do {
 /*
  * The xor variant of the cipher xors the keystream into dst,
  * so dst must already contain the input data.
  */
343 if (dst != src) {
344 dst[0] = src[0];
345 dst[1] = src[1];
346 dst[2] = src[2];
347 }
348
349 /* create ctrblks for parallel encrypt */
350 u128_to_be128(&ctrblocks[0], &ctrblk);
351 u128_inc(&ctrblk);
352 u128_to_be128(&ctrblocks[1], &ctrblk);
353 u128_inc(&ctrblk);
354 u128_to_be128(&ctrblocks[2], &ctrblk);
355 u128_inc(&ctrblk);
356
357 twofish_enc_blk_xor_3way(ctx, (u8 *)dst,
358 (u8 *)ctrblocks);
359
360 src += 3;
361 dst += 3;
362 nbytes -= bsize * 3;
363 } while (nbytes >= bsize * 3);
364
365 if (nbytes < bsize)
366 goto done;
367 }
368
369 /* Handle leftovers */
370 do {
371 if (dst != src)
372 *dst = *src;
373
374 u128_to_be128(&ctrblocks[0], &ctrblk);
375 u128_inc(&ctrblk);
376
 /* encrypt the counter in place, then xor the keystream into dst */
377 twofish_enc_blk(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks);
378 u128_xor(dst, dst, (u128 *)ctrblocks);
379
380 src += 1;
381 dst += 1;
382 nbytes -= bsize;
383 } while (nbytes >= bsize);
384
385done:
386 u128_to_be128((be128 *)walk->iv, &ctrblk);
387 return nbytes;
388}
389
390static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
391 struct scatterlist *src, unsigned int nbytes)
392{
393 struct blkcipher_walk walk;
394 int err;
395
396 blkcipher_walk_init(&walk, dst, src, nbytes);
397 err = blkcipher_walk_virt_block(desc, &walk, TF_BLOCK_SIZE);
398
399 while ((nbytes = walk.nbytes) >= TF_BLOCK_SIZE) {
400 nbytes = __ctr_crypt(desc, &walk);
401 err = blkcipher_walk_done(desc, &walk, nbytes);
402 }
403
404 if (walk.nbytes) {
405 ctr_crypt_final(desc, &walk);
406 err = blkcipher_walk_done(desc, &walk, 0);
407 }
408
409 return err;
410}
411
412static struct crypto_alg blk_ctr_alg = {
413 .cra_name = "ctr(twofish)",
414 .cra_driver_name = "ctr-twofish-3way",
415 .cra_priority = 300,
416 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
417 .cra_blocksize = TF_BLOCK_SIZE,
418 .cra_ctxsize = sizeof(struct twofish_ctx),
419 .cra_alignmask = 0,
420 .cra_type = &crypto_blkcipher_type,
421 .cra_module = THIS_MODULE,
422 .cra_list = LIST_HEAD_INIT(blk_ctr_alg.cra_list),
423 .cra_u = {
424 .blkcipher = {
425 .min_keysize = TF_MIN_KEY_SIZE,
426 .max_keysize = TF_MAX_KEY_SIZE,
427 .ivsize = TF_BLOCK_SIZE,
428 .setkey = twofish_setkey,
429 .encrypt = ctr_crypt,
430 .decrypt = ctr_crypt,
431 },
432 },
433};
434
435int __init init(void)
436{
437 int err;
438
439 err = crypto_register_alg(&blk_ecb_alg);
440 if (err)
441 goto ecb_err;
442 err = crypto_register_alg(&blk_cbc_alg);
443 if (err)
444 goto cbc_err;
445 err = crypto_register_alg(&blk_ctr_alg);
446 if (err)
447 goto ctr_err;
448
449 return 0;
450
451ctr_err:
452 crypto_unregister_alg(&blk_cbc_alg);
453cbc_err:
454 crypto_unregister_alg(&blk_ecb_alg);
455ecb_err:
456 return err;
457}
458
459void __exit fini(void)
460{
461 crypto_unregister_alg(&blk_ctr_alg);
462 crypto_unregister_alg(&blk_cbc_alg);
463 crypto_unregister_alg(&blk_ecb_alg);
464}
465
/* Hook the registration/unregistration routines into module load/unload. */
466module_init(init);
467module_exit(fini);
468
/*
 * Module metadata.  The aliases let a request for "twofish" or
 * "twofish-asm" load this module.
 */
469MODULE_LICENSE("GPL");
470MODULE_DESCRIPTION("Twofish Cipher Algorithm, 3-way parallel asm optimized");
471MODULE_ALIAS("twofish");
472MODULE_ALIAS("twofish-asm");