aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJoachim Fritschi <jfritschi@freenet.de>2006-06-20 07:12:02 -0400
committerHerbert Xu <herbert@gondor.apana.org.au>2006-09-20 21:16:29 -0400
commiteaf44088ff467410dd15a033fef118888002ffe6 (patch)
tree72b225b910342ae74e1b0915ceff61b4ead97883
parentb9f535ffe38f7eb61ac2219d32d97c377b69f70d (diff)
[CRYPTO] twofish: x86-64 assembly version
The patch passed the trycpt tests and automated filesystem tests. This rewrite resulted in some nice perfomance increase over my last patch. Short summary of the tcrypt benchmarks: Twofish Assembler vs. Twofish C (256bit 8kb block CBC) encrypt: -27% Cycles decrypt: -23% Cycles Twofish Assembler vs. AES Assembler (128bit 8kb block CBC) encrypt: +18% Cycles decrypt: +15% Cycles Twofish Assembler vs. AES Assembler (256bit 8kb block CBC) encrypt: -9% Cycles decrypt: -8% Cycles Full Output: http://homepages.tu-darmstadt.de/~fritschi/twofish/tcrypt-speed-twofish-c-x86_64.txt http://homepages.tu-darmstadt.de/~fritschi/twofish/tcrypt-speed-twofish-asm-x86_64.txt http://homepages.tu-darmstadt.de/~fritschi/twofish/tcrypt-speed-aes-asm-x86_64.txt Here is another bonnie++ benchmark with encrypted filesystems. Most runs maxed out the hd. It should give some idea what the module can do for encrypted filesystem performance even though you can't see the full numbers. http://homepages.tu-darmstadt.de/~fritschi/twofish/output_20060610_130806_x86_64.html Signed-off-by: Joachim Fritschi <jfritschi@freenet.de> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
-rw-r--r--arch/x86_64/crypto/Makefile3
-rw-r--r--arch/x86_64/crypto/twofish-x86_64-asm.S324
-rw-r--r--arch/x86_64/crypto/twofish.c97
-rw-r--r--crypto/Kconfig15
4 files changed, 439 insertions, 0 deletions
diff --git a/arch/x86_64/crypto/Makefile b/arch/x86_64/crypto/Makefile
index 426d20f4b72e..15b538a8b7f7 100644
--- a/arch/x86_64/crypto/Makefile
+++ b/arch/x86_64/crypto/Makefile
@@ -5,5 +5,8 @@
5# 5#
6 6
7obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o 7obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
8obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
8 9
9aes-x86_64-y := aes-x86_64-asm.o aes.o 10aes-x86_64-y := aes-x86_64-asm.o aes.o
11twofish-x86_64-y := twofish-x86_64-asm.o twofish.o
12
diff --git a/arch/x86_64/crypto/twofish-x86_64-asm.S b/arch/x86_64/crypto/twofish-x86_64-asm.S
new file mode 100644
index 000000000000..35974a586615
--- /dev/null
+++ b/arch/x86_64/crypto/twofish-x86_64-asm.S
@@ -0,0 +1,324 @@
1/***************************************************************************
2* Copyright (C) 2006 by Joachim Fritschi, <jfritschi@freenet.de> *
3* *
4* This program is free software; you can redistribute it and/or modify *
5* it under the terms of the GNU General Public License as published by *
6* the Free Software Foundation; either version 2 of the License, or *
7* (at your option) any later version. *
8* *
9* This program is distributed in the hope that it will be useful, *
10* but WITHOUT ANY WARRANTY; without even the implied warranty of *
11* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
12* GNU General Public License for more details. *
13* *
14* You should have received a copy of the GNU General Public License *
15* along with this program; if not, write to the *
16* Free Software Foundation, Inc., *
17* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
18***************************************************************************/
19
20.file "twofish-x86_64-asm.S"
21.text
22
23#include <asm/asm-offsets.h>
24
25#define a_offset 0
26#define b_offset 4
27#define c_offset 8
28#define d_offset 12
29
30/* Structure of the crypto context struct*/
31
32#define s0 0 /* S0 Array 256 Words each */
33#define s1 1024 /* S1 Array */
34#define s2 2048 /* S2 Array */
35#define s3 3072 /* S3 Array */
36#define w 4096 /* 8 whitening keys (word) */
37#define k 4128 /* key 1-32 ( word ) */
38
39/* define a few register aliases to allow macro substitution */
40
41#define R0 %rax
42#define R0D %eax
43#define R0B %al
44#define R0H %ah
45
46#define R1 %rbx
47#define R1D %ebx
48#define R1B %bl
49#define R1H %bh
50
51#define R2 %rcx
52#define R2D %ecx
53#define R2B %cl
54#define R2H %ch
55
56#define R3 %rdx
57#define R3D %edx
58#define R3B %dl
59#define R3H %dh
60
61
62/* performs input whitening */
63#define input_whitening(src,context,offset)\
64 xor w+offset(context), src;
65
66/* performs input whitening */
67#define output_whitening(src,context,offset)\
68 xor w+16+offset(context), src;
69
70
71/*
72 * a input register containing a (rotated 16)
73 * b input register containing b
74 * c input register containing c
75 * d input register containing d (already rol $1)
76 * operations on a and b are interleaved to increase performance
77 */
78#define encrypt_round(a,b,c,d,round)\
79 movzx b ## B, %edi;\
80 mov s1(%r11,%rdi,4),%r8d;\
81 movzx a ## B, %edi;\
82 mov s2(%r11,%rdi,4),%r9d;\
83 movzx b ## H, %edi;\
84 ror $16, b ## D;\
85 xor s2(%r11,%rdi,4),%r8d;\
86 movzx a ## H, %edi;\
87 ror $16, a ## D;\
88 xor s3(%r11,%rdi,4),%r9d;\
89 movzx b ## B, %edi;\
90 xor s3(%r11,%rdi,4),%r8d;\
91 movzx a ## B, %edi;\
92 xor (%r11,%rdi,4), %r9d;\
93 movzx b ## H, %edi;\
94 ror $15, b ## D;\
95 xor (%r11,%rdi,4), %r8d;\
96 movzx a ## H, %edi;\
97 xor s1(%r11,%rdi,4),%r9d;\
98 add %r8d, %r9d;\
99 add %r9d, %r8d;\
100 add k+round(%r11), %r9d;\
101 xor %r9d, c ## D;\
102 rol $15, c ## D;\
103 add k+4+round(%r11),%r8d;\
104 xor %r8d, d ## D;
105
106/*
107 * a input register containing a(rotated 16)
108 * b input register containing b
109 * c input register containing c
110 * d input register containing d (already rol $1)
111 * operations on a and b are interleaved to increase performance
112 * during the round a and b are prepared for the output whitening
113 */
114#define encrypt_last_round(a,b,c,d,round)\
115 mov b ## D, %r10d;\
116 shl $32, %r10;\
117 movzx b ## B, %edi;\
118 mov s1(%r11,%rdi,4),%r8d;\
119 movzx a ## B, %edi;\
120 mov s2(%r11,%rdi,4),%r9d;\
121 movzx b ## H, %edi;\
122 ror $16, b ## D;\
123 xor s2(%r11,%rdi,4),%r8d;\
124 movzx a ## H, %edi;\
125 ror $16, a ## D;\
126 xor s3(%r11,%rdi,4),%r9d;\
127 movzx b ## B, %edi;\
128 xor s3(%r11,%rdi,4),%r8d;\
129 movzx a ## B, %edi;\
130 xor (%r11,%rdi,4), %r9d;\
131 xor a, %r10;\
132 movzx b ## H, %edi;\
133 xor (%r11,%rdi,4), %r8d;\
134 movzx a ## H, %edi;\
135 xor s1(%r11,%rdi,4),%r9d;\
136 add %r8d, %r9d;\
137 add %r9d, %r8d;\
138 add k+round(%r11), %r9d;\
139 xor %r9d, c ## D;\
140 ror $1, c ## D;\
141 add k+4+round(%r11),%r8d;\
142 xor %r8d, d ## D
143
144/*
145 * a input register containing a
146 * b input register containing b (rotated 16)
147 * c input register containing c (already rol $1)
148 * d input register containing d
149 * operations on a and b are interleaved to increase performance
150 */
151#define decrypt_round(a,b,c,d,round)\
152 movzx a ## B, %edi;\
153 mov (%r11,%rdi,4), %r9d;\
154 movzx b ## B, %edi;\
155 mov s3(%r11,%rdi,4),%r8d;\
156 movzx a ## H, %edi;\
157 ror $16, a ## D;\
158 xor s1(%r11,%rdi,4),%r9d;\
159 movzx b ## H, %edi;\
160 ror $16, b ## D;\
161 xor (%r11,%rdi,4), %r8d;\
162 movzx a ## B, %edi;\
163 xor s2(%r11,%rdi,4),%r9d;\
164 movzx b ## B, %edi;\
165 xor s1(%r11,%rdi,4),%r8d;\
166 movzx a ## H, %edi;\
167 ror $15, a ## D;\
168 xor s3(%r11,%rdi,4),%r9d;\
169 movzx b ## H, %edi;\
170 xor s2(%r11,%rdi,4),%r8d;\
171 add %r8d, %r9d;\
172 add %r9d, %r8d;\
173 add k+round(%r11), %r9d;\
174 xor %r9d, c ## D;\
175 add k+4+round(%r11),%r8d;\
176 xor %r8d, d ## D;\
177 rol $15, d ## D;
178
179/*
180 * a input register containing a
181 * b input register containing b
182 * c input register containing c (already rol $1)
183 * d input register containing d
184 * operations on a and b are interleaved to increase performance
185 * during the round a and b are prepared for the output whitening
186 */
187#define decrypt_last_round(a,b,c,d,round)\
188 movzx a ## B, %edi;\
189 mov (%r11,%rdi,4), %r9d;\
190 movzx b ## B, %edi;\
191 mov s3(%r11,%rdi,4),%r8d;\
192 movzx b ## H, %edi;\
193 ror $16, b ## D;\
194 xor (%r11,%rdi,4), %r8d;\
195 movzx a ## H, %edi;\
196 mov b ## D, %r10d;\
197 shl $32, %r10;\
198 xor a, %r10;\
199 ror $16, a ## D;\
200 xor s1(%r11,%rdi,4),%r9d;\
201 movzx b ## B, %edi;\
202 xor s1(%r11,%rdi,4),%r8d;\
203 movzx a ## B, %edi;\
204 xor s2(%r11,%rdi,4),%r9d;\
205 movzx b ## H, %edi;\
206 xor s2(%r11,%rdi,4),%r8d;\
207 movzx a ## H, %edi;\
208 xor s3(%r11,%rdi,4),%r9d;\
209 add %r8d, %r9d;\
210 add %r9d, %r8d;\
211 add k+round(%r11), %r9d;\
212 xor %r9d, c ## D;\
213 add k+4+round(%r11),%r8d;\
214 xor %r8d, d ## D;\
215 ror $1, d ## D;
216
217.align 8
218.global twofish_enc_blk
219.global twofish_dec_blk
220
221twofish_enc_blk:
222 pushq R1
223
224 /* %rdi contains the crypto tfm adress */
225 /* %rsi contains the output adress */
226 /* %rdx contains the input adress */
227 add $crypto_tfm_ctx_offset, %rdi /* set ctx adress */
228 /* ctx adress is moved to free one non-rex register
229 as target for the 8bit high operations */
230 mov %rdi, %r11
231
232 movq (R3), R1
233 movq 8(R3), R3
234 input_whitening(R1,%r11,a_offset)
235 input_whitening(R3,%r11,c_offset)
236 mov R1D, R0D
237 rol $16, R0D
238 shr $32, R1
239 mov R3D, R2D
240 shr $32, R3
241 rol $1, R3D
242
243 encrypt_round(R0,R1,R2,R3,0);
244 encrypt_round(R2,R3,R0,R1,8);
245 encrypt_round(R0,R1,R2,R3,2*8);
246 encrypt_round(R2,R3,R0,R1,3*8);
247 encrypt_round(R0,R1,R2,R3,4*8);
248 encrypt_round(R2,R3,R0,R1,5*8);
249 encrypt_round(R0,R1,R2,R3,6*8);
250 encrypt_round(R2,R3,R0,R1,7*8);
251 encrypt_round(R0,R1,R2,R3,8*8);
252 encrypt_round(R2,R3,R0,R1,9*8);
253 encrypt_round(R0,R1,R2,R3,10*8);
254 encrypt_round(R2,R3,R0,R1,11*8);
255 encrypt_round(R0,R1,R2,R3,12*8);
256 encrypt_round(R2,R3,R0,R1,13*8);
257 encrypt_round(R0,R1,R2,R3,14*8);
258 encrypt_last_round(R2,R3,R0,R1,15*8);
259
260
261 output_whitening(%r10,%r11,a_offset)
262 movq %r10, (%rsi)
263
264 shl $32, R1
265 xor R0, R1
266
267 output_whitening(R1,%r11,c_offset)
268 movq R1, 8(%rsi)
269
270 popq R1
271 movq $1,%rax
272 ret
273
274twofish_dec_blk:
275 pushq R1
276
277 /* %rdi contains the crypto tfm adress */
278 /* %rsi contains the output adress */
279 /* %rdx contains the input adress */
280 add $crypto_tfm_ctx_offset, %rdi /* set ctx adress */
281 /* ctx adress is moved to free one non-rex register
282 as target for the 8bit high operations */
283 mov %rdi, %r11
284
285 movq (R3), R1
286 movq 8(R3), R3
287 output_whitening(R1,%r11,a_offset)
288 output_whitening(R3,%r11,c_offset)
289 mov R1D, R0D
290 shr $32, R1
291 rol $16, R1D
292 mov R3D, R2D
293 shr $32, R3
294 rol $1, R2D
295
296 decrypt_round(R0,R1,R2,R3,15*8);
297 decrypt_round(R2,R3,R0,R1,14*8);
298 decrypt_round(R0,R1,R2,R3,13*8);
299 decrypt_round(R2,R3,R0,R1,12*8);
300 decrypt_round(R0,R1,R2,R3,11*8);
301 decrypt_round(R2,R3,R0,R1,10*8);
302 decrypt_round(R0,R1,R2,R3,9*8);
303 decrypt_round(R2,R3,R0,R1,8*8);
304 decrypt_round(R0,R1,R2,R3,7*8);
305 decrypt_round(R2,R3,R0,R1,6*8);
306 decrypt_round(R0,R1,R2,R3,5*8);
307 decrypt_round(R2,R3,R0,R1,4*8);
308 decrypt_round(R0,R1,R2,R3,3*8);
309 decrypt_round(R2,R3,R0,R1,2*8);
310 decrypt_round(R0,R1,R2,R3,1*8);
311 decrypt_last_round(R2,R3,R0,R1,0);
312
313 input_whitening(%r10,%r11,a_offset)
314 movq %r10, (%rsi)
315
316 shl $32, R1
317 xor R0, R1
318
319 input_whitening(R1,%r11,c_offset)
320 movq R1, 8(%rsi)
321
322 popq R1
323 movq $1,%rax
324 ret
diff --git a/arch/x86_64/crypto/twofish.c b/arch/x86_64/crypto/twofish.c
new file mode 100644
index 000000000000..182d91d5cfb9
--- /dev/null
+++ b/arch/x86_64/crypto/twofish.c
@@ -0,0 +1,97 @@
1/*
2 * Glue Code for optimized x86_64 assembler version of TWOFISH
3 *
4 * Originally Twofish for GPG
5 * By Matthew Skala <mskala@ansuz.sooke.bc.ca>, July 26, 1998
6 * 256-bit key length added March 20, 1999
7 * Some modifications to reduce the text size by Werner Koch, April, 1998
8 * Ported to the kerneli patch by Marc Mutz <Marc@Mutz.com>
9 * Ported to CryptoAPI by Colin Slater <hoho@tacomeat.net>
10 *
11 * The original author has disclaimed all copyright interest in this
12 * code and thus put it in the public domain. The subsequent authors
13 * have put this under the GNU General Public License.
14 *
15 * This program is free software; you can redistribute it and/or modify
16 * it under the terms of the GNU General Public License as published by
17 * the Free Software Foundation; either version 2 of the License, or
18 * (at your option) any later version.
19 *
20 * This program is distributed in the hope that it will be useful,
21 * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 * GNU General Public License for more details.
24 *
25 * You should have received a copy of the GNU General Public License
26 * along with this program; if not, write to the Free Software
27 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
28 * USA
29 *
30 * This code is a "clean room" implementation, written from the paper
31 * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey,
32 * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available
33 * through http://www.counterpane.com/twofish.html
34 *
35 * For background information on multiplication in finite fields, used for
36 * the matrix operations in the key schedule, see the book _Contemporary
37 * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the
38 * Third Edition.
39 */
40
41#include <crypto/twofish.h>
42#include <linux/crypto.h>
43#include <linux/init.h>
44#include <linux/kernel.h>
45#include <linux/module.h>
46#include <linux/types.h>
47
48asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
49asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
50
51static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
52{
53 twofish_enc_blk(tfm, dst, src);
54}
55
56static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
57{
58 twofish_dec_blk(tfm, dst, src);
59}
60
61static struct crypto_alg alg = {
62 .cra_name = "twofish",
63 .cra_driver_name = "twofish-x86_64",
64 .cra_priority = 200,
65 .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
66 .cra_blocksize = TF_BLOCK_SIZE,
67 .cra_ctxsize = sizeof(struct twofish_ctx),
68 .cra_alignmask = 3,
69 .cra_module = THIS_MODULE,
70 .cra_list = LIST_HEAD_INIT(alg.cra_list),
71 .cra_u = {
72 .cipher = {
73 .cia_min_keysize = TF_MIN_KEY_SIZE,
74 .cia_max_keysize = TF_MAX_KEY_SIZE,
75 .cia_setkey = twofish_setkey,
76 .cia_encrypt = twofish_encrypt,
77 .cia_decrypt = twofish_decrypt
78 }
79 }
80};
81
82static int __init init(void)
83{
84 return crypto_register_alg(&alg);
85}
86
87static void __exit fini(void)
88{
89 crypto_unregister_alg(&alg);
90}
91
92module_init(init);
93module_exit(fini);
94
95MODULE_LICENSE("GPL");
96MODULE_DESCRIPTION ("Twofish Cipher Algorithm, x86_64 asm optimized");
97MODULE_ALIAS("twofish");
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 306738ceecb4..fa927a287a1d 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -165,6 +165,21 @@ config CRYPTO_TWOFISH_586
165 See also: 165 See also:
166 <http://www.schneier.com/twofish.html> 166 <http://www.schneier.com/twofish.html>
167 167
168config CRYPTO_TWOFISH_X86_64
169 tristate "Twofish cipher algorithm (x86_64)"
170 depends on CRYPTO && ((X86 || UML_X86) && 64BIT)
171 select CRYPTO_TWOFISH_COMMON
172 help
173 Twofish cipher algorithm (x86_64).
174
175 Twofish was submitted as an AES (Advanced Encryption Standard)
176 candidate cipher by researchers at CounterPane Systems. It is a
177 16 round block cipher supporting key sizes of 128, 192, and 256
178 bits.
179
180 See also:
181 <http://www.schneier.com/twofish.html>
182
168config CRYPTO_SERPENT 183config CRYPTO_SERPENT
169 tristate "Serpent cipher algorithm" 184 tristate "Serpent cipher algorithm"
170 depends on CRYPTO 185 depends on CRYPTO