aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/crypto/des3_ede_glue.c
diff options
context:
space:
mode:
authorJussi Kivilinna <jussi.kivilinna@iki.fi>2014-06-09 13:59:54 -0400
committerHerbert Xu <herbert@gondor.apana.org.au>2014-06-20 09:27:58 -0400
commit6574e6c64e971c9adb629e81e497afdb52b1c9df (patch)
treeacfe43924f1dd49e686508b4359eccf4d857bbef /arch/x86/crypto/des3_ede_glue.c
parent87131507e1a60c4803d54816b86091490b67ad68 (diff)
crypto: des_3des - add x86-64 assembly implementation
Patch adds an x86_64 assembly implementation of the Triple DES EDE cipher algorithm. Two assembly implementations are provided. The first is a regular 'one block at a time' encrypt/decrypt function. The second is a 'three blocks at a time' function that gains a performance increase on out-of-order CPUs.

tcrypt test results:

Intel Core i5-4570:

des3_ede-asm vs des3_ede-generic:
size    ecb-enc ecb-dec cbc-enc cbc-dec ctr-enc ctr-dec
16B     1.21x   1.22x   1.27x   1.36x   1.25x   1.25x
64B     1.98x   1.96x   1.23x   2.04x   2.01x   2.00x
256B    2.34x   2.37x   1.21x   2.40x   2.38x   2.39x
1024B   2.50x   2.47x   1.22x   2.51x   2.52x   2.51x
8192B   2.51x   2.53x   1.21x   2.56x   2.54x   2.55x

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'arch/x86/crypto/des3_ede_glue.c')
-rw-r--r--arch/x86/crypto/des3_ede_glue.c509
1 files changed, 509 insertions, 0 deletions
diff --git a/arch/x86/crypto/des3_ede_glue.c b/arch/x86/crypto/des3_ede_glue.c
new file mode 100644
index 000000000000..ebc421543e44
--- /dev/null
+++ b/arch/x86/crypto/des3_ede_glue.c
@@ -0,0 +1,509 @@
1/*
2 * Glue Code for assembler optimized version of 3DES
3 *
4 * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
5 *
6 * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
7 * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
8 * CTR part based on code (crypto/ctr.c) by:
9 * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 */
22
23#include <asm/processor.h>
24#include <crypto/des.h>
25#include <linux/crypto.h>
26#include <linux/init.h>
27#include <linux/module.h>
28#include <linux/types.h>
29#include <crypto/algapi.h>
30
31struct des3_ede_x86_ctx {
32 u32 enc_expkey[DES3_EDE_EXPKEY_WORDS];
33 u32 dec_expkey[DES3_EDE_EXPKEY_WORDS];
34};
35
36/* regular block cipher functions */
37asmlinkage void des3_ede_x86_64_crypt_blk(const u32 *expkey, u8 *dst,
38 const u8 *src);
39
40/* 3-way parallel cipher functions */
41asmlinkage void des3_ede_x86_64_crypt_blk_3way(const u32 *expkey, u8 *dst,
42 const u8 *src);
43
44static inline void des3_ede_enc_blk(struct des3_ede_x86_ctx *ctx, u8 *dst,
45 const u8 *src)
46{
47 u32 *enc_ctx = ctx->enc_expkey;
48
49 des3_ede_x86_64_crypt_blk(enc_ctx, dst, src);
50}
51
52static inline void des3_ede_dec_blk(struct des3_ede_x86_ctx *ctx, u8 *dst,
53 const u8 *src)
54{
55 u32 *dec_ctx = ctx->dec_expkey;
56
57 des3_ede_x86_64_crypt_blk(dec_ctx, dst, src);
58}
59
60static inline void des3_ede_enc_blk_3way(struct des3_ede_x86_ctx *ctx, u8 *dst,
61 const u8 *src)
62{
63 u32 *enc_ctx = ctx->enc_expkey;
64
65 des3_ede_x86_64_crypt_blk_3way(enc_ctx, dst, src);
66}
67
68static inline void des3_ede_dec_blk_3way(struct des3_ede_x86_ctx *ctx, u8 *dst,
69 const u8 *src)
70{
71 u32 *dec_ctx = ctx->dec_expkey;
72
73 des3_ede_x86_64_crypt_blk_3way(dec_ctx, dst, src);
74}
75
76static void des3_ede_x86_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
77{
78 des3_ede_enc_blk(crypto_tfm_ctx(tfm), dst, src);
79}
80
81static void des3_ede_x86_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
82{
83 des3_ede_dec_blk(crypto_tfm_ctx(tfm), dst, src);
84}
85
86static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
87 const u32 *expkey)
88{
89 unsigned int bsize = DES3_EDE_BLOCK_SIZE;
90 unsigned int nbytes;
91 int err;
92
93 err = blkcipher_walk_virt(desc, walk);
94
95 while ((nbytes = walk->nbytes)) {
96 u8 *wsrc = walk->src.virt.addr;
97 u8 *wdst = walk->dst.virt.addr;
98
99 /* Process four block batch */
100 if (nbytes >= bsize * 3) {
101 do {
102 des3_ede_x86_64_crypt_blk_3way(expkey, wdst,
103 wsrc);
104
105 wsrc += bsize * 3;
106 wdst += bsize * 3;
107 nbytes -= bsize * 3;
108 } while (nbytes >= bsize * 3);
109
110 if (nbytes < bsize)
111 goto done;
112 }
113
114 /* Handle leftovers */
115 do {
116 des3_ede_x86_64_crypt_blk(expkey, wdst, wsrc);
117
118 wsrc += bsize;
119 wdst += bsize;
120 nbytes -= bsize;
121 } while (nbytes >= bsize);
122
123done:
124 err = blkcipher_walk_done(desc, walk, nbytes);
125 }
126
127 return err;
128}
129
130static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
131 struct scatterlist *src, unsigned int nbytes)
132{
133 struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
134 struct blkcipher_walk walk;
135
136 blkcipher_walk_init(&walk, dst, src, nbytes);
137 return ecb_crypt(desc, &walk, ctx->enc_expkey);
138}
139
140static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
141 struct scatterlist *src, unsigned int nbytes)
142{
143 struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
144 struct blkcipher_walk walk;
145
146 blkcipher_walk_init(&walk, dst, src, nbytes);
147 return ecb_crypt(desc, &walk, ctx->dec_expkey);
148}
149
150static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
151 struct blkcipher_walk *walk)
152{
153 struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
154 unsigned int bsize = DES3_EDE_BLOCK_SIZE;
155 unsigned int nbytes = walk->nbytes;
156 u64 *src = (u64 *)walk->src.virt.addr;
157 u64 *dst = (u64 *)walk->dst.virt.addr;
158 u64 *iv = (u64 *)walk->iv;
159
160 do {
161 *dst = *src ^ *iv;
162 des3_ede_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
163 iv = dst;
164
165 src += 1;
166 dst += 1;
167 nbytes -= bsize;
168 } while (nbytes >= bsize);
169
170 *(u64 *)walk->iv = *iv;
171 return nbytes;
172}
173
174static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
175 struct scatterlist *src, unsigned int nbytes)
176{
177 struct blkcipher_walk walk;
178 int err;
179
180 blkcipher_walk_init(&walk, dst, src, nbytes);
181 err = blkcipher_walk_virt(desc, &walk);
182
183 while ((nbytes = walk.nbytes)) {
184 nbytes = __cbc_encrypt(desc, &walk);
185 err = blkcipher_walk_done(desc, &walk, nbytes);
186 }
187
188 return err;
189}
190
191static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
192 struct blkcipher_walk *walk)
193{
194 struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
195 unsigned int bsize = DES3_EDE_BLOCK_SIZE;
196 unsigned int nbytes = walk->nbytes;
197 u64 *src = (u64 *)walk->src.virt.addr;
198 u64 *dst = (u64 *)walk->dst.virt.addr;
199 u64 ivs[3 - 1];
200 u64 last_iv;
201
202 /* Start of the last block. */
203 src += nbytes / bsize - 1;
204 dst += nbytes / bsize - 1;
205
206 last_iv = *src;
207
208 /* Process four block batch */
209 if (nbytes >= bsize * 3) {
210 do {
211 nbytes -= bsize * 3 - bsize;
212 src -= 3 - 1;
213 dst -= 3 - 1;
214
215 ivs[0] = src[0];
216 ivs[1] = src[1];
217
218 des3_ede_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src);
219
220 dst[1] ^= ivs[0];
221 dst[2] ^= ivs[1];
222
223 nbytes -= bsize;
224 if (nbytes < bsize)
225 goto done;
226
227 *dst ^= *(src - 1);
228 src -= 1;
229 dst -= 1;
230 } while (nbytes >= bsize * 3);
231 }
232
233 /* Handle leftovers */
234 for (;;) {
235 des3_ede_dec_blk(ctx, (u8 *)dst, (u8 *)src);
236
237 nbytes -= bsize;
238 if (nbytes < bsize)
239 break;
240
241 *dst ^= *(src - 1);
242 src -= 1;
243 dst -= 1;
244 }
245
246done:
247 *dst ^= *(u64 *)walk->iv;
248 *(u64 *)walk->iv = last_iv;
249
250 return nbytes;
251}
252
253static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
254 struct scatterlist *src, unsigned int nbytes)
255{
256 struct blkcipher_walk walk;
257 int err;
258
259 blkcipher_walk_init(&walk, dst, src, nbytes);
260 err = blkcipher_walk_virt(desc, &walk);
261
262 while ((nbytes = walk.nbytes)) {
263 nbytes = __cbc_decrypt(desc, &walk);
264 err = blkcipher_walk_done(desc, &walk, nbytes);
265 }
266
267 return err;
268}
269
270static void ctr_crypt_final(struct des3_ede_x86_ctx *ctx,
271 struct blkcipher_walk *walk)
272{
273 u8 *ctrblk = walk->iv;
274 u8 keystream[DES3_EDE_BLOCK_SIZE];
275 u8 *src = walk->src.virt.addr;
276 u8 *dst = walk->dst.virt.addr;
277 unsigned int nbytes = walk->nbytes;
278
279 des3_ede_enc_blk(ctx, keystream, ctrblk);
280 crypto_xor(keystream, src, nbytes);
281 memcpy(dst, keystream, nbytes);
282
283 crypto_inc(ctrblk, DES3_EDE_BLOCK_SIZE);
284}
285
286static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
287 struct blkcipher_walk *walk)
288{
289 struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
290 unsigned int bsize = DES3_EDE_BLOCK_SIZE;
291 unsigned int nbytes = walk->nbytes;
292 u64 *src = (u64 *)walk->src.virt.addr;
293 u64 *dst = (u64 *)walk->dst.virt.addr;
294 u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv);
295 __be64 ctrblocks[3];
296
297 /* Process four block batch */
298 if (nbytes >= bsize * 3) {
299 do {
300 /* create ctrblks for parallel encrypt */
301 ctrblocks[0] = cpu_to_be64(ctrblk++);
302 ctrblocks[1] = cpu_to_be64(ctrblk++);
303 ctrblocks[2] = cpu_to_be64(ctrblk++);
304
305 des3_ede_enc_blk_3way(ctx, (u8 *)ctrblocks,
306 (u8 *)ctrblocks);
307
308 dst[0] = src[0] ^ ctrblocks[0];
309 dst[1] = src[1] ^ ctrblocks[1];
310 dst[2] = src[2] ^ ctrblocks[2];
311
312 src += 3;
313 dst += 3;
314 } while ((nbytes -= bsize * 3) >= bsize * 3);
315
316 if (nbytes < bsize)
317 goto done;
318 }
319
320 /* Handle leftovers */
321 do {
322 ctrblocks[0] = cpu_to_be64(ctrblk++);
323
324 des3_ede_enc_blk(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks);
325
326 dst[0] = src[0] ^ ctrblocks[0];
327
328 src += 1;
329 dst += 1;
330 } while ((nbytes -= bsize) >= bsize);
331
332done:
333 *(__be64 *)walk->iv = cpu_to_be64(ctrblk);
334 return nbytes;
335}
336
337static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
338 struct scatterlist *src, unsigned int nbytes)
339{
340 struct blkcipher_walk walk;
341 int err;
342
343 blkcipher_walk_init(&walk, dst, src, nbytes);
344 err = blkcipher_walk_virt_block(desc, &walk, DES3_EDE_BLOCK_SIZE);
345
346 while ((nbytes = walk.nbytes) >= DES3_EDE_BLOCK_SIZE) {
347 nbytes = __ctr_crypt(desc, &walk);
348 err = blkcipher_walk_done(desc, &walk, nbytes);
349 }
350
351 if (walk.nbytes) {
352 ctr_crypt_final(crypto_blkcipher_ctx(desc->tfm), &walk);
353 err = blkcipher_walk_done(desc, &walk, 0);
354 }
355
356 return err;
357}
358
359static int des3_ede_x86_setkey(struct crypto_tfm *tfm, const u8 *key,
360 unsigned int keylen)
361{
362 struct des3_ede_x86_ctx *ctx = crypto_tfm_ctx(tfm);
363 u32 i, j, tmp;
364 int err;
365
366 /* Generate encryption context using generic implementation. */
367 err = __des3_ede_setkey(ctx->enc_expkey, &tfm->crt_flags, key, keylen);
368 if (err < 0)
369 return err;
370
371 /* Fix encryption context for this implementation and form decryption
372 * context. */
373 j = DES3_EDE_EXPKEY_WORDS - 2;
374 for (i = 0; i < DES3_EDE_EXPKEY_WORDS; i += 2, j -= 2) {
375 tmp = ror32(ctx->enc_expkey[i + 1], 4);
376 ctx->enc_expkey[i + 1] = tmp;
377
378 ctx->dec_expkey[j + 0] = ctx->enc_expkey[i + 0];
379 ctx->dec_expkey[j + 1] = tmp;
380 }
381
382 return 0;
383}
384
385static struct crypto_alg des3_ede_algs[4] = { {
386 .cra_name = "des3_ede",
387 .cra_driver_name = "des3_ede-asm",
388 .cra_priority = 200,
389 .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
390 .cra_blocksize = DES3_EDE_BLOCK_SIZE,
391 .cra_ctxsize = sizeof(struct des3_ede_x86_ctx),
392 .cra_alignmask = 0,
393 .cra_module = THIS_MODULE,
394 .cra_u = {
395 .cipher = {
396 .cia_min_keysize = DES3_EDE_KEY_SIZE,
397 .cia_max_keysize = DES3_EDE_KEY_SIZE,
398 .cia_setkey = des3_ede_x86_setkey,
399 .cia_encrypt = des3_ede_x86_encrypt,
400 .cia_decrypt = des3_ede_x86_decrypt,
401 }
402 }
403}, {
404 .cra_name = "ecb(des3_ede)",
405 .cra_driver_name = "ecb-des3_ede-asm",
406 .cra_priority = 300,
407 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
408 .cra_blocksize = DES3_EDE_BLOCK_SIZE,
409 .cra_ctxsize = sizeof(struct des3_ede_x86_ctx),
410 .cra_alignmask = 0,
411 .cra_type = &crypto_blkcipher_type,
412 .cra_module = THIS_MODULE,
413 .cra_u = {
414 .blkcipher = {
415 .min_keysize = DES3_EDE_KEY_SIZE,
416 .max_keysize = DES3_EDE_KEY_SIZE,
417 .setkey = des3_ede_x86_setkey,
418 .encrypt = ecb_encrypt,
419 .decrypt = ecb_decrypt,
420 },
421 },
422}, {
423 .cra_name = "cbc(des3_ede)",
424 .cra_driver_name = "cbc-des3_ede-asm",
425 .cra_priority = 300,
426 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
427 .cra_blocksize = DES3_EDE_BLOCK_SIZE,
428 .cra_ctxsize = sizeof(struct des3_ede_x86_ctx),
429 .cra_alignmask = 0,
430 .cra_type = &crypto_blkcipher_type,
431 .cra_module = THIS_MODULE,
432 .cra_u = {
433 .blkcipher = {
434 .min_keysize = DES3_EDE_KEY_SIZE,
435 .max_keysize = DES3_EDE_KEY_SIZE,
436 .ivsize = DES3_EDE_BLOCK_SIZE,
437 .setkey = des3_ede_x86_setkey,
438 .encrypt = cbc_encrypt,
439 .decrypt = cbc_decrypt,
440 },
441 },
442}, {
443 .cra_name = "ctr(des3_ede)",
444 .cra_driver_name = "ctr-des3_ede-asm",
445 .cra_priority = 300,
446 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
447 .cra_blocksize = 1,
448 .cra_ctxsize = sizeof(struct des3_ede_x86_ctx),
449 .cra_alignmask = 0,
450 .cra_type = &crypto_blkcipher_type,
451 .cra_module = THIS_MODULE,
452 .cra_u = {
453 .blkcipher = {
454 .min_keysize = DES3_EDE_KEY_SIZE,
455 .max_keysize = DES3_EDE_KEY_SIZE,
456 .ivsize = DES3_EDE_BLOCK_SIZE,
457 .setkey = des3_ede_x86_setkey,
458 .encrypt = ctr_crypt,
459 .decrypt = ctr_crypt,
460 },
461 },
462} };
463
464static bool is_blacklisted_cpu(void)
465{
466 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
467 return false;
468
469 if (boot_cpu_data.x86 == 0x0f) {
470 /*
471 * On Pentium 4, des3_ede-x86_64 is slower than generic C
472 * implementation because use of 64bit rotates (which are really
473 * slow on P4). Therefore blacklist P4s.
474 */
475 return true;
476 }
477
478 return false;
479}
480
481static int force;
482module_param(force, int, 0);
483MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
484
485static int __init des3_ede_x86_init(void)
486{
487 if (!force && is_blacklisted_cpu()) {
488 pr_info("des3_ede-x86_64: performance on this CPU would be suboptimal: disabling des3_ede-x86_64.\n");
489 return -ENODEV;
490 }
491
492 return crypto_register_algs(des3_ede_algs, ARRAY_SIZE(des3_ede_algs));
493}
494
495static void __exit des3_ede_x86_fini(void)
496{
497 crypto_unregister_algs(des3_ede_algs, ARRAY_SIZE(des3_ede_algs));
498}
499
500module_init(des3_ede_x86_init);
501module_exit(des3_ede_x86_fini);
502
503MODULE_LICENSE("GPL");
504MODULE_DESCRIPTION("Triple DES EDE Cipher Algorithm, asm optimized");
505MODULE_ALIAS("des3_ede");
506MODULE_ALIAS("des3_ede-asm");
507MODULE_ALIAS("des");
508MODULE_ALIAS("des-asm");
509MODULE_AUTHOR("Jussi Kivilinna <jussi.kivilinna@iki.fi>");