aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Jussi Kivilinna <jussi.kivilinna@iki.fi> 2013-04-08 14:51:05 -0400
committer Herbert Xu <herbert@gondor.apana.org.au> 2013-04-25 09:01:52 -0400
commit 70177286e1d49dfa2ce565af10d1f63d9b769d77 (patch)
tree aadbcc61386b6abff14dcc8a14b470b9397146e0
parent 18be45270a80ab489d9402b63e1f103428f0afde (diff)
crypto: cast6-avx: use new optimized XTS code
Change cast6-avx to use the new XTS code, for smaller stack usage and a small
boost to performance.

tcrypt results, with Intel i5-2450M:

        enc     dec
16B     1.01x   1.01x
64B     1.01x   1.00x
256B    1.09x   1.02x
1024B   1.08x   1.06x
8192B   1.08x   1.07x

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
-rw-r--r-- arch/x86/crypto/cast6-avx-x86_64-asm_64.S 48
-rw-r--r-- arch/x86/crypto/cast6_avx_glue.c 91
2 files changed, 98 insertions, 41 deletions
diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
index f93b6105a0ce..e3531f833951 100644
--- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
@@ -4,7 +4,7 @@
4 * Copyright (C) 2012 Johannes Goetzfried 4 * Copyright (C) 2012 Johannes Goetzfried
5 * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> 5 * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
6 * 6 *
7 * Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> 7 * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
8 * 8 *
9 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by 10 * it under the terms of the GNU General Public License as published by
@@ -227,6 +227,8 @@
227.data 227.data
228 228
229.align 16 229.align 16
230.Lxts_gf128mul_and_shl1_mask:
231 .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
230.Lbswap_mask: 232.Lbswap_mask:
231 .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 233 .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
232.Lbswap128_mask: 234.Lbswap128_mask:
@@ -424,3 +426,47 @@ ENTRY(cast6_ctr_8way)
424 426
425 ret; 427 ret;
426ENDPROC(cast6_ctr_8way) 428ENDPROC(cast6_ctr_8way)
429
430ENTRY(cast6_xts_enc_8way)
431 /* input:
432 * %rdi: ctx, CTX
433 * %rsi: dst
434 * %rdx: src
435 * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
436 */
437
438 movq %rsi, %r11;
439
440 /* regs <= src, dst <= IVs, regs <= regs xor IVs */
441 load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2,
442 RX, RKR, RKM, .Lxts_gf128mul_and_shl1_mask);
443
444 call __cast6_enc_blk8;
445
446 /* dst <= regs xor IVs(in dst) */
447 store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
448
449 ret;
450ENDPROC(cast6_xts_enc_8way)
451
452ENTRY(cast6_xts_dec_8way)
453 /* input:
454 * %rdi: ctx, CTX
455 * %rsi: dst
456 * %rdx: src
457 * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
458 */
459
460 movq %rsi, %r11;
461
462 /* regs <= src, dst <= IVs, regs <= regs xor IVs */
463 load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2,
464 RX, RKR, RKM, .Lxts_gf128mul_and_shl1_mask);
465
466 call __cast6_dec_blk8;
467
468 /* dst <= regs xor IVs(in dst) */
469 store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
470
471 ret;
472ENDPROC(cast6_xts_dec_8way)
diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c
index 92f7ca24790a..8d0dfb86a559 100644
--- a/arch/x86/crypto/cast6_avx_glue.c
+++ b/arch/x86/crypto/cast6_avx_glue.c
@@ -4,6 +4,8 @@
4 * Copyright (C) 2012 Johannes Goetzfried 4 * Copyright (C) 2012 Johannes Goetzfried
5 * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> 5 * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
6 * 6 *
7 * Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
8 *
7 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by 10 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or 11 * the Free Software Foundation; either version 2 of the License, or
@@ -50,6 +52,23 @@ asmlinkage void cast6_cbc_dec_8way(struct cast6_ctx *ctx, u8 *dst,
50asmlinkage void cast6_ctr_8way(struct cast6_ctx *ctx, u8 *dst, const u8 *src, 52asmlinkage void cast6_ctr_8way(struct cast6_ctx *ctx, u8 *dst, const u8 *src,
51 le128 *iv); 53 le128 *iv);
52 54
55asmlinkage void cast6_xts_enc_8way(struct cast6_ctx *ctx, u8 *dst,
56 const u8 *src, le128 *iv);
57asmlinkage void cast6_xts_dec_8way(struct cast6_ctx *ctx, u8 *dst,
58 const u8 *src, le128 *iv);
59
60static void cast6_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
61{
62 glue_xts_crypt_128bit_one(ctx, dst, src, iv,
63 GLUE_FUNC_CAST(__cast6_encrypt));
64}
65
66static void cast6_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
67{
68 glue_xts_crypt_128bit_one(ctx, dst, src, iv,
69 GLUE_FUNC_CAST(__cast6_decrypt));
70}
71
53static void cast6_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) 72static void cast6_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
54{ 73{
55 be128 ctrblk; 74 be128 ctrblk;
@@ -87,6 +106,19 @@ static const struct common_glue_ctx cast6_ctr = {
87 } } 106 } }
88}; 107};
89 108
109static const struct common_glue_ctx cast6_enc_xts = {
110 .num_funcs = 2,
111 .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
112
113 .funcs = { {
114 .num_blocks = CAST6_PARALLEL_BLOCKS,
115 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_enc_8way) }
116 }, {
117 .num_blocks = 1,
118 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_enc) }
119 } }
120};
121
90static const struct common_glue_ctx cast6_dec = { 122static const struct common_glue_ctx cast6_dec = {
91 .num_funcs = 2, 123 .num_funcs = 2,
92 .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, 124 .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
@@ -113,6 +145,19 @@ static const struct common_glue_ctx cast6_dec_cbc = {
113 } } 145 } }
114}; 146};
115 147
148static const struct common_glue_ctx cast6_dec_xts = {
149 .num_funcs = 2,
150 .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
151
152 .funcs = { {
153 .num_blocks = CAST6_PARALLEL_BLOCKS,
154 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_dec_8way) }
155 }, {
156 .num_blocks = 1,
157 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_dec) }
158 } }
159};
160
116static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, 161static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
117 struct scatterlist *src, unsigned int nbytes) 162 struct scatterlist *src, unsigned int nbytes)
118{ 163{
@@ -307,54 +352,20 @@ static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
307 struct scatterlist *src, unsigned int nbytes) 352 struct scatterlist *src, unsigned int nbytes)
308{ 353{
309 struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); 354 struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
310 be128 buf[CAST6_PARALLEL_BLOCKS];
311 struct crypt_priv crypt_ctx = {
312 .ctx = &ctx->crypt_ctx,
313 .fpu_enabled = false,
314 };
315 struct xts_crypt_req req = {
316 .tbuf = buf,
317 .tbuflen = sizeof(buf),
318 355
319 .tweak_ctx = &ctx->tweak_ctx, 356 return glue_xts_crypt_128bit(&cast6_enc_xts, desc, dst, src, nbytes,
320 .tweak_fn = XTS_TWEAK_CAST(__cast6_encrypt), 357 XTS_TWEAK_CAST(__cast6_encrypt),
321 .crypt_ctx = &crypt_ctx, 358 &ctx->tweak_ctx, &ctx->crypt_ctx);
322 .crypt_fn = encrypt_callback,
323 };
324 int ret;
325
326 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
327 ret = xts_crypt(desc, dst, src, nbytes, &req);
328 cast6_fpu_end(crypt_ctx.fpu_enabled);
329
330 return ret;
331} 359}
332 360
333static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, 361static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
334 struct scatterlist *src, unsigned int nbytes) 362 struct scatterlist *src, unsigned int nbytes)
335{ 363{
336 struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); 364 struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
337 be128 buf[CAST6_PARALLEL_BLOCKS];
338 struct crypt_priv crypt_ctx = {
339 .ctx = &ctx->crypt_ctx,
340 .fpu_enabled = false,
341 };
342 struct xts_crypt_req req = {
343 .tbuf = buf,
344 .tbuflen = sizeof(buf),
345
346 .tweak_ctx = &ctx->tweak_ctx,
347 .tweak_fn = XTS_TWEAK_CAST(__cast6_encrypt),
348 .crypt_ctx = &crypt_ctx,
349 .crypt_fn = decrypt_callback,
350 };
351 int ret;
352 365
353 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; 366 return glue_xts_crypt_128bit(&cast6_dec_xts, desc, dst, src, nbytes,
354 ret = xts_crypt(desc, dst, src, nbytes, &req); 367 XTS_TWEAK_CAST(__cast6_encrypt),
355 cast6_fpu_end(crypt_ctx.fpu_enabled); 368 &ctx->tweak_ctx, &ctx->crypt_ctx);
356
357 return ret;
358} 369}
359 370
360static struct crypto_alg cast6_algs[10] = { { 371static struct crypto_alg cast6_algs[10] = { {