diff options
author:    Jussi Kivilinna <jussi.kivilinna@iki.fi>       2013-04-08 14:51:05 -0400
committer: Herbert Xu <herbert@gondor.apana.org.au>       2013-04-25 09:01:52 -0400
commit:    70177286e1d49dfa2ce565af10d1f63d9b769d77 (patch)
tree:      aadbcc61386b6abff14dcc8a14b470b9397146e0 /arch/x86/crypto
parent:    18be45270a80ab489d9402b63e1f103428f0afde (diff)

crypto: cast6-avx: use new optimized XTS code
Change cast6-avx to use the new XTS code, for smaller stack usage and small
boost to performance.
tcrypt results, with Intel i5-2450M:
enc dec
16B 1.01x 1.01x
64B 1.01x 1.00x
256B 1.09x 1.02x
1024B 1.08x 1.06x
8192B 1.08x 1.07x
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'arch/x86/crypto')
-rw-r--r-- | arch/x86/crypto/cast6-avx-x86_64-asm_64.S | 48 | ||||
-rw-r--r-- | arch/x86/crypto/cast6_avx_glue.c | 91 |
2 files changed, 98 insertions(+), 41 deletions(-)
diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S index f93b6105a0ce..e3531f833951 100644 --- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S | |||
@@ -4,7 +4,7 @@ | |||
4 | * Copyright (C) 2012 Johannes Goetzfried | 4 | * Copyright (C) 2012 Johannes Goetzfried |
5 | * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> | 5 | * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> |
6 | * | 6 | * |
7 | * Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | 7 | * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> |
8 | * | 8 | * |
9 | * This program is free software; you can redistribute it and/or modify | 9 | * This program is free software; you can redistribute it and/or modify |
10 | * it under the terms of the GNU General Public License as published by | 10 | * it under the terms of the GNU General Public License as published by |
@@ -227,6 +227,8 @@ | |||
227 | .data | 227 | .data |
228 | 228 | ||
229 | .align 16 | 229 | .align 16 |
230 | .Lxts_gf128mul_and_shl1_mask: | ||
231 | .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 | ||
230 | .Lbswap_mask: | 232 | .Lbswap_mask: |
231 | .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 | 233 | .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 |
232 | .Lbswap128_mask: | 234 | .Lbswap128_mask: |
@@ -424,3 +426,47 @@ ENTRY(cast6_ctr_8way) | |||
424 | 426 | ||
425 | ret; | 427 | ret; |
426 | ENDPROC(cast6_ctr_8way) | 428 | ENDPROC(cast6_ctr_8way) |
429 | |||
430 | ENTRY(cast6_xts_enc_8way) | ||
431 | /* input: | ||
432 | * %rdi: ctx, CTX | ||
433 | * %rsi: dst | ||
434 | * %rdx: src | ||
435 | * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) | ||
436 | */ | ||
437 | |||
438 | movq %rsi, %r11; | ||
439 | |||
440 | /* regs <= src, dst <= IVs, regs <= regs xor IVs */ | ||
441 | load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2, | ||
442 | RX, RKR, RKM, .Lxts_gf128mul_and_shl1_mask); | ||
443 | |||
444 | call __cast6_enc_blk8; | ||
445 | |||
446 | /* dst <= regs xor IVs(in dst) */ | ||
447 | store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | ||
448 | |||
449 | ret; | ||
450 | ENDPROC(cast6_xts_enc_8way) | ||
451 | |||
452 | ENTRY(cast6_xts_dec_8way) | ||
453 | /* input: | ||
454 | * %rdi: ctx, CTX | ||
455 | * %rsi: dst | ||
456 | * %rdx: src | ||
457 | * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) | ||
458 | */ | ||
459 | |||
460 | movq %rsi, %r11; | ||
461 | |||
462 | /* regs <= src, dst <= IVs, regs <= regs xor IVs */ | ||
463 | load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2, | ||
464 | RX, RKR, RKM, .Lxts_gf128mul_and_shl1_mask); | ||
465 | |||
466 | call __cast6_dec_blk8; | ||
467 | |||
468 | /* dst <= regs xor IVs(in dst) */ | ||
469 | store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | ||
470 | |||
471 | ret; | ||
472 | ENDPROC(cast6_xts_dec_8way) | ||
diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c index 92f7ca24790a..8d0dfb86a559 100644 --- a/arch/x86/crypto/cast6_avx_glue.c +++ b/arch/x86/crypto/cast6_avx_glue.c | |||
@@ -4,6 +4,8 @@ | |||
4 | * Copyright (C) 2012 Johannes Goetzfried | 4 | * Copyright (C) 2012 Johannes Goetzfried |
5 | * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> | 5 | * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> |
6 | * | 6 | * |
7 | * Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> | ||
8 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | 9 | * This program is free software; you can redistribute it and/or modify |
8 | * it under the terms of the GNU General Public License as published by | 10 | * it under the terms of the GNU General Public License as published by |
9 | * the Free Software Foundation; either version 2 of the License, or | 11 | * the Free Software Foundation; either version 2 of the License, or |
@@ -50,6 +52,23 @@ asmlinkage void cast6_cbc_dec_8way(struct cast6_ctx *ctx, u8 *dst, | |||
50 | asmlinkage void cast6_ctr_8way(struct cast6_ctx *ctx, u8 *dst, const u8 *src, | 52 | asmlinkage void cast6_ctr_8way(struct cast6_ctx *ctx, u8 *dst, const u8 *src, |
51 | le128 *iv); | 53 | le128 *iv); |
52 | 54 | ||
55 | asmlinkage void cast6_xts_enc_8way(struct cast6_ctx *ctx, u8 *dst, | ||
56 | const u8 *src, le128 *iv); | ||
57 | asmlinkage void cast6_xts_dec_8way(struct cast6_ctx *ctx, u8 *dst, | ||
58 | const u8 *src, le128 *iv); | ||
59 | |||
60 | static void cast6_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv) | ||
61 | { | ||
62 | glue_xts_crypt_128bit_one(ctx, dst, src, iv, | ||
63 | GLUE_FUNC_CAST(__cast6_encrypt)); | ||
64 | } | ||
65 | |||
66 | static void cast6_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv) | ||
67 | { | ||
68 | glue_xts_crypt_128bit_one(ctx, dst, src, iv, | ||
69 | GLUE_FUNC_CAST(__cast6_decrypt)); | ||
70 | } | ||
71 | |||
53 | static void cast6_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) | 72 | static void cast6_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) |
54 | { | 73 | { |
55 | be128 ctrblk; | 74 | be128 ctrblk; |
@@ -87,6 +106,19 @@ static const struct common_glue_ctx cast6_ctr = { | |||
87 | } } | 106 | } } |
88 | }; | 107 | }; |
89 | 108 | ||
109 | static const struct common_glue_ctx cast6_enc_xts = { | ||
110 | .num_funcs = 2, | ||
111 | .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, | ||
112 | |||
113 | .funcs = { { | ||
114 | .num_blocks = CAST6_PARALLEL_BLOCKS, | ||
115 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_enc_8way) } | ||
116 | }, { | ||
117 | .num_blocks = 1, | ||
118 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_enc) } | ||
119 | } } | ||
120 | }; | ||
121 | |||
90 | static const struct common_glue_ctx cast6_dec = { | 122 | static const struct common_glue_ctx cast6_dec = { |
91 | .num_funcs = 2, | 123 | .num_funcs = 2, |
92 | .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, | 124 | .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, |
@@ -113,6 +145,19 @@ static const struct common_glue_ctx cast6_dec_cbc = { | |||
113 | } } | 145 | } } |
114 | }; | 146 | }; |
115 | 147 | ||
148 | static const struct common_glue_ctx cast6_dec_xts = { | ||
149 | .num_funcs = 2, | ||
150 | .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, | ||
151 | |||
152 | .funcs = { { | ||
153 | .num_blocks = CAST6_PARALLEL_BLOCKS, | ||
154 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_dec_8way) } | ||
155 | }, { | ||
156 | .num_blocks = 1, | ||
157 | .fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_dec) } | ||
158 | } } | ||
159 | }; | ||
160 | |||
116 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 161 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
117 | struct scatterlist *src, unsigned int nbytes) | 162 | struct scatterlist *src, unsigned int nbytes) |
118 | { | 163 | { |
@@ -307,54 +352,20 @@ static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | |||
307 | struct scatterlist *src, unsigned int nbytes) | 352 | struct scatterlist *src, unsigned int nbytes) |
308 | { | 353 | { |
309 | struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | 354 | struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); |
310 | be128 buf[CAST6_PARALLEL_BLOCKS]; | ||
311 | struct crypt_priv crypt_ctx = { | ||
312 | .ctx = &ctx->crypt_ctx, | ||
313 | .fpu_enabled = false, | ||
314 | }; | ||
315 | struct xts_crypt_req req = { | ||
316 | .tbuf = buf, | ||
317 | .tbuflen = sizeof(buf), | ||
318 | 355 | ||
319 | .tweak_ctx = &ctx->tweak_ctx, | 356 | return glue_xts_crypt_128bit(&cast6_enc_xts, desc, dst, src, nbytes, |
320 | .tweak_fn = XTS_TWEAK_CAST(__cast6_encrypt), | 357 | XTS_TWEAK_CAST(__cast6_encrypt), |
321 | .crypt_ctx = &crypt_ctx, | 358 | &ctx->tweak_ctx, &ctx->crypt_ctx); |
322 | .crypt_fn = encrypt_callback, | ||
323 | }; | ||
324 | int ret; | ||
325 | |||
326 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
327 | ret = xts_crypt(desc, dst, src, nbytes, &req); | ||
328 | cast6_fpu_end(crypt_ctx.fpu_enabled); | ||
329 | |||
330 | return ret; | ||
331 | } | 359 | } |
332 | 360 | ||
333 | static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 361 | static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
334 | struct scatterlist *src, unsigned int nbytes) | 362 | struct scatterlist *src, unsigned int nbytes) |
335 | { | 363 | { |
336 | struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | 364 | struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); |
337 | be128 buf[CAST6_PARALLEL_BLOCKS]; | ||
338 | struct crypt_priv crypt_ctx = { | ||
339 | .ctx = &ctx->crypt_ctx, | ||
340 | .fpu_enabled = false, | ||
341 | }; | ||
342 | struct xts_crypt_req req = { | ||
343 | .tbuf = buf, | ||
344 | .tbuflen = sizeof(buf), | ||
345 | |||
346 | .tweak_ctx = &ctx->tweak_ctx, | ||
347 | .tweak_fn = XTS_TWEAK_CAST(__cast6_encrypt), | ||
348 | .crypt_ctx = &crypt_ctx, | ||
349 | .crypt_fn = decrypt_callback, | ||
350 | }; | ||
351 | int ret; | ||
352 | 365 | ||
353 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | 366 | return glue_xts_crypt_128bit(&cast6_dec_xts, desc, dst, src, nbytes, |
354 | ret = xts_crypt(desc, dst, src, nbytes, &req); | 367 | XTS_TWEAK_CAST(__cast6_encrypt), |
355 | cast6_fpu_end(crypt_ctx.fpu_enabled); | 368 | &ctx->tweak_ctx, &ctx->crypt_ctx); |
356 | |||
357 | return ret; | ||
358 | } | 369 | } |
359 | 370 | ||
360 | static struct crypto_alg cast6_algs[10] = { { | 371 | static struct crypto_alg cast6_algs[10] = { { |