| author | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2013-04-08 14:50:55 -0400 |
|---|---|---|
| committer | Herbert Xu <herbert@gondor.apana.org.au> | 2013-04-25 09:01:51 -0400 |
| commit | a05248ed2d9a83ae7c3e6db7c4ef9331c3dedc81 (patch) | |
| tree | 62fead9aac2e2471dbc5d6df7b630589220eea3c /arch/x86 | |
| parent | d2049d8566bf74723f0c353621174b37ff3d75ec (diff) | |
crypto: x86 - add more optimized XTS-mode for serpent-avx
This patch adds AVX-optimized XTS-mode helper functions/macros and converts
serpent-avx to use the new facilities. The benefits are slightly improved speed
and reduced stack usage, since the temporary IV array is no longer needed.
tcrypt results, with Intel i5-2450M:

| size | enc | dec |
|---|---|---|
| 16B | 1.00x | 1.00x |
| 64B | 1.00x | 1.00x |
| 256B | 1.04x | 1.06x |
| 1024B | 1.09x | 1.09x |
| 8192B | 1.10x | 1.09x |
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
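
For reference, these are the XTS (IEEE P1619) equations the new helpers implement, in the same t ⊕ αⁿ ∈ GF(2¹²⁸) notation the assembly comments below use:

    T₁   = E(Key2, IV)              (tweak: the sector number encrypted with the second key)
    Tⱼ₊₁ = Tⱼ ⊗ α                   (per-block update: multiply by α in GF(2¹²⁸))
    Cⱼ   = E(Key1, Pⱼ ⊕ Tⱼ) ⊕ Tⱼ    (whiten, encrypt, whiten)

Multiplication by α modulo x¹²⁸ + x⁷ + x² + x + 1 is just a 128-bit left shift with a conditional XOR of 0x87 into the low byte, which is exactly what the new gf128mul_x_ble macro and le128_gf128mul_x_ble() helper below compute.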
Diffstat (limited to 'arch/x86')
| mode | file | changed |
|---|---|---|
| -rw-r--r-- | arch/x86/crypto/glue_helper-asm-avx.S | 61 |
| -rw-r--r-- | arch/x86/crypto/glue_helper.c | 97 |
| -rw-r--r-- | arch/x86/crypto/serpent-avx-x86_64-asm_64.S | 45 |
| -rw-r--r-- | arch/x86/crypto/serpent_avx_glue.c | 87 |
| -rw-r--r-- | arch/x86/include/asm/crypto/glue_helper.h | 24 |
| -rw-r--r-- | arch/x86/include/asm/crypto/serpent-avx.h | 5 |

6 files changed, 273 insertions(+), 46 deletions(-)
```diff
diff --git a/arch/x86/crypto/glue_helper-asm-avx.S b/arch/x86/crypto/glue_helper-asm-avx.S
index f7b6ea2ddfdb..02ee2308fb38 100644
--- a/arch/x86/crypto/glue_helper-asm-avx.S
+++ b/arch/x86/crypto/glue_helper-asm-avx.S
@@ -1,7 +1,7 @@
 /*
  * Shared glue code for 128bit block ciphers, AVX assembler macros
  *
- * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -89,3 +89,62 @@
 	vpxor (6*16)(src), x6, x6; \
 	vpxor (7*16)(src), x7, x7; \
 	store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7);
+
+#define gf128mul_x_ble(iv, mask, tmp) \
+	vpsrad $31, iv, tmp; \
+	vpaddq iv, iv, iv; \
+	vpshufd $0x13, tmp, tmp; \
+	vpand mask, tmp, tmp; \
+	vpxor tmp, iv, iv;
+
+#define load_xts_8way(iv, src, dst, x0, x1, x2, x3, x4, x5, x6, x7, tiv, t0, \
+		      t1, xts_gf128mul_and_shl1_mask) \
+	vmovdqa xts_gf128mul_and_shl1_mask, t0; \
+	\
+	/* load IV */ \
+	vmovdqu (iv), tiv; \
+	vpxor (0*16)(src), tiv, x0; \
+	vmovdqu tiv, (0*16)(dst); \
+	\
+	/* construct and store IVs, also xor with source */ \
+	gf128mul_x_ble(tiv, t0, t1); \
+	vpxor (1*16)(src), tiv, x1; \
+	vmovdqu tiv, (1*16)(dst); \
+	\
+	gf128mul_x_ble(tiv, t0, t1); \
+	vpxor (2*16)(src), tiv, x2; \
+	vmovdqu tiv, (2*16)(dst); \
+	\
+	gf128mul_x_ble(tiv, t0, t1); \
+	vpxor (3*16)(src), tiv, x3; \
+	vmovdqu tiv, (3*16)(dst); \
+	\
+	gf128mul_x_ble(tiv, t0, t1); \
+	vpxor (4*16)(src), tiv, x4; \
+	vmovdqu tiv, (4*16)(dst); \
+	\
+	gf128mul_x_ble(tiv, t0, t1); \
+	vpxor (5*16)(src), tiv, x5; \
+	vmovdqu tiv, (5*16)(dst); \
+	\
+	gf128mul_x_ble(tiv, t0, t1); \
+	vpxor (6*16)(src), tiv, x6; \
+	vmovdqu tiv, (6*16)(dst); \
+	\
+	gf128mul_x_ble(tiv, t0, t1); \
+	vpxor (7*16)(src), tiv, x7; \
+	vmovdqu tiv, (7*16)(dst); \
+	\
+	gf128mul_x_ble(tiv, t0, t1); \
+	vmovdqu tiv, (iv);
+
+#define store_xts_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7) \
+	vpxor (0*16)(dst), x0, x0; \
+	vpxor (1*16)(dst), x1, x1; \
+	vpxor (2*16)(dst), x2, x2; \
+	vpxor (3*16)(dst), x3, x3; \
+	vpxor (4*16)(dst), x4, x4; \
+	vpxor (5*16)(dst), x5, x5; \
+	vpxor (6*16)(dst), x6, x6; \
+	vpxor (7*16)(dst), x7, x7; \
+	store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7);
```
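The gf128mul_x_ble macro above is the heart of the patch: it doubles the running tweak in GF(2¹²⁸) without leaving the SIMD registers. vpsrad $31 broadcasts the sign bit of every 32-bit lane of the old value into a mask, vpaddq iv,iv,iv doubles each 64-bit half (dropping both carries), vpshufd $0x13 routes the bit-127 carry mask to the low lane and the bit-63 carry mask to lane 2, and vpand with the {0x87, 1} constant keeps exactly the reduction byte and the cross-qword carry bit before they are XORed back in. A scalar sketch of the same operation (standalone C; the helper name xts_double_tweak is mine, but the arithmetic matches the le128_gf128mul_x_ble() added to glue_helper.h below):

```c
#include <stdint.h>

/*
 * Double a 128-bit XTS tweak in GF(2^128) with the "ble" (little-endian
 * bit order) convention: t[0] holds bits 0..63, t[1] holds bits 64..127.
 */
void xts_double_tweak(uint64_t t[2])
{
	/* 0x87 if bit 127 is set, else 0: the reduction polynomial tail */
	uint64_t carry127 = (uint64_t)((int64_t)t[1] >> 63) & 0x87;
	/* carry of bit 63 from the low half into the high half */
	uint64_t carry63 = t[0] >> 63;

	t[1] = (t[1] << 1) | carry63;
	t[0] = (t[0] << 1) ^ carry127;
}
```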
```diff
diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c
index 22ce4f683e55..432f1d76ceb8 100644
--- a/arch/x86/crypto/glue_helper.c
+++ b/arch/x86/crypto/glue_helper.c
@@ -1,7 +1,7 @@
 /*
  * Shared glue code for 128bit block ciphers
  *
- * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
  *
  * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
  *   Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
@@ -304,4 +304,99 @@ int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
 }
 EXPORT_SYMBOL_GPL(glue_ctr_crypt_128bit);
 
+static unsigned int __glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
+					    void *ctx,
+					    struct blkcipher_desc *desc,
+					    struct blkcipher_walk *walk)
+{
+	const unsigned int bsize = 128 / 8;
+	unsigned int nbytes = walk->nbytes;
+	u128 *src = (u128 *)walk->src.virt.addr;
+	u128 *dst = (u128 *)walk->dst.virt.addr;
+	unsigned int num_blocks, func_bytes;
+	unsigned int i;
+
+	/* Process multi-block batch */
+	for (i = 0; i < gctx->num_funcs; i++) {
+		num_blocks = gctx->funcs[i].num_blocks;
+		func_bytes = bsize * num_blocks;
+
+		if (nbytes >= func_bytes) {
+			do {
+				gctx->funcs[i].fn_u.xts(ctx, dst, src,
+							(le128 *)walk->iv);
+
+				src += num_blocks;
+				dst += num_blocks;
+				nbytes -= func_bytes;
+			} while (nbytes >= func_bytes);
+
+			if (nbytes < bsize)
+				goto done;
+		}
+	}
+
+done:
+	return nbytes;
+}
+
+/* for implementations implementing faster XTS IV generator */
+int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
+			  struct blkcipher_desc *desc, struct scatterlist *dst,
+			  struct scatterlist *src, unsigned int nbytes,
+			  void (*tweak_fn)(void *ctx, u8 *dst, const u8 *src),
+			  void *tweak_ctx, void *crypt_ctx)
+{
+	const unsigned int bsize = 128 / 8;
+	bool fpu_enabled = false;
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+
+	err = blkcipher_walk_virt(desc, &walk);
+	nbytes = walk.nbytes;
+	if (!nbytes)
+		return err;
+
+	/* set minimum length to bsize, for tweak_fn */
+	fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
+				     desc, fpu_enabled,
+				     nbytes < bsize ? bsize : nbytes);
+
+	/* calculate first value of T */
+	tweak_fn(tweak_ctx, walk.iv, walk.iv);
+
+	while (nbytes) {
+		nbytes = __glue_xts_crypt_128bit(gctx, crypt_ctx, desc, &walk);
+
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+		nbytes = walk.nbytes;
+	}
+
+	glue_fpu_end(fpu_enabled);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit);
+
+void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src, le128 *iv,
+			       common_glue_func_t fn)
+{
+	le128 ivblk = *iv;
+
+	/* generate next IV */
+	le128_gf128mul_x_ble(iv, &ivblk);
+
+	/* CC <- T xor C */
+	u128_xor(dst, src, (u128 *)&ivblk);
+
+	/* PP <- D(Key2,CC) */
+	fn(ctx, (u8 *)dst, (u8 *)dst);
+
+	/* P <- T xor PP */
+	u128_xor(dst, dst, (u128 *)&ivblk);
+}
+EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit_one);
+
 MODULE_LICENSE("GPL");
```
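With these exports, a cipher needs only a handful of lines to get AVX-accelerated XTS: a one-block fallback built on glue_xts_crypt_128bit_one(), a common_glue_ctx listing the batch sizes, and a thin blkcipher handler. A kernel-side sketch for a hypothetical cipher "foo" (all foo_* names are illustrative; the serpent conversion below is the real instance of this pattern):

```c
/* One-block fallback: the next tweak is derived in C. */
static void foo_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
	glue_xts_crypt_128bit_one(ctx, dst, src, iv,
				  GLUE_FUNC_CAST(__foo_encrypt));
}

static const struct common_glue_ctx foo_enc_xts = {
	.num_funcs = 2,
	.fpu_blocks_limit = FOO_PARALLEL_BLOCKS,

	.funcs = { {
		.num_blocks = FOO_PARALLEL_BLOCKS,
		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(foo_xts_enc_8way_avx) }
	}, {
		.num_blocks = 1,
		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(foo_xts_enc) }
	} }
};

static int foo_xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
			   struct scatterlist *src, unsigned int nbytes)
{
	struct foo_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);

	/* Key2 (tweak_ctx) encrypts the IV into T; Key1 encrypts the data. */
	return glue_xts_crypt_128bit(&foo_enc_xts, desc, dst, src, nbytes,
				     XTS_TWEAK_CAST(__foo_encrypt),
				     &ctx->tweak_ctx, &ctx->crypt_ctx);
}
```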
```diff
diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
index 43c938612b74..2f202f49872b 100644
--- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
@@ -4,8 +4,7 @@
  * Copyright (C) 2012 Johannes Goetzfried
  *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
  *
- * Based on arch/x86/crypto/serpent-sse2-x86_64-asm_64.S by
- *  Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ * Copyright © 2011-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -34,6 +33,8 @@
 
 .Lbswap128_mask:
 	.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+.Lxts_gf128mul_and_shl1_mask:
+	.byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
 
 .text
 
@@ -739,3 +740,43 @@ ENTRY(serpent_ctr_8way_avx)
 
 	ret;
 ENDPROC(serpent_ctr_8way_avx)
+
+ENTRY(serpent_xts_enc_8way_avx)
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst
+	 *	%rdx: src
+	 *	%rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
+	 */
+
+	/* regs <= src, dst <= IVs, regs <= regs xor IVs */
+	load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2,
+		      RK0, RK1, RK2, .Lxts_gf128mul_and_shl1_mask);
+
+	call __serpent_enc_blk8_avx;
+
+	/* dst <= regs xor IVs(in dst) */
+	store_xts_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+
+	ret;
+ENDPROC(serpent_xts_enc_8way_avx)
+
+ENTRY(serpent_xts_dec_8way_avx)
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst
+	 *	%rdx: src
+	 *	%rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
+	 */
+
+	/* regs <= src, dst <= IVs, regs <= regs xor IVs */
+	load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2,
+		      RK0, RK1, RK2, .Lxts_gf128mul_and_shl1_mask);
+
+	call __serpent_dec_blk8_avx;
+
+	/* dst <= regs xor IVs(in dst) */
+	store_xts_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
+
+	ret;
+ENDPROC(serpent_xts_dec_8way_avx)
```
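The new entry points follow their comments closely: the eight tweaks are parked in dst while the whitened blocks travel through the cipher in registers, which is how the temporary IV array of the old code path disappears. (The decryption store reads RC1, RD1, RB1, RE1 because that is the register order in which __serpent_dec_blk8_avx leaves its results.) In plain C the data flow is roughly the following sketch, reusing xts_double_tweak() from the earlier sketch; crypt8 stands in for the __serpent_{enc,dec}_blk8_avx cores, and a little-endian host is assumed, as on x86:

```c
#include <stdint.h>
#include <string.h>

void xts_double_tweak(uint64_t t[2]);            /* from the earlier sketch */
typedef void (*blk8_fn)(uint8_t blocks[8][16]);  /* the 8-way cipher core */

void xts_crypt_8way(blk8_fn crypt8, const uint8_t *src, uint8_t *dst,
		    uint8_t iv[16])
{
	uint64_t t[2];
	uint8_t x[8][16];
	int i, j;

	memcpy(t, iv, 16);
	for (i = 0; i < 8; i++) {
		memcpy(dst + i * 16, t, 16);          /* dst <= IVs */
		for (j = 0; j < 16; j++)              /* regs <= src xor IVs */
			x[i][j] = src[i * 16 + j] ^ dst[i * 16 + j];
		xts_double_tweak(t);                  /* next tweak */
	}

	crypt8(x);                                    /* eight blocks at once */

	for (i = 0; i < 8; i++)                       /* dst <= regs xor IVs */
		for (j = 0; j < 16; j++)
			dst[i * 16 + j] ^= x[i][j];

	memcpy(iv, t, 16);                            /* tweak for the next batch */
}
```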
```diff
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c
index 52abaaf28e7f..0f8519cf4ac2 100644
--- a/arch/x86/crypto/serpent_avx_glue.c
+++ b/arch/x86/crypto/serpent_avx_glue.c
@@ -4,8 +4,7 @@
  * Copyright (C) 2012 Johannes Goetzfried
  *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
  *
- * Glue code based on serpent_sse2_glue.c by:
- *  Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ * Copyright © 2011-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -53,6 +52,18 @@ static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
 	u128_xor(dst, src, (u128 *)&ctrblk);
 }
 
+static void serpent_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+{
+	glue_xts_crypt_128bit_one(ctx, dst, src, iv,
+				  GLUE_FUNC_CAST(__serpent_encrypt));
+}
+
+static void serpent_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+{
+	glue_xts_crypt_128bit_one(ctx, dst, src, iv,
+				  GLUE_FUNC_CAST(__serpent_decrypt));
+}
+
 static const struct common_glue_ctx serpent_enc = {
 	.num_funcs = 2,
 	.fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
@@ -79,6 +90,19 @@ static const struct common_glue_ctx serpent_ctr = {
 	} }
 };
 
+static const struct common_glue_ctx serpent_enc_xts = {
+	.num_funcs = 2,
+	.fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
+
+	.funcs = { {
+		.num_blocks = SERPENT_PARALLEL_BLOCKS,
+		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc_8way_avx) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc) }
+	} }
+};
+
 static const struct common_glue_ctx serpent_dec = {
 	.num_funcs = 2,
 	.fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
@@ -105,6 +129,19 @@ static const struct common_glue_ctx serpent_dec_cbc = {
 	} }
 };
 
+static const struct common_glue_ctx serpent_dec_xts = {
+	.num_funcs = 2,
+	.fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
+
+	.funcs = { {
+		.num_blocks = SERPENT_PARALLEL_BLOCKS,
+		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec_8way_avx) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec) }
+	} }
+};
+
 static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 		       struct scatterlist *src, unsigned int nbytes)
 {
@@ -299,54 +336,20 @@ static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 		       struct scatterlist *src, unsigned int nbytes)
 {
 	struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[SERPENT_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->crypt_ctx,
-		.fpu_enabled = false,
-	};
-	struct xts_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.tweak_ctx = &ctx->tweak_ctx,
-		.tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt),
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = encrypt_callback,
-	};
-	int ret;
 
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = xts_crypt(desc, dst, src, nbytes, &req);
-	serpent_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
+	return glue_xts_crypt_128bit(&serpent_enc_xts, desc, dst, src, nbytes,
+				     XTS_TWEAK_CAST(__serpent_encrypt),
+				     &ctx->tweak_ctx, &ctx->crypt_ctx);
 }
 
 static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 		       struct scatterlist *src, unsigned int nbytes)
 {
 	struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[SERPENT_PARALLEL_BLOCKS];
-	struct crypt_priv crypt_ctx = {
-		.ctx = &ctx->crypt_ctx,
-		.fpu_enabled = false,
-	};
-	struct xts_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.tweak_ctx = &ctx->tweak_ctx,
-		.tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt),
-		.crypt_ctx = &crypt_ctx,
-		.crypt_fn = decrypt_callback,
-	};
-	int ret;
 
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	ret = xts_crypt(desc, dst, src, nbytes, &req);
-	serpent_fpu_end(crypt_ctx.fpu_enabled);
-
-	return ret;
+	return glue_xts_crypt_128bit(&serpent_dec_xts, desc, dst, src, nbytes,
+				     XTS_TWEAK_CAST(__serpent_encrypt),
+				     &ctx->tweak_ctx, &ctx->crypt_ctx);
 }
 
 static struct crypto_alg serpent_algs[10] = { {
```
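After this conversion the algorithm is still registered as xts(serpent), so existing users (dm-crypt, userspace via AF_ALG) pick up the faster driver transparently. As a usage illustration only — a minimal AF_ALG client encrypting one 16-byte block, assuming the module is loaded and with all error handling omitted:

```c
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/if_alg.h>

#ifndef SOL_ALG
#define SOL_ALG 279
#endif

int main(void)
{
	struct sockaddr_alg sa = {
		.salg_family = AF_ALG,
		.salg_type   = "skcipher",
		.salg_name   = "xts(serpent)",  /* serpent-avx binds here when loaded */
	};
	unsigned char key[64] = { 0 };          /* Key1 || Key2, 2 x 256-bit */
	unsigned char iv[16] = { 0 };           /* sector-number tweak */
	unsigned char buf[16] = "plaintext block";
	char cbuf[CMSG_SPACE(4) + CMSG_SPACE(sizeof(struct af_alg_iv) + 16)] = { 0 };
	struct msghdr msg = { .msg_control = cbuf, .msg_controllen = sizeof(cbuf) };
	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
	struct af_alg_iv *ivm;
	struct cmsghdr *cmsg;
	int tfm, op;

	tfm = socket(AF_ALG, SOCK_SEQPACKET, 0);
	bind(tfm, (struct sockaddr *)&sa, sizeof(sa));
	setsockopt(tfm, SOL_ALG, ALG_SET_KEY, key, sizeof(key));
	op = accept(tfm, NULL, 0);

	cmsg = CMSG_FIRSTHDR(&msg);             /* select encryption */
	cmsg->cmsg_level = SOL_ALG;
	cmsg->cmsg_type = ALG_SET_OP;
	cmsg->cmsg_len = CMSG_LEN(4);
	*(uint32_t *)CMSG_DATA(cmsg) = ALG_OP_ENCRYPT;

	cmsg = CMSG_NXTHDR(&msg, cmsg);         /* pass the tweak IV */
	cmsg->cmsg_level = SOL_ALG;
	cmsg->cmsg_type = ALG_SET_IV;
	cmsg->cmsg_len = CMSG_LEN(sizeof(*ivm) + 16);
	ivm = (struct af_alg_iv *)CMSG_DATA(cmsg);
	ivm->ivlen = 16;
	memcpy(ivm->iv, iv, 16);

	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	sendmsg(op, &msg, 0);
	read(op, buf, sizeof(buf));             /* buf now holds the ciphertext */

	close(op);
	close(tfm);
	return 0;
}
```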
```diff
diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h
index e2d65b061d27..1eef55596e82 100644
--- a/arch/x86/include/asm/crypto/glue_helper.h
+++ b/arch/x86/include/asm/crypto/glue_helper.h
@@ -14,10 +14,13 @@ typedef void (*common_glue_func_t)(void *ctx, u8 *dst, const u8 *src);
 typedef void (*common_glue_cbc_func_t)(void *ctx, u128 *dst, const u128 *src);
 typedef void (*common_glue_ctr_func_t)(void *ctx, u128 *dst, const u128 *src,
 				       le128 *iv);
+typedef void (*common_glue_xts_func_t)(void *ctx, u128 *dst, const u128 *src,
+				       le128 *iv);
 
 #define GLUE_FUNC_CAST(fn) ((common_glue_func_t)(fn))
 #define GLUE_CBC_FUNC_CAST(fn) ((common_glue_cbc_func_t)(fn))
 #define GLUE_CTR_FUNC_CAST(fn) ((common_glue_ctr_func_t)(fn))
+#define GLUE_XTS_FUNC_CAST(fn) ((common_glue_xts_func_t)(fn))
 
 struct common_glue_func_entry {
 	unsigned int num_blocks; /* number of blocks that @fn will process */
@@ -25,6 +28,7 @@ struct common_glue_func_entry {
 		common_glue_func_t ecb;
 		common_glue_cbc_func_t cbc;
 		common_glue_ctr_func_t ctr;
+		common_glue_xts_func_t xts;
 	} fn_u;
 };
 
@@ -96,6 +100,16 @@ static inline void le128_inc(le128 *i)
 	i->b = cpu_to_le64(b);
 }
 
+static inline void le128_gf128mul_x_ble(le128 *dst, const le128 *src)
+{
+	u64 a = le64_to_cpu(src->a);
+	u64 b = le64_to_cpu(src->b);
+	u64 _tt = ((s64)a >> 63) & 0x87;
+
+	dst->a = cpu_to_le64((a << 1) ^ (b >> 63));
+	dst->b = cpu_to_le64((b << 1) ^ _tt);
+}
+
 extern int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
 				 struct blkcipher_desc *desc,
 				 struct scatterlist *dst,
@@ -118,4 +132,14 @@ extern int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
 				 struct scatterlist *dst,
 				 struct scatterlist *src, unsigned int nbytes);
 
+extern int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
+				 struct blkcipher_desc *desc,
+				 struct scatterlist *dst,
+				 struct scatterlist *src, unsigned int nbytes,
+				 common_glue_func_t tweak_fn, void *tweak_ctx,
+				 void *crypt_ctx);
+
+extern void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src,
+				      le128 *iv, common_glue_func_t fn);
+
 #endif /* _CRYPTO_GLUE_HELPER_H */
```
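Note the field layout le128_gf128mul_x_ble() assumes: b is the low qword of the tweak and a the high one, matching the {0x87, 1} layout of .Lxts_gf128mul_and_shl1_mask. A worked wrap-around case, written as a check against the scalar sketch from earlier (values mine, chosen so that bit 127 falls off):

```c
#include <assert.h>
#include <stdint.h>

void xts_double_tweak(uint64_t t[2]);   /* scalar sketch from earlier */

int main(void)
{
	/* t[0] = b (bits 0..63) = 0; t[1] = a (bits 64..127), only bit 127 set */
	uint64_t t[2] = { 0, 0x8000000000000000ULL };

	xts_double_tweak(t);

	/* bit 127 fell off: x^128 == x^7 + x^2 + x + 1 == 0x87 (mod the P1619 polynomial) */
	assert(t[0] == 0x87 && t[1] == 0);
	return 0;
}
```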
```diff
diff --git a/arch/x86/include/asm/crypto/serpent-avx.h b/arch/x86/include/asm/crypto/serpent-avx.h
index 0da1d3e2a55c..56e79cc57eaf 100644
--- a/arch/x86/include/asm/crypto/serpent-avx.h
+++ b/arch/x86/include/asm/crypto/serpent-avx.h
@@ -16,4 +16,9 @@ asmlinkage void serpent_cbc_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
 asmlinkage void serpent_ctr_8way_avx(struct serpent_ctx *ctx, u8 *dst,
 				     const u8 *src, le128 *iv);
 
+asmlinkage void serpent_xts_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+					 const u8 *src, le128 *iv);
+asmlinkage void serpent_xts_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+					 const u8 *src, le128 *iv);
+
 #endif
```