author    Jussi Kivilinna <jussi.kivilinna@iki.fi>  2013-04-08 14:50:55 -0400
committer Herbert Xu <herbert@gondor.apana.org.au>  2013-04-25 09:01:51 -0400
commit    a05248ed2d9a83ae7c3e6db7c4ef9331c3dedc81
tree      62fead9aac2e2471dbc5d6df7b630589220eea3c  /arch/x86/crypto/glue_helper.c
parent    d2049d8566bf74723f0c353621174b37ff3d75ec
crypto: x86 - add more optimized XTS-mode for serpent-avx
This patch adds AVX-optimized XTS-mode helper functions/macros and converts
serpent-avx to use the new facilities. The benefits are slightly improved speed
and reduced stack usage, since the temporary IV array is no longer needed.
tcrypt results, with Intel i5-2450M:

          enc      dec
16B      1.00x    1.00x
64B      1.00x    1.00x
256B     1.04x    1.06x
1024B    1.09x    1.09x
8192B    1.10x    1.09x
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'arch/x86/crypto/glue_helper.c')
-rw-r--r--  arch/x86/crypto/glue_helper.c  97
1 file changed, 96 insertions(+), 1 deletion(-)
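For orientation before the diff: a cipher module hooks into the new facility by
filling a common_glue_ctx dispatch table and calling glue_xts_crypt_128bit().
Below is a minimal sketch, not part of this patch: the my_* names, the
my_xts_ctx layout, and the block counts are illustrative assumptions (the real
conversion is the serpent-avx side of this commit, which this page does not
show).

static void my_xts_enc_one(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
	/* one-block fallback built on the new per-block helper;
	 * my_encrypt_block must match common_glue_func_t */
	glue_xts_crypt_128bit_one(ctx, dst, src, iv, my_encrypt_block);
}

static const struct common_glue_ctx my_enc_xts = {
	.num_funcs = 2,
	.fpu_blocks_limit = 8,

	.funcs = { {
		.num_blocks = 8,	/* wide AVX path, 8 blocks per call */
		.fn_u = { .xts = my_xts_enc_8way }
	}, {
		.num_blocks = 1,	/* one block at a time */
		.fn_u = { .xts = my_xts_enc_one }
	} }
};

static int my_xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
			  struct scatterlist *src, unsigned int nbytes)
{
	struct my_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);

	/* the tweak function (here my_encrypt_block) encrypts the IV
	 * with the second key to produce the first tweak value T */
	return glue_xts_crypt_128bit(&my_enc_xts, desc, dst, src, nbytes,
				     my_encrypt_block,
				     &ctx->tweak_ctx, &ctx->crypt_ctx);
}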
diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c
index 22ce4f683e55..432f1d76ceb8 100644
--- a/arch/x86/crypto/glue_helper.c
+++ b/arch/x86/crypto/glue_helper.c
@@ -1,7 +1,7 @@
 /*
  * Shared glue code for 128bit block ciphers
  *
- * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
  *
  * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
  *   Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
@@ -304,4 +304,99 @@ int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
 }
 EXPORT_SYMBOL_GPL(glue_ctr_crypt_128bit);
 
+static unsigned int __glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
+					    void *ctx,
+					    struct blkcipher_desc *desc,
+					    struct blkcipher_walk *walk)
+{
+	const unsigned int bsize = 128 / 8;
+	unsigned int nbytes = walk->nbytes;
+	u128 *src = (u128 *)walk->src.virt.addr;
+	u128 *dst = (u128 *)walk->dst.virt.addr;
+	unsigned int num_blocks, func_bytes;
+	unsigned int i;
+
+	/* Process multi-block batch */
+	for (i = 0; i < gctx->num_funcs; i++) {
+		num_blocks = gctx->funcs[i].num_blocks;
+		func_bytes = bsize * num_blocks;
+
+		if (nbytes >= func_bytes) {
+			do {
+				gctx->funcs[i].fn_u.xts(ctx, dst, src,
+							(le128 *)walk->iv);
+
+				src += num_blocks;
+				dst += num_blocks;
+				nbytes -= func_bytes;
+			} while (nbytes >= func_bytes);
+
+			if (nbytes < bsize)
+				goto done;
+		}
+	}
+
+done:
+	return nbytes;
+}
+
+/* for implementations implementing faster XTS IV generator */
+int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
+			  struct blkcipher_desc *desc, struct scatterlist *dst,
+			  struct scatterlist *src, unsigned int nbytes,
+			  void (*tweak_fn)(void *ctx, u8 *dst, const u8 *src),
+			  void *tweak_ctx, void *crypt_ctx)
+{
+	const unsigned int bsize = 128 / 8;
+	bool fpu_enabled = false;
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+
+	err = blkcipher_walk_virt(desc, &walk);
+	nbytes = walk.nbytes;
+	if (!nbytes)
+		return err;
+
+	/* set minimum length to bsize, for tweak_fn */
+	fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
+				     desc, fpu_enabled,
+				     nbytes < bsize ? bsize : nbytes);
+
+	/* calculate first value of T */
+	tweak_fn(tweak_ctx, walk.iv, walk.iv);
+
+	while (nbytes) {
+		nbytes = __glue_xts_crypt_128bit(gctx, crypt_ctx, desc, &walk);
+
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+		nbytes = walk.nbytes;
+	}
+
+	glue_fpu_end(fpu_enabled);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit);
+
+void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src, le128 *iv,
+			       common_glue_func_t fn)
+{
+	le128 ivblk = *iv;
+
+	/* generate next IV */
+	le128_gf128mul_x_ble(iv, &ivblk);
+
+	/* CC <- T xor C */
+	u128_xor(dst, src, (u128 *)&ivblk);
+
+	/* PP <- D(Key2,CC) */
+	fn(ctx, (u8 *)dst, (u8 *)dst);
+
+	/* P <- T xor PP */
+	u128_xor(dst, dst, (u128 *)&ivblk);
+}
+EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit_one);
+
 MODULE_LICENSE("GPL");
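A note on the tweak arithmetic above: each call to glue_xts_crypt_128bit_one
consumes the current tweak T and advances the IV by multiplying it by x in
GF(2^128) via le128_gf128mul_x_ble. A minimal standalone sketch of that update
on two plain 64-bit words (an illustration, not the kernel's le128 type):

#include <stdint.h>

/*
 * Multiply the 128-bit XTS tweak by x in GF(2^128), using the
 * little-endian convention of XTS: t[0] holds bits 0..63,
 * t[1] holds bits 64..127.
 */
static void xts_next_tweak(uint64_t t[2])
{
	uint64_t carry = t[1] >> 63;		/* bit 127, shifted out below */

	t[1] = (t[1] << 1) | (t[0] >> 63);	/* shift the whole tweak left by 1 */
	t[0] <<= 1;
	if (carry)
		t[0] ^= 0x87;			/* reduce mod x^128 + x^7 + x^2 + x + 1 */
}

The CC/PP comments in glue_xts_crypt_128bit_one spell out one XTS block in the
decrypt direction (CC = T xor C, PP = D(Key2, CC), P = T xor PP); encryption
follows the identical structure with the block-encryption function passed as fn.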