diff options
author | Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | 2012-07-22 11:18:37 -0400 |
---|---|---|
committer | Herbert Xu <herbert@gondor.apana.org.au> | 2012-08-20 04:28:10 -0400 |
commit | 023af608254add7ba037cd634cc5f2fb21ff6420 (patch) | |
tree | 914c114c38c70841d3c794ea03c414dc110dab0c /arch/x86/crypto | |
parent | 35a1fc1873dd6deac6c005ead85424a8ee28183a (diff) |
crypto: aesni_intel - improve lrw and xts performance by utilizing parallel AES-NI hardware pipelines
Use parallel LRW and XTS encryption facilities to better utilize AES-NI
hardware pipelines and gain extra performance.
Tcrypt benchmark results (async), old vs new ratios:
Intel Core i5-2450M CPU (fam: 6, model: 42, step: 7)
aes:128bit
lrw:256bit xts:256bit
size lrw-enc lrw-dec xts-enc xts-dec
16B 0.99x 1.00x 1.22x 1.19x
64B 1.38x 1.50x 1.58x 1.61x
256B 2.04x 2.02x 2.27x 2.29x
1024B 2.56x 2.54x 2.89x 2.92x
8192B 2.85x 2.99x 3.40x 3.23x
aes:192bit
lrw:320bit xts:384bit
size lrw-enc lrw-dec xts-enc xts-dec
16B 1.08x 1.08x 1.16x 1.17x
64B 1.48x 1.54x 1.59x 1.65x
256B 2.18x 2.17x 2.29x 2.28x
1024B 2.67x 2.67x 2.87x 3.05x
8192B 2.93x 2.84x 3.28x 3.33x
aes:256bit
lrw:384bit xts:512bit
size lrw-enc lrw-dec xts-enc xts-dec
16B 1.07x 1.07x 1.18x 1.19x
64B 1.56x 1.56x 1.70x 1.71x
256B 2.22x 2.24x 2.46x 2.46x
1024B 2.76x 2.77x 3.13x 3.05x
8192B 2.99x 3.05x 3.40x 3.30x
Cc: Huang Ying <ying.huang@intel.com>
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Reviewed-by: Kim Phillips <kim.phillips@freescale.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'arch/x86/crypto')
-rw-r--r-- | arch/x86/crypto/aesni-intel_glue.c | 253 |
1 files changed, 218 insertions, 35 deletions
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 648347a05773..7c04d0da709b 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c | |||
@@ -28,6 +28,9 @@ | |||
28 | #include <crypto/aes.h> | 28 | #include <crypto/aes.h> |
29 | #include <crypto/cryptd.h> | 29 | #include <crypto/cryptd.h> |
30 | #include <crypto/ctr.h> | 30 | #include <crypto/ctr.h> |
31 | #include <crypto/b128ops.h> | ||
32 | #include <crypto/lrw.h> | ||
33 | #include <crypto/xts.h> | ||
31 | #include <asm/cpu_device_id.h> | 34 | #include <asm/cpu_device_id.h> |
32 | #include <asm/i387.h> | 35 | #include <asm/i387.h> |
33 | #include <asm/crypto/aes.h> | 36 | #include <asm/crypto/aes.h> |
@@ -41,18 +44,10 @@ | |||
41 | #define HAS_CTR | 44 | #define HAS_CTR |
42 | #endif | 45 | #endif |
43 | 46 | ||
44 | #if defined(CONFIG_CRYPTO_LRW) || defined(CONFIG_CRYPTO_LRW_MODULE) | ||
45 | #define HAS_LRW | ||
46 | #endif | ||
47 | |||
48 | #if defined(CONFIG_CRYPTO_PCBC) || defined(CONFIG_CRYPTO_PCBC_MODULE) | 47 | #if defined(CONFIG_CRYPTO_PCBC) || defined(CONFIG_CRYPTO_PCBC_MODULE) |
49 | #define HAS_PCBC | 48 | #define HAS_PCBC |
50 | #endif | 49 | #endif |
51 | 50 | ||
52 | #if defined(CONFIG_CRYPTO_XTS) || defined(CONFIG_CRYPTO_XTS_MODULE) | ||
53 | #define HAS_XTS | ||
54 | #endif | ||
55 | |||
56 | /* This data is stored at the end of the crypto_tfm struct. | 51 | /* This data is stored at the end of the crypto_tfm struct. |
57 | * It's a type of per "session" data storage location. | 52 | * It's a type of per "session" data storage location. |
58 | * This needs to be 16 byte aligned. | 53 | * This needs to be 16 byte aligned. |
@@ -79,6 +74,16 @@ struct aesni_hash_subkey_req_data { | |||
79 | #define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1)) | 74 | #define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1)) |
80 | #define RFC4106_HASH_SUBKEY_SIZE 16 | 75 | #define RFC4106_HASH_SUBKEY_SIZE 16 |
81 | 76 | ||
77 | struct aesni_lrw_ctx { | ||
78 | struct lrw_table_ctx lrw_table; | ||
79 | u8 raw_aes_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1]; | ||
80 | }; | ||
81 | |||
82 | struct aesni_xts_ctx { | ||
83 | u8 raw_tweak_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1]; | ||
84 | u8 raw_crypt_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1]; | ||
85 | }; | ||
86 | |||
82 | asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, | 87 | asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, |
83 | unsigned int key_len); | 88 | unsigned int key_len); |
84 | asmlinkage void aesni_enc(struct crypto_aes_ctx *ctx, u8 *out, | 89 | asmlinkage void aesni_enc(struct crypto_aes_ctx *ctx, u8 *out, |
@@ -398,13 +403,6 @@ static int ablk_rfc3686_ctr_init(struct crypto_tfm *tfm) | |||
398 | #endif | 403 | #endif |
399 | #endif | 404 | #endif |
400 | 405 | ||
401 | #ifdef HAS_LRW | ||
402 | static int ablk_lrw_init(struct crypto_tfm *tfm) | ||
403 | { | ||
404 | return ablk_init_common(tfm, "fpu(lrw(__driver-aes-aesni))"); | ||
405 | } | ||
406 | #endif | ||
407 | |||
408 | #ifdef HAS_PCBC | 406 | #ifdef HAS_PCBC |
409 | static int ablk_pcbc_init(struct crypto_tfm *tfm) | 407 | static int ablk_pcbc_init(struct crypto_tfm *tfm) |
410 | { | 408 | { |
@@ -412,12 +410,160 @@ static int ablk_pcbc_init(struct crypto_tfm *tfm) | |||
412 | } | 410 | } |
413 | #endif | 411 | #endif |
414 | 412 | ||
415 | #ifdef HAS_XTS | 413 | static void lrw_xts_encrypt_callback(void *ctx, u8 *blks, unsigned int nbytes) |
416 | static int ablk_xts_init(struct crypto_tfm *tfm) | ||
417 | { | 414 | { |
418 | return ablk_init_common(tfm, "fpu(xts(__driver-aes-aesni))"); | 415 | aesni_ecb_enc(ctx, blks, blks, nbytes); |
416 | } | ||
417 | |||
418 | static void lrw_xts_decrypt_callback(void *ctx, u8 *blks, unsigned int nbytes) | ||
419 | { | ||
420 | aesni_ecb_dec(ctx, blks, blks, nbytes); | ||
421 | } | ||
422 | |||
423 | static int lrw_aesni_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
424 | unsigned int keylen) | ||
425 | { | ||
426 | struct aesni_lrw_ctx *ctx = crypto_tfm_ctx(tfm); | ||
427 | int err; | ||
428 | |||
429 | err = aes_set_key_common(tfm, ctx->raw_aes_ctx, key, | ||
430 | keylen - AES_BLOCK_SIZE); | ||
431 | if (err) | ||
432 | return err; | ||
433 | |||
434 | return lrw_init_table(&ctx->lrw_table, key + keylen - AES_BLOCK_SIZE); | ||
435 | } | ||
436 | |||
437 | static void lrw_aesni_exit_tfm(struct crypto_tfm *tfm) | ||
438 | { | ||
439 | struct aesni_lrw_ctx *ctx = crypto_tfm_ctx(tfm); | ||
440 | |||
441 | lrw_free_table(&ctx->lrw_table); | ||
442 | } | ||
443 | |||
444 | static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
445 | struct scatterlist *src, unsigned int nbytes) | ||
446 | { | ||
447 | struct aesni_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
448 | be128 buf[8]; | ||
449 | struct lrw_crypt_req req = { | ||
450 | .tbuf = buf, | ||
451 | .tbuflen = sizeof(buf), | ||
452 | |||
453 | .table_ctx = &ctx->lrw_table, | ||
454 | .crypt_ctx = aes_ctx(ctx->raw_aes_ctx), | ||
455 | .crypt_fn = lrw_xts_encrypt_callback, | ||
456 | }; | ||
457 | int ret; | ||
458 | |||
459 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
460 | |||
461 | kernel_fpu_begin(); | ||
462 | ret = lrw_crypt(desc, dst, src, nbytes, &req); | ||
463 | kernel_fpu_end(); | ||
464 | |||
465 | return ret; | ||
466 | } | ||
467 | |||
468 | static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
469 | struct scatterlist *src, unsigned int nbytes) | ||
470 | { | ||
471 | struct aesni_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
472 | be128 buf[8]; | ||
473 | struct lrw_crypt_req req = { | ||
474 | .tbuf = buf, | ||
475 | .tbuflen = sizeof(buf), | ||
476 | |||
477 | .table_ctx = &ctx->lrw_table, | ||
478 | .crypt_ctx = aes_ctx(ctx->raw_aes_ctx), | ||
479 | .crypt_fn = lrw_xts_decrypt_callback, | ||
480 | }; | ||
481 | int ret; | ||
482 | |||
483 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
484 | |||
485 | kernel_fpu_begin(); | ||
486 | ret = lrw_crypt(desc, dst, src, nbytes, &req); | ||
487 | kernel_fpu_end(); | ||
488 | |||
489 | return ret; | ||
490 | } | ||
491 | |||
492 | static int xts_aesni_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
493 | unsigned int keylen) | ||
494 | { | ||
495 | struct aesni_xts_ctx *ctx = crypto_tfm_ctx(tfm); | ||
496 | u32 *flags = &tfm->crt_flags; | ||
497 | int err; | ||
498 | |||
499 | /* key consists of keys of equal size concatenated, therefore | ||
500 | * the length must be even | ||
501 | */ | ||
502 | if (keylen % 2) { | ||
503 | *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; | ||
504 | return -EINVAL; | ||
505 | } | ||
506 | |||
507 | /* first half of xts-key is for crypt */ | ||
508 | err = aes_set_key_common(tfm, ctx->raw_crypt_ctx, key, keylen / 2); | ||
509 | if (err) | ||
510 | return err; | ||
511 | |||
512 | /* second half of xts-key is for tweak */ | ||
513 | return aes_set_key_common(tfm, ctx->raw_tweak_ctx, key + keylen / 2, | ||
514 | keylen / 2); | ||
515 | } | ||
516 | |||
517 | |||
518 | static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
519 | struct scatterlist *src, unsigned int nbytes) | ||
520 | { | ||
521 | struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
522 | be128 buf[8]; | ||
523 | struct xts_crypt_req req = { | ||
524 | .tbuf = buf, | ||
525 | .tbuflen = sizeof(buf), | ||
526 | |||
527 | .tweak_ctx = aes_ctx(ctx->raw_tweak_ctx), | ||
528 | .tweak_fn = XTS_TWEAK_CAST(aesni_enc), | ||
529 | .crypt_ctx = aes_ctx(ctx->raw_crypt_ctx), | ||
530 | .crypt_fn = lrw_xts_encrypt_callback, | ||
531 | }; | ||
532 | int ret; | ||
533 | |||
534 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
535 | |||
536 | kernel_fpu_begin(); | ||
537 | ret = xts_crypt(desc, dst, src, nbytes, &req); | ||
538 | kernel_fpu_end(); | ||
539 | |||
540 | return ret; | ||
541 | } | ||
542 | |||
543 | static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
544 | struct scatterlist *src, unsigned int nbytes) | ||
545 | { | ||
546 | struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
547 | be128 buf[8]; | ||
548 | struct xts_crypt_req req = { | ||
549 | .tbuf = buf, | ||
550 | .tbuflen = sizeof(buf), | ||
551 | |||
552 | .tweak_ctx = aes_ctx(ctx->raw_tweak_ctx), | ||
553 | .tweak_fn = XTS_TWEAK_CAST(aesni_enc), | ||
554 | .crypt_ctx = aes_ctx(ctx->raw_crypt_ctx), | ||
555 | .crypt_fn = lrw_xts_decrypt_callback, | ||
556 | }; | ||
557 | int ret; | ||
558 | |||
559 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
560 | |||
561 | kernel_fpu_begin(); | ||
562 | ret = xts_crypt(desc, dst, src, nbytes, &req); | ||
563 | kernel_fpu_end(); | ||
564 | |||
565 | return ret; | ||
419 | } | 566 | } |
420 | #endif | ||
421 | 567 | ||
422 | #ifdef CONFIG_X86_64 | 568 | #ifdef CONFIG_X86_64 |
423 | static int rfc4106_init(struct crypto_tfm *tfm) | 569 | static int rfc4106_init(struct crypto_tfm *tfm) |
@@ -1035,10 +1181,10 @@ static struct crypto_alg aesni_algs[] = { { | |||
1035 | }, | 1181 | }, |
1036 | #endif | 1182 | #endif |
1037 | #endif | 1183 | #endif |
1038 | #ifdef HAS_LRW | 1184 | #ifdef HAS_PCBC |
1039 | }, { | 1185 | }, { |
1040 | .cra_name = "lrw(aes)", | 1186 | .cra_name = "pcbc(aes)", |
1041 | .cra_driver_name = "lrw-aes-aesni", | 1187 | .cra_driver_name = "pcbc-aes-aesni", |
1042 | .cra_priority = 400, | 1188 | .cra_priority = 400, |
1043 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 1189 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
1044 | .cra_blocksize = AES_BLOCK_SIZE, | 1190 | .cra_blocksize = AES_BLOCK_SIZE, |
@@ -1046,12 +1192,12 @@ static struct crypto_alg aesni_algs[] = { { | |||
1046 | .cra_alignmask = 0, | 1192 | .cra_alignmask = 0, |
1047 | .cra_type = &crypto_ablkcipher_type, | 1193 | .cra_type = &crypto_ablkcipher_type, |
1048 | .cra_module = THIS_MODULE, | 1194 | .cra_module = THIS_MODULE, |
1049 | .cra_init = ablk_lrw_init, | 1195 | .cra_init = ablk_pcbc_init, |
1050 | .cra_exit = ablk_exit, | 1196 | .cra_exit = ablk_exit, |
1051 | .cra_u = { | 1197 | .cra_u = { |
1052 | .ablkcipher = { | 1198 | .ablkcipher = { |
1053 | .min_keysize = AES_MIN_KEY_SIZE + AES_BLOCK_SIZE, | 1199 | .min_keysize = AES_MIN_KEY_SIZE, |
1054 | .max_keysize = AES_MAX_KEY_SIZE + AES_BLOCK_SIZE, | 1200 | .max_keysize = AES_MAX_KEY_SIZE, |
1055 | .ivsize = AES_BLOCK_SIZE, | 1201 | .ivsize = AES_BLOCK_SIZE, |
1056 | .setkey = ablk_set_key, | 1202 | .setkey = ablk_set_key, |
1057 | .encrypt = ablk_encrypt, | 1203 | .encrypt = ablk_encrypt, |
@@ -1059,10 +1205,50 @@ static struct crypto_alg aesni_algs[] = { { | |||
1059 | }, | 1205 | }, |
1060 | }, | 1206 | }, |
1061 | #endif | 1207 | #endif |
1062 | #ifdef HAS_PCBC | ||
1063 | }, { | 1208 | }, { |
1064 | .cra_name = "pcbc(aes)", | 1209 | .cra_name = "__lrw-aes-aesni", |
1065 | .cra_driver_name = "pcbc-aes-aesni", | 1210 | .cra_driver_name = "__driver-lrw-aes-aesni", |
1211 | .cra_priority = 0, | ||
1212 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
1213 | .cra_blocksize = AES_BLOCK_SIZE, | ||
1214 | .cra_ctxsize = sizeof(struct aesni_lrw_ctx), | ||
1215 | .cra_alignmask = 0, | ||
1216 | .cra_type = &crypto_blkcipher_type, | ||
1217 | .cra_module = THIS_MODULE, | ||
1218 | .cra_exit = lrw_aesni_exit_tfm, | ||
1219 | .cra_u = { | ||
1220 | .blkcipher = { | ||
1221 | .min_keysize = AES_MIN_KEY_SIZE + AES_BLOCK_SIZE, | ||
1222 | .max_keysize = AES_MAX_KEY_SIZE + AES_BLOCK_SIZE, | ||
1223 | .ivsize = AES_BLOCK_SIZE, | ||
1224 | .setkey = lrw_aesni_setkey, | ||
1225 | .encrypt = lrw_encrypt, | ||
1226 | .decrypt = lrw_decrypt, | ||
1227 | }, | ||
1228 | }, | ||
1229 | }, { | ||
1230 | .cra_name = "__xts-aes-aesni", | ||
1231 | .cra_driver_name = "__driver-xts-aes-aesni", | ||
1232 | .cra_priority = 0, | ||
1233 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
1234 | .cra_blocksize = AES_BLOCK_SIZE, | ||
1235 | .cra_ctxsize = sizeof(struct aesni_xts_ctx), | ||
1236 | .cra_alignmask = 0, | ||
1237 | .cra_type = &crypto_blkcipher_type, | ||
1238 | .cra_module = THIS_MODULE, | ||
1239 | .cra_u = { | ||
1240 | .blkcipher = { | ||
1241 | .min_keysize = 2 * AES_MIN_KEY_SIZE, | ||
1242 | .max_keysize = 2 * AES_MAX_KEY_SIZE, | ||
1243 | .ivsize = AES_BLOCK_SIZE, | ||
1244 | .setkey = xts_aesni_setkey, | ||
1245 | .encrypt = xts_encrypt, | ||
1246 | .decrypt = xts_decrypt, | ||
1247 | }, | ||
1248 | }, | ||
1249 | }, { | ||
1250 | .cra_name = "lrw(aes)", | ||
1251 | .cra_driver_name = "lrw-aes-aesni", | ||
1066 | .cra_priority = 400, | 1252 | .cra_priority = 400, |
1067 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 1253 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
1068 | .cra_blocksize = AES_BLOCK_SIZE, | 1254 | .cra_blocksize = AES_BLOCK_SIZE, |
@@ -1070,20 +1256,18 @@ static struct crypto_alg aesni_algs[] = { { | |||
1070 | .cra_alignmask = 0, | 1256 | .cra_alignmask = 0, |
1071 | .cra_type = &crypto_ablkcipher_type, | 1257 | .cra_type = &crypto_ablkcipher_type, |
1072 | .cra_module = THIS_MODULE, | 1258 | .cra_module = THIS_MODULE, |
1073 | .cra_init = ablk_pcbc_init, | 1259 | .cra_init = ablk_init, |
1074 | .cra_exit = ablk_exit, | 1260 | .cra_exit = ablk_exit, |
1075 | .cra_u = { | 1261 | .cra_u = { |
1076 | .ablkcipher = { | 1262 | .ablkcipher = { |
1077 | .min_keysize = AES_MIN_KEY_SIZE, | 1263 | .min_keysize = AES_MIN_KEY_SIZE + AES_BLOCK_SIZE, |
1078 | .max_keysize = AES_MAX_KEY_SIZE, | 1264 | .max_keysize = AES_MAX_KEY_SIZE + AES_BLOCK_SIZE, |
1079 | .ivsize = AES_BLOCK_SIZE, | 1265 | .ivsize = AES_BLOCK_SIZE, |
1080 | .setkey = ablk_set_key, | 1266 | .setkey = ablk_set_key, |
1081 | .encrypt = ablk_encrypt, | 1267 | .encrypt = ablk_encrypt, |
1082 | .decrypt = ablk_decrypt, | 1268 | .decrypt = ablk_decrypt, |
1083 | }, | 1269 | }, |
1084 | }, | 1270 | }, |
1085 | #endif | ||
1086 | #ifdef HAS_XTS | ||
1087 | }, { | 1271 | }, { |
1088 | .cra_name = "xts(aes)", | 1272 | .cra_name = "xts(aes)", |
1089 | .cra_driver_name = "xts-aes-aesni", | 1273 | .cra_driver_name = "xts-aes-aesni", |
@@ -1094,7 +1278,7 @@ static struct crypto_alg aesni_algs[] = { { | |||
1094 | .cra_alignmask = 0, | 1278 | .cra_alignmask = 0, |
1095 | .cra_type = &crypto_ablkcipher_type, | 1279 | .cra_type = &crypto_ablkcipher_type, |
1096 | .cra_module = THIS_MODULE, | 1280 | .cra_module = THIS_MODULE, |
1097 | .cra_init = ablk_xts_init, | 1281 | .cra_init = ablk_init, |
1098 | .cra_exit = ablk_exit, | 1282 | .cra_exit = ablk_exit, |
1099 | .cra_u = { | 1283 | .cra_u = { |
1100 | .ablkcipher = { | 1284 | .ablkcipher = { |
@@ -1106,7 +1290,6 @@ static struct crypto_alg aesni_algs[] = { { | |||
1106 | .decrypt = ablk_decrypt, | 1290 | .decrypt = ablk_decrypt, |
1107 | }, | 1291 | }, |
1108 | }, | 1292 | }, |
1109 | #endif | ||
1110 | } }; | 1293 | } }; |
1111 | 1294 | ||
1112 | 1295 | ||