Diffstat (limited to 'arch/x86/crypto')

 arch/x86/crypto/Makefile                    |  14
 arch/x86/crypto/ablk_helper.c               | 149
 arch/x86/crypto/aes_glue.c                  |   2
 arch/x86/crypto/aesni-intel_glue.c          | 110
 arch/x86/crypto/camellia_glue.c             | 355
 arch/x86/crypto/glue_helper.c               | 307
 arch/x86/crypto/serpent-avx-x86_64-asm_64.S | 704
 arch/x86/crypto/serpent_avx_glue.c          | 636
 arch/x86/crypto/serpent_sse2_glue.c         | 513
 arch/x86/crypto/sha1_ssse3_asm.S            |   2
 arch/x86/crypto/sha1_ssse3_glue.c           |   6
 arch/x86/crypto/twofish-avx-x86_64-asm_64.S | 300
 arch/x86/crypto/twofish_avx_glue.c          | 624
 arch/x86/crypto/twofish_glue_3way.c         | 409

 14 files changed, 3040 insertions(+), 1091 deletions(-)
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index e191ac048b59..e908e5de82d3 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -2,6 +2,9 @@
 # Arch-specific CryptoAPI modules.
 #
 
+obj-$(CONFIG_CRYPTO_ABLK_HELPER_X86) += ablk_helper.o
+obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o
+
 obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
 obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
 obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
@@ -12,8 +15,10 @@ obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
 obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
+obj-$(CONFIG_CRYPTO_TWOFISH_AVX_X86_64) += twofish-avx-x86_64.o
 obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
 obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o
+obj-$(CONFIG_CRYPTO_SERPENT_AVX_X86_64) += serpent-avx-x86_64.o
 obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
 obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
 
@@ -30,16 +35,11 @@ camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
 blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
 twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
 twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
+twofish-avx-x86_64-y := twofish-avx-x86_64-asm_64.o twofish_avx_glue.o
 salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
 serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o
+serpent-avx-x86_64-y := serpent-avx-x86_64-asm_64.o serpent_avx_glue.o
 
 aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
-
 ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
-
-# enable AVX support only when $(AS) can actually assemble the instructions
-ifeq ($(call as-instr,vpxor %xmm0$(comma)%xmm1$(comma)%xmm2,yes,no),yes)
-AFLAGS_sha1_ssse3_asm.o += -DSHA1_ENABLE_AVX_SUPPORT
-CFLAGS_sha1_ssse3_glue.o += -DSHA1_ENABLE_AVX_SUPPORT
-endif
 sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
diff --git a/arch/x86/crypto/ablk_helper.c b/arch/x86/crypto/ablk_helper.c
new file mode 100644
index 000000000000..43282fe04a8b
--- /dev/null
+++ b/arch/x86/crypto/ablk_helper.c
@@ -0,0 +1,149 @@
+/*
+ * Shared async block cipher helpers
+ *
+ * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ *
+ * Based on aesni-intel_glue.c by:
+ *  Copyright (C) 2008, Intel Corp.
+ *    Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/crypto.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <crypto/algapi.h>
+#include <crypto/cryptd.h>
+#include <asm/i387.h>
+#include <asm/crypto/ablk_helper.h>
+
+int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
+		 unsigned int key_len)
+{
+	struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+	struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base;
+	int err;
+
+	crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+	crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm)
+				    & CRYPTO_TFM_REQ_MASK);
+	err = crypto_ablkcipher_setkey(child, key, key_len);
+	crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child)
+				    & CRYPTO_TFM_RES_MASK);
+	return err;
+}
+EXPORT_SYMBOL_GPL(ablk_set_key);
+
+int __ablk_encrypt(struct ablkcipher_request *req)
+{
+	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+	struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+	struct blkcipher_desc desc;
+
+	desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
+	desc.info = req->info;
+	desc.flags = 0;
+
+	return crypto_blkcipher_crt(desc.tfm)->encrypt(
+		&desc, req->dst, req->src, req->nbytes);
+}
+EXPORT_SYMBOL_GPL(__ablk_encrypt);
+
+int ablk_encrypt(struct ablkcipher_request *req)
+{
+	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+	struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+
+	if (!irq_fpu_usable()) {
+		struct ablkcipher_request *cryptd_req =
+			ablkcipher_request_ctx(req);
+
+		memcpy(cryptd_req, req, sizeof(*req));
+		ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
+
+		return crypto_ablkcipher_encrypt(cryptd_req);
+	} else {
+		return __ablk_encrypt(req);
+	}
+}
+EXPORT_SYMBOL_GPL(ablk_encrypt);
+
+int ablk_decrypt(struct ablkcipher_request *req)
+{
+	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+	struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+
+	if (!irq_fpu_usable()) {
+		struct ablkcipher_request *cryptd_req =
+			ablkcipher_request_ctx(req);
+
+		memcpy(cryptd_req, req, sizeof(*req));
+		ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
+
+		return crypto_ablkcipher_decrypt(cryptd_req);
+	} else {
+		struct blkcipher_desc desc;
+
+		desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
+		desc.info = req->info;
+		desc.flags = 0;
+
+		return crypto_blkcipher_crt(desc.tfm)->decrypt(
+			&desc, req->dst, req->src, req->nbytes);
+	}
+}
+EXPORT_SYMBOL_GPL(ablk_decrypt);
+
+void ablk_exit(struct crypto_tfm *tfm)
+{
+	struct async_helper_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	cryptd_free_ablkcipher(ctx->cryptd_tfm);
+}
+EXPORT_SYMBOL_GPL(ablk_exit);
+
+int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name)
+{
+	struct async_helper_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct cryptd_ablkcipher *cryptd_tfm;
+
+	cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0);
+	if (IS_ERR(cryptd_tfm))
+		return PTR_ERR(cryptd_tfm);
+
+	ctx->cryptd_tfm = cryptd_tfm;
+	tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) +
+		crypto_ablkcipher_reqsize(&cryptd_tfm->base);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ablk_init_common);
+
+int ablk_init(struct crypto_tfm *tfm)
+{
+	char drv_name[CRYPTO_MAX_ALG_NAME];
+
+	snprintf(drv_name, sizeof(drv_name), "__driver-%s",
+		 crypto_tfm_alg_driver_name(tfm));
+
+	return ablk_init_common(tfm, drv_name);
+}
+EXPORT_SYMBOL_GPL(ablk_init);
+
+MODULE_LICENSE("GPL");
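For orientation, this is how a cipher module consumes these helpers: it registers an async algorithm whose callbacks all point at the shared functions and whose context is a struct async_helper_ctx. A minimal sketch follows, modelled on the aesni-intel conversion further down; the "foo" names, block size, and key sizes are placeholders, not anything from this patch:

	static int ablk_ecb_init(struct crypto_tfm *tfm)
	{
		/* bind this async tfm to the internal sync implementation via cryptd */
		return ablk_init_common(tfm, "__driver-ecb-foo");
	}

	static struct crypto_alg foo_ecb_alg = {
		.cra_name		= "ecb(foo)",
		.cra_driver_name	= "ecb-foo-async",
		.cra_priority		= 400,
		.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
		.cra_blocksize		= 16,			/* placeholder */
		.cra_ctxsize		= sizeof(struct async_helper_ctx),
		.cra_type		= &crypto_ablkcipher_type,
		.cra_module		= THIS_MODULE,
		.cra_init		= ablk_ecb_init,
		.cra_exit		= ablk_exit,
		.cra_u = {
			.ablkcipher = {
				.min_keysize	= 16,	/* placeholder */
				.max_keysize	= 32,	/* placeholder */
				.setkey		= ablk_set_key,
				.encrypt	= ablk_encrypt,
				.decrypt	= ablk_decrypt,
			},
		},
	};

ablk_encrypt()/ablk_decrypt() then run the underlying SIMD code directly when the FPU is usable in the current context, and defer the request to the cryptd workqueue otherwise.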
diff --git a/arch/x86/crypto/aes_glue.c b/arch/x86/crypto/aes_glue.c
index 8efcf42a9d7e..59b37deb8c8d 100644
--- a/arch/x86/crypto/aes_glue.c
+++ b/arch/x86/crypto/aes_glue.c
@@ -5,7 +5,7 @@
 
 #include <linux/module.h>
 #include <crypto/aes.h>
-#include <asm/aes.h>
+#include <asm/crypto/aes.h>
 
 asmlinkage void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
 asmlinkage void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index ac7f5cd019e8..34fdcff4d2c8 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -30,7 +30,8 @@
 #include <crypto/ctr.h>
 #include <asm/cpu_device_id.h>
 #include <asm/i387.h>
-#include <asm/aes.h>
+#include <asm/crypto/aes.h>
+#include <asm/crypto/ablk_helper.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/internal/aead.h>
 #include <linux/workqueue.h>
@@ -52,10 +53,6 @@
 #define HAS_XTS
 #endif
 
-struct async_aes_ctx {
-	struct cryptd_ablkcipher *cryptd_tfm;
-};
-
 /* This data is stored at the end of the crypto_tfm struct.
  * It's a type of per "session" data storage location.
  * This needs to be 16 byte aligned.
@@ -377,87 +374,6 @@ static int ctr_crypt(struct blkcipher_desc *desc,
 }
 #endif
 
-static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
-			unsigned int key_len)
-{
-	struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-	struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base;
-	int err;
-
-	crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
-	crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm)
-				    & CRYPTO_TFM_REQ_MASK);
-	err = crypto_ablkcipher_setkey(child, key, key_len);
-	crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child)
-				    & CRYPTO_TFM_RES_MASK);
-	return err;
-}
-
-static int ablk_encrypt(struct ablkcipher_request *req)
-{
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-
-	if (!irq_fpu_usable()) {
-		struct ablkcipher_request *cryptd_req =
-			ablkcipher_request_ctx(req);
-		memcpy(cryptd_req, req, sizeof(*req));
-		ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
-		return crypto_ablkcipher_encrypt(cryptd_req);
-	} else {
-		struct blkcipher_desc desc;
-		desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
-		desc.info = req->info;
-		desc.flags = 0;
-		return crypto_blkcipher_crt(desc.tfm)->encrypt(
-			&desc, req->dst, req->src, req->nbytes);
-	}
-}
-
-static int ablk_decrypt(struct ablkcipher_request *req)
-{
-	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
-	struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-
-	if (!irq_fpu_usable()) {
-		struct ablkcipher_request *cryptd_req =
-			ablkcipher_request_ctx(req);
-		memcpy(cryptd_req, req, sizeof(*req));
-		ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
-		return crypto_ablkcipher_decrypt(cryptd_req);
-	} else {
-		struct blkcipher_desc desc;
-		desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
-		desc.info = req->info;
-		desc.flags = 0;
-		return crypto_blkcipher_crt(desc.tfm)->decrypt(
-			&desc, req->dst, req->src, req->nbytes);
-	}
-}
-
-static void ablk_exit(struct crypto_tfm *tfm)
-{
-	struct async_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	cryptd_free_ablkcipher(ctx->cryptd_tfm);
-}
-
-static int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name)
-{
-	struct async_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct cryptd_ablkcipher *cryptd_tfm;
-
-	cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0);
-	if (IS_ERR(cryptd_tfm))
-		return PTR_ERR(cryptd_tfm);
-
-	ctx->cryptd_tfm = cryptd_tfm;
-	tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) +
-		crypto_ablkcipher_reqsize(&cryptd_tfm->base);
-
-	return 0;
-}
-
 static int ablk_ecb_init(struct crypto_tfm *tfm)
 {
 	return ablk_init_common(tfm, "__driver-ecb-aes-aesni");
@@ -613,7 +529,7 @@ static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
 	struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
 	struct aesni_rfc4106_gcm_ctx *child_ctx =
 		aesni_rfc4106_gcm_ctx_get(cryptd_child);
-	u8 *new_key_mem = NULL;
+	u8 *new_key_align, *new_key_mem = NULL;
 
 	if (key_len < 4) {
 		crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
@@ -637,9 +553,9 @@ static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
 		if (!new_key_mem)
 			return -ENOMEM;
 
-		new_key_mem = PTR_ALIGN(new_key_mem, AESNI_ALIGN);
-		memcpy(new_key_mem, key, key_len);
-		key = new_key_mem;
+		new_key_align = PTR_ALIGN(new_key_mem, AESNI_ALIGN);
+		memcpy(new_key_align, key, key_len);
+		key = new_key_align;
 	}
 
 	if (!irq_fpu_usable())
@@ -968,7 +884,7 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_aes_ctx),
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
@@ -989,7 +905,7 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_aes_ctx),
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
@@ -1033,7 +949,7 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct async_aes_ctx),
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
@@ -1098,7 +1014,7 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct async_aes_ctx),
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
@@ -1126,7 +1042,7 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_aes_ctx),
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
@@ -1150,7 +1066,7 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_aes_ctx),
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
@@ -1174,7 +1090,7 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_aes_ctx),
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
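Worth noting: the rfc4106_set_key() hunks above carry a genuine bug fix alongside the helper conversion. The old code overwrote new_key_mem with the PTR_ALIGN() result, so the pointer later handed back to kfree() could be one that kmalloc() never returned. A minimal sketch of the corrected pattern (key_len and AESNI_ALIGN as used in that function):

	u8 *mem, *aligned;

	mem = kmalloc(key_len + AESNI_ALIGN, GFP_KERNEL);	/* over-allocate */
	if (!mem)
		return -ENOMEM;

	aligned = PTR_ALIGN(mem, AESNI_ALIGN);	/* may point into the middle of mem */
	memcpy(aligned, key, key_len);
	/* ... use 'aligned' ... */
	kfree(mem);	/* free what kmalloc() returned, not the aligned copy */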
diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c
index 3306dc0b139e..eeb2b3b743e9 100644
--- a/arch/x86/crypto/camellia_glue.c
+++ b/arch/x86/crypto/camellia_glue.c
@@ -5,10 +5,6 @@
  *
  * Camellia parts based on code by:
  *  Copyright (C) 2006 NTT (Nippon Telegraph and Telephone Corporation)
- * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
- *   Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
- * CTR part based on code (crypto/ctr.c) by:
- *   (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -34,9 +30,9 @@
 #include <linux/module.h>
 #include <linux/types.h>
 #include <crypto/algapi.h>
-#include <crypto/b128ops.h>
 #include <crypto/lrw.h>
 #include <crypto/xts.h>
+#include <asm/crypto/glue_helper.h>
 
 #define CAMELLIA_MIN_KEY_SIZE	16
 #define CAMELLIA_MAX_KEY_SIZE	32
@@ -1312,307 +1308,128 @@ static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key,
 				 &tfm->crt_flags);
 }
 
-static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
-		     void (*fn)(struct camellia_ctx *, u8 *, const u8 *),
-		     void (*fn_2way)(struct camellia_ctx *, u8 *, const u8 *))
-{
-	struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	unsigned int bsize = CAMELLIA_BLOCK_SIZE;
-	unsigned int nbytes;
-	int err;
-
-	err = blkcipher_walk_virt(desc, walk);
-
-	while ((nbytes = walk->nbytes)) {
-		u8 *wsrc = walk->src.virt.addr;
-		u8 *wdst = walk->dst.virt.addr;
-
-		/* Process two block batch */
-		if (nbytes >= bsize * 2) {
-			do {
-				fn_2way(ctx, wdst, wsrc);
-
-				wsrc += bsize * 2;
-				wdst += bsize * 2;
-				nbytes -= bsize * 2;
-			} while (nbytes >= bsize * 2);
-
-			if (nbytes < bsize)
-				goto done;
-		}
-
-		/* Handle leftovers */
-		do {
-			fn(ctx, wdst, wsrc);
-
-			wsrc += bsize;
-			wdst += bsize;
-			nbytes -= bsize;
-		} while (nbytes >= bsize);
-
-done:
-		err = blkcipher_walk_done(desc, walk, nbytes);
-	}
-
-	return err;
-}
-
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_crypt(desc, &walk, camellia_enc_blk, camellia_enc_blk_2way);
-}
-
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_crypt(desc, &walk, camellia_dec_blk, camellia_dec_blk_2way);
-}
-
-static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
-				  struct blkcipher_walk *walk)
-{
-	struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	unsigned int bsize = CAMELLIA_BLOCK_SIZE;
-	unsigned int nbytes = walk->nbytes;
-	u128 *src = (u128 *)walk->src.virt.addr;
-	u128 *dst = (u128 *)walk->dst.virt.addr;
-	u128 *iv = (u128 *)walk->iv;
-
-	do {
-		u128_xor(dst, src, iv);
-		camellia_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
-		iv = dst;
-
-		src += 1;
-		dst += 1;
-		nbytes -= bsize;
-	} while (nbytes >= bsize);
-
-	u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv);
-	return nbytes;
-}
-
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct blkcipher_walk walk;
-	int err;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
-
-	while ((nbytes = walk.nbytes)) {
-		nbytes = __cbc_encrypt(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
-	}
-
-	return err;
-}
-
-static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
-				  struct blkcipher_walk *walk)
-{
-	struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	unsigned int bsize = CAMELLIA_BLOCK_SIZE;
-	unsigned int nbytes = walk->nbytes;
-	u128 *src = (u128 *)walk->src.virt.addr;
-	u128 *dst = (u128 *)walk->dst.virt.addr;
-	u128 ivs[2 - 1];
-	u128 last_iv;
-
-	/* Start of the last block. */
-	src += nbytes / bsize - 1;
-	dst += nbytes / bsize - 1;
-
-	last_iv = *src;
-
-	/* Process two block batch */
-	if (nbytes >= bsize * 2) {
-		do {
-			nbytes -= bsize * (2 - 1);
-			src -= 2 - 1;
-			dst -= 2 - 1;
-
-			ivs[0] = src[0];
-
-			camellia_dec_blk_2way(ctx, (u8 *)dst, (u8 *)src);
-
-			u128_xor(dst + 1, dst + 1, ivs + 0);
-
-			nbytes -= bsize;
-			if (nbytes < bsize)
-				goto done;
-
-			u128_xor(dst, dst, src - 1);
-			src -= 1;
-			dst -= 1;
-		} while (nbytes >= bsize * 2);
-
-		if (nbytes < bsize)
-			goto done;
-	}
-
-	/* Handle leftovers */
-	for (;;) {
-		camellia_dec_blk(ctx, (u8 *)dst, (u8 *)src);
-
-		nbytes -= bsize;
-		if (nbytes < bsize)
-			break;
-
-		u128_xor(dst, dst, src - 1);
-		src -= 1;
-		dst -= 1;
-	}
-
-done:
-	u128_xor(dst, dst, (u128 *)walk->iv);
-	*(u128 *)walk->iv = last_iv;
-
-	return nbytes;
-}
-
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct blkcipher_walk walk;
-	int err;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
-
-	while ((nbytes = walk.nbytes)) {
-		nbytes = __cbc_decrypt(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
-	}
-
-	return err;
-}
-
-static inline void u128_to_be128(be128 *dst, const u128 *src)
-{
-	dst->a = cpu_to_be64(src->a);
-	dst->b = cpu_to_be64(src->b);
-}
-
-static inline void be128_to_u128(u128 *dst, const be128 *src)
-{
-	dst->a = be64_to_cpu(src->a);
-	dst->b = be64_to_cpu(src->b);
-}
-
-static inline void u128_inc(u128 *i)
-{
-	i->b++;
-	if (!i->b)
-		i->a++;
-}
-
-static void ctr_crypt_final(struct blkcipher_desc *desc,
-			    struct blkcipher_walk *walk)
-{
-	struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	u8 keystream[CAMELLIA_BLOCK_SIZE];
-	u8 *src = walk->src.virt.addr;
-	u8 *dst = walk->dst.virt.addr;
-	unsigned int nbytes = walk->nbytes;
-	u128 ctrblk;
-
-	memcpy(keystream, src, nbytes);
-	camellia_enc_blk_xor(ctx, keystream, walk->iv);
-	memcpy(dst, keystream, nbytes);
-
-	be128_to_u128(&ctrblk, (be128 *)walk->iv);
-	u128_inc(&ctrblk);
-	u128_to_be128((be128 *)walk->iv, &ctrblk);
-}
-
-static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
-				struct blkcipher_walk *walk)
-{
-	struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	unsigned int bsize = CAMELLIA_BLOCK_SIZE;
-	unsigned int nbytes = walk->nbytes;
-	u128 *src = (u128 *)walk->src.virt.addr;
-	u128 *dst = (u128 *)walk->dst.virt.addr;
-	u128 ctrblk;
-	be128 ctrblocks[2];
-
-	be128_to_u128(&ctrblk, (be128 *)walk->iv);
-
-	/* Process two block batch */
-	if (nbytes >= bsize * 2) {
-		do {
-			if (dst != src) {
-				dst[0] = src[0];
-				dst[1] = src[1];
-			}
-
-			/* create ctrblks for parallel encrypt */
-			u128_to_be128(&ctrblocks[0], &ctrblk);
-			u128_inc(&ctrblk);
-			u128_to_be128(&ctrblocks[1], &ctrblk);
-			u128_inc(&ctrblk);
-
-			camellia_enc_blk_xor_2way(ctx, (u8 *)dst,
-						 (u8 *)ctrblocks);
-
-			src += 2;
-			dst += 2;
-			nbytes -= bsize * 2;
-		} while (nbytes >= bsize * 2);
-
-		if (nbytes < bsize)
-			goto done;
-	}
-
-	/* Handle leftovers */
-	do {
-		if (dst != src)
-			*dst = *src;
-
-		u128_to_be128(&ctrblocks[0], &ctrblk);
-		u128_inc(&ctrblk);
-
-		camellia_enc_blk_xor(ctx, (u8 *)dst, (u8 *)ctrblocks);
-
-		src += 1;
-		dst += 1;
-		nbytes -= bsize;
-	} while (nbytes >= bsize);
-
-done:
-	u128_to_be128((be128 *)walk->iv, &ctrblk);
-	return nbytes;
-}
-
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		     struct scatterlist *src, unsigned int nbytes)
-{
-	struct blkcipher_walk walk;
-	int err;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt_block(desc, &walk, CAMELLIA_BLOCK_SIZE);
-
-	while ((nbytes = walk.nbytes) >= CAMELLIA_BLOCK_SIZE) {
-		nbytes = __ctr_crypt(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
-	}
-
-	if (walk.nbytes) {
-		ctr_crypt_final(desc, &walk);
-		err = blkcipher_walk_done(desc, &walk, 0);
-	}
-
-	return err;
-}
+static void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src)
+{
+	u128 iv = *src;
+
+	camellia_dec_blk_2way(ctx, (u8 *)dst, (u8 *)src);
+
+	u128_xor(&dst[1], &dst[1], &iv);
+}
+
+static void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv)
+{
+	be128 ctrblk;
+
+	if (dst != src)
+		*dst = *src;
+
+	u128_to_be128(&ctrblk, iv);
+	u128_inc(iv);
+
+	camellia_enc_blk_xor(ctx, (u8 *)dst, (u8 *)&ctrblk);
+}
+
+static void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src,
+				    u128 *iv)
+{
+	be128 ctrblks[2];
+
+	if (dst != src) {
+		dst[0] = src[0];
+		dst[1] = src[1];
+	}
+
+	u128_to_be128(&ctrblks[0], iv);
+	u128_inc(iv);
+	u128_to_be128(&ctrblks[1], iv);
+	u128_inc(iv);
+
+	camellia_enc_blk_xor_2way(ctx, (u8 *)dst, (u8 *)ctrblks);
+}
+
+static const struct common_glue_ctx camellia_enc = {
+	.num_funcs = 2,
+	.fpu_blocks_limit = -1,
+
+	.funcs = { {
+		.num_blocks = 2,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk_2way) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk) }
+	} }
+};
+
+static const struct common_glue_ctx camellia_ctr = {
+	.num_funcs = 2,
+	.fpu_blocks_limit = -1,
+
+	.funcs = { {
+		.num_blocks = 2,
+		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr_2way) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr) }
+	} }
+};
+
+static const struct common_glue_ctx camellia_dec = {
+	.num_funcs = 2,
+	.fpu_blocks_limit = -1,
+
+	.funcs = { {
+		.num_blocks = 2,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk_2way) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk) }
+	} }
+};
+
+static const struct common_glue_ctx camellia_dec_cbc = {
+	.num_funcs = 2,
+	.fpu_blocks_limit = -1,
+
+	.funcs = { {
+		.num_blocks = 2,
+		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_decrypt_cbc_2way) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_dec_blk) }
+	} }
+};
+
+static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	return glue_ecb_crypt_128bit(&camellia_enc, desc, dst, src, nbytes);
+}
+
+static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	return glue_ecb_crypt_128bit(&camellia_dec, desc, dst, src, nbytes);
+}
+
+static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(camellia_enc_blk), desc,
+				       dst, src, nbytes);
+}
+
+static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	return glue_cbc_decrypt_128bit(&camellia_dec_cbc, desc, dst, src,
+				       nbytes);
+}
+
+static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		     struct scatterlist *src, unsigned int nbytes)
+{
+	return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes);
+}
 
 static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
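The common_glue_ctx tables above list batch variants from the widest down to num_blocks == 1; the glue code consumes as many full batches as possible with each entry, then falls through to the next entry for the leftover blocks. Per the diffstat, twofish_glue_3way.c receives the same conversion; its encryption table would presumably look along these lines (a sketch built on that file's existing 3-way and 1-way primitives):

	static const struct common_glue_ctx twofish_enc = {
		.num_funcs = 2,
		.fpu_blocks_limit = -1,	/* plain integer code, never touches the FPU */

		.funcs = { {
			.num_blocks = 3,	/* widest batch first */
			.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) }
		}, {
			.num_blocks = 1,	/* leftover blocks */
			.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) }
		} }
	};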
diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c
new file mode 100644
index 000000000000..4854f0f31e4f
--- /dev/null
+++ b/arch/x86/crypto/glue_helper.c
@@ -0,0 +1,307 @@
+/*
+ * Shared glue code for 128bit block ciphers
+ *
+ * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ *
+ * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
+ *   Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
+ * CTR part based on code (crypto/ctr.c) by:
+ *   (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ */
+
+#include <linux/module.h>
+#include <crypto/b128ops.h>
+#include <crypto/lrw.h>
+#include <crypto/xts.h>
+#include <asm/crypto/glue_helper.h>
+#include <crypto/scatterwalk.h>
+
+static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
+				   struct blkcipher_desc *desc,
+				   struct blkcipher_walk *walk)
+{
+	void *ctx = crypto_blkcipher_ctx(desc->tfm);
+	const unsigned int bsize = 128 / 8;
+	unsigned int nbytes, i, func_bytes;
+	bool fpu_enabled = false;
+	int err;
+
+	err = blkcipher_walk_virt(desc, walk);
+
+	while ((nbytes = walk->nbytes)) {
+		u8 *wsrc = walk->src.virt.addr;
+		u8 *wdst = walk->dst.virt.addr;
+
+		fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
+					     desc, fpu_enabled, nbytes);
+
+		for (i = 0; i < gctx->num_funcs; i++) {
+			func_bytes = bsize * gctx->funcs[i].num_blocks;
+
+			/* Process multi-block batch */
+			if (nbytes >= func_bytes) {
+				do {
+					gctx->funcs[i].fn_u.ecb(ctx, wdst,
+								wsrc);
+
+					wsrc += func_bytes;
+					wdst += func_bytes;
+					nbytes -= func_bytes;
+				} while (nbytes >= func_bytes);
+
+				if (nbytes < bsize)
+					goto done;
+			}
+		}
+
+done:
+		err = blkcipher_walk_done(desc, walk, nbytes);
+	}
+
+	glue_fpu_end(fpu_enabled);
+	return err;
+}
+
+int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
+			  struct blkcipher_desc *desc, struct scatterlist *dst,
+			  struct scatterlist *src, unsigned int nbytes)
+{
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return __glue_ecb_crypt_128bit(gctx, desc, &walk);
+}
+EXPORT_SYMBOL_GPL(glue_ecb_crypt_128bit);
+
+static unsigned int __glue_cbc_encrypt_128bit(const common_glue_func_t fn,
+					      struct blkcipher_desc *desc,
+					      struct blkcipher_walk *walk)
+{
+	void *ctx = crypto_blkcipher_ctx(desc->tfm);
+	const unsigned int bsize = 128 / 8;
+	unsigned int nbytes = walk->nbytes;
+	u128 *src = (u128 *)walk->src.virt.addr;
+	u128 *dst = (u128 *)walk->dst.virt.addr;
+	u128 *iv = (u128 *)walk->iv;
+
+	do {
+		u128_xor(dst, src, iv);
+		fn(ctx, (u8 *)dst, (u8 *)dst);
+		iv = dst;
+
+		src += 1;
+		dst += 1;
+		nbytes -= bsize;
+	} while (nbytes >= bsize);
+
+	u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv);
+	return nbytes;
+}
+
+int glue_cbc_encrypt_128bit(const common_glue_func_t fn,
+			    struct blkcipher_desc *desc,
+			    struct scatterlist *dst,
+			    struct scatterlist *src, unsigned int nbytes)
+{
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	while ((nbytes = walk.nbytes)) {
+		nbytes = __glue_cbc_encrypt_128bit(fn, desc, &walk);
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(glue_cbc_encrypt_128bit);
+
+static unsigned int
+__glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx,
+			  struct blkcipher_desc *desc,
+			  struct blkcipher_walk *walk)
+{
+	void *ctx = crypto_blkcipher_ctx(desc->tfm);
+	const unsigned int bsize = 128 / 8;
+	unsigned int nbytes = walk->nbytes;
+	u128 *src = (u128 *)walk->src.virt.addr;
+	u128 *dst = (u128 *)walk->dst.virt.addr;
+	u128 last_iv;
+	unsigned int num_blocks, func_bytes;
+	unsigned int i;
+
+	/* Start of the last block. */
+	src += nbytes / bsize - 1;
+	dst += nbytes / bsize - 1;
+
+	last_iv = *src;
+
+	for (i = 0; i < gctx->num_funcs; i++) {
+		num_blocks = gctx->funcs[i].num_blocks;
+		func_bytes = bsize * num_blocks;
+
+		/* Process multi-block batch */
+		if (nbytes >= func_bytes) {
+			do {
+				nbytes -= func_bytes - bsize;
+				src -= num_blocks - 1;
+				dst -= num_blocks - 1;
+
+				gctx->funcs[i].fn_u.cbc(ctx, dst, src);
+
+				nbytes -= bsize;
+				if (nbytes < bsize)
+					goto done;
+
+				u128_xor(dst, dst, src - 1);
+				src -= 1;
+				dst -= 1;
+			} while (nbytes >= func_bytes);
+
+			if (nbytes < bsize)
+				goto done;
+		}
+	}
+
+done:
+	u128_xor(dst, dst, (u128 *)walk->iv);
+	*(u128 *)walk->iv = last_iv;
+
+	return nbytes;
+}
+
+int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx,
+			    struct blkcipher_desc *desc,
+			    struct scatterlist *dst,
+			    struct scatterlist *src, unsigned int nbytes)
+{
+	const unsigned int bsize = 128 / 8;
+	bool fpu_enabled = false;
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	while ((nbytes = walk.nbytes)) {
+		fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
+					     desc, fpu_enabled, nbytes);
+		nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk);
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+
+	glue_fpu_end(fpu_enabled);
+	return err;
+}
+EXPORT_SYMBOL_GPL(glue_cbc_decrypt_128bit);
+
+static void glue_ctr_crypt_final_128bit(const common_glue_ctr_func_t fn_ctr,
+					struct blkcipher_desc *desc,
+					struct blkcipher_walk *walk)
+{
+	void *ctx = crypto_blkcipher_ctx(desc->tfm);
+	u8 *src = (u8 *)walk->src.virt.addr;
+	u8 *dst = (u8 *)walk->dst.virt.addr;
+	unsigned int nbytes = walk->nbytes;
+	u128 ctrblk;
+	u128 tmp;
+
+	be128_to_u128(&ctrblk, (be128 *)walk->iv);
+
+	memcpy(&tmp, src, nbytes);
+	fn_ctr(ctx, &tmp, &tmp, &ctrblk);
+	memcpy(dst, &tmp, nbytes);
+
+	u128_to_be128((be128 *)walk->iv, &ctrblk);
+}
+EXPORT_SYMBOL_GPL(glue_ctr_crypt_final_128bit);
+
+static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
+					    struct blkcipher_desc *desc,
+					    struct blkcipher_walk *walk)
+{
+	const unsigned int bsize = 128 / 8;
+	void *ctx = crypto_blkcipher_ctx(desc->tfm);
+	unsigned int nbytes = walk->nbytes;
+	u128 *src = (u128 *)walk->src.virt.addr;
+	u128 *dst = (u128 *)walk->dst.virt.addr;
+	u128 ctrblk;
+	unsigned int num_blocks, func_bytes;
+	unsigned int i;
+
+	be128_to_u128(&ctrblk, (be128 *)walk->iv);
+
+	/* Process multi-block batch */
+	for (i = 0; i < gctx->num_funcs; i++) {
+		num_blocks = gctx->funcs[i].num_blocks;
+		func_bytes = bsize * num_blocks;
+
+		if (nbytes >= func_bytes) {
+			do {
+				gctx->funcs[i].fn_u.ctr(ctx, dst, src, &ctrblk);
+
+				src += num_blocks;
+				dst += num_blocks;
+				nbytes -= func_bytes;
+			} while (nbytes >= func_bytes);
+
+			if (nbytes < bsize)
+				goto done;
+		}
+	}
+
+done:
+	u128_to_be128((be128 *)walk->iv, &ctrblk);
+	return nbytes;
+}
+
+int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
+			  struct blkcipher_desc *desc, struct scatterlist *dst,
+			  struct scatterlist *src, unsigned int nbytes)
+{
+	const unsigned int bsize = 128 / 8;
+	bool fpu_enabled = false;
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt_block(desc, &walk, bsize);
+
+	while ((nbytes = walk.nbytes) >= bsize) {
+		fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
+					     desc, fpu_enabled, nbytes);
+		nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk);
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+
+	glue_fpu_end(fpu_enabled);
+
+	if (walk.nbytes) {
+		glue_ctr_crypt_final_128bit(
+			gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk);
+		err = blkcipher_walk_done(desc, &walk, 0);
+	}
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(glue_ctr_crypt_128bit);
+
+MODULE_LICENSE("GPL");
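glue_fpu_begin()/glue_fpu_end() come from the new <asm/crypto/glue_helper.h> header, which is not part of this hunk. Presumably they behave along these lines: ciphers that never use SIMD (like camellia above) set fpu_blocks_limit to -1 and skip the FPU entirely, while SIMD ciphers only enter kernel_fpu_begin() once the remaining data is large enough to amortize the state save/restore. A sketch, not the header's verbatim contents:

	static inline bool glue_fpu_begin(unsigned int bsize, int fpu_blocks_limit,
					  struct blkcipher_desc *desc,
					  bool fpu_enabled, unsigned int nbytes)
	{
		if (likely(fpu_blocks_limit < 0))
			return false;	/* cipher never uses SIMD */

		if (fpu_enabled)
			return true;

		/* too little data to pay for an FPU save/restore round trip */
		if (nbytes < bsize * (unsigned int)fpu_blocks_limit)
			return false;

		if (desc) {
			/* cannot sleep while the FPU state is borrowed */
			desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
		}

		kernel_fpu_begin();
		return true;
	}

	static inline void glue_fpu_end(bool fpu_enabled)
	{
		if (fpu_enabled)
			kernel_fpu_end();
	}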
diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
new file mode 100644
index 000000000000..504106bf04a2
--- /dev/null
+++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
@@ -0,0 +1,704 @@
+/*
+ * Serpent Cipher 8-way parallel algorithm (x86_64/AVX)
+ *
+ * Copyright (C) 2012 Johannes Goetzfried
+ *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
+ *
+ * Based on arch/x86/crypto/serpent-sse2-x86_64-asm_64.S by
+ *  Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ */
+
+.file "serpent-avx-x86_64-asm_64.S"
+.text
+
+#define CTX %rdi
+
+/**********************************************************************
+  8-way AVX serpent
+ **********************************************************************/
+#define RA1 %xmm0
+#define RB1 %xmm1
+#define RC1 %xmm2
+#define RD1 %xmm3
+#define RE1 %xmm4
+
+#define tp  %xmm5
+
+#define RA2 %xmm6
+#define RB2 %xmm7
+#define RC2 %xmm8
+#define RD2 %xmm9
+#define RE2 %xmm10
+
+#define RNOT %xmm11
+
+#define RK0 %xmm12
+#define RK1 %xmm13
+#define RK2 %xmm14
+#define RK3 %xmm15
+
+
+#define S0_1(x0, x1, x2, x3, x4) \
+	vpor x0, x3, tp; \
+	vpxor x3, x0, x0; \
+	vpxor x2, x3, x4; \
+	vpxor RNOT, x4, x4; \
+	vpxor x1, tp, x3; \
+	vpand x0, x1, x1; \
+	vpxor x4, x1, x1; \
+	vpxor x0, x2, x2;
+#define S0_2(x0, x1, x2, x3, x4) \
+	vpxor x3, x0, x0; \
+	vpor x0, x4, x4; \
+	vpxor x2, x0, x0; \
+	vpand x1, x2, x2; \
+	vpxor x2, x3, x3; \
+	vpxor RNOT, x1, x1; \
+	vpxor x4, x2, x2; \
+	vpxor x2, x1, x1;
+
+#define S1_1(x0, x1, x2, x3, x4) \
+	vpxor x0, x1, tp; \
+	vpxor x3, x0, x0; \
+	vpxor RNOT, x3, x3; \
+	vpand tp, x1, x4; \
+	vpor tp, x0, x0; \
+	vpxor x2, x3, x3; \
+	vpxor x3, x0, x0; \
+	vpxor x3, tp, x1;
+#define S1_2(x0, x1, x2, x3, x4) \
+	vpxor x4, x3, x3; \
+	vpor x4, x1, x1; \
+	vpxor x2, x4, x4; \
+	vpand x0, x2, x2; \
+	vpxor x1, x2, x2; \
+	vpor x0, x1, x1; \
+	vpxor RNOT, x0, x0; \
+	vpxor x2, x0, x0; \
+	vpxor x1, x4, x4;
+
+#define S2_1(x0, x1, x2, x3, x4) \
+	vpxor RNOT, x3, x3; \
+	vpxor x0, x1, x1; \
+	vpand x2, x0, tp; \
+	vpxor x3, tp, tp; \
+	vpor x0, x3, x3; \
+	vpxor x1, x2, x2; \
+	vpxor x1, x3, x3; \
+	vpand tp, x1, x1;
+#define S2_2(x0, x1, x2, x3, x4) \
+	vpxor x2, tp, tp; \
+	vpand x3, x2, x2; \
+	vpor x1, x3, x3; \
+	vpxor RNOT, tp, tp; \
+	vpxor tp, x3, x3; \
+	vpxor tp, x0, x4; \
+	vpxor x2, tp, x0; \
+	vpor x2, x1, x1;
+
+#define S3_1(x0, x1, x2, x3, x4) \
+	vpxor x3, x1, tp; \
+	vpor x0, x3, x3; \
+	vpand x0, x1, x4; \
+	vpxor x2, x0, x0; \
+	vpxor tp, x2, x2; \
+	vpand x3, tp, x1; \
+	vpxor x3, x2, x2; \
+	vpor x4, x0, x0; \
+	vpxor x3, x4, x4;
+#define S3_2(x0, x1, x2, x3, x4) \
+	vpxor x0, x1, x1; \
+	vpand x3, x0, x0; \
+	vpand x4, x3, x3; \
+	vpxor x2, x3, x3; \
+	vpor x1, x4, x4; \
+	vpand x1, x2, x2; \
+	vpxor x3, x4, x4; \
+	vpxor x3, x0, x0; \
+	vpxor x2, x3, x3;
+
+#define S4_1(x0, x1, x2, x3, x4) \
+	vpand x0, x3, tp; \
+	vpxor x3, x0, x0; \
+	vpxor x2, tp, tp; \
+	vpor x3, x2, x2; \
+	vpxor x1, x0, x0; \
+	vpxor tp, x3, x4; \
+	vpor x0, x2, x2; \
+	vpxor x1, x2, x2;
+#define S4_2(x0, x1, x2, x3, x4) \
+	vpand x0, x1, x1; \
+	vpxor x4, x1, x1; \
+	vpand x2, x4, x4; \
+	vpxor tp, x2, x2; \
+	vpxor x0, x4, x4; \
+	vpor x1, tp, x3; \
+	vpxor RNOT, x1, x1; \
+	vpxor x0, x3, x3;
+
+#define S5_1(x0, x1, x2, x3, x4) \
+	vpor x0, x1, tp; \
+	vpxor tp, x2, x2; \
+	vpxor RNOT, x3, x3; \
+	vpxor x0, x1, x4; \
+	vpxor x2, x0, x0; \
+	vpand x4, tp, x1; \
+	vpor x3, x4, x4; \
+	vpxor x0, x4, x4;
+#define S5_2(x0, x1, x2, x3, x4) \
+	vpand x3, x0, x0; \
+	vpxor x3, x1, x1; \
+	vpxor x2, x3, x3; \
+	vpxor x1, x0, x0; \
+	vpand x4, x2, x2; \
+	vpxor x2, x1, x1; \
+	vpand x0, x2, x2; \
+	vpxor x2, x3, x3;
+
+#define S6_1(x0, x1, x2, x3, x4) \
+	vpxor x0, x3, x3; \
+	vpxor x2, x1, tp; \
+	vpxor x0, x2, x2; \
+	vpand x3, x0, x0; \
+	vpor x3, tp, tp; \
+	vpxor RNOT, x1, x4; \
+	vpxor tp, x0, x0; \
+	vpxor x2, tp, x1;
+#define S6_2(x0, x1, x2, x3, x4) \
+	vpxor x4, x3, x3; \
+	vpxor x0, x4, x4; \
+	vpand x0, x2, x2; \
+	vpxor x1, x4, x4; \
+	vpxor x3, x2, x2; \
+	vpand x1, x3, x3; \
+	vpxor x0, x3, x3; \
+	vpxor x2, x1, x1;
+
+#define S7_1(x0, x1, x2, x3, x4) \
+	vpxor RNOT, x1, tp; \
+	vpxor RNOT, x0, x0; \
+	vpand x2, tp, x1; \
+	vpxor x3, x1, x1; \
+	vpor tp, x3, x3; \
+	vpxor x2, tp, x4; \
+	vpxor x3, x2, x2; \
+	vpxor x0, x3, x3; \
+	vpor x1, x0, x0;
+#define S7_2(x0, x1, x2, x3, x4) \
+	vpand x0, x2, x2; \
+	vpxor x4, x0, x0; \
+	vpxor x3, x4, x4; \
+	vpand x0, x3, x3; \
+	vpxor x1, x4, x4; \
+	vpxor x4, x2, x2; \
+	vpxor x1, x3, x3; \
+	vpor x0, x4, x4; \
+	vpxor x1, x4, x4;
+
+#define SI0_1(x0, x1, x2, x3, x4) \
+	vpxor x0, x1, x1; \
+	vpor x1, x3, tp; \
+	vpxor x1, x3, x4; \
+	vpxor RNOT, x0, x0; \
+	vpxor tp, x2, x2; \
+	vpxor x0, tp, x3; \
+	vpand x1, x0, x0; \
+	vpxor x2, x0, x0;
+#define SI0_2(x0, x1, x2, x3, x4) \
+	vpand x3, x2, x2; \
+	vpxor x4, x3, x3; \
+	vpxor x3, x2, x2; \
+	vpxor x3, x1, x1; \
+	vpand x0, x3, x3; \
+	vpxor x0, x1, x1; \
+	vpxor x2, x0, x0; \
+	vpxor x3, x4, x4;
+
+#define SI1_1(x0, x1, x2, x3, x4) \
+	vpxor x3, x1, x1; \
+	vpxor x2, x0, tp; \
+	vpxor RNOT, x2, x2; \
+	vpor x1, x0, x4; \
+	vpxor x3, x4, x4; \
+	vpand x1, x3, x3; \
+	vpxor x2, x1, x1; \
+	vpand x4, x2, x2;
+#define SI1_2(x0, x1, x2, x3, x4) \
+	vpxor x1, x4, x4; \
+	vpor x3, x1, x1; \
+	vpxor tp, x3, x3; \
+	vpxor tp, x2, x2; \
+	vpor x4, tp, x0; \
+	vpxor x4, x2, x2; \
+	vpxor x0, x1, x1; \
+	vpxor x1, x4, x4;
+
+#define SI2_1(x0, x1, x2, x3, x4) \
+	vpxor x1, x2, x2; \
+	vpxor RNOT, x3, tp; \
+	vpor x2, tp, tp; \
+	vpxor x3, x2, x2; \
+	vpxor x0, x3, x4; \
+	vpxor x1, tp, x3; \
+	vpor x2, x1, x1; \
+	vpxor x0, x2, x2;
+#define SI2_2(x0, x1, x2, x3, x4) \
+	vpxor x4, x1, x1; \
+	vpor x3, x4, x4; \
+	vpxor x3, x2, x2; \
+	vpxor x2, x4, x4; \
+	vpand x1, x2, x2; \
+	vpxor x3, x2, x2; \
+	vpxor x4, x3, x3; \
+	vpxor x0, x4, x4;
+
+#define SI3_1(x0, x1, x2, x3, x4) \
+	vpxor x1, x2, x2; \
+	vpand x2, x1, tp; \
+	vpxor x0, tp, tp; \
+	vpor x1, x0, x0; \
+	vpxor x3, x1, x4; \
+	vpxor x3, x0, x0; \
+	vpor tp, x3, x3; \
+	vpxor x2, tp, x1;
+#define SI3_2(x0, x1, x2, x3, x4) \
+	vpxor x3, x1, x1; \
+	vpxor x2, x0, x0; \
+	vpxor x3, x2, x2; \
+	vpand x1, x3, x3; \
+	vpxor x0, x1, x1; \
+	vpand x2, x0, x0; \
+	vpxor x3, x4, x4; \
+	vpxor x0, x3, x3; \
+	vpxor x1, x0, x0;
+
+#define SI4_1(x0, x1, x2, x3, x4) \
+	vpxor x3, x2, x2; \
+	vpand x1, x0, tp; \
+	vpxor x2, tp, tp; \
+	vpor x3, x2, x2; \
+	vpxor RNOT, x0, x4; \
+	vpxor tp, x1, x1; \
+	vpxor x2, tp, x0; \
+	vpand x4, x2, x2;
+#define SI4_2(x0, x1, x2, x3, x4) \
+	vpxor x0, x2, x2; \
+	vpor x4, x0, x0; \
+	vpxor x3, x0, x0; \
+	vpand x2, x3, x3; \
+	vpxor x3, x4, x4; \
+	vpxor x1, x3, x3; \
+	vpand x0, x1, x1; \
+	vpxor x1, x4, x4; \
+	vpxor x3, x0, x0;
+
+#define SI5_1(x0, x1, x2, x3, x4) \
+	vpor x2, x1, tp; \
+	vpxor x1, x2, x2; \
+	vpxor x3, tp, tp; \
+	vpand x1, x3, x3; \
+	vpxor x3, x2, x2; \
+	vpor x0, x3, x3; \
+	vpxor RNOT, x0, x0; \
+	vpxor x2, x3, x3; \
+	vpor x0, x2, x2;
+#define SI5_2(x0, x1, x2, x3, x4) \
+	vpxor tp, x1, x4; \
+	vpxor x4, x2, x2; \
+	vpand x0, x4, x4; \
+	vpxor tp, x0, x0; \
+	vpxor x3, tp, x1; \
+	vpand x2, x0, x0; \
+	vpxor x3, x2, x2; \
+	vpxor x2, x0, x0; \
+	vpxor x4, x2, x2; \
+	vpxor x3, x4, x4;
+
+#define SI6_1(x0, x1, x2, x3, x4) \
+	vpxor x2, x0, x0; \
+	vpand x3, x0, tp; \
+	vpxor x3, x2, x2; \
+	vpxor x2, tp, tp; \
+	vpxor x1, x3, x3; \
+	vpor x0, x2, x2; \
+	vpxor x3, x2, x2; \
+	vpand tp, x3, x3;
+#define SI6_2(x0, x1, x2, x3, x4) \
+	vpxor RNOT, tp, tp; \
+	vpxor x1, x3, x3; \
+	vpand x2, x1, x1; \
+	vpxor tp, x0, x4; \
+	vpxor x4, x3, x3; \
+	vpxor x2, x4, x4; \
+	vpxor x1, tp, x0; \
+	vpxor x0, x2, x2;
+
+#define SI7_1(x0, x1, x2, x3, x4) \
+	vpand x0, x3, tp; \
+	vpxor x2, x0, x0; \
+	vpor x3, x2, x2; \
+	vpxor x1, x3, x4; \
357 | vpxor RNOT, x0, x0; \ | ||
358 | vpor tp, x1, x1; \ | ||
359 | vpxor x0, x4, x4; \ | ||
360 | vpand x2, x0, x0; \ | ||
361 | vpxor x1, x0, x0; | ||
362 | #define SI7_2(x0, x1, x2, x3, x4) \ | ||
363 | vpand x2, x1, x1; \ | ||
364 | vpxor x2, tp, x3; \ | ||
365 | vpxor x3, x4, x4; \ | ||
366 | vpand x3, x2, x2; \ | ||
367 | vpor x0, x3, x3; \ | ||
368 | vpxor x4, x1, x1; \ | ||
369 | vpxor x4, x3, x3; \ | ||
370 | vpand x0, x4, x4; \ | ||
371 | vpxor x2, x4, x4; | ||
372 | |||
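[The Sn_1/Sn_2 pairs above, and the SIn_* inverses below, are Serpent's eight S-boxes in bitslice form: each boolean circuit acts bitwise on four 32-bit words, evaluating the 4-bit S-box across all 32 bit columns at once, and the 128-bit registers widen that to four blocks per lane. Splitting each circuit into two halves lets the S()/SP() macros further down interleave two independent 4-block groups to hide instruction latency. As a reference for what one such circuit computes, here is a scalar column-wise model — a hypothetical helper, assuming kernel u8/u32 types and memcpy(); the real macros produce the same mapping, modulo which register ends up holding which output word:

    /* Reference semantics of one bitsliced S-box application: for
     * each bit column i, gather a 4-bit input from x[0..3], look it
     * up, and scatter the output bits back.  The macros above do the
     * same job as a branch-free boolean circuit. */
    static void sbox_columnwise(const u8 sbox[16], u32 x[4])
    {
    	u32 y[4] = { 0, 0, 0, 0 };
    	int i, j;

    	for (i = 0; i < 32; i++) {
    		unsigned int in = 0, out;

    		for (j = 0; j < 4; j++)
    			in |= ((x[j] >> i) & 1) << j;
    		out = sbox[in];
    		for (j = 0; j < 4; j++)
    			y[j] |= ((u32)((out >> j) & 1)) << i;
    	}
    	memcpy(x, y, sizeof(y));
    }
]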
373 | #define get_key(i, j, t) \ | ||
374 | vbroadcastss (4*(i)+(j))*4(CTX), t; | ||
375 | |||
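[get_key's addressing is just the expanded-key layout in scalar form: assuming the expkey[] layout from crypto/serpent.h (33 rounds x 4 u32 words), round i, word j lives at byte offset (4*i + j)*4, which vbroadcastss then splats across all four lanes. An illustrative scalar accessor:

    static inline u32 serpent_subkey(const struct serpent_ctx *ctx,
    				     int round, int word)
    {
    	return ctx->expkey[4 * round + word];
    }
]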
376 | #define K2(x0, x1, x2, x3, x4, i) \ | ||
377 | get_key(i, 0, RK0); \ | ||
378 | get_key(i, 1, RK1); \ | ||
379 | get_key(i, 2, RK2); \ | ||
380 | get_key(i, 3, RK3); \ | ||
381 | vpxor RK0, x0 ## 1, x0 ## 1; \ | ||
382 | vpxor RK1, x1 ## 1, x1 ## 1; \ | ||
383 | vpxor RK2, x2 ## 1, x2 ## 1; \ | ||
384 | vpxor RK3, x3 ## 1, x3 ## 1; \ | ||
385 | vpxor RK0, x0 ## 2, x0 ## 2; \ | ||
386 | vpxor RK1, x1 ## 2, x1 ## 2; \ | ||
387 | vpxor RK2, x2 ## 2, x2 ## 2; \ | ||
388 | vpxor RK3, x3 ## 2, x3 ## 2; | ||
389 | |||
390 | #define LK2(x0, x1, x2, x3, x4, i) \ | ||
391 | vpslld $13, x0 ## 1, x4 ## 1; \ | ||
392 | vpsrld $(32 - 13), x0 ## 1, x0 ## 1; \ | ||
393 | vpor x4 ## 1, x0 ## 1, x0 ## 1; \ | ||
394 | vpxor x0 ## 1, x1 ## 1, x1 ## 1; \ | ||
395 | vpslld $3, x2 ## 1, x4 ## 1; \ | ||
396 | vpsrld $(32 - 3), x2 ## 1, x2 ## 1; \ | ||
397 | vpor x4 ## 1, x2 ## 1, x2 ## 1; \ | ||
398 | vpxor x2 ## 1, x1 ## 1, x1 ## 1; \ | ||
399 | vpslld $13, x0 ## 2, x4 ## 2; \ | ||
400 | vpsrld $(32 - 13), x0 ## 2, x0 ## 2; \ | ||
401 | vpor x4 ## 2, x0 ## 2, x0 ## 2; \ | ||
402 | vpxor x0 ## 2, x1 ## 2, x1 ## 2; \ | ||
403 | vpslld $3, x2 ## 2, x4 ## 2; \ | ||
404 | vpsrld $(32 - 3), x2 ## 2, x2 ## 2; \ | ||
405 | vpor x4 ## 2, x2 ## 2, x2 ## 2; \ | ||
406 | vpxor x2 ## 2, x1 ## 2, x1 ## 2; \ | ||
407 | vpslld $1, x1 ## 1, x4 ## 1; \ | ||
408 | vpsrld $(32 - 1), x1 ## 1, x1 ## 1; \ | ||
409 | vpor x4 ## 1, x1 ## 1, x1 ## 1; \ | ||
410 | vpslld $3, x0 ## 1, x4 ## 1; \ | ||
411 | vpxor x2 ## 1, x3 ## 1, x3 ## 1; \ | ||
412 | vpxor x4 ## 1, x3 ## 1, x3 ## 1; \ | ||
413 | get_key(i, 1, RK1); \ | ||
414 | vpslld $1, x1 ## 2, x4 ## 2; \ | ||
415 | vpsrld $(32 - 1), x1 ## 2, x1 ## 2; \ | ||
416 | vpor x4 ## 2, x1 ## 2, x1 ## 2; \ | ||
417 | vpslld $3, x0 ## 2, x4 ## 2; \ | ||
418 | vpxor x2 ## 2, x3 ## 2, x3 ## 2; \ | ||
419 | vpxor x4 ## 2, x3 ## 2, x3 ## 2; \ | ||
420 | get_key(i, 3, RK3); \ | ||
421 | vpslld $7, x3 ## 1, x4 ## 1; \ | ||
422 | vpsrld $(32 - 7), x3 ## 1, x3 ## 1; \ | ||
423 | vpor x4 ## 1, x3 ## 1, x3 ## 1; \ | ||
424 | vpslld $7, x1 ## 1, x4 ## 1; \ | ||
425 | vpxor x1 ## 1, x0 ## 1, x0 ## 1; \ | ||
426 | vpxor x3 ## 1, x0 ## 1, x0 ## 1; \ | ||
427 | vpxor x3 ## 1, x2 ## 1, x2 ## 1; \ | ||
428 | vpxor x4 ## 1, x2 ## 1, x2 ## 1; \ | ||
429 | get_key(i, 0, RK0); \ | ||
430 | vpslld $7, x3 ## 2, x4 ## 2; \ | ||
431 | vpsrld $(32 - 7), x3 ## 2, x3 ## 2; \ | ||
432 | vpor x4 ## 2, x3 ## 2, x3 ## 2; \ | ||
433 | vpslld $7, x1 ## 2, x4 ## 2; \ | ||
434 | vpxor x1 ## 2, x0 ## 2, x0 ## 2; \ | ||
435 | vpxor x3 ## 2, x0 ## 2, x0 ## 2; \ | ||
436 | vpxor x3 ## 2, x2 ## 2, x2 ## 2; \ | ||
437 | vpxor x4 ## 2, x2 ## 2, x2 ## 2; \ | ||
438 | get_key(i, 2, RK2); \ | ||
439 | vpxor RK1, x1 ## 1, x1 ## 1; \ | ||
440 | vpxor RK3, x3 ## 1, x3 ## 1; \ | ||
441 | vpslld $5, x0 ## 1, x4 ## 1; \ | ||
442 | vpsrld $(32 - 5), x0 ## 1, x0 ## 1; \ | ||
443 | vpor x4 ## 1, x0 ## 1, x0 ## 1; \ | ||
444 | vpslld $22, x2 ## 1, x4 ## 1; \ | ||
445 | vpsrld $(32 - 22), x2 ## 1, x2 ## 1; \ | ||
446 | vpor x4 ## 1, x2 ## 1, x2 ## 1; \ | ||
447 | vpxor RK0, x0 ## 1, x0 ## 1; \ | ||
448 | vpxor RK2, x2 ## 1, x2 ## 1; \ | ||
449 | vpxor RK1, x1 ## 2, x1 ## 2; \ | ||
450 | vpxor RK3, x3 ## 2, x3 ## 2; \ | ||
451 | vpslld $5, x0 ## 2, x4 ## 2; \ | ||
452 | vpsrld $(32 - 5), x0 ## 2, x0 ## 2; \ | ||
453 | vpor x4 ## 2, x0 ## 2, x0 ## 2; \ | ||
454 | vpslld $22, x2 ## 2, x4 ## 2; \ | ||
455 | vpsrld $(32 - 22), x2 ## 2, x2 ## 2; \ | ||
456 | vpor x4 ## 2, x2 ## 2, x2 ## 2; \ | ||
457 | vpxor RK0, x0 ## 2, x0 ## 2; \ | ||
458 | vpxor RK2, x2 ## 2, x2 ## 2; | ||
459 | |||
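[LK2 fuses Serpent's linear transformation with the next round's key mixing, duplicated for both 4-block groups and with the get_key() loads scheduled between the arithmetic; KL2 below is the same transform run backwards for decryption. Stripped of that interleaving, the rotate constants 13/3/1/7/5/22 are the standard Serpent LT. A scalar sketch (rol32 as in <linux/bitops.h>); note that AVX has no vector rotate, which is why every rol above is synthesized as vpslld + vpsrld + vpor:

    static void serpent_lt(u32 *x)	/* illustrative scalar model */
    {
    	x[0] = rol32(x[0], 13);
    	x[2] = rol32(x[2], 3);
    	x[1] ^= x[0] ^ x[2];
    	x[3] ^= x[2] ^ (x[0] << 3);
    	x[1] = rol32(x[1], 1);
    	x[3] = rol32(x[3], 7);
    	x[0] ^= x[1] ^ x[3];
    	x[2] ^= x[3] ^ (x[1] << 7);
    	x[0] = rol32(x[0], 5);
    	x[2] = rol32(x[2], 22);
    }
]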
460 | #define KL2(x0, x1, x2, x3, x4, i) \ | ||
461 | vpxor RK0, x0 ## 1, x0 ## 1; \ | ||
462 | vpxor RK2, x2 ## 1, x2 ## 1; \ | ||
463 | vpsrld $5, x0 ## 1, x4 ## 1; \ | ||
464 | vpslld $(32 - 5), x0 ## 1, x0 ## 1; \ | ||
465 | vpor x4 ## 1, x0 ## 1, x0 ## 1; \ | ||
466 | vpxor RK3, x3 ## 1, x3 ## 1; \ | ||
467 | vpxor RK1, x1 ## 1, x1 ## 1; \ | ||
468 | vpsrld $22, x2 ## 1, x4 ## 1; \ | ||
469 | vpslld $(32 - 22), x2 ## 1, x2 ## 1; \ | ||
470 | vpor x4 ## 1, x2 ## 1, x2 ## 1; \ | ||
471 | vpxor x3 ## 1, x2 ## 1, x2 ## 1; \ | ||
472 | vpxor RK0, x0 ## 2, x0 ## 2; \ | ||
473 | vpxor RK2, x2 ## 2, x2 ## 2; \ | ||
474 | vpsrld $5, x0 ## 2, x4 ## 2; \ | ||
475 | vpslld $(32 - 5), x0 ## 2, x0 ## 2; \ | ||
476 | vpor x4 ## 2, x0 ## 2, x0 ## 2; \ | ||
477 | vpxor RK3, x3 ## 2, x3 ## 2; \ | ||
478 | vpxor RK1, x1 ## 2, x1 ## 2; \ | ||
479 | vpsrld $22, x2 ## 2, x4 ## 2; \ | ||
480 | vpslld $(32 - 22), x2 ## 2, x2 ## 2; \ | ||
481 | vpor x4 ## 2, x2 ## 2, x2 ## 2; \ | ||
482 | vpxor x3 ## 2, x2 ## 2, x2 ## 2; \ | ||
483 | vpxor x3 ## 1, x0 ## 1, x0 ## 1; \ | ||
484 | vpslld $7, x1 ## 1, x4 ## 1; \ | ||
485 | vpxor x1 ## 1, x0 ## 1, x0 ## 1; \ | ||
486 | vpxor x4 ## 1, x2 ## 1, x2 ## 1; \ | ||
487 | vpsrld $1, x1 ## 1, x4 ## 1; \ | ||
488 | vpslld $(32 - 1), x1 ## 1, x1 ## 1; \ | ||
489 | vpor x4 ## 1, x1 ## 1, x1 ## 1; \ | ||
490 | vpxor x3 ## 2, x0 ## 2, x0 ## 2; \ | ||
491 | vpslld $7, x1 ## 2, x4 ## 2; \ | ||
492 | vpxor x1 ## 2, x0 ## 2, x0 ## 2; \ | ||
493 | vpxor x4 ## 2, x2 ## 2, x2 ## 2; \ | ||
494 | vpsrld $1, x1 ## 2, x4 ## 2; \ | ||
495 | vpslld $(32 - 1), x1 ## 2, x1 ## 2; \ | ||
496 | vpor x4 ## 2, x1 ## 2, x1 ## 2; \ | ||
497 | vpsrld $7, x3 ## 1, x4 ## 1; \ | ||
498 | vpslld $(32 - 7), x3 ## 1, x3 ## 1; \ | ||
499 | vpor x4 ## 1, x3 ## 1, x3 ## 1; \ | ||
500 | vpxor x0 ## 1, x1 ## 1, x1 ## 1; \ | ||
501 | vpslld $3, x0 ## 1, x4 ## 1; \ | ||
502 | vpxor x4 ## 1, x3 ## 1, x3 ## 1; \ | ||
503 | vpsrld $7, x3 ## 2, x4 ## 2; \ | ||
504 | vpslld $(32 - 7), x3 ## 2, x3 ## 2; \ | ||
505 | vpor x4 ## 2, x3 ## 2, x3 ## 2; \ | ||
506 | vpxor x0 ## 2, x1 ## 2, x1 ## 2; \ | ||
507 | vpslld $3, x0 ## 2, x4 ## 2; \ | ||
508 | vpxor x4 ## 2, x3 ## 2, x3 ## 2; \ | ||
509 | vpsrld $13, x0 ## 1, x4 ## 1; \ | ||
510 | vpslld $(32 - 13), x0 ## 1, x0 ## 1; \ | ||
511 | vpor x4 ## 1, x0 ## 1, x0 ## 1; \ | ||
512 | vpxor x2 ## 1, x1 ## 1, x1 ## 1; \ | ||
513 | vpxor x2 ## 1, x3 ## 1, x3 ## 1; \ | ||
514 | vpsrld $3, x2 ## 1, x4 ## 1; \ | ||
515 | vpslld $(32 - 3), x2 ## 1, x2 ## 1; \ | ||
516 | vpor x4 ## 1, x2 ## 1, x2 ## 1; \ | ||
517 | vpsrld $13, x0 ## 2, x4 ## 2; \ | ||
518 | vpslld $(32 - 13), x0 ## 2, x0 ## 2; \ | ||
519 | vpor x4 ## 2, x0 ## 2, x0 ## 2; \ | ||
520 | vpxor x2 ## 2, x1 ## 2, x1 ## 2; \ | ||
521 | vpxor x2 ## 2, x3 ## 2, x3 ## 2; \ | ||
522 | vpsrld $3, x2 ## 2, x4 ## 2; \ | ||
523 | vpslld $(32 - 3), x2 ## 2, x2 ## 2; \ | ||
524 | vpor x4 ## 2, x2 ## 2, x2 ## 2; | ||
525 | |||
526 | #define S(SBOX, x0, x1, x2, x3, x4) \ | ||
527 | SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ | ||
528 | SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ | ||
529 | SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ | ||
530 | SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); | ||
531 | |||
532 | #define SP(SBOX, x0, x1, x2, x3, x4, i) \ | ||
533 | get_key(i, 0, RK0); \ | ||
534 | SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ | ||
535 | get_key(i, 2, RK2); \ | ||
536 | SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ | ||
537 | get_key(i, 3, RK3); \ | ||
538 | SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ | ||
539 | get_key(i, 1, RK1); \ | ||
540 | SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); | ||

541 | |||
542 | #define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ | ||
543 | vpunpckldq x1, x0, t0; \ | ||
544 | vpunpckhdq x1, x0, t2; \ | ||
545 | vpunpckldq x3, x2, t1; \ | ||
546 | vpunpckhdq x3, x2, x3; \ | ||
547 | \ | ||
548 | vpunpcklqdq t1, t0, x0; \ | ||
549 | vpunpckhqdq t1, t0, x1; \ | ||
550 | vpunpcklqdq x3, t2, x2; \ | ||
551 | vpunpckhqdq x3, t2, x3; | ||
552 | |||
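[transpose_4x4 converts between the memory layout (one 16-byte block per register) and the computation layout (register n holds word n of four different blocks, so the bitsliced S-boxes process four blocks in parallel). It is a 4x4 transpose of u32 elements built from the usual unpack-dword/unpack-qword ladder; in scalar terms (using the kernel's swap() macro):

    /* Scalar model: viewing the four registers as rows of a 4x4
     * u32 matrix, the macro computes out[i][j] = in[j][i]. */
    static void transpose_4x4_model(u32 m[4][4])
    {
    	int i, j;

    	for (i = 0; i < 4; i++)
    		for (j = i + 1; j < 4; j++)
    			swap(m[i][j], m[j][i]);
    }
]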
553 | #define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \ | ||
554 | vmovdqu (0*4*4)(in), x0; \ | ||
555 | vmovdqu (1*4*4)(in), x1; \ | ||
556 | vmovdqu (2*4*4)(in), x2; \ | ||
557 | vmovdqu (3*4*4)(in), x3; \ | ||
558 | \ | ||
559 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) | ||
560 | |||
561 | #define write_blocks(out, x0, x1, x2, x3, t0, t1, t2) \ | ||
562 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ | ||
563 | \ | ||
564 | vmovdqu x0, (0*4*4)(out); \ | ||
565 | vmovdqu x1, (1*4*4)(out); \ | ||
566 | vmovdqu x2, (2*4*4)(out); \ | ||
567 | vmovdqu x3, (3*4*4)(out); | ||
568 | |||
569 | #define xor_blocks(out, x0, x1, x2, x3, t0, t1, t2) \ | ||
570 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ | ||
571 | \ | ||
572 | vpxor (0*4*4)(out), x0, x0; \ | ||
573 | vmovdqu x0, (0*4*4)(out); \ | ||
574 | vpxor (1*4*4)(out), x1, x1; \ | ||
575 | vmovdqu x1, (1*4*4)(out); \ | ||
576 | vpxor (2*4*4)(out), x2, x2; \ | ||
577 | vmovdqu x2, (2*4*4)(out); \ | ||
578 | vpxor (3*4*4)(out), x3, x3; \ | ||
579 | vmovdqu x3, (3*4*4)(out); | ||
580 | |||
581 | .align 8 | ||
582 | .global __serpent_enc_blk_8way_avx | ||
583 | .type __serpent_enc_blk_8way_avx,@function; | ||
584 | |||
585 | __serpent_enc_blk_8way_avx: | ||
586 | /* input: | ||
587 | * %rdi: ctx, CTX | ||
588 | * %rsi: dst | ||
589 | * %rdx: src | ||
590 | * %rcx: bool; if true, xor output into dst | ||
591 | */ | ||
592 | |||
593 | vpcmpeqd RNOT, RNOT, RNOT; | ||
594 | |||
595 | leaq (4*4*4)(%rdx), %rax; | ||
596 | read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2); | ||
597 | read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); | ||
598 | |||
599 | K2(RA, RB, RC, RD, RE, 0); | ||
600 | S(S0, RA, RB, RC, RD, RE); LK2(RC, RB, RD, RA, RE, 1); | ||
601 | S(S1, RC, RB, RD, RA, RE); LK2(RE, RD, RA, RC, RB, 2); | ||
602 | S(S2, RE, RD, RA, RC, RB); LK2(RB, RD, RE, RC, RA, 3); | ||
603 | S(S3, RB, RD, RE, RC, RA); LK2(RC, RA, RD, RB, RE, 4); | ||
604 | S(S4, RC, RA, RD, RB, RE); LK2(RA, RD, RB, RE, RC, 5); | ||
605 | S(S5, RA, RD, RB, RE, RC); LK2(RC, RA, RD, RE, RB, 6); | ||
606 | S(S6, RC, RA, RD, RE, RB); LK2(RD, RB, RA, RE, RC, 7); | ||
607 | S(S7, RD, RB, RA, RE, RC); LK2(RC, RA, RE, RD, RB, 8); | ||
608 | S(S0, RC, RA, RE, RD, RB); LK2(RE, RA, RD, RC, RB, 9); | ||
609 | S(S1, RE, RA, RD, RC, RB); LK2(RB, RD, RC, RE, RA, 10); | ||
610 | S(S2, RB, RD, RC, RE, RA); LK2(RA, RD, RB, RE, RC, 11); | ||
611 | S(S3, RA, RD, RB, RE, RC); LK2(RE, RC, RD, RA, RB, 12); | ||
612 | S(S4, RE, RC, RD, RA, RB); LK2(RC, RD, RA, RB, RE, 13); | ||
613 | S(S5, RC, RD, RA, RB, RE); LK2(RE, RC, RD, RB, RA, 14); | ||
614 | S(S6, RE, RC, RD, RB, RA); LK2(RD, RA, RC, RB, RE, 15); | ||
615 | S(S7, RD, RA, RC, RB, RE); LK2(RE, RC, RB, RD, RA, 16); | ||
616 | S(S0, RE, RC, RB, RD, RA); LK2(RB, RC, RD, RE, RA, 17); | ||
617 | S(S1, RB, RC, RD, RE, RA); LK2(RA, RD, RE, RB, RC, 18); | ||
618 | S(S2, RA, RD, RE, RB, RC); LK2(RC, RD, RA, RB, RE, 19); | ||
619 | S(S3, RC, RD, RA, RB, RE); LK2(RB, RE, RD, RC, RA, 20); | ||
620 | S(S4, RB, RE, RD, RC, RA); LK2(RE, RD, RC, RA, RB, 21); | ||
621 | S(S5, RE, RD, RC, RA, RB); LK2(RB, RE, RD, RA, RC, 22); | ||
622 | S(S6, RB, RE, RD, RA, RC); LK2(RD, RC, RE, RA, RB, 23); | ||
623 | S(S7, RD, RC, RE, RA, RB); LK2(RB, RE, RA, RD, RC, 24); | ||
624 | S(S0, RB, RE, RA, RD, RC); LK2(RA, RE, RD, RB, RC, 25); | ||
625 | S(S1, RA, RE, RD, RB, RC); LK2(RC, RD, RB, RA, RE, 26); | ||
626 | S(S2, RC, RD, RB, RA, RE); LK2(RE, RD, RC, RA, RB, 27); | ||
627 | S(S3, RE, RD, RC, RA, RB); LK2(RA, RB, RD, RE, RC, 28); | ||
628 | S(S4, RA, RB, RD, RE, RC); LK2(RB, RD, RE, RC, RA, 29); | ||
629 | S(S5, RB, RD, RE, RC, RA); LK2(RA, RB, RD, RC, RE, 30); | ||
630 | S(S6, RA, RB, RD, RC, RE); LK2(RD, RE, RB, RC, RA, 31); | ||
631 | S(S7, RD, RE, RB, RC, RA); K2(RA, RB, RC, RD, RE, 32); | ||
632 | |||
633 | leaq (4*4*4)(%rsi), %rax; | ||
634 | |||
635 | testb %cl, %cl; | ||
636 | jnz __enc_xor8; | ||
637 | |||
638 | write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); | ||
639 | write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); | ||
640 | |||
641 | ret; | ||
642 | |||
643 | __enc_xor8: | ||
644 | xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); | ||
645 | xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); | ||
646 | |||
647 | ret; | ||
648 | |||
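[Viewed from the C side, the register comment above corresponds to declarations along the following lines; the authoritative prototypes live in the new asm/crypto/serpent-avx.h header, so this is just a sketch of the calling convention:

    asmlinkage void __serpent_enc_blk_8way_avx(struct serpent_ctx *ctx,
    					       u8 *dst, const u8 *src,
    					       bool xor);
    asmlinkage void serpent_dec_blk_8way_avx(struct serpent_ctx *ctx,
    					     u8 *dst, const u8 *src);
]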
649 | .align 8 | ||
650 | .global serpent_dec_blk_8way_avx | ||
651 | .type serpent_dec_blk_8way_avx,@function; | ||
652 | |||
653 | serpent_dec_blk_8way_avx: | ||
654 | /* input: | ||
655 | * %rdi: ctx, CTX | ||
656 | * %rsi: dst | ||
657 | * %rdx: src | ||
658 | */ | ||
659 | |||
660 | vpcmpeqd RNOT, RNOT, RNOT; | ||
661 | |||
662 | leaq (4*4*4)(%rdx), %rax; | ||
663 | read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2); | ||
664 | read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); | ||
665 | |||
666 | K2(RA, RB, RC, RD, RE, 32); | ||
667 | SP(SI7, RA, RB, RC, RD, RE, 31); KL2(RB, RD, RA, RE, RC, 31); | ||
668 | SP(SI6, RB, RD, RA, RE, RC, 30); KL2(RA, RC, RE, RB, RD, 30); | ||
669 | SP(SI5, RA, RC, RE, RB, RD, 29); KL2(RC, RD, RA, RE, RB, 29); | ||
670 | SP(SI4, RC, RD, RA, RE, RB, 28); KL2(RC, RA, RB, RE, RD, 28); | ||
671 | SP(SI3, RC, RA, RB, RE, RD, 27); KL2(RB, RC, RD, RE, RA, 27); | ||
672 | SP(SI2, RB, RC, RD, RE, RA, 26); KL2(RC, RA, RE, RD, RB, 26); | ||
673 | SP(SI1, RC, RA, RE, RD, RB, 25); KL2(RB, RA, RE, RD, RC, 25); | ||
674 | SP(SI0, RB, RA, RE, RD, RC, 24); KL2(RE, RC, RA, RB, RD, 24); | ||
675 | SP(SI7, RE, RC, RA, RB, RD, 23); KL2(RC, RB, RE, RD, RA, 23); | ||
676 | SP(SI6, RC, RB, RE, RD, RA, 22); KL2(RE, RA, RD, RC, RB, 22); | ||
677 | SP(SI5, RE, RA, RD, RC, RB, 21); KL2(RA, RB, RE, RD, RC, 21); | ||
678 | SP(SI4, RA, RB, RE, RD, RC, 20); KL2(RA, RE, RC, RD, RB, 20); | ||
679 | SP(SI3, RA, RE, RC, RD, RB, 19); KL2(RC, RA, RB, RD, RE, 19); | ||
680 | SP(SI2, RC, RA, RB, RD, RE, 18); KL2(RA, RE, RD, RB, RC, 18); | ||
681 | SP(SI1, RA, RE, RD, RB, RC, 17); KL2(RC, RE, RD, RB, RA, 17); | ||
682 | SP(SI0, RC, RE, RD, RB, RA, 16); KL2(RD, RA, RE, RC, RB, 16); | ||
683 | SP(SI7, RD, RA, RE, RC, RB, 15); KL2(RA, RC, RD, RB, RE, 15); | ||
684 | SP(SI6, RA, RC, RD, RB, RE, 14); KL2(RD, RE, RB, RA, RC, 14); | ||
685 | SP(SI5, RD, RE, RB, RA, RC, 13); KL2(RE, RC, RD, RB, RA, 13); | ||
686 | SP(SI4, RE, RC, RD, RB, RA, 12); KL2(RE, RD, RA, RB, RC, 12); | ||
687 | SP(SI3, RE, RD, RA, RB, RC, 11); KL2(RA, RE, RC, RB, RD, 11); | ||
688 | SP(SI2, RA, RE, RC, RB, RD, 10); KL2(RE, RD, RB, RC, RA, 10); | ||
689 | SP(SI1, RE, RD, RB, RC, RA, 9); KL2(RA, RD, RB, RC, RE, 9); | ||
690 | SP(SI0, RA, RD, RB, RC, RE, 8); KL2(RB, RE, RD, RA, RC, 8); | ||
691 | SP(SI7, RB, RE, RD, RA, RC, 7); KL2(RE, RA, RB, RC, RD, 7); | ||
692 | SP(SI6, RE, RA, RB, RC, RD, 6); KL2(RB, RD, RC, RE, RA, 6); | ||
693 | SP(SI5, RB, RD, RC, RE, RA, 5); KL2(RD, RA, RB, RC, RE, 5); | ||
694 | SP(SI4, RD, RA, RB, RC, RE, 4); KL2(RD, RB, RE, RC, RA, 4); | ||
695 | SP(SI3, RD, RB, RE, RC, RA, 3); KL2(RE, RD, RA, RC, RB, 3); | ||
696 | SP(SI2, RE, RD, RA, RC, RB, 2); KL2(RD, RB, RC, RA, RE, 2); | ||
697 | SP(SI1, RD, RB, RC, RA, RE, 1); KL2(RE, RB, RC, RA, RD, 1); | ||
698 | S(SI0, RE, RB, RC, RA, RD); K2(RC, RD, RB, RE, RA, 0); | ||
699 | |||
700 | leaq (4*4*4)(%rsi), %rax; | ||
701 | write_blocks(%rsi, RC1, RD1, RB1, RE1, RK0, RK1, RK2); | ||
702 | write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2); | ||
703 | |||
704 | ret; | ||
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c new file mode 100644 index 000000000000..b36bdac237eb --- /dev/null +++ b/arch/x86/crypto/serpent_avx_glue.c | |||
@@ -0,0 +1,636 @@ | |||
1 | /* | ||
2 | * Glue Code for AVX assembler versions of Serpent Cipher | ||
3 | * | ||
4 | * Copyright (C) 2012 Johannes Goetzfried | ||
5 | * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> | ||
6 | * | ||
7 | * Glue code based on serpent_sse2_glue.c by: | ||
8 | * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; either version 2 of the License, or | ||
13 | * (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 | * GNU General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, write to the Free Software | ||
22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
23 | * USA | ||
24 | * | ||
25 | */ | ||
26 | |||
27 | #include <linux/module.h> | ||
28 | #include <linux/hardirq.h> | ||
29 | #include <linux/types.h> | ||
30 | #include <linux/crypto.h> | ||
31 | #include <linux/err.h> | ||
32 | #include <crypto/algapi.h> | ||
33 | #include <crypto/serpent.h> | ||
34 | #include <crypto/cryptd.h> | ||
35 | #include <crypto/b128ops.h> | ||
36 | #include <crypto/ctr.h> | ||
37 | #include <crypto/lrw.h> | ||
38 | #include <crypto/xts.h> | ||
39 | #include <asm/xcr.h> | ||
40 | #include <asm/xsave.h> | ||
41 | #include <asm/crypto/serpent-avx.h> | ||
42 | #include <asm/crypto/ablk_helper.h> | ||
43 | #include <asm/crypto/glue_helper.h> | ||
44 | |||
45 | static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src) | ||
46 | { | ||
47 | u128 ivs[SERPENT_PARALLEL_BLOCKS - 1]; | ||
48 | unsigned int j; | ||
49 | |||
50 | for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++) | ||
51 | ivs[j] = src[j]; | ||
52 | |||
53 | serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src); | ||
54 | |||
55 | for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++) | ||
56 | u128_xor(dst + (j + 1), dst + (j + 1), ivs + j); | ||
57 | } | ||
58 | |||
59 | static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv) | ||
60 | { | ||
61 | be128 ctrblk; | ||
62 | |||
63 | u128_to_be128(&ctrblk, iv); | ||
64 | u128_inc(iv); | ||
65 | |||
66 | __serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); | ||
67 | u128_xor(dst, src, (u128 *)&ctrblk); | ||
68 | } | ||
69 | |||
70 | static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src, | ||
71 | u128 *iv) | ||
72 | { | ||
73 | be128 ctrblks[SERPENT_PARALLEL_BLOCKS]; | ||
74 | unsigned int i; | ||
75 | |||
76 | for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) { | ||
77 | if (dst != src) | ||
78 | dst[i] = src[i]; | ||
79 | |||
80 | u128_to_be128(&ctrblks[i], iv); | ||
81 | u128_inc(iv); | ||
82 | } | ||
83 | |||
84 | serpent_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks); | ||
85 | } | ||
86 | |||
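[Both CTR helpers treat the IV as a 128-bit big-endian counter held in native-endian u128 halves: u128_to_be128 byte-swaps it into the block that gets encrypted, and u128_inc is a plain add with carry. These helpers previously lived in serpent_sse2_glue.c (see the removal hunk below) and now come from the shared glue headers; for reference, the increment is:

    static inline void u128_inc(u128 *i)	/* carry low half into high */
    {
    	i->b++;
    	if (!i->b)
    		i->a++;
    }
]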
87 | static const struct common_glue_ctx serpent_enc = { | ||
88 | .num_funcs = 2, | ||
89 | .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, | ||
90 | |||
91 | .funcs = { { | ||
92 | .num_blocks = SERPENT_PARALLEL_BLOCKS, | ||
93 | .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_enc_blk_xway) } | ||
94 | }, { | ||
95 | .num_blocks = 1, | ||
96 | .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) } | ||
97 | } } | ||
98 | }; | ||
99 | |||
100 | static const struct common_glue_ctx serpent_ctr = { | ||
101 | .num_funcs = 2, | ||
102 | .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, | ||
103 | |||
104 | .funcs = { { | ||
105 | .num_blocks = SERPENT_PARALLEL_BLOCKS, | ||
106 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr_xway) } | ||
107 | }, { | ||
108 | .num_blocks = 1, | ||
109 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr) } | ||
110 | } } | ||
111 | }; | ||
112 | |||
113 | static const struct common_glue_ctx serpent_dec = { | ||
114 | .num_funcs = 2, | ||
115 | .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, | ||
116 | |||
117 | .funcs = { { | ||
118 | .num_blocks = SERPENT_PARALLEL_BLOCKS, | ||
119 | .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_dec_blk_xway) } | ||
120 | }, { | ||
121 | .num_blocks = 1, | ||
122 | .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) } | ||
123 | } } | ||
124 | }; | ||
125 | |||
126 | static const struct common_glue_ctx serpent_dec_cbc = { | ||
127 | .num_funcs = 2, | ||
128 | .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, | ||
129 | |||
130 | .funcs = { { | ||
131 | .num_blocks = SERPENT_PARALLEL_BLOCKS, | ||
132 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_decrypt_cbc_xway) } | ||
133 | }, { | ||
134 | .num_blocks = 1, | ||
135 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) } | ||
136 | } } | ||
137 | }; | ||
138 | |||
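[The four common_glue_ctx tables drive the shared dispatcher: funcs[] is ordered from the widest num_blocks down to 1, and glue_helper walks it, using the 8-way routine while at least SERPENT_PARALLEL_BLOCKS blocks remain and falling back to the scalar cipher for the tail; fpu_blocks_limit keeps the FPU untouched for requests too small to benefit. Schematically (simplified from glue_helper.c; the real loop also walks scatterlists and manages FPU state):

    for (i = 0; i < gctx->num_funcs; i++) {
    	unsigned int func_bytes = bsize * gctx->funcs[i].num_blocks;

    	while (nbytes >= func_bytes) {
    		gctx->funcs[i].fn_u.ecb(ctx, wdst, wsrc);
    		wsrc   += func_bytes;
    		wdst   += func_bytes;
    		nbytes -= func_bytes;
    	}
    }
]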
139 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
140 | struct scatterlist *src, unsigned int nbytes) | ||
141 | { | ||
142 | return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes); | ||
143 | } | ||
144 | |||
145 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
146 | struct scatterlist *src, unsigned int nbytes) | ||
147 | { | ||
148 | return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes); | ||
149 | } | ||
150 | |||
151 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
152 | struct scatterlist *src, unsigned int nbytes) | ||
153 | { | ||
154 | return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc, | ||
155 | dst, src, nbytes); | ||
156 | } | ||
157 | |||
158 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
159 | struct scatterlist *src, unsigned int nbytes) | ||
160 | { | ||
161 | return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src, | ||
162 | nbytes); | ||
163 | } | ||
164 | |||
165 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
166 | struct scatterlist *src, unsigned int nbytes) | ||
167 | { | ||
168 | return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes); | ||
169 | } | ||
170 | |||
171 | static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes) | ||
172 | { | ||
173 | return glue_fpu_begin(SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS, | ||
174 | NULL, fpu_enabled, nbytes); | ||
175 | } | ||
176 | |||
177 | static inline void serpent_fpu_end(bool fpu_enabled) | ||
178 | { | ||
179 | glue_fpu_end(fpu_enabled); | ||
180 | } | ||
181 | |||
182 | struct crypt_priv { | ||
183 | struct serpent_ctx *ctx; | ||
184 | bool fpu_enabled; | ||
185 | }; | ||
186 | |||
187 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
188 | { | ||
189 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | ||
190 | struct crypt_priv *ctx = priv; | ||
191 | int i; | ||
192 | |||
193 | ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); | ||
194 | |||
195 | if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) { | ||
196 | serpent_enc_blk_xway(ctx->ctx, srcdst, srcdst); | ||
197 | return; | ||
198 | } | ||
199 | |||
200 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
201 | __serpent_encrypt(ctx->ctx, srcdst, srcdst); | ||
202 | } | ||
203 | |||
204 | static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
205 | { | ||
206 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | ||
207 | struct crypt_priv *ctx = priv; | ||
208 | int i; | ||
209 | |||
210 | ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); | ||
211 | |||
212 | if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) { | ||
213 | serpent_dec_blk_xway(ctx->ctx, srcdst, srcdst); | ||
214 | return; | ||
215 | } | ||
216 | |||
217 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
218 | __serpent_decrypt(ctx->ctx, srcdst, srcdst); | ||
219 | } | ||
220 | |||
221 | struct serpent_lrw_ctx { | ||
222 | struct lrw_table_ctx lrw_table; | ||
223 | struct serpent_ctx serpent_ctx; | ||
224 | }; | ||
225 | |||
226 | static int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
227 | unsigned int keylen) | ||
228 | { | ||
229 | struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); | ||
230 | int err; | ||
231 | |||
232 | err = __serpent_setkey(&ctx->serpent_ctx, key, keylen - | ||
233 | SERPENT_BLOCK_SIZE); | ||
234 | if (err) | ||
235 | return err; | ||
236 | |||
237 | return lrw_init_table(&ctx->lrw_table, key + keylen - | ||
238 | SERPENT_BLOCK_SIZE); | ||
239 | } | ||
240 | |||
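[An LRW key is the cipher key with one block-sized tweak key appended, which is why the lrw(serpent) algorithm entries below advertise min/max_keysize of SERPENT_{MIN,MAX}_KEY_SIZE + SERPENT_BLOCK_SIZE. A worked example for a 256-bit cipher key:

    /* keylen = 48: bytes  0..31 -> __serpent_setkey()  (cipher key)
     *              bytes 32..47 -> lrw_init_table()    (tweak key) */
]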
241 | static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
242 | struct scatterlist *src, unsigned int nbytes) | ||
243 | { | ||
244 | struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
245 | be128 buf[SERPENT_PARALLEL_BLOCKS]; | ||
246 | struct crypt_priv crypt_ctx = { | ||
247 | .ctx = &ctx->serpent_ctx, | ||
248 | .fpu_enabled = false, | ||
249 | }; | ||
250 | struct lrw_crypt_req req = { | ||
251 | .tbuf = buf, | ||
252 | .tbuflen = sizeof(buf), | ||
253 | |||
254 | .table_ctx = &ctx->lrw_table, | ||
255 | .crypt_ctx = &crypt_ctx, | ||
256 | .crypt_fn = encrypt_callback, | ||
257 | }; | ||
258 | int ret; | ||
259 | |||
260 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
261 | ret = lrw_crypt(desc, dst, src, nbytes, &req); | ||
262 | serpent_fpu_end(crypt_ctx.fpu_enabled); | ||
263 | |||
264 | return ret; | ||
265 | } | ||
266 | |||
267 | static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
268 | struct scatterlist *src, unsigned int nbytes) | ||
269 | { | ||
270 | struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
271 | be128 buf[SERPENT_PARALLEL_BLOCKS]; | ||
272 | struct crypt_priv crypt_ctx = { | ||
273 | .ctx = &ctx->serpent_ctx, | ||
274 | .fpu_enabled = false, | ||
275 | }; | ||
276 | struct lrw_crypt_req req = { | ||
277 | .tbuf = buf, | ||
278 | .tbuflen = sizeof(buf), | ||
279 | |||
280 | .table_ctx = &ctx->lrw_table, | ||
281 | .crypt_ctx = &crypt_ctx, | ||
282 | .crypt_fn = decrypt_callback, | ||
283 | }; | ||
284 | int ret; | ||
285 | |||
286 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
287 | ret = lrw_crypt(desc, dst, src, nbytes, &req); | ||
288 | serpent_fpu_end(crypt_ctx.fpu_enabled); | ||
289 | |||
290 | return ret; | ||
291 | } | ||
292 | |||
293 | static void lrw_exit_tfm(struct crypto_tfm *tfm) | ||
294 | { | ||
295 | struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); | ||
296 | |||
297 | lrw_free_table(&ctx->lrw_table); | ||
298 | } | ||
299 | |||
300 | struct serpent_xts_ctx { | ||
301 | struct serpent_ctx tweak_ctx; | ||
302 | struct serpent_ctx crypt_ctx; | ||
303 | }; | ||
304 | |||
305 | static int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
306 | unsigned int keylen) | ||
307 | { | ||
308 | struct serpent_xts_ctx *ctx = crypto_tfm_ctx(tfm); | ||
309 | u32 *flags = &tfm->crt_flags; | ||
310 | int err; | ||
311 | |||
312 | /* The key consists of two keys of equal size concatenated; | ||
313 |  * therefore the total length must be even. | ||
314 |  */ | ||
315 | if (keylen % 2) { | ||
316 | *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; | ||
317 | return -EINVAL; | ||
318 | } | ||
319 | |||
320 | /* first half of xts-key is for crypt */ | ||
321 | err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2); | ||
322 | if (err) | ||
323 | return err; | ||
324 | |||
325 | /* second half of xts-key is for tweak */ | ||
326 | return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2); | ||
327 | } | ||
328 | |||
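[The XTS key is two full Serpent keys back to back, matching the min/max_keysize of SERPENT_{MIN,MAX}_KEY_SIZE * 2 advertised below; e.g. with keylen = 64:

    /* bytes  0..31 -> ctx->crypt_ctx  (data-unit encryption key)
     * bytes 32..63 -> ctx->tweak_ctx  (tweak encryption key)  */
]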
329 | static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
330 | struct scatterlist *src, unsigned int nbytes) | ||
331 | { | ||
332 | struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
333 | be128 buf[SERPENT_PARALLEL_BLOCKS]; | ||
334 | struct crypt_priv crypt_ctx = { | ||
335 | .ctx = &ctx->crypt_ctx, | ||
336 | .fpu_enabled = false, | ||
337 | }; | ||
338 | struct xts_crypt_req req = { | ||
339 | .tbuf = buf, | ||
340 | .tbuflen = sizeof(buf), | ||
341 | |||
342 | .tweak_ctx = &ctx->tweak_ctx, | ||
343 | .tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt), | ||
344 | .crypt_ctx = &crypt_ctx, | ||
345 | .crypt_fn = encrypt_callback, | ||
346 | }; | ||
347 | int ret; | ||
348 | |||
349 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
350 | ret = xts_crypt(desc, dst, src, nbytes, &req); | ||
351 | serpent_fpu_end(crypt_ctx.fpu_enabled); | ||
352 | |||
353 | return ret; | ||
354 | } | ||
355 | |||
356 | static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
357 | struct scatterlist *src, unsigned int nbytes) | ||
358 | { | ||
359 | struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
360 | be128 buf[SERPENT_PARALLEL_BLOCKS]; | ||
361 | struct crypt_priv crypt_ctx = { | ||
362 | .ctx = &ctx->crypt_ctx, | ||
363 | .fpu_enabled = false, | ||
364 | }; | ||
365 | struct xts_crypt_req req = { | ||
366 | .tbuf = buf, | ||
367 | .tbuflen = sizeof(buf), | ||
368 | |||
369 | .tweak_ctx = &ctx->tweak_ctx, | ||
370 | .tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt), | ||
371 | .crypt_ctx = &crypt_ctx, | ||
372 | .crypt_fn = decrypt_callback, | ||
373 | }; | ||
374 | int ret; | ||
375 | |||
376 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
377 | ret = xts_crypt(desc, dst, src, nbytes, &req); | ||
378 | serpent_fpu_end(crypt_ctx.fpu_enabled); | ||
379 | |||
380 | return ret; | ||
381 | } | ||
382 | |||
383 | static struct crypto_alg serpent_algs[10] = { { | ||
384 | .cra_name = "__ecb-serpent-avx", | ||
385 | .cra_driver_name = "__driver-ecb-serpent-avx", | ||
386 | .cra_priority = 0, | ||
387 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
388 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
389 | .cra_ctxsize = sizeof(struct serpent_ctx), | ||
390 | .cra_alignmask = 0, | ||
391 | .cra_type = &crypto_blkcipher_type, | ||
392 | .cra_module = THIS_MODULE, | ||
393 | .cra_list = LIST_HEAD_INIT(serpent_algs[0].cra_list), | ||
394 | .cra_u = { | ||
395 | .blkcipher = { | ||
396 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
397 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
398 | .setkey = serpent_setkey, | ||
399 | .encrypt = ecb_encrypt, | ||
400 | .decrypt = ecb_decrypt, | ||
401 | }, | ||
402 | }, | ||
403 | }, { | ||
404 | .cra_name = "__cbc-serpent-avx", | ||
405 | .cra_driver_name = "__driver-cbc-serpent-avx", | ||
406 | .cra_priority = 0, | ||
407 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
408 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
409 | .cra_ctxsize = sizeof(struct serpent_ctx), | ||
410 | .cra_alignmask = 0, | ||
411 | .cra_type = &crypto_blkcipher_type, | ||
412 | .cra_module = THIS_MODULE, | ||
413 | .cra_list = LIST_HEAD_INIT(serpent_algs[1].cra_list), | ||
414 | .cra_u = { | ||
415 | .blkcipher = { | ||
416 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
417 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
418 | .setkey = serpent_setkey, | ||
419 | .encrypt = cbc_encrypt, | ||
420 | .decrypt = cbc_decrypt, | ||
421 | }, | ||
422 | }, | ||
423 | }, { | ||
424 | .cra_name = "__ctr-serpent-avx", | ||
425 | .cra_driver_name = "__driver-ctr-serpent-avx", | ||
426 | .cra_priority = 0, | ||
427 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
428 | .cra_blocksize = 1, | ||
429 | .cra_ctxsize = sizeof(struct serpent_ctx), | ||
430 | .cra_alignmask = 0, | ||
431 | .cra_type = &crypto_blkcipher_type, | ||
432 | .cra_module = THIS_MODULE, | ||
433 | .cra_list = LIST_HEAD_INIT(serpent_algs[2].cra_list), | ||
434 | .cra_u = { | ||
435 | .blkcipher = { | ||
436 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
437 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
438 | .ivsize = SERPENT_BLOCK_SIZE, | ||
439 | .setkey = serpent_setkey, | ||
440 | .encrypt = ctr_crypt, | ||
441 | .decrypt = ctr_crypt, | ||
442 | }, | ||
443 | }, | ||
444 | }, { | ||
445 | .cra_name = "__lrw-serpent-avx", | ||
446 | .cra_driver_name = "__driver-lrw-serpent-avx", | ||
447 | .cra_priority = 0, | ||
448 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
449 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
450 | .cra_ctxsize = sizeof(struct serpent_lrw_ctx), | ||
451 | .cra_alignmask = 0, | ||
452 | .cra_type = &crypto_blkcipher_type, | ||
453 | .cra_module = THIS_MODULE, | ||
454 | .cra_list = LIST_HEAD_INIT(serpent_algs[3].cra_list), | ||
455 | .cra_exit = lrw_exit_tfm, | ||
456 | .cra_u = { | ||
457 | .blkcipher = { | ||
458 | .min_keysize = SERPENT_MIN_KEY_SIZE + | ||
459 | SERPENT_BLOCK_SIZE, | ||
460 | .max_keysize = SERPENT_MAX_KEY_SIZE + | ||
461 | SERPENT_BLOCK_SIZE, | ||
462 | .ivsize = SERPENT_BLOCK_SIZE, | ||
463 | .setkey = lrw_serpent_setkey, | ||
464 | .encrypt = lrw_encrypt, | ||
465 | .decrypt = lrw_decrypt, | ||
466 | }, | ||
467 | }, | ||
468 | }, { | ||
469 | .cra_name = "__xts-serpent-avx", | ||
470 | .cra_driver_name = "__driver-xts-serpent-avx", | ||
471 | .cra_priority = 0, | ||
472 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
473 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
474 | .cra_ctxsize = sizeof(struct serpent_xts_ctx), | ||
475 | .cra_alignmask = 0, | ||
476 | .cra_type = &crypto_blkcipher_type, | ||
477 | .cra_module = THIS_MODULE, | ||
478 | .cra_list = LIST_HEAD_INIT(serpent_algs[4].cra_list), | ||
479 | .cra_u = { | ||
480 | .blkcipher = { | ||
481 | .min_keysize = SERPENT_MIN_KEY_SIZE * 2, | ||
482 | .max_keysize = SERPENT_MAX_KEY_SIZE * 2, | ||
483 | .ivsize = SERPENT_BLOCK_SIZE, | ||
484 | .setkey = xts_serpent_setkey, | ||
485 | .encrypt = xts_encrypt, | ||
486 | .decrypt = xts_decrypt, | ||
487 | }, | ||
488 | }, | ||
489 | }, { | ||
490 | .cra_name = "ecb(serpent)", | ||
491 | .cra_driver_name = "ecb-serpent-avx", | ||
492 | .cra_priority = 500, | ||
493 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
494 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
495 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
496 | .cra_alignmask = 0, | ||
497 | .cra_type = &crypto_ablkcipher_type, | ||
498 | .cra_module = THIS_MODULE, | ||
499 | .cra_list = LIST_HEAD_INIT(serpent_algs[5].cra_list), | ||
500 | .cra_init = ablk_init, | ||
501 | .cra_exit = ablk_exit, | ||
502 | .cra_u = { | ||
503 | .ablkcipher = { | ||
504 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
505 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
506 | .setkey = ablk_set_key, | ||
507 | .encrypt = ablk_encrypt, | ||
508 | .decrypt = ablk_decrypt, | ||
509 | }, | ||
510 | }, | ||
511 | }, { | ||
512 | .cra_name = "cbc(serpent)", | ||
513 | .cra_driver_name = "cbc-serpent-avx", | ||
514 | .cra_priority = 500, | ||
515 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
516 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
517 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
518 | .cra_alignmask = 0, | ||
519 | .cra_type = &crypto_ablkcipher_type, | ||
520 | .cra_module = THIS_MODULE, | ||
521 | .cra_list = LIST_HEAD_INIT(serpent_algs[6].cra_list), | ||
522 | .cra_init = ablk_init, | ||
523 | .cra_exit = ablk_exit, | ||
524 | .cra_u = { | ||
525 | .ablkcipher = { | ||
526 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
527 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
528 | .ivsize = SERPENT_BLOCK_SIZE, | ||
529 | .setkey = ablk_set_key, | ||
530 | .encrypt = __ablk_encrypt, /* sequential; runs synchronously */ | ||
531 | .decrypt = ablk_decrypt, | ||
532 | }, | ||
533 | }, | ||
534 | }, { | ||
535 | .cra_name = "ctr(serpent)", | ||
536 | .cra_driver_name = "ctr-serpent-avx", | ||
537 | .cra_priority = 500, | ||
538 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
539 | .cra_blocksize = 1, | ||
540 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
541 | .cra_alignmask = 0, | ||
542 | .cra_type = &crypto_ablkcipher_type, | ||
543 | .cra_module = THIS_MODULE, | ||
544 | .cra_list = LIST_HEAD_INIT(serpent_algs[7].cra_list), | ||
545 | .cra_init = ablk_init, | ||
546 | .cra_exit = ablk_exit, | ||
547 | .cra_u = { | ||
548 | .ablkcipher = { | ||
549 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
550 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
551 | .ivsize = SERPENT_BLOCK_SIZE, | ||
552 | .setkey = ablk_set_key, | ||
553 | .encrypt = ablk_encrypt, | ||
554 | .decrypt = ablk_encrypt, /* CTR: decryption == encryption */ | ||
555 | .geniv = "chainiv", | ||
556 | }, | ||
557 | }, | ||
558 | }, { | ||
559 | .cra_name = "lrw(serpent)", | ||
560 | .cra_driver_name = "lrw-serpent-avx", | ||
561 | .cra_priority = 500, | ||
562 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
563 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
564 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
565 | .cra_alignmask = 0, | ||
566 | .cra_type = &crypto_ablkcipher_type, | ||
567 | .cra_module = THIS_MODULE, | ||
568 | .cra_list = LIST_HEAD_INIT(serpent_algs[8].cra_list), | ||
569 | .cra_init = ablk_init, | ||
570 | .cra_exit = ablk_exit, | ||
571 | .cra_u = { | ||
572 | .ablkcipher = { | ||
573 | .min_keysize = SERPENT_MIN_KEY_SIZE + | ||
574 | SERPENT_BLOCK_SIZE, | ||
575 | .max_keysize = SERPENT_MAX_KEY_SIZE + | ||
576 | SERPENT_BLOCK_SIZE, | ||
577 | .ivsize = SERPENT_BLOCK_SIZE, | ||
578 | .setkey = ablk_set_key, | ||
579 | .encrypt = ablk_encrypt, | ||
580 | .decrypt = ablk_decrypt, | ||
581 | }, | ||
582 | }, | ||
583 | }, { | ||
584 | .cra_name = "xts(serpent)", | ||
585 | .cra_driver_name = "xts-serpent-avx", | ||
586 | .cra_priority = 500, | ||
587 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
588 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
589 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
590 | .cra_alignmask = 0, | ||
591 | .cra_type = &crypto_ablkcipher_type, | ||
592 | .cra_module = THIS_MODULE, | ||
593 | .cra_list = LIST_HEAD_INIT(serpent_algs[9].cra_list), | ||
594 | .cra_init = ablk_init, | ||
595 | .cra_exit = ablk_exit, | ||
596 | .cra_u = { | ||
597 | .ablkcipher = { | ||
598 | .min_keysize = SERPENT_MIN_KEY_SIZE * 2, | ||
599 | .max_keysize = SERPENT_MAX_KEY_SIZE * 2, | ||
600 | .ivsize = SERPENT_BLOCK_SIZE, | ||
601 | .setkey = ablk_set_key, | ||
602 | .encrypt = ablk_encrypt, | ||
603 | .decrypt = ablk_decrypt, | ||
604 | }, | ||
605 | }, | ||
606 | } }; | ||
607 | |||
608 | static int __init serpent_init(void) | ||
609 | { | ||
610 | u64 xcr0; | ||
611 | |||
612 | if (!cpu_has_avx || !cpu_has_osxsave) { | ||
613 | printk(KERN_INFO "AVX instructions not detected.\n"); | ||
614 | return -ENODEV; | ||
615 | } | ||
616 | |||
617 | xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); | ||
618 | if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { | ||
619 | printk(KERN_INFO "AVX detected but unusable.\n"); | ||
620 | return -ENODEV; | ||
621 | } | ||
622 | |||
623 | return crypto_register_algs(serpent_algs, ARRAY_SIZE(serpent_algs)); | ||
624 | } | ||
625 | |||
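[The init-time probe needs both CPU support (cpu_has_avx) and OS support: OSXSAVE indicates the kernel uses XSAVE, and XCR0 must have the SSE and YMM state bits enabled before the AVX registers may legally be touched. Spelled out, with bit values per the x86 XSAVE definitions:

    /* XSTATE_SSE == (1 << 1), XSTATE_YMM == (1 << 2), so the test is
     * (xcr0 & 0x6) == 0x6: both XMM and YMM state must be enabled. */
]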
626 | static void __exit serpent_exit(void) | ||
627 | { | ||
628 | crypto_unregister_algs(serpent_algs, ARRAY_SIZE(serpent_algs)); | ||
629 | } | ||
630 | |||
631 | module_init(serpent_init); | ||
632 | module_exit(serpent_exit); | ||
633 | |||
634 | MODULE_DESCRIPTION("Serpent Cipher Algorithm, AVX optimized"); | ||
635 | MODULE_LICENSE("GPL"); | ||
636 | MODULE_ALIAS("serpent"); | ||
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c index 4b21be85e0a1..d679c8675f4a 100644 --- a/arch/x86/crypto/serpent_sse2_glue.c +++ b/arch/x86/crypto/serpent_sse2_glue.c | |||
@@ -41,358 +41,145 @@ | |||
41 | #include <crypto/ctr.h> | 41 | #include <crypto/ctr.h> |
42 | #include <crypto/lrw.h> | 42 | #include <crypto/lrw.h> |
43 | #include <crypto/xts.h> | 43 | #include <crypto/xts.h> |
44 | #include <asm/i387.h> | 44 | #include <asm/crypto/serpent-sse2.h> |
45 | #include <asm/serpent.h> | 45 | #include <asm/crypto/ablk_helper.h> |
46 | #include <crypto/scatterwalk.h> | 46 | #include <asm/crypto/glue_helper.h> |
47 | #include <linux/workqueue.h> | ||
48 | #include <linux/spinlock.h> | ||
49 | |||
50 | struct async_serpent_ctx { | ||
51 | struct cryptd_ablkcipher *cryptd_tfm; | ||
52 | }; | ||
53 | 47 | ||
54 | static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes) | 48 | static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src) |
55 | { | ||
56 | if (fpu_enabled) | ||
57 | return true; | ||
58 | |||
59 | /* SSE2 is only used when chunk to be processed is large enough, so | ||
60 | * do not enable FPU until it is necessary. | ||
61 | */ | ||
62 | if (nbytes < SERPENT_BLOCK_SIZE * SERPENT_PARALLEL_BLOCKS) | ||
63 | return false; | ||
64 | |||
65 | kernel_fpu_begin(); | ||
66 | return true; | ||
67 | } | ||
68 | |||
69 | static inline void serpent_fpu_end(bool fpu_enabled) | ||
70 | { | 49 | { |
71 | if (fpu_enabled) | 50 | u128 ivs[SERPENT_PARALLEL_BLOCKS - 1]; |
72 | kernel_fpu_end(); | 51 | unsigned int j; |
73 | } | ||
74 | |||
75 | static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, | ||
76 | bool enc) | ||
77 | { | ||
78 | bool fpu_enabled = false; | ||
79 | struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
80 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | ||
81 | unsigned int nbytes; | ||
82 | int err; | ||
83 | |||
84 | err = blkcipher_walk_virt(desc, walk); | ||
85 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
86 | |||
87 | while ((nbytes = walk->nbytes)) { | ||
88 | u8 *wsrc = walk->src.virt.addr; | ||
89 | u8 *wdst = walk->dst.virt.addr; | ||
90 | |||
91 | fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes); | ||
92 | |||
93 | /* Process multi-block batch */ | ||
94 | if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) { | ||
95 | do { | ||
96 | if (enc) | ||
97 | serpent_enc_blk_xway(ctx, wdst, wsrc); | ||
98 | else | ||
99 | serpent_dec_blk_xway(ctx, wdst, wsrc); | ||
100 | |||
101 | wsrc += bsize * SERPENT_PARALLEL_BLOCKS; | ||
102 | wdst += bsize * SERPENT_PARALLEL_BLOCKS; | ||
103 | nbytes -= bsize * SERPENT_PARALLEL_BLOCKS; | ||
104 | } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS); | ||
105 | |||
106 | if (nbytes < bsize) | ||
107 | goto done; | ||
108 | } | ||
109 | |||
110 | /* Handle leftovers */ | ||
111 | do { | ||
112 | if (enc) | ||
113 | __serpent_encrypt(ctx, wdst, wsrc); | ||
114 | else | ||
115 | __serpent_decrypt(ctx, wdst, wsrc); | ||
116 | |||
117 | wsrc += bsize; | ||
118 | wdst += bsize; | ||
119 | nbytes -= bsize; | ||
120 | } while (nbytes >= bsize); | ||
121 | |||
122 | done: | ||
123 | err = blkcipher_walk_done(desc, walk, nbytes); | ||
124 | } | ||
125 | 52 | ||
126 | serpent_fpu_end(fpu_enabled); | 53 | for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++) |
127 | return err; | 54 | ivs[j] = src[j]; |
128 | } | ||
129 | 55 | ||
130 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 56 | serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src); |
131 | struct scatterlist *src, unsigned int nbytes) | ||
132 | { | ||
133 | struct blkcipher_walk walk; | ||
134 | 57 | ||
135 | blkcipher_walk_init(&walk, dst, src, nbytes); | 58 | for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++) |
136 | return ecb_crypt(desc, &walk, true); | 59 | u128_xor(dst + (j + 1), dst + (j + 1), ivs + j); |
137 | } | 60 | } |
138 | 61 | ||
139 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 62 | static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv) |
140 | struct scatterlist *src, unsigned int nbytes) | ||
141 | { | 63 | { |
142 | struct blkcipher_walk walk; | 64 | be128 ctrblk; |
143 | 65 | ||
144 | blkcipher_walk_init(&walk, dst, src, nbytes); | 66 | u128_to_be128(&ctrblk, iv); |
145 | return ecb_crypt(desc, &walk, false); | 67 | u128_inc(iv); |
146 | } | ||
147 | 68 | ||
148 | static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, | 69 | __serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); |
149 | struct blkcipher_walk *walk) | 70 | u128_xor(dst, src, (u128 *)&ctrblk); |
150 | { | ||
151 | struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
152 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | ||
153 | unsigned int nbytes = walk->nbytes; | ||
154 | u128 *src = (u128 *)walk->src.virt.addr; | ||
155 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
156 | u128 *iv = (u128 *)walk->iv; | ||
157 | |||
158 | do { | ||
159 | u128_xor(dst, src, iv); | ||
160 | __serpent_encrypt(ctx, (u8 *)dst, (u8 *)dst); | ||
161 | iv = dst; | ||
162 | |||
163 | src += 1; | ||
164 | dst += 1; | ||
165 | nbytes -= bsize; | ||
166 | } while (nbytes >= bsize); | ||
167 | |||
168 | u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv); | ||
169 | return nbytes; | ||
170 | } | 71 | } |
171 | 72 | ||
172 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 73 | static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src, |
173 | struct scatterlist *src, unsigned int nbytes) | 74 | u128 *iv) |
174 | { | 75 | { |
175 | struct blkcipher_walk walk; | 76 | be128 ctrblks[SERPENT_PARALLEL_BLOCKS]; |
176 | int err; | 77 | unsigned int i; |
177 | 78 | ||
178 | blkcipher_walk_init(&walk, dst, src, nbytes); | 79 | for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) { |
179 | err = blkcipher_walk_virt(desc, &walk); | 80 | if (dst != src) |
81 | dst[i] = src[i]; | ||
180 | 82 | ||
181 | while ((nbytes = walk.nbytes)) { | 83 | u128_to_be128(&ctrblks[i], iv); |
182 | nbytes = __cbc_encrypt(desc, &walk); | 84 | u128_inc(iv); |
183 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
184 | } | 85 | } |
185 | 86 | ||
186 | return err; | 87 | serpent_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks); |
187 | } | 88 | } |
188 | 89 | ||
189 | static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, | 90 | static const struct common_glue_ctx serpent_enc = { |
190 | struct blkcipher_walk *walk) | 91 | .num_funcs = 2, |
191 | { | 92 | .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, |
192 | struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
193 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | ||
194 | unsigned int nbytes = walk->nbytes; | ||
195 | u128 *src = (u128 *)walk->src.virt.addr; | ||
196 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
197 | u128 ivs[SERPENT_PARALLEL_BLOCKS - 1]; | ||
198 | u128 last_iv; | ||
199 | int i; | ||
200 | |||
201 | /* Start of the last block. */ | ||
202 | src += nbytes / bsize - 1; | ||
203 | dst += nbytes / bsize - 1; | ||
204 | |||
205 | last_iv = *src; | ||
206 | |||
207 | /* Process multi-block batch */ | ||
208 | if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) { | ||
209 | do { | ||
210 | nbytes -= bsize * (SERPENT_PARALLEL_BLOCKS - 1); | ||
211 | src -= SERPENT_PARALLEL_BLOCKS - 1; | ||
212 | dst -= SERPENT_PARALLEL_BLOCKS - 1; | ||
213 | |||
214 | for (i = 0; i < SERPENT_PARALLEL_BLOCKS - 1; i++) | ||
215 | ivs[i] = src[i]; | ||
216 | |||
217 | serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src); | ||
218 | |||
219 | for (i = 0; i < SERPENT_PARALLEL_BLOCKS - 1; i++) | ||
220 | u128_xor(dst + (i + 1), dst + (i + 1), ivs + i); | ||
221 | |||
222 | nbytes -= bsize; | ||
223 | if (nbytes < bsize) | ||
224 | goto done; | ||
225 | 93 | ||
226 | u128_xor(dst, dst, src - 1); | 94 | .funcs = { { |
227 | src -= 1; | 95 | .num_blocks = SERPENT_PARALLEL_BLOCKS, |
228 | dst -= 1; | 96 | .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_enc_blk_xway) } |
229 | } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS); | 97 | }, { |
230 | 98 | .num_blocks = 1, | |
231 | if (nbytes < bsize) | 99 | .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) } |
232 | goto done; | 100 | } } |
233 | } | 101 | }; |
234 | |||
235 | /* Handle leftovers */ | ||
236 | for (;;) { | ||
237 | __serpent_decrypt(ctx, (u8 *)dst, (u8 *)src); | ||
238 | |||
239 | nbytes -= bsize; | ||
240 | if (nbytes < bsize) | ||
241 | break; | ||
242 | 102 | ||
243 | u128_xor(dst, dst, src - 1); | 103 | static const struct common_glue_ctx serpent_ctr = { |
244 | src -= 1; | 104 | .num_funcs = 2, |
245 | dst -= 1; | 105 | .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, |
246 | } | 106 | |
107 | .funcs = { { | ||
108 | .num_blocks = SERPENT_PARALLEL_BLOCKS, | ||
109 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr_xway) } | ||
110 | }, { | ||
111 | .num_blocks = 1, | ||
112 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr) } | ||
113 | } } | ||
114 | }; | ||
247 | 115 | ||
248 | done: | 116 | static const struct common_glue_ctx serpent_dec = { |
249 | u128_xor(dst, dst, (u128 *)walk->iv); | 117 | .num_funcs = 2, |
250 | *(u128 *)walk->iv = last_iv; | 118 | .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, |
119 | |||
120 | .funcs = { { | ||
121 | .num_blocks = SERPENT_PARALLEL_BLOCKS, | ||
122 | .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_dec_blk_xway) } | ||
123 | }, { | ||
124 | .num_blocks = 1, | ||
125 | .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) } | ||
126 | } } | ||
127 | }; | ||
251 | 128 | ||
252 | return nbytes; | 129 | static const struct common_glue_ctx serpent_dec_cbc = { |
253 | } | 130 | .num_funcs = 2, |
131 | .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, | ||
132 | |||
133 | .funcs = { { | ||
134 | .num_blocks = SERPENT_PARALLEL_BLOCKS, | ||
135 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_decrypt_cbc_xway) } | ||
136 | }, { | ||
137 | .num_blocks = 1, | ||
138 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) } | ||
139 | } } | ||
140 | }; | ||
254 | 141 | ||
255 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 142 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
256 | struct scatterlist *src, unsigned int nbytes) | 143 | struct scatterlist *src, unsigned int nbytes) |
257 | { | 144 | { |
258 | bool fpu_enabled = false; | 145 | return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes); |
259 | struct blkcipher_walk walk; | ||
260 | int err; | ||
261 | |||
262 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
263 | err = blkcipher_walk_virt(desc, &walk); | ||
264 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
265 | |||
266 | while ((nbytes = walk.nbytes)) { | ||
267 | fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes); | ||
268 | nbytes = __cbc_decrypt(desc, &walk); | ||
269 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
270 | } | ||
271 | |||
272 | serpent_fpu_end(fpu_enabled); | ||
273 | return err; | ||
274 | } | 146 | } |
275 | 147 | ||
276 | static inline void u128_to_be128(be128 *dst, const u128 *src) | 148 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
149 | struct scatterlist *src, unsigned int nbytes) | ||
277 | { | 150 | { |
278 | dst->a = cpu_to_be64(src->a); | 151 | return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes); |
279 | dst->b = cpu_to_be64(src->b); | ||
280 | } | 152 | } |
281 | 153 | ||
282 | static inline void be128_to_u128(u128 *dst, const be128 *src) | 154 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
155 | struct scatterlist *src, unsigned int nbytes) | ||
283 | { | 156 | { |
284 | dst->a = be64_to_cpu(src->a); | 157 | return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc, |
285 | dst->b = be64_to_cpu(src->b); | 158 | dst, src, nbytes); |
286 | } | 159 | } |
287 | 160 | ||
288 | static inline void u128_inc(u128 *i) | 161 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
162 | struct scatterlist *src, unsigned int nbytes) | ||
289 | { | 163 | { |
290 | i->b++; | 164 | return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src, |
291 | if (!i->b) | 165 | nbytes); |
292 | i->a++; | ||
293 | } | 166 | } |
294 | 167 | ||
295 | static void ctr_crypt_final(struct blkcipher_desc *desc, | 168 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
296 | struct blkcipher_walk *walk) | 169 | struct scatterlist *src, unsigned int nbytes) |
297 | { | 170 | { |
298 | struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | 171 | return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes); |
299 | u8 *ctrblk = walk->iv; | ||
300 | u8 keystream[SERPENT_BLOCK_SIZE]; | ||
301 | u8 *src = walk->src.virt.addr; | ||
302 | u8 *dst = walk->dst.virt.addr; | ||
303 | unsigned int nbytes = walk->nbytes; | ||
304 | |||
305 | __serpent_encrypt(ctx, keystream, ctrblk); | ||
306 | crypto_xor(keystream, src, nbytes); | ||
307 | memcpy(dst, keystream, nbytes); | ||
308 | |||
309 | crypto_inc(ctrblk, SERPENT_BLOCK_SIZE); | ||
310 | } | 172 | } |
311 | 173 | ||
312 | static unsigned int __ctr_crypt(struct blkcipher_desc *desc, | 174 | static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes) |
313 | struct blkcipher_walk *walk) | ||
314 | { | 175 | { |
315 | struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | 176 | return glue_fpu_begin(SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS, |
316 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | 177 | NULL, fpu_enabled, nbytes); |
317 | unsigned int nbytes = walk->nbytes; | ||
318 | u128 *src = (u128 *)walk->src.virt.addr; | ||
319 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
320 | u128 ctrblk; | ||
321 | be128 ctrblocks[SERPENT_PARALLEL_BLOCKS]; | ||
322 | int i; | ||
323 | |||
324 | be128_to_u128(&ctrblk, (be128 *)walk->iv); | ||
325 | |||
326 | /* Process multi-block batch */ | ||
327 | if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) { | ||
328 | do { | ||
329 | /* create ctrblks for parallel encrypt */ | ||
330 | for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) { | ||
331 | if (dst != src) | ||
332 | dst[i] = src[i]; | ||
333 | |||
334 | u128_to_be128(&ctrblocks[i], &ctrblk); | ||
335 | u128_inc(&ctrblk); | ||
336 | } | ||
337 | |||
338 | serpent_enc_blk_xway_xor(ctx, (u8 *)dst, | ||
339 | (u8 *)ctrblocks); | ||
340 | |||
341 | src += SERPENT_PARALLEL_BLOCKS; | ||
342 | dst += SERPENT_PARALLEL_BLOCKS; | ||
343 | nbytes -= bsize * SERPENT_PARALLEL_BLOCKS; | ||
344 | } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS); | ||
345 | |||
346 | if (nbytes < bsize) | ||
347 | goto done; | ||
348 | } | ||
349 | |||
350 | /* Handle leftovers */ | ||
351 | do { | ||
352 | if (dst != src) | ||
353 | *dst = *src; | ||
354 | |||
355 | u128_to_be128(&ctrblocks[0], &ctrblk); | ||
356 | u128_inc(&ctrblk); | ||
357 | |||
358 | __serpent_encrypt(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks); | ||
359 | u128_xor(dst, dst, (u128 *)ctrblocks); | ||
360 | |||
361 | src += 1; | ||
362 | dst += 1; | ||
363 | nbytes -= bsize; | ||
364 | } while (nbytes >= bsize); | ||
365 | |||
366 | done: | ||
367 | u128_to_be128((be128 *)walk->iv, &ctrblk); | ||
368 | return nbytes; | ||
369 | } | 178 | } |
370 | 179 | ||
371 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 180 | static inline void serpent_fpu_end(bool fpu_enabled) |
372 | struct scatterlist *src, unsigned int nbytes) | ||
373 | { | 181 | { |
374 | bool fpu_enabled = false; | 182 | glue_fpu_end(fpu_enabled); |
375 | struct blkcipher_walk walk; | ||
376 | int err; | ||
377 | |||
378 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
379 | err = blkcipher_walk_virt_block(desc, &walk, SERPENT_BLOCK_SIZE); | ||
380 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
381 | |||
382 | while ((nbytes = walk.nbytes) >= SERPENT_BLOCK_SIZE) { | ||
383 | fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes); | ||
384 | nbytes = __ctr_crypt(desc, &walk); | ||
385 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
386 | } | ||
387 | |||
388 | serpent_fpu_end(fpu_enabled); | ||
389 | |||
390 | if (walk.nbytes) { | ||
391 | ctr_crypt_final(desc, &walk); | ||
392 | err = blkcipher_walk_done(desc, &walk, 0); | ||
393 | } | ||
394 | |||
395 | return err; | ||
396 | } | 183 | } |
397 | 184 | ||
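The serpent CTR implementation removed above is a useful reference for what glue_ctr_crypt_128bit now does generically: the IV is converted once to a native-endian u128 so the counter can be bumped with plain integer arithmetic, each counter value is converted back to big-endian before encryption, and a trailing partial block is finished by XORing the plaintext against a one-block keystream (ctr_crypt_final). A stand-alone C sketch of just those counter semantics, assuming a little-endian build and GCC builtins; this models the removed helpers, it is not the glue_helper code:

#include <stdint.h>

typedef struct { uint64_t a, b; } u128;   /* native-endian, a = high half */
typedef struct { uint64_t a, b; } be128;  /* big-endian halves */

static void u128_to_be128(be128 *dst, const u128 *src)
{
	dst->a = __builtin_bswap64(src->a);  /* cpu_to_be64() on little endian */
	dst->b = __builtin_bswap64(src->b);
}

/* 128-bit increment: bump the low half, carry into the high half on wrap */
static void u128_inc(u128 *i)
{
	i->b++;
	if (!i->b)
		i->a++;
}

/* Final partial block, as in ctr_crypt_final(): XOR only nbytes of the
 * one-block keystream over the plaintext. */
static void ctr_final_xor(uint8_t *dst, const uint8_t *src,
			  const uint8_t *keystream, unsigned int nbytes)
{
	while (nbytes--)
		*dst++ = *src++ ^ *keystream++;
}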
398 | struct crypt_priv { | 185 | struct crypt_priv { |
@@ -596,106 +383,6 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | |||
596 | return ret; | 383 | return ret; |
597 | } | 384 | } |
598 | 385 | ||
599 | static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, | ||
600 | unsigned int key_len) | ||
601 | { | ||
602 | struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); | ||
603 | struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base; | ||
604 | int err; | ||
605 | |||
606 | crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); | ||
607 | crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm) | ||
608 | & CRYPTO_TFM_REQ_MASK); | ||
609 | err = crypto_ablkcipher_setkey(child, key, key_len); | ||
610 | crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child) | ||
611 | & CRYPTO_TFM_RES_MASK); | ||
612 | return err; | ||
613 | } | ||
614 | |||
615 | static int __ablk_encrypt(struct ablkcipher_request *req) | ||
616 | { | ||
617 | struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); | ||
618 | struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); | ||
619 | struct blkcipher_desc desc; | ||
620 | |||
621 | desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); | ||
622 | desc.info = req->info; | ||
623 | desc.flags = 0; | ||
624 | |||
625 | return crypto_blkcipher_crt(desc.tfm)->encrypt( | ||
626 | &desc, req->dst, req->src, req->nbytes); | ||
627 | } | ||
628 | |||
629 | static int ablk_encrypt(struct ablkcipher_request *req) | ||
630 | { | ||
631 | struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); | ||
632 | struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); | ||
633 | |||
634 | if (!irq_fpu_usable()) { | ||
635 | struct ablkcipher_request *cryptd_req = | ||
636 | ablkcipher_request_ctx(req); | ||
637 | |||
638 | memcpy(cryptd_req, req, sizeof(*req)); | ||
639 | ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); | ||
640 | |||
641 | return crypto_ablkcipher_encrypt(cryptd_req); | ||
642 | } else { | ||
643 | return __ablk_encrypt(req); | ||
644 | } | ||
645 | } | ||
646 | |||
647 | static int ablk_decrypt(struct ablkcipher_request *req) | ||
648 | { | ||
649 | struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); | ||
650 | struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm); | ||
651 | |||
652 | if (!irq_fpu_usable()) { | ||
653 | struct ablkcipher_request *cryptd_req = | ||
654 | ablkcipher_request_ctx(req); | ||
655 | |||
656 | memcpy(cryptd_req, req, sizeof(*req)); | ||
657 | ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); | ||
658 | |||
659 | return crypto_ablkcipher_decrypt(cryptd_req); | ||
660 | } else { | ||
661 | struct blkcipher_desc desc; | ||
662 | |||
663 | desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); | ||
664 | desc.info = req->info; | ||
665 | desc.flags = 0; | ||
666 | |||
667 | return crypto_blkcipher_crt(desc.tfm)->decrypt( | ||
668 | &desc, req->dst, req->src, req->nbytes); | ||
669 | } | ||
670 | } | ||
671 | |||
672 | static void ablk_exit(struct crypto_tfm *tfm) | ||
673 | { | ||
674 | struct async_serpent_ctx *ctx = crypto_tfm_ctx(tfm); | ||
675 | |||
676 | cryptd_free_ablkcipher(ctx->cryptd_tfm); | ||
677 | } | ||
678 | |||
679 | static int ablk_init(struct crypto_tfm *tfm) | ||
680 | { | ||
681 | struct async_serpent_ctx *ctx = crypto_tfm_ctx(tfm); | ||
682 | struct cryptd_ablkcipher *cryptd_tfm; | ||
683 | char drv_name[CRYPTO_MAX_ALG_NAME]; | ||
684 | |||
685 | snprintf(drv_name, sizeof(drv_name), "__driver-%s", | ||
686 | crypto_tfm_alg_driver_name(tfm)); | ||
687 | |||
688 | cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0); | ||
689 | if (IS_ERR(cryptd_tfm)) | ||
690 | return PTR_ERR(cryptd_tfm); | ||
691 | |||
692 | ctx->cryptd_tfm = cryptd_tfm; | ||
693 | tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) + | ||
694 | crypto_ablkcipher_reqsize(&cryptd_tfm->base); | ||
695 | |||
696 | return 0; | ||
697 | } | ||
698 | |||
699 | static struct crypto_alg serpent_algs[10] = { { | 386 | static struct crypto_alg serpent_algs[10] = { { |
700 | .cra_name = "__ecb-serpent-sse2", | 387 | .cra_name = "__ecb-serpent-sse2", |
701 | .cra_driver_name = "__driver-ecb-serpent-sse2", | 388 | .cra_driver_name = "__driver-ecb-serpent-sse2", |
@@ -808,7 +495,7 @@ static struct crypto_alg serpent_algs[10] = { { | |||
808 | .cra_priority = 400, | 495 | .cra_priority = 400, |
809 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 496 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
810 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 497 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
811 | .cra_ctxsize = sizeof(struct async_serpent_ctx), | 498 | .cra_ctxsize = sizeof(struct async_helper_ctx), |
812 | .cra_alignmask = 0, | 499 | .cra_alignmask = 0, |
813 | .cra_type = &crypto_ablkcipher_type, | 500 | .cra_type = &crypto_ablkcipher_type, |
814 | .cra_module = THIS_MODULE, | 501 | .cra_module = THIS_MODULE, |
@@ -830,7 +517,7 @@ static struct crypto_alg serpent_algs[10] = { { | |||
830 | .cra_priority = 400, | 517 | .cra_priority = 400, |
831 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 518 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
832 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 519 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
833 | .cra_ctxsize = sizeof(struct async_serpent_ctx), | 520 | .cra_ctxsize = sizeof(struct async_helper_ctx), |
834 | .cra_alignmask = 0, | 521 | .cra_alignmask = 0, |
835 | .cra_type = &crypto_ablkcipher_type, | 522 | .cra_type = &crypto_ablkcipher_type, |
836 | .cra_module = THIS_MODULE, | 523 | .cra_module = THIS_MODULE, |
@@ -853,7 +540,7 @@ static struct crypto_alg serpent_algs[10] = { { | |||
853 | .cra_priority = 400, | 540 | .cra_priority = 400, |
854 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 541 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
855 | .cra_blocksize = 1, | 542 | .cra_blocksize = 1, |
856 | .cra_ctxsize = sizeof(struct async_serpent_ctx), | 543 | .cra_ctxsize = sizeof(struct async_helper_ctx), |
857 | .cra_alignmask = 0, | 544 | .cra_alignmask = 0, |
858 | .cra_type = &crypto_ablkcipher_type, | 545 | .cra_type = &crypto_ablkcipher_type, |
859 | .cra_module = THIS_MODULE, | 546 | .cra_module = THIS_MODULE, |
@@ -877,7 +564,7 @@ static struct crypto_alg serpent_algs[10] = { { | |||
877 | .cra_priority = 400, | 564 | .cra_priority = 400, |
878 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 565 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
879 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 566 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
880 | .cra_ctxsize = sizeof(struct async_serpent_ctx), | 567 | .cra_ctxsize = sizeof(struct async_helper_ctx), |
881 | .cra_alignmask = 0, | 568 | .cra_alignmask = 0, |
882 | .cra_type = &crypto_ablkcipher_type, | 569 | .cra_type = &crypto_ablkcipher_type, |
883 | .cra_module = THIS_MODULE, | 570 | .cra_module = THIS_MODULE, |
@@ -902,7 +589,7 @@ static struct crypto_alg serpent_algs[10] = { { | |||
902 | .cra_priority = 400, | 589 | .cra_priority = 400, |
903 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | 590 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, |
904 | .cra_blocksize = SERPENT_BLOCK_SIZE, | 591 | .cra_blocksize = SERPENT_BLOCK_SIZE, |
905 | .cra_ctxsize = sizeof(struct async_serpent_ctx), | 592 | .cra_ctxsize = sizeof(struct async_helper_ctx), |
906 | .cra_alignmask = 0, | 593 | .cra_alignmask = 0, |
907 | .cra_type = &crypto_ablkcipher_type, | 594 | .cra_type = &crypto_ablkcipher_type, |
908 | .cra_module = THIS_MODULE, | 595 | .cra_module = THIS_MODULE, |
diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S index b2c2f57d70e8..49d6987a73d9 100644 --- a/arch/x86/crypto/sha1_ssse3_asm.S +++ b/arch/x86/crypto/sha1_ssse3_asm.S | |||
@@ -468,7 +468,7 @@ W_PRECALC_SSSE3 | |||
468 | */ | 468 | */ |
469 | SHA1_VECTOR_ASM sha1_transform_ssse3 | 469 | SHA1_VECTOR_ASM sha1_transform_ssse3 |
470 | 470 | ||
471 | #ifdef SHA1_ENABLE_AVX_SUPPORT | 471 | #ifdef CONFIG_AS_AVX |
472 | 472 | ||
473 | .macro W_PRECALC_AVX | 473 | .macro W_PRECALC_AVX |
474 | 474 | ||
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index f916499d0abe..4a11a9d72451 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c | |||
@@ -35,7 +35,7 @@ | |||
35 | 35 | ||
36 | asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data, | 36 | asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data, |
37 | unsigned int rounds); | 37 | unsigned int rounds); |
38 | #ifdef SHA1_ENABLE_AVX_SUPPORT | 38 | #ifdef CONFIG_AS_AVX |
39 | asmlinkage void sha1_transform_avx(u32 *digest, const char *data, | 39 | asmlinkage void sha1_transform_avx(u32 *digest, const char *data, |
40 | unsigned int rounds); | 40 | unsigned int rounds); |
41 | #endif | 41 | #endif |
@@ -184,7 +184,7 @@ static struct shash_alg alg = { | |||
184 | } | 184 | } |
185 | }; | 185 | }; |
186 | 186 | ||
187 | #ifdef SHA1_ENABLE_AVX_SUPPORT | 187 | #ifdef CONFIG_AS_AVX |
188 | static bool __init avx_usable(void) | 188 | static bool __init avx_usable(void) |
189 | { | 189 | { |
190 | u64 xcr0; | 190 | u64 xcr0; |
@@ -209,7 +209,7 @@ static int __init sha1_ssse3_mod_init(void) | |||
209 | if (cpu_has_ssse3) | 209 | if (cpu_has_ssse3) |
210 | sha1_transform_asm = sha1_transform_ssse3; | 210 | sha1_transform_asm = sha1_transform_ssse3; |
211 | 211 | ||
212 | #ifdef SHA1_ENABLE_AVX_SUPPORT | 212 | #ifdef CONFIG_AS_AVX |
213 | /* allow AVX to override SSSE3, it's a little faster */ | 213 | /* allow AVX to override SSSE3, it's a little faster */ |
214 | if (avx_usable()) | 214 | if (avx_usable()) |
215 | sha1_transform_asm = sha1_transform_avx; | 215 | sha1_transform_asm = sha1_transform_avx; |
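The switch from the ad-hoc SHA1_ENABLE_AVX_SUPPORT define to CONFIG_AS_AVX moves the "can the assembler emit AVX?" test into the common build system; the runtime OSXSAVE/XCR0 probe in avx_usable() is unchanged. Reduced to its core, the resulting two-level gate looks like this (the stub transform bodies are placeholders, not the SHA-1 code):

#include <stdint.h>

typedef void (*sha1_transform_fn)(uint32_t *digest, const char *data,
				  unsigned int rounds);

/* Stand-ins for the assembler entry points. */
static void transform_ssse3(uint32_t *d, const char *s, unsigned int r) { }
#ifdef CONFIG_AS_AVX
static void transform_avx(uint32_t *d, const char *s, unsigned int r) { }
#endif

static sha1_transform_fn sha1_transform_asm;

static int pick_transform(int has_ssse3, int avx_is_usable)
{
	if (has_ssse3)
		sha1_transform_asm = transform_ssse3;
#ifdef CONFIG_AS_AVX
	/* only compiled when the build assembler understands AVX;
	 * still gated at run time on the avx_usable() probe */
	if (avx_is_usable)
		sha1_transform_asm = transform_avx;
#endif
	return sha1_transform_asm ? 0 : -1;
}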
diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S new file mode 100644 index 000000000000..35f45574390d --- /dev/null +++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S | |||
@@ -0,0 +1,300 @@ | |||
1 | /* | ||
2 | * Twofish Cipher 8-way parallel algorithm (AVX/x86_64) | ||
3 | * | ||
4 | * Copyright (C) 2012 Johannes Goetzfried | ||
5 | * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License as published by | ||
9 | * the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
20 | * USA | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | .file "twofish-avx-x86_64-asm_64.S" | ||
25 | .text | ||
26 | |||
27 | /* structure of crypto context */ | ||
28 | #define s0 0 | ||
29 | #define s1 1024 | ||
30 | #define s2 2048 | ||
31 | #define s3 3072 | ||
32 | #define w 4096 | ||
33 | #define k 4128 | ||
34 | |||
35 | /********************************************************************** | ||
36 | 8-way AVX twofish | ||
37 | **********************************************************************/ | ||
38 | #define CTX %rdi | ||
39 | |||
40 | #define RA1 %xmm0 | ||
41 | #define RB1 %xmm1 | ||
42 | #define RC1 %xmm2 | ||
43 | #define RD1 %xmm3 | ||
44 | |||
45 | #define RA2 %xmm4 | ||
46 | #define RB2 %xmm5 | ||
47 | #define RC2 %xmm6 | ||
48 | #define RD2 %xmm7 | ||
49 | |||
50 | #define RX %xmm8 | ||
51 | #define RY %xmm9 | ||
52 | |||
53 | #define RK1 %xmm10 | ||
54 | #define RK2 %xmm11 | ||
55 | |||
56 | #define RID1 %rax | ||
57 | #define RID1b %al | ||
58 | #define RID2 %rbx | ||
59 | #define RID2b %bl | ||
60 | |||
61 | #define RGI1 %rdx | ||
62 | #define RGI1bl %dl | ||
63 | #define RGI1bh %dh | ||
64 | #define RGI2 %rcx | ||
65 | #define RGI2bl %cl | ||
66 | #define RGI2bh %ch | ||
67 | |||
68 | #define RGS1 %r8 | ||
69 | #define RGS1d %r8d | ||
70 | #define RGS2 %r9 | ||
71 | #define RGS2d %r9d | ||
72 | #define RGS3 %r10 | ||
73 | #define RGS3d %r10d | ||
74 | |||
75 | |||
76 | #define lookup_32bit(t0, t1, t2, t3, src, dst) \ | ||
77 | movb src ## bl, RID1b; \ | ||
78 | movb src ## bh, RID2b; \ | ||
79 | movl t0(CTX, RID1, 4), dst ## d; \ | ||
80 | xorl t1(CTX, RID2, 4), dst ## d; \ | ||
81 | shrq $16, src; \ | ||
82 | movb src ## bl, RID1b; \ | ||
83 | movb src ## bh, RID2b; \ | ||
84 | xorl t2(CTX, RID1, 4), dst ## d; \ | ||
85 | xorl t3(CTX, RID2, 4), dst ## d; | ||
86 | |||
87 | #define G(a, x, t0, t1, t2, t3) \ | ||
88 | vmovq a, RGI1; \ | ||
89 | vpsrldq $8, a, x; \ | ||
90 | vmovq x, RGI2; \ | ||
91 | \ | ||
92 | lookup_32bit(t0, t1, t2, t3, RGI1, RGS1); \ | ||
93 | shrq $16, RGI1; \ | ||
94 | lookup_32bit(t0, t1, t2, t3, RGI1, RGS2); \ | ||
95 | shlq $32, RGS2; \ | ||
96 | orq RGS1, RGS2; \ | ||
97 | \ | ||
98 | lookup_32bit(t0, t1, t2, t3, RGI2, RGS1); \ | ||
99 | shrq $16, RGI2; \ | ||
100 | lookup_32bit(t0, t1, t2, t3, RGI2, RGS3); \ | ||
101 | shlq $32, RGS3; \ | ||
102 | orq RGS1, RGS3; \ | ||
103 | \ | ||
104 | vmovq RGS2, x; \ | ||
105 | vpinsrq $1, RGS3, x, x; | ||
106 | |||
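lookup_32bit() consumes the input word sixteen bits at a time: the low byte and the next byte index two of the key-dependent 1 KB tables (the s0..s3 offsets defined at the top of the file), a shrq exposes the next sixteen bits, and the four 32-bit entries are XORed together. G() runs that twice per 64-bit half of an xmm register. The per-word computation, as a plain C model rather than the kernel's code:

#include <stdint.h>

/* One 32-bit Twofish g-lookup: four bytes index four key-dependent
 * 256-entry tables, results XORed together. */
static uint32_t g_lookup(const uint32_t t0[256], const uint32_t t1[256],
			 const uint32_t t2[256], const uint32_t t3[256],
			 uint32_t x)
{
	return t0[x & 0xff] ^
	       t1[(x >> 8) & 0xff] ^
	       t2[(x >> 16) & 0xff] ^
	       t3[(x >> 24) & 0xff];
}

The second input of each round uses the rotated table order (s1, s2, s3, s0), which is how Twofish expresses g(rol32(b, 8)) without actually rotating b.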
107 | #define encround(a, b, c, d, x, y) \ | ||
108 | G(a, x, s0, s1, s2, s3); \ | ||
109 | G(b, y, s1, s2, s3, s0); \ | ||
110 | vpaddd x, y, x; \ | ||
111 | vpaddd y, x, y; \ | ||
112 | vpaddd x, RK1, x; \ | ||
113 | vpaddd y, RK2, y; \ | ||
114 | vpxor x, c, c; \ | ||
115 | vpsrld $1, c, x; \ | ||
116 | vpslld $(32 - 1), c, c; \ | ||
117 | vpor c, x, c; \ | ||
118 | vpslld $1, d, x; \ | ||
119 | vpsrld $(32 - 1), d, d; \ | ||
120 | vpor d, x, d; \ | ||
121 | vpxor d, y, d; | ||
122 | |||
123 | #define decround(a, b, c, d, x, y) \ | ||
124 | G(a, x, s0, s1, s2, s3); \ | ||
125 | G(b, y, s1, s2, s3, s0); \ | ||
126 | vpaddd x, y, x; \ | ||
127 | vpaddd y, x, y; \ | ||
128 | vpaddd y, RK2, y; \ | ||
129 | vpxor d, y, d; \ | ||
130 | vpsrld $1, d, y; \ | ||
131 | vpslld $(32 - 1), d, d; \ | ||
132 | vpor d, y, d; \ | ||
133 | vpslld $1, c, y; \ | ||
134 | vpsrld $(32 - 1), c, c; \ | ||
135 | vpor c, y, c; \ | ||
136 | vpaddd x, RK1, x; \ | ||
137 | vpxor x, c, c; | ||
138 | |||
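encround()/decround() are the standard Twofish round applied to four xmm registers at once, so each macro expansion advances four blocks (eight, with both register sets): two g-lookups, the pseudo-Hadamard transform, round-key addition, and the one-bit rotates that mix the result into c and d. On scalar 32-bit words the encryption round reads as follows (g0/g1 are hypothetical helper names standing for the two G() lookups above):

#include <stdint.h>

static inline uint32_t rol32(uint32_t v, int n) { return v << n | v >> (32 - n); }
static inline uint32_t ror32(uint32_t v, int n) { return v >> n | v << (32 - n); }

uint32_t g0(uint32_t x);  /* G(a, ..., s0, s1, s2, s3) */
uint32_t g1(uint32_t x);  /* G(b, ..., s1, s2, s3, s0) */

/* One encround() on scalar words; k points at the round-key pair. */
static void enc_round(uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d,
		      const uint32_t k[2])
{
	uint32_t x = g0(*a);
	uint32_t y = g1(*b);

	x += y;          /* pseudo-Hadamard transform: x = g0 + g1   */
	y += x;          /*                            y = g0 + 2*g1 */
	x += k[0];
	y += k[1];

	*c = ror32(*c ^ x, 1);
	*d = rol32(*d, 1) ^ y;
}

decround() applies the inverse mixing (rotate c left before XORing x, XOR y into d before rotating right), which is why twofish_dec_blk_8way simply runs the cycles from 7 down to 0.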
139 | #define encrypt_round(n, a, b, c, d) \ | ||
140 | vbroadcastss (k+4*(2*(n)))(CTX), RK1; \ | ||
141 | vbroadcastss (k+4*(2*(n)+1))(CTX), RK2; \ | ||
142 | encround(a ## 1, b ## 1, c ## 1, d ## 1, RX, RY); \ | ||
143 | encround(a ## 2, b ## 2, c ## 2, d ## 2, RX, RY); | ||
144 | |||
145 | #define decrypt_round(n, a, b, c, d) \ | ||
146 | vbroadcastss (k+4*(2*(n)))(CTX), RK1; \ | ||
147 | vbroadcastss (k+4*(2*(n)+1))(CTX), RK2; \ | ||
148 | decround(a ## 1, b ## 1, c ## 1, d ## 1, RX, RY); \ | ||
149 | decround(a ## 2, b ## 2, c ## 2, d ## 2, RX, RY); | ||
150 | |||
151 | #define encrypt_cycle(n) \ | ||
152 | encrypt_round((2*n), RA, RB, RC, RD); \ | ||
153 | encrypt_round(((2*n) + 1), RC, RD, RA, RB); | ||
154 | |||
155 | #define decrypt_cycle(n) \ | ||
156 | decrypt_round(((2*n) + 1), RC, RD, RA, RB); \ | ||
157 | decrypt_round((2*n), RA, RB, RC, RD); | ||
158 | |||
159 | |||
160 | #define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ | ||
161 | vpunpckldq x1, x0, t0; \ | ||
162 | vpunpckhdq x1, x0, t2; \ | ||
163 | vpunpckldq x3, x2, t1; \ | ||
164 | vpunpckhdq x3, x2, x3; \ | ||
165 | \ | ||
166 | vpunpcklqdq t1, t0, x0; \ | ||
167 | vpunpckhqdq t1, t0, x1; \ | ||
168 | vpunpcklqdq x3, t2, x2; \ | ||
169 | vpunpckhqdq x3, t2, x3; | ||
170 | |||
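After inpack_blocks() each xmm register holds one whole 128-bit block; transpose_4x4() then converts that to a word-sliced layout where register i holds word i of four different blocks, so every vpaddd/vpxor in the round macros advances four blocks in lockstep. As a matrix operation on 32-bit words it is a plain 4x4 transpose:

#include <stdint.h>

/* Scalar model of transpose_4x4(): m[block][word] -> m[word][block]. */
static void transpose_4x4_model(uint32_t m[4][4])
{
	unsigned int i, j;

	for (i = 0; i < 4; i++)
		for (j = i + 1; j < 4; j++) {
			uint32_t t = m[i][j];

			m[i][j] = m[j][i];
			m[j][i] = t;
		}
}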
171 | #define inpack_blocks(in, x0, x1, x2, x3, wkey, t0, t1, t2) \ | ||
172 | vpxor (0*4*4)(in), wkey, x0; \ | ||
173 | vpxor (1*4*4)(in), wkey, x1; \ | ||
174 | vpxor (2*4*4)(in), wkey, x2; \ | ||
175 | vpxor (3*4*4)(in), wkey, x3; \ | ||
176 | \ | ||
177 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) | ||
178 | |||
179 | #define outunpack_blocks(out, x0, x1, x2, x3, wkey, t0, t1, t2) \ | ||
180 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ | ||
181 | \ | ||
182 | vpxor x0, wkey, x0; \ | ||
183 | vmovdqu x0, (0*4*4)(out); \ | ||
184 | vpxor x1, wkey, x1; \ | ||
185 | vmovdqu x1, (1*4*4)(out); \ | ||
186 | vpxor x2, wkey, x2; \ | ||
187 | vmovdqu x2, (2*4*4)(out); \ | ||
188 | vpxor x3, wkey, x3; \ | ||
189 | vmovdqu x3, (3*4*4)(out); | ||
190 | |||
191 | #define outunpack_xor_blocks(out, x0, x1, x2, x3, wkey, t0, t1, t2) \ | ||
192 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ | ||
193 | \ | ||
194 | vpxor x0, wkey, x0; \ | ||
195 | vpxor (0*4*4)(out), x0, x0; \ | ||
196 | vmovdqu x0, (0*4*4)(out); \ | ||
197 | vpxor x1, wkey, x1; \ | ||
198 | vpxor (1*4*4)(out), x1, x1; \ | ||
199 | vmovdqu x1, (1*4*4)(out); \ | ||
200 | vpxor x2, wkey, x2; \ | ||
201 | vpxor (2*4*4)(out), x2, x2; \ | ||
202 | vmovdqu x2, (2*4*4)(out); \ | ||
203 | vpxor x3, wkey, x3; \ | ||
204 | vpxor (3*4*4)(out), x3, x3; \ | ||
205 | vmovdqu x3, (3*4*4)(out); | ||
206 | |||
207 | .align 8 | ||
208 | .global __twofish_enc_blk_8way | ||
209 | .type __twofish_enc_blk_8way,@function; | ||
210 | |||
211 | __twofish_enc_blk_8way: | ||
212 | /* input: | ||
213 | * %rdi: ctx, CTX | ||
214 | * %rsi: dst | ||
215 | * %rdx: src | ||
216 | * %rcx: bool, if true: xor output | ||
217 | */ | ||
218 | |||
219 | pushq %rbx; | ||
220 | pushq %rcx; | ||
221 | |||
222 | vmovdqu w(CTX), RK1; | ||
223 | |||
224 | leaq (4*4*4)(%rdx), %rax; | ||
225 | inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RK1, RX, RY, RK2); | ||
226 | inpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX, RY, RK2); | ||
227 | |||
228 | xorq RID1, RID1; | ||
229 | xorq RID2, RID2; | ||
230 | |||
231 | encrypt_cycle(0); | ||
232 | encrypt_cycle(1); | ||
233 | encrypt_cycle(2); | ||
234 | encrypt_cycle(3); | ||
235 | encrypt_cycle(4); | ||
236 | encrypt_cycle(5); | ||
237 | encrypt_cycle(6); | ||
238 | encrypt_cycle(7); | ||
239 | |||
240 | vmovdqu (w+4*4)(CTX), RK1; | ||
241 | |||
242 | popq %rcx; | ||
243 | popq %rbx; | ||
244 | |||
245 | leaq (4*4*4)(%rsi), %rax; | ||
246 | |||
247 | testb %cl, %cl; | ||
248 | jnz __enc_xor8; | ||
249 | |||
250 | outunpack_blocks(%rsi, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2); | ||
251 | outunpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2); | ||
252 | |||
253 | ret; | ||
254 | |||
255 | __enc_xor8: | ||
256 | outunpack_xor_blocks(%rsi, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2); | ||
257 | outunpack_xor_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2); | ||
258 | |||
259 | ret; | ||
260 | |||
261 | .align 8 | ||
262 | .global twofish_dec_blk_8way | ||
263 | .type twofish_dec_blk_8way,@function; | ||
264 | |||
265 | twofish_dec_blk_8way: | ||
266 | /* input: | ||
267 | * %rdi: ctx, CTX | ||
268 | * %rsi: dst | ||
269 | * %rdx: src | ||
270 | */ | ||
271 | |||
272 | pushq %rbx; | ||
273 | |||
274 | vmovdqu (w+4*4)(CTX), RK1; | ||
275 | |||
276 | leaq (4*4*4)(%rdx), %rax; | ||
277 | inpack_blocks(%rdx, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2); | ||
278 | inpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2); | ||
279 | |||
280 | xorq RID1, RID1; | ||
281 | xorq RID2, RID2; | ||
282 | |||
283 | decrypt_cycle(7); | ||
284 | decrypt_cycle(6); | ||
285 | decrypt_cycle(5); | ||
286 | decrypt_cycle(4); | ||
287 | decrypt_cycle(3); | ||
288 | decrypt_cycle(2); | ||
289 | decrypt_cycle(1); | ||
290 | decrypt_cycle(0); | ||
291 | |||
292 | vmovdqu (w)(CTX), RK1; | ||
293 | |||
294 | popq %rbx; | ||
295 | |||
296 | leaq (4*4*4)(%rsi), %rax; | ||
297 | outunpack_blocks(%rsi, RA1, RB1, RC1, RD1, RK1, RX, RY, RK2); | ||
298 | outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX, RY, RK2); | ||
299 | |||
300 | ret; | ||
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c new file mode 100644 index 000000000000..782b67ddaf6a --- /dev/null +++ b/arch/x86/crypto/twofish_avx_glue.c | |||
@@ -0,0 +1,624 @@ | |||
1 | /* | ||
2 | * Glue Code for AVX assembler version of Twofish Cipher | ||
3 | * | ||
4 | * Copyright (C) 2012 Johannes Goetzfried | ||
5 | * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License as published by | ||
9 | * the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
20 | * USA | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | #include <linux/module.h> | ||
25 | #include <linux/hardirq.h> | ||
26 | #include <linux/types.h> | ||
27 | #include <linux/crypto.h> | ||
28 | #include <linux/err.h> | ||
29 | #include <crypto/algapi.h> | ||
30 | #include <crypto/twofish.h> | ||
31 | #include <crypto/cryptd.h> | ||
32 | #include <crypto/b128ops.h> | ||
33 | #include <crypto/ctr.h> | ||
34 | #include <crypto/lrw.h> | ||
35 | #include <crypto/xts.h> | ||
36 | #include <asm/i387.h> | ||
37 | #include <asm/xcr.h> | ||
38 | #include <asm/xsave.h> | ||
39 | #include <asm/crypto/twofish.h> | ||
40 | #include <asm/crypto/ablk_helper.h> | ||
41 | #include <asm/crypto/glue_helper.h> | ||
42 | #include <crypto/scatterwalk.h> | ||
43 | #include <linux/workqueue.h> | ||
44 | #include <linux/spinlock.h> | ||
45 | |||
46 | #define TWOFISH_PARALLEL_BLOCKS 8 | ||
47 | |||
48 | static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, | ||
49 | const u8 *src) | ||
50 | { | ||
51 | __twofish_enc_blk_3way(ctx, dst, src, false); | ||
52 | } | ||
53 | |||
54 | /* 8-way parallel cipher functions */ | ||
55 | asmlinkage void __twofish_enc_blk_8way(struct twofish_ctx *ctx, u8 *dst, | ||
56 | const u8 *src, bool xor); | ||
57 | asmlinkage void twofish_dec_blk_8way(struct twofish_ctx *ctx, u8 *dst, | ||
58 | const u8 *src); | ||
59 | |||
60 | static inline void twofish_enc_blk_xway(struct twofish_ctx *ctx, u8 *dst, | ||
61 | const u8 *src) | ||
62 | { | ||
63 | __twofish_enc_blk_8way(ctx, dst, src, false); | ||
64 | } | ||
65 | |||
66 | static inline void twofish_enc_blk_xway_xor(struct twofish_ctx *ctx, u8 *dst, | ||
67 | const u8 *src) | ||
68 | { | ||
69 | __twofish_enc_blk_8way(ctx, dst, src, true); | ||
70 | } | ||
71 | |||
72 | static inline void twofish_dec_blk_xway(struct twofish_ctx *ctx, u8 *dst, | ||
73 | const u8 *src) | ||
74 | { | ||
75 | twofish_dec_blk_8way(ctx, dst, src); | ||
76 | } | ||
77 | |||
78 | static void twofish_dec_blk_cbc_xway(void *ctx, u128 *dst, const u128 *src) | ||
79 | { | ||
80 | u128 ivs[TWOFISH_PARALLEL_BLOCKS - 1]; | ||
81 | unsigned int j; | ||
82 | |||
83 | for (j = 0; j < TWOFISH_PARALLEL_BLOCKS - 1; j++) | ||
84 | ivs[j] = src[j]; | ||
85 | |||
86 | twofish_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src); | ||
87 | |||
88 | for (j = 0; j < TWOFISH_PARALLEL_BLOCKS - 1; j++) | ||
89 | u128_xor(dst + (j + 1), dst + (j + 1), ivs + j); | ||
90 | } | ||
91 | |||
92 | static void twofish_enc_blk_ctr_xway(void *ctx, u128 *dst, const u128 *src, | ||
93 | u128 *iv) | ||
94 | { | ||
95 | be128 ctrblks[TWOFISH_PARALLEL_BLOCKS]; | ||
96 | unsigned int i; | ||
97 | |||
98 | for (i = 0; i < TWOFISH_PARALLEL_BLOCKS; i++) { | ||
99 | if (dst != src) | ||
100 | dst[i] = src[i]; | ||
101 | |||
102 | u128_to_be128(&ctrblks[i], iv); | ||
103 | u128_inc(iv); | ||
104 | } | ||
105 | |||
106 | twofish_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks); | ||
107 | } | ||
108 | |||
109 | static const struct common_glue_ctx twofish_enc = { | ||
110 | .num_funcs = 3, | ||
111 | .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, | ||
112 | |||
113 | .funcs = { { | ||
114 | .num_blocks = TWOFISH_PARALLEL_BLOCKS, | ||
115 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_xway) } | ||
116 | }, { | ||
117 | .num_blocks = 3, | ||
118 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) } | ||
119 | }, { | ||
120 | .num_blocks = 1, | ||
121 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) } | ||
122 | } } | ||
123 | }; | ||
124 | |||
125 | static const struct common_glue_ctx twofish_ctr = { | ||
126 | .num_funcs = 3, | ||
127 | .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, | ||
128 | |||
129 | .funcs = { { | ||
130 | .num_blocks = TWOFISH_PARALLEL_BLOCKS, | ||
131 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_xway) } | ||
132 | }, { | ||
133 | .num_blocks = 3, | ||
134 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_3way) } | ||
135 | }, { | ||
136 | .num_blocks = 1, | ||
137 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr) } | ||
138 | } } | ||
139 | }; | ||
140 | |||
141 | static const struct common_glue_ctx twofish_dec = { | ||
142 | .num_funcs = 3, | ||
143 | .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, | ||
144 | |||
145 | .funcs = { { | ||
146 | .num_blocks = TWOFISH_PARALLEL_BLOCKS, | ||
147 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_xway) } | ||
148 | }, { | ||
149 | .num_blocks = 3, | ||
150 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) } | ||
151 | }, { | ||
152 | .num_blocks = 1, | ||
153 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) } | ||
154 | } } | ||
155 | }; | ||
156 | |||
157 | static const struct common_glue_ctx twofish_dec_cbc = { | ||
158 | .num_funcs = 3, | ||
159 | .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, | ||
160 | |||
161 | .funcs = { { | ||
162 | .num_blocks = TWOFISH_PARALLEL_BLOCKS, | ||
163 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_xway) } | ||
164 | }, { | ||
165 | .num_blocks = 3, | ||
166 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) } | ||
167 | }, { | ||
168 | .num_blocks = 1, | ||
169 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) } | ||
170 | } } | ||
171 | }; | ||
172 | |||
173 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
174 | struct scatterlist *src, unsigned int nbytes) | ||
175 | { | ||
176 | return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes); | ||
177 | } | ||
178 | |||
179 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
180 | struct scatterlist *src, unsigned int nbytes) | ||
181 | { | ||
182 | return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes); | ||
183 | } | ||
184 | |||
185 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
186 | struct scatterlist *src, unsigned int nbytes) | ||
187 | { | ||
188 | return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc, | ||
189 | dst, src, nbytes); | ||
190 | } | ||
191 | |||
192 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
193 | struct scatterlist *src, unsigned int nbytes) | ||
194 | { | ||
195 | return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src, | ||
196 | nbytes); | ||
197 | } | ||
198 | |||
199 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
200 | struct scatterlist *src, unsigned int nbytes) | ||
201 | { | ||
202 | return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes); | ||
203 | } | ||
204 | |||
205 | static inline bool twofish_fpu_begin(bool fpu_enabled, unsigned int nbytes) | ||
206 | { | ||
207 | return glue_fpu_begin(TF_BLOCK_SIZE, TWOFISH_PARALLEL_BLOCKS, NULL, | ||
208 | fpu_enabled, nbytes); | ||
209 | } | ||
210 | |||
211 | static inline void twofish_fpu_end(bool fpu_enabled) | ||
212 | { | ||
213 | glue_fpu_end(fpu_enabled); | ||
214 | } | ||
215 | |||
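twofish_fpu_begin()/twofish_fpu_end() encode a lazy FPU policy: vector state is claimed at most once per walk, and only when at least TWOFISH_PARALLEL_BLOCKS blocks remain, because the 8-way AVX batch is the only path that touches xmm registers (the 3-way and 1-way fallbacks are plain integer code). A sketch of the assumed policy behind glue_fpu_begin(), not its actual source (kernel context assumed, bool from <linux/types.h>):

/* Model of the lazy-FPU decision; kernel_fpu_begin() is the real
 * x86 API for claiming vector state in kernel code. */
static bool fpu_begin_model(unsigned int bsize, int fpu_blocks_limit,
			    bool fpu_enabled, unsigned int nbytes)
{
	if (fpu_enabled)		/* already claimed for this walk */
		return true;

	if (fpu_blocks_limit < 0)	/* cipher never needs the FPU */
		return false;

	if (nbytes < bsize * (unsigned int)fpu_blocks_limit)
		return false;		/* batch too small for the wide path */

	kernel_fpu_begin();
	return true;
}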
216 | struct crypt_priv { | ||
217 | struct twofish_ctx *ctx; | ||
218 | bool fpu_enabled; | ||
219 | }; | ||
220 | |||
221 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
222 | { | ||
223 | const unsigned int bsize = TF_BLOCK_SIZE; | ||
224 | struct crypt_priv *ctx = priv; | ||
225 | int i; | ||
226 | |||
227 | ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes); | ||
228 | |||
229 | if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) { | ||
230 | twofish_enc_blk_xway(ctx->ctx, srcdst, srcdst); | ||
231 | return; | ||
232 | } | ||
233 | |||
234 | for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3) | ||
235 | twofish_enc_blk_3way(ctx->ctx, srcdst, srcdst); | ||
236 | |||
237 | nbytes %= bsize * 3; | ||
238 | |||
239 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
240 | twofish_enc_blk(ctx->ctx, srcdst, srcdst); | ||
241 | } | ||
242 | |||
243 | static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
244 | { | ||
245 | const unsigned int bsize = TF_BLOCK_SIZE; | ||
246 | struct crypt_priv *ctx = priv; | ||
247 | int i; | ||
248 | |||
249 | ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes); | ||
250 | |||
251 | if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) { | ||
252 | twofish_dec_blk_xway(ctx->ctx, srcdst, srcdst); | ||
253 | return; | ||
254 | } | ||
255 | |||
256 | for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3) | ||
257 | twofish_dec_blk_3way(ctx->ctx, srcdst, srcdst); | ||
258 | |||
259 | nbytes %= bsize * 3; | ||
260 | |||
261 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
262 | twofish_dec_blk(ctx->ctx, srcdst, srcdst); | ||
263 | } | ||
264 | |||
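Note the contract behind these callbacks: lrw_crypt()/xts_crypt() hand over at most tbuflen bytes at a time, and tbuflen is sizeof(buf) = TWOFISH_PARALLEL_BLOCKS blocks, so the dispatch above needs exactly three cases. A full 8-block chunk takes the AVX path; anything shorter is decomposed greedily, e.g. a 5-block (80-byte) chunk becomes one twofish_enc_blk_3way() call plus two twofish_enc_blk() calls.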
265 | static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
266 | struct scatterlist *src, unsigned int nbytes) | ||
267 | { | ||
268 | struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
269 | be128 buf[TWOFISH_PARALLEL_BLOCKS]; | ||
270 | struct crypt_priv crypt_ctx = { | ||
271 | .ctx = &ctx->twofish_ctx, | ||
272 | .fpu_enabled = false, | ||
273 | }; | ||
274 | struct lrw_crypt_req req = { | ||
275 | .tbuf = buf, | ||
276 | .tbuflen = sizeof(buf), | ||
277 | |||
278 | .table_ctx = &ctx->lrw_table, | ||
279 | .crypt_ctx = &crypt_ctx, | ||
280 | .crypt_fn = encrypt_callback, | ||
281 | }; | ||
282 | int ret; | ||
283 | |||
284 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
285 | ret = lrw_crypt(desc, dst, src, nbytes, &req); | ||
286 | twofish_fpu_end(crypt_ctx.fpu_enabled); | ||
287 | |||
288 | return ret; | ||
289 | } | ||
290 | |||
291 | static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
292 | struct scatterlist *src, unsigned int nbytes) | ||
293 | { | ||
294 | struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
295 | be128 buf[TWOFISH_PARALLEL_BLOCKS]; | ||
296 | struct crypt_priv crypt_ctx = { | ||
297 | .ctx = &ctx->twofish_ctx, | ||
298 | .fpu_enabled = false, | ||
299 | }; | ||
300 | struct lrw_crypt_req req = { | ||
301 | .tbuf = buf, | ||
302 | .tbuflen = sizeof(buf), | ||
303 | |||
304 | .table_ctx = &ctx->lrw_table, | ||
305 | .crypt_ctx = &crypt_ctx, | ||
306 | .crypt_fn = decrypt_callback, | ||
307 | }; | ||
308 | int ret; | ||
309 | |||
310 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
311 | ret = lrw_crypt(desc, dst, src, nbytes, &req); | ||
312 | twofish_fpu_end(crypt_ctx.fpu_enabled); | ||
313 | |||
314 | return ret; | ||
315 | } | ||
316 | |||
317 | static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
318 | struct scatterlist *src, unsigned int nbytes) | ||
319 | { | ||
320 | struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
321 | be128 buf[TWOFISH_PARALLEL_BLOCKS]; | ||
322 | struct crypt_priv crypt_ctx = { | ||
323 | .ctx = &ctx->crypt_ctx, | ||
324 | .fpu_enabled = false, | ||
325 | }; | ||
326 | struct xts_crypt_req req = { | ||
327 | .tbuf = buf, | ||
328 | .tbuflen = sizeof(buf), | ||
329 | |||
330 | .tweak_ctx = &ctx->tweak_ctx, | ||
331 | .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk), | ||
332 | .crypt_ctx = &crypt_ctx, | ||
333 | .crypt_fn = encrypt_callback, | ||
334 | }; | ||
335 | int ret; | ||
336 | |||
337 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
338 | ret = xts_crypt(desc, dst, src, nbytes, &req); | ||
339 | twofish_fpu_end(crypt_ctx.fpu_enabled); | ||
340 | |||
341 | return ret; | ||
342 | } | ||
343 | |||
344 | static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
345 | struct scatterlist *src, unsigned int nbytes) | ||
346 | { | ||
347 | struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
348 | be128 buf[TWOFISH_PARALLEL_BLOCKS]; | ||
349 | struct crypt_priv crypt_ctx = { | ||
350 | .ctx = &ctx->crypt_ctx, | ||
351 | .fpu_enabled = false, | ||
352 | }; | ||
353 | struct xts_crypt_req req = { | ||
354 | .tbuf = buf, | ||
355 | .tbuflen = sizeof(buf), | ||
356 | |||
357 | .tweak_ctx = &ctx->tweak_ctx, | ||
358 | .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk), | ||
359 | .crypt_ctx = &crypt_ctx, | ||
360 | .crypt_fn = decrypt_callback, | ||
361 | }; | ||
362 | int ret; | ||
363 | |||
364 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
365 | ret = xts_crypt(desc, dst, src, nbytes, &req); | ||
366 | twofish_fpu_end(crypt_ctx.fpu_enabled); | ||
367 | |||
368 | return ret; | ||
369 | } | ||
370 | |||
371 | static struct crypto_alg twofish_algs[10] = { { | ||
372 | .cra_name = "__ecb-twofish-avx", | ||
373 | .cra_driver_name = "__driver-ecb-twofish-avx", | ||
374 | .cra_priority = 0, | ||
375 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
376 | .cra_blocksize = TF_BLOCK_SIZE, | ||
377 | .cra_ctxsize = sizeof(struct twofish_ctx), | ||
378 | .cra_alignmask = 0, | ||
379 | .cra_type = &crypto_blkcipher_type, | ||
380 | .cra_module = THIS_MODULE, | ||
381 | .cra_list = LIST_HEAD_INIT(twofish_algs[0].cra_list), | ||
382 | .cra_u = { | ||
383 | .blkcipher = { | ||
384 | .min_keysize = TF_MIN_KEY_SIZE, | ||
385 | .max_keysize = TF_MAX_KEY_SIZE, | ||
386 | .setkey = twofish_setkey, | ||
387 | .encrypt = ecb_encrypt, | ||
388 | .decrypt = ecb_decrypt, | ||
389 | }, | ||
390 | }, | ||
391 | }, { | ||
392 | .cra_name = "__cbc-twofish-avx", | ||
393 | .cra_driver_name = "__driver-cbc-twofish-avx", | ||
394 | .cra_priority = 0, | ||
395 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
396 | .cra_blocksize = TF_BLOCK_SIZE, | ||
397 | .cra_ctxsize = sizeof(struct twofish_ctx), | ||
398 | .cra_alignmask = 0, | ||
399 | .cra_type = &crypto_blkcipher_type, | ||
400 | .cra_module = THIS_MODULE, | ||
401 | .cra_list = LIST_HEAD_INIT(twofish_algs[1].cra_list), | ||
402 | .cra_u = { | ||
403 | .blkcipher = { | ||
404 | .min_keysize = TF_MIN_KEY_SIZE, | ||
405 | .max_keysize = TF_MAX_KEY_SIZE, | ||
406 | .setkey = twofish_setkey, | ||
407 | .encrypt = cbc_encrypt, | ||
408 | .decrypt = cbc_decrypt, | ||
409 | }, | ||
410 | }, | ||
411 | }, { | ||
412 | .cra_name = "__ctr-twofish-avx", | ||
413 | .cra_driver_name = "__driver-ctr-twofish-avx", | ||
414 | .cra_priority = 0, | ||
415 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
416 | .cra_blocksize = 1, | ||
417 | .cra_ctxsize = sizeof(struct twofish_ctx), | ||
418 | .cra_alignmask = 0, | ||
419 | .cra_type = &crypto_blkcipher_type, | ||
420 | .cra_module = THIS_MODULE, | ||
421 | .cra_list = LIST_HEAD_INIT(twofish_algs[2].cra_list), | ||
422 | .cra_u = { | ||
423 | .blkcipher = { | ||
424 | .min_keysize = TF_MIN_KEY_SIZE, | ||
425 | .max_keysize = TF_MAX_KEY_SIZE, | ||
426 | .ivsize = TF_BLOCK_SIZE, | ||
427 | .setkey = twofish_setkey, | ||
428 | .encrypt = ctr_crypt, | ||
429 | .decrypt = ctr_crypt, | ||
430 | }, | ||
431 | }, | ||
432 | }, { | ||
433 | .cra_name = "__lrw-twofish-avx", | ||
434 | .cra_driver_name = "__driver-lrw-twofish-avx", | ||
435 | .cra_priority = 0, | ||
436 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
437 | .cra_blocksize = TF_BLOCK_SIZE, | ||
438 | .cra_ctxsize = sizeof(struct twofish_lrw_ctx), | ||
439 | .cra_alignmask = 0, | ||
440 | .cra_type = &crypto_blkcipher_type, | ||
441 | .cra_module = THIS_MODULE, | ||
442 | .cra_list = LIST_HEAD_INIT(twofish_algs[3].cra_list), | ||
443 | .cra_exit = lrw_twofish_exit_tfm, | ||
444 | .cra_u = { | ||
445 | .blkcipher = { | ||
446 | .min_keysize = TF_MIN_KEY_SIZE + | ||
447 | TF_BLOCK_SIZE, | ||
448 | .max_keysize = TF_MAX_KEY_SIZE + | ||
449 | TF_BLOCK_SIZE, | ||
450 | .ivsize = TF_BLOCK_SIZE, | ||
451 | .setkey = lrw_twofish_setkey, | ||
452 | .encrypt = lrw_encrypt, | ||
453 | .decrypt = lrw_decrypt, | ||
454 | }, | ||
455 | }, | ||
456 | }, { | ||
457 | .cra_name = "__xts-twofish-avx", | ||
458 | .cra_driver_name = "__driver-xts-twofish-avx", | ||
459 | .cra_priority = 0, | ||
460 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
461 | .cra_blocksize = TF_BLOCK_SIZE, | ||
462 | .cra_ctxsize = sizeof(struct twofish_xts_ctx), | ||
463 | .cra_alignmask = 0, | ||
464 | .cra_type = &crypto_blkcipher_type, | ||
465 | .cra_module = THIS_MODULE, | ||
466 | .cra_list = LIST_HEAD_INIT(twofish_algs[4].cra_list), | ||
467 | .cra_u = { | ||
468 | .blkcipher = { | ||
469 | .min_keysize = TF_MIN_KEY_SIZE * 2, | ||
470 | .max_keysize = TF_MAX_KEY_SIZE * 2, | ||
471 | .ivsize = TF_BLOCK_SIZE, | ||
472 | .setkey = xts_twofish_setkey, | ||
473 | .encrypt = xts_encrypt, | ||
474 | .decrypt = xts_decrypt, | ||
475 | }, | ||
476 | }, | ||
477 | }, { | ||
478 | .cra_name = "ecb(twofish)", | ||
479 | .cra_driver_name = "ecb-twofish-avx", | ||
480 | .cra_priority = 400, | ||
481 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
482 | .cra_blocksize = TF_BLOCK_SIZE, | ||
483 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
484 | .cra_alignmask = 0, | ||
485 | .cra_type = &crypto_ablkcipher_type, | ||
486 | .cra_module = THIS_MODULE, | ||
487 | .cra_list = LIST_HEAD_INIT(twofish_algs[5].cra_list), | ||
488 | .cra_init = ablk_init, | ||
489 | .cra_exit = ablk_exit, | ||
490 | .cra_u = { | ||
491 | .ablkcipher = { | ||
492 | .min_keysize = TF_MIN_KEY_SIZE, | ||
493 | .max_keysize = TF_MAX_KEY_SIZE, | ||
494 | .setkey = ablk_set_key, | ||
495 | .encrypt = ablk_encrypt, | ||
496 | .decrypt = ablk_decrypt, | ||
497 | }, | ||
498 | }, | ||
499 | }, { | ||
500 | .cra_name = "cbc(twofish)", | ||
501 | .cra_driver_name = "cbc-twofish-avx", | ||
502 | .cra_priority = 400, | ||
503 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
504 | .cra_blocksize = TF_BLOCK_SIZE, | ||
505 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
506 | .cra_alignmask = 0, | ||
507 | .cra_type = &crypto_ablkcipher_type, | ||
508 | .cra_module = THIS_MODULE, | ||
509 | .cra_list = LIST_HEAD_INIT(twofish_algs[6].cra_list), | ||
510 | .cra_init = ablk_init, | ||
511 | .cra_exit = ablk_exit, | ||
512 | .cra_u = { | ||
513 | .ablkcipher = { | ||
514 | .min_keysize = TF_MIN_KEY_SIZE, | ||
515 | .max_keysize = TF_MAX_KEY_SIZE, | ||
516 | .ivsize = TF_BLOCK_SIZE, | ||
517 | .setkey = ablk_set_key, | ||
518 | .encrypt = __ablk_encrypt, | ||
519 | .decrypt = ablk_decrypt, | ||
520 | }, | ||
521 | }, | ||
522 | }, { | ||
523 | .cra_name = "ctr(twofish)", | ||
524 | .cra_driver_name = "ctr-twofish-avx", | ||
525 | .cra_priority = 400, | ||
526 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
527 | .cra_blocksize = 1, | ||
528 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
529 | .cra_alignmask = 0, | ||
530 | .cra_type = &crypto_ablkcipher_type, | ||
531 | .cra_module = THIS_MODULE, | ||
532 | .cra_list = LIST_HEAD_INIT(twofish_algs[7].cra_list), | ||
533 | .cra_init = ablk_init, | ||
534 | .cra_exit = ablk_exit, | ||
535 | .cra_u = { | ||
536 | .ablkcipher = { | ||
537 | .min_keysize = TF_MIN_KEY_SIZE, | ||
538 | .max_keysize = TF_MAX_KEY_SIZE, | ||
539 | .ivsize = TF_BLOCK_SIZE, | ||
540 | .setkey = ablk_set_key, | ||
541 | .encrypt = ablk_encrypt, | ||
542 | .decrypt = ablk_encrypt, | ||
543 | .geniv = "chainiv", | ||
544 | }, | ||
545 | }, | ||
546 | }, { | ||
547 | .cra_name = "lrw(twofish)", | ||
548 | .cra_driver_name = "lrw-twofish-avx", | ||
549 | .cra_priority = 400, | ||
550 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
551 | .cra_blocksize = TF_BLOCK_SIZE, | ||
552 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
553 | .cra_alignmask = 0, | ||
554 | .cra_type = &crypto_ablkcipher_type, | ||
555 | .cra_module = THIS_MODULE, | ||
556 | .cra_list = LIST_HEAD_INIT(twofish_algs[8].cra_list), | ||
557 | .cra_init = ablk_init, | ||
558 | .cra_exit = ablk_exit, | ||
559 | .cra_u = { | ||
560 | .ablkcipher = { | ||
561 | .min_keysize = TF_MIN_KEY_SIZE + | ||
562 | TF_BLOCK_SIZE, | ||
563 | .max_keysize = TF_MAX_KEY_SIZE + | ||
564 | TF_BLOCK_SIZE, | ||
565 | .ivsize = TF_BLOCK_SIZE, | ||
566 | .setkey = ablk_set_key, | ||
567 | .encrypt = ablk_encrypt, | ||
568 | .decrypt = ablk_decrypt, | ||
569 | }, | ||
570 | }, | ||
571 | }, { | ||
572 | .cra_name = "xts(twofish)", | ||
573 | .cra_driver_name = "xts-twofish-avx", | ||
574 | .cra_priority = 400, | ||
575 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
576 | .cra_blocksize = TF_BLOCK_SIZE, | ||
577 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
578 | .cra_alignmask = 0, | ||
579 | .cra_type = &crypto_ablkcipher_type, | ||
580 | .cra_module = THIS_MODULE, | ||
581 | .cra_list = LIST_HEAD_INIT(twofish_algs[9].cra_list), | ||
582 | .cra_init = ablk_init, | ||
583 | .cra_exit = ablk_exit, | ||
584 | .cra_u = { | ||
585 | .ablkcipher = { | ||
586 | .min_keysize = TF_MIN_KEY_SIZE * 2, | ||
587 | .max_keysize = TF_MAX_KEY_SIZE * 2, | ||
588 | .ivsize = TF_BLOCK_SIZE, | ||
589 | .setkey = ablk_set_key, | ||
590 | .encrypt = ablk_encrypt, | ||
591 | .decrypt = ablk_decrypt, | ||
592 | }, | ||
593 | }, | ||
594 | } }; | ||
595 | |||
596 | static int __init twofish_init(void) | ||
597 | { | ||
598 | u64 xcr0; | ||
599 | |||
600 | if (!cpu_has_avx || !cpu_has_osxsave) { | ||
601 | printk(KERN_INFO "AVX instructions are not detected.\n"); | ||
602 | return -ENODEV; | ||
603 | } | ||
604 | |||
605 | xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); | ||
606 | if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { | ||
607 | printk(KERN_INFO "AVX detected but unusable.\n"); | ||
608 | return -ENODEV; | ||
609 | } | ||
610 | |||
611 | return crypto_register_algs(twofish_algs, ARRAY_SIZE(twofish_algs)); | ||
612 | } | ||
613 | |||
614 | static void __exit twofish_exit(void) | ||
615 | { | ||
616 | crypto_unregister_algs(twofish_algs, ARRAY_SIZE(twofish_algs)); | ||
617 | } | ||
618 | |||
619 | module_init(twofish_init); | ||
620 | module_exit(twofish_exit); | ||
621 | |||
622 | MODULE_DESCRIPTION("Twofish Cipher Algorithm, AVX optimized"); | ||
623 | MODULE_LICENSE("GPL"); | ||
624 | MODULE_ALIAS("twofish"); | ||
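Once registered at priority 400, the async "ecb(twofish)" through "xts(twofish)" entries transparently replace the generic C implementations for in-kernel users such as dm-crypt. A hedged sketch of a minimal consumer using the 3.x-era ablkcipher API; completion handling for -EINPROGRESS and scatterlist setup are elided:

#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/scatterlist.h>

static int one_shot_cbc_twofish(struct scatterlist *src,
				struct scatterlist *dst, unsigned int len,
				const u8 *key, unsigned int keylen, u8 *iv)
{
	struct crypto_ablkcipher *tfm;
	struct ablkcipher_request *req;
	int err;

	/* Resolves to the highest-priority cbc(twofish) implementation:
	 * cbc-twofish-avx on AVX hardware with this module loaded. */
	tfm = crypto_alloc_ablkcipher("cbc(twofish)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_ablkcipher_setkey(tfm, key, keylen);
	if (err)
		goto out_free_tfm;

	req = ablkcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out_free_tfm;
	}

	ablkcipher_request_set_crypt(req, src, dst, len, iv);
	err = crypto_ablkcipher_encrypt(req);	/* -EINPROGRESS if deferred */

	ablkcipher_request_free(req);
out_free_tfm:
	crypto_free_ablkcipher(tfm);
	return err;
}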
diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c index 922ab24cce31..15f9347316c8 100644 --- a/arch/x86/crypto/twofish_glue_3way.c +++ b/arch/x86/crypto/twofish_glue_3way.c | |||
@@ -3,11 +3,6 @@ | |||
3 | * | 3 | * |
4 | * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | 4 | * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> |
5 | * | 5 | * |
6 | * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: | ||
7 | * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> | ||
8 | * CTR part based on code (crypto/ctr.c) by: | ||
9 | * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com> | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | 6 | * This program is free software; you can redistribute it and/or modify |
12 | * it under the terms of the GNU General Public License as published by | 7 | * it under the terms of the GNU General Public License as published by |
13 | * the Free Software Foundation; either version 2 of the License, or | 8 | * the Free Software Foundation; either version 2 of the License, or |
@@ -33,20 +28,13 @@ | |||
33 | #include <crypto/algapi.h> | 28 | #include <crypto/algapi.h> |
34 | #include <crypto/twofish.h> | 29 | #include <crypto/twofish.h> |
35 | #include <crypto/b128ops.h> | 30 | #include <crypto/b128ops.h> |
31 | #include <asm/crypto/twofish.h> | ||
32 | #include <asm/crypto/glue_helper.h> | ||
36 | #include <crypto/lrw.h> | 33 | #include <crypto/lrw.h> |
37 | #include <crypto/xts.h> | 34 | #include <crypto/xts.h> |
38 | 35 | ||
39 | /* regular block cipher functions from twofish_x86_64 module */ | 36 | EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way); |
40 | asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst, | 37 | EXPORT_SYMBOL_GPL(twofish_dec_blk_3way); |
41 | const u8 *src); | ||
42 | asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst, | ||
43 | const u8 *src); | ||
44 | |||
45 | /* 3-way parallel cipher functions */ | ||
46 | asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, | ||
47 | const u8 *src, bool xor); | ||
48 | asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst, | ||
49 | const u8 *src); | ||
50 | 38 | ||
51 | static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, | 39 | static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, |
52 | const u8 *src) | 40 | const u8 *src) |
@@ -60,311 +48,139 @@ static inline void twofish_enc_blk_xor_3way(struct twofish_ctx *ctx, u8 *dst, | |||
60 | __twofish_enc_blk_3way(ctx, dst, src, true); | 48 | __twofish_enc_blk_3way(ctx, dst, src, true); |
61 | } | 49 | } |
62 | 50 | ||
63 | static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, | 51 | void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src) |
64 | void (*fn)(struct twofish_ctx *, u8 *, const u8 *), | ||
65 | void (*fn_3way)(struct twofish_ctx *, u8 *, const u8 *)) | ||
66 | { | ||
67 | struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
68 | unsigned int bsize = TF_BLOCK_SIZE; | ||
69 | unsigned int nbytes; | ||
70 | int err; | ||
71 | |||
72 | err = blkcipher_walk_virt(desc, walk); | ||
73 | |||
74 | while ((nbytes = walk->nbytes)) { | ||
75 | u8 *wsrc = walk->src.virt.addr; | ||
76 | u8 *wdst = walk->dst.virt.addr; | ||
77 | |||
78 | /* Process three block batch */ | ||
79 | if (nbytes >= bsize * 3) { | ||
80 | do { | ||
81 | fn_3way(ctx, wdst, wsrc); | ||
82 | |||
83 | wsrc += bsize * 3; | ||
84 | wdst += bsize * 3; | ||
85 | nbytes -= bsize * 3; | ||
86 | } while (nbytes >= bsize * 3); | ||
87 | |||
88 | if (nbytes < bsize) | ||
89 | goto done; | ||
90 | } | ||
91 | |||
92 | /* Handle leftovers */ | ||
93 | do { | ||
94 | fn(ctx, wdst, wsrc); | ||
95 | |||
96 | wsrc += bsize; | ||
97 | wdst += bsize; | ||
98 | nbytes -= bsize; | ||
99 | } while (nbytes >= bsize); | ||
100 | |||
101 | done: | ||
102 | err = blkcipher_walk_done(desc, walk, nbytes); | ||
103 | } | ||
104 | |||
105 | return err; | ||
106 | } | ||
107 | |||
108 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
109 | struct scatterlist *src, unsigned int nbytes) | ||
110 | { | 52 | { |
111 | struct blkcipher_walk walk; | 53 | u128 ivs[2]; |
112 | 54 | ||
113 | blkcipher_walk_init(&walk, dst, src, nbytes); | 55 | ivs[0] = src[0]; |
114 | return ecb_crypt(desc, &walk, twofish_enc_blk, twofish_enc_blk_3way); | 56 | ivs[1] = src[1]; |
115 | } | ||
116 | 57 | ||
117 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 58 | twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src); |
118 | struct scatterlist *src, unsigned int nbytes) | ||
119 | { | ||
120 | struct blkcipher_walk walk; | ||
121 | 59 | ||
122 | blkcipher_walk_init(&walk, dst, src, nbytes); | 60 | u128_xor(&dst[1], &dst[1], &ivs[0]); |
123 | return ecb_crypt(desc, &walk, twofish_dec_blk, twofish_dec_blk_3way); | 61 | u128_xor(&dst[2], &dst[2], &ivs[1]); |
124 | } | 62 | } |
63 | EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way); | ||
125 | 64 | ||
126 | static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, | 65 | void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv) |
127 | struct blkcipher_walk *walk) | ||
128 | { | ||
129 | struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
130 | unsigned int bsize = TF_BLOCK_SIZE; | ||
131 | unsigned int nbytes = walk->nbytes; | ||
132 | u128 *src = (u128 *)walk->src.virt.addr; | ||
133 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
134 | u128 *iv = (u128 *)walk->iv; | ||
135 | |||
136 | do { | ||
137 | u128_xor(dst, src, iv); | ||
138 | twofish_enc_blk(ctx, (u8 *)dst, (u8 *)dst); | ||
139 | iv = dst; | ||
140 | |||
141 | src += 1; | ||
142 | dst += 1; | ||
143 | nbytes -= bsize; | ||
144 | } while (nbytes >= bsize); | ||
145 | |||
146 | u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv); | ||
147 | return nbytes; | ||
148 | } | ||
149 | |||
150 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
151 | struct scatterlist *src, unsigned int nbytes) | ||
152 | { | 66 | { |
153 | struct blkcipher_walk walk; | 67 | be128 ctrblk; |
154 | int err; | ||
155 | 68 | ||
156 | blkcipher_walk_init(&walk, dst, src, nbytes); | 69 | if (dst != src) |
157 | err = blkcipher_walk_virt(desc, &walk); | 70 | *dst = *src; |
158 | 71 | ||
159 | while ((nbytes = walk.nbytes)) { | 72 | u128_to_be128(&ctrblk, iv); |
160 | nbytes = __cbc_encrypt(desc, &walk); | 73 | u128_inc(iv); |
161 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
162 | } | ||
163 | 74 | ||
164 | return err; | 75 | twofish_enc_blk(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); |
76 | u128_xor(dst, dst, (u128 *)&ctrblk); | ||
165 | } | 77 | } |
78 | EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr); | ||
166 | 79 | ||
167 | static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, | 80 | void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src, |
168 | struct blkcipher_walk *walk) | 81 | u128 *iv) |
169 | { | 82 | { |
170 | struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | 83 | be128 ctrblks[3]; |
171 | unsigned int bsize = TF_BLOCK_SIZE; | ||
172 | unsigned int nbytes = walk->nbytes; | ||
173 | u128 *src = (u128 *)walk->src.virt.addr; | ||
174 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
175 | u128 ivs[3 - 1]; | ||
176 | u128 last_iv; | ||
177 | |||
178 | /* Start of the last block. */ | ||
179 | src += nbytes / bsize - 1; | ||
180 | dst += nbytes / bsize - 1; | ||
181 | |||
182 | last_iv = *src; | ||
183 | |||
184 | /* Process three block batch */ | ||
185 | if (nbytes >= bsize * 3) { | ||
186 | do { | ||
187 | nbytes -= bsize * (3 - 1); | ||
188 | src -= 3 - 1; | ||
189 | dst -= 3 - 1; | ||
190 | |||
191 | ivs[0] = src[0]; | ||
192 | ivs[1] = src[1]; | ||
193 | |||
194 | twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src); | ||
195 | |||
196 | u128_xor(dst + 1, dst + 1, ivs + 0); | ||
197 | u128_xor(dst + 2, dst + 2, ivs + 1); | ||
198 | |||
199 | nbytes -= bsize; | ||
200 | if (nbytes < bsize) | ||
201 | goto done; | ||
202 | |||
203 | u128_xor(dst, dst, src - 1); | ||
204 | src -= 1; | ||
205 | dst -= 1; | ||
206 | } while (nbytes >= bsize * 3); | ||
207 | |||
208 | if (nbytes < bsize) | ||
209 | goto done; | ||
210 | } | ||
211 | |||
212 | /* Handle leftovers */ | ||
213 | for (;;) { | ||
214 | twofish_dec_blk(ctx, (u8 *)dst, (u8 *)src); | ||
215 | |||
216 | nbytes -= bsize; | ||
217 | if (nbytes < bsize) | ||
218 | break; | ||
219 | 84 | ||
220 | u128_xor(dst, dst, src - 1); | 85 | if (dst != src) { |
221 | src -= 1; | 86 | dst[0] = src[0]; |
222 | dst -= 1; | 87 | dst[1] = src[1]; |
88 | dst[2] = src[2]; | ||
223 | } | 89 | } |
224 | 90 | ||
225 | done: | 91 | u128_to_be128(&ctrblks[0], iv); |
226 | u128_xor(dst, dst, (u128 *)walk->iv); | 92 | u128_inc(iv); |
227 | *(u128 *)walk->iv = last_iv; | 93 | u128_to_be128(&ctrblks[1], iv); |
94 | u128_inc(iv); | ||
95 | u128_to_be128(&ctrblks[2], iv); | ||
96 | u128_inc(iv); | ||
228 | 97 | ||
229 | return nbytes; | 98 | twofish_enc_blk_xor_3way(ctx, (u8 *)dst, (u8 *)ctrblks); |
230 | } | 99 | } |
100 | EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr_3way); | ||
101 | |||
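These EXPORT_SYMBOL_GPL()s plus the new asm/crypto/twofish.h include are what let twofish_avx_glue.c above reuse the 3-way and single-block code for short tails instead of duplicating it. The shared header presumably carries declarations of this shape (signatures copied from this patch; the exact header contents are an assumption):

/* asm/crypto/twofish.h, assumed shape */
asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst,
				const u8 *src);
asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst,
				const u8 *src);
asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
				       const u8 *src, bool xor);
asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst,
				     const u8 *src);

/* glue-helper-compatible wrappers exported by this file */
void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src);
void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv);
void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src,
			      u128 *iv);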
102 | static const struct common_glue_ctx twofish_enc = { | ||
103 | .num_funcs = 2, | ||
104 | .fpu_blocks_limit = -1, | ||
105 | |||
106 | .funcs = { { | ||
107 | .num_blocks = 3, | ||
108 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) } | ||
109 | }, { | ||
110 | .num_blocks = 1, | ||
111 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) } | ||
112 | } } | ||
113 | }; | ||
231 | 114 | ||
232 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 115 | static const struct common_glue_ctx twofish_ctr = { |
233 | struct scatterlist *src, unsigned int nbytes) | 116 | .num_funcs = 2, |
234 | { | 117 | .fpu_blocks_limit = -1, |
235 | struct blkcipher_walk walk; | 118 | |
236 | int err; | 119 | .funcs = { { |
237 | 120 | .num_blocks = 3, | |
238 | blkcipher_walk_init(&walk, dst, src, nbytes); | 121 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_3way) }
239 | err = blkcipher_walk_virt(desc, &walk); | 122 | }, { |
123 | .num_blocks = 1, | ||
124 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr) }
125 | } } | ||
126 | }; | ||
240 | 127 | ||
241 | while ((nbytes = walk.nbytes)) { | 128 | static const struct common_glue_ctx twofish_dec = { |
242 | nbytes = __cbc_decrypt(desc, &walk); | 129 | .num_funcs = 2, |
243 | err = blkcipher_walk_done(desc, &walk, nbytes); | 130 | .fpu_blocks_limit = -1, |
244 | } | 131 | |
132 | .funcs = { { | ||
133 | .num_blocks = 3, | ||
134 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) } | ||
135 | }, { | ||
136 | .num_blocks = 1, | ||
137 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) } | ||
138 | } } | ||
139 | }; | ||
245 | 140 | ||
246 | return err; | 141 | static const struct common_glue_ctx twofish_dec_cbc = { |
247 | } | 142 | .num_funcs = 2, |
143 | .fpu_blocks_limit = -1, | ||
144 | |||
145 | .funcs = { { | ||
146 | .num_blocks = 3, | ||
147 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) } | ||
148 | }, { | ||
149 | .num_blocks = 1, | ||
150 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) } | ||
151 | } } | ||
152 | }; | ||
248 | 153 | ||
249 | static inline void u128_to_be128(be128 *dst, const u128 *src) | 154 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
155 | struct scatterlist *src, unsigned int nbytes) | ||
250 | { | 156 | { |
251 | dst->a = cpu_to_be64(src->a); | 157 | return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes); |
252 | dst->b = cpu_to_be64(src->b); | ||
253 | } | 158 | } |
254 | 159 | ||
255 | static inline void be128_to_u128(u128 *dst, const be128 *src) | 160 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
161 | struct scatterlist *src, unsigned int nbytes) | ||
256 | { | 162 | { |
257 | dst->a = be64_to_cpu(src->a); | 163 | return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes); |
258 | dst->b = be64_to_cpu(src->b); | ||
259 | } | 164 | } |
260 | 165 | ||
261 | static inline void u128_inc(u128 *i) | 166 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
167 | struct scatterlist *src, unsigned int nbytes) | ||
262 | { | 168 | { |
263 | i->b++; | 169 | return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc, |
264 | if (!i->b) | 170 | dst, src, nbytes); |
265 | i->a++; | ||
266 | } | 171 | } |
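The removed u128 helpers above build a 128-bit counter from two host-order 64-bit words; the only subtlety is the carry in u128_inc(). A tiny standalone check of the wrap-around case (illustrative names, not kernel API):

#include <assert.h>
#include <stdint.h>

struct u128_like { uint64_t a, b; };	/* a = high word, b = low word */

static void u128_like_inc(struct u128_like *i)
{
	i->b++;
	if (!i->b)	/* low word wrapped to zero: propagate carry */
		i->a++;
}

int main(void)
{
	struct u128_like c = { .a = 0, .b = UINT64_MAX };

	u128_like_inc(&c);
	assert(c.a == 1 && c.b == 0);	/* carry crossed the 64-bit boundary */
	return 0;
}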
267 | 172 | ||
268 | static void ctr_crypt_final(struct blkcipher_desc *desc, | 173 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
269 | struct blkcipher_walk *walk) | 174 | struct scatterlist *src, unsigned int nbytes) |
270 | { | 175 | { |
271 | struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | 176 | return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src, |
272 | u8 *ctrblk = walk->iv; | 177 | nbytes); |
273 | u8 keystream[TF_BLOCK_SIZE]; | ||
274 | u8 *src = walk->src.virt.addr; | ||
275 | u8 *dst = walk->dst.virt.addr; | ||
276 | unsigned int nbytes = walk->nbytes; | ||
277 | |||
278 | twofish_enc_blk(ctx, keystream, ctrblk); | ||
279 | crypto_xor(keystream, src, nbytes); | ||
280 | memcpy(dst, keystream, nbytes); | ||
281 | |||
282 | crypto_inc(ctrblk, TF_BLOCK_SIZE); | ||
283 | } | ||
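ctr_crypt_final() above is the standard CTR tail: when fewer than TF_BLOCK_SIZE bytes remain, encrypt the counter once into a keystream buffer and XOR only the bytes that actually exist. A standalone sketch with a pluggable block cipher (hypothetical names):

#include <stddef.h>
#include <stdint.h>

typedef void (*block_enc_t)(void *ctx, uint8_t out[16], const uint8_t in[16]);

/* Final partial block in CTR mode: keystream = E(counter), then XOR
 * just the remaining nbytes (< 16) into the output. */
static void ctr_partial_block(void *ctx, block_enc_t enc,
			      uint8_t *dst, const uint8_t *src,
			      size_t nbytes, const uint8_t counter[16])
{
	uint8_t keystream[16];

	enc(ctx, keystream, counter);
	for (size_t i = 0; i < nbytes; i++)
		dst[i] = src[i] ^ keystream[i];
	/* a real implementation would also bump the counter here,
	 * as ctr_crypt_final() does via crypto_inc() */
}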
284 | |||
285 | static unsigned int __ctr_crypt(struct blkcipher_desc *desc, | ||
286 | struct blkcipher_walk *walk) | ||
287 | { | ||
288 | struct twofish_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
289 | unsigned int bsize = TF_BLOCK_SIZE; | ||
290 | unsigned int nbytes = walk->nbytes; | ||
291 | u128 *src = (u128 *)walk->src.virt.addr; | ||
292 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
293 | u128 ctrblk; | ||
294 | be128 ctrblocks[3]; | ||
295 | |||
296 | be128_to_u128(&ctrblk, (be128 *)walk->iv); | ||
297 | |||
298 | /* Process three block batch */ | ||
299 | if (nbytes >= bsize * 3) { | ||
300 | do { | ||
301 | if (dst != src) { | ||
302 | dst[0] = src[0]; | ||
303 | dst[1] = src[1]; | ||
304 | dst[2] = src[2]; | ||
305 | } | ||
306 | |||
307 | /* create ctrblks for parallel encrypt */ | ||
308 | u128_to_be128(&ctrblocks[0], &ctrblk); | ||
309 | u128_inc(&ctrblk); | ||
310 | u128_to_be128(&ctrblocks[1], &ctrblk); | ||
311 | u128_inc(&ctrblk); | ||
312 | u128_to_be128(&ctrblocks[2], &ctrblk); | ||
313 | u128_inc(&ctrblk); | ||
314 | |||
315 | twofish_enc_blk_xor_3way(ctx, (u8 *)dst, | ||
316 | (u8 *)ctrblocks); | ||
317 | |||
318 | src += 3; | ||
319 | dst += 3; | ||
320 | nbytes -= bsize * 3; | ||
321 | } while (nbytes >= bsize * 3); | ||
322 | |||
323 | if (nbytes < bsize) | ||
324 | goto done; | ||
325 | } | ||
326 | |||
327 | /* Handle leftovers */ | ||
328 | do { | ||
329 | if (dst != src) | ||
330 | *dst = *src; | ||
331 | |||
332 | u128_to_be128(&ctrblocks[0], &ctrblk); | ||
333 | u128_inc(&ctrblk); | ||
334 | |||
335 | twofish_enc_blk(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks); | ||
336 | u128_xor(dst, dst, (u128 *)ctrblocks); | ||
337 | |||
338 | src += 1; | ||
339 | dst += 1; | ||
340 | nbytes -= bsize; | ||
341 | } while (nbytes >= bsize); | ||
342 | |||
343 | done: | ||
344 | u128_to_be128((be128 *)walk->iv, &ctrblk); | ||
345 | return nbytes; | ||
346 | } | 178 | } |
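__ctr_crypt(), now replaced by glue_ctr_crypt_128bit(), has the classic batch-then-leftover shape: consume three-block groups while they last, then single blocks, and return whatever is smaller than a block to the caller. The control flow, reduced to a shape-only standalone sketch (real code also threads ctx and advances src/dst):

#include <stddef.h>

/* Returns the byte count too small to form a whole block, which the
 * caller finishes separately -- mirroring __ctr_crypt()'s return value. */
static size_t batch_then_leftover(size_t nbytes, size_t bsize,
				  void (*do3)(void), void (*do1)(void))
{
	while (nbytes >= 3 * bsize) {	/* wide path first */
		do3();
		nbytes -= 3 * bsize;
	}
	while (nbytes >= bsize) {	/* then one block at a time */
		do1();
		nbytes -= bsize;
	}
	return nbytes;			/* < bsize: partial tail, if any */
}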
347 | 179 | ||
348 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 180 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
349 | struct scatterlist *src, unsigned int nbytes) | 181 | struct scatterlist *src, unsigned int nbytes) |
350 | { | 182 | { |
351 | struct blkcipher_walk walk; | 183 | return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes); |
352 | int err; | ||
353 | |||
354 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
355 | err = blkcipher_walk_virt_block(desc, &walk, TF_BLOCK_SIZE); | ||
356 | |||
357 | while ((nbytes = walk.nbytes) >= TF_BLOCK_SIZE) { | ||
358 | nbytes = __ctr_crypt(desc, &walk); | ||
359 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
360 | } | ||
361 | |||
362 | if (walk.nbytes) { | ||
363 | ctr_crypt_final(desc, &walk); | ||
364 | err = blkcipher_walk_done(desc, &walk, 0); | ||
365 | } | ||
366 | |||
367 | return err; | ||
368 | } | 184 | } |
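One detail worth noting in ctr_crypt() above: the counter lives in walk.iv, so it persists across scatterlist chunks, and each __ctr_crypt() call resumes the keystream where the previous chunk stopped. A standalone sketch of threading CTR state through consecutive buffers (hypothetical names):

#include <stddef.h>
#include <stdint.h>

typedef void (*block_enc_t)(void *ctx, uint8_t out[16], const uint8_t in[16]);

static void ctr128_inc_be(uint8_t ctr[16])	/* big-endian increment */
{
	for (int i = 15; i >= 0; i--)
		if (++ctr[i])
			break;
}

/* CTR over one chunk; the counter is caller-owned state, so running
 * this on consecutive chunks of a message continues the keystream
 * seamlessly, just as walk.iv does across blkcipher_walk iterations. */
static void ctr_chunk(void *ctx, block_enc_t enc, uint8_t ctr[16],
		      uint8_t *dst, const uint8_t *src, size_t nbytes)
{
	uint8_t ks[16];

	while (nbytes >= 16) {
		enc(ctx, ks, ctr);
		for (int i = 0; i < 16; i++)
			dst[i] = src[i] ^ ks[i];
		ctr128_inc_be(ctr);
		dst += 16;
		src += 16;
		nbytes -= 16;
	}
	if (nbytes) {			/* partial tail */
		enc(ctx, ks, ctr);
		for (size_t i = 0; i < nbytes; i++)
			dst[i] = src[i] ^ ks[i];
		ctr128_inc_be(ctr);
	}
}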
369 | 185 | ||
370 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | 186 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) |
@@ -397,13 +213,8 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | |||
397 | twofish_dec_blk(ctx, srcdst, srcdst); | 213 | twofish_dec_blk(ctx, srcdst, srcdst); |
398 | } | 214 | } |
399 | 215 | ||
400 | struct twofish_lrw_ctx { | 216 | int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, |
401 | struct lrw_table_ctx lrw_table; | 217 | unsigned int keylen) |
402 | struct twofish_ctx twofish_ctx; | ||
403 | }; | ||
404 | |||
405 | static int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
406 | unsigned int keylen) | ||
407 | { | 218 | { |
408 | struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); | 219 | struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); |
409 | int err; | 220 | int err; |
@@ -415,6 +226,7 @@ static int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, | |||
415 | 226 | ||
416 | return lrw_init_table(&ctx->lrw_table, key + keylen - TF_BLOCK_SIZE); | 227 | return lrw_init_table(&ctx->lrw_table, key + keylen - TF_BLOCK_SIZE); |
417 | } | 228 | } |
229 | EXPORT_SYMBOL_GPL(lrw_twofish_setkey); | ||
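lrw_twofish_setkey() above splits the supplied key material: everything except the trailing TF_BLOCK_SIZE bytes keys the block cipher, and the last 16 bytes seed the LRW tweak-multiplication table. The same convention as a standalone sketch (setkey_cipher/init_tweak_table are stubs, not kernel functions):

#include <stddef.h>

enum { BLOCK_SIZE = 16 };

/* Stubs standing in for __twofish_setkey() and lrw_init_table(). */
static int setkey_cipher(void *ctx, const unsigned char *key, size_t len)
{ (void)ctx; (void)key; (void)len; return 0; }
static int init_tweak_table(void *ctx, const unsigned char *tweak_key)
{ (void)ctx; (void)tweak_key; return 0; }

/* LRW keying convention: cipher key first, 16-byte tweak key last. */
static int lrw_style_setkey(void *cipher_ctx, void *tweak_ctx,
			    const unsigned char *key, size_t keylen)
{
	int err;

	if (keylen <= BLOCK_SIZE)
		return -1;		/* no room for both keys */

	err = setkey_cipher(cipher_ctx, key, keylen - BLOCK_SIZE);
	if (err)
		return err;

	/* trailing block seeds the GF(2^128) multiplication table */
	return init_tweak_table(tweak_ctx, key + keylen - BLOCK_SIZE);
}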
418 | 230 | ||
419 | static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 231 | static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
420 | struct scatterlist *src, unsigned int nbytes) | 232 | struct scatterlist *src, unsigned int nbytes) |
@@ -450,20 +262,16 @@ static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | |||
450 | return lrw_crypt(desc, dst, src, nbytes, &req); | 262 | return lrw_crypt(desc, dst, src, nbytes, &req); |
451 | } | 263 | } |
452 | 264 | ||
453 | static void lrw_exit_tfm(struct crypto_tfm *tfm) | 265 | void lrw_twofish_exit_tfm(struct crypto_tfm *tfm) |
454 | { | 266 | { |
455 | struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); | 267 | struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); |
456 | 268 | ||
457 | lrw_free_table(&ctx->lrw_table); | 269 | lrw_free_table(&ctx->lrw_table); |
458 | } | 270 | } |
271 | EXPORT_SYMBOL_GPL(lrw_twofish_exit_tfm); | ||
459 | 272 | ||
460 | struct twofish_xts_ctx { | 273 | int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, |
461 | struct twofish_ctx tweak_ctx; | 274 | unsigned int keylen) |
462 | struct twofish_ctx crypt_ctx; | ||
463 | }; | ||
464 | |||
465 | static int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
466 | unsigned int keylen) | ||
467 | { | 275 | { |
468 | struct twofish_xts_ctx *ctx = crypto_tfm_ctx(tfm); | 276 | struct twofish_xts_ctx *ctx = crypto_tfm_ctx(tfm); |
469 | u32 *flags = &tfm->crt_flags; | 277 | u32 *flags = &tfm->crt_flags; |
@@ -486,6 +294,7 @@ static int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, | |||
486 | return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2, | 294 | return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2, |
487 | flags); | 295 | flags); |
488 | } | 296 | } |
297 | EXPORT_SYMBOL_GPL(xts_twofish_setkey); | ||
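xts_twofish_setkey() above follows the usual XTS keying rule: the supplied key is split into two equal halves, the first keying the data-encryption context and the second the tweak context. A standalone sketch (set_half_key is a stub standing in for __twofish_setkey()):

#include <stddef.h>

/* Stub for the per-context key schedule; real code also threads
 * the tfm flags through for weak-key reporting. */
static int set_half_key(void *ctx, const unsigned char *key, size_t len)
{ (void)ctx; (void)key; (void)len; return 0; }

/* XTS keying: two independent halves, one for data, one for tweaks. */
static int xts_style_setkey(void *crypt_ctx, void *tweak_ctx,
			    const unsigned char *key, size_t keylen)
{
	int err;

	if (keylen % 2)			/* halves must be equal-sized */
		return -1;

	err = set_half_key(crypt_ctx, key, keylen / 2);	/* first half */
	if (err)
		return err;

	return set_half_key(tweak_ctx, key + keylen / 2, keylen / 2);
}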
489 | 298 | ||
490 | static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 299 | static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
491 | struct scatterlist *src, unsigned int nbytes) | 300 | struct scatterlist *src, unsigned int nbytes) |
@@ -596,7 +405,7 @@ static struct crypto_alg tf_algs[5] = { { | |||
596 | .cra_type = &crypto_blkcipher_type, | 405 | .cra_type = &crypto_blkcipher_type, |
597 | .cra_module = THIS_MODULE, | 406 | .cra_module = THIS_MODULE, |
598 | .cra_list = LIST_HEAD_INIT(tf_algs[3].cra_list), | 407 | .cra_list = LIST_HEAD_INIT(tf_algs[3].cra_list), |
599 | .cra_exit = lrw_exit_tfm, | 408 | .cra_exit = lrw_twofish_exit_tfm, |
600 | .cra_u = { | 409 | .cra_u = { |
601 | .blkcipher = { | 410 | .blkcipher = { |
602 | .min_keysize = TF_MIN_KEY_SIZE + TF_BLOCK_SIZE, | 411 | .min_keysize = TF_MIN_KEY_SIZE + TF_BLOCK_SIZE, |