diff options
author | Jonathan Herman <hermanjl@cs.unc.edu> | 2013-01-17 16:15:55 -0500 |
---|---|---|
committer | Jonathan Herman <hermanjl@cs.unc.edu> | 2013-01-17 16:15:55 -0500 |
commit | 8dea78da5cee153b8af9c07a2745f6c55057fe12 (patch) | |
tree | a8f4d49d63b1ecc92f2fddceba0655b2472c5bd9 /arch/x86/crypto | |
parent | 406089d01562f1e2bf9f089fd7637009ebaad589 (diff) |
Patched in Tegra support.
Diffstat (limited to 'arch/x86/crypto')
35 files changed, 596 insertions, 14721 deletions
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index e0ca7c9ac38..c04f1b7a913 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile | |||
@@ -2,53 +2,26 @@ | |||
2 | # Arch-specific CryptoAPI modules. | 2 | # Arch-specific CryptoAPI modules. |
3 | # | 3 | # |
4 | 4 | ||
5 | obj-$(CONFIG_CRYPTO_ABLK_HELPER_X86) += ablk_helper.o | ||
6 | obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o | ||
7 | |||
8 | obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o | 5 | obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o |
9 | obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o | 6 | obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o |
10 | obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o | 7 | obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o |
11 | obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o | ||
12 | 8 | ||
13 | obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o | 9 | obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o |
14 | obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o | ||
15 | obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64) += camellia-aesni-avx-x86_64.o | ||
16 | obj-$(CONFIG_CRYPTO_CAST5_AVX_X86_64) += cast5-avx-x86_64.o | ||
17 | obj-$(CONFIG_CRYPTO_CAST6_AVX_X86_64) += cast6-avx-x86_64.o | ||
18 | obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o | ||
19 | obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o | 10 | obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o |
20 | obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o | ||
21 | obj-$(CONFIG_CRYPTO_TWOFISH_AVX_X86_64) += twofish-avx-x86_64.o | ||
22 | obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o | 11 | obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o |
23 | obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o | ||
24 | obj-$(CONFIG_CRYPTO_SERPENT_AVX_X86_64) += serpent-avx-x86_64.o | ||
25 | obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o | 12 | obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o |
26 | obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o | 13 | obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o |
27 | 14 | ||
28 | obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o | 15 | obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o |
29 | obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o | ||
30 | 16 | ||
31 | aes-i586-y := aes-i586-asm_32.o aes_glue.o | 17 | aes-i586-y := aes-i586-asm_32.o aes_glue.o |
32 | twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o | 18 | twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o |
33 | salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o | 19 | salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o |
34 | serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o | ||
35 | 20 | ||
36 | aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o | 21 | aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o |
37 | camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o | ||
38 | camellia-aesni-avx-x86_64-y := camellia-aesni-avx-asm_64.o \ | ||
39 | camellia_aesni_avx_glue.o | ||
40 | cast5-avx-x86_64-y := cast5-avx-x86_64-asm_64.o cast5_avx_glue.o | ||
41 | cast6-avx-x86_64-y := cast6-avx-x86_64-asm_64.o cast6_avx_glue.o | ||
42 | blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o | ||
43 | twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o | 22 | twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o |
44 | twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o | ||
45 | twofish-avx-x86_64-y := twofish-avx-x86_64-asm_64.o twofish_avx_glue.o | ||
46 | salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o | 23 | salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o |
47 | serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o | ||
48 | serpent-avx-x86_64-y := serpent-avx-x86_64-asm_64.o serpent_avx_glue.o | ||
49 | 24 | ||
50 | aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o | 25 | aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o |
26 | |||
51 | ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o | 27 | ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o |
52 | sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o | ||
53 | crc32c-intel-y := crc32c-intel_glue.o | ||
54 | crc32c-intel-$(CONFIG_CRYPTO_CRC32C_X86_64) += crc32c-pcl-intel-asm_64.o | ||
diff --git a/arch/x86/crypto/ablk_helper.c b/arch/x86/crypto/ablk_helper.c deleted file mode 100644 index 43282fe04a8..00000000000 --- a/arch/x86/crypto/ablk_helper.c +++ /dev/null | |||
@@ -1,149 +0,0 @@ | |||
1 | /* | ||
2 | * Shared async block cipher helpers | ||
3 | * | ||
4 | * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
5 | * | ||
6 | * Based on aesni-intel_glue.c by: | ||
7 | * Copyright (C) 2008, Intel Corp. | ||
8 | * Author: Huang Ying <ying.huang@intel.com> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; either version 2 of the License, or | ||
13 | * (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 | * GNU General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, write to the Free Software | ||
22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
23 | * USA | ||
24 | * | ||
25 | */ | ||
26 | |||
27 | #include <linux/kernel.h> | ||
28 | #include <linux/crypto.h> | ||
29 | #include <linux/init.h> | ||
30 | #include <linux/module.h> | ||
31 | #include <crypto/algapi.h> | ||
32 | #include <crypto/cryptd.h> | ||
33 | #include <asm/i387.h> | ||
34 | #include <asm/crypto/ablk_helper.h> | ||
35 | |||
36 | int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, | ||
37 | unsigned int key_len) | ||
38 | { | ||
39 | struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm); | ||
40 | struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base; | ||
41 | int err; | ||
42 | |||
43 | crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); | ||
44 | crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm) | ||
45 | & CRYPTO_TFM_REQ_MASK); | ||
46 | err = crypto_ablkcipher_setkey(child, key, key_len); | ||
47 | crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child) | ||
48 | & CRYPTO_TFM_RES_MASK); | ||
49 | return err; | ||
50 | } | ||
51 | EXPORT_SYMBOL_GPL(ablk_set_key); | ||
52 | |||
53 | int __ablk_encrypt(struct ablkcipher_request *req) | ||
54 | { | ||
55 | struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); | ||
56 | struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm); | ||
57 | struct blkcipher_desc desc; | ||
58 | |||
59 | desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); | ||
60 | desc.info = req->info; | ||
61 | desc.flags = 0; | ||
62 | |||
63 | return crypto_blkcipher_crt(desc.tfm)->encrypt( | ||
64 | &desc, req->dst, req->src, req->nbytes); | ||
65 | } | ||
66 | EXPORT_SYMBOL_GPL(__ablk_encrypt); | ||
67 | |||
68 | int ablk_encrypt(struct ablkcipher_request *req) | ||
69 | { | ||
70 | struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); | ||
71 | struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm); | ||
72 | |||
73 | if (!irq_fpu_usable()) { | ||
74 | struct ablkcipher_request *cryptd_req = | ||
75 | ablkcipher_request_ctx(req); | ||
76 | |||
77 | memcpy(cryptd_req, req, sizeof(*req)); | ||
78 | ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); | ||
79 | |||
80 | return crypto_ablkcipher_encrypt(cryptd_req); | ||
81 | } else { | ||
82 | return __ablk_encrypt(req); | ||
83 | } | ||
84 | } | ||
85 | EXPORT_SYMBOL_GPL(ablk_encrypt); | ||
86 | |||
87 | int ablk_decrypt(struct ablkcipher_request *req) | ||
88 | { | ||
89 | struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); | ||
90 | struct async_helper_ctx *ctx = crypto_ablkcipher_ctx(tfm); | ||
91 | |||
92 | if (!irq_fpu_usable()) { | ||
93 | struct ablkcipher_request *cryptd_req = | ||
94 | ablkcipher_request_ctx(req); | ||
95 | |||
96 | memcpy(cryptd_req, req, sizeof(*req)); | ||
97 | ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); | ||
98 | |||
99 | return crypto_ablkcipher_decrypt(cryptd_req); | ||
100 | } else { | ||
101 | struct blkcipher_desc desc; | ||
102 | |||
103 | desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); | ||
104 | desc.info = req->info; | ||
105 | desc.flags = 0; | ||
106 | |||
107 | return crypto_blkcipher_crt(desc.tfm)->decrypt( | ||
108 | &desc, req->dst, req->src, req->nbytes); | ||
109 | } | ||
110 | } | ||
111 | EXPORT_SYMBOL_GPL(ablk_decrypt); | ||
112 | |||
113 | void ablk_exit(struct crypto_tfm *tfm) | ||
114 | { | ||
115 | struct async_helper_ctx *ctx = crypto_tfm_ctx(tfm); | ||
116 | |||
117 | cryptd_free_ablkcipher(ctx->cryptd_tfm); | ||
118 | } | ||
119 | EXPORT_SYMBOL_GPL(ablk_exit); | ||
120 | |||
121 | int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name) | ||
122 | { | ||
123 | struct async_helper_ctx *ctx = crypto_tfm_ctx(tfm); | ||
124 | struct cryptd_ablkcipher *cryptd_tfm; | ||
125 | |||
126 | cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0); | ||
127 | if (IS_ERR(cryptd_tfm)) | ||
128 | return PTR_ERR(cryptd_tfm); | ||
129 | |||
130 | ctx->cryptd_tfm = cryptd_tfm; | ||
131 | tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) + | ||
132 | crypto_ablkcipher_reqsize(&cryptd_tfm->base); | ||
133 | |||
134 | return 0; | ||
135 | } | ||
136 | EXPORT_SYMBOL_GPL(ablk_init_common); | ||
137 | |||
138 | int ablk_init(struct crypto_tfm *tfm) | ||
139 | { | ||
140 | char drv_name[CRYPTO_MAX_ALG_NAME]; | ||
141 | |||
142 | snprintf(drv_name, sizeof(drv_name), "__driver-%s", | ||
143 | crypto_tfm_alg_driver_name(tfm)); | ||
144 | |||
145 | return ablk_init_common(tfm, drv_name); | ||
146 | } | ||
147 | EXPORT_SYMBOL_GPL(ablk_init); | ||
148 | |||
149 | MODULE_LICENSE("GPL"); | ||
diff --git a/arch/x86/crypto/aes_glue.c b/arch/x86/crypto/aes_glue.c index aafe8ce0d65..49ae9fe32b2 100644 --- a/arch/x86/crypto/aes_glue.c +++ b/arch/x86/crypto/aes_glue.c | |||
@@ -3,9 +3,7 @@ | |||
3 | * | 3 | * |
4 | */ | 4 | */ |
5 | 5 | ||
6 | #include <linux/module.h> | ||
7 | #include <crypto/aes.h> | 6 | #include <crypto/aes.h> |
8 | #include <asm/crypto/aes.h> | ||
9 | 7 | ||
10 | asmlinkage void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in); | 8 | asmlinkage void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in); |
11 | asmlinkage void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in); | 9 | asmlinkage void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in); |
@@ -40,6 +38,7 @@ static struct crypto_alg aes_alg = { | |||
40 | .cra_blocksize = AES_BLOCK_SIZE, | 38 | .cra_blocksize = AES_BLOCK_SIZE, |
41 | .cra_ctxsize = sizeof(struct crypto_aes_ctx), | 39 | .cra_ctxsize = sizeof(struct crypto_aes_ctx), |
42 | .cra_module = THIS_MODULE, | 40 | .cra_module = THIS_MODULE, |
41 | .cra_list = LIST_HEAD_INIT(aes_alg.cra_list), | ||
43 | .cra_u = { | 42 | .cra_u = { |
44 | .cipher = { | 43 | .cipher = { |
45 | .cia_min_keysize = AES_MIN_KEY_SIZE, | 44 | .cia_min_keysize = AES_MIN_KEY_SIZE, |
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 3470624d783..be6d9e365a8 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S | |||
@@ -2460,12 +2460,10 @@ ENTRY(aesni_cbc_dec) | |||
2460 | pxor IN3, STATE4 | 2460 | pxor IN3, STATE4 |
2461 | movaps IN4, IV | 2461 | movaps IN4, IV |
2462 | #else | 2462 | #else |
2463 | pxor (INP), STATE2 | ||
2464 | pxor 0x10(INP), STATE3 | ||
2463 | pxor IN1, STATE4 | 2465 | pxor IN1, STATE4 |
2464 | movaps IN2, IV | 2466 | movaps IN2, IV |
2465 | movups (INP), IN1 | ||
2466 | pxor IN1, STATE2 | ||
2467 | movups 0x10(INP), IN2 | ||
2468 | pxor IN2, STATE3 | ||
2469 | #endif | 2467 | #endif |
2470 | movups STATE1, (OUTP) | 2468 | movups STATE1, (OUTP) |
2471 | movups STATE2, 0x10(OUTP) | 2469 | movups STATE2, 0x10(OUTP) |
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 1b9c22bea8a..feee8ff1d05 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c | |||
@@ -22,19 +22,13 @@ | |||
22 | #include <linux/hardirq.h> | 22 | #include <linux/hardirq.h> |
23 | #include <linux/types.h> | 23 | #include <linux/types.h> |
24 | #include <linux/crypto.h> | 24 | #include <linux/crypto.h> |
25 | #include <linux/module.h> | ||
26 | #include <linux/err.h> | 25 | #include <linux/err.h> |
27 | #include <crypto/algapi.h> | 26 | #include <crypto/algapi.h> |
28 | #include <crypto/aes.h> | 27 | #include <crypto/aes.h> |
29 | #include <crypto/cryptd.h> | 28 | #include <crypto/cryptd.h> |
30 | #include <crypto/ctr.h> | 29 | #include <crypto/ctr.h> |
31 | #include <crypto/b128ops.h> | ||
32 | #include <crypto/lrw.h> | ||
33 | #include <crypto/xts.h> | ||
34 | #include <asm/cpu_device_id.h> | ||
35 | #include <asm/i387.h> | 30 | #include <asm/i387.h> |
36 | #include <asm/crypto/aes.h> | 31 | #include <asm/aes.h> |
37 | #include <asm/crypto/ablk_helper.h> | ||
38 | #include <crypto/scatterwalk.h> | 32 | #include <crypto/scatterwalk.h> |
39 | #include <crypto/internal/aead.h> | 33 | #include <crypto/internal/aead.h> |
40 | #include <linux/workqueue.h> | 34 | #include <linux/workqueue.h> |
@@ -44,10 +38,22 @@ | |||
44 | #define HAS_CTR | 38 | #define HAS_CTR |
45 | #endif | 39 | #endif |
46 | 40 | ||
41 | #if defined(CONFIG_CRYPTO_LRW) || defined(CONFIG_CRYPTO_LRW_MODULE) | ||
42 | #define HAS_LRW | ||
43 | #endif | ||
44 | |||
47 | #if defined(CONFIG_CRYPTO_PCBC) || defined(CONFIG_CRYPTO_PCBC_MODULE) | 45 | #if defined(CONFIG_CRYPTO_PCBC) || defined(CONFIG_CRYPTO_PCBC_MODULE) |
48 | #define HAS_PCBC | 46 | #define HAS_PCBC |
49 | #endif | 47 | #endif |
50 | 48 | ||
49 | #if defined(CONFIG_CRYPTO_XTS) || defined(CONFIG_CRYPTO_XTS_MODULE) | ||
50 | #define HAS_XTS | ||
51 | #endif | ||
52 | |||
53 | struct async_aes_ctx { | ||
54 | struct cryptd_ablkcipher *cryptd_tfm; | ||
55 | }; | ||
56 | |||
51 | /* This data is stored at the end of the crypto_tfm struct. | 57 | /* This data is stored at the end of the crypto_tfm struct. |
52 | * It's a type of per "session" data storage location. | 58 | * It's a type of per "session" data storage location. |
53 | * This needs to be 16 byte aligned. | 59 | * This needs to be 16 byte aligned. |
@@ -74,16 +80,6 @@ struct aesni_hash_subkey_req_data { | |||
74 | #define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1)) | 80 | #define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1)) |
75 | #define RFC4106_HASH_SUBKEY_SIZE 16 | 81 | #define RFC4106_HASH_SUBKEY_SIZE 16 |
76 | 82 | ||
77 | struct aesni_lrw_ctx { | ||
78 | struct lrw_table_ctx lrw_table; | ||
79 | u8 raw_aes_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1]; | ||
80 | }; | ||
81 | |||
82 | struct aesni_xts_ctx { | ||
83 | u8 raw_tweak_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1]; | ||
84 | u8 raw_crypt_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1]; | ||
85 | }; | ||
86 | |||
87 | asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, | 83 | asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, |
88 | unsigned int key_len); | 84 | unsigned int key_len); |
89 | asmlinkage void aesni_enc(struct crypto_aes_ctx *ctx, u8 *out, | 85 | asmlinkage void aesni_enc(struct crypto_aes_ctx *ctx, u8 *out, |
@@ -224,6 +220,27 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | |||
224 | } | 220 | } |
225 | } | 221 | } |
226 | 222 | ||
223 | static struct crypto_alg aesni_alg = { | ||
224 | .cra_name = "aes", | ||
225 | .cra_driver_name = "aes-aesni", | ||
226 | .cra_priority = 300, | ||
227 | .cra_flags = CRYPTO_ALG_TYPE_CIPHER, | ||
228 | .cra_blocksize = AES_BLOCK_SIZE, | ||
229 | .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1, | ||
230 | .cra_alignmask = 0, | ||
231 | .cra_module = THIS_MODULE, | ||
232 | .cra_list = LIST_HEAD_INIT(aesni_alg.cra_list), | ||
233 | .cra_u = { | ||
234 | .cipher = { | ||
235 | .cia_min_keysize = AES_MIN_KEY_SIZE, | ||
236 | .cia_max_keysize = AES_MAX_KEY_SIZE, | ||
237 | .cia_setkey = aes_set_key, | ||
238 | .cia_encrypt = aes_encrypt, | ||
239 | .cia_decrypt = aes_decrypt | ||
240 | } | ||
241 | } | ||
242 | }; | ||
243 | |||
227 | static void __aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | 244 | static void __aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) |
228 | { | 245 | { |
229 | struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); | 246 | struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); |
@@ -238,6 +255,27 @@ static void __aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | |||
238 | aesni_dec(ctx, dst, src); | 255 | aesni_dec(ctx, dst, src); |
239 | } | 256 | } |
240 | 257 | ||
258 | static struct crypto_alg __aesni_alg = { | ||
259 | .cra_name = "__aes-aesni", | ||
260 | .cra_driver_name = "__driver-aes-aesni", | ||
261 | .cra_priority = 0, | ||
262 | .cra_flags = CRYPTO_ALG_TYPE_CIPHER, | ||
263 | .cra_blocksize = AES_BLOCK_SIZE, | ||
264 | .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1, | ||
265 | .cra_alignmask = 0, | ||
266 | .cra_module = THIS_MODULE, | ||
267 | .cra_list = LIST_HEAD_INIT(__aesni_alg.cra_list), | ||
268 | .cra_u = { | ||
269 | .cipher = { | ||
270 | .cia_min_keysize = AES_MIN_KEY_SIZE, | ||
271 | .cia_max_keysize = AES_MAX_KEY_SIZE, | ||
272 | .cia_setkey = aes_set_key, | ||
273 | .cia_encrypt = __aes_encrypt, | ||
274 | .cia_decrypt = __aes_decrypt | ||
275 | } | ||
276 | } | ||
277 | }; | ||
278 | |||
241 | static int ecb_encrypt(struct blkcipher_desc *desc, | 279 | static int ecb_encrypt(struct blkcipher_desc *desc, |
242 | struct scatterlist *dst, struct scatterlist *src, | 280 | struct scatterlist *dst, struct scatterlist *src, |
243 | unsigned int nbytes) | 281 | unsigned int nbytes) |
@@ -286,6 +324,28 @@ static int ecb_decrypt(struct blkcipher_desc *desc, | |||
286 | return err; | 324 | return err; |
287 | } | 325 | } |
288 | 326 | ||
327 | static struct crypto_alg blk_ecb_alg = { | ||
328 | .cra_name = "__ecb-aes-aesni", | ||
329 | .cra_driver_name = "__driver-ecb-aes-aesni", | ||
330 | .cra_priority = 0, | ||
331 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
332 | .cra_blocksize = AES_BLOCK_SIZE, | ||
333 | .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1, | ||
334 | .cra_alignmask = 0, | ||
335 | .cra_type = &crypto_blkcipher_type, | ||
336 | .cra_module = THIS_MODULE, | ||
337 | .cra_list = LIST_HEAD_INIT(blk_ecb_alg.cra_list), | ||
338 | .cra_u = { | ||
339 | .blkcipher = { | ||
340 | .min_keysize = AES_MIN_KEY_SIZE, | ||
341 | .max_keysize = AES_MAX_KEY_SIZE, | ||
342 | .setkey = aes_set_key, | ||
343 | .encrypt = ecb_encrypt, | ||
344 | .decrypt = ecb_decrypt, | ||
345 | }, | ||
346 | }, | ||
347 | }; | ||
348 | |||
289 | static int cbc_encrypt(struct blkcipher_desc *desc, | 349 | static int cbc_encrypt(struct blkcipher_desc *desc, |
290 | struct scatterlist *dst, struct scatterlist *src, | 350 | struct scatterlist *dst, struct scatterlist *src, |
291 | unsigned int nbytes) | 351 | unsigned int nbytes) |
@@ -334,6 +394,28 @@ static int cbc_decrypt(struct blkcipher_desc *desc, | |||
334 | return err; | 394 | return err; |
335 | } | 395 | } |
336 | 396 | ||
397 | static struct crypto_alg blk_cbc_alg = { | ||
398 | .cra_name = "__cbc-aes-aesni", | ||
399 | .cra_driver_name = "__driver-cbc-aes-aesni", | ||
400 | .cra_priority = 0, | ||
401 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
402 | .cra_blocksize = AES_BLOCK_SIZE, | ||
403 | .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1, | ||
404 | .cra_alignmask = 0, | ||
405 | .cra_type = &crypto_blkcipher_type, | ||
406 | .cra_module = THIS_MODULE, | ||
407 | .cra_list = LIST_HEAD_INIT(blk_cbc_alg.cra_list), | ||
408 | .cra_u = { | ||
409 | .blkcipher = { | ||
410 | .min_keysize = AES_MIN_KEY_SIZE, | ||
411 | .max_keysize = AES_MAX_KEY_SIZE, | ||
412 | .setkey = aes_set_key, | ||
413 | .encrypt = cbc_encrypt, | ||
414 | .decrypt = cbc_decrypt, | ||
415 | }, | ||
416 | }, | ||
417 | }; | ||
418 | |||
337 | #ifdef CONFIG_X86_64 | 419 | #ifdef CONFIG_X86_64 |
338 | static void ctr_crypt_final(struct crypto_aes_ctx *ctx, | 420 | static void ctr_crypt_final(struct crypto_aes_ctx *ctx, |
339 | struct blkcipher_walk *walk) | 421 | struct blkcipher_walk *walk) |
@@ -377,199 +459,373 @@ static int ctr_crypt(struct blkcipher_desc *desc, | |||
377 | 459 | ||
378 | return err; | 460 | return err; |
379 | } | 461 | } |
462 | |||
463 | static struct crypto_alg blk_ctr_alg = { | ||
464 | .cra_name = "__ctr-aes-aesni", | ||
465 | .cra_driver_name = "__driver-ctr-aes-aesni", | ||
466 | .cra_priority = 0, | ||
467 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
468 | .cra_blocksize = 1, | ||
469 | .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1, | ||
470 | .cra_alignmask = 0, | ||
471 | .cra_type = &crypto_blkcipher_type, | ||
472 | .cra_module = THIS_MODULE, | ||
473 | .cra_list = LIST_HEAD_INIT(blk_ctr_alg.cra_list), | ||
474 | .cra_u = { | ||
475 | .blkcipher = { | ||
476 | .min_keysize = AES_MIN_KEY_SIZE, | ||
477 | .max_keysize = AES_MAX_KEY_SIZE, | ||
478 | .ivsize = AES_BLOCK_SIZE, | ||
479 | .setkey = aes_set_key, | ||
480 | .encrypt = ctr_crypt, | ||
481 | .decrypt = ctr_crypt, | ||
482 | }, | ||
483 | }, | ||
484 | }; | ||
380 | #endif | 485 | #endif |
381 | 486 | ||
382 | static int ablk_ecb_init(struct crypto_tfm *tfm) | 487 | static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, |
488 | unsigned int key_len) | ||
383 | { | 489 | { |
384 | return ablk_init_common(tfm, "__driver-ecb-aes-aesni"); | 490 | struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); |
385 | } | 491 | struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base; |
492 | int err; | ||
386 | 493 | ||
387 | static int ablk_cbc_init(struct crypto_tfm *tfm) | 494 | crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); |
388 | { | 495 | crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm) |
389 | return ablk_init_common(tfm, "__driver-cbc-aes-aesni"); | 496 | & CRYPTO_TFM_REQ_MASK); |
497 | err = crypto_ablkcipher_setkey(child, key, key_len); | ||
498 | crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child) | ||
499 | & CRYPTO_TFM_RES_MASK); | ||
500 | return err; | ||
390 | } | 501 | } |
391 | 502 | ||
392 | #ifdef CONFIG_X86_64 | 503 | static int ablk_encrypt(struct ablkcipher_request *req) |
393 | static int ablk_ctr_init(struct crypto_tfm *tfm) | ||
394 | { | 504 | { |
395 | return ablk_init_common(tfm, "__driver-ctr-aes-aesni"); | 505 | struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); |
396 | } | 506 | struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); |
397 | 507 | ||
398 | #ifdef HAS_CTR | 508 | if (!irq_fpu_usable()) { |
399 | static int ablk_rfc3686_ctr_init(struct crypto_tfm *tfm) | 509 | struct ablkcipher_request *cryptd_req = |
400 | { | 510 | ablkcipher_request_ctx(req); |
401 | return ablk_init_common(tfm, "rfc3686(__driver-ctr-aes-aesni)"); | 511 | memcpy(cryptd_req, req, sizeof(*req)); |
512 | ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); | ||
513 | return crypto_ablkcipher_encrypt(cryptd_req); | ||
514 | } else { | ||
515 | struct blkcipher_desc desc; | ||
516 | desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); | ||
517 | desc.info = req->info; | ||
518 | desc.flags = 0; | ||
519 | return crypto_blkcipher_crt(desc.tfm)->encrypt( | ||
520 | &desc, req->dst, req->src, req->nbytes); | ||
521 | } | ||
402 | } | 522 | } |
403 | #endif | ||
404 | #endif | ||
405 | 523 | ||
406 | #ifdef HAS_PCBC | 524 | static int ablk_decrypt(struct ablkcipher_request *req) |
407 | static int ablk_pcbc_init(struct crypto_tfm *tfm) | ||
408 | { | 525 | { |
409 | return ablk_init_common(tfm, "fpu(pcbc(__driver-aes-aesni))"); | 526 | struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); |
410 | } | 527 | struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); |
411 | #endif | ||
412 | 528 | ||
413 | static void lrw_xts_encrypt_callback(void *ctx, u8 *blks, unsigned int nbytes) | 529 | if (!irq_fpu_usable()) { |
414 | { | 530 | struct ablkcipher_request *cryptd_req = |
415 | aesni_ecb_enc(ctx, blks, blks, nbytes); | 531 | ablkcipher_request_ctx(req); |
532 | memcpy(cryptd_req, req, sizeof(*req)); | ||
533 | ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); | ||
534 | return crypto_ablkcipher_decrypt(cryptd_req); | ||
535 | } else { | ||
536 | struct blkcipher_desc desc; | ||
537 | desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); | ||
538 | desc.info = req->info; | ||
539 | desc.flags = 0; | ||
540 | return crypto_blkcipher_crt(desc.tfm)->decrypt( | ||
541 | &desc, req->dst, req->src, req->nbytes); | ||
542 | } | ||
416 | } | 543 | } |
417 | 544 | ||
418 | static void lrw_xts_decrypt_callback(void *ctx, u8 *blks, unsigned int nbytes) | 545 | static void ablk_exit(struct crypto_tfm *tfm) |
419 | { | 546 | { |
420 | aesni_ecb_dec(ctx, blks, blks, nbytes); | 547 | struct async_aes_ctx *ctx = crypto_tfm_ctx(tfm); |
548 | |||
549 | cryptd_free_ablkcipher(ctx->cryptd_tfm); | ||
421 | } | 550 | } |
422 | 551 | ||
423 | static int lrw_aesni_setkey(struct crypto_tfm *tfm, const u8 *key, | 552 | static void ablk_init_common(struct crypto_tfm *tfm, |
424 | unsigned int keylen) | 553 | struct cryptd_ablkcipher *cryptd_tfm) |
425 | { | 554 | { |
426 | struct aesni_lrw_ctx *ctx = crypto_tfm_ctx(tfm); | 555 | struct async_aes_ctx *ctx = crypto_tfm_ctx(tfm); |
427 | int err; | ||
428 | 556 | ||
429 | err = aes_set_key_common(tfm, ctx->raw_aes_ctx, key, | 557 | ctx->cryptd_tfm = cryptd_tfm; |
430 | keylen - AES_BLOCK_SIZE); | 558 | tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) + |
431 | if (err) | 559 | crypto_ablkcipher_reqsize(&cryptd_tfm->base); |
432 | return err; | ||
433 | |||
434 | return lrw_init_table(&ctx->lrw_table, key + keylen - AES_BLOCK_SIZE); | ||
435 | } | 560 | } |
436 | 561 | ||
437 | static void lrw_aesni_exit_tfm(struct crypto_tfm *tfm) | 562 | static int ablk_ecb_init(struct crypto_tfm *tfm) |
438 | { | 563 | { |
439 | struct aesni_lrw_ctx *ctx = crypto_tfm_ctx(tfm); | 564 | struct cryptd_ablkcipher *cryptd_tfm; |
440 | 565 | ||
441 | lrw_free_table(&ctx->lrw_table); | 566 | cryptd_tfm = cryptd_alloc_ablkcipher("__driver-ecb-aes-aesni", 0, 0); |
567 | if (IS_ERR(cryptd_tfm)) | ||
568 | return PTR_ERR(cryptd_tfm); | ||
569 | ablk_init_common(tfm, cryptd_tfm); | ||
570 | return 0; | ||
442 | } | 571 | } |
443 | 572 | ||
444 | static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 573 | static struct crypto_alg ablk_ecb_alg = { |
445 | struct scatterlist *src, unsigned int nbytes) | 574 | .cra_name = "ecb(aes)", |
446 | { | 575 | .cra_driver_name = "ecb-aes-aesni", |
447 | struct aesni_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | 576 | .cra_priority = 400, |
448 | be128 buf[8]; | 577 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, |
449 | struct lrw_crypt_req req = { | 578 | .cra_blocksize = AES_BLOCK_SIZE, |
450 | .tbuf = buf, | 579 | .cra_ctxsize = sizeof(struct async_aes_ctx), |
451 | .tbuflen = sizeof(buf), | 580 | .cra_alignmask = 0, |
452 | 581 | .cra_type = &crypto_ablkcipher_type, | |
453 | .table_ctx = &ctx->lrw_table, | 582 | .cra_module = THIS_MODULE, |
454 | .crypt_ctx = aes_ctx(ctx->raw_aes_ctx), | 583 | .cra_list = LIST_HEAD_INIT(ablk_ecb_alg.cra_list), |
455 | .crypt_fn = lrw_xts_encrypt_callback, | 584 | .cra_init = ablk_ecb_init, |
456 | }; | 585 | .cra_exit = ablk_exit, |
457 | int ret; | 586 | .cra_u = { |
458 | 587 | .ablkcipher = { | |
459 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | 588 | .min_keysize = AES_MIN_KEY_SIZE, |
589 | .max_keysize = AES_MAX_KEY_SIZE, | ||
590 | .setkey = ablk_set_key, | ||
591 | .encrypt = ablk_encrypt, | ||
592 | .decrypt = ablk_decrypt, | ||
593 | }, | ||
594 | }, | ||
595 | }; | ||
460 | 596 | ||
461 | kernel_fpu_begin(); | 597 | static int ablk_cbc_init(struct crypto_tfm *tfm) |
462 | ret = lrw_crypt(desc, dst, src, nbytes, &req); | 598 | { |
463 | kernel_fpu_end(); | 599 | struct cryptd_ablkcipher *cryptd_tfm; |
464 | 600 | ||
465 | return ret; | 601 | cryptd_tfm = cryptd_alloc_ablkcipher("__driver-cbc-aes-aesni", 0, 0); |
602 | if (IS_ERR(cryptd_tfm)) | ||
603 | return PTR_ERR(cryptd_tfm); | ||
604 | ablk_init_common(tfm, cryptd_tfm); | ||
605 | return 0; | ||
466 | } | 606 | } |
467 | 607 | ||
468 | static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 608 | static struct crypto_alg ablk_cbc_alg = { |
469 | struct scatterlist *src, unsigned int nbytes) | 609 | .cra_name = "cbc(aes)", |
470 | { | 610 | .cra_driver_name = "cbc-aes-aesni", |
471 | struct aesni_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | 611 | .cra_priority = 400, |
472 | be128 buf[8]; | 612 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, |
473 | struct lrw_crypt_req req = { | 613 | .cra_blocksize = AES_BLOCK_SIZE, |
474 | .tbuf = buf, | 614 | .cra_ctxsize = sizeof(struct async_aes_ctx), |
475 | .tbuflen = sizeof(buf), | 615 | .cra_alignmask = 0, |
476 | 616 | .cra_type = &crypto_ablkcipher_type, | |
477 | .table_ctx = &ctx->lrw_table, | 617 | .cra_module = THIS_MODULE, |
478 | .crypt_ctx = aes_ctx(ctx->raw_aes_ctx), | 618 | .cra_list = LIST_HEAD_INIT(ablk_cbc_alg.cra_list), |
479 | .crypt_fn = lrw_xts_decrypt_callback, | 619 | .cra_init = ablk_cbc_init, |
480 | }; | 620 | .cra_exit = ablk_exit, |
481 | int ret; | 621 | .cra_u = { |
482 | 622 | .ablkcipher = { | |
483 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | 623 | .min_keysize = AES_MIN_KEY_SIZE, |
624 | .max_keysize = AES_MAX_KEY_SIZE, | ||
625 | .ivsize = AES_BLOCK_SIZE, | ||
626 | .setkey = ablk_set_key, | ||
627 | .encrypt = ablk_encrypt, | ||
628 | .decrypt = ablk_decrypt, | ||
629 | }, | ||
630 | }, | ||
631 | }; | ||
484 | 632 | ||
485 | kernel_fpu_begin(); | 633 | #ifdef CONFIG_X86_64 |
486 | ret = lrw_crypt(desc, dst, src, nbytes, &req); | 634 | static int ablk_ctr_init(struct crypto_tfm *tfm) |
487 | kernel_fpu_end(); | 635 | { |
636 | struct cryptd_ablkcipher *cryptd_tfm; | ||
488 | 637 | ||
489 | return ret; | 638 | cryptd_tfm = cryptd_alloc_ablkcipher("__driver-ctr-aes-aesni", 0, 0); |
639 | if (IS_ERR(cryptd_tfm)) | ||
640 | return PTR_ERR(cryptd_tfm); | ||
641 | ablk_init_common(tfm, cryptd_tfm); | ||
642 | return 0; | ||
490 | } | 643 | } |
491 | 644 | ||
492 | static int xts_aesni_setkey(struct crypto_tfm *tfm, const u8 *key, | 645 | static struct crypto_alg ablk_ctr_alg = { |
493 | unsigned int keylen) | 646 | .cra_name = "ctr(aes)", |
494 | { | 647 | .cra_driver_name = "ctr-aes-aesni", |
495 | struct aesni_xts_ctx *ctx = crypto_tfm_ctx(tfm); | 648 | .cra_priority = 400, |
496 | u32 *flags = &tfm->crt_flags; | 649 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, |
497 | int err; | 650 | .cra_blocksize = 1, |
498 | 651 | .cra_ctxsize = sizeof(struct async_aes_ctx), | |
499 | /* key consists of keys of equal size concatenated, therefore | 652 | .cra_alignmask = 0, |
500 | * the length must be even | 653 | .cra_type = &crypto_ablkcipher_type, |
501 | */ | 654 | .cra_module = THIS_MODULE, |
502 | if (keylen % 2) { | 655 | .cra_list = LIST_HEAD_INIT(ablk_ctr_alg.cra_list), |
503 | *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; | 656 | .cra_init = ablk_ctr_init, |
504 | return -EINVAL; | 657 | .cra_exit = ablk_exit, |
505 | } | 658 | .cra_u = { |
659 | .ablkcipher = { | ||
660 | .min_keysize = AES_MIN_KEY_SIZE, | ||
661 | .max_keysize = AES_MAX_KEY_SIZE, | ||
662 | .ivsize = AES_BLOCK_SIZE, | ||
663 | .setkey = ablk_set_key, | ||
664 | .encrypt = ablk_encrypt, | ||
665 | .decrypt = ablk_encrypt, | ||
666 | .geniv = "chainiv", | ||
667 | }, | ||
668 | }, | ||
669 | }; | ||
506 | 670 | ||
507 | /* first half of xts-key is for crypt */ | 671 | #ifdef HAS_CTR |
508 | err = aes_set_key_common(tfm, ctx->raw_crypt_ctx, key, keylen / 2); | 672 | static int ablk_rfc3686_ctr_init(struct crypto_tfm *tfm) |
509 | if (err) | 673 | { |
510 | return err; | 674 | struct cryptd_ablkcipher *cryptd_tfm; |
511 | 675 | ||
512 | /* second half of xts-key is for tweak */ | 676 | cryptd_tfm = cryptd_alloc_ablkcipher( |
513 | return aes_set_key_common(tfm, ctx->raw_tweak_ctx, key + keylen / 2, | 677 | "rfc3686(__driver-ctr-aes-aesni)", 0, 0); |
514 | keylen / 2); | 678 | if (IS_ERR(cryptd_tfm)) |
679 | return PTR_ERR(cryptd_tfm); | ||
680 | ablk_init_common(tfm, cryptd_tfm); | ||
681 | return 0; | ||
515 | } | 682 | } |
516 | 683 | ||
684 | static struct crypto_alg ablk_rfc3686_ctr_alg = { | ||
685 | .cra_name = "rfc3686(ctr(aes))", | ||
686 | .cra_driver_name = "rfc3686-ctr-aes-aesni", | ||
687 | .cra_priority = 400, | ||
688 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, | ||
689 | .cra_blocksize = 1, | ||
690 | .cra_ctxsize = sizeof(struct async_aes_ctx), | ||
691 | .cra_alignmask = 0, | ||
692 | .cra_type = &crypto_ablkcipher_type, | ||
693 | .cra_module = THIS_MODULE, | ||
694 | .cra_list = LIST_HEAD_INIT(ablk_rfc3686_ctr_alg.cra_list), | ||
695 | .cra_init = ablk_rfc3686_ctr_init, | ||
696 | .cra_exit = ablk_exit, | ||
697 | .cra_u = { | ||
698 | .ablkcipher = { | ||
699 | .min_keysize = AES_MIN_KEY_SIZE+CTR_RFC3686_NONCE_SIZE, | ||
700 | .max_keysize = AES_MAX_KEY_SIZE+CTR_RFC3686_NONCE_SIZE, | ||
701 | .ivsize = CTR_RFC3686_IV_SIZE, | ||
702 | .setkey = ablk_set_key, | ||
703 | .encrypt = ablk_encrypt, | ||
704 | .decrypt = ablk_decrypt, | ||
705 | .geniv = "seqiv", | ||
706 | }, | ||
707 | }, | ||
708 | }; | ||
709 | #endif | ||
710 | #endif | ||
517 | 711 | ||
518 | static void aesni_xts_tweak(void *ctx, u8 *out, const u8 *in) | 712 | #ifdef HAS_LRW |
713 | static int ablk_lrw_init(struct crypto_tfm *tfm) | ||
519 | { | 714 | { |
520 | aesni_enc(ctx, out, in); | 715 | struct cryptd_ablkcipher *cryptd_tfm; |
521 | } | ||
522 | 716 | ||
523 | static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 717 | cryptd_tfm = cryptd_alloc_ablkcipher("fpu(lrw(__driver-aes-aesni))", |
524 | struct scatterlist *src, unsigned int nbytes) | 718 | 0, 0); |
525 | { | 719 | if (IS_ERR(cryptd_tfm)) |
526 | struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | 720 | return PTR_ERR(cryptd_tfm); |
527 | be128 buf[8]; | 721 | ablk_init_common(tfm, cryptd_tfm); |
528 | struct xts_crypt_req req = { | 722 | return 0; |
529 | .tbuf = buf, | 723 | } |
530 | .tbuflen = sizeof(buf), | ||
531 | |||
532 | .tweak_ctx = aes_ctx(ctx->raw_tweak_ctx), | ||
533 | .tweak_fn = aesni_xts_tweak, | ||
534 | .crypt_ctx = aes_ctx(ctx->raw_crypt_ctx), | ||
535 | .crypt_fn = lrw_xts_encrypt_callback, | ||
536 | }; | ||
537 | int ret; | ||
538 | 724 | ||
539 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | 725 | static struct crypto_alg ablk_lrw_alg = { |
726 | .cra_name = "lrw(aes)", | ||
727 | .cra_driver_name = "lrw-aes-aesni", | ||
728 | .cra_priority = 400, | ||
729 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, | ||
730 | .cra_blocksize = AES_BLOCK_SIZE, | ||
731 | .cra_ctxsize = sizeof(struct async_aes_ctx), | ||
732 | .cra_alignmask = 0, | ||
733 | .cra_type = &crypto_ablkcipher_type, | ||
734 | .cra_module = THIS_MODULE, | ||
735 | .cra_list = LIST_HEAD_INIT(ablk_lrw_alg.cra_list), | ||
736 | .cra_init = ablk_lrw_init, | ||
737 | .cra_exit = ablk_exit, | ||
738 | .cra_u = { | ||
739 | .ablkcipher = { | ||
740 | .min_keysize = AES_MIN_KEY_SIZE + AES_BLOCK_SIZE, | ||
741 | .max_keysize = AES_MAX_KEY_SIZE + AES_BLOCK_SIZE, | ||
742 | .ivsize = AES_BLOCK_SIZE, | ||
743 | .setkey = ablk_set_key, | ||
744 | .encrypt = ablk_encrypt, | ||
745 | .decrypt = ablk_decrypt, | ||
746 | }, | ||
747 | }, | ||
748 | }; | ||
749 | #endif | ||
540 | 750 | ||
541 | kernel_fpu_begin(); | 751 | #ifdef HAS_PCBC |
542 | ret = xts_crypt(desc, dst, src, nbytes, &req); | 752 | static int ablk_pcbc_init(struct crypto_tfm *tfm) |
543 | kernel_fpu_end(); | 753 | { |
754 | struct cryptd_ablkcipher *cryptd_tfm; | ||
544 | 755 | ||
545 | return ret; | 756 | cryptd_tfm = cryptd_alloc_ablkcipher("fpu(pcbc(__driver-aes-aesni))", |
757 | 0, 0); | ||
758 | if (IS_ERR(cryptd_tfm)) | ||
759 | return PTR_ERR(cryptd_tfm); | ||
760 | ablk_init_common(tfm, cryptd_tfm); | ||
761 | return 0; | ||
546 | } | 762 | } |
547 | 763 | ||
548 | static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | 764 | static struct crypto_alg ablk_pcbc_alg = { |
549 | struct scatterlist *src, unsigned int nbytes) | 765 | .cra_name = "pcbc(aes)", |
550 | { | 766 | .cra_driver_name = "pcbc-aes-aesni", |
551 | struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | 767 | .cra_priority = 400, |
552 | be128 buf[8]; | 768 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, |
553 | struct xts_crypt_req req = { | 769 | .cra_blocksize = AES_BLOCK_SIZE, |
554 | .tbuf = buf, | 770 | .cra_ctxsize = sizeof(struct async_aes_ctx), |
555 | .tbuflen = sizeof(buf), | 771 | .cra_alignmask = 0, |
556 | 772 | .cra_type = &crypto_ablkcipher_type, | |
557 | .tweak_ctx = aes_ctx(ctx->raw_tweak_ctx), | 773 | .cra_module = THIS_MODULE, |
558 | .tweak_fn = aesni_xts_tweak, | 774 | .cra_list = LIST_HEAD_INIT(ablk_pcbc_alg.cra_list), |
559 | .crypt_ctx = aes_ctx(ctx->raw_crypt_ctx), | 775 | .cra_init = ablk_pcbc_init, |
560 | .crypt_fn = lrw_xts_decrypt_callback, | 776 | .cra_exit = ablk_exit, |
561 | }; | 777 | .cra_u = { |
562 | int ret; | 778 | .ablkcipher = { |
563 | 779 | .min_keysize = AES_MIN_KEY_SIZE, | |
564 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | 780 | .max_keysize = AES_MAX_KEY_SIZE, |
781 | .ivsize = AES_BLOCK_SIZE, | ||
782 | .setkey = ablk_set_key, | ||
783 | .encrypt = ablk_encrypt, | ||
784 | .decrypt = ablk_decrypt, | ||
785 | }, | ||
786 | }, | ||
787 | }; | ||
788 | #endif | ||
565 | 789 | ||
566 | kernel_fpu_begin(); | 790 | #ifdef HAS_XTS |
567 | ret = xts_crypt(desc, dst, src, nbytes, &req); | 791 | static int ablk_xts_init(struct crypto_tfm *tfm) |
568 | kernel_fpu_end(); | 792 | { |
793 | struct cryptd_ablkcipher *cryptd_tfm; | ||
569 | 794 | ||
570 | return ret; | 795 | cryptd_tfm = cryptd_alloc_ablkcipher("fpu(xts(__driver-aes-aesni))", |
796 | 0, 0); | ||
797 | if (IS_ERR(cryptd_tfm)) | ||
798 | return PTR_ERR(cryptd_tfm); | ||
799 | ablk_init_common(tfm, cryptd_tfm); | ||
800 | return 0; | ||
571 | } | 801 | } |
572 | 802 | ||
803 | static struct crypto_alg ablk_xts_alg = { | ||
804 | .cra_name = "xts(aes)", | ||
805 | .cra_driver_name = "xts-aes-aesni", | ||
806 | .cra_priority = 400, | ||
807 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, | ||
808 | .cra_blocksize = AES_BLOCK_SIZE, | ||
809 | .cra_ctxsize = sizeof(struct async_aes_ctx), | ||
810 | .cra_alignmask = 0, | ||
811 | .cra_type = &crypto_ablkcipher_type, | ||
812 | .cra_module = THIS_MODULE, | ||
813 | .cra_list = LIST_HEAD_INIT(ablk_xts_alg.cra_list), | ||
814 | .cra_init = ablk_xts_init, | ||
815 | .cra_exit = ablk_exit, | ||
816 | .cra_u = { | ||
817 | .ablkcipher = { | ||
818 | .min_keysize = 2 * AES_MIN_KEY_SIZE, | ||
819 | .max_keysize = 2 * AES_MAX_KEY_SIZE, | ||
820 | .ivsize = AES_BLOCK_SIZE, | ||
821 | .setkey = ablk_set_key, | ||
822 | .encrypt = ablk_encrypt, | ||
823 | .decrypt = ablk_decrypt, | ||
824 | }, | ||
825 | }, | ||
826 | }; | ||
827 | #endif | ||
828 | |||
573 | #ifdef CONFIG_X86_64 | 829 | #ifdef CONFIG_X86_64 |
574 | static int rfc4106_init(struct crypto_tfm *tfm) | 830 | static int rfc4106_init(struct crypto_tfm *tfm) |
575 | { | 831 | { |
@@ -680,7 +936,7 @@ static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key, | |||
680 | struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm); | 936 | struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm); |
681 | struct aesni_rfc4106_gcm_ctx *child_ctx = | 937 | struct aesni_rfc4106_gcm_ctx *child_ctx = |
682 | aesni_rfc4106_gcm_ctx_get(cryptd_child); | 938 | aesni_rfc4106_gcm_ctx_get(cryptd_child); |
683 | u8 *new_key_align, *new_key_mem = NULL; | 939 | u8 *new_key_mem = NULL; |
684 | 940 | ||
685 | if (key_len < 4) { | 941 | if (key_len < 4) { |
686 | crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); | 942 | crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); |
@@ -704,9 +960,9 @@ static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key, | |||
704 | if (!new_key_mem) | 960 | if (!new_key_mem) |
705 | return -ENOMEM; | 961 | return -ENOMEM; |
706 | 962 | ||
707 | new_key_align = PTR_ALIGN(new_key_mem, AESNI_ALIGN); | 963 | new_key_mem = PTR_ALIGN(new_key_mem, AESNI_ALIGN); |
708 | memcpy(new_key_align, key, key_len); | 964 | memcpy(new_key_mem, key, key_len); |
709 | key = new_key_align; | 965 | key = new_key_mem; |
710 | } | 966 | } |
711 | 967 | ||
712 | if (!irq_fpu_usable()) | 968 | if (!irq_fpu_usable()) |
@@ -792,6 +1048,32 @@ static int rfc4106_decrypt(struct aead_request *req) | |||
792 | } | 1048 | } |
793 | } | 1049 | } |
794 | 1050 | ||
1051 | static struct crypto_alg rfc4106_alg = { | ||
1052 | .cra_name = "rfc4106(gcm(aes))", | ||
1053 | .cra_driver_name = "rfc4106-gcm-aesni", | ||
1054 | .cra_priority = 400, | ||
1055 | .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC, | ||
1056 | .cra_blocksize = 1, | ||
1057 | .cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx) + AESNI_ALIGN, | ||
1058 | .cra_alignmask = 0, | ||
1059 | .cra_type = &crypto_nivaead_type, | ||
1060 | .cra_module = THIS_MODULE, | ||
1061 | .cra_list = LIST_HEAD_INIT(rfc4106_alg.cra_list), | ||
1062 | .cra_init = rfc4106_init, | ||
1063 | .cra_exit = rfc4106_exit, | ||
1064 | .cra_u = { | ||
1065 | .aead = { | ||
1066 | .setkey = rfc4106_set_key, | ||
1067 | .setauthsize = rfc4106_set_authsize, | ||
1068 | .encrypt = rfc4106_encrypt, | ||
1069 | .decrypt = rfc4106_decrypt, | ||
1070 | .geniv = "seqiv", | ||
1071 | .ivsize = 8, | ||
1072 | .maxauthsize = 16, | ||
1073 | }, | ||
1074 | }, | ||
1075 | }; | ||
1076 | |||
795 | static int __driver_rfc4106_encrypt(struct aead_request *req) | 1077 | static int __driver_rfc4106_encrypt(struct aead_request *req) |
796 | { | 1078 | { |
797 | u8 one_entry_in_sg = 0; | 1079 | u8 one_entry_in_sg = 0; |
@@ -824,12 +1106,12 @@ static int __driver_rfc4106_encrypt(struct aead_request *req) | |||
824 | one_entry_in_sg = 1; | 1106 | one_entry_in_sg = 1; |
825 | scatterwalk_start(&src_sg_walk, req->src); | 1107 | scatterwalk_start(&src_sg_walk, req->src); |
826 | scatterwalk_start(&assoc_sg_walk, req->assoc); | 1108 | scatterwalk_start(&assoc_sg_walk, req->assoc); |
827 | src = scatterwalk_map(&src_sg_walk); | 1109 | src = scatterwalk_map(&src_sg_walk, 0); |
828 | assoc = scatterwalk_map(&assoc_sg_walk); | 1110 | assoc = scatterwalk_map(&assoc_sg_walk, 0); |
829 | dst = src; | 1111 | dst = src; |
830 | if (unlikely(req->src != req->dst)) { | 1112 | if (unlikely(req->src != req->dst)) { |
831 | scatterwalk_start(&dst_sg_walk, req->dst); | 1113 | scatterwalk_start(&dst_sg_walk, req->dst); |
832 | dst = scatterwalk_map(&dst_sg_walk); | 1114 | dst = scatterwalk_map(&dst_sg_walk, 0); |
833 | } | 1115 | } |
834 | 1116 | ||
835 | } else { | 1117 | } else { |
@@ -853,11 +1135,11 @@ static int __driver_rfc4106_encrypt(struct aead_request *req) | |||
853 | * back to the packet. */ | 1135 | * back to the packet. */ |
854 | if (one_entry_in_sg) { | 1136 | if (one_entry_in_sg) { |
855 | if (unlikely(req->src != req->dst)) { | 1137 | if (unlikely(req->src != req->dst)) { |
856 | scatterwalk_unmap(dst); | 1138 | scatterwalk_unmap(dst, 0); |
857 | scatterwalk_done(&dst_sg_walk, 0, 0); | 1139 | scatterwalk_done(&dst_sg_walk, 0, 0); |
858 | } | 1140 | } |
859 | scatterwalk_unmap(src); | 1141 | scatterwalk_unmap(src, 0); |
860 | scatterwalk_unmap(assoc); | 1142 | scatterwalk_unmap(assoc, 0); |
861 | scatterwalk_done(&src_sg_walk, 0, 0); | 1143 | scatterwalk_done(&src_sg_walk, 0, 0); |
862 | scatterwalk_done(&assoc_sg_walk, 0, 0); | 1144 | scatterwalk_done(&assoc_sg_walk, 0, 0); |
863 | } else { | 1145 | } else { |
@@ -906,12 +1188,12 @@ static int __driver_rfc4106_decrypt(struct aead_request *req) | |||
906 | one_entry_in_sg = 1; | 1188 | one_entry_in_sg = 1; |
907 | scatterwalk_start(&src_sg_walk, req->src); | 1189 | scatterwalk_start(&src_sg_walk, req->src); |
908 | scatterwalk_start(&assoc_sg_walk, req->assoc); | 1190 | scatterwalk_start(&assoc_sg_walk, req->assoc); |
909 | src = scatterwalk_map(&src_sg_walk); | 1191 | src = scatterwalk_map(&src_sg_walk, 0); |
910 | assoc = scatterwalk_map(&assoc_sg_walk); | 1192 | assoc = scatterwalk_map(&assoc_sg_walk, 0); |
911 | dst = src; | 1193 | dst = src; |
912 | if (unlikely(req->src != req->dst)) { | 1194 | if (unlikely(req->src != req->dst)) { |
913 | scatterwalk_start(&dst_sg_walk, req->dst); | 1195 | scatterwalk_start(&dst_sg_walk, req->dst); |
914 | dst = scatterwalk_map(&dst_sg_walk); | 1196 | dst = scatterwalk_map(&dst_sg_walk, 0); |
915 | } | 1197 | } |
916 | 1198 | ||
917 | } else { | 1199 | } else { |
@@ -936,11 +1218,11 @@ static int __driver_rfc4106_decrypt(struct aead_request *req) | |||
936 | 1218 | ||
937 | if (one_entry_in_sg) { | 1219 | if (one_entry_in_sg) { |
938 | if (unlikely(req->src != req->dst)) { | 1220 | if (unlikely(req->src != req->dst)) { |
939 | scatterwalk_unmap(dst); | 1221 | scatterwalk_unmap(dst, 0); |
940 | scatterwalk_done(&dst_sg_walk, 0, 0); | 1222 | scatterwalk_done(&dst_sg_walk, 0, 0); |
941 | } | 1223 | } |
942 | scatterwalk_unmap(src); | 1224 | scatterwalk_unmap(src, 0); |
943 | scatterwalk_unmap(assoc); | 1225 | scatterwalk_unmap(assoc, 0); |
944 | scatterwalk_done(&src_sg_walk, 0, 0); | 1226 | scatterwalk_done(&src_sg_walk, 0, 0); |
945 | scatterwalk_done(&assoc_sg_walk, 0, 0); | 1227 | scatterwalk_done(&assoc_sg_walk, 0, 0); |
946 | } else { | 1228 | } else { |
@@ -949,378 +1231,145 @@ static int __driver_rfc4106_decrypt(struct aead_request *req) | |||
949 | } | 1231 | } |
950 | return retval; | 1232 | return retval; |
951 | } | 1233 | } |
952 | #endif | ||
953 | 1234 | ||
954 | static struct crypto_alg aesni_algs[] = { { | 1235 | static struct crypto_alg __rfc4106_alg = { |
955 | .cra_name = "aes", | ||
956 | .cra_driver_name = "aes-aesni", | ||
957 | .cra_priority = 300, | ||
958 | .cra_flags = CRYPTO_ALG_TYPE_CIPHER, | ||
959 | .cra_blocksize = AES_BLOCK_SIZE, | ||
960 | .cra_ctxsize = sizeof(struct crypto_aes_ctx) + | ||
961 | AESNI_ALIGN - 1, | ||
962 | .cra_alignmask = 0, | ||
963 | .cra_module = THIS_MODULE, | ||
964 | .cra_u = { | ||
965 | .cipher = { | ||
966 | .cia_min_keysize = AES_MIN_KEY_SIZE, | ||
967 | .cia_max_keysize = AES_MAX_KEY_SIZE, | ||
968 | .cia_setkey = aes_set_key, | ||
969 | .cia_encrypt = aes_encrypt, | ||
970 | .cia_decrypt = aes_decrypt | ||
971 | } | ||
972 | } | ||
973 | }, { | ||
974 | .cra_name = "__aes-aesni", | ||
975 | .cra_driver_name = "__driver-aes-aesni", | ||
976 | .cra_priority = 0, | ||
977 | .cra_flags = CRYPTO_ALG_TYPE_CIPHER, | ||
978 | .cra_blocksize = AES_BLOCK_SIZE, | ||
979 | .cra_ctxsize = sizeof(struct crypto_aes_ctx) + | ||
980 | AESNI_ALIGN - 1, | ||
981 | .cra_alignmask = 0, | ||
982 | .cra_module = THIS_MODULE, | ||
983 | .cra_u = { | ||
984 | .cipher = { | ||
985 | .cia_min_keysize = AES_MIN_KEY_SIZE, | ||
986 | .cia_max_keysize = AES_MAX_KEY_SIZE, | ||
987 | .cia_setkey = aes_set_key, | ||
988 | .cia_encrypt = __aes_encrypt, | ||
989 | .cia_decrypt = __aes_decrypt | ||
990 | } | ||
991 | } | ||
992 | }, { | ||
993 | .cra_name = "__ecb-aes-aesni", | ||
994 | .cra_driver_name = "__driver-ecb-aes-aesni", | ||
995 | .cra_priority = 0, | ||
996 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
997 | .cra_blocksize = AES_BLOCK_SIZE, | ||
998 | .cra_ctxsize = sizeof(struct crypto_aes_ctx) + | ||
999 | AESNI_ALIGN - 1, | ||
1000 | .cra_alignmask = 0, | ||
1001 | .cra_type = &crypto_blkcipher_type, | ||
1002 | .cra_module = THIS_MODULE, | ||
1003 | .cra_u = { | ||
1004 | .blkcipher = { | ||
1005 | .min_keysize = AES_MIN_KEY_SIZE, | ||
1006 | .max_keysize = AES_MAX_KEY_SIZE, | ||
1007 | .setkey = aes_set_key, | ||
1008 | .encrypt = ecb_encrypt, | ||
1009 | .decrypt = ecb_decrypt, | ||
1010 | }, | ||
1011 | }, | ||
1012 | }, { | ||
1013 | .cra_name = "__cbc-aes-aesni", | ||
1014 | .cra_driver_name = "__driver-cbc-aes-aesni", | ||
1015 | .cra_priority = 0, | ||
1016 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
1017 | .cra_blocksize = AES_BLOCK_SIZE, | ||
1018 | .cra_ctxsize = sizeof(struct crypto_aes_ctx) + | ||
1019 | AESNI_ALIGN - 1, | ||
1020 | .cra_alignmask = 0, | ||
1021 | .cra_type = &crypto_blkcipher_type, | ||
1022 | .cra_module = THIS_MODULE, | ||
1023 | .cra_u = { | ||
1024 | .blkcipher = { | ||
1025 | .min_keysize = AES_MIN_KEY_SIZE, | ||
1026 | .max_keysize = AES_MAX_KEY_SIZE, | ||
1027 | .setkey = aes_set_key, | ||
1028 | .encrypt = cbc_encrypt, | ||
1029 | .decrypt = cbc_decrypt, | ||
1030 | }, | ||
1031 | }, | ||
1032 | }, { | ||
1033 | .cra_name = "ecb(aes)", | ||
1034 | .cra_driver_name = "ecb-aes-aesni", | ||
1035 | .cra_priority = 400, | ||
1036 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
1037 | .cra_blocksize = AES_BLOCK_SIZE, | ||
1038 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
1039 | .cra_alignmask = 0, | ||
1040 | .cra_type = &crypto_ablkcipher_type, | ||
1041 | .cra_module = THIS_MODULE, | ||
1042 | .cra_init = ablk_ecb_init, | ||
1043 | .cra_exit = ablk_exit, | ||
1044 | .cra_u = { | ||
1045 | .ablkcipher = { | ||
1046 | .min_keysize = AES_MIN_KEY_SIZE, | ||
1047 | .max_keysize = AES_MAX_KEY_SIZE, | ||
1048 | .setkey = ablk_set_key, | ||
1049 | .encrypt = ablk_encrypt, | ||
1050 | .decrypt = ablk_decrypt, | ||
1051 | }, | ||
1052 | }, | ||
1053 | }, { | ||
1054 | .cra_name = "cbc(aes)", | ||
1055 | .cra_driver_name = "cbc-aes-aesni", | ||
1056 | .cra_priority = 400, | ||
1057 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
1058 | .cra_blocksize = AES_BLOCK_SIZE, | ||
1059 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
1060 | .cra_alignmask = 0, | ||
1061 | .cra_type = &crypto_ablkcipher_type, | ||
1062 | .cra_module = THIS_MODULE, | ||
1063 | .cra_init = ablk_cbc_init, | ||
1064 | .cra_exit = ablk_exit, | ||
1065 | .cra_u = { | ||
1066 | .ablkcipher = { | ||
1067 | .min_keysize = AES_MIN_KEY_SIZE, | ||
1068 | .max_keysize = AES_MAX_KEY_SIZE, | ||
1069 | .ivsize = AES_BLOCK_SIZE, | ||
1070 | .setkey = ablk_set_key, | ||
1071 | .encrypt = ablk_encrypt, | ||
1072 | .decrypt = ablk_decrypt, | ||
1073 | }, | ||
1074 | }, | ||
1075 | #ifdef CONFIG_X86_64 | ||
1076 | }, { | ||
1077 | .cra_name = "__ctr-aes-aesni", | ||
1078 | .cra_driver_name = "__driver-ctr-aes-aesni", | ||
1079 | .cra_priority = 0, | ||
1080 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
1081 | .cra_blocksize = 1, | ||
1082 | .cra_ctxsize = sizeof(struct crypto_aes_ctx) + | ||
1083 | AESNI_ALIGN - 1, | ||
1084 | .cra_alignmask = 0, | ||
1085 | .cra_type = &crypto_blkcipher_type, | ||
1086 | .cra_module = THIS_MODULE, | ||
1087 | .cra_u = { | ||
1088 | .blkcipher = { | ||
1089 | .min_keysize = AES_MIN_KEY_SIZE, | ||
1090 | .max_keysize = AES_MAX_KEY_SIZE, | ||
1091 | .ivsize = AES_BLOCK_SIZE, | ||
1092 | .setkey = aes_set_key, | ||
1093 | .encrypt = ctr_crypt, | ||
1094 | .decrypt = ctr_crypt, | ||
1095 | }, | ||
1096 | }, | ||
1097 | }, { | ||
1098 | .cra_name = "ctr(aes)", | ||
1099 | .cra_driver_name = "ctr-aes-aesni", | ||
1100 | .cra_priority = 400, | ||
1101 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
1102 | .cra_blocksize = 1, | ||
1103 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
1104 | .cra_alignmask = 0, | ||
1105 | .cra_type = &crypto_ablkcipher_type, | ||
1106 | .cra_module = THIS_MODULE, | ||
1107 | .cra_init = ablk_ctr_init, | ||
1108 | .cra_exit = ablk_exit, | ||
1109 | .cra_u = { | ||
1110 | .ablkcipher = { | ||
1111 | .min_keysize = AES_MIN_KEY_SIZE, | ||
1112 | .max_keysize = AES_MAX_KEY_SIZE, | ||
1113 | .ivsize = AES_BLOCK_SIZE, | ||
1114 | .setkey = ablk_set_key, | ||
1115 | .encrypt = ablk_encrypt, | ||
1116 | .decrypt = ablk_encrypt, | ||
1117 | .geniv = "chainiv", | ||
1118 | }, | ||
1119 | }, | ||
1120 | }, { | ||
1121 | .cra_name = "__gcm-aes-aesni", | 1236 | .cra_name = "__gcm-aes-aesni", |
1122 | .cra_driver_name = "__driver-gcm-aes-aesni", | 1237 | .cra_driver_name = "__driver-gcm-aes-aesni", |
1123 | .cra_priority = 0, | 1238 | .cra_priority = 0, |
1124 | .cra_flags = CRYPTO_ALG_TYPE_AEAD, | 1239 | .cra_flags = CRYPTO_ALG_TYPE_AEAD, |
1125 | .cra_blocksize = 1, | 1240 | .cra_blocksize = 1, |
1126 | .cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx) + | 1241 | .cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx) + AESNI_ALIGN, |
1127 | AESNI_ALIGN, | ||
1128 | .cra_alignmask = 0, | 1242 | .cra_alignmask = 0, |
1129 | .cra_type = &crypto_aead_type, | 1243 | .cra_type = &crypto_aead_type, |
1130 | .cra_module = THIS_MODULE, | 1244 | .cra_module = THIS_MODULE, |
1245 | .cra_list = LIST_HEAD_INIT(__rfc4106_alg.cra_list), | ||
1131 | .cra_u = { | 1246 | .cra_u = { |
1132 | .aead = { | 1247 | .aead = { |
1133 | .encrypt = __driver_rfc4106_encrypt, | 1248 | .encrypt = __driver_rfc4106_encrypt, |
1134 | .decrypt = __driver_rfc4106_decrypt, | 1249 | .decrypt = __driver_rfc4106_decrypt, |
1135 | }, | 1250 | }, |
1136 | }, | 1251 | }, |
1137 | }, { | ||
1138 | .cra_name = "rfc4106(gcm(aes))", | ||
1139 | .cra_driver_name = "rfc4106-gcm-aesni", | ||
1140 | .cra_priority = 400, | ||
1141 | .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC, | ||
1142 | .cra_blocksize = 1, | ||
1143 | .cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx) + | ||
1144 | AESNI_ALIGN, | ||
1145 | .cra_alignmask = 0, | ||
1146 | .cra_type = &crypto_nivaead_type, | ||
1147 | .cra_module = THIS_MODULE, | ||
1148 | .cra_init = rfc4106_init, | ||
1149 | .cra_exit = rfc4106_exit, | ||
1150 | .cra_u = { | ||
1151 | .aead = { | ||
1152 | .setkey = rfc4106_set_key, | ||
1153 | .setauthsize = rfc4106_set_authsize, | ||
1154 | .encrypt = rfc4106_encrypt, | ||
1155 | .decrypt = rfc4106_decrypt, | ||
1156 | .geniv = "seqiv", | ||
1157 | .ivsize = 8, | ||
1158 | .maxauthsize = 16, | ||
1159 | }, | ||
1160 | }, | ||
1161 | #ifdef HAS_CTR | ||
1162 | }, { | ||
1163 | .cra_name = "rfc3686(ctr(aes))", | ||
1164 | .cra_driver_name = "rfc3686-ctr-aes-aesni", | ||
1165 | .cra_priority = 400, | ||
1166 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
1167 | .cra_blocksize = 1, | ||
1168 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
1169 | .cra_alignmask = 0, | ||
1170 | .cra_type = &crypto_ablkcipher_type, | ||
1171 | .cra_module = THIS_MODULE, | ||
1172 | .cra_init = ablk_rfc3686_ctr_init, | ||
1173 | .cra_exit = ablk_exit, | ||
1174 | .cra_u = { | ||
1175 | .ablkcipher = { | ||
1176 | .min_keysize = AES_MIN_KEY_SIZE + | ||
1177 | CTR_RFC3686_NONCE_SIZE, | ||
1178 | .max_keysize = AES_MAX_KEY_SIZE + | ||
1179 | CTR_RFC3686_NONCE_SIZE, | ||
1180 | .ivsize = CTR_RFC3686_IV_SIZE, | ||
1181 | .setkey = ablk_set_key, | ||
1182 | .encrypt = ablk_encrypt, | ||
1183 | .decrypt = ablk_decrypt, | ||
1184 | .geniv = "seqiv", | ||
1185 | }, | ||
1186 | }, | ||
1187 | #endif | ||
1188 | #endif | ||
1189 | #ifdef HAS_PCBC | ||
1190 | }, { | ||
1191 | .cra_name = "pcbc(aes)", | ||
1192 | .cra_driver_name = "pcbc-aes-aesni", | ||
1193 | .cra_priority = 400, | ||
1194 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
1195 | .cra_blocksize = AES_BLOCK_SIZE, | ||
1196 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
1197 | .cra_alignmask = 0, | ||
1198 | .cra_type = &crypto_ablkcipher_type, | ||
1199 | .cra_module = THIS_MODULE, | ||
1200 | .cra_init = ablk_pcbc_init, | ||
1201 | .cra_exit = ablk_exit, | ||
1202 | .cra_u = { | ||
1203 | .ablkcipher = { | ||
1204 | .min_keysize = AES_MIN_KEY_SIZE, | ||
1205 | .max_keysize = AES_MAX_KEY_SIZE, | ||
1206 | .ivsize = AES_BLOCK_SIZE, | ||
1207 | .setkey = ablk_set_key, | ||
1208 | .encrypt = ablk_encrypt, | ||
1209 | .decrypt = ablk_decrypt, | ||
1210 | }, | ||
1211 | }, | ||
1212 | #endif | ||
1213 | }, { | ||
1214 | .cra_name = "__lrw-aes-aesni", | ||
1215 | .cra_driver_name = "__driver-lrw-aes-aesni", | ||
1216 | .cra_priority = 0, | ||
1217 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
1218 | .cra_blocksize = AES_BLOCK_SIZE, | ||
1219 | .cra_ctxsize = sizeof(struct aesni_lrw_ctx), | ||
1220 | .cra_alignmask = 0, | ||
1221 | .cra_type = &crypto_blkcipher_type, | ||
1222 | .cra_module = THIS_MODULE, | ||
1223 | .cra_exit = lrw_aesni_exit_tfm, | ||
1224 | .cra_u = { | ||
1225 | .blkcipher = { | ||
1226 | .min_keysize = AES_MIN_KEY_SIZE + AES_BLOCK_SIZE, | ||
1227 | .max_keysize = AES_MAX_KEY_SIZE + AES_BLOCK_SIZE, | ||
1228 | .ivsize = AES_BLOCK_SIZE, | ||
1229 | .setkey = lrw_aesni_setkey, | ||
1230 | .encrypt = lrw_encrypt, | ||
1231 | .decrypt = lrw_decrypt, | ||
1232 | }, | ||
1233 | }, | ||
1234 | }, { | ||
1235 | .cra_name = "__xts-aes-aesni", | ||
1236 | .cra_driver_name = "__driver-xts-aes-aesni", | ||
1237 | .cra_priority = 0, | ||
1238 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
1239 | .cra_blocksize = AES_BLOCK_SIZE, | ||
1240 | .cra_ctxsize = sizeof(struct aesni_xts_ctx), | ||
1241 | .cra_alignmask = 0, | ||
1242 | .cra_type = &crypto_blkcipher_type, | ||
1243 | .cra_module = THIS_MODULE, | ||
1244 | .cra_u = { | ||
1245 | .blkcipher = { | ||
1246 | .min_keysize = 2 * AES_MIN_KEY_SIZE, | ||
1247 | .max_keysize = 2 * AES_MAX_KEY_SIZE, | ||
1248 | .ivsize = AES_BLOCK_SIZE, | ||
1249 | .setkey = xts_aesni_setkey, | ||
1250 | .encrypt = xts_encrypt, | ||
1251 | .decrypt = xts_decrypt, | ||
1252 | }, | ||
1253 | }, | ||
1254 | }, { | ||
1255 | .cra_name = "lrw(aes)", | ||
1256 | .cra_driver_name = "lrw-aes-aesni", | ||
1257 | .cra_priority = 400, | ||
1258 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
1259 | .cra_blocksize = AES_BLOCK_SIZE, | ||
1260 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
1261 | .cra_alignmask = 0, | ||
1262 | .cra_type = &crypto_ablkcipher_type, | ||
1263 | .cra_module = THIS_MODULE, | ||
1264 | .cra_init = ablk_init, | ||
1265 | .cra_exit = ablk_exit, | ||
1266 | .cra_u = { | ||
1267 | .ablkcipher = { | ||
1268 | .min_keysize = AES_MIN_KEY_SIZE + AES_BLOCK_SIZE, | ||
1269 | .max_keysize = AES_MAX_KEY_SIZE + AES_BLOCK_SIZE, | ||
1270 | .ivsize = AES_BLOCK_SIZE, | ||
1271 | .setkey = ablk_set_key, | ||
1272 | .encrypt = ablk_encrypt, | ||
1273 | .decrypt = ablk_decrypt, | ||
1274 | }, | ||
1275 | }, | ||
1276 | }, { | ||
1277 | .cra_name = "xts(aes)", | ||
1278 | .cra_driver_name = "xts-aes-aesni", | ||
1279 | .cra_priority = 400, | ||
1280 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
1281 | .cra_blocksize = AES_BLOCK_SIZE, | ||
1282 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
1283 | .cra_alignmask = 0, | ||
1284 | .cra_type = &crypto_ablkcipher_type, | ||
1285 | .cra_module = THIS_MODULE, | ||
1286 | .cra_init = ablk_init, | ||
1287 | .cra_exit = ablk_exit, | ||
1288 | .cra_u = { | ||
1289 | .ablkcipher = { | ||
1290 | .min_keysize = 2 * AES_MIN_KEY_SIZE, | ||
1291 | .max_keysize = 2 * AES_MAX_KEY_SIZE, | ||
1292 | .ivsize = AES_BLOCK_SIZE, | ||
1293 | .setkey = ablk_set_key, | ||
1294 | .encrypt = ablk_encrypt, | ||
1295 | .decrypt = ablk_decrypt, | ||
1296 | }, | ||
1297 | }, | ||
1298 | } }; | ||
1299 | |||
1300 | |||
1301 | static const struct x86_cpu_id aesni_cpu_id[] = { | ||
1302 | X86_FEATURE_MATCH(X86_FEATURE_AES), | ||
1303 | {} | ||
1304 | }; | 1252 | }; |
1305 | MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id); | 1253 | #endif |
1306 | 1254 | ||
1307 | static int __init aesni_init(void) | 1255 | static int __init aesni_init(void) |
1308 | { | 1256 | { |
1309 | int err; | 1257 | int err; |
1310 | 1258 | ||
1311 | if (!x86_match_cpu(aesni_cpu_id)) | 1259 | if (!cpu_has_aes) { |
1260 | printk(KERN_INFO "Intel AES-NI instructions are not detected.\n"); | ||
1312 | return -ENODEV; | 1261 | return -ENODEV; |
1262 | } | ||
1313 | 1263 | ||
1314 | err = crypto_fpu_init(); | 1264 | if ((err = crypto_fpu_init())) |
1315 | if (err) | 1265 | goto fpu_err; |
1316 | return err; | 1266 | if ((err = crypto_register_alg(&aesni_alg))) |
1267 | goto aes_err; | ||
1268 | if ((err = crypto_register_alg(&__aesni_alg))) | ||
1269 | goto __aes_err; | ||
1270 | if ((err = crypto_register_alg(&blk_ecb_alg))) | ||
1271 | goto blk_ecb_err; | ||
1272 | if ((err = crypto_register_alg(&blk_cbc_alg))) | ||
1273 | goto blk_cbc_err; | ||
1274 | if ((err = crypto_register_alg(&ablk_ecb_alg))) | ||
1275 | goto ablk_ecb_err; | ||
1276 | if ((err = crypto_register_alg(&ablk_cbc_alg))) | ||
1277 | goto ablk_cbc_err; | ||
1278 | #ifdef CONFIG_X86_64 | ||
1279 | if ((err = crypto_register_alg(&blk_ctr_alg))) | ||
1280 | goto blk_ctr_err; | ||
1281 | if ((err = crypto_register_alg(&ablk_ctr_alg))) | ||
1282 | goto ablk_ctr_err; | ||
1283 | if ((err = crypto_register_alg(&__rfc4106_alg))) | ||
1284 | goto __aead_gcm_err; | ||
1285 | if ((err = crypto_register_alg(&rfc4106_alg))) | ||
1286 | goto aead_gcm_err; | ||
1287 | #ifdef HAS_CTR | ||
1288 | if ((err = crypto_register_alg(&ablk_rfc3686_ctr_alg))) | ||
1289 | goto ablk_rfc3686_ctr_err; | ||
1290 | #endif | ||
1291 | #endif | ||
1292 | #ifdef HAS_LRW | ||
1293 | if ((err = crypto_register_alg(&ablk_lrw_alg))) | ||
1294 | goto ablk_lrw_err; | ||
1295 | #endif | ||
1296 | #ifdef HAS_PCBC | ||
1297 | if ((err = crypto_register_alg(&ablk_pcbc_alg))) | ||
1298 | goto ablk_pcbc_err; | ||
1299 | #endif | ||
1300 | #ifdef HAS_XTS | ||
1301 | if ((err = crypto_register_alg(&ablk_xts_alg))) | ||
1302 | goto ablk_xts_err; | ||
1303 | #endif | ||
1304 | return err; | ||
1317 | 1305 | ||
1318 | return crypto_register_algs(aesni_algs, ARRAY_SIZE(aesni_algs)); | 1306 | #ifdef HAS_XTS |
1307 | ablk_xts_err: | ||
1308 | #endif | ||
1309 | #ifdef HAS_PCBC | ||
1310 | crypto_unregister_alg(&ablk_pcbc_alg); | ||
1311 | ablk_pcbc_err: | ||
1312 | #endif | ||
1313 | #ifdef HAS_LRW | ||
1314 | crypto_unregister_alg(&ablk_lrw_alg); | ||
1315 | ablk_lrw_err: | ||
1316 | #endif | ||
1317 | #ifdef CONFIG_X86_64 | ||
1318 | #ifdef HAS_CTR | ||
1319 | crypto_unregister_alg(&ablk_rfc3686_ctr_alg); | ||
1320 | ablk_rfc3686_ctr_err: | ||
1321 | #endif | ||
1322 | crypto_unregister_alg(&rfc4106_alg); | ||
1323 | aead_gcm_err: | ||
1324 | crypto_unregister_alg(&__rfc4106_alg); | ||
1325 | __aead_gcm_err: | ||
1326 | crypto_unregister_alg(&ablk_ctr_alg); | ||
1327 | ablk_ctr_err: | ||
1328 | crypto_unregister_alg(&blk_ctr_alg); | ||
1329 | blk_ctr_err: | ||
1330 | #endif | ||
1331 | crypto_unregister_alg(&ablk_cbc_alg); | ||
1332 | ablk_cbc_err: | ||
1333 | crypto_unregister_alg(&ablk_ecb_alg); | ||
1334 | ablk_ecb_err: | ||
1335 | crypto_unregister_alg(&blk_cbc_alg); | ||
1336 | blk_cbc_err: | ||
1337 | crypto_unregister_alg(&blk_ecb_alg); | ||
1338 | blk_ecb_err: | ||
1339 | crypto_unregister_alg(&__aesni_alg); | ||
1340 | __aes_err: | ||
1341 | crypto_unregister_alg(&aesni_alg); | ||
1342 | aes_err: | ||
1343 | fpu_err: | ||
1344 | return err; | ||
1319 | } | 1345 | } |
1320 | 1346 | ||
1321 | static void __exit aesni_exit(void) | 1347 | static void __exit aesni_exit(void) |
1322 | { | 1348 | { |
1323 | crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs)); | 1349 | #ifdef HAS_XTS |
1350 | crypto_unregister_alg(&ablk_xts_alg); | ||
1351 | #endif | ||
1352 | #ifdef HAS_PCBC | ||
1353 | crypto_unregister_alg(&ablk_pcbc_alg); | ||
1354 | #endif | ||
1355 | #ifdef HAS_LRW | ||
1356 | crypto_unregister_alg(&ablk_lrw_alg); | ||
1357 | #endif | ||
1358 | #ifdef CONFIG_X86_64 | ||
1359 | #ifdef HAS_CTR | ||
1360 | crypto_unregister_alg(&ablk_rfc3686_ctr_alg); | ||
1361 | #endif | ||
1362 | crypto_unregister_alg(&rfc4106_alg); | ||
1363 | crypto_unregister_alg(&__rfc4106_alg); | ||
1364 | crypto_unregister_alg(&ablk_ctr_alg); | ||
1365 | crypto_unregister_alg(&blk_ctr_alg); | ||
1366 | #endif | ||
1367 | crypto_unregister_alg(&ablk_cbc_alg); | ||
1368 | crypto_unregister_alg(&ablk_ecb_alg); | ||
1369 | crypto_unregister_alg(&blk_cbc_alg); | ||
1370 | crypto_unregister_alg(&blk_ecb_alg); | ||
1371 | crypto_unregister_alg(&__aesni_alg); | ||
1372 | crypto_unregister_alg(&aesni_alg); | ||
1324 | 1373 | ||
1325 | crypto_fpu_exit(); | 1374 | crypto_fpu_exit(); |
1326 | } | 1375 | } |
diff --git a/arch/x86/crypto/blowfish-x86_64-asm_64.S b/arch/x86/crypto/blowfish-x86_64-asm_64.S deleted file mode 100644 index 391d245dc08..00000000000 --- a/arch/x86/crypto/blowfish-x86_64-asm_64.S +++ /dev/null | |||
@@ -1,390 +0,0 @@ | |||
1 | /* | ||
2 | * Blowfish Cipher Algorithm (x86_64) | ||
3 | * | ||
4 | * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
19 | * USA | ||
20 | * | ||
21 | */ | ||
22 | |||
23 | .file "blowfish-x86_64-asm.S" | ||
24 | .text | ||
25 | |||
26 | /* structure of crypto context */ | ||
27 | #define p 0 | ||
28 | #define s0 ((16 + 2) * 4) | ||
29 | #define s1 ((16 + 2 + (1 * 256)) * 4) | ||
30 | #define s2 ((16 + 2 + (2 * 256)) * 4) | ||
31 | #define s3 ((16 + 2 + (3 * 256)) * 4) | ||
32 | |||
33 | /* register macros */ | ||
34 | #define CTX %rdi | ||
35 | #define RIO %rsi | ||
36 | |||
37 | #define RX0 %rax | ||
38 | #define RX1 %rbx | ||
39 | #define RX2 %rcx | ||
40 | #define RX3 %rdx | ||
41 | |||
42 | #define RX0d %eax | ||
43 | #define RX1d %ebx | ||
44 | #define RX2d %ecx | ||
45 | #define RX3d %edx | ||
46 | |||
47 | #define RX0bl %al | ||
48 | #define RX1bl %bl | ||
49 | #define RX2bl %cl | ||
50 | #define RX3bl %dl | ||
51 | |||
52 | #define RX0bh %ah | ||
53 | #define RX1bh %bh | ||
54 | #define RX2bh %ch | ||
55 | #define RX3bh %dh | ||
56 | |||
57 | #define RT0 %rbp | ||
58 | #define RT1 %rsi | ||
59 | #define RT2 %r8 | ||
60 | #define RT3 %r9 | ||
61 | |||
62 | #define RT0d %ebp | ||
63 | #define RT1d %esi | ||
64 | #define RT2d %r8d | ||
65 | #define RT3d %r9d | ||
66 | |||
67 | #define RKEY %r10 | ||
68 | |||
69 | /*********************************************************************** | ||
70 | * 1-way blowfish | ||
71 | ***********************************************************************/ | ||
72 | #define F() \ | ||
73 | rorq $16, RX0; \ | ||
74 | movzbl RX0bh, RT0d; \ | ||
75 | movzbl RX0bl, RT1d; \ | ||
76 | rolq $16, RX0; \ | ||
77 | movl s0(CTX,RT0,4), RT0d; \ | ||
78 | addl s1(CTX,RT1,4), RT0d; \ | ||
79 | movzbl RX0bh, RT1d; \ | ||
80 | movzbl RX0bl, RT2d; \ | ||
81 | rolq $32, RX0; \ | ||
82 | xorl s2(CTX,RT1,4), RT0d; \ | ||
83 | addl s3(CTX,RT2,4), RT0d; \ | ||
84 | xorq RT0, RX0; | ||
85 | |||
86 | #define add_roundkey_enc(n) \ | ||
87 | xorq p+4*(n)(CTX), RX0; | ||
88 | |||
89 | #define round_enc(n) \ | ||
90 | add_roundkey_enc(n); \ | ||
91 | \ | ||
92 | F(); \ | ||
93 | F(); | ||
94 | |||
95 | #define add_roundkey_dec(n) \ | ||
96 | movq p+4*(n-1)(CTX), RT0; \ | ||
97 | rorq $32, RT0; \ | ||
98 | xorq RT0, RX0; | ||
99 | |||
100 | #define round_dec(n) \ | ||
101 | add_roundkey_dec(n); \ | ||
102 | \ | ||
103 | F(); \ | ||
104 | F(); \ | ||
105 | |||
106 | #define read_block() \ | ||
107 | movq (RIO), RX0; \ | ||
108 | rorq $32, RX0; \ | ||
109 | bswapq RX0; | ||
110 | |||
111 | #define write_block() \ | ||
112 | bswapq RX0; \ | ||
113 | movq RX0, (RIO); | ||
114 | |||
115 | #define xor_block() \ | ||
116 | bswapq RX0; \ | ||
117 | xorq RX0, (RIO); | ||
118 | |||
119 | .align 8 | ||
120 | .global __blowfish_enc_blk | ||
121 | .type __blowfish_enc_blk,@function; | ||
122 | |||
123 | __blowfish_enc_blk: | ||
124 | /* input: | ||
125 | * %rdi: ctx, CTX | ||
126 | * %rsi: dst | ||
127 | * %rdx: src | ||
128 | * %rcx: bool, if true: xor output | ||
129 | */ | ||
130 | movq %rbp, %r11; | ||
131 | |||
132 | movq %rsi, %r10; | ||
133 | movq %rdx, RIO; | ||
134 | |||
135 | read_block(); | ||
136 | |||
137 | round_enc(0); | ||
138 | round_enc(2); | ||
139 | round_enc(4); | ||
140 | round_enc(6); | ||
141 | round_enc(8); | ||
142 | round_enc(10); | ||
143 | round_enc(12); | ||
144 | round_enc(14); | ||
145 | add_roundkey_enc(16); | ||
146 | |||
147 | movq %r11, %rbp; | ||
148 | |||
149 | movq %r10, RIO; | ||
150 | test %cl, %cl; | ||
151 | jnz __enc_xor; | ||
152 | |||
153 | write_block(); | ||
154 | ret; | ||
155 | __enc_xor: | ||
156 | xor_block(); | ||
157 | ret; | ||
158 | |||
159 | .align 8 | ||
160 | .global blowfish_dec_blk | ||
161 | .type blowfish_dec_blk,@function; | ||
162 | |||
163 | blowfish_dec_blk: | ||
164 | /* input: | ||
165 | * %rdi: ctx, CTX | ||
166 | * %rsi: dst | ||
167 | * %rdx: src | ||
168 | */ | ||
169 | movq %rbp, %r11; | ||
170 | |||
171 | movq %rsi, %r10; | ||
172 | movq %rdx, RIO; | ||
173 | |||
174 | read_block(); | ||
175 | |||
176 | round_dec(17); | ||
177 | round_dec(15); | ||
178 | round_dec(13); | ||
179 | round_dec(11); | ||
180 | round_dec(9); | ||
181 | round_dec(7); | ||
182 | round_dec(5); | ||
183 | round_dec(3); | ||
184 | add_roundkey_dec(1); | ||
185 | |||
186 | movq %r10, RIO; | ||
187 | write_block(); | ||
188 | |||
189 | movq %r11, %rbp; | ||
190 | |||
191 | ret; | ||
192 | |||
193 | /********************************************************************** | ||
194 | 4-way blowfish, four blocks parallel | ||
195 | **********************************************************************/ | ||
196 | |||
197 | /* F() for 4-way. Slower when used alone/1-way, but faster when used | ||
198 | * parallel/4-way (tested on AMD Phenom II & Intel Xeon E7330). | ||
199 | */ | ||
200 | #define F4(x) \ | ||
201 | movzbl x ## bh, RT1d; \ | ||
202 | movzbl x ## bl, RT3d; \ | ||
203 | rorq $16, x; \ | ||
204 | movzbl x ## bh, RT0d; \ | ||
205 | movzbl x ## bl, RT2d; \ | ||
206 | rorq $16, x; \ | ||
207 | movl s0(CTX,RT0,4), RT0d; \ | ||
208 | addl s1(CTX,RT2,4), RT0d; \ | ||
209 | xorl s2(CTX,RT1,4), RT0d; \ | ||
210 | addl s3(CTX,RT3,4), RT0d; \ | ||
211 | xorq RT0, x; | ||
212 | |||
213 | #define add_preloaded_roundkey4() \ | ||
214 | xorq RKEY, RX0; \ | ||
215 | xorq RKEY, RX1; \ | ||
216 | xorq RKEY, RX2; \ | ||
217 | xorq RKEY, RX3; | ||
218 | |||
219 | #define preload_roundkey_enc(n) \ | ||
220 | movq p+4*(n)(CTX), RKEY; | ||
221 | |||
222 | #define add_roundkey_enc4(n) \ | ||
223 | add_preloaded_roundkey4(); \ | ||
224 | preload_roundkey_enc(n + 2); | ||
225 | |||
226 | #define round_enc4(n) \ | ||
227 | add_roundkey_enc4(n); \ | ||
228 | \ | ||
229 | F4(RX0); \ | ||
230 | F4(RX1); \ | ||
231 | F4(RX2); \ | ||
232 | F4(RX3); \ | ||
233 | \ | ||
234 | F4(RX0); \ | ||
235 | F4(RX1); \ | ||
236 | F4(RX2); \ | ||
237 | F4(RX3); | ||
238 | |||
239 | #define preload_roundkey_dec(n) \ | ||
240 | movq p+4*((n)-1)(CTX), RKEY; \ | ||
241 | rorq $32, RKEY; | ||
242 | |||
243 | #define add_roundkey_dec4(n) \ | ||
244 | add_preloaded_roundkey4(); \ | ||
245 | preload_roundkey_dec(n - 2); | ||
246 | |||
247 | #define round_dec4(n) \ | ||
248 | add_roundkey_dec4(n); \ | ||
249 | \ | ||
250 | F4(RX0); \ | ||
251 | F4(RX1); \ | ||
252 | F4(RX2); \ | ||
253 | F4(RX3); \ | ||
254 | \ | ||
255 | F4(RX0); \ | ||
256 | F4(RX1); \ | ||
257 | F4(RX2); \ | ||
258 | F4(RX3); | ||
259 | |||
260 | #define read_block4() \ | ||
261 | movq (RIO), RX0; \ | ||
262 | rorq $32, RX0; \ | ||
263 | bswapq RX0; \ | ||
264 | \ | ||
265 | movq 8(RIO), RX1; \ | ||
266 | rorq $32, RX1; \ | ||
267 | bswapq RX1; \ | ||
268 | \ | ||
269 | movq 16(RIO), RX2; \ | ||
270 | rorq $32, RX2; \ | ||
271 | bswapq RX2; \ | ||
272 | \ | ||
273 | movq 24(RIO), RX3; \ | ||
274 | rorq $32, RX3; \ | ||
275 | bswapq RX3; | ||
276 | |||
277 | #define write_block4() \ | ||
278 | bswapq RX0; \ | ||
279 | movq RX0, (RIO); \ | ||
280 | \ | ||
281 | bswapq RX1; \ | ||
282 | movq RX1, 8(RIO); \ | ||
283 | \ | ||
284 | bswapq RX2; \ | ||
285 | movq RX2, 16(RIO); \ | ||
286 | \ | ||
287 | bswapq RX3; \ | ||
288 | movq RX3, 24(RIO); | ||
289 | |||
290 | #define xor_block4() \ | ||
291 | bswapq RX0; \ | ||
292 | xorq RX0, (RIO); \ | ||
293 | \ | ||
294 | bswapq RX1; \ | ||
295 | xorq RX1, 8(RIO); \ | ||
296 | \ | ||
297 | bswapq RX2; \ | ||
298 | xorq RX2, 16(RIO); \ | ||
299 | \ | ||
300 | bswapq RX3; \ | ||
301 | xorq RX3, 24(RIO); | ||
302 | |||
303 | .align 8 | ||
304 | .global __blowfish_enc_blk_4way | ||
305 | .type __blowfish_enc_blk_4way,@function; | ||
306 | |||
307 | __blowfish_enc_blk_4way: | ||
308 | /* input: | ||
309 | * %rdi: ctx, CTX | ||
310 | * %rsi: dst | ||
311 | * %rdx: src | ||
312 | * %rcx: bool, if true: xor output | ||
313 | */ | ||
314 | pushq %rbp; | ||
315 | pushq %rbx; | ||
316 | pushq %rcx; | ||
317 | |||
318 | preload_roundkey_enc(0); | ||
319 | |||
320 | movq %rsi, %r11; | ||
321 | movq %rdx, RIO; | ||
322 | |||
323 | read_block4(); | ||
324 | |||
325 | round_enc4(0); | ||
326 | round_enc4(2); | ||
327 | round_enc4(4); | ||
328 | round_enc4(6); | ||
329 | round_enc4(8); | ||
330 | round_enc4(10); | ||
331 | round_enc4(12); | ||
332 | round_enc4(14); | ||
333 | add_preloaded_roundkey4(); | ||
334 | |||
335 | popq %rbp; | ||
336 | movq %r11, RIO; | ||
337 | |||
338 | test %bpl, %bpl; | ||
339 | jnz __enc_xor4; | ||
340 | |||
341 | write_block4(); | ||
342 | |||
343 | popq %rbx; | ||
344 | popq %rbp; | ||
345 | ret; | ||
346 | |||
347 | __enc_xor4: | ||
348 | xor_block4(); | ||
349 | |||
350 | popq %rbx; | ||
351 | popq %rbp; | ||
352 | ret; | ||
353 | |||
354 | .align 8 | ||
355 | .global blowfish_dec_blk_4way | ||
356 | .type blowfish_dec_blk_4way,@function; | ||
357 | |||
358 | blowfish_dec_blk_4way: | ||
359 | /* input: | ||
360 | * %rdi: ctx, CTX | ||
361 | * %rsi: dst | ||
362 | * %rdx: src | ||
363 | */ | ||
364 | pushq %rbp; | ||
365 | pushq %rbx; | ||
366 | preload_roundkey_dec(17); | ||
367 | |||
368 | movq %rsi, %r11; | ||
369 | movq %rdx, RIO; | ||
370 | |||
371 | read_block4(); | ||
372 | |||
373 | round_dec4(17); | ||
374 | round_dec4(15); | ||
375 | round_dec4(13); | ||
376 | round_dec4(11); | ||
377 | round_dec4(9); | ||
378 | round_dec4(7); | ||
379 | round_dec4(5); | ||
380 | round_dec4(3); | ||
381 | add_preloaded_roundkey4(); | ||
382 | |||
383 | movq %r11, RIO; | ||
384 | write_block4(); | ||
385 | |||
386 | popq %rbx; | ||
387 | popq %rbp; | ||
388 | |||
389 | ret; | ||
390 | |||
diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c deleted file mode 100644 index 50ec333b70e..00000000000 --- a/arch/x86/crypto/blowfish_glue.c +++ /dev/null | |||
@@ -1,485 +0,0 @@ | |||
1 | /* | ||
2 | * Glue Code for assembler optimized version of Blowfish | ||
3 | * | ||
4 | * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
5 | * | ||
6 | * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: | ||
7 | * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> | ||
8 | * CTR part based on code (crypto/ctr.c) by: | ||
9 | * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com> | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of the GNU General Public License as published by | ||
13 | * the Free Software Foundation; either version 2 of the License, or | ||
14 | * (at your option) any later version. | ||
15 | * | ||
16 | * This program is distributed in the hope that it will be useful, | ||
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
19 | * GNU General Public License for more details. | ||
20 | * | ||
21 | * You should have received a copy of the GNU General Public License | ||
22 | * along with this program; if not, write to the Free Software | ||
23 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
24 | * USA | ||
25 | * | ||
26 | */ | ||
27 | |||
28 | #include <asm/processor.h> | ||
29 | #include <crypto/blowfish.h> | ||
30 | #include <linux/crypto.h> | ||
31 | #include <linux/init.h> | ||
32 | #include <linux/module.h> | ||
33 | #include <linux/types.h> | ||
34 | #include <crypto/algapi.h> | ||
35 | |||
36 | /* regular block cipher functions */ | ||
37 | asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src, | ||
38 | bool xor); | ||
39 | asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src); | ||
40 | |||
41 | /* 4-way parallel cipher functions */ | ||
42 | asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, | ||
43 | const u8 *src, bool xor); | ||
44 | asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst, | ||
45 | const u8 *src); | ||
46 | |||
47 | static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src) | ||
48 | { | ||
49 | __blowfish_enc_blk(ctx, dst, src, false); | ||
50 | } | ||
51 | |||
52 | static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst, | ||
53 | const u8 *src) | ||
54 | { | ||
55 | __blowfish_enc_blk(ctx, dst, src, true); | ||
56 | } | ||
57 | |||
58 | static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, | ||
59 | const u8 *src) | ||
60 | { | ||
61 | __blowfish_enc_blk_4way(ctx, dst, src, false); | ||
62 | } | ||
63 | |||
64 | static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst, | ||
65 | const u8 *src) | ||
66 | { | ||
67 | __blowfish_enc_blk_4way(ctx, dst, src, true); | ||
68 | } | ||
69 | |||
70 | static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | ||
71 | { | ||
72 | blowfish_enc_blk(crypto_tfm_ctx(tfm), dst, src); | ||
73 | } | ||
74 | |||
75 | static void blowfish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | ||
76 | { | ||
77 | blowfish_dec_blk(crypto_tfm_ctx(tfm), dst, src); | ||
78 | } | ||
79 | |||
80 | static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, | ||
81 | void (*fn)(struct bf_ctx *, u8 *, const u8 *), | ||
82 | void (*fn_4way)(struct bf_ctx *, u8 *, const u8 *)) | ||
83 | { | ||
84 | struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
85 | unsigned int bsize = BF_BLOCK_SIZE; | ||
86 | unsigned int nbytes; | ||
87 | int err; | ||
88 | |||
89 | err = blkcipher_walk_virt(desc, walk); | ||
90 | |||
91 | while ((nbytes = walk->nbytes)) { | ||
92 | u8 *wsrc = walk->src.virt.addr; | ||
93 | u8 *wdst = walk->dst.virt.addr; | ||
94 | |||
95 | /* Process four block batch */ | ||
96 | if (nbytes >= bsize * 4) { | ||
97 | do { | ||
98 | fn_4way(ctx, wdst, wsrc); | ||
99 | |||
100 | wsrc += bsize * 4; | ||
101 | wdst += bsize * 4; | ||
102 | nbytes -= bsize * 4; | ||
103 | } while (nbytes >= bsize * 4); | ||
104 | |||
105 | if (nbytes < bsize) | ||
106 | goto done; | ||
107 | } | ||
108 | |||
109 | /* Handle leftovers */ | ||
110 | do { | ||
111 | fn(ctx, wdst, wsrc); | ||
112 | |||
113 | wsrc += bsize; | ||
114 | wdst += bsize; | ||
115 | nbytes -= bsize; | ||
116 | } while (nbytes >= bsize); | ||
117 | |||
118 | done: | ||
119 | err = blkcipher_walk_done(desc, walk, nbytes); | ||
120 | } | ||
121 | |||
122 | return err; | ||
123 | } | ||
124 | |||
125 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
126 | struct scatterlist *src, unsigned int nbytes) | ||
127 | { | ||
128 | struct blkcipher_walk walk; | ||
129 | |||
130 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
131 | return ecb_crypt(desc, &walk, blowfish_enc_blk, blowfish_enc_blk_4way); | ||
132 | } | ||
133 | |||
134 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
135 | struct scatterlist *src, unsigned int nbytes) | ||
136 | { | ||
137 | struct blkcipher_walk walk; | ||
138 | |||
139 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
140 | return ecb_crypt(desc, &walk, blowfish_dec_blk, blowfish_dec_blk_4way); | ||
141 | } | ||
142 | |||
143 | static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, | ||
144 | struct blkcipher_walk *walk) | ||
145 | { | ||
146 | struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
147 | unsigned int bsize = BF_BLOCK_SIZE; | ||
148 | unsigned int nbytes = walk->nbytes; | ||
149 | u64 *src = (u64 *)walk->src.virt.addr; | ||
150 | u64 *dst = (u64 *)walk->dst.virt.addr; | ||
151 | u64 *iv = (u64 *)walk->iv; | ||
152 | |||
153 | do { | ||
154 | *dst = *src ^ *iv; | ||
155 | blowfish_enc_blk(ctx, (u8 *)dst, (u8 *)dst); | ||
156 | iv = dst; | ||
157 | |||
158 | src += 1; | ||
159 | dst += 1; | ||
160 | nbytes -= bsize; | ||
161 | } while (nbytes >= bsize); | ||
162 | |||
163 | *(u64 *)walk->iv = *iv; | ||
164 | return nbytes; | ||
165 | } | ||
166 | |||
167 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
168 | struct scatterlist *src, unsigned int nbytes) | ||
169 | { | ||
170 | struct blkcipher_walk walk; | ||
171 | int err; | ||
172 | |||
173 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
174 | err = blkcipher_walk_virt(desc, &walk); | ||
175 | |||
176 | while ((nbytes = walk.nbytes)) { | ||
177 | nbytes = __cbc_encrypt(desc, &walk); | ||
178 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
179 | } | ||
180 | |||
181 | return err; | ||
182 | } | ||
183 | |||
184 | static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, | ||
185 | struct blkcipher_walk *walk) | ||
186 | { | ||
187 | struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
188 | unsigned int bsize = BF_BLOCK_SIZE; | ||
189 | unsigned int nbytes = walk->nbytes; | ||
190 | u64 *src = (u64 *)walk->src.virt.addr; | ||
191 | u64 *dst = (u64 *)walk->dst.virt.addr; | ||
192 | u64 ivs[4 - 1]; | ||
193 | u64 last_iv; | ||
194 | |||
195 | /* Start of the last block. */ | ||
196 | src += nbytes / bsize - 1; | ||
197 | dst += nbytes / bsize - 1; | ||
198 | |||
199 | last_iv = *src; | ||
200 | |||
201 | /* Process four block batch */ | ||
202 | if (nbytes >= bsize * 4) { | ||
203 | do { | ||
204 | nbytes -= bsize * 4 - bsize; | ||
205 | src -= 4 - 1; | ||
206 | dst -= 4 - 1; | ||
207 | |||
208 | ivs[0] = src[0]; | ||
209 | ivs[1] = src[1]; | ||
210 | ivs[2] = src[2]; | ||
211 | |||
212 | blowfish_dec_blk_4way(ctx, (u8 *)dst, (u8 *)src); | ||
213 | |||
214 | dst[1] ^= ivs[0]; | ||
215 | dst[2] ^= ivs[1]; | ||
216 | dst[3] ^= ivs[2]; | ||
217 | |||
218 | nbytes -= bsize; | ||
219 | if (nbytes < bsize) | ||
220 | goto done; | ||
221 | |||
222 | *dst ^= *(src - 1); | ||
223 | src -= 1; | ||
224 | dst -= 1; | ||
225 | } while (nbytes >= bsize * 4); | ||
226 | |||
227 | if (nbytes < bsize) | ||
228 | goto done; | ||
229 | } | ||
230 | |||
231 | /* Handle leftovers */ | ||
232 | for (;;) { | ||
233 | blowfish_dec_blk(ctx, (u8 *)dst, (u8 *)src); | ||
234 | |||
235 | nbytes -= bsize; | ||
236 | if (nbytes < bsize) | ||
237 | break; | ||
238 | |||
239 | *dst ^= *(src - 1); | ||
240 | src -= 1; | ||
241 | dst -= 1; | ||
242 | } | ||
243 | |||
244 | done: | ||
245 | *dst ^= *(u64 *)walk->iv; | ||
246 | *(u64 *)walk->iv = last_iv; | ||
247 | |||
248 | return nbytes; | ||
249 | } | ||
250 | |||
251 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
252 | struct scatterlist *src, unsigned int nbytes) | ||
253 | { | ||
254 | struct blkcipher_walk walk; | ||
255 | int err; | ||
256 | |||
257 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
258 | err = blkcipher_walk_virt(desc, &walk); | ||
259 | |||
260 | while ((nbytes = walk.nbytes)) { | ||
261 | nbytes = __cbc_decrypt(desc, &walk); | ||
262 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
263 | } | ||
264 | |||
265 | return err; | ||
266 | } | ||
267 | |||
268 | static void ctr_crypt_final(struct bf_ctx *ctx, struct blkcipher_walk *walk) | ||
269 | { | ||
270 | u8 *ctrblk = walk->iv; | ||
271 | u8 keystream[BF_BLOCK_SIZE]; | ||
272 | u8 *src = walk->src.virt.addr; | ||
273 | u8 *dst = walk->dst.virt.addr; | ||
274 | unsigned int nbytes = walk->nbytes; | ||
275 | |||
276 | blowfish_enc_blk(ctx, keystream, ctrblk); | ||
277 | crypto_xor(keystream, src, nbytes); | ||
278 | memcpy(dst, keystream, nbytes); | ||
279 | |||
280 | crypto_inc(ctrblk, BF_BLOCK_SIZE); | ||
281 | } | ||
282 | |||
283 | static unsigned int __ctr_crypt(struct blkcipher_desc *desc, | ||
284 | struct blkcipher_walk *walk) | ||
285 | { | ||
286 | struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
287 | unsigned int bsize = BF_BLOCK_SIZE; | ||
288 | unsigned int nbytes = walk->nbytes; | ||
289 | u64 *src = (u64 *)walk->src.virt.addr; | ||
290 | u64 *dst = (u64 *)walk->dst.virt.addr; | ||
291 | u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv); | ||
292 | __be64 ctrblocks[4]; | ||
293 | |||
294 | /* Process four block batch */ | ||
295 | if (nbytes >= bsize * 4) { | ||
296 | do { | ||
297 | if (dst != src) { | ||
298 | dst[0] = src[0]; | ||
299 | dst[1] = src[1]; | ||
300 | dst[2] = src[2]; | ||
301 | dst[3] = src[3]; | ||
302 | } | ||
303 | |||
304 | /* create ctrblks for parallel encrypt */ | ||
305 | ctrblocks[0] = cpu_to_be64(ctrblk++); | ||
306 | ctrblocks[1] = cpu_to_be64(ctrblk++); | ||
307 | ctrblocks[2] = cpu_to_be64(ctrblk++); | ||
308 | ctrblocks[3] = cpu_to_be64(ctrblk++); | ||
309 | |||
310 | blowfish_enc_blk_xor_4way(ctx, (u8 *)dst, | ||
311 | (u8 *)ctrblocks); | ||
312 | |||
313 | src += 4; | ||
314 | dst += 4; | ||
315 | } while ((nbytes -= bsize * 4) >= bsize * 4); | ||
316 | |||
317 | if (nbytes < bsize) | ||
318 | goto done; | ||
319 | } | ||
320 | |||
321 | /* Handle leftovers */ | ||
322 | do { | ||
323 | if (dst != src) | ||
324 | *dst = *src; | ||
325 | |||
326 | ctrblocks[0] = cpu_to_be64(ctrblk++); | ||
327 | |||
328 | blowfish_enc_blk_xor(ctx, (u8 *)dst, (u8 *)ctrblocks); | ||
329 | |||
330 | src += 1; | ||
331 | dst += 1; | ||
332 | } while ((nbytes -= bsize) >= bsize); | ||
333 | |||
334 | done: | ||
335 | *(__be64 *)walk->iv = cpu_to_be64(ctrblk); | ||
336 | return nbytes; | ||
337 | } | ||
338 | |||
339 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
340 | struct scatterlist *src, unsigned int nbytes) | ||
341 | { | ||
342 | struct blkcipher_walk walk; | ||
343 | int err; | ||
344 | |||
345 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
346 | err = blkcipher_walk_virt_block(desc, &walk, BF_BLOCK_SIZE); | ||
347 | |||
348 | while ((nbytes = walk.nbytes) >= BF_BLOCK_SIZE) { | ||
349 | nbytes = __ctr_crypt(desc, &walk); | ||
350 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
351 | } | ||
352 | |||
353 | if (walk.nbytes) { | ||
354 | ctr_crypt_final(crypto_blkcipher_ctx(desc->tfm), &walk); | ||
355 | err = blkcipher_walk_done(desc, &walk, 0); | ||
356 | } | ||
357 | |||
358 | return err; | ||
359 | } | ||
360 | |||
361 | static struct crypto_alg bf_algs[4] = { { | ||
362 | .cra_name = "blowfish", | ||
363 | .cra_driver_name = "blowfish-asm", | ||
364 | .cra_priority = 200, | ||
365 | .cra_flags = CRYPTO_ALG_TYPE_CIPHER, | ||
366 | .cra_blocksize = BF_BLOCK_SIZE, | ||
367 | .cra_ctxsize = sizeof(struct bf_ctx), | ||
368 | .cra_alignmask = 0, | ||
369 | .cra_module = THIS_MODULE, | ||
370 | .cra_u = { | ||
371 | .cipher = { | ||
372 | .cia_min_keysize = BF_MIN_KEY_SIZE, | ||
373 | .cia_max_keysize = BF_MAX_KEY_SIZE, | ||
374 | .cia_setkey = blowfish_setkey, | ||
375 | .cia_encrypt = blowfish_encrypt, | ||
376 | .cia_decrypt = blowfish_decrypt, | ||
377 | } | ||
378 | } | ||
379 | }, { | ||
380 | .cra_name = "ecb(blowfish)", | ||
381 | .cra_driver_name = "ecb-blowfish-asm", | ||
382 | .cra_priority = 300, | ||
383 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
384 | .cra_blocksize = BF_BLOCK_SIZE, | ||
385 | .cra_ctxsize = sizeof(struct bf_ctx), | ||
386 | .cra_alignmask = 0, | ||
387 | .cra_type = &crypto_blkcipher_type, | ||
388 | .cra_module = THIS_MODULE, | ||
389 | .cra_u = { | ||
390 | .blkcipher = { | ||
391 | .min_keysize = BF_MIN_KEY_SIZE, | ||
392 | .max_keysize = BF_MAX_KEY_SIZE, | ||
393 | .setkey = blowfish_setkey, | ||
394 | .encrypt = ecb_encrypt, | ||
395 | .decrypt = ecb_decrypt, | ||
396 | }, | ||
397 | }, | ||
398 | }, { | ||
399 | .cra_name = "cbc(blowfish)", | ||
400 | .cra_driver_name = "cbc-blowfish-asm", | ||
401 | .cra_priority = 300, | ||
402 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
403 | .cra_blocksize = BF_BLOCK_SIZE, | ||
404 | .cra_ctxsize = sizeof(struct bf_ctx), | ||
405 | .cra_alignmask = 0, | ||
406 | .cra_type = &crypto_blkcipher_type, | ||
407 | .cra_module = THIS_MODULE, | ||
408 | .cra_u = { | ||
409 | .blkcipher = { | ||
410 | .min_keysize = BF_MIN_KEY_SIZE, | ||
411 | .max_keysize = BF_MAX_KEY_SIZE, | ||
412 | .ivsize = BF_BLOCK_SIZE, | ||
413 | .setkey = blowfish_setkey, | ||
414 | .encrypt = cbc_encrypt, | ||
415 | .decrypt = cbc_decrypt, | ||
416 | }, | ||
417 | }, | ||
418 | }, { | ||
419 | .cra_name = "ctr(blowfish)", | ||
420 | .cra_driver_name = "ctr-blowfish-asm", | ||
421 | .cra_priority = 300, | ||
422 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
423 | .cra_blocksize = 1, | ||
424 | .cra_ctxsize = sizeof(struct bf_ctx), | ||
425 | .cra_alignmask = 0, | ||
426 | .cra_type = &crypto_blkcipher_type, | ||
427 | .cra_module = THIS_MODULE, | ||
428 | .cra_u = { | ||
429 | .blkcipher = { | ||
430 | .min_keysize = BF_MIN_KEY_SIZE, | ||
431 | .max_keysize = BF_MAX_KEY_SIZE, | ||
432 | .ivsize = BF_BLOCK_SIZE, | ||
433 | .setkey = blowfish_setkey, | ||
434 | .encrypt = ctr_crypt, | ||
435 | .decrypt = ctr_crypt, | ||
436 | }, | ||
437 | }, | ||
438 | } }; | ||
439 | |||
440 | static bool is_blacklisted_cpu(void) | ||
441 | { | ||
442 | if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) | ||
443 | return false; | ||
444 | |||
445 | if (boot_cpu_data.x86 == 0x0f) { | ||
446 | /* | ||
447 | * On Pentium 4, blowfish-x86_64 is slower than generic C | ||
448 | * implementation because use of 64bit rotates (which are really | ||
449 | * slow on P4). Therefore blacklist P4s. | ||
450 | */ | ||
451 | return true; | ||
452 | } | ||
453 | |||
454 | return false; | ||
455 | } | ||
456 | |||
457 | static int force; | ||
458 | module_param(force, int, 0); | ||
459 | MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist"); | ||
460 | |||
461 | static int __init init(void) | ||
462 | { | ||
463 | if (!force && is_blacklisted_cpu()) { | ||
464 | printk(KERN_INFO | ||
465 | "blowfish-x86_64: performance on this CPU " | ||
466 | "would be suboptimal: disabling " | ||
467 | "blowfish-x86_64.\n"); | ||
468 | return -ENODEV; | ||
469 | } | ||
470 | |||
471 | return crypto_register_algs(bf_algs, ARRAY_SIZE(bf_algs)); | ||
472 | } | ||
473 | |||
474 | static void __exit fini(void) | ||
475 | { | ||
476 | crypto_unregister_algs(bf_algs, ARRAY_SIZE(bf_algs)); | ||
477 | } | ||
478 | |||
479 | module_init(init); | ||
480 | module_exit(fini); | ||
481 | |||
482 | MODULE_LICENSE("GPL"); | ||
483 | MODULE_DESCRIPTION("Blowfish Cipher Algorithm, asm optimized"); | ||
484 | MODULE_ALIAS("blowfish"); | ||
485 | MODULE_ALIAS("blowfish-asm"); | ||
diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S deleted file mode 100644 index 2306d2e4816..00000000000 --- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S +++ /dev/null | |||
@@ -1,1102 +0,0 @@ | |||
1 | /* | ||
2 | * x86_64/AVX/AES-NI assembler implementation of Camellia | ||
3 | * | ||
4 | * Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | /* | ||
14 | * Version licensed under 2-clause BSD License is available at: | ||
15 | * http://koti.mbnet.fi/axh/crypto/camellia-BSD-1.2.0-aesni1.tar.xz | ||
16 | */ | ||
17 | |||
18 | #define CAMELLIA_TABLE_BYTE_LEN 272 | ||
19 | |||
20 | /* struct camellia_ctx: */ | ||
21 | #define key_table 0 | ||
22 | #define key_length CAMELLIA_TABLE_BYTE_LEN | ||
23 | |||
24 | /* register macros */ | ||
25 | #define CTX %rdi | ||
26 | |||
27 | /********************************************************************** | ||
28 | 16-way camellia | ||
29 | **********************************************************************/ | ||
30 | #define filter_8bit(x, lo_t, hi_t, mask4bit, tmp0) \ | ||
31 | vpand x, mask4bit, tmp0; \ | ||
32 | vpandn x, mask4bit, x; \ | ||
33 | vpsrld $4, x, x; \ | ||
34 | \ | ||
35 | vpshufb tmp0, lo_t, tmp0; \ | ||
36 | vpshufb x, hi_t, x; \ | ||
37 | vpxor tmp0, x, x; | ||
38 | |||
39 | /* | ||
40 | * IN: | ||
41 | * x0..x7: byte-sliced AB state | ||
42 | * mem_cd: register pointer storing CD state | ||
43 | * key: index for key material | ||
44 | * OUT: | ||
45 | * x0..x7: new byte-sliced CD state | ||
46 | */ | ||
47 | #define roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, t0, t1, t2, t3, t4, t5, t6, \ | ||
48 | t7, mem_cd, key) \ | ||
49 | /* \ | ||
50 | * S-function with AES subbytes \ | ||
51 | */ \ | ||
52 | vmovdqa .Linv_shift_row, t4; \ | ||
53 | vbroadcastss .L0f0f0f0f, t7; \ | ||
54 | vmovdqa .Lpre_tf_lo_s1, t0; \ | ||
55 | vmovdqa .Lpre_tf_hi_s1, t1; \ | ||
56 | \ | ||
57 | /* AES inverse shift rows */ \ | ||
58 | vpshufb t4, x0, x0; \ | ||
59 | vpshufb t4, x7, x7; \ | ||
60 | vpshufb t4, x1, x1; \ | ||
61 | vpshufb t4, x4, x4; \ | ||
62 | vpshufb t4, x2, x2; \ | ||
63 | vpshufb t4, x5, x5; \ | ||
64 | vpshufb t4, x3, x3; \ | ||
65 | vpshufb t4, x6, x6; \ | ||
66 | \ | ||
67 | /* prefilter sboxes 1, 2 and 3 */ \ | ||
68 | vmovdqa .Lpre_tf_lo_s4, t2; \ | ||
69 | vmovdqa .Lpre_tf_hi_s4, t3; \ | ||
70 | filter_8bit(x0, t0, t1, t7, t6); \ | ||
71 | filter_8bit(x7, t0, t1, t7, t6); \ | ||
72 | filter_8bit(x1, t0, t1, t7, t6); \ | ||
73 | filter_8bit(x4, t0, t1, t7, t6); \ | ||
74 | filter_8bit(x2, t0, t1, t7, t6); \ | ||
75 | filter_8bit(x5, t0, t1, t7, t6); \ | ||
76 | \ | ||
77 | /* prefilter sbox 4 */ \ | ||
78 | vpxor t4, t4, t4; \ | ||
79 | filter_8bit(x3, t2, t3, t7, t6); \ | ||
80 | filter_8bit(x6, t2, t3, t7, t6); \ | ||
81 | \ | ||
82 | /* AES subbytes + AES shift rows */ \ | ||
83 | vmovdqa .Lpost_tf_lo_s1, t0; \ | ||
84 | vmovdqa .Lpost_tf_hi_s1, t1; \ | ||
85 | vaesenclast t4, x0, x0; \ | ||
86 | vaesenclast t4, x7, x7; \ | ||
87 | vaesenclast t4, x1, x1; \ | ||
88 | vaesenclast t4, x4, x4; \ | ||
89 | vaesenclast t4, x2, x2; \ | ||
90 | vaesenclast t4, x5, x5; \ | ||
91 | vaesenclast t4, x3, x3; \ | ||
92 | vaesenclast t4, x6, x6; \ | ||
93 | \ | ||
94 | /* postfilter sboxes 1 and 4 */ \ | ||
95 | vmovdqa .Lpost_tf_lo_s3, t2; \ | ||
96 | vmovdqa .Lpost_tf_hi_s3, t3; \ | ||
97 | filter_8bit(x0, t0, t1, t7, t6); \ | ||
98 | filter_8bit(x7, t0, t1, t7, t6); \ | ||
99 | filter_8bit(x3, t0, t1, t7, t6); \ | ||
100 | filter_8bit(x6, t0, t1, t7, t6); \ | ||
101 | \ | ||
102 | /* postfilter sbox 3 */ \ | ||
103 | vmovdqa .Lpost_tf_lo_s2, t4; \ | ||
104 | vmovdqa .Lpost_tf_hi_s2, t5; \ | ||
105 | filter_8bit(x2, t2, t3, t7, t6); \ | ||
106 | filter_8bit(x5, t2, t3, t7, t6); \ | ||
107 | \ | ||
108 | vpxor t6, t6, t6; \ | ||
109 | vmovq key, t0; \ | ||
110 | \ | ||
111 | /* postfilter sbox 2 */ \ | ||
112 | filter_8bit(x1, t4, t5, t7, t2); \ | ||
113 | filter_8bit(x4, t4, t5, t7, t2); \ | ||
114 | \ | ||
115 | vpsrldq $5, t0, t5; \ | ||
116 | vpsrldq $1, t0, t1; \ | ||
117 | vpsrldq $2, t0, t2; \ | ||
118 | vpsrldq $3, t0, t3; \ | ||
119 | vpsrldq $4, t0, t4; \ | ||
120 | vpshufb t6, t0, t0; \ | ||
121 | vpshufb t6, t1, t1; \ | ||
122 | vpshufb t6, t2, t2; \ | ||
123 | vpshufb t6, t3, t3; \ | ||
124 | vpshufb t6, t4, t4; \ | ||
125 | vpsrldq $2, t5, t7; \ | ||
126 | vpshufb t6, t7, t7; \ | ||
127 | \ | ||
128 | /* \ | ||
129 | * P-function \ | ||
130 | */ \ | ||
131 | vpxor x5, x0, x0; \ | ||
132 | vpxor x6, x1, x1; \ | ||
133 | vpxor x7, x2, x2; \ | ||
134 | vpxor x4, x3, x3; \ | ||
135 | \ | ||
136 | vpxor x2, x4, x4; \ | ||
137 | vpxor x3, x5, x5; \ | ||
138 | vpxor x0, x6, x6; \ | ||
139 | vpxor x1, x7, x7; \ | ||
140 | \ | ||
141 | vpxor x7, x0, x0; \ | ||
142 | vpxor x4, x1, x1; \ | ||
143 | vpxor x5, x2, x2; \ | ||
144 | vpxor x6, x3, x3; \ | ||
145 | \ | ||
146 | vpxor x3, x4, x4; \ | ||
147 | vpxor x0, x5, x5; \ | ||
148 | vpxor x1, x6, x6; \ | ||
149 | vpxor x2, x7, x7; /* note: high and low parts swapped */ \ | ||
150 | \ | ||
151 | /* \ | ||
152 | * Add key material and result to CD (x becomes new CD) \ | ||
153 | */ \ | ||
154 | \ | ||
155 | vpxor t3, x4, x4; \ | ||
156 | vpxor 0 * 16(mem_cd), x4, x4; \ | ||
157 | \ | ||
158 | vpxor t2, x5, x5; \ | ||
159 | vpxor 1 * 16(mem_cd), x5, x5; \ | ||
160 | \ | ||
161 | vpsrldq $1, t5, t3; \ | ||
162 | vpshufb t6, t5, t5; \ | ||
163 | vpshufb t6, t3, t6; \ | ||
164 | \ | ||
165 | vpxor t1, x6, x6; \ | ||
166 | vpxor 2 * 16(mem_cd), x6, x6; \ | ||
167 | \ | ||
168 | vpxor t0, x7, x7; \ | ||
169 | vpxor 3 * 16(mem_cd), x7, x7; \ | ||
170 | \ | ||
171 | vpxor t7, x0, x0; \ | ||
172 | vpxor 4 * 16(mem_cd), x0, x0; \ | ||
173 | \ | ||
174 | vpxor t6, x1, x1; \ | ||
175 | vpxor 5 * 16(mem_cd), x1, x1; \ | ||
176 | \ | ||
177 | vpxor t5, x2, x2; \ | ||
178 | vpxor 6 * 16(mem_cd), x2, x2; \ | ||
179 | \ | ||
180 | vpxor t4, x3, x3; \ | ||
181 | vpxor 7 * 16(mem_cd), x3, x3; | ||
182 | |||
183 | /* | ||
184 | * Size optimization... with inlined roundsm16, binary would be over 5 times | ||
185 | * larger and would only be 0.5% faster (on sandy-bridge). | ||
186 | */ | ||
187 | .align 8 | ||
188 | roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd: | ||
189 | roundsm16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, | ||
190 | %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, | ||
191 | %rcx, (%r9)); | ||
192 | ret; | ||
193 | |||
194 | .align 8 | ||
195 | roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab: | ||
196 | roundsm16(%xmm4, %xmm5, %xmm6, %xmm7, %xmm0, %xmm1, %xmm2, %xmm3, | ||
197 | %xmm12, %xmm13, %xmm14, %xmm15, %xmm8, %xmm9, %xmm10, %xmm11, | ||
198 | %rax, (%r9)); | ||
199 | ret; | ||
200 | |||
201 | /* | ||
202 | * IN/OUT: | ||
203 | * x0..x7: byte-sliced AB state preloaded | ||
204 | * mem_ab: byte-sliced AB state in memory | ||
205 | * mem_cb: byte-sliced CD state in memory | ||
206 | */ | ||
207 | #define two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ | ||
208 | y6, y7, mem_ab, mem_cd, i, dir, store_ab) \ | ||
209 | leaq (key_table + (i) * 8)(CTX), %r9; \ | ||
210 | call roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd; \ | ||
211 | \ | ||
212 | vmovdqu x4, 0 * 16(mem_cd); \ | ||
213 | vmovdqu x5, 1 * 16(mem_cd); \ | ||
214 | vmovdqu x6, 2 * 16(mem_cd); \ | ||
215 | vmovdqu x7, 3 * 16(mem_cd); \ | ||
216 | vmovdqu x0, 4 * 16(mem_cd); \ | ||
217 | vmovdqu x1, 5 * 16(mem_cd); \ | ||
218 | vmovdqu x2, 6 * 16(mem_cd); \ | ||
219 | vmovdqu x3, 7 * 16(mem_cd); \ | ||
220 | \ | ||
221 | leaq (key_table + ((i) + (dir)) * 8)(CTX), %r9; \ | ||
222 | call roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab; \ | ||
223 | \ | ||
224 | store_ab(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab); | ||
225 | |||
226 | #define dummy_store(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab) /* do nothing */ | ||
227 | |||
228 | #define store_ab_state(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab) \ | ||
229 | /* Store new AB state */ \ | ||
230 | vmovdqu x0, 0 * 16(mem_ab); \ | ||
231 | vmovdqu x1, 1 * 16(mem_ab); \ | ||
232 | vmovdqu x2, 2 * 16(mem_ab); \ | ||
233 | vmovdqu x3, 3 * 16(mem_ab); \ | ||
234 | vmovdqu x4, 4 * 16(mem_ab); \ | ||
235 | vmovdqu x5, 5 * 16(mem_ab); \ | ||
236 | vmovdqu x6, 6 * 16(mem_ab); \ | ||
237 | vmovdqu x7, 7 * 16(mem_ab); | ||
238 | |||
239 | #define enc_rounds16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ | ||
240 | y6, y7, mem_ab, mem_cd, i) \ | ||
241 | two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ | ||
242 | y6, y7, mem_ab, mem_cd, (i) + 2, 1, store_ab_state); \ | ||
243 | two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ | ||
244 | y6, y7, mem_ab, mem_cd, (i) + 4, 1, store_ab_state); \ | ||
245 | two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ | ||
246 | y6, y7, mem_ab, mem_cd, (i) + 6, 1, dummy_store); | ||
247 | |||
248 | #define dec_rounds16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ | ||
249 | y6, y7, mem_ab, mem_cd, i) \ | ||
250 | two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ | ||
251 | y6, y7, mem_ab, mem_cd, (i) + 7, -1, store_ab_state); \ | ||
252 | two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ | ||
253 | y6, y7, mem_ab, mem_cd, (i) + 5, -1, store_ab_state); \ | ||
254 | two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ | ||
255 | y6, y7, mem_ab, mem_cd, (i) + 3, -1, dummy_store); | ||
256 | |||
257 | /* | ||
258 | * IN: | ||
259 | * v0..3: byte-sliced 32-bit integers | ||
260 | * OUT: | ||
261 | * v0..3: (IN <<< 1) | ||
262 | */ | ||
263 | #define rol32_1_16(v0, v1, v2, v3, t0, t1, t2, zero) \ | ||
264 | vpcmpgtb v0, zero, t0; \ | ||
265 | vpaddb v0, v0, v0; \ | ||
266 | vpabsb t0, t0; \ | ||
267 | \ | ||
268 | vpcmpgtb v1, zero, t1; \ | ||
269 | vpaddb v1, v1, v1; \ | ||
270 | vpabsb t1, t1; \ | ||
271 | \ | ||
272 | vpcmpgtb v2, zero, t2; \ | ||
273 | vpaddb v2, v2, v2; \ | ||
274 | vpabsb t2, t2; \ | ||
275 | \ | ||
276 | vpor t0, v1, v1; \ | ||
277 | \ | ||
278 | vpcmpgtb v3, zero, t0; \ | ||
279 | vpaddb v3, v3, v3; \ | ||
280 | vpabsb t0, t0; \ | ||
281 | \ | ||
282 | vpor t1, v2, v2; \ | ||
283 | vpor t2, v3, v3; \ | ||
284 | vpor t0, v0, v0; | ||
285 | |||
286 | /* | ||
287 | * IN: | ||
288 | * r: byte-sliced AB state in memory | ||
289 | * l: byte-sliced CD state in memory | ||
290 | * OUT: | ||
291 | * x0..x7: new byte-sliced CD state | ||
292 | */ | ||
293 | #define fls16(l, l0, l1, l2, l3, l4, l5, l6, l7, r, t0, t1, t2, t3, tt0, \ | ||
294 | tt1, tt2, tt3, kll, klr, krl, krr) \ | ||
295 | /* \ | ||
296 | * t0 = kll; \ | ||
297 | * t0 &= ll; \ | ||
298 | * lr ^= rol32(t0, 1); \ | ||
299 | */ \ | ||
300 | vpxor tt0, tt0, tt0; \ | ||
301 | vmovd kll, t0; \ | ||
302 | vpshufb tt0, t0, t3; \ | ||
303 | vpsrldq $1, t0, t0; \ | ||
304 | vpshufb tt0, t0, t2; \ | ||
305 | vpsrldq $1, t0, t0; \ | ||
306 | vpshufb tt0, t0, t1; \ | ||
307 | vpsrldq $1, t0, t0; \ | ||
308 | vpshufb tt0, t0, t0; \ | ||
309 | \ | ||
310 | vpand l0, t0, t0; \ | ||
311 | vpand l1, t1, t1; \ | ||
312 | vpand l2, t2, t2; \ | ||
313 | vpand l3, t3, t3; \ | ||
314 | \ | ||
315 | rol32_1_16(t3, t2, t1, t0, tt1, tt2, tt3, tt0); \ | ||
316 | \ | ||
317 | vpxor l4, t0, l4; \ | ||
318 | vmovdqu l4, 4 * 16(l); \ | ||
319 | vpxor l5, t1, l5; \ | ||
320 | vmovdqu l5, 5 * 16(l); \ | ||
321 | vpxor l6, t2, l6; \ | ||
322 | vmovdqu l6, 6 * 16(l); \ | ||
323 | vpxor l7, t3, l7; \ | ||
324 | vmovdqu l7, 7 * 16(l); \ | ||
325 | \ | ||
326 | /* \ | ||
327 | * t2 = krr; \ | ||
328 | * t2 |= rr; \ | ||
329 | * rl ^= t2; \ | ||
330 | */ \ | ||
331 | \ | ||
332 | vmovd krr, t0; \ | ||
333 | vpshufb tt0, t0, t3; \ | ||
334 | vpsrldq $1, t0, t0; \ | ||
335 | vpshufb tt0, t0, t2; \ | ||
336 | vpsrldq $1, t0, t0; \ | ||
337 | vpshufb tt0, t0, t1; \ | ||
338 | vpsrldq $1, t0, t0; \ | ||
339 | vpshufb tt0, t0, t0; \ | ||
340 | \ | ||
341 | vpor 4 * 16(r), t0, t0; \ | ||
342 | vpor 5 * 16(r), t1, t1; \ | ||
343 | vpor 6 * 16(r), t2, t2; \ | ||
344 | vpor 7 * 16(r), t3, t3; \ | ||
345 | \ | ||
346 | vpxor 0 * 16(r), t0, t0; \ | ||
347 | vpxor 1 * 16(r), t1, t1; \ | ||
348 | vpxor 2 * 16(r), t2, t2; \ | ||
349 | vpxor 3 * 16(r), t3, t3; \ | ||
350 | vmovdqu t0, 0 * 16(r); \ | ||
351 | vmovdqu t1, 1 * 16(r); \ | ||
352 | vmovdqu t2, 2 * 16(r); \ | ||
353 | vmovdqu t3, 3 * 16(r); \ | ||
354 | \ | ||
355 | /* \ | ||
356 | * t2 = krl; \ | ||
357 | * t2 &= rl; \ | ||
358 | * rr ^= rol32(t2, 1); \ | ||
359 | */ \ | ||
360 | vmovd krl, t0; \ | ||
361 | vpshufb tt0, t0, t3; \ | ||
362 | vpsrldq $1, t0, t0; \ | ||
363 | vpshufb tt0, t0, t2; \ | ||
364 | vpsrldq $1, t0, t0; \ | ||
365 | vpshufb tt0, t0, t1; \ | ||
366 | vpsrldq $1, t0, t0; \ | ||
367 | vpshufb tt0, t0, t0; \ | ||
368 | \ | ||
369 | vpand 0 * 16(r), t0, t0; \ | ||
370 | vpand 1 * 16(r), t1, t1; \ | ||
371 | vpand 2 * 16(r), t2, t2; \ | ||
372 | vpand 3 * 16(r), t3, t3; \ | ||
373 | \ | ||
374 | rol32_1_16(t3, t2, t1, t0, tt1, tt2, tt3, tt0); \ | ||
375 | \ | ||
376 | vpxor 4 * 16(r), t0, t0; \ | ||
377 | vpxor 5 * 16(r), t1, t1; \ | ||
378 | vpxor 6 * 16(r), t2, t2; \ | ||
379 | vpxor 7 * 16(r), t3, t3; \ | ||
380 | vmovdqu t0, 4 * 16(r); \ | ||
381 | vmovdqu t1, 5 * 16(r); \ | ||
382 | vmovdqu t2, 6 * 16(r); \ | ||
383 | vmovdqu t3, 7 * 16(r); \ | ||
384 | \ | ||
385 | /* \ | ||
386 | * t0 = klr; \ | ||
387 | * t0 |= lr; \ | ||
388 | * ll ^= t0; \ | ||
389 | */ \ | ||
390 | \ | ||
391 | vmovd klr, t0; \ | ||
392 | vpshufb tt0, t0, t3; \ | ||
393 | vpsrldq $1, t0, t0; \ | ||
394 | vpshufb tt0, t0, t2; \ | ||
395 | vpsrldq $1, t0, t0; \ | ||
396 | vpshufb tt0, t0, t1; \ | ||
397 | vpsrldq $1, t0, t0; \ | ||
398 | vpshufb tt0, t0, t0; \ | ||
399 | \ | ||
400 | vpor l4, t0, t0; \ | ||
401 | vpor l5, t1, t1; \ | ||
402 | vpor l6, t2, t2; \ | ||
403 | vpor l7, t3, t3; \ | ||
404 | \ | ||
405 | vpxor l0, t0, l0; \ | ||
406 | vmovdqu l0, 0 * 16(l); \ | ||
407 | vpxor l1, t1, l1; \ | ||
408 | vmovdqu l1, 1 * 16(l); \ | ||
409 | vpxor l2, t2, l2; \ | ||
410 | vmovdqu l2, 2 * 16(l); \ | ||
411 | vpxor l3, t3, l3; \ | ||
412 | vmovdqu l3, 3 * 16(l); | ||
413 | |||
414 | #define transpose_4x4(x0, x1, x2, x3, t1, t2) \ | ||
415 | vpunpckhdq x1, x0, t2; \ | ||
416 | vpunpckldq x1, x0, x0; \ | ||
417 | \ | ||
418 | vpunpckldq x3, x2, t1; \ | ||
419 | vpunpckhdq x3, x2, x2; \ | ||
420 | \ | ||
421 | vpunpckhqdq t1, x0, x1; \ | ||
422 | vpunpcklqdq t1, x0, x0; \ | ||
423 | \ | ||
424 | vpunpckhqdq x2, t2, x3; \ | ||
425 | vpunpcklqdq x2, t2, x2; | ||
426 | |||
427 | #define byteslice_16x16b(a0, b0, c0, d0, a1, b1, c1, d1, a2, b2, c2, d2, a3, \ | ||
428 | b3, c3, d3, st0, st1) \ | ||
429 | vmovdqu d2, st0; \ | ||
430 | vmovdqu d3, st1; \ | ||
431 | transpose_4x4(a0, a1, a2, a3, d2, d3); \ | ||
432 | transpose_4x4(b0, b1, b2, b3, d2, d3); \ | ||
433 | vmovdqu st0, d2; \ | ||
434 | vmovdqu st1, d3; \ | ||
435 | \ | ||
436 | vmovdqu a0, st0; \ | ||
437 | vmovdqu a1, st1; \ | ||
438 | transpose_4x4(c0, c1, c2, c3, a0, a1); \ | ||
439 | transpose_4x4(d0, d1, d2, d3, a0, a1); \ | ||
440 | \ | ||
441 | vmovdqu .Lshufb_16x16b, a0; \ | ||
442 | vmovdqu st1, a1; \ | ||
443 | vpshufb a0, a2, a2; \ | ||
444 | vpshufb a0, a3, a3; \ | ||
445 | vpshufb a0, b0, b0; \ | ||
446 | vpshufb a0, b1, b1; \ | ||
447 | vpshufb a0, b2, b2; \ | ||
448 | vpshufb a0, b3, b3; \ | ||
449 | vpshufb a0, a1, a1; \ | ||
450 | vpshufb a0, c0, c0; \ | ||
451 | vpshufb a0, c1, c1; \ | ||
452 | vpshufb a0, c2, c2; \ | ||
453 | vpshufb a0, c3, c3; \ | ||
454 | vpshufb a0, d0, d0; \ | ||
455 | vpshufb a0, d1, d1; \ | ||
456 | vpshufb a0, d2, d2; \ | ||
457 | vpshufb a0, d3, d3; \ | ||
458 | vmovdqu d3, st1; \ | ||
459 | vmovdqu st0, d3; \ | ||
460 | vpshufb a0, d3, a0; \ | ||
461 | vmovdqu d2, st0; \ | ||
462 | \ | ||
463 | transpose_4x4(a0, b0, c0, d0, d2, d3); \ | ||
464 | transpose_4x4(a1, b1, c1, d1, d2, d3); \ | ||
465 | vmovdqu st0, d2; \ | ||
466 | vmovdqu st1, d3; \ | ||
467 | \ | ||
468 | vmovdqu b0, st0; \ | ||
469 | vmovdqu b1, st1; \ | ||
470 | transpose_4x4(a2, b2, c2, d2, b0, b1); \ | ||
471 | transpose_4x4(a3, b3, c3, d3, b0, b1); \ | ||
472 | vmovdqu st0, b0; \ | ||
473 | vmovdqu st1, b1; \ | ||
474 | /* does not adjust output bytes inside vectors */ | ||
475 | |||
476 | /* load blocks to registers and apply pre-whitening */ | ||
477 | #define inpack16_pre(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ | ||
478 | y6, y7, rio, key) \ | ||
479 | vmovq key, x0; \ | ||
480 | vpshufb .Lpack_bswap, x0, x0; \ | ||
481 | \ | ||
482 | vpxor 0 * 16(rio), x0, y7; \ | ||
483 | vpxor 1 * 16(rio), x0, y6; \ | ||
484 | vpxor 2 * 16(rio), x0, y5; \ | ||
485 | vpxor 3 * 16(rio), x0, y4; \ | ||
486 | vpxor 4 * 16(rio), x0, y3; \ | ||
487 | vpxor 5 * 16(rio), x0, y2; \ | ||
488 | vpxor 6 * 16(rio), x0, y1; \ | ||
489 | vpxor 7 * 16(rio), x0, y0; \ | ||
490 | vpxor 8 * 16(rio), x0, x7; \ | ||
491 | vpxor 9 * 16(rio), x0, x6; \ | ||
492 | vpxor 10 * 16(rio), x0, x5; \ | ||
493 | vpxor 11 * 16(rio), x0, x4; \ | ||
494 | vpxor 12 * 16(rio), x0, x3; \ | ||
495 | vpxor 13 * 16(rio), x0, x2; \ | ||
496 | vpxor 14 * 16(rio), x0, x1; \ | ||
497 | vpxor 15 * 16(rio), x0, x0; | ||
498 | |||
499 | /* byteslice pre-whitened blocks and store to temporary memory */ | ||
500 | #define inpack16_post(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ | ||
501 | y6, y7, mem_ab, mem_cd) \ | ||
502 | byteslice_16x16b(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, \ | ||
503 | y5, y6, y7, (mem_ab), (mem_cd)); \ | ||
504 | \ | ||
505 | vmovdqu x0, 0 * 16(mem_ab); \ | ||
506 | vmovdqu x1, 1 * 16(mem_ab); \ | ||
507 | vmovdqu x2, 2 * 16(mem_ab); \ | ||
508 | vmovdqu x3, 3 * 16(mem_ab); \ | ||
509 | vmovdqu x4, 4 * 16(mem_ab); \ | ||
510 | vmovdqu x5, 5 * 16(mem_ab); \ | ||
511 | vmovdqu x6, 6 * 16(mem_ab); \ | ||
512 | vmovdqu x7, 7 * 16(mem_ab); \ | ||
513 | vmovdqu y0, 0 * 16(mem_cd); \ | ||
514 | vmovdqu y1, 1 * 16(mem_cd); \ | ||
515 | vmovdqu y2, 2 * 16(mem_cd); \ | ||
516 | vmovdqu y3, 3 * 16(mem_cd); \ | ||
517 | vmovdqu y4, 4 * 16(mem_cd); \ | ||
518 | vmovdqu y5, 5 * 16(mem_cd); \ | ||
519 | vmovdqu y6, 6 * 16(mem_cd); \ | ||
520 | vmovdqu y7, 7 * 16(mem_cd); | ||
521 | |||
522 | /* de-byteslice, apply post-whitening and store blocks */ | ||
523 | #define outunpack16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, \ | ||
524 | y5, y6, y7, key, stack_tmp0, stack_tmp1) \ | ||
525 | byteslice_16x16b(y0, y4, x0, x4, y1, y5, x1, x5, y2, y6, x2, x6, y3, \ | ||
526 | y7, x3, x7, stack_tmp0, stack_tmp1); \ | ||
527 | \ | ||
528 | vmovdqu x0, stack_tmp0; \ | ||
529 | \ | ||
530 | vmovq key, x0; \ | ||
531 | vpshufb .Lpack_bswap, x0, x0; \ | ||
532 | \ | ||
533 | vpxor x0, y7, y7; \ | ||
534 | vpxor x0, y6, y6; \ | ||
535 | vpxor x0, y5, y5; \ | ||
536 | vpxor x0, y4, y4; \ | ||
537 | vpxor x0, y3, y3; \ | ||
538 | vpxor x0, y2, y2; \ | ||
539 | vpxor x0, y1, y1; \ | ||
540 | vpxor x0, y0, y0; \ | ||
541 | vpxor x0, x7, x7; \ | ||
542 | vpxor x0, x6, x6; \ | ||
543 | vpxor x0, x5, x5; \ | ||
544 | vpxor x0, x4, x4; \ | ||
545 | vpxor x0, x3, x3; \ | ||
546 | vpxor x0, x2, x2; \ | ||
547 | vpxor x0, x1, x1; \ | ||
548 | vpxor stack_tmp0, x0, x0; | ||
549 | |||
550 | #define write_output(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ | ||
551 | y6, y7, rio) \ | ||
552 | vmovdqu x0, 0 * 16(rio); \ | ||
553 | vmovdqu x1, 1 * 16(rio); \ | ||
554 | vmovdqu x2, 2 * 16(rio); \ | ||
555 | vmovdqu x3, 3 * 16(rio); \ | ||
556 | vmovdqu x4, 4 * 16(rio); \ | ||
557 | vmovdqu x5, 5 * 16(rio); \ | ||
558 | vmovdqu x6, 6 * 16(rio); \ | ||
559 | vmovdqu x7, 7 * 16(rio); \ | ||
560 | vmovdqu y0, 8 * 16(rio); \ | ||
561 | vmovdqu y1, 9 * 16(rio); \ | ||
562 | vmovdqu y2, 10 * 16(rio); \ | ||
563 | vmovdqu y3, 11 * 16(rio); \ | ||
564 | vmovdqu y4, 12 * 16(rio); \ | ||
565 | vmovdqu y5, 13 * 16(rio); \ | ||
566 | vmovdqu y6, 14 * 16(rio); \ | ||
567 | vmovdqu y7, 15 * 16(rio); | ||
568 | |||
569 | .data | ||
570 | .align 16 | ||
571 | |||
572 | #define SHUFB_BYTES(idx) \ | ||
573 | 0 + (idx), 4 + (idx), 8 + (idx), 12 + (idx) | ||
574 | |||
575 | .Lshufb_16x16b: | ||
576 | .byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3); | ||
577 | |||
578 | .Lpack_bswap: | ||
579 | .long 0x00010203 | ||
580 | .long 0x04050607 | ||
581 | .long 0x80808080 | ||
582 | .long 0x80808080 | ||
583 | |||
584 | /* For CTR-mode IV byteswap */ | ||
585 | .Lbswap128_mask: | ||
586 | .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 | ||
587 | |||
588 | /* | ||
589 | * pre-SubByte transform | ||
590 | * | ||
591 | * pre-lookup for sbox1, sbox2, sbox3: | ||
592 | * swap_bitendianness( | ||
593 | * isom_map_camellia_to_aes( | ||
594 | * camellia_f( | ||
595 | * swap_bitendianess(in) | ||
596 | * ) | ||
597 | * ) | ||
598 | * ) | ||
599 | * | ||
600 | * (note: '⊕ 0xc5' inside camellia_f()) | ||
601 | */ | ||
602 | .Lpre_tf_lo_s1: | ||
603 | .byte 0x45, 0xe8, 0x40, 0xed, 0x2e, 0x83, 0x2b, 0x86 | ||
604 | .byte 0x4b, 0xe6, 0x4e, 0xe3, 0x20, 0x8d, 0x25, 0x88 | ||
605 | .Lpre_tf_hi_s1: | ||
606 | .byte 0x00, 0x51, 0xf1, 0xa0, 0x8a, 0xdb, 0x7b, 0x2a | ||
607 | .byte 0x09, 0x58, 0xf8, 0xa9, 0x83, 0xd2, 0x72, 0x23 | ||
608 | |||
609 | /* | ||
610 | * pre-SubByte transform | ||
611 | * | ||
612 | * pre-lookup for sbox4: | ||
613 | * swap_bitendianness( | ||
614 | * isom_map_camellia_to_aes( | ||
615 | * camellia_f( | ||
616 | * swap_bitendianess(in <<< 1) | ||
617 | * ) | ||
618 | * ) | ||
619 | * ) | ||
620 | * | ||
621 | * (note: '⊕ 0xc5' inside camellia_f()) | ||
622 | */ | ||
623 | .Lpre_tf_lo_s4: | ||
624 | .byte 0x45, 0x40, 0x2e, 0x2b, 0x4b, 0x4e, 0x20, 0x25 | ||
625 | .byte 0x14, 0x11, 0x7f, 0x7a, 0x1a, 0x1f, 0x71, 0x74 | ||
626 | .Lpre_tf_hi_s4: | ||
627 | .byte 0x00, 0xf1, 0x8a, 0x7b, 0x09, 0xf8, 0x83, 0x72 | ||
628 | .byte 0xad, 0x5c, 0x27, 0xd6, 0xa4, 0x55, 0x2e, 0xdf | ||
629 | |||
630 | /* | ||
631 | * post-SubByte transform | ||
632 | * | ||
633 | * post-lookup for sbox1, sbox4: | ||
634 | * swap_bitendianness( | ||
635 | * camellia_h( | ||
636 | * isom_map_aes_to_camellia( | ||
637 | * swap_bitendianness( | ||
638 | * aes_inverse_affine_transform(in) | ||
639 | * ) | ||
640 | * ) | ||
641 | * ) | ||
642 | * ) | ||
643 | * | ||
644 | * (note: '⊕ 0x6e' inside camellia_h()) | ||
645 | */ | ||
646 | .Lpost_tf_lo_s1: | ||
647 | .byte 0x3c, 0xcc, 0xcf, 0x3f, 0x32, 0xc2, 0xc1, 0x31 | ||
648 | .byte 0xdc, 0x2c, 0x2f, 0xdf, 0xd2, 0x22, 0x21, 0xd1 | ||
649 | .Lpost_tf_hi_s1: | ||
650 | .byte 0x00, 0xf9, 0x86, 0x7f, 0xd7, 0x2e, 0x51, 0xa8 | ||
651 | .byte 0xa4, 0x5d, 0x22, 0xdb, 0x73, 0x8a, 0xf5, 0x0c | ||
652 | |||
653 | /* | ||
654 | * post-SubByte transform | ||
655 | * | ||
656 | * post-lookup for sbox2: | ||
657 | * swap_bitendianness( | ||
658 | * camellia_h( | ||
659 | * isom_map_aes_to_camellia( | ||
660 | * swap_bitendianness( | ||
661 | * aes_inverse_affine_transform(in) | ||
662 | * ) | ||
663 | * ) | ||
664 | * ) | ||
665 | * ) <<< 1 | ||
666 | * | ||
667 | * (note: '⊕ 0x6e' inside camellia_h()) | ||
668 | */ | ||
669 | .Lpost_tf_lo_s2: | ||
670 | .byte 0x78, 0x99, 0x9f, 0x7e, 0x64, 0x85, 0x83, 0x62 | ||
671 | .byte 0xb9, 0x58, 0x5e, 0xbf, 0xa5, 0x44, 0x42, 0xa3 | ||
672 | .Lpost_tf_hi_s2: | ||
673 | .byte 0x00, 0xf3, 0x0d, 0xfe, 0xaf, 0x5c, 0xa2, 0x51 | ||
674 | .byte 0x49, 0xba, 0x44, 0xb7, 0xe6, 0x15, 0xeb, 0x18 | ||
675 | |||
676 | /* | ||
677 | * post-SubByte transform | ||
678 | * | ||
679 | * post-lookup for sbox3: | ||
680 | * swap_bitendianness( | ||
681 | * camellia_h( | ||
682 | * isom_map_aes_to_camellia( | ||
683 | * swap_bitendianness( | ||
684 | * aes_inverse_affine_transform(in) | ||
685 | * ) | ||
686 | * ) | ||
687 | * ) | ||
688 | * ) >>> 1 | ||
689 | * | ||
690 | * (note: '⊕ 0x6e' inside camellia_h()) | ||
691 | */ | ||
692 | .Lpost_tf_lo_s3: | ||
693 | .byte 0x1e, 0x66, 0xe7, 0x9f, 0x19, 0x61, 0xe0, 0x98 | ||
694 | .byte 0x6e, 0x16, 0x97, 0xef, 0x69, 0x11, 0x90, 0xe8 | ||
695 | .Lpost_tf_hi_s3: | ||
696 | .byte 0x00, 0xfc, 0x43, 0xbf, 0xeb, 0x17, 0xa8, 0x54 | ||
697 | .byte 0x52, 0xae, 0x11, 0xed, 0xb9, 0x45, 0xfa, 0x06 | ||
698 | |||
699 | /* For isolating SubBytes from AESENCLAST, inverse shift row */ | ||
700 | .Linv_shift_row: | ||
701 | .byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b | ||
702 | .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03 | ||
703 | |||
704 | /* 4-bit mask */ | ||
705 | .align 4 | ||
706 | .L0f0f0f0f: | ||
707 | .long 0x0f0f0f0f | ||
708 | |||
709 | .text | ||
710 | |||
711 | .align 8 | ||
712 | .type __camellia_enc_blk16,@function; | ||
713 | |||
714 | __camellia_enc_blk16: | ||
715 | /* input: | ||
716 | * %rdi: ctx, CTX | ||
717 | * %rax: temporary storage, 256 bytes | ||
718 | * %xmm0..%xmm15: 16 plaintext blocks | ||
719 | * output: | ||
720 | * %xmm0..%xmm15: 16 encrypted blocks, order swapped: | ||
721 | * 7, 8, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 | ||
722 | */ | ||
723 | |||
724 | leaq 8 * 16(%rax), %rcx; | ||
725 | |||
726 | inpack16_post(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, | ||
727 | %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, | ||
728 | %xmm15, %rax, %rcx); | ||
729 | |||
730 | enc_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, | ||
731 | %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, | ||
732 | %xmm15, %rax, %rcx, 0); | ||
733 | |||
734 | fls16(%rax, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, | ||
735 | %rcx, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, | ||
736 | %xmm15, | ||
737 | ((key_table + (8) * 8) + 0)(CTX), | ||
738 | ((key_table + (8) * 8) + 4)(CTX), | ||
739 | ((key_table + (8) * 8) + 8)(CTX), | ||
740 | ((key_table + (8) * 8) + 12)(CTX)); | ||
741 | |||
742 | enc_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, | ||
743 | %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, | ||
744 | %xmm15, %rax, %rcx, 8); | ||
745 | |||
746 | fls16(%rax, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, | ||
747 | %rcx, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, | ||
748 | %xmm15, | ||
749 | ((key_table + (16) * 8) + 0)(CTX), | ||
750 | ((key_table + (16) * 8) + 4)(CTX), | ||
751 | ((key_table + (16) * 8) + 8)(CTX), | ||
752 | ((key_table + (16) * 8) + 12)(CTX)); | ||
753 | |||
754 | enc_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, | ||
755 | %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, | ||
756 | %xmm15, %rax, %rcx, 16); | ||
757 | |||
758 | movl $24, %r8d; | ||
759 | cmpl $16, key_length(CTX); | ||
760 | jne .Lenc_max32; | ||
761 | |||
762 | .Lenc_done: | ||
763 | /* load CD for output */ | ||
764 | vmovdqu 0 * 16(%rcx), %xmm8; | ||
765 | vmovdqu 1 * 16(%rcx), %xmm9; | ||
766 | vmovdqu 2 * 16(%rcx), %xmm10; | ||
767 | vmovdqu 3 * 16(%rcx), %xmm11; | ||
768 | vmovdqu 4 * 16(%rcx), %xmm12; | ||
769 | vmovdqu 5 * 16(%rcx), %xmm13; | ||
770 | vmovdqu 6 * 16(%rcx), %xmm14; | ||
771 | vmovdqu 7 * 16(%rcx), %xmm15; | ||
772 | |||
773 | outunpack16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, | ||
774 | %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, | ||
775 | %xmm15, (key_table)(CTX, %r8, 8), (%rax), 1 * 16(%rax)); | ||
776 | |||
777 | ret; | ||
778 | |||
779 | .align 8 | ||
780 | .Lenc_max32: | ||
781 | movl $32, %r8d; | ||
782 | |||
783 | fls16(%rax, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, | ||
784 | %rcx, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, | ||
785 | %xmm15, | ||
786 | ((key_table + (24) * 8) + 0)(CTX), | ||
787 | ((key_table + (24) * 8) + 4)(CTX), | ||
788 | ((key_table + (24) * 8) + 8)(CTX), | ||
789 | ((key_table + (24) * 8) + 12)(CTX)); | ||
790 | |||
791 | enc_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, | ||
792 | %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, | ||
793 | %xmm15, %rax, %rcx, 24); | ||
794 | |||
795 | jmp .Lenc_done; | ||
796 | |||
797 | .align 8 | ||
798 | .type __camellia_dec_blk16,@function; | ||
799 | |||
800 | __camellia_dec_blk16: | ||
801 | /* input: | ||
802 | * %rdi: ctx, CTX | ||
803 | * %rax: temporary storage, 256 bytes | ||
804 | * %r8d: 24 for 16 byte key, 32 for larger | ||
805 | * %xmm0..%xmm15: 16 encrypted blocks | ||
806 | * output: | ||
807 | * %xmm0..%xmm15: 16 plaintext blocks, order swapped: | ||
808 | * 7, 8, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 | ||
809 | */ | ||
810 | |||
811 | leaq 8 * 16(%rax), %rcx; | ||
812 | |||
813 | inpack16_post(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, | ||
814 | %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, | ||
815 | %xmm15, %rax, %rcx); | ||
816 | |||
817 | cmpl $32, %r8d; | ||
818 | je .Ldec_max32; | ||
819 | |||
820 | .Ldec_max24: | ||
821 | dec_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, | ||
822 | %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, | ||
823 | %xmm15, %rax, %rcx, 16); | ||
824 | |||
825 | fls16(%rax, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, | ||
826 | %rcx, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, | ||
827 | %xmm15, | ||
828 | ((key_table + (16) * 8) + 8)(CTX), | ||
829 | ((key_table + (16) * 8) + 12)(CTX), | ||
830 | ((key_table + (16) * 8) + 0)(CTX), | ||
831 | ((key_table + (16) * 8) + 4)(CTX)); | ||
832 | |||
833 | dec_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, | ||
834 | %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, | ||
835 | %xmm15, %rax, %rcx, 8); | ||
836 | |||
837 | fls16(%rax, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, | ||
838 | %rcx, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, | ||
839 | %xmm15, | ||
840 | ((key_table + (8) * 8) + 8)(CTX), | ||
841 | ((key_table + (8) * 8) + 12)(CTX), | ||
842 | ((key_table + (8) * 8) + 0)(CTX), | ||
843 | ((key_table + (8) * 8) + 4)(CTX)); | ||
844 | |||
845 | dec_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, | ||
846 | %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, | ||
847 | %xmm15, %rax, %rcx, 0); | ||
848 | |||
849 | /* load CD for output */ | ||
850 | vmovdqu 0 * 16(%rcx), %xmm8; | ||
851 | vmovdqu 1 * 16(%rcx), %xmm9; | ||
852 | vmovdqu 2 * 16(%rcx), %xmm10; | ||
853 | vmovdqu 3 * 16(%rcx), %xmm11; | ||
854 | vmovdqu 4 * 16(%rcx), %xmm12; | ||
855 | vmovdqu 5 * 16(%rcx), %xmm13; | ||
856 | vmovdqu 6 * 16(%rcx), %xmm14; | ||
857 | vmovdqu 7 * 16(%rcx), %xmm15; | ||
858 | |||
859 | outunpack16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, | ||
860 | %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, | ||
861 | %xmm15, (key_table)(CTX), (%rax), 1 * 16(%rax)); | ||
862 | |||
863 | ret; | ||
864 | |||
865 | .align 8 | ||
866 | .Ldec_max32: | ||
867 | dec_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, | ||
868 | %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, | ||
869 | %xmm15, %rax, %rcx, 24); | ||
870 | |||
871 | fls16(%rax, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, | ||
872 | %rcx, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, | ||
873 | %xmm15, | ||
874 | ((key_table + (24) * 8) + 8)(CTX), | ||
875 | ((key_table + (24) * 8) + 12)(CTX), | ||
876 | ((key_table + (24) * 8) + 0)(CTX), | ||
877 | ((key_table + (24) * 8) + 4)(CTX)); | ||
878 | |||
879 | jmp .Ldec_max24; | ||
880 | |||
.align 8
.global camellia_ecb_enc_16way
.type camellia_ecb_enc_16way,@function;

camellia_ecb_enc_16way:
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst (16 blocks)
	 *	%rdx: src (16 blocks)
	 */

	/* Load 16 source blocks into %xmm0..%xmm15 and apply the
	 * prewhitening key at key_table offset 0. */
	inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
		     %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
		     %xmm15, %rdx, (key_table)(CTX));

	/* now dst can be used as temporary buffer (even in src == dst case) */
	movq	%rsi, %rax;

	call __camellia_enc_blk16;

	/* Store the 16 ciphertext blocks to dst (%rsi); register order is
	 * reversed relative to the load above. */
	write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0,
		     %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
		     %xmm8, %rsi);

	ret;
906 | |||
.align 8
.global camellia_ecb_dec_16way
.type camellia_ecb_dec_16way,@function;

camellia_ecb_dec_16way:
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst (16 blocks)
	 *	%rdx: src (16 blocks)
	 */

	/* Select the index of the last subkey: %r8d = 24 when the key is
	 * 16 bytes long, otherwise 32.  Decryption starts from the end of
	 * the key schedule. */
	cmpl $16, key_length(CTX);
	movl $32, %r8d;
	movl $24, %eax;
	cmovel %eax, %r8d; /* max */

	/* Load 16 source blocks and prewhiten with key_table[max]. */
	inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
		     %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
		     %xmm15, %rdx, (key_table)(CTX, %r8, 8));

	/* now dst can be used as temporary buffer (even in src == dst case) */
	movq	%rsi, %rax;

	call __camellia_dec_blk16;

	/* Store the 16 plaintext blocks to dst (%rsi). */
	write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0,
		     %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
		     %xmm8, %rsi);

	ret;
937 | |||
.align 8
.global camellia_cbc_dec_16way
.type camellia_cbc_dec_16way,@function;

camellia_cbc_dec_16way:
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst (16 blocks)
	 *	%rdx: src (16 blocks)
	 */

	/* %r8d = 24 for 16-byte keys, else 32: index of the last subkey,
	 * where decryption starts. */
	cmpl $16, key_length(CTX);
	movl $32, %r8d;
	movl $24, %eax;
	cmovel %eax, %r8d; /* max */

	inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
		     %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
		     %xmm15, %rdx, (key_table)(CTX, %r8, 8));

	/*
	 * dst might still be in-use (in case dst == src), so use stack for
	 * temporary storage.
	 */
	subq $(16 * 16), %rsp;
	movq %rsp, %rax;

	call __camellia_dec_blk16;

	addq $(16 * 16), %rsp;

	/* CBC chaining: XOR each decrypted block (1..15) with the previous
	 * ciphertext block still available in src (%rdx).  Block 0 (%xmm7)
	 * is left untouched here; presumably the caller XORs in the IV --
	 * NOTE(review): confirm against the glue code. */
	vpxor (0 * 16)(%rdx), %xmm6, %xmm6;
	vpxor (1 * 16)(%rdx), %xmm5, %xmm5;
	vpxor (2 * 16)(%rdx), %xmm4, %xmm4;
	vpxor (3 * 16)(%rdx), %xmm3, %xmm3;
	vpxor (4 * 16)(%rdx), %xmm2, %xmm2;
	vpxor (5 * 16)(%rdx), %xmm1, %xmm1;
	vpxor (6 * 16)(%rdx), %xmm0, %xmm0;
	vpxor (7 * 16)(%rdx), %xmm15, %xmm15;
	vpxor (8 * 16)(%rdx), %xmm14, %xmm14;
	vpxor (9 * 16)(%rdx), %xmm13, %xmm13;
	vpxor (10 * 16)(%rdx), %xmm12, %xmm12;
	vpxor (11 * 16)(%rdx), %xmm11, %xmm11;
	vpxor (12 * 16)(%rdx), %xmm10, %xmm10;
	vpxor (13 * 16)(%rdx), %xmm9, %xmm9;
	vpxor (14 * 16)(%rdx), %xmm8, %xmm8;
	write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0,
		     %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
		     %xmm8, %rsi);

	ret;
989 | |||
/*
 * Increment a 128-bit little-endian counter held in xmm register `x`.
 * `minus_one` must contain (low qword = -1, high qword = 0).  vpsubq adds
 * 1 to the low qword; the vpcmpeqq/vpslldq/vpsubq sequence propagates the
 * carry into the high qword when the low qword was all-ones.
 */
#define inc_le128(x, minus_one, tmp) \
	vpcmpeqq minus_one, x, tmp; \
	vpsubq minus_one, x, x; \
	vpslldq $8, tmp, tmp; \
	vpsubq tmp, x, x;

.align 8
.global camellia_ctr_16way
.type camellia_ctr_16way,@function;

camellia_ctr_16way:
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst (16 blocks)
	 *	%rdx: src (16 blocks)
	 *	%rcx: iv (little endian, 128bit)
	 */

	/* Stack buffer for counter blocks that don't fit in registers. */
	subq $(16 * 16), %rsp;
	movq %rsp, %rax;

	vmovdqa .Lbswap128_mask, %xmm14;

	/* load IV and byteswap */
	vmovdqu (%rcx), %xmm0;
	vpshufb %xmm14, %xmm0, %xmm15;
	vmovdqu %xmm15, 15 * 16(%rax);

	/* Build the (low:-1, high:0) constant needed by inc_le128. */
	vpcmpeqd %xmm15, %xmm15, %xmm15;
	vpsrldq $8, %xmm15, %xmm15; /* low: -1, high: 0 */

	/* construct IVs: counter+1 .. counter+15, byteswapped.  The first
	 * two spill to the stack (14*16, 13*16); the rest land in
	 * %xmm12 down to %xmm0. */
	inc_le128(%xmm0, %xmm15, %xmm13);
	vpshufb %xmm14, %xmm0, %xmm13;
	vmovdqu %xmm13, 14 * 16(%rax);
	inc_le128(%xmm0, %xmm15, %xmm13);
	vpshufb %xmm14, %xmm0, %xmm13;
	vmovdqu %xmm13, 13 * 16(%rax);
	inc_le128(%xmm0, %xmm15, %xmm13);
	vpshufb %xmm14, %xmm0, %xmm12;
	inc_le128(%xmm0, %xmm15, %xmm13);
	vpshufb %xmm14, %xmm0, %xmm11;
	inc_le128(%xmm0, %xmm15, %xmm13);
	vpshufb %xmm14, %xmm0, %xmm10;
	inc_le128(%xmm0, %xmm15, %xmm13);
	vpshufb %xmm14, %xmm0, %xmm9;
	inc_le128(%xmm0, %xmm15, %xmm13);
	vpshufb %xmm14, %xmm0, %xmm8;
	inc_le128(%xmm0, %xmm15, %xmm13);
	vpshufb %xmm14, %xmm0, %xmm7;
	inc_le128(%xmm0, %xmm15, %xmm13);
	vpshufb %xmm14, %xmm0, %xmm6;
	inc_le128(%xmm0, %xmm15, %xmm13);
	vpshufb %xmm14, %xmm0, %xmm5;
	inc_le128(%xmm0, %xmm15, %xmm13);
	vpshufb %xmm14, %xmm0, %xmm4;
	inc_le128(%xmm0, %xmm15, %xmm13);
	vpshufb %xmm14, %xmm0, %xmm3;
	inc_le128(%xmm0, %xmm15, %xmm13);
	vpshufb %xmm14, %xmm0, %xmm2;
	inc_le128(%xmm0, %xmm15, %xmm13);
	vpshufb %xmm14, %xmm0, %xmm1;
	inc_le128(%xmm0, %xmm15, %xmm13);
	/* Advance the counter once more and write it back to the IV buffer
	 * for the next call. */
	vmovdqa %xmm0, %xmm13;
	vpshufb %xmm14, %xmm0, %xmm0;
	inc_le128(%xmm13, %xmm15, %xmm14);
	vmovdqu %xmm13, (%rcx);

	/* inpack16_pre: XOR all 16 counter blocks with the prewhitening
	 * key broadcast into %xmm15. */
	vmovq (key_table)(CTX), %xmm15;
	vpshufb .Lpack_bswap, %xmm15, %xmm15;
	vpxor %xmm0, %xmm15, %xmm0;
	vpxor %xmm1, %xmm15, %xmm1;
	vpxor %xmm2, %xmm15, %xmm2;
	vpxor %xmm3, %xmm15, %xmm3;
	vpxor %xmm4, %xmm15, %xmm4;
	vpxor %xmm5, %xmm15, %xmm5;
	vpxor %xmm6, %xmm15, %xmm6;
	vpxor %xmm7, %xmm15, %xmm7;
	vpxor %xmm8, %xmm15, %xmm8;
	vpxor %xmm9, %xmm15, %xmm9;
	vpxor %xmm10, %xmm15, %xmm10;
	vpxor %xmm11, %xmm15, %xmm11;
	vpxor %xmm12, %xmm15, %xmm12;
	vpxor 13 * 16(%rax), %xmm15, %xmm13;
	vpxor 14 * 16(%rax), %xmm15, %xmm14;
	vpxor 15 * 16(%rax), %xmm15, %xmm15;

	call __camellia_enc_blk16;

	addq $(16 * 16), %rsp;

	/* CTR mode: XOR the encrypted counter stream with the source. */
	vpxor 0 * 16(%rdx), %xmm7, %xmm7;
	vpxor 1 * 16(%rdx), %xmm6, %xmm6;
	vpxor 2 * 16(%rdx), %xmm5, %xmm5;
	vpxor 3 * 16(%rdx), %xmm4, %xmm4;
	vpxor 4 * 16(%rdx), %xmm3, %xmm3;
	vpxor 5 * 16(%rdx), %xmm2, %xmm2;
	vpxor 6 * 16(%rdx), %xmm1, %xmm1;
	vpxor 7 * 16(%rdx), %xmm0, %xmm0;
	vpxor 8 * 16(%rdx), %xmm15, %xmm15;
	vpxor 9 * 16(%rdx), %xmm14, %xmm14;
	vpxor 10 * 16(%rdx), %xmm13, %xmm13;
	vpxor 11 * 16(%rdx), %xmm12, %xmm12;
	vpxor 12 * 16(%rdx), %xmm11, %xmm11;
	vpxor 13 * 16(%rdx), %xmm10, %xmm10;
	vpxor 14 * 16(%rdx), %xmm9, %xmm9;
	vpxor 15 * 16(%rdx), %xmm8, %xmm8;
	write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0,
		     %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
		     %xmm8, %rsi);

	ret;
diff --git a/arch/x86/crypto/camellia-x86_64-asm_64.S b/arch/x86/crypto/camellia-x86_64-asm_64.S deleted file mode 100644 index 0b3374335fd..00000000000 --- a/arch/x86/crypto/camellia-x86_64-asm_64.S +++ /dev/null | |||
@@ -1,520 +0,0 @@ | |||
1 | /* | ||
2 | * Camellia Cipher Algorithm (x86_64) | ||
3 | * | ||
4 | * Copyright (C) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
19 | * USA | ||
20 | * | ||
21 | */ | ||
22 | |||
.file "camellia-x86_64-asm_64.S"
.text

/* Camellia s-box lookup tables (8 tables of 256 x 64-bit entries),
 * defined elsewhere and resolved at link time. */
.extern camellia_sp10011110;
.extern camellia_sp22000222;
.extern camellia_sp03303033;
.extern camellia_sp00444404;
.extern camellia_sp02220222;
.extern camellia_sp30333033;
.extern camellia_sp44044404;
.extern camellia_sp11101110;

/* Short aliases used by the round macros below. */
#define sp10011110 camellia_sp10011110
#define sp22000222 camellia_sp22000222
#define sp03303033 camellia_sp03303033
#define sp00444404 camellia_sp00444404
#define sp02220222 camellia_sp02220222
#define sp30333033 camellia_sp30333033
#define sp44044404 camellia_sp44044404
#define sp11101110 camellia_sp11101110

#define CAMELLIA_TABLE_BYTE_LEN 272

/* struct camellia_ctx: byte offsets of the round-key table and the
 * key length field (the latter sits right after the 272-byte table). */
#define key_table 0
#define key_length CAMELLIA_TABLE_BYTE_LEN

/* register macros */
#define CTX %rdi	/* context pointer (1st C argument) */
#define RIO %rsi	/* current src/dst block pointer */
#define RIOd %esi

/* AB/CD halves of the Camellia state, 0 = first block, 1 = second. */
#define RAB0 %rax
#define RCD0 %rcx
#define RAB1 %rbx
#define RCD1 %rdx

#define RAB0d %eax
#define RCD0d %ecx
#define RAB1d %ebx
#define RCD1d %edx

#define RAB0bl %al
#define RCD0bl %cl
#define RAB1bl %bl
#define RCD1bl %dl

#define RAB0bh %ah
#define RCD0bh %ch
#define RAB1bh %bh
#define RCD1bh %dh

/* Scratch registers; note RT1 is %rbp, so callers must save %rbp. */
#define RT0 %rsi
#define RT1 %rbp
#define RT2 %r8

#define RT0d %esi
#define RT1d %ebp
#define RT2d %r8d

#define RT2bl %r8b

#define RXOR %r9	/* holds the "xor output" flag / saved %rbx */
#define RRBP %r10	/* saved %rbp */
#define RDST %r11	/* saved dst pointer */

#define RXORd %r9d
#define RXORbl %r9b

/*
 * Two s-box lookups folded into `dst`: index by the low byte and the
 * second byte of `ab`, then rotate `ab` right by 16 to expose the next
 * byte pair for the following invocation.
 */
#define xor2ror16(T0, T1, tmp1, tmp2, ab, dst) \
	movzbl ab ## bl,		tmp2 ## d; \
	movzbl ab ## bh,		tmp1 ## d; \
	rorq $16,			ab; \
	xorq T0(, tmp2, 8),		dst; \
	xorq T1(, tmp1, 8),		dst;
98 | |||
/**********************************************************************
  1-way camellia
 **********************************************************************/
/*
 * One Feistel round on a single block: eight table lookups driven by
 * the bytes of `ab`, keyed by subkey index `subkey`, folded into `cd`.
 */
#define roundsm(ab, subkey, cd) \
	movq (key_table + ((subkey) * 2) * 4)(CTX), RT2; \
	\
	xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \
	xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \
	xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \
	xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \
	\
	xorq RT2, cd ## 0;

/*
 * FL/FL^-1 layer applied between 6-round groups, on the `l` and `r`
 * halves using subkeys `kl` and `kr` (and-rotate-xor / or-xor mixing).
 */
#define fls(l, r, kl, kr) \
	movl (key_table + ((kl) * 2) * 4)(CTX), RT0d; \
	andl l ## 0d, RT0d; \
	roll $1, RT0d; \
	shlq $32, RT0; \
	xorq RT0, l ## 0; \
	movq (key_table + ((kr) * 2) * 4)(CTX), RT1; \
	orq r ## 0, RT1; \
	shrq $32, RT1; \
	xorq RT1, r ## 0; \
	\
	movq (key_table + ((kl) * 2) * 4)(CTX), RT2; \
	orq l ## 0, RT2; \
	shrq $32, RT2; \
	xorq RT2, l ## 0; \
	movl (key_table + ((kr) * 2) * 4)(CTX), RT0d; \
	andl r ## 0d, RT0d; \
	roll $1, RT0d; \
	shlq $32, RT0; \
	xorq RT0, r ## 0;

/* Six forward rounds, alternating which half is updated. */
#define enc_rounds(i) \
	roundsm(RAB, i + 2, RCD); \
	roundsm(RCD, i + 3, RAB); \
	roundsm(RAB, i + 4, RCD); \
	roundsm(RCD, i + 5, RAB); \
	roundsm(RAB, i + 6, RCD); \
	roundsm(RCD, i + 7, RAB);

#define enc_fls(i) \
	fls(RAB, RCD, i + 0, i + 1);

/* Load one 16-byte block from RIO (big-endian on the wire), rotate the
 * halves into working order, and XOR in the prewhitening key. */
#define enc_inpack() \
	movq (RIO), RAB0; \
	bswapq RAB0; \
	rolq $32, RAB0; \
	movq 4*2(RIO), RCD0; \
	bswapq RCD0; \
	rorq $32, RCD0; \
	xorq key_table(CTX), RAB0;

/* Postwhiten with key_table[max], undo the rotations/byteswap and emit
 * via `op` (mov for plain output, xor for in-place XOR output). */
#define enc_outunpack(op, max) \
	xorq key_table(CTX, max, 8), RCD0; \
	rorq $32, RCD0; \
	bswapq RCD0; \
	op ## q RCD0, (RIO); \
	rolq $32, RAB0; \
	bswapq RAB0; \
	op ## q RAB0, 4*2(RIO);

/* Six rounds with the subkey order reversed (decryption direction). */
#define dec_rounds(i) \
	roundsm(RAB, i + 7, RCD); \
	roundsm(RCD, i + 6, RAB); \
	roundsm(RAB, i + 5, RCD); \
	roundsm(RCD, i + 4, RAB); \
	roundsm(RAB, i + 3, RCD); \
	roundsm(RCD, i + 2, RAB);

#define dec_fls(i) \
	fls(RAB, RCD, i + 1, i + 0);

/* As enc_inpack, but prewhitening uses the last subkey (index `max`). */
#define dec_inpack(max) \
	movq (RIO), RAB0; \
	bswapq RAB0; \
	rolq $32, RAB0; \
	movq 4*2(RIO), RCD0; \
	bswapq RCD0; \
	rorq $32, RCD0; \
	xorq key_table(CTX, max, 8), RAB0;

/* As enc_outunpack, but postwhitening uses key_table[0]; plain store. */
#define dec_outunpack() \
	xorq key_table(CTX), RCD0; \
	rorq $32, RCD0; \
	bswapq RCD0; \
	movq RCD0, (RIO); \
	rolq $32, RAB0; \
	bswapq RAB0; \
	movq RAB0, 4*2(RIO);
190 | |||
.global __camellia_enc_blk;
.type __camellia_enc_blk,@function;

__camellia_enc_blk:
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 *	%rcx: bool xor (non-zero: XOR output into dst instead of store)
	 */
	/* The round macros use %rbp as scratch (RT1), so preserve it. */
	movq %rbp, RRBP;

	movq %rcx, RXOR;
	movq %rsi, RDST;
	movq %rdx, RIO;

	enc_inpack();

	/* 18 rounds for 16-byte keys; longer keys run 6 more below. */
	enc_rounds(0);
	enc_fls(8);
	enc_rounds(8);
	enc_fls(16);
	enc_rounds(16);
	movl $24, RT1d; /* max */

	cmpb $16, key_length(CTX);
	je __enc_done;

	enc_fls(24);
	enc_rounds(24);
	movl $32, RT1d; /* max */

__enc_done:
	/* Choose plain store vs. XOR-into-dst based on the xor flag. */
	testb RXORbl, RXORbl;
	movq RDST, RIO;

	jnz __enc_xor;

	enc_outunpack(mov, RT1);

	movq RRBP, %rbp;
	ret;

__enc_xor:
	enc_outunpack(xor, RT1);

	movq RRBP, %rbp;
	ret;
239 | |||
.global camellia_dec_blk;
.type camellia_dec_blk,@function;

camellia_dec_blk:
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 */
	/* RT2d = 24 for 16-byte keys, else 32: last subkey index. */
	cmpl $16, key_length(CTX);
	movl $32, RT2d;
	movl $24, RXORd;
	cmovel RXORd, RT2d; /* max */

	/* %rbp is scratch (RT1) in the round macros, so save it. */
	movq %rbp, RRBP;
	movq %rsi, RDST;
	movq %rdx, RIO;

	dec_inpack(RT2);

	/* 16-byte keys skip the extra 6-round group. */
	cmpb $24, RT2bl;
	je __dec_rounds16;

	dec_rounds(24);
	dec_fls(24);

__dec_rounds16:
	dec_rounds(16);
	dec_fls(16);
	dec_rounds(8);
	dec_fls(8);
	dec_rounds(0);

	movq RDST, RIO;

	dec_outunpack();

	movq RRBP, %rbp;
	ret;
279 | |||
/**********************************************************************
  2-way camellia
 **********************************************************************/
/*
 * One Feistel round on two blocks at once.  Lookups for block 0 and
 * block 1 are interleaved to hide table-load latency; the shared
 * subkey in RT2 is folded into both cd halves.
 */
#define roundsm2(ab, subkey, cd) \
	movq (key_table + ((subkey) * 2) * 4)(CTX), RT2; \
	xorq RT2, cd ## 1; \
	\
	xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \
	xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \
	xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \
	xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \
	\
	xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 1, cd ## 1); \
	xorq RT2, cd ## 0; \
	xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 1, cd ## 1); \
	xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 1, cd ## 1); \
	xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 1, cd ## 1);

/* FL/FL^-1 layer applied to both blocks (same subkeys kl/kr). */
#define fls2(l, r, kl, kr) \
	movl (key_table + ((kl) * 2) * 4)(CTX), RT0d; \
	andl l ## 0d, RT0d; \
	roll $1, RT0d; \
	shlq $32, RT0; \
	xorq RT0, l ## 0; \
	movq (key_table + ((kr) * 2) * 4)(CTX), RT1; \
	orq r ## 0, RT1; \
	shrq $32, RT1; \
	xorq RT1, r ## 0; \
	\
	movl (key_table + ((kl) * 2) * 4)(CTX), RT2d; \
	andl l ## 1d, RT2d; \
	roll $1, RT2d; \
	shlq $32, RT2; \
	xorq RT2, l ## 1; \
	movq (key_table + ((kr) * 2) * 4)(CTX), RT0; \
	orq r ## 1, RT0; \
	shrq $32, RT0; \
	xorq RT0, r ## 1; \
	\
	movq (key_table + ((kl) * 2) * 4)(CTX), RT1; \
	orq l ## 0, RT1; \
	shrq $32, RT1; \
	xorq RT1, l ## 0; \
	movl (key_table + ((kr) * 2) * 4)(CTX), RT2d; \
	andl r ## 0d, RT2d; \
	roll $1, RT2d; \
	shlq $32, RT2; \
	xorq RT2, r ## 0; \
	\
	movq (key_table + ((kl) * 2) * 4)(CTX), RT0; \
	orq l ## 1, RT0; \
	shrq $32, RT0; \
	xorq RT0, l ## 1; \
	movl (key_table + ((kr) * 2) * 4)(CTX), RT1d; \
	andl r ## 1d, RT1d; \
	roll $1, RT1d; \
	shlq $32, RT1; \
	xorq RT1, r ## 1;

/* Six forward rounds on both blocks. */
#define enc_rounds2(i) \
	roundsm2(RAB, i + 2, RCD); \
	roundsm2(RCD, i + 3, RAB); \
	roundsm2(RAB, i + 4, RCD); \
	roundsm2(RCD, i + 5, RAB); \
	roundsm2(RAB, i + 6, RCD); \
	roundsm2(RCD, i + 7, RAB);

#define enc_fls2(i) \
	fls2(RAB, RCD, i + 0, i + 1);

/* Load two consecutive 16-byte blocks and apply prewhitening.  Note the
 * rotate directions differ from the 1-way enc_inpack. */
#define enc_inpack2() \
	movq (RIO), RAB0; \
	bswapq RAB0; \
	rorq $32, RAB0; \
	movq 4*2(RIO), RCD0; \
	bswapq RCD0; \
	rolq $32, RCD0; \
	xorq key_table(CTX), RAB0; \
	\
	movq 8*2(RIO), RAB1; \
	bswapq RAB1; \
	rorq $32, RAB1; \
	movq 12*2(RIO), RCD1; \
	bswapq RCD1; \
	rolq $32, RCD1; \
	xorq key_table(CTX), RAB1;

/* Postwhiten and emit both blocks; `op` is mov or xor. */
#define enc_outunpack2(op, max) \
	xorq key_table(CTX, max, 8), RCD0; \
	rolq $32, RCD0; \
	bswapq RCD0; \
	op ## q RCD0, (RIO); \
	rorq $32, RAB0; \
	bswapq RAB0; \
	op ## q RAB0, 4*2(RIO); \
	\
	xorq key_table(CTX, max, 8), RCD1; \
	rolq $32, RCD1; \
	bswapq RCD1; \
	op ## q RCD1, 8*2(RIO); \
	rorq $32, RAB1; \
	bswapq RAB1; \
	op ## q RAB1, 12*2(RIO);

/* Six rounds on both blocks, subkeys in reverse order. */
#define dec_rounds2(i) \
	roundsm2(RAB, i + 7, RCD); \
	roundsm2(RCD, i + 6, RAB); \
	roundsm2(RAB, i + 5, RCD); \
	roundsm2(RCD, i + 4, RAB); \
	roundsm2(RAB, i + 3, RCD); \
	roundsm2(RCD, i + 2, RAB);

#define dec_fls2(i) \
	fls2(RAB, RCD, i + 1, i + 0);

/* As enc_inpack2, but prewhitening uses the last subkey (`max`). */
#define dec_inpack2(max) \
	movq (RIO), RAB0; \
	bswapq RAB0; \
	rorq $32, RAB0; \
	movq 4*2(RIO), RCD0; \
	bswapq RCD0; \
	rolq $32, RCD0; \
	xorq key_table(CTX, max, 8), RAB0; \
	\
	movq 8*2(RIO), RAB1; \
	bswapq RAB1; \
	rorq $32, RAB1; \
	movq 12*2(RIO), RCD1; \
	bswapq RCD1; \
	rolq $32, RCD1; \
	xorq key_table(CTX, max, 8), RAB1;

/* Postwhiten with key_table[0] and store both plaintext blocks. */
#define dec_outunpack2() \
	xorq key_table(CTX), RCD0; \
	rolq $32, RCD0; \
	bswapq RCD0; \
	movq RCD0, (RIO); \
	rorq $32, RAB0; \
	bswapq RAB0; \
	movq RAB0, 4*2(RIO); \
	\
	xorq key_table(CTX), RCD1; \
	rolq $32, RCD1; \
	bswapq RCD1; \
	movq RCD1, 8*2(RIO); \
	rorq $32, RAB1; \
	bswapq RAB1; \
	movq RAB1, 12*2(RIO);
428 | |||
.global __camellia_enc_blk_2way;
.type __camellia_enc_blk_2way,@function;

__camellia_enc_blk_2way:
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst (2 blocks)
	 *	%rdx: src (2 blocks)
	 *	%rcx: bool xor (non-zero: XOR output into dst)
	 */
	/* The 2-way state uses %rbx (RAB1) and %rbp (RT1): save both. */
	pushq %rbx;

	movq %rbp, RRBP;
	movq %rcx, RXOR;
	movq %rsi, RDST;
	movq %rdx, RIO;

	enc_inpack2();

	/* 18 rounds for 16-byte keys; longer keys add 6 more below. */
	enc_rounds2(0);
	enc_fls2(8);
	enc_rounds2(8);
	enc_fls2(16);
	enc_rounds2(16);
	movl $24, RT2d; /* max */

	cmpb $16, key_length(CTX);
	je __enc2_done;

	enc_fls2(24);
	enc_rounds2(24);
	movl $32, RT2d; /* max */

__enc2_done:
	/* Plain store vs. XOR-into-dst, selected by the xor flag. */
	test RXORbl, RXORbl;
	movq RDST, RIO;
	jnz __enc2_xor;

	enc_outunpack2(mov, RT2);

	movq RRBP, %rbp;
	popq %rbx;
	ret;

__enc2_xor:
	enc_outunpack2(xor, RT2);

	movq RRBP, %rbp;
	popq %rbx;
	ret;
479 | |||
.global camellia_dec_blk_2way;
.type camellia_dec_blk_2way,@function;

camellia_dec_blk_2way:
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst (2 blocks)
	 *	%rdx: src (2 blocks)
	 */
	/* RT2d = 24 for 16-byte keys, else 32: last subkey index. */
	cmpl $16, key_length(CTX);
	movl $32, RT2d;
	movl $24, RXORd;
	cmovel RXORd, RT2d; /* max */

	/* %rbx (RAB1) is callee-saved: stash it in RXOR (%r9), which is
	 * free once the cmovel above has consumed it.  %rbp is RT1. */
	movq %rbx, RXOR;
	movq %rbp, RRBP;
	movq %rsi, RDST;
	movq %rdx, RIO;

	dec_inpack2(RT2);

	/* 16-byte keys skip the extra 6-round group. */
	cmpb $24, RT2bl;
	je __dec2_rounds16;

	dec_rounds2(24);
	dec_fls2(24);

__dec2_rounds16:
	dec_rounds2(16);
	dec_fls2(16);
	dec_rounds2(8);
	dec_fls2(8);
	dec_rounds2(0);

	movq RDST, RIO;

	dec_outunpack2();

	movq RRBP, %rbp;
	movq RXOR, %rbx;
	ret;
diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c deleted file mode 100644 index 96cbb6068fc..00000000000 --- a/arch/x86/crypto/camellia_aesni_avx_glue.c +++ /dev/null | |||
@@ -1,558 +0,0 @@ | |||
1 | /* | ||
2 | * Glue Code for x86_64/AVX/AES-NI assembler optimized version of Camellia | ||
3 | * | ||
4 | * Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | #include <linux/module.h> | ||
14 | #include <linux/types.h> | ||
15 | #include <linux/crypto.h> | ||
16 | #include <linux/err.h> | ||
17 | #include <crypto/algapi.h> | ||
18 | #include <crypto/ctr.h> | ||
19 | #include <crypto/lrw.h> | ||
20 | #include <crypto/xts.h> | ||
21 | #include <asm/xcr.h> | ||
22 | #include <asm/xsave.h> | ||
23 | #include <asm/crypto/camellia.h> | ||
24 | #include <asm/crypto/ablk_helper.h> | ||
25 | #include <asm/crypto/glue_helper.h> | ||
26 | |||
27 | #define CAMELLIA_AESNI_PARALLEL_BLOCKS 16 | ||
28 | |||
29 | /* 16-way AES-NI parallel cipher functions */ | ||
30 | asmlinkage void camellia_ecb_enc_16way(struct camellia_ctx *ctx, u8 *dst, | ||
31 | const u8 *src); | ||
32 | asmlinkage void camellia_ecb_dec_16way(struct camellia_ctx *ctx, u8 *dst, | ||
33 | const u8 *src); | ||
34 | |||
35 | asmlinkage void camellia_cbc_dec_16way(struct camellia_ctx *ctx, u8 *dst, | ||
36 | const u8 *src); | ||
37 | asmlinkage void camellia_ctr_16way(struct camellia_ctx *ctx, u8 *dst, | ||
38 | const u8 *src, le128 *iv); | ||
39 | |||
/*
 * ECB-encrypt dispatch table: 16-way AES-NI/AVX path first, then the
 * 2-way x86_64 assembler path, finally one block at a time.
 * fpu_blocks_limit presumably gates FPU use below this block count --
 * NOTE(review): confirm against glue_helper.
 */
static const struct common_glue_ctx camellia_enc = {
	.num_funcs = 3,
	.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,

	.funcs = { {
		.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_enc_16way) }
	}, {
		.num_blocks = 2,
		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk_2way) }
	}, {
		.num_blocks = 1,
		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk) }
	} }
};
55 | |||
/* CTR dispatch table: 16-way, then 2-way, then single-block keystream. */
static const struct common_glue_ctx camellia_ctr = {
	.num_funcs = 3,
	.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,

	.funcs = { {
		.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_ctr_16way) }
	}, {
		.num_blocks = 2,
		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr_2way) }
	}, {
		.num_blocks = 1,
		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr) }
	} }
};
71 | |||
/* ECB-decrypt dispatch table, mirroring camellia_enc. */
static const struct common_glue_ctx camellia_dec = {
	.num_funcs = 3,
	.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,

	.funcs = { {
		.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_dec_16way) }
	}, {
		.num_blocks = 2,
		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk_2way) }
	}, {
		.num_blocks = 1,
		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk) }
	} }
};
87 | |||
/* CBC-decrypt dispatch table: chaining-aware 16-way/2-way routines,
 * falling back to the plain single-block decryptor. */
static const struct common_glue_ctx camellia_dec_cbc = {
	.num_funcs = 3,
	.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,

	.funcs = { {
		.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_cbc_dec_16way) }
	}, {
		.num_blocks = 2,
		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_decrypt_cbc_2way) }
	}, {
		.num_blocks = 1,
		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_dec_blk) }
	} }
};
103 | |||
104 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
105 | struct scatterlist *src, unsigned int nbytes) | ||
106 | { | ||
107 | return glue_ecb_crypt_128bit(&camellia_enc, desc, dst, src, nbytes); | ||
108 | } | ||
109 | |||
110 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
111 | struct scatterlist *src, unsigned int nbytes) | ||
112 | { | ||
113 | return glue_ecb_crypt_128bit(&camellia_dec, desc, dst, src, nbytes); | ||
114 | } | ||
115 | |||
116 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
117 | struct scatterlist *src, unsigned int nbytes) | ||
118 | { | ||
119 | return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(camellia_enc_blk), desc, | ||
120 | dst, src, nbytes); | ||
121 | } | ||
122 | |||
123 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
124 | struct scatterlist *src, unsigned int nbytes) | ||
125 | { | ||
126 | return glue_cbc_decrypt_128bit(&camellia_dec_cbc, desc, dst, src, | ||
127 | nbytes); | ||
128 | } | ||
129 | |||
130 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
131 | struct scatterlist *src, unsigned int nbytes) | ||
132 | { | ||
133 | return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes); | ||
134 | } | ||
135 | |||
136 | static inline bool camellia_fpu_begin(bool fpu_enabled, unsigned int nbytes) | ||
137 | { | ||
138 | return glue_fpu_begin(CAMELLIA_BLOCK_SIZE, | ||
139 | CAMELLIA_AESNI_PARALLEL_BLOCKS, NULL, fpu_enabled, | ||
140 | nbytes); | ||
141 | } | ||
142 | |||
/* Close the FPU section opened by camellia_fpu_begin() (no-op if the
 * FPU was never enabled). */
static inline void camellia_fpu_end(bool fpu_enabled)
{
	glue_fpu_end(fpu_enabled);
}
147 | |||
148 | static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key, | ||
149 | unsigned int key_len) | ||
150 | { | ||
151 | return __camellia_setkey(crypto_tfm_ctx(tfm), in_key, key_len, | ||
152 | &tfm->crt_flags); | ||
153 | } | ||
154 | |||
/* Per-request state handed to the LRW/XTS crypt callbacks. */
struct crypt_priv {
	struct camellia_ctx *ctx;	/* cipher context */
	bool fpu_enabled;		/* tracked across callback invocations */
};
159 | |||
160 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
161 | { | ||
162 | const unsigned int bsize = CAMELLIA_BLOCK_SIZE; | ||
163 | struct crypt_priv *ctx = priv; | ||
164 | int i; | ||
165 | |||
166 | ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes); | ||
167 | |||
168 | if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) { | ||
169 | camellia_ecb_enc_16way(ctx->ctx, srcdst, srcdst); | ||
170 | srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS; | ||
171 | nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS; | ||
172 | } | ||
173 | |||
174 | while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) { | ||
175 | camellia_enc_blk_2way(ctx->ctx, srcdst, srcdst); | ||
176 | srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS; | ||
177 | nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS; | ||
178 | } | ||
179 | |||
180 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
181 | camellia_enc_blk(ctx->ctx, srcdst, srcdst); | ||
182 | } | ||
183 | |||
184 | static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
185 | { | ||
186 | const unsigned int bsize = CAMELLIA_BLOCK_SIZE; | ||
187 | struct crypt_priv *ctx = priv; | ||
188 | int i; | ||
189 | |||
190 | ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes); | ||
191 | |||
192 | if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) { | ||
193 | camellia_ecb_dec_16way(ctx->ctx, srcdst, srcdst); | ||
194 | srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS; | ||
195 | nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS; | ||
196 | } | ||
197 | |||
198 | while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) { | ||
199 | camellia_dec_blk_2way(ctx->ctx, srcdst, srcdst); | ||
200 | srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS; | ||
201 | nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS; | ||
202 | } | ||
203 | |||
204 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
205 | camellia_dec_blk(ctx->ctx, srcdst, srcdst); | ||
206 | } | ||
207 | |||
/*
 * LRW encrypt: run the generic LRW walker with encrypt_callback doing
 * the bulk cipher work on a stack buffer of up to 16 blocks.
 */
static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	be128 buf[CAMELLIA_AESNI_PARALLEL_BLOCKS];
	struct crypt_priv crypt_ctx = {
		.ctx = &ctx->camellia_ctx,
		.fpu_enabled = false,
	};
	struct lrw_crypt_req req = {
		.tbuf = buf,
		.tbuflen = sizeof(buf),

		.table_ctx = &ctx->lrw_table,
		.crypt_ctx = &crypt_ctx,
		.crypt_fn = encrypt_callback,
	};
	int ret;

	/* The FPU section must not be interrupted by sleeping. */
	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
	ret = lrw_crypt(desc, dst, src, nbytes, &req);
	/* End any FPU section the callback left open. */
	camellia_fpu_end(crypt_ctx.fpu_enabled);

	return ret;
}
233 | |||
234 | static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
235 | struct scatterlist *src, unsigned int nbytes) | ||
236 | { | ||
237 | struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
238 | be128 buf[CAMELLIA_AESNI_PARALLEL_BLOCKS]; | ||
239 | struct crypt_priv crypt_ctx = { | ||
240 | .ctx = &ctx->camellia_ctx, | ||
241 | .fpu_enabled = false, | ||
242 | }; | ||
243 | struct lrw_crypt_req req = { | ||
244 | .tbuf = buf, | ||
245 | .tbuflen = sizeof(buf), | ||
246 | |||
247 | .table_ctx = &ctx->lrw_table, | ||
248 | .crypt_ctx = &crypt_ctx, | ||
249 | .crypt_fn = decrypt_callback, | ||
250 | }; | ||
251 | int ret; | ||
252 | |||
253 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
254 | ret = lrw_crypt(desc, dst, src, nbytes, &req); | ||
255 | camellia_fpu_end(crypt_ctx.fpu_enabled); | ||
256 | |||
257 | return ret; | ||
258 | } | ||
259 | |||
260 | static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
261 | struct scatterlist *src, unsigned int nbytes) | ||
262 | { | ||
263 | struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
264 | be128 buf[CAMELLIA_AESNI_PARALLEL_BLOCKS]; | ||
265 | struct crypt_priv crypt_ctx = { | ||
266 | .ctx = &ctx->crypt_ctx, | ||
267 | .fpu_enabled = false, | ||
268 | }; | ||
269 | struct xts_crypt_req req = { | ||
270 | .tbuf = buf, | ||
271 | .tbuflen = sizeof(buf), | ||
272 | |||
273 | .tweak_ctx = &ctx->tweak_ctx, | ||
274 | .tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk), | ||
275 | .crypt_ctx = &crypt_ctx, | ||
276 | .crypt_fn = encrypt_callback, | ||
277 | }; | ||
278 | int ret; | ||
279 | |||
280 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
281 | ret = xts_crypt(desc, dst, src, nbytes, &req); | ||
282 | camellia_fpu_end(crypt_ctx.fpu_enabled); | ||
283 | |||
284 | return ret; | ||
285 | } | ||
286 | |||
287 | static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
288 | struct scatterlist *src, unsigned int nbytes) | ||
289 | { | ||
290 | struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
291 | be128 buf[CAMELLIA_AESNI_PARALLEL_BLOCKS]; | ||
292 | struct crypt_priv crypt_ctx = { | ||
293 | .ctx = &ctx->crypt_ctx, | ||
294 | .fpu_enabled = false, | ||
295 | }; | ||
296 | struct xts_crypt_req req = { | ||
297 | .tbuf = buf, | ||
298 | .tbuflen = sizeof(buf), | ||
299 | |||
300 | .tweak_ctx = &ctx->tweak_ctx, | ||
301 | .tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk), | ||
302 | .crypt_ctx = &crypt_ctx, | ||
303 | .crypt_fn = decrypt_callback, | ||
304 | }; | ||
305 | int ret; | ||
306 | |||
307 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
308 | ret = xts_crypt(desc, dst, src, nbytes, &req); | ||
309 | camellia_fpu_end(crypt_ctx.fpu_enabled); | ||
310 | |||
311 | return ret; | ||
312 | } | ||
313 | |||
314 | static struct crypto_alg cmll_algs[10] = { { | ||
315 | .cra_name = "__ecb-camellia-aesni", | ||
316 | .cra_driver_name = "__driver-ecb-camellia-aesni", | ||
317 | .cra_priority = 0, | ||
318 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
319 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | ||
320 | .cra_ctxsize = sizeof(struct camellia_ctx), | ||
321 | .cra_alignmask = 0, | ||
322 | .cra_type = &crypto_blkcipher_type, | ||
323 | .cra_module = THIS_MODULE, | ||
324 | .cra_u = { | ||
325 | .blkcipher = { | ||
326 | .min_keysize = CAMELLIA_MIN_KEY_SIZE, | ||
327 | .max_keysize = CAMELLIA_MAX_KEY_SIZE, | ||
328 | .setkey = camellia_setkey, | ||
329 | .encrypt = ecb_encrypt, | ||
330 | .decrypt = ecb_decrypt, | ||
331 | }, | ||
332 | }, | ||
333 | }, { | ||
334 | .cra_name = "__cbc-camellia-aesni", | ||
335 | .cra_driver_name = "__driver-cbc-camellia-aesni", | ||
336 | .cra_priority = 0, | ||
337 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
338 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | ||
339 | .cra_ctxsize = sizeof(struct camellia_ctx), | ||
340 | .cra_alignmask = 0, | ||
341 | .cra_type = &crypto_blkcipher_type, | ||
342 | .cra_module = THIS_MODULE, | ||
343 | .cra_u = { | ||
344 | .blkcipher = { | ||
345 | .min_keysize = CAMELLIA_MIN_KEY_SIZE, | ||
346 | .max_keysize = CAMELLIA_MAX_KEY_SIZE, | ||
347 | .setkey = camellia_setkey, | ||
348 | .encrypt = cbc_encrypt, | ||
349 | .decrypt = cbc_decrypt, | ||
350 | }, | ||
351 | }, | ||
352 | }, { | ||
353 | .cra_name = "__ctr-camellia-aesni", | ||
354 | .cra_driver_name = "__driver-ctr-camellia-aesni", | ||
355 | .cra_priority = 0, | ||
356 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
357 | .cra_blocksize = 1, | ||
358 | .cra_ctxsize = sizeof(struct camellia_ctx), | ||
359 | .cra_alignmask = 0, | ||
360 | .cra_type = &crypto_blkcipher_type, | ||
361 | .cra_module = THIS_MODULE, | ||
362 | .cra_u = { | ||
363 | .blkcipher = { | ||
364 | .min_keysize = CAMELLIA_MIN_KEY_SIZE, | ||
365 | .max_keysize = CAMELLIA_MAX_KEY_SIZE, | ||
366 | .ivsize = CAMELLIA_BLOCK_SIZE, | ||
367 | .setkey = camellia_setkey, | ||
368 | .encrypt = ctr_crypt, | ||
369 | .decrypt = ctr_crypt, | ||
370 | }, | ||
371 | }, | ||
372 | }, { | ||
373 | .cra_name = "__lrw-camellia-aesni", | ||
374 | .cra_driver_name = "__driver-lrw-camellia-aesni", | ||
375 | .cra_priority = 0, | ||
376 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
377 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | ||
378 | .cra_ctxsize = sizeof(struct camellia_lrw_ctx), | ||
379 | .cra_alignmask = 0, | ||
380 | .cra_type = &crypto_blkcipher_type, | ||
381 | .cra_module = THIS_MODULE, | ||
382 | .cra_exit = lrw_camellia_exit_tfm, | ||
383 | .cra_u = { | ||
384 | .blkcipher = { | ||
385 | .min_keysize = CAMELLIA_MIN_KEY_SIZE + | ||
386 | CAMELLIA_BLOCK_SIZE, | ||
387 | .max_keysize = CAMELLIA_MAX_KEY_SIZE + | ||
388 | CAMELLIA_BLOCK_SIZE, | ||
389 | .ivsize = CAMELLIA_BLOCK_SIZE, | ||
390 | .setkey = lrw_camellia_setkey, | ||
391 | .encrypt = lrw_encrypt, | ||
392 | .decrypt = lrw_decrypt, | ||
393 | }, | ||
394 | }, | ||
395 | }, { | ||
396 | .cra_name = "__xts-camellia-aesni", | ||
397 | .cra_driver_name = "__driver-xts-camellia-aesni", | ||
398 | .cra_priority = 0, | ||
399 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
400 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | ||
401 | .cra_ctxsize = sizeof(struct camellia_xts_ctx), | ||
402 | .cra_alignmask = 0, | ||
403 | .cra_type = &crypto_blkcipher_type, | ||
404 | .cra_module = THIS_MODULE, | ||
405 | .cra_u = { | ||
406 | .blkcipher = { | ||
407 | .min_keysize = CAMELLIA_MIN_KEY_SIZE * 2, | ||
408 | .max_keysize = CAMELLIA_MAX_KEY_SIZE * 2, | ||
409 | .ivsize = CAMELLIA_BLOCK_SIZE, | ||
410 | .setkey = xts_camellia_setkey, | ||
411 | .encrypt = xts_encrypt, | ||
412 | .decrypt = xts_decrypt, | ||
413 | }, | ||
414 | }, | ||
415 | }, { | ||
416 | .cra_name = "ecb(camellia)", | ||
417 | .cra_driver_name = "ecb-camellia-aesni", | ||
418 | .cra_priority = 400, | ||
419 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
420 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | ||
421 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
422 | .cra_alignmask = 0, | ||
423 | .cra_type = &crypto_ablkcipher_type, | ||
424 | .cra_module = THIS_MODULE, | ||
425 | .cra_init = ablk_init, | ||
426 | .cra_exit = ablk_exit, | ||
427 | .cra_u = { | ||
428 | .ablkcipher = { | ||
429 | .min_keysize = CAMELLIA_MIN_KEY_SIZE, | ||
430 | .max_keysize = CAMELLIA_MAX_KEY_SIZE, | ||
431 | .setkey = ablk_set_key, | ||
432 | .encrypt = ablk_encrypt, | ||
433 | .decrypt = ablk_decrypt, | ||
434 | }, | ||
435 | }, | ||
436 | }, { | ||
437 | .cra_name = "cbc(camellia)", | ||
438 | .cra_driver_name = "cbc-camellia-aesni", | ||
439 | .cra_priority = 400, | ||
440 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
441 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | ||
442 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
443 | .cra_alignmask = 0, | ||
444 | .cra_type = &crypto_ablkcipher_type, | ||
445 | .cra_module = THIS_MODULE, | ||
446 | .cra_init = ablk_init, | ||
447 | .cra_exit = ablk_exit, | ||
448 | .cra_u = { | ||
449 | .ablkcipher = { | ||
450 | .min_keysize = CAMELLIA_MIN_KEY_SIZE, | ||
451 | .max_keysize = CAMELLIA_MAX_KEY_SIZE, | ||
452 | .ivsize = CAMELLIA_BLOCK_SIZE, | ||
453 | .setkey = ablk_set_key, | ||
454 | .encrypt = __ablk_encrypt, | ||
455 | .decrypt = ablk_decrypt, | ||
456 | }, | ||
457 | }, | ||
458 | }, { | ||
459 | .cra_name = "ctr(camellia)", | ||
460 | .cra_driver_name = "ctr-camellia-aesni", | ||
461 | .cra_priority = 400, | ||
462 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
463 | .cra_blocksize = 1, | ||
464 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
465 | .cra_alignmask = 0, | ||
466 | .cra_type = &crypto_ablkcipher_type, | ||
467 | .cra_module = THIS_MODULE, | ||
468 | .cra_init = ablk_init, | ||
469 | .cra_exit = ablk_exit, | ||
470 | .cra_u = { | ||
471 | .ablkcipher = { | ||
472 | .min_keysize = CAMELLIA_MIN_KEY_SIZE, | ||
473 | .max_keysize = CAMELLIA_MAX_KEY_SIZE, | ||
474 | .ivsize = CAMELLIA_BLOCK_SIZE, | ||
475 | .setkey = ablk_set_key, | ||
476 | .encrypt = ablk_encrypt, | ||
477 | .decrypt = ablk_encrypt, | ||
478 | .geniv = "chainiv", | ||
479 | }, | ||
480 | }, | ||
481 | }, { | ||
482 | .cra_name = "lrw(camellia)", | ||
483 | .cra_driver_name = "lrw-camellia-aesni", | ||
484 | .cra_priority = 400, | ||
485 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
486 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | ||
487 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
488 | .cra_alignmask = 0, | ||
489 | .cra_type = &crypto_ablkcipher_type, | ||
490 | .cra_module = THIS_MODULE, | ||
491 | .cra_init = ablk_init, | ||
492 | .cra_exit = ablk_exit, | ||
493 | .cra_u = { | ||
494 | .ablkcipher = { | ||
495 | .min_keysize = CAMELLIA_MIN_KEY_SIZE + | ||
496 | CAMELLIA_BLOCK_SIZE, | ||
497 | .max_keysize = CAMELLIA_MAX_KEY_SIZE + | ||
498 | CAMELLIA_BLOCK_SIZE, | ||
499 | .ivsize = CAMELLIA_BLOCK_SIZE, | ||
500 | .setkey = ablk_set_key, | ||
501 | .encrypt = ablk_encrypt, | ||
502 | .decrypt = ablk_decrypt, | ||
503 | }, | ||
504 | }, | ||
505 | }, { | ||
506 | .cra_name = "xts(camellia)", | ||
507 | .cra_driver_name = "xts-camellia-aesni", | ||
508 | .cra_priority = 400, | ||
509 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
510 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | ||
511 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
512 | .cra_alignmask = 0, | ||
513 | .cra_type = &crypto_ablkcipher_type, | ||
514 | .cra_module = THIS_MODULE, | ||
515 | .cra_init = ablk_init, | ||
516 | .cra_exit = ablk_exit, | ||
517 | .cra_u = { | ||
518 | .ablkcipher = { | ||
519 | .min_keysize = CAMELLIA_MIN_KEY_SIZE * 2, | ||
520 | .max_keysize = CAMELLIA_MAX_KEY_SIZE * 2, | ||
521 | .ivsize = CAMELLIA_BLOCK_SIZE, | ||
522 | .setkey = ablk_set_key, | ||
523 | .encrypt = ablk_encrypt, | ||
524 | .decrypt = ablk_decrypt, | ||
525 | }, | ||
526 | }, | ||
527 | } }; | ||
528 | |||
529 | static int __init camellia_aesni_init(void) | ||
530 | { | ||
531 | u64 xcr0; | ||
532 | |||
533 | if (!cpu_has_avx || !cpu_has_aes || !cpu_has_osxsave) { | ||
534 | pr_info("AVX or AES-NI instructions are not detected.\n"); | ||
535 | return -ENODEV; | ||
536 | } | ||
537 | |||
538 | xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); | ||
539 | if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { | ||
540 | pr_info("AVX detected but unusable.\n"); | ||
541 | return -ENODEV; | ||
542 | } | ||
543 | |||
544 | return crypto_register_algs(cmll_algs, ARRAY_SIZE(cmll_algs)); | ||
545 | } | ||
546 | |||
547 | static void __exit camellia_aesni_fini(void) | ||
548 | { | ||
549 | crypto_unregister_algs(cmll_algs, ARRAY_SIZE(cmll_algs)); | ||
550 | } | ||
551 | |||
552 | module_init(camellia_aesni_init); | ||
553 | module_exit(camellia_aesni_fini); | ||
554 | |||
555 | MODULE_LICENSE("GPL"); | ||
556 | MODULE_DESCRIPTION("Camellia Cipher Algorithm, AES-NI/AVX optimized"); | ||
557 | MODULE_ALIAS("camellia"); | ||
558 | MODULE_ALIAS("camellia-asm"); | ||
diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c deleted file mode 100644 index 5cb86ccd4ac..00000000000 --- a/arch/x86/crypto/camellia_glue.c +++ /dev/null | |||
@@ -1,1729 +0,0 @@ | |||
1 | /* | ||
2 | * Glue Code for assembler optimized version of Camellia | ||
3 | * | ||
4 | * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
5 | * | ||
6 | * Camellia parts based on code by: | ||
7 | * Copyright (C) 2006 NTT (Nippon Telegraph and Telephone Corporation) | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License as published by | ||
11 | * the Free Software Foundation; either version 2 of the License, or | ||
12 | * (at your option) any later version. | ||
13 | * | ||
14 | * This program is distributed in the hope that it will be useful, | ||
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | * GNU General Public License for more details. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License | ||
20 | * along with this program; if not, write to the Free Software | ||
21 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
22 | * USA | ||
23 | * | ||
24 | */ | ||
25 | |||
26 | #include <asm/processor.h> | ||
27 | #include <asm/unaligned.h> | ||
28 | #include <linux/crypto.h> | ||
29 | #include <linux/init.h> | ||
30 | #include <linux/module.h> | ||
31 | #include <linux/types.h> | ||
32 | #include <crypto/algapi.h> | ||
33 | #include <crypto/lrw.h> | ||
34 | #include <crypto/xts.h> | ||
35 | #include <asm/crypto/camellia.h> | ||
36 | #include <asm/crypto/glue_helper.h> | ||
37 | |||
38 | /* regular block cipher functions */ | ||
39 | asmlinkage void __camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst, | ||
40 | const u8 *src, bool xor); | ||
41 | EXPORT_SYMBOL_GPL(__camellia_enc_blk); | ||
42 | asmlinkage void camellia_dec_blk(struct camellia_ctx *ctx, u8 *dst, | ||
43 | const u8 *src); | ||
44 | EXPORT_SYMBOL_GPL(camellia_dec_blk); | ||
45 | |||
46 | /* 2-way parallel cipher functions */ | ||
47 | asmlinkage void __camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst, | ||
48 | const u8 *src, bool xor); | ||
49 | EXPORT_SYMBOL_GPL(__camellia_enc_blk_2way); | ||
50 | asmlinkage void camellia_dec_blk_2way(struct camellia_ctx *ctx, u8 *dst, | ||
51 | const u8 *src); | ||
52 | EXPORT_SYMBOL_GPL(camellia_dec_blk_2way); | ||
53 | |||
54 | static void camellia_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | ||
55 | { | ||
56 | camellia_enc_blk(crypto_tfm_ctx(tfm), dst, src); | ||
57 | } | ||
58 | |||
59 | static void camellia_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | ||
60 | { | ||
61 | camellia_dec_blk(crypto_tfm_ctx(tfm), dst, src); | ||
62 | } | ||
63 | |||
64 | /* camellia sboxes */ | ||
65 | const u64 camellia_sp10011110[256] = { | ||
66 | 0x7000007070707000ULL, 0x8200008282828200ULL, 0x2c00002c2c2c2c00ULL, | ||
67 | 0xec0000ecececec00ULL, 0xb30000b3b3b3b300ULL, 0x2700002727272700ULL, | ||
68 | 0xc00000c0c0c0c000ULL, 0xe50000e5e5e5e500ULL, 0xe40000e4e4e4e400ULL, | ||
69 | 0x8500008585858500ULL, 0x5700005757575700ULL, 0x3500003535353500ULL, | ||
70 | 0xea0000eaeaeaea00ULL, 0x0c00000c0c0c0c00ULL, 0xae0000aeaeaeae00ULL, | ||
71 | 0x4100004141414100ULL, 0x2300002323232300ULL, 0xef0000efefefef00ULL, | ||
72 | 0x6b00006b6b6b6b00ULL, 0x9300009393939300ULL, 0x4500004545454500ULL, | ||
73 | 0x1900001919191900ULL, 0xa50000a5a5a5a500ULL, 0x2100002121212100ULL, | ||
74 | 0xed0000edededed00ULL, 0x0e00000e0e0e0e00ULL, 0x4f00004f4f4f4f00ULL, | ||
75 | 0x4e00004e4e4e4e00ULL, 0x1d00001d1d1d1d00ULL, 0x6500006565656500ULL, | ||
76 | 0x9200009292929200ULL, 0xbd0000bdbdbdbd00ULL, 0x8600008686868600ULL, | ||
77 | 0xb80000b8b8b8b800ULL, 0xaf0000afafafaf00ULL, 0x8f00008f8f8f8f00ULL, | ||
78 | 0x7c00007c7c7c7c00ULL, 0xeb0000ebebebeb00ULL, 0x1f00001f1f1f1f00ULL, | ||
79 | 0xce0000cececece00ULL, 0x3e00003e3e3e3e00ULL, 0x3000003030303000ULL, | ||
80 | 0xdc0000dcdcdcdc00ULL, 0x5f00005f5f5f5f00ULL, 0x5e00005e5e5e5e00ULL, | ||
81 | 0xc50000c5c5c5c500ULL, 0x0b00000b0b0b0b00ULL, 0x1a00001a1a1a1a00ULL, | ||
82 | 0xa60000a6a6a6a600ULL, 0xe10000e1e1e1e100ULL, 0x3900003939393900ULL, | ||
83 | 0xca0000cacacaca00ULL, 0xd50000d5d5d5d500ULL, 0x4700004747474700ULL, | ||
84 | 0x5d00005d5d5d5d00ULL, 0x3d00003d3d3d3d00ULL, 0xd90000d9d9d9d900ULL, | ||
85 | 0x0100000101010100ULL, 0x5a00005a5a5a5a00ULL, 0xd60000d6d6d6d600ULL, | ||
86 | 0x5100005151515100ULL, 0x5600005656565600ULL, 0x6c00006c6c6c6c00ULL, | ||
87 | 0x4d00004d4d4d4d00ULL, 0x8b00008b8b8b8b00ULL, 0x0d00000d0d0d0d00ULL, | ||
88 | 0x9a00009a9a9a9a00ULL, 0x6600006666666600ULL, 0xfb0000fbfbfbfb00ULL, | ||
89 | 0xcc0000cccccccc00ULL, 0xb00000b0b0b0b000ULL, 0x2d00002d2d2d2d00ULL, | ||
90 | 0x7400007474747400ULL, 0x1200001212121200ULL, 0x2b00002b2b2b2b00ULL, | ||
91 | 0x2000002020202000ULL, 0xf00000f0f0f0f000ULL, 0xb10000b1b1b1b100ULL, | ||
92 | 0x8400008484848400ULL, 0x9900009999999900ULL, 0xdf0000dfdfdfdf00ULL, | ||
93 | 0x4c00004c4c4c4c00ULL, 0xcb0000cbcbcbcb00ULL, 0xc20000c2c2c2c200ULL, | ||
94 | 0x3400003434343400ULL, 0x7e00007e7e7e7e00ULL, 0x7600007676767600ULL, | ||
95 | 0x0500000505050500ULL, 0x6d00006d6d6d6d00ULL, 0xb70000b7b7b7b700ULL, | ||
96 | 0xa90000a9a9a9a900ULL, 0x3100003131313100ULL, 0xd10000d1d1d1d100ULL, | ||
97 | 0x1700001717171700ULL, 0x0400000404040400ULL, 0xd70000d7d7d7d700ULL, | ||
98 | 0x1400001414141400ULL, 0x5800005858585800ULL, 0x3a00003a3a3a3a00ULL, | ||
99 | 0x6100006161616100ULL, 0xde0000dededede00ULL, 0x1b00001b1b1b1b00ULL, | ||
100 | 0x1100001111111100ULL, 0x1c00001c1c1c1c00ULL, 0x3200003232323200ULL, | ||
101 | 0x0f00000f0f0f0f00ULL, 0x9c00009c9c9c9c00ULL, 0x1600001616161600ULL, | ||
102 | 0x5300005353535300ULL, 0x1800001818181800ULL, 0xf20000f2f2f2f200ULL, | ||
103 | 0x2200002222222200ULL, 0xfe0000fefefefe00ULL, 0x4400004444444400ULL, | ||
104 | 0xcf0000cfcfcfcf00ULL, 0xb20000b2b2b2b200ULL, 0xc30000c3c3c3c300ULL, | ||
105 | 0xb50000b5b5b5b500ULL, 0x7a00007a7a7a7a00ULL, 0x9100009191919100ULL, | ||
106 | 0x2400002424242400ULL, 0x0800000808080800ULL, 0xe80000e8e8e8e800ULL, | ||
107 | 0xa80000a8a8a8a800ULL, 0x6000006060606000ULL, 0xfc0000fcfcfcfc00ULL, | ||
108 | 0x6900006969696900ULL, 0x5000005050505000ULL, 0xaa0000aaaaaaaa00ULL, | ||
109 | 0xd00000d0d0d0d000ULL, 0xa00000a0a0a0a000ULL, 0x7d00007d7d7d7d00ULL, | ||
110 | 0xa10000a1a1a1a100ULL, 0x8900008989898900ULL, 0x6200006262626200ULL, | ||
111 | 0x9700009797979700ULL, 0x5400005454545400ULL, 0x5b00005b5b5b5b00ULL, | ||
112 | 0x1e00001e1e1e1e00ULL, 0x9500009595959500ULL, 0xe00000e0e0e0e000ULL, | ||
113 | 0xff0000ffffffff00ULL, 0x6400006464646400ULL, 0xd20000d2d2d2d200ULL, | ||
114 | 0x1000001010101000ULL, 0xc40000c4c4c4c400ULL, 0x0000000000000000ULL, | ||
115 | 0x4800004848484800ULL, 0xa30000a3a3a3a300ULL, 0xf70000f7f7f7f700ULL, | ||
116 | 0x7500007575757500ULL, 0xdb0000dbdbdbdb00ULL, 0x8a00008a8a8a8a00ULL, | ||
117 | 0x0300000303030300ULL, 0xe60000e6e6e6e600ULL, 0xda0000dadadada00ULL, | ||
118 | 0x0900000909090900ULL, 0x3f00003f3f3f3f00ULL, 0xdd0000dddddddd00ULL, | ||
119 | 0x9400009494949400ULL, 0x8700008787878700ULL, 0x5c00005c5c5c5c00ULL, | ||
120 | 0x8300008383838300ULL, 0x0200000202020200ULL, 0xcd0000cdcdcdcd00ULL, | ||
121 | 0x4a00004a4a4a4a00ULL, 0x9000009090909000ULL, 0x3300003333333300ULL, | ||
122 | 0x7300007373737300ULL, 0x6700006767676700ULL, 0xf60000f6f6f6f600ULL, | ||
123 | 0xf30000f3f3f3f300ULL, 0x9d00009d9d9d9d00ULL, 0x7f00007f7f7f7f00ULL, | ||
124 | 0xbf0000bfbfbfbf00ULL, 0xe20000e2e2e2e200ULL, 0x5200005252525200ULL, | ||
125 | 0x9b00009b9b9b9b00ULL, 0xd80000d8d8d8d800ULL, 0x2600002626262600ULL, | ||
126 | 0xc80000c8c8c8c800ULL, 0x3700003737373700ULL, 0xc60000c6c6c6c600ULL, | ||
127 | 0x3b00003b3b3b3b00ULL, 0x8100008181818100ULL, 0x9600009696969600ULL, | ||
128 | 0x6f00006f6f6f6f00ULL, 0x4b00004b4b4b4b00ULL, 0x1300001313131300ULL, | ||
129 | 0xbe0000bebebebe00ULL, 0x6300006363636300ULL, 0x2e00002e2e2e2e00ULL, | ||
130 | 0xe90000e9e9e9e900ULL, 0x7900007979797900ULL, 0xa70000a7a7a7a700ULL, | ||
131 | 0x8c00008c8c8c8c00ULL, 0x9f00009f9f9f9f00ULL, 0x6e00006e6e6e6e00ULL, | ||
132 | 0xbc0000bcbcbcbc00ULL, 0x8e00008e8e8e8e00ULL, 0x2900002929292900ULL, | ||
133 | 0xf50000f5f5f5f500ULL, 0xf90000f9f9f9f900ULL, 0xb60000b6b6b6b600ULL, | ||
134 | 0x2f00002f2f2f2f00ULL, 0xfd0000fdfdfdfd00ULL, 0xb40000b4b4b4b400ULL, | ||
135 | 0x5900005959595900ULL, 0x7800007878787800ULL, 0x9800009898989800ULL, | ||
136 | 0x0600000606060600ULL, 0x6a00006a6a6a6a00ULL, 0xe70000e7e7e7e700ULL, | ||
137 | 0x4600004646464600ULL, 0x7100007171717100ULL, 0xba0000babababa00ULL, | ||
138 | 0xd40000d4d4d4d400ULL, 0x2500002525252500ULL, 0xab0000abababab00ULL, | ||
139 | 0x4200004242424200ULL, 0x8800008888888800ULL, 0xa20000a2a2a2a200ULL, | ||
140 | 0x8d00008d8d8d8d00ULL, 0xfa0000fafafafa00ULL, 0x7200007272727200ULL, | ||
141 | 0x0700000707070700ULL, 0xb90000b9b9b9b900ULL, 0x5500005555555500ULL, | ||
142 | 0xf80000f8f8f8f800ULL, 0xee0000eeeeeeee00ULL, 0xac0000acacacac00ULL, | ||
143 | 0x0a00000a0a0a0a00ULL, 0x3600003636363600ULL, 0x4900004949494900ULL, | ||
144 | 0x2a00002a2a2a2a00ULL, 0x6800006868686800ULL, 0x3c00003c3c3c3c00ULL, | ||
145 | 0x3800003838383800ULL, 0xf10000f1f1f1f100ULL, 0xa40000a4a4a4a400ULL, | ||
146 | 0x4000004040404000ULL, 0x2800002828282800ULL, 0xd30000d3d3d3d300ULL, | ||
147 | 0x7b00007b7b7b7b00ULL, 0xbb0000bbbbbbbb00ULL, 0xc90000c9c9c9c900ULL, | ||
148 | 0x4300004343434300ULL, 0xc10000c1c1c1c100ULL, 0x1500001515151500ULL, | ||
149 | 0xe30000e3e3e3e300ULL, 0xad0000adadadad00ULL, 0xf40000f4f4f4f400ULL, | ||
150 | 0x7700007777777700ULL, 0xc70000c7c7c7c700ULL, 0x8000008080808000ULL, | ||
151 | 0x9e00009e9e9e9e00ULL, | ||
152 | }; | ||
153 | |||
154 | const u64 camellia_sp22000222[256] = { | ||
155 | 0xe0e0000000e0e0e0ULL, 0x0505000000050505ULL, 0x5858000000585858ULL, | ||
156 | 0xd9d9000000d9d9d9ULL, 0x6767000000676767ULL, 0x4e4e0000004e4e4eULL, | ||
157 | 0x8181000000818181ULL, 0xcbcb000000cbcbcbULL, 0xc9c9000000c9c9c9ULL, | ||
158 | 0x0b0b0000000b0b0bULL, 0xaeae000000aeaeaeULL, 0x6a6a0000006a6a6aULL, | ||
159 | 0xd5d5000000d5d5d5ULL, 0x1818000000181818ULL, 0x5d5d0000005d5d5dULL, | ||
160 | 0x8282000000828282ULL, 0x4646000000464646ULL, 0xdfdf000000dfdfdfULL, | ||
161 | 0xd6d6000000d6d6d6ULL, 0x2727000000272727ULL, 0x8a8a0000008a8a8aULL, | ||
162 | 0x3232000000323232ULL, 0x4b4b0000004b4b4bULL, 0x4242000000424242ULL, | ||
163 | 0xdbdb000000dbdbdbULL, 0x1c1c0000001c1c1cULL, 0x9e9e0000009e9e9eULL, | ||
164 | 0x9c9c0000009c9c9cULL, 0x3a3a0000003a3a3aULL, 0xcaca000000cacacaULL, | ||
165 | 0x2525000000252525ULL, 0x7b7b0000007b7b7bULL, 0x0d0d0000000d0d0dULL, | ||
166 | 0x7171000000717171ULL, 0x5f5f0000005f5f5fULL, 0x1f1f0000001f1f1fULL, | ||
167 | 0xf8f8000000f8f8f8ULL, 0xd7d7000000d7d7d7ULL, 0x3e3e0000003e3e3eULL, | ||
168 | 0x9d9d0000009d9d9dULL, 0x7c7c0000007c7c7cULL, 0x6060000000606060ULL, | ||
169 | 0xb9b9000000b9b9b9ULL, 0xbebe000000bebebeULL, 0xbcbc000000bcbcbcULL, | ||
170 | 0x8b8b0000008b8b8bULL, 0x1616000000161616ULL, 0x3434000000343434ULL, | ||
171 | 0x4d4d0000004d4d4dULL, 0xc3c3000000c3c3c3ULL, 0x7272000000727272ULL, | ||
172 | 0x9595000000959595ULL, 0xabab000000abababULL, 0x8e8e0000008e8e8eULL, | ||
173 | 0xbaba000000bababaULL, 0x7a7a0000007a7a7aULL, 0xb3b3000000b3b3b3ULL, | ||
174 | 0x0202000000020202ULL, 0xb4b4000000b4b4b4ULL, 0xadad000000adadadULL, | ||
175 | 0xa2a2000000a2a2a2ULL, 0xacac000000acacacULL, 0xd8d8000000d8d8d8ULL, | ||
176 | 0x9a9a0000009a9a9aULL, 0x1717000000171717ULL, 0x1a1a0000001a1a1aULL, | ||
177 | 0x3535000000353535ULL, 0xcccc000000ccccccULL, 0xf7f7000000f7f7f7ULL, | ||
178 | 0x9999000000999999ULL, 0x6161000000616161ULL, 0x5a5a0000005a5a5aULL, | ||
179 | 0xe8e8000000e8e8e8ULL, 0x2424000000242424ULL, 0x5656000000565656ULL, | ||
180 | 0x4040000000404040ULL, 0xe1e1000000e1e1e1ULL, 0x6363000000636363ULL, | ||
181 | 0x0909000000090909ULL, 0x3333000000333333ULL, 0xbfbf000000bfbfbfULL, | ||
182 | 0x9898000000989898ULL, 0x9797000000979797ULL, 0x8585000000858585ULL, | ||
183 | 0x6868000000686868ULL, 0xfcfc000000fcfcfcULL, 0xecec000000ecececULL, | ||
184 | 0x0a0a0000000a0a0aULL, 0xdada000000dadadaULL, 0x6f6f0000006f6f6fULL, | ||
185 | 0x5353000000535353ULL, 0x6262000000626262ULL, 0xa3a3000000a3a3a3ULL, | ||
186 | 0x2e2e0000002e2e2eULL, 0x0808000000080808ULL, 0xafaf000000afafafULL, | ||
187 | 0x2828000000282828ULL, 0xb0b0000000b0b0b0ULL, 0x7474000000747474ULL, | ||
188 | 0xc2c2000000c2c2c2ULL, 0xbdbd000000bdbdbdULL, 0x3636000000363636ULL, | ||
189 | 0x2222000000222222ULL, 0x3838000000383838ULL, 0x6464000000646464ULL, | ||
190 | 0x1e1e0000001e1e1eULL, 0x3939000000393939ULL, 0x2c2c0000002c2c2cULL, | ||
191 | 0xa6a6000000a6a6a6ULL, 0x3030000000303030ULL, 0xe5e5000000e5e5e5ULL, | ||
192 | 0x4444000000444444ULL, 0xfdfd000000fdfdfdULL, 0x8888000000888888ULL, | ||
193 | 0x9f9f0000009f9f9fULL, 0x6565000000656565ULL, 0x8787000000878787ULL, | ||
194 | 0x6b6b0000006b6b6bULL, 0xf4f4000000f4f4f4ULL, 0x2323000000232323ULL, | ||
195 | 0x4848000000484848ULL, 0x1010000000101010ULL, 0xd1d1000000d1d1d1ULL, | ||
196 | 0x5151000000515151ULL, 0xc0c0000000c0c0c0ULL, 0xf9f9000000f9f9f9ULL, | ||
197 | 0xd2d2000000d2d2d2ULL, 0xa0a0000000a0a0a0ULL, 0x5555000000555555ULL, | ||
198 | 0xa1a1000000a1a1a1ULL, 0x4141000000414141ULL, 0xfafa000000fafafaULL, | ||
199 | 0x4343000000434343ULL, 0x1313000000131313ULL, 0xc4c4000000c4c4c4ULL, | ||
200 | 0x2f2f0000002f2f2fULL, 0xa8a8000000a8a8a8ULL, 0xb6b6000000b6b6b6ULL, | ||
201 | 0x3c3c0000003c3c3cULL, 0x2b2b0000002b2b2bULL, 0xc1c1000000c1c1c1ULL, | ||
202 | 0xffff000000ffffffULL, 0xc8c8000000c8c8c8ULL, 0xa5a5000000a5a5a5ULL, | ||
203 | 0x2020000000202020ULL, 0x8989000000898989ULL, 0x0000000000000000ULL, | ||
204 | 0x9090000000909090ULL, 0x4747000000474747ULL, 0xefef000000efefefULL, | ||
205 | 0xeaea000000eaeaeaULL, 0xb7b7000000b7b7b7ULL, 0x1515000000151515ULL, | ||
206 | 0x0606000000060606ULL, 0xcdcd000000cdcdcdULL, 0xb5b5000000b5b5b5ULL, | ||
207 | 0x1212000000121212ULL, 0x7e7e0000007e7e7eULL, 0xbbbb000000bbbbbbULL, | ||
208 | 0x2929000000292929ULL, 0x0f0f0000000f0f0fULL, 0xb8b8000000b8b8b8ULL, | ||
209 | 0x0707000000070707ULL, 0x0404000000040404ULL, 0x9b9b0000009b9b9bULL, | ||
210 | 0x9494000000949494ULL, 0x2121000000212121ULL, 0x6666000000666666ULL, | ||
211 | 0xe6e6000000e6e6e6ULL, 0xcece000000cececeULL, 0xeded000000edededULL, | ||
212 | 0xe7e7000000e7e7e7ULL, 0x3b3b0000003b3b3bULL, 0xfefe000000fefefeULL, | ||
213 | 0x7f7f0000007f7f7fULL, 0xc5c5000000c5c5c5ULL, 0xa4a4000000a4a4a4ULL, | ||
214 | 0x3737000000373737ULL, 0xb1b1000000b1b1b1ULL, 0x4c4c0000004c4c4cULL, | ||
215 | 0x9191000000919191ULL, 0x6e6e0000006e6e6eULL, 0x8d8d0000008d8d8dULL, | ||
216 | 0x7676000000767676ULL, 0x0303000000030303ULL, 0x2d2d0000002d2d2dULL, | ||
217 | 0xdede000000dededeULL, 0x9696000000969696ULL, 0x2626000000262626ULL, | ||
218 | 0x7d7d0000007d7d7dULL, 0xc6c6000000c6c6c6ULL, 0x5c5c0000005c5c5cULL, | ||
219 | 0xd3d3000000d3d3d3ULL, 0xf2f2000000f2f2f2ULL, 0x4f4f0000004f4f4fULL, | ||
220 | 0x1919000000191919ULL, 0x3f3f0000003f3f3fULL, 0xdcdc000000dcdcdcULL, | ||
221 | 0x7979000000797979ULL, 0x1d1d0000001d1d1dULL, 0x5252000000525252ULL, | ||
222 | 0xebeb000000ebebebULL, 0xf3f3000000f3f3f3ULL, 0x6d6d0000006d6d6dULL, | ||
223 | 0x5e5e0000005e5e5eULL, 0xfbfb000000fbfbfbULL, 0x6969000000696969ULL, | ||
224 | 0xb2b2000000b2b2b2ULL, 0xf0f0000000f0f0f0ULL, 0x3131000000313131ULL, | ||
225 | 0x0c0c0000000c0c0cULL, 0xd4d4000000d4d4d4ULL, 0xcfcf000000cfcfcfULL, | ||
226 | 0x8c8c0000008c8c8cULL, 0xe2e2000000e2e2e2ULL, 0x7575000000757575ULL, | ||
227 | 0xa9a9000000a9a9a9ULL, 0x4a4a0000004a4a4aULL, 0x5757000000575757ULL, | ||
228 | 0x8484000000848484ULL, 0x1111000000111111ULL, 0x4545000000454545ULL, | ||
229 | 0x1b1b0000001b1b1bULL, 0xf5f5000000f5f5f5ULL, 0xe4e4000000e4e4e4ULL, | ||
230 | 0x0e0e0000000e0e0eULL, 0x7373000000737373ULL, 0xaaaa000000aaaaaaULL, | ||
231 | 0xf1f1000000f1f1f1ULL, 0xdddd000000ddddddULL, 0x5959000000595959ULL, | ||
232 | 0x1414000000141414ULL, 0x6c6c0000006c6c6cULL, 0x9292000000929292ULL, | ||
233 | 0x5454000000545454ULL, 0xd0d0000000d0d0d0ULL, 0x7878000000787878ULL, | ||
234 | 0x7070000000707070ULL, 0xe3e3000000e3e3e3ULL, 0x4949000000494949ULL, | ||
235 | 0x8080000000808080ULL, 0x5050000000505050ULL, 0xa7a7000000a7a7a7ULL, | ||
236 | 0xf6f6000000f6f6f6ULL, 0x7777000000777777ULL, 0x9393000000939393ULL, | ||
237 | 0x8686000000868686ULL, 0x8383000000838383ULL, 0x2a2a0000002a2a2aULL, | ||
238 | 0xc7c7000000c7c7c7ULL, 0x5b5b0000005b5b5bULL, 0xe9e9000000e9e9e9ULL, | ||
239 | 0xeeee000000eeeeeeULL, 0x8f8f0000008f8f8fULL, 0x0101000000010101ULL, | ||
240 | 0x3d3d0000003d3d3dULL, | ||
241 | }; | ||
242 | |||
243 | const u64 camellia_sp03303033[256] = { | ||
244 | 0x0038380038003838ULL, 0x0041410041004141ULL, 0x0016160016001616ULL, | ||
245 | 0x0076760076007676ULL, 0x00d9d900d900d9d9ULL, 0x0093930093009393ULL, | ||
246 | 0x0060600060006060ULL, 0x00f2f200f200f2f2ULL, 0x0072720072007272ULL, | ||
247 | 0x00c2c200c200c2c2ULL, 0x00abab00ab00ababULL, 0x009a9a009a009a9aULL, | ||
248 | 0x0075750075007575ULL, 0x0006060006000606ULL, 0x0057570057005757ULL, | ||
249 | 0x00a0a000a000a0a0ULL, 0x0091910091009191ULL, 0x00f7f700f700f7f7ULL, | ||
250 | 0x00b5b500b500b5b5ULL, 0x00c9c900c900c9c9ULL, 0x00a2a200a200a2a2ULL, | ||
251 | 0x008c8c008c008c8cULL, 0x00d2d200d200d2d2ULL, 0x0090900090009090ULL, | ||
252 | 0x00f6f600f600f6f6ULL, 0x0007070007000707ULL, 0x00a7a700a700a7a7ULL, | ||
253 | 0x0027270027002727ULL, 0x008e8e008e008e8eULL, 0x00b2b200b200b2b2ULL, | ||
254 | 0x0049490049004949ULL, 0x00dede00de00dedeULL, 0x0043430043004343ULL, | ||
255 | 0x005c5c005c005c5cULL, 0x00d7d700d700d7d7ULL, 0x00c7c700c700c7c7ULL, | ||
256 | 0x003e3e003e003e3eULL, 0x00f5f500f500f5f5ULL, 0x008f8f008f008f8fULL, | ||
257 | 0x0067670067006767ULL, 0x001f1f001f001f1fULL, 0x0018180018001818ULL, | ||
258 | 0x006e6e006e006e6eULL, 0x00afaf00af00afafULL, 0x002f2f002f002f2fULL, | ||
259 | 0x00e2e200e200e2e2ULL, 0x0085850085008585ULL, 0x000d0d000d000d0dULL, | ||
260 | 0x0053530053005353ULL, 0x00f0f000f000f0f0ULL, 0x009c9c009c009c9cULL, | ||
261 | 0x0065650065006565ULL, 0x00eaea00ea00eaeaULL, 0x00a3a300a300a3a3ULL, | ||
262 | 0x00aeae00ae00aeaeULL, 0x009e9e009e009e9eULL, 0x00ecec00ec00ececULL, | ||
263 | 0x0080800080008080ULL, 0x002d2d002d002d2dULL, 0x006b6b006b006b6bULL, | ||
264 | 0x00a8a800a800a8a8ULL, 0x002b2b002b002b2bULL, 0x0036360036003636ULL, | ||
265 | 0x00a6a600a600a6a6ULL, 0x00c5c500c500c5c5ULL, 0x0086860086008686ULL, | ||
266 | 0x004d4d004d004d4dULL, 0x0033330033003333ULL, 0x00fdfd00fd00fdfdULL, | ||
267 | 0x0066660066006666ULL, 0x0058580058005858ULL, 0x0096960096009696ULL, | ||
268 | 0x003a3a003a003a3aULL, 0x0009090009000909ULL, 0x0095950095009595ULL, | ||
269 | 0x0010100010001010ULL, 0x0078780078007878ULL, 0x00d8d800d800d8d8ULL, | ||
270 | 0x0042420042004242ULL, 0x00cccc00cc00ccccULL, 0x00efef00ef00efefULL, | ||
271 | 0x0026260026002626ULL, 0x00e5e500e500e5e5ULL, 0x0061610061006161ULL, | ||
272 | 0x001a1a001a001a1aULL, 0x003f3f003f003f3fULL, 0x003b3b003b003b3bULL, | ||
273 | 0x0082820082008282ULL, 0x00b6b600b600b6b6ULL, 0x00dbdb00db00dbdbULL, | ||
274 | 0x00d4d400d400d4d4ULL, 0x0098980098009898ULL, 0x00e8e800e800e8e8ULL, | ||
275 | 0x008b8b008b008b8bULL, 0x0002020002000202ULL, 0x00ebeb00eb00ebebULL, | ||
276 | 0x000a0a000a000a0aULL, 0x002c2c002c002c2cULL, 0x001d1d001d001d1dULL, | ||
277 | 0x00b0b000b000b0b0ULL, 0x006f6f006f006f6fULL, 0x008d8d008d008d8dULL, | ||
278 | 0x0088880088008888ULL, 0x000e0e000e000e0eULL, 0x0019190019001919ULL, | ||
279 | 0x0087870087008787ULL, 0x004e4e004e004e4eULL, 0x000b0b000b000b0bULL, | ||
280 | 0x00a9a900a900a9a9ULL, 0x000c0c000c000c0cULL, 0x0079790079007979ULL, | ||
281 | 0x0011110011001111ULL, 0x007f7f007f007f7fULL, 0x0022220022002222ULL, | ||
282 | 0x00e7e700e700e7e7ULL, 0x0059590059005959ULL, 0x00e1e100e100e1e1ULL, | ||
283 | 0x00dada00da00dadaULL, 0x003d3d003d003d3dULL, 0x00c8c800c800c8c8ULL, | ||
284 | 0x0012120012001212ULL, 0x0004040004000404ULL, 0x0074740074007474ULL, | ||
285 | 0x0054540054005454ULL, 0x0030300030003030ULL, 0x007e7e007e007e7eULL, | ||
286 | 0x00b4b400b400b4b4ULL, 0x0028280028002828ULL, 0x0055550055005555ULL, | ||
287 | 0x0068680068006868ULL, 0x0050500050005050ULL, 0x00bebe00be00bebeULL, | ||
288 | 0x00d0d000d000d0d0ULL, 0x00c4c400c400c4c4ULL, 0x0031310031003131ULL, | ||
289 | 0x00cbcb00cb00cbcbULL, 0x002a2a002a002a2aULL, 0x00adad00ad00adadULL, | ||
290 | 0x000f0f000f000f0fULL, 0x00caca00ca00cacaULL, 0x0070700070007070ULL, | ||
291 | 0x00ffff00ff00ffffULL, 0x0032320032003232ULL, 0x0069690069006969ULL, | ||
292 | 0x0008080008000808ULL, 0x0062620062006262ULL, 0x0000000000000000ULL, | ||
293 | 0x0024240024002424ULL, 0x00d1d100d100d1d1ULL, 0x00fbfb00fb00fbfbULL, | ||
294 | 0x00baba00ba00babaULL, 0x00eded00ed00ededULL, 0x0045450045004545ULL, | ||
295 | 0x0081810081008181ULL, 0x0073730073007373ULL, 0x006d6d006d006d6dULL, | ||
296 | 0x0084840084008484ULL, 0x009f9f009f009f9fULL, 0x00eeee00ee00eeeeULL, | ||
297 | 0x004a4a004a004a4aULL, 0x00c3c300c300c3c3ULL, 0x002e2e002e002e2eULL, | ||
298 | 0x00c1c100c100c1c1ULL, 0x0001010001000101ULL, 0x00e6e600e600e6e6ULL, | ||
299 | 0x0025250025002525ULL, 0x0048480048004848ULL, 0x0099990099009999ULL, | ||
300 | 0x00b9b900b900b9b9ULL, 0x00b3b300b300b3b3ULL, 0x007b7b007b007b7bULL, | ||
301 | 0x00f9f900f900f9f9ULL, 0x00cece00ce00ceceULL, 0x00bfbf00bf00bfbfULL, | ||
302 | 0x00dfdf00df00dfdfULL, 0x0071710071007171ULL, 0x0029290029002929ULL, | ||
303 | 0x00cdcd00cd00cdcdULL, 0x006c6c006c006c6cULL, 0x0013130013001313ULL, | ||
304 | 0x0064640064006464ULL, 0x009b9b009b009b9bULL, 0x0063630063006363ULL, | ||
305 | 0x009d9d009d009d9dULL, 0x00c0c000c000c0c0ULL, 0x004b4b004b004b4bULL, | ||
306 | 0x00b7b700b700b7b7ULL, 0x00a5a500a500a5a5ULL, 0x0089890089008989ULL, | ||
307 | 0x005f5f005f005f5fULL, 0x00b1b100b100b1b1ULL, 0x0017170017001717ULL, | ||
308 | 0x00f4f400f400f4f4ULL, 0x00bcbc00bc00bcbcULL, 0x00d3d300d300d3d3ULL, | ||
309 | 0x0046460046004646ULL, 0x00cfcf00cf00cfcfULL, 0x0037370037003737ULL, | ||
310 | 0x005e5e005e005e5eULL, 0x0047470047004747ULL, 0x0094940094009494ULL, | ||
311 | 0x00fafa00fa00fafaULL, 0x00fcfc00fc00fcfcULL, 0x005b5b005b005b5bULL, | ||
312 | 0x0097970097009797ULL, 0x00fefe00fe00fefeULL, 0x005a5a005a005a5aULL, | ||
313 | 0x00acac00ac00acacULL, 0x003c3c003c003c3cULL, 0x004c4c004c004c4cULL, | ||
314 | 0x0003030003000303ULL, 0x0035350035003535ULL, 0x00f3f300f300f3f3ULL, | ||
315 | 0x0023230023002323ULL, 0x00b8b800b800b8b8ULL, 0x005d5d005d005d5dULL, | ||
316 | 0x006a6a006a006a6aULL, 0x0092920092009292ULL, 0x00d5d500d500d5d5ULL, | ||
317 | 0x0021210021002121ULL, 0x0044440044004444ULL, 0x0051510051005151ULL, | ||
318 | 0x00c6c600c600c6c6ULL, 0x007d7d007d007d7dULL, 0x0039390039003939ULL, | ||
319 | 0x0083830083008383ULL, 0x00dcdc00dc00dcdcULL, 0x00aaaa00aa00aaaaULL, | ||
320 | 0x007c7c007c007c7cULL, 0x0077770077007777ULL, 0x0056560056005656ULL, | ||
321 | 0x0005050005000505ULL, 0x001b1b001b001b1bULL, 0x00a4a400a400a4a4ULL, | ||
322 | 0x0015150015001515ULL, 0x0034340034003434ULL, 0x001e1e001e001e1eULL, | ||
323 | 0x001c1c001c001c1cULL, 0x00f8f800f800f8f8ULL, 0x0052520052005252ULL, | ||
324 | 0x0020200020002020ULL, 0x0014140014001414ULL, 0x00e9e900e900e9e9ULL, | ||
325 | 0x00bdbd00bd00bdbdULL, 0x00dddd00dd00ddddULL, 0x00e4e400e400e4e4ULL, | ||
326 | 0x00a1a100a100a1a1ULL, 0x00e0e000e000e0e0ULL, 0x008a8a008a008a8aULL, | ||
327 | 0x00f1f100f100f1f1ULL, 0x00d6d600d600d6d6ULL, 0x007a7a007a007a7aULL, | ||
328 | 0x00bbbb00bb00bbbbULL, 0x00e3e300e300e3e3ULL, 0x0040400040004040ULL, | ||
329 | 0x004f4f004f004f4fULL, | ||
330 | }; | ||
331 | |||
332 | const u64 camellia_sp00444404[256] = { | ||
333 | 0x0000707070700070ULL, 0x00002c2c2c2c002cULL, 0x0000b3b3b3b300b3ULL, | ||
334 | 0x0000c0c0c0c000c0ULL, 0x0000e4e4e4e400e4ULL, 0x0000575757570057ULL, | ||
335 | 0x0000eaeaeaea00eaULL, 0x0000aeaeaeae00aeULL, 0x0000232323230023ULL, | ||
336 | 0x00006b6b6b6b006bULL, 0x0000454545450045ULL, 0x0000a5a5a5a500a5ULL, | ||
337 | 0x0000edededed00edULL, 0x00004f4f4f4f004fULL, 0x00001d1d1d1d001dULL, | ||
338 | 0x0000929292920092ULL, 0x0000868686860086ULL, 0x0000afafafaf00afULL, | ||
339 | 0x00007c7c7c7c007cULL, 0x00001f1f1f1f001fULL, 0x00003e3e3e3e003eULL, | ||
340 | 0x0000dcdcdcdc00dcULL, 0x00005e5e5e5e005eULL, 0x00000b0b0b0b000bULL, | ||
341 | 0x0000a6a6a6a600a6ULL, 0x0000393939390039ULL, 0x0000d5d5d5d500d5ULL, | ||
342 | 0x00005d5d5d5d005dULL, 0x0000d9d9d9d900d9ULL, 0x00005a5a5a5a005aULL, | ||
343 | 0x0000515151510051ULL, 0x00006c6c6c6c006cULL, 0x00008b8b8b8b008bULL, | ||
344 | 0x00009a9a9a9a009aULL, 0x0000fbfbfbfb00fbULL, 0x0000b0b0b0b000b0ULL, | ||
345 | 0x0000747474740074ULL, 0x00002b2b2b2b002bULL, 0x0000f0f0f0f000f0ULL, | ||
346 | 0x0000848484840084ULL, 0x0000dfdfdfdf00dfULL, 0x0000cbcbcbcb00cbULL, | ||
347 | 0x0000343434340034ULL, 0x0000767676760076ULL, 0x00006d6d6d6d006dULL, | ||
348 | 0x0000a9a9a9a900a9ULL, 0x0000d1d1d1d100d1ULL, 0x0000040404040004ULL, | ||
349 | 0x0000141414140014ULL, 0x00003a3a3a3a003aULL, 0x0000dededede00deULL, | ||
350 | 0x0000111111110011ULL, 0x0000323232320032ULL, 0x00009c9c9c9c009cULL, | ||
351 | 0x0000535353530053ULL, 0x0000f2f2f2f200f2ULL, 0x0000fefefefe00feULL, | ||
352 | 0x0000cfcfcfcf00cfULL, 0x0000c3c3c3c300c3ULL, 0x00007a7a7a7a007aULL, | ||
353 | 0x0000242424240024ULL, 0x0000e8e8e8e800e8ULL, 0x0000606060600060ULL, | ||
354 | 0x0000696969690069ULL, 0x0000aaaaaaaa00aaULL, 0x0000a0a0a0a000a0ULL, | ||
355 | 0x0000a1a1a1a100a1ULL, 0x0000626262620062ULL, 0x0000545454540054ULL, | ||
356 | 0x00001e1e1e1e001eULL, 0x0000e0e0e0e000e0ULL, 0x0000646464640064ULL, | ||
357 | 0x0000101010100010ULL, 0x0000000000000000ULL, 0x0000a3a3a3a300a3ULL, | ||
358 | 0x0000757575750075ULL, 0x00008a8a8a8a008aULL, 0x0000e6e6e6e600e6ULL, | ||
359 | 0x0000090909090009ULL, 0x0000dddddddd00ddULL, 0x0000878787870087ULL, | ||
360 | 0x0000838383830083ULL, 0x0000cdcdcdcd00cdULL, 0x0000909090900090ULL, | ||
361 | 0x0000737373730073ULL, 0x0000f6f6f6f600f6ULL, 0x00009d9d9d9d009dULL, | ||
362 | 0x0000bfbfbfbf00bfULL, 0x0000525252520052ULL, 0x0000d8d8d8d800d8ULL, | ||
363 | 0x0000c8c8c8c800c8ULL, 0x0000c6c6c6c600c6ULL, 0x0000818181810081ULL, | ||
364 | 0x00006f6f6f6f006fULL, 0x0000131313130013ULL, 0x0000636363630063ULL, | ||
365 | 0x0000e9e9e9e900e9ULL, 0x0000a7a7a7a700a7ULL, 0x00009f9f9f9f009fULL, | ||
366 | 0x0000bcbcbcbc00bcULL, 0x0000292929290029ULL, 0x0000f9f9f9f900f9ULL, | ||
367 | 0x00002f2f2f2f002fULL, 0x0000b4b4b4b400b4ULL, 0x0000787878780078ULL, | ||
368 | 0x0000060606060006ULL, 0x0000e7e7e7e700e7ULL, 0x0000717171710071ULL, | ||
369 | 0x0000d4d4d4d400d4ULL, 0x0000abababab00abULL, 0x0000888888880088ULL, | ||
370 | 0x00008d8d8d8d008dULL, 0x0000727272720072ULL, 0x0000b9b9b9b900b9ULL, | ||
371 | 0x0000f8f8f8f800f8ULL, 0x0000acacacac00acULL, 0x0000363636360036ULL, | ||
372 | 0x00002a2a2a2a002aULL, 0x00003c3c3c3c003cULL, 0x0000f1f1f1f100f1ULL, | ||
373 | 0x0000404040400040ULL, 0x0000d3d3d3d300d3ULL, 0x0000bbbbbbbb00bbULL, | ||
374 | 0x0000434343430043ULL, 0x0000151515150015ULL, 0x0000adadadad00adULL, | ||
375 | 0x0000777777770077ULL, 0x0000808080800080ULL, 0x0000828282820082ULL, | ||
376 | 0x0000ecececec00ecULL, 0x0000272727270027ULL, 0x0000e5e5e5e500e5ULL, | ||
377 | 0x0000858585850085ULL, 0x0000353535350035ULL, 0x00000c0c0c0c000cULL, | ||
378 | 0x0000414141410041ULL, 0x0000efefefef00efULL, 0x0000939393930093ULL, | ||
379 | 0x0000191919190019ULL, 0x0000212121210021ULL, 0x00000e0e0e0e000eULL, | ||
380 | 0x00004e4e4e4e004eULL, 0x0000656565650065ULL, 0x0000bdbdbdbd00bdULL, | ||
381 | 0x0000b8b8b8b800b8ULL, 0x00008f8f8f8f008fULL, 0x0000ebebebeb00ebULL, | ||
382 | 0x0000cececece00ceULL, 0x0000303030300030ULL, 0x00005f5f5f5f005fULL, | ||
383 | 0x0000c5c5c5c500c5ULL, 0x00001a1a1a1a001aULL, 0x0000e1e1e1e100e1ULL, | ||
384 | 0x0000cacacaca00caULL, 0x0000474747470047ULL, 0x00003d3d3d3d003dULL, | ||
385 | 0x0000010101010001ULL, 0x0000d6d6d6d600d6ULL, 0x0000565656560056ULL, | ||
386 | 0x00004d4d4d4d004dULL, 0x00000d0d0d0d000dULL, 0x0000666666660066ULL, | ||
387 | 0x0000cccccccc00ccULL, 0x00002d2d2d2d002dULL, 0x0000121212120012ULL, | ||
388 | 0x0000202020200020ULL, 0x0000b1b1b1b100b1ULL, 0x0000999999990099ULL, | ||
389 | 0x00004c4c4c4c004cULL, 0x0000c2c2c2c200c2ULL, 0x00007e7e7e7e007eULL, | ||
390 | 0x0000050505050005ULL, 0x0000b7b7b7b700b7ULL, 0x0000313131310031ULL, | ||
391 | 0x0000171717170017ULL, 0x0000d7d7d7d700d7ULL, 0x0000585858580058ULL, | ||
392 | 0x0000616161610061ULL, 0x00001b1b1b1b001bULL, 0x00001c1c1c1c001cULL, | ||
393 | 0x00000f0f0f0f000fULL, 0x0000161616160016ULL, 0x0000181818180018ULL, | ||
394 | 0x0000222222220022ULL, 0x0000444444440044ULL, 0x0000b2b2b2b200b2ULL, | ||
395 | 0x0000b5b5b5b500b5ULL, 0x0000919191910091ULL, 0x0000080808080008ULL, | ||
396 | 0x0000a8a8a8a800a8ULL, 0x0000fcfcfcfc00fcULL, 0x0000505050500050ULL, | ||
397 | 0x0000d0d0d0d000d0ULL, 0x00007d7d7d7d007dULL, 0x0000898989890089ULL, | ||
398 | 0x0000979797970097ULL, 0x00005b5b5b5b005bULL, 0x0000959595950095ULL, | ||
399 | 0x0000ffffffff00ffULL, 0x0000d2d2d2d200d2ULL, 0x0000c4c4c4c400c4ULL, | ||
400 | 0x0000484848480048ULL, 0x0000f7f7f7f700f7ULL, 0x0000dbdbdbdb00dbULL, | ||
401 | 0x0000030303030003ULL, 0x0000dadadada00daULL, 0x00003f3f3f3f003fULL, | ||
402 | 0x0000949494940094ULL, 0x00005c5c5c5c005cULL, 0x0000020202020002ULL, | ||
403 | 0x00004a4a4a4a004aULL, 0x0000333333330033ULL, 0x0000676767670067ULL, | ||
404 | 0x0000f3f3f3f300f3ULL, 0x00007f7f7f7f007fULL, 0x0000e2e2e2e200e2ULL, | ||
405 | 0x00009b9b9b9b009bULL, 0x0000262626260026ULL, 0x0000373737370037ULL, | ||
406 | 0x00003b3b3b3b003bULL, 0x0000969696960096ULL, 0x00004b4b4b4b004bULL, | ||
407 | 0x0000bebebebe00beULL, 0x00002e2e2e2e002eULL, 0x0000797979790079ULL, | ||
408 | 0x00008c8c8c8c008cULL, 0x00006e6e6e6e006eULL, 0x00008e8e8e8e008eULL, | ||
409 | 0x0000f5f5f5f500f5ULL, 0x0000b6b6b6b600b6ULL, 0x0000fdfdfdfd00fdULL, | ||
410 | 0x0000595959590059ULL, 0x0000989898980098ULL, 0x00006a6a6a6a006aULL, | ||
411 | 0x0000464646460046ULL, 0x0000babababa00baULL, 0x0000252525250025ULL, | ||
412 | 0x0000424242420042ULL, 0x0000a2a2a2a200a2ULL, 0x0000fafafafa00faULL, | ||
413 | 0x0000070707070007ULL, 0x0000555555550055ULL, 0x0000eeeeeeee00eeULL, | ||
414 | 0x00000a0a0a0a000aULL, 0x0000494949490049ULL, 0x0000686868680068ULL, | ||
415 | 0x0000383838380038ULL, 0x0000a4a4a4a400a4ULL, 0x0000282828280028ULL, | ||
416 | 0x00007b7b7b7b007bULL, 0x0000c9c9c9c900c9ULL, 0x0000c1c1c1c100c1ULL, | ||
417 | 0x0000e3e3e3e300e3ULL, 0x0000f4f4f4f400f4ULL, 0x0000c7c7c7c700c7ULL, | ||
418 | 0x00009e9e9e9e009eULL, | ||
419 | }; | ||
420 | |||
421 | const u64 camellia_sp02220222[256] = { | ||
422 | 0x00e0e0e000e0e0e0ULL, 0x0005050500050505ULL, 0x0058585800585858ULL, | ||
423 | 0x00d9d9d900d9d9d9ULL, 0x0067676700676767ULL, 0x004e4e4e004e4e4eULL, | ||
424 | 0x0081818100818181ULL, 0x00cbcbcb00cbcbcbULL, 0x00c9c9c900c9c9c9ULL, | ||
425 | 0x000b0b0b000b0b0bULL, 0x00aeaeae00aeaeaeULL, 0x006a6a6a006a6a6aULL, | ||
426 | 0x00d5d5d500d5d5d5ULL, 0x0018181800181818ULL, 0x005d5d5d005d5d5dULL, | ||
427 | 0x0082828200828282ULL, 0x0046464600464646ULL, 0x00dfdfdf00dfdfdfULL, | ||
428 | 0x00d6d6d600d6d6d6ULL, 0x0027272700272727ULL, 0x008a8a8a008a8a8aULL, | ||
429 | 0x0032323200323232ULL, 0x004b4b4b004b4b4bULL, 0x0042424200424242ULL, | ||
430 | 0x00dbdbdb00dbdbdbULL, 0x001c1c1c001c1c1cULL, 0x009e9e9e009e9e9eULL, | ||
431 | 0x009c9c9c009c9c9cULL, 0x003a3a3a003a3a3aULL, 0x00cacaca00cacacaULL, | ||
432 | 0x0025252500252525ULL, 0x007b7b7b007b7b7bULL, 0x000d0d0d000d0d0dULL, | ||
433 | 0x0071717100717171ULL, 0x005f5f5f005f5f5fULL, 0x001f1f1f001f1f1fULL, | ||
434 | 0x00f8f8f800f8f8f8ULL, 0x00d7d7d700d7d7d7ULL, 0x003e3e3e003e3e3eULL, | ||
435 | 0x009d9d9d009d9d9dULL, 0x007c7c7c007c7c7cULL, 0x0060606000606060ULL, | ||
436 | 0x00b9b9b900b9b9b9ULL, 0x00bebebe00bebebeULL, 0x00bcbcbc00bcbcbcULL, | ||
437 | 0x008b8b8b008b8b8bULL, 0x0016161600161616ULL, 0x0034343400343434ULL, | ||
438 | 0x004d4d4d004d4d4dULL, 0x00c3c3c300c3c3c3ULL, 0x0072727200727272ULL, | ||
439 | 0x0095959500959595ULL, 0x00ababab00abababULL, 0x008e8e8e008e8e8eULL, | ||
440 | 0x00bababa00bababaULL, 0x007a7a7a007a7a7aULL, 0x00b3b3b300b3b3b3ULL, | ||
441 | 0x0002020200020202ULL, 0x00b4b4b400b4b4b4ULL, 0x00adadad00adadadULL, | ||
442 | 0x00a2a2a200a2a2a2ULL, 0x00acacac00acacacULL, 0x00d8d8d800d8d8d8ULL, | ||
443 | 0x009a9a9a009a9a9aULL, 0x0017171700171717ULL, 0x001a1a1a001a1a1aULL, | ||
444 | 0x0035353500353535ULL, 0x00cccccc00ccccccULL, 0x00f7f7f700f7f7f7ULL, | ||
445 | 0x0099999900999999ULL, 0x0061616100616161ULL, 0x005a5a5a005a5a5aULL, | ||
446 | 0x00e8e8e800e8e8e8ULL, 0x0024242400242424ULL, 0x0056565600565656ULL, | ||
447 | 0x0040404000404040ULL, 0x00e1e1e100e1e1e1ULL, 0x0063636300636363ULL, | ||
448 | 0x0009090900090909ULL, 0x0033333300333333ULL, 0x00bfbfbf00bfbfbfULL, | ||
449 | 0x0098989800989898ULL, 0x0097979700979797ULL, 0x0085858500858585ULL, | ||
450 | 0x0068686800686868ULL, 0x00fcfcfc00fcfcfcULL, 0x00ececec00ecececULL, | ||
451 | 0x000a0a0a000a0a0aULL, 0x00dadada00dadadaULL, 0x006f6f6f006f6f6fULL, | ||
452 | 0x0053535300535353ULL, 0x0062626200626262ULL, 0x00a3a3a300a3a3a3ULL, | ||
453 | 0x002e2e2e002e2e2eULL, 0x0008080800080808ULL, 0x00afafaf00afafafULL, | ||
454 | 0x0028282800282828ULL, 0x00b0b0b000b0b0b0ULL, 0x0074747400747474ULL, | ||
455 | 0x00c2c2c200c2c2c2ULL, 0x00bdbdbd00bdbdbdULL, 0x0036363600363636ULL, | ||
456 | 0x0022222200222222ULL, 0x0038383800383838ULL, 0x0064646400646464ULL, | ||
457 | 0x001e1e1e001e1e1eULL, 0x0039393900393939ULL, 0x002c2c2c002c2c2cULL, | ||
458 | 0x00a6a6a600a6a6a6ULL, 0x0030303000303030ULL, 0x00e5e5e500e5e5e5ULL, | ||
459 | 0x0044444400444444ULL, 0x00fdfdfd00fdfdfdULL, 0x0088888800888888ULL, | ||
460 | 0x009f9f9f009f9f9fULL, 0x0065656500656565ULL, 0x0087878700878787ULL, | ||
461 | 0x006b6b6b006b6b6bULL, 0x00f4f4f400f4f4f4ULL, 0x0023232300232323ULL, | ||
462 | 0x0048484800484848ULL, 0x0010101000101010ULL, 0x00d1d1d100d1d1d1ULL, | ||
463 | 0x0051515100515151ULL, 0x00c0c0c000c0c0c0ULL, 0x00f9f9f900f9f9f9ULL, | ||
464 | 0x00d2d2d200d2d2d2ULL, 0x00a0a0a000a0a0a0ULL, 0x0055555500555555ULL, | ||
465 | 0x00a1a1a100a1a1a1ULL, 0x0041414100414141ULL, 0x00fafafa00fafafaULL, | ||
466 | 0x0043434300434343ULL, 0x0013131300131313ULL, 0x00c4c4c400c4c4c4ULL, | ||
467 | 0x002f2f2f002f2f2fULL, 0x00a8a8a800a8a8a8ULL, 0x00b6b6b600b6b6b6ULL, | ||
468 | 0x003c3c3c003c3c3cULL, 0x002b2b2b002b2b2bULL, 0x00c1c1c100c1c1c1ULL, | ||
469 | 0x00ffffff00ffffffULL, 0x00c8c8c800c8c8c8ULL, 0x00a5a5a500a5a5a5ULL, | ||
470 | 0x0020202000202020ULL, 0x0089898900898989ULL, 0x0000000000000000ULL, | ||
471 | 0x0090909000909090ULL, 0x0047474700474747ULL, 0x00efefef00efefefULL, | ||
472 | 0x00eaeaea00eaeaeaULL, 0x00b7b7b700b7b7b7ULL, 0x0015151500151515ULL, | ||
473 | 0x0006060600060606ULL, 0x00cdcdcd00cdcdcdULL, 0x00b5b5b500b5b5b5ULL, | ||
474 | 0x0012121200121212ULL, 0x007e7e7e007e7e7eULL, 0x00bbbbbb00bbbbbbULL, | ||
475 | 0x0029292900292929ULL, 0x000f0f0f000f0f0fULL, 0x00b8b8b800b8b8b8ULL, | ||
476 | 0x0007070700070707ULL, 0x0004040400040404ULL, 0x009b9b9b009b9b9bULL, | ||
477 | 0x0094949400949494ULL, 0x0021212100212121ULL, 0x0066666600666666ULL, | ||
478 | 0x00e6e6e600e6e6e6ULL, 0x00cecece00cececeULL, 0x00ededed00edededULL, | ||
479 | 0x00e7e7e700e7e7e7ULL, 0x003b3b3b003b3b3bULL, 0x00fefefe00fefefeULL, | ||
480 | 0x007f7f7f007f7f7fULL, 0x00c5c5c500c5c5c5ULL, 0x00a4a4a400a4a4a4ULL, | ||
481 | 0x0037373700373737ULL, 0x00b1b1b100b1b1b1ULL, 0x004c4c4c004c4c4cULL, | ||
482 | 0x0091919100919191ULL, 0x006e6e6e006e6e6eULL, 0x008d8d8d008d8d8dULL, | ||
483 | 0x0076767600767676ULL, 0x0003030300030303ULL, 0x002d2d2d002d2d2dULL, | ||
484 | 0x00dedede00dededeULL, 0x0096969600969696ULL, 0x0026262600262626ULL, | ||
485 | 0x007d7d7d007d7d7dULL, 0x00c6c6c600c6c6c6ULL, 0x005c5c5c005c5c5cULL, | ||
486 | 0x00d3d3d300d3d3d3ULL, 0x00f2f2f200f2f2f2ULL, 0x004f4f4f004f4f4fULL, | ||
487 | 0x0019191900191919ULL, 0x003f3f3f003f3f3fULL, 0x00dcdcdc00dcdcdcULL, | ||
488 | 0x0079797900797979ULL, 0x001d1d1d001d1d1dULL, 0x0052525200525252ULL, | ||
489 | 0x00ebebeb00ebebebULL, 0x00f3f3f300f3f3f3ULL, 0x006d6d6d006d6d6dULL, | ||
490 | 0x005e5e5e005e5e5eULL, 0x00fbfbfb00fbfbfbULL, 0x0069696900696969ULL, | ||
491 | 0x00b2b2b200b2b2b2ULL, 0x00f0f0f000f0f0f0ULL, 0x0031313100313131ULL, | ||
492 | 0x000c0c0c000c0c0cULL, 0x00d4d4d400d4d4d4ULL, 0x00cfcfcf00cfcfcfULL, | ||
493 | 0x008c8c8c008c8c8cULL, 0x00e2e2e200e2e2e2ULL, 0x0075757500757575ULL, | ||
494 | 0x00a9a9a900a9a9a9ULL, 0x004a4a4a004a4a4aULL, 0x0057575700575757ULL, | ||
495 | 0x0084848400848484ULL, 0x0011111100111111ULL, 0x0045454500454545ULL, | ||
496 | 0x001b1b1b001b1b1bULL, 0x00f5f5f500f5f5f5ULL, 0x00e4e4e400e4e4e4ULL, | ||
497 | 0x000e0e0e000e0e0eULL, 0x0073737300737373ULL, 0x00aaaaaa00aaaaaaULL, | ||
498 | 0x00f1f1f100f1f1f1ULL, 0x00dddddd00ddddddULL, 0x0059595900595959ULL, | ||
499 | 0x0014141400141414ULL, 0x006c6c6c006c6c6cULL, 0x0092929200929292ULL, | ||
500 | 0x0054545400545454ULL, 0x00d0d0d000d0d0d0ULL, 0x0078787800787878ULL, | ||
501 | 0x0070707000707070ULL, 0x00e3e3e300e3e3e3ULL, 0x0049494900494949ULL, | ||
502 | 0x0080808000808080ULL, 0x0050505000505050ULL, 0x00a7a7a700a7a7a7ULL, | ||
503 | 0x00f6f6f600f6f6f6ULL, 0x0077777700777777ULL, 0x0093939300939393ULL, | ||
504 | 0x0086868600868686ULL, 0x0083838300838383ULL, 0x002a2a2a002a2a2aULL, | ||
505 | 0x00c7c7c700c7c7c7ULL, 0x005b5b5b005b5b5bULL, 0x00e9e9e900e9e9e9ULL, | ||
506 | 0x00eeeeee00eeeeeeULL, 0x008f8f8f008f8f8fULL, 0x0001010100010101ULL, | ||
507 | 0x003d3d3d003d3d3dULL, | ||
508 | }; | ||
509 | |||
510 | const u64 camellia_sp30333033[256] = { | ||
511 | 0x3800383838003838ULL, 0x4100414141004141ULL, 0x1600161616001616ULL, | ||
512 | 0x7600767676007676ULL, 0xd900d9d9d900d9d9ULL, 0x9300939393009393ULL, | ||
513 | 0x6000606060006060ULL, 0xf200f2f2f200f2f2ULL, 0x7200727272007272ULL, | ||
514 | 0xc200c2c2c200c2c2ULL, 0xab00ababab00ababULL, 0x9a009a9a9a009a9aULL, | ||
515 | 0x7500757575007575ULL, 0x0600060606000606ULL, 0x5700575757005757ULL, | ||
516 | 0xa000a0a0a000a0a0ULL, 0x9100919191009191ULL, 0xf700f7f7f700f7f7ULL, | ||
517 | 0xb500b5b5b500b5b5ULL, 0xc900c9c9c900c9c9ULL, 0xa200a2a2a200a2a2ULL, | ||
518 | 0x8c008c8c8c008c8cULL, 0xd200d2d2d200d2d2ULL, 0x9000909090009090ULL, | ||
519 | 0xf600f6f6f600f6f6ULL, 0x0700070707000707ULL, 0xa700a7a7a700a7a7ULL, | ||
520 | 0x2700272727002727ULL, 0x8e008e8e8e008e8eULL, 0xb200b2b2b200b2b2ULL, | ||
521 | 0x4900494949004949ULL, 0xde00dedede00dedeULL, 0x4300434343004343ULL, | ||
522 | 0x5c005c5c5c005c5cULL, 0xd700d7d7d700d7d7ULL, 0xc700c7c7c700c7c7ULL, | ||
523 | 0x3e003e3e3e003e3eULL, 0xf500f5f5f500f5f5ULL, 0x8f008f8f8f008f8fULL, | ||
524 | 0x6700676767006767ULL, 0x1f001f1f1f001f1fULL, 0x1800181818001818ULL, | ||
525 | 0x6e006e6e6e006e6eULL, 0xaf00afafaf00afafULL, 0x2f002f2f2f002f2fULL, | ||
526 | 0xe200e2e2e200e2e2ULL, 0x8500858585008585ULL, 0x0d000d0d0d000d0dULL, | ||
527 | 0x5300535353005353ULL, 0xf000f0f0f000f0f0ULL, 0x9c009c9c9c009c9cULL, | ||
528 | 0x6500656565006565ULL, 0xea00eaeaea00eaeaULL, 0xa300a3a3a300a3a3ULL, | ||
529 | 0xae00aeaeae00aeaeULL, 0x9e009e9e9e009e9eULL, 0xec00ececec00ececULL, | ||
530 | 0x8000808080008080ULL, 0x2d002d2d2d002d2dULL, 0x6b006b6b6b006b6bULL, | ||
531 | 0xa800a8a8a800a8a8ULL, 0x2b002b2b2b002b2bULL, 0x3600363636003636ULL, | ||
532 | 0xa600a6a6a600a6a6ULL, 0xc500c5c5c500c5c5ULL, 0x8600868686008686ULL, | ||
533 | 0x4d004d4d4d004d4dULL, 0x3300333333003333ULL, 0xfd00fdfdfd00fdfdULL, | ||
534 | 0x6600666666006666ULL, 0x5800585858005858ULL, 0x9600969696009696ULL, | ||
535 | 0x3a003a3a3a003a3aULL, 0x0900090909000909ULL, 0x9500959595009595ULL, | ||
536 | 0x1000101010001010ULL, 0x7800787878007878ULL, 0xd800d8d8d800d8d8ULL, | ||
537 | 0x4200424242004242ULL, 0xcc00cccccc00ccccULL, 0xef00efefef00efefULL, | ||
538 | 0x2600262626002626ULL, 0xe500e5e5e500e5e5ULL, 0x6100616161006161ULL, | ||
539 | 0x1a001a1a1a001a1aULL, 0x3f003f3f3f003f3fULL, 0x3b003b3b3b003b3bULL, | ||
540 | 0x8200828282008282ULL, 0xb600b6b6b600b6b6ULL, 0xdb00dbdbdb00dbdbULL, | ||
541 | 0xd400d4d4d400d4d4ULL, 0x9800989898009898ULL, 0xe800e8e8e800e8e8ULL, | ||
542 | 0x8b008b8b8b008b8bULL, 0x0200020202000202ULL, 0xeb00ebebeb00ebebULL, | ||
543 | 0x0a000a0a0a000a0aULL, 0x2c002c2c2c002c2cULL, 0x1d001d1d1d001d1dULL, | ||
544 | 0xb000b0b0b000b0b0ULL, 0x6f006f6f6f006f6fULL, 0x8d008d8d8d008d8dULL, | ||
545 | 0x8800888888008888ULL, 0x0e000e0e0e000e0eULL, 0x1900191919001919ULL, | ||
546 | 0x8700878787008787ULL, 0x4e004e4e4e004e4eULL, 0x0b000b0b0b000b0bULL, | ||
547 | 0xa900a9a9a900a9a9ULL, 0x0c000c0c0c000c0cULL, 0x7900797979007979ULL, | ||
548 | 0x1100111111001111ULL, 0x7f007f7f7f007f7fULL, 0x2200222222002222ULL, | ||
549 | 0xe700e7e7e700e7e7ULL, 0x5900595959005959ULL, 0xe100e1e1e100e1e1ULL, | ||
550 | 0xda00dadada00dadaULL, 0x3d003d3d3d003d3dULL, 0xc800c8c8c800c8c8ULL, | ||
551 | 0x1200121212001212ULL, 0x0400040404000404ULL, 0x7400747474007474ULL, | ||
552 | 0x5400545454005454ULL, 0x3000303030003030ULL, 0x7e007e7e7e007e7eULL, | ||
553 | 0xb400b4b4b400b4b4ULL, 0x2800282828002828ULL, 0x5500555555005555ULL, | ||
554 | 0x6800686868006868ULL, 0x5000505050005050ULL, 0xbe00bebebe00bebeULL, | ||
555 | 0xd000d0d0d000d0d0ULL, 0xc400c4c4c400c4c4ULL, 0x3100313131003131ULL, | ||
556 | 0xcb00cbcbcb00cbcbULL, 0x2a002a2a2a002a2aULL, 0xad00adadad00adadULL, | ||
557 | 0x0f000f0f0f000f0fULL, 0xca00cacaca00cacaULL, 0x7000707070007070ULL, | ||
558 | 0xff00ffffff00ffffULL, 0x3200323232003232ULL, 0x6900696969006969ULL, | ||
559 | 0x0800080808000808ULL, 0x6200626262006262ULL, 0x0000000000000000ULL, | ||
560 | 0x2400242424002424ULL, 0xd100d1d1d100d1d1ULL, 0xfb00fbfbfb00fbfbULL, | ||
561 | 0xba00bababa00babaULL, 0xed00ededed00ededULL, 0x4500454545004545ULL, | ||
562 | 0x8100818181008181ULL, 0x7300737373007373ULL, 0x6d006d6d6d006d6dULL, | ||
563 | 0x8400848484008484ULL, 0x9f009f9f9f009f9fULL, 0xee00eeeeee00eeeeULL, | ||
564 | 0x4a004a4a4a004a4aULL, 0xc300c3c3c300c3c3ULL, 0x2e002e2e2e002e2eULL, | ||
565 | 0xc100c1c1c100c1c1ULL, 0x0100010101000101ULL, 0xe600e6e6e600e6e6ULL, | ||
566 | 0x2500252525002525ULL, 0x4800484848004848ULL, 0x9900999999009999ULL, | ||
567 | 0xb900b9b9b900b9b9ULL, 0xb300b3b3b300b3b3ULL, 0x7b007b7b7b007b7bULL, | ||
568 | 0xf900f9f9f900f9f9ULL, 0xce00cecece00ceceULL, 0xbf00bfbfbf00bfbfULL, | ||
569 | 0xdf00dfdfdf00dfdfULL, 0x7100717171007171ULL, 0x2900292929002929ULL, | ||
570 | 0xcd00cdcdcd00cdcdULL, 0x6c006c6c6c006c6cULL, 0x1300131313001313ULL, | ||
571 | 0x6400646464006464ULL, 0x9b009b9b9b009b9bULL, 0x6300636363006363ULL, | ||
572 | 0x9d009d9d9d009d9dULL, 0xc000c0c0c000c0c0ULL, 0x4b004b4b4b004b4bULL, | ||
573 | 0xb700b7b7b700b7b7ULL, 0xa500a5a5a500a5a5ULL, 0x8900898989008989ULL, | ||
574 | 0x5f005f5f5f005f5fULL, 0xb100b1b1b100b1b1ULL, 0x1700171717001717ULL, | ||
575 | 0xf400f4f4f400f4f4ULL, 0xbc00bcbcbc00bcbcULL, 0xd300d3d3d300d3d3ULL, | ||
576 | 0x4600464646004646ULL, 0xcf00cfcfcf00cfcfULL, 0x3700373737003737ULL, | ||
577 | 0x5e005e5e5e005e5eULL, 0x4700474747004747ULL, 0x9400949494009494ULL, | ||
578 | 0xfa00fafafa00fafaULL, 0xfc00fcfcfc00fcfcULL, 0x5b005b5b5b005b5bULL, | ||
579 | 0x9700979797009797ULL, 0xfe00fefefe00fefeULL, 0x5a005a5a5a005a5aULL, | ||
580 | 0xac00acacac00acacULL, 0x3c003c3c3c003c3cULL, 0x4c004c4c4c004c4cULL, | ||
581 | 0x0300030303000303ULL, 0x3500353535003535ULL, 0xf300f3f3f300f3f3ULL, | ||
582 | 0x2300232323002323ULL, 0xb800b8b8b800b8b8ULL, 0x5d005d5d5d005d5dULL, | ||
583 | 0x6a006a6a6a006a6aULL, 0x9200929292009292ULL, 0xd500d5d5d500d5d5ULL, | ||
584 | 0x2100212121002121ULL, 0x4400444444004444ULL, 0x5100515151005151ULL, | ||
585 | 0xc600c6c6c600c6c6ULL, 0x7d007d7d7d007d7dULL, 0x3900393939003939ULL, | ||
586 | 0x8300838383008383ULL, 0xdc00dcdcdc00dcdcULL, 0xaa00aaaaaa00aaaaULL, | ||
587 | 0x7c007c7c7c007c7cULL, 0x7700777777007777ULL, 0x5600565656005656ULL, | ||
588 | 0x0500050505000505ULL, 0x1b001b1b1b001b1bULL, 0xa400a4a4a400a4a4ULL, | ||
589 | 0x1500151515001515ULL, 0x3400343434003434ULL, 0x1e001e1e1e001e1eULL, | ||
590 | 0x1c001c1c1c001c1cULL, 0xf800f8f8f800f8f8ULL, 0x5200525252005252ULL, | ||
591 | 0x2000202020002020ULL, 0x1400141414001414ULL, 0xe900e9e9e900e9e9ULL, | ||
592 | 0xbd00bdbdbd00bdbdULL, 0xdd00dddddd00ddddULL, 0xe400e4e4e400e4e4ULL, | ||
593 | 0xa100a1a1a100a1a1ULL, 0xe000e0e0e000e0e0ULL, 0x8a008a8a8a008a8aULL, | ||
594 | 0xf100f1f1f100f1f1ULL, 0xd600d6d6d600d6d6ULL, 0x7a007a7a7a007a7aULL, | ||
595 | 0xbb00bbbbbb00bbbbULL, 0xe300e3e3e300e3e3ULL, 0x4000404040004040ULL, | ||
596 | 0x4f004f4f4f004f4fULL, | ||
597 | }; | ||
598 | |||
599 | const u64 camellia_sp44044404[256] = { | ||
600 | 0x7070007070700070ULL, 0x2c2c002c2c2c002cULL, 0xb3b300b3b3b300b3ULL, | ||
601 | 0xc0c000c0c0c000c0ULL, 0xe4e400e4e4e400e4ULL, 0x5757005757570057ULL, | ||
602 | 0xeaea00eaeaea00eaULL, 0xaeae00aeaeae00aeULL, 0x2323002323230023ULL, | ||
603 | 0x6b6b006b6b6b006bULL, 0x4545004545450045ULL, 0xa5a500a5a5a500a5ULL, | ||
604 | 0xeded00ededed00edULL, 0x4f4f004f4f4f004fULL, 0x1d1d001d1d1d001dULL, | ||
605 | 0x9292009292920092ULL, 0x8686008686860086ULL, 0xafaf00afafaf00afULL, | ||
606 | 0x7c7c007c7c7c007cULL, 0x1f1f001f1f1f001fULL, 0x3e3e003e3e3e003eULL, | ||
607 | 0xdcdc00dcdcdc00dcULL, 0x5e5e005e5e5e005eULL, 0x0b0b000b0b0b000bULL, | ||
608 | 0xa6a600a6a6a600a6ULL, 0x3939003939390039ULL, 0xd5d500d5d5d500d5ULL, | ||
609 | 0x5d5d005d5d5d005dULL, 0xd9d900d9d9d900d9ULL, 0x5a5a005a5a5a005aULL, | ||
610 | 0x5151005151510051ULL, 0x6c6c006c6c6c006cULL, 0x8b8b008b8b8b008bULL, | ||
611 | 0x9a9a009a9a9a009aULL, 0xfbfb00fbfbfb00fbULL, 0xb0b000b0b0b000b0ULL, | ||
612 | 0x7474007474740074ULL, 0x2b2b002b2b2b002bULL, 0xf0f000f0f0f000f0ULL, | ||
613 | 0x8484008484840084ULL, 0xdfdf00dfdfdf00dfULL, 0xcbcb00cbcbcb00cbULL, | ||
614 | 0x3434003434340034ULL, 0x7676007676760076ULL, 0x6d6d006d6d6d006dULL, | ||
615 | 0xa9a900a9a9a900a9ULL, 0xd1d100d1d1d100d1ULL, 0x0404000404040004ULL, | ||
616 | 0x1414001414140014ULL, 0x3a3a003a3a3a003aULL, 0xdede00dedede00deULL, | ||
617 | 0x1111001111110011ULL, 0x3232003232320032ULL, 0x9c9c009c9c9c009cULL, | ||
618 | 0x5353005353530053ULL, 0xf2f200f2f2f200f2ULL, 0xfefe00fefefe00feULL, | ||
619 | 0xcfcf00cfcfcf00cfULL, 0xc3c300c3c3c300c3ULL, 0x7a7a007a7a7a007aULL, | ||
620 | 0x2424002424240024ULL, 0xe8e800e8e8e800e8ULL, 0x6060006060600060ULL, | ||
621 | 0x6969006969690069ULL, 0xaaaa00aaaaaa00aaULL, 0xa0a000a0a0a000a0ULL, | ||
622 | 0xa1a100a1a1a100a1ULL, 0x6262006262620062ULL, 0x5454005454540054ULL, | ||
623 | 0x1e1e001e1e1e001eULL, 0xe0e000e0e0e000e0ULL, 0x6464006464640064ULL, | ||
624 | 0x1010001010100010ULL, 0x0000000000000000ULL, 0xa3a300a3a3a300a3ULL, | ||
625 | 0x7575007575750075ULL, 0x8a8a008a8a8a008aULL, 0xe6e600e6e6e600e6ULL, | ||
626 | 0x0909000909090009ULL, 0xdddd00dddddd00ddULL, 0x8787008787870087ULL, | ||
627 | 0x8383008383830083ULL, 0xcdcd00cdcdcd00cdULL, 0x9090009090900090ULL, | ||
628 | 0x7373007373730073ULL, 0xf6f600f6f6f600f6ULL, 0x9d9d009d9d9d009dULL, | ||
629 | 0xbfbf00bfbfbf00bfULL, 0x5252005252520052ULL, 0xd8d800d8d8d800d8ULL, | ||
630 | 0xc8c800c8c8c800c8ULL, 0xc6c600c6c6c600c6ULL, 0x8181008181810081ULL, | ||
631 | 0x6f6f006f6f6f006fULL, 0x1313001313130013ULL, 0x6363006363630063ULL, | ||
632 | 0xe9e900e9e9e900e9ULL, 0xa7a700a7a7a700a7ULL, 0x9f9f009f9f9f009fULL, | ||
633 | 0xbcbc00bcbcbc00bcULL, 0x2929002929290029ULL, 0xf9f900f9f9f900f9ULL, | ||
634 | 0x2f2f002f2f2f002fULL, 0xb4b400b4b4b400b4ULL, 0x7878007878780078ULL, | ||
635 | 0x0606000606060006ULL, 0xe7e700e7e7e700e7ULL, 0x7171007171710071ULL, | ||
636 | 0xd4d400d4d4d400d4ULL, 0xabab00ababab00abULL, 0x8888008888880088ULL, | ||
637 | 0x8d8d008d8d8d008dULL, 0x7272007272720072ULL, 0xb9b900b9b9b900b9ULL, | ||
638 | 0xf8f800f8f8f800f8ULL, 0xacac00acacac00acULL, 0x3636003636360036ULL, | ||
639 | 0x2a2a002a2a2a002aULL, 0x3c3c003c3c3c003cULL, 0xf1f100f1f1f100f1ULL, | ||
640 | 0x4040004040400040ULL, 0xd3d300d3d3d300d3ULL, 0xbbbb00bbbbbb00bbULL, | ||
641 | 0x4343004343430043ULL, 0x1515001515150015ULL, 0xadad00adadad00adULL, | ||
642 | 0x7777007777770077ULL, 0x8080008080800080ULL, 0x8282008282820082ULL, | ||
643 | 0xecec00ececec00ecULL, 0x2727002727270027ULL, 0xe5e500e5e5e500e5ULL, | ||
644 | 0x8585008585850085ULL, 0x3535003535350035ULL, 0x0c0c000c0c0c000cULL, | ||
645 | 0x4141004141410041ULL, 0xefef00efefef00efULL, 0x9393009393930093ULL, | ||
646 | 0x1919001919190019ULL, 0x2121002121210021ULL, 0x0e0e000e0e0e000eULL, | ||
647 | 0x4e4e004e4e4e004eULL, 0x6565006565650065ULL, 0xbdbd00bdbdbd00bdULL, | ||
648 | 0xb8b800b8b8b800b8ULL, 0x8f8f008f8f8f008fULL, 0xebeb00ebebeb00ebULL, | ||
649 | 0xcece00cecece00ceULL, 0x3030003030300030ULL, 0x5f5f005f5f5f005fULL, | ||
650 | 0xc5c500c5c5c500c5ULL, 0x1a1a001a1a1a001aULL, 0xe1e100e1e1e100e1ULL, | ||
651 | 0xcaca00cacaca00caULL, 0x4747004747470047ULL, 0x3d3d003d3d3d003dULL, | ||
652 | 0x0101000101010001ULL, 0xd6d600d6d6d600d6ULL, 0x5656005656560056ULL, | ||
653 | 0x4d4d004d4d4d004dULL, 0x0d0d000d0d0d000dULL, 0x6666006666660066ULL, | ||
654 | 0xcccc00cccccc00ccULL, 0x2d2d002d2d2d002dULL, 0x1212001212120012ULL, | ||
655 | 0x2020002020200020ULL, 0xb1b100b1b1b100b1ULL, 0x9999009999990099ULL, | ||
656 | 0x4c4c004c4c4c004cULL, 0xc2c200c2c2c200c2ULL, 0x7e7e007e7e7e007eULL, | ||
657 | 0x0505000505050005ULL, 0xb7b700b7b7b700b7ULL, 0x3131003131310031ULL, | ||
658 | 0x1717001717170017ULL, 0xd7d700d7d7d700d7ULL, 0x5858005858580058ULL, | ||
659 | 0x6161006161610061ULL, 0x1b1b001b1b1b001bULL, 0x1c1c001c1c1c001cULL, | ||
660 | 0x0f0f000f0f0f000fULL, 0x1616001616160016ULL, 0x1818001818180018ULL, | ||
661 | 0x2222002222220022ULL, 0x4444004444440044ULL, 0xb2b200b2b2b200b2ULL, | ||
662 | 0xb5b500b5b5b500b5ULL, 0x9191009191910091ULL, 0x0808000808080008ULL, | ||
663 | 0xa8a800a8a8a800a8ULL, 0xfcfc00fcfcfc00fcULL, 0x5050005050500050ULL, | ||
664 | 0xd0d000d0d0d000d0ULL, 0x7d7d007d7d7d007dULL, 0x8989008989890089ULL, | ||
665 | 0x9797009797970097ULL, 0x5b5b005b5b5b005bULL, 0x9595009595950095ULL, | ||
666 | 0xffff00ffffff00ffULL, 0xd2d200d2d2d200d2ULL, 0xc4c400c4c4c400c4ULL, | ||
667 | 0x4848004848480048ULL, 0xf7f700f7f7f700f7ULL, 0xdbdb00dbdbdb00dbULL, | ||
668 | 0x0303000303030003ULL, 0xdada00dadada00daULL, 0x3f3f003f3f3f003fULL, | ||
669 | 0x9494009494940094ULL, 0x5c5c005c5c5c005cULL, 0x0202000202020002ULL, | ||
670 | 0x4a4a004a4a4a004aULL, 0x3333003333330033ULL, 0x6767006767670067ULL, | ||
671 | 0xf3f300f3f3f300f3ULL, 0x7f7f007f7f7f007fULL, 0xe2e200e2e2e200e2ULL, | ||
672 | 0x9b9b009b9b9b009bULL, 0x2626002626260026ULL, 0x3737003737370037ULL, | ||
673 | 0x3b3b003b3b3b003bULL, 0x9696009696960096ULL, 0x4b4b004b4b4b004bULL, | ||
674 | 0xbebe00bebebe00beULL, 0x2e2e002e2e2e002eULL, 0x7979007979790079ULL, | ||
675 | 0x8c8c008c8c8c008cULL, 0x6e6e006e6e6e006eULL, 0x8e8e008e8e8e008eULL, | ||
676 | 0xf5f500f5f5f500f5ULL, 0xb6b600b6b6b600b6ULL, 0xfdfd00fdfdfd00fdULL, | ||
677 | 0x5959005959590059ULL, 0x9898009898980098ULL, 0x6a6a006a6a6a006aULL, | ||
678 | 0x4646004646460046ULL, 0xbaba00bababa00baULL, 0x2525002525250025ULL, | ||
679 | 0x4242004242420042ULL, 0xa2a200a2a2a200a2ULL, 0xfafa00fafafa00faULL, | ||
680 | 0x0707000707070007ULL, 0x5555005555550055ULL, 0xeeee00eeeeee00eeULL, | ||
681 | 0x0a0a000a0a0a000aULL, 0x4949004949490049ULL, 0x6868006868680068ULL, | ||
682 | 0x3838003838380038ULL, 0xa4a400a4a4a400a4ULL, 0x2828002828280028ULL, | ||
683 | 0x7b7b007b7b7b007bULL, 0xc9c900c9c9c900c9ULL, 0xc1c100c1c1c100c1ULL, | ||
684 | 0xe3e300e3e3e300e3ULL, 0xf4f400f4f4f400f4ULL, 0xc7c700c7c7c700c7ULL, | ||
685 | 0x9e9e009e9e9e009eULL, | ||
686 | }; | ||
687 | |||
/*
 * Pre-rotated Camellia s-box lookup table: entry i holds the s-box
 * output byte for i replicated into the byte positions named by the
 * table's suffix mask 1110 1110 (pattern 0xXXXXXX00XXXXXX00), so a
 * single load yields the substitution already spread across the 64-bit
 * word for the F-function's P permutation.
 * NOTE(review): generated data — do not hand-edit; values presumed to
 * match the RFC 3713 s-boxes (verify against the generator if touched).
 */
const u64 camellia_sp11101110[256] = {
	0x7070700070707000ULL, 0x8282820082828200ULL, 0x2c2c2c002c2c2c00ULL,
	0xececec00ececec00ULL, 0xb3b3b300b3b3b300ULL, 0x2727270027272700ULL,
	0xc0c0c000c0c0c000ULL, 0xe5e5e500e5e5e500ULL, 0xe4e4e400e4e4e400ULL,
	0x8585850085858500ULL, 0x5757570057575700ULL, 0x3535350035353500ULL,
	0xeaeaea00eaeaea00ULL, 0x0c0c0c000c0c0c00ULL, 0xaeaeae00aeaeae00ULL,
	0x4141410041414100ULL, 0x2323230023232300ULL, 0xefefef00efefef00ULL,
	0x6b6b6b006b6b6b00ULL, 0x9393930093939300ULL, 0x4545450045454500ULL,
	0x1919190019191900ULL, 0xa5a5a500a5a5a500ULL, 0x2121210021212100ULL,
	0xededed00ededed00ULL, 0x0e0e0e000e0e0e00ULL, 0x4f4f4f004f4f4f00ULL,
	0x4e4e4e004e4e4e00ULL, 0x1d1d1d001d1d1d00ULL, 0x6565650065656500ULL,
	0x9292920092929200ULL, 0xbdbdbd00bdbdbd00ULL, 0x8686860086868600ULL,
	0xb8b8b800b8b8b800ULL, 0xafafaf00afafaf00ULL, 0x8f8f8f008f8f8f00ULL,
	0x7c7c7c007c7c7c00ULL, 0xebebeb00ebebeb00ULL, 0x1f1f1f001f1f1f00ULL,
	0xcecece00cecece00ULL, 0x3e3e3e003e3e3e00ULL, 0x3030300030303000ULL,
	0xdcdcdc00dcdcdc00ULL, 0x5f5f5f005f5f5f00ULL, 0x5e5e5e005e5e5e00ULL,
	0xc5c5c500c5c5c500ULL, 0x0b0b0b000b0b0b00ULL, 0x1a1a1a001a1a1a00ULL,
	0xa6a6a600a6a6a600ULL, 0xe1e1e100e1e1e100ULL, 0x3939390039393900ULL,
	0xcacaca00cacaca00ULL, 0xd5d5d500d5d5d500ULL, 0x4747470047474700ULL,
	0x5d5d5d005d5d5d00ULL, 0x3d3d3d003d3d3d00ULL, 0xd9d9d900d9d9d900ULL,
	0x0101010001010100ULL, 0x5a5a5a005a5a5a00ULL, 0xd6d6d600d6d6d600ULL,
	0x5151510051515100ULL, 0x5656560056565600ULL, 0x6c6c6c006c6c6c00ULL,
	0x4d4d4d004d4d4d00ULL, 0x8b8b8b008b8b8b00ULL, 0x0d0d0d000d0d0d00ULL,
	0x9a9a9a009a9a9a00ULL, 0x6666660066666600ULL, 0xfbfbfb00fbfbfb00ULL,
	0xcccccc00cccccc00ULL, 0xb0b0b000b0b0b000ULL, 0x2d2d2d002d2d2d00ULL,
	0x7474740074747400ULL, 0x1212120012121200ULL, 0x2b2b2b002b2b2b00ULL,
	0x2020200020202000ULL, 0xf0f0f000f0f0f000ULL, 0xb1b1b100b1b1b100ULL,
	0x8484840084848400ULL, 0x9999990099999900ULL, 0xdfdfdf00dfdfdf00ULL,
	0x4c4c4c004c4c4c00ULL, 0xcbcbcb00cbcbcb00ULL, 0xc2c2c200c2c2c200ULL,
	0x3434340034343400ULL, 0x7e7e7e007e7e7e00ULL, 0x7676760076767600ULL,
	0x0505050005050500ULL, 0x6d6d6d006d6d6d00ULL, 0xb7b7b700b7b7b700ULL,
	0xa9a9a900a9a9a900ULL, 0x3131310031313100ULL, 0xd1d1d100d1d1d100ULL,
	0x1717170017171700ULL, 0x0404040004040400ULL, 0xd7d7d700d7d7d700ULL,
	0x1414140014141400ULL, 0x5858580058585800ULL, 0x3a3a3a003a3a3a00ULL,
	0x6161610061616100ULL, 0xdedede00dedede00ULL, 0x1b1b1b001b1b1b00ULL,
	0x1111110011111100ULL, 0x1c1c1c001c1c1c00ULL, 0x3232320032323200ULL,
	0x0f0f0f000f0f0f00ULL, 0x9c9c9c009c9c9c00ULL, 0x1616160016161600ULL,
	0x5353530053535300ULL, 0x1818180018181800ULL, 0xf2f2f200f2f2f200ULL,
	0x2222220022222200ULL, 0xfefefe00fefefe00ULL, 0x4444440044444400ULL,
	0xcfcfcf00cfcfcf00ULL, 0xb2b2b200b2b2b200ULL, 0xc3c3c300c3c3c300ULL,
	0xb5b5b500b5b5b500ULL, 0x7a7a7a007a7a7a00ULL, 0x9191910091919100ULL,
	0x2424240024242400ULL, 0x0808080008080800ULL, 0xe8e8e800e8e8e800ULL,
	0xa8a8a800a8a8a800ULL, 0x6060600060606000ULL, 0xfcfcfc00fcfcfc00ULL,
	0x6969690069696900ULL, 0x5050500050505000ULL, 0xaaaaaa00aaaaaa00ULL,
	0xd0d0d000d0d0d000ULL, 0xa0a0a000a0a0a000ULL, 0x7d7d7d007d7d7d00ULL,
	0xa1a1a100a1a1a100ULL, 0x8989890089898900ULL, 0x6262620062626200ULL,
	0x9797970097979700ULL, 0x5454540054545400ULL, 0x5b5b5b005b5b5b00ULL,
	0x1e1e1e001e1e1e00ULL, 0x9595950095959500ULL, 0xe0e0e000e0e0e000ULL,
	0xffffff00ffffff00ULL, 0x6464640064646400ULL, 0xd2d2d200d2d2d200ULL,
	0x1010100010101000ULL, 0xc4c4c400c4c4c400ULL, 0x0000000000000000ULL,
	0x4848480048484800ULL, 0xa3a3a300a3a3a300ULL, 0xf7f7f700f7f7f700ULL,
	0x7575750075757500ULL, 0xdbdbdb00dbdbdb00ULL, 0x8a8a8a008a8a8a00ULL,
	0x0303030003030300ULL, 0xe6e6e600e6e6e600ULL, 0xdadada00dadada00ULL,
	0x0909090009090900ULL, 0x3f3f3f003f3f3f00ULL, 0xdddddd00dddddd00ULL,
	0x9494940094949400ULL, 0x8787870087878700ULL, 0x5c5c5c005c5c5c00ULL,
	0x8383830083838300ULL, 0x0202020002020200ULL, 0xcdcdcd00cdcdcd00ULL,
	0x4a4a4a004a4a4a00ULL, 0x9090900090909000ULL, 0x3333330033333300ULL,
	0x7373730073737300ULL, 0x6767670067676700ULL, 0xf6f6f600f6f6f600ULL,
	0xf3f3f300f3f3f300ULL, 0x9d9d9d009d9d9d00ULL, 0x7f7f7f007f7f7f00ULL,
	0xbfbfbf00bfbfbf00ULL, 0xe2e2e200e2e2e200ULL, 0x5252520052525200ULL,
	0x9b9b9b009b9b9b00ULL, 0xd8d8d800d8d8d800ULL, 0x2626260026262600ULL,
	0xc8c8c800c8c8c800ULL, 0x3737370037373700ULL, 0xc6c6c600c6c6c600ULL,
	0x3b3b3b003b3b3b00ULL, 0x8181810081818100ULL, 0x9696960096969600ULL,
	0x6f6f6f006f6f6f00ULL, 0x4b4b4b004b4b4b00ULL, 0x1313130013131300ULL,
	0xbebebe00bebebe00ULL, 0x6363630063636300ULL, 0x2e2e2e002e2e2e00ULL,
	0xe9e9e900e9e9e900ULL, 0x7979790079797900ULL, 0xa7a7a700a7a7a700ULL,
	0x8c8c8c008c8c8c00ULL, 0x9f9f9f009f9f9f00ULL, 0x6e6e6e006e6e6e00ULL,
	0xbcbcbc00bcbcbc00ULL, 0x8e8e8e008e8e8e00ULL, 0x2929290029292900ULL,
	0xf5f5f500f5f5f500ULL, 0xf9f9f900f9f9f900ULL, 0xb6b6b600b6b6b600ULL,
	0x2f2f2f002f2f2f00ULL, 0xfdfdfd00fdfdfd00ULL, 0xb4b4b400b4b4b400ULL,
	0x5959590059595900ULL, 0x7878780078787800ULL, 0x9898980098989800ULL,
	0x0606060006060600ULL, 0x6a6a6a006a6a6a00ULL, 0xe7e7e700e7e7e700ULL,
	0x4646460046464600ULL, 0x7171710071717100ULL, 0xbababa00bababa00ULL,
	0xd4d4d400d4d4d400ULL, 0x2525250025252500ULL, 0xababab00ababab00ULL,
	0x4242420042424200ULL, 0x8888880088888800ULL, 0xa2a2a200a2a2a200ULL,
	0x8d8d8d008d8d8d00ULL, 0xfafafa00fafafa00ULL, 0x7272720072727200ULL,
	0x0707070007070700ULL, 0xb9b9b900b9b9b900ULL, 0x5555550055555500ULL,
	0xf8f8f800f8f8f800ULL, 0xeeeeee00eeeeee00ULL, 0xacacac00acacac00ULL,
	0x0a0a0a000a0a0a00ULL, 0x3636360036363600ULL, 0x4949490049494900ULL,
	0x2a2a2a002a2a2a00ULL, 0x6868680068686800ULL, 0x3c3c3c003c3c3c00ULL,
	0x3838380038383800ULL, 0xf1f1f100f1f1f100ULL, 0xa4a4a400a4a4a400ULL,
	0x4040400040404000ULL, 0x2828280028282800ULL, 0xd3d3d300d3d3d300ULL,
	0x7b7b7b007b7b7b00ULL, 0xbbbbbb00bbbbbb00ULL, 0xc9c9c900c9c9c900ULL,
	0x4343430043434300ULL, 0xc1c1c100c1c1c100ULL, 0x1515150015151500ULL,
	0xe3e3e300e3e3e300ULL, 0xadadad00adadad00ULL, 0xf4f4f400f4f4f400ULL,
	0x7777770077777700ULL, 0xc7c7c700c7c7c700ULL, 0x8080800080808000ULL,
	0x9e9e9e009e9e9e00ULL,
};
776 | |||
/*
 * Key-schedule constants: the six Camellia "sigma" words from RFC 3713,
 * each split into 32-bit left/right halves.
 */
/* key constants */
#define CAMELLIA_SIGMA1L (0xA09E667FL)
#define CAMELLIA_SIGMA1R (0x3BCC908BL)
#define CAMELLIA_SIGMA2L (0xB67AE858L)
#define CAMELLIA_SIGMA2R (0x4CAA73B2L)
#define CAMELLIA_SIGMA3L (0xC6EF372FL)
#define CAMELLIA_SIGMA3R (0xE94F82BEL)
#define CAMELLIA_SIGMA4L (0x54FF53A5L)
#define CAMELLIA_SIGMA4R (0xF1D36F1CL)
#define CAMELLIA_SIGMA5L (0x10E527FAL)
#define CAMELLIA_SIGMA5R (0xDE682D1DL)
#define CAMELLIA_SIGMA6L (0xB05688C2L)
#define CAMELLIA_SIGMA6R (0xB3E6C1FDL)

/* macros */

/*
 * ROLDQ: rotate the 128-bit quantity l||r left by @bits, in place.
 * Callers pass 15..51, so the (64 - bits) shifts never hit the
 * undefined shift-by-0/shift-by-64 cases.
 */
#define ROLDQ(l, r, bits) ({ \
	u64 t = l; \
	l = (l << bits) | (r >> (64 - bits)); \
	r = (r << bits) | (t >> (64 - bits)); \
})

/*
 * CAMELLIA_F: Camellia F-function for the key schedule. XORs the
 * 64-bit subkey kl||kr into @x, substitutes each of the eight bytes
 * through the pre-rotated sp* tables (defined earlier in this file),
 * accumulates by XOR, and finishes with a 32-bit rotate. Result is
 * written to @y; @x is not modified.
 */
#define CAMELLIA_F(x, kl, kr, y) ({ \
	u64 ii = x ^ (((u64)kl << 32) | kr); \
	y = camellia_sp11101110[(uint8_t)ii]; \
	y ^= camellia_sp44044404[(uint8_t)(ii >> 8)]; \
	ii >>= 16; \
	y ^= camellia_sp30333033[(uint8_t)ii]; \
	y ^= camellia_sp02220222[(uint8_t)(ii >> 8)]; \
	ii >>= 16; \
	y ^= camellia_sp00444404[(uint8_t)ii]; \
	y ^= camellia_sp03303033[(uint8_t)(ii >> 8)]; \
	ii >>= 16; \
	y ^= camellia_sp22000222[(uint8_t)ii]; \
	y ^= camellia_sp10011110[(uint8_t)(ii >> 8)]; \
	y = ror64(y, 32); \
})

/* Store subkey INDEX with halves swapped (rotated 32 bits), the layout
 * the assembler round functions expect. */
#define SET_SUBKEY_LR(INDEX, sRL) (subkey[(INDEX)] = ror64((sRL), 32))
815 | |||
/*
 * Finish the key schedule: fold the pre/post-whitening keys (kw2, kw4)
 * into the neighbouring round keys so the per-round key XOR absorbs the
 * whitening, then emit each subkey via SET_SUBKEY_LR in the rotated
 * layout the assembler expects.
 *
 * @subkey: output subkey table
 * @subRL:  scratch table of raw subkeys as 64-bit L||R words
 * @max:    24 for 128-bit keys, 32 for 192/256-bit keys
 *
 * NOTE: several "dw = ...," statements deliberately end with the comma
 * operator, chaining into the following XOR — do not "fix" them.
 * Statement order is significant throughout; subRL entries are both
 * read and overwritten in sequence.
 */
static void camellia_setup_tail(u64 *subkey, u64 *subRL, int max)
{
	u64 kw4, tt;
	u32 dw, tl, tr;

	/* absorb kw2 to other subkeys */
	/* round 2 */
	subRL[3] ^= subRL[1];
	/* round 4 */
	subRL[5] ^= subRL[1];
	/* round 6 */
	subRL[7] ^= subRL[1];

	subRL[1] ^= (subRL[1] & ~subRL[9]) << 32;
	/* modified for FLinv(kl2) */
	dw = (subRL[1] & subRL[9]) >> 32,
	subRL[1] ^= rol32(dw, 1);

	/* round 8 */
	subRL[11] ^= subRL[1];
	/* round 10 */
	subRL[13] ^= subRL[1];
	/* round 12 */
	subRL[15] ^= subRL[1];

	subRL[1] ^= (subRL[1] & ~subRL[17]) << 32;
	/* modified for FLinv(kl4) */
	dw = (subRL[1] & subRL[17]) >> 32,
	subRL[1] ^= rol32(dw, 1);

	/* round 14 */
	subRL[19] ^= subRL[1];
	/* round 16 */
	subRL[21] ^= subRL[1];
	/* round 18 */
	subRL[23] ^= subRL[1];

	if (max == 24) {
		/* kw3 */
		subRL[24] ^= subRL[1];

		/* absorb kw4 to other subkeys */
		kw4 = subRL[25];
	} else {
		subRL[1] ^= (subRL[1] & ~subRL[25]) << 32;
		/* modified for FLinv(kl6) */
		dw = (subRL[1] & subRL[25]) >> 32,
		subRL[1] ^= rol32(dw, 1);

		/* round 20 */
		subRL[27] ^= subRL[1];
		/* round 22 */
		subRL[29] ^= subRL[1];
		/* round 24 */
		subRL[31] ^= subRL[1];
		/* kw3 */
		subRL[32] ^= subRL[1];

		/* absorb kw4 to other subkeys */
		kw4 = subRL[33];
		/* round 23 */
		subRL[30] ^= kw4;
		/* round 21 */
		subRL[28] ^= kw4;
		/* round 19 */
		subRL[26] ^= kw4;

		kw4 ^= (kw4 & ~subRL[24]) << 32;
		/* modified for FL(kl5) */
		dw = (kw4 & subRL[24]) >> 32,
		kw4 ^= rol32(dw, 1);
	}

	/* round 17 */
	subRL[22] ^= kw4;
	/* round 15 */
	subRL[20] ^= kw4;
	/* round 13 */
	subRL[18] ^= kw4;

	kw4 ^= (kw4 & ~subRL[16]) << 32;
	/* modified for FL(kl3) */
	dw = (kw4 & subRL[16]) >> 32,
	kw4 ^= rol32(dw, 1);

	/* round 11 */
	subRL[14] ^= kw4;
	/* round 9 */
	subRL[12] ^= kw4;
	/* round 7 */
	subRL[10] ^= kw4;

	kw4 ^= (kw4 & ~subRL[8]) << 32;
	/* modified for FL(kl1) */
	dw = (kw4 & subRL[8]) >> 32,
	kw4 ^= rol32(dw, 1);

	/* round 5 */
	subRL[6] ^= kw4;
	/* round 3 */
	subRL[4] ^= kw4;
	/* round 1 */
	subRL[2] ^= kw4;
	/* kw1 */
	subRL[0] ^= kw4;

	/* key XOR is end of F-function */
	SET_SUBKEY_LR(0, subRL[0] ^ subRL[2]);	/* kw1 */
	SET_SUBKEY_LR(2, subRL[3]);		/* round 1 */
	SET_SUBKEY_LR(3, subRL[2] ^ subRL[4]);	/* round 2 */
	SET_SUBKEY_LR(4, subRL[3] ^ subRL[5]);	/* round 3 */
	SET_SUBKEY_LR(5, subRL[4] ^ subRL[6]);	/* round 4 */
	SET_SUBKEY_LR(6, subRL[5] ^ subRL[7]);	/* round 5 */

	tl = (subRL[10] >> 32) ^ (subRL[10] & ~subRL[8]);
	dw = tl & (subRL[8] >> 32),		/* FL(kl1) */
	tr = subRL[10] ^ rol32(dw, 1);
	tt = (tr | ((u64)tl << 32));

	SET_SUBKEY_LR(7, subRL[6] ^ tt);	/* round 6 */
	SET_SUBKEY_LR(8, subRL[8]);		/* FL(kl1) */
	SET_SUBKEY_LR(9, subRL[9]);		/* FLinv(kl2) */

	tl = (subRL[7] >> 32) ^ (subRL[7] & ~subRL[9]);
	dw = tl & (subRL[9] >> 32),		/* FLinv(kl2) */
	tr = subRL[7] ^ rol32(dw, 1);
	tt = (tr | ((u64)tl << 32));

	SET_SUBKEY_LR(10, subRL[11] ^ tt);	/* round 7 */
	SET_SUBKEY_LR(11, subRL[10] ^ subRL[12]);	/* round 8 */
	SET_SUBKEY_LR(12, subRL[11] ^ subRL[13]);	/* round 9 */
	SET_SUBKEY_LR(13, subRL[12] ^ subRL[14]);	/* round 10 */
	SET_SUBKEY_LR(14, subRL[13] ^ subRL[15]);	/* round 11 */

	tl = (subRL[18] >> 32) ^ (subRL[18] & ~subRL[16]);
	dw = tl & (subRL[16] >> 32),		/* FL(kl3) */
	tr = subRL[18] ^ rol32(dw, 1);
	tt = (tr | ((u64)tl << 32));

	SET_SUBKEY_LR(15, subRL[14] ^ tt);	/* round 12 */
	SET_SUBKEY_LR(16, subRL[16]);		/* FL(kl3) */
	SET_SUBKEY_LR(17, subRL[17]);		/* FLinv(kl4) */

	tl = (subRL[15] >> 32) ^ (subRL[15] & ~subRL[17]);
	dw = tl & (subRL[17] >> 32),		/* FLinv(kl4) */
	tr = subRL[15] ^ rol32(dw, 1);
	tt = (tr | ((u64)tl << 32));

	SET_SUBKEY_LR(18, subRL[19] ^ tt);	/* round 13 */
	SET_SUBKEY_LR(19, subRL[18] ^ subRL[20]);	/* round 14 */
	SET_SUBKEY_LR(20, subRL[19] ^ subRL[21]);	/* round 15 */
	SET_SUBKEY_LR(21, subRL[20] ^ subRL[22]);	/* round 16 */
	SET_SUBKEY_LR(22, subRL[21] ^ subRL[23]);	/* round 17 */

	if (max == 24) {
		SET_SUBKEY_LR(23, subRL[22]);	/* round 18 */
		SET_SUBKEY_LR(24, subRL[24] ^ subRL[23]);	/* kw3 */
	} else {
		tl = (subRL[26] >> 32) ^ (subRL[26] & ~subRL[24]);
		dw = tl & (subRL[24] >> 32),	/* FL(kl5) */
		tr = subRL[26] ^ rol32(dw, 1);
		tt = (tr | ((u64)tl << 32));

		SET_SUBKEY_LR(23, subRL[22] ^ tt);	/* round 18 */
		SET_SUBKEY_LR(24, subRL[24]);	/* FL(kl5) */
		SET_SUBKEY_LR(25, subRL[25]);	/* FLinv(kl6) */

		tl = (subRL[23] >> 32) ^ (subRL[23] & ~subRL[25]);
		dw = tl & (subRL[25] >> 32),	/* FLinv(kl6) */
		tr = subRL[23] ^ rol32(dw, 1);
		tt = (tr | ((u64)tl << 32));

		SET_SUBKEY_LR(26, subRL[27] ^ tt);	/* round 19 */
		SET_SUBKEY_LR(27, subRL[26] ^ subRL[28]);	/* round 20 */
		SET_SUBKEY_LR(28, subRL[27] ^ subRL[29]);	/* round 21 */
		SET_SUBKEY_LR(29, subRL[28] ^ subRL[30]);	/* round 22 */
		SET_SUBKEY_LR(30, subRL[29] ^ subRL[31]);	/* round 23 */
		SET_SUBKEY_LR(31, subRL[30]);	/* round 24 */
		SET_SUBKEY_LR(32, subRL[32] ^ subRL[31]);	/* kw3 */
	}
}
997 | |||
/*
 * Build the subkey schedule for a 128-bit key (RFC 3713 key schedule):
 * derive the KL-dependent subkeys by rotating KL, compute KA via four
 * F-function applications, derive the KA-dependent subkeys, then let
 * camellia_setup_tail() absorb the whitening keys and emit the final
 * rotated table. Statement order matters; subRL indices follow the
 * round numbering in the comments.
 */
static void camellia_setup128(const unsigned char *key, u64 *subkey)
{
	u64 kl, kr, ww;
	u64 subRL[26];

	/**
	 * k == kl || kr (|| is concatenation)
	 */
	kl = get_unaligned_be64(key);
	kr = get_unaligned_be64(key + 8);

	/* generate KL dependent subkeys */
	/* kw1 */
	subRL[0] = kl;
	/* kw2 */
	subRL[1] = kr;

	/* rotation left shift 15bit */
	ROLDQ(kl, kr, 15);

	/* k3 */
	subRL[4] = kl;
	/* k4 */
	subRL[5] = kr;

	/* rotation left shift 15+30bit */
	ROLDQ(kl, kr, 30);

	/* k7 */
	subRL[10] = kl;
	/* k8 */
	subRL[11] = kr;

	/* rotation left shift 15+30+15bit */
	ROLDQ(kl, kr, 15);

	/* k10 */
	subRL[13] = kr;
	/* rotation left shift 15+30+15+17 bit */
	ROLDQ(kl, kr, 17);

	/* kl3 */
	subRL[16] = kl;
	/* kl4 */
	subRL[17] = kr;

	/* rotation left shift 15+30+15+17+17 bit */
	ROLDQ(kl, kr, 17);

	/* k13 */
	subRL[18] = kl;
	/* k14 */
	subRL[19] = kr;

	/* rotation left shift 15+30+15+17+17+17 bit */
	ROLDQ(kl, kr, 17);

	/* k17 */
	subRL[22] = kl;
	/* k18 */
	subRL[23] = kr;

	/* generate KA */
	kl = subRL[0];
	kr = subRL[1];
	CAMELLIA_F(kl, CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R, ww);
	kr ^= ww;
	CAMELLIA_F(kr, CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R, kl);

	/* current status == (kll, klr, w0, w1) */
	CAMELLIA_F(kl, CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R, kr);
	kr ^= ww;
	CAMELLIA_F(kr, CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R, ww);
	kl ^= ww;

	/* generate KA dependent subkeys */
	/* k1, k2 */
	subRL[2] = kl;
	subRL[3] = kr;
	ROLDQ(kl, kr, 15);
	/* k5,k6 */
	subRL[6] = kl;
	subRL[7] = kr;
	ROLDQ(kl, kr, 15);
	/* kl1, kl2 */
	subRL[8] = kl;
	subRL[9] = kr;
	ROLDQ(kl, kr, 15);
	/* k9 */
	subRL[12] = kl;
	ROLDQ(kl, kr, 15);
	/* k11, k12 */
	subRL[14] = kl;
	subRL[15] = kr;
	ROLDQ(kl, kr, 34);
	/* k15, k16 */
	subRL[20] = kl;
	subRL[21] = kr;
	ROLDQ(kl, kr, 17);
	/* kw3, kw4 */
	subRL[24] = kl;
	subRL[25] = kr;

	camellia_setup_tail(subkey, subRL, 24);
}
1103 | |||
/*
 * Build the subkey schedule for a 256-bit key (also used for 192-bit
 * keys after expansion by camellia_setup192()). Derives KL- and
 * KR-dependent subkeys by rotation, computes the intermediate keys KA
 * and KB via the sigma-keyed F-function, stores their rotations, and
 * hands off to camellia_setup_tail() with max == 32 (24 rounds).
 * Statement order matters throughout.
 */
static void camellia_setup256(const unsigned char *key, u64 *subkey)
{
	u64 kl, kr;		/* left half of key */
	u64 krl, krr;		/* right half of key */
	u64 ww;			/* temporary variables */
	u64 subRL[34];

	/**
	 * key = (kl || kr || krl || krr) (|| is concatenation)
	 */
	kl = get_unaligned_be64(key);
	kr = get_unaligned_be64(key + 8);
	krl = get_unaligned_be64(key + 16);
	krr = get_unaligned_be64(key + 24);

	/* generate KL dependent subkeys */
	/* kw1 */
	subRL[0] = kl;
	/* kw2 */
	subRL[1] = kr;
	ROLDQ(kl, kr, 45);
	/* k9 */
	subRL[12] = kl;
	/* k10 */
	subRL[13] = kr;
	ROLDQ(kl, kr, 15);
	/* kl3 */
	subRL[16] = kl;
	/* kl4 */
	subRL[17] = kr;
	ROLDQ(kl, kr, 17);
	/* k17 */
	subRL[22] = kl;
	/* k18 */
	subRL[23] = kr;
	ROLDQ(kl, kr, 34);
	/* k23 */
	subRL[30] = kl;
	/* k24 */
	subRL[31] = kr;

	/* generate KR dependent subkeys */
	ROLDQ(krl, krr, 15);
	/* k3 */
	subRL[4] = krl;
	/* k4 */
	subRL[5] = krr;
	ROLDQ(krl, krr, 15);
	/* kl1 */
	subRL[8] = krl;
	/* kl2 */
	subRL[9] = krr;
	ROLDQ(krl, krr, 30);
	/* k13 */
	subRL[18] = krl;
	/* k14 */
	subRL[19] = krr;
	ROLDQ(krl, krr, 34);
	/* k19 */
	subRL[26] = krl;
	/* k20 */
	subRL[27] = krr;
	ROLDQ(krl, krr, 34);

	/* generate KA */
	kl = subRL[0] ^ krl;
	kr = subRL[1] ^ krr;

	CAMELLIA_F(kl, CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R, ww);
	kr ^= ww;
	CAMELLIA_F(kr, CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R, kl);
	kl ^= krl;
	CAMELLIA_F(kl, CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R, kr);
	kr ^= ww ^ krr;
	CAMELLIA_F(kr, CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R, ww);
	kl ^= ww;

	/* generate KB */
	krl ^= kl;
	krr ^= kr;
	CAMELLIA_F(krl, CAMELLIA_SIGMA5L, CAMELLIA_SIGMA5R, ww);
	krr ^= ww;
	CAMELLIA_F(krr, CAMELLIA_SIGMA6L, CAMELLIA_SIGMA6R, ww);
	krl ^= ww;

	/* generate KA dependent subkeys */
	ROLDQ(kl, kr, 15);
	/* k5 */
	subRL[6] = kl;
	/* k6 */
	subRL[7] = kr;
	ROLDQ(kl, kr, 30);
	/* k11 */
	subRL[14] = kl;
	/* k12 */
	subRL[15] = kr;
	/* rotation left shift 32bit */
	ROLDQ(kl, kr, 32);
	/* kl5 */
	subRL[24] = kl;
	/* kl6 */
	subRL[25] = kr;
	/* rotation left shift 17 from k11,k12 -> k21,k22 */
	ROLDQ(kl, kr, 17);
	/* k21 */
	subRL[28] = kl;
	/* k22 */
	subRL[29] = kr;

	/* generate KB dependent subkeys */
	/* k1 */
	subRL[2] = krl;
	/* k2 */
	subRL[3] = krr;
	ROLDQ(krl, krr, 30);
	/* k7 */
	subRL[10] = krl;
	/* k8 */
	subRL[11] = krr;
	ROLDQ(krl, krr, 30);
	/* k15 */
	subRL[20] = krl;
	/* k16 */
	subRL[21] = krr;
	ROLDQ(krl, krr, 51);
	/* kw3 */
	subRL[32] = krl;
	/* kw4 */
	subRL[33] = krr;

	camellia_setup_tail(subkey, subRL, 32);
}
1236 | |||
1237 | static void camellia_setup192(const unsigned char *key, u64 *subkey) | ||
1238 | { | ||
1239 | unsigned char kk[32]; | ||
1240 | u64 krl, krr; | ||
1241 | |||
1242 | memcpy(kk, key, 24); | ||
1243 | memcpy((unsigned char *)&krl, key+16, 8); | ||
1244 | krr = ~krl; | ||
1245 | memcpy(kk+24, (unsigned char *)&krr, 8); | ||
1246 | camellia_setup256(kk, subkey); | ||
1247 | } | ||
1248 | |||
1249 | int __camellia_setkey(struct camellia_ctx *cctx, const unsigned char *key, | ||
1250 | unsigned int key_len, u32 *flags) | ||
1251 | { | ||
1252 | if (key_len != 16 && key_len != 24 && key_len != 32) { | ||
1253 | *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; | ||
1254 | return -EINVAL; | ||
1255 | } | ||
1256 | |||
1257 | cctx->key_length = key_len; | ||
1258 | |||
1259 | switch (key_len) { | ||
1260 | case 16: | ||
1261 | camellia_setup128(key, cctx->key_table); | ||
1262 | break; | ||
1263 | case 24: | ||
1264 | camellia_setup192(key, cctx->key_table); | ||
1265 | break; | ||
1266 | case 32: | ||
1267 | camellia_setup256(key, cctx->key_table); | ||
1268 | break; | ||
1269 | } | ||
1270 | |||
1271 | return 0; | ||
1272 | } | ||
1273 | EXPORT_SYMBOL_GPL(__camellia_setkey); | ||
1274 | |||
/*
 * crypto_api cipher setkey entry point: forwards to __camellia_setkey
 * with the tfm's result-flags word so BAD_KEY_LEN can be reported back
 * to the caller.
 */
static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key,
			   unsigned int key_len)
{
	return __camellia_setkey(crypto_tfm_ctx(tfm), in_key, key_len,
				 &tfm->crt_flags);
}
1281 | |||
/*
 * CBC decryption of two consecutive blocks using the 2-way assembler
 * routine. The first ciphertext block is saved up front because src may
 * alias dst (in-place operation); after decryption it is XORed into the
 * second output block as its chaining value. The first block's IV XOR
 * is presumably done by the glue-layer caller — confirm against
 * glue_helper.
 */
void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src)
{
	u128 iv = *src;	/* copy before decrypting: src may alias dst */

	camellia_dec_blk_2way(ctx, (u8 *)dst, (u8 *)src);

	u128_xor(&dst[1], &dst[1], &iv);
}
EXPORT_SYMBOL_GPL(camellia_decrypt_cbc_2way);
1291 | |||
/*
 * CTR mode, one block: build the big-endian counter block from @iv,
 * advance @iv, and hand dst+counter to camellia_enc_blk_xor (which, per
 * its name, presumably encrypts the counter and XORs it into dst —
 * verify against the asm implementation).
 */
void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
	be128 ctrblk;

	/* work in place on dst; copy plaintext over first if distinct */
	if (dst != src)
		*dst = *src;

	le128_to_be128(&ctrblk, iv);
	le128_inc(iv);

	camellia_enc_blk_xor(ctx, (u8 *)dst, (u8 *)&ctrblk);
}
EXPORT_SYMBOL_GPL(camellia_crypt_ctr);
1305 | |||
1306 | void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src, le128 *iv) | ||
1307 | { | ||
1308 | be128 ctrblks[2]; | ||
1309 | |||
1310 | if (dst != src) { | ||
1311 | dst[0] = src[0]; | ||
1312 | dst[1] = src[1]; | ||
1313 | } | ||
1314 | |||
1315 | le128_to_be128(&ctrblks[0], iv); | ||
1316 | le128_inc(iv); | ||
1317 | le128_to_be128(&ctrblks[1], iv); | ||
1318 | le128_inc(iv); | ||
1319 | |||
1320 | camellia_enc_blk_xor_2way(ctx, (u8 *)dst, (u8 *)ctrblks); | ||
1321 | } | ||
1322 | EXPORT_SYMBOL_GPL(camellia_crypt_ctr_2way); | ||
1323 | |||
/*
 * ECB encryption dispatch table: try the 2-way assembler routine for
 * block pairs, fall back to the 1-way routine for the tail block.
 * fpu_blocks_limit = -1: NOTE(review) — presumably means "no FPU state
 * needed" for these plain asm helpers; confirm against glue_helper.
 */
static const struct common_glue_ctx camellia_enc = {
	.num_funcs = 2,
	.fpu_blocks_limit = -1,

	.funcs = { {
		.num_blocks = 2,
		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk_2way) }
	}, {
		.num_blocks = 1,
		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk) }
	} }
};
1336 | |||
/*
 * CTR dispatch table: 2-way counter-mode helper for block pairs,
 * 1-way helper for the remainder. fpu_blocks_limit as above.
 */
static const struct common_glue_ctx camellia_ctr = {
	.num_funcs = 2,
	.fpu_blocks_limit = -1,

	.funcs = { {
		.num_blocks = 2,
		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr_2way) }
	}, {
		.num_blocks = 1,
		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr) }
	} }
};
1349 | |||
/*
 * ECB decryption dispatch table: 2-way routine for pairs, 1-way for
 * the tail. fpu_blocks_limit as above.
 */
static const struct common_glue_ctx camellia_dec = {
	.num_funcs = 2,
	.fpu_blocks_limit = -1,

	.funcs = { {
		.num_blocks = 2,
		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk_2way) }
	}, {
		.num_blocks = 1,
		.fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk) }
	} }
};
1362 | |||
/*
 * CBC decryption dispatch table: the 2-way entry handles the inter-
 * block chaining XOR itself (camellia_decrypt_cbc_2way); the 1-way
 * entry is the raw block decrypt, chained by the glue layer.
 */
static const struct common_glue_ctx camellia_dec_cbc = {
	.num_funcs = 2,
	.fpu_blocks_limit = -1,

	.funcs = { {
		.num_blocks = 2,
		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_decrypt_cbc_2way) }
	}, {
		.num_blocks = 1,
		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_dec_blk) }
	} }
};
1375 | |||
/* ECB encryption: walk the scatterlists via the common 128-bit glue
 * helper using the camellia_enc dispatch table. */
static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	return glue_ecb_crypt_128bit(&camellia_enc, desc, dst, src, nbytes);
}
1381 | |||
/* ECB decryption: same glue helper, camellia_dec dispatch table. */
static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	return glue_ecb_crypt_128bit(&camellia_dec, desc, dst, src, nbytes);
}
1387 | |||
/* CBC encryption is inherently serial (each block chains on the last),
 * so only the single-block encrypt function is passed to the helper. */
static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(camellia_enc_blk), desc,
				       dst, src, nbytes);
}
1394 | |||
/* CBC decryption can be parallelized; use the camellia_dec_cbc
 * dispatch table (2-way then 1-way). */
static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	return glue_cbc_decrypt_128bit(&camellia_dec_cbc, desc, dst, src,
				       nbytes);
}
1401 | |||
/* CTR mode (encrypt == decrypt): delegate to the glue helper with the
 * camellia_ctr dispatch table. */
static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		     struct scatterlist *src, unsigned int nbytes)
{
	return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes);
}
1407 | |||
1408 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
1409 | { | ||
1410 | const unsigned int bsize = CAMELLIA_BLOCK_SIZE; | ||
1411 | struct camellia_ctx *ctx = priv; | ||
1412 | int i; | ||
1413 | |||
1414 | while (nbytes >= 2 * bsize) { | ||
1415 | camellia_enc_blk_2way(ctx, srcdst, srcdst); | ||
1416 | srcdst += bsize * 2; | ||
1417 | nbytes -= bsize * 2; | ||
1418 | } | ||
1419 | |||
1420 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
1421 | camellia_enc_blk(ctx, srcdst, srcdst); | ||
1422 | } | ||
1423 | |||
1424 | static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
1425 | { | ||
1426 | const unsigned int bsize = CAMELLIA_BLOCK_SIZE; | ||
1427 | struct camellia_ctx *ctx = priv; | ||
1428 | int i; | ||
1429 | |||
1430 | while (nbytes >= 2 * bsize) { | ||
1431 | camellia_dec_blk_2way(ctx, srcdst, srcdst); | ||
1432 | srcdst += bsize * 2; | ||
1433 | nbytes -= bsize * 2; | ||
1434 | } | ||
1435 | |||
1436 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
1437 | camellia_dec_blk(ctx, srcdst, srcdst); | ||
1438 | } | ||
1439 | |||
/*
 * LRW setkey: the last CAMELLIA_BLOCK_SIZE bytes of @key are the LRW
 * tweak multiplier (used to build the GF(2^128) table), the leading
 * bytes are the cipher key proper. A too-short keylen underflows the
 * subtraction to a huge value, which __camellia_setkey then rejects.
 */
int lrw_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
			unsigned int keylen)
{
	struct camellia_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
	int err;

	err = __camellia_setkey(&ctx->camellia_ctx, key,
				keylen - CAMELLIA_BLOCK_SIZE,
				&tfm->crt_flags);
	if (err)
		return err;

	return lrw_init_table(&ctx->lrw_table,
			      key + keylen - CAMELLIA_BLOCK_SIZE);
}
EXPORT_SYMBOL_GPL(lrw_camellia_setkey);
1456 | |||
/*
 * LRW encryption: delegate to the generic lrw_crypt() walker, which
 * applies the tweak and calls encrypt_callback on each chunk. The
 * designated initializer zero-fills all unnamed req fields.
 */
static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	be128 buf[2 * 4];	/* scratch: 8 x 16-byte tweak blocks */
	struct lrw_crypt_req req = {
		.tbuf = buf,
		.tbuflen = sizeof(buf),

		.table_ctx = &ctx->lrw_table,
		.crypt_ctx = &ctx->camellia_ctx,
		.crypt_fn = encrypt_callback,
	};

	return lrw_crypt(desc, dst, src, nbytes, &req);
}
1473 | |||
/*
 * LRW decryption: identical setup to lrw_encrypt but with the
 * decryption callback.
 */
static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	be128 buf[2 * 4];	/* scratch: 8 x 16-byte tweak blocks */
	struct lrw_crypt_req req = {
		.tbuf = buf,
		.tbuflen = sizeof(buf),

		.table_ctx = &ctx->lrw_table,
		.crypt_ctx = &ctx->camellia_ctx,
		.crypt_fn = decrypt_callback,
	};

	return lrw_crypt(desc, dst, src, nbytes, &req);
}
1490 | |||
/*
 * LRW tfm teardown: release the table set up by lrw_init_table() in
 * lrw_camellia_setkey().
 */
void lrw_camellia_exit_tfm(struct crypto_tfm *tfm)
{
	struct camellia_lrw_ctx *ctx = crypto_tfm_ctx(tfm);

	lrw_free_table(&ctx->lrw_table);
}
EXPORT_SYMBOL_GPL(lrw_camellia_exit_tfm);
1498 | |||
1499 | int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
1500 | unsigned int keylen) | ||
1501 | { | ||
1502 | struct camellia_xts_ctx *ctx = crypto_tfm_ctx(tfm); | ||
1503 | u32 *flags = &tfm->crt_flags; | ||
1504 | int err; | ||
1505 | |||
1506 | /* key consists of keys of equal size concatenated, therefore | ||
1507 | * the length must be even | ||
1508 | */ | ||
1509 | if (keylen % 2) { | ||
1510 | *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; | ||
1511 | return -EINVAL; | ||
1512 | } | ||
1513 | |||
1514 | /* first half of xts-key is for crypt */ | ||
1515 | err = __camellia_setkey(&ctx->crypt_ctx, key, keylen / 2, flags); | ||
1516 | if (err) | ||
1517 | return err; | ||
1518 | |||
1519 | /* second half of xts-key is for tweak */ | ||
1520 | return __camellia_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2, | ||
1521 | flags); | ||
1522 | } | ||
1523 | EXPORT_SYMBOL_GPL(xts_camellia_setkey); | ||
1524 | |||
/*
 * XTS encryption: delegate to the generic xts_crypt() walker. Note the
 * tweak is always computed with the *encryption* primitive
 * (camellia_enc_blk), as XTS specifies, for both directions.
 */
static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	be128 buf[2 * 4];	/* scratch: 8 x 16-byte tweak blocks */
	struct xts_crypt_req req = {
		.tbuf = buf,
		.tbuflen = sizeof(buf),

		.tweak_ctx = &ctx->tweak_ctx,
		.tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk),
		.crypt_ctx = &ctx->crypt_ctx,
		.crypt_fn = encrypt_callback,
	};

	return xts_crypt(desc, dst, src, nbytes, &req);
}
1542 | |||
1543 | static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
1544 | struct scatterlist *src, unsigned int nbytes) | ||
1545 | { | ||
1546 | struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
1547 | be128 buf[2 * 4]; | ||
1548 | struct xts_crypt_req req = { | ||
1549 | .tbuf = buf, | ||
1550 | .tbuflen = sizeof(buf), | ||
1551 | |||
1552 | .tweak_ctx = &ctx->tweak_ctx, | ||
1553 | .tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk), | ||
1554 | .crypt_ctx = &ctx->crypt_ctx, | ||
1555 | .crypt_fn = decrypt_callback, | ||
1556 | }; | ||
1557 | |||
1558 | return xts_crypt(desc, dst, src, nbytes, &req); | ||
1559 | } | ||
1560 | |||
1561 | static struct crypto_alg camellia_algs[6] = { { | ||
1562 | .cra_name = "camellia", | ||
1563 | .cra_driver_name = "camellia-asm", | ||
1564 | .cra_priority = 200, | ||
1565 | .cra_flags = CRYPTO_ALG_TYPE_CIPHER, | ||
1566 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | ||
1567 | .cra_ctxsize = sizeof(struct camellia_ctx), | ||
1568 | .cra_alignmask = 0, | ||
1569 | .cra_module = THIS_MODULE, | ||
1570 | .cra_u = { | ||
1571 | .cipher = { | ||
1572 | .cia_min_keysize = CAMELLIA_MIN_KEY_SIZE, | ||
1573 | .cia_max_keysize = CAMELLIA_MAX_KEY_SIZE, | ||
1574 | .cia_setkey = camellia_setkey, | ||
1575 | .cia_encrypt = camellia_encrypt, | ||
1576 | .cia_decrypt = camellia_decrypt | ||
1577 | } | ||
1578 | } | ||
1579 | }, { | ||
1580 | .cra_name = "ecb(camellia)", | ||
1581 | .cra_driver_name = "ecb-camellia-asm", | ||
1582 | .cra_priority = 300, | ||
1583 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
1584 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | ||
1585 | .cra_ctxsize = sizeof(struct camellia_ctx), | ||
1586 | .cra_alignmask = 0, | ||
1587 | .cra_type = &crypto_blkcipher_type, | ||
1588 | .cra_module = THIS_MODULE, | ||
1589 | .cra_u = { | ||
1590 | .blkcipher = { | ||
1591 | .min_keysize = CAMELLIA_MIN_KEY_SIZE, | ||
1592 | .max_keysize = CAMELLIA_MAX_KEY_SIZE, | ||
1593 | .setkey = camellia_setkey, | ||
1594 | .encrypt = ecb_encrypt, | ||
1595 | .decrypt = ecb_decrypt, | ||
1596 | }, | ||
1597 | }, | ||
1598 | }, { | ||
1599 | .cra_name = "cbc(camellia)", | ||
1600 | .cra_driver_name = "cbc-camellia-asm", | ||
1601 | .cra_priority = 300, | ||
1602 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
1603 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | ||
1604 | .cra_ctxsize = sizeof(struct camellia_ctx), | ||
1605 | .cra_alignmask = 0, | ||
1606 | .cra_type = &crypto_blkcipher_type, | ||
1607 | .cra_module = THIS_MODULE, | ||
1608 | .cra_u = { | ||
1609 | .blkcipher = { | ||
1610 | .min_keysize = CAMELLIA_MIN_KEY_SIZE, | ||
1611 | .max_keysize = CAMELLIA_MAX_KEY_SIZE, | ||
1612 | .ivsize = CAMELLIA_BLOCK_SIZE, | ||
1613 | .setkey = camellia_setkey, | ||
1614 | .encrypt = cbc_encrypt, | ||
1615 | .decrypt = cbc_decrypt, | ||
1616 | }, | ||
1617 | }, | ||
1618 | }, { | ||
1619 | .cra_name = "ctr(camellia)", | ||
1620 | .cra_driver_name = "ctr-camellia-asm", | ||
1621 | .cra_priority = 300, | ||
1622 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
1623 | .cra_blocksize = 1, | ||
1624 | .cra_ctxsize = sizeof(struct camellia_ctx), | ||
1625 | .cra_alignmask = 0, | ||
1626 | .cra_type = &crypto_blkcipher_type, | ||
1627 | .cra_module = THIS_MODULE, | ||
1628 | .cra_u = { | ||
1629 | .blkcipher = { | ||
1630 | .min_keysize = CAMELLIA_MIN_KEY_SIZE, | ||
1631 | .max_keysize = CAMELLIA_MAX_KEY_SIZE, | ||
1632 | .ivsize = CAMELLIA_BLOCK_SIZE, | ||
1633 | .setkey = camellia_setkey, | ||
1634 | .encrypt = ctr_crypt, | ||
1635 | .decrypt = ctr_crypt, | ||
1636 | }, | ||
1637 | }, | ||
1638 | }, { | ||
1639 | .cra_name = "lrw(camellia)", | ||
1640 | .cra_driver_name = "lrw-camellia-asm", | ||
1641 | .cra_priority = 300, | ||
1642 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
1643 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | ||
1644 | .cra_ctxsize = sizeof(struct camellia_lrw_ctx), | ||
1645 | .cra_alignmask = 0, | ||
1646 | .cra_type = &crypto_blkcipher_type, | ||
1647 | .cra_module = THIS_MODULE, | ||
1648 | .cra_exit = lrw_camellia_exit_tfm, | ||
1649 | .cra_u = { | ||
1650 | .blkcipher = { | ||
1651 | .min_keysize = CAMELLIA_MIN_KEY_SIZE + | ||
1652 | CAMELLIA_BLOCK_SIZE, | ||
1653 | .max_keysize = CAMELLIA_MAX_KEY_SIZE + | ||
1654 | CAMELLIA_BLOCK_SIZE, | ||
1655 | .ivsize = CAMELLIA_BLOCK_SIZE, | ||
1656 | .setkey = lrw_camellia_setkey, | ||
1657 | .encrypt = lrw_encrypt, | ||
1658 | .decrypt = lrw_decrypt, | ||
1659 | }, | ||
1660 | }, | ||
1661 | }, { | ||
1662 | .cra_name = "xts(camellia)", | ||
1663 | .cra_driver_name = "xts-camellia-asm", | ||
1664 | .cra_priority = 300, | ||
1665 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
1666 | .cra_blocksize = CAMELLIA_BLOCK_SIZE, | ||
1667 | .cra_ctxsize = sizeof(struct camellia_xts_ctx), | ||
1668 | .cra_alignmask = 0, | ||
1669 | .cra_type = &crypto_blkcipher_type, | ||
1670 | .cra_module = THIS_MODULE, | ||
1671 | .cra_u = { | ||
1672 | .blkcipher = { | ||
1673 | .min_keysize = CAMELLIA_MIN_KEY_SIZE * 2, | ||
1674 | .max_keysize = CAMELLIA_MAX_KEY_SIZE * 2, | ||
1675 | .ivsize = CAMELLIA_BLOCK_SIZE, | ||
1676 | .setkey = xts_camellia_setkey, | ||
1677 | .encrypt = xts_encrypt, | ||
1678 | .decrypt = xts_decrypt, | ||
1679 | }, | ||
1680 | }, | ||
1681 | } }; | ||
1682 | |||
1683 | static bool is_blacklisted_cpu(void) | ||
1684 | { | ||
1685 | if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) | ||
1686 | return false; | ||
1687 | |||
1688 | if (boot_cpu_data.x86 == 0x0f) { | ||
1689 | /* | ||
1690 | * On Pentium 4, camellia-asm is slower than original assembler | ||
1691 | * implementation because excessive uses of 64bit rotate and | ||
1692 | * left-shifts (which are really slow on P4) needed to store and | ||
1693 | * handle 128bit block in two 64bit registers. | ||
1694 | */ | ||
1695 | return true; | ||
1696 | } | ||
1697 | |||
1698 | return false; | ||
1699 | } | ||
1700 | |||
1701 | static int force; | ||
1702 | module_param(force, int, 0); | ||
1703 | MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist"); | ||
1704 | |||
1705 | static int __init init(void) | ||
1706 | { | ||
1707 | if (!force && is_blacklisted_cpu()) { | ||
1708 | printk(KERN_INFO | ||
1709 | "camellia-x86_64: performance on this CPU " | ||
1710 | "would be suboptimal: disabling " | ||
1711 | "camellia-x86_64.\n"); | ||
1712 | return -ENODEV; | ||
1713 | } | ||
1714 | |||
1715 | return crypto_register_algs(camellia_algs, ARRAY_SIZE(camellia_algs)); | ||
1716 | } | ||
1717 | |||
1718 | static void __exit fini(void) | ||
1719 | { | ||
1720 | crypto_unregister_algs(camellia_algs, ARRAY_SIZE(camellia_algs)); | ||
1721 | } | ||
1722 | |||
1723 | module_init(init); | ||
1724 | module_exit(fini); | ||
1725 | |||
1726 | MODULE_LICENSE("GPL"); | ||
1727 | MODULE_DESCRIPTION("Camellia Cipher Algorithm, asm optimized"); | ||
1728 | MODULE_ALIAS("camellia"); | ||
1729 | MODULE_ALIAS("camellia-asm"); | ||
diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S deleted file mode 100644 index 15b00ac7cbd..00000000000 --- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S +++ /dev/null | |||
@@ -1,558 +0,0 @@ | |||
1 | /* | ||
2 | * Cast5 Cipher 16-way parallel algorithm (AVX/x86_64) | ||
3 | * | ||
4 | * Copyright (C) 2012 Johannes Goetzfried | ||
5 | * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> | ||
6 | * | ||
7 | * Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License as published by | ||
11 | * the Free Software Foundation; either version 2 of the License, or | ||
12 | * (at your option) any later version. | ||
13 | * | ||
14 | * This program is distributed in the hope that it will be useful, | ||
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | * GNU General Public License for more details. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License | ||
20 | * along with this program; if not, write to the Free Software | ||
21 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
22 | * USA | ||
23 | * | ||
24 | */ | ||
25 | |||
26 | .file "cast5-avx-x86_64-asm_64.S" | ||
27 | |||
28 | .extern cast_s1 | ||
29 | .extern cast_s2 | ||
30 | .extern cast_s3 | ||
31 | .extern cast_s4 | ||
32 | |||
33 | /* structure of crypto context */ | ||
34 | #define km 0 | ||
35 | #define kr (16*4) | ||
36 | #define rr ((16*4)+16) | ||
37 | |||
38 | /* s-boxes */ | ||
39 | #define s1 cast_s1 | ||
40 | #define s2 cast_s2 | ||
41 | #define s3 cast_s3 | ||
42 | #define s4 cast_s4 | ||
43 | |||
44 | /********************************************************************** | ||
45 | 16-way AVX cast5 | ||
46 | **********************************************************************/ | ||
47 | #define CTX %rdi | ||
48 | |||
49 | #define RL1 %xmm0 | ||
50 | #define RR1 %xmm1 | ||
51 | #define RL2 %xmm2 | ||
52 | #define RR2 %xmm3 | ||
53 | #define RL3 %xmm4 | ||
54 | #define RR3 %xmm5 | ||
55 | #define RL4 %xmm6 | ||
56 | #define RR4 %xmm7 | ||
57 | |||
58 | #define RX %xmm8 | ||
59 | |||
60 | #define RKM %xmm9 | ||
61 | #define RKR %xmm10 | ||
62 | #define RKRF %xmm11 | ||
63 | #define RKRR %xmm12 | ||
64 | |||
65 | #define R32 %xmm13 | ||
66 | #define R1ST %xmm14 | ||
67 | |||
68 | #define RTMP %xmm15 | ||
69 | |||
70 | #define RID1 %rbp | ||
71 | #define RID1d %ebp | ||
72 | #define RID2 %rsi | ||
73 | #define RID2d %esi | ||
74 | |||
75 | #define RGI1 %rdx | ||
76 | #define RGI1bl %dl | ||
77 | #define RGI1bh %dh | ||
78 | #define RGI2 %rcx | ||
79 | #define RGI2bl %cl | ||
80 | #define RGI2bh %ch | ||
81 | |||
82 | #define RGI3 %rax | ||
83 | #define RGI3bl %al | ||
84 | #define RGI3bh %ah | ||
85 | #define RGI4 %rbx | ||
86 | #define RGI4bl %bl | ||
87 | #define RGI4bh %bh | ||
88 | |||
89 | #define RFS1 %r8 | ||
90 | #define RFS1d %r8d | ||
91 | #define RFS2 %r9 | ||
92 | #define RFS2d %r9d | ||
93 | #define RFS3 %r10 | ||
94 | #define RFS3d %r10d | ||
95 | |||
96 | |||
97 | #define lookup_32bit(src, dst, op1, op2, op3, interleave_op, il_reg) \ | ||
98 | movzbl src ## bh, RID1d; \ | ||
99 | movzbl src ## bl, RID2d; \ | ||
100 | shrq $16, src; \ | ||
101 | movl s1(, RID1, 4), dst ## d; \ | ||
102 | op1 s2(, RID2, 4), dst ## d; \ | ||
103 | movzbl src ## bh, RID1d; \ | ||
104 | movzbl src ## bl, RID2d; \ | ||
105 | interleave_op(il_reg); \ | ||
106 | op2 s3(, RID1, 4), dst ## d; \ | ||
107 | op3 s4(, RID2, 4), dst ## d; | ||
108 | |||
109 | #define dummy(d) /* do nothing */ | ||
110 | |||
111 | #define shr_next(reg) \ | ||
112 | shrq $16, reg; | ||
113 | |||
114 | #define F_head(a, x, gi1, gi2, op0) \ | ||
115 | op0 a, RKM, x; \ | ||
116 | vpslld RKRF, x, RTMP; \ | ||
117 | vpsrld RKRR, x, x; \ | ||
118 | vpor RTMP, x, x; \ | ||
119 | \ | ||
120 | vmovq x, gi1; \ | ||
121 | vpextrq $1, x, gi2; | ||
122 | |||
123 | #define F_tail(a, x, gi1, gi2, op1, op2, op3) \ | ||
124 | lookup_32bit(##gi1, RFS1, op1, op2, op3, shr_next, ##gi1); \ | ||
125 | lookup_32bit(##gi2, RFS3, op1, op2, op3, shr_next, ##gi2); \ | ||
126 | \ | ||
127 | lookup_32bit(##gi1, RFS2, op1, op2, op3, dummy, none); \ | ||
128 | shlq $32, RFS2; \ | ||
129 | orq RFS1, RFS2; \ | ||
130 | lookup_32bit(##gi2, RFS1, op1, op2, op3, dummy, none); \ | ||
131 | shlq $32, RFS1; \ | ||
132 | orq RFS1, RFS3; \ | ||
133 | \ | ||
134 | vmovq RFS2, x; \ | ||
135 | vpinsrq $1, RFS3, x, x; | ||
136 | |||
137 | #define F_2(a1, b1, a2, b2, op0, op1, op2, op3) \ | ||
138 | F_head(b1, RX, RGI1, RGI2, op0); \ | ||
139 | F_head(b2, RX, RGI3, RGI4, op0); \ | ||
140 | \ | ||
141 | F_tail(b1, RX, RGI1, RGI2, op1, op2, op3); \ | ||
142 | F_tail(b2, RTMP, RGI3, RGI4, op1, op2, op3); \ | ||
143 | \ | ||
144 | vpxor a1, RX, a1; \ | ||
145 | vpxor a2, RTMP, a2; | ||
146 | |||
147 | #define F1_2(a1, b1, a2, b2) \ | ||
148 | F_2(a1, b1, a2, b2, vpaddd, xorl, subl, addl) | ||
149 | #define F2_2(a1, b1, a2, b2) \ | ||
150 | F_2(a1, b1, a2, b2, vpxor, subl, addl, xorl) | ||
151 | #define F3_2(a1, b1, a2, b2) \ | ||
152 | F_2(a1, b1, a2, b2, vpsubd, addl, xorl, subl) | ||
153 | |||
154 | #define subround(a1, b1, a2, b2, f) \ | ||
155 | F ## f ## _2(a1, b1, a2, b2); | ||
156 | |||
157 | #define round(l, r, n, f) \ | ||
158 | vbroadcastss (km+(4*n))(CTX), RKM; \ | ||
159 | vpand R1ST, RKR, RKRF; \ | ||
160 | vpsubq RKRF, R32, RKRR; \ | ||
161 | vpsrldq $1, RKR, RKR; \ | ||
162 | subround(l ## 1, r ## 1, l ## 2, r ## 2, f); \ | ||
163 | subround(l ## 3, r ## 3, l ## 4, r ## 4, f); | ||
164 | |||
165 | #define enc_preload_rkr() \ | ||
166 | vbroadcastss .L16_mask, RKR; \ | ||
167 | /* add 16-bit rotation to key rotations (mod 32) */ \ | ||
168 | vpxor kr(CTX), RKR, RKR; | ||
169 | |||
170 | #define dec_preload_rkr() \ | ||
171 | vbroadcastss .L16_mask, RKR; \ | ||
172 | /* add 16-bit rotation to key rotations (mod 32) */ \ | ||
173 | vpxor kr(CTX), RKR, RKR; \ | ||
174 | vpshufb .Lbswap128_mask, RKR, RKR; | ||
175 | |||
176 | #define transpose_2x4(x0, x1, t0, t1) \ | ||
177 | vpunpckldq x1, x0, t0; \ | ||
178 | vpunpckhdq x1, x0, t1; \ | ||
179 | \ | ||
180 | vpunpcklqdq t1, t0, x0; \ | ||
181 | vpunpckhqdq t1, t0, x1; | ||
182 | |||
183 | #define inpack_blocks(x0, x1, t0, t1, rmask) \ | ||
184 | vpshufb rmask, x0, x0; \ | ||
185 | vpshufb rmask, x1, x1; \ | ||
186 | \ | ||
187 | transpose_2x4(x0, x1, t0, t1) | ||
188 | |||
189 | #define outunpack_blocks(x0, x1, t0, t1, rmask) \ | ||
190 | transpose_2x4(x0, x1, t0, t1) \ | ||
191 | \ | ||
192 | vpshufb rmask, x0, x0; \ | ||
193 | vpshufb rmask, x1, x1; | ||
194 | |||
195 | .data | ||
196 | |||
197 | .align 16 | ||
198 | .Lbswap_mask: | ||
199 | .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 | ||
200 | .Lbswap128_mask: | ||
201 | .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 | ||
202 | .Lbswap_iv_mask: | ||
203 | .byte 7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0 | ||
204 | .L16_mask: | ||
205 | .byte 16, 16, 16, 16 | ||
206 | .L32_mask: | ||
207 | .byte 32, 0, 0, 0 | ||
208 | .Lfirst_mask: | ||
209 | .byte 0x1f, 0, 0, 0 | ||
210 | |||
211 | .text | ||
212 | |||
213 | .align 16 | ||
214 | .type __cast5_enc_blk16,@function; | ||
215 | |||
216 | __cast5_enc_blk16: | ||
217 | /* input: | ||
218 | * %rdi: ctx, CTX | ||
219 | * RL1: blocks 1 and 2 | ||
220 | * RR1: blocks 3 and 4 | ||
221 | * RL2: blocks 5 and 6 | ||
222 | * RR2: blocks 7 and 8 | ||
223 | * RL3: blocks 9 and 10 | ||
224 | * RR3: blocks 11 and 12 | ||
225 | * RL4: blocks 13 and 14 | ||
226 | * RR4: blocks 15 and 16 | ||
227 | * output: | ||
228 | * RL1: encrypted blocks 1 and 2 | ||
229 | * RR1: encrypted blocks 3 and 4 | ||
230 | * RL2: encrypted blocks 5 and 6 | ||
231 | * RR2: encrypted blocks 7 and 8 | ||
232 | * RL3: encrypted blocks 9 and 10 | ||
233 | * RR3: encrypted blocks 11 and 12 | ||
234 | * RL4: encrypted blocks 13 and 14 | ||
235 | * RR4: encrypted blocks 15 and 16 | ||
236 | */ | ||
237 | |||
238 | pushq %rbp; | ||
239 | pushq %rbx; | ||
240 | |||
241 | vmovdqa .Lbswap_mask, RKM; | ||
242 | vmovd .Lfirst_mask, R1ST; | ||
243 | vmovd .L32_mask, R32; | ||
244 | enc_preload_rkr(); | ||
245 | |||
246 | inpack_blocks(RL1, RR1, RTMP, RX, RKM); | ||
247 | inpack_blocks(RL2, RR2, RTMP, RX, RKM); | ||
248 | inpack_blocks(RL3, RR3, RTMP, RX, RKM); | ||
249 | inpack_blocks(RL4, RR4, RTMP, RX, RKM); | ||
250 | |||
251 | round(RL, RR, 0, 1); | ||
252 | round(RR, RL, 1, 2); | ||
253 | round(RL, RR, 2, 3); | ||
254 | round(RR, RL, 3, 1); | ||
255 | round(RL, RR, 4, 2); | ||
256 | round(RR, RL, 5, 3); | ||
257 | round(RL, RR, 6, 1); | ||
258 | round(RR, RL, 7, 2); | ||
259 | round(RL, RR, 8, 3); | ||
260 | round(RR, RL, 9, 1); | ||
261 | round(RL, RR, 10, 2); | ||
262 | round(RR, RL, 11, 3); | ||
263 | |||
264 | movzbl rr(CTX), %eax; | ||
265 | testl %eax, %eax; | ||
266 | jnz __skip_enc; | ||
267 | |||
268 | round(RL, RR, 12, 1); | ||
269 | round(RR, RL, 13, 2); | ||
270 | round(RL, RR, 14, 3); | ||
271 | round(RR, RL, 15, 1); | ||
272 | |||
273 | __skip_enc: | ||
274 | popq %rbx; | ||
275 | popq %rbp; | ||
276 | |||
277 | vmovdqa .Lbswap_mask, RKM; | ||
278 | |||
279 | outunpack_blocks(RR1, RL1, RTMP, RX, RKM); | ||
280 | outunpack_blocks(RR2, RL2, RTMP, RX, RKM); | ||
281 | outunpack_blocks(RR3, RL3, RTMP, RX, RKM); | ||
282 | outunpack_blocks(RR4, RL4, RTMP, RX, RKM); | ||
283 | |||
284 | ret; | ||
285 | |||
286 | .align 16 | ||
287 | .type __cast5_dec_blk16,@function; | ||
288 | |||
289 | __cast5_dec_blk16: | ||
290 | /* input: | ||
291 | * %rdi: ctx, CTX | ||
292 | * RL1: encrypted blocks 1 and 2 | ||
293 | * RR1: encrypted blocks 3 and 4 | ||
294 | * RL2: encrypted blocks 5 and 6 | ||
295 | * RR2: encrypted blocks 7 and 8 | ||
296 | * RL3: encrypted blocks 9 and 10 | ||
297 | * RR3: encrypted blocks 11 and 12 | ||
298 | * RL4: encrypted blocks 13 and 14 | ||
299 | * RR4: encrypted blocks 15 and 16 | ||
300 | * output: | ||
301 | * RL1: decrypted blocks 1 and 2 | ||
302 | * RR1: decrypted blocks 3 and 4 | ||
303 | * RL2: decrypted blocks 5 and 6 | ||
304 | * RR2: decrypted blocks 7 and 8 | ||
305 | * RL3: decrypted blocks 9 and 10 | ||
306 | * RR3: decrypted blocks 11 and 12 | ||
307 | * RL4: decrypted blocks 13 and 14 | ||
308 | * RR4: decrypted blocks 15 and 16 | ||
309 | */ | ||
310 | |||
311 | pushq %rbp; | ||
312 | pushq %rbx; | ||
313 | |||
314 | vmovdqa .Lbswap_mask, RKM; | ||
315 | vmovd .Lfirst_mask, R1ST; | ||
316 | vmovd .L32_mask, R32; | ||
317 | dec_preload_rkr(); | ||
318 | |||
319 | inpack_blocks(RL1, RR1, RTMP, RX, RKM); | ||
320 | inpack_blocks(RL2, RR2, RTMP, RX, RKM); | ||
321 | inpack_blocks(RL3, RR3, RTMP, RX, RKM); | ||
322 | inpack_blocks(RL4, RR4, RTMP, RX, RKM); | ||
323 | |||
324 | movzbl rr(CTX), %eax; | ||
325 | testl %eax, %eax; | ||
326 | jnz __skip_dec; | ||
327 | |||
328 | round(RL, RR, 15, 1); | ||
329 | round(RR, RL, 14, 3); | ||
330 | round(RL, RR, 13, 2); | ||
331 | round(RR, RL, 12, 1); | ||
332 | |||
333 | __dec_tail: | ||
334 | round(RL, RR, 11, 3); | ||
335 | round(RR, RL, 10, 2); | ||
336 | round(RL, RR, 9, 1); | ||
337 | round(RR, RL, 8, 3); | ||
338 | round(RL, RR, 7, 2); | ||
339 | round(RR, RL, 6, 1); | ||
340 | round(RL, RR, 5, 3); | ||
341 | round(RR, RL, 4, 2); | ||
342 | round(RL, RR, 3, 1); | ||
343 | round(RR, RL, 2, 3); | ||
344 | round(RL, RR, 1, 2); | ||
345 | round(RR, RL, 0, 1); | ||
346 | |||
347 | vmovdqa .Lbswap_mask, RKM; | ||
348 | popq %rbx; | ||
349 | popq %rbp; | ||
350 | |||
351 | outunpack_blocks(RR1, RL1, RTMP, RX, RKM); | ||
352 | outunpack_blocks(RR2, RL2, RTMP, RX, RKM); | ||
353 | outunpack_blocks(RR3, RL3, RTMP, RX, RKM); | ||
354 | outunpack_blocks(RR4, RL4, RTMP, RX, RKM); | ||
355 | |||
356 | ret; | ||
357 | |||
358 | __skip_dec: | ||
359 | vpsrldq $4, RKR, RKR; | ||
360 | jmp __dec_tail; | ||
361 | |||
362 | .align 16 | ||
363 | .global cast5_ecb_enc_16way | ||
364 | .type cast5_ecb_enc_16way,@function; | ||
365 | |||
366 | cast5_ecb_enc_16way: | ||
367 | /* input: | ||
368 | * %rdi: ctx, CTX | ||
369 | * %rsi: dst | ||
370 | * %rdx: src | ||
371 | */ | ||
372 | |||
373 | movq %rsi, %r11; | ||
374 | |||
375 | vmovdqu (0*4*4)(%rdx), RL1; | ||
376 | vmovdqu (1*4*4)(%rdx), RR1; | ||
377 | vmovdqu (2*4*4)(%rdx), RL2; | ||
378 | vmovdqu (3*4*4)(%rdx), RR2; | ||
379 | vmovdqu (4*4*4)(%rdx), RL3; | ||
380 | vmovdqu (5*4*4)(%rdx), RR3; | ||
381 | vmovdqu (6*4*4)(%rdx), RL4; | ||
382 | vmovdqu (7*4*4)(%rdx), RR4; | ||
383 | |||
384 | call __cast5_enc_blk16; | ||
385 | |||
386 | vmovdqu RR1, (0*4*4)(%r11); | ||
387 | vmovdqu RL1, (1*4*4)(%r11); | ||
388 | vmovdqu RR2, (2*4*4)(%r11); | ||
389 | vmovdqu RL2, (3*4*4)(%r11); | ||
390 | vmovdqu RR3, (4*4*4)(%r11); | ||
391 | vmovdqu RL3, (5*4*4)(%r11); | ||
392 | vmovdqu RR4, (6*4*4)(%r11); | ||
393 | vmovdqu RL4, (7*4*4)(%r11); | ||
394 | |||
395 | ret; | ||
396 | |||
397 | .align 16 | ||
398 | .global cast5_ecb_dec_16way | ||
399 | .type cast5_ecb_dec_16way,@function; | ||
400 | |||
401 | cast5_ecb_dec_16way: | ||
402 | /* input: | ||
403 | * %rdi: ctx, CTX | ||
404 | * %rsi: dst | ||
405 | * %rdx: src | ||
406 | */ | ||
407 | |||
408 | movq %rsi, %r11; | ||
409 | |||
410 | vmovdqu (0*4*4)(%rdx), RL1; | ||
411 | vmovdqu (1*4*4)(%rdx), RR1; | ||
412 | vmovdqu (2*4*4)(%rdx), RL2; | ||
413 | vmovdqu (3*4*4)(%rdx), RR2; | ||
414 | vmovdqu (4*4*4)(%rdx), RL3; | ||
415 | vmovdqu (5*4*4)(%rdx), RR3; | ||
416 | vmovdqu (6*4*4)(%rdx), RL4; | ||
417 | vmovdqu (7*4*4)(%rdx), RR4; | ||
418 | |||
419 | call __cast5_dec_blk16; | ||
420 | |||
421 | vmovdqu RR1, (0*4*4)(%r11); | ||
422 | vmovdqu RL1, (1*4*4)(%r11); | ||
423 | vmovdqu RR2, (2*4*4)(%r11); | ||
424 | vmovdqu RL2, (3*4*4)(%r11); | ||
425 | vmovdqu RR3, (4*4*4)(%r11); | ||
426 | vmovdqu RL3, (5*4*4)(%r11); | ||
427 | vmovdqu RR4, (6*4*4)(%r11); | ||
428 | vmovdqu RL4, (7*4*4)(%r11); | ||
429 | |||
430 | ret; | ||
431 | |||
432 | .align 16 | ||
433 | .global cast5_cbc_dec_16way | ||
434 | .type cast5_cbc_dec_16way,@function; | ||
435 | |||
436 | cast5_cbc_dec_16way: | ||
437 | /* input: | ||
438 | * %rdi: ctx, CTX | ||
439 | * %rsi: dst | ||
440 | * %rdx: src | ||
441 | */ | ||
442 | |||
443 | pushq %r12; | ||
444 | |||
445 | movq %rsi, %r11; | ||
446 | movq %rdx, %r12; | ||
447 | |||
448 | vmovdqu (0*16)(%rdx), RL1; | ||
449 | vmovdqu (1*16)(%rdx), RR1; | ||
450 | vmovdqu (2*16)(%rdx), RL2; | ||
451 | vmovdqu (3*16)(%rdx), RR2; | ||
452 | vmovdqu (4*16)(%rdx), RL3; | ||
453 | vmovdqu (5*16)(%rdx), RR3; | ||
454 | vmovdqu (6*16)(%rdx), RL4; | ||
455 | vmovdqu (7*16)(%rdx), RR4; | ||
456 | |||
457 | call __cast5_dec_blk16; | ||
458 | |||
459 | /* xor with src */ | ||
460 | vmovq (%r12), RX; | ||
461 | vpshufd $0x4f, RX, RX; | ||
462 | vpxor RX, RR1, RR1; | ||
463 | vpxor 0*16+8(%r12), RL1, RL1; | ||
464 | vpxor 1*16+8(%r12), RR2, RR2; | ||
465 | vpxor 2*16+8(%r12), RL2, RL2; | ||
466 | vpxor 3*16+8(%r12), RR3, RR3; | ||
467 | vpxor 4*16+8(%r12), RL3, RL3; | ||
468 | vpxor 5*16+8(%r12), RR4, RR4; | ||
469 | vpxor 6*16+8(%r12), RL4, RL4; | ||
470 | |||
471 | vmovdqu RR1, (0*16)(%r11); | ||
472 | vmovdqu RL1, (1*16)(%r11); | ||
473 | vmovdqu RR2, (2*16)(%r11); | ||
474 | vmovdqu RL2, (3*16)(%r11); | ||
475 | vmovdqu RR3, (4*16)(%r11); | ||
476 | vmovdqu RL3, (5*16)(%r11); | ||
477 | vmovdqu RR4, (6*16)(%r11); | ||
478 | vmovdqu RL4, (7*16)(%r11); | ||
479 | |||
480 | popq %r12; | ||
481 | |||
482 | ret; | ||
483 | |||
484 | .align 16 | ||
485 | .global cast5_ctr_16way | ||
486 | .type cast5_ctr_16way,@function; | ||
487 | |||
488 | cast5_ctr_16way: | ||
489 | /* input: | ||
490 | * %rdi: ctx, CTX | ||
491 | * %rsi: dst | ||
492 | * %rdx: src | ||
493 | * %rcx: iv (big endian, 64bit) | ||
494 | */ | ||
495 | |||
496 | pushq %r12; | ||
497 | |||
498 | movq %rsi, %r11; | ||
499 | movq %rdx, %r12; | ||
500 | |||
501 | vpcmpeqd RTMP, RTMP, RTMP; | ||
502 | vpsrldq $8, RTMP, RTMP; /* low: -1, high: 0 */ | ||
503 | |||
504 | vpcmpeqd RKR, RKR, RKR; | ||
505 | vpaddq RKR, RKR, RKR; /* low: -2, high: -2 */ | ||
506 | vmovdqa .Lbswap_iv_mask, R1ST; | ||
507 | vmovdqa .Lbswap128_mask, RKM; | ||
508 | |||
509 | /* load IV and byteswap */ | ||
510 | vmovq (%rcx), RX; | ||
511 | vpshufb R1ST, RX, RX; | ||
512 | |||
513 | /* construct IVs */ | ||
514 | vpsubq RTMP, RX, RX; /* le: IV1, IV0 */ | ||
515 | vpshufb RKM, RX, RL1; /* be: IV0, IV1 */ | ||
516 | vpsubq RKR, RX, RX; | ||
517 | vpshufb RKM, RX, RR1; /* be: IV2, IV3 */ | ||
518 | vpsubq RKR, RX, RX; | ||
519 | vpshufb RKM, RX, RL2; /* be: IV4, IV5 */ | ||
520 | vpsubq RKR, RX, RX; | ||
521 | vpshufb RKM, RX, RR2; /* be: IV6, IV7 */ | ||
522 | vpsubq RKR, RX, RX; | ||
523 | vpshufb RKM, RX, RL3; /* be: IV8, IV9 */ | ||
524 | vpsubq RKR, RX, RX; | ||
525 | vpshufb RKM, RX, RR3; /* be: IV10, IV11 */ | ||
526 | vpsubq RKR, RX, RX; | ||
527 | vpshufb RKM, RX, RL4; /* be: IV12, IV13 */ | ||
528 | vpsubq RKR, RX, RX; | ||
529 | vpshufb RKM, RX, RR4; /* be: IV14, IV15 */ | ||
530 | |||
531 | /* store last IV */ | ||
532 | vpsubq RTMP, RX, RX; /* le: IV16, IV14 */ | ||
533 | vpshufb R1ST, RX, RX; /* be: IV16, IV16 */ | ||
534 | vmovq RX, (%rcx); | ||
535 | |||
536 | call __cast5_enc_blk16; | ||
537 | |||
538 | /* dst = src ^ iv */ | ||
539 | vpxor (0*16)(%r12), RR1, RR1; | ||
540 | vpxor (1*16)(%r12), RL1, RL1; | ||
541 | vpxor (2*16)(%r12), RR2, RR2; | ||
542 | vpxor (3*16)(%r12), RL2, RL2; | ||
543 | vpxor (4*16)(%r12), RR3, RR3; | ||
544 | vpxor (5*16)(%r12), RL3, RL3; | ||
545 | vpxor (6*16)(%r12), RR4, RR4; | ||
546 | vpxor (7*16)(%r12), RL4, RL4; | ||
547 | vmovdqu RR1, (0*16)(%r11); | ||
548 | vmovdqu RL1, (1*16)(%r11); | ||
549 | vmovdqu RR2, (2*16)(%r11); | ||
550 | vmovdqu RL2, (3*16)(%r11); | ||
551 | vmovdqu RR3, (4*16)(%r11); | ||
552 | vmovdqu RL3, (5*16)(%r11); | ||
553 | vmovdqu RR4, (6*16)(%r11); | ||
554 | vmovdqu RL4, (7*16)(%r11); | ||
555 | |||
556 | popq %r12; | ||
557 | |||
558 | ret; | ||
diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c deleted file mode 100644 index c6631813dc1..00000000000 --- a/arch/x86/crypto/cast5_avx_glue.c +++ /dev/null | |||
@@ -1,497 +0,0 @@ | |||
1 | /* | ||
2 | * Glue Code for the AVX assembler implemention of the Cast5 Cipher | ||
3 | * | ||
4 | * Copyright (C) 2012 Johannes Goetzfried | ||
5 | * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License as published by | ||
9 | * the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
20 | * USA | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | #include <linux/module.h> | ||
25 | #include <linux/hardirq.h> | ||
26 | #include <linux/types.h> | ||
27 | #include <linux/crypto.h> | ||
28 | #include <linux/err.h> | ||
29 | #include <crypto/algapi.h> | ||
30 | #include <crypto/cast5.h> | ||
31 | #include <crypto/cryptd.h> | ||
32 | #include <crypto/ctr.h> | ||
33 | #include <asm/xcr.h> | ||
34 | #include <asm/xsave.h> | ||
35 | #include <asm/crypto/ablk_helper.h> | ||
36 | #include <asm/crypto/glue_helper.h> | ||
37 | |||
38 | #define CAST5_PARALLEL_BLOCKS 16 | ||
39 | |||
40 | asmlinkage void cast5_ecb_enc_16way(struct cast5_ctx *ctx, u8 *dst, | ||
41 | const u8 *src); | ||
42 | asmlinkage void cast5_ecb_dec_16way(struct cast5_ctx *ctx, u8 *dst, | ||
43 | const u8 *src); | ||
44 | asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst, | ||
45 | const u8 *src); | ||
46 | asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src, | ||
47 | __be64 *iv); | ||
48 | |||
49 | static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes) | ||
50 | { | ||
51 | return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS, | ||
52 | NULL, fpu_enabled, nbytes); | ||
53 | } | ||
54 | |||
55 | static inline void cast5_fpu_end(bool fpu_enabled) | ||
56 | { | ||
57 | return glue_fpu_end(fpu_enabled); | ||
58 | } | ||
59 | |||
60 | static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, | ||
61 | bool enc) | ||
62 | { | ||
63 | bool fpu_enabled = false; | ||
64 | struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
65 | const unsigned int bsize = CAST5_BLOCK_SIZE; | ||
66 | unsigned int nbytes; | ||
67 | void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src); | ||
68 | int err; | ||
69 | |||
70 | fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way; | ||
71 | |||
72 | err = blkcipher_walk_virt(desc, walk); | ||
73 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
74 | |||
75 | while ((nbytes = walk->nbytes)) { | ||
76 | u8 *wsrc = walk->src.virt.addr; | ||
77 | u8 *wdst = walk->dst.virt.addr; | ||
78 | |||
79 | fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); | ||
80 | |||
81 | /* Process multi-block batch */ | ||
82 | if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) { | ||
83 | do { | ||
84 | fn(ctx, wdst, wsrc); | ||
85 | |||
86 | wsrc += bsize * CAST5_PARALLEL_BLOCKS; | ||
87 | wdst += bsize * CAST5_PARALLEL_BLOCKS; | ||
88 | nbytes -= bsize * CAST5_PARALLEL_BLOCKS; | ||
89 | } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS); | ||
90 | |||
91 | if (nbytes < bsize) | ||
92 | goto done; | ||
93 | } | ||
94 | |||
95 | fn = (enc) ? __cast5_encrypt : __cast5_decrypt; | ||
96 | |||
97 | /* Handle leftovers */ | ||
98 | do { | ||
99 | fn(ctx, wdst, wsrc); | ||
100 | |||
101 | wsrc += bsize; | ||
102 | wdst += bsize; | ||
103 | nbytes -= bsize; | ||
104 | } while (nbytes >= bsize); | ||
105 | |||
106 | done: | ||
107 | err = blkcipher_walk_done(desc, walk, nbytes); | ||
108 | } | ||
109 | |||
110 | cast5_fpu_end(fpu_enabled); | ||
111 | return err; | ||
112 | } | ||
113 | |||
114 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
115 | struct scatterlist *src, unsigned int nbytes) | ||
116 | { | ||
117 | struct blkcipher_walk walk; | ||
118 | |||
119 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
120 | return ecb_crypt(desc, &walk, true); | ||
121 | } | ||
122 | |||
123 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
124 | struct scatterlist *src, unsigned int nbytes) | ||
125 | { | ||
126 | struct blkcipher_walk walk; | ||
127 | |||
128 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
129 | return ecb_crypt(desc, &walk, false); | ||
130 | } | ||
131 | |||
132 | static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, | ||
133 | struct blkcipher_walk *walk) | ||
134 | { | ||
135 | struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
136 | const unsigned int bsize = CAST5_BLOCK_SIZE; | ||
137 | unsigned int nbytes = walk->nbytes; | ||
138 | u64 *src = (u64 *)walk->src.virt.addr; | ||
139 | u64 *dst = (u64 *)walk->dst.virt.addr; | ||
140 | u64 *iv = (u64 *)walk->iv; | ||
141 | |||
142 | do { | ||
143 | *dst = *src ^ *iv; | ||
144 | __cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst); | ||
145 | iv = dst; | ||
146 | |||
147 | src += 1; | ||
148 | dst += 1; | ||
149 | nbytes -= bsize; | ||
150 | } while (nbytes >= bsize); | ||
151 | |||
152 | *(u64 *)walk->iv = *iv; | ||
153 | return nbytes; | ||
154 | } | ||
155 | |||
156 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
157 | struct scatterlist *src, unsigned int nbytes) | ||
158 | { | ||
159 | struct blkcipher_walk walk; | ||
160 | int err; | ||
161 | |||
162 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
163 | err = blkcipher_walk_virt(desc, &walk); | ||
164 | |||
165 | while ((nbytes = walk.nbytes)) { | ||
166 | nbytes = __cbc_encrypt(desc, &walk); | ||
167 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
168 | } | ||
169 | |||
170 | return err; | ||
171 | } | ||
172 | |||
173 | static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, | ||
174 | struct blkcipher_walk *walk) | ||
175 | { | ||
176 | struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
177 | const unsigned int bsize = CAST5_BLOCK_SIZE; | ||
178 | unsigned int nbytes = walk->nbytes; | ||
179 | u64 *src = (u64 *)walk->src.virt.addr; | ||
180 | u64 *dst = (u64 *)walk->dst.virt.addr; | ||
181 | u64 last_iv; | ||
182 | |||
183 | /* Start of the last block. */ | ||
184 | src += nbytes / bsize - 1; | ||
185 | dst += nbytes / bsize - 1; | ||
186 | |||
187 | last_iv = *src; | ||
188 | |||
189 | /* Process multi-block batch */ | ||
190 | if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) { | ||
191 | do { | ||
192 | nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1); | ||
193 | src -= CAST5_PARALLEL_BLOCKS - 1; | ||
194 | dst -= CAST5_PARALLEL_BLOCKS - 1; | ||
195 | |||
196 | cast5_cbc_dec_16way(ctx, (u8 *)dst, (u8 *)src); | ||
197 | |||
198 | nbytes -= bsize; | ||
199 | if (nbytes < bsize) | ||
200 | goto done; | ||
201 | |||
202 | *dst ^= *(src - 1); | ||
203 | src -= 1; | ||
204 | dst -= 1; | ||
205 | } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS); | ||
206 | |||
207 | if (nbytes < bsize) | ||
208 | goto done; | ||
209 | } | ||
210 | |||
211 | /* Handle leftovers */ | ||
212 | for (;;) { | ||
213 | __cast5_decrypt(ctx, (u8 *)dst, (u8 *)src); | ||
214 | |||
215 | nbytes -= bsize; | ||
216 | if (nbytes < bsize) | ||
217 | break; | ||
218 | |||
219 | *dst ^= *(src - 1); | ||
220 | src -= 1; | ||
221 | dst -= 1; | ||
222 | } | ||
223 | |||
224 | done: | ||
225 | *dst ^= *(u64 *)walk->iv; | ||
226 | *(u64 *)walk->iv = last_iv; | ||
227 | |||
228 | return nbytes; | ||
229 | } | ||
230 | |||
231 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
232 | struct scatterlist *src, unsigned int nbytes) | ||
233 | { | ||
234 | bool fpu_enabled = false; | ||
235 | struct blkcipher_walk walk; | ||
236 | int err; | ||
237 | |||
238 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
239 | err = blkcipher_walk_virt(desc, &walk); | ||
240 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
241 | |||
242 | while ((nbytes = walk.nbytes)) { | ||
243 | fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); | ||
244 | nbytes = __cbc_decrypt(desc, &walk); | ||
245 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
246 | } | ||
247 | |||
248 | cast5_fpu_end(fpu_enabled); | ||
249 | return err; | ||
250 | } | ||
251 | |||
252 | static void ctr_crypt_final(struct blkcipher_desc *desc, | ||
253 | struct blkcipher_walk *walk) | ||
254 | { | ||
255 | struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
256 | u8 *ctrblk = walk->iv; | ||
257 | u8 keystream[CAST5_BLOCK_SIZE]; | ||
258 | u8 *src = walk->src.virt.addr; | ||
259 | u8 *dst = walk->dst.virt.addr; | ||
260 | unsigned int nbytes = walk->nbytes; | ||
261 | |||
262 | __cast5_encrypt(ctx, keystream, ctrblk); | ||
263 | crypto_xor(keystream, src, nbytes); | ||
264 | memcpy(dst, keystream, nbytes); | ||
265 | |||
266 | crypto_inc(ctrblk, CAST5_BLOCK_SIZE); | ||
267 | } | ||
268 | |||
269 | static unsigned int __ctr_crypt(struct blkcipher_desc *desc, | ||
270 | struct blkcipher_walk *walk) | ||
271 | { | ||
272 | struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
273 | const unsigned int bsize = CAST5_BLOCK_SIZE; | ||
274 | unsigned int nbytes = walk->nbytes; | ||
275 | u64 *src = (u64 *)walk->src.virt.addr; | ||
276 | u64 *dst = (u64 *)walk->dst.virt.addr; | ||
277 | |||
278 | /* Process multi-block batch */ | ||
279 | if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) { | ||
280 | do { | ||
281 | cast5_ctr_16way(ctx, (u8 *)dst, (u8 *)src, | ||
282 | (__be64 *)walk->iv); | ||
283 | |||
284 | src += CAST5_PARALLEL_BLOCKS; | ||
285 | dst += CAST5_PARALLEL_BLOCKS; | ||
286 | nbytes -= bsize * CAST5_PARALLEL_BLOCKS; | ||
287 | } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS); | ||
288 | |||
289 | if (nbytes < bsize) | ||
290 | goto done; | ||
291 | } | ||
292 | |||
293 | /* Handle leftovers */ | ||
294 | do { | ||
295 | u64 ctrblk; | ||
296 | |||
297 | if (dst != src) | ||
298 | *dst = *src; | ||
299 | |||
300 | ctrblk = *(u64 *)walk->iv; | ||
301 | be64_add_cpu((__be64 *)walk->iv, 1); | ||
302 | |||
303 | __cast5_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); | ||
304 | *dst ^= ctrblk; | ||
305 | |||
306 | src += 1; | ||
307 | dst += 1; | ||
308 | nbytes -= bsize; | ||
309 | } while (nbytes >= bsize); | ||
310 | |||
311 | done: | ||
312 | return nbytes; | ||
313 | } | ||
314 | |||
315 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
316 | struct scatterlist *src, unsigned int nbytes) | ||
317 | { | ||
318 | bool fpu_enabled = false; | ||
319 | struct blkcipher_walk walk; | ||
320 | int err; | ||
321 | |||
322 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
323 | err = blkcipher_walk_virt_block(desc, &walk, CAST5_BLOCK_SIZE); | ||
324 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
325 | |||
326 | while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) { | ||
327 | fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); | ||
328 | nbytes = __ctr_crypt(desc, &walk); | ||
329 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
330 | } | ||
331 | |||
332 | cast5_fpu_end(fpu_enabled); | ||
333 | |||
334 | if (walk.nbytes) { | ||
335 | ctr_crypt_final(desc, &walk); | ||
336 | err = blkcipher_walk_done(desc, &walk, 0); | ||
337 | } | ||
338 | |||
339 | return err; | ||
340 | } | ||
341 | |||
342 | |||
343 | static struct crypto_alg cast5_algs[6] = { { | ||
344 | .cra_name = "__ecb-cast5-avx", | ||
345 | .cra_driver_name = "__driver-ecb-cast5-avx", | ||
346 | .cra_priority = 0, | ||
347 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
348 | .cra_blocksize = CAST5_BLOCK_SIZE, | ||
349 | .cra_ctxsize = sizeof(struct cast5_ctx), | ||
350 | .cra_alignmask = 0, | ||
351 | .cra_type = &crypto_blkcipher_type, | ||
352 | .cra_module = THIS_MODULE, | ||
353 | .cra_u = { | ||
354 | .blkcipher = { | ||
355 | .min_keysize = CAST5_MIN_KEY_SIZE, | ||
356 | .max_keysize = CAST5_MAX_KEY_SIZE, | ||
357 | .setkey = cast5_setkey, | ||
358 | .encrypt = ecb_encrypt, | ||
359 | .decrypt = ecb_decrypt, | ||
360 | }, | ||
361 | }, | ||
362 | }, { | ||
363 | .cra_name = "__cbc-cast5-avx", | ||
364 | .cra_driver_name = "__driver-cbc-cast5-avx", | ||
365 | .cra_priority = 0, | ||
366 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
367 | .cra_blocksize = CAST5_BLOCK_SIZE, | ||
368 | .cra_ctxsize = sizeof(struct cast5_ctx), | ||
369 | .cra_alignmask = 0, | ||
370 | .cra_type = &crypto_blkcipher_type, | ||
371 | .cra_module = THIS_MODULE, | ||
372 | .cra_u = { | ||
373 | .blkcipher = { | ||
374 | .min_keysize = CAST5_MIN_KEY_SIZE, | ||
375 | .max_keysize = CAST5_MAX_KEY_SIZE, | ||
376 | .setkey = cast5_setkey, | ||
377 | .encrypt = cbc_encrypt, | ||
378 | .decrypt = cbc_decrypt, | ||
379 | }, | ||
380 | }, | ||
381 | }, { | ||
382 | .cra_name = "__ctr-cast5-avx", | ||
383 | .cra_driver_name = "__driver-ctr-cast5-avx", | ||
384 | .cra_priority = 0, | ||
385 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
386 | .cra_blocksize = 1, | ||
387 | .cra_ctxsize = sizeof(struct cast5_ctx), | ||
388 | .cra_alignmask = 0, | ||
389 | .cra_type = &crypto_blkcipher_type, | ||
390 | .cra_module = THIS_MODULE, | ||
391 | .cra_u = { | ||
392 | .blkcipher = { | ||
393 | .min_keysize = CAST5_MIN_KEY_SIZE, | ||
394 | .max_keysize = CAST5_MAX_KEY_SIZE, | ||
395 | .ivsize = CAST5_BLOCK_SIZE, | ||
396 | .setkey = cast5_setkey, | ||
397 | .encrypt = ctr_crypt, | ||
398 | .decrypt = ctr_crypt, | ||
399 | }, | ||
400 | }, | ||
401 | }, { | ||
402 | .cra_name = "ecb(cast5)", | ||
403 | .cra_driver_name = "ecb-cast5-avx", | ||
404 | .cra_priority = 200, | ||
405 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
406 | .cra_blocksize = CAST5_BLOCK_SIZE, | ||
407 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
408 | .cra_alignmask = 0, | ||
409 | .cra_type = &crypto_ablkcipher_type, | ||
410 | .cra_module = THIS_MODULE, | ||
411 | .cra_init = ablk_init, | ||
412 | .cra_exit = ablk_exit, | ||
413 | .cra_u = { | ||
414 | .ablkcipher = { | ||
415 | .min_keysize = CAST5_MIN_KEY_SIZE, | ||
416 | .max_keysize = CAST5_MAX_KEY_SIZE, | ||
417 | .setkey = ablk_set_key, | ||
418 | .encrypt = ablk_encrypt, | ||
419 | .decrypt = ablk_decrypt, | ||
420 | }, | ||
421 | }, | ||
422 | }, { | ||
423 | .cra_name = "cbc(cast5)", | ||
424 | .cra_driver_name = "cbc-cast5-avx", | ||
425 | .cra_priority = 200, | ||
426 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
427 | .cra_blocksize = CAST5_BLOCK_SIZE, | ||
428 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
429 | .cra_alignmask = 0, | ||
430 | .cra_type = &crypto_ablkcipher_type, | ||
431 | .cra_module = THIS_MODULE, | ||
432 | .cra_init = ablk_init, | ||
433 | .cra_exit = ablk_exit, | ||
434 | .cra_u = { | ||
435 | .ablkcipher = { | ||
436 | .min_keysize = CAST5_MIN_KEY_SIZE, | ||
437 | .max_keysize = CAST5_MAX_KEY_SIZE, | ||
438 | .ivsize = CAST5_BLOCK_SIZE, | ||
439 | .setkey = ablk_set_key, | ||
440 | .encrypt = __ablk_encrypt, | ||
441 | .decrypt = ablk_decrypt, | ||
442 | }, | ||
443 | }, | ||
444 | }, { | ||
445 | .cra_name = "ctr(cast5)", | ||
446 | .cra_driver_name = "ctr-cast5-avx", | ||
447 | .cra_priority = 200, | ||
448 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
449 | .cra_blocksize = 1, | ||
450 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
451 | .cra_alignmask = 0, | ||
452 | .cra_type = &crypto_ablkcipher_type, | ||
453 | .cra_module = THIS_MODULE, | ||
454 | .cra_init = ablk_init, | ||
455 | .cra_exit = ablk_exit, | ||
456 | .cra_u = { | ||
457 | .ablkcipher = { | ||
458 | .min_keysize = CAST5_MIN_KEY_SIZE, | ||
459 | .max_keysize = CAST5_MAX_KEY_SIZE, | ||
460 | .ivsize = CAST5_BLOCK_SIZE, | ||
461 | .setkey = ablk_set_key, | ||
462 | .encrypt = ablk_encrypt, | ||
463 | .decrypt = ablk_encrypt, | ||
464 | .geniv = "chainiv", | ||
465 | }, | ||
466 | }, | ||
467 | } }; | ||
468 | |||
469 | static int __init cast5_init(void) | ||
470 | { | ||
471 | u64 xcr0; | ||
472 | |||
473 | if (!cpu_has_avx || !cpu_has_osxsave) { | ||
474 | pr_info("AVX instructions are not detected.\n"); | ||
475 | return -ENODEV; | ||
476 | } | ||
477 | |||
478 | xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); | ||
479 | if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { | ||
480 | pr_info("AVX detected but unusable.\n"); | ||
481 | return -ENODEV; | ||
482 | } | ||
483 | |||
484 | return crypto_register_algs(cast5_algs, ARRAY_SIZE(cast5_algs)); | ||
485 | } | ||
486 | |||
487 | static void __exit cast5_exit(void) | ||
488 | { | ||
489 | crypto_unregister_algs(cast5_algs, ARRAY_SIZE(cast5_algs)); | ||
490 | } | ||
491 | |||
492 | module_init(cast5_init); | ||
493 | module_exit(cast5_exit); | ||
494 | |||
495 | MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized"); | ||
496 | MODULE_LICENSE("GPL"); | ||
497 | MODULE_ALIAS("cast5"); | ||
diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S deleted file mode 100644 index 2569d0da841..00000000000 --- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S +++ /dev/null | |||
@@ -1,439 +0,0 @@ | |||
1 | /* | ||
2 | * Cast6 Cipher 8-way parallel algorithm (AVX/x86_64) | ||
3 | * | ||
4 | * Copyright (C) 2012 Johannes Goetzfried | ||
5 | * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> | ||
6 | * | ||
7 | * Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License as published by | ||
11 | * the Free Software Foundation; either version 2 of the License, or | ||
12 | * (at your option) any later version. | ||
13 | * | ||
14 | * This program is distributed in the hope that it will be useful, | ||
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | * GNU General Public License for more details. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License | ||
20 | * along with this program; if not, write to the Free Software | ||
21 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
22 | * USA | ||
23 | * | ||
24 | */ | ||
25 | |||
26 | #include "glue_helper-asm-avx.S" | ||
27 | |||
28 | .file "cast6-avx-x86_64-asm_64.S" | ||
29 | |||
30 | .extern cast_s1 | ||
31 | .extern cast_s2 | ||
32 | .extern cast_s3 | ||
33 | .extern cast_s4 | ||
34 | |||
35 | /* structure of crypto context */ | ||
36 | #define km 0 | ||
37 | #define kr (12*4*4) | ||
38 | |||
39 | /* s-boxes */ | ||
40 | #define s1 cast_s1 | ||
41 | #define s2 cast_s2 | ||
42 | #define s3 cast_s3 | ||
43 | #define s4 cast_s4 | ||
44 | |||
45 | /********************************************************************** | ||
46 | 8-way AVX cast6 | ||
47 | **********************************************************************/ | ||
48 | #define CTX %rdi | ||
49 | |||
50 | #define RA1 %xmm0 | ||
51 | #define RB1 %xmm1 | ||
52 | #define RC1 %xmm2 | ||
53 | #define RD1 %xmm3 | ||
54 | |||
55 | #define RA2 %xmm4 | ||
56 | #define RB2 %xmm5 | ||
57 | #define RC2 %xmm6 | ||
58 | #define RD2 %xmm7 | ||
59 | |||
60 | #define RX %xmm8 | ||
61 | |||
62 | #define RKM %xmm9 | ||
63 | #define RKR %xmm10 | ||
64 | #define RKRF %xmm11 | ||
65 | #define RKRR %xmm12 | ||
66 | #define R32 %xmm13 | ||
67 | #define R1ST %xmm14 | ||
68 | |||
69 | #define RTMP %xmm15 | ||
70 | |||
71 | #define RID1 %rbp | ||
72 | #define RID1d %ebp | ||
73 | #define RID2 %rsi | ||
74 | #define RID2d %esi | ||
75 | |||
76 | #define RGI1 %rdx | ||
77 | #define RGI1bl %dl | ||
78 | #define RGI1bh %dh | ||
79 | #define RGI2 %rcx | ||
80 | #define RGI2bl %cl | ||
81 | #define RGI2bh %ch | ||
82 | |||
83 | #define RGI3 %rax | ||
84 | #define RGI3bl %al | ||
85 | #define RGI3bh %ah | ||
86 | #define RGI4 %rbx | ||
87 | #define RGI4bl %bl | ||
88 | #define RGI4bh %bh | ||
89 | |||
90 | #define RFS1 %r8 | ||
91 | #define RFS1d %r8d | ||
92 | #define RFS2 %r9 | ||
93 | #define RFS2d %r9d | ||
94 | #define RFS3 %r10 | ||
95 | #define RFS3d %r10d | ||
96 | |||
97 | |||
98 | #define lookup_32bit(src, dst, op1, op2, op3, interleave_op, il_reg) \ | ||
99 | movzbl src ## bh, RID1d; \ | ||
100 | movzbl src ## bl, RID2d; \ | ||
101 | shrq $16, src; \ | ||
102 | movl s1(, RID1, 4), dst ## d; \ | ||
103 | op1 s2(, RID2, 4), dst ## d; \ | ||
104 | movzbl src ## bh, RID1d; \ | ||
105 | movzbl src ## bl, RID2d; \ | ||
106 | interleave_op(il_reg); \ | ||
107 | op2 s3(, RID1, 4), dst ## d; \ | ||
108 | op3 s4(, RID2, 4), dst ## d; | ||
109 | |||
110 | #define dummy(d) /* do nothing */ | ||
111 | |||
112 | #define shr_next(reg) \ | ||
113 | shrq $16, reg; | ||
114 | |||
115 | #define F_head(a, x, gi1, gi2, op0) \ | ||
116 | op0 a, RKM, x; \ | ||
117 | vpslld RKRF, x, RTMP; \ | ||
118 | vpsrld RKRR, x, x; \ | ||
119 | vpor RTMP, x, x; \ | ||
120 | \ | ||
121 | vmovq x, gi1; \ | ||
122 | vpextrq $1, x, gi2; | ||
123 | |||
124 | #define F_tail(a, x, gi1, gi2, op1, op2, op3) \ | ||
125 | lookup_32bit(##gi1, RFS1, op1, op2, op3, shr_next, ##gi1); \ | ||
126 | lookup_32bit(##gi2, RFS3, op1, op2, op3, shr_next, ##gi2); \ | ||
127 | \ | ||
128 | lookup_32bit(##gi1, RFS2, op1, op2, op3, dummy, none); \ | ||
129 | shlq $32, RFS2; \ | ||
130 | orq RFS1, RFS2; \ | ||
131 | lookup_32bit(##gi2, RFS1, op1, op2, op3, dummy, none); \ | ||
132 | shlq $32, RFS1; \ | ||
133 | orq RFS1, RFS3; \ | ||
134 | \ | ||
135 | vmovq RFS2, x; \ | ||
136 | vpinsrq $1, RFS3, x, x; | ||
137 | |||
138 | #define F_2(a1, b1, a2, b2, op0, op1, op2, op3) \ | ||
139 | F_head(b1, RX, RGI1, RGI2, op0); \ | ||
140 | F_head(b2, RX, RGI3, RGI4, op0); \ | ||
141 | \ | ||
142 | F_tail(b1, RX, RGI1, RGI2, op1, op2, op3); \ | ||
143 | F_tail(b2, RTMP, RGI3, RGI4, op1, op2, op3); \ | ||
144 | \ | ||
145 | vpxor a1, RX, a1; \ | ||
146 | vpxor a2, RTMP, a2; | ||
147 | |||
148 | #define F1_2(a1, b1, a2, b2) \ | ||
149 | F_2(a1, b1, a2, b2, vpaddd, xorl, subl, addl) | ||
150 | #define F2_2(a1, b1, a2, b2) \ | ||
151 | F_2(a1, b1, a2, b2, vpxor, subl, addl, xorl) | ||
152 | #define F3_2(a1, b1, a2, b2) \ | ||
153 | F_2(a1, b1, a2, b2, vpsubd, addl, xorl, subl) | ||
154 | |||
155 | #define qop(in, out, f) \ | ||
156 | F ## f ## _2(out ## 1, in ## 1, out ## 2, in ## 2); | ||
157 | |||
158 | #define get_round_keys(nn) \ | ||
159 | vbroadcastss (km+(4*(nn)))(CTX), RKM; \ | ||
160 | vpand R1ST, RKR, RKRF; \ | ||
161 | vpsubq RKRF, R32, RKRR; \ | ||
162 | vpsrldq $1, RKR, RKR; | ||
163 | |||
164 | #define Q(n) \ | ||
165 | get_round_keys(4*n+0); \ | ||
166 | qop(RD, RC, 1); \ | ||
167 | \ | ||
168 | get_round_keys(4*n+1); \ | ||
169 | qop(RC, RB, 2); \ | ||
170 | \ | ||
171 | get_round_keys(4*n+2); \ | ||
172 | qop(RB, RA, 3); \ | ||
173 | \ | ||
174 | get_round_keys(4*n+3); \ | ||
175 | qop(RA, RD, 1); | ||
176 | |||
177 | #define QBAR(n) \ | ||
178 | get_round_keys(4*n+3); \ | ||
179 | qop(RA, RD, 1); \ | ||
180 | \ | ||
181 | get_round_keys(4*n+2); \ | ||
182 | qop(RB, RA, 3); \ | ||
183 | \ | ||
184 | get_round_keys(4*n+1); \ | ||
185 | qop(RC, RB, 2); \ | ||
186 | \ | ||
187 | get_round_keys(4*n+0); \ | ||
188 | qop(RD, RC, 1); | ||
189 | |||
190 | #define shuffle(mask) \ | ||
191 | vpshufb mask, RKR, RKR; | ||
192 | |||
193 | #define preload_rkr(n, do_mask, mask) \ | ||
194 | vbroadcastss .L16_mask, RKR; \ | ||
195 | /* add 16-bit rotation to key rotations (mod 32) */ \ | ||
196 | vpxor (kr+n*16)(CTX), RKR, RKR; \ | ||
197 | do_mask(mask); | ||
198 | |||
199 | #define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ | ||
200 | vpunpckldq x1, x0, t0; \ | ||
201 | vpunpckhdq x1, x0, t2; \ | ||
202 | vpunpckldq x3, x2, t1; \ | ||
203 | vpunpckhdq x3, x2, x3; \ | ||
204 | \ | ||
205 | vpunpcklqdq t1, t0, x0; \ | ||
206 | vpunpckhqdq t1, t0, x1; \ | ||
207 | vpunpcklqdq x3, t2, x2; \ | ||
208 | vpunpckhqdq x3, t2, x3; | ||
209 | |||
210 | #define inpack_blocks(x0, x1, x2, x3, t0, t1, t2, rmask) \ | ||
211 | vpshufb rmask, x0, x0; \ | ||
212 | vpshufb rmask, x1, x1; \ | ||
213 | vpshufb rmask, x2, x2; \ | ||
214 | vpshufb rmask, x3, x3; \ | ||
215 | \ | ||
216 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) | ||
217 | |||
218 | #define outunpack_blocks(x0, x1, x2, x3, t0, t1, t2, rmask) \ | ||
219 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ | ||
220 | \ | ||
221 | vpshufb rmask, x0, x0; \ | ||
222 | vpshufb rmask, x1, x1; \ | ||
223 | vpshufb rmask, x2, x2; \ | ||
224 | vpshufb rmask, x3, x3; | ||
225 | |||
226 | .data | ||
227 | |||
228 | .align 16 | ||
229 | .Lbswap_mask: | ||
230 | .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 | ||
231 | .Lbswap128_mask: | ||
232 | .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 | ||
233 | .Lrkr_enc_Q_Q_QBAR_QBAR: | ||
234 | .byte 0, 1, 2, 3, 4, 5, 6, 7, 11, 10, 9, 8, 15, 14, 13, 12 | ||
235 | .Lrkr_enc_QBAR_QBAR_QBAR_QBAR: | ||
236 | .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 | ||
237 | .Lrkr_dec_Q_Q_Q_Q: | ||
238 | .byte 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3 | ||
239 | .Lrkr_dec_Q_Q_QBAR_QBAR: | ||
240 | .byte 12, 13, 14, 15, 8, 9, 10, 11, 7, 6, 5, 4, 3, 2, 1, 0 | ||
241 | .Lrkr_dec_QBAR_QBAR_QBAR_QBAR: | ||
242 | .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 | ||
243 | .L16_mask: | ||
244 | .byte 16, 16, 16, 16 | ||
245 | .L32_mask: | ||
246 | .byte 32, 0, 0, 0 | ||
247 | .Lfirst_mask: | ||
248 | .byte 0x1f, 0, 0, 0 | ||
249 | |||
250 | .text | ||
251 | |||
252 | .align 8 | ||
253 | .type __cast6_enc_blk8,@function; | ||
254 | |||
255 | __cast6_enc_blk8: | ||
256 | /* input: | ||
257 | * %rdi: ctx, CTX | ||
258 | * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks | ||
259 | * output: | ||
260 | * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks | ||
261 | */ | ||
262 | |||
263 | pushq %rbp; | ||
264 | pushq %rbx; | ||
265 | |||
266 | vmovdqa .Lbswap_mask, RKM; | ||
267 | vmovd .Lfirst_mask, R1ST; | ||
268 | vmovd .L32_mask, R32; | ||
269 | |||
270 | inpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM); | ||
271 | inpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); | ||
272 | |||
273 | preload_rkr(0, dummy, none); | ||
274 | Q(0); | ||
275 | Q(1); | ||
276 | Q(2); | ||
277 | Q(3); | ||
278 | preload_rkr(1, shuffle, .Lrkr_enc_Q_Q_QBAR_QBAR); | ||
279 | Q(4); | ||
280 | Q(5); | ||
281 | QBAR(6); | ||
282 | QBAR(7); | ||
283 | preload_rkr(2, shuffle, .Lrkr_enc_QBAR_QBAR_QBAR_QBAR); | ||
284 | QBAR(8); | ||
285 | QBAR(9); | ||
286 | QBAR(10); | ||
287 | QBAR(11); | ||
288 | |||
289 | popq %rbx; | ||
290 | popq %rbp; | ||
291 | |||
292 | vmovdqa .Lbswap_mask, RKM; | ||
293 | |||
294 | outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM); | ||
295 | outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); | ||
296 | |||
297 | ret; | ||
298 | |||
299 | .align 8 | ||
300 | .type __cast6_dec_blk8,@function; | ||
301 | |||
302 | __cast6_dec_blk8: | ||
303 | /* input: | ||
304 | * %rdi: ctx, CTX | ||
305 | * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks | ||
306 | * output: | ||
307 | * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: decrypted blocks | ||
308 | */ | ||
309 | |||
310 | pushq %rbp; | ||
311 | pushq %rbx; | ||
312 | |||
313 | vmovdqa .Lbswap_mask, RKM; | ||
314 | vmovd .Lfirst_mask, R1ST; | ||
315 | vmovd .L32_mask, R32; | ||
316 | |||
317 | inpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM); | ||
318 | inpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); | ||
319 | |||
320 | preload_rkr(2, shuffle, .Lrkr_dec_Q_Q_Q_Q); | ||
321 | Q(11); | ||
322 | Q(10); | ||
323 | Q(9); | ||
324 | Q(8); | ||
325 | preload_rkr(1, shuffle, .Lrkr_dec_Q_Q_QBAR_QBAR); | ||
326 | Q(7); | ||
327 | Q(6); | ||
328 | QBAR(5); | ||
329 | QBAR(4); | ||
330 | preload_rkr(0, shuffle, .Lrkr_dec_QBAR_QBAR_QBAR_QBAR); | ||
331 | QBAR(3); | ||
332 | QBAR(2); | ||
333 | QBAR(1); | ||
334 | QBAR(0); | ||
335 | |||
336 | popq %rbx; | ||
337 | popq %rbp; | ||
338 | |||
339 | vmovdqa .Lbswap_mask, RKM; | ||
340 | outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM); | ||
341 | outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); | ||
342 | |||
343 | ret; | ||
344 | |||
345 | .align 8 | ||
346 | .global cast6_ecb_enc_8way | ||
347 | .type cast6_ecb_enc_8way,@function; | ||
348 | |||
349 | cast6_ecb_enc_8way: | ||
350 | /* input: | ||
351 | * %rdi: ctx, CTX | ||
352 | * %rsi: dst | ||
353 | * %rdx: src | ||
354 | */ | ||
355 | |||
356 | movq %rsi, %r11; | ||
357 | |||
358 | load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | ||
359 | |||
360 | call __cast6_enc_blk8; | ||
361 | |||
362 | store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | ||
363 | |||
364 | ret; | ||
365 | |||
366 | .align 8 | ||
367 | .global cast6_ecb_dec_8way | ||
368 | .type cast6_ecb_dec_8way,@function; | ||
369 | |||
370 | cast6_ecb_dec_8way: | ||
371 | /* input: | ||
372 | * %rdi: ctx, CTX | ||
373 | * %rsi: dst | ||
374 | * %rdx: src | ||
375 | */ | ||
376 | |||
377 | movq %rsi, %r11; | ||
378 | |||
379 | load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | ||
380 | |||
381 | call __cast6_dec_blk8; | ||
382 | |||
383 | store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | ||
384 | |||
385 | ret; | ||
386 | |||
387 | .align 8 | ||
388 | .global cast6_cbc_dec_8way | ||
389 | .type cast6_cbc_dec_8way,@function; | ||
390 | |||
391 | cast6_cbc_dec_8way: | ||
392 | /* input: | ||
393 | * %rdi: ctx, CTX | ||
394 | * %rsi: dst | ||
395 | * %rdx: src | ||
396 | */ | ||
397 | |||
398 | pushq %r12; | ||
399 | |||
400 | movq %rsi, %r11; | ||
401 | movq %rdx, %r12; | ||
402 | |||
403 | load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | ||
404 | |||
405 | call __cast6_dec_blk8; | ||
406 | |||
407 | store_cbc_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | ||
408 | |||
409 | popq %r12; | ||
410 | |||
411 | ret; | ||
412 | |||
413 | .align 8 | ||
414 | .global cast6_ctr_8way | ||
415 | .type cast6_ctr_8way,@function; | ||
416 | |||
417 | cast6_ctr_8way: | ||
418 | /* input: | ||
419 | * %rdi: ctx, CTX | ||
420 | * %rsi: dst | ||
421 | * %rdx: src | ||
422 | * %rcx: iv (little endian, 128bit) | ||
423 | */ | ||
424 | |||
425 | pushq %r12; | ||
426 | |||
427 | movq %rsi, %r11; | ||
428 | movq %rdx, %r12; | ||
429 | |||
430 | load_ctr_8way(%rcx, .Lbswap128_mask, RA1, RB1, RC1, RD1, RA2, RB2, RC2, | ||
431 | RD2, RX, RKR, RKM); | ||
432 | |||
433 | call __cast6_enc_blk8; | ||
434 | |||
435 | store_ctr_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | ||
436 | |||
437 | popq %r12; | ||
438 | |||
439 | ret; | ||
diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c deleted file mode 100644 index 92f7ca24790..00000000000 --- a/arch/x86/crypto/cast6_avx_glue.c +++ /dev/null | |||
@@ -1,603 +0,0 @@ | |||
1 | /* | ||
2 | * Glue Code for the AVX assembler implemention of the Cast6 Cipher | ||
3 | * | ||
4 | * Copyright (C) 2012 Johannes Goetzfried | ||
5 | * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License as published by | ||
9 | * the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
20 | * USA | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | #include <linux/module.h> | ||
25 | #include <linux/hardirq.h> | ||
26 | #include <linux/types.h> | ||
27 | #include <linux/crypto.h> | ||
28 | #include <linux/err.h> | ||
29 | #include <crypto/algapi.h> | ||
30 | #include <crypto/cast6.h> | ||
31 | #include <crypto/cryptd.h> | ||
32 | #include <crypto/b128ops.h> | ||
33 | #include <crypto/ctr.h> | ||
34 | #include <crypto/lrw.h> | ||
35 | #include <crypto/xts.h> | ||
36 | #include <asm/xcr.h> | ||
37 | #include <asm/xsave.h> | ||
38 | #include <asm/crypto/ablk_helper.h> | ||
39 | #include <asm/crypto/glue_helper.h> | ||
40 | |||
41 | #define CAST6_PARALLEL_BLOCKS 8 | ||
42 | |||
43 | asmlinkage void cast6_ecb_enc_8way(struct cast6_ctx *ctx, u8 *dst, | ||
44 | const u8 *src); | ||
45 | asmlinkage void cast6_ecb_dec_8way(struct cast6_ctx *ctx, u8 *dst, | ||
46 | const u8 *src); | ||
47 | |||
48 | asmlinkage void cast6_cbc_dec_8way(struct cast6_ctx *ctx, u8 *dst, | ||
49 | const u8 *src); | ||
50 | asmlinkage void cast6_ctr_8way(struct cast6_ctx *ctx, u8 *dst, const u8 *src, | ||
51 | le128 *iv); | ||
52 | |||
53 | static void cast6_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) | ||
54 | { | ||
55 | be128 ctrblk; | ||
56 | |||
57 | le128_to_be128(&ctrblk, iv); | ||
58 | le128_inc(iv); | ||
59 | |||
60 | __cast6_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); | ||
61 | u128_xor(dst, src, (u128 *)&ctrblk); | ||
62 | } | ||
63 | |||
64 | static const struct common_glue_ctx cast6_enc = { | ||
65 | .num_funcs = 2, | ||
66 | .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, | ||
67 | |||
68 | .funcs = { { | ||
69 | .num_blocks = CAST6_PARALLEL_BLOCKS, | ||
70 | .fn_u = { .ecb = GLUE_FUNC_CAST(cast6_ecb_enc_8way) } | ||
71 | }, { | ||
72 | .num_blocks = 1, | ||
73 | .fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_encrypt) } | ||
74 | } } | ||
75 | }; | ||
76 | |||
77 | static const struct common_glue_ctx cast6_ctr = { | ||
78 | .num_funcs = 2, | ||
79 | .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, | ||
80 | |||
81 | .funcs = { { | ||
82 | .num_blocks = CAST6_PARALLEL_BLOCKS, | ||
83 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_ctr_8way) } | ||
84 | }, { | ||
85 | .num_blocks = 1, | ||
86 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_crypt_ctr) } | ||
87 | } } | ||
88 | }; | ||
89 | |||
90 | static const struct common_glue_ctx cast6_dec = { | ||
91 | .num_funcs = 2, | ||
92 | .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, | ||
93 | |||
94 | .funcs = { { | ||
95 | .num_blocks = CAST6_PARALLEL_BLOCKS, | ||
96 | .fn_u = { .ecb = GLUE_FUNC_CAST(cast6_ecb_dec_8way) } | ||
97 | }, { | ||
98 | .num_blocks = 1, | ||
99 | .fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_decrypt) } | ||
100 | } } | ||
101 | }; | ||
102 | |||
103 | static const struct common_glue_ctx cast6_dec_cbc = { | ||
104 | .num_funcs = 2, | ||
105 | .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, | ||
106 | |||
107 | .funcs = { { | ||
108 | .num_blocks = CAST6_PARALLEL_BLOCKS, | ||
109 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(cast6_cbc_dec_8way) } | ||
110 | }, { | ||
111 | .num_blocks = 1, | ||
112 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__cast6_decrypt) } | ||
113 | } } | ||
114 | }; | ||
115 | |||
116 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
117 | struct scatterlist *src, unsigned int nbytes) | ||
118 | { | ||
119 | return glue_ecb_crypt_128bit(&cast6_enc, desc, dst, src, nbytes); | ||
120 | } | ||
121 | |||
122 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
123 | struct scatterlist *src, unsigned int nbytes) | ||
124 | { | ||
125 | return glue_ecb_crypt_128bit(&cast6_dec, desc, dst, src, nbytes); | ||
126 | } | ||
127 | |||
128 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
129 | struct scatterlist *src, unsigned int nbytes) | ||
130 | { | ||
131 | return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__cast6_encrypt), desc, | ||
132 | dst, src, nbytes); | ||
133 | } | ||
134 | |||
135 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
136 | struct scatterlist *src, unsigned int nbytes) | ||
137 | { | ||
138 | return glue_cbc_decrypt_128bit(&cast6_dec_cbc, desc, dst, src, | ||
139 | nbytes); | ||
140 | } | ||
141 | |||
142 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
143 | struct scatterlist *src, unsigned int nbytes) | ||
144 | { | ||
145 | return glue_ctr_crypt_128bit(&cast6_ctr, desc, dst, src, nbytes); | ||
146 | } | ||
147 | |||
148 | static inline bool cast6_fpu_begin(bool fpu_enabled, unsigned int nbytes) | ||
149 | { | ||
150 | return glue_fpu_begin(CAST6_BLOCK_SIZE, CAST6_PARALLEL_BLOCKS, | ||
151 | NULL, fpu_enabled, nbytes); | ||
152 | } | ||
153 | |||
154 | static inline void cast6_fpu_end(bool fpu_enabled) | ||
155 | { | ||
156 | glue_fpu_end(fpu_enabled); | ||
157 | } | ||
158 | |||
159 | struct crypt_priv { | ||
160 | struct cast6_ctx *ctx; | ||
161 | bool fpu_enabled; | ||
162 | }; | ||
163 | |||
164 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
165 | { | ||
166 | const unsigned int bsize = CAST6_BLOCK_SIZE; | ||
167 | struct crypt_priv *ctx = priv; | ||
168 | int i; | ||
169 | |||
170 | ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes); | ||
171 | |||
172 | if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) { | ||
173 | cast6_ecb_enc_8way(ctx->ctx, srcdst, srcdst); | ||
174 | return; | ||
175 | } | ||
176 | |||
177 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
178 | __cast6_encrypt(ctx->ctx, srcdst, srcdst); | ||
179 | } | ||
180 | |||
181 | static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
182 | { | ||
183 | const unsigned int bsize = CAST6_BLOCK_SIZE; | ||
184 | struct crypt_priv *ctx = priv; | ||
185 | int i; | ||
186 | |||
187 | ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes); | ||
188 | |||
189 | if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) { | ||
190 | cast6_ecb_dec_8way(ctx->ctx, srcdst, srcdst); | ||
191 | return; | ||
192 | } | ||
193 | |||
194 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
195 | __cast6_decrypt(ctx->ctx, srcdst, srcdst); | ||
196 | } | ||
197 | |||
198 | struct cast6_lrw_ctx { | ||
199 | struct lrw_table_ctx lrw_table; | ||
200 | struct cast6_ctx cast6_ctx; | ||
201 | }; | ||
202 | |||
203 | static int lrw_cast6_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
204 | unsigned int keylen) | ||
205 | { | ||
206 | struct cast6_lrw_ctx *ctx = crypto_tfm_ctx(tfm); | ||
207 | int err; | ||
208 | |||
209 | err = __cast6_setkey(&ctx->cast6_ctx, key, keylen - CAST6_BLOCK_SIZE, | ||
210 | &tfm->crt_flags); | ||
211 | if (err) | ||
212 | return err; | ||
213 | |||
214 | return lrw_init_table(&ctx->lrw_table, key + keylen - CAST6_BLOCK_SIZE); | ||
215 | } | ||
216 | |||
217 | static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
218 | struct scatterlist *src, unsigned int nbytes) | ||
219 | { | ||
220 | struct cast6_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
221 | be128 buf[CAST6_PARALLEL_BLOCKS]; | ||
222 | struct crypt_priv crypt_ctx = { | ||
223 | .ctx = &ctx->cast6_ctx, | ||
224 | .fpu_enabled = false, | ||
225 | }; | ||
226 | struct lrw_crypt_req req = { | ||
227 | .tbuf = buf, | ||
228 | .tbuflen = sizeof(buf), | ||
229 | |||
230 | .table_ctx = &ctx->lrw_table, | ||
231 | .crypt_ctx = &crypt_ctx, | ||
232 | .crypt_fn = encrypt_callback, | ||
233 | }; | ||
234 | int ret; | ||
235 | |||
236 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
237 | ret = lrw_crypt(desc, dst, src, nbytes, &req); | ||
238 | cast6_fpu_end(crypt_ctx.fpu_enabled); | ||
239 | |||
240 | return ret; | ||
241 | } | ||
242 | |||
243 | static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
244 | struct scatterlist *src, unsigned int nbytes) | ||
245 | { | ||
246 | struct cast6_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
247 | be128 buf[CAST6_PARALLEL_BLOCKS]; | ||
248 | struct crypt_priv crypt_ctx = { | ||
249 | .ctx = &ctx->cast6_ctx, | ||
250 | .fpu_enabled = false, | ||
251 | }; | ||
252 | struct lrw_crypt_req req = { | ||
253 | .tbuf = buf, | ||
254 | .tbuflen = sizeof(buf), | ||
255 | |||
256 | .table_ctx = &ctx->lrw_table, | ||
257 | .crypt_ctx = &crypt_ctx, | ||
258 | .crypt_fn = decrypt_callback, | ||
259 | }; | ||
260 | int ret; | ||
261 | |||
262 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
263 | ret = lrw_crypt(desc, dst, src, nbytes, &req); | ||
264 | cast6_fpu_end(crypt_ctx.fpu_enabled); | ||
265 | |||
266 | return ret; | ||
267 | } | ||
268 | |||
269 | static void lrw_exit_tfm(struct crypto_tfm *tfm) | ||
270 | { | ||
271 | struct cast6_lrw_ctx *ctx = crypto_tfm_ctx(tfm); | ||
272 | |||
273 | lrw_free_table(&ctx->lrw_table); | ||
274 | } | ||
275 | |||
276 | struct cast6_xts_ctx { | ||
277 | struct cast6_ctx tweak_ctx; | ||
278 | struct cast6_ctx crypt_ctx; | ||
279 | }; | ||
280 | |||
281 | static int xts_cast6_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
282 | unsigned int keylen) | ||
283 | { | ||
284 | struct cast6_xts_ctx *ctx = crypto_tfm_ctx(tfm); | ||
285 | u32 *flags = &tfm->crt_flags; | ||
286 | int err; | ||
287 | |||
288 | /* key consists of keys of equal size concatenated, therefore | ||
289 | * the length must be even | ||
290 | */ | ||
291 | if (keylen % 2) { | ||
292 | *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; | ||
293 | return -EINVAL; | ||
294 | } | ||
295 | |||
296 | /* first half of xts-key is for crypt */ | ||
297 | err = __cast6_setkey(&ctx->crypt_ctx, key, keylen / 2, flags); | ||
298 | if (err) | ||
299 | return err; | ||
300 | |||
301 | /* second half of xts-key is for tweak */ | ||
302 | return __cast6_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2, | ||
303 | flags); | ||
304 | } | ||
305 | |||
306 | static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
307 | struct scatterlist *src, unsigned int nbytes) | ||
308 | { | ||
309 | struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
310 | be128 buf[CAST6_PARALLEL_BLOCKS]; | ||
311 | struct crypt_priv crypt_ctx = { | ||
312 | .ctx = &ctx->crypt_ctx, | ||
313 | .fpu_enabled = false, | ||
314 | }; | ||
315 | struct xts_crypt_req req = { | ||
316 | .tbuf = buf, | ||
317 | .tbuflen = sizeof(buf), | ||
318 | |||
319 | .tweak_ctx = &ctx->tweak_ctx, | ||
320 | .tweak_fn = XTS_TWEAK_CAST(__cast6_encrypt), | ||
321 | .crypt_ctx = &crypt_ctx, | ||
322 | .crypt_fn = encrypt_callback, | ||
323 | }; | ||
324 | int ret; | ||
325 | |||
326 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
327 | ret = xts_crypt(desc, dst, src, nbytes, &req); | ||
328 | cast6_fpu_end(crypt_ctx.fpu_enabled); | ||
329 | |||
330 | return ret; | ||
331 | } | ||
332 | |||
333 | static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
334 | struct scatterlist *src, unsigned int nbytes) | ||
335 | { | ||
336 | struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
337 | be128 buf[CAST6_PARALLEL_BLOCKS]; | ||
338 | struct crypt_priv crypt_ctx = { | ||
339 | .ctx = &ctx->crypt_ctx, | ||
340 | .fpu_enabled = false, | ||
341 | }; | ||
342 | struct xts_crypt_req req = { | ||
343 | .tbuf = buf, | ||
344 | .tbuflen = sizeof(buf), | ||
345 | |||
346 | .tweak_ctx = &ctx->tweak_ctx, | ||
347 | .tweak_fn = XTS_TWEAK_CAST(__cast6_encrypt), | ||
348 | .crypt_ctx = &crypt_ctx, | ||
349 | .crypt_fn = decrypt_callback, | ||
350 | }; | ||
351 | int ret; | ||
352 | |||
353 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
354 | ret = xts_crypt(desc, dst, src, nbytes, &req); | ||
355 | cast6_fpu_end(crypt_ctx.fpu_enabled); | ||
356 | |||
357 | return ret; | ||
358 | } | ||
359 | |||
360 | static struct crypto_alg cast6_algs[10] = { { | ||
361 | .cra_name = "__ecb-cast6-avx", | ||
362 | .cra_driver_name = "__driver-ecb-cast6-avx", | ||
363 | .cra_priority = 0, | ||
364 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
365 | .cra_blocksize = CAST6_BLOCK_SIZE, | ||
366 | .cra_ctxsize = sizeof(struct cast6_ctx), | ||
367 | .cra_alignmask = 0, | ||
368 | .cra_type = &crypto_blkcipher_type, | ||
369 | .cra_module = THIS_MODULE, | ||
370 | .cra_u = { | ||
371 | .blkcipher = { | ||
372 | .min_keysize = CAST6_MIN_KEY_SIZE, | ||
373 | .max_keysize = CAST6_MAX_KEY_SIZE, | ||
374 | .setkey = cast6_setkey, | ||
375 | .encrypt = ecb_encrypt, | ||
376 | .decrypt = ecb_decrypt, | ||
377 | }, | ||
378 | }, | ||
379 | }, { | ||
380 | .cra_name = "__cbc-cast6-avx", | ||
381 | .cra_driver_name = "__driver-cbc-cast6-avx", | ||
382 | .cra_priority = 0, | ||
383 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
384 | .cra_blocksize = CAST6_BLOCK_SIZE, | ||
385 | .cra_ctxsize = sizeof(struct cast6_ctx), | ||
386 | .cra_alignmask = 0, | ||
387 | .cra_type = &crypto_blkcipher_type, | ||
388 | .cra_module = THIS_MODULE, | ||
389 | .cra_u = { | ||
390 | .blkcipher = { | ||
391 | .min_keysize = CAST6_MIN_KEY_SIZE, | ||
392 | .max_keysize = CAST6_MAX_KEY_SIZE, | ||
393 | .setkey = cast6_setkey, | ||
394 | .encrypt = cbc_encrypt, | ||
395 | .decrypt = cbc_decrypt, | ||
396 | }, | ||
397 | }, | ||
398 | }, { | ||
399 | .cra_name = "__ctr-cast6-avx", | ||
400 | .cra_driver_name = "__driver-ctr-cast6-avx", | ||
401 | .cra_priority = 0, | ||
402 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
403 | .cra_blocksize = 1, | ||
404 | .cra_ctxsize = sizeof(struct cast6_ctx), | ||
405 | .cra_alignmask = 0, | ||
406 | .cra_type = &crypto_blkcipher_type, | ||
407 | .cra_module = THIS_MODULE, | ||
408 | .cra_u = { | ||
409 | .blkcipher = { | ||
410 | .min_keysize = CAST6_MIN_KEY_SIZE, | ||
411 | .max_keysize = CAST6_MAX_KEY_SIZE, | ||
412 | .ivsize = CAST6_BLOCK_SIZE, | ||
413 | .setkey = cast6_setkey, | ||
414 | .encrypt = ctr_crypt, | ||
415 | .decrypt = ctr_crypt, | ||
416 | }, | ||
417 | }, | ||
418 | }, { | ||
419 | .cra_name = "__lrw-cast6-avx", | ||
420 | .cra_driver_name = "__driver-lrw-cast6-avx", | ||
421 | .cra_priority = 0, | ||
422 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
423 | .cra_blocksize = CAST6_BLOCK_SIZE, | ||
424 | .cra_ctxsize = sizeof(struct cast6_lrw_ctx), | ||
425 | .cra_alignmask = 0, | ||
426 | .cra_type = &crypto_blkcipher_type, | ||
427 | .cra_module = THIS_MODULE, | ||
428 | .cra_exit = lrw_exit_tfm, | ||
429 | .cra_u = { | ||
430 | .blkcipher = { | ||
431 | .min_keysize = CAST6_MIN_KEY_SIZE + | ||
432 | CAST6_BLOCK_SIZE, | ||
433 | .max_keysize = CAST6_MAX_KEY_SIZE + | ||
434 | CAST6_BLOCK_SIZE, | ||
435 | .ivsize = CAST6_BLOCK_SIZE, | ||
436 | .setkey = lrw_cast6_setkey, | ||
437 | .encrypt = lrw_encrypt, | ||
438 | .decrypt = lrw_decrypt, | ||
439 | }, | ||
440 | }, | ||
441 | }, { | ||
442 | .cra_name = "__xts-cast6-avx", | ||
443 | .cra_driver_name = "__driver-xts-cast6-avx", | ||
444 | .cra_priority = 0, | ||
445 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
446 | .cra_blocksize = CAST6_BLOCK_SIZE, | ||
447 | .cra_ctxsize = sizeof(struct cast6_xts_ctx), | ||
448 | .cra_alignmask = 0, | ||
449 | .cra_type = &crypto_blkcipher_type, | ||
450 | .cra_module = THIS_MODULE, | ||
451 | .cra_u = { | ||
452 | .blkcipher = { | ||
453 | .min_keysize = CAST6_MIN_KEY_SIZE * 2, | ||
454 | .max_keysize = CAST6_MAX_KEY_SIZE * 2, | ||
455 | .ivsize = CAST6_BLOCK_SIZE, | ||
456 | .setkey = xts_cast6_setkey, | ||
457 | .encrypt = xts_encrypt, | ||
458 | .decrypt = xts_decrypt, | ||
459 | }, | ||
460 | }, | ||
461 | }, { | ||
462 | .cra_name = "ecb(cast6)", | ||
463 | .cra_driver_name = "ecb-cast6-avx", | ||
464 | .cra_priority = 200, | ||
465 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
466 | .cra_blocksize = CAST6_BLOCK_SIZE, | ||
467 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
468 | .cra_alignmask = 0, | ||
469 | .cra_type = &crypto_ablkcipher_type, | ||
470 | .cra_module = THIS_MODULE, | ||
471 | .cra_init = ablk_init, | ||
472 | .cra_exit = ablk_exit, | ||
473 | .cra_u = { | ||
474 | .ablkcipher = { | ||
475 | .min_keysize = CAST6_MIN_KEY_SIZE, | ||
476 | .max_keysize = CAST6_MAX_KEY_SIZE, | ||
477 | .setkey = ablk_set_key, | ||
478 | .encrypt = ablk_encrypt, | ||
479 | .decrypt = ablk_decrypt, | ||
480 | }, | ||
481 | }, | ||
482 | }, { | ||
483 | .cra_name = "cbc(cast6)", | ||
484 | .cra_driver_name = "cbc-cast6-avx", | ||
485 | .cra_priority = 200, | ||
486 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
487 | .cra_blocksize = CAST6_BLOCK_SIZE, | ||
488 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
489 | .cra_alignmask = 0, | ||
490 | .cra_type = &crypto_ablkcipher_type, | ||
491 | .cra_module = THIS_MODULE, | ||
492 | .cra_init = ablk_init, | ||
493 | .cra_exit = ablk_exit, | ||
494 | .cra_u = { | ||
495 | .ablkcipher = { | ||
496 | .min_keysize = CAST6_MIN_KEY_SIZE, | ||
497 | .max_keysize = CAST6_MAX_KEY_SIZE, | ||
498 | .ivsize = CAST6_BLOCK_SIZE, | ||
499 | .setkey = ablk_set_key, | ||
500 | .encrypt = __ablk_encrypt, | ||
501 | .decrypt = ablk_decrypt, | ||
502 | }, | ||
503 | }, | ||
504 | }, { | ||
505 | .cra_name = "ctr(cast6)", | ||
506 | .cra_driver_name = "ctr-cast6-avx", | ||
507 | .cra_priority = 200, | ||
508 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
509 | .cra_blocksize = 1, | ||
510 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
511 | .cra_alignmask = 0, | ||
512 | .cra_type = &crypto_ablkcipher_type, | ||
513 | .cra_module = THIS_MODULE, | ||
514 | .cra_init = ablk_init, | ||
515 | .cra_exit = ablk_exit, | ||
516 | .cra_u = { | ||
517 | .ablkcipher = { | ||
518 | .min_keysize = CAST6_MIN_KEY_SIZE, | ||
519 | .max_keysize = CAST6_MAX_KEY_SIZE, | ||
520 | .ivsize = CAST6_BLOCK_SIZE, | ||
521 | .setkey = ablk_set_key, | ||
522 | .encrypt = ablk_encrypt, | ||
523 | .decrypt = ablk_encrypt, | ||
524 | .geniv = "chainiv", | ||
525 | }, | ||
526 | }, | ||
527 | }, { | ||
528 | .cra_name = "lrw(cast6)", | ||
529 | .cra_driver_name = "lrw-cast6-avx", | ||
530 | .cra_priority = 200, | ||
531 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
532 | .cra_blocksize = CAST6_BLOCK_SIZE, | ||
533 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
534 | .cra_alignmask = 0, | ||
535 | .cra_type = &crypto_ablkcipher_type, | ||
536 | .cra_module = THIS_MODULE, | ||
537 | .cra_init = ablk_init, | ||
538 | .cra_exit = ablk_exit, | ||
539 | .cra_u = { | ||
540 | .ablkcipher = { | ||
541 | .min_keysize = CAST6_MIN_KEY_SIZE + | ||
542 | CAST6_BLOCK_SIZE, | ||
543 | .max_keysize = CAST6_MAX_KEY_SIZE + | ||
544 | CAST6_BLOCK_SIZE, | ||
545 | .ivsize = CAST6_BLOCK_SIZE, | ||
546 | .setkey = ablk_set_key, | ||
547 | .encrypt = ablk_encrypt, | ||
548 | .decrypt = ablk_decrypt, | ||
549 | }, | ||
550 | }, | ||
551 | }, { | ||
552 | .cra_name = "xts(cast6)", | ||
553 | .cra_driver_name = "xts-cast6-avx", | ||
554 | .cra_priority = 200, | ||
555 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
556 | .cra_blocksize = CAST6_BLOCK_SIZE, | ||
557 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
558 | .cra_alignmask = 0, | ||
559 | .cra_type = &crypto_ablkcipher_type, | ||
560 | .cra_module = THIS_MODULE, | ||
561 | .cra_init = ablk_init, | ||
562 | .cra_exit = ablk_exit, | ||
563 | .cra_u = { | ||
564 | .ablkcipher = { | ||
565 | .min_keysize = CAST6_MIN_KEY_SIZE * 2, | ||
566 | .max_keysize = CAST6_MAX_KEY_SIZE * 2, | ||
567 | .ivsize = CAST6_BLOCK_SIZE, | ||
568 | .setkey = ablk_set_key, | ||
569 | .encrypt = ablk_encrypt, | ||
570 | .decrypt = ablk_decrypt, | ||
571 | }, | ||
572 | }, | ||
573 | } }; | ||
574 | |||
575 | static int __init cast6_init(void) | ||
576 | { | ||
577 | u64 xcr0; | ||
578 | |||
579 | if (!cpu_has_avx || !cpu_has_osxsave) { | ||
580 | pr_info("AVX instructions are not detected.\n"); | ||
581 | return -ENODEV; | ||
582 | } | ||
583 | |||
584 | xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); | ||
585 | if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { | ||
586 | pr_info("AVX detected but unusable.\n"); | ||
587 | return -ENODEV; | ||
588 | } | ||
589 | |||
590 | return crypto_register_algs(cast6_algs, ARRAY_SIZE(cast6_algs)); | ||
591 | } | ||
592 | |||
593 | static void __exit cast6_exit(void) | ||
594 | { | ||
595 | crypto_unregister_algs(cast6_algs, ARRAY_SIZE(cast6_algs)); | ||
596 | } | ||
597 | |||
598 | module_init(cast6_init); | ||
599 | module_exit(cast6_exit); | ||
600 | |||
601 | MODULE_DESCRIPTION("Cast6 Cipher Algorithm, AVX optimized"); | ||
602 | MODULE_LICENSE("GPL"); | ||
603 | MODULE_ALIAS("cast6"); | ||
diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c deleted file mode 100644 index 6812ad98355..00000000000 --- a/arch/x86/crypto/crc32c-intel_glue.c +++ /dev/null | |||
@@ -1,284 +0,0 @@ | |||
1 | /* | ||
2 | * Using hardware provided CRC32 instruction to accelerate the CRC32 disposal. | ||
3 | * CRC32C polynomial:0x1EDC6F41(BE)/0x82F63B78(LE) | ||
4 | * CRC32 is a new instruction in Intel SSE4.2, the reference can be found at: | ||
5 | * http://www.intel.com/products/processor/manuals/ | ||
6 | * Intel(R) 64 and IA-32 Architectures Software Developer's Manual | ||
7 | * Volume 2A: Instruction Set Reference, A-M | ||
8 | * | ||
9 | * Copyright (C) 2008 Intel Corporation | ||
10 | * Authors: Austin Zhang <austin_zhang@linux.intel.com> | ||
11 | * Kent Liu <kent.liu@intel.com> | ||
12 | * | ||
13 | * This program is free software; you can redistribute it and/or modify it | ||
14 | * under the terms and conditions of the GNU General Public License, | ||
15 | * version 2, as published by the Free Software Foundation. | ||
16 | * | ||
17 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
18 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
19 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
20 | * more details. | ||
21 | * | ||
22 | * You should have received a copy of the GNU General Public License along with | ||
23 | * this program; if not, write to the Free Software Foundation, Inc., | ||
24 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
25 | * | ||
26 | */ | ||
27 | #include <linux/init.h> | ||
28 | #include <linux/module.h> | ||
29 | #include <linux/string.h> | ||
30 | #include <linux/kernel.h> | ||
31 | #include <crypto/internal/hash.h> | ||
32 | |||
33 | #include <asm/cpufeature.h> | ||
34 | #include <asm/cpu_device_id.h> | ||
35 | #include <asm/i387.h> | ||
36 | #include <asm/fpu-internal.h> | ||
37 | |||
38 | #define CHKSUM_BLOCK_SIZE 1 | ||
39 | #define CHKSUM_DIGEST_SIZE 4 | ||
40 | |||
41 | #define SCALE_F sizeof(unsigned long) | ||
42 | |||
43 | #ifdef CONFIG_X86_64 | ||
44 | #define REX_PRE "0x48, " | ||
45 | #else | ||
46 | #define REX_PRE | ||
47 | #endif | ||
48 | |||
49 | #ifdef CONFIG_X86_64 | ||
50 | /* | ||
51 | * use carryless multiply version of crc32c when buffer | ||
52 | * size is >= 512 (when eager fpu is enabled) or | ||
53 | * >= 1024 (when eager fpu is disabled) to account | ||
54 | * for fpu state save/restore overhead. | ||
55 | */ | ||
56 | #define CRC32C_PCL_BREAKEVEN_EAGERFPU 512 | ||
57 | #define CRC32C_PCL_BREAKEVEN_NOEAGERFPU 1024 | ||
58 | |||
59 | asmlinkage unsigned int crc_pcl(const u8 *buffer, int len, | ||
60 | unsigned int crc_init); | ||
61 | static int crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_EAGERFPU; | ||
62 | #if defined(X86_FEATURE_EAGER_FPU) | ||
63 | #define set_pcl_breakeven_point() \ | ||
64 | do { \ | ||
65 | if (!use_eager_fpu()) \ | ||
66 | crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_NOEAGERFPU; \ | ||
67 | } while (0) | ||
68 | #else | ||
69 | #define set_pcl_breakeven_point() \ | ||
70 | (crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_NOEAGERFPU) | ||
71 | #endif | ||
72 | #endif /* CONFIG_X86_64 */ | ||
73 | |||
74 | static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length) | ||
75 | { | ||
76 | while (length--) { | ||
77 | __asm__ __volatile__( | ||
78 | ".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1" | ||
79 | :"=S"(crc) | ||
80 | :"0"(crc), "c"(*data) | ||
81 | ); | ||
82 | data++; | ||
83 | } | ||
84 | |||
85 | return crc; | ||
86 | } | ||
87 | |||
88 | static u32 __pure crc32c_intel_le_hw(u32 crc, unsigned char const *p, size_t len) | ||
89 | { | ||
90 | unsigned int iquotient = len / SCALE_F; | ||
91 | unsigned int iremainder = len % SCALE_F; | ||
92 | unsigned long *ptmp = (unsigned long *)p; | ||
93 | |||
94 | while (iquotient--) { | ||
95 | __asm__ __volatile__( | ||
96 | ".byte 0xf2, " REX_PRE "0xf, 0x38, 0xf1, 0xf1;" | ||
97 | :"=S"(crc) | ||
98 | :"0"(crc), "c"(*ptmp) | ||
99 | ); | ||
100 | ptmp++; | ||
101 | } | ||
102 | |||
103 | if (iremainder) | ||
104 | crc = crc32c_intel_le_hw_byte(crc, (unsigned char *)ptmp, | ||
105 | iremainder); | ||
106 | |||
107 | return crc; | ||
108 | } | ||
109 | |||
110 | /* | ||
111 | * Setting the seed allows arbitrary accumulators and flexible XOR policy | ||
112 | * If your algorithm starts with ~0, then XOR with ~0 before you set | ||
113 | * the seed. | ||
114 | */ | ||
115 | static int crc32c_intel_setkey(struct crypto_shash *hash, const u8 *key, | ||
116 | unsigned int keylen) | ||
117 | { | ||
118 | u32 *mctx = crypto_shash_ctx(hash); | ||
119 | |||
120 | if (keylen != sizeof(u32)) { | ||
121 | crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN); | ||
122 | return -EINVAL; | ||
123 | } | ||
124 | *mctx = le32_to_cpup((__le32 *)key); | ||
125 | return 0; | ||
126 | } | ||
127 | |||
128 | static int crc32c_intel_init(struct shash_desc *desc) | ||
129 | { | ||
130 | u32 *mctx = crypto_shash_ctx(desc->tfm); | ||
131 | u32 *crcp = shash_desc_ctx(desc); | ||
132 | |||
133 | *crcp = *mctx; | ||
134 | |||
135 | return 0; | ||
136 | } | ||
137 | |||
138 | static int crc32c_intel_update(struct shash_desc *desc, const u8 *data, | ||
139 | unsigned int len) | ||
140 | { | ||
141 | u32 *crcp = shash_desc_ctx(desc); | ||
142 | |||
143 | *crcp = crc32c_intel_le_hw(*crcp, data, len); | ||
144 | return 0; | ||
145 | } | ||
146 | |||
147 | static int __crc32c_intel_finup(u32 *crcp, const u8 *data, unsigned int len, | ||
148 | u8 *out) | ||
149 | { | ||
150 | *(__le32 *)out = ~cpu_to_le32(crc32c_intel_le_hw(*crcp, data, len)); | ||
151 | return 0; | ||
152 | } | ||
153 | |||
154 | static int crc32c_intel_finup(struct shash_desc *desc, const u8 *data, | ||
155 | unsigned int len, u8 *out) | ||
156 | { | ||
157 | return __crc32c_intel_finup(shash_desc_ctx(desc), data, len, out); | ||
158 | } | ||
159 | |||
160 | static int crc32c_intel_final(struct shash_desc *desc, u8 *out) | ||
161 | { | ||
162 | u32 *crcp = shash_desc_ctx(desc); | ||
163 | |||
164 | *(__le32 *)out = ~cpu_to_le32p(crcp); | ||
165 | return 0; | ||
166 | } | ||
167 | |||
168 | static int crc32c_intel_digest(struct shash_desc *desc, const u8 *data, | ||
169 | unsigned int len, u8 *out) | ||
170 | { | ||
171 | return __crc32c_intel_finup(crypto_shash_ctx(desc->tfm), data, len, | ||
172 | out); | ||
173 | } | ||
174 | |||
175 | static int crc32c_intel_cra_init(struct crypto_tfm *tfm) | ||
176 | { | ||
177 | u32 *key = crypto_tfm_ctx(tfm); | ||
178 | |||
179 | *key = ~0; | ||
180 | |||
181 | return 0; | ||
182 | } | ||
183 | |||
184 | #ifdef CONFIG_X86_64 | ||
185 | static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data, | ||
186 | unsigned int len) | ||
187 | { | ||
188 | u32 *crcp = shash_desc_ctx(desc); | ||
189 | |||
190 | /* | ||
191 | * use faster PCL version if datasize is large enough to | ||
192 | * overcome kernel fpu state save/restore overhead | ||
193 | */ | ||
194 | if (len >= crc32c_pcl_breakeven && irq_fpu_usable()) { | ||
195 | kernel_fpu_begin(); | ||
196 | *crcp = crc_pcl(data, len, *crcp); | ||
197 | kernel_fpu_end(); | ||
198 | } else | ||
199 | *crcp = crc32c_intel_le_hw(*crcp, data, len); | ||
200 | return 0; | ||
201 | } | ||
202 | |||
203 | static int __crc32c_pcl_intel_finup(u32 *crcp, const u8 *data, unsigned int len, | ||
204 | u8 *out) | ||
205 | { | ||
206 | if (len >= crc32c_pcl_breakeven && irq_fpu_usable()) { | ||
207 | kernel_fpu_begin(); | ||
208 | *(__le32 *)out = ~cpu_to_le32(crc_pcl(data, len, *crcp)); | ||
209 | kernel_fpu_end(); | ||
210 | } else | ||
211 | *(__le32 *)out = | ||
212 | ~cpu_to_le32(crc32c_intel_le_hw(*crcp, data, len)); | ||
213 | return 0; | ||
214 | } | ||
215 | |||
216 | static int crc32c_pcl_intel_finup(struct shash_desc *desc, const u8 *data, | ||
217 | unsigned int len, u8 *out) | ||
218 | { | ||
219 | return __crc32c_pcl_intel_finup(shash_desc_ctx(desc), data, len, out); | ||
220 | } | ||
221 | |||
222 | static int crc32c_pcl_intel_digest(struct shash_desc *desc, const u8 *data, | ||
223 | unsigned int len, u8 *out) | ||
224 | { | ||
225 | return __crc32c_pcl_intel_finup(crypto_shash_ctx(desc->tfm), data, len, | ||
226 | out); | ||
227 | } | ||
228 | #endif /* CONFIG_X86_64 */ | ||
229 | |||
230 | static struct shash_alg alg = { | ||
231 | .setkey = crc32c_intel_setkey, | ||
232 | .init = crc32c_intel_init, | ||
233 | .update = crc32c_intel_update, | ||
234 | .final = crc32c_intel_final, | ||
235 | .finup = crc32c_intel_finup, | ||
236 | .digest = crc32c_intel_digest, | ||
237 | .descsize = sizeof(u32), | ||
238 | .digestsize = CHKSUM_DIGEST_SIZE, | ||
239 | .base = { | ||
240 | .cra_name = "crc32c", | ||
241 | .cra_driver_name = "crc32c-intel", | ||
242 | .cra_priority = 200, | ||
243 | .cra_blocksize = CHKSUM_BLOCK_SIZE, | ||
244 | .cra_ctxsize = sizeof(u32), | ||
245 | .cra_module = THIS_MODULE, | ||
246 | .cra_init = crc32c_intel_cra_init, | ||
247 | } | ||
248 | }; | ||
249 | |||
250 | static const struct x86_cpu_id crc32c_cpu_id[] = { | ||
251 | X86_FEATURE_MATCH(X86_FEATURE_XMM4_2), | ||
252 | {} | ||
253 | }; | ||
254 | MODULE_DEVICE_TABLE(x86cpu, crc32c_cpu_id); | ||
255 | |||
256 | static int __init crc32c_intel_mod_init(void) | ||
257 | { | ||
258 | if (!x86_match_cpu(crc32c_cpu_id)) | ||
259 | return -ENODEV; | ||
260 | #ifdef CONFIG_X86_64 | ||
261 | if (cpu_has_pclmulqdq) { | ||
262 | alg.update = crc32c_pcl_intel_update; | ||
263 | alg.finup = crc32c_pcl_intel_finup; | ||
264 | alg.digest = crc32c_pcl_intel_digest; | ||
265 | set_pcl_breakeven_point(); | ||
266 | } | ||
267 | #endif | ||
268 | return crypto_register_shash(&alg); | ||
269 | } | ||
270 | |||
271 | static void __exit crc32c_intel_mod_fini(void) | ||
272 | { | ||
273 | crypto_unregister_shash(&alg); | ||
274 | } | ||
275 | |||
276 | module_init(crc32c_intel_mod_init); | ||
277 | module_exit(crc32c_intel_mod_fini); | ||
278 | |||
279 | MODULE_AUTHOR("Austin Zhang <austin.zhang@intel.com>, Kent Liu <kent.liu@intel.com>"); | ||
280 | MODULE_DESCRIPTION("CRC32c (Castagnoli) optimization using Intel Hardware."); | ||
281 | MODULE_LICENSE("GPL"); | ||
282 | |||
283 | MODULE_ALIAS("crc32c"); | ||
284 | MODULE_ALIAS("crc32c-intel"); | ||
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S deleted file mode 100644 index 93c6d39237a..00000000000 --- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S +++ /dev/null | |||
@@ -1,460 +0,0 @@ | |||
1 | /* | ||
2 | * Implement fast CRC32C with PCLMULQDQ instructions. (x86_64) | ||
3 | * | ||
4 | * The white paper on CRC32C calculations with PCLMULQDQ instruction can be | ||
5 | * downloaded from: | ||
6 | * http://download.intel.com/design/intarch/papers/323405.pdf | ||
7 | * | ||
8 | * Copyright (C) 2012 Intel Corporation. | ||
9 | * | ||
10 | * Authors: | ||
11 | * Wajdi Feghali <wajdi.k.feghali@intel.com> | ||
12 | * James Guilford <james.guilford@intel.com> | ||
13 | * David Cote <david.m.cote@intel.com> | ||
14 | * Tim Chen <tim.c.chen@linux.intel.com> | ||
15 | * | ||
16 | * This software is available to you under a choice of one of two | ||
17 | * licenses. You may choose to be licensed under the terms of the GNU | ||
18 | * General Public License (GPL) Version 2, available from the file | ||
19 | * COPYING in the main directory of this source tree, or the | ||
20 | * OpenIB.org BSD license below: | ||
21 | * | ||
22 | * Redistribution and use in source and binary forms, with or | ||
23 | * without modification, are permitted provided that the following | ||
24 | * conditions are met: | ||
25 | * | ||
26 | * - Redistributions of source code must retain the above | ||
27 | * copyright notice, this list of conditions and the following | ||
28 | * disclaimer. | ||
29 | * | ||
30 | * - Redistributions in binary form must reproduce the above | ||
31 | * copyright notice, this list of conditions and the following | ||
32 | * disclaimer in the documentation and/or other materials | ||
33 | * provided with the distribution. | ||
34 | * | ||
35 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
36 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
37 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
38 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
39 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
40 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
41 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
42 | * SOFTWARE. | ||
43 | */ | ||
44 | |||
45 | ## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction | ||
46 | |||
47 | .macro LABEL prefix n | ||
48 | \prefix\n\(): | ||
49 | .endm | ||
50 | |||
51 | .macro JMPTBL_ENTRY i | ||
52 | .word crc_\i - crc_array | ||
53 | .endm | ||
54 | |||
55 | .macro JNC_LESS_THAN j | ||
56 | jnc less_than_\j | ||
57 | .endm | ||
58 | |||
59 | # Define threshold where buffers are considered "small" and routed to more | ||
60 | # efficient "by-1" code. This "by-1" code only handles up to 255 bytes, so | ||
61 | # SMALL_SIZE can be no larger than 255. | ||
62 | |||
63 | #define SMALL_SIZE 200 | ||
64 | |||
65 | .if (SMALL_SIZE > 255) | ||
66 | .error "SMALL_ SIZE must be < 256" | ||
67 | .endif | ||
68 | |||
69 | # unsigned int crc_pcl(u8 *buffer, int len, unsigned int crc_init); | ||
70 | |||
71 | .global crc_pcl | ||
72 | crc_pcl: | ||
73 | #define bufp %rdi | ||
74 | #define bufp_dw %edi | ||
75 | #define bufp_w %di | ||
76 | #define bufp_b %dil | ||
77 | #define bufptmp %rcx | ||
78 | #define block_0 %rcx | ||
79 | #define block_1 %rdx | ||
80 | #define block_2 %r11 | ||
81 | #define len %rsi | ||
82 | #define len_dw %esi | ||
83 | #define len_w %si | ||
84 | #define len_b %sil | ||
85 | #define crc_init_arg %rdx | ||
86 | #define tmp %rbx | ||
87 | #define crc_init %r8 | ||
88 | #define crc_init_dw %r8d | ||
89 | #define crc1 %r9 | ||
90 | #define crc2 %r10 | ||
91 | |||
92 | pushq %rbx | ||
93 | pushq %rdi | ||
94 | pushq %rsi | ||
95 | |||
96 | ## Move crc_init for Linux to a different | ||
97 | mov crc_init_arg, crc_init | ||
98 | |||
99 | ################################################################ | ||
100 | ## 1) ALIGN: | ||
101 | ################################################################ | ||
102 | |||
103 | mov bufp, bufptmp # rdi = *buf | ||
104 | neg bufp | ||
105 | and $7, bufp # calculate the unalignment amount of | ||
106 | # the address | ||
107 | je proc_block # Skip if aligned | ||
108 | |||
109 | ## If len is less than 8 and we're unaligned, we need to jump | ||
110 | ## to special code to avoid reading beyond the end of the buffer | ||
111 | cmp $8, len | ||
112 | jae do_align | ||
113 | # less_than_8 expects length in upper 3 bits of len_dw | ||
114 | # less_than_8_post_shl1 expects length = carryflag * 8 + len_dw[31:30] | ||
115 | shl $32-3+1, len_dw | ||
116 | jmp less_than_8_post_shl1 | ||
117 | |||
118 | do_align: | ||
119 | #### Calculate CRC of unaligned bytes of the buffer (if any) | ||
120 | movq (bufptmp), tmp # load a quadward from the buffer | ||
121 | add bufp, bufptmp # align buffer pointer for quadword | ||
122 | # processing | ||
123 | sub bufp, len # update buffer length | ||
124 | align_loop: | ||
125 | crc32b %bl, crc_init_dw # compute crc32 of 1-byte | ||
126 | shr $8, tmp # get next byte | ||
127 | dec bufp | ||
128 | jne align_loop | ||
129 | |||
130 | proc_block: | ||
131 | |||
132 | ################################################################ | ||
133 | ## 2) PROCESS BLOCKS: | ||
134 | ################################################################ | ||
135 | |||
136 | ## compute num of bytes to be processed | ||
137 | movq len, tmp # save num bytes in tmp | ||
138 | |||
139 | cmpq $128*24, len | ||
140 | jae full_block | ||
141 | |||
142 | continue_block: | ||
143 | cmpq $SMALL_SIZE, len | ||
144 | jb small | ||
145 | |||
146 | ## len < 128*24 | ||
147 | movq $2731, %rax # 2731 = ceil(2^16 / 24) | ||
148 | mul len_dw | ||
149 | shrq $16, %rax | ||
150 | |||
151 | ## eax contains floor(bytes / 24) = num 24-byte chunks to do | ||
152 | |||
153 | ## process rax 24-byte chunks (128 >= rax >= 0) | ||
154 | |||
155 | ## compute end address of each block | ||
156 | ## block 0 (base addr + RAX * 8) | ||
157 | ## block 1 (base addr + RAX * 16) | ||
158 | ## block 2 (base addr + RAX * 24) | ||
159 | lea (bufptmp, %rax, 8), block_0 | ||
160 | lea (block_0, %rax, 8), block_1 | ||
161 | lea (block_1, %rax, 8), block_2 | ||
162 | |||
163 | xor crc1, crc1 | ||
164 | xor crc2, crc2 | ||
165 | |||
166 | ## branch into array | ||
167 | lea jump_table(%rip), bufp | ||
168 | movzxw (bufp, %rax, 2), len | ||
169 | offset=crc_array-jump_table | ||
170 | lea offset(bufp, len, 1), bufp | ||
171 | jmp *bufp | ||
172 | |||
173 | ################################################################ | ||
174 | ## 2a) PROCESS FULL BLOCKS: | ||
175 | ################################################################ | ||
176 | full_block: | ||
177 | movq $128,%rax | ||
178 | lea 128*8*2(block_0), block_1 | ||
179 | lea 128*8*3(block_0), block_2 | ||
180 | add $128*8*1, block_0 | ||
181 | |||
182 | xor crc1,crc1 | ||
183 | xor crc2,crc2 | ||
184 | |||
185 | # Fall thruogh into top of crc array (crc_128) | ||
186 | |||
187 | ################################################################ | ||
188 | ## 3) CRC Array: | ||
189 | ################################################################ | ||
190 | |||
191 | crc_array: | ||
192 | i=128 | ||
193 | .rept 128-1 | ||
194 | .altmacro | ||
195 | LABEL crc_ %i | ||
196 | .noaltmacro | ||
197 | crc32q -i*8(block_0), crc_init | ||
198 | crc32q -i*8(block_1), crc1 | ||
199 | crc32q -i*8(block_2), crc2 | ||
200 | i=(i-1) | ||
201 | .endr | ||
202 | |||
203 | .altmacro | ||
204 | LABEL crc_ %i | ||
205 | .noaltmacro | ||
206 | crc32q -i*8(block_0), crc_init | ||
207 | crc32q -i*8(block_1), crc1 | ||
208 | # SKIP crc32 -i*8(block_2), crc2 ; Don't do this one yet | ||
209 | |||
210 | mov block_2, block_0 | ||
211 | |||
212 | ################################################################ | ||
213 | ## 4) Combine three results: | ||
214 | ################################################################ | ||
215 | |||
216 | lea (K_table-16)(%rip), bufp # first entry is for idx 1 | ||
217 | shlq $3, %rax # rax *= 8 | ||
218 | subq %rax, tmp # tmp -= rax*8 | ||
219 | shlq $1, %rax | ||
220 | subq %rax, tmp # tmp -= rax*16 | ||
221 | # (total tmp -= rax*24) | ||
222 | addq %rax, bufp | ||
223 | |||
224 | movdqa (bufp), %xmm0 # 2 consts: K1:K2 | ||
225 | |||
226 | movq crc_init, %xmm1 # CRC for block 1 | ||
227 | pclmulqdq $0x00,%xmm0,%xmm1 # Multiply by K2 | ||
228 | |||
229 | movq crc1, %xmm2 # CRC for block 2 | ||
230 | pclmulqdq $0x10, %xmm0, %xmm2 # Multiply by K1 | ||
231 | |||
232 | pxor %xmm2,%xmm1 | ||
233 | movq %xmm1, %rax | ||
234 | xor -i*8(block_2), %rax | ||
235 | mov crc2, crc_init | ||
236 | crc32 %rax, crc_init | ||
237 | |||
238 | ################################################################ | ||
239 | ## 5) Check for end: | ||
240 | ################################################################ | ||
241 | |||
242 | LABEL crc_ 0 | ||
243 | mov tmp, len | ||
244 | cmp $128*24, tmp | ||
245 | jae full_block | ||
246 | cmp $24, tmp | ||
247 | jae continue_block | ||
248 | |||
249 | less_than_24: | ||
250 | shl $32-4, len_dw # less_than_16 expects length | ||
251 | # in upper 4 bits of len_dw | ||
252 | jnc less_than_16 | ||
253 | crc32q (bufptmp), crc_init | ||
254 | crc32q 8(bufptmp), crc_init | ||
255 | jz do_return | ||
256 | add $16, bufptmp | ||
257 | # len is less than 8 if we got here | ||
258 | # less_than_8 expects length in upper 3 bits of len_dw | ||
259 | # less_than_8_post_shl1 expects length = carryflag * 8 + len_dw[31:30] | ||
260 | shl $2, len_dw | ||
261 | jmp less_than_8_post_shl1 | ||
262 | |||
263 | ####################################################################### | ||
264 | ## 6) LESS THAN 256-bytes REMAIN AT THIS POINT (8-bits of len are full) | ||
265 | ####################################################################### | ||
266 | small: | ||
267 | shl $32-8, len_dw # Prepare len_dw for less_than_256 | ||
268 | j=256 | ||
269 | .rept 5 # j = {256, 128, 64, 32, 16} | ||
270 | .altmacro | ||
271 | LABEL less_than_ %j # less_than_j: Length should be in | ||
272 | # upper lg(j) bits of len_dw | ||
273 | j=(j/2) | ||
274 | shl $1, len_dw # Get next MSB | ||
275 | JNC_LESS_THAN %j | ||
276 | .noaltmacro | ||
277 | i=0 | ||
278 | .rept (j/8) | ||
279 | crc32q i(bufptmp), crc_init # Compute crc32 of 8-byte data | ||
280 | i=i+8 | ||
281 | .endr | ||
282 | jz do_return # Return if remaining length is zero | ||
283 | add $j, bufptmp # Advance buf | ||
284 | .endr | ||
285 | |||
286 | less_than_8: # Length should be stored in | ||
287 | # upper 3 bits of len_dw | ||
288 | shl $1, len_dw | ||
289 | less_than_8_post_shl1: | ||
290 | jnc less_than_4 | ||
291 | crc32l (bufptmp), crc_init_dw # CRC of 4 bytes | ||
292 | jz do_return # return if remaining data is zero | ||
293 | add $4, bufptmp | ||
294 | less_than_4: # Length should be stored in | ||
295 | # upper 2 bits of len_dw | ||
296 | shl $1, len_dw | ||
297 | jnc less_than_2 | ||
298 | crc32w (bufptmp), crc_init_dw # CRC of 2 bytes | ||
299 | jz do_return # return if remaining data is zero | ||
300 | add $2, bufptmp | ||
301 | less_than_2: # Length should be stored in the MSB | ||
302 | # of len_dw | ||
303 | shl $1, len_dw | ||
304 | jnc less_than_1 | ||
305 | crc32b (bufptmp), crc_init_dw # CRC of 1 byte | ||
306 | less_than_1: # Length should be zero | ||
307 | do_return: | ||
308 | movq crc_init, %rax | ||
309 | popq %rsi | ||
310 | popq %rdi | ||
311 | popq %rbx | ||
312 | ret | ||
313 | |||
314 | ################################################################ | ||
315 | ## jump table Table is 129 entries x 2 bytes each | ||
316 | ################################################################ | ||
317 | .align 4 | ||
318 | jump_table: | ||
319 | i=0 | ||
320 | .rept 129 | ||
321 | .altmacro | ||
322 | JMPTBL_ENTRY %i | ||
323 | .noaltmacro | ||
324 | i=i+1 | ||
325 | .endr | ||
326 | ################################################################ | ||
327 | ## PCLMULQDQ tables | ||
328 | ## Table is 128 entries x 2 quad words each | ||
329 | ################################################################ | ||
330 | .data | ||
331 | .align 64 | ||
332 | K_table: | ||
333 | .quad 0x14cd00bd6,0x105ec76f0 | ||
334 | .quad 0x0ba4fc28e,0x14cd00bd6 | ||
335 | .quad 0x1d82c63da,0x0f20c0dfe | ||
336 | .quad 0x09e4addf8,0x0ba4fc28e | ||
337 | .quad 0x039d3b296,0x1384aa63a | ||
338 | .quad 0x102f9b8a2,0x1d82c63da | ||
339 | .quad 0x14237f5e6,0x01c291d04 | ||
340 | .quad 0x00d3b6092,0x09e4addf8 | ||
341 | .quad 0x0c96cfdc0,0x0740eef02 | ||
342 | .quad 0x18266e456,0x039d3b296 | ||
343 | .quad 0x0daece73e,0x0083a6eec | ||
344 | .quad 0x0ab7aff2a,0x102f9b8a2 | ||
345 | .quad 0x1248ea574,0x1c1733996 | ||
346 | .quad 0x083348832,0x14237f5e6 | ||
347 | .quad 0x12c743124,0x02ad91c30 | ||
348 | .quad 0x0b9e02b86,0x00d3b6092 | ||
349 | .quad 0x018b33a4e,0x06992cea2 | ||
350 | .quad 0x1b331e26a,0x0c96cfdc0 | ||
351 | .quad 0x17d35ba46,0x07e908048 | ||
352 | .quad 0x1bf2e8b8a,0x18266e456 | ||
353 | .quad 0x1a3e0968a,0x11ed1f9d8 | ||
354 | .quad 0x0ce7f39f4,0x0daece73e | ||
355 | .quad 0x061d82e56,0x0f1d0f55e | ||
356 | .quad 0x0d270f1a2,0x0ab7aff2a | ||
357 | .quad 0x1c3f5f66c,0x0a87ab8a8 | ||
358 | .quad 0x12ed0daac,0x1248ea574 | ||
359 | .quad 0x065863b64,0x08462d800 | ||
360 | .quad 0x11eef4f8e,0x083348832 | ||
361 | .quad 0x1ee54f54c,0x071d111a8 | ||
362 | .quad 0x0b3e32c28,0x12c743124 | ||
363 | .quad 0x0064f7f26,0x0ffd852c6 | ||
364 | .quad 0x0dd7e3b0c,0x0b9e02b86 | ||
365 | .quad 0x0f285651c,0x0dcb17aa4 | ||
366 | .quad 0x010746f3c,0x018b33a4e | ||
367 | .quad 0x1c24afea4,0x0f37c5aee | ||
368 | .quad 0x0271d9844,0x1b331e26a | ||
369 | .quad 0x08e766a0c,0x06051d5a2 | ||
370 | .quad 0x093a5f730,0x17d35ba46 | ||
371 | .quad 0x06cb08e5c,0x11d5ca20e | ||
372 | .quad 0x06b749fb2,0x1bf2e8b8a | ||
373 | .quad 0x1167f94f2,0x021f3d99c | ||
374 | .quad 0x0cec3662e,0x1a3e0968a | ||
375 | .quad 0x19329634a,0x08f158014 | ||
376 | .quad 0x0e6fc4e6a,0x0ce7f39f4 | ||
377 | .quad 0x08227bb8a,0x1a5e82106 | ||
378 | .quad 0x0b0cd4768,0x061d82e56 | ||
379 | .quad 0x13c2b89c4,0x188815ab2 | ||
380 | .quad 0x0d7a4825c,0x0d270f1a2 | ||
381 | .quad 0x10f5ff2ba,0x105405f3e | ||
382 | .quad 0x00167d312,0x1c3f5f66c | ||
383 | .quad 0x0f6076544,0x0e9adf796 | ||
384 | .quad 0x026f6a60a,0x12ed0daac | ||
385 | .quad 0x1a2adb74e,0x096638b34 | ||
386 | .quad 0x19d34af3a,0x065863b64 | ||
387 | .quad 0x049c3cc9c,0x1e50585a0 | ||
388 | .quad 0x068bce87a,0x11eef4f8e | ||
389 | .quad 0x1524fa6c6,0x19f1c69dc | ||
390 | .quad 0x16cba8aca,0x1ee54f54c | ||
391 | .quad 0x042d98888,0x12913343e | ||
392 | .quad 0x1329d9f7e,0x0b3e32c28 | ||
393 | .quad 0x1b1c69528,0x088f25a3a | ||
394 | .quad 0x02178513a,0x0064f7f26 | ||
395 | .quad 0x0e0ac139e,0x04e36f0b0 | ||
396 | .quad 0x0170076fa,0x0dd7e3b0c | ||
397 | .quad 0x141a1a2e2,0x0bd6f81f8 | ||
398 | .quad 0x16ad828b4,0x0f285651c | ||
399 | .quad 0x041d17b64,0x19425cbba | ||
400 | .quad 0x1fae1cc66,0x010746f3c | ||
401 | .quad 0x1a75b4b00,0x18db37e8a | ||
402 | .quad 0x0f872e54c,0x1c24afea4 | ||
403 | .quad 0x01e41e9fc,0x04c144932 | ||
404 | .quad 0x086d8e4d2,0x0271d9844 | ||
405 | .quad 0x160f7af7a,0x052148f02 | ||
406 | .quad 0x05bb8f1bc,0x08e766a0c | ||
407 | .quad 0x0a90fd27a,0x0a3c6f37a | ||
408 | .quad 0x0b3af077a,0x093a5f730 | ||
409 | .quad 0x04984d782,0x1d22c238e | ||
410 | .quad 0x0ca6ef3ac,0x06cb08e5c | ||
411 | .quad 0x0234e0b26,0x063ded06a | ||
412 | .quad 0x1d88abd4a,0x06b749fb2 | ||
413 | .quad 0x04597456a,0x04d56973c | ||
414 | .quad 0x0e9e28eb4,0x1167f94f2 | ||
415 | .quad 0x07b3ff57a,0x19385bf2e | ||
416 | .quad 0x0c9c8b782,0x0cec3662e | ||
417 | .quad 0x13a9cba9e,0x0e417f38a | ||
418 | .quad 0x093e106a4,0x19329634a | ||
419 | .quad 0x167001a9c,0x14e727980 | ||
420 | .quad 0x1ddffc5d4,0x0e6fc4e6a | ||
421 | .quad 0x00df04680,0x0d104b8fc | ||
422 | .quad 0x02342001e,0x08227bb8a | ||
423 | .quad 0x00a2a8d7e,0x05b397730 | ||
424 | .quad 0x168763fa6,0x0b0cd4768 | ||
425 | .quad 0x1ed5a407a,0x0e78eb416 | ||
426 | .quad 0x0d2c3ed1a,0x13c2b89c4 | ||
427 | .quad 0x0995a5724,0x1641378f0 | ||
428 | .quad 0x19b1afbc4,0x0d7a4825c | ||
429 | .quad 0x109ffedc0,0x08d96551c | ||
430 | .quad 0x0f2271e60,0x10f5ff2ba | ||
431 | .quad 0x00b0bf8ca,0x00bf80dd2 | ||
432 | .quad 0x123888b7a,0x00167d312 | ||
433 | .quad 0x1e888f7dc,0x18dcddd1c | ||
434 | .quad 0x002ee03b2,0x0f6076544 | ||
435 | .quad 0x183e8d8fe,0x06a45d2b2 | ||
436 | .quad 0x133d7a042,0x026f6a60a | ||
437 | .quad 0x116b0f50c,0x1dd3e10e8 | ||
438 | .quad 0x05fabe670,0x1a2adb74e | ||
439 | .quad 0x130004488,0x0de87806c | ||
440 | .quad 0x000bcf5f6,0x19d34af3a | ||
441 | .quad 0x18f0c7078,0x014338754 | ||
442 | .quad 0x017f27698,0x049c3cc9c | ||
443 | .quad 0x058ca5f00,0x15e3e77ee | ||
444 | .quad 0x1af900c24,0x068bce87a | ||
445 | .quad 0x0b5cfca28,0x0dd07448e | ||
446 | .quad 0x0ded288f8,0x1524fa6c6 | ||
447 | .quad 0x059f229bc,0x1d8048348 | ||
448 | .quad 0x06d390dec,0x16cba8aca | ||
449 | .quad 0x037170390,0x0a3e3e02c | ||
450 | .quad 0x06353c1cc,0x042d98888 | ||
451 | .quad 0x0c4584f5c,0x0d73c7bea | ||
452 | .quad 0x1f16a3418,0x1329d9f7e | ||
453 | .quad 0x0531377e2,0x185137662 | ||
454 | .quad 0x1d8d9ca7c,0x1b1c69528 | ||
455 | .quad 0x0b25b29f2,0x18a08b5bc | ||
456 | .quad 0x19fb2a8b0,0x02178513a | ||
457 | .quad 0x1a08fe6ac,0x1da758ae0 | ||
458 | .quad 0x045cddf4e,0x0e0ac139e | ||
459 | .quad 0x1a91647f2,0x169cf9eb0 | ||
460 | .quad 0x1a0f717c4,0x0170076fa | ||
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index 6759dd1135b..976aa64d9a2 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c | |||
@@ -20,7 +20,6 @@ | |||
20 | #include <crypto/gf128mul.h> | 20 | #include <crypto/gf128mul.h> |
21 | #include <crypto/internal/hash.h> | 21 | #include <crypto/internal/hash.h> |
22 | #include <asm/i387.h> | 22 | #include <asm/i387.h> |
23 | #include <asm/cpu_device_id.h> | ||
24 | 23 | ||
25 | #define GHASH_BLOCK_SIZE 16 | 24 | #define GHASH_BLOCK_SIZE 16 |
26 | #define GHASH_DIGEST_SIZE 16 | 25 | #define GHASH_DIGEST_SIZE 16 |
@@ -150,6 +149,7 @@ static struct shash_alg ghash_alg = { | |||
150 | .cra_blocksize = GHASH_BLOCK_SIZE, | 149 | .cra_blocksize = GHASH_BLOCK_SIZE, |
151 | .cra_ctxsize = sizeof(struct ghash_ctx), | 150 | .cra_ctxsize = sizeof(struct ghash_ctx), |
152 | .cra_module = THIS_MODULE, | 151 | .cra_module = THIS_MODULE, |
152 | .cra_list = LIST_HEAD_INIT(ghash_alg.base.cra_list), | ||
153 | }, | 153 | }, |
154 | }; | 154 | }; |
155 | 155 | ||
@@ -287,24 +287,22 @@ static struct ahash_alg ghash_async_alg = { | |||
287 | .cra_blocksize = GHASH_BLOCK_SIZE, | 287 | .cra_blocksize = GHASH_BLOCK_SIZE, |
288 | .cra_type = &crypto_ahash_type, | 288 | .cra_type = &crypto_ahash_type, |
289 | .cra_module = THIS_MODULE, | 289 | .cra_module = THIS_MODULE, |
290 | .cra_list = LIST_HEAD_INIT(ghash_async_alg.halg.base.cra_list), | ||
290 | .cra_init = ghash_async_init_tfm, | 291 | .cra_init = ghash_async_init_tfm, |
291 | .cra_exit = ghash_async_exit_tfm, | 292 | .cra_exit = ghash_async_exit_tfm, |
292 | }, | 293 | }, |
293 | }, | 294 | }, |
294 | }; | 295 | }; |
295 | 296 | ||
296 | static const struct x86_cpu_id pcmul_cpu_id[] = { | ||
297 | X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ), /* Pickle-Mickle-Duck */ | ||
298 | {} | ||
299 | }; | ||
300 | MODULE_DEVICE_TABLE(x86cpu, pcmul_cpu_id); | ||
301 | |||
302 | static int __init ghash_pclmulqdqni_mod_init(void) | 297 | static int __init ghash_pclmulqdqni_mod_init(void) |
303 | { | 298 | { |
304 | int err; | 299 | int err; |
305 | 300 | ||
306 | if (!x86_match_cpu(pcmul_cpu_id)) | 301 | if (!cpu_has_pclmulqdq) { |
302 | printk(KERN_INFO "Intel PCLMULQDQ-NI instructions are not" | ||
303 | " detected.\n"); | ||
307 | return -ENODEV; | 304 | return -ENODEV; |
305 | } | ||
308 | 306 | ||
309 | err = crypto_register_shash(&ghash_alg); | 307 | err = crypto_register_shash(&ghash_alg); |
310 | if (err) | 308 | if (err) |
diff --git a/arch/x86/crypto/glue_helper-asm-avx.S b/arch/x86/crypto/glue_helper-asm-avx.S deleted file mode 100644 index f7b6ea2ddfd..00000000000 --- a/arch/x86/crypto/glue_helper-asm-avx.S +++ /dev/null | |||
@@ -1,91 +0,0 @@ | |||
1 | /* | ||
2 | * Shared glue code for 128bit block ciphers, AVX assembler macros | ||
3 | * | ||
4 | * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | */ | ||
17 | |||
18 | #define load_8way(src, x0, x1, x2, x3, x4, x5, x6, x7) \ | ||
19 | vmovdqu (0*16)(src), x0; \ | ||
20 | vmovdqu (1*16)(src), x1; \ | ||
21 | vmovdqu (2*16)(src), x2; \ | ||
22 | vmovdqu (3*16)(src), x3; \ | ||
23 | vmovdqu (4*16)(src), x4; \ | ||
24 | vmovdqu (5*16)(src), x5; \ | ||
25 | vmovdqu (6*16)(src), x6; \ | ||
26 | vmovdqu (7*16)(src), x7; | ||
27 | |||
28 | #define store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7) \ | ||
29 | vmovdqu x0, (0*16)(dst); \ | ||
30 | vmovdqu x1, (1*16)(dst); \ | ||
31 | vmovdqu x2, (2*16)(dst); \ | ||
32 | vmovdqu x3, (3*16)(dst); \ | ||
33 | vmovdqu x4, (4*16)(dst); \ | ||
34 | vmovdqu x5, (5*16)(dst); \ | ||
35 | vmovdqu x6, (6*16)(dst); \ | ||
36 | vmovdqu x7, (7*16)(dst); | ||
37 | |||
38 | #define store_cbc_8way(src, dst, x0, x1, x2, x3, x4, x5, x6, x7) \ | ||
39 | vpxor (0*16)(src), x1, x1; \ | ||
40 | vpxor (1*16)(src), x2, x2; \ | ||
41 | vpxor (2*16)(src), x3, x3; \ | ||
42 | vpxor (3*16)(src), x4, x4; \ | ||
43 | vpxor (4*16)(src), x5, x5; \ | ||
44 | vpxor (5*16)(src), x6, x6; \ | ||
45 | vpxor (6*16)(src), x7, x7; \ | ||
46 | store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7); | ||
47 | |||
48 | #define inc_le128(x, minus_one, tmp) \ | ||
49 | vpcmpeqq minus_one, x, tmp; \ | ||
50 | vpsubq minus_one, x, x; \ | ||
51 | vpslldq $8, tmp, tmp; \ | ||
52 | vpsubq tmp, x, x; | ||
53 | |||
54 | #define load_ctr_8way(iv, bswap, x0, x1, x2, x3, x4, x5, x6, x7, t0, t1, t2) \ | ||
55 | vpcmpeqd t0, t0, t0; \ | ||
56 | vpsrldq $8, t0, t0; /* low: -1, high: 0 */ \ | ||
57 | vmovdqa bswap, t1; \ | ||
58 | \ | ||
59 | /* load IV and byteswap */ \ | ||
60 | vmovdqu (iv), x7; \ | ||
61 | vpshufb t1, x7, x0; \ | ||
62 | \ | ||
63 | /* construct IVs */ \ | ||
64 | inc_le128(x7, t0, t2); \ | ||
65 | vpshufb t1, x7, x1; \ | ||
66 | inc_le128(x7, t0, t2); \ | ||
67 | vpshufb t1, x7, x2; \ | ||
68 | inc_le128(x7, t0, t2); \ | ||
69 | vpshufb t1, x7, x3; \ | ||
70 | inc_le128(x7, t0, t2); \ | ||
71 | vpshufb t1, x7, x4; \ | ||
72 | inc_le128(x7, t0, t2); \ | ||
73 | vpshufb t1, x7, x5; \ | ||
74 | inc_le128(x7, t0, t2); \ | ||
75 | vpshufb t1, x7, x6; \ | ||
76 | inc_le128(x7, t0, t2); \ | ||
77 | vmovdqa x7, t2; \ | ||
78 | vpshufb t1, x7, x7; \ | ||
79 | inc_le128(t2, t0, t1); \ | ||
80 | vmovdqu t2, (iv); | ||
81 | |||
82 | #define store_ctr_8way(src, dst, x0, x1, x2, x3, x4, x5, x6, x7) \ | ||
83 | vpxor (0*16)(src), x0, x0; \ | ||
84 | vpxor (1*16)(src), x1, x1; \ | ||
85 | vpxor (2*16)(src), x2, x2; \ | ||
86 | vpxor (3*16)(src), x3, x3; \ | ||
87 | vpxor (4*16)(src), x4, x4; \ | ||
88 | vpxor (5*16)(src), x5, x5; \ | ||
89 | vpxor (6*16)(src), x6, x6; \ | ||
90 | vpxor (7*16)(src), x7, x7; \ | ||
91 | store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7); | ||
diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c deleted file mode 100644 index 22ce4f683e5..00000000000 --- a/arch/x86/crypto/glue_helper.c +++ /dev/null | |||
@@ -1,307 +0,0 @@ | |||
1 | /* | ||
2 | * Shared glue code for 128bit block ciphers | ||
3 | * | ||
4 | * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
5 | * | ||
6 | * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: | ||
7 | * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> | ||
8 | * CTR part based on code (crypto/ctr.c) by: | ||
9 | * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com> | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of the GNU General Public License as published by | ||
13 | * the Free Software Foundation; either version 2 of the License, or | ||
14 | * (at your option) any later version. | ||
15 | * | ||
16 | * This program is distributed in the hope that it will be useful, | ||
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
19 | * GNU General Public License for more details. | ||
20 | * | ||
21 | * You should have received a copy of the GNU General Public License | ||
22 | * along with this program; if not, write to the Free Software | ||
23 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
24 | * USA | ||
25 | * | ||
26 | */ | ||
27 | |||
28 | #include <linux/module.h> | ||
29 | #include <crypto/b128ops.h> | ||
30 | #include <crypto/lrw.h> | ||
31 | #include <crypto/xts.h> | ||
32 | #include <asm/crypto/glue_helper.h> | ||
33 | #include <crypto/scatterwalk.h> | ||
34 | |||
35 | static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, | ||
36 | struct blkcipher_desc *desc, | ||
37 | struct blkcipher_walk *walk) | ||
38 | { | ||
39 | void *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
40 | const unsigned int bsize = 128 / 8; | ||
41 | unsigned int nbytes, i, func_bytes; | ||
42 | bool fpu_enabled = false; | ||
43 | int err; | ||
44 | |||
45 | err = blkcipher_walk_virt(desc, walk); | ||
46 | |||
47 | while ((nbytes = walk->nbytes)) { | ||
48 | u8 *wsrc = walk->src.virt.addr; | ||
49 | u8 *wdst = walk->dst.virt.addr; | ||
50 | |||
51 | fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, | ||
52 | desc, fpu_enabled, nbytes); | ||
53 | |||
54 | for (i = 0; i < gctx->num_funcs; i++) { | ||
55 | func_bytes = bsize * gctx->funcs[i].num_blocks; | ||
56 | |||
57 | /* Process multi-block batch */ | ||
58 | if (nbytes >= func_bytes) { | ||
59 | do { | ||
60 | gctx->funcs[i].fn_u.ecb(ctx, wdst, | ||
61 | wsrc); | ||
62 | |||
63 | wsrc += func_bytes; | ||
64 | wdst += func_bytes; | ||
65 | nbytes -= func_bytes; | ||
66 | } while (nbytes >= func_bytes); | ||
67 | |||
68 | if (nbytes < bsize) | ||
69 | goto done; | ||
70 | } | ||
71 | } | ||
72 | |||
73 | done: | ||
74 | err = blkcipher_walk_done(desc, walk, nbytes); | ||
75 | } | ||
76 | |||
77 | glue_fpu_end(fpu_enabled); | ||
78 | return err; | ||
79 | } | ||
80 | |||
81 | int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, | ||
82 | struct blkcipher_desc *desc, struct scatterlist *dst, | ||
83 | struct scatterlist *src, unsigned int nbytes) | ||
84 | { | ||
85 | struct blkcipher_walk walk; | ||
86 | |||
87 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
88 | return __glue_ecb_crypt_128bit(gctx, desc, &walk); | ||
89 | } | ||
90 | EXPORT_SYMBOL_GPL(glue_ecb_crypt_128bit); | ||
91 | |||
92 | static unsigned int __glue_cbc_encrypt_128bit(const common_glue_func_t fn, | ||
93 | struct blkcipher_desc *desc, | ||
94 | struct blkcipher_walk *walk) | ||
95 | { | ||
96 | void *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
97 | const unsigned int bsize = 128 / 8; | ||
98 | unsigned int nbytes = walk->nbytes; | ||
99 | u128 *src = (u128 *)walk->src.virt.addr; | ||
100 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
101 | u128 *iv = (u128 *)walk->iv; | ||
102 | |||
103 | do { | ||
104 | u128_xor(dst, src, iv); | ||
105 | fn(ctx, (u8 *)dst, (u8 *)dst); | ||
106 | iv = dst; | ||
107 | |||
108 | src += 1; | ||
109 | dst += 1; | ||
110 | nbytes -= bsize; | ||
111 | } while (nbytes >= bsize); | ||
112 | |||
113 | *(u128 *)walk->iv = *iv; | ||
114 | return nbytes; | ||
115 | } | ||
116 | |||
117 | int glue_cbc_encrypt_128bit(const common_glue_func_t fn, | ||
118 | struct blkcipher_desc *desc, | ||
119 | struct scatterlist *dst, | ||
120 | struct scatterlist *src, unsigned int nbytes) | ||
121 | { | ||
122 | struct blkcipher_walk walk; | ||
123 | int err; | ||
124 | |||
125 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
126 | err = blkcipher_walk_virt(desc, &walk); | ||
127 | |||
128 | while ((nbytes = walk.nbytes)) { | ||
129 | nbytes = __glue_cbc_encrypt_128bit(fn, desc, &walk); | ||
130 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
131 | } | ||
132 | |||
133 | return err; | ||
134 | } | ||
135 | EXPORT_SYMBOL_GPL(glue_cbc_encrypt_128bit); | ||
136 | |||
137 | static unsigned int | ||
138 | __glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, | ||
139 | struct blkcipher_desc *desc, | ||
140 | struct blkcipher_walk *walk) | ||
141 | { | ||
142 | void *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
143 | const unsigned int bsize = 128 / 8; | ||
144 | unsigned int nbytes = walk->nbytes; | ||
145 | u128 *src = (u128 *)walk->src.virt.addr; | ||
146 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
147 | u128 last_iv; | ||
148 | unsigned int num_blocks, func_bytes; | ||
149 | unsigned int i; | ||
150 | |||
151 | /* Start of the last block. */ | ||
152 | src += nbytes / bsize - 1; | ||
153 | dst += nbytes / bsize - 1; | ||
154 | |||
155 | last_iv = *src; | ||
156 | |||
157 | for (i = 0; i < gctx->num_funcs; i++) { | ||
158 | num_blocks = gctx->funcs[i].num_blocks; | ||
159 | func_bytes = bsize * num_blocks; | ||
160 | |||
161 | /* Process multi-block batch */ | ||
162 | if (nbytes >= func_bytes) { | ||
163 | do { | ||
164 | nbytes -= func_bytes - bsize; | ||
165 | src -= num_blocks - 1; | ||
166 | dst -= num_blocks - 1; | ||
167 | |||
168 | gctx->funcs[i].fn_u.cbc(ctx, dst, src); | ||
169 | |||
170 | nbytes -= bsize; | ||
171 | if (nbytes < bsize) | ||
172 | goto done; | ||
173 | |||
174 | u128_xor(dst, dst, src - 1); | ||
175 | src -= 1; | ||
176 | dst -= 1; | ||
177 | } while (nbytes >= func_bytes); | ||
178 | |||
179 | if (nbytes < bsize) | ||
180 | goto done; | ||
181 | } | ||
182 | } | ||
183 | |||
184 | done: | ||
185 | u128_xor(dst, dst, (u128 *)walk->iv); | ||
186 | *(u128 *)walk->iv = last_iv; | ||
187 | |||
188 | return nbytes; | ||
189 | } | ||
190 | |||
191 | int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, | ||
192 | struct blkcipher_desc *desc, | ||
193 | struct scatterlist *dst, | ||
194 | struct scatterlist *src, unsigned int nbytes) | ||
195 | { | ||
196 | const unsigned int bsize = 128 / 8; | ||
197 | bool fpu_enabled = false; | ||
198 | struct blkcipher_walk walk; | ||
199 | int err; | ||
200 | |||
201 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
202 | err = blkcipher_walk_virt(desc, &walk); | ||
203 | |||
204 | while ((nbytes = walk.nbytes)) { | ||
205 | fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, | ||
206 | desc, fpu_enabled, nbytes); | ||
207 | nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk); | ||
208 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
209 | } | ||
210 | |||
211 | glue_fpu_end(fpu_enabled); | ||
212 | return err; | ||
213 | } | ||
214 | EXPORT_SYMBOL_GPL(glue_cbc_decrypt_128bit); | ||
215 | |||
216 | static void glue_ctr_crypt_final_128bit(const common_glue_ctr_func_t fn_ctr, | ||
217 | struct blkcipher_desc *desc, | ||
218 | struct blkcipher_walk *walk) | ||
219 | { | ||
220 | void *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
221 | u8 *src = (u8 *)walk->src.virt.addr; | ||
222 | u8 *dst = (u8 *)walk->dst.virt.addr; | ||
223 | unsigned int nbytes = walk->nbytes; | ||
224 | le128 ctrblk; | ||
225 | u128 tmp; | ||
226 | |||
227 | be128_to_le128(&ctrblk, (be128 *)walk->iv); | ||
228 | |||
229 | memcpy(&tmp, src, nbytes); | ||
230 | fn_ctr(ctx, &tmp, &tmp, &ctrblk); | ||
231 | memcpy(dst, &tmp, nbytes); | ||
232 | |||
233 | le128_to_be128((be128 *)walk->iv, &ctrblk); | ||
234 | } | ||
235 | EXPORT_SYMBOL_GPL(glue_ctr_crypt_final_128bit); | ||
236 | |||
237 | static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, | ||
238 | struct blkcipher_desc *desc, | ||
239 | struct blkcipher_walk *walk) | ||
240 | { | ||
241 | const unsigned int bsize = 128 / 8; | ||
242 | void *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
243 | unsigned int nbytes = walk->nbytes; | ||
244 | u128 *src = (u128 *)walk->src.virt.addr; | ||
245 | u128 *dst = (u128 *)walk->dst.virt.addr; | ||
246 | le128 ctrblk; | ||
247 | unsigned int num_blocks, func_bytes; | ||
248 | unsigned int i; | ||
249 | |||
250 | be128_to_le128(&ctrblk, (be128 *)walk->iv); | ||
251 | |||
252 | /* Process multi-block batch */ | ||
253 | for (i = 0; i < gctx->num_funcs; i++) { | ||
254 | num_blocks = gctx->funcs[i].num_blocks; | ||
255 | func_bytes = bsize * num_blocks; | ||
256 | |||
257 | if (nbytes >= func_bytes) { | ||
258 | do { | ||
259 | gctx->funcs[i].fn_u.ctr(ctx, dst, src, &ctrblk); | ||
260 | |||
261 | src += num_blocks; | ||
262 | dst += num_blocks; | ||
263 | nbytes -= func_bytes; | ||
264 | } while (nbytes >= func_bytes); | ||
265 | |||
266 | if (nbytes < bsize) | ||
267 | goto done; | ||
268 | } | ||
269 | } | ||
270 | |||
271 | done: | ||
272 | le128_to_be128((be128 *)walk->iv, &ctrblk); | ||
273 | return nbytes; | ||
274 | } | ||
275 | |||
276 | int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, | ||
277 | struct blkcipher_desc *desc, struct scatterlist *dst, | ||
278 | struct scatterlist *src, unsigned int nbytes) | ||
279 | { | ||
280 | const unsigned int bsize = 128 / 8; | ||
281 | bool fpu_enabled = false; | ||
282 | struct blkcipher_walk walk; | ||
283 | int err; | ||
284 | |||
285 | blkcipher_walk_init(&walk, dst, src, nbytes); | ||
286 | err = blkcipher_walk_virt_block(desc, &walk, bsize); | ||
287 | |||
288 | while ((nbytes = walk.nbytes) >= bsize) { | ||
289 | fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, | ||
290 | desc, fpu_enabled, nbytes); | ||
291 | nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk); | ||
292 | err = blkcipher_walk_done(desc, &walk, nbytes); | ||
293 | } | ||
294 | |||
295 | glue_fpu_end(fpu_enabled); | ||
296 | |||
297 | if (walk.nbytes) { | ||
298 | glue_ctr_crypt_final_128bit( | ||
299 | gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk); | ||
300 | err = blkcipher_walk_done(desc, &walk, 0); | ||
301 | } | ||
302 | |||
303 | return err; | ||
304 | } | ||
305 | EXPORT_SYMBOL_GPL(glue_ctr_crypt_128bit); | ||
306 | |||
307 | MODULE_LICENSE("GPL"); | ||
diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c index a3a3c0205c1..bccb76d8098 100644 --- a/arch/x86/crypto/salsa20_glue.c +++ b/arch/x86/crypto/salsa20_glue.c | |||
@@ -97,6 +97,7 @@ static struct crypto_alg alg = { | |||
97 | .cra_ctxsize = sizeof(struct salsa20_ctx), | 97 | .cra_ctxsize = sizeof(struct salsa20_ctx), |
98 | .cra_alignmask = 3, | 98 | .cra_alignmask = 3, |
99 | .cra_module = THIS_MODULE, | 99 | .cra_module = THIS_MODULE, |
100 | .cra_list = LIST_HEAD_INIT(alg.cra_list), | ||
100 | .cra_u = { | 101 | .cra_u = { |
101 | .blkcipher = { | 102 | .blkcipher = { |
102 | .setkey = setkey, | 103 | .setkey = setkey, |
diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S deleted file mode 100644 index 02b0e9fe997..00000000000 --- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S +++ /dev/null | |||
@@ -1,754 +0,0 @@ | |||
1 | /* | ||
2 | * Serpent Cipher 8-way parallel algorithm (x86_64/AVX) | ||
3 | * | ||
4 | * Copyright (C) 2012 Johannes Goetzfried | ||
5 | * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> | ||
6 | * | ||
7 | * Based on arch/x86/crypto/serpent-sse2-x86_64-asm_64.S by | ||
8 | * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; either version 2 of the License, or | ||
13 | * (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 | * GNU General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, write to the Free Software | ||
22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
23 | * USA | ||
24 | * | ||
25 | */ | ||
26 | |||
27 | #include "glue_helper-asm-avx.S" | ||
28 | |||
29 | .file "serpent-avx-x86_64-asm_64.S" | ||
30 | |||
31 | .data | ||
32 | .align 16 | ||
33 | |||
34 | .Lbswap128_mask: | ||
35 | .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 | ||
36 | |||
37 | .text | ||
38 | |||
39 | #define CTX %rdi | ||
40 | |||
41 | /********************************************************************** | ||
42 | 8-way AVX serpent | ||
43 | **********************************************************************/ | ||
44 | #define RA1 %xmm0 | ||
45 | #define RB1 %xmm1 | ||
46 | #define RC1 %xmm2 | ||
47 | #define RD1 %xmm3 | ||
48 | #define RE1 %xmm4 | ||
49 | |||
50 | #define tp %xmm5 | ||
51 | |||
52 | #define RA2 %xmm6 | ||
53 | #define RB2 %xmm7 | ||
54 | #define RC2 %xmm8 | ||
55 | #define RD2 %xmm9 | ||
56 | #define RE2 %xmm10 | ||
57 | |||
58 | #define RNOT %xmm11 | ||
59 | |||
60 | #define RK0 %xmm12 | ||
61 | #define RK1 %xmm13 | ||
62 | #define RK2 %xmm14 | ||
63 | #define RK3 %xmm15 | ||
64 | |||
65 | |||
66 | #define S0_1(x0, x1, x2, x3, x4) \ | ||
67 | vpor x0, x3, tp; \ | ||
68 | vpxor x3, x0, x0; \ | ||
69 | vpxor x2, x3, x4; \ | ||
70 | vpxor RNOT, x4, x4; \ | ||
71 | vpxor x1, tp, x3; \ | ||
72 | vpand x0, x1, x1; \ | ||
73 | vpxor x4, x1, x1; \ | ||
74 | vpxor x0, x2, x2; | ||
75 | #define S0_2(x0, x1, x2, x3, x4) \ | ||
76 | vpxor x3, x0, x0; \ | ||
77 | vpor x0, x4, x4; \ | ||
78 | vpxor x2, x0, x0; \ | ||
79 | vpand x1, x2, x2; \ | ||
80 | vpxor x2, x3, x3; \ | ||
81 | vpxor RNOT, x1, x1; \ | ||
82 | vpxor x4, x2, x2; \ | ||
83 | vpxor x2, x1, x1; | ||
84 | |||
85 | #define S1_1(x0, x1, x2, x3, x4) \ | ||
86 | vpxor x0, x1, tp; \ | ||
87 | vpxor x3, x0, x0; \ | ||
88 | vpxor RNOT, x3, x3; \ | ||
89 | vpand tp, x1, x4; \ | ||
90 | vpor tp, x0, x0; \ | ||
91 | vpxor x2, x3, x3; \ | ||
92 | vpxor x3, x0, x0; \ | ||
93 | vpxor x3, tp, x1; | ||
94 | #define S1_2(x0, x1, x2, x3, x4) \ | ||
95 | vpxor x4, x3, x3; \ | ||
96 | vpor x4, x1, x1; \ | ||
97 | vpxor x2, x4, x4; \ | ||
98 | vpand x0, x2, x2; \ | ||
99 | vpxor x1, x2, x2; \ | ||
100 | vpor x0, x1, x1; \ | ||
101 | vpxor RNOT, x0, x0; \ | ||
102 | vpxor x2, x0, x0; \ | ||
103 | vpxor x1, x4, x4; | ||
104 | |||
105 | #define S2_1(x0, x1, x2, x3, x4) \ | ||
106 | vpxor RNOT, x3, x3; \ | ||
107 | vpxor x0, x1, x1; \ | ||
108 | vpand x2, x0, tp; \ | ||
109 | vpxor x3, tp, tp; \ | ||
110 | vpor x0, x3, x3; \ | ||
111 | vpxor x1, x2, x2; \ | ||
112 | vpxor x1, x3, x3; \ | ||
113 | vpand tp, x1, x1; | ||
114 | #define S2_2(x0, x1, x2, x3, x4) \ | ||
115 | vpxor x2, tp, tp; \ | ||
116 | vpand x3, x2, x2; \ | ||
117 | vpor x1, x3, x3; \ | ||
118 | vpxor RNOT, tp, tp; \ | ||
119 | vpxor tp, x3, x3; \ | ||
120 | vpxor tp, x0, x4; \ | ||
121 | vpxor x2, tp, x0; \ | ||
122 | vpor x2, x1, x1; | ||
123 | |||
124 | #define S3_1(x0, x1, x2, x3, x4) \ | ||
125 | vpxor x3, x1, tp; \ | ||
126 | vpor x0, x3, x3; \ | ||
127 | vpand x0, x1, x4; \ | ||
128 | vpxor x2, x0, x0; \ | ||
129 | vpxor tp, x2, x2; \ | ||
130 | vpand x3, tp, x1; \ | ||
131 | vpxor x3, x2, x2; \ | ||
132 | vpor x4, x0, x0; \ | ||
133 | vpxor x3, x4, x4; | ||
134 | #define S3_2(x0, x1, x2, x3, x4) \ | ||
135 | vpxor x0, x1, x1; \ | ||
136 | vpand x3, x0, x0; \ | ||
137 | vpand x4, x3, x3; \ | ||
138 | vpxor x2, x3, x3; \ | ||
139 | vpor x1, x4, x4; \ | ||
140 | vpand x1, x2, x2; \ | ||
141 | vpxor x3, x4, x4; \ | ||
142 | vpxor x3, x0, x0; \ | ||
143 | vpxor x2, x3, x3; | ||
144 | |||
145 | #define S4_1(x0, x1, x2, x3, x4) \ | ||
146 | vpand x0, x3, tp; \ | ||
147 | vpxor x3, x0, x0; \ | ||
148 | vpxor x2, tp, tp; \ | ||
149 | vpor x3, x2, x2; \ | ||
150 | vpxor x1, x0, x0; \ | ||
151 | vpxor tp, x3, x4; \ | ||
152 | vpor x0, x2, x2; \ | ||
153 | vpxor x1, x2, x2; | ||
154 | #define S4_2(x0, x1, x2, x3, x4) \ | ||
155 | vpand x0, x1, x1; \ | ||
156 | vpxor x4, x1, x1; \ | ||
157 | vpand x2, x4, x4; \ | ||
158 | vpxor tp, x2, x2; \ | ||
159 | vpxor x0, x4, x4; \ | ||
160 | vpor x1, tp, x3; \ | ||
161 | vpxor RNOT, x1, x1; \ | ||
162 | vpxor x0, x3, x3; | ||
163 | |||
164 | #define S5_1(x0, x1, x2, x3, x4) \ | ||
165 | vpor x0, x1, tp; \ | ||
166 | vpxor tp, x2, x2; \ | ||
167 | vpxor RNOT, x3, x3; \ | ||
168 | vpxor x0, x1, x4; \ | ||
169 | vpxor x2, x0, x0; \ | ||
170 | vpand x4, tp, x1; \ | ||
171 | vpor x3, x4, x4; \ | ||
172 | vpxor x0, x4, x4; | ||
173 | #define S5_2(x0, x1, x2, x3, x4) \ | ||
174 | vpand x3, x0, x0; \ | ||
175 | vpxor x3, x1, x1; \ | ||
176 | vpxor x2, x3, x3; \ | ||
177 | vpxor x1, x0, x0; \ | ||
178 | vpand x4, x2, x2; \ | ||
179 | vpxor x2, x1, x1; \ | ||
180 | vpand x0, x2, x2; \ | ||
181 | vpxor x2, x3, x3; | ||
182 | |||
183 | #define S6_1(x0, x1, x2, x3, x4) \ | ||
184 | vpxor x0, x3, x3; \ | ||
185 | vpxor x2, x1, tp; \ | ||
186 | vpxor x0, x2, x2; \ | ||
187 | vpand x3, x0, x0; \ | ||
188 | vpor x3, tp, tp; \ | ||
189 | vpxor RNOT, x1, x4; \ | ||
190 | vpxor tp, x0, x0; \ | ||
191 | vpxor x2, tp, x1; | ||
192 | #define S6_2(x0, x1, x2, x3, x4) \ | ||
193 | vpxor x4, x3, x3; \ | ||
194 | vpxor x0, x4, x4; \ | ||
195 | vpand x0, x2, x2; \ | ||
196 | vpxor x1, x4, x4; \ | ||
197 | vpxor x3, x2, x2; \ | ||
198 | vpand x1, x3, x3; \ | ||
199 | vpxor x0, x3, x3; \ | ||
200 | vpxor x2, x1, x1; | ||
201 | |||
202 | #define S7_1(x0, x1, x2, x3, x4) \ | ||
203 | vpxor RNOT, x1, tp; \ | ||
204 | vpxor RNOT, x0, x0; \ | ||
205 | vpand x2, tp, x1; \ | ||
206 | vpxor x3, x1, x1; \ | ||
207 | vpor tp, x3, x3; \ | ||
208 | vpxor x2, tp, x4; \ | ||
209 | vpxor x3, x2, x2; \ | ||
210 | vpxor x0, x3, x3; \ | ||
211 | vpor x1, x0, x0; | ||
212 | #define S7_2(x0, x1, x2, x3, x4) \ | ||
213 | vpand x0, x2, x2; \ | ||
214 | vpxor x4, x0, x0; \ | ||
215 | vpxor x3, x4, x4; \ | ||
216 | vpand x0, x3, x3; \ | ||
217 | vpxor x1, x4, x4; \ | ||
218 | vpxor x4, x2, x2; \ | ||
219 | vpxor x1, x3, x3; \ | ||
220 | vpor x0, x4, x4; \ | ||
221 | vpxor x1, x4, x4; | ||
222 | |||
223 | #define SI0_1(x0, x1, x2, x3, x4) \ | ||
224 | vpxor x0, x1, x1; \ | ||
225 | vpor x1, x3, tp; \ | ||
226 | vpxor x1, x3, x4; \ | ||
227 | vpxor RNOT, x0, x0; \ | ||
228 | vpxor tp, x2, x2; \ | ||
229 | vpxor x0, tp, x3; \ | ||
230 | vpand x1, x0, x0; \ | ||
231 | vpxor x2, x0, x0; | ||
232 | #define SI0_2(x0, x1, x2, x3, x4) \ | ||
233 | vpand x3, x2, x2; \ | ||
234 | vpxor x4, x3, x3; \ | ||
235 | vpxor x3, x2, x2; \ | ||
236 | vpxor x3, x1, x1; \ | ||
237 | vpand x0, x3, x3; \ | ||
238 | vpxor x0, x1, x1; \ | ||
239 | vpxor x2, x0, x0; \ | ||
240 | vpxor x3, x4, x4; | ||
241 | |||
242 | #define SI1_1(x0, x1, x2, x3, x4) \ | ||
243 | vpxor x3, x1, x1; \ | ||
244 | vpxor x2, x0, tp; \ | ||
245 | vpxor RNOT, x2, x2; \ | ||
246 | vpor x1, x0, x4; \ | ||
247 | vpxor x3, x4, x4; \ | ||
248 | vpand x1, x3, x3; \ | ||
249 | vpxor x2, x1, x1; \ | ||
250 | vpand x4, x2, x2; | ||
251 | #define SI1_2(x0, x1, x2, x3, x4) \ | ||
252 | vpxor x1, x4, x4; \ | ||
253 | vpor x3, x1, x1; \ | ||
254 | vpxor tp, x3, x3; \ | ||
255 | vpxor tp, x2, x2; \ | ||
256 | vpor x4, tp, x0; \ | ||
257 | vpxor x4, x2, x2; \ | ||
258 | vpxor x0, x1, x1; \ | ||
259 | vpxor x1, x4, x4; | ||
260 | |||
261 | #define SI2_1(x0, x1, x2, x3, x4) \ | ||
262 | vpxor x1, x2, x2; \ | ||
263 | vpxor RNOT, x3, tp; \ | ||
264 | vpor x2, tp, tp; \ | ||
265 | vpxor x3, x2, x2; \ | ||
266 | vpxor x0, x3, x4; \ | ||
267 | vpxor x1, tp, x3; \ | ||
268 | vpor x2, x1, x1; \ | ||
269 | vpxor x0, x2, x2; | ||
270 | #define SI2_2(x0, x1, x2, x3, x4) \ | ||
271 | vpxor x4, x1, x1; \ | ||
272 | vpor x3, x4, x4; \ | ||
273 | vpxor x3, x2, x2; \ | ||
274 | vpxor x2, x4, x4; \ | ||
275 | vpand x1, x2, x2; \ | ||
276 | vpxor x3, x2, x2; \ | ||
277 | vpxor x4, x3, x3; \ | ||
278 | vpxor x0, x4, x4; | ||
279 | |||
280 | #define SI3_1(x0, x1, x2, x3, x4) \ | ||
281 | vpxor x1, x2, x2; \ | ||
282 | vpand x2, x1, tp; \ | ||
283 | vpxor x0, tp, tp; \ | ||
284 | vpor x1, x0, x0; \ | ||
285 | vpxor x3, x1, x4; \ | ||
286 | vpxor x3, x0, x0; \ | ||
287 | vpor tp, x3, x3; \ | ||
288 | vpxor x2, tp, x1; | ||
289 | #define SI3_2(x0, x1, x2, x3, x4) \ | ||
290 | vpxor x3, x1, x1; \ | ||
291 | vpxor x2, x0, x0; \ | ||
292 | vpxor x3, x2, x2; \ | ||
293 | vpand x1, x3, x3; \ | ||
294 | vpxor x0, x1, x1; \ | ||
295 | vpand x2, x0, x0; \ | ||
296 | vpxor x3, x4, x4; \ | ||
297 | vpxor x0, x3, x3; \ | ||
298 | vpxor x1, x0, x0; | ||
299 | |||
300 | #define SI4_1(x0, x1, x2, x3, x4) \ | ||
301 | vpxor x3, x2, x2; \ | ||
302 | vpand x1, x0, tp; \ | ||
303 | vpxor x2, tp, tp; \ | ||
304 | vpor x3, x2, x2; \ | ||
305 | vpxor RNOT, x0, x4; \ | ||
306 | vpxor tp, x1, x1; \ | ||
307 | vpxor x2, tp, x0; \ | ||
308 | vpand x4, x2, x2; | ||
309 | #define SI4_2(x0, x1, x2, x3, x4) \ | ||
310 | vpxor x0, x2, x2; \ | ||
311 | vpor x4, x0, x0; \ | ||
312 | vpxor x3, x0, x0; \ | ||
313 | vpand x2, x3, x3; \ | ||
314 | vpxor x3, x4, x4; \ | ||
315 | vpxor x1, x3, x3; \ | ||
316 | vpand x0, x1, x1; \ | ||
317 | vpxor x1, x4, x4; \ | ||
318 | vpxor x3, x0, x0; | ||
319 | |||
320 | #define SI5_1(x0, x1, x2, x3, x4) \ | ||
321 | vpor x2, x1, tp; \ | ||
322 | vpxor x1, x2, x2; \ | ||
323 | vpxor x3, tp, tp; \ | ||
324 | vpand x1, x3, x3; \ | ||
325 | vpxor x3, x2, x2; \ | ||
326 | vpor x0, x3, x3; \ | ||
327 | vpxor RNOT, x0, x0; \ | ||
328 | vpxor x2, x3, x3; \ | ||
329 | vpor x0, x2, x2; | ||
330 | #define SI5_2(x0, x1, x2, x3, x4) \ | ||
331 | vpxor tp, x1, x4; \ | ||
332 | vpxor x4, x2, x2; \ | ||
333 | vpand x0, x4, x4; \ | ||
334 | vpxor tp, x0, x0; \ | ||
335 | vpxor x3, tp, x1; \ | ||
336 | vpand x2, x0, x0; \ | ||
337 | vpxor x3, x2, x2; \ | ||
338 | vpxor x2, x0, x0; \ | ||
339 | vpxor x4, x2, x2; \ | ||
340 | vpxor x3, x4, x4; | ||
341 | |||
342 | #define SI6_1(x0, x1, x2, x3, x4) \ | ||
343 | vpxor x2, x0, x0; \ | ||
344 | vpand x3, x0, tp; \ | ||
345 | vpxor x3, x2, x2; \ | ||
346 | vpxor x2, tp, tp; \ | ||
347 | vpxor x1, x3, x3; \ | ||
348 | vpor x0, x2, x2; \ | ||
349 | vpxor x3, x2, x2; \ | ||
350 | vpand tp, x3, x3; | ||
351 | #define SI6_2(x0, x1, x2, x3, x4) \ | ||
352 | vpxor RNOT, tp, tp; \ | ||
353 | vpxor x1, x3, x3; \ | ||
354 | vpand x2, x1, x1; \ | ||
355 | vpxor tp, x0, x4; \ | ||
356 | vpxor x4, x3, x3; \ | ||
357 | vpxor x2, x4, x4; \ | ||
358 | vpxor x1, tp, x0; \ | ||
359 | vpxor x0, x2, x2; | ||
360 | |||
361 | #define SI7_1(x0, x1, x2, x3, x4) \ | ||
362 | vpand x0, x3, tp; \ | ||
363 | vpxor x2, x0, x0; \ | ||
364 | vpor x3, x2, x2; \ | ||
365 | vpxor x1, x3, x4; \ | ||
366 | vpxor RNOT, x0, x0; \ | ||
367 | vpor tp, x1, x1; \ | ||
368 | vpxor x0, x4, x4; \ | ||
369 | vpand x2, x0, x0; \ | ||
370 | vpxor x1, x0, x0; | ||
371 | #define SI7_2(x0, x1, x2, x3, x4) \ | ||
372 | vpand x2, x1, x1; \ | ||
373 | vpxor x2, tp, x3; \ | ||
374 | vpxor x3, x4, x4; \ | ||
375 | vpand x3, x2, x2; \ | ||
376 | vpor x0, x3, x3; \ | ||
377 | vpxor x4, x1, x1; \ | ||
378 | vpxor x4, x3, x3; \ | ||
379 | vpand x0, x4, x4; \ | ||
380 | vpxor x2, x4, x4; | ||
381 | |||
382 | #define get_key(i, j, t) \ | ||
383 | vbroadcastss (4*(i)+(j))*4(CTX), t; | ||
384 | |||
385 | #define K2(x0, x1, x2, x3, x4, i) \ | ||
386 | get_key(i, 0, RK0); \ | ||
387 | get_key(i, 1, RK1); \ | ||
388 | get_key(i, 2, RK2); \ | ||
389 | get_key(i, 3, RK3); \ | ||
390 | vpxor RK0, x0 ## 1, x0 ## 1; \ | ||
391 | vpxor RK1, x1 ## 1, x1 ## 1; \ | ||
392 | vpxor RK2, x2 ## 1, x2 ## 1; \ | ||
393 | vpxor RK3, x3 ## 1, x3 ## 1; \ | ||
394 | vpxor RK0, x0 ## 2, x0 ## 2; \ | ||
395 | vpxor RK1, x1 ## 2, x1 ## 2; \ | ||
396 | vpxor RK2, x2 ## 2, x2 ## 2; \ | ||
397 | vpxor RK3, x3 ## 2, x3 ## 2; | ||
398 | |||
399 | #define LK2(x0, x1, x2, x3, x4, i) \ | ||
400 | vpslld $13, x0 ## 1, x4 ## 1; \ | ||
401 | vpsrld $(32 - 13), x0 ## 1, x0 ## 1; \ | ||
402 | vpor x4 ## 1, x0 ## 1, x0 ## 1; \ | ||
403 | vpxor x0 ## 1, x1 ## 1, x1 ## 1; \ | ||
404 | vpslld $3, x2 ## 1, x4 ## 1; \ | ||
405 | vpsrld $(32 - 3), x2 ## 1, x2 ## 1; \ | ||
406 | vpor x4 ## 1, x2 ## 1, x2 ## 1; \ | ||
407 | vpxor x2 ## 1, x1 ## 1, x1 ## 1; \ | ||
408 | vpslld $13, x0 ## 2, x4 ## 2; \ | ||
409 | vpsrld $(32 - 13), x0 ## 2, x0 ## 2; \ | ||
410 | vpor x4 ## 2, x0 ## 2, x0 ## 2; \ | ||
411 | vpxor x0 ## 2, x1 ## 2, x1 ## 2; \ | ||
412 | vpslld $3, x2 ## 2, x4 ## 2; \ | ||
413 | vpsrld $(32 - 3), x2 ## 2, x2 ## 2; \ | ||
414 | vpor x4 ## 2, x2 ## 2, x2 ## 2; \ | ||
415 | vpxor x2 ## 2, x1 ## 2, x1 ## 2; \ | ||
416 | vpslld $1, x1 ## 1, x4 ## 1; \ | ||
417 | vpsrld $(32 - 1), x1 ## 1, x1 ## 1; \ | ||
418 | vpor x4 ## 1, x1 ## 1, x1 ## 1; \ | ||
419 | vpslld $3, x0 ## 1, x4 ## 1; \ | ||
420 | vpxor x2 ## 1, x3 ## 1, x3 ## 1; \ | ||
421 | vpxor x4 ## 1, x3 ## 1, x3 ## 1; \ | ||
422 | get_key(i, 1, RK1); \ | ||
423 | vpslld $1, x1 ## 2, x4 ## 2; \ | ||
424 | vpsrld $(32 - 1), x1 ## 2, x1 ## 2; \ | ||
425 | vpor x4 ## 2, x1 ## 2, x1 ## 2; \ | ||
426 | vpslld $3, x0 ## 2, x4 ## 2; \ | ||
427 | vpxor x2 ## 2, x3 ## 2, x3 ## 2; \ | ||
428 | vpxor x4 ## 2, x3 ## 2, x3 ## 2; \ | ||
429 | get_key(i, 3, RK3); \ | ||
430 | vpslld $7, x3 ## 1, x4 ## 1; \ | ||
431 | vpsrld $(32 - 7), x3 ## 1, x3 ## 1; \ | ||
432 | vpor x4 ## 1, x3 ## 1, x3 ## 1; \ | ||
433 | vpslld $7, x1 ## 1, x4 ## 1; \ | ||
434 | vpxor x1 ## 1, x0 ## 1, x0 ## 1; \ | ||
435 | vpxor x3 ## 1, x0 ## 1, x0 ## 1; \ | ||
436 | vpxor x3 ## 1, x2 ## 1, x2 ## 1; \ | ||
437 | vpxor x4 ## 1, x2 ## 1, x2 ## 1; \ | ||
438 | get_key(i, 0, RK0); \ | ||
439 | vpslld $7, x3 ## 2, x4 ## 2; \ | ||
440 | vpsrld $(32 - 7), x3 ## 2, x3 ## 2; \ | ||
441 | vpor x4 ## 2, x3 ## 2, x3 ## 2; \ | ||
442 | vpslld $7, x1 ## 2, x4 ## 2; \ | ||
443 | vpxor x1 ## 2, x0 ## 2, x0 ## 2; \ | ||
444 | vpxor x3 ## 2, x0 ## 2, x0 ## 2; \ | ||
445 | vpxor x3 ## 2, x2 ## 2, x2 ## 2; \ | ||
446 | vpxor x4 ## 2, x2 ## 2, x2 ## 2; \ | ||
447 | get_key(i, 2, RK2); \ | ||
448 | vpxor RK1, x1 ## 1, x1 ## 1; \ | ||
449 | vpxor RK3, x3 ## 1, x3 ## 1; \ | ||
450 | vpslld $5, x0 ## 1, x4 ## 1; \ | ||
451 | vpsrld $(32 - 5), x0 ## 1, x0 ## 1; \ | ||
452 | vpor x4 ## 1, x0 ## 1, x0 ## 1; \ | ||
453 | vpslld $22, x2 ## 1, x4 ## 1; \ | ||
454 | vpsrld $(32 - 22), x2 ## 1, x2 ## 1; \ | ||
455 | vpor x4 ## 1, x2 ## 1, x2 ## 1; \ | ||
456 | vpxor RK0, x0 ## 1, x0 ## 1; \ | ||
457 | vpxor RK2, x2 ## 1, x2 ## 1; \ | ||
458 | vpxor RK1, x1 ## 2, x1 ## 2; \ | ||
459 | vpxor RK3, x3 ## 2, x3 ## 2; \ | ||
460 | vpslld $5, x0 ## 2, x4 ## 2; \ | ||
461 | vpsrld $(32 - 5), x0 ## 2, x0 ## 2; \ | ||
462 | vpor x4 ## 2, x0 ## 2, x0 ## 2; \ | ||
463 | vpslld $22, x2 ## 2, x4 ## 2; \ | ||
464 | vpsrld $(32 - 22), x2 ## 2, x2 ## 2; \ | ||
465 | vpor x4 ## 2, x2 ## 2, x2 ## 2; \ | ||
466 | vpxor RK0, x0 ## 2, x0 ## 2; \ | ||
467 | vpxor RK2, x2 ## 2, x2 ## 2; | ||
468 | |||
469 | #define KL2(x0, x1, x2, x3, x4, i) \ | ||
470 | vpxor RK0, x0 ## 1, x0 ## 1; \ | ||
471 | vpxor RK2, x2 ## 1, x2 ## 1; \ | ||
472 | vpsrld $5, x0 ## 1, x4 ## 1; \ | ||
473 | vpslld $(32 - 5), x0 ## 1, x0 ## 1; \ | ||
474 | vpor x4 ## 1, x0 ## 1, x0 ## 1; \ | ||
475 | vpxor RK3, x3 ## 1, x3 ## 1; \ | ||
476 | vpxor RK1, x1 ## 1, x1 ## 1; \ | ||
477 | vpsrld $22, x2 ## 1, x4 ## 1; \ | ||
478 | vpslld $(32 - 22), x2 ## 1, x2 ## 1; \ | ||
479 | vpor x4 ## 1, x2 ## 1, x2 ## 1; \ | ||
480 | vpxor x3 ## 1, x2 ## 1, x2 ## 1; \ | ||
481 | vpxor RK0, x0 ## 2, x0 ## 2; \ | ||
482 | vpxor RK2, x2 ## 2, x2 ## 2; \ | ||
483 | vpsrld $5, x0 ## 2, x4 ## 2; \ | ||
484 | vpslld $(32 - 5), x0 ## 2, x0 ## 2; \ | ||
485 | vpor x4 ## 2, x0 ## 2, x0 ## 2; \ | ||
486 | vpxor RK3, x3 ## 2, x3 ## 2; \ | ||
487 | vpxor RK1, x1 ## 2, x1 ## 2; \ | ||
488 | vpsrld $22, x2 ## 2, x4 ## 2; \ | ||
489 | vpslld $(32 - 22), x2 ## 2, x2 ## 2; \ | ||
490 | vpor x4 ## 2, x2 ## 2, x2 ## 2; \ | ||
491 | vpxor x3 ## 2, x2 ## 2, x2 ## 2; \ | ||
492 | vpxor x3 ## 1, x0 ## 1, x0 ## 1; \ | ||
493 | vpslld $7, x1 ## 1, x4 ## 1; \ | ||
494 | vpxor x1 ## 1, x0 ## 1, x0 ## 1; \ | ||
495 | vpxor x4 ## 1, x2 ## 1, x2 ## 1; \ | ||
496 | vpsrld $1, x1 ## 1, x4 ## 1; \ | ||
497 | vpslld $(32 - 1), x1 ## 1, x1 ## 1; \ | ||
498 | vpor x4 ## 1, x1 ## 1, x1 ## 1; \ | ||
499 | vpxor x3 ## 2, x0 ## 2, x0 ## 2; \ | ||
500 | vpslld $7, x1 ## 2, x4 ## 2; \ | ||
501 | vpxor x1 ## 2, x0 ## 2, x0 ## 2; \ | ||
502 | vpxor x4 ## 2, x2 ## 2, x2 ## 2; \ | ||
503 | vpsrld $1, x1 ## 2, x4 ## 2; \ | ||
504 | vpslld $(32 - 1), x1 ## 2, x1 ## 2; \ | ||
505 | vpor x4 ## 2, x1 ## 2, x1 ## 2; \ | ||
506 | vpsrld $7, x3 ## 1, x4 ## 1; \ | ||
507 | vpslld $(32 - 7), x3 ## 1, x3 ## 1; \ | ||
508 | vpor x4 ## 1, x3 ## 1, x3 ## 1; \ | ||
509 | vpxor x0 ## 1, x1 ## 1, x1 ## 1; \ | ||
510 | vpslld $3, x0 ## 1, x4 ## 1; \ | ||
511 | vpxor x4 ## 1, x3 ## 1, x3 ## 1; \ | ||
512 | vpsrld $7, x3 ## 2, x4 ## 2; \ | ||
513 | vpslld $(32 - 7), x3 ## 2, x3 ## 2; \ | ||
514 | vpor x4 ## 2, x3 ## 2, x3 ## 2; \ | ||
515 | vpxor x0 ## 2, x1 ## 2, x1 ## 2; \ | ||
516 | vpslld $3, x0 ## 2, x4 ## 2; \ | ||
517 | vpxor x4 ## 2, x3 ## 2, x3 ## 2; \ | ||
518 | vpsrld $13, x0 ## 1, x4 ## 1; \ | ||
519 | vpslld $(32 - 13), x0 ## 1, x0 ## 1; \ | ||
520 | vpor x4 ## 1, x0 ## 1, x0 ## 1; \ | ||
521 | vpxor x2 ## 1, x1 ## 1, x1 ## 1; \ | ||
522 | vpxor x2 ## 1, x3 ## 1, x3 ## 1; \ | ||
523 | vpsrld $3, x2 ## 1, x4 ## 1; \ | ||
524 | vpslld $(32 - 3), x2 ## 1, x2 ## 1; \ | ||
525 | vpor x4 ## 1, x2 ## 1, x2 ## 1; \ | ||
526 | vpsrld $13, x0 ## 2, x4 ## 2; \ | ||
527 | vpslld $(32 - 13), x0 ## 2, x0 ## 2; \ | ||
528 | vpor x4 ## 2, x0 ## 2, x0 ## 2; \ | ||
529 | vpxor x2 ## 2, x1 ## 2, x1 ## 2; \ | ||
530 | vpxor x2 ## 2, x3 ## 2, x3 ## 2; \ | ||
531 | vpsrld $3, x2 ## 2, x4 ## 2; \ | ||
532 | vpslld $(32 - 3), x2 ## 2, x2 ## 2; \ | ||
533 | vpor x4 ## 2, x2 ## 2, x2 ## 2; | ||
534 | |||
535 | #define S(SBOX, x0, x1, x2, x3, x4) \ | ||
536 | SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ | ||
537 | SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ | ||
538 | SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ | ||
539 | SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); | ||
540 | |||
541 | #define SP(SBOX, x0, x1, x2, x3, x4, i) \ | ||
542 | get_key(i, 0, RK0); \ | ||
543 | SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ | ||
544 | get_key(i, 2, RK2); \ | ||
545 | SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ | ||
546 | get_key(i, 3, RK3); \ | ||
547 | SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ | ||
548 | get_key(i, 1, RK1); \ | ||
549 | SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ | ||
550 | |||
551 | #define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ | ||
552 | vpunpckldq x1, x0, t0; \ | ||
553 | vpunpckhdq x1, x0, t2; \ | ||
554 | vpunpckldq x3, x2, t1; \ | ||
555 | vpunpckhdq x3, x2, x3; \ | ||
556 | \ | ||
557 | vpunpcklqdq t1, t0, x0; \ | ||
558 | vpunpckhqdq t1, t0, x1; \ | ||
559 | vpunpcklqdq x3, t2, x2; \ | ||
560 | vpunpckhqdq x3, t2, x3; | ||
561 | |||
562 | #define read_blocks(x0, x1, x2, x3, t0, t1, t2) \ | ||
563 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) | ||
564 | |||
565 | #define write_blocks(x0, x1, x2, x3, t0, t1, t2) \ | ||
566 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) | ||
567 | |||
568 | .align 8 | ||
569 | .type __serpent_enc_blk8_avx,@function; | ||
570 | |||
571 | __serpent_enc_blk8_avx: | ||
572 | /* input: | ||
573 | * %rdi: ctx, CTX | ||
574 | * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks | ||
575 | * output: | ||
576 | * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks | ||
577 | */ | ||
578 | |||
579 | vpcmpeqd RNOT, RNOT, RNOT; | ||
580 | |||
581 | read_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2); | ||
582 | read_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2); | ||
583 | |||
584 | K2(RA, RB, RC, RD, RE, 0); | ||
585 | S(S0, RA, RB, RC, RD, RE); LK2(RC, RB, RD, RA, RE, 1); | ||
586 | S(S1, RC, RB, RD, RA, RE); LK2(RE, RD, RA, RC, RB, 2); | ||
587 | S(S2, RE, RD, RA, RC, RB); LK2(RB, RD, RE, RC, RA, 3); | ||
588 | S(S3, RB, RD, RE, RC, RA); LK2(RC, RA, RD, RB, RE, 4); | ||
589 | S(S4, RC, RA, RD, RB, RE); LK2(RA, RD, RB, RE, RC, 5); | ||
590 | S(S5, RA, RD, RB, RE, RC); LK2(RC, RA, RD, RE, RB, 6); | ||
591 | S(S6, RC, RA, RD, RE, RB); LK2(RD, RB, RA, RE, RC, 7); | ||
592 | S(S7, RD, RB, RA, RE, RC); LK2(RC, RA, RE, RD, RB, 8); | ||
593 | S(S0, RC, RA, RE, RD, RB); LK2(RE, RA, RD, RC, RB, 9); | ||
594 | S(S1, RE, RA, RD, RC, RB); LK2(RB, RD, RC, RE, RA, 10); | ||
595 | S(S2, RB, RD, RC, RE, RA); LK2(RA, RD, RB, RE, RC, 11); | ||
596 | S(S3, RA, RD, RB, RE, RC); LK2(RE, RC, RD, RA, RB, 12); | ||
597 | S(S4, RE, RC, RD, RA, RB); LK2(RC, RD, RA, RB, RE, 13); | ||
598 | S(S5, RC, RD, RA, RB, RE); LK2(RE, RC, RD, RB, RA, 14); | ||
599 | S(S6, RE, RC, RD, RB, RA); LK2(RD, RA, RC, RB, RE, 15); | ||
600 | S(S7, RD, RA, RC, RB, RE); LK2(RE, RC, RB, RD, RA, 16); | ||
601 | S(S0, RE, RC, RB, RD, RA); LK2(RB, RC, RD, RE, RA, 17); | ||
602 | S(S1, RB, RC, RD, RE, RA); LK2(RA, RD, RE, RB, RC, 18); | ||
603 | S(S2, RA, RD, RE, RB, RC); LK2(RC, RD, RA, RB, RE, 19); | ||
604 | S(S3, RC, RD, RA, RB, RE); LK2(RB, RE, RD, RC, RA, 20); | ||
605 | S(S4, RB, RE, RD, RC, RA); LK2(RE, RD, RC, RA, RB, 21); | ||
606 | S(S5, RE, RD, RC, RA, RB); LK2(RB, RE, RD, RA, RC, 22); | ||
607 | S(S6, RB, RE, RD, RA, RC); LK2(RD, RC, RE, RA, RB, 23); | ||
608 | S(S7, RD, RC, RE, RA, RB); LK2(RB, RE, RA, RD, RC, 24); | ||
609 | S(S0, RB, RE, RA, RD, RC); LK2(RA, RE, RD, RB, RC, 25); | ||
610 | S(S1, RA, RE, RD, RB, RC); LK2(RC, RD, RB, RA, RE, 26); | ||
611 | S(S2, RC, RD, RB, RA, RE); LK2(RE, RD, RC, RA, RB, 27); | ||
612 | S(S3, RE, RD, RC, RA, RB); LK2(RA, RB, RD, RE, RC, 28); | ||
613 | S(S4, RA, RB, RD, RE, RC); LK2(RB, RD, RE, RC, RA, 29); | ||
614 | S(S5, RB, RD, RE, RC, RA); LK2(RA, RB, RD, RC, RE, 30); | ||
615 | S(S6, RA, RB, RD, RC, RE); LK2(RD, RE, RB, RC, RA, 31); | ||
616 | S(S7, RD, RE, RB, RC, RA); K2(RA, RB, RC, RD, RE, 32); | ||
617 | |||
618 | write_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2); | ||
619 | write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2); | ||
620 | |||
621 | ret; | ||
622 | |||
623 | .align 8 | ||
624 | .type __serpent_dec_blk8_avx,@function; | ||
625 | |||
626 | __serpent_dec_blk8_avx: | ||
627 | /* input: | ||
628 | * %rdi: ctx, CTX | ||
629 | * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks | ||
630 | * output: | ||
631 | * RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2: decrypted blocks | ||
632 | */ | ||
633 | |||
634 | vpcmpeqd RNOT, RNOT, RNOT; | ||
635 | |||
636 | read_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2); | ||
637 | read_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2); | ||
638 | |||
639 | K2(RA, RB, RC, RD, RE, 32); | ||
640 | SP(SI7, RA, RB, RC, RD, RE, 31); KL2(RB, RD, RA, RE, RC, 31); | ||
641 | SP(SI6, RB, RD, RA, RE, RC, 30); KL2(RA, RC, RE, RB, RD, 30); | ||
642 | SP(SI5, RA, RC, RE, RB, RD, 29); KL2(RC, RD, RA, RE, RB, 29); | ||
643 | SP(SI4, RC, RD, RA, RE, RB, 28); KL2(RC, RA, RB, RE, RD, 28); | ||
644 | SP(SI3, RC, RA, RB, RE, RD, 27); KL2(RB, RC, RD, RE, RA, 27); | ||
645 | SP(SI2, RB, RC, RD, RE, RA, 26); KL2(RC, RA, RE, RD, RB, 26); | ||
646 | SP(SI1, RC, RA, RE, RD, RB, 25); KL2(RB, RA, RE, RD, RC, 25); | ||
647 | SP(SI0, RB, RA, RE, RD, RC, 24); KL2(RE, RC, RA, RB, RD, 24); | ||
648 | SP(SI7, RE, RC, RA, RB, RD, 23); KL2(RC, RB, RE, RD, RA, 23); | ||
649 | SP(SI6, RC, RB, RE, RD, RA, 22); KL2(RE, RA, RD, RC, RB, 22); | ||
650 | SP(SI5, RE, RA, RD, RC, RB, 21); KL2(RA, RB, RE, RD, RC, 21); | ||
651 | SP(SI4, RA, RB, RE, RD, RC, 20); KL2(RA, RE, RC, RD, RB, 20); | ||
652 | SP(SI3, RA, RE, RC, RD, RB, 19); KL2(RC, RA, RB, RD, RE, 19); | ||
653 | SP(SI2, RC, RA, RB, RD, RE, 18); KL2(RA, RE, RD, RB, RC, 18); | ||
654 | SP(SI1, RA, RE, RD, RB, RC, 17); KL2(RC, RE, RD, RB, RA, 17); | ||
655 | SP(SI0, RC, RE, RD, RB, RA, 16); KL2(RD, RA, RE, RC, RB, 16); | ||
656 | SP(SI7, RD, RA, RE, RC, RB, 15); KL2(RA, RC, RD, RB, RE, 15); | ||
657 | SP(SI6, RA, RC, RD, RB, RE, 14); KL2(RD, RE, RB, RA, RC, 14); | ||
658 | SP(SI5, RD, RE, RB, RA, RC, 13); KL2(RE, RC, RD, RB, RA, 13); | ||
659 | SP(SI4, RE, RC, RD, RB, RA, 12); KL2(RE, RD, RA, RB, RC, 12); | ||
660 | SP(SI3, RE, RD, RA, RB, RC, 11); KL2(RA, RE, RC, RB, RD, 11); | ||
661 | SP(SI2, RA, RE, RC, RB, RD, 10); KL2(RE, RD, RB, RC, RA, 10); | ||
662 | SP(SI1, RE, RD, RB, RC, RA, 9); KL2(RA, RD, RB, RC, RE, 9); | ||
663 | SP(SI0, RA, RD, RB, RC, RE, 8); KL2(RB, RE, RD, RA, RC, 8); | ||
664 | SP(SI7, RB, RE, RD, RA, RC, 7); KL2(RE, RA, RB, RC, RD, 7); | ||
665 | SP(SI6, RE, RA, RB, RC, RD, 6); KL2(RB, RD, RC, RE, RA, 6); | ||
666 | SP(SI5, RB, RD, RC, RE, RA, 5); KL2(RD, RA, RB, RC, RE, 5); | ||
667 | SP(SI4, RD, RA, RB, RC, RE, 4); KL2(RD, RB, RE, RC, RA, 4); | ||
668 | SP(SI3, RD, RB, RE, RC, RA, 3); KL2(RE, RD, RA, RC, RB, 3); | ||
669 | SP(SI2, RE, RD, RA, RC, RB, 2); KL2(RD, RB, RC, RA, RE, 2); | ||
670 | SP(SI1, RD, RB, RC, RA, RE, 1); KL2(RE, RB, RC, RA, RD, 1); | ||
671 | S(SI0, RE, RB, RC, RA, RD); K2(RC, RD, RB, RE, RA, 0); | ||
672 | |||
673 | write_blocks(RC1, RD1, RB1, RE1, RK0, RK1, RK2); | ||
674 | write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2); | ||
675 | |||
676 | ret; | ||
677 | |||
678 | .align 8 | ||
679 | .global serpent_ecb_enc_8way_avx | ||
680 | .type serpent_ecb_enc_8way_avx,@function; | ||
681 | |||
682 | serpent_ecb_enc_8way_avx: | ||
683 | /* input: | ||
684 | * %rdi: ctx, CTX | ||
685 | * %rsi: dst | ||
686 | * %rdx: src | ||
687 | */ | ||
688 | |||
689 | load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | ||
690 | |||
691 | call __serpent_enc_blk8_avx; | ||
692 | |||
693 | store_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | ||
694 | |||
695 | ret; | ||
696 | |||
697 | .align 8 | ||
698 | .global serpent_ecb_dec_8way_avx | ||
699 | .type serpent_ecb_dec_8way_avx,@function; | ||
700 | |||
701 | serpent_ecb_dec_8way_avx: | ||
702 | /* input: | ||
703 | * %rdi: ctx, CTX | ||
704 | * %rsi: dst | ||
705 | * %rdx: src | ||
706 | */ | ||
707 | |||
708 | load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | ||
709 | |||
710 | call __serpent_dec_blk8_avx; | ||
711 | |||
712 | store_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); | ||
713 | |||
714 | ret; | ||
715 | |||
716 | .align 8 | ||
717 | .global serpent_cbc_dec_8way_avx | ||
718 | .type serpent_cbc_dec_8way_avx,@function; | ||
719 | |||
720 | serpent_cbc_dec_8way_avx: | ||
721 | /* input: | ||
722 | * %rdi: ctx, CTX | ||
723 | * %rsi: dst | ||
724 | * %rdx: src | ||
725 | */ | ||
726 | |||
727 | load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | ||
728 | |||
729 | call __serpent_dec_blk8_avx; | ||
730 | |||
731 | store_cbc_8way(%rdx, %rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); | ||
732 | |||
733 | ret; | ||
734 | |||
735 | .align 8 | ||
736 | .global serpent_ctr_8way_avx | ||
737 | .type serpent_ctr_8way_avx,@function; | ||
738 | |||
739 | serpent_ctr_8way_avx: | ||
740 | /* input: | ||
741 | * %rdi: ctx, CTX | ||
742 | * %rsi: dst | ||
743 | * %rdx: src | ||
744 | * %rcx: iv (little endian, 128bit) | ||
745 | */ | ||
746 | |||
747 | load_ctr_8way(%rcx, .Lbswap128_mask, RA1, RB1, RC1, RD1, RA2, RB2, RC2, | ||
748 | RD2, RK0, RK1, RK2); | ||
749 | |||
750 | call __serpent_enc_blk8_avx; | ||
751 | |||
752 | store_ctr_8way(%rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | ||
753 | |||
754 | ret; | ||
diff --git a/arch/x86/crypto/serpent-sse2-i586-asm_32.S b/arch/x86/crypto/serpent-sse2-i586-asm_32.S deleted file mode 100644 index c00053d42f9..00000000000 --- a/arch/x86/crypto/serpent-sse2-i586-asm_32.S +++ /dev/null | |||
@@ -1,635 +0,0 @@ | |||
1 | /* | ||
2 | * Serpent Cipher 4-way parallel algorithm (i586/SSE2) | ||
3 | * | ||
4 | * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
5 | * | ||
6 | * Based on crypto/serpent.c by | ||
7 | * Copyright (C) 2002 Dag Arne Osvik <osvik@ii.uib.no> | ||
8 | * 2003 Herbert Valerio Riedel <hvr@gnu.org> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; either version 2 of the License, or | ||
13 | * (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 | * GNU General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, write to the Free Software | ||
22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
23 | * USA | ||
24 | * | ||
25 | */ | ||
26 | |||
27 | .file "serpent-sse2-i586-asm_32.S" | ||
28 | .text | ||
29 | |||
30 | #define arg_ctx 4 | ||
31 | #define arg_dst 8 | ||
32 | #define arg_src 12 | ||
33 | #define arg_xor 16 | ||
34 | |||
35 | /********************************************************************** | ||
36 | 4-way SSE2 serpent | ||
37 | **********************************************************************/ | ||
38 | #define CTX %edx | ||
39 | |||
40 | #define RA %xmm0 | ||
41 | #define RB %xmm1 | ||
42 | #define RC %xmm2 | ||
43 | #define RD %xmm3 | ||
44 | #define RE %xmm4 | ||
45 | |||
46 | #define RT0 %xmm5 | ||
47 | #define RT1 %xmm6 | ||
48 | |||
49 | #define RNOT %xmm7 | ||
50 | |||
51 | #define get_key(i, j, t) \ | ||
52 | movd (4*(i)+(j))*4(CTX), t; \ | ||
53 | pshufd $0, t, t; | ||
54 | |||
55 | #define K(x0, x1, x2, x3, x4, i) \ | ||
56 | get_key(i, 0, x4); \ | ||
57 | get_key(i, 1, RT0); \ | ||
58 | get_key(i, 2, RT1); \ | ||
59 | pxor x4, x0; \ | ||
60 | pxor RT0, x1; \ | ||
61 | pxor RT1, x2; \ | ||
62 | get_key(i, 3, x4); \ | ||
63 | pxor x4, x3; | ||
64 | |||
65 | #define LK(x0, x1, x2, x3, x4, i) \ | ||
66 | movdqa x0, x4; \ | ||
67 | pslld $13, x0; \ | ||
68 | psrld $(32 - 13), x4; \ | ||
69 | por x4, x0; \ | ||
70 | pxor x0, x1; \ | ||
71 | movdqa x2, x4; \ | ||
72 | pslld $3, x2; \ | ||
73 | psrld $(32 - 3), x4; \ | ||
74 | por x4, x2; \ | ||
75 | pxor x2, x1; \ | ||
76 | movdqa x1, x4; \ | ||
77 | pslld $1, x1; \ | ||
78 | psrld $(32 - 1), x4; \ | ||
79 | por x4, x1; \ | ||
80 | movdqa x0, x4; \ | ||
81 | pslld $3, x4; \ | ||
82 | pxor x2, x3; \ | ||
83 | pxor x4, x3; \ | ||
84 | movdqa x3, x4; \ | ||
85 | pslld $7, x3; \ | ||
86 | psrld $(32 - 7), x4; \ | ||
87 | por x4, x3; \ | ||
88 | movdqa x1, x4; \ | ||
89 | pslld $7, x4; \ | ||
90 | pxor x1, x0; \ | ||
91 | pxor x3, x0; \ | ||
92 | pxor x3, x2; \ | ||
93 | pxor x4, x2; \ | ||
94 | movdqa x0, x4; \ | ||
95 | get_key(i, 1, RT0); \ | ||
96 | pxor RT0, x1; \ | ||
97 | get_key(i, 3, RT0); \ | ||
98 | pxor RT0, x3; \ | ||
99 | pslld $5, x0; \ | ||
100 | psrld $(32 - 5), x4; \ | ||
101 | por x4, x0; \ | ||
102 | movdqa x2, x4; \ | ||
103 | pslld $22, x2; \ | ||
104 | psrld $(32 - 22), x4; \ | ||
105 | por x4, x2; \ | ||
106 | get_key(i, 0, RT0); \ | ||
107 | pxor RT0, x0; \ | ||
108 | get_key(i, 2, RT0); \ | ||
109 | pxor RT0, x2; | ||
110 | |||
111 | #define KL(x0, x1, x2, x3, x4, i) \ | ||
112 | K(x0, x1, x2, x3, x4, i); \ | ||
113 | movdqa x0, x4; \ | ||
114 | psrld $5, x0; \ | ||
115 | pslld $(32 - 5), x4; \ | ||
116 | por x4, x0; \ | ||
117 | movdqa x2, x4; \ | ||
118 | psrld $22, x2; \ | ||
119 | pslld $(32 - 22), x4; \ | ||
120 | por x4, x2; \ | ||
121 | pxor x3, x2; \ | ||
122 | pxor x3, x0; \ | ||
123 | movdqa x1, x4; \ | ||
124 | pslld $7, x4; \ | ||
125 | pxor x1, x0; \ | ||
126 | pxor x4, x2; \ | ||
127 | movdqa x1, x4; \ | ||
128 | psrld $1, x1; \ | ||
129 | pslld $(32 - 1), x4; \ | ||
130 | por x4, x1; \ | ||
131 | movdqa x3, x4; \ | ||
132 | psrld $7, x3; \ | ||
133 | pslld $(32 - 7), x4; \ | ||
134 | por x4, x3; \ | ||
135 | pxor x0, x1; \ | ||
136 | movdqa x0, x4; \ | ||
137 | pslld $3, x4; \ | ||
138 | pxor x4, x3; \ | ||
139 | movdqa x0, x4; \ | ||
140 | psrld $13, x0; \ | ||
141 | pslld $(32 - 13), x4; \ | ||
142 | por x4, x0; \ | ||
143 | pxor x2, x1; \ | ||
144 | pxor x2, x3; \ | ||
145 | movdqa x2, x4; \ | ||
146 | psrld $3, x2; \ | ||
147 | pslld $(32 - 3), x4; \ | ||
148 | por x4, x2; | ||
149 | |||
150 | #define S0(x0, x1, x2, x3, x4) \ | ||
151 | movdqa x3, x4; \ | ||
152 | por x0, x3; \ | ||
153 | pxor x4, x0; \ | ||
154 | pxor x2, x4; \ | ||
155 | pxor RNOT, x4; \ | ||
156 | pxor x1, x3; \ | ||
157 | pand x0, x1; \ | ||
158 | pxor x4, x1; \ | ||
159 | pxor x0, x2; \ | ||
160 | pxor x3, x0; \ | ||
161 | por x0, x4; \ | ||
162 | pxor x2, x0; \ | ||
163 | pand x1, x2; \ | ||
164 | pxor x2, x3; \ | ||
165 | pxor RNOT, x1; \ | ||
166 | pxor x4, x2; \ | ||
167 | pxor x2, x1; | ||
168 | |||
169 | #define S1(x0, x1, x2, x3, x4) \ | ||
170 | movdqa x1, x4; \ | ||
171 | pxor x0, x1; \ | ||
172 | pxor x3, x0; \ | ||
173 | pxor RNOT, x3; \ | ||
174 | pand x1, x4; \ | ||
175 | por x1, x0; \ | ||
176 | pxor x2, x3; \ | ||
177 | pxor x3, x0; \ | ||
178 | pxor x3, x1; \ | ||
179 | pxor x4, x3; \ | ||
180 | por x4, x1; \ | ||
181 | pxor x2, x4; \ | ||
182 | pand x0, x2; \ | ||
183 | pxor x1, x2; \ | ||
184 | por x0, x1; \ | ||
185 | pxor RNOT, x0; \ | ||
186 | pxor x2, x0; \ | ||
187 | pxor x1, x4; | ||
188 | |||
189 | #define S2(x0, x1, x2, x3, x4) \ | ||
190 | pxor RNOT, x3; \ | ||
191 | pxor x0, x1; \ | ||
192 | movdqa x0, x4; \ | ||
193 | pand x2, x0; \ | ||
194 | pxor x3, x0; \ | ||
195 | por x4, x3; \ | ||
196 | pxor x1, x2; \ | ||
197 | pxor x1, x3; \ | ||
198 | pand x0, x1; \ | ||
199 | pxor x2, x0; \ | ||
200 | pand x3, x2; \ | ||
201 | por x1, x3; \ | ||
202 | pxor RNOT, x0; \ | ||
203 | pxor x0, x3; \ | ||
204 | pxor x0, x4; \ | ||
205 | pxor x2, x0; \ | ||
206 | por x2, x1; | ||
207 | |||
208 | #define S3(x0, x1, x2, x3, x4) \ | ||
209 | movdqa x1, x4; \ | ||
210 | pxor x3, x1; \ | ||
211 | por x0, x3; \ | ||
212 | pand x0, x4; \ | ||
213 | pxor x2, x0; \ | ||
214 | pxor x1, x2; \ | ||
215 | pand x3, x1; \ | ||
216 | pxor x3, x2; \ | ||
217 | por x4, x0; \ | ||
218 | pxor x3, x4; \ | ||
219 | pxor x0, x1; \ | ||
220 | pand x3, x0; \ | ||
221 | pand x4, x3; \ | ||
222 | pxor x2, x3; \ | ||
223 | por x1, x4; \ | ||
224 | pand x1, x2; \ | ||
225 | pxor x3, x4; \ | ||
226 | pxor x3, x0; \ | ||
227 | pxor x2, x3; | ||
228 | |||
229 | #define S4(x0, x1, x2, x3, x4) \ | ||
230 | movdqa x3, x4; \ | ||
231 | pand x0, x3; \ | ||
232 | pxor x4, x0; \ | ||
233 | pxor x2, x3; \ | ||
234 | por x4, x2; \ | ||
235 | pxor x1, x0; \ | ||
236 | pxor x3, x4; \ | ||
237 | por x0, x2; \ | ||
238 | pxor x1, x2; \ | ||
239 | pand x0, x1; \ | ||
240 | pxor x4, x1; \ | ||
241 | pand x2, x4; \ | ||
242 | pxor x3, x2; \ | ||
243 | pxor x0, x4; \ | ||
244 | por x1, x3; \ | ||
245 | pxor RNOT, x1; \ | ||
246 | pxor x0, x3; | ||
247 | |||
248 | #define S5(x0, x1, x2, x3, x4) \ | ||
249 | movdqa x1, x4; \ | ||
250 | por x0, x1; \ | ||
251 | pxor x1, x2; \ | ||
252 | pxor RNOT, x3; \ | ||
253 | pxor x0, x4; \ | ||
254 | pxor x2, x0; \ | ||
255 | pand x4, x1; \ | ||
256 | por x3, x4; \ | ||
257 | pxor x0, x4; \ | ||
258 | pand x3, x0; \ | ||
259 | pxor x3, x1; \ | ||
260 | pxor x2, x3; \ | ||
261 | pxor x1, x0; \ | ||
262 | pand x4, x2; \ | ||
263 | pxor x2, x1; \ | ||
264 | pand x0, x2; \ | ||
265 | pxor x2, x3; | ||
266 | |||
267 | #define S6(x0, x1, x2, x3, x4) \ | ||
268 | movdqa x1, x4; \ | ||
269 | pxor x0, x3; \ | ||
270 | pxor x2, x1; \ | ||
271 | pxor x0, x2; \ | ||
272 | pand x3, x0; \ | ||
273 | por x3, x1; \ | ||
274 | pxor RNOT, x4; \ | ||
275 | pxor x1, x0; \ | ||
276 | pxor x2, x1; \ | ||
277 | pxor x4, x3; \ | ||
278 | pxor x0, x4; \ | ||
279 | pand x0, x2; \ | ||
280 | pxor x1, x4; \ | ||
281 | pxor x3, x2; \ | ||
282 | pand x1, x3; \ | ||
283 | pxor x0, x3; \ | ||
284 | pxor x2, x1; | ||
285 | |||
286 | #define S7(x0, x1, x2, x3, x4) \ | ||
287 | pxor RNOT, x1; \ | ||
288 | movdqa x1, x4; \ | ||
289 | pxor RNOT, x0; \ | ||
290 | pand x2, x1; \ | ||
291 | pxor x3, x1; \ | ||
292 | por x4, x3; \ | ||
293 | pxor x2, x4; \ | ||
294 | pxor x3, x2; \ | ||
295 | pxor x0, x3; \ | ||
296 | por x1, x0; \ | ||
297 | pand x0, x2; \ | ||
298 | pxor x4, x0; \ | ||
299 | pxor x3, x4; \ | ||
300 | pand x0, x3; \ | ||
301 | pxor x1, x4; \ | ||
302 | pxor x4, x2; \ | ||
303 | pxor x1, x3; \ | ||
304 | por x0, x4; \ | ||
305 | pxor x1, x4; | ||
306 | |||
307 | #define SI0(x0, x1, x2, x3, x4) \ | ||
308 | movdqa x3, x4; \ | ||
309 | pxor x0, x1; \ | ||
310 | por x1, x3; \ | ||
311 | pxor x1, x4; \ | ||
312 | pxor RNOT, x0; \ | ||
313 | pxor x3, x2; \ | ||
314 | pxor x0, x3; \ | ||
315 | pand x1, x0; \ | ||
316 | pxor x2, x0; \ | ||
317 | pand x3, x2; \ | ||
318 | pxor x4, x3; \ | ||
319 | pxor x3, x2; \ | ||
320 | pxor x3, x1; \ | ||
321 | pand x0, x3; \ | ||
322 | pxor x0, x1; \ | ||
323 | pxor x2, x0; \ | ||
324 | pxor x3, x4; | ||
325 | |||
326 | #define SI1(x0, x1, x2, x3, x4) \ | ||
327 | pxor x3, x1; \ | ||
328 | movdqa x0, x4; \ | ||
329 | pxor x2, x0; \ | ||
330 | pxor RNOT, x2; \ | ||
331 | por x1, x4; \ | ||
332 | pxor x3, x4; \ | ||
333 | pand x1, x3; \ | ||
334 | pxor x2, x1; \ | ||
335 | pand x4, x2; \ | ||
336 | pxor x1, x4; \ | ||
337 | por x3, x1; \ | ||
338 | pxor x0, x3; \ | ||
339 | pxor x0, x2; \ | ||
340 | por x4, x0; \ | ||
341 | pxor x4, x2; \ | ||
342 | pxor x0, x1; \ | ||
343 | pxor x1, x4; | ||
344 | |||
345 | #define SI2(x0, x1, x2, x3, x4) \ | ||
346 | pxor x1, x2; \ | ||
347 | movdqa x3, x4; \ | ||
348 | pxor RNOT, x3; \ | ||
349 | por x2, x3; \ | ||
350 | pxor x4, x2; \ | ||
351 | pxor x0, x4; \ | ||
352 | pxor x1, x3; \ | ||
353 | por x2, x1; \ | ||
354 | pxor x0, x2; \ | ||
355 | pxor x4, x1; \ | ||
356 | por x3, x4; \ | ||
357 | pxor x3, x2; \ | ||
358 | pxor x2, x4; \ | ||
359 | pand x1, x2; \ | ||
360 | pxor x3, x2; \ | ||
361 | pxor x4, x3; \ | ||
362 | pxor x0, x4; | ||
363 | |||
364 | #define SI3(x0, x1, x2, x3, x4) \ | ||
365 | pxor x1, x2; \ | ||
366 | movdqa x1, x4; \ | ||
367 | pand x2, x1; \ | ||
368 | pxor x0, x1; \ | ||
369 | por x4, x0; \ | ||
370 | pxor x3, x4; \ | ||
371 | pxor x3, x0; \ | ||
372 | por x1, x3; \ | ||
373 | pxor x2, x1; \ | ||
374 | pxor x3, x1; \ | ||
375 | pxor x2, x0; \ | ||
376 | pxor x3, x2; \ | ||
377 | pand x1, x3; \ | ||
378 | pxor x0, x1; \ | ||
379 | pand x2, x0; \ | ||
380 | pxor x3, x4; \ | ||
381 | pxor x0, x3; \ | ||
382 | pxor x1, x0; | ||
383 | |||
384 | #define SI4(x0, x1, x2, x3, x4) \ | ||
385 | pxor x3, x2; \ | ||
386 | movdqa x0, x4; \ | ||
387 | pand x1, x0; \ | ||
388 | pxor x2, x0; \ | ||
389 | por x3, x2; \ | ||
390 | pxor RNOT, x4; \ | ||
391 | pxor x0, x1; \ | ||
392 | pxor x2, x0; \ | ||
393 | pand x4, x2; \ | ||
394 | pxor x0, x2; \ | ||
395 | por x4, x0; \ | ||
396 | pxor x3, x0; \ | ||
397 | pand x2, x3; \ | ||
398 | pxor x3, x4; \ | ||
399 | pxor x1, x3; \ | ||
400 | pand x0, x1; \ | ||
401 | pxor x1, x4; \ | ||
402 | pxor x3, x0; | ||
403 | |||
404 | #define SI5(x0, x1, x2, x3, x4) \ | ||
405 | movdqa x1, x4; \ | ||
406 | por x2, x1; \ | ||
407 | pxor x4, x2; \ | ||
408 | pxor x3, x1; \ | ||
409 | pand x4, x3; \ | ||
410 | pxor x3, x2; \ | ||
411 | por x0, x3; \ | ||
412 | pxor RNOT, x0; \ | ||
413 | pxor x2, x3; \ | ||
414 | por x0, x2; \ | ||
415 | pxor x1, x4; \ | ||
416 | pxor x4, x2; \ | ||
417 | pand x0, x4; \ | ||
418 | pxor x1, x0; \ | ||
419 | pxor x3, x1; \ | ||
420 | pand x2, x0; \ | ||
421 | pxor x3, x2; \ | ||
422 | pxor x2, x0; \ | ||
423 | pxor x4, x2; \ | ||
424 | pxor x3, x4; | ||
425 | |||
426 | #define SI6(x0, x1, x2, x3, x4) \ | ||
427 | pxor x2, x0; \ | ||
428 | movdqa x0, x4; \ | ||
429 | pand x3, x0; \ | ||
430 | pxor x3, x2; \ | ||
431 | pxor x2, x0; \ | ||
432 | pxor x1, x3; \ | ||
433 | por x4, x2; \ | ||
434 | pxor x3, x2; \ | ||
435 | pand x0, x3; \ | ||
436 | pxor RNOT, x0; \ | ||
437 | pxor x1, x3; \ | ||
438 | pand x2, x1; \ | ||
439 | pxor x0, x4; \ | ||
440 | pxor x4, x3; \ | ||
441 | pxor x2, x4; \ | ||
442 | pxor x1, x0; \ | ||
443 | pxor x0, x2; | ||
444 | |||
445 | #define SI7(x0, x1, x2, x3, x4) \ | ||
446 | movdqa x3, x4; \ | ||
447 | pand x0, x3; \ | ||
448 | pxor x2, x0; \ | ||
449 | por x4, x2; \ | ||
450 | pxor x1, x4; \ | ||
451 | pxor RNOT, x0; \ | ||
452 | por x3, x1; \ | ||
453 | pxor x0, x4; \ | ||
454 | pand x2, x0; \ | ||
455 | pxor x1, x0; \ | ||
456 | pand x2, x1; \ | ||
457 | pxor x2, x3; \ | ||
458 | pxor x3, x4; \ | ||
459 | pand x3, x2; \ | ||
460 | por x0, x3; \ | ||
461 | pxor x4, x1; \ | ||
462 | pxor x4, x3; \ | ||
463 | pand x0, x4; \ | ||
464 | pxor x2, x4; | ||
465 | |||
466 | #define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ | ||
467 | movdqa x0, t2; \ | ||
468 | punpckldq x1, x0; \ | ||
469 | punpckhdq x1, t2; \ | ||
470 | movdqa x2, t1; \ | ||
471 | punpckhdq x3, x2; \ | ||
472 | punpckldq x3, t1; \ | ||
473 | movdqa x0, x1; \ | ||
474 | punpcklqdq t1, x0; \ | ||
475 | punpckhqdq t1, x1; \ | ||
476 | movdqa t2, x3; \ | ||
477 | punpcklqdq x2, t2; \ | ||
478 | punpckhqdq x2, x3; \ | ||
479 | movdqa t2, x2; | ||
480 | |||
481 | #define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \ | ||
482 | movdqu (0*4*4)(in), x0; \ | ||
483 | movdqu (1*4*4)(in), x1; \ | ||
484 | movdqu (2*4*4)(in), x2; \ | ||
485 | movdqu (3*4*4)(in), x3; \ | ||
486 | \ | ||
487 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) | ||
488 | |||
489 | #define write_blocks(out, x0, x1, x2, x3, t0, t1, t2) \ | ||
490 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ | ||
491 | \ | ||
492 | movdqu x0, (0*4*4)(out); \ | ||
493 | movdqu x1, (1*4*4)(out); \ | ||
494 | movdqu x2, (2*4*4)(out); \ | ||
495 | movdqu x3, (3*4*4)(out); | ||
496 | |||
497 | #define xor_blocks(out, x0, x1, x2, x3, t0, t1, t2) \ | ||
498 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ | ||
499 | \ | ||
500 | movdqu (0*4*4)(out), t0; \ | ||
501 | pxor t0, x0; \ | ||
502 | movdqu x0, (0*4*4)(out); \ | ||
503 | movdqu (1*4*4)(out), t0; \ | ||
504 | pxor t0, x1; \ | ||
505 | movdqu x1, (1*4*4)(out); \ | ||
506 | movdqu (2*4*4)(out), t0; \ | ||
507 | pxor t0, x2; \ | ||
508 | movdqu x2, (2*4*4)(out); \ | ||
509 | movdqu (3*4*4)(out), t0; \ | ||
510 | pxor t0, x3; \ | ||
511 | movdqu x3, (3*4*4)(out); | ||
512 | |||
513 | .align 8 | ||
514 | .global __serpent_enc_blk_4way | ||
515 | .type __serpent_enc_blk_4way,@function; | ||
516 | |||
517 | __serpent_enc_blk_4way: | ||
518 | /* input: | ||
519 | * arg_ctx(%esp): ctx, CTX | ||
520 | * arg_dst(%esp): dst | ||
521 | * arg_src(%esp): src | ||
522 | * arg_xor(%esp): bool, if true: xor output | ||
523 | */ | ||
524 | |||
525 | pcmpeqd RNOT, RNOT; | ||
526 | |||
527 | movl arg_ctx(%esp), CTX; | ||
528 | |||
529 | movl arg_src(%esp), %eax; | ||
530 | read_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); | ||
531 | |||
532 | K(RA, RB, RC, RD, RE, 0); | ||
533 | S0(RA, RB, RC, RD, RE); LK(RC, RB, RD, RA, RE, 1); | ||
534 | S1(RC, RB, RD, RA, RE); LK(RE, RD, RA, RC, RB, 2); | ||
535 | S2(RE, RD, RA, RC, RB); LK(RB, RD, RE, RC, RA, 3); | ||
536 | S3(RB, RD, RE, RC, RA); LK(RC, RA, RD, RB, RE, 4); | ||
537 | S4(RC, RA, RD, RB, RE); LK(RA, RD, RB, RE, RC, 5); | ||
538 | S5(RA, RD, RB, RE, RC); LK(RC, RA, RD, RE, RB, 6); | ||
539 | S6(RC, RA, RD, RE, RB); LK(RD, RB, RA, RE, RC, 7); | ||
540 | S7(RD, RB, RA, RE, RC); LK(RC, RA, RE, RD, RB, 8); | ||
541 | S0(RC, RA, RE, RD, RB); LK(RE, RA, RD, RC, RB, 9); | ||
542 | S1(RE, RA, RD, RC, RB); LK(RB, RD, RC, RE, RA, 10); | ||
543 | S2(RB, RD, RC, RE, RA); LK(RA, RD, RB, RE, RC, 11); | ||
544 | S3(RA, RD, RB, RE, RC); LK(RE, RC, RD, RA, RB, 12); | ||
545 | S4(RE, RC, RD, RA, RB); LK(RC, RD, RA, RB, RE, 13); | ||
546 | S5(RC, RD, RA, RB, RE); LK(RE, RC, RD, RB, RA, 14); | ||
547 | S6(RE, RC, RD, RB, RA); LK(RD, RA, RC, RB, RE, 15); | ||
548 | S7(RD, RA, RC, RB, RE); LK(RE, RC, RB, RD, RA, 16); | ||
549 | S0(RE, RC, RB, RD, RA); LK(RB, RC, RD, RE, RA, 17); | ||
550 | S1(RB, RC, RD, RE, RA); LK(RA, RD, RE, RB, RC, 18); | ||
551 | S2(RA, RD, RE, RB, RC); LK(RC, RD, RA, RB, RE, 19); | ||
552 | S3(RC, RD, RA, RB, RE); LK(RB, RE, RD, RC, RA, 20); | ||
553 | S4(RB, RE, RD, RC, RA); LK(RE, RD, RC, RA, RB, 21); | ||
554 | S5(RE, RD, RC, RA, RB); LK(RB, RE, RD, RA, RC, 22); | ||
555 | S6(RB, RE, RD, RA, RC); LK(RD, RC, RE, RA, RB, 23); | ||
556 | S7(RD, RC, RE, RA, RB); LK(RB, RE, RA, RD, RC, 24); | ||
557 | S0(RB, RE, RA, RD, RC); LK(RA, RE, RD, RB, RC, 25); | ||
558 | S1(RA, RE, RD, RB, RC); LK(RC, RD, RB, RA, RE, 26); | ||
559 | S2(RC, RD, RB, RA, RE); LK(RE, RD, RC, RA, RB, 27); | ||
560 | S3(RE, RD, RC, RA, RB); LK(RA, RB, RD, RE, RC, 28); | ||
561 | S4(RA, RB, RD, RE, RC); LK(RB, RD, RE, RC, RA, 29); | ||
562 | S5(RB, RD, RE, RC, RA); LK(RA, RB, RD, RC, RE, 30); | ||
563 | S6(RA, RB, RD, RC, RE); LK(RD, RE, RB, RC, RA, 31); | ||
564 | S7(RD, RE, RB, RC, RA); K(RA, RB, RC, RD, RE, 32); | ||
565 | |||
566 | movl arg_dst(%esp), %eax; | ||
567 | |||
568 | cmpb $0, arg_xor(%esp); | ||
569 | jnz __enc_xor4; | ||
570 | |||
571 | write_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); | ||
572 | |||
573 | ret; | ||
574 | |||
575 | __enc_xor4: | ||
576 | xor_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); | ||
577 | |||
578 | ret; | ||
579 | |||
580 | .align 8 | ||
581 | .global serpent_dec_blk_4way | ||
582 | .type serpent_dec_blk_4way,@function; | ||
583 | |||
584 | serpent_dec_blk_4way: | ||
585 | /* input: | ||
586 | * arg_ctx(%esp): ctx, CTX | ||
587 | * arg_dst(%esp): dst | ||
588 | * arg_src(%esp): src | ||
589 | */ | ||
590 | |||
591 | pcmpeqd RNOT, RNOT; | ||
592 | |||
593 | movl arg_ctx(%esp), CTX; | ||
594 | |||
595 | movl arg_src(%esp), %eax; | ||
596 | read_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); | ||
597 | |||
598 | K(RA, RB, RC, RD, RE, 32); | ||
599 | SI7(RA, RB, RC, RD, RE); KL(RB, RD, RA, RE, RC, 31); | ||
600 | SI6(RB, RD, RA, RE, RC); KL(RA, RC, RE, RB, RD, 30); | ||
601 | SI5(RA, RC, RE, RB, RD); KL(RC, RD, RA, RE, RB, 29); | ||
602 | SI4(RC, RD, RA, RE, RB); KL(RC, RA, RB, RE, RD, 28); | ||
603 | SI3(RC, RA, RB, RE, RD); KL(RB, RC, RD, RE, RA, 27); | ||
604 | SI2(RB, RC, RD, RE, RA); KL(RC, RA, RE, RD, RB, 26); | ||
605 | SI1(RC, RA, RE, RD, RB); KL(RB, RA, RE, RD, RC, 25); | ||
606 | SI0(RB, RA, RE, RD, RC); KL(RE, RC, RA, RB, RD, 24); | ||
607 | SI7(RE, RC, RA, RB, RD); KL(RC, RB, RE, RD, RA, 23); | ||
608 | SI6(RC, RB, RE, RD, RA); KL(RE, RA, RD, RC, RB, 22); | ||
609 | SI5(RE, RA, RD, RC, RB); KL(RA, RB, RE, RD, RC, 21); | ||
610 | SI4(RA, RB, RE, RD, RC); KL(RA, RE, RC, RD, RB, 20); | ||
611 | SI3(RA, RE, RC, RD, RB); KL(RC, RA, RB, RD, RE, 19); | ||
612 | SI2(RC, RA, RB, RD, RE); KL(RA, RE, RD, RB, RC, 18); | ||
613 | SI1(RA, RE, RD, RB, RC); KL(RC, RE, RD, RB, RA, 17); | ||
614 | SI0(RC, RE, RD, RB, RA); KL(RD, RA, RE, RC, RB, 16); | ||
615 | SI7(RD, RA, RE, RC, RB); KL(RA, RC, RD, RB, RE, 15); | ||
616 | SI6(RA, RC, RD, RB, RE); KL(RD, RE, RB, RA, RC, 14); | ||
617 | SI5(RD, RE, RB, RA, RC); KL(RE, RC, RD, RB, RA, 13); | ||
618 | SI4(RE, RC, RD, RB, RA); KL(RE, RD, RA, RB, RC, 12); | ||
619 | SI3(RE, RD, RA, RB, RC); KL(RA, RE, RC, RB, RD, 11); | ||
620 | SI2(RA, RE, RC, RB, RD); KL(RE, RD, RB, RC, RA, 10); | ||
621 | SI1(RE, RD, RB, RC, RA); KL(RA, RD, RB, RC, RE, 9); | ||
622 | SI0(RA, RD, RB, RC, RE); KL(RB, RE, RD, RA, RC, 8); | ||
623 | SI7(RB, RE, RD, RA, RC); KL(RE, RA, RB, RC, RD, 7); | ||
624 | SI6(RE, RA, RB, RC, RD); KL(RB, RD, RC, RE, RA, 6); | ||
625 | SI5(RB, RD, RC, RE, RA); KL(RD, RA, RB, RC, RE, 5); | ||
626 | SI4(RD, RA, RB, RC, RE); KL(RD, RB, RE, RC, RA, 4); | ||
627 | SI3(RD, RB, RE, RC, RA); KL(RE, RD, RA, RC, RB, 3); | ||
628 | SI2(RE, RD, RA, RC, RB); KL(RD, RB, RC, RA, RE, 2); | ||
629 | SI1(RD, RB, RC, RA, RE); KL(RE, RB, RC, RA, RD, 1); | ||
630 | SI0(RE, RB, RC, RA, RD); K(RC, RD, RB, RE, RA, 0); | ||
631 | |||
632 | movl arg_dst(%esp), %eax; | ||
633 | write_blocks(%eax, RC, RD, RB, RE, RT0, RT1, RA); | ||
634 | |||
635 | ret; | ||
diff --git a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S deleted file mode 100644 index 3ee1ff04d3e..00000000000 --- a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S +++ /dev/null | |||
@@ -1,758 +0,0 @@ | |||
1 | /* | ||
2 | * Serpent Cipher 8-way parallel algorithm (x86_64/SSE2) | ||
3 | * | ||
4 | * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
5 | * | ||
6 | * Based on crypto/serpent.c by | ||
7 | * Copyright (C) 2002 Dag Arne Osvik <osvik@ii.uib.no> | ||
8 | * 2003 Herbert Valerio Riedel <hvr@gnu.org> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; either version 2 of the License, or | ||
13 | * (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 | * GNU General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, write to the Free Software | ||
22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
23 | * USA | ||
24 | * | ||
25 | */ | ||
26 | |||
27 | .file "serpent-sse2-x86_64-asm_64.S" | ||
28 | .text | ||
29 | |||
30 | #define CTX %rdi | ||
31 | |||
32 | /********************************************************************** | ||
33 | 8-way SSE2 serpent | ||
34 | **********************************************************************/ | ||
35 | #define RA1 %xmm0 | ||
36 | #define RB1 %xmm1 | ||
37 | #define RC1 %xmm2 | ||
38 | #define RD1 %xmm3 | ||
39 | #define RE1 %xmm4 | ||
40 | |||
41 | #define RA2 %xmm5 | ||
42 | #define RB2 %xmm6 | ||
43 | #define RC2 %xmm7 | ||
44 | #define RD2 %xmm8 | ||
45 | #define RE2 %xmm9 | ||
46 | |||
47 | #define RNOT %xmm10 | ||
48 | |||
49 | #define RK0 %xmm11 | ||
50 | #define RK1 %xmm12 | ||
51 | #define RK2 %xmm13 | ||
52 | #define RK3 %xmm14 | ||
53 | |||
54 | #define S0_1(x0, x1, x2, x3, x4) \ | ||
55 | movdqa x3, x4; \ | ||
56 | por x0, x3; \ | ||
57 | pxor x4, x0; \ | ||
58 | pxor x2, x4; \ | ||
59 | pxor RNOT, x4; \ | ||
60 | pxor x1, x3; \ | ||
61 | pand x0, x1; \ | ||
62 | pxor x4, x1; \ | ||
63 | pxor x0, x2; | ||
64 | #define S0_2(x0, x1, x2, x3, x4) \ | ||
65 | pxor x3, x0; \ | ||
66 | por x0, x4; \ | ||
67 | pxor x2, x0; \ | ||
68 | pand x1, x2; \ | ||
69 | pxor x2, x3; \ | ||
70 | pxor RNOT, x1; \ | ||
71 | pxor x4, x2; \ | ||
72 | pxor x2, x1; | ||
73 | |||
74 | #define S1_1(x0, x1, x2, x3, x4) \ | ||
75 | movdqa x1, x4; \ | ||
76 | pxor x0, x1; \ | ||
77 | pxor x3, x0; \ | ||
78 | pxor RNOT, x3; \ | ||
79 | pand x1, x4; \ | ||
80 | por x1, x0; \ | ||
81 | pxor x2, x3; \ | ||
82 | pxor x3, x0; \ | ||
83 | pxor x3, x1; | ||
84 | #define S1_2(x0, x1, x2, x3, x4) \ | ||
85 | pxor x4, x3; \ | ||
86 | por x4, x1; \ | ||
87 | pxor x2, x4; \ | ||
88 | pand x0, x2; \ | ||
89 | pxor x1, x2; \ | ||
90 | por x0, x1; \ | ||
91 | pxor RNOT, x0; \ | ||
92 | pxor x2, x0; \ | ||
93 | pxor x1, x4; | ||
94 | |||
95 | #define S2_1(x0, x1, x2, x3, x4) \ | ||
96 | pxor RNOT, x3; \ | ||
97 | pxor x0, x1; \ | ||
98 | movdqa x0, x4; \ | ||
99 | pand x2, x0; \ | ||
100 | pxor x3, x0; \ | ||
101 | por x4, x3; \ | ||
102 | pxor x1, x2; \ | ||
103 | pxor x1, x3; \ | ||
104 | pand x0, x1; | ||
105 | #define S2_2(x0, x1, x2, x3, x4) \ | ||
106 | pxor x2, x0; \ | ||
107 | pand x3, x2; \ | ||
108 | por x1, x3; \ | ||
109 | pxor RNOT, x0; \ | ||
110 | pxor x0, x3; \ | ||
111 | pxor x0, x4; \ | ||
112 | pxor x2, x0; \ | ||
113 | por x2, x1; | ||
114 | |||
115 | #define S3_1(x0, x1, x2, x3, x4) \ | ||
116 | movdqa x1, x4; \ | ||
117 | pxor x3, x1; \ | ||
118 | por x0, x3; \ | ||
119 | pand x0, x4; \ | ||
120 | pxor x2, x0; \ | ||
121 | pxor x1, x2; \ | ||
122 | pand x3, x1; \ | ||
123 | pxor x3, x2; \ | ||
124 | por x4, x0; \ | ||
125 | pxor x3, x4; | ||
126 | #define S3_2(x0, x1, x2, x3, x4) \ | ||
127 | pxor x0, x1; \ | ||
128 | pand x3, x0; \ | ||
129 | pand x4, x3; \ | ||
130 | pxor x2, x3; \ | ||
131 | por x1, x4; \ | ||
132 | pand x1, x2; \ | ||
133 | pxor x3, x4; \ | ||
134 | pxor x3, x0; \ | ||
135 | pxor x2, x3; | ||
136 | |||
137 | #define S4_1(x0, x1, x2, x3, x4) \ | ||
138 | movdqa x3, x4; \ | ||
139 | pand x0, x3; \ | ||
140 | pxor x4, x0; \ | ||
141 | pxor x2, x3; \ | ||
142 | por x4, x2; \ | ||
143 | pxor x1, x0; \ | ||
144 | pxor x3, x4; \ | ||
145 | por x0, x2; \ | ||
146 | pxor x1, x2; | ||
147 | #define S4_2(x0, x1, x2, x3, x4) \ | ||
148 | pand x0, x1; \ | ||
149 | pxor x4, x1; \ | ||
150 | pand x2, x4; \ | ||
151 | pxor x3, x2; \ | ||
152 | pxor x0, x4; \ | ||
153 | por x1, x3; \ | ||
154 | pxor RNOT, x1; \ | ||
155 | pxor x0, x3; | ||
156 | |||
157 | #define S5_1(x0, x1, x2, x3, x4) \ | ||
158 | movdqa x1, x4; \ | ||
159 | por x0, x1; \ | ||
160 | pxor x1, x2; \ | ||
161 | pxor RNOT, x3; \ | ||
162 | pxor x0, x4; \ | ||
163 | pxor x2, x0; \ | ||
164 | pand x4, x1; \ | ||
165 | por x3, x4; \ | ||
166 | pxor x0, x4; | ||
167 | #define S5_2(x0, x1, x2, x3, x4) \ | ||
168 | pand x3, x0; \ | ||
169 | pxor x3, x1; \ | ||
170 | pxor x2, x3; \ | ||
171 | pxor x1, x0; \ | ||
172 | pand x4, x2; \ | ||
173 | pxor x2, x1; \ | ||
174 | pand x0, x2; \ | ||
175 | pxor x2, x3; | ||
176 | |||
177 | #define S6_1(x0, x1, x2, x3, x4) \ | ||
178 | movdqa x1, x4; \ | ||
179 | pxor x0, x3; \ | ||
180 | pxor x2, x1; \ | ||
181 | pxor x0, x2; \ | ||
182 | pand x3, x0; \ | ||
183 | por x3, x1; \ | ||
184 | pxor RNOT, x4; \ | ||
185 | pxor x1, x0; \ | ||
186 | pxor x2, x1; | ||
187 | #define S6_2(x0, x1, x2, x3, x4) \ | ||
188 | pxor x4, x3; \ | ||
189 | pxor x0, x4; \ | ||
190 | pand x0, x2; \ | ||
191 | pxor x1, x4; \ | ||
192 | pxor x3, x2; \ | ||
193 | pand x1, x3; \ | ||
194 | pxor x0, x3; \ | ||
195 | pxor x2, x1; | ||
196 | |||
197 | #define S7_1(x0, x1, x2, x3, x4) \ | ||
198 | pxor RNOT, x1; \ | ||
199 | movdqa x1, x4; \ | ||
200 | pxor RNOT, x0; \ | ||
201 | pand x2, x1; \ | ||
202 | pxor x3, x1; \ | ||
203 | por x4, x3; \ | ||
204 | pxor x2, x4; \ | ||
205 | pxor x3, x2; \ | ||
206 | pxor x0, x3; \ | ||
207 | por x1, x0; | ||
208 | #define S7_2(x0, x1, x2, x3, x4) \ | ||
209 | pand x0, x2; \ | ||
210 | pxor x4, x0; \ | ||
211 | pxor x3, x4; \ | ||
212 | pand x0, x3; \ | ||
213 | pxor x1, x4; \ | ||
214 | pxor x4, x2; \ | ||
215 | pxor x1, x3; \ | ||
216 | por x0, x4; \ | ||
217 | pxor x1, x4; | ||
218 | |||
219 | #define SI0_1(x0, x1, x2, x3, x4) \ | ||
220 | movdqa x3, x4; \ | ||
221 | pxor x0, x1; \ | ||
222 | por x1, x3; \ | ||
223 | pxor x1, x4; \ | ||
224 | pxor RNOT, x0; \ | ||
225 | pxor x3, x2; \ | ||
226 | pxor x0, x3; \ | ||
227 | pand x1, x0; \ | ||
228 | pxor x2, x0; | ||
229 | #define SI0_2(x0, x1, x2, x3, x4) \ | ||
230 | pand x3, x2; \ | ||
231 | pxor x4, x3; \ | ||
232 | pxor x3, x2; \ | ||
233 | pxor x3, x1; \ | ||
234 | pand x0, x3; \ | ||
235 | pxor x0, x1; \ | ||
236 | pxor x2, x0; \ | ||
237 | pxor x3, x4; | ||
238 | |||
239 | #define SI1_1(x0, x1, x2, x3, x4) \ | ||
240 | pxor x3, x1; \ | ||
241 | movdqa x0, x4; \ | ||
242 | pxor x2, x0; \ | ||
243 | pxor RNOT, x2; \ | ||
244 | por x1, x4; \ | ||
245 | pxor x3, x4; \ | ||
246 | pand x1, x3; \ | ||
247 | pxor x2, x1; \ | ||
248 | pand x4, x2; | ||
249 | #define SI1_2(x0, x1, x2, x3, x4) \ | ||
250 | pxor x1, x4; \ | ||
251 | por x3, x1; \ | ||
252 | pxor x0, x3; \ | ||
253 | pxor x0, x2; \ | ||
254 | por x4, x0; \ | ||
255 | pxor x4, x2; \ | ||
256 | pxor x0, x1; \ | ||
257 | pxor x1, x4; | ||
258 | |||
259 | #define SI2_1(x0, x1, x2, x3, x4) \ | ||
260 | pxor x1, x2; \ | ||
261 | movdqa x3, x4; \ | ||
262 | pxor RNOT, x3; \ | ||
263 | por x2, x3; \ | ||
264 | pxor x4, x2; \ | ||
265 | pxor x0, x4; \ | ||
266 | pxor x1, x3; \ | ||
267 | por x2, x1; \ | ||
268 | pxor x0, x2; | ||
269 | #define SI2_2(x0, x1, x2, x3, x4) \ | ||
270 | pxor x4, x1; \ | ||
271 | por x3, x4; \ | ||
272 | pxor x3, x2; \ | ||
273 | pxor x2, x4; \ | ||
274 | pand x1, x2; \ | ||
275 | pxor x3, x2; \ | ||
276 | pxor x4, x3; \ | ||
277 | pxor x0, x4; | ||
278 | |||
279 | #define SI3_1(x0, x1, x2, x3, x4) \ | ||
280 | pxor x1, x2; \ | ||
281 | movdqa x1, x4; \ | ||
282 | pand x2, x1; \ | ||
283 | pxor x0, x1; \ | ||
284 | por x4, x0; \ | ||
285 | pxor x3, x4; \ | ||
286 | pxor x3, x0; \ | ||
287 | por x1, x3; \ | ||
288 | pxor x2, x1; | ||
289 | #define SI3_2(x0, x1, x2, x3, x4) \ | ||
290 | pxor x3, x1; \ | ||
291 | pxor x2, x0; \ | ||
292 | pxor x3, x2; \ | ||
293 | pand x1, x3; \ | ||
294 | pxor x0, x1; \ | ||
295 | pand x2, x0; \ | ||
296 | pxor x3, x4; \ | ||
297 | pxor x0, x3; \ | ||
298 | pxor x1, x0; | ||
299 | |||
300 | #define SI4_1(x0, x1, x2, x3, x4) \ | ||
301 | pxor x3, x2; \ | ||
302 | movdqa x0, x4; \ | ||
303 | pand x1, x0; \ | ||
304 | pxor x2, x0; \ | ||
305 | por x3, x2; \ | ||
306 | pxor RNOT, x4; \ | ||
307 | pxor x0, x1; \ | ||
308 | pxor x2, x0; \ | ||
309 | pand x4, x2; | ||
310 | #define SI4_2(x0, x1, x2, x3, x4) \ | ||
311 | pxor x0, x2; \ | ||
312 | por x4, x0; \ | ||
313 | pxor x3, x0; \ | ||
314 | pand x2, x3; \ | ||
315 | pxor x3, x4; \ | ||
316 | pxor x1, x3; \ | ||
317 | pand x0, x1; \ | ||
318 | pxor x1, x4; \ | ||
319 | pxor x3, x0; | ||
320 | |||
321 | #define SI5_1(x0, x1, x2, x3, x4) \ | ||
322 | movdqa x1, x4; \ | ||
323 | por x2, x1; \ | ||
324 | pxor x4, x2; \ | ||
325 | pxor x3, x1; \ | ||
326 | pand x4, x3; \ | ||
327 | pxor x3, x2; \ | ||
328 | por x0, x3; \ | ||
329 | pxor RNOT, x0; \ | ||
330 | pxor x2, x3; \ | ||
331 | por x0, x2; | ||
332 | #define SI5_2(x0, x1, x2, x3, x4) \ | ||
333 | pxor x1, x4; \ | ||
334 | pxor x4, x2; \ | ||
335 | pand x0, x4; \ | ||
336 | pxor x1, x0; \ | ||
337 | pxor x3, x1; \ | ||
338 | pand x2, x0; \ | ||
339 | pxor x3, x2; \ | ||
340 | pxor x2, x0; \ | ||
341 | pxor x4, x2; \ | ||
342 | pxor x3, x4; | ||
343 | |||
344 | #define SI6_1(x0, x1, x2, x3, x4) \ | ||
345 | pxor x2, x0; \ | ||
346 | movdqa x0, x4; \ | ||
347 | pand x3, x0; \ | ||
348 | pxor x3, x2; \ | ||
349 | pxor x2, x0; \ | ||
350 | pxor x1, x3; \ | ||
351 | por x4, x2; \ | ||
352 | pxor x3, x2; \ | ||
353 | pand x0, x3; | ||
354 | #define SI6_2(x0, x1, x2, x3, x4) \ | ||
355 | pxor RNOT, x0; \ | ||
356 | pxor x1, x3; \ | ||
357 | pand x2, x1; \ | ||
358 | pxor x0, x4; \ | ||
359 | pxor x4, x3; \ | ||
360 | pxor x2, x4; \ | ||
361 | pxor x1, x0; \ | ||
362 | pxor x0, x2; | ||
363 | |||
364 | #define SI7_1(x0, x1, x2, x3, x4) \ | ||
365 | movdqa x3, x4; \ | ||
366 | pand x0, x3; \ | ||
367 | pxor x2, x0; \ | ||
368 | por x4, x2; \ | ||
369 | pxor x1, x4; \ | ||
370 | pxor RNOT, x0; \ | ||
371 | por x3, x1; \ | ||
372 | pxor x0, x4; \ | ||
373 | pand x2, x0; \ | ||
374 | pxor x1, x0; | ||
375 | #define SI7_2(x0, x1, x2, x3, x4) \ | ||
376 | pand x2, x1; \ | ||
377 | pxor x2, x3; \ | ||
378 | pxor x3, x4; \ | ||
379 | pand x3, x2; \ | ||
380 | por x0, x3; \ | ||
381 | pxor x4, x1; \ | ||
382 | pxor x4, x3; \ | ||
383 | pand x0, x4; \ | ||
384 | pxor x2, x4; | ||
385 | |||
386 | #define get_key(i, j, t) \ | ||
387 | movd (4*(i)+(j))*4(CTX), t; \ | ||
388 | pshufd $0, t, t; | ||
389 | |||
390 | #define K2(x0, x1, x2, x3, x4, i) \ | ||
391 | get_key(i, 0, RK0); \ | ||
392 | get_key(i, 1, RK1); \ | ||
393 | get_key(i, 2, RK2); \ | ||
394 | get_key(i, 3, RK3); \ | ||
395 | pxor RK0, x0 ## 1; \ | ||
396 | pxor RK1, x1 ## 1; \ | ||
397 | pxor RK2, x2 ## 1; \ | ||
398 | pxor RK3, x3 ## 1; \ | ||
399 | pxor RK0, x0 ## 2; \ | ||
400 | pxor RK1, x1 ## 2; \ | ||
401 | pxor RK2, x2 ## 2; \ | ||
402 | pxor RK3, x3 ## 2; | ||
403 | |||
404 | #define LK2(x0, x1, x2, x3, x4, i) \ | ||
405 | movdqa x0 ## 1, x4 ## 1; \ | ||
406 | pslld $13, x0 ## 1; \ | ||
407 | psrld $(32 - 13), x4 ## 1; \ | ||
408 | por x4 ## 1, x0 ## 1; \ | ||
409 | pxor x0 ## 1, x1 ## 1; \ | ||
410 | movdqa x2 ## 1, x4 ## 1; \ | ||
411 | pslld $3, x2 ## 1; \ | ||
412 | psrld $(32 - 3), x4 ## 1; \ | ||
413 | por x4 ## 1, x2 ## 1; \ | ||
414 | pxor x2 ## 1, x1 ## 1; \ | ||
415 | movdqa x0 ## 2, x4 ## 2; \ | ||
416 | pslld $13, x0 ## 2; \ | ||
417 | psrld $(32 - 13), x4 ## 2; \ | ||
418 | por x4 ## 2, x0 ## 2; \ | ||
419 | pxor x0 ## 2, x1 ## 2; \ | ||
420 | movdqa x2 ## 2, x4 ## 2; \ | ||
421 | pslld $3, x2 ## 2; \ | ||
422 | psrld $(32 - 3), x4 ## 2; \ | ||
423 | por x4 ## 2, x2 ## 2; \ | ||
424 | pxor x2 ## 2, x1 ## 2; \ | ||
425 | movdqa x1 ## 1, x4 ## 1; \ | ||
426 | pslld $1, x1 ## 1; \ | ||
427 | psrld $(32 - 1), x4 ## 1; \ | ||
428 | por x4 ## 1, x1 ## 1; \ | ||
429 | movdqa x0 ## 1, x4 ## 1; \ | ||
430 | pslld $3, x4 ## 1; \ | ||
431 | pxor x2 ## 1, x3 ## 1; \ | ||
432 | pxor x4 ## 1, x3 ## 1; \ | ||
433 | movdqa x3 ## 1, x4 ## 1; \ | ||
434 | get_key(i, 1, RK1); \ | ||
435 | movdqa x1 ## 2, x4 ## 2; \ | ||
436 | pslld $1, x1 ## 2; \ | ||
437 | psrld $(32 - 1), x4 ## 2; \ | ||
438 | por x4 ## 2, x1 ## 2; \ | ||
439 | movdqa x0 ## 2, x4 ## 2; \ | ||
440 | pslld $3, x4 ## 2; \ | ||
441 | pxor x2 ## 2, x3 ## 2; \ | ||
442 | pxor x4 ## 2, x3 ## 2; \ | ||
443 | movdqa x3 ## 2, x4 ## 2; \ | ||
444 | get_key(i, 3, RK3); \ | ||
445 | pslld $7, x3 ## 1; \ | ||
446 | psrld $(32 - 7), x4 ## 1; \ | ||
447 | por x4 ## 1, x3 ## 1; \ | ||
448 | movdqa x1 ## 1, x4 ## 1; \ | ||
449 | pslld $7, x4 ## 1; \ | ||
450 | pxor x1 ## 1, x0 ## 1; \ | ||
451 | pxor x3 ## 1, x0 ## 1; \ | ||
452 | pxor x3 ## 1, x2 ## 1; \ | ||
453 | pxor x4 ## 1, x2 ## 1; \ | ||
454 | get_key(i, 0, RK0); \ | ||
455 | pslld $7, x3 ## 2; \ | ||
456 | psrld $(32 - 7), x4 ## 2; \ | ||
457 | por x4 ## 2, x3 ## 2; \ | ||
458 | movdqa x1 ## 2, x4 ## 2; \ | ||
459 | pslld $7, x4 ## 2; \ | ||
460 | pxor x1 ## 2, x0 ## 2; \ | ||
461 | pxor x3 ## 2, x0 ## 2; \ | ||
462 | pxor x3 ## 2, x2 ## 2; \ | ||
463 | pxor x4 ## 2, x2 ## 2; \ | ||
464 | get_key(i, 2, RK2); \ | ||
465 | pxor RK1, x1 ## 1; \ | ||
466 | pxor RK3, x3 ## 1; \ | ||
467 | movdqa x0 ## 1, x4 ## 1; \ | ||
468 | pslld $5, x0 ## 1; \ | ||
469 | psrld $(32 - 5), x4 ## 1; \ | ||
470 | por x4 ## 1, x0 ## 1; \ | ||
471 | movdqa x2 ## 1, x4 ## 1; \ | ||
472 | pslld $22, x2 ## 1; \ | ||
473 | psrld $(32 - 22), x4 ## 1; \ | ||
474 | por x4 ## 1, x2 ## 1; \ | ||
475 | pxor RK0, x0 ## 1; \ | ||
476 | pxor RK2, x2 ## 1; \ | ||
477 | pxor RK1, x1 ## 2; \ | ||
478 | pxor RK3, x3 ## 2; \ | ||
479 | movdqa x0 ## 2, x4 ## 2; \ | ||
480 | pslld $5, x0 ## 2; \ | ||
481 | psrld $(32 - 5), x4 ## 2; \ | ||
482 | por x4 ## 2, x0 ## 2; \ | ||
483 | movdqa x2 ## 2, x4 ## 2; \ | ||
484 | pslld $22, x2 ## 2; \ | ||
485 | psrld $(32 - 22), x4 ## 2; \ | ||
486 | por x4 ## 2, x2 ## 2; \ | ||
487 | pxor RK0, x0 ## 2; \ | ||
488 | pxor RK2, x2 ## 2; | ||
489 | |||
490 | #define KL2(x0, x1, x2, x3, x4, i) \ | ||
491 | pxor RK0, x0 ## 1; \ | ||
492 | pxor RK2, x2 ## 1; \ | ||
493 | movdqa x0 ## 1, x4 ## 1; \ | ||
494 | psrld $5, x0 ## 1; \ | ||
495 | pslld $(32 - 5), x4 ## 1; \ | ||
496 | por x4 ## 1, x0 ## 1; \ | ||
497 | pxor RK3, x3 ## 1; \ | ||
498 | pxor RK1, x1 ## 1; \ | ||
499 | movdqa x2 ## 1, x4 ## 1; \ | ||
500 | psrld $22, x2 ## 1; \ | ||
501 | pslld $(32 - 22), x4 ## 1; \ | ||
502 | por x4 ## 1, x2 ## 1; \ | ||
503 | pxor x3 ## 1, x2 ## 1; \ | ||
504 | pxor RK0, x0 ## 2; \ | ||
505 | pxor RK2, x2 ## 2; \ | ||
506 | movdqa x0 ## 2, x4 ## 2; \ | ||
507 | psrld $5, x0 ## 2; \ | ||
508 | pslld $(32 - 5), x4 ## 2; \ | ||
509 | por x4 ## 2, x0 ## 2; \ | ||
510 | pxor RK3, x3 ## 2; \ | ||
511 | pxor RK1, x1 ## 2; \ | ||
512 | movdqa x2 ## 2, x4 ## 2; \ | ||
513 | psrld $22, x2 ## 2; \ | ||
514 | pslld $(32 - 22), x4 ## 2; \ | ||
515 | por x4 ## 2, x2 ## 2; \ | ||
516 | pxor x3 ## 2, x2 ## 2; \ | ||
517 | pxor x3 ## 1, x0 ## 1; \ | ||
518 | movdqa x1 ## 1, x4 ## 1; \ | ||
519 | pslld $7, x4 ## 1; \ | ||
520 | pxor x1 ## 1, x0 ## 1; \ | ||
521 | pxor x4 ## 1, x2 ## 1; \ | ||
522 | movdqa x1 ## 1, x4 ## 1; \ | ||
523 | psrld $1, x1 ## 1; \ | ||
524 | pslld $(32 - 1), x4 ## 1; \ | ||
525 | por x4 ## 1, x1 ## 1; \ | ||
526 | pxor x3 ## 2, x0 ## 2; \ | ||
527 | movdqa x1 ## 2, x4 ## 2; \ | ||
528 | pslld $7, x4 ## 2; \ | ||
529 | pxor x1 ## 2, x0 ## 2; \ | ||
530 | pxor x4 ## 2, x2 ## 2; \ | ||
531 | movdqa x1 ## 2, x4 ## 2; \ | ||
532 | psrld $1, x1 ## 2; \ | ||
533 | pslld $(32 - 1), x4 ## 2; \ | ||
534 | por x4 ## 2, x1 ## 2; \ | ||
535 | movdqa x3 ## 1, x4 ## 1; \ | ||
536 | psrld $7, x3 ## 1; \ | ||
537 | pslld $(32 - 7), x4 ## 1; \ | ||
538 | por x4 ## 1, x3 ## 1; \ | ||
539 | pxor x0 ## 1, x1 ## 1; \ | ||
540 | movdqa x0 ## 1, x4 ## 1; \ | ||
541 | pslld $3, x4 ## 1; \ | ||
542 | pxor x4 ## 1, x3 ## 1; \ | ||
543 | movdqa x0 ## 1, x4 ## 1; \ | ||
544 | movdqa x3 ## 2, x4 ## 2; \ | ||
545 | psrld $7, x3 ## 2; \ | ||
546 | pslld $(32 - 7), x4 ## 2; \ | ||
547 | por x4 ## 2, x3 ## 2; \ | ||
548 | pxor x0 ## 2, x1 ## 2; \ | ||
549 | movdqa x0 ## 2, x4 ## 2; \ | ||
550 | pslld $3, x4 ## 2; \ | ||
551 | pxor x4 ## 2, x3 ## 2; \ | ||
552 | movdqa x0 ## 2, x4 ## 2; \ | ||
553 | psrld $13, x0 ## 1; \ | ||
554 | pslld $(32 - 13), x4 ## 1; \ | ||
555 | por x4 ## 1, x0 ## 1; \ | ||
556 | pxor x2 ## 1, x1 ## 1; \ | ||
557 | pxor x2 ## 1, x3 ## 1; \ | ||
558 | movdqa x2 ## 1, x4 ## 1; \ | ||
559 | psrld $3, x2 ## 1; \ | ||
560 | pslld $(32 - 3), x4 ## 1; \ | ||
561 | por x4 ## 1, x2 ## 1; \ | ||
562 | psrld $13, x0 ## 2; \ | ||
563 | pslld $(32 - 13), x4 ## 2; \ | ||
564 | por x4 ## 2, x0 ## 2; \ | ||
565 | pxor x2 ## 2, x1 ## 2; \ | ||
566 | pxor x2 ## 2, x3 ## 2; \ | ||
567 | movdqa x2 ## 2, x4 ## 2; \ | ||
568 | psrld $3, x2 ## 2; \ | ||
569 | pslld $(32 - 3), x4 ## 2; \ | ||
570 | por x4 ## 2, x2 ## 2; | ||
571 | |||
572 | #define S(SBOX, x0, x1, x2, x3, x4) \ | ||
573 | SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ | ||
574 | SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ | ||
575 | SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ | ||
576 | SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); | ||
577 | |||
578 | #define SP(SBOX, x0, x1, x2, x3, x4, i) \ | ||
579 | get_key(i, 0, RK0); \ | ||
580 | SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ | ||
581 | get_key(i, 2, RK2); \ | ||
582 | SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ | ||
583 | get_key(i, 3, RK3); \ | ||
584 | SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ | ||
585 | get_key(i, 1, RK1); \ | ||
586 | SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ | ||
587 | |||
588 | #define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ | ||
589 | movdqa x0, t2; \ | ||
590 | punpckldq x1, x0; \ | ||
591 | punpckhdq x1, t2; \ | ||
592 | movdqa x2, t1; \ | ||
593 | punpckhdq x3, x2; \ | ||
594 | punpckldq x3, t1; \ | ||
595 | movdqa x0, x1; \ | ||
596 | punpcklqdq t1, x0; \ | ||
597 | punpckhqdq t1, x1; \ | ||
598 | movdqa t2, x3; \ | ||
599 | punpcklqdq x2, t2; \ | ||
600 | punpckhqdq x2, x3; \ | ||
601 | movdqa t2, x2; | ||
602 | |||
603 | #define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \ | ||
604 | movdqu (0*4*4)(in), x0; \ | ||
605 | movdqu (1*4*4)(in), x1; \ | ||
606 | movdqu (2*4*4)(in), x2; \ | ||
607 | movdqu (3*4*4)(in), x3; \ | ||
608 | \ | ||
609 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) | ||
610 | |||
611 | #define write_blocks(out, x0, x1, x2, x3, t0, t1, t2) \ | ||
612 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ | ||
613 | \ | ||
614 | movdqu x0, (0*4*4)(out); \ | ||
615 | movdqu x1, (1*4*4)(out); \ | ||
616 | movdqu x2, (2*4*4)(out); \ | ||
617 | movdqu x3, (3*4*4)(out); | ||
618 | |||
619 | #define xor_blocks(out, x0, x1, x2, x3, t0, t1, t2) \ | ||
620 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ | ||
621 | \ | ||
622 | movdqu (0*4*4)(out), t0; \ | ||
623 | pxor t0, x0; \ | ||
624 | movdqu x0, (0*4*4)(out); \ | ||
625 | movdqu (1*4*4)(out), t0; \ | ||
626 | pxor t0, x1; \ | ||
627 | movdqu x1, (1*4*4)(out); \ | ||
628 | movdqu (2*4*4)(out), t0; \ | ||
629 | pxor t0, x2; \ | ||
630 | movdqu x2, (2*4*4)(out); \ | ||
631 | movdqu (3*4*4)(out), t0; \ | ||
632 | pxor t0, x3; \ | ||
633 | movdqu x3, (3*4*4)(out); | ||
634 | |||
635 | .align 8 | ||
636 | .global __serpent_enc_blk_8way | ||
637 | .type __serpent_enc_blk_8way,@function; | ||
638 | |||
639 | __serpent_enc_blk_8way: | ||
640 | /* input: | ||
641 | * %rdi: ctx, CTX | ||
642 | * %rsi: dst | ||
643 | * %rdx: src | ||
644 | * %rcx: bool, if true: xor output | ||
645 | */ | ||
646 | |||
647 | pcmpeqd RNOT, RNOT; | ||
648 | |||
649 | leaq (4*4*4)(%rdx), %rax; | ||
650 | read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2); | ||
651 | read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); | ||
652 | |||
653 | K2(RA, RB, RC, RD, RE, 0); | ||
654 | S(S0, RA, RB, RC, RD, RE); LK2(RC, RB, RD, RA, RE, 1); | ||
655 | S(S1, RC, RB, RD, RA, RE); LK2(RE, RD, RA, RC, RB, 2); | ||
656 | S(S2, RE, RD, RA, RC, RB); LK2(RB, RD, RE, RC, RA, 3); | ||
657 | S(S3, RB, RD, RE, RC, RA); LK2(RC, RA, RD, RB, RE, 4); | ||
658 | S(S4, RC, RA, RD, RB, RE); LK2(RA, RD, RB, RE, RC, 5); | ||
659 | S(S5, RA, RD, RB, RE, RC); LK2(RC, RA, RD, RE, RB, 6); | ||
660 | S(S6, RC, RA, RD, RE, RB); LK2(RD, RB, RA, RE, RC, 7); | ||
661 | S(S7, RD, RB, RA, RE, RC); LK2(RC, RA, RE, RD, RB, 8); | ||
662 | S(S0, RC, RA, RE, RD, RB); LK2(RE, RA, RD, RC, RB, 9); | ||
663 | S(S1, RE, RA, RD, RC, RB); LK2(RB, RD, RC, RE, RA, 10); | ||
664 | S(S2, RB, RD, RC, RE, RA); LK2(RA, RD, RB, RE, RC, 11); | ||
665 | S(S3, RA, RD, RB, RE, RC); LK2(RE, RC, RD, RA, RB, 12); | ||
666 | S(S4, RE, RC, RD, RA, RB); LK2(RC, RD, RA, RB, RE, 13); | ||
667 | S(S5, RC, RD, RA, RB, RE); LK2(RE, RC, RD, RB, RA, 14); | ||
668 | S(S6, RE, RC, RD, RB, RA); LK2(RD, RA, RC, RB, RE, 15); | ||
669 | S(S7, RD, RA, RC, RB, RE); LK2(RE, RC, RB, RD, RA, 16); | ||
670 | S(S0, RE, RC, RB, RD, RA); LK2(RB, RC, RD, RE, RA, 17); | ||
671 | S(S1, RB, RC, RD, RE, RA); LK2(RA, RD, RE, RB, RC, 18); | ||
672 | S(S2, RA, RD, RE, RB, RC); LK2(RC, RD, RA, RB, RE, 19); | ||
673 | S(S3, RC, RD, RA, RB, RE); LK2(RB, RE, RD, RC, RA, 20); | ||
674 | S(S4, RB, RE, RD, RC, RA); LK2(RE, RD, RC, RA, RB, 21); | ||
675 | S(S5, RE, RD, RC, RA, RB); LK2(RB, RE, RD, RA, RC, 22); | ||
676 | S(S6, RB, RE, RD, RA, RC); LK2(RD, RC, RE, RA, RB, 23); | ||
677 | S(S7, RD, RC, RE, RA, RB); LK2(RB, RE, RA, RD, RC, 24); | ||
678 | S(S0, RB, RE, RA, RD, RC); LK2(RA, RE, RD, RB, RC, 25); | ||
679 | S(S1, RA, RE, RD, RB, RC); LK2(RC, RD, RB, RA, RE, 26); | ||
680 | S(S2, RC, RD, RB, RA, RE); LK2(RE, RD, RC, RA, RB, 27); | ||
681 | S(S3, RE, RD, RC, RA, RB); LK2(RA, RB, RD, RE, RC, 28); | ||
682 | S(S4, RA, RB, RD, RE, RC); LK2(RB, RD, RE, RC, RA, 29); | ||
683 | S(S5, RB, RD, RE, RC, RA); LK2(RA, RB, RD, RC, RE, 30); | ||
684 | S(S6, RA, RB, RD, RC, RE); LK2(RD, RE, RB, RC, RA, 31); | ||
685 | S(S7, RD, RE, RB, RC, RA); K2(RA, RB, RC, RD, RE, 32); | ||
686 | |||
687 | leaq (4*4*4)(%rsi), %rax; | ||
688 | |||
689 | testb %cl, %cl; | ||
690 | jnz __enc_xor8; | ||
691 | |||
692 | write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); | ||
693 | write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); | ||
694 | |||
695 | ret; | ||
696 | |||
697 | __enc_xor8: | ||
698 | xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); | ||
699 | xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); | ||
700 | |||
701 | ret; | ||
702 | |||
703 | .align 8 | ||
704 | .global serpent_dec_blk_8way | ||
705 | .type serpent_dec_blk_8way,@function; | ||
706 | |||
707 | serpent_dec_blk_8way: | ||
708 | /* input: | ||
709 | * %rdi: ctx, CTX | ||
710 | * %rsi: dst | ||
711 | * %rdx: src | ||
712 | */ | ||
713 | |||
714 | pcmpeqd RNOT, RNOT; | ||
715 | |||
716 | leaq (4*4*4)(%rdx), %rax; | ||
717 | read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2); | ||
718 | read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); | ||
719 | |||
720 | K2(RA, RB, RC, RD, RE, 32); | ||
721 | SP(SI7, RA, RB, RC, RD, RE, 31); KL2(RB, RD, RA, RE, RC, 31); | ||
722 | SP(SI6, RB, RD, RA, RE, RC, 30); KL2(RA, RC, RE, RB, RD, 30); | ||
723 | SP(SI5, RA, RC, RE, RB, RD, 29); KL2(RC, RD, RA, RE, RB, 29); | ||
724 | SP(SI4, RC, RD, RA, RE, RB, 28); KL2(RC, RA, RB, RE, RD, 28); | ||
725 | SP(SI3, RC, RA, RB, RE, RD, 27); KL2(RB, RC, RD, RE, RA, 27); | ||
726 | SP(SI2, RB, RC, RD, RE, RA, 26); KL2(RC, RA, RE, RD, RB, 26); | ||
727 | SP(SI1, RC, RA, RE, RD, RB, 25); KL2(RB, RA, RE, RD, RC, 25); | ||
728 | SP(SI0, RB, RA, RE, RD, RC, 24); KL2(RE, RC, RA, RB, RD, 24); | ||
729 | SP(SI7, RE, RC, RA, RB, RD, 23); KL2(RC, RB, RE, RD, RA, 23); | ||
730 | SP(SI6, RC, RB, RE, RD, RA, 22); KL2(RE, RA, RD, RC, RB, 22); | ||
731 | SP(SI5, RE, RA, RD, RC, RB, 21); KL2(RA, RB, RE, RD, RC, 21); | ||
732 | SP(SI4, RA, RB, RE, RD, RC, 20); KL2(RA, RE, RC, RD, RB, 20); | ||
733 | SP(SI3, RA, RE, RC, RD, RB, 19); KL2(RC, RA, RB, RD, RE, 19); | ||
734 | SP(SI2, RC, RA, RB, RD, RE, 18); KL2(RA, RE, RD, RB, RC, 18); | ||
735 | SP(SI1, RA, RE, RD, RB, RC, 17); KL2(RC, RE, RD, RB, RA, 17); | ||
736 | SP(SI0, RC, RE, RD, RB, RA, 16); KL2(RD, RA, RE, RC, RB, 16); | ||
737 | SP(SI7, RD, RA, RE, RC, RB, 15); KL2(RA, RC, RD, RB, RE, 15); | ||
738 | SP(SI6, RA, RC, RD, RB, RE, 14); KL2(RD, RE, RB, RA, RC, 14); | ||
739 | SP(SI5, RD, RE, RB, RA, RC, 13); KL2(RE, RC, RD, RB, RA, 13); | ||
740 | SP(SI4, RE, RC, RD, RB, RA, 12); KL2(RE, RD, RA, RB, RC, 12); | ||
741 | SP(SI3, RE, RD, RA, RB, RC, 11); KL2(RA, RE, RC, RB, RD, 11); | ||
742 | SP(SI2, RA, RE, RC, RB, RD, 10); KL2(RE, RD, RB, RC, RA, 10); | ||
743 | SP(SI1, RE, RD, RB, RC, RA, 9); KL2(RA, RD, RB, RC, RE, 9); | ||
744 | SP(SI0, RA, RD, RB, RC, RE, 8); KL2(RB, RE, RD, RA, RC, 8); | ||
745 | SP(SI7, RB, RE, RD, RA, RC, 7); KL2(RE, RA, RB, RC, RD, 7); | ||
746 | SP(SI6, RE, RA, RB, RC, RD, 6); KL2(RB, RD, RC, RE, RA, 6); | ||
747 | SP(SI5, RB, RD, RC, RE, RA, 5); KL2(RD, RA, RB, RC, RE, 5); | ||
748 | SP(SI4, RD, RA, RB, RC, RE, 4); KL2(RD, RB, RE, RC, RA, 4); | ||
749 | SP(SI3, RD, RB, RE, RC, RA, 3); KL2(RE, RD, RA, RC, RB, 3); | ||
750 | SP(SI2, RE, RD, RA, RC, RB, 2); KL2(RD, RB, RC, RA, RE, 2); | ||
751 | SP(SI1, RD, RB, RC, RA, RE, 1); KL2(RE, RB, RC, RA, RD, 1); | ||
752 | S(SI0, RE, RB, RC, RA, RD); K2(RC, RD, RB, RE, RA, 0); | ||
753 | |||
754 | leaq (4*4*4)(%rsi), %rax; | ||
755 | write_blocks(%rsi, RC1, RD1, RB1, RE1, RK0, RK1, RK2); | ||
756 | write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2); | ||
757 | |||
758 | ret; | ||
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c deleted file mode 100644 index 52abaaf28e7..00000000000 --- a/arch/x86/crypto/serpent_avx_glue.c +++ /dev/null | |||
@@ -1,595 +0,0 @@ | |||
1 | /* | ||
2 | * Glue Code for AVX assembler versions of Serpent Cipher | ||
3 | * | ||
4 | * Copyright (C) 2012 Johannes Goetzfried | ||
5 | * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> | ||
6 | * | ||
7 | * Glue code based on serpent_sse2_glue.c by: | ||
8 | * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; either version 2 of the License, or | ||
13 | * (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 | * GNU General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, write to the Free Software | ||
22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
23 | * USA | ||
24 | * | ||
25 | */ | ||
26 | |||
27 | #include <linux/module.h> | ||
28 | #include <linux/hardirq.h> | ||
29 | #include <linux/types.h> | ||
30 | #include <linux/crypto.h> | ||
31 | #include <linux/err.h> | ||
32 | #include <crypto/algapi.h> | ||
33 | #include <crypto/serpent.h> | ||
34 | #include <crypto/cryptd.h> | ||
35 | #include <crypto/b128ops.h> | ||
36 | #include <crypto/ctr.h> | ||
37 | #include <crypto/lrw.h> | ||
38 | #include <crypto/xts.h> | ||
39 | #include <asm/xcr.h> | ||
40 | #include <asm/xsave.h> | ||
41 | #include <asm/crypto/serpent-avx.h> | ||
42 | #include <asm/crypto/ablk_helper.h> | ||
43 | #include <asm/crypto/glue_helper.h> | ||
44 | |||
45 | static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) | ||
46 | { | ||
47 | be128 ctrblk; | ||
48 | |||
49 | le128_to_be128(&ctrblk, iv); | ||
50 | le128_inc(iv); | ||
51 | |||
52 | __serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); | ||
53 | u128_xor(dst, src, (u128 *)&ctrblk); | ||
54 | } | ||
55 | |||
56 | static const struct common_glue_ctx serpent_enc = { | ||
57 | .num_funcs = 2, | ||
58 | .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, | ||
59 | |||
60 | .funcs = { { | ||
61 | .num_blocks = SERPENT_PARALLEL_BLOCKS, | ||
62 | .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_enc_8way_avx) } | ||
63 | }, { | ||
64 | .num_blocks = 1, | ||
65 | .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) } | ||
66 | } } | ||
67 | }; | ||
68 | |||
69 | static const struct common_glue_ctx serpent_ctr = { | ||
70 | .num_funcs = 2, | ||
71 | .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, | ||
72 | |||
73 | .funcs = { { | ||
74 | .num_blocks = SERPENT_PARALLEL_BLOCKS, | ||
75 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_ctr_8way_avx) } | ||
76 | }, { | ||
77 | .num_blocks = 1, | ||
78 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr) } | ||
79 | } } | ||
80 | }; | ||
81 | |||
82 | static const struct common_glue_ctx serpent_dec = { | ||
83 | .num_funcs = 2, | ||
84 | .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, | ||
85 | |||
86 | .funcs = { { | ||
87 | .num_blocks = SERPENT_PARALLEL_BLOCKS, | ||
88 | .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_dec_8way_avx) } | ||
89 | }, { | ||
90 | .num_blocks = 1, | ||
91 | .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) } | ||
92 | } } | ||
93 | }; | ||
94 | |||
95 | static const struct common_glue_ctx serpent_dec_cbc = { | ||
96 | .num_funcs = 2, | ||
97 | .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, | ||
98 | |||
99 | .funcs = { { | ||
100 | .num_blocks = SERPENT_PARALLEL_BLOCKS, | ||
101 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_cbc_dec_8way_avx) } | ||
102 | }, { | ||
103 | .num_blocks = 1, | ||
104 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) } | ||
105 | } } | ||
106 | }; | ||
107 | |||
108 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
109 | struct scatterlist *src, unsigned int nbytes) | ||
110 | { | ||
111 | return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes); | ||
112 | } | ||
113 | |||
114 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
115 | struct scatterlist *src, unsigned int nbytes) | ||
116 | { | ||
117 | return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes); | ||
118 | } | ||
119 | |||
120 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
121 | struct scatterlist *src, unsigned int nbytes) | ||
122 | { | ||
123 | return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc, | ||
124 | dst, src, nbytes); | ||
125 | } | ||
126 | |||
127 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
128 | struct scatterlist *src, unsigned int nbytes) | ||
129 | { | ||
130 | return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src, | ||
131 | nbytes); | ||
132 | } | ||
133 | |||
134 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
135 | struct scatterlist *src, unsigned int nbytes) | ||
136 | { | ||
137 | return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes); | ||
138 | } | ||
139 | |||
140 | static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes) | ||
141 | { | ||
142 | return glue_fpu_begin(SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS, | ||
143 | NULL, fpu_enabled, nbytes); | ||
144 | } | ||
145 | |||
146 | static inline void serpent_fpu_end(bool fpu_enabled) | ||
147 | { | ||
148 | glue_fpu_end(fpu_enabled); | ||
149 | } | ||
150 | |||
151 | struct crypt_priv { | ||
152 | struct serpent_ctx *ctx; | ||
153 | bool fpu_enabled; | ||
154 | }; | ||
155 | |||
156 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
157 | { | ||
158 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | ||
159 | struct crypt_priv *ctx = priv; | ||
160 | int i; | ||
161 | |||
162 | ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); | ||
163 | |||
164 | if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) { | ||
165 | serpent_ecb_enc_8way_avx(ctx->ctx, srcdst, srcdst); | ||
166 | return; | ||
167 | } | ||
168 | |||
169 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
170 | __serpent_encrypt(ctx->ctx, srcdst, srcdst); | ||
171 | } | ||
172 | |||
173 | static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
174 | { | ||
175 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | ||
176 | struct crypt_priv *ctx = priv; | ||
177 | int i; | ||
178 | |||
179 | ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); | ||
180 | |||
181 | if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) { | ||
182 | serpent_ecb_dec_8way_avx(ctx->ctx, srcdst, srcdst); | ||
183 | return; | ||
184 | } | ||
185 | |||
186 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
187 | __serpent_decrypt(ctx->ctx, srcdst, srcdst); | ||
188 | } | ||
189 | |||
190 | struct serpent_lrw_ctx { | ||
191 | struct lrw_table_ctx lrw_table; | ||
192 | struct serpent_ctx serpent_ctx; | ||
193 | }; | ||
194 | |||
195 | static int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
196 | unsigned int keylen) | ||
197 | { | ||
198 | struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); | ||
199 | int err; | ||
200 | |||
201 | err = __serpent_setkey(&ctx->serpent_ctx, key, keylen - | ||
202 | SERPENT_BLOCK_SIZE); | ||
203 | if (err) | ||
204 | return err; | ||
205 | |||
206 | return lrw_init_table(&ctx->lrw_table, key + keylen - | ||
207 | SERPENT_BLOCK_SIZE); | ||
208 | } | ||
209 | |||
210 | static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
211 | struct scatterlist *src, unsigned int nbytes) | ||
212 | { | ||
213 | struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
214 | be128 buf[SERPENT_PARALLEL_BLOCKS]; | ||
215 | struct crypt_priv crypt_ctx = { | ||
216 | .ctx = &ctx->serpent_ctx, | ||
217 | .fpu_enabled = false, | ||
218 | }; | ||
219 | struct lrw_crypt_req req = { | ||
220 | .tbuf = buf, | ||
221 | .tbuflen = sizeof(buf), | ||
222 | |||
223 | .table_ctx = &ctx->lrw_table, | ||
224 | .crypt_ctx = &crypt_ctx, | ||
225 | .crypt_fn = encrypt_callback, | ||
226 | }; | ||
227 | int ret; | ||
228 | |||
229 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
230 | ret = lrw_crypt(desc, dst, src, nbytes, &req); | ||
231 | serpent_fpu_end(crypt_ctx.fpu_enabled); | ||
232 | |||
233 | return ret; | ||
234 | } | ||
235 | |||
236 | static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
237 | struct scatterlist *src, unsigned int nbytes) | ||
238 | { | ||
239 | struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
240 | be128 buf[SERPENT_PARALLEL_BLOCKS]; | ||
241 | struct crypt_priv crypt_ctx = { | ||
242 | .ctx = &ctx->serpent_ctx, | ||
243 | .fpu_enabled = false, | ||
244 | }; | ||
245 | struct lrw_crypt_req req = { | ||
246 | .tbuf = buf, | ||
247 | .tbuflen = sizeof(buf), | ||
248 | |||
249 | .table_ctx = &ctx->lrw_table, | ||
250 | .crypt_ctx = &crypt_ctx, | ||
251 | .crypt_fn = decrypt_callback, | ||
252 | }; | ||
253 | int ret; | ||
254 | |||
255 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
256 | ret = lrw_crypt(desc, dst, src, nbytes, &req); | ||
257 | serpent_fpu_end(crypt_ctx.fpu_enabled); | ||
258 | |||
259 | return ret; | ||
260 | } | ||
261 | |||
262 | static void lrw_exit_tfm(struct crypto_tfm *tfm) | ||
263 | { | ||
264 | struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); | ||
265 | |||
266 | lrw_free_table(&ctx->lrw_table); | ||
267 | } | ||
268 | |||
269 | struct serpent_xts_ctx { | ||
270 | struct serpent_ctx tweak_ctx; | ||
271 | struct serpent_ctx crypt_ctx; | ||
272 | }; | ||
273 | |||
274 | static int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
275 | unsigned int keylen) | ||
276 | { | ||
277 | struct serpent_xts_ctx *ctx = crypto_tfm_ctx(tfm); | ||
278 | u32 *flags = &tfm->crt_flags; | ||
279 | int err; | ||
280 | |||
281 | /* key consists of keys of equal size concatenated, therefore | ||
282 | * the length must be even | ||
283 | */ | ||
284 | if (keylen % 2) { | ||
285 | *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; | ||
286 | return -EINVAL; | ||
287 | } | ||
288 | |||
289 | /* first half of xts-key is for crypt */ | ||
290 | err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2); | ||
291 | if (err) | ||
292 | return err; | ||
293 | |||
294 | /* second half of xts-key is for tweak */ | ||
295 | return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2); | ||
296 | } | ||
297 | |||
298 | static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
299 | struct scatterlist *src, unsigned int nbytes) | ||
300 | { | ||
301 | struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
302 | be128 buf[SERPENT_PARALLEL_BLOCKS]; | ||
303 | struct crypt_priv crypt_ctx = { | ||
304 | .ctx = &ctx->crypt_ctx, | ||
305 | .fpu_enabled = false, | ||
306 | }; | ||
307 | struct xts_crypt_req req = { | ||
308 | .tbuf = buf, | ||
309 | .tbuflen = sizeof(buf), | ||
310 | |||
311 | .tweak_ctx = &ctx->tweak_ctx, | ||
312 | .tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt), | ||
313 | .crypt_ctx = &crypt_ctx, | ||
314 | .crypt_fn = encrypt_callback, | ||
315 | }; | ||
316 | int ret; | ||
317 | |||
318 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
319 | ret = xts_crypt(desc, dst, src, nbytes, &req); | ||
320 | serpent_fpu_end(crypt_ctx.fpu_enabled); | ||
321 | |||
322 | return ret; | ||
323 | } | ||
324 | |||
325 | static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
326 | struct scatterlist *src, unsigned int nbytes) | ||
327 | { | ||
328 | struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
329 | be128 buf[SERPENT_PARALLEL_BLOCKS]; | ||
330 | struct crypt_priv crypt_ctx = { | ||
331 | .ctx = &ctx->crypt_ctx, | ||
332 | .fpu_enabled = false, | ||
333 | }; | ||
334 | struct xts_crypt_req req = { | ||
335 | .tbuf = buf, | ||
336 | .tbuflen = sizeof(buf), | ||
337 | |||
338 | .tweak_ctx = &ctx->tweak_ctx, | ||
339 | .tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt), | ||
340 | .crypt_ctx = &crypt_ctx, | ||
341 | .crypt_fn = decrypt_callback, | ||
342 | }; | ||
343 | int ret; | ||
344 | |||
345 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
346 | ret = xts_crypt(desc, dst, src, nbytes, &req); | ||
347 | serpent_fpu_end(crypt_ctx.fpu_enabled); | ||
348 | |||
349 | return ret; | ||
350 | } | ||
351 | |||
352 | static struct crypto_alg serpent_algs[10] = { { | ||
353 | .cra_name = "__ecb-serpent-avx", | ||
354 | .cra_driver_name = "__driver-ecb-serpent-avx", | ||
355 | .cra_priority = 0, | ||
356 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
357 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
358 | .cra_ctxsize = sizeof(struct serpent_ctx), | ||
359 | .cra_alignmask = 0, | ||
360 | .cra_type = &crypto_blkcipher_type, | ||
361 | .cra_module = THIS_MODULE, | ||
362 | .cra_u = { | ||
363 | .blkcipher = { | ||
364 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
365 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
366 | .setkey = serpent_setkey, | ||
367 | .encrypt = ecb_encrypt, | ||
368 | .decrypt = ecb_decrypt, | ||
369 | }, | ||
370 | }, | ||
371 | }, { | ||
372 | .cra_name = "__cbc-serpent-avx", | ||
373 | .cra_driver_name = "__driver-cbc-serpent-avx", | ||
374 | .cra_priority = 0, | ||
375 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
376 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
377 | .cra_ctxsize = sizeof(struct serpent_ctx), | ||
378 | .cra_alignmask = 0, | ||
379 | .cra_type = &crypto_blkcipher_type, | ||
380 | .cra_module = THIS_MODULE, | ||
381 | .cra_u = { | ||
382 | .blkcipher = { | ||
383 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
384 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
385 | .setkey = serpent_setkey, | ||
386 | .encrypt = cbc_encrypt, | ||
387 | .decrypt = cbc_decrypt, | ||
388 | }, | ||
389 | }, | ||
390 | }, { | ||
391 | .cra_name = "__ctr-serpent-avx", | ||
392 | .cra_driver_name = "__driver-ctr-serpent-avx", | ||
393 | .cra_priority = 0, | ||
394 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
395 | .cra_blocksize = 1, | ||
396 | .cra_ctxsize = sizeof(struct serpent_ctx), | ||
397 | .cra_alignmask = 0, | ||
398 | .cra_type = &crypto_blkcipher_type, | ||
399 | .cra_module = THIS_MODULE, | ||
400 | .cra_u = { | ||
401 | .blkcipher = { | ||
402 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
403 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
404 | .ivsize = SERPENT_BLOCK_SIZE, | ||
405 | .setkey = serpent_setkey, | ||
406 | .encrypt = ctr_crypt, | ||
407 | .decrypt = ctr_crypt, | ||
408 | }, | ||
409 | }, | ||
410 | }, { | ||
411 | .cra_name = "__lrw-serpent-avx", | ||
412 | .cra_driver_name = "__driver-lrw-serpent-avx", | ||
413 | .cra_priority = 0, | ||
414 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
415 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
416 | .cra_ctxsize = sizeof(struct serpent_lrw_ctx), | ||
417 | .cra_alignmask = 0, | ||
418 | .cra_type = &crypto_blkcipher_type, | ||
419 | .cra_module = THIS_MODULE, | ||
420 | .cra_exit = lrw_exit_tfm, | ||
421 | .cra_u = { | ||
422 | .blkcipher = { | ||
423 | .min_keysize = SERPENT_MIN_KEY_SIZE + | ||
424 | SERPENT_BLOCK_SIZE, | ||
425 | .max_keysize = SERPENT_MAX_KEY_SIZE + | ||
426 | SERPENT_BLOCK_SIZE, | ||
427 | .ivsize = SERPENT_BLOCK_SIZE, | ||
428 | .setkey = lrw_serpent_setkey, | ||
429 | .encrypt = lrw_encrypt, | ||
430 | .decrypt = lrw_decrypt, | ||
431 | }, | ||
432 | }, | ||
433 | }, { | ||
434 | .cra_name = "__xts-serpent-avx", | ||
435 | .cra_driver_name = "__driver-xts-serpent-avx", | ||
436 | .cra_priority = 0, | ||
437 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
438 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
439 | .cra_ctxsize = sizeof(struct serpent_xts_ctx), | ||
440 | .cra_alignmask = 0, | ||
441 | .cra_type = &crypto_blkcipher_type, | ||
442 | .cra_module = THIS_MODULE, | ||
443 | .cra_u = { | ||
444 | .blkcipher = { | ||
445 | .min_keysize = SERPENT_MIN_KEY_SIZE * 2, | ||
446 | .max_keysize = SERPENT_MAX_KEY_SIZE * 2, | ||
447 | .ivsize = SERPENT_BLOCK_SIZE, | ||
448 | .setkey = xts_serpent_setkey, | ||
449 | .encrypt = xts_encrypt, | ||
450 | .decrypt = xts_decrypt, | ||
451 | }, | ||
452 | }, | ||
453 | }, { | ||
454 | .cra_name = "ecb(serpent)", | ||
455 | .cra_driver_name = "ecb-serpent-avx", | ||
456 | .cra_priority = 500, | ||
457 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
458 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
459 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
460 | .cra_alignmask = 0, | ||
461 | .cra_type = &crypto_ablkcipher_type, | ||
462 | .cra_module = THIS_MODULE, | ||
463 | .cra_init = ablk_init, | ||
464 | .cra_exit = ablk_exit, | ||
465 | .cra_u = { | ||
466 | .ablkcipher = { | ||
467 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
468 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
469 | .setkey = ablk_set_key, | ||
470 | .encrypt = ablk_encrypt, | ||
471 | .decrypt = ablk_decrypt, | ||
472 | }, | ||
473 | }, | ||
474 | }, { | ||
475 | .cra_name = "cbc(serpent)", | ||
476 | .cra_driver_name = "cbc-serpent-avx", | ||
477 | .cra_priority = 500, | ||
478 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
479 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
480 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
481 | .cra_alignmask = 0, | ||
482 | .cra_type = &crypto_ablkcipher_type, | ||
483 | .cra_module = THIS_MODULE, | ||
484 | .cra_init = ablk_init, | ||
485 | .cra_exit = ablk_exit, | ||
486 | .cra_u = { | ||
487 | .ablkcipher = { | ||
488 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
489 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
490 | .ivsize = SERPENT_BLOCK_SIZE, | ||
491 | .setkey = ablk_set_key, | ||
492 | .encrypt = __ablk_encrypt, | ||
493 | .decrypt = ablk_decrypt, | ||
494 | }, | ||
495 | }, | ||
496 | }, { | ||
497 | .cra_name = "ctr(serpent)", | ||
498 | .cra_driver_name = "ctr-serpent-avx", | ||
499 | .cra_priority = 500, | ||
500 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
501 | .cra_blocksize = 1, | ||
502 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
503 | .cra_alignmask = 0, | ||
504 | .cra_type = &crypto_ablkcipher_type, | ||
505 | .cra_module = THIS_MODULE, | ||
506 | .cra_init = ablk_init, | ||
507 | .cra_exit = ablk_exit, | ||
508 | .cra_u = { | ||
509 | .ablkcipher = { | ||
510 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
511 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
512 | .ivsize = SERPENT_BLOCK_SIZE, | ||
513 | .setkey = ablk_set_key, | ||
514 | .encrypt = ablk_encrypt, | ||
515 | .decrypt = ablk_encrypt, | ||
516 | .geniv = "chainiv", | ||
517 | }, | ||
518 | }, | ||
519 | }, { | ||
520 | .cra_name = "lrw(serpent)", | ||
521 | .cra_driver_name = "lrw-serpent-avx", | ||
522 | .cra_priority = 500, | ||
523 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
524 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
525 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
526 | .cra_alignmask = 0, | ||
527 | .cra_type = &crypto_ablkcipher_type, | ||
528 | .cra_module = THIS_MODULE, | ||
529 | .cra_init = ablk_init, | ||
530 | .cra_exit = ablk_exit, | ||
531 | .cra_u = { | ||
532 | .ablkcipher = { | ||
533 | .min_keysize = SERPENT_MIN_KEY_SIZE + | ||
534 | SERPENT_BLOCK_SIZE, | ||
535 | .max_keysize = SERPENT_MAX_KEY_SIZE + | ||
536 | SERPENT_BLOCK_SIZE, | ||
537 | .ivsize = SERPENT_BLOCK_SIZE, | ||
538 | .setkey = ablk_set_key, | ||
539 | .encrypt = ablk_encrypt, | ||
540 | .decrypt = ablk_decrypt, | ||
541 | }, | ||
542 | }, | ||
543 | }, { | ||
544 | .cra_name = "xts(serpent)", | ||
545 | .cra_driver_name = "xts-serpent-avx", | ||
546 | .cra_priority = 500, | ||
547 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
548 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
549 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
550 | .cra_alignmask = 0, | ||
551 | .cra_type = &crypto_ablkcipher_type, | ||
552 | .cra_module = THIS_MODULE, | ||
553 | .cra_init = ablk_init, | ||
554 | .cra_exit = ablk_exit, | ||
555 | .cra_u = { | ||
556 | .ablkcipher = { | ||
557 | .min_keysize = SERPENT_MIN_KEY_SIZE * 2, | ||
558 | .max_keysize = SERPENT_MAX_KEY_SIZE * 2, | ||
559 | .ivsize = SERPENT_BLOCK_SIZE, | ||
560 | .setkey = ablk_set_key, | ||
561 | .encrypt = ablk_encrypt, | ||
562 | .decrypt = ablk_decrypt, | ||
563 | }, | ||
564 | }, | ||
565 | } }; | ||
566 | |||
567 | static int __init serpent_init(void) | ||
568 | { | ||
569 | u64 xcr0; | ||
570 | |||
571 | if (!cpu_has_avx || !cpu_has_osxsave) { | ||
572 | printk(KERN_INFO "AVX instructions are not detected.\n"); | ||
573 | return -ENODEV; | ||
574 | } | ||
575 | |||
576 | xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); | ||
577 | if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { | ||
578 | printk(KERN_INFO "AVX detected but unusable.\n"); | ||
579 | return -ENODEV; | ||
580 | } | ||
581 | |||
582 | return crypto_register_algs(serpent_algs, ARRAY_SIZE(serpent_algs)); | ||
583 | } | ||
584 | |||
585 | static void __exit serpent_exit(void) | ||
586 | { | ||
587 | crypto_unregister_algs(serpent_algs, ARRAY_SIZE(serpent_algs)); | ||
588 | } | ||
589 | |||
590 | module_init(serpent_init); | ||
591 | module_exit(serpent_exit); | ||
592 | |||
593 | MODULE_DESCRIPTION("Serpent Cipher Algorithm, AVX optimized"); | ||
594 | MODULE_LICENSE("GPL"); | ||
595 | MODULE_ALIAS("serpent"); | ||
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c deleted file mode 100644 index 97a356ece24..00000000000 --- a/arch/x86/crypto/serpent_sse2_glue.c +++ /dev/null | |||
@@ -1,621 +0,0 @@ | |||
1 | /* | ||
2 | * Glue Code for SSE2 assembler versions of Serpent Cipher | ||
3 | * | ||
4 | * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
5 | * | ||
6 | * Glue code based on aesni-intel_glue.c by: | ||
7 | * Copyright (C) 2008, Intel Corp. | ||
8 | * Author: Huang Ying <ying.huang@intel.com> | ||
9 | * | ||
10 | * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: | ||
11 | * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> | ||
12 | * CTR part based on code (crypto/ctr.c) by: | ||
13 | * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com> | ||
14 | * | ||
15 | * This program is free software; you can redistribute it and/or modify | ||
16 | * it under the terms of the GNU General Public License as published by | ||
17 | * the Free Software Foundation; either version 2 of the License, or | ||
18 | * (at your option) any later version. | ||
19 | * | ||
20 | * This program is distributed in the hope that it will be useful, | ||
21 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
22 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
23 | * GNU General Public License for more details. | ||
24 | * | ||
25 | * You should have received a copy of the GNU General Public License | ||
26 | * along with this program; if not, write to the Free Software | ||
27 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
28 | * USA | ||
29 | * | ||
30 | */ | ||
31 | |||
32 | #include <linux/module.h> | ||
33 | #include <linux/hardirq.h> | ||
34 | #include <linux/types.h> | ||
35 | #include <linux/crypto.h> | ||
36 | #include <linux/err.h> | ||
37 | #include <crypto/algapi.h> | ||
38 | #include <crypto/serpent.h> | ||
39 | #include <crypto/cryptd.h> | ||
40 | #include <crypto/b128ops.h> | ||
41 | #include <crypto/ctr.h> | ||
42 | #include <crypto/lrw.h> | ||
43 | #include <crypto/xts.h> | ||
44 | #include <asm/crypto/serpent-sse2.h> | ||
45 | #include <asm/crypto/ablk_helper.h> | ||
46 | #include <asm/crypto/glue_helper.h> | ||
47 | |||
48 | static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src) | ||
49 | { | ||
50 | u128 ivs[SERPENT_PARALLEL_BLOCKS - 1]; | ||
51 | unsigned int j; | ||
52 | |||
53 | for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++) | ||
54 | ivs[j] = src[j]; | ||
55 | |||
56 | serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src); | ||
57 | |||
58 | for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++) | ||
59 | u128_xor(dst + (j + 1), dst + (j + 1), ivs + j); | ||
60 | } | ||
61 | |||
62 | static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) | ||
63 | { | ||
64 | be128 ctrblk; | ||
65 | |||
66 | le128_to_be128(&ctrblk, iv); | ||
67 | le128_inc(iv); | ||
68 | |||
69 | __serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); | ||
70 | u128_xor(dst, src, (u128 *)&ctrblk); | ||
71 | } | ||
72 | |||
73 | static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src, | ||
74 | le128 *iv) | ||
75 | { | ||
76 | be128 ctrblks[SERPENT_PARALLEL_BLOCKS]; | ||
77 | unsigned int i; | ||
78 | |||
79 | for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) { | ||
80 | if (dst != src) | ||
81 | dst[i] = src[i]; | ||
82 | |||
83 | le128_to_be128(&ctrblks[i], iv); | ||
84 | le128_inc(iv); | ||
85 | } | ||
86 | |||
87 | serpent_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks); | ||
88 | } | ||
89 | |||
90 | static const struct common_glue_ctx serpent_enc = { | ||
91 | .num_funcs = 2, | ||
92 | .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, | ||
93 | |||
94 | .funcs = { { | ||
95 | .num_blocks = SERPENT_PARALLEL_BLOCKS, | ||
96 | .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_enc_blk_xway) } | ||
97 | }, { | ||
98 | .num_blocks = 1, | ||
99 | .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) } | ||
100 | } } | ||
101 | }; | ||
102 | |||
103 | static const struct common_glue_ctx serpent_ctr = { | ||
104 | .num_funcs = 2, | ||
105 | .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, | ||
106 | |||
107 | .funcs = { { | ||
108 | .num_blocks = SERPENT_PARALLEL_BLOCKS, | ||
109 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr_xway) } | ||
110 | }, { | ||
111 | .num_blocks = 1, | ||
112 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr) } | ||
113 | } } | ||
114 | }; | ||
115 | |||
116 | static const struct common_glue_ctx serpent_dec = { | ||
117 | .num_funcs = 2, | ||
118 | .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, | ||
119 | |||
120 | .funcs = { { | ||
121 | .num_blocks = SERPENT_PARALLEL_BLOCKS, | ||
122 | .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_dec_blk_xway) } | ||
123 | }, { | ||
124 | .num_blocks = 1, | ||
125 | .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) } | ||
126 | } } | ||
127 | }; | ||
128 | |||
129 | static const struct common_glue_ctx serpent_dec_cbc = { | ||
130 | .num_funcs = 2, | ||
131 | .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, | ||
132 | |||
133 | .funcs = { { | ||
134 | .num_blocks = SERPENT_PARALLEL_BLOCKS, | ||
135 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_decrypt_cbc_xway) } | ||
136 | }, { | ||
137 | .num_blocks = 1, | ||
138 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) } | ||
139 | } } | ||
140 | }; | ||
141 | |||
142 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
143 | struct scatterlist *src, unsigned int nbytes) | ||
144 | { | ||
145 | return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes); | ||
146 | } | ||
147 | |||
148 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
149 | struct scatterlist *src, unsigned int nbytes) | ||
150 | { | ||
151 | return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes); | ||
152 | } | ||
153 | |||
154 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
155 | struct scatterlist *src, unsigned int nbytes) | ||
156 | { | ||
157 | return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc, | ||
158 | dst, src, nbytes); | ||
159 | } | ||
160 | |||
161 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
162 | struct scatterlist *src, unsigned int nbytes) | ||
163 | { | ||
164 | return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src, | ||
165 | nbytes); | ||
166 | } | ||
167 | |||
168 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
169 | struct scatterlist *src, unsigned int nbytes) | ||
170 | { | ||
171 | return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes); | ||
172 | } | ||
173 | |||
174 | static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes) | ||
175 | { | ||
176 | return glue_fpu_begin(SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS, | ||
177 | NULL, fpu_enabled, nbytes); | ||
178 | } | ||
179 | |||
180 | static inline void serpent_fpu_end(bool fpu_enabled) | ||
181 | { | ||
182 | glue_fpu_end(fpu_enabled); | ||
183 | } | ||
184 | |||
185 | struct crypt_priv { | ||
186 | struct serpent_ctx *ctx; | ||
187 | bool fpu_enabled; | ||
188 | }; | ||
189 | |||
190 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
191 | { | ||
192 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | ||
193 | struct crypt_priv *ctx = priv; | ||
194 | int i; | ||
195 | |||
196 | ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); | ||
197 | |||
198 | if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) { | ||
199 | serpent_enc_blk_xway(ctx->ctx, srcdst, srcdst); | ||
200 | return; | ||
201 | } | ||
202 | |||
203 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
204 | __serpent_encrypt(ctx->ctx, srcdst, srcdst); | ||
205 | } | ||
206 | |||
207 | static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
208 | { | ||
209 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | ||
210 | struct crypt_priv *ctx = priv; | ||
211 | int i; | ||
212 | |||
213 | ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); | ||
214 | |||
215 | if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) { | ||
216 | serpent_dec_blk_xway(ctx->ctx, srcdst, srcdst); | ||
217 | return; | ||
218 | } | ||
219 | |||
220 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
221 | __serpent_decrypt(ctx->ctx, srcdst, srcdst); | ||
222 | } | ||
223 | |||
224 | struct serpent_lrw_ctx { | ||
225 | struct lrw_table_ctx lrw_table; | ||
226 | struct serpent_ctx serpent_ctx; | ||
227 | }; | ||
228 | |||
229 | static int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
230 | unsigned int keylen) | ||
231 | { | ||
232 | struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); | ||
233 | int err; | ||
234 | |||
235 | err = __serpent_setkey(&ctx->serpent_ctx, key, keylen - | ||
236 | SERPENT_BLOCK_SIZE); | ||
237 | if (err) | ||
238 | return err; | ||
239 | |||
240 | return lrw_init_table(&ctx->lrw_table, key + keylen - | ||
241 | SERPENT_BLOCK_SIZE); | ||
242 | } | ||
243 | |||
244 | static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
245 | struct scatterlist *src, unsigned int nbytes) | ||
246 | { | ||
247 | struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
248 | be128 buf[SERPENT_PARALLEL_BLOCKS]; | ||
249 | struct crypt_priv crypt_ctx = { | ||
250 | .ctx = &ctx->serpent_ctx, | ||
251 | .fpu_enabled = false, | ||
252 | }; | ||
253 | struct lrw_crypt_req req = { | ||
254 | .tbuf = buf, | ||
255 | .tbuflen = sizeof(buf), | ||
256 | |||
257 | .table_ctx = &ctx->lrw_table, | ||
258 | .crypt_ctx = &crypt_ctx, | ||
259 | .crypt_fn = encrypt_callback, | ||
260 | }; | ||
261 | int ret; | ||
262 | |||
263 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
264 | ret = lrw_crypt(desc, dst, src, nbytes, &req); | ||
265 | serpent_fpu_end(crypt_ctx.fpu_enabled); | ||
266 | |||
267 | return ret; | ||
268 | } | ||
269 | |||
270 | static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
271 | struct scatterlist *src, unsigned int nbytes) | ||
272 | { | ||
273 | struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
274 | be128 buf[SERPENT_PARALLEL_BLOCKS]; | ||
275 | struct crypt_priv crypt_ctx = { | ||
276 | .ctx = &ctx->serpent_ctx, | ||
277 | .fpu_enabled = false, | ||
278 | }; | ||
279 | struct lrw_crypt_req req = { | ||
280 | .tbuf = buf, | ||
281 | .tbuflen = sizeof(buf), | ||
282 | |||
283 | .table_ctx = &ctx->lrw_table, | ||
284 | .crypt_ctx = &crypt_ctx, | ||
285 | .crypt_fn = decrypt_callback, | ||
286 | }; | ||
287 | int ret; | ||
288 | |||
289 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
290 | ret = lrw_crypt(desc, dst, src, nbytes, &req); | ||
291 | serpent_fpu_end(crypt_ctx.fpu_enabled); | ||
292 | |||
293 | return ret; | ||
294 | } | ||
295 | |||
296 | static void lrw_exit_tfm(struct crypto_tfm *tfm) | ||
297 | { | ||
298 | struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); | ||
299 | |||
300 | lrw_free_table(&ctx->lrw_table); | ||
301 | } | ||
302 | |||
303 | struct serpent_xts_ctx { | ||
304 | struct serpent_ctx tweak_ctx; | ||
305 | struct serpent_ctx crypt_ctx; | ||
306 | }; | ||
307 | |||
308 | static int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
309 | unsigned int keylen) | ||
310 | { | ||
311 | struct serpent_xts_ctx *ctx = crypto_tfm_ctx(tfm); | ||
312 | u32 *flags = &tfm->crt_flags; | ||
313 | int err; | ||
314 | |||
315 | /* key consists of keys of equal size concatenated, therefore | ||
316 | * the length must be even | ||
317 | */ | ||
318 | if (keylen % 2) { | ||
319 | *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; | ||
320 | return -EINVAL; | ||
321 | } | ||
322 | |||
323 | /* first half of xts-key is for crypt */ | ||
324 | err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2); | ||
325 | if (err) | ||
326 | return err; | ||
327 | |||
328 | /* second half of xts-key is for tweak */ | ||
329 | return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2); | ||
330 | } | ||
331 | |||
332 | static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
333 | struct scatterlist *src, unsigned int nbytes) | ||
334 | { | ||
335 | struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
336 | be128 buf[SERPENT_PARALLEL_BLOCKS]; | ||
337 | struct crypt_priv crypt_ctx = { | ||
338 | .ctx = &ctx->crypt_ctx, | ||
339 | .fpu_enabled = false, | ||
340 | }; | ||
341 | struct xts_crypt_req req = { | ||
342 | .tbuf = buf, | ||
343 | .tbuflen = sizeof(buf), | ||
344 | |||
345 | .tweak_ctx = &ctx->tweak_ctx, | ||
346 | .tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt), | ||
347 | .crypt_ctx = &crypt_ctx, | ||
348 | .crypt_fn = encrypt_callback, | ||
349 | }; | ||
350 | int ret; | ||
351 | |||
352 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
353 | ret = xts_crypt(desc, dst, src, nbytes, &req); | ||
354 | serpent_fpu_end(crypt_ctx.fpu_enabled); | ||
355 | |||
356 | return ret; | ||
357 | } | ||
358 | |||
359 | static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
360 | struct scatterlist *src, unsigned int nbytes) | ||
361 | { | ||
362 | struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
363 | be128 buf[SERPENT_PARALLEL_BLOCKS]; | ||
364 | struct crypt_priv crypt_ctx = { | ||
365 | .ctx = &ctx->crypt_ctx, | ||
366 | .fpu_enabled = false, | ||
367 | }; | ||
368 | struct xts_crypt_req req = { | ||
369 | .tbuf = buf, | ||
370 | .tbuflen = sizeof(buf), | ||
371 | |||
372 | .tweak_ctx = &ctx->tweak_ctx, | ||
373 | .tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt), | ||
374 | .crypt_ctx = &crypt_ctx, | ||
375 | .crypt_fn = decrypt_callback, | ||
376 | }; | ||
377 | int ret; | ||
378 | |||
379 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
380 | ret = xts_crypt(desc, dst, src, nbytes, &req); | ||
381 | serpent_fpu_end(crypt_ctx.fpu_enabled); | ||
382 | |||
383 | return ret; | ||
384 | } | ||
385 | |||
386 | static struct crypto_alg serpent_algs[10] = { { | ||
387 | .cra_name = "__ecb-serpent-sse2", | ||
388 | .cra_driver_name = "__driver-ecb-serpent-sse2", | ||
389 | .cra_priority = 0, | ||
390 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
391 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
392 | .cra_ctxsize = sizeof(struct serpent_ctx), | ||
393 | .cra_alignmask = 0, | ||
394 | .cra_type = &crypto_blkcipher_type, | ||
395 | .cra_module = THIS_MODULE, | ||
396 | .cra_u = { | ||
397 | .blkcipher = { | ||
398 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
399 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
400 | .setkey = serpent_setkey, | ||
401 | .encrypt = ecb_encrypt, | ||
402 | .decrypt = ecb_decrypt, | ||
403 | }, | ||
404 | }, | ||
405 | }, { | ||
406 | .cra_name = "__cbc-serpent-sse2", | ||
407 | .cra_driver_name = "__driver-cbc-serpent-sse2", | ||
408 | .cra_priority = 0, | ||
409 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
410 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
411 | .cra_ctxsize = sizeof(struct serpent_ctx), | ||
412 | .cra_alignmask = 0, | ||
413 | .cra_type = &crypto_blkcipher_type, | ||
414 | .cra_module = THIS_MODULE, | ||
415 | .cra_u = { | ||
416 | .blkcipher = { | ||
417 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
418 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
419 | .setkey = serpent_setkey, | ||
420 | .encrypt = cbc_encrypt, | ||
421 | .decrypt = cbc_decrypt, | ||
422 | }, | ||
423 | }, | ||
424 | }, { | ||
425 | .cra_name = "__ctr-serpent-sse2", | ||
426 | .cra_driver_name = "__driver-ctr-serpent-sse2", | ||
427 | .cra_priority = 0, | ||
428 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
429 | .cra_blocksize = 1, | ||
430 | .cra_ctxsize = sizeof(struct serpent_ctx), | ||
431 | .cra_alignmask = 0, | ||
432 | .cra_type = &crypto_blkcipher_type, | ||
433 | .cra_module = THIS_MODULE, | ||
434 | .cra_u = { | ||
435 | .blkcipher = { | ||
436 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
437 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
438 | .ivsize = SERPENT_BLOCK_SIZE, | ||
439 | .setkey = serpent_setkey, | ||
440 | .encrypt = ctr_crypt, | ||
441 | .decrypt = ctr_crypt, | ||
442 | }, | ||
443 | }, | ||
444 | }, { | ||
445 | .cra_name = "__lrw-serpent-sse2", | ||
446 | .cra_driver_name = "__driver-lrw-serpent-sse2", | ||
447 | .cra_priority = 0, | ||
448 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
449 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
450 | .cra_ctxsize = sizeof(struct serpent_lrw_ctx), | ||
451 | .cra_alignmask = 0, | ||
452 | .cra_type = &crypto_blkcipher_type, | ||
453 | .cra_module = THIS_MODULE, | ||
454 | .cra_exit = lrw_exit_tfm, | ||
455 | .cra_u = { | ||
456 | .blkcipher = { | ||
457 | .min_keysize = SERPENT_MIN_KEY_SIZE + | ||
458 | SERPENT_BLOCK_SIZE, | ||
459 | .max_keysize = SERPENT_MAX_KEY_SIZE + | ||
460 | SERPENT_BLOCK_SIZE, | ||
461 | .ivsize = SERPENT_BLOCK_SIZE, | ||
462 | .setkey = lrw_serpent_setkey, | ||
463 | .encrypt = lrw_encrypt, | ||
464 | .decrypt = lrw_decrypt, | ||
465 | }, | ||
466 | }, | ||
467 | }, { | ||
468 | .cra_name = "__xts-serpent-sse2", | ||
469 | .cra_driver_name = "__driver-xts-serpent-sse2", | ||
470 | .cra_priority = 0, | ||
471 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
472 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
473 | .cra_ctxsize = sizeof(struct serpent_xts_ctx), | ||
474 | .cra_alignmask = 0, | ||
475 | .cra_type = &crypto_blkcipher_type, | ||
476 | .cra_module = THIS_MODULE, | ||
477 | .cra_u = { | ||
478 | .blkcipher = { | ||
479 | .min_keysize = SERPENT_MIN_KEY_SIZE * 2, | ||
480 | .max_keysize = SERPENT_MAX_KEY_SIZE * 2, | ||
481 | .ivsize = SERPENT_BLOCK_SIZE, | ||
482 | .setkey = xts_serpent_setkey, | ||
483 | .encrypt = xts_encrypt, | ||
484 | .decrypt = xts_decrypt, | ||
485 | }, | ||
486 | }, | ||
487 | }, { | ||
488 | .cra_name = "ecb(serpent)", | ||
489 | .cra_driver_name = "ecb-serpent-sse2", | ||
490 | .cra_priority = 400, | ||
491 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
492 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
493 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
494 | .cra_alignmask = 0, | ||
495 | .cra_type = &crypto_ablkcipher_type, | ||
496 | .cra_module = THIS_MODULE, | ||
497 | .cra_init = ablk_init, | ||
498 | .cra_exit = ablk_exit, | ||
499 | .cra_u = { | ||
500 | .ablkcipher = { | ||
501 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
502 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
503 | .setkey = ablk_set_key, | ||
504 | .encrypt = ablk_encrypt, | ||
505 | .decrypt = ablk_decrypt, | ||
506 | }, | ||
507 | }, | ||
508 | }, { | ||
509 | .cra_name = "cbc(serpent)", | ||
510 | .cra_driver_name = "cbc-serpent-sse2", | ||
511 | .cra_priority = 400, | ||
512 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
513 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
514 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
515 | .cra_alignmask = 0, | ||
516 | .cra_type = &crypto_ablkcipher_type, | ||
517 | .cra_module = THIS_MODULE, | ||
518 | .cra_init = ablk_init, | ||
519 | .cra_exit = ablk_exit, | ||
520 | .cra_u = { | ||
521 | .ablkcipher = { | ||
522 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
523 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
524 | .ivsize = SERPENT_BLOCK_SIZE, | ||
525 | .setkey = ablk_set_key, | ||
526 | .encrypt = __ablk_encrypt, | ||
527 | .decrypt = ablk_decrypt, | ||
528 | }, | ||
529 | }, | ||
530 | }, { | ||
531 | .cra_name = "ctr(serpent)", | ||
532 | .cra_driver_name = "ctr-serpent-sse2", | ||
533 | .cra_priority = 400, | ||
534 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
535 | .cra_blocksize = 1, | ||
536 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
537 | .cra_alignmask = 0, | ||
538 | .cra_type = &crypto_ablkcipher_type, | ||
539 | .cra_module = THIS_MODULE, | ||
540 | .cra_init = ablk_init, | ||
541 | .cra_exit = ablk_exit, | ||
542 | .cra_u = { | ||
543 | .ablkcipher = { | ||
544 | .min_keysize = SERPENT_MIN_KEY_SIZE, | ||
545 | .max_keysize = SERPENT_MAX_KEY_SIZE, | ||
546 | .ivsize = SERPENT_BLOCK_SIZE, | ||
547 | .setkey = ablk_set_key, | ||
548 | .encrypt = ablk_encrypt, | ||
549 | .decrypt = ablk_encrypt, | ||
550 | .geniv = "chainiv", | ||
551 | }, | ||
552 | }, | ||
553 | }, { | ||
554 | .cra_name = "lrw(serpent)", | ||
555 | .cra_driver_name = "lrw-serpent-sse2", | ||
556 | .cra_priority = 400, | ||
557 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
558 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
559 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
560 | .cra_alignmask = 0, | ||
561 | .cra_type = &crypto_ablkcipher_type, | ||
562 | .cra_module = THIS_MODULE, | ||
563 | .cra_init = ablk_init, | ||
564 | .cra_exit = ablk_exit, | ||
565 | .cra_u = { | ||
566 | .ablkcipher = { | ||
567 | .min_keysize = SERPENT_MIN_KEY_SIZE + | ||
568 | SERPENT_BLOCK_SIZE, | ||
569 | .max_keysize = SERPENT_MAX_KEY_SIZE + | ||
570 | SERPENT_BLOCK_SIZE, | ||
571 | .ivsize = SERPENT_BLOCK_SIZE, | ||
572 | .setkey = ablk_set_key, | ||
573 | .encrypt = ablk_encrypt, | ||
574 | .decrypt = ablk_decrypt, | ||
575 | }, | ||
576 | }, | ||
577 | }, { | ||
578 | .cra_name = "xts(serpent)", | ||
579 | .cra_driver_name = "xts-serpent-sse2", | ||
580 | .cra_priority = 400, | ||
581 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
582 | .cra_blocksize = SERPENT_BLOCK_SIZE, | ||
583 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
584 | .cra_alignmask = 0, | ||
585 | .cra_type = &crypto_ablkcipher_type, | ||
586 | .cra_module = THIS_MODULE, | ||
587 | .cra_init = ablk_init, | ||
588 | .cra_exit = ablk_exit, | ||
589 | .cra_u = { | ||
590 | .ablkcipher = { | ||
591 | .min_keysize = SERPENT_MIN_KEY_SIZE * 2, | ||
592 | .max_keysize = SERPENT_MAX_KEY_SIZE * 2, | ||
593 | .ivsize = SERPENT_BLOCK_SIZE, | ||
594 | .setkey = ablk_set_key, | ||
595 | .encrypt = ablk_encrypt, | ||
596 | .decrypt = ablk_decrypt, | ||
597 | }, | ||
598 | }, | ||
599 | } }; | ||
600 | |||
601 | static int __init serpent_sse2_init(void) | ||
602 | { | ||
603 | if (!cpu_has_xmm2) { | ||
604 | printk(KERN_INFO "SSE2 instructions are not detected.\n"); | ||
605 | return -ENODEV; | ||
606 | } | ||
607 | |||
608 | return crypto_register_algs(serpent_algs, ARRAY_SIZE(serpent_algs)); | ||
609 | } | ||
610 | |||
611 | static void __exit serpent_sse2_exit(void) | ||
612 | { | ||
613 | crypto_unregister_algs(serpent_algs, ARRAY_SIZE(serpent_algs)); | ||
614 | } | ||
615 | |||
616 | module_init(serpent_sse2_init); | ||
617 | module_exit(serpent_sse2_exit); | ||
618 | |||
619 | MODULE_DESCRIPTION("Serpent Cipher Algorithm, SSE2 optimized"); | ||
620 | MODULE_LICENSE("GPL"); | ||
621 | MODULE_ALIAS("serpent"); | ||
diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S deleted file mode 100644 index 49d6987a73d..00000000000 --- a/arch/x86/crypto/sha1_ssse3_asm.S +++ /dev/null | |||
@@ -1,558 +0,0 @@ | |||
1 | /* | ||
2 | * This is a SIMD SHA-1 implementation. It requires the Intel(R) Supplemental | ||
3 | * SSE3 instruction set extensions introduced in Intel Core Microarchitecture | ||
4 | * processors. CPUs supporting Intel(R) AVX extensions will get an additional | ||
5 | * boost. | ||
6 | * | ||
7 | * This work was inspired by the vectorized implementation of Dean Gaudet. | ||
8 | * Additional information on it can be found at: | ||
9 | * http://www.arctic.org/~dean/crypto/sha1.html | ||
10 | * | ||
11 | * It was improved upon with more efficient vectorization of the message | ||
12 | * scheduling. This implementation has also been optimized for all current and | ||
13 | * several future generations of Intel CPUs. | ||
14 | * | ||
15 | * See this article for more information about the implementation details: | ||
16 | * http://software.intel.com/en-us/articles/improving-the-performance-of-the-secure-hash-algorithm-1/ | ||
17 | * | ||
18 | * Copyright (C) 2010, Intel Corp. | ||
19 | * Authors: Maxim Locktyukhin <maxim.locktyukhin@intel.com> | ||
20 | * Ronen Zohar <ronen.zohar@intel.com> | ||
21 | * | ||
22 | * Converted to AT&T syntax and adapted for inclusion in the Linux kernel: | ||
23 | * Author: Mathias Krause <minipli@googlemail.com> | ||
24 | * | ||
25 | * This program is free software; you can redistribute it and/or modify | ||
26 | * it under the terms of the GNU General Public License as published by | ||
27 | * the Free Software Foundation; either version 2 of the License, or | ||
28 | * (at your option) any later version. | ||
29 | */ | ||
30 | |||
31 | #define CTX %rdi // arg1 | ||
32 | #define BUF %rsi // arg2 | ||
33 | #define CNT %rdx // arg3 | ||
34 | |||
35 | #define REG_A %ecx | ||
36 | #define REG_B %esi | ||
37 | #define REG_C %edi | ||
38 | #define REG_D %ebp | ||
39 | #define REG_E %edx | ||
40 | |||
41 | #define REG_T1 %eax | ||
42 | #define REG_T2 %ebx | ||
43 | |||
44 | #define K_BASE %r8 | ||
45 | #define HASH_PTR %r9 | ||
46 | #define BUFFER_PTR %r10 | ||
47 | #define BUFFER_END %r11 | ||
48 | |||
49 | #define W_TMP1 %xmm0 | ||
50 | #define W_TMP2 %xmm9 | ||
51 | |||
52 | #define W0 %xmm1 | ||
53 | #define W4 %xmm2 | ||
54 | #define W8 %xmm3 | ||
55 | #define W12 %xmm4 | ||
56 | #define W16 %xmm5 | ||
57 | #define W20 %xmm6 | ||
58 | #define W24 %xmm7 | ||
59 | #define W28 %xmm8 | ||
60 | |||
61 | #define XMM_SHUFB_BSWAP %xmm10 | ||
62 | |||
63 | /* we keep window of 64 w[i]+K pre-calculated values in a circular buffer */ | ||
64 | #define WK(t) (((t) & 15) * 4)(%rsp) | ||
65 | #define W_PRECALC_AHEAD 16 | ||
66 | |||
67 | /* | ||
68 | * This macro implements the SHA-1 function's body for single 64-byte block | ||
69 | * param: function's name | ||
70 | */ | ||
71 | .macro SHA1_VECTOR_ASM name | ||
72 | .global \name | ||
73 | .type \name, @function | ||
74 | .align 32 | ||
75 | \name: | ||
76 | push %rbx | ||
77 | push %rbp | ||
78 | push %r12 | ||
79 | |||
80 | mov %rsp, %r12 | ||
81 | sub $64, %rsp # allocate workspace | ||
82 | and $~15, %rsp # align stack | ||
83 | |||
84 | mov CTX, HASH_PTR | ||
85 | mov BUF, BUFFER_PTR | ||
86 | |||
87 | shl $6, CNT # multiply by 64 | ||
88 | add BUF, CNT | ||
89 | mov CNT, BUFFER_END | ||
90 | |||
91 | lea K_XMM_AR(%rip), K_BASE | ||
92 | xmm_mov BSWAP_SHUFB_CTL(%rip), XMM_SHUFB_BSWAP | ||
93 | |||
94 | SHA1_PIPELINED_MAIN_BODY | ||
95 | |||
96 | # cleanup workspace | ||
97 | mov $8, %ecx | ||
98 | mov %rsp, %rdi | ||
99 | xor %rax, %rax | ||
100 | rep stosq | ||
101 | |||
102 | mov %r12, %rsp # deallocate workspace | ||
103 | |||
104 | pop %r12 | ||
105 | pop %rbp | ||
106 | pop %rbx | ||
107 | ret | ||
108 | |||
109 | .size \name, .-\name | ||
110 | .endm | ||
111 | |||
112 | /* | ||
113 | * This macro implements 80 rounds of SHA-1 for one 64-byte block | ||
114 | */ | ||
115 | .macro SHA1_PIPELINED_MAIN_BODY | ||
116 | INIT_REGALLOC | ||
117 | |||
118 | mov (HASH_PTR), A | ||
119 | mov 4(HASH_PTR), B | ||
120 | mov 8(HASH_PTR), C | ||
121 | mov 12(HASH_PTR), D | ||
122 | mov 16(HASH_PTR), E | ||
123 | |||
124 | .set i, 0 | ||
125 | .rept W_PRECALC_AHEAD | ||
126 | W_PRECALC i | ||
127 | .set i, (i+1) | ||
128 | .endr | ||
129 | |||
130 | .align 4 | ||
131 | 1: | ||
132 | RR F1,A,B,C,D,E,0 | ||
133 | RR F1,D,E,A,B,C,2 | ||
134 | RR F1,B,C,D,E,A,4 | ||
135 | RR F1,E,A,B,C,D,6 | ||
136 | RR F1,C,D,E,A,B,8 | ||
137 | |||
138 | RR F1,A,B,C,D,E,10 | ||
139 | RR F1,D,E,A,B,C,12 | ||
140 | RR F1,B,C,D,E,A,14 | ||
141 | RR F1,E,A,B,C,D,16 | ||
142 | RR F1,C,D,E,A,B,18 | ||
143 | |||
144 | RR F2,A,B,C,D,E,20 | ||
145 | RR F2,D,E,A,B,C,22 | ||
146 | RR F2,B,C,D,E,A,24 | ||
147 | RR F2,E,A,B,C,D,26 | ||
148 | RR F2,C,D,E,A,B,28 | ||
149 | |||
150 | RR F2,A,B,C,D,E,30 | ||
151 | RR F2,D,E,A,B,C,32 | ||
152 | RR F2,B,C,D,E,A,34 | ||
153 | RR F2,E,A,B,C,D,36 | ||
154 | RR F2,C,D,E,A,B,38 | ||
155 | |||
156 | RR F3,A,B,C,D,E,40 | ||
157 | RR F3,D,E,A,B,C,42 | ||
158 | RR F3,B,C,D,E,A,44 | ||
159 | RR F3,E,A,B,C,D,46 | ||
160 | RR F3,C,D,E,A,B,48 | ||
161 | |||
162 | RR F3,A,B,C,D,E,50 | ||
163 | RR F3,D,E,A,B,C,52 | ||
164 | RR F3,B,C,D,E,A,54 | ||
165 | RR F3,E,A,B,C,D,56 | ||
166 | RR F3,C,D,E,A,B,58 | ||
167 | |||
168 | add $64, BUFFER_PTR # move to the next 64-byte block | ||
169 | cmp BUFFER_END, BUFFER_PTR # if the current is the last one use | ||
170 | cmovae K_BASE, BUFFER_PTR # dummy source to avoid buffer overrun | ||
171 | |||
172 | RR F4,A,B,C,D,E,60 | ||
173 | RR F4,D,E,A,B,C,62 | ||
174 | RR F4,B,C,D,E,A,64 | ||
175 | RR F4,E,A,B,C,D,66 | ||
176 | RR F4,C,D,E,A,B,68 | ||
177 | |||
178 | RR F4,A,B,C,D,E,70 | ||
179 | RR F4,D,E,A,B,C,72 | ||
180 | RR F4,B,C,D,E,A,74 | ||
181 | RR F4,E,A,B,C,D,76 | ||
182 | RR F4,C,D,E,A,B,78 | ||
183 | |||
184 | UPDATE_HASH (HASH_PTR), A | ||
185 | UPDATE_HASH 4(HASH_PTR), B | ||
186 | UPDATE_HASH 8(HASH_PTR), C | ||
187 | UPDATE_HASH 12(HASH_PTR), D | ||
188 | UPDATE_HASH 16(HASH_PTR), E | ||
189 | |||
190 | RESTORE_RENAMED_REGS | ||
191 | cmp K_BASE, BUFFER_PTR # K_BASE means, we reached the end | ||
192 | jne 1b | ||
193 | .endm | ||
194 | |||
195 | .macro INIT_REGALLOC | ||
196 | .set A, REG_A | ||
197 | .set B, REG_B | ||
198 | .set C, REG_C | ||
199 | .set D, REG_D | ||
200 | .set E, REG_E | ||
201 | .set T1, REG_T1 | ||
202 | .set T2, REG_T2 | ||
203 | .endm | ||
204 | |||
205 | .macro RESTORE_RENAMED_REGS | ||
206 | # order is important (REG_C is where it should be) | ||
207 | mov B, REG_B | ||
208 | mov D, REG_D | ||
209 | mov A, REG_A | ||
210 | mov E, REG_E | ||
211 | .endm | ||
212 | |||
213 | .macro SWAP_REG_NAMES a, b | ||
214 | .set _T, \a | ||
215 | .set \a, \b | ||
216 | .set \b, _T | ||
217 | .endm | ||
218 | |||
219 | .macro F1 b, c, d | ||
220 | mov \c, T1 | ||
221 | SWAP_REG_NAMES \c, T1 | ||
222 | xor \d, T1 | ||
223 | and \b, T1 | ||
224 | xor \d, T1 | ||
225 | .endm | ||
226 | |||
227 | .macro F2 b, c, d | ||
228 | mov \d, T1 | ||
229 | SWAP_REG_NAMES \d, T1 | ||
230 | xor \c, T1 | ||
231 | xor \b, T1 | ||
232 | .endm | ||
233 | |||
234 | .macro F3 b, c ,d | ||
235 | mov \c, T1 | ||
236 | SWAP_REG_NAMES \c, T1 | ||
237 | mov \b, T2 | ||
238 | or \b, T1 | ||
239 | and \c, T2 | ||
240 | and \d, T1 | ||
241 | or T2, T1 | ||
242 | .endm | ||
243 | |||
244 | .macro F4 b, c, d | ||
245 | F2 \b, \c, \d | ||
246 | .endm | ||
247 | |||
248 | .macro UPDATE_HASH hash, val | ||
249 | add \hash, \val | ||
250 | mov \val, \hash | ||
251 | .endm | ||
252 | |||
253 | /* | ||
254 | * RR does two rounds of SHA-1 back to back with W[] pre-calc | ||
255 | * t1 = F(b, c, d); e += w(i) | ||
256 | * e += t1; b <<= 30; d += w(i+1); | ||
257 | * t1 = F(a, b, c); | ||
258 | * d += t1; a <<= 5; | ||
259 | * e += a; | ||
260 | * t1 = e; a >>= 7; | ||
261 | * t1 <<= 5; | ||
262 | * d += t1; | ||
263 | */ | ||
264 | .macro RR F, a, b, c, d, e, round | ||
265 | add WK(\round), \e | ||
266 | \F \b, \c, \d # t1 = F(b, c, d); | ||
267 | W_PRECALC (\round + W_PRECALC_AHEAD) | ||
268 | rol $30, \b | ||
269 | add T1, \e | ||
270 | add WK(\round + 1), \d | ||
271 | |||
272 | \F \a, \b, \c | ||
273 | W_PRECALC (\round + W_PRECALC_AHEAD + 1) | ||
274 | rol $5, \a | ||
275 | add \a, \e | ||
276 | add T1, \d | ||
277 | ror $7, \a # (a <<r 5) >>r 7) => a <<r 30) | ||
278 | |||
279 | mov \e, T1 | ||
280 | SWAP_REG_NAMES \e, T1 | ||
281 | |||
282 | rol $5, T1 | ||
283 | add T1, \d | ||
284 | |||
285 | # write: \a, \b | ||
286 | # rotate: \a<=\d, \b<=\e, \c<=\a, \d<=\b, \e<=\c | ||
287 | .endm | ||
288 | |||
289 | .macro W_PRECALC r | ||
290 | .set i, \r | ||
291 | |||
292 | .if (i < 20) | ||
293 | .set K_XMM, 0 | ||
294 | .elseif (i < 40) | ||
295 | .set K_XMM, 16 | ||
296 | .elseif (i < 60) | ||
297 | .set K_XMM, 32 | ||
298 | .elseif (i < 80) | ||
299 | .set K_XMM, 48 | ||
300 | .endif | ||
301 | |||
302 | .if ((i < 16) || ((i >= 80) && (i < (80 + W_PRECALC_AHEAD)))) | ||
303 | .set i, ((\r) % 80) # pre-compute for the next iteration | ||
304 | .if (i == 0) | ||
305 | W_PRECALC_RESET | ||
306 | .endif | ||
307 | W_PRECALC_00_15 | ||
308 | .elseif (i<32) | ||
309 | W_PRECALC_16_31 | ||
310 | .elseif (i < 80) // rounds 32-79 | ||
311 | W_PRECALC_32_79 | ||
312 | .endif | ||
313 | .endm | ||
314 | |||
315 | .macro W_PRECALC_RESET | ||
316 | .set W, W0 | ||
317 | .set W_minus_04, W4 | ||
318 | .set W_minus_08, W8 | ||
319 | .set W_minus_12, W12 | ||
320 | .set W_minus_16, W16 | ||
321 | .set W_minus_20, W20 | ||
322 | .set W_minus_24, W24 | ||
323 | .set W_minus_28, W28 | ||
324 | .set W_minus_32, W | ||
325 | .endm | ||
326 | |||
327 | .macro W_PRECALC_ROTATE | ||
328 | .set W_minus_32, W_minus_28 | ||
329 | .set W_minus_28, W_minus_24 | ||
330 | .set W_minus_24, W_minus_20 | ||
331 | .set W_minus_20, W_minus_16 | ||
332 | .set W_minus_16, W_minus_12 | ||
333 | .set W_minus_12, W_minus_08 | ||
334 | .set W_minus_08, W_minus_04 | ||
335 | .set W_minus_04, W | ||
336 | .set W, W_minus_32 | ||
337 | .endm | ||
338 | |||
339 | .macro W_PRECALC_SSSE3 | ||
340 | |||
341 | .macro W_PRECALC_00_15 | ||
342 | W_PRECALC_00_15_SSSE3 | ||
343 | .endm | ||
344 | .macro W_PRECALC_16_31 | ||
345 | W_PRECALC_16_31_SSSE3 | ||
346 | .endm | ||
347 | .macro W_PRECALC_32_79 | ||
348 | W_PRECALC_32_79_SSSE3 | ||
349 | .endm | ||
350 | |||
351 | /* message scheduling pre-compute for rounds 0-15 */ | ||
352 | .macro W_PRECALC_00_15_SSSE3 | ||
353 | .if ((i & 3) == 0) | ||
354 | movdqu (i*4)(BUFFER_PTR), W_TMP1 | ||
355 | .elseif ((i & 3) == 1) | ||
356 | pshufb XMM_SHUFB_BSWAP, W_TMP1 | ||
357 | movdqa W_TMP1, W | ||
358 | .elseif ((i & 3) == 2) | ||
359 | paddd (K_BASE), W_TMP1 | ||
360 | .elseif ((i & 3) == 3) | ||
361 | movdqa W_TMP1, WK(i&~3) | ||
362 | W_PRECALC_ROTATE | ||
363 | .endif | ||
364 | .endm | ||
365 | |||
366 | /* message scheduling pre-compute for rounds 16-31 | ||
367 | * | ||
368 | * - calculating last 32 w[i] values in 8 XMM registers | ||
369 | * - pre-calculate K+w[i] values and store to mem, for later load by ALU add | ||
370 | * instruction | ||
371 | * | ||
372 | * some "heavy-lifting" vectorization for rounds 16-31 due to w[i]->w[i-3] | ||
373 | * dependency, but improves for 32-79 | ||
374 | */ | ||
375 | .macro W_PRECALC_16_31_SSSE3 | ||
376 | # blended scheduling of vector and scalar instruction streams, one 4-wide | ||
377 | # vector iteration / 4 scalar rounds | ||
378 | .if ((i & 3) == 0) | ||
379 | movdqa W_minus_12, W | ||
380 | palignr $8, W_minus_16, W # w[i-14] | ||
381 | movdqa W_minus_04, W_TMP1 | ||
382 | psrldq $4, W_TMP1 # w[i-3] | ||
383 | pxor W_minus_08, W | ||
384 | .elseif ((i & 3) == 1) | ||
385 | pxor W_minus_16, W_TMP1 | ||
386 | pxor W_TMP1, W | ||
387 | movdqa W, W_TMP2 | ||
388 | movdqa W, W_TMP1 | ||
389 | pslldq $12, W_TMP2 | ||
390 | .elseif ((i & 3) == 2) | ||
391 | psrld $31, W | ||
392 | pslld $1, W_TMP1 | ||
393 | por W, W_TMP1 | ||
394 | movdqa W_TMP2, W | ||
395 | psrld $30, W_TMP2 | ||
396 | pslld $2, W | ||
397 | .elseif ((i & 3) == 3) | ||
398 | pxor W, W_TMP1 | ||
399 | pxor W_TMP2, W_TMP1 | ||
400 | movdqa W_TMP1, W | ||
401 | paddd K_XMM(K_BASE), W_TMP1 | ||
402 | movdqa W_TMP1, WK(i&~3) | ||
403 | W_PRECALC_ROTATE | ||
404 | .endif | ||
405 | .endm | ||
406 | |||
407 | /* message scheduling pre-compute for rounds 32-79 | ||
408 | * | ||
409 | * in SHA-1 specification: w[i] = (w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]) rol 1 | ||
410 | * instead we do equal: w[i] = (w[i-6] ^ w[i-16] ^ w[i-28] ^ w[i-32]) rol 2 | ||
411 | * allows more efficient vectorization since w[i]=>w[i-3] dependency is broken | ||
412 | */ | ||
413 | .macro W_PRECALC_32_79_SSSE3 | ||
414 | .if ((i & 3) == 0) | ||
415 | movdqa W_minus_04, W_TMP1 | ||
416 | pxor W_minus_28, W # W is W_minus_32 before xor | ||
417 | palignr $8, W_minus_08, W_TMP1 | ||
418 | .elseif ((i & 3) == 1) | ||
419 | pxor W_minus_16, W | ||
420 | pxor W_TMP1, W | ||
421 | movdqa W, W_TMP1 | ||
422 | .elseif ((i & 3) == 2) | ||
423 | psrld $30, W | ||
424 | pslld $2, W_TMP1 | ||
425 | por W, W_TMP1 | ||
426 | .elseif ((i & 3) == 3) | ||
427 | movdqa W_TMP1, W | ||
428 | paddd K_XMM(K_BASE), W_TMP1 | ||
429 | movdqa W_TMP1, WK(i&~3) | ||
430 | W_PRECALC_ROTATE | ||
431 | .endif | ||
432 | .endm | ||
433 | |||
434 | .endm // W_PRECALC_SSSE3 | ||
435 | |||
436 | |||
437 | #define K1 0x5a827999 | ||
438 | #define K2 0x6ed9eba1 | ||
439 | #define K3 0x8f1bbcdc | ||
440 | #define K4 0xca62c1d6 | ||
441 | |||
442 | .section .rodata | ||
443 | .align 16 | ||
444 | |||
445 | K_XMM_AR: | ||
446 | .long K1, K1, K1, K1 | ||
447 | .long K2, K2, K2, K2 | ||
448 | .long K3, K3, K3, K3 | ||
449 | .long K4, K4, K4, K4 | ||
450 | |||
451 | BSWAP_SHUFB_CTL: | ||
452 | .long 0x00010203 | ||
453 | .long 0x04050607 | ||
454 | .long 0x08090a0b | ||
455 | .long 0x0c0d0e0f | ||
456 | |||
457 | |||
458 | .section .text | ||
459 | |||
460 | W_PRECALC_SSSE3 | ||
461 | .macro xmm_mov a, b | ||
462 | movdqu \a,\b | ||
463 | .endm | ||
464 | |||
465 | /* SSSE3 optimized implementation: | ||
466 | * extern "C" void sha1_transform_ssse3(u32 *digest, const char *data, u32 *ws, | ||
467 | * unsigned int rounds); | ||
468 | */ | ||
469 | SHA1_VECTOR_ASM sha1_transform_ssse3 | ||
470 | |||
471 | #ifdef CONFIG_AS_AVX | ||
472 | |||
473 | .macro W_PRECALC_AVX | ||
474 | |||
475 | .purgem W_PRECALC_00_15 | ||
476 | .macro W_PRECALC_00_15 | ||
477 | W_PRECALC_00_15_AVX | ||
478 | .endm | ||
479 | .purgem W_PRECALC_16_31 | ||
480 | .macro W_PRECALC_16_31 | ||
481 | W_PRECALC_16_31_AVX | ||
482 | .endm | ||
483 | .purgem W_PRECALC_32_79 | ||
484 | .macro W_PRECALC_32_79 | ||
485 | W_PRECALC_32_79_AVX | ||
486 | .endm | ||
487 | |||
488 | .macro W_PRECALC_00_15_AVX | ||
489 | .if ((i & 3) == 0) | ||
490 | vmovdqu (i*4)(BUFFER_PTR), W_TMP1 | ||
491 | .elseif ((i & 3) == 1) | ||
492 | vpshufb XMM_SHUFB_BSWAP, W_TMP1, W | ||
493 | .elseif ((i & 3) == 2) | ||
494 | vpaddd (K_BASE), W, W_TMP1 | ||
495 | .elseif ((i & 3) == 3) | ||
496 | vmovdqa W_TMP1, WK(i&~3) | ||
497 | W_PRECALC_ROTATE | ||
498 | .endif | ||
499 | .endm | ||
500 | |||
501 | .macro W_PRECALC_16_31_AVX | ||
502 | .if ((i & 3) == 0) | ||
503 | vpalignr $8, W_minus_16, W_minus_12, W # w[i-14] | ||
504 | vpsrldq $4, W_minus_04, W_TMP1 # w[i-3] | ||
505 | vpxor W_minus_08, W, W | ||
506 | vpxor W_minus_16, W_TMP1, W_TMP1 | ||
507 | .elseif ((i & 3) == 1) | ||
508 | vpxor W_TMP1, W, W | ||
509 | vpslldq $12, W, W_TMP2 | ||
510 | vpslld $1, W, W_TMP1 | ||
511 | .elseif ((i & 3) == 2) | ||
512 | vpsrld $31, W, W | ||
513 | vpor W, W_TMP1, W_TMP1 | ||
514 | vpslld $2, W_TMP2, W | ||
515 | vpsrld $30, W_TMP2, W_TMP2 | ||
516 | .elseif ((i & 3) == 3) | ||
517 | vpxor W, W_TMP1, W_TMP1 | ||
518 | vpxor W_TMP2, W_TMP1, W | ||
519 | vpaddd K_XMM(K_BASE), W, W_TMP1 | ||
520 | vmovdqu W_TMP1, WK(i&~3) | ||
521 | W_PRECALC_ROTATE | ||
522 | .endif | ||
523 | .endm | ||
524 | |||
525 | .macro W_PRECALC_32_79_AVX | ||
526 | .if ((i & 3) == 0) | ||
527 | vpalignr $8, W_minus_08, W_minus_04, W_TMP1 | ||
528 | vpxor W_minus_28, W, W # W is W_minus_32 before xor | ||
529 | .elseif ((i & 3) == 1) | ||
530 | vpxor W_minus_16, W_TMP1, W_TMP1 | ||
531 | vpxor W_TMP1, W, W | ||
532 | .elseif ((i & 3) == 2) | ||
533 | vpslld $2, W, W_TMP1 | ||
534 | vpsrld $30, W, W | ||
535 | vpor W, W_TMP1, W | ||
536 | .elseif ((i & 3) == 3) | ||
537 | vpaddd K_XMM(K_BASE), W, W_TMP1 | ||
538 | vmovdqu W_TMP1, WK(i&~3) | ||
539 | W_PRECALC_ROTATE | ||
540 | .endif | ||
541 | .endm | ||
542 | |||
543 | .endm // W_PRECALC_AVX | ||
544 | |||
545 | W_PRECALC_AVX | ||
546 | .purgem xmm_mov | ||
547 | .macro xmm_mov a, b | ||
548 | vmovdqu \a,\b | ||
549 | .endm | ||
550 | |||
551 | |||
552 | /* AVX optimized implementation: | ||
553 | * extern "C" void sha1_transform_avx(u32 *digest, const char *data, u32 *ws, | ||
554 | * unsigned int rounds); | ||
555 | */ | ||
556 | SHA1_VECTOR_ASM sha1_transform_avx | ||
557 | |||
558 | #endif | ||
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c deleted file mode 100644 index 4a11a9d7245..00000000000 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ /dev/null | |||
@@ -1,240 +0,0 @@ | |||
1 | /* | ||
2 | * Cryptographic API. | ||
3 | * | ||
4 | * Glue code for the SHA1 Secure Hash Algorithm assembler implementation using | ||
5 | * Supplemental SSE3 instructions. | ||
6 | * | ||
7 | * This file is based on sha1_generic.c | ||
8 | * | ||
9 | * Copyright (c) Alan Smithee. | ||
10 | * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> | ||
11 | * Copyright (c) Jean-Francois Dive <jef@linuxbe.org> | ||
12 | * Copyright (c) Mathias Krause <minipli@googlemail.com> | ||
13 | * | ||
14 | * This program is free software; you can redistribute it and/or modify it | ||
15 | * under the terms of the GNU General Public License as published by the Free | ||
16 | * Software Foundation; either version 2 of the License, or (at your option) | ||
17 | * any later version. | ||
18 | * | ||
19 | */ | ||
20 | |||
21 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
22 | |||
23 | #include <crypto/internal/hash.h> | ||
24 | #include <linux/init.h> | ||
25 | #include <linux/module.h> | ||
26 | #include <linux/mm.h> | ||
27 | #include <linux/cryptohash.h> | ||
28 | #include <linux/types.h> | ||
29 | #include <crypto/sha.h> | ||
30 | #include <asm/byteorder.h> | ||
31 | #include <asm/i387.h> | ||
32 | #include <asm/xcr.h> | ||
33 | #include <asm/xsave.h> | ||
34 | |||
35 | |||
36 | asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data, | ||
37 | unsigned int rounds); | ||
38 | #ifdef CONFIG_AS_AVX | ||
39 | asmlinkage void sha1_transform_avx(u32 *digest, const char *data, | ||
40 | unsigned int rounds); | ||
41 | #endif | ||
42 | |||
43 | static asmlinkage void (*sha1_transform_asm)(u32 *, const char *, unsigned int); | ||
44 | |||
45 | |||
46 | static int sha1_ssse3_init(struct shash_desc *desc) | ||
47 | { | ||
48 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
49 | |||
50 | *sctx = (struct sha1_state){ | ||
51 | .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, | ||
52 | }; | ||
53 | |||
54 | return 0; | ||
55 | } | ||
56 | |||
57 | static int __sha1_ssse3_update(struct shash_desc *desc, const u8 *data, | ||
58 | unsigned int len, unsigned int partial) | ||
59 | { | ||
60 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
61 | unsigned int done = 0; | ||
62 | |||
63 | sctx->count += len; | ||
64 | |||
65 | if (partial) { | ||
66 | done = SHA1_BLOCK_SIZE - partial; | ||
67 | memcpy(sctx->buffer + partial, data, done); | ||
68 | sha1_transform_asm(sctx->state, sctx->buffer, 1); | ||
69 | } | ||
70 | |||
71 | if (len - done >= SHA1_BLOCK_SIZE) { | ||
72 | const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE; | ||
73 | |||
74 | sha1_transform_asm(sctx->state, data + done, rounds); | ||
75 | done += rounds * SHA1_BLOCK_SIZE; | ||
76 | } | ||
77 | |||
78 | memcpy(sctx->buffer, data + done, len - done); | ||
79 | |||
80 | return 0; | ||
81 | } | ||
82 | |||
83 | static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data, | ||
84 | unsigned int len) | ||
85 | { | ||
86 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
87 | unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; | ||
88 | int res; | ||
89 | |||
90 | /* Handle the fast case right here */ | ||
91 | if (partial + len < SHA1_BLOCK_SIZE) { | ||
92 | sctx->count += len; | ||
93 | memcpy(sctx->buffer + partial, data, len); | ||
94 | |||
95 | return 0; | ||
96 | } | ||
97 | |||
98 | if (!irq_fpu_usable()) { | ||
99 | res = crypto_sha1_update(desc, data, len); | ||
100 | } else { | ||
101 | kernel_fpu_begin(); | ||
102 | res = __sha1_ssse3_update(desc, data, len, partial); | ||
103 | kernel_fpu_end(); | ||
104 | } | ||
105 | |||
106 | return res; | ||
107 | } | ||
108 | |||
109 | |||
110 | /* Add padding and return the message digest. */ | ||
111 | static int sha1_ssse3_final(struct shash_desc *desc, u8 *out) | ||
112 | { | ||
113 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
114 | unsigned int i, index, padlen; | ||
115 | __be32 *dst = (__be32 *)out; | ||
116 | __be64 bits; | ||
117 | static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, }; | ||
118 | |||
119 | bits = cpu_to_be64(sctx->count << 3); | ||
120 | |||
121 | /* Pad out to 56 mod 64 and append length */ | ||
122 | index = sctx->count % SHA1_BLOCK_SIZE; | ||
123 | padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index); | ||
124 | if (!irq_fpu_usable()) { | ||
125 | crypto_sha1_update(desc, padding, padlen); | ||
126 | crypto_sha1_update(desc, (const u8 *)&bits, sizeof(bits)); | ||
127 | } else { | ||
128 | kernel_fpu_begin(); | ||
129 | /* We need to fill a whole block for __sha1_ssse3_update() */ | ||
130 | if (padlen <= 56) { | ||
131 | sctx->count += padlen; | ||
132 | memcpy(sctx->buffer + index, padding, padlen); | ||
133 | } else { | ||
134 | __sha1_ssse3_update(desc, padding, padlen, index); | ||
135 | } | ||
136 | __sha1_ssse3_update(desc, (const u8 *)&bits, sizeof(bits), 56); | ||
137 | kernel_fpu_end(); | ||
138 | } | ||
139 | |||
140 | /* Store state in digest */ | ||
141 | for (i = 0; i < 5; i++) | ||
142 | dst[i] = cpu_to_be32(sctx->state[i]); | ||
143 | |||
144 | /* Wipe context */ | ||
145 | memset(sctx, 0, sizeof(*sctx)); | ||
146 | |||
147 | return 0; | ||
148 | } | ||
149 | |||
150 | static int sha1_ssse3_export(struct shash_desc *desc, void *out) | ||
151 | { | ||
152 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
153 | |||
154 | memcpy(out, sctx, sizeof(*sctx)); | ||
155 | |||
156 | return 0; | ||
157 | } | ||
158 | |||
159 | static int sha1_ssse3_import(struct shash_desc *desc, const void *in) | ||
160 | { | ||
161 | struct sha1_state *sctx = shash_desc_ctx(desc); | ||
162 | |||
163 | memcpy(sctx, in, sizeof(*sctx)); | ||
164 | |||
165 | return 0; | ||
166 | } | ||
167 | |||
168 | static struct shash_alg alg = { | ||
169 | .digestsize = SHA1_DIGEST_SIZE, | ||
170 | .init = sha1_ssse3_init, | ||
171 | .update = sha1_ssse3_update, | ||
172 | .final = sha1_ssse3_final, | ||
173 | .export = sha1_ssse3_export, | ||
174 | .import = sha1_ssse3_import, | ||
175 | .descsize = sizeof(struct sha1_state), | ||
176 | .statesize = sizeof(struct sha1_state), | ||
177 | .base = { | ||
178 | .cra_name = "sha1", | ||
179 | .cra_driver_name= "sha1-ssse3", | ||
180 | .cra_priority = 150, | ||
181 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
182 | .cra_blocksize = SHA1_BLOCK_SIZE, | ||
183 | .cra_module = THIS_MODULE, | ||
184 | } | ||
185 | }; | ||
186 | |||
187 | #ifdef CONFIG_AS_AVX | ||
188 | static bool __init avx_usable(void) | ||
189 | { | ||
190 | u64 xcr0; | ||
191 | |||
192 | if (!cpu_has_avx || !cpu_has_osxsave) | ||
193 | return false; | ||
194 | |||
195 | xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); | ||
196 | if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { | ||
197 | pr_info("AVX detected but unusable.\n"); | ||
198 | |||
199 | return false; | ||
200 | } | ||
201 | |||
202 | return true; | ||
203 | } | ||
204 | #endif | ||
205 | |||
206 | static int __init sha1_ssse3_mod_init(void) | ||
207 | { | ||
208 | /* test for SSSE3 first */ | ||
209 | if (cpu_has_ssse3) | ||
210 | sha1_transform_asm = sha1_transform_ssse3; | ||
211 | |||
212 | #ifdef CONFIG_AS_AVX | ||
213 | /* allow AVX to override SSSE3, it's a little faster */ | ||
214 | if (avx_usable()) | ||
215 | sha1_transform_asm = sha1_transform_avx; | ||
216 | #endif | ||
217 | |||
218 | if (sha1_transform_asm) { | ||
219 | pr_info("Using %s optimized SHA-1 implementation\n", | ||
220 | sha1_transform_asm == sha1_transform_ssse3 ? "SSSE3" | ||
221 | : "AVX"); | ||
222 | return crypto_register_shash(&alg); | ||
223 | } | ||
224 | pr_info("Neither AVX nor SSSE3 is available/usable.\n"); | ||
225 | |||
226 | return -ENODEV; | ||
227 | } | ||
228 | |||
229 | static void __exit sha1_ssse3_mod_fini(void) | ||
230 | { | ||
231 | crypto_unregister_shash(&alg); | ||
232 | } | ||
233 | |||
234 | module_init(sha1_ssse3_mod_init); | ||
235 | module_exit(sha1_ssse3_mod_fini); | ||
236 | |||
237 | MODULE_LICENSE("GPL"); | ||
238 | MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, Supplemental SSE3 accelerated"); | ||
239 | |||
240 | MODULE_ALIAS("sha1"); | ||
diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S deleted file mode 100644 index ebac16bfa83..00000000000 --- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S +++ /dev/null | |||
@@ -1,423 +0,0 @@ | |||
1 | /* | ||
2 | * Twofish Cipher 8-way parallel algorithm (AVX/x86_64) | ||
3 | * | ||
4 | * Copyright (C) 2012 Johannes Goetzfried | ||
5 | * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> | ||
6 | * | ||
7 | * Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License as published by | ||
11 | * the Free Software Foundation; either version 2 of the License, or | ||
12 | * (at your option) any later version. | ||
13 | * | ||
14 | * This program is distributed in the hope that it will be useful, | ||
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 | * GNU General Public License for more details. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License | ||
20 | * along with this program; if not, write to the Free Software | ||
21 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
22 | * USA | ||
23 | * | ||
24 | */ | ||
25 | |||
26 | #include "glue_helper-asm-avx.S" | ||
27 | |||
28 | .file "twofish-avx-x86_64-asm_64.S" | ||
29 | |||
30 | .data | ||
31 | .align 16 | ||
32 | |||
33 | .Lbswap128_mask: | ||
34 | .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 | ||
35 | |||
36 | .text | ||
37 | |||
38 | /* structure of crypto context */ | ||
39 | #define s0 0 | ||
40 | #define s1 1024 | ||
41 | #define s2 2048 | ||
42 | #define s3 3072 | ||
43 | #define w 4096 | ||
44 | #define k 4128 | ||
45 | |||
46 | /********************************************************************** | ||
47 | 8-way AVX twofish | ||
48 | **********************************************************************/ | ||
49 | #define CTX %rdi | ||
50 | |||
51 | #define RA1 %xmm0 | ||
52 | #define RB1 %xmm1 | ||
53 | #define RC1 %xmm2 | ||
54 | #define RD1 %xmm3 | ||
55 | |||
56 | #define RA2 %xmm4 | ||
57 | #define RB2 %xmm5 | ||
58 | #define RC2 %xmm6 | ||
59 | #define RD2 %xmm7 | ||
60 | |||
61 | #define RX0 %xmm8 | ||
62 | #define RY0 %xmm9 | ||
63 | |||
64 | #define RX1 %xmm10 | ||
65 | #define RY1 %xmm11 | ||
66 | |||
67 | #define RK1 %xmm12 | ||
68 | #define RK2 %xmm13 | ||
69 | |||
70 | #define RT %xmm14 | ||
71 | #define RR %xmm15 | ||
72 | |||
73 | #define RID1 %rbp | ||
74 | #define RID1d %ebp | ||
75 | #define RID2 %rsi | ||
76 | #define RID2d %esi | ||
77 | |||
78 | #define RGI1 %rdx | ||
79 | #define RGI1bl %dl | ||
80 | #define RGI1bh %dh | ||
81 | #define RGI2 %rcx | ||
82 | #define RGI2bl %cl | ||
83 | #define RGI2bh %ch | ||
84 | |||
85 | #define RGI3 %rax | ||
86 | #define RGI3bl %al | ||
87 | #define RGI3bh %ah | ||
88 | #define RGI4 %rbx | ||
89 | #define RGI4bl %bl | ||
90 | #define RGI4bh %bh | ||
91 | |||
92 | #define RGS1 %r8 | ||
93 | #define RGS1d %r8d | ||
94 | #define RGS2 %r9 | ||
95 | #define RGS2d %r9d | ||
96 | #define RGS3 %r10 | ||
97 | #define RGS3d %r10d | ||
98 | |||
99 | |||
100 | #define lookup_32bit(t0, t1, t2, t3, src, dst, interleave_op, il_reg) \ | ||
101 | movzbl src ## bl, RID1d; \ | ||
102 | movzbl src ## bh, RID2d; \ | ||
103 | shrq $16, src; \ | ||
104 | movl t0(CTX, RID1, 4), dst ## d; \ | ||
105 | movl t1(CTX, RID2, 4), RID2d; \ | ||
106 | movzbl src ## bl, RID1d; \ | ||
107 | xorl RID2d, dst ## d; \ | ||
108 | movzbl src ## bh, RID2d; \ | ||
109 | interleave_op(il_reg); \ | ||
110 | xorl t2(CTX, RID1, 4), dst ## d; \ | ||
111 | xorl t3(CTX, RID2, 4), dst ## d; | ||
112 | |||
113 | #define dummy(d) /* do nothing */ | ||
114 | |||
115 | #define shr_next(reg) \ | ||
116 | shrq $16, reg; | ||
117 | |||
118 | #define G(gi1, gi2, x, t0, t1, t2, t3) \ | ||
119 | lookup_32bit(t0, t1, t2, t3, ##gi1, RGS1, shr_next, ##gi1); \ | ||
120 | lookup_32bit(t0, t1, t2, t3, ##gi2, RGS3, shr_next, ##gi2); \ | ||
121 | \ | ||
122 | lookup_32bit(t0, t1, t2, t3, ##gi1, RGS2, dummy, none); \ | ||
123 | shlq $32, RGS2; \ | ||
124 | orq RGS1, RGS2; \ | ||
125 | lookup_32bit(t0, t1, t2, t3, ##gi2, RGS1, dummy, none); \ | ||
126 | shlq $32, RGS1; \ | ||
127 | orq RGS1, RGS3; | ||
128 | |||
129 | #define round_head_2(a, b, x1, y1, x2, y2) \ | ||
130 | vmovq b ## 1, RGI3; \ | ||
131 | vpextrq $1, b ## 1, RGI4; \ | ||
132 | \ | ||
133 | G(RGI1, RGI2, x1, s0, s1, s2, s3); \ | ||
134 | vmovq a ## 2, RGI1; \ | ||
135 | vpextrq $1, a ## 2, RGI2; \ | ||
136 | vmovq RGS2, x1; \ | ||
137 | vpinsrq $1, RGS3, x1, x1; \ | ||
138 | \ | ||
139 | G(RGI3, RGI4, y1, s1, s2, s3, s0); \ | ||
140 | vmovq b ## 2, RGI3; \ | ||
141 | vpextrq $1, b ## 2, RGI4; \ | ||
142 | vmovq RGS2, y1; \ | ||
143 | vpinsrq $1, RGS3, y1, y1; \ | ||
144 | \ | ||
145 | G(RGI1, RGI2, x2, s0, s1, s2, s3); \ | ||
146 | vmovq RGS2, x2; \ | ||
147 | vpinsrq $1, RGS3, x2, x2; \ | ||
148 | \ | ||
149 | G(RGI3, RGI4, y2, s1, s2, s3, s0); \ | ||
150 | vmovq RGS2, y2; \ | ||
151 | vpinsrq $1, RGS3, y2, y2; | ||
152 | |||
153 | #define encround_tail(a, b, c, d, x, y, prerotate) \ | ||
154 | vpaddd x, y, x; \ | ||
155 | vpaddd x, RK1, RT;\ | ||
156 | prerotate(b); \ | ||
157 | vpxor RT, c, c; \ | ||
158 | vpaddd y, x, y; \ | ||
159 | vpaddd y, RK2, y; \ | ||
160 | vpsrld $1, c, RT; \ | ||
161 | vpslld $(32 - 1), c, c; \ | ||
162 | vpor c, RT, c; \ | ||
163 | vpxor d, y, d; \ | ||
164 | |||
165 | #define decround_tail(a, b, c, d, x, y, prerotate) \ | ||
166 | vpaddd x, y, x; \ | ||
167 | vpaddd x, RK1, RT;\ | ||
168 | prerotate(a); \ | ||
169 | vpxor RT, c, c; \ | ||
170 | vpaddd y, x, y; \ | ||
171 | vpaddd y, RK2, y; \ | ||
172 | vpxor d, y, d; \ | ||
173 | vpsrld $1, d, y; \ | ||
174 | vpslld $(32 - 1), d, d; \ | ||
175 | vpor d, y, d; \ | ||
176 | |||
177 | #define rotate_1l(x) \ | ||
178 | vpslld $1, x, RR; \ | ||
179 | vpsrld $(32 - 1), x, x; \ | ||
180 | vpor x, RR, x; | ||
181 | |||
182 | #define preload_rgi(c) \ | ||
183 | vmovq c, RGI1; \ | ||
184 | vpextrq $1, c, RGI2; | ||
185 | |||
186 | #define encrypt_round(n, a, b, c, d, preload, prerotate) \ | ||
187 | vbroadcastss (k+4*(2*(n)))(CTX), RK1; \ | ||
188 | vbroadcastss (k+4*(2*(n)+1))(CTX), RK2; \ | ||
189 | round_head_2(a, b, RX0, RY0, RX1, RY1); \ | ||
190 | encround_tail(a ## 1, b ## 1, c ## 1, d ## 1, RX0, RY0, prerotate); \ | ||
191 | preload(c ## 1); \ | ||
192 | encround_tail(a ## 2, b ## 2, c ## 2, d ## 2, RX1, RY1, prerotate); | ||
193 | |||
194 | #define decrypt_round(n, a, b, c, d, preload, prerotate) \ | ||
195 | vbroadcastss (k+4*(2*(n)))(CTX), RK1; \ | ||
196 | vbroadcastss (k+4*(2*(n)+1))(CTX), RK2; \ | ||
197 | round_head_2(a, b, RX0, RY0, RX1, RY1); \ | ||
198 | decround_tail(a ## 1, b ## 1, c ## 1, d ## 1, RX0, RY0, prerotate); \ | ||
199 | preload(c ## 1); \ | ||
200 | decround_tail(a ## 2, b ## 2, c ## 2, d ## 2, RX1, RY1, prerotate); | ||
201 | |||
202 | #define encrypt_cycle(n) \ | ||
203 | encrypt_round((2*n), RA, RB, RC, RD, preload_rgi, rotate_1l); \ | ||
204 | encrypt_round(((2*n) + 1), RC, RD, RA, RB, preload_rgi, rotate_1l); | ||
205 | |||
206 | #define encrypt_cycle_last(n) \ | ||
207 | encrypt_round((2*n), RA, RB, RC, RD, preload_rgi, rotate_1l); \ | ||
208 | encrypt_round(((2*n) + 1), RC, RD, RA, RB, dummy, dummy); | ||
209 | |||
210 | #define decrypt_cycle(n) \ | ||
211 | decrypt_round(((2*n) + 1), RC, RD, RA, RB, preload_rgi, rotate_1l); \ | ||
212 | decrypt_round((2*n), RA, RB, RC, RD, preload_rgi, rotate_1l); | ||
213 | |||
214 | #define decrypt_cycle_last(n) \ | ||
215 | decrypt_round(((2*n) + 1), RC, RD, RA, RB, preload_rgi, rotate_1l); \ | ||
216 | decrypt_round((2*n), RA, RB, RC, RD, dummy, dummy); | ||
217 | |||
218 | #define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ | ||
219 | vpunpckldq x1, x0, t0; \ | ||
220 | vpunpckhdq x1, x0, t2; \ | ||
221 | vpunpckldq x3, x2, t1; \ | ||
222 | vpunpckhdq x3, x2, x3; \ | ||
223 | \ | ||
224 | vpunpcklqdq t1, t0, x0; \ | ||
225 | vpunpckhqdq t1, t0, x1; \ | ||
226 | vpunpcklqdq x3, t2, x2; \ | ||
227 | vpunpckhqdq x3, t2, x3; | ||
228 | |||
229 | #define inpack_blocks(x0, x1, x2, x3, wkey, t0, t1, t2) \ | ||
230 | vpxor x0, wkey, x0; \ | ||
231 | vpxor x1, wkey, x1; \ | ||
232 | vpxor x2, wkey, x2; \ | ||
233 | vpxor x3, wkey, x3; \ | ||
234 | \ | ||
235 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) | ||
236 | |||
237 | #define outunpack_blocks(x0, x1, x2, x3, wkey, t0, t1, t2) \ | ||
238 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ | ||
239 | \ | ||
240 | vpxor x0, wkey, x0; \ | ||
241 | vpxor x1, wkey, x1; \ | ||
242 | vpxor x2, wkey, x2; \ | ||
243 | vpxor x3, wkey, x3; | ||
244 | |||
245 | .align 8 | ||
246 | .type __twofish_enc_blk8,@function; | ||
247 | |||
248 | __twofish_enc_blk8: | ||
249 | /* input: | ||
250 | * %rdi: ctx, CTX | ||
251 | * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks | ||
252 | * output: | ||
253 | * RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2: encrypted blocks | ||
254 | */ | ||
255 | |||
256 | vmovdqu w(CTX), RK1; | ||
257 | |||
258 | pushq %rbp; | ||
259 | pushq %rbx; | ||
260 | pushq %rcx; | ||
261 | |||
262 | inpack_blocks(RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2); | ||
263 | preload_rgi(RA1); | ||
264 | rotate_1l(RD1); | ||
265 | inpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2); | ||
266 | rotate_1l(RD2); | ||
267 | |||
268 | encrypt_cycle(0); | ||
269 | encrypt_cycle(1); | ||
270 | encrypt_cycle(2); | ||
271 | encrypt_cycle(3); | ||
272 | encrypt_cycle(4); | ||
273 | encrypt_cycle(5); | ||
274 | encrypt_cycle(6); | ||
275 | encrypt_cycle_last(7); | ||
276 | |||
277 | vmovdqu (w+4*4)(CTX), RK1; | ||
278 | |||
279 | popq %rcx; | ||
280 | popq %rbx; | ||
281 | popq %rbp; | ||
282 | |||
283 | outunpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2); | ||
284 | outunpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2); | ||
285 | |||
286 | ret; | ||
287 | |||
288 | .align 8 | ||
289 | .type __twofish_dec_blk8,@function; | ||
290 | |||
291 | __twofish_dec_blk8: | ||
292 | /* input: | ||
293 | * %rdi: ctx, CTX | ||
294 | * RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2: encrypted blocks | ||
295 | * output: | ||
296 | * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: decrypted blocks | ||
297 | */ | ||
298 | |||
299 | vmovdqu (w+4*4)(CTX), RK1; | ||
300 | |||
301 | pushq %rbp; | ||
302 | pushq %rbx; | ||
303 | |||
304 | inpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2); | ||
305 | preload_rgi(RC1); | ||
306 | rotate_1l(RA1); | ||
307 | inpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2); | ||
308 | rotate_1l(RA2); | ||
309 | |||
310 | decrypt_cycle(7); | ||
311 | decrypt_cycle(6); | ||
312 | decrypt_cycle(5); | ||
313 | decrypt_cycle(4); | ||
314 | decrypt_cycle(3); | ||
315 | decrypt_cycle(2); | ||
316 | decrypt_cycle(1); | ||
317 | decrypt_cycle_last(0); | ||
318 | |||
319 | vmovdqu (w)(CTX), RK1; | ||
320 | |||
321 | popq %rbx; | ||
322 | popq %rbp; | ||
323 | |||
324 | outunpack_blocks(RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2); | ||
325 | outunpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2); | ||
326 | |||
327 | ret; | ||
328 | |||
329 | .align 8 | ||
330 | .global twofish_ecb_enc_8way | ||
331 | .type twofish_ecb_enc_8way,@function; | ||
332 | |||
333 | twofish_ecb_enc_8way: | ||
334 | /* input: | ||
335 | * %rdi: ctx, CTX | ||
336 | * %rsi: dst | ||
337 | * %rdx: src | ||
338 | */ | ||
339 | |||
340 | movq %rsi, %r11; | ||
341 | |||
342 | load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | ||
343 | |||
344 | call __twofish_enc_blk8; | ||
345 | |||
346 | store_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2); | ||
347 | |||
348 | ret; | ||
349 | |||
350 | .align 8 | ||
351 | .global twofish_ecb_dec_8way | ||
352 | .type twofish_ecb_dec_8way,@function; | ||
353 | |||
354 | twofish_ecb_dec_8way: | ||
355 | /* input: | ||
356 | * %rdi: ctx, CTX | ||
357 | * %rsi: dst | ||
358 | * %rdx: src | ||
359 | */ | ||
360 | |||
361 | movq %rsi, %r11; | ||
362 | |||
363 | load_8way(%rdx, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2); | ||
364 | |||
365 | call __twofish_dec_blk8; | ||
366 | |||
367 | store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | ||
368 | |||
369 | ret; | ||
370 | |||
371 | .align 8 | ||
372 | .global twofish_cbc_dec_8way | ||
373 | .type twofish_cbc_dec_8way,@function; | ||
374 | |||
375 | twofish_cbc_dec_8way: | ||
376 | /* input: | ||
377 | * %rdi: ctx, CTX | ||
378 | * %rsi: dst | ||
379 | * %rdx: src | ||
380 | */ | ||
381 | |||
382 | pushq %r12; | ||
383 | |||
384 | movq %rsi, %r11; | ||
385 | movq %rdx, %r12; | ||
386 | |||
387 | load_8way(%rdx, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2); | ||
388 | |||
389 | call __twofish_dec_blk8; | ||
390 | |||
391 | store_cbc_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | ||
392 | |||
393 | popq %r12; | ||
394 | |||
395 | ret; | ||
396 | |||
397 | .align 8 | ||
398 | .global twofish_ctr_8way | ||
399 | .type twofish_ctr_8way,@function; | ||
400 | |||
401 | twofish_ctr_8way: | ||
402 | /* input: | ||
403 | * %rdi: ctx, CTX | ||
404 | * %rsi: dst | ||
405 | * %rdx: src | ||
406 | * %rcx: iv (little endian, 128bit) | ||
407 | */ | ||
408 | |||
409 | pushq %r12; | ||
410 | |||
411 | movq %rsi, %r11; | ||
412 | movq %rdx, %r12; | ||
413 | |||
414 | load_ctr_8way(%rcx, .Lbswap128_mask, RA1, RB1, RC1, RD1, RA2, RB2, RC2, | ||
415 | RD2, RX0, RX1, RY0); | ||
416 | |||
417 | call __twofish_enc_blk8; | ||
418 | |||
419 | store_ctr_8way(%r12, %r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2); | ||
420 | |||
421 | popq %r12; | ||
422 | |||
423 | ret; | ||
diff --git a/arch/x86/crypto/twofish-i586-asm_32.S b/arch/x86/crypto/twofish-i586-asm_32.S index 658af4bb35c..575331cb2a8 100644 --- a/arch/x86/crypto/twofish-i586-asm_32.S +++ b/arch/x86/crypto/twofish-i586-asm_32.S | |||
@@ -26,7 +26,7 @@ | |||
26 | 26 | ||
27 | #define in_blk 12 /* input byte array address parameter*/ | 27 | #define in_blk 12 /* input byte array address parameter*/ |
28 | #define out_blk 8 /* output byte array address parameter*/ | 28 | #define out_blk 8 /* output byte array address parameter*/ |
29 | #define ctx 4 /* Twofish context structure */ | 29 | #define tfm 4 /* Twofish context structure */ |
30 | 30 | ||
31 | #define a_offset 0 | 31 | #define a_offset 0 |
32 | #define b_offset 4 | 32 | #define b_offset 4 |
@@ -229,8 +229,8 @@ twofish_enc_blk: | |||
229 | push %esi | 229 | push %esi |
230 | push %edi | 230 | push %edi |
231 | 231 | ||
232 | mov ctx + 16(%esp), %ebp /* abuse the base pointer: set new base | 232 | mov tfm + 16(%esp), %ebp /* abuse the base pointer: set new base bointer to the crypto tfm */ |
233 | * pointer to the ctx address */ | 233 | add $crypto_tfm_ctx_offset, %ebp /* ctx address */ |
234 | mov in_blk+16(%esp),%edi /* input address in edi */ | 234 | mov in_blk+16(%esp),%edi /* input address in edi */ |
235 | 235 | ||
236 | mov (%edi), %eax | 236 | mov (%edi), %eax |
@@ -285,8 +285,8 @@ twofish_dec_blk: | |||
285 | push %edi | 285 | push %edi |
286 | 286 | ||
287 | 287 | ||
288 | mov ctx + 16(%esp), %ebp /* abuse the base pointer: set new base | 288 | mov tfm + 16(%esp), %ebp /* abuse the base pointer: set new base bointer to the crypto tfm */ |
289 | * pointer to the ctx address */ | 289 | add $crypto_tfm_ctx_offset, %ebp /* ctx address */ |
290 | mov in_blk+16(%esp),%edi /* input address in edi */ | 290 | mov in_blk+16(%esp),%edi /* input address in edi */ |
291 | 291 | ||
292 | mov (%edi), %eax | 292 | mov (%edi), %eax |
diff --git a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S deleted file mode 100644 index 5b012a2c511..00000000000 --- a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S +++ /dev/null | |||
@@ -1,316 +0,0 @@ | |||
1 | /* | ||
2 | * Twofish Cipher 3-way parallel algorithm (x86_64) | ||
3 | * | ||
4 | * Copyright (C) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
19 | * USA | ||
20 | * | ||
21 | */ | ||
22 | |||
23 | .file "twofish-x86_64-asm-3way.S" | ||
24 | .text | ||
25 | |||
26 | /* structure of crypto context */ | ||
27 | #define s0 0 | ||
28 | #define s1 1024 | ||
29 | #define s2 2048 | ||
30 | #define s3 3072 | ||
31 | #define w 4096 | ||
32 | #define k 4128 | ||
33 | |||
34 | /********************************************************************** | ||
35 | 3-way twofish | ||
36 | **********************************************************************/ | ||
37 | #define CTX %rdi | ||
38 | #define RIO %rdx | ||
39 | |||
40 | #define RAB0 %rax | ||
41 | #define RAB1 %rbx | ||
42 | #define RAB2 %rcx | ||
43 | |||
44 | #define RAB0d %eax | ||
45 | #define RAB1d %ebx | ||
46 | #define RAB2d %ecx | ||
47 | |||
48 | #define RAB0bh %ah | ||
49 | #define RAB1bh %bh | ||
50 | #define RAB2bh %ch | ||
51 | |||
52 | #define RAB0bl %al | ||
53 | #define RAB1bl %bl | ||
54 | #define RAB2bl %cl | ||
55 | |||
56 | #define RCD0 %r8 | ||
57 | #define RCD1 %r9 | ||
58 | #define RCD2 %r10 | ||
59 | |||
60 | #define RCD0d %r8d | ||
61 | #define RCD1d %r9d | ||
62 | #define RCD2d %r10d | ||
63 | |||
64 | #define RX0 %rbp | ||
65 | #define RX1 %r11 | ||
66 | #define RX2 %r12 | ||
67 | |||
68 | #define RX0d %ebp | ||
69 | #define RX1d %r11d | ||
70 | #define RX2d %r12d | ||
71 | |||
72 | #define RY0 %r13 | ||
73 | #define RY1 %r14 | ||
74 | #define RY2 %r15 | ||
75 | |||
76 | #define RY0d %r13d | ||
77 | #define RY1d %r14d | ||
78 | #define RY2d %r15d | ||
79 | |||
80 | #define RT0 %rdx | ||
81 | #define RT1 %rsi | ||
82 | |||
83 | #define RT0d %edx | ||
84 | #define RT1d %esi | ||
85 | |||
86 | #define do16bit_ror(rot, op1, op2, T0, T1, tmp1, tmp2, ab, dst) \ | ||
87 | movzbl ab ## bl, tmp2 ## d; \ | ||
88 | movzbl ab ## bh, tmp1 ## d; \ | ||
89 | rorq $(rot), ab; \ | ||
90 | op1##l T0(CTX, tmp2, 4), dst ## d; \ | ||
91 | op2##l T1(CTX, tmp1, 4), dst ## d; | ||
92 | |||
93 | /* | ||
94 | * Combined G1 & G2 function. Reordered with help of rotates to have moves | ||
95 | * at begining. | ||
96 | */ | ||
97 | #define g1g2_3(ab, cd, Tx0, Tx1, Tx2, Tx3, Ty0, Ty1, Ty2, Ty3, x, y) \ | ||
98 | /* G1,1 && G2,1 */ \ | ||
99 | do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 0, ab ## 0, x ## 0); \ | ||
100 | do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 0, ab ## 0, y ## 0); \ | ||
101 | \ | ||
102 | do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 1, ab ## 1, x ## 1); \ | ||
103 | do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 1, ab ## 1, y ## 1); \ | ||
104 | \ | ||
105 | do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 2, ab ## 2, x ## 2); \ | ||
106 | do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 2, ab ## 2, y ## 2); \ | ||
107 | \ | ||
108 | /* G1,2 && G2,2 */ \ | ||
109 | do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 0, x ## 0); \ | ||
110 | do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 0, y ## 0); \ | ||
111 | xchgq cd ## 0, ab ## 0; \ | ||
112 | \ | ||
113 | do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 1, x ## 1); \ | ||
114 | do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 1, y ## 1); \ | ||
115 | xchgq cd ## 1, ab ## 1; \ | ||
116 | \ | ||
117 | do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 2, x ## 2); \ | ||
118 | do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 2, y ## 2); \ | ||
119 | xchgq cd ## 2, ab ## 2; | ||
120 | |||
121 | #define enc_round_end(ab, x, y, n) \ | ||
122 | addl y ## d, x ## d; \ | ||
123 | addl x ## d, y ## d; \ | ||
124 | addl k+4*(2*(n))(CTX), x ## d; \ | ||
125 | xorl ab ## d, x ## d; \ | ||
126 | addl k+4*(2*(n)+1)(CTX), y ## d; \ | ||
127 | shrq $32, ab; \ | ||
128 | roll $1, ab ## d; \ | ||
129 | xorl y ## d, ab ## d; \ | ||
130 | shlq $32, ab; \ | ||
131 | rorl $1, x ## d; \ | ||
132 | orq x, ab; | ||
133 | |||
134 | #define dec_round_end(ba, x, y, n) \ | ||
135 | addl y ## d, x ## d; \ | ||
136 | addl x ## d, y ## d; \ | ||
137 | addl k+4*(2*(n))(CTX), x ## d; \ | ||
138 | addl k+4*(2*(n)+1)(CTX), y ## d; \ | ||
139 | xorl ba ## d, y ## d; \ | ||
140 | shrq $32, ba; \ | ||
141 | roll $1, ba ## d; \ | ||
142 | xorl x ## d, ba ## d; \ | ||
143 | shlq $32, ba; \ | ||
144 | rorl $1, y ## d; \ | ||
145 | orq y, ba; | ||
146 | |||
147 | #define encrypt_round3(ab, cd, n) \ | ||
148 | g1g2_3(ab, cd, s0, s1, s2, s3, s0, s1, s2, s3, RX, RY); \ | ||
149 | \ | ||
150 | enc_round_end(ab ## 0, RX0, RY0, n); \ | ||
151 | enc_round_end(ab ## 1, RX1, RY1, n); \ | ||
152 | enc_round_end(ab ## 2, RX2, RY2, n); | ||
153 | |||
154 | #define decrypt_round3(ba, dc, n) \ | ||
155 | g1g2_3(ba, dc, s1, s2, s3, s0, s3, s0, s1, s2, RY, RX); \ | ||
156 | \ | ||
157 | dec_round_end(ba ## 0, RX0, RY0, n); \ | ||
158 | dec_round_end(ba ## 1, RX1, RY1, n); \ | ||
159 | dec_round_end(ba ## 2, RX2, RY2, n); | ||
160 | |||
161 | #define encrypt_cycle3(ab, cd, n) \ | ||
162 | encrypt_round3(ab, cd, n*2); \ | ||
163 | encrypt_round3(ab, cd, (n*2)+1); | ||
164 | |||
165 | #define decrypt_cycle3(ba, dc, n) \ | ||
166 | decrypt_round3(ba, dc, (n*2)+1); \ | ||
167 | decrypt_round3(ba, dc, (n*2)); | ||
168 | |||
169 | #define inpack3(in, n, xy, m) \ | ||
170 | movq 4*(n)(in), xy ## 0; \ | ||
171 | xorq w+4*m(CTX), xy ## 0; \ | ||
172 | \ | ||
173 | movq 4*(4+(n))(in), xy ## 1; \ | ||
174 | xorq w+4*m(CTX), xy ## 1; \ | ||
175 | \ | ||
176 | movq 4*(8+(n))(in), xy ## 2; \ | ||
177 | xorq w+4*m(CTX), xy ## 2; | ||
178 | |||
179 | #define outunpack3(op, out, n, xy, m) \ | ||
180 | xorq w+4*m(CTX), xy ## 0; \ | ||
181 | op ## q xy ## 0, 4*(n)(out); \ | ||
182 | \ | ||
183 | xorq w+4*m(CTX), xy ## 1; \ | ||
184 | op ## q xy ## 1, 4*(4+(n))(out); \ | ||
185 | \ | ||
186 | xorq w+4*m(CTX), xy ## 2; \ | ||
187 | op ## q xy ## 2, 4*(8+(n))(out); | ||
188 | |||
189 | #define inpack_enc3() \ | ||
190 | inpack3(RIO, 0, RAB, 0); \ | ||
191 | inpack3(RIO, 2, RCD, 2); | ||
192 | |||
193 | #define outunpack_enc3(op) \ | ||
194 | outunpack3(op, RIO, 2, RAB, 6); \ | ||
195 | outunpack3(op, RIO, 0, RCD, 4); | ||
196 | |||
197 | #define inpack_dec3() \ | ||
198 | inpack3(RIO, 0, RAB, 4); \ | ||
199 | rorq $32, RAB0; \ | ||
200 | rorq $32, RAB1; \ | ||
201 | rorq $32, RAB2; \ | ||
202 | inpack3(RIO, 2, RCD, 6); \ | ||
203 | rorq $32, RCD0; \ | ||
204 | rorq $32, RCD1; \ | ||
205 | rorq $32, RCD2; | ||
206 | |||
207 | #define outunpack_dec3() \ | ||
208 | rorq $32, RCD0; \ | ||
209 | rorq $32, RCD1; \ | ||
210 | rorq $32, RCD2; \ | ||
211 | outunpack3(mov, RIO, 0, RCD, 0); \ | ||
212 | rorq $32, RAB0; \ | ||
213 | rorq $32, RAB1; \ | ||
214 | rorq $32, RAB2; \ | ||
215 | outunpack3(mov, RIO, 2, RAB, 2); | ||
216 | |||
217 | .align 8 | ||
218 | .global __twofish_enc_blk_3way | ||
219 | .type __twofish_enc_blk_3way,@function; | ||
220 | |||
221 | __twofish_enc_blk_3way: | ||
222 | /* input: | ||
223 | * %rdi: ctx, CTX | ||
224 | * %rsi: dst | ||
225 | * %rdx: src, RIO | ||
226 | * %rcx: bool, if true: xor output | ||
227 | */ | ||
228 | pushq %r15; | ||
229 | pushq %r14; | ||
230 | pushq %r13; | ||
231 | pushq %r12; | ||
232 | pushq %rbp; | ||
233 | pushq %rbx; | ||
234 | |||
235 | pushq %rcx; /* bool xor */ | ||
236 | pushq %rsi; /* dst */ | ||
237 | |||
238 | inpack_enc3(); | ||
239 | |||
240 | encrypt_cycle3(RAB, RCD, 0); | ||
241 | encrypt_cycle3(RAB, RCD, 1); | ||
242 | encrypt_cycle3(RAB, RCD, 2); | ||
243 | encrypt_cycle3(RAB, RCD, 3); | ||
244 | encrypt_cycle3(RAB, RCD, 4); | ||
245 | encrypt_cycle3(RAB, RCD, 5); | ||
246 | encrypt_cycle3(RAB, RCD, 6); | ||
247 | encrypt_cycle3(RAB, RCD, 7); | ||
248 | |||
249 | popq RIO; /* dst */ | ||
250 | popq %rbp; /* bool xor */ | ||
251 | |||
252 | testb %bpl, %bpl; | ||
253 | jnz __enc_xor3; | ||
254 | |||
255 | outunpack_enc3(mov); | ||
256 | |||
257 | popq %rbx; | ||
258 | popq %rbp; | ||
259 | popq %r12; | ||
260 | popq %r13; | ||
261 | popq %r14; | ||
262 | popq %r15; | ||
263 | ret; | ||
264 | |||
265 | __enc_xor3: | ||
266 | outunpack_enc3(xor); | ||
267 | |||
268 | popq %rbx; | ||
269 | popq %rbp; | ||
270 | popq %r12; | ||
271 | popq %r13; | ||
272 | popq %r14; | ||
273 | popq %r15; | ||
274 | ret; | ||
275 | |||
276 | .global twofish_dec_blk_3way | ||
277 | .type twofish_dec_blk_3way,@function; | ||
278 | |||
279 | twofish_dec_blk_3way: | ||
280 | /* input: | ||
281 | * %rdi: ctx, CTX | ||
282 | * %rsi: dst | ||
283 | * %rdx: src, RIO | ||
284 | */ | ||
285 | pushq %r15; | ||
286 | pushq %r14; | ||
287 | pushq %r13; | ||
288 | pushq %r12; | ||
289 | pushq %rbp; | ||
290 | pushq %rbx; | ||
291 | |||
292 | pushq %rsi; /* dst */ | ||
293 | |||
294 | inpack_dec3(); | ||
295 | |||
296 | decrypt_cycle3(RAB, RCD, 7); | ||
297 | decrypt_cycle3(RAB, RCD, 6); | ||
298 | decrypt_cycle3(RAB, RCD, 5); | ||
299 | decrypt_cycle3(RAB, RCD, 4); | ||
300 | decrypt_cycle3(RAB, RCD, 3); | ||
301 | decrypt_cycle3(RAB, RCD, 2); | ||
302 | decrypt_cycle3(RAB, RCD, 1); | ||
303 | decrypt_cycle3(RAB, RCD, 0); | ||
304 | |||
305 | popq RIO; /* dst */ | ||
306 | |||
307 | outunpack_dec3(); | ||
308 | |||
309 | popq %rbx; | ||
310 | popq %rbp; | ||
311 | popq %r12; | ||
312 | popq %r13; | ||
313 | popq %r14; | ||
314 | popq %r15; | ||
315 | ret; | ||
316 | |||
diff --git a/arch/x86/crypto/twofish-x86_64-asm_64.S b/arch/x86/crypto/twofish-x86_64-asm_64.S index 7bcf3fcc366..573aa102542 100644 --- a/arch/x86/crypto/twofish-x86_64-asm_64.S +++ b/arch/x86/crypto/twofish-x86_64-asm_64.S | |||
@@ -221,9 +221,10 @@ | |||
221 | twofish_enc_blk: | 221 | twofish_enc_blk: |
222 | pushq R1 | 222 | pushq R1 |
223 | 223 | ||
224 | /* %rdi contains the ctx address */ | 224 | /* %rdi contains the crypto tfm address */ |
225 | /* %rsi contains the output address */ | 225 | /* %rsi contains the output address */ |
226 | /* %rdx contains the input address */ | 226 | /* %rdx contains the input address */ |
227 | add $crypto_tfm_ctx_offset, %rdi /* set ctx address */ | ||
227 | /* ctx address is moved to free one non-rex register | 228 | /* ctx address is moved to free one non-rex register |
228 | as target for the 8bit high operations */ | 229 | as target for the 8bit high operations */ |
229 | mov %rdi, %r11 | 230 | mov %rdi, %r11 |
@@ -273,9 +274,10 @@ twofish_enc_blk: | |||
273 | twofish_dec_blk: | 274 | twofish_dec_blk: |
274 | pushq R1 | 275 | pushq R1 |
275 | 276 | ||
276 | /* %rdi contains the ctx address */ | 277 | /* %rdi contains the crypto tfm address */ |
277 | /* %rsi contains the output address */ | 278 | /* %rsi contains the output address */ |
278 | /* %rdx contains the input address */ | 279 | /* %rdx contains the input address */ |
280 | add $crypto_tfm_ctx_offset, %rdi /* set ctx address */ | ||
279 | /* ctx address is moved to free one non-rex register | 281 | /* ctx address is moved to free one non-rex register |
280 | as target for the 8bit high operations */ | 282 | as target for the 8bit high operations */ |
281 | mov %rdi, %r11 | 283 | mov %rdi, %r11 |
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c deleted file mode 100644 index 94ac91d26e4..00000000000 --- a/arch/x86/crypto/twofish_avx_glue.c +++ /dev/null | |||
@@ -1,571 +0,0 @@ | |||
1 | /* | ||
2 | * Glue Code for AVX assembler version of Twofish Cipher | ||
3 | * | ||
4 | * Copyright (C) 2012 Johannes Goetzfried | ||
5 | * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License as published by | ||
9 | * the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
20 | * USA | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | #include <linux/module.h> | ||
25 | #include <linux/hardirq.h> | ||
26 | #include <linux/types.h> | ||
27 | #include <linux/crypto.h> | ||
28 | #include <linux/err.h> | ||
29 | #include <crypto/algapi.h> | ||
30 | #include <crypto/twofish.h> | ||
31 | #include <crypto/cryptd.h> | ||
32 | #include <crypto/b128ops.h> | ||
33 | #include <crypto/ctr.h> | ||
34 | #include <crypto/lrw.h> | ||
35 | #include <crypto/xts.h> | ||
36 | #include <asm/i387.h> | ||
37 | #include <asm/xcr.h> | ||
38 | #include <asm/xsave.h> | ||
39 | #include <asm/crypto/twofish.h> | ||
40 | #include <asm/crypto/ablk_helper.h> | ||
41 | #include <asm/crypto/glue_helper.h> | ||
42 | #include <crypto/scatterwalk.h> | ||
43 | #include <linux/workqueue.h> | ||
44 | #include <linux/spinlock.h> | ||
45 | |||
46 | #define TWOFISH_PARALLEL_BLOCKS 8 | ||
47 | |||
48 | /* 8-way parallel cipher functions */ | ||
49 | asmlinkage void twofish_ecb_enc_8way(struct twofish_ctx *ctx, u8 *dst, | ||
50 | const u8 *src); | ||
51 | asmlinkage void twofish_ecb_dec_8way(struct twofish_ctx *ctx, u8 *dst, | ||
52 | const u8 *src); | ||
53 | |||
54 | asmlinkage void twofish_cbc_dec_8way(struct twofish_ctx *ctx, u8 *dst, | ||
55 | const u8 *src); | ||
56 | asmlinkage void twofish_ctr_8way(struct twofish_ctx *ctx, u8 *dst, | ||
57 | const u8 *src, le128 *iv); | ||
58 | |||
59 | static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, | ||
60 | const u8 *src) | ||
61 | { | ||
62 | __twofish_enc_blk_3way(ctx, dst, src, false); | ||
63 | } | ||
64 | |||
65 | |||
66 | static const struct common_glue_ctx twofish_enc = { | ||
67 | .num_funcs = 3, | ||
68 | .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, | ||
69 | |||
70 | .funcs = { { | ||
71 | .num_blocks = TWOFISH_PARALLEL_BLOCKS, | ||
72 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_enc_8way) } | ||
73 | }, { | ||
74 | .num_blocks = 3, | ||
75 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) } | ||
76 | }, { | ||
77 | .num_blocks = 1, | ||
78 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) } | ||
79 | } } | ||
80 | }; | ||
81 | |||
82 | static const struct common_glue_ctx twofish_ctr = { | ||
83 | .num_funcs = 3, | ||
84 | .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, | ||
85 | |||
86 | .funcs = { { | ||
87 | .num_blocks = TWOFISH_PARALLEL_BLOCKS, | ||
88 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_ctr_8way) } | ||
89 | }, { | ||
90 | .num_blocks = 3, | ||
91 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_3way) } | ||
92 | }, { | ||
93 | .num_blocks = 1, | ||
94 | .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr) } | ||
95 | } } | ||
96 | }; | ||
97 | |||
98 | static const struct common_glue_ctx twofish_dec = { | ||
99 | .num_funcs = 3, | ||
100 | .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, | ||
101 | |||
102 | .funcs = { { | ||
103 | .num_blocks = TWOFISH_PARALLEL_BLOCKS, | ||
104 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_dec_8way) } | ||
105 | }, { | ||
106 | .num_blocks = 3, | ||
107 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) } | ||
108 | }, { | ||
109 | .num_blocks = 1, | ||
110 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) } | ||
111 | } } | ||
112 | }; | ||
113 | |||
114 | static const struct common_glue_ctx twofish_dec_cbc = { | ||
115 | .num_funcs = 3, | ||
116 | .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, | ||
117 | |||
118 | .funcs = { { | ||
119 | .num_blocks = TWOFISH_PARALLEL_BLOCKS, | ||
120 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_cbc_dec_8way) } | ||
121 | }, { | ||
122 | .num_blocks = 3, | ||
123 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) } | ||
124 | }, { | ||
125 | .num_blocks = 1, | ||
126 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) } | ||
127 | } } | ||
128 | }; | ||
129 | |||
130 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
131 | struct scatterlist *src, unsigned int nbytes) | ||
132 | { | ||
133 | return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes); | ||
134 | } | ||
135 | |||
136 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
137 | struct scatterlist *src, unsigned int nbytes) | ||
138 | { | ||
139 | return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes); | ||
140 | } | ||
141 | |||
142 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
143 | struct scatterlist *src, unsigned int nbytes) | ||
144 | { | ||
145 | return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc, | ||
146 | dst, src, nbytes); | ||
147 | } | ||
148 | |||
149 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
150 | struct scatterlist *src, unsigned int nbytes) | ||
151 | { | ||
152 | return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src, | ||
153 | nbytes); | ||
154 | } | ||
155 | |||
156 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
157 | struct scatterlist *src, unsigned int nbytes) | ||
158 | { | ||
159 | return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes); | ||
160 | } | ||
161 | |||
162 | static inline bool twofish_fpu_begin(bool fpu_enabled, unsigned int nbytes) | ||
163 | { | ||
164 | return glue_fpu_begin(TF_BLOCK_SIZE, TWOFISH_PARALLEL_BLOCKS, NULL, | ||
165 | fpu_enabled, nbytes); | ||
166 | } | ||
167 | |||
168 | static inline void twofish_fpu_end(bool fpu_enabled) | ||
169 | { | ||
170 | glue_fpu_end(fpu_enabled); | ||
171 | } | ||
172 | |||
173 | struct crypt_priv { | ||
174 | struct twofish_ctx *ctx; | ||
175 | bool fpu_enabled; | ||
176 | }; | ||
177 | |||
178 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
179 | { | ||
180 | const unsigned int bsize = TF_BLOCK_SIZE; | ||
181 | struct crypt_priv *ctx = priv; | ||
182 | int i; | ||
183 | |||
184 | ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes); | ||
185 | |||
186 | if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) { | ||
187 | twofish_ecb_enc_8way(ctx->ctx, srcdst, srcdst); | ||
188 | return; | ||
189 | } | ||
190 | |||
191 | for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3) | ||
192 | twofish_enc_blk_3way(ctx->ctx, srcdst, srcdst); | ||
193 | |||
194 | nbytes %= bsize * 3; | ||
195 | |||
196 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
197 | twofish_enc_blk(ctx->ctx, srcdst, srcdst); | ||
198 | } | ||
199 | |||
200 | static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
201 | { | ||
202 | const unsigned int bsize = TF_BLOCK_SIZE; | ||
203 | struct crypt_priv *ctx = priv; | ||
204 | int i; | ||
205 | |||
206 | ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes); | ||
207 | |||
208 | if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) { | ||
209 | twofish_ecb_dec_8way(ctx->ctx, srcdst, srcdst); | ||
210 | return; | ||
211 | } | ||
212 | |||
213 | for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3) | ||
214 | twofish_dec_blk_3way(ctx->ctx, srcdst, srcdst); | ||
215 | |||
216 | nbytes %= bsize * 3; | ||
217 | |||
218 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
219 | twofish_dec_blk(ctx->ctx, srcdst, srcdst); | ||
220 | } | ||
221 | |||
222 | static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
223 | struct scatterlist *src, unsigned int nbytes) | ||
224 | { | ||
225 | struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
226 | be128 buf[TWOFISH_PARALLEL_BLOCKS]; | ||
227 | struct crypt_priv crypt_ctx = { | ||
228 | .ctx = &ctx->twofish_ctx, | ||
229 | .fpu_enabled = false, | ||
230 | }; | ||
231 | struct lrw_crypt_req req = { | ||
232 | .tbuf = buf, | ||
233 | .tbuflen = sizeof(buf), | ||
234 | |||
235 | .table_ctx = &ctx->lrw_table, | ||
236 | .crypt_ctx = &crypt_ctx, | ||
237 | .crypt_fn = encrypt_callback, | ||
238 | }; | ||
239 | int ret; | ||
240 | |||
241 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
242 | ret = lrw_crypt(desc, dst, src, nbytes, &req); | ||
243 | twofish_fpu_end(crypt_ctx.fpu_enabled); | ||
244 | |||
245 | return ret; | ||
246 | } | ||
247 | |||
248 | static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
249 | struct scatterlist *src, unsigned int nbytes) | ||
250 | { | ||
251 | struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
252 | be128 buf[TWOFISH_PARALLEL_BLOCKS]; | ||
253 | struct crypt_priv crypt_ctx = { | ||
254 | .ctx = &ctx->twofish_ctx, | ||
255 | .fpu_enabled = false, | ||
256 | }; | ||
257 | struct lrw_crypt_req req = { | ||
258 | .tbuf = buf, | ||
259 | .tbuflen = sizeof(buf), | ||
260 | |||
261 | .table_ctx = &ctx->lrw_table, | ||
262 | .crypt_ctx = &crypt_ctx, | ||
263 | .crypt_fn = decrypt_callback, | ||
264 | }; | ||
265 | int ret; | ||
266 | |||
267 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
268 | ret = lrw_crypt(desc, dst, src, nbytes, &req); | ||
269 | twofish_fpu_end(crypt_ctx.fpu_enabled); | ||
270 | |||
271 | return ret; | ||
272 | } | ||
273 | |||
274 | static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
275 | struct scatterlist *src, unsigned int nbytes) | ||
276 | { | ||
277 | struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
278 | be128 buf[TWOFISH_PARALLEL_BLOCKS]; | ||
279 | struct crypt_priv crypt_ctx = { | ||
280 | .ctx = &ctx->crypt_ctx, | ||
281 | .fpu_enabled = false, | ||
282 | }; | ||
283 | struct xts_crypt_req req = { | ||
284 | .tbuf = buf, | ||
285 | .tbuflen = sizeof(buf), | ||
286 | |||
287 | .tweak_ctx = &ctx->tweak_ctx, | ||
288 | .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk), | ||
289 | .crypt_ctx = &crypt_ctx, | ||
290 | .crypt_fn = encrypt_callback, | ||
291 | }; | ||
292 | int ret; | ||
293 | |||
294 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
295 | ret = xts_crypt(desc, dst, src, nbytes, &req); | ||
296 | twofish_fpu_end(crypt_ctx.fpu_enabled); | ||
297 | |||
298 | return ret; | ||
299 | } | ||
300 | |||
301 | static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
302 | struct scatterlist *src, unsigned int nbytes) | ||
303 | { | ||
304 | struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
305 | be128 buf[TWOFISH_PARALLEL_BLOCKS]; | ||
306 | struct crypt_priv crypt_ctx = { | ||
307 | .ctx = &ctx->crypt_ctx, | ||
308 | .fpu_enabled = false, | ||
309 | }; | ||
310 | struct xts_crypt_req req = { | ||
311 | .tbuf = buf, | ||
312 | .tbuflen = sizeof(buf), | ||
313 | |||
314 | .tweak_ctx = &ctx->tweak_ctx, | ||
315 | .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk), | ||
316 | .crypt_ctx = &crypt_ctx, | ||
317 | .crypt_fn = decrypt_callback, | ||
318 | }; | ||
319 | int ret; | ||
320 | |||
321 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
322 | ret = xts_crypt(desc, dst, src, nbytes, &req); | ||
323 | twofish_fpu_end(crypt_ctx.fpu_enabled); | ||
324 | |||
325 | return ret; | ||
326 | } | ||
327 | |||
328 | static struct crypto_alg twofish_algs[10] = { { | ||
329 | .cra_name = "__ecb-twofish-avx", | ||
330 | .cra_driver_name = "__driver-ecb-twofish-avx", | ||
331 | .cra_priority = 0, | ||
332 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
333 | .cra_blocksize = TF_BLOCK_SIZE, | ||
334 | .cra_ctxsize = sizeof(struct twofish_ctx), | ||
335 | .cra_alignmask = 0, | ||
336 | .cra_type = &crypto_blkcipher_type, | ||
337 | .cra_module = THIS_MODULE, | ||
338 | .cra_u = { | ||
339 | .blkcipher = { | ||
340 | .min_keysize = TF_MIN_KEY_SIZE, | ||
341 | .max_keysize = TF_MAX_KEY_SIZE, | ||
342 | .setkey = twofish_setkey, | ||
343 | .encrypt = ecb_encrypt, | ||
344 | .decrypt = ecb_decrypt, | ||
345 | }, | ||
346 | }, | ||
347 | }, { | ||
348 | .cra_name = "__cbc-twofish-avx", | ||
349 | .cra_driver_name = "__driver-cbc-twofish-avx", | ||
350 | .cra_priority = 0, | ||
351 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
352 | .cra_blocksize = TF_BLOCK_SIZE, | ||
353 | .cra_ctxsize = sizeof(struct twofish_ctx), | ||
354 | .cra_alignmask = 0, | ||
355 | .cra_type = &crypto_blkcipher_type, | ||
356 | .cra_module = THIS_MODULE, | ||
357 | .cra_u = { | ||
358 | .blkcipher = { | ||
359 | .min_keysize = TF_MIN_KEY_SIZE, | ||
360 | .max_keysize = TF_MAX_KEY_SIZE, | ||
361 | .setkey = twofish_setkey, | ||
362 | .encrypt = cbc_encrypt, | ||
363 | .decrypt = cbc_decrypt, | ||
364 | }, | ||
365 | }, | ||
366 | }, { | ||
367 | .cra_name = "__ctr-twofish-avx", | ||
368 | .cra_driver_name = "__driver-ctr-twofish-avx", | ||
369 | .cra_priority = 0, | ||
370 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
371 | .cra_blocksize = 1, | ||
372 | .cra_ctxsize = sizeof(struct twofish_ctx), | ||
373 | .cra_alignmask = 0, | ||
374 | .cra_type = &crypto_blkcipher_type, | ||
375 | .cra_module = THIS_MODULE, | ||
376 | .cra_u = { | ||
377 | .blkcipher = { | ||
378 | .min_keysize = TF_MIN_KEY_SIZE, | ||
379 | .max_keysize = TF_MAX_KEY_SIZE, | ||
380 | .ivsize = TF_BLOCK_SIZE, | ||
381 | .setkey = twofish_setkey, | ||
382 | .encrypt = ctr_crypt, | ||
383 | .decrypt = ctr_crypt, | ||
384 | }, | ||
385 | }, | ||
386 | }, { | ||
387 | .cra_name = "__lrw-twofish-avx", | ||
388 | .cra_driver_name = "__driver-lrw-twofish-avx", | ||
389 | .cra_priority = 0, | ||
390 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
391 | .cra_blocksize = TF_BLOCK_SIZE, | ||
392 | .cra_ctxsize = sizeof(struct twofish_lrw_ctx), | ||
393 | .cra_alignmask = 0, | ||
394 | .cra_type = &crypto_blkcipher_type, | ||
395 | .cra_module = THIS_MODULE, | ||
396 | .cra_exit = lrw_twofish_exit_tfm, | ||
397 | .cra_u = { | ||
398 | .blkcipher = { | ||
399 | .min_keysize = TF_MIN_KEY_SIZE + | ||
400 | TF_BLOCK_SIZE, | ||
401 | .max_keysize = TF_MAX_KEY_SIZE + | ||
402 | TF_BLOCK_SIZE, | ||
403 | .ivsize = TF_BLOCK_SIZE, | ||
404 | .setkey = lrw_twofish_setkey, | ||
405 | .encrypt = lrw_encrypt, | ||
406 | .decrypt = lrw_decrypt, | ||
407 | }, | ||
408 | }, | ||
409 | }, { | ||
410 | .cra_name = "__xts-twofish-avx", | ||
411 | .cra_driver_name = "__driver-xts-twofish-avx", | ||
412 | .cra_priority = 0, | ||
413 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
414 | .cra_blocksize = TF_BLOCK_SIZE, | ||
415 | .cra_ctxsize = sizeof(struct twofish_xts_ctx), | ||
416 | .cra_alignmask = 0, | ||
417 | .cra_type = &crypto_blkcipher_type, | ||
418 | .cra_module = THIS_MODULE, | ||
419 | .cra_u = { | ||
420 | .blkcipher = { | ||
421 | .min_keysize = TF_MIN_KEY_SIZE * 2, | ||
422 | .max_keysize = TF_MAX_KEY_SIZE * 2, | ||
423 | .ivsize = TF_BLOCK_SIZE, | ||
424 | .setkey = xts_twofish_setkey, | ||
425 | .encrypt = xts_encrypt, | ||
426 | .decrypt = xts_decrypt, | ||
427 | }, | ||
428 | }, | ||
429 | }, { | ||
430 | .cra_name = "ecb(twofish)", | ||
431 | .cra_driver_name = "ecb-twofish-avx", | ||
432 | .cra_priority = 400, | ||
433 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
434 | .cra_blocksize = TF_BLOCK_SIZE, | ||
435 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
436 | .cra_alignmask = 0, | ||
437 | .cra_type = &crypto_ablkcipher_type, | ||
438 | .cra_module = THIS_MODULE, | ||
439 | .cra_init = ablk_init, | ||
440 | .cra_exit = ablk_exit, | ||
441 | .cra_u = { | ||
442 | .ablkcipher = { | ||
443 | .min_keysize = TF_MIN_KEY_SIZE, | ||
444 | .max_keysize = TF_MAX_KEY_SIZE, | ||
445 | .setkey = ablk_set_key, | ||
446 | .encrypt = ablk_encrypt, | ||
447 | .decrypt = ablk_decrypt, | ||
448 | }, | ||
449 | }, | ||
450 | }, { | ||
451 | .cra_name = "cbc(twofish)", | ||
452 | .cra_driver_name = "cbc-twofish-avx", | ||
453 | .cra_priority = 400, | ||
454 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
455 | .cra_blocksize = TF_BLOCK_SIZE, | ||
456 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
457 | .cra_alignmask = 0, | ||
458 | .cra_type = &crypto_ablkcipher_type, | ||
459 | .cra_module = THIS_MODULE, | ||
460 | .cra_init = ablk_init, | ||
461 | .cra_exit = ablk_exit, | ||
462 | .cra_u = { | ||
463 | .ablkcipher = { | ||
464 | .min_keysize = TF_MIN_KEY_SIZE, | ||
465 | .max_keysize = TF_MAX_KEY_SIZE, | ||
466 | .ivsize = TF_BLOCK_SIZE, | ||
467 | .setkey = ablk_set_key, | ||
468 | .encrypt = __ablk_encrypt, | ||
469 | .decrypt = ablk_decrypt, | ||
470 | }, | ||
471 | }, | ||
472 | }, { | ||
473 | .cra_name = "ctr(twofish)", | ||
474 | .cra_driver_name = "ctr-twofish-avx", | ||
475 | .cra_priority = 400, | ||
476 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
477 | .cra_blocksize = 1, | ||
478 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
479 | .cra_alignmask = 0, | ||
480 | .cra_type = &crypto_ablkcipher_type, | ||
481 | .cra_module = THIS_MODULE, | ||
482 | .cra_init = ablk_init, | ||
483 | .cra_exit = ablk_exit, | ||
484 | .cra_u = { | ||
485 | .ablkcipher = { | ||
486 | .min_keysize = TF_MIN_KEY_SIZE, | ||
487 | .max_keysize = TF_MAX_KEY_SIZE, | ||
488 | .ivsize = TF_BLOCK_SIZE, | ||
489 | .setkey = ablk_set_key, | ||
490 | .encrypt = ablk_encrypt, | ||
491 | .decrypt = ablk_encrypt, | ||
492 | .geniv = "chainiv", | ||
493 | }, | ||
494 | }, | ||
495 | }, { | ||
496 | .cra_name = "lrw(twofish)", | ||
497 | .cra_driver_name = "lrw-twofish-avx", | ||
498 | .cra_priority = 400, | ||
499 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
500 | .cra_blocksize = TF_BLOCK_SIZE, | ||
501 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
502 | .cra_alignmask = 0, | ||
503 | .cra_type = &crypto_ablkcipher_type, | ||
504 | .cra_module = THIS_MODULE, | ||
505 | .cra_init = ablk_init, | ||
506 | .cra_exit = ablk_exit, | ||
507 | .cra_u = { | ||
508 | .ablkcipher = { | ||
509 | .min_keysize = TF_MIN_KEY_SIZE + | ||
510 | TF_BLOCK_SIZE, | ||
511 | .max_keysize = TF_MAX_KEY_SIZE + | ||
512 | TF_BLOCK_SIZE, | ||
513 | .ivsize = TF_BLOCK_SIZE, | ||
514 | .setkey = ablk_set_key, | ||
515 | .encrypt = ablk_encrypt, | ||
516 | .decrypt = ablk_decrypt, | ||
517 | }, | ||
518 | }, | ||
519 | }, { | ||
520 | .cra_name = "xts(twofish)", | ||
521 | .cra_driver_name = "xts-twofish-avx", | ||
522 | .cra_priority = 400, | ||
523 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, | ||
524 | .cra_blocksize = TF_BLOCK_SIZE, | ||
525 | .cra_ctxsize = sizeof(struct async_helper_ctx), | ||
526 | .cra_alignmask = 0, | ||
527 | .cra_type = &crypto_ablkcipher_type, | ||
528 | .cra_module = THIS_MODULE, | ||
529 | .cra_init = ablk_init, | ||
530 | .cra_exit = ablk_exit, | ||
531 | .cra_u = { | ||
532 | .ablkcipher = { | ||
533 | .min_keysize = TF_MIN_KEY_SIZE * 2, | ||
534 | .max_keysize = TF_MAX_KEY_SIZE * 2, | ||
535 | .ivsize = TF_BLOCK_SIZE, | ||
536 | .setkey = ablk_set_key, | ||
537 | .encrypt = ablk_encrypt, | ||
538 | .decrypt = ablk_decrypt, | ||
539 | }, | ||
540 | }, | ||
541 | } }; | ||
542 | |||
543 | static int __init twofish_init(void) | ||
544 | { | ||
545 | u64 xcr0; | ||
546 | |||
547 | if (!cpu_has_avx || !cpu_has_osxsave) { | ||
548 | printk(KERN_INFO "AVX instructions are not detected.\n"); | ||
549 | return -ENODEV; | ||
550 | } | ||
551 | |||
552 | xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); | ||
553 | if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { | ||
554 | printk(KERN_INFO "AVX detected but unusable.\n"); | ||
555 | return -ENODEV; | ||
556 | } | ||
557 | |||
558 | return crypto_register_algs(twofish_algs, ARRAY_SIZE(twofish_algs)); | ||
559 | } | ||
560 | |||
561 | static void __exit twofish_exit(void) | ||
562 | { | ||
563 | crypto_unregister_algs(twofish_algs, ARRAY_SIZE(twofish_algs)); | ||
564 | } | ||
565 | |||
566 | module_init(twofish_init); | ||
567 | module_exit(twofish_exit); | ||
568 | |||
569 | MODULE_DESCRIPTION("Twofish Cipher Algorithm, AVX optimized"); | ||
570 | MODULE_LICENSE("GPL"); | ||
571 | MODULE_ALIAS("twofish"); | ||
diff --git a/arch/x86/crypto/twofish_glue.c b/arch/x86/crypto/twofish_glue.c index 0a520230350..cefaf8b9aa1 100644 --- a/arch/x86/crypto/twofish_glue.c +++ b/arch/x86/crypto/twofish_glue.c | |||
@@ -44,21 +44,17 @@ | |||
44 | #include <linux/module.h> | 44 | #include <linux/module.h> |
45 | #include <linux/types.h> | 45 | #include <linux/types.h> |
46 | 46 | ||
47 | asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst, | 47 | asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); |
48 | const u8 *src); | 48 | asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); |
49 | EXPORT_SYMBOL_GPL(twofish_enc_blk); | ||
50 | asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst, | ||
51 | const u8 *src); | ||
52 | EXPORT_SYMBOL_GPL(twofish_dec_blk); | ||
53 | 49 | ||
54 | static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | 50 | static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) |
55 | { | 51 | { |
56 | twofish_enc_blk(crypto_tfm_ctx(tfm), dst, src); | 52 | twofish_enc_blk(tfm, dst, src); |
57 | } | 53 | } |
58 | 54 | ||
59 | static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | 55 | static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) |
60 | { | 56 | { |
61 | twofish_dec_blk(crypto_tfm_ctx(tfm), dst, src); | 57 | twofish_dec_blk(tfm, dst, src); |
62 | } | 58 | } |
63 | 59 | ||
64 | static struct crypto_alg alg = { | 60 | static struct crypto_alg alg = { |
@@ -68,8 +64,9 @@ static struct crypto_alg alg = { | |||
68 | .cra_flags = CRYPTO_ALG_TYPE_CIPHER, | 64 | .cra_flags = CRYPTO_ALG_TYPE_CIPHER, |
69 | .cra_blocksize = TF_BLOCK_SIZE, | 65 | .cra_blocksize = TF_BLOCK_SIZE, |
70 | .cra_ctxsize = sizeof(struct twofish_ctx), | 66 | .cra_ctxsize = sizeof(struct twofish_ctx), |
71 | .cra_alignmask = 0, | 67 | .cra_alignmask = 3, |
72 | .cra_module = THIS_MODULE, | 68 | .cra_module = THIS_MODULE, |
69 | .cra_list = LIST_HEAD_INIT(alg.cra_list), | ||
73 | .cra_u = { | 70 | .cra_u = { |
74 | .cipher = { | 71 | .cipher = { |
75 | .cia_min_keysize = TF_MIN_KEY_SIZE, | 72 | .cia_min_keysize = TF_MIN_KEY_SIZE, |
diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c deleted file mode 100644 index 13e63b3e1df..00000000000 --- a/arch/x86/crypto/twofish_glue_3way.c +++ /dev/null | |||
@@ -1,499 +0,0 @@ | |||
1 | /* | ||
2 | * Glue Code for 3-way parallel assembler optimized version of Twofish | ||
3 | * | ||
4 | * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | ||
19 | * USA | ||
20 | * | ||
21 | */ | ||
22 | |||
23 | #include <asm/processor.h> | ||
24 | #include <linux/crypto.h> | ||
25 | #include <linux/init.h> | ||
26 | #include <linux/module.h> | ||
27 | #include <linux/types.h> | ||
28 | #include <crypto/algapi.h> | ||
29 | #include <crypto/twofish.h> | ||
30 | #include <crypto/b128ops.h> | ||
31 | #include <asm/crypto/twofish.h> | ||
32 | #include <asm/crypto/glue_helper.h> | ||
33 | #include <crypto/lrw.h> | ||
34 | #include <crypto/xts.h> | ||
35 | |||
36 | EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way); | ||
37 | EXPORT_SYMBOL_GPL(twofish_dec_blk_3way); | ||
38 | |||
39 | static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, | ||
40 | const u8 *src) | ||
41 | { | ||
42 | __twofish_enc_blk_3way(ctx, dst, src, false); | ||
43 | } | ||
44 | |||
45 | static inline void twofish_enc_blk_xor_3way(struct twofish_ctx *ctx, u8 *dst, | ||
46 | const u8 *src) | ||
47 | { | ||
48 | __twofish_enc_blk_3way(ctx, dst, src, true); | ||
49 | } | ||
50 | |||
51 | void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src) | ||
52 | { | ||
53 | u128 ivs[2]; | ||
54 | |||
55 | ivs[0] = src[0]; | ||
56 | ivs[1] = src[1]; | ||
57 | |||
58 | twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src); | ||
59 | |||
60 | u128_xor(&dst[1], &dst[1], &ivs[0]); | ||
61 | u128_xor(&dst[2], &dst[2], &ivs[1]); | ||
62 | } | ||
63 | EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way); | ||
64 | |||
65 | void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) | ||
66 | { | ||
67 | be128 ctrblk; | ||
68 | |||
69 | if (dst != src) | ||
70 | *dst = *src; | ||
71 | |||
72 | le128_to_be128(&ctrblk, iv); | ||
73 | le128_inc(iv); | ||
74 | |||
75 | twofish_enc_blk(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); | ||
76 | u128_xor(dst, dst, (u128 *)&ctrblk); | ||
77 | } | ||
78 | EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr); | ||
79 | |||
80 | void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src, | ||
81 | le128 *iv) | ||
82 | { | ||
83 | be128 ctrblks[3]; | ||
84 | |||
85 | if (dst != src) { | ||
86 | dst[0] = src[0]; | ||
87 | dst[1] = src[1]; | ||
88 | dst[2] = src[2]; | ||
89 | } | ||
90 | |||
91 | le128_to_be128(&ctrblks[0], iv); | ||
92 | le128_inc(iv); | ||
93 | le128_to_be128(&ctrblks[1], iv); | ||
94 | le128_inc(iv); | ||
95 | le128_to_be128(&ctrblks[2], iv); | ||
96 | le128_inc(iv); | ||
97 | |||
98 | twofish_enc_blk_xor_3way(ctx, (u8 *)dst, (u8 *)ctrblks); | ||
99 | } | ||
100 | EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr_3way); | ||
101 | |||
102 | static const struct common_glue_ctx twofish_enc = { | ||
103 | .num_funcs = 2, | ||
104 | .fpu_blocks_limit = -1, | ||
105 | |||
106 | .funcs = { { | ||
107 | .num_blocks = 3, | ||
108 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) } | ||
109 | }, { | ||
110 | .num_blocks = 1, | ||
111 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) } | ||
112 | } } | ||
113 | }; | ||
114 | |||
115 | static const struct common_glue_ctx twofish_ctr = { | ||
116 | .num_funcs = 2, | ||
117 | .fpu_blocks_limit = -1, | ||
118 | |||
119 | .funcs = { { | ||
120 | .num_blocks = 3, | ||
121 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_ctr_3way) } | ||
122 | }, { | ||
123 | .num_blocks = 1, | ||
124 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_ctr) } | ||
125 | } } | ||
126 | }; | ||
127 | |||
128 | static const struct common_glue_ctx twofish_dec = { | ||
129 | .num_funcs = 2, | ||
130 | .fpu_blocks_limit = -1, | ||
131 | |||
132 | .funcs = { { | ||
133 | .num_blocks = 3, | ||
134 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) } | ||
135 | }, { | ||
136 | .num_blocks = 1, | ||
137 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) } | ||
138 | } } | ||
139 | }; | ||
140 | |||
141 | static const struct common_glue_ctx twofish_dec_cbc = { | ||
142 | .num_funcs = 2, | ||
143 | .fpu_blocks_limit = -1, | ||
144 | |||
145 | .funcs = { { | ||
146 | .num_blocks = 3, | ||
147 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) } | ||
148 | }, { | ||
149 | .num_blocks = 1, | ||
150 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) } | ||
151 | } } | ||
152 | }; | ||
153 | |||
154 | static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
155 | struct scatterlist *src, unsigned int nbytes) | ||
156 | { | ||
157 | return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes); | ||
158 | } | ||
159 | |||
160 | static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
161 | struct scatterlist *src, unsigned int nbytes) | ||
162 | { | ||
163 | return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes); | ||
164 | } | ||
165 | |||
166 | static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
167 | struct scatterlist *src, unsigned int nbytes) | ||
168 | { | ||
169 | return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc, | ||
170 | dst, src, nbytes); | ||
171 | } | ||
172 | |||
173 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
174 | struct scatterlist *src, unsigned int nbytes) | ||
175 | { | ||
176 | return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src, | ||
177 | nbytes); | ||
178 | } | ||
179 | |||
180 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
181 | struct scatterlist *src, unsigned int nbytes) | ||
182 | { | ||
183 | return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes); | ||
184 | } | ||
185 | |||
186 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
187 | { | ||
188 | const unsigned int bsize = TF_BLOCK_SIZE; | ||
189 | struct twofish_ctx *ctx = priv; | ||
190 | int i; | ||
191 | |||
192 | if (nbytes == 3 * bsize) { | ||
193 | twofish_enc_blk_3way(ctx, srcdst, srcdst); | ||
194 | return; | ||
195 | } | ||
196 | |||
197 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
198 | twofish_enc_blk(ctx, srcdst, srcdst); | ||
199 | } | ||
200 | |||
201 | static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | ||
202 | { | ||
203 | const unsigned int bsize = TF_BLOCK_SIZE; | ||
204 | struct twofish_ctx *ctx = priv; | ||
205 | int i; | ||
206 | |||
207 | if (nbytes == 3 * bsize) { | ||
208 | twofish_dec_blk_3way(ctx, srcdst, srcdst); | ||
209 | return; | ||
210 | } | ||
211 | |||
212 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | ||
213 | twofish_dec_blk(ctx, srcdst, srcdst); | ||
214 | } | ||
215 | |||
216 | int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
217 | unsigned int keylen) | ||
218 | { | ||
219 | struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); | ||
220 | int err; | ||
221 | |||
222 | err = __twofish_setkey(&ctx->twofish_ctx, key, keylen - TF_BLOCK_SIZE, | ||
223 | &tfm->crt_flags); | ||
224 | if (err) | ||
225 | return err; | ||
226 | |||
227 | return lrw_init_table(&ctx->lrw_table, key + keylen - TF_BLOCK_SIZE); | ||
228 | } | ||
229 | EXPORT_SYMBOL_GPL(lrw_twofish_setkey); | ||
230 | |||
231 | static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
232 | struct scatterlist *src, unsigned int nbytes) | ||
233 | { | ||
234 | struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
235 | be128 buf[3]; | ||
236 | struct lrw_crypt_req req = { | ||
237 | .tbuf = buf, | ||
238 | .tbuflen = sizeof(buf), | ||
239 | |||
240 | .table_ctx = &ctx->lrw_table, | ||
241 | .crypt_ctx = &ctx->twofish_ctx, | ||
242 | .crypt_fn = encrypt_callback, | ||
243 | }; | ||
244 | |||
245 | return lrw_crypt(desc, dst, src, nbytes, &req); | ||
246 | } | ||
247 | |||
248 | static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
249 | struct scatterlist *src, unsigned int nbytes) | ||
250 | { | ||
251 | struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
252 | be128 buf[3]; | ||
253 | struct lrw_crypt_req req = { | ||
254 | .tbuf = buf, | ||
255 | .tbuflen = sizeof(buf), | ||
256 | |||
257 | .table_ctx = &ctx->lrw_table, | ||
258 | .crypt_ctx = &ctx->twofish_ctx, | ||
259 | .crypt_fn = decrypt_callback, | ||
260 | }; | ||
261 | |||
262 | return lrw_crypt(desc, dst, src, nbytes, &req); | ||
263 | } | ||
264 | |||
265 | void lrw_twofish_exit_tfm(struct crypto_tfm *tfm) | ||
266 | { | ||
267 | struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); | ||
268 | |||
269 | lrw_free_table(&ctx->lrw_table); | ||
270 | } | ||
271 | EXPORT_SYMBOL_GPL(lrw_twofish_exit_tfm); | ||
272 | |||
273 | int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, | ||
274 | unsigned int keylen) | ||
275 | { | ||
276 | struct twofish_xts_ctx *ctx = crypto_tfm_ctx(tfm); | ||
277 | u32 *flags = &tfm->crt_flags; | ||
278 | int err; | ||
279 | |||
280 | /* key consists of keys of equal size concatenated, therefore | ||
281 | * the length must be even | ||
282 | */ | ||
283 | if (keylen % 2) { | ||
284 | *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; | ||
285 | return -EINVAL; | ||
286 | } | ||
287 | |||
288 | /* first half of xts-key is for crypt */ | ||
289 | err = __twofish_setkey(&ctx->crypt_ctx, key, keylen / 2, flags); | ||
290 | if (err) | ||
291 | return err; | ||
292 | |||
293 | /* second half of xts-key is for tweak */ | ||
294 | return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2, | ||
295 | flags); | ||
296 | } | ||
297 | EXPORT_SYMBOL_GPL(xts_twofish_setkey); | ||
298 | |||
299 | static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
300 | struct scatterlist *src, unsigned int nbytes) | ||
301 | { | ||
302 | struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
303 | be128 buf[3]; | ||
304 | struct xts_crypt_req req = { | ||
305 | .tbuf = buf, | ||
306 | .tbuflen = sizeof(buf), | ||
307 | |||
308 | .tweak_ctx = &ctx->tweak_ctx, | ||
309 | .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk), | ||
310 | .crypt_ctx = &ctx->crypt_ctx, | ||
311 | .crypt_fn = encrypt_callback, | ||
312 | }; | ||
313 | |||
314 | return xts_crypt(desc, dst, src, nbytes, &req); | ||
315 | } | ||
316 | |||
317 | static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | ||
318 | struct scatterlist *src, unsigned int nbytes) | ||
319 | { | ||
320 | struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | ||
321 | be128 buf[3]; | ||
322 | struct xts_crypt_req req = { | ||
323 | .tbuf = buf, | ||
324 | .tbuflen = sizeof(buf), | ||
325 | |||
326 | .tweak_ctx = &ctx->tweak_ctx, | ||
327 | .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk), | ||
328 | .crypt_ctx = &ctx->crypt_ctx, | ||
329 | .crypt_fn = decrypt_callback, | ||
330 | }; | ||
331 | |||
332 | return xts_crypt(desc, dst, src, nbytes, &req); | ||
333 | } | ||
334 | |||
335 | static struct crypto_alg tf_algs[5] = { { | ||
336 | .cra_name = "ecb(twofish)", | ||
337 | .cra_driver_name = "ecb-twofish-3way", | ||
338 | .cra_priority = 300, | ||
339 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
340 | .cra_blocksize = TF_BLOCK_SIZE, | ||
341 | .cra_ctxsize = sizeof(struct twofish_ctx), | ||
342 | .cra_alignmask = 0, | ||
343 | .cra_type = &crypto_blkcipher_type, | ||
344 | .cra_module = THIS_MODULE, | ||
345 | .cra_u = { | ||
346 | .blkcipher = { | ||
347 | .min_keysize = TF_MIN_KEY_SIZE, | ||
348 | .max_keysize = TF_MAX_KEY_SIZE, | ||
349 | .setkey = twofish_setkey, | ||
350 | .encrypt = ecb_encrypt, | ||
351 | .decrypt = ecb_decrypt, | ||
352 | }, | ||
353 | }, | ||
354 | }, { | ||
355 | .cra_name = "cbc(twofish)", | ||
356 | .cra_driver_name = "cbc-twofish-3way", | ||
357 | .cra_priority = 300, | ||
358 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
359 | .cra_blocksize = TF_BLOCK_SIZE, | ||
360 | .cra_ctxsize = sizeof(struct twofish_ctx), | ||
361 | .cra_alignmask = 0, | ||
362 | .cra_type = &crypto_blkcipher_type, | ||
363 | .cra_module = THIS_MODULE, | ||
364 | .cra_u = { | ||
365 | .blkcipher = { | ||
366 | .min_keysize = TF_MIN_KEY_SIZE, | ||
367 | .max_keysize = TF_MAX_KEY_SIZE, | ||
368 | .ivsize = TF_BLOCK_SIZE, | ||
369 | .setkey = twofish_setkey, | ||
370 | .encrypt = cbc_encrypt, | ||
371 | .decrypt = cbc_decrypt, | ||
372 | }, | ||
373 | }, | ||
374 | }, { | ||
375 | .cra_name = "ctr(twofish)", | ||
376 | .cra_driver_name = "ctr-twofish-3way", | ||
377 | .cra_priority = 300, | ||
378 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
379 | .cra_blocksize = 1, | ||
380 | .cra_ctxsize = sizeof(struct twofish_ctx), | ||
381 | .cra_alignmask = 0, | ||
382 | .cra_type = &crypto_blkcipher_type, | ||
383 | .cra_module = THIS_MODULE, | ||
384 | .cra_u = { | ||
385 | .blkcipher = { | ||
386 | .min_keysize = TF_MIN_KEY_SIZE, | ||
387 | .max_keysize = TF_MAX_KEY_SIZE, | ||
388 | .ivsize = TF_BLOCK_SIZE, | ||
389 | .setkey = twofish_setkey, | ||
390 | .encrypt = ctr_crypt, | ||
391 | .decrypt = ctr_crypt, | ||
392 | }, | ||
393 | }, | ||
394 | }, { | ||
395 | .cra_name = "lrw(twofish)", | ||
396 | .cra_driver_name = "lrw-twofish-3way", | ||
397 | .cra_priority = 300, | ||
398 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
399 | .cra_blocksize = TF_BLOCK_SIZE, | ||
400 | .cra_ctxsize = sizeof(struct twofish_lrw_ctx), | ||
401 | .cra_alignmask = 0, | ||
402 | .cra_type = &crypto_blkcipher_type, | ||
403 | .cra_module = THIS_MODULE, | ||
404 | .cra_exit = lrw_twofish_exit_tfm, | ||
405 | .cra_u = { | ||
406 | .blkcipher = { | ||
407 | .min_keysize = TF_MIN_KEY_SIZE + TF_BLOCK_SIZE, | ||
408 | .max_keysize = TF_MAX_KEY_SIZE + TF_BLOCK_SIZE, | ||
409 | .ivsize = TF_BLOCK_SIZE, | ||
410 | .setkey = lrw_twofish_setkey, | ||
411 | .encrypt = lrw_encrypt, | ||
412 | .decrypt = lrw_decrypt, | ||
413 | }, | ||
414 | }, | ||
415 | }, { | ||
416 | .cra_name = "xts(twofish)", | ||
417 | .cra_driver_name = "xts-twofish-3way", | ||
418 | .cra_priority = 300, | ||
419 | .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, | ||
420 | .cra_blocksize = TF_BLOCK_SIZE, | ||
421 | .cra_ctxsize = sizeof(struct twofish_xts_ctx), | ||
422 | .cra_alignmask = 0, | ||
423 | .cra_type = &crypto_blkcipher_type, | ||
424 | .cra_module = THIS_MODULE, | ||
425 | .cra_u = { | ||
426 | .blkcipher = { | ||
427 | .min_keysize = TF_MIN_KEY_SIZE * 2, | ||
428 | .max_keysize = TF_MAX_KEY_SIZE * 2, | ||
429 | .ivsize = TF_BLOCK_SIZE, | ||
430 | .setkey = xts_twofish_setkey, | ||
431 | .encrypt = xts_encrypt, | ||
432 | .decrypt = xts_decrypt, | ||
433 | }, | ||
434 | }, | ||
435 | } }; | ||
436 | |||
437 | static bool is_blacklisted_cpu(void) | ||
438 | { | ||
439 | if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) | ||
440 | return false; | ||
441 | |||
442 | if (boot_cpu_data.x86 == 0x06 && | ||
443 | (boot_cpu_data.x86_model == 0x1c || | ||
444 | boot_cpu_data.x86_model == 0x26 || | ||
445 | boot_cpu_data.x86_model == 0x36)) { | ||
446 | /* | ||
447 | * On Atom, twofish-3way is slower than original assembler | ||
448 | * implementation. Twofish-3way trades off some performance in | ||
449 | * storing blocks in 64bit registers to allow three blocks to | ||
450 | * be processed parallel. Parallel operation then allows gaining | ||
451 | * more performance than was trade off, on out-of-order CPUs. | ||
452 | * However Atom does not benefit from this parallellism and | ||
453 | * should be blacklisted. | ||
454 | */ | ||
455 | return true; | ||
456 | } | ||
457 | |||
458 | if (boot_cpu_data.x86 == 0x0f) { | ||
459 | /* | ||
460 | * On Pentium 4, twofish-3way is slower than original assembler | ||
461 | * implementation because excessive uses of 64bit rotate and | ||
462 | * left-shifts (which are really slow on P4) needed to store and | ||
463 | * handle 128bit block in two 64bit registers. | ||
464 | */ | ||
465 | return true; | ||
466 | } | ||
467 | |||
468 | return false; | ||
469 | } | ||
470 | |||
471 | static int force; | ||
472 | module_param(force, int, 0); | ||
473 | MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist"); | ||
474 | |||
475 | static int __init init(void) | ||
476 | { | ||
477 | if (!force && is_blacklisted_cpu()) { | ||
478 | printk(KERN_INFO | ||
479 | "twofish-x86_64-3way: performance on this CPU " | ||
480 | "would be suboptimal: disabling " | ||
481 | "twofish-x86_64-3way.\n"); | ||
482 | return -ENODEV; | ||
483 | } | ||
484 | |||
485 | return crypto_register_algs(tf_algs, ARRAY_SIZE(tf_algs)); | ||
486 | } | ||
487 | |||
488 | static void __exit fini(void) | ||
489 | { | ||
490 | crypto_unregister_algs(tf_algs, ARRAY_SIZE(tf_algs)); | ||
491 | } | ||
492 | |||
493 | module_init(init); | ||
494 | module_exit(fini); | ||
495 | |||
496 | MODULE_LICENSE("GPL"); | ||
497 | MODULE_DESCRIPTION("Twofish Cipher Algorithm, 3-way parallel asm optimized"); | ||
498 | MODULE_ALIAS("twofish"); | ||
499 | MODULE_ALIAS("twofish-asm"); | ||