diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-02-14 12:47:01 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-02-14 12:47:01 -0500 |
commit | fee5429e028c414d80d036198db30454cfd91b7a (patch) | |
tree | 485f37a974e4ab85339021c794d1782e2d761c5b /arch | |
parent | 83e047c104aa95a8a683d6bd421df1551c17dbd2 (diff) | |
parent | 96692a7305c49845e3cbf5a60cfcb207c5dc4030 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto update from Herbert Xu:
"Here is the crypto update for 3.20:
- Added 192/256-bit key support to aesni GCM.
- Added MIPS OCTEON MD5 support.
- Fixed hwrng starvation and race conditions.
- Added note that memzero_explicit is not a substitute for memset.
- Added user-space interface for crypto_rng.
- Misc fixes"
* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (71 commits)
crypto: tcrypt - do not allocate iv on stack for aead speed tests
crypto: testmgr - limit IV copy length in aead tests
crypto: tcrypt - fix buflen reminder calculation
crypto: testmgr - mark rfc4106(gcm(aes)) as fips_allowed
crypto: caam - fix resource clean-up on error path for caam_jr_init
crypto: caam - pair irq map and dispose in the same function
crypto: ccp - terminate ccp_support array with empty element
crypto: caam - remove unused local variable
crypto: caam - remove dead code
crypto: caam - don't emit ICV check failures to dmesg
hwrng: virtio - drop extra empty line
crypto: replace scatterwalk_sg_next with sg_next
crypto: atmel - Free memory in error path
crypto: doc - remove colons in comments
crypto: seqiv - Ensure that IV size is at least 8 bytes
crypto: cts - Weed out non-CBC algorithms
MAINTAINERS: add linux-crypto to hw random
crypto: cts - Remove bogus use of seqiv
crypto: qat - don't need qat_auth_state struct
crypto: algif_rng - fix sparse non static symbol warning
...
Diffstat (limited to 'arch')
-rw-r--r-- | arch/mips/cavium-octeon/Makefile | 1 | ||||
-rw-r--r-- | arch/mips/cavium-octeon/crypto/Makefile | 7 | ||||
-rw-r--r-- | arch/mips/cavium-octeon/crypto/octeon-crypto.c | 66 | ||||
-rw-r--r-- | arch/mips/cavium-octeon/crypto/octeon-crypto.h | 75 | ||||
-rw-r--r-- | arch/mips/cavium-octeon/crypto/octeon-md5.c | 216 | ||||
-rw-r--r-- | arch/mips/cavium-octeon/executive/octeon-model.c | 6 | ||||
-rw-r--r-- | arch/mips/include/asm/octeon/octeon-feature.h | 17 | ||||
-rw-r--r-- | arch/mips/include/asm/octeon/octeon.h | 5 | ||||
-rw-r--r-- | arch/sparc/crypto/aes_glue.c | 2 | ||||
-rw-r--r-- | arch/sparc/crypto/camellia_glue.c | 2 | ||||
-rw-r--r-- | arch/sparc/crypto/des_glue.c | 1 | ||||
-rw-r--r-- | arch/sparc/crypto/md5_glue.c | 2 | ||||
-rw-r--r-- | arch/x86/crypto/aesni-intel_asm.S | 343 | ||||
-rw-r--r-- | arch/x86/crypto/aesni-intel_glue.c | 34 | ||||
-rw-r--r-- | arch/x86/crypto/des3_ede_glue.c | 2 |
15 files changed, 595 insertions, 184 deletions
diff --git a/arch/mips/cavium-octeon/Makefile b/arch/mips/cavium-octeon/Makefile index 42f5f1a4b40a..69a8a8dabc2b 100644 --- a/arch/mips/cavium-octeon/Makefile +++ b/arch/mips/cavium-octeon/Makefile | |||
@@ -16,6 +16,7 @@ obj-y := cpu.o setup.o octeon-platform.o octeon-irq.o csrc-octeon.o | |||
16 | obj-y += dma-octeon.o | 16 | obj-y += dma-octeon.o |
17 | obj-y += octeon-memcpy.o | 17 | obj-y += octeon-memcpy.o |
18 | obj-y += executive/ | 18 | obj-y += executive/ |
19 | obj-y += crypto/ | ||
19 | 20 | ||
20 | obj-$(CONFIG_MTD) += flash_setup.o | 21 | obj-$(CONFIG_MTD) += flash_setup.o |
21 | obj-$(CONFIG_SMP) += smp.o | 22 | obj-$(CONFIG_SMP) += smp.o |
diff --git a/arch/mips/cavium-octeon/crypto/Makefile b/arch/mips/cavium-octeon/crypto/Makefile new file mode 100644 index 000000000000..a74f76d85a2f --- /dev/null +++ b/arch/mips/cavium-octeon/crypto/Makefile | |||
@@ -0,0 +1,7 @@ | |||
1 | # | ||
2 | # OCTEON-specific crypto modules. | ||
3 | # | ||
4 | |||
5 | obj-y += octeon-crypto.o | ||
6 | |||
7 | obj-$(CONFIG_CRYPTO_MD5_OCTEON) += octeon-md5.o | ||
diff --git a/arch/mips/cavium-octeon/crypto/octeon-crypto.c b/arch/mips/cavium-octeon/crypto/octeon-crypto.c new file mode 100644 index 000000000000..7c82ff463b65 --- /dev/null +++ b/arch/mips/cavium-octeon/crypto/octeon-crypto.c | |||
@@ -0,0 +1,66 @@ | |||
1 | /* | ||
2 | * This file is subject to the terms and conditions of the GNU General Public | ||
3 | * License. See the file "COPYING" in the main directory of this archive | ||
4 | * for more details. | ||
5 | * | ||
6 | * Copyright (C) 2004-2012 Cavium Networks | ||
7 | */ | ||
8 | |||
9 | #include <asm/cop2.h> | ||
10 | #include <linux/module.h> | ||
11 | #include <linux/interrupt.h> | ||
12 | |||
13 | #include "octeon-crypto.h" | ||
14 | |||
15 | /** | ||
16 | * Enable access to Octeon's COP2 crypto hardware for kernel use. Wrap any | ||
17 | * crypto operations in calls to octeon_crypto_enable/disable in order to make | ||
18 | * sure the state of COP2 isn't corrupted if userspace is also performing | ||
19 | * hardware crypto operations. Allocate the state parameter on the stack. | ||
20 | * Preemption must be disabled to prevent context switches. | ||
21 | * | ||
22 | * @state: Pointer to state structure to store current COP2 state in. | ||
23 | * | ||
24 | * Returns: Flags to be passed to octeon_crypto_disable() | ||
25 | */ | ||
26 | unsigned long octeon_crypto_enable(struct octeon_cop2_state *state) | ||
27 | { | ||
28 | int status; | ||
29 | unsigned long flags; | ||
30 | |||
31 | local_irq_save(flags); | ||
32 | status = read_c0_status(); | ||
33 | write_c0_status(status | ST0_CU2); | ||
34 | if (KSTK_STATUS(current) & ST0_CU2) { | ||
35 | octeon_cop2_save(&(current->thread.cp2)); | ||
36 | KSTK_STATUS(current) &= ~ST0_CU2; | ||
37 | status &= ~ST0_CU2; | ||
38 | } else if (status & ST0_CU2) { | ||
39 | octeon_cop2_save(state); | ||
40 | } | ||
41 | local_irq_restore(flags); | ||
42 | return status & ST0_CU2; | ||
43 | } | ||
44 | EXPORT_SYMBOL_GPL(octeon_crypto_enable); | ||
45 | |||
46 | /** | ||
47 | * Disable access to Octeon's COP2 crypto hardware in the kernel. This must be | ||
48 | * called after an octeon_crypto_enable() before any context switch or return to | ||
49 | * userspace. | ||
50 | * | ||
51 | * @state: Pointer to COP2 state to restore | ||
52 | * @flags: Return value from octeon_crypto_enable() | ||
53 | */ | ||
54 | void octeon_crypto_disable(struct octeon_cop2_state *state, | ||
55 | unsigned long crypto_flags) | ||
56 | { | ||
57 | unsigned long flags; | ||
58 | |||
59 | local_irq_save(flags); | ||
60 | if (crypto_flags & ST0_CU2) | ||
61 | octeon_cop2_restore(state); | ||
62 | else | ||
63 | write_c0_status(read_c0_status() & ~ST0_CU2); | ||
64 | local_irq_restore(flags); | ||
65 | } | ||
66 | EXPORT_SYMBOL_GPL(octeon_crypto_disable); | ||
diff --git a/arch/mips/cavium-octeon/crypto/octeon-crypto.h b/arch/mips/cavium-octeon/crypto/octeon-crypto.h new file mode 100644 index 000000000000..e2a4aece9c24 --- /dev/null +++ b/arch/mips/cavium-octeon/crypto/octeon-crypto.h | |||
@@ -0,0 +1,75 @@ | |||
1 | /* | ||
2 | * This file is subject to the terms and conditions of the GNU General Public | ||
3 | * License. See the file "COPYING" in the main directory of this archive | ||
4 | * for more details. | ||
5 | * | ||
6 | * Copyright (C) 2012-2013 Cavium Inc., All Rights Reserved. | ||
7 | * | ||
8 | * MD5 instruction definitions added by Aaro Koskinen <aaro.koskinen@iki.fi>. | ||
9 | * | ||
10 | */ | ||
11 | #ifndef __LINUX_OCTEON_CRYPTO_H | ||
12 | #define __LINUX_OCTEON_CRYPTO_H | ||
13 | |||
14 | #include <linux/sched.h> | ||
15 | #include <asm/mipsregs.h> | ||
16 | |||
17 | #define OCTEON_CR_OPCODE_PRIORITY 300 | ||
18 | |||
19 | extern unsigned long octeon_crypto_enable(struct octeon_cop2_state *state); | ||
20 | extern void octeon_crypto_disable(struct octeon_cop2_state *state, | ||
21 | unsigned long flags); | ||
22 | |||
23 | /* | ||
24 | * Macros needed to implement MD5: | ||
25 | */ | ||
26 | |||
27 | /* | ||
28 | * The index can be 0-1. | ||
29 | */ | ||
30 | #define write_octeon_64bit_hash_dword(value, index) \ | ||
31 | do { \ | ||
32 | __asm__ __volatile__ ( \ | ||
33 | "dmtc2 %[rt],0x0048+" STR(index) \ | ||
34 | : \ | ||
35 | : [rt] "d" (value)); \ | ||
36 | } while (0) | ||
37 | |||
38 | /* | ||
39 | * The index can be 0-1. | ||
40 | */ | ||
41 | #define read_octeon_64bit_hash_dword(index) \ | ||
42 | ({ \ | ||
43 | u64 __value; \ | ||
44 | \ | ||
45 | __asm__ __volatile__ ( \ | ||
46 | "dmfc2 %[rt],0x0048+" STR(index) \ | ||
47 | : [rt] "=d" (__value) \ | ||
48 | : ); \ | ||
49 | \ | ||
50 | __value; \ | ||
51 | }) | ||
52 | |||
53 | /* | ||
54 | * The index can be 0-6. | ||
55 | */ | ||
56 | #define write_octeon_64bit_block_dword(value, index) \ | ||
57 | do { \ | ||
58 | __asm__ __volatile__ ( \ | ||
59 | "dmtc2 %[rt],0x0040+" STR(index) \ | ||
60 | : \ | ||
61 | : [rt] "d" (value)); \ | ||
62 | } while (0) | ||
63 | |||
64 | /* | ||
65 | * The value is the final block dword (64-bit). | ||
66 | */ | ||
67 | #define octeon_md5_start(value) \ | ||
68 | do { \ | ||
69 | __asm__ __volatile__ ( \ | ||
70 | "dmtc2 %[rt],0x4047" \ | ||
71 | : \ | ||
72 | : [rt] "d" (value)); \ | ||
73 | } while (0) | ||
74 | |||
75 | #endif /* __LINUX_OCTEON_CRYPTO_H */ | ||
diff --git a/arch/mips/cavium-octeon/crypto/octeon-md5.c b/arch/mips/cavium-octeon/crypto/octeon-md5.c new file mode 100644 index 000000000000..b909881ba6c1 --- /dev/null +++ b/arch/mips/cavium-octeon/crypto/octeon-md5.c | |||
@@ -0,0 +1,216 @@ | |||
1 | /* | ||
2 | * Cryptographic API. | ||
3 | * | ||
4 | * MD5 Message Digest Algorithm (RFC1321). | ||
5 | * | ||
6 | * Adapted for OCTEON by Aaro Koskinen <aaro.koskinen@iki.fi>. | ||
7 | * | ||
8 | * Based on crypto/md5.c, which is: | ||
9 | * | ||
10 | * Derived from cryptoapi implementation, originally based on the | ||
11 | * public domain implementation written by Colin Plumb in 1993. | ||
12 | * | ||
13 | * Copyright (c) Cryptoapi developers. | ||
14 | * Copyright (c) 2002 James Morris <jmorris@intercode.com.au> | ||
15 | * | ||
16 | * This program is free software; you can redistribute it and/or modify it | ||
17 | * under the terms of the GNU General Public License as published by the Free | ||
18 | * Software Foundation; either version 2 of the License, or (at your option) | ||
19 | * any later version. | ||
20 | */ | ||
21 | |||
22 | #include <crypto/md5.h> | ||
23 | #include <linux/init.h> | ||
24 | #include <linux/types.h> | ||
25 | #include <linux/module.h> | ||
26 | #include <linux/string.h> | ||
27 | #include <asm/byteorder.h> | ||
28 | #include <linux/cryptohash.h> | ||
29 | #include <asm/octeon/octeon.h> | ||
30 | #include <crypto/internal/hash.h> | ||
31 | |||
32 | #include "octeon-crypto.h" | ||
33 | |||
34 | /* | ||
35 | * We pass everything as 64-bit. OCTEON can handle misaligned data. | ||
36 | */ | ||
37 | |||
38 | static void octeon_md5_store_hash(struct md5_state *ctx) | ||
39 | { | ||
40 | u64 *hash = (u64 *)ctx->hash; | ||
41 | |||
42 | write_octeon_64bit_hash_dword(hash[0], 0); | ||
43 | write_octeon_64bit_hash_dword(hash[1], 1); | ||
44 | } | ||
45 | |||
46 | static void octeon_md5_read_hash(struct md5_state *ctx) | ||
47 | { | ||
48 | u64 *hash = (u64 *)ctx->hash; | ||
49 | |||
50 | hash[0] = read_octeon_64bit_hash_dword(0); | ||
51 | hash[1] = read_octeon_64bit_hash_dword(1); | ||
52 | } | ||
53 | |||
54 | static void octeon_md5_transform(const void *_block) | ||
55 | { | ||
56 | const u64 *block = _block; | ||
57 | |||
58 | write_octeon_64bit_block_dword(block[0], 0); | ||
59 | write_octeon_64bit_block_dword(block[1], 1); | ||
60 | write_octeon_64bit_block_dword(block[2], 2); | ||
61 | write_octeon_64bit_block_dword(block[3], 3); | ||
62 | write_octeon_64bit_block_dword(block[4], 4); | ||
63 | write_octeon_64bit_block_dword(block[5], 5); | ||
64 | write_octeon_64bit_block_dword(block[6], 6); | ||
65 | octeon_md5_start(block[7]); | ||
66 | } | ||
67 | |||
68 | static int octeon_md5_init(struct shash_desc *desc) | ||
69 | { | ||
70 | struct md5_state *mctx = shash_desc_ctx(desc); | ||
71 | |||
72 | mctx->hash[0] = cpu_to_le32(0x67452301); | ||
73 | mctx->hash[1] = cpu_to_le32(0xefcdab89); | ||
74 | mctx->hash[2] = cpu_to_le32(0x98badcfe); | ||
75 | mctx->hash[3] = cpu_to_le32(0x10325476); | ||
76 | mctx->byte_count = 0; | ||
77 | |||
78 | return 0; | ||
79 | } | ||
80 | |||
81 | static int octeon_md5_update(struct shash_desc *desc, const u8 *data, | ||
82 | unsigned int len) | ||
83 | { | ||
84 | struct md5_state *mctx = shash_desc_ctx(desc); | ||
85 | const u32 avail = sizeof(mctx->block) - (mctx->byte_count & 0x3f); | ||
86 | struct octeon_cop2_state state; | ||
87 | unsigned long flags; | ||
88 | |||
89 | mctx->byte_count += len; | ||
90 | |||
91 | if (avail > len) { | ||
92 | memcpy((char *)mctx->block + (sizeof(mctx->block) - avail), | ||
93 | data, len); | ||
94 | return 0; | ||
95 | } | ||
96 | |||
97 | memcpy((char *)mctx->block + (sizeof(mctx->block) - avail), data, | ||
98 | avail); | ||
99 | |||
100 | local_bh_disable(); | ||
101 | preempt_disable(); | ||
102 | flags = octeon_crypto_enable(&state); | ||
103 | octeon_md5_store_hash(mctx); | ||
104 | |||
105 | octeon_md5_transform(mctx->block); | ||
106 | data += avail; | ||
107 | len -= avail; | ||
108 | |||
109 | while (len >= sizeof(mctx->block)) { | ||
110 | octeon_md5_transform(data); | ||
111 | data += sizeof(mctx->block); | ||
112 | len -= sizeof(mctx->block); | ||
113 | } | ||
114 | |||
115 | octeon_md5_read_hash(mctx); | ||
116 | octeon_crypto_disable(&state, flags); | ||
117 | preempt_enable(); | ||
118 | local_bh_enable(); | ||
119 | |||
120 | memcpy(mctx->block, data, len); | ||
121 | |||
122 | return 0; | ||
123 | } | ||
124 | |||
125 | static int octeon_md5_final(struct shash_desc *desc, u8 *out) | ||
126 | { | ||
127 | struct md5_state *mctx = shash_desc_ctx(desc); | ||
128 | const unsigned int offset = mctx->byte_count & 0x3f; | ||
129 | char *p = (char *)mctx->block + offset; | ||
130 | int padding = 56 - (offset + 1); | ||
131 | struct octeon_cop2_state state; | ||
132 | unsigned long flags; | ||
133 | |||
134 | *p++ = 0x80; | ||
135 | |||
136 | local_bh_disable(); | ||
137 | preempt_disable(); | ||
138 | flags = octeon_crypto_enable(&state); | ||
139 | octeon_md5_store_hash(mctx); | ||
140 | |||
141 | if (padding < 0) { | ||
142 | memset(p, 0x00, padding + sizeof(u64)); | ||
143 | octeon_md5_transform(mctx->block); | ||
144 | p = (char *)mctx->block; | ||
145 | padding = 56; | ||
146 | } | ||
147 | |||
148 | memset(p, 0, padding); | ||
149 | mctx->block[14] = cpu_to_le32(mctx->byte_count << 3); | ||
150 | mctx->block[15] = cpu_to_le32(mctx->byte_count >> 29); | ||
151 | octeon_md5_transform(mctx->block); | ||
152 | |||
153 | octeon_md5_read_hash(mctx); | ||
154 | octeon_crypto_disable(&state, flags); | ||
155 | preempt_enable(); | ||
156 | local_bh_enable(); | ||
157 | |||
158 | memcpy(out, mctx->hash, sizeof(mctx->hash)); | ||
159 | memset(mctx, 0, sizeof(*mctx)); | ||
160 | |||
161 | return 0; | ||
162 | } | ||
163 | |||
164 | static int octeon_md5_export(struct shash_desc *desc, void *out) | ||
165 | { | ||
166 | struct md5_state *ctx = shash_desc_ctx(desc); | ||
167 | |||
168 | memcpy(out, ctx, sizeof(*ctx)); | ||
169 | return 0; | ||
170 | } | ||
171 | |||
172 | static int octeon_md5_import(struct shash_desc *desc, const void *in) | ||
173 | { | ||
174 | struct md5_state *ctx = shash_desc_ctx(desc); | ||
175 | |||
176 | memcpy(ctx, in, sizeof(*ctx)); | ||
177 | return 0; | ||
178 | } | ||
179 | |||
180 | static struct shash_alg alg = { | ||
181 | .digestsize = MD5_DIGEST_SIZE, | ||
182 | .init = octeon_md5_init, | ||
183 | .update = octeon_md5_update, | ||
184 | .final = octeon_md5_final, | ||
185 | .export = octeon_md5_export, | ||
186 | .import = octeon_md5_import, | ||
187 | .descsize = sizeof(struct md5_state), | ||
188 | .statesize = sizeof(struct md5_state), | ||
189 | .base = { | ||
190 | .cra_name = "md5", | ||
191 | .cra_driver_name= "octeon-md5", | ||
192 | .cra_priority = OCTEON_CR_OPCODE_PRIORITY, | ||
193 | .cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
194 | .cra_blocksize = MD5_HMAC_BLOCK_SIZE, | ||
195 | .cra_module = THIS_MODULE, | ||
196 | } | ||
197 | }; | ||
198 | |||
199 | static int __init md5_mod_init(void) | ||
200 | { | ||
201 | if (!octeon_has_crypto()) | ||
202 | return -ENOTSUPP; | ||
203 | return crypto_register_shash(&alg); | ||
204 | } | ||
205 | |||
206 | static void __exit md5_mod_fini(void) | ||
207 | { | ||
208 | crypto_unregister_shash(&alg); | ||
209 | } | ||
210 | |||
211 | module_init(md5_mod_init); | ||
212 | module_exit(md5_mod_fini); | ||
213 | |||
214 | MODULE_LICENSE("GPL"); | ||
215 | MODULE_DESCRIPTION("MD5 Message Digest Algorithm (OCTEON)"); | ||
216 | MODULE_AUTHOR("Aaro Koskinen <aaro.koskinen@iki.fi>"); | ||
diff --git a/arch/mips/cavium-octeon/executive/octeon-model.c b/arch/mips/cavium-octeon/executive/octeon-model.c index e15b049b3bd7..b2104bd9ab3b 100644 --- a/arch/mips/cavium-octeon/executive/octeon-model.c +++ b/arch/mips/cavium-octeon/executive/octeon-model.c | |||
@@ -27,6 +27,9 @@ | |||
27 | 27 | ||
28 | #include <asm/octeon/octeon.h> | 28 | #include <asm/octeon/octeon.h> |
29 | 29 | ||
30 | enum octeon_feature_bits __octeon_feature_bits __read_mostly; | ||
31 | EXPORT_SYMBOL_GPL(__octeon_feature_bits); | ||
32 | |||
30 | /** | 33 | /** |
31 | * Read a byte of fuse data | 34 | * Read a byte of fuse data |
32 | * @byte_addr: address to read | 35 | * @byte_addr: address to read |
@@ -103,6 +106,9 @@ static const char *__init octeon_model_get_string_buffer(uint32_t chip_id, | |||
103 | else | 106 | else |
104 | suffix = "NSP"; | 107 | suffix = "NSP"; |
105 | 108 | ||
109 | if (!fus_dat2.s.nocrypto) | ||
110 | __octeon_feature_bits |= OCTEON_HAS_CRYPTO; | ||
111 | |||
106 | /* | 112 | /* |
107 | * Assume pass number is encoded using <5:3><2:0>. Exceptions | 113 | * Assume pass number is encoded using <5:3><2:0>. Exceptions |
108 | * will be fixed later. | 114 | * will be fixed later. |
diff --git a/arch/mips/include/asm/octeon/octeon-feature.h b/arch/mips/include/asm/octeon/octeon-feature.h index c4fe81f47f53..8ebd3f579b84 100644 --- a/arch/mips/include/asm/octeon/octeon-feature.h +++ b/arch/mips/include/asm/octeon/octeon-feature.h | |||
@@ -46,8 +46,6 @@ enum octeon_feature { | |||
46 | OCTEON_FEATURE_SAAD, | 46 | OCTEON_FEATURE_SAAD, |
47 | /* Does this Octeon support the ZIP offload engine? */ | 47 | /* Does this Octeon support the ZIP offload engine? */ |
48 | OCTEON_FEATURE_ZIP, | 48 | OCTEON_FEATURE_ZIP, |
49 | /* Does this Octeon support crypto acceleration using COP2? */ | ||
50 | OCTEON_FEATURE_CRYPTO, | ||
51 | OCTEON_FEATURE_DORM_CRYPTO, | 49 | OCTEON_FEATURE_DORM_CRYPTO, |
52 | /* Does this Octeon support PCI express? */ | 50 | /* Does this Octeon support PCI express? */ |
53 | OCTEON_FEATURE_PCIE, | 51 | OCTEON_FEATURE_PCIE, |
@@ -86,6 +84,21 @@ enum octeon_feature { | |||
86 | OCTEON_MAX_FEATURE | 84 | OCTEON_MAX_FEATURE |
87 | }; | 85 | }; |
88 | 86 | ||
87 | enum octeon_feature_bits { | ||
88 | OCTEON_HAS_CRYPTO = 0x0001, /* Crypto acceleration using COP2 */ | ||
89 | }; | ||
90 | extern enum octeon_feature_bits __octeon_feature_bits; | ||
91 | |||
92 | /** | ||
93 | * octeon_has_crypto() - Check if this OCTEON has crypto acceleration support. | ||
94 | * | ||
95 | * Returns: Non-zero if the feature exists. Zero if the feature does not exist. | ||
96 | */ | ||
97 | static inline int octeon_has_crypto(void) | ||
98 | { | ||
99 | return __octeon_feature_bits & OCTEON_HAS_CRYPTO; | ||
100 | } | ||
101 | |||
89 | /** | 102 | /** |
90 | * Determine if the current Octeon supports a specific feature. These | 103 | * Determine if the current Octeon supports a specific feature. These |
91 | * checks have been optimized to be fairly quick, but they should still | 104 | * checks have been optimized to be fairly quick, but they should still |
diff --git a/arch/mips/include/asm/octeon/octeon.h b/arch/mips/include/asm/octeon/octeon.h index d781f9e66884..6dfefd2d5cdf 100644 --- a/arch/mips/include/asm/octeon/octeon.h +++ b/arch/mips/include/asm/octeon/octeon.h | |||
@@ -44,11 +44,6 @@ extern int octeon_get_boot_num_arguments(void); | |||
44 | extern const char *octeon_get_boot_argument(int arg); | 44 | extern const char *octeon_get_boot_argument(int arg); |
45 | extern void octeon_hal_setup_reserved32(void); | 45 | extern void octeon_hal_setup_reserved32(void); |
46 | extern void octeon_user_io_init(void); | 46 | extern void octeon_user_io_init(void); |
47 | struct octeon_cop2_state; | ||
48 | extern unsigned long octeon_crypto_enable(struct octeon_cop2_state *state); | ||
49 | extern void octeon_crypto_disable(struct octeon_cop2_state *state, | ||
50 | unsigned long flags); | ||
51 | extern asmlinkage void octeon_cop2_restore(struct octeon_cop2_state *task); | ||
52 | 47 | ||
53 | extern void octeon_init_cvmcount(void); | 48 | extern void octeon_init_cvmcount(void); |
54 | extern void octeon_setup_delays(void); | 49 | extern void octeon_setup_delays(void); |
diff --git a/arch/sparc/crypto/aes_glue.c b/arch/sparc/crypto/aes_glue.c index 705408766ab0..2e48eb8813ff 100644 --- a/arch/sparc/crypto/aes_glue.c +++ b/arch/sparc/crypto/aes_glue.c | |||
@@ -497,7 +497,7 @@ module_init(aes_sparc64_mod_init); | |||
497 | module_exit(aes_sparc64_mod_fini); | 497 | module_exit(aes_sparc64_mod_fini); |
498 | 498 | ||
499 | MODULE_LICENSE("GPL"); | 499 | MODULE_LICENSE("GPL"); |
500 | MODULE_DESCRIPTION("AES Secure Hash Algorithm, sparc64 aes opcode accelerated"); | 500 | MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, sparc64 aes opcode accelerated"); |
501 | 501 | ||
502 | MODULE_ALIAS_CRYPTO("aes"); | 502 | MODULE_ALIAS_CRYPTO("aes"); |
503 | 503 | ||
diff --git a/arch/sparc/crypto/camellia_glue.c b/arch/sparc/crypto/camellia_glue.c index 641f55cb61c3..6bf2479a12fb 100644 --- a/arch/sparc/crypto/camellia_glue.c +++ b/arch/sparc/crypto/camellia_glue.c | |||
@@ -322,6 +322,6 @@ module_exit(camellia_sparc64_mod_fini); | |||
322 | MODULE_LICENSE("GPL"); | 322 | MODULE_LICENSE("GPL"); |
323 | MODULE_DESCRIPTION("Camellia Cipher Algorithm, sparc64 camellia opcode accelerated"); | 323 | MODULE_DESCRIPTION("Camellia Cipher Algorithm, sparc64 camellia opcode accelerated"); |
324 | 324 | ||
325 | MODULE_ALIAS_CRYPTO("aes"); | 325 | MODULE_ALIAS_CRYPTO("camellia"); |
326 | 326 | ||
327 | #include "crop_devid.c" | 327 | #include "crop_devid.c" |
diff --git a/arch/sparc/crypto/des_glue.c b/arch/sparc/crypto/des_glue.c index d11500972994..dd6a34fa6e19 100644 --- a/arch/sparc/crypto/des_glue.c +++ b/arch/sparc/crypto/des_glue.c | |||
@@ -533,5 +533,6 @@ MODULE_LICENSE("GPL"); | |||
533 | MODULE_DESCRIPTION("DES & Triple DES EDE Cipher Algorithms, sparc64 des opcode accelerated"); | 533 | MODULE_DESCRIPTION("DES & Triple DES EDE Cipher Algorithms, sparc64 des opcode accelerated"); |
534 | 534 | ||
535 | MODULE_ALIAS_CRYPTO("des"); | 535 | MODULE_ALIAS_CRYPTO("des"); |
536 | MODULE_ALIAS_CRYPTO("des3_ede"); | ||
536 | 537 | ||
537 | #include "crop_devid.c" | 538 | #include "crop_devid.c" |
diff --git a/arch/sparc/crypto/md5_glue.c b/arch/sparc/crypto/md5_glue.c index 64c7ff5f72a9..b688731d7ede 100644 --- a/arch/sparc/crypto/md5_glue.c +++ b/arch/sparc/crypto/md5_glue.c | |||
@@ -183,7 +183,7 @@ module_init(md5_sparc64_mod_init); | |||
183 | module_exit(md5_sparc64_mod_fini); | 183 | module_exit(md5_sparc64_mod_fini); |
184 | 184 | ||
185 | MODULE_LICENSE("GPL"); | 185 | MODULE_LICENSE("GPL"); |
186 | MODULE_DESCRIPTION("MD5 Secure Hash Algorithm, sparc64 md5 opcode accelerated"); | 186 | MODULE_DESCRIPTION("MD5 Message Digest Algorithm, sparc64 md5 opcode accelerated"); |
187 | 187 | ||
188 | MODULE_ALIAS_CRYPTO("md5"); | 188 | MODULE_ALIAS_CRYPTO("md5"); |
189 | 189 | ||
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 477e9d75149b..6bd2c6c95373 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S | |||
@@ -32,12 +32,23 @@ | |||
32 | #include <linux/linkage.h> | 32 | #include <linux/linkage.h> |
33 | #include <asm/inst.h> | 33 | #include <asm/inst.h> |
34 | 34 | ||
35 | /* | ||
36 | * The following macros are used to move an (un)aligned 16 byte value to/from | ||
37 | * an XMM register. This can done for either FP or integer values, for FP use | ||
38 | * movaps (move aligned packed single) or integer use movdqa (move double quad | ||
39 | * aligned). It doesn't make a performance difference which instruction is used | ||
40 | * since Nehalem (original Core i7) was released. However, the movaps is a byte | ||
41 | * shorter, so that is the one we'll use for now. (same for unaligned). | ||
42 | */ | ||
43 | #define MOVADQ movaps | ||
44 | #define MOVUDQ movups | ||
45 | |||
35 | #ifdef __x86_64__ | 46 | #ifdef __x86_64__ |
47 | |||
36 | .data | 48 | .data |
37 | .align 16 | 49 | .align 16 |
38 | .Lgf128mul_x_ble_mask: | 50 | .Lgf128mul_x_ble_mask: |
39 | .octa 0x00000000000000010000000000000087 | 51 | .octa 0x00000000000000010000000000000087 |
40 | |||
41 | POLY: .octa 0xC2000000000000000000000000000001 | 52 | POLY: .octa 0xC2000000000000000000000000000001 |
42 | TWOONE: .octa 0x00000001000000000000000000000001 | 53 | TWOONE: .octa 0x00000001000000000000000000000001 |
43 | 54 | ||
@@ -89,6 +100,7 @@ enc: .octa 0x2 | |||
89 | #define arg8 STACK_OFFSET+16(%r14) | 100 | #define arg8 STACK_OFFSET+16(%r14) |
90 | #define arg9 STACK_OFFSET+24(%r14) | 101 | #define arg9 STACK_OFFSET+24(%r14) |
91 | #define arg10 STACK_OFFSET+32(%r14) | 102 | #define arg10 STACK_OFFSET+32(%r14) |
103 | #define keysize 2*15*16(%arg1) | ||
92 | #endif | 104 | #endif |
93 | 105 | ||
94 | 106 | ||
@@ -213,10 +225,12 @@ enc: .octa 0x2 | |||
213 | 225 | ||
214 | .macro INITIAL_BLOCKS_DEC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \ | 226 | .macro INITIAL_BLOCKS_DEC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \ |
215 | XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation | 227 | XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation |
228 | MOVADQ SHUF_MASK(%rip), %xmm14 | ||
216 | mov arg7, %r10 # %r10 = AAD | 229 | mov arg7, %r10 # %r10 = AAD |
217 | mov arg8, %r12 # %r12 = aadLen | 230 | mov arg8, %r12 # %r12 = aadLen |
218 | mov %r12, %r11 | 231 | mov %r12, %r11 |
219 | pxor %xmm\i, %xmm\i | 232 | pxor %xmm\i, %xmm\i |
233 | |||
220 | _get_AAD_loop\num_initial_blocks\operation: | 234 | _get_AAD_loop\num_initial_blocks\operation: |
221 | movd (%r10), \TMP1 | 235 | movd (%r10), \TMP1 |
222 | pslldq $12, \TMP1 | 236 | pslldq $12, \TMP1 |
@@ -225,16 +239,18 @@ _get_AAD_loop\num_initial_blocks\operation: | |||
225 | add $4, %r10 | 239 | add $4, %r10 |
226 | sub $4, %r12 | 240 | sub $4, %r12 |
227 | jne _get_AAD_loop\num_initial_blocks\operation | 241 | jne _get_AAD_loop\num_initial_blocks\operation |
242 | |||
228 | cmp $16, %r11 | 243 | cmp $16, %r11 |
229 | je _get_AAD_loop2_done\num_initial_blocks\operation | 244 | je _get_AAD_loop2_done\num_initial_blocks\operation |
245 | |||
230 | mov $16, %r12 | 246 | mov $16, %r12 |
231 | _get_AAD_loop2\num_initial_blocks\operation: | 247 | _get_AAD_loop2\num_initial_blocks\operation: |
232 | psrldq $4, %xmm\i | 248 | psrldq $4, %xmm\i |
233 | sub $4, %r12 | 249 | sub $4, %r12 |
234 | cmp %r11, %r12 | 250 | cmp %r11, %r12 |
235 | jne _get_AAD_loop2\num_initial_blocks\operation | 251 | jne _get_AAD_loop2\num_initial_blocks\operation |
252 | |||
236 | _get_AAD_loop2_done\num_initial_blocks\operation: | 253 | _get_AAD_loop2_done\num_initial_blocks\operation: |
237 | movdqa SHUF_MASK(%rip), %xmm14 | ||
238 | PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data | 254 | PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data |
239 | 255 | ||
240 | xor %r11, %r11 # initialise the data pointer offset as zero | 256 | xor %r11, %r11 # initialise the data pointer offset as zero |
@@ -243,59 +259,34 @@ _get_AAD_loop2_done\num_initial_blocks\operation: | |||
243 | 259 | ||
244 | mov %arg5, %rax # %rax = *Y0 | 260 | mov %arg5, %rax # %rax = *Y0 |
245 | movdqu (%rax), \XMM0 # XMM0 = Y0 | 261 | movdqu (%rax), \XMM0 # XMM0 = Y0 |
246 | movdqa SHUF_MASK(%rip), %xmm14 | ||
247 | PSHUFB_XMM %xmm14, \XMM0 | 262 | PSHUFB_XMM %xmm14, \XMM0 |
248 | 263 | ||
249 | .if (\i == 5) || (\i == 6) || (\i == 7) | 264 | .if (\i == 5) || (\i == 6) || (\i == 7) |
265 | MOVADQ ONE(%RIP),\TMP1 | ||
266 | MOVADQ (%arg1),\TMP2 | ||
250 | .irpc index, \i_seq | 267 | .irpc index, \i_seq |
251 | paddd ONE(%rip), \XMM0 # INCR Y0 | 268 | paddd \TMP1, \XMM0 # INCR Y0 |
252 | movdqa \XMM0, %xmm\index | 269 | movdqa \XMM0, %xmm\index |
253 | movdqa SHUF_MASK(%rip), %xmm14 | ||
254 | PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap | 270 | PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap |
255 | 271 | pxor \TMP2, %xmm\index | |
256 | .endr | ||
257 | .irpc index, \i_seq | ||
258 | pxor 16*0(%arg1), %xmm\index | ||
259 | .endr | ||
260 | .irpc index, \i_seq | ||
261 | movaps 0x10(%rdi), \TMP1 | ||
262 | AESENC \TMP1, %xmm\index # Round 1 | ||
263 | .endr | ||
264 | .irpc index, \i_seq | ||
265 | movaps 0x20(%arg1), \TMP1 | ||
266 | AESENC \TMP1, %xmm\index # Round 2 | ||
267 | .endr | ||
268 | .irpc index, \i_seq | ||
269 | movaps 0x30(%arg1), \TMP1 | ||
270 | AESENC \TMP1, %xmm\index # Round 2 | ||
271 | .endr | ||
272 | .irpc index, \i_seq | ||
273 | movaps 0x40(%arg1), \TMP1 | ||
274 | AESENC \TMP1, %xmm\index # Round 2 | ||
275 | .endr | ||
276 | .irpc index, \i_seq | ||
277 | movaps 0x50(%arg1), \TMP1 | ||
278 | AESENC \TMP1, %xmm\index # Round 2 | ||
279 | .endr | ||
280 | .irpc index, \i_seq | ||
281 | movaps 0x60(%arg1), \TMP1 | ||
282 | AESENC \TMP1, %xmm\index # Round 2 | ||
283 | .endr | 272 | .endr |
284 | .irpc index, \i_seq | 273 | lea 0x10(%arg1),%r10 |
285 | movaps 0x70(%arg1), \TMP1 | 274 | mov keysize,%eax |
286 | AESENC \TMP1, %xmm\index # Round 2 | 275 | shr $2,%eax # 128->4, 192->6, 256->8 |
287 | .endr | 276 | add $5,%eax # 128->9, 192->11, 256->13 |
288 | .irpc index, \i_seq | 277 | |
289 | movaps 0x80(%arg1), \TMP1 | 278 | aes_loop_initial_dec\num_initial_blocks: |
290 | AESENC \TMP1, %xmm\index # Round 2 | 279 | MOVADQ (%r10),\TMP1 |
291 | .endr | 280 | .irpc index, \i_seq |
292 | .irpc index, \i_seq | 281 | AESENC \TMP1, %xmm\index |
293 | movaps 0x90(%arg1), \TMP1 | ||
294 | AESENC \TMP1, %xmm\index # Round 2 | ||
295 | .endr | 282 | .endr |
283 | add $16,%r10 | ||
284 | sub $1,%eax | ||
285 | jnz aes_loop_initial_dec\num_initial_blocks | ||
286 | |||
287 | MOVADQ (%r10), \TMP1 | ||
296 | .irpc index, \i_seq | 288 | .irpc index, \i_seq |
297 | movaps 0xa0(%arg1), \TMP1 | 289 | AESENCLAST \TMP1, %xmm\index # Last Round |
298 | AESENCLAST \TMP1, %xmm\index # Round 10 | ||
299 | .endr | 290 | .endr |
300 | .irpc index, \i_seq | 291 | .irpc index, \i_seq |
301 | movdqu (%arg3 , %r11, 1), \TMP1 | 292 | movdqu (%arg3 , %r11, 1), \TMP1 |
@@ -305,10 +296,8 @@ _get_AAD_loop2_done\num_initial_blocks\operation: | |||
305 | add $16, %r11 | 296 | add $16, %r11 |
306 | 297 | ||
307 | movdqa \TMP1, %xmm\index | 298 | movdqa \TMP1, %xmm\index |
308 | movdqa SHUF_MASK(%rip), %xmm14 | ||
309 | PSHUFB_XMM %xmm14, %xmm\index | 299 | PSHUFB_XMM %xmm14, %xmm\index |
310 | 300 | # prepare plaintext/ciphertext for GHASH computation | |
311 | # prepare plaintext/ciphertext for GHASH computation | ||
312 | .endr | 301 | .endr |
313 | .endif | 302 | .endif |
314 | GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 | 303 | GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 |
@@ -338,30 +327,28 @@ _get_AAD_loop2_done\num_initial_blocks\operation: | |||
338 | * Precomputations for HashKey parallel with encryption of first 4 blocks. | 327 | * Precomputations for HashKey parallel with encryption of first 4 blocks. |
339 | * Haskey_i_k holds XORed values of the low and high parts of the Haskey_i | 328 | * Haskey_i_k holds XORed values of the low and high parts of the Haskey_i |
340 | */ | 329 | */ |
341 | paddd ONE(%rip), \XMM0 # INCR Y0 | 330 | MOVADQ ONE(%rip), \TMP1 |
342 | movdqa \XMM0, \XMM1 | 331 | paddd \TMP1, \XMM0 # INCR Y0 |
343 | movdqa SHUF_MASK(%rip), %xmm14 | 332 | MOVADQ \XMM0, \XMM1 |
344 | PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap | 333 | PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap |
345 | 334 | ||
346 | paddd ONE(%rip), \XMM0 # INCR Y0 | 335 | paddd \TMP1, \XMM0 # INCR Y0 |
347 | movdqa \XMM0, \XMM2 | 336 | MOVADQ \XMM0, \XMM2 |
348 | movdqa SHUF_MASK(%rip), %xmm14 | ||
349 | PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap | 337 | PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap |
350 | 338 | ||
351 | paddd ONE(%rip), \XMM0 # INCR Y0 | 339 | paddd \TMP1, \XMM0 # INCR Y0 |
352 | movdqa \XMM0, \XMM3 | 340 | MOVADQ \XMM0, \XMM3 |
353 | movdqa SHUF_MASK(%rip), %xmm14 | ||
354 | PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap | 341 | PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap |
355 | 342 | ||
356 | paddd ONE(%rip), \XMM0 # INCR Y0 | 343 | paddd \TMP1, \XMM0 # INCR Y0 |
357 | movdqa \XMM0, \XMM4 | 344 | MOVADQ \XMM0, \XMM4 |
358 | movdqa SHUF_MASK(%rip), %xmm14 | ||
359 | PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap | 345 | PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap |
360 | 346 | ||
361 | pxor 16*0(%arg1), \XMM1 | 347 | MOVADQ 0(%arg1),\TMP1 |
362 | pxor 16*0(%arg1), \XMM2 | 348 | pxor \TMP1, \XMM1 |
363 | pxor 16*0(%arg1), \XMM3 | 349 | pxor \TMP1, \XMM2 |
364 | pxor 16*0(%arg1), \XMM4 | 350 | pxor \TMP1, \XMM3 |
351 | pxor \TMP1, \XMM4 | ||
365 | movdqa \TMP3, \TMP5 | 352 | movdqa \TMP3, \TMP5 |
366 | pshufd $78, \TMP3, \TMP1 | 353 | pshufd $78, \TMP3, \TMP1 |
367 | pxor \TMP3, \TMP1 | 354 | pxor \TMP3, \TMP1 |
@@ -399,7 +386,23 @@ _get_AAD_loop2_done\num_initial_blocks\operation: | |||
399 | pshufd $78, \TMP5, \TMP1 | 386 | pshufd $78, \TMP5, \TMP1 |
400 | pxor \TMP5, \TMP1 | 387 | pxor \TMP5, \TMP1 |
401 | movdqa \TMP1, HashKey_4_k(%rsp) | 388 | movdqa \TMP1, HashKey_4_k(%rsp) |
402 | movaps 0xa0(%arg1), \TMP2 | 389 | lea 0xa0(%arg1),%r10 |
390 | mov keysize,%eax | ||
391 | shr $2,%eax # 128->4, 192->6, 256->8 | ||
392 | sub $4,%eax # 128->0, 192->2, 256->4 | ||
393 | jz aes_loop_pre_dec_done\num_initial_blocks | ||
394 | |||
395 | aes_loop_pre_dec\num_initial_blocks: | ||
396 | MOVADQ (%r10),\TMP2 | ||
397 | .irpc index, 1234 | ||
398 | AESENC \TMP2, %xmm\index | ||
399 | .endr | ||
400 | add $16,%r10 | ||
401 | sub $1,%eax | ||
402 | jnz aes_loop_pre_dec\num_initial_blocks | ||
403 | |||
404 | aes_loop_pre_dec_done\num_initial_blocks: | ||
405 | MOVADQ (%r10), \TMP2 | ||
403 | AESENCLAST \TMP2, \XMM1 | 406 | AESENCLAST \TMP2, \XMM1 |
404 | AESENCLAST \TMP2, \XMM2 | 407 | AESENCLAST \TMP2, \XMM2 |
405 | AESENCLAST \TMP2, \XMM3 | 408 | AESENCLAST \TMP2, \XMM3 |
@@ -421,15 +424,11 @@ _get_AAD_loop2_done\num_initial_blocks\operation: | |||
421 | movdqu \XMM4, 16*3(%arg2 , %r11 , 1) | 424 | movdqu \XMM4, 16*3(%arg2 , %r11 , 1) |
422 | movdqa \TMP1, \XMM4 | 425 | movdqa \TMP1, \XMM4 |
423 | add $64, %r11 | 426 | add $64, %r11 |
424 | movdqa SHUF_MASK(%rip), %xmm14 | ||
425 | PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap | 427 | PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap |
426 | pxor \XMMDst, \XMM1 | 428 | pxor \XMMDst, \XMM1 |
427 | # combine GHASHed value with the corresponding ciphertext | 429 | # combine GHASHed value with the corresponding ciphertext |
428 | movdqa SHUF_MASK(%rip), %xmm14 | ||
429 | PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap | 430 | PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap |
430 | movdqa SHUF_MASK(%rip), %xmm14 | ||
431 | PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap | 431 | PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap |
432 | movdqa SHUF_MASK(%rip), %xmm14 | ||
433 | PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap | 432 | PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap |
434 | 433 | ||
435 | _initial_blocks_done\num_initial_blocks\operation: | 434 | _initial_blocks_done\num_initial_blocks\operation: |
@@ -451,6 +450,7 @@ _initial_blocks_done\num_initial_blocks\operation: | |||
451 | 450 | ||
452 | .macro INITIAL_BLOCKS_ENC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \ | 451 | .macro INITIAL_BLOCKS_ENC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \ |
453 | XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation | 452 | XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation |
453 | MOVADQ SHUF_MASK(%rip), %xmm14 | ||
454 | mov arg7, %r10 # %r10 = AAD | 454 | mov arg7, %r10 # %r10 = AAD |
455 | mov arg8, %r12 # %r12 = aadLen | 455 | mov arg8, %r12 # %r12 = aadLen |
456 | mov %r12, %r11 | 456 | mov %r12, %r11 |
@@ -472,7 +472,6 @@ _get_AAD_loop2\num_initial_blocks\operation: | |||
472 | cmp %r11, %r12 | 472 | cmp %r11, %r12 |
473 | jne _get_AAD_loop2\num_initial_blocks\operation | 473 | jne _get_AAD_loop2\num_initial_blocks\operation |
474 | _get_AAD_loop2_done\num_initial_blocks\operation: | 474 | _get_AAD_loop2_done\num_initial_blocks\operation: |
475 | movdqa SHUF_MASK(%rip), %xmm14 | ||
476 | PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data | 475 | PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data |
477 | 476 | ||
478 | xor %r11, %r11 # initialise the data pointer offset as zero | 477 | xor %r11, %r11 # initialise the data pointer offset as zero |
@@ -481,59 +480,35 @@ _get_AAD_loop2_done\num_initial_blocks\operation: | |||
481 | 480 | ||
482 | mov %arg5, %rax # %rax = *Y0 | 481 | mov %arg5, %rax # %rax = *Y0 |
483 | movdqu (%rax), \XMM0 # XMM0 = Y0 | 482 | movdqu (%rax), \XMM0 # XMM0 = Y0 |
484 | movdqa SHUF_MASK(%rip), %xmm14 | ||
485 | PSHUFB_XMM %xmm14, \XMM0 | 483 | PSHUFB_XMM %xmm14, \XMM0 |
486 | 484 | ||
487 | .if (\i == 5) || (\i == 6) || (\i == 7) | 485 | .if (\i == 5) || (\i == 6) || (\i == 7) |
488 | .irpc index, \i_seq | ||
489 | paddd ONE(%rip), \XMM0 # INCR Y0 | ||
490 | movdqa \XMM0, %xmm\index | ||
491 | movdqa SHUF_MASK(%rip), %xmm14 | ||
492 | PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap | ||
493 | 486 | ||
494 | .endr | 487 | MOVADQ ONE(%RIP),\TMP1 |
495 | .irpc index, \i_seq | 488 | MOVADQ 0(%arg1),\TMP2 |
496 | pxor 16*0(%arg1), %xmm\index | ||
497 | .endr | ||
498 | .irpc index, \i_seq | ||
499 | movaps 0x10(%rdi), \TMP1 | ||
500 | AESENC \TMP1, %xmm\index # Round 1 | ||
501 | .endr | ||
502 | .irpc index, \i_seq | ||
503 | movaps 0x20(%arg1), \TMP1 | ||
504 | AESENC \TMP1, %xmm\index # Round 2 | ||
505 | .endr | ||
506 | .irpc index, \i_seq | 489 | .irpc index, \i_seq |
507 | movaps 0x30(%arg1), \TMP1 | 490 | paddd \TMP1, \XMM0 # INCR Y0 |
508 | AESENC \TMP1, %xmm\index # Round 2 | 491 | MOVADQ \XMM0, %xmm\index |
492 | PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap | ||
493 | pxor \TMP2, %xmm\index | ||
509 | .endr | 494 | .endr |
510 | .irpc index, \i_seq | 495 | lea 0x10(%arg1),%r10 |
511 | movaps 0x40(%arg1), \TMP1 | 496 | mov keysize,%eax |
512 | AESENC \TMP1, %xmm\index # Round 2 | 497 | shr $2,%eax # 128->4, 192->6, 256->8 |
513 | .endr | 498 | add $5,%eax # 128->9, 192->11, 256->13 |
514 | .irpc index, \i_seq | 499 | |
515 | movaps 0x50(%arg1), \TMP1 | 500 | aes_loop_initial_enc\num_initial_blocks: |
516 | AESENC \TMP1, %xmm\index # Round 2 | 501 | MOVADQ (%r10),\TMP1 |
517 | .endr | 502 | .irpc index, \i_seq |
518 | .irpc index, \i_seq | 503 | AESENC \TMP1, %xmm\index |
519 | movaps 0x60(%arg1), \TMP1 | ||
520 | AESENC \TMP1, %xmm\index # Round 2 | ||
521 | .endr | ||
522 | .irpc index, \i_seq | ||
523 | movaps 0x70(%arg1), \TMP1 | ||
524 | AESENC \TMP1, %xmm\index # Round 2 | ||
525 | .endr | ||
526 | .irpc index, \i_seq | ||
527 | movaps 0x80(%arg1), \TMP1 | ||
528 | AESENC \TMP1, %xmm\index # Round 2 | ||
529 | .endr | ||
530 | .irpc index, \i_seq | ||
531 | movaps 0x90(%arg1), \TMP1 | ||
532 | AESENC \TMP1, %xmm\index # Round 2 | ||
533 | .endr | 504 | .endr |
505 | add $16,%r10 | ||
506 | sub $1,%eax | ||
507 | jnz aes_loop_initial_enc\num_initial_blocks | ||
508 | |||
509 | MOVADQ (%r10), \TMP1 | ||
534 | .irpc index, \i_seq | 510 | .irpc index, \i_seq |
535 | movaps 0xa0(%arg1), \TMP1 | 511 | AESENCLAST \TMP1, %xmm\index # Last Round |
536 | AESENCLAST \TMP1, %xmm\index # Round 10 | ||
537 | .endr | 512 | .endr |
538 | .irpc index, \i_seq | 513 | .irpc index, \i_seq |
539 | movdqu (%arg3 , %r11, 1), \TMP1 | 514 | movdqu (%arg3 , %r11, 1), \TMP1 |
@@ -541,8 +516,6 @@ _get_AAD_loop2_done\num_initial_blocks\operation: | |||
541 | movdqu %xmm\index, (%arg2 , %r11, 1) | 516 | movdqu %xmm\index, (%arg2 , %r11, 1) |
542 | # write back plaintext/ciphertext for num_initial_blocks | 517 | # write back plaintext/ciphertext for num_initial_blocks |
543 | add $16, %r11 | 518 | add $16, %r11 |
544 | |||
545 | movdqa SHUF_MASK(%rip), %xmm14 | ||
546 | PSHUFB_XMM %xmm14, %xmm\index | 519 | PSHUFB_XMM %xmm14, %xmm\index |
547 | 520 | ||
548 | # prepare plaintext/ciphertext for GHASH computation | 521 | # prepare plaintext/ciphertext for GHASH computation |
@@ -575,30 +548,28 @@ _get_AAD_loop2_done\num_initial_blocks\operation: | |||
575 | * Precomputations for HashKey parallel with encryption of first 4 blocks. | 548 | * Precomputations for HashKey parallel with encryption of first 4 blocks. |
576 | * Haskey_i_k holds XORed values of the low and high parts of the Haskey_i | 549 | * Haskey_i_k holds XORed values of the low and high parts of the Haskey_i |
577 | */ | 550 | */ |
578 | paddd ONE(%rip), \XMM0 # INCR Y0 | 551 | MOVADQ ONE(%RIP),\TMP1 |
579 | movdqa \XMM0, \XMM1 | 552 | paddd \TMP1, \XMM0 # INCR Y0 |
580 | movdqa SHUF_MASK(%rip), %xmm14 | 553 | MOVADQ \XMM0, \XMM1 |
581 | PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap | 554 | PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap |
582 | 555 | ||
583 | paddd ONE(%rip), \XMM0 # INCR Y0 | 556 | paddd \TMP1, \XMM0 # INCR Y0 |
584 | movdqa \XMM0, \XMM2 | 557 | MOVADQ \XMM0, \XMM2 |
585 | movdqa SHUF_MASK(%rip), %xmm14 | ||
586 | PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap | 558 | PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap |
587 | 559 | ||
588 | paddd ONE(%rip), \XMM0 # INCR Y0 | 560 | paddd \TMP1, \XMM0 # INCR Y0 |
589 | movdqa \XMM0, \XMM3 | 561 | MOVADQ \XMM0, \XMM3 |
590 | movdqa SHUF_MASK(%rip), %xmm14 | ||
591 | PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap | 562 | PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap |
592 | 563 | ||
593 | paddd ONE(%rip), \XMM0 # INCR Y0 | 564 | paddd \TMP1, \XMM0 # INCR Y0 |
594 | movdqa \XMM0, \XMM4 | 565 | MOVADQ \XMM0, \XMM4 |
595 | movdqa SHUF_MASK(%rip), %xmm14 | ||
596 | PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap | 566 | PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap |
597 | 567 | ||
598 | pxor 16*0(%arg1), \XMM1 | 568 | MOVADQ 0(%arg1),\TMP1 |
599 | pxor 16*0(%arg1), \XMM2 | 569 | pxor \TMP1, \XMM1 |
600 | pxor 16*0(%arg1), \XMM3 | 570 | pxor \TMP1, \XMM2 |
601 | pxor 16*0(%arg1), \XMM4 | 571 | pxor \TMP1, \XMM3 |
572 | pxor \TMP1, \XMM4 | ||
602 | movdqa \TMP3, \TMP5 | 573 | movdqa \TMP3, \TMP5 |
603 | pshufd $78, \TMP3, \TMP1 | 574 | pshufd $78, \TMP3, \TMP1 |
604 | pxor \TMP3, \TMP1 | 575 | pxor \TMP3, \TMP1 |
@@ -636,7 +607,23 @@ _get_AAD_loop2_done\num_initial_blocks\operation: | |||
636 | pshufd $78, \TMP5, \TMP1 | 607 | pshufd $78, \TMP5, \TMP1 |
637 | pxor \TMP5, \TMP1 | 608 | pxor \TMP5, \TMP1 |
638 | movdqa \TMP1, HashKey_4_k(%rsp) | 609 | movdqa \TMP1, HashKey_4_k(%rsp) |
639 | movaps 0xa0(%arg1), \TMP2 | 610 | lea 0xa0(%arg1),%r10 |
611 | mov keysize,%eax | ||
612 | shr $2,%eax # 128->4, 192->6, 256->8 | ||
613 | sub $4,%eax # 128->0, 192->2, 256->4 | ||
614 | jz aes_loop_pre_enc_done\num_initial_blocks | ||
615 | |||
616 | aes_loop_pre_enc\num_initial_blocks: | ||
617 | MOVADQ (%r10),\TMP2 | ||
618 | .irpc index, 1234 | ||
619 | AESENC \TMP2, %xmm\index | ||
620 | .endr | ||
621 | add $16,%r10 | ||
622 | sub $1,%eax | ||
623 | jnz aes_loop_pre_enc\num_initial_blocks | ||
624 | |||
625 | aes_loop_pre_enc_done\num_initial_blocks: | ||
626 | MOVADQ (%r10), \TMP2 | ||
640 | AESENCLAST \TMP2, \XMM1 | 627 | AESENCLAST \TMP2, \XMM1 |
641 | AESENCLAST \TMP2, \XMM2 | 628 | AESENCLAST \TMP2, \XMM2 |
642 | AESENCLAST \TMP2, \XMM3 | 629 | AESENCLAST \TMP2, \XMM3 |
@@ -655,15 +642,11 @@ _get_AAD_loop2_done\num_initial_blocks\operation: | |||
655 | movdqu \XMM4, 16*3(%arg2 , %r11 , 1) | 642 | movdqu \XMM4, 16*3(%arg2 , %r11 , 1) |
656 | 643 | ||
657 | add $64, %r11 | 644 | add $64, %r11 |
658 | movdqa SHUF_MASK(%rip), %xmm14 | ||
659 | PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap | 645 | PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap |
660 | pxor \XMMDst, \XMM1 | 646 | pxor \XMMDst, \XMM1 |
661 | # combine GHASHed value with the corresponding ciphertext | 647 | # combine GHASHed value with the corresponding ciphertext |
662 | movdqa SHUF_MASK(%rip), %xmm14 | ||
663 | PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap | 648 | PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap |
664 | movdqa SHUF_MASK(%rip), %xmm14 | ||
665 | PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap | 649 | PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap |
666 | movdqa SHUF_MASK(%rip), %xmm14 | ||
667 | PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap | 650 | PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap |
668 | 651 | ||
669 | _initial_blocks_done\num_initial_blocks\operation: | 652 | _initial_blocks_done\num_initial_blocks\operation: |
@@ -794,7 +777,23 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation | |||
794 | AESENC \TMP3, \XMM3 | 777 | AESENC \TMP3, \XMM3 |
795 | AESENC \TMP3, \XMM4 | 778 | AESENC \TMP3, \XMM4 |
796 | PCLMULQDQ 0x00, \TMP5, \XMM8 # XMM8 = a0*b0 | 779 | PCLMULQDQ 0x00, \TMP5, \XMM8 # XMM8 = a0*b0 |
797 | movaps 0xa0(%arg1), \TMP3 | 780 | lea 0xa0(%arg1),%r10 |
781 | mov keysize,%eax | ||
782 | shr $2,%eax # 128->4, 192->6, 256->8 | ||
783 | sub $4,%eax # 128->0, 192->2, 256->4 | ||
784 | jz aes_loop_par_enc_done | ||
785 | |||
786 | aes_loop_par_enc: | ||
787 | MOVADQ (%r10),\TMP3 | ||
788 | .irpc index, 1234 | ||
789 | AESENC \TMP3, %xmm\index | ||
790 | .endr | ||
791 | add $16,%r10 | ||
792 | sub $1,%eax | ||
793 | jnz aes_loop_par_enc | ||
794 | |||
795 | aes_loop_par_enc_done: | ||
796 | MOVADQ (%r10), \TMP3 | ||
798 | AESENCLAST \TMP3, \XMM1 # Round 10 | 797 | AESENCLAST \TMP3, \XMM1 # Round 10 |
799 | AESENCLAST \TMP3, \XMM2 | 798 | AESENCLAST \TMP3, \XMM2 |
800 | AESENCLAST \TMP3, \XMM3 | 799 | AESENCLAST \TMP3, \XMM3 |
@@ -986,8 +985,24 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation | |||
986 | AESENC \TMP3, \XMM3 | 985 | AESENC \TMP3, \XMM3 |
987 | AESENC \TMP3, \XMM4 | 986 | AESENC \TMP3, \XMM4 |
988 | PCLMULQDQ 0x00, \TMP5, \XMM8 # XMM8 = a0*b0 | 987 | PCLMULQDQ 0x00, \TMP5, \XMM8 # XMM8 = a0*b0 |
989 | movaps 0xa0(%arg1), \TMP3 | 988 | lea 0xa0(%arg1),%r10 |
990 | AESENCLAST \TMP3, \XMM1 # Round 10 | 989 | mov keysize,%eax |
990 | shr $2,%eax # 128->4, 192->6, 256->8 | ||
991 | sub $4,%eax # 128->0, 192->2, 256->4 | ||
992 | jz aes_loop_par_dec_done | ||
993 | |||
994 | aes_loop_par_dec: | ||
995 | MOVADQ (%r10),\TMP3 | ||
996 | .irpc index, 1234 | ||
997 | AESENC \TMP3, %xmm\index | ||
998 | .endr | ||
999 | add $16,%r10 | ||
1000 | sub $1,%eax | ||
1001 | jnz aes_loop_par_dec | ||
1002 | |||
1003 | aes_loop_par_dec_done: | ||
1004 | MOVADQ (%r10), \TMP3 | ||
1005 | AESENCLAST \TMP3, \XMM1 # last round | ||
991 | AESENCLAST \TMP3, \XMM2 | 1006 | AESENCLAST \TMP3, \XMM2 |
992 | AESENCLAST \TMP3, \XMM3 | 1007 | AESENCLAST \TMP3, \XMM3 |
993 | AESENCLAST \TMP3, \XMM4 | 1008 | AESENCLAST \TMP3, \XMM4 |
@@ -1155,33 +1170,29 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst | |||
1155 | pxor \TMP6, \XMMDst # reduced result is in XMMDst | 1170 | pxor \TMP6, \XMMDst # reduced result is in XMMDst |
1156 | .endm | 1171 | .endm |
1157 | 1172 | ||
1158 | /* Encryption of a single block done*/ | ||
1159 | .macro ENCRYPT_SINGLE_BLOCK XMM0 TMP1 | ||
1160 | 1173 | ||
1161 | pxor (%arg1), \XMM0 | 1174 | /* Encryption of a single block |
1162 | movaps 16(%arg1), \TMP1 | 1175 | * uses eax & r10 |
1163 | AESENC \TMP1, \XMM0 | 1176 | */ |
1164 | movaps 32(%arg1), \TMP1 | ||
1165 | AESENC \TMP1, \XMM0 | ||
1166 | movaps 48(%arg1), \TMP1 | ||
1167 | AESENC \TMP1, \XMM0 | ||
1168 | movaps 64(%arg1), \TMP1 | ||
1169 | AESENC \TMP1, \XMM0 | ||
1170 | movaps 80(%arg1), \TMP1 | ||
1171 | AESENC \TMP1, \XMM0 | ||
1172 | movaps 96(%arg1), \TMP1 | ||
1173 | AESENC \TMP1, \XMM0 | ||
1174 | movaps 112(%arg1), \TMP1 | ||
1175 | AESENC \TMP1, \XMM0 | ||
1176 | movaps 128(%arg1), \TMP1 | ||
1177 | AESENC \TMP1, \XMM0 | ||
1178 | movaps 144(%arg1), \TMP1 | ||
1179 | AESENC \TMP1, \XMM0 | ||
1180 | movaps 160(%arg1), \TMP1 | ||
1181 | AESENCLAST \TMP1, \XMM0 | ||
1182 | .endm | ||
1183 | 1177 | ||
1178 | .macro ENCRYPT_SINGLE_BLOCK XMM0 TMP1 | ||
1184 | 1179 | ||
1180 | pxor (%arg1), \XMM0 | ||
1181 | mov keysize,%eax | ||
1182 | shr $2,%eax # 128->4, 192->6, 256->8 | ||
1183 | add $5,%eax # 128->9, 192->11, 256->13 | ||
1184 | lea 16(%arg1), %r10 # get first expanded key address | ||
1185 | |||
1186 | _esb_loop_\@: | ||
1187 | MOVADQ (%r10),\TMP1 | ||
1188 | AESENC \TMP1,\XMM0 | ||
1189 | add $16,%r10 | ||
1190 | sub $1,%eax | ||
1191 | jnz _esb_loop_\@ | ||
1192 | |||
1193 | MOVADQ (%r10),\TMP1 | ||
1194 | AESENCLAST \TMP1,\XMM0 | ||
1195 | .endm | ||
1185 | /***************************************************************************** | 1196 | /***************************************************************************** |
1186 | * void aesni_gcm_dec(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. | 1197 | * void aesni_gcm_dec(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. |
1187 | * u8 *out, // Plaintext output. Encrypt in-place is allowed. | 1198 | * u8 *out, // Plaintext output. Encrypt in-place is allowed. |
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index ae855f4f64b7..947c6bf52c33 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c | |||
@@ -43,6 +43,7 @@ | |||
43 | #include <asm/crypto/glue_helper.h> | 43 | #include <asm/crypto/glue_helper.h> |
44 | #endif | 44 | #endif |
45 | 45 | ||
46 | |||
46 | /* This data is stored at the end of the crypto_tfm struct. | 47 | /* This data is stored at the end of the crypto_tfm struct. |
47 | * It's a type of per "session" data storage location. | 48 | * It's a type of per "session" data storage location. |
48 | * This needs to be 16 byte aligned. | 49 | * This needs to be 16 byte aligned. |
@@ -182,7 +183,8 @@ static void aesni_gcm_enc_avx(void *ctx, u8 *out, | |||
182 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, | 183 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, |
183 | u8 *auth_tag, unsigned long auth_tag_len) | 184 | u8 *auth_tag, unsigned long auth_tag_len) |
184 | { | 185 | { |
185 | if (plaintext_len < AVX_GEN2_OPTSIZE) { | 186 | struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx; |
187 | if ((plaintext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)){ | ||
186 | aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad, | 188 | aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad, |
187 | aad_len, auth_tag, auth_tag_len); | 189 | aad_len, auth_tag, auth_tag_len); |
188 | } else { | 190 | } else { |
@@ -197,7 +199,8 @@ static void aesni_gcm_dec_avx(void *ctx, u8 *out, | |||
197 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, | 199 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, |
198 | u8 *auth_tag, unsigned long auth_tag_len) | 200 | u8 *auth_tag, unsigned long auth_tag_len) |
199 | { | 201 | { |
200 | if (ciphertext_len < AVX_GEN2_OPTSIZE) { | 202 | struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx; |
203 | if ((ciphertext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) { | ||
201 | aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey, aad, | 204 | aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey, aad, |
202 | aad_len, auth_tag, auth_tag_len); | 205 | aad_len, auth_tag, auth_tag_len); |
203 | } else { | 206 | } else { |
@@ -231,7 +234,8 @@ static void aesni_gcm_enc_avx2(void *ctx, u8 *out, | |||
231 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, | 234 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, |
232 | u8 *auth_tag, unsigned long auth_tag_len) | 235 | u8 *auth_tag, unsigned long auth_tag_len) |
233 | { | 236 | { |
234 | if (plaintext_len < AVX_GEN2_OPTSIZE) { | 237 | struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx; |
238 | if ((plaintext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) { | ||
235 | aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad, | 239 | aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad, |
236 | aad_len, auth_tag, auth_tag_len); | 240 | aad_len, auth_tag, auth_tag_len); |
237 | } else if (plaintext_len < AVX_GEN4_OPTSIZE) { | 241 | } else if (plaintext_len < AVX_GEN4_OPTSIZE) { |
@@ -250,7 +254,8 @@ static void aesni_gcm_dec_avx2(void *ctx, u8 *out, | |||
250 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, | 254 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, |
251 | u8 *auth_tag, unsigned long auth_tag_len) | 255 | u8 *auth_tag, unsigned long auth_tag_len) |
252 | { | 256 | { |
253 | if (ciphertext_len < AVX_GEN2_OPTSIZE) { | 257 | struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx; |
258 | if ((ciphertext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) { | ||
254 | aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey, | 259 | aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey, |
255 | aad, aad_len, auth_tag, auth_tag_len); | 260 | aad, aad_len, auth_tag, auth_tag_len); |
256 | } else if (ciphertext_len < AVX_GEN4_OPTSIZE) { | 261 | } else if (ciphertext_len < AVX_GEN4_OPTSIZE) { |
@@ -511,7 +516,7 @@ static int ctr_crypt(struct blkcipher_desc *desc, | |||
511 | kernel_fpu_begin(); | 516 | kernel_fpu_begin(); |
512 | while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { | 517 | while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { |
513 | aesni_ctr_enc_tfm(ctx, walk.dst.virt.addr, walk.src.virt.addr, | 518 | aesni_ctr_enc_tfm(ctx, walk.dst.virt.addr, walk.src.virt.addr, |
514 | nbytes & AES_BLOCK_MASK, walk.iv); | 519 | nbytes & AES_BLOCK_MASK, walk.iv); |
515 | nbytes &= AES_BLOCK_SIZE - 1; | 520 | nbytes &= AES_BLOCK_SIZE - 1; |
516 | err = blkcipher_walk_done(desc, &walk, nbytes); | 521 | err = blkcipher_walk_done(desc, &walk, nbytes); |
517 | } | 522 | } |
@@ -902,7 +907,8 @@ static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key, | |||
902 | } | 907 | } |
903 | /*Account for 4 byte nonce at the end.*/ | 908 | /*Account for 4 byte nonce at the end.*/ |
904 | key_len -= 4; | 909 | key_len -= 4; |
905 | if (key_len != AES_KEYSIZE_128) { | 910 | if (key_len != AES_KEYSIZE_128 && key_len != AES_KEYSIZE_192 && |
911 | key_len != AES_KEYSIZE_256) { | ||
906 | crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); | 912 | crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); |
907 | return -EINVAL; | 913 | return -EINVAL; |
908 | } | 914 | } |
@@ -1013,6 +1019,7 @@ static int __driver_rfc4106_encrypt(struct aead_request *req) | |||
1013 | __be32 counter = cpu_to_be32(1); | 1019 | __be32 counter = cpu_to_be32(1); |
1014 | struct crypto_aead *tfm = crypto_aead_reqtfm(req); | 1020 | struct crypto_aead *tfm = crypto_aead_reqtfm(req); |
1015 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); | 1021 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); |
1022 | u32 key_len = ctx->aes_key_expanded.key_length; | ||
1016 | void *aes_ctx = &(ctx->aes_key_expanded); | 1023 | void *aes_ctx = &(ctx->aes_key_expanded); |
1017 | unsigned long auth_tag_len = crypto_aead_authsize(tfm); | 1024 | unsigned long auth_tag_len = crypto_aead_authsize(tfm); |
1018 | u8 iv_tab[16+AESNI_ALIGN]; | 1025 | u8 iv_tab[16+AESNI_ALIGN]; |
@@ -1027,6 +1034,13 @@ static int __driver_rfc4106_encrypt(struct aead_request *req) | |||
1027 | /* to 8 or 12 bytes */ | 1034 | /* to 8 or 12 bytes */ |
1028 | if (unlikely(req->assoclen != 8 && req->assoclen != 12)) | 1035 | if (unlikely(req->assoclen != 8 && req->assoclen != 12)) |
1029 | return -EINVAL; | 1036 | return -EINVAL; |
1037 | if (unlikely(auth_tag_len != 8 && auth_tag_len != 12 && auth_tag_len != 16)) | ||
1038 | return -EINVAL; | ||
1039 | if (unlikely(key_len != AES_KEYSIZE_128 && | ||
1040 | key_len != AES_KEYSIZE_192 && | ||
1041 | key_len != AES_KEYSIZE_256)) | ||
1042 | return -EINVAL; | ||
1043 | |||
1030 | /* IV below built */ | 1044 | /* IV below built */ |
1031 | for (i = 0; i < 4; i++) | 1045 | for (i = 0; i < 4; i++) |
1032 | *(iv+i) = ctx->nonce[i]; | 1046 | *(iv+i) = ctx->nonce[i]; |
@@ -1091,6 +1105,7 @@ static int __driver_rfc4106_decrypt(struct aead_request *req) | |||
1091 | int retval = 0; | 1105 | int retval = 0; |
1092 | struct crypto_aead *tfm = crypto_aead_reqtfm(req); | 1106 | struct crypto_aead *tfm = crypto_aead_reqtfm(req); |
1093 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); | 1107 | struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); |
1108 | u32 key_len = ctx->aes_key_expanded.key_length; | ||
1094 | void *aes_ctx = &(ctx->aes_key_expanded); | 1109 | void *aes_ctx = &(ctx->aes_key_expanded); |
1095 | unsigned long auth_tag_len = crypto_aead_authsize(tfm); | 1110 | unsigned long auth_tag_len = crypto_aead_authsize(tfm); |
1096 | u8 iv_and_authTag[32+AESNI_ALIGN]; | 1111 | u8 iv_and_authTag[32+AESNI_ALIGN]; |
@@ -1104,6 +1119,13 @@ static int __driver_rfc4106_decrypt(struct aead_request *req) | |||
1104 | if (unlikely((req->cryptlen < auth_tag_len) || | 1119 | if (unlikely((req->cryptlen < auth_tag_len) || |
1105 | (req->assoclen != 8 && req->assoclen != 12))) | 1120 | (req->assoclen != 8 && req->assoclen != 12))) |
1106 | return -EINVAL; | 1121 | return -EINVAL; |
1122 | if (unlikely(auth_tag_len != 8 && auth_tag_len != 12 && auth_tag_len != 16)) | ||
1123 | return -EINVAL; | ||
1124 | if (unlikely(key_len != AES_KEYSIZE_128 && | ||
1125 | key_len != AES_KEYSIZE_192 && | ||
1126 | key_len != AES_KEYSIZE_256)) | ||
1127 | return -EINVAL; | ||
1128 | |||
1107 | /* Assuming we are supporting rfc4106 64-bit extended */ | 1129 | /* Assuming we are supporting rfc4106 64-bit extended */ |
1108 | /* sequence numbers We need to have the AAD length */ | 1130 | /* sequence numbers We need to have the AAD length */ |
1109 | /* equal to 8 or 12 bytes */ | 1131 | /* equal to 8 or 12 bytes */ |
diff --git a/arch/x86/crypto/des3_ede_glue.c b/arch/x86/crypto/des3_ede_glue.c index 38a14f818ef1..d6fc59aaaadf 100644 --- a/arch/x86/crypto/des3_ede_glue.c +++ b/arch/x86/crypto/des3_ede_glue.c | |||
@@ -504,6 +504,4 @@ MODULE_LICENSE("GPL"); | |||
504 | MODULE_DESCRIPTION("Triple DES EDE Cipher Algorithm, asm optimized"); | 504 | MODULE_DESCRIPTION("Triple DES EDE Cipher Algorithm, asm optimized"); |
505 | MODULE_ALIAS_CRYPTO("des3_ede"); | 505 | MODULE_ALIAS_CRYPTO("des3_ede"); |
506 | MODULE_ALIAS_CRYPTO("des3_ede-asm"); | 506 | MODULE_ALIAS_CRYPTO("des3_ede-asm"); |
507 | MODULE_ALIAS_CRYPTO("des"); | ||
508 | MODULE_ALIAS_CRYPTO("des-asm"); | ||
509 | MODULE_AUTHOR("Jussi Kivilinna <jussi.kivilinna@iki.fi>"); | 507 | MODULE_AUTHOR("Jussi Kivilinna <jussi.kivilinna@iki.fi>"); |