author		Tim Chen <tim.c.chen@linux.intel.com>	2013-03-26 16:59:17 -0400
committer	Herbert Xu <herbert@gondor.apana.org.au>	2013-04-25 09:00:57 -0400
commit		8275d1aa642295edd34a11a117080384bb9d65c2 (patch)
tree		60200502662d5fa75b3d4c120cf740a35ea0def2
parent		d34a460092d857f1616e39eed7eac6f40cea2225 (diff)
crypto: sha256 - Create module providing optimized SHA256 routines using SSSE3, AVX or AVX2 instructions.
We added glue code and config options to create a crypto module that uses the
SSE/AVX/AVX2 optimized SHA256 x86_64 assembly routines.

Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
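For context, a minimal sketch (not part of the patch) of how a kernel-side caller
reaches this code through the shash API. The helper example_sha256() below is
hypothetical; the crypto API calls are the standard ones, and the generic "sha256"
name resolves to this driver once it is loaded because its cra_priority (150)
outranks the generic C implementation.

/*
 * Illustrative shash user (hypothetical helper, not part of this patch).
 * 'digest' must be at least SHA256_DIGEST_SIZE (32) bytes.
 */
#include <crypto/hash.h>
#include <linux/err.h>
#include <linux/slab.h>

static int example_sha256(const u8 *data, unsigned int len, u8 *digest)
{
	struct crypto_shash *tfm;
	struct shash_desc *desc;
	int ret;

	/* "sha256" picks the highest-priority registered implementation. */
	tfm = crypto_alloc_shash("sha256", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	/* The descriptor carries the per-request hash state. */
	desc = kzalloc(sizeof(*desc) + crypto_shash_descsize(tfm), GFP_KERNEL);
	if (!desc) {
		crypto_free_shash(tfm);
		return -ENOMEM;
	}
	desc->tfm = tfm;

	ret = crypto_shash_digest(desc, data, len, digest);

	kfree(desc);
	crypto_free_shash(tfm);
	return ret;
}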
-rw-r--r--	arch/x86/crypto/Makefile	2
-rw-r--r--	arch/x86/crypto/sha256_ssse3_glue.c	275
-rw-r--r--	crypto/Kconfig	11
3 files changed, 288 insertions(+), 0 deletions(-)
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 83681a317422..9414b91b8f49 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
 obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o
 obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o
 obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o
+obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o
 
 # These modules require assembler to support AVX.
 ifeq ($(avx_supported),yes)
@@ -66,3 +67,4 @@ sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
 crc32c-intel-y := crc32c-intel_glue.o
 crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o
 crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o
+sha256-ssse3-y := sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256_ssse3_glue.o
diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c
new file mode 100644
index 000000000000..597d4da69656
--- /dev/null
+++ b/arch/x86/crypto/sha256_ssse3_glue.c
@@ -0,0 +1,275 @@
+/*
+ * Cryptographic API.
+ *
+ * Glue code for the SHA256 Secure Hash Algorithm assembler
+ * implementation using supplemental SSE3 / AVX / AVX2 instructions.
+ *
+ * This file is based on sha256_generic.c
+ *
+ * Copyright (C) 2013 Intel Corporation.
+ *
+ * Author:
+ *     Tim Chen <tim.c.chen@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <crypto/sha.h>
+#include <asm/byteorder.h>
+#include <asm/i387.h>
+#include <asm/xcr.h>
+#include <asm/xsave.h>
+#include <linux/string.h>
+
+asmlinkage void sha256_transform_ssse3(const char *data, u32 *digest,
+				       u64 rounds);
+#ifdef CONFIG_AS_AVX
+asmlinkage void sha256_transform_avx(const char *data, u32 *digest,
+				     u64 rounds);
+#endif
+#ifdef CONFIG_AS_AVX2
+asmlinkage void sha256_transform_rorx(const char *data, u32 *digest,
+				      u64 rounds);
+#endif
+
+static asmlinkage void (*sha256_transform_asm)(const char *, u32 *, u64);
+
+
+static int sha256_ssse3_init(struct shash_desc *desc)
+{
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+
+	sctx->state[0] = SHA256_H0;
+	sctx->state[1] = SHA256_H1;
+	sctx->state[2] = SHA256_H2;
+	sctx->state[3] = SHA256_H3;
+	sctx->state[4] = SHA256_H4;
+	sctx->state[5] = SHA256_H5;
+	sctx->state[6] = SHA256_H6;
+	sctx->state[7] = SHA256_H7;
+	sctx->count = 0;
+
+	return 0;
+}
+
+static int __sha256_ssse3_update(struct shash_desc *desc, const u8 *data,
+				 unsigned int len, unsigned int partial)
+{
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+	unsigned int done = 0;
+
+	sctx->count += len;
+
+	if (partial) {
+		done = SHA256_BLOCK_SIZE - partial;
+		memcpy(sctx->buf + partial, data, done);
+		sha256_transform_asm(sctx->buf, sctx->state, 1);
+	}
+
+	if (len - done >= SHA256_BLOCK_SIZE) {
+		const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE;
+
+		sha256_transform_asm(data + done, sctx->state, (u64) rounds);
+
+		done += rounds * SHA256_BLOCK_SIZE;
+	}
+
+	memcpy(sctx->buf, data + done, len - done);
+
+	return 0;
+}
+
+static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data,
+			       unsigned int len)
+{
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+	unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
+	int res;
+
+	/* Handle the fast case right here */
+	if (partial + len < SHA256_BLOCK_SIZE) {
+		sctx->count += len;
+		memcpy(sctx->buf + partial, data, len);
+
+		return 0;
+	}
+
+	if (!irq_fpu_usable()) {
+		res = crypto_sha256_update(desc, data, len);
+	} else {
+		kernel_fpu_begin();
+		res = __sha256_ssse3_update(desc, data, len, partial);
+		kernel_fpu_end();
+	}
+
+	return res;
+}
+
+
+/* Add padding and return the message digest. */
+static int sha256_ssse3_final(struct shash_desc *desc, u8 *out)
+{
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+	unsigned int i, index, padlen;
+	__be32 *dst = (__be32 *)out;
+	__be64 bits;
+	static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };
+
+	bits = cpu_to_be64(sctx->count << 3);
+
+	/* Pad out to 56 mod 64 and append length */
+	index = sctx->count % SHA256_BLOCK_SIZE;
+	padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56)-index);
+
+	if (!irq_fpu_usable()) {
+		crypto_sha256_update(desc, padding, padlen);
+		crypto_sha256_update(desc, (const u8 *)&bits, sizeof(bits));
+	} else {
+		kernel_fpu_begin();
+		/* We need to fill a whole block for __sha256_ssse3_update() */
+		if (padlen <= 56) {
+			sctx->count += padlen;
+			memcpy(sctx->buf + index, padding, padlen);
+		} else {
+			__sha256_ssse3_update(desc, padding, padlen, index);
+		}
+		__sha256_ssse3_update(desc, (const u8 *)&bits,
+				      sizeof(bits), 56);
+		kernel_fpu_end();
+	}
+
+	/* Store state in digest */
+	for (i = 0; i < 8; i++)
+		dst[i] = cpu_to_be32(sctx->state[i]);
+
+	/* Wipe context */
+	memset(sctx, 0, sizeof(*sctx));
+
+	return 0;
+}
+
+static int sha256_ssse3_export(struct shash_desc *desc, void *out)
+{
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+
+	memcpy(out, sctx, sizeof(*sctx));
+
+	return 0;
+}
+
+static int sha256_ssse3_import(struct shash_desc *desc, const void *in)
+{
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+
+	memcpy(sctx, in, sizeof(*sctx));
+
+	return 0;
+}
+
+static struct shash_alg alg = {
+	.digestsize	= SHA256_DIGEST_SIZE,
+	.init		= sha256_ssse3_init,
+	.update		= sha256_ssse3_update,
+	.final		= sha256_ssse3_final,
+	.export		= sha256_ssse3_export,
+	.import		= sha256_ssse3_import,
+	.descsize	= sizeof(struct sha256_state),
+	.statesize	= sizeof(struct sha256_state),
+	.base		= {
+		.cra_name	 = "sha256",
+		.cra_driver_name = "sha256-ssse3",
+		.cra_priority	 = 150,
+		.cra_flags	 = CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize	 = SHA256_BLOCK_SIZE,
+		.cra_module	 = THIS_MODULE,
+	}
+};
+
+#ifdef CONFIG_AS_AVX
+static bool __init avx_usable(void)
+{
+	u64 xcr0;
+
+	if (!cpu_has_avx || !cpu_has_osxsave)
+		return false;
+
+	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
+	if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
+		pr_info("AVX detected but unusable.\n");
+
+		return false;
+	}
+
+	return true;
+}
+#endif
+
+static int __init sha256_ssse3_mod_init(void)
+{
+	/* test for SSE3 first */
+	if (cpu_has_ssse3)
+		sha256_transform_asm = sha256_transform_ssse3;
+
+#ifdef CONFIG_AS_AVX
+	/* allow AVX to override SSSE3, it's a little faster */
+	if (avx_usable()) {
+#ifdef CONFIG_AS_AVX2
+		if (boot_cpu_has(X86_FEATURE_AVX2))
+			sha256_transform_asm = sha256_transform_rorx;
+		else
+#endif
+			sha256_transform_asm = sha256_transform_avx;
+	}
+#endif
+
+	if (sha256_transform_asm) {
+#ifdef CONFIG_AS_AVX
+		if (sha256_transform_asm == sha256_transform_avx)
+			pr_info("Using AVX optimized SHA-256 implementation\n");
+#ifdef CONFIG_AS_AVX2
+		else if (sha256_transform_asm == sha256_transform_rorx)
+			pr_info("Using AVX2 optimized SHA-256 implementation\n");
+#endif
+		else
+#endif
+			pr_info("Using SSSE3 optimized SHA-256 implementation\n");
+		return crypto_register_shash(&alg);
+	}
+	pr_info("Neither AVX nor SSSE3 is available/usable.\n");
+
+	return -ENODEV;
+}
+
+static void __exit sha256_ssse3_mod_fini(void)
+{
+	crypto_unregister_shash(&alg);
+}
+
+module_init(sha256_ssse3_mod_init);
+module_exit(sha256_ssse3_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, Supplemental SSE3 accelerated");
+
+MODULE_ALIAS("sha256");
diff --git a/crypto/Kconfig b/crypto/Kconfig
index aed52b2e4a55..8064ef1fedc4 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -470,6 +470,17 @@ config CRYPTO_SHA1_SSSE3
 	  using Supplemental SSE3 (SSSE3) instructions or Advanced Vector
 	  Extensions (AVX), when available.
 
+config CRYPTO_SHA256_SSSE3
+	tristate "SHA256 digest algorithm (SSSE3/AVX/AVX2)"
+	depends on X86 && 64BIT
+	select CRYPTO_SHA256
+	select CRYPTO_HASH
+	help
+	  SHA-256 secure hash standard (DFIPS 180-2) implemented
+	  using Supplemental SSE3 (SSSE3) instructions, or Advanced Vector
+	  Extensions version 1 (AVX1), or Advanced Vector Extensions
+	  version 2 (AVX2) instructions, when available.
+
 config CRYPTO_SHA1_SPARC64
 	tristate "SHA1 digest algorithm (SPARC64)"
 	depends on SPARC64
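
As a usage note (not part of the patch): the new driver is built only when the
option added above is enabled in the kernel configuration, for example

	CONFIG_CRYPTO_SHA256_SSSE3=m

Once the sha256-ssse3 module is loaded (MODULE_ALIAS("sha256") also lets it be
loaded automatically when "sha256" is requested), the registered "sha256-ssse3"
driver and its priority can be inspected in /proc/crypto.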