about summary refs log tree commit diff stats
path: root/arch/x86/crypto/sha1_ssse3_glue.c
diff options
context:
space:
mode:
authorchandramouli narayanan <mouli@linux.intel.com>2014-03-20 18:14:00 -0400
committerHerbert Xu <herbert@gondor.apana.org.au>2014-03-21 09:54:30 -0400
commit7c1da8d0d046174a4188b5729d7579abf3d29427 (patch)
treef527edeaff6a2b0404ce1e3ecf079faeff34fc16 /arch/x86/crypto/sha1_ssse3_glue.c
parent130fa5bc81b44b6cc1fbdea3abf6db0da22964e0 (diff)
crypto: sha - SHA1 transform x86_64 AVX2
This git patch adds x86_64 AVX2 optimization of SHA1 transform to crypto support. The patch has been tested with 3.14.0-rc1 kernel. On a Haswell desktop, with turbo disabled and all cpus running at maximum frequency, tcrypt shows AVX2 performance improvement from 3% for 256 bytes update to 16% for 1024 bytes update over AVX implementation. This patch adds sha1_avx2_transform(), the glue, build and configuration changes needed for AVX2 optimization of SHA1 transform to crypto support. sha1-ssse3 is one module which adds the necessary optimization support (SSSE3/AVX/AVX2) for the low-level SHA1 transform function. With better optimization support, transform function is overridden as the case may be. In the case of AVX2, due to performance reasons across datablock sizes, the AVX or AVX2 transform function is used at run-time as it suits best. The Makefile change therefore appends the necessary objects to the linkage. Due to this, the patch merely appends AVX2 transform to the existing build mix and Kconfig support and leaves the configuration build support as is. Signed-off-by: Chandramouli Narayanan <mouli@linux.intel.com> Reviewed-by: Marek Vasut <marex@denx.de> Acked-by: H. Peter Anvin <hpa@linux.intel.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'arch/x86/crypto/sha1_ssse3_glue.c')
-rw-r--r--arch/x86/crypto/sha1_ssse3_glue.c49
1 files changed, 42 insertions, 7 deletions
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c
index 4a11a9d72451..139a55c04d82 100644
--- a/arch/x86/crypto/sha1_ssse3_glue.c
+++ b/arch/x86/crypto/sha1_ssse3_glue.c
@@ -10,6 +10,7 @@
10 * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> 10 * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
11 * Copyright (c) Jean-Francois Dive <jef@linuxbe.org> 11 * Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
12 * Copyright (c) Mathias Krause <minipli@googlemail.com> 12 * Copyright (c) Mathias Krause <minipli@googlemail.com>
13 * Copyright (c) Chandramouli Narayanan <mouli@linux.intel.com>
13 * 14 *
14 * This program is free software; you can redistribute it and/or modify it 15 * This program is free software; you can redistribute it and/or modify it
15 * under the terms of the GNU General Public License as published by the Free 16 * under the terms of the GNU General Public License as published by the Free
@@ -39,6 +40,12 @@ asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data,
39asmlinkage void sha1_transform_avx(u32 *digest, const char *data, 40asmlinkage void sha1_transform_avx(u32 *digest, const char *data,
40 unsigned int rounds); 41 unsigned int rounds);
41#endif 42#endif
43#ifdef CONFIG_AS_AVX2
44#define SHA1_AVX2_BLOCK_OPTSIZE 4 /* optimal 4*64 bytes of SHA1 blocks */
45
46asmlinkage void sha1_transform_avx2(u32 *digest, const char *data,
47 unsigned int rounds);
48#endif
42 49
43static asmlinkage void (*sha1_transform_asm)(u32 *, const char *, unsigned int); 50static asmlinkage void (*sha1_transform_asm)(u32 *, const char *, unsigned int);
44 51
@@ -165,6 +172,18 @@ static int sha1_ssse3_import(struct shash_desc *desc, const void *in)
165 return 0; 172 return 0;
166} 173}
167 174
175#ifdef CONFIG_AS_AVX2
176static void sha1_apply_transform_avx2(u32 *digest, const char *data,
177 unsigned int rounds)
178{
179 /* Select the optimal transform based on data block size */
180 if (rounds >= SHA1_AVX2_BLOCK_OPTSIZE)
181 sha1_transform_avx2(digest, data, rounds);
182 else
183 sha1_transform_avx(digest, data, rounds);
184}
185#endif
186
168static struct shash_alg alg = { 187static struct shash_alg alg = {
169 .digestsize = SHA1_DIGEST_SIZE, 188 .digestsize = SHA1_DIGEST_SIZE,
170 .init = sha1_ssse3_init, 189 .init = sha1_ssse3_init,
@@ -189,7 +208,11 @@ static bool __init avx_usable(void)
189{ 208{
190 u64 xcr0; 209 u64 xcr0;
191 210
211#if defined(CONFIG_AS_AVX2)
212 if (!cpu_has_avx || !cpu_has_avx2 || !cpu_has_osxsave)
213#else
192 if (!cpu_has_avx || !cpu_has_osxsave) 214 if (!cpu_has_avx || !cpu_has_osxsave)
215#endif
193 return false; 216 return false;
194 217
195 xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); 218 xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
@@ -205,23 +228,35 @@ static bool __init avx_usable(void)
205 228
206static int __init sha1_ssse3_mod_init(void) 229static int __init sha1_ssse3_mod_init(void)
207{ 230{
231 char *algo_name;
208 /* test for SSSE3 first */ 232 /* test for SSSE3 first */
209 if (cpu_has_ssse3) 233 if (cpu_has_ssse3) {
210 sha1_transform_asm = sha1_transform_ssse3; 234 sha1_transform_asm = sha1_transform_ssse3;
235 algo_name = "SSSE3";
236 }
211 237
212#ifdef CONFIG_AS_AVX 238#ifdef CONFIG_AS_AVX
213 /* allow AVX to override SSSE3, it's a little faster */ 239 /* allow AVX to override SSSE3, it's a little faster */
214 if (avx_usable()) 240 if (avx_usable()) {
215 sha1_transform_asm = sha1_transform_avx; 241 if (cpu_has_avx) {
242 sha1_transform_asm = sha1_transform_avx;
243 algo_name = "AVX";
244 }
245#ifdef CONFIG_AS_AVX2
246 if (cpu_has_avx2 && boot_cpu_has(X86_FEATURE_BMI2)) {
247 /* allow AVX2 to override AVX, it's a little faster */
248 sha1_transform_asm = sha1_apply_transform_avx2;
249 algo_name = "AVX2";
250 }
251#endif
252 }
216#endif 253#endif
217 254
218 if (sha1_transform_asm) { 255 if (sha1_transform_asm) {
219 pr_info("Using %s optimized SHA-1 implementation\n", 256 pr_info("Using %s optimized SHA-1 implementation\n", algo_name);
220 sha1_transform_asm == sha1_transform_ssse3 ? "SSSE3"
221 : "AVX");
222 return crypto_register_shash(&alg); 257 return crypto_register_shash(&alg);
223 } 258 }
224 pr_info("Neither AVX nor SSSE3 is available/usable.\n"); 259 pr_info("Neither AVX nor AVX2 nor SSSE3 is available/usable.\n");
225 260
226 return -ENODEV; 261 return -ENODEV;
227} 262}