diff options
author | Jim Kukunas <james.t.kukunas@linux.intel.com> | 2012-11-08 16:47:44 -0500 |
---|---|---|
committer | NeilBrown <neilb@suse.de> | 2012-12-13 00:42:01 -0500 |
commit | 7056741fd9fc14a65608549a4657cf5178f05f63 (patch) | |
tree | b30504208f8261c4a0a2625169eaff9aa9de544e /lib/raid6/x86.h | |
parent | 54f89341e8b8da0cdac8a7b873491739de19f098 (diff) |
lib/raid6: Add AVX2 optimized recovery functions
Optimize RAID6 recovery functions to take advantage of
the 256-bit YMM integer instructions introduced in AVX2.
The patch was tested and benchmarked before submission.
However, the hardware has not yet been released, so benchmark numbers
cannot be reported.
Acked-by: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Jim Kukunas <james.t.kukunas@linux.intel.com>
Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'lib/raid6/x86.h')
-rw-r--r-- | lib/raid6/x86.h | 14 |
1 files changed, 9 insertions, 5 deletions
diff --git a/lib/raid6/x86.h b/lib/raid6/x86.h
index d55d63232c55..b7595484a815 100644
--- a/lib/raid6/x86.h
+++ b/lib/raid6/x86.h
@@ -45,19 +45,23 @@ static inline void kernel_fpu_end(void) | |||
45 | #define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ | 45 | #define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ |
46 | #define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */ | 46 | #define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */ |
47 | #define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */ | 47 | #define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */ |
48 | #define X86_FEATURE_AVX2 (9*32+ 5) /* AVX2 instructions */ | ||
48 | #define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ | 49 | #define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ |
49 | 50 | ||
50 | /* Should work well enough on modern CPUs for testing */ | 51 | /* Should work well enough on modern CPUs for testing */ |
51 | static inline int boot_cpu_has(int flag) | 52 | static inline int boot_cpu_has(int flag) |
52 | { | 53 | { |
53 | u32 eax = (flag & 0x20) ? 0x80000001 : 1; | 54 | u32 eax, ebx, ecx, edx; |
54 | u32 ecx, edx; | 55 | |
56 | eax = (flag & 0x100) ? 7 : | ||
57 | (flag & 0x20) ? 0x80000001 : 1; | ||
58 | ecx = 0; | ||
55 | 59 | ||
56 | asm volatile("cpuid" | 60 | asm volatile("cpuid" |
57 | : "+a" (eax), "=d" (edx), "=c" (ecx) | 61 | : "+a" (eax), "=b" (ebx), "=d" (edx), "+c" (ecx)); |
58 | : : "ebx"); | ||
59 | 62 | ||
60 | return ((flag & 0x80 ? ecx : edx) >> (flag & 31)) & 1; | 63 | return ((flag & 0x100 ? ebx : |
64 | (flag & 0x80) ? ecx : edx) >> (flag & 31)) & 1; | ||
61 | } | 65 | } |
62 | 66 | ||
63 | #endif /* ndef __KERNEL__ */ | 67 | #endif /* ndef __KERNEL__ */ |