author	Jim Kukunas <james.t.kukunas@linux.intel.com>	2012-05-21 23:54:04 -0400
committer	NeilBrown <neilb@suse.de>	2012-05-21 23:54:04 -0400
commit	ea4d26ae24e58fbd2c61de9242adab053cb982d8
tree	3115dd168f0cf1eb1eb5dd6aecc385cfa0e8bc05 /arch/x86/include/asm/xor_32.h
parent	56a519913eeba2bdae4d7ee39e80fab442c3836c
raid5: add AVX optimized RAID5 checksumming
Optimize RAID5 xor checksumming by taking advantage of
256-bit YMM registers introduced in AVX.
Signed-off-by: Jim Kukunas <james.t.kukunas@linux.intel.com>
Signed-off-by: NeilBrown <neilb@suse.de>
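
To illustrate the idea in the commit message, here is a minimal user-space sketch of XOR checksumming through 256-bit YMM registers, assuming an AVX-capable toolchain (compile with -mavx). The function name xor_avx_sketch and the intrinsics-based style are illustrative only; the in-kernel routines this patch adds use inline assembly and must save and restore FPU/YMM state around the loop, which a sketch like this can ignore.

#include <immintrin.h>
#include <stddef.h>

/*
 * Hypothetical sketch: XOR the 32-byte-aligned buffer p2 into p1,
 * 32 bytes (one YMM register) at a time. vxorps (_mm256_xor_ps) is a
 * plain bitwise XOR, so treating parity bytes as floats is safe here;
 * no floating-point arithmetic is performed. bytes must be a multiple
 * of 32.
 */
static void xor_avx_sketch(size_t bytes, float *p1, const float *p2)
{
	size_t i;

	for (i = 0; i < bytes / sizeof(float); i += 8) {
		__m256 a = _mm256_load_ps(p1 + i);
		__m256 b = _mm256_load_ps(p2 + i);

		_mm256_store_ps(p1 + i, _mm256_xor_ps(a, b));
	}
}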
Diffstat (limited to 'arch/x86/include/asm/xor_32.h')
 arch/x86/include/asm/xor_32.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)
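
For orientation, the xor_speed() calls and templates touched below operate on struct xor_block_template, which (per include/linux/raid/xor.h of that era) bundles a benchmarked speed with XOR routines for two to five source blocks:

struct xor_block_template {
	struct xor_block_template *next;
	const char *name;
	int speed;	/* filled in by the boot-time xor_speed() benchmark */
	void (*do_2)(unsigned long, unsigned long *, unsigned long *);
	void (*do_3)(unsigned long, unsigned long *, unsigned long *,
		     unsigned long *);
	void (*do_4)(unsigned long, unsigned long *, unsigned long *,
		     unsigned long *, unsigned long *);
	void (*do_5)(unsigned long, unsigned long *, unsigned long *,
		     unsigned long *, unsigned long *, unsigned long *);
};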
diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h
index 133b40a0f495..454570891bdc 100644
--- a/arch/x86/include/asm/xor_32.h
+++ b/arch/x86/include/asm/xor_32.h
@@ -861,6 +861,9 @@ static struct xor_block_template xor_block_pIII_sse = {
 	.do_5 = xor_sse_5,
 };
 
+/* Also try the AVX routines */
+#include "xor_avx.h"
+
 /* Also try the generic routines. */
 #include <asm-generic/xor.h>
 
@@ -871,6 +874,7 @@ do { \
 	xor_speed(&xor_block_8regs_p); \
 	xor_speed(&xor_block_32regs); \
 	xor_speed(&xor_block_32regs_p); \
+	AVX_XOR_SPEED; \
 	if (cpu_has_xmm) \
 		xor_speed(&xor_block_pIII_sse); \
 	if (cpu_has_mmx) { \
@@ -883,6 +887,6 @@ do { \
    We may also be able to load into the L1 only depending on how the cpu
    deals with a load to a line that is being prefetched. */
 #define XOR_SELECT_TEMPLATE(FASTEST) \
-	(cpu_has_xmm ? &xor_block_pIII_sse : FASTEST)
+	AVX_SELECT(cpu_has_xmm ? &xor_block_pIII_sse : FASTEST)
 
 #endif /* _ASM_X86_XOR_32_H */
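
The diff above only shows the call sites; the AVX_XOR_SPEED and AVX_SELECT macros come from the xor_avx.h header added elsewhere in this patch, which is not part of this diffstat. A rough sketch of the shape such glue would take, assuming a cpu_has_avx feature test and an xor_block_avx template (both names are guesses at the header's contents, not quoted from it):

/*
 * Sketch of the expected xor_avx.h glue. When the assembler cannot
 * emit AVX (no CONFIG_AS_AVX), both macros would collapse to no-ops
 * so the call sites above still compile unchanged.
 */
#ifdef CONFIG_AS_AVX

#define AVX_XOR_SPEED				\
do {						\
	if (cpu_has_avx)			\
		xor_speed(&xor_block_avx);	\
} while (0)

#define AVX_SELECT(FASTEST)			\
	(cpu_has_avx ? &xor_block_avx : FASTEST)

#else

#define AVX_XOR_SPEED {}

#define AVX_SELECT(FASTEST) (FASTEST)

#endif

With glue of this shape, XOR_SELECT_TEMPLATE prefers the AVX template on CPUs that support it and otherwise falls back to the pre-existing SSE-or-FASTEST choice unchanged.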