aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/arm/Kconfig7
-rw-r--r--arch/arm/include/asm/neon.h36
-rw-r--r--arch/arm/include/asm/xor.h73
-rw-r--r--arch/arm/lib/Makefile6
-rw-r--r--arch/arm/lib/xor-neon.c42
-rw-r--r--arch/arm/vfp/vfphw.S5
-rw-r--r--arch/arm/vfp/vfpmodule.c69
-rw-r--r--include/linux/raid/pq.h5
-rw-r--r--lib/raid6/.gitignore1
-rw-r--r--lib/raid6/Makefile40
-rw-r--r--lib/raid6/algos.c6
-rw-r--r--lib/raid6/neon.c58
-rw-r--r--lib/raid6/neon.uc80
-rw-r--r--lib/raid6/test/Makefile26
14 files changed, 452 insertions, 2 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index ba412e02ec0c..ccc388d388be 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -2176,6 +2176,13 @@ config NEON
2176 Say Y to include support code for NEON, the ARMv7 Advanced SIMD 2176 Say Y to include support code for NEON, the ARMv7 Advanced SIMD
2177 Extension. 2177 Extension.
2178 2178
2179config KERNEL_MODE_NEON
2180 bool "Support for NEON in kernel mode"
2181 default n
2182 depends on NEON
2183 help
2184 Say Y to include support for NEON in kernel mode.
2185
2179endmenu 2186endmenu
2180 2187
2181menu "Userspace binary formats" 2188menu "Userspace binary formats"
diff --git a/arch/arm/include/asm/neon.h b/arch/arm/include/asm/neon.h
new file mode 100644
index 000000000000..8f730fe70093
--- /dev/null
+++ b/arch/arm/include/asm/neon.h
@@ -0,0 +1,36 @@
1/*
2 * linux/arch/arm/include/asm/neon.h
3 *
4 * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <asm/hwcap.h>
12
13#define cpu_has_neon() (!!(elf_hwcap & HWCAP_NEON))
14
15#ifdef __ARM_NEON__
16
17/*
18 * If you are affected by the BUILD_BUG below, it probably means that you are
19 * using NEON code /and/ calling the kernel_neon_begin() function from the same
20 * compilation unit. To prevent issues that may arise from GCC reordering or
21 * generating(1) NEON instructions outside of these begin/end functions, the
22 * only supported way of using NEON code in the kernel is by isolating it in a
23 * separate compilation unit, and calling it from another unit from inside a
24 * kernel_neon_begin/kernel_neon_end pair.
25 *
26 * (1) Current GCC (4.7) might generate NEON instructions at O3 level if
27 * -mpfu=neon is set.
28 */
29
30#define kernel_neon_begin() \
31 BUILD_BUG_ON_MSG(1, "kernel_neon_begin() called from NEON code")
32
33#else
34void kernel_neon_begin(void);
35#endif
36void kernel_neon_end(void);
diff --git a/arch/arm/include/asm/xor.h b/arch/arm/include/asm/xor.h
index 7604673dc427..4ffb26d4cad8 100644
--- a/arch/arm/include/asm/xor.h
+++ b/arch/arm/include/asm/xor.h
@@ -7,7 +7,10 @@
7 * it under the terms of the GNU General Public License version 2 as 7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation. 8 * published by the Free Software Foundation.
9 */ 9 */
10#include <linux/hardirq.h>
10#include <asm-generic/xor.h> 11#include <asm-generic/xor.h>
12#include <asm/hwcap.h>
13#include <asm/neon.h>
11 14
12#define __XOR(a1, a2) a1 ^= a2 15#define __XOR(a1, a2) a1 ^= a2
13 16
@@ -138,4 +141,74 @@ static struct xor_block_template xor_block_arm4regs = {
138 xor_speed(&xor_block_arm4regs); \ 141 xor_speed(&xor_block_arm4regs); \
139 xor_speed(&xor_block_8regs); \ 142 xor_speed(&xor_block_8regs); \
140 xor_speed(&xor_block_32regs); \ 143 xor_speed(&xor_block_32regs); \
144 NEON_TEMPLATES; \
141 } while (0) 145 } while (0)
146
147#ifdef CONFIG_KERNEL_MODE_NEON
148
149extern struct xor_block_template const xor_block_neon_inner;
150
151static void
152xor_neon_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
153{
154 if (in_interrupt()) {
155 xor_arm4regs_2(bytes, p1, p2);
156 } else {
157 kernel_neon_begin();
158 xor_block_neon_inner.do_2(bytes, p1, p2);
159 kernel_neon_end();
160 }
161}
162
163static void
164xor_neon_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
165 unsigned long *p3)
166{
167 if (in_interrupt()) {
168 xor_arm4regs_3(bytes, p1, p2, p3);
169 } else {
170 kernel_neon_begin();
171 xor_block_neon_inner.do_3(bytes, p1, p2, p3);
172 kernel_neon_end();
173 }
174}
175
176static void
177xor_neon_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
178 unsigned long *p3, unsigned long *p4)
179{
180 if (in_interrupt()) {
181 xor_arm4regs_4(bytes, p1, p2, p3, p4);
182 } else {
183 kernel_neon_begin();
184 xor_block_neon_inner.do_4(bytes, p1, p2, p3, p4);
185 kernel_neon_end();
186 }
187}
188
189static void
190xor_neon_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
191 unsigned long *p3, unsigned long *p4, unsigned long *p5)
192{
193 if (in_interrupt()) {
194 xor_arm4regs_5(bytes, p1, p2, p3, p4, p5);
195 } else {
196 kernel_neon_begin();
197 xor_block_neon_inner.do_5(bytes, p1, p2, p3, p4, p5);
198 kernel_neon_end();
199 }
200}
201
202static struct xor_block_template xor_block_neon = {
203 .name = "neon",
204 .do_2 = xor_neon_2,
205 .do_3 = xor_neon_3,
206 .do_4 = xor_neon_4,
207 .do_5 = xor_neon_5
208};
209
210#define NEON_TEMPLATES \
211 do { if (cpu_has_neon()) xor_speed(&xor_block_neon); } while (0)
212#else
213#define NEON_TEMPLATES
214#endif
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
index af72969820b4..aaf3a8731136 100644
--- a/arch/arm/lib/Makefile
+++ b/arch/arm/lib/Makefile
@@ -45,3 +45,9 @@ lib-$(CONFIG_ARCH_SHARK) += io-shark.o
45 45
46$(obj)/csumpartialcopy.o: $(obj)/csumpartialcopygeneric.S 46$(obj)/csumpartialcopy.o: $(obj)/csumpartialcopygeneric.S
47$(obj)/csumpartialcopyuser.o: $(obj)/csumpartialcopygeneric.S 47$(obj)/csumpartialcopyuser.o: $(obj)/csumpartialcopygeneric.S
48
49ifeq ($(CONFIG_KERNEL_MODE_NEON),y)
50 NEON_FLAGS := -mfloat-abi=softfp -mfpu=neon
51 CFLAGS_xor-neon.o += $(NEON_FLAGS)
52 lib-$(CONFIG_XOR_BLOCKS) += xor-neon.o
53endif
diff --git a/arch/arm/lib/xor-neon.c b/arch/arm/lib/xor-neon.c
new file mode 100644
index 000000000000..f485e5a2af4b
--- /dev/null
+++ b/arch/arm/lib/xor-neon.c
@@ -0,0 +1,42 @@
1/*
2 * linux/arch/arm/lib/xor-neon.c
3 *
4 * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/raid/xor.h>
12
13#ifndef __ARM_NEON__
14#error You should compile this file with '-mfloat-abi=softfp -mfpu=neon'
15#endif
16
17/*
18 * Pull in the reference implementations while instructing GCC (through
19 * -ftree-vectorize) to attempt to exploit implicit parallelism and emit
20 * NEON instructions.
21 */
22#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
23#pragma GCC optimize "tree-vectorize"
24#else
25/*
26 * While older versions of GCC do not generate incorrect code, they fail to
27 * recognize the parallel nature of these functions, and emit plain ARM code,
28 * which is known to be slower than the optimized ARM code in asm-arm/xor.h.
29 */
30#warning This code requires at least version 4.6 of GCC
31#endif
32
33#pragma GCC diagnostic ignored "-Wunused-variable"
34#include <asm-generic/xor.h>
35
36struct xor_block_template const xor_block_neon_inner = {
37 .name = "__inner_neon__",
38 .do_2 = xor_8regs_2,
39 .do_3 = xor_8regs_3,
40 .do_4 = xor_8regs_4,
41 .do_5 = xor_8regs_5,
42};
diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S
index 8d10dc8a1e17..3e5d3115a2a6 100644
--- a/arch/arm/vfp/vfphw.S
+++ b/arch/arm/vfp/vfphw.S
@@ -78,6 +78,11 @@
78ENTRY(vfp_support_entry) 78ENTRY(vfp_support_entry)
79 DBGSTR3 "instr %08x pc %08x state %p", r0, r2, r10 79 DBGSTR3 "instr %08x pc %08x state %p", r0, r2, r10
80 80
81 ldr r3, [sp, #S_PSR] @ Neither lazy restore nor FP exceptions
82 and r3, r3, #MODE_MASK @ are supported in kernel mode
83 teq r3, #USR_MODE
84 bne vfp_kmode_exception @ Returns through lr
85
81 VFPFMRX r1, FPEXC @ Is the VFP enabled? 86 VFPFMRX r1, FPEXC @ Is the VFP enabled?
82 DBGSTR1 "fpexc %08x", r1 87 DBGSTR1 "fpexc %08x", r1
83 tst r1, #FPEXC_EN 88 tst r1, #FPEXC_EN
diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index 5dfbb0b8e7f4..52b8f40b1c73 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -20,6 +20,7 @@
20#include <linux/init.h> 20#include <linux/init.h>
21#include <linux/uaccess.h> 21#include <linux/uaccess.h>
22#include <linux/user.h> 22#include <linux/user.h>
23#include <linux/export.h>
23 24
24#include <asm/cp15.h> 25#include <asm/cp15.h>
25#include <asm/cputype.h> 26#include <asm/cputype.h>
@@ -648,6 +649,72 @@ static int vfp_hotplug(struct notifier_block *b, unsigned long action,
648 return NOTIFY_OK; 649 return NOTIFY_OK;
649} 650}
650 651
652void vfp_kmode_exception(void)
653{
654 /*
655 * If we reach this point, a floating point exception has been raised
656 * while running in kernel mode. If the NEON/VFP unit was enabled at the
657 * time, it means a VFP instruction has been issued that requires
658 * software assistance to complete, something which is not currently
659 * supported in kernel mode.
660 * If the NEON/VFP unit was disabled, and the location pointed to below
661 * is properly preceded by a call to kernel_neon_begin(), something has
662 * caused the task to be scheduled out and back in again. In this case,
663 * rebuilding and running with CONFIG_DEBUG_ATOMIC_SLEEP enabled should
664 * be helpful in localizing the problem.
665 */
666 if (fmrx(FPEXC) & FPEXC_EN)
667 pr_crit("BUG: unsupported FP instruction in kernel mode\n");
668 else
669 pr_crit("BUG: FP instruction issued in kernel mode with FP unit disabled\n");
670}
671
672#ifdef CONFIG_KERNEL_MODE_NEON
673
674/*
675 * Kernel-side NEON support functions
676 */
677void kernel_neon_begin(void)
678{
679 struct thread_info *thread = current_thread_info();
680 unsigned int cpu;
681 u32 fpexc;
682
683 /*
684 * Kernel mode NEON is only allowed outside of interrupt context
685 * with preemption disabled. This will make sure that the kernel
686 * mode NEON register contents never need to be preserved.
687 */
688 BUG_ON(in_interrupt());
689 cpu = get_cpu();
690
691 fpexc = fmrx(FPEXC) | FPEXC_EN;
692 fmxr(FPEXC, fpexc);
693
694 /*
695 * Save the userland NEON/VFP state. Under UP,
696 * the owner could be a task other than 'current'
697 */
698 if (vfp_state_in_hw(cpu, thread))
699 vfp_save_state(&thread->vfpstate, fpexc);
700#ifndef CONFIG_SMP
701 else if (vfp_current_hw_state[cpu] != NULL)
702 vfp_save_state(vfp_current_hw_state[cpu], fpexc);
703#endif
704 vfp_current_hw_state[cpu] = NULL;
705}
706EXPORT_SYMBOL(kernel_neon_begin);
707
708void kernel_neon_end(void)
709{
710 /* Disable the NEON/VFP unit. */
711 fmxr(FPEXC, fmrx(FPEXC) & ~FPEXC_EN);
712 put_cpu();
713}
714EXPORT_SYMBOL(kernel_neon_end);
715
716#endif /* CONFIG_KERNEL_MODE_NEON */
717
651/* 718/*
652 * VFP support code initialisation. 719 * VFP support code initialisation.
653 */ 720 */
@@ -731,4 +798,4 @@ static int __init vfp_init(void)
731 return 0; 798 return 0;
732} 799}
733 800
734late_initcall(vfp_init); 801core_initcall(vfp_init);
diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index 8dfaa2ce2e95..0f424698064f 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -114,6 +114,11 @@ extern const struct raid6_recov_calls raid6_recov_intx1;
114extern const struct raid6_recov_calls raid6_recov_ssse3; 114extern const struct raid6_recov_calls raid6_recov_ssse3;
115extern const struct raid6_recov_calls raid6_recov_avx2; 115extern const struct raid6_recov_calls raid6_recov_avx2;
116 116
117extern const struct raid6_calls raid6_neonx1;
118extern const struct raid6_calls raid6_neonx2;
119extern const struct raid6_calls raid6_neonx4;
120extern const struct raid6_calls raid6_neonx8;
121
117/* Algorithm list */ 122/* Algorithm list */
118extern const struct raid6_calls * const raid6_algos[]; 123extern const struct raid6_calls * const raid6_algos[];
119extern const struct raid6_recov_calls *const raid6_recov_algos[]; 124extern const struct raid6_recov_calls *const raid6_recov_algos[];
diff --git a/lib/raid6/.gitignore b/lib/raid6/.gitignore
index 162becacf97c..0a7e494b2bcd 100644
--- a/lib/raid6/.gitignore
+++ b/lib/raid6/.gitignore
@@ -2,3 +2,4 @@ mktables
2altivec*.c 2altivec*.c
3int*.c 3int*.c
4tables.c 4tables.c
5neon?.c
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index 9f7c184725d7..b4625787c7ee 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -5,6 +5,7 @@ raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \
5 5
6raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o 6raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o
7raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o 7raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o
8raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o
8 9
9hostprogs-y += mktables 10hostprogs-y += mktables
10 11
@@ -16,6 +17,21 @@ ifeq ($(CONFIG_ALTIVEC),y)
16altivec_flags := -maltivec -mabi=altivec 17altivec_flags := -maltivec -mabi=altivec
17endif 18endif
18 19
20# The GCC option -ffreestanding is required in order to compile code containing
21# ARM/NEON intrinsics in a non C99-compliant environment (such as the kernel)
22ifeq ($(CONFIG_KERNEL_MODE_NEON),y)
23NEON_FLAGS := -ffreestanding
24ifeq ($(ARCH),arm)
25NEON_FLAGS += -mfloat-abi=softfp -mfpu=neon
26endif
27ifeq ($(ARCH),arm64)
28CFLAGS_REMOVE_neon1.o += -mgeneral-regs-only
29CFLAGS_REMOVE_neon2.o += -mgeneral-regs-only
30CFLAGS_REMOVE_neon4.o += -mgeneral-regs-only
31CFLAGS_REMOVE_neon8.o += -mgeneral-regs-only
32endif
33endif
34
19targets += int1.c 35targets += int1.c
20$(obj)/int1.c: UNROLL := 1 36$(obj)/int1.c: UNROLL := 1
21$(obj)/int1.c: $(src)/int.uc $(src)/unroll.awk FORCE 37$(obj)/int1.c: $(src)/int.uc $(src)/unroll.awk FORCE
@@ -70,6 +86,30 @@ $(obj)/altivec8.c: UNROLL := 8
70$(obj)/altivec8.c: $(src)/altivec.uc $(src)/unroll.awk FORCE 86$(obj)/altivec8.c: $(src)/altivec.uc $(src)/unroll.awk FORCE
71 $(call if_changed,unroll) 87 $(call if_changed,unroll)
72 88
89CFLAGS_neon1.o += $(NEON_FLAGS)
90targets += neon1.c
91$(obj)/neon1.c: UNROLL := 1
92$(obj)/neon1.c: $(src)/neon.uc $(src)/unroll.awk FORCE
93 $(call if_changed,unroll)
94
95CFLAGS_neon2.o += $(NEON_FLAGS)
96targets += neon2.c
97$(obj)/neon2.c: UNROLL := 2
98$(obj)/neon2.c: $(src)/neon.uc $(src)/unroll.awk FORCE
99 $(call if_changed,unroll)
100
101CFLAGS_neon4.o += $(NEON_FLAGS)
102targets += neon4.c
103$(obj)/neon4.c: UNROLL := 4
104$(obj)/neon4.c: $(src)/neon.uc $(src)/unroll.awk FORCE
105 $(call if_changed,unroll)
106
107CFLAGS_neon8.o += $(NEON_FLAGS)
108targets += neon8.c
109$(obj)/neon8.c: UNROLL := 8
110$(obj)/neon8.c: $(src)/neon.uc $(src)/unroll.awk FORCE
111 $(call if_changed,unroll)
112
73quiet_cmd_mktable = TABLE $@ 113quiet_cmd_mktable = TABLE $@
74 cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 ) 114 cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 )
75 115
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index 6d7316fe9f30..74e6f5629dbc 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -70,6 +70,12 @@ const struct raid6_calls * const raid6_algos[] = {
70 &raid6_intx2, 70 &raid6_intx2,
71 &raid6_intx4, 71 &raid6_intx4,
72 &raid6_intx8, 72 &raid6_intx8,
73#ifdef CONFIG_KERNEL_MODE_NEON
74 &raid6_neonx1,
75 &raid6_neonx2,
76 &raid6_neonx4,
77 &raid6_neonx8,
78#endif
73 NULL 79 NULL
74}; 80};
75 81
diff --git a/lib/raid6/neon.c b/lib/raid6/neon.c
new file mode 100644
index 000000000000..36ad4705df1a
--- /dev/null
+++ b/lib/raid6/neon.c
@@ -0,0 +1,58 @@
1/*
2 * linux/lib/raid6/neon.c - RAID6 syndrome calculation using ARM NEON intrinsics
3 *
4 * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/raid/pq.h>
12
13#ifdef __KERNEL__
14#include <asm/neon.h>
15#else
16#define kernel_neon_begin()
17#define kernel_neon_end()
18#define cpu_has_neon() (1)
19#endif
20
21/*
22 * There are 2 reasons these wrappers are kept in a separate compilation unit
23 * from the actual implementations in neonN.c (generated from neon.uc by
24 * unroll.awk):
25 * - the actual implementations use NEON intrinsics, and the GCC support header
26 * (arm_neon.h) is not fully compatible (type wise) with the kernel;
27 * - the neonN.c files are compiled with -mfpu=neon and optimization enabled,
28 * and we have to make sure that we never use *any* NEON/VFP instructions
29 * outside a kernel_neon_begin()/kernel_neon_end() pair.
30 */
31
32#define RAID6_NEON_WRAPPER(_n) \
33 static void raid6_neon ## _n ## _gen_syndrome(int disks, \
34 size_t bytes, void **ptrs) \
35 { \
36 void raid6_neon ## _n ## _gen_syndrome_real(int, \
37 unsigned long, void**); \
38 kernel_neon_begin(); \
39 raid6_neon ## _n ## _gen_syndrome_real(disks, \
40 (unsigned long)bytes, ptrs); \
41 kernel_neon_end(); \
42 } \
43 struct raid6_calls const raid6_neonx ## _n = { \
44 raid6_neon ## _n ## _gen_syndrome, \
45 raid6_have_neon, \
46 "neonx" #_n, \
47 0 \
48 }
49
50static int raid6_have_neon(void)
51{
52 return cpu_has_neon();
53}
54
55RAID6_NEON_WRAPPER(1);
56RAID6_NEON_WRAPPER(2);
57RAID6_NEON_WRAPPER(4);
58RAID6_NEON_WRAPPER(8);
diff --git a/lib/raid6/neon.uc b/lib/raid6/neon.uc
new file mode 100644
index 000000000000..1b9ed793342d
--- /dev/null
+++ b/lib/raid6/neon.uc
@@ -0,0 +1,80 @@
1/* -----------------------------------------------------------------------
2 *
3 * neon.uc - RAID-6 syndrome calculation using ARM NEON instructions
4 *
5 * Copyright (C) 2012 Rob Herring
6 *
7 * Based on altivec.uc:
8 * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
13 * Boston MA 02111-1307, USA; either version 2 of the License, or
14 * (at your option) any later version; incorporated herein by reference.
15 *
16 * ----------------------------------------------------------------------- */
17
18/*
19 * neon$#.c
20 *
21 * $#-way unrolled NEON intrinsics math RAID-6 instruction set
22 *
23 * This file is postprocessed using unroll.awk
24 */
25
26#include <arm_neon.h>
27
28typedef uint8x16_t unative_t;
29
30#define NBYTES(x) ((unative_t){x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x})
31#define NSIZE sizeof(unative_t)
32
33/*
34 * The SHLBYTE() operation shifts each byte left by 1, *not*
35 * rolling over into the next byte
36 */
37static inline unative_t SHLBYTE(unative_t v)
38{
39 return vshlq_n_u8(v, 1);
40}
41
42/*
43 * The MASK() operation returns 0xFF in any byte for which the high
44 * bit is 1, 0x00 for any byte for which the high bit is 0.
45 */
46static inline unative_t MASK(unative_t v)
47{
48 const uint8x16_t temp = NBYTES(0);
49 return (unative_t)vcltq_s8((int8x16_t)v, (int8x16_t)temp);
50}
51
52void raid6_neon$#_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs)
53{
54 uint8_t **dptr = (uint8_t **)ptrs;
55 uint8_t *p, *q;
56 int d, z, z0;
57
58 register unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
59 const unative_t x1d = NBYTES(0x1d);
60
61 z0 = disks - 3; /* Highest data disk */
62 p = dptr[z0+1]; /* XOR parity */
63 q = dptr[z0+2]; /* RS syndrome */
64
65 for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {
66 wq$$ = wp$$ = vld1q_u8(&dptr[z0][d+$$*NSIZE]);
67 for ( z = z0-1 ; z >= 0 ; z-- ) {
68 wd$$ = vld1q_u8(&dptr[z][d+$$*NSIZE]);
69 wp$$ = veorq_u8(wp$$, wd$$);
70 w2$$ = MASK(wq$$);
71 w1$$ = SHLBYTE(wq$$);
72
73 w2$$ = vandq_u8(w2$$, x1d);
74 w1$$ = veorq_u8(w1$$, w2$$);
75 wq$$ = veorq_u8(w1$$, wd$$);
76 }
77 vst1q_u8(&p[d+NSIZE*$$], wp$$);
78 vst1q_u8(&q[d+NSIZE*$$], wq$$);
79 }
80}
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index 087332dbf8aa..28afa1a06e03 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -22,11 +22,23 @@ ifeq ($(ARCH),x86_64)
22 IS_X86 = yes 22 IS_X86 = yes
23endif 23endif
24 24
25ifeq ($(ARCH),arm)
26 CFLAGS += -I../../../arch/arm/include -mfpu=neon
27 HAS_NEON = yes
28endif
29ifeq ($(ARCH),arm64)
30 CFLAGS += -I../../../arch/arm64/include
31 HAS_NEON = yes
32endif
33
25ifeq ($(IS_X86),yes) 34ifeq ($(IS_X86),yes)
26 OBJS += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o 35 OBJS += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o
27 CFLAGS += $(shell echo "vpbroadcastb %xmm0, %ymm1" | \ 36 CFLAGS += $(shell echo "vpbroadcastb %xmm0, %ymm1" | \
28 gcc -c -x assembler - >&/dev/null && \ 37 gcc -c -x assembler - >&/dev/null && \
29 rm ./-.o && echo -DCONFIG_AS_AVX2=1) 38 rm ./-.o && echo -DCONFIG_AS_AVX2=1)
39else ifeq ($(HAS_NEON),yes)
40 OBJS += neon.o neon1.o neon2.o neon4.o neon8.o
41 CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1
30else 42else
31 HAS_ALTIVEC := $(shell echo -e '\#include <altivec.h>\nvector int a;' |\ 43 HAS_ALTIVEC := $(shell echo -e '\#include <altivec.h>\nvector int a;' |\
32 gcc -c -x c - >&/dev/null && \ 44 gcc -c -x c - >&/dev/null && \
@@ -55,6 +67,18 @@ raid6.a: $(OBJS)
55raid6test: test.c raid6.a 67raid6test: test.c raid6.a
56 $(CC) $(CFLAGS) -o raid6test $^ 68 $(CC) $(CFLAGS) -o raid6test $^
57 69
70neon1.c: neon.uc ../unroll.awk
71 $(AWK) ../unroll.awk -vN=1 < neon.uc > $@
72
73neon2.c: neon.uc ../unroll.awk
74 $(AWK) ../unroll.awk -vN=2 < neon.uc > $@
75
76neon4.c: neon.uc ../unroll.awk
77 $(AWK) ../unroll.awk -vN=4 < neon.uc > $@
78
79neon8.c: neon.uc ../unroll.awk
80 $(AWK) ../unroll.awk -vN=8 < neon.uc > $@
81
58altivec1.c: altivec.uc ../unroll.awk 82altivec1.c: altivec.uc ../unroll.awk
59 $(AWK) ../unroll.awk -vN=1 < altivec.uc > $@ 83 $(AWK) ../unroll.awk -vN=1 < altivec.uc > $@
60 84
@@ -89,7 +113,7 @@ tables.c: mktables
89 ./mktables > tables.c 113 ./mktables > tables.c
90 114
91clean: 115clean:
92 rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c tables.c raid6test 116 rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c neon*.c tables.c raid6test
93 117
94spotless: clean 118spotless: clean
95 rm -f *~ 119 rm -f *~