diff options
Diffstat (limited to 'lib/raid6')
-rw-r--r-- | lib/raid6/.gitignore | 1 | ||||
-rw-r--r-- | lib/raid6/Makefile | 8 | ||||
-rw-r--r-- | lib/raid6/algos.c | 18 | ||||
-rw-r--r-- | lib/raid6/avx512.c | 569 | ||||
-rw-r--r-- | lib/raid6/recov_avx512.c | 388 | ||||
-rw-r--r-- | lib/raid6/recov_s390xc.c | 116 | ||||
-rw-r--r-- | lib/raid6/s390vx.uc | 168 | ||||
-rw-r--r-- | lib/raid6/test/Makefile | 5 | ||||
-rw-r--r-- | lib/raid6/test/test.c | 7 | ||||
-rw-r--r-- | lib/raid6/x86.h | 10 |
10 files changed, 1285 insertions, 5 deletions
diff --git a/lib/raid6/.gitignore b/lib/raid6/.gitignore index 0a7e494b2bcd..f01b1cb04f91 100644 --- a/lib/raid6/.gitignore +++ b/lib/raid6/.gitignore | |||
@@ -3,3 +3,4 @@ altivec*.c | |||
3 | int*.c | 3 | int*.c |
4 | tables.c | 4 | tables.c |
5 | neon?.c | 5 | neon?.c |
6 | s390vx?.c | ||
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index 3b10a48fa040..3057011f5599 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile | |||
@@ -3,10 +3,11 @@ obj-$(CONFIG_RAID6_PQ) += raid6_pq.o | |||
3 | raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \ | 3 | raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \ |
4 | int8.o int16.o int32.o | 4 | int8.o int16.o int32.o |
5 | 5 | ||
6 | raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o | 6 | raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o avx512.o recov_avx512.o |
7 | raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o | 7 | raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o |
8 | raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o | 8 | raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o |
9 | raid6_pq-$(CONFIG_TILEGX) += tilegx8.o | 9 | raid6_pq-$(CONFIG_TILEGX) += tilegx8.o |
10 | raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o | ||
10 | 11 | ||
11 | hostprogs-y += mktables | 12 | hostprogs-y += mktables |
12 | 13 | ||
@@ -116,6 +117,11 @@ $(obj)/tilegx8.c: UNROLL := 8 | |||
116 | $(obj)/tilegx8.c: $(src)/tilegx.uc $(src)/unroll.awk FORCE | 117 | $(obj)/tilegx8.c: $(src)/tilegx.uc $(src)/unroll.awk FORCE |
117 | $(call if_changed,unroll) | 118 | $(call if_changed,unroll) |
118 | 119 | ||
120 | targets += s390vx8.c | ||
121 | $(obj)/s390vx8.c: UNROLL := 8 | ||
122 | $(obj)/s390vx8.c: $(src)/s390vx.uc $(src)/unroll.awk FORCE | ||
123 | $(call if_changed,unroll) | ||
124 | |||
119 | quiet_cmd_mktable = TABLE $@ | 125 | quiet_cmd_mktable = TABLE $@ |
120 | cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 ) | 126 | cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 ) |
121 | 127 | ||
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 975c6e0434bd..7857049fd7d3 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c | |||
@@ -49,6 +49,10 @@ const struct raid6_calls * const raid6_algos[] = { | |||
49 | &raid6_avx2x1, | 49 | &raid6_avx2x1, |
50 | &raid6_avx2x2, | 50 | &raid6_avx2x2, |
51 | #endif | 51 | #endif |
52 | #ifdef CONFIG_AS_AVX512 | ||
53 | &raid6_avx512x1, | ||
54 | &raid6_avx512x2, | ||
55 | #endif | ||
52 | #endif | 56 | #endif |
53 | #if defined(__x86_64__) && !defined(__arch_um__) | 57 | #if defined(__x86_64__) && !defined(__arch_um__) |
54 | &raid6_sse2x1, | 58 | &raid6_sse2x1, |
@@ -59,6 +63,11 @@ const struct raid6_calls * const raid6_algos[] = { | |||
59 | &raid6_avx2x2, | 63 | &raid6_avx2x2, |
60 | &raid6_avx2x4, | 64 | &raid6_avx2x4, |
61 | #endif | 65 | #endif |
66 | #ifdef CONFIG_AS_AVX512 | ||
67 | &raid6_avx512x1, | ||
68 | &raid6_avx512x2, | ||
69 | &raid6_avx512x4, | ||
70 | #endif | ||
62 | #endif | 71 | #endif |
63 | #ifdef CONFIG_ALTIVEC | 72 | #ifdef CONFIG_ALTIVEC |
64 | &raid6_altivec1, | 73 | &raid6_altivec1, |
@@ -69,6 +78,9 @@ const struct raid6_calls * const raid6_algos[] = { | |||
69 | #if defined(CONFIG_TILEGX) | 78 | #if defined(CONFIG_TILEGX) |
70 | &raid6_tilegx8, | 79 | &raid6_tilegx8, |
71 | #endif | 80 | #endif |
81 | #if defined(CONFIG_S390) | ||
82 | &raid6_s390vx8, | ||
83 | #endif | ||
72 | &raid6_intx1, | 84 | &raid6_intx1, |
73 | &raid6_intx2, | 85 | &raid6_intx2, |
74 | &raid6_intx4, | 86 | &raid6_intx4, |
@@ -89,12 +101,18 @@ void (*raid6_datap_recov)(int, size_t, int, void **); | |||
89 | EXPORT_SYMBOL_GPL(raid6_datap_recov); | 101 | EXPORT_SYMBOL_GPL(raid6_datap_recov); |
90 | 102 | ||
91 | const struct raid6_recov_calls *const raid6_recov_algos[] = { | 103 | const struct raid6_recov_calls *const raid6_recov_algos[] = { |
104 | #ifdef CONFIG_AS_AVX512 | ||
105 | &raid6_recov_avx512, | ||
106 | #endif | ||
92 | #ifdef CONFIG_AS_AVX2 | 107 | #ifdef CONFIG_AS_AVX2 |
93 | &raid6_recov_avx2, | 108 | &raid6_recov_avx2, |
94 | #endif | 109 | #endif |
95 | #ifdef CONFIG_AS_SSSE3 | 110 | #ifdef CONFIG_AS_SSSE3 |
96 | &raid6_recov_ssse3, | 111 | &raid6_recov_ssse3, |
97 | #endif | 112 | #endif |
113 | #ifdef CONFIG_S390 | ||
114 | &raid6_recov_s390xc, | ||
115 | #endif | ||
98 | &raid6_recov_intx1, | 116 | &raid6_recov_intx1, |
99 | NULL | 117 | NULL |
100 | }; | 118 | }; |
diff --git a/lib/raid6/avx512.c b/lib/raid6/avx512.c new file mode 100644 index 000000000000..f524a7972006 --- /dev/null +++ b/lib/raid6/avx512.c | |||
@@ -0,0 +1,569 @@ | |||
1 | /* -*- linux-c -*- -------------------------------------------------------- | ||
2 | * | ||
3 | * Copyright (C) 2016 Intel Corporation | ||
4 | * | ||
5 | * Author: Gayatri Kammela <gayatri.kammela@intel.com> | ||
6 | * Author: Megha Dey <megha.dey@linux.intel.com> | ||
7 | * | ||
8 | * Based on avx2.c: Copyright 2012 Yuanhan Liu All Rights Reserved | ||
9 | * Based on sse2.c: Copyright 2002 H. Peter Anvin - All Rights Reserved | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of the GNU General Public License as published by | ||
13 | * the Free Software Foundation, Inc., 53 Temple Place Ste 330, | ||
14 | * Boston MA 02111-1307, USA; either version 2 of the License, or | ||
15 | * (at your option) any later version; incorporated herein by reference. | ||
16 | * | ||
17 | * ----------------------------------------------------------------------- | ||
18 | */ | ||
19 | |||
20 | /* | ||
21 | * AVX512 implementation of RAID-6 syndrome functions | ||
22 | * | ||
23 | */ | ||
24 | |||
25 | #ifdef CONFIG_AS_AVX512 | ||
26 | |||
27 | #include <linux/raid/pq.h> | ||
28 | #include "x86.h" | ||
29 | |||
30 | static const struct raid6_avx512_constants { | ||
31 | u64 x1d[8]; | ||
32 | } raid6_avx512_constants __aligned(512) = { | ||
33 | { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL, | ||
34 | 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL, | ||
35 | 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL, | ||
36 | 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,}, | ||
37 | }; | ||
38 | |||
39 | static int raid6_have_avx512(void) | ||
40 | { | ||
41 | return boot_cpu_has(X86_FEATURE_AVX2) && | ||
42 | boot_cpu_has(X86_FEATURE_AVX) && | ||
43 | boot_cpu_has(X86_FEATURE_AVX512F) && | ||
44 | boot_cpu_has(X86_FEATURE_AVX512BW) && | ||
45 | boot_cpu_has(X86_FEATURE_AVX512VL) && | ||
46 | boot_cpu_has(X86_FEATURE_AVX512DQ); | ||
47 | } | ||
48 | |||
49 | static void raid6_avx5121_gen_syndrome(int disks, size_t bytes, void **ptrs) | ||
50 | { | ||
51 | u8 **dptr = (u8 **)ptrs; | ||
52 | u8 *p, *q; | ||
53 | int d, z, z0; | ||
54 | |||
55 | z0 = disks - 3; /* Highest data disk */ | ||
56 | p = dptr[z0+1]; /* XOR parity */ | ||
57 | q = dptr[z0+2]; /* RS syndrome */ | ||
58 | |||
59 | kernel_fpu_begin(); | ||
60 | |||
61 | asm volatile("vmovdqa64 %0,%%zmm0\n\t" | ||
62 | "vpxorq %%zmm1,%%zmm1,%%zmm1" /* Zero temp */ | ||
63 | : | ||
64 | : "m" (raid6_avx512_constants.x1d[0])); | ||
65 | |||
66 | for (d = 0; d < bytes; d += 64) { | ||
67 | asm volatile("prefetchnta %0\n\t" | ||
68 | "vmovdqa64 %0,%%zmm2\n\t" /* P[0] */ | ||
69 | "prefetchnta %1\n\t" | ||
70 | "vmovdqa64 %%zmm2,%%zmm4\n\t" /* Q[0] */ | ||
71 | "vmovdqa64 %1,%%zmm6" | ||
72 | : | ||
73 | : "m" (dptr[z0][d]), "m" (dptr[z0-1][d])); | ||
74 | for (z = z0-2; z >= 0; z--) { | ||
75 | asm volatile("prefetchnta %0\n\t" | ||
76 | "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t" | ||
77 | "vpmovm2b %%k1,%%zmm5\n\t" | ||
78 | "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" | ||
79 | "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" | ||
80 | "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" | ||
81 | "vpxorq %%zmm6,%%zmm2,%%zmm2\n\t" | ||
82 | "vpxorq %%zmm6,%%zmm4,%%zmm4\n\t" | ||
83 | "vmovdqa64 %0,%%zmm6" | ||
84 | : | ||
85 | : "m" (dptr[z][d])); | ||
86 | } | ||
87 | asm volatile("vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t" | ||
88 | "vpmovm2b %%k1,%%zmm5\n\t" | ||
89 | "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" | ||
90 | "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" | ||
91 | "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" | ||
92 | "vpxorq %%zmm6,%%zmm2,%%zmm2\n\t" | ||
93 | "vpxorq %%zmm6,%%zmm4,%%zmm4\n\t" | ||
94 | "vmovntdq %%zmm2,%0\n\t" | ||
95 | "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t" | ||
96 | "vmovntdq %%zmm4,%1\n\t" | ||
97 | "vpxorq %%zmm4,%%zmm4,%%zmm4" | ||
98 | : | ||
99 | : "m" (p[d]), "m" (q[d])); | ||
100 | } | ||
101 | |||
102 | asm volatile("sfence" : : : "memory"); | ||
103 | kernel_fpu_end(); | ||
104 | } | ||
105 | |||
106 | static void raid6_avx5121_xor_syndrome(int disks, int start, int stop, | ||
107 | size_t bytes, void **ptrs) | ||
108 | { | ||
109 | u8 **dptr = (u8 **)ptrs; | ||
110 | u8 *p, *q; | ||
111 | int d, z, z0; | ||
112 | |||
113 | z0 = stop; /* P/Q right side optimization */ | ||
114 | p = dptr[disks-2]; /* XOR parity */ | ||
115 | q = dptr[disks-1]; /* RS syndrome */ | ||
116 | |||
117 | kernel_fpu_begin(); | ||
118 | |||
119 | asm volatile("vmovdqa64 %0,%%zmm0" | ||
120 | : : "m" (raid6_avx512_constants.x1d[0])); | ||
121 | |||
122 | for (d = 0 ; d < bytes ; d += 64) { | ||
123 | asm volatile("vmovdqa64 %0,%%zmm4\n\t" | ||
124 | "vmovdqa64 %1,%%zmm2\n\t" | ||
125 | "vpxorq %%zmm4,%%zmm2,%%zmm2" | ||
126 | : | ||
127 | : "m" (dptr[z0][d]), "m" (p[d])); | ||
128 | /* P/Q data pages */ | ||
129 | for (z = z0-1 ; z >= start ; z--) { | ||
130 | asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t" | ||
131 | "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t" | ||
132 | "vpmovm2b %%k1,%%zmm5\n\t" | ||
133 | "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" | ||
134 | "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" | ||
135 | "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" | ||
136 | "vmovdqa64 %0,%%zmm5\n\t" | ||
137 | "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t" | ||
138 | "vpxorq %%zmm5,%%zmm4,%%zmm4" | ||
139 | : | ||
140 | : "m" (dptr[z][d])); | ||
141 | } | ||
142 | /* P/Q left side optimization */ | ||
143 | for (z = start-1 ; z >= 0 ; z--) { | ||
144 | asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t" | ||
145 | "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t" | ||
146 | "vpmovm2b %%k1,%%zmm5\n\t" | ||
147 | "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" | ||
148 | "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" | ||
149 | "vpxorq %%zmm5,%%zmm4,%%zmm4" | ||
150 | : | ||
151 | : ); | ||
152 | } | ||
153 | asm volatile("vpxorq %0,%%zmm4,%%zmm4\n\t" | ||
154 | /* Don't use movntdq for r/w memory area < cache line */ | ||
155 | "vmovdqa64 %%zmm4,%0\n\t" | ||
156 | "vmovdqa64 %%zmm2,%1" | ||
157 | : | ||
158 | : "m" (q[d]), "m" (p[d])); | ||
159 | } | ||
160 | |||
161 | asm volatile("sfence" : : : "memory"); | ||
162 | kernel_fpu_end(); | ||
163 | } | ||
164 | |||
165 | const struct raid6_calls raid6_avx512x1 = { | ||
166 | raid6_avx5121_gen_syndrome, | ||
167 | raid6_avx5121_xor_syndrome, | ||
168 | raid6_have_avx512, | ||
169 | "avx512x1", | ||
170 | 1 /* Has cache hints */ | ||
171 | }; | ||
172 | |||
173 | /* | ||
174 | * Unrolled-by-2 AVX512 implementation | ||
175 | */ | ||
176 | static void raid6_avx5122_gen_syndrome(int disks, size_t bytes, void **ptrs) | ||
177 | { | ||
178 | u8 **dptr = (u8 **)ptrs; | ||
179 | u8 *p, *q; | ||
180 | int d, z, z0; | ||
181 | |||
182 | z0 = disks - 3; /* Highest data disk */ | ||
183 | p = dptr[z0+1]; /* XOR parity */ | ||
184 | q = dptr[z0+2]; /* RS syndrome */ | ||
185 | |||
186 | kernel_fpu_begin(); | ||
187 | |||
188 | asm volatile("vmovdqa64 %0,%%zmm0\n\t" | ||
189 | "vpxorq %%zmm1,%%zmm1,%%zmm1" /* Zero temp */ | ||
190 | : | ||
191 | : "m" (raid6_avx512_constants.x1d[0])); | ||
192 | |||
193 | /* We uniformly assume a single prefetch covers at least 64 bytes */ | ||
194 | for (d = 0; d < bytes; d += 128) { | ||
195 | asm volatile("prefetchnta %0\n\t" | ||
196 | "prefetchnta %1\n\t" | ||
197 | "vmovdqa64 %0,%%zmm2\n\t" /* P[0] */ | ||
198 | "vmovdqa64 %1,%%zmm3\n\t" /* P[1] */ | ||
199 | "vmovdqa64 %%zmm2,%%zmm4\n\t" /* Q[0] */ | ||
200 | "vmovdqa64 %%zmm3,%%zmm6" /* Q[1] */ | ||
201 | : | ||
202 | : "m" (dptr[z0][d]), "m" (dptr[z0][d+64])); | ||
203 | for (z = z0-1; z >= 0; z--) { | ||
204 | asm volatile("prefetchnta %0\n\t" | ||
205 | "prefetchnta %1\n\t" | ||
206 | "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t" | ||
207 | "vpcmpgtb %%zmm6,%%zmm1,%%k2\n\t" | ||
208 | "vpmovm2b %%k1,%%zmm5\n\t" | ||
209 | "vpmovm2b %%k2,%%zmm7\n\t" | ||
210 | "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" | ||
211 | "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t" | ||
212 | "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" | ||
213 | "vpandq %%zmm0,%%zmm7,%%zmm7\n\t" | ||
214 | "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" | ||
215 | "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t" | ||
216 | "vmovdqa64 %0,%%zmm5\n\t" | ||
217 | "vmovdqa64 %1,%%zmm7\n\t" | ||
218 | "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t" | ||
219 | "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t" | ||
220 | "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" | ||
221 | "vpxorq %%zmm7,%%zmm6,%%zmm6" | ||
222 | : | ||
223 | : "m" (dptr[z][d]), "m" (dptr[z][d+64])); | ||
224 | } | ||
225 | asm volatile("vmovntdq %%zmm2,%0\n\t" | ||
226 | "vmovntdq %%zmm3,%1\n\t" | ||
227 | "vmovntdq %%zmm4,%2\n\t" | ||
228 | "vmovntdq %%zmm6,%3" | ||
229 | : | ||
230 | : "m" (p[d]), "m" (p[d+64]), "m" (q[d]), | ||
231 | "m" (q[d+64])); | ||
232 | } | ||
233 | |||
234 | asm volatile("sfence" : : : "memory"); | ||
235 | kernel_fpu_end(); | ||
236 | } | ||
237 | |||
238 | static void raid6_avx5122_xor_syndrome(int disks, int start, int stop, | ||
239 | size_t bytes, void **ptrs) | ||
240 | { | ||
241 | u8 **dptr = (u8 **)ptrs; | ||
242 | u8 *p, *q; | ||
243 | int d, z, z0; | ||
244 | |||
245 | z0 = stop; /* P/Q right side optimization */ | ||
246 | p = dptr[disks-2]; /* XOR parity */ | ||
247 | q = dptr[disks-1]; /* RS syndrome */ | ||
248 | |||
249 | kernel_fpu_begin(); | ||
250 | |||
251 | asm volatile("vmovdqa64 %0,%%zmm0" | ||
252 | : : "m" (raid6_avx512_constants.x1d[0])); | ||
253 | |||
254 | for (d = 0 ; d < bytes ; d += 128) { | ||
255 | asm volatile("vmovdqa64 %0,%%zmm4\n\t" | ||
256 | "vmovdqa64 %1,%%zmm6\n\t" | ||
257 | "vmovdqa64 %2,%%zmm2\n\t" | ||
258 | "vmovdqa64 %3,%%zmm3\n\t" | ||
259 | "vpxorq %%zmm4,%%zmm2,%%zmm2\n\t" | ||
260 | "vpxorq %%zmm6,%%zmm3,%%zmm3" | ||
261 | : | ||
262 | : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]), | ||
263 | "m" (p[d]), "m" (p[d+64])); | ||
264 | /* P/Q data pages */ | ||
265 | for (z = z0-1 ; z >= start ; z--) { | ||
266 | asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t" | ||
267 | "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t" | ||
268 | "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t" | ||
269 | "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t" | ||
270 | "vpmovm2b %%k1,%%zmm5\n\t" | ||
271 | "vpmovm2b %%k2,%%zmm7\n\t" | ||
272 | "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" | ||
273 | "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t" | ||
274 | "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" | ||
275 | "vpandq %%zmm0,%%zmm7,%%zmm7\n\t" | ||
276 | "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" | ||
277 | "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t" | ||
278 | "vmovdqa64 %0,%%zmm5\n\t" | ||
279 | "vmovdqa64 %1,%%zmm7\n\t" | ||
280 | "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t" | ||
281 | "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t" | ||
282 | "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" | ||
283 | "vpxorq %%zmm7,%%zmm6,%%zmm6" | ||
284 | : | ||
285 | : "m" (dptr[z][d]), "m" (dptr[z][d+64])); | ||
286 | } | ||
287 | /* P/Q left side optimization */ | ||
288 | for (z = start-1 ; z >= 0 ; z--) { | ||
289 | asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t" | ||
290 | "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t" | ||
291 | "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t" | ||
292 | "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t" | ||
293 | "vpmovm2b %%k1,%%zmm5\n\t" | ||
294 | "vpmovm2b %%k2,%%zmm7\n\t" | ||
295 | "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" | ||
296 | "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t" | ||
297 | "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" | ||
298 | "vpandq %%zmm0,%%zmm7,%%zmm7\n\t" | ||
299 | "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" | ||
300 | "vpxorq %%zmm7,%%zmm6,%%zmm6" | ||
301 | : | ||
302 | : ); | ||
303 | } | ||
304 | asm volatile("vpxorq %0,%%zmm4,%%zmm4\n\t" | ||
305 | "vpxorq %1,%%zmm6,%%zmm6\n\t" | ||
306 | /* Don't use movntdq for r/w | ||
307 | * memory area < cache line | ||
308 | */ | ||
309 | "vmovdqa64 %%zmm4,%0\n\t" | ||
310 | "vmovdqa64 %%zmm6,%1\n\t" | ||
311 | "vmovdqa64 %%zmm2,%2\n\t" | ||
312 | "vmovdqa64 %%zmm3,%3" | ||
313 | : | ||
314 | : "m" (q[d]), "m" (q[d+64]), "m" (p[d]), | ||
315 | "m" (p[d+64])); | ||
316 | } | ||
317 | |||
318 | asm volatile("sfence" : : : "memory"); | ||
319 | kernel_fpu_end(); | ||
320 | } | ||
321 | |||
322 | const struct raid6_calls raid6_avx512x2 = { | ||
323 | raid6_avx5122_gen_syndrome, | ||
324 | raid6_avx5122_xor_syndrome, | ||
325 | raid6_have_avx512, | ||
326 | "avx512x2", | ||
327 | 1 /* Has cache hints */ | ||
328 | }; | ||
329 | |||
330 | #ifdef CONFIG_X86_64 | ||
331 | |||
332 | /* | ||
333 | * Unrolled-by-4 AVX2 implementation | ||
334 | */ | ||
335 | static void raid6_avx5124_gen_syndrome(int disks, size_t bytes, void **ptrs) | ||
336 | { | ||
337 | u8 **dptr = (u8 **)ptrs; | ||
338 | u8 *p, *q; | ||
339 | int d, z, z0; | ||
340 | |||
341 | z0 = disks - 3; /* Highest data disk */ | ||
342 | p = dptr[z0+1]; /* XOR parity */ | ||
343 | q = dptr[z0+2]; /* RS syndrome */ | ||
344 | |||
345 | kernel_fpu_begin(); | ||
346 | |||
347 | asm volatile("vmovdqa64 %0,%%zmm0\n\t" | ||
348 | "vpxorq %%zmm1,%%zmm1,%%zmm1\n\t" /* Zero temp */ | ||
349 | "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t" /* P[0] */ | ||
350 | "vpxorq %%zmm3,%%zmm3,%%zmm3\n\t" /* P[1] */ | ||
351 | "vpxorq %%zmm4,%%zmm4,%%zmm4\n\t" /* Q[0] */ | ||
352 | "vpxorq %%zmm6,%%zmm6,%%zmm6\n\t" /* Q[1] */ | ||
353 | "vpxorq %%zmm10,%%zmm10,%%zmm10\n\t" /* P[2] */ | ||
354 | "vpxorq %%zmm11,%%zmm11,%%zmm11\n\t" /* P[3] */ | ||
355 | "vpxorq %%zmm12,%%zmm12,%%zmm12\n\t" /* Q[2] */ | ||
356 | "vpxorq %%zmm14,%%zmm14,%%zmm14" /* Q[3] */ | ||
357 | : | ||
358 | : "m" (raid6_avx512_constants.x1d[0])); | ||
359 | |||
360 | for (d = 0; d < bytes; d += 256) { | ||
361 | for (z = z0; z >= 0; z--) { | ||
362 | asm volatile("prefetchnta %0\n\t" | ||
363 | "prefetchnta %1\n\t" | ||
364 | "prefetchnta %2\n\t" | ||
365 | "prefetchnta %3\n\t" | ||
366 | "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t" | ||
367 | "vpcmpgtb %%zmm6,%%zmm1,%%k2\n\t" | ||
368 | "vpcmpgtb %%zmm12,%%zmm1,%%k3\n\t" | ||
369 | "vpcmpgtb %%zmm14,%%zmm1,%%k4\n\t" | ||
370 | "vpmovm2b %%k1,%%zmm5\n\t" | ||
371 | "vpmovm2b %%k2,%%zmm7\n\t" | ||
372 | "vpmovm2b %%k3,%%zmm13\n\t" | ||
373 | "vpmovm2b %%k4,%%zmm15\n\t" | ||
374 | "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" | ||
375 | "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t" | ||
376 | "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t" | ||
377 | "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t" | ||
378 | "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" | ||
379 | "vpandq %%zmm0,%%zmm7,%%zmm7\n\t" | ||
380 | "vpandq %%zmm0,%%zmm13,%%zmm13\n\t" | ||
381 | "vpandq %%zmm0,%%zmm15,%%zmm15\n\t" | ||
382 | "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" | ||
383 | "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t" | ||
384 | "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t" | ||
385 | "vpxorq %%zmm15,%%zmm14,%%zmm14\n\t" | ||
386 | "vmovdqa64 %0,%%zmm5\n\t" | ||
387 | "vmovdqa64 %1,%%zmm7\n\t" | ||
388 | "vmovdqa64 %2,%%zmm13\n\t" | ||
389 | "vmovdqa64 %3,%%zmm15\n\t" | ||
390 | "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t" | ||
391 | "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t" | ||
392 | "vpxorq %%zmm13,%%zmm10,%%zmm10\n\t" | ||
393 | "vpxorq %%zmm15,%%zmm11,%%zmm11\n" | ||
394 | "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" | ||
395 | "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t" | ||
396 | "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t" | ||
397 | "vpxorq %%zmm15,%%zmm14,%%zmm14" | ||
398 | : | ||
399 | : "m" (dptr[z][d]), "m" (dptr[z][d+64]), | ||
400 | "m" (dptr[z][d+128]), "m" (dptr[z][d+192])); | ||
401 | } | ||
402 | asm volatile("vmovntdq %%zmm2,%0\n\t" | ||
403 | "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t" | ||
404 | "vmovntdq %%zmm3,%1\n\t" | ||
405 | "vpxorq %%zmm3,%%zmm3,%%zmm3\n\t" | ||
406 | "vmovntdq %%zmm10,%2\n\t" | ||
407 | "vpxorq %%zmm10,%%zmm10,%%zmm10\n\t" | ||
408 | "vmovntdq %%zmm11,%3\n\t" | ||
409 | "vpxorq %%zmm11,%%zmm11,%%zmm11\n\t" | ||
410 | "vmovntdq %%zmm4,%4\n\t" | ||
411 | "vpxorq %%zmm4,%%zmm4,%%zmm4\n\t" | ||
412 | "vmovntdq %%zmm6,%5\n\t" | ||
413 | "vpxorq %%zmm6,%%zmm6,%%zmm6\n\t" | ||
414 | "vmovntdq %%zmm12,%6\n\t" | ||
415 | "vpxorq %%zmm12,%%zmm12,%%zmm12\n\t" | ||
416 | "vmovntdq %%zmm14,%7\n\t" | ||
417 | "vpxorq %%zmm14,%%zmm14,%%zmm14" | ||
418 | : | ||
419 | : "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]), | ||
420 | "m" (p[d+192]), "m" (q[d]), "m" (q[d+64]), | ||
421 | "m" (q[d+128]), "m" (q[d+192])); | ||
422 | } | ||
423 | |||
424 | asm volatile("sfence" : : : "memory"); | ||
425 | kernel_fpu_end(); | ||
426 | } | ||
427 | |||
428 | static void raid6_avx5124_xor_syndrome(int disks, int start, int stop, | ||
429 | size_t bytes, void **ptrs) | ||
430 | { | ||
431 | u8 **dptr = (u8 **)ptrs; | ||
432 | u8 *p, *q; | ||
433 | int d, z, z0; | ||
434 | |||
435 | z0 = stop; /* P/Q right side optimization */ | ||
436 | p = dptr[disks-2]; /* XOR parity */ | ||
437 | q = dptr[disks-1]; /* RS syndrome */ | ||
438 | |||
439 | kernel_fpu_begin(); | ||
440 | |||
441 | asm volatile("vmovdqa64 %0,%%zmm0" | ||
442 | :: "m" (raid6_avx512_constants.x1d[0])); | ||
443 | |||
444 | for (d = 0 ; d < bytes ; d += 256) { | ||
445 | asm volatile("vmovdqa64 %0,%%zmm4\n\t" | ||
446 | "vmovdqa64 %1,%%zmm6\n\t" | ||
447 | "vmovdqa64 %2,%%zmm12\n\t" | ||
448 | "vmovdqa64 %3,%%zmm14\n\t" | ||
449 | "vmovdqa64 %4,%%zmm2\n\t" | ||
450 | "vmovdqa64 %5,%%zmm3\n\t" | ||
451 | "vmovdqa64 %6,%%zmm10\n\t" | ||
452 | "vmovdqa64 %7,%%zmm11\n\t" | ||
453 | "vpxorq %%zmm4,%%zmm2,%%zmm2\n\t" | ||
454 | "vpxorq %%zmm6,%%zmm3,%%zmm3\n\t" | ||
455 | "vpxorq %%zmm12,%%zmm10,%%zmm10\n\t" | ||
456 | "vpxorq %%zmm14,%%zmm11,%%zmm11" | ||
457 | : | ||
458 | : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]), | ||
459 | "m" (dptr[z0][d+128]), "m" (dptr[z0][d+192]), | ||
460 | "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]), | ||
461 | "m" (p[d+192])); | ||
462 | /* P/Q data pages */ | ||
463 | for (z = z0-1 ; z >= start ; z--) { | ||
464 | asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t" | ||
465 | "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t" | ||
466 | "vpxorq %%zmm13,%%zmm13,%%zmm13\n\t" | ||
467 | "vpxorq %%zmm15,%%zmm15,%%zmm15\n\t" | ||
468 | "prefetchnta %0\n\t" | ||
469 | "prefetchnta %2\n\t" | ||
470 | "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t" | ||
471 | "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t" | ||
472 | "vpcmpgtb %%zmm12,%%zmm13,%%k3\n\t" | ||
473 | "vpcmpgtb %%zmm14,%%zmm15,%%k4\n\t" | ||
474 | "vpmovm2b %%k1,%%zmm5\n\t" | ||
475 | "vpmovm2b %%k2,%%zmm7\n\t" | ||
476 | "vpmovm2b %%k3,%%zmm13\n\t" | ||
477 | "vpmovm2b %%k4,%%zmm15\n\t" | ||
478 | "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" | ||
479 | "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t" | ||
480 | "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t" | ||
481 | "vpaddb %%Zmm14,%%zmm14,%%zmm14\n\t" | ||
482 | "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" | ||
483 | "vpandq %%zmm0,%%zmm7,%%zmm7\n\t" | ||
484 | "vpandq %%zmm0,%%zmm13,%%zmm13\n\t" | ||
485 | "vpandq %%zmm0,%%zmm15,%%zmm15\n\t" | ||
486 | "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" | ||
487 | "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t" | ||
488 | "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t" | ||
489 | "vpxorq %%zmm15,%%zmm14,%%zmm14\n\t" | ||
490 | "vmovdqa64 %0,%%zmm5\n\t" | ||
491 | "vmovdqa64 %1,%%zmm7\n\t" | ||
492 | "vmovdqa64 %2,%%zmm13\n\t" | ||
493 | "vmovdqa64 %3,%%zmm15\n\t" | ||
494 | "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t" | ||
495 | "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t" | ||
496 | "vpxorq %%zmm13,%%zmm10,%%zmm10\n\t" | ||
497 | "vpxorq %%zmm15,%%zmm11,%%zmm11\n\t" | ||
498 | "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" | ||
499 | "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t" | ||
500 | "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t" | ||
501 | "vpxorq %%zmm15,%%zmm14,%%zmm14" | ||
502 | : | ||
503 | : "m" (dptr[z][d]), "m" (dptr[z][d+64]), | ||
504 | "m" (dptr[z][d+128]), | ||
505 | "m" (dptr[z][d+192])); | ||
506 | } | ||
507 | asm volatile("prefetchnta %0\n\t" | ||
508 | "prefetchnta %1\n\t" | ||
509 | : | ||
510 | : "m" (q[d]), "m" (q[d+128])); | ||
511 | /* P/Q left side optimization */ | ||
512 | for (z = start-1 ; z >= 0 ; z--) { | ||
513 | asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t" | ||
514 | "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t" | ||
515 | "vpxorq %%zmm13,%%zmm13,%%zmm13\n\t" | ||
516 | "vpxorq %%zmm15,%%zmm15,%%zmm15\n\t" | ||
517 | "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t" | ||
518 | "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t" | ||
519 | "vpcmpgtb %%zmm12,%%zmm13,%%k3\n\t" | ||
520 | "vpcmpgtb %%zmm14,%%zmm15,%%k4\n\t" | ||
521 | "vpmovm2b %%k1,%%zmm5\n\t" | ||
522 | "vpmovm2b %%k2,%%zmm7\n\t" | ||
523 | "vpmovm2b %%k3,%%zmm13\n\t" | ||
524 | "vpmovm2b %%k4,%%zmm15\n\t" | ||
525 | "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" | ||
526 | "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t" | ||
527 | "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t" | ||
528 | "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t" | ||
529 | "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" | ||
530 | "vpandq %%zmm0,%%zmm7,%%zmm7\n\t" | ||
531 | "vpandq %%zmm0,%%zmm13,%%zmm13\n\t" | ||
532 | "vpandq %%zmm0,%%zmm15,%%zmm15\n\t" | ||
533 | "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" | ||
534 | "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t" | ||
535 | "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t" | ||
536 | "vpxorq %%zmm15,%%zmm14,%%zmm14" | ||
537 | : | ||
538 | : ); | ||
539 | } | ||
540 | asm volatile("vmovntdq %%zmm2,%0\n\t" | ||
541 | "vmovntdq %%zmm3,%1\n\t" | ||
542 | "vmovntdq %%zmm10,%2\n\t" | ||
543 | "vmovntdq %%zmm11,%3\n\t" | ||
544 | "vpxorq %4,%%zmm4,%%zmm4\n\t" | ||
545 | "vpxorq %5,%%zmm6,%%zmm6\n\t" | ||
546 | "vpxorq %6,%%zmm12,%%zmm12\n\t" | ||
547 | "vpxorq %7,%%zmm14,%%zmm14\n\t" | ||
548 | "vmovntdq %%zmm4,%4\n\t" | ||
549 | "vmovntdq %%zmm6,%5\n\t" | ||
550 | "vmovntdq %%zmm12,%6\n\t" | ||
551 | "vmovntdq %%zmm14,%7" | ||
552 | : | ||
553 | : "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]), | ||
554 | "m" (p[d+192]), "m" (q[d]), "m" (q[d+64]), | ||
555 | "m" (q[d+128]), "m" (q[d+192])); | ||
556 | } | ||
557 | asm volatile("sfence" : : : "memory"); | ||
558 | kernel_fpu_end(); | ||
559 | } | ||
560 | const struct raid6_calls raid6_avx512x4 = { | ||
561 | raid6_avx5124_gen_syndrome, | ||
562 | raid6_avx5124_xor_syndrome, | ||
563 | raid6_have_avx512, | ||
564 | "avx512x4", | ||
565 | 1 /* Has cache hints */ | ||
566 | }; | ||
567 | #endif | ||
568 | |||
569 | #endif /* CONFIG_AS_AVX512 */ | ||
diff --git a/lib/raid6/recov_avx512.c b/lib/raid6/recov_avx512.c new file mode 100644 index 000000000000..625aafa33b61 --- /dev/null +++ b/lib/raid6/recov_avx512.c | |||
@@ -0,0 +1,388 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2016 Intel Corporation | ||
3 | * | ||
4 | * Author: Gayatri Kammela <gayatri.kammela@intel.com> | ||
5 | * Author: Megha Dey <megha.dey@linux.intel.com> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; version 2 | ||
10 | * of the License. | ||
11 | * | ||
12 | */ | ||
13 | |||
14 | #ifdef CONFIG_AS_AVX512 | ||
15 | |||
16 | #include <linux/raid/pq.h> | ||
17 | #include "x86.h" | ||
18 | |||
19 | static int raid6_has_avx512(void) | ||
20 | { | ||
21 | return boot_cpu_has(X86_FEATURE_AVX2) && | ||
22 | boot_cpu_has(X86_FEATURE_AVX) && | ||
23 | boot_cpu_has(X86_FEATURE_AVX512F) && | ||
24 | boot_cpu_has(X86_FEATURE_AVX512BW) && | ||
25 | boot_cpu_has(X86_FEATURE_AVX512VL) && | ||
26 | boot_cpu_has(X86_FEATURE_AVX512DQ); | ||
27 | } | ||
28 | |||
/*
 * Recover two failed data blocks (slots faila and failb of ptrs[]) with
 * AVX-512 instructions.
 *
 * ptrs[disks-2] and ptrs[disks-1] are read as the P and Q blocks.  The
 * syndrome is first regenerated with both failed slots pointing at
 * raid6_empty_zero_page and with the failed blocks' own buffers standing
 * in for P and Q, so those buffers receive "delta p" and "delta q".  The
 * pointer table is then restored, and the loop combines the deltas with
 * the real P and Q through the pbmul/qmul nibble lookup tables.  The
 * results are written back to the buffers of failb (dq) and faila (dp).
 *
 * Every loop pass consumes 128 bytes (CONFIG_X86_64) or 64 bytes, so
 * "while (bytes)" only terminates when bytes is a multiple of that step.
 * NOTE(review): vmovdqa64 is the aligned move form, so the four buffers
 * are presumably 64-byte aligned — confirm against callers.
 */
29 | static void raid6_2data_recov_avx512(int disks, size_t bytes, int faila, | ||
30 | int failb, void **ptrs) | ||
31 | { | ||
32 | u8 *p, *q, *dp, *dq; | ||
33 | const u8 *pbmul; /* P multiplier table for B data */ | ||
34 | const u8 *qmul; /* Q multiplier table (for both) */ | ||
35 | const u8 x0f = 0x0f; | ||
36 | |||
37 | p = (u8 *)ptrs[disks-2]; | ||
38 | q = (u8 *)ptrs[disks-1]; | ||
39 | |||
40 | /* | ||
41 | * Compute syndrome with zero for the missing data pages | ||
42 | * Use the dead data pages as temporary storage for | ||
43 | * delta p and delta q | ||
44 | */ | ||
45 | |||
46 | dp = (u8 *)ptrs[faila]; | ||
47 | ptrs[faila] = (void *)raid6_empty_zero_page; | ||
48 | ptrs[disks-2] = dp; | ||
49 | dq = (u8 *)ptrs[failb]; | ||
50 | ptrs[failb] = (void *)raid6_empty_zero_page; | ||
51 | ptrs[disks-1] = dq; | ||
52 | |||
53 | raid6_call.gen_syndrome(disks, bytes, ptrs); | ||
54 | |||
55 | /* Restore pointer table */ | ||
56 | ptrs[faila] = dp; | ||
57 | ptrs[failb] = dq; | ||
58 | ptrs[disks-2] = p; | ||
59 | ptrs[disks-1] = q; | ||
60 | |||
61 | /* Now, pick the proper data tables */ | ||
62 | pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]]; | ||
63 | qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ | ||
64 | raid6_gfexp[failb]]]; | ||
65 | |||
66 | kernel_fpu_begin(); | ||
67 | |||
68 | /* zmm7 = x0f[64]: the low-nibble mask replicated into every byte */ | ||
69 | asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f)); | ||
70 | |||
71 | while (bytes) { | ||
72 | #ifdef CONFIG_X86_64 | ||
73 | asm volatile("vmovdqa64 %0, %%zmm1\n\t" | ||
74 | "vmovdqa64 %1, %%zmm9\n\t" | ||
75 | "vmovdqa64 %2, %%zmm0\n\t" | ||
76 | "vmovdqa64 %3, %%zmm8\n\t" | ||
77 | "vpxorq %4, %%zmm1, %%zmm1\n\t" | ||
78 | "vpxorq %5, %%zmm9, %%zmm9\n\t" | ||
79 | "vpxorq %6, %%zmm0, %%zmm0\n\t" | ||
80 | "vpxorq %7, %%zmm8, %%zmm8" | ||
81 | : | ||
82 | : "m" (q[0]), "m" (q[64]), "m" (p[0]), | ||
83 | "m" (p[64]), "m" (dq[0]), "m" (dq[64]), | ||
84 | "m" (dp[0]), "m" (dp[64])); | ||
85 | |||
86 | /* | ||
87 | * 1 = dq[0] ^ q[0] | ||
88 | * 9 = dq[64] ^ q[64] | ||
89 | * 0 = dp[0] ^ p[0] | ||
90 | * 8 = dp[64] ^ p[64] | ||
91 | */ | ||
92 | |||
/* zmm4/zmm5 = the 16-byte tables at qmul[0] and qmul[16], repeated in every 128-bit lane */
93 | asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t" | ||
94 | "vbroadcasti64x2 %1, %%zmm5" | ||
95 | : | ||
96 | : "m" (qmul[0]), "m" (qmul[16])); | ||
97 | |||
/* Split each byte into low and high nibble and look both up with vpshufb */
98 | asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t" | ||
99 | "vpsraw $4, %%zmm9, %%zmm12\n\t" | ||
100 | "vpandq %%zmm7, %%zmm1, %%zmm1\n\t" | ||
101 | "vpandq %%zmm7, %%zmm9, %%zmm9\n\t" | ||
102 | "vpandq %%zmm7, %%zmm3, %%zmm3\n\t" | ||
103 | "vpandq %%zmm7, %%zmm12, %%zmm12\n\t" | ||
104 | "vpshufb %%zmm9, %%zmm4, %%zmm14\n\t" | ||
105 | "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t" | ||
106 | "vpshufb %%zmm12, %%zmm5, %%zmm15\n\t" | ||
107 | "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t" | ||
108 | "vpxorq %%zmm14, %%zmm15, %%zmm15\n\t" | ||
109 | "vpxorq %%zmm4, %%zmm5, %%zmm5" | ||
110 | : | ||
111 | : ); | ||
112 | |||
113 | /* | ||
114 | * 5 = qx[0] | ||
115 | * 15 = qx[64] | ||
116 | */ | ||
117 | |||
118 | asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t" | ||
119 | "vbroadcasti64x2 %1, %%zmm1\n\t" | ||
120 | "vpsraw $4, %%zmm0, %%zmm2\n\t" | ||
121 | "vpsraw $4, %%zmm8, %%zmm6\n\t" | ||
122 | "vpandq %%zmm7, %%zmm0, %%zmm3\n\t" | ||
123 | "vpandq %%zmm7, %%zmm8, %%zmm14\n\t" | ||
124 | "vpandq %%zmm7, %%zmm2, %%zmm2\n\t" | ||
125 | "vpandq %%zmm7, %%zmm6, %%zmm6\n\t" | ||
126 | "vpshufb %%zmm14, %%zmm4, %%zmm12\n\t" | ||
127 | "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t" | ||
128 | "vpshufb %%zmm6, %%zmm1, %%zmm13\n\t" | ||
129 | "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t" | ||
130 | "vpxorq %%zmm4, %%zmm1, %%zmm1\n\t" | ||
131 | "vpxorq %%zmm12, %%zmm13, %%zmm13" | ||
132 | : | ||
133 | : "m" (pbmul[0]), "m" (pbmul[16])); | ||
134 | |||
135 | /* | ||
136 | * 1 = pbmul[px[0]] | ||
137 | * 13 = pbmul[px[64]] | ||
138 | */ | ||
139 | asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t" | ||
140 | "vpxorq %%zmm15, %%zmm13, %%zmm13" | ||
141 | : | ||
142 | : ); | ||
143 | |||
144 | /* | ||
145 | * 1 = db = DQ | ||
146 | * 13 = db[64] = DQ[64] | ||
147 | */ | ||
/*
 * NOTE(review): dq[] and dp[] are written by the two statements below but
 * are listed as "m" inputs rather than "=m" outputs — confirm intent.
 */
148 | asm volatile("vmovdqa64 %%zmm1, %0\n\t" | ||
149 | "vmovdqa64 %%zmm13,%1\n\t" | ||
150 | "vpxorq %%zmm1, %%zmm0, %%zmm0\n\t" | ||
151 | "vpxorq %%zmm13, %%zmm8, %%zmm8" | ||
152 | : | ||
153 | : "m" (dq[0]), "m" (dq[64])); | ||
154 | |||
155 | asm volatile("vmovdqa64 %%zmm0, %0\n\t" | ||
156 | "vmovdqa64 %%zmm8, %1" | ||
157 | : | ||
158 | : "m" (dp[0]), "m" (dp[64])); | ||
159 | |||
160 | bytes -= 128; | ||
161 | p += 128; | ||
162 | q += 128; | ||
163 | dp += 128; | ||
164 | dq += 128; | ||
165 | #else | ||
166 | asm volatile("vmovdqa64 %0, %%zmm1\n\t" | ||
167 | "vmovdqa64 %1, %%zmm0\n\t" | ||
168 | "vpxorq %2, %%zmm1, %%zmm1\n\t" | ||
169 | "vpxorq %3, %%zmm0, %%zmm0" | ||
170 | : | ||
171 | : "m" (*q), "m" (*p), "m"(*dq), "m" (*dp)); | ||
172 | |||
173 | /* 1 = dq ^ q; 0 = dp ^ p */ | ||
174 | |||
175 | asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t" | ||
176 | "vbroadcasti64x2 %1, %%zmm5" | ||
177 | : | ||
178 | : "m" (qmul[0]), "m" (qmul[16])); | ||
179 | |||
180 | /* | ||
181 | * 1 = dq ^ q | ||
182 | * 3 = (dq ^ q) >> 4 | ||
183 | */ | ||
184 | asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t" | ||
185 | "vpandq %%zmm7, %%zmm1, %%zmm1\n\t" | ||
186 | "vpandq %%zmm7, %%zmm3, %%zmm3\n\t" | ||
187 | "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t" | ||
188 | "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t" | ||
189 | "vpxorq %%zmm4, %%zmm5, %%zmm5" | ||
190 | : | ||
191 | : ); | ||
192 | |||
193 | /* 5 = qx */ | ||
194 | |||
195 | asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t" | ||
196 | "vbroadcasti64x2 %1, %%zmm1" | ||
197 | : | ||
198 | : "m" (pbmul[0]), "m" (pbmul[16])); | ||
199 | |||
200 | asm volatile("vpsraw $4, %%zmm0, %%zmm2\n\t" | ||
201 | "vpandq %%zmm7, %%zmm0, %%zmm3\n\t" | ||
202 | "vpandq %%zmm7, %%zmm2, %%zmm2\n\t" | ||
203 | "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t" | ||
204 | "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t" | ||
205 | "vpxorq %%zmm4, %%zmm1, %%zmm1" | ||
206 | : | ||
207 | : ); | ||
208 | |||
209 | /* 1 = pbmul[px] */ | ||
210 | asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t" | ||
211 | /* 1 = db = DQ */ | ||
212 | "vmovdqa64 %%zmm1, %0\n\t" | ||
213 | : | ||
214 | : "m" (dq[0])); | ||
215 | |||
216 | asm volatile("vpxorq %%zmm1, %%zmm0, %%zmm0\n\t" | ||
217 | "vmovdqa64 %%zmm0, %0" | ||
218 | : | ||
219 | : "m" (dp[0])); | ||
220 | |||
221 | bytes -= 64; | ||
222 | p += 64; | ||
223 | q += 64; | ||
224 | dp += 64; | ||
225 | dq += 64; | ||
226 | #endif | ||
227 | } | ||
228 | |||
229 | kernel_fpu_end(); | ||
230 | } | ||
231 | |||
/*
 * Recover one failed data block (slot faila of ptrs[]) together with the
 * P block, using AVX-512 instructions.
 *
 * ptrs[disks-2] and ptrs[disks-1] are read as the P and Q blocks.  The
 * syndrome is regenerated with the failed slot pointing at
 * raid6_empty_zero_page and with the failed block's buffer standing in
 * for Q, so that buffer receives "delta q" while the P buffer is
 * rewritten by gen_syndrome.  The loop then stores qmul[q ^ dq] into the
 * failed block's buffer and XORs that same value into P.
 *
 * Every loop pass consumes 128 bytes (CONFIG_X86_64) or 64 bytes, so
 * "while (bytes)" only terminates when bytes is a multiple of that step.
 * NOTE(review): vmovdqa64 is the aligned move form, so the buffers are
 * presumably 64-byte aligned — confirm against callers.
 */
232 | static void raid6_datap_recov_avx512(int disks, size_t bytes, int faila, | ||
233 | void **ptrs) | ||
234 | { | ||
235 | u8 *p, *q, *dq; | ||
236 | const u8 *qmul; /* Q multiplier table */ | ||
237 | const u8 x0f = 0x0f; | ||
238 | |||
239 | p = (u8 *)ptrs[disks-2]; | ||
240 | q = (u8 *)ptrs[disks-1]; | ||
241 | |||
242 | /* | ||
243 | * Compute syndrome with zero for the missing data page | ||
244 | * Use the dead data page as temporary storage for delta q | ||
245 | */ | ||
246 | |||
247 | dq = (u8 *)ptrs[faila]; | ||
248 | ptrs[faila] = (void *)raid6_empty_zero_page; | ||
249 | ptrs[disks-1] = dq; | ||
250 | |||
251 | raid6_call.gen_syndrome(disks, bytes, ptrs); | ||
252 | |||
253 | /* Restore pointer table */ | ||
254 | ptrs[faila] = dq; | ||
255 | ptrs[disks-1] = q; | ||
256 | |||
257 | /* Now, pick the proper data tables */ | ||
258 | qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]]; | ||
259 | |||
260 | kernel_fpu_begin(); | ||
261 | |||
/* zmm7 = 0x0f replicated into every byte (low-nibble mask) */
262 | asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f)); | ||
263 | |||
264 | while (bytes) { | ||
265 | #ifdef CONFIG_X86_64 | ||
266 | asm volatile("vmovdqa64 %0, %%zmm3\n\t" | ||
267 | "vmovdqa64 %1, %%zmm8\n\t" | ||
268 | "vpxorq %2, %%zmm3, %%zmm3\n\t" | ||
269 | "vpxorq %3, %%zmm8, %%zmm8" | ||
270 | : | ||
271 | : "m" (dq[0]), "m" (dq[64]), "m" (q[0]), | ||
272 | "m" (q[64])); | ||
273 | |||
274 | /* | ||
275 | * 3 = q[0] ^ dq[0] | ||
276 | * 8 = q[64] ^ dq[64] | ||
277 | */ | ||
/* zmm0/zmm13 and zmm1/zmm14 = two copies each of the qmul[0] and qmul[16] tables */
278 | asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t" | ||
279 | "vmovapd %%zmm0, %%zmm13\n\t" | ||
280 | "vbroadcasti64x2 %1, %%zmm1\n\t" | ||
281 | "vmovapd %%zmm1, %%zmm14" | ||
282 | : | ||
283 | : "m" (qmul[0]), "m" (qmul[16])); | ||
284 | |||
/* Split each byte into low and high nibble and look both up with vpshufb */
285 | asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t" | ||
286 | "vpsraw $4, %%zmm8, %%zmm12\n\t" | ||
287 | "vpandq %%zmm7, %%zmm3, %%zmm3\n\t" | ||
288 | "vpandq %%zmm7, %%zmm8, %%zmm8\n\t" | ||
289 | "vpandq %%zmm7, %%zmm6, %%zmm6\n\t" | ||
290 | "vpandq %%zmm7, %%zmm12, %%zmm12\n\t" | ||
291 | "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t" | ||
292 | "vpshufb %%zmm8, %%zmm13, %%zmm13\n\t" | ||
293 | "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t" | ||
294 | "vpshufb %%zmm12, %%zmm14, %%zmm14\n\t" | ||
295 | "vpxorq %%zmm0, %%zmm1, %%zmm1\n\t" | ||
296 | "vpxorq %%zmm13, %%zmm14, %%zmm14" | ||
297 | : | ||
298 | : ); | ||
299 | |||
300 | /* | ||
301 | * 1 = qmul[q[0] ^ dq[0]] | ||
302 | * 14 = qmul[q[64] ^ dq[64]] | ||
303 | */ | ||
304 | asm volatile("vmovdqa64 %0, %%zmm2\n\t" | ||
305 | "vmovdqa64 %1, %%zmm12\n\t" | ||
306 | "vpxorq %%zmm1, %%zmm2, %%zmm2\n\t" | ||
307 | "vpxorq %%zmm14, %%zmm12, %%zmm12" | ||
308 | : | ||
309 | : "m" (p[0]), "m" (p[64])); | ||
310 | |||
311 | /* | ||
312 | * 2 = p[0] ^ qmul[q[0] ^ dq[0]] | ||
313 | * 12 = p[64] ^ qmul[q[64] ^ dq[64]] | ||
314 | */ | ||
315 | |||
/*
 * NOTE(review): dq[] and p[] are written by the statement below but are
 * listed as "m" inputs rather than "=m" outputs — confirm intent.
 */
316 | asm volatile("vmovdqa64 %%zmm1, %0\n\t" | ||
317 | "vmovdqa64 %%zmm14, %1\n\t" | ||
318 | "vmovdqa64 %%zmm2, %2\n\t" | ||
319 | "vmovdqa64 %%zmm12,%3" | ||
320 | : | ||
321 | : "m" (dq[0]), "m" (dq[64]), "m" (p[0]), | ||
322 | "m" (p[64])); | ||
323 | |||
324 | bytes -= 128; | ||
325 | p += 128; | ||
326 | q += 128; | ||
327 | dq += 128; | ||
328 | #else | ||
329 | asm volatile("vmovdqa64 %0, %%zmm3\n\t" | ||
330 | "vpxorq %1, %%zmm3, %%zmm3" | ||
331 | : | ||
332 | : "m" (dq[0]), "m" (q[0])); | ||
333 | |||
334 | /* 3 = q ^ dq */ | ||
335 | |||
336 | asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t" | ||
337 | "vbroadcasti64x2 %1, %%zmm1" | ||
338 | : | ||
339 | : "m" (qmul[0]), "m" (qmul[16])); | ||
340 | |||
341 | asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t" | ||
342 | "vpandq %%zmm7, %%zmm3, %%zmm3\n\t" | ||
343 | "vpandq %%zmm7, %%zmm6, %%zmm6\n\t" | ||
344 | "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t" | ||
345 | "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t" | ||
346 | "vpxorq %%zmm0, %%zmm1, %%zmm1" | ||
347 | : | ||
348 | : ); | ||
349 | |||
350 | /* 1 = qmul[q ^ dq] */ | ||
351 | |||
352 | asm volatile("vmovdqa64 %0, %%zmm2\n\t" | ||
353 | "vpxorq %%zmm1, %%zmm2, %%zmm2" | ||
354 | : | ||
355 | : "m" (p[0])); | ||
356 | |||
357 | /* 2 = p ^ qmul[q ^ dq] */ | ||
358 | |||
359 | asm volatile("vmovdqa64 %%zmm1, %0\n\t" | ||
360 | "vmovdqa64 %%zmm2, %1" | ||
361 | : | ||
362 | : "m" (dq[0]), "m" (p[0])); | ||
363 | |||
364 | bytes -= 64; | ||
365 | p += 64; | ||
366 | q += 64; | ||
367 | dq += 64; | ||
368 | #endif | ||
369 | } | ||
370 | |||
371 | kernel_fpu_end(); | ||
372 | } | ||
373 | |||
374 | const struct raid6_recov_calls raid6_recov_avx512 = { | ||
375 | .data2 = raid6_2data_recov_avx512, | ||
376 | .datap = raid6_datap_recov_avx512, | ||
377 | .valid = raid6_has_avx512, | ||
378 | #ifdef CONFIG_X86_64 | ||
379 | .name = "avx512x2", | ||
380 | #else | ||
381 | .name = "avx512x1", | ||
382 | #endif | ||
383 | .priority = 3, | ||
384 | }; | ||
385 | |||
386 | #else | ||
387 | #warning "your version of binutils lacks AVX512 support" | ||
388 | #endif | ||
diff --git a/lib/raid6/recov_s390xc.c b/lib/raid6/recov_s390xc.c new file mode 100644 index 000000000000..b042dac826cc --- /dev/null +++ b/lib/raid6/recov_s390xc.c | |||
@@ -0,0 +1,116 @@ | |||
1 | /* | ||
2 | * RAID-6 data recovery in dual failure mode based on the XC instruction. | ||
3 | * | ||
4 | * Copyright IBM Corp. 2016 | ||
5 | * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> | ||
6 | */ | ||
7 | |||
8 | #include <linux/export.h> | ||
9 | #include <linux/raid/pq.h> | ||
10 | |||
/*
 * XOR the 256-byte block at p2 into the 256-byte block at p1 with a
 * single XC instruction.
 *
 * The 256-byte addrtype casts tell the compiler that the whole block at
 * p1 is read and written ("+m") and the whole block at p2 is read ("m").
 * The "a" constraints supply both pointers in address registers, and
 * "cc" records that the condition code is clobbered.
 */
11 | static inline void xor_block(u8 *p1, u8 *p2) | ||
12 | { | ||
13 | typedef struct { u8 _[256]; } addrtype; | ||
14 | |||
15 | asm volatile( | ||
16 | " xc 0(256,%[p1]),0(%[p2])\n" | ||
17 | : "+m" (*(addrtype *) p1) : "m" (*(addrtype *) p2), | ||
18 | [p1] "a" (p1), [p2] "a" (p2) : "cc"); | ||
19 | } | ||
20 | |||
21 | /* Recover two failed data blocks. */ | ||
22 | static void raid6_2data_recov_s390xc(int disks, size_t bytes, int faila, | ||
23 | int failb, void **ptrs) | ||
24 | { | ||
25 | u8 *p, *q, *dp, *dq; | ||
26 | const u8 *pbmul; /* P multiplier table for B data */ | ||
27 | const u8 *qmul; /* Q multiplier table (for both) */ | ||
28 | int i; | ||
29 | |||
30 | p = (u8 *)ptrs[disks-2]; | ||
31 | q = (u8 *)ptrs[disks-1]; | ||
32 | |||
33 | /* Compute syndrome with zero for the missing data pages | ||
34 | Use the dead data pages as temporary storage for | ||
35 | delta p and delta q */ | ||
36 | dp = (u8 *)ptrs[faila]; | ||
37 | ptrs[faila] = (void *)raid6_empty_zero_page; | ||
38 | ptrs[disks-2] = dp; | ||
39 | dq = (u8 *)ptrs[failb]; | ||
40 | ptrs[failb] = (void *)raid6_empty_zero_page; | ||
41 | ptrs[disks-1] = dq; | ||
42 | |||
43 | raid6_call.gen_syndrome(disks, bytes, ptrs); | ||
44 | |||
45 | /* Restore pointer table */ | ||
46 | ptrs[faila] = dp; | ||
47 | ptrs[failb] = dq; | ||
48 | ptrs[disks-2] = p; | ||
49 | ptrs[disks-1] = q; | ||
50 | |||
51 | /* Now, pick the proper data tables */ | ||
52 | pbmul = raid6_gfmul[raid6_gfexi[failb-faila]]; | ||
53 | qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]]; | ||
54 | |||
55 | /* Now do it... */ | ||
56 | while (bytes) { | ||
57 | xor_block(dp, p); | ||
58 | xor_block(dq, q); | ||
59 | for (i = 0; i < 256; i++) | ||
60 | dq[i] = pbmul[dp[i]] ^ qmul[dq[i]]; | ||
61 | xor_block(dp, dq); | ||
62 | p += 256; | ||
63 | q += 256; | ||
64 | dp += 256; | ||
65 | dq += 256; | ||
66 | bytes -= 256; | ||
67 | } | ||
68 | } | ||
69 | |||
70 | /* Recover failure of one data block plus the P block */ | ||
71 | static void raid6_datap_recov_s390xc(int disks, size_t bytes, int faila, | ||
72 | void **ptrs) | ||
73 | { | ||
74 | u8 *p, *q, *dq; | ||
75 | const u8 *qmul; /* Q multiplier table */ | ||
76 | int i; | ||
77 | |||
78 | p = (u8 *)ptrs[disks-2]; | ||
79 | q = (u8 *)ptrs[disks-1]; | ||
80 | |||
81 | /* Compute syndrome with zero for the missing data page | ||
82 | Use the dead data page as temporary storage for delta q */ | ||
83 | dq = (u8 *)ptrs[faila]; | ||
84 | ptrs[faila] = (void *)raid6_empty_zero_page; | ||
85 | ptrs[disks-1] = dq; | ||
86 | |||
87 | raid6_call.gen_syndrome(disks, bytes, ptrs); | ||
88 | |||
89 | /* Restore pointer table */ | ||
90 | ptrs[faila] = dq; | ||
91 | ptrs[disks-1] = q; | ||
92 | |||
93 | /* Now, pick the proper data tables */ | ||
94 | qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]]]; | ||
95 | |||
96 | /* Now do it... */ | ||
97 | while (bytes) { | ||
98 | xor_block(dq, q); | ||
99 | for (i = 0; i < 256; i++) | ||
100 | dq[i] = qmul[dq[i]]; | ||
101 | xor_block(p, dq); | ||
102 | p += 256; | ||
103 | q += 256; | ||
104 | dq += 256; | ||
105 | bytes -= 256; | ||
106 | } | ||
107 | } | ||
108 | |||
109 | |||
110 | const struct raid6_recov_calls raid6_recov_s390xc = { | ||
111 | .data2 = raid6_2data_recov_s390xc, | ||
112 | .datap = raid6_datap_recov_s390xc, | ||
113 | .valid = NULL, | ||
114 | .name = "s390xc", | ||
115 | .priority = 1, | ||
116 | }; | ||
diff --git a/lib/raid6/s390vx.uc b/lib/raid6/s390vx.uc new file mode 100644 index 000000000000..7b45191a655f --- /dev/null +++ b/lib/raid6/s390vx.uc | |||
@@ -0,0 +1,168 @@ | |||
1 | /* | ||
2 | * s390vx$#.c | ||
3 | * | ||
4 | * $#-way unrolled RAID6 gen/xor functions for s390 | ||
5 | * based on the vector facility | ||
6 | * | ||
7 | * Copyright IBM Corp. 2016 | ||
8 | * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> | ||
9 | * | ||
10 | * This file is postprocessed using unroll.awk. | ||
11 | */ | ||
12 | |||
13 | #include <linux/raid/pq.h> | ||
14 | #include <asm/fpu/api.h> | ||
15 | |||
16 | asm(".include \"asm/vx-insn.h\"\n"); | ||
17 | |||
18 | #define NSIZE 16 | ||
19 | |||
/*
 * Load the two vector constants the syndrome loops depend on:
 * v24 = 7 in every byte, the register MASK() names as its shift operand;
 * v25 = 0x1d in every byte, the register the loops AND the mask with.
 */
20 | static inline void LOAD_CONST(void) | ||
21 | { | ||
22 | asm volatile("VREPIB %v24,7"); | ||
23 | asm volatile("VREPIB %v25,0x1d"); | ||
24 | } | ||
25 | |||
26 | /* | ||
27 | * The SHLBYTE() operation shifts each of the 16 bytes in | ||
28 | * vector register y left by 1 bit and stores the result in | ||
29 | * vector register x. | ||
30 | */ | ||
31 | static inline void SHLBYTE(int x, int y) | ||
32 | { | ||
/*
 * VAB adds register y to itself, which is the left shift by one bit.
 * The "i" constraints need x and y to be compile-time constants once
 * this helper has been inlined.
 */
33 | asm volatile ("VAB %0,%1,%1" : : "i" (x), "i" (y)); | ||
34 | } | ||
35 | |||
36 | /* | ||
37 | * For each of the 16 bytes in the vector register y the MASK() | ||
38 | * operation returns 0xFF if the high bit of the byte is 1, | ||
39 | * or 0x00 if the high bit is 0. The result is stored in vector | ||
40 | * register x. | ||
41 | */ | ||
42 | static inline void MASK(int x, int y) | ||
43 | { | ||
/* The third operand names v24, which LOAD_CONST() fills with the value 7 */
44 | asm volatile ("VESRAVB %0,%1,24" : : "i" (x), "i" (y)); | ||
45 | } | ||
46 | |||
/*
 * Emit a VN (vector AND) instruction with x as the first operand and
 * y and z as the second and third.  All three arguments are vector
 * register numbers and must be compile-time constants ("i" constraint).
 */
47 | static inline void AND(int x, int y, int z) | ||
48 | { | ||
49 | asm volatile ("VN %0,%1,%2" : : "i" (x), "i" (y), "i" (z)); | ||
50 | } | ||
51 | |||
/*
 * Emit a VX (vector exclusive OR) instruction with x as the first operand
 * and y and z as the second and third.  All three arguments are vector
 * register numbers and must be compile-time constants ("i" constraint).
 */
52 | static inline void XOR(int x, int y, int z) | ||
53 | { | ||
54 | asm volatile ("VX %0,%1,%2" : : "i" (x), "i" (y), "i" (z)); | ||
55 | } | ||
56 | |||
/*
 * Load n consecutive vector registers, x through x + n - 1, from the
 * 16*n bytes at ptr with one VLM instruction.
 *
 * The pointer is pinned to general register 1, and the sized addrtype
 * cast tells the compiler that the whole 16*n-byte area is read.
 */
57 | static inline void LOAD_DATA(int x, int n, u8 *ptr) | ||
58 | { | ||
59 | typedef struct { u8 _[16*n]; } addrtype; | ||
60 | register addrtype *__ptr asm("1") = (addrtype *) ptr; | ||
61 | |||
62 | asm volatile ("VLM %2,%3,0,%r1" | ||
63 | : : "m" (*__ptr), "a" (__ptr), "i" (x), "i" (x + n - 1)); | ||
64 | } | ||
65 | |||
/*
 * Store n consecutive vector registers, x through x + n - 1, to the
 * 16*n bytes at ptr with one VSTM instruction.
 *
 * The pointer is pinned to general register 1, which the instruction
 * text names literally as its base register.  The sized addrtype cast
 * tells the compiler that the whole 16*n-byte area is written.
 */
66 | static inline void STORE_DATA(int x, int n, u8 *ptr) | ||
67 | { | ||
68 | typedef struct { u8 _[16*n]; } addrtype; | ||
69 | register addrtype *__ptr asm("1") = (addrtype *) ptr; | ||
70 | |||
71 | asm volatile ("VSTM %2,%3,0,1" | ||
72 | : "=m" (*__ptr) : "a" (__ptr), "i" (x), "i" (x + n - 1)); | ||
73 | } | ||
74 | |||
/*
 * Emit a VLR (vector load register) instruction with x as the first
 * operand and y as the second.  Both arguments are vector register
 * numbers and must be compile-time constants ("i" constraint).
 */
75 | static inline void COPY_VEC(int x, int y) | ||
76 | { | ||
77 | asm volatile ("VLR %0,%1" : : "i" (x), "i" (y)); | ||
78 | } | ||
79 | |||
/*
 * Compute the P (XOR parity) and Q (RS syndrome) blocks from the data
 * blocks ptrs[0] .. ptrs[disks-3], and store them to ptrs[disks-2] and
 * ptrs[disks-1].
 *
 * Vector registers numbered from 0 accumulate P and those numbered from
 * 8 accumulate Q.  Registers from 16 are scratch, used first for the
 * mask and then for the freshly loaded data.  Each pass of the outer
 * loop handles $#*NSIZE bytes, and the vector state is bracketed by
 * kernel_fpu_begin()/kernel_fpu_end().
 * NOTE(review): the lines carrying the lane placeholder are presumably
 * replicated once per unroll lane by unroll.awk, so they must stay one
 * statement per line — confirm against unroll.awk.
 */
80 | static void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs) | ||
81 | { | ||
82 | struct kernel_fpu vxstate; | ||
83 | u8 **dptr, *p, *q; | ||
84 | int d, z, z0; | ||
85 | |||
86 | kernel_fpu_begin(&vxstate, KERNEL_VXR); | ||
87 | LOAD_CONST(); | ||
88 | |||
89 | dptr = (u8 **) ptrs; | ||
90 | z0 = disks - 3; /* Highest data disk */ | ||
91 | p = dptr[z0 + 1]; /* XOR parity */ | ||
92 | q = dptr[z0 + 2]; /* RS syndrome */ | ||
93 | |||
94 | for (d = 0; d < bytes; d += $#*NSIZE) { | ||
/* Seed both P and Q with the highest data disk */
95 | LOAD_DATA(0,$#,&dptr[z0][d]); | ||
96 | COPY_VEC(8+$$,0+$$); | ||
97 | for (z = z0 - 1; z >= 0; z--) { | ||
/* Q: shift each byte left by one and XOR in the masked v25 constant */
98 | MASK(16+$$,8+$$); | ||
99 | AND(16+$$,16+$$,25); | ||
100 | SHLBYTE(8+$$,8+$$); | ||
101 | XOR(8+$$,8+$$,16+$$); | ||
/* Fold the next data disk into both P and Q */
102 | LOAD_DATA(16,$#,&dptr[z][d]); | ||
103 | XOR(0+$$,0+$$,16+$$); | ||
104 | XOR(8+$$,8+$$,16+$$); | ||
105 | } | ||
106 | STORE_DATA(0,$#,&p[d]); | ||
107 | STORE_DATA(8,$#,&q[d]); | ||
108 | } | ||
109 | kernel_fpu_end(&vxstate, KERNEL_VXR); | ||
110 | } | ||
111 | |||
/*
 * Update existing P and Q blocks with the contribution of the data
 * blocks ptrs[start] .. ptrs[stop].
 *
 * P and Q partial results are accumulated from disk stop down to disk
 * start.  For the disks below start, only the shift-and-mask step is
 * applied to the Q accumulator and no data is loaded.  The accumulated
 * values are then XORed into the P block at ptrs[disks-2] and the Q
 * block at ptrs[disks-1].
 *
 * Vector registers numbered from 0 accumulate P, those from 8 accumulate
 * Q, and those from 16 are scratch.  Each pass of the outer loop handles
 * $#*NSIZE bytes.
 * NOTE(review): the lines carrying the lane placeholder are presumably
 * replicated once per unroll lane by unroll.awk, so they must stay one
 * statement per line — confirm against unroll.awk.
 */
112 | static void raid6_s390vx$#_xor_syndrome(int disks, int start, int stop, | ||
113 | size_t bytes, void **ptrs) | ||
114 | { | ||
115 | struct kernel_fpu vxstate; | ||
116 | u8 **dptr, *p, *q; | ||
117 | int d, z, z0; | ||
118 | |||
119 | dptr = (u8 **) ptrs; | ||
120 | z0 = stop; /* P/Q right side optimization */ | ||
121 | p = dptr[disks - 2]; /* XOR parity */ | ||
122 | q = dptr[disks - 1]; /* RS syndrome */ | ||
123 | |||
124 | kernel_fpu_begin(&vxstate, KERNEL_VXR); | ||
125 | LOAD_CONST(); | ||
126 | |||
127 | for (d = 0; d < bytes; d += $#*NSIZE) { | ||
128 | /* P/Q data pages */ | ||
129 | LOAD_DATA(0,$#,&dptr[z0][d]); | ||
130 | COPY_VEC(8+$$,0+$$); | ||
131 | for (z = z0 - 1; z >= start; z--) { | ||
132 | MASK(16+$$,8+$$); | ||
133 | AND(16+$$,16+$$,25); | ||
134 | SHLBYTE(8+$$,8+$$); | ||
135 | XOR(8+$$,8+$$,16+$$); | ||
136 | LOAD_DATA(16,$#,&dptr[z][d]); | ||
137 | XOR(0+$$,0+$$,16+$$); | ||
138 | XOR(8+$$,8+$$,16+$$); | ||
139 | } | ||
140 | /* P/Q left side optimization */ | ||
141 | for (z = start - 1; z >= 0; z--) { | ||
142 | MASK(16+$$,8+$$); | ||
143 | AND(16+$$,16+$$,25); | ||
144 | SHLBYTE(8+$$,8+$$); | ||
145 | XOR(8+$$,8+$$,16+$$); | ||
146 | } | ||
/* Read-modify-write: fold the accumulated P and Q into the stored blocks */
147 | LOAD_DATA(16,$#,&p[d]); | ||
148 | XOR(16+$$,16+$$,0+$$); | ||
149 | STORE_DATA(16,$#,&p[d]); | ||
150 | LOAD_DATA(16,$#,&q[d]); | ||
151 | XOR(16+$$,16+$$,8+$$); | ||
152 | STORE_DATA(16,$#,&q[d]); | ||
153 | } | ||
154 | kernel_fpu_end(&vxstate, KERNEL_VXR); | ||
155 | } | ||
156 | |||
/*
 * Availability check for this algorithm: returns the value of
 * MACHINE_HAS_VX.
 */
157 | static int raid6_s390vx$#_valid(void) | ||
158 | { | ||
159 | return MACHINE_HAS_VX; | ||
160 | } | ||
161 | |||
/*
 * Algorithm descriptor for the $#-way unrolled vector-facility routines.
 * The initializer is positional: syndrome generator, syndrome updater,
 * availability check, algorithm name, then a final flag.
 * NOTE(review): the field order is assumed to match struct raid6_calls as
 * declared in <linux/raid/pq.h> — confirm there.
 */
162 | const struct raid6_calls raid6_s390vx$# = { | ||
163 | raid6_s390vx$#_gen_syndrome, | ||
164 | raid6_s390vx$#_xor_syndrome, | ||
165 | raid6_s390vx$#_valid, | ||
166 | "vx128x$#", | ||
167 | 1 | ||
168 | }; | ||
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile index 29090f3db677..2c7b60edea04 100644 --- a/lib/raid6/test/Makefile +++ b/lib/raid6/test/Makefile | |||
@@ -32,10 +32,13 @@ ifeq ($(ARCH),arm64) | |||
32 | endif | 32 | endif |
33 | 33 | ||
34 | ifeq ($(IS_X86),yes) | 34 | ifeq ($(IS_X86),yes) |
35 | OBJS += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o | 35 | OBJS += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o avx512.o recov_avx512.o |
36 | CFLAGS += $(shell echo "vpbroadcastb %xmm0, %ymm1" | \ | 36 | CFLAGS += $(shell echo "vpbroadcastb %xmm0, %ymm1" | \ |
37 | gcc -c -x assembler - >&/dev/null && \ | 37 | gcc -c -x assembler - >&/dev/null && \ |
38 | rm ./-.o && echo -DCONFIG_AS_AVX2=1) | 38 | rm ./-.o && echo -DCONFIG_AS_AVX2=1) |
39 | CFLAGS += $(shell echo "vpmovm2b %k1, %zmm5" | \ | ||
40 | gcc -c -x assembler - >&/dev/null && \ | ||
41 | rm ./-.o && echo -DCONFIG_AS_AVX512=1) | ||
39 | else ifeq ($(HAS_NEON),yes) | 42 | else ifeq ($(HAS_NEON),yes) |
40 | OBJS += neon.o neon1.o neon2.o neon4.o neon8.o | 43 | OBJS += neon.o neon1.o neon2.o neon4.o neon8.o |
41 | CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1 | 44 | CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1 |
diff --git a/lib/raid6/test/test.c b/lib/raid6/test/test.c index 3bebbabdb510..b07f4d8e6b03 100644 --- a/lib/raid6/test/test.c +++ b/lib/raid6/test/test.c | |||
@@ -21,12 +21,13 @@ | |||
21 | 21 | ||
22 | #define NDISKS 16 /* Including P and Q */ | 22 | #define NDISKS 16 /* Including P and Q */ |
23 | 23 | ||
24 | const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); | 24 | const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE))); |
25 | struct raid6_calls raid6_call; | 25 | struct raid6_calls raid6_call; |
26 | 26 | ||
27 | char *dataptrs[NDISKS]; | 27 | char *dataptrs[NDISKS]; |
28 | char data[NDISKS][PAGE_SIZE]; | 28 | char data[NDISKS][PAGE_SIZE] __attribute__((aligned(PAGE_SIZE))); |
29 | char recovi[PAGE_SIZE], recovj[PAGE_SIZE]; | 29 | char recovi[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE))); |
30 | char recovj[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE))); | ||
30 | 31 | ||
31 | static void makedata(int start, int stop) | 32 | static void makedata(int start, int stop) |
32 | { | 33 | { |
diff --git a/lib/raid6/x86.h b/lib/raid6/x86.h index 8fe9d9662abb..834d268a4b05 100644 --- a/lib/raid6/x86.h +++ b/lib/raid6/x86.h | |||
@@ -46,6 +46,16 @@ static inline void kernel_fpu_end(void) | |||
46 | #define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */ | 46 | #define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */ |
47 | #define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */ | 47 | #define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */ |
48 | #define X86_FEATURE_AVX2 (9*32+ 5) /* AVX2 instructions */ | 48 | #define X86_FEATURE_AVX2 (9*32+ 5) /* AVX2 instructions */ |
49 | #define X86_FEATURE_AVX512F (9*32+16) /* AVX-512 Foundation */ | ||
50 | #define X86_FEATURE_AVX512DQ (9*32+17) /* AVX-512 DQ (Double/Quad granular) | ||
51 | * Instructions | ||
52 | */ | ||
53 | #define X86_FEATURE_AVX512BW (9*32+30) /* AVX-512 BW (Byte/Word granular) | ||
54 | * Instructions | ||
55 | */ | ||
56 | #define X86_FEATURE_AVX512VL (9*32+31) /* AVX-512 VL (128/256 Vector Length) | ||
57 | * Extensions | ||
58 | */ | ||
49 | #define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ | 59 | #define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ |
50 | 60 | ||
51 | /* Should work well enough on modern CPUs for testing */ | 61 | /* Should work well enough on modern CPUs for testing */ |