aboutsummaryrefslogtreecommitdiffstats
path: root/lib/raid6
diff options
context:
space:
mode:
Diffstat (limited to 'lib/raid6')
-rw-r--r--lib/raid6/.gitignore1
-rw-r--r--lib/raid6/Makefile8
-rw-r--r--lib/raid6/algos.c18
-rw-r--r--lib/raid6/avx512.c569
-rw-r--r--lib/raid6/recov_avx512.c388
-rw-r--r--lib/raid6/recov_s390xc.c116
-rw-r--r--lib/raid6/s390vx.uc168
-rw-r--r--lib/raid6/test/Makefile5
-rw-r--r--lib/raid6/test/test.c7
-rw-r--r--lib/raid6/x86.h10
10 files changed, 1285 insertions, 5 deletions
diff --git a/lib/raid6/.gitignore b/lib/raid6/.gitignore
index 0a7e494b2bcd..f01b1cb04f91 100644
--- a/lib/raid6/.gitignore
+++ b/lib/raid6/.gitignore
@@ -3,3 +3,4 @@ altivec*.c
3int*.c 3int*.c
4tables.c 4tables.c
5neon?.c 5neon?.c
6s390vx?.c
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index 3b10a48fa040..3057011f5599 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -3,10 +3,11 @@ obj-$(CONFIG_RAID6_PQ) += raid6_pq.o
3raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \ 3raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \
4 int8.o int16.o int32.o 4 int8.o int16.o int32.o
5 5
6raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o 6raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o avx512.o recov_avx512.o
7raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o 7raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o
8raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o 8raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o
9raid6_pq-$(CONFIG_TILEGX) += tilegx8.o 9raid6_pq-$(CONFIG_TILEGX) += tilegx8.o
10raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o
10 11
11hostprogs-y += mktables 12hostprogs-y += mktables
12 13
@@ -116,6 +117,11 @@ $(obj)/tilegx8.c: UNROLL := 8
116$(obj)/tilegx8.c: $(src)/tilegx.uc $(src)/unroll.awk FORCE 117$(obj)/tilegx8.c: $(src)/tilegx.uc $(src)/unroll.awk FORCE
117 $(call if_changed,unroll) 118 $(call if_changed,unroll)
118 119
120targets += s390vx8.c
121$(obj)/s390vx8.c: UNROLL := 8
122$(obj)/s390vx8.c: $(src)/s390vx.uc $(src)/unroll.awk FORCE
123 $(call if_changed,unroll)
124
119quiet_cmd_mktable = TABLE $@ 125quiet_cmd_mktable = TABLE $@
120 cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 ) 126 cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 )
121 127
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index 975c6e0434bd..7857049fd7d3 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -49,6 +49,10 @@ const struct raid6_calls * const raid6_algos[] = {
49 &raid6_avx2x1, 49 &raid6_avx2x1,
50 &raid6_avx2x2, 50 &raid6_avx2x2,
51#endif 51#endif
52#ifdef CONFIG_AS_AVX512
53 &raid6_avx512x1,
54 &raid6_avx512x2,
55#endif
52#endif 56#endif
53#if defined(__x86_64__) && !defined(__arch_um__) 57#if defined(__x86_64__) && !defined(__arch_um__)
54 &raid6_sse2x1, 58 &raid6_sse2x1,
@@ -59,6 +63,11 @@ const struct raid6_calls * const raid6_algos[] = {
59 &raid6_avx2x2, 63 &raid6_avx2x2,
60 &raid6_avx2x4, 64 &raid6_avx2x4,
61#endif 65#endif
66#ifdef CONFIG_AS_AVX512
67 &raid6_avx512x1,
68 &raid6_avx512x2,
69 &raid6_avx512x4,
70#endif
62#endif 71#endif
63#ifdef CONFIG_ALTIVEC 72#ifdef CONFIG_ALTIVEC
64 &raid6_altivec1, 73 &raid6_altivec1,
@@ -69,6 +78,9 @@ const struct raid6_calls * const raid6_algos[] = {
69#if defined(CONFIG_TILEGX) 78#if defined(CONFIG_TILEGX)
70 &raid6_tilegx8, 79 &raid6_tilegx8,
71#endif 80#endif
81#if defined(CONFIG_S390)
82 &raid6_s390vx8,
83#endif
72 &raid6_intx1, 84 &raid6_intx1,
73 &raid6_intx2, 85 &raid6_intx2,
74 &raid6_intx4, 86 &raid6_intx4,
@@ -89,12 +101,18 @@ void (*raid6_datap_recov)(int, size_t, int, void **);
89EXPORT_SYMBOL_GPL(raid6_datap_recov); 101EXPORT_SYMBOL_GPL(raid6_datap_recov);
90 102
91const struct raid6_recov_calls *const raid6_recov_algos[] = { 103const struct raid6_recov_calls *const raid6_recov_algos[] = {
104#ifdef CONFIG_AS_AVX512
105 &raid6_recov_avx512,
106#endif
92#ifdef CONFIG_AS_AVX2 107#ifdef CONFIG_AS_AVX2
93 &raid6_recov_avx2, 108 &raid6_recov_avx2,
94#endif 109#endif
95#ifdef CONFIG_AS_SSSE3 110#ifdef CONFIG_AS_SSSE3
96 &raid6_recov_ssse3, 111 &raid6_recov_ssse3,
97#endif 112#endif
113#ifdef CONFIG_S390
114 &raid6_recov_s390xc,
115#endif
98 &raid6_recov_intx1, 116 &raid6_recov_intx1,
99 NULL 117 NULL
100}; 118};
diff --git a/lib/raid6/avx512.c b/lib/raid6/avx512.c
new file mode 100644
index 000000000000..f524a7972006
--- /dev/null
+++ b/lib/raid6/avx512.c
@@ -0,0 +1,569 @@
1/* -*- linux-c -*- --------------------------------------------------------
2 *
3 * Copyright (C) 2016 Intel Corporation
4 *
5 * Author: Gayatri Kammela <gayatri.kammela@intel.com>
6 * Author: Megha Dey <megha.dey@linux.intel.com>
7 *
8 * Based on avx2.c: Copyright 2012 Yuanhan Liu All Rights Reserved
9 * Based on sse2.c: Copyright 2002 H. Peter Anvin - All Rights Reserved
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
14 * Boston MA 02111-1307, USA; either version 2 of the License, or
15 * (at your option) any later version; incorporated herein by reference.
16 *
17 * -----------------------------------------------------------------------
18 */
19
20/*
21 * AVX512 implementation of RAID-6 syndrome functions
22 *
23 */
24
25#ifdef CONFIG_AS_AVX512
26
27#include <linux/raid/pq.h>
28#include "x86.h"
29
30static const struct raid6_avx512_constants {
31 u64 x1d[8];
32} raid6_avx512_constants __aligned(512) = {
33 { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
34 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
35 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
36 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,},
37};
38
39static int raid6_have_avx512(void)
40{
41 return boot_cpu_has(X86_FEATURE_AVX2) &&
42 boot_cpu_has(X86_FEATURE_AVX) &&
43 boot_cpu_has(X86_FEATURE_AVX512F) &&
44 boot_cpu_has(X86_FEATURE_AVX512BW) &&
45 boot_cpu_has(X86_FEATURE_AVX512VL) &&
46 boot_cpu_has(X86_FEATURE_AVX512DQ);
47}
48
/*
 * Compute the P (XOR parity) and Q (RS syndrome) blocks from scratch,
 * one ZMM register (64 bytes) per outer-loop iteration.
 *
 * disks: number of entries in ptrs. ptrs[0..disks-3] are read as data,
 *        ptrs[disks-2] receives P and ptrs[disks-1] receives Q.
 * bytes: length of each block; walked in steps of 64.
 * ptrs:  block pointers. They are accessed with vmovdqa64/vmovntdq, so
 *        presumably every block is 64-byte aligned -- confirm with callers.
 *
 * NOTE(review): the first load reads dptr[z0-1], so this appears to rely
 * on disks >= 4 -- confirm against callers.
 */
49static void raid6_avx5121_gen_syndrome(int disks, size_t bytes, void **ptrs)
50{
51 u8 **dptr = (u8 **)ptrs;
52 u8 *p, *q;
53 int d, z, z0;
54
55 z0 = disks - 3; /* Highest data disk */
56 p = dptr[z0+1]; /* XOR parity */
57 q = dptr[z0+2]; /* RS syndrome */
58
59 kernel_fpu_begin();
60
/* zmm0 = 0x1d in every byte, zmm1 = all zeroes (comparison operand) */
61 asm volatile("vmovdqa64 %0,%%zmm0\n\t"
62 "vpxorq %%zmm1,%%zmm1,%%zmm1" /* Zero temp */
63 :
64 : "m" (raid6_avx512_constants.x1d[0]));
65
66 for (d = 0; d < bytes; d += 64) {
/*
 * Seed P (zmm2) and Q (zmm4) with the highest data disk and preload
 * the next disk down into zmm6.
 */
67 asm volatile("prefetchnta %0\n\t"
68 "vmovdqa64 %0,%%zmm2\n\t" /* P[0] */
69 "prefetchnta %1\n\t"
70 "vmovdqa64 %%zmm2,%%zmm4\n\t" /* Q[0] */
71 "vmovdqa64 %1,%%zmm6"
72 :
73 : "m" (dptr[z0][d]), "m" (dptr[z0-1][d]));
74 for (z = z0-2; z >= 0; z--) {
/*
 * Double Q: k1 marks the bytes of zmm4 whose top bit is set,
 * vpaddb shifts every byte left by one, and 0x1d is XORed into
 * the marked bytes.  Then fold the preloaded disk (zmm6) into
 * P and Q and preload disk z for the next pass.
 */
75 asm volatile("prefetchnta %0\n\t"
76 "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
77 "vpmovm2b %%k1,%%zmm5\n\t"
78 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
79 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
80 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
81 "vpxorq %%zmm6,%%zmm2,%%zmm2\n\t"
82 "vpxorq %%zmm6,%%zmm4,%%zmm4\n\t"
83 "vmovdqa64 %0,%%zmm6"
84 :
85 : "m" (dptr[z][d]));
86 }
/*
 * Final doubling step for the last preloaded disk, then write P and Q
 * with non-temporal stores and clear the accumulators.
 */
87 asm volatile("vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
88 "vpmovm2b %%k1,%%zmm5\n\t"
89 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
90 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
91 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
92 "vpxorq %%zmm6,%%zmm2,%%zmm2\n\t"
93 "vpxorq %%zmm6,%%zmm4,%%zmm4\n\t"
94 "vmovntdq %%zmm2,%0\n\t"
95 "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t"
96 "vmovntdq %%zmm4,%1\n\t"
97 "vpxorq %%zmm4,%%zmm4,%%zmm4"
98 :
99 : "m" (p[d]), "m" (q[d]));
100 }
101
/* Order the non-temporal stores before leaving the FPU section */
102 asm volatile("sfence" : : : "memory");
103 kernel_fpu_end();
104}
105
/*
 * Update existing P and Q blocks in place for data disks start..stop,
 * one ZMM register (64 bytes) per outer-loop iteration.
 *
 * disks: number of entries in ptrs; ptrs[disks-2] is P, ptrs[disks-1] is Q.
 * start: lowest data disk whose contents are folded in.
 * stop:  highest data disk whose contents are folded in; disks above it
 *        are never read.
 * bytes: length of each block; walked in steps of 64.
 * ptrs:  block pointers, accessed with aligned vmovdqa64 loads/stores.
 */
106static void raid6_avx5121_xor_syndrome(int disks, int start, int stop,
107 size_t bytes, void **ptrs)
108{
109 u8 **dptr = (u8 **)ptrs;
110 u8 *p, *q;
111 int d, z, z0;
112
113 z0 = stop; /* P/Q right side optimization */
114 p = dptr[disks-2]; /* XOR parity */
115 q = dptr[disks-1]; /* RS syndrome */
116
117 kernel_fpu_begin();
118
/* zmm0 = 0x1d in every byte */
119 asm volatile("vmovdqa64 %0,%%zmm0"
120 : : "m" (raid6_avx512_constants.x1d[0]));
121
122 for (d = 0 ; d < bytes ; d += 64) {
/* zmm4 = Q delta seeded with disk 'stop'; zmm2 = old P ^ that disk */
123 asm volatile("vmovdqa64 %0,%%zmm4\n\t"
124 "vmovdqa64 %1,%%zmm2\n\t"
125 "vpxorq %%zmm4,%%zmm2,%%zmm2"
126 :
127 : "m" (dptr[z0][d]), "m" (p[d]));
128 /* P/Q data pages */
129 for (z = z0-1 ; z >= start ; z--) {
/*
 * Double the Q delta (XOR 0x1d into bytes whose top bit was
 * set), then fold disk z into both P and the Q delta.
 */
130 asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
131 "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
132 "vpmovm2b %%k1,%%zmm5\n\t"
133 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
134 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
135 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
136 "vmovdqa64 %0,%%zmm5\n\t"
137 "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
138 "vpxorq %%zmm5,%%zmm4,%%zmm4"
139 :
140 : "m" (dptr[z][d]));
141 }
142 /* P/Q left side optimization */
143 for (z = start-1 ; z >= 0 ; z--) {
/* Disks below 'start' are not read: only the doubling step runs */
144 asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
145 "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
146 "vpmovm2b %%k1,%%zmm5\n\t"
147 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
148 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
149 "vpxorq %%zmm5,%%zmm4,%%zmm4"
150 :
151 : );
152 }
/* XOR the accumulated delta into the stored Q, then write Q and P back */
153 asm volatile("vpxorq %0,%%zmm4,%%zmm4\n\t"
154 /* Don't use movntdq for r/w memory area < cache line */
155 "vmovdqa64 %%zmm4,%0\n\t"
156 "vmovdqa64 %%zmm2,%1"
157 :
158 : "m" (q[d]), "m" (p[d]));
159 }
160
161 asm volatile("sfence" : : : "memory");
162 kernel_fpu_end();
163}
164
/*
 * Method table for the non-unrolled AVX512 routines (64 bytes per pass).
 * The initializers are positional: syndrome generator, syndrome updater,
 * CPU-support check, algorithm name and the cache-hint flag.
 */
165const struct raid6_calls raid6_avx512x1 = {
166 raid6_avx5121_gen_syndrome,
167 raid6_avx5121_xor_syndrome,
168 raid6_have_avx512,
169 "avx512x1",
170 1 /* Has cache hints */
171};
172
173/*
174 * Unrolled-by-2 AVX512 implementation
175 */
/*
 * Compute P and Q from scratch, two ZMM registers (128 bytes) per
 * outer-loop iteration.
 *
 * disks: number of entries in ptrs. ptrs[0..disks-3] are read as data,
 *        ptrs[disks-2] receives P and ptrs[disks-1] receives Q.
 * bytes: length of each block; walked in steps of 128.
 * ptrs:  block pointers, accessed with aligned vmovdqa64/vmovntdq.
 */
176static void raid6_avx5122_gen_syndrome(int disks, size_t bytes, void **ptrs)
177{
178 u8 **dptr = (u8 **)ptrs;
179 u8 *p, *q;
180 int d, z, z0;
181
182 z0 = disks - 3; /* Highest data disk */
183 p = dptr[z0+1]; /* XOR parity */
184 q = dptr[z0+2]; /* RS syndrome */
185
186 kernel_fpu_begin();
187
/* zmm0 = 0x1d in every byte, zmm1 = all zeroes (comparison operand) */
188 asm volatile("vmovdqa64 %0,%%zmm0\n\t"
189 "vpxorq %%zmm1,%%zmm1,%%zmm1" /* Zero temp */
190 :
191 : "m" (raid6_avx512_constants.x1d[0]));
192
193 /* We uniformly assume a single prefetch covers at least 64 bytes */
194 for (d = 0; d < bytes; d += 128) {
/* Seed both P lanes (zmm2/zmm3) and Q lanes (zmm4/zmm6) with disk z0 */
195 asm volatile("prefetchnta %0\n\t"
196 "prefetchnta %1\n\t"
197 "vmovdqa64 %0,%%zmm2\n\t" /* P[0] */
198 "vmovdqa64 %1,%%zmm3\n\t" /* P[1] */
199 "vmovdqa64 %%zmm2,%%zmm4\n\t" /* Q[0] */
200 "vmovdqa64 %%zmm3,%%zmm6" /* Q[1] */
201 :
202 : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]));
203 for (z = z0-1; z >= 0; z--) {
/*
 * Double both Q lanes (XOR 0x1d into bytes whose top bit was
 * set), then load disk z and fold it into P and Q.
 */
204 asm volatile("prefetchnta %0\n\t"
205 "prefetchnta %1\n\t"
206 "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
207 "vpcmpgtb %%zmm6,%%zmm1,%%k2\n\t"
208 "vpmovm2b %%k1,%%zmm5\n\t"
209 "vpmovm2b %%k2,%%zmm7\n\t"
210 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
211 "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
212 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
213 "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
214 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
215 "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
216 "vmovdqa64 %0,%%zmm5\n\t"
217 "vmovdqa64 %1,%%zmm7\n\t"
218 "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
219 "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
220 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
221 "vpxorq %%zmm7,%%zmm6,%%zmm6"
222 :
223 : "m" (dptr[z][d]), "m" (dptr[z][d+64]));
224 }
/* Write P and Q with non-temporal stores */
225 asm volatile("vmovntdq %%zmm2,%0\n\t"
226 "vmovntdq %%zmm3,%1\n\t"
227 "vmovntdq %%zmm4,%2\n\t"
228 "vmovntdq %%zmm6,%3"
229 :
230 : "m" (p[d]), "m" (p[d+64]), "m" (q[d]),
231 "m" (q[d+64]));
232 }
233
/* Order the non-temporal stores before leaving the FPU section */
234 asm volatile("sfence" : : : "memory");
235 kernel_fpu_end();
236}
237
/*
 * Update existing P and Q blocks in place for data disks start..stop,
 * two ZMM registers (128 bytes) per outer-loop iteration.
 *
 * disks: number of entries in ptrs; ptrs[disks-2] is P, ptrs[disks-1] is Q.
 * start: lowest data disk whose contents are folded in.
 * stop:  highest data disk whose contents are folded in.
 * bytes: length of each block; walked in steps of 128.
 * ptrs:  block pointers, accessed with aligned vmovdqa64 loads/stores.
 */
238static void raid6_avx5122_xor_syndrome(int disks, int start, int stop,
239 size_t bytes, void **ptrs)
240{
241 u8 **dptr = (u8 **)ptrs;
242 u8 *p, *q;
243 int d, z, z0;
244
245 z0 = stop; /* P/Q right side optimization */
246 p = dptr[disks-2]; /* XOR parity */
247 q = dptr[disks-1]; /* RS syndrome */
248
249 kernel_fpu_begin();
250
/* zmm0 = 0x1d in every byte */
251 asm volatile("vmovdqa64 %0,%%zmm0"
252 : : "m" (raid6_avx512_constants.x1d[0]));
253
254 for (d = 0 ; d < bytes ; d += 128) {
/* zmm4/zmm6 = Q delta seeded with disk 'stop'; zmm2/zmm3 = old P ^ it */
255 asm volatile("vmovdqa64 %0,%%zmm4\n\t"
256 "vmovdqa64 %1,%%zmm6\n\t"
257 "vmovdqa64 %2,%%zmm2\n\t"
258 "vmovdqa64 %3,%%zmm3\n\t"
259 "vpxorq %%zmm4,%%zmm2,%%zmm2\n\t"
260 "vpxorq %%zmm6,%%zmm3,%%zmm3"
261 :
262 : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]),
263 "m" (p[d]), "m" (p[d+64]));
264 /* P/Q data pages */
265 for (z = z0-1 ; z >= start ; z--) {
/* Double the Q delta, then fold disk z into P and the Q delta */
266 asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
267 "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
268 "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
269 "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
270 "vpmovm2b %%k1,%%zmm5\n\t"
271 "vpmovm2b %%k2,%%zmm7\n\t"
272 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
273 "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
274 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
275 "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
276 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
277 "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
278 "vmovdqa64 %0,%%zmm5\n\t"
279 "vmovdqa64 %1,%%zmm7\n\t"
280 "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
281 "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
282 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
283 "vpxorq %%zmm7,%%zmm6,%%zmm6"
284 :
285 : "m" (dptr[z][d]), "m" (dptr[z][d+64]));
286 }
287 /* P/Q left side optimization */
288 for (z = start-1 ; z >= 0 ; z--) {
/* Disks below 'start' are not read: only the doubling step runs */
289 asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
290 "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
291 "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
292 "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
293 "vpmovm2b %%k1,%%zmm5\n\t"
294 "vpmovm2b %%k2,%%zmm7\n\t"
295 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
296 "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
297 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
298 "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
299 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
300 "vpxorq %%zmm7,%%zmm6,%%zmm6"
301 :
302 : );
303 }
/* XOR the accumulated delta into the stored Q, then write Q and P back */
304 asm volatile("vpxorq %0,%%zmm4,%%zmm4\n\t"
305 "vpxorq %1,%%zmm6,%%zmm6\n\t"
306 /* Don't use movntdq for r/w
307 * memory area < cache line
308 */
309 "vmovdqa64 %%zmm4,%0\n\t"
310 "vmovdqa64 %%zmm6,%1\n\t"
311 "vmovdqa64 %%zmm2,%2\n\t"
312 "vmovdqa64 %%zmm3,%3"
313 :
314 : "m" (q[d]), "m" (q[d+64]), "m" (p[d]),
315 "m" (p[d+64]));
316 }
317
318 asm volatile("sfence" : : : "memory");
319 kernel_fpu_end();
320}
321
/*
 * Method table for the unrolled-by-2 AVX512 routines (128 bytes per pass).
 * The initializers are positional: syndrome generator, syndrome updater,
 * CPU-support check, algorithm name and the cache-hint flag.
 */
322const struct raid6_calls raid6_avx512x2 = {
323 raid6_avx5122_gen_syndrome,
324 raid6_avx5122_xor_syndrome,
325 raid6_have_avx512,
326 "avx512x2",
327 1 /* Has cache hints */
328};
329
330#ifdef CONFIG_X86_64
331
332/*
333 * Unrolled-by-4 AVX512 implementation
334 */
/*
 * Compute P and Q from scratch, four ZMM registers (256 bytes) per
 * outer-loop iteration.  Uses zmm10-zmm15, which is why this variant
 * sits inside the CONFIG_X86_64 section.
 *
 * disks: number of entries in ptrs. ptrs[0..disks-3] are read as data,
 *        ptrs[disks-2] receives P and ptrs[disks-1] receives Q.
 * bytes: length of each block; walked in steps of 256.
 * ptrs:  block pointers, accessed with aligned vmovdqa64/vmovntdq.
 */
335static void raid6_avx5124_gen_syndrome(int disks, size_t bytes, void **ptrs)
336{
337 u8 **dptr = (u8 **)ptrs;
338 u8 *p, *q;
339 int d, z, z0;
340
341 z0 = disks - 3; /* Highest data disk */
342 p = dptr[z0+1]; /* XOR parity */
343 q = dptr[z0+2]; /* RS syndrome */
344
345 kernel_fpu_begin();
346
/*
 * zmm0 = 0x1d in every byte; every P and Q accumulator starts at zero so
 * the inner loop can treat the highest data disk like any other.
 */
347 asm volatile("vmovdqa64 %0,%%zmm0\n\t"
348 "vpxorq %%zmm1,%%zmm1,%%zmm1\n\t" /* Zero temp */
349 "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t" /* P[0] */
350 "vpxorq %%zmm3,%%zmm3,%%zmm3\n\t" /* P[1] */
351 "vpxorq %%zmm4,%%zmm4,%%zmm4\n\t" /* Q[0] */
352 "vpxorq %%zmm6,%%zmm6,%%zmm6\n\t" /* Q[1] */
353 "vpxorq %%zmm10,%%zmm10,%%zmm10\n\t" /* P[2] */
354 "vpxorq %%zmm11,%%zmm11,%%zmm11\n\t" /* P[3] */
355 "vpxorq %%zmm12,%%zmm12,%%zmm12\n\t" /* Q[2] */
356 "vpxorq %%zmm14,%%zmm14,%%zmm14" /* Q[3] */
357 :
358 : "m" (raid6_avx512_constants.x1d[0]));
359
360 for (d = 0; d < bytes; d += 256) {
361 for (z = z0; z >= 0; z--) {
/*
 * Double the four Q lanes (XOR 0x1d into bytes whose top bit was
 * set), then load disk z and fold it into P and Q.
 */
362 asm volatile("prefetchnta %0\n\t"
363 "prefetchnta %1\n\t"
364 "prefetchnta %2\n\t"
365 "prefetchnta %3\n\t"
366 "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
367 "vpcmpgtb %%zmm6,%%zmm1,%%k2\n\t"
368 "vpcmpgtb %%zmm12,%%zmm1,%%k3\n\t"
369 "vpcmpgtb %%zmm14,%%zmm1,%%k4\n\t"
370 "vpmovm2b %%k1,%%zmm5\n\t"
371 "vpmovm2b %%k2,%%zmm7\n\t"
372 "vpmovm2b %%k3,%%zmm13\n\t"
373 "vpmovm2b %%k4,%%zmm15\n\t"
374 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
375 "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
376 "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t"
377 "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t"
378 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
379 "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
380 "vpandq %%zmm0,%%zmm13,%%zmm13\n\t"
381 "vpandq %%zmm0,%%zmm15,%%zmm15\n\t"
382 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
383 "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
384 "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
385 "vpxorq %%zmm15,%%zmm14,%%zmm14\n\t"
386 "vmovdqa64 %0,%%zmm5\n\t"
387 "vmovdqa64 %1,%%zmm7\n\t"
388 "vmovdqa64 %2,%%zmm13\n\t"
389 "vmovdqa64 %3,%%zmm15\n\t"
390 "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
391 "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
392 "vpxorq %%zmm13,%%zmm10,%%zmm10\n\t"
393 "vpxorq %%zmm15,%%zmm11,%%zmm11\n"
394 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
395 "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
396 "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
397 "vpxorq %%zmm15,%%zmm14,%%zmm14"
398 :
399 : "m" (dptr[z][d]), "m" (dptr[z][d+64]),
400 "m" (dptr[z][d+128]), "m" (dptr[z][d+192]));
401 }
/*
 * Write P and Q with non-temporal stores and re-zero each accumulator
 * for the next 256-byte chunk.
 */
402 asm volatile("vmovntdq %%zmm2,%0\n\t"
403 "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t"
404 "vmovntdq %%zmm3,%1\n\t"
405 "vpxorq %%zmm3,%%zmm3,%%zmm3\n\t"
406 "vmovntdq %%zmm10,%2\n\t"
407 "vpxorq %%zmm10,%%zmm10,%%zmm10\n\t"
408 "vmovntdq %%zmm11,%3\n\t"
409 "vpxorq %%zmm11,%%zmm11,%%zmm11\n\t"
410 "vmovntdq %%zmm4,%4\n\t"
411 "vpxorq %%zmm4,%%zmm4,%%zmm4\n\t"
412 "vmovntdq %%zmm6,%5\n\t"
413 "vpxorq %%zmm6,%%zmm6,%%zmm6\n\t"
414 "vmovntdq %%zmm12,%6\n\t"
415 "vpxorq %%zmm12,%%zmm12,%%zmm12\n\t"
416 "vmovntdq %%zmm14,%7\n\t"
417 "vpxorq %%zmm14,%%zmm14,%%zmm14"
418 :
419 : "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]),
420 "m" (p[d+192]), "m" (q[d]), "m" (q[d+64]),
421 "m" (q[d+128]), "m" (q[d+192]));
422 }
423
/* Order the non-temporal stores before leaving the FPU section */
424 asm volatile("sfence" : : : "memory");
425 kernel_fpu_end();
426}
427
428static void raid6_avx5124_xor_syndrome(int disks, int start, int stop,
429 size_t bytes, void **ptrs)
430{
431 u8 **dptr = (u8 **)ptrs;
432 u8 *p, *q;
433 int d, z, z0;
434
435 z0 = stop; /* P/Q right side optimization */
436 p = dptr[disks-2]; /* XOR parity */
437 q = dptr[disks-1]; /* RS syndrome */
438
439 kernel_fpu_begin();
440
441 asm volatile("vmovdqa64 %0,%%zmm0"
442 :: "m" (raid6_avx512_constants.x1d[0]));
443
444 for (d = 0 ; d < bytes ; d += 256) {
445 asm volatile("vmovdqa64 %0,%%zmm4\n\t"
446 "vmovdqa64 %1,%%zmm6\n\t"
447 "vmovdqa64 %2,%%zmm12\n\t"
448 "vmovdqa64 %3,%%zmm14\n\t"
449 "vmovdqa64 %4,%%zmm2\n\t"
450 "vmovdqa64 %5,%%zmm3\n\t"
451 "vmovdqa64 %6,%%zmm10\n\t"
452 "vmovdqa64 %7,%%zmm11\n\t"
453 "vpxorq %%zmm4,%%zmm2,%%zmm2\n\t"
454 "vpxorq %%zmm6,%%zmm3,%%zmm3\n\t"
455 "vpxorq %%zmm12,%%zmm10,%%zmm10\n\t"
456 "vpxorq %%zmm14,%%zmm11,%%zmm11"
457 :
458 : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]),
459 "m" (dptr[z0][d+128]), "m" (dptr[z0][d+192]),
460 "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]),
461 "m" (p[d+192]));
462 /* P/Q data pages */
463 for (z = z0-1 ; z >= start ; z--) {
464 asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
465 "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
466 "vpxorq %%zmm13,%%zmm13,%%zmm13\n\t"
467 "vpxorq %%zmm15,%%zmm15,%%zmm15\n\t"
468 "prefetchnta %0\n\t"
469 "prefetchnta %2\n\t"
470 "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
471 "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
472 "vpcmpgtb %%zmm12,%%zmm13,%%k3\n\t"
473 "vpcmpgtb %%zmm14,%%zmm15,%%k4\n\t"
474 "vpmovm2b %%k1,%%zmm5\n\t"
475 "vpmovm2b %%k2,%%zmm7\n\t"
476 "vpmovm2b %%k3,%%zmm13\n\t"
477 "vpmovm2b %%k4,%%zmm15\n\t"
478 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
479 "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
480 "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t"
481 "vpaddb %%Zmm14,%%zmm14,%%zmm14\n\t"
482 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
483 "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
484 "vpandq %%zmm0,%%zmm13,%%zmm13\n\t"
485 "vpandq %%zmm0,%%zmm15,%%zmm15\n\t"
486 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
487 "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
488 "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
489 "vpxorq %%zmm15,%%zmm14,%%zmm14\n\t"
490 "vmovdqa64 %0,%%zmm5\n\t"
491 "vmovdqa64 %1,%%zmm7\n\t"
492 "vmovdqa64 %2,%%zmm13\n\t"
493 "vmovdqa64 %3,%%zmm15\n\t"
494 "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
495 "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
496 "vpxorq %%zmm13,%%zmm10,%%zmm10\n\t"
497 "vpxorq %%zmm15,%%zmm11,%%zmm11\n\t"
498 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
499 "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
500 "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
501 "vpxorq %%zmm15,%%zmm14,%%zmm14"
502 :
503 : "m" (dptr[z][d]), "m" (dptr[z][d+64]),
504 "m" (dptr[z][d+128]),
505 "m" (dptr[z][d+192]));
506 }
507 asm volatile("prefetchnta %0\n\t"
508 "prefetchnta %1\n\t"
509 :
510 : "m" (q[d]), "m" (q[d+128]));
511 /* P/Q left side optimization */
512 for (z = start-1 ; z >= 0 ; z--) {
513 asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
514 "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
515 "vpxorq %%zmm13,%%zmm13,%%zmm13\n\t"
516 "vpxorq %%zmm15,%%zmm15,%%zmm15\n\t"
517 "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
518 "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
519 "vpcmpgtb %%zmm12,%%zmm13,%%k3\n\t"
520 "vpcmpgtb %%zmm14,%%zmm15,%%k4\n\t"
521 "vpmovm2b %%k1,%%zmm5\n\t"
522 "vpmovm2b %%k2,%%zmm7\n\t"
523 "vpmovm2b %%k3,%%zmm13\n\t"
524 "vpmovm2b %%k4,%%zmm15\n\t"
525 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
526 "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
527 "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t"
528 "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t"
529 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
530 "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
531 "vpandq %%zmm0,%%zmm13,%%zmm13\n\t"
532 "vpandq %%zmm0,%%zmm15,%%zmm15\n\t"
533 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
534 "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
535 "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
536 "vpxorq %%zmm15,%%zmm14,%%zmm14"
537 :
538 : );
539 }
540 asm volatile("vmovntdq %%zmm2,%0\n\t"
541 "vmovntdq %%zmm3,%1\n\t"
542 "vmovntdq %%zmm10,%2\n\t"
543 "vmovntdq %%zmm11,%3\n\t"
544 "vpxorq %4,%%zmm4,%%zmm4\n\t"
545 "vpxorq %5,%%zmm6,%%zmm6\n\t"
546 "vpxorq %6,%%zmm12,%%zmm12\n\t"
547 "vpxorq %7,%%zmm14,%%zmm14\n\t"
548 "vmovntdq %%zmm4,%4\n\t"
549 "vmovntdq %%zmm6,%5\n\t"
550 "vmovntdq %%zmm12,%6\n\t"
551 "vmovntdq %%zmm14,%7"
552 :
553 : "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]),
554 "m" (p[d+192]), "m" (q[d]), "m" (q[d+64]),
555 "m" (q[d+128]), "m" (q[d+192]));
556 }
557 asm volatile("sfence" : : : "memory");
558 kernel_fpu_end();
559}
/*
 * Method table for the unrolled-by-4 AVX512 routines (256 bytes per pass).
 * The initializers are positional: syndrome generator, syndrome updater,
 * CPU-support check, algorithm name and the cache-hint flag.
 */
560const struct raid6_calls raid6_avx512x4 = {
561 raid6_avx5124_gen_syndrome,
562 raid6_avx5124_xor_syndrome,
563 raid6_have_avx512,
564 "avx512x4",
565 1 /* Has cache hints */
566};
567#endif
568
569#endif /* CONFIG_AS_AVX512 */
diff --git a/lib/raid6/recov_avx512.c b/lib/raid6/recov_avx512.c
new file mode 100644
index 000000000000..625aafa33b61
--- /dev/null
+++ b/lib/raid6/recov_avx512.c
@@ -0,0 +1,388 @@
1/*
2 * Copyright (C) 2016 Intel Corporation
3 *
4 * Author: Gayatri Kammela <gayatri.kammela@intel.com>
5 * Author: Megha Dey <megha.dey@linux.intel.com>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; version 2
10 * of the License.
11 *
12 */
13
14#ifdef CONFIG_AS_AVX512
15
16#include <linux/raid/pq.h>
17#include "x86.h"
18
19static int raid6_has_avx512(void)
20{
21 return boot_cpu_has(X86_FEATURE_AVX2) &&
22 boot_cpu_has(X86_FEATURE_AVX) &&
23 boot_cpu_has(X86_FEATURE_AVX512F) &&
24 boot_cpu_has(X86_FEATURE_AVX512BW) &&
25 boot_cpu_has(X86_FEATURE_AVX512VL) &&
26 boot_cpu_has(X86_FEATURE_AVX512DQ);
27}
28
/*
 * Recover two failed data blocks using the P and Q blocks.
 *
 * disks: number of entries in ptrs; ptrs[disks-2] is P, ptrs[disks-1] is Q.
 * bytes: length of each block.
 * faila: index of the first failed data block (rebuilt into ptrs[faila]).
 * failb: index of the second failed data block (rebuilt into ptrs[failb]).
 * ptrs:  block pointers; temporarily rewritten and restored before the
 *        recovery loop runs.
 *
 * NOTE(review): the loop is 'while (bytes)' with bytes reduced by 128
 * (64 without CONFIG_X86_64) per pass, so it presumably expects bytes to
 * be a multiple of that step -- confirm against callers.  The index
 * failb-faila suggests faila < failb is expected as well.
 */
29static void raid6_2data_recov_avx512(int disks, size_t bytes, int faila,
30 int failb, void **ptrs)
31{
32 u8 *p, *q, *dp, *dq;
33 const u8 *pbmul; /* P multiplier table for B data */
34 const u8 *qmul; /* Q multiplier table (for both) */
35 const u8 x0f = 0x0f;
36
37 p = (u8 *)ptrs[disks-2];
38 q = (u8 *)ptrs[disks-1];
39
40 /*
41 * Compute syndrome with zero for the missing data pages
42 * Use the dead data pages as temporary storage for
43 * delta p and delta q
44 */
45
46 dp = (u8 *)ptrs[faila];
47 ptrs[faila] = (void *)raid6_empty_zero_page;
48 ptrs[disks-2] = dp;
49 dq = (u8 *)ptrs[failb];
50 ptrs[failb] = (void *)raid6_empty_zero_page;
51 ptrs[disks-1] = dq;
52
53 raid6_call.gen_syndrome(disks, bytes, ptrs);
54
55 /* Restore pointer table */
56 ptrs[faila] = dp;
57 ptrs[failb] = dq;
58 ptrs[disks-2] = p;
59 ptrs[disks-1] = q;
60
61 /* Now, pick the proper data tables */
62 pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
63 qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
64 raid6_gfexp[failb]]];
65
66 kernel_fpu_begin();
67
68 /* zmm7 = x0f[64] */
69 asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));
70
71 while (bytes) {
72#ifdef CONFIG_X86_64
/* Two 64-byte lanes per pass: load Q and P and XOR in delta Q/P */
73 asm volatile("vmovdqa64 %0, %%zmm1\n\t"
74 "vmovdqa64 %1, %%zmm9\n\t"
75 "vmovdqa64 %2, %%zmm0\n\t"
76 "vmovdqa64 %3, %%zmm8\n\t"
77 "vpxorq %4, %%zmm1, %%zmm1\n\t"
78 "vpxorq %5, %%zmm9, %%zmm9\n\t"
79 "vpxorq %6, %%zmm0, %%zmm0\n\t"
80 "vpxorq %7, %%zmm8, %%zmm8"
81 :
82 : "m" (q[0]), "m" (q[64]), "m" (p[0]),
83 "m" (p[64]), "m" (dq[0]), "m" (dq[64]),
84 "m" (dp[0]), "m" (dp[64]));
85
86 /*
87 * 1 = dq[0] ^ q[0]
88 * 9 = dq[64] ^ q[64]
89 * 0 = dp[0] ^ p[0]
90 * 8 = dp[64] ^ p[64]
91 */
92
/* Broadcast the two 16-byte halves of the qmul table (bytes 0-15, 16-31) */
93 asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
94 "vbroadcasti64x2 %1, %%zmm5"
95 :
96 : "m" (qmul[0]), "m" (qmul[16]));
97
/*
 * Split each byte into low and high nibble (mask 0x0f), look both up
 * with vpshufb in the two table halves and XOR the results together.
 */
98 asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
99 "vpsraw $4, %%zmm9, %%zmm12\n\t"
100 "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
101 "vpandq %%zmm7, %%zmm9, %%zmm9\n\t"
102 "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
103 "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
104 "vpshufb %%zmm9, %%zmm4, %%zmm14\n\t"
105 "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
106 "vpshufb %%zmm12, %%zmm5, %%zmm15\n\t"
107 "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
108 "vpxorq %%zmm14, %%zmm15, %%zmm15\n\t"
109 "vpxorq %%zmm4, %%zmm5, %%zmm5"
110 :
111 : );
112
113 /*
114 * 5 = qx[0]
115 * 15 = qx[64]
116 */
117
/* Same nibble lookup for the P delta, using the pbmul table */
118 asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
119 "vbroadcasti64x2 %1, %%zmm1\n\t"
120 "vpsraw $4, %%zmm0, %%zmm2\n\t"
121 "vpsraw $4, %%zmm8, %%zmm6\n\t"
122 "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
123 "vpandq %%zmm7, %%zmm8, %%zmm14\n\t"
124 "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
125 "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
126 "vpshufb %%zmm14, %%zmm4, %%zmm12\n\t"
127 "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
128 "vpshufb %%zmm6, %%zmm1, %%zmm13\n\t"
129 "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
130 "vpxorq %%zmm4, %%zmm1, %%zmm1\n\t"
131 "vpxorq %%zmm12, %%zmm13, %%zmm13"
132 :
133 : "m" (pbmul[0]), "m" (pbmul[16]));
134
135 /*
136 * 1 = pbmul[px[0]]
137 * 13 = pbmul[px[64]]
138 */
139 asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
140 "vpxorq %%zmm15, %%zmm13, %%zmm13"
141 :
142 : );
143
144 /*
145 * 1 = db = DQ
146 * 13 = db[64] = DQ[64]
147 */
/* Store the result in dq, then XOR it into the P delta for the other block */
148 asm volatile("vmovdqa64 %%zmm1, %0\n\t"
149 "vmovdqa64 %%zmm13,%1\n\t"
150 "vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
151 "vpxorq %%zmm13, %%zmm8, %%zmm8"
152 :
153 : "m" (dq[0]), "m" (dq[64]));
154
155 asm volatile("vmovdqa64 %%zmm0, %0\n\t"
156 "vmovdqa64 %%zmm8, %1"
157 :
158 : "m" (dp[0]), "m" (dp[64]));
159
160 bytes -= 128;
161 p += 128;
162 q += 128;
163 dp += 128;
164 dq += 128;
165#else
/* One 64-byte lane per pass; same steps as the CONFIG_X86_64 path */
166 asm volatile("vmovdqa64 %0, %%zmm1\n\t"
167 "vmovdqa64 %1, %%zmm0\n\t"
168 "vpxorq %2, %%zmm1, %%zmm1\n\t"
169 "vpxorq %3, %%zmm0, %%zmm0"
170 :
171 : "m" (*q), "m" (*p), "m"(*dq), "m" (*dp));
172
173 /* 1 = dq ^ q; 0 = dp ^ p */
174
175 asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
176 "vbroadcasti64x2 %1, %%zmm5"
177 :
178 : "m" (qmul[0]), "m" (qmul[16]));
179
180 /*
181 * 1 = dq ^ q
182 * 3 = (dq ^ q) >> 4
183 */
184 asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
185 "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
186 "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
187 "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
188 "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
189 "vpxorq %%zmm4, %%zmm5, %%zmm5"
190 :
191 : );
192
193 /* 5 = qx */
194
195 asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
196 "vbroadcasti64x2 %1, %%zmm1"
197 :
198 : "m" (pbmul[0]), "m" (pbmul[16]));
199
200 asm volatile("vpsraw $4, %%zmm0, %%zmm2\n\t"
201 "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
202 "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
203 "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
204 "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
205 "vpxorq %%zmm4, %%zmm1, %%zmm1"
206 :
207 : );
208
209 /* 1 = pbmul[px] */
210 asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
211 /* 1 = db = DQ */
212 "vmovdqa64 %%zmm1, %0\n\t"
213 :
214 : "m" (dq[0]));
215
216 asm volatile("vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
217 "vmovdqa64 %%zmm0, %0"
218 :
219 : "m" (dp[0]));
220
221 bytes -= 64;
222 p += 64;
223 q += 64;
224 dp += 64;
225 dq += 64;
226#endif
227 }
228
229 kernel_fpu_end();
230}
231
/*
 * Recover one failed data block together with the P block, using Q.
 *
 * disks: number of entries in ptrs; ptrs[disks-2] is P, ptrs[disks-1] is Q.
 * bytes: length of each block.
 * faila: index of the failed data block (rebuilt into ptrs[faila]).
 * ptrs:  block pointers; temporarily rewritten and restored before the
 *        recovery loop runs.
 *
 * The loop writes the recovered data into ptrs[faila] and updates P in
 * place.
 *
 * NOTE(review): the loop is 'while (bytes)' with bytes reduced by 128
 * (64 without CONFIG_X86_64) per pass, so it presumably expects bytes to
 * be a multiple of that step -- confirm against callers.
 */
232static void raid6_datap_recov_avx512(int disks, size_t bytes, int faila,
233 void **ptrs)
234{
235 u8 *p, *q, *dq;
236 const u8 *qmul; /* Q multiplier table */
237 const u8 x0f = 0x0f;
238
239 p = (u8 *)ptrs[disks-2];
240 q = (u8 *)ptrs[disks-1];
241
242 /*
243 * Compute syndrome with zero for the missing data page
244 * Use the dead data page as temporary storage for delta q
245 */
246
247 dq = (u8 *)ptrs[faila];
248 ptrs[faila] = (void *)raid6_empty_zero_page;
249 ptrs[disks-1] = dq;
250
251 raid6_call.gen_syndrome(disks, bytes, ptrs);
252
253 /* Restore pointer table */
254 ptrs[faila] = dq;
255 ptrs[disks-1] = q;
256
257 /* Now, pick the proper data tables */
258 qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
259
260 kernel_fpu_begin();
261
/* zmm7 = 0x0f in every byte (nibble mask) */
262 asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));
263
264 while (bytes) {
265#ifdef CONFIG_X86_64
/* Two 64-byte lanes per pass */
266 asm volatile("vmovdqa64 %0, %%zmm3\n\t"
267 "vmovdqa64 %1, %%zmm8\n\t"
268 "vpxorq %2, %%zmm3, %%zmm3\n\t"
269 "vpxorq %3, %%zmm8, %%zmm8"
270 :
271 : "m" (dq[0]), "m" (dq[64]), "m" (q[0]),
272 "m" (q[64]));
273
274 /*
275 * 3 = q[0] ^ dq[0]
276 * 8 = q[64] ^ dq[64]
277 */
/* Broadcast both 16-byte halves of qmul, one copy per lane */
278 asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
279 "vmovapd %%zmm0, %%zmm13\n\t"
280 "vbroadcasti64x2 %1, %%zmm1\n\t"
281 "vmovapd %%zmm1, %%zmm14"
282 :
283 : "m" (qmul[0]), "m" (qmul[16]));
284
/*
 * Split each byte into low and high nibble (mask 0x0f), look both up
 * with vpshufb in the two table halves and XOR the results together.
 */
285 asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
286 "vpsraw $4, %%zmm8, %%zmm12\n\t"
287 "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
288 "vpandq %%zmm7, %%zmm8, %%zmm8\n\t"
289 "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
290 "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
291 "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
292 "vpshufb %%zmm8, %%zmm13, %%zmm13\n\t"
293 "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
294 "vpshufb %%zmm12, %%zmm14, %%zmm14\n\t"
295 "vpxorq %%zmm0, %%zmm1, %%zmm1\n\t"
296 "vpxorq %%zmm13, %%zmm14, %%zmm14"
297 :
298 : );
299
300 /*
301 * 1 = qmul[q[0] ^ dq[0]]
302 * 14 = qmul[q[64] ^ dq[64]]
303 */
304 asm volatile("vmovdqa64 %0, %%zmm2\n\t"
305 "vmovdqa64 %1, %%zmm12\n\t"
306 "vpxorq %%zmm1, %%zmm2, %%zmm2\n\t"
307 "vpxorq %%zmm14, %%zmm12, %%zmm12"
308 :
309 : "m" (p[0]), "m" (p[64]));
310
311 /*
312 * 2 = p[0] ^ qmul[q[0] ^ dq[0]]
313 * 12 = p[64] ^ qmul[q[64] ^ dq[64]]
314 */
315
/* Write the recovered data block and the updated P */
316 asm volatile("vmovdqa64 %%zmm1, %0\n\t"
317 "vmovdqa64 %%zmm14, %1\n\t"
318 "vmovdqa64 %%zmm2, %2\n\t"
319 "vmovdqa64 %%zmm12,%3"
320 :
321 : "m" (dq[0]), "m" (dq[64]), "m" (p[0]),
322 "m" (p[64]));
323
324 bytes -= 128;
325 p += 128;
326 q += 128;
327 dq += 128;
328#else
/* One 64-byte lane per pass; same steps as the CONFIG_X86_64 path */
329 asm volatile("vmovdqa64 %0, %%zmm3\n\t"
330 "vpxorq %1, %%zmm3, %%zmm3"
331 :
332 : "m" (dq[0]), "m" (q[0]));
333
334 /* 3 = q ^ dq */
335
336 asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
337 "vbroadcasti64x2 %1, %%zmm1"
338 :
339 : "m" (qmul[0]), "m" (qmul[16]));
340
341 asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
342 "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
343 "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
344 "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
345 "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
346 "vpxorq %%zmm0, %%zmm1, %%zmm1"
347 :
348 : );
349
350 /* 1 = qmul[q ^ dq] */
351
352 asm volatile("vmovdqa64 %0, %%zmm2\n\t"
353 "vpxorq %%zmm1, %%zmm2, %%zmm2"
354 :
355 : "m" (p[0]));
356
357 /* 2 = p ^ qmul[q ^ dq] */
358
359 asm volatile("vmovdqa64 %%zmm1, %0\n\t"
360 "vmovdqa64 %%zmm2, %1"
361 :
362 : "m" (dq[0]), "m" (p[0]));
363
364 bytes -= 64;
365 p += 64;
366 q += 64;
367 dq += 64;
368#endif
369 }
370
371 kernel_fpu_end();
372}
373
/*
 * Method table for the AVX512 recovery routines.  The name reflects how
 * many 64-byte lanes the loops above handle per pass: two with
 * CONFIG_X86_64, one otherwise.
 */
374const struct raid6_recov_calls raid6_recov_avx512 = {
375 .data2 = raid6_2data_recov_avx512,
376 .datap = raid6_datap_recov_avx512,
377 .valid = raid6_has_avx512,
378#ifdef CONFIG_X86_64
379 .name = "avx512x2",
380#else
381 .name = "avx512x1",
382#endif
383 .priority = 3,
384};
385
386#else
387#warning "your version of binutils lacks AVX512 support"
388#endif
diff --git a/lib/raid6/recov_s390xc.c b/lib/raid6/recov_s390xc.c
new file mode 100644
index 000000000000..b042dac826cc
--- /dev/null
+++ b/lib/raid6/recov_s390xc.c
@@ -0,0 +1,116 @@
1/*
2 * RAID-6 data recovery in dual failure mode based on the XC instruction.
3 *
4 * Copyright IBM Corp. 2016
5 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
6 */
7
8#include <linux/export.h>
9#include <linux/raid/pq.h>
10
11static inline void xor_block(u8 *p1, u8 *p2)
12{
13 typedef struct { u8 _[256]; } addrtype;
14
15 asm volatile(
16 " xc 0(256,%[p1]),0(%[p2])\n"
17 : "+m" (*(addrtype *) p1) : "m" (*(addrtype *) p2),
18 [p1] "a" (p1), [p2] "a" (p2) : "cc");
19}
20
21/* Recover two failed data blocks. */
22static void raid6_2data_recov_s390xc(int disks, size_t bytes, int faila,
23 int failb, void **ptrs)
24{
25 u8 *p, *q, *dp, *dq;
26 const u8 *pbmul; /* P multiplier table for B data */
27 const u8 *qmul; /* Q multiplier table (for both) */
28 int i;
29
30 p = (u8 *)ptrs[disks-2];
31 q = (u8 *)ptrs[disks-1];
32
33 /* Compute syndrome with zero for the missing data pages
34 Use the dead data pages as temporary storage for
35 delta p and delta q */
36 dp = (u8 *)ptrs[faila];
37 ptrs[faila] = (void *)raid6_empty_zero_page;
38 ptrs[disks-2] = dp;
39 dq = (u8 *)ptrs[failb];
40 ptrs[failb] = (void *)raid6_empty_zero_page;
41 ptrs[disks-1] = dq;
42
43 raid6_call.gen_syndrome(disks, bytes, ptrs);
44
45 /* Restore pointer table */
46 ptrs[faila] = dp;
47 ptrs[failb] = dq;
48 ptrs[disks-2] = p;
49 ptrs[disks-1] = q;
50
51 /* Now, pick the proper data tables */
52 pbmul = raid6_gfmul[raid6_gfexi[failb-faila]];
53 qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]];
54
55 /* Now do it... */
56 while (bytes) {
57 xor_block(dp, p);
58 xor_block(dq, q);
59 for (i = 0; i < 256; i++)
60 dq[i] = pbmul[dp[i]] ^ qmul[dq[i]];
61 xor_block(dp, dq);
62 p += 256;
63 q += 256;
64 dp += 256;
65 dq += 256;
66 bytes -= 256;
67 }
68}
69
70/* Recover failure of one data block plus the P block */
71static void raid6_datap_recov_s390xc(int disks, size_t bytes, int faila,
72 void **ptrs)
73{
74 u8 *p, *q, *dq;
75 const u8 *qmul; /* Q multiplier table */
76 int i;
77
78 p = (u8 *)ptrs[disks-2];
79 q = (u8 *)ptrs[disks-1];
80
81 /* Compute syndrome with zero for the missing data page
82 Use the dead data page as temporary storage for delta q */
83 dq = (u8 *)ptrs[faila];
84 ptrs[faila] = (void *)raid6_empty_zero_page;
85 ptrs[disks-1] = dq;
86
87 raid6_call.gen_syndrome(disks, bytes, ptrs);
88
89 /* Restore pointer table */
90 ptrs[faila] = dq;
91 ptrs[disks-1] = q;
92
93 /* Now, pick the proper data tables */
94 qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]]];
95
96 /* Now do it... */
97 while (bytes) {
98 xor_block(dq, q);
99 for (i = 0; i < 256; i++)
100 dq[i] = qmul[dq[i]];
101 xor_block(p, dq);
102 p += 256;
103 q += 256;
104 dq += 256;
105 bytes -= 256;
106 }
107}
108
109
110const struct raid6_recov_calls raid6_recov_s390xc = {
111 .data2 = raid6_2data_recov_s390xc,
112 .datap = raid6_datap_recov_s390xc,
113 .valid = NULL,
114 .name = "s390xc",
115 .priority = 1,
116};
diff --git a/lib/raid6/s390vx.uc b/lib/raid6/s390vx.uc
new file mode 100644
index 000000000000..7b45191a655f
--- /dev/null
+++ b/lib/raid6/s390vx.uc
@@ -0,0 +1,168 @@
1/*
2 * raid6_vx$#.c
3 *
4 * $#-way unrolled RAID6 gen/xor functions for s390
5 * based on the vector facility
6 *
7 * Copyright IBM Corp. 2016
8 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
9 *
10 * This file is postprocessed using unroll.awk.
11 */
12
13#include <linux/raid/pq.h>
14#include <asm/fpu/api.h>
15
16asm(".include \"asm/vx-insn.h\"\n");
17
18#define NSIZE 16
19
20static inline void LOAD_CONST(void)
21{
22 asm volatile("VREPIB %v24,7");
23 asm volatile("VREPIB %v25,0x1d");
24}
25
26/*
27 * The SHLBYTE() operation shifts each of the 16 bytes in
28 * vector register y left by 1 bit and stores the result in
29 * vector register x.
30 */
31static inline void SHLBYTE(int x, int y)
32{
33 asm volatile ("VAB %0,%1,%1" : : "i" (x), "i" (y));
34}
35
36/*
37 * For each of the 16 bytes in the vector register y the MASK()
38 * operation returns 0xFF if the high bit of the byte is 1,
39 * or 0x00 if the high bit is 0. The result is stored in vector
40 * register x.
41 */
42static inline void MASK(int x, int y)
43{
44 asm volatile ("VESRAVB %0,%1,24" : : "i" (x), "i" (y));
45}
46
47static inline void AND(int x, int y, int z)
48{
49 asm volatile ("VN %0,%1,%2" : : "i" (x), "i" (y), "i" (z));
50}
51
52static inline void XOR(int x, int y, int z)
53{
54 asm volatile ("VX %0,%1,%2" : : "i" (x), "i" (y), "i" (z));
55}
56
57static inline void LOAD_DATA(int x, int n, u8 *ptr)
58{
59 typedef struct { u8 _[16*n]; } addrtype;
60 register addrtype *__ptr asm("1") = (addrtype *) ptr;
61
62 asm volatile ("VLM %2,%3,0,%r1"
63 : : "m" (*__ptr), "a" (__ptr), "i" (x), "i" (x + n - 1));
64}
65
66static inline void STORE_DATA(int x, int n, u8 *ptr)
67{
68 typedef struct { u8 _[16*n]; } addrtype;
69 register addrtype *__ptr asm("1") = (addrtype *) ptr;
70
71 asm volatile ("VSTM %2,%3,0,1"
72 : "=m" (*__ptr) : "a" (__ptr), "i" (x), "i" (x + n - 1));
73}
74
75static inline void COPY_VEC(int x, int y)
76{
77 asm volatile ("VLR %0,%1" : : "i" (x), "i" (y));
78}
79
80static void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
81{
82 struct kernel_fpu vxstate;
83 u8 **dptr, *p, *q;
84 int d, z, z0;
85
86 kernel_fpu_begin(&vxstate, KERNEL_VXR);
87 LOAD_CONST();
88
89 dptr = (u8 **) ptrs;
90 z0 = disks - 3; /* Highest data disk */
91 p = dptr[z0 + 1]; /* XOR parity */
92 q = dptr[z0 + 2]; /* RS syndrome */
93
94 for (d = 0; d < bytes; d += $#*NSIZE) {
95 LOAD_DATA(0,$#,&dptr[z0][d]);
96 COPY_VEC(8+$$,0+$$);
97 for (z = z0 - 1; z >= 0; z--) {
98 MASK(16+$$,8+$$);
99 AND(16+$$,16+$$,25);
100 SHLBYTE(8+$$,8+$$);
101 XOR(8+$$,8+$$,16+$$);
102 LOAD_DATA(16,$#,&dptr[z][d]);
103 XOR(0+$$,0+$$,16+$$);
104 XOR(8+$$,8+$$,16+$$);
105 }
106 STORE_DATA(0,$#,&p[d]);
107 STORE_DATA(8,$#,&q[d]);
108 }
109 kernel_fpu_end(&vxstate, KERNEL_VXR);
110}
111
112static void raid6_s390vx$#_xor_syndrome(int disks, int start, int stop,
113 size_t bytes, void **ptrs)
114{
115 struct kernel_fpu vxstate;
116 u8 **dptr, *p, *q;
117 int d, z, z0;
118
119 dptr = (u8 **) ptrs;
120 z0 = stop; /* P/Q right side optimization */
121 p = dptr[disks - 2]; /* XOR parity */
122 q = dptr[disks - 1]; /* RS syndrome */
123
124 kernel_fpu_begin(&vxstate, KERNEL_VXR);
125 LOAD_CONST();
126
127 for (d = 0; d < bytes; d += $#*NSIZE) {
128 /* P/Q data pages */
129 LOAD_DATA(0,$#,&dptr[z0][d]);
130 COPY_VEC(8+$$,0+$$);
131 for (z = z0 - 1; z >= start; z--) {
132 MASK(16+$$,8+$$);
133 AND(16+$$,16+$$,25);
134 SHLBYTE(8+$$,8+$$);
135 XOR(8+$$,8+$$,16+$$);
136 LOAD_DATA(16,$#,&dptr[z][d]);
137 XOR(0+$$,0+$$,16+$$);
138 XOR(8+$$,8+$$,16+$$);
139 }
140 /* P/Q left side optimization */
141 for (z = start - 1; z >= 0; z--) {
142 MASK(16+$$,8+$$);
143 AND(16+$$,16+$$,25);
144 SHLBYTE(8+$$,8+$$);
145 XOR(8+$$,8+$$,16+$$);
146 }
147 LOAD_DATA(16,$#,&p[d]);
148 XOR(16+$$,16+$$,0+$$);
149 STORE_DATA(16,$#,&p[d]);
150 LOAD_DATA(16,$#,&q[d]);
151 XOR(16+$$,16+$$,8+$$);
152 STORE_DATA(16,$#,&q[d]);
153 }
154 kernel_fpu_end(&vxstate, KERNEL_VXR);
155}
156
157static int raid6_s390vx$#_valid(void)
158{
159 return MACHINE_HAS_VX;
160}
161
162const struct raid6_calls raid6_s390vx$# = {
163 raid6_s390vx$#_gen_syndrome,
164 raid6_s390vx$#_xor_syndrome,
165 raid6_s390vx$#_valid,
166 "vx128x$#",
167 1
168};
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index 29090f3db677..2c7b60edea04 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -32,10 +32,13 @@ ifeq ($(ARCH),arm64)
32endif 32endif
33 33
34ifeq ($(IS_X86),yes) 34ifeq ($(IS_X86),yes)
35 OBJS += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o 35 OBJS += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o avx512.o recov_avx512.o
36 CFLAGS += $(shell echo "vpbroadcastb %xmm0, %ymm1" | \ 36 CFLAGS += $(shell echo "vpbroadcastb %xmm0, %ymm1" | \
37 gcc -c -x assembler - >&/dev/null && \ 37 gcc -c -x assembler - >&/dev/null && \
38 rm ./-.o && echo -DCONFIG_AS_AVX2=1) 38 rm ./-.o && echo -DCONFIG_AS_AVX2=1)
39 CFLAGS += $(shell echo "vpmovm2b %k1, %zmm5" | \
40 gcc -c -x assembler - >&/dev/null && \
41 rm ./-.o && echo -DCONFIG_AS_AVX512=1)
39else ifeq ($(HAS_NEON),yes) 42else ifeq ($(HAS_NEON),yes)
40 OBJS += neon.o neon1.o neon2.o neon4.o neon8.o 43 OBJS += neon.o neon1.o neon2.o neon4.o neon8.o
41 CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1 44 CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1
diff --git a/lib/raid6/test/test.c b/lib/raid6/test/test.c
index 3bebbabdb510..b07f4d8e6b03 100644
--- a/lib/raid6/test/test.c
+++ b/lib/raid6/test/test.c
@@ -21,12 +21,13 @@
21 21
22#define NDISKS 16 /* Including P and Q */ 22#define NDISKS 16 /* Including P and Q */
23 23
24const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); 24const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE)));
25struct raid6_calls raid6_call; 25struct raid6_calls raid6_call;
26 26
27char *dataptrs[NDISKS]; 27char *dataptrs[NDISKS];
28char data[NDISKS][PAGE_SIZE]; 28char data[NDISKS][PAGE_SIZE] __attribute__((aligned(PAGE_SIZE)));
29char recovi[PAGE_SIZE], recovj[PAGE_SIZE]; 29char recovi[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE)));
30char recovj[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE)));
30 31
31static void makedata(int start, int stop) 32static void makedata(int start, int stop)
32{ 33{
diff --git a/lib/raid6/x86.h b/lib/raid6/x86.h
index 8fe9d9662abb..834d268a4b05 100644
--- a/lib/raid6/x86.h
+++ b/lib/raid6/x86.h
@@ -46,6 +46,16 @@ static inline void kernel_fpu_end(void)
46#define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */ 46#define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */
47#define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */ 47#define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */
48#define X86_FEATURE_AVX2 (9*32+ 5) /* AVX2 instructions */ 48#define X86_FEATURE_AVX2 (9*32+ 5) /* AVX2 instructions */
49#define X86_FEATURE_AVX512F (9*32+16) /* AVX-512 Foundation */
50#define X86_FEATURE_AVX512DQ (9*32+17) /* AVX-512 DQ (Double/Quad granular)
51 * Instructions
52 */
53#define X86_FEATURE_AVX512BW (9*32+30) /* AVX-512 BW (Byte/Word granular)
54 * Instructions
55 */
56#define X86_FEATURE_AVX512VL (9*32+31) /* AVX-512 VL (128/256 Vector Length)
57 * Extensions
58 */
49#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ 59#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */
50 60
51/* Should work well enough on modern CPUs for testing */ 61/* Should work well enough on modern CPUs for testing */