diff options
-rw-r--r-- | include/linux/raid/pq.h | 1 | ||||
-rw-r--r-- | lib/raid6/Makefile | 2 | ||||
-rw-r--r-- | lib/raid6/algos.c | 3 | ||||
-rw-r--r-- | lib/raid6/recov_avx512.c | 388 |
4 files changed, 393 insertions, 1 deletions
diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index 0c529a55b52e..1abd89584568 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h | |||
@@ -118,6 +118,7 @@ struct raid6_recov_calls { | |||
118 | extern const struct raid6_recov_calls raid6_recov_intx1; | 118 | extern const struct raid6_recov_calls raid6_recov_intx1; |
119 | extern const struct raid6_recov_calls raid6_recov_ssse3; | 119 | extern const struct raid6_recov_calls raid6_recov_ssse3; |
120 | extern const struct raid6_recov_calls raid6_recov_avx2; | 120 | extern const struct raid6_recov_calls raid6_recov_avx2; |
121 | extern const struct raid6_recov_calls raid6_recov_avx512; | ||
121 | 122 | ||
122 | extern const struct raid6_calls raid6_neonx1; | 123 | extern const struct raid6_calls raid6_neonx1; |
123 | extern const struct raid6_calls raid6_neonx2; | 124 | extern const struct raid6_calls raid6_neonx2; |
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index 8948268d47b4..cd05ee1fb809 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile | |||
@@ -3,7 +3,7 @@ obj-$(CONFIG_RAID6_PQ) += raid6_pq.o | |||
3 | raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \ | 3 | raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \ |
4 | int8.o int16.o int32.o | 4 | int8.o int16.o int32.o |
5 | 5 | ||
6 | raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o avx512.o | 6 | raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o avx512.o recov_avx512.o |
7 | raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o | 7 | raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o |
8 | raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o | 8 | raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o |
9 | raid6_pq-$(CONFIG_TILEGX) += tilegx8.o | 9 | raid6_pq-$(CONFIG_TILEGX) += tilegx8.o |
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index f5f090c52dd9..149d947a4fec 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c | |||
@@ -98,6 +98,9 @@ void (*raid6_datap_recov)(int, size_t, int, void **); | |||
98 | EXPORT_SYMBOL_GPL(raid6_datap_recov); | 98 | EXPORT_SYMBOL_GPL(raid6_datap_recov); |
99 | 99 | ||
100 | const struct raid6_recov_calls *const raid6_recov_algos[] = { | 100 | const struct raid6_recov_calls *const raid6_recov_algos[] = { |
101 | #ifdef CONFIG_AS_AVX512 | ||
102 | &raid6_recov_avx512, | ||
103 | #endif | ||
101 | #ifdef CONFIG_AS_AVX2 | 104 | #ifdef CONFIG_AS_AVX2 |
102 | &raid6_recov_avx2, | 105 | &raid6_recov_avx2, |
103 | #endif | 106 | #endif |
diff --git a/lib/raid6/recov_avx512.c b/lib/raid6/recov_avx512.c new file mode 100644 index 000000000000..625aafa33b61 --- /dev/null +++ b/lib/raid6/recov_avx512.c | |||
@@ -0,0 +1,388 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2016 Intel Corporation | ||
3 | * | ||
4 | * Author: Gayatri Kammela <gayatri.kammela@intel.com> | ||
5 | * Author: Megha Dey <megha.dey@linux.intel.com> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; version 2 | ||
10 | * of the License. | ||
11 | * | ||
12 | */ | ||
13 | |||
14 | #ifdef CONFIG_AS_AVX512 | ||
15 | |||
16 | #include <linux/raid/pq.h> | ||
17 | #include "x86.h" | ||
18 | |||
19 | static int raid6_has_avx512(void) | ||
20 | { | ||
21 | return boot_cpu_has(X86_FEATURE_AVX2) && | ||
22 | boot_cpu_has(X86_FEATURE_AVX) && | ||
23 | boot_cpu_has(X86_FEATURE_AVX512F) && | ||
24 | boot_cpu_has(X86_FEATURE_AVX512BW) && | ||
25 | boot_cpu_has(X86_FEATURE_AVX512VL) && | ||
26 | boot_cpu_has(X86_FEATURE_AVX512DQ); | ||
27 | } | ||
28 | |||
29 | static void raid6_2data_recov_avx512(int disks, size_t bytes, int faila, | ||
30 | int failb, void **ptrs) | ||
31 | { | ||
32 | u8 *p, *q, *dp, *dq; | ||
33 | const u8 *pbmul; /* P multiplier table for B data */ | ||
34 | const u8 *qmul; /* Q multiplier table (for both) */ | ||
35 | const u8 x0f = 0x0f; | ||
36 | |||
37 | p = (u8 *)ptrs[disks-2]; | ||
38 | q = (u8 *)ptrs[disks-1]; | ||
39 | |||
40 | /* | ||
41 | * Compute syndrome with zero for the missing data pages | ||
42 | * Use the dead data pages as temporary storage for | ||
43 | * delta p and delta q | ||
44 | */ | ||
45 | |||
46 | dp = (u8 *)ptrs[faila]; | ||
47 | ptrs[faila] = (void *)raid6_empty_zero_page; | ||
48 | ptrs[disks-2] = dp; | ||
49 | dq = (u8 *)ptrs[failb]; | ||
50 | ptrs[failb] = (void *)raid6_empty_zero_page; | ||
51 | ptrs[disks-1] = dq; | ||
52 | |||
53 | raid6_call.gen_syndrome(disks, bytes, ptrs); | ||
54 | |||
55 | /* Restore pointer table */ | ||
56 | ptrs[faila] = dp; | ||
57 | ptrs[failb] = dq; | ||
58 | ptrs[disks-2] = p; | ||
59 | ptrs[disks-1] = q; | ||
60 | |||
61 | /* Now, pick the proper data tables */ | ||
62 | pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]]; | ||
63 | qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ | ||
64 | raid6_gfexp[failb]]]; | ||
65 | |||
66 | kernel_fpu_begin(); | ||
67 | |||
68 | /* zmm0 = x0f[16] */ | ||
69 | asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f)); | ||
70 | |||
71 | while (bytes) { | ||
72 | #ifdef CONFIG_X86_64 | ||
73 | asm volatile("vmovdqa64 %0, %%zmm1\n\t" | ||
74 | "vmovdqa64 %1, %%zmm9\n\t" | ||
75 | "vmovdqa64 %2, %%zmm0\n\t" | ||
76 | "vmovdqa64 %3, %%zmm8\n\t" | ||
77 | "vpxorq %4, %%zmm1, %%zmm1\n\t" | ||
78 | "vpxorq %5, %%zmm9, %%zmm9\n\t" | ||
79 | "vpxorq %6, %%zmm0, %%zmm0\n\t" | ||
80 | "vpxorq %7, %%zmm8, %%zmm8" | ||
81 | : | ||
82 | : "m" (q[0]), "m" (q[64]), "m" (p[0]), | ||
83 | "m" (p[64]), "m" (dq[0]), "m" (dq[64]), | ||
84 | "m" (dp[0]), "m" (dp[64])); | ||
85 | |||
86 | /* | ||
87 | * 1 = dq[0] ^ q[0] | ||
88 | * 9 = dq[64] ^ q[64] | ||
89 | * 0 = dp[0] ^ p[0] | ||
90 | * 8 = dp[64] ^ p[64] | ||
91 | */ | ||
92 | |||
93 | asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t" | ||
94 | "vbroadcasti64x2 %1, %%zmm5" | ||
95 | : | ||
96 | : "m" (qmul[0]), "m" (qmul[16])); | ||
97 | |||
98 | asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t" | ||
99 | "vpsraw $4, %%zmm9, %%zmm12\n\t" | ||
100 | "vpandq %%zmm7, %%zmm1, %%zmm1\n\t" | ||
101 | "vpandq %%zmm7, %%zmm9, %%zmm9\n\t" | ||
102 | "vpandq %%zmm7, %%zmm3, %%zmm3\n\t" | ||
103 | "vpandq %%zmm7, %%zmm12, %%zmm12\n\t" | ||
104 | "vpshufb %%zmm9, %%zmm4, %%zmm14\n\t" | ||
105 | "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t" | ||
106 | "vpshufb %%zmm12, %%zmm5, %%zmm15\n\t" | ||
107 | "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t" | ||
108 | "vpxorq %%zmm14, %%zmm15, %%zmm15\n\t" | ||
109 | "vpxorq %%zmm4, %%zmm5, %%zmm5" | ||
110 | : | ||
111 | : ); | ||
112 | |||
113 | /* | ||
114 | * 5 = qx[0] | ||
115 | * 15 = qx[64] | ||
116 | */ | ||
117 | |||
118 | asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t" | ||
119 | "vbroadcasti64x2 %1, %%zmm1\n\t" | ||
120 | "vpsraw $4, %%zmm0, %%zmm2\n\t" | ||
121 | "vpsraw $4, %%zmm8, %%zmm6\n\t" | ||
122 | "vpandq %%zmm7, %%zmm0, %%zmm3\n\t" | ||
123 | "vpandq %%zmm7, %%zmm8, %%zmm14\n\t" | ||
124 | "vpandq %%zmm7, %%zmm2, %%zmm2\n\t" | ||
125 | "vpandq %%zmm7, %%zmm6, %%zmm6\n\t" | ||
126 | "vpshufb %%zmm14, %%zmm4, %%zmm12\n\t" | ||
127 | "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t" | ||
128 | "vpshufb %%zmm6, %%zmm1, %%zmm13\n\t" | ||
129 | "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t" | ||
130 | "vpxorq %%zmm4, %%zmm1, %%zmm1\n\t" | ||
131 | "vpxorq %%zmm12, %%zmm13, %%zmm13" | ||
132 | : | ||
133 | : "m" (pbmul[0]), "m" (pbmul[16])); | ||
134 | |||
135 | /* | ||
136 | * 1 = pbmul[px[0]] | ||
137 | * 13 = pbmul[px[64]] | ||
138 | */ | ||
139 | asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t" | ||
140 | "vpxorq %%zmm15, %%zmm13, %%zmm13" | ||
141 | : | ||
142 | : ); | ||
143 | |||
144 | /* | ||
145 | * 1 = db = DQ | ||
146 | * 13 = db[64] = DQ[64] | ||
147 | */ | ||
148 | asm volatile("vmovdqa64 %%zmm1, %0\n\t" | ||
149 | "vmovdqa64 %%zmm13,%1\n\t" | ||
150 | "vpxorq %%zmm1, %%zmm0, %%zmm0\n\t" | ||
151 | "vpxorq %%zmm13, %%zmm8, %%zmm8" | ||
152 | : | ||
153 | : "m" (dq[0]), "m" (dq[64])); | ||
154 | |||
155 | asm volatile("vmovdqa64 %%zmm0, %0\n\t" | ||
156 | "vmovdqa64 %%zmm8, %1" | ||
157 | : | ||
158 | : "m" (dp[0]), "m" (dp[64])); | ||
159 | |||
160 | bytes -= 128; | ||
161 | p += 128; | ||
162 | q += 128; | ||
163 | dp += 128; | ||
164 | dq += 128; | ||
165 | #else | ||
166 | asm volatile("vmovdqa64 %0, %%zmm1\n\t" | ||
167 | "vmovdqa64 %1, %%zmm0\n\t" | ||
168 | "vpxorq %2, %%zmm1, %%zmm1\n\t" | ||
169 | "vpxorq %3, %%zmm0, %%zmm0" | ||
170 | : | ||
171 | : "m" (*q), "m" (*p), "m"(*dq), "m" (*dp)); | ||
172 | |||
173 | /* 1 = dq ^ q; 0 = dp ^ p */ | ||
174 | |||
175 | asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t" | ||
176 | "vbroadcasti64x2 %1, %%zmm5" | ||
177 | : | ||
178 | : "m" (qmul[0]), "m" (qmul[16])); | ||
179 | |||
180 | /* | ||
181 | * 1 = dq ^ q | ||
182 | * 3 = dq ^ p >> 4 | ||
183 | */ | ||
184 | asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t" | ||
185 | "vpandq %%zmm7, %%zmm1, %%zmm1\n\t" | ||
186 | "vpandq %%zmm7, %%zmm3, %%zmm3\n\t" | ||
187 | "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t" | ||
188 | "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t" | ||
189 | "vpxorq %%zmm4, %%zmm5, %%zmm5" | ||
190 | : | ||
191 | : ); | ||
192 | |||
193 | /* 5 = qx */ | ||
194 | |||
195 | asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t" | ||
196 | "vbroadcasti64x2 %1, %%zmm1" | ||
197 | : | ||
198 | : "m" (pbmul[0]), "m" (pbmul[16])); | ||
199 | |||
200 | asm volatile("vpsraw $4, %%zmm0, %%zmm2\n\t" | ||
201 | "vpandq %%zmm7, %%zmm0, %%zmm3\n\t" | ||
202 | "vpandq %%zmm7, %%zmm2, %%zmm2\n\t" | ||
203 | "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t" | ||
204 | "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t" | ||
205 | "vpxorq %%zmm4, %%zmm1, %%zmm1" | ||
206 | : | ||
207 | : ); | ||
208 | |||
209 | /* 1 = pbmul[px] */ | ||
210 | asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t" | ||
211 | /* 1 = db = DQ */ | ||
212 | "vmovdqa64 %%zmm1, %0\n\t" | ||
213 | : | ||
214 | : "m" (dq[0])); | ||
215 | |||
216 | asm volatile("vpxorq %%zmm1, %%zmm0, %%zmm0\n\t" | ||
217 | "vmovdqa64 %%zmm0, %0" | ||
218 | : | ||
219 | : "m" (dp[0])); | ||
220 | |||
221 | bytes -= 64; | ||
222 | p += 64; | ||
223 | q += 64; | ||
224 | dp += 64; | ||
225 | dq += 64; | ||
226 | #endif | ||
227 | } | ||
228 | |||
229 | kernel_fpu_end(); | ||
230 | } | ||
231 | |||
232 | static void raid6_datap_recov_avx512(int disks, size_t bytes, int faila, | ||
233 | void **ptrs) | ||
234 | { | ||
235 | u8 *p, *q, *dq; | ||
236 | const u8 *qmul; /* Q multiplier table */ | ||
237 | const u8 x0f = 0x0f; | ||
238 | |||
239 | p = (u8 *)ptrs[disks-2]; | ||
240 | q = (u8 *)ptrs[disks-1]; | ||
241 | |||
242 | /* | ||
243 | * Compute syndrome with zero for the missing data page | ||
244 | * Use the dead data page as temporary storage for delta q | ||
245 | */ | ||
246 | |||
247 | dq = (u8 *)ptrs[faila]; | ||
248 | ptrs[faila] = (void *)raid6_empty_zero_page; | ||
249 | ptrs[disks-1] = dq; | ||
250 | |||
251 | raid6_call.gen_syndrome(disks, bytes, ptrs); | ||
252 | |||
253 | /* Restore pointer table */ | ||
254 | ptrs[faila] = dq; | ||
255 | ptrs[disks-1] = q; | ||
256 | |||
257 | /* Now, pick the proper data tables */ | ||
258 | qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]]; | ||
259 | |||
260 | kernel_fpu_begin(); | ||
261 | |||
262 | asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f)); | ||
263 | |||
264 | while (bytes) { | ||
265 | #ifdef CONFIG_X86_64 | ||
266 | asm volatile("vmovdqa64 %0, %%zmm3\n\t" | ||
267 | "vmovdqa64 %1, %%zmm8\n\t" | ||
268 | "vpxorq %2, %%zmm3, %%zmm3\n\t" | ||
269 | "vpxorq %3, %%zmm8, %%zmm8" | ||
270 | : | ||
271 | : "m" (dq[0]), "m" (dq[64]), "m" (q[0]), | ||
272 | "m" (q[64])); | ||
273 | |||
274 | /* | ||
275 | * 3 = q[0] ^ dq[0] | ||
276 | * 8 = q[64] ^ dq[64] | ||
277 | */ | ||
278 | asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t" | ||
279 | "vmovapd %%zmm0, %%zmm13\n\t" | ||
280 | "vbroadcasti64x2 %1, %%zmm1\n\t" | ||
281 | "vmovapd %%zmm1, %%zmm14" | ||
282 | : | ||
283 | : "m" (qmul[0]), "m" (qmul[16])); | ||
284 | |||
285 | asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t" | ||
286 | "vpsraw $4, %%zmm8, %%zmm12\n\t" | ||
287 | "vpandq %%zmm7, %%zmm3, %%zmm3\n\t" | ||
288 | "vpandq %%zmm7, %%zmm8, %%zmm8\n\t" | ||
289 | "vpandq %%zmm7, %%zmm6, %%zmm6\n\t" | ||
290 | "vpandq %%zmm7, %%zmm12, %%zmm12\n\t" | ||
291 | "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t" | ||
292 | "vpshufb %%zmm8, %%zmm13, %%zmm13\n\t" | ||
293 | "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t" | ||
294 | "vpshufb %%zmm12, %%zmm14, %%zmm14\n\t" | ||
295 | "vpxorq %%zmm0, %%zmm1, %%zmm1\n\t" | ||
296 | "vpxorq %%zmm13, %%zmm14, %%zmm14" | ||
297 | : | ||
298 | : ); | ||
299 | |||
300 | /* | ||
301 | * 1 = qmul[q[0] ^ dq[0]] | ||
302 | * 14 = qmul[q[64] ^ dq[64]] | ||
303 | */ | ||
304 | asm volatile("vmovdqa64 %0, %%zmm2\n\t" | ||
305 | "vmovdqa64 %1, %%zmm12\n\t" | ||
306 | "vpxorq %%zmm1, %%zmm2, %%zmm2\n\t" | ||
307 | "vpxorq %%zmm14, %%zmm12, %%zmm12" | ||
308 | : | ||
309 | : "m" (p[0]), "m" (p[64])); | ||
310 | |||
311 | /* | ||
312 | * 2 = p[0] ^ qmul[q[0] ^ dq[0]] | ||
313 | * 12 = p[64] ^ qmul[q[64] ^ dq[64]] | ||
314 | */ | ||
315 | |||
316 | asm volatile("vmovdqa64 %%zmm1, %0\n\t" | ||
317 | "vmovdqa64 %%zmm14, %1\n\t" | ||
318 | "vmovdqa64 %%zmm2, %2\n\t" | ||
319 | "vmovdqa64 %%zmm12,%3" | ||
320 | : | ||
321 | : "m" (dq[0]), "m" (dq[64]), "m" (p[0]), | ||
322 | "m" (p[64])); | ||
323 | |||
324 | bytes -= 128; | ||
325 | p += 128; | ||
326 | q += 128; | ||
327 | dq += 128; | ||
328 | #else | ||
329 | asm volatile("vmovdqa64 %0, %%zmm3\n\t" | ||
330 | "vpxorq %1, %%zmm3, %%zmm3" | ||
331 | : | ||
332 | : "m" (dq[0]), "m" (q[0])); | ||
333 | |||
334 | /* 3 = q ^ dq */ | ||
335 | |||
336 | asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t" | ||
337 | "vbroadcasti64x2 %1, %%zmm1" | ||
338 | : | ||
339 | : "m" (qmul[0]), "m" (qmul[16])); | ||
340 | |||
341 | asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t" | ||
342 | "vpandq %%zmm7, %%zmm3, %%zmm3\n\t" | ||
343 | "vpandq %%zmm7, %%zmm6, %%zmm6\n\t" | ||
344 | "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t" | ||
345 | "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t" | ||
346 | "vpxorq %%zmm0, %%zmm1, %%zmm1" | ||
347 | : | ||
348 | : ); | ||
349 | |||
350 | /* 1 = qmul[q ^ dq] */ | ||
351 | |||
352 | asm volatile("vmovdqa64 %0, %%zmm2\n\t" | ||
353 | "vpxorq %%zmm1, %%zmm2, %%zmm2" | ||
354 | : | ||
355 | : "m" (p[0])); | ||
356 | |||
357 | /* 2 = p ^ qmul[q ^ dq] */ | ||
358 | |||
359 | asm volatile("vmovdqa64 %%zmm1, %0\n\t" | ||
360 | "vmovdqa64 %%zmm2, %1" | ||
361 | : | ||
362 | : "m" (dq[0]), "m" (p[0])); | ||
363 | |||
364 | bytes -= 64; | ||
365 | p += 64; | ||
366 | q += 64; | ||
367 | dq += 64; | ||
368 | #endif | ||
369 | } | ||
370 | |||
371 | kernel_fpu_end(); | ||
372 | } | ||
373 | |||
374 | const struct raid6_recov_calls raid6_recov_avx512 = { | ||
375 | .data2 = raid6_2data_recov_avx512, | ||
376 | .datap = raid6_datap_recov_avx512, | ||
377 | .valid = raid6_has_avx512, | ||
378 | #ifdef CONFIG_X86_64 | ||
379 | .name = "avx512x2", | ||
380 | #else | ||
381 | .name = "avx512x1", | ||
382 | #endif | ||
383 | .priority = 3, | ||
384 | }; | ||
385 | |||
386 | #else | ||
387 | #warning "your version of binutils lacks AVX512 support" | ||
388 | #endif | ||