Diffstat (limited to 'lib')
-rw-r--r--  lib/raid6/algos.c        41
-rw-r--r--  lib/raid6/altivec.uc      1
-rw-r--r--  lib/raid6/avx2.c          3
-rw-r--r--  lib/raid6/int.uc         41
-rw-r--r--  lib/raid6/mmx.c           2
-rw-r--r--  lib/raid6/neon.c          1
-rw-r--r--  lib/raid6/sse1.c          2
-rw-r--r--  lib/raid6/sse2.c        227
-rw-r--r--  lib/raid6/test/test.c    51
-rw-r--r--  lib/raid6/tilegx.uc       1
10 files changed, 347 insertions, 23 deletions
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index dbef2314901e..975c6e0434bd 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -131,11 +131,12 @@ static inline const struct raid6_recov_calls *raid6_choose_recov(void)
 static inline const struct raid6_calls *raid6_choose_gen(
     void *(*const dptrs)[(65536/PAGE_SIZE)+2], const int disks)
 {
-    unsigned long perf, bestperf, j0, j1;
+    unsigned long perf, bestgenperf, bestxorperf, j0, j1;
+    int start = (disks>>1)-1, stop = disks-3;    /* work on the second half of the disks */
     const struct raid6_calls *const *algo;
     const struct raid6_calls *best;

-    for (bestperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) {
+    for (bestgenperf = 0, bestxorperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) {
         if (!best || (*algo)->prefer >= best->prefer) {
             if ((*algo)->valid && !(*algo)->valid())
                 continue;
@@ -153,19 +154,45 @@ static inline const struct raid6_calls *raid6_choose_gen(
             }
             preempt_enable();

-            if (perf > bestperf) {
-                bestperf = perf;
+            if (perf > bestgenperf) {
+                bestgenperf = perf;
                 best = *algo;
             }
-            pr_info("raid6: %-8s %5ld MB/s\n", (*algo)->name,
+            pr_info("raid6: %-8s gen() %5ld MB/s\n", (*algo)->name,
                     (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
+
+            if (!(*algo)->xor_syndrome)
+                continue;
+
+            perf = 0;
+
+            preempt_disable();
+            j0 = jiffies;
+            while ((j1 = jiffies) == j0)
+                cpu_relax();
+            while (time_before(jiffies,
+                        j1 + (1<<RAID6_TIME_JIFFIES_LG2))) {
+                (*algo)->xor_syndrome(disks, start, stop,
+                        PAGE_SIZE, *dptrs);
+                perf++;
+            }
+            preempt_enable();
+
+            if (best == *algo)
+                bestxorperf = perf;
+
+            pr_info("raid6: %-8s xor() %5ld MB/s\n", (*algo)->name,
+                    (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1));
         }
     }

     if (best) {
-        pr_info("raid6: using algorithm %s (%ld MB/s)\n",
+        pr_info("raid6: using algorithm %s gen() %ld MB/s\n",
                 best->name,
-                (bestperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
+                (bestgenperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
+        if (best->xor_syndrome)
+            pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n",
+                    (bestxorperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1));
         raid6_call = *best;
     } else
         pr_err("raid6: Yikes! No algorithm found!\n");
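
The choose_gen() changes above benchmark the new xor_syndrome() entry point next to gen_syndrome() and report both, enabling the rmw path only when the winning algorithm provides it. What xor_syndrome(disks, start, stop, bytes, ptrs) gives callers is a delta update of P/Q for the data disks in [start, stop], so a read-modify-write takes two passes over that window instead of a full regeneration. A minimal user-space sketch of that calling convention follows; the typedef and the rmw_update()/fill() helpers are illustrative, not part of the patch:

    #include <stddef.h>

    /* Signature of the new hook, as added to struct raid6_calls by this patch. */
    typedef void (*raid6_xor_syndrome_fn)(int disks, int start, int stop,
                                          size_t bytes, void **ptrs);

    /* Sketch of an rmw update, mirroring the double call in the test harness:
     * pass 1 XORs the *old* contents of data disks [start..stop] out of P/Q,
     * the caller rewrites those disks, pass 2 XORs the *new* contents back in.
     */
    static void rmw_update(raid6_xor_syndrome_fn xor_syndrome, int disks,
                           int start, int stop, size_t bytes, void **ptrs,
                           void (*fill)(void **ptrs, int start, int stop))
    {
        xor_syndrome(disks, start, stop, bytes, ptrs);   /* cancel old data from P/Q */
        fill(ptrs, start, stop);                         /* write the new data blocks */
        xor_syndrome(disks, start, stop, bytes, ptrs);   /* fold new data into P/Q */
    }
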
diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc
index 7cc12b532e95..bec27fce7501 100644
--- a/lib/raid6/altivec.uc
+++ b/lib/raid6/altivec.uc
@@ -119,6 +119,7 @@ int raid6_have_altivec(void)

 const struct raid6_calls raid6_altivec$# = {
     raid6_altivec$#_gen_syndrome,
+    NULL,            /* XOR not yet implemented */
     raid6_have_altivec,
     "altivecx$#",
     0
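
Every per-architecture table below gains the same second initializer, because the new member sits between gen_syndrome and valid in struct raid6_calls. A sketch of the resulting layout, inferred from the initializers in this diff (the authoritative definition lives in include/linux/raid/pq.h):

    struct raid6_calls {
        void (*gen_syndrome)(int disks, size_t bytes, void **ptrs);
        void (*xor_syndrome)(int disks, int start, int stop,
                             size_t bytes, void **ptrs);  /* new; NULL where not implemented */
        int  (*valid)(void);      /* returns 1 if this routine set is usable */
        const char *name;         /* name printed by the boot-time benchmark */
        int prefer;               /* preference weight; x86 entries set 1 for "Has cache hints" */
    };
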
diff --git a/lib/raid6/avx2.c b/lib/raid6/avx2.c
index bc3b1dd436eb..76734004358d 100644
--- a/lib/raid6/avx2.c
+++ b/lib/raid6/avx2.c
@@ -89,6 +89,7 @@ static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs)

 const struct raid6_calls raid6_avx2x1 = {
     raid6_avx21_gen_syndrome,
+    NULL,            /* XOR not yet implemented */
     raid6_have_avx2,
     "avx2x1",
     1            /* Has cache hints */
@@ -150,6 +151,7 @@ static void raid6_avx22_gen_syndrome(int disks, size_t bytes, void **ptrs)

 const struct raid6_calls raid6_avx2x2 = {
     raid6_avx22_gen_syndrome,
+    NULL,            /* XOR not yet implemented */
     raid6_have_avx2,
     "avx2x2",
     1            /* Has cache hints */
@@ -242,6 +244,7 @@ static void raid6_avx24_gen_syndrome(int disks, size_t bytes, void **ptrs)

 const struct raid6_calls raid6_avx2x4 = {
     raid6_avx24_gen_syndrome,
+    NULL,            /* XOR not yet implemented */
     raid6_have_avx2,
     "avx2x4",
     1            /* Has cache hints */
diff --git a/lib/raid6/int.uc b/lib/raid6/int.uc
index 5b50f8dfc5d2..558aeac9342a 100644
--- a/lib/raid6/int.uc
+++ b/lib/raid6/int.uc
@@ -107,9 +107,48 @@ static void raid6_int$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
     }
 }

+static void raid6_int$#_xor_syndrome(int disks, int start, int stop,
+                     size_t bytes, void **ptrs)
+{
+    u8 **dptr = (u8 **)ptrs;
+    u8 *p, *q;
+    int d, z, z0;
+
+    unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
+
+    z0 = stop;            /* P/Q right side optimization */
+    p = dptr[disks-2];    /* XOR parity */
+    q = dptr[disks-1];    /* RS syndrome */
+
+    for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {
+        /* P/Q data pages */
+        wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE];
+        for ( z = z0-1 ; z >= start ; z-- ) {
+            wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
+            wp$$ ^= wd$$;
+            w2$$ = MASK(wq$$);
+            w1$$ = SHLBYTE(wq$$);
+            w2$$ &= NBYTES(0x1d);
+            w1$$ ^= w2$$;
+            wq$$ = w1$$ ^ wd$$;
+        }
+        /* P/Q left side optimization */
+        for ( z = start-1 ; z >= 0 ; z-- ) {
+            w2$$ = MASK(wq$$);
+            w1$$ = SHLBYTE(wq$$);
+            w2$$ &= NBYTES(0x1d);
+            wq$$ = w1$$ ^ w2$$;
+        }
+        *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
+        *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
+    }
+
+}
+
 const struct raid6_calls raid6_intx$# = {
     raid6_int$#_gen_syndrome,
+    raid6_int$#_xor_syndrome,
     NULL,            /* always valid */
     "int" NSTRING "x$#",
     0
 };
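
In the portable version above, SHLBYTE() shifts every byte of a machine word left by one and MASK() spreads each byte's top bit to 0xff, so masking with NBYTES(0x1d) applies the GF(2^8) reduction; each pass of the inner loop therefore multiplies the running Q word by {02} and XORs in the data word. A byte-at-a-time scalar sketch of the same step (the helper names gf2_mul2() and q_step() are illustrative):

    #include <stdint.h>

    /* Multiply one GF(2^8) element by {02} under the RAID-6 polynomial
     * x^8 + x^4 + x^3 + x^2 + 1: shift left, and XOR 0x1d if the top bit
     * fell out. This is the scalar equivalent of the SHLBYTE/MASK pair.
     */
    static inline uint8_t gf2_mul2(uint8_t v)
    {
        return (uint8_t)((v << 1) ^ ((v & 0x80) ? 0x1d : 0x00));
    }

    /* One inner-loop step for the Q syndrome: wq = (wq * {02}) ^ wd. */
    static inline uint8_t q_step(uint8_t wq, uint8_t wd)
    {
        return gf2_mul2(wq) ^ wd;
    }
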
diff --git a/lib/raid6/mmx.c b/lib/raid6/mmx.c
index 590c71c9e200..b3b0e1fcd3af 100644
--- a/lib/raid6/mmx.c
+++ b/lib/raid6/mmx.c
@@ -76,6 +76,7 @@ static void raid6_mmx1_gen_syndrome(int disks, size_t bytes, void **ptrs)

 const struct raid6_calls raid6_mmxx1 = {
     raid6_mmx1_gen_syndrome,
+    NULL,            /* XOR not yet implemented */
     raid6_have_mmx,
     "mmxx1",
     0
@@ -134,6 +135,7 @@ static void raid6_mmx2_gen_syndrome(int disks, size_t bytes, void **ptrs)

 const struct raid6_calls raid6_mmxx2 = {
     raid6_mmx2_gen_syndrome,
+    NULL,            /* XOR not yet implemented */
     raid6_have_mmx,
     "mmxx2",
     0
diff --git a/lib/raid6/neon.c b/lib/raid6/neon.c
index 36ad4705df1a..d9ad6ee284f4 100644
--- a/lib/raid6/neon.c
+++ b/lib/raid6/neon.c
@@ -42,6 +42,7 @@
     }                                                \
     struct raid6_calls const raid6_neonx ## _n = {   \
         raid6_neon ## _n ## _gen_syndrome,           \
+        NULL,        /* XOR not yet implemented */   \
         raid6_have_neon,                             \
         "neonx" #_n,                                 \
         0                                            \
diff --git a/lib/raid6/sse1.c b/lib/raid6/sse1.c
index f76297139445..9025b8ca9aa3 100644
--- a/lib/raid6/sse1.c
+++ b/lib/raid6/sse1.c
@@ -92,6 +92,7 @@ static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs)

 const struct raid6_calls raid6_sse1x1 = {
     raid6_sse11_gen_syndrome,
+    NULL,            /* XOR not yet implemented */
     raid6_have_sse1_or_mmxext,
     "sse1x1",
     1            /* Has cache hints */
@@ -154,6 +155,7 @@ static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs)

 const struct raid6_calls raid6_sse1x2 = {
     raid6_sse12_gen_syndrome,
+    NULL,            /* XOR not yet implemented */
     raid6_have_sse1_or_mmxext,
     "sse1x2",
     1            /* Has cache hints */
diff --git a/lib/raid6/sse2.c b/lib/raid6/sse2.c
index 85b82c85f28e..1d2276b007ee 100644
--- a/lib/raid6/sse2.c
+++ b/lib/raid6/sse2.c
@@ -88,8 +88,58 @@ static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs)
     kernel_fpu_end();
 }

+
+static void raid6_sse21_xor_syndrome(int disks, int start, int stop,
+                     size_t bytes, void **ptrs)
+{
+    u8 **dptr = (u8 **)ptrs;
+    u8 *p, *q;
+    int d, z, z0;
+
+    z0 = stop;            /* P/Q right side optimization */
+    p = dptr[disks-2];    /* XOR parity */
+    q = dptr[disks-1];    /* RS syndrome */
+
+    kernel_fpu_begin();
+
+    asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
+
+    for ( d = 0 ; d < bytes ; d += 16 ) {
+        asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
+        asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
+        asm volatile("pxor %xmm4,%xmm2");
+        /* P/Q data pages */
+        for ( z = z0-1 ; z >= start ; z-- ) {
+            asm volatile("pxor %xmm5,%xmm5");
+            asm volatile("pcmpgtb %xmm4,%xmm5");
+            asm volatile("paddb %xmm4,%xmm4");
+            asm volatile("pand %xmm0,%xmm5");
+            asm volatile("pxor %xmm5,%xmm4");
+            asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
+            asm volatile("pxor %xmm5,%xmm2");
+            asm volatile("pxor %xmm5,%xmm4");
+        }
+        /* P/Q left side optimization */
+        for ( z = start-1 ; z >= 0 ; z-- ) {
+            asm volatile("pxor %xmm5,%xmm5");
+            asm volatile("pcmpgtb %xmm4,%xmm5");
+            asm volatile("paddb %xmm4,%xmm4");
+            asm volatile("pand %xmm0,%xmm5");
+            asm volatile("pxor %xmm5,%xmm4");
+        }
+        asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
+        /* Don't use movntdq for r/w memory area < cache line */
+        asm volatile("movdqa %%xmm4,%0" : "=m" (q[d]));
+        asm volatile("movdqa %%xmm2,%0" : "=m" (p[d]));
+    }
+
+    asm volatile("sfence" : : : "memory");
+    kernel_fpu_end();
+}
+
 const struct raid6_calls raid6_sse2x1 = {
     raid6_sse21_gen_syndrome,
+    raid6_sse21_xor_syndrome,
     raid6_have_sse2,
     "sse2x1",
     1            /* Has cache hints */
@@ -150,8 +200,76 @@ static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs)
     kernel_fpu_end();
 }

+static void raid6_sse22_xor_syndrome(int disks, int start, int stop,
+                     size_t bytes, void **ptrs)
+{
+    u8 **dptr = (u8 **)ptrs;
+    u8 *p, *q;
+    int d, z, z0;
+
+    z0 = stop;            /* P/Q right side optimization */
+    p = dptr[disks-2];    /* XOR parity */
+    q = dptr[disks-1];    /* RS syndrome */
+
+    kernel_fpu_begin();
+
+    asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
+
+    for ( d = 0 ; d < bytes ; d += 32 ) {
+        asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
+        asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16]));
+        asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
+        asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16]));
+        asm volatile("pxor %xmm4,%xmm2");
+        asm volatile("pxor %xmm6,%xmm3");
+        /* P/Q data pages */
+        for ( z = z0-1 ; z >= start ; z-- ) {
+            asm volatile("pxor %xmm5,%xmm5");
+            asm volatile("pxor %xmm7,%xmm7");
+            asm volatile("pcmpgtb %xmm4,%xmm5");
+            asm volatile("pcmpgtb %xmm6,%xmm7");
+            asm volatile("paddb %xmm4,%xmm4");
+            asm volatile("paddb %xmm6,%xmm6");
+            asm volatile("pand %xmm0,%xmm5");
+            asm volatile("pand %xmm0,%xmm7");
+            asm volatile("pxor %xmm5,%xmm4");
+            asm volatile("pxor %xmm7,%xmm6");
+            asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
+            asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
+            asm volatile("pxor %xmm5,%xmm2");
+            asm volatile("pxor %xmm7,%xmm3");
+            asm volatile("pxor %xmm5,%xmm4");
+            asm volatile("pxor %xmm7,%xmm6");
+        }
+        /* P/Q left side optimization */
+        for ( z = start-1 ; z >= 0 ; z-- ) {
+            asm volatile("pxor %xmm5,%xmm5");
+            asm volatile("pxor %xmm7,%xmm7");
+            asm volatile("pcmpgtb %xmm4,%xmm5");
+            asm volatile("pcmpgtb %xmm6,%xmm7");
+            asm volatile("paddb %xmm4,%xmm4");
+            asm volatile("paddb %xmm6,%xmm6");
+            asm volatile("pand %xmm0,%xmm5");
+            asm volatile("pand %xmm0,%xmm7");
+            asm volatile("pxor %xmm5,%xmm4");
+            asm volatile("pxor %xmm7,%xmm6");
+        }
+        asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
+        asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16]));
+        /* Don't use movntdq for r/w memory area < cache line */
+        asm volatile("movdqa %%xmm4,%0" : "=m" (q[d]));
+        asm volatile("movdqa %%xmm6,%0" : "=m" (q[d+16]));
+        asm volatile("movdqa %%xmm2,%0" : "=m" (p[d]));
+        asm volatile("movdqa %%xmm3,%0" : "=m" (p[d+16]));
+    }
+
+    asm volatile("sfence" : : : "memory");
+    kernel_fpu_end();
+}
+
 const struct raid6_calls raid6_sse2x2 = {
     raid6_sse22_gen_syndrome,
+    raid6_sse22_xor_syndrome,
     raid6_have_sse2,
     "sse2x2",
     1            /* Has cache hints */
@@ -248,8 +366,117 @@ static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs)
     kernel_fpu_end();
 }

+static void raid6_sse24_xor_syndrome(int disks, int start, int stop,
+                     size_t bytes, void **ptrs)
+{
+    u8 **dptr = (u8 **)ptrs;
+    u8 *p, *q;
+    int d, z, z0;
+
+    z0 = stop;            /* P/Q right side optimization */
+    p = dptr[disks-2];    /* XOR parity */
+    q = dptr[disks-1];    /* RS syndrome */
+
+    kernel_fpu_begin();
+
+    asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0]));
+
+    for ( d = 0 ; d < bytes ; d += 64 ) {
+        asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
+        asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16]));
+        asm volatile("movdqa %0,%%xmm12" :: "m" (dptr[z0][d+32]));
+        asm volatile("movdqa %0,%%xmm14" :: "m" (dptr[z0][d+48]));
+        asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
+        asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16]));
+        asm volatile("movdqa %0,%%xmm10" : : "m" (p[d+32]));
+        asm volatile("movdqa %0,%%xmm11" : : "m" (p[d+48]));
+        asm volatile("pxor %xmm4,%xmm2");
+        asm volatile("pxor %xmm6,%xmm3");
+        asm volatile("pxor %xmm12,%xmm10");
+        asm volatile("pxor %xmm14,%xmm11");
+        /* P/Q data pages */
+        for ( z = z0-1 ; z >= start ; z-- ) {
+            asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
+            asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32]));
+            asm volatile("pxor %xmm5,%xmm5");
+            asm volatile("pxor %xmm7,%xmm7");
+            asm volatile("pxor %xmm13,%xmm13");
+            asm volatile("pxor %xmm15,%xmm15");
+            asm volatile("pcmpgtb %xmm4,%xmm5");
+            asm volatile("pcmpgtb %xmm6,%xmm7");
+            asm volatile("pcmpgtb %xmm12,%xmm13");
+            asm volatile("pcmpgtb %xmm14,%xmm15");
+            asm volatile("paddb %xmm4,%xmm4");
+            asm volatile("paddb %xmm6,%xmm6");
+            asm volatile("paddb %xmm12,%xmm12");
+            asm volatile("paddb %xmm14,%xmm14");
+            asm volatile("pand %xmm0,%xmm5");
+            asm volatile("pand %xmm0,%xmm7");
+            asm volatile("pand %xmm0,%xmm13");
+            asm volatile("pand %xmm0,%xmm15");
+            asm volatile("pxor %xmm5,%xmm4");
+            asm volatile("pxor %xmm7,%xmm6");
+            asm volatile("pxor %xmm13,%xmm12");
+            asm volatile("pxor %xmm15,%xmm14");
+            asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
+            asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
+            asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32]));
+            asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48]));
+            asm volatile("pxor %xmm5,%xmm2");
+            asm volatile("pxor %xmm7,%xmm3");
+            asm volatile("pxor %xmm13,%xmm10");
+            asm volatile("pxor %xmm15,%xmm11");
+            asm volatile("pxor %xmm5,%xmm4");
+            asm volatile("pxor %xmm7,%xmm6");
+            asm volatile("pxor %xmm13,%xmm12");
+            asm volatile("pxor %xmm15,%xmm14");
+        }
+        asm volatile("prefetchnta %0" :: "m" (q[d]));
+        asm volatile("prefetchnta %0" :: "m" (q[d+32]));
+        /* P/Q left side optimization */
+        for ( z = start-1 ; z >= 0 ; z-- ) {
+            asm volatile("pxor %xmm5,%xmm5");
+            asm volatile("pxor %xmm7,%xmm7");
+            asm volatile("pxor %xmm13,%xmm13");
+            asm volatile("pxor %xmm15,%xmm15");
+            asm volatile("pcmpgtb %xmm4,%xmm5");
+            asm volatile("pcmpgtb %xmm6,%xmm7");
+            asm volatile("pcmpgtb %xmm12,%xmm13");
+            asm volatile("pcmpgtb %xmm14,%xmm15");
+            asm volatile("paddb %xmm4,%xmm4");
+            asm volatile("paddb %xmm6,%xmm6");
+            asm volatile("paddb %xmm12,%xmm12");
+            asm volatile("paddb %xmm14,%xmm14");
+            asm volatile("pand %xmm0,%xmm5");
+            asm volatile("pand %xmm0,%xmm7");
+            asm volatile("pand %xmm0,%xmm13");
+            asm volatile("pand %xmm0,%xmm15");
+            asm volatile("pxor %xmm5,%xmm4");
+            asm volatile("pxor %xmm7,%xmm6");
+            asm volatile("pxor %xmm13,%xmm12");
+            asm volatile("pxor %xmm15,%xmm14");
+        }
+        asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
+        asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
+        asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32]));
+        asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48]));
+        asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
+        asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16]));
+        asm volatile("pxor %0,%%xmm12" : : "m" (q[d+32]));
+        asm volatile("pxor %0,%%xmm14" : : "m" (q[d+48]));
+        asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
+        asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
+        asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32]));
+        asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48]));
+    }
+    asm volatile("sfence" : : : "memory");
+    kernel_fpu_end();
+}
+
+
 const struct raid6_calls raid6_sse2x4 = {
     raid6_sse24_gen_syndrome,
+    raid6_sse24_xor_syndrome,
     raid6_have_sse2,
     "sse2x4",
     1            /* Has cache hints */
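
The SSE2 routines implement the same multiply-by-{02} with a five-instruction sequence per 16-byte vector: pxor zeroes a scratch register, pcmpgtb writes 0xff into every lane whose top bit is set (signed compare against zero), paddb doubles each byte, pand keeps 0x1d only in the lanes that overflowed, and pxor applies the reduction. A hedged user-space intrinsics rendering of that one step (not part of the patch; x1d stands in for raid6_sse_constants.x1d):

    #include <emmintrin.h>   /* SSE2 intrinsics */

    /* Multiply 16 GF(2^8) bytes by {02} at once, mirroring the
     * pxor/pcmpgtb/paddb/pand/pxor sequence in the asm above.
     */
    static inline __m128i gf2_mul2_x16(__m128i q)
    {
        const __m128i x1d  = _mm_set1_epi8(0x1d);
        __m128i       mask = _mm_cmpgt_epi8(_mm_setzero_si128(), q); /* 0xff where top bit set */

        q = _mm_add_epi8(q, q);                             /* byte-wise shift left by one */
        return _mm_xor_si128(q, _mm_and_si128(mask, x1d));  /* conditional XOR with 0x1d */
    }
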
diff --git a/lib/raid6/test/test.c b/lib/raid6/test/test.c
index 5a485b7a7d3c..3bebbabdb510 100644
--- a/lib/raid6/test/test.c
+++ b/lib/raid6/test/test.c
@@ -28,11 +28,11 @@ char *dataptrs[NDISKS];
 char data[NDISKS][PAGE_SIZE];
 char recovi[PAGE_SIZE], recovj[PAGE_SIZE];

-static void makedata(void)
+static void makedata(int start, int stop)
 {
     int i, j;

-    for (i = 0; i < NDISKS; i++) {
+    for (i = start; i <= stop; i++) {
         for (j = 0; j < PAGE_SIZE; j++)
             data[i][j] = rand();

@@ -91,34 +91,55 @@ int main(int argc, char *argv[])
 {
     const struct raid6_calls *const *algo;
     const struct raid6_recov_calls *const *ra;
-    int i, j;
+    int i, j, p1, p2;
     int err = 0;

-    makedata();
+    makedata(0, NDISKS-1);

     for (ra = raid6_recov_algos; *ra; ra++) {
         if ((*ra)->valid && !(*ra)->valid())
             continue;
+
         raid6_2data_recov = (*ra)->data2;
         raid6_datap_recov = (*ra)->datap;

         printf("using recovery %s\n", (*ra)->name);

         for (algo = raid6_algos; *algo; algo++) {
-            if (!(*algo)->valid || (*algo)->valid()) {
-                raid6_call = **algo;
+            if ((*algo)->valid && !(*algo)->valid())
+                continue;
+
+            raid6_call = **algo;
+
+            /* Nuke syndromes */
+            memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE);
+
+            /* Generate assumed good syndrome */
+            raid6_call.gen_syndrome(NDISKS, PAGE_SIZE,
+                        (void **)&dataptrs);
+
+            for (i = 0; i < NDISKS-1; i++)
+                for (j = i+1; j < NDISKS; j++)
+                    err += test_disks(i, j);
+
+            if (!raid6_call.xor_syndrome)
+                continue;
+
+            for (p1 = 0; p1 < NDISKS-2; p1++)
+                for (p2 = p1; p2 < NDISKS-2; p2++) {

-                /* Nuke syndromes */
-                memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE);
+                    /* Simulate rmw run */
+                    raid6_call.xor_syndrome(NDISKS, p1, p2, PAGE_SIZE,
+                                (void **)&dataptrs);
+                    makedata(p1, p2);
+                    raid6_call.xor_syndrome(NDISKS, p1, p2, PAGE_SIZE,
+                                (void **)&dataptrs);

-                /* Generate assumed good syndrome */
-                raid6_call.gen_syndrome(NDISKS, PAGE_SIZE,
-                            (void **)&dataptrs);
+                    for (i = 0; i < NDISKS-1; i++)
+                        for (j = i+1; j < NDISKS; j++)
+                            err += test_disks(i, j);
+                }

-                for (i = 0; i < NDISKS-1; i++)
-                    for (j = i+1; j < NDISKS; j++)
-                        err += test_disks(i, j);
-            }
         }
         printf("\n");
     }
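
For every window [p1, p2] of data disks the test now runs the rmw sequence: the first xor_syndrome() call removes the old contents of those disks from P/Q, makedata(p1, p2) rewrites them, the second call folds the new contents back in, and the existing recovery checks must still pass. An equivalent cross-check one could bolt onto the harness (illustrative only, not in the patch; it uses the globals already defined in test.c) compares the delta-updated P/Q against a full gen_syndrome() recomputation:

    #include <string.h>

    /* Illustrative check: after the rmw simulation, P and Q must match what
     * a full gen_syndrome() pass over all NDISKS would produce. The call
     * below recomputes P/Q in place, so capture the rmw result first.
     */
    static int pq_matches_full_regen(void)
    {
        static char rmw_p[PAGE_SIZE], rmw_q[PAGE_SIZE];

        memcpy(rmw_p, data[NDISKS-2], PAGE_SIZE);   /* P as left by xor_syndrome() */
        memcpy(rmw_q, data[NDISKS-1], PAGE_SIZE);   /* Q as left by xor_syndrome() */

        raid6_call.gen_syndrome(NDISKS, PAGE_SIZE, (void **)&dataptrs);

        return memcmp(rmw_p, data[NDISKS-2], PAGE_SIZE) == 0 &&
               memcmp(rmw_q, data[NDISKS-1], PAGE_SIZE) == 0;
    }
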
diff --git a/lib/raid6/tilegx.uc b/lib/raid6/tilegx.uc
index e7c29459cbcd..2dd291a11264 100644
--- a/lib/raid6/tilegx.uc
+++ b/lib/raid6/tilegx.uc
@@ -80,6 +80,7 @@ void raid6_tilegx$#_gen_syndrome(int disks, size_t bytes, void **ptrs)

 const struct raid6_calls raid6_tilegx$# = {
     raid6_tilegx$#_gen_syndrome,
+    NULL,            /* XOR not yet implemented */
     NULL,
     "tilegx$#",
     0