author		Linus Torvalds <torvalds@linux-foundation.org>	2015-04-24 12:28:01 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-04-24 12:28:01 -0400
commit		474095e46cd14421821da3201a9fd6a4c070996b (patch)
tree		7203d36f53c376a96099ed0310787b1fb0c4f7a5 /lib
parent		d56a669ca59c37ed0a7282a251b2f2f22533343a (diff)
parent		9ffc8f7cb9647b13dfe4d1ad0d5e1427bb8b46d6 (diff)
Merge tag 'md/4.1' of git://neil.brown.name/md
Pull md updates from Neil Brown:
 "More updates than usual this time. A few have performance impacts
  which should mostly be positive, but RAID5 (in particular) can be
  very work-load sensitive... We'll have to wait and see.

  Highlights:

   - "experimental" code for managing md/raid1 across a cluster using
     DLM. Code is not ready for general use and triggers a WARNING if
     used. However it is looking good and mostly done and having in
     mainline will help co-ordinate development.

   - RAID5/6 can now batch multiple (4K wide) stripe_heads so as to
     handle a full (chunk wide) stripe as a single unit.

   - RAID6 can now perform read-modify-write cycles which should help
     performance on larger arrays: 6 or more devices.

   - RAID5/6 stripe cache now grows and shrinks dynamically. The value
     set is used as a minimum.

   - Resync is now allowed to go a little faster than the 'minimum' when
     there is competing IO. How much faster depends on the speed of the
     devices, so the effective minimum should scale with device speed to
     some extent"

* tag 'md/4.1' of git://neil.brown.name/md: (58 commits)
  md/raid5: don't do chunk aligned read on degraded array.
  md/raid5: allow the stripe_cache to grow and shrink.
  md/raid5: change ->inactive_blocked to a bit-flag.
  md/raid5: move max_nr_stripes management into grow_one_stripe and drop_one_stripe
  md/raid5: pass gfp_t arg to grow_one_stripe()
  md/raid5: introduce configuration option rmw_level
  md/raid5: activate raid6 rmw feature
  md/raid6 algorithms: xor_syndrome() for SSE2
  md/raid6 algorithms: xor_syndrome() for generic int
  md/raid6 algorithms: improve test program
  md/raid6 algorithms: delta syndrome functions
  raid5: handle expansion/resync case with stripe batching
  raid5: handle io error of batch list
  RAID5: batch adjacent full stripe write
  raid5: track overwrite disk count
  raid5: add a new flag to track if a stripe can be batched
  raid5: use flex_array for scribble data
  md raid0: access mddev->queue (request queue member) conditionally because it is not set when accessed from dm-raid
  md: allow resync to go faster when there is competing IO.
  md: remove 'go_faster' option from ->sync_request()
  ...
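[Editorial note on the raid6 rmw feature touched by this diff: a RAID6 read-modify-write only folds the change of the rewritten data disks into the existing P and Q instead of regenerating the whole stripe. The new xor_syndrome() hooks below do exactly that (in SSE2 assembly and generic integer code). The snippet here is only a byte-at-a-time C sketch of the same arithmetic under the stated assumptions; gf_mul2() and xor_syndrome_ref() are made-up names for this sketch, not kernel symbols. The {02}-multiplication with the 0x1d reduction polynomial is the operation the MASK/SHLBYTE/NBYTES(0x1d) and pcmpgtb/paddb/pand sequences in the diff implement.]

/*
 * Illustrative reference only (not kernel code): byte-at-a-time
 * version of the partial-syndrome update performed by xor_syndrome().
 */
#include <stddef.h>
#include <stdint.h>

/* Multiply by {02} in GF(2^8); fold the carried-out bit back with 0x1d. */
static uint8_t gf_mul2(uint8_t v)
{
	return (uint8_t)((v << 1) ^ ((v & 0x80) ? 0x1d : 0));
}

/*
 * XOR the partial P/Q contribution of data disks [start, stop] into the
 * existing parity blocks dptr[disks-2] (P) and dptr[disks-1] (Q).
 */
static void xor_syndrome_ref(int disks, int start, int stop,
			     size_t bytes, uint8_t **dptr)
{
	uint8_t *p = dptr[disks - 2];	/* XOR parity */
	uint8_t *q = dptr[disks - 1];	/* RS syndrome */

	for (size_t i = 0; i < bytes; i++) {
		uint8_t wp = 0, wq = 0;
		int z;

		/* Disks above 'stop' contribute nothing (right side). */
		for (z = stop; z >= start; z--) {
			wp ^= dptr[z][i];
			wq = gf_mul2(wq) ^ dptr[z][i];
		}
		/* Disks below 'start': only shift Q's coefficients (left side). */
		for (z = start - 1; z >= 0; z--)
			wq = gf_mul2(wq);

		p[i] ^= wp;
		q[i] ^= wq;
	}
}

[An rmw write then amounts to calling xor_syndrome() over the target disks with the old data (cancelling their contribution out of P/Q), copying in the new data, and calling it again, which is the sequence the updated test.c below simulates.]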
Diffstat (limited to 'lib')
-rw-r--r--	lib/raid6/algos.c	41
-rw-r--r--	lib/raid6/altivec.uc	1
-rw-r--r--	lib/raid6/avx2.c	3
-rw-r--r--	lib/raid6/int.uc	41
-rw-r--r--	lib/raid6/mmx.c	2
-rw-r--r--	lib/raid6/neon.c	1
-rw-r--r--	lib/raid6/sse1.c	2
-rw-r--r--	lib/raid6/sse2.c	227
-rw-r--r--	lib/raid6/test/test.c	51
-rw-r--r--	lib/raid6/tilegx.uc	1
10 files changed, 347 insertions, 23 deletions
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index dbef2314901e..975c6e0434bd 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -131,11 +131,12 @@ static inline const struct raid6_recov_calls *raid6_choose_recov(void)
 static inline const struct raid6_calls *raid6_choose_gen(
 	void *(*const dptrs)[(65536/PAGE_SIZE)+2], const int disks)
 {
-	unsigned long perf, bestperf, j0, j1;
+	unsigned long perf, bestgenperf, bestxorperf, j0, j1;
+	int start = (disks>>1)-1, stop = disks-3;	/* work on the second half of the disks */
 	const struct raid6_calls *const *algo;
 	const struct raid6_calls *best;
 
-	for (bestperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) {
+	for (bestgenperf = 0, bestxorperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) {
 		if (!best || (*algo)->prefer >= best->prefer) {
 			if ((*algo)->valid && !(*algo)->valid())
 				continue;
@@ -153,19 +154,45 @@ static inline const struct raid6_calls *raid6_choose_gen(
 			}
 			preempt_enable();
 
-			if (perf > bestperf) {
-				bestperf = perf;
+			if (perf > bestgenperf) {
+				bestgenperf = perf;
 				best = *algo;
 			}
-			pr_info("raid6: %-8s %5ld MB/s\n", (*algo)->name,
+			pr_info("raid6: %-8s gen() %5ld MB/s\n", (*algo)->name,
 			       (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
+
+			if (!(*algo)->xor_syndrome)
+				continue;
+
+			perf = 0;
+
+			preempt_disable();
+			j0 = jiffies;
+			while ((j1 = jiffies) == j0)
+				cpu_relax();
+			while (time_before(jiffies,
+				j1 + (1<<RAID6_TIME_JIFFIES_LG2))) {
+				(*algo)->xor_syndrome(disks, start, stop,
+						      PAGE_SIZE, *dptrs);
+				perf++;
+			}
+			preempt_enable();
+
+			if (best == *algo)
+				bestxorperf = perf;
+
+			pr_info("raid6: %-8s xor() %5ld MB/s\n", (*algo)->name,
+				(perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1));
 		}
 	}
 
 	if (best) {
-		pr_info("raid6: using algorithm %s (%ld MB/s)\n",
+		pr_info("raid6: using algorithm %s gen() %ld MB/s\n",
 		       best->name,
-		       (bestperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
+		       (bestgenperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
+		if (best->xor_syndrome)
+			pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n",
+			       (bestxorperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1));
 		raid6_call = *best;
 	} else
 		pr_err("raid6: Yikes! No algorithm found!\n");
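[Editorial note on the MB/s figures this chooser prints, assuming what the hunk above shows: each timed gen() call feeds (65536/PAGE_SIZE) data pages, i.e. 64 KiB, and the timed window is (1 << RAID6_TIME_JIFFIES_LG2) jiffies. The xor() lines shift by one extra bit, consistent with xor_syndrome() being timed over roughly the second half of the data disks, so about half the data per call. mbs_from_perf() is a hypothetical helper written only to spell out that conversion, not a kernel function.]

static unsigned long mbs_from_perf(unsigned long perf, unsigned long hz,
				   int time_jiffies_lg2)
{
	/*
	 * perf calls * 2^16 bytes per call, divided by a timed window of
	 * (2^lg2 / HZ) seconds, divided by 2^20 bytes per MB:
	 * perf * 2^16 * HZ / 2^lg2 / 2^20 == (perf * HZ) >> (20 - 16 + lg2)
	 */
	return (perf * hz) >> (20 - 16 + time_jiffies_lg2);
}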
diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc
index 7cc12b532e95..bec27fce7501 100644
--- a/lib/raid6/altivec.uc
+++ b/lib/raid6/altivec.uc
@@ -119,6 +119,7 @@ int raid6_have_altivec(void)
 
 const struct raid6_calls raid6_altivec$# = {
 	raid6_altivec$#_gen_syndrome,
+	NULL,			/* XOR not yet implemented */
 	raid6_have_altivec,
 	"altivecx$#",
 	0
diff --git a/lib/raid6/avx2.c b/lib/raid6/avx2.c
index bc3b1dd436eb..76734004358d 100644
--- a/lib/raid6/avx2.c
+++ b/lib/raid6/avx2.c
@@ -89,6 +89,7 @@ static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs)
 
 const struct raid6_calls raid6_avx2x1 = {
 	raid6_avx21_gen_syndrome,
+	NULL,			/* XOR not yet implemented */
 	raid6_have_avx2,
 	"avx2x1",
 	1			/* Has cache hints */
@@ -150,6 +151,7 @@ static void raid6_avx22_gen_syndrome(int disks, size_t bytes, void **ptrs)
 
 const struct raid6_calls raid6_avx2x2 = {
 	raid6_avx22_gen_syndrome,
+	NULL,			/* XOR not yet implemented */
 	raid6_have_avx2,
 	"avx2x2",
 	1			/* Has cache hints */
@@ -242,6 +244,7 @@ static void raid6_avx24_gen_syndrome(int disks, size_t bytes, void **ptrs)
 
 const struct raid6_calls raid6_avx2x4 = {
 	raid6_avx24_gen_syndrome,
+	NULL,			/* XOR not yet implemented */
 	raid6_have_avx2,
 	"avx2x4",
 	1			/* Has cache hints */
diff --git a/lib/raid6/int.uc b/lib/raid6/int.uc
index 5b50f8dfc5d2..558aeac9342a 100644
--- a/lib/raid6/int.uc
+++ b/lib/raid6/int.uc
@@ -107,9 +107,48 @@ static void raid6_int$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
 	}
 }
 
+static void raid6_int$#_xor_syndrome(int disks, int start, int stop,
+				     size_t bytes, void **ptrs)
+{
+	u8 **dptr = (u8 **)ptrs;
+	u8 *p, *q;
+	int d, z, z0;
+
+	unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
+
+	z0 = stop;		/* P/Q right side optimization */
+	p = dptr[disks-2];	/* XOR parity */
+	q = dptr[disks-1];	/* RS syndrome */
+
+	for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {
+		/* P/Q data pages */
+		wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE];
+		for ( z = z0-1 ; z >= start ; z-- ) {
+			wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
+			wp$$ ^= wd$$;
+			w2$$ = MASK(wq$$);
+			w1$$ = SHLBYTE(wq$$);
+			w2$$ &= NBYTES(0x1d);
+			w1$$ ^= w2$$;
+			wq$$ = w1$$ ^ wd$$;
+		}
+		/* P/Q left side optimization */
+		for ( z = start-1 ; z >= 0 ; z-- ) {
+			w2$$ = MASK(wq$$);
+			w1$$ = SHLBYTE(wq$$);
+			w2$$ &= NBYTES(0x1d);
+			wq$$ = w1$$ ^ w2$$;
+		}
+		*(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
+		*(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
+	}
+
+}
+
 const struct raid6_calls raid6_intx$# = {
 	raid6_int$#_gen_syndrome,
+	raid6_int$#_xor_syndrome,
 	NULL,			/* always valid */
 	"int" NSTRING "x$#",
 	0
 };
diff --git a/lib/raid6/mmx.c b/lib/raid6/mmx.c
index 590c71c9e200..b3b0e1fcd3af 100644
--- a/lib/raid6/mmx.c
+++ b/lib/raid6/mmx.c
@@ -76,6 +76,7 @@ static void raid6_mmx1_gen_syndrome(int disks, size_t bytes, void **ptrs)
 
 const struct raid6_calls raid6_mmxx1 = {
 	raid6_mmx1_gen_syndrome,
+	NULL,			/* XOR not yet implemented */
 	raid6_have_mmx,
 	"mmxx1",
 	0
@@ -134,6 +135,7 @@ static void raid6_mmx2_gen_syndrome(int disks, size_t bytes, void **ptrs)
 
 const struct raid6_calls raid6_mmxx2 = {
 	raid6_mmx2_gen_syndrome,
+	NULL,			/* XOR not yet implemented */
 	raid6_have_mmx,
 	"mmxx2",
 	0
diff --git a/lib/raid6/neon.c b/lib/raid6/neon.c
index 36ad4705df1a..d9ad6ee284f4 100644
--- a/lib/raid6/neon.c
+++ b/lib/raid6/neon.c
@@ -42,6 +42,7 @@
 	}							\
 	struct raid6_calls const raid6_neonx ## _n = {		\
 		raid6_neon ## _n ## _gen_syndrome,		\
+		NULL,		/* XOR not yet implemented */	\
 		raid6_have_neon,				\
 		"neonx" #_n,					\
 		0						\
diff --git a/lib/raid6/sse1.c b/lib/raid6/sse1.c
index f76297139445..9025b8ca9aa3 100644
--- a/lib/raid6/sse1.c
+++ b/lib/raid6/sse1.c
@@ -92,6 +92,7 @@ static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs)
 
 const struct raid6_calls raid6_sse1x1 = {
 	raid6_sse11_gen_syndrome,
+	NULL,			/* XOR not yet implemented */
 	raid6_have_sse1_or_mmxext,
 	"sse1x1",
 	1			/* Has cache hints */
@@ -154,6 +155,7 @@ static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs)
 
 const struct raid6_calls raid6_sse1x2 = {
 	raid6_sse12_gen_syndrome,
+	NULL,			/* XOR not yet implemented */
 	raid6_have_sse1_or_mmxext,
 	"sse1x2",
 	1			/* Has cache hints */
diff --git a/lib/raid6/sse2.c b/lib/raid6/sse2.c
index 85b82c85f28e..1d2276b007ee 100644
--- a/lib/raid6/sse2.c
+++ b/lib/raid6/sse2.c
@@ -88,8 +88,58 @@ static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs)
 	kernel_fpu_end();
 }
 
+
+static void raid6_sse21_xor_syndrome(int disks, int start, int stop,
+				     size_t bytes, void **ptrs)
+{
+	u8 **dptr = (u8 **)ptrs;
+	u8 *p, *q;
+	int d, z, z0;
+
+	z0 = stop;		/* P/Q right side optimization */
+	p = dptr[disks-2];	/* XOR parity */
+	q = dptr[disks-1];	/* RS syndrome */
+
+	kernel_fpu_begin();
+
+	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
+
+	for ( d = 0 ; d < bytes ; d += 16 ) {
+		asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
+		asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
+		asm volatile("pxor %xmm4,%xmm2");
+		/* P/Q data pages */
+		for ( z = z0-1 ; z >= start ; z-- ) {
+			asm volatile("pxor %xmm5,%xmm5");
+			asm volatile("pcmpgtb %xmm4,%xmm5");
+			asm volatile("paddb %xmm4,%xmm4");
+			asm volatile("pand %xmm0,%xmm5");
+			asm volatile("pxor %xmm5,%xmm4");
+			asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
+			asm volatile("pxor %xmm5,%xmm2");
+			asm volatile("pxor %xmm5,%xmm4");
+		}
+		/* P/Q left side optimization */
+		for ( z = start-1 ; z >= 0 ; z-- ) {
+			asm volatile("pxor %xmm5,%xmm5");
+			asm volatile("pcmpgtb %xmm4,%xmm5");
+			asm volatile("paddb %xmm4,%xmm4");
+			asm volatile("pand %xmm0,%xmm5");
+			asm volatile("pxor %xmm5,%xmm4");
+		}
+		asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
+		/* Don't use movntdq for r/w memory area < cache line */
+		asm volatile("movdqa %%xmm4,%0" : "=m" (q[d]));
+		asm volatile("movdqa %%xmm2,%0" : "=m" (p[d]));
+	}
+
+	asm volatile("sfence" : : : "memory");
+	kernel_fpu_end();
+}
+
 const struct raid6_calls raid6_sse2x1 = {
 	raid6_sse21_gen_syndrome,
+	raid6_sse21_xor_syndrome,
 	raid6_have_sse2,
 	"sse2x1",
 	1			/* Has cache hints */
@@ -150,8 +200,76 @@ static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs)
 	kernel_fpu_end();
 }
 
+static void raid6_sse22_xor_syndrome(int disks, int start, int stop,
+				     size_t bytes, void **ptrs)
+{
+	u8 **dptr = (u8 **)ptrs;
+	u8 *p, *q;
+	int d, z, z0;
+
+	z0 = stop;		/* P/Q right side optimization */
+	p = dptr[disks-2];	/* XOR parity */
+	q = dptr[disks-1];	/* RS syndrome */
+
+	kernel_fpu_begin();
+
+	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
+
+	for ( d = 0 ; d < bytes ; d += 32 ) {
+		asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
+		asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16]));
+		asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
+		asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16]));
+		asm volatile("pxor %xmm4,%xmm2");
+		asm volatile("pxor %xmm6,%xmm3");
+		/* P/Q data pages */
+		for ( z = z0-1 ; z >= start ; z-- ) {
+			asm volatile("pxor %xmm5,%xmm5");
+			asm volatile("pxor %xmm7,%xmm7");
+			asm volatile("pcmpgtb %xmm4,%xmm5");
+			asm volatile("pcmpgtb %xmm6,%xmm7");
+			asm volatile("paddb %xmm4,%xmm4");
+			asm volatile("paddb %xmm6,%xmm6");
+			asm volatile("pand %xmm0,%xmm5");
+			asm volatile("pand %xmm0,%xmm7");
+			asm volatile("pxor %xmm5,%xmm4");
+			asm volatile("pxor %xmm7,%xmm6");
+			asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
+			asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
+			asm volatile("pxor %xmm5,%xmm2");
+			asm volatile("pxor %xmm7,%xmm3");
+			asm volatile("pxor %xmm5,%xmm4");
+			asm volatile("pxor %xmm7,%xmm6");
+		}
+		/* P/Q left side optimization */
+		for ( z = start-1 ; z >= 0 ; z-- ) {
+			asm volatile("pxor %xmm5,%xmm5");
+			asm volatile("pxor %xmm7,%xmm7");
+			asm volatile("pcmpgtb %xmm4,%xmm5");
+			asm volatile("pcmpgtb %xmm6,%xmm7");
+			asm volatile("paddb %xmm4,%xmm4");
+			asm volatile("paddb %xmm6,%xmm6");
+			asm volatile("pand %xmm0,%xmm5");
+			asm volatile("pand %xmm0,%xmm7");
+			asm volatile("pxor %xmm5,%xmm4");
+			asm volatile("pxor %xmm7,%xmm6");
+		}
+		asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
+		asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16]));
+		/* Don't use movntdq for r/w memory area < cache line */
+		asm volatile("movdqa %%xmm4,%0" : "=m" (q[d]));
+		asm volatile("movdqa %%xmm6,%0" : "=m" (q[d+16]));
+		asm volatile("movdqa %%xmm2,%0" : "=m" (p[d]));
+		asm volatile("movdqa %%xmm3,%0" : "=m" (p[d+16]));
+	}
+
+	asm volatile("sfence" : : : "memory");
+	kernel_fpu_end();
+}
+
 const struct raid6_calls raid6_sse2x2 = {
 	raid6_sse22_gen_syndrome,
+	raid6_sse22_xor_syndrome,
 	raid6_have_sse2,
 	"sse2x2",
 	1			/* Has cache hints */
@@ -248,8 +366,117 @@ static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs)
 	kernel_fpu_end();
 }
 
+static void raid6_sse24_xor_syndrome(int disks, int start, int stop,
+				     size_t bytes, void **ptrs)
+{
+	u8 **dptr = (u8 **)ptrs;
+	u8 *p, *q;
+	int d, z, z0;
+
+	z0 = stop;		/* P/Q right side optimization */
+	p = dptr[disks-2];	/* XOR parity */
+	q = dptr[disks-1];	/* RS syndrome */
+
+	kernel_fpu_begin();
+
+	asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0]));
+
+	for ( d = 0 ; d < bytes ; d += 64 ) {
+		asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
+		asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16]));
+		asm volatile("movdqa %0,%%xmm12" :: "m" (dptr[z0][d+32]));
+		asm volatile("movdqa %0,%%xmm14" :: "m" (dptr[z0][d+48]));
+		asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
+		asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16]));
+		asm volatile("movdqa %0,%%xmm10" : : "m" (p[d+32]));
+		asm volatile("movdqa %0,%%xmm11" : : "m" (p[d+48]));
+		asm volatile("pxor %xmm4,%xmm2");
+		asm volatile("pxor %xmm6,%xmm3");
+		asm volatile("pxor %xmm12,%xmm10");
+		asm volatile("pxor %xmm14,%xmm11");
+		/* P/Q data pages */
+		for ( z = z0-1 ; z >= start ; z-- ) {
+			asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
+			asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32]));
+			asm volatile("pxor %xmm5,%xmm5");
+			asm volatile("pxor %xmm7,%xmm7");
+			asm volatile("pxor %xmm13,%xmm13");
+			asm volatile("pxor %xmm15,%xmm15");
+			asm volatile("pcmpgtb %xmm4,%xmm5");
+			asm volatile("pcmpgtb %xmm6,%xmm7");
+			asm volatile("pcmpgtb %xmm12,%xmm13");
+			asm volatile("pcmpgtb %xmm14,%xmm15");
+			asm volatile("paddb %xmm4,%xmm4");
+			asm volatile("paddb %xmm6,%xmm6");
+			asm volatile("paddb %xmm12,%xmm12");
+			asm volatile("paddb %xmm14,%xmm14");
+			asm volatile("pand %xmm0,%xmm5");
+			asm volatile("pand %xmm0,%xmm7");
+			asm volatile("pand %xmm0,%xmm13");
+			asm volatile("pand %xmm0,%xmm15");
+			asm volatile("pxor %xmm5,%xmm4");
+			asm volatile("pxor %xmm7,%xmm6");
+			asm volatile("pxor %xmm13,%xmm12");
+			asm volatile("pxor %xmm15,%xmm14");
+			asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
+			asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
+			asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32]));
+			asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48]));
+			asm volatile("pxor %xmm5,%xmm2");
+			asm volatile("pxor %xmm7,%xmm3");
+			asm volatile("pxor %xmm13,%xmm10");
+			asm volatile("pxor %xmm15,%xmm11");
+			asm volatile("pxor %xmm5,%xmm4");
+			asm volatile("pxor %xmm7,%xmm6");
+			asm volatile("pxor %xmm13,%xmm12");
+			asm volatile("pxor %xmm15,%xmm14");
+		}
+		asm volatile("prefetchnta %0" :: "m" (q[d]));
+		asm volatile("prefetchnta %0" :: "m" (q[d+32]));
+		/* P/Q left side optimization */
+		for ( z = start-1 ; z >= 0 ; z-- ) {
+			asm volatile("pxor %xmm5,%xmm5");
+			asm volatile("pxor %xmm7,%xmm7");
+			asm volatile("pxor %xmm13,%xmm13");
+			asm volatile("pxor %xmm15,%xmm15");
+			asm volatile("pcmpgtb %xmm4,%xmm5");
+			asm volatile("pcmpgtb %xmm6,%xmm7");
+			asm volatile("pcmpgtb %xmm12,%xmm13");
+			asm volatile("pcmpgtb %xmm14,%xmm15");
+			asm volatile("paddb %xmm4,%xmm4");
+			asm volatile("paddb %xmm6,%xmm6");
+			asm volatile("paddb %xmm12,%xmm12");
+			asm volatile("paddb %xmm14,%xmm14");
+			asm volatile("pand %xmm0,%xmm5");
+			asm volatile("pand %xmm0,%xmm7");
+			asm volatile("pand %xmm0,%xmm13");
+			asm volatile("pand %xmm0,%xmm15");
+			asm volatile("pxor %xmm5,%xmm4");
+			asm volatile("pxor %xmm7,%xmm6");
+			asm volatile("pxor %xmm13,%xmm12");
+			asm volatile("pxor %xmm15,%xmm14");
+		}
+		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
+		asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
+		asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32]));
+		asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48]));
+		asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
+		asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16]));
+		asm volatile("pxor %0,%%xmm12" : : "m" (q[d+32]));
+		asm volatile("pxor %0,%%xmm14" : : "m" (q[d+48]));
+		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
+		asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
+		asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32]));
+		asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48]));
+	}
+	asm volatile("sfence" : : : "memory");
+	kernel_fpu_end();
+}
+
+
 const struct raid6_calls raid6_sse2x4 = {
 	raid6_sse24_gen_syndrome,
+	raid6_sse24_xor_syndrome,
 	raid6_have_sse2,
 	"sse2x4",
 	1			/* Has cache hints */
diff --git a/lib/raid6/test/test.c b/lib/raid6/test/test.c
index 5a485b7a7d3c..3bebbabdb510 100644
--- a/lib/raid6/test/test.c
+++ b/lib/raid6/test/test.c
@@ -28,11 +28,11 @@ char *dataptrs[NDISKS];
 char data[NDISKS][PAGE_SIZE];
 char recovi[PAGE_SIZE], recovj[PAGE_SIZE];
 
-static void makedata(void)
+static void makedata(int start, int stop)
 {
 	int i, j;
 
-	for (i = 0; i < NDISKS; i++) {
+	for (i = start; i <= stop; i++) {
 		for (j = 0; j < PAGE_SIZE; j++)
 			data[i][j] = rand();
 
@@ -91,34 +91,55 @@ int main(int argc, char *argv[])
 {
 	const struct raid6_calls *const *algo;
 	const struct raid6_recov_calls *const *ra;
-	int i, j;
+	int i, j, p1, p2;
 	int err = 0;
 
-	makedata();
+	makedata(0, NDISKS-1);
 
 	for (ra = raid6_recov_algos; *ra; ra++) {
 		if ((*ra)->valid && !(*ra)->valid())
 			continue;
+
 		raid6_2data_recov = (*ra)->data2;
 		raid6_datap_recov = (*ra)->datap;
 
 		printf("using recovery %s\n", (*ra)->name);
 
 		for (algo = raid6_algos; *algo; algo++) {
-			if (!(*algo)->valid || (*algo)->valid()) {
-				raid6_call = **algo;
+			if ((*algo)->valid && !(*algo)->valid())
+				continue;
+
+			raid6_call = **algo;
+
+			/* Nuke syndromes */
+			memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE);
+
+			/* Generate assumed good syndrome */
+			raid6_call.gen_syndrome(NDISKS, PAGE_SIZE,
+						(void **)&dataptrs);
+
+			for (i = 0; i < NDISKS-1; i++)
+				for (j = i+1; j < NDISKS; j++)
+					err += test_disks(i, j);
+
+			if (!raid6_call.xor_syndrome)
+				continue;
+
+			for (p1 = 0; p1 < NDISKS-2; p1++)
+				for (p2 = p1; p2 < NDISKS-2; p2++) {
 
-				/* Nuke syndromes */
-				memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE);
+					/* Simulate rmw run */
+					raid6_call.xor_syndrome(NDISKS, p1, p2, PAGE_SIZE,
+								(void **)&dataptrs);
+					makedata(p1, p2);
+					raid6_call.xor_syndrome(NDISKS, p1, p2, PAGE_SIZE,
+								(void **)&dataptrs);
 
-				/* Generate assumed good syndrome */
-				raid6_call.gen_syndrome(NDISKS, PAGE_SIZE,
-							(void **)&dataptrs);
+					for (i = 0; i < NDISKS-1; i++)
+						for (j = i+1; j < NDISKS; j++)
+							err += test_disks(i, j);
+				}
 
-				for (i = 0; i < NDISKS-1; i++)
-					for (j = i+1; j < NDISKS; j++)
-						err += test_disks(i, j);
-			}
 		}
 		printf("\n");
 	}
diff --git a/lib/raid6/tilegx.uc b/lib/raid6/tilegx.uc
index e7c29459cbcd..2dd291a11264 100644
--- a/lib/raid6/tilegx.uc
+++ b/lib/raid6/tilegx.uc
@@ -80,6 +80,7 @@ void raid6_tilegx$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
 
 const struct raid6_calls raid6_tilegx$# = {
 	raid6_tilegx$#_gen_syndrome,
+	NULL,			/* XOR not yet implemented */
 	NULL,
 	"tilegx$#",
 	0