diff options
author | Dan Williams <dan.j.williams@intel.com> | 2009-10-19 17:05:12 -0400 |
---|---|---|
committer | Dan Williams <dan.j.williams@intel.com> | 2009-10-20 02:34:46 -0400 |
commit | da17bf4306fd3a52e938b121df82a7baa10eb282 (patch) | |
tree | c5e5056287547e6674b63c690c911367e76f8d09 | |
parent | 030b07720be0f3bfada12ff6bfa3c61a91212f32 (diff) |
async_tx: fix asynchronous raid6 recovery for ddf layouts
The raid6 recovery code currently requires special handling of the
4-disk and 5-disk recovery scenarios for the native layout. Quoting
from commit 0a82a623:
In these situations the default N-disk algorithm will present
0-source or 1-source operations to dma devices. To cover for
dma devices where the minimum source count is 2 we implement
4-disk and 5-disk handling in the recovery code.
The ddf layout presents disks=6 and disks=7 to the recovery code in
these situations. Instead of looking at the number of disks, count the
number of non-zero sources in the list (which includes the two failed
devices) and call the special-case code when the number of non-failed
sources is 0 or 1.
[neilb@suse.de: replace 'ddf' flag with counting good sources]
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
-rw-r--r-- | crypto/async_tx/async_raid6_recov.c | 86 |
1 files changed, 56 insertions, 30 deletions
diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c index 8e30b6ed0789..943f2abac9b4 100644 --- a/crypto/async_tx/async_raid6_recov.c +++ b/crypto/async_tx/async_raid6_recov.c | |||
@@ -131,8 +131,8 @@ async_mult(struct page *dest, struct page *src, u8 coef, size_t len, | |||
131 | } | 131 | } |
132 | 132 | ||
133 | static struct dma_async_tx_descriptor * | 133 | static struct dma_async_tx_descriptor * |
134 | __2data_recov_4(size_t bytes, int faila, int failb, struct page **blocks, | 134 | __2data_recov_4(int disks, size_t bytes, int faila, int failb, |
135 | struct async_submit_ctl *submit) | 135 | struct page **blocks, struct async_submit_ctl *submit) |
136 | { | 136 | { |
137 | struct dma_async_tx_descriptor *tx = NULL; | 137 | struct dma_async_tx_descriptor *tx = NULL; |
138 | struct page *p, *q, *a, *b; | 138 | struct page *p, *q, *a, *b; |
@@ -143,8 +143,8 @@ __2data_recov_4(size_t bytes, int faila, int failb, struct page **blocks, | |||
143 | void *cb_param = submit->cb_param; | 143 | void *cb_param = submit->cb_param; |
144 | void *scribble = submit->scribble; | 144 | void *scribble = submit->scribble; |
145 | 145 | ||
146 | p = blocks[4-2]; | 146 | p = blocks[disks-2]; |
147 | q = blocks[4-1]; | 147 | q = blocks[disks-1]; |
148 | 148 | ||
149 | a = blocks[faila]; | 149 | a = blocks[faila]; |
150 | b = blocks[failb]; | 150 | b = blocks[failb]; |
@@ -170,8 +170,8 @@ __2data_recov_4(size_t bytes, int faila, int failb, struct page **blocks, | |||
170 | } | 170 | } |
171 | 171 | ||
172 | static struct dma_async_tx_descriptor * | 172 | static struct dma_async_tx_descriptor * |
173 | __2data_recov_5(size_t bytes, int faila, int failb, struct page **blocks, | 173 | __2data_recov_5(int disks, size_t bytes, int faila, int failb, |
174 | struct async_submit_ctl *submit) | 174 | struct page **blocks, struct async_submit_ctl *submit) |
175 | { | 175 | { |
176 | struct dma_async_tx_descriptor *tx = NULL; | 176 | struct dma_async_tx_descriptor *tx = NULL; |
177 | struct page *p, *q, *g, *dp, *dq; | 177 | struct page *p, *q, *g, *dp, *dq; |
@@ -181,21 +181,22 @@ __2data_recov_5(size_t bytes, int faila, int failb, struct page **blocks, | |||
181 | dma_async_tx_callback cb_fn = submit->cb_fn; | 181 | dma_async_tx_callback cb_fn = submit->cb_fn; |
182 | void *cb_param = submit->cb_param; | 182 | void *cb_param = submit->cb_param; |
183 | void *scribble = submit->scribble; | 183 | void *scribble = submit->scribble; |
184 | int uninitialized_var(good); | 184 | int good_srcs, good, i; |
185 | int i; | ||
186 | 185 | ||
187 | for (i = 0; i < 3; i++) { | 186 | good_srcs = 0; |
187 | good = -1; | ||
188 | for (i = 0; i < disks-2; i++) { | ||
189 | if (blocks[i] == NULL) | ||
190 | continue; | ||
188 | if (i == faila || i == failb) | 191 | if (i == faila || i == failb) |
189 | continue; | 192 | continue; |
190 | else { | 193 | good = i; |
191 | good = i; | 194 | good_srcs++; |
192 | break; | ||
193 | } | ||
194 | } | 195 | } |
195 | BUG_ON(i >= 3); | 196 | BUG_ON(good_srcs > 1); |
196 | 197 | ||
197 | p = blocks[5-2]; | 198 | p = blocks[disks-2]; |
198 | q = blocks[5-1]; | 199 | q = blocks[disks-1]; |
199 | g = blocks[good]; | 200 | g = blocks[good]; |
200 | 201 | ||
201 | /* Compute syndrome with zero for the missing data pages | 202 | /* Compute syndrome with zero for the missing data pages |
@@ -323,6 +324,8 @@ struct dma_async_tx_descriptor * | |||
323 | async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb, | 324 | async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb, |
324 | struct page **blocks, struct async_submit_ctl *submit) | 325 | struct page **blocks, struct async_submit_ctl *submit) |
325 | { | 326 | { |
327 | int non_zero_srcs, i; | ||
328 | |||
326 | BUG_ON(faila == failb); | 329 | BUG_ON(faila == failb); |
327 | if (failb < faila) | 330 | if (failb < faila) |
328 | swap(faila, failb); | 331 | swap(faila, failb); |
@@ -334,12 +337,11 @@ async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb, | |||
334 | */ | 337 | */ |
335 | if (!submit->scribble) { | 338 | if (!submit->scribble) { |
336 | void **ptrs = (void **) blocks; | 339 | void **ptrs = (void **) blocks; |
337 | int i; | ||
338 | 340 | ||
339 | async_tx_quiesce(&submit->depend_tx); | 341 | async_tx_quiesce(&submit->depend_tx); |
340 | for (i = 0; i < disks; i++) | 342 | for (i = 0; i < disks; i++) |
341 | if (blocks[i] == NULL) | 343 | if (blocks[i] == NULL) |
342 | ptrs[i] = (void*)raid6_empty_zero_page; | 344 | ptrs[i] = (void *) raid6_empty_zero_page; |
343 | else | 345 | else |
344 | ptrs[i] = page_address(blocks[i]); | 346 | ptrs[i] = page_address(blocks[i]); |
345 | 347 | ||
@@ -350,19 +352,30 @@ async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb, | |||
350 | return NULL; | 352 | return NULL; |
351 | } | 353 | } |
352 | 354 | ||
353 | switch (disks) { | 355 | non_zero_srcs = 0; |
354 | case 4: | 356 | for (i = 0; i < disks-2 && non_zero_srcs < 4; i++) |
357 | if (blocks[i]) | ||
358 | non_zero_srcs++; | ||
359 | switch (non_zero_srcs) { | ||
360 | case 0: | ||
361 | case 1: | ||
362 | /* There must be at least 2 sources - the failed devices. */ | ||
363 | BUG(); | ||
364 | |||
365 | case 2: | ||
355 | /* dma devices do not uniformly understand a zero source pq | 366 | /* dma devices do not uniformly understand a zero source pq |
356 | * operation (in contrast to the synchronous case), so | 367 | * operation (in contrast to the synchronous case), so |
357 | * explicitly handle the 4 disk special case | 368 | * explicitly handle the special case of a 4 disk array with |
369 | * both data disks missing. | ||
358 | */ | 370 | */ |
359 | return __2data_recov_4(bytes, faila, failb, blocks, submit); | 371 | return __2data_recov_4(disks, bytes, faila, failb, blocks, submit); |
360 | case 5: | 372 | case 3: |
361 | /* dma devices do not uniformly understand a single | 373 | /* dma devices do not uniformly understand a single |
362 | * source pq operation (in contrast to the synchronous | 374 | * source pq operation (in contrast to the synchronous |
363 | * case), so explicitly handle the 5 disk special case | 375 | * case), so explicitly handle the special case of a 5 disk |
376 | * array with 2 of 3 data disks missing. | ||
364 | */ | 377 | */ |
365 | return __2data_recov_5(bytes, faila, failb, blocks, submit); | 378 | return __2data_recov_5(disks, bytes, faila, failb, blocks, submit); |
366 | default: | 379 | default: |
367 | return __2data_recov_n(disks, bytes, faila, failb, blocks, submit); | 380 | return __2data_recov_n(disks, bytes, faila, failb, blocks, submit); |
368 | } | 381 | } |
@@ -388,6 +401,7 @@ async_raid6_datap_recov(int disks, size_t bytes, int faila, | |||
388 | dma_async_tx_callback cb_fn = submit->cb_fn; | 401 | dma_async_tx_callback cb_fn = submit->cb_fn; |
389 | void *cb_param = submit->cb_param; | 402 | void *cb_param = submit->cb_param; |
390 | void *scribble = submit->scribble; | 403 | void *scribble = submit->scribble; |
404 | int good_srcs, good, i; | ||
391 | struct page *srcs[2]; | 405 | struct page *srcs[2]; |
392 | 406 | ||
393 | pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes); | 407 | pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes); |
@@ -397,7 +411,6 @@ async_raid6_datap_recov(int disks, size_t bytes, int faila, | |||
397 | */ | 411 | */ |
398 | if (!scribble) { | 412 | if (!scribble) { |
399 | void **ptrs = (void **) blocks; | 413 | void **ptrs = (void **) blocks; |
400 | int i; | ||
401 | 414 | ||
402 | async_tx_quiesce(&submit->depend_tx); | 415 | async_tx_quiesce(&submit->depend_tx); |
403 | for (i = 0; i < disks; i++) | 416 | for (i = 0; i < disks; i++) |
@@ -413,6 +426,20 @@ async_raid6_datap_recov(int disks, size_t bytes, int faila, | |||
413 | return NULL; | 426 | return NULL; |
414 | } | 427 | } |
415 | 428 | ||
429 | good_srcs = 0; | ||
430 | good = -1; | ||
431 | for (i = 0; i < disks-2; i++) { | ||
432 | if (i == faila) | ||
433 | continue; | ||
434 | if (blocks[i]) { | ||
435 | good = i; | ||
436 | good_srcs++; | ||
437 | if (good_srcs > 1) | ||
438 | break; | ||
439 | } | ||
440 | } | ||
441 | BUG_ON(good_srcs == 0); | ||
442 | |||
416 | p = blocks[disks-2]; | 443 | p = blocks[disks-2]; |
417 | q = blocks[disks-1]; | 444 | q = blocks[disks-1]; |
418 | 445 | ||
@@ -423,11 +450,10 @@ async_raid6_datap_recov(int disks, size_t bytes, int faila, | |||
423 | blocks[faila] = NULL; | 450 | blocks[faila] = NULL; |
424 | blocks[disks-1] = dq; | 451 | blocks[disks-1] = dq; |
425 | 452 | ||
426 | /* in the 4 disk case we only need to perform a single source | 453 | /* in the 4-disk case we only need to perform a single source |
427 | * multiplication | 454 | * multiplication with the one good data block. |
428 | */ | 455 | */ |
429 | if (disks == 4) { | 456 | if (good_srcs == 1) { |
430 | int good = faila == 0 ? 1 : 0; | ||
431 | struct page *g = blocks[good]; | 457 | struct page *g = blocks[good]; |
432 | 458 | ||
433 | init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, | 459 | init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, |