diff options
author | Dan Williams <dan.j.williams@intel.com> | 2007-01-02 15:52:30 -0500 |
---|---|---|
committer | Dan Williams <dan.j.williams@intel.com> | 2007-07-13 11:06:15 -0400 |
commit | 91c00924846a0034020451c280c76baa4299f9dc (patch) | |
tree | 7124ed6706937b793a10c37a861c5fc0f2e5b348 /drivers/md/raid5.c | |
parent | 45b4233caac05da0118b608a9fc2a40a9fc580cd (diff) |
md: raid5_run_ops - run stripe operations outside sh->lock
When the raid acceleration work was proposed, Neil laid out the following
attack plan:
1/ move the xor and copy operations outside spin_lock(&sh->lock)
2/ find/implement an asynchronous offload api
The raid5_run_ops routine uses the asynchronous offload api (async_tx) and
the stripe_operations member of a stripe_head to carry out xor+copy
operations asynchronously, outside the lock.
To perform operations outside the lock a new set of state flags is needed
to track new requests, in-flight requests, and completed requests. In this
new model handle_stripe is tasked with scanning the stripe_head for work,
updating the stripe_operations structure, and finally dropping the lock and
calling raid5_run_ops for processing. The following flags outline the
requests that handle_stripe can make of raid5_run_ops:
STRIPE_OP_BIOFILL
- copy data into request buffers to satisfy a read request
STRIPE_OP_COMPUTE_BLK
- generate a missing block in the cache from the other blocks
STRIPE_OP_PREXOR
- subtract existing data as part of the read-modify-write process
STRIPE_OP_BIODRAIN
- copy data out of request buffers to satisfy a write request
STRIPE_OP_POSTXOR
- recalculate parity for new data that has entered the cache
STRIPE_OP_CHECK
- verify that the parity is correct
STRIPE_OP_IO
- submit i/o to the member disks (note this was already performed outside
the stripe lock, but it made sense to add it as an operation type)
The flow is:
1/ handle_stripe sets STRIPE_OP_* in sh->ops.pending
2/ raid5_run_ops reads sh->ops.pending, sets sh->ops.ack, and submits the
operation to the async_tx api
3/ async_tx triggers the completion callback routine to set
sh->ops.complete and release the stripe
4/ handle_stripe runs again to finish the operation and optionally submit
new operations that were previously blocked
Note: this patch just defines raid5_run_ops; subsequent commits (one per
major operation type) modify handle_stripe to take advantage of this
routine.
Changelog:
* removed ops_complete_biodrain in favor of ops_complete_postxor and
ops_complete_write.
* removed the raid5_run_ops workqueue
* call bi_end_io for reads in ops_complete_biofill, saves a call to
handle_stripe
* explicitly handle the 2-disk raid5 case (xor becomes memcpy), Neil Brown
* fix race between async engines and bi_end_io call for reads, Neil Brown
* remove unnecessary spin_lock from ops_complete_biofill
* remove test_and_set/test_and_clear BUG_ONs, Neil Brown
* remove explicit interrupt handling for channel switching; this feature
was absorbed (i.e. it is now implicit) by the async_tx api
* use return_io in ops_complete_biofill
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-By: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r-- | drivers/md/raid5.c | 536 |
1 files changed, 536 insertions, 0 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index e372e57687ee..0b7002479655 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -52,6 +52,7 @@ | |||
52 | #include "raid6.h" | 52 | #include "raid6.h" |
53 | 53 | ||
54 | #include <linux/raid/bitmap.h> | 54 | #include <linux/raid/bitmap.h> |
55 | #include <linux/async_tx.h> | ||
55 | 56 | ||
56 | /* | 57 | /* |
57 | * Stripe cache | 58 | * Stripe cache |
@@ -341,6 +342,541 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector | |||
341 | return sh; | 342 | return sh; |
342 | } | 343 | } |
343 | 344 | ||
345 | static int | ||
346 | raid5_end_read_request(struct bio *bi, unsigned int bytes_done, int error); | ||
347 | static int | ||
348 | raid5_end_write_request (struct bio *bi, unsigned int bytes_done, int error); | ||
349 | |||
350 | static void ops_run_io(struct stripe_head *sh) | ||
351 | { | ||
352 | raid5_conf_t *conf = sh->raid_conf; | ||
353 | int i, disks = sh->disks; | ||
354 | |||
355 | might_sleep(); | ||
356 | |||
357 | for (i = disks; i--; ) { | ||
358 | int rw; | ||
359 | struct bio *bi; | ||
360 | mdk_rdev_t *rdev; | ||
361 | if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags)) | ||
362 | rw = WRITE; | ||
363 | else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags)) | ||
364 | rw = READ; | ||
365 | else | ||
366 | continue; | ||
367 | |||
368 | bi = &sh->dev[i].req; | ||
369 | |||
370 | bi->bi_rw = rw; | ||
371 | if (rw == WRITE) | ||
372 | bi->bi_end_io = raid5_end_write_request; | ||
373 | else | ||
374 | bi->bi_end_io = raid5_end_read_request; | ||
375 | |||
376 | rcu_read_lock(); | ||
377 | rdev = rcu_dereference(conf->disks[i].rdev); | ||
378 | if (rdev && test_bit(Faulty, &rdev->flags)) | ||
379 | rdev = NULL; | ||
380 | if (rdev) | ||
381 | atomic_inc(&rdev->nr_pending); | ||
382 | rcu_read_unlock(); | ||
383 | |||
384 | if (rdev) { | ||
385 | if (test_bit(STRIPE_SYNCING, &sh->state) || | ||
386 | test_bit(STRIPE_EXPAND_SOURCE, &sh->state) || | ||
387 | test_bit(STRIPE_EXPAND_READY, &sh->state)) | ||
388 | md_sync_acct(rdev->bdev, STRIPE_SECTORS); | ||
389 | |||
390 | bi->bi_bdev = rdev->bdev; | ||
391 | pr_debug("%s: for %llu schedule op %ld on disc %d\n", | ||
392 | __FUNCTION__, (unsigned long long)sh->sector, | ||
393 | bi->bi_rw, i); | ||
394 | atomic_inc(&sh->count); | ||
395 | bi->bi_sector = sh->sector + rdev->data_offset; | ||
396 | bi->bi_flags = 1 << BIO_UPTODATE; | ||
397 | bi->bi_vcnt = 1; | ||
398 | bi->bi_max_vecs = 1; | ||
399 | bi->bi_idx = 0; | ||
400 | bi->bi_io_vec = &sh->dev[i].vec; | ||
401 | bi->bi_io_vec[0].bv_len = STRIPE_SIZE; | ||
402 | bi->bi_io_vec[0].bv_offset = 0; | ||
403 | bi->bi_size = STRIPE_SIZE; | ||
404 | bi->bi_next = NULL; | ||
405 | if (rw == WRITE && | ||
406 | test_bit(R5_ReWrite, &sh->dev[i].flags)) | ||
407 | atomic_add(STRIPE_SECTORS, | ||
408 | &rdev->corrected_errors); | ||
409 | generic_make_request(bi); | ||
410 | } else { | ||
411 | if (rw == WRITE) | ||
412 | set_bit(STRIPE_DEGRADED, &sh->state); | ||
413 | pr_debug("skip op %ld on disc %d for sector %llu\n", | ||
414 | bi->bi_rw, i, (unsigned long long)sh->sector); | ||
415 | clear_bit(R5_LOCKED, &sh->dev[i].flags); | ||
416 | set_bit(STRIPE_HANDLE, &sh->state); | ||
417 | } | ||
418 | } | ||
419 | } | ||
420 | |||
421 | static struct dma_async_tx_descriptor * | ||
422 | async_copy_data(int frombio, struct bio *bio, struct page *page, | ||
423 | sector_t sector, struct dma_async_tx_descriptor *tx) | ||
424 | { | ||
425 | struct bio_vec *bvl; | ||
426 | struct page *bio_page; | ||
427 | int i; | ||
428 | int page_offset; | ||
429 | |||
430 | if (bio->bi_sector >= sector) | ||
431 | page_offset = (signed)(bio->bi_sector - sector) * 512; | ||
432 | else | ||
433 | page_offset = (signed)(sector - bio->bi_sector) * -512; | ||
434 | bio_for_each_segment(bvl, bio, i) { | ||
435 | int len = bio_iovec_idx(bio, i)->bv_len; | ||
436 | int clen; | ||
437 | int b_offset = 0; | ||
438 | |||
439 | if (page_offset < 0) { | ||
440 | b_offset = -page_offset; | ||
441 | page_offset += b_offset; | ||
442 | len -= b_offset; | ||
443 | } | ||
444 | |||
445 | if (len > 0 && page_offset + len > STRIPE_SIZE) | ||
446 | clen = STRIPE_SIZE - page_offset; | ||
447 | else | ||
448 | clen = len; | ||
449 | |||
450 | if (clen > 0) { | ||
451 | b_offset += bio_iovec_idx(bio, i)->bv_offset; | ||
452 | bio_page = bio_iovec_idx(bio, i)->bv_page; | ||
453 | if (frombio) | ||
454 | tx = async_memcpy(page, bio_page, page_offset, | ||
455 | b_offset, clen, | ||
456 | ASYNC_TX_DEP_ACK | ASYNC_TX_KMAP_SRC, | ||
457 | tx, NULL, NULL); | ||
458 | else | ||
459 | tx = async_memcpy(bio_page, page, b_offset, | ||
460 | page_offset, clen, | ||
461 | ASYNC_TX_DEP_ACK | ASYNC_TX_KMAP_DST, | ||
462 | tx, NULL, NULL); | ||
463 | } | ||
464 | if (clen < len) /* hit end of page */ | ||
465 | break; | ||
466 | page_offset += len; | ||
467 | } | ||
468 | |||
469 | return tx; | ||
470 | } | ||
471 | |||
472 | static void ops_complete_biofill(void *stripe_head_ref) | ||
473 | { | ||
474 | struct stripe_head *sh = stripe_head_ref; | ||
475 | struct bio *return_bi = NULL; | ||
476 | raid5_conf_t *conf = sh->raid_conf; | ||
477 | int i, more_to_read = 0; | ||
478 | |||
479 | pr_debug("%s: stripe %llu\n", __FUNCTION__, | ||
480 | (unsigned long long)sh->sector); | ||
481 | |||
482 | /* clear completed biofills */ | ||
483 | for (i = sh->disks; i--; ) { | ||
484 | struct r5dev *dev = &sh->dev[i]; | ||
485 | /* check if this stripe has new incoming reads */ | ||
486 | if (dev->toread) | ||
487 | more_to_read++; | ||
488 | |||
489 | /* acknowledge completion of a biofill operation */ | ||
490 | /* and check if we need to reply to a read request | ||
491 | */ | ||
492 | if (test_bit(R5_Wantfill, &dev->flags) && !dev->toread) { | ||
493 | struct bio *rbi, *rbi2; | ||
494 | clear_bit(R5_Wantfill, &dev->flags); | ||
495 | |||
496 | /* The access to dev->read is outside of the | ||
497 | * spin_lock_irq(&conf->device_lock), but is protected | ||
498 | * by the STRIPE_OP_BIOFILL pending bit | ||
499 | */ | ||
500 | BUG_ON(!dev->read); | ||
501 | rbi = dev->read; | ||
502 | dev->read = NULL; | ||
503 | while (rbi && rbi->bi_sector < | ||
504 | dev->sector + STRIPE_SECTORS) { | ||
505 | rbi2 = r5_next_bio(rbi, dev->sector); | ||
506 | spin_lock_irq(&conf->device_lock); | ||
507 | if (--rbi->bi_phys_segments == 0) { | ||
508 | rbi->bi_next = return_bi; | ||
509 | return_bi = rbi; | ||
510 | } | ||
511 | spin_unlock_irq(&conf->device_lock); | ||
512 | rbi = rbi2; | ||
513 | } | ||
514 | } | ||
515 | } | ||
516 | clear_bit(STRIPE_OP_BIOFILL, &sh->ops.ack); | ||
517 | clear_bit(STRIPE_OP_BIOFILL, &sh->ops.pending); | ||
518 | |||
519 | return_io(return_bi); | ||
520 | |||
521 | if (more_to_read) | ||
522 | set_bit(STRIPE_HANDLE, &sh->state); | ||
523 | release_stripe(sh); | ||
524 | } | ||
525 | |||
526 | static void ops_run_biofill(struct stripe_head *sh) | ||
527 | { | ||
528 | struct dma_async_tx_descriptor *tx = NULL; | ||
529 | raid5_conf_t *conf = sh->raid_conf; | ||
530 | int i; | ||
531 | |||
532 | pr_debug("%s: stripe %llu\n", __FUNCTION__, | ||
533 | (unsigned long long)sh->sector); | ||
534 | |||
535 | for (i = sh->disks; i--; ) { | ||
536 | struct r5dev *dev = &sh->dev[i]; | ||
537 | if (test_bit(R5_Wantfill, &dev->flags)) { | ||
538 | struct bio *rbi; | ||
539 | spin_lock_irq(&conf->device_lock); | ||
540 | dev->read = rbi = dev->toread; | ||
541 | dev->toread = NULL; | ||
542 | spin_unlock_irq(&conf->device_lock); | ||
543 | while (rbi && rbi->bi_sector < | ||
544 | dev->sector + STRIPE_SECTORS) { | ||
545 | tx = async_copy_data(0, rbi, dev->page, | ||
546 | dev->sector, tx); | ||
547 | rbi = r5_next_bio(rbi, dev->sector); | ||
548 | } | ||
549 | } | ||
550 | } | ||
551 | |||
552 | atomic_inc(&sh->count); | ||
553 | async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx, | ||
554 | ops_complete_biofill, sh); | ||
555 | } | ||
556 | |||
557 | static void ops_complete_compute5(void *stripe_head_ref) | ||
558 | { | ||
559 | struct stripe_head *sh = stripe_head_ref; | ||
560 | int target = sh->ops.target; | ||
561 | struct r5dev *tgt = &sh->dev[target]; | ||
562 | |||
563 | pr_debug("%s: stripe %llu\n", __FUNCTION__, | ||
564 | (unsigned long long)sh->sector); | ||
565 | |||
566 | set_bit(R5_UPTODATE, &tgt->flags); | ||
567 | BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); | ||
568 | clear_bit(R5_Wantcompute, &tgt->flags); | ||
569 | set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete); | ||
570 | set_bit(STRIPE_HANDLE, &sh->state); | ||
571 | release_stripe(sh); | ||
572 | } | ||
573 | |||
574 | static struct dma_async_tx_descriptor * | ||
575 | ops_run_compute5(struct stripe_head *sh, unsigned long pending) | ||
576 | { | ||
577 | /* kernel stack size limits the total number of disks */ | ||
578 | int disks = sh->disks; | ||
579 | struct page *xor_srcs[disks]; | ||
580 | int target = sh->ops.target; | ||
581 | struct r5dev *tgt = &sh->dev[target]; | ||
582 | struct page *xor_dest = tgt->page; | ||
583 | int count = 0; | ||
584 | struct dma_async_tx_descriptor *tx; | ||
585 | int i; | ||
586 | |||
587 | pr_debug("%s: stripe %llu block: %d\n", | ||
588 | __FUNCTION__, (unsigned long long)sh->sector, target); | ||
589 | BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); | ||
590 | |||
591 | for (i = disks; i--; ) | ||
592 | if (i != target) | ||
593 | xor_srcs[count++] = sh->dev[i].page; | ||
594 | |||
595 | atomic_inc(&sh->count); | ||
596 | |||
597 | if (unlikely(count == 1)) | ||
598 | tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, | ||
599 | 0, NULL, ops_complete_compute5, sh); | ||
600 | else | ||
601 | tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, | ||
602 | ASYNC_TX_XOR_ZERO_DST, NULL, | ||
603 | ops_complete_compute5, sh); | ||
604 | |||
605 | /* ack now if postxor is not set to be run */ | ||
606 | if (tx && !test_bit(STRIPE_OP_POSTXOR, &pending)) | ||
607 | async_tx_ack(tx); | ||
608 | |||
609 | return tx; | ||
610 | } | ||
611 | |||
612 | static void ops_complete_prexor(void *stripe_head_ref) | ||
613 | { | ||
614 | struct stripe_head *sh = stripe_head_ref; | ||
615 | |||
616 | pr_debug("%s: stripe %llu\n", __FUNCTION__, | ||
617 | (unsigned long long)sh->sector); | ||
618 | |||
619 | set_bit(STRIPE_OP_PREXOR, &sh->ops.complete); | ||
620 | } | ||
621 | |||
622 | static struct dma_async_tx_descriptor * | ||
623 | ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) | ||
624 | { | ||
625 | /* kernel stack size limits the total number of disks */ | ||
626 | int disks = sh->disks; | ||
627 | struct page *xor_srcs[disks]; | ||
628 | int count = 0, pd_idx = sh->pd_idx, i; | ||
629 | |||
630 | /* existing parity data subtracted */ | ||
631 | struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; | ||
632 | |||
633 | pr_debug("%s: stripe %llu\n", __FUNCTION__, | ||
634 | (unsigned long long)sh->sector); | ||
635 | |||
636 | for (i = disks; i--; ) { | ||
637 | struct r5dev *dev = &sh->dev[i]; | ||
638 | /* Only process blocks that are known to be uptodate */ | ||
639 | if (dev->towrite && test_bit(R5_Wantprexor, &dev->flags)) | ||
640 | xor_srcs[count++] = dev->page; | ||
641 | } | ||
642 | |||
643 | tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, | ||
644 | ASYNC_TX_DEP_ACK | ASYNC_TX_XOR_DROP_DST, tx, | ||
645 | ops_complete_prexor, sh); | ||
646 | |||
647 | return tx; | ||
648 | } | ||
649 | |||
650 | static struct dma_async_tx_descriptor * | ||
651 | ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) | ||
652 | { | ||
653 | int disks = sh->disks; | ||
654 | int pd_idx = sh->pd_idx, i; | ||
655 | |||
656 | /* check if prexor is active which means only process blocks | ||
657 | * that are part of a read-modify-write (Wantprexor) | ||
658 | */ | ||
659 | int prexor = test_bit(STRIPE_OP_PREXOR, &sh->ops.pending); | ||
660 | |||
661 | pr_debug("%s: stripe %llu\n", __FUNCTION__, | ||
662 | (unsigned long long)sh->sector); | ||
663 | |||
664 | for (i = disks; i--; ) { | ||
665 | struct r5dev *dev = &sh->dev[i]; | ||
666 | struct bio *chosen; | ||
667 | int towrite; | ||
668 | |||
669 | towrite = 0; | ||
670 | if (prexor) { /* rmw */ | ||
671 | if (dev->towrite && | ||
672 | test_bit(R5_Wantprexor, &dev->flags)) | ||
673 | towrite = 1; | ||
674 | } else { /* rcw */ | ||
675 | if (i != pd_idx && dev->towrite && | ||
676 | test_bit(R5_LOCKED, &dev->flags)) | ||
677 | towrite = 1; | ||
678 | } | ||
679 | |||
680 | if (towrite) { | ||
681 | struct bio *wbi; | ||
682 | |||
683 | spin_lock(&sh->lock); | ||
684 | chosen = dev->towrite; | ||
685 | dev->towrite = NULL; | ||
686 | BUG_ON(dev->written); | ||
687 | wbi = dev->written = chosen; | ||
688 | spin_unlock(&sh->lock); | ||
689 | |||
690 | while (wbi && wbi->bi_sector < | ||
691 | dev->sector + STRIPE_SECTORS) { | ||
692 | tx = async_copy_data(1, wbi, dev->page, | ||
693 | dev->sector, tx); | ||
694 | wbi = r5_next_bio(wbi, dev->sector); | ||
695 | } | ||
696 | } | ||
697 | } | ||
698 | |||
699 | return tx; | ||
700 | } | ||
701 | |||
702 | static void ops_complete_postxor(void *stripe_head_ref) | ||
703 | { | ||
704 | struct stripe_head *sh = stripe_head_ref; | ||
705 | |||
706 | pr_debug("%s: stripe %llu\n", __FUNCTION__, | ||
707 | (unsigned long long)sh->sector); | ||
708 | |||
709 | set_bit(STRIPE_OP_POSTXOR, &sh->ops.complete); | ||
710 | set_bit(STRIPE_HANDLE, &sh->state); | ||
711 | release_stripe(sh); | ||
712 | } | ||
713 | |||
714 | static void ops_complete_write(void *stripe_head_ref) | ||
715 | { | ||
716 | struct stripe_head *sh = stripe_head_ref; | ||
717 | int disks = sh->disks, i, pd_idx = sh->pd_idx; | ||
718 | |||
719 | pr_debug("%s: stripe %llu\n", __FUNCTION__, | ||
720 | (unsigned long long)sh->sector); | ||
721 | |||
722 | for (i = disks; i--; ) { | ||
723 | struct r5dev *dev = &sh->dev[i]; | ||
724 | if (dev->written || i == pd_idx) | ||
725 | set_bit(R5_UPTODATE, &dev->flags); | ||
726 | } | ||
727 | |||
728 | set_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete); | ||
729 | set_bit(STRIPE_OP_POSTXOR, &sh->ops.complete); | ||
730 | |||
731 | set_bit(STRIPE_HANDLE, &sh->state); | ||
732 | release_stripe(sh); | ||
733 | } | ||
734 | |||
735 | static void | ||
736 | ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) | ||
737 | { | ||
738 | /* kernel stack size limits the total number of disks */ | ||
739 | int disks = sh->disks; | ||
740 | struct page *xor_srcs[disks]; | ||
741 | |||
742 | int count = 0, pd_idx = sh->pd_idx, i; | ||
743 | struct page *xor_dest; | ||
744 | int prexor = test_bit(STRIPE_OP_PREXOR, &sh->ops.pending); | ||
745 | unsigned long flags; | ||
746 | dma_async_tx_callback callback; | ||
747 | |||
748 | pr_debug("%s: stripe %llu\n", __FUNCTION__, | ||
749 | (unsigned long long)sh->sector); | ||
750 | |||
751 | /* check if prexor is active which means only process blocks | ||
752 | * that are part of a read-modify-write (written) | ||
753 | */ | ||
754 | if (prexor) { | ||
755 | xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; | ||
756 | for (i = disks; i--; ) { | ||
757 | struct r5dev *dev = &sh->dev[i]; | ||
758 | if (dev->written) | ||
759 | xor_srcs[count++] = dev->page; | ||
760 | } | ||
761 | } else { | ||
762 | xor_dest = sh->dev[pd_idx].page; | ||
763 | for (i = disks; i--; ) { | ||
764 | struct r5dev *dev = &sh->dev[i]; | ||
765 | if (i != pd_idx) | ||
766 | xor_srcs[count++] = dev->page; | ||
767 | } | ||
768 | } | ||
769 | |||
770 | /* check whether this postxor is part of a write */ | ||
771 | callback = test_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending) ? | ||
772 | ops_complete_write : ops_complete_postxor; | ||
773 | |||
774 | /* 1/ if we prexor'd then the dest is reused as a source | ||
775 | * 2/ if we did not prexor then we are redoing the parity | ||
776 | * set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST | ||
777 | * for the synchronous xor case | ||
778 | */ | ||
779 | flags = ASYNC_TX_DEP_ACK | ASYNC_TX_ACK | | ||
780 | (prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST); | ||
781 | |||
782 | atomic_inc(&sh->count); | ||
783 | |||
784 | if (unlikely(count == 1)) { | ||
785 | flags &= ~(ASYNC_TX_XOR_DROP_DST | ASYNC_TX_XOR_ZERO_DST); | ||
786 | tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, | ||
787 | flags, tx, callback, sh); | ||
788 | } else | ||
789 | tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, | ||
790 | flags, tx, callback, sh); | ||
791 | } | ||
792 | |||
793 | static void ops_complete_check(void *stripe_head_ref) | ||
794 | { | ||
795 | struct stripe_head *sh = stripe_head_ref; | ||
796 | int pd_idx = sh->pd_idx; | ||
797 | |||
798 | pr_debug("%s: stripe %llu\n", __FUNCTION__, | ||
799 | (unsigned long long)sh->sector); | ||
800 | |||
801 | if (test_and_clear_bit(STRIPE_OP_MOD_DMA_CHECK, &sh->ops.pending) && | ||
802 | sh->ops.zero_sum_result == 0) | ||
803 | set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); | ||
804 | |||
805 | set_bit(STRIPE_OP_CHECK, &sh->ops.complete); | ||
806 | set_bit(STRIPE_HANDLE, &sh->state); | ||
807 | release_stripe(sh); | ||
808 | } | ||
809 | |||
810 | static void ops_run_check(struct stripe_head *sh) | ||
811 | { | ||
812 | /* kernel stack size limits the total number of disks */ | ||
813 | int disks = sh->disks; | ||
814 | struct page *xor_srcs[disks]; | ||
815 | struct dma_async_tx_descriptor *tx; | ||
816 | |||
817 | int count = 0, pd_idx = sh->pd_idx, i; | ||
818 | struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; | ||
819 | |||
820 | pr_debug("%s: stripe %llu\n", __FUNCTION__, | ||
821 | (unsigned long long)sh->sector); | ||
822 | |||
823 | for (i = disks; i--; ) { | ||
824 | struct r5dev *dev = &sh->dev[i]; | ||
825 | if (i != pd_idx) | ||
826 | xor_srcs[count++] = dev->page; | ||
827 | } | ||
828 | |||
829 | tx = async_xor_zero_sum(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, | ||
830 | &sh->ops.zero_sum_result, 0, NULL, NULL, NULL); | ||
831 | |||
832 | if (tx) | ||
833 | set_bit(STRIPE_OP_MOD_DMA_CHECK, &sh->ops.pending); | ||
834 | else | ||
835 | clear_bit(STRIPE_OP_MOD_DMA_CHECK, &sh->ops.pending); | ||
836 | |||
837 | atomic_inc(&sh->count); | ||
838 | tx = async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx, | ||
839 | ops_complete_check, sh); | ||
840 | } | ||
841 | |||
842 | static void raid5_run_ops(struct stripe_head *sh, unsigned long pending) | ||
843 | { | ||
844 | int overlap_clear = 0, i, disks = sh->disks; | ||
845 | struct dma_async_tx_descriptor *tx = NULL; | ||
846 | |||
847 | if (test_bit(STRIPE_OP_BIOFILL, &pending)) { | ||
848 | ops_run_biofill(sh); | ||
849 | overlap_clear++; | ||
850 | } | ||
851 | |||
852 | if (test_bit(STRIPE_OP_COMPUTE_BLK, &pending)) | ||
853 | tx = ops_run_compute5(sh, pending); | ||
854 | |||
855 | if (test_bit(STRIPE_OP_PREXOR, &pending)) | ||
856 | tx = ops_run_prexor(sh, tx); | ||
857 | |||
858 | if (test_bit(STRIPE_OP_BIODRAIN, &pending)) { | ||
859 | tx = ops_run_biodrain(sh, tx); | ||
860 | overlap_clear++; | ||
861 | } | ||
862 | |||
863 | if (test_bit(STRIPE_OP_POSTXOR, &pending)) | ||
864 | ops_run_postxor(sh, tx); | ||
865 | |||
866 | if (test_bit(STRIPE_OP_CHECK, &pending)) | ||
867 | ops_run_check(sh); | ||
868 | |||
869 | if (test_bit(STRIPE_OP_IO, &pending)) | ||
870 | ops_run_io(sh); | ||
871 | |||
872 | if (overlap_clear) | ||
873 | for (i = disks; i--; ) { | ||
874 | struct r5dev *dev = &sh->dev[i]; | ||
875 | if (test_and_clear_bit(R5_Overlap, &dev->flags)) | ||
876 | wake_up(&sh->raid_conf->wait_for_overlap); | ||
877 | } | ||
878 | } | ||
879 | |||
344 | static int grow_one_stripe(raid5_conf_t *conf) | 880 | static int grow_one_stripe(raid5_conf_t *conf) |
345 | { | 881 | { |
346 | struct stripe_head *sh; | 882 | struct stripe_head *sh; |