author     Mikulas Patocka <mpatocka@redhat.com>    2009-12-10 18:52:05 -0500
committer  Alasdair G Kergon <agk@redhat.com>       2009-12-10 18:52:05 -0500
commit     60f355ead31e2be8d06ac8acb163df91a1c64e3b
tree       11a011cb6bcbf4adc27b871556a6d475b6826dd3 /drivers/md/dm-raid1.c
parent     c58098be979509a54021e837a47fcad08db31f94
dm raid1: hold write bios when errors are handled
Hold all write bios when errors are handled.

Previously the failures list was used only when handling errors with a userspace daemon such as dmeventd. Now, it is always used for all bios.

The regions where some writes failed must be marked as nosync. This can only be done in process context (i.e. in the raid1 workqueue), not in the write_callback function.

Previously the write would succeed if writing to at least one leg succeeded. This is wrong because data from the failed leg may be replicated to the correct leg. Now, if using a userspace daemon, the write with some failures will be held until the daemon has done its job and reconfigured the array. If not using a daemon, the write still succeeds if at least one leg succeeds. This is bad, but it is consistent with current behavior.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Reviewed-by: Takahiro Yasui <tyasui@redhat.com>
Tested-by: Takahiro Yasui <tyasui@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
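To make the new error path easier to follow, here is a minimal userspace sketch of the decision order this patch gives do_failures(): mark the region nosync while still in process context, fail the bio only when every leg is dead, hold it for the daemon when errors are handled, and otherwise complete it as a success. The struct and helper names below are simplified stand-ins for illustration only, not the kernel's real dm types.

#include <stdio.h>

/* Simplified stand-ins for the kernel structures; not the real dm types. */
struct bio { int id; };

struct mirror_set_model {
        int log_failure;     /* has the dirty-region log failed?         */
        int valid_mirrors;   /* number of legs still usable              */
        int errors_handled;  /* is a userspace daemon (dmeventd) active? */
        int in_sync;
};

/*
 * Decision order do_failures() follows after this patch:
 *   1. mark the region nosync while still in process context,
 *   2. fail the bio only if every leg is dead,
 *   3. hold it for the daemon if errors are handled,
 *   4. otherwise complete it successfully (legacy behaviour).
 */
static void handle_failed_write(struct mirror_set_model *ms, struct bio *bio)
{
        if (!ms->log_failure)
                ms->in_sync = 0;            /* region becomes nosync */

        if (!ms->valid_mirrors)
                printf("bio %d: bio_endio(bio, -EIO)  - all legs dead\n", bio->id);
        else if (ms->errors_handled)
                printf("bio %d: hold_bio(ms, bio)     - wait for dmeventd\n", bio->id);
        else
                printf("bio %d: bio_endio(bio, 0)     - pretend success\n", bio->id);
}

int main(void)
{
        struct bio b1 = { 1 }, b2 = { 2 }, b3 = { 3 };
        struct mirror_set_model with_daemon = { 0, 1, 1, 1 };
        struct mirror_set_model no_daemon   = { 0, 1, 0, 1 };
        struct mirror_set_model all_dead    = { 0, 0, 1, 1 };

        handle_failed_write(&with_daemon, &b1);
        handle_failed_write(&no_daemon, &b2);
        handle_failed_write(&all_dead, &b3);
        return 0;
}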
Diffstat (limited to 'drivers/md/dm-raid1.c')
-rw-r--r--  drivers/md/dm-raid1.c  63
1 file changed, 32 insertions(+), 31 deletions(-)
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 4f466ad75680..e363335e8d81 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -578,7 +578,6 @@ static void write_callback(unsigned long error, void *context)
         unsigned i, ret = 0;
         struct bio *bio = (struct bio *) context;
         struct mirror_set *ms;
-        int uptodate = 0;
         int should_wake = 0;
         unsigned long flags;
 
@@ -591,36 +590,27 @@ static void write_callback(unsigned long error, void *context)
          * This way we handle both writes to SYNC and NOSYNC
          * regions with the same code.
          */
-        if (likely(!error))
-                goto out;
+        if (likely(!error)) {
+                bio_endio(bio, ret);
+                return;
+        }
 
         for (i = 0; i < ms->nr_mirrors; i++)
                 if (test_bit(i, &error))
                         fail_mirror(ms->mirror + i, DM_RAID1_WRITE_ERROR);
-                else
-                        uptodate = 1;
 
-        if (unlikely(!uptodate)) {
-                DMERR("All replicated volumes dead, failing I/O");
-                /* None of the writes succeeded, fail the I/O. */
-                ret = -EIO;
-        } else if (errors_handled(ms)) {
-                /*
-                 * Need to raise event.  Since raising
-                 * events can block, we need to do it in
-                 * the main thread.
-                 */
-                spin_lock_irqsave(&ms->lock, flags);
-                if (!ms->failures.head)
-                        should_wake = 1;
-                bio_list_add(&ms->failures, bio);
-                spin_unlock_irqrestore(&ms->lock, flags);
-                if (should_wake)
-                        wakeup_mirrord(ms);
-                return;
-        }
-out:
-        bio_endio(bio, ret);
+        /*
+         * Need to raise event.  Since raising
+         * events can block, we need to do it in
+         * the main thread.
+         */
+        spin_lock_irqsave(&ms->lock, flags);
+        if (!ms->failures.head)
+                should_wake = 1;
+        bio_list_add(&ms->failures, bio);
+        spin_unlock_irqrestore(&ms->lock, flags);
+        if (should_wake)
+                wakeup_mirrord(ms);
 }
 
 static void do_write(struct mirror_set *ms, struct bio *bio)
@@ -773,15 +763,26 @@ static void do_failures(struct mirror_set *ms, struct bio_list *failures)
          * for us to treat them the same and requeue them
          * as well.
          */
-
         while ((bio = bio_list_pop(failures))) {
-                if (ms->log_failure)
-                        hold_bio(ms, bio);
-                else {
+                if (!ms->log_failure) {
                         ms->in_sync = 0;
                         dm_rh_mark_nosync(ms->rh, bio);
-                        bio_endio(bio, 0);
                 }
+
+                /*
+                 * If all the legs are dead, fail the I/O.
+                 * If we have been told to handle errors, hold the bio
+                 * and wait for userspace to deal with the problem.
+                 * Otherwise pretend that the I/O succeeded. (This would
+                 * be wrong if the failed leg returned after reboot and
+                 * got replicated back to the good legs.)
+                 */
+                if (!get_valid_mirror(ms))
+                        bio_endio(bio, -EIO);
+                else if (errors_handled(ms))
+                        hold_bio(ms, bio);
+                else
+                        bio_endio(bio, 0);
         }
 }
 