author     Mikulas Patocka <mpatocka@redhat.com>    2009-12-10 18:52:05 -0500
committer  Alasdair G Kergon <agk@redhat.com>       2009-12-10 18:52:05 -0500
commit     60f355ead31e2be8d06ac8acb163df91a1c64e3b (patch)
tree       11a011cb6bcbf4adc27b871556a6d475b6826dd3 /drivers
parent     c58098be979509a54021e837a47fcad08db31f94 (diff)
dm raid1: hold write bios when errors are handled
Hold all write bios when errors are handled.

Previously the failures list was used only when handling errors with a
userspace daemon such as dmeventd.  Now it is always used for all bios.

The regions where some writes failed must be marked as nosync.  This can
only be done in process context (i.e. in the raid1 workqueue), not in the
write_callback function.

Previously the write would succeed if writing to at least one leg
succeeded.  This is wrong because data from the failed leg may be
replicated to the correct leg.  Now, if using a userspace daemon, a write
with some failures will be held until the daemon has done its job and
reconfigured the array.  If not using a daemon, the write still succeeds
if at least one leg succeeds.  This is bad, but it is consistent with
current behavior.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Reviewed-by: Takahiro Yasui <tyasui@redhat.com>
Tested-by: Takahiro Yasui <tyasui@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
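Note: the heart of the change is the handoff described above: the completion
callback no longer finishes partially failed writes itself; it queues them on
the failures list and wakes the mirror daemon only on the empty-to-non-empty
transition (the should_wake logic).  Below is a minimal user-space sketch of
that pattern, not the kernel code: it uses pthreads instead of spinlocks and a
workqueue, and the names (failed_write, worker, handle_failed_write) are
illustrative stand-ins.

/* Sketch of the "queue failures, wake the worker once" pattern.
 * Build with: cc sketch.c -lpthread */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

struct failed_write {
        int sector;                     /* stand-in for a struct bio */
        struct failed_write *next;
};

static struct failed_write *failures;   /* the failures list */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t wake = PTHREAD_COND_INITIALIZER;

/* Completion context: never complete the write here, just hand it to the
 * worker.  Wake the worker only when the list was previously empty. */
static void write_callback(struct failed_write *fw)
{
        int should_wake = 0;

        pthread_mutex_lock(&lock);
        if (!failures)
                should_wake = 1;
        fw->next = failures;
        failures = fw;
        pthread_mutex_unlock(&lock);

        if (should_wake)
                pthread_cond_signal(&wake);
}

/* Process context: drain the list and decide what to do with each held
 * write (in the kernel: mark the region nosync, complete or keep holding). */
static void *worker(void *arg)
{
        (void)arg;
        for (;;) {
                pthread_mutex_lock(&lock);
                while (!failures)
                        pthread_cond_wait(&wake, &lock);
                struct failed_write *list = failures;
                failures = NULL;
                pthread_mutex_unlock(&lock);

                while (list) {
                        struct failed_write *fw = list;
                        list = list->next;
                        printf("handling failed write at sector %d\n", fw->sector);
                        free(fw);
                }
        }
        return NULL;
}

int main(void)
{
        pthread_t t;
        pthread_create(&t, NULL, worker, NULL);

        struct failed_write *fw = malloc(sizeof(*fw));
        fw->sector = 42;
        fw->next = NULL;
        write_callback(fw);

        sleep(1);               /* let the worker drain; the sketch then exits */
        return 0;
}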
Diffstat (limited to 'drivers')
-rw-r--r--   drivers/md/dm-raid1.c   63
1 file changed, 32 insertions(+), 31 deletions(-)
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 4f466ad75680..e363335e8d81 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -578,7 +578,6 @@ static void write_callback(unsigned long error, void *context)
         unsigned i, ret = 0;
         struct bio *bio = (struct bio *) context;
         struct mirror_set *ms;
-        int uptodate = 0;
         int should_wake = 0;
         unsigned long flags;
 
@@ -591,36 +590,27 @@ static void write_callback(unsigned long error, void *context)
          * This way we handle both writes to SYNC and NOSYNC
          * regions with the same code.
          */
-        if (likely(!error))
-                goto out;
+        if (likely(!error)) {
+                bio_endio(bio, ret);
+                return;
+        }
 
         for (i = 0; i < ms->nr_mirrors; i++)
                 if (test_bit(i, &error))
                         fail_mirror(ms->mirror + i, DM_RAID1_WRITE_ERROR);
-                else
-                        uptodate = 1;
 
-        if (unlikely(!uptodate)) {
-                DMERR("All replicated volumes dead, failing I/O");
-                /* None of the writes succeeded, fail the I/O. */
-                ret = -EIO;
-        } else if (errors_handled(ms)) {
-                /*
-                 * Need to raise event.  Since raising
-                 * events can block, we need to do it in
-                 * the main thread.
-                 */
-                spin_lock_irqsave(&ms->lock, flags);
-                if (!ms->failures.head)
-                        should_wake = 1;
-                bio_list_add(&ms->failures, bio);
-                spin_unlock_irqrestore(&ms->lock, flags);
-                if (should_wake)
-                        wakeup_mirrord(ms);
-                return;
-        }
-out:
-        bio_endio(bio, ret);
+        /*
+         * Need to raise event.  Since raising
+         * events can block, we need to do it in
+         * the main thread.
+         */
+        spin_lock_irqsave(&ms->lock, flags);
+        if (!ms->failures.head)
+                should_wake = 1;
+        bio_list_add(&ms->failures, bio);
+        spin_unlock_irqrestore(&ms->lock, flags);
+        if (should_wake)
+                wakeup_mirrord(ms);
 }
 
 static void do_write(struct mirror_set *ms, struct bio *bio)
@@ -773,15 +763,26 @@ static void do_failures(struct mirror_set *ms, struct bio_list *failures)
          * for us to treat them the same and requeue them
          * as well.
          */
-
         while ((bio = bio_list_pop(failures))) {
-                if (ms->log_failure)
-                        hold_bio(ms, bio);
-                else {
+                if (!ms->log_failure) {
                         ms->in_sync = 0;
                         dm_rh_mark_nosync(ms->rh, bio);
-                        bio_endio(bio, 0);
                 }
+
+                /*
+                 * If all the legs are dead, fail the I/O.
+                 * If we have been told to handle errors, hold the bio
+                 * and wait for userspace to deal with the problem.
+                 * Otherwise pretend that the I/O succeeded. (This would
+                 * be wrong if the failed leg returned after reboot and
+                 * got replicated back to the good legs.)
+                 */
+                if (!get_valid_mirror(ms))
+                        bio_endio(bio, -EIO);
+                else if (errors_handled(ms))
+                        hold_bio(ms, bio);
+                else
+                        bio_endio(bio, 0);
         }
 }
 
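For reference, the failure policy that the new do_failures() loop implements
can be read as a three-way decision.  The sketch below merely restates that
policy outside the kernel; failed_write_policy(), have_valid_leg and
handle_errors are hypothetical stand-ins for get_valid_mirror(),
errors_handled() and the surrounding dm-raid1 state, not real kernel APIs.

#include <stdio.h>

enum outcome { FAIL_EIO, HOLD_FOR_DAEMON, PRETEND_SUCCESS };

/* have_valid_leg: at least one mirror leg is still alive.
 * handle_errors: a userspace daemon (e.g. dmeventd) handles errors. */
static enum outcome failed_write_policy(int have_valid_leg, int handle_errors)
{
        if (!have_valid_leg)
                return FAIL_EIO;        /* all legs dead: fail the I/O */
        if (handle_errors)
                return HOLD_FOR_DAEMON; /* hold until the array is reconfigured */
        return PRETEND_SUCCESS;         /* old behavior: succeed on one good leg */
}

int main(void)
{
        static const char *names[] =
                { "FAIL_EIO", "HOLD_FOR_DAEMON", "PRETEND_SUCCESS" };

        for (int valid = 0; valid <= 1; valid++)
                for (int handled = 0; handled <= 1; handled++)
                        printf("valid_leg=%d handle_errors=%d -> %s\n",
                               valid, handled,
                               names[failed_write_policy(valid, handled)]);
        return 0;
}

Whichever branch is taken, marking the affected region nosync (when the log is
healthy) happens in this loop and not in write_callback, since it can block
and therefore must run in process context, as the commit message explains.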