summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: NeilBrown <neilb@suse.com> 2015-10-24 01:23:48 -0400
committer: NeilBrown <neilb@suse.com> 2015-10-24 01:24:23 -0400
commit: c340702ca26a628832fade4f133d8160a55c29cc (patch)
tree: 66ca777489e3de4a82eb66912f274c98ace83a78
parent: bd8688a199b864944bf62eebed0ca13b46249453 (diff)
md/raid10: don't clear bitmap bit when bad-block-list write fails.
When a write fails and a bad-block-list is present, we can update the bad-block-list instead of writing the data. If this succeeds then it is OK to clear the relevant bitmap-bit as no further 'sync' of the block is needed.

However if writing the bad-block-list fails then we need to treat the write as failed and particularly must not clear the bitmap bit. Otherwise the device can be re-added (after any hardware connection issues are resolved) and because the relevant bit in the bitmap is clear, that block will not be resynced. This leads to data corruption.

We already delay the final bio_endio() on the write until the bad-block-list is written so that when the write returns: either that data is safe, the bad-block record is safe, or the fact that the device is faulty is safe. However we *don't* delay the clearing of the bitmap, so the bitmap bit can be recorded as cleared before we know if the bad-block-list was written safely.

So: delay that until the write really is safe. i.e. move the call to close_write() until just before calling bio_endio(), and recheck the 'is array degraded' status before making that call.

This bug goes back to v3.1 when bad-block-lists were introduced, though it only affects arrays created with mdadm-3.3 or later as only those have bad-block lists.

Backports will require at least Commit: 95af587e95aa ("md/raid10: ensure device failure recorded before write request returns.") as well. I'll send that to 'stable' separately.

Note that of the two tests of R10BIO_WriteError that this patch adds, the first is certain to fail and the second is certain to succeed. However doing it this way makes the patch more obviously correct. I will tidy the code up in a future merge window.

Reported-by: Nate Dailey <nate.dailey@stratus.com>
Fixes: bd870a16c594 ("md/raid10: Handle write errors by updating badblock log.")
Signed-off-by: NeilBrown <neilb@suse.com>
-rw-r--r-- drivers/md/raid10.c 15
1 files changed, 11 insertions, 4 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index a9ecec4e9a13..23de2144ee13 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -2654,16 +2654,17 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
2654 rdev_dec_pending(rdev, conf->mddev); 2654 rdev_dec_pending(rdev, conf->mddev);
2655 } 2655 }
2656 } 2656 }
2657 if (test_bit(R10BIO_WriteError,
2658 &r10_bio->state))
2659 close_write(r10_bio);
2660 if (fail) { 2657 if (fail) {
2661 spin_lock_irq(&conf->device_lock); 2658 spin_lock_irq(&conf->device_lock);
2662 list_add(&r10_bio->retry_list, &conf->bio_end_io_list); 2659 list_add(&r10_bio->retry_list, &conf->bio_end_io_list);
2663 spin_unlock_irq(&conf->device_lock); 2660 spin_unlock_irq(&conf->device_lock);
2664 md_wakeup_thread(conf->mddev->thread); 2661 md_wakeup_thread(conf->mddev->thread);
2665 } else 2662 } else {
2663 if (test_bit(R10BIO_WriteError,
2664 &r10_bio->state))
2665 close_write(r10_bio);
2666 raid_end_bio_io(r10_bio); 2666 raid_end_bio_io(r10_bio);
2667 }
2667 } 2668 }
2668} 2669}
2669 2670
@@ -2691,6 +2692,12 @@ static void raid10d(struct md_thread *thread)
2691 r10_bio = list_first_entry(&tmp, struct r10bio, 2692 r10_bio = list_first_entry(&tmp, struct r10bio,
2692 retry_list); 2693 retry_list);
2693 list_del(&r10_bio->retry_list); 2694 list_del(&r10_bio->retry_list);
2695 if (mddev->degraded)
2696 set_bit(R10BIO_Degraded, &r10_bio->state);
2697
2698 if (test_bit(R10BIO_WriteError,
2699 &r10_bio->state))
2700 close_write(r10_bio);
2694 raid_end_bio_io(r10_bio); 2701 raid_end_bio_io(r10_bio);
2695 } 2702 }
2696 } 2703 }