about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author NeilBrown <neilb@suse.de> 2011-07-27 21:39:22 -0400
committer NeilBrown <neilb@suse.de> 2011-07-27 21:39:22 -0400
commit bc2607f393bd4fb844c1886a02af929ca0372056 (patch)
tree 45c7f9c538b5e196e389454286771f3beb8628ba
parent 7f0da59bdc2f65795a57009d78f7753d3aea1de3 (diff)
md/raid5: write errors should be recorded as bad blocks if possible.
When a write error is detected, don't mark the device as failed immediately but rather record the fact for handle_stripe to deal with.

Handle_stripe then attempts to record a bad block. Only if that fails does the device get marked as faulty.

Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r-- drivers/md/raid5.c 33
-rw-r--r-- drivers/md/raid5.h 18
2 files changed, 41 insertions, 10 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 5fc621673e6c..9768a7d67148 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1658,8 +1658,10 @@ static void raid5_end_write_request(struct bio *bi, int error)
1658 return; 1658 return;
1659 } 1659 }
1660 1660
1661 if (!uptodate) 1661 if (!uptodate) {
1662 md_error(conf->mddev, conf->disks[i].rdev); 1662 set_bit(WriteErrorSeen, &conf->disks[i].rdev->flags);
1663 set_bit(R5_WriteError, &sh->dev[i].flags);
1664 }
1663 1665
1664 rdev_dec_pending(conf->disks[i].rdev, conf->mddev); 1666 rdev_dec_pending(conf->disks[i].rdev, conf->mddev);
1665 1667
@@ -3038,6 +3040,14 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
3038 if (sh->sector + STRIPE_SECTORS <= rdev->recovery_offset) 3040 if (sh->sector + STRIPE_SECTORS <= rdev->recovery_offset)
3039 set_bit(R5_Insync, &dev->flags); 3041 set_bit(R5_Insync, &dev->flags);
3040 } 3042 }
3043 if (test_bit(R5_WriteError, &dev->flags)) {
3044 clear_bit(R5_Insync, &dev->flags);
3045 if (!test_bit(Faulty, &rdev->flags)) {
3046 s->handle_bad_blocks = 1;
3047 atomic_inc(&rdev->nr_pending);
3048 } else
3049 clear_bit(R5_WriteError, &dev->flags);
3050 }
3041 if (!test_bit(R5_Insync, &dev->flags)) { 3051 if (!test_bit(R5_Insync, &dev->flags)) {
3042 /* The ReadError flag will just be confusing now */ 3052 /* The ReadError flag will just be confusing now */
3043 clear_bit(R5_ReadError, &dev->flags); 3053 clear_bit(R5_ReadError, &dev->flags);
@@ -3086,6 +3096,11 @@ static void handle_stripe(struct stripe_head *sh)
3086 3096
3087 analyse_stripe(sh, &s); 3097 analyse_stripe(sh, &s);
3088 3098
3099 if (s.handle_bad_blocks) {
3100 set_bit(STRIPE_HANDLE, &sh->state);
3101 goto finish;
3102 }
3103
3089 if (unlikely(s.blocked_rdev)) { 3104 if (unlikely(s.blocked_rdev)) {
3090 if (s.syncing || s.expanding || s.expanded || 3105 if (s.syncing || s.expanding || s.expanded ||
3091 s.to_write || s.written) { 3106 s.to_write || s.written) {
@@ -3283,6 +3298,20 @@ finish:
3283 if (unlikely(s.blocked_rdev)) 3298 if (unlikely(s.blocked_rdev))
3284 md_wait_for_blocked_rdev(s.blocked_rdev, conf->mddev); 3299 md_wait_for_blocked_rdev(s.blocked_rdev, conf->mddev);
3285 3300
3301 if (s.handle_bad_blocks)
3302 for (i = disks; i--; ) {
3303 mdk_rdev_t *rdev;
3304 struct r5dev *dev = &sh->dev[i];
3305 if (test_and_clear_bit(R5_WriteError, &dev->flags)) {
3306 /* We own a safe reference to the rdev */
3307 rdev = conf->disks[i].rdev;
3308 if (!rdev_set_badblocks(rdev, sh->sector,
3309 STRIPE_SECTORS, 0))
3310 md_error(conf->mddev, rdev);
3311 rdev_dec_pending(rdev, conf->mddev);
3312 }
3313 }
3314
3286 if (s.ops_request) 3315 if (s.ops_request)
3287 raid_run_ops(sh, s.ops_request); 3316 raid_run_ops(sh, s.ops_request);
3288 3317
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index c5429d123636..8620cb67ae39 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -249,6 +249,7 @@ struct stripe_head_state {
249 249
250 struct bio *return_bi; 250 struct bio *return_bi;
251 mdk_rdev_t *blocked_rdev; 251 mdk_rdev_t *blocked_rdev;
252 int handle_bad_blocks;
252}; 253};
253 254
254/* Flags */ 255/* Flags */
@@ -264,14 +265,15 @@ struct stripe_head_state {
264#define R5_ReWrite 9 /* have tried to over-write the readerror */ 265#define R5_ReWrite 9 /* have tried to over-write the readerror */
265 266
266#define R5_Expanded 10 /* This block now has post-expand data */ 267#define R5_Expanded 10 /* This block now has post-expand data */
267#define R5_Wantcompute 11 /* compute_block in progress treat as 268#define R5_Wantcompute 11 /* compute_block in progress treat as
268 * uptodate 269 * uptodate
269 */ 270 */
270#define R5_Wantfill 12 /* dev->toread contains a bio that needs 271#define R5_Wantfill 12 /* dev->toread contains a bio that needs
271 * filling 272 * filling
272 */ 273 */
273#define R5_Wantdrain 13 /* dev->towrite needs to be drained */ 274#define R5_Wantdrain 13 /* dev->towrite needs to be drained */
274#define R5_WantFUA 14 /* Write should be FUA */ 275#define R5_WantFUA 14 /* Write should be FUA */
276#define R5_WriteError 15 /* got a write error - need to record it */
275/* 277/*
276 * Write method 278 * Write method
277 */ 279 */