diff options
author | NeilBrown <neilb@suse.de> | 2011-07-27 21:39:22 -0400 |
---|---|---|
committer | NeilBrown <neilb@suse.de> | 2011-07-27 21:39:22 -0400 |
commit | bc2607f393bd4fb844c1886a02af929ca0372056 (patch) | |
tree | 45c7f9c538b5e196e389454286771f3beb8628ba | |
parent | 7f0da59bdc2f65795a57009d78f7753d3aea1de3 (diff) |
md/raid5: write errors should be recorded as bad blocks if possible.
When a write error is detected, don't mark the device as failed
immediately but rather record the fact for handle_stripe to deal with.
handle_stripe() then attempts to record a bad block. Only if that fails
does the device get marked as faulty.
Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r-- | drivers/md/raid5.c | 33 | ||||
-rw-r--r-- | drivers/md/raid5.h | 18 |
2 files changed, 41 insertions, 10 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 5fc621673e6c..9768a7d67148 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -1658,8 +1658,10 @@ static void raid5_end_write_request(struct bio *bi, int error) | |||
1658 | return; | 1658 | return; |
1659 | } | 1659 | } |
1660 | 1660 | ||
1661 | if (!uptodate) | 1661 | if (!uptodate) { |
1662 | md_error(conf->mddev, conf->disks[i].rdev); | 1662 | set_bit(WriteErrorSeen, &conf->disks[i].rdev->flags); |
1663 | set_bit(R5_WriteError, &sh->dev[i].flags); | ||
1664 | } | ||
1663 | 1665 | ||
1664 | rdev_dec_pending(conf->disks[i].rdev, conf->mddev); | 1666 | rdev_dec_pending(conf->disks[i].rdev, conf->mddev); |
1665 | 1667 | ||
@@ -3038,6 +3040,14 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) | |||
3038 | if (sh->sector + STRIPE_SECTORS <= rdev->recovery_offset) | 3040 | if (sh->sector + STRIPE_SECTORS <= rdev->recovery_offset) |
3039 | set_bit(R5_Insync, &dev->flags); | 3041 | set_bit(R5_Insync, &dev->flags); |
3040 | } | 3042 | } |
3043 | if (test_bit(R5_WriteError, &dev->flags)) { | ||
3044 | clear_bit(R5_Insync, &dev->flags); | ||
3045 | if (!test_bit(Faulty, &rdev->flags)) { | ||
3046 | s->handle_bad_blocks = 1; | ||
3047 | atomic_inc(&rdev->nr_pending); | ||
3048 | } else | ||
3049 | clear_bit(R5_WriteError, &dev->flags); | ||
3050 | } | ||
3041 | if (!test_bit(R5_Insync, &dev->flags)) { | 3051 | if (!test_bit(R5_Insync, &dev->flags)) { |
3042 | /* The ReadError flag will just be confusing now */ | 3052 | /* The ReadError flag will just be confusing now */ |
3043 | clear_bit(R5_ReadError, &dev->flags); | 3053 | clear_bit(R5_ReadError, &dev->flags); |
@@ -3086,6 +3096,11 @@ static void handle_stripe(struct stripe_head *sh) | |||
3086 | 3096 | ||
3087 | analyse_stripe(sh, &s); | 3097 | analyse_stripe(sh, &s); |
3088 | 3098 | ||
3099 | if (s.handle_bad_blocks) { | ||
3100 | set_bit(STRIPE_HANDLE, &sh->state); | ||
3101 | goto finish; | ||
3102 | } | ||
3103 | |||
3089 | if (unlikely(s.blocked_rdev)) { | 3104 | if (unlikely(s.blocked_rdev)) { |
3090 | if (s.syncing || s.expanding || s.expanded || | 3105 | if (s.syncing || s.expanding || s.expanded || |
3091 | s.to_write || s.written) { | 3106 | s.to_write || s.written) { |
@@ -3283,6 +3298,20 @@ finish: | |||
3283 | if (unlikely(s.blocked_rdev)) | 3298 | if (unlikely(s.blocked_rdev)) |
3284 | md_wait_for_blocked_rdev(s.blocked_rdev, conf->mddev); | 3299 | md_wait_for_blocked_rdev(s.blocked_rdev, conf->mddev); |
3285 | 3300 | ||
3301 | if (s.handle_bad_blocks) | ||
3302 | for (i = disks; i--; ) { | ||
3303 | mdk_rdev_t *rdev; | ||
3304 | struct r5dev *dev = &sh->dev[i]; | ||
3305 | if (test_and_clear_bit(R5_WriteError, &dev->flags)) { | ||
3306 | /* We own a safe reference to the rdev */ | ||
3307 | rdev = conf->disks[i].rdev; | ||
3308 | if (!rdev_set_badblocks(rdev, sh->sector, | ||
3309 | STRIPE_SECTORS, 0)) | ||
3310 | md_error(conf->mddev, rdev); | ||
3311 | rdev_dec_pending(rdev, conf->mddev); | ||
3312 | } | ||
3313 | } | ||
3314 | |||
3286 | if (s.ops_request) | 3315 | if (s.ops_request) |
3287 | raid_run_ops(sh, s.ops_request); | 3316 | raid_run_ops(sh, s.ops_request); |
3288 | 3317 | ||
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index c5429d123636..8620cb67ae39 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h | |||
@@ -249,6 +249,7 @@ struct stripe_head_state { | |||
249 | 249 | ||
250 | struct bio *return_bi; | 250 | struct bio *return_bi; |
251 | mdk_rdev_t *blocked_rdev; | 251 | mdk_rdev_t *blocked_rdev; |
252 | int handle_bad_blocks; | ||
252 | }; | 253 | }; |
253 | 254 | ||
254 | /* Flags */ | 255 | /* Flags */ |
@@ -264,14 +265,15 @@ struct stripe_head_state { | |||
264 | #define R5_ReWrite 9 /* have tried to over-write the readerror */ | 265 | #define R5_ReWrite 9 /* have tried to over-write the readerror */ |
265 | 266 | ||
266 | #define R5_Expanded 10 /* This block now has post-expand data */ | 267 | #define R5_Expanded 10 /* This block now has post-expand data */ |
267 | #define R5_Wantcompute 11 /* compute_block in progress treat as | 268 | #define R5_Wantcompute 11 /* compute_block in progress treat as |
268 | * uptodate | 269 | * uptodate |
269 | */ | 270 | */ |
270 | #define R5_Wantfill 12 /* dev->toread contains a bio that needs | 271 | #define R5_Wantfill 12 /* dev->toread contains a bio that needs |
271 | * filling | 272 | * filling |
272 | */ | 273 | */ |
273 | #define R5_Wantdrain 13 /* dev->towrite needs to be drained */ | 274 | #define R5_Wantdrain 13 /* dev->towrite needs to be drained */ |
274 | #define R5_WantFUA 14 /* Write should be FUA */ | 275 | #define R5_WantFUA 14 /* Write should be FUA */ |
276 | #define R5_WriteError 15 /* got a write error - need to record it */ | ||
275 | /* | 277 | /* |
276 | * Write method | 278 | * Write method |
277 | */ | 279 | */ |