author     NeilBrown <neilb@suse.de>    2012-02-13 19:10:10 -0500
committer  NeilBrown <neilb@suse.de>    2012-02-13 19:10:10 -0500
commit     fae8cc5ed0714953b1ad7cf86f030d2177278424 (patch)
tree       f61f54aaff4122671153e63cad86914566bce389 /drivers
parent     f53e29fc87b6fb76db0043202ec2c3450caa5ff3 (diff)
md/raid10: fix handling of error on last working device in array.
If we get a read error on the last working device in a RAID10 which
contains the target block, then we don't fail the device (which is good)
but we don't abort retries, which is wrong.  We end up in an infinite
loop retrying the read on the one device.

This patch fixes the problem in two places:
1/ in raid10_end_read_request we don't even ask for a retry if this
   was the last usable device.  This is efficient but a little racy
   and will sometimes retry when it should not.
2/ in handle_read_error we are careful to exclude any device from
   retry which we tried to mark as faulty (that might have failed if
   it was the last device).  This is race-free but less efficient.

Signed-off-by: NeilBrown <neilb@suse.de>
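Fix 1/ leans on the enough() helper whose forward declaration is added in
the first hunk below: raid10_end_read_request only treats the failed read
as final when no other device could still supply the block.  The
stand-alone C sketch below illustrates that check; it is an illustration
only, not the kernel implementation, and the sketch_* names, the struct
layout, and the fixed-size working[] array are all assumptions made for
the sketch.

/* Illustrative user-space sketch of the "is there still enough
 * redundancy?" question behind enough() -- not the kernel code.
 * With device "ignore" treated as failed, does every group of
 * copies still contain at least one working device?
 */
#include <stdbool.h>
#include <stdio.h>

struct sketch_conf {
    int raid_disks;    /* total devices in the array */
    int copies;        /* devices that hold a copy of each block */
    bool working[16];  /* true if the device is usable */
};

static bool sketch_enough(const struct sketch_conf *conf, int ignore)
{
    int first = 0;

    /* Walk the devices in groups of "copies"; every group must
     * keep at least one working device other than "ignore". */
    do {
        int n = conf->copies;
        int cnt = 0;

        while (n--) {
            if (conf->working[first] && first != ignore)
                cnt++;
            first = (first + 1) % conf->raid_disks;
        }
        if (cnt == 0)
            return false; /* some block would lose its last copy */
    } while (first != 0);

    return true;
}

int main(void)
{
    /* 4 disks, 2 copies per block, disk 3 already failed. */
    struct sketch_conf c = { 4, 2, { true, true, true, false } };

    printf("without disk 0: %d\n", sketch_enough(&c, 0)); /* 1: disk 1 covers */
    printf("without disk 2: %d\n", sketch_enough(&c, 2)); /* 0: group {2,3} empty */
    return 0;
}

In the real driver the question is asked under conf->device_lock, as the
second hunk shows, because device state can change concurrently.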
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/md/raid10.c  |  37
1 file changed, 27 insertions(+), 10 deletions(-)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 6e8aa213f0d..bd06ea21756 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -67,6 +67,7 @@ static int max_queued_requests = 1024;
 
 static void allow_barrier(struct r10conf *conf);
 static void lower_barrier(struct r10conf *conf);
+static int enough(struct r10conf *conf, int ignore);
 
 static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data)
 {
@@ -347,6 +348,19 @@ static void raid10_end_read_request(struct bio *bio, int error)
          * wait for the 'master' bio.
          */
         set_bit(R10BIO_Uptodate, &r10_bio->state);
+    } else {
+        /* If all other devices that store this block have
+         * failed, we want to return the error upwards rather
+         * than fail the last device.  Here we redefine
+         * "uptodate" to mean "Don't want to retry"
+         */
+        unsigned long flags;
+        spin_lock_irqsave(&conf->device_lock, flags);
+        if (!enough(conf, rdev->raid_disk))
+            uptodate = 1;
+        spin_unlock_irqrestore(&conf->device_lock, flags);
+    }
+    if (uptodate) {
         raid_end_bio_io(r10_bio);
         rdev_dec_pending(rdev, conf->mddev);
     } else {
@@ -2052,6 +2066,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
2052 "md/raid10:%s: %s: Failing raid device\n", 2066 "md/raid10:%s: %s: Failing raid device\n",
2053 mdname(mddev), b); 2067 mdname(mddev), b);
2054 md_error(mddev, conf->mirrors[d].rdev); 2068 md_error(mddev, conf->mirrors[d].rdev);
2069 r10_bio->devs[r10_bio->read_slot].bio = IO_BLOCKED;
2055 return; 2070 return;
2056 } 2071 }
2057 2072
@@ -2105,8 +2120,11 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
                     rdev,
                     r10_bio->devs[r10_bio->read_slot].addr
                     + sect,
-                    s, 0))
+                    s, 0)) {
                 md_error(mddev, rdev);
+                r10_bio->devs[r10_bio->read_slot].bio
+                    = IO_BLOCKED;
+            }
             break;
         }
 
@@ -2299,17 +2317,20 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
      * This is all done synchronously while the array is
      * frozen.
      */
+    bio = r10_bio->devs[slot].bio;
+    bdevname(bio->bi_bdev, b);
+    bio_put(bio);
+    r10_bio->devs[slot].bio = NULL;
+
     if (mddev->ro == 0) {
         freeze_array(conf);
         fix_read_error(conf, mddev, r10_bio);
         unfreeze_array(conf);
-    }
+    } else
+        r10_bio->devs[slot].bio = IO_BLOCKED;
+
     rdev_dec_pending(rdev, mddev);
 
-    bio = r10_bio->devs[slot].bio;
-    bdevname(bio->bi_bdev, b);
-    r10_bio->devs[slot].bio =
-        mddev->ro ? IO_BLOCKED : NULL;
 read_more:
     rdev = read_balance(conf, r10_bio, &max_sectors);
     if (rdev == NULL) {
@@ -2318,13 +2339,10 @@ read_more:
             mdname(mddev), b,
             (unsigned long long)r10_bio->sector);
         raid_end_bio_io(r10_bio);
-        bio_put(bio);
         return;
     }
 
     do_sync = (r10_bio->master_bio->bi_rw & REQ_SYNC);
-    if (bio)
-        bio_put(bio);
     slot = r10_bio->read_slot;
     printk_ratelimited(
         KERN_ERR
@@ -2360,7 +2378,6 @@ read_more:
         mbio->bi_phys_segments++;
         spin_unlock_irq(&conf->device_lock);
         generic_make_request(bio);
-        bio = NULL;
 
         r10_bio = mempool_alloc(conf->r10bio_pool,
                                 GFP_NOIO);
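Fix 2/ is visible in the fix_read_error and handle_read_error hunks above:
whenever the code asks md_error() to fail a device, it also stores the
IO_BLOCKED sentinel in that slot's bio pointer, so later retries skip the
device even if the fail request was refused because it was the last one.
A minimal sketch of that skip rule follows; the sketch_* names are
invented for this illustration, and ((void *)1) merely stands in for the
sentinel value, which is compared but never dereferenced.

/* Sketch of the IO_BLOCKED exclusion used by fix 2/ -- not the
 * kernel's read_balance().  A slot whose bio pointer equals the
 * sentinel is never chosen again; all names here are invented. */
#include <stdio.h>

#define SKETCH_IO_BLOCKED ((void *)1) /* sentinel, never dereferenced */

struct sketch_slot {
    void *bio; /* SKETCH_IO_BLOCKED means "do not retry here" */
};

/* Return the first slot still eligible for a retry, or -1 so the
 * caller can give up instead of spinning on one bad device. */
static int sketch_pick_slot(const struct sketch_slot *slots, int nslots)
{
    for (int i = 0; i < nslots; i++)
        if (slots[i].bio != SKETCH_IO_BLOCKED)
            return i;
    return -1;
}

int main(void)
{
    struct sketch_slot slots[2] = {
        { SKETCH_IO_BLOCKED }, /* marked after a failed repair */
        { NULL },              /* still eligible */
    };

    printf("retry slot: %d\n", sketch_pick_slot(slots, 2)); /* 1 */

    slots[1].bio = SKETCH_IO_BLOCKED;
    printf("retry slot: %d\n", sketch_pick_slot(slots, 2)); /* -1: abort */
    return 0;
}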