aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
author NeilBrown <neilb@suse.de> 2006-01-06 03:20:19 -0500
committer Linus Torvalds <torvalds@g5.osdl.org> 2006-01-06 11:34:03 -0500
commit ddaf22abaa831763e75775e6d4c7693504237997 (patch)
tree 4f6ba4cb056f8c7cea82c7d548769b879d0fb405 /drivers
parent d69762e98456b71167865db9e33e732a28dd36ab (diff)
[PATCH] md: attempt to auto-correct read errors in raid1
On a read-error we suspend the array, then synchronously read the block from the other devices in the array until we find one where we can read it. Then we try writing the good data back everywhere and make sure it works. If any write or subsequent read fails, only then do we fail the device out of the array.

To be able to suspend the array, we need to also keep track of how many requests are queued for handling by raid1d.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/md/md.c 1
-rw-r--r-- drivers/md/raid1.c 115
2 files changed, 106 insertions, 10 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 64e7da3701a5..1364a1c97e6f 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -461,6 +461,7 @@ int sync_page_io(struct block_device *bdev, sector_t sector, int size,
461 bio_put(bio); 461 bio_put(bio);
462 return ret; 462 return ret;
463} 463}
464EXPORT_SYMBOL(sync_page_io);
464 465
465static int read_disk_sb(mdk_rdev_t * rdev, int size) 466static int read_disk_sb(mdk_rdev_t * rdev, int size)
466{ 467{
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index c618015f07f6..b3856db8d6c2 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -191,6 +191,7 @@ static void reschedule_retry(r1bio_t *r1_bio)
191 191
192 spin_lock_irqsave(&conf->device_lock, flags); 192 spin_lock_irqsave(&conf->device_lock, flags);
193 list_add(&r1_bio->retry_list, &conf->retry_list); 193 list_add(&r1_bio->retry_list, &conf->retry_list);
194 conf->nr_queued ++;
194 spin_unlock_irqrestore(&conf->device_lock, flags); 195 spin_unlock_irqrestore(&conf->device_lock, flags);
195 196
196 wake_up(&conf->wait_barrier); 197 wake_up(&conf->wait_barrier);
@@ -245,9 +246,9 @@ static int raid1_end_read_request(struct bio *bio, unsigned int bytes_done, int
245 /* 246 /*
246 * this branch is our 'one mirror IO has finished' event handler: 247 * this branch is our 'one mirror IO has finished' event handler:
247 */ 248 */
248 if (!uptodate) 249 update_head_pos(mirror, r1_bio);
249 md_error(r1_bio->mddev, conf->mirrors[mirror].rdev); 250
250 else 251 if (uptodate || conf->working_disks <= 1) {
251 /* 252 /*
252 * Set R1BIO_Uptodate in our master bio, so that 253 * Set R1BIO_Uptodate in our master bio, so that
253 * we will return a good error code to the higher 254 * we will return a good error code to the higher
@@ -259,14 +260,8 @@ static int raid1_end_read_request(struct bio *bio, unsigned int bytes_done, int
259 */ 260 */
260 set_bit(R1BIO_Uptodate, &r1_bio->state); 261 set_bit(R1BIO_Uptodate, &r1_bio->state);
261 262
262 update_head_pos(mirror, r1_bio);
263
264 /*
265 * we have only one bio on the read side
266 */
267 if (uptodate)
268 raid_end_bio_io(r1_bio); 263 raid_end_bio_io(r1_bio);
269 else { 264 } else {
270 /* 265 /*
271 * oops, read error: 266 * oops, read error:
272 */ 267 */
@@ -653,6 +648,32 @@ static void allow_barrier(conf_t *conf)
653 wake_up(&conf->wait_barrier); 648 wake_up(&conf->wait_barrier);
654} 649}
655 650
651static void freeze_array(conf_t *conf)
652{
653 /* stop syncio and normal IO and wait for everything to
654 * go quiet.
655 * We increment barrier and nr_waiting, and then
656 * wait until barrier+nr_pending match nr_queued+2
657 */
658 spin_lock_irq(&conf->resync_lock);
659 conf->barrier++;
660 conf->nr_waiting++;
661 wait_event_lock_irq(conf->wait_barrier,
662 conf->barrier+conf->nr_pending == conf->nr_queued+2,
663 conf->resync_lock,
664 raid1_unplug(conf->mddev->queue));
665 spin_unlock_irq(&conf->resync_lock);
666}
667static void unfreeze_array(conf_t *conf)
668{
669 /* reverse the effect of the freeze */
670 spin_lock_irq(&conf->resync_lock);
671 conf->barrier--;
672 conf->nr_waiting--;
673 wake_up(&conf->wait_barrier);
674 spin_unlock_irq(&conf->resync_lock);
675}
676
656 677
657/* duplicate the data pages for behind I/O */ 678/* duplicate the data pages for behind I/O */
658static struct page **alloc_behind_pages(struct bio *bio) 679static struct page **alloc_behind_pages(struct bio *bio)
@@ -1196,6 +1217,7 @@ static void raid1d(mddev_t *mddev)
1196 break; 1217 break;
1197 r1_bio = list_entry(head->prev, r1bio_t, retry_list); 1218 r1_bio = list_entry(head->prev, r1bio_t, retry_list);
1198 list_del(head->prev); 1219 list_del(head->prev);
1220 conf->nr_queued--;
1199 spin_unlock_irqrestore(&conf->device_lock, flags); 1221 spin_unlock_irqrestore(&conf->device_lock, flags);
1200 1222
1201 mddev = r1_bio->mddev; 1223 mddev = r1_bio->mddev;
@@ -1235,6 +1257,74 @@ static void raid1d(mddev_t *mddev)
1235 } 1257 }
1236 } else { 1258 } else {
1237 int disk; 1259 int disk;
1260
1261 /* we got a read error. Maybe the drive is bad. Maybe just
1262 * the block and we can fix it.
1263 * We freeze all other IO, and try reading the block from
1264 * other devices. When we find one, we re-write
1265 * and check that it fixes the read error.
1266 * This is all done synchronously while the array is
1267 * frozen
1268 */
1269 sector_t sect = r1_bio->sector;
1270 int sectors = r1_bio->sectors;
1271 freeze_array(conf);
1272 while(sectors) {
1273 int s = sectors;
1274 int d = r1_bio->read_disk;
1275 int success = 0;
1276
1277 if (s > (PAGE_SIZE>>9))
1278 s = PAGE_SIZE >> 9;
1279
1280 do {
1281 rdev = conf->mirrors[d].rdev;
1282 if (rdev &&
1283 test_bit(In_sync, &rdev->flags) &&
1284 sync_page_io(rdev->bdev,
1285 sect + rdev->data_offset,
1286 s<<9,
1287 conf->tmppage, READ))
1288 success = 1;
1289 else {
1290 d++;
1291 if (d == conf->raid_disks)
1292 d = 0;
1293 }
1294 } while (!success && d != r1_bio->read_disk);
1295
1296 if (success) {
1297 /* write it back and re-read */
1298 while (d != r1_bio->read_disk) {
1299 if (d==0)
1300 d = conf->raid_disks;
1301 d--;
1302 rdev = conf->mirrors[d].rdev;
1303 if (rdev &&
1304 test_bit(In_sync, &rdev->flags)) {
1305 if (sync_page_io(rdev->bdev,
1306 sect + rdev->data_offset,
1307 s<<9, conf->tmppage, WRITE) == 0 ||
1308 sync_page_io(rdev->bdev,
1309 sect + rdev->data_offset,
1310 s<<9, conf->tmppage, READ) == 0) {
1311 /* Well, this device is dead */
1312 md_error(mddev, rdev);
1313 }
1314 }
1315 }
1316 } else {
1317 /* Cannot read from anywhere -- bye bye array */
1318 md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev);
1319 break;
1320 }
1321 sectors -= s;
1322 sect += s;
1323 }
1324
1325
1326 unfreeze_array(conf);
1327
1238 bio = r1_bio->bios[r1_bio->read_disk]; 1328 bio = r1_bio->bios[r1_bio->read_disk];
1239 if ((disk=read_balance(conf, r1_bio)) == -1) { 1329 if ((disk=read_balance(conf, r1_bio)) == -1) {
1240 printk(KERN_ALERT "raid1: %s: unrecoverable I/O" 1330 printk(KERN_ALERT "raid1: %s: unrecoverable I/O"
@@ -1529,6 +1619,10 @@ static int run(mddev_t *mddev)
1529 1619
1530 memset(conf->mirrors, 0, sizeof(struct mirror_info)*mddev->raid_disks); 1620 memset(conf->mirrors, 0, sizeof(struct mirror_info)*mddev->raid_disks);
1531 1621
1622 conf->tmppage = alloc_page(GFP_KERNEL);
1623 if (!conf->tmppage)
1624 goto out_no_mem;
1625
1532 conf->poolinfo = kmalloc(sizeof(*conf->poolinfo), GFP_KERNEL); 1626 conf->poolinfo = kmalloc(sizeof(*conf->poolinfo), GFP_KERNEL);
1533 if (!conf->poolinfo) 1627 if (!conf->poolinfo)
1534 goto out_no_mem; 1628 goto out_no_mem;
@@ -1635,6 +1729,7 @@ out_free_conf:
1635 if (conf->r1bio_pool) 1729 if (conf->r1bio_pool)
1636 mempool_destroy(conf->r1bio_pool); 1730 mempool_destroy(conf->r1bio_pool);
1637 kfree(conf->mirrors); 1731 kfree(conf->mirrors);
1732 __free_page(conf->tmppage);
1638 kfree(conf->poolinfo); 1733 kfree(conf->poolinfo);
1639 kfree(conf); 1734 kfree(conf);
1640 mddev->private = NULL; 1735 mddev->private = NULL;