author     NeilBrown <neilb@suse.de>                 2006-01-06 03:20:21 -0500
committer  Linus Torvalds <torvalds@g5.osdl.org>     2006-01-06 11:34:03 -0500
commit     3e198f7826f830604f3aa7c20359a773e70cbeaa (patch)
tree       b7b23a9b9085cd652930a706f86187a16505fd84
parent     ddaf22abaa831763e75775e6d4c7693504237997 (diff)
[PATCH] md: tidy up some issues with raid1 resync and prepare for catching read errors
We are dereferencing ->rdev without an rcu lock!

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--  drivers/md/raid1.c  110
1 file changed, 56 insertions(+), 54 deletions(-)
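
The locking problem named in the commit message is the standard md pattern for ->rdev access: a reader must hold rcu_read_lock() while dereferencing conf->mirrors[i].rdev, and must take an nr_pending reference before leaving the RCU section if it intends to keep using the device. A minimal sketch of that pattern using the md types from raid1.c (the helper name choose_sync_rdev() is hypothetical, not part of the patch):

/* Sketch of the RCU discipline this patch moves sync_request() onto.
 * choose_sync_rdev() is a hypothetical helper; the patch open-codes
 * this inside the per-device loop of sync_request().
 */
static mdk_rdev_t *choose_sync_rdev(conf_t *conf, int i)
{
        mdk_rdev_t *rdev;

        rcu_read_lock();
        rdev = rcu_dereference(conf->mirrors[i].rdev);  /* may be NULL */
        if (rdev == NULL || test_bit(Faulty, &rdev->flags)) {
                rcu_read_unlock();
                return NULL;
        }
        atomic_inc(&rdev->nr_pending);  /* pin before leaving the RCU section */
        rcu_read_unlock();
        return rdev;    /* dropped later with rdev_dec_pending(rdev, mddev) */
}

In the patch itself the matching rdev_dec_pending() calls move into put_buf(), so the reference lives exactly as long as the resync buffer does.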
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index b3856db8d6c2..ea1f1eb93c77 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -177,6 +177,13 @@ static inline void free_r1bio(r1bio_t *r1_bio)
 static inline void put_buf(r1bio_t *r1_bio)
 {
         conf_t *conf = mddev_to_conf(r1_bio->mddev);
+        int i;
+
+        for (i=0; i<conf->raid_disks; i++) {
+                struct bio *bio = r1_bio->bios[i];
+                if (bio->bi_end_io)
+                        rdev_dec_pending(conf->mirrors[i].rdev, r1_bio->mddev);
+        }
 
         mempool_free(r1_bio, conf->r1buf_pool);
 
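
With this hunk, the per-device reference taken when a resync bio is set up is released in exactly one place, when the r1_bio buffer is returned; the explicit rdev_dec_pending() calls removed from end_sync_read(), end_sync_write() and the sync_request() error path below are all covered by it. The rule, pulled out as a standalone sketch (drop_sync_refs() is a hypothetical name; the real code is inlined in put_buf() above):

/* Hypothetical helper: release the per-device references held by a
 * resync r1_bio.  A slot holds a reference iff its bio was given an
 * end_io handler in sync_request().
 */
static void drop_sync_refs(conf_t *conf, r1bio_t *r1_bio)
{
        int i;

        for (i = 0; i < conf->raid_disks; i++) {
                struct bio *bio = r1_bio->bios[i];

                if (bio->bi_end_io)     /* slot was actually set up */
                        rdev_dec_pending(conf->mirrors[i].rdev, r1_bio->mddev);
        }
}

Tying the release to put_buf() lets an r1_bio later be retried against a different device without each completion handler having to juggle references, which is the "prepare for catching read errors" part of the change.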
@@ -1085,7 +1092,6 @@ static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error)
                          conf->mirrors[r1_bio->read_disk].rdev);
         } else
                 set_bit(R1BIO_Uptodate, &r1_bio->state);
-        rdev_dec_pending(conf->mirrors[r1_bio->read_disk].rdev, conf->mddev);
         reschedule_retry(r1_bio);
         return 0;
 }
@@ -1116,7 +1122,6 @@ static int end_sync_write(struct bio *bio, unsigned int bytes_done, int error)
                 md_done_sync(mddev, r1_bio->sectors, uptodate);
                 put_buf(r1_bio);
         }
-        rdev_dec_pending(conf->mirrors[mirror].rdev, mddev);
         return 0;
 }
 
@@ -1153,10 +1158,14 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
         atomic_set(&r1_bio->remaining, 1);
         for (i = 0; i < disks ; i++) {
                 wbio = r1_bio->bios[i];
-                if (wbio->bi_end_io != end_sync_write)
+                if (wbio->bi_end_io == NULL ||
+                    (wbio->bi_end_io == end_sync_read &&
+                     (i == r1_bio->read_disk ||
+                      !test_bit(MD_RECOVERY_SYNC, &mddev->recovery))))
                         continue;
 
-                atomic_inc(&conf->mirrors[i].rdev->nr_pending);
+                wbio->bi_rw = WRITE;
+                wbio->bi_end_io = end_sync_write;
                 atomic_inc(&r1_bio->remaining);
                 md_sync_acct(conf->mirrors[i].rdev->bdev, wbio->bi_size >> 9);
 
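
The new skip test is dense; unpacked, a slot is written only if it was set up at all, and a slot that was set up as a read is rewritten only during a plain resync and only when it is not the disk the good data came from. A hypothetical helper equivalent to the inline test above (should_rewrite() is not in the patch):

/* Hypothetical helper, equivalent to the skip test added above.
 * Returns non-zero if r1_bio->bios[i] should be (re)written.
 */
static int should_rewrite(mddev_t *mddev, r1bio_t *r1_bio, int i)
{
        struct bio *wbio = r1_bio->bios[i];

        if (wbio->bi_end_io == NULL)            /* slot never set up */
                return 0;
        if (wbio->bi_end_io == end_sync_read) {
                if (i == r1_bio->read_disk)     /* source of the data */
                        return 0;
                if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
                        return 0;               /* recovery: leave In_sync disks alone */
        }
        return 1;       /* write target, or a read target being corrected */
}

During recovery only the out-of-sync disks (set up with end_sync_write) are written; during a resync, each In_sync copy other than the one just read can be converted to a write and rewritten.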
@@ -1388,14 +1397,13 @@ static int init_resync(conf_t *conf)
 static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster)
 {
         conf_t *conf = mddev_to_conf(mddev);
-        mirror_info_t *mirror;
         r1bio_t *r1_bio;
         struct bio *bio;
         sector_t max_sector, nr_sectors;
-        int disk;
+        int disk = -1;
         int i;
-        int wonly;
-        int write_targets = 0;
+        int wonly = -1;
+        int write_targets = 0, read_targets = 0;
         int sync_blocks;
         int still_degraded = 0;
 
@@ -1447,44 +1455,24 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 
         conf->next_resync = sector_nr;
 
+        r1_bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO);
+        rcu_read_lock();
         /*
-         * If reconstructing, and >1 working disc,
-         * could dedicate one to rebuild and others to
-         * service read requests ..
+         * If we get a correctably read error during resync or recovery,
+         * we might want to read from a different device.  So we
+         * flag all drives that could conceivably be read from for READ,
+         * and any others (which will be non-In_sync devices) for WRITE.
+         * If a read fails, we try reading from something else for which READ
+         * is OK.
          */
-        disk = conf->last_used;
-        /* make sure disk is operational */
-        wonly = disk;
-        while (conf->mirrors[disk].rdev == NULL ||
-               !test_bit(In_sync, &conf->mirrors[disk].rdev->flags) ||
-               test_bit(WriteMostly, &conf->mirrors[disk].rdev->flags)
-                ) {
-                if (conf->mirrors[disk].rdev &&
-                    test_bit(In_sync, &conf->mirrors[disk].rdev->flags))
-                        wonly = disk;
-                if (disk <= 0)
-                        disk = conf->raid_disks;
-                disk--;
-                if (disk == conf->last_used) {
-                        disk = wonly;
-                        break;
-                }
-        }
-        conf->last_used = disk;
-        atomic_inc(&conf->mirrors[disk].rdev->nr_pending);
-
-
-        mirror = conf->mirrors + disk;
-
-        r1_bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO);
 
         r1_bio->mddev = mddev;
         r1_bio->sector = sector_nr;
         r1_bio->state = 0;
         set_bit(R1BIO_IsSync, &r1_bio->state);
-        r1_bio->read_disk = disk;
 
         for (i=0; i < conf->raid_disks; i++) {
+                mdk_rdev_t *rdev;
                 bio = r1_bio->bios[i];
 
                 /* take from bio_init */
@@ -1499,35 +1487,49 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
                 bio->bi_end_io = NULL;
                 bio->bi_private = NULL;
 
-                if (i == disk) {
-                        bio->bi_rw = READ;
-                        bio->bi_end_io = end_sync_read;
-                } else if (conf->mirrors[i].rdev == NULL ||
-                           test_bit(Faulty, &conf->mirrors[i].rdev->flags)) {
+                rdev = rcu_dereference(conf->mirrors[i].rdev);
+                if (rdev == NULL ||
+                    test_bit(Faulty, &rdev->flags)) {
                         still_degraded = 1;
                         continue;
-                } else if (!test_bit(In_sync, &conf->mirrors[i].rdev->flags) ||
-                           sector_nr + RESYNC_SECTORS > mddev->recovery_cp ||
-                           test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
+                } else if (!test_bit(In_sync, &rdev->flags)) {
                         bio->bi_rw = WRITE;
                         bio->bi_end_io = end_sync_write;
                         write_targets ++;
-                } else
-                        /* no need to read or write here */
-                        continue;
-                bio->bi_sector = sector_nr + conf->mirrors[i].rdev->data_offset;
-                bio->bi_bdev = conf->mirrors[i].rdev->bdev;
+                } else {
+                        /* may need to read from here */
+                        bio->bi_rw = READ;
+                        bio->bi_end_io = end_sync_read;
+                        if (test_bit(WriteMostly, &rdev->flags)) {
+                                if (wonly < 0)
+                                        wonly = i;
+                        } else {
+                                if (disk < 0)
+                                        disk = i;
+                        }
+                        read_targets++;
+                }
+                atomic_inc(&rdev->nr_pending);
+                bio->bi_sector = sector_nr + rdev->data_offset;
+                bio->bi_bdev = rdev->bdev;
                 bio->bi_private = r1_bio;
         }
+        rcu_read_unlock();
+        if (disk < 0)
+                disk = wonly;
+        r1_bio->read_disk = disk;
 
-        if (write_targets == 0) {
+        if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && read_targets > 0)
+                /* extra read targets are also write targets */
+                write_targets += read_targets-1;
+
+        if (write_targets == 0 || read_targets == 0) {
                 /* There is nowhere to write, so all non-sync
                  * drives must be failed - so we are finished
                  */
                 sector_t rv = max_sector - sector_nr;
                 *skipped = 1;
                 put_buf(r1_bio);
-                rdev_dec_pending(conf->mirrors[disk].rdev, mddev);
                 return rv;
         }
 
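
At the end of the device loop the read disk is chosen: the first In_sync device that is not marked WriteMostly, falling back to a WriteMostly one only if nothing better exists. The same choice, pulled out as a standalone sketch (pick_read_disk() is a hypothetical name; the real code runs inline, with the caller already inside rcu_read_lock()):

/* Hypothetical helper mirroring the read-disk choice made inline above.
 * Caller is assumed to hold rcu_read_lock(); returns -1 if no device
 * is readable at all.
 */
static int pick_read_disk(conf_t *conf)
{
        int i, disk = -1, wonly = -1;

        for (i = 0; i < conf->raid_disks; i++) {
                mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);

                if (rdev == NULL || !test_bit(In_sync, &rdev->flags))
                        continue;
                if (test_bit(WriteMostly, &rdev->flags)) {
                        if (wonly < 0)
                                wonly = i;      /* remember as a last resort */
                } else if (disk < 0)
                        disk = i;               /* first preferred candidate */
        }
        return disk >= 0 ? disk : wonly;
}

During a plain resync every extra read target is also a potential write target (its copy of a bad block can be rewritten), which is what the write_targets += read_targets-1 adjustment above expresses.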
@@ -1578,10 +1580,10 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
                         sync_blocks -= (len>>9);
                 } while (r1_bio->bios[disk]->bi_vcnt < RESYNC_PAGES);
  bio_full:
-        bio = r1_bio->bios[disk];
+        bio = r1_bio->bios[r1_bio->read_disk];
         r1_bio->sectors = nr_sectors;
 
-        md_sync_acct(mirror->rdev->bdev, nr_sectors);
+        md_sync_acct(conf->mirrors[r1_bio->read_disk].rdev->bdev, nr_sectors);
 
         generic_make_request(bio);
 