diff options
author | NeilBrown <neilb@suse.de> | 2006-01-06 03:20:21 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-01-06 11:34:03 -0500 |
commit | 3e198f7826f830604f3aa7c20359a773e70cbeaa (patch) | |
tree | b7b23a9b9085cd652930a706f86187a16505fd84 | |
parent | ddaf22abaa831763e75775e6d4c7693504237997 (diff) |
[PATCH] md: tidyup some issues with raid1 resync and prepare for catching read errors
We are dereferencing ->rdev without an rcu lock!
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | drivers/md/raid1.c | 110 |
1 files changed, 56 insertions, 54 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index b3856db8d6c2..ea1f1eb93c77 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
@@ -177,6 +177,13 @@ static inline void free_r1bio(r1bio_t *r1_bio) | |||
177 | static inline void put_buf(r1bio_t *r1_bio) | 177 | static inline void put_buf(r1bio_t *r1_bio) |
178 | { | 178 | { |
179 | conf_t *conf = mddev_to_conf(r1_bio->mddev); | 179 | conf_t *conf = mddev_to_conf(r1_bio->mddev); |
180 | int i; | ||
181 | |||
182 | for (i=0; i<conf->raid_disks; i++) { | ||
183 | struct bio *bio = r1_bio->bios[i]; | ||
184 | if (bio->bi_end_io) | ||
185 | rdev_dec_pending(conf->mirrors[i].rdev, r1_bio->mddev); | ||
186 | } | ||
180 | 187 | ||
181 | mempool_free(r1_bio, conf->r1buf_pool); | 188 | mempool_free(r1_bio, conf->r1buf_pool); |
182 | 189 | ||
@@ -1085,7 +1092,6 @@ static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error) | |||
1085 | conf->mirrors[r1_bio->read_disk].rdev); | 1092 | conf->mirrors[r1_bio->read_disk].rdev); |
1086 | } else | 1093 | } else |
1087 | set_bit(R1BIO_Uptodate, &r1_bio->state); | 1094 | set_bit(R1BIO_Uptodate, &r1_bio->state); |
1088 | rdev_dec_pending(conf->mirrors[r1_bio->read_disk].rdev, conf->mddev); | ||
1089 | reschedule_retry(r1_bio); | 1095 | reschedule_retry(r1_bio); |
1090 | return 0; | 1096 | return 0; |
1091 | } | 1097 | } |
@@ -1116,7 +1122,6 @@ static int end_sync_write(struct bio *bio, unsigned int bytes_done, int error) | |||
1116 | md_done_sync(mddev, r1_bio->sectors, uptodate); | 1122 | md_done_sync(mddev, r1_bio->sectors, uptodate); |
1117 | put_buf(r1_bio); | 1123 | put_buf(r1_bio); |
1118 | } | 1124 | } |
1119 | rdev_dec_pending(conf->mirrors[mirror].rdev, mddev); | ||
1120 | return 0; | 1125 | return 0; |
1121 | } | 1126 | } |
1122 | 1127 | ||
@@ -1153,10 +1158,14 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) | |||
1153 | atomic_set(&r1_bio->remaining, 1); | 1158 | atomic_set(&r1_bio->remaining, 1); |
1154 | for (i = 0; i < disks ; i++) { | 1159 | for (i = 0; i < disks ; i++) { |
1155 | wbio = r1_bio->bios[i]; | 1160 | wbio = r1_bio->bios[i]; |
1156 | if (wbio->bi_end_io != end_sync_write) | 1161 | if (wbio->bi_end_io == NULL || |
1162 | (wbio->bi_end_io == end_sync_read && | ||
1163 | (i == r1_bio->read_disk || | ||
1164 | !test_bit(MD_RECOVERY_SYNC, &mddev->recovery)))) | ||
1157 | continue; | 1165 | continue; |
1158 | 1166 | ||
1159 | atomic_inc(&conf->mirrors[i].rdev->nr_pending); | 1167 | wbio->bi_rw = WRITE; |
1168 | wbio->bi_end_io = end_sync_write; | ||
1160 | atomic_inc(&r1_bio->remaining); | 1169 | atomic_inc(&r1_bio->remaining); |
1161 | md_sync_acct(conf->mirrors[i].rdev->bdev, wbio->bi_size >> 9); | 1170 | md_sync_acct(conf->mirrors[i].rdev->bdev, wbio->bi_size >> 9); |
1162 | 1171 | ||
@@ -1388,14 +1397,13 @@ static int init_resync(conf_t *conf) | |||
1388 | static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster) | 1397 | static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster) |
1389 | { | 1398 | { |
1390 | conf_t *conf = mddev_to_conf(mddev); | 1399 | conf_t *conf = mddev_to_conf(mddev); |
1391 | mirror_info_t *mirror; | ||
1392 | r1bio_t *r1_bio; | 1400 | r1bio_t *r1_bio; |
1393 | struct bio *bio; | 1401 | struct bio *bio; |
1394 | sector_t max_sector, nr_sectors; | 1402 | sector_t max_sector, nr_sectors; |
1395 | int disk; | 1403 | int disk = -1; |
1396 | int i; | 1404 | int i; |
1397 | int wonly; | 1405 | int wonly = -1; |
1398 | int write_targets = 0; | 1406 | int write_targets = 0, read_targets = 0; |
1399 | int sync_blocks; | 1407 | int sync_blocks; |
1400 | int still_degraded = 0; | 1408 | int still_degraded = 0; |
1401 | 1409 | ||
@@ -1447,44 +1455,24 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
1447 | 1455 | ||
1448 | conf->next_resync = sector_nr; | 1456 | conf->next_resync = sector_nr; |
1449 | 1457 | ||
1458 | r1_bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO); | ||
1459 | rcu_read_lock(); | ||
1450 | /* | 1460 | /* |
1451 | * If reconstructing, and >1 working disc, | 1461 | * If we get a correctable read error during resync or recovery, |
1452 | * could dedicate one to rebuild and others to | 1462 | * we might want to read from a different device. So we |
1453 | * service read requests .. | 1463 | * flag all drives that could conceivably be read from for READ, |
1464 | * and any others (which will be non-In_sync devices) for WRITE. | ||
1465 | * If a read fails, we try reading from something else for which READ | ||
1466 | * is OK. | ||
1454 | */ | 1467 | */ |
1455 | disk = conf->last_used; | ||
1456 | /* make sure disk is operational */ | ||
1457 | wonly = disk; | ||
1458 | while (conf->mirrors[disk].rdev == NULL || | ||
1459 | !test_bit(In_sync, &conf->mirrors[disk].rdev->flags) || | ||
1460 | test_bit(WriteMostly, &conf->mirrors[disk].rdev->flags) | ||
1461 | ) { | ||
1462 | if (conf->mirrors[disk].rdev && | ||
1463 | test_bit(In_sync, &conf->mirrors[disk].rdev->flags)) | ||
1464 | wonly = disk; | ||
1465 | if (disk <= 0) | ||
1466 | disk = conf->raid_disks; | ||
1467 | disk--; | ||
1468 | if (disk == conf->last_used) { | ||
1469 | disk = wonly; | ||
1470 | break; | ||
1471 | } | ||
1472 | } | ||
1473 | conf->last_used = disk; | ||
1474 | atomic_inc(&conf->mirrors[disk].rdev->nr_pending); | ||
1475 | |||
1476 | |||
1477 | mirror = conf->mirrors + disk; | ||
1478 | |||
1479 | r1_bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO); | ||
1480 | 1468 | ||
1481 | r1_bio->mddev = mddev; | 1469 | r1_bio->mddev = mddev; |
1482 | r1_bio->sector = sector_nr; | 1470 | r1_bio->sector = sector_nr; |
1483 | r1_bio->state = 0; | 1471 | r1_bio->state = 0; |
1484 | set_bit(R1BIO_IsSync, &r1_bio->state); | 1472 | set_bit(R1BIO_IsSync, &r1_bio->state); |
1485 | r1_bio->read_disk = disk; | ||
1486 | 1473 | ||
1487 | for (i=0; i < conf->raid_disks; i++) { | 1474 | for (i=0; i < conf->raid_disks; i++) { |
1475 | mdk_rdev_t *rdev; | ||
1488 | bio = r1_bio->bios[i]; | 1476 | bio = r1_bio->bios[i]; |
1489 | 1477 | ||
1490 | /* take from bio_init */ | 1478 | /* take from bio_init */ |
@@ -1499,35 +1487,49 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
1499 | bio->bi_end_io = NULL; | 1487 | bio->bi_end_io = NULL; |
1500 | bio->bi_private = NULL; | 1488 | bio->bi_private = NULL; |
1501 | 1489 | ||
1502 | if (i == disk) { | 1490 | rdev = rcu_dereference(conf->mirrors[i].rdev); |
1503 | bio->bi_rw = READ; | 1491 | if (rdev == NULL || |
1504 | bio->bi_end_io = end_sync_read; | 1492 | test_bit(Faulty, &rdev->flags)) { |
1505 | } else if (conf->mirrors[i].rdev == NULL || | ||
1506 | test_bit(Faulty, &conf->mirrors[i].rdev->flags)) { | ||
1507 | still_degraded = 1; | 1493 | still_degraded = 1; |
1508 | continue; | 1494 | continue; |
1509 | } else if (!test_bit(In_sync, &conf->mirrors[i].rdev->flags) || | 1495 | } else if (!test_bit(In_sync, &rdev->flags)) { |
1510 | sector_nr + RESYNC_SECTORS > mddev->recovery_cp || | ||
1511 | test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) { | ||
1512 | bio->bi_rw = WRITE; | 1496 | bio->bi_rw = WRITE; |
1513 | bio->bi_end_io = end_sync_write; | 1497 | bio->bi_end_io = end_sync_write; |
1514 | write_targets ++; | 1498 | write_targets ++; |
1515 | } else | 1499 | } else { |
1516 | /* no need to read or write here */ | 1500 | /* may need to read from here */ |
1517 | continue; | 1501 | bio->bi_rw = READ; |
1518 | bio->bi_sector = sector_nr + conf->mirrors[i].rdev->data_offset; | 1502 | bio->bi_end_io = end_sync_read; |
1519 | bio->bi_bdev = conf->mirrors[i].rdev->bdev; | 1503 | if (test_bit(WriteMostly, &rdev->flags)) { |
1504 | if (wonly < 0) | ||
1505 | wonly = i; | ||
1506 | } else { | ||
1507 | if (disk < 0) | ||
1508 | disk = i; | ||
1509 | } | ||
1510 | read_targets++; | ||
1511 | } | ||
1512 | atomic_inc(&rdev->nr_pending); | ||
1513 | bio->bi_sector = sector_nr + rdev->data_offset; | ||
1514 | bio->bi_bdev = rdev->bdev; | ||
1520 | bio->bi_private = r1_bio; | 1515 | bio->bi_private = r1_bio; |
1521 | } | 1516 | } |
1517 | rcu_read_unlock(); | ||
1518 | if (disk < 0) | ||
1519 | disk = wonly; | ||
1520 | r1_bio->read_disk = disk; | ||
1522 | 1521 | ||
1523 | if (write_targets == 0) { | 1522 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && read_targets > 0) |
1523 | /* extra read targets are also write targets */ | ||
1524 | write_targets += read_targets-1; | ||
1525 | |||
1526 | if (write_targets == 0 || read_targets == 0) { | ||
1524 | /* There is nowhere to write, so all non-sync | 1527 | /* There is nowhere to write, so all non-sync |
1525 | * drives must be failed - so we are finished | 1528 | * drives must be failed - so we are finished |
1526 | */ | 1529 | */ |
1527 | sector_t rv = max_sector - sector_nr; | 1530 | sector_t rv = max_sector - sector_nr; |
1528 | *skipped = 1; | 1531 | *skipped = 1; |
1529 | put_buf(r1_bio); | 1532 | put_buf(r1_bio); |
1530 | rdev_dec_pending(conf->mirrors[disk].rdev, mddev); | ||
1531 | return rv; | 1533 | return rv; |
1532 | } | 1534 | } |
1533 | 1535 | ||
@@ -1578,10 +1580,10 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
1578 | sync_blocks -= (len>>9); | 1580 | sync_blocks -= (len>>9); |
1579 | } while (r1_bio->bios[disk]->bi_vcnt < RESYNC_PAGES); | 1581 | } while (r1_bio->bios[disk]->bi_vcnt < RESYNC_PAGES); |
1580 | bio_full: | 1582 | bio_full: |
1581 | bio = r1_bio->bios[disk]; | 1583 | bio = r1_bio->bios[r1_bio->read_disk]; |
1582 | r1_bio->sectors = nr_sectors; | 1584 | r1_bio->sectors = nr_sectors; |
1583 | 1585 | ||
1584 | md_sync_acct(mirror->rdev->bdev, nr_sectors); | 1586 | md_sync_acct(conf->mirrors[r1_bio->read_disk].rdev->bdev, nr_sectors); |
1585 | 1587 | ||
1586 | generic_make_request(bio); | 1588 | generic_make_request(bio); |
1587 | 1589 | ||