aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.de>2011-12-22 18:17:55 -0500
committerNeilBrown <neilb@suse.de>2011-12-22 18:17:55 -0500
commit4ca40c2ce099e4f1ce35445994f49836662596c8 (patch)
tree7d6f2187e8d6aaab0cdcf4924017aeb9725d87b0
parent24afd80d99f80a79d8824d2805114b8b067e9823 (diff)
md/raid10: Allow replacement device to replace old drive.
When recovery finishes and spare_active is called, check for a replacement that might have just become fully synced and mark it as such, marking the original as failed. Then when the original is removed, move the replacement into its position. This means that 'replacement' can spontaneously become NULL in some situations. Make sure we check for those. It also means that 'rdev' and 'replacement' could appear to be identical - check for that too. Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r--drivers/md/raid10.c72
1 files changed, 61 insertions, 11 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 403f05ac1f2a..90e951730a23 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -397,14 +397,17 @@ static void raid10_end_write_request(struct bio *bio, int error)
397 int dec_rdev = 1; 397 int dec_rdev = 1;
398 struct r10conf *conf = r10_bio->mddev->private; 398 struct r10conf *conf = r10_bio->mddev->private;
399 int slot, repl; 399 int slot, repl;
400 struct md_rdev *rdev; 400 struct md_rdev *rdev = NULL;
401 401
402 dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl); 402 dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
403 403
404 if (repl) 404 if (repl)
405 rdev = conf->mirrors[dev].replacement; 405 rdev = conf->mirrors[dev].replacement;
406 else 406 if (!rdev) {
407 smp_rmb();
408 repl = 0;
407 rdev = conf->mirrors[dev].rdev; 409 rdev = conf->mirrors[dev].rdev;
410 }
408 /* 411 /*
409 * this branch is our 'one mirror IO has finished' event handler: 412 * this branch is our 'one mirror IO has finished' event handler:
410 */ 413 */
@@ -1089,6 +1092,8 @@ retry_write:
1089 struct md_rdev *rdev = rcu_dereference(conf->mirrors[d].rdev); 1092 struct md_rdev *rdev = rcu_dereference(conf->mirrors[d].rdev);
1090 struct md_rdev *rrdev = rcu_dereference( 1093 struct md_rdev *rrdev = rcu_dereference(
1091 conf->mirrors[d].replacement); 1094 conf->mirrors[d].replacement);
1095 if (rdev == rrdev)
1096 rrdev = NULL;
1092 if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) { 1097 if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
1093 atomic_inc(&rdev->nr_pending); 1098 atomic_inc(&rdev->nr_pending);
1094 blocked_rdev = rdev; 1099 blocked_rdev = rdev;
@@ -1170,9 +1175,15 @@ retry_write:
1170 rdev_dec_pending(conf->mirrors[d].rdev, mddev); 1175 rdev_dec_pending(conf->mirrors[d].rdev, mddev);
1171 } 1176 }
1172 if (r10_bio->devs[j].repl_bio) { 1177 if (r10_bio->devs[j].repl_bio) {
1178 struct md_rdev *rdev;
1173 d = r10_bio->devs[j].devnum; 1179 d = r10_bio->devs[j].devnum;
1174 rdev_dec_pending( 1180 rdev = conf->mirrors[d].replacement;
1175 conf->mirrors[d].replacement, mddev); 1181 if (!rdev) {
1182 /* Race with remove_disk */
1183 smp_mb();
1184 rdev = conf->mirrors[d].rdev;
1185 }
1186 rdev_dec_pending(rdev, mddev);
1176 } 1187 }
1177 } 1188 }
1178 allow_barrier(conf); 1189 allow_barrier(conf);
@@ -1230,6 +1241,10 @@ retry_write:
1230 max_sectors); 1241 max_sectors);
1231 r10_bio->devs[i].repl_bio = mbio; 1242 r10_bio->devs[i].repl_bio = mbio;
1232 1243
1244 /* We are actively writing to the original device
1245 * so it cannot disappear, so the replacement cannot
1246 * become NULL here
1247 */
1233 mbio->bi_sector = (r10_bio->devs[i].addr+ 1248 mbio->bi_sector = (r10_bio->devs[i].addr+
1234 conf->mirrors[d].replacement->data_offset); 1249 conf->mirrors[d].replacement->data_offset);
1235 mbio->bi_bdev = conf->mirrors[d].replacement->bdev; 1250 mbio->bi_bdev = conf->mirrors[d].replacement->bdev;
@@ -1404,9 +1419,27 @@ static int raid10_spare_active(struct mddev *mddev)
1404 */ 1419 */
1405 for (i = 0; i < conf->raid_disks; i++) { 1420 for (i = 0; i < conf->raid_disks; i++) {
1406 tmp = conf->mirrors + i; 1421 tmp = conf->mirrors + i;
1407 if (tmp->rdev 1422 if (tmp->replacement
1408 && !test_bit(Faulty, &tmp->rdev->flags) 1423 && tmp->replacement->recovery_offset == MaxSector
1409 && !test_and_set_bit(In_sync, &tmp->rdev->flags)) { 1424 && !test_bit(Faulty, &tmp->replacement->flags)
1425 && !test_and_set_bit(In_sync, &tmp->replacement->flags)) {
1426 /* Replacement has just become active */
1427 if (!tmp->rdev
1428 || !test_and_clear_bit(In_sync, &tmp->rdev->flags))
1429 count++;
1430 if (tmp->rdev) {
1431 /* Replaced device not technically faulty,
1432 * but we need to be sure it gets removed
1433 * and never re-added.
1434 */
1435 set_bit(Faulty, &tmp->rdev->flags);
1436 sysfs_notify_dirent_safe(
1437 tmp->rdev->sysfs_state);
1438 }
1439 sysfs_notify_dirent_safe(tmp->replacement->sysfs_state);
1440 } else if (tmp->rdev
1441 && !test_bit(Faulty, &tmp->rdev->flags)
1442 && !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
1410 count++; 1443 count++;
1411 sysfs_notify_dirent(tmp->rdev->sysfs_state); 1444 sysfs_notify_dirent(tmp->rdev->sysfs_state);
1412 } 1445 }
@@ -1506,6 +1539,7 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
1506 */ 1539 */
1507 if (!test_bit(Faulty, &rdev->flags) && 1540 if (!test_bit(Faulty, &rdev->flags) &&
1508 mddev->recovery_disabled != p->recovery_disabled && 1541 mddev->recovery_disabled != p->recovery_disabled &&
1542 (!p->replacement || p->replacement == rdev) &&
1509 enough(conf, -1)) { 1543 enough(conf, -1)) {
1510 err = -EBUSY; 1544 err = -EBUSY;
1511 goto abort; 1545 goto abort;
@@ -1517,7 +1551,21 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
1517 err = -EBUSY; 1551 err = -EBUSY;
1518 *rdevp = rdev; 1552 *rdevp = rdev;
1519 goto abort; 1553 goto abort;
1520 } 1554 } else if (p->replacement) {
1555 /* We must have just cleared 'rdev' */
1556 p->rdev = p->replacement;
1557 clear_bit(Replacement, &p->replacement->flags);
1558 smp_mb(); /* Make sure other CPUs may see both as identical
1559 * but will never see neither -- if they are careful.
1560 */
1561 p->replacement = NULL;
1562 clear_bit(WantReplacement, &rdev->flags);
1563 } else
1564 /* We might have just removed the Replacement as faulty
1565 * Clear the flag just in case
1566 */
1567 clear_bit(WantReplacement, &rdev->flags);
1568
1521 err = md_integrity_register(mddev); 1569 err = md_integrity_register(mddev);
1522 1570
1523abort: 1571abort:
@@ -1595,13 +1643,15 @@ static void end_sync_write(struct bio *bio, int error)
1595 int bad_sectors; 1643 int bad_sectors;
1596 int slot; 1644 int slot;
1597 int repl; 1645 int repl;
1598 struct md_rdev *rdev; 1646 struct md_rdev *rdev = NULL;
1599 1647
1600 d = find_bio_disk(conf, r10_bio, bio, &slot, &repl); 1648 d = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
1601 if (repl) 1649 if (repl)
1602 rdev = conf->mirrors[d].replacement; 1650 rdev = conf->mirrors[d].replacement;
1603 else 1651 if (!rdev) {
1652 smp_mb();
1604 rdev = conf->mirrors[d].rdev; 1653 rdev = conf->mirrors[d].rdev;
1654 }
1605 1655
1606 if (!uptodate) { 1656 if (!uptodate) {
1607 if (repl) 1657 if (repl)
@@ -2368,7 +2418,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
2368 } 2418 }
2369 bio = r10_bio->devs[m].repl_bio; 2419 bio = r10_bio->devs[m].repl_bio;
2370 rdev = conf->mirrors[dev].replacement; 2420 rdev = conf->mirrors[dev].replacement;
2371 if (bio == IO_MADE_GOOD) { 2421 if (rdev && bio == IO_MADE_GOOD) {
2372 rdev_clear_badblocks( 2422 rdev_clear_badblocks(
2373 rdev, 2423 rdev,
2374 r10_bio->devs[m].addr, 2424 r10_bio->devs[m].addr,