diff options
author | NeilBrown <neilb@suse.de> | 2012-05-20 19:28:33 -0400 |
---|---|---|
committer | NeilBrown <neilb@suse.de> | 2012-05-20 19:28:33 -0400 |
commit | f8c9e74ff0832f2244d7991d2aea13851b20a622 (patch) | |
tree | db07ef3ecc00f83d3d9b854929a4cb13def456a7 /drivers/md | |
parent | c804cdecea418c067ee7359d62139b2b3c8cec39 (diff) |
md/raid10: Introduce 'prev' geometry to support reshape.
When RAID10 supports reshape it will need a 'previous' and a 'current'
geometry, so introduce that here.
Use the 'prev' geometry when before the reshape_position, and the
current 'geo' when beyond it. At other times, use both as
appropriate.
For now, both are identical (and reshape_position is never set).
When we use the 'prev' geometry, we must use the old data_offset.
When we use the current geometry (and a reshape is happening), we must
use the new_data_offset.
Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/raid10.c | 107 | ||||
-rw-r--r-- | drivers/md/raid10.h | 8 |
2 files changed, 92 insertions, 23 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 36f445f9e11d..1c90005ab343 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -504,15 +504,13 @@ static void raid10_end_write_request(struct bio *bio, int error) | |||
504 | * sector offset to a virtual address | 504 | * sector offset to a virtual address |
505 | */ | 505 | */ |
506 | 506 | ||
507 | static void raid10_find_phys(struct r10conf *conf, struct r10bio *r10bio) | 507 | static void __raid10_find_phys(struct geom *geo, struct r10bio *r10bio) |
508 | { | 508 | { |
509 | int n,f; | 509 | int n,f; |
510 | sector_t sector; | 510 | sector_t sector; |
511 | sector_t chunk; | 511 | sector_t chunk; |
512 | sector_t stripe; | 512 | sector_t stripe; |
513 | int dev; | 513 | int dev; |
514 | struct geom *geo = &conf->geo; | ||
515 | |||
516 | int slot = 0; | 514 | int slot = 0; |
517 | 515 | ||
518 | /* now calculate first sector/dev */ | 516 | /* now calculate first sector/dev */ |
@@ -550,12 +548,29 @@ static void raid10_find_phys(struct r10conf *conf, struct r10bio *r10bio) | |||
550 | sector += (geo->chunk_mask + 1); | 548 | sector += (geo->chunk_mask + 1); |
551 | } | 549 | } |
552 | } | 550 | } |
553 | BUG_ON(slot != conf->copies); | 551 | } |
552 | |||
553 | static void raid10_find_phys(struct r10conf *conf, struct r10bio *r10bio) | ||
554 | { | ||
555 | struct geom *geo = &conf->geo; | ||
556 | |||
557 | if (conf->reshape_progress != MaxSector && | ||
558 | ((r10bio->sector >= conf->reshape_progress) != | ||
559 | conf->mddev->reshape_backwards)) { | ||
560 | set_bit(R10BIO_Previous, &r10bio->state); | ||
561 | geo = &conf->prev; | ||
562 | } else | ||
563 | clear_bit(R10BIO_Previous, &r10bio->state); | ||
564 | |||
565 | __raid10_find_phys(geo, r10bio); | ||
554 | } | 566 | } |
555 | 567 | ||
556 | static sector_t raid10_find_virt(struct r10conf *conf, sector_t sector, int dev) | 568 | static sector_t raid10_find_virt(struct r10conf *conf, sector_t sector, int dev) |
557 | { | 569 | { |
558 | sector_t offset, chunk, vchunk; | 570 | sector_t offset, chunk, vchunk; |
571 | /* Never use conf->prev as this is only called during resync | ||
572 | * or recovery, so reshape isn't happening | ||
573 | */ | ||
559 | struct geom *geo = &conf->geo; | 574 | struct geom *geo = &conf->geo; |
560 | 575 | ||
561 | offset = sector & geo->chunk_mask; | 576 | offset = sector & geo->chunk_mask; |
@@ -603,6 +618,11 @@ static int raid10_mergeable_bvec(struct request_queue *q, | |||
603 | unsigned int bio_sectors = bvm->bi_size >> 9; | 618 | unsigned int bio_sectors = bvm->bi_size >> 9; |
604 | struct geom *geo = &conf->geo; | 619 | struct geom *geo = &conf->geo; |
605 | 620 | ||
621 | if (conf->reshape_progress != MaxSector && | ||
622 | ((sector >= conf->reshape_progress) != | ||
623 | conf->mddev->reshape_backwards)) | ||
624 | geo = &conf->prev; | ||
625 | |||
606 | if (geo->near_copies < geo->raid_disks) { | 626 | if (geo->near_copies < geo->raid_disks) { |
607 | max = (chunk_sectors - ((sector & (chunk_sectors - 1)) | 627 | max = (chunk_sectors - ((sector & (chunk_sectors - 1)) |
608 | + bio_sectors)) << 9; | 628 | + bio_sectors)) << 9; |
@@ -617,6 +637,12 @@ static int raid10_mergeable_bvec(struct request_queue *q, | |||
617 | if (mddev->merge_check_needed) { | 637 | if (mddev->merge_check_needed) { |
618 | struct r10bio r10_bio; | 638 | struct r10bio r10_bio; |
619 | int s; | 639 | int s; |
640 | if (conf->reshape_progress != MaxSector) { | ||
641 | /* Cannot give any guidance during reshape */ | ||
642 | if (max <= biovec->bv_len && bio_sectors == 0) | ||
643 | return biovec->bv_len; | ||
644 | return 0; | ||
645 | } | ||
620 | r10_bio.sector = sector; | 646 | r10_bio.sector = sector; |
621 | raid10_find_phys(conf, &r10_bio); | 647 | raid10_find_phys(conf, &r10_bio); |
622 | rcu_read_lock(); | 648 | rcu_read_lock(); |
@@ -816,7 +842,10 @@ static int raid10_congested(void *data, int bits) | |||
816 | if (mddev_congested(mddev, bits)) | 842 | if (mddev_congested(mddev, bits)) |
817 | return 1; | 843 | return 1; |
818 | rcu_read_lock(); | 844 | rcu_read_lock(); |
819 | for (i = 0; i < conf->geo.raid_disks && ret == 0; i++) { | 845 | for (i = 0; |
846 | (i < conf->geo.raid_disks || i < conf->prev.raid_disks) | ||
847 | && ret == 0; | ||
848 | i++) { | ||
820 | struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev); | 849 | struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev); |
821 | if (rdev && !test_bit(Faulty, &rdev->flags)) { | 850 | if (rdev && !test_bit(Faulty, &rdev->flags)) { |
822 | struct request_queue *q = bdev_get_queue(rdev->bdev); | 851 | struct request_queue *q = bdev_get_queue(rdev->bdev); |
@@ -977,13 +1006,23 @@ static void unfreeze_array(struct r10conf *conf) | |||
977 | spin_unlock_irq(&conf->resync_lock); | 1006 | spin_unlock_irq(&conf->resync_lock); |
978 | } | 1007 | } |
979 | 1008 | ||
1009 | static sector_t choose_data_offset(struct r10bio *r10_bio, | ||
1010 | struct md_rdev *rdev) | ||
1011 | { | ||
1012 | if (!test_bit(MD_RECOVERY_RESHAPE, &rdev->mddev->recovery) || | ||
1013 | test_bit(R10BIO_Previous, &r10_bio->state)) | ||
1014 | return rdev->data_offset; | ||
1015 | else | ||
1016 | return rdev->new_data_offset; | ||
1017 | } | ||
1018 | |||
980 | static void make_request(struct mddev *mddev, struct bio * bio) | 1019 | static void make_request(struct mddev *mddev, struct bio * bio) |
981 | { | 1020 | { |
982 | struct r10conf *conf = mddev->private; | 1021 | struct r10conf *conf = mddev->private; |
983 | struct r10bio *r10_bio; | 1022 | struct r10bio *r10_bio; |
984 | struct bio *read_bio; | 1023 | struct bio *read_bio; |
985 | int i; | 1024 | int i; |
986 | sector_t chunk_mask = conf->geo.chunk_mask; | 1025 | sector_t chunk_mask = (conf->geo.chunk_mask & conf->prev.chunk_mask); |
987 | int chunk_sects = chunk_mask + 1; | 1026 | int chunk_sects = chunk_mask + 1; |
988 | const int rw = bio_data_dir(bio); | 1027 | const int rw = bio_data_dir(bio); |
989 | const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); | 1028 | const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); |
@@ -1004,7 +1043,8 @@ static void make_request(struct mddev *mddev, struct bio * bio) | |||
1004 | */ | 1043 | */ |
1005 | if (unlikely((bio->bi_sector & chunk_mask) + (bio->bi_size >> 9) | 1044 | if (unlikely((bio->bi_sector & chunk_mask) + (bio->bi_size >> 9) |
1006 | > chunk_sects | 1045 | > chunk_sects |
1007 | && conf->geo.near_copies < conf->geo.raid_disks)) { | 1046 | && (conf->geo.near_copies < conf->geo.raid_disks |
1047 | || conf->prev.near_copies < conf->prev.raid_disks))) { | ||
1008 | struct bio_pair *bp; | 1048 | struct bio_pair *bp; |
1009 | /* Sanity check -- queue functions should prevent this happening */ | 1049 | /* Sanity check -- queue functions should prevent this happening */ |
1010 | if (bio->bi_vcnt != 1 || | 1050 | if (bio->bi_vcnt != 1 || |
@@ -1098,7 +1138,7 @@ read_again: | |||
1098 | r10_bio->devs[slot].rdev = rdev; | 1138 | r10_bio->devs[slot].rdev = rdev; |
1099 | 1139 | ||
1100 | read_bio->bi_sector = r10_bio->devs[slot].addr + | 1140 | read_bio->bi_sector = r10_bio->devs[slot].addr + |
1101 | rdev->data_offset; | 1141 | choose_data_offset(r10_bio, rdev); |
1102 | read_bio->bi_bdev = rdev->bdev; | 1142 | read_bio->bi_bdev = rdev->bdev; |
1103 | read_bio->bi_end_io = raid10_end_read_request; | 1143 | read_bio->bi_end_io = raid10_end_read_request; |
1104 | read_bio->bi_rw = READ | do_sync; | 1144 | read_bio->bi_rw = READ | do_sync; |
@@ -1302,7 +1342,8 @@ retry_write: | |||
1302 | r10_bio->devs[i].bio = mbio; | 1342 | r10_bio->devs[i].bio = mbio; |
1303 | 1343 | ||
1304 | mbio->bi_sector = (r10_bio->devs[i].addr+ | 1344 | mbio->bi_sector = (r10_bio->devs[i].addr+ |
1305 | conf->mirrors[d].rdev->data_offset); | 1345 | choose_data_offset(r10_bio, |
1346 | conf->mirrors[d].rdev)); | ||
1306 | mbio->bi_bdev = conf->mirrors[d].rdev->bdev; | 1347 | mbio->bi_bdev = conf->mirrors[d].rdev->bdev; |
1307 | mbio->bi_end_io = raid10_end_write_request; | 1348 | mbio->bi_end_io = raid10_end_write_request; |
1308 | mbio->bi_rw = WRITE | do_sync | do_fua; | 1349 | mbio->bi_rw = WRITE | do_sync | do_fua; |
@@ -1326,8 +1367,10 @@ retry_write: | |||
1326 | * so it cannot disappear, so the replacement cannot | 1367 | * so it cannot disappear, so the replacement cannot |
1327 | * become NULL here | 1368 | * become NULL here |
1328 | */ | 1369 | */ |
1329 | mbio->bi_sector = (r10_bio->devs[i].addr+ | 1370 | mbio->bi_sector = (r10_bio->devs[i].addr + |
1330 | conf->mirrors[d].replacement->data_offset); | 1371 | choose_data_offset( |
1372 | r10_bio, | ||
1373 | conf->mirrors[d].replacement)); | ||
1331 | mbio->bi_bdev = conf->mirrors[d].replacement->bdev; | 1374 | mbio->bi_bdev = conf->mirrors[d].replacement->bdev; |
1332 | mbio->bi_end_io = raid10_end_write_request; | 1375 | mbio->bi_end_io = raid10_end_write_request; |
1333 | mbio->bi_rw = WRITE | do_sync | do_fua; | 1376 | mbio->bi_rw = WRITE | do_sync | do_fua; |
@@ -1397,7 +1440,7 @@ static void status(struct seq_file *seq, struct mddev *mddev) | |||
1397 | * Don't consider the device numbered 'ignore' | 1440 | * Don't consider the device numbered 'ignore' |
1398 | * as we might be about to remove it. | 1441 | * as we might be about to remove it. |
1399 | */ | 1442 | */ |
1400 | static int enough(struct r10conf *conf, int ignore) | 1443 | static int _enough(struct r10conf *conf, struct geom *geo, int ignore) |
1401 | { | 1444 | { |
1402 | int first = 0; | 1445 | int first = 0; |
1403 | 1446 | ||
@@ -1408,7 +1451,7 @@ static int enough(struct r10conf *conf, int ignore) | |||
1408 | if (conf->mirrors[first].rdev && | 1451 | if (conf->mirrors[first].rdev && |
1409 | first != ignore) | 1452 | first != ignore) |
1410 | cnt++; | 1453 | cnt++; |
1411 | first = (first+1) % conf->geo.raid_disks; | 1454 | first = (first+1) % geo->raid_disks; |
1412 | } | 1455 | } |
1413 | if (cnt == 0) | 1456 | if (cnt == 0) |
1414 | return 0; | 1457 | return 0; |
@@ -1416,6 +1459,12 @@ static int enough(struct r10conf *conf, int ignore) | |||
1416 | return 1; | 1459 | return 1; |
1417 | } | 1460 | } |
1418 | 1461 | ||
1462 | static int enough(struct r10conf *conf, int ignore) | ||
1463 | { | ||
1464 | return _enough(conf, &conf->geo, ignore) && | ||
1465 | _enough(conf, &conf->prev, ignore); | ||
1466 | } | ||
1467 | |||
1419 | static void error(struct mddev *mddev, struct md_rdev *rdev) | 1468 | static void error(struct mddev *mddev, struct md_rdev *rdev) |
1420 | { | 1469 | { |
1421 | char b[BDEVNAME_SIZE]; | 1470 | char b[BDEVNAME_SIZE]; |
@@ -1548,7 +1597,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) | |||
1548 | * very different from resync | 1597 | * very different from resync |
1549 | */ | 1598 | */ |
1550 | return -EBUSY; | 1599 | return -EBUSY; |
1551 | if (rdev->saved_raid_disk < 0 && !enough(conf, -1)) | 1600 | if (rdev->saved_raid_disk < 0 && !_enough(conf, &conf->prev, -1)) |
1552 | return -EINVAL; | 1601 | return -EINVAL; |
1553 | 1602 | ||
1554 | if (rdev->raid_disk >= 0) | 1603 | if (rdev->raid_disk >= 0) |
@@ -2223,7 +2272,9 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 | |||
2223 | " (%d sectors at %llu on %s)\n", | 2272 | " (%d sectors at %llu on %s)\n", |
2224 | mdname(mddev), s, | 2273 | mdname(mddev), s, |
2225 | (unsigned long long)( | 2274 | (unsigned long long)( |
2226 | sect + rdev->data_offset), | 2275 | sect + |
2276 | choose_data_offset(r10_bio, | ||
2277 | rdev)), | ||
2227 | bdevname(rdev->bdev, b)); | 2278 | bdevname(rdev->bdev, b)); |
2228 | printk(KERN_NOTICE "md/raid10:%s: %s: failing " | 2279 | printk(KERN_NOTICE "md/raid10:%s: %s: failing " |
2229 | "drive\n", | 2280 | "drive\n", |
@@ -2261,7 +2312,8 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 | |||
2261 | " (%d sectors at %llu on %s)\n", | 2312 | " (%d sectors at %llu on %s)\n", |
2262 | mdname(mddev), s, | 2313 | mdname(mddev), s, |
2263 | (unsigned long long)( | 2314 | (unsigned long long)( |
2264 | sect + rdev->data_offset), | 2315 | sect + |
2316 | choose_data_offset(r10_bio, rdev)), | ||
2265 | bdevname(rdev->bdev, b)); | 2317 | bdevname(rdev->bdev, b)); |
2266 | printk(KERN_NOTICE "md/raid10:%s: %s: failing " | 2318 | printk(KERN_NOTICE "md/raid10:%s: %s: failing " |
2267 | "drive\n", | 2319 | "drive\n", |
@@ -2274,7 +2326,8 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 | |||
2274 | " (%d sectors at %llu on %s)\n", | 2326 | " (%d sectors at %llu on %s)\n", |
2275 | mdname(mddev), s, | 2327 | mdname(mddev), s, |
2276 | (unsigned long long)( | 2328 | (unsigned long long)( |
2277 | sect + rdev->data_offset), | 2329 | sect + |
2330 | choose_data_offset(r10_bio, rdev)), | ||
2278 | bdevname(rdev->bdev, b)); | 2331 | bdevname(rdev->bdev, b)); |
2279 | atomic_add(s, &rdev->corrected_errors); | 2332 | atomic_add(s, &rdev->corrected_errors); |
2280 | } | 2333 | } |
@@ -2348,7 +2401,7 @@ static int narrow_write_error(struct r10bio *r10_bio, int i) | |||
2348 | wbio = bio_clone_mddev(bio, GFP_NOIO, mddev); | 2401 | wbio = bio_clone_mddev(bio, GFP_NOIO, mddev); |
2349 | md_trim_bio(wbio, sector - bio->bi_sector, sectors); | 2402 | md_trim_bio(wbio, sector - bio->bi_sector, sectors); |
2350 | wbio->bi_sector = (r10_bio->devs[i].addr+ | 2403 | wbio->bi_sector = (r10_bio->devs[i].addr+ |
2351 | rdev->data_offset+ | 2404 | choose_data_offset(r10_bio, rdev) + |
2352 | (sector - r10_bio->sector)); | 2405 | (sector - r10_bio->sector)); |
2353 | wbio->bi_bdev = rdev->bdev; | 2406 | wbio->bi_bdev = rdev->bdev; |
2354 | if (submit_bio_wait(WRITE, wbio) == 0) | 2407 | if (submit_bio_wait(WRITE, wbio) == 0) |
@@ -2425,7 +2478,7 @@ read_more: | |||
2425 | r10_bio->devs[slot].bio = bio; | 2478 | r10_bio->devs[slot].bio = bio; |
2426 | r10_bio->devs[slot].rdev = rdev; | 2479 | r10_bio->devs[slot].rdev = rdev; |
2427 | bio->bi_sector = r10_bio->devs[slot].addr | 2480 | bio->bi_sector = r10_bio->devs[slot].addr |
2428 | + rdev->data_offset; | 2481 | + choose_data_offset(r10_bio, rdev); |
2429 | bio->bi_bdev = rdev->bdev; | 2482 | bio->bi_bdev = rdev->bdev; |
2430 | bio->bi_rw = READ | do_sync; | 2483 | bio->bi_rw = READ | do_sync; |
2431 | bio->bi_private = r10_bio; | 2484 | bio->bi_private = r10_bio; |
@@ -3254,6 +3307,8 @@ static struct r10conf *setup_conf(struct mddev *mddev) | |||
3254 | goto out; | 3307 | goto out; |
3255 | 3308 | ||
3256 | calc_sectors(conf, mddev->dev_sectors); | 3309 | calc_sectors(conf, mddev->dev_sectors); |
3310 | conf->prev = conf->geo; | ||
3311 | conf->reshape_progress = MaxSector; | ||
3257 | 3312 | ||
3258 | spin_lock_init(&conf->device_lock); | 3313 | spin_lock_init(&conf->device_lock); |
3259 | INIT_LIST_HEAD(&conf->retry_list); | 3314 | INIT_LIST_HEAD(&conf->retry_list); |
@@ -3319,8 +3374,10 @@ static int run(struct mddev *mddev) | |||
3319 | rdev_for_each(rdev, mddev) { | 3374 | rdev_for_each(rdev, mddev) { |
3320 | 3375 | ||
3321 | disk_idx = rdev->raid_disk; | 3376 | disk_idx = rdev->raid_disk; |
3322 | if (disk_idx >= conf->geo.raid_disks | 3377 | if (disk_idx < 0) |
3323 | || disk_idx < 0) | 3378 | continue; |
3379 | if (disk_idx >= conf->geo.raid_disks && | ||
3380 | disk_idx >= conf->prev.raid_disks) | ||
3324 | continue; | 3381 | continue; |
3325 | disk = conf->mirrors + disk_idx; | 3382 | disk = conf->mirrors + disk_idx; |
3326 | 3383 | ||
@@ -3347,7 +3404,10 @@ static int run(struct mddev *mddev) | |||
3347 | } | 3404 | } |
3348 | 3405 | ||
3349 | mddev->degraded = 0; | 3406 | mddev->degraded = 0; |
3350 | for (i = 0; i < conf->geo.raid_disks; i++) { | 3407 | for (i = 0; |
3408 | i < conf->geo.raid_disks | ||
3409 | || i < conf->prev.raid_disks; | ||
3410 | i++) { | ||
3351 | 3411 | ||
3352 | disk = conf->mirrors + i; | 3412 | disk = conf->mirrors + i; |
3353 | 3413 | ||
@@ -3466,6 +3526,9 @@ static int raid10_resize(struct mddev *mddev, sector_t sectors) | |||
3466 | struct r10conf *conf = mddev->private; | 3526 | struct r10conf *conf = mddev->private; |
3467 | sector_t oldsize, size; | 3527 | sector_t oldsize, size; |
3468 | 3528 | ||
3529 | if (mddev->reshape_position != MaxSector) | ||
3530 | return -EBUSY; | ||
3531 | |||
3469 | if (conf->geo.far_copies > 1 && !conf->geo.far_offset) | 3532 | if (conf->geo.far_copies > 1 && !conf->geo.far_offset) |
3470 | return -EINVAL; | 3533 | return -EINVAL; |
3471 | 3534 | ||
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h index 4c4942ac46fc..37509d7134aa 100644 --- a/drivers/md/raid10.h +++ b/drivers/md/raid10.h | |||
@@ -34,13 +34,14 @@ struct r10conf { | |||
34 | */ | 34 | */ |
35 | int chunk_shift; /* shift from chunks to sectors */ | 35 | int chunk_shift; /* shift from chunks to sectors */ |
36 | sector_t chunk_mask; | 36 | sector_t chunk_mask; |
37 | } geo; | 37 | } prev, geo; |
38 | int copies; /* near_copies * far_copies. | 38 | int copies; /* near_copies * far_copies. |
39 | * must be <= raid_disks | 39 | * must be <= raid_disks |
40 | */ | 40 | */ |
41 | 41 | ||
42 | sector_t dev_sectors; /* temp copy of | 42 | sector_t dev_sectors; /* temp copy of |
43 | * mddev->dev_sectors */ | 43 | * mddev->dev_sectors */ |
44 | sector_t reshape_progress; | ||
44 | 45 | ||
45 | struct list_head retry_list; | 46 | struct list_head retry_list; |
46 | /* queue pending writes and submit them on unplug */ | 47 | /* queue pending writes and submit them on unplug */ |
@@ -147,5 +148,10 @@ enum r10bio_state { | |||
147 | */ | 148 | */ |
148 | R10BIO_MadeGood, | 149 | R10BIO_MadeGood, |
149 | R10BIO_WriteError, | 150 | R10BIO_WriteError, |
151 | /* During a reshape we might be performing IO on the | ||
152 | * 'previous' part of the array, in which case this | ||
153 | * flag is set | ||
154 | */ | ||
155 | R10BIO_Previous, | ||
150 | }; | 156 | }; |
151 | #endif | 157 | #endif |