aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
author NeilBrown <neilb@suse.de> 2012-05-20 19:28:33 -0400
committer NeilBrown <neilb@suse.de> 2012-05-20 19:28:33 -0400
commit f8c9e74ff0832f2244d7991d2aea13851b20a622 (patch)
tree db07ef3ecc00f83d3d9b854929a4cb13def456a7 /drivers
parent c804cdecea418c067ee7359d62139b2b3c8cec39 (diff)
md/raid10: Introduce 'prev' geometry to support reshape.
When RAID10 supports reshape it will need a 'previous' and a 'current' geometry, so introduce that here. Use the 'prev' geometry when before the reshape_position, and the current 'geo' when beyond it. At other times, use both as appropriate. For now, both are identical (and reshape_position is never set). When we use the 'prev' geometry, we must use the old data_offset. When we use the current geometry (and a reshape is happening), we must use the new_data_offset.
Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/md/raid10.c 107
-rw-r--r-- drivers/md/raid10.h 8
2 files changed, 92 insertions, 23 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 36f445f9e11d..1c90005ab343 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -504,15 +504,13 @@ static void raid10_end_write_request(struct bio *bio, int error)
504 * sector offset to a virtual address 504 * sector offset to a virtual address
505 */ 505 */
506 506
507static void raid10_find_phys(struct r10conf *conf, struct r10bio *r10bio) 507static void __raid10_find_phys(struct geom *geo, struct r10bio *r10bio)
508{ 508{
509 int n,f; 509 int n,f;
510 sector_t sector; 510 sector_t sector;
511 sector_t chunk; 511 sector_t chunk;
512 sector_t stripe; 512 sector_t stripe;
513 int dev; 513 int dev;
514 struct geom *geo = &conf->geo;
515
516 int slot = 0; 514 int slot = 0;
517 515
518 /* now calculate first sector/dev */ 516 /* now calculate first sector/dev */
@@ -550,12 +548,29 @@ static void raid10_find_phys(struct r10conf *conf, struct r10bio *r10bio)
550 sector += (geo->chunk_mask + 1); 548 sector += (geo->chunk_mask + 1);
551 } 549 }
552 } 550 }
553 BUG_ON(slot != conf->copies); 551}
552
553static void raid10_find_phys(struct r10conf *conf, struct r10bio *r10bio)
554{
555 struct geom *geo = &conf->geo;
556
557 if (conf->reshape_progress != MaxSector &&
558 ((r10bio->sector >= conf->reshape_progress) !=
559 conf->mddev->reshape_backwards)) {
560 set_bit(R10BIO_Previous, &r10bio->state);
561 geo = &conf->prev;
562 } else
563 clear_bit(R10BIO_Previous, &r10bio->state);
564
565 __raid10_find_phys(geo, r10bio);
554} 566}
555 567
556static sector_t raid10_find_virt(struct r10conf *conf, sector_t sector, int dev) 568static sector_t raid10_find_virt(struct r10conf *conf, sector_t sector, int dev)
557{ 569{
558 sector_t offset, chunk, vchunk; 570 sector_t offset, chunk, vchunk;
571 /* Never use conf->prev as this is only called during resync
572 * or recovery, so reshape isn't happening
573 */
559 struct geom *geo = &conf->geo; 574 struct geom *geo = &conf->geo;
560 575
561 offset = sector & geo->chunk_mask; 576 offset = sector & geo->chunk_mask;
@@ -603,6 +618,11 @@ static int raid10_mergeable_bvec(struct request_queue *q,
603 unsigned int bio_sectors = bvm->bi_size >> 9; 618 unsigned int bio_sectors = bvm->bi_size >> 9;
604 struct geom *geo = &conf->geo; 619 struct geom *geo = &conf->geo;
605 620
621 if (conf->reshape_progress != MaxSector &&
622 ((sector >= conf->reshape_progress) !=
623 conf->mddev->reshape_backwards))
624 geo = &conf->prev;
625
606 if (geo->near_copies < geo->raid_disks) { 626 if (geo->near_copies < geo->raid_disks) {
607 max = (chunk_sectors - ((sector & (chunk_sectors - 1)) 627 max = (chunk_sectors - ((sector & (chunk_sectors - 1))
608 + bio_sectors)) << 9; 628 + bio_sectors)) << 9;
@@ -617,6 +637,12 @@ static int raid10_mergeable_bvec(struct request_queue *q,
617 if (mddev->merge_check_needed) { 637 if (mddev->merge_check_needed) {
618 struct r10bio r10_bio; 638 struct r10bio r10_bio;
619 int s; 639 int s;
640 if (conf->reshape_progress != MaxSector) {
641 /* Cannot give any guidance during reshape */
642 if (max <= biovec->bv_len && bio_sectors == 0)
643 return biovec->bv_len;
644 return 0;
645 }
620 r10_bio.sector = sector; 646 r10_bio.sector = sector;
621 raid10_find_phys(conf, &r10_bio); 647 raid10_find_phys(conf, &r10_bio);
622 rcu_read_lock(); 648 rcu_read_lock();
@@ -816,7 +842,10 @@ static int raid10_congested(void *data, int bits)
816 if (mddev_congested(mddev, bits)) 842 if (mddev_congested(mddev, bits))
817 return 1; 843 return 1;
818 rcu_read_lock(); 844 rcu_read_lock();
819 for (i = 0; i < conf->geo.raid_disks && ret == 0; i++) { 845 for (i = 0;
846 (i < conf->geo.raid_disks || i < conf->prev.raid_disks)
847 && ret == 0;
848 i++) {
820 struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev); 849 struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
821 if (rdev && !test_bit(Faulty, &rdev->flags)) { 850 if (rdev && !test_bit(Faulty, &rdev->flags)) {
822 struct request_queue *q = bdev_get_queue(rdev->bdev); 851 struct request_queue *q = bdev_get_queue(rdev->bdev);
@@ -977,13 +1006,23 @@ static void unfreeze_array(struct r10conf *conf)
977 spin_unlock_irq(&conf->resync_lock); 1006 spin_unlock_irq(&conf->resync_lock);
978} 1007}
979 1008
1009static sector_t choose_data_offset(struct r10bio *r10_bio,
1010 struct md_rdev *rdev)
1011{
1012 if (!test_bit(MD_RECOVERY_RESHAPE, &rdev->mddev->recovery) ||
1013 test_bit(R10BIO_Previous, &r10_bio->state))
1014 return rdev->data_offset;
1015 else
1016 return rdev->new_data_offset;
1017}
1018
980static void make_request(struct mddev *mddev, struct bio * bio) 1019static void make_request(struct mddev *mddev, struct bio * bio)
981{ 1020{
982 struct r10conf *conf = mddev->private; 1021 struct r10conf *conf = mddev->private;
983 struct r10bio *r10_bio; 1022 struct r10bio *r10_bio;
984 struct bio *read_bio; 1023 struct bio *read_bio;
985 int i; 1024 int i;
986 sector_t chunk_mask = conf->geo.chunk_mask; 1025 sector_t chunk_mask = (conf->geo.chunk_mask & conf->prev.chunk_mask);
987 int chunk_sects = chunk_mask + 1; 1026 int chunk_sects = chunk_mask + 1;
988 const int rw = bio_data_dir(bio); 1027 const int rw = bio_data_dir(bio);
989 const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); 1028 const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
@@ -1004,7 +1043,8 @@ static void make_request(struct mddev *mddev, struct bio * bio)
1004 */ 1043 */
1005 if (unlikely((bio->bi_sector & chunk_mask) + (bio->bi_size >> 9) 1044 if (unlikely((bio->bi_sector & chunk_mask) + (bio->bi_size >> 9)
1006 > chunk_sects 1045 > chunk_sects
1007 && conf->geo.near_copies < conf->geo.raid_disks)) { 1046 && (conf->geo.near_copies < conf->geo.raid_disks
1047 || conf->prev.near_copies < conf->prev.raid_disks))) {
1008 struct bio_pair *bp; 1048 struct bio_pair *bp;
1009 /* Sanity check -- queue functions should prevent this happening */ 1049 /* Sanity check -- queue functions should prevent this happening */
1010 if (bio->bi_vcnt != 1 || 1050 if (bio->bi_vcnt != 1 ||
@@ -1098,7 +1138,7 @@ read_again:
1098 r10_bio->devs[slot].rdev = rdev; 1138 r10_bio->devs[slot].rdev = rdev;
1099 1139
1100 read_bio->bi_sector = r10_bio->devs[slot].addr + 1140 read_bio->bi_sector = r10_bio->devs[slot].addr +
1101 rdev->data_offset; 1141 choose_data_offset(r10_bio, rdev);
1102 read_bio->bi_bdev = rdev->bdev; 1142 read_bio->bi_bdev = rdev->bdev;
1103 read_bio->bi_end_io = raid10_end_read_request; 1143 read_bio->bi_end_io = raid10_end_read_request;
1104 read_bio->bi_rw = READ | do_sync; 1144 read_bio->bi_rw = READ | do_sync;
@@ -1302,7 +1342,8 @@ retry_write:
1302 r10_bio->devs[i].bio = mbio; 1342 r10_bio->devs[i].bio = mbio;
1303 1343
1304 mbio->bi_sector = (r10_bio->devs[i].addr+ 1344 mbio->bi_sector = (r10_bio->devs[i].addr+
1305 conf->mirrors[d].rdev->data_offset); 1345 choose_data_offset(r10_bio,
1346 conf->mirrors[d].rdev));
1306 mbio->bi_bdev = conf->mirrors[d].rdev->bdev; 1347 mbio->bi_bdev = conf->mirrors[d].rdev->bdev;
1307 mbio->bi_end_io = raid10_end_write_request; 1348 mbio->bi_end_io = raid10_end_write_request;
1308 mbio->bi_rw = WRITE | do_sync | do_fua; 1349 mbio->bi_rw = WRITE | do_sync | do_fua;
@@ -1326,8 +1367,10 @@ retry_write:
1326 * so it cannot disappear, so the replacement cannot 1367 * so it cannot disappear, so the replacement cannot
1327 * become NULL here 1368 * become NULL here
1328 */ 1369 */
1329 mbio->bi_sector = (r10_bio->devs[i].addr+ 1370 mbio->bi_sector = (r10_bio->devs[i].addr +
1330 conf->mirrors[d].replacement->data_offset); 1371 choose_data_offset(
1372 r10_bio,
1373 conf->mirrors[d].replacement));
1331 mbio->bi_bdev = conf->mirrors[d].replacement->bdev; 1374 mbio->bi_bdev = conf->mirrors[d].replacement->bdev;
1332 mbio->bi_end_io = raid10_end_write_request; 1375 mbio->bi_end_io = raid10_end_write_request;
1333 mbio->bi_rw = WRITE | do_sync | do_fua; 1376 mbio->bi_rw = WRITE | do_sync | do_fua;
@@ -1397,7 +1440,7 @@ static void status(struct seq_file *seq, struct mddev *mddev)
1397 * Don't consider the device numbered 'ignore' 1440 * Don't consider the device numbered 'ignore'
1398 * as we might be about to remove it. 1441 * as we might be about to remove it.
1399 */ 1442 */
1400static int enough(struct r10conf *conf, int ignore) 1443static int _enough(struct r10conf *conf, struct geom *geo, int ignore)
1401{ 1444{
1402 int first = 0; 1445 int first = 0;
1403 1446
@@ -1408,7 +1451,7 @@ static int enough(struct r10conf *conf, int ignore)
1408 if (conf->mirrors[first].rdev && 1451 if (conf->mirrors[first].rdev &&
1409 first != ignore) 1452 first != ignore)
1410 cnt++; 1453 cnt++;
1411 first = (first+1) % conf->geo.raid_disks; 1454 first = (first+1) % geo->raid_disks;
1412 } 1455 }
1413 if (cnt == 0) 1456 if (cnt == 0)
1414 return 0; 1457 return 0;
@@ -1416,6 +1459,12 @@ static int enough(struct r10conf *conf, int ignore)
1416 return 1; 1459 return 1;
1417} 1460}
1418 1461
1462static int enough(struct r10conf *conf, int ignore)
1463{
1464 return _enough(conf, &conf->geo, ignore) &&
1465 _enough(conf, &conf->prev, ignore);
1466}
1467
1419static void error(struct mddev *mddev, struct md_rdev *rdev) 1468static void error(struct mddev *mddev, struct md_rdev *rdev)
1420{ 1469{
1421 char b[BDEVNAME_SIZE]; 1470 char b[BDEVNAME_SIZE];
@@ -1548,7 +1597,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
1548 * very different from resync 1597 * very different from resync
1549 */ 1598 */
1550 return -EBUSY; 1599 return -EBUSY;
1551 if (rdev->saved_raid_disk < 0 && !enough(conf, -1)) 1600 if (rdev->saved_raid_disk < 0 && !_enough(conf, &conf->prev, -1))
1552 return -EINVAL; 1601 return -EINVAL;
1553 1602
1554 if (rdev->raid_disk >= 0) 1603 if (rdev->raid_disk >= 0)
@@ -2223,7 +2272,9 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
2223 " (%d sectors at %llu on %s)\n", 2272 " (%d sectors at %llu on %s)\n",
2224 mdname(mddev), s, 2273 mdname(mddev), s,
2225 (unsigned long long)( 2274 (unsigned long long)(
2226 sect + rdev->data_offset), 2275 sect +
2276 choose_data_offset(r10_bio,
2277 rdev)),
2227 bdevname(rdev->bdev, b)); 2278 bdevname(rdev->bdev, b));
2228 printk(KERN_NOTICE "md/raid10:%s: %s: failing " 2279 printk(KERN_NOTICE "md/raid10:%s: %s: failing "
2229 "drive\n", 2280 "drive\n",
@@ -2261,7 +2312,8 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
2261 " (%d sectors at %llu on %s)\n", 2312 " (%d sectors at %llu on %s)\n",
2262 mdname(mddev), s, 2313 mdname(mddev), s,
2263 (unsigned long long)( 2314 (unsigned long long)(
2264 sect + rdev->data_offset), 2315 sect +
2316 choose_data_offset(r10_bio, rdev)),
2265 bdevname(rdev->bdev, b)); 2317 bdevname(rdev->bdev, b));
2266 printk(KERN_NOTICE "md/raid10:%s: %s: failing " 2318 printk(KERN_NOTICE "md/raid10:%s: %s: failing "
2267 "drive\n", 2319 "drive\n",
@@ -2274,7 +2326,8 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
2274 " (%d sectors at %llu on %s)\n", 2326 " (%d sectors at %llu on %s)\n",
2275 mdname(mddev), s, 2327 mdname(mddev), s,
2276 (unsigned long long)( 2328 (unsigned long long)(
2277 sect + rdev->data_offset), 2329 sect +
2330 choose_data_offset(r10_bio, rdev)),
2278 bdevname(rdev->bdev, b)); 2331 bdevname(rdev->bdev, b));
2279 atomic_add(s, &rdev->corrected_errors); 2332 atomic_add(s, &rdev->corrected_errors);
2280 } 2333 }
@@ -2348,7 +2401,7 @@ static int narrow_write_error(struct r10bio *r10_bio, int i)
2348 wbio = bio_clone_mddev(bio, GFP_NOIO, mddev); 2401 wbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
2349 md_trim_bio(wbio, sector - bio->bi_sector, sectors); 2402 md_trim_bio(wbio, sector - bio->bi_sector, sectors);
2350 wbio->bi_sector = (r10_bio->devs[i].addr+ 2403 wbio->bi_sector = (r10_bio->devs[i].addr+
2351 rdev->data_offset+ 2404 choose_data_offset(r10_bio, rdev) +
2352 (sector - r10_bio->sector)); 2405 (sector - r10_bio->sector));
2353 wbio->bi_bdev = rdev->bdev; 2406 wbio->bi_bdev = rdev->bdev;
2354 if (submit_bio_wait(WRITE, wbio) == 0) 2407 if (submit_bio_wait(WRITE, wbio) == 0)
@@ -2425,7 +2478,7 @@ read_more:
2425 r10_bio->devs[slot].bio = bio; 2478 r10_bio->devs[slot].bio = bio;
2426 r10_bio->devs[slot].rdev = rdev; 2479 r10_bio->devs[slot].rdev = rdev;
2427 bio->bi_sector = r10_bio->devs[slot].addr 2480 bio->bi_sector = r10_bio->devs[slot].addr
2428 + rdev->data_offset; 2481 + choose_data_offset(r10_bio, rdev);
2429 bio->bi_bdev = rdev->bdev; 2482 bio->bi_bdev = rdev->bdev;
2430 bio->bi_rw = READ | do_sync; 2483 bio->bi_rw = READ | do_sync;
2431 bio->bi_private = r10_bio; 2484 bio->bi_private = r10_bio;
@@ -3254,6 +3307,8 @@ static struct r10conf *setup_conf(struct mddev *mddev)
3254 goto out; 3307 goto out;
3255 3308
3256 calc_sectors(conf, mddev->dev_sectors); 3309 calc_sectors(conf, mddev->dev_sectors);
3310 conf->prev = conf->geo;
3311 conf->reshape_progress = MaxSector;
3257 3312
3258 spin_lock_init(&conf->device_lock); 3313 spin_lock_init(&conf->device_lock);
3259 INIT_LIST_HEAD(&conf->retry_list); 3314 INIT_LIST_HEAD(&conf->retry_list);
@@ -3319,8 +3374,10 @@ static int run(struct mddev *mddev)
3319 rdev_for_each(rdev, mddev) { 3374 rdev_for_each(rdev, mddev) {
3320 3375
3321 disk_idx = rdev->raid_disk; 3376 disk_idx = rdev->raid_disk;
3322 if (disk_idx >= conf->geo.raid_disks 3377 if (disk_idx < 0)
3323 || disk_idx < 0) 3378 continue;
3379 if (disk_idx >= conf->geo.raid_disks &&
3380 disk_idx >= conf->prev.raid_disks)
3324 continue; 3381 continue;
3325 disk = conf->mirrors + disk_idx; 3382 disk = conf->mirrors + disk_idx;
3326 3383
@@ -3347,7 +3404,10 @@ static int run(struct mddev *mddev)
3347 } 3404 }
3348 3405
3349 mddev->degraded = 0; 3406 mddev->degraded = 0;
3350 for (i = 0; i < conf->geo.raid_disks; i++) { 3407 for (i = 0;
3408 i < conf->geo.raid_disks
3409 || i < conf->prev.raid_disks;
3410 i++) {
3351 3411
3352 disk = conf->mirrors + i; 3412 disk = conf->mirrors + i;
3353 3413
@@ -3466,6 +3526,9 @@ static int raid10_resize(struct mddev *mddev, sector_t sectors)
3466 struct r10conf *conf = mddev->private; 3526 struct r10conf *conf = mddev->private;
3467 sector_t oldsize, size; 3527 sector_t oldsize, size;
3468 3528
3529 if (mddev->reshape_position != MaxSector)
3530 return -EBUSY;
3531
3469 if (conf->geo.far_copies > 1 && !conf->geo.far_offset) 3532 if (conf->geo.far_copies > 1 && !conf->geo.far_offset)
3470 return -EINVAL; 3533 return -EINVAL;
3471 3534
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index 4c4942ac46fc..37509d7134aa 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -34,13 +34,14 @@ struct r10conf {
34 */ 34 */
35 int chunk_shift; /* shift from chunks to sectors */ 35 int chunk_shift; /* shift from chunks to sectors */
36 sector_t chunk_mask; 36 sector_t chunk_mask;
37 } geo; 37 } prev, geo;
38 int copies; /* near_copies * far_copies. 38 int copies; /* near_copies * far_copies.
39 * must be <= raid_disks 39 * must be <= raid_disks
40 */ 40 */
41 41
42 sector_t dev_sectors; /* temp copy of 42 sector_t dev_sectors; /* temp copy of
43 * mddev->dev_sectors */ 43 * mddev->dev_sectors */
44 sector_t reshape_progress;
44 45
45 struct list_head retry_list; 46 struct list_head retry_list;
46 /* queue pending writes and submit them on unplug */ 47 /* queue pending writes and submit them on unplug */
@@ -147,5 +148,10 @@ enum r10bio_state {
147 */ 148 */
148 R10BIO_MadeGood, 149 R10BIO_MadeGood,
149 R10BIO_WriteError, 150 R10BIO_WriteError,
151/* During a reshape we might be performing IO on the
152 * 'previous' part of the array, in which case this
153 * flag is set
154 */
155 R10BIO_Previous,
150}; 156};
151#endif 157#endif