author     NeilBrown <neilb@suse.de>	2012-03-18 21:46:39 -0400
committer  NeilBrown <neilb@suse.de>	2012-03-18 21:46:39 -0400
commit     6b740b8d79252f13bcb7e5d3c1d43157e78a81e7
tree       4b99f8fb395e7a205a85766d1bd241dce39c4337 /drivers/md
parent     050b66152f87c79e8d66aed0e7996f9336462d5f
md/raid1: handle merge_bvec_fn in member devices.
Currently we don't honour merge_bvec_fn in member devices, so if there
is one, we force all requests to be single-page at most.
This is not ideal.
So create a raid1 merge_bvec_fn to check that function in children
as well.
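
(For context, a minimal sketch of how the block layer of this era
consults a queue's merge_bvec_fn before growing a bio, simplified from
__bio_add_page(); can_merge_page() is a hypothetical helper name, the
struct and callback are the real ones.)

#include <linux/blkdev.h>
#include <linux/bio.h>

/* Hedged sketch, not part of this patch: ask a queue whether one
 * more page may be merged into @bio. */
static bool can_merge_page(struct request_queue *q, struct bio *bio,
			   struct bio_vec *vec)
{
	struct bvec_merge_data bvm = {
		.bi_bdev   = bio->bi_bdev,
		.bi_sector = bio->bi_sector,
		.bi_size   = bio->bi_size,
		.bi_rw     = bio->bi_rw,
	};

	/* No callback registered: the driver imposes no extra limit. */
	if (!q->merge_bvec_fn)
		return true;
	/* The callback returns how many bytes of @vec it will accept;
	 * anything short of bv_len means "do not merge". */
	return q->merge_bvec_fn(q, &bvm, vec) >= vec->bv_len;
}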
This introduces a small problem. There is no locking around calls
to ->merge_bvec_fn and subsequent calls to ->make_request, so a
device added between these could end up getting a request which
violates its merge_bvec_fn.
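
(To make the window concrete, a hedged illustration; ask_then_submit()
is a hypothetical caller, the two calls are the real sequence.)

#include <linux/blkdev.h>
#include <linux/bio.h>

/* Illustration only: the unlocked check-then-submit sequence that
 * creates the race described above. */
static void ask_then_submit(struct request_queue *q, struct bio *bio,
			    struct bvec_merge_data *bvm,
			    struct bio_vec *biovec)
{
	/* Step 1: consult whatever member devices exist right now. */
	if (q->merge_bvec_fn &&
	    q->merge_bvec_fn(q, bvm, biovec) < biovec->bv_len)
		return;		/* would not fit; caller must split */

	/* No lock is held here.  A member device hot-added at this
	 * point was never consulted in step 1, yet step 2 may hand
	 * it the bio. */

	/* Step 2: submit. */
	generic_make_request(bio);
}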
Currently the best we can do is synchronize_sched(). This will work
provided no preemption happens. If there is preemption, we just
have to hope that new devices are largely consistent with old devices.
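
(A sketch collecting the steps this patch adds to raid1_add_disk(),
annotated against the reasoning above; the real code is in the hunks
below, the helper name here is made up.)

/* Annotated restatement of the hot-add quiesce added below. */
static void quiesce_new_rdev(struct r1conf *conf, struct md_rdev *rdev)
{
	/* Anyone who sampled ->merge_bvec_fn before this device
	 * appeared, and was not preempted, has by now called
	 * ->make_request. */
	synchronize_sched();

	/* Drain everything submitted up to this point... */
	raise_barrier(conf);
	/* ...then let normal I/O resume. */
	lower_barrier(conf);

	/* Every surviving merge decision now accounts for this
	 * device, so it may join I/O. */
	clear_bit(Unmerged, &rdev->flags);
}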
Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md')
-rw-r--r--	drivers/md/raid1.c	77
1 file changed, 56 insertions(+), 21 deletions(-)
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index a933bd4065a5..4a40a200d769 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -523,6 +523,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sectors)
 		rdev = rcu_dereference(conf->mirrors[disk].rdev);
 		if (r1_bio->bios[disk] == IO_BLOCKED
 		    || rdev == NULL
+		    || test_bit(Unmerged, &rdev->flags)
 		    || test_bit(Faulty, &rdev->flags))
 			continue;
 		if (!test_bit(In_sync, &rdev->flags) &&
@@ -614,6 +615,39 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sectors)
 	return best_disk;
 }
 
+static int raid1_mergeable_bvec(struct request_queue *q,
+				struct bvec_merge_data *bvm,
+				struct bio_vec *biovec)
+{
+	struct mddev *mddev = q->queuedata;
+	struct r1conf *conf = mddev->private;
+	sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
+	int max = biovec->bv_len;
+
+	if (mddev->merge_check_needed) {
+		int disk;
+		rcu_read_lock();
+		for (disk = 0; disk < conf->raid_disks * 2; disk++) {
+			struct md_rdev *rdev = rcu_dereference(
+				conf->mirrors[disk].rdev);
+			if (rdev && !test_bit(Faulty, &rdev->flags)) {
+				struct request_queue *q =
+					bdev_get_queue(rdev->bdev);
+				if (q->merge_bvec_fn) {
+					bvm->bi_sector = sector +
+						rdev->data_offset;
+					bvm->bi_bdev = rdev->bdev;
+					max = min(max, q->merge_bvec_fn(
+							q, bvm, biovec));
+				}
+			}
+		}
+		rcu_read_unlock();
+	}
+	return max;
+
+}
+
 int md_raid1_congested(struct mddev *mddev, int bits)
 {
 	struct r1conf *conf = mddev->private;
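
(For a sense of what the new loop consults: a hypothetical member-device
callback in the style of raid0_mergeable_bvec(), refusing to let a bio
vec extend past a chunk boundary.  The name and the 64KiB chunk size
are assumptions for illustration.)

static int member_mergeable_bvec(struct request_queue *q,
				 struct bvec_merge_data *bvm,
				 struct bio_vec *biovec)
{
	sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
	unsigned int chunk_sectors = 128;	/* assumed 64KiB chunks */
	unsigned int bio_sectors = bvm->bi_size >> 9;
	int max;

	/* Bytes left in the chunk after the bio's existing payload. */
	max = (chunk_sectors - ((sector & (chunk_sectors - 1))
				+ bio_sectors)) << 9;
	if (max < 0)
		max = 0;	/* bio already spans the boundary; refuse */
	if (max <= biovec->bv_len && bio_sectors == 0)
		return biovec->bv_len;	/* never refuse the first page */
	return max;
}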
@@ -1015,7 +1049,8 @@ read_again:
 			break;
 		}
 		r1_bio->bios[i] = NULL;
-		if (!rdev || test_bit(Faulty, &rdev->flags)) {
+		if (!rdev || test_bit(Faulty, &rdev->flags)
+		    || test_bit(Unmerged, &rdev->flags)) {
 			if (i < conf->raid_disks)
 				set_bit(R1BIO_Degraded, &r1_bio->state);
 			continue;
@@ -1335,6 +1370,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 	struct mirror_info *p;
 	int first = 0;
 	int last = conf->raid_disks - 1;
+	struct request_queue *q = bdev_get_queue(rdev->bdev);
 
 	if (mddev->recovery_disabled == conf->recovery_disabled)
 		return -EBUSY;
@@ -1342,23 +1378,17 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 	if (rdev->raid_disk >= 0)
 		first = last = rdev->raid_disk;
 
+	if (q->merge_bvec_fn) {
+		set_bit(Unmerged, &rdev->flags);
+		mddev->merge_check_needed = 1;
+	}
+
 	for (mirror = first; mirror <= last; mirror++) {
 		p = conf->mirrors+mirror;
 		if (!p->rdev) {
 
 			disk_stack_limits(mddev->gendisk, rdev->bdev,
 					  rdev->data_offset << 9);
-			/* as we don't honour merge_bvec_fn, we must
-			 * never risk violating it, so limit
-			 * ->max_segments to one lying with a single
-			 * page, as a one page request is never in
-			 * violation.
-			 */
-			if (rdev->bdev->bd_disk->queue->merge_bvec_fn) {
-				blk_queue_max_segments(mddev->queue, 1);
-				blk_queue_segment_boundary(mddev->queue,
-							   PAGE_CACHE_SIZE - 1);
-			}
 
 			p->head_position = 0;
 			rdev->raid_disk = mirror;
@@ -1383,6 +1413,19 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 			break;
 		}
 	}
+	if (err == 0 && test_bit(Unmerged, &rdev->flags)) {
+		/* Some requests might not have seen this new
+		 * merge_bvec_fn.  We must wait for them to complete
+		 * before merging the device fully.
+		 * First we make sure any code which has tested
+		 * our function has submitted the request, then
+		 * we wait for all outstanding requests to complete.
+		 */
+		synchronize_sched();
+		raise_barrier(conf);
+		lower_barrier(conf);
+		clear_bit(Unmerged, &rdev->flags);
+	}
 	md_integrity_add_rdev(rdev, mddev);
 	print_conf(conf);
 	return err;
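
(raise_barrier()/lower_barrier() are raid1's existing resync barrier; a
hedged, much-simplified model of the one property relied on here (the
real functions also coordinate with resync and block new submitters):)

#include <linux/atomic.h>
#include <linux/wait.h>

static atomic_t nr_pending = ATOMIC_INIT(0);	/* in-flight requests */
static DECLARE_WAIT_QUEUE_HEAD(drain_wq);

/* Each request path brackets its work with these. */
static void request_enter(void)
{
	atomic_inc(&nr_pending);
}

static void request_exit(void)
{
	if (atomic_dec_and_test(&nr_pending))
		wake_up(&drain_wq);
}

/* The hot-add path then only needs to wait for a full drain. */
static void wait_for_drain(void)
{
	wait_event(drain_wq, atomic_read(&nr_pending) == 0);
}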
@@ -2627,15 +2670,6 @@ static int run(struct mddev *mddev)
 			continue;
 		disk_stack_limits(mddev->gendisk, rdev->bdev,
 				  rdev->data_offset << 9);
-		/* as we don't honour merge_bvec_fn, we must never risk
-		 * violating it, so limit ->max_segments to 1 lying within
-		 * a single page, as a one page request is never in violation.
-		 */
-		if (rdev->bdev->bd_disk->queue->merge_bvec_fn) {
-			blk_queue_max_segments(mddev->queue, 1);
-			blk_queue_segment_boundary(mddev->queue,
-						   PAGE_CACHE_SIZE - 1);
-		}
 	}
 
 	mddev->degraded = 0;
@@ -2669,6 +2703,7 @@ static int run(struct mddev *mddev)
 	if (mddev->queue) {
 		mddev->queue->backing_dev_info.congested_fn = raid1_congested;
 		mddev->queue->backing_dev_info.congested_data = mddev;
+		blk_queue_merge_bvec(mddev->queue, raid1_mergeable_bvec);
 	}
 	return md_integrity_register(mddev);
 }