author	NeilBrown <neilb@suse.de>	2012-03-18 21:46:39 -0400
committer	NeilBrown <neilb@suse.de>	2012-03-18 21:46:39 -0400
commit	6b740b8d79252f13bcb7e5d3c1d43157e78a81e7 (patch)
tree	4b99f8fb395e7a205a85766d1bd241dce39c4337 /drivers/md
parent	050b66152f87c79e8d66aed0e7996f9336462d5f (diff)
md/raid1: handle merge_bvec_fn in member devices.
Currently we don't honour merge_bvec_fn in member devices, so if there is one we force all requests to be single-page at most. This is not ideal.

So create a raid1 merge_bvec_fn to check that function in children as well.

This introduces a small problem. There is no locking around calls to ->merge_bvec_fn and subsequent calls to ->make_request, so a device added between these could end up getting a request which violates its merge_bvec_fn. Currently the best we can do is synchronize_sched(). This will work provided no preemption happens. If there is preemption, we just have to hope that new devices are largely consistent with old devices.

Signed-off-by: NeilBrown <neilb@suse.de>
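For background, the contract this patch builds on: when bio_add_page() wants to grow a bio, the block layer of this era fills a struct bvec_merge_data describing the bio built so far and asks the queue's merge_bvec_fn how many bytes of the proposed bio_vec it will accept; returning less than biovec->bv_len stops the merge. Below is a minimal sketch of such a callback for a hypothetical driver; the function names and the 64K cap are illustrative only and not part of this patch.

#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/bio.h>

/* Hypothetical merge_bvec_fn: refuse to let any single bio grow past 64K. */
static int example_limit_bvec(struct request_queue *q,
			      struct bvec_merge_data *bvm,
			      struct bio_vec *biovec)
{
	unsigned int bio_size = bvm->bi_size;	/* bytes already in the bio */
	unsigned int limit = 64 * 1024;		/* illustrative cap only */

	if (bio_size >= limit)
		return 0;			/* accept nothing more */
	return min_t(int, biovec->bv_len, limit - bio_size);
}

/* Registered once at queue setup, the same way run() below registers
 * raid1_mergeable_bvec with blk_queue_merge_bvec().
 */
static void example_setup_queue(struct request_queue *q)
{
	blk_queue_merge_bvec(q, example_limit_bvec);
}

raid1_mergeable_bvec() in the diff below applies this contract recursively: it redirects the bvec_merge_data at each member device (adjusting bi_sector by rdev->data_offset) and returns the minimum of the members' answers, so no child ever sees a bio that its own merge_bvec_fn would have rejected.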
Diffstat (limited to 'drivers/md')
-rw-r--r--	drivers/md/raid1.c	77
1 file changed, 56 insertions(+), 21 deletions(-)
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index a933bd4065a..4a40a200d76 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -523,6 +523,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
 		rdev = rcu_dereference(conf->mirrors[disk].rdev);
 		if (r1_bio->bios[disk] == IO_BLOCKED
 		    || rdev == NULL
+		    || test_bit(Unmerged, &rdev->flags)
 		    || test_bit(Faulty, &rdev->flags))
 			continue;
 		if (!test_bit(In_sync, &rdev->flags) &&
@@ -614,6 +615,39 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
 	return best_disk;
 }
 
+static int raid1_mergeable_bvec(struct request_queue *q,
+				struct bvec_merge_data *bvm,
+				struct bio_vec *biovec)
+{
+	struct mddev *mddev = q->queuedata;
+	struct r1conf *conf = mddev->private;
+	sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
+	int max = biovec->bv_len;
+
+	if (mddev->merge_check_needed) {
+		int disk;
+		rcu_read_lock();
+		for (disk = 0; disk < conf->raid_disks * 2; disk++) {
+			struct md_rdev *rdev = rcu_dereference(
+				conf->mirrors[disk].rdev);
+			if (rdev && !test_bit(Faulty, &rdev->flags)) {
+				struct request_queue *q =
+					bdev_get_queue(rdev->bdev);
+				if (q->merge_bvec_fn) {
+					bvm->bi_sector = sector +
+						rdev->data_offset;
+					bvm->bi_bdev = rdev->bdev;
+					max = min(max, q->merge_bvec_fn(
+							  q, bvm, biovec));
+				}
+			}
+		}
+		rcu_read_unlock();
+	}
+	return max;
+
+}
+
 int md_raid1_congested(struct mddev *mddev, int bits)
 {
 	struct r1conf *conf = mddev->private;
@@ -1015,7 +1049,8 @@ read_again:
 			break;
 		}
 		r1_bio->bios[i] = NULL;
-		if (!rdev || test_bit(Faulty, &rdev->flags)) {
+		if (!rdev || test_bit(Faulty, &rdev->flags)
+		    || test_bit(Unmerged, &rdev->flags)) {
 			if (i < conf->raid_disks)
 				set_bit(R1BIO_Degraded, &r1_bio->state);
 			continue;
@@ -1335,6 +1370,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 	struct mirror_info *p;
 	int first = 0;
 	int last = conf->raid_disks - 1;
+	struct request_queue *q = bdev_get_queue(rdev->bdev);
 
 	if (mddev->recovery_disabled == conf->recovery_disabled)
 		return -EBUSY;
@@ -1342,23 +1378,17 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 	if (rdev->raid_disk >= 0)
 		first = last = rdev->raid_disk;
 
+	if (q->merge_bvec_fn) {
+		set_bit(Unmerged, &rdev->flags);
+		mddev->merge_check_needed = 1;
+	}
+
 	for (mirror = first; mirror <= last; mirror++) {
 		p = conf->mirrors+mirror;
 		if (!p->rdev) {
 
 			disk_stack_limits(mddev->gendisk, rdev->bdev,
 					  rdev->data_offset << 9);
-			/* as we don't honour merge_bvec_fn, we must
-			 * never risk violating it, so limit
-			 * ->max_segments to one lying with a single
-			 * page, as a one page request is never in
-			 * violation.
-			 */
-			if (rdev->bdev->bd_disk->queue->merge_bvec_fn) {
-				blk_queue_max_segments(mddev->queue, 1);
-				blk_queue_segment_boundary(mddev->queue,
-							   PAGE_CACHE_SIZE - 1);
-			}
 
 			p->head_position = 0;
 			rdev->raid_disk = mirror;
@@ -1383,6 +1413,19 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 			break;
 		}
 	}
+	if (err == 0 && test_bit(Unmerged, &rdev->flags)) {
+		/* Some requests might not have seen this new
+		 * merge_bvec_fn.  We must wait for them to complete
+		 * before merging the device fully.
+		 * First we make sure any code which has tested
+		 * our function has submitted the request, then
+		 * we wait for all outstanding requests to complete.
+		 */
+		synchronize_sched();
+		raise_barrier(conf);
+		lower_barrier(conf);
+		clear_bit(Unmerged, &rdev->flags);
+	}
 	md_integrity_add_rdev(rdev, mddev);
 	print_conf(conf);
 	return err;
@@ -2627,15 +2670,6 @@ static int run(struct mddev *mddev)
 			continue;
 		disk_stack_limits(mddev->gendisk, rdev->bdev,
 				  rdev->data_offset << 9);
-		/* as we don't honour merge_bvec_fn, we must never risk
-		 * violating it, so limit ->max_segments to 1 lying within
-		 * a single page, as a one page request is never in violation.
-		 */
-		if (rdev->bdev->bd_disk->queue->merge_bvec_fn) {
-			blk_queue_max_segments(mddev->queue, 1);
-			blk_queue_segment_boundary(mddev->queue,
-						   PAGE_CACHE_SIZE - 1);
-		}
 	}
 
 	mddev->degraded = 0;
@@ -2669,6 +2703,7 @@ static int run(struct mddev *mddev)
 	if (mddev->queue) {
 		mddev->queue->backing_dev_info.congested_fn = raid1_congested;
 		mddev->queue->backing_dev_info.congested_data = mddev;
+		blk_queue_merge_bvec(mddev->queue, raid1_mergeable_bvec);
 	}
 	return md_integrity_register(mddev);
 }