Diffstat (limited to 'drivers/md/raid10.c')
 -rw-r--r--  drivers/md/raid10.c | 92
 1 file changed, 59 insertions(+), 33 deletions(-)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 8da6282254c3..e2549deab7c3 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -60,7 +60,21 @@
  */
 #define NR_RAID10_BIOS 256
 
-/* When there are this many requests queue to be written by
+/* when we get a read error on a read-only array, we redirect to another
+ * device without failing the first device, or trying to over-write to
+ * correct the read error. To keep track of bad blocks on a per-bio
+ * level, we store IO_BLOCKED in the appropriate 'bios' pointer
+ */
+#define IO_BLOCKED ((struct bio *)1)
+/* When we successfully write to a known bad-block, we need to remove the
+ * bad-block marking which must be done from process context. So we record
+ * the success by setting devs[n].bio to IO_MADE_GOOD
+ */
+#define IO_MADE_GOOD ((struct bio *)2)
+
+#define BIO_SPECIAL(bio) ((unsigned long)bio <= 2)
+
+/* When there are this many requests queued to be written by
  * the raid10 thread, we become 'congested' to provide back-pressure
  * for writeback.
  */
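
The three macros above let a devs[n].bio slot in struct r10bio carry status rather than a real bio pointer. A minimal sketch of how such a slot can be inspected, assuming the raid10.c context; the helper below is hypothetical and not part of the patch:

/* Hypothetical helper: distinguish the sentinel values from a real bio.
 * Note that BIO_SPECIAL() is true for NULL, IO_BLOCKED and IO_MADE_GOOD alike.
 */
static bool slot_has_real_bio(struct r10bio *r10_bio, int slot)
{
	struct bio *bio = r10_bio->devs[slot].bio;

	if (bio == IO_BLOCKED)
		return false;	/* this copy gave a read error on a read-only array; skip it */
	if (bio == IO_MADE_GOOD)
		return false;	/* a write over a known bad block succeeded; clear the record from process context */
	return !BIO_SPECIAL(bio);	/* also filters out a NULL slot */
}
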
@@ -717,7 +731,7 @@ static struct md_rdev *read_balance(struct r10conf *conf,
 	int sectors = r10_bio->sectors;
 	int best_good_sectors;
 	sector_t new_distance, best_dist;
-	struct md_rdev *rdev, *best_rdev;
+	struct md_rdev *best_rdev, *rdev = NULL;
 	int do_balance;
 	int best_slot;
 	struct geom *geo = &conf->geo;
@@ -839,9 +853,8 @@ retry:
 	return rdev;
 }
 
-static int raid10_congested(void *data, int bits)
+int md_raid10_congested(struct mddev *mddev, int bits)
 {
-	struct mddev *mddev = data;
 	struct r10conf *conf = mddev->private;
 	int i, ret = 0;
 
@@ -849,8 +862,6 @@ static int raid10_congested(void *data, int bits)
 	    conf->pending_count >= max_queued_requests)
 		return 1;
 
-	if (mddev_congested(mddev, bits))
-		return 1;
 	rcu_read_lock();
 	for (i = 0;
 	     (i < conf->geo.raid_disks || i < conf->prev.raid_disks)
@@ -866,6 +877,15 @@ static int raid10_congested(void *data, int bits)
 	rcu_read_unlock();
 	return ret;
 }
+EXPORT_SYMBOL_GPL(md_raid10_congested);
+
+static int raid10_congested(void *data, int bits)
+{
+	struct mddev *mddev = data;
+
+	return mddev_congested(mddev, bits) ||
+		md_raid10_congested(mddev, bits);
+}
 
 static void flush_pending_writes(struct r10conf *conf)
 {
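
md_raid10_congested() now performs the per-device congestion scan on its own, while the static raid10_congested() keeps the (void *data, int bits) shape expected by backing_dev_info.congested_fn and folds in mddev_congested(). With the EXPORT_SYMBOL_GPL, code outside raid10.c that already holds the mddev can ask the same question directly; a hedged sketch of such a caller follows, where the container type and callback name are illustrative, not taken from this diff:

/* Illustrative only: a stacking driver embedding an md RAID10 array
 * could wire the exported helper into its own congestion callback.
 */
struct mddev;					/* declared in drivers/md/md.h */
extern int md_raid10_congested(struct mddev *mddev, int bits);

struct embedded_array {				/* hypothetical container */
	struct mddev *mddev;
};

static int embedded_array_congested(struct embedded_array *ea, int bdi_bits)
{
	/* same check the raid10 personality makes for its own queue */
	return md_raid10_congested(ea->mddev, bdi_bits);
}
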
@@ -1546,7 +1566,7 @@ static void error(struct mddev *mddev, struct md_rdev *rdev)
 static void print_conf(struct r10conf *conf)
 {
 	int i;
-	struct mirror_info *tmp;
+	struct raid10_info *tmp;
 
 	printk(KERN_DEBUG "RAID10 conf printout:\n");
 	if (!conf) {
@@ -1580,7 +1600,7 @@ static int raid10_spare_active(struct mddev *mddev)
 {
 	int i;
 	struct r10conf *conf = mddev->private;
-	struct mirror_info *tmp;
+	struct raid10_info *tmp;
 	int count = 0;
 	unsigned long flags;
 
@@ -1655,7 +1675,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 	else
 		mirror = first;
 	for ( ; mirror <= last ; mirror++) {
-		struct mirror_info *p = &conf->mirrors[mirror];
+		struct raid10_info *p = &conf->mirrors[mirror];
 		if (p->recovery_disabled == mddev->recovery_disabled)
 			continue;
 		if (p->rdev) {
@@ -1709,7 +1729,7 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
 	int err = 0;
 	int number = rdev->raid_disk;
 	struct md_rdev **rdevp;
-	struct mirror_info *p = conf->mirrors + number;
+	struct raid10_info *p = conf->mirrors + number;
 
 	print_conf(conf);
 	if (rdev == p->rdev)
@@ -2876,7 +2896,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
 			sector_t sect;
 			int must_sync;
 			int any_working;
-			struct mirror_info *mirror = &conf->mirrors[i];
+			struct raid10_info *mirror = &conf->mirrors[i];
 
 			if ((mirror->rdev == NULL ||
 			     test_bit(In_sync, &mirror->rdev->flags))
@@ -3388,7 +3408,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
 		goto out;
 
 	/* FIXME calc properly */
-	conf->mirrors = kzalloc(sizeof(struct mirror_info)*(mddev->raid_disks +
+	conf->mirrors = kzalloc(sizeof(struct raid10_info)*(mddev->raid_disks +
 							    max(0,mddev->delta_disks)),
 				GFP_KERNEL);
 	if (!conf->mirrors)
@@ -3452,7 +3472,7 @@ static int run(struct mddev *mddev)
 {
 	struct r10conf *conf;
 	int i, disk_idx, chunk_size;
-	struct mirror_info *disk;
+	struct raid10_info *disk;
 	struct md_rdev *rdev;
 	sector_t size;
 	sector_t min_offset_diff = 0;
@@ -3472,12 +3492,14 @@ static int run(struct mddev *mddev)
 	conf->thread = NULL;
 
 	chunk_size = mddev->chunk_sectors << 9;
-	blk_queue_io_min(mddev->queue, chunk_size);
-	if (conf->geo.raid_disks % conf->geo.near_copies)
-		blk_queue_io_opt(mddev->queue, chunk_size * conf->geo.raid_disks);
-	else
-		blk_queue_io_opt(mddev->queue, chunk_size *
-				 (conf->geo.raid_disks / conf->geo.near_copies));
+	if (mddev->queue) {
+		blk_queue_io_min(mddev->queue, chunk_size);
+		if (conf->geo.raid_disks % conf->geo.near_copies)
+			blk_queue_io_opt(mddev->queue, chunk_size * conf->geo.raid_disks);
+		else
+			blk_queue_io_opt(mddev->queue, chunk_size *
+					 (conf->geo.raid_disks / conf->geo.near_copies));
+	}
 
 	rdev_for_each(rdev, mddev) {
 		long long diff;
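
Beyond the new if (mddev->queue) guard, the io_opt value advertised here is one full stripe of user data: when near_copies divides raid_disks evenly, each stripe carries raid_disks / near_copies distinct chunks, so io_opt is chunk_size * (raid_disks / near_copies); otherwise the copies rotate and the code falls back to chunk_size * raid_disks. A small user-space sketch of the same arithmetic, with example numbers only (4 devices, 2 near-copies, 512 KiB chunks):

#include <stdio.h>

int main(void)
{
	unsigned int raid_disks = 4, near_copies = 2;	/* example geometry */
	unsigned long chunk_size = 512 * 1024;		/* chunk size in bytes */
	unsigned long io_opt;

	if (raid_disks % near_copies)
		io_opt = chunk_size * raid_disks;
	else
		io_opt = chunk_size * (raid_disks / near_copies);

	/* prints io_min = 524288, io_opt = 1048576: one 1 MiB stripe */
	printf("io_min = %lu, io_opt = %lu\n", chunk_size, io_opt);
	return 0;
}
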
@@ -3511,8 +3533,9 @@ static int run(struct mddev *mddev)
 		if (first || diff < min_offset_diff)
 			min_offset_diff = diff;
 
-		disk_stack_limits(mddev->gendisk, rdev->bdev,
-				  rdev->data_offset << 9);
+		if (mddev->gendisk)
+			disk_stack_limits(mddev->gendisk, rdev->bdev,
+					  rdev->data_offset << 9);
 
 		disk->head_position = 0;
 	}
@@ -3575,22 +3598,22 @@ static int run(struct mddev *mddev)
 	md_set_array_sectors(mddev, size);
 	mddev->resync_max_sectors = size;
 
-	mddev->queue->backing_dev_info.congested_fn = raid10_congested;
-	mddev->queue->backing_dev_info.congested_data = mddev;
-
-	/* Calculate max read-ahead size.
-	 * We need to readahead at least twice a whole stripe....
-	 * maybe...
-	 */
-	{
+	if (mddev->queue) {
 		int stripe = conf->geo.raid_disks *
 			((mddev->chunk_sectors << 9) / PAGE_SIZE);
+		mddev->queue->backing_dev_info.congested_fn = raid10_congested;
+		mddev->queue->backing_dev_info.congested_data = mddev;
+
+		/* Calculate max read-ahead size.
+		 * We need to readahead at least twice a whole stripe....
+		 * maybe...
+		 */
 		stripe /= conf->geo.near_copies;
 		if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
 			mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
+		blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec);
 	}
 
-	blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec);
 
 	if (md_integrity_register(mddev))
 		goto out_free_conf;
@@ -3641,7 +3664,10 @@ static int stop(struct mddev *mddev)
 	lower_barrier(conf);
 
 	md_unregister_thread(&mddev->thread);
-	blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
+	if (mddev->queue)
+		/* the unplug fn references 'conf'*/
+		blk_sync_queue(mddev->queue);
+
 	if (conf->r10bio_pool)
 		mempool_destroy(conf->r10bio_pool);
 	kfree(conf->mirrors);
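
stop() now calls blk_sync_queue() only when a request queue exists, matching the guards added in run() above; presumably this is so the raid10 personality can run when md does not own a gendisk or request_queue of its own (for instance under a stacking driver). A condensed, hypothetical form of the pattern these hunks keep repeating, not code from the patch:

/* Hypothetical condensation of the guard pattern: every touch of
 * mddev->queue or mddev->gendisk is skipped when the array has no
 * block device of its own.
 */
static void raid10_tune_queue(struct mddev *mddev, int chunk_size)
{
	if (!mddev->queue)
		return;		/* no request queue: nothing to tune */

	blk_queue_io_min(mddev->queue, chunk_size);
	/* ... the rest of the queue-only setup from run(), as above ... */
}
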
@@ -3805,7 +3831,7 @@ static int raid10_check_reshape(struct mddev *mddev)
 	if (mddev->delta_disks > 0) {
 		/* allocate new 'mirrors' list */
 		conf->mirrors_new = kzalloc(
-			sizeof(struct mirror_info)
+			sizeof(struct raid10_info)
 			*(mddev->raid_disks +
 			  mddev->delta_disks),
 			GFP_KERNEL);
@@ -3930,7 +3956,7 @@ static int raid10_start_reshape(struct mddev *mddev)
 	spin_lock_irq(&conf->device_lock);
 	if (conf->mirrors_new) {
 		memcpy(conf->mirrors_new, conf->mirrors,
-		       sizeof(struct mirror_info)*conf->prev.raid_disks);
+		       sizeof(struct raid10_info)*conf->prev.raid_disks);
 		smp_mb();
 		kfree(conf->mirrors_old); /* FIXME and elsewhere */
 		conf->mirrors_old = conf->mirrors;