diff options
Diffstat (limited to 'drivers/md/raid10.c')
| -rw-r--r-- | drivers/md/raid10.c | 92 |
1 files changed, 59 insertions, 33 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 8da6282254c3..e2549deab7c3 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
| @@ -60,7 +60,21 @@ | |||
| 60 | */ | 60 | */ |
| 61 | #define NR_RAID10_BIOS 256 | 61 | #define NR_RAID10_BIOS 256 |
| 62 | 62 | ||
| 63 | /* When there are this many requests queue to be written by | 63 | /* when we get a read error on a read-only array, we redirect to another |
| 64 | * device without failing the first device, or trying to over-write to | ||
| 65 | * correct the read error. To keep track of bad blocks on a per-bio | ||
| 66 | * level, we store IO_BLOCKED in the appropriate 'bios' pointer | ||
| 67 | */ | ||
| 68 | #define IO_BLOCKED ((struct bio *)1) | ||
| 69 | /* When we successfully write to a known bad-block, we need to remove the | ||
| 70 | * bad-block marking which must be done from process context. So we record | ||
| 71 | * the success by setting devs[n].bio to IO_MADE_GOOD | ||
| 72 | */ | ||
| 73 | #define IO_MADE_GOOD ((struct bio *)2) | ||
| 74 | |||
| 75 | #define BIO_SPECIAL(bio) ((unsigned long)bio <= 2) | ||
| 76 | |||
| 77 | /* When there are this many requests queued to be written by | ||
| 64 | * the raid10 thread, we become 'congested' to provide back-pressure | 78 | * the raid10 thread, we become 'congested' to provide back-pressure |
| 65 | * for writeback. | 79 | * for writeback. |
| 66 | */ | 80 | */ |
| @@ -717,7 +731,7 @@ static struct md_rdev *read_balance(struct r10conf *conf, | |||
| 717 | int sectors = r10_bio->sectors; | 731 | int sectors = r10_bio->sectors; |
| 718 | int best_good_sectors; | 732 | int best_good_sectors; |
| 719 | sector_t new_distance, best_dist; | 733 | sector_t new_distance, best_dist; |
| 720 | struct md_rdev *rdev, *best_rdev; | 734 | struct md_rdev *best_rdev, *rdev = NULL; |
| 721 | int do_balance; | 735 | int do_balance; |
| 722 | int best_slot; | 736 | int best_slot; |
| 723 | struct geom *geo = &conf->geo; | 737 | struct geom *geo = &conf->geo; |
| @@ -839,9 +853,8 @@ retry: | |||
| 839 | return rdev; | 853 | return rdev; |
| 840 | } | 854 | } |
| 841 | 855 | ||
| 842 | static int raid10_congested(void *data, int bits) | 856 | int md_raid10_congested(struct mddev *mddev, int bits) |
| 843 | { | 857 | { |
| 844 | struct mddev *mddev = data; | ||
| 845 | struct r10conf *conf = mddev->private; | 858 | struct r10conf *conf = mddev->private; |
| 846 | int i, ret = 0; | 859 | int i, ret = 0; |
| 847 | 860 | ||
| @@ -849,8 +862,6 @@ static int raid10_congested(void *data, int bits) | |||
| 849 | conf->pending_count >= max_queued_requests) | 862 | conf->pending_count >= max_queued_requests) |
| 850 | return 1; | 863 | return 1; |
| 851 | 864 | ||
| 852 | if (mddev_congested(mddev, bits)) | ||
| 853 | return 1; | ||
| 854 | rcu_read_lock(); | 865 | rcu_read_lock(); |
| 855 | for (i = 0; | 866 | for (i = 0; |
| 856 | (i < conf->geo.raid_disks || i < conf->prev.raid_disks) | 867 | (i < conf->geo.raid_disks || i < conf->prev.raid_disks) |
| @@ -866,6 +877,15 @@ static int raid10_congested(void *data, int bits) | |||
| 866 | rcu_read_unlock(); | 877 | rcu_read_unlock(); |
| 867 | return ret; | 878 | return ret; |
| 868 | } | 879 | } |
| 880 | EXPORT_SYMBOL_GPL(md_raid10_congested); | ||
| 881 | |||
| 882 | static int raid10_congested(void *data, int bits) | ||
| 883 | { | ||
| 884 | struct mddev *mddev = data; | ||
| 885 | |||
| 886 | return mddev_congested(mddev, bits) || | ||
| 887 | md_raid10_congested(mddev, bits); | ||
| 888 | } | ||
| 869 | 889 | ||
| 870 | static void flush_pending_writes(struct r10conf *conf) | 890 | static void flush_pending_writes(struct r10conf *conf) |
| 871 | { | 891 | { |
| @@ -1546,7 +1566,7 @@ static void error(struct mddev *mddev, struct md_rdev *rdev) | |||
| 1546 | static void print_conf(struct r10conf *conf) | 1566 | static void print_conf(struct r10conf *conf) |
| 1547 | { | 1567 | { |
| 1548 | int i; | 1568 | int i; |
| 1549 | struct mirror_info *tmp; | 1569 | struct raid10_info *tmp; |
| 1550 | 1570 | ||
| 1551 | printk(KERN_DEBUG "RAID10 conf printout:\n"); | 1571 | printk(KERN_DEBUG "RAID10 conf printout:\n"); |
| 1552 | if (!conf) { | 1572 | if (!conf) { |
| @@ -1580,7 +1600,7 @@ static int raid10_spare_active(struct mddev *mddev) | |||
| 1580 | { | 1600 | { |
| 1581 | int i; | 1601 | int i; |
| 1582 | struct r10conf *conf = mddev->private; | 1602 | struct r10conf *conf = mddev->private; |
| 1583 | struct mirror_info *tmp; | 1603 | struct raid10_info *tmp; |
| 1584 | int count = 0; | 1604 | int count = 0; |
| 1585 | unsigned long flags; | 1605 | unsigned long flags; |
| 1586 | 1606 | ||
| @@ -1655,7 +1675,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) | |||
| 1655 | else | 1675 | else |
| 1656 | mirror = first; | 1676 | mirror = first; |
| 1657 | for ( ; mirror <= last ; mirror++) { | 1677 | for ( ; mirror <= last ; mirror++) { |
| 1658 | struct mirror_info *p = &conf->mirrors[mirror]; | 1678 | struct raid10_info *p = &conf->mirrors[mirror]; |
| 1659 | if (p->recovery_disabled == mddev->recovery_disabled) | 1679 | if (p->recovery_disabled == mddev->recovery_disabled) |
| 1660 | continue; | 1680 | continue; |
| 1661 | if (p->rdev) { | 1681 | if (p->rdev) { |
| @@ -1709,7 +1729,7 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev) | |||
| 1709 | int err = 0; | 1729 | int err = 0; |
| 1710 | int number = rdev->raid_disk; | 1730 | int number = rdev->raid_disk; |
| 1711 | struct md_rdev **rdevp; | 1731 | struct md_rdev **rdevp; |
| 1712 | struct mirror_info *p = conf->mirrors + number; | 1732 | struct raid10_info *p = conf->mirrors + number; |
| 1713 | 1733 | ||
| 1714 | print_conf(conf); | 1734 | print_conf(conf); |
| 1715 | if (rdev == p->rdev) | 1735 | if (rdev == p->rdev) |
| @@ -2876,7 +2896,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, | |||
| 2876 | sector_t sect; | 2896 | sector_t sect; |
| 2877 | int must_sync; | 2897 | int must_sync; |
| 2878 | int any_working; | 2898 | int any_working; |
| 2879 | struct mirror_info *mirror = &conf->mirrors[i]; | 2899 | struct raid10_info *mirror = &conf->mirrors[i]; |
| 2880 | 2900 | ||
| 2881 | if ((mirror->rdev == NULL || | 2901 | if ((mirror->rdev == NULL || |
| 2882 | test_bit(In_sync, &mirror->rdev->flags)) | 2902 | test_bit(In_sync, &mirror->rdev->flags)) |
| @@ -3388,7 +3408,7 @@ static struct r10conf *setup_conf(struct mddev *mddev) | |||
| 3388 | goto out; | 3408 | goto out; |
| 3389 | 3409 | ||
| 3390 | /* FIXME calc properly */ | 3410 | /* FIXME calc properly */ |
| 3391 | conf->mirrors = kzalloc(sizeof(struct mirror_info)*(mddev->raid_disks + | 3411 | conf->mirrors = kzalloc(sizeof(struct raid10_info)*(mddev->raid_disks + |
| 3392 | max(0,mddev->delta_disks)), | 3412 | max(0,mddev->delta_disks)), |
| 3393 | GFP_KERNEL); | 3413 | GFP_KERNEL); |
| 3394 | if (!conf->mirrors) | 3414 | if (!conf->mirrors) |
| @@ -3452,7 +3472,7 @@ static int run(struct mddev *mddev) | |||
| 3452 | { | 3472 | { |
| 3453 | struct r10conf *conf; | 3473 | struct r10conf *conf; |
| 3454 | int i, disk_idx, chunk_size; | 3474 | int i, disk_idx, chunk_size; |
| 3455 | struct mirror_info *disk; | 3475 | struct raid10_info *disk; |
| 3456 | struct md_rdev *rdev; | 3476 | struct md_rdev *rdev; |
| 3457 | sector_t size; | 3477 | sector_t size; |
| 3458 | sector_t min_offset_diff = 0; | 3478 | sector_t min_offset_diff = 0; |
| @@ -3472,12 +3492,14 @@ static int run(struct mddev *mddev) | |||
| 3472 | conf->thread = NULL; | 3492 | conf->thread = NULL; |
| 3473 | 3493 | ||
| 3474 | chunk_size = mddev->chunk_sectors << 9; | 3494 | chunk_size = mddev->chunk_sectors << 9; |
| 3475 | blk_queue_io_min(mddev->queue, chunk_size); | 3495 | if (mddev->queue) { |
| 3476 | if (conf->geo.raid_disks % conf->geo.near_copies) | 3496 | blk_queue_io_min(mddev->queue, chunk_size); |
| 3477 | blk_queue_io_opt(mddev->queue, chunk_size * conf->geo.raid_disks); | 3497 | if (conf->geo.raid_disks % conf->geo.near_copies) |
| 3478 | else | 3498 | blk_queue_io_opt(mddev->queue, chunk_size * conf->geo.raid_disks); |
| 3479 | blk_queue_io_opt(mddev->queue, chunk_size * | 3499 | else |
| 3480 | (conf->geo.raid_disks / conf->geo.near_copies)); | 3500 | blk_queue_io_opt(mddev->queue, chunk_size * |
| 3501 | (conf->geo.raid_disks / conf->geo.near_copies)); | ||
| 3502 | } | ||
| 3481 | 3503 | ||
| 3482 | rdev_for_each(rdev, mddev) { | 3504 | rdev_for_each(rdev, mddev) { |
| 3483 | long long diff; | 3505 | long long diff; |
| @@ -3511,8 +3533,9 @@ static int run(struct mddev *mddev) | |||
| 3511 | if (first || diff < min_offset_diff) | 3533 | if (first || diff < min_offset_diff) |
| 3512 | min_offset_diff = diff; | 3534 | min_offset_diff = diff; |
| 3513 | 3535 | ||
| 3514 | disk_stack_limits(mddev->gendisk, rdev->bdev, | 3536 | if (mddev->gendisk) |
| 3515 | rdev->data_offset << 9); | 3537 | disk_stack_limits(mddev->gendisk, rdev->bdev, |
| 3538 | rdev->data_offset << 9); | ||
| 3516 | 3539 | ||
| 3517 | disk->head_position = 0; | 3540 | disk->head_position = 0; |
| 3518 | } | 3541 | } |
| @@ -3575,22 +3598,22 @@ static int run(struct mddev *mddev) | |||
| 3575 | md_set_array_sectors(mddev, size); | 3598 | md_set_array_sectors(mddev, size); |
| 3576 | mddev->resync_max_sectors = size; | 3599 | mddev->resync_max_sectors = size; |
| 3577 | 3600 | ||
| 3578 | mddev->queue->backing_dev_info.congested_fn = raid10_congested; | 3601 | if (mddev->queue) { |
| 3579 | mddev->queue->backing_dev_info.congested_data = mddev; | ||
| 3580 | |||
| 3581 | /* Calculate max read-ahead size. | ||
| 3582 | * We need to readahead at least twice a whole stripe.... | ||
| 3583 | * maybe... | ||
| 3584 | */ | ||
| 3585 | { | ||
| 3586 | int stripe = conf->geo.raid_disks * | 3602 | int stripe = conf->geo.raid_disks * |
| 3587 | ((mddev->chunk_sectors << 9) / PAGE_SIZE); | 3603 | ((mddev->chunk_sectors << 9) / PAGE_SIZE); |
| 3604 | mddev->queue->backing_dev_info.congested_fn = raid10_congested; | ||
| 3605 | mddev->queue->backing_dev_info.congested_data = mddev; | ||
| 3606 | |||
| 3607 | /* Calculate max read-ahead size. | ||
| 3608 | * We need to readahead at least twice a whole stripe.... | ||
| 3609 | * maybe... | ||
| 3610 | */ | ||
| 3588 | stripe /= conf->geo.near_copies; | 3611 | stripe /= conf->geo.near_copies; |
| 3589 | if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe) | 3612 | if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe) |
| 3590 | mddev->queue->backing_dev_info.ra_pages = 2 * stripe; | 3613 | mddev->queue->backing_dev_info.ra_pages = 2 * stripe; |
| 3614 | blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec); | ||
| 3591 | } | 3615 | } |
| 3592 | 3616 | ||
| 3593 | blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec); | ||
| 3594 | 3617 | ||
| 3595 | if (md_integrity_register(mddev)) | 3618 | if (md_integrity_register(mddev)) |
| 3596 | goto out_free_conf; | 3619 | goto out_free_conf; |
| @@ -3641,7 +3664,10 @@ static int stop(struct mddev *mddev) | |||
| 3641 | lower_barrier(conf); | 3664 | lower_barrier(conf); |
| 3642 | 3665 | ||
| 3643 | md_unregister_thread(&mddev->thread); | 3666 | md_unregister_thread(&mddev->thread); |
| 3644 | blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/ | 3667 | if (mddev->queue) |
| 3668 | /* the unplug fn references 'conf'*/ | ||
| 3669 | blk_sync_queue(mddev->queue); | ||
| 3670 | |||
| 3645 | if (conf->r10bio_pool) | 3671 | if (conf->r10bio_pool) |
| 3646 | mempool_destroy(conf->r10bio_pool); | 3672 | mempool_destroy(conf->r10bio_pool); |
| 3647 | kfree(conf->mirrors); | 3673 | kfree(conf->mirrors); |
| @@ -3805,7 +3831,7 @@ static int raid10_check_reshape(struct mddev *mddev) | |||
| 3805 | if (mddev->delta_disks > 0) { | 3831 | if (mddev->delta_disks > 0) { |
| 3806 | /* allocate new 'mirrors' list */ | 3832 | /* allocate new 'mirrors' list */ |
| 3807 | conf->mirrors_new = kzalloc( | 3833 | conf->mirrors_new = kzalloc( |
| 3808 | sizeof(struct mirror_info) | 3834 | sizeof(struct raid10_info) |
| 3809 | *(mddev->raid_disks + | 3835 | *(mddev->raid_disks + |
| 3810 | mddev->delta_disks), | 3836 | mddev->delta_disks), |
| 3811 | GFP_KERNEL); | 3837 | GFP_KERNEL); |
| @@ -3930,7 +3956,7 @@ static int raid10_start_reshape(struct mddev *mddev) | |||
| 3930 | spin_lock_irq(&conf->device_lock); | 3956 | spin_lock_irq(&conf->device_lock); |
| 3931 | if (conf->mirrors_new) { | 3957 | if (conf->mirrors_new) { |
| 3932 | memcpy(conf->mirrors_new, conf->mirrors, | 3958 | memcpy(conf->mirrors_new, conf->mirrors, |
| 3933 | sizeof(struct mirror_info)*conf->prev.raid_disks); | 3959 | sizeof(struct raid10_info)*conf->prev.raid_disks); |
| 3934 | smp_mb(); | 3960 | smp_mb(); |
| 3935 | kfree(conf->mirrors_old); /* FIXME and elsewhere */ | 3961 | kfree(conf->mirrors_old); /* FIXME and elsewhere */ |
| 3936 | conf->mirrors_old = conf->mirrors; | 3962 | conf->mirrors_old = conf->mirrors; |
