aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/raid5.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r--drivers/md/raid5.c223
1 files changed, 115 insertions, 108 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 54ef8d75541d..cac6f4d3a143 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1617,8 +1617,8 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector,
1617 sector_t new_sector; 1617 sector_t new_sector;
1618 int algorithm = previous ? conf->prev_algo 1618 int algorithm = previous ? conf->prev_algo
1619 : conf->algorithm; 1619 : conf->algorithm;
1620 int sectors_per_chunk = previous ? (conf->prev_chunk >> 9) 1620 int sectors_per_chunk = previous ? conf->prev_chunk_sectors
1621 : (conf->chunk_size >> 9); 1621 : conf->chunk_sectors;
1622 int raid_disks = previous ? conf->previous_raid_disks 1622 int raid_disks = previous ? conf->previous_raid_disks
1623 : conf->raid_disks; 1623 : conf->raid_disks;
1624 int data_disks = raid_disks - conf->max_degraded; 1624 int data_disks = raid_disks - conf->max_degraded;
@@ -1823,8 +1823,8 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous)
1823 int raid_disks = sh->disks; 1823 int raid_disks = sh->disks;
1824 int data_disks = raid_disks - conf->max_degraded; 1824 int data_disks = raid_disks - conf->max_degraded;
1825 sector_t new_sector = sh->sector, check; 1825 sector_t new_sector = sh->sector, check;
1826 int sectors_per_chunk = previous ? (conf->prev_chunk >> 9) 1826 int sectors_per_chunk = previous ? conf->prev_chunk_sectors
1827 : (conf->chunk_size >> 9); 1827 : conf->chunk_sectors;
1828 int algorithm = previous ? conf->prev_algo 1828 int algorithm = previous ? conf->prev_algo
1829 : conf->algorithm; 1829 : conf->algorithm;
1830 sector_t stripe; 1830 sector_t stripe;
@@ -2098,8 +2098,7 @@ static void stripe_set_idx(sector_t stripe, raid5_conf_t *conf, int previous,
2098 struct stripe_head *sh) 2098 struct stripe_head *sh)
2099{ 2099{
2100 int sectors_per_chunk = 2100 int sectors_per_chunk =
2101 previous ? (conf->prev_chunk >> 9) 2101 previous ? conf->prev_chunk_sectors : conf->chunk_sectors;
2102 : (conf->chunk_size >> 9);
2103 int dd_idx; 2102 int dd_idx;
2104 int chunk_offset = sector_div(stripe, sectors_per_chunk); 2103 int chunk_offset = sector_div(stripe, sectors_per_chunk);
2105 int disks = previous ? conf->previous_raid_disks : conf->raid_disks; 2104 int disks = previous ? conf->previous_raid_disks : conf->raid_disks;
@@ -3496,7 +3495,7 @@ static void activate_bit_delay(raid5_conf_t *conf)
3496 3495
3497static void unplug_slaves(mddev_t *mddev) 3496static void unplug_slaves(mddev_t *mddev)
3498{ 3497{
3499 raid5_conf_t *conf = mddev_to_conf(mddev); 3498 raid5_conf_t *conf = mddev->private;
3500 int i; 3499 int i;
3501 3500
3502 rcu_read_lock(); 3501 rcu_read_lock();
@@ -3520,7 +3519,7 @@ static void unplug_slaves(mddev_t *mddev)
3520static void raid5_unplug_device(struct request_queue *q) 3519static void raid5_unplug_device(struct request_queue *q)
3521{ 3520{
3522 mddev_t *mddev = q->queuedata; 3521 mddev_t *mddev = q->queuedata;
3523 raid5_conf_t *conf = mddev_to_conf(mddev); 3522 raid5_conf_t *conf = mddev->private;
3524 unsigned long flags; 3523 unsigned long flags;
3525 3524
3526 spin_lock_irqsave(&conf->device_lock, flags); 3525 spin_lock_irqsave(&conf->device_lock, flags);
@@ -3539,7 +3538,7 @@ static void raid5_unplug_device(struct request_queue *q)
3539static int raid5_congested(void *data, int bits) 3538static int raid5_congested(void *data, int bits)
3540{ 3539{
3541 mddev_t *mddev = data; 3540 mddev_t *mddev = data;
3542 raid5_conf_t *conf = mddev_to_conf(mddev); 3541 raid5_conf_t *conf = mddev->private;
3543 3542
3544 /* No difference between reads and writes. Just check 3543 /* No difference between reads and writes. Just check
3545 * how busy the stripe_cache is 3544 * how busy the stripe_cache is
@@ -3564,14 +3563,14 @@ static int raid5_mergeable_bvec(struct request_queue *q,
3564 mddev_t *mddev = q->queuedata; 3563 mddev_t *mddev = q->queuedata;
3565 sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); 3564 sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
3566 int max; 3565 int max;
3567 unsigned int chunk_sectors = mddev->chunk_size >> 9; 3566 unsigned int chunk_sectors = mddev->chunk_sectors;
3568 unsigned int bio_sectors = bvm->bi_size >> 9; 3567 unsigned int bio_sectors = bvm->bi_size >> 9;
3569 3568
3570 if ((bvm->bi_rw & 1) == WRITE) 3569 if ((bvm->bi_rw & 1) == WRITE)
3571 return biovec->bv_len; /* always allow writes to be mergeable */ 3570 return biovec->bv_len; /* always allow writes to be mergeable */
3572 3571
3573 if (mddev->new_chunk < mddev->chunk_size) 3572 if (mddev->new_chunk_sectors < mddev->chunk_sectors)
3574 chunk_sectors = mddev->new_chunk >> 9; 3573 chunk_sectors = mddev->new_chunk_sectors;
3575 max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; 3574 max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9;
3576 if (max < 0) max = 0; 3575 if (max < 0) max = 0;
3577 if (max <= biovec->bv_len && bio_sectors == 0) 3576 if (max <= biovec->bv_len && bio_sectors == 0)
@@ -3584,11 +3583,11 @@ static int raid5_mergeable_bvec(struct request_queue *q,
3584static int in_chunk_boundary(mddev_t *mddev, struct bio *bio) 3583static int in_chunk_boundary(mddev_t *mddev, struct bio *bio)
3585{ 3584{
3586 sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev); 3585 sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev);
3587 unsigned int chunk_sectors = mddev->chunk_size >> 9; 3586 unsigned int chunk_sectors = mddev->chunk_sectors;
3588 unsigned int bio_sectors = bio->bi_size >> 9; 3587 unsigned int bio_sectors = bio->bi_size >> 9;
3589 3588
3590 if (mddev->new_chunk < mddev->chunk_size) 3589 if (mddev->new_chunk_sectors < mddev->chunk_sectors)
3591 chunk_sectors = mddev->new_chunk >> 9; 3590 chunk_sectors = mddev->new_chunk_sectors;
3592 return chunk_sectors >= 3591 return chunk_sectors >=
3593 ((sector & (chunk_sectors - 1)) + bio_sectors); 3592 ((sector & (chunk_sectors - 1)) + bio_sectors);
3594} 3593}
@@ -3652,7 +3651,7 @@ static void raid5_align_endio(struct bio *bi, int error)
3652 bio_put(bi); 3651 bio_put(bi);
3653 3652
3654 mddev = raid_bi->bi_bdev->bd_disk->queue->queuedata; 3653 mddev = raid_bi->bi_bdev->bd_disk->queue->queuedata;
3655 conf = mddev_to_conf(mddev); 3654 conf = mddev->private;
3656 rdev = (void*)raid_bi->bi_next; 3655 rdev = (void*)raid_bi->bi_next;
3657 raid_bi->bi_next = NULL; 3656 raid_bi->bi_next = NULL;
3658 3657
@@ -3675,10 +3674,10 @@ static int bio_fits_rdev(struct bio *bi)
3675{ 3674{
3676 struct request_queue *q = bdev_get_queue(bi->bi_bdev); 3675 struct request_queue *q = bdev_get_queue(bi->bi_bdev);
3677 3676
3678 if ((bi->bi_size>>9) > q->max_sectors) 3677 if ((bi->bi_size>>9) > queue_max_sectors(q))
3679 return 0; 3678 return 0;
3680 blk_recount_segments(q, bi); 3679 blk_recount_segments(q, bi);
3681 if (bi->bi_phys_segments > q->max_phys_segments) 3680 if (bi->bi_phys_segments > queue_max_phys_segments(q))
3682 return 0; 3681 return 0;
3683 3682
3684 if (q->merge_bvec_fn) 3683 if (q->merge_bvec_fn)
@@ -3694,7 +3693,7 @@ static int bio_fits_rdev(struct bio *bi)
3694static int chunk_aligned_read(struct request_queue *q, struct bio * raid_bio) 3693static int chunk_aligned_read(struct request_queue *q, struct bio * raid_bio)
3695{ 3694{
3696 mddev_t *mddev = q->queuedata; 3695 mddev_t *mddev = q->queuedata;
3697 raid5_conf_t *conf = mddev_to_conf(mddev); 3696 raid5_conf_t *conf = mddev->private;
3698 unsigned int dd_idx; 3697 unsigned int dd_idx;
3699 struct bio* align_bi; 3698 struct bio* align_bi;
3700 mdk_rdev_t *rdev; 3699 mdk_rdev_t *rdev;
@@ -3811,7 +3810,7 @@ static struct stripe_head *__get_priority_stripe(raid5_conf_t *conf)
3811static int make_request(struct request_queue *q, struct bio * bi) 3810static int make_request(struct request_queue *q, struct bio * bi)
3812{ 3811{
3813 mddev_t *mddev = q->queuedata; 3812 mddev_t *mddev = q->queuedata;
3814 raid5_conf_t *conf = mddev_to_conf(mddev); 3813 raid5_conf_t *conf = mddev->private;
3815 int dd_idx; 3814 int dd_idx;
3816 sector_t new_sector; 3815 sector_t new_sector;
3817 sector_t logical_sector, last_sector; 3816 sector_t logical_sector, last_sector;
@@ -3908,6 +3907,7 @@ static int make_request(struct request_queue *q, struct bio * bi)
3908 spin_unlock_irq(&conf->device_lock); 3907 spin_unlock_irq(&conf->device_lock);
3909 if (must_retry) { 3908 if (must_retry) {
3910 release_stripe(sh); 3909 release_stripe(sh);
3910 schedule();
3911 goto retry; 3911 goto retry;
3912 } 3912 }
3913 } 3913 }
@@ -4003,10 +4003,10 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
4003 * If old and new chunk sizes differ, we need to process the 4003 * If old and new chunk sizes differ, we need to process the
4004 * largest of these 4004 * largest of these
4005 */ 4005 */
4006 if (mddev->new_chunk > mddev->chunk_size) 4006 if (mddev->new_chunk_sectors > mddev->chunk_sectors)
4007 reshape_sectors = mddev->new_chunk / 512; 4007 reshape_sectors = mddev->new_chunk_sectors;
4008 else 4008 else
4009 reshape_sectors = mddev->chunk_size / 512; 4009 reshape_sectors = mddev->chunk_sectors;
4010 4010
4011 /* we update the metadata when there is more than 3Meg 4011 /* we update the metadata when there is more than 3Meg
4012 * in the block range (that is rather arbitrary, should 4012 * in the block range (that is rather arbitrary, should
@@ -4129,7 +4129,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
4129 1, &dd_idx, NULL); 4129 1, &dd_idx, NULL);
4130 last_sector = 4130 last_sector =
4131 raid5_compute_sector(conf, ((stripe_addr+reshape_sectors) 4131 raid5_compute_sector(conf, ((stripe_addr+reshape_sectors)
4132 *(new_data_disks) - 1), 4132 * new_data_disks - 1),
4133 1, &dd_idx, NULL); 4133 1, &dd_idx, NULL);
4134 if (last_sector >= mddev->dev_sectors) 4134 if (last_sector >= mddev->dev_sectors)
4135 last_sector = mddev->dev_sectors - 1; 4135 last_sector = mddev->dev_sectors - 1;
@@ -4158,7 +4158,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
4158 wait_event(conf->wait_for_overlap, 4158 wait_event(conf->wait_for_overlap,
4159 atomic_read(&conf->reshape_stripes) == 0); 4159 atomic_read(&conf->reshape_stripes) == 0);
4160 mddev->reshape_position = conf->reshape_progress; 4160 mddev->reshape_position = conf->reshape_progress;
4161 mddev->curr_resync_completed = mddev->curr_resync; 4161 mddev->curr_resync_completed = mddev->curr_resync + reshape_sectors;
4162 conf->reshape_checkpoint = jiffies; 4162 conf->reshape_checkpoint = jiffies;
4163 set_bit(MD_CHANGE_DEVS, &mddev->flags); 4163 set_bit(MD_CHANGE_DEVS, &mddev->flags);
4164 md_wakeup_thread(mddev->thread); 4164 md_wakeup_thread(mddev->thread);
@@ -4371,7 +4371,7 @@ static void synchronize_stripe_processing(struct list_head *domain)
4371static void raid5d(mddev_t *mddev) 4371static void raid5d(mddev_t *mddev)
4372{ 4372{
4373 struct stripe_head *sh; 4373 struct stripe_head *sh;
4374 raid5_conf_t *conf = mddev_to_conf(mddev); 4374 raid5_conf_t *conf = mddev->private;
4375 int handled; 4375 int handled;
4376 LIST_HEAD(raid_domain); 4376 LIST_HEAD(raid_domain);
4377 4377
@@ -4428,7 +4428,7 @@ static void raid5d(mddev_t *mddev)
4428static ssize_t 4428static ssize_t
4429raid5_show_stripe_cache_size(mddev_t *mddev, char *page) 4429raid5_show_stripe_cache_size(mddev_t *mddev, char *page)
4430{ 4430{
4431 raid5_conf_t *conf = mddev_to_conf(mddev); 4431 raid5_conf_t *conf = mddev->private;
4432 if (conf) 4432 if (conf)
4433 return sprintf(page, "%d\n", conf->max_nr_stripes); 4433 return sprintf(page, "%d\n", conf->max_nr_stripes);
4434 else 4434 else
@@ -4438,7 +4438,7 @@ raid5_show_stripe_cache_size(mddev_t *mddev, char *page)
4438static ssize_t 4438static ssize_t
4439raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len) 4439raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len)
4440{ 4440{
4441 raid5_conf_t *conf = mddev_to_conf(mddev); 4441 raid5_conf_t *conf = mddev->private;
4442 unsigned long new; 4442 unsigned long new;
4443 int err; 4443 int err;
4444 4444
@@ -4476,7 +4476,7 @@ raid5_stripecache_size = __ATTR(stripe_cache_size, S_IRUGO | S_IWUSR,
4476static ssize_t 4476static ssize_t
4477raid5_show_preread_threshold(mddev_t *mddev, char *page) 4477raid5_show_preread_threshold(mddev_t *mddev, char *page)
4478{ 4478{
4479 raid5_conf_t *conf = mddev_to_conf(mddev); 4479 raid5_conf_t *conf = mddev->private;
4480 if (conf) 4480 if (conf)
4481 return sprintf(page, "%d\n", conf->bypass_threshold); 4481 return sprintf(page, "%d\n", conf->bypass_threshold);
4482 else 4482 else
@@ -4486,7 +4486,7 @@ raid5_show_preread_threshold(mddev_t *mddev, char *page)
4486static ssize_t 4486static ssize_t
4487raid5_store_preread_threshold(mddev_t *mddev, const char *page, size_t len) 4487raid5_store_preread_threshold(mddev_t *mddev, const char *page, size_t len)
4488{ 4488{
4489 raid5_conf_t *conf = mddev_to_conf(mddev); 4489 raid5_conf_t *conf = mddev->private;
4490 unsigned long new; 4490 unsigned long new;
4491 if (len >= PAGE_SIZE) 4491 if (len >= PAGE_SIZE)
4492 return -EINVAL; 4492 return -EINVAL;
@@ -4510,7 +4510,7 @@ raid5_preread_bypass_threshold = __ATTR(preread_bypass_threshold,
4510static ssize_t 4510static ssize_t
4511stripe_cache_active_show(mddev_t *mddev, char *page) 4511stripe_cache_active_show(mddev_t *mddev, char *page)
4512{ 4512{
4513 raid5_conf_t *conf = mddev_to_conf(mddev); 4513 raid5_conf_t *conf = mddev->private;
4514 if (conf) 4514 if (conf)
4515 return sprintf(page, "%d\n", atomic_read(&conf->active_stripes)); 4515 return sprintf(page, "%d\n", atomic_read(&conf->active_stripes));
4516 else 4516 else
@@ -4534,7 +4534,7 @@ static struct attribute_group raid5_attrs_group = {
4534static sector_t 4534static sector_t
4535raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks) 4535raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks)
4536{ 4536{
4537 raid5_conf_t *conf = mddev_to_conf(mddev); 4537 raid5_conf_t *conf = mddev->private;
4538 4538
4539 if (!sectors) 4539 if (!sectors)
4540 sectors = mddev->dev_sectors; 4540 sectors = mddev->dev_sectors;
@@ -4546,8 +4546,8 @@ raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks)
4546 raid_disks = conf->previous_raid_disks; 4546 raid_disks = conf->previous_raid_disks;
4547 } 4547 }
4548 4548
4549 sectors &= ~((sector_t)mddev->chunk_size/512 - 1); 4549 sectors &= ~((sector_t)mddev->chunk_sectors - 1);
4550 sectors &= ~((sector_t)mddev->new_chunk/512 - 1); 4550 sectors &= ~((sector_t)mddev->new_chunk_sectors - 1);
4551 return sectors * (raid_disks - conf->max_degraded); 4551 return sectors * (raid_disks - conf->max_degraded);
4552} 4552}
4553 4553
@@ -4691,9 +4691,11 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
4691 return ERR_PTR(-EINVAL); 4691 return ERR_PTR(-EINVAL);
4692 } 4692 }
4693 4693
4694 if (!mddev->new_chunk || mddev->new_chunk % PAGE_SIZE) { 4694 if (!mddev->new_chunk_sectors ||
4695 (mddev->new_chunk_sectors << 9) % PAGE_SIZE ||
4696 !is_power_of_2(mddev->new_chunk_sectors)) {
4695 printk(KERN_ERR "raid5: invalid chunk size %d for %s\n", 4697 printk(KERN_ERR "raid5: invalid chunk size %d for %s\n",
4696 mddev->new_chunk, mdname(mddev)); 4698 mddev->new_chunk_sectors << 9, mdname(mddev));
4697 return ERR_PTR(-EINVAL); 4699 return ERR_PTR(-EINVAL);
4698 } 4700 }
4699 4701
@@ -4756,7 +4758,8 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
4756 conf->fullsync = 1; 4758 conf->fullsync = 1;
4757 } 4759 }
4758 4760
4759 conf->chunk_size = mddev->new_chunk; 4761 conf->chunk_sectors = mddev->new_chunk_sectors;
4762 conf->level = mddev->new_level;
4760 if (conf->level == 6) 4763 if (conf->level == 6)
4761 conf->max_degraded = 2; 4764 conf->max_degraded = 2;
4762 else 4765 else
@@ -4765,7 +4768,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
4765 conf->max_nr_stripes = NR_STRIPES; 4768 conf->max_nr_stripes = NR_STRIPES;
4766 conf->reshape_progress = mddev->reshape_position; 4769 conf->reshape_progress = mddev->reshape_position;
4767 if (conf->reshape_progress != MaxSector) { 4770 if (conf->reshape_progress != MaxSector) {
4768 conf->prev_chunk = mddev->chunk_size; 4771 conf->prev_chunk_sectors = mddev->chunk_sectors;
4769 conf->prev_algo = mddev->layout; 4772 conf->prev_algo = mddev->layout;
4770 } 4773 }
4771 4774
@@ -4803,6 +4806,10 @@ static int run(mddev_t *mddev)
4803 int working_disks = 0; 4806 int working_disks = 0;
4804 mdk_rdev_t *rdev; 4807 mdk_rdev_t *rdev;
4805 4808
4809 if (mddev->recovery_cp != MaxSector)
4810 printk(KERN_NOTICE "raid5: %s is not clean"
4811 " -- starting background reconstruction\n",
4812 mdname(mddev));
4806 if (mddev->reshape_position != MaxSector) { 4813 if (mddev->reshape_position != MaxSector) {
4807 /* Check that we can continue the reshape. 4814 /* Check that we can continue the reshape.
4808 * Currently only disks can change, it must 4815 * Currently only disks can change, it must
@@ -4825,7 +4832,7 @@ static int run(mddev_t *mddev)
4825 * geometry. 4832 * geometry.
4826 */ 4833 */
4827 here_new = mddev->reshape_position; 4834 here_new = mddev->reshape_position;
4828 if (sector_div(here_new, (mddev->new_chunk>>9)* 4835 if (sector_div(here_new, mddev->new_chunk_sectors *
4829 (mddev->raid_disks - max_degraded))) { 4836 (mddev->raid_disks - max_degraded))) {
4830 printk(KERN_ERR "raid5: reshape_position not " 4837 printk(KERN_ERR "raid5: reshape_position not "
4831 "on a stripe boundary\n"); 4838 "on a stripe boundary\n");
@@ -4833,7 +4840,7 @@ static int run(mddev_t *mddev)
4833 } 4840 }
4834 /* here_new is the stripe we will write to */ 4841 /* here_new is the stripe we will write to */
4835 here_old = mddev->reshape_position; 4842 here_old = mddev->reshape_position;
4836 sector_div(here_old, (mddev->chunk_size>>9)* 4843 sector_div(here_old, mddev->chunk_sectors *
4837 (old_disks-max_degraded)); 4844 (old_disks-max_degraded));
4838 /* here_old is the first stripe that we might need to read 4845 /* here_old is the first stripe that we might need to read
4839 * from */ 4846 * from */
@@ -4848,7 +4855,7 @@ static int run(mddev_t *mddev)
4848 } else { 4855 } else {
4849 BUG_ON(mddev->level != mddev->new_level); 4856 BUG_ON(mddev->level != mddev->new_level);
4850 BUG_ON(mddev->layout != mddev->new_layout); 4857 BUG_ON(mddev->layout != mddev->new_layout);
4851 BUG_ON(mddev->chunk_size != mddev->new_chunk); 4858 BUG_ON(mddev->chunk_sectors != mddev->new_chunk_sectors);
4852 BUG_ON(mddev->delta_disks != 0); 4859 BUG_ON(mddev->delta_disks != 0);
4853 } 4860 }
4854 4861
@@ -4882,7 +4889,7 @@ static int run(mddev_t *mddev)
4882 } 4889 }
4883 4890
4884 /* device size must be a multiple of chunk size */ 4891 /* device size must be a multiple of chunk size */
4885 mddev->dev_sectors &= ~(mddev->chunk_size / 512 - 1); 4892 mddev->dev_sectors &= ~(mddev->chunk_sectors - 1);
4886 mddev->resync_max_sectors = mddev->dev_sectors; 4893 mddev->resync_max_sectors = mddev->dev_sectors;
4887 4894
4888 if (mddev->degraded > 0 && 4895 if (mddev->degraded > 0 &&
@@ -4931,7 +4938,7 @@ static int run(mddev_t *mddev)
4931 { 4938 {
4932 int data_disks = conf->previous_raid_disks - conf->max_degraded; 4939 int data_disks = conf->previous_raid_disks - conf->max_degraded;
4933 int stripe = data_disks * 4940 int stripe = data_disks *
4934 (mddev->chunk_size / PAGE_SIZE); 4941 ((mddev->chunk_sectors << 9) / PAGE_SIZE);
4935 if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe) 4942 if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
4936 mddev->queue->backing_dev_info.ra_pages = 2 * stripe; 4943 mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
4937 } 4944 }
@@ -5021,7 +5028,8 @@ static void status(struct seq_file *seq, mddev_t *mddev)
5021 raid5_conf_t *conf = (raid5_conf_t *) mddev->private; 5028 raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
5022 int i; 5029 int i;
5023 5030
5024 seq_printf (seq, " level %d, %dk chunk, algorithm %d", mddev->level, mddev->chunk_size >> 10, mddev->layout); 5031 seq_printf(seq, " level %d, %dk chunk, algorithm %d", mddev->level,
5032 mddev->chunk_sectors / 2, mddev->layout);
5025 seq_printf (seq, " [%d/%d] [", conf->raid_disks, conf->raid_disks - mddev->degraded); 5033 seq_printf (seq, " [%d/%d] [", conf->raid_disks, conf->raid_disks - mddev->degraded);
5026 for (i = 0; i < conf->raid_disks; i++) 5034 for (i = 0; i < conf->raid_disks; i++)
5027 seq_printf (seq, "%s", 5035 seq_printf (seq, "%s",
@@ -5169,7 +5177,7 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors)
5169 * any io in the removed space completes, but it hardly seems 5177 * any io in the removed space completes, but it hardly seems
5170 * worth it. 5178 * worth it.
5171 */ 5179 */
5172 sectors &= ~((sector_t)mddev->chunk_size/512 - 1); 5180 sectors &= ~((sector_t)mddev->chunk_sectors - 1);
5173 md_set_array_sectors(mddev, raid5_size(mddev, sectors, 5181 md_set_array_sectors(mddev, raid5_size(mddev, sectors,
5174 mddev->raid_disks)); 5182 mddev->raid_disks));
5175 if (mddev->array_sectors > 5183 if (mddev->array_sectors >
@@ -5186,14 +5194,37 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors)
5186 return 0; 5194 return 0;
5187} 5195}
5188 5196
5189static int raid5_check_reshape(mddev_t *mddev) 5197static int check_stripe_cache(mddev_t *mddev)
5198{
5199 /* Can only proceed if there are plenty of stripe_heads.
5200 * We need a minimum of one full stripe,, and for sensible progress
5201 * it is best to have about 4 times that.
5202 * If we require 4 times, then the default 256 4K stripe_heads will
5203 * allow for chunk sizes up to 256K, which is probably OK.
5204 * If the chunk size is greater, user-space should request more
5205 * stripe_heads first.
5206 */
5207 raid5_conf_t *conf = mddev->private;
5208 if (((mddev->chunk_sectors << 9) / STRIPE_SIZE) * 4
5209 > conf->max_nr_stripes ||
5210 ((mddev->new_chunk_sectors << 9) / STRIPE_SIZE) * 4
5211 > conf->max_nr_stripes) {
5212 printk(KERN_WARNING "raid5: reshape: not enough stripes. Needed %lu\n",
5213 ((max(mddev->chunk_sectors, mddev->new_chunk_sectors) << 9)
5214 / STRIPE_SIZE)*4);
5215 return 0;
5216 }
5217 return 1;
5218}
5219
5220static int check_reshape(mddev_t *mddev)
5190{ 5221{
5191 raid5_conf_t *conf = mddev_to_conf(mddev); 5222 raid5_conf_t *conf = mddev->private;
5192 5223
5193 if (mddev->delta_disks == 0 && 5224 if (mddev->delta_disks == 0 &&
5194 mddev->new_layout == mddev->layout && 5225 mddev->new_layout == mddev->layout &&
5195 mddev->new_chunk == mddev->chunk_size) 5226 mddev->new_chunk_sectors == mddev->chunk_sectors)
5196 return -EINVAL; /* nothing to do */ 5227 return 0; /* nothing to do */
5197 if (mddev->bitmap) 5228 if (mddev->bitmap)
5198 /* Cannot grow a bitmap yet */ 5229 /* Cannot grow a bitmap yet */
5199 return -EBUSY; 5230 return -EBUSY;
@@ -5212,28 +5243,15 @@ static int raid5_check_reshape(mddev_t *mddev)
5212 return -EINVAL; 5243 return -EINVAL;
5213 } 5244 }
5214 5245
5215 /* Can only proceed if there are plenty of stripe_heads. 5246 if (!check_stripe_cache(mddev))
5216 * We need a minimum of one full stripe,, and for sensible progress
5217 * it is best to have about 4 times that.
5218 * If we require 4 times, then the default 256 4K stripe_heads will
5219 * allow for chunk sizes up to 256K, which is probably OK.
5220 * If the chunk size is greater, user-space should request more
5221 * stripe_heads first.
5222 */
5223 if ((mddev->chunk_size / STRIPE_SIZE) * 4 > conf->max_nr_stripes ||
5224 (mddev->new_chunk / STRIPE_SIZE) * 4 > conf->max_nr_stripes) {
5225 printk(KERN_WARNING "raid5: reshape: not enough stripes. Needed %lu\n",
5226 (max(mddev->chunk_size, mddev->new_chunk)
5227 / STRIPE_SIZE)*4);
5228 return -ENOSPC; 5247 return -ENOSPC;
5229 }
5230 5248
5231 return resize_stripes(conf, conf->raid_disks + mddev->delta_disks); 5249 return resize_stripes(conf, conf->raid_disks + mddev->delta_disks);
5232} 5250}
5233 5251
5234static int raid5_start_reshape(mddev_t *mddev) 5252static int raid5_start_reshape(mddev_t *mddev)
5235{ 5253{
5236 raid5_conf_t *conf = mddev_to_conf(mddev); 5254 raid5_conf_t *conf = mddev->private;
5237 mdk_rdev_t *rdev; 5255 mdk_rdev_t *rdev;
5238 int spares = 0; 5256 int spares = 0;
5239 int added_devices = 0; 5257 int added_devices = 0;
@@ -5242,6 +5260,9 @@ static int raid5_start_reshape(mddev_t *mddev)
5242 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) 5260 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
5243 return -EBUSY; 5261 return -EBUSY;
5244 5262
5263 if (!check_stripe_cache(mddev))
5264 return -ENOSPC;
5265
5245 list_for_each_entry(rdev, &mddev->disks, same_set) 5266 list_for_each_entry(rdev, &mddev->disks, same_set)
5246 if (rdev->raid_disk < 0 && 5267 if (rdev->raid_disk < 0 &&
5247 !test_bit(Faulty, &rdev->flags)) 5268 !test_bit(Faulty, &rdev->flags))
@@ -5268,8 +5289,8 @@ static int raid5_start_reshape(mddev_t *mddev)
5268 spin_lock_irq(&conf->device_lock); 5289 spin_lock_irq(&conf->device_lock);
5269 conf->previous_raid_disks = conf->raid_disks; 5290 conf->previous_raid_disks = conf->raid_disks;
5270 conf->raid_disks += mddev->delta_disks; 5291 conf->raid_disks += mddev->delta_disks;
5271 conf->prev_chunk = conf->chunk_size; 5292 conf->prev_chunk_sectors = conf->chunk_sectors;
5272 conf->chunk_size = mddev->new_chunk; 5293 conf->chunk_sectors = mddev->new_chunk_sectors;
5273 conf->prev_algo = conf->algorithm; 5294 conf->prev_algo = conf->algorithm;
5274 conf->algorithm = mddev->new_layout; 5295 conf->algorithm = mddev->new_layout;
5275 if (mddev->delta_disks < 0) 5296 if (mddev->delta_disks < 0)
@@ -5351,7 +5372,7 @@ static void end_reshape(raid5_conf_t *conf)
5351 */ 5372 */
5352 { 5373 {
5353 int data_disks = conf->raid_disks - conf->max_degraded; 5374 int data_disks = conf->raid_disks - conf->max_degraded;
5354 int stripe = data_disks * (conf->chunk_size 5375 int stripe = data_disks * ((conf->chunk_sectors << 9)
5355 / PAGE_SIZE); 5376 / PAGE_SIZE);
5356 if (conf->mddev->queue->backing_dev_info.ra_pages < 2 * stripe) 5377 if (conf->mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
5357 conf->mddev->queue->backing_dev_info.ra_pages = 2 * stripe; 5378 conf->mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
@@ -5365,7 +5386,7 @@ static void end_reshape(raid5_conf_t *conf)
5365static void raid5_finish_reshape(mddev_t *mddev) 5386static void raid5_finish_reshape(mddev_t *mddev)
5366{ 5387{
5367 struct block_device *bdev; 5388 struct block_device *bdev;
5368 raid5_conf_t *conf = mddev_to_conf(mddev); 5389 raid5_conf_t *conf = mddev->private;
5369 5390
5370 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { 5391 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
5371 5392
@@ -5396,7 +5417,7 @@ static void raid5_finish_reshape(mddev_t *mddev)
5396 raid5_remove_disk(mddev, d); 5417 raid5_remove_disk(mddev, d);
5397 } 5418 }
5398 mddev->layout = conf->algorithm; 5419 mddev->layout = conf->algorithm;
5399 mddev->chunk_size = conf->chunk_size; 5420 mddev->chunk_sectors = conf->chunk_sectors;
5400 mddev->reshape_position = MaxSector; 5421 mddev->reshape_position = MaxSector;
5401 mddev->delta_disks = 0; 5422 mddev->delta_disks = 0;
5402 } 5423 }
@@ -5404,7 +5425,7 @@ static void raid5_finish_reshape(mddev_t *mddev)
5404 5425
5405static void raid5_quiesce(mddev_t *mddev, int state) 5426static void raid5_quiesce(mddev_t *mddev, int state)
5406{ 5427{
5407 raid5_conf_t *conf = mddev_to_conf(mddev); 5428 raid5_conf_t *conf = mddev->private;
5408 5429
5409 switch(state) { 5430 switch(state) {
5410 case 2: /* resume for a suspend */ 5431 case 2: /* resume for a suspend */
@@ -5454,7 +5475,7 @@ static void *raid5_takeover_raid1(mddev_t *mddev)
5454 5475
5455 mddev->new_level = 5; 5476 mddev->new_level = 5;
5456 mddev->new_layout = ALGORITHM_LEFT_SYMMETRIC; 5477 mddev->new_layout = ALGORITHM_LEFT_SYMMETRIC;
5457 mddev->new_chunk = chunksect << 9; 5478 mddev->new_chunk_sectors = chunksect;
5458 5479
5459 return setup_conf(mddev); 5480 return setup_conf(mddev);
5460} 5481}
@@ -5493,24 +5514,24 @@ static void *raid5_takeover_raid6(mddev_t *mddev)
5493} 5514}
5494 5515
5495 5516
5496static int raid5_reconfig(mddev_t *mddev, int new_layout, int new_chunk) 5517static int raid5_check_reshape(mddev_t *mddev)
5497{ 5518{
5498 /* For a 2-drive array, the layout and chunk size can be changed 5519 /* For a 2-drive array, the layout and chunk size can be changed
5499 * immediately as not restriping is needed. 5520 * immediately as not restriping is needed.
5500 * For larger arrays we record the new value - after validation 5521 * For larger arrays we record the new value - after validation
5501 * to be used by a reshape pass. 5522 * to be used by a reshape pass.
5502 */ 5523 */
5503 raid5_conf_t *conf = mddev_to_conf(mddev); 5524 raid5_conf_t *conf = mddev->private;
5525 int new_chunk = mddev->new_chunk_sectors;
5504 5526
5505 if (new_layout >= 0 && !algorithm_valid_raid5(new_layout)) 5527 if (mddev->new_layout >= 0 && !algorithm_valid_raid5(mddev->new_layout))
5506 return -EINVAL; 5528 return -EINVAL;
5507 if (new_chunk > 0) { 5529 if (new_chunk > 0) {
5508 if (new_chunk & (new_chunk-1)) 5530 if (!is_power_of_2(new_chunk))
5509 /* not a power of 2 */
5510 return -EINVAL; 5531 return -EINVAL;
5511 if (new_chunk < PAGE_SIZE) 5532 if (new_chunk < (PAGE_SIZE>>9))
5512 return -EINVAL; 5533 return -EINVAL;
5513 if (mddev->array_sectors & ((new_chunk>>9)-1)) 5534 if (mddev->array_sectors & (new_chunk-1))
5514 /* not factor of array size */ 5535 /* not factor of array size */
5515 return -EINVAL; 5536 return -EINVAL;
5516 } 5537 }
@@ -5518,49 +5539,39 @@ static int raid5_reconfig(mddev_t *mddev, int new_layout, int new_chunk)
5518 /* They look valid */ 5539 /* They look valid */
5519 5540
5520 if (mddev->raid_disks == 2) { 5541 if (mddev->raid_disks == 2) {
5521 5542 /* can make the change immediately */
5522 if (new_layout >= 0) { 5543 if (mddev->new_layout >= 0) {
5523 conf->algorithm = new_layout; 5544 conf->algorithm = mddev->new_layout;
5524 mddev->layout = mddev->new_layout = new_layout; 5545 mddev->layout = mddev->new_layout;
5525 } 5546 }
5526 if (new_chunk > 0) { 5547 if (new_chunk > 0) {
5527 conf->chunk_size = new_chunk; 5548 conf->chunk_sectors = new_chunk ;
5528 mddev->chunk_size = mddev->new_chunk = new_chunk; 5549 mddev->chunk_sectors = new_chunk;
5529 } 5550 }
5530 set_bit(MD_CHANGE_DEVS, &mddev->flags); 5551 set_bit(MD_CHANGE_DEVS, &mddev->flags);
5531 md_wakeup_thread(mddev->thread); 5552 md_wakeup_thread(mddev->thread);
5532 } else {
5533 if (new_layout >= 0)
5534 mddev->new_layout = new_layout;
5535 if (new_chunk > 0)
5536 mddev->new_chunk = new_chunk;
5537 } 5553 }
5538 return 0; 5554 return check_reshape(mddev);
5539} 5555}
5540 5556
5541static int raid6_reconfig(mddev_t *mddev, int new_layout, int new_chunk) 5557static int raid6_check_reshape(mddev_t *mddev)
5542{ 5558{
5543 if (new_layout >= 0 && !algorithm_valid_raid6(new_layout)) 5559 int new_chunk = mddev->new_chunk_sectors;
5560
5561 if (mddev->new_layout >= 0 && !algorithm_valid_raid6(mddev->new_layout))
5544 return -EINVAL; 5562 return -EINVAL;
5545 if (new_chunk > 0) { 5563 if (new_chunk > 0) {
5546 if (new_chunk & (new_chunk-1)) 5564 if (!is_power_of_2(new_chunk))
5547 /* not a power of 2 */
5548 return -EINVAL; 5565 return -EINVAL;
5549 if (new_chunk < PAGE_SIZE) 5566 if (new_chunk < (PAGE_SIZE >> 9))
5550 return -EINVAL; 5567 return -EINVAL;
5551 if (mddev->array_sectors & ((new_chunk>>9)-1)) 5568 if (mddev->array_sectors & (new_chunk-1))
5552 /* not factor of array size */ 5569 /* not factor of array size */
5553 return -EINVAL; 5570 return -EINVAL;
5554 } 5571 }
5555 5572
5556 /* They look valid */ 5573 /* They look valid */
5557 5574 return check_reshape(mddev);
5558 if (new_layout >= 0)
5559 mddev->new_layout = new_layout;
5560 if (new_chunk > 0)
5561 mddev->new_chunk = new_chunk;
5562
5563 return 0;
5564} 5575}
5565 5576
5566static void *raid5_takeover(mddev_t *mddev) 5577static void *raid5_takeover(mddev_t *mddev)
@@ -5570,8 +5581,6 @@ static void *raid5_takeover(mddev_t *mddev)
5570 * raid1 - if there are two drives. We need to know the chunk size 5581 * raid1 - if there are two drives. We need to know the chunk size
5571 * raid4 - trivial - just use a raid4 layout. 5582 * raid4 - trivial - just use a raid4 layout.
5572 * raid6 - Providing it is a *_6 layout 5583 * raid6 - Providing it is a *_6 layout
5573 *
5574 * For now, just do raid1
5575 */ 5584 */
5576 5585
5577 if (mddev->level == 1) 5586 if (mddev->level == 1)
@@ -5653,12 +5662,11 @@ static struct mdk_personality raid6_personality =
5653 .sync_request = sync_request, 5662 .sync_request = sync_request,
5654 .resize = raid5_resize, 5663 .resize = raid5_resize,
5655 .size = raid5_size, 5664 .size = raid5_size,
5656 .check_reshape = raid5_check_reshape, 5665 .check_reshape = raid6_check_reshape,
5657 .start_reshape = raid5_start_reshape, 5666 .start_reshape = raid5_start_reshape,
5658 .finish_reshape = raid5_finish_reshape, 5667 .finish_reshape = raid5_finish_reshape,
5659 .quiesce = raid5_quiesce, 5668 .quiesce = raid5_quiesce,
5660 .takeover = raid6_takeover, 5669 .takeover = raid6_takeover,
5661 .reconfig = raid6_reconfig,
5662}; 5670};
5663static struct mdk_personality raid5_personality = 5671static struct mdk_personality raid5_personality =
5664{ 5672{
@@ -5681,7 +5689,6 @@ static struct mdk_personality raid5_personality =
5681 .finish_reshape = raid5_finish_reshape, 5689 .finish_reshape = raid5_finish_reshape,
5682 .quiesce = raid5_quiesce, 5690 .quiesce = raid5_quiesce,
5683 .takeover = raid5_takeover, 5691 .takeover = raid5_takeover,
5684 .reconfig = raid5_reconfig,
5685}; 5692};
5686 5693
5687static struct mdk_personality raid4_personality = 5694static struct mdk_personality raid4_personality =