diff options
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r-- | drivers/md/raid5.c | 87 |
1 files changed, 65 insertions, 22 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index cac6f4d3a143..9b00a229015a 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -3911,13 +3911,21 @@ static int make_request(struct request_queue *q, struct bio * bi) | |||
3911 | goto retry; | 3911 | goto retry; |
3912 | } | 3912 | } |
3913 | } | 3913 | } |
3914 | /* FIXME what if we get a false positive because these | 3914 | |
3915 | * are being updated. | 3915 | if (bio_data_dir(bi) == WRITE && |
3916 | */ | 3916 | logical_sector >= mddev->suspend_lo && |
3917 | if (logical_sector >= mddev->suspend_lo && | ||
3918 | logical_sector < mddev->suspend_hi) { | 3917 | logical_sector < mddev->suspend_hi) { |
3919 | release_stripe(sh); | 3918 | release_stripe(sh); |
3920 | schedule(); | 3919 | /* As the suspend_* range is controlled by |
3920 | * userspace, we want an interruptible | ||
3921 | * wait. | ||
3922 | */ | ||
3923 | flush_signals(current); | ||
3924 | prepare_to_wait(&conf->wait_for_overlap, | ||
3925 | &w, TASK_INTERRUPTIBLE); | ||
3926 | if (logical_sector >= mddev->suspend_lo && | ||
3927 | logical_sector < mddev->suspend_hi) | ||
3928 | schedule(); | ||
3921 | goto retry; | 3929 | goto retry; |
3922 | } | 3930 | } |
3923 | 3931 | ||
@@ -3989,7 +3997,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped | |||
3989 | conf->reshape_progress < raid5_size(mddev, 0, 0)) { | 3997 | conf->reshape_progress < raid5_size(mddev, 0, 0)) { |
3990 | sector_nr = raid5_size(mddev, 0, 0) | 3998 | sector_nr = raid5_size(mddev, 0, 0) |
3991 | - conf->reshape_progress; | 3999 | - conf->reshape_progress; |
3992 | } else if (mddev->delta_disks > 0 && | 4000 | } else if (mddev->delta_disks >= 0 && |
3993 | conf->reshape_progress > 0) | 4001 | conf->reshape_progress > 0) |
3994 | sector_nr = conf->reshape_progress; | 4002 | sector_nr = conf->reshape_progress; |
3995 | sector_div(sector_nr, new_data_disks); | 4003 | sector_div(sector_nr, new_data_disks); |
@@ -4203,6 +4211,9 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski | |||
4203 | return 0; | 4211 | return 0; |
4204 | } | 4212 | } |
4205 | 4213 | ||
4214 | /* Allow raid5_quiesce to complete */ | ||
4215 | wait_event(conf->wait_for_overlap, conf->quiesce != 2); | ||
4216 | |||
4206 | if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) | 4217 | if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) |
4207 | return reshape_request(mddev, sector_nr, skipped); | 4218 | return reshape_request(mddev, sector_nr, skipped); |
4208 | 4219 | ||
@@ -4803,7 +4814,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) | |||
4803 | static int run(mddev_t *mddev) | 4814 | static int run(mddev_t *mddev) |
4804 | { | 4815 | { |
4805 | raid5_conf_t *conf; | 4816 | raid5_conf_t *conf; |
4806 | int working_disks = 0; | 4817 | int working_disks = 0, chunk_size; |
4807 | mdk_rdev_t *rdev; | 4818 | mdk_rdev_t *rdev; |
4808 | 4819 | ||
4809 | if (mddev->recovery_cp != MaxSector) | 4820 | if (mddev->recovery_cp != MaxSector) |
@@ -4844,7 +4855,26 @@ static int run(mddev_t *mddev) | |||
4844 | (old_disks-max_degraded)); | 4855 | (old_disks-max_degraded)); |
4845 | /* here_old is the first stripe that we might need to read | 4856 | /* here_old is the first stripe that we might need to read |
4846 | * from */ | 4857 | * from */ |
4847 | if (here_new >= here_old) { | 4858 | if (mddev->delta_disks == 0) { |
4859 | /* We cannot be sure it is safe to start an in-place | ||
4860 | * reshape. It is only safe if user-space is monitoring | ||
4861 | * and taking constant backups. | ||
4862 | * mdadm always starts a situation like this in | ||
4863 | * readonly mode so it can take control before | ||
4864 | * allowing any writes. So just check for that. | ||
4865 | */ | ||
4866 | if ((here_new * mddev->new_chunk_sectors != | ||
4867 | here_old * mddev->chunk_sectors) || | ||
4868 | mddev->ro == 0) { | ||
4869 | printk(KERN_ERR "raid5: in-place reshape must be started" | ||
4870 | " in read-only mode - aborting\n"); | ||
4871 | return -EINVAL; | ||
4872 | } | ||
4873 | } else if (mddev->delta_disks < 0 | ||
4874 | ? (here_new * mddev->new_chunk_sectors <= | ||
4875 | here_old * mddev->chunk_sectors) | ||
4876 | : (here_new * mddev->new_chunk_sectors >= | ||
4877 | here_old * mddev->chunk_sectors)) { | ||
4848 | /* Reading from the same stripe as writing to - bad */ | 4878 | /* Reading from the same stripe as writing to - bad */ |
4849 | printk(KERN_ERR "raid5: reshape_position too early for " | 4879 | printk(KERN_ERR "raid5: reshape_position too early for " |
4850 | "auto-recovery - aborting.\n"); | 4880 | "auto-recovery - aborting.\n"); |
@@ -4958,6 +4988,14 @@ static int run(mddev_t *mddev) | |||
4958 | md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); | 4988 | md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); |
4959 | 4989 | ||
4960 | blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec); | 4990 | blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec); |
4991 | chunk_size = mddev->chunk_sectors << 9; | ||
4992 | blk_queue_io_min(mddev->queue, chunk_size); | ||
4993 | blk_queue_io_opt(mddev->queue, chunk_size * | ||
4994 | (conf->raid_disks - conf->max_degraded)); | ||
4995 | |||
4996 | list_for_each_entry(rdev, &mddev->disks, same_set) | ||
4997 | disk_stack_limits(mddev->gendisk, rdev->bdev, | ||
4998 | rdev->data_offset << 9); | ||
4961 | 4999 | ||
4962 | return 0; | 5000 | return 0; |
4963 | abort: | 5001 | abort: |
@@ -5185,6 +5223,7 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors) | |||
5185 | return -EINVAL; | 5223 | return -EINVAL; |
5186 | set_capacity(mddev->gendisk, mddev->array_sectors); | 5224 | set_capacity(mddev->gendisk, mddev->array_sectors); |
5187 | mddev->changed = 1; | 5225 | mddev->changed = 1; |
5226 | revalidate_disk(mddev->gendisk); | ||
5188 | if (sectors > mddev->dev_sectors && mddev->recovery_cp == MaxSector) { | 5227 | if (sectors > mddev->dev_sectors && mddev->recovery_cp == MaxSector) { |
5189 | mddev->recovery_cp = mddev->dev_sectors; | 5228 | mddev->recovery_cp = mddev->dev_sectors; |
5190 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 5229 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
@@ -5330,7 +5369,7 @@ static int raid5_start_reshape(mddev_t *mddev) | |||
5330 | spin_unlock_irqrestore(&conf->device_lock, flags); | 5369 | spin_unlock_irqrestore(&conf->device_lock, flags); |
5331 | } | 5370 | } |
5332 | mddev->raid_disks = conf->raid_disks; | 5371 | mddev->raid_disks = conf->raid_disks; |
5333 | mddev->reshape_position = 0; | 5372 | mddev->reshape_position = conf->reshape_progress; |
5334 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | 5373 | set_bit(MD_CHANGE_DEVS, &mddev->flags); |
5335 | 5374 | ||
5336 | clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); | 5375 | clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); |
@@ -5385,7 +5424,6 @@ static void end_reshape(raid5_conf_t *conf) | |||
5385 | */ | 5424 | */ |
5386 | static void raid5_finish_reshape(mddev_t *mddev) | 5425 | static void raid5_finish_reshape(mddev_t *mddev) |
5387 | { | 5426 | { |
5388 | struct block_device *bdev; | ||
5389 | raid5_conf_t *conf = mddev->private; | 5427 | raid5_conf_t *conf = mddev->private; |
5390 | 5428 | ||
5391 | if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { | 5429 | if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { |
@@ -5394,15 +5432,7 @@ static void raid5_finish_reshape(mddev_t *mddev) | |||
5394 | md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); | 5432 | md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); |
5395 | set_capacity(mddev->gendisk, mddev->array_sectors); | 5433 | set_capacity(mddev->gendisk, mddev->array_sectors); |
5396 | mddev->changed = 1; | 5434 | mddev->changed = 1; |
5397 | 5435 | revalidate_disk(mddev->gendisk); | |
5398 | bdev = bdget_disk(mddev->gendisk, 0); | ||
5399 | if (bdev) { | ||
5400 | mutex_lock(&bdev->bd_inode->i_mutex); | ||
5401 | i_size_write(bdev->bd_inode, | ||
5402 | (loff_t)mddev->array_sectors << 9); | ||
5403 | mutex_unlock(&bdev->bd_inode->i_mutex); | ||
5404 | bdput(bdev); | ||
5405 | } | ||
5406 | } else { | 5436 | } else { |
5407 | int d; | 5437 | int d; |
5408 | mddev->degraded = conf->raid_disks; | 5438 | mddev->degraded = conf->raid_disks; |
@@ -5413,8 +5443,15 @@ static void raid5_finish_reshape(mddev_t *mddev) | |||
5413 | mddev->degraded--; | 5443 | mddev->degraded--; |
5414 | for (d = conf->raid_disks ; | 5444 | for (d = conf->raid_disks ; |
5415 | d < conf->raid_disks - mddev->delta_disks; | 5445 | d < conf->raid_disks - mddev->delta_disks; |
5416 | d++) | 5446 | d++) { |
5417 | raid5_remove_disk(mddev, d); | 5447 | mdk_rdev_t *rdev = conf->disks[d].rdev; |
5448 | if (rdev && raid5_remove_disk(mddev, d) == 0) { | ||
5449 | char nm[20]; | ||
5450 | sprintf(nm, "rd%d", rdev->raid_disk); | ||
5451 | sysfs_remove_link(&mddev->kobj, nm); | ||
5452 | rdev->raid_disk = -1; | ||
5453 | } | ||
5454 | } | ||
5418 | } | 5455 | } |
5419 | mddev->layout = conf->algorithm; | 5456 | mddev->layout = conf->algorithm; |
5420 | mddev->chunk_sectors = conf->chunk_sectors; | 5457 | mddev->chunk_sectors = conf->chunk_sectors; |
@@ -5434,12 +5471,18 @@ static void raid5_quiesce(mddev_t *mddev, int state) | |||
5434 | 5471 | ||
5435 | case 1: /* stop all writes */ | 5472 | case 1: /* stop all writes */ |
5436 | spin_lock_irq(&conf->device_lock); | 5473 | spin_lock_irq(&conf->device_lock); |
5437 | conf->quiesce = 1; | 5474 | /* '2' tells resync/reshape to pause so that all |
5475 | * active stripes can drain | ||
5476 | */ | ||
5477 | conf->quiesce = 2; | ||
5438 | wait_event_lock_irq(conf->wait_for_stripe, | 5478 | wait_event_lock_irq(conf->wait_for_stripe, |
5439 | atomic_read(&conf->active_stripes) == 0 && | 5479 | atomic_read(&conf->active_stripes) == 0 && |
5440 | atomic_read(&conf->active_aligned_reads) == 0, | 5480 | atomic_read(&conf->active_aligned_reads) == 0, |
5441 | conf->device_lock, /* nothing */); | 5481 | conf->device_lock, /* nothing */); |
5482 | conf->quiesce = 1; | ||
5442 | spin_unlock_irq(&conf->device_lock); | 5483 | spin_unlock_irq(&conf->device_lock); |
5484 | /* allow reshape to continue */ | ||
5485 | wake_up(&conf->wait_for_overlap); | ||
5443 | break; | 5486 | break; |
5444 | 5487 | ||
5445 | case 0: /* re-enable writes */ | 5488 | case 0: /* re-enable writes */ |