author		Dan Williams <dan.j.williams@intel.com>	2009-09-08 20:55:54 -0400
committer	Dan Williams <dan.j.williams@intel.com>	2009-09-08 20:55:54 -0400
commit		9134d02bc0af4a8747d448d1f811ec5f8eb96df6 (patch)
tree		704c3e5dcc10f360815c4868a74711f82fb62e27 /drivers/md/raid5.c
parent		bbb20089a3275a19e475dbc21320c3742e3ca423 (diff)
parent		80ffb3cceaefa405f2ecd46d66500ed8d53efe74 (diff)

Merge commit 'md/for-linus' into async-tx-next

Conflicts:
	drivers/md/raid5.c
Diffstat (limited to 'drivers/md/raid5.c')
 drivers/md/raid5.c | 87 ++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 65 insertions(+), 22 deletions(-)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index cac6f4d3a143..9b00a229015a 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3911,13 +3911,21 @@ static int make_request(struct request_queue *q, struct bio * bi)
 				goto retry;
 			}
 		}
-		/* FIXME what if we get a false positive because these
-		 * are being updated.
-		 */
-		if (logical_sector >= mddev->suspend_lo &&
+
+		if (bio_data_dir(bi) == WRITE &&
+		    logical_sector >= mddev->suspend_lo &&
 		    logical_sector < mddev->suspend_hi) {
 			release_stripe(sh);
-			schedule();
+			/* As the suspend_* range is controlled by
+			 * userspace, we want an interruptible
+			 * wait.
+			 */
+			flush_signals(current);
+			prepare_to_wait(&conf->wait_for_overlap,
+					&w, TASK_INTERRUPTIBLE);
+			if (logical_sector >= mddev->suspend_lo &&
+			    logical_sector < mddev->suspend_hi)
+				schedule();
 			goto retry;
 		}
 
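This hunk replaces a bare schedule() with the standard "register as a waiter, re-check, then sleep" sequence on conf->wait_for_overlap, so a wakeup that lands between the suspend-range test and the sleep cannot be lost, and it flushes pending signals so the TASK_INTERRUPTIBLE sleep is not cut short by stale ones. A minimal userspace analogue of the pattern, using pthreads in place of kernel wait queues (illustrative only, not the kernel code):

#include <pthread.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t wait_for_overlap = PTHREAD_COND_INITIALIZER;
static unsigned long long suspend_lo, suspend_hi; /* hypothetical range */

/* Re-test the condition while holding the lock before sleeping;
 * pthread_cond_wait() atomically drops the lock and blocks, which is
 * the moral equivalent of prepare_to_wait() + re-check + schedule(). */
static void wait_while_suspended(unsigned long long logical_sector)
{
	pthread_mutex_lock(&lock);
	while (logical_sector >= suspend_lo && logical_sector < suspend_hi)
		pthread_cond_wait(&wait_for_overlap, &lock);
	pthread_mutex_unlock(&lock);
}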
@@ -3989,7 +3997,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped)
 		    conf->reshape_progress < raid5_size(mddev, 0, 0)) {
 			sector_nr = raid5_size(mddev, 0, 0)
 				- conf->reshape_progress;
-		} else if (mddev->delta_disks > 0 &&
+		} else if (mddev->delta_disks >= 0 &&
 			   conf->reshape_progress > 0)
 			sector_nr = conf->reshape_progress;
 		sector_div(sector_nr, new_data_disks);
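Widening the test from delta_disks > 0 to >= 0 lets a restarted reshape with an unchanged disk count (the in-place chunk-size/layout reshape enabled later in this diff) resume from conf->reshape_progress instead of falling through with sector_nr left at zero. A standalone sketch of the resume-point selection, assuming (as in the surrounding kernel code) that the elided first branch tests delta_disks < 0:

#include <stdint.h>

/* Shrinking arrays reshape from the end, so the remaining distance is
 * size - progress; growing and in-place (delta_disks == 0) reshapes
 * count forward from progress. Names mirror the diff, but this is an
 * illustration, not the kernel function. */
static uint64_t resume_sector(int delta_disks, uint64_t progress,
                              uint64_t array_size, uint64_t new_data_disks)
{
	uint64_t sector_nr = 0;

	if (delta_disks < 0 && progress < array_size)
		sector_nr = array_size - progress;
	else if (delta_disks >= 0 && progress > 0) /* was "> 0" before this fix */
		sector_nr = progress;
	return sector_nr / new_data_disks;         /* sector_div() equivalent */
}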
@@ -4203,6 +4211,9 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped)
 		return 0;
 	}
 
+	/* Allow raid5_quiesce to complete */
+	wait_event(conf->wait_for_overlap, conf->quiesce != 2);
+
 	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
 		return reshape_request(mddev, sector_nr, skipped);
 
@@ -4803,7 +4814,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
 static int run(mddev_t *mddev)
 {
 	raid5_conf_t *conf;
-	int working_disks = 0;
+	int working_disks = 0, chunk_size;
 	mdk_rdev_t *rdev;
 
 	if (mddev->recovery_cp != MaxSector)
@@ -4844,7 +4855,26 @@ static int run(mddev_t *mddev)
 			   (old_disks-max_degraded));
 		/* here_old is the first stripe that we might need to read
 		 * from */
-		if (here_new >= here_old) {
+		if (mddev->delta_disks == 0) {
+			/* We cannot be sure it is safe to start an in-place
+			 * reshape.  It is only safe if user-space is monitoring
+			 * and taking constant backups.
+			 * mdadm always starts a situation like this in
+			 * readonly mode so it can take control before
+			 * allowing any writes.  So just check for that.
+			 */
+			if ((here_new * mddev->new_chunk_sectors !=
+			     here_old * mddev->chunk_sectors) ||
+			    mddev->ro == 0) {
+				printk(KERN_ERR "raid5: in-place reshape must be started"
+				       " in read-only mode - aborting\n");
+				return -EINVAL;
+			}
+		} else if (mddev->delta_disks < 0
+			   ? (here_new * mddev->new_chunk_sectors <=
+			      here_old * mddev->chunk_sectors)
+			   : (here_new * mddev->new_chunk_sectors >=
+			      here_old * mddev->chunk_sectors)) {
 			/* Reading from the same stripe as writing to - bad */
 			printk(KERN_ERR "raid5: reshape_position too early for "
 			       "auto-recovery - aborting.\n");
@@ -4958,6 +4988,14 @@ static int run(mddev_t *mddev)
 	md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
 
 	blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec);
+	chunk_size = mddev->chunk_sectors << 9;
+	blk_queue_io_min(mddev->queue, chunk_size);
+	blk_queue_io_opt(mddev->queue, chunk_size *
+			 (conf->raid_disks - conf->max_degraded));
+
+	list_for_each_entry(rdev, &mddev->disks, same_set)
+		disk_stack_limits(mddev->gendisk, rdev->bdev,
+				  rdev->data_offset << 9);
 
 	return 0;
 abort:
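The new queue-limit calls export the array geometry to upper layers: io_min advertises one chunk as the smallest efficient request and io_opt a full stripe (chunk size times the data-disk count, raid_disks minus max_degraded), while disk_stack_limits folds each member device's limits into the md queue. A worked example with made-up numbers (a 6-disk RAID6 with 64KiB chunks, not values from the commit):

#include <stdio.h>

int main(void)
{
	unsigned int chunk_sectors = 128;  /* 64 KiB in 512-byte sectors */
	unsigned int raid_disks = 6, max_degraded = 2;

	unsigned int chunk_size = chunk_sectors << 9;                   /* bytes */
	unsigned int io_min = chunk_size;                               /* one chunk */
	unsigned int io_opt = chunk_size * (raid_disks - max_degraded); /* full stripe */

	printf("io_min=%u io_opt=%u\n", io_min, io_opt); /* 65536 262144 */
	return 0;
}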
@@ -5185,6 +5223,7 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors)
 		return -EINVAL;
 	set_capacity(mddev->gendisk, mddev->array_sectors);
 	mddev->changed = 1;
+	revalidate_disk(mddev->gendisk);
 	if (sectors > mddev->dev_sectors && mddev->recovery_cp == MaxSector) {
 		mddev->recovery_cp = mddev->dev_sectors;
 		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
@@ -5330,7 +5369,7 @@ static int raid5_start_reshape(mddev_t *mddev)
 		spin_unlock_irqrestore(&conf->device_lock, flags);
 	}
 	mddev->raid_disks = conf->raid_disks;
-	mddev->reshape_position = 0;
+	mddev->reshape_position = conf->reshape_progress;
 	set_bit(MD_CHANGE_DEVS, &mddev->flags);
 
 	clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
@@ -5385,7 +5424,6 @@ static void end_reshape(raid5_conf_t *conf)
  */
 static void raid5_finish_reshape(mddev_t *mddev)
 {
-	struct block_device *bdev;
 	raid5_conf_t *conf = mddev->private;
 
 	if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
@@ -5394,15 +5432,7 @@ static void raid5_finish_reshape(mddev_t *mddev)
 		md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
 		set_capacity(mddev->gendisk, mddev->array_sectors);
 		mddev->changed = 1;
-
-		bdev = bdget_disk(mddev->gendisk, 0);
-		if (bdev) {
-			mutex_lock(&bdev->bd_inode->i_mutex);
-			i_size_write(bdev->bd_inode,
-				     (loff_t)mddev->array_sectors << 9);
-			mutex_unlock(&bdev->bd_inode->i_mutex);
-			bdput(bdev);
-		}
+		revalidate_disk(mddev->gendisk);
 	} else {
 		int d;
 		mddev->degraded = conf->raid_disks;
@@ -5413,8 +5443,15 @@ static void raid5_finish_reshape(mddev_t *mddev)
 			mddev->degraded--;
 		for (d = conf->raid_disks ;
 		     d < conf->raid_disks - mddev->delta_disks;
-		     d++)
-			raid5_remove_disk(mddev, d);
+		     d++) {
+			mdk_rdev_t *rdev = conf->disks[d].rdev;
+			if (rdev && raid5_remove_disk(mddev, d) == 0) {
+				char nm[20];
+				sprintf(nm, "rd%d", rdev->raid_disk);
+				sysfs_remove_link(&mddev->kobj, nm);
+				rdev->raid_disk = -1;
+			}
+		}
 	}
 	mddev->layout = conf->algorithm;
 	mddev->chunk_sectors = conf->chunk_sectors;
@@ -5434,12 +5471,18 @@ static void raid5_quiesce(mddev_t *mddev, int state)
 
 	case 1: /* stop all writes */
 		spin_lock_irq(&conf->device_lock);
-		conf->quiesce = 1;
+		/* '2' tells resync/reshape to pause so that all
+		 * active stripes can drain
+		 */
+		conf->quiesce = 2;
 		wait_event_lock_irq(conf->wait_for_stripe,
 				    atomic_read(&conf->active_stripes) == 0 &&
 				    atomic_read(&conf->active_aligned_reads) == 0,
 				    conf->device_lock, /* nothing */);
+		conf->quiesce = 1;
 		spin_unlock_irq(&conf->device_lock);
+		/* allow reshape to continue */
+		wake_up(&conf->wait_for_overlap);
 		break;
 
 	case 0: /* re-enable writes */
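Together with the wait_event() added to sync_request() above, this hunk forms a three-state handshake: quiesce moves 0 -> 2 -> 1, where '2' pauses resync/reshape while active stripes drain and '1' holds off writes; the final wake_up() releases anyone parked on wait_for_overlap. A userspace sketch of the handshake, with pthreads standing in for kernel wait queues (illustrative only):

#include <pthread.h>

static pthread_mutex_t device_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t wait_for_stripe = PTHREAD_COND_INITIALIZER;
static pthread_cond_t wait_for_overlap = PTHREAD_COND_INITIALIZER;
static int quiesce;          /* 0 = running, 2 = draining, 1 = quiesced */
static int active_stripes;   /* stands in for conf->active_stripes */

static void stop_all_writes(void)              /* raid5_quiesce(mddev, 1) */
{
	pthread_mutex_lock(&device_lock);
	quiesce = 2;                           /* tell resync/reshape to pause */
	while (active_stripes != 0)            /* wait_event_lock_irq() */
		pthread_cond_wait(&wait_for_stripe, &device_lock);
	quiesce = 1;                           /* fully quiesced */
	pthread_mutex_unlock(&device_lock);
	pthread_cond_broadcast(&wait_for_overlap); /* allow reshape to continue */
}

static void sync_request_gate(void)            /* wait_event(..., quiesce != 2) */
{
	pthread_mutex_lock(&device_lock);
	while (quiesce == 2)
		pthread_cond_wait(&wait_for_overlap, &device_lock);
	pthread_mutex_unlock(&device_lock);
}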