aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2009-07-01 13:31:26 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2009-07-01 13:31:26 -0400
commit 544ae5f96e14998cabc637fa20cf409eb92a0dd0 (patch)
tree 163523ded713a8b90ac68543979e127795ecbc58
parent 7b85425fac72588674d5c71604af618f690c91d7 (diff)
parent e62e58a5ffdc98ac28d8dbd070c857620d541f99 (diff)
Merge branch 'for-linus' of git://neil.brown.name/md
* 'for-linus' of git://neil.brown.name/md:
  md: use interruptible wait when duration is controlled by userspace.
  md/raid5: suspend shouldn't affect read requests.
  md: tidy up error paths in md_alloc
  md: fix error path when duplicate name is found on md device creation.
  md: avoid dereferencing NULL pointer when accessing suspend_* sysfs attributes.
  md: Use new topology calls to indicate alignment and I/O sizes
-rw-r--r-- drivers/md/linear.c 4
-rw-r--r-- drivers/md/md.c 56
-rw-r--r-- drivers/md/multipath.c 7
-rw-r--r-- drivers/md/raid0.c 9
-rw-r--r-- drivers/md/raid1.c 9
-rw-r--r-- drivers/md/raid10.c 19
-rw-r--r-- drivers/md/raid5.c 28
7 files changed, 84 insertions, 48 deletions
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 15c8b7b25a9b..5810fa906af0 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -166,8 +166,8 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
166 rdev->sectors = sectors * mddev->chunk_sectors; 166 rdev->sectors = sectors * mddev->chunk_sectors;
167 } 167 }
168 168
169 blk_queue_stack_limits(mddev->queue, 169 disk_stack_limits(mddev->gendisk, rdev->bdev,
170 rdev->bdev->bd_disk->queue); 170 rdev->data_offset << 9);
171 /* as we don't honour merge_bvec_fn, we must never risk 171 /* as we don't honour merge_bvec_fn, we must never risk
172 * violating it, so limit ->max_sector to one PAGE, as 172 * violating it, so limit ->max_sector to one PAGE, as
173 * a one page request is never in violation. 173 * a one page request is never in violation.
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 09be637d52cb..0f4a70c43ffc 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -3573,7 +3573,8 @@ suspend_lo_store(mddev_t *mddev, const char *buf, size_t len)
3573 char *e; 3573 char *e;
3574 unsigned long long new = simple_strtoull(buf, &e, 10); 3574 unsigned long long new = simple_strtoull(buf, &e, 10);
3575 3575
3576 if (mddev->pers->quiesce == NULL) 3576 if (mddev->pers == NULL ||
3577 mddev->pers->quiesce == NULL)
3577 return -EINVAL; 3578 return -EINVAL;
3578 if (buf == e || (*e && *e != '\n')) 3579 if (buf == e || (*e && *e != '\n'))
3579 return -EINVAL; 3580 return -EINVAL;
@@ -3601,7 +3602,8 @@ suspend_hi_store(mddev_t *mddev, const char *buf, size_t len)
3601 char *e; 3602 char *e;
3602 unsigned long long new = simple_strtoull(buf, &e, 10); 3603 unsigned long long new = simple_strtoull(buf, &e, 10);
3603 3604
3604 if (mddev->pers->quiesce == NULL) 3605 if (mddev->pers == NULL ||
3606 mddev->pers->quiesce == NULL)
3605 return -EINVAL; 3607 return -EINVAL;
3606 if (buf == e || (*e && *e != '\n')) 3608 if (buf == e || (*e && *e != '\n'))
3607 return -EINVAL; 3609 return -EINVAL;
@@ -3844,11 +3846,9 @@ static int md_alloc(dev_t dev, char *name)
3844 flush_scheduled_work(); 3846 flush_scheduled_work();
3845 3847
3846 mutex_lock(&disks_mutex); 3848 mutex_lock(&disks_mutex);
3847 if (mddev->gendisk) { 3849 error = -EEXIST;
3848 mutex_unlock(&disks_mutex); 3850 if (mddev->gendisk)
3849 mddev_put(mddev); 3851 goto abort;
3850 return -EEXIST;
3851 }
3852 3852
3853 if (name) { 3853 if (name) {
3854 /* Need to ensure that 'name' is not a duplicate. 3854 /* Need to ensure that 'name' is not a duplicate.
@@ -3860,17 +3860,15 @@ static int md_alloc(dev_t dev, char *name)
3860 if (mddev2->gendisk && 3860 if (mddev2->gendisk &&
3861 strcmp(mddev2->gendisk->disk_name, name) == 0) { 3861 strcmp(mddev2->gendisk->disk_name, name) == 0) {
3862 spin_unlock(&all_mddevs_lock); 3862 spin_unlock(&all_mddevs_lock);
3863 return -EEXIST; 3863 goto abort;
3864 } 3864 }
3865 spin_unlock(&all_mddevs_lock); 3865 spin_unlock(&all_mddevs_lock);
3866 } 3866 }
3867 3867
3868 error = -ENOMEM;
3868 mddev->queue = blk_alloc_queue(GFP_KERNEL); 3869 mddev->queue = blk_alloc_queue(GFP_KERNEL);
3869 if (!mddev->queue) { 3870 if (!mddev->queue)
3870 mutex_unlock(&disks_mutex); 3871 goto abort;
3871 mddev_put(mddev);
3872 return -ENOMEM;
3873 }
3874 mddev->queue->queuedata = mddev; 3872 mddev->queue->queuedata = mddev;
3875 3873
3876 /* Can be unlocked because the queue is new: no concurrency */ 3874 /* Can be unlocked because the queue is new: no concurrency */
@@ -3880,11 +3878,9 @@ static int md_alloc(dev_t dev, char *name)
3880 3878
3881 disk = alloc_disk(1 << shift); 3879 disk = alloc_disk(1 << shift);
3882 if (!disk) { 3880 if (!disk) {
3883 mutex_unlock(&disks_mutex);
3884 blk_cleanup_queue(mddev->queue); 3881 blk_cleanup_queue(mddev->queue);
3885 mddev->queue = NULL; 3882 mddev->queue = NULL;
3886 mddev_put(mddev); 3883 goto abort;
3887 return -ENOMEM;
3888 } 3884 }
3889 disk->major = MAJOR(mddev->unit); 3885 disk->major = MAJOR(mddev->unit);
3890 disk->first_minor = unit << shift; 3886 disk->first_minor = unit << shift;
@@ -3906,16 +3902,22 @@ static int md_alloc(dev_t dev, char *name)
3906 mddev->gendisk = disk; 3902 mddev->gendisk = disk;
3907 error = kobject_init_and_add(&mddev->kobj, &md_ktype, 3903 error = kobject_init_and_add(&mddev->kobj, &md_ktype,
3908 &disk_to_dev(disk)->kobj, "%s", "md"); 3904 &disk_to_dev(disk)->kobj, "%s", "md");
3909 mutex_unlock(&disks_mutex); 3905 if (error) {
3910 if (error) 3906 /* This isn't possible, but as kobject_init_and_add is marked
3907 * __must_check, we must do something with the result
3908 */
3911 printk(KERN_WARNING "md: cannot register %s/md - name in use\n", 3909 printk(KERN_WARNING "md: cannot register %s/md - name in use\n",
3912 disk->disk_name); 3910 disk->disk_name);
3913 else { 3911 error = 0;
3912 }
3913 abort:
3914 mutex_unlock(&disks_mutex);
3915 if (!error) {
3914 kobject_uevent(&mddev->kobj, KOBJ_ADD); 3916 kobject_uevent(&mddev->kobj, KOBJ_ADD);
3915 mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, "array_state"); 3917 mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, "array_state");
3916 } 3918 }
3917 mddev_put(mddev); 3919 mddev_put(mddev);
3918 return 0; 3920 return error;
3919} 3921}
3920 3922
3921static struct kobject *md_probe(dev_t dev, int *part, void *data) 3923static struct kobject *md_probe(dev_t dev, int *part, void *data)
@@ -6334,10 +6336,16 @@ void md_do_sync(mddev_t *mddev)
6334 sysfs_notify(&mddev->kobj, NULL, "sync_completed"); 6336 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
6335 } 6337 }
6336 6338
6337 if (j >= mddev->resync_max) 6339 while (j >= mddev->resync_max && !kthread_should_stop()) {
6338 wait_event(mddev->recovery_wait, 6340 /* As this condition is controlled by user-space,
6339 mddev->resync_max > j 6341 * we can block indefinitely, so use '_interruptible'
6340 || kthread_should_stop()); 6342 * to avoid triggering warnings.
6343 */
6344 flush_signals(current); /* just in case */
6345 wait_event_interruptible(mddev->recovery_wait,
6346 mddev->resync_max > j
6347 || kthread_should_stop());
6348 }
6341 6349
6342 if (kthread_should_stop()) 6350 if (kthread_should_stop())
6343 goto interrupted; 6351 goto interrupted;
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index cbe368fa6598..237fe3fd235c 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -294,7 +294,8 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
294 for (path = first; path <= last; path++) 294 for (path = first; path <= last; path++)
295 if ((p=conf->multipaths+path)->rdev == NULL) { 295 if ((p=conf->multipaths+path)->rdev == NULL) {
296 q = rdev->bdev->bd_disk->queue; 296 q = rdev->bdev->bd_disk->queue;
297 blk_queue_stack_limits(mddev->queue, q); 297 disk_stack_limits(mddev->gendisk, rdev->bdev,
298 rdev->data_offset << 9);
298 299
299 /* as we don't honour merge_bvec_fn, we must never risk 300 /* as we don't honour merge_bvec_fn, we must never risk
300 * violating it, so limit ->max_sector to one PAGE, as 301 * violating it, so limit ->max_sector to one PAGE, as
@@ -463,9 +464,9 @@ static int multipath_run (mddev_t *mddev)
463 464
464 disk = conf->multipaths + disk_idx; 465 disk = conf->multipaths + disk_idx;
465 disk->rdev = rdev; 466 disk->rdev = rdev;
467 disk_stack_limits(mddev->gendisk, rdev->bdev,
468 rdev->data_offset << 9);
466 469
467 blk_queue_stack_limits(mddev->queue,
468 rdev->bdev->bd_disk->queue);
469 /* as we don't honour merge_bvec_fn, we must never risk 470 /* as we don't honour merge_bvec_fn, we must never risk
470 * violating it, not that we ever expect a device with 471 * violating it, not that we ever expect a device with
471 * a merge_bvec_fn to be involved in multipath */ 472 * a merge_bvec_fn to be involved in multipath */
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index ab4a489d8695..335f490dcad6 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -170,8 +170,8 @@ static int create_strip_zones(mddev_t *mddev)
170 } 170 }
171 dev[j] = rdev1; 171 dev[j] = rdev1;
172 172
173 blk_queue_stack_limits(mddev->queue, 173 disk_stack_limits(mddev->gendisk, rdev1->bdev,
174 rdev1->bdev->bd_disk->queue); 174 rdev1->data_offset << 9);
175 /* as we don't honour merge_bvec_fn, we must never risk 175 /* as we don't honour merge_bvec_fn, we must never risk
176 * violating it, so limit ->max_sector to one PAGE, as 176 * violating it, so limit ->max_sector to one PAGE, as
177 * a one page request is never in violation. 177 * a one page request is never in violation.
@@ -250,6 +250,11 @@ static int create_strip_zones(mddev_t *mddev)
250 mddev->chunk_sectors << 9); 250 mddev->chunk_sectors << 9);
251 goto abort; 251 goto abort;
252 } 252 }
253
254 blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
255 blk_queue_io_opt(mddev->queue,
256 (mddev->chunk_sectors << 9) * mddev->raid_disks);
257
253 printk(KERN_INFO "raid0: done.\n"); 258 printk(KERN_INFO "raid0: done.\n");
254 mddev->private = conf; 259 mddev->private = conf;
255 return 0; 260 return 0;
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 89939a7aef57..0569efba0c02 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1123,8 +1123,8 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
1123 for (mirror = first; mirror <= last; mirror++) 1123 for (mirror = first; mirror <= last; mirror++)
1124 if ( !(p=conf->mirrors+mirror)->rdev) { 1124 if ( !(p=conf->mirrors+mirror)->rdev) {
1125 1125
1126 blk_queue_stack_limits(mddev->queue, 1126 disk_stack_limits(mddev->gendisk, rdev->bdev,
1127 rdev->bdev->bd_disk->queue); 1127 rdev->data_offset << 9);
1128 /* as we don't honour merge_bvec_fn, we must never risk 1128 /* as we don't honour merge_bvec_fn, we must never risk
1129 * violating it, so limit ->max_sector to one PAGE, as 1129 * violating it, so limit ->max_sector to one PAGE, as
1130 * a one page request is never in violation. 1130 * a one page request is never in violation.
@@ -1988,9 +1988,8 @@ static int run(mddev_t *mddev)
1988 disk = conf->mirrors + disk_idx; 1988 disk = conf->mirrors + disk_idx;
1989 1989
1990 disk->rdev = rdev; 1990 disk->rdev = rdev;
1991 1991 disk_stack_limits(mddev->gendisk, rdev->bdev,
1992 blk_queue_stack_limits(mddev->queue, 1992 rdev->data_offset << 9);
1993 rdev->bdev->bd_disk->queue);
1994 /* as we don't honour merge_bvec_fn, we must never risk 1993 /* as we don't honour merge_bvec_fn, we must never risk
1995 * violating it, so limit ->max_sector to one PAGE, as 1994 * violating it, so limit ->max_sector to one PAGE, as
1996 * a one page request is never in violation. 1995 * a one page request is never in violation.
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index ae12ceafe10c..7298a5e5a183 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1151,8 +1151,8 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
1151 for ( ; mirror <= last ; mirror++) 1151 for ( ; mirror <= last ; mirror++)
1152 if ( !(p=conf->mirrors+mirror)->rdev) { 1152 if ( !(p=conf->mirrors+mirror)->rdev) {
1153 1153
1154 blk_queue_stack_limits(mddev->queue, 1154 disk_stack_limits(mddev->gendisk, rdev->bdev,
1155 rdev->bdev->bd_disk->queue); 1155 rdev->data_offset << 9);
1156 /* as we don't honour merge_bvec_fn, we must never risk 1156 /* as we don't honour merge_bvec_fn, we must never risk
1157 * violating it, so limit ->max_sector to one PAGE, as 1157 * violating it, so limit ->max_sector to one PAGE, as
1158 * a one page request is never in violation. 1158 * a one page request is never in violation.
@@ -2044,7 +2044,7 @@ raid10_size(mddev_t *mddev, sector_t sectors, int raid_disks)
2044static int run(mddev_t *mddev) 2044static int run(mddev_t *mddev)
2045{ 2045{
2046 conf_t *conf; 2046 conf_t *conf;
2047 int i, disk_idx; 2047 int i, disk_idx, chunk_size;
2048 mirror_info_t *disk; 2048 mirror_info_t *disk;
2049 mdk_rdev_t *rdev; 2049 mdk_rdev_t *rdev;
2050 int nc, fc, fo; 2050 int nc, fc, fo;
@@ -2130,6 +2130,14 @@ static int run(mddev_t *mddev)
2130 spin_lock_init(&conf->device_lock); 2130 spin_lock_init(&conf->device_lock);
2131 mddev->queue->queue_lock = &conf->device_lock; 2131 mddev->queue->queue_lock = &conf->device_lock;
2132 2132
2133 chunk_size = mddev->chunk_sectors << 9;
2134 blk_queue_io_min(mddev->queue, chunk_size);
2135 if (conf->raid_disks % conf->near_copies)
2136 blk_queue_io_opt(mddev->queue, chunk_size * conf->raid_disks);
2137 else
2138 blk_queue_io_opt(mddev->queue, chunk_size *
2139 (conf->raid_disks / conf->near_copies));
2140
2133 list_for_each_entry(rdev, &mddev->disks, same_set) { 2141 list_for_each_entry(rdev, &mddev->disks, same_set) {
2134 disk_idx = rdev->raid_disk; 2142 disk_idx = rdev->raid_disk;
2135 if (disk_idx >= mddev->raid_disks 2143 if (disk_idx >= mddev->raid_disks
@@ -2138,9 +2146,8 @@ static int run(mddev_t *mddev)
2138 disk = conf->mirrors + disk_idx; 2146 disk = conf->mirrors + disk_idx;
2139 2147
2140 disk->rdev = rdev; 2148 disk->rdev = rdev;
2141 2149 disk_stack_limits(mddev->gendisk, rdev->bdev,
2142 blk_queue_stack_limits(mddev->queue, 2150 rdev->data_offset << 9);
2143 rdev->bdev->bd_disk->queue);
2144 /* as we don't honour merge_bvec_fn, we must never risk 2151 /* as we don't honour merge_bvec_fn, we must never risk
2145 * violating it, so limit ->max_sector to one PAGE, as 2152 * violating it, so limit ->max_sector to one PAGE, as
2146 * a one page request is never in violation. 2153 * a one page request is never in violation.
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index f9f991e6e138..37835538b58e 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3699,13 +3699,21 @@ static int make_request(struct request_queue *q, struct bio * bi)
3699 goto retry; 3699 goto retry;
3700 } 3700 }
3701 } 3701 }
3702 /* FIXME what if we get a false positive because these 3702
3703 * are being updated. 3703 if (bio_data_dir(bi) == WRITE &&
3704 */ 3704 logical_sector >= mddev->suspend_lo &&
3705 if (logical_sector >= mddev->suspend_lo &&
3706 logical_sector < mddev->suspend_hi) { 3705 logical_sector < mddev->suspend_hi) {
3707 release_stripe(sh); 3706 release_stripe(sh);
3708 schedule(); 3707 /* As the suspend_* range is controlled by
3708 * userspace, we want an interruptible
3709 * wait.
3710 */
3711 flush_signals(current);
3712 prepare_to_wait(&conf->wait_for_overlap,
3713 &w, TASK_INTERRUPTIBLE);
3714 if (logical_sector >= mddev->suspend_lo &&
3715 logical_sector < mddev->suspend_hi)
3716 schedule();
3709 goto retry; 3717 goto retry;
3710 } 3718 }
3711 3719
@@ -4452,7 +4460,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
4452static int run(mddev_t *mddev) 4460static int run(mddev_t *mddev)
4453{ 4461{
4454 raid5_conf_t *conf; 4462 raid5_conf_t *conf;
4455 int working_disks = 0; 4463 int working_disks = 0, chunk_size;
4456 mdk_rdev_t *rdev; 4464 mdk_rdev_t *rdev;
4457 4465
4458 if (mddev->recovery_cp != MaxSector) 4466 if (mddev->recovery_cp != MaxSector)
@@ -4607,6 +4615,14 @@ static int run(mddev_t *mddev)
4607 md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); 4615 md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
4608 4616
4609 blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec); 4617 blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec);
4618 chunk_size = mddev->chunk_sectors << 9;
4619 blk_queue_io_min(mddev->queue, chunk_size);
4620 blk_queue_io_opt(mddev->queue, chunk_size *
4621 (conf->raid_disks - conf->max_degraded));
4622
4623 list_for_each_entry(rdev, &mddev->disks, same_set)
4624 disk_stack_limits(mddev->gendisk, rdev->bdev,
4625 rdev->data_offset << 9);
4610 4626
4611 return 0; 4627 return 0;
4612abort: 4628abort: