diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-07-01 13:31:26 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-07-01 13:31:26 -0400 |
commit | 544ae5f96e14998cabc637fa20cf409eb92a0dd0 (patch) | |
tree | 163523ded713a8b90ac68543979e127795ecbc58 | |
parent | 7b85425fac72588674d5c71604af618f690c91d7 (diff) | |
parent | e62e58a5ffdc98ac28d8dbd070c857620d541f99 (diff) |
Merge branch 'for-linus' of git://neil.brown.name/md

* 'for-linus' of git://neil.brown.name/md:
  md: use interruptible wait when duration is controlled by userspace.
  md/raid5: suspend shouldn't affect read requests.
  md: tidy up error paths in md_alloc
  md: fix error path when duplicate name is found on md device creation.
  md: avoid dereferencing NULL pointer when accessing suspend_* sysfs attributes.
  md: Use new topology calls to indicate alignment and I/O sizes
-rw-r--r-- | drivers/md/linear.c | 4 | ||||
-rw-r--r-- | drivers/md/md.c | 56 | ||||
-rw-r--r-- | drivers/md/multipath.c | 7 | ||||
-rw-r--r-- | drivers/md/raid0.c | 9 | ||||
-rw-r--r-- | drivers/md/raid1.c | 9 | ||||
-rw-r--r-- | drivers/md/raid10.c | 19 | ||||
-rw-r--r-- | drivers/md/raid5.c | 28 |
7 files changed, 84 insertions, 48 deletions
diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 15c8b7b25a9b..5810fa906af0 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c | |||
@@ -166,8 +166,8 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) | |||
166 | rdev->sectors = sectors * mddev->chunk_sectors; | 166 | rdev->sectors = sectors * mddev->chunk_sectors; |
167 | } | 167 | } |
168 | 168 | ||
169 | blk_queue_stack_limits(mddev->queue, | 169 | disk_stack_limits(mddev->gendisk, rdev->bdev, |
170 | rdev->bdev->bd_disk->queue); | 170 | rdev->data_offset << 9); |
171 | /* as we don't honour merge_bvec_fn, we must never risk | 171 | /* as we don't honour merge_bvec_fn, we must never risk |
172 | * violating it, so limit ->max_sector to one PAGE, as | 172 | * violating it, so limit ->max_sector to one PAGE, as |
173 | * a one page request is never in violation. | 173 | * a one page request is never in violation. |
diff --git a/drivers/md/md.c b/drivers/md/md.c index 09be637d52cb..0f4a70c43ffc 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -3573,7 +3573,8 @@ suspend_lo_store(mddev_t *mddev, const char *buf, size_t len) | |||
3573 | char *e; | 3573 | char *e; |
3574 | unsigned long long new = simple_strtoull(buf, &e, 10); | 3574 | unsigned long long new = simple_strtoull(buf, &e, 10); |
3575 | 3575 | ||
3576 | if (mddev->pers->quiesce == NULL) | 3576 | if (mddev->pers == NULL || |
3577 | mddev->pers->quiesce == NULL) | ||
3577 | return -EINVAL; | 3578 | return -EINVAL; |
3578 | if (buf == e || (*e && *e != '\n')) | 3579 | if (buf == e || (*e && *e != '\n')) |
3579 | return -EINVAL; | 3580 | return -EINVAL; |
@@ -3601,7 +3602,8 @@ suspend_hi_store(mddev_t *mddev, const char *buf, size_t len) | |||
3601 | char *e; | 3602 | char *e; |
3602 | unsigned long long new = simple_strtoull(buf, &e, 10); | 3603 | unsigned long long new = simple_strtoull(buf, &e, 10); |
3603 | 3604 | ||
3604 | if (mddev->pers->quiesce == NULL) | 3605 | if (mddev->pers == NULL || |
3606 | mddev->pers->quiesce == NULL) | ||
3605 | return -EINVAL; | 3607 | return -EINVAL; |
3606 | if (buf == e || (*e && *e != '\n')) | 3608 | if (buf == e || (*e && *e != '\n')) |
3607 | return -EINVAL; | 3609 | return -EINVAL; |
@@ -3844,11 +3846,9 @@ static int md_alloc(dev_t dev, char *name) | |||
3844 | flush_scheduled_work(); | 3846 | flush_scheduled_work(); |
3845 | 3847 | ||
3846 | mutex_lock(&disks_mutex); | 3848 | mutex_lock(&disks_mutex); |
3847 | if (mddev->gendisk) { | 3849 | error = -EEXIST; |
3848 | mutex_unlock(&disks_mutex); | 3850 | if (mddev->gendisk) |
3849 | mddev_put(mddev); | 3851 | goto abort; |
3850 | return -EEXIST; | ||
3851 | } | ||
3852 | 3852 | ||
3853 | if (name) { | 3853 | if (name) { |
3854 | /* Need to ensure that 'name' is not a duplicate. | 3854 | /* Need to ensure that 'name' is not a duplicate. |
@@ -3860,17 +3860,15 @@ static int md_alloc(dev_t dev, char *name) | |||
3860 | if (mddev2->gendisk && | 3860 | if (mddev2->gendisk && |
3861 | strcmp(mddev2->gendisk->disk_name, name) == 0) { | 3861 | strcmp(mddev2->gendisk->disk_name, name) == 0) { |
3862 | spin_unlock(&all_mddevs_lock); | 3862 | spin_unlock(&all_mddevs_lock); |
3863 | return -EEXIST; | 3863 | goto abort; |
3864 | } | 3864 | } |
3865 | spin_unlock(&all_mddevs_lock); | 3865 | spin_unlock(&all_mddevs_lock); |
3866 | } | 3866 | } |
3867 | 3867 | ||
3868 | error = -ENOMEM; | ||
3868 | mddev->queue = blk_alloc_queue(GFP_KERNEL); | 3869 | mddev->queue = blk_alloc_queue(GFP_KERNEL); |
3869 | if (!mddev->queue) { | 3870 | if (!mddev->queue) |
3870 | mutex_unlock(&disks_mutex); | 3871 | goto abort; |
3871 | mddev_put(mddev); | ||
3872 | return -ENOMEM; | ||
3873 | } | ||
3874 | mddev->queue->queuedata = mddev; | 3872 | mddev->queue->queuedata = mddev; |
3875 | 3873 | ||
3876 | /* Can be unlocked because the queue is new: no concurrency */ | 3874 | /* Can be unlocked because the queue is new: no concurrency */ |
@@ -3880,11 +3878,9 @@ static int md_alloc(dev_t dev, char *name) | |||
3880 | 3878 | ||
3881 | disk = alloc_disk(1 << shift); | 3879 | disk = alloc_disk(1 << shift); |
3882 | if (!disk) { | 3880 | if (!disk) { |
3883 | mutex_unlock(&disks_mutex); | ||
3884 | blk_cleanup_queue(mddev->queue); | 3881 | blk_cleanup_queue(mddev->queue); |
3885 | mddev->queue = NULL; | 3882 | mddev->queue = NULL; |
3886 | mddev_put(mddev); | 3883 | goto abort; |
3887 | return -ENOMEM; | ||
3888 | } | 3884 | } |
3889 | disk->major = MAJOR(mddev->unit); | 3885 | disk->major = MAJOR(mddev->unit); |
3890 | disk->first_minor = unit << shift; | 3886 | disk->first_minor = unit << shift; |
@@ -3906,16 +3902,22 @@ static int md_alloc(dev_t dev, char *name) | |||
3906 | mddev->gendisk = disk; | 3902 | mddev->gendisk = disk; |
3907 | error = kobject_init_and_add(&mddev->kobj, &md_ktype, | 3903 | error = kobject_init_and_add(&mddev->kobj, &md_ktype, |
3908 | &disk_to_dev(disk)->kobj, "%s", "md"); | 3904 | &disk_to_dev(disk)->kobj, "%s", "md"); |
3909 | mutex_unlock(&disks_mutex); | 3905 | if (error) { |
3910 | if (error) | 3906 | /* This isn't possible, but as kobject_init_and_add is marked |
3907 | * __must_check, we must do something with the result | ||
3908 | */ | ||
3911 | printk(KERN_WARNING "md: cannot register %s/md - name in use\n", | 3909 | printk(KERN_WARNING "md: cannot register %s/md - name in use\n", |
3912 | disk->disk_name); | 3910 | disk->disk_name); |
3913 | else { | 3911 | error = 0; |
3912 | } | ||
3913 | abort: | ||
3914 | mutex_unlock(&disks_mutex); | ||
3915 | if (!error) { | ||
3914 | kobject_uevent(&mddev->kobj, KOBJ_ADD); | 3916 | kobject_uevent(&mddev->kobj, KOBJ_ADD); |
3915 | mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, "array_state"); | 3917 | mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, "array_state"); |
3916 | } | 3918 | } |
3917 | mddev_put(mddev); | 3919 | mddev_put(mddev); |
3918 | return 0; | 3920 | return error; |
3919 | } | 3921 | } |
3920 | 3922 | ||
3921 | static struct kobject *md_probe(dev_t dev, int *part, void *data) | 3923 | static struct kobject *md_probe(dev_t dev, int *part, void *data) |
@@ -6334,10 +6336,16 @@ void md_do_sync(mddev_t *mddev) | |||
6334 | sysfs_notify(&mddev->kobj, NULL, "sync_completed"); | 6336 | sysfs_notify(&mddev->kobj, NULL, "sync_completed"); |
6335 | } | 6337 | } |
6336 | 6338 | ||
6337 | if (j >= mddev->resync_max) | 6339 | while (j >= mddev->resync_max && !kthread_should_stop()) { |
6338 | wait_event(mddev->recovery_wait, | 6340 | /* As this condition is controlled by user-space, |
6339 | mddev->resync_max > j | 6341 | * we can block indefinitely, so use '_interruptible' |
6340 | || kthread_should_stop()); | 6342 | * to avoid triggering warnings. |
6343 | */ | ||
6344 | flush_signals(current); /* just in case */ | ||
6345 | wait_event_interruptible(mddev->recovery_wait, | ||
6346 | mddev->resync_max > j | ||
6347 | || kthread_should_stop()); | ||
6348 | } | ||
6341 | 6349 | ||
6342 | if (kthread_should_stop()) | 6350 | if (kthread_should_stop()) |
6343 | goto interrupted; | 6351 | goto interrupted; |
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index cbe368fa6598..237fe3fd235c 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c | |||
@@ -294,7 +294,8 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) | |||
294 | for (path = first; path <= last; path++) | 294 | for (path = first; path <= last; path++) |
295 | if ((p=conf->multipaths+path)->rdev == NULL) { | 295 | if ((p=conf->multipaths+path)->rdev == NULL) { |
296 | q = rdev->bdev->bd_disk->queue; | 296 | q = rdev->bdev->bd_disk->queue; |
297 | blk_queue_stack_limits(mddev->queue, q); | 297 | disk_stack_limits(mddev->gendisk, rdev->bdev, |
298 | rdev->data_offset << 9); | ||
298 | 299 | ||
299 | /* as we don't honour merge_bvec_fn, we must never risk | 300 | /* as we don't honour merge_bvec_fn, we must never risk |
300 | * violating it, so limit ->max_sector to one PAGE, as | 301 | * violating it, so limit ->max_sector to one PAGE, as |
@@ -463,9 +464,9 @@ static int multipath_run (mddev_t *mddev) | |||
463 | 464 | ||
464 | disk = conf->multipaths + disk_idx; | 465 | disk = conf->multipaths + disk_idx; |
465 | disk->rdev = rdev; | 466 | disk->rdev = rdev; |
467 | disk_stack_limits(mddev->gendisk, rdev->bdev, | ||
468 | rdev->data_offset << 9); | ||
466 | 469 | ||
467 | blk_queue_stack_limits(mddev->queue, | ||
468 | rdev->bdev->bd_disk->queue); | ||
469 | /* as we don't honour merge_bvec_fn, we must never risk | 470 | /* as we don't honour merge_bvec_fn, we must never risk |
470 | * violating it, not that we ever expect a device with | 471 | * violating it, not that we ever expect a device with |
471 | * a merge_bvec_fn to be involved in multipath */ | 472 | * a merge_bvec_fn to be involved in multipath */ |
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index ab4a489d8695..335f490dcad6 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c | |||
@@ -170,8 +170,8 @@ static int create_strip_zones(mddev_t *mddev) | |||
170 | } | 170 | } |
171 | dev[j] = rdev1; | 171 | dev[j] = rdev1; |
172 | 172 | ||
173 | blk_queue_stack_limits(mddev->queue, | 173 | disk_stack_limits(mddev->gendisk, rdev1->bdev, |
174 | rdev1->bdev->bd_disk->queue); | 174 | rdev1->data_offset << 9); |
175 | /* as we don't honour merge_bvec_fn, we must never risk | 175 | /* as we don't honour merge_bvec_fn, we must never risk |
176 | * violating it, so limit ->max_sector to one PAGE, as | 176 | * violating it, so limit ->max_sector to one PAGE, as |
177 | * a one page request is never in violation. | 177 | * a one page request is never in violation. |
@@ -250,6 +250,11 @@ static int create_strip_zones(mddev_t *mddev) | |||
250 | mddev->chunk_sectors << 9); | 250 | mddev->chunk_sectors << 9); |
251 | goto abort; | 251 | goto abort; |
252 | } | 252 | } |
253 | |||
254 | blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9); | ||
255 | blk_queue_io_opt(mddev->queue, | ||
256 | (mddev->chunk_sectors << 9) * mddev->raid_disks); | ||
257 | |||
253 | printk(KERN_INFO "raid0: done.\n"); | 258 | printk(KERN_INFO "raid0: done.\n"); |
254 | mddev->private = conf; | 259 | mddev->private = conf; |
255 | return 0; | 260 | return 0; |
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 89939a7aef57..0569efba0c02 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
@@ -1123,8 +1123,8 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1123 | for (mirror = first; mirror <= last; mirror++) | 1123 | for (mirror = first; mirror <= last; mirror++) |
1124 | if ( !(p=conf->mirrors+mirror)->rdev) { | 1124 | if ( !(p=conf->mirrors+mirror)->rdev) { |
1125 | 1125 | ||
1126 | blk_queue_stack_limits(mddev->queue, | 1126 | disk_stack_limits(mddev->gendisk, rdev->bdev, |
1127 | rdev->bdev->bd_disk->queue); | 1127 | rdev->data_offset << 9); |
1128 | /* as we don't honour merge_bvec_fn, we must never risk | 1128 | /* as we don't honour merge_bvec_fn, we must never risk |
1129 | * violating it, so limit ->max_sector to one PAGE, as | 1129 | * violating it, so limit ->max_sector to one PAGE, as |
1130 | * a one page request is never in violation. | 1130 | * a one page request is never in violation. |
@@ -1988,9 +1988,8 @@ static int run(mddev_t *mddev) | |||
1988 | disk = conf->mirrors + disk_idx; | 1988 | disk = conf->mirrors + disk_idx; |
1989 | 1989 | ||
1990 | disk->rdev = rdev; | 1990 | disk->rdev = rdev; |
1991 | 1991 | disk_stack_limits(mddev->gendisk, rdev->bdev, | |
1992 | blk_queue_stack_limits(mddev->queue, | 1992 | rdev->data_offset << 9); |
1993 | rdev->bdev->bd_disk->queue); | ||
1994 | /* as we don't honour merge_bvec_fn, we must never risk | 1993 | /* as we don't honour merge_bvec_fn, we must never risk |
1995 | * violating it, so limit ->max_sector to one PAGE, as | 1994 | * violating it, so limit ->max_sector to one PAGE, as |
1996 | * a one page request is never in violation. | 1995 | * a one page request is never in violation. |
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index ae12ceafe10c..7298a5e5a183 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -1151,8 +1151,8 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1151 | for ( ; mirror <= last ; mirror++) | 1151 | for ( ; mirror <= last ; mirror++) |
1152 | if ( !(p=conf->mirrors+mirror)->rdev) { | 1152 | if ( !(p=conf->mirrors+mirror)->rdev) { |
1153 | 1153 | ||
1154 | blk_queue_stack_limits(mddev->queue, | 1154 | disk_stack_limits(mddev->gendisk, rdev->bdev, |
1155 | rdev->bdev->bd_disk->queue); | 1155 | rdev->data_offset << 9); |
1156 | /* as we don't honour merge_bvec_fn, we must never risk | 1156 | /* as we don't honour merge_bvec_fn, we must never risk |
1157 | * violating it, so limit ->max_sector to one PAGE, as | 1157 | * violating it, so limit ->max_sector to one PAGE, as |
1158 | * a one page request is never in violation. | 1158 | * a one page request is never in violation. |
@@ -2044,7 +2044,7 @@ raid10_size(mddev_t *mddev, sector_t sectors, int raid_disks) | |||
2044 | static int run(mddev_t *mddev) | 2044 | static int run(mddev_t *mddev) |
2045 | { | 2045 | { |
2046 | conf_t *conf; | 2046 | conf_t *conf; |
2047 | int i, disk_idx; | 2047 | int i, disk_idx, chunk_size; |
2048 | mirror_info_t *disk; | 2048 | mirror_info_t *disk; |
2049 | mdk_rdev_t *rdev; | 2049 | mdk_rdev_t *rdev; |
2050 | int nc, fc, fo; | 2050 | int nc, fc, fo; |
@@ -2130,6 +2130,14 @@ static int run(mddev_t *mddev) | |||
2130 | spin_lock_init(&conf->device_lock); | 2130 | spin_lock_init(&conf->device_lock); |
2131 | mddev->queue->queue_lock = &conf->device_lock; | 2131 | mddev->queue->queue_lock = &conf->device_lock; |
2132 | 2132 | ||
2133 | chunk_size = mddev->chunk_sectors << 9; | ||
2134 | blk_queue_io_min(mddev->queue, chunk_size); | ||
2135 | if (conf->raid_disks % conf->near_copies) | ||
2136 | blk_queue_io_opt(mddev->queue, chunk_size * conf->raid_disks); | ||
2137 | else | ||
2138 | blk_queue_io_opt(mddev->queue, chunk_size * | ||
2139 | (conf->raid_disks / conf->near_copies)); | ||
2140 | |||
2133 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 2141 | list_for_each_entry(rdev, &mddev->disks, same_set) { |
2134 | disk_idx = rdev->raid_disk; | 2142 | disk_idx = rdev->raid_disk; |
2135 | if (disk_idx >= mddev->raid_disks | 2143 | if (disk_idx >= mddev->raid_disks |
@@ -2138,9 +2146,8 @@ static int run(mddev_t *mddev) | |||
2138 | disk = conf->mirrors + disk_idx; | 2146 | disk = conf->mirrors + disk_idx; |
2139 | 2147 | ||
2140 | disk->rdev = rdev; | 2148 | disk->rdev = rdev; |
2141 | 2149 | disk_stack_limits(mddev->gendisk, rdev->bdev, | |
2142 | blk_queue_stack_limits(mddev->queue, | 2150 | rdev->data_offset << 9); |
2143 | rdev->bdev->bd_disk->queue); | ||
2144 | /* as we don't honour merge_bvec_fn, we must never risk | 2151 | /* as we don't honour merge_bvec_fn, we must never risk |
2145 | * violating it, so limit ->max_sector to one PAGE, as | 2152 | * violating it, so limit ->max_sector to one PAGE, as |
2146 | * a one page request is never in violation. | 2153 | * a one page request is never in violation. |
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index f9f991e6e138..37835538b58e 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -3699,13 +3699,21 @@ static int make_request(struct request_queue *q, struct bio * bi) | |||
3699 | goto retry; | 3699 | goto retry; |
3700 | } | 3700 | } |
3701 | } | 3701 | } |
3702 | /* FIXME what if we get a false positive because these | 3702 | |
3703 | * are being updated. | 3703 | if (bio_data_dir(bi) == WRITE && |
3704 | */ | 3704 | logical_sector >= mddev->suspend_lo && |
3705 | if (logical_sector >= mddev->suspend_lo && | ||
3706 | logical_sector < mddev->suspend_hi) { | 3705 | logical_sector < mddev->suspend_hi) { |
3707 | release_stripe(sh); | 3706 | release_stripe(sh); |
3708 | schedule(); | 3707 | /* As the suspend_* range is controlled by |
3708 | * userspace, we want an interruptible | ||
3709 | * wait. | ||
3710 | */ | ||
3711 | flush_signals(current); | ||
3712 | prepare_to_wait(&conf->wait_for_overlap, | ||
3713 | &w, TASK_INTERRUPTIBLE); | ||
3714 | if (logical_sector >= mddev->suspend_lo && | ||
3715 | logical_sector < mddev->suspend_hi) | ||
3716 | schedule(); | ||
3709 | goto retry; | 3717 | goto retry; |
3710 | } | 3718 | } |
3711 | 3719 | ||
@@ -4452,7 +4460,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) | |||
4452 | static int run(mddev_t *mddev) | 4460 | static int run(mddev_t *mddev) |
4453 | { | 4461 | { |
4454 | raid5_conf_t *conf; | 4462 | raid5_conf_t *conf; |
4455 | int working_disks = 0; | 4463 | int working_disks = 0, chunk_size; |
4456 | mdk_rdev_t *rdev; | 4464 | mdk_rdev_t *rdev; |
4457 | 4465 | ||
4458 | if (mddev->recovery_cp != MaxSector) | 4466 | if (mddev->recovery_cp != MaxSector) |
@@ -4607,6 +4615,14 @@ static int run(mddev_t *mddev) | |||
4607 | md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); | 4615 | md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); |
4608 | 4616 | ||
4609 | blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec); | 4617 | blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec); |
4618 | chunk_size = mddev->chunk_sectors << 9; | ||
4619 | blk_queue_io_min(mddev->queue, chunk_size); | ||
4620 | blk_queue_io_opt(mddev->queue, chunk_size * | ||
4621 | (conf->raid_disks - conf->max_degraded)); | ||
4622 | |||
4623 | list_for_each_entry(rdev, &mddev->disks, same_set) | ||
4624 | disk_stack_limits(mddev->gendisk, rdev->bdev, | ||
4625 | rdev->data_offset << 9); | ||
4610 | 4626 | ||
4611 | return 0; | 4627 | return 0; |
4612 | abort: | 4628 | abort: |