author     Linus Torvalds <torvalds@linux-foundation.org>   2010-08-10 18:38:19 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2010-08-10 18:38:19 -0400
commit     3d30701b58970425e1d45994d6cb82f828924fdd
tree       8b14cf462628bebf8548c1b8c205a674564052d1 /drivers/md/raid5.c
parent     8cbd84f2dd4e52a8771b191030c374ba3e56d291
parent     fd8aa2c1811bf60ccb2d5de0579c6f62aec1772d
Merge branch 'for-linus' of git://neil.brown.name/md
* 'for-linus' of git://neil.brown.name/md: (24 commits)
md: clean up do_md_stop
md: fix another deadlock with removing sysfs attributes.
md: move revalidate_disk() back outside open_mutex
md/raid10: fix deadlock with unaligned read during resync
md/bitmap: separate out loading a bitmap from initialising the structures.
md/bitmap: prepare for storing write-intent-bitmap via dm-dirty-log.
md/bitmap: optimise scanning of empty bitmaps.
md/bitmap: clean up plugging calls.
md/bitmap: reduce dependence on sysfs.
md/bitmap: white space clean up and similar.
md/raid5: export raid5 unplugging interface.
md/plug: optionally use plugger to unplug an array during resync/recovery.
md/raid5: add simple plugging infrastructure.
md/raid5: export is_congested test
raid5: Don't set read-ahead when there is no queue
md: add support for raising dm events.
md: export various start/stop interfaces
md: split out md_rdev_init
md: be more careful setting MD_CHANGE_CLEAN
md/raid5: ensure we create a unique name for kmem_cache when mddev has no gendisk
...
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r--  drivers/md/raid5.c | 168
1 files changed, 102 insertions, 66 deletions
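Most of the raid5.c churn in the diff below comes from the plugging rework brought in by this merge: instead of calling blk_plug_device()/blk_remove_plug() on mddev->queue, the driver now embeds a plug_handle in its raid5_conf_t and goes through md's plugger helpers, so an array that has no request queue of its own (the dm-driven case the series prepares for) can still be plugged and unplugged. The following sketch only illustrates that pattern under stated assumptions; "struct example_conf" and the example_* functions are made up, while plugger_init(), plugger_set_plug(), plugger_flush() and the container_of() trick are taken from the calls visible in the diff itself.

/*
 * Illustrative sketch of the plugger pattern, not the md implementation.
 * struct plug_handle and the plugger_* helpers are assumed to come from
 * md.h in this kernel tree, as used by the raid5 code below.
 */
struct example_conf {
        struct plug_handle plug;        /* embedded plug state, no queue needed */
        /* ... the rest of the per-array state ... */
};

/* unplug callback: recover the owning conf from the embedded handle */
static void example_unplug(struct plug_handle *plug)
{
        struct example_conf *conf =
                container_of(plug, struct example_conf, plug);

        /* kick whatever work was deferred while the array was plugged */
}

static void example_setup(struct example_conf *conf)
{
        plugger_init(&conf->plug, example_unplug);      /* register callback */
        plugger_set_plug(&conf->plug);                  /* defer I/O handling */
}

static void example_teardown(struct example_conf *conf)
{
        plugger_flush(&conf->plug);     /* no unplug callback may still run */
}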
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 20ac2f14376a..866d4b5a144c 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -201,11 +201,11 @@ static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh)
                 if (test_bit(STRIPE_HANDLE, &sh->state)) {
                         if (test_bit(STRIPE_DELAYED, &sh->state)) {
                                 list_add_tail(&sh->lru, &conf->delayed_list);
-                                blk_plug_device(conf->mddev->queue);
+                                plugger_set_plug(&conf->plug);
                         } else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
                                    sh->bm_seq - conf->seq_write > 0) {
                                 list_add_tail(&sh->lru, &conf->bitmap_list);
-                                blk_plug_device(conf->mddev->queue);
+                                plugger_set_plug(&conf->plug);
                         } else {
                                 clear_bit(STRIPE_BIT_DELAY, &sh->state);
                                 list_add_tail(&sh->lru, &conf->handle_list);
@@ -434,7 +434,6 @@ static int has_failed(raid5_conf_t *conf)
 }
 
 static void unplug_slaves(mddev_t *mddev);
-static void raid5_unplug_device(struct request_queue *q);
 
 static struct stripe_head *
 get_active_stripe(raid5_conf_t *conf, sector_t sector,
@@ -464,7 +463,7 @@ get_active_stripe(raid5_conf_t *conf, sector_t sector,
                                     < (conf->max_nr_stripes *3/4)
                                     || !conf->inactive_blocked),
                                    conf->device_lock,
-                                   raid5_unplug_device(conf->mddev->queue)
+                                   md_raid5_unplug_device(conf)
                                    );
                         conf->inactive_blocked = 0;
                 } else
@@ -1337,10 +1336,14 @@ static int grow_stripes(raid5_conf_t *conf, int num)
         struct kmem_cache *sc;
         int devs = max(conf->raid_disks, conf->previous_raid_disks);
 
-        sprintf(conf->cache_name[0],
-                "raid%d-%s", conf->level, mdname(conf->mddev));
-        sprintf(conf->cache_name[1],
-                "raid%d-%s-alt", conf->level, mdname(conf->mddev));
+        if (conf->mddev->gendisk)
+                sprintf(conf->cache_name[0],
+                        "raid%d-%s", conf->level, mdname(conf->mddev));
+        else
+                sprintf(conf->cache_name[0],
+                        "raid%d-%p", conf->level, conf->mddev);
+        sprintf(conf->cache_name[1], "%s-alt", conf->cache_name[0]);
+
         conf->active_name = 0;
         sc = kmem_cache_create(conf->cache_name[conf->active_name],
                sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev),
@@ -3614,7 +3617,7 @@ static void raid5_activate_delayed(raid5_conf_t *conf)
                         list_add_tail(&sh->lru, &conf->hold_list);
                 }
         } else
-                blk_plug_device(conf->mddev->queue);
+                plugger_set_plug(&conf->plug);
 }
 
 static void activate_bit_delay(raid5_conf_t *conf)
@@ -3655,36 +3658,44 @@ static void unplug_slaves(mddev_t *mddev)
         rcu_read_unlock();
 }
 
-static void raid5_unplug_device(struct request_queue *q)
+void md_raid5_unplug_device(raid5_conf_t *conf)
 {
-        mddev_t *mddev = q->queuedata;
-        raid5_conf_t *conf = mddev->private;
         unsigned long flags;
 
         spin_lock_irqsave(&conf->device_lock, flags);
 
-        if (blk_remove_plug(q)) {
+        if (plugger_remove_plug(&conf->plug)) {
                 conf->seq_flush++;
                 raid5_activate_delayed(conf);
         }
-        md_wakeup_thread(mddev->thread);
+        md_wakeup_thread(conf->mddev->thread);
 
         spin_unlock_irqrestore(&conf->device_lock, flags);
 
-        unplug_slaves(mddev);
+        unplug_slaves(conf->mddev);
 }
+EXPORT_SYMBOL_GPL(md_raid5_unplug_device);
 
-static int raid5_congested(void *data, int bits)
+static void raid5_unplug(struct plug_handle *plug)
+{
+        raid5_conf_t *conf = container_of(plug, raid5_conf_t, plug);
+        md_raid5_unplug_device(conf);
+}
+
+static void raid5_unplug_queue(struct request_queue *q)
+{
+        mddev_t *mddev = q->queuedata;
+        md_raid5_unplug_device(mddev->private);
+}
+
+int md_raid5_congested(mddev_t *mddev, int bits)
 {
-        mddev_t *mddev = data;
         raid5_conf_t *conf = mddev->private;
 
         /* No difference between reads and writes.  Just check
          * how busy the stripe_cache is
          */
 
-        if (mddev_congested(mddev, bits))
-                return 1;
         if (conf->inactive_blocked)
                 return 1;
         if (conf->quiesce)
@@ -3694,6 +3705,15 @@ static int raid5_congested(void *data, int bits)
 
         return 0;
 }
+EXPORT_SYMBOL_GPL(md_raid5_congested);
+
+static int raid5_congested(void *data, int bits)
+{
+        mddev_t *mddev = data;
+
+        return mddev_congested(mddev, bits) ||
+                md_raid5_congested(mddev, bits);
+}
 
 /* We want read requests to align with chunks where possible,
  * but write requests don't need to.
@@ -4075,7 +4095,7 @@ static int make_request(mddev_t *mddev, struct bio * bi)
                                  * add failed due to overlap. Flush everything
                                  * and wait a while
                                  */
-                                raid5_unplug_device(mddev->queue);
+                                md_raid5_unplug_device(conf);
                                 release_stripe(sh);
                                 schedule();
                                 goto retry;
@@ -4566,23 +4586,15 @@ raid5_show_stripe_cache_size(mddev_t *mddev, char *page)
                 return 0;
 }
 
-static ssize_t
-raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len)
+int
+raid5_set_cache_size(mddev_t *mddev, int size)
 {
         raid5_conf_t *conf = mddev->private;
-        unsigned long new;
         int err;
 
-        if (len >= PAGE_SIZE)
+        if (size <= 16 || size > 32768)
                 return -EINVAL;
-        if (!conf)
-                return -ENODEV;
-
-        if (strict_strtoul(page, 10, &new))
-                return -EINVAL;
-        if (new <= 16 || new > 32768)
-                return -EINVAL;
-        while (new < conf->max_nr_stripes) {
+        while (size < conf->max_nr_stripes) {
                 if (drop_one_stripe(conf))
                         conf->max_nr_stripes--;
                 else
@@ -4591,11 +4603,32 @@ raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len)
         err = md_allow_write(mddev);
         if (err)
                 return err;
-        while (new > conf->max_nr_stripes) {
+        while (size > conf->max_nr_stripes) {
                 if (grow_one_stripe(conf))
                         conf->max_nr_stripes++;
                 else break;
         }
+        return 0;
+}
+EXPORT_SYMBOL(raid5_set_cache_size);
+
+static ssize_t
+raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len)
+{
+        raid5_conf_t *conf = mddev->private;
+        unsigned long new;
+        int err;
+
+        if (len >= PAGE_SIZE)
+                return -EINVAL;
+        if (!conf)
+                return -ENODEV;
+
+        if (strict_strtoul(page, 10, &new))
+                return -EINVAL;
+        err = raid5_set_cache_size(mddev, new);
+        if (err)
+                return err;
         return len;
 }
 
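The hunk above splits the stripe-cache resize logic in two: raid5_set_cache_size() now carries the grow/shrink loops and is exported, while the sysfs store routine only parses the string and delegates. A hypothetical in-kernel caller of the exported helper (the dm front-end hinted at in the merge summary is not part of this diff) might look like this sketch; example_resize() is made up, the raid5_set_cache_size() signature is the one introduced above.

/* Hypothetical caller of the newly exported helper, for illustration only. */
static int example_resize(mddev_t *mddev)
{
        /* grow or shrink the stripe cache to 1024 entries; 0 or -errno */
        return raid5_set_cache_size(mddev, 1024);
}

From user space the same path is still reached through the existing attribute, e.g. echo 1024 > /sys/block/md0/md/stripe_cache_size.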
@@ -4958,7 +4991,7 @@ static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded
 static int run(mddev_t *mddev)
 {
         raid5_conf_t *conf;
-        int working_disks = 0, chunk_size;
+        int working_disks = 0;
         int dirty_parity_disks = 0;
         mdk_rdev_t *rdev;
         sector_t reshape_offset = 0;
@@ -5144,42 +5177,47 @@ static int run(mddev_t *mddev)
                                                         "reshape");
         }
 
-        /* read-ahead size must cover two whole stripes, which is
-         * 2 * (datadisks) * chunksize where 'n' is the number of raid devices
-         */
-        {
-                int data_disks = conf->previous_raid_disks - conf->max_degraded;
-                int stripe = data_disks *
-                        ((mddev->chunk_sectors << 9) / PAGE_SIZE);
-                if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
-                        mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
-        }
 
         /* Ok, everything is just fine now */
         if (mddev->to_remove == &raid5_attrs_group)
                 mddev->to_remove = NULL;
-        else if (sysfs_create_group(&mddev->kobj, &raid5_attrs_group))
+        else if (mddev->kobj.sd &&
+                 sysfs_create_group(&mddev->kobj, &raid5_attrs_group))
                 printk(KERN_WARNING
-                       "md/raid:%s: failed to create sysfs attributes.\n",
+                       "raid5: failed to create sysfs attributes for %s\n",
                        mdname(mddev));
+        md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
 
-        mddev->queue->queue_lock = &conf->device_lock;
+        plugger_init(&conf->plug, raid5_unplug);
+        mddev->plug = &conf->plug;
+        if (mddev->queue) {
+                int chunk_size;
+                /* read-ahead size must cover two whole stripes, which
+                 * is 2 * (datadisks) * chunksize where 'n' is the
+                 * number of raid devices
+                 */
+                int data_disks = conf->previous_raid_disks - conf->max_degraded;
+                int stripe = data_disks *
+                        ((mddev->chunk_sectors << 9) / PAGE_SIZE);
+                if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
+                        mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
 
-        mddev->queue->unplug_fn = raid5_unplug_device;
-        mddev->queue->backing_dev_info.congested_data = mddev;
-        mddev->queue->backing_dev_info.congested_fn = raid5_congested;
+                blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec);
 
-        md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
+                mddev->queue->backing_dev_info.congested_data = mddev;
+                mddev->queue->backing_dev_info.congested_fn = raid5_congested;
+                mddev->queue->queue_lock = &conf->device_lock;
+                mddev->queue->unplug_fn = raid5_unplug_queue;
 
-        blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec);
-        chunk_size = mddev->chunk_sectors << 9;
-        blk_queue_io_min(mddev->queue, chunk_size);
-        blk_queue_io_opt(mddev->queue, chunk_size *
-                         (conf->raid_disks - conf->max_degraded));
+                chunk_size = mddev->chunk_sectors << 9;
+                blk_queue_io_min(mddev->queue, chunk_size);
+                blk_queue_io_opt(mddev->queue, chunk_size *
+                                 (conf->raid_disks - conf->max_degraded));
 
         list_for_each_entry(rdev, &mddev->disks, same_set)
                 disk_stack_limits(mddev->gendisk, rdev->bdev,
                                   rdev->data_offset << 9);
+        }
 
         return 0;
 abort:
@@ -5200,8 +5238,9 @@ static int stop(mddev_t *mddev)
 
         md_unregister_thread(mddev->thread);
         mddev->thread = NULL;
-        mddev->queue->backing_dev_info.congested_fn = NULL;
-        blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
+        if (mddev->queue)
+                mddev->queue->backing_dev_info.congested_fn = NULL;
+        plugger_flush(&conf->plug); /* the unplug fn references 'conf'*/
         free_conf(conf);
         mddev->private = NULL;
         mddev->to_remove = &raid5_attrs_group;
@@ -5545,10 +5584,7 @@ static int raid5_start_reshape(mddev_t *mddev)
                         sprintf(nm, "rd%d", rdev->raid_disk);
                         if (sysfs_create_link(&mddev->kobj,
                                               &rdev->kobj, nm))
-                                printk(KERN_WARNING
-                                       "md/raid:%s: failed to create "
-                                       " link %s\n",
-                                       mdname(mddev), nm);
+                                /* Failure here is OK */;
                 } else
                         break;
         }
@@ -5603,7 +5639,7 @@ static void end_reshape(raid5_conf_t *conf)
         /* read-ahead size must cover two whole stripes, which is
          * 2 * (datadisks) * chunksize where 'n' is the number of raid devices
          */
-        {
+        if (conf->mddev->queue) {
                 int data_disks = conf->raid_disks - conf->max_degraded;
                 int stripe = data_disks * ((conf->chunk_sectors << 9)
                                            / PAGE_SIZE);
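The congestion test gets the same treatment as the unplug and cache-size paths: md_raid5_congested() works on the mddev and is exported, while the static raid5_congested() wrapper, which also folds in mddev_congested(), stays behind as the congested_fn for arrays that own a request queue. A caller outside raid5.c could in principle poll the exported test directly; the wrapper below is only a sketch, and example_congested() is not a real function.

/* Sketch of an external user of the exported congestion test. */
static int example_congested(mddev_t *mddev, int bits)
{
        /* non-zero when the stripe cache is too busy to take more I/O */
        return md_raid5_congested(mddev, bits);
}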