author    Linus Torvalds <torvalds@linux-foundation.org>  2010-08-10 18:38:19 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2010-08-10 18:38:19 -0400
commit    3d30701b58970425e1d45994d6cb82f828924fdd (patch)
tree      8b14cf462628bebf8548c1b8c205a674564052d1  /drivers/md/raid5.c
parent    8cbd84f2dd4e52a8771b191030c374ba3e56d291 (diff)
parent    fd8aa2c1811bf60ccb2d5de0579c6f62aec1772d (diff)
Merge branch 'for-linus' of git://neil.brown.name/md
* 'for-linus' of git://neil.brown.name/md: (24 commits)
  md: clean up do_md_stop
  md: fix another deadlock with removing sysfs attributes.
  md: move revalidate_disk() back outside open_mutex
  md/raid10: fix deadlock with unaligned read during resync
  md/bitmap: separate out loading a bitmap from initialising the structures.
  md/bitmap: prepare for storing write-intent-bitmap via dm-dirty-log.
  md/bitmap: optimise scanning of empty bitmaps.
  md/bitmap: clean up plugging calls.
  md/bitmap: reduce dependence on sysfs.
  md/bitmap: white space clean up and similar.
  md/raid5: export raid5 unplugging interface.
  md/plug: optionally use plugger to unplug an array during resync/recovery.
  md/raid5: add simple plugging infrastructure.
  md/raid5: export is_congested test
  raid5: Don't set read-ahead when there is no queue
  md: add support for raising dm events.
  md: export various start/stop interfaces
  md: split out md_rdev_init
  md: be more careful setting MD_CHANGE_CLEAN
  md/raid5: ensure we create a unique name for kmem_cache when mddev has no gendisk
  ...
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r--  drivers/md/raid5.c | 168
1 file changed, 102 insertions(+), 66 deletions(-)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 20ac2f14376a..866d4b5a144c 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -201,11 +201,11 @@ static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh)
 		if (test_bit(STRIPE_HANDLE, &sh->state)) {
 			if (test_bit(STRIPE_DELAYED, &sh->state)) {
 				list_add_tail(&sh->lru, &conf->delayed_list);
-				blk_plug_device(conf->mddev->queue);
+				plugger_set_plug(&conf->plug);
 			} else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
 				   sh->bm_seq - conf->seq_write > 0) {
 				list_add_tail(&sh->lru, &conf->bitmap_list);
-				blk_plug_device(conf->mddev->queue);
+				plugger_set_plug(&conf->plug);
 			} else {
 				clear_bit(STRIPE_BIT_DELAY, &sh->state);
 				list_add_tail(&sh->lru, &conf->handle_list);
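
The hunk above swaps the block-layer blk_plug_device() call for the per-array plugger helpers added earlier in this series ("md/raid5: add simple plugging infrastructure"), so delayed stripes can be held back even when the array has no request_queue of its own. A rough sketch of the contract these helpers are assumed to provide, reduced here to a single flag bit and an unplug callback; the real struct plug_handle lives in md.h and carries more state than this:

/* Simplified model only, not the md.h definition. */
#include <linux/bitops.h>

struct plug_handle_sketch {
	unsigned long unplug_flag;                        /* bit 0: "plugged" */
	void (*unplug_fn)(struct plug_handle_sketch *ph); /* releases delayed work */
};

/* plugger_set_plug(): mark the array plugged so freshly delayed stripes
 * stay on conf->delayed_list / conf->bitmap_list for now. */
static inline void sketch_set_plug(struct plug_handle_sketch *ph)
{
	set_bit(0, &ph->unplug_flag);
}

/* plugger_remove_plug(): clear the mark atomically; only the caller that
 * actually dropped the plug gets a non-zero return and goes on to
 * activate the delayed stripes (see md_raid5_unplug_device below). */
static inline int sketch_remove_plug(struct plug_handle_sketch *ph)
{
	return test_and_clear_bit(0, &ph->unplug_flag);
}
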
@@ -434,7 +434,6 @@ static int has_failed(raid5_conf_t *conf)
 }
 
 static void unplug_slaves(mddev_t *mddev);
-static void raid5_unplug_device(struct request_queue *q);
 
 static struct stripe_head *
 get_active_stripe(raid5_conf_t *conf, sector_t sector,
@@ -464,7 +463,7 @@ get_active_stripe(raid5_conf_t *conf, sector_t sector,
 				    < (conf->max_nr_stripes *3/4)
 				    || !conf->inactive_blocked),
 				   conf->device_lock,
-				   raid5_unplug_device(conf->mddev->queue)
+				   md_raid5_unplug_device(conf)
 				);
 			conf->inactive_blocked = 0;
 		} else
@@ -1337,10 +1336,14 @@ static int grow_stripes(raid5_conf_t *conf, int num)
 	struct kmem_cache *sc;
 	int devs = max(conf->raid_disks, conf->previous_raid_disks);
 
-	sprintf(conf->cache_name[0],
-		"raid%d-%s", conf->level, mdname(conf->mddev));
-	sprintf(conf->cache_name[1],
-		"raid%d-%s-alt", conf->level, mdname(conf->mddev));
+	if (conf->mddev->gendisk)
+		sprintf(conf->cache_name[0],
+			"raid%d-%s", conf->level, mdname(conf->mddev));
+	else
+		sprintf(conf->cache_name[0],
+			"raid%d-%p", conf->level, conf->mddev);
+	sprintf(conf->cache_name[1], "%s-alt", conf->cache_name[0]);
+
 	conf->active_name = 0;
 	sc = kmem_cache_create(conf->cache_name[conf->active_name],
 		       sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev),
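
The name selection above matters because kmem_cache names must be unique system-wide; for an array with no gendisk (the dm-driven case this series enables) mdname() would not yield a distinct string, so the mddev pointer is used instead. A small user-space illustration of the resulting names; the pointer value and the "md0" name are made up:

#include <stdio.h>

int main(void)
{
	char name0[48], name1[48];
	void *mddev = (void *)0xffff88003a2b4000UL;   /* pretend mddev address */
	int have_gendisk = 0;                         /* the dm-driven case    */

	if (have_gendisk)
		snprintf(name0, sizeof(name0), "raid%d-%s", 5, "md0");
	else
		snprintf(name0, sizeof(name0), "raid%d-%p", 5, mddev);
	snprintf(name1, sizeof(name1), "%s-alt", name0);

	/* prints something like: raid5-0xffff88003a2b4000 / raid5-0xffff88003a2b4000-alt */
	printf("%s / %s\n", name0, name1);
	return 0;
}
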
@@ -3614,7 +3617,7 @@ static void raid5_activate_delayed(raid5_conf_t *conf)
 			list_add_tail(&sh->lru, &conf->hold_list);
 		}
 	} else
-		blk_plug_device(conf->mddev->queue);
+		plugger_set_plug(&conf->plug);
 }
 
 static void activate_bit_delay(raid5_conf_t *conf)
@@ -3655,36 +3658,44 @@ static void unplug_slaves(mddev_t *mddev)
 	rcu_read_unlock();
 }
 
-static void raid5_unplug_device(struct request_queue *q)
+void md_raid5_unplug_device(raid5_conf_t *conf)
 {
-	mddev_t *mddev = q->queuedata;
-	raid5_conf_t *conf = mddev->private;
 	unsigned long flags;
 
 	spin_lock_irqsave(&conf->device_lock, flags);
 
-	if (blk_remove_plug(q)) {
+	if (plugger_remove_plug(&conf->plug)) {
 		conf->seq_flush++;
 		raid5_activate_delayed(conf);
 	}
-	md_wakeup_thread(mddev->thread);
+	md_wakeup_thread(conf->mddev->thread);
 
 	spin_unlock_irqrestore(&conf->device_lock, flags);
 
-	unplug_slaves(mddev);
+	unplug_slaves(conf->mddev);
 }
+EXPORT_SYMBOL_GPL(md_raid5_unplug_device);
 
-static int raid5_congested(void *data, int bits)
+static void raid5_unplug(struct plug_handle *plug)
+{
+	raid5_conf_t *conf = container_of(plug, raid5_conf_t, plug);
+	md_raid5_unplug_device(conf);
+}
+
+static void raid5_unplug_queue(struct request_queue *q)
+{
+	mddev_t *mddev = q->queuedata;
+	md_raid5_unplug_device(mddev->private);
+}
+
+int md_raid5_congested(mddev_t *mddev, int bits)
 {
-	mddev_t *mddev = data;
 	raid5_conf_t *conf = mddev->private;
 
 	/* No difference between reads and writes.  Just check
 	 * how busy the stripe_cache is
 	 */
 
-	if (mddev_congested(mddev, bits))
-		return 1;
 	if (conf->inactive_blocked)
 		return 1;
 	if (conf->quiesce)
@@ -3694,6 +3705,15 @@ static int raid5_congested(void *data, int bits)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(md_raid5_congested);
+
+static int raid5_congested(void *data, int bits)
+{
+	mddev_t *mddev = data;
+
+	return mddev_congested(mddev, bits) ||
+		md_raid5_congested(mddev, bits);
+}
 
 /* We want read requests to align with chunks where possible,
  * but write requests don't need to.
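
Splitting the congestion test this way lets code outside raid5.c (the dm-facing callers this merge prepares for) ask the raid5 personality directly whether its stripe cache is backed up, while the request_queue wrapper raid5_congested() keeps the old behaviour by also consulting mddev_congested(). A hedged sketch of how an external user might drive the two newly exported hooks; the function names below are invented for illustration:

#include "md.h"
#include "raid5.h"

/* Illustrative only: a hypothetical caller that owns an mddev set up
 * without a gendisk/queue, as the rest of this series allows. */
static int example_array_busy(mddev_t *mddev, int bits)
{
	/* non-zero while the raid5 stripe cache is congested */
	return md_raid5_congested(mddev, bits);
}

static void example_array_kick(mddev_t *mddev)
{
	raid5_conf_t *conf = mddev->private;

	/* drop the per-array plug so delayed/bitmap stripes get handled */
	md_raid5_unplug_device(conf);
}
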
@@ -4075,7 +4095,7 @@ static int make_request(mddev_t *mddev, struct bio * bi)
 				 * add failed due to overlap.  Flush everything
 				 * and wait a while
 				 */
-				raid5_unplug_device(mddev->queue);
+				md_raid5_unplug_device(conf);
 				release_stripe(sh);
 				schedule();
 				goto retry;
@@ -4566,23 +4586,15 @@ raid5_show_stripe_cache_size(mddev_t *mddev, char *page)
 		return 0;
 }
 
-static ssize_t
-raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len)
+int
+raid5_set_cache_size(mddev_t *mddev, int size)
 {
 	raid5_conf_t *conf = mddev->private;
-	unsigned long new;
 	int err;
 
-	if (len >= PAGE_SIZE)
+	if (size <= 16 || size > 32768)
 		return -EINVAL;
-	if (!conf)
-		return -ENODEV;
-
-	if (strict_strtoul(page, 10, &new))
-		return -EINVAL;
-	if (new <= 16 || new > 32768)
-		return -EINVAL;
-	while (new < conf->max_nr_stripes) {
+	while (size < conf->max_nr_stripes) {
 		if (drop_one_stripe(conf))
 			conf->max_nr_stripes--;
 		else
@@ -4591,11 +4603,32 @@ raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len)
 	err = md_allow_write(mddev);
 	if (err)
 		return err;
-	while (new > conf->max_nr_stripes) {
+	while (size > conf->max_nr_stripes) {
 		if (grow_one_stripe(conf))
 			conf->max_nr_stripes++;
 		else break;
 	}
+	return 0;
+}
+EXPORT_SYMBOL(raid5_set_cache_size);
+
+static ssize_t
+raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len)
+{
+	raid5_conf_t *conf = mddev->private;
+	unsigned long new;
+	int err;
+
+	if (len >= PAGE_SIZE)
+		return -EINVAL;
+	if (!conf)
+		return -ENODEV;
+
+	if (strict_strtoul(page, 10, &new))
+		return -EINVAL;
+	err = raid5_set_cache_size(mddev, new);
+	if (err)
+		return err;
 	return len;
 }
 
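
With the parsing split out, the resize logic is reachable both from the existing sysfs attribute (its store method above is now a thin strict_strtoul() wrapper) and from other kernel code via the exported raid5_set_cache_size(). A hypothetical in-kernel caller might look like the sketch below; the accepted range is 17..32768 stripe_heads, and the errors propagated are -EINVAL and whatever md_allow_write() returns:

/* Hypothetical caller; example_resize_stripe_cache() is not in the tree. */
static int example_resize_stripe_cache(mddev_t *mddev)
{
	int err = raid5_set_cache_size(mddev, 4096);  /* target: 4096 stripe_heads */

	if (err)
		printk(KERN_WARNING "raid5: stripe cache resize failed (%d)\n", err);
	return err;
}

From user space the knob is unchanged: writing a number to /sys/block/mdX/md/stripe_cache_size still lands in raid5_store_stripe_cache_size(), which now just parses and delegates.
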
@@ -4958,7 +4991,7 @@ static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded
 static int run(mddev_t *mddev)
 {
 	raid5_conf_t *conf;
-	int working_disks = 0, chunk_size;
+	int working_disks = 0;
 	int dirty_parity_disks = 0;
 	mdk_rdev_t *rdev;
 	sector_t reshape_offset = 0;
@@ -5144,42 +5177,47 @@ static int run(mddev_t *mddev)
 							"reshape");
 	}
 
-	/* read-ahead size must cover two whole stripes, which is
-	 * 2 * (datadisks) * chunksize where 'n' is the number of raid devices
-	 */
-	{
-		int data_disks = conf->previous_raid_disks - conf->max_degraded;
-		int stripe = data_disks *
-			((mddev->chunk_sectors << 9) / PAGE_SIZE);
-		if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
-			mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
-	}
 
 	/* Ok, everything is just fine now */
 	if (mddev->to_remove == &raid5_attrs_group)
 		mddev->to_remove = NULL;
-	else if (sysfs_create_group(&mddev->kobj, &raid5_attrs_group))
+	else if (mddev->kobj.sd &&
+		 sysfs_create_group(&mddev->kobj, &raid5_attrs_group))
 		printk(KERN_WARNING
-		       "md/raid:%s: failed to create sysfs attributes.\n",
+		       "raid5: failed to create sysfs attributes for %s\n",
 		       mdname(mddev));
+	md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
 
-	mddev->queue->queue_lock = &conf->device_lock;
+	plugger_init(&conf->plug, raid5_unplug);
+	mddev->plug = &conf->plug;
+	if (mddev->queue) {
+		int chunk_size;
+		/* read-ahead size must cover two whole stripes, which
+		 * is 2 * (datadisks) * chunksize where 'n' is the
+		 * number of raid devices
+		 */
+		int data_disks = conf->previous_raid_disks - conf->max_degraded;
+		int stripe = data_disks *
+			((mddev->chunk_sectors << 9) / PAGE_SIZE);
+		if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
+			mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
 
-	mddev->queue->unplug_fn = raid5_unplug_device;
-	mddev->queue->backing_dev_info.congested_data = mddev;
-	mddev->queue->backing_dev_info.congested_fn = raid5_congested;
+		blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec);
 
-	md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
+		mddev->queue->backing_dev_info.congested_data = mddev;
+		mddev->queue->backing_dev_info.congested_fn = raid5_congested;
+		mddev->queue->queue_lock = &conf->device_lock;
+		mddev->queue->unplug_fn = raid5_unplug_queue;
 
-	blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec);
-	chunk_size = mddev->chunk_sectors << 9;
-	blk_queue_io_min(mddev->queue, chunk_size);
-	blk_queue_io_opt(mddev->queue, chunk_size *
-			 (conf->raid_disks - conf->max_degraded));
+		chunk_size = mddev->chunk_sectors << 9;
+		blk_queue_io_min(mddev->queue, chunk_size);
+		blk_queue_io_opt(mddev->queue, chunk_size *
+				 (conf->raid_disks - conf->max_degraded));
 
-	list_for_each_entry(rdev, &mddev->disks, same_set)
-		disk_stack_limits(mddev->gendisk, rdev->bdev,
-				  rdev->data_offset << 9);
+		list_for_each_entry(rdev, &mddev->disks, same_set)
+			disk_stack_limits(mddev->gendisk, rdev->bdev,
+					  rdev->data_offset << 9);
+	}
 
 	return 0;
 abort:
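
As a concrete check of the read-ahead sizing now guarded by if (mddev->queue): assuming a 6-drive RAID6 (so 4 data disks), 512 KiB chunks and 4 KiB pages, one data stripe is 4 * 128 = 512 pages, so ra_pages is raised to 1024 pages, i.e. 4 MiB of read-ahead. The same arithmetic stand-alone, with those assumed parameters:

#include <stdio.h>

int main(void)
{
	const long page_size     = 4096;           /* PAGE_SIZE                  */
	const long chunk_sectors = 1024;           /* 512 KiB chunk              */
	const int  raid_disks    = 6;              /* assumed RAID6 layout       */
	const int  max_degraded  = 2;
	const int  data_disks    = raid_disks - max_degraded;

	/* pages per full stripe of data, as in run() above */
	long stripe   = data_disks * ((chunk_sectors << 9) / page_size);
	long ra_pages = 2 * stripe;                /* cover two whole stripes    */

	printf("stripe = %ld pages, ra_pages = %ld (%ld KiB)\n",
	       stripe, ra_pages, ra_pages * page_size / 1024);
	return 0;                                  /* 512 pages, 1024 pages, 4 MiB */
}
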
@@ -5200,8 +5238,9 @@ static int stop(mddev_t *mddev)
 
 	md_unregister_thread(mddev->thread);
 	mddev->thread = NULL;
-	mddev->queue->backing_dev_info.congested_fn = NULL;
-	blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
+	if (mddev->queue)
+		mddev->queue->backing_dev_info.congested_fn = NULL;
+	plugger_flush(&conf->plug); /* the unplug fn references 'conf'*/
 	free_conf(conf);
 	mddev->private = NULL;
 	mddev->to_remove = &raid5_attrs_group;
@@ -5545,10 +5584,7 @@ static int raid5_start_reshape(mddev_t *mddev)
 			sprintf(nm, "rd%d", rdev->raid_disk);
 			if (sysfs_create_link(&mddev->kobj,
 					      &rdev->kobj, nm))
-				printk(KERN_WARNING
-				       "md/raid:%s: failed to create "
-				       " link %s\n",
-				       mdname(mddev), nm);
+				/* Failure here is OK */;
 		} else
 			break;
 	}
@@ -5603,7 +5639,7 @@ static void end_reshape(raid5_conf_t *conf)
 	/* read-ahead size must cover two whole stripes, which is
 	 * 2 * (datadisks) * chunksize where 'n' is the number of raid devices
 	 */
-	{
+	if (conf->mddev->queue) {
 		int data_disks = conf->raid_disks - conf->max_degraded;
 		int stripe = data_disks * ((conf->chunk_sectors << 9)
 					   / PAGE_SIZE);