author		Tejun Heo <tj@kernel.org>	2010-10-15 09:36:08 -0400
committer	NeilBrown <neilb@suse.de>	2010-10-28 02:32:29 -0400
commit		e804ac780e2f01cb3b914daca2fd4780d1743db1 (patch)
tree		60447c76ea9dbaa526c8cabc14898e4df4498bf6 /drivers/md
parent		57dab0bdf689d42972975ec646d862b0900a4bf3 (diff)
md: fix and update workqueue usage
Workqueue usage in md has two problems.

* Flush can be used during or depended upon by memory reclaim, but md
  uses the system workqueue for flush_work, which may lead to
  deadlock.

* md depends on flush_scheduled_work() to achieve exclusion against
  completion of removal of previous instances.  flush_scheduled_work()
  may incur an unexpected amount of delay and is scheduled to be
  removed.

This patch adds two workqueues to md - md_wq and md_misc_wq.  The
former is guaranteed to make forward progress under memory pressure
and serves flush_work.  The latter serves as the flush domain for
other works.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: NeilBrown <neilb@suse.de>
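Why the rescuer matters: during memory reclaim the kernel may be unable to
create new worker threads, so work queued on the shared system workqueue can
sit behind unrelated items indefinitely, and anything that waits on it from
the reclaim path can deadlock.  A queue created with WQ_RESCUER keeps a
dedicated rescuer thread that guarantees its work items still execute.  The
following is a minimal sketch of that pattern only -- it is not the md code;
names such as example_wq and example_flush_fn are invented, and later kernels
spell the flag WQ_MEM_RECLAIM instead of WQ_RESCUER.

/* Illustrative module: one rescuer-backed queue for reclaim-path work. */
#include <linux/module.h>
#include <linux/workqueue.h>

static struct workqueue_struct *example_wq;

static void example_flush_fn(struct work_struct *work)
{
	/* May run while the system is reclaiming memory; the queue's
	 * rescuer thread guarantees forward progress even when no new
	 * worker threads can be created. */
}

static DECLARE_WORK(example_work, example_flush_fn);

static int __init example_init(void)
{
	/* max_active == 0 selects the default concurrency limit */
	example_wq = alloc_workqueue("example", WQ_RESCUER, 0);
	if (!example_wq)
		return -ENOMEM;

	/* Queue to the dedicated queue, not the system one, so a
	 * flush issued from the reclaim path cannot deadlock behind
	 * unrelated work. */
	queue_work(example_wq, &example_work);
	return 0;
}

static void __exit example_exit(void)
{
	destroy_workqueue(example_wq);	/* waits for pending work first */
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");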
Diffstat (limited to 'drivers/md')
-rw-r--r--	drivers/md/md.c	64
1 file changed, 43 insertions(+), 21 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 2399168b6315..0b6fa2a1882a 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -67,6 +67,8 @@ static DEFINE_SPINLOCK(pers_lock);
 static void md_print_devices(void);
 
 static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
+static struct workqueue_struct *md_wq;
+static struct workqueue_struct *md_misc_wq;
 
 #define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
 
@@ -298,7 +300,7 @@ static void md_end_flush(struct bio *bio, int err)
 
         if (atomic_dec_and_test(&mddev->flush_pending)) {
                 /* The pre-request flush has finished */
-                schedule_work(&mddev->flush_work);
+                queue_work(md_wq, &mddev->flush_work);
         }
         bio_put(bio);
 }
@@ -367,7 +369,7 @@ void md_flush_request(mddev_t *mddev, struct bio *bio)
         submit_flushes(mddev);
 
         if (atomic_dec_and_test(&mddev->flush_pending))
-                schedule_work(&mddev->flush_work);
+                queue_work(md_wq, &mddev->flush_work);
 }
 EXPORT_SYMBOL(md_flush_request);
 
@@ -434,14 +436,13 @@ static void mddev_put(mddev_t *mddev)
                  * so destroy it */
                 list_del(&mddev->all_mddevs);
                 if (mddev->gendisk) {
-                        /* we did a probe so need to clean up.
-                         * Call schedule_work inside the spinlock
-                         * so that flush_scheduled_work() after
-                         * mddev_find will succeed in waiting for the
-                         * work to be done.
+                        /* We did a probe so need to clean up.  Call
+                         * queue_work inside the spinlock so that
+                         * flush_workqueue() after mddev_find will
+                         * succeed in waiting for the work to be done.
                          */
                         INIT_WORK(&mddev->del_work, mddev_delayed_delete);
-                        schedule_work(&mddev->del_work);
+                        queue_work(md_misc_wq, &mddev->del_work);
                 } else
                         kfree(mddev);
         }
@@ -1848,7 +1849,7 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev)
         synchronize_rcu();
         INIT_WORK(&rdev->del_work, md_delayed_delete);
         kobject_get(&rdev->kobj);
-        schedule_work(&rdev->del_work);
+        queue_work(md_misc_wq, &rdev->del_work);
 }
 
 /*
@@ -4192,10 +4193,10 @@ static int md_alloc(dev_t dev, char *name)
         shift = partitioned ? MdpMinorShift : 0;
         unit = MINOR(mddev->unit) >> shift;
 
-        /* wait for any previous instance if this device
-         * to be completed removed (mddev_delayed_delete).
+        /* wait for any previous instance of this device to be
+         * completely removed (mddev_delayed_delete).
          */
-        flush_scheduled_work();
+        flush_workqueue(md_misc_wq);
 
         mutex_lock(&disks_mutex);
         error = -EEXIST;
@@ -5891,7 +5892,7 @@ static int md_open(struct block_device *bdev, fmode_t mode)
                  */
                 mddev_put(mddev);
                 /* Wait until bdev->bd_disk is definitely gone */
-                flush_scheduled_work();
+                flush_workqueue(md_misc_wq);
                 /* Then retry the open from the top */
                 return -ERESTARTSYS;
         }
@@ -6047,7 +6048,7 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev)
         set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
         md_wakeup_thread(mddev->thread);
         if (mddev->event_work.func)
-                schedule_work(&mddev->event_work);
+                queue_work(md_misc_wq, &mddev->event_work);
         md_new_event_inintr(mddev);
 }
 
@@ -7207,12 +7208,23 @@ static void md_geninit(void)
 
 static int __init md_init(void)
 {
-        if (register_blkdev(MD_MAJOR, "md"))
-                return -1;
-        if ((mdp_major=register_blkdev(0, "mdp"))<=0) {
-                unregister_blkdev(MD_MAJOR, "md");
-                return -1;
-        }
+        int ret = -ENOMEM;
+
+        md_wq = alloc_workqueue("md", WQ_RESCUER, 0);
+        if (!md_wq)
+                goto err_wq;
+
+        md_misc_wq = alloc_workqueue("md_misc", 0, 0);
+        if (!md_misc_wq)
+                goto err_misc_wq;
+
+        if ((ret = register_blkdev(MD_MAJOR, "md")) < 0)
+                goto err_md;
+
+        if ((ret = register_blkdev(0, "mdp")) < 0)
+                goto err_mdp;
+        mdp_major = ret;
+
         blk_register_region(MKDEV(MD_MAJOR, 0), 1UL<<MINORBITS, THIS_MODULE,
                             md_probe, NULL, NULL);
         blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE,
@@ -7223,8 +7235,16 @@ static int __init md_init(void)
 
         md_geninit();
         return 0;
-}
 
+err_mdp:
+        unregister_blkdev(MD_MAJOR, "md");
+err_md:
+        destroy_workqueue(md_misc_wq);
+err_misc_wq:
+        destroy_workqueue(md_wq);
+err_wq:
+        return ret;
+}
 
 #ifndef MODULE
 
@@ -7311,6 +7331,8 @@ static __exit void md_exit(void)
                 export_array(mddev);
                 mddev->hold_active = 0;
         }
+        destroy_workqueue(md_misc_wq);
+        destroy_workqueue(md_wq);
 }
 
 subsys_initcall(md_init);
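The flush_scheduled_work() calls removed above worked only because md's
delete work happened to live on the system workqueue, and they also had to
wait for every other subsystem's queued work.  What the patch preserves is
the narrower exclusion idiom: queue the delayed-delete work on a private
queue while the registry lock is held, so a later flush_workqueue() on that
queue is guaranteed to wait for it.  The sketch below uses invented names
(instance, registry_lock, misc_wq) and assumes misc_wq was allocated at init
time the way md_misc_wq is; it is an illustration, not the md code.

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>

static struct workqueue_struct *misc_wq;	/* alloc_workqueue() at init */
static DEFINE_SPINLOCK(registry_lock);

struct instance {
	int refcount;
	struct work_struct del_work;
};

static void instance_delete(struct work_struct *work)
{
	struct instance *inst = container_of(work, struct instance, del_work);

	/* tear down sysfs entries, gendisk, etc., then free */
	kfree(inst);
}

static void instance_put(struct instance *inst)
{
	spin_lock(&registry_lock);
	if (--inst->refcount == 0) {
		/* Queue inside the lock: anyone who sees the instance
		 * gone from the registry and then flushes misc_wq is
		 * certain to wait for this work to finish. */
		INIT_WORK(&inst->del_work, instance_delete);
		queue_work(misc_wq, &inst->del_work);
	}
	spin_unlock(&registry_lock);
}

static int instance_alloc(void)
{
	/* Wait for any previous instance with the same identity to be
	 * completely removed -- the md_alloc()/flush_workqueue(md_misc_wq)
	 * pattern above -- without draining unrelated system-wide work
	 * the way flush_scheduled_work() would. */
	flush_workqueue(misc_wq);
	/* ... allocate and register the new instance ... */
	return 0;
}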