aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Tejun Heo <tj@kernel.org> 2013-01-15 21:52:51 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2013-01-16 12:05:33 -0500
commit: 774a1221e862b343388347bac9b318767336b20b (patch)
tree: ee02b2b71268d9478f709bea8636ddd5eafdc007
parent: 406089d01562f1e2bf9f089fd7637009ebaad589 (diff)
module, async: async_synchronize_full() on module init iff async is used
If the default iosched is built as module, the kernel may deadlock while trying to load the iosched module on device probe if the probing was running off async. This is because async_synchronize_full() at the end of module init ends up waiting for the async job which initiated the module loading.

 async A                                modprobe

 1. finds a device
 2. registers the block device
 3. request_module(default iosched)
                                        4. modprobe in userland
                                        5. load and init module
                                        6. async_synchronize_full()

Async A waits for modprobe to finish in request_module() and modprobe waits for async A to finish in async_synchronize_full().

Because there's no easy way to track the dependency once control goes out to userland, implementing properly nested flushing is difficult. For now, make module init perform async_synchronize_full() iff module init has queued async jobs as suggested by Linus.

This avoids the described deadlock because iosched module doesn't use async and thus wouldn't invoke async_synchronize_full(). This is hacky and incomplete. It will deadlock if async module loading nests; however, this works around the known problem case and seems to be the best of bad options.

For more details, please refer to the following thread.

  http://thread.gmane.org/gmane.linux.kernel/1420814

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Alex Riesen <raa.lkml@gmail.com>
Tested-by: Ming Lei <ming.lei@canonical.com>
Tested-by: Alex Riesen <raa.lkml@gmail.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- include/linux/sched.h | 1
-rw-r--r-- kernel/async.c | 3
-rw-r--r-- kernel/module.c | 27
3 files changed, 29 insertions, 2 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 206bb089c06b..6fc8f45de4e9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1810,6 +1810,7 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut,
1810#define PF_MEMALLOC 0x00000800 /* Allocating memory */ 1810#define PF_MEMALLOC 0x00000800 /* Allocating memory */
1811#define PF_NPROC_EXCEEDED 0x00001000 /* set_user noticed that RLIMIT_NPROC was exceeded */ 1811#define PF_NPROC_EXCEEDED 0x00001000 /* set_user noticed that RLIMIT_NPROC was exceeded */
1812#define PF_USED_MATH 0x00002000 /* if unset the fpu must be initialized before use */ 1812#define PF_USED_MATH 0x00002000 /* if unset the fpu must be initialized before use */
1813#define PF_USED_ASYNC 0x00004000 /* used async_schedule*(), used by module init */
1813#define PF_NOFREEZE 0x00008000 /* this thread should not be frozen */ 1814#define PF_NOFREEZE 0x00008000 /* this thread should not be frozen */
1814#define PF_FROZEN 0x00010000 /* frozen for system suspend */ 1815#define PF_FROZEN 0x00010000 /* frozen for system suspend */
1815#define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */ 1816#define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */
diff --git a/kernel/async.c b/kernel/async.c
index 9d3118384858..a1d585c351d6 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -196,6 +196,9 @@ static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct a
196 atomic_inc(&entry_count); 196 atomic_inc(&entry_count);
197 spin_unlock_irqrestore(&async_lock, flags); 197 spin_unlock_irqrestore(&async_lock, flags);
198 198
199 /* mark that this task has queued an async job, used by module init */
200 current->flags |= PF_USED_ASYNC;
201
199 /* schedule for execution */ 202 /* schedule for execution */
200 queue_work(system_unbound_wq, &entry->work); 203 queue_work(system_unbound_wq, &entry->work);
201 204
diff --git a/kernel/module.c b/kernel/module.c
index 250092c1d57d..b10b048367e1 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3013,6 +3013,12 @@ static int do_init_module(struct module *mod)
3013{ 3013{
3014 int ret = 0; 3014 int ret = 0;
3015 3015
3016 /*
3017 * We want to find out whether @mod uses async during init. Clear
3018 * PF_USED_ASYNC. async_schedule*() will set it.
3019 */
3020 current->flags &= ~PF_USED_ASYNC;
3021
3016 blocking_notifier_call_chain(&module_notify_list, 3022 blocking_notifier_call_chain(&module_notify_list,
3017 MODULE_STATE_COMING, mod); 3023 MODULE_STATE_COMING, mod);
3018 3024
@@ -3058,8 +3064,25 @@ static int do_init_module(struct module *mod)
3058 blocking_notifier_call_chain(&module_notify_list, 3064 blocking_notifier_call_chain(&module_notify_list,
3059 MODULE_STATE_LIVE, mod); 3065 MODULE_STATE_LIVE, mod);
3060 3066
3061 /* We need to finish all async code before the module init sequence is done */ 3067 /*
3062 async_synchronize_full(); 3068 * We need to finish all async code before the module init sequence
3069 * is done. This has potential to deadlock. For example, a newly
3070 * detected block device can trigger request_module() of the
3071 * default iosched from async probing task. Once userland helper
3072 * reaches here, async_synchronize_full() will wait on the async
3073 * task waiting on request_module() and deadlock.
3074 *
3075 * This deadlock is avoided by performing async_synchronize_full()
3076 * iff module init queued any async jobs. This isn't a full
3077 * solution as it will deadlock the same if module loading from
3078 * async jobs nests more than once; however, due to the various
3079 * constraints, this hack seems to be the best option for now.
3080 * Please refer to the following thread for details.
3081 *
3082 * http://thread.gmane.org/gmane.linux.kernel/1420814
3083 */
3084 if (current->flags & PF_USED_ASYNC)
3085 async_synchronize_full();
3063 3086
3064 mutex_lock(&module_mutex); 3087 mutex_lock(&module_mutex);
3065 /* Drop initial reference. */ 3088 /* Drop initial reference. */