author    Tejun Heo <tj@kernel.org>    2012-12-18 13:35:02 -0500
committer Tejun Heo <tj@kernel.org>    2012-12-18 13:56:14 -0500
commit    a2c1c57be8d9fd5b716113c8991d3d702eeacf77 (patch)
tree      dd275d53f76528c37e4f8f71fbfd4e2e9954f70b /kernel/workqueue.c
parent    42f8570f437b65aaf3ef176a38ad7d7fc5847d8b (diff)
workqueue: consider work function when searching for busy work items
To avoid executing the same work item concurrently, workqueue hashes currently busy workers according to their current work items and looks up the table when it wants to execute a new work item. If there already is a worker which is executing the new work item, the new item is queued to the found worker so that it gets executed only after the current execution finishes.

Unfortunately, a work item may be freed while being executed and thus recycled for different purposes. If it gets recycled for a different work item and queued while the previous execution is still in progress, workqueue may make the new work item wait for the old one although the two aren't really related in any way. In extreme cases, this false dependency may lead to deadlock, although it's extremely unlikely given that there aren't too many self-freeing work item users and they usually don't wait for other work items.

To alleviate the problem, record the current work function in each busy worker and match it together with the work item address in find_worker_executing_work(). While this isn't complete, it ensures that unrelated work items don't interact with each other and, in the very unlikely case where a twisted wq user triggers it, the dependency is always onto itself, making the culprit easy to spot.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Andrey Isakov <andy51@gmx.ru>
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=51701
Cc: stable@vger.kernel.org
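The recycling scenario is easier to see in isolation. Below is a minimal, standalone userspace sketch (not kernel code, all names illustrative) of the hazard and of why matching on work address plus work function, as the patch does in find_worker_executing_work(), avoids the false dependency that an address-only match can create:

/*
 * Userspace sketch of the hazard: a self-freeing work item's memory may
 * be recycled for an unrelated work item at the same address while the
 * first one is still executing.  Matching busy workers by address alone
 * then ties the two together; matching address *and* function does not.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

typedef void (*work_func_t)(void *work);

struct work_struct {
	work_func_t func;
};

/* one "busy worker" slot standing in for an entry in gcwq->busy_hash */
struct worker {
	uintptr_t   current_work;	/* address of the work being run */
	work_func_t current_func;	/* its work function */
};

static void old_fn(void *work) { (void)work; }
static void new_fn(void *work) { (void)work; }

static bool match_by_addr(const struct worker *wk, const struct work_struct *w)
{
	return wk->current_work == (uintptr_t)w;
}

static bool match_by_addr_and_func(const struct worker *wk,
				   const struct work_struct *w)
{
	return wk->current_work == (uintptr_t)w && wk->current_func == w->func;
}

int main(void)
{
	/* a worker starts executing work item A and records it as busy */
	struct work_struct *a = malloc(sizeof(*a));
	a->func = old_fn;
	struct worker wk = { .current_work = (uintptr_t)a,
			     .current_func = a->func };

	/* A frees itself mid-execution; the allocator may recycle its slot */
	free(a);
	struct work_struct *b = malloc(sizeof(*b));
	b->func = new_fn;

	if ((uintptr_t)b == wk.current_work)
		printf("recycled address: addr-only match=%d, addr+func match=%d\n",
		       match_by_addr(&wk, b), match_by_addr_and_func(&wk, b));
	else
		printf("allocator did not recycle the address on this run\n");

	free(b);
	return 0;
}

When the allocator happens to hand back the same address, the address-only check reports a collision (1) for the unrelated item B, while the address-plus-function check does not (0), which is the behavior the patch introduces.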
Diffstat (limited to 'kernel/workqueue.c')
-rw-r--r--  kernel/workqueue.c  39
1 file changed, 31 insertions(+), 8 deletions(-)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index acd417be8199..d24a41101838 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -137,6 +137,7 @@ struct worker {
 	};
 
 	struct work_struct	*current_work;	/* L: work being processed */
+	work_func_t		current_func;	/* L: current_work's fn */
 	struct cpu_workqueue_struct *current_cwq; /* L: current_work's cwq */
 	struct list_head	scheduled;	/* L: scheduled works */
 	struct task_struct	*task;		/* I: worker task */
@@ -861,9 +862,27 @@ static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
  * @gcwq: gcwq of interest
  * @work: work to find worker for
  *
- * Find a worker which is executing @work on @gcwq.  This function is
- * identical to __find_worker_executing_work() except that this
- * function calculates @bwh itself.
+ * Find a worker which is executing @work on @gcwq by searching
+ * @gcwq->busy_hash which is keyed by the address of @work.  For a worker
+ * to match, its current execution should match the address of @work and
+ * its work function.  This is to avoid unwanted dependency between
+ * unrelated work executions through a work item being recycled while still
+ * being executed.
+ *
+ * This is a bit tricky.  A work item may be freed once its execution
+ * starts and nothing prevents the freed area from being recycled for
+ * another work item.  If the same work item address ends up being reused
+ * before the original execution finishes, workqueue will identify the
+ * recycled work item as currently executing and make it wait until the
+ * current execution finishes, introducing an unwanted dependency.
+ *
+ * This function checks the work item address, work function and workqueue
+ * to avoid false positives.  Note that this isn't complete as one may
+ * construct a work function which can introduce dependency onto itself
+ * through a recycled work item.  Well, if somebody wants to shoot oneself
+ * in the foot that badly, there's only so much we can do, and if such
+ * deadlock actually occurs, it should be easy to locate the culprit work
+ * function.
  *
  * CONTEXT:
  * spin_lock_irq(gcwq->lock).
@@ -878,8 +897,10 @@ static struct worker *find_worker_executing_work(struct global_cwq *gcwq,
 	struct worker *worker;
 	struct hlist_node *tmp;
 
-	hash_for_each_possible(gcwq->busy_hash, worker, tmp, hentry, (unsigned long)work)
-		if (worker->current_work == work)
+	hash_for_each_possible(gcwq->busy_hash, worker, tmp, hentry,
+			       (unsigned long)work)
+		if (worker->current_work == work &&
+		    worker->current_func == work->func)
 			return worker;
 
 	return NULL;
@@ -2114,7 +2135,6 @@ __acquires(&gcwq->lock)
 	struct worker_pool *pool = worker->pool;
 	struct global_cwq *gcwq = pool->gcwq;
 	bool cpu_intensive = cwq->wq->flags & WQ_CPU_INTENSIVE;
-	work_func_t f = work->func;
 	int work_color;
 	struct worker *collision;
 #ifdef CONFIG_LOCKDEP
@@ -2154,6 +2174,7 @@ __acquires(&gcwq->lock)
 	debug_work_deactivate(work);
 	hash_add(gcwq->busy_hash, &worker->hentry, (unsigned long)worker);
 	worker->current_work = work;
+	worker->current_func = work->func;
 	worker->current_cwq = cwq;
 	work_color = get_work_color(work);
 
@@ -2186,7 +2207,7 @@ __acquires(&gcwq->lock)
 	lock_map_acquire_read(&cwq->wq->lockdep_map);
 	lock_map_acquire(&lockdep_map);
 	trace_workqueue_execute_start(work);
-	f(work);
+	worker->current_func(work);
 	/*
 	 * While we must be careful to not use "work" after this, the trace
 	 * point will only record its address.
@@ -2198,7 +2219,8 @@ __acquires(&gcwq->lock)
 	if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
 		pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n"
 		       "     last function: %pf\n",
-		       current->comm, preempt_count(), task_pid_nr(current), f);
+		       current->comm, preempt_count(), task_pid_nr(current),
+		       worker->current_func);
 		debug_show_held_locks(current);
 		dump_stack();
 	}
@@ -2212,6 +2234,7 @@ __acquires(&gcwq->lock)
 	/* we're done with it, release */
 	hash_del(&worker->hentry);
 	worker->current_work = NULL;
+	worker->current_func = NULL;
 	worker->current_cwq = NULL;
 	cwq_dec_nr_in_flight(cwq, work_color);
 }