author     Peter Zijlstra <peterz@infradead.org>     2017-08-23 07:23:30 -0400
committer  Ingo Molnar <mingo@kernel.org>            2017-08-25 05:06:33 -0400
commit     e6f3faa734a00c606b7b06c6b9f15e5627d3245b
tree       4c3f0047d1fa1796442512e03147c40c026f25a8 /kernel
parent     a1d14934ea4b9db816a8dbfeab1c3e7204a0d871
locking/lockdep: Fix workqueue crossrelease annotation
The new completion/crossrelease annotations interact unfavourably with
the extant flush_work()/flush_workqueue() annotations.
The problem is that when a single work class does:

  wait_for_completion(&C)

and

  complete(&C)

in different executions, we'll build dependencies like:

  lock_map_acquire(W)
  complete_acquire(C)

and

  lock_map_acquire(W)
  complete_release(C)

which results in the dependency chain: W->C->W, which lockdep thinks
spells deadlock, even though there is no deadlock potential since
works are run concurrently.
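To make the pattern concrete, here is a minimal sketch, not taken from this
patch; the demo_* names and the helper are invented for illustration. Two work
items initialised at the same INIT_WORK() call site share one lockdep class W
(the call site provides the class key); one execution waits on a completion
that the other signals. On a multi-threaded workqueue both run concurrently and
nothing hangs, yet the recorded dependencies close exactly the W->C->W cycle
described above:

#include <linux/completion.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct demo_work {
        struct work_struct work;
        bool waiter;                    /* wait for, or signal, the completion */
};

static DECLARE_COMPLETION(demo_done);

static void demo_fn(struct work_struct *work)
{
        struct demo_work *dw = container_of(work, struct demo_work, work);

        /* process_one_work() has already done lock_map_acquire(W) for us */
        if (dw->waiter)
                wait_for_completion(&demo_done);        /* records W -> C */
        else
                complete(&demo_done);                   /* records C -> W */

        kfree(dw);
}

static int demo_queue_pair(void)
{
        int i;

        for (i = 0; i < 2; i++) {
                struct demo_work *dw = kzalloc(sizeof(*dw), GFP_KERNEL);

                if (!dw)
                        return -ENOMEM;
                /* same INIT_WORK() call site => both works share lockdep class W */
                INIT_WORK(&dw->work, demo_fn);
                dw->waiter = (i == 0);
                schedule_work(&dw->work);
        }
        return 0;
}

Nothing in this sketch can actually deadlock; any report against it would be a
false positive of the kind this patch suppresses.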
One possibility would be to change the work 'lock' to recursive-read,
but that would mean hitting a lockdep limitation on recursive locks.
Also, unconditionally switching to recursive-read here would fail to
detect the actual deadlock on single-threaded workqueues, which do
have a problem with this.
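For contrast, here is a sketch of the single-threaded case that really does
deadlock and that an unconditional recursive-read annotation would stop
reporting; again the names are invented and this code is not part of the patch.
The waiting work occupies the only execution context of an ordered workqueue,
so the work that would complete it, queued behind it on the same queue, can
never run:

#include <linux/completion.h>
#include <linux/workqueue.h>

static struct workqueue_struct *ordered_wq;
static DECLARE_COMPLETION(step_done);

static void completer_fn(struct work_struct *work)
{
        complete(&step_done);
}
static DECLARE_WORK(completer, completer_fn);

static void waiter_fn(struct work_struct *work)
{
        /* the completer now sits behind us on the same single-threaded queue... */
        queue_work(ordered_wq, &completer);
        /* ...so this wait can never be satisfied: a real deadlock */
        wait_for_completion(&step_done);
}
static DECLARE_WORK(waiter, waiter_fn);

static int demo_ordered_deadlock(void)
{
        /* one work at a time, like the old single-threaded workqueues */
        ordered_wq = alloc_ordered_workqueue("xrelease-demo", 0);
        if (!ordered_wq)
                return -ENOMEM;

        queue_work(ordered_wq, &waiter);
        return 0;
}

This is the case lockdep should keep catching, which is why the work/workqueue
lockdep_maps are hidden from crossrelease rather than converted to
recursive-read acquires.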
For now, forcefully disregard these locks for crossrelease.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Tejun Heo <tj@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: boqun.feng@gmail.com
Cc: byungchul.park@lge.com
Cc: david@fromorbit.com
Cc: johannes@sipsolutions.net
Cc: oleg@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/locking/lockdep.c | 56
-rw-r--r--  kernel/workqueue.c       | 23
2 files changed, 58 insertions, 21 deletions
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 66011c9f5df3..f73ca595b81e 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -4629,7 +4629,7 @@ asmlinkage __visible void lockdep_sys_exit(void)
 	 * the index to point to the last entry, which is already invalid.
 	 */
 	crossrelease_hist_end(XHLOCK_PROC);
-	crossrelease_hist_start(XHLOCK_PROC);
+	crossrelease_hist_start(XHLOCK_PROC, false);
 }
 
 void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
@@ -4725,25 +4725,25 @@ static inline void invalidate_xhlock(struct hist_lock *xhlock)
 /*
  * Lock history stacks; we have 3 nested lock history stacks:
  *
- *   Hard IRQ
- *   Soft IRQ
- *   History / Task
+ *   HARD(IRQ)
+ *   SOFT(IRQ)
+ *   PROC(ess)
  *
- * The thing is that once we complete a (Hard/Soft) IRQ the future task locks
- * should not depend on any of the locks observed while running the IRQ.
+ * The thing is that once we complete a HARD/SOFT IRQ the future task locks
+ * should not depend on any of the locks observed while running the IRQ. So
+ * what we do is rewind the history buffer and erase all our knowledge of that
+ * temporal event.
  *
- * So what we do is rewind the history buffer and erase all our knowledge of
- * that temporal event.
- */
-
-/*
- * We need this to annotate lock history boundaries. Take for instance
- * workqueues; each work is independent of the last. The completion of a future
- * work does not depend on the completion of a past work (in general).
- * Therefore we must not carry that (lock) dependency across works.
+ * The PROCess one is special though; it is used to annotate independence
+ * inside a task.
+ *
+ * Take for instance workqueues; each work is independent of the last. The
+ * completion of a future work does not depend on the completion of a past work
+ * (in general). Therefore we must not carry that (lock) dependency across
+ * works.
  *
  * This is true for many things; pretty much all kthreads fall into this
- * pattern, where they have an 'idle' state and future completions do not
+ * pattern, where they have an invariant state and future completions do not
  * depend on past completions. Its just that since they all have the 'same'
  * form -- the kthread does the same over and over -- it doesn't typically
  * matter.
@@ -4751,15 +4751,31 @@ static inline void invalidate_xhlock(struct hist_lock *xhlock)
  * The same is true for system-calls, once a system call is completed (we've
  * returned to userspace) the next system call does not depend on the lock
  * history of the previous system call.
+ *
+ * They key property for independence, this invariant state, is that it must be
+ * a point where we hold no locks and have no history. Because if we were to
+ * hold locks, the restore at _end() would not necessarily recover it's history
+ * entry. Similarly, independence per-definition means it does not depend on
+ * prior state.
  */
-void crossrelease_hist_start(enum xhlock_context_t c)
+void crossrelease_hist_start(enum xhlock_context_t c, bool force)
 {
 	struct task_struct *cur = current;
 
-	if (cur->xhlocks) {
-		cur->xhlock_idx_hist[c] = cur->xhlock_idx;
-		cur->hist_id_save[c] = cur->hist_id;
+	if (!cur->xhlocks)
+		return;
+
+	/*
+	 * We call this at an invariant point, no current state, no history.
+	 */
+	if (c == XHLOCK_PROC) {
+		/* verified the former, ensure the latter */
+		WARN_ON_ONCE(!force && cur->lockdep_depth);
+		invalidate_xhlock(&xhlock(cur->xhlock_idx));
 	}
+
+	cur->xhlock_idx_hist[c] = cur->xhlock_idx;
+	cur->hist_id_save[c] = cur->hist_id;
 }
 
 void crossrelease_hist_end(enum xhlock_context_t c)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 8ad214dc15a9..c0331891dec1 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2093,7 +2093,28 @@ __acquires(&pool->lock)
 
 	lock_map_acquire(&pwq->wq->lockdep_map);
 	lock_map_acquire(&lockdep_map);
-	crossrelease_hist_start(XHLOCK_PROC);
+	/*
+	 * Strictly speaking we should do start(PROC) without holding any
+	 * locks, that is, before these two lock_map_acquire()'s.
+	 *
+	 * However, that would result in:
+	 *
+	 *	A(W1)
+	 *	WFC(C)
+	 *		A(W1)
+	 *		C(C)
+	 *
+	 * Which would create W1->C->W1 dependencies, even though there is no
+	 * actual deadlock possible. There are two solutions, using a
+	 * read-recursive acquire on the work(queue) 'locks', but this will then
+	 * hit the lockdep limitation on recursive locks, or simply discard
+	 * these locks.
+	 *
+	 * AFAICT there is no possible deadlock scenario between the
+	 * flush_work() and complete() primitives (except for single-threaded
+	 * workqueues), so hiding them isn't a problem.
+	 */
+	crossrelease_hist_start(XHLOCK_PROC, true);
 	trace_workqueue_execute_start(work);
 	worker->current_func(work);
 	/*