aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Peter Zijlstra <peterz@infradead.org> 2012-05-15 11:06:19 -0400
committer Tejun Heo <tj@kernel.org> 2012-05-15 11:08:31 -0400
commit 4d82a1debbffec129cc387aafa8f40b7bbab3297 (patch)
tree 64e7bc03962b99fa9b8c4cdb603d1784185a2a20
parent 544ecf310f0e7f51fa057ac2a295fc1b3b35a9d3 (diff)
lockdep: fix oops in processing workqueue
Under memory load, on x86_64, with lockdep enabled, the workqueue's process_one_work() has been seen to oops in __lock_acquire(), barfing on a 0xffffffff00000000 pointer in the lockdep_map's class_cache[].

Because it's permissible to free a work_struct from its callout function, the map used is an onstack copy of the map given in the work_struct: and that copy is made without any locking.

Surprisingly, gcc (4.5.1 in Hugh's case) uses "rep movsl" rather than "rep movsq" for that structure copy: which might race with a workqueue user's wait_on_work() doing lock_map_acquire() on the source of the copy, putting a pointer into the class_cache[], but only in time for the top half of that pointer to be copied to the destination map.

Boom when process_one_work() subsequently does lock_map_acquire() on its onstack copy of the lockdep_map.

Fix this, and a similar instance in call_timer_fn(), with a lockdep_copy_map() function which additionally NULLs the class_cache[].

Note: this oops was actually seen on 3.4-next, where flush_work() newly does the racing lock_map_acquire(); but Tejun points out that 3.4 and earlier are already vulnerable to the same through wait_on_work().

* Patch originally from Peter. Hugh modified it a bit and wrote the description.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Reported-by: Hugh Dickins <hughd@google.com>
LKML-Reference: <alpine.LSU.2.00.1205070951170.1544@eggly.anvils>
Signed-off-by: Tejun Heo <tj@kernel.org>
-rw-r--r-- include/linux/lockdep.h 18
-rw-r--r-- kernel/timer.c 4
-rw-r--r-- kernel/workqueue.c 4
3 files changed, 24 insertions, 2 deletions
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index d36619ead3ba..00e46376e28f 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -157,6 +157,24 @@ struct lockdep_map {
157#endif 157#endif
158}; 158};
159 159
160static inline void lockdep_copy_map(struct lockdep_map *to,
161 struct lockdep_map *from)
162{
163 int i;
164
165 *to = *from;
166 /*
167 * Since the class cache can be modified concurrently we could observe
168 * half pointers (64bit arch using 32bit copy insns). Therefore clear
169 * the caches and take the performance hit.
170 *
171 * XXX it doesn't work well with lockdep_set_class_and_subclass(), since
172 * that relies on cache abuse.
173 */
174 for (i = 0; i < NR_LOCKDEP_CACHING_CLASSES; i++)
175 to->class_cache[i] = NULL;
176}
177
160/* 178/*
161 * Every lock has a list of other locks that were taken after it. 179 * Every lock has a list of other locks that were taken after it.
162 * We only grow the list, never remove from it: 180 * We only grow the list, never remove from it:
diff --git a/kernel/timer.c b/kernel/timer.c
index a297ffcf888e..b12385244bb5 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1102,7 +1102,9 @@ static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
1102 * warnings as well as problems when looking into 1102 * warnings as well as problems when looking into
1103 * timer->lockdep_map, make a copy and use that here. 1103 * timer->lockdep_map, make a copy and use that here.
1104 */ 1104 */
1105 struct lockdep_map lockdep_map = timer->lockdep_map; 1105 struct lockdep_map lockdep_map;
1106
1107 lockdep_copy_map(&lockdep_map, &timer->lockdep_map);
1106#endif 1108#endif
1107 /* 1109 /*
1108 * Couple the lock chain with the lock chain at 1110 * Couple the lock chain with the lock chain at
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index c36c86cf7900..9a3128dc67df 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1818,7 +1818,9 @@ __acquires(&gcwq->lock)
1818 * lock freed" warnings as well as problems when looking into 1818 * lock freed" warnings as well as problems when looking into
1819 * work->lockdep_map, make a copy and use that here. 1819 * work->lockdep_map, make a copy and use that here.
1820 */ 1820 */
1821 struct lockdep_map lockdep_map = work->lockdep_map; 1821 struct lockdep_map lockdep_map;
1822
1823 lockdep_copy_map(&lockdep_map, &work->lockdep_map);
1822#endif 1824#endif
1823 /* 1825 /*
1824 * A single work shouldn't be executed concurrently by 1826 * A single work shouldn't be executed concurrently by