diff options
author | Robin Holt <holt@sgi.com> | 2013-02-22 19:35:34 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-02-23 20:50:21 -0500 |
commit | 751efd8610d3d7d67b7bdf7f62646edea7365dd7 (patch) | |
tree | 1703264d0c128a5d2e602b389cce35de88c06039 | |
parent | c1f19495277c34b01fe1ac9f781bbeefafaa0d02 (diff) |
mmu_notifier_unregister NULL Pointer deref and multiple ->release() callouts
There is a race condition between mmu_notifier_unregister() and
__mmu_notifier_release().
Assume two tasks, one calling mmu_notifier_unregister() as a result of a
filp_close() ->flush() callout (task A), and the other calling
mmu_notifier_release() from an mmput() (task B).
        A (mmu_notifier_unregister)     B (__mmu_notifier_release)
t1                                      srcu_read_lock()
t2      if (!hlist_unhashed())
t3                                      srcu_read_unlock()
t4      srcu_read_lock()
t5                                      hlist_del_init_rcu()
t6                                      synchronize_srcu()
t7      srcu_read_unlock()
t8      hlist_del_rcu()  <--- NULL pointer deref.
Additionally, the list traversal in __mmu_notifier_release() is not
protected by the mmu_notifier_mm->hlist_lock which can result in
callouts to the ->release() notifier from both mmu_notifier_unregister()
and __mmu_notifier_release().
-stable suggestions:
The stable trees prior to 3.7.y need commits 21a92735f660 and
70400303ce0c cherry-picked in that order prior to cherry-picking this
commit. The 3.7.y tree already has those two commits.
Signed-off-by: Robin Holt <holt@sgi.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Wanpeng Li <liwanp@linux.vnet.ibm.com>
Cc: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Sagi Grimberg <sagig@mellanox.co.il>
Cc: Haggai Eran <haggaie@mellanox.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | mm/mmu_notifier.c | 82 |
1 files changed, 42 insertions, 40 deletions
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 8a5ac8c686b0..f5c3d968d8c6 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -37,49 +37,51 @@ static struct srcu_struct srcu; | |||
37 | void __mmu_notifier_release(struct mm_struct *mm) | 37 | void __mmu_notifier_release(struct mm_struct *mm) |
38 | { | 38 | { |
39 | struct mmu_notifier *mn; | 39 | struct mmu_notifier *mn; |
40 | struct hlist_node *n; | ||
41 | int id; | 40 | int id; |
42 | 41 | ||
43 | /* | 42 | /* |
44 | * SRCU here will block mmu_notifier_unregister until | 43 | * srcu_read_lock() here will block synchronize_srcu() in |
45 | * ->release returns. | 44 | * mmu_notifier_unregister() until all registered |
45 | * ->release() callouts this function makes have | ||
46 | * returned. | ||
46 | */ | 47 | */ |
47 | id = srcu_read_lock(&srcu); | 48 | id = srcu_read_lock(&srcu); |
48 | hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) | ||
49 | /* | ||
50 | * if ->release runs before mmu_notifier_unregister it | ||
51 | * must be handled as it's the only way for the driver | ||
52 | * to flush all existing sptes and stop the driver | ||
53 | * from establishing any more sptes before all the | ||
54 | * pages in the mm are freed. | ||
55 | */ | ||
56 | if (mn->ops->release) | ||
57 | mn->ops->release(mn, mm); | ||
58 | srcu_read_unlock(&srcu, id); | ||
59 | |||
60 | spin_lock(&mm->mmu_notifier_mm->lock); | 49 | spin_lock(&mm->mmu_notifier_mm->lock); |
61 | while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) { | 50 | while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) { |
62 | mn = hlist_entry(mm->mmu_notifier_mm->list.first, | 51 | mn = hlist_entry(mm->mmu_notifier_mm->list.first, |
63 | struct mmu_notifier, | 52 | struct mmu_notifier, |
64 | hlist); | 53 | hlist); |
54 | |||
65 | /* | 55 | /* |
66 | * We arrived before mmu_notifier_unregister so | 56 | * Unlink. This will prevent mmu_notifier_unregister() |
67 | * mmu_notifier_unregister will do nothing other than | 57 | * from also making the ->release() callout. |
68 | * to wait ->release to finish and | ||
69 | * mmu_notifier_unregister to return. | ||
70 | */ | 58 | */ |
71 | hlist_del_init_rcu(&mn->hlist); | 59 | hlist_del_init_rcu(&mn->hlist); |
60 | spin_unlock(&mm->mmu_notifier_mm->lock); | ||
61 | |||
62 | /* | ||
63 | * Clear sptes. (see 'release' description in mmu_notifier.h) | ||
64 | */ | ||
65 | if (mn->ops->release) | ||
66 | mn->ops->release(mn, mm); | ||
67 | |||
68 | spin_lock(&mm->mmu_notifier_mm->lock); | ||
72 | } | 69 | } |
73 | spin_unlock(&mm->mmu_notifier_mm->lock); | 70 | spin_unlock(&mm->mmu_notifier_mm->lock); |
74 | 71 | ||
75 | /* | 72 | /* |
76 | * synchronize_srcu here prevents mmu_notifier_release to | 73 | * All callouts to ->release() which we have done are complete. |
77 | * return to exit_mmap (which would proceed freeing all pages | 74 | * Allow synchronize_srcu() in mmu_notifier_unregister() to complete |
78 | * in the mm) until the ->release method returns, if it was | 75 | */ |
79 | * invoked by mmu_notifier_unregister. | 76 | srcu_read_unlock(&srcu, id); |
80 | * | 77 | |
81 | * The mmu_notifier_mm can't go away from under us because one | 78 | /* |
82 | * mm_count is hold by exit_mmap. | 79 | * mmu_notifier_unregister() may have unlinked a notifier and may |
80 | * still be calling out to it. Additionally, other notifiers | ||
81 | * may have been active via vmtruncate() et. al. Block here | ||
82 | * to ensure that all notifier callouts for this mm have been | ||
83 | * completed and the sptes are really cleaned up before returning | ||
84 | * to exit_mmap(). | ||
83 | */ | 85 | */ |
84 | synchronize_srcu(&srcu); | 86 | synchronize_srcu(&srcu); |
85 | } | 87 | } |
@@ -294,31 +296,31 @@ void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm) | |||
294 | { | 296 | { |
295 | BUG_ON(atomic_read(&mm->mm_count) <= 0); | 297 | BUG_ON(atomic_read(&mm->mm_count) <= 0); |
296 | 298 | ||
299 | spin_lock(&mm->mmu_notifier_mm->lock); | ||
297 | if (!hlist_unhashed(&mn->hlist)) { | 300 | if (!hlist_unhashed(&mn->hlist)) { |
298 | /* | ||
299 | * SRCU here will force exit_mmap to wait ->release to finish | ||
300 | * before freeing the pages. | ||
301 | */ | ||
302 | int id; | 301 | int id; |
303 | 302 | ||
304 | id = srcu_read_lock(&srcu); | ||
305 | /* | 303 | /* |
306 | * exit_mmap will block in mmu_notifier_release to | 304 | * Ensure we synchronize up with __mmu_notifier_release(). |
307 | * guarantee ->release is called before freeing the | ||
308 | * pages. | ||
309 | */ | 305 | */ |
306 | id = srcu_read_lock(&srcu); | ||
307 | |||
308 | hlist_del_rcu(&mn->hlist); | ||
309 | spin_unlock(&mm->mmu_notifier_mm->lock); | ||
310 | |||
310 | if (mn->ops->release) | 311 | if (mn->ops->release) |
311 | mn->ops->release(mn, mm); | 312 | mn->ops->release(mn, mm); |
312 | srcu_read_unlock(&srcu, id); | ||
313 | 313 | ||
314 | spin_lock(&mm->mmu_notifier_mm->lock); | 314 | /* |
315 | hlist_del_rcu(&mn->hlist); | 315 | * Allow __mmu_notifier_release() to complete. |
316 | */ | ||
317 | srcu_read_unlock(&srcu, id); | ||
318 | } else | ||
316 | spin_unlock(&mm->mmu_notifier_mm->lock); | 319 | spin_unlock(&mm->mmu_notifier_mm->lock); |
317 | } | ||
318 | 320 | ||
319 | /* | 321 | /* |
320 | * Wait any running method to finish, of course including | 322 | * Wait for any running method to finish, including ->release() if it |
321 | * ->release if it was run by mmu_notifier_relase instead of us. | 323 | * was run by __mmu_notifier_release() instead of us. |
322 | */ | 324 | */ |
323 | synchronize_srcu(&srcu); | 325 | synchronize_srcu(&srcu); |
324 | 326 | ||