aboutsummaryrefslogtreecommitdiffstats
path: root/mm/mmu_notifier.c
diff options
context:
space:
mode:
authorXiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>2013-05-24 18:55:11 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2013-05-24 19:22:51 -0400
commitd34883d4e35c0a994e91dd847a82b4c9e0c31d83 (patch)
treef141323369c629f8de23bf7bb8f814f643228532 /mm/mmu_notifier.c
parent4c663cfc523a88d97a8309b04a089c27dc57fd7e (diff)
mm: mmu_notifier: re-fix freed page still mapped in secondary MMU
Commit 751efd8610d3 ("mmu_notifier_unregister NULL Pointer deref and multiple ->release()") breaks the fix 3ad3d901bbcf ("mm: mmu_notifier: fix freed page still mapped in secondary MMU"). Since hlist_for_each_entry_rcu() is changed now, we can not revert that patch directly, so this patch reverts the commit and simply fix the bug spotted by that patch This bug spotted by commit 751efd8610d3 is: There is a race condition between mmu_notifier_unregister() and __mmu_notifier_release(). Assume two tasks, one calling mmu_notifier_unregister() as a result of a filp_close() ->flush() callout (task A), and the other calling mmu_notifier_release() from an mmput() (task B). A B t1 srcu_read_lock() t2 if (!hlist_unhashed()) t3 srcu_read_unlock() t4 srcu_read_lock() t5 hlist_del_init_rcu() t6 synchronize_srcu() t7 srcu_read_unlock() t8 hlist_del_rcu() <--- NULL pointer deref. This can be fixed by using hlist_del_init_rcu instead of hlist_del_rcu. The another issue spotted in the commit is "multiple ->release() callouts", we needn't care it too much because it is really rare (e.g, can not happen on kvm since mmu-notify is unregistered after exit_mmap()) and the later call of multiple ->release should be fast since all the pages have already been released by the first call. Anyway, this issue should be fixed in a separate patch. -stable suggestions: Any version that has commit 751efd8610d3 need to be backported. I find the oldest version has this commit is 3.0-stable. [akpm@linux-foundation.org: tweak comments] Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com> Tested-by: Robin Holt <holt@sgi.com> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/mmu_notifier.c')
-rw-r--r--mm/mmu_notifier.c79
1 files changed, 39 insertions, 40 deletions
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index be04122fb277..6725ff183374 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -40,48 +40,44 @@ void __mmu_notifier_release(struct mm_struct *mm)
40 int id; 40 int id;
41 41
42 /* 42 /*
43 * srcu_read_lock() here will block synchronize_srcu() in 43 * SRCU here will block mmu_notifier_unregister until
44 * mmu_notifier_unregister() until all registered 44 * ->release returns.
45 * ->release() callouts this function makes have
46 * returned.
47 */ 45 */
48 id = srcu_read_lock(&srcu); 46 id = srcu_read_lock(&srcu);
47 hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist)
48 /*
49 * If ->release runs before mmu_notifier_unregister it must be
50 * handled, as it's the only way for the driver to flush all
51 * existing sptes and stop the driver from establishing any more
52 * sptes before all the pages in the mm are freed.
53 */
54 if (mn->ops->release)
55 mn->ops->release(mn, mm);
56 srcu_read_unlock(&srcu, id);
57
49 spin_lock(&mm->mmu_notifier_mm->lock); 58 spin_lock(&mm->mmu_notifier_mm->lock);
50 while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) { 59 while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) {
51 mn = hlist_entry(mm->mmu_notifier_mm->list.first, 60 mn = hlist_entry(mm->mmu_notifier_mm->list.first,
52 struct mmu_notifier, 61 struct mmu_notifier,
53 hlist); 62 hlist);
54
55 /* 63 /*
56 * Unlink. This will prevent mmu_notifier_unregister() 64 * We arrived before mmu_notifier_unregister so
57 * from also making the ->release() callout. 65 * mmu_notifier_unregister will do nothing other than to wait
66 * for ->release to finish and for mmu_notifier_unregister to
67 * return.
58 */ 68 */
59 hlist_del_init_rcu(&mn->hlist); 69 hlist_del_init_rcu(&mn->hlist);
60 spin_unlock(&mm->mmu_notifier_mm->lock);
61
62 /*
63 * Clear sptes. (see 'release' description in mmu_notifier.h)
64 */
65 if (mn->ops->release)
66 mn->ops->release(mn, mm);
67
68 spin_lock(&mm->mmu_notifier_mm->lock);
69 } 70 }
70 spin_unlock(&mm->mmu_notifier_mm->lock); 71 spin_unlock(&mm->mmu_notifier_mm->lock);
71 72
72 /* 73 /*
73 * All callouts to ->release() which we have done are complete. 74 * synchronize_srcu here prevents mmu_notifier_release from returning to
74 * Allow synchronize_srcu() in mmu_notifier_unregister() to complete 75 * exit_mmap (which would proceed with freeing all pages in the mm)
75 */ 76 * until the ->release method returns, if it was invoked by
76 srcu_read_unlock(&srcu, id); 77 * mmu_notifier_unregister.
77 78 *
78 /* 79 * The mmu_notifier_mm can't go away from under us because one mm_count
79 * mmu_notifier_unregister() may have unlinked a notifier and may 80 * is held by exit_mmap.
80 * still be calling out to it. Additionally, other notifiers
81 * may have been active via vmtruncate() et. al. Block here
82 * to ensure that all notifier callouts for this mm have been
83 * completed and the sptes are really cleaned up before returning
84 * to exit_mmap().
85 */ 81 */
86 synchronize_srcu(&srcu); 82 synchronize_srcu(&srcu);
87} 83}
@@ -292,31 +288,34 @@ void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm)
292{ 288{
293 BUG_ON(atomic_read(&mm->mm_count) <= 0); 289 BUG_ON(atomic_read(&mm->mm_count) <= 0);
294 290
295 spin_lock(&mm->mmu_notifier_mm->lock);
296 if (!hlist_unhashed(&mn->hlist)) { 291 if (!hlist_unhashed(&mn->hlist)) {
292 /*
293 * SRCU here will force exit_mmap to wait for ->release to
294 * finish before freeing the pages.
295 */
297 int id; 296 int id;
298 297
298 id = srcu_read_lock(&srcu);
299 /* 299 /*
300 * Ensure we synchronize up with __mmu_notifier_release(). 300 * exit_mmap will block in mmu_notifier_release to guarantee
301 * that ->release is called before freeing the pages.
301 */ 302 */
302 id = srcu_read_lock(&srcu);
303
304 hlist_del_rcu(&mn->hlist);
305 spin_unlock(&mm->mmu_notifier_mm->lock);
306
307 if (mn->ops->release) 303 if (mn->ops->release)
308 mn->ops->release(mn, mm); 304 mn->ops->release(mn, mm);
305 srcu_read_unlock(&srcu, id);
309 306
307 spin_lock(&mm->mmu_notifier_mm->lock);
310 /* 308 /*
311 * Allow __mmu_notifier_release() to complete. 309 * Can not use list_del_rcu() since __mmu_notifier_release
310 * can delete it before we hold the lock.
312 */ 311 */
313 srcu_read_unlock(&srcu, id); 312 hlist_del_init_rcu(&mn->hlist);
314 } else
315 spin_unlock(&mm->mmu_notifier_mm->lock); 313 spin_unlock(&mm->mmu_notifier_mm->lock);
314 }
316 315
317 /* 316 /*
318 * Wait for any running method to finish, including ->release() if it 317 * Wait for any running method to finish, of course including
319 * was run by __mmu_notifier_release() instead of us. 318 * ->release if it was run by mmu_notifier_relase instead of us.
320 */ 319 */
321 synchronize_srcu(&srcu); 320 synchronize_srcu(&srcu);
322 321