aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux/mmu_notifier.h
diff options
context:
space:
mode:
author Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com> 2012-10-08 19:29:23 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2012-10-09 03:22:22 -0400
commit 48af0d7cb3c87fae2ff38af372821dcb0b019c9e (patch)
tree 13e7cee9d6b2ed1a69cf7ef322d4697a2085d5ea /include/linux/mmu_notifier.h
parent 00442ad04a5eac08a98255697c510e708f6082e2 (diff)
mm: mmu_notifier: fix inconsistent memory between secondary MMU and host
There is a bug in set_pte_at_notify() which always sets the pte to the
new page before releasing the old page in the secondary MMU. At this
time, the process will access the new page, but the secondary MMU still
accesses the old page, so the memory is inconsistent between them.

The scenario below shows the bug more clearly:

At the beginning: *p = 0, and p is write-protected by KSM or shared with
the parent process.

CPU 0                                       CPU 1
write 1 to p to trigger COW,
set_pte_at_notify will be called:
  *pte = new_page + W; /* The W bit of pte is set */

                                            *p = 1; /* pte is valid, so no #PF */

                                            return back to secondary MMU, then
                                            the secondary MMU reads p, but gets:
                                            *p == 0;

                                            /*
                                             * !!!!!!
                                             * the host has already set p to 1, but the secondary
                                             * MMU still gets the old value 0
                                             */

  call mmu_notifier_change_pte to release
  old page in secondary MMU

We can fix it by releasing the old page first, then setting the pte to
the new page.

Note, the new page will first be used in the secondary MMU before it is
mapped into the page table of the process, but this is safe because it
is protected by the page table lock; there is no race to change the pte.

[akpm@linux-foundation.org: add comment from Andrea]
Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include/linux/mmu_notifier.h')
-rw-r--r-- include/linux/mmu_notifier.h | 12
1 files changed, 11 insertions, 1 deletions
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 1d1b1e13f79f..6f32b2b1f76b 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -311,14 +311,24 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
311 __young; \ 311 __young; \
312}) 312})
313 313
314/*
315 * set_pte_at_notify() sets the pte _after_ running the notifier.
316 * This is safe to start by updating the secondary MMUs, because the primary MMU
317 * pte invalidate must have already happened with a ptep_clear_flush() before
318 * set_pte_at_notify() has been invoked. Updating the secondary MMUs first is
319 * required when we change both the protection of the mapping from read-only to
320 * read-write and the pfn (like during copy on write page faults). Otherwise the
321 * old page would remain mapped readonly in the secondary MMUs after the new
322 * page is already writable by some CPU through the primary MMU.
323 */
314#define set_pte_at_notify(__mm, __address, __ptep, __pte) \ 324#define set_pte_at_notify(__mm, __address, __ptep, __pte) \
315({ \ 325({ \
316 struct mm_struct *___mm = __mm; \ 326 struct mm_struct *___mm = __mm; \
317 unsigned long ___address = __address; \ 327 unsigned long ___address = __address; \
318 pte_t ___pte = __pte; \ 328 pte_t ___pte = __pte; \
319 \ 329 \
320 set_pte_at(___mm, ___address, __ptep, ___pte); \
321 mmu_notifier_change_pte(___mm, ___address, ___pte); \ 330 mmu_notifier_change_pte(___mm, ___address, ___pte); \
331 set_pte_at(___mm, ___address, __ptep, ___pte); \
322}) 332})
323 333
324#else /* CONFIG_MMU_NOTIFIER */ 334#else /* CONFIG_MMU_NOTIFIER */