From f18194275c39835cb84563500995e0d503a32d9a Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 25 Aug 2010 23:12:54 -0700 Subject: mm: fix hang on anon_vma->root->lock After several hours, kbuild tests hang with anon_vma_prepare() spinning on a newly allocated anon_vma's lock - on a box with CONFIG_TREE_PREEMPT_RCU=y (which makes this very much more likely, but it could happen without). The ever-subtle page_lock_anon_vma() now needs a further twist: since anon_vma_prepare() and anon_vma_fork() are liable to change the ->root of a reused anon_vma structure at any moment, page_lock_anon_vma() needs to check page_mapped() again before succeeding, otherwise page_unlock_anon_vma() might address a different root->lock. Signed-off-by: Hugh Dickins Reviewed-by: Rik van Riel Cc: Christoph Lameter Cc: Peter Zijlstra Cc: Andrea Arcangeli Signed-off-by: Linus Torvalds --- mm/rmap.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'mm/rmap.c') diff --git a/mm/rmap.c b/mm/rmap.c index 87b9e8ad4509..f6f0d2dda2ea 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -316,7 +316,7 @@ void __init anon_vma_init(void) */ struct anon_vma *page_lock_anon_vma(struct page *page) { - struct anon_vma *anon_vma; + struct anon_vma *anon_vma, *root_anon_vma; unsigned long anon_mapping; rcu_read_lock(); @@ -327,8 +327,21 @@ struct anon_vma *page_lock_anon_vma(struct page *page) goto out; anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON); - anon_vma_lock(anon_vma); - return anon_vma; + root_anon_vma = ACCESS_ONCE(anon_vma->root); + spin_lock(&root_anon_vma->lock); + + /* + * If this page is still mapped, then its anon_vma cannot have been + * freed. 
But if it has been unmapped, we have no security against + * the anon_vma structure being freed and reused (for another anon_vma: + * SLAB_DESTROY_BY_RCU guarantees that - so the spin_lock above cannot + * corrupt): with anon_vma_prepare() or anon_vma_fork() redirecting + * anon_vma->root before page_unlock_anon_vma() is called to unlock. + */ + if (page_mapped(page)) + return anon_vma; + + spin_unlock(&root_anon_vma->lock); out: rcu_read_unlock(); return NULL; -- cgit v1.2.2 From 433abed6c6f76ca079a9564f2a1a51fd28ebe0ca Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Fri, 10 Sep 2010 13:23:03 +0900 Subject: hugetlb, rmap: always use anon_vma root pointer This patch applies Andrea's fix given by the following patch into hugepage rmapping code: commit 288468c334e98aacbb7e2fb8bde6bc1adcd55e05 Author: Andrea Arcangeli Date: Mon Aug 9 17:19:09 2010 -0700 This patch uses anon_vma->root and avoids unnecessary overwriting when anon_vma is already set up. Signed-off-by: Naoya Horiguchi Acked-by: Andrea Arcangeli Reviewed-by: Rik van Riel Signed-off-by: Linus Torvalds --- mm/rmap.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'mm/rmap.c') diff --git a/mm/rmap.c b/mm/rmap.c index f6f0d2dda2ea..2854857fd63b 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1564,13 +1564,14 @@ static void __hugepage_set_anon_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address, int exclusive) { struct anon_vma *anon_vma = vma->anon_vma; + BUG_ON(!anon_vma); - if (!exclusive) { - struct anon_vma_chain *avc; - avc = list_entry(vma->anon_vma_chain.prev, - struct anon_vma_chain, same_vma); - anon_vma = avc->anon_vma; - } + + if (PageAnon(page)) + return; + if (!exclusive) + anon_vma = anon_vma->root; + anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON; page->mapping = (struct address_space *) anon_vma; page->index = linear_page_index(vma, address); -- cgit v1.2.2 From a850ea30374ebed32a0724742601861853fde869 Mon Sep 17 00:00:00 2001 From: Naoya 
Horiguchi Date: Fri, 10 Sep 2010 13:23:06 +0900 Subject: hugetlb, rmap: add BUG_ON(!PageLocked) in hugetlb_add_anon_rmap() Confirming page lock is held in hugepage_add_anon_rmap() may be useful to detect possible future problems. Signed-off-by: Naoya Horiguchi Acked-by: Rik van Riel Acked-by: Andrea Arcangeli Signed-off-by: Linus Torvalds --- mm/rmap.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'mm/rmap.c') diff --git a/mm/rmap.c b/mm/rmap.c index 2854857fd63b..9d2ba01bd4f9 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1582,6 +1582,8 @@ void hugepage_add_anon_rmap(struct page *page, { struct anon_vma *anon_vma = vma->anon_vma; int first; + + BUG_ON(!PageLocked(page)); BUG_ON(!anon_vma); BUG_ON(address < vma->vm_start || address >= vma->vm_end); first = atomic_inc_and_test(&page->_mapcount); -- cgit v1.2.2 From 4829b906cc063cb7cd1b7f34fa05de6db75ec8bb Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sat, 2 Oct 2010 17:46:06 -0700 Subject: ksm: fix page_address_in_vma anon_vma oops 2.6.36-rc1 commit 21d0d443cdc1658a8c1484fdcece4803f0f96d0e "rmap: resurrect page_address_in_vma anon_vma check" was right to resurrect that check; but now that it's comparing anon_vma->roots instead of just anon_vmas, there's a danger of oopsing on a NULL anon_vma. In most cases no NULL anon_vma ever gets here; but it turns out that occasionally KSM, when enabled on a forked or forking process, will itself call page_address_in_vma() on a "half-KSM" page left over from an earlier failed attempt to merge - whose page_anon_vma() is NULL. It's my bug that those should be getting here at all: I thought they were already dealt with, this oops proves me wrong, I'll fix it in the next release - such pages are effectively pinned until their process exits, since rmap cannot find their ptes (though swapoff can). For now just work around it by making page_address_in_vma() safe (and add a comment on why that check is wanted anyway).
A similar check in __page_check_anon_rmap() is safe because do_page_add_anon_rmap() already excluded KSM pages. Signed-off-by: Hugh Dickins Cc: Andrew Morton Cc: Andrea Arcangeli Cc: Rik van Riel Signed-off-by: Linus Torvalds --- mm/rmap.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'mm/rmap.c') diff --git a/mm/rmap.c b/mm/rmap.c index 9d2ba01bd4f9..92e6757f196e 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -381,7 +381,13 @@ vma_address(struct page *page, struct vm_area_struct *vma) unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) { if (PageAnon(page)) { - if (vma->anon_vma->root != page_anon_vma(page)->root) + struct anon_vma *page__anon_vma = page_anon_vma(page); + /* + * Note: swapoff's unuse_vma() is more efficient with this + * check, and needs it to match anon_vma when KSM is active. + */ + if (!vma->anon_vma || !page__anon_vma || + vma->anon_vma->root != page__anon_vma->root) return -EFAULT; } else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) { if (!vma->vm_file || -- cgit v1.2.2