path: root/mm
author		Andrea Arcangeli <aarcange@redhat.com>		2012-03-05 17:59:20 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-03-05 18:49:43 -0500
commit		1c641e84719429bbfe62a95ed3545ee7fe24408f (patch)
tree		054160f0bcb9845538278116facc759ff7a345e1 /mm
parent		62aca403657fe30e5235c5331e9871e676d9ea0a (diff)
mm: thp: fix BUG on mm->nr_ptes
Dave Jones reports a few Fedora users hitting the BUG_ON(mm->nr_ptes...)
in exit_mmap() recently.

Quoting Hugh's discovery and explanation of the SMP race condition:

  "mm->nr_ptes had unusual locking: down_read mmap_sem plus
  page_table_lock when incrementing, down_write mmap_sem (or mm_users
  0) when decrementing; whereas THP is careful to increment and
  decrement it under page_table_lock.

  Now most of those paths in THP also hold mmap_sem for read or write
  (with appropriate checks on mm_users), but two do not: when
  split_huge_page() is called by hwpoison_user_mappings(), and when
  called by add_to_swap().

  It's conceivable that the latter case is responsible for the
  exit_mmap() BUG_ON mm->nr_ptes that has been reported on Fedora."

The simplest way to fix this, without altering the locking, is to make
split_huge_page() a noop in nr_ptes terms, by counting the preallocated
pagetable that exists for every mapped hugepage. Not counting those
tables was an arbitrary choice, and neither way is wrong or right,
because the tables are unused but still allocated.

Reported-by: Dave Jones <davej@redhat.com>
Reported-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Hugh Dickins <hughd@google.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Josh Boyer <jwboyer@redhat.com>
Cc: <stable@vger.kernel.org> [3.0.x, 3.1.x, 3.2.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
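To make the new rule concrete, here is a minimal userspace sketch of the
accounting this patch establishes. The struct and helper names below are
illustrative stand-ins, not the kernel's API; only the
increment/decrement/noop pattern mirrors the diff that follows.

	#include <assert.h>

	/* Toy model of mm->nr_ptes accounting after this patch. */
	struct toy_mm { int nr_ptes; };

	/* Mapping a hugepage now counts the pte table preallocated
	 * alongside it, even though that table is not yet in use. */
	static void map_hugepage(struct toy_mm *mm)    { mm->nr_ptes++; }

	/* Splitting merely puts the preallocated table into use, so it
	 * no longer touches the counter -- the update that could race
	 * without mmap_sem is gone. */
	static void split_hugepage(struct toy_mm *mm)  { (void)mm; }

	/* Zapping a still-huge page frees the preallocated table. */
	static void zap_hugepage(struct toy_mm *mm)    { mm->nr_ptes--; }

	/* Regular pte-table teardown already decrements nr_ptes; after
	 * a split the table is an ordinary one and takes this path. */
	static void free_pte_table(struct toy_mm *mm)  { mm->nr_ptes--; }

	int main(void)
	{
		struct toy_mm mm = { .nr_ptes = 0 };

		/* Hugepage zapped while still huge. */
		map_hugepage(&mm);
		zap_hugepage(&mm);
		assert(mm.nr_ptes == 0);

		/* Hugepage split first, e.g. by add_to_swap() without
		 * mmap_sem; the counter stays balanced either way. */
		map_hugepage(&mm);
		split_hugepage(&mm);
		free_pte_table(&mm);
		assert(mm.nr_ptes == 0); /* exit_mmap()'s BUG_ON holds */

		return 0;
	}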
Diffstat (limited to 'mm')
-rw-r--r--	mm/huge_memory.c	6
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 91d3efb25d15..8f7fc394f636 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -671,6 +671,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
 		set_pmd_at(mm, haddr, pmd, entry);
 		prepare_pmd_huge_pte(pgtable, mm);
 		add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR);
+		mm->nr_ptes++;
 		spin_unlock(&mm->page_table_lock);
 	}
 
@@ -789,6 +790,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	pmd = pmd_mkold(pmd_wrprotect(pmd));
 	set_pmd_at(dst_mm, addr, dst_pmd, pmd);
 	prepare_pmd_huge_pte(pgtable, dst_mm);
+	dst_mm->nr_ptes++;
 
 	ret = 0;
 out_unlock:
@@ -887,7 +889,6 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
 	}
 	kfree(pages);
 
-	mm->nr_ptes++;
 	smp_wmb(); /* make pte visible before pmd */
 	pmd_populate(mm, pmd, pgtable);
 	page_remove_rmap(page);
@@ -1047,6 +1048,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 			VM_BUG_ON(page_mapcount(page) < 0);
 			add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
 			VM_BUG_ON(!PageHead(page));
+			tlb->mm->nr_ptes--;
 			spin_unlock(&tlb->mm->page_table_lock);
 			tlb_remove_page(tlb, page);
 			pte_free(tlb->mm, pgtable);
@@ -1375,7 +1377,6 @@ static int __split_huge_page_map(struct page *page,
 			pte_unmap(pte);
 		}
 
-		mm->nr_ptes++;
 		smp_wmb(); /* make pte visible before pmd */
 		/*
 		 * Up to this point the pmd is present and huge and
@@ -1988,7 +1989,6 @@ static void collapse_huge_page(struct mm_struct *mm,
 	set_pmd_at(mm, address, pmd, _pmd);
 	update_mmu_cache(vma, address, _pmd);
 	prepare_pmd_huge_pte(pgtable, mm);
-	mm->nr_ptes--;
 	spin_unlock(&mm->page_table_lock);
 
 #ifndef CONFIG_NUMA
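For reference, the assertion that was firing sits at the end of
exit_mmap(); in the 3.0-3.2 era mm/mmap.c it reads approximately as
follows (quoted for context, not part of this diff):

	BUG_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT);

With the hunks above, each mapped hugepage contributes exactly one
count for its preallocated pte table, so split_huge_page() never has to
update nr_ptes from a context that lacks mmap_sem, and the counter is
balanced by the time this check runs.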