aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKirill A. Shutemov <kirill.shutemov@linux.intel.com>2016-04-28 19:18:27 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2016-04-28 22:34:04 -0400
commitaa88b68c3b1dce8bc3fd54c8a7372a777ff265cd (patch)
tree98ad92a044b6a2f39a8fa4c1c24df001108bd1a8
parent66ee95d16a7f1b7b4f1dd74a2d81c6e19dc29a14 (diff)
thp: keep huge zero page pinned until tlb flush
Andrea has found[1] a race condition on MMU-gather based TLB flush vs split_huge_page() or shrinker which frees huge zero under us (patch 1/2 and 2/2 respectively). With new THP refcounting, we don't need patch 1/2: mmu_gather keeps the page pinned until flush is complete and the pin prevents the page from being split under us. We still need patch 2/2. This is simplified version of Andrea's patch. We don't need fancy encoding. [1] http://lkml.kernel.org/r/1447938052-22165-1-git-send-email-aarcange@redhat.com Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Reported-by: Andrea Arcangeli <aarcange@redhat.com> Reviewed-by: Andrea Arcangeli <aarcange@redhat.com> Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> Cc: Mel Gorman <mgorman@techsingularity.net> Cc: Hugh Dickins <hughd@google.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Vlastimil Babka <vbabka@suse.cz> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--include/linux/huge_mm.h5
-rw-r--r--mm/huge_memory.c6
-rw-r--r--mm/swap.c5
3 files changed, 13 insertions, 3 deletions
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 7008623e24b1..d7b9e5346fba 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -152,6 +152,7 @@ static inline bool is_huge_zero_pmd(pmd_t pmd)
152} 152}
153 153
154struct page *get_huge_zero_page(void); 154struct page *get_huge_zero_page(void);
155void put_huge_zero_page(void);
155 156
156#else /* CONFIG_TRANSPARENT_HUGEPAGE */ 157#else /* CONFIG_TRANSPARENT_HUGEPAGE */
157#define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; }) 158#define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
@@ -208,6 +209,10 @@ static inline bool is_huge_zero_page(struct page *page)
208 return false; 209 return false;
209} 210}
210 211
212static inline void put_huge_zero_page(void)
213{
214 BUILD_BUG();
215}
211 216
212static inline struct page *follow_devmap_pmd(struct vm_area_struct *vma, 217static inline struct page *follow_devmap_pmd(struct vm_area_struct *vma,
213 unsigned long addr, pmd_t *pmd, int flags) 218 unsigned long addr, pmd_t *pmd, int flags)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 86f9f8b82f8e..5346de05f471 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -232,7 +232,7 @@ retry:
232 return READ_ONCE(huge_zero_page); 232 return READ_ONCE(huge_zero_page);
233} 233}
234 234
235static void put_huge_zero_page(void) 235void put_huge_zero_page(void)
236{ 236{
237 /* 237 /*
238 * Counter should never go to zero here. Only shrinker can put 238 * Counter should never go to zero here. Only shrinker can put
@@ -1684,12 +1684,12 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
1684 if (vma_is_dax(vma)) { 1684 if (vma_is_dax(vma)) {
1685 spin_unlock(ptl); 1685 spin_unlock(ptl);
1686 if (is_huge_zero_pmd(orig_pmd)) 1686 if (is_huge_zero_pmd(orig_pmd))
1687 put_huge_zero_page(); 1687 tlb_remove_page(tlb, pmd_page(orig_pmd));
1688 } else if (is_huge_zero_pmd(orig_pmd)) { 1688 } else if (is_huge_zero_pmd(orig_pmd)) {
1689 pte_free(tlb->mm, pgtable_trans_huge_withdraw(tlb->mm, pmd)); 1689 pte_free(tlb->mm, pgtable_trans_huge_withdraw(tlb->mm, pmd));
1690 atomic_long_dec(&tlb->mm->nr_ptes); 1690 atomic_long_dec(&tlb->mm->nr_ptes);
1691 spin_unlock(ptl); 1691 spin_unlock(ptl);
1692 put_huge_zero_page(); 1692 tlb_remove_page(tlb, pmd_page(orig_pmd));
1693 } else { 1693 } else {
1694 struct page *page = pmd_page(orig_pmd); 1694 struct page *page = pmd_page(orig_pmd);
1695 page_remove_rmap(page, true); 1695 page_remove_rmap(page, true);
diff --git a/mm/swap.c b/mm/swap.c
index a0bc206b4ac6..03aacbcb013f 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -728,6 +728,11 @@ void release_pages(struct page **pages, int nr, bool cold)
728 zone = NULL; 728 zone = NULL;
729 } 729 }
730 730
731 if (is_huge_zero_page(page)) {
732 put_huge_zero_page();
733 continue;
734 }
735
731 page = compound_head(page); 736 page = compound_head(page);
732 if (!put_page_testzero(page)) 737 if (!put_page_testzero(page))
733 continue; 738 continue;