author		Kirill A. Shutemov <kirill.shutemov@linux.intel.com>	2016-04-28 19:18:27 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-04-28 22:34:04 -0400
commit		aa88b68c3b1dce8bc3fd54c8a7372a777ff265cd (patch)
tree		98ad92a044b6a2f39a8fa4c1c24df001108bd1a8
parent		66ee95d16a7f1b7b4f1dd74a2d81c6e19dc29a14 (diff)
thp: keep huge zero page pinned until tlb flush
Andrea has found[1] a race condition between MMU-gather based TLB flushing
and split_huge_page() or the shrinker freeing the huge zero page under us
(patches 1/2 and 2/2 of his series, respectively).

With the new THP refcounting, we don't need patch 1/2: mmu_gather keeps the
page pinned until the flush is complete, and the pin prevents the page from
being split under us.

We still need patch 2/2. This is a simplified version of Andrea's patch;
we don't need the fancy encoding.
[1] http://lkml.kernel.org/r/1447938052-22165-1-git-send-email-aarcange@redhat.com
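
To make the "pinned until flush" idea concrete, here is a minimal userspace
sketch of the mechanism, assuming simplified stand-ins for struct mmu_gather,
tlb_remove_page() and release_pages(); every name below is a hypothetical
analogue for illustration, not the kernel API:

#include <stdio.h>

#define BATCH_MAX 8

struct upage { int pincount; };

/* Analogue of struct mmu_gather: pages stay pinned while batched. */
struct gather {
	struct upage *batch[BATCH_MAX];
	int nr;
};

/* Analogue of tlb_remove_page(): defer the unpin, do not drop it now. */
static void gather_remove_page(struct gather *g, struct upage *p)
{
	g->batch[g->nr++] = p;
}

/* Analogue of the flush + release_pages() step: only after the "TLB
 * flush" is it safe to drop the pins taken by the mappings we zapped. */
static void gather_flush(struct gather *g)
{
	for (int i = 0; i < g->nr; i++)
		g->batch[i]->pincount--;
	g->nr = 0;
}

int main(void)
{
	struct upage zero_page = { .pincount = 1 };	/* shrinker's ref */
	struct gather g = { .nr = 0 };

	zero_page.pincount++;			/* a mapping pinned the page */
	gather_remove_page(&g, &zero_page);	/* zap: unpin is deferred    */
	/* A shrinker running here sees pincount == 2 and cannot free it.  */
	printf("before flush: pincount = %d\n", zero_page.pincount);
	gather_flush(&g);
	printf("after flush:  pincount = %d\n", zero_page.pincount);
	return 0;
}

The point is purely one of ordering: the pin taken by the mapping is only
dropped after the flush step, so a concurrent shrinker always sees an
elevated count while a stale TLB entry could still reference the page.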
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reported-by: Andrea Arcangeli <aarcange@redhat.com>
Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
 include/linux/huge_mm.h | 5 +++++
 mm/huge_memory.c        | 6 +++---
 mm/swap.c               | 5 +++++
 3 files changed, 13 insertions(+), 3 deletions(-)
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 7008623e24b1..d7b9e5346fba 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -152,6 +152,7 @@ static inline bool is_huge_zero_pmd(pmd_t pmd)
 }
 
 struct page *get_huge_zero_page(void);
+void put_huge_zero_page(void);
 
 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
 #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
@@ -208,6 +209,10 @@ static inline bool is_huge_zero_page(struct page *page)
 	return false;
 }
 
+static inline void put_huge_zero_page(void)
+{
+	BUILD_BUG();
+}
 
 static inline struct page *follow_devmap_pmd(struct vm_area_struct *vma,
 		unsigned long addr, pmd_t *pmd, int flags)
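
The !CONFIG_TRANSPARENT_HUGEPAGE stub above relies on BUILD_BUG() to turn
any reachable call into a build failure. For illustration, a hedged
userspace analogue of that pattern using GCC/Clang's error attribute (all
names here are made up, not kernel or libc API):

#include <stdio.h>

#define FEATURE_ON 1	/* flip to 0: the call in main() fails the build */

#if FEATURE_ON
static void put_feature_ref(void)
{
	puts("reference dropped");
}
#else
/* Like the kernel's BUILD_BUG(): any surviving call is a compile error. */
extern void put_feature_ref(void)
	__attribute__((error("put_feature_ref(): feature is compiled out")));
#endif

int main(void)
{
	put_feature_ref();
	return 0;
}

As with BUILD_BUG(), the error fires only if the call survives to code
generation, so a stub sitting in dead, optimized-away code costs nothing.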
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 86f9f8b82f8e..5346de05f471 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -232,7 +232,7 @@ retry:
 	return READ_ONCE(huge_zero_page);
 }
 
-static void put_huge_zero_page(void)
+void put_huge_zero_page(void)
 {
 	/*
 	 * Counter should never go to zero here. Only shrinker can put
@@ -1684,12 +1684,12 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	if (vma_is_dax(vma)) {
 		spin_unlock(ptl);
 		if (is_huge_zero_pmd(orig_pmd))
-			put_huge_zero_page();
+			tlb_remove_page(tlb, pmd_page(orig_pmd));
 	} else if (is_huge_zero_pmd(orig_pmd)) {
 		pte_free(tlb->mm, pgtable_trans_huge_withdraw(tlb->mm, pmd));
 		atomic_long_dec(&tlb->mm->nr_ptes);
 		spin_unlock(ptl);
-		put_huge_zero_page();
+		tlb_remove_page(tlb, pmd_page(orig_pmd));
 	} else {
 		struct page *page = pmd_page(orig_pmd);
 		page_remove_rmap(page, true);
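
The context around the first hunk notes that the counter "should never go
to zero here": only the shrinker may drop the last reference. A minimal
C11 sketch of that invariant, with hypothetical names (the kernel uses its
own atomic API and BUG_ON(), not stdatomic and assert()):

#include <assert.h>
#include <stdatomic.h>

static atomic_int zero_page_refcount = 1;	/* the shrinker's reference */

static void get_zero_ref(void)
{
	atomic_fetch_add(&zero_page_refcount, 1);
}

static void put_zero_ref(void)
{
	/* fetch_sub returns the previous value; it must exceed 1, i.e.
	 * this put can never drop the last reference. */
	int old = atomic_fetch_sub(&zero_page_refcount, 1);
	assert(old > 1);
}

int main(void)
{
	get_zero_ref();
	put_zero_ref();
	return 0;
}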
diff --git a/mm/swap.c b/mm/swap.c
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -728,6 +728,11 @@ void release_pages(struct page **pages, int nr, bool cold)
 			zone = NULL;
 		}
 
+		if (is_huge_zero_page(page)) {
+			put_huge_zero_page();
+			continue;
+		}
+
 		page = compound_head(page);
 		if (!put_page_testzero(page))
 			continue;
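
Finally, a userspace sketch of the release_pages() change above, with
hypothetical types: the huge zero page must never fall through to the
normal free path, so the batched release drops its pin and moves on:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct upage { int refcount; bool is_huge_zero; };

static void put_huge_zero_ref(struct upage *p)
{
	p->refcount--;		/* pin dropped; shrinker still owns the page */
}

static void release_batch(struct upage **pages, size_t nr)
{
	for (size_t i = 0; i < nr; i++) {
		struct upage *page = pages[i];

		if (page->is_huge_zero) {	/* mirrors the added check */
			put_huge_zero_ref(page);
			continue;
		}
		if (--page->refcount == 0)
			printf("page %zu freed\n", i);
	}
}

int main(void)
{
	struct upage zero = { .refcount = 2, .is_huge_zero = true };
	struct upage normal = { .refcount = 1, .is_huge_zero = false };
	struct upage *batch[] = { &zero, &normal };

	release_batch(batch, 2);
	printf("zero page refcount = %d (still alive)\n", zero.refcount);
	return 0;
}

Together with the zap_huge_pmd() hunks, this is what keeps the zero page
alive across the window between clearing the PMD and flushing the TLB.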