author    David Rientjes <rientjes@google.com>    2019-09-04 15:54:20 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2019-09-28 17:05:38 -0400
commit    19deb7695e072deaff025e03de40c61b525bd57e (patch)
tree      cdbb63365d5f464d4e2660ba467ec062bd5f450b
parent    ac79f78dab892fcdc11fda8af5cc5e80d09dca8a (diff)
Revert "Revert "Revert "mm, thp: consolidate THP gfp handling into alloc_hugepage_direct_gfpmask""
This reverts commit 92717d429b38e4f9f934eed7e605cc42858f1839.

Since commit a8282608c88e ("Revert "mm, thp: restore node-local hugepage
allocations"") is reverted in this series, it is better to restore the
previous 5.2 behavior between the thp allocation and the page allocator
rather than to attempt any consolidation or cleanup for a policy that is
now reverted.  It's less risky during an rc cycle and subsequent patches
in this series further modify the same policy that the pre-5.3 behavior
implements.

Consolidation and cleanup can be done subsequent to a sane default page
allocation strategy, so this patch reverts a cleanup done on a strategy
that is now reverted and thus is the least risky option.

Signed-off-by: David Rientjes <rientjes@google.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Stefan Priebe - Profihost AG <s.priebe@profihost.ag>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
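[Editorial orientation note, not part of the commit: a minimal sketch of the node-local THP fault path this revert restores, drawn from the hunks below. The surrounding fault-handler context is simplified; gfp, page, vma, and haddr come from do_huge_pmd_anonymous_page() as shown in the mm/huge_memory.c hunk.]

	/* defrag mode selects the base gfp mask; no address argument */
	gfp = alloc_hugepage_direct_gfpmask(vma);

	/*
	 * alloc_hugepage_vma() passes hugepage=true, so alloc_pages_vma()
	 * adds __GFP_THISNODE and tries only the current/preferred node
	 * rather than falling back to remote nodes.
	 */
	page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
	if (unlikely(!page)) {
		count_vm_event(THP_FAULT_FALLBACK);
		return VM_FAULT_FALLBACK;
	}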
-rw-r--r-- include/linux/gfp.h | 12
-rw-r--r-- mm/huge_memory.c    | 27
-rw-r--r-- mm/mempolicy.c      | 32
-rw-r--r-- mm/shmem.c          |  2
4 files changed, 51 insertions(+), 22 deletions(-)
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index f33881688f42..fb07b503dc45 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -510,18 +510,22 @@ alloc_pages(gfp_t gfp_mask, unsigned int order)
 }
 extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
 			struct vm_area_struct *vma, unsigned long addr,
-			int node);
+			int node, bool hugepage);
+#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
+	alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true)
 #else
 #define alloc_pages(gfp_mask, order) \
 		alloc_pages_node(numa_node_id(), gfp_mask, order)
-#define alloc_pages_vma(gfp_mask, order, vma, addr, node)\
+#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\
+	alloc_pages(gfp_mask, order)
+#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
 	alloc_pages(gfp_mask, order)
 #endif
 #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
 #define alloc_page_vma(gfp_mask, vma, addr) \
-	alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id())
+	alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false)
 #define alloc_page_vma_node(gfp_mask, vma, addr, node) \
-	alloc_pages_vma(gfp_mask, 0, vma, addr, node)
+	alloc_pages_vma(gfp_mask, 0, vma, addr, node, false)
 
 extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
 extern unsigned long get_zeroed_page(gfp_t gfp_mask);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 62f0d8e9d76b..aec462cc5d46 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -645,30 +645,30 @@ release:
  *	  available
  * never: never stall for any thp allocation
  */
-static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma, unsigned long addr)
+static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma)
 {
 	const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE);
-	const gfp_t gfp_mask = GFP_TRANSHUGE_LIGHT | __GFP_THISNODE;
 
 	/* Always do synchronous compaction */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
-		return GFP_TRANSHUGE | __GFP_THISNODE |
-		       (vma_madvised ? 0 : __GFP_NORETRY);
+		return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY);
 
 	/* Kick kcompactd and fail quickly */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags))
-		return gfp_mask | __GFP_KSWAPD_RECLAIM;
+		return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM;
 
 	/* Synchronous compaction if madvised, otherwise kick kcompactd */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags))
-		return gfp_mask | (vma_madvised ? __GFP_DIRECT_RECLAIM :
-						  __GFP_KSWAPD_RECLAIM);
+		return GFP_TRANSHUGE_LIGHT |
+			(vma_madvised ? __GFP_DIRECT_RECLAIM :
+					__GFP_KSWAPD_RECLAIM);
 
 	/* Only do synchronous compaction if madvised */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags))
-		return gfp_mask | (vma_madvised ? __GFP_DIRECT_RECLAIM : 0);
+		return GFP_TRANSHUGE_LIGHT |
+			(vma_madvised ? __GFP_DIRECT_RECLAIM : 0);
 
-	return gfp_mask;
+	return GFP_TRANSHUGE_LIGHT;
 }
 
 /* Caller must hold page table lock. */
@@ -740,8 +740,8 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
 		pte_free(vma->vm_mm, pgtable);
 		return ret;
 	}
-	gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
-	page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, vma, haddr, numa_node_id());
+	gfp = alloc_hugepage_direct_gfpmask(vma);
+	page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
 	if (unlikely(!page)) {
 		count_vm_event(THP_FAULT_FALLBACK);
 		return VM_FAULT_FALLBACK;
@@ -1348,9 +1348,8 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
 alloc:
 	if (__transparent_hugepage_enabled(vma) &&
 	    !transparent_hugepage_debug_cow()) {
-		huge_gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
-		new_page = alloc_pages_vma(huge_gfp, HPAGE_PMD_ORDER, vma,
-				haddr, numa_node_id());
+		huge_gfp = alloc_hugepage_direct_gfpmask(vma);
+		new_page = alloc_hugepage_vma(huge_gfp, vma, haddr, HPAGE_PMD_ORDER);
 	} else
 		new_page = NULL;
 
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 9c9877a43d58..547cd403ed02 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1180,8 +1180,8 @@ static struct page *new_page(struct page *page, unsigned long start)
 	} else if (PageTransHuge(page)) {
 		struct page *thp;
 
-		thp = alloc_pages_vma(GFP_TRANSHUGE, HPAGE_PMD_ORDER, vma,
-				address, numa_node_id());
+		thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address,
+					 HPAGE_PMD_ORDER);
 		if (!thp)
 			return NULL;
 		prep_transhuge_page(thp);
@@ -2083,6 +2083,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  * @vma: Pointer to VMA or NULL if not available.
  * @addr: Virtual Address of the allocation. Must be inside the VMA.
  * @node: Which node to prefer for allocation (modulo policy).
+ * @hugepage: for hugepages try only the preferred node if possible
  *
  * This function allocates a page from the kernel page pool and applies
  * a NUMA policy associated with the VMA or the current process.
@@ -2093,7 +2094,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  */
 struct page *
 alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
-		unsigned long addr, int node)
+		unsigned long addr, int node, bool hugepage)
 {
 	struct mempolicy *pol;
 	struct page *page;
@@ -2111,6 +2112,31 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
 		goto out;
 	}
 
+	if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) {
+		int hpage_node = node;
+
+		/*
+		 * For hugepage allocation and non-interleave policy which
+		 * allows the current node (or other explicitly preferred
+		 * node) we only try to allocate from the current/preferred
+		 * node and don't fall back to other nodes, as the cost of
+		 * remote accesses would likely offset THP benefits.
+		 *
+		 * If the policy is interleave, or does not allow the current
+		 * node in its nodemask, we allocate the standard way.
+		 */
+		if (pol->mode == MPOL_PREFERRED && !(pol->flags & MPOL_F_LOCAL))
+			hpage_node = pol->v.preferred_node;
+
+		nmask = policy_nodemask(gfp, pol);
+		if (!nmask || node_isset(hpage_node, *nmask)) {
+			mpol_cond_put(pol);
+			page = __alloc_pages_node(hpage_node,
+						gfp | __GFP_THISNODE, order);
+			goto out;
+		}
+	}
+
 	nmask = policy_nodemask(gfp, pol);
 	preferred_nid = policy_node(gfp, pol, node);
 	page = __alloc_pages_nodemask(gfp, order, preferred_nid, nmask);
diff --git a/mm/shmem.c b/mm/shmem.c
index 2bed4761f279..626d8c74b973 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1466,7 +1466,7 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp,
 
 	shmem_pseudo_vma_init(&pvma, info, hindex);
 	page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN,
-			HPAGE_PMD_ORDER, &pvma, 0, numa_node_id());
+			HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(), true);
 	shmem_pseudo_vma_destroy(&pvma);
 	if (page)
 		prep_transhuge_page(page);