author    David Rientjes <rientjes@google.com>  2018-12-07 17:50:16 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>  2018-12-08 13:26:20 -0500
commit    356ff8a9a78fb35d6482584d260c3754dcbdf669 (patch)
tree      1893db13840f8d37c1562d417b93827f5425170c
parent    5f179793f0a73965681db6a3203fa1baabd9b3c3 (diff)
Revert "mm, thp: consolidate THP gfp handling into alloc_hugepage_direct_gfpmask"
This reverts commit 89c83fb539f95491be80cdd5158e6f0ce329e317.

This should have been done as part of 2f0799a0ffc0 ("mm, thp: restore node-local hugepage allocations"). The movement of the thp allocation policy from alloc_pages_vma() to alloc_hugepage_direct_gfpmask() was intended to only set __GFP_THISNODE for mempolicies that are not MPOL_BIND whereas the revert could set this regardless of mempolicy.

While the check for MPOL_BIND between alloc_hugepage_direct_gfpmask() and alloc_pages_vma() was racy, that has since been removed since the revert. What is left is the possibility to use __GFP_THISNODE in policy_node() when it is unexpected because the special handling for hugepages in alloc_pages_vma() was removed as part of the consolidation.

Secondly, prior to 89c83fb539f9, alloc_pages_vma() implemented a somewhat different policy for hugepage allocations, which were allocated through alloc_hugepage_vma(). For hugepage allocations, if the allocating process's node is in the set of allowed nodes, allocate with __GFP_THISNODE for that node (for MPOL_PREFERRED, use that node with __GFP_THISNODE instead). This was changed for shmem_alloc_hugepage() to allow fallback to other nodes in 89c83fb539f9 as it did for new_page() in mm/mempolicy.c, which is functionally different behavior and removes the requirement to only allocate hugepages locally.

So this commit does a full revert of 89c83fb539f9 instead of the partial revert that was done in 2f0799a0ffc0. The result is the same thp allocation policy for 4.20 that was in 4.19.

Fixes: 89c83fb539f9 ("mm, thp: consolidate THP gfp handling into alloc_hugepage_direct_gfpmask")
Fixes: 2f0799a0ffc0 ("mm, thp: restore node-local hugepage allocations")
Signed-off-by: David Rientjes <rientjes@google.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
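For reference, a minimal sketch of the node-local hugepage decision that this revert restores in alloc_pages_vma() (condensed from the mm/mempolicy.c hunk below; the unlikely() annotation, interleave handling, and surrounding allocation path are elided):

	/* Sketch only: restored hugepage branch of alloc_pages_vma(). */
	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage) {
		int hpage_node = node;		/* default: the local node */

		/* An explicit MPOL_PREFERRED node overrides the local node. */
		if (pol->mode == MPOL_PREFERRED && !(pol->flags & MPOL_F_LOCAL))
			hpage_node = pol->v.preferred_node;

		nmask = policy_nodemask(gfp, pol);
		if (!nmask || node_isset(hpage_node, *nmask)) {
			mpol_cond_put(pol);
			/* __GFP_THISNODE: no fallback to remote nodes for THP. */
			page = __alloc_pages_node(hpage_node,
						  gfp | __GFP_THISNODE, order);
			goto out;
		}
	}
	/* Otherwise fall through to the normal mempolicy-based allocation. */

This matches the pre-89c83fb539f9 behavior described above: THP is allocated only on the local (or explicitly preferred) node, and interleave or disallowed-node policies use the standard allocation path.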
-rw-r--r--  include/linux/gfp.h   12
-rw-r--r--  mm/huge_memory.c      27
-rw-r--r--  mm/mempolicy.c        32
-rw-r--r--  mm/shmem.c             2
4 files changed, 51 insertions, 22 deletions
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 76f8db0b0e71..0705164f928c 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -510,18 +510,22 @@ alloc_pages(gfp_t gfp_mask, unsigned int order)
 }
 extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
 			struct vm_area_struct *vma, unsigned long addr,
-			int node);
+			int node, bool hugepage);
+#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
+	alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true)
 #else
 #define alloc_pages(gfp_mask, order) \
 		alloc_pages_node(numa_node_id(), gfp_mask, order)
-#define alloc_pages_vma(gfp_mask, order, vma, addr, node)\
+#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\
+	alloc_pages(gfp_mask, order)
+#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
 	alloc_pages(gfp_mask, order)
 #endif
 #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
 #define alloc_page_vma(gfp_mask, vma, addr) \
-	alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id())
+	alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false)
 #define alloc_page_vma_node(gfp_mask, vma, addr, node) \
-	alloc_pages_vma(gfp_mask, 0, vma, addr, node)
+	alloc_pages_vma(gfp_mask, 0, vma, addr, node, false)
 
 extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
 extern unsigned long get_zeroed_page(gfp_t gfp_mask);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f2d19e4fe854..5da55b38b1b7 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -629,30 +629,30 @@ release:
  *	    available
  * never: never stall for any thp allocation
  */
-static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma, unsigned long addr)
+static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma)
 {
 	const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE);
-	const gfp_t gfp_mask = GFP_TRANSHUGE_LIGHT | __GFP_THISNODE;
 
 	/* Always do synchronous compaction */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
-		return GFP_TRANSHUGE | __GFP_THISNODE |
-			(vma_madvised ? 0 : __GFP_NORETRY);
+		return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY);
 
 	/* Kick kcompactd and fail quickly */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags))
-		return gfp_mask | __GFP_KSWAPD_RECLAIM;
+		return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM;
 
 	/* Synchronous compaction if madvised, otherwise kick kcompactd */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags))
-		return gfp_mask | (vma_madvised ? __GFP_DIRECT_RECLAIM :
-						  __GFP_KSWAPD_RECLAIM);
+		return GFP_TRANSHUGE_LIGHT |
+			(vma_madvised ? __GFP_DIRECT_RECLAIM :
+					__GFP_KSWAPD_RECLAIM);
 
 	/* Only do synchronous compaction if madvised */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags))
-		return gfp_mask | (vma_madvised ? __GFP_DIRECT_RECLAIM : 0);
+		return GFP_TRANSHUGE_LIGHT |
+			(vma_madvised ? __GFP_DIRECT_RECLAIM : 0);
 
-	return gfp_mask;
+	return GFP_TRANSHUGE_LIGHT;
 }
 
 /* Caller must hold page table lock. */
@@ -724,8 +724,8 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
 		pte_free(vma->vm_mm, pgtable);
 		return ret;
 	}
-	gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
-	page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, vma, haddr, numa_node_id());
+	gfp = alloc_hugepage_direct_gfpmask(vma);
+	page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
 	if (unlikely(!page)) {
 		count_vm_event(THP_FAULT_FALLBACK);
 		return VM_FAULT_FALLBACK;
@@ -1295,9 +1295,8 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
 alloc:
 	if (transparent_hugepage_enabled(vma) &&
 	    !transparent_hugepage_debug_cow()) {
-		huge_gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
-		new_page = alloc_pages_vma(huge_gfp, HPAGE_PMD_ORDER, vma,
-				haddr, numa_node_id());
+		huge_gfp = alloc_hugepage_direct_gfpmask(vma);
+		new_page = alloc_hugepage_vma(huge_gfp, vma, haddr, HPAGE_PMD_ORDER);
 	} else
 		new_page = NULL;
 
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 69e278b469ef..d4496d9d34f5 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1116,8 +1116,8 @@ static struct page *new_page(struct page *page, unsigned long start)
 	} else if (PageTransHuge(page)) {
 		struct page *thp;
 
-		thp = alloc_pages_vma(GFP_TRANSHUGE, HPAGE_PMD_ORDER, vma,
-				address, numa_node_id());
+		thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address,
+					 HPAGE_PMD_ORDER);
 		if (!thp)
 			return NULL;
 		prep_transhuge_page(thp);
@@ -2011,6 +2011,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  * @vma:  Pointer to VMA or NULL if not available.
  * @addr: Virtual Address of the allocation. Must be inside the VMA.
  * @node: Which node to prefer for allocation (modulo policy).
+ * @hugepage: for hugepages try only the preferred node if possible
  *
  * This function allocates a page from the kernel page pool and applies
  * a NUMA policy associated with the VMA or the current process.
@@ -2021,7 +2022,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  */
 struct page *
 alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
-		unsigned long addr, int node)
+		unsigned long addr, int node, bool hugepage)
 {
 	struct mempolicy *pol;
 	struct page *page;
@@ -2039,6 +2040,31 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
 		goto out;
 	}
 
+	if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) {
+		int hpage_node = node;
+
+		/*
+		 * For hugepage allocation and non-interleave policy which
+		 * allows the current node (or other explicitly preferred
+		 * node) we only try to allocate from the current/preferred
+		 * node and don't fall back to other nodes, as the cost of
+		 * remote accesses would likely offset THP benefits.
+		 *
+		 * If the policy is interleave, or does not allow the current
+		 * node in its nodemask, we allocate the standard way.
+		 */
+		if (pol->mode == MPOL_PREFERRED && !(pol->flags & MPOL_F_LOCAL))
+			hpage_node = pol->v.preferred_node;
+
+		nmask = policy_nodemask(gfp, pol);
+		if (!nmask || node_isset(hpage_node, *nmask)) {
+			mpol_cond_put(pol);
+			page = __alloc_pages_node(hpage_node,
+						gfp | __GFP_THISNODE, order);
+			goto out;
+		}
+	}
+
 	nmask = policy_nodemask(gfp, pol);
 	preferred_nid = policy_node(gfp, pol, node);
 	page = __alloc_pages_nodemask(gfp, order, preferred_nid, nmask);
diff --git a/mm/shmem.c b/mm/shmem.c
index cddc72ac44d8..921f80488bb3 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1439,7 +1439,7 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp,
 
 	shmem_pseudo_vma_init(&pvma, info, hindex);
 	page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN,
-			HPAGE_PMD_ORDER, &pvma, 0, numa_node_id());
+			HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(), true);
 	shmem_pseudo_vma_destroy(&pvma);
 	if (page)
 		prep_transhuge_page(page);