author		David Rientjes <rientjes@google.com>		2018-12-07 17:50:16 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2018-12-08 13:26:20 -0500
commit		356ff8a9a78fb35d6482584d260c3754dcbdf669 (patch)
tree		1893db13840f8d37c1562d417b93827f5425170c
parent		5f179793f0a73965681db6a3203fa1baabd9b3c3 (diff)
Revert "mm, thp: consolidate THP gfp handling into alloc_hugepage_direct_gfpmask"
This reverts commit 89c83fb539f95491be80cdd5158e6f0ce329e317.

This should have been done as part of 2f0799a0ffc0 ("mm, thp: restore
node-local hugepage allocations"). The movement of the thp allocation
policy from alloc_pages_vma() to alloc_hugepage_direct_gfpmask() was
intended to only set __GFP_THISNODE for mempolicies that are not
MPOL_BIND whereas the revert could set this regardless of mempolicy.

While the check for MPOL_BIND between alloc_hugepage_direct_gfpmask()
and alloc_pages_vma() was racy, that has since been removed since the
revert. What is left is the possibility to use __GFP_THISNODE in
policy_node() when it is unexpected because the special handling for
hugepages in alloc_pages_vma() was removed as part of the consolidation.

Secondly, prior to 89c83fb539f9, alloc_pages_vma() implemented a somewhat
different policy for hugepage allocations, which were allocated through
alloc_hugepage_vma(). For hugepage allocations, if the allocating
process's node is in the set of allowed nodes, allocate with
__GFP_THISNODE for that node (for MPOL_PREFERRED, use that node with
__GFP_THISNODE instead). This was changed for shmem_alloc_hugepage() to
allow fallback to other nodes in 89c83fb539f9 as it did for new_page() in
mm/mempolicy.c which is functionally different behavior and removes the
requirement to only allocate hugepages locally.

So this commit does a full revert of 89c83fb539f9 instead of the partial
revert that was done in 2f0799a0ffc0. The result is the same thp
allocation policy for 4.20 that was in 4.19.
Fixes: 89c83fb539f9 ("mm, thp: consolidate THP gfp handling into alloc_hugepage_direct_gfpmask")
Fixes: 2f0799a0ffc0 ("mm, thp: restore node-local hugepage allocations")
Signed-off-by: David Rientjes <rientjes@google.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
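
Editor's note: the behavioural difference the message describes comes down to whether a THP request may fall back to a remote node. The toy program below only illustrates those two placement policies; nothing in it is kernel API, and the node count and free-page figures are made up:

#include <stdbool.h>
#include <stdio.h>

#define NR_NODES 2

/* Toy model: free huge pages per node; node 0 is exhausted. */
static int free_huge_pages[NR_NODES] = { 0, 8 };

/* Node-local policy (4.19, restored by this revert): try only the local
 * or explicitly preferred node; on failure the caller falls back to
 * base pages instead of a remote THP. */
static int alloc_thp_thisnode(int node)
{
	return free_huge_pages[node] > 0 ? node : -1;
}

/* Policy under 89c83fb539f9: the THP request may fall back to another
 * allowed node, trading locality for a better chance of a huge page. */
static int alloc_thp_fallback(int node)
{
	for (int i = 0; i < NR_NODES; i++) {
		int candidate = (node + i) % NR_NODES;

		if (free_huge_pages[candidate] > 0)
			return candidate;
	}
	return -1;
}

int main(void)
{
	printf("node-local policy: node %d (-1 means fall back to base pages)\n",
	       alloc_thp_thisnode(0));
	printf("fallback policy:   node %d (remote THP)\n",
	       alloc_thp_fallback(0));
	return 0;
}

Under the node-local policy the failed THP request is satisfied with local base pages; under the 89c83fb539f9 policy the process gets a huge page whose accesses are remote.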
-rw-r--r--	include/linux/gfp.h	12
-rw-r--r--	mm/huge_memory.c	27
-rw-r--r--	mm/mempolicy.c	32
-rw-r--r--	mm/shmem.c	2
4 files changed, 51 insertions, 22 deletions
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 76f8db0b0e71..0705164f928c 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -510,18 +510,22 @@ alloc_pages(gfp_t gfp_mask, unsigned int order)
 }
 extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
 			struct vm_area_struct *vma, unsigned long addr,
-			int node);
+			int node, bool hugepage);
+#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
+	alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true)
 #else
 #define alloc_pages(gfp_mask, order) \
 		alloc_pages_node(numa_node_id(), gfp_mask, order)
-#define alloc_pages_vma(gfp_mask, order, vma, addr, node)\
+#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\
+	alloc_pages(gfp_mask, order)
+#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
 	alloc_pages(gfp_mask, order)
 #endif
 #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
 #define alloc_page_vma(gfp_mask, vma, addr)			\
-	alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id())
+	alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false)
 #define alloc_page_vma_node(gfp_mask, vma, addr, node)		\
-	alloc_pages_vma(gfp_mask, 0, vma, addr, node)
+	alloc_pages_vma(gfp_mask, 0, vma, addr, node, false)
 
 extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
 extern unsigned long get_zeroed_page(gfp_t gfp_mask);
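
For reference, a small self-contained sketch of how the restored interface is meant to be used from a THP call site. alloc_pages_vma(), numa_node_id() and vm_area_struct here are stand-in stubs so the macro expansion can be compiled and run outside the kernel; they are not the real definitions:

#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for the kernel types and helpers; illustrative only. */
typedef unsigned int gfp_t;
struct vm_area_struct { int dummy; };
static int numa_node_id(void) { return 0; }

/* Mirrors the shape of the restored CONFIG_NUMA prototype: the last
 * argument marks a THP allocation that should prefer a single node. */
static void *alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
			     unsigned long addr, int node, bool hugepage)
{
	printf("gfp=%#x addr=%#lx order=%d node=%d hugepage=%d\n",
	       gfp, addr, order, node, (int)hugepage);
	(void)vma;
	return NULL;
}

/* Same shape as the restored convenience macro in <linux/gfp.h>. */
#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
	alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true)

int main(void)
{
	struct vm_area_struct vma = { 0 };

	/* A THP call site: expands with hugepage == true.
	 * Order 9 stands in for HPAGE_PMD_ORDER on x86. */
	alloc_hugepage_vma(0u, &vma, 0x1000UL, 9);
	/* An ordinary base-page call site passes hugepage == false. */
	alloc_pages_vma(0u, 0, &vma, 0x1000UL, numa_node_id(), false);
	return 0;
}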
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f2d19e4fe854..5da55b38b1b7 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -629,30 +629,30 @@ release:
  *	    available
  * never: never stall for any thp allocation
  */
-static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma, unsigned long addr)
+static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma)
 {
 	const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE);
-	const gfp_t gfp_mask = GFP_TRANSHUGE_LIGHT | __GFP_THISNODE;
 
 	/* Always do synchronous compaction */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
-		return GFP_TRANSHUGE | __GFP_THISNODE |
-		       (vma_madvised ? 0 : __GFP_NORETRY);
+		return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY);
 
 	/* Kick kcompactd and fail quickly */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags))
-		return gfp_mask | __GFP_KSWAPD_RECLAIM;
+		return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM;
 
 	/* Synchronous compaction if madvised, otherwise kick kcompactd */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags))
-		return gfp_mask | (vma_madvised ? __GFP_DIRECT_RECLAIM :
-						  __GFP_KSWAPD_RECLAIM);
+		return GFP_TRANSHUGE_LIGHT |
+			(vma_madvised ? __GFP_DIRECT_RECLAIM :
+					__GFP_KSWAPD_RECLAIM);
 
 	/* Only do synchronous compaction if madvised */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags))
-		return gfp_mask | (vma_madvised ? __GFP_DIRECT_RECLAIM : 0);
+		return GFP_TRANSHUGE_LIGHT |
+			(vma_madvised ? __GFP_DIRECT_RECLAIM : 0);
 
-	return gfp_mask;
+	return GFP_TRANSHUGE_LIGHT;
 }
 
 /* Caller must hold page table lock. */
@@ -724,8 +724,8 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
 		pte_free(vma->vm_mm, pgtable);
 		return ret;
 	}
-	gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
-	page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, vma, haddr, numa_node_id());
+	gfp = alloc_hugepage_direct_gfpmask(vma);
+	page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
 	if (unlikely(!page)) {
 		count_vm_event(THP_FAULT_FALLBACK);
 		return VM_FAULT_FALLBACK;
@@ -1295,9 +1295,8 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
 alloc:
 	if (transparent_hugepage_enabled(vma) &&
 	    !transparent_hugepage_debug_cow()) {
-		huge_gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
-		new_page = alloc_pages_vma(huge_gfp, HPAGE_PMD_ORDER, vma,
-				haddr, numa_node_id());
+		huge_gfp = alloc_hugepage_direct_gfpmask(vma);
+		new_page = alloc_hugepage_vma(huge_gfp, vma, haddr, HPAGE_PMD_ORDER);
 	} else
 		new_page = NULL;
 
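
The restored alloc_hugepage_direct_gfpmask() simply maps the THP defrag mode to a gfp mask and no longer adds __GFP_THISNODE; node placement is decided later, in alloc_pages_vma(). The userspace model below mirrors only that selection logic from the hunk above; the flag bit values and the defrag_mode enum are arbitrary stand-ins, not the real <linux/gfp.h> or sysfs definitions:

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-in flag bits; the real values live in <linux/gfp.h>. */
#define __GFP_NORETRY		(1u << 0)
#define __GFP_KSWAPD_RECLAIM	(1u << 1)
#define __GFP_DIRECT_RECLAIM	(1u << 2)
#define GFP_TRANSHUGE_LIGHT	(1u << 3)
#define GFP_TRANSHUGE		(GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM)

enum defrag_mode {
	DEFRAG_ALWAYS,		/* "always" */
	DEFRAG_DEFER,		/* "defer" */
	DEFRAG_DEFER_MADVISE,	/* "defer+madvise" */
	DEFRAG_MADVISE,		/* "madvise" */
	DEFRAG_NEVER,		/* "never" */
};

/* Mirrors the post-revert gfp selection in alloc_hugepage_direct_gfpmask(). */
static unsigned int thp_gfpmask(enum defrag_mode mode, bool vma_madvised)
{
	switch (mode) {
	case DEFRAG_ALWAYS:		/* always do synchronous compaction */
		return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY);
	case DEFRAG_DEFER:		/* kick kcompactd and fail quickly */
		return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM;
	case DEFRAG_DEFER_MADVISE:	/* sync compaction if madvised, else kcompactd */
		return GFP_TRANSHUGE_LIGHT |
			(vma_madvised ? __GFP_DIRECT_RECLAIM : __GFP_KSWAPD_RECLAIM);
	case DEFRAG_MADVISE:		/* sync compaction only if madvised */
		return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM : 0);
	default:			/* "never": never stall */
		return GFP_TRANSHUGE_LIGHT;
	}
}

int main(void)
{
	for (int m = DEFRAG_ALWAYS; m <= DEFRAG_NEVER; m++)
		printf("mode %d: madvised=0 -> %#x, madvised=1 -> %#x\n",
		       m, thp_gfpmask(m, false), thp_gfpmask(m, true));
	return 0;
}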
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 69e278b469ef..d4496d9d34f5 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1116,8 +1116,8 @@ static struct page *new_page(struct page *page, unsigned long start)
 	} else if (PageTransHuge(page)) {
 		struct page *thp;
 
-		thp = alloc_pages_vma(GFP_TRANSHUGE, HPAGE_PMD_ORDER, vma,
-				address, numa_node_id());
+		thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address,
+					 HPAGE_PMD_ORDER);
 		if (!thp)
 			return NULL;
 		prep_transhuge_page(thp);
@@ -2011,6 +2011,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  * @vma:  Pointer to VMA or NULL if not available.
  * @addr: Virtual Address of the allocation. Must be inside the VMA.
  * @node: Which node to prefer for allocation (modulo policy).
+ * @hugepage: for hugepages try only the preferred node if possible
  *
  * This function allocates a page from the kernel page pool and applies
  * a NUMA policy associated with the VMA or the current process.
@@ -2021,7 +2022,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  */
 struct page *
 alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
-		unsigned long addr, int node)
+		unsigned long addr, int node, bool hugepage)
 {
 	struct mempolicy *pol;
 	struct page *page;
@@ -2039,6 +2040,31 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
 		goto out;
 	}
 
+	if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) {
+		int hpage_node = node;
+
+		/*
+		 * For hugepage allocation and non-interleave policy which
+		 * allows the current node (or other explicitly preferred
+		 * node) we only try to allocate from the current/preferred
+		 * node and don't fall back to other nodes, as the cost of
+		 * remote accesses would likely offset THP benefits.
+		 *
+		 * If the policy is interleave, or does not allow the current
+		 * node in its nodemask, we allocate the standard way.
+		 */
+		if (pol->mode == MPOL_PREFERRED && !(pol->flags & MPOL_F_LOCAL))
+			hpage_node = pol->v.preferred_node;
+
+		nmask = policy_nodemask(gfp, pol);
+		if (!nmask || node_isset(hpage_node, *nmask)) {
+			mpol_cond_put(pol);
+			page = __alloc_pages_node(hpage_node,
+						gfp | __GFP_THISNODE, order);
+			goto out;
+		}
+	}
+
 	nmask = policy_nodemask(gfp, pol);
 	preferred_nid = policy_node(gfp, pol, node);
 	page = __alloc_pages_nodemask(gfp, order, preferred_nid, nmask);
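
The block restored in alloc_pages_vma() is the core of the revert: for a hugepage allocation whose policy allows the current (or explicitly preferred) node, allocate from that node with __GFP_THISNODE and do not fall back. Below is a rough userspace model of that decision; the mempolicy structure and nodemask handling are simplified stand-ins, and the interleave case (handled earlier in the real function) is folded into the same helper for the sake of a self-contained example:

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-ins for the mempolicy pieces used by the restored branch. */
enum mpol_mode { MPOL_DEFAULT, MPOL_PREFERRED, MPOL_BIND, MPOL_INTERLEAVE };
#define MPOL_F_LOCAL	(1u << 0)

struct mempolicy {
	enum mpol_mode mode;
	unsigned int flags;
	int preferred_node;	/* stands in for pol->v.preferred_node */
	unsigned long nodes;	/* bitmask standing in for the nodemask */
	bool has_nodemask;	/* models policy_nodemask() returning non-NULL */
};

/*
 * Returns true when the THP should be allocated node-locally with
 * __GFP_THISNODE, writing the chosen node; false means "take the normal
 * policy path, where fallback to other nodes is possible".
 */
static bool thp_node_local(const struct mempolicy *pol, int local_node,
			   int *hpage_node)
{
	*hpage_node = local_node;

	/* Interleave policies allocate the standard way (handled earlier
	 * in the real alloc_pages_vma(); folded in here). */
	if (pol->mode == MPOL_INTERLEAVE)
		return false;
	if (pol->mode == MPOL_PREFERRED && !(pol->flags & MPOL_F_LOCAL))
		*hpage_node = pol->preferred_node;

	/* No nodemask, or the chosen node is allowed by it: go node-local. */
	return !pol->has_nodemask || (pol->nodes & (1ul << *hpage_node));
}

int main(void)
{
	/* MPOL_BIND to node 1 while faulting on node 0: standard path. */
	struct mempolicy bind_remote = {
		.mode = MPOL_BIND, .nodes = 1ul << 1, .has_nodemask = true,
	};
	int node;

	if (thp_node_local(&bind_remote, 0, &node))
		printf("THP: __GFP_THISNODE allocation on node %d\n", node);
	else
		printf("THP: normal policy path (fallback allowed)\n");
	return 0;
}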
diff --git a/mm/shmem.c b/mm/shmem.c
index cddc72ac44d8..921f80488bb3 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1439,7 +1439,7 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp,
 
 	shmem_pseudo_vma_init(&pvma, info, hindex);
 	page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN,
-			HPAGE_PMD_ORDER, &pvma, 0, numa_node_id());
+			HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(), true);
 	shmem_pseudo_vma_destroy(&pvma);
 	if (page)
 		prep_transhuge_page(page);