-rw-r--r--  include/linux/gfp.h       | 12
-rw-r--r--  include/linux/mempolicy.h |  2
-rw-r--r--  mm/huge_memory.c          | 38
-rw-r--r--  mm/mempolicy.c            | 63
-rw-r--r--  mm/shmem.c                |  2

5 files changed, 40 insertions, 77 deletions
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 24bcc5eec6b4..76f8db0b0e71 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -510,22 +510,18 @@ alloc_pages(gfp_t gfp_mask, unsigned int order)
 }
 extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
 			struct vm_area_struct *vma, unsigned long addr,
-			int node, bool hugepage);
-#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
-	alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true)
+			int node);
 #else
 #define alloc_pages(gfp_mask, order) \
 		alloc_pages_node(numa_node_id(), gfp_mask, order)
-#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\
-	alloc_pages(gfp_mask, order)
-#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
+#define alloc_pages_vma(gfp_mask, order, vma, addr, node)\
 	alloc_pages(gfp_mask, order)
 #endif
 #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
 #define alloc_page_vma(gfp_mask, vma, addr) \
-	alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false)
+	alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id())
 #define alloc_page_vma_node(gfp_mask, vma, addr, node) \
-	alloc_pages_vma(gfp_mask, 0, vma, addr, node, false)
+	alloc_pages_vma(gfp_mask, 0, vma, addr, node)
 
 extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
 extern unsigned long get_zeroed_page(gfp_t gfp_mask);
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 5228c62af416..bac395f1d00a 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -139,6 +139,8 @@ struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp,
 struct mempolicy *get_task_policy(struct task_struct *p);
 struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
 		unsigned long addr);
+struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
+		unsigned long addr);
 bool vma_policy_mof(struct vm_area_struct *vma);
 
 extern void numa_default_policy(void);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 4e4ef8fa479d..55478ab3c83b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -629,21 +629,40 @@ release:
  *	      available
  * never: never stall for any thp allocation
  */
-static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma)
+static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma, unsigned long addr)
 {
 	const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE);
+	gfp_t this_node = 0;
+
+#ifdef CONFIG_NUMA
+	struct mempolicy *pol;
+	/*
+	 * __GFP_THISNODE is used only when __GFP_DIRECT_RECLAIM is not
+	 * specified, to express a general desire to stay on the current
+	 * node for optimistic allocation attempts. If the defrag mode
+	 * and/or madvise hint requires the direct reclaim then we prefer
+	 * to fallback to other node rather than node reclaim because that
+	 * can lead to excessive reclaim even though there is free memory
+	 * on other nodes. We expect that NUMA preferences are specified
+	 * by memory policies.
+	 */
+	pol = get_vma_policy(vma, addr);
+	if (pol->mode != MPOL_BIND)
+		this_node = __GFP_THISNODE;
+	mpol_cond_put(pol);
+#endif
 
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
 		return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY);
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags))
-		return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM;
+		return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM | this_node;
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags))
 		return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM :
-							     __GFP_KSWAPD_RECLAIM);
+							     __GFP_KSWAPD_RECLAIM | this_node);
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags))
 		return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM :
-							     0);
-	return GFP_TRANSHUGE_LIGHT;
+							     this_node);
+	return GFP_TRANSHUGE_LIGHT | this_node;
 }
 
 /* Caller must hold page table lock. */
@@ -715,8 +734,8 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
 		pte_free(vma->vm_mm, pgtable);
 		return ret;
 	}
-	gfp = alloc_hugepage_direct_gfpmask(vma);
-	page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
+	gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
+	page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, vma, haddr, numa_node_id());
 	if (unlikely(!page)) {
 		count_vm_event(THP_FAULT_FALLBACK);
 		return VM_FAULT_FALLBACK;
@@ -1286,8 +1305,9 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
 alloc:
 	if (transparent_hugepage_enabled(vma) &&
 	    !transparent_hugepage_debug_cow()) {
-		huge_gfp = alloc_hugepage_direct_gfpmask(vma);
-		new_page = alloc_hugepage_vma(huge_gfp, vma, haddr, HPAGE_PMD_ORDER);
+		huge_gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
+		new_page = alloc_pages_vma(huge_gfp, HPAGE_PMD_ORDER, vma,
+					   haddr, numa_node_id());
 	} else
 		new_page = NULL;
 
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 58fb833fce0c..5837a067124d 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1116,8 +1116,8 @@ static struct page *new_page(struct page *page, unsigned long start)
 	} else if (PageTransHuge(page)) {
 		struct page *thp;
 
-		thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address,
-					 HPAGE_PMD_ORDER);
+		thp = alloc_pages_vma(GFP_TRANSHUGE, HPAGE_PMD_ORDER, vma,
+				      address, numa_node_id());
 		if (!thp)
 			return NULL;
 		prep_transhuge_page(thp);
@@ -1662,7 +1662,7 @@ struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
  * freeing by another task. It is the caller's responsibility to free the
  * extra reference for shared policies.
  */
-static struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
+struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
 						unsigned long addr)
 {
 	struct mempolicy *pol = __get_vma_policy(vma, addr);
@@ -2011,7 +2011,6 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  * @vma:  Pointer to VMA or NULL if not available.
  * @addr: Virtual Address of the allocation. Must be inside the VMA.
  * @node: Which node to prefer for allocation (modulo policy).
- * @hugepage: for hugepages try only the preferred node if possible
  *
  * This function allocates a page from the kernel page pool and applies
  * a NUMA policy associated with the VMA or the current process.
@@ -2022,7 +2021,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  */
 struct page *
 alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
-		unsigned long addr, int node, bool hugepage)
+		unsigned long addr, int node)
 {
 	struct mempolicy *pol;
 	struct page *page;
@@ -2040,60 +2039,6 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
 		goto out;
 	}
 
-	if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) {
-		int hpage_node = node;
-
-		/*
-		 * For hugepage allocation and non-interleave policy which
-		 * allows the current node (or other explicitly preferred
-		 * node) we only try to allocate from the current/preferred
-		 * node and don't fall back to other nodes, as the cost of
-		 * remote accesses would likely offset THP benefits.
-		 *
-		 * If the policy is interleave, or does not allow the current
-		 * node in its nodemask, we allocate the standard way.
-		 */
-		if (pol->mode == MPOL_PREFERRED &&
-						!(pol->flags & MPOL_F_LOCAL))
-			hpage_node = pol->v.preferred_node;
-
-		nmask = policy_nodemask(gfp, pol);
-		if (!nmask || node_isset(hpage_node, *nmask)) {
-			mpol_cond_put(pol);
-			/*
-			 * We cannot invoke reclaim if __GFP_THISNODE
-			 * is set. Invoking reclaim with
-			 * __GFP_THISNODE set, would cause THP
-			 * allocations to trigger heavy swapping
-			 * despite there may be tons of free memory
-			 * (including potentially plenty of THP
-			 * already available in the buddy) on all the
-			 * other NUMA nodes.
-			 *
-			 * At most we could invoke compaction when
-			 * __GFP_THISNODE is set (but we would need to
-			 * refrain from invoking reclaim even if
-			 * compaction returned COMPACT_SKIPPED because
-			 * there wasn't not enough memory to succeed
-			 * compaction). For now just avoid
-			 * __GFP_THISNODE instead of limiting the
-			 * allocation path to a strict and single
-			 * compaction invocation.
-			 *
-			 * Supposedly if direct reclaim was enabled by
-			 * the caller, the app prefers THP regardless
-			 * of the node it comes from so this would be
-			 * more desiderable behavior than only
-			 * providing THP originated from the local
-			 * node in such case.
-			 */
-			if (!(gfp & __GFP_DIRECT_RECLAIM))
-				gfp |= __GFP_THISNODE;
-			page = __alloc_pages_node(hpage_node, gfp, order);
-			goto out;
-		}
-	}
-
 	nmask = policy_nodemask(gfp, pol);
 	preferred_nid = policy_node(gfp, pol, node);
 	page = __alloc_pages_nodemask(gfp, order, preferred_nid, nmask);
diff --git a/mm/shmem.c b/mm/shmem.c
index 56bf122e0bb4..ea26d7a0342d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1435,7 +1435,7 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp,
 
 	shmem_pseudo_vma_init(&pvma, info, hindex);
 	page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN,
-			HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(), true);
+			HPAGE_PMD_ORDER, &pvma, 0, numa_node_id());
 	shmem_pseudo_vma_destroy(&pvma);
 	if (page)
 		prep_transhuge_page(page);
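
Illustration (not part of the patch): after this change, callers no longer pass a "hugepage" boolean to alloc_pages_vma(). Instead they hand the fault address to alloc_hugepage_direct_gfpmask(), which consults get_vma_policy() and only adds __GFP_THISNODE when the VMA's mempolicy and the defrag mode allow it. A minimal sketch of the resulting THP-fault call sequence, mirroring the do_huge_pmd_anonymous_page() hunk above and assuming its usual context (vma, haddr, HPAGE_PMD_ORDER):

	/* Sketch only: how a THP fault path allocates after this patch. */
	gfp_t gfp;
	struct page *page;

	/* gfp mask now reflects the VMA's mempolicy at haddr */
	gfp = alloc_hugepage_direct_gfpmask(vma, haddr);

	/* no "hugepage" argument; NUMA placement is left to the policy code */
	page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, vma, haddr, numa_node_id());
	if (unlikely(!page)) {
		count_vm_event(THP_FAULT_FALLBACK);
		return VM_FAULT_FALLBACK;	/* fall back to base pages */
	}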