author    Michal Hocko <mhocko@suse.cz>    2015-04-15 19:13:29 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2015-04-15 19:35:17 -0400
commit    3b3636924dfe1e80f9bef2c0dc1207e16f3b078a (patch)
tree      6492cb6d3f3e461b6b5fdc5c8943d607e5d8dfd0 /mm
parent    cc5993bd7b8cff4a3e37042ee1358d1d5eafa70c (diff)
mm, memcg: sync allocation and memcg charge gfp flags for THP
memcg currently uses hardcoded GFP_TRANSHUGE gfp flags for all THP charges. THP allocations, however, might be using different flags depending on /sys/kernel/mm/transparent_hugepage/{,khugepaged/}defrag and the current allocation context.

The primary difference is that defrag configured to the "madvise" value will clear the __GFP_WAIT flag from the core gfp mask to make the allocation lighter for all mappings which are not backed by VM_HUGEPAGE vmas. If the memcg charge path ignores this fact, we get a light allocation but a potential memcg reclaim would kill the whole point of the configuration.

Fix the mismatch by providing the same gfp mask used for the allocation to the charge functions. This is quite easy for all paths except for the khugepaged kernel thread with !CONFIG_NUMA, which does a pre-allocation long before the allocated page is used in collapse_huge_page via khugepaged_alloc_page. To avoid cluttering the whole code path from khugepaged_do_scan, we simply return the current flags as per the khugepaged_defrag() value, which might have changed since the preallocation. If somebody changes the value of the knob we would charge differently, but this shouldn't happen often and it is definitely not critical because it would only lead to a reduced success rate of one-off THP promotion.

[akpm@linux-foundation.org: fix weird code layout while we're there]
[rientjes@google.com: clean up around alloc_hugepage_gfpmask()]

Signed-off-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
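For context, the mask that the allocation paths already use, and that the charge paths receive after this patch, is built by the alloc_hugepage_gfpmask() helper in mm/huge_memory.c. The sketch below is a simplified restatement assuming the helper's kernel-4.0-era form, not a verbatim copy of the patched file; the two call sites mirror the ones visible in the diff:

    /*
     * Simplified sketch (assumption: mirrors the ~4.0-era helper, not
     * copied verbatim).  When defrag is not requested for this mapping,
     * __GFP_WAIT is cleared so that neither the THP allocation nor the
     * memcg charge can trigger heavyweight reclaim.
     */
    static inline gfp_t alloc_hugepage_gfpmask(int defrag, gfp_t extra_gfp)
    {
            return (GFP_TRANSHUGE & ~(defrag ? 0 : __GFP_WAIT)) | extra_gfp;
    }

    /* Page-fault/COW path: mask depends on the vma (defrag=madvise vs. always). */
    huge_gfp = alloc_hugepage_gfpmask(transparent_hugepage_defrag(vma), 0);

    /* khugepaged collapse path: mask additionally pins the target node. */
    gfp = alloc_hugepage_gfpmask(khugepaged_defrag(), __GFP_OTHER_NODE) |
            __GFP_THISNODE;

With the patch applied, mem_cgroup_try_charge() is handed this same mask instead of a hardcoded GFP_TRANSHUGE, so the charge-side reclaim behaviour matches the allocation-side behaviour.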
Diffstat (limited to 'mm')
-rw-r--r--    mm/huge_memory.c    42
1 file changed, 20 insertions(+), 22 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 3afb5cbe1312..4914e1b29fdb 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -708,7 +708,7 @@ static inline pmd_t mk_huge_pmd(struct page *page, pgprot_t prot)
 static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
 					struct vm_area_struct *vma,
 					unsigned long haddr, pmd_t *pmd,
-					struct page *page)
+					struct page *page, gfp_t gfp)
 {
 	struct mem_cgroup *memcg;
 	pgtable_t pgtable;
@@ -716,7 +716,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
 
 	VM_BUG_ON_PAGE(!PageCompound(page), page);
 
-	if (mem_cgroup_try_charge(page, mm, GFP_TRANSHUGE, &memcg))
+	if (mem_cgroup_try_charge(page, mm, gfp, &memcg))
 		return VM_FAULT_OOM;
 
 	pgtable = pte_alloc_one(mm, haddr);
@@ -822,7 +822,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		count_vm_event(THP_FAULT_FALLBACK);
 		return VM_FAULT_FALLBACK;
 	}
-	if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page))) {
+	if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page, gfp))) {
 		put_page(page);
 		count_vm_event(THP_FAULT_FALLBACK);
 		return VM_FAULT_FALLBACK;
@@ -1080,6 +1080,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	unsigned long haddr;
 	unsigned long mmun_start;	/* For mmu_notifiers */
 	unsigned long mmun_end;		/* For mmu_notifiers */
+	gfp_t huge_gfp;			/* for allocation and charge */
 
 	ptl = pmd_lockptr(mm, pmd);
 	VM_BUG_ON_VMA(!vma->anon_vma, vma);
@@ -1106,10 +1107,8 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 alloc:
 	if (transparent_hugepage_enabled(vma) &&
 	    !transparent_hugepage_debug_cow()) {
-		gfp_t gfp;
-
-		gfp = alloc_hugepage_gfpmask(transparent_hugepage_defrag(vma), 0);
-		new_page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
+		huge_gfp = alloc_hugepage_gfpmask(transparent_hugepage_defrag(vma), 0);
+		new_page = alloc_hugepage_vma(huge_gfp, vma, haddr, HPAGE_PMD_ORDER);
 	} else
 		new_page = NULL;
 
@@ -1130,8 +1129,7 @@ alloc:
 		goto out;
 	}
 
-	if (unlikely(mem_cgroup_try_charge(new_page, mm,
-					   GFP_TRANSHUGE, &memcg))) {
+	if (unlikely(mem_cgroup_try_charge(new_page, mm, huge_gfp, &memcg))) {
 		put_page(new_page);
 		if (page) {
 			split_huge_page(page);
@@ -2323,19 +2321,13 @@ static bool khugepaged_prealloc_page(struct page **hpage, bool *wait)
 	return true;
 }
 
-static struct page
-*khugepaged_alloc_page(struct page **hpage, struct mm_struct *mm,
+static struct page *
+khugepaged_alloc_page(struct page **hpage, gfp_t gfp, struct mm_struct *mm,
 		       struct vm_area_struct *vma, unsigned long address,
 		       int node)
 {
-	gfp_t flags;
-
 	VM_BUG_ON_PAGE(*hpage, *hpage);
 
-	/* Only allocate from the target node */
-	flags = alloc_hugepage_gfpmask(khugepaged_defrag(), __GFP_OTHER_NODE) |
-		__GFP_THISNODE;
-
 	/*
 	 * Before allocating the hugepage, release the mmap_sem read lock.
 	 * The allocation can take potentially a long time if it involves
@@ -2344,7 +2336,7 @@ static struct page
 	 */
 	up_read(&mm->mmap_sem);
 
-	*hpage = alloc_pages_exact_node(node, flags, HPAGE_PMD_ORDER);
+	*hpage = alloc_pages_exact_node(node, gfp, HPAGE_PMD_ORDER);
 	if (unlikely(!*hpage)) {
 		count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
 		*hpage = ERR_PTR(-ENOMEM);
@@ -2397,13 +2389,14 @@ static bool khugepaged_prealloc_page(struct page **hpage, bool *wait)
 	return true;
 }
 
-static struct page
-*khugepaged_alloc_page(struct page **hpage, struct mm_struct *mm,
+static struct page *
+khugepaged_alloc_page(struct page **hpage, gfp_t gfp, struct mm_struct *mm,
 		       struct vm_area_struct *vma, unsigned long address,
 		       int node)
 {
 	up_read(&mm->mmap_sem);
 	VM_BUG_ON(!*hpage);
+
 	return *hpage;
 }
 #endif
@@ -2438,16 +2431,21 @@ static void collapse_huge_page(struct mm_struct *mm,
 	struct mem_cgroup *memcg;
 	unsigned long mmun_start;	/* For mmu_notifiers */
 	unsigned long mmun_end;		/* For mmu_notifiers */
+	gfp_t gfp;
 
 	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
 
+	/* Only allocate from the target node */
+	gfp = alloc_hugepage_gfpmask(khugepaged_defrag(), __GFP_OTHER_NODE) |
+		__GFP_THISNODE;
+
 	/* release the mmap_sem read lock. */
-	new_page = khugepaged_alloc_page(hpage, mm, vma, address, node);
+	new_page = khugepaged_alloc_page(hpage, gfp, mm, vma, address, node);
 	if (!new_page)
 		return;
 
 	if (unlikely(mem_cgroup_try_charge(new_page, mm,
-					   GFP_TRANSHUGE, &memcg)))
+					   gfp, &memcg)))
 		return;
 
 	/*