author     Vlastimil Babka <vbabka@suse.cz>                 2016-07-28 18:49:25 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2016-07-28 19:07:41 -0400
commit     2516035499b9555f6acd373c9f12e44bcb50dbec (patch)
tree       f8c4cce437490ff0b9da7ecaa7df338bada86fea
parent     3eb2771b06d8e206a2f48cfc7c96bb4ef97e2471 (diff)
mm, thp: remove __GFP_NORETRY from khugepaged and madvised allocations
After the previous patch, we can distinguish costly allocations that should be
really lightweight, such as THP page faults, with __GFP_NORETRY.  This means we
don't need to recognize khugepaged allocations via PF_KTHREAD anymore.  We can
also change THP page faults in areas where madvise(MADV_HUGEPAGE) was used to
try as hard as khugepaged, as the process has indicated that it benefits from
THPs and is willing to pay some initial latency costs.

We can also make the flags handling less cryptic by distinguishing
GFP_TRANSHUGE_LIGHT (no reclaim at all, default mode in page fault) from
GFP_TRANSHUGE (only direct reclaim, khugepaged default).  Adding __GFP_NORETRY
or __GFP_KSWAPD_RECLAIM is done where needed.

The patch effectively changes the current GFP_TRANSHUGE users as follows:

* get_huge_zero_page() - the zero page lifetime should be relatively long and
  it's shared by multiple users, so it's worth spending some effort on it.  We
  use GFP_TRANSHUGE, and __GFP_NORETRY is not added.  This also restores direct
  reclaim to this allocation, which was unintentionally removed by commit
  e4a49efe4e7e ("mm: thp: set THP defrag by default to madvise and add a
  stall-free defrag option").

* alloc_hugepage_khugepaged_gfpmask() - this is khugepaged, so latency is not
  an issue.  So if khugepaged "defrag" is enabled (the default), do reclaim via
  GFP_TRANSHUGE without __GFP_NORETRY.  We can remove the PF_KTHREAD check from
  page alloc.

  As a side-effect, khugepaged will now no longer check if the initial
  compaction was deferred or contended.  This is OK, as khugepaged sleep times
  between collapse attempts are long enough to prevent noticeable disruption,
  so we should allow it to spend some effort.

* migrate_misplaced_transhuge_page() - already was masking out __GFP_RECLAIM,
  so just convert to GFP_TRANSHUGE_LIGHT, which is equivalent.

* alloc_hugepage_direct_gfpmask() - VMAs with VM_HUGEPAGE (via madvise) are now
  allocating without __GFP_NORETRY.  Other VMAs keep using __GFP_NORETRY if
  direct reclaim/compaction is at all allowed (by default it's allowed only for
  madvised VMAs).  The rest is conversion to GFP_TRANSHUGE(_LIGHT).

[mhocko@suse.com: suggested GFP_TRANSHUGE_LIGHT]
Link: http://lkml.kernel.org/r/20160721073614.24395-7-vbabka@suse.cz
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
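Taken together, the effective GFP masks after this patch can be summarized as follows; this is an illustrative table only (not part of the patch), derived from the changelog above and the hunks below:

/*
 * Illustrative summary (not part of the patch).
 *
 *   Allocation site / mode                           Effective mask
 *   ----------------------------------------------   ------------------------------------------
 *   page fault, defrag=always, madvised VMA          GFP_TRANSHUGE
 *   page fault, defrag=always, other VMA             GFP_TRANSHUGE | __GFP_NORETRY
 *   page fault, defrag=defer                         GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM
 *   page fault, defrag=madvise, madvised VMA         GFP_TRANSHUGE
 *   page fault, defrag=madvise (other VMA) / never   GFP_TRANSHUGE_LIGHT
 *   khugepaged, defrag enabled (default)             GFP_TRANSHUGE
 *   khugepaged, defrag disabled                      GFP_TRANSHUGE_LIGHT
 *   migrate_misplaced_transhuge_page()               GFP_TRANSHUGE_LIGHT | __GFP_THISNODE
 *   get_huge_zero_page()                             GFP_TRANSHUGE (per the changelog)
 */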
-rw-r--r--  include/linux/gfp.h              14
-rw-r--r--  include/trace/events/mmflags.h    1
-rw-r--r--  mm/huge_memory.c                 29
-rw-r--r--  mm/khugepaged.c                   2
-rw-r--r--  mm/migrate.c                      2
-rw-r--r--  mm/page_alloc.c                   6
-rw-r--r--  tools/perf/builtin-kmem.c         1
7 files changed, 30 insertions, 25 deletions
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index c29e9d347bc6..f8041f9de31e 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -237,9 +237,11 @@ struct vm_area_struct;
  * are expected to be movable via page reclaim or page migration. Typically,
  * pages on the LRU would also be allocated with GFP_HIGHUSER_MOVABLE.
  *
- * GFP_TRANSHUGE is used for THP allocations. They are compound allocations
- * that will fail quickly if memory is not available and will not wake
- * kswapd on failure.
+ * GFP_TRANSHUGE and GFP_TRANSHUGE_LIGHT are used for THP allocations. They are
+ * compound allocations that will generally fail quickly if memory is not
+ * available and will not wake kswapd/kcompactd on failure. The _LIGHT
+ * version does not attempt reclaim/compaction at all and is by default used
+ * in page fault path, while the non-light is used by khugepaged.
  */
 #define GFP_ATOMIC	(__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM)
 #define GFP_KERNEL	(__GFP_RECLAIM | __GFP_IO | __GFP_FS)
@@ -254,9 +256,9 @@ struct vm_area_struct;
 #define GFP_DMA32	__GFP_DMA32
 #define GFP_HIGHUSER	(GFP_USER | __GFP_HIGHMEM)
 #define GFP_HIGHUSER_MOVABLE	(GFP_HIGHUSER | __GFP_MOVABLE)
-#define GFP_TRANSHUGE	((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
-			 __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN) & \
-			 ~__GFP_RECLAIM)
+#define GFP_TRANSHUGE_LIGHT	((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
+			 __GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM)
+#define GFP_TRANSHUGE	(GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM)
 
 /* Convert GFP flags to their corresponding migrate type */
 #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)
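A minimal sketch (not part of the patch) of the relationship these two definitions establish, written as kernel-style compile-time checks; the function name is hypothetical and could live in any file that includes <linux/gfp.h>:

#include <linux/bug.h>
#include <linux/gfp.h>

static inline void gfp_transhuge_sanity_sketch(void)
{
	/* GFP_TRANSHUGE is exactly the _LIGHT mask plus direct reclaim. */
	BUILD_BUG_ON(GFP_TRANSHUGE !=
		     (GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM));
	/* Neither variant wakes kswapd or sets __GFP_NORETRY on its own. */
	BUILD_BUG_ON(GFP_TRANSHUGE & __GFP_KSWAPD_RECLAIM);
	BUILD_BUG_ON(GFP_TRANSHUGE & __GFP_NORETRY);
}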
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index 43cedbf0c759..5a81ab48a2fb 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -11,6 +11,7 @@
 
 #define __def_gfpflag_names						\
 	{(unsigned long)GFP_TRANSHUGE,		"GFP_TRANSHUGE"},	\
+	{(unsigned long)GFP_TRANSHUGE_LIGHT,	"GFP_TRANSHUGE_LIGHT"}, \
 	{(unsigned long)GFP_HIGHUSER_MOVABLE,	"GFP_HIGHUSER_MOVABLE"},\
 	{(unsigned long)GFP_HIGHUSER,		"GFP_HIGHUSER"},	\
 	{(unsigned long)GFP_USER,		"GFP_USER"},		\
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 8f0cd35345dc..2373f0a7d340 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -539,23 +539,26 @@ static int __do_huge_pmd_anonymous_page(struct fault_env *fe, struct page *page,
 }
 
 /*
- * If THP is set to always then directly reclaim/compact as necessary
- * If set to defer then do no reclaim and defer to khugepaged
+ * If THP defrag is set to always then directly reclaim/compact as necessary
+ * If set to defer then do only background reclaim/compact and defer to khugepaged
  * If set to madvise and the VMA is flagged then directly reclaim/compact
+ * When direct reclaim/compact is allowed, don't retry except for flagged VMA's
  */
 static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma)
 {
-	gfp_t reclaim_flags = 0;
-
-	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags) &&
-	    (vma->vm_flags & VM_HUGEPAGE))
-		reclaim_flags = __GFP_DIRECT_RECLAIM;
-	else if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags))
-		reclaim_flags = __GFP_KSWAPD_RECLAIM;
-	else if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
-		reclaim_flags = __GFP_DIRECT_RECLAIM;
-
-	return GFP_TRANSHUGE | reclaim_flags;
+	bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE);
+
+	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG,
+				&transparent_hugepage_flags) && vma_madvised)
+		return GFP_TRANSHUGE;
+	else if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG,
+						&transparent_hugepage_flags))
+		return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM;
+	else if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG,
+						&transparent_hugepage_flags))
+		return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY);
+
+	return GFP_TRANSHUGE_LIGHT;
 }
 
 /* Caller must hold page table lock. */
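For context, a hedged sketch of how the anonymous-fault path is expected to consume this mask; apart from alloc_hugepage_direct_gfpmask() itself, the identifiers (alloc_hugepage_vma(), haddr, the fallback handling) follow the surrounding mm/huge_memory.c of this series and are not introduced by the patch:

/* Sketch only: condensed from the THP anonymous-fault path. */
static int thp_fault_alloc_sketch(struct vm_area_struct *vma,
				  unsigned long haddr)
{
	gfp_t gfp = alloc_hugepage_direct_gfpmask(vma);
	struct page *page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);

	if (unlikely(!page)) {
		count_vm_event(THP_FAULT_FALLBACK);
		return VM_FAULT_FALLBACK;	/* fall back to small pages */
	}
	/* ... prep and map the huge page as the real fault path does ... */
	return 0;
}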
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index d1423d790f6d..79c52d0061af 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -694,7 +694,7 @@ static bool khugepaged_scan_abort(int nid)
 /* Defrag for khugepaged will enter direct reclaim/compaction if necessary */
 static inline gfp_t alloc_hugepage_khugepaged_gfpmask(void)
 {
-	return GFP_TRANSHUGE | (khugepaged_defrag() ? __GFP_DIRECT_RECLAIM : 0);
+	return khugepaged_defrag() ? GFP_TRANSHUGE : GFP_TRANSHUGE_LIGHT;
 }
 
 #ifdef CONFIG_NUMA
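khugepaged's collapse path then adds node-targeting flags on top of this mask before allocating. A minimal sketch with a hypothetical wrapper name; the surrounding details belong to mm/khugepaged.c, not this hunk, and the exact flag set there may differ:

/* Sketch only: condensed from the collapse path's NUMA allocation. */
static struct page *khugepaged_alloc_sketch(int node)
{
	gfp_t gfp = alloc_hugepage_khugepaged_gfpmask() | __GFP_THISNODE;
	struct page *new_page = __alloc_pages_node(node, gfp, HPAGE_PMD_ORDER);

	if (!new_page)
		count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
	return new_page;	/* NULL is not fatal; khugepaged retries on a later scan */
}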
diff --git a/mm/migrate.c b/mm/migrate.c
index ed0268268e93..f7ee04a5ae27 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1934,7 +1934,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 		goto out_dropref;
 
 	new_page = alloc_pages_node(node,
-		(GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_RECLAIM,
+		(GFP_TRANSHUGE_LIGHT | __GFP_THISNODE),
 		HPAGE_PMD_ORDER);
 	if (!new_page)
 		goto out_fail;
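This hunk is a no-op by construction: GFP_TRANSHUGE_LIGHT already has __GFP_RECLAIM masked out, so the old and new expressions expand to the same bits. A one-line compile-time check, as a sketch (placed inside any function body):

	BUILD_BUG_ON(((GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_RECLAIM) !=
		     (GFP_TRANSHUGE_LIGHT | __GFP_THISNODE));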
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c42ec374df96..09ba67487897 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3556,11 +3556,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 			/*
 			 * Looks like reclaim/compaction is worth trying, but
 			 * sync compaction could be very expensive, so keep
-			 * using async compaction, unless it's khugepaged
-			 * trying to collapse.
+			 * using async compaction.
 			 */
-			if (!(current->flags & PF_KTHREAD))
-				migration_mode = MIGRATE_ASYNC;
+			migration_mode = MIGRATE_ASYNC;
 		}
 	}
 
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index b1d491c2e704..fdde1bd3e306 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -608,6 +608,7 @@ static const struct {
 	const char *compact;
 } gfp_compact_table[] = {
 	{ "GFP_TRANSHUGE",		"THP" },
+	{ "GFP_TRANSHUGE_LIGHT",	"THL" },
 	{ "GFP_HIGHUSER_MOVABLE",	"HUM" },
 	{ "GFP_HIGHUSER",		"HU" },
 	{ "GFP_USER",			"U" },