author     Mel Gorman <mgorman@techsingularity.net>          2019-03-05 18:45:41 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>    2019-03-06 00:07:17 -0500
commit     5e1f0f098b4649fad53011246bcaeff011ffdf5d (patch)
tree       88e1399008fd89a44a52b3dabb6da42eef15da05
parent     e332f741a8dd1ec9a6dc8aa997296ecbfe64323e (diff)
mm, compaction: capture a page under direct compaction
Compaction is inherently race-prone as a suitable page freed during
compaction can be allocated by any parallel task.  This patch uses a
capture_control structure to isolate a page immediately when it is freed
by a direct compactor in the slow path of the page allocator.  The intent
is to avoid redundant scanning.

                                     5.0.0-rc1              5.0.0-rc1
                               selective-v3r17          capture-v3r19
Amean     fault-both-1         0.00 (   0.00%)        0.00 *   0.00%*
Amean     fault-both-3      2582.11 (   0.00%)     2563.68 (   0.71%)
Amean     fault-both-5      4500.26 (   0.00%)     4233.52 (   5.93%)
Amean     fault-both-7      5819.53 (   0.00%)     6333.65 (  -8.83%)
Amean     fault-both-12     9321.18 (   0.00%)     9759.38 (  -4.70%)
Amean     fault-both-18     9782.76 (   0.00%)    10338.76 (  -5.68%)
Amean     fault-both-24    15272.81 (   0.00%)    13379.55 *  12.40%*
Amean     fault-both-30    15121.34 (   0.00%)    16158.25 (  -6.86%)
Amean     fault-both-32    18466.67 (   0.00%)    18971.21 (  -2.73%)

Latency is only moderately affected but the devil is in the details.  A
closer examination indicates that base page fault latency is reduced but
latency of huge pages is increased as it takes greater care to succeed.
Part of the "problem" is that allocation success rates are close to 100%
even when under pressure and compaction gets harder:

                                5.0.0-rc1              5.0.0-rc1
                          selective-v3r17          capture-v3r19
Percentage huge-3        96.70 (   0.00%)       98.23 (   1.58%)
Percentage huge-5        96.99 (   0.00%)       95.30 (  -1.75%)
Percentage huge-7        94.19 (   0.00%)       97.24 (   3.24%)
Percentage huge-12       94.95 (   0.00%)       97.35 (   2.53%)
Percentage huge-18       96.74 (   0.00%)       97.30 (   0.58%)
Percentage huge-24       97.07 (   0.00%)       97.55 (   0.50%)
Percentage huge-30       95.69 (   0.00%)       98.50 (   2.95%)
Percentage huge-32       96.70 (   0.00%)       99.27 (   2.65%)

And scan rates are reduced as expected by 6% for the migration scanner
and 29% for the free scanner, indicating that there is less redundant work.

Compaction migrate scanned    20815362    19573286
Compaction free scanned       16352612    11510663

[mgorman@techsingularity.net: remove redundant check]
  Link: http://lkml.kernel.org/r/20190201143853.GH9565@techsingularity.net
Link: http://lkml.kernel.org/r/20190118175136.31341-23-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Cc: David Rientjes <rientjes@google.com>
Cc: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
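The handshake this patch introduces is small enough to model outside the
kernel.  The following stand-alone C sketch is only illustrative: the types
and names mirror the patch (capture_control, compaction_capture) but are
simplified stand-ins, not the kernel's structures.  It shows the core idea:
a direct compactor publishes a capture_control through its task, and the
free path hands a suitable just-freed page straight back to it instead of
returning the page to the free lists where a parallel task could steal it.

    #include <stdio.h>
    #include <stddef.h>

    struct page { unsigned long pfn; };

    struct compact_control {
            int order;              /* order the compactor needs */
            int zone;               /* zone id being compacted */
            int direct_compaction;  /* set for direct (allocating) compactors */
    };

    struct capture_control {
            struct compact_control *cc;
            struct page *page;      /* filled in by the free path */
    };

    /* stand-in for current->capture_control */
    static struct capture_control *task_capture_control;

    /* Free path: offer a just-freed page to a waiting direct compactor. */
    static int compaction_capture(struct page *page, int order, int zone)
    {
            struct capture_control *capc = task_capture_control;

            if (!capc || capc->page || !capc->cc->direct_compaction)
                    return 0;
            if (order != capc->cc->order || zone != capc->cc->zone)
                    return 0;

            capc->page = page;      /* page bypasses the free lists entirely */
            return 1;
    }

    int main(void)
    {
            struct compact_control cc = { .order = 9, .zone = 0, .direct_compaction = 1 };
            struct capture_control capc = { .cc = &cc, .page = NULL };
            struct page freed = { .pfn = 4096 };

            /* Direct compactor publishes its capture request ... */
            task_capture_control = &capc;

            /* ... and a page freed during compaction is handed straight back. */
            if (compaction_capture(&freed, 9, 0))
                    printf("captured pfn %lu before it hit the freelists\n", capc.page->pfn);

            task_capture_control = NULL;
            return 0;
    }

In the real patch the publish/check sequence is split across
compact_zone_order() (set current->capture_control), __free_one_page()
(call compaction_capture() before buddy merging), and
__alloc_pages_direct_compact() (prep and use the captured page), as the
diff below shows.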
-rw-r--r--   include/linux/compaction.h |  3
-rw-r--r--   include/linux/sched.h      |  4
-rw-r--r--   kernel/sched/core.c        |  3
-rw-r--r--   mm/compaction.c            | 31
-rw-r--r--   mm/internal.h              |  9
-rw-r--r--   mm/page_alloc.c            | 73
6 files changed, 111 insertions, 12 deletions
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 70d0256edd31..c960923d9ec2 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -93,7 +93,8 @@ extern int sysctl_compact_unevictable_allowed;
 extern int fragmentation_index(struct zone *zone, unsigned int order);
 extern enum compact_result try_to_compact_pages(gfp_t gfp_mask,
 		unsigned int order, unsigned int alloc_flags,
-		const struct alloc_context *ac, enum compact_priority prio);
+		const struct alloc_context *ac, enum compact_priority prio,
+		struct page **page);
 extern void reset_isolation_suitable(pg_data_t *pgdat);
 extern enum compact_result compaction_suitable(struct zone *zone, int order,
 		unsigned int alloc_flags, int classzone_idx);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f9b43c989577..ebfb34fb9b30 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -47,6 +47,7 @@ struct pid_namespace;
 struct pipe_inode_info;
 struct rcu_node;
 struct reclaim_state;
+struct capture_control;
 struct robust_list_head;
 struct sched_attr;
 struct sched_param;
@@ -958,6 +959,9 @@ struct task_struct {
 
 	struct io_context		*io_context;
 
+#ifdef CONFIG_COMPACTION
+	struct capture_control		*capture_control;
+#endif
 	/* Ptrace state: */
 	unsigned long			ptrace_message;
 	kernel_siginfo_t		*last_siginfo;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7cbb5658be80..916e956e92be 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2190,6 +2190,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 	INIT_HLIST_HEAD(&p->preempt_notifiers);
 #endif
 
+#ifdef CONFIG_COMPACTION
+	p->capture_control = NULL;
+#endif
 	init_numa_balancing(clone_flags, p);
 }
 
diff --git a/mm/compaction.c b/mm/compaction.c
index 3084cee77fda..1cc871da3fda 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -2056,7 +2056,8 @@ bool compaction_zonelist_suitable(struct alloc_context *ac, int order,
 	return false;
 }
 
-static enum compact_result compact_zone(struct compact_control *cc)
+static enum compact_result
+compact_zone(struct compact_control *cc, struct capture_control *capc)
 {
 	enum compact_result ret;
 	unsigned long start_pfn = cc->zone->zone_start_pfn;
@@ -2225,6 +2226,11 @@ check_drain:
 			}
 		}
 
+		/* Stop if a page has been captured */
+		if (capc && capc->page) {
+			ret = COMPACT_SUCCESS;
+			break;
+		}
 	}
 
 out:
@@ -2258,7 +2264,8 @@ out:
 
 static enum compact_result compact_zone_order(struct zone *zone, int order,
 		gfp_t gfp_mask, enum compact_priority prio,
-		unsigned int alloc_flags, int classzone_idx)
+		unsigned int alloc_flags, int classzone_idx,
+		struct page **capture)
 {
 	enum compact_result ret;
 	struct compact_control cc = {
@@ -2279,14 +2286,24 @@ static enum compact_result compact_zone_order(struct zone *zone, int order,
 		.ignore_skip_hint = (prio == MIN_COMPACT_PRIORITY),
 		.ignore_block_suitable = (prio == MIN_COMPACT_PRIORITY)
 	};
+	struct capture_control capc = {
+		.cc = &cc,
+		.page = NULL,
+	};
+
+	if (capture)
+		current->capture_control = &capc;
 	INIT_LIST_HEAD(&cc.freepages);
 	INIT_LIST_HEAD(&cc.migratepages);
 
-	ret = compact_zone(&cc);
+	ret = compact_zone(&cc, &capc);
 
 	VM_BUG_ON(!list_empty(&cc.freepages));
 	VM_BUG_ON(!list_empty(&cc.migratepages));
 
+	*capture = capc.page;
+	current->capture_control = NULL;
+
 	return ret;
 }
 
@@ -2304,7 +2321,7 @@ int sysctl_extfrag_threshold = 500;
  */
 enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
 		unsigned int alloc_flags, const struct alloc_context *ac,
-		enum compact_priority prio)
+		enum compact_priority prio, struct page **capture)
 {
 	int may_perform_io = gfp_mask & __GFP_IO;
 	struct zoneref *z;
@@ -2332,7 +2349,7 @@ enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
 		}
 
 		status = compact_zone_order(zone, order, gfp_mask, prio,
-				alloc_flags, ac_classzone_idx(ac));
+				alloc_flags, ac_classzone_idx(ac), capture);
 		rc = max(status, rc);
 
 		/* The allocation should succeed, stop compacting */
@@ -2400,7 +2417,7 @@ static void compact_node(int nid)
 		INIT_LIST_HEAD(&cc.freepages);
 		INIT_LIST_HEAD(&cc.migratepages);
 
-		compact_zone(&cc);
+		compact_zone(&cc, NULL);
 
 		VM_BUG_ON(!list_empty(&cc.freepages));
 		VM_BUG_ON(!list_empty(&cc.migratepages));
@@ -2535,7 +2552,7 @@ static void kcompactd_do_work(pg_data_t *pgdat)
 
 		if (kthread_should_stop())
 			return;
-		status = compact_zone(&cc);
+		status = compact_zone(&cc, NULL);
 
 		if (status == COMPACT_SUCCESS) {
 			compaction_defer_reset(zone, cc.order, false);
diff --git a/mm/internal.h b/mm/internal.h
index 31bb0be6fd52..9eeaf2b95166 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -209,6 +209,15 @@ struct compact_control {
 	bool rescan;			/* Rescanning the same pageblock */
 };
 
+/*
+ * Used in direct compaction when a page should be taken from the freelists
+ * immediately when one is created during the free path.
+ */
+struct capture_control {
+	struct compact_control *cc;
+	struct page *page;
+};
+
 unsigned long
 isolate_freepages_range(struct compact_control *cc,
 			unsigned long start_pfn, unsigned long end_pfn);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2e132b9e7a93..09bf2c5f8b4b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -789,6 +789,57 @@ static inline int page_is_buddy(struct page *page, struct page *buddy,
 	return 0;
 }
 
+#ifdef CONFIG_COMPACTION
+static inline struct capture_control *task_capc(struct zone *zone)
+{
+	struct capture_control *capc = current->capture_control;
+
+	return capc &&
+		!(current->flags & PF_KTHREAD) &&
+		!capc->page &&
+		capc->cc->zone == zone &&
+		capc->cc->direct_compaction ? capc : NULL;
+}
+
+static inline bool
+compaction_capture(struct capture_control *capc, struct page *page,
+		   int order, int migratetype)
+{
+	if (!capc || order != capc->cc->order)
+		return false;
+
+	/* Do not accidentally pollute CMA or isolated regions */
+	if (is_migrate_cma(migratetype) ||
+	    is_migrate_isolate(migratetype))
+		return false;
+
+	/*
+	 * Do not let lower order allocations pollute a movable pageblock.
+	 * This might let an unmovable request use a reclaimable pageblock
+	 * and vice-versa but no more than normal fallback logic which can
+	 * have trouble finding a high-order free page.
+	 */
+	if (order < pageblock_order && migratetype == MIGRATE_MOVABLE)
+		return false;
+
+	capc->page = page;
+	return true;
+}
+
+#else
+static inline struct capture_control *task_capc(struct zone *zone)
+{
+	return NULL;
+}
+
+static inline bool
+compaction_capture(struct capture_control *capc, struct page *page,
+		   int order, int migratetype)
+{
+	return false;
+}
+#endif /* CONFIG_COMPACTION */
+
 /*
  * Freeing function for a buddy system allocator.
  *
@@ -822,6 +873,7 @@ static inline void __free_one_page(struct page *page,
 	unsigned long uninitialized_var(buddy_pfn);
 	struct page *buddy;
 	unsigned int max_order;
+	struct capture_control *capc = task_capc(zone);
 
 	max_order = min_t(unsigned int, MAX_ORDER, pageblock_order + 1);
 
@@ -837,6 +889,11 @@ static inline void __free_one_page(struct page *page,
 
 continue_merging:
 	while (order < max_order - 1) {
+		if (compaction_capture(capc, page, order, migratetype)) {
+			__mod_zone_freepage_state(zone, -(1 << order),
+								migratetype);
+			return;
+		}
 		buddy_pfn = __find_buddy_pfn(pfn, order);
 		buddy = page + (buddy_pfn - pfn);
 
@@ -3710,7 +3767,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 		unsigned int alloc_flags, const struct alloc_context *ac,
 		enum compact_priority prio, enum compact_result *compact_result)
 {
-	struct page *page;
+	struct page *page = NULL;
 	unsigned long pflags;
 	unsigned int noreclaim_flag;
 
@@ -3721,13 +3778,15 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	noreclaim_flag = memalloc_noreclaim_save();
 
 	*compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac,
-									prio);
+								prio, &page);
 
 	memalloc_noreclaim_restore(noreclaim_flag);
 	psi_memstall_leave(&pflags);
 
-	if (*compact_result <= COMPACT_INACTIVE)
+	if (*compact_result <= COMPACT_INACTIVE) {
+		WARN_ON_ONCE(page);
 		return NULL;
+	}
 
 	/*
 	 * At least in one zone compaction wasn't deferred or skipped, so let's
@@ -3735,7 +3794,13 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	 */
 	count_vm_event(COMPACTSTALL);
 
-	page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac);
+	/* Prep a captured page if available */
+	if (page)
+		prep_new_page(page, order, gfp_mask, alloc_flags);
+
+	/* Try get a page from the freelist if available */
+	if (!page)
+		page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac);
 
 	if (page) {
 		struct zone *zone = page_zone(page);