author     Mel Gorman <mgorman@techsingularity.net>          2017-02-24 17:56:32 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>    2017-02-24 20:46:54 -0500
commit     0ccce3b924212e121503619df97cc0f17189b77b
tree       8f365e995db4d0dd9cc0735750376c8866f279ba /mm/page_alloc.c
parent     9cd7555875bb09dad875e89a76f41f576e11c638
mm, page_alloc: drain per-cpu pages from workqueue context
The per-cpu page allocator can be drained immediately via drain_all_pages(), which sends IPIs to every CPU. In the next patch, the per-cpu allocator will only be used for interrupt-safe allocations, which prevents draining it from IPI context. This patch uses workqueues to drain the per-cpu lists instead.

This is slower, but no slowdown during intensive reclaim was measured and the paths that use drain_all_pages() are not that sensitive to performance. This is particularly true as the path would only be triggered when reclaim is failing. It also makes some sense to avoid storming a machine with IPIs when it's under memory pressure. Arguably, it should be further adjusted so that only one caller at a time is draining pages, but that is beyond the scope of the current patch.

Link: http://lkml.kernel.org/r/20170123153906.3122-4-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
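For readers unfamiliar with the workqueue idiom the patch switches to, the sketch below shows the same queue-on-each-CPU-then-flush pattern as a stand-alone, hypothetical out-of-tree module; the names per_cpu_drain_demo and drain_demo_init are invented for illustration, and the work handler merely logs instead of draining the per-cpu lists:

#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
#include <linux/smp.h>

/* Stand-in for drain_local_pages_wq()/drain_local_pages(NULL) in the patch. */
static void per_cpu_drain_demo(struct work_struct *work)
{
        pr_info("drain demo ran on CPU %d\n", raw_smp_processor_id());
}

static int __init drain_demo_init(void)
{
        struct work_struct __percpu *works;
        int cpu;

        /* Keep the set of online CPUs stable while work is queued and flushed. */
        get_online_cpus();

        /* GFP_ATOMIC because the real caller may be running from reclaim context. */
        works = alloc_percpu_gfp(struct work_struct, GFP_ATOMIC);
        if (!works) {
                put_online_cpus();
                return -ENOMEM;
        }

        /* Queue one work item per CPU... */
        for_each_online_cpu(cpu) {
                struct work_struct *work = per_cpu_ptr(works, cpu);

                INIT_WORK(work, per_cpu_drain_demo);
                schedule_work_on(cpu, work);
        }

        /* ...then wait for all of them, as drain_all_pages() does after this patch. */
        for_each_online_cpu(cpu)
                flush_work(per_cpu_ptr(works, cpu));

        put_online_cpus();
        free_percpu(works);
        return 0;
}

static void __exit drain_demo_exit(void)
{
}

module_init(drain_demo_init);
module_exit(drain_demo_exit);
MODULE_LICENSE("GPL");

Unlike this simplified module, the patch itself must also cope with the per-cpu allocation failing under memory pressure, so it falls back to a single on-stack work_struct that is queued and flushed one CPU at a time, as the diff below shows.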
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--  mm/page_alloc.c | 44
1 file changed, 37 insertions(+), 7 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 678b2882faaa..610a3db680ae 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2339,19 +2339,21 @@ void drain_local_pages(struct zone *zone)
 	drain_pages(cpu);
 }
 
+static void drain_local_pages_wq(struct work_struct *work)
+{
+	drain_local_pages(NULL);
+}
+
 /*
  * Spill all the per-cpu pages from all CPUs back into the buddy allocator.
  *
  * When zone parameter is non-NULL, spill just the single zone's pages.
  *
- * Note that this code is protected against sending an IPI to an offline
- * CPU but does not guarantee sending an IPI to newly hotplugged CPUs:
- * on_each_cpu_mask() blocks hotplug and won't talk to offlined CPUs but
- * nothing keeps CPUs from showing up after we populated the cpumask and
- * before the call to on_each_cpu_mask().
+ * Note that this can be extremely slow as the draining happens in a workqueue.
  */
 void drain_all_pages(struct zone *zone)
 {
+	struct work_struct __percpu *works;
 	int cpu;
 
 	/*
@@ -2360,6 +2362,17 @@ void drain_all_pages(struct zone *zone)
 	 */
 	static cpumask_t cpus_with_pcps;
 
+	/* Workqueues cannot recurse */
+	if (current->flags & PF_WQ_WORKER)
+		return;
+
+	/*
+	 * As this can be called from reclaim context, do not reenter reclaim.
+	 * An allocation failure can be handled, it's simply slower
+	 */
+	get_online_cpus();
+	works = alloc_percpu_gfp(struct work_struct, GFP_ATOMIC);
+
 	/*
 	 * We don't care about racing with CPU hotplug event
 	 * as offline notification will cause the notified
@@ -2390,8 +2403,25 @@ void drain_all_pages(struct zone *zone)
 		else
 			cpumask_clear_cpu(cpu, &cpus_with_pcps);
 	}
-	on_each_cpu_mask(&cpus_with_pcps, (smp_call_func_t) drain_local_pages,
-								zone, 1);
+
+	if (works) {
+		for_each_cpu(cpu, &cpus_with_pcps) {
+			struct work_struct *work = per_cpu_ptr(works, cpu);
+			INIT_WORK(work, drain_local_pages_wq);
+			schedule_work_on(cpu, work);
+		}
+		for_each_cpu(cpu, &cpus_with_pcps)
+			flush_work(per_cpu_ptr(works, cpu));
+	} else {
+		for_each_cpu(cpu, &cpus_with_pcps) {
+			struct work_struct work;
+
+			INIT_WORK(&work, drain_local_pages_wq);
+			schedule_work_on(cpu, &work);
+			flush_work(&work);
+		}
+	}
+	put_online_cpus();
 }
 
 #ifdef CONFIG_HIBERNATION