mm: only IPI CPUs to drain local pages if they exist

Calculate a cpumask of CPUs with per-cpu pages in any zone and only send an IPI requesting CPUs to drain these pages to the buddy allocator if they actually have pages when asked to flush. This patch saves 85%+ of IPIs asking to drain per-cpu pages in case of severe memory pressure that leads to OOM, since in these cases multiple, possibly concurrent, allocation requests end up in the direct reclaim code path, so when the per-cpu pages end up reclaimed on the first allocation failure, for most of the subsequent allocation attempts until the memory pressure is off (possibly via the OOM killer) there are no per-cpu pages on most CPUs (and there can easily be hundreds of them). This also has the side effect of shortening the average latency of direct reclaim by one or more orders of magnitude, since waiting for all the CPUs to ACK the IPI takes a long time. Tested by running "hackbench 400" on an 8-CPU x86 VM and observing the difference between the number of direct reclaim attempts that end up in drain_all_pages() and those where more than 1/2 of the online CPUs had any per-cpu pages in them, using the vmstat counters introduced in the next patch in the series and using /proc/interrupts. In the test scenario, this was seen to save around 3600 global IPIs after triggering an OOM on a concurrent workload: $ cat /proc/vmstat | tail -n 2 pcp_global_drain 0 pcp_global_ipi_saved 0 $ cat /proc/interrupts | grep CAL CAL: 1 2 1 2 2 2 2 2 Function call interrupts $ hackbench 400 [OOM messages snipped] $ cat /proc/vmstat | tail -n 2 pcp_global_drain 3647 pcp_global_ipi_saved 3642 $ cat /proc/interrupts | grep CAL CAL: 6 13 6 3 3 3 1 2 7 Function call interrupts Please note that if the global drain is removed from the direct reclaim path, as a patch from Mel Gorman currently suggests, this should be replaced with an on_each_cpu_cond invocation.
Signed-off-by: Gilad Ben-Yossef <gilad@benyossef.com> Acked-by: Mel Gorman <mel@csn.ul.ie> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Acked-by: Christoph Lameter <cl@linux.com> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Pekka Enberg <penberg@kernel.org> Cc: Rik van Riel <riel@redhat.com> Cc: Andi Kleen <andi@firstfloor.org> Acked-by: Michal Nazarewicz <mina86@mina86.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
author: Gilad Ben-Yossef <gilad@benyossef.com> 2012-03-28 17:42:45 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2012-03-28 20:14:35 -0400
commit: 74046494ea68676d29ef6501a4bd950f08112a2c (patch)
tree: 4fb862c2ebeba25b089ed64d5cc36437ad9e3df2 /mm/page_alloc.c
parent: 42be35d0390b966253136a285f507f5ad00fd9e8 (diff)
1 files changed, 38 insertions, 2 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c313afcc8e5a..a712fb9e04ce 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1161,11 +1161,47 @@ void drain_local_pages(void *arg)
 }
 /*
- * Spill all the per-cpu pages from all CPUs back into the buddy allocator
+ * Spill all the per-cpu pages from all CPUs back into the buddy allocator.
+ *
+ * Note that this code is protected against sending an IPI to an offline
+ * CPU but does not guarantee sending an IPI to newly hotplugged CPUs:
+ * on_each_cpu_mask() blocks hotplug and won't talk to offlined CPUs but
+ * nothing keeps CPUs from showing up after we populated the cpumask and
+ * before the call to on_each_cpu_mask().
 */
 void drain_all_pages(void)
 {
-        on_each_cpu(drain_local_pages, NULL, 1);
+        int cpu;
+        struct per_cpu_pageset *pcp;
+        struct zone *zone;
+        /*
+         * Allocate in the BSS so we wont require allocation in
+         * direct reclaim path for CONFIG_CPUMASK_OFFSTACK=y
+         */
+        static cpumask_t cpus_with_pcps;
+        /*
+         * We don't care about racing with CPU hotplug event
+         * as offline notification will cause the notified
+         * cpu to drain that CPU pcps and on_each_cpu_mask
+         * disables preemption as part of its processing
+         */
+        for_each_online_cpu(cpu) {
+                bool has_pcps = false;
+                for_each_populated_zone(zone) {
+                        pcp = per_cpu_ptr(zone->pageset, cpu);
+                        if (pcp->pcp.count) {
+                                has_pcps = true;
+                                break;
+                        }
+                }
+                if (has_pcps)
+                        cpumask_set_cpu(cpu, &cpus_with_pcps);
+                else
+                        cpumask_clear_cpu(cpu, &cpus_with_pcps);
+        }
+        on_each_cpu_mask(&cpus_with_pcps, drain_local_pages, NULL, 1);
 }
 #ifdef CONFIG_HIBERNATION
author	Gilad Ben-Yossef <gilad@benyossef.com>	2012-03-28 17:42:45 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-03-28 20:14:35 -0400
commit	74046494ea68676d29ef6501a4bd950f08112a2c (patch)
tree	4fb862c2ebeba25b089ed64d5cc36437ad9e3df2 /mm/page_alloc.c
parent	42be35d0390b966253136a285f507f5ad00fd9e8 (diff)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c313afcc8e5a..a712fb9e04ce 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c
@@ -1161,11 +1161,47 @@ void drain_local_pages(void *arg)
1161	}	1161	}
1162		1162
1163	/*	1163	/*
1164	* Spill all the per-cpu pages from all CPUs back into the buddy allocator	1164	* Spill all the per-cpu pages from all CPUs back into the buddy allocator.
		1165	*
		1166	* Note that this code is protected against sending an IPI to an offline
		1167	* CPU but does not guarantee sending an IPI to newly hotplugged CPUs:
		1168	* on_each_cpu_mask() blocks hotplug and won't talk to offlined CPUs but
		1169	* nothing keeps CPUs from showing up after we populated the cpumask and
		1170	* before the call to on_each_cpu_mask().
1165	*/	1171	*/
1166	void drain_all_pages(void)	1172	void drain_all_pages(void)
1167	{	1173	{
1168	on_each_cpu(drain_local_pages, NULL, 1);	1174	int cpu;
		1175	struct per_cpu_pageset *pcp;
		1176	struct zone *zone;
		1177
		1178	/*
		1179	* Allocate in the BSS so we wont require allocation in
		1180	* direct reclaim path for CONFIG_CPUMASK_OFFSTACK=y
		1181	*/
		1182	static cpumask_t cpus_with_pcps;
		1183
		1184	/*
		1185	* We don't care about racing with CPU hotplug event
		1186	* as offline notification will cause the notified
		1187	* cpu to drain that CPU pcps and on_each_cpu_mask
		1188	* disables preemption as part of its processing
		1189	*/
		1190	for_each_online_cpu(cpu) {
		1191	bool has_pcps = false;
		1192	for_each_populated_zone(zone) {
		1193	pcp = per_cpu_ptr(zone->pageset, cpu);
		1194	if (pcp->pcp.count) {
		1195	has_pcps = true;
		1196	break;
		1197	}
		1198	}
		1199	if (has_pcps)
		1200	cpumask_set_cpu(cpu, &cpus_with_pcps);
		1201	else
		1202	cpumask_clear_cpu(cpu, &cpus_with_pcps);
		1203	}
		1204	on_each_cpu_mask(&cpus_with_pcps, drain_local_pages, NULL, 1);
1169	}	1205	}
1170		1206
1171	#ifdef CONFIG_HIBERNATION	1207	#ifdef CONFIG_HIBERNATION