aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorChristoph Lameter <clameter@engr.sgi.com>2005-06-21 20:14:57 -0400
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-06-21 21:46:18 -0400
commit4ae7c03943fca73f23bc0cdb938070f41b98101f (patch)
treed4b3a7369896af7aa7bb58d0d1699be91fc4aa0d /mm
parent578c2fd6a7f378434655e5c480e23152a3994404 (diff)
[PATCH] Periodically drain non local pagesets
The pageset array can potentially acquire a huge amount of memory on large NUMA systems. F.e. on a system with 512 processors and 256 nodes there will be 256*512 pagesets. If each pageset only holds 5 pages then we are talking about 655360 pages. With a 16K page size on IA64 this results in potentially 10 Gigabytes of memory being trapped in pagesets.

The typical cases are much less for smaller systems but there is still the potential of memory being trapped in off node pagesets. Off node memory may be rarely used if local memory is available and so we may potentially have memory in seldom used pagesets without this patch.

The slab allocator flushes its per cpu caches every 2 seconds. The following patch flushes the off node pageset caches in the same way by tying into the slab flush.

The patch also changes /proc/zoneinfo to include the number of pages currently in each pageset.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/page_alloc.c35
-rw-r--r--mm/slab.c1
2 files changed, 34 insertions, 2 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index be05d17bd7df..a95e72d7f945 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -516,6 +516,36 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 	return allocated;
 }
 
+#ifdef CONFIG_NUMA
+/* Called from the slab reaper to drain remote pagesets */
+void drain_remote_pages(void)
+{
+	struct zone *zone;
+	int i;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	for_each_zone(zone) {
+		struct per_cpu_pageset *pset;
+
+		/* Do not drain local pagesets */
+		if (zone->zone_pgdat->node_id == numa_node_id())
+			continue;
+
+		pset = zone->pageset[smp_processor_id()];
+		for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
+			struct per_cpu_pages *pcp;
+
+			pcp = &pset->pcp[i];
+			if (pcp->count)
+				pcp->count -= free_pages_bulk(zone, pcp->count,
+						&pcp->list, 0);
+		}
+	}
+	local_irq_restore(flags);
+}
+#endif
+
 #if defined(CONFIG_PM) || defined(CONFIG_HOTPLUG_CPU)
 static void __drain_pages(unsigned int cpu)
 {
@@ -1271,12 +1301,13 @@ void show_free_areas(void)
 		pageset = zone_pcp(zone, cpu);
 
 		for (temperature = 0; temperature < 2; temperature++)
-			printk("cpu %d %s: low %d, high %d, batch %d\n",
+			printk("cpu %d %s: low %d, high %d, batch %d used:%d\n",
 				cpu,
 				temperature ? "cold" : "hot",
 				pageset->pcp[temperature].low,
 				pageset->pcp[temperature].high,
-				pageset->pcp[temperature].batch);
+				pageset->pcp[temperature].batch,
+				pageset->pcp[temperature].count);
 	}
 }
 
diff --git a/mm/slab.c b/mm/slab.c
index c78d343b3c5f..93cbbbb39f42 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2851,6 +2851,7 @@ next:
 	}
 	check_irq_on();
 	up(&cache_chain_sem);
+	drain_remote_pages();
 	/* Setup the next iteration */
 	schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC + smp_processor_id());
 }