aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/gfp.h6
-rw-r--r--include/linux/mmzone.h3
-rw-r--r--mm/page_alloc.c45
-rw-r--r--mm/slab.c6
-rw-r--r--mm/slub.c84
-rw-r--r--mm/vmstat.c54
6 files changed, 67 insertions, 131 deletions
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 97a36c3d96e2..0d2ef0b082a6 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -176,10 +176,6 @@ extern void FASTCALL(free_cold_page(struct page *page));
176#define free_page(addr) free_pages((addr),0) 176#define free_page(addr) free_pages((addr),0)
177 177
178void page_alloc_init(void); 178void page_alloc_init(void);
179#ifdef CONFIG_NUMA 179void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp);
180void drain_node_pages(int node);
181#else
182static inline void drain_node_pages(int node) { };
183#endif
184 180
185#endif /* __LINUX_GFP_H */ 181#endif /* __LINUX_GFP_H */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 2f1544e83042..d09b1345a3a1 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -83,6 +83,9 @@ struct per_cpu_pages {
83 83
84struct per_cpu_pageset { 84struct per_cpu_pageset {
85 struct per_cpu_pages pcp[2]; /* 0: hot. 1: cold */ 85 struct per_cpu_pages pcp[2]; /* 0: hot. 1: cold */
86#ifdef CONFIG_NUMA
87 s8 expire;
88#endif
86#ifdef CONFIG_SMP 89#ifdef CONFIG_SMP
87 s8 stat_threshold; 90 s8 stat_threshold;
88 s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS]; 91 s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS];
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d53cbf8acb8e..f9b5d6d5f4d6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -691,43 +691,26 @@ static void __init setup_nr_node_ids(void) {}
691 691
692#ifdef CONFIG_NUMA 692#ifdef CONFIG_NUMA
693/* 693/*
694 * Called from the slab reaper to drain pagesets on a particular node that 694 * Called from the vmstat counter updater to drain pagesets of this
695 * belongs to the currently executing processor. 695 * currently executing processor on remote nodes after they have
696 * expired.
697 *
696 * Note that this function must be called with the thread pinned to 698 * Note that this function must be called with the thread pinned to
697 * a single processor. 699 * a single processor.
698 */ 700 */
699void drain_node_pages(int nodeid) 701void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
700{ 702{
701 int i;
702 enum zone_type z;
703 unsigned long flags; 703 unsigned long flags;
704 int to_drain;
704 705
705 for (z = 0; z < MAX_NR_ZONES; z++) { 706 local_irq_save(flags);
706 struct zone *zone = NODE_DATA(nodeid)->node_zones + z; 707 if (pcp->count >= pcp->batch)
707 struct per_cpu_pageset *pset; 708 to_drain = pcp->batch;
708 709 else
709 if (!populated_zone(zone)) 710 to_drain = pcp->count;
710 continue; 711 free_pages_bulk(zone, to_drain, &pcp->list, 0);
711 712 pcp->count -= to_drain;
712 pset = zone_pcp(zone, smp_processor_id()); 713 local_irq_restore(flags);
713 for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
714 struct per_cpu_pages *pcp;
715
716 pcp = &pset->pcp[i];
717 if (pcp->count) {
718 int to_drain;
719
720 local_irq_save(flags);
721 if (pcp->count >= pcp->batch)
722 to_drain = pcp->batch;
723 else
724 to_drain = pcp->count;
725 free_pages_bulk(zone, to_drain, &pcp->list, 0);
726 pcp->count -= to_drain;
727 local_irq_restore(flags);
728 }
729 }
730 }
731} 714}
732#endif 715#endif
733 716
diff --git a/mm/slab.c b/mm/slab.c
index e50908b2bfac..944b20581f8c 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -928,12 +928,6 @@ static void next_reap_node(void)
928{ 928{
929 int node = __get_cpu_var(reap_node); 929 int node = __get_cpu_var(reap_node);
930 930
931 /*
932 * Also drain per cpu pages on remote zones
933 */
934 if (node != numa_node_id())
935 drain_node_pages(node);
936
937 node = next_node(node, node_online_map); 931 node = next_node(node, node_online_map);
938 if (unlikely(node >= MAX_NUMNODES)) 932 if (unlikely(node >= MAX_NUMNODES))
939 node = first_node(node_online_map); 933 node = first_node(node_online_map);
diff --git a/mm/slub.c b/mm/slub.c
index dbb206503a8d..bd2efae02bcd 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2530,90 +2530,6 @@ static struct notifier_block __cpuinitdata slab_notifier =
2530 2530
2531#endif 2531#endif
2532 2532
2533#ifdef CONFIG_NUMA
2534
2535/*****************************************************************
2536 * Generic reaper used to support the page allocator
2537 * (the cpu slabs are reaped by a per slab workqueue).
2538 *
2539 * Maybe move this to the page allocator?
2540 ****************************************************************/
2541
2542static DEFINE_PER_CPU(unsigned long, reap_node);
2543
2544static void init_reap_node(int cpu)
2545{
2546 int node;
2547
2548 node = next_node(cpu_to_node(cpu), node_online_map);
2549 if (node == MAX_NUMNODES)
2550 node = first_node(node_online_map);
2551
2552 __get_cpu_var(reap_node) = node;
2553}
2554
2555static void next_reap_node(void)
2556{
2557 int node = __get_cpu_var(reap_node);
2558
2559 /*
2560 * Also drain per cpu pages on remote zones
2561 */
2562 if (node != numa_node_id())
2563 drain_node_pages(node);
2564
2565 node = next_node(node, node_online_map);
2566 if (unlikely(node >= MAX_NUMNODES))
2567 node = first_node(node_online_map);
2568 __get_cpu_var(reap_node) = node;
2569}
2570#else
2571#define init_reap_node(cpu) do { } while (0)
2572#define next_reap_node(void) do { } while (0)
2573#endif
2574
2575#define REAPTIMEOUT_CPUC (2*HZ)
2576
2577#ifdef CONFIG_SMP
2578static DEFINE_PER_CPU(struct delayed_work, reap_work);
2579
2580static void cache_reap(struct work_struct *unused)
2581{
2582 next_reap_node();
2583 schedule_delayed_work(&__get_cpu_var(reap_work),
2584 REAPTIMEOUT_CPUC);
2585}
2586
2587static void __devinit start_cpu_timer(int cpu)
2588{
2589 struct delayed_work *reap_work = &per_cpu(reap_work, cpu);
2590
2591 /*
2592 * When this gets called from do_initcalls via cpucache_init(),
2593 * init_workqueues() has already run, so keventd will be setup
2594 * at that time.
2595 */
2596 if (keventd_up() && reap_work->work.func == NULL) {
2597 init_reap_node(cpu);
2598 INIT_DELAYED_WORK(reap_work, cache_reap);
2599 schedule_delayed_work_on(cpu, reap_work, HZ + 3 * cpu);
2600 }
2601}
2602
2603static int __init cpucache_init(void)
2604{
2605 int cpu;
2606
2607 /*
2608 * Register the timers that drain pcp pages and update vm statistics
2609 */
2610 for_each_online_cpu(cpu)
2611 start_cpu_timer(cpu);
2612 return 0;
2613}
2614__initcall(cpucache_init);
2615#endif
2616
2617void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller) 2533void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller)
2618{ 2534{
2619 struct kmem_cache *s = get_slab(size, gfpflags); 2535 struct kmem_cache *s = get_slab(size, gfpflags);
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 006eb7621869..9832d9a41d8c 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -281,6 +281,17 @@ EXPORT_SYMBOL(dec_zone_page_state);
281 281
282/* 282/*
283 * Update the zone counters for one cpu. 283 * Update the zone counters for one cpu.
284 *
285 * Note that refresh_cpu_vm_stats strives to only access
286 * node local memory. The per cpu pagesets on remote zones are placed
287 * in the memory local to the processor using that pageset. So the
288 * loop over all zones will access a series of cachelines local to
289 * the processor.
290 *
291 * The call to zone_page_state_add updates the cachelines with the
292 * statistics in the remote zone struct as well as the global cachelines
293 * with the global counters. These could cause remote node cache line
294 * bouncing and will have to be only done when necessary.
284 */ 295 */
285void refresh_cpu_vm_stats(int cpu) 296void refresh_cpu_vm_stats(int cpu)
286{ 297{
@@ -289,21 +300,54 @@ void refresh_cpu_vm_stats(int cpu)
289 unsigned long flags; 300 unsigned long flags;
290 301
291 for_each_zone(zone) { 302 for_each_zone(zone) {
292 struct per_cpu_pageset *pcp; 303 struct per_cpu_pageset *p;
293 304
294 if (!populated_zone(zone)) 305 if (!populated_zone(zone))
295 continue; 306 continue;
296 307
297 pcp = zone_pcp(zone, cpu); 308 p = zone_pcp(zone, cpu);
298 309
299 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) 310 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
300 if (pcp->vm_stat_diff[i]) { 311 if (p->vm_stat_diff[i]) {
301 local_irq_save(flags); 312 local_irq_save(flags);
302 zone_page_state_add(pcp->vm_stat_diff[i], 313 zone_page_state_add(p->vm_stat_diff[i],
303 zone, i); 314 zone, i);
304 pcp->vm_stat_diff[i] = 0; 315 p->vm_stat_diff[i] = 0;
316#ifdef CONFIG_NUMA
317 /* 3 seconds idle till flush */
318 p->expire = 3;
319#endif
305 local_irq_restore(flags); 320 local_irq_restore(flags);
306 } 321 }
322#ifdef CONFIG_NUMA
323 /*
324 * Deal with draining the remote pageset of this
325 * processor
326 *
327 * Check if there are pages remaining in this pageset
328 * if not then there is nothing to expire.
329 */
330 if (!p->expire || (!p->pcp[0].count && !p->pcp[1].count))
331 continue;
332
333 /*
334 * We never drain zones local to this processor.
335 */
336 if (zone_to_nid(zone) == numa_node_id()) {
337 p->expire = 0;
338 continue;
339 }
340
341 p->expire--;
342 if (p->expire)
343 continue;
344
345 if (p->pcp[0].count)
346 drain_zone_pages(zone, p->pcp + 0);
347
348 if (p->pcp[1].count)
349 drain_zone_pages(zone, p->pcp + 1);
350#endif
307 } 351 }
308} 352}
309 353