-rw-r--r--   include/linux/gfp.h    |  6
-rw-r--r--   include/linux/mmzone.h |  3
-rw-r--r--   mm/page_alloc.c        | 45
-rw-r--r--   mm/slab.c              |  6
-rw-r--r--   mm/slub.c              | 84
-rw-r--r--   mm/vmstat.c            | 54
6 files changed, 67 insertions, 131 deletions
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 97a36c3d96e2..0d2ef0b082a6 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -176,10 +176,6 @@ extern void FASTCALL(free_cold_page(struct page *page));
 #define free_page(addr) free_pages((addr),0)
 
 void page_alloc_init(void);
-#ifdef CONFIG_NUMA
-void drain_node_pages(int node);
-#else
-static inline void drain_node_pages(int node) { };
-#endif
+void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp);
 
 #endif /* __LINUX_GFP_H */
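
The interface change above narrows draining from a whole node to one per-cpu page list of one zone; the zone walk that drain_node_pages() used to do now lives with the caller. A minimal sketch of an equivalent node-wide drain under the new interface (hypothetical helper name; the loop mirrors the code removed from mm/page_alloc.c below, and like drain_zone_pages() it must run pinned to one processor):

/* Hypothetical equivalent of the removed drain_node_pages(): walk all
 * populated zones of one node and drain the executing processor's hot
 * and cold per-cpu lists, at most one batch each per call.
 */
static void drain_node_pages_equiv(int nid)
{
        enum zone_type z;

        for (z = 0; z < MAX_NR_ZONES; z++) {
                struct zone *zone = NODE_DATA(nid)->node_zones + z;
                struct per_cpu_pageset *pset;

                if (!populated_zone(zone))
                        continue;

                pset = zone_pcp(zone, smp_processor_id());
                if (pset->pcp[0].count)         /* hot pages */
                        drain_zone_pages(zone, &pset->pcp[0]);
                if (pset->pcp[1].count)         /* cold pages */
                        drain_zone_pages(zone, &pset->pcp[1]);
        }
}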
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 2f1544e83042..d09b1345a3a1 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -83,6 +83,9 @@ struct per_cpu_pages {
 
 struct per_cpu_pageset {
 	struct per_cpu_pages pcp[2];	/* 0: hot.  1: cold */
+#ifdef CONFIG_NUMA
+	s8 expire;
+#endif
 #ifdef CONFIG_SMP
 	s8 stat_threshold;
 	s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS];
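
The new expire field gives each pageset its own idle countdown. For reference, the structure as it reads after the hunk, limited to the members visible above, with comments added here (the re-arm value of 3 comes from the mm/vmstat.c change below):

struct per_cpu_pageset {
        struct per_cpu_pages pcp[2];    /* 0: hot.  1: cold */
#ifdef CONFIG_NUMA
        s8 expire;              /* refresh intervals left before an
                                 * idle remote pageset is drained */
#endif
#ifdef CONFIG_SMP
        s8 stat_threshold;
        s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS];
#endif
        /* remaining members unchanged */
};

An s8 is ample since the counter only ever holds 0 to 3, and guarding it with CONFIG_NUMA keeps the pageset from growing on builds that cannot have remote pagesets.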
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d53cbf8acb8e..f9b5d6d5f4d6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -691,43 +691,26 @@ static void __init setup_nr_node_ids(void) {}
 
 #ifdef CONFIG_NUMA
 /*
- * Called from the slab reaper to drain pagesets on a particular node that
- * belongs to the currently executing processor.
+ * Called from the vmstat counter updater to drain pagesets of this
+ * currently executing processor on remote nodes after they have
+ * expired.
+ *
  * Note that this function must be called with the thread pinned to
  * a single processor.
  */
-void drain_node_pages(int nodeid)
+void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
 {
-	int i;
-	enum zone_type z;
 	unsigned long flags;
+	int to_drain;
 
-	for (z = 0; z < MAX_NR_ZONES; z++) {
-		struct zone *zone = NODE_DATA(nodeid)->node_zones + z;
-		struct per_cpu_pageset *pset;
-
-		if (!populated_zone(zone))
-			continue;
-
-		pset = zone_pcp(zone, smp_processor_id());
-		for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
-			struct per_cpu_pages *pcp;
-
-			pcp = &pset->pcp[i];
-			if (pcp->count) {
-				int to_drain;
-
-				local_irq_save(flags);
-				if (pcp->count >= pcp->batch)
-					to_drain = pcp->batch;
-				else
-					to_drain = pcp->count;
-				free_pages_bulk(zone, to_drain, &pcp->list, 0);
-				pcp->count -= to_drain;
-				local_irq_restore(flags);
-			}
-		}
-	}
+	local_irq_save(flags);
+	if (pcp->count >= pcp->batch)
+		to_drain = pcp->batch;
+	else
+		to_drain = pcp->count;
+	free_pages_bulk(zone, to_drain, &pcp->list, 0);
+	pcp->count -= to_drain;
+	local_irq_restore(flags);
 }
 #endif
 
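
drain_zone_pages() frees at most one batch (pcp->batch pages) per call, which keeps the interrupts-off window short; a well-filled pageset is therefore emptied over several calls. A hedged sketch of a complete drain built on top of it (hypothetical helper, not part of this patch):

/* Hypothetical helper: empty a pageset completely, one batch at a
 * time, re-enabling interrupts between batches. Like drain_zone_pages()
 * itself, it must run pinned to the processor owning the pageset.
 */
static void drain_zone_pages_all(struct zone *zone,
                                 struct per_cpu_pages *pcp)
{
        while (pcp->count)
                drain_zone_pages(zone, pcp);
}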
diff --git a/mm/slab.c b/mm/slab.c
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -928,12 +928,6 @@ static void next_reap_node(void)
 {
 	int node = __get_cpu_var(reap_node);
 
-	/*
-	 * Also drain per cpu pages on remote zones
-	 */
-	if (node != numa_node_id())
-		drain_node_pages(node);
-
 	node = next_node(node, node_online_map);
 	if (unlikely(node >= MAX_NUMNODES))
 		node = first_node(node_online_map);
diff --git a/mm/slub.c b/mm/slub.c
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2530,90 +2530,6 @@ static struct notifier_block __cpuinitdata slab_notifier =
 
 #endif
 
-#ifdef CONFIG_NUMA
-
-/*****************************************************************
- * Generic reaper used to support the page allocator
- * (the cpu slabs are reaped by a per slab workqueue).
- *
- * Maybe move this to the page allocator?
- ****************************************************************/
-
-static DEFINE_PER_CPU(unsigned long, reap_node);
-
-static void init_reap_node(int cpu)
-{
-	int node;
-
-	node = next_node(cpu_to_node(cpu), node_online_map);
-	if (node == MAX_NUMNODES)
-		node = first_node(node_online_map);
-
-	__get_cpu_var(reap_node) = node;
-}
-
-static void next_reap_node(void)
-{
-	int node = __get_cpu_var(reap_node);
-
-	/*
-	 * Also drain per cpu pages on remote zones
-	 */
-	if (node != numa_node_id())
-		drain_node_pages(node);
-
-	node = next_node(node, node_online_map);
-	if (unlikely(node >= MAX_NUMNODES))
-		node = first_node(node_online_map);
-	__get_cpu_var(reap_node) = node;
-}
-#else
-#define init_reap_node(cpu) do { } while (0)
-#define next_reap_node(void) do { } while (0)
-#endif
-
-#define REAPTIMEOUT_CPUC	(2*HZ)
-
-#ifdef CONFIG_SMP
-static DEFINE_PER_CPU(struct delayed_work, reap_work);
-
-static void cache_reap(struct work_struct *unused)
-{
-	next_reap_node();
-	schedule_delayed_work(&__get_cpu_var(reap_work),
-				      REAPTIMEOUT_CPUC);
-}
-
-static void __devinit start_cpu_timer(int cpu)
-{
-	struct delayed_work *reap_work = &per_cpu(reap_work, cpu);
-
-	/*
-	 * When this gets called from do_initcalls via cpucache_init(),
-	 * init_workqueues() has already run, so keventd will be setup
-	 * at that time.
-	 */
-	if (keventd_up() && reap_work->work.func == NULL) {
-		init_reap_node(cpu);
-		INIT_DELAYED_WORK(reap_work, cache_reap);
-		schedule_delayed_work_on(cpu, reap_work, HZ + 3 * cpu);
-	}
-}
-
-static int __init cpucache_init(void)
-{
-	int cpu;
-
-	/*
-	 * Register the timers that drain pcp pages and update vm statistics
-	 */
-	for_each_online_cpu(cpu)
-		start_cpu_timer(cpu);
-	return 0;
-}
-__initcall(cpucache_init);
-#endif
-
 void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller)
 {
 	struct kmem_cache *s = get_slab(size, gfpflags);
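
The 84 lines removed above are SLUB's copy of the generic reaper, which by its own comment existed mainly to support the page allocator; once the vmstat updater drains remote pagesets, nothing is left for it to do. Its skeleton is the standard self-rearming per-cpu delayed-work pattern, condensed here from the deleted lines for reference (the my_* names are placeholders):

static DEFINE_PER_CPU(struct delayed_work, my_work);

static void my_work_fn(struct work_struct *unused)
{
        /* ... periodic per-cpu duty ... */

        /* re-arm on the current cpu; fires again in 2*HZ */
        schedule_delayed_work(&__get_cpu_var(my_work), 2 * HZ);
}

static void __devinit start_my_timer(int cpu)
{
        struct delayed_work *w = &per_cpu(my_work, cpu);

        INIT_DELAYED_WORK(w, my_work_fn);
        /* stagger the initial firing so cpus do not wake together */
        schedule_delayed_work_on(cpu, w, HZ + 3 * cpu);
}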
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 006eb7621869..9832d9a41d8c 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -281,6 +281,17 @@ EXPORT_SYMBOL(dec_zone_page_state);
 
 /*
  * Update the zone counters for one cpu.
+ *
+ * Note that refresh_cpu_vm_stats strives to only access
+ * node local memory. The per cpu pagesets on remote zones are placed
+ * in the memory local to the processor using that pageset. So the
+ * loop over all zones will access a series of cachelines local to
+ * the processor.
+ *
+ * The call to zone_page_state_add updates the cachelines with the
+ * statistics in the remote zone struct as well as the global cachelines
+ * with the global counters. These could cause remote node cache line
+ * bouncing and will have to be only done when necessary.
  */
 void refresh_cpu_vm_stats(int cpu)
 {
@@ -289,21 +300,54 @@ void refresh_cpu_vm_stats(int cpu)
 	unsigned long flags;
 
 	for_each_zone(zone) {
-		struct per_cpu_pageset *pcp;
+		struct per_cpu_pageset *p;
 
 		if (!populated_zone(zone))
 			continue;
 
-		pcp = zone_pcp(zone, cpu);
+		p = zone_pcp(zone, cpu);
 
 		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
-			if (pcp->vm_stat_diff[i]) {
+			if (p->vm_stat_diff[i]) {
 				local_irq_save(flags);
-				zone_page_state_add(pcp->vm_stat_diff[i],
+				zone_page_state_add(p->vm_stat_diff[i],
 					zone, i);
-				pcp->vm_stat_diff[i] = 0;
+				p->vm_stat_diff[i] = 0;
+#ifdef CONFIG_NUMA
+				/* 3 seconds idle till flush */
+				p->expire = 3;
+#endif
 				local_irq_restore(flags);
 			}
+#ifdef CONFIG_NUMA
+		/*
+		 * Deal with draining the remote pageset of this
+		 * processor
+		 *
+		 * Check if there are pages remaining in this pageset
+		 * if not then there is nothing to expire.
+		 */
+		if (!p->expire || (!p->pcp[0].count && !p->pcp[1].count))
+			continue;
+
+		/*
+		 * We never drain zones local to this processor.
+		 */
+		if (zone_to_nid(zone) == numa_node_id()) {
+			p->expire = 0;
+			continue;
+		}
+
+		p->expire--;
+		if (p->expire)
+			continue;
+
+		if (p->pcp[0].count)
+			drain_zone_pages(zone, p->pcp + 0);
+
+		if (p->pcp[1].count)
+			drain_zone_pages(zone, p->pcp + 1);
+#endif
 	}
 }
 
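
Taken together, the CONFIG_NUMA additions give each remote pageset a small state machine that is advanced once per refresh_cpu_vm_stats() pass (about once a second, going by the "3 seconds idle till flush" comment). A condensed restatement of the control flow for one pageset p of zone, with the hunk's continue statements rewritten as an if/else chain and the flushed_any_counter flag standing in for the stat loop above (behavior unchanged):

if (flushed_any_counter)                /* counter activity this pass */
        p->expire = 3;                  /* re-arm the idle countdown */

if (!p->expire || (!p->pcp[0].count && !p->pcp[1].count))
        ;                               /* disarmed or empty: nothing to do */
else if (zone_to_nid(zone) == numa_node_id())
        p->expire = 0;                  /* local zones are never drained */
else if (--p->expire == 0) {            /* idle long enough: drain */
        if (p->pcp[0].count)
                drain_zone_pages(zone, &p->pcp[0]);
        if (p->pcp[1].count)
                drain_zone_pages(zone, &p->pcp[1]);
}

Note that a pageset holding pages on a remote zone has its counter decremented even in the pass that re-arms it, so the drain lands on the third pass after the last counter activity, roughly three seconds later.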