author    Andi Kleen <ak@linux.intel.com>          2011-03-22 19:33:12 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2011-03-22 20:44:05 -0400
commit    78afd5612deb8268bafc8b6507d72341d5ed9aac (patch)
tree      b96131d6f237b9ce6848d95c8eccdd1f7855365c
parent    11bc82d67d1150767901bca54a24466621d763d7 (diff)
mm: add __GFP_OTHER_NODE flag
Add a new __GFP_OTHER_NODE flag to tell the low-level NUMA statistics in
zone_statistics() that an allocation is on behalf of another thread.  This
way the local and remote counters can still be correct even when background
daemons like khugepaged are changing memory mappings.

This only affects the accounting, but I think it's worth doing that right
to avoid confusing users.

I first tried to just pass down the right node, but this required a lot of
changes to pass down this parameter, including at least one addition of a
10th argument to a 9-argument function.  Using the flag is a lot less
intrusive.

Open question: should this also be used for migration?

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
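To make the intended usage concrete, here is a minimal sketch, not part of
this patch, of how a background daemon might pass the flag when allocating
on behalf of a task on another node.  alloc_pages_node() and
GFP_HIGHUSER_MOVABLE are existing kernel APIs; alloc_for_remote_task() is a
hypothetical helper named for illustration only:

	/*
	 * Hypothetical caller: with __GFP_OTHER_NODE set,
	 * zone_statistics() judges NUMA_LOCAL/NUMA_OTHER against
	 * target_node's zones rather than the daemon's own
	 * numa_node_id().
	 */
	struct page *alloc_for_remote_task(int target_node)
	{
		return alloc_pages_node(target_node,
					GFP_HIGHUSER_MOVABLE | __GFP_OTHER_NODE,
					0 /* order-0, a single page */);
	}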
-rw-r--r--  include/linux/gfp.h    | 2 ++
-rw-r--r--  include/linux/vmstat.h | 4 ++--
-rw-r--r--  mm/page_alloc.c        | 2 +-
-rw-r--r--  mm/vmstat.c            | 9 +++++++--
4 files changed, 12 insertions(+), 5 deletions(-)
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index dca31761b311..bfb8f934521e 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -35,6 +35,7 @@ struct vm_area_struct;
 #define ___GFP_NOTRACK		0
 #endif
 #define ___GFP_NO_KSWAPD	0x400000u
+#define ___GFP_OTHER_NODE	0x800000u
 
 /*
  * GFP bitmasks..
@@ -83,6 +84,7 @@ struct vm_area_struct;
 #define __GFP_NOTRACK	((__force gfp_t)___GFP_NOTRACK)  /* Don't track with kmemcheck */
 
 #define __GFP_NO_KSWAPD	((__force gfp_t)___GFP_NO_KSWAPD)
+#define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
 
 /*
  * This may seem redundant, but it's a way of annotating false positives vs.
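A note on the two-level naming seen above: ___GFP_OTHER_NODE is the raw bit
value and __GFP_OTHER_NODE is the gfp_t-typed wrapper; callers OR the
wrapper into their allocation mask and the allocator tests for it.  An
illustrative fragment (not from this patch):

	gfp_t gfp = GFP_KERNEL | __GFP_OTHER_NODE;	/* request remote-style accounting */

	if (gfp & __GFP_OTHER_NODE)			/* the test mm/vmstat.c performs below */
		pr_debug("accounting against the preferred zone's node\n");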
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 833e676d6d92..461c0119664f 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -220,12 +220,12 @@ static inline unsigned long node_page_state(int node,
 		zone_page_state(&zones[ZONE_MOVABLE], item);
 }
 
-extern void zone_statistics(struct zone *, struct zone *);
+extern void zone_statistics(struct zone *, struct zone *, gfp_t gfp);
 
 #else
 
 #define node_page_state(node, item) global_page_state(item)
-#define zone_statistics(_zl,_z) do { } while (0)
+#define zone_statistics(_zl, _z, gfp) do { } while (0)
 
 #endif /* CONFIG_NUMA */
 
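The !CONFIG_NUMA stub grows the third parameter so call sites compile
unchanged; the do { } while (0) body is the standard idiom that keeps an
empty macro behaving like a single statement.  A self-contained user-space
sketch of the same pattern (illustrative only):

	#include <stdio.h>

	/* Same pattern as the !CONFIG_NUMA stub: the arguments are accepted
	 * and discarded, and do { } while (0) keeps the expansion a single
	 * statement so it nests safely under an unbraced if/else. */
	#define zone_statistics(_zl, _z, gfp) do { } while (0)

	int main(void)
	{
		int numa = 0;

		if (numa)
			zone_statistics(0, 0, 0);	/* expands to a safe no-op */
		else
			printf("NUMA disabled: statistics call compiled away\n");
		return 0;
	}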
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6d0032bdb5d8..136a547262a0 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1337,7 +1337,7 @@ again:
 	}
 
 	__count_zone_vm_events(PGALLOC, zone, 1 << order);
-	zone_statistics(preferred_zone, zone);
+	zone_statistics(preferred_zone, zone, gfp_flags);
 	local_irq_restore(flags);
 
 	VM_BUG_ON(bad_range(zone, page));
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 0c3b5048773e..772b39b87d95 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -500,8 +500,12 @@ void refresh_cpu_vm_stats(int cpu)
  * z	    = the zone from which the allocation occurred.
  *
  * Must be called with interrupts disabled.
+ *
+ * When __GFP_OTHER_NODE is set assume the node of the preferred
+ * zone is the local node.  This is useful for daemons who allocate
+ * memory on behalf of other processes.
  */
-void zone_statistics(struct zone *preferred_zone, struct zone *z)
+void zone_statistics(struct zone *preferred_zone, struct zone *z, gfp_t flags)
 {
 	if (z->zone_pgdat == preferred_zone->zone_pgdat) {
 		__inc_zone_state(z, NUMA_HIT);
@@ -509,7 +513,8 @@ void zone_statistics(struct zone *preferred_zone, struct zone *z)
 		__inc_zone_state(z, NUMA_MISS);
 		__inc_zone_state(preferred_zone, NUMA_FOREIGN);
 	}
-	if (z->node == numa_node_id())
+	if (z->node == ((flags & __GFP_OTHER_NODE) ?
+			preferred_zone->node : numa_node_id()))
 		__inc_zone_state(z, NUMA_LOCAL);
 	else
 		__inc_zone_state(z, NUMA_OTHER);
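To see exactly what the flag changes, here is a small user-space model of
the classification above, a sketch for illustration whose names mirror the
kernel's but which is not kernel code:

	#include <stdio.h>

	#define __GFP_OTHER_NODE 0x800000u

	/* Model of zone_statistics() after this patch.  local_node stands in
	 * for numa_node_id(), i.e. the node of the allocating CPU. */
	static void classify(int preferred_node, int alloc_node, int local_node,
			     unsigned int flags)
	{
		/* hit/miss compare the allocation against the preferred node */
		printf("%s, ", alloc_node == preferred_node ? "NUMA_HIT" : "NUMA_MISS");

		/* local/other: with __GFP_OTHER_NODE, "local" means local to
		 * the preferred zone's node, not to the allocating daemon */
		int reference = (flags & __GFP_OTHER_NODE) ? preferred_node
							   : local_node;
		printf("%s\n", alloc_node == reference ? "NUMA_LOCAL" : "NUMA_OTHER");
	}

	int main(void)
	{
		/* khugepaged-style case: a daemon on node 0 allocates on
		 * node 1 for a process whose preferred node is 1 */
		classify(1, 1, 0, 0);                 /* NUMA_HIT, NUMA_OTHER (misleading) */
		classify(1, 1, 0, __GFP_OTHER_NODE);  /* NUMA_HIT, NUMA_LOCAL (intended)   */
		return 0;
	}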