aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMel Gorman <mel@csn.ul.ie>2007-08-22 17:02:05 -0400
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-08-22 22:52:47 -0400
commitb377fd3982ad957c796758a90e2988401a884241 (patch)
tree3d7449ccdf7038bffffa9323873f4095cc1ac6ce
parent8e92f21ba3ea3f54e4be062b87ef9fc4af2d33e2 (diff)
Apply memory policies to top two highest zones when highest zone is ZONE_MOVABLE
The NUMA layer only supports NUMA policies for the highest zone. When ZONE_MOVABLE is configured with kernelcore=, the highest zone becomes ZONE_MOVABLE. The result is that policies are only applied to allocations like anonymous pages and page cache allocated from ZONE_MOVABLE when the zone is used. This patch applies policies to the two highest zones when the highest zone is ZONE_MOVABLE. As ZONE_MOVABLE consists of pages from the highest "real" zone, it's always functionally equivalent. The patch has been tested on a variety of machines both NUMA and non-NUMA covering x86, x86_64 and ppc64. No abnormal results were seen in kernbench, tbench, dbench or hackbench. It passes regression tests from the numactl package with and without kernelcore= once numactl tests are patched to wait for vmstat counters to update. akpm: this is the nasty hack to fix NUMA mempolicies in the presence of ZONE_MOVABLE and kernelcore= in 2.6.23. Christoph says "For .24 either merge the mobility or get the other solution that Mel is working on. That solution would only use a single zonelist per node and filter on the fly. That may help performance and also help to make memory policies work better." Signed-off-by: Mel Gorman <mel@csn.ul.ie> Acked-by: Lee Schermerhorn <lee.schermerhorn@hp.com> Tested-by: Lee Schermerhorn <lee.schermerhorn@hp.com> Acked-by: Christoph Lameter <clameter@sgi.com> Cc: Andi Kleen <ak@suse.de> Cc: Paul Mundt <lethal@linux-sh.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--include/linux/mempolicy.h2
-rw-r--r--include/linux/mmzone.h18
-rw-r--r--mm/mempolicy.c2
-rw-r--r--mm/page_alloc.c13
4 files changed, 33 insertions, 2 deletions
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index e147cf50529f..5bdd656e88cf 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -166,7 +166,7 @@ extern enum zone_type policy_zone;
166 166
167static inline void check_highest_zone(enum zone_type k) 167static inline void check_highest_zone(enum zone_type k)
168{ 168{
169 if (k > policy_zone) 169 if (k > policy_zone && k != ZONE_MOVABLE)
170 policy_zone = k; 170 policy_zone = k;
171} 171}
172 172
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 3ea68cd3b61f..4e5627379b09 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -410,6 +410,24 @@ struct zonelist {
410#endif 410#endif
411}; 411};
412 412
413#ifdef CONFIG_NUMA
414/*
415 * Only custom zonelists like MPOL_BIND need to be filtered as part of
416 * policies. As described in the comment for struct zonelist_cache, these
417 * zonelists will not have a zlcache so zlcache_ptr will not be set. Use
418 * that to determine if the zonelists needs to be filtered or not.
419 */
420static inline int alloc_should_filter_zonelist(struct zonelist *zonelist)
421{
422 return !zonelist->zlcache_ptr;
423}
424#else
425static inline int alloc_should_filter_zonelist(struct zonelist *zonelist)
426{
427 return 0;
428}
429#endif /* CONFIG_NUMA */
430
413#ifdef CONFIG_ARCH_POPULATES_NODE_MAP 431#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
414struct node_active_region { 432struct node_active_region {
415 unsigned long start_pfn; 433 unsigned long start_pfn;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 71b84b45154a..172abffeb2e3 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -149,7 +149,7 @@ static struct zonelist *bind_zonelist(nodemask_t *nodes)
149 lower zones etc. Avoid empty zones because the memory allocator 149 lower zones etc. Avoid empty zones because the memory allocator
150 doesn't like them. If you implement node hot removal you 150 doesn't like them. If you implement node hot removal you
151 have to fix that. */ 151 have to fix that. */
152 k = policy_zone; 152 k = MAX_NR_ZONES - 1;
153 while (1) { 153 while (1) {
154 for_each_node_mask(nd, *nodes) { 154 for_each_node_mask(nd, *nodes) {
155 struct zone *z = &NODE_DATA(nd)->node_zones[k]; 155 struct zone *z = &NODE_DATA(nd)->node_zones[k];
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3da85b81dabb..6427653023aa 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1157,6 +1157,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
1157 nodemask_t *allowednodes = NULL;/* zonelist_cache approximation */ 1157 nodemask_t *allowednodes = NULL;/* zonelist_cache approximation */
1158 int zlc_active = 0; /* set if using zonelist_cache */ 1158 int zlc_active = 0; /* set if using zonelist_cache */
1159 int did_zlc_setup = 0; /* just call zlc_setup() one time */ 1159 int did_zlc_setup = 0; /* just call zlc_setup() one time */
1160 enum zone_type highest_zoneidx = -1; /* Gets set for policy zonelists */
1160 1161
1161zonelist_scan: 1162zonelist_scan:
1162 /* 1163 /*
@@ -1166,6 +1167,18 @@ zonelist_scan:
1166 z = zonelist->zones; 1167 z = zonelist->zones;
1167 1168
1168 do { 1169 do {
1170 /*
1171 * In NUMA, this could be a policy zonelist which contains
1172 * zones that may not be allowed by the current gfp_mask.
1173 * Check the zone is allowed by the current flags
1174 */
1175 if (unlikely(alloc_should_filter_zonelist(zonelist))) {
1176 if (highest_zoneidx == -1)
1177 highest_zoneidx = gfp_zone(gfp_mask);
1178 if (zone_idx(*z) > highest_zoneidx)
1179 continue;
1180 }
1181
1169 if (NUMA_BUILD && zlc_active && 1182 if (NUMA_BUILD && zlc_active &&
1170 !zlc_zone_worth_trying(zonelist, z, allowednodes)) 1183 !zlc_zone_worth_trying(zonelist, z, allowednodes))
1171 continue; 1184 continue;