author     Mel Gorman <mel@csn.ul.ie>                            2007-08-22 17:02:05 -0400
committer  Linus Torvalds <torvalds@woody.linux-foundation.org>  2007-08-22 22:52:47 -0400
commit     b377fd3982ad957c796758a90e2988401a884241 (patch)
tree       3d7449ccdf7038bffffa9323873f4095cc1ac6ce
parent     8e92f21ba3ea3f54e4be062b87ef9fc4af2d33e2 (diff)
Apply memory policies to top two highest zones when highest zone is ZONE_MOVABLE
The NUMA layer only supports NUMA policies for the highest zone. When
ZONE_MOVABLE is configured with kernelcore=, the highest zone becomes
ZONE_MOVABLE. The result is that policies are only applied to allocations
such as anonymous pages and page cache allocated from ZONE_MOVABLE when
that zone is used.
This patch applies policies to the two highest zones when the highest zone
is ZONE_MOVABLE. As ZONE_MOVABLE consists of pages from the highest "real"
zone, applying the policy to both zones is always functionally equivalent.
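To make the effect concrete, here is a minimal standalone C sketch of the policy_zone bookkeeping this patch changes. The zone ordering and the ZONE_MOVABLE test mirror the hunks below; the enum values, the loop and the output are user-space scaffolding for illustration only, not kernel code.

/* Standalone sketch: where policy_zone settles after this patch.
 * Only the ZONE_MOVABLE test mirrors the patch; the rest is
 * user-space scaffolding for illustration. */
#include <stdio.h>

enum zone_type { ZONE_DMA, ZONE_DMA32, ZONE_NORMAL, ZONE_MOVABLE, MAX_NR_ZONES };

static enum zone_type policy_zone = ZONE_DMA;

/* Patched check_highest_zone(): ZONE_MOVABLE no longer raises
 * policy_zone, so policies keep covering the highest "real" zone. */
static void check_highest_zone(enum zone_type k)
{
	if (k > policy_zone && k != ZONE_MOVABLE)
		policy_zone = k;
}

int main(void)
{
	/* Zonelist construction ends up calling this for every populated zone. */
	for (int k = ZONE_DMA; k < MAX_NR_ZONES; k++)
		check_highest_zone(k);

	/* With kernelcore= the highest zone is ZONE_MOVABLE, yet policy_zone
	 * stays at ZONE_NORMAL, so NUMA policies apply to ZONE_NORMAL and,
	 * via the allocator-side filter below, to ZONE_MOVABLE as well. */
	printf("policy_zone == ZONE_NORMAL? %d\n", policy_zone == ZONE_NORMAL);
	return 0;
}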
The patch has been tested on a variety of machines both NUMA and non-NUMA
covering x86, x86_64 and ppc64. No abnormal results were seen in
kernbench, tbench, dbench or hackbench. It passes regression tests from
the numactl package with and without kernelcore= once numactl tests are
patched to wait for vmstat counters to update.
akpm: this is the nasty hack to fix NUMA mempolicies in the presence of
ZONE_MOVABLE and kernelcore= in 2.6.23. Christoph says "For .24 either merge
the mobility or get the other solution that Mel is working on. That solution
would only use a single zonelist per node and filter on the fly. That may
help performance and also help to make memory policies work better."
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Tested-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Acked-by: Christoph Lameter <clameter@sgi.com>
Cc: Andi Kleen <ak@suse.de>
Cc: Paul Mundt <lethal@linux-sh.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--  include/linux/mempolicy.h |  2
-rw-r--r--  include/linux/mmzone.h    | 18
-rw-r--r--  mm/mempolicy.c            |  2
-rw-r--r--  mm/page_alloc.c           | 13
4 files changed, 33 insertions(+), 2 deletions(-)
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index e147cf50529f..5bdd656e88cf 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -166,7 +166,7 @@ extern enum zone_type policy_zone;
 
 static inline void check_highest_zone(enum zone_type k)
 {
-	if (k > policy_zone)
+	if (k > policy_zone && k != ZONE_MOVABLE)
 		policy_zone = k;
 }
 
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 3ea68cd3b61f..4e5627379b09 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -410,6 +410,24 @@ struct zonelist {
 #endif
 };
 
+#ifdef CONFIG_NUMA
+/*
+ * Only custom zonelists like MPOL_BIND need to be filtered as part of
+ * policies. As described in the comment for struct zonelist_cache, these
+ * zonelists will not have a zlcache so zlcache_ptr will not be set. Use
+ * that to determine if the zonelists needs to be filtered or not.
+ */
+static inline int alloc_should_filter_zonelist(struct zonelist *zonelist)
+{
+	return !zonelist->zlcache_ptr;
+}
+#else
+static inline int alloc_should_filter_zonelist(struct zonelist *zonelist)
+{
+	return 0;
+}
+#endif /* CONFIG_NUMA */
+
 #ifdef CONFIG_ARCH_POPULATES_NODE_MAP
 struct node_active_region {
 	unsigned long start_pfn;
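For illustration, a small standalone sketch of the predicate added above. struct zonelist is pared down to the single relevant field, and the two sample zonelists are hypothetical; in the kernel the per-node zonelists get a zonelist_cache while the kmalloc'd MPOL_BIND zonelists do not.

/* Sketch: a NULL zlcache_ptr marks a custom (MPOL_BIND) zonelist as
 * needing per-allocation filtering; node zonelists carry a cache. */
#include <stddef.h>
#include <stdio.h>

struct zonelist_cache { int dummy; };
struct zonelist { struct zonelist_cache *zlcache_ptr; };

static int alloc_should_filter_zonelist(struct zonelist *zonelist)
{
	return !zonelist->zlcache_ptr;	/* CONFIG_NUMA variant */
}

int main(void)
{
	struct zonelist_cache cache = { 0 };
	struct zonelist node_zonelist = { .zlcache_ptr = &cache };	/* built per node */
	struct zonelist bind_zonelist = { .zlcache_ptr = NULL };	/* built by MPOL_BIND */

	printf("node zonelist filtered: %d\n", alloc_should_filter_zonelist(&node_zonelist));
	printf("bind zonelist filtered: %d\n", alloc_should_filter_zonelist(&bind_zonelist));
	return 0;
}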
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 71b84b45154a..172abffeb2e3 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -149,7 +149,7 @@ static struct zonelist *bind_zonelist(nodemask_t *nodes)
 	   lower zones etc. Avoid empty zones because the memory allocator
 	   doesn't like them. If you implement node hot removal you
 	   have to fix that. */
-	k = policy_zone;
+	k = MAX_NR_ZONES - 1;
 	while (1) {
 		for_each_node_mask(nd, *nodes) {
 			struct zone *z = &NODE_DATA(nd)->node_zones[k];
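A standalone sketch of what the one-line change above buys, assuming the usual x86_64 zone layout: with policy_zone now capped below ZONE_MOVABLE, starting the walk at policy_zone would leave ZONE_MOVABLE out of MPOL_BIND zonelists, while starting at MAX_NR_ZONES - 1 keeps it and leaves the filtering to the allocator. The real bind_zonelist() also skips empty zones and walks every allowed node.

/* Sketch: zone indices an MPOL_BIND zonelist would contain before and
 * after the bind_zonelist() change, once policy_zone is capped below
 * ZONE_MOVABLE by this patch. Simplified; zone layout is illustrative. */
#include <stdio.h>

enum zone_type { ZONE_DMA, ZONE_DMA32, ZONE_NORMAL, ZONE_MOVABLE, MAX_NR_ZONES };

static void list_zones(const char *tag, int start)
{
	printf("%s:", tag);
	for (int k = start; k >= 0; k--)	/* bind_zonelist() walks downwards */
		printf(" %d", k);
	printf("\n");
}

int main(void)
{
	enum zone_type policy_zone = ZONE_NORMAL;	/* capped by this patch */

	/* Starting at policy_zone would now exclude ZONE_MOVABLE from the
	 * custom zonelist; starting at MAX_NR_ZONES - 1 keeps it, and
	 * get_page_from_freelist() filters disallowed zones on the fly. */
	list_zones("k = policy_zone     ", policy_zone);
	list_zones("k = MAX_NR_ZONES - 1", MAX_NR_ZONES - 1);
	return 0;
}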
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3da85b81dabb..6427653023aa 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1157,6 +1157,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
 	nodemask_t *allowednodes = NULL;/* zonelist_cache approximation */
 	int zlc_active = 0;		/* set if using zonelist_cache */
 	int did_zlc_setup = 0;		/* just call zlc_setup() one time */
+	enum zone_type highest_zoneidx = -1; /* Gets set for policy zonelists */
 
 zonelist_scan:
 	/*
@@ -1166,6 +1167,18 @@ zonelist_scan:
 	z = zonelist->zones;
 
 	do {
+		/*
+		 * In NUMA, this could be a policy zonelist which contains
+		 * zones that may not be allowed by the current gfp_mask.
+		 * Check the zone is allowed by the current flags
+		 */
+		if (unlikely(alloc_should_filter_zonelist(zonelist))) {
+			if (highest_zoneidx == -1)
+				highest_zoneidx = gfp_zone(gfp_mask);
+			if (zone_idx(*z) > highest_zoneidx)
+				continue;
+		}
+
 		if (NUMA_BUILD && zlc_active &&
 			!zlc_zone_worth_trying(zonelist, z, allowednodes))
 				continue;
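Finally, a standalone sketch of the filtering rule itself: a zone in a policy zonelist is skipped when its index exceeds the highest zone the gfp mask allows. gfp_zone() is reduced to a two-entry stand-in and the FAKE_GFP_* names are made up for the example; in the kernel gfp_zone() decodes the zone modifier bits (__GFP_DMA, __GFP_HIGHMEM, __GFP_MOVABLE, ...) of the real gfp mask.

/* Sketch: on-the-fly filtering of a policy zonelist by gfp mask. */
#include <stdio.h>

enum zone_type { ZONE_DMA, ZONE_DMA32, ZONE_NORMAL, ZONE_MOVABLE, MAX_NR_ZONES };

enum fake_gfp { FAKE_GFP_KERNEL, FAKE_GFP_HIGHUSER_MOVABLE };

/* Stand-in for the kernel's gfp_zone(): map a mask to its highest zone. */
static enum zone_type gfp_zone(enum fake_gfp gfp)
{
	return gfp == FAKE_GFP_KERNEL ? ZONE_NORMAL : ZONE_MOVABLE;
}

static void walk_policy_zonelist(enum fake_gfp gfp)
{
	/* An MPOL_BIND zonelist now lists every zone, highest first. */
	enum zone_type zonelist[] = { ZONE_MOVABLE, ZONE_NORMAL, ZONE_DMA32, ZONE_DMA };
	enum zone_type highest_zoneidx = gfp_zone(gfp);	/* computed lazily in the kernel */

	for (int i = 0; i < 4; i++) {
		if (zonelist[i] > highest_zoneidx)
			continue;	/* zone not allowed by this gfp mask */
		printf("gfp %d may allocate from zone %d\n", gfp, zonelist[i]);
	}
}

int main(void)
{
	walk_policy_zonelist(FAKE_GFP_KERNEL);			/* skips ZONE_MOVABLE */
	walk_policy_zonelist(FAKE_GFP_HIGHUSER_MOVABLE);	/* allows all zones */
	return 0;
}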