path: root/mm/vmscan.c
author	Mel Gorman <mel@csn.ul.ie>	2008-04-28 05:12:12 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2008-04-28 11:58:18 -0400
commit	dac1d27bc8d5ca636d3014ecfdf94407031d1970 (patch)
tree	2d92f6533f91da05c5f3d61e314f36b9313be89a /mm/vmscan.c
parent	9d02dbc8137759e4c2f91db0b7f9c8a1ec2a9276 (diff)
mm: use zonelists instead of zones when direct reclaiming pages
The following patches replace multiple zonelists per node with two zonelists that are filtered based on the GFP flags.  The patches as a set fix a bug with regard to the use of MPOL_BIND and ZONE_MOVABLE.  With this patchset, MPOL_BIND will apply to the two highest zones when the highest zone is ZONE_MOVABLE.  This should be considered an alternative fix for the MPOL_BIND+ZONE_MOVABLE problem in 2.6.23 to the previously discussed hack that filters only custom zonelists.

The first patch cleans up an inconsistency where direct reclaim uses zonelist->zones where other places use zonelist.  The second patch introduces a helper function node_zonelist() for looking up the appropriate zonelist for a GFP mask, which simplifies patches later in the set.  The third patch defines/remembers the "preferred zone" for NUMA statistics, as it is no longer always the first zone in a zonelist.  The fourth patch replaces multiple zonelists with two zonelists that are filtered.  The two zonelists are due to the fact that the memoryless patchset introduces a second set of zonelists for __GFP_THISNODE.  The fifth patch introduces helper macros for retrieving the zone and node indices of entries in a zonelist.  The final patch introduces filtering of the zonelists based on a nodemask.  Two zonelists exist per node, one for normal allocations and one for __GFP_THISNODE.

Performance results varied depending on the machine configuration.  In real workloads the gain/loss will depend on how much the userspace portion of the benchmark benefits from having more cache available due to reduced referencing of zonelists.  These are the ranges of performance losses/gains when running the set against 2.6.24-rc4-mm1 on a mix of i386, x86_64 and ppc64 machines, both NUMA and non-NUMA:

                              loss    to  gain
Total CPU time on Kernbench: -0.86%   to  1.13%
Elapsed   time on Kernbench: -0.79%   to  0.76%
page_test from aim9:         -4.37%   to  0.79%
brk_test  from aim9:         -0.71%   to  4.07%
fork_test from aim9:         -1.84%   to  4.60%
exec_test from aim9:         -0.71%   to  1.08%

This patch:

The allocator deals with zonelists, which indicate the order in which zones should be targeted for an allocation.  Similarly, direct reclaim of pages iterates over an array of zones.  For consistency, this patch converts direct reclaim to use a zonelist.  No functionality is changed by this patch.  This simplifies zonelist iterators in the next patch.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
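To make the shape of the conversion concrete, the following is a minimal sketch distilled from the diff below (declarations simplified, function bodies trimmed; it is not the verbatim kernel code).  The reclaim path keeps working on the same zone array as before, it simply derives that array from the zonelist it is now handed:

	/* Before this patch: direct reclaim received a bare array of zones. */
	unsigned long try_to_free_pages(struct zone **zones, int order, gfp_t gfp_mask);

	/* After this patch: callers pass the zonelist itself... */
	unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
							gfp_t gfp_mask);

	/* ...and the reclaim internals unpack the zone array where they need it. */
	static unsigned long shrink_zones(int priority, struct zonelist *zonelist,
					struct scan_control *sc)
	{
		unsigned long nr_reclaimed = 0;
		struct zone **zones = zonelist->zones;	/* same array as before */
		int i;
		/* ... iterate over zones[i] exactly as the old code did ... */
		return nr_reclaimed;
	}

Later patches in the set build on this by adding the node_zonelist() helper, so callers such as try_to_free_mem_cgroup_pages() no longer need to index node_zonelists[] by hand.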
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--	mm/vmscan.c	21
1 file changed, 12 insertions(+), 9 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index f80a5b7c057f..ef8551e0d2d0 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1246,10 +1246,11 @@ static unsigned long shrink_zone(int priority, struct zone *zone,
  * If a zone is deemed to be full of pinned pages then just give it a light
  * scan then give up on it.
  */
-static unsigned long shrink_zones(int priority, struct zone **zones,
+static unsigned long shrink_zones(int priority, struct zonelist *zonelist,
 					struct scan_control *sc)
 {
 	unsigned long nr_reclaimed = 0;
+	struct zone **zones = zonelist->zones;
 	int i;
 
 
@@ -1301,8 +1302,8 @@ static unsigned long shrink_zones(int priority, struct zone **zones,
  * holds filesystem locks which prevent writeout this might not work, and the
  * allocation attempt will fail.
  */
-static unsigned long do_try_to_free_pages(struct zone **zones, gfp_t gfp_mask,
-					struct scan_control *sc)
+static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
+					gfp_t gfp_mask, struct scan_control *sc)
 {
 	int priority;
 	int ret = 0;
@@ -1310,6 +1311,7 @@ static unsigned long do_try_to_free_pages(struct zone **zones, gfp_t gfp_mask,
 	unsigned long nr_reclaimed = 0;
 	struct reclaim_state *reclaim_state = current->reclaim_state;
 	unsigned long lru_pages = 0;
+	struct zone **zones = zonelist->zones;
 	int i;
 
 	if (scan_global_lru(sc))
@@ -1333,7 +1335,7 @@ static unsigned long do_try_to_free_pages(struct zone **zones, gfp_t gfp_mask,
 		sc->nr_scanned = 0;
 		if (!priority)
 			disable_swap_token();
-		nr_reclaimed += shrink_zones(priority, zones, sc);
+		nr_reclaimed += shrink_zones(priority, zonelist, sc);
 		/*
 		 * Don't shrink slabs when reclaiming memory from
 		 * over limit cgroups
@@ -1397,7 +1399,8 @@ out:
 	return ret;
 }
 
-unsigned long try_to_free_pages(struct zone **zones, int order, gfp_t gfp_mask)
+unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
+								gfp_t gfp_mask)
 {
 	struct scan_control sc = {
 		.gfp_mask = gfp_mask,
@@ -1410,7 +1413,7 @@ unsigned long try_to_free_pages(struct zone **zones, int order, gfp_t gfp_mask)
 		.isolate_pages = isolate_pages_global,
 	};
 
-	return do_try_to_free_pages(zones, gfp_mask, &sc);
+	return do_try_to_free_pages(zonelist, gfp_mask, &sc);
 }
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
@@ -1428,11 +1431,11 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 		.mem_cgroup = mem_cont,
 		.isolate_pages = mem_cgroup_isolate_pages,
 	};
-	struct zone **zones;
+	struct zonelist *zonelist;
 	int target_zone = gfp_zone(GFP_HIGHUSER_MOVABLE);
 
-	zones = NODE_DATA(numa_node_id())->node_zonelists[target_zone].zones;
-	if (do_try_to_free_pages(zones, sc.gfp_mask, &sc))
+	zonelist = &NODE_DATA(numa_node_id())->node_zonelists[target_zone];
+	if (do_try_to_free_pages(zonelist, sc.gfp_mask, &sc))
 		return 1;
 	return 0;
 }