aboutsummaryrefslogtreecommitdiffstats
path: root/mm/page_alloc.c
diff options
context:
space:
mode:
authorMel Gorman <mel@csn.ul.ie>2008-04-28 05:12:17 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2008-04-28 11:58:18 -0400
commitdd1a239f6f2d4d3eedd318583ec319aa145b324c (patch)
treeaff4224c96b5e2e67588c3946858a724863eeaf9 /mm/page_alloc.c
parent54a6eb5c4765aa573a030ceeba2c14e3d2ea5706 (diff)
mm: have zonelist contains structs with both a zone pointer and zone_idx
Filtering zonelists requires very frequent use of zone_idx(). This is costly as it involves a lookup of another structure and a subtraction operation. As the zone_idx is often required, it should be quickly accessible. The node idx could also be stored here if it was found that accessing zone->node is significant which may be the case on workloads where nodemasks are heavily used. This patch introduces a struct zoneref to store a zone pointer and a zone index. The zonelist then consists of an array of these struct zonerefs which are looked up as necessary. Helpers are given for accessing the zone index as well as the node index. [kamezawa.hiroyu@jp.fujitsu.com: Suggested struct zoneref instead of embedding information in pointers] [hugh@veritas.com: mm-have-zonelist: fix memcg ooms] [hugh@veritas.com: just return do_try_to_free_pages] [hugh@veritas.com: do_try_to_free_pages gfp_mask redundant] Signed-off-by: Mel Gorman <mel@csn.ul.ie> Acked-by: Christoph Lameter <clameter@sgi.com> Acked-by: David Rientjes <rientjes@google.com> Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Mel Gorman <mel@csn.ul.ie> Cc: Christoph Lameter <clameter@sgi.com> Cc: Nick Piggin <nickpiggin@yahoo.com.au> Signed-off-by: Hugh Dickins <hugh@veritas.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--mm/page_alloc.c68
1 files changed, 38 insertions, 30 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4ccb8651cf22..6d94d04ea784 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1317,7 +1317,7 @@ static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags)
1317 * We are low on memory in the second scan, and should leave no stone 1317 * We are low on memory in the second scan, and should leave no stone
1318 * unturned looking for a free page. 1318 * unturned looking for a free page.
1319 */ 1319 */
1320static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zone **z, 1320static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zoneref *z,
1321 nodemask_t *allowednodes) 1321 nodemask_t *allowednodes)
1322{ 1322{
1323 struct zonelist_cache *zlc; /* cached zonelist speedup info */ 1323 struct zonelist_cache *zlc; /* cached zonelist speedup info */
@@ -1328,7 +1328,7 @@ static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zone **z,
1328 if (!zlc) 1328 if (!zlc)
1329 return 1; 1329 return 1;
1330 1330
1331 i = z - zonelist->zones; 1331 i = z - zonelist->_zonerefs;
1332 n = zlc->z_to_n[i]; 1332 n = zlc->z_to_n[i];
1333 1333
1334 /* This zone is worth trying if it is allowed but not full */ 1334 /* This zone is worth trying if it is allowed but not full */
@@ -1340,7 +1340,7 @@ static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zone **z,
1340 * zlc->fullzones, so that subsequent attempts to allocate a page 1340 * zlc->fullzones, so that subsequent attempts to allocate a page
1341 * from that zone don't waste time re-examining it. 1341 * from that zone don't waste time re-examining it.
1342 */ 1342 */
1343static void zlc_mark_zone_full(struct zonelist *zonelist, struct zone **z) 1343static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z)
1344{ 1344{
1345 struct zonelist_cache *zlc; /* cached zonelist speedup info */ 1345 struct zonelist_cache *zlc; /* cached zonelist speedup info */
1346 int i; /* index of *z in zonelist zones */ 1346 int i; /* index of *z in zonelist zones */
@@ -1349,7 +1349,7 @@ static void zlc_mark_zone_full(struct zonelist *zonelist, struct zone **z)
1349 if (!zlc) 1349 if (!zlc)
1350 return; 1350 return;
1351 1351
1352 i = z - zonelist->zones; 1352 i = z - zonelist->_zonerefs;
1353 1353
1354 set_bit(i, zlc->fullzones); 1354 set_bit(i, zlc->fullzones);
1355} 1355}
@@ -1361,13 +1361,13 @@ static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags)
1361 return NULL; 1361 return NULL;
1362} 1362}
1363 1363
1364static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zone **z, 1364static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zoneref *z,
1365 nodemask_t *allowednodes) 1365 nodemask_t *allowednodes)
1366{ 1366{
1367 return 1; 1367 return 1;
1368} 1368}
1369 1369
1370static void zlc_mark_zone_full(struct zonelist *zonelist, struct zone **z) 1370static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z)
1371{ 1371{
1372} 1372}
1373#endif /* CONFIG_NUMA */ 1373#endif /* CONFIG_NUMA */
@@ -1380,7 +1380,7 @@ static struct page *
1380get_page_from_freelist(gfp_t gfp_mask, unsigned int order, 1380get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
1381 struct zonelist *zonelist, int high_zoneidx, int alloc_flags) 1381 struct zonelist *zonelist, int high_zoneidx, int alloc_flags)
1382{ 1382{
1383 struct zone **z; 1383 struct zoneref *z;
1384 struct page *page = NULL; 1384 struct page *page = NULL;
1385 int classzone_idx; 1385 int classzone_idx;
1386 struct zone *zone, *preferred_zone; 1386 struct zone *zone, *preferred_zone;
@@ -1389,8 +1389,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
1389 int did_zlc_setup = 0; /* just call zlc_setup() one time */ 1389 int did_zlc_setup = 0; /* just call zlc_setup() one time */
1390 1390
1391 z = first_zones_zonelist(zonelist, high_zoneidx); 1391 z = first_zones_zonelist(zonelist, high_zoneidx);
1392 classzone_idx = zone_idx(*z); 1392 classzone_idx = zonelist_zone_idx(z);
1393 preferred_zone = *z; 1393 preferred_zone = zonelist_zone(z);
1394 1394
1395zonelist_scan: 1395zonelist_scan:
1396 /* 1396 /*
@@ -1453,7 +1453,8 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order,
1453{ 1453{
1454 const gfp_t wait = gfp_mask & __GFP_WAIT; 1454 const gfp_t wait = gfp_mask & __GFP_WAIT;
1455 enum zone_type high_zoneidx = gfp_zone(gfp_mask); 1455 enum zone_type high_zoneidx = gfp_zone(gfp_mask);
1456 struct zone **z; 1456 struct zoneref *z;
1457 struct zone *zone;
1457 struct page *page; 1458 struct page *page;
1458 struct reclaim_state reclaim_state; 1459 struct reclaim_state reclaim_state;
1459 struct task_struct *p = current; 1460 struct task_struct *p = current;
@@ -1467,9 +1468,9 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order,
1467 return NULL; 1468 return NULL;
1468 1469
1469restart: 1470restart:
1470 z = zonelist->zones; /* the list of zones suitable for gfp_mask */ 1471 z = zonelist->_zonerefs; /* the list of zones suitable for gfp_mask */
1471 1472
1472 if (unlikely(*z == NULL)) { 1473 if (unlikely(!z->zone)) {
1473 /* 1474 /*
1474 * Happens if we have an empty zonelist as a result of 1475 * Happens if we have an empty zonelist as a result of
1475 * GFP_THISNODE being used on a memoryless node 1476 * GFP_THISNODE being used on a memoryless node
@@ -1493,8 +1494,8 @@ restart:
1493 if (NUMA_BUILD && (gfp_mask & GFP_THISNODE) == GFP_THISNODE) 1494 if (NUMA_BUILD && (gfp_mask & GFP_THISNODE) == GFP_THISNODE)
1494 goto nopage; 1495 goto nopage;
1495 1496
1496 for (z = zonelist->zones; *z; z++) 1497 for_each_zone_zonelist(zone, z, zonelist, high_zoneidx)
1497 wakeup_kswapd(*z, order); 1498 wakeup_kswapd(zone, order);
1498 1499
1499 /* 1500 /*
1500 * OK, we're below the kswapd watermark and have kicked background 1501 * OK, we're below the kswapd watermark and have kicked background
@@ -1575,7 +1576,7 @@ nofail_alloc:
1575 if (page) 1576 if (page)
1576 goto got_pg; 1577 goto got_pg;
1577 } else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) { 1578 } else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
1578 if (!try_set_zone_oom(zonelist)) { 1579 if (!try_set_zone_oom(zonelist, gfp_mask)) {
1579 schedule_timeout_uninterruptible(1); 1580 schedule_timeout_uninterruptible(1);
1580 goto restart; 1581 goto restart;
1581 } 1582 }
@@ -1589,18 +1590,18 @@ nofail_alloc:
1589 page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order, 1590 page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
1590 zonelist, high_zoneidx, ALLOC_WMARK_HIGH|ALLOC_CPUSET); 1591 zonelist, high_zoneidx, ALLOC_WMARK_HIGH|ALLOC_CPUSET);
1591 if (page) { 1592 if (page) {
1592 clear_zonelist_oom(zonelist); 1593 clear_zonelist_oom(zonelist, gfp_mask);
1593 goto got_pg; 1594 goto got_pg;
1594 } 1595 }
1595 1596
1596 /* The OOM killer will not help higher order allocs so fail */ 1597 /* The OOM killer will not help higher order allocs so fail */
1597 if (order > PAGE_ALLOC_COSTLY_ORDER) { 1598 if (order > PAGE_ALLOC_COSTLY_ORDER) {
1598 clear_zonelist_oom(zonelist); 1599 clear_zonelist_oom(zonelist, gfp_mask);
1599 goto nopage; 1600 goto nopage;
1600 } 1601 }
1601 1602
1602 out_of_memory(zonelist, gfp_mask, order); 1603 out_of_memory(zonelist, gfp_mask, order);
1603 clear_zonelist_oom(zonelist); 1604 clear_zonelist_oom(zonelist, gfp_mask);
1604 goto restart; 1605 goto restart;
1605 } 1606 }
1606 1607
@@ -1702,7 +1703,7 @@ EXPORT_SYMBOL(free_pages);
1702 1703
1703static unsigned int nr_free_zone_pages(int offset) 1704static unsigned int nr_free_zone_pages(int offset)
1704{ 1705{
1705 struct zone **z; 1706 struct zoneref *z;
1706 struct zone *zone; 1707 struct zone *zone;
1707 1708
1708 /* Just pick one node, since fallback list is circular */ 1709 /* Just pick one node, since fallback list is circular */
@@ -1896,7 +1897,8 @@ static int build_zonelists_node(pg_data_t *pgdat, struct zonelist *zonelist,
1896 zone_type--; 1897 zone_type--;
1897 zone = pgdat->node_zones + zone_type; 1898 zone = pgdat->node_zones + zone_type;
1898 if (populated_zone(zone)) { 1899 if (populated_zone(zone)) {
1899 zonelist->zones[nr_zones++] = zone; 1900 zoneref_set_zone(zone,
1901 &zonelist->_zonerefs[nr_zones++]);
1900 check_highest_zone(zone_type); 1902 check_highest_zone(zone_type);
1901 } 1903 }
1902 1904
@@ -2072,11 +2074,12 @@ static void build_zonelists_in_node_order(pg_data_t *pgdat, int node)
2072 struct zonelist *zonelist; 2074 struct zonelist *zonelist;
2073 2075
2074 zonelist = &pgdat->node_zonelists[0]; 2076 zonelist = &pgdat->node_zonelists[0];
2075 for (j = 0; zonelist->zones[j] != NULL; j++) 2077 for (j = 0; zonelist->_zonerefs[j].zone != NULL; j++)
2076 ; 2078 ;
2077 j = build_zonelists_node(NODE_DATA(node), zonelist, j, 2079 j = build_zonelists_node(NODE_DATA(node), zonelist, j,
2078 MAX_NR_ZONES - 1); 2080 MAX_NR_ZONES - 1);
2079 zonelist->zones[j] = NULL; 2081 zonelist->_zonerefs[j].zone = NULL;
2082 zonelist->_zonerefs[j].zone_idx = 0;
2080} 2083}
2081 2084
2082/* 2085/*
@@ -2089,7 +2092,8 @@ static void build_thisnode_zonelists(pg_data_t *pgdat)
2089 2092
2090 zonelist = &pgdat->node_zonelists[1]; 2093 zonelist = &pgdat->node_zonelists[1];
2091 j = build_zonelists_node(pgdat, zonelist, 0, MAX_NR_ZONES - 1); 2094 j = build_zonelists_node(pgdat, zonelist, 0, MAX_NR_ZONES - 1);
2092 zonelist->zones[j] = NULL; 2095 zonelist->_zonerefs[j].zone = NULL;
2096 zonelist->_zonerefs[j].zone_idx = 0;
2093} 2097}
2094 2098
2095/* 2099/*
@@ -2114,12 +2118,14 @@ static void build_zonelists_in_zone_order(pg_data_t *pgdat, int nr_nodes)
2114 node = node_order[j]; 2118 node = node_order[j];
2115 z = &NODE_DATA(node)->node_zones[zone_type]; 2119 z = &NODE_DATA(node)->node_zones[zone_type];
2116 if (populated_zone(z)) { 2120 if (populated_zone(z)) {
2117 zonelist->zones[pos++] = z; 2121 zoneref_set_zone(z,
2122 &zonelist->_zonerefs[pos++]);
2118 check_highest_zone(zone_type); 2123 check_highest_zone(zone_type);
2119 } 2124 }
2120 } 2125 }
2121 } 2126 }
2122 zonelist->zones[pos] = NULL; 2127 zonelist->_zonerefs[pos].zone = NULL;
2128 zonelist->_zonerefs[pos].zone_idx = 0;
2123} 2129}
2124 2130
2125static int default_zonelist_order(void) 2131static int default_zonelist_order(void)
@@ -2196,7 +2202,8 @@ static void build_zonelists(pg_data_t *pgdat)
2196 /* initialize zonelists */ 2202 /* initialize zonelists */
2197 for (i = 0; i < MAX_ZONELISTS; i++) { 2203 for (i = 0; i < MAX_ZONELISTS; i++) {
2198 zonelist = pgdat->node_zonelists + i; 2204 zonelist = pgdat->node_zonelists + i;
2199 zonelist->zones[0] = NULL; 2205 zonelist->_zonerefs[0].zone = NULL;
2206 zonelist->_zonerefs[0].zone_idx = 0;
2200 } 2207 }
2201 2208
2202 /* NUMA-aware ordering of nodes */ 2209 /* NUMA-aware ordering of nodes */
@@ -2248,13 +2255,13 @@ static void build_zonelist_cache(pg_data_t *pgdat)
2248{ 2255{
2249 struct zonelist *zonelist; 2256 struct zonelist *zonelist;
2250 struct zonelist_cache *zlc; 2257 struct zonelist_cache *zlc;
2251 struct zone **z; 2258 struct zoneref *z;
2252 2259
2253 zonelist = &pgdat->node_zonelists[0]; 2260 zonelist = &pgdat->node_zonelists[0];
2254 zonelist->zlcache_ptr = zlc = &zonelist->zlcache; 2261 zonelist->zlcache_ptr = zlc = &zonelist->zlcache;
2255 bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST); 2262 bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST);
2256 for (z = zonelist->zones; *z; z++) 2263 for (z = zonelist->_zonerefs; z->zone; z++)
2257 zlc->z_to_n[z - zonelist->zones] = zone_to_nid(*z); 2264 zlc->z_to_n[z - zonelist->_zonerefs] = zonelist_node_idx(z);
2258} 2265}
2259 2266
2260 2267
@@ -2297,7 +2304,8 @@ static void build_zonelists(pg_data_t *pgdat)
2297 MAX_NR_ZONES - 1); 2304 MAX_NR_ZONES - 1);
2298 } 2305 }
2299 2306
2300 zonelist->zones[j] = NULL; 2307 zonelist->_zonerefs[j].zone = NULL;
2308 zonelist->_zonerefs[j].zone_idx = 0;
2301} 2309}
2302 2310
2303/* non-NUMA variant of zonelist performance cache - just NULL zlcache_ptr */ 2311/* non-NUMA variant of zonelist performance cache - just NULL zlcache_ptr */