author		Mel Gorman <mel@csn.ul.ie>	2008-04-28 05:12:17 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2008-04-28 11:58:18 -0400
commit		dd1a239f6f2d4d3eedd318583ec319aa145b324c (patch)
tree		aff4224c96b5e2e67588c3946858a724863eeaf9 /mm/page_alloc.c
parent		54a6eb5c4765aa573a030ceeba2c14e3d2ea5706 (diff)
mm: have zonelist contains structs with both a zone pointer and zone_idx
Filtering zonelists requires very frequent use of zone_idx(). This is costly
as it involves a lookup of another structure and a subtraction operation. As
the zone_idx is often required, it should be quickly accessible. The node idx
could also be stored here if accessing zone->node turns out to be significant,
which may be the case on workloads where nodemasks are heavily used.
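(For context, zone_idx() boils down to a pointer dereference plus a pointer
subtraction against the parent node's zone array; the sketch below paraphrases
the mmzone.h macro rather than quoting it verbatim.)

	/* zone_idx(zone): position of the zone within its node's node_zones[] array */
	#define zone_idx(zone)	((zone) - (zone)->zone_pgdat->node_zones)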
This patch introduces a struct zoneref to store a zone pointer and a zone
index. The zonelist then consists of an array of these struct zonerefs which
are looked up as necessary. Helpers are given for accessing the zone index as
well as the node index.
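As a rough sketch of the structure and helpers this patch relies on (the
authoritative definitions live in include/linux/mmzone.h; this is illustrative
only, matching the names used in the diff below):

	/* One entry in a zonelist: the zone plus its precomputed index */
	struct zoneref {
		struct zone *zone;	/* pointer to the actual zone */
		int zone_idx;		/* zone_idx(zoneref->zone), cached */
	};

	static inline void zoneref_set_zone(struct zone *zone, struct zoneref *zoneref)
	{
		zoneref->zone = zone;
		zoneref->zone_idx = zone_idx(zone);
	}

	static inline struct zone *zonelist_zone(struct zoneref *zoneref)
	{
		return zoneref->zone;
	}

	static inline int zonelist_zone_idx(struct zoneref *zoneref)
	{
		return zoneref->zone_idx;	/* no extra dereference or subtraction */
	}

	static inline int zonelist_node_idx(struct zoneref *zoneref)
	{
		return zone_to_nid(zoneref->zone);	/* node index derived from the zone */
	}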
[kamezawa.hiroyu@jp.fujitsu.com: Suggested struct zoneref instead of embedding information in pointers]
[hugh@veritas.com: mm-have-zonelist: fix memcg ooms]
[hugh@veritas.com: just return do_try_to_free_pages]
[hugh@veritas.com: do_try_to_free_pages gfp_mask redundant]
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Christoph Lameter <clameter@sgi.com>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--	mm/page_alloc.c	68
1 file changed, 38 insertions, 30 deletions
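To illustrate what the conversion means for callers, here is a hedged
before/after sketch of the kswapd wake-up loop changed further down; the
"before" form walks a NULL-terminated array of struct zone pointers, the
"after" form uses the zoneref-based for_each_zone_zonelist() iterator (the
surrounding declarations are illustrative):

	/* Before: zonelist->zones is a NULL-terminated array of zone pointers */
	struct zone **zp;
	for (zp = zonelist->zones; *zp; zp++)
		wakeup_kswapd(*zp, order);

	/* After: zonelist->_zonerefs is walked via a helper that also skips
	 * zones above high_zoneidx */
	struct zoneref *z;
	struct zone *zone;
	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx)
		wakeup_kswapd(zone, order);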
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4ccb8651cf22..6d94d04ea784 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1317,7 +1317,7 @@ static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags)
  * We are low on memory in the second scan, and should leave no stone
  * unturned looking for a free page.
  */
-static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zone **z,
+static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zoneref *z,
 				nodemask_t *allowednodes)
 {
 	struct zonelist_cache *zlc;	/* cached zonelist speedup info */
@@ -1328,7 +1328,7 @@ static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zone **z,
 	if (!zlc)
 		return 1;
 
-	i = z - zonelist->zones;
+	i = z - zonelist->_zonerefs;
 	n = zlc->z_to_n[i];
 
 	/* This zone is worth trying if it is allowed but not full */
@@ -1340,7 +1340,7 @@ static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zone **z,
  * zlc->fullzones, so that subsequent attempts to allocate a page
  * from that zone don't waste time re-examining it.
  */
-static void zlc_mark_zone_full(struct zonelist *zonelist, struct zone **z)
+static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z)
 {
 	struct zonelist_cache *zlc;	/* cached zonelist speedup info */
 	int i;				/* index of *z in zonelist zones */
@@ -1349,7 +1349,7 @@ static void zlc_mark_zone_full(struct zonelist *zonelist, struct zone **z)
 	if (!zlc)
 		return;
 
-	i = z - zonelist->zones;
+	i = z - zonelist->_zonerefs;
 
 	set_bit(i, zlc->fullzones);
 }
@@ -1361,13 +1361,13 @@ static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags)
 	return NULL;
 }
 
-static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zone **z,
+static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zoneref *z,
 				nodemask_t *allowednodes)
 {
 	return 1;
 }
 
-static void zlc_mark_zone_full(struct zonelist *zonelist, struct zone **z)
+static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z)
 {
 }
 #endif	/* CONFIG_NUMA */
@@ -1380,7 +1380,7 @@ static struct page *
 get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
 		struct zonelist *zonelist, int high_zoneidx, int alloc_flags)
 {
-	struct zone **z;
+	struct zoneref *z;
 	struct page *page = NULL;
 	int classzone_idx;
 	struct zone *zone, *preferred_zone;
@@ -1389,8 +1389,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
 	int did_zlc_setup = 0;		/* just call zlc_setup() one time */
 
 	z = first_zones_zonelist(zonelist, high_zoneidx);
-	classzone_idx = zone_idx(*z);
-	preferred_zone = *z;
+	classzone_idx = zonelist_zone_idx(z);
+	preferred_zone = zonelist_zone(z);
 
 zonelist_scan:
 	/*
@@ -1453,7 +1453,8 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order,
 {
 	const gfp_t wait = gfp_mask & __GFP_WAIT;
 	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
-	struct zone **z;
+	struct zoneref *z;
+	struct zone *zone;
 	struct page *page;
 	struct reclaim_state reclaim_state;
 	struct task_struct *p = current;
@@ -1467,9 +1468,9 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order,
 		return NULL;
 
 restart:
-	z = zonelist->zones;  /* the list of zones suitable for gfp_mask */
+	z = zonelist->_zonerefs;  /* the list of zones suitable for gfp_mask */
 
-	if (unlikely(*z == NULL)) {
+	if (unlikely(!z->zone)) {
 		/*
 		 * Happens if we have an empty zonelist as a result of
 		 * GFP_THISNODE being used on a memoryless node
@@ -1493,8 +1494,8 @@ restart:
 	if (NUMA_BUILD && (gfp_mask & GFP_THISNODE) == GFP_THISNODE)
 		goto nopage;
 
-	for (z = zonelist->zones; *z; z++)
-		wakeup_kswapd(*z, order);
+	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx)
+		wakeup_kswapd(zone, order);
 
 	/*
 	 * OK, we're below the kswapd watermark and have kicked background
@@ -1575,7 +1576,7 @@ nofail_alloc:
 		if (page)
 			goto got_pg;
 	} else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
-		if (!try_set_zone_oom(zonelist)) {
+		if (!try_set_zone_oom(zonelist, gfp_mask)) {
 			schedule_timeout_uninterruptible(1);
 			goto restart;
 		}
@@ -1589,18 +1590,18 @@ nofail_alloc:
 		page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
 			zonelist, high_zoneidx, ALLOC_WMARK_HIGH|ALLOC_CPUSET);
 		if (page) {
-			clear_zonelist_oom(zonelist);
+			clear_zonelist_oom(zonelist, gfp_mask);
 			goto got_pg;
 		}
 
 		/* The OOM killer will not help higher order allocs so fail */
 		if (order > PAGE_ALLOC_COSTLY_ORDER) {
-			clear_zonelist_oom(zonelist);
+			clear_zonelist_oom(zonelist, gfp_mask);
 			goto nopage;
 		}
 
 		out_of_memory(zonelist, gfp_mask, order);
-		clear_zonelist_oom(zonelist);
+		clear_zonelist_oom(zonelist, gfp_mask);
 		goto restart;
 	}
 
@@ -1702,7 +1703,7 @@ EXPORT_SYMBOL(free_pages);
 
 static unsigned int nr_free_zone_pages(int offset)
 {
-	struct zone **z;
+	struct zoneref *z;
 	struct zone *zone;
 
 	/* Just pick one node, since fallback list is circular */
@@ -1896,7 +1897,8 @@ static int build_zonelists_node(pg_data_t *pgdat, struct zonelist *zonelist,
 		zone_type--;
 		zone = pgdat->node_zones + zone_type;
 		if (populated_zone(zone)) {
-			zonelist->zones[nr_zones++] = zone;
+			zoneref_set_zone(zone,
+				&zonelist->_zonerefs[nr_zones++]);
 			check_highest_zone(zone_type);
 		}
 
@@ -2072,11 +2074,12 @@ static void build_zonelists_in_node_order(pg_data_t *pgdat, int node)
 	struct zonelist *zonelist;
 
 	zonelist = &pgdat->node_zonelists[0];
-	for (j = 0; zonelist->zones[j] != NULL; j++)
+	for (j = 0; zonelist->_zonerefs[j].zone != NULL; j++)
 		;
 	j = build_zonelists_node(NODE_DATA(node), zonelist, j,
 							MAX_NR_ZONES - 1);
-	zonelist->zones[j] = NULL;
+	zonelist->_zonerefs[j].zone = NULL;
+	zonelist->_zonerefs[j].zone_idx = 0;
 }
 
 /*
@@ -2089,7 +2092,8 @@ static void build_thisnode_zonelists(pg_data_t *pgdat)
 
 	zonelist = &pgdat->node_zonelists[1];
 	j = build_zonelists_node(pgdat, zonelist, 0, MAX_NR_ZONES - 1);
-	zonelist->zones[j] = NULL;
+	zonelist->_zonerefs[j].zone = NULL;
+	zonelist->_zonerefs[j].zone_idx = 0;
 }
 
 /*
@@ -2114,12 +2118,14 @@ static void build_zonelists_in_zone_order(pg_data_t *pgdat, int nr_nodes)
 			node = node_order[j];
 			z = &NODE_DATA(node)->node_zones[zone_type];
 			if (populated_zone(z)) {
-				zonelist->zones[pos++] = z;
+				zoneref_set_zone(z,
+					&zonelist->_zonerefs[pos++]);
 				check_highest_zone(zone_type);
 			}
 		}
 	}
-	zonelist->zones[pos] = NULL;
+	zonelist->_zonerefs[pos].zone = NULL;
+	zonelist->_zonerefs[pos].zone_idx = 0;
 }
 
 static int default_zonelist_order(void)
@@ -2196,7 +2202,8 @@ static void build_zonelists(pg_data_t *pgdat)
 	/* initialize zonelists */
 	for (i = 0; i < MAX_ZONELISTS; i++) {
 		zonelist = pgdat->node_zonelists + i;
-		zonelist->zones[0] = NULL;
+		zonelist->_zonerefs[0].zone = NULL;
+		zonelist->_zonerefs[0].zone_idx = 0;
 	}
 
 	/* NUMA-aware ordering of nodes */
@@ -2248,13 +2255,13 @@ static void build_zonelist_cache(pg_data_t *pgdat)
 {
 	struct zonelist *zonelist;
 	struct zonelist_cache *zlc;
-	struct zone **z;
+	struct zoneref *z;
 
 	zonelist = &pgdat->node_zonelists[0];
 	zonelist->zlcache_ptr = zlc = &zonelist->zlcache;
 	bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST);
-	for (z = zonelist->zones; *z; z++)
-		zlc->z_to_n[z - zonelist->zones] = zone_to_nid(*z);
+	for (z = zonelist->_zonerefs; z->zone; z++)
+		zlc->z_to_n[z - zonelist->_zonerefs] = zonelist_node_idx(z);
 }
 
 
@@ -2297,7 +2304,8 @@ static void build_zonelists(pg_data_t *pgdat)
 							MAX_NR_ZONES - 1);
 	}
 
-	zonelist->zones[j] = NULL;
+	zonelist->_zonerefs[j].zone = NULL;
+	zonelist->_zonerefs[j].zone_idx = 0;
 }
 
 /* non-NUMA variant of zonelist performance cache - just NULL zlcache_ptr */