path: root/mm/page_alloc.c
author		Mel Gorman <mel@csn.ul.ie>	2008-04-28 05:12:18 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2008-04-28 11:58:19 -0400
commit		19770b32609b6bf97a3dece2529089494cbfc549
tree		3b5922d1b20aabdf929bde9309f323841717747a	/mm/page_alloc.c
parent		dd1a239f6f2d4d3eedd318583ec319aa145b324c
mm: filter based on a nodemask as well as a gfp_mask
The MPOL_BIND policy creates a zonelist that is used for allocations controlled by that mempolicy.  As the per-node zonelist is already being filtered based on a zone id, this patch adds a version of __alloc_pages() that takes a nodemask for further filtering.  This eliminates the need for MPOL_BIND to create a custom zonelist.

A positive benefit of this is that allocations using MPOL_BIND now use the local node's distance-ordered zonelist instead of a custom node-id-ordered zonelist.  I.e., pages will be allocated from the closest allowed node with available memory.

[Lee.Schermerhorn@hp.com: Mempolicy: update stale documentation and comments]
[Lee.Schermerhorn@hp.com: Mempolicy: make dequeue_huge_page_vma() obey MPOL_BIND nodemask]
[Lee.Schermerhorn@hp.com: Mempolicy: make dequeue_huge_page_vma() obey MPOL_BIND nodemask rework]
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
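For illustration, a minimal sketch (not part of this patch) of how a caller could use the nodemask-filtered entry point added by the diff below; the helper name, the chosen nodes and the node_zonelist() lookup are assumptions made for this example:

/*
 * Sketch only: restrict an allocation to two nodes by building a
 * nodemask and passing it to __alloc_pages_nodemask(), which this
 * patch introduces.  alloc_page_on_nodes() is a hypothetical helper,
 * not a kernel function.
 */
static struct page *alloc_page_on_nodes(gfp_t gfp_mask, int nid0, int nid1)
{
	nodemask_t allowed;

	nodes_clear(allowed);
	node_set(nid0, allowed);
	node_set(nid1, allowed);

	/* walk the local node's distance-ordered zonelist, filtered by 'allowed' */
	return __alloc_pages_nodemask(gfp_mask, 0,
			node_zonelist(numa_node_id(), gfp_mask), &allowed);
}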
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--	mm/page_alloc.c	50
1 file changed, 36 insertions, 14 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6d94d04ea784..b4beb3eea8b7 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1377,7 +1377,7 @@ static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z)
  * a page.
  */
 static struct page *
-get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
+get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order,
 		struct zonelist *zonelist, int high_zoneidx, int alloc_flags)
 {
 	struct zoneref *z;
@@ -1388,16 +1388,17 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
 	int zlc_active = 0;		/* set if using zonelist_cache */
 	int did_zlc_setup = 0;		/* just call zlc_setup() one time */
 
-	z = first_zones_zonelist(zonelist, high_zoneidx);
-	classzone_idx = zonelist_zone_idx(z);
-	preferred_zone = zonelist_zone(z);
+	(void)first_zones_zonelist(zonelist, high_zoneidx, nodemask,
+							&preferred_zone);
+	classzone_idx = zone_idx(preferred_zone);
 
 zonelist_scan:
 	/*
 	 * Scan zonelist, looking for a zone with enough free.
 	 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
 	 */
-	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
+	for_each_zone_zonelist_nodemask(zone, z, zonelist,
+						high_zoneidx, nodemask) {
 		if (NUMA_BUILD && zlc_active &&
 			!zlc_zone_worth_trying(zonelist, z, allowednodes))
 				continue;
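The nodemask-aware iterator used above behaves like the previous for_each_zone_zonelist() walk with one extra per-zone check. Conceptually (a sketch of the intent, not the actual macro body), each candidate zone is filtered roughly as follows, with a NULL nodemask meaning "no restriction":

	/* conceptual filter only; the real check lives in the zonelist iterator */
	if (nodemask && !node_isset(zone_to_nid(zone), *nodemask))
		continue;	/* zone's node is outside the allowed mask */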
@@ -1447,9 +1448,9 @@ try_next_zone:
 /*
  * This is the 'heart' of the zoned buddy allocator.
  */
-struct page *
-__alloc_pages(gfp_t gfp_mask, unsigned int order,
-		struct zonelist *zonelist)
+static struct page *
+__alloc_pages_internal(gfp_t gfp_mask, unsigned int order,
+		struct zonelist *zonelist, nodemask_t *nodemask)
 {
 	const gfp_t wait = gfp_mask & __GFP_WAIT;
 	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
@@ -1478,7 +1479,7 @@ restart:
 		return NULL;
 	}
 
-	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
+	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
 			zonelist, high_zoneidx, ALLOC_WMARK_LOW|ALLOC_CPUSET);
 	if (page)
 		goto got_pg;
@@ -1523,7 +1524,7 @@ restart:
 	 * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
 	 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
 	 */
-	page = get_page_from_freelist(gfp_mask, order, zonelist,
+	page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist,
 			high_zoneidx, alloc_flags);
 	if (page)
 		goto got_pg;
@@ -1536,7 +1537,7 @@ rebalance:
 	if (!(gfp_mask & __GFP_NOMEMALLOC)) {
 nofail_alloc:
 			/* go through the zonelist yet again, ignoring mins */
-			page = get_page_from_freelist(gfp_mask, order,
+			page = get_page_from_freelist(gfp_mask, nodemask, order,
 				zonelist, high_zoneidx, ALLOC_NO_WATERMARKS);
 			if (page)
 				goto got_pg;
@@ -1571,7 +1572,7 @@ nofail_alloc:
 		drain_all_pages();
 
 	if (likely(did_some_progress)) {
-		page = get_page_from_freelist(gfp_mask, order,
+		page = get_page_from_freelist(gfp_mask, nodemask, order,
 					zonelist, high_zoneidx, alloc_flags);
 		if (page)
 			goto got_pg;
@@ -1587,8 +1588,9 @@ nofail_alloc:
 		 * a parallel oom killing, we must fail if we're still
 		 * under heavy pressure.
 		 */
-		page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
-			zonelist, high_zoneidx, ALLOC_WMARK_HIGH|ALLOC_CPUSET);
+		page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask,
+			order, zonelist, high_zoneidx,
+			ALLOC_WMARK_HIGH|ALLOC_CPUSET);
 		if (page) {
 			clear_zonelist_oom(zonelist, gfp_mask);
 			goto got_pg;
@@ -1637,6 +1639,20 @@ got_pg:
 	return page;
 }
 
+struct page *
+__alloc_pages(gfp_t gfp_mask, unsigned int order,
+		struct zonelist *zonelist)
+{
+	return __alloc_pages_internal(gfp_mask, order, zonelist, NULL);
+}
+
+struct page *
+__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
+		struct zonelist *zonelist, nodemask_t *nodemask)
+{
+	return __alloc_pages_internal(gfp_mask, order, zonelist, nodemask);
+}
+
 EXPORT_SYMBOL(__alloc_pages);
 
 /*
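With the wrappers above, existing callers are unaffected: __alloc_pages() keeps its old signature and simply forwards a NULL nodemask (no filtering), while __alloc_pages_nodemask() is the entry point for callers that supply a mask. For example (illustration only), after this patch the following two calls are equivalent:

	/* illustration: a NULL mask makes the two entry points equivalent */
	page = __alloc_pages(GFP_KERNEL, order, zonelist);
	page = __alloc_pages_nodemask(GFP_KERNEL, order, zonelist, NULL);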
@@ -1880,6 +1896,12 @@ void show_free_areas(void)
 	show_swap_cache_info();
 }
 
+static void zoneref_set_zone(struct zone *zone, struct zoneref *zoneref)
+{
+	zoneref->zone = zone;
+	zoneref->zone_idx = zone_idx(zone);
+}
+
 /*
  * Builds allocation fallback zone lists.
  *