author     Mel Gorman <mel@csn.ul.ie>                      2008-04-28 05:12:18 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2008-04-28 11:58:19 -0400
commit     19770b32609b6bf97a3dece2529089494cbfc549 (patch)
tree       3b5922d1b20aabdf929bde9309f323841717747a /mm/page_alloc.c
parent     dd1a239f6f2d4d3eedd318583ec319aa145b324c (diff)
mm: filter based on a nodemask as well as a gfp_mask
The MPOL_BIND policy creates a zonelist that is used for allocations
controlled by that mempolicy. As the per-node zonelist is already being
filtered based on a zone id, this patch adds a version of __alloc_pages() that
takes a nodemask for further filtering. This eliminates the need for
MPOL_BIND to create a custom zonelist.
A further benefit is that allocations using MPOL_BIND now use the local
node's distance-ordered zonelist instead of a custom node-id-ordered
zonelist; that is, pages are allocated from the closest allowed node that
has memory available.
[Lee.Schermerhorn@hp.com: Mempolicy: update stale documentation and comments]
[Lee.Schermerhorn@hp.com: Mempolicy: make dequeue_huge_page_vma() obey MPOL_BIND nodemask]
[Lee.Schermerhorn@hp.com: Mempolicy: make dequeue_huge_page_vma() obey MPOL_BIND nodemask rework]
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--  mm/page_alloc.c  50
1 file changed, 36 insertions, 14 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6d94d04ea784..b4beb3eea8b7 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1377,7 +1377,7 @@ static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z)
  * a page.
  */
 static struct page *
-get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
+get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order,
                struct zonelist *zonelist, int high_zoneidx, int alloc_flags)
 {
        struct zoneref *z;
@@ -1388,16 +1388,17 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
        int zlc_active = 0;             /* set if using zonelist_cache */
        int did_zlc_setup = 0;          /* just call zlc_setup() one time */
 
-       z = first_zones_zonelist(zonelist, high_zoneidx);
-       classzone_idx = zonelist_zone_idx(z);
-       preferred_zone = zonelist_zone(z);
+       (void)first_zones_zonelist(zonelist, high_zoneidx, nodemask,
+                                                       &preferred_zone);
+       classzone_idx = zone_idx(preferred_zone);
 
 zonelist_scan:
        /*
         * Scan zonelist, looking for a zone with enough free.
         * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
         */
-       for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
+       for_each_zone_zonelist_nodemask(zone, z, zonelist,
+                                               high_zoneidx, nodemask) {
                if (NUMA_BUILD && zlc_active &&
                        !zlc_zone_worth_trying(zonelist, z, allowednodes))
                                continue;
@@ -1447,9 +1448,9 @@ try_next_zone:
 /*
  * This is the 'heart' of the zoned buddy allocator.
  */
-struct page *
-__alloc_pages(gfp_t gfp_mask, unsigned int order,
-               struct zonelist *zonelist)
+static struct page *
+__alloc_pages_internal(gfp_t gfp_mask, unsigned int order,
+               struct zonelist *zonelist, nodemask_t *nodemask)
 {
        const gfp_t wait = gfp_mask & __GFP_WAIT;
        enum zone_type high_zoneidx = gfp_zone(gfp_mask);
@@ -1478,7 +1479,7 @@ restart:
                return NULL;
        }
 
-       page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
+       page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
                        zonelist, high_zoneidx, ALLOC_WMARK_LOW|ALLOC_CPUSET);
        if (page)
                goto got_pg;
@@ -1523,7 +1524,7 @@ restart:
         * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
         * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
         */
-       page = get_page_from_freelist(gfp_mask, order, zonelist,
+       page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist,
                                                high_zoneidx, alloc_flags);
        if (page)
                goto got_pg;
@@ -1536,7 +1537,7 @@ rebalance:
                if (!(gfp_mask & __GFP_NOMEMALLOC)) {
 nofail_alloc:
                        /* go through the zonelist yet again, ignoring mins */
-                       page = get_page_from_freelist(gfp_mask, order,
+                       page = get_page_from_freelist(gfp_mask, nodemask, order,
                                zonelist, high_zoneidx, ALLOC_NO_WATERMARKS);
                        if (page)
                                goto got_pg;
@@ -1571,7 +1572,7 @@ nofail_alloc:
                drain_all_pages();
 
        if (likely(did_some_progress)) {
-               page = get_page_from_freelist(gfp_mask, order,
+               page = get_page_from_freelist(gfp_mask, nodemask, order,
                                        zonelist, high_zoneidx, alloc_flags);
                if (page)
                        goto got_pg;
@@ -1587,8 +1588,9 @@ nofail_alloc:
                 * a parallel oom killing, we must fail if we're still
                 * under heavy pressure.
                 */
-               page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
-                       zonelist, high_zoneidx, ALLOC_WMARK_HIGH|ALLOC_CPUSET);
+               page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask,
+                       order, zonelist, high_zoneidx,
+                       ALLOC_WMARK_HIGH|ALLOC_CPUSET);
                if (page) {
                        clear_zonelist_oom(zonelist, gfp_mask);
                        goto got_pg;
@@ -1637,6 +1639,20 @@ got_pg:
        return page;
 }
 
+struct page *
+__alloc_pages(gfp_t gfp_mask, unsigned int order,
+               struct zonelist *zonelist)
+{
+       return __alloc_pages_internal(gfp_mask, order, zonelist, NULL);
+}
+
+struct page *
+__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
+               struct zonelist *zonelist, nodemask_t *nodemask)
+{
+       return __alloc_pages_internal(gfp_mask, order, zonelist, nodemask);
+}
+
 EXPORT_SYMBOL(__alloc_pages);
 
 /*
@@ -1880,6 +1896,12 @@ void show_free_areas(void)
        show_swap_cache_info();
 }
 
+static void zoneref_set_zone(struct zone *zone, struct zoneref *zoneref)
+{
+       zoneref->zone = zone;
+       zoneref->zone_idx = zone_idx(zone);
+}
+
 /*
  * Builds allocation fallback zone lists.
  *