author    David Rientjes <rientjes@google.com>            2012-10-08 19:33:24 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2012-10-09 03:22:56 -0400
commit    957f822a0ab95e88b146638bad6209bbc315bedd (patch)
tree      2e1336ddc1c574f54d582c6b74dcc1d1230482f8
parent    a0c5e813f087dffc0d9b173d2e7d3328b1482fd5 (diff)
mm, numa: reclaim from all nodes within reclaim distance
RECLAIM_DISTANCE represents the distance between nodes at which it is
deemed too costly to allocate from; it's preferred to try to reclaim from
a local zone before falling back to allocating on a remote node at such a
distance.

To do this, zone_reclaim_mode is set if the distance between any two
nodes on the system is greater than this distance.  This, however, ends
up causing the page allocator to reclaim from every zone regardless of
its affinity.

What we really want is to reclaim only from zones that are closer than
RECLAIM_DISTANCE.  This patch adds a nodemask to each node that
represents the set of nodes that are within this distance.  During the
zone iteration, if the bit for a zone's node is set for the local node,
then reclaim is attempted; otherwise, the zone is skipped.

[akpm@linux-foundation.org: fix CONFIG_NUMA=n build]
Signed-off-by: David Rientjes <rientjes@google.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Minchan Kim <minchan@kernel.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
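[Editor's note: the following is a minimal userspace sketch of the
mechanism the changelog describes, not kernel code.  The node count,
distance table, and helper names are invented for illustration; the real
implementation is in the hunks below.  Each node precomputes a bitmask of
nodes within RECLAIM_DISTANCE, and reclaim from a zone is attempted only
when the allocating node's bit is set in the mask of that zone's node.]

    /* Userspace model of the patch's reclaim_nodes mechanism. */
    #include <stdbool.h>
    #include <stdio.h>

    #define MAX_NODES        2
    #define RECLAIM_DISTANCE 30

    /* Hypothetical SLIT-style distance table: 10 = local, 40 = remote. */
    static const int node_distance[MAX_NODES][MAX_NODES] = {
            { 10, 40 },
            { 40, 10 },
    };

    /* One mask per node, analogous to pgdat->reclaim_nodes in the patch. */
    static unsigned long reclaim_nodes[MAX_NODES];

    /* Analogue of init_zone_allows_reclaim(): run once per node at init. */
    static void init_reclaim_nodes(int nid)
    {
            for (int i = 0; i < MAX_NODES; i++)
                    if (node_distance[nid][i] <= RECLAIM_DISTANCE)
                            reclaim_nodes[nid] |= 1UL << i;
    }

    /*
     * Analogue of zone_allows_reclaim(): the allocating (local) node may
     * reclaim from a zone only if its bit is set in the mask of the node
     * that zone belongs to.
     */
    static bool allows_reclaim(int local_nid, int zone_nid)
    {
            return reclaim_nodes[zone_nid] & (1UL << local_nid);
    }

    int main(void)
    {
            for (int nid = 0; nid < MAX_NODES; nid++)
                    init_reclaim_nodes(nid);

            printf("node 0 -> node 0: %d\n", allows_reclaim(0, 0)); /* 1 */
            printf("node 0 -> node 1: %d\n", allows_reclaim(0, 1)); /* 0 */
            return 0;
    }

[With the made-up distances above, node 0 may reclaim from itself but not
from the distant node 1, mirroring what zone_allows_reclaim() and
init_zone_allows_reclaim() do with real pgdat nodemasks.]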
-rw-r--r--  include/linux/mmzone.h |  1 +
-rw-r--r--  mm/page_alloc.c        | 41 ++++++++++++++++++++-----------
2 files changed, 31 insertions(+), 11 deletions(-)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index d240efa8f846..a5578871d033 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -709,6 +709,7 @@ typedef struct pglist_data {
 	unsigned long node_spanned_pages; /* total size of physical page
 					     range, including holes */
 	int node_id;
+	nodemask_t reclaim_nodes;	/* Nodes allowed to reclaim from */
 	wait_queue_head_t kswapd_wait;
 	wait_queue_head_t pfmemalloc_wait;
 	struct task_struct *kswapd;	/* Protected by lock_memory_hotplug() */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index dbb53866c3aa..9b8e6243a524 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1799,6 +1799,22 @@ static void zlc_clear_zones_full(struct zonelist *zonelist)
 	bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST);
 }
 
+static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
+{
+	return node_isset(local_zone->node, zone->zone_pgdat->reclaim_nodes);
+}
+
+static void __paginginit init_zone_allows_reclaim(int nid)
+{
+	int i;
+
+	for_each_online_node(i)
+		if (node_distance(nid, i) <= RECLAIM_DISTANCE) {
+			node_set(i, NODE_DATA(nid)->reclaim_nodes);
+			zone_reclaim_mode = 1;
+		}
+}
+
 #else	/* CONFIG_NUMA */
 
 static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags)
@@ -1819,6 +1835,15 @@ static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z)
 static void zlc_clear_zones_full(struct zonelist *zonelist)
 {
 }
+
+static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
+{
+	return true;
+}
+
+static inline void init_zone_allows_reclaim(int nid)
+{
+}
 #endif	/* CONFIG_NUMA */
 
 /*
@@ -1903,7 +1928,8 @@ zonelist_scan:
 				did_zlc_setup = 1;
 			}
 
-			if (zone_reclaim_mode == 0)
+			if (zone_reclaim_mode == 0 ||
+			    !zone_allows_reclaim(preferred_zone, zone))
 				goto this_zone_full;
 
 			/*
@@ -3364,21 +3390,13 @@ static void build_zonelists(pg_data_t *pgdat)
 	j = 0;
 
 	while ((node = find_next_best_node(local_node, &used_mask)) >= 0) {
-		int distance = node_distance(local_node, node);
-
-		/*
-		 * If another node is sufficiently far away then it is better
-		 * to reclaim pages in a zone before going off node.
-		 */
-		if (distance > RECLAIM_DISTANCE)
-			zone_reclaim_mode = 1;
-
 		/*
 		 * We don't want to pressure a particular node.
 		 * So adding penalty to the first node in same
 		 * distance group to make it round-robin.
 		 */
-		if (distance != node_distance(local_node, prev_node))
+		if (node_distance(local_node, node) !=
+		    node_distance(local_node, prev_node))
 			node_load[node] = load;
 
 		prev_node = node;
@@ -4552,6 +4570,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
 
 	pgdat->node_id = nid;
 	pgdat->node_start_pfn = node_start_pfn;
+	init_zone_allows_reclaim(nid);
 	calculate_node_totalpages(pgdat, zones_size, zholes_size);
 
 	alloc_node_mem_map(pgdat);