aboutsummaryrefslogtreecommitdiffstats
path: root/mm/page_alloc.c
diff options
context:
space:
mode:
authorChristoph Lameter <clameter@sgi.com>2006-01-18 20:42:31 -0500
committerLinus Torvalds <torvalds@g5.osdl.org>2006-01-18 22:20:17 -0500
commit9eeff2395e3cfd05c9b2e6074ff943a34b0c5c21 (patch)
tree20160098ec6ed8738cfecfc5f81181ad22b44e60 /mm/page_alloc.c
parentf1fd1067ece574ab56e4a70878b9a5a1ed4c3c42 (diff)
[PATCH] Zone reclaim: Reclaim logic
Some bits for zone reclaim exist in 2.6.15 but they are not usable. This patch fixes them up, removes unused code and makes zone reclaim usable. Zone reclaim allows the reclaiming of pages from a zone if the number of free pages falls below the watermarks even if other zones still have enough pages available. Zone reclaim is of particular importance for NUMA machines. It can be more beneficial to reclaim a page than to take the performance penalties that come with allocating a page on a remote zone. Zone reclaim is enabled if the maximum distance to another node is higher than RECLAIM_DISTANCE, which may be defined by an arch. By default RECLAIM_DISTANCE is 20. 20 is the distance to another node in the same component (enclosure or motherboard) on IA64. The meaning of the NUMA distance information seems to vary by arch. If zone reclaim is not successful then no further reclaim attempts will occur for a certain time period (ZONE_RECLAIM_INTERVAL). This patch was discussed before. See http://marc.theaimsgroup.com/?l=linux-kernel&m=113519961504207&w=2 http://marc.theaimsgroup.com/?l=linux-kernel&m=113408418232531&w=2 http://marc.theaimsgroup.com/?l=linux-kernel&m=113389027420032&w=2 http://marc.theaimsgroup.com/?l=linux-kernel&m=113380938612205&w=2 Signed-off-by: Christoph Lameter <clameter@sgi.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--mm/page_alloc.c17
1 files changed, 14 insertions, 3 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c2e29743a8d1..df54e2fc8ee0 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -878,7 +878,9 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
878 mark = (*z)->pages_high; 878 mark = (*z)->pages_high;
879 if (!zone_watermark_ok(*z, order, mark, 879 if (!zone_watermark_ok(*z, order, mark,
880 classzone_idx, alloc_flags)) 880 classzone_idx, alloc_flags))
881 continue; 881 if (!zone_reclaim_mode ||
882 !zone_reclaim(*z, gfp_mask, order))
883 continue;
882 } 884 }
883 885
884 page = buffered_rmqueue(zonelist, *z, order, gfp_mask); 886 page = buffered_rmqueue(zonelist, *z, order, gfp_mask);
@@ -1595,13 +1597,22 @@ static void __init build_zonelists(pg_data_t *pgdat)
1595 prev_node = local_node; 1597 prev_node = local_node;
1596 nodes_clear(used_mask); 1598 nodes_clear(used_mask);
1597 while ((node = find_next_best_node(local_node, &used_mask)) >= 0) { 1599 while ((node = find_next_best_node(local_node, &used_mask)) >= 0) {
1600 int distance = node_distance(local_node, node);
1601
1602 /*
1603 * If another node is sufficiently far away then it is better
1604 * to reclaim pages in a zone before going off node.
1605 */
1606 if (distance > RECLAIM_DISTANCE)
1607 zone_reclaim_mode = 1;
1608
1598 /* 1609 /*
1599 * We don't want to pressure a particular node. 1610 * We don't want to pressure a particular node.
1600 * So adding penalty to the first node in same 1611 * So adding penalty to the first node in same
1601 * distance group to make it round-robin. 1612 * distance group to make it round-robin.
1602 */ 1613 */
1603 if (node_distance(local_node, node) != 1614
1604 node_distance(local_node, prev_node)) 1615 if (distance != node_distance(local_node, prev_node))
1605 node_load[node] += load; 1616 node_load[node] += load;
1606 prev_node = node; 1617 prev_node = node;
1607 load--; 1618 load--;