 include/linux/mmzone.h   | 12 +++++++-----
 include/linux/swap.h     | 11 +++++++++++
 include/linux/topology.h |  8 ++++++++
 mm/page_alloc.c          | 17 ++++++++++++++---
 mm/vmscan.c              | 68 ++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 108 insertions(+), 8 deletions(-)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 34cbefd2ebde..93a849f742db 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -149,15 +149,17 @@ struct zone {
 	unsigned long		pages_scanned;	   /* since last reclaim */
 	int			all_unreclaimable; /* All pages pinned */
 
-	/*
-	 * Does the allocator try to reclaim pages from the zone as soon
-	 * as it fails a watermark_ok() in __alloc_pages?
-	 */
-	int			reclaim_pages;
 	/* A count of how many reclaimers are scanning this zone */
 	atomic_t		reclaim_in_progress;
 
 	/*
+	 * timestamp (in jiffies) of the last zone reclaim that did not
+	 * result in freeing of pages. This is used to avoid repeated scans
+	 * if all memory in the zone is in use.
+	 */
+	unsigned long		last_unsuccessful_zone_reclaim;
+
+	/*
 	 * prev_priority holds the scanning priority for this zone. It is
 	 * defined as the scanning priority at which we achieved our reclaim
 	 * target at the previous try_to_free_pages() or balance_pgdat()
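
The last_unsuccessful_zone_reclaim field added above is a small negative-result cache: remember when a scan freed nothing, and refuse to rescan until an interval has elapsed (the full logic is in the mm/vmscan.c hunk below). A minimal userspace sketch of the same backoff idiom, assuming nothing kernel-specific; all names here are illustrative:

#include <stdio.h>
#include <time.h>

#define RESCAN_INTERVAL_NS 500000000LL    /* 0.5s, analogous to HZ/2 */

static long long last_failed_scan_ns;     /* 0 = no failed scan yet */

static long long now_ns(void)
{
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (long long)ts.tv_sec * 1000000000LL + ts.tv_nsec;
}

/*
 * Skip the expensive scan entirely if the previous scan failed less
 * than RESCAN_INTERVAL_NS ago; otherwise scan and record a failure.
 */
static int try_reclaim(void)
{
    if (now_ns() < last_failed_scan_ns + RESCAN_INTERVAL_NS)
        return 0;                 /* too soon after a failed scan */

    int reclaimed = 0;            /* the real scan would go here */

    if (reclaimed == 0)
        last_failed_scan_ns = now_ns();
    return reclaimed;
}

int main(void)
{
    printf("first call scans:  %d\n", try_reclaim());
    printf("second call skips: %d\n", try_reclaim());
    return 0;
}
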
diff --git a/include/linux/swap.h b/include/linux/swap.h
index d01f7efb0f2c..4a99e4a7fbf3 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -176,6 +176,17 @@ extern int try_to_free_pages(struct zone **, gfp_t);
 extern int shrink_all_memory(int);
 extern int vm_swappiness;
 
+#ifdef CONFIG_NUMA
+extern int zone_reclaim_mode;
+extern int zone_reclaim(struct zone *, gfp_t, unsigned int);
+#else
+#define zone_reclaim_mode 0
+static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order)
+{
+	return 0;
+}
+#endif
+
 #ifdef CONFIG_MIGRATION
 extern int isolate_lru_page(struct page *p);
 extern int putback_lru_pages(struct list_head *l);
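
The #else branch above is a standard kernel-header stub pattern: on !CONFIG_NUMA builds, zone_reclaim_mode becomes the literal 0 and zone_reclaim() an inline that returns 0, so callers need no #ifdefs of their own and the compiler folds the whole test away. A self-contained sketch of the pattern, with invented names (FEATURE_FOO, foo_mode, do_foo); build with -DFEATURE_FOO to get the real implementation:

#include <stdio.h>

#ifdef FEATURE_FOO
extern int foo_mode;
int do_foo(int arg);
#else
/* Stub out the feature: constant 0 plus a trivial inline. */
#define foo_mode 0
static inline int do_foo(int arg)
{
    (void)arg;
    return 0;
}
#endif

#ifdef FEATURE_FOO
int foo_mode = 1;
int do_foo(int arg) { return arg * 2; }
#endif

int main(void)
{
    /* With foo_mode a literal 0, the compiler deletes this branch. */
    if (foo_mode && do_foo(21) == 42)
        printf("feature enabled: do_foo(21) = %d\n", do_foo(21));
    else
        printf("feature compiled out (branch folded away)\n");
    return 0;
}
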
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 315a5163d6a0..e8eb0040ce3a 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -56,6 +56,14 @@
 #define REMOTE_DISTANCE 20
 #define node_distance(from,to) ((from) == (to) ? LOCAL_DISTANCE : REMOTE_DISTANCE)
 #endif
+#ifndef RECLAIM_DISTANCE
+/*
+ * If the distance between nodes in a system is larger than RECLAIM_DISTANCE
+ * (in whatever arch specific measurement units returned by node_distance())
+ * then switch on zone reclaim on boot.
+ */
+#define RECLAIM_DISTANCE 20
+#endif
 #ifndef PENALTY_FOR_NODE_WITH_CPUS
 #define PENALTY_FOR_NODE_WITH_CPUS (1)
 #endif
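
Because the default is guarded by #ifndef, an architecture header that is included earlier can supply its own threshold and the generic fallback is skipped. A tiny standalone illustration of the mechanism; the value 30 is invented:

#include <stdio.h>

/* An arch-specific header would define this first... */
#define RECLAIM_DISTANCE 30

/* ...so the generic fallback (as in linux/topology.h) never fires. */
#ifndef RECLAIM_DISTANCE
#define RECLAIM_DISTANCE 20
#endif

int main(void)
{
    printf("RECLAIM_DISTANCE = %d\n", RECLAIM_DISTANCE);  /* prints 30 */
    return 0;
}
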
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c2e29743a8d1..df54e2fc8ee0 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -878,7 +878,9 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
 		mark = (*z)->pages_high;
 		if (!zone_watermark_ok(*z, order, mark,
 			       classzone_idx, alloc_flags))
-			continue;
+			if (!zone_reclaim_mode ||
+			    !zone_reclaim(*z, gfp_mask, order))
+				continue;
 	}
 
 	page = buffered_rmqueue(zonelist, *z, order, gfp_mask);
@@ -1595,13 +1597,22 @@ static void __init build_zonelists(pg_data_t *pgdat)
 	prev_node = local_node;
 	nodes_clear(used_mask);
 	while ((node = find_next_best_node(local_node, &used_mask)) >= 0) {
+		int distance = node_distance(local_node, node);
+
+		/*
+		 * If another node is sufficiently far away then it is better
+		 * to reclaim pages in a zone before going off node.
+		 */
+		if (distance > RECLAIM_DISTANCE)
+			zone_reclaim_mode = 1;
+
 		/*
 		 * We don't want to pressure a particular node.
 		 * So adding penalty to the first node in same
 		 * distance group to make it round-robin.
 		 */
-		if (node_distance(local_node, node) !=
-				node_distance(local_node, prev_node))
+
+		if (distance != node_distance(local_node, prev_node))
 			node_load[node] += load;
 		prev_node = node;
 		load--;
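
With the build_zonelists() change, a single sufficiently remote node pair seen at boot enables zone reclaim machine-wide. A self-contained sketch of that decision over an invented SLIT-style distance table (note the real code only walks nodes in find_next_best_node() order from each local node, not all pairs as here):

#include <stdio.h>

#define RECLAIM_DISTANCE 20   /* generic default from linux/topology.h */
#define NR_NODES 4

/* Invented SLIT-style table: 10 = local, larger = farther away. */
static const int node_distance[NR_NODES][NR_NODES] = {
    { 10, 20, 40, 40 },
    { 20, 10, 40, 40 },
    { 40, 40, 10, 20 },
    { 40, 40, 20, 10 },
};

int main(void)
{
    int zone_reclaim_mode = 0;

    for (int from = 0; from < NR_NODES; from++)
        for (int to = 0; to < NR_NODES; to++)
            if (node_distance[from][to] > RECLAIM_DISTANCE)
                zone_reclaim_mode = 1;

    printf("zone_reclaim_mode = %d\n", zone_reclaim_mode);  /* 1 here */
    return 0;
}

Note also, from the first page_alloc.c hunk: when zone_reclaim() returns nonzero the zone is not skipped, and the allocator proceeds directly to buffered_rmqueue() for it rather than re-checking the watermark.
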
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e5117b6897a9..2e34b61a70c7 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1572,3 +1572,71 @@ static int __init kswapd_init(void)
 }
 
 module_init(kswapd_init)
+
+#ifdef CONFIG_NUMA
+/*
+ * Zone reclaim mode
+ *
+ * If non-zero call zone_reclaim when the number of free pages falls below
+ * the watermarks.
+ *
+ * In the future we may add flags to the mode. However, the page allocator
+ * should only have to check that zone_reclaim_mode != 0 before calling
+ * zone_reclaim().
+ */
+int zone_reclaim_mode __read_mostly;
+
+/*
+ * Minimum time between zone reclaim scans
+ */
+#define ZONE_RECLAIM_INTERVAL HZ/2
+/*
+ * Try to free up some pages from this zone through reclaim.
+ */
+int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
+{
+	int nr_pages = 1 << order;
+	struct task_struct *p = current;
+	struct reclaim_state reclaim_state;
+	struct scan_control sc = {
+		.gfp_mask = gfp_mask,
+		.may_writepage = 0,
+		.may_swap = 0,
+		.nr_mapped = read_page_state(nr_mapped),
+		.nr_scanned = 0,
+		.nr_reclaimed = 0,
+		.priority = 0
+	};
+
+	if (!(gfp_mask & __GFP_WAIT) ||
+	    zone->zone_pgdat->node_id != numa_node_id() ||
+	    zone->all_unreclaimable ||
+	    atomic_read(&zone->reclaim_in_progress) > 0)
+		return 0;
+
+	if (time_before(jiffies,
+		zone->last_unsuccessful_zone_reclaim + ZONE_RECLAIM_INTERVAL))
+		return 0;
+
+	disable_swap_token();
+
+	if (nr_pages > SWAP_CLUSTER_MAX)
+		sc.swap_cluster_max = nr_pages;
+	else
+		sc.swap_cluster_max = SWAP_CLUSTER_MAX;
+
+	cond_resched();
+	p->flags |= PF_MEMALLOC;
+	reclaim_state.reclaimed_slab = 0;
+	p->reclaim_state = &reclaim_state;
+	shrink_zone(zone, &sc);
+	p->reclaim_state = NULL;
+	current->flags &= ~PF_MEMALLOC;
+
+	if (sc.nr_reclaimed == 0)
+		zone->last_unsuccessful_zone_reclaim = jiffies;
+
+	return sc.nr_reclaimed > nr_pages;
+}
+#endif
+
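
The interval test above uses time_before(), which stays correct when the jiffies counter wraps around because it compares through a signed subtraction rather than a raw less-than. A userspace demonstration of the idiom, with time_before() re-derived locally for illustration:

#include <stdio.h>

typedef unsigned long jiffies_t;

/*
 * Same trick as the kernel's jiffies comparison helpers: the signed
 * difference behaves correctly across unsigned counter wraparound.
 */
#define time_before(a, b) ((long)((a) - (b)) < 0)

int main(void)
{
    jiffies_t now = (jiffies_t)-10;   /* just before wraparound */
    jiffies_t deadline = now + 250;   /* wraps to a small value */

    /*
     * A naive "now < deadline" is false here, even though the
     * deadline really lies 250 ticks in the future; time_before()
     * still gets it right.
     */
    printf("naive compare: %d\n", now < deadline);
    printf("time_before:   %d\n", (int)time_before(now, deadline));
    return 0;
}
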