diff options
-rw-r--r-- | include/linux/mmzone.h | 12 | ||||
-rw-r--r-- | include/linux/swap.h | 11 | ||||
-rw-r--r-- | include/linux/topology.h | 8 | ||||
-rw-r--r-- | mm/page_alloc.c | 17 | ||||
-rw-r--r-- | mm/vmscan.c | 68 |
5 files changed, 108 insertions, 8 deletions
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 34cbefd2ebde..93a849f742db 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h | |||
@@ -149,15 +149,17 @@ struct zone { | |||
149 | unsigned long pages_scanned; /* since last reclaim */ | 149 | unsigned long pages_scanned; /* since last reclaim */ |
150 | int all_unreclaimable; /* All pages pinned */ | 150 | int all_unreclaimable; /* All pages pinned */ |
151 | 151 | ||
152 | /* | ||
153 | * Does the allocator try to reclaim pages from the zone as soon | ||
154 | * as it fails a watermark_ok() in __alloc_pages? | ||
155 | */ | ||
156 | int reclaim_pages; | ||
157 | /* A count of how many reclaimers are scanning this zone */ | 152 | /* A count of how many reclaimers are scanning this zone */ |
158 | atomic_t reclaim_in_progress; | 153 | atomic_t reclaim_in_progress; |
159 | 154 | ||
160 | /* | 155 | /* |
156 | * timestamp (in jiffies) of the last zone reclaim that did not | ||
157 | * result in freeing of pages. This is used to avoid repeated scans | ||
158 | * if all memory in the zone is in use. | ||
159 | */ | ||
160 | unsigned long last_unsuccessful_zone_reclaim; | ||
161 | |||
162 | /* | ||
161 | * prev_priority holds the scanning priority for this zone. It is | 163 | * prev_priority holds the scanning priority for this zone. It is |
162 | * defined as the scanning priority at which we achieved our reclaim | 164 | * defined as the scanning priority at which we achieved our reclaim |
163 | * target at the previous try_to_free_pages() or balance_pgdat() | 165 | * target at the previous try_to_free_pages() or balance_pgdat() |
diff --git a/include/linux/swap.h b/include/linux/swap.h index d01f7efb0f2c..4a99e4a7fbf3 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h | |||
@@ -176,6 +176,17 @@ extern int try_to_free_pages(struct zone **, gfp_t); | |||
176 | extern int shrink_all_memory(int); | 176 | extern int shrink_all_memory(int); |
177 | extern int vm_swappiness; | 177 | extern int vm_swappiness; |
178 | 178 | ||
179 | #ifdef CONFIG_NUMA | ||
180 | extern int zone_reclaim_mode; | ||
181 | extern int zone_reclaim(struct zone *, gfp_t, unsigned int); | ||
182 | #else | ||
183 | #define zone_reclaim_mode 0 | ||
184 | static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order) | ||
185 | { | ||
186 | return 0; | ||
187 | } | ||
188 | #endif | ||
189 | |||
179 | #ifdef CONFIG_MIGRATION | 190 | #ifdef CONFIG_MIGRATION |
180 | extern int isolate_lru_page(struct page *p); | 191 | extern int isolate_lru_page(struct page *p); |
181 | extern int putback_lru_pages(struct list_head *l); | 192 | extern int putback_lru_pages(struct list_head *l); |
diff --git a/include/linux/topology.h b/include/linux/topology.h index 315a5163d6a0..e8eb0040ce3a 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h | |||
@@ -56,6 +56,14 @@ | |||
56 | #define REMOTE_DISTANCE 20 | 56 | #define REMOTE_DISTANCE 20 |
57 | #define node_distance(from,to) ((from) == (to) ? LOCAL_DISTANCE : REMOTE_DISTANCE) | 57 | #define node_distance(from,to) ((from) == (to) ? LOCAL_DISTANCE : REMOTE_DISTANCE) |
58 | #endif | 58 | #endif |
59 | #ifndef RECLAIM_DISTANCE | ||
60 | /* | ||
61 | * If the distance between nodes in a system is larger than RECLAIM_DISTANCE | ||
62 | * (in whatever arch specific measurement units returned by node_distance()) | ||
63 | * then switch on zone reclaim on boot. | ||
64 | */ | ||
65 | #define RECLAIM_DISTANCE 20 | ||
66 | #endif | ||
59 | #ifndef PENALTY_FOR_NODE_WITH_CPUS | 67 | #ifndef PENALTY_FOR_NODE_WITH_CPUS |
60 | #define PENALTY_FOR_NODE_WITH_CPUS (1) | 68 | #define PENALTY_FOR_NODE_WITH_CPUS (1) |
61 | #endif | 69 | #endif |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c2e29743a8d1..df54e2fc8ee0 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -878,7 +878,9 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, | |||
878 | mark = (*z)->pages_high; | 878 | mark = (*z)->pages_high; |
879 | if (!zone_watermark_ok(*z, order, mark, | 879 | if (!zone_watermark_ok(*z, order, mark, |
880 | classzone_idx, alloc_flags)) | 880 | classzone_idx, alloc_flags)) |
881 | continue; | 881 | if (!zone_reclaim_mode || |
882 | !zone_reclaim(*z, gfp_mask, order)) | ||
883 | continue; | ||
882 | } | 884 | } |
883 | 885 | ||
884 | page = buffered_rmqueue(zonelist, *z, order, gfp_mask); | 886 | page = buffered_rmqueue(zonelist, *z, order, gfp_mask); |
@@ -1595,13 +1597,22 @@ static void __init build_zonelists(pg_data_t *pgdat) | |||
1595 | prev_node = local_node; | 1597 | prev_node = local_node; |
1596 | nodes_clear(used_mask); | 1598 | nodes_clear(used_mask); |
1597 | while ((node = find_next_best_node(local_node, &used_mask)) >= 0) { | 1599 | while ((node = find_next_best_node(local_node, &used_mask)) >= 0) { |
1600 | int distance = node_distance(local_node, node); | ||
1601 | |||
1602 | /* | ||
1603 | * If another node is sufficiently far away then it is better | ||
1604 | * to reclaim pages in a zone before going off node. | ||
1605 | */ | ||
1606 | if (distance > RECLAIM_DISTANCE) | ||
1607 | zone_reclaim_mode = 1; | ||
1608 | |||
1598 | /* | 1609 | /* |
1599 | * We don't want to pressure a particular node. | 1610 | * We don't want to pressure a particular node. |
1600 | * So adding penalty to the first node in same | 1611 | * So adding penalty to the first node in same |
1601 | * distance group to make it round-robin. | 1612 | * distance group to make it round-robin. |
1602 | */ | 1613 | */ |
1603 | if (node_distance(local_node, node) != | 1614 | |
1604 | node_distance(local_node, prev_node)) | 1615 | if (distance != node_distance(local_node, prev_node)) |
1605 | node_load[node] += load; | 1616 | node_load[node] += load; |
1606 | prev_node = node; | 1617 | prev_node = node; |
1607 | load--; | 1618 | load--; |
diff --git a/mm/vmscan.c b/mm/vmscan.c index e5117b6897a9..2e34b61a70c7 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -1572,3 +1572,71 @@ static int __init kswapd_init(void) | |||
1572 | } | 1572 | } |
1573 | 1573 | ||
1574 | module_init(kswapd_init) | 1574 | module_init(kswapd_init) |
1575 | |||
1576 | #ifdef CONFIG_NUMA | ||
1577 | /* | ||
1578 | * Zone reclaim mode | ||
1579 | * | ||
1580 | * If non-zero call zone_reclaim when the number of free pages falls below | ||
1581 | * the watermarks. | ||
1582 | * | ||
1583 | * In the future we may add flags to the mode. However, the page allocator | ||
1584 | * should only have to check that zone_reclaim_mode != 0 before calling | ||
1585 | * zone_reclaim(). | ||
1586 | */ | ||
1587 | int zone_reclaim_mode __read_mostly; | ||
1588 | |||
1589 | /* | ||
1590 | * Mininum time between zone reclaim scans | ||
1591 | */ | ||
1592 | #define ZONE_RECLAIM_INTERVAL HZ/2 | ||
1593 | /* | ||
1594 | * Try to free up some pages from this zone through reclaim. | ||
1595 | */ | ||
1596 | int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) | ||
1597 | { | ||
1598 | int nr_pages = 1 << order; | ||
1599 | struct task_struct *p = current; | ||
1600 | struct reclaim_state reclaim_state; | ||
1601 | struct scan_control sc = { | ||
1602 | .gfp_mask = gfp_mask, | ||
1603 | .may_writepage = 0, | ||
1604 | .may_swap = 0, | ||
1605 | .nr_mapped = read_page_state(nr_mapped), | ||
1606 | .nr_scanned = 0, | ||
1607 | .nr_reclaimed = 0, | ||
1608 | .priority = 0 | ||
1609 | }; | ||
1610 | |||
1611 | if (!(gfp_mask & __GFP_WAIT) || | ||
1612 | zone->zone_pgdat->node_id != numa_node_id() || | ||
1613 | zone->all_unreclaimable || | ||
1614 | atomic_read(&zone->reclaim_in_progress) > 0) | ||
1615 | return 0; | ||
1616 | |||
1617 | if (time_before(jiffies, | ||
1618 | zone->last_unsuccessful_zone_reclaim + ZONE_RECLAIM_INTERVAL)) | ||
1619 | return 0; | ||
1620 | |||
1621 | disable_swap_token(); | ||
1622 | |||
1623 | if (nr_pages > SWAP_CLUSTER_MAX) | ||
1624 | sc.swap_cluster_max = nr_pages; | ||
1625 | else | ||
1626 | sc.swap_cluster_max = SWAP_CLUSTER_MAX; | ||
1627 | |||
1628 | cond_resched(); | ||
1629 | p->flags |= PF_MEMALLOC; | ||
1630 | reclaim_state.reclaimed_slab = 0; | ||
1631 | p->reclaim_state = &reclaim_state; | ||
1632 | shrink_zone(zone, &sc); | ||
1633 | p->reclaim_state = NULL; | ||
1634 | current->flags &= ~PF_MEMALLOC; | ||
1635 | |||
1636 | if (sc.nr_reclaimed == 0) | ||
1637 | zone->last_unsuccessful_zone_reclaim = jiffies; | ||
1638 | |||
1639 | return sc.nr_reclaimed > nr_pages; | ||
1640 | } | ||
1641 | #endif | ||
1642 | |||