author     Rik van Riel <riel@redhat.com>                       2008-10-18 23:26:34 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>       2008-10-20 11:50:25 -0400
commit     556adecba110bf5f1db6c6b56416cfab5bcab698 (patch)
tree       a721d84d28c4d99a54632b472b452ea3d4b2b137 /mm/vmscan.c
parent     4f98a2fee8acdb4ac84545df98cccecfd130f8db (diff)
vmscan: second chance replacement for anonymous pages
We avoid evicting and scanning anonymous pages for the most part, but under some workloads we can end up with most of memory filled with anonymous pages. At that point, we suddenly need to clear the referenced bits on all of memory, which can take ages on very large memory systems.

We can reduce the maximum number of pages that need to be scanned by not taking the referenced state into account when deactivating an anonymous page. After all, every anonymous page starts out referenced, so why check?

If an anonymous page gets referenced again before it reaches the end of the inactive list, we move it back to the active list.

To keep the maximum amount of necessary work reasonable, we scale the active to inactive ratio with the size of memory, using the formula active:inactive ratio = sqrt(memory in GB * 10).

Kswapd CPU use now seems to scale by the amount of pageout bandwidth, instead of by the amount of memory present in the system.

[kamezawa.hiroyu@jp.fujitsu.com: fix OOM with memcg]
[kamezawa.hiroyu@jp.fujitsu.com: memcg: lru scan fix]
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
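As an illustration of the formula above, here is a minimal standalone C sketch of the target ratio, assuming only the changelog's sqrt(memory in GB * 10) relation; inactive_ratio_for() and its gigabyte parameter are hypothetical names used for this example and are not the in-tree interface. With an active:inactive ratio of N:1, roughly 1/(N+1) of the anonymous pages sit on the inactive list, which is what bounds the rescanning work before eviction.

/* Sketch only: the active:inactive anon target from the changelog,
 * ratio = sqrt(memory in GB * 10).  Build with: cc sketch.c -lm */
#include <math.h>
#include <stdio.h>

static unsigned int inactive_ratio_for(unsigned long zone_gb)
{
	unsigned int ratio = 1;

	if (zone_gb)
		ratio = (unsigned int)sqrt(10.0 * (double)zone_gb);
	return ratio ? ratio : 1;
}

int main(void)
{
	/* A 16 GB zone: sqrt(160) ~= 12, so about 1/13 of the anon
	 * pages are kept inactive, and only those few need their
	 * referenced bit checked again before being evicted. */
	printf("active:inactive target = %u:1\n", inactive_ratio_for(16));
	return 0;
}

In the diff below, inactive_anon_is_low(zone) is the check that compares the inactive anon list against a target of this kind; when the list falls short, shrink_active_list() deactivates more anonymous pages, and any page referenced while on the inactive list gets its second chance and moves back to the active list.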
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--  mm/vmscan.c | 38
1 file changed, 34 insertions, 4 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d10d2f9a33f3..c82ee9a33cfc 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1090,6 +1090,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	__mod_zone_page_state(zone, NR_ACTIVE_ANON, -pgmoved);
 	spin_unlock_irq(&zone->lru_lock);
 
+	pgmoved = 0;
 	while (!list_empty(&l_hold)) {
 		cond_resched();
 		page = lru_to_page(&l_hold);
@@ -1098,6 +1099,13 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	}
 
 	/*
+	 * Count the referenced pages as rotated, even when they are moved
+	 * to the inactive list. This helps balance scan pressure between
+	 * file and anonymous pages in get_scan_ratio.
+	 */
+	zone->recent_rotated[!!file] += pgmoved;
+
+	/*
 	 * Now put the pages back on the appropriate [file or anon] inactive
 	 * and active lists.
 	 */
@@ -1158,7 +1166,6 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 		}
 	}
 	__mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
-	zone->recent_rotated[!!file] += pgmoved;
 
 	__count_zone_vm_events(PGREFILL, zone, pgscanned);
 	__count_vm_events(PGDEACTIVATE, pgdeactivate);
@@ -1174,7 +1181,13 @@ static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
 {
 	int file = is_file_lru(lru);
 
-	if (lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE) {
+	if (lru == LRU_ACTIVE_FILE) {
+		shrink_active_list(nr_to_scan, zone, sc, priority, file);
+		return 0;
+	}
+
+	if (lru == LRU_ACTIVE_ANON &&
+	    (!scan_global_lru(sc) || inactive_anon_is_low(zone))) {
 		shrink_active_list(nr_to_scan, zone, sc, priority, file);
 		return 0;
 	}
@@ -1310,8 +1323,8 @@ static unsigned long shrink_zone(int priority, struct zone *zone,
 		}
 	}
 
-	while (nr[LRU_ACTIVE_ANON] || nr[LRU_INACTIVE_ANON] ||
-				nr[LRU_ACTIVE_FILE] || nr[LRU_INACTIVE_FILE]) {
+	while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
+					nr[LRU_INACTIVE_FILE]) {
 		for_each_lru(l) {
 			if (nr[l]) {
 				nr_to_scan = min(nr[l],
@@ -1324,6 +1337,15 @@ static unsigned long shrink_zone(int priority, struct zone *zone,
 		}
 	}
 
+	/*
+	 * Even if we did not try to evict anon pages at all, we want to
+	 * rebalance the anon lru active/inactive ratio.
+	 */
+	if (!scan_global_lru(sc) || inactive_anon_is_low(zone))
+		shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
+	else if (!scan_global_lru(sc))
+		shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
+
 	throttle_vm_writeout(sc->gfp_mask);
 	return nr_reclaimed;
 }
@@ -1617,6 +1639,14 @@ loop_again:
 		    priority != DEF_PRIORITY)
 			continue;
 
+		/*
+		 * Do some background aging of the anon list, to give
+		 * pages a chance to be referenced before reclaiming.
+		 */
+		if (inactive_anon_is_low(zone))
+			shrink_active_list(SWAP_CLUSTER_MAX, zone,
+						&sc, priority, 0);
+
 		if (!zone_watermark_ok(zone, order, zone->pages_high,
 				       0, 0)) {
 			end_zone = i;