author     Rik van Riel <riel@redhat.com>                    2008-10-18 23:26:34 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>    2008-10-20 11:50:25 -0400
commit     556adecba110bf5f1db6c6b56416cfab5bcab698 (patch)
tree       a721d84d28c4d99a54632b472b452ea3d4b2b137 /mm/vmscan.c
parent     4f98a2fee8acdb4ac84545df98cccecfd130f8db (diff)
vmscan: second chance replacement for anonymous pages
We avoid evicting and scanning anonymous pages for the most part, but
under some workloads we can end up with most of memory filled with
anonymous pages. At that point, we suddenly need to clear the referenced
bits on all of memory, which can take ages on very large memory systems.
We can reduce the maximum number of pages that need to be scanned by not
taking the referenced state into account when deactivating an anonymous
page. After all, every anonymous page starts out referenced, so why
check?
If an anonymous page gets referenced again before it reaches the end of
the inactive list, we move it back to the active list.
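The referenced bits are therefore still honoured, just later: they are only consulted once the page has drifted to the tail of the inactive list. A minimal sketch of that second-chance rule, with a hypothetical helper name (this is not the reclaim scanner code from the patch itself):

#include <linux/mm.h>
#include <linux/rmap.h>		/* page_referenced() */
#include <linux/page-flags.h>	/* SetPageActive() */

/*
 * Illustrative only: the referenced bits were neither cleared nor
 * tested when the anonymous page was deactivated; they are checked
 * here, when the page reaches the end of the inactive list.
 */
static int anon_page_gets_second_chance(struct page *page)
{
	if (page_referenced(page, 0, NULL)) {
		SetPageActive(page);	/* used again: back to the active list */
		return 1;		/* skip eviction this round */
	}
	return 0;			/* still unreferenced: reclaim it */
}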
To keep the maximum amount of necessary work reasonable, we scale the
active to inactive ratio with the size of memory, using the formula
active:inactive ratio = sqrt(memory in GB * 10).
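Read the other way around: the active list may grow relative to the inactive list on bigger machines, but only with the square root of memory size. A sketch of that arithmetic (the helper name and placement are illustrative; the real sizing code is not part of this mm/vmscan.c diff):

#include <linux/kernel.h>	/* int_sqrt() */
#include <linux/mm.h>		/* PAGE_SHIFT */

/* Illustrative only: active:inactive ratio = sqrt(memory in GB * 10) */
static unsigned int anon_inactive_ratio(unsigned long zone_pages)
{
	unsigned int gb = zone_pages >> (30 - PAGE_SHIFT);	/* zone size in GB */
	unsigned int ratio = int_sqrt(10 * gb);

	return ratio ? ratio : 1;	/* never allow a zero ratio */
}

For a 1 GB zone this gives a ratio of 3 (roughly a quarter of the anonymous pages kept inactive), 12 for 16 GB, and about 50 for 256 GB, so the worst-case rescanning work stays bounded even on very large systems.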
Kswapd CPU use now seems to scale by the amount of pageout bandwidth,
instead of by the amount of memory present in the system.
[kamezawa.hiroyu@jp.fujitsu.com: fix OOM with memcg]
[kamezawa.hiroyu@jp.fujitsu.com: memcg: lru scan fix]
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--   mm/vmscan.c   38
1 file changed, 34 insertions, 4 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d10d2f9a33f3..c82ee9a33cfc 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1090,6 +1090,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	__mod_zone_page_state(zone, NR_ACTIVE_ANON, -pgmoved);
 	spin_unlock_irq(&zone->lru_lock);
 
+	pgmoved = 0;
 	while (!list_empty(&l_hold)) {
 		cond_resched();
 		page = lru_to_page(&l_hold);
@@ -1098,6 +1099,13 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	}
 
 	/*
+	 * Count the referenced pages as rotated, even when they are moved
+	 * to the inactive list. This helps balance scan pressure between
+	 * file and anonymous pages in get_scan_ratio.
+	 */
+	zone->recent_rotated[!!file] += pgmoved;
+
+	/*
 	 * Now put the pages back on the appropriate [file or anon] inactive
 	 * and active lists.
 	 */
@@ -1158,7 +1166,6 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 		}
 	}
 	__mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
-	zone->recent_rotated[!!file] += pgmoved;
 
 	__count_zone_vm_events(PGREFILL, zone, pgscanned);
 	__count_vm_events(PGDEACTIVATE, pgdeactivate);
@@ -1174,7 +1181,13 @@ static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
 {
 	int file = is_file_lru(lru);
 
-	if (lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE) {
+	if (lru == LRU_ACTIVE_FILE) {
+		shrink_active_list(nr_to_scan, zone, sc, priority, file);
+		return 0;
+	}
+
+	if (lru == LRU_ACTIVE_ANON &&
+	    (!scan_global_lru(sc) || inactive_anon_is_low(zone))) {
 		shrink_active_list(nr_to_scan, zone, sc, priority, file);
 		return 0;
 	}
@@ -1310,8 +1323,8 @@ static unsigned long shrink_zone(int priority, struct zone *zone,
 		}
 	}
 
-	while (nr[LRU_ACTIVE_ANON] || nr[LRU_INACTIVE_ANON] ||
-				nr[LRU_ACTIVE_FILE] || nr[LRU_INACTIVE_FILE]) {
+	while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
+				nr[LRU_INACTIVE_FILE]) {
 		for_each_lru(l) {
 			if (nr[l]) {
 				nr_to_scan = min(nr[l],
@@ -1324,6 +1337,15 @@ static unsigned long shrink_zone(int priority, struct zone *zone,
 		}
 	}
 
+	/*
+	 * Even if we did not try to evict anon pages at all, we want to
+	 * rebalance the anon lru active/inactive ratio.
+	 */
+	if (!scan_global_lru(sc) || inactive_anon_is_low(zone))
+		shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
+	else if (!scan_global_lru(sc))
+		shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
+
 	throttle_vm_writeout(sc->gfp_mask);
 	return nr_reclaimed;
 }
@@ -1617,6 +1639,14 @@ loop_again:
 			    priority != DEF_PRIORITY)
 				continue;
 
+			/*
+			 * Do some background aging of the anon list, to give
+			 * pages a chance to be referenced before reclaiming.
+			 */
+			if (inactive_anon_is_low(zone))
+				shrink_active_list(SWAP_CLUSTER_MAX, zone,
+							&sc, priority, 0);
+
 			if (!zone_watermark_ok(zone, order, zone->pages_high,
 					       0, 0)) {
 				end_zone = i;