 include/linux/mm_inline.h | 19
 include/linux/mmzone.h    |  6
 mm/page_alloc.c           | 41
 mm/vmscan.c               | 38
 mm/vmstat.c               |  6
 5 files changed, 104 insertions, 6 deletions
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 2eb599465d56..f451fedd1e75 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -117,4 +117,23 @@ static inline enum lru_list page_lru(struct page *page)
 	return lru;
 }
 
+/**
+ * inactive_anon_is_low - check if anonymous pages need to be deactivated
+ * @zone: zone to check
+ *
+ * Returns true if the zone does not have enough inactive anon pages,
+ * meaning some active anon pages need to be deactivated.
+ */
+static inline int inactive_anon_is_low(struct zone *zone)
+{
+	unsigned long active, inactive;
+
+	active = zone_page_state(zone, NR_ACTIVE_ANON);
+	inactive = zone_page_state(zone, NR_INACTIVE_ANON);
+
+	if (inactive * zone->inactive_ratio < active)
+		return 1;
+
+	return 0;
+}
 #endif
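The new check is pure arithmetic: with inactive * zone->inactive_ratio < active, a ratio of 3 means the active anon list may grow to at most three times the inactive list, i.e. roughly 25% of anonymous pages stay on the inactive list (see the table in the mm/page_alloc.c hunk below). A minimal userspace sketch of the same test, using made-up page counts — plain C for illustration, not part of the patch:

#include <stdio.h>

/* Same comparison as inactive_anon_is_low(), applied to plain numbers. */
static int inactive_is_low(unsigned long active, unsigned long inactive,
			   unsigned int ratio)
{
	return inactive * ratio < active;
}

int main(void)
{
	unsigned int ratio = 3;	/* e.g. a ~1GB zone, per the table below */

	/* 800k active vs 200k inactive: 600k < 800k, so deactivate some. */
	printf("%d\n", inactive_is_low(800000, 200000, ratio));	/* prints 1 */

	/* 750k active vs 250k inactive: 750k < 750k is false, balanced. */
	printf("%d\n", inactive_is_low(750000, 250000, ratio));	/* prints 0 */
	return 0;
}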
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 59a4c8fd6ebd..9c5111f49a32 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -323,6 +323,12 @@ struct zone {
 	 */
 	int prev_priority;
 
+	/*
+	 * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
+	 * this zone's LRU. Maintained by the pageout code.
+	 */
+	unsigned int inactive_ratio;
+
 
 	ZONE_PADDING(_pad2_)
 	/* Rarely used or read-mostly fields */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 740a16a32c22..79c0981b1d32 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4263,6 +4263,46 @@ void setup_per_zone_pages_min(void)
 	calculate_totalreserve_pages();
 }
 
+/**
+ * setup_per_zone_inactive_ratio - called when min_free_kbytes changes.
+ *
+ * The inactive anon list should be small enough that the VM never has to
+ * do too much work, but large enough that each inactive page has a chance
+ * to be referenced again before it is swapped out.
+ *
+ * The inactive_anon ratio is the target ratio of ACTIVE_ANON to
+ * INACTIVE_ANON pages on this zone's LRU, maintained by the
+ * pageout code. A zone->inactive_ratio of 3 means 3:1 or 25% of
+ * the anonymous pages are kept on the inactive list.
+ *
+ * total     target    max
+ * memory    ratio     inactive anon
+ * -------------------------------------
+ *   10MB       1         5MB
+ *  100MB       1        50MB
+ *    1GB       3       250MB
+ *   10GB      10       0.9GB
+ *  100GB      31         3GB
+ *    1TB     101        10GB
+ *   10TB     320        32GB
+ */
+void setup_per_zone_inactive_ratio(void)
+{
+	struct zone *zone;
+
+	for_each_zone(zone) {
+		unsigned int gb, ratio;
+
+		/* Zone size in gigabytes */
+		gb = zone->present_pages >> (30 - PAGE_SHIFT);
+		ratio = int_sqrt(10 * gb);
+		if (!ratio)
+			ratio = 1;
+
+		zone->inactive_ratio = ratio;
+	}
+}
+
 /*
  * Initialise min_free_kbytes.
  *
@@ -4300,6 +4340,7 @@ static int __init init_per_zone_pages_min(void)
 		min_free_kbytes = 65536;
 	setup_per_zone_pages_min();
 	setup_per_zone_lowmem_reserve();
+	setup_per_zone_inactive_ratio();
 	return 0;
 }
 module_init(init_per_zone_pages_min)
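The table in the comment above follows directly from ratio = int_sqrt(10 * gb). A small userspace sketch that reproduces the documented values — illustrative only, using libm's sqrt() in place of the kernel's int_sqrt() and zone sizes expressed in MB rather than present_pages:

#include <math.h>
#include <stdio.h>

/* Integer square root, standing in for the kernel's int_sqrt(). */
static unsigned int isqrt(unsigned long x)
{
	return (unsigned int)sqrt((double)x);
}

int main(void)
{
	/* Zone sizes in MB, matching the table in the patch comment. */
	unsigned long sizes_mb[] = { 10, 100, 1024, 10 * 1024, 100 * 1024,
				     1024 * 1024, 10UL * 1024 * 1024 };

	for (int i = 0; i < 7; i++) {
		unsigned long gb = sizes_mb[i] >> 10;	/* MB -> whole GB */
		unsigned int ratio = isqrt(10 * gb);

		if (!ratio)
			ratio = 1;
		printf("%8luMB -> ratio %u\n", sizes_mb[i], ratio);
	}
	return 0;
}

Built with gcc -lm, it prints ratios 1, 1, 3, 10, 31, 101 and 320 for the seven sizes, matching the table.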
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d10d2f9a33f3..c82ee9a33cfc 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1090,6 +1090,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	__mod_zone_page_state(zone, NR_ACTIVE_ANON, -pgmoved);
 	spin_unlock_irq(&zone->lru_lock);
 
+	pgmoved = 0;
 	while (!list_empty(&l_hold)) {
 		cond_resched();
 		page = lru_to_page(&l_hold);
@@ -1098,6 +1099,13 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	}
 
 	/*
+	 * Count the referenced pages as rotated, even when they are moved
+	 * to the inactive list. This helps balance scan pressure between
+	 * file and anonymous pages in get_scan_ratio.
+	 */
+	zone->recent_rotated[!!file] += pgmoved;
+
+	/*
 	 * Now put the pages back on the appropriate [file or anon] inactive
 	 * and active lists.
 	 */
@@ -1158,7 +1166,6 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 		}
 	}
 	__mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
-	zone->recent_rotated[!!file] += pgmoved;
 
 	__count_zone_vm_events(PGREFILL, zone, pgscanned);
 	__count_vm_events(PGDEACTIVATE, pgdeactivate);
@@ -1174,7 +1181,13 @@ static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
 {
 	int file = is_file_lru(lru);
 
-	if (lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE) {
+	if (lru == LRU_ACTIVE_FILE) {
+		shrink_active_list(nr_to_scan, zone, sc, priority, file);
+		return 0;
+	}
+
+	if (lru == LRU_ACTIVE_ANON &&
+	    (!scan_global_lru(sc) || inactive_anon_is_low(zone))) {
 		shrink_active_list(nr_to_scan, zone, sc, priority, file);
 		return 0;
 	}
@@ -1310,8 +1323,8 @@ static unsigned long shrink_zone(int priority, struct zone *zone,
 		}
 	}
 
-	while (nr[LRU_ACTIVE_ANON] || nr[LRU_INACTIVE_ANON] ||
-			nr[LRU_ACTIVE_FILE] || nr[LRU_INACTIVE_FILE]) {
+	while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
+						nr[LRU_INACTIVE_FILE]) {
 		for_each_lru(l) {
 			if (nr[l]) {
 				nr_to_scan = min(nr[l],
@@ -1324,6 +1337,15 @@ static unsigned long shrink_zone(int priority, struct zone *zone,
 		}
 	}
 
+	/*
+	 * Even if we did not try to evict anon pages at all, we want to
+	 * rebalance the anon lru active/inactive ratio.
+	 */
+	if (!scan_global_lru(sc) || inactive_anon_is_low(zone))
+		shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
+	else if (!scan_global_lru(sc))
+		shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
+
 	throttle_vm_writeout(sc->gfp_mask);
 	return nr_reclaimed;
 }
@@ -1617,6 +1639,14 @@ loop_again:
 			    priority != DEF_PRIORITY)
 				continue;
 
+			/*
+			 * Do some background aging of the anon list, to give
+			 * pages a chance to be referenced before reclaiming.
+			 */
+			if (inactive_anon_is_low(zone))
+				shrink_active_list(SWAP_CLUSTER_MAX, zone,
+							&sc, priority, 0);
+
 			if (!zone_watermark_ok(zone, order, zone->pages_high,
 					       0, 0)) {
 				end_zone = i;
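The recent_rotated[] bookkeeping moved above now also counts referenced pages that get demoted to the inactive list, so the rotation statistics reflect how "hot" each list really is. A toy userspace illustration of the idea — this is not the actual get_scan_ratio() formula, just the notion that a higher fraction of rotated (referenced) pages among those scanned means that list should see less reclaim pressure:

#include <stdio.h>

int main(void)
{
	/* Hypothetical per-list counters: index 0 = anon, index 1 = file. */
	unsigned long scanned[2] = { 1000, 1000 };
	unsigned long rotated[2] = {  800,  200 };

	for (int type = 0; type < 2; type++) {
		unsigned long hot = 100 * rotated[type] / scanned[type];

		/* The hotter list would get proportionally less pressure. */
		printf("%s: %lu%% of scanned pages were referenced\n",
		       type ? "file" : "anon", hot);
	}
	return 0;
}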
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 27400b7da7c4..4380b0dba6d9 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -738,10 +738,12 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
 	seq_printf(m,
 		   "\n  all_unreclaimable: %u"
 		   "\n  prev_priority:     %i"
-		   "\n  start_pfn:         %lu",
+		   "\n  start_pfn:         %lu"
+		   "\n  inactive_ratio:    %u",
 		   zone_is_all_unreclaimable(zone),
 		   zone->prev_priority,
-		   zone->zone_start_pfn);
+		   zone->zone_start_pfn,
+		   zone->inactive_ratio);
 	seq_putc(m, '\n');
 }
 
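With this hunk applied, each zone's block in /proc/zoneinfo carries an extra inactive_ratio line. A throwaway userspace sketch to pull the new field out per zone — a hypothetical helper, not part of the patch; it simply echoes the "Node ..." zone headers and the inactive_ratio lines:

#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/zoneinfo", "r");

	if (!f) {
		perror("/proc/zoneinfo");
		return 1;
	}
	/* Print each zone header plus its inactive_ratio value. */
	while (fgets(line, sizeof(line), f)) {
		if (strncmp(line, "Node", 4) == 0 ||
		    strstr(line, "inactive_ratio"))
			fputs(line, stdout);
	}
	fclose(f);
	return 0;
}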