Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--	mm/vmscan.c	324
1 file changed, 194 insertions, 130 deletions

diff --git a/mm/vmscan.c b/mm/vmscan.c
index d196f46c8808..9a27c44aa327 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -52,6 +52,9 @@ struct scan_control {
 	/* Incremented by the number of inactive pages that were scanned */
 	unsigned long nr_scanned;
 
+	/* Number of pages freed so far during a call to shrink_zones() */
+	unsigned long nr_reclaimed;
+
 	/* This context's GFP mask */
 	gfp_t gfp_mask;
 
@@ -122,11 +125,30 @@ static LIST_HEAD(shrinker_list);
 static DECLARE_RWSEM(shrinker_rwsem);
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
-#define scan_global_lru(sc)	(!(sc)->mem_cgroup)
+#define scanning_global_lru(sc)	(!(sc)->mem_cgroup)
 #else
-#define scan_global_lru(sc)	(1)
+#define scanning_global_lru(sc)	(1)
 #endif
 
+static struct zone_reclaim_stat *get_reclaim_stat(struct zone *zone,
+						  struct scan_control *sc)
+{
+	if (!scanning_global_lru(sc))
+		return mem_cgroup_get_reclaim_stat(sc->mem_cgroup, zone);
+
+	return &zone->reclaim_stat;
+}
+
+static unsigned long zone_nr_pages(struct zone *zone, struct scan_control *sc,
+				   enum lru_list lru)
+{
+	if (!scanning_global_lru(sc))
+		return mem_cgroup_zone_nr_pages(sc->mem_cgroup, zone, lru);
+
+	return zone_page_state(zone, NR_LRU_BASE + lru);
+}
+
+
 /*
  * Add a shrinker callback to be called from the vm
  */
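
get_reclaim_stat() and zone_nr_pages() above let the rest of vmscan.c fetch LRU statistics without caring whether it is doing global reclaim or reclaim on behalf of a memory cgroup. The sketch below is not part of the patch: it is a minimal, compilable userspace model of the same dispatch pattern, with invented struct and field names.

#include <stdio.h>

struct reclaim_stat {
	unsigned long recent_scanned[2];	/* [0] = anon, [1] = file */
	unsigned long recent_rotated[2];
};

struct mem_cgroup {			/* stand-in for the kernel struct */
	struct reclaim_stat stat;
};

struct zone {
	struct reclaim_stat stat;	/* global (non-cgroup) statistics */
};

struct scan_control {
	struct mem_cgroup *mem_cgroup;	/* NULL means global reclaim */
};

/* Same idea as get_reclaim_stat(): pick per-cgroup stats when they exist. */
static struct reclaim_stat *pick_stat(struct zone *zone, struct scan_control *sc)
{
	if (sc->mem_cgroup)
		return &sc->mem_cgroup->stat;
	return &zone->stat;
}

int main(void)
{
	struct zone zone = { .stat = { .recent_scanned = { 100, 400 } } };
	struct mem_cgroup memcg = { .stat = { .recent_scanned = { 7, 13 } } };
	struct scan_control global = { .mem_cgroup = NULL };
	struct scan_control limited = { .mem_cgroup = &memcg };

	printf("global anon scanned: %lu\n", pick_stat(&zone, &global)->recent_scanned[0]);
	printf("memcg  anon scanned: %lu\n", pick_stat(&zone, &limited)->recent_scanned[0]);
	return 0;
}
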
@@ -509,7 +531,6 @@ redo:
 		lru = LRU_UNEVICTABLE;
 		add_page_to_unevictable_list(page);
 	}
-	mem_cgroup_move_lists(page, lru);
 
 	/*
 	 * page's status can change while we move it among lru. If an evictable
@@ -544,7 +565,6 @@ void putback_lru_page(struct page *page)
 
 	lru = !!TestClearPageActive(page) + page_is_file_cache(page);
 	lru_cache_add_lru(page, lru);
-	mem_cgroup_move_lists(page, lru);
 	put_page(page);
 }
 #endif /* CONFIG_UNEVICTABLE_LRU */
@@ -617,7 +637,6 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		    referenced && page_mapping_inuse(page))
 			goto activate_locked;
 
-#ifdef CONFIG_SWAP
 		/*
 		 * Anonymous process memory has backing store?
 		 * Try to allocate it some swap space here.
@@ -625,20 +644,10 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		if (PageAnon(page) && !PageSwapCache(page)) {
 			if (!(sc->gfp_mask & __GFP_IO))
 				goto keep_locked;
-			switch (try_to_munlock(page)) {
-			case SWAP_FAIL:		/* shouldn't happen */
-			case SWAP_AGAIN:
-				goto keep_locked;
-			case SWAP_MLOCK:
-				goto cull_mlocked;
-			case SWAP_SUCCESS:
-				; /* fall thru'; add to swap cache */
-			}
-			if (!add_to_swap(page, GFP_ATOMIC))
+			if (!add_to_swap(page))
 				goto activate_locked;
 			may_enter_fs = 1;
 		}
-#endif /* CONFIG_SWAP */
 
 		mapping = page_mapping(page);
 
@@ -752,6 +761,8 @@ free_it:
 		continue;
 
 cull_mlocked:
+		if (PageSwapCache(page))
+			try_to_free_swap(page);
 		unlock_page(page);
 		putback_lru_page(page);
 		continue;
@@ -759,7 +770,7 @@ cull_mlocked:
 activate_locked:
 		/* Not a candidate for swapping, so reclaim swap space. */
 		if (PageSwapCache(page) && vm_swap_full())
-			remove_exclusive_swap_page_ref(page);
+			try_to_free_swap(page);
 		VM_BUG_ON(PageActive(page));
 		SetPageActive(page);
 		pgactivate++;
@@ -819,6 +830,7 @@ int __isolate_lru_page(struct page *page, int mode, int file)
 		return ret;
 
 	ret = -EBUSY;
+
 	if (likely(get_page_unless_zero(page))) {
 		/*
 		 * Be careful not to clear PageLRU until after we're
@@ -827,6 +839,7 @@ int __isolate_lru_page(struct page *page, int mode, int file)
 		 */
 		ClearPageLRU(page);
 		ret = 0;
+		mem_cgroup_del_lru(page);
 	}
 
 	return ret;
@@ -1035,6 +1048,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 	struct pagevec pvec;
 	unsigned long nr_scanned = 0;
 	unsigned long nr_reclaimed = 0;
+	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
 
 	pagevec_init(&pvec, 1);
 
@@ -1076,13 +1090,14 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 		__mod_zone_page_state(zone, NR_INACTIVE_ANON,
 						-count[LRU_INACTIVE_ANON]);
 
-		if (scan_global_lru(sc)) {
+		if (scanning_global_lru(sc))
 			zone->pages_scanned += nr_scan;
-			zone->recent_scanned[0] += count[LRU_INACTIVE_ANON];
-			zone->recent_scanned[0] += count[LRU_ACTIVE_ANON];
-			zone->recent_scanned[1] += count[LRU_INACTIVE_FILE];
-			zone->recent_scanned[1] += count[LRU_ACTIVE_FILE];
-		}
+
+		reclaim_stat->recent_scanned[0] += count[LRU_INACTIVE_ANON];
+		reclaim_stat->recent_scanned[0] += count[LRU_ACTIVE_ANON];
+		reclaim_stat->recent_scanned[1] += count[LRU_INACTIVE_FILE];
+		reclaim_stat->recent_scanned[1] += count[LRU_ACTIVE_FILE];
+
 		spin_unlock_irq(&zone->lru_lock);
 
 		nr_scanned += nr_scan;
@@ -1114,7 +1129,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 		if (current_is_kswapd()) {
 			__count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scan);
 			__count_vm_events(KSWAPD_STEAL, nr_freed);
-		} else if (scan_global_lru(sc))
+		} else if (scanning_global_lru(sc))
 			__count_zone_vm_events(PGSCAN_DIRECT, zone, nr_scan);
 
 		__count_zone_vm_events(PGSTEAL, zone, nr_freed);
@@ -1140,10 +1155,9 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 			SetPageLRU(page);
 			lru = page_lru(page);
 			add_page_to_lru_list(zone, page, lru);
-			mem_cgroup_move_lists(page, lru);
-			if (PageActive(page) && scan_global_lru(sc)) {
+			if (PageActive(page)) {
 				int file = !!page_is_file_cache(page);
-				zone->recent_rotated[file]++;
+				reclaim_stat->recent_rotated[file]++;
 			}
 			if (!pagevec_add(&pvec, page)) {
 				spin_unlock_irq(&zone->lru_lock);
@@ -1173,11 +1187,6 @@ static inline void note_zone_scanning_priority(struct zone *zone, int priority)
 		zone->prev_priority = priority;
 }
 
-static inline int zone_is_near_oom(struct zone *zone)
-{
-	return zone->pages_scanned >= (zone_lru_pages(zone) * 3);
-}
-
 /*
  * This moves pages from the active list to the inactive list.
  *
@@ -1208,6 +1217,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	struct page *page;
 	struct pagevec pvec;
 	enum lru_list lru;
+	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
 
 	lru_add_drain();
 	spin_lock_irq(&zone->lru_lock);
@@ -1218,10 +1228,10 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	 * zone->pages_scanned is used for detect zone's oom
 	 * mem_cgroup remembers nr_scan by itself.
 	 */
-	if (scan_global_lru(sc)) {
+	if (scanning_global_lru(sc)) {
 		zone->pages_scanned += pgscanned;
-		zone->recent_scanned[!!file] += pgmoved;
 	}
+	reclaim_stat->recent_scanned[!!file] += pgmoved;
 
 	if (file)
 		__mod_zone_page_state(zone, NR_ACTIVE_FILE, -pgmoved);
@@ -1248,6 +1258,13 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 		list_add(&page->lru, &l_inactive);
 	}
 
+	/*
+	 * Move the pages to the [file or anon] inactive list.
+	 */
+	pagevec_init(&pvec, 1);
+	pgmoved = 0;
+	lru = LRU_BASE + file * LRU_FILE;
+
 	spin_lock_irq(&zone->lru_lock);
 	/*
 	 * Count referenced pages from currently used mappings as
@@ -1255,15 +1272,8 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	 * This helps balance scan pressure between file and anonymous
 	 * pages in get_scan_ratio.
 	 */
-	zone->recent_rotated[!!file] += pgmoved;
+	reclaim_stat->recent_rotated[!!file] += pgmoved;
 
-	/*
-	 * Move the pages to the [file or anon] inactive list.
-	 */
-	pagevec_init(&pvec, 1);
-
-	pgmoved = 0;
-	lru = LRU_BASE + file * LRU_FILE;
 	while (!list_empty(&l_inactive)) {
 		page = lru_to_page(&l_inactive);
 		prefetchw_prev_lru_page(page, &l_inactive, flags);
@@ -1273,7 +1283,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 		ClearPageActive(page);
 
 		list_move(&page->lru, &zone->lru[lru].list);
-		mem_cgroup_move_lists(page, lru);
+		mem_cgroup_add_lru_list(page, lru);
 		pgmoved++;
 		if (!pagevec_add(&pvec, page)) {
 			__mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
@@ -1302,6 +1312,38 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	pagevec_release(&pvec);
 }
 
+static int inactive_anon_is_low_global(struct zone *zone)
+{
+	unsigned long active, inactive;
+
+	active = zone_page_state(zone, NR_ACTIVE_ANON);
+	inactive = zone_page_state(zone, NR_INACTIVE_ANON);
+
+	if (inactive * zone->inactive_ratio < active)
+		return 1;
+
+	return 0;
+}
+
+/**
+ * inactive_anon_is_low - check if anonymous pages need to be deactivated
+ * @zone: zone to check
+ * @sc: scan control of this context
+ *
+ * Returns true if the zone does not have enough inactive anon pages,
+ * meaning some active anon pages need to be deactivated.
+ */
+static int inactive_anon_is_low(struct zone *zone, struct scan_control *sc)
+{
+	int low;
+
+	if (scanning_global_lru(sc))
+		low = inactive_anon_is_low_global(zone);
+	else
+		low = mem_cgroup_inactive_anon_is_low(sc->mem_cgroup);
+	return low;
+}
+
 static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
 	struct zone *zone, struct scan_control *sc, int priority)
 {
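
inactive_anon_is_low_global() above compares the inactive anon list against the active one scaled by zone->inactive_ratio. For example, with inactive_ratio = 3, 90,000 inactive pages and 300,000 active pages, 90,000 * 3 = 270,000 < 300,000, so the zone is considered short on inactive anon pages and shrink_active_list() will be asked to deactivate some. Below is a standalone model of that check, not part of the patch; the sample numbers are made up and the kernel derives inactive_ratio from zone size.

#include <stdio.h>

/* Mirrors the shape of inactive_anon_is_low_global(): returns 1 when the
 * inactive anon list is too small relative to the active one. */
static int inactive_anon_is_low(unsigned long active, unsigned long inactive,
				unsigned int inactive_ratio)
{
	return inactive * inactive_ratio < active;
}

int main(void)
{
	printf("%d\n", inactive_anon_is_low(300000, 90000, 3));	/* 1: deactivate some */
	printf("%d\n", inactive_anon_is_low(300000, 150000, 3));	/* 0: balanced enough */
	return 0;
}
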
@@ -1312,8 +1354,7 @@ static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
 		return 0;
 	}
 
-	if (lru == LRU_ACTIVE_ANON &&
-	    (!scan_global_lru(sc) || inactive_anon_is_low(zone))) {
+	if (lru == LRU_ACTIVE_ANON && inactive_anon_is_low(zone, sc)) {
 		shrink_active_list(nr_to_scan, zone, sc, priority, file);
 		return 0;
 	}
@@ -1335,12 +1376,7 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
 	unsigned long anon, file, free;
 	unsigned long anon_prio, file_prio;
 	unsigned long ap, fp;
-
-	anon = zone_page_state(zone, NR_ACTIVE_ANON) +
-		zone_page_state(zone, NR_INACTIVE_ANON);
-	file = zone_page_state(zone, NR_ACTIVE_FILE) +
-		zone_page_state(zone, NR_INACTIVE_FILE);
-	free = zone_page_state(zone, NR_FREE_PAGES);
+	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
 
 	/* If we have no swap space, do not bother scanning anon pages. */
 	if (nr_swap_pages <= 0) {
@@ -1349,11 +1385,20 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
 		return;
 	}
 
-	/* If we have very few page cache pages, force-scan anon pages. */
-	if (unlikely(file + free <= zone->pages_high)) {
-		percent[0] = 100;
-		percent[1] = 0;
-		return;
+	anon = zone_nr_pages(zone, sc, LRU_ACTIVE_ANON) +
+		zone_nr_pages(zone, sc, LRU_INACTIVE_ANON);
+	file = zone_nr_pages(zone, sc, LRU_ACTIVE_FILE) +
+		zone_nr_pages(zone, sc, LRU_INACTIVE_FILE);
+
+	if (scanning_global_lru(sc)) {
+		free = zone_page_state(zone, NR_FREE_PAGES);
+		/* If we have very few page cache pages,
+		   force-scan anon pages. */
+		if (unlikely(file + free <= zone->pages_high)) {
+			percent[0] = 100;
+			percent[1] = 0;
+			return;
+		}
 	}
 
 	/*
@@ -1367,17 +1412,17 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
 	 *
 	 * anon in [0], file in [1]
 	 */
-	if (unlikely(zone->recent_scanned[0] > anon / 4)) {
+	if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) {
 		spin_lock_irq(&zone->lru_lock);
-		zone->recent_scanned[0] /= 2;
-		zone->recent_rotated[0] /= 2;
+		reclaim_stat->recent_scanned[0] /= 2;
+		reclaim_stat->recent_rotated[0] /= 2;
 		spin_unlock_irq(&zone->lru_lock);
 	}
 
-	if (unlikely(zone->recent_scanned[1] > file / 4)) {
+	if (unlikely(reclaim_stat->recent_scanned[1] > file / 4)) {
 		spin_lock_irq(&zone->lru_lock);
-		zone->recent_scanned[1] /= 2;
-		zone->recent_rotated[1] /= 2;
+		reclaim_stat->recent_scanned[1] /= 2;
+		reclaim_stat->recent_rotated[1] /= 2;
 		spin_unlock_irq(&zone->lru_lock);
 	}
 
@@ -1393,11 +1438,11 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
 	 * proportional to the fraction of recently scanned pages on
 	 * each list that were recently referenced and in active use.
 	 */
-	ap = (anon_prio + 1) * (zone->recent_scanned[0] + 1);
-	ap /= zone->recent_rotated[0] + 1;
+	ap = (anon_prio + 1) * (reclaim_stat->recent_scanned[0] + 1);
+	ap /= reclaim_stat->recent_rotated[0] + 1;
 
-	fp = (file_prio + 1) * (zone->recent_scanned[1] + 1);
-	fp /= zone->recent_rotated[1] + 1;
+	fp = (file_prio + 1) * (reclaim_stat->recent_scanned[1] + 1);
+	fp /= reclaim_stat->recent_rotated[1] + 1;
 
 	/* Normalize to percentages */
 	percent[0] = 100 * ap / (ap + fp + 1);
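
The ap/fp arithmetic above is what turns swappiness and the recent_scanned/recent_rotated counters into the anon/file scan split. The snippet below is an illustrative userspace re-derivation, not kernel code; the anon_prio = swappiness and file_prio = 200 - swappiness convention is assumed from the surrounding get_scan_ratio() code (not shown in this hunk), and all sample numbers are made up.

#include <stdio.h>

/* Model of the percent[] computation in get_scan_ratio(). */
static void scan_ratio(unsigned int swappiness,
		       unsigned long scanned[2], unsigned long rotated[2],
		       unsigned long percent[2])
{
	unsigned long anon_prio = swappiness;		/* assumed convention */
	unsigned long file_prio = 200 - swappiness;	/* assumed convention */
	unsigned long ap, fp;

	/* Lists whose pages keep getting rotated back (reused) are scanned less. */
	ap = (anon_prio + 1) * (scanned[0] + 1) / (rotated[0] + 1);
	fp = (file_prio + 1) * (scanned[1] + 1) / (rotated[1] + 1);

	percent[0] = 100 * ap / (ap + fp + 1);	/* anon */
	percent[1] = 100 - percent[0];		/* file */
}

int main(void)
{
	unsigned long scanned[2] = { 1000, 1000 };
	unsigned long rotated[2] = { 800, 100 };	/* anon pages mostly still in use */
	unsigned long percent[2];

	scan_ratio(60, scanned, rotated, percent);
	printf("anon %lu%%, file %lu%%\n", percent[0], percent[1]);
	return 0;
}
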
@@ -1408,69 +1453,72 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
 /*
  * This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
  */
-static unsigned long shrink_zone(int priority, struct zone *zone,
+static void shrink_zone(int priority, struct zone *zone,
 				struct scan_control *sc)
 {
 	unsigned long nr[NR_LRU_LISTS];
 	unsigned long nr_to_scan;
-	unsigned long nr_reclaimed = 0;
 	unsigned long percent[2];	/* anon @ 0; file @ 1 */
 	enum lru_list l;
+	unsigned long nr_reclaimed = sc->nr_reclaimed;
+	unsigned long swap_cluster_max = sc->swap_cluster_max;
 
 	get_scan_ratio(zone, sc, percent);
 
 	for_each_evictable_lru(l) {
-		if (scan_global_lru(sc)) {
-			int file = is_file_lru(l);
-			int scan;
-
-			scan = zone_page_state(zone, NR_LRU_BASE + l);
-			if (priority) {
-				scan >>= priority;
-				scan = (scan * percent[file]) / 100;
-			}
+		int file = is_file_lru(l);
+		int scan;
+
+		scan = zone_page_state(zone, NR_LRU_BASE + l);
+		if (priority) {
+			scan >>= priority;
+			scan = (scan * percent[file]) / 100;
+		}
+		if (scanning_global_lru(sc)) {
 			zone->lru[l].nr_scan += scan;
 			nr[l] = zone->lru[l].nr_scan;
-			if (nr[l] >= sc->swap_cluster_max)
+			if (nr[l] >= swap_cluster_max)
 				zone->lru[l].nr_scan = 0;
 			else
 				nr[l] = 0;
-		} else {
-			/*
-			 * This reclaim occurs not because zone memory shortage
-			 * but because memory controller hits its limit.
-			 * Don't modify zone reclaim related data.
-			 */
-			nr[l] = mem_cgroup_calc_reclaim(sc->mem_cgroup, zone,
-							priority, l);
-		}
+		} else
+			nr[l] = scan;
 	}
 
 	while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
 					nr[LRU_INACTIVE_FILE]) {
 		for_each_evictable_lru(l) {
 			if (nr[l]) {
-				nr_to_scan = min(nr[l],
-					(unsigned long)sc->swap_cluster_max);
+				nr_to_scan = min(nr[l], swap_cluster_max);
 				nr[l] -= nr_to_scan;
 
 				nr_reclaimed += shrink_list(l, nr_to_scan,
 							zone, sc, priority);
 			}
 		}
+		/*
+		 * On large memory systems, scan >> priority can become
+		 * really large. This is fine for the starting priority;
+		 * we want to put equal scanning pressure on each zone.
+		 * However, if the VM has a harder time of freeing pages,
+		 * with multiple processes reclaiming pages, the total
+		 * freeing target can get unreasonably large.
+		 */
+		if (nr_reclaimed > swap_cluster_max &&
+			priority < DEF_PRIORITY && !current_is_kswapd())
+			break;
 	}
 
+	sc->nr_reclaimed = nr_reclaimed;
+
 	/*
 	 * Even if we did not try to evict anon pages at all, we want to
 	 * rebalance the anon lru active/inactive ratio.
 	 */
-	if (!scan_global_lru(sc) || inactive_anon_is_low(zone))
-		shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
-	else if (!scan_global_lru(sc))
+	if (inactive_anon_is_low(zone, sc))
 		shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
 
 	throttle_vm_writeout(sc->gfp_mask);
-	return nr_reclaimed;
 }
 
 /*
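
Two behavioural points in the reworked shrink_zone() above: the per-LRU scan target is the list size shifted down by priority and scaled by percent[], and direct reclaimers (unlike kswapd) now bail out once nr_reclaimed exceeds swap_cluster_max at any priority below DEF_PRIORITY. The following standalone sketch models only the scan-target calculation; it is not part of the patch and the numbers are invented.

#include <stdio.h>

#define DEF_PRIORITY 12

/* Model of the nr[l] computation in the new shrink_zone(). */
static unsigned long scan_target(unsigned long lru_size, int priority,
				 unsigned long percent)
{
	unsigned long scan = lru_size;

	if (priority) {
		scan >>= priority;		/* scan a larger share as priority drops */
		scan = scan * percent / 100;	/* split between anon and file */
	}
	return scan;
}

int main(void)
{
	unsigned long lru_size = 1 << 20;	/* pretend: 1M pages on this LRU */

	for (int priority = DEF_PRIORITY; priority >= 0; priority--)
		printf("priority %2d -> scan %lu pages\n",
		       priority, scan_target(lru_size, priority, 60));
	return 0;
}
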
@@ -1484,16 +1532,13 @@ static unsigned long shrink_zone(int priority, struct zone *zone,
  * b) The zones may be over pages_high but they must go *over* pages_high to
  *    satisfy the `incremental min' zone defense algorithm.
  *
- * Returns the number of reclaimed pages.
- *
  * If a zone is deemed to be full of pinned pages then just give it a light
  * scan then give up on it.
  */
-static unsigned long shrink_zones(int priority, struct zonelist *zonelist,
+static void shrink_zones(int priority, struct zonelist *zonelist,
 					struct scan_control *sc)
 {
 	enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask);
-	unsigned long nr_reclaimed = 0;
 	struct zoneref *z;
 	struct zone *zone;
 
@@ -1505,7 +1550,7 @@ static unsigned long shrink_zones(int priority, struct zonelist *zonelist,
 		 * Take care memory controller reclaiming has small influence
 		 * to global LRU.
 		 */
-		if (scan_global_lru(sc)) {
+		if (scanning_global_lru(sc)) {
 			if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
 				continue;
 			note_zone_scanning_priority(zone, priority);
@@ -1524,10 +1569,8 @@ static unsigned long shrink_zones(int priority, struct zonelist *zonelist,
 							priority);
 		}
 
-		nr_reclaimed += shrink_zone(priority, zone, sc);
+		shrink_zone(priority, zone, sc);
 	}
-
-	return nr_reclaimed;
 }
 
 /*
@@ -1552,7 +1595,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 	int priority;
 	unsigned long ret = 0;
 	unsigned long total_scanned = 0;
-	unsigned long nr_reclaimed = 0;
 	struct reclaim_state *reclaim_state = current->reclaim_state;
 	unsigned long lru_pages = 0;
 	struct zoneref *z;
@@ -1561,12 +1603,12 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 
 	delayacct_freepages_start();
 
-	if (scan_global_lru(sc))
+	if (scanning_global_lru(sc))
 		count_vm_event(ALLOCSTALL);
 	/*
 	 * mem_cgroup will not do shrink_slab.
 	 */
-	if (scan_global_lru(sc)) {
+	if (scanning_global_lru(sc)) {
 		for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
 
 			if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
@@ -1580,21 +1622,21 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 		sc->nr_scanned = 0;
 		if (!priority)
 			disable_swap_token();
-		nr_reclaimed += shrink_zones(priority, zonelist, sc);
+		shrink_zones(priority, zonelist, sc);
 		/*
 		 * Don't shrink slabs when reclaiming memory from
 		 * over limit cgroups
 		 */
-		if (scan_global_lru(sc)) {
+		if (scanning_global_lru(sc)) {
 			shrink_slab(sc->nr_scanned, sc->gfp_mask, lru_pages);
 			if (reclaim_state) {
-				nr_reclaimed += reclaim_state->reclaimed_slab;
+				sc->nr_reclaimed += reclaim_state->reclaimed_slab;
 				reclaim_state->reclaimed_slab = 0;
 			}
 		}
 		total_scanned += sc->nr_scanned;
-		if (nr_reclaimed >= sc->swap_cluster_max) {
-			ret = nr_reclaimed;
+		if (sc->nr_reclaimed >= sc->swap_cluster_max) {
+			ret = sc->nr_reclaimed;
 			goto out;
 		}
 
@@ -1616,8 +1658,8 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 			congestion_wait(WRITE, HZ/10);
 	}
 	/* top priority shrink_zones still had more to do? don't OOM, then */
-	if (!sc->all_unreclaimable && scan_global_lru(sc))
-		ret = nr_reclaimed;
+	if (!sc->all_unreclaimable && scanning_global_lru(sc))
+		ret = sc->nr_reclaimed;
 out:
 	/*
 	 * Now that we've scanned all the zones at this priority level, note
@@ -1629,7 +1671,7 @@ out:
 	if (priority < 0)
 		priority = 0;
 
-	if (scan_global_lru(sc)) {
+	if (scanning_global_lru(sc)) {
 		for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
 
 			if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
@@ -1665,19 +1707,24 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
 
 unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
-						gfp_t gfp_mask)
+					   gfp_t gfp_mask,
+					   bool noswap,
+					   unsigned int swappiness)
 {
 	struct scan_control sc = {
 		.may_writepage = !laptop_mode,
 		.may_swap = 1,
 		.swap_cluster_max = SWAP_CLUSTER_MAX,
-		.swappiness = vm_swappiness,
+		.swappiness = swappiness,
 		.order = 0,
 		.mem_cgroup = mem_cont,
 		.isolate_pages = mem_cgroup_isolate_pages,
 	};
 	struct zonelist *zonelist;
 
+	if (noswap)
+		sc.may_swap = 0;
+
 	sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
 			(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
 	zonelist = NODE_DATA(numa_node_id())->node_zonelists;
@@ -1712,7 +1759,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
 	int priority;
 	int i;
 	unsigned long total_scanned;
-	unsigned long nr_reclaimed;
 	struct reclaim_state *reclaim_state = current->reclaim_state;
 	struct scan_control sc = {
 		.gfp_mask = GFP_KERNEL,
@@ -1731,7 +1777,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
 
 loop_again:
 	total_scanned = 0;
-	nr_reclaimed = 0;
+	sc.nr_reclaimed = 0;
 	sc.may_writepage = !laptop_mode;
 	count_vm_event(PAGEOUTRUN);
 
@@ -1766,7 +1812,7 @@ loop_again:
 			 * Do some background aging of the anon list, to give
 			 * pages a chance to be referenced before reclaiming.
 			 */
-			if (inactive_anon_is_low(zone))
+			if (inactive_anon_is_low(zone, &sc))
 				shrink_active_list(SWAP_CLUSTER_MAX, zone,
 							&sc, priority, 0);
 
@@ -1817,11 +1863,11 @@ loop_again:
 			 */
 			if (!zone_watermark_ok(zone, order, 8*zone->pages_high,
 						end_zone, 0))
-				nr_reclaimed += shrink_zone(priority, zone, &sc);
+				shrink_zone(priority, zone, &sc);
 			reclaim_state->reclaimed_slab = 0;
 			nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
 						lru_pages);
-			nr_reclaimed += reclaim_state->reclaimed_slab;
+			sc.nr_reclaimed += reclaim_state->reclaimed_slab;
 			total_scanned += sc.nr_scanned;
 			if (zone_is_all_unreclaimable(zone))
 				continue;
@@ -1835,7 +1881,7 @@ loop_again:
 			 * even in laptop mode
 			 */
 			if (total_scanned > SWAP_CLUSTER_MAX * 2 &&
-			    total_scanned > nr_reclaimed + nr_reclaimed / 2)
+			    total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2)
 				sc.may_writepage = 1;
 		}
 		if (all_zones_ok)
@@ -1853,7 +1899,7 @@ loop_again:
 		 * matches the direct reclaim path behaviour in terms of impact
 		 * on zone->*_priority.
 		 */
-		if (nr_reclaimed >= SWAP_CLUSTER_MAX)
+		if (sc.nr_reclaimed >= SWAP_CLUSTER_MAX)
 			break;
 	}
 out:
@@ -1872,10 +1918,27 @@ out:
 
 		try_to_freeze();
 
+		/*
+		 * Fragmentation may mean that the system cannot be
+		 * rebalanced for high-order allocations in all zones.
+		 * At this point, if nr_reclaimed < SWAP_CLUSTER_MAX,
+		 * it means the zones have been fully scanned and are still
+		 * not balanced. For high-order allocations, there is
+		 * little point trying all over again as kswapd may
+		 * infinite loop.
+		 *
+		 * Instead, recheck all watermarks at order-0 as they
+		 * are the most important. If watermarks are ok, kswapd will go
+		 * back to sleep. High-order users can still perform direct
+		 * reclaim if they wish.
+		 */
+		if (sc.nr_reclaimed < SWAP_CLUSTER_MAX)
+			order = sc.order = 0;
+
 		goto loop_again;
 	}
 
-	return nr_reclaimed;
+	return sc.nr_reclaimed;
 }
 
 /*
@@ -2227,7 +2290,6 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	struct task_struct *p = current;
 	struct reclaim_state reclaim_state;
 	int priority;
-	unsigned long nr_reclaimed = 0;
 	struct scan_control sc = {
 		.may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
 		.may_swap = !!(zone_reclaim_mode & RECLAIM_SWAP),
@@ -2260,9 +2322,9 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 		priority = ZONE_RECLAIM_PRIORITY;
 		do {
 			note_zone_scanning_priority(zone, priority);
-			nr_reclaimed += shrink_zone(priority, zone, &sc);
+			shrink_zone(priority, zone, &sc);
 			priority--;
-		} while (priority >= 0 && nr_reclaimed < nr_pages);
+		} while (priority >= 0 && sc.nr_reclaimed < nr_pages);
 	}
 
 	slab_reclaimable = zone_page_state(zone, NR_SLAB_RECLAIMABLE);
@@ -2286,13 +2348,13 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 		 * Update nr_reclaimed by the number of slab pages we
 		 * reclaimed from this zone.
 		 */
-		nr_reclaimed += slab_reclaimable -
+		sc.nr_reclaimed += slab_reclaimable -
 			zone_page_state(zone, NR_SLAB_RECLAIMABLE);
 	}
 
 	p->reclaim_state = NULL;
 	current->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE);
-	return nr_reclaimed >= nr_pages;
+	return sc.nr_reclaimed >= nr_pages;
 }
 
 int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
@@ -2393,6 +2455,7 @@ retry:
 
 		__dec_zone_state(zone, NR_UNEVICTABLE);
 		list_move(&page->lru, &zone->lru[l].list);
+		mem_cgroup_move_lists(page, LRU_UNEVICTABLE, l);
 		__inc_zone_state(zone, NR_INACTIVE_ANON + l);
 		__count_vm_event(UNEVICTABLE_PGRESCUED);
 	} else {
@@ -2401,6 +2464,7 @@ retry:
 		 */
 		SetPageUnevictable(page);
 		list_move(&page->lru, &zone->lru[LRU_UNEVICTABLE].list);
+		mem_cgroup_rotate_lru_list(page, LRU_UNEVICTABLE);
 		if (page_evictable(page, NULL))
 			goto retry;
 	}
@@ -2472,7 +2536,7 @@ void scan_mapping_unevictable_pages(struct address_space *mapping)
  * back onto @zone's unevictable list.
  */
 #define SCAN_UNEVICTABLE_BATCH_SIZE 16UL /* arbitrary lock hold batch size */
-void scan_zone_unevictable_pages(struct zone *zone)
+static void scan_zone_unevictable_pages(struct zone *zone)
 {
 	struct list_head *l_unevictable = &zone->lru[LRU_UNEVICTABLE].list;
 	unsigned long scan;
@@ -2514,7 +2578,7 @@ void scan_zone_unevictable_pages(struct zone *zone)
 * that has possibly/probably made some previously unevictable pages
 * evictable.
 */
-void scan_all_zones_unevictable_pages(void)
+static void scan_all_zones_unevictable_pages(void)
 {
 	struct zone *zone;
 