Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--	mm/vmscan.c	372
1 file changed, 235 insertions(+), 137 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 95c08a8cc2ba..4139aa52b941 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -470,8 +470,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page)
 		swp_entry_t swap = { .val = page_private(page) };
 		__delete_from_swap_cache(page);
 		spin_unlock_irq(&mapping->tree_lock);
-		mem_cgroup_uncharge_swapcache(page, swap);
-		swap_free(swap);
+		swapcache_free(swap, page);
 	} else {
 		__remove_from_page_cache(page);
 		spin_unlock_irq(&mapping->tree_lock);
@@ -514,7 +513,6 @@ int remove_mapping(struct address_space *mapping, struct page *page)
  *
  * lru_lock must not be held, interrupts must be enabled.
  */
-#ifdef CONFIG_UNEVICTABLE_LRU
 void putback_lru_page(struct page *page)
 {
 	int lru;
@@ -568,20 +566,6 @@ redo:
 	put_page(page);		/* drop ref from isolate */
 }
 
-#else /* CONFIG_UNEVICTABLE_LRU */
-
-void putback_lru_page(struct page *page)
-{
-	int lru;
-	VM_BUG_ON(PageLRU(page));
-
-	lru = !!TestClearPageActive(page) + page_is_file_cache(page);
-	lru_cache_add_lru(page, lru);
-	put_page(page);
-}
-#endif /* CONFIG_UNEVICTABLE_LRU */
-
-
 /*
  * shrink_page_list() returns the number of reclaimed pages
  */
@@ -593,6 +577,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 	struct pagevec freed_pvec;
 	int pgactivate = 0;
 	unsigned long nr_reclaimed = 0;
+	unsigned long vm_flags;
 
 	cond_resched();
 
@@ -643,7 +628,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 				goto keep_locked;
 		}
 
-		referenced = page_referenced(page, 1, sc->mem_cgroup);
+		referenced = page_referenced(page, 1,
+						sc->mem_cgroup, &vm_flags);
 		/* In active use or really unfreeable?  Activate it. */
 		if (sc->order <= PAGE_ALLOC_COSTLY_ORDER &&
 					referenced && page_mapping_inuse(page))
@@ -943,18 +929,10 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 			/* Check that we have not crossed a zone boundary. */
 			if (unlikely(page_zone_id(cursor_page) != zone_id))
 				continue;
-			switch (__isolate_lru_page(cursor_page, mode, file)) {
-			case 0:
+			if (__isolate_lru_page(cursor_page, mode, file) == 0) {
 				list_move(&cursor_page->lru, dst);
 				nr_taken++;
 				scan++;
-				break;
-
-			case -EBUSY:
-				/* else it is being freed elsewhere */
-				list_move(&cursor_page->lru, src);
-			default:
-				break;	/* ! on LRU or wrong list */
 			}
 		}
 	}
@@ -1061,6 +1039,19 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 	unsigned long nr_scanned = 0;
 	unsigned long nr_reclaimed = 0;
 	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
+	int lumpy_reclaim = 0;
+
+	/*
+	 * If we need a large contiguous chunk of memory, or have
+	 * trouble getting a small set of contiguous pages, we
+	 * will reclaim both active and inactive pages.
+	 *
+	 * We use the same threshold as pageout congestion_wait below.
+	 */
+	if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
+		lumpy_reclaim = 1;
+	else if (sc->order && priority < DEF_PRIORITY - 2)
+		lumpy_reclaim = 1;
 
 	pagevec_init(&pvec, 1);
 
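The hunk above hoists the lumpy-reclaim decision out of the scan loop into a single flag that the later checks reuse. As a minimal standalone sketch of that decision (plain C; the two constants are hard-coded to their usual kernel values as an assumption, they are not part of this diff):

#include <stdio.h>

/* Stand-ins for the kernel constants used in the hunk above. */
#define PAGE_ALLOC_COSTLY_ORDER	3
#define DEF_PRIORITY		12

/* Mirror of the decision made at the top of shrink_inactive_list(). */
static int want_lumpy_reclaim(int order, int priority)
{
	if (order > PAGE_ALLOC_COSTLY_ORDER)
		return 1;	/* large contiguous chunk requested */
	if (order && priority < DEF_PRIORITY - 2)
		return 1;	/* small order, but reclaim is struggling */
	return 0;
}

int main(void)
{
	/* order-0 never goes lumpy, costly orders always do,
	 * small non-zero orders only once priority has dropped */
	printf("%d %d %d\n",
	       want_lumpy_reclaim(0, 12),	/* 0 */
	       want_lumpy_reclaim(9, 12),	/* 1 */
	       want_lumpy_reclaim(2, 9));	/* 1 */
	return 0;
}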
@@ -1073,19 +1064,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 		unsigned long nr_freed;
 		unsigned long nr_active;
 		unsigned int count[NR_LRU_LISTS] = { 0, };
-		int mode = ISOLATE_INACTIVE;
-
-		/*
-		 * If we need a large contiguous chunk of memory, or have
-		 * trouble getting a small set of contiguous pages, we
-		 * will reclaim both active and inactive pages.
-		 *
-		 * We use the same threshold as pageout congestion_wait below.
-		 */
-		if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
-			mode = ISOLATE_BOTH;
-		else if (sc->order && priority < DEF_PRIORITY - 2)
-			mode = ISOLATE_BOTH;
+		int mode = lumpy_reclaim ? ISOLATE_BOTH : ISOLATE_INACTIVE;
 
 		nr_taken = sc->isolate_pages(sc->swap_cluster_max,
 			     &page_list, &nr_scan, sc->order, mode,
@@ -1122,7 +1101,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 		 * but that should be acceptable to the caller
 		 */
 		if (nr_freed < nr_taken && !current_is_kswapd() &&
-					sc->order > PAGE_ALLOC_COSTLY_ORDER) {
+		    lumpy_reclaim) {
 			congestion_wait(WRITE, HZ/10);
 
 			/*
@@ -1217,18 +1196,54 @@ static inline void note_zone_scanning_priority(struct zone *zone, int priority)
  * But we had to alter page->flags anyway.
  */
 
+static void move_active_pages_to_lru(struct zone *zone,
+				     struct list_head *list,
+				     enum lru_list lru)
+{
+	unsigned long pgmoved = 0;
+	struct pagevec pvec;
+	struct page *page;
+
+	pagevec_init(&pvec, 1);
+
+	while (!list_empty(list)) {
+		page = lru_to_page(list);
+		prefetchw_prev_lru_page(page, list, flags);
+
+		VM_BUG_ON(PageLRU(page));
+		SetPageLRU(page);
+
+		VM_BUG_ON(!PageActive(page));
+		if (!is_active_lru(lru))
+			ClearPageActive(page);	/* we are de-activating */
+
+		list_move(&page->lru, &zone->lru[lru].list);
+		mem_cgroup_add_lru_list(page, lru);
+		pgmoved++;
+
+		if (!pagevec_add(&pvec, page) || list_empty(list)) {
+			spin_unlock_irq(&zone->lru_lock);
+			if (buffer_heads_over_limit)
+				pagevec_strip(&pvec);
+			__pagevec_release(&pvec);
+			spin_lock_irq(&zone->lru_lock);
+		}
+	}
+	__mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
+	if (!is_active_lru(lru))
+		__count_vm_events(PGDEACTIVATE, pgmoved);
+}
 
 static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 			struct scan_control *sc, int priority, int file)
 {
 	unsigned long pgmoved;
-	int pgdeactivate = 0;
 	unsigned long pgscanned;
+	unsigned long vm_flags;
 	LIST_HEAD(l_hold);	/* The pages which were snipped off */
+	LIST_HEAD(l_active);
 	LIST_HEAD(l_inactive);
 	struct page *page;
-	struct pagevec pvec;
-	enum lru_list lru;
 	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
 
 	lru_add_drain();
@@ -1245,13 +1260,14 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	}
 	reclaim_stat->recent_scanned[!!file] += pgmoved;
 
+	__count_zone_vm_events(PGREFILL, zone, pgscanned);
 	if (file)
 		__mod_zone_page_state(zone, NR_ACTIVE_FILE, -pgmoved);
 	else
 		__mod_zone_page_state(zone, NR_ACTIVE_ANON, -pgmoved);
 	spin_unlock_irq(&zone->lru_lock);
 
-	pgmoved = 0;
+	pgmoved = 0;  /* count referenced (mapping) mapped pages */
 	while (!list_empty(&l_hold)) {
 		cond_resched();
 		page = lru_to_page(&l_hold);
@@ -1264,58 +1280,44 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 
 		/* page_referenced clears PageReferenced */
 		if (page_mapping_inuse(page) &&
-		    page_referenced(page, 0, sc->mem_cgroup))
+		    page_referenced(page, 0, sc->mem_cgroup, &vm_flags)) {
 			pgmoved++;
+			/*
+			 * Identify referenced, file-backed active pages and
+			 * give them one more trip around the active list. So
+			 * that executable code get better chances to stay in
+			 * memory under moderate memory pressure.  Anon pages
+			 * are not likely to be evicted by use-once streaming
+			 * IO, plus JVM can create lots of anon VM_EXEC pages,
+			 * so we ignore them here.
+			 */
+			if ((vm_flags & VM_EXEC) && !PageAnon(page)) {
+				list_add(&page->lru, &l_active);
+				continue;
+			}
+		}
 
 		list_add(&page->lru, &l_inactive);
 	}
 
 	/*
-	 * Move the pages to the [file or anon] inactive list.
+	 * Move pages back to the lru list.
 	 */
-	pagevec_init(&pvec, 1);
-	lru = LRU_BASE + file * LRU_FILE;
-
 	spin_lock_irq(&zone->lru_lock);
 	/*
-	 * Count referenced pages from currently used mappings as
-	 * rotated, even though they are moved to the inactive list.
-	 * This helps balance scan pressure between file and anonymous
-	 * pages in get_scan_ratio.
+	 * Count referenced pages from currently used mappings as rotated,
+	 * even though only some of them are actually re-activated.  This
+	 * helps balance scan pressure between file and anonymous pages in
+	 * get_scan_ratio.
 	 */
 	reclaim_stat->recent_rotated[!!file] += pgmoved;
 
-	pgmoved = 0;
-	while (!list_empty(&l_inactive)) {
-		page = lru_to_page(&l_inactive);
-		prefetchw_prev_lru_page(page, &l_inactive, flags);
-		VM_BUG_ON(PageLRU(page));
-		SetPageLRU(page);
-		VM_BUG_ON(!PageActive(page));
-		ClearPageActive(page);
+	move_active_pages_to_lru(zone, &l_active,
+						LRU_ACTIVE + file * LRU_FILE);
+	move_active_pages_to_lru(zone, &l_inactive,
+						LRU_BASE + file * LRU_FILE);
 
-		list_move(&page->lru, &zone->lru[lru].list);
-		mem_cgroup_add_lru_list(page, lru);
-		pgmoved++;
-		if (!pagevec_add(&pvec, page)) {
-			__mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
-			spin_unlock_irq(&zone->lru_lock);
-			pgdeactivate += pgmoved;
-			pgmoved = 0;
-			if (buffer_heads_over_limit)
-				pagevec_strip(&pvec);
-			__pagevec_release(&pvec);
-			spin_lock_irq(&zone->lru_lock);
-		}
-	}
-	__mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
-	pgdeactivate += pgmoved;
-	__count_zone_vm_events(PGREFILL, zone, pgscanned);
-	__count_vm_events(PGDEACTIVATE, pgdeactivate);
 	spin_unlock_irq(&zone->lru_lock);
-	if (buffer_heads_over_limit)
-		pagevec_strip(&pvec);
-	pagevec_release(&pvec);
 }
 
 static int inactive_anon_is_low_global(struct zone *zone)
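The heart of the hunk above is a new third outcome for an active page: referenced, file-backed, executable pages go back onto the active list instead of being deactivated. A hedged sketch of just that filter, using a made-up page descriptor rather than struct page (only VM_EXEC's value is taken from the real kernel headers):

#include <stdbool.h>

#define VM_EXEC	0x00000004UL	/* executable mapping bit, as in the kernel */

/* Hypothetical, simplified stand-in for the relevant struct page state. */
struct fake_page {
	bool referenced;	/* result of page_referenced() */
	unsigned long vm_flags;	/* flags reported back by page_referenced() */
	bool anon;		/* PageAnon() */
};

/*
 * Mirror of the filter in shrink_active_list(): keep referenced,
 * file-backed, executable pages on the active list; everything else
 * referenced is counted as rotated but still deactivated.
 */
static bool keep_on_active_list(const struct fake_page *page)
{
	return page->referenced && (page->vm_flags & VM_EXEC) && !page->anon;
}

Anonymous pages are deliberately excluded: use-once streaming IO rarely evicts them anyway, and JVMs can map large anonymous regions executable.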
@@ -1350,12 +1352,48 @@ static int inactive_anon_is_low(struct zone *zone, struct scan_control *sc)
 	return low;
 }
 
+static int inactive_file_is_low_global(struct zone *zone)
+{
+	unsigned long active, inactive;
+
+	active = zone_page_state(zone, NR_ACTIVE_FILE);
+	inactive = zone_page_state(zone, NR_INACTIVE_FILE);
+
+	return (active > inactive);
+}
+
+/**
+ * inactive_file_is_low - check if file pages need to be deactivated
+ * @zone: zone to check
+ * @sc:   scan control of this context
+ *
+ * When the system is doing streaming IO, memory pressure here
+ * ensures that active file pages get deactivated, until more
+ * than half of the file pages are on the inactive list.
+ *
+ * Once we get to that situation, protect the system's working
+ * set from being evicted by disabling active file page aging.
+ *
+ * This uses a different ratio than the anonymous pages, because
+ * the page cache uses a use-once replacement algorithm.
+ */
+static int inactive_file_is_low(struct zone *zone, struct scan_control *sc)
+{
+	int low;
+
+	if (scanning_global_lru(sc))
+		low = inactive_file_is_low_global(zone);
+	else
+		low = mem_cgroup_inactive_file_is_low(sc->mem_cgroup);
+	return low;
+}
+
 static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
 	struct zone *zone, struct scan_control *sc, int priority)
 {
 	int file = is_file_lru(lru);
 
-	if (lru == LRU_ACTIVE_FILE) {
+	if (lru == LRU_ACTIVE_FILE && inactive_file_is_low(zone, sc)) {
 		shrink_active_list(nr_to_scan, zone, sc, priority, file);
 		return 0;
 	}
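inactive_file_is_low() turns shrink_list()'s unconditional deactivation of active file pages into a conditional one: active file pages are only aged while the active list still outnumbers the inactive list. A small self-contained illustration with invented counter values:

#include <stdio.h>

/* Hypothetical zone counters; in the kernel these come from zone_page_state(). */
struct fake_zone {
	unsigned long nr_active_file;
	unsigned long nr_inactive_file;
};

/* Mirror of inactive_file_is_low_global(): "low" means active > inactive. */
static int inactive_file_is_low(const struct fake_zone *zone)
{
	return zone->nr_active_file > zone->nr_inactive_file;
}

int main(void)
{
	struct fake_zone streaming  = { .nr_active_file = 1000, .nr_inactive_file = 9000 };
	struct fake_zone unbalanced = { .nr_active_file = 8000, .nr_inactive_file = 2000 };

	/* streaming IO: inactive already dominates, the active list is left alone */
	printf("%d\n", inactive_file_is_low(&streaming));	/* 0 */
	/* active list holds more than half the file pages: deactivate some */
	printf("%d\n", inactive_file_is_low(&unbalanced));	/* 1 */
	return 0;
}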
@@ -1384,13 +1422,6 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
 	unsigned long ap, fp;
 	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
 
-	/* If we have no swap space, do not bother scanning anon pages. */
-	if (!sc->may_swap || (nr_swap_pages <= 0)) {
-		percent[0] = 0;
-		percent[1] = 100;
-		return;
-	}
-
 	anon  = zone_nr_pages(zone, sc, LRU_ACTIVE_ANON) +
 		zone_nr_pages(zone, sc, LRU_INACTIVE_ANON);
 	file  = zone_nr_pages(zone, sc, LRU_ACTIVE_FILE) +
@@ -1400,7 +1431,7 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
 	free  = zone_page_state(zone, NR_FREE_PAGES);
 	/* If we have very few page cache pages,
 	   force-scan anon pages. */
-	if (unlikely(file + free <= zone->pages_high)) {
+	if (unlikely(file + free <= high_wmark_pages(zone))) {
 		percent[0] = 100;
 		percent[1] = 0;
 		return;
@@ -1455,6 +1486,26 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
 	percent[1] = 100 - percent[0];
 }
 
+/*
+ * Smallish @nr_to_scan's are deposited in @nr_saved_scan,
+ * until we collected @swap_cluster_max pages to scan.
+ */
+static unsigned long nr_scan_try_batch(unsigned long nr_to_scan,
+				       unsigned long *nr_saved_scan,
+				       unsigned long swap_cluster_max)
+{
+	unsigned long nr;
+
+	*nr_saved_scan += nr_to_scan;
+	nr = *nr_saved_scan;
+
+	if (nr >= swap_cluster_max)
+		*nr_saved_scan = 0;
+	else
+		nr = 0;
+
+	return nr;
+}
 
 /*
  * This is a basic per-zone page freer.  Used by both kswapd and direct reclaim.
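nr_scan_try_batch() is an accumulator: sub-batch scan targets are banked in *nr_saved_scan and released in one lump once they add up to swap_cluster_max. The simulation below copies the function as it appears in the hunk above and drives it with hypothetical per-pass targets to show the banking behaviour:

#include <stdio.h>

/* Same logic as nr_scan_try_batch() in the patch above. */
static unsigned long nr_scan_try_batch(unsigned long nr_to_scan,
				       unsigned long *nr_saved_scan,
				       unsigned long swap_cluster_max)
{
	unsigned long nr;

	*nr_saved_scan += nr_to_scan;
	nr = *nr_saved_scan;

	if (nr >= swap_cluster_max)
		*nr_saved_scan = 0;
	else
		nr = 0;

	return nr;
}

int main(void)
{
	unsigned long saved = 0;
	unsigned long requests[] = { 7, 9, 20, 5 };	/* hypothetical per-pass targets */
	unsigned long i;

	/* With swap_cluster_max = 32 this prints 0, 0, 36, 0:
	 * nothing is scanned until the banked total reaches the batch size. */
	for (i = 0; i < sizeof(requests) / sizeof(requests[0]); i++)
		printf("%lu\n", nr_scan_try_batch(requests[i], &saved, 32));
	return 0;
}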
@@ -1468,26 +1519,30 @@ static void shrink_zone(int priority, struct zone *zone,
 	enum lru_list l;
 	unsigned long nr_reclaimed = sc->nr_reclaimed;
 	unsigned long swap_cluster_max = sc->swap_cluster_max;
+	int noswap = 0;
 
-	get_scan_ratio(zone, sc, percent);
+	/* If we have no swap space, do not bother scanning anon pages. */
+	if (!sc->may_swap || (nr_swap_pages <= 0)) {
+		noswap = 1;
+		percent[0] = 0;
+		percent[1] = 100;
+	} else
+		get_scan_ratio(zone, sc, percent);
 
 	for_each_evictable_lru(l) {
 		int file = is_file_lru(l);
 		unsigned long scan;
 
 		scan = zone_nr_pages(zone, sc, l);
-		if (priority) {
+		if (priority || noswap) {
 			scan >>= priority;
 			scan = (scan * percent[file]) / 100;
 		}
-		if (scanning_global_lru(sc)) {
-			zone->lru[l].nr_scan += scan;
-			nr[l] = zone->lru[l].nr_scan;
-			if (nr[l] >= swap_cluster_max)
-				zone->lru[l].nr_scan = 0;
-			else
-				nr[l] = 0;
-		} else
+		if (scanning_global_lru(sc))
+			nr[l] = nr_scan_try_batch(scan,
+						  &zone->lru[l].nr_saved_scan,
+						  swap_cluster_max);
+		else
 			nr[l] = scan;
 	}
 
@@ -1521,7 +1576,7 @@ static void shrink_zone(int priority, struct zone *zone,
 	 * Even if we did not try to evict anon pages at all, we want to
 	 * rebalance the anon lru active/inactive ratio.
 	 */
-	if (inactive_anon_is_low(zone, sc))
+	if (inactive_anon_is_low(zone, sc) && nr_swap_pages > 0)
 		shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
 
 	throttle_vm_writeout(sc->gfp_mask);
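For a feel of the numbers flowing into nr_scan_try_batch() in the hunk above: the per-list target is still the LRU size shifted down by priority and scaled by get_scan_ratio()'s percentage. A worked example with invented inputs:

#include <stdio.h>

/*
 * Example scan-target calculation as done per LRU list in shrink_zone()
 * above. All inputs are made up for illustration.
 */
int main(void)
{
	unsigned long lru_pages = 120000;	/* pages on this LRU list */
	int priority = 10;			/* DEF_PRIORITY is 12; 10 means some pressure */
	unsigned long percent = 40;		/* share assigned by get_scan_ratio() */
	unsigned long scan;

	scan = lru_pages;
	if (priority) {				/* the noswap case now also takes this branch */
		scan >>= priority;		/* 120000 >> 10 = 117 */
		scan = scan * percent / 100;	/* 117 * 40 / 100 = 46 */
	}
	printf("scan target: %lu pages\n", scan);	/* 46 */
	return 0;
}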
@@ -1532,11 +1587,13 @@ static void shrink_zone(int priority, struct zone *zone,
 * try to reclaim pages from zones which will satisfy the caller's allocation
 * request.
 *
- * We reclaim from a zone even if that zone is over pages_high.  Because:
+ * We reclaim from a zone even if that zone is over high_wmark_pages(zone).
+ * Because:
 * a) The caller may be trying to free *extra* pages to satisfy a higher-order
 *    allocation or
- * b) The zones may be over pages_high but they must go *over* pages_high to
- *    satisfy the `incremental min' zone defense algorithm.
+ * b) The target zone may be at high_wmark_pages(zone) but the lower zones
+ *    must go *over* high_wmark_pages(zone) to satisfy the `incremental min'
+ *    zone defense algorithm.
 *
 * If a zone is deemed to be full of pinned pages then just give it a light
 * scan then give up on it.
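The pages_high/pages_low fields referenced by the old comments are replaced throughout this patch by watermark accessors. The sketch below is a rough, hedged reconstruction of what those accessors look like; the real definitions live in include/linux/mmzone.h and are not part of this diff, so treat the names and layout as assumptions:

/*
 * Hypothetical reconstruction of the watermark accessors this patch
 * switches vmscan.c over to; not taken from this diff.
 */
enum zone_watermarks {
	WMARK_MIN,
	WMARK_LOW,
	WMARK_HIGH,
	NR_WMARK
};

struct zone_stub {
	/* one array replaces the old pages_min/pages_low/pages_high fields */
	unsigned long watermark[NR_WMARK];
};

#define min_wmark_pages(z)	((z)->watermark[WMARK_MIN])
#define low_wmark_pages(z)	((z)->watermark[WMARK_LOW])
#define high_wmark_pages(z)	((z)->watermark[WMARK_HIGH])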
@@ -1742,7 +1799,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 
 /*
 * For kswapd, balance_pgdat() will work across all this node's zones until
- * they are all at pages_high.
+ * they are all at high_wmark_pages(zone).
 *
 * Returns the number of pages which were actually freed.
 *
@@ -1755,11 +1812,11 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 * the zone for when the problem goes away.
 *
 * kswapd scans the zones in the highmem->normal->dma direction.  It skips
- * zones which have free_pages > pages_high, but once a zone is found to have
- * free_pages <= pages_high, we scan that zone and the lower zones regardless
- * of the number of free pages in the lower zones. This interoperates with
- * the page allocator fallback scheme to ensure that aging of pages is balanced
- * across the zones.
+ * zones which have free_pages > high_wmark_pages(zone), but once a zone is
+ * found to have free_pages <= high_wmark_pages(zone), we scan that zone and the
+ * lower zones regardless of the number of free pages in the lower zones. This
+ * interoperates with the page allocator fallback scheme to ensure that aging
+ * of pages is balanced across the zones.
 */
 static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
 {
@@ -1780,7 +1837,8 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
 	};
 	/*
 	 * temp_priority is used to remember the scanning priority at which
-	 * this zone was successfully refilled to free_pages == pages_high.
+	 * this zone was successfully refilled to
+	 * free_pages == high_wmark_pages(zone).
 	 */
 	int temp_priority[MAX_NR_ZONES];
 
@@ -1825,8 +1883,8 @@ loop_again:
 				shrink_active_list(SWAP_CLUSTER_MAX, zone,
 							&sc, priority, 0);
 
-			if (!zone_watermark_ok(zone, order, zone->pages_high,
-					       0, 0)) {
+			if (!zone_watermark_ok(zone, order,
+					high_wmark_pages(zone), 0, 0)) {
 				end_zone = i;
 				break;
 			}
@@ -1860,8 +1918,8 @@ loop_again:
 					priority != DEF_PRIORITY)
 				continue;
 
-			if (!zone_watermark_ok(zone, order, zone->pages_high,
-					       end_zone, 0))
+			if (!zone_watermark_ok(zone, order,
+					high_wmark_pages(zone), end_zone, 0))
 				all_zones_ok = 0;
 			temp_priority[i] = priority;
 			sc.nr_scanned = 0;
@@ -1870,8 +1928,8 @@ loop_again:
 			 * We put equal pressure on every zone, unless one
 			 * zone has way too many pages free already.
 			 */
-			if (!zone_watermark_ok(zone, order, 8*zone->pages_high,
-						end_zone, 0))
+			if (!zone_watermark_ok(zone, order,
+					8*high_wmark_pages(zone), end_zone, 0))
 				shrink_zone(priority, zone, &sc);
 			reclaim_state->reclaimed_slab = 0;
 			nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
@@ -2037,7 +2095,7 @@ void wakeup_kswapd(struct zone *zone, int order)
 		return;
 
 	pgdat = zone->zone_pgdat;
-	if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0))
+	if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0))
 		return;
 	if (pgdat->kswapd_max_order < order)
 		pgdat->kswapd_max_order = order;
@@ -2084,11 +2142,11 @@ static void shrink_all_zones(unsigned long nr_pages, int prio,
 						l == LRU_ACTIVE_FILE))
 				continue;
 
-			zone->lru[l].nr_scan += (lru_pages >> prio) + 1;
-			if (zone->lru[l].nr_scan >= nr_pages || pass > 3) {
+			zone->lru[l].nr_saved_scan += (lru_pages >> prio) + 1;
+			if (zone->lru[l].nr_saved_scan >= nr_pages || pass > 3) {
 				unsigned long nr_to_scan;
 
-				zone->lru[l].nr_scan = 0;
+				zone->lru[l].nr_saved_scan = 0;
 				nr_to_scan = min(nr_pages, lru_pages);
 				nr_reclaimed += shrink_list(l, nr_to_scan, zone,
 								sc, prio);
@@ -2290,6 +2348,48 @@ int sysctl_min_unmapped_ratio = 1;
 */
 int sysctl_min_slab_ratio = 5;
 
+static inline unsigned long zone_unmapped_file_pages(struct zone *zone)
+{
+	unsigned long file_mapped = zone_page_state(zone, NR_FILE_MAPPED);
+	unsigned long file_lru = zone_page_state(zone, NR_INACTIVE_FILE) +
+		zone_page_state(zone, NR_ACTIVE_FILE);
+
+	/*
+	 * It's possible for there to be more file mapped pages than
+	 * accounted for by the pages on the file LRU lists because
+	 * tmpfs pages accounted for as ANON can also be FILE_MAPPED
+	 */
+	return (file_lru > file_mapped) ? (file_lru - file_mapped) : 0;
+}
+
+/* Work out how many page cache pages we can reclaim in this reclaim_mode */
+static long zone_pagecache_reclaimable(struct zone *zone)
+{
+	long nr_pagecache_reclaimable;
+	long delta = 0;
+
+	/*
+	 * If RECLAIM_SWAP is set, then all file pages are considered
+	 * potentially reclaimable. Otherwise, we have to worry about
+	 * pages like swapcache and zone_unmapped_file_pages() provides
+	 * a better estimate
+	 */
+	if (zone_reclaim_mode & RECLAIM_SWAP)
+		nr_pagecache_reclaimable = zone_page_state(zone, NR_FILE_PAGES);
+	else
+		nr_pagecache_reclaimable = zone_unmapped_file_pages(zone);
+
+	/* If we can't clean pages, remove dirty pages from consideration */
+	if (!(zone_reclaim_mode & RECLAIM_WRITE))
+		delta += zone_page_state(zone, NR_FILE_DIRTY);
+
+	/* Watch for any possible underflows due to delta */
+	if (unlikely(delta > nr_pagecache_reclaimable))
+		delta = nr_pagecache_reclaimable;
+
+	return nr_pagecache_reclaimable - delta;
+}
+
 /*
 * Try to free up some pages from this zone through reclaim.
 */
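zone_pagecache_reclaimable() replaces the bare NR_FILE_PAGES - NR_FILE_MAPPED estimate used further down. The arithmetic below reproduces it for the common zone_reclaim_mode == 0 case (neither RECLAIM_SWAP nor RECLAIM_WRITE), with all counter values invented for illustration:

#include <stdio.h>

/* Hypothetical zone counters (all values invented for the example). */
int main(void)
{
	long nr_file_lru = 50000;	/* NR_INACTIVE_FILE + NR_ACTIVE_FILE */
	long nr_file_mapped = 12000;	/* NR_FILE_MAPPED */
	long nr_file_dirty = 3000;	/* NR_FILE_DIRTY */
	long unmapped, reclaimable;

	/* zone_unmapped_file_pages(): mapped can exceed the file LRU (tmpfs), so clamp */
	unmapped = (nr_file_lru > nr_file_mapped) ? nr_file_lru - nr_file_mapped : 0;

	/* zone_pagecache_reclaimable() without RECLAIM_SWAP/RECLAIM_WRITE:
	 * dirty pages cannot be written back here, so subtract them too */
	reclaimable = unmapped - (nr_file_dirty > unmapped ? unmapped : nr_file_dirty);

	printf("reclaimable estimate: %ld pages\n", reclaimable);	/* 35000 */
	return 0;
}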
@@ -2324,9 +2424,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	reclaim_state.reclaimed_slab = 0;
 	p->reclaim_state = &reclaim_state;
 
-	if (zone_page_state(zone, NR_FILE_PAGES) -
-		zone_page_state(zone, NR_FILE_MAPPED) >
-		zone->min_unmapped_pages) {
+	if (zone_pagecache_reclaimable(zone) > zone->min_unmapped_pages) {
 		/*
 		 * Free memory by calling shrink zone with increasing
 		 * priorities until we have enough memory freed.
@@ -2384,20 +2482,18 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	 * if less than a specified percentage of the zone is used by
 	 * unmapped file backed pages.
 	 */
-	if (zone_page_state(zone, NR_FILE_PAGES) -
-	    zone_page_state(zone, NR_FILE_MAPPED) <= zone->min_unmapped_pages
-	    && zone_page_state(zone, NR_SLAB_RECLAIMABLE)
-			<= zone->min_slab_pages)
-		return 0;
+	if (zone_pagecache_reclaimable(zone) <= zone->min_unmapped_pages &&
+	    zone_page_state(zone, NR_SLAB_RECLAIMABLE) <= zone->min_slab_pages)
+		return ZONE_RECLAIM_FULL;
 
 	if (zone_is_all_unreclaimable(zone))
-		return 0;
+		return ZONE_RECLAIM_FULL;
 
 	/*
 	 * Do not scan if the allocation should not be delayed.
 	 */
 	if (!(gfp_mask & __GFP_WAIT) || (current->flags & PF_MEMALLOC))
-		return 0;
+		return ZONE_RECLAIM_NOSCAN;
 
 	/*
 	 * Only run zone reclaim on the local zone or on zones that do not
@@ -2407,18 +2503,21 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	 */
 	node_id = zone_to_nid(zone);
 	if (node_state(node_id, N_CPU) && node_id != numa_node_id())
-		return 0;
+		return ZONE_RECLAIM_NOSCAN;
 
 	if (zone_test_and_set_flag(zone, ZONE_RECLAIM_LOCKED))
-		return 0;
+		return ZONE_RECLAIM_NOSCAN;
+
 	ret = __zone_reclaim(zone, gfp_mask, order);
 	zone_clear_flag(zone, ZONE_RECLAIM_LOCKED);
 
+	if (!ret)
+		count_vm_event(PGSCAN_ZONE_RECLAIM_FAILED);
+
 	return ret;
 }
 #endif
 
-#ifdef CONFIG_UNEVICTABLE_LRU
 /*
 * page_evictable - test whether a page is evictable
 * @page: the page to test
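zone_reclaim() now reports why nothing was reclaimed instead of a bare 0, and bumps a PGSCAN_ZONE_RECLAIM_FAILED event when a real scan came up empty. The definitions of the return codes are not in these hunks; the sketch below is a hedged reconstruction (numeric values and caller logic are assumptions, not taken from this diff) of how an allocator-side caller can tell the cases apart:

/*
 * Assumed values -- the real definitions are introduced elsewhere in
 * this series, not in the hunks shown here.
 */
#define ZONE_RECLAIM_NOSCAN	-2	/* zone reclaim did not run at all */
#define ZONE_RECLAIM_FULL	-1	/* scanned, but nothing left to reclaim */
#define ZONE_RECLAIM_SOME	0	/* freed a few pages, not enough */
#define ZONE_RECLAIM_SUCCESS	1	/* freed enough pages */

/* Sketch of a caller distinguishing "don't bother with this zone"
 * from "recheck the watermark after reclaim made some progress". */
static int zone_reclaim_worth_rechecking(int ret)
{
	return ret != ZONE_RECLAIM_NOSCAN && ret != ZONE_RECLAIM_FULL;
}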
@@ -2665,4 +2764,3 @@ void scan_unevictable_unregister_node(struct node *node)
 	sysdev_remove_file(&node->sysdev, &attr_scan_unevictable_pages);
 }
 
-#endif
