Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--   mm/vmscan.c | 372
1 files changed, 235 insertions, 137 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 95c08a8cc2ba..4139aa52b941 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -470,8 +470,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page)
 		swp_entry_t swap = { .val = page_private(page) };
 		__delete_from_swap_cache(page);
 		spin_unlock_irq(&mapping->tree_lock);
-		mem_cgroup_uncharge_swapcache(page, swap);
-		swap_free(swap);
+		swapcache_free(swap, page);
 	} else {
 		__remove_from_page_cache(page);
 		spin_unlock_irq(&mapping->tree_lock);
@@ -514,7 +513,6 @@ int remove_mapping(struct address_space *mapping, struct page *page)
  *
  * lru_lock must not be held, interrupts must be enabled.
  */
-#ifdef CONFIG_UNEVICTABLE_LRU
 void putback_lru_page(struct page *page)
 {
 	int lru;
@@ -568,20 +566,6 @@ redo:
 	put_page(page);		/* drop ref from isolate */
 }
 
-#else /* CONFIG_UNEVICTABLE_LRU */
-
-void putback_lru_page(struct page *page)
-{
-	int lru;
-	VM_BUG_ON(PageLRU(page));
-
-	lru = !!TestClearPageActive(page) + page_is_file_cache(page);
-	lru_cache_add_lru(page, lru);
-	put_page(page);
-}
-#endif /* CONFIG_UNEVICTABLE_LRU */
-
-
 /*
  * shrink_page_list() returns the number of reclaimed pages
  */
@@ -593,6 +577,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 	struct pagevec freed_pvec;
 	int pgactivate = 0;
 	unsigned long nr_reclaimed = 0;
+	unsigned long vm_flags;
 
 	cond_resched();
 
@@ -643,7 +628,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			goto keep_locked;
 		}
 
-		referenced = page_referenced(page, 1, sc->mem_cgroup);
+		referenced = page_referenced(page, 1,
+						sc->mem_cgroup, &vm_flags);
 		/* In active use or really unfreeable? Activate it. */
 		if (sc->order <= PAGE_ALLOC_COSTLY_ORDER &&
 					referenced && page_mapping_inuse(page))
@@ -943,18 +929,10 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 			/* Check that we have not crossed a zone boundary. */
 			if (unlikely(page_zone_id(cursor_page) != zone_id))
 				continue;
-			switch (__isolate_lru_page(cursor_page, mode, file)) {
-			case 0:
+			if (__isolate_lru_page(cursor_page, mode, file) == 0) {
 				list_move(&cursor_page->lru, dst);
 				nr_taken++;
 				scan++;
-				break;
-
-			case -EBUSY:
-				/* else it is being freed elsewhere */
-				list_move(&cursor_page->lru, src);
-			default:
-				break;	/* ! on LRU or wrong list */
 			}
 		}
 	}
@@ -1061,6 +1039,19 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 	unsigned long nr_scanned = 0;
 	unsigned long nr_reclaimed = 0;
 	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
+	int lumpy_reclaim = 0;
+
+	/*
+	 * If we need a large contiguous chunk of memory, or have
+	 * trouble getting a small set of contiguous pages, we
+	 * will reclaim both active and inactive pages.
+	 *
+	 * We use the same threshold as pageout congestion_wait below.
+	 */
+	if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
+		lumpy_reclaim = 1;
+	else if (sc->order && priority < DEF_PRIORITY - 2)
+		lumpy_reclaim = 1;
 
 	pagevec_init(&pvec, 1);
 
@@ -1073,19 +1064,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 		unsigned long nr_freed;
 		unsigned long nr_active;
 		unsigned int count[NR_LRU_LISTS] = { 0, };
-		int mode = ISOLATE_INACTIVE;
-
-		/*
-		 * If we need a large contiguous chunk of memory, or have
-		 * trouble getting a small set of contiguous pages, we
-		 * will reclaim both active and inactive pages.
-		 *
-		 * We use the same threshold as pageout congestion_wait below.
-		 */
-		if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
-			mode = ISOLATE_BOTH;
-		else if (sc->order && priority < DEF_PRIORITY - 2)
-			mode = ISOLATE_BOTH;
+		int mode = lumpy_reclaim ? ISOLATE_BOTH : ISOLATE_INACTIVE;
 
 		nr_taken = sc->isolate_pages(sc->swap_cluster_max,
 			     &page_list, &nr_scan, sc->order, mode,
@@ -1122,7 +1101,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 		 * but that should be acceptable to the caller
 		 */
 		if (nr_freed < nr_taken && !current_is_kswapd() &&
-					sc->order > PAGE_ALLOC_COSTLY_ORDER) {
+		    lumpy_reclaim) {
 			congestion_wait(WRITE, HZ/10);
 
 			/*
@@ -1217,18 +1196,54 @@ static inline void note_zone_scanning_priority(struct zone *zone, int priority)
  * But we had to alter page->flags anyway.
  */
 
+static void move_active_pages_to_lru(struct zone *zone,
+				     struct list_head *list,
+				     enum lru_list lru)
+{
+	unsigned long pgmoved = 0;
+	struct pagevec pvec;
+	struct page *page;
+
+	pagevec_init(&pvec, 1);
+
+	while (!list_empty(list)) {
+		page = lru_to_page(list);
+		prefetchw_prev_lru_page(page, list, flags);
+
+		VM_BUG_ON(PageLRU(page));
+		SetPageLRU(page);
+
+		VM_BUG_ON(!PageActive(page));
+		if (!is_active_lru(lru))
+			ClearPageActive(page);	/* we are de-activating */
+
+		list_move(&page->lru, &zone->lru[lru].list);
+		mem_cgroup_add_lru_list(page, lru);
+		pgmoved++;
+
+		if (!pagevec_add(&pvec, page) || list_empty(list)) {
+			spin_unlock_irq(&zone->lru_lock);
+			if (buffer_heads_over_limit)
+				pagevec_strip(&pvec);
+			__pagevec_release(&pvec);
+			spin_lock_irq(&zone->lru_lock);
+		}
+	}
+	__mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
+	if (!is_active_lru(lru))
+		__count_vm_events(PGDEACTIVATE, pgmoved);
+}
 
 static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 			struct scan_control *sc, int priority, int file)
 {
 	unsigned long pgmoved;
-	int pgdeactivate = 0;
 	unsigned long pgscanned;
+	unsigned long vm_flags;
 	LIST_HEAD(l_hold);	/* The pages which were snipped off */
+	LIST_HEAD(l_active);
 	LIST_HEAD(l_inactive);
 	struct page *page;
-	struct pagevec pvec;
-	enum lru_list lru;
 	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
 
 	lru_add_drain();
@@ -1245,13 +1260,14 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	}
 	reclaim_stat->recent_scanned[!!file] += pgmoved;
 
+	__count_zone_vm_events(PGREFILL, zone, pgscanned);
 	if (file)
 		__mod_zone_page_state(zone, NR_ACTIVE_FILE, -pgmoved);
 	else
 		__mod_zone_page_state(zone, NR_ACTIVE_ANON, -pgmoved);
 	spin_unlock_irq(&zone->lru_lock);
 
-	pgmoved = 0;
+	pgmoved = 0;  /* count referenced (mapping) mapped pages */
 	while (!list_empty(&l_hold)) {
 		cond_resched();
 		page = lru_to_page(&l_hold);
@@ -1264,58 +1280,44 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 
 		/* page_referenced clears PageReferenced */
 		if (page_mapping_inuse(page) &&
-		    page_referenced(page, 0, sc->mem_cgroup))
+		    page_referenced(page, 0, sc->mem_cgroup, &vm_flags)) {
 			pgmoved++;
+			/*
+			 * Identify referenced, file-backed active pages and
+			 * give them one more trip around the active list. So
+			 * that executable code get better chances to stay in
+			 * memory under moderate memory pressure.  Anon pages
+			 * are not likely to be evicted by use-once streaming
+			 * IO, plus JVM can create lots of anon VM_EXEC pages,
+			 * so we ignore them here.
+			 */
+			if ((vm_flags & VM_EXEC) && !PageAnon(page)) {
+				list_add(&page->lru, &l_active);
+				continue;
+			}
+		}
 
 		list_add(&page->lru, &l_inactive);
 	}
 
 	/*
-	 * Move the pages to the [file or anon] inactive list.
+	 * Move pages back to the lru list.
 	 */
-	pagevec_init(&pvec, 1);
-	lru = LRU_BASE + file * LRU_FILE;
-
 	spin_lock_irq(&zone->lru_lock);
 	/*
-	 * Count referenced pages from currently used mappings as
-	 * rotated, even though they are moved to the inactive list.
-	 * This helps balance scan pressure between file and anonymous
-	 * pages in get_scan_ratio.
+	 * Count referenced pages from currently used mappings as rotated,
+	 * even though only some of them are actually re-activated.  This
+	 * helps balance scan pressure between file and anonymous pages in
+	 * get_scan_ratio.
 	 */
 	reclaim_stat->recent_rotated[!!file] += pgmoved;
 
-	pgmoved = 0;
-	while (!list_empty(&l_inactive)) {
-		page = lru_to_page(&l_inactive);
-		prefetchw_prev_lru_page(page, &l_inactive, flags);
-		VM_BUG_ON(PageLRU(page));
-		SetPageLRU(page);
-		VM_BUG_ON(!PageActive(page));
-		ClearPageActive(page);
+	move_active_pages_to_lru(zone, &l_active,
+						LRU_ACTIVE + file * LRU_FILE);
+	move_active_pages_to_lru(zone, &l_inactive,
+						LRU_BASE + file * LRU_FILE);
 
-		list_move(&page->lru, &zone->lru[lru].list);
-		mem_cgroup_add_lru_list(page, lru);
-		pgmoved++;
-		if (!pagevec_add(&pvec, page)) {
-			__mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
-			spin_unlock_irq(&zone->lru_lock);
-			pgdeactivate += pgmoved;
-			pgmoved = 0;
-			if (buffer_heads_over_limit)
-				pagevec_strip(&pvec);
-			__pagevec_release(&pvec);
-			spin_lock_irq(&zone->lru_lock);
-		}
-	}
-	__mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
-	pgdeactivate += pgmoved;
-	__count_zone_vm_events(PGREFILL, zone, pgscanned);
-	__count_vm_events(PGDEACTIVATE, pgdeactivate);
 	spin_unlock_irq(&zone->lru_lock);
-	if (buffer_heads_over_limit)
-		pagevec_strip(&pvec);
-	pagevec_release(&pvec);
 }
 
 static int inactive_anon_is_low_global(struct zone *zone)
@@ -1350,12 +1352,48 @@ static int inactive_anon_is_low(struct zone *zone, struct scan_control *sc)
 	return low;
 }
 
+static int inactive_file_is_low_global(struct zone *zone)
+{
+	unsigned long active, inactive;
+
+	active = zone_page_state(zone, NR_ACTIVE_FILE);
+	inactive = zone_page_state(zone, NR_INACTIVE_FILE);
+
+	return (active > inactive);
+}
+
+/**
+ * inactive_file_is_low - check if file pages need to be deactivated
+ * @zone: zone to check
+ * @sc:   scan control of this context
+ *
+ * When the system is doing streaming IO, memory pressure here
+ * ensures that active file pages get deactivated, until more
+ * than half of the file pages are on the inactive list.
+ *
+ * Once we get to that situation, protect the system's working
+ * set from being evicted by disabling active file page aging.
+ *
+ * This uses a different ratio than the anonymous pages, because
+ * the page cache uses a use-once replacement algorithm.
+ */
+static int inactive_file_is_low(struct zone *zone, struct scan_control *sc)
+{
+	int low;
+
+	if (scanning_global_lru(sc))
+		low = inactive_file_is_low_global(zone);
+	else
+		low = mem_cgroup_inactive_file_is_low(sc->mem_cgroup);
+	return low;
+}
+
 static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
 	struct zone *zone, struct scan_control *sc, int priority)
 {
 	int file = is_file_lru(lru);
 
-	if (lru == LRU_ACTIVE_FILE) {
+	if (lru == LRU_ACTIVE_FILE && inactive_file_is_low(zone, sc)) {
 		shrink_active_list(nr_to_scan, zone, sc, priority, file);
 		return 0;
 	}
@@ -1384,13 +1422,6 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
 	unsigned long ap, fp;
 	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
 
-	/* If we have no swap space, do not bother scanning anon pages. */
-	if (!sc->may_swap || (nr_swap_pages <= 0)) {
-		percent[0] = 0;
-		percent[1] = 100;
-		return;
-	}
-
 	anon  = zone_nr_pages(zone, sc, LRU_ACTIVE_ANON) +
 		zone_nr_pages(zone, sc, LRU_INACTIVE_ANON);
 	file  = zone_nr_pages(zone, sc, LRU_ACTIVE_FILE) +
@@ -1400,7 +1431,7 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
 		free  = zone_page_state(zone, NR_FREE_PAGES);
 		/* If we have very few page cache pages,
 		   force-scan anon pages. */
-		if (unlikely(file + free <= zone->pages_high)) {
+		if (unlikely(file + free <= high_wmark_pages(zone))) {
 			percent[0] = 100;
 			percent[1] = 0;
 			return;
@@ -1455,6 +1486,26 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
 	percent[1] = 100 - percent[0];
 }
 
+/*
+ * Smallish @nr_to_scan's are deposited in @nr_saved_scan,
+ * until we collected @swap_cluster_max pages to scan.
+ */
+static unsigned long nr_scan_try_batch(unsigned long nr_to_scan,
+				       unsigned long *nr_saved_scan,
+				       unsigned long swap_cluster_max)
+{
+	unsigned long nr;
+
+	*nr_saved_scan += nr_to_scan;
+	nr = *nr_saved_scan;
+
+	if (nr >= swap_cluster_max)
+		*nr_saved_scan = 0;
+	else
+		nr = 0;
+
+	return nr;
+}
 
 /*
  * This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
@@ -1468,26 +1519,30 @@ static void shrink_zone(int priority, struct zone *zone,
 	enum lru_list l;
 	unsigned long nr_reclaimed = sc->nr_reclaimed;
 	unsigned long swap_cluster_max = sc->swap_cluster_max;
+	int noswap = 0;
 
-	get_scan_ratio(zone, sc, percent);
+	/* If we have no swap space, do not bother scanning anon pages. */
+	if (!sc->may_swap || (nr_swap_pages <= 0)) {
+		noswap = 1;
+		percent[0] = 0;
+		percent[1] = 100;
+	} else
+		get_scan_ratio(zone, sc, percent);
 
 	for_each_evictable_lru(l) {
 		int file = is_file_lru(l);
 		unsigned long scan;
 
 		scan = zone_nr_pages(zone, sc, l);
-		if (priority) {
+		if (priority || noswap) {
 			scan >>= priority;
 			scan = (scan * percent[file]) / 100;
 		}
-		if (scanning_global_lru(sc)) {
-			zone->lru[l].nr_scan += scan;
-			nr[l] = zone->lru[l].nr_scan;
-			if (nr[l] >= swap_cluster_max)
-				zone->lru[l].nr_scan = 0;
-			else
-				nr[l] = 0;
-		} else
+		if (scanning_global_lru(sc))
+			nr[l] = nr_scan_try_batch(scan,
+						  &zone->lru[l].nr_saved_scan,
+						  swap_cluster_max);
+		else
 			nr[l] = scan;
 	}
 
@@ -1521,7 +1576,7 @@ static void shrink_zone(int priority, struct zone *zone,
 	 * Even if we did not try to evict anon pages at all, we want to
 	 * rebalance the anon lru active/inactive ratio.
 	 */
-	if (inactive_anon_is_low(zone, sc))
+	if (inactive_anon_is_low(zone, sc) && nr_swap_pages > 0)
 		shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
 
 	throttle_vm_writeout(sc->gfp_mask);
@@ -1532,11 +1587,13 @@
  * try to reclaim pages from zones which will satisfy the caller's allocation
  * request.
  *
- * We reclaim from a zone even if that zone is over pages_high. Because:
+ * We reclaim from a zone even if that zone is over high_wmark_pages(zone).
+ * Because:
  * a) The caller may be trying to free *extra* pages to satisfy a higher-order
  *    allocation or
- * b) The zones may be over pages_high but they must go *over* pages_high to
- *    satisfy the `incremental min' zone defense algorithm.
+ * b) The target zone may be at high_wmark_pages(zone) but the lower zones
+ *    must go *over* high_wmark_pages(zone) to satisfy the `incremental min'
+ *    zone defense algorithm.
  *
  * If a zone is deemed to be full of pinned pages then just give it a light
  * scan then give up on it.
@@ -1742,7 +1799,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 
 /*
  * For kswapd, balance_pgdat() will work across all this node's zones until
- * they are all at pages_high.
+ * they are all at high_wmark_pages(zone).
  *
  * Returns the number of pages which were actually freed.
 *
@@ -1755,11 +1812,11 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 * the zone for when the problem goes away.
 *
 * kswapd scans the zones in the highmem->normal->dma direction. It skips
- * zones which have free_pages > pages_high, but once a zone is found to have
- * free_pages <= pages_high, we scan that zone and the lower zones regardless
- * of the number of free pages in the lower zones. This interoperates with
- * the page allocator fallback scheme to ensure that aging of pages is balanced
- * across the zones.
+ * zones which have free_pages > high_wmark_pages(zone), but once a zone is
+ * found to have free_pages <= high_wmark_pages(zone), we scan that zone and the
+ * lower zones regardless of the number of free pages in the lower zones. This
+ * interoperates with the page allocator fallback scheme to ensure that aging
+ * of pages is balanced across the zones.
 */
 static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
 {
@@ -1780,7 +1837,8 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
 	};
 	/*
 	 * temp_priority is used to remember the scanning priority at which
-	 * this zone was successfully refilled to free_pages == pages_high.
+	 * this zone was successfully refilled to
+	 * free_pages == high_wmark_pages(zone).
 	 */
 	int temp_priority[MAX_NR_ZONES];
 
@@ -1825,8 +1883,8 @@ loop_again:
 				shrink_active_list(SWAP_CLUSTER_MAX, zone,
 							&sc, priority, 0);
 
-			if (!zone_watermark_ok(zone, order, zone->pages_high,
-					       0, 0)) {
+			if (!zone_watermark_ok(zone, order,
+					high_wmark_pages(zone), 0, 0)) {
 				end_zone = i;
 				break;
 			}
@@ -1860,8 +1918,8 @@ loop_again:
 					priority != DEF_PRIORITY)
 				continue;
 
-			if (!zone_watermark_ok(zone, order, zone->pages_high,
-					       end_zone, 0))
+			if (!zone_watermark_ok(zone, order,
+					high_wmark_pages(zone), end_zone, 0))
 				all_zones_ok = 0;
 			temp_priority[i] = priority;
 			sc.nr_scanned = 0;
@@ -1870,8 +1928,8 @@ loop_again:
 			 * We put equal pressure on every zone, unless one
 			 * zone has way too many pages free already.
 			 */
-			if (!zone_watermark_ok(zone, order, 8*zone->pages_high,
-						end_zone, 0))
+			if (!zone_watermark_ok(zone, order,
+					8*high_wmark_pages(zone), end_zone, 0))
 				shrink_zone(priority, zone, &sc);
 			reclaim_state->reclaimed_slab = 0;
 			nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
@@ -2037,7 +2095,7 @@ void wakeup_kswapd(struct zone *zone, int order)
 		return;
 
 	pgdat = zone->zone_pgdat;
-	if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0))
+	if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0))
 		return;
 	if (pgdat->kswapd_max_order < order)
 		pgdat->kswapd_max_order = order;
@@ -2084,11 +2142,11 @@ static void shrink_all_zones(unsigned long nr_pages, int prio,
 						l == LRU_ACTIVE_FILE))
 				continue;
 
-			zone->lru[l].nr_scan += (lru_pages >> prio) + 1;
-			if (zone->lru[l].nr_scan >= nr_pages || pass > 3) {
+			zone->lru[l].nr_saved_scan += (lru_pages >> prio) + 1;
+			if (zone->lru[l].nr_saved_scan >= nr_pages || pass > 3) {
 				unsigned long nr_to_scan;
 
-				zone->lru[l].nr_scan = 0;
+				zone->lru[l].nr_saved_scan = 0;
 				nr_to_scan = min(nr_pages, lru_pages);
 				nr_reclaimed += shrink_list(l, nr_to_scan, zone,
 								sc, prio);
@@ -2290,6 +2348,48 @@ int sysctl_min_unmapped_ratio = 1;
  */
 int sysctl_min_slab_ratio = 5;
 
+static inline unsigned long zone_unmapped_file_pages(struct zone *zone)
+{
+	unsigned long file_mapped = zone_page_state(zone, NR_FILE_MAPPED);
+	unsigned long file_lru = zone_page_state(zone, NR_INACTIVE_FILE) +
+		zone_page_state(zone, NR_ACTIVE_FILE);
+
+	/*
+	 * It's possible for there to be more file mapped pages than
+	 * accounted for by the pages on the file LRU lists because
+	 * tmpfs pages accounted for as ANON can also be FILE_MAPPED
+	 */
+	return (file_lru > file_mapped) ? (file_lru - file_mapped) : 0;
+}
+
+/* Work out how many page cache pages we can reclaim in this reclaim_mode */
+static long zone_pagecache_reclaimable(struct zone *zone)
+{
+	long nr_pagecache_reclaimable;
+	long delta = 0;
+
+	/*
+	 * If RECLAIM_SWAP is set, then all file pages are considered
+	 * potentially reclaimable. Otherwise, we have to worry about
+	 * pages like swapcache and zone_unmapped_file_pages() provides
+	 * a better estimate
+	 */
+	if (zone_reclaim_mode & RECLAIM_SWAP)
+		nr_pagecache_reclaimable = zone_page_state(zone, NR_FILE_PAGES);
+	else
+		nr_pagecache_reclaimable = zone_unmapped_file_pages(zone);
+
+	/* If we can't clean pages, remove dirty pages from consideration */
+	if (!(zone_reclaim_mode & RECLAIM_WRITE))
+		delta += zone_page_state(zone, NR_FILE_DIRTY);
+
+	/* Watch for any possible underflows due to delta */
+	if (unlikely(delta > nr_pagecache_reclaimable))
+		delta = nr_pagecache_reclaimable;
+
+	return nr_pagecache_reclaimable - delta;
+}
+
 /*
  * Try to free up some pages from this zone through reclaim.
  */
@@ -2324,9 +2424,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	reclaim_state.reclaimed_slab = 0;
 	p->reclaim_state = &reclaim_state;
 
-	if (zone_page_state(zone, NR_FILE_PAGES) -
-		zone_page_state(zone, NR_FILE_MAPPED) >
-		zone->min_unmapped_pages) {
+	if (zone_pagecache_reclaimable(zone) > zone->min_unmapped_pages) {
 		/*
 		 * Free memory by calling shrink zone with increasing
 		 * priorities until we have enough memory freed.
@@ -2384,20 +2482,18 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	 * if less than a specified percentage of the zone is used by
 	 * unmapped file backed pages.
 	 */
-	if (zone_page_state(zone, NR_FILE_PAGES) -
-	    zone_page_state(zone, NR_FILE_MAPPED) <= zone->min_unmapped_pages
-	    && zone_page_state(zone, NR_SLAB_RECLAIMABLE)
-			<= zone->min_slab_pages)
-		return 0;
+	if (zone_pagecache_reclaimable(zone) <= zone->min_unmapped_pages &&
+	    zone_page_state(zone, NR_SLAB_RECLAIMABLE) <= zone->min_slab_pages)
+		return ZONE_RECLAIM_FULL;
 
 	if (zone_is_all_unreclaimable(zone))
-		return 0;
+		return ZONE_RECLAIM_FULL;
 
 	/*
 	 * Do not scan if the allocation should not be delayed.
 	 */
 	if (!(gfp_mask & __GFP_WAIT) || (current->flags & PF_MEMALLOC))
-		return 0;
+		return ZONE_RECLAIM_NOSCAN;
 
 	/*
 	 * Only run zone reclaim on the local zone or on zones that do not
@@ -2407,18 +2503,21 @@
 	 */
 	node_id = zone_to_nid(zone);
 	if (node_state(node_id, N_CPU) && node_id != numa_node_id())
-		return 0;
+		return ZONE_RECLAIM_NOSCAN;
 
 	if (zone_test_and_set_flag(zone, ZONE_RECLAIM_LOCKED))
-		return 0;
+		return ZONE_RECLAIM_NOSCAN;
+
 	ret = __zone_reclaim(zone, gfp_mask, order);
 	zone_clear_flag(zone, ZONE_RECLAIM_LOCKED);
 
+	if (!ret)
+		count_vm_event(PGSCAN_ZONE_RECLAIM_FAILED);
+
 	return ret;
 }
 #endif
 
-#ifdef CONFIG_UNEVICTABLE_LRU
 /*
  * page_evictable - test whether a page is evictable
  * @page: the page to test
@@ -2665,4 +2764,3 @@ void scan_unevictable_unregister_node(struct node *node)
 	sysdev_remove_file(&node->sysdev, &attr_scan_unevictable_pages);
 }
 
-#endif
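
Note on the nr_scan_try_batch() hunk above: small per-priority scan targets are banked in nr_saved_scan and only handed back to shrink_zone() once a full swap_cluster_max batch has accumulated. The behaviour can be exercised in isolation with the standalone userspace sketch below; the helper body is copied verbatim from the hunk, while the main() harness, the batch size of 32, and the sample request sizes are illustrative assumptions rather than part of the patch.

#include <stdio.h>

/*
 * Copied from the nr_scan_try_batch() hunk above: sub-batch scan requests
 * accumulate in *nr_saved_scan and are released only once they add up to
 * at least swap_cluster_max, so the caller always works on full batches.
 */
static unsigned long nr_scan_try_batch(unsigned long nr_to_scan,
				       unsigned long *nr_saved_scan,
				       unsigned long swap_cluster_max)
{
	unsigned long nr;

	*nr_saved_scan += nr_to_scan;
	nr = *nr_saved_scan;

	if (nr >= swap_cluster_max)
		*nr_saved_scan = 0;
	else
		nr = 0;

	return nr;
}

int main(void)
{
	unsigned long saved = 0;
	/* Illustrative inputs: tiny scan targets against a batch size of 32. */
	unsigned long requests[] = { 10, 10, 10, 10, 40 };
	unsigned long i;

	for (i = 0; i < sizeof(requests) / sizeof(requests[0]); i++) {
		unsigned long scan = nr_scan_try_batch(requests[i], &saved, 32);

		/* The first three requests are banked; the fourth releases 40. */
		printf("request %2lu -> scan %2lu (banked %2lu)\n",
		       requests[i], scan, saved);
	}
	return 0;
}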