author    Dan Williams <dan.j.williams@intel.com>  2009-09-08 20:55:21 -0400
committer Dan Williams <dan.j.williams@intel.com>  2009-09-08 20:55:21 -0400
commit    bbb20089a3275a19e475dbc21320c3742e3ca423
tree      216fdc1cbef450ca688135c5b8969169482d9a48 /mm/vmscan.c
parent    3e48e656903e9fd8bc805c6a2c4264d7808d315b
parent    657a77fa7284d8ae28dfa48f1dc5d919bf5b2843
Merge branch 'dmaengine' into async-tx-next

Conflicts:
	crypto/async_tx/async_xor.c
	drivers/dma/ioat/dma_v2.h
	drivers/dma/ioat/pci.c
	drivers/md/raid5.c
Diffstat (limited to 'mm/vmscan.c')
 -rw-r--r--  mm/vmscan.c | 380
 1 file changed, 240 insertions(+), 140 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d254306562cd..54155268dfca 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -470,8 +470,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page)
 		swp_entry_t swap = { .val = page_private(page) };
 		__delete_from_swap_cache(page);
 		spin_unlock_irq(&mapping->tree_lock);
-		mem_cgroup_uncharge_swapcache(page, swap);
-		swap_free(swap);
+		swapcache_free(swap, page);
 	} else {
 		__remove_from_page_cache(page);
 		spin_unlock_irq(&mapping->tree_lock);
@@ -514,7 +513,6 @@ int remove_mapping(struct address_space *mapping, struct page *page)
  *
  * lru_lock must not be held, interrupts must be enabled.
  */
-#ifdef CONFIG_UNEVICTABLE_LRU
 void putback_lru_page(struct page *page)
 {
 	int lru;
@@ -568,20 +566,6 @@ redo:
 	put_page(page);		/* drop ref from isolate */
 }
 
-#else /* CONFIG_UNEVICTABLE_LRU */
-
-void putback_lru_page(struct page *page)
-{
-	int lru;
-	VM_BUG_ON(PageLRU(page));
-
-	lru = !!TestClearPageActive(page) + page_is_file_cache(page);
-	lru_cache_add_lru(page, lru);
-	put_page(page);
-}
-#endif /* CONFIG_UNEVICTABLE_LRU */
-
-
 /*
  * shrink_page_list() returns the number of reclaimed pages
  */
@@ -593,6 +577,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 	struct pagevec freed_pvec;
 	int pgactivate = 0;
 	unsigned long nr_reclaimed = 0;
+	unsigned long vm_flags;
 
 	cond_resched();
 
@@ -643,7 +628,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			goto keep_locked;
 		}
 
-		referenced = page_referenced(page, 1, sc->mem_cgroup);
+		referenced = page_referenced(page, 1,
+						sc->mem_cgroup, &vm_flags);
 		/* In active use or really unfreeable?  Activate it. */
 		if (sc->order <= PAGE_ALLOC_COSTLY_ORDER &&
 					referenced && page_mapping_inuse(page))
@@ -851,7 +837,6 @@ int __isolate_lru_page(struct page *page, int mode, int file)
 		 */
 		ClearPageLRU(page);
 		ret = 0;
-		mem_cgroup_del_lru(page);
 	}
 
 	return ret;
@@ -899,12 +884,14 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 		switch (__isolate_lru_page(page, mode, file)) {
 		case 0:
 			list_move(&page->lru, dst);
+			mem_cgroup_del_lru(page);
 			nr_taken++;
 			break;
 
 		case -EBUSY:
 			/* else it is being freed elsewhere */
 			list_move(&page->lru, src);
+			mem_cgroup_rotate_lru_list(page, page_lru(page));
 			continue;
 
 		default:
@@ -943,18 +930,11 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 			/* Check that we have not crossed a zone boundary. */
 			if (unlikely(page_zone_id(cursor_page) != zone_id))
 				continue;
-			switch (__isolate_lru_page(cursor_page, mode, file)) {
-			case 0:
+			if (__isolate_lru_page(cursor_page, mode, file) == 0) {
 				list_move(&cursor_page->lru, dst);
+				mem_cgroup_del_lru(cursor_page);
 				nr_taken++;
 				scan++;
-				break;
-
-			case -EBUSY:
-				/* else it is being freed elsewhere */
-				list_move(&cursor_page->lru, src);
-			default:
-				break;	/* ! on LRU or wrong list */
 			}
 		}
 	}
@@ -1061,6 +1041,19 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 	unsigned long nr_scanned = 0;
 	unsigned long nr_reclaimed = 0;
 	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
+	int lumpy_reclaim = 0;
+
+	/*
+	 * If we need a large contiguous chunk of memory, or have
+	 * trouble getting a small set of contiguous pages, we
+	 * will reclaim both active and inactive pages.
+	 *
+	 * We use the same threshold as pageout congestion_wait below.
+	 */
+	if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
+		lumpy_reclaim = 1;
+	else if (sc->order && priority < DEF_PRIORITY - 2)
+		lumpy_reclaim = 1;
 
 	pagevec_init(&pvec, 1);
 
@@ -1073,19 +1066,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 		unsigned long nr_freed;
 		unsigned long nr_active;
 		unsigned int count[NR_LRU_LISTS] = { 0, };
-		int mode = ISOLATE_INACTIVE;
-
-		/*
-		 * If we need a large contiguous chunk of memory, or have
-		 * trouble getting a small set of contiguous pages, we
-		 * will reclaim both active and inactive pages.
-		 *
-		 * We use the same threshold as pageout congestion_wait below.
-		 */
-		if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
-			mode = ISOLATE_BOTH;
-		else if (sc->order && priority < DEF_PRIORITY - 2)
-			mode = ISOLATE_BOTH;
+		int mode = lumpy_reclaim ? ISOLATE_BOTH : ISOLATE_INACTIVE;
 
 		nr_taken = sc->isolate_pages(sc->swap_cluster_max,
 			     &page_list, &nr_scan, sc->order, mode,
@@ -1122,7 +1103,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 		 * but that should be acceptable to the caller
 		 */
 		if (nr_freed < nr_taken && !current_is_kswapd() &&
-			sc->order > PAGE_ALLOC_COSTLY_ORDER) {
+		    lumpy_reclaim) {
 			congestion_wait(WRITE, HZ/10);
 
 			/*
@@ -1217,18 +1198,54 @@ static inline void note_zone_scanning_priority(struct zone *zone, int priority)
  * But we had to alter page->flags anyway.
  */
 
+static void move_active_pages_to_lru(struct zone *zone,
+				     struct list_head *list,
+				     enum lru_list lru)
+{
+	unsigned long pgmoved = 0;
+	struct pagevec pvec;
+	struct page *page;
+
+	pagevec_init(&pvec, 1);
+
+	while (!list_empty(list)) {
+		page = lru_to_page(list);
+		prefetchw_prev_lru_page(page, list, flags);
+
+		VM_BUG_ON(PageLRU(page));
+		SetPageLRU(page);
+
+		VM_BUG_ON(!PageActive(page));
+		if (!is_active_lru(lru))
+			ClearPageActive(page);	/* we are de-activating */
+
+		list_move(&page->lru, &zone->lru[lru].list);
+		mem_cgroup_add_lru_list(page, lru);
+		pgmoved++;
+
+		if (!pagevec_add(&pvec, page) || list_empty(list)) {
+			spin_unlock_irq(&zone->lru_lock);
+			if (buffer_heads_over_limit)
+				pagevec_strip(&pvec);
+			__pagevec_release(&pvec);
+			spin_lock_irq(&zone->lru_lock);
+		}
+	}
+	__mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
+	if (!is_active_lru(lru))
+		__count_vm_events(PGDEACTIVATE, pgmoved);
+}
 
 static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 			struct scan_control *sc, int priority, int file)
 {
 	unsigned long pgmoved;
-	int pgdeactivate = 0;
 	unsigned long pgscanned;
+	unsigned long vm_flags;
 	LIST_HEAD(l_hold);	/* The pages which were snipped off */
+	LIST_HEAD(l_active);
 	LIST_HEAD(l_inactive);
 	struct page *page;
-	struct pagevec pvec;
-	enum lru_list lru;
 	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
 
 	lru_add_drain();
@@ -1245,13 +1262,14 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	}
 	reclaim_stat->recent_scanned[!!file] += pgmoved;
 
+	__count_zone_vm_events(PGREFILL, zone, pgscanned);
 	if (file)
 		__mod_zone_page_state(zone, NR_ACTIVE_FILE, -pgmoved);
 	else
 		__mod_zone_page_state(zone, NR_ACTIVE_ANON, -pgmoved);
 	spin_unlock_irq(&zone->lru_lock);
 
-	pgmoved = 0;
+	pgmoved = 0;  /* count referenced (mapping) mapped pages */
 	while (!list_empty(&l_hold)) {
 		cond_resched();
 		page = lru_to_page(&l_hold);
@@ -1264,58 +1282,44 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 
 		/* page_referenced clears PageReferenced */
 		if (page_mapping_inuse(page) &&
-		    page_referenced(page, 0, sc->mem_cgroup))
+		    page_referenced(page, 0, sc->mem_cgroup, &vm_flags)) {
 			pgmoved++;
+			/*
+			 * Identify referenced, file-backed active pages and
+			 * give them one more trip around the active list. So
+			 * that executable code get better chances to stay in
+			 * memory under moderate memory pressure.  Anon pages
+			 * are not likely to be evicted by use-once streaming
+			 * IO, plus JVM can create lots of anon VM_EXEC pages,
+			 * so we ignore them here.
+			 */
+			if ((vm_flags & VM_EXEC) && !PageAnon(page)) {
+				list_add(&page->lru, &l_active);
+				continue;
+			}
+		}
 
 		list_add(&page->lru, &l_inactive);
 	}
 
 	/*
-	 * Move the pages to the [file or anon] inactive list.
+	 * Move pages back to the lru list.
 	 */
-	pagevec_init(&pvec, 1);
-	lru = LRU_BASE + file * LRU_FILE;
-
 	spin_lock_irq(&zone->lru_lock);
 	/*
-	 * Count referenced pages from currently used mappings as
-	 * rotated, even though they are moved to the inactive list.
-	 * This helps balance scan pressure between file and anonymous
-	 * pages in get_scan_ratio.
+	 * Count referenced pages from currently used mappings as rotated,
+	 * even though only some of them are actually re-activated.  This
+	 * helps balance scan pressure between file and anonymous pages in
+	 * get_scan_ratio.
 	 */
 	reclaim_stat->recent_rotated[!!file] += pgmoved;
 
-	pgmoved = 0;
-	while (!list_empty(&l_inactive)) {
-		page = lru_to_page(&l_inactive);
-		prefetchw_prev_lru_page(page, &l_inactive, flags);
-		VM_BUG_ON(PageLRU(page));
-		SetPageLRU(page);
-		VM_BUG_ON(!PageActive(page));
-		ClearPageActive(page);
+	move_active_pages_to_lru(zone, &l_active,
+						LRU_ACTIVE + file * LRU_FILE);
+	move_active_pages_to_lru(zone, &l_inactive,
+						LRU_BASE + file * LRU_FILE);
 
-		list_move(&page->lru, &zone->lru[lru].list);
-		mem_cgroup_add_lru_list(page, lru);
-		pgmoved++;
-		if (!pagevec_add(&pvec, page)) {
-			__mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
-			spin_unlock_irq(&zone->lru_lock);
-			pgdeactivate += pgmoved;
-			pgmoved = 0;
-			if (buffer_heads_over_limit)
-				pagevec_strip(&pvec);
-			__pagevec_release(&pvec);
-			spin_lock_irq(&zone->lru_lock);
-		}
-	}
-	__mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
-	pgdeactivate += pgmoved;
-	__count_zone_vm_events(PGREFILL, zone, pgscanned);
-	__count_vm_events(PGDEACTIVATE, pgdeactivate);
 	spin_unlock_irq(&zone->lru_lock);
-	if (buffer_heads_over_limit)
-		pagevec_strip(&pvec);
-	pagevec_release(&pvec);
 }
 
 static int inactive_anon_is_low_global(struct zone *zone)
@@ -1350,12 +1354,48 @@ static int inactive_anon_is_low(struct zone *zone, struct scan_control *sc)
 	return low;
 }
 
+static int inactive_file_is_low_global(struct zone *zone)
+{
+	unsigned long active, inactive;
+
+	active = zone_page_state(zone, NR_ACTIVE_FILE);
+	inactive = zone_page_state(zone, NR_INACTIVE_FILE);
+
+	return (active > inactive);
+}
+
+/**
+ * inactive_file_is_low - check if file pages need to be deactivated
+ * @zone: zone to check
+ * @sc:   scan control of this context
+ *
+ * When the system is doing streaming IO, memory pressure here
+ * ensures that active file pages get deactivated, until more
+ * than half of the file pages are on the inactive list.
+ *
+ * Once we get to that situation, protect the system's working
+ * set from being evicted by disabling active file page aging.
+ *
+ * This uses a different ratio than the anonymous pages, because
+ * the page cache uses a use-once replacement algorithm.
+ */
+static int inactive_file_is_low(struct zone *zone, struct scan_control *sc)
+{
+	int low;
+
+	if (scanning_global_lru(sc))
+		low = inactive_file_is_low_global(zone);
+	else
+		low = mem_cgroup_inactive_file_is_low(sc->mem_cgroup);
+	return low;
+}
+
 static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
 	struct zone *zone, struct scan_control *sc, int priority)
 {
 	int file = is_file_lru(lru);
 
-	if (lru == LRU_ACTIVE_FILE) {
+	if (lru == LRU_ACTIVE_FILE && inactive_file_is_low(zone, sc)) {
 		shrink_active_list(nr_to_scan, zone, sc, priority, file);
 		return 0;
 	}
@@ -1384,13 +1424,6 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
 	unsigned long ap, fp;
 	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
 
-	/* If we have no swap space, do not bother scanning anon pages. */
-	if (!sc->may_swap || (nr_swap_pages <= 0)) {
-		percent[0] = 0;
-		percent[1] = 100;
-		return;
-	}
-
 	anon  = zone_nr_pages(zone, sc, LRU_ACTIVE_ANON) +
 		zone_nr_pages(zone, sc, LRU_INACTIVE_ANON);
 	file  = zone_nr_pages(zone, sc, LRU_ACTIVE_FILE) +
@@ -1400,7 +1433,7 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
 		free  = zone_page_state(zone, NR_FREE_PAGES);
 		/* If we have very few page cache pages,
 		   force-scan anon pages. */
-		if (unlikely(file + free <= zone->pages_high)) {
+		if (unlikely(file + free <= high_wmark_pages(zone))) {
 			percent[0] = 100;
 			percent[1] = 0;
 			return;
@@ -1455,6 +1488,26 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
 	percent[1] = 100 - percent[0];
 }
 
+/*
+ * Smallish @nr_to_scan's are deposited in @nr_saved_scan,
+ * until we collected @swap_cluster_max pages to scan.
+ */
+static unsigned long nr_scan_try_batch(unsigned long nr_to_scan,
+				       unsigned long *nr_saved_scan,
+				       unsigned long swap_cluster_max)
+{
+	unsigned long nr;
+
+	*nr_saved_scan += nr_to_scan;
+	nr = *nr_saved_scan;
+
+	if (nr >= swap_cluster_max)
+		*nr_saved_scan = 0;
+	else
+		nr = 0;
+
+	return nr;
+}
 
 /*
  * This is a basic per-zone page freer.  Used by both kswapd and direct reclaim.
@@ -1468,26 +1521,30 @@ static void shrink_zone(int priority, struct zone *zone,
 	enum lru_list l;
 	unsigned long nr_reclaimed = sc->nr_reclaimed;
 	unsigned long swap_cluster_max = sc->swap_cluster_max;
+	int noswap = 0;
 
-	get_scan_ratio(zone, sc, percent);
+	/* If we have no swap space, do not bother scanning anon pages. */
+	if (!sc->may_swap || (nr_swap_pages <= 0)) {
+		noswap = 1;
+		percent[0] = 0;
+		percent[1] = 100;
+	} else
+		get_scan_ratio(zone, sc, percent);
 
 	for_each_evictable_lru(l) {
 		int file = is_file_lru(l);
 		unsigned long scan;
 
 		scan = zone_nr_pages(zone, sc, l);
-		if (priority) {
+		if (priority || noswap) {
 			scan >>= priority;
 			scan = (scan * percent[file]) / 100;
 		}
-		if (scanning_global_lru(sc)) {
-			zone->lru[l].nr_scan += scan;
-			nr[l] = zone->lru[l].nr_scan;
-			if (nr[l] >= swap_cluster_max)
-				zone->lru[l].nr_scan = 0;
-			else
-				nr[l] = 0;
-		} else
+		if (scanning_global_lru(sc))
+			nr[l] = nr_scan_try_batch(scan,
+						  &zone->lru[l].nr_saved_scan,
+						  swap_cluster_max);
+		else
 			nr[l] = scan;
 	}
 
@@ -1521,7 +1578,7 @@ static void shrink_zone(int priority, struct zone *zone,
 	 * Even if we did not try to evict anon pages at all, we want to
 	 * rebalance the anon lru active/inactive ratio.
 	 */
-	if (inactive_anon_is_low(zone, sc))
+	if (inactive_anon_is_low(zone, sc) && nr_swap_pages > 0)
 		shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
 
 	throttle_vm_writeout(sc->gfp_mask);
@@ -1532,11 +1589,13 @@
  * try to reclaim pages from zones which will satisfy the caller's allocation
  * request.
  *
- * We reclaim from a zone even if that zone is over pages_high.  Because:
+ * We reclaim from a zone even if that zone is over high_wmark_pages(zone).
+ * Because:
  * a) The caller may be trying to free *extra* pages to satisfy a higher-order
  *    allocation or
- * b) The zones may be over pages_high but they must go *over* pages_high to
- *    satisfy the `incremental min' zone defense algorithm.
+ * b) The target zone may be at high_wmark_pages(zone) but the lower zones
+ *    must go *over* high_wmark_pages(zone) to satisfy the `incremental min'
+ *    zone defense algorithm.
  *
  * If a zone is deemed to be full of pinned pages then just give it a light
  * scan then give up on it.
@@ -1742,7 +1801,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 
 /*
  * For kswapd, balance_pgdat() will work across all this node's zones until
- * they are all at pages_high.
+ * they are all at high_wmark_pages(zone).
  *
  * Returns the number of pages which were actually freed.
 *
@@ -1755,11 +1814,11 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
  * the zone for when the problem goes away.
  *
  * kswapd scans the zones in the highmem->normal->dma direction.  It skips
- * zones which have free_pages > pages_high, but once a zone is found to have
- * free_pages <= pages_high, we scan that zone and the lower zones regardless
- * of the number of free pages in the lower zones. This interoperates with
- * the page allocator fallback scheme to ensure that aging of pages is balanced
- * across the zones.
+ * zones which have free_pages > high_wmark_pages(zone), but once a zone is
+ * found to have free_pages <= high_wmark_pages(zone), we scan that zone and the
+ * lower zones regardless of the number of free pages in the lower zones. This
+ * interoperates with the page allocator fallback scheme to ensure that aging
+ * of pages is balanced across the zones.
  */
 static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
 {
@@ -1780,7 +1839,8 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
 	};
 	/*
 	 * temp_priority is used to remember the scanning priority at which
-	 * this zone was successfully refilled to free_pages == pages_high.
+	 * this zone was successfully refilled to
+	 * free_pages == high_wmark_pages(zone).
 	 */
 	int temp_priority[MAX_NR_ZONES];
 
@@ -1825,8 +1885,8 @@ loop_again:
 				shrink_active_list(SWAP_CLUSTER_MAX, zone,
 							&sc, priority, 0);
 
-			if (!zone_watermark_ok(zone, order, zone->pages_high,
-					       0, 0)) {
+			if (!zone_watermark_ok(zone, order,
+					high_wmark_pages(zone), 0, 0)) {
 				end_zone = i;
 				break;
 			}
@@ -1860,8 +1920,8 @@ loop_again:
 			    priority != DEF_PRIORITY)
 				continue;
 
-			if (!zone_watermark_ok(zone, order, zone->pages_high,
-					       end_zone, 0))
+			if (!zone_watermark_ok(zone, order,
+					high_wmark_pages(zone), end_zone, 0))
 				all_zones_ok = 0;
 			temp_priority[i] = priority;
 			sc.nr_scanned = 0;
@@ -1870,8 +1930,8 @@ loop_again:
 			 * We put equal pressure on every zone, unless one
 			 * zone has way too many pages free already.
 			 */
-			if (!zone_watermark_ok(zone, order, 8*zone->pages_high,
-						end_zone, 0))
+			if (!zone_watermark_ok(zone, order,
+					8*high_wmark_pages(zone), end_zone, 0))
 				shrink_zone(priority, zone, &sc);
 			reclaim_state->reclaimed_slab = 0;
 			nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
@@ -2037,7 +2097,7 @@ void wakeup_kswapd(struct zone *zone, int order)
 		return;
 
 	pgdat = zone->zone_pgdat;
-	if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0))
+	if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0))
 		return;
 	if (pgdat->kswapd_max_order < order)
 		pgdat->kswapd_max_order = order;
@@ -2056,7 +2116,7 @@ unsigned long global_lru_pages(void)
 		+ global_page_state(NR_INACTIVE_FILE);
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_HIBERNATION
 /*
  * Helper function for shrink_all_memory().  Tries to reclaim 'nr_pages' pages
  * from LRU lists system-wide, for given pass and priority.
@@ -2084,11 +2144,11 @@ static void shrink_all_zones(unsigned long nr_pages, int prio,
 						l == LRU_ACTIVE_FILE))
 				continue;
 
-			zone->lru[l].nr_scan += (lru_pages >> prio) + 1;
-			if (zone->lru[l].nr_scan >= nr_pages || pass > 3) {
+			zone->lru[l].nr_saved_scan += (lru_pages >> prio) + 1;
+			if (zone->lru[l].nr_saved_scan >= nr_pages || pass > 3) {
 				unsigned long nr_to_scan;
 
-				zone->lru[l].nr_scan = 0;
+				zone->lru[l].nr_saved_scan = 0;
 				nr_to_scan = min(nr_pages, lru_pages);
 				nr_reclaimed += shrink_list(l, nr_to_scan, zone,
 								sc, prio);
@@ -2196,7 +2256,7 @@ out:
 
 	return sc.nr_reclaimed;
 }
-#endif
+#endif /* CONFIG_HIBERNATION */
 
 /* It's optimal to keep kswapds on the same CPUs as their memory, but
    not required for correctness.  So if the last cpu in a node goes
@@ -2290,6 +2350,48 @@ int sysctl_min_unmapped_ratio = 1;
  */
 int sysctl_min_slab_ratio = 5;
 
+static inline unsigned long zone_unmapped_file_pages(struct zone *zone)
+{
+	unsigned long file_mapped = zone_page_state(zone, NR_FILE_MAPPED);
+	unsigned long file_lru = zone_page_state(zone, NR_INACTIVE_FILE) +
+		zone_page_state(zone, NR_ACTIVE_FILE);
+
+	/*
+	 * It's possible for there to be more file mapped pages than
+	 * accounted for by the pages on the file LRU lists because
+	 * tmpfs pages accounted for as ANON can also be FILE_MAPPED
+	 */
+	return (file_lru > file_mapped) ? (file_lru - file_mapped) : 0;
+}
+
+/* Work out how many page cache pages we can reclaim in this reclaim_mode */
+static long zone_pagecache_reclaimable(struct zone *zone)
+{
+	long nr_pagecache_reclaimable;
+	long delta = 0;
+
+	/*
+	 * If RECLAIM_SWAP is set, then all file pages are considered
+	 * potentially reclaimable. Otherwise, we have to worry about
+	 * pages like swapcache and zone_unmapped_file_pages() provides
+	 * a better estimate
+	 */
+	if (zone_reclaim_mode & RECLAIM_SWAP)
+		nr_pagecache_reclaimable = zone_page_state(zone, NR_FILE_PAGES);
+	else
+		nr_pagecache_reclaimable = zone_unmapped_file_pages(zone);
+
+	/* If we can't clean pages, remove dirty pages from consideration */
+	if (!(zone_reclaim_mode & RECLAIM_WRITE))
+		delta += zone_page_state(zone, NR_FILE_DIRTY);
+
+	/* Watch for any possible underflows due to delta */
+	if (unlikely(delta > nr_pagecache_reclaimable))
+		delta = nr_pagecache_reclaimable;
+
+	return nr_pagecache_reclaimable - delta;
+}
+
 /*
  * Try to free up some pages from this zone through reclaim.
  */
@@ -2324,9 +2426,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	reclaim_state.reclaimed_slab = 0;
 	p->reclaim_state = &reclaim_state;
 
-	if (zone_page_state(zone, NR_FILE_PAGES) -
-		zone_page_state(zone, NR_FILE_MAPPED) >
-		zone->min_unmapped_pages) {
+	if (zone_pagecache_reclaimable(zone) > zone->min_unmapped_pages) {
 		/*
 		 * Free memory by calling shrink zone with increasing
 		 * priorities until we have enough memory freed.
@@ -2384,20 +2484,18 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	 * if less than a specified percentage of the zone is used by
 	 * unmapped file backed pages.
 	 */
-	if (zone_page_state(zone, NR_FILE_PAGES) -
-	    zone_page_state(zone, NR_FILE_MAPPED) <= zone->min_unmapped_pages
-	    && zone_page_state(zone, NR_SLAB_RECLAIMABLE)
-			<= zone->min_slab_pages)
-		return 0;
+	if (zone_pagecache_reclaimable(zone) <= zone->min_unmapped_pages &&
+	    zone_page_state(zone, NR_SLAB_RECLAIMABLE) <= zone->min_slab_pages)
+		return ZONE_RECLAIM_FULL;
 
 	if (zone_is_all_unreclaimable(zone))
-		return 0;
+		return ZONE_RECLAIM_FULL;
 
 	/*
 	 * Do not scan if the allocation should not be delayed.
 	 */
 	if (!(gfp_mask & __GFP_WAIT) || (current->flags & PF_MEMALLOC))
-		return 0;
+		return ZONE_RECLAIM_NOSCAN;
 
 	/*
 	 * Only run zone reclaim on the local zone or on zones that do not
@@ -2407,18 +2505,21 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	 */
 	node_id = zone_to_nid(zone);
 	if (node_state(node_id, N_CPU) && node_id != numa_node_id())
-		return 0;
+		return ZONE_RECLAIM_NOSCAN;
 
 	if (zone_test_and_set_flag(zone, ZONE_RECLAIM_LOCKED))
-		return 0;
+		return ZONE_RECLAIM_NOSCAN;
+
 	ret = __zone_reclaim(zone, gfp_mask, order);
 	zone_clear_flag(zone, ZONE_RECLAIM_LOCKED);
 
+	if (!ret)
+		count_vm_event(PGSCAN_ZONE_RECLAIM_FAILED);
+
 	return ret;
 }
 #endif
 
-#ifdef CONFIG_UNEVICTABLE_LRU
 /*
  * page_evictable - test whether a page is evictable
  * @page: the page to test
@@ -2665,4 +2766,3 @@ void scan_unevictable_unregister_node(struct node *node)
 	sysdev_remove_file(&node->sysdev, &attr_scan_unevictable_pages);
 }
 
-#endif