Diffstat (limited to 'mm/vmscan.c')
 -rw-r--r--  mm/vmscan.c | 218
 1 file changed, 141 insertions(+), 77 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b94c9464f262..b8a6fdc21312 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -51,6 +51,12 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/vmscan.h>
 
+enum lumpy_mode {
+        LUMPY_MODE_NONE,
+        LUMPY_MODE_ASYNC,
+        LUMPY_MODE_SYNC,
+};
+
 struct scan_control {
         /* Incremented by the number of inactive pages that were scanned */
         unsigned long nr_scanned;
@@ -82,7 +88,7 @@ struct scan_control {
          * Intend to reclaim enough continuous memory rather than reclaim
          * enough amount of memory. i.e, mode for high order allocation.
          */
-        bool lumpy_reclaim_mode;
+        enum lumpy_mode lumpy_reclaim_mode;
 
         /* Which cgroup do we reclaim from */
         struct mem_cgroup *mem_cgroup;
@@ -265,6 +271,36 @@ unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
         return ret;
 }
 
+static void set_lumpy_reclaim_mode(int priority, struct scan_control *sc,
+                                   bool sync)
+{
+        enum lumpy_mode mode = sync ? LUMPY_MODE_SYNC : LUMPY_MODE_ASYNC;
+
+        /*
+         * Some reclaim has already failed. It is not worth trying synchronous
+         * lumpy reclaim.
+         */
+        if (sync && sc->lumpy_reclaim_mode == LUMPY_MODE_NONE)
+                return;
+
+        /*
+         * If we need a large contiguous chunk of memory, or have
+         * trouble getting a small set of contiguous pages, we
+         * will reclaim both active and inactive pages.
+         */
+        if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
+                sc->lumpy_reclaim_mode = mode;
+        else if (sc->order && priority < DEF_PRIORITY - 2)
+                sc->lumpy_reclaim_mode = mode;
+        else
+                sc->lumpy_reclaim_mode = LUMPY_MODE_NONE;
+}
+
+static void disable_lumpy_reclaim_mode(struct scan_control *sc)
+{
+        sc->lumpy_reclaim_mode = LUMPY_MODE_NONE;
+}
+
 static inline int is_page_cache_freeable(struct page *page)
 {
         /*
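The hunk above collapses the old boolean into a three-state mode machine: lumpy reclaim is either off, asynchronous, or synchronous, and a pass that never went lumpy can never be upgraded to the synchronous variant. A minimal userspace sketch of that decision logic follows; the constants mirror the kernel's values (PAGE_ALLOC_COSTLY_ORDER is 3, DEF_PRIORITY is 12), everything else here is a stand-in for illustration, not kernel code.

#include <assert.h>
#include <stdbool.h>

#define PAGE_ALLOC_COSTLY_ORDER 3
#define DEF_PRIORITY            12

enum lumpy_mode { LUMPY_MODE_NONE, LUMPY_MODE_ASYNC, LUMPY_MODE_SYNC };

struct scan_control {
        int order;                      /* allocation order being reclaimed for */
        enum lumpy_mode lumpy_reclaim_mode;
};

static void set_lumpy_reclaim_mode(int priority, struct scan_control *sc, bool sync)
{
        enum lumpy_mode mode = sync ? LUMPY_MODE_SYNC : LUMPY_MODE_ASYNC;

        /* Asking for sync only upgrades an already-active lumpy pass. */
        if (sync && sc->lumpy_reclaim_mode == LUMPY_MODE_NONE)
                return;

        if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
                sc->lumpy_reclaim_mode = mode;  /* costly order: always go lumpy */
        else if (sc->order && priority < DEF_PRIORITY - 2)
                sc->lumpy_reclaim_mode = mode;  /* small order, but reclaim is struggling */
        else
                sc->lumpy_reclaim_mode = LUMPY_MODE_NONE;
}

int main(void)
{
        struct scan_control sc = { .order = 0, .lumpy_reclaim_mode = LUMPY_MODE_NONE };

        /* Order-0 reclaim never goes lumpy. */
        set_lumpy_reclaim_mode(DEF_PRIORITY, &sc, false);
        assert(sc.lumpy_reclaim_mode == LUMPY_MODE_NONE);

        /* A hugepage-sized (order-9) request goes lumpy immediately... */
        sc.order = 9;
        set_lumpy_reclaim_mode(DEF_PRIORITY, &sc, false);
        assert(sc.lumpy_reclaim_mode == LUMPY_MODE_ASYNC);

        /* ...and may later be upgraded to the synchronous variant. */
        set_lumpy_reclaim_mode(DEF_PRIORITY, &sc, true);
        assert(sc.lumpy_reclaim_mode == LUMPY_MODE_SYNC);

        /* But a pass that never went lumpy cannot be upgraded to sync. */
        sc.order = 1;
        sc.lumpy_reclaim_mode = LUMPY_MODE_NONE;
        set_lumpy_reclaim_mode(DEF_PRIORITY, &sc, true);
        assert(sc.lumpy_reclaim_mode == LUMPY_MODE_NONE);
        return 0;
}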
@@ -275,7 +311,8 @@ static inline int is_page_cache_freeable(struct page *page)
         return page_count(page) - page_has_private(page) == 2;
 }
 
-static int may_write_to_queue(struct backing_dev_info *bdi)
+static int may_write_to_queue(struct backing_dev_info *bdi,
+                              struct scan_control *sc)
 {
         if (current->flags & PF_SWAPWRITE)
                 return 1;
@@ -283,6 +320,10 @@ static int may_write_to_queue(struct backing_dev_info *bdi)
                 return 1;
         if (bdi == current->backing_dev_info)
                 return 1;
+
+        /* lumpy reclaim for hugepage often needs a lot of writes */
+        if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
+                return 1;
         return 0;
 }
 
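The extra test means reclaim on behalf of a costly-order allocation is no longer refused writeback just because the queue looks busy. A hedged sketch of the predicate, with the BDI and queue state reduced to booleans since the real checks depend on kernel state:

#include <assert.h>
#include <stdbool.h>

#define PAGE_ALLOC_COSTLY_ORDER 3

struct scan_control { int order; };

/*
 * Stand-in for may_write_to_queue(): the first three conditions model
 * PF_SWAPWRITE, an uncongested queue, and "writing to our own queue";
 * the new rule is the order check at the end.
 */
static bool may_write_to_queue(bool swapwrite, bool queue_uncongested,
                               bool own_queue, const struct scan_control *sc)
{
        if (swapwrite)
                return true;
        if (queue_uncongested)
                return true;
        if (own_queue)
                return true;
        /* lumpy reclaim for a hugepage-sized request may write anyway */
        if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
                return true;
        return false;
}

int main(void)
{
        struct scan_control small = { .order = 0 }, huge = { .order = 9 };

        /* A congested, foreign queue blocks order-0 reclaim writeback... */
        assert(!may_write_to_queue(false, false, false, &small));
        /* ...but not reclaim for a costly-order allocation. */
        assert(may_write_to_queue(false, false, false, &huge));
        return 0;
}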
@@ -307,12 +348,6 @@ static void handle_write_error(struct address_space *mapping,
         unlock_page(page);
 }
 
-/* Request for sync pageout. */
-enum pageout_io {
-        PAGEOUT_IO_ASYNC,
-        PAGEOUT_IO_SYNC,
-};
-
 /* possible outcome of pageout() */
 typedef enum {
         /* failed to write page out, page is locked */
@@ -330,7 +365,7 @@ typedef enum {
  * Calls ->writepage().
  */
 static pageout_t pageout(struct page *page, struct address_space *mapping,
-                         enum pageout_io sync_writeback)
+                         struct scan_control *sc)
 {
         /*
          * If the page is dirty, only perform writeback if that write
@@ -366,7 +401,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
         }
         if (mapping->a_ops->writepage == NULL)
                 return PAGE_ACTIVATE;
-        if (!may_write_to_queue(mapping->backing_dev_info))
+        if (!may_write_to_queue(mapping->backing_dev_info, sc))
                 return PAGE_KEEP;
 
         if (clear_page_dirty_for_io(page)) {
@@ -376,7 +411,6 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
                         .nr_to_write = SWAP_CLUSTER_MAX,
                         .range_start = 0,
                         .range_end = LLONG_MAX,
-                        .nonblocking = 1,
                         .for_reclaim = 1,
                 };
 
@@ -394,7 +428,8 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
                  * direct reclaiming a large contiguous area and the
                  * first attempt to free a range of pages fails.
                  */
-                if (PageWriteback(page) && sync_writeback == PAGEOUT_IO_SYNC)
+                if (PageWriteback(page) &&
+                    sc->lumpy_reclaim_mode == LUMPY_MODE_SYNC)
                         wait_on_page_writeback(page);
 
                 if (!PageWriteback(page)) {
@@ -402,7 +437,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
                         ClearPageReclaim(page);
                 }
                 trace_mm_vmscan_writepage(page,
-                        trace_reclaim_flags(page, sync_writeback));
+                        trace_reclaim_flags(page, sc->lumpy_reclaim_mode));
                 inc_zone_page_state(page, NR_VMSCAN_WRITE);
                 return PAGE_SUCCESS;
         }
@@ -580,7 +615,7 @@ static enum page_references page_check_references(struct page *page,
         referenced_page = TestClearPageReferenced(page);
 
         /* Lumpy reclaim - ignore references */
-        if (sc->lumpy_reclaim_mode)
+        if (sc->lumpy_reclaim_mode != LUMPY_MODE_NONE)
                 return PAGEREF_RECLAIM;
 
         /*
@@ -616,7 +651,7 @@ static enum page_references page_check_references(struct page *page,
         }
 
         /* Reclaim if clean, defer dirty pages to writeback */
-        if (referenced_page)
+        if (referenced_page && !PageSwapBacked(page))
                 return PAGEREF_RECLAIM_CLEAN;
 
         return PAGEREF_RECLAIM;
@@ -644,12 +679,14 @@ static noinline_for_stack void free_page_list(struct list_head *free_pages)
  * shrink_page_list() returns the number of reclaimed pages
  */
 static unsigned long shrink_page_list(struct list_head *page_list,
-                                      struct scan_control *sc,
-                                      enum pageout_io sync_writeback)
+                                      struct zone *zone,
+                                      struct scan_control *sc)
 {
         LIST_HEAD(ret_pages);
         LIST_HEAD(free_pages);
         int pgactivate = 0;
+        unsigned long nr_dirty = 0;
+        unsigned long nr_congested = 0;
         unsigned long nr_reclaimed = 0;
 
         cond_resched();
@@ -669,6 +706,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                         goto keep;
 
                 VM_BUG_ON(PageActive(page));
+                VM_BUG_ON(page_zone(page) != zone);
 
                 sc->nr_scanned++;
 
@@ -694,10 +732,13 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                          * for any page for which writeback has already
                          * started.
                          */
-                        if (sync_writeback == PAGEOUT_IO_SYNC && may_enter_fs)
+                        if (sc->lumpy_reclaim_mode == LUMPY_MODE_SYNC &&
+                            may_enter_fs)
                                 wait_on_page_writeback(page);
-                        else
-                                goto keep_locked;
+                        else {
+                                unlock_page(page);
+                                goto keep_lumpy;
+                        }
                 }
 
                 references = page_check_references(page, sc);
@@ -743,6 +784,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                 }
 
                 if (PageDirty(page)) {
+                        nr_dirty++;
+
                         if (references == PAGEREF_RECLAIM_CLEAN)
                                 goto keep_locked;
                         if (!may_enter_fs)
@@ -751,14 +794,18 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                                 goto keep_locked;
 
                         /* Page is dirty, try to write it out here */
-                        switch (pageout(page, mapping, sync_writeback)) {
+                        switch (pageout(page, mapping, sc)) {
                         case PAGE_KEEP:
+                                nr_congested++;
                                 goto keep_locked;
                         case PAGE_ACTIVATE:
                                 goto activate_locked;
                         case PAGE_SUCCESS:
-                                if (PageWriteback(page) || PageDirty(page))
+                                if (PageWriteback(page))
+                                        goto keep_lumpy;
+                                if (PageDirty(page))
                                         goto keep;
+
                                 /*
                                  * A synchronous write - probably a ramdisk.  Go
                                  * ahead and try to reclaim the page.
@@ -841,6 +888,7 @@ cull_mlocked:
                         try_to_free_swap(page);
                 unlock_page(page);
                 putback_lru_page(page);
+                disable_lumpy_reclaim_mode(sc);
                 continue;
 
 activate_locked:
@@ -853,10 +901,21 @@ activate_locked:
 keep_locked:
                 unlock_page(page);
 keep:
+                disable_lumpy_reclaim_mode(sc);
+keep_lumpy:
                 list_add(&page->lru, &ret_pages);
                 VM_BUG_ON(PageLRU(page) || PageUnevictable(page));
         }
 
+        /*
+         * Tag a zone as congested if all the dirty pages encountered were
+         * backed by a congested BDI. In this case, reclaimers should just
+         * back off and wait for congestion to clear because further reclaim
+         * will encounter the same problem
+         */
+        if (nr_dirty == nr_congested)
+                zone_set_flag(zone, ZONE_CONGESTED);
+
         free_page_list(&free_pages);
 
         list_splice(&ret_pages, page_list);
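The nr_dirty/nr_congested bookkeeping added through this function feeds the new ZONE_CONGESTED flag: if every dirty page seen in the batch bounced off a congested backing device (pageout() returned PAGE_KEEP), scanning harder is unlikely to help. A small model of that end-of-batch accounting, with the zone flag reduced to a plain bool and the per-page outcomes supplied by the caller, is sketched below; the names are invented for illustration.

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>

struct zone_model { bool congested; };

enum page_outcome { CLEAN, DIRTY_WRITTEN, DIRTY_KEPT_CONGESTED };

/*
 * Mirrors the decision at the end of shrink_page_list(): count dirty
 * pages and those whose writeout was refused because the BDI was
 * congested, and tag the zone only when the two counts match.
 */
static void account_batch(struct zone_model *zone,
                          const enum page_outcome *pages, size_t n)
{
        unsigned long nr_dirty = 0, nr_congested = 0;
        size_t i;

        for (i = 0; i < n; i++) {
                if (pages[i] == CLEAN)
                        continue;
                nr_dirty++;
                if (pages[i] == DIRTY_KEPT_CONGESTED)
                        nr_congested++;
        }
        if (nr_dirty == nr_congested)
                zone->congested = true;
}

int main(void)
{
        struct zone_model zone = { false };
        enum page_outcome mixed[] = { DIRTY_WRITTEN, DIRTY_KEPT_CONGESTED, CLEAN };
        enum page_outcome all_stuck[] = { DIRTY_KEPT_CONGESTED, DIRTY_KEPT_CONGESTED };

        account_batch(&zone, mixed, 3);
        assert(!zone.congested);        /* some writeback still made progress */

        account_batch(&zone, all_stuck, 2);
        assert(zone.congested);         /* every dirty page hit a congested BDI */
        return 0;
}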
@@ -1006,7 +1065,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 
                         /* Check that we have not crossed a zone boundary. */
                         if (unlikely(page_zone_id(cursor_page) != zone_id))
-                                continue;
+                                break;
 
                         /*
                          * If we don't have enough swap space, reclaiming of
@@ -1014,8 +1073,8 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
                          * pointless.
                          */
                         if (nr_swap_pages <= 0 && PageAnon(cursor_page) &&
                             !PageSwapCache(cursor_page))
-                                continue;
+                                break;
 
                         if (__isolate_lru_page(cursor_page, mode, file) == 0) {
                                 list_move(&cursor_page->lru, dst);
@@ -1026,11 +1085,16 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
                                         nr_lumpy_dirty++;
                                 scan++;
                         } else {
-                                if (mode == ISOLATE_BOTH &&
-                                    page_count(cursor_page))
-                                        nr_lumpy_failed++;
+                                /* the page is freed already. */
+                                if (!page_count(cursor_page))
+                                        continue;
+                                break;
                         }
                 }
+
+                /* If we break out of the loop above, lumpy reclaim failed */
+                if (pfn < end_pfn)
+                        nr_lumpy_failed++;
         }
 
         *scanned = scan;
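The continue-to-break changes turn the neighbour scan into an all-or-nothing attempt: the first neighbour that cannot be isolated (wrong zone, unreclaimable anon, or still in use) aborts the rest of the block, and finishing with pfn < end_pfn is what now counts as a lumpy failure. A hedged sketch of that control flow over a toy page array; it models only the neighbour walk, not the isolation of the target page itself.

#include <assert.h>
#include <stdbool.h>

enum { BLOCK_PAGES = 8 };

struct toy_page {
        bool freed;        /* stands in for page_count() == 0               */
        bool isolatable;   /* stands in for __isolate_lru_page() succeeding */
};

/*
 * Walk the neighbours of the target page, isolate what we can, and
 * record a lumpy failure if the walk stopped before reaching end_pfn.
 */
static unsigned long scan_block(const struct toy_page *pg,
                                unsigned long start_pfn, unsigned long end_pfn,
                                unsigned long *nr_lumpy_failed)
{
        unsigned long pfn, nr_isolated = 0;

        for (pfn = start_pfn; pfn < end_pfn; pfn++) {
                if (pg[pfn].isolatable) {
                        nr_isolated++;
                        continue;
                }
                if (pg[pfn].freed)      /* hole in the block, keep going */
                        continue;
                break;                  /* busy neighbour: give up on the block */
        }
        if (pfn < end_pfn)
                (*nr_lumpy_failed)++;
        return nr_isolated;
}

int main(void)
{
        struct toy_page ok[BLOCK_PAGES], busy[BLOCK_PAGES];
        unsigned long i, failed = 0;

        for (i = 0; i < BLOCK_PAGES; i++)
                ok[i] = busy[i] = (struct toy_page){ .freed = false, .isolatable = true };
        busy[3].isolatable = false;     /* page 3 still has users */

        assert(scan_block(ok, 0, BLOCK_PAGES, &failed) == BLOCK_PAGES);
        assert(failed == 0);

        /* Page 3 is pinned: pages 4-7 are not even looked at any more. */
        assert(scan_block(busy, 0, BLOCK_PAGES, &failed) == 3);
        assert(failed == 1);
        return 0;
}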
@@ -1253,7 +1317,7 @@ static inline bool should_reclaim_stall(unsigned long nr_taken,
                 return false;
 
         /* Only stall on lumpy reclaim */
-        if (!sc->lumpy_reclaim_mode)
+        if (sc->lumpy_reclaim_mode == LUMPY_MODE_NONE)
                 return false;
 
         /* If we have relaimed everything on the isolated list, no stall */
@@ -1286,7 +1350,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
         unsigned long nr_scanned;
         unsigned long nr_reclaimed = 0;
         unsigned long nr_taken;
-        unsigned long nr_active;
         unsigned long nr_anon;
         unsigned long nr_file;
 
@@ -1298,15 +1361,15 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
                 return SWAP_CLUSTER_MAX;
         }
 
-
+        set_lumpy_reclaim_mode(priority, sc, false);
         lru_add_drain();
         spin_lock_irq(&zone->lru_lock);
 
         if (scanning_global_lru(sc)) {
                 nr_taken = isolate_pages_global(nr_to_scan,
                         &page_list, &nr_scanned, sc->order,
-                        sc->lumpy_reclaim_mode ?
-                                ISOLATE_BOTH : ISOLATE_INACTIVE,
+                        sc->lumpy_reclaim_mode == LUMPY_MODE_NONE ?
+                                        ISOLATE_INACTIVE : ISOLATE_BOTH,
                         zone, 0, file);
                 zone->pages_scanned += nr_scanned;
                 if (current_is_kswapd())
@@ -1318,8 +1381,8 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
         } else {
                 nr_taken = mem_cgroup_isolate_pages(nr_to_scan,
                         &page_list, &nr_scanned, sc->order,
-                        sc->lumpy_reclaim_mode ?
-                                ISOLATE_BOTH : ISOLATE_INACTIVE,
+                        sc->lumpy_reclaim_mode == LUMPY_MODE_NONE ?
+                                        ISOLATE_INACTIVE : ISOLATE_BOTH,
                         zone, sc->mem_cgroup,
                         0, file);
                 /*
@@ -1337,20 +1400,12 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 
         spin_unlock_irq(&zone->lru_lock);
 
-        nr_reclaimed = shrink_page_list(&page_list, sc, PAGEOUT_IO_ASYNC);
+        nr_reclaimed = shrink_page_list(&page_list, zone, sc);
 
         /* Check if we should syncronously wait for writeback */
         if (should_reclaim_stall(nr_taken, nr_reclaimed, priority, sc)) {
-                congestion_wait(BLK_RW_ASYNC, HZ/10);
-
-                /*
-                 * The attempt at page out may have made some
-                 * of the pages active, mark them inactive again.
-                 */
-                nr_active = clear_active_flags(&page_list, NULL);
-                count_vm_events(PGDEACTIVATE, nr_active);
-
-                nr_reclaimed += shrink_page_list(&page_list, sc, PAGEOUT_IO_SYNC);
+                set_lumpy_reclaim_mode(priority, sc, true);
+                nr_reclaimed += shrink_page_list(&page_list, zone, sc);
         }
 
         local_irq_disable();
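Instead of an unconditional congestion_wait() followed by a synchronous pass, shrink_inactive_list() now simply re-runs shrink_page_list() with the mode bumped to LUMPY_MODE_SYNC, and only when should_reclaim_stall() decides the asynchronous pass made too little progress. A stripped-down model of that two-pass flow is sketched below; progress is faked with counters, and the real should_reclaim_stall() also looks at priority and the specific failure counts.

#include <assert.h>
#include <stdbool.h>

enum lumpy_mode { LUMPY_MODE_NONE, LUMPY_MODE_ASYNC, LUMPY_MODE_SYNC };

struct scan_control { enum lumpy_mode lumpy_reclaim_mode; };

/* Fake page-list shrink: the synchronous pass is allowed to reclaim more. */
static unsigned long shrink_page_list_model(const struct scan_control *sc,
                                            unsigned long reclaimable_async,
                                            unsigned long reclaimable_sync)
{
        return sc->lumpy_reclaim_mode == LUMPY_MODE_SYNC ?
                        reclaimable_sync : reclaimable_async;
}

static bool should_reclaim_stall_model(unsigned long nr_taken,
                                       unsigned long nr_reclaimed,
                                       const struct scan_control *sc)
{
        if (sc->lumpy_reclaim_mode == LUMPY_MODE_NONE)
                return false;           /* only lumpy reclaim ever stalls */
        return nr_reclaimed < nr_taken; /* the first pass left work behind */
}

int main(void)
{
        struct scan_control sc = { .lumpy_reclaim_mode = LUMPY_MODE_ASYNC };
        unsigned long nr_taken = 32, nr_reclaimed;

        /* First pass is always asynchronous. */
        nr_reclaimed = shrink_page_list_model(&sc, 8, 24);

        /* Too little progress on a lumpy pass: retry synchronously. */
        if (should_reclaim_stall_model(nr_taken, nr_reclaimed, &sc)) {
                sc.lumpy_reclaim_mode = LUMPY_MODE_SYNC;
                nr_reclaimed += shrink_page_list_model(&sc, 8, 24);
        }
        assert(nr_reclaimed == 32);
        return 0;
}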
@@ -1359,6 +1414,12 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
         __count_zone_vm_events(PGSTEAL, zone, nr_reclaimed);
 
         putback_lru_pages(zone, sc, nr_anon, nr_file, &page_list);
+
+        trace_mm_vmscan_lru_shrink_inactive(zone->zone_pgdat->node_id,
+                zone_idx(zone),
+                nr_scanned, nr_reclaimed,
+                priority,
+                trace_shrink_flags(file, sc->lumpy_reclaim_mode));
         return nr_reclaimed;
 }
 
@@ -1506,6 +1567,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
         spin_unlock_irq(&zone->lru_lock);
 }
 
+#ifdef CONFIG_SWAP
 static int inactive_anon_is_low_global(struct zone *zone)
 {
         unsigned long active, inactive;
@@ -1531,12 +1593,26 @@ static int inactive_anon_is_low(struct zone *zone, struct scan_control *sc)
 {
         int low;
 
+        /*
+         * If we don't have swap space, anonymous page deactivation
+         * is pointless.
+         */
+        if (!total_swap_pages)
+                return 0;
+
         if (scanning_global_lru(sc))
                 low = inactive_anon_is_low_global(zone);
         else
                 low = mem_cgroup_inactive_anon_is_low(sc->mem_cgroup);
         return low;
 }
+#else
+static inline int inactive_anon_is_low(struct zone *zone,
+                                       struct scan_control *sc)
+{
+        return 0;
+}
+#endif
 
 static int inactive_file_is_low_global(struct zone *zone)
 {
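Wrapping inactive_anon_is_low() in CONFIG_SWAP, and checking total_swap_pages inside it, lets the caller in shrink_zone() drop its own nr_swap_pages > 0 test, as a later hunk does. The pattern in miniature, with invented names and the ratio check stubbed out:

#include <assert.h>

/* Toggle to model building with or without CONFIG_SWAP. */
#define CONFIG_SWAP 1

static long total_swap_pages = 0;       /* no swap configured at run time */

#ifdef CONFIG_SWAP
static int inactive_anon_is_low(void)
{
        /* Without swap, deactivating anonymous pages is pointless. */
        if (!total_swap_pages)
                return 0;
        return 1;                       /* stand-in for the real ratio check */
}
#else
static inline int inactive_anon_is_low(void)
{
        return 0;                       /* compiled out: never rebalance anon */
}
#endif

int main(void)
{
        /* The caller no longer needs its own "is swap available?" guard. */
        if (inactive_anon_is_low())
                assert(0 && "would shrink the active anon list here");
        return 0;
}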
@@ -1721,21 +1797,6 @@ out:
         }
 }
 
-static void set_lumpy_reclaim_mode(int priority, struct scan_control *sc)
-{
-        /*
-         * If we need a large contiguous chunk of memory, or have
-         * trouble getting a small set of contiguous pages, we
-         * will reclaim both active and inactive pages.
-         */
-        if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
-                sc->lumpy_reclaim_mode = 1;
-        else if (sc->order && priority < DEF_PRIORITY - 2)
-                sc->lumpy_reclaim_mode = 1;
-        else
-                sc->lumpy_reclaim_mode = 0;
-}
-
 /*
  * This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
  */
@@ -1750,8 +1811,6 @@ static void shrink_zone(int priority, struct zone *zone,
 
         get_scan_count(zone, sc, nr, priority);
 
-        set_lumpy_reclaim_mode(priority, sc);
-
         while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
                                 nr[LRU_INACTIVE_FILE]) {
                 for_each_evictable_lru(l) {
@@ -1782,7 +1841,7 @@ static void shrink_zone(int priority, struct zone *zone,
          * Even if we did not try to evict anon pages at all, we want to
          * rebalance the anon lru active/inactive ratio.
          */
-        if (inactive_anon_is_low(zone, sc) && nr_swap_pages > 0)
+        if (inactive_anon_is_low(zone, sc))
                 shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
 
         throttle_vm_writeout(sc->gfp_mask);
@@ -1937,21 +1996,16 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 
                 /* Take a nap, wait for some writeback to complete */
                 if (!sc->hibernation_mode && sc->nr_scanned &&
-                    priority < DEF_PRIORITY - 2)
-                        congestion_wait(BLK_RW_ASYNC, HZ/10);
+                    priority < DEF_PRIORITY - 2) {
+                        struct zone *preferred_zone;
+
+                        first_zones_zonelist(zonelist, gfp_zone(sc->gfp_mask),
+                                                NULL, &preferred_zone);
+                        wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/10);
+                }
         }
 
 out:
-        /*
-         * Now that we've scanned all the zones at this priority level, note
-         * that level within the zone so that the next thread which performs
-         * scanning of this zone will immediately start out at this priority
-         * level. This affects only the decision whether or not to bring
-         * mapped pages onto the inactive list.
-         */
-        if (priority < 0)
-                priority = 0;
-
         delayacct_freepages_end();
         put_mems_allowed();
 
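Where congestion_wait() slept unconditionally, wait_iff_congested() is meant to back off only when the preferred zone has been tagged ZONE_CONGESTED by shrink_page_list() and a backing device is actually congested, and kswapd clears the tag again once a zone is back over its high watermark (see the next hunk). A toy model of that flag's life cycle, with the sleep replaced by a counter and all names invented for illustration:

#include <assert.h>
#include <stdbool.h>

struct zone_model {
        bool congested;                 /* stands in for ZONE_CONGESTED */
        bool over_high_watermark;
};

static unsigned long sleeps;            /* how often we would have slept */

/* Roughly what wait_iff_congested() is for: back off only when it helps. */
static void wait_iff_congested_model(const struct zone_model *zone, bool bdi_congested)
{
        if (!zone->congested || !bdi_congested)
                return;                 /* no congestion: don't sleep at all */
        sleeps++;                       /* would congestion_wait(HZ/10) here */
}

/* kswapd side: a balanced zone is no longer considered congested. */
static void balance_zone_model(struct zone_model *zone)
{
        if (zone->over_high_watermark)
                zone->congested = false;
}

int main(void)
{
        struct zone_model zone = { .congested = true, .over_high_watermark = false };

        wait_iff_congested_model(&zone, true);   /* congested: back off once */
        assert(sleeps == 1);

        zone.over_high_watermark = true;
        balance_zone_model(&zone);               /* kswapd clears the flag */
        wait_iff_congested_model(&zone, true);   /* now returns immediately */
        assert(sleeps == 1);
        return 0;
}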
@@ -2247,6 +2301,15 @@ loop_again:
                                 if (!zone_watermark_ok(zone, order,
                                         min_wmark_pages(zone), end_zone, 0))
                                         has_under_min_watermark_zone = 1;
+                        } else {
+                                /*
+                                 * If a zone reaches its high watermark,
+                                 * consider it to be no longer congested. It's
+                                 * possible there are dirty pages backed by
+                                 * congested BDIs but as pressure is relieved,
+                                 * speculatively avoid congestion waits
+                                 */
+                                zone_clear_flag(zone, ZONE_CONGESTED);
                         }
 
                 }
@@ -2987,6 +3050,7 @@ int scan_unevictable_handler(struct ctl_table *table, int write,
         return 0;
 }
 
+#ifdef CONFIG_NUMA
 /*
  * per node 'scan_unevictable_pages' attribute. On demand re-scan of
  * a specified node's per zone unevictable lists for evictable pages.
@@ -3033,4 +3097,4 @@ void scan_unevictable_unregister_node(struct node *node)
 {
         sysdev_remove_file(&node->sysdev, &attr_scan_unevictable_pages);
 }
-
+#endif
