diff options
-rw-r--r-- | include/trace/events/vmscan.h | 6 | ||||
-rw-r--r-- | mm/vmscan.c | 120 |
2 files changed, 78 insertions, 48 deletions
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index ecf952192a93..c255fcc587bf 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h | |||
@@ -25,13 +25,13 @@ | |||
25 | 25 | ||
26 | #define trace_reclaim_flags(page, sync) ( \ | 26 | #define trace_reclaim_flags(page, sync) ( \ |
27 | (page_is_file_cache(page) ? RECLAIM_WB_FILE : RECLAIM_WB_ANON) | \ | 27 | (page_is_file_cache(page) ? RECLAIM_WB_FILE : RECLAIM_WB_ANON) | \ |
28 | (sync == PAGEOUT_IO_SYNC ? RECLAIM_WB_SYNC : RECLAIM_WB_ASYNC) \ | 28 | (sync == LUMPY_MODE_SYNC ? RECLAIM_WB_SYNC : RECLAIM_WB_ASYNC) \ |
29 | ) | 29 | ) |
30 | 30 | ||
31 | #define trace_shrink_flags(file, sync) ( \ | 31 | #define trace_shrink_flags(file, sync) ( \ |
32 | (sync == PAGEOUT_IO_SYNC ? RECLAIM_WB_MIXED : \ | 32 | (sync == LUMPY_MODE_SYNC ? RECLAIM_WB_MIXED : \ |
33 | (file ? RECLAIM_WB_FILE : RECLAIM_WB_ANON)) | \ | 33 | (file ? RECLAIM_WB_FILE : RECLAIM_WB_ANON)) | \ |
34 | (sync == PAGEOUT_IO_SYNC ? RECLAIM_WB_SYNC : RECLAIM_WB_ASYNC) \ | 34 | (sync == LUMPY_MODE_SYNC ? RECLAIM_WB_SYNC : RECLAIM_WB_ASYNC) \ |
35 | ) | 35 | ) |
36 | 36 | ||
37 | TRACE_EVENT(mm_vmscan_kswapd_sleep, | 37 | TRACE_EVENT(mm_vmscan_kswapd_sleep, |
diff --git a/mm/vmscan.c b/mm/vmscan.c index 399d54e8a82c..d9fc2dce93af 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -51,6 +51,12 @@ | |||
51 | #define CREATE_TRACE_POINTS | 51 | #define CREATE_TRACE_POINTS |
52 | #include <trace/events/vmscan.h> | 52 | #include <trace/events/vmscan.h> |
53 | 53 | ||
54 | enum lumpy_mode { | ||
55 | LUMPY_MODE_NONE, | ||
56 | LUMPY_MODE_ASYNC, | ||
57 | LUMPY_MODE_SYNC, | ||
58 | }; | ||
59 | |||
54 | struct scan_control { | 60 | struct scan_control { |
55 | /* Incremented by the number of inactive pages that were scanned */ | 61 | /* Incremented by the number of inactive pages that were scanned */ |
56 | unsigned long nr_scanned; | 62 | unsigned long nr_scanned; |
@@ -82,7 +88,7 @@ struct scan_control { | |||
82 | * Intend to reclaim enough continuous memory rather than reclaim | 88 | * Intend to reclaim enough continuous memory rather than reclaim |
83 | * enough amount of memory. i.e, mode for high order allocation. | 89 | * enough amount of memory. i.e, mode for high order allocation. |
84 | */ | 90 | */ |
85 | bool lumpy_reclaim_mode; | 91 | enum lumpy_mode lumpy_reclaim_mode; |
86 | 92 | ||
87 | /* Which cgroup do we reclaim from */ | 93 | /* Which cgroup do we reclaim from */ |
88 | struct mem_cgroup *mem_cgroup; | 94 | struct mem_cgroup *mem_cgroup; |
@@ -265,6 +271,36 @@ unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask, | |||
265 | return ret; | 271 | return ret; |
266 | } | 272 | } |
267 | 273 | ||
274 | static void set_lumpy_reclaim_mode(int priority, struct scan_control *sc, | ||
275 | bool sync) | ||
276 | { | ||
277 | enum lumpy_mode mode = sync ? LUMPY_MODE_SYNC : LUMPY_MODE_ASYNC; | ||
278 | |||
279 | /* | ||
280 | * Some reclaim attempts have already failed. It is not worth trying | ||
281 | * synchronous lumpy reclaim. | ||
282 | */ | ||
283 | if (sync && sc->lumpy_reclaim_mode == LUMPY_MODE_NONE) | ||
284 | return; | ||
285 | |||
286 | /* | ||
287 | * If we need a large contiguous chunk of memory, or have | ||
288 | * trouble getting a small set of contiguous pages, we | ||
289 | * will reclaim both active and inactive pages. | ||
290 | */ | ||
291 | if (sc->order > PAGE_ALLOC_COSTLY_ORDER) | ||
292 | sc->lumpy_reclaim_mode = mode; | ||
293 | else if (sc->order && priority < DEF_PRIORITY - 2) | ||
294 | sc->lumpy_reclaim_mode = mode; | ||
295 | else | ||
296 | sc->lumpy_reclaim_mode = LUMPY_MODE_NONE; | ||
297 | } | ||
298 | |||
299 | static void disable_lumpy_reclaim_mode(struct scan_control *sc) | ||
300 | { | ||
301 | sc->lumpy_reclaim_mode = LUMPY_MODE_NONE; | ||
302 | } | ||
303 | |||
268 | static inline int is_page_cache_freeable(struct page *page) | 304 | static inline int is_page_cache_freeable(struct page *page) |
269 | { | 305 | { |
270 | /* | 306 | /* |
@@ -275,7 +311,8 @@ static inline int is_page_cache_freeable(struct page *page) | |||
275 | return page_count(page) - page_has_private(page) == 2; | 311 | return page_count(page) - page_has_private(page) == 2; |
276 | } | 312 | } |
277 | 313 | ||
278 | static int may_write_to_queue(struct backing_dev_info *bdi) | 314 | static int may_write_to_queue(struct backing_dev_info *bdi, |
315 | struct scan_control *sc) | ||
279 | { | 316 | { |
280 | if (current->flags & PF_SWAPWRITE) | 317 | if (current->flags & PF_SWAPWRITE) |
281 | return 1; | 318 | return 1; |
@@ -283,6 +320,10 @@ static int may_write_to_queue(struct backing_dev_info *bdi) | |||
283 | return 1; | 320 | return 1; |
284 | if (bdi == current->backing_dev_info) | 321 | if (bdi == current->backing_dev_info) |
285 | return 1; | 322 | return 1; |
323 | |||
324 | /* lumpy reclaim for hugepages often needs a lot of writes */ | ||
325 | if (sc->order > PAGE_ALLOC_COSTLY_ORDER) | ||
326 | return 1; | ||
286 | return 0; | 327 | return 0; |
287 | } | 328 | } |
288 | 329 | ||
@@ -307,12 +348,6 @@ static void handle_write_error(struct address_space *mapping, | |||
307 | unlock_page(page); | 348 | unlock_page(page); |
308 | } | 349 | } |
309 | 350 | ||
310 | /* Request for sync pageout. */ | ||
311 | enum pageout_io { | ||
312 | PAGEOUT_IO_ASYNC, | ||
313 | PAGEOUT_IO_SYNC, | ||
314 | }; | ||
315 | |||
316 | /* possible outcome of pageout() */ | 351 | /* possible outcome of pageout() */ |
317 | typedef enum { | 352 | typedef enum { |
318 | /* failed to write page out, page is locked */ | 353 | /* failed to write page out, page is locked */ |
@@ -330,7 +365,7 @@ typedef enum { | |||
330 | * Calls ->writepage(). | 365 | * Calls ->writepage(). |
331 | */ | 366 | */ |
332 | static pageout_t pageout(struct page *page, struct address_space *mapping, | 367 | static pageout_t pageout(struct page *page, struct address_space *mapping, |
333 | enum pageout_io sync_writeback) | 368 | struct scan_control *sc) |
334 | { | 369 | { |
335 | /* | 370 | /* |
336 | * If the page is dirty, only perform writeback if that write | 371 | * If the page is dirty, only perform writeback if that write |
@@ -366,8 +401,10 @@ static pageout_t pageout(struct page *page, struct address_space *mapping, | |||
366 | } | 401 | } |
367 | if (mapping->a_ops->writepage == NULL) | 402 | if (mapping->a_ops->writepage == NULL) |
368 | return PAGE_ACTIVATE; | 403 | return PAGE_ACTIVATE; |
369 | if (!may_write_to_queue(mapping->backing_dev_info)) | 404 | if (!may_write_to_queue(mapping->backing_dev_info, sc)) { |
405 | disable_lumpy_reclaim_mode(sc); | ||
370 | return PAGE_KEEP; | 406 | return PAGE_KEEP; |
407 | } | ||
371 | 408 | ||
372 | if (clear_page_dirty_for_io(page)) { | 409 | if (clear_page_dirty_for_io(page)) { |
373 | int res; | 410 | int res; |
@@ -393,7 +430,8 @@ static pageout_t pageout(struct page *page, struct address_space *mapping, | |||
393 | * direct reclaiming a large contiguous area and the | 430 | * direct reclaiming a large contiguous area and the |
394 | * first attempt to free a range of pages fails. | 431 | * first attempt to free a range of pages fails. |
395 | */ | 432 | */ |
396 | if (PageWriteback(page) && sync_writeback == PAGEOUT_IO_SYNC) | 433 | if (PageWriteback(page) && |
434 | sc->lumpy_reclaim_mode == LUMPY_MODE_SYNC) | ||
397 | wait_on_page_writeback(page); | 435 | wait_on_page_writeback(page); |
398 | 436 | ||
399 | if (!PageWriteback(page)) { | 437 | if (!PageWriteback(page)) { |
@@ -401,7 +439,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping, | |||
401 | ClearPageReclaim(page); | 439 | ClearPageReclaim(page); |
402 | } | 440 | } |
403 | trace_mm_vmscan_writepage(page, | 441 | trace_mm_vmscan_writepage(page, |
404 | trace_reclaim_flags(page, sync_writeback)); | 442 | trace_reclaim_flags(page, sc->lumpy_reclaim_mode)); |
405 | inc_zone_page_state(page, NR_VMSCAN_WRITE); | 443 | inc_zone_page_state(page, NR_VMSCAN_WRITE); |
406 | return PAGE_SUCCESS; | 444 | return PAGE_SUCCESS; |
407 | } | 445 | } |
@@ -579,7 +617,7 @@ static enum page_references page_check_references(struct page *page, | |||
579 | referenced_page = TestClearPageReferenced(page); | 617 | referenced_page = TestClearPageReferenced(page); |
580 | 618 | ||
581 | /* Lumpy reclaim - ignore references */ | 619 | /* Lumpy reclaim - ignore references */ |
582 | if (sc->lumpy_reclaim_mode) | 620 | if (sc->lumpy_reclaim_mode != LUMPY_MODE_NONE) |
583 | return PAGEREF_RECLAIM; | 621 | return PAGEREF_RECLAIM; |
584 | 622 | ||
585 | /* | 623 | /* |
@@ -643,8 +681,7 @@ static noinline_for_stack void free_page_list(struct list_head *free_pages) | |||
643 | * shrink_page_list() returns the number of reclaimed pages | 681 | * shrink_page_list() returns the number of reclaimed pages |
644 | */ | 682 | */ |
645 | static unsigned long shrink_page_list(struct list_head *page_list, | 683 | static unsigned long shrink_page_list(struct list_head *page_list, |
646 | struct scan_control *sc, | 684 | struct scan_control *sc) |
647 | enum pageout_io sync_writeback) | ||
648 | { | 685 | { |
649 | LIST_HEAD(ret_pages); | 686 | LIST_HEAD(ret_pages); |
650 | LIST_HEAD(free_pages); | 687 | LIST_HEAD(free_pages); |
@@ -693,10 +730,13 @@ static unsigned long shrink_page_list(struct list_head *page_list, | |||
693 | * for any page for which writeback has already | 730 | * for any page for which writeback has already |
694 | * started. | 731 | * started. |
695 | */ | 732 | */ |
696 | if (sync_writeback == PAGEOUT_IO_SYNC && may_enter_fs) | 733 | if (sc->lumpy_reclaim_mode == LUMPY_MODE_SYNC && |
734 | may_enter_fs) | ||
697 | wait_on_page_writeback(page); | 735 | wait_on_page_writeback(page); |
698 | else | 736 | else { |
699 | goto keep_locked; | 737 | unlock_page(page); |
738 | goto keep_lumpy; | ||
739 | } | ||
700 | } | 740 | } |
701 | 741 | ||
702 | references = page_check_references(page, sc); | 742 | references = page_check_references(page, sc); |
@@ -750,14 +790,17 @@ static unsigned long shrink_page_list(struct list_head *page_list, | |||
750 | goto keep_locked; | 790 | goto keep_locked; |
751 | 791 | ||
752 | /* Page is dirty, try to write it out here */ | 792 | /* Page is dirty, try to write it out here */ |
753 | switch (pageout(page, mapping, sync_writeback)) { | 793 | switch (pageout(page, mapping, sc)) { |
754 | case PAGE_KEEP: | 794 | case PAGE_KEEP: |
755 | goto keep_locked; | 795 | goto keep_locked; |
756 | case PAGE_ACTIVATE: | 796 | case PAGE_ACTIVATE: |
757 | goto activate_locked; | 797 | goto activate_locked; |
758 | case PAGE_SUCCESS: | 798 | case PAGE_SUCCESS: |
759 | if (PageWriteback(page) || PageDirty(page)) | 799 | if (PageWriteback(page)) |
800 | goto keep_lumpy; | ||
801 | if (PageDirty(page)) | ||
760 | goto keep; | 802 | goto keep; |
803 | |||
761 | /* | 804 | /* |
762 | * A synchronous write - probably a ramdisk. Go | 805 | * A synchronous write - probably a ramdisk. Go |
763 | * ahead and try to reclaim the page. | 806 | * ahead and try to reclaim the page. |
@@ -840,6 +883,7 @@ cull_mlocked: | |||
840 | try_to_free_swap(page); | 883 | try_to_free_swap(page); |
841 | unlock_page(page); | 884 | unlock_page(page); |
842 | putback_lru_page(page); | 885 | putback_lru_page(page); |
886 | disable_lumpy_reclaim_mode(sc); | ||
843 | continue; | 887 | continue; |
844 | 888 | ||
845 | activate_locked: | 889 | activate_locked: |
@@ -852,6 +896,8 @@ activate_locked: | |||
852 | keep_locked: | 896 | keep_locked: |
853 | unlock_page(page); | 897 | unlock_page(page); |
854 | keep: | 898 | keep: |
899 | disable_lumpy_reclaim_mode(sc); | ||
900 | keep_lumpy: | ||
855 | list_add(&page->lru, &ret_pages); | 901 | list_add(&page->lru, &ret_pages); |
856 | VM_BUG_ON(PageLRU(page) || PageUnevictable(page)); | 902 | VM_BUG_ON(PageLRU(page) || PageUnevictable(page)); |
857 | } | 903 | } |
@@ -1252,7 +1298,7 @@ static inline bool should_reclaim_stall(unsigned long nr_taken, | |||
1252 | return false; | 1298 | return false; |
1253 | 1299 | ||
1254 | /* Only stall on lumpy reclaim */ | 1300 | /* Only stall on lumpy reclaim */ |
1255 | if (!sc->lumpy_reclaim_mode) | 1301 | if (sc->lumpy_reclaim_mode == LUMPY_MODE_NONE) |
1256 | return false; | 1302 | return false; |
1257 | 1303 | ||
1258 | /* If we have reclaimed everything on the isolated list, no stall */ | 1304 | /* If we have reclaimed everything on the isolated list, no stall */ |
@@ -1297,15 +1343,15 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone, | |||
1297 | return SWAP_CLUSTER_MAX; | 1343 | return SWAP_CLUSTER_MAX; |
1298 | } | 1344 | } |
1299 | 1345 | ||
1300 | 1346 | set_lumpy_reclaim_mode(priority, sc, false); | |
1301 | lru_add_drain(); | 1347 | lru_add_drain(); |
1302 | spin_lock_irq(&zone->lru_lock); | 1348 | spin_lock_irq(&zone->lru_lock); |
1303 | 1349 | ||
1304 | if (scanning_global_lru(sc)) { | 1350 | if (scanning_global_lru(sc)) { |
1305 | nr_taken = isolate_pages_global(nr_to_scan, | 1351 | nr_taken = isolate_pages_global(nr_to_scan, |
1306 | &page_list, &nr_scanned, sc->order, | 1352 | &page_list, &nr_scanned, sc->order, |
1307 | sc->lumpy_reclaim_mode ? | 1353 | sc->lumpy_reclaim_mode == LUMPY_MODE_NONE ? |
1308 | ISOLATE_BOTH : ISOLATE_INACTIVE, | 1354 | ISOLATE_INACTIVE : ISOLATE_BOTH, |
1309 | zone, 0, file); | 1355 | zone, 0, file); |
1310 | zone->pages_scanned += nr_scanned; | 1356 | zone->pages_scanned += nr_scanned; |
1311 | if (current_is_kswapd()) | 1357 | if (current_is_kswapd()) |
@@ -1317,8 +1363,8 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone, | |||
1317 | } else { | 1363 | } else { |
1318 | nr_taken = mem_cgroup_isolate_pages(nr_to_scan, | 1364 | nr_taken = mem_cgroup_isolate_pages(nr_to_scan, |
1319 | &page_list, &nr_scanned, sc->order, | 1365 | &page_list, &nr_scanned, sc->order, |
1320 | sc->lumpy_reclaim_mode ? | 1366 | sc->lumpy_reclaim_mode == LUMPY_MODE_NONE ? |
1321 | ISOLATE_BOTH : ISOLATE_INACTIVE, | 1367 | ISOLATE_INACTIVE : ISOLATE_BOTH, |
1322 | zone, sc->mem_cgroup, | 1368 | zone, sc->mem_cgroup, |
1323 | 0, file); | 1369 | 0, file); |
1324 | /* | 1370 | /* |
@@ -1336,7 +1382,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone, | |||
1336 | 1382 | ||
1337 | spin_unlock_irq(&zone->lru_lock); | 1383 | spin_unlock_irq(&zone->lru_lock); |
1338 | 1384 | ||
1339 | nr_reclaimed = shrink_page_list(&page_list, sc, PAGEOUT_IO_ASYNC); | 1385 | nr_reclaimed = shrink_page_list(&page_list, sc); |
1340 | 1386 | ||
1341 | /* Check if we should synchronously wait for writeback */ | 1387 | /* Check if we should synchronously wait for writeback */ |
1342 | if (should_reclaim_stall(nr_taken, nr_reclaimed, priority, sc)) { | 1388 | if (should_reclaim_stall(nr_taken, nr_reclaimed, priority, sc)) { |
@@ -1347,7 +1393,8 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone, | |||
1347 | nr_active = clear_active_flags(&page_list, NULL); | 1393 | nr_active = clear_active_flags(&page_list, NULL); |
1348 | count_vm_events(PGDEACTIVATE, nr_active); | 1394 | count_vm_events(PGDEACTIVATE, nr_active); |
1349 | 1395 | ||
1350 | nr_reclaimed += shrink_page_list(&page_list, sc, PAGEOUT_IO_SYNC); | 1396 | set_lumpy_reclaim_mode(priority, sc, true); |
1397 | nr_reclaimed += shrink_page_list(&page_list, sc); | ||
1351 | } | 1398 | } |
1352 | 1399 | ||
1353 | local_irq_disable(); | 1400 | local_irq_disable(); |
@@ -1739,21 +1786,6 @@ out: | |||
1739 | } | 1786 | } |
1740 | } | 1787 | } |
1741 | 1788 | ||
1742 | static void set_lumpy_reclaim_mode(int priority, struct scan_control *sc) | ||
1743 | { | ||
1744 | /* | ||
1745 | * If we need a large contiguous chunk of memory, or have | ||
1746 | * trouble getting a small set of contiguous pages, we | ||
1747 | * will reclaim both active and inactive pages. | ||
1748 | */ | ||
1749 | if (sc->order > PAGE_ALLOC_COSTLY_ORDER) | ||
1750 | sc->lumpy_reclaim_mode = 1; | ||
1751 | else if (sc->order && priority < DEF_PRIORITY - 2) | ||
1752 | sc->lumpy_reclaim_mode = 1; | ||
1753 | else | ||
1754 | sc->lumpy_reclaim_mode = 0; | ||
1755 | } | ||
1756 | |||
1757 | /* | 1789 | /* |
1758 | * This is a basic per-zone page freer. Used by both kswapd and direct reclaim. | 1790 | * This is a basic per-zone page freer. Used by both kswapd and direct reclaim. |
1759 | */ | 1791 | */ |
@@ -1768,8 +1800,6 @@ static void shrink_zone(int priority, struct zone *zone, | |||
1768 | 1800 | ||
1769 | get_scan_count(zone, sc, nr, priority); | 1801 | get_scan_count(zone, sc, nr, priority); |
1770 | 1802 | ||
1771 | set_lumpy_reclaim_mode(priority, sc); | ||
1772 | |||
1773 | while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || | 1803 | while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || |
1774 | nr[LRU_INACTIVE_FILE]) { | 1804 | nr[LRU_INACTIVE_FILE]) { |
1775 | for_each_evictable_lru(l) { | 1805 | for_each_evictable_lru(l) { |