aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/trace/events/vmscan.h6
-rw-r--r--mm/vmscan.c120
2 files changed, 78 insertions, 48 deletions
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index ecf952192a93..c255fcc587bf 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -25,13 +25,13 @@
25 25
26#define trace_reclaim_flags(page, sync) ( \ 26#define trace_reclaim_flags(page, sync) ( \
27 (page_is_file_cache(page) ? RECLAIM_WB_FILE : RECLAIM_WB_ANON) | \ 27 (page_is_file_cache(page) ? RECLAIM_WB_FILE : RECLAIM_WB_ANON) | \
28 (sync == PAGEOUT_IO_SYNC ? RECLAIM_WB_SYNC : RECLAIM_WB_ASYNC) \ 28 (sync == LUMPY_MODE_SYNC ? RECLAIM_WB_SYNC : RECLAIM_WB_ASYNC) \
29 ) 29 )
30 30
31#define trace_shrink_flags(file, sync) ( \ 31#define trace_shrink_flags(file, sync) ( \
32 (sync == PAGEOUT_IO_SYNC ? RECLAIM_WB_MIXED : \ 32 (sync == LUMPY_MODE_SYNC ? RECLAIM_WB_MIXED : \
33 (file ? RECLAIM_WB_FILE : RECLAIM_WB_ANON)) | \ 33 (file ? RECLAIM_WB_FILE : RECLAIM_WB_ANON)) | \
34 (sync == PAGEOUT_IO_SYNC ? RECLAIM_WB_SYNC : RECLAIM_WB_ASYNC) \ 34 (sync == LUMPY_MODE_SYNC ? RECLAIM_WB_SYNC : RECLAIM_WB_ASYNC) \
35 ) 35 )
36 36
37TRACE_EVENT(mm_vmscan_kswapd_sleep, 37TRACE_EVENT(mm_vmscan_kswapd_sleep,
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 399d54e8a82c..d9fc2dce93af 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -51,6 +51,12 @@
51#define CREATE_TRACE_POINTS 51#define CREATE_TRACE_POINTS
52#include <trace/events/vmscan.h> 52#include <trace/events/vmscan.h>
53 53
54enum lumpy_mode {
55 LUMPY_MODE_NONE,
56 LUMPY_MODE_ASYNC,
57 LUMPY_MODE_SYNC,
58};
59
54struct scan_control { 60struct scan_control {
55 /* Incremented by the number of inactive pages that were scanned */ 61 /* Incremented by the number of inactive pages that were scanned */
56 unsigned long nr_scanned; 62 unsigned long nr_scanned;
@@ -82,7 +88,7 @@ struct scan_control {
82 * Intend to reclaim enough contiguous memory rather than reclaim 88 * Intend to reclaim enough contiguous memory rather than reclaim
83 * a sufficient amount of memory, i.e., mode for high-order allocation. 89 * a sufficient amount of memory, i.e., mode for high-order allocation.
84 */ 90 */
85 bool lumpy_reclaim_mode; 91 enum lumpy_mode lumpy_reclaim_mode;
86 92
87 /* Which cgroup do we reclaim from */ 93 /* Which cgroup do we reclaim from */
88 struct mem_cgroup *mem_cgroup; 94 struct mem_cgroup *mem_cgroup;
@@ -265,6 +271,36 @@ unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
265 return ret; 271 return ret;
266} 272}
267 273
274static void set_lumpy_reclaim_mode(int priority, struct scan_control *sc,
275 bool sync)
276{
277 enum lumpy_mode mode = sync ? LUMPY_MODE_SYNC : LUMPY_MODE_ASYNC;
278
279 /*
280 * Some reclaim has already failed, so it is not worth trying synchronous
281 * lumpy reclaim.
282 */
283 if (sync && sc->lumpy_reclaim_mode == LUMPY_MODE_NONE)
284 return;
285
286 /*
287 * If we need a large contiguous chunk of memory, or have
288 * trouble getting a small set of contiguous pages, we
289 * will reclaim both active and inactive pages.
290 */
291 if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
292 sc->lumpy_reclaim_mode = mode;
293 else if (sc->order && priority < DEF_PRIORITY - 2)
294 sc->lumpy_reclaim_mode = mode;
295 else
296 sc->lumpy_reclaim_mode = LUMPY_MODE_NONE;
297}
298
299static void disable_lumpy_reclaim_mode(struct scan_control *sc)
300{
301 sc->lumpy_reclaim_mode = LUMPY_MODE_NONE;
302}
303
268static inline int is_page_cache_freeable(struct page *page) 304static inline int is_page_cache_freeable(struct page *page)
269{ 305{
270 /* 306 /*
@@ -275,7 +311,8 @@ static inline int is_page_cache_freeable(struct page *page)
275 return page_count(page) - page_has_private(page) == 2; 311 return page_count(page) - page_has_private(page) == 2;
276} 312}
277 313
278static int may_write_to_queue(struct backing_dev_info *bdi) 314static int may_write_to_queue(struct backing_dev_info *bdi,
315 struct scan_control *sc)
279{ 316{
280 if (current->flags & PF_SWAPWRITE) 317 if (current->flags & PF_SWAPWRITE)
281 return 1; 318 return 1;
@@ -283,6 +320,10 @@ static int may_write_to_queue(struct backing_dev_info *bdi)
283 return 1; 320 return 1;
284 if (bdi == current->backing_dev_info) 321 if (bdi == current->backing_dev_info)
285 return 1; 322 return 1;
323
324 /* lumpy reclaim for hugepages often needs a lot of writes */
325 if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
326 return 1;
286 return 0; 327 return 0;
287} 328}
288 329
@@ -307,12 +348,6 @@ static void handle_write_error(struct address_space *mapping,
307 unlock_page(page); 348 unlock_page(page);
308} 349}
309 350
310/* Request for sync pageout. */
311enum pageout_io {
312 PAGEOUT_IO_ASYNC,
313 PAGEOUT_IO_SYNC,
314};
315
316/* possible outcome of pageout() */ 351/* possible outcome of pageout() */
317typedef enum { 352typedef enum {
318 /* failed to write page out, page is locked */ 353 /* failed to write page out, page is locked */
@@ -330,7 +365,7 @@ typedef enum {
330 * Calls ->writepage(). 365 * Calls ->writepage().
331 */ 366 */
332static pageout_t pageout(struct page *page, struct address_space *mapping, 367static pageout_t pageout(struct page *page, struct address_space *mapping,
333 enum pageout_io sync_writeback) 368 struct scan_control *sc)
334{ 369{
335 /* 370 /*
336 * If the page is dirty, only perform writeback if that write 371 * If the page is dirty, only perform writeback if that write
@@ -366,8 +401,10 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
366 } 401 }
367 if (mapping->a_ops->writepage == NULL) 402 if (mapping->a_ops->writepage == NULL)
368 return PAGE_ACTIVATE; 403 return PAGE_ACTIVATE;
369 if (!may_write_to_queue(mapping->backing_dev_info)) 404 if (!may_write_to_queue(mapping->backing_dev_info, sc)) {
405 disable_lumpy_reclaim_mode(sc);
370 return PAGE_KEEP; 406 return PAGE_KEEP;
407 }
371 408
372 if (clear_page_dirty_for_io(page)) { 409 if (clear_page_dirty_for_io(page)) {
373 int res; 410 int res;
@@ -393,7 +430,8 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
393 * direct reclaiming a large contiguous area and the 430 * direct reclaiming a large contiguous area and the
394 * first attempt to free a range of pages fails. 431 * first attempt to free a range of pages fails.
395 */ 432 */
396 if (PageWriteback(page) && sync_writeback == PAGEOUT_IO_SYNC) 433 if (PageWriteback(page) &&
434 sc->lumpy_reclaim_mode == LUMPY_MODE_SYNC)
397 wait_on_page_writeback(page); 435 wait_on_page_writeback(page);
398 436
399 if (!PageWriteback(page)) { 437 if (!PageWriteback(page)) {
@@ -401,7 +439,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
401 ClearPageReclaim(page); 439 ClearPageReclaim(page);
402 } 440 }
403 trace_mm_vmscan_writepage(page, 441 trace_mm_vmscan_writepage(page,
404 trace_reclaim_flags(page, sync_writeback)); 442 trace_reclaim_flags(page, sc->lumpy_reclaim_mode));
405 inc_zone_page_state(page, NR_VMSCAN_WRITE); 443 inc_zone_page_state(page, NR_VMSCAN_WRITE);
406 return PAGE_SUCCESS; 444 return PAGE_SUCCESS;
407 } 445 }
@@ -579,7 +617,7 @@ static enum page_references page_check_references(struct page *page,
579 referenced_page = TestClearPageReferenced(page); 617 referenced_page = TestClearPageReferenced(page);
580 618
581 /* Lumpy reclaim - ignore references */ 619 /* Lumpy reclaim - ignore references */
582 if (sc->lumpy_reclaim_mode) 620 if (sc->lumpy_reclaim_mode != LUMPY_MODE_NONE)
583 return PAGEREF_RECLAIM; 621 return PAGEREF_RECLAIM;
584 622
585 /* 623 /*
@@ -643,8 +681,7 @@ static noinline_for_stack void free_page_list(struct list_head *free_pages)
643 * shrink_page_list() returns the number of reclaimed pages 681 * shrink_page_list() returns the number of reclaimed pages
644 */ 682 */
645static unsigned long shrink_page_list(struct list_head *page_list, 683static unsigned long shrink_page_list(struct list_head *page_list,
646 struct scan_control *sc, 684 struct scan_control *sc)
647 enum pageout_io sync_writeback)
648{ 685{
649 LIST_HEAD(ret_pages); 686 LIST_HEAD(ret_pages);
650 LIST_HEAD(free_pages); 687 LIST_HEAD(free_pages);
@@ -693,10 +730,13 @@ static unsigned long shrink_page_list(struct list_head *page_list,
693 * for any page for which writeback has already 730 * for any page for which writeback has already
694 * started. 731 * started.
695 */ 732 */
696 if (sync_writeback == PAGEOUT_IO_SYNC && may_enter_fs) 733 if (sc->lumpy_reclaim_mode == LUMPY_MODE_SYNC &&
734 may_enter_fs)
697 wait_on_page_writeback(page); 735 wait_on_page_writeback(page);
698 else 736 else {
699 goto keep_locked; 737 unlock_page(page);
738 goto keep_lumpy;
739 }
700 } 740 }
701 741
702 references = page_check_references(page, sc); 742 references = page_check_references(page, sc);
@@ -750,14 +790,17 @@ static unsigned long shrink_page_list(struct list_head *page_list,
750 goto keep_locked; 790 goto keep_locked;
751 791
752 /* Page is dirty, try to write it out here */ 792 /* Page is dirty, try to write it out here */
753 switch (pageout(page, mapping, sync_writeback)) { 793 switch (pageout(page, mapping, sc)) {
754 case PAGE_KEEP: 794 case PAGE_KEEP:
755 goto keep_locked; 795 goto keep_locked;
756 case PAGE_ACTIVATE: 796 case PAGE_ACTIVATE:
757 goto activate_locked; 797 goto activate_locked;
758 case PAGE_SUCCESS: 798 case PAGE_SUCCESS:
759 if (PageWriteback(page) || PageDirty(page)) 799 if (PageWriteback(page))
800 goto keep_lumpy;
801 if (PageDirty(page))
760 goto keep; 802 goto keep;
803
761 /* 804 /*
762 * A synchronous write - probably a ramdisk. Go 805 * A synchronous write - probably a ramdisk. Go
763 * ahead and try to reclaim the page. 806 * ahead and try to reclaim the page.
@@ -840,6 +883,7 @@ cull_mlocked:
840 try_to_free_swap(page); 883 try_to_free_swap(page);
841 unlock_page(page); 884 unlock_page(page);
842 putback_lru_page(page); 885 putback_lru_page(page);
886 disable_lumpy_reclaim_mode(sc);
843 continue; 887 continue;
844 888
845activate_locked: 889activate_locked:
@@ -852,6 +896,8 @@ activate_locked:
852keep_locked: 896keep_locked:
853 unlock_page(page); 897 unlock_page(page);
854keep: 898keep:
899 disable_lumpy_reclaim_mode(sc);
900keep_lumpy:
855 list_add(&page->lru, &ret_pages); 901 list_add(&page->lru, &ret_pages);
856 VM_BUG_ON(PageLRU(page) || PageUnevictable(page)); 902 VM_BUG_ON(PageLRU(page) || PageUnevictable(page));
857 } 903 }
@@ -1252,7 +1298,7 @@ static inline bool should_reclaim_stall(unsigned long nr_taken,
1252 return false; 1298 return false;
1253 1299
1254 /* Only stall on lumpy reclaim */ 1300 /* Only stall on lumpy reclaim */
1255 if (!sc->lumpy_reclaim_mode) 1301 if (sc->lumpy_reclaim_mode == LUMPY_MODE_NONE)
1256 return false; 1302 return false;
1257 1303
1258 /* If we have reclaimed everything on the isolated list, no stall */ 1304 /* If we have reclaimed everything on the isolated list, no stall */
@@ -1297,15 +1343,15 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
1297 return SWAP_CLUSTER_MAX; 1343 return SWAP_CLUSTER_MAX;
1298 } 1344 }
1299 1345
1300 1346 set_lumpy_reclaim_mode(priority, sc, false);
1301 lru_add_drain(); 1347 lru_add_drain();
1302 spin_lock_irq(&zone->lru_lock); 1348 spin_lock_irq(&zone->lru_lock);
1303 1349
1304 if (scanning_global_lru(sc)) { 1350 if (scanning_global_lru(sc)) {
1305 nr_taken = isolate_pages_global(nr_to_scan, 1351 nr_taken = isolate_pages_global(nr_to_scan,
1306 &page_list, &nr_scanned, sc->order, 1352 &page_list, &nr_scanned, sc->order,
1307 sc->lumpy_reclaim_mode ? 1353 sc->lumpy_reclaim_mode == LUMPY_MODE_NONE ?
1308 ISOLATE_BOTH : ISOLATE_INACTIVE, 1354 ISOLATE_INACTIVE : ISOLATE_BOTH,
1309 zone, 0, file); 1355 zone, 0, file);
1310 zone->pages_scanned += nr_scanned; 1356 zone->pages_scanned += nr_scanned;
1311 if (current_is_kswapd()) 1357 if (current_is_kswapd())
@@ -1317,8 +1363,8 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
1317 } else { 1363 } else {
1318 nr_taken = mem_cgroup_isolate_pages(nr_to_scan, 1364 nr_taken = mem_cgroup_isolate_pages(nr_to_scan,
1319 &page_list, &nr_scanned, sc->order, 1365 &page_list, &nr_scanned, sc->order,
1320 sc->lumpy_reclaim_mode ? 1366 sc->lumpy_reclaim_mode == LUMPY_MODE_NONE ?
1321 ISOLATE_BOTH : ISOLATE_INACTIVE, 1367 ISOLATE_INACTIVE : ISOLATE_BOTH,
1322 zone, sc->mem_cgroup, 1368 zone, sc->mem_cgroup,
1323 0, file); 1369 0, file);
1324 /* 1370 /*
@@ -1336,7 +1382,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
1336 1382
1337 spin_unlock_irq(&zone->lru_lock); 1383 spin_unlock_irq(&zone->lru_lock);
1338 1384
1339 nr_reclaimed = shrink_page_list(&page_list, sc, PAGEOUT_IO_ASYNC); 1385 nr_reclaimed = shrink_page_list(&page_list, sc);
1340 1386
1341 /* Check if we should synchronously wait for writeback */ 1387 /* Check if we should synchronously wait for writeback */
1342 if (should_reclaim_stall(nr_taken, nr_reclaimed, priority, sc)) { 1388 if (should_reclaim_stall(nr_taken, nr_reclaimed, priority, sc)) {
@@ -1347,7 +1393,8 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
1347 nr_active = clear_active_flags(&page_list, NULL); 1393 nr_active = clear_active_flags(&page_list, NULL);
1348 count_vm_events(PGDEACTIVATE, nr_active); 1394 count_vm_events(PGDEACTIVATE, nr_active);
1349 1395
1350 nr_reclaimed += shrink_page_list(&page_list, sc, PAGEOUT_IO_SYNC); 1396 set_lumpy_reclaim_mode(priority, sc, true);
1397 nr_reclaimed += shrink_page_list(&page_list, sc);
1351 } 1398 }
1352 1399
1353 local_irq_disable(); 1400 local_irq_disable();
@@ -1739,21 +1786,6 @@ out:
1739 } 1786 }
1740} 1787}
1741 1788
1742static void set_lumpy_reclaim_mode(int priority, struct scan_control *sc)
1743{
1744 /*
1745 * If we need a large contiguous chunk of memory, or have
1746 * trouble getting a small set of contiguous pages, we
1747 * will reclaim both active and inactive pages.
1748 */
1749 if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
1750 sc->lumpy_reclaim_mode = 1;
1751 else if (sc->order && priority < DEF_PRIORITY - 2)
1752 sc->lumpy_reclaim_mode = 1;
1753 else
1754 sc->lumpy_reclaim_mode = 0;
1755}
1756
1757/* 1789/*
1758 * This is a basic per-zone page freer. Used by both kswapd and direct reclaim. 1790 * This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
1759 */ 1791 */
@@ -1768,8 +1800,6 @@ static void shrink_zone(int priority, struct zone *zone,
1768 1800
1769 get_scan_count(zone, sc, nr, priority); 1801 get_scan_count(zone, sc, nr, priority);
1770 1802
1771 set_lumpy_reclaim_mode(priority, sc);
1772
1773 while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || 1803 while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
1774 nr[LRU_INACTIVE_FILE]) { 1804 nr[LRU_INACTIVE_FILE]) {
1775 for_each_evictable_lru(l) { 1805 for_each_evictable_lru(l) {