path: root/mm/vmscan.c
author    KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>	2010-10-26 17:21:42 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>	2010-10-26 19:52:07 -0400
commit    7d3579e8e61937cbba268ea9b218d006b6d64221 (patch)
tree      4fa1863641343eee551681d60a823a84a2611289 /mm/vmscan.c
parent    bc57e00f5e0b2480ef222c775c49552d3a930db7 (diff)
vmscan: narrow the scenarios in which lumpy reclaim uses synchronous reclaim
shrink_page_list() can decide to give up reclaiming a page under a number of conditions, such as:

  1. trylock_page() failure
  2. page is unevictable
  3. zone reclaim and page is mapped
  4. PageWriteback() is true
  5. page is swapbacked and swap is full
  6. add_to_swap() failure
  7. page is dirty and gfpmask doesn't have GFP_IO, GFP_FS
  8. page is pinned
  9. IO queue is congested
 10. pageout() started IO, but it has not finished

With lumpy reclaim, these failures result in entering synchronous lumpy reclaim, but that can be unnecessary.  In cases (2), (3), (5), (6), (7) and (8) there is no point retrying, so this patch makes lumpy reclaim abort when it is known to be futile.

Case (9) is more interesting.  The current behaviour is:

  1. start shrink_page_list(async)
  2. find queue_congested()
  3. skip the pageout write
  4. still start shrink_page_list(sync)
  5. wait on a lot of pages
  6. again find queue_congested()
  7. give up the pageout write again

That is wasted time.  However, simply skipping the page write is not good either: allocating a huge page on x86 needs 512 pages, for example, which can be more dirty pages than the queue congestion threshold (~128).

After this patch, pageout() behaves as follows:

 - If order > PAGE_ALLOC_COSTLY_ORDER:
	always ignore queue congestion.
 - If order <= PAGE_ALLOC_COSTLY_ORDER:
	skip the page write and disable lumpy reclaim.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
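For orientation, the following is a condensed C sketch of that congestion policy, pulled together from the mm/vmscan.c hunks further down; it is an illustrative excerpt with elided parts marked by comments, not the complete applied patch.

enum lumpy_mode {
	LUMPY_MODE_NONE,	/* not doing lumpy reclaim */
	LUMPY_MODE_ASYNC,	/* first, asynchronous pass */
	LUMPY_MODE_SYNC,	/* retry pass that may wait on writeback */
};

static int may_write_to_queue(struct backing_dev_info *bdi,
			      struct scan_control *sc)
{
	/* ... existing PF_SWAPWRITE / !congested / own-bdi checks ... */

	/* Lumpy reclaim for a huge page needs many writes, so queue
	 * congestion is ignored for order > PAGE_ALLOC_COSTLY_ORDER. */
	if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
		return 1;
	return 0;
}

static pageout_t pageout(struct page *page, struct address_space *mapping,
			 struct scan_control *sc)
{
	/* ... */
	if (!may_write_to_queue(mapping->backing_dev_info, sc)) {
		/* Congested and the order is not costly: skip the write
		 * and drop to LUMPY_MODE_NONE instead of stalling in a
		 * synchronous retry later. */
		disable_lumpy_reclaim_mode(sc);
		return PAGE_KEEP;
	}
	/* ... */
}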
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--	mm/vmscan.c	| 120
1 file changed, 75 insertions, 45 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 399d54e8a82c..d9fc2dce93af 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -51,6 +51,12 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/vmscan.h>
 
+enum lumpy_mode {
+	LUMPY_MODE_NONE,
+	LUMPY_MODE_ASYNC,
+	LUMPY_MODE_SYNC,
+};
+
 struct scan_control {
 	/* Incremented by the number of inactive pages that were scanned */
 	unsigned long nr_scanned;
@@ -82,7 +88,7 @@ struct scan_control {
 	 * Intend to reclaim enough continuous memory rather than reclaim
 	 * enough amount of memory. i.e, mode for high order allocation.
 	 */
-	bool lumpy_reclaim_mode;
+	enum lumpy_mode lumpy_reclaim_mode;
 
 	/* Which cgroup do we reclaim from */
 	struct mem_cgroup *mem_cgroup;
@@ -265,6 +271,36 @@ unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
 	return ret;
 }
 
+static void set_lumpy_reclaim_mode(int priority, struct scan_control *sc,
+				   bool sync)
+{
+	enum lumpy_mode mode = sync ? LUMPY_MODE_SYNC : LUMPY_MODE_ASYNC;
+
+	/*
+	 * Some reclaim has already failed; it is not worth trying
+	 * synchronous lumpy reclaim.
+	 */
+	if (sync && sc->lumpy_reclaim_mode == LUMPY_MODE_NONE)
+		return;
+
+	/*
+	 * If we need a large contiguous chunk of memory, or have
+	 * trouble getting a small set of contiguous pages, we
+	 * will reclaim both active and inactive pages.
+	 */
+	if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
+		sc->lumpy_reclaim_mode = mode;
+	else if (sc->order && priority < DEF_PRIORITY - 2)
+		sc->lumpy_reclaim_mode = mode;
+	else
+		sc->lumpy_reclaim_mode = LUMPY_MODE_NONE;
+}
+
+static void disable_lumpy_reclaim_mode(struct scan_control *sc)
+{
+	sc->lumpy_reclaim_mode = LUMPY_MODE_NONE;
+}
+
 static inline int is_page_cache_freeable(struct page *page)
 {
 	/*
@@ -275,7 +311,8 @@ static inline int is_page_cache_freeable(struct page *page)
 	return page_count(page) - page_has_private(page) == 2;
 }
 
-static int may_write_to_queue(struct backing_dev_info *bdi)
+static int may_write_to_queue(struct backing_dev_info *bdi,
+			      struct scan_control *sc)
 {
 	if (current->flags & PF_SWAPWRITE)
 		return 1;
@@ -283,6 +320,10 @@ static int may_write_to_queue(struct backing_dev_info *bdi)
 		return 1;
 	if (bdi == current->backing_dev_info)
 		return 1;
+
+	/* lumpy reclaim for hugepage often needs a lot of writes */
+	if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
+		return 1;
 	return 0;
 }
 
@@ -307,12 +348,6 @@ static void handle_write_error(struct address_space *mapping,
 	unlock_page(page);
 }
 
-/* Request for sync pageout. */
-enum pageout_io {
-	PAGEOUT_IO_ASYNC,
-	PAGEOUT_IO_SYNC,
-};
-
 /* possible outcome of pageout() */
 typedef enum {
 	/* failed to write page out, page is locked */
@@ -330,7 +365,7 @@ typedef enum {
  * Calls ->writepage().
  */
 static pageout_t pageout(struct page *page, struct address_space *mapping,
-			enum pageout_io sync_writeback)
+			struct scan_control *sc)
 {
 	/*
 	 * If the page is dirty, only perform writeback if that write
@@ -366,8 +401,10 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
 	}
 	if (mapping->a_ops->writepage == NULL)
 		return PAGE_ACTIVATE;
-	if (!may_write_to_queue(mapping->backing_dev_info))
+	if (!may_write_to_queue(mapping->backing_dev_info, sc)) {
+		disable_lumpy_reclaim_mode(sc);
 		return PAGE_KEEP;
+	}
 
 	if (clear_page_dirty_for_io(page)) {
 		int res;
@@ -393,7 +430,8 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
 		 * direct reclaiming a large contiguous area and the
 		 * first attempt to free a range of pages fails.
 		 */
-		if (PageWriteback(page) && sync_writeback == PAGEOUT_IO_SYNC)
+		if (PageWriteback(page) &&
+		    sc->lumpy_reclaim_mode == LUMPY_MODE_SYNC)
 			wait_on_page_writeback(page);
 
 		if (!PageWriteback(page)) {
@@ -401,7 +439,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
 			ClearPageReclaim(page);
 		}
 		trace_mm_vmscan_writepage(page,
-			trace_reclaim_flags(page, sync_writeback));
+			trace_reclaim_flags(page, sc->lumpy_reclaim_mode));
 		inc_zone_page_state(page, NR_VMSCAN_WRITE);
 		return PAGE_SUCCESS;
 	}
@@ -579,7 +617,7 @@ static enum page_references page_check_references(struct page *page,
 	referenced_page = TestClearPageReferenced(page);
 
 	/* Lumpy reclaim - ignore references */
-	if (sc->lumpy_reclaim_mode)
+	if (sc->lumpy_reclaim_mode != LUMPY_MODE_NONE)
 		return PAGEREF_RECLAIM;
 
 	/*
@@ -643,8 +681,7 @@ static noinline_for_stack void free_page_list(struct list_head *free_pages)
  * shrink_page_list() returns the number of reclaimed pages
  */
 static unsigned long shrink_page_list(struct list_head *page_list,
-					struct scan_control *sc,
-					enum pageout_io sync_writeback)
+					struct scan_control *sc)
 {
 	LIST_HEAD(ret_pages);
 	LIST_HEAD(free_pages);
@@ -693,10 +730,13 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			 * for any page for which writeback has already
 			 * started.
 			 */
-			if (sync_writeback == PAGEOUT_IO_SYNC && may_enter_fs)
+			if (sc->lumpy_reclaim_mode == LUMPY_MODE_SYNC &&
+			    may_enter_fs)
 				wait_on_page_writeback(page);
-			else
-				goto keep_locked;
+			else {
+				unlock_page(page);
+				goto keep_lumpy;
+			}
 		}
 
 		references = page_check_references(page, sc);
@@ -750,14 +790,17 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 				goto keep_locked;
 
 			/* Page is dirty, try to write it out here */
-			switch (pageout(page, mapping, sync_writeback)) {
+			switch (pageout(page, mapping, sc)) {
 			case PAGE_KEEP:
 				goto keep_locked;
 			case PAGE_ACTIVATE:
 				goto activate_locked;
 			case PAGE_SUCCESS:
-				if (PageWriteback(page) || PageDirty(page))
+				if (PageWriteback(page))
+					goto keep_lumpy;
+				if (PageDirty(page))
 					goto keep;
+
 				/*
 				 * A synchronous write - probably a ramdisk. Go
 				 * ahead and try to reclaim the page.
@@ -840,6 +883,7 @@ cull_mlocked:
 		try_to_free_swap(page);
 		unlock_page(page);
 		putback_lru_page(page);
+		disable_lumpy_reclaim_mode(sc);
 		continue;
 
 activate_locked:
@@ -852,6 +896,8 @@ activate_locked:
 keep_locked:
 		unlock_page(page);
 keep:
+		disable_lumpy_reclaim_mode(sc);
+keep_lumpy:
 		list_add(&page->lru, &ret_pages);
 		VM_BUG_ON(PageLRU(page) || PageUnevictable(page));
 	}
@@ -1252,7 +1298,7 @@ static inline bool should_reclaim_stall(unsigned long nr_taken,
 		return false;
 
 	/* Only stall on lumpy reclaim */
-	if (!sc->lumpy_reclaim_mode)
+	if (sc->lumpy_reclaim_mode == LUMPY_MODE_NONE)
 		return false;
 
 	/* If we have relaimed everything on the isolated list, no stall */
@@ -1297,15 +1343,15 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 		return SWAP_CLUSTER_MAX;
 	}
 
-
+	set_lumpy_reclaim_mode(priority, sc, false);
 	lru_add_drain();
 	spin_lock_irq(&zone->lru_lock);
 
 	if (scanning_global_lru(sc)) {
 		nr_taken = isolate_pages_global(nr_to_scan,
 			&page_list, &nr_scanned, sc->order,
-			sc->lumpy_reclaim_mode ?
-					ISOLATE_BOTH : ISOLATE_INACTIVE,
+			sc->lumpy_reclaim_mode == LUMPY_MODE_NONE ?
+					ISOLATE_INACTIVE : ISOLATE_BOTH,
 			zone, 0, file);
 		zone->pages_scanned += nr_scanned;
 		if (current_is_kswapd())
@@ -1317,8 +1363,8 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 	} else {
 		nr_taken = mem_cgroup_isolate_pages(nr_to_scan,
 			&page_list, &nr_scanned, sc->order,
-			sc->lumpy_reclaim_mode ?
-					ISOLATE_BOTH : ISOLATE_INACTIVE,
+			sc->lumpy_reclaim_mode == LUMPY_MODE_NONE ?
+					ISOLATE_INACTIVE : ISOLATE_BOTH,
 			zone, sc->mem_cgroup,
 			0, file);
 		/*
@@ -1336,7 +1382,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 
 	spin_unlock_irq(&zone->lru_lock);
 
-	nr_reclaimed = shrink_page_list(&page_list, sc, PAGEOUT_IO_ASYNC);
+	nr_reclaimed = shrink_page_list(&page_list, sc);
 
 	/* Check if we should syncronously wait for writeback */
 	if (should_reclaim_stall(nr_taken, nr_reclaimed, priority, sc)) {
@@ -1347,7 +1393,8 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 		nr_active = clear_active_flags(&page_list, NULL);
 		count_vm_events(PGDEACTIVATE, nr_active);
 
-		nr_reclaimed += shrink_page_list(&page_list, sc, PAGEOUT_IO_SYNC);
+		set_lumpy_reclaim_mode(priority, sc, true);
+		nr_reclaimed += shrink_page_list(&page_list, sc);
 	}
 
 	local_irq_disable();
@@ -1739,21 +1786,6 @@ out:
 	}
 }
 
-static void set_lumpy_reclaim_mode(int priority, struct scan_control *sc)
-{
-	/*
-	 * If we need a large contiguous chunk of memory, or have
-	 * trouble getting a small set of contiguous pages, we
-	 * will reclaim both active and inactive pages.
-	 */
-	if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
-		sc->lumpy_reclaim_mode = 1;
-	else if (sc->order && priority < DEF_PRIORITY - 2)
-		sc->lumpy_reclaim_mode = 1;
-	else
-		sc->lumpy_reclaim_mode = 0;
-}
-
 /*
  * This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
  */
@@ -1768,8 +1800,6 @@ static void shrink_zone(int priority, struct zone *zone,
 
 	get_scan_count(zone, sc, nr, priority);
 
-	set_lumpy_reclaim_mode(priority, sc);
-
 	while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
 				nr[LRU_INACTIVE_FILE]) {
 		for_each_evictable_lru(l) {