author    Mel Gorman <mel@csn.ul.ie>    2010-10-26 17:21:45 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2010-10-26 19:52:07 -0400
commit    0e093d99763eb4cea09f8ca4f1d01f34e121d10b (patch)
tree      fad38f9c3651c81db298521141a79d9468f71986 /mm/vmscan.c
parent    08fc468f4eaf6683bae5bdb94743a09d8630cb80 (diff)
writeback: do not sleep on the congestion queue if there are no congested BDIs or if significant congestion is not being encountered in the current zone
If congestion_wait() is called with no BDI congested, the caller sleeps for the full timeout, which may be an unnecessary sleep. This patch adds wait_iff_congested(), which checks for congestion and sleeps only if a BDI is congested; otherwise it calls cond_resched() to ensure the caller is not hogging the CPU longer than its quota, but it does not sleep.

This is aimed at reducing some of the major desktop stalls reported during IO. For example, while kswapd is operating it calls congestion_wait(), but it may just have been reclaiming clean page cache pages with no congestion. Without this patch it would sleep for a full timeout; after this patch it just calls schedule() if it has been on the CPU too long. Similar logic applies to direct reclaimers that are not making enough progress.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
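For reference, the wait_iff_congested() helper itself lands in mm/backing-dev.c and is not visible in this mm/vmscan.c diffstat. The following is a rough sketch of the decision it makes, reconstructed from the description above: the nr_bdi_congested[] counter and the zone_is_reclaim_congested() test of the new ZONE_CONGESTED flag are assumptions about the companion hunks, and the trailing congestion_wait() stands in for the real sleep on the congestion wait queue.

	/* Illustrative sketch only -- not the actual mm/backing-dev.c hunk */
	long wait_iff_congested(struct zone *zone, int sync, long timeout)
	{
		unsigned long start = jiffies;
		long ret;

		/*
		 * No BDI is congested, or reclaim has not marked this zone
		 * congested: do not sleep at all, just yield the CPU if
		 * this task has exhausted its quota.
		 */
		if (atomic_read(&nr_bdi_congested[sync]) == 0 ||
		    !zone_is_reclaim_congested(zone)) {
			cond_resched();

			/* Report how much of the timeout would have remained */
			ret = timeout - (jiffies - start);
			if (ret < 0)
				ret = 0;
			return ret;
		}

		/* Congestion really is being encountered: sleep as before */
		return congestion_wait(sync, timeout);
	}

Callers such as do_try_to_free_pages() below pass the preferred zone of their zonelist, so the back-off only happens when reclaim in that zone has actually been hitting congested BDIs.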
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--    mm/vmscan.c    42
1 file changed, 35 insertions(+), 7 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 130ad0239f52..30fd658bb289 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -401,10 +401,8 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
 	}
 	if (mapping->a_ops->writepage == NULL)
 		return PAGE_ACTIVATE;
-	if (!may_write_to_queue(mapping->backing_dev_info, sc)) {
-		disable_lumpy_reclaim_mode(sc);
+	if (!may_write_to_queue(mapping->backing_dev_info, sc))
 		return PAGE_KEEP;
-	}
 
 	if (clear_page_dirty_for_io(page)) {
 		int res;
@@ -681,11 +679,14 @@ static noinline_for_stack void free_page_list(struct list_head *free_pages)
  * shrink_page_list() returns the number of reclaimed pages
  */
 static unsigned long shrink_page_list(struct list_head *page_list,
+					struct zone *zone,
 					struct scan_control *sc)
 {
 	LIST_HEAD(ret_pages);
 	LIST_HEAD(free_pages);
 	int pgactivate = 0;
+	unsigned long nr_dirty = 0;
+	unsigned long nr_congested = 0;
 	unsigned long nr_reclaimed = 0;
 
 	cond_resched();
@@ -705,6 +706,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			goto keep;
 
 		VM_BUG_ON(PageActive(page));
+		VM_BUG_ON(page_zone(page) != zone);
 
 		sc->nr_scanned++;
 
@@ -782,6 +784,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		}
 
 		if (PageDirty(page)) {
+			nr_dirty++;
+
 			if (references == PAGEREF_RECLAIM_CLEAN)
 				goto keep_locked;
 			if (!may_enter_fs)
@@ -792,6 +796,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			/* Page is dirty, try to write it out here */
 			switch (pageout(page, mapping, sc)) {
 			case PAGE_KEEP:
+				nr_congested++;
 				goto keep_locked;
 			case PAGE_ACTIVATE:
 				goto activate_locked;
@@ -902,6 +907,15 @@ keep_lumpy:
 		VM_BUG_ON(PageLRU(page) || PageUnevictable(page));
 	}
 
+	/*
+	 * Tag a zone as congested if all the dirty pages encountered were
+	 * backed by a congested BDI. In this case, reclaimers should just
+	 * back off and wait for congestion to clear because further reclaim
+	 * will encounter the same problem
+	 */
+	if (nr_dirty == nr_congested)
+		zone_set_flag(zone, ZONE_CONGESTED);
+
 	free_page_list(&free_pages);
 
 	list_splice(&ret_pages, page_list);
@@ -1386,12 +1400,12 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 
 	spin_unlock_irq(&zone->lru_lock);
 
-	nr_reclaimed = shrink_page_list(&page_list, sc);
+	nr_reclaimed = shrink_page_list(&page_list, zone, sc);
 
 	/* Check if we should synchronously wait for writeback */
 	if (should_reclaim_stall(nr_taken, nr_reclaimed, priority, sc)) {
 		set_lumpy_reclaim_mode(priority, sc, true);
-		nr_reclaimed += shrink_page_list(&page_list, sc);
+		nr_reclaimed += shrink_page_list(&page_list, zone, sc);
 	}
 
 	local_irq_disable();
@@ -1982,8 +1996,13 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 
 		/* Take a nap, wait for some writeback to complete */
 		if (!sc->hibernation_mode && sc->nr_scanned &&
-		    priority < DEF_PRIORITY - 2)
-			congestion_wait(BLK_RW_ASYNC, HZ/10);
+		    priority < DEF_PRIORITY - 2) {
+			struct zone *preferred_zone;
+
+			first_zones_zonelist(zonelist, gfp_zone(sc->gfp_mask),
+						NULL, &preferred_zone);
+			wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/10);
+		}
 	}
 
 out:
@@ -2282,6 +2301,15 @@ loop_again:
 				if (!zone_watermark_ok(zone, order,
 					min_wmark_pages(zone), end_zone, 0))
 					has_under_min_watermark_zone = 1;
+			} else {
+				/*
+				 * If a zone reaches its high watermark,
+				 * consider it to be no longer congested. It's
+				 * possible there are dirty pages backed by
+				 * congested BDIs but as pressure is relieved,
+				 * speculatively avoid congestion waits
+				 */
+				zone_clear_flag(zone, ZONE_CONGESTED);
 			}
 
 		}