author		Mel Gorman <mel@csn.ul.ie>	2010-10-26 17:21:45 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2010-10-26 19:52:07 -0400
commit		0e093d99763eb4cea09f8ca4f1d01f34e121d10b (patch)
tree		fad38f9c3651c81db298521141a79d9468f71986 /mm/vmscan.c
parent		08fc468f4eaf6683bae5bdb94743a09d8630cb80 (diff)
writeback: do not sleep on the congestion queue if there are no congested BDIs or if significant congestion is not being encountered in the current zone
If congestion_wait() is called with no BDI congested, the caller sleeps for
the full timeout, which may be an unnecessary sleep. This patch adds
wait_iff_congested(), which checks for congestion and only sleeps if a BDI is
congested; otherwise it calls cond_resched() so the caller does not hog the
CPU beyond its quota, but it does not sleep.

This is aimed at reducing some of the major desktop stalls reported during
IO. For example, while kswapd is operating it calls congestion_wait(), but it
may just have been reclaiming clean page cache pages with no congestion.
Without this patch it would sleep for the full timeout; after this patch it
just calls schedule() if it has been on the CPU too long. Similar logic
applies to direct reclaimers that are not making enough progress.
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
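
The wait_iff_congested() helper itself is not part of the mm/vmscan.c diff
below (the diffstat is limited to that file); it lives with the rest of the
congestion_wait() machinery in the backing-dev code. As a condensed sketch of
the behaviour described above, and not the verbatim implementation, it amounts
to the following; nr_bdi_congested and congestion_wqh are the counters and
wait queues already used by congestion_wait(), and zone_is_reclaim_congested()
is assumed here as the test for the ZONE_CONGESTED flag that the hunks below
set and clear:

long wait_iff_congested(struct zone *zone, int sync, long timeout)
{
	long ret;
	unsigned long start = jiffies;
	DEFINE_WAIT(wait);
	wait_queue_head_t *wqh = &congestion_wqh[sync];

	/*
	 * No BDI is congested, or reclaim has not tagged this zone as
	 * congested: yield the CPU if needed, but do not sleep for the
	 * full timeout.
	 */
	if (atomic_read(&nr_bdi_congested[sync]) == 0 ||
	    !zone_is_reclaim_congested(zone)) {
		cond_resched();

		/* In case we scheduled, report the time remaining */
		ret = timeout - (jiffies - start);
		if (ret < 0)
			ret = 0;
		return ret;
	}

	/* Congestion really is being encountered: sleep as congestion_wait() would */
	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
	ret = io_schedule_timeout(timeout);
	finish_wait(wqh, &wait);
	return ret;
}

In the hunks below, do_try_to_free_pages() passes its preferred zone to
wait_iff_congested(), so a direct reclaimer only stalls when both a BDI and
that zone appear congested.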
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--	mm/vmscan.c	42
1 files changed, 35 insertions, 7 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 130ad0239f52..30fd658bb289 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -401,10 +401,8 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
 	}
 	if (mapping->a_ops->writepage == NULL)
 		return PAGE_ACTIVATE;
-	if (!may_write_to_queue(mapping->backing_dev_info, sc)) {
-		disable_lumpy_reclaim_mode(sc);
+	if (!may_write_to_queue(mapping->backing_dev_info, sc))
 		return PAGE_KEEP;
-	}
 
 	if (clear_page_dirty_for_io(page)) {
 		int res;
@@ -681,11 +679,14 @@ static noinline_for_stack void free_page_list(struct list_head *free_pages)
  * shrink_page_list() returns the number of reclaimed pages
  */
 static unsigned long shrink_page_list(struct list_head *page_list,
+					struct zone *zone,
 					struct scan_control *sc)
 {
 	LIST_HEAD(ret_pages);
 	LIST_HEAD(free_pages);
 	int pgactivate = 0;
+	unsigned long nr_dirty = 0;
+	unsigned long nr_congested = 0;
 	unsigned long nr_reclaimed = 0;
 
 	cond_resched();
@@ -705,6 +706,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			goto keep;
 
 		VM_BUG_ON(PageActive(page));
+		VM_BUG_ON(page_zone(page) != zone);
 
 		sc->nr_scanned++;
 
@@ -782,6 +784,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		}
 
 		if (PageDirty(page)) {
+			nr_dirty++;
+
 			if (references == PAGEREF_RECLAIM_CLEAN)
 				goto keep_locked;
 			if (!may_enter_fs)
@@ -792,6 +796,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			/* Page is dirty, try to write it out here */
 			switch (pageout(page, mapping, sc)) {
 			case PAGE_KEEP:
+				nr_congested++;
 				goto keep_locked;
 			case PAGE_ACTIVATE:
 				goto activate_locked;
@@ -902,6 +907,15 @@ keep_lumpy:
 		VM_BUG_ON(PageLRU(page) || PageUnevictable(page));
 	}
 
+	/*
+	 * Tag a zone as congested if all the dirty pages encountered were
+	 * backed by a congested BDI. In this case, reclaimers should just
+	 * back off and wait for congestion to clear because further reclaim
+	 * will encounter the same problem
+	 */
+	if (nr_dirty == nr_congested)
+		zone_set_flag(zone, ZONE_CONGESTED);
+
 	free_page_list(&free_pages);
 
 	list_splice(&ret_pages, page_list);
@@ -1386,12 +1400,12 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 
 	spin_unlock_irq(&zone->lru_lock);
 
-	nr_reclaimed = shrink_page_list(&page_list, sc);
+	nr_reclaimed = shrink_page_list(&page_list, zone, sc);
 
 	/* Check if we should syncronously wait for writeback */
 	if (should_reclaim_stall(nr_taken, nr_reclaimed, priority, sc)) {
 		set_lumpy_reclaim_mode(priority, sc, true);
-		nr_reclaimed += shrink_page_list(&page_list, sc);
+		nr_reclaimed += shrink_page_list(&page_list, zone, sc);
 	}
 
 	local_irq_disable();
@@ -1982,8 +1996,13 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 
 		/* Take a nap, wait for some writeback to complete */
 		if (!sc->hibernation_mode && sc->nr_scanned &&
-		    priority < DEF_PRIORITY - 2)
-			congestion_wait(BLK_RW_ASYNC, HZ/10);
+		    priority < DEF_PRIORITY - 2) {
+			struct zone *preferred_zone;
+
+			first_zones_zonelist(zonelist, gfp_zone(sc->gfp_mask),
+						NULL, &preferred_zone);
+			wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/10);
+		}
 	}
 
 out:
@@ -2282,6 +2301,15 @@ loop_again:
 				if (!zone_watermark_ok(zone, order,
 					min_wmark_pages(zone), end_zone, 0))
 					has_under_min_watermark_zone = 1;
+			} else {
+				/*
+				 * If a zone reaches its high watermark,
+				 * consider it to be no longer congested. It's
+				 * possible there are dirty pages backed by
+				 * congested BDIs but as pressure is relieved,
+				 * spectulatively avoid congestion waits
+				 */
+				zone_clear_flag(zone, ZONE_CONGESTED);
 			}
 
 		}