aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Mel Gorman <mel@csn.ul.ie> 2009-12-14 20:58:53 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2009-12-15 11:53:16 -0500
commit f50de2d3811081957156b5d736778799379c29de (patch)
tree ddc3676bcaa26e2e55e18e57928b5c8331a0f0fa
parent 273f047e36d83179573dc7e3a8af6aceaa8c599e (diff)
vmscan: have kswapd sleep for a short interval and double check it should be asleep
After kswapd balances all zones in a pgdat, it goes to sleep. In the event of no IO congestion, kswapd can go to sleep very shortly after the high watermark was reached. If there is a constant stream of allocations from parallel processes, it can mean that kswapd went to sleep too quickly and the high watermark is not being maintained for a sufficient length of time. This patch makes kswapd go to sleep as a two-stage process. It first tries to sleep for HZ/10. If it is woken up by another process or the high watermark is no longer met, it's considered a premature sleep and kswapd continues work. Otherwise it goes fully to sleep. This adds more counters to distinguish between fast and slow breaches of watermarks. A "fast" premature sleep is one where the low watermark was hit a very short time after kswapd went to sleep. A "slow" premature sleep indicates that the high watermark was breached after a very short interval. Signed-off-by: Mel Gorman <mel@csn.ul.ie> Cc: Frans Pop <elendil@planet.nl> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Cc: Rik van Riel <riel@redhat.com> Cc: Christoph Lameter <cl@linux-foundation.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- include/linux/vmstat.h 1
-rw-r--r-- mm/vmscan.c 44
-rw-r--r-- mm/vmstat.c 2
3 files changed, 45 insertions, 2 deletions
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index d85889710f9b..fd5be240c0b7 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -40,6 +40,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
40 PGSCAN_ZONE_RECLAIM_FAILED, 40 PGSCAN_ZONE_RECLAIM_FAILED,
41#endif 41#endif
42 PGINODESTEAL, SLABS_SCANNED, KSWAPD_STEAL, KSWAPD_INODESTEAL, 42 PGINODESTEAL, SLABS_SCANNED, KSWAPD_STEAL, KSWAPD_INODESTEAL,
43 KSWAPD_PREMATURE_FAST, KSWAPD_PREMATURE_SLOW,
43 PAGEOUTRUN, ALLOCSTALL, PGROTATED, 44 PAGEOUTRUN, ALLOCSTALL, PGROTATED,
44#ifdef CONFIG_HUGETLB_PAGE 45#ifdef CONFIG_HUGETLB_PAGE
45 HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL, 46 HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL,
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 61d3a9a0d96f..e176bd3936da 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1904,6 +1904,24 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
1904} 1904}
1905#endif 1905#endif
1906 1906
1907/* is kswapd sleeping prematurely? */
1908static int sleeping_prematurely(int order, long remaining)
1909{
1910 struct zone *zone;
1911
1912 /* If a direct reclaimer woke kswapd within HZ/10, it's premature */
1913 if (remaining)
1914 return 1;
1915
1916 /* If after HZ/10, a zone is below the high mark, it's premature */
1917 for_each_populated_zone(zone)
1918 if (!zone_watermark_ok(zone, order, high_wmark_pages(zone),
1919 0, 0))
1920 return 1;
1921
1922 return 0;
1923}
1924
1907/* 1925/*
1908 * For kswapd, balance_pgdat() will work across all this node's zones until 1926 * For kswapd, balance_pgdat() will work across all this node's zones until
1909 * they are all at high_wmark_pages(zone). 1927 * they are all at high_wmark_pages(zone).
@@ -2185,8 +2203,30 @@ static int kswapd(void *p)
2185 */ 2203 */
2186 order = new_order; 2204 order = new_order;
2187 } else { 2205 } else {
2188 if (!freezing(current) && !kthread_should_stop()) 2206 if (!freezing(current) && !kthread_should_stop()) {
2189 schedule(); 2207 long remaining = 0;
2208
2209 /* Try to sleep for a short interval */
2210 if (!sleeping_prematurely(order, remaining)) {
2211 remaining = schedule_timeout(HZ/10);
2212 finish_wait(&pgdat->kswapd_wait, &wait);
2213 prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
2214 }
2215
2216 /*
2217 * After a short sleep, check if it was a
2218 * premature sleep. If not, then go fully
2219 * to sleep until explicitly woken up
2220 */
2221 if (!sleeping_prematurely(order, remaining))
2222 schedule();
2223 else {
2224 if (remaining)
2225 count_vm_event(KSWAPD_PREMATURE_FAST);
2226 else
2227 count_vm_event(KSWAPD_PREMATURE_SLOW);
2228 }
2229 }
2190 2230
2191 order = pgdat->kswapd_max_order; 2231 order = pgdat->kswapd_max_order;
2192 } 2232 }
diff --git a/mm/vmstat.c b/mm/vmstat.c
index dad2327e4580..63ab71455c5b 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -683,6 +683,8 @@ static const char * const vmstat_text[] = {
683 "slabs_scanned", 683 "slabs_scanned",
684 "kswapd_steal", 684 "kswapd_steal",
685 "kswapd_inodesteal", 685 "kswapd_inodesteal",
686 "kswapd_slept_prematurely_fast",
687 "kswapd_slept_prematurely_slow",
686 "pageoutrun", 688 "pageoutrun",
687 "allocstall", 689 "allocstall",
688 690