aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> 2009-12-14 20:58:55 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2009-12-15 11:53:16 -0500
commit: bb3ab596832b920c703d1aea1ce76d69c0f71fb7 (patch)
tree: 048162bf5081e7bd9802eba012e8b680a4444da8
parent: f50de2d3811081957156b5d736778799379c29de (diff)
vmscan: stop kswapd waiting on congestion when the min watermark is not being met
If reclaim fails to make sufficient progress, the priority is raised. Once the priority is higher, kswapd starts waiting on congestion. However, if the zone is below the min watermark then kswapd needs to continue working without delay as there is a danger of an increased rate of GFP_ATOMIC allocation failure.

This patch changes the conditions under which kswapd waits on congestion by only going to sleep if the min watermarks are being met.

[mel@csn.ul.ie: add stats to track how relevant the logic is]
[mel@csn.ul.ie: make kswapd only check its own zones and rename the relevant counters]
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- include/linux/vmstat.h 3
-rw-r--r-- mm/vmscan.c 38
-rw-r--r-- mm/vmstat.c 5
3 files changed, 34 insertions, 12 deletions
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index fd5be240c0b7..ee03bba9c5df 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -40,7 +40,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
40 PGSCAN_ZONE_RECLAIM_FAILED, 40 PGSCAN_ZONE_RECLAIM_FAILED,
41#endif 41#endif
42 PGINODESTEAL, SLABS_SCANNED, KSWAPD_STEAL, KSWAPD_INODESTEAL, 42 PGINODESTEAL, SLABS_SCANNED, KSWAPD_STEAL, KSWAPD_INODESTEAL,
43 KSWAPD_PREMATURE_FAST, KSWAPD_PREMATURE_SLOW, 43 KSWAPD_LOW_WMARK_HIT_QUICKLY, KSWAPD_HIGH_WMARK_HIT_QUICKLY,
44 KSWAPD_SKIP_CONGESTION_WAIT,
44 PAGEOUTRUN, ALLOCSTALL, PGROTATED, 45 PAGEOUTRUN, ALLOCSTALL, PGROTATED,
45#ifdef CONFIG_HUGETLB_PAGE 46#ifdef CONFIG_HUGETLB_PAGE
46 HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL, 47 HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL,
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e176bd3936da..cb69f717799f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1905,19 +1905,25 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
1905#endif 1905#endif
1906 1906
1907/* is kswapd sleeping prematurely? */ 1907/* is kswapd sleeping prematurely? */
1908static int sleeping_prematurely(int order, long remaining) 1908static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
1909{ 1909{
1910 struct zone *zone; 1910 int i;
1911 1911
1912 /* If a direct reclaimer woke kswapd within HZ/10, it's premature */ 1912 /* If a direct reclaimer woke kswapd within HZ/10, it's premature */
1913 if (remaining) 1913 if (remaining)
1914 return 1; 1914 return 1;
1915 1915
1916 /* If after HZ/10, a zone is below the high mark, it's premature */ 1916 /* If after HZ/10, a zone is below the high mark, it's premature */
1917 for_each_populated_zone(zone) 1917 for (i = 0; i < pgdat->nr_zones; i++) {
1918 struct zone *zone = pgdat->node_zones + i;
1919
1920 if (!populated_zone(zone))
1921 continue;
1922
1918 if (!zone_watermark_ok(zone, order, high_wmark_pages(zone), 1923 if (!zone_watermark_ok(zone, order, high_wmark_pages(zone),
1919 0, 0)) 1924 0, 0))
1920 return 1; 1925 return 1;
1926 }
1921 1927
1922 return 0; 1928 return 0;
1923} 1929}
@@ -1979,6 +1985,7 @@ loop_again:
1979 for (priority = DEF_PRIORITY; priority >= 0; priority--) { 1985 for (priority = DEF_PRIORITY; priority >= 0; priority--) {
1980 int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ 1986 int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */
1981 unsigned long lru_pages = 0; 1987 unsigned long lru_pages = 0;
1988 int has_under_min_watermark_zone = 0;
1982 1989
1983 /* The swap token gets in the way of swapout... */ 1990 /* The swap token gets in the way of swapout... */
1984 if (!priority) 1991 if (!priority)
@@ -2085,6 +2092,15 @@ loop_again:
2085 if (total_scanned > SWAP_CLUSTER_MAX * 2 && 2092 if (total_scanned > SWAP_CLUSTER_MAX * 2 &&
2086 total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2) 2093 total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2)
2087 sc.may_writepage = 1; 2094 sc.may_writepage = 1;
2095
2096 /*
2097 * We are still under min water mark. it mean we have
2098 * GFP_ATOMIC allocation failure risk. Hurry up!
2099 */
2100 if (!zone_watermark_ok(zone, order, min_wmark_pages(zone),
2101 end_zone, 0))
2102 has_under_min_watermark_zone = 1;
2103
2088 } 2104 }
2089 if (all_zones_ok) 2105 if (all_zones_ok)
2090 break; /* kswapd: all done */ 2106 break; /* kswapd: all done */
@@ -2092,8 +2108,12 @@ loop_again:
2092 * OK, kswapd is getting into trouble. Take a nap, then take 2108 * OK, kswapd is getting into trouble. Take a nap, then take
2093 * another pass across the zones. 2109 * another pass across the zones.
2094 */ 2110 */
2095 if (total_scanned && priority < DEF_PRIORITY - 2) 2111 if (total_scanned && (priority < DEF_PRIORITY - 2)) {
2096 congestion_wait(BLK_RW_ASYNC, HZ/10); 2112 if (has_under_min_watermark_zone)
2113 count_vm_event(KSWAPD_SKIP_CONGESTION_WAIT);
2114 else
2115 congestion_wait(BLK_RW_ASYNC, HZ/10);
2116 }
2097 2117
2098 /* 2118 /*
2099 * We do this so kswapd doesn't build up large priorities for 2119 * We do this so kswapd doesn't build up large priorities for
@@ -2207,7 +2227,7 @@ static int kswapd(void *p)
2207 long remaining = 0; 2227 long remaining = 0;
2208 2228
2209 /* Try to sleep for a short interval */ 2229 /* Try to sleep for a short interval */
2210 if (!sleeping_prematurely(order, remaining)) { 2230 if (!sleeping_prematurely(pgdat, order, remaining)) {
2211 remaining = schedule_timeout(HZ/10); 2231 remaining = schedule_timeout(HZ/10);
2212 finish_wait(&pgdat->kswapd_wait, &wait); 2232 finish_wait(&pgdat->kswapd_wait, &wait);
2213 prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE); 2233 prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
@@ -2218,13 +2238,13 @@ static int kswapd(void *p)
2218 * premature sleep. If not, then go fully 2238 * premature sleep. If not, then go fully
2219 * to sleep until explicitly woken up 2239 * to sleep until explicitly woken up
2220 */ 2240 */
2221 if (!sleeping_prematurely(order, remaining)) 2241 if (!sleeping_prematurely(pgdat, order, remaining))
2222 schedule(); 2242 schedule();
2223 else { 2243 else {
2224 if (remaining) 2244 if (remaining)
2225 count_vm_event(KSWAPD_PREMATURE_FAST); 2245 count_vm_event(KSWAPD_LOW_WMARK_HIT_QUICKLY);
2226 else 2246 else
2227 count_vm_event(KSWAPD_PREMATURE_SLOW); 2247 count_vm_event(KSWAPD_HIGH_WMARK_HIT_QUICKLY);
2228 } 2248 }
2229 } 2249 }
2230 2250
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 63ab71455c5b..6051fbab67ba 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -683,8 +683,9 @@ static const char * const vmstat_text[] = {
683 "slabs_scanned", 683 "slabs_scanned",
684 "kswapd_steal", 684 "kswapd_steal",
685 "kswapd_inodesteal", 685 "kswapd_inodesteal",
686 "kswapd_slept_prematurely_fast", 686 "kswapd_low_wmark_hit_quickly",
687 "kswapd_slept_prematurely_slow", 687 "kswapd_high_wmark_hit_quickly",
688 "kswapd_skip_congestion_wait",
688 "pageoutrun", 689 "pageoutrun",
689 "allocstall", 690 "allocstall",
690 691