author    Johannes Weiner <hannes@cmpxchg.org>  2014-01-29 17:05:41 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>  2014-01-29 19:22:39 -0500
commit    a1c3bfb2f67ef766de03f1f56bdfff9c8595ab14 (patch)
tree      e06405192d674561bf2718ab03879c32103ae34e
parent    a804552b9a15c931cfc2a92a2e0aed1add8b580a (diff)
mm/page-writeback.c: do not count anon pages as dirtyable memory
The VM is currently heavily tuned to avoid swapping. Whether that is good or bad is a separate discussion, but as long as the VM won't swap to make room for dirty cache, we cannot consider anonymous pages when calculating the amount of dirtyable memory, the baseline to which dirty_background_ratio and dirty_ratio are applied.

A simple workload that occupies a significant size (40+%, depending on memory layout, storage speeds etc.) of memory with anon/tmpfs pages and uses the remainder for a streaming writer demonstrates this problem. In that case, the actual cache pages are a small fraction of what is considered dirtyable overall, which results in a relatively large portion of the cache pages being dirtied. As kswapd starts rotating these, random tasks enter direct reclaim and stall on IO.

Only consider free pages and file pages dirtyable.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reported-by: Tejun Heo <tj@kernel.org>
Tested-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
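[Editor's note] To make the baseline arithmetic concrete, the following is a minimal userspace sketch, not kernel code: the helper name, the page counts, and the 45% anon share are illustrative assumptions. It shows how a dirty threshold expressed as a percentage of "dirtyable" memory shrinks once anon pages are excluded from that baseline, which is exactly the effect of the page-writeback.c hunks below.

/*
 * Illustrative sketch only -- not kernel source. It models how a dirty
 * threshold defined as a percentage of "dirtyable" memory changes when
 * anon/tmpfs pages are no longer counted in the baseline.
 */
#include <stdio.h>

/* ratio percent of the dirtyable baseline, in pages */
static unsigned long background_thresh(unsigned long dirtyable_pages,
                                       unsigned int ratio)
{
        return dirtyable_pages * ratio / 100;
}

int main(void)
{
        unsigned long total = 1UL << 20;          /* 4 GiB of 4 KiB pages */
        unsigned long anon  = total * 45 / 100;   /* cf. the "40+%" workload */
        unsigned long file_and_free = total - anon;

        /* Before the patch: anon pages inflate the baseline */
        printf("threshold counting anon:  %lu pages\n",
               background_thresh(total, 10));
        /* After the patch: only free and file pages are dirtyable */
        printf("threshold excluding anon: %lu pages\n",
               background_thresh(file_and_free, 10));
        return 0;
}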
-rw-r--r--  include/linux/vmstat.h |  2
-rw-r--r--  mm/internal.h          |  1
-rw-r--r--  mm/page-writeback.c    |  6
-rw-r--r--  mm/vmscan.c            | 23
4 files changed, 5 insertions, 27 deletions
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index e4b948080d20..a67b38415768 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -142,8 +142,6 @@ static inline unsigned long zone_page_state_snapshot(struct zone *zone,
 	return x;
 }
 
-extern unsigned long global_reclaimable_pages(void);
-
 #ifdef CONFIG_NUMA
 /*
  * Determine the per node value of a stat item. This function
diff --git a/mm/internal.h b/mm/internal.h
index 612c14f5e0f5..29e1e761f9eb 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -83,7 +83,6 @@ extern unsigned long highest_memmap_pfn;
  */
 extern int isolate_lru_page(struct page *page);
 extern void putback_lru_page(struct page *page);
-extern unsigned long zone_reclaimable_pages(struct zone *zone);
 extern bool zone_reclaimable(struct zone *zone);
 
 /*
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 61119b8a11e6..2d30e2cfe804 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -205,7 +205,8 @@ static unsigned long zone_dirtyable_memory(struct zone *zone)
 	nr_pages = zone_page_state(zone, NR_FREE_PAGES);
 	nr_pages -= min(nr_pages, zone->dirty_balance_reserve);
 
-	nr_pages += zone_reclaimable_pages(zone);
+	nr_pages += zone_page_state(zone, NR_INACTIVE_FILE);
+	nr_pages += zone_page_state(zone, NR_ACTIVE_FILE);
 
 	return nr_pages;
 }
@@ -258,7 +259,8 @@ static unsigned long global_dirtyable_memory(void)
 	x = global_page_state(NR_FREE_PAGES);
 	x -= min(x, dirty_balance_reserve);
 
-	x += global_reclaimable_pages();
+	x += global_page_state(NR_INACTIVE_FILE);
+	x += global_page_state(NR_ACTIVE_FILE);
 
 	if (!vm_highmem_is_dirtyable)
 		x -= highmem_dirtyable_memory(x);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 90c4075d8d75..a9c74b409681 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -147,7 +147,7 @@ static bool global_reclaim(struct scan_control *sc)
 }
 #endif
 
-unsigned long zone_reclaimable_pages(struct zone *zone)
+static unsigned long zone_reclaimable_pages(struct zone *zone)
 {
 	int nr;
 
@@ -3315,27 +3315,6 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
 	wake_up_interruptible(&pgdat->kswapd_wait);
 }
 
-/*
- * The reclaimable count would be mostly accurate.
- * The less reclaimable pages may be
- * - mlocked pages, which will be moved to unevictable list when encountered
- * - mapped pages, which may require several travels to be reclaimed
- * - dirty pages, which is not "instantly" reclaimable
- */
-unsigned long global_reclaimable_pages(void)
-{
-	int nr;
-
-	nr = global_page_state(NR_ACTIVE_FILE) +
-	     global_page_state(NR_INACTIVE_FILE);
-
-	if (get_nr_swap_pages() > 0)
-		nr += global_page_state(NR_ACTIVE_ANON) +
-		      global_page_state(NR_INACTIVE_ANON);
-
-	return nr;
-}
-
 #ifdef CONFIG_HIBERNATION
 /*
  * Try to free `nr_to_reclaim' of memory, system-wide, and return the number of