author    Johannes Weiner <hannes@cmpxchg.org>  2014-01-29 17:05:41 -0500
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>  2014-02-13 16:48:00 -0500
commit    48526149964e69fc54a06c409e13d36990386464 (patch)
tree      844e34d12c42f34711531573af92243fbf273ef8 /mm
parent    03381bd28963f97a976d4742468359f12474ea39 (diff)
mm/page-writeback.c: do not count anon pages as dirtyable memory
commit a1c3bfb2f67ef766de03f1f56bdfff9c8595ab14 upstream.

The VM is currently heavily tuned to avoid swapping.  Whether that is
good or bad is a separate discussion, but as long as the VM won't swap
to make room for dirty cache, we cannot consider anonymous pages when
calculating the amount of dirtyable memory, the baseline to which
dirty_background_ratio and dirty_ratio are applied.

A simple workload that occupies a significant size (40+%, depending on
memory layout, storage speeds etc.) of memory with anon/tmpfs pages and
uses the remainder for a streaming writer demonstrates this problem.
In that case, the actual cache pages are a small fraction of what is
considered dirtyable overall, which results in a relatively large
portion of the cache pages being dirtied.  As kswapd starts rotating
these, random tasks enter direct reclaim and stall on IO.

Only consider free pages and file pages dirtyable.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reported-by: Tejun Heo <tj@kernel.org>
Tested-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'mm')
-rw-r--r--  mm/page-writeback.c  |  6
-rw-r--r--  mm/vmscan.c          | 49
2 files changed, 18 insertions, 37 deletions
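For illustration only (not part of the patch): a minimal userspace sketch of the arithmetic the commit message describes, using made-up page counts and ignoring the highmem handling and dirty_balance_reserve that the kernel also factors in. It shows how counting anon/tmpfs pages as dirtyable lets vm.dirty_ratio permit a disproportionate share of the actual file cache to be dirtied, and how excluding them brings the limit back down.

/*
 * Illustration only -- not kernel code.  Page counts are hypothetical
 * (4 KiB pages, 16 GiB total); dirty_balance_reserve and highmem are
 * ignored for brevity.
 */
#include <stdio.h>

int main(void)
{
        unsigned long nr_free = 262144;   /*  1 GiB free          */
        unsigned long nr_file = 1572864;  /*  6 GiB file cache    */
        unsigned long nr_anon = 2359296;  /*  9 GiB anon/tmpfs    */
        unsigned long dirty_ratio = 20;   /* vm.dirty_ratio default */

        /* old baseline: free + file + anon (when swap is available) */
        unsigned long old_limit = (nr_free + nr_file + nr_anon) * dirty_ratio / 100;
        /* new baseline: free + file pages only */
        unsigned long new_limit = (nr_free + nr_file) * dirty_ratio / 100;

        printf("old dirty limit: %lu pages = %.0f%% of the file cache\n",
               old_limit, 100.0 * old_limit / nr_file);
        printf("new dirty limit: %lu pages = %.0f%% of the file cache\n",
               new_limit, 100.0 * new_limit / nr_file);
        return 0;
}

With these numbers the old baseline allows more than half of the file cache to be dirty before writeback is throttled, while the patched baseline caps it near the intended fraction. The actual kernel computation is in zone_dirtyable_memory() and global_dirtyable_memory() in the diff below, which additionally subtract dirty_balance_reserve and, when vm.highmem_is_dirtyable is off, highmem.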
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index bcd929093e64..5a06d4cb9a3d 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -202,7 +202,8 @@ static unsigned long zone_dirtyable_memory(struct zone *zone)
        nr_pages = zone_page_state(zone, NR_FREE_PAGES);
        nr_pages -= min(nr_pages, zone->dirty_balance_reserve);
 
-       nr_pages += zone_reclaimable_pages(zone);
+       nr_pages += zone_page_state(zone, NR_INACTIVE_FILE);
+       nr_pages += zone_page_state(zone, NR_ACTIVE_FILE);
 
        return nr_pages;
 }
@@ -255,7 +256,8 @@ static unsigned long global_dirtyable_memory(void)
        x = global_page_state(NR_FREE_PAGES);
        x -= min(x, dirty_balance_reserve);
 
-       x += global_reclaimable_pages();
+       x += global_page_state(NR_INACTIVE_FILE);
+       x += global_page_state(NR_ACTIVE_FILE);
 
        if (!vm_highmem_is_dirtyable)
                x -= highmem_dirtyable_memory(x);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 7dbdb6afd101..43ddef3cf44f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2117,6 +2117,20 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
        return aborted_reclaim;
 }
 
+static unsigned long zone_reclaimable_pages(struct zone *zone)
+{
+       int nr;
+
+       nr = zone_page_state(zone, NR_ACTIVE_FILE) +
+            zone_page_state(zone, NR_INACTIVE_FILE);
+
+       if (get_nr_swap_pages() > 0)
+               nr += zone_page_state(zone, NR_ACTIVE_ANON) +
+                     zone_page_state(zone, NR_INACTIVE_ANON);
+
+       return nr;
+}
+
 static bool zone_reclaimable(struct zone *zone)
 {
        return zone->pages_scanned < zone_reclaimable_pages(zone) * 6;
@@ -3075,41 +3089,6 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
        wake_up_interruptible(&pgdat->kswapd_wait);
 }
 
-/*
- * The reclaimable count would be mostly accurate.
- * The less reclaimable pages may be
- * - mlocked pages, which will be moved to unevictable list when encountered
- * - mapped pages, which may require several travels to be reclaimed
- * - dirty pages, which is not "instantly" reclaimable
- */
-unsigned long global_reclaimable_pages(void)
-{
-       int nr;
-
-       nr = global_page_state(NR_ACTIVE_FILE) +
-            global_page_state(NR_INACTIVE_FILE);
-
-       if (get_nr_swap_pages() > 0)
-               nr += global_page_state(NR_ACTIVE_ANON) +
-                     global_page_state(NR_INACTIVE_ANON);
-
-       return nr;
-}
-
-unsigned long zone_reclaimable_pages(struct zone *zone)
-{
-       int nr;
-
-       nr = zone_page_state(zone, NR_ACTIVE_FILE) +
-            zone_page_state(zone, NR_INACTIVE_FILE);
-
-       if (get_nr_swap_pages() > 0)
-               nr += zone_page_state(zone, NR_ACTIVE_ANON) +
-                     zone_page_state(zone, NR_INACTIVE_ANON);
-
-       return nr;
-}
-
 #ifdef CONFIG_HIBERNATION
 /*
  * Try to free `nr_to_reclaim' of memory, system-wide, and return the number of