author	Johannes Weiner <hannes@cmpxchg.org>	2014-01-29 17:05:39 -0500
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>	2014-02-13 16:48:00 -0500
commit	03381bd28963f97a976d4742468359f12474ea39 (patch)
tree	d09ae2ba0408a70071e2513afacf679d136a147a /mm
parent	9fa1577a45d333d37b0dd7e56524c351bab6a21b (diff)
mm/page-writeback.c: fix dirty_balance_reserve subtraction from dirtyable memory
commit a804552b9a15c931cfc2a92a2e0aed1add8b580a upstream.

Tejun reported stuttering and latency spikes on a system where random
tasks would enter direct reclaim and get stuck on dirty pages.  Around
50% of memory was occupied by tmpfs backed by an SSD, and another disk
(rotating) was reading and writing at max speed to shrink a partition.

: The problem was pretty ridiculous.  It's a 8gig machine w/ one ssd and 10k
: rpm harddrive and I could reliably reproduce constant stuttering every
: several seconds for as long as buffered IO was going on on the hard drive
: either with tmpfs occupying somewhere above 4gig or a test program which
: allocates about the same amount of anon memory.  Although swap usage was
: zero, turning off swap also made the problem go away too.
:
: The trigger conditions seem quite plausible - high anon memory usage w/
: heavy buffered IO and swap configured - and it's highly likely that this
: is happening in the wild too.  (this can happen with copying large files
: to usb sticks too, right?)

This patch (of 2):

The dirty_balance_reserve is an approximation of the fraction of free
pages that the page allocator does not make available for page cache
allocations.  As a result, it has to be taken into account when
calculating the amount of "dirtyable memory", the baseline to which
dirty_background_ratio and dirty_ratio are applied.

However, currently the reserve is subtracted from the sum of free and
reclaimable pages, which is nonsensical and leads to erroneous results
when the system is dominated by unreclaimable pages and the
dirty_balance_reserve is bigger than free+reclaimable.  In that case, at
least the already allocated cache should be considered dirtyable.

Fix the calculation by subtracting the reserve from the amount of free
pages, then adding the reclaimable pages on top.

[akpm@linux-foundation.org: fix CONFIG_HIGHMEM build]
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reported-by: Tejun Heo <tj@kernel.org>
Tested-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
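To make the arithmetic of the fix concrete, here is a small standalone sketch
(not kernel code; the page counts are hypothetical, chosen so that the reserve
exceeds free+reclaimable) contrasting the old and new calculations:

/*
 * Standalone illustration of the dirtyable-memory fix.  The page counts
 * below are made up for the example.
 */
#include <stdio.h>

/* Mimics the kernel's underflow-safe "x -= min(x, reserve)". */
static unsigned long sub_min(unsigned long x, unsigned long reserve)
{
	return x - (reserve < x ? reserve : x);
}

int main(void)
{
	unsigned long free_pages  = 100;	/* free pages (hypothetical) */
	unsigned long reclaimable = 50;		/* already-allocated page cache */
	unsigned long reserve     = 200;	/* dirty_balance_reserve */

	/* Old: reserve subtracted from free + reclaimable -> clamps to 0. */
	unsigned long old_dirtyable = sub_min(free_pages + reclaimable, reserve);

	/* New: reserve subtracted from free pages only, reclaimable added on top. */
	unsigned long new_dirtyable = sub_min(free_pages, reserve) + reclaimable;

	printf("old dirtyable: %lu pages\n", old_dirtyable);	/* 0  */
	printf("new dirtyable: %lu pages\n", new_dirtyable);	/* 50 */
	return 0;
}

With the old formula the dirtyable baseline collapses to zero even though 50
pages of cache are already allocated; the new formula keeps that cache
dirtyable, which is the behaviour the patch restores.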
Diffstat (limited to 'mm')
-rw-r--r--	mm/page-writeback.c	55
1 file changed, 24 insertions, 31 deletions
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index aca4364275b5..bcd929093e64 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -188,6 +188,25 @@ static unsigned long writeout_period_time = 0;
  * global dirtyable memory first.
  */
 
+/**
+ * zone_dirtyable_memory - number of dirtyable pages in a zone
+ * @zone: the zone
+ *
+ * Returns the zone's number of pages potentially available for dirty
+ * page cache.  This is the base value for the per-zone dirty limits.
+ */
+static unsigned long zone_dirtyable_memory(struct zone *zone)
+{
+	unsigned long nr_pages;
+
+	nr_pages = zone_page_state(zone, NR_FREE_PAGES);
+	nr_pages -= min(nr_pages, zone->dirty_balance_reserve);
+
+	nr_pages += zone_reclaimable_pages(zone);
+
+	return nr_pages;
+}
+
 static unsigned long highmem_dirtyable_memory(unsigned long total)
 {
 #ifdef CONFIG_HIGHMEM
@@ -195,11 +214,9 @@ static unsigned long highmem_dirtyable_memory(unsigned long total)
 	unsigned long x = 0;
 
 	for_each_node_state(node, N_HIGH_MEMORY) {
-		struct zone *z =
-			&NODE_DATA(node)->node_zones[ZONE_HIGHMEM];
+		struct zone *z = &NODE_DATA(node)->node_zones[ZONE_HIGHMEM];
 
-		x += zone_page_state(z, NR_FREE_PAGES) +
-		     zone_reclaimable_pages(z) - z->dirty_balance_reserve;
+		x += zone_dirtyable_memory(z);
 	}
 	/*
 	 * Unreclaimable memory (kernel memory or anonymous memory
@@ -235,9 +252,11 @@ static unsigned long global_dirtyable_memory(void)
 {
 	unsigned long x;
 
-	x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages();
+	x = global_page_state(NR_FREE_PAGES);
 	x -= min(x, dirty_balance_reserve);
 
+	x += global_reclaimable_pages();
+
 	if (!vm_highmem_is_dirtyable)
 		x -= highmem_dirtyable_memory(x);
 
@@ -289,32 +308,6 @@ void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
 }
 
 /**
- * zone_dirtyable_memory - number of dirtyable pages in a zone
- * @zone: the zone
- *
- * Returns the zone's number of pages potentially available for dirty
- * page cache.  This is the base value for the per-zone dirty limits.
- */
-static unsigned long zone_dirtyable_memory(struct zone *zone)
-{
-	/*
-	 * The effective global number of dirtyable pages may exclude
-	 * highmem as a big-picture measure to keep the ratio between
-	 * dirty memory and lowmem reasonable.
-	 *
-	 * But this function is purely about the individual zone and a
-	 * highmem zone can hold its share of dirty pages, so we don't
-	 * care about vm_highmem_is_dirtyable here.
-	 */
-	unsigned long nr_pages = zone_page_state(zone, NR_FREE_PAGES) +
-				 zone_reclaimable_pages(zone);
-
-	/* don't allow this to underflow */
-	nr_pages -= min(nr_pages, zone->dirty_balance_reserve);
-	return nr_pages;
-}
-
-/**
  * zone_dirty_limit - maximum number of dirty pages allowed in a zone
  * @zone: the zone
  *