author		Johannes Weiner <jweiner@redhat.com>	2012-01-10 18:07:42 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-01-10 19:30:43 -0500
commit		ab8fabd46f811d5153d8a0cd2fac9a0d41fb593d (patch)
tree		0a6f7dcca59d22abe07973e3fafc41719ff3ad9d
parent		25bd91bd27820d5971258cecd1c0e64b0e485144 (diff)
mm: exclude reserved pages from dirtyable memory
Per-zone dirty limits try to distribute page cache pages allocated for
writing across zones in proportion to the individual zone sizes, to
reduce the likelihood of reclaim having to write back individual pages
from the LRU lists in order to make progress.

This patch:

The amount of dirtyable pages should not include the full number of
free pages: there is a number of reserved pages that the page allocator
and kswapd always try to keep free.

The closer (reclaimable pages - dirty pages) is to the number of
reserved pages, the more likely it becomes for reclaim to run into
dirty pages:

	+----------+ ---
	|   anon   |  |
	+----------+  |
	|          |  |
	|          |  -- dirty limit new    -- flusher new
	|   file   |  |                     |
	|          |  |                     |
	|          |  -- dirty limit old    -- flusher old
	|          |                        |
	+----------+                       --- reclaim
	| reserved |
	+----------+
	|  kernel  |
	+----------+

This patch introduces a per-zone dirty reserve that takes both the
lowmem reserve as well as the high watermark of the zone into account,
and a global sum of those per-zone values that is subtracted from the
global amount of dirtyable pages.  The lowmem reserve is unavailable to
page cache allocations and kswapd tries to keep the high watermark
free.  We don't want to end up in a situation where reclaim has to
clean pages in order to balance zones.

Not treating reserved pages as dirtyable on a global level is only a
conceptual fix.  In reality, dirty pages are not distributed equally
across zones and reclaim runs into dirty pages on a regular basis.

But it is important to get this right before tackling the problem on a
per-zone level, where the distance between reclaim and the dirty pages
is mostly much smaller in absolute numbers.

[akpm@linux-foundation.org: fix highmem build]
Signed-off-by: Johannes Weiner <jweiner@redhat.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Shaohua Li <shaohua.li@intel.com>
Cc: Chris Mason <chris.mason@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
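To make the arithmetic concrete, below is a small standalone userspace sketch (not kernel code) of the accounting described above: each zone contributes min(lowmem reserve + high watermark, present pages) to a global dirty_balance_reserve, and that sum is subtracted from free plus reclaimable pages to obtain the dirtyable total. The struct zone_sample, its fields, and all numbers are invented for illustration and are not part of the patch.

/*
 * Standalone illustration only -- not kernel code.  Zone sizes,
 * watermarks and reserves are made-up values.
 */
#include <stdio.h>

struct zone_sample {
	const char *name;
	unsigned long present_pages;		/* zone size in pages */
	unsigned long free_pages;
	unsigned long reclaimable_pages;
	unsigned long max_lowmem_reserve;	/* largest lowmem_reserve[] entry */
	unsigned long high_wmark;		/* kswapd's high watermark */
};

int main(void)
{
	struct zone_sample zones[] = {
		{ "DMA32",  262144, 40000, 150000, 96000, 2048 },
		{ "Normal", 786432, 90000, 500000,     0, 6144 },
	};
	unsigned long dirty_balance_reserve = 0;
	unsigned long dirtyable = 0;

	for (unsigned int i = 0; i < sizeof(zones) / sizeof(zones[0]); i++) {
		struct zone_sample *z = &zones[i];
		/* lowmem reserve + high watermark, clamped to the zone size */
		unsigned long reserve = z->max_lowmem_reserve + z->high_wmark;

		if (reserve > z->present_pages)
			reserve = z->present_pages;

		dirty_balance_reserve += reserve;
		dirtyable += z->free_pages + z->reclaimable_pages;

		printf("%-6s per-zone dirty reserve: %lu pages\n",
		       z->name, reserve);
	}

	/* global dirtyable memory excludes the summed per-zone reserves */
	dirtyable -= dirty_balance_reserve;

	printf("dirty_balance_reserve: %lu pages\n", dirty_balance_reserve);
	printf("dirtyable memory:      %lu pages\n", dirtyable);
	return 0;
}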
-rw-r--r--	include/linux/mmzone.h	|  6
-rw-r--r--	include/linux/swap.h	|  1
-rw-r--r--	mm/page-writeback.c	|  5
-rw-r--r--	mm/page_alloc.c		| 19
4 files changed, 29 insertions, 2 deletions
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 3ac040f19369..ca6ca92418a6 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -317,6 +317,12 @@ struct zone {
 	 */
 	unsigned long lowmem_reserve[MAX_NR_ZONES];
 
+	/*
+	 * This is a per-zone reserve of pages that should not be
+	 * considered dirtyable memory.
+	 */
+	unsigned long dirty_balance_reserve;
+
 #ifdef CONFIG_NUMA
 	int node;
 	/*
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 1e22e126d2ac..06061a7f8e69 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -207,6 +207,7 @@ struct swap_list_t {
 /* linux/mm/page_alloc.c */
 extern unsigned long totalram_pages;
 extern unsigned long totalreserve_pages;
+extern unsigned long dirty_balance_reserve;
 extern unsigned int nr_free_buffer_pages(void);
 extern unsigned int nr_free_pagecache_pages(void);
 
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index c081bf62202b..9ab6de82d8e6 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -157,7 +157,7 @@ static unsigned long highmem_dirtyable_memory(unsigned long total)
 			&NODE_DATA(node)->node_zones[ZONE_HIGHMEM];
 
 		x += zone_page_state(z, NR_FREE_PAGES) +
-		     zone_reclaimable_pages(z);
+		     zone_reclaimable_pages(z) - z->dirty_balance_reserve;
 	}
 	/*
 	 * Make sure that the number of highmem pages is never larger
@@ -181,7 +181,8 @@ static unsigned long determine_dirtyable_memory(void)
 {
 	unsigned long x;
 
-	x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages();
+	x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages() -
+	    dirty_balance_reserve;
 
 	if (!vm_highmem_is_dirtyable)
 		x -= highmem_dirtyable_memory(x);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 93baebcc06f3..2cb9eb71e282 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -97,6 +97,14 @@ EXPORT_SYMBOL(node_states);
 
 unsigned long totalram_pages __read_mostly;
 unsigned long totalreserve_pages __read_mostly;
+/*
+ * When calculating the number of globally allowed dirty pages, there
+ * is a certain number of per-zone reserves that should not be
+ * considered dirtyable memory.  This is the sum of those reserves
+ * over all existing zones that contribute dirtyable memory.
+ */
+unsigned long dirty_balance_reserve __read_mostly;
+
 int percpu_pagelist_fraction;
 gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;
 
@@ -4822,8 +4830,19 @@ static void calculate_totalreserve_pages(void)
 			if (max > zone->present_pages)
 				max = zone->present_pages;
 			reserve_pages += max;
+			/*
+			 * Lowmem reserves are not available to
+			 * GFP_HIGHUSER page cache allocations and
+			 * kswapd tries to balance zones to their high
+			 * watermark.  As a result, neither should be
+			 * regarded as dirtyable memory, to prevent a
+			 * situation where reclaim has to clean pages
+			 * in order to balance the zones.
+			 */
+			zone->dirty_balance_reserve = max;
 		}
 	}
+	dirty_balance_reserve = reserve_pages;
 	totalreserve_pages = reserve_pages;
 }
 