 include/linux/gfp.h       |  4 +++-
 include/linux/writeback.h |  1 +
 mm/page-writeback.c       | 82 ++++++++++++++++++++++
 mm/page_alloc.c           | 29 ++++++++++
 4 files changed, 115 insertions(+), 1 deletion(-)

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 66f172fdf5fe..581e74b7df95 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -36,6 +36,7 @@ struct vm_area_struct;
 #endif
 #define ___GFP_NO_KSWAPD 0x400000u
 #define ___GFP_OTHER_NODE 0x800000u
+#define ___GFP_WRITE 0x1000000u
 
 /*
  * GFP bitmasks..
@@ -85,6 +86,7 @@ struct vm_area_struct;
 
 #define __GFP_NO_KSWAPD ((__force gfp_t)___GFP_NO_KSWAPD)
 #define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
+#define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) /* Allocator intends to dirty page */
 
 /*
  * This may seem redundant, but it's a way of annotating false positives vs.
@@ -92,7 +94,7 @@ struct vm_area_struct;
  */
 #define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK)
 
-#define __GFP_BITS_SHIFT 24 /* Room for N __GFP_FOO bits */
+#define __GFP_BITS_SHIFT 25 /* Room for N __GFP_FOO bits */
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
 
 /* This equals 0, but use constants in case they ever change */
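
The new flag is meant to be passed by page cache allocation sites that are about to dirty the page, so the allocator can steer the allocation to a zone that still has room under its dirty limit. A minimal sketch of such a call site, assuming a filesystem write path (grab_page_for_write() is a hypothetical helper invented for illustration; mapping_gfp_mask() and __page_cache_alloc() are existing page cache helpers):

static struct page *grab_page_for_write(struct address_space *mapping)
{
	/* Tag the allocation: the caller intends to dirty this page */
	gfp_t gfp = mapping_gfp_mask(mapping) | __GFP_WRITE;

	return __page_cache_alloc(gfp);
}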
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 34a005515fef..6dff47304971 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -124,6 +124,7 @@ void laptop_mode_timer_fn(unsigned long data);
 static inline void laptop_sync_completion(void) { }
 #endif
 void throttle_vm_writeout(gfp_t gfp_mask);
+bool zone_dirty_ok(struct zone *zone);
 
 extern unsigned long global_dirty_limit;
 
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 433fa990fe8b..5cdd4f2b0c9d 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -147,6 +147,24 @@ static struct prop_descriptor vm_completions;
  * clamping level.
  */
 
+/*
+ * In a memory zone, there is a certain number of pages we consider
+ * available for the page cache, which is essentially the number of
+ * free and reclaimable pages, minus some zone reserves to protect
+ * lowmem and the ability to uphold the zone's watermarks without
+ * requiring writeback.
+ *
+ * This number of dirtyable pages is the base value to which the
+ * user-configurable dirty ratio is applied to yield the effective
+ * number of pages that may actually be dirtied, either per
+ * individual zone or globally over the sum of all zones.
+ *
+ * Because the user is allowed to specify the dirty limit globally
+ * as an absolute number of bytes, calculating the per-zone dirty
+ * limit can require translating the configured limit into a
+ * percentage of global dirtyable memory first.
+ */
+
 static unsigned long highmem_dirtyable_memory(unsigned long total)
 {
 #ifdef CONFIG_HIGHMEM
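
To make the byte-to-zone translation described above concrete, here is a small standalone sketch of the scaling that zone_dirty_limit() below performs; all numbers are invented for illustration, and the real code derives the zone and global dirtyable page counts at runtime:

#include <stdio.h>

#define PAGE_SIZE 4096UL

int main(void)
{
	unsigned long vm_dirty_bytes   = 400UL << 20; /* global limit: 400 MB */
	unsigned long global_dirtyable = 1UL << 20;   /* 4 GB of dirtyable pages */
	unsigned long zone_dirtyable   = 1UL << 18;   /* this zone: 1 GB */
	unsigned long dirty;

	/*
	 * Mirrors DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE) scaled by the
	 * zone's share of global dirtyable memory; the intermediate
	 * product needs a 64-bit unsigned long, as in the kernel code.
	 */
	dirty = ((vm_dirty_bytes + PAGE_SIZE - 1) / PAGE_SIZE) *
		zone_dirtyable / global_dirtyable;

	/* 102400 pages * 262144 / 1048576 = 25600 pages, i.e. 100 MB */
	printf("zone dirty limit: %lu pages\n", dirty);
	return 0;
}

A zone holding a quarter of the dirtyable memory thus gets a quarter of the global byte limit; PF_LESS_THROTTLE and realtime tasks are then allowed a further 25% on top (dirty += dirty / 4).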
@@ -232,6 +250,70 @@ void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
 	trace_global_dirty_state(background, dirty);
 }
 
+/**
+ * zone_dirtyable_memory - number of dirtyable pages in a zone
+ * @zone: the zone
+ *
+ * Returns the zone's number of pages potentially available for dirty
+ * page cache. This is the base value for the per-zone dirty limits.
+ */
+static unsigned long zone_dirtyable_memory(struct zone *zone)
+{
+	/*
+	 * The effective global number of dirtyable pages may exclude
+	 * highmem as a big-picture measure to keep the ratio between
+	 * dirty memory and lowmem reasonable.
+	 *
+	 * But this function is purely about the individual zone and a
+	 * highmem zone can hold its share of dirty pages, so we don't
+	 * care about vm_highmem_is_dirtyable here.
+	 */
+	return zone_page_state(zone, NR_FREE_PAGES) +
+	       zone_reclaimable_pages(zone) -
+	       zone->dirty_balance_reserve;
+}
+
+/**
+ * zone_dirty_limit - maximum number of dirty pages allowed in a zone
+ * @zone: the zone
+ *
+ * Returns the maximum number of dirty pages allowed in a zone, based
+ * on the zone's dirtyable memory.
+ */
+static unsigned long zone_dirty_limit(struct zone *zone)
+{
+	unsigned long zone_memory = zone_dirtyable_memory(zone);
+	struct task_struct *tsk = current;
+	unsigned long dirty;
+
+	if (vm_dirty_bytes)
+		dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE) *
+			zone_memory / global_dirtyable_memory();
+	else
+		dirty = vm_dirty_ratio * zone_memory / 100;
+
+	if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk))
+		dirty += dirty / 4;
+
+	return dirty;
+}
+
+/**
+ * zone_dirty_ok - tells whether a zone is within its dirty limits
+ * @zone: the zone to check
+ *
+ * Returns %true when the dirty pages in @zone are within the zone's
+ * dirty limit, %false if the limit is exceeded.
+ */
+bool zone_dirty_ok(struct zone *zone)
+{
+	unsigned long limit = zone_dirty_limit(zone);
+
+	return zone_page_state(zone, NR_FILE_DIRTY) +
+	       zone_page_state(zone, NR_UNSTABLE_NFS) +
+	       zone_page_state(zone, NR_WRITEBACK) <= limit;
+}
+
 /*
  * couple the period to the dirty_ratio:
  *
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2cb9eb71e282..4f95bcf0f2b1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1735,6 +1735,35 @@ zonelist_scan:
 		if ((alloc_flags & ALLOC_CPUSET) &&
 			!cpuset_zone_allowed_softwall(zone, gfp_mask))
 				continue;
+		/*
+		 * When allocating a page cache page for writing, we
+		 * want to get it from a zone that is within its dirty
+		 * limit, such that no single zone holds more than its
+		 * proportional share of globally allowed dirty pages.
+		 * The dirty limits take into account the zone's
+		 * lowmem reserves and high watermark so that kswapd
+		 * should be able to balance it without having to
+		 * write pages from its LRU list.
+		 *
+		 * This may look like it could increase pressure on
+		 * lower zones by failing allocations in higher zones
+		 * before they are full. But the pages that do spill
+		 * over are limited as the lower zones are protected
+		 * by this very same mechanism. It should not become
+		 * a practical burden to them.
+		 *
+		 * XXX: For now, allow allocations to potentially
+		 * exceed the per-zone dirty limit in the slowpath
+		 * (ALLOC_WMARK_LOW unset) before going into reclaim,
+		 * which is important when on a NUMA setup the allowed
+		 * zones are together not big enough to reach the
+		 * global limit. The proper fix for these situations
+		 * will require awareness of zones in the
+		 * dirty-throttling and the flusher threads.
+		 */
+		if ((alloc_flags & ALLOC_WMARK_LOW) &&
+		    (gfp_mask & __GFP_WRITE) && !zone_dirty_ok(zone))
+			goto this_zone_full;
 
 		BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK);
 		if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
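
The check above only reroutes allocations, it does not fail them: a zone over its dirty limit is marked full for this attempt and the zonelist walk moves on. A condensed restatement of the condition (illustration only; may_use_zone_for_dirty_page() is a hypothetical helper, not part of the patch):

static bool may_use_zone_for_dirty_page(struct zone *zone, gfp_t gfp_mask,
					int alloc_flags)
{
	if (!(gfp_mask & __GFP_WRITE))		/* caller won't dirty the page */
		return true;
	if (!(alloc_flags & ALLOC_WMARK_LOW))	/* slowpath: limit is waived */
		return true;
	return zone_dirty_ok(zone);		/* fastpath: enforce zone limit */
}

Only when every allowed zone is over its dirty limit does the allocation fall back to the slowpath, where, per the XXX note above, the limit is deliberately not enforced so that small NUMA zone sets can still reach the global dirty threshold.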