-rw-r--r--	include/linux/gfp.h       |  4
-rw-r--r--	include/linux/writeback.h |  1
-rw-r--r--	mm/page-writeback.c       | 82
-rw-r--r--	mm/page_alloc.c           | 29
4 files changed, 115 insertions(+), 1 deletion(-)
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 66f172fdf5fe..581e74b7df95 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -36,6 +36,7 @@ struct vm_area_struct;
 #endif
 #define ___GFP_NO_KSWAPD	0x400000u
 #define ___GFP_OTHER_NODE	0x800000u
+#define ___GFP_WRITE		0x1000000u
 
 /*
  * GFP bitmasks..
@@ -85,6 +86,7 @@ struct vm_area_struct;
 
 #define __GFP_NO_KSWAPD	((__force gfp_t)___GFP_NO_KSWAPD)
 #define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
+#define __GFP_WRITE	((__force gfp_t)___GFP_WRITE)	/* Allocator intends to dirty page */
 
 /*
  * This may seem redundant, but it's a way of annotating false positives vs.
@@ -92,7 +94,7 @@ struct vm_area_struct;
  */
 #define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK)
 
-#define __GFP_BITS_SHIFT 24	/* Room for N __GFP_FOO bits */
+#define __GFP_BITS_SHIFT 25	/* Room for N __GFP_FOO bits */
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
 
 /* This equals 0, but use constants in case they ever change */
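
The flag is purely advisory: __GFP_WRITE only tells the allocator that the
caller intends to dirty the page, and callers have to pass it explicitly when
allocating page cache for a write. As a rough sketch of such a caller (the
helper name below is hypothetical and not part of this patch; the actual
wiring into the page-cache write path is expected from a separate change; it
uses the existing mapping_gfp_mask() and __page_cache_alloc() helpers from
<linux/pagemap.h>):

    #include <linux/gfp.h>
    #include <linux/pagemap.h>

    /*
     * Hypothetical caller, for illustration only: allocate a page
     * cache page that will be dirtied, tagging the request with
     * __GFP_WRITE so the allocator can place it in a zone that is
     * still within its dirty limit.
     */
    static struct page *alloc_page_for_write(struct address_space *mapping)
    {
            gfp_t gfp = mapping_gfp_mask(mapping) | __GFP_WRITE;

            return __page_cache_alloc(gfp);
    }
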
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 34a005515fef..6dff47304971 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -124,6 +124,7 @@ void laptop_mode_timer_fn(unsigned long data);
 static inline void laptop_sync_completion(void) { }
 #endif
 void throttle_vm_writeout(gfp_t gfp_mask);
+bool zone_dirty_ok(struct zone *zone);
 
 extern unsigned long global_dirty_limit;
 
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 433fa990fe8b..5cdd4f2b0c9d 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -147,6 +147,24 @@ static struct prop_descriptor vm_completions;
  * clamping level.
  */
 
+/*
+ * In a memory zone, there is a certain amount of pages we consider
+ * available for the page cache, which is essentially the number of
+ * free and reclaimable pages, minus some zone reserves to protect
+ * lowmem and the ability to uphold the zone's watermarks without
+ * requiring writeback.
+ *
+ * This number of dirtyable pages is the base value to which the
+ * user-configurable dirty ratio is applied to get the effective
+ * number of pages that are allowed to be dirtied, either per
+ * individual zone or globally as the sum over all zones.
+ *
+ * Because the user is allowed to specify the dirty limit globally as
+ * an absolute number of bytes, calculating the per-zone dirty limit
+ * can require translating the configured limit into a percentage of
+ * global dirtyable memory first.
+ */
+
 static unsigned long highmem_dirtyable_memory(unsigned long total)
 {
 #ifdef CONFIG_HIGHMEM
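
The comment added above can be restated as a simple calculation. The sketch
below is userspace C with made-up numbers, mirroring zone_dirtyable_memory()
and the vm_dirty_ratio branch of zone_dirty_limit() introduced in the next
hunk; it is an illustration, not code from the patch:

    #include <stdio.h>

    int main(void)
    {
            /* assumed per-zone counts, in pages */
            unsigned long free_pages        = 120000; /* NR_FREE_PAGES          */
            unsigned long reclaimable_pages =  50000; /* reclaimable page cache */
            unsigned long reserve           =  10000; /* dirty_balance_reserve  */
            unsigned long dirty_ratio       =     20; /* vm.dirty_ratio, in %   */

            unsigned long dirtyable = free_pages + reclaimable_pages - reserve;
            unsigned long limit     = dirty_ratio * dirtyable / 100;

            /* 160000 dirtyable pages -> 32000 may be dirty in this zone */
            printf("dirtyable=%lu limit=%lu\n", dirtyable, limit);
            return 0;
    }
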
@@ -232,6 +250,70 @@ void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
 	trace_global_dirty_state(background, dirty);
 }
 
+/**
+ * zone_dirtyable_memory - number of dirtyable pages in a zone
+ * @zone: the zone
+ *
+ * Returns the zone's number of pages potentially available for dirty
+ * page cache. This is the base value for the per-zone dirty limits.
+ */
+static unsigned long zone_dirtyable_memory(struct zone *zone)
+{
+	/*
+	 * The effective global number of dirtyable pages may exclude
+	 * highmem as a big-picture measure to keep the ratio between
+	 * dirty memory and lowmem reasonable.
+	 *
+	 * But this function is purely about the individual zone and a
+	 * highmem zone can hold its share of dirty pages, so we don't
+	 * care about vm_highmem_is_dirtyable here.
+	 */
+	return zone_page_state(zone, NR_FREE_PAGES) +
+	       zone_reclaimable_pages(zone) -
+	       zone->dirty_balance_reserve;
+}
+
+/**
+ * zone_dirty_limit - maximum number of dirty pages allowed in a zone
+ * @zone: the zone
+ *
+ * Returns the maximum number of dirty pages allowed in a zone, based
+ * on the zone's dirtyable memory.
+ */
+static unsigned long zone_dirty_limit(struct zone *zone)
+{
+	unsigned long zone_memory = zone_dirtyable_memory(zone);
+	struct task_struct *tsk = current;
+	unsigned long dirty;
+
+	if (vm_dirty_bytes)
+		dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE) *
+			zone_memory / global_dirtyable_memory();
+	else
+		dirty = vm_dirty_ratio * zone_memory / 100;
+
+	if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk))
+		dirty += dirty / 4;
+
+	return dirty;
+}
+
+/**
+ * zone_dirty_ok - tells whether a zone is within its dirty limits
+ * @zone: the zone to check
+ *
+ * Returns %true when the dirty pages in @zone are within the zone's
+ * dirty limit, %false if the limit is exceeded.
+ */
+bool zone_dirty_ok(struct zone *zone)
+{
+	unsigned long limit = zone_dirty_limit(zone);
+
+	return zone_page_state(zone, NR_FILE_DIRTY) +
+	       zone_page_state(zone, NR_UNSTABLE_NFS) +
+	       zone_page_state(zone, NR_WRITEBACK) <= limit;
+}
+
 /*
  * couple the period to the dirty_ratio:
  *
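
To make the vm_dirty_bytes branch of zone_dirty_limit() concrete, here is a
worked example with assumed numbers (not taken from a real system): with
vm_dirty_bytes = 1 GiB and PAGE_SIZE = 4096, the global byte limit translates
to DIV_ROUND_UP(1073741824, 4096) = 262144 pages. A zone that contributes
160000 of 800000 globally dirtyable pages then gets 262144 * 160000 / 800000 =
52428 pages as its proportional share, and a PF_LESS_THROTTLE or realtime
task is granted 25% more, 52428 + 52428 / 4 = 65535 pages. zone_dirty_ok()
compares the zone's NR_FILE_DIRTY + NR_UNSTABLE_NFS + NR_WRITEBACK against
this limit.
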
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2cb9eb71e282..4f95bcf0f2b1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1735,6 +1735,35 @@ zonelist_scan:
 		if ((alloc_flags & ALLOC_CPUSET) &&
 			!cpuset_zone_allowed_softwall(zone, gfp_mask))
 				continue;
+		/*
+		 * When allocating a page cache page for writing, we
+		 * want to get it from a zone that is within its dirty
+		 * limit, such that no single zone holds more than its
+		 * proportional share of globally allowed dirty pages.
+		 * The dirty limits take into account the zone's
+		 * lowmem reserves and high watermark so that kswapd
+		 * should be able to balance it without having to
+		 * write pages from its LRU list.
+		 *
+		 * This may look like it could increase pressure on
+		 * lower zones by failing allocations in higher zones
+		 * before they are full. But the pages that do spill
+		 * over are limited as the lower zones are protected
+		 * by this very same mechanism. It should not become
+		 * a practical burden to them.
+		 *
+		 * XXX: For now, allow allocations to potentially
+		 * exceed the per-zone dirty limit in the slowpath
+		 * (ALLOC_WMARK_LOW unset) before going into reclaim,
+		 * which is important on NUMA setups where the allowed
+		 * zones together are not big enough to reach the
+		 * global limit. The proper fix for these situations
+		 * will require awareness of zones in the
+		 * dirty-throttling and the flusher threads.
+		 */
+		if ((alloc_flags & ALLOC_WMARK_LOW) &&
+		    (gfp_mask & __GFP_WRITE) && !zone_dirty_ok(zone))
+			goto this_zone_full;
 
 		BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK);
 		if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
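
The net effect of the new check can be illustrated with an assumed scenario
(a sketch, not a trace from a real machine): on a node whose zonelist is
ZONE_NORMAL followed by ZONE_DMA32, once ZONE_NORMAL's NR_FILE_DIRTY +
NR_UNSTABLE_NFS + NR_WRITEBACK exceed its zone_dirty_limit(), a fastpath
__GFP_WRITE allocation (ALLOC_WMARK_LOW set) jumps to this_zone_full, skips
that zone, and the page is taken from ZONE_DMA32 instead, provided that zone
is still within its own dirty limit and above its watermark. In the slowpath,
with ALLOC_WMARK_LOW cleared, the same allocation may still be placed in
ZONE_NORMAL rather than entering reclaim, as the XXX comment above notes.
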