author	Mel Gorman <mgorman@techsingularity.net>	2015-11-06 19:28:21 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-11-06 20:50:42 -0500
commit	d0164adc89f6bb374d304ffcc375c6d2652fe67d (patch)
tree	de1cbe09c86dcd24a4a476f7e0b41af239bbdc29 /mm/page_alloc.c
parent	016c13daa5c9e4827eca703e2f0621c131f2cca3 (diff)
mm, page_alloc: distinguish between being unable to sleep, unwilling to sleep and avoiding waking kswapd
__GFP_WAIT has been used to identify atomic context in callers that hold spinlocks or are in interrupts. They are expected to be high priority and have access to one of two watermarks lower than "min", which can be referred to as the "atomic reserve". __GFP_HIGH users get access to the first lower watermark and can be called the "high priority reserve".

Over time, callers had a requirement to not block when fallback options were available. Some have abused __GFP_WAIT, leading to a situation where an optimistic allocation with a fallback option can access atomic reserves.

This patch uses __GFP_ATOMIC to identify callers that are truly atomic, cannot sleep and have no alternative. High priority users continue to use __GFP_HIGH. __GFP_DIRECT_RECLAIM identifies callers that can sleep and are willing to enter direct reclaim. __GFP_KSWAPD_RECLAIM identifies callers that want to wake kswapd for background reclaim. __GFP_WAIT is redefined as a caller that is willing to enter direct reclaim and wake kswapd for background reclaim.

This patch then converts a number of sites:

o __GFP_ATOMIC is used by callers that are high priority and have memory
  pools for those requests. GFP_ATOMIC uses this flag.

o Callers that have a limited mempool to guarantee forward progress clear
  __GFP_DIRECT_RECLAIM but keep __GFP_KSWAPD_RECLAIM. bio allocations fall
  into this category where kswapd will still be woken but atomic reserves
  are not used as there is a one-entry mempool to guarantee progress.

o Callers that are checking if they are non-blocking should use the helper
  gfpflags_allow_blocking() where possible. This is because checking for
  __GFP_WAIT as was done historically now can trigger false positives.
  Some exceptions like dm-crypt.c exist where the code intent is clearer
  if __GFP_DIRECT_RECLAIM is used instead of the helper due to flag
  manipulations.

o Callers that built their own GFP flags instead of starting with
  GFP_KERNEL and friends now also need to specify __GFP_KSWAPD_RECLAIM.

The first key hazard to watch out for is callers that removed __GFP_WAIT and were depending on access to atomic reserves for inconspicuous reasons. In some cases it may be appropriate for them to use __GFP_HIGH.

The second key hazard is callers that assembled their own combination of GFP flags instead of starting with something like GFP_KERNEL. They may now wish to specify __GFP_KSWAPD_RECLAIM. In most cases it is almost certainly harmless if it is missed, as other activity will wake kswapd.

Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Vitaly Wool <vitalywool@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
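For reference, a minimal sketch of how the reworked flags and the gfpflags_allow_blocking() helper described above relate under this patch. The ___GFP_* bit values below are illustrative placeholders, not the exact assignments in include/linux/gfp.h:

/* Sketch only: bit positions are placeholders, not the real gfp.h values. */
#define ___GFP_ATOMIC		0x80000u
#define ___GFP_DIRECT_RECLAIM	0x400000u
#define ___GFP_KSWAPD_RECLAIM	0x2000000u

#define __GFP_ATOMIC		((__force gfp_t)___GFP_ATOMIC)		/* cannot sleep, no fallback */
#define __GFP_DIRECT_RECLAIM	((__force gfp_t)___GFP_DIRECT_RECLAIM)	/* may enter direct reclaim */
#define __GFP_KSWAPD_RECLAIM	((__force gfp_t)___GFP_KSWAPD_RECLAIM)	/* wants kswapd woken */

/* __GFP_WAIT is redefined: willing to reclaim directly and to wake kswapd */
#define __GFP_RECLAIM	(__GFP_DIRECT_RECLAIM|__GFP_KSWAPD_RECLAIM)
#define __GFP_WAIT	__GFP_RECLAIM

/* Preferred check for "may this allocation block?" */
static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
{
	return (bool __force)(gfp_flags & __GFP_DIRECT_RECLAIM);
}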
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--	mm/page_alloc.c | 43
1 file changed, 27 insertions(+), 16 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 67390988881a..70461f3e3378 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -169,12 +169,12 @@ void pm_restrict_gfp_mask(void)
 	WARN_ON(!mutex_is_locked(&pm_mutex));
 	WARN_ON(saved_gfp_mask);
 	saved_gfp_mask = gfp_allowed_mask;
-	gfp_allowed_mask &= ~GFP_IOFS;
+	gfp_allowed_mask &= ~(__GFP_IO | __GFP_FS);
 }
 
 bool pm_suspended_storage(void)
 {
-	if ((gfp_allowed_mask & GFP_IOFS) == GFP_IOFS)
+	if ((gfp_allowed_mask & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS))
 		return false;
 	return true;
 }
@@ -2183,7 +2183,7 @@ static bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
 		return false;
 	if (fail_page_alloc.ignore_gfp_highmem && (gfp_mask & __GFP_HIGHMEM))
 		return false;
-	if (fail_page_alloc.ignore_gfp_wait && (gfp_mask & __GFP_WAIT))
+	if (fail_page_alloc.ignore_gfp_wait && (gfp_mask & __GFP_DIRECT_RECLAIM))
 		return false;
 
 	return should_fail(&fail_page_alloc.attr, 1 << order);
@@ -2685,7 +2685,7 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...)
 	if (test_thread_flag(TIF_MEMDIE) ||
 	    (current->flags & (PF_MEMALLOC | PF_EXITING)))
 		filter &= ~SHOW_MEM_FILTER_NODES;
-	if (in_interrupt() || !(gfp_mask & __GFP_WAIT))
+	if (in_interrupt() || !(gfp_mask & __GFP_DIRECT_RECLAIM))
 		filter &= ~SHOW_MEM_FILTER_NODES;
 
 	if (fmt) {
@@ -2945,7 +2945,6 @@ static inline int
 gfp_to_alloc_flags(gfp_t gfp_mask)
 {
 	int alloc_flags = ALLOC_WMARK_MIN | ALLOC_CPUSET;
-	const bool atomic = !(gfp_mask & (__GFP_WAIT | __GFP_NO_KSWAPD));
 
 	/* __GFP_HIGH is assumed to be the same as ALLOC_HIGH to save a branch. */
 	BUILD_BUG_ON(__GFP_HIGH != (__force gfp_t) ALLOC_HIGH);
@@ -2954,11 +2953,11 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
 	 * The caller may dip into page reserves a bit more if the caller
 	 * cannot run direct reclaim, or if the caller has realtime scheduling
 	 * policy or is asking for __GFP_HIGH memory. GFP_ATOMIC requests will
-	 * set both ALLOC_HARDER (atomic == true) and ALLOC_HIGH (__GFP_HIGH).
+	 * set both ALLOC_HARDER (__GFP_ATOMIC) and ALLOC_HIGH (__GFP_HIGH).
 	 */
 	alloc_flags |= (__force int) (gfp_mask & __GFP_HIGH);
 
-	if (atomic) {
+	if (gfp_mask & __GFP_ATOMIC) {
 		/*
 		 * Not worth trying to allocate harder for __GFP_NOMEMALLOC even
 		 * if it can't schedule.
@@ -2995,11 +2994,16 @@ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask)
 	return !!(gfp_to_alloc_flags(gfp_mask) & ALLOC_NO_WATERMARKS);
 }
 
+static inline bool is_thp_gfp_mask(gfp_t gfp_mask)
+{
+	return (gfp_mask & (GFP_TRANSHUGE | __GFP_KSWAPD_RECLAIM)) == GFP_TRANSHUGE;
+}
+
 static inline struct page *
 __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 						struct alloc_context *ac)
 {
-	const gfp_t wait = gfp_mask & __GFP_WAIT;
+	bool can_direct_reclaim = gfp_mask & __GFP_DIRECT_RECLAIM;
 	struct page *page = NULL;
 	int alloc_flags;
 	unsigned long pages_reclaimed = 0;
@@ -3020,15 +3024,23 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	}
 
 	/*
+	 * We also sanity check to catch abuse of atomic reserves being used by
+	 * callers that are not in atomic context.
+	 */
+	if (WARN_ON_ONCE((gfp_mask & (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)) ==
+				(__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
+		gfp_mask &= ~__GFP_ATOMIC;
+
+	/*
 	 * If this allocation cannot block and it is for a specific node, then
 	 * fail early. There's no need to wakeup kswapd or retry for a
 	 * speculative node-specific allocation.
 	 */
-	if (IS_ENABLED(CONFIG_NUMA) && (gfp_mask & __GFP_THISNODE) && !wait)
+	if (IS_ENABLED(CONFIG_NUMA) && (gfp_mask & __GFP_THISNODE) && !can_direct_reclaim)
 		goto nopage;
 
 retry:
-	if (!(gfp_mask & __GFP_NO_KSWAPD))
+	if (gfp_mask & __GFP_KSWAPD_RECLAIM)
 		wake_all_kswapds(order, ac);
 
 	/*
@@ -3071,8 +3083,8 @@ retry:
 		}
 	}
 
-	/* Atomic allocations - we can't balance anything */
-	if (!wait) {
+	/* Caller is not willing to reclaim, we can't balance anything */
+	if (!can_direct_reclaim) {
 		/*
 		 * All existing users of the deprecated __GFP_NOFAIL are
 		 * blockable, so warn of any new users that actually allow this
@@ -3102,7 +3114,7 @@ retry:
 		goto got_pg;
 
 	/* Checks for THP-specific high-order allocations */
-	if ((gfp_mask & GFP_TRANSHUGE) == GFP_TRANSHUGE) {
+	if (is_thp_gfp_mask(gfp_mask)) {
 		/*
 		 * If compaction is deferred for high-order allocations, it is
 		 * because sync compaction recently failed. If this is the case
@@ -3137,8 +3149,7 @@ retry:
 	 * fault, so use asynchronous memory compaction for THP unless it is
 	 * khugepaged trying to collapse.
 	 */
-	if ((gfp_mask & GFP_TRANSHUGE) != GFP_TRANSHUGE ||
-						(current->flags & PF_KTHREAD))
+	if (!is_thp_gfp_mask(gfp_mask) || (current->flags & PF_KTHREAD))
 		migration_mode = MIGRATE_SYNC_LIGHT;
 
 	/* Try direct reclaim and then allocating */
@@ -3209,7 +3220,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 
 	lockdep_trace_alloc(gfp_mask);
 
-	might_sleep_if(gfp_mask & __GFP_WAIT);
+	might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM);
 
 	if (should_fail_alloc_page(gfp_mask, order))
 		return NULL;
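As a closing illustration, a hypothetical caller converted along the lines the changelog describes; my_alloc_buffer() and its flag choices are invented for this example, not code from the patch:

/* Hypothetical example, not code from this patch. */
static void *my_alloc_buffer(bool in_atomic_path)
{
	gfp_t gfp;

	if (in_atomic_path) {
		/* Truly atomic, no fallback: GFP_ATOMIC now carries __GFP_ATOMIC */
		gfp = GFP_ATOMIC;
	} else {
		/*
		 * Optimistic allocation with a fallback: stay away from the
		 * atomic reserves, but hand-rolled flag combinations must now
		 * ask for kswapd wakeups explicitly.
		 */
		gfp = __GFP_KSWAPD_RECLAIM;
	}

	/* Check blocking ability via the helper rather than __GFP_WAIT */
	if (gfpflags_allow_blocking(gfp))
		might_sleep();

	return kmalloc(PAGE_SIZE, gfp);
}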