aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorDavid Rientjes <rientjes@google.com>2015-04-14 18:46:55 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2015-04-14 19:49:03 -0400
commit4167e9b2cf10f8a4bcda0c713ddc8bb0a18e8187 (patch)
tree744caf92870f2afa4facca7cdfbb6315e71b6592 /mm
parentb360edb43f8ed50aa7b8c9aae7d7557a1a6e32c8 (diff)
mm: remove GFP_THISNODE
NOTE: this is not about __GFP_THISNODE, this is only about GFP_THISNODE. GFP_THISNODE is a secret combination of gfp bits that have different behavior than expected. It is a combination of __GFP_THISNODE, __GFP_NORETRY, and __GFP_NOWARN and is special-cased in the page allocator slowpath to fail without trying reclaim even though it may be used in combination with __GFP_WAIT. An example of the problem this creates: commit e97ca8e5b864 ("mm: fix GFP_THISNODE callers and clarify") fixed up many users of GFP_THISNODE that really just wanted __GFP_THISNODE. The problem doesn't end there, however, because even it was a no-op for alloc_misplaced_dst_page(), which also sets __GFP_NORETRY and __GFP_NOWARN, and migrate_misplaced_transhuge_page(), where __GFP_NORETRY and __GFP_NOWAIT is set in GFP_TRANSHUGE. Converting GFP_THISNODE to __GFP_THISNODE is a no-op in these cases since the page allocator special-cases __GFP_THISNODE && __GFP_NORETRY && __GFP_NOWARN. It's time to just remove GFP_THISNODE entirely. We leave __GFP_THISNODE to restrict an allocation to a local node, but remove GFP_THISNODE and its obscurity. Instead, we require that a caller clear __GFP_WAIT if it wants to avoid reclaim. This allows the aforementioned functions to actually reclaim as they should. It also enables any future callers that want to do __GFP_THISNODE but also __GFP_NORETRY && __GFP_NOWARN to reclaim. The rule is simple: if you don't want to reclaim, then don't set __GFP_WAIT. Aside: ovs_flow_stats_update() really wants to avoid reclaim as well, so it is unchanged. Signed-off-by: David Rientjes <rientjes@google.com> Acked-by: Vlastimil Babka <vbabka@suse.cz> Cc: Christoph Lameter <cl@linux.com> Acked-by: Pekka Enberg <penberg@kernel.org> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Acked-by: Johannes Weiner <hannes@cmpxchg.org> Cc: Mel Gorman <mgorman@suse.de> Cc: Pravin Shelar <pshelar@nicira.com> Cc: Jarno Rajahalme <jrajahalme@nicira.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Greg Thelen <gthelen@google.com> Cc: Tejun Heo <tj@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/page_alloc.c22
-rw-r--r--mm/slab.c22
2 files changed, 24 insertions, 20 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 84466a4b1b36..86af1a96a6dc 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2412,13 +2412,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
2412 *did_some_progress = 1; 2412 *did_some_progress = 1;
2413 goto out; 2413 goto out;
2414 } 2414 }
2415 /* 2415 /* The OOM killer may not free memory on a specific node */
2416 * GFP_THISNODE contains __GFP_NORETRY and we never hit this.
2417 * Sanity check for bare calls of __GFP_THISNODE, not real OOM.
2418 * The caller should handle page allocation failure by itself if
2419 * it specifies __GFP_THISNODE.
2420 * Note: Hugepage uses it but will hit PAGE_ALLOC_COSTLY_ORDER.
2421 */
2422 if (gfp_mask & __GFP_THISNODE) 2416 if (gfp_mask & __GFP_THISNODE)
2423 goto out; 2417 goto out;
2424 } 2418 }
@@ -2673,15 +2667,11 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
2673 } 2667 }
2674 2668
2675 /* 2669 /*
2676 * GFP_THISNODE (meaning __GFP_THISNODE, __GFP_NORETRY and 2670 * If this allocation cannot block and it is for a specific node, then
2677 * __GFP_NOWARN set) should not cause reclaim since the subsystem 2671 * fail early. There's no need to wakeup kswapd or retry for a
2678 * (f.e. slab) using GFP_THISNODE may choose to trigger reclaim 2672 * speculative node-specific allocation.
2679 * using a larger set of nodes after it has established that the
2680 * allowed per node queues are empty and that nodes are
2681 * over allocated.
2682 */ 2673 */
2683 if (IS_ENABLED(CONFIG_NUMA) && 2674 if (IS_ENABLED(CONFIG_NUMA) && (gfp_mask & __GFP_THISNODE) && !wait)
2684 (gfp_mask & GFP_THISNODE) == GFP_THISNODE)
2685 goto nopage; 2675 goto nopage;
2686 2676
2687retry: 2677retry:
@@ -2874,7 +2864,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
2874 /* 2864 /*
2875 * Check the zones suitable for the gfp_mask contain at least one 2865 * Check the zones suitable for the gfp_mask contain at least one
2876 * valid zone. It's possible to have an empty zonelist as a result 2866 * valid zone. It's possible to have an empty zonelist as a result
2877 * of GFP_THISNODE and a memoryless node 2867 * of __GFP_THISNODE and a memoryless node
2878 */ 2868 */
2879 if (unlikely(!zonelist->_zonerefs->zone)) 2869 if (unlikely(!zonelist->_zonerefs->zone))
2880 return NULL; 2870 return NULL;
diff --git a/mm/slab.c b/mm/slab.c
index c4b89eaf4c96..7eb38dd1cefa 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -857,6 +857,11 @@ static inline void *____cache_alloc_node(struct kmem_cache *cachep,
857 return NULL; 857 return NULL;
858} 858}
859 859
860static inline gfp_t gfp_exact_node(gfp_t flags)
861{
862 return flags;
863}
864
860#else /* CONFIG_NUMA */ 865#else /* CONFIG_NUMA */
861 866
862static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int); 867static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int);
@@ -1023,6 +1028,15 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
1023 1028
1024 return __cache_free_alien(cachep, objp, node, page_node); 1029 return __cache_free_alien(cachep, objp, node, page_node);
1025} 1030}
1031
1032/*
1033 * Construct gfp mask to allocate from a specific node but do not invoke reclaim
1034 * or warn about failures.
1035 */
1036static inline gfp_t gfp_exact_node(gfp_t flags)
1037{
1038 return (flags | __GFP_THISNODE | __GFP_NOWARN) & ~__GFP_WAIT;
1039}
1026#endif 1040#endif
1027 1041
1028/* 1042/*
@@ -2825,7 +2839,7 @@ alloc_done:
2825 if (unlikely(!ac->avail)) { 2839 if (unlikely(!ac->avail)) {
2826 int x; 2840 int x;
2827force_grow: 2841force_grow:
2828 x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL); 2842 x = cache_grow(cachep, gfp_exact_node(flags), node, NULL);
2829 2843
2830 /* cache_grow can reenable interrupts, then ac could change. */ 2844 /* cache_grow can reenable interrupts, then ac could change. */
2831 ac = cpu_cache_get(cachep); 2845 ac = cpu_cache_get(cachep);
@@ -3019,7 +3033,7 @@ retry:
3019 get_node(cache, nid) && 3033 get_node(cache, nid) &&
3020 get_node(cache, nid)->free_objects) { 3034 get_node(cache, nid)->free_objects) {
3021 obj = ____cache_alloc_node(cache, 3035 obj = ____cache_alloc_node(cache,
3022 flags | GFP_THISNODE, nid); 3036 gfp_exact_node(flags), nid);
3023 if (obj) 3037 if (obj)
3024 break; 3038 break;
3025 } 3039 }
@@ -3047,7 +3061,7 @@ retry:
3047 nid = page_to_nid(page); 3061 nid = page_to_nid(page);
3048 if (cache_grow(cache, flags, nid, page)) { 3062 if (cache_grow(cache, flags, nid, page)) {
3049 obj = ____cache_alloc_node(cache, 3063 obj = ____cache_alloc_node(cache,
3050 flags | GFP_THISNODE, nid); 3064 gfp_exact_node(flags), nid);
3051 if (!obj) 3065 if (!obj)
3052 /* 3066 /*
3053 * Another processor may allocate the 3067 * Another processor may allocate the
@@ -3118,7 +3132,7 @@ retry:
3118 3132
3119must_grow: 3133must_grow:
3120 spin_unlock(&n->list_lock); 3134 spin_unlock(&n->list_lock);
3121 x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL); 3135 x = cache_grow(cachep, gfp_exact_node(flags), nodeid, NULL);
3122 if (x) 3136 if (x)
3123 goto retry; 3137 goto retry;
3124 3138