author	David Rientjes <rientjes@google.com>	2015-04-14 18:46:55 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-04-14 19:49:03 -0400
commit	4167e9b2cf10f8a4bcda0c713ddc8bb0a18e8187 (patch)
tree	744caf92870f2afa4facca7cdfbb6315e71b6592
parent	b360edb43f8ed50aa7b8c9aae7d7557a1a6e32c8 (diff)
mm: remove GFP_THISNODE
NOTE: this is not about __GFP_THISNODE, this is only about GFP_THISNODE.

GFP_THISNODE is a secret combination of gfp bits that has different
behavior than expected.  It is a combination of __GFP_THISNODE,
__GFP_NORETRY, and __GFP_NOWARN and is special-cased in the page
allocator slowpath to fail without trying reclaim even though it may be
used in combination with __GFP_WAIT.

An example of the problem this creates: commit e97ca8e5b864 ("mm: fix
GFP_THISNODE callers and clarify") fixed up many users of GFP_THISNODE
that really just wanted __GFP_THISNODE.  The problem doesn't end there,
however, because even that fix was a no-op for
alloc_misplaced_dst_page(), which also sets __GFP_NORETRY and
__GFP_NOWARN, and for migrate_misplaced_transhuge_page(), where
__GFP_NORETRY and __GFP_NOWAIT are set in GFP_TRANSHUGE.  Converting
GFP_THISNODE to __GFP_THISNODE is a no-op in these cases since the page
allocator special-cases __GFP_THISNODE && __GFP_NORETRY && __GFP_NOWARN.

It's time to just remove GFP_THISNODE entirely.  We leave __GFP_THISNODE
to restrict an allocation to a local node, but remove GFP_THISNODE and
its obscurity.  Instead, we require that a caller clear __GFP_WAIT if it
wants to avoid reclaim.

This allows the aforementioned functions to actually reclaim as they
should.  It also enables any future callers that want __GFP_THISNODE
together with __GFP_NORETRY && __GFP_NOWARN to reclaim.  The rule is
simple: if you don't want to reclaim, then don't set __GFP_WAIT.

Aside: ovs_flow_stats_update() really wants to avoid reclaim as well, so
it is unchanged.

Signed-off-by: David Rientjes <rientjes@google.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Christoph Lameter <cl@linux.com>
Acked-by: Pekka Enberg <penberg@kernel.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Pravin Shelar <pshelar@nicira.com>
Cc: Jarno Rajahalme <jrajahalme@nicira.com>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
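
To make the surprise concrete, an illustrative sketch (not part of the
patch; the mask arithmetic follows the gfp definitions as of this commit):

    /* Pre-patch: this mask looks like it is allowed to reclaim, since
     * __GFP_WAIT is set ... */
    gfp_t mask = GFP_THISNODE | __GFP_WAIT;
    /* ... but GFP_THISNODE expanded to
     * __GFP_THISNODE | __GFP_NORETRY | __GFP_NOWARN, and the slowpath
     * tested (gfp_mask & GFP_THISNODE) == GFP_THISNODE, so this
     * allocation failed immediately without ever entering reclaim.
     * Post-patch, the same mask reclaims; a caller must clear
     * __GFP_WAIT to fail fast instead. */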
 include/linux/gfp.h    | 10 ----------
 mm/page_alloc.c        | 22 ++++++----------------
 mm/slab.c              | 22 ++++++++++++++++++----
 net/openvswitch/flow.c |  4 +++-
 4 files changed, 27 insertions(+), 31 deletions(-)
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 51bd1e72a917..4423a0f8eabe 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -117,16 +117,6 @@ struct vm_area_struct;
 			 __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | \
 			 __GFP_NO_KSWAPD)
 
-/*
- * GFP_THISNODE does not perform any reclaim, you most likely want to
- * use __GFP_THISNODE to allocate from a given node without fallback!
- */
-#ifdef CONFIG_NUMA
-#define GFP_THISNODE	(__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY)
-#else
-#define GFP_THISNODE	((__force gfp_t)0)
-#endif
-
 /* This mask makes up all the page movable related flags */
 #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)
 
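
Former GFP_THISNODE users that genuinely want fail-fast, node-exact
behavior can now spell it out.  A hedged sketch (the call site and nid
are hypothetical; only flag and function names from the kernel at this
commit are used):

    /* Node-exact, quiet, fail-fast: clearing __GFP_WAIT is what now
     * guarantees the allocator will neither reclaim nor retry. */
    struct page *page = alloc_pages_node(nid,
    			(GFP_KERNEL | __GFP_THISNODE | __GFP_NOWARN) &
    			~__GFP_WAIT, 0);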
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 84466a4b1b36..86af1a96a6dc 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2412,13 +2412,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 			*did_some_progress = 1;
 			goto out;
 		}
-		/*
-		 * GFP_THISNODE contains __GFP_NORETRY and we never hit this.
-		 * Sanity check for bare calls of __GFP_THISNODE, not real OOM.
-		 * The caller should handle page allocation failure by itself if
-		 * it specifies __GFP_THISNODE.
-		 * Note: Hugepage uses it but will hit PAGE_ALLOC_COSTLY_ORDER.
-		 */
+		/* The OOM killer may not free memory on a specific node */
 		if (gfp_mask & __GFP_THISNODE)
 			goto out;
 	}
@@ -2673,15 +2667,11 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	}
 
 	/*
-	 * GFP_THISNODE (meaning __GFP_THISNODE, __GFP_NORETRY and
-	 * __GFP_NOWARN set) should not cause reclaim since the subsystem
-	 * (f.e. slab) using GFP_THISNODE may choose to trigger reclaim
-	 * using a larger set of nodes after it has established that the
-	 * allowed per node queues are empty and that nodes are
-	 * over allocated.
+	 * If this allocation cannot block and it is for a specific node, then
+	 * fail early.  There's no need to wakeup kswapd or retry for a
+	 * speculative node-specific allocation.
 	 */
-	if (IS_ENABLED(CONFIG_NUMA) &&
-		(gfp_mask & GFP_THISNODE) == GFP_THISNODE)
+	if (IS_ENABLED(CONFIG_NUMA) && (gfp_mask & __GFP_THISNODE) && !wait)
 		goto nopage;
 
 retry:
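
Here `wait` is derived earlier in __alloc_pages_slowpath() as
gfp_mask & __GFP_WAIT, so the rewritten check can be read as the
following (an illustrative paraphrase, not part of the patch):

    /* Node-pinned and able to block: fall through to kswapd/reclaim.
     * Node-pinned and atomic (__GFP_WAIT clear): fail fast via nopage,
     * without waking kswapd or retrying. */
    if (IS_ENABLED(CONFIG_NUMA) && (gfp_mask & __GFP_THISNODE) &&
        !(gfp_mask & __GFP_WAIT))
    	goto nopage;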
@@ -2874,7 +2864,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 	/*
 	 * Check the zones suitable for the gfp_mask contain at least one
 	 * valid zone. It's possible to have an empty zonelist as a result
-	 * of GFP_THISNODE and a memoryless node
+	 * of __GFP_THISNODE and a memoryless node
 	 */
 	if (unlikely(!zonelist->_zonerefs->zone))
 		return NULL;
diff --git a/mm/slab.c b/mm/slab.c
index c4b89eaf4c96..7eb38dd1cefa 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -857,6 +857,11 @@ static inline void *____cache_alloc_node(struct kmem_cache *cachep,
 	return NULL;
 }
 
+static inline gfp_t gfp_exact_node(gfp_t flags)
+{
+	return flags;
+}
+
 #else	/* CONFIG_NUMA */
 
 static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int);
@@ -1023,6 +1028,15 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
 
 	return __cache_free_alien(cachep, objp, node, page_node);
 }
+
+/*
+ * Construct gfp mask to allocate from a specific node but do not invoke
+ * reclaim or warn about failures.
+ */
+static inline gfp_t gfp_exact_node(gfp_t flags)
+{
+	return (flags | __GFP_THISNODE | __GFP_NOWARN) & ~__GFP_WAIT;
+}
 #endif
 
 /*
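
The NUMA variant's effect can be seen by expanding a typical call (an
illustrative walk-through, not part of the patch; GFP_KERNEL is just an
example input):

    gfp_t flags = GFP_KERNEL;                  /* includes __GFP_WAIT */
    flags |= __GFP_THISNODE | __GFP_NOWARN;    /* pin the node, stay quiet */
    flags &= ~__GFP_WAIT;                      /* never block or reclaim */
    /* ... which is exactly gfp_exact_node(GFP_KERNEL) */

On !CONFIG_NUMA kernels there is a single node, so the stub added in the
!CONFIG_NUMA branch above returns the flags unchanged; this matches the
old behavior, where GFP_THISNODE was defined as 0 there.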
@@ -2825,7 +2839,7 @@ alloc_done:
 	if (unlikely(!ac->avail)) {
 		int x;
 force_grow:
-		x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);
+		x = cache_grow(cachep, gfp_exact_node(flags), node, NULL);
 
 		/* cache_grow can reenable interrupts, then ac could change. */
 		ac = cpu_cache_get(cachep);
@@ -3019,7 +3033,7 @@ retry:
 			get_node(cache, nid) &&
 			get_node(cache, nid)->free_objects) {
 				obj = ____cache_alloc_node(cache,
-					flags | GFP_THISNODE, nid);
+					gfp_exact_node(flags), nid);
 				if (obj)
 					break;
 		}
@@ -3047,7 +3061,7 @@ retry:
 			nid = page_to_nid(page);
 			if (cache_grow(cache, flags, nid, page)) {
 				obj = ____cache_alloc_node(cache,
-					flags | GFP_THISNODE, nid);
+					gfp_exact_node(flags), nid);
 				if (!obj)
 					/*
 					 * Another processor may allocate the
@@ -3118,7 +3132,7 @@ retry:
 
 must_grow:
 	spin_unlock(&n->list_lock);
-	x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL);
+	x = cache_grow(cachep, gfp_exact_node(flags), nodeid, NULL);
 	if (x)
 		goto retry;
 
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 50ec42f170a0..2dacc7b5af23 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -100,7 +100,9 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
 
 			new_stats =
 				kmem_cache_alloc_node(flow_stats_cache,
-						      GFP_THISNODE |
+						      GFP_NOWAIT |
+						      __GFP_THISNODE |
+						      __GFP_NOWARN |
 						      __GFP_NOMEMALLOC,
 						      node);
 			if (likely(new_stats)) {
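
The conversion preserves this caller's intent to avoid reclaim.  A sketch
of the before/after masks (assuming the gfp definitions as of this
commit):

    /* Old:  GFP_THISNODE | __GFP_NOMEMALLOC
     *    == __GFP_THISNODE | __GFP_NORETRY | __GFP_NOWARN | __GFP_NOMEMALLOC
     * New:  GFP_NOWAIT | __GFP_THISNODE | __GFP_NOWARN | __GFP_NOMEMALLOC
     *
     * Both leave __GFP_WAIT clear, so the allocation still never reclaims;
     * __GFP_NORETRY can be dropped because the slowpath never retries an
     * allocation that cannot wait. */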