diff options
author | David Rientjes <rientjes@google.com> | 2015-04-14 18:46:55 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-04-14 19:49:03 -0400 |
commit | 4167e9b2cf10f8a4bcda0c713ddc8bb0a18e8187 (patch) | |
tree | 744caf92870f2afa4facca7cdfbb6315e71b6592 /mm | |
parent | b360edb43f8ed50aa7b8c9aae7d7557a1a6e32c8 (diff) |
mm: remove GFP_THISNODE
NOTE: this is not about __GFP_THISNODE, this is only about GFP_THISNODE.
GFP_THISNODE is a secret combination of gfp bits that have different
behavior than expected. It is a combination of __GFP_THISNODE,
__GFP_NORETRY, and __GFP_NOWARN and is special-cased in the page
allocator slowpath to fail without trying reclaim even though it may be
used in combination with __GFP_WAIT.
An example of the problem this creates: commit e97ca8e5b864 ("mm: fix
GFP_THISNODE callers and clarify") fixed up many users of GFP_THISNODE
that really just wanted __GFP_THISNODE. The problem doesn't end there,
however, because it was even a no-op for alloc_misplaced_dst_page(),
which also sets __GFP_NORETRY and __GFP_NOWARN, and
migrate_misplaced_transhuge_page(), where __GFP_NORETRY and __GFP_NOWARN
are set in GFP_TRANSHUGE. Converting GFP_THISNODE to __GFP_THISNODE is a
no-op in these cases since the page allocator special-cases
__GFP_THISNODE && __GFP_NORETRY && __GFP_NOWARN.
It's time to just remove GFP_THISNODE entirely. We leave __GFP_THISNODE
to restrict an allocation to a local node, but remove GFP_THISNODE and
its obscurity. Instead, we require that a caller clear __GFP_WAIT if it
wants to avoid reclaim.
This allows the aforementioned functions to actually reclaim as they
should. It also enables any future callers that want to do
__GFP_THISNODE but also __GFP_NORETRY && __GFP_NOWARN to reclaim. The
rule is simple: if you don't want to reclaim, then don't set __GFP_WAIT.
Aside: ovs_flow_stats_update() really wants to avoid reclaim as well, so
it is unchanged.
Signed-off-by: David Rientjes <rientjes@google.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Christoph Lameter <cl@linux.com>
Acked-by: Pekka Enberg <penberg@kernel.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Pravin Shelar <pshelar@nicira.com>
Cc: Jarno Rajahalme <jrajahalme@nicira.com>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/page_alloc.c | 22 | ||||
-rw-r--r-- | mm/slab.c | 22 |
2 files changed, 24 insertions, 20 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 84466a4b1b36..86af1a96a6dc 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -2412,13 +2412,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, | |||
2412 | *did_some_progress = 1; | 2412 | *did_some_progress = 1; |
2413 | goto out; | 2413 | goto out; |
2414 | } | 2414 | } |
2415 | /* | 2415 | /* The OOM killer may not free memory on a specific node */ |
2416 | * GFP_THISNODE contains __GFP_NORETRY and we never hit this. | ||
2417 | * Sanity check for bare calls of __GFP_THISNODE, not real OOM. | ||
2418 | * The caller should handle page allocation failure by itself if | ||
2419 | * it specifies __GFP_THISNODE. | ||
2420 | * Note: Hugepage uses it but will hit PAGE_ALLOC_COSTLY_ORDER. | ||
2421 | */ | ||
2422 | if (gfp_mask & __GFP_THISNODE) | 2416 | if (gfp_mask & __GFP_THISNODE) |
2423 | goto out; | 2417 | goto out; |
2424 | } | 2418 | } |
@@ -2673,15 +2667,11 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, | |||
2673 | } | 2667 | } |
2674 | 2668 | ||
2675 | /* | 2669 | /* |
2676 | * GFP_THISNODE (meaning __GFP_THISNODE, __GFP_NORETRY and | 2670 | * If this allocation cannot block and it is for a specific node, then |
2677 | * __GFP_NOWARN set) should not cause reclaim since the subsystem | 2671 | * fail early. There's no need to wakeup kswapd or retry for a |
2678 | * (f.e. slab) using GFP_THISNODE may choose to trigger reclaim | 2672 | * speculative node-specific allocation. |
2679 | * using a larger set of nodes after it has established that the | ||
2680 | * allowed per node queues are empty and that nodes are | ||
2681 | * over allocated. | ||
2682 | */ | 2673 | */ |
2683 | if (IS_ENABLED(CONFIG_NUMA) && | 2674 | if (IS_ENABLED(CONFIG_NUMA) && (gfp_mask & __GFP_THISNODE) && !wait) |
2684 | (gfp_mask & GFP_THISNODE) == GFP_THISNODE) | ||
2685 | goto nopage; | 2675 | goto nopage; |
2686 | 2676 | ||
2687 | retry: | 2677 | retry: |
@@ -2874,7 +2864,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, | |||
2874 | /* | 2864 | /* |
2875 | * Check the zones suitable for the gfp_mask contain at least one | 2865 | * Check the zones suitable for the gfp_mask contain at least one |
2876 | * valid zone. It's possible to have an empty zonelist as a result | 2866 | * valid zone. It's possible to have an empty zonelist as a result |
2877 | * of GFP_THISNODE and a memoryless node | 2867 | * of __GFP_THISNODE and a memoryless node |
2878 | */ | 2868 | */ |
2879 | if (unlikely(!zonelist->_zonerefs->zone)) | 2869 | if (unlikely(!zonelist->_zonerefs->zone)) |
2880 | return NULL; | 2870 | return NULL; |
@@ -857,6 +857,11 @@ static inline void *____cache_alloc_node(struct kmem_cache *cachep, | |||
857 | return NULL; | 857 | return NULL; |
858 | } | 858 | } |
859 | 859 | ||
860 | static inline gfp_t gfp_exact_node(gfp_t flags) | ||
861 | { | ||
862 | return flags; | ||
863 | } | ||
864 | |||
860 | #else /* CONFIG_NUMA */ | 865 | #else /* CONFIG_NUMA */ |
861 | 866 | ||
862 | static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int); | 867 | static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int); |
@@ -1023,6 +1028,15 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) | |||
1023 | 1028 | ||
1024 | return __cache_free_alien(cachep, objp, node, page_node); | 1029 | return __cache_free_alien(cachep, objp, node, page_node); |
1025 | } | 1030 | } |
1031 | |||
1032 | /* | ||
1033 | * Construct gfp mask to allocate from a specific node but do not invoke reclaim | ||
1034 | * or warn about failures. | ||
1035 | */ | ||
1036 | static inline gfp_t gfp_exact_node(gfp_t flags) | ||
1037 | { | ||
1038 | return (flags | __GFP_THISNODE | __GFP_NOWARN) & ~__GFP_WAIT; | ||
1039 | } | ||
1026 | #endif | 1040 | #endif |
1027 | 1041 | ||
1028 | /* | 1042 | /* |
@@ -2825,7 +2839,7 @@ alloc_done: | |||
2825 | if (unlikely(!ac->avail)) { | 2839 | if (unlikely(!ac->avail)) { |
2826 | int x; | 2840 | int x; |
2827 | force_grow: | 2841 | force_grow: |
2828 | x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL); | 2842 | x = cache_grow(cachep, gfp_exact_node(flags), node, NULL); |
2829 | 2843 | ||
2830 | /* cache_grow can reenable interrupts, then ac could change. */ | 2844 | /* cache_grow can reenable interrupts, then ac could change. */ |
2831 | ac = cpu_cache_get(cachep); | 2845 | ac = cpu_cache_get(cachep); |
@@ -3019,7 +3033,7 @@ retry: | |||
3019 | get_node(cache, nid) && | 3033 | get_node(cache, nid) && |
3020 | get_node(cache, nid)->free_objects) { | 3034 | get_node(cache, nid)->free_objects) { |
3021 | obj = ____cache_alloc_node(cache, | 3035 | obj = ____cache_alloc_node(cache, |
3022 | flags | GFP_THISNODE, nid); | 3036 | gfp_exact_node(flags), nid); |
3023 | if (obj) | 3037 | if (obj) |
3024 | break; | 3038 | break; |
3025 | } | 3039 | } |
@@ -3047,7 +3061,7 @@ retry: | |||
3047 | nid = page_to_nid(page); | 3061 | nid = page_to_nid(page); |
3048 | if (cache_grow(cache, flags, nid, page)) { | 3062 | if (cache_grow(cache, flags, nid, page)) { |
3049 | obj = ____cache_alloc_node(cache, | 3063 | obj = ____cache_alloc_node(cache, |
3050 | flags | GFP_THISNODE, nid); | 3064 | gfp_exact_node(flags), nid); |
3051 | if (!obj) | 3065 | if (!obj) |
3052 | /* | 3066 | /* |
3053 | * Another processor may allocate the | 3067 | * Another processor may allocate the |
@@ -3118,7 +3132,7 @@ retry: | |||
3118 | 3132 | ||
3119 | must_grow: | 3133 | must_grow: |
3120 | spin_unlock(&n->list_lock); | 3134 | spin_unlock(&n->list_lock); |
3121 | x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL); | 3135 | x = cache_grow(cachep, gfp_exact_node(flags), nodeid, NULL); |
3122 | if (x) | 3136 | if (x) |
3123 | goto retry; | 3137 | goto retry; |
3124 | 3138 | ||