aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJohannes Weiner <hannes@cmpxchg.org>2014-03-10 18:49:43 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2014-03-10 20:26:19 -0400
commite97ca8e5b864f88b028c1759ba8536fa827d6d96 (patch)
tree37b58e8435cd1fc0f207ba51791da88b7488c749
parentfa389e220254c69ffae0d403eac4146171062d08 (diff)
mm: fix GFP_THISNODE callers and clarify
GFP_THISNODE is for callers that implement their own clever fallback to remote nodes. It restricts the allocation to the specified node and does not invoke reclaim, assuming that the caller will take care of it when the fallback fails, e.g. through a subsequent allocation request without GFP_THISNODE set. However, many current GFP_THISNODE users only want the node exclusive aspect of the flag, without actually implementing their own fallback or triggering reclaim if necessary. This results in things like page migration failing prematurely even when there is easily reclaimable memory available, unless kswapd happens to be running already or a concurrent allocation attempt triggers the necessary reclaim. Convert all callsites that don't implement their own fallback strategy to __GFP_THISNODE. This restricts the allocation a single node too, but at the same time allows the allocator to enter the slowpath, wake kswapd, and invoke direct reclaim if necessary, to make the allocation happen when memory is full. Signed-off-by: Johannes Weiner <hannes@cmpxchg.org> Acked-by: Rik van Riel <riel@redhat.com> Cc: Jan Stancek <jstancek@redhat.com> Cc: Mel Gorman <mgorman@suse.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--arch/ia64/kernel/uncached.c2
-rw-r--r--arch/powerpc/platforms/cell/ras.c3
-rw-r--r--drivers/misc/sgi-xp/xpc_uv.c2
-rw-r--r--include/linux/gfp.h4
-rw-r--r--include/linux/mmzone.h4
-rw-r--r--include/linux/slab.h2
-rw-r--r--kernel/profile.c4
-rw-r--r--mm/migrate.c11
8 files changed, 19 insertions, 13 deletions
diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c
index a96bcf83a735..20e8a9b21d75 100644
--- a/arch/ia64/kernel/uncached.c
+++ b/arch/ia64/kernel/uncached.c
@@ -98,7 +98,7 @@ static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid)
98 /* attempt to allocate a granule's worth of cached memory pages */ 98 /* attempt to allocate a granule's worth of cached memory pages */
99 99
100 page = alloc_pages_exact_node(nid, 100 page = alloc_pages_exact_node(nid,
101 GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, 101 GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE,
102 IA64_GRANULE_SHIFT-PAGE_SHIFT); 102 IA64_GRANULE_SHIFT-PAGE_SHIFT);
103 if (!page) { 103 if (!page) {
104 mutex_unlock(&uc_pool->add_chunk_mutex); 104 mutex_unlock(&uc_pool->add_chunk_mutex);
diff --git a/arch/powerpc/platforms/cell/ras.c b/arch/powerpc/platforms/cell/ras.c
index 5ec1e47a0d77..e865d748179b 100644
--- a/arch/powerpc/platforms/cell/ras.c
+++ b/arch/powerpc/platforms/cell/ras.c
@@ -123,7 +123,8 @@ static int __init cbe_ptcal_enable_on_node(int nid, int order)
123 123
124 area->nid = nid; 124 area->nid = nid;
125 area->order = order; 125 area->order = order;
126 area->pages = alloc_pages_exact_node(area->nid, GFP_KERNEL|GFP_THISNODE, 126 area->pages = alloc_pages_exact_node(area->nid,
127 GFP_KERNEL|__GFP_THISNODE,
127 area->order); 128 area->order);
128 129
129 if (!area->pages) { 130 if (!area->pages) {
diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c
index b9e2000969f0..95c894482fdd 100644
--- a/drivers/misc/sgi-xp/xpc_uv.c
+++ b/drivers/misc/sgi-xp/xpc_uv.c
@@ -240,7 +240,7 @@ xpc_create_gru_mq_uv(unsigned int mq_size, int cpu, char *irq_name,
240 240
241 nid = cpu_to_node(cpu); 241 nid = cpu_to_node(cpu);
242 page = alloc_pages_exact_node(nid, 242 page = alloc_pages_exact_node(nid,
243 GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, 243 GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE,
244 pg_order); 244 pg_order);
245 if (page == NULL) { 245 if (page == NULL) {
246 dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to alloc %d " 246 dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to alloc %d "
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 0437439bc047..39b81dc7d01a 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -123,6 +123,10 @@ struct vm_area_struct;
123 __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | \ 123 __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | \
124 __GFP_NO_KSWAPD) 124 __GFP_NO_KSWAPD)
125 125
126/*
127 * GFP_THISNODE does not perform any reclaim, you most likely want to
128 * use __GFP_THISNODE to allocate from a given node without fallback!
129 */
126#ifdef CONFIG_NUMA 130#ifdef CONFIG_NUMA
127#define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY) 131#define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY)
128#else 132#else
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 5f2052c83154..9b61b9bf81ac 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -590,10 +590,10 @@ static inline bool zone_is_empty(struct zone *zone)
590 590
591/* 591/*
592 * The NUMA zonelists are doubled because we need zonelists that restrict the 592 * The NUMA zonelists are doubled because we need zonelists that restrict the
593 * allocations to a single node for GFP_THISNODE. 593 * allocations to a single node for __GFP_THISNODE.
594 * 594 *
595 * [0] : Zonelist with fallback 595 * [0] : Zonelist with fallback
596 * [1] : No fallback (GFP_THISNODE) 596 * [1] : No fallback (__GFP_THISNODE)
597 */ 597 */
598#define MAX_ZONELISTS 2 598#define MAX_ZONELISTS 2
599 599
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 9260abdd67df..b5b2df60299e 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -410,7 +410,7 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
410 * 410 *
411 * %GFP_NOWAIT - Allocation will not sleep. 411 * %GFP_NOWAIT - Allocation will not sleep.
412 * 412 *
413 * %GFP_THISNODE - Allocate node-local memory only. 413 * %__GFP_THISNODE - Allocate node-local memory only.
414 * 414 *
415 * %GFP_DMA - Allocation suitable for DMA. 415 * %GFP_DMA - Allocation suitable for DMA.
416 * Should only be used for kmalloc() caches. Otherwise, use a 416 * Should only be used for kmalloc() caches. Otherwise, use a
diff --git a/kernel/profile.c b/kernel/profile.c
index 6631e1ef55ab..ebdd9c1a86b4 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -549,14 +549,14 @@ static int create_hash_tables(void)
549 struct page *page; 549 struct page *page;
550 550
551 page = alloc_pages_exact_node(node, 551 page = alloc_pages_exact_node(node,
552 GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, 552 GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE,
553 0); 553 0);
554 if (!page) 554 if (!page)
555 goto out_cleanup; 555 goto out_cleanup;
556 per_cpu(cpu_profile_hits, cpu)[1] 556 per_cpu(cpu_profile_hits, cpu)[1]
557 = (struct profile_hit *)page_address(page); 557 = (struct profile_hit *)page_address(page);
558 page = alloc_pages_exact_node(node, 558 page = alloc_pages_exact_node(node,
559 GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, 559 GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE,
560 0); 560 0);
561 if (!page) 561 if (!page)
562 goto out_cleanup; 562 goto out_cleanup;
diff --git a/mm/migrate.c b/mm/migrate.c
index 482a33d89134..b494fdb9a636 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1158,7 +1158,7 @@ static struct page *new_page_node(struct page *p, unsigned long private,
1158 pm->node); 1158 pm->node);
1159 else 1159 else
1160 return alloc_pages_exact_node(pm->node, 1160 return alloc_pages_exact_node(pm->node,
1161 GFP_HIGHUSER_MOVABLE | GFP_THISNODE, 0); 1161 GFP_HIGHUSER_MOVABLE | __GFP_THISNODE, 0);
1162} 1162}
1163 1163
1164/* 1164/*
@@ -1544,9 +1544,9 @@ static struct page *alloc_misplaced_dst_page(struct page *page,
1544 struct page *newpage; 1544 struct page *newpage;
1545 1545
1546 newpage = alloc_pages_exact_node(nid, 1546 newpage = alloc_pages_exact_node(nid,
1547 (GFP_HIGHUSER_MOVABLE | GFP_THISNODE | 1547 (GFP_HIGHUSER_MOVABLE |
1548 __GFP_NOMEMALLOC | __GFP_NORETRY | 1548 __GFP_THISNODE | __GFP_NOMEMALLOC |
1549 __GFP_NOWARN) & 1549 __GFP_NORETRY | __GFP_NOWARN) &
1550 ~GFP_IOFS, 0); 1550 ~GFP_IOFS, 0);
1551 1551
1552 return newpage; 1552 return newpage;
@@ -1747,7 +1747,8 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
1747 goto out_dropref; 1747 goto out_dropref;
1748 1748
1749 new_page = alloc_pages_node(node, 1749 new_page = alloc_pages_node(node,
1750 (GFP_TRANSHUGE | GFP_THISNODE) & ~__GFP_WAIT, HPAGE_PMD_ORDER); 1750 (GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_WAIT,
1751 HPAGE_PMD_ORDER);
1751 if (!new_page) 1752 if (!new_page)
1752 goto out_fail; 1753 goto out_fail;
1753 1754