aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author David Rientjes <rientjes@google.com> 2009-12-14 20:58:38 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2009-12-15 11:53:13 -0500
commit bad44b5be84cf3bb1ff900bec02ee61e1993328c (patch)
tree edd9a9cc2f482ca8684c9e64abe4b977a5525615
parent 39da08cb074cf19cb249832a2a955dfb28837e65 (diff)
mm: add gfp flags for NODEMASK_ALLOC slab allocations
Objects passed to NODEMASK_ALLOC() are relatively small in size and are backed by slab caches that are not of large order, traditionally never greater than PAGE_ALLOC_COSTLY_ORDER. Thus, using GFP_KERNEL for these allocations on large machines when CONFIG_NODES_SHIFT > 8 will cause the page allocator to loop endlessly in the allocation attempt, each time invoking both direct reclaim and the oom killer.

This is of particular interest when using NODEMASK_ALLOC() from a mempolicy context (either directly in mm/mempolicy.c or the mempolicy constrained hugetlb allocations) since the oom killer always kills current when allocations are constrained by mempolicies. So for all present use cases in the kernel, current would end up being oom killed when direct reclaim fails. That would allow the NODEMASK_ALLOC() to succeed but current would have sacrificed itself upon returning.

This patch adds gfp flags to NODEMASK_ALLOC() to pass to kmalloc() on CONFIG_NODES_SHIFT > 8; this parameter is a nop on other configurations. All current use cases either directly from hugetlb code or indirectly via NODEMASK_SCRATCH() union __GFP_NORETRY to avoid direct reclaim and the oom killer when the slab allocator needs to allocate additional pages.

The side-effect of this change is that all current use cases of either NODEMASK_ALLOC() or NODEMASK_SCRATCH() need appropriate -ENOMEM handling when the allocation fails (never for CONFIG_NODES_SHIFT <= 8). All current use cases were audited and do have appropriate error handling at this time.
Signed-off-by: David Rientjes <rientjes@google.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Nishanth Aravamudan <nacc@us.ibm.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Adam Litke <agl@us.ibm.com>
Cc: Andy Whitcroft <apw@canonical.com>
Cc: Eric Whitney <eric.whitney@hp.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- include/linux/nodemask.h 21
-rw-r--r-- mm/hugetlb.c 5
2 files changed, 15 insertions, 11 deletions
diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index cbd521a03127..454997cccbd8 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -485,15 +485,17 @@ static inline int num_node_state(enum node_states state)
485#define for_each_online_node(node) for_each_node_state(node, N_ONLINE) 485#define for_each_online_node(node) for_each_node_state(node, N_ONLINE)
486 486
487/* 487/*
488 * For nodemask scrach area.(See CPUMASK_ALLOC() in cpumask.h) 488 * For nodemask scrach area.
489 * NODEMASK_ALLOC(x, m) allocates an object of type 'x' with the name 'm'. 489 * NODEMASK_ALLOC(type, name) allocates an object with a specified type and
490 * name.
490 */ 491 */
491#if NODES_SHIFT > 8 /* nodemask_t > 64 bytes */ 492#if NODES_SHIFT > 8 /* nodemask_t > 256 bytes */
492#define NODEMASK_ALLOC(x, m) x *m = kmalloc(sizeof(*m), GFP_KERNEL) 493#define NODEMASK_ALLOC(type, name, gfp_flags) \
493#define NODEMASK_FREE(m) kfree(m) 494 type *name = kmalloc(sizeof(*name), gfp_flags)
495#define NODEMASK_FREE(m) kfree(m)
494#else 496#else
495#define NODEMASK_ALLOC(x, m) x _m, *m = &_m 497#define NODEMASK_ALLOC(type, name, gfp_flags) type _name, *name = &_name
496#define NODEMASK_FREE(m) do {} while (0) 498#define NODEMASK_FREE(m) do {} while (0)
497#endif 499#endif
498 500
499/* A example struture for using NODEMASK_ALLOC, used in mempolicy. */ 501/* A example struture for using NODEMASK_ALLOC, used in mempolicy. */
@@ -502,8 +504,9 @@ struct nodemask_scratch {
502 nodemask_t mask2; 504 nodemask_t mask2;
503}; 505};
504 506
505#define NODEMASK_SCRATCH(x) \ 507#define NODEMASK_SCRATCH(x) \
506 NODEMASK_ALLOC(struct nodemask_scratch, x) 508 NODEMASK_ALLOC(struct nodemask_scratch, x, \
509 GFP_KERNEL | __GFP_NORETRY)
507#define NODEMASK_SCRATCH_FREE(x) NODEMASK_FREE(x) 510#define NODEMASK_SCRATCH_FREE(x) NODEMASK_FREE(x)
508 511
509 512
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index b4a263512cb7..450493d25572 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1361,7 +1361,7 @@ static ssize_t nr_hugepages_store_common(bool obey_mempolicy,
1361 int nid; 1361 int nid;
1362 unsigned long count; 1362 unsigned long count;
1363 struct hstate *h; 1363 struct hstate *h;
1364 NODEMASK_ALLOC(nodemask_t, nodes_allowed); 1364 NODEMASK_ALLOC(nodemask_t, nodes_allowed, GFP_KERNEL | __GFP_NORETRY);
1365 1365
1366 err = strict_strtoul(buf, 10, &count); 1366 err = strict_strtoul(buf, 10, &count);
1367 if (err) 1367 if (err)
@@ -1857,7 +1857,8 @@ static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
1857 proc_doulongvec_minmax(table, write, buffer, length, ppos); 1857 proc_doulongvec_minmax(table, write, buffer, length, ppos);
1858 1858
1859 if (write) { 1859 if (write) {
1860 NODEMASK_ALLOC(nodemask_t, nodes_allowed); 1860 NODEMASK_ALLOC(nodemask_t, nodes_allowed,
1861 GFP_KERNEL | __GFP_NORETRY);
1861 if (!(obey_mempolicy && 1862 if (!(obey_mempolicy &&
1862 init_nodemask_of_mempolicy(nodes_allowed))) { 1863 init_nodemask_of_mempolicy(nodes_allowed))) {
1863 NODEMASK_FREE(nodes_allowed); 1864 NODEMASK_FREE(nodes_allowed);