aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux
diff options
context:
space:
mode:
authorKAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>2009-08-06 18:07:33 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2009-08-07 13:39:55 -0400
commit4bfc44958e499af9a73f62201543b3a1f617cfeb (patch)
tree01b40f951cd1a5e6dda8e2b21e1e24650c43c95e /include/linux
parent93274e4d4e9416ad1fa47e2f26011e2c483fe5fe (diff)
mm: make set_mempolicy(MPOL_INTERLEAV) N_HIGH_MEMORY aware
At first, init_task's mems_allowed is initialized as this. init_task->mems_allowed == node_state[N_POSSIBLE] And cpuset's top_cpuset mask is initialized as this top_cpuset->mems_allowed = node_state[N_HIGH_MEMORY] Before 2.6.29: policy's mems_allowed is initialized as this. 1. update tasks->mems_allowed by its cpuset->mems_allowed. 2. policy->mems_allowed = nodes_and(tasks->mems_allowed, user's mask) Updating task's mems_allowed in reference to top_cpuset's one. cpuset's mems_allowed is aware of N_HIGH_MEMORY, always. In 2.6.30: After commit 58568d2a8215cb6f55caf2332017d7bdff954e1c ("cpuset,mm: update tasks' mems_allowed in time"), policy's mems_allowed is initialized as this. 1. policy->mems_allowd = nodes_and(task->mems_allowed, user's mask) Here, if task is in top_cpuset, task->mems_allowed is not updated from init's one. Assume user excutes command as #numactrl --interleave=all ,.... policy->mems_allowd = nodes_and(N_POSSIBLE, ALL_SET_MASK) Then, policy's mems_allowd can includes a possible node, which has no pgdat. MPOL's INTERLEAVE just scans nodemask of task->mems_allowd and access this directly. NODE_DATA(nid)->zonelist even if NODE_DATA(nid)==NULL Then, what's we need is making policy->mems_allowed be aware of N_HIGH_MEMORY. This patch does that. But to do so, extra nodemask will be on statck. Because I know cpumask has a new interface of CPUMASK_ALLOC(), I added it to node. This patch stands on old behavior. But I feel this fix itself is just a Band-Aid. But to do fundametal fix, we have to take care of memory hotplug and it takes time. (task->mems_allowd should be N_HIGH_MEMORY, I think.) mpol_set_nodemask() should be aware of N_HIGH_MEMORY and policy's nodemask should be includes only online nodes. In old behavior, this is guaranteed by frequent reference to cpuset's code. Now, most of them are removed and mempolicy has to check it by itself. To do check, a few nodemask_t will be used for calculating nodemask. But, size of nodemask_t can be big and it's not good to allocate them on stack. Now, cpumask_t has CPUMASK_ALLOC/FREE an easy code for get scratch area. NODEMASK_ALLOC/FREE shoudl be there. [akpm@linux-foundation.org: cleanups & tweaks] Tested-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Miao Xie <miaox@cn.fujitsu.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Christoph Lameter <cl@linux-foundation.org> Cc: Paul Menage <menage@google.com> Cc: Nick Piggin <nickpiggin@yahoo.com.au> Cc: Yasunori Goto <y-goto@jp.fujitsu.com> Cc: Pekka Enberg <penberg@cs.helsinki.fi> Cc: David Rientjes <rientjes@google.com> Cc: Lee Schermerhorn <lee.schermerhorn@hp.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/nodemask.h28
1 files changed, 28 insertions, 0 deletions
diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index 829b94b156f2..b359c4a9ec9e 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -82,6 +82,12 @@
82 * to generate slightly worse code. So use a simple one-line #define 82 * to generate slightly worse code. So use a simple one-line #define
83 * for node_isset(), instead of wrapping an inline inside a macro, the 83 * for node_isset(), instead of wrapping an inline inside a macro, the
84 * way we do the other calls. 84 * way we do the other calls.
85 *
86 * NODEMASK_SCRATCH
87 * When doing above logical AND, OR, XOR, Remap operations the callers tend to
88 * need temporary nodemask_t's on the stack. But if NODES_SHIFT is large,
89 * nodemask_t's consume too much stack space. NODEMASK_SCRATCH is a helper
90 * for such situations. See below and CPUMASK_ALLOC also.
85 */ 91 */
86 92
87#include <linux/kernel.h> 93#include <linux/kernel.h>
@@ -473,4 +479,26 @@ static inline int num_node_state(enum node_states state)
473#define for_each_node(node) for_each_node_state(node, N_POSSIBLE) 479#define for_each_node(node) for_each_node_state(node, N_POSSIBLE)
474#define for_each_online_node(node) for_each_node_state(node, N_ONLINE) 480#define for_each_online_node(node) for_each_node_state(node, N_ONLINE)
475 481
482/*
483 * For nodemask scrach area.(See CPUMASK_ALLOC() in cpumask.h)
484 */
485
486#if NODES_SHIFT > 8 /* nodemask_t > 64 bytes */
487#define NODEMASK_ALLOC(x, m) struct x *m = kmalloc(sizeof(*m), GFP_KERNEL)
488#define NODEMASK_FREE(m) kfree(m)
489#else
490#define NODEMASK_ALLOC(x, m) struct x _m, *m = &_m
491#define NODEMASK_FREE(m)
492#endif
493
494/* A example struture for using NODEMASK_ALLOC, used in mempolicy. */
495struct nodemask_scratch {
496 nodemask_t mask1;
497 nodemask_t mask2;
498};
499
500#define NODEMASK_SCRATCH(x) NODEMASK_ALLOC(nodemask_scratch, x)
501#define NODEMASK_SCRATCH_FREE(x) NODEMASK_FREE(x)
502
503
476#endif /* __LINUX_NODEMASK_H */ 504#endif /* __LINUX_NODEMASK_H */