author	KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>	2008-02-11 23:30:22 -0500
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>	2008-02-11 23:48:29 -0500
commit	31f1de46b90ad360a16e7af3e277d104961df923 (patch)
tree	a54e8698d4e4d088c4008e0ae91b579b13d2c208
parent	1a510089849ff9f70b654659bf976a6baf3a4833 (diff)
mempolicy: silently restrict nodemask to allowed nodes
KOSAKI Motohiro noted that "numactl --interleave=all ..." failed in the
presence of memoryless nodes.  This patch attempts to fix that problem.

Some background:

numactl --interleave=all calls set_mempolicy(2) with a fully populated
[out to MAXNUMNODES] nodemask.  set_mempolicy() [in do_set_mempolicy()]
calls contextualize_policy(), which requires that the nodemask be a
subset of the current task's mems_allowed; else EINVAL will be
returned.

A task's mems_allowed will always be a subset of
node_states[N_HIGH_MEMORY], i.e., nodes with memory.  So, a fully
populated nodemask will be declared invalid if it includes memoryless
nodes.

NOTE: the same thing will occur when running in a cpuset with a
restricted mems_allowed--for the same reason: the nodemask contains
disallowed nodes.

mbind(2), on the other hand, just masks off any nodes in the nodemask
that are not included in the caller's mems_allowed.

In each case [mbind() and set_mempolicy()], mpol_check_policy() will
complain [again, resulting in EINVAL] if the nodemask contains any
memoryless nodes.  This is somewhat redundant, as mpol_new() will
remove memoryless nodes for interleave policy, as will
bind_zonelist()--called by mpol_new() for BIND policy.

Proposed fix:

1) Modify the contextualize_policy() logic to:

   a) remember whether the incoming nodemask is empty;

   b) if not, restrict the nodemask to the allowed nodes, as is
      currently done in-line for mbind().  This guarantees that the
      resulting mask includes only nodes with memory.

      NOTE: this is a [benign, IMO] change in behavior for
      set_mempolicy().  Disallowed nodes will be silently ignored,
      rather than returning an error;

   c) fold this code into mpol_check_policy(), replace the two calls
      to contextualize_policy() with direct calls to
      mpol_check_policy(), and remove contextualize_policy().

2) In the existing mpol_check_policy() logic, after
   "contextualization":

   a) MPOL_DEFAULT: require that the incoming mask "was_empty";

   b) MPOL_{BIND|INTERLEAVE}: require that the contextualized nodemask
      contains at least one node;

   c) add a case for MPOL_PREFERRED: if the incoming mask was not
      empty and the resulting mask IS empty, the user specified
      invalid nodes.  Return EINVAL;

   d) remove the now-redundant check for memoryless nodes.

3) Remove the now-redundant masking of policy nodes for interleave
   policy from mpol_new().

4) Now that mpol_check_policy() contextualizes the nodemask, remove
   the in-line nodes_and() from sys_mbind().  I believe that this
   restores mbind() to the behavior before the memoryless-nodes patch
   series.  E.g., we'll no longer treat an invalid nodemask with
   MPOL_PREFERRED as local allocation.

[ Patch history:

  v1 -> v2:
   - Communicate whether or not the incoming nodemask was empty to
     mpol_check_policy() for better error checking.
   - As suggested by David Rientjes, remove the now-unused
     cpuset_nodes_subset_current_mems_allowed() from cpuset.h.

  v2 -> v3:
   - As suggested by KOSAKI Motohiro, fold the "contextualization" of
     the policy nodemask into mpol_check_policy().  Looks a little
     cleaner. ]

Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Tested-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
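[ Editor's note: a minimal userspace sketch of the failing case, for
illustration only -- it is not part of the patch.  It mimics what
"numactl --interleave=all" does, using the set_mempolicy(2) wrapper
from libnuma's <numaif.h> (build with -lnuma); MAX_NODES is a
hypothetical stand-in for the kernel's MAXNUMNODES, not a value taken
from the commit. ]

/*
 * Illustrative reproducer: fully populate a nodemask, as
 * "numactl --interleave=all" does, and install an interleave policy.
 * MAX_NODES is an assumed upper bound, like the kernel's MAXNUMNODES.
 */
#include <stdio.h>
#include <string.h>
#include <numaif.h>

#define MAX_NODES 1024

int main(void)
{
	unsigned long mask[MAX_NODES / (8 * sizeof(unsigned long))];

	memset(mask, 0xff, sizeof(mask));	/* set every node bit */

	/*
	 * Pre-patch: fails with EINVAL if any set bit names a memoryless
	 * or cpuset-disallowed node.  Post-patch: such bits are silently
	 * masked off and the policy is installed.
	 */
	if (set_mempolicy(MPOL_INTERLEAVE, mask, MAX_NODES))
		perror("set_mempolicy");
	else
		printf("interleave policy installed\n");
	return 0;
}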
 include/linux/cpuset.h |  3 ---
 mm/mempolicy.c         | 61 ++++++++++++++++++++++++++++----------------
 2 files changed, 36 insertions(+), 28 deletions(-)
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index f8c9a2752f06..0a26be353cb3 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -26,8 +26,6 @@ extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
 #define cpuset_current_mems_allowed (current->mems_allowed)
 void cpuset_init_current_mems_allowed(void);
 void cpuset_update_task_memory_state(void);
-#define cpuset_nodes_subset_current_mems_allowed(nodes) \
-	nodes_subset((nodes), current->mems_allowed)
 int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl);
 
 extern int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask);
@@ -103,7 +101,6 @@ static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
 #define cpuset_current_mems_allowed (node_states[N_HIGH_MEMORY])
 static inline void cpuset_init_current_mems_allowed(void) {}
 static inline void cpuset_update_task_memory_state(void) {}
-#define cpuset_nodes_subset_current_mems_allowed(nodes) (1)
 
 static inline int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl)
 {
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 83c69f8a64c2..8d246c3b340f 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -116,22 +116,51 @@ static void mpol_rebind_policy(struct mempolicy *pol,
 /* Do sanity checking on a policy */
 static int mpol_check_policy(int mode, nodemask_t *nodes)
 {
-	int empty = nodes_empty(*nodes);
+	int was_empty, is_empty;
+
+	if (!nodes)
+		return 0;
+
+	/*
+	 * "Contextualize" the in-coming nodemask for cpusets:
+	 * Remember whether the in-coming nodemask was empty.  If not,
+	 * restrict the nodes to the allowed nodes in the cpuset.
+	 * This is guaranteed to be a subset of nodes with memory.
+	 */
+	cpuset_update_task_memory_state();
+	is_empty = was_empty = nodes_empty(*nodes);
+	if (!was_empty) {
+		nodes_and(*nodes, *nodes, cpuset_current_mems_allowed);
+		is_empty = nodes_empty(*nodes);	/* after "contextualization" */
+	}
 
 	switch (mode) {
 	case MPOL_DEFAULT:
-		if (!empty)
+		/*
+		 * require caller to specify an empty nodemask
+		 * before "contextualization"
+		 */
+		if (!was_empty)
 			return -EINVAL;
 		break;
 	case MPOL_BIND:
 	case MPOL_INTERLEAVE:
-		/* Preferred will only use the first bit, but allow
-		   more for now. */
-		if (empty)
+		/*
+		 * require at least 1 valid node after "contextualization"
+		 */
+		if (is_empty)
+			return -EINVAL;
+		break;
+	case MPOL_PREFERRED:
+		/*
+		 * Did caller specify invalid nodes?
+		 * Don't silently accept this as "local allocation".
+		 */
+		if (!was_empty && is_empty)
 			return -EINVAL;
 		break;
 	}
-	return nodes_subset(*nodes, node_states[N_HIGH_MEMORY]) ? 0 : -EINVAL;
+	return 0;
 }
 
 /* Generate a custom zonelist for the BIND policy. */
@@ -188,8 +217,6 @@ static struct mempolicy *mpol_new(int mode, nodemask_t *nodes)
 	switch (mode) {
 	case MPOL_INTERLEAVE:
 		policy->v.nodes = *nodes;
-		nodes_and(policy->v.nodes, policy->v.nodes,
-				node_states[N_HIGH_MEMORY]);
 		if (nodes_weight(policy->v.nodes) == 0) {
 			kmem_cache_free(policy_cache, policy);
 			return ERR_PTR(-EINVAL);
@@ -421,18 +448,6 @@ static int mbind_range(struct vm_area_struct *vma, unsigned long start,
 	return err;
 }
 
-static int contextualize_policy(int mode, nodemask_t *nodes)
-{
-	if (!nodes)
-		return 0;
-
-	cpuset_update_task_memory_state();
-	if (!cpuset_nodes_subset_current_mems_allowed(*nodes))
-		return -EINVAL;
-	return mpol_check_policy(mode, nodes);
-}
-
-
 /*
  * Update task->flags PF_MEMPOLICY bit: set iff non-default
  * mempolicy.  Allows more rapid checking of this (combined perhaps
@@ -468,7 +483,7 @@ static long do_set_mempolicy(int mode, nodemask_t *nodes)
 {
 	struct mempolicy *new;
 
-	if (contextualize_policy(mode, nodes))
+	if (mpol_check_policy(mode, nodes))
 		return -EINVAL;
 	new = mpol_new(mode, nodes);
 	if (IS_ERR(new))
@@ -915,10 +930,6 @@ asmlinkage long sys_mbind(unsigned long start, unsigned long len,
 	err = get_nodes(&nodes, nmask, maxnode);
 	if (err)
 		return err;
-#ifdef CONFIG_CPUSETS
-	/* Restrict the nodes to the allowed nodes in the cpuset */
-	nodes_and(nodes, nodes, current->mems_allowed);
-#endif
 	return do_mbind(start, len, mode, &nodes, flags);
 }
 
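[ Editor's note: a hedged sketch of the two user-visible outcomes after
this patch, again illustrative rather than part of the commit.  It
assumes a machine where node 0 has memory and bit 63 names a memoryless
or cpuset-disallowed node; the node numbers and maxnode value are
assumptions, and <numaif.h>/-lnuma is assumed as above. ]

/*
 * Illustrative only; node numbers are assumed, not taken from the
 * patch.  Demonstrates the post-patch set_mempolicy(2) semantics.
 */
#include <stdio.h>
#include <numaif.h>

int main(void)
{
	unsigned long mixed = (1UL << 0) | (1UL << 63);	/* valid + invalid */
	unsigned long bad   = 1UL << 63;		/* invalid only */

	/* The disallowed bit is silently dropped; the bind succeeds on node 0. */
	if (set_mempolicy(MPOL_BIND, &mixed, 64))
		perror("MPOL_BIND");

	/*
	 * Every specified node is invalid: mpol_check_policy() now returns
	 * EINVAL instead of silently degrading to local allocation.
	 */
	if (set_mempolicy(MPOL_PREFERRED, &bad, 64))
		perror("MPOL_PREFERRED");	/* expected to fail */
	return 0;
}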