aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Rientjes <rientjes@google.com>2008-04-28 05:12:34 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2008-04-28 11:58:20 -0400
commit3e1f064562fcff7bf3856bc1d00dfa84d4f121cc (patch)
tree9ebc17449238ab5284b72f634405044376dc816b
parent3842b46de626d1a3c44ad280d67ab0a4dc047d13 (diff)
mempolicy: disallow static or relative flags for local preferred mode
MPOL_F_STATIC_NODES and MPOL_F_RELATIVE_NODES don't mean anything for MPOL_PREFERRED policies that were created with an empty nodemask (for purely local allocations). They'll never be invalidated because the allowed mems of a task change or need to be rebound relative to a cpuset's placement. Also fixes a bug identified by Lee Schermerhorn that disallowed empty nodemasks to be passed to MPOL_PREFERRED to specify local allocations. [A different, somewhat incomplete, patch already existed in 25-rc5-mm1.] Cc: Paul Jackson <pj@sgi.com> Cc: Christoph Lameter <clameter@sgi.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com> Cc: Andi Kleen <ak@suse.de> Cc: Randy Dunlap <randy.dunlap@oracle.com> Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com> Signed-off-by: David Rientjes <rientjes@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--Documentation/vm/numa_memory_policy.txt16
-rw-r--r--mm/mempolicy.c42
2 files changed, 40 insertions, 18 deletions
diff --git a/Documentation/vm/numa_memory_policy.txt b/Documentation/vm/numa_memory_policy.txt
index 706410dfb9e5..1c7dd21623d2 100644
--- a/Documentation/vm/numa_memory_policy.txt
+++ b/Documentation/vm/numa_memory_policy.txt
@@ -205,6 +205,12 @@ Components of Memory Policies
205 local allocation for a specific range of addresses--i.e. for 205 local allocation for a specific range of addresses--i.e. for
206 VMA policies. 206 VMA policies.
207 207
208 It is possible for the user to specify that local allocation is
209 always preferred by passing an empty nodemask with this mode.
210 If an empty nodemask is passed, the policy cannot use the
211 MPOL_F_STATIC_NODES or MPOL_F_RELATIVE_NODES flags described
212 below.
213
208 MPOL_INTERLEAVED: This mode specifies that page allocations be 214 MPOL_INTERLEAVED: This mode specifies that page allocations be
209 interleaved, on a page granularity, across the nodes specified in 215 interleaved, on a page granularity, across the nodes specified in
210 the policy. This mode also behaves slightly differently, based on 216 the policy. This mode also behaves slightly differently, based on
@@ -254,7 +260,10 @@ Components of Memory Policies
254 occurs over that node. If no nodes from the user's nodemask are 260 occurs over that node. If no nodes from the user's nodemask are
255 now allowed, the Default behavior is used. 261 now allowed, the Default behavior is used.
256 262
257 MPOL_F_STATIC_NODES cannot be used with MPOL_F_RELATIVE_NODES. 263 MPOL_F_STATIC_NODES cannot be combined with the
264 MPOL_F_RELATIVE_NODES flag. It also cannot be used for
265 MPOL_PREFERRED policies that were created with an empty nodemask
266 (local allocation).
258 267
259 MPOL_F_RELATIVE_NODES: This flag specifies that the nodemask passed 268 MPOL_F_RELATIVE_NODES: This flag specifies that the nodemask passed
260 by the user will be mapped relative to the set of the task or VMA's 269 by the user will be mapped relative to the set of the task or VMA's
@@ -301,7 +310,10 @@ Components of Memory Policies
301 set of memory nodes allowed by the task's cpuset, as that may 310 set of memory nodes allowed by the task's cpuset, as that may
302 change over time. 311 change over time.
303 312
304 MPOL_F_RELATIVE_NODES cannot be used with MPOL_F_STATIC_NODES. 313 MPOL_F_RELATIVE_NODES cannot be combined with the
314 MPOL_F_STATIC_NODES flag. It also cannot be used for
315 MPOL_PREFERRED policies that were created with an empty nodemask
316 (local allocation).
305 317
306MEMORY POLICY APIs 318MEMORY POLICY APIs
307 319
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index a94d994eaaa8..c1b907789d84 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -181,27 +181,43 @@ static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags,
181{ 181{
182 struct mempolicy *policy; 182 struct mempolicy *policy;
183 nodemask_t cpuset_context_nmask; 183 nodemask_t cpuset_context_nmask;
184 int localalloc = 0;
185 int ret; 184 int ret;
186 185
187 pr_debug("setting mode %d flags %d nodes[0] %lx\n", 186 pr_debug("setting mode %d flags %d nodes[0] %lx\n",
188 mode, flags, nodes ? nodes_addr(*nodes)[0] : -1); 187 mode, flags, nodes ? nodes_addr(*nodes)[0] : -1);
189 188
190 if (mode == MPOL_DEFAULT) 189 if (mode == MPOL_DEFAULT) {
191 return NULL; 190 if (nodes && !nodes_empty(*nodes))
192 if (!nodes || nodes_empty(*nodes)) {
193 if (mode != MPOL_PREFERRED)
194 return ERR_PTR(-EINVAL); 191 return ERR_PTR(-EINVAL);
195 localalloc = 1; /* special case: no mode flags */ 192 return NULL;
196 } 193 }
194 VM_BUG_ON(!nodes);
195
196 /*
197 * MPOL_PREFERRED cannot be used with MPOL_F_STATIC_NODES or
198 * MPOL_F_RELATIVE_NODES if the nodemask is empty (local allocation).
199 * All other modes require a valid pointer to a non-empty nodemask.
200 */
201 if (mode == MPOL_PREFERRED) {
202 if (nodes_empty(*nodes)) {
203 if (((flags & MPOL_F_STATIC_NODES) ||
204 (flags & MPOL_F_RELATIVE_NODES)))
205 return ERR_PTR(-EINVAL);
206 nodes = NULL; /* flag local alloc */
207 }
208 } else if (nodes_empty(*nodes))
209 return ERR_PTR(-EINVAL);
197 policy = kmem_cache_alloc(policy_cache, GFP_KERNEL); 210 policy = kmem_cache_alloc(policy_cache, GFP_KERNEL);
198 if (!policy) 211 if (!policy)
199 return ERR_PTR(-ENOMEM); 212 return ERR_PTR(-ENOMEM);
200 atomic_set(&policy->refcnt, 1); 213 atomic_set(&policy->refcnt, 1);
201 policy->policy = mode; 214 policy->policy = mode;
215 policy->flags = flags;
202 216
203 if (!localalloc) { 217 if (nodes) {
204 policy->flags = flags; 218 /*
219 * cpuset related setup doesn't apply to local allocation
220 */
205 cpuset_update_task_memory_state(); 221 cpuset_update_task_memory_state();
206 if (flags & MPOL_F_RELATIVE_NODES) 222 if (flags & MPOL_F_RELATIVE_NODES)
207 mpol_relative_nodemask(&cpuset_context_nmask, nodes, 223 mpol_relative_nodemask(&cpuset_context_nmask, nodes,
@@ -217,7 +233,7 @@ static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags,
217 } 233 }
218 234
219 ret = mpol_ops[mode].create(policy, 235 ret = mpol_ops[mode].create(policy,
220 localalloc ? NULL : &cpuset_context_nmask); 236 nodes ? &cpuset_context_nmask : NULL);
221 if (ret < 0) { 237 if (ret < 0) {
222 kmem_cache_free(policy_cache, policy); 238 kmem_cache_free(policy_cache, policy);
223 return ERR_PTR(ret); 239 return ERR_PTR(ret);
@@ -259,10 +275,6 @@ static void mpol_rebind_preferred(struct mempolicy *pol,
259{ 275{
260 nodemask_t tmp; 276 nodemask_t tmp;
261 277
262 /*
263 * check 'STATIC_NODES first, as preferred_node == -1 may be
264 * a temporary, "fallback" state for this policy.
265 */
266 if (pol->flags & MPOL_F_STATIC_NODES) { 278 if (pol->flags & MPOL_F_STATIC_NODES) {
267 int node = first_node(pol->w.user_nodemask); 279 int node = first_node(pol->w.user_nodemask);
268 280
@@ -270,12 +282,10 @@ static void mpol_rebind_preferred(struct mempolicy *pol,
270 pol->v.preferred_node = node; 282 pol->v.preferred_node = node;
271 else 283 else
272 pol->v.preferred_node = -1; 284 pol->v.preferred_node = -1;
273 } else if (pol->v.preferred_node == -1) {
274 return; /* no remap required for explicit local alloc */
275 } else if (pol->flags & MPOL_F_RELATIVE_NODES) { 285 } else if (pol->flags & MPOL_F_RELATIVE_NODES) {
276 mpol_relative_nodemask(&tmp, &pol->w.user_nodemask, nodes); 286 mpol_relative_nodemask(&tmp, &pol->w.user_nodemask, nodes);
277 pol->v.preferred_node = first_node(tmp); 287 pol->v.preferred_node = first_node(tmp);
278 } else { 288 } else if (pol->v.preferred_node != -1) {
279 pol->v.preferred_node = node_remap(pol->v.preferred_node, 289 pol->v.preferred_node = node_remap(pol->v.preferred_node,
280 pol->w.cpuset_mems_allowed, 290 pol->w.cpuset_mems_allowed,
281 *nodes); 291 *nodes);