diff options
author | David Rientjes <rientjes@google.com> | 2008-04-28 05:12:34 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-04-28 11:58:20 -0400 |
commit | 3e1f064562fcff7bf3856bc1d00dfa84d4f121cc (patch) | |
tree | 9ebc17449238ab5284b72f634405044376dc816b | |
parent | 3842b46de626d1a3c44ad280d67ab0a4dc047d13 (diff) |
mempolicy: disallow static or relative flags for local preferred mode
MPOL_F_STATIC_NODES and MPOL_F_RELATIVE_NODES don't mean anything for
MPOL_PREFERRED policies that were created with an empty nodemask (for purely
local allocations). Such policies will never be invalidated because the allowed
mems of a task change, nor will they need to be rebound relative to a cpuset's placement.
Also fixes a bug identified by Lee Schermerhorn that prevented empty
nodemasks from being passed to MPOL_PREFERRED to specify local allocations. [A
different, somewhat incomplete, patch already existed in 25-rc5-mm1.]
Cc: Paul Jackson <pj@sgi.com>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Cc: Andi Kleen <ak@suse.de>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | Documentation/vm/numa_memory_policy.txt | 16 | ||||
-rw-r--r-- | mm/mempolicy.c | 42 |
2 files changed, 40 insertions, 18 deletions
diff --git a/Documentation/vm/numa_memory_policy.txt b/Documentation/vm/numa_memory_policy.txt index 706410dfb9e5..1c7dd21623d2 100644 --- a/Documentation/vm/numa_memory_policy.txt +++ b/Documentation/vm/numa_memory_policy.txt | |||
@@ -205,6 +205,12 @@ Components of Memory Policies | |||
205 | local allocation for a specific range of addresses--i.e. for | 205 | local allocation for a specific range of addresses--i.e. for |
206 | VMA policies. | 206 | VMA policies. |
207 | 207 | ||
208 | It is possible for the user to specify that local allocation is | ||
209 | always preferred by passing an empty nodemask with this mode. | ||
210 | If an empty nodemask is passed, the policy cannot use the | ||
211 | MPOL_F_STATIC_NODES or MPOL_F_RELATIVE_NODES flags described | ||
212 | below. | ||
213 | |||
208 | MPOL_INTERLEAVED: This mode specifies that page allocations be | 214 | MPOL_INTERLEAVED: This mode specifies that page allocations be |
209 | interleaved, on a page granularity, across the nodes specified in | 215 | interleaved, on a page granularity, across the nodes specified in |
210 | the policy. This mode also behaves slightly differently, based on | 216 | the policy. This mode also behaves slightly differently, based on |
@@ -254,7 +260,10 @@ Components of Memory Policies | |||
254 | occurs over that node. If no nodes from the user's nodemask are | 260 | occurs over that node. If no nodes from the user's nodemask are |
255 | now allowed, the Default behavior is used. | 261 | now allowed, the Default behavior is used. |
256 | 262 | ||
257 | MPOL_F_STATIC_NODES cannot be used with MPOL_F_RELATIVE_NODES. | 263 | MPOL_F_STATIC_NODES cannot be combined with the |
264 | MPOL_F_RELATIVE_NODES flag. It also cannot be used for | ||
265 | MPOL_PREFERRED policies that were created with an empty nodemask | ||
266 | (local allocation). | ||
258 | 267 | ||
259 | MPOL_F_RELATIVE_NODES: This flag specifies that the nodemask passed | 268 | MPOL_F_RELATIVE_NODES: This flag specifies that the nodemask passed |
260 | by the user will be mapped relative to the set of the task or VMA's | 269 | by the user will be mapped relative to the set of the task or VMA's |
@@ -301,7 +310,10 @@ Components of Memory Policies | |||
301 | set of memory nodes allowed by the task's cpuset, as that may | 310 | set of memory nodes allowed by the task's cpuset, as that may |
302 | change over time. | 311 | change over time. |
303 | 312 | ||
304 | MPOL_F_RELATIVE_NODES cannot be used with MPOL_F_STATIC_NODES. | 313 | MPOL_F_RELATIVE_NODES cannot be combined with the |
314 | MPOL_F_STATIC_NODES flag. It also cannot be used for | ||
315 | MPOL_PREFERRED policies that were created with an empty nodemask | ||
316 | (local allocation). | ||
305 | 317 | ||
306 | MEMORY POLICY APIs | 318 | MEMORY POLICY APIs |
307 | 319 | ||
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index a94d994eaaa8..c1b907789d84 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
@@ -181,27 +181,43 @@ static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags, | |||
181 | { | 181 | { |
182 | struct mempolicy *policy; | 182 | struct mempolicy *policy; |
183 | nodemask_t cpuset_context_nmask; | 183 | nodemask_t cpuset_context_nmask; |
184 | int localalloc = 0; | ||
185 | int ret; | 184 | int ret; |
186 | 185 | ||
187 | pr_debug("setting mode %d flags %d nodes[0] %lx\n", | 186 | pr_debug("setting mode %d flags %d nodes[0] %lx\n", |
188 | mode, flags, nodes ? nodes_addr(*nodes)[0] : -1); | 187 | mode, flags, nodes ? nodes_addr(*nodes)[0] : -1); |
189 | 188 | ||
190 | if (mode == MPOL_DEFAULT) | 189 | if (mode == MPOL_DEFAULT) { |
191 | return NULL; | 190 | if (nodes && !nodes_empty(*nodes)) |
192 | if (!nodes || nodes_empty(*nodes)) { | ||
193 | if (mode != MPOL_PREFERRED) | ||
194 | return ERR_PTR(-EINVAL); | 191 | return ERR_PTR(-EINVAL); |
195 | localalloc = 1; /* special case: no mode flags */ | 192 | return NULL; |
196 | } | 193 | } |
194 | VM_BUG_ON(!nodes); | ||
195 | |||
196 | /* | ||
197 | * MPOL_PREFERRED cannot be used with MPOL_F_STATIC_NODES or | ||
198 | * MPOL_F_RELATIVE_NODES if the nodemask is empty (local allocation). | ||
199 | * All other modes require a valid pointer to a non-empty nodemask. | ||
200 | */ | ||
201 | if (mode == MPOL_PREFERRED) { | ||
202 | if (nodes_empty(*nodes)) { | ||
203 | if (((flags & MPOL_F_STATIC_NODES) || | ||
204 | (flags & MPOL_F_RELATIVE_NODES))) | ||
205 | return ERR_PTR(-EINVAL); | ||
206 | nodes = NULL; /* flag local alloc */ | ||
207 | } | ||
208 | } else if (nodes_empty(*nodes)) | ||
209 | return ERR_PTR(-EINVAL); | ||
197 | policy = kmem_cache_alloc(policy_cache, GFP_KERNEL); | 210 | policy = kmem_cache_alloc(policy_cache, GFP_KERNEL); |
198 | if (!policy) | 211 | if (!policy) |
199 | return ERR_PTR(-ENOMEM); | 212 | return ERR_PTR(-ENOMEM); |
200 | atomic_set(&policy->refcnt, 1); | 213 | atomic_set(&policy->refcnt, 1); |
201 | policy->policy = mode; | 214 | policy->policy = mode; |
215 | policy->flags = flags; | ||
202 | 216 | ||
203 | if (!localalloc) { | 217 | if (nodes) { |
204 | policy->flags = flags; | 218 | /* |
219 | * cpuset related setup doesn't apply to local allocation | ||
220 | */ | ||
205 | cpuset_update_task_memory_state(); | 221 | cpuset_update_task_memory_state(); |
206 | if (flags & MPOL_F_RELATIVE_NODES) | 222 | if (flags & MPOL_F_RELATIVE_NODES) |
207 | mpol_relative_nodemask(&cpuset_context_nmask, nodes, | 223 | mpol_relative_nodemask(&cpuset_context_nmask, nodes, |
@@ -217,7 +233,7 @@ static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags, | |||
217 | } | 233 | } |
218 | 234 | ||
219 | ret = mpol_ops[mode].create(policy, | 235 | ret = mpol_ops[mode].create(policy, |
220 | localalloc ? NULL : &cpuset_context_nmask); | 236 | nodes ? &cpuset_context_nmask : NULL); |
221 | if (ret < 0) { | 237 | if (ret < 0) { |
222 | kmem_cache_free(policy_cache, policy); | 238 | kmem_cache_free(policy_cache, policy); |
223 | return ERR_PTR(ret); | 239 | return ERR_PTR(ret); |
@@ -259,10 +275,6 @@ static void mpol_rebind_preferred(struct mempolicy *pol, | |||
259 | { | 275 | { |
260 | nodemask_t tmp; | 276 | nodemask_t tmp; |
261 | 277 | ||
262 | /* | ||
263 | * check 'STATIC_NODES first, as preferred_node == -1 may be | ||
264 | * a temporary, "fallback" state for this policy. | ||
265 | */ | ||
266 | if (pol->flags & MPOL_F_STATIC_NODES) { | 278 | if (pol->flags & MPOL_F_STATIC_NODES) { |
267 | int node = first_node(pol->w.user_nodemask); | 279 | int node = first_node(pol->w.user_nodemask); |
268 | 280 | ||
@@ -270,12 +282,10 @@ static void mpol_rebind_preferred(struct mempolicy *pol, | |||
270 | pol->v.preferred_node = node; | 282 | pol->v.preferred_node = node; |
271 | else | 283 | else |
272 | pol->v.preferred_node = -1; | 284 | pol->v.preferred_node = -1; |
273 | } else if (pol->v.preferred_node == -1) { | ||
274 | return; /* no remap required for explicit local alloc */ | ||
275 | } else if (pol->flags & MPOL_F_RELATIVE_NODES) { | 285 | } else if (pol->flags & MPOL_F_RELATIVE_NODES) { |
276 | mpol_relative_nodemask(&tmp, &pol->w.user_nodemask, nodes); | 286 | mpol_relative_nodemask(&tmp, &pol->w.user_nodemask, nodes); |
277 | pol->v.preferred_node = first_node(tmp); | 287 | pol->v.preferred_node = first_node(tmp); |
278 | } else { | 288 | } else if (pol->v.preferred_node != -1) { |
279 | pol->v.preferred_node = node_remap(pol->v.preferred_node, | 289 | pol->v.preferred_node = node_remap(pol->v.preferred_node, |
280 | pol->w.cpuset_mems_allowed, | 290 | pol->w.cpuset_mems_allowed, |
281 | *nodes); | 291 | *nodes); |