diff options
Diffstat (limited to 'mm/mempolicy.c')
-rw-r--r-- | mm/mempolicy.c | 233 |
1 file changed, 140 insertions, 93 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index d44c524e5ae4..a94d994eaaa8 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -63,7 +63,6 @@ | |||
63 | grows down? | 63 | grows down? |
64 | make bind policy root only? It can trigger oom much faster and the | 64 | make bind policy root only? It can trigger oom much faster and the |
65 | kernel is not always grateful with that. | 65 | kernel is not always grateful with that. |
66 | could replace all the switch()es with a mempolicy_ops structure. | ||
67 | */ | 66 | */ |
68 | 67 | ||
69 | #include <linux/mempolicy.h> | 68 | #include <linux/mempolicy.h> |
@@ -110,8 +109,13 @@ struct mempolicy default_policy = { | |||
110 | .policy = MPOL_DEFAULT, | 109 | .policy = MPOL_DEFAULT, |
111 | }; | 110 | }; |
112 | 111 | ||
112 | static const struct mempolicy_operations { | ||
113 | int (*create)(struct mempolicy *pol, const nodemask_t *nodes); | ||
114 | void (*rebind)(struct mempolicy *pol, const nodemask_t *nodes); | ||
115 | } mpol_ops[MPOL_MAX]; | ||
116 | |||
113 | /* Check that the nodemask contains at least one populated zone */ | 117 | /* Check that the nodemask contains at least one populated zone */ |
114 | static int is_valid_nodemask(nodemask_t *nodemask) | 118 | static int is_valid_nodemask(const nodemask_t *nodemask) |
115 | { | 119 | { |
116 | int nd, k; | 120 | int nd, k; |
117 | 121 | ||
@@ -144,125 +148,151 @@ static void mpol_relative_nodemask(nodemask_t *ret, const nodemask_t *orig, | |||
144 | nodes_onto(*ret, tmp, *rel); | 148 | nodes_onto(*ret, tmp, *rel); |
145 | } | 149 | } |
146 | 150 | ||
151 | static int mpol_new_interleave(struct mempolicy *pol, const nodemask_t *nodes) | ||
152 | { | ||
153 | if (nodes_empty(*nodes)) | ||
154 | return -EINVAL; | ||
155 | pol->v.nodes = *nodes; | ||
156 | return 0; | ||
157 | } | ||
158 | |||
159 | static int mpol_new_preferred(struct mempolicy *pol, const nodemask_t *nodes) | ||
160 | { | ||
161 | if (!nodes) | ||
162 | pol->v.preferred_node = -1; /* local allocation */ | ||
163 | else if (nodes_empty(*nodes)) | ||
164 | return -EINVAL; /* no allowed nodes */ | ||
165 | else | ||
166 | pol->v.preferred_node = first_node(*nodes); | ||
167 | return 0; | ||
168 | } | ||
169 | |||
170 | static int mpol_new_bind(struct mempolicy *pol, const nodemask_t *nodes) | ||
171 | { | ||
172 | if (!is_valid_nodemask(nodes)) | ||
173 | return -EINVAL; | ||
174 | pol->v.nodes = *nodes; | ||
175 | return 0; | ||
176 | } | ||
177 | |||
147 | /* Create a new policy */ | 178 | /* Create a new policy */ |
148 | static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags, | 179 | static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags, |
149 | nodemask_t *nodes) | 180 | nodemask_t *nodes) |
150 | { | 181 | { |
151 | struct mempolicy *policy; | 182 | struct mempolicy *policy; |
152 | nodemask_t cpuset_context_nmask; | 183 | nodemask_t cpuset_context_nmask; |
184 | int localalloc = 0; | ||
185 | int ret; | ||
153 | 186 | ||
154 | pr_debug("setting mode %d flags %d nodes[0] %lx\n", | 187 | pr_debug("setting mode %d flags %d nodes[0] %lx\n", |
155 | mode, flags, nodes ? nodes_addr(*nodes)[0] : -1); | 188 | mode, flags, nodes ? nodes_addr(*nodes)[0] : -1); |
156 | 189 | ||
157 | if (mode == MPOL_DEFAULT) | 190 | if (mode == MPOL_DEFAULT) |
158 | return (nodes && nodes_weight(*nodes)) ? ERR_PTR(-EINVAL) : | 191 | return NULL; |
159 | NULL; | 192 | if (!nodes || nodes_empty(*nodes)) { |
193 | if (mode != MPOL_PREFERRED) | ||
194 | return ERR_PTR(-EINVAL); | ||
195 | localalloc = 1; /* special case: no mode flags */ | ||
196 | } | ||
160 | policy = kmem_cache_alloc(policy_cache, GFP_KERNEL); | 197 | policy = kmem_cache_alloc(policy_cache, GFP_KERNEL); |
161 | if (!policy) | 198 | if (!policy) |
162 | return ERR_PTR(-ENOMEM); | 199 | return ERR_PTR(-ENOMEM); |
163 | atomic_set(&policy->refcnt, 1); | 200 | atomic_set(&policy->refcnt, 1); |
164 | cpuset_update_task_memory_state(); | ||
165 | if (flags & MPOL_F_RELATIVE_NODES) | ||
166 | mpol_relative_nodemask(&cpuset_context_nmask, nodes, | ||
167 | &cpuset_current_mems_allowed); | ||
168 | else | ||
169 | nodes_and(cpuset_context_nmask, *nodes, | ||
170 | cpuset_current_mems_allowed); | ||
171 | switch (mode) { | ||
172 | case MPOL_INTERLEAVE: | ||
173 | if (nodes_empty(*nodes) || nodes_empty(cpuset_context_nmask)) | ||
174 | goto free; | ||
175 | policy->v.nodes = cpuset_context_nmask; | ||
176 | break; | ||
177 | case MPOL_PREFERRED: | ||
178 | policy->v.preferred_node = first_node(cpuset_context_nmask); | ||
179 | if (policy->v.preferred_node >= MAX_NUMNODES) | ||
180 | goto free; | ||
181 | break; | ||
182 | case MPOL_BIND: | ||
183 | if (!is_valid_nodemask(&cpuset_context_nmask)) | ||
184 | goto free; | ||
185 | policy->v.nodes = cpuset_context_nmask; | ||
186 | break; | ||
187 | default: | ||
188 | BUG(); | ||
189 | } | ||
190 | policy->policy = mode; | 201 | policy->policy = mode; |
191 | policy->flags = flags; | 202 | |
192 | if (mpol_store_user_nodemask(policy)) | 203 | if (!localalloc) { |
193 | policy->w.user_nodemask = *nodes; | 204 | policy->flags = flags; |
194 | else | 205 | cpuset_update_task_memory_state(); |
195 | policy->w.cpuset_mems_allowed = cpuset_mems_allowed(current); | 206 | if (flags & MPOL_F_RELATIVE_NODES) |
207 | mpol_relative_nodemask(&cpuset_context_nmask, nodes, | ||
208 | &cpuset_current_mems_allowed); | ||
209 | else | ||
210 | nodes_and(cpuset_context_nmask, *nodes, | ||
211 | cpuset_current_mems_allowed); | ||
212 | if (mpol_store_user_nodemask(policy)) | ||
213 | policy->w.user_nodemask = *nodes; | ||
214 | else | ||
215 | policy->w.cpuset_mems_allowed = | ||
216 | cpuset_mems_allowed(current); | ||
217 | } | ||
218 | |||
219 | ret = mpol_ops[mode].create(policy, | ||
220 | localalloc ? NULL : &cpuset_context_nmask); | ||
221 | if (ret < 0) { | ||
222 | kmem_cache_free(policy_cache, policy); | ||
223 | return ERR_PTR(ret); | ||
224 | } | ||
196 | return policy; | 225 | return policy; |
226 | } | ||
227 | |||
228 | static void mpol_rebind_default(struct mempolicy *pol, const nodemask_t *nodes) | ||
229 | { | ||
230 | } | ||
231 | |||
232 | static void mpol_rebind_nodemask(struct mempolicy *pol, | ||
233 | const nodemask_t *nodes) | ||
234 | { | ||
235 | nodemask_t tmp; | ||
236 | |||
237 | if (pol->flags & MPOL_F_STATIC_NODES) | ||
238 | nodes_and(tmp, pol->w.user_nodemask, *nodes); | ||
239 | else if (pol->flags & MPOL_F_RELATIVE_NODES) | ||
240 | mpol_relative_nodemask(&tmp, &pol->w.user_nodemask, nodes); | ||
241 | else { | ||
242 | nodes_remap(tmp, pol->v.nodes, pol->w.cpuset_mems_allowed, | ||
243 | *nodes); | ||
244 | pol->w.cpuset_mems_allowed = *nodes; | ||
245 | } | ||
197 | 246 | ||
198 | free: | 247 | pol->v.nodes = tmp; |
199 | kmem_cache_free(policy_cache, policy); | 248 | if (!node_isset(current->il_next, tmp)) { |
200 | return ERR_PTR(-EINVAL); | 249 | current->il_next = next_node(current->il_next, tmp); |
250 | if (current->il_next >= MAX_NUMNODES) | ||
251 | current->il_next = first_node(tmp); | ||
252 | if (current->il_next >= MAX_NUMNODES) | ||
253 | current->il_next = numa_node_id(); | ||
254 | } | ||
255 | } | ||
256 | |||
257 | static void mpol_rebind_preferred(struct mempolicy *pol, | ||
258 | const nodemask_t *nodes) | ||
259 | { | ||
260 | nodemask_t tmp; | ||
261 | |||
262 | /* | ||
263 | * check 'STATIC_NODES first, as preferred_node == -1 may be | ||
264 | * a temporary, "fallback" state for this policy. | ||
265 | */ | ||
266 | if (pol->flags & MPOL_F_STATIC_NODES) { | ||
267 | int node = first_node(pol->w.user_nodemask); | ||
268 | |||
269 | if (node_isset(node, *nodes)) | ||
270 | pol->v.preferred_node = node; | ||
271 | else | ||
272 | pol->v.preferred_node = -1; | ||
273 | } else if (pol->v.preferred_node == -1) { | ||
274 | return; /* no remap required for explicit local alloc */ | ||
275 | } else if (pol->flags & MPOL_F_RELATIVE_NODES) { | ||
276 | mpol_relative_nodemask(&tmp, &pol->w.user_nodemask, nodes); | ||
277 | pol->v.preferred_node = first_node(tmp); | ||
278 | } else { | ||
279 | pol->v.preferred_node = node_remap(pol->v.preferred_node, | ||
280 | pol->w.cpuset_mems_allowed, | ||
281 | *nodes); | ||
282 | pol->w.cpuset_mems_allowed = *nodes; | ||
283 | } | ||
201 | } | 284 | } |
202 | 285 | ||
203 | /* Migrate a policy to a different set of nodes */ | 286 | /* Migrate a policy to a different set of nodes */ |
204 | static void mpol_rebind_policy(struct mempolicy *pol, | 287 | static void mpol_rebind_policy(struct mempolicy *pol, |
205 | const nodemask_t *newmask) | 288 | const nodemask_t *newmask) |
206 | { | 289 | { |
207 | nodemask_t tmp; | ||
208 | int static_nodes; | ||
209 | int relative_nodes; | ||
210 | |||
211 | if (!pol) | 290 | if (!pol) |
212 | return; | 291 | return; |
213 | static_nodes = pol->flags & MPOL_F_STATIC_NODES; | ||
214 | relative_nodes = pol->flags & MPOL_F_RELATIVE_NODES; | ||
215 | if (!mpol_store_user_nodemask(pol) && | 292 | if (!mpol_store_user_nodemask(pol) && |
216 | nodes_equal(pol->w.cpuset_mems_allowed, *newmask)) | 293 | nodes_equal(pol->w.cpuset_mems_allowed, *newmask)) |
217 | return; | 294 | return; |
218 | 295 | mpol_ops[pol->policy].rebind(pol, newmask); | |
219 | switch (pol->policy) { | ||
220 | case MPOL_DEFAULT: | ||
221 | break; | ||
222 | case MPOL_BIND: | ||
223 | /* Fall through */ | ||
224 | case MPOL_INTERLEAVE: | ||
225 | if (static_nodes) | ||
226 | nodes_and(tmp, pol->w.user_nodemask, *newmask); | ||
227 | else if (relative_nodes) | ||
228 | mpol_relative_nodemask(&tmp, &pol->w.user_nodemask, | ||
229 | newmask); | ||
230 | else { | ||
231 | nodes_remap(tmp, pol->v.nodes, | ||
232 | pol->w.cpuset_mems_allowed, *newmask); | ||
233 | pol->w.cpuset_mems_allowed = *newmask; | ||
234 | } | ||
235 | pol->v.nodes = tmp; | ||
236 | if (!node_isset(current->il_next, tmp)) { | ||
237 | current->il_next = next_node(current->il_next, tmp); | ||
238 | if (current->il_next >= MAX_NUMNODES) | ||
239 | current->il_next = first_node(tmp); | ||
240 | if (current->il_next >= MAX_NUMNODES) | ||
241 | current->il_next = numa_node_id(); | ||
242 | } | ||
243 | break; | ||
244 | case MPOL_PREFERRED: | ||
245 | if (static_nodes) { | ||
246 | int node = first_node(pol->w.user_nodemask); | ||
247 | |||
248 | if (node_isset(node, *newmask)) | ||
249 | pol->v.preferred_node = node; | ||
250 | else | ||
251 | pol->v.preferred_node = -1; | ||
252 | } else if (relative_nodes) { | ||
253 | mpol_relative_nodemask(&tmp, &pol->w.user_nodemask, | ||
254 | newmask); | ||
255 | pol->v.preferred_node = first_node(tmp); | ||
256 | } else { | ||
257 | pol->v.preferred_node = node_remap(pol->v.preferred_node, | ||
258 | pol->w.cpuset_mems_allowed, *newmask); | ||
259 | pol->w.cpuset_mems_allowed = *newmask; | ||
260 | } | ||
261 | break; | ||
262 | default: | ||
263 | BUG(); | ||
264 | break; | ||
265 | } | ||
266 | } | 296 | } |
267 | 297 | ||
268 | /* | 298 | /* |
@@ -291,6 +321,24 @@ void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new) | |||
291 | up_write(&mm->mmap_sem); | 321 | up_write(&mm->mmap_sem); |
292 | } | 322 | } |
293 | 323 | ||
324 | static const struct mempolicy_operations mpol_ops[MPOL_MAX] = { | ||
325 | [MPOL_DEFAULT] = { | ||
326 | .rebind = mpol_rebind_default, | ||
327 | }, | ||
328 | [MPOL_INTERLEAVE] = { | ||
329 | .create = mpol_new_interleave, | ||
330 | .rebind = mpol_rebind_nodemask, | ||
331 | }, | ||
332 | [MPOL_PREFERRED] = { | ||
333 | .create = mpol_new_preferred, | ||
334 | .rebind = mpol_rebind_preferred, | ||
335 | }, | ||
336 | [MPOL_BIND] = { | ||
337 | .create = mpol_new_bind, | ||
338 | .rebind = mpol_rebind_nodemask, | ||
339 | }, | ||
340 | }; | ||
341 | |||
294 | static void gather_stats(struct page *, void *, int pte_dirty); | 342 | static void gather_stats(struct page *, void *, int pte_dirty); |
295 | static void migrate_page_add(struct page *page, struct list_head *pagelist, | 343 | static void migrate_page_add(struct page *page, struct list_head *pagelist, |
296 | unsigned long flags); | 344 | unsigned long flags); |
@@ -1848,7 +1896,6 @@ void numa_default_policy(void) | |||
1848 | /* | 1896 | /* |
1849 | * Display pages allocated per node and memory policy via /proc. | 1897 | * Display pages allocated per node and memory policy via /proc. |
1850 | */ | 1898 | */ |
1851 | |||
1852 | static const char * const policy_types[] = | 1899 | static const char * const policy_types[] = |
1853 | { "default", "prefer", "bind", "interleave" }; | 1900 | { "default", "prefer", "bind", "interleave" }; |
1854 | 1901 | ||