aboutsummaryrefslogtreecommitdiffstats
path: root/mm/mempolicy.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/mempolicy.c')
-rw-r--r--mm/mempolicy.c233
1 files changed, 140 insertions, 93 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index d44c524e5ae4..a94d994eaaa8 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -63,7 +63,6 @@
63 grows down? 63 grows down?
64 make bind policy root only? It can trigger oom much faster and the 64 make bind policy root only? It can trigger oom much faster and the
65 kernel is not always grateful with that. 65 kernel is not always grateful with that.
66 could replace all the switch()es with a mempolicy_ops structure.
67*/ 66*/
68 67
69#include <linux/mempolicy.h> 68#include <linux/mempolicy.h>
@@ -110,8 +109,13 @@ struct mempolicy default_policy = {
110 .policy = MPOL_DEFAULT, 109 .policy = MPOL_DEFAULT,
111}; 110};
112 111
112static const struct mempolicy_operations {
113 int (*create)(struct mempolicy *pol, const nodemask_t *nodes);
114 void (*rebind)(struct mempolicy *pol, const nodemask_t *nodes);
115} mpol_ops[MPOL_MAX];
116
113/* Check that the nodemask contains at least one populated zone */ 117/* Check that the nodemask contains at least one populated zone */
114static int is_valid_nodemask(nodemask_t *nodemask) 118static int is_valid_nodemask(const nodemask_t *nodemask)
115{ 119{
116 int nd, k; 120 int nd, k;
117 121
@@ -144,125 +148,151 @@ static void mpol_relative_nodemask(nodemask_t *ret, const nodemask_t *orig,
144 nodes_onto(*ret, tmp, *rel); 148 nodes_onto(*ret, tmp, *rel);
145} 149}
146 150
151static int mpol_new_interleave(struct mempolicy *pol, const nodemask_t *nodes)
152{
153 if (nodes_empty(*nodes))
154 return -EINVAL;
155 pol->v.nodes = *nodes;
156 return 0;
157}
158
159static int mpol_new_preferred(struct mempolicy *pol, const nodemask_t *nodes)
160{
161 if (!nodes)
162 pol->v.preferred_node = -1; /* local allocation */
163 else if (nodes_empty(*nodes))
164 return -EINVAL; /* no allowed nodes */
165 else
166 pol->v.preferred_node = first_node(*nodes);
167 return 0;
168}
169
170static int mpol_new_bind(struct mempolicy *pol, const nodemask_t *nodes)
171{
172 if (!is_valid_nodemask(nodes))
173 return -EINVAL;
174 pol->v.nodes = *nodes;
175 return 0;
176}
177
147/* Create a new policy */ 178/* Create a new policy */
148static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags, 179static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags,
149 nodemask_t *nodes) 180 nodemask_t *nodes)
150{ 181{
151 struct mempolicy *policy; 182 struct mempolicy *policy;
152 nodemask_t cpuset_context_nmask; 183 nodemask_t cpuset_context_nmask;
184 int localalloc = 0;
185 int ret;
153 186
154 pr_debug("setting mode %d flags %d nodes[0] %lx\n", 187 pr_debug("setting mode %d flags %d nodes[0] %lx\n",
155 mode, flags, nodes ? nodes_addr(*nodes)[0] : -1); 188 mode, flags, nodes ? nodes_addr(*nodes)[0] : -1);
156 189
157 if (mode == MPOL_DEFAULT) 190 if (mode == MPOL_DEFAULT)
158 return (nodes && nodes_weight(*nodes)) ? ERR_PTR(-EINVAL) : 191 return NULL;
159 NULL; 192 if (!nodes || nodes_empty(*nodes)) {
193 if (mode != MPOL_PREFERRED)
194 return ERR_PTR(-EINVAL);
195 localalloc = 1; /* special case: no mode flags */
196 }
160 policy = kmem_cache_alloc(policy_cache, GFP_KERNEL); 197 policy = kmem_cache_alloc(policy_cache, GFP_KERNEL);
161 if (!policy) 198 if (!policy)
162 return ERR_PTR(-ENOMEM); 199 return ERR_PTR(-ENOMEM);
163 atomic_set(&policy->refcnt, 1); 200 atomic_set(&policy->refcnt, 1);
164 cpuset_update_task_memory_state();
165 if (flags & MPOL_F_RELATIVE_NODES)
166 mpol_relative_nodemask(&cpuset_context_nmask, nodes,
167 &cpuset_current_mems_allowed);
168 else
169 nodes_and(cpuset_context_nmask, *nodes,
170 cpuset_current_mems_allowed);
171 switch (mode) {
172 case MPOL_INTERLEAVE:
173 if (nodes_empty(*nodes) || nodes_empty(cpuset_context_nmask))
174 goto free;
175 policy->v.nodes = cpuset_context_nmask;
176 break;
177 case MPOL_PREFERRED:
178 policy->v.preferred_node = first_node(cpuset_context_nmask);
179 if (policy->v.preferred_node >= MAX_NUMNODES)
180 goto free;
181 break;
182 case MPOL_BIND:
183 if (!is_valid_nodemask(&cpuset_context_nmask))
184 goto free;
185 policy->v.nodes = cpuset_context_nmask;
186 break;
187 default:
188 BUG();
189 }
190 policy->policy = mode; 201 policy->policy = mode;
191 policy->flags = flags; 202
192 if (mpol_store_user_nodemask(policy)) 203 if (!localalloc) {
193 policy->w.user_nodemask = *nodes; 204 policy->flags = flags;
194 else 205 cpuset_update_task_memory_state();
195 policy->w.cpuset_mems_allowed = cpuset_mems_allowed(current); 206 if (flags & MPOL_F_RELATIVE_NODES)
207 mpol_relative_nodemask(&cpuset_context_nmask, nodes,
208 &cpuset_current_mems_allowed);
209 else
210 nodes_and(cpuset_context_nmask, *nodes,
211 cpuset_current_mems_allowed);
212 if (mpol_store_user_nodemask(policy))
213 policy->w.user_nodemask = *nodes;
214 else
215 policy->w.cpuset_mems_allowed =
216 cpuset_mems_allowed(current);
217 }
218
219 ret = mpol_ops[mode].create(policy,
220 localalloc ? NULL : &cpuset_context_nmask);
221 if (ret < 0) {
222 kmem_cache_free(policy_cache, policy);
223 return ERR_PTR(ret);
224 }
196 return policy; 225 return policy;
226}
227
228static void mpol_rebind_default(struct mempolicy *pol, const nodemask_t *nodes)
229{
230}
231
232static void mpol_rebind_nodemask(struct mempolicy *pol,
233 const nodemask_t *nodes)
234{
235 nodemask_t tmp;
236
237 if (pol->flags & MPOL_F_STATIC_NODES)
238 nodes_and(tmp, pol->w.user_nodemask, *nodes);
239 else if (pol->flags & MPOL_F_RELATIVE_NODES)
240 mpol_relative_nodemask(&tmp, &pol->w.user_nodemask, nodes);
241 else {
242 nodes_remap(tmp, pol->v.nodes, pol->w.cpuset_mems_allowed,
243 *nodes);
244 pol->w.cpuset_mems_allowed = *nodes;
245 }
197 246
198free: 247 pol->v.nodes = tmp;
199 kmem_cache_free(policy_cache, policy); 248 if (!node_isset(current->il_next, tmp)) {
200 return ERR_PTR(-EINVAL); 249 current->il_next = next_node(current->il_next, tmp);
250 if (current->il_next >= MAX_NUMNODES)
251 current->il_next = first_node(tmp);
252 if (current->il_next >= MAX_NUMNODES)
253 current->il_next = numa_node_id();
254 }
255}
256
257static void mpol_rebind_preferred(struct mempolicy *pol,
258 const nodemask_t *nodes)
259{
260 nodemask_t tmp;
261
262 /*
263 * check 'STATIC_NODES first, as preferred_node == -1 may be
264 * a temporary, "fallback" state for this policy.
265 */
266 if (pol->flags & MPOL_F_STATIC_NODES) {
267 int node = first_node(pol->w.user_nodemask);
268
269 if (node_isset(node, *nodes))
270 pol->v.preferred_node = node;
271 else
272 pol->v.preferred_node = -1;
273 } else if (pol->v.preferred_node == -1) {
274 return; /* no remap required for explicit local alloc */
275 } else if (pol->flags & MPOL_F_RELATIVE_NODES) {
276 mpol_relative_nodemask(&tmp, &pol->w.user_nodemask, nodes);
277 pol->v.preferred_node = first_node(tmp);
278 } else {
279 pol->v.preferred_node = node_remap(pol->v.preferred_node,
280 pol->w.cpuset_mems_allowed,
281 *nodes);
282 pol->w.cpuset_mems_allowed = *nodes;
283 }
201} 284}
202 285
203/* Migrate a policy to a different set of nodes */ 286/* Migrate a policy to a different set of nodes */
204static void mpol_rebind_policy(struct mempolicy *pol, 287static void mpol_rebind_policy(struct mempolicy *pol,
205 const nodemask_t *newmask) 288 const nodemask_t *newmask)
206{ 289{
207 nodemask_t tmp;
208 int static_nodes;
209 int relative_nodes;
210
211 if (!pol) 290 if (!pol)
212 return; 291 return;
213 static_nodes = pol->flags & MPOL_F_STATIC_NODES;
214 relative_nodes = pol->flags & MPOL_F_RELATIVE_NODES;
215 if (!mpol_store_user_nodemask(pol) && 292 if (!mpol_store_user_nodemask(pol) &&
216 nodes_equal(pol->w.cpuset_mems_allowed, *newmask)) 293 nodes_equal(pol->w.cpuset_mems_allowed, *newmask))
217 return; 294 return;
218 295 mpol_ops[pol->policy].rebind(pol, newmask);
219 switch (pol->policy) {
220 case MPOL_DEFAULT:
221 break;
222 case MPOL_BIND:
223 /* Fall through */
224 case MPOL_INTERLEAVE:
225 if (static_nodes)
226 nodes_and(tmp, pol->w.user_nodemask, *newmask);
227 else if (relative_nodes)
228 mpol_relative_nodemask(&tmp, &pol->w.user_nodemask,
229 newmask);
230 else {
231 nodes_remap(tmp, pol->v.nodes,
232 pol->w.cpuset_mems_allowed, *newmask);
233 pol->w.cpuset_mems_allowed = *newmask;
234 }
235 pol->v.nodes = tmp;
236 if (!node_isset(current->il_next, tmp)) {
237 current->il_next = next_node(current->il_next, tmp);
238 if (current->il_next >= MAX_NUMNODES)
239 current->il_next = first_node(tmp);
240 if (current->il_next >= MAX_NUMNODES)
241 current->il_next = numa_node_id();
242 }
243 break;
244 case MPOL_PREFERRED:
245 if (static_nodes) {
246 int node = first_node(pol->w.user_nodemask);
247
248 if (node_isset(node, *newmask))
249 pol->v.preferred_node = node;
250 else
251 pol->v.preferred_node = -1;
252 } else if (relative_nodes) {
253 mpol_relative_nodemask(&tmp, &pol->w.user_nodemask,
254 newmask);
255 pol->v.preferred_node = first_node(tmp);
256 } else {
257 pol->v.preferred_node = node_remap(pol->v.preferred_node,
258 pol->w.cpuset_mems_allowed, *newmask);
259 pol->w.cpuset_mems_allowed = *newmask;
260 }
261 break;
262 default:
263 BUG();
264 break;
265 }
266} 296}
267 297
268/* 298/*
@@ -291,6 +321,24 @@ void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
291 up_write(&mm->mmap_sem); 321 up_write(&mm->mmap_sem);
292} 322}
293 323
324static const struct mempolicy_operations mpol_ops[MPOL_MAX] = {
325 [MPOL_DEFAULT] = {
326 .rebind = mpol_rebind_default,
327 },
328 [MPOL_INTERLEAVE] = {
329 .create = mpol_new_interleave,
330 .rebind = mpol_rebind_nodemask,
331 },
332 [MPOL_PREFERRED] = {
333 .create = mpol_new_preferred,
334 .rebind = mpol_rebind_preferred,
335 },
336 [MPOL_BIND] = {
337 .create = mpol_new_bind,
338 .rebind = mpol_rebind_nodemask,
339 },
340};
341
294static void gather_stats(struct page *, void *, int pte_dirty); 342static void gather_stats(struct page *, void *, int pte_dirty);
295static void migrate_page_add(struct page *page, struct list_head *pagelist, 343static void migrate_page_add(struct page *page, struct list_head *pagelist,
296 unsigned long flags); 344 unsigned long flags);
@@ -1848,7 +1896,6 @@ void numa_default_policy(void)
1848/* 1896/*
1849 * Display pages allocated per node and memory policy via /proc. 1897 * Display pages allocated per node and memory policy via /proc.
1850 */ 1898 */
1851
1852static const char * const policy_types[] = 1899static const char * const policy_types[] =
1853 { "default", "prefer", "bind", "interleave" }; 1900 { "default", "prefer", "bind", "interleave" };
1854 1901