 include/linux/mempolicy.h |  15
 kernel/cpuset.c           |   4
 mm/mempolicy.c            | 124
 3 files changed, 119 insertions, 24 deletions
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 1cc966cd3e5f..7b9ef6bf45aa 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -23,6 +23,13 @@ enum {
 	MPOL_MAX,	/* always last member of enum */
 };
 
+enum mpol_rebind_step {
+	MPOL_REBIND_ONCE,	/* do rebind work at once(not by two step) */
+	MPOL_REBIND_STEP1,	/* first step(set all the newly nodes) */
+	MPOL_REBIND_STEP2,	/* second step(clean all the disallowed nodes)*/
+	MPOL_REBIND_NSTEP,
+};
+
 /* Flags for set_mempolicy */
 #define MPOL_F_STATIC_NODES	(1 << 15)
 #define MPOL_F_RELATIVE_NODES	(1 << 14)
@@ -51,6 +58,7 @@ enum {
  */
 #define MPOL_F_SHARED  (1 << 0)	/* identify shared policies */
 #define MPOL_F_LOCAL   (1 << 1)	/* preferred local allocation */
+#define MPOL_F_REBINDING (1 << 2)	/* identify policies in rebinding */
 
 #ifdef __KERNEL__
 
@@ -193,8 +201,8 @@ struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp,
 
 extern void numa_default_policy(void);
 extern void numa_policy_init(void);
-extern void mpol_rebind_task(struct task_struct *tsk,
-					const nodemask_t *new);
+extern void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new,
+				enum mpol_rebind_step step);
 extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new);
 extern void mpol_fix_fork_child_flag(struct task_struct *p);
 
@@ -308,7 +316,8 @@ static inline void numa_default_policy(void)
 }
 
 static inline void mpol_rebind_task(struct task_struct *tsk,
-				const nodemask_t *new)
+				const nodemask_t *new,
+				enum mpol_rebind_step step)
 {
 }
 
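A note on the two-step rebind introduced above: MPOL_REBIND_STEP1 sets all
the newly allowed nodes, MPOL_REBIND_STEP2 then clears the disallowed ones,
and MPOL_REBIND_ONCE does both in one pass for callers whose readers are
known to be locked out. The ordering matters because a reader holding no
lock may sample the nodemask between the two steps. The following is a
minimal user-space sketch (plain C, not kernel code; a nodemask is modelled
as a simple bit mask and the nodes_remap() work is left out) of why "grow
first, shrink second" never exposes an empty mask:

#include <assert.h>
#include <stdio.h>

/* Toy stand-in for nodemask_t: one bit per NUMA node (illustration only). */
typedef unsigned int toy_nodemask;

/* Step 1 of the rebind: add every node the new mask allows. */
static void toy_rebind_step1(toy_nodemask *mask, toy_nodemask newmask)
{
	*mask |= newmask;
}

/* Step 2 of the rebind: drop every node the new mask disallows. */
static void toy_rebind_step2(toy_nodemask *mask, toy_nodemask newmask)
{
	*mask &= newmask;
}

int main(void)
{
	toy_nodemask mask = 0x3;	/* old cpuset: nodes 0-1 */
	toy_nodemask newmask = 0xc;	/* new cpuset: nodes 2-3 */

	toy_rebind_step1(&mask, newmask);
	/*
	 * The intermediate state is the union (0xf), never the empty set,
	 * so a lockless reader sampling the mask right here can still find
	 * a node to allocate from.
	 */
	assert(mask == 0xf);

	toy_rebind_step2(&mask, newmask);
	assert(mask == newmask);	/* only the new nodes remain */

	printf("final mask: 0x%x\n", mask);
	return 0;
}

Doing the same update in a single assignment (as MPOL_REBIND_ONCE does) is
only safe when the reader side is excluded, which is exactly the distinction
the new enum makes explicit.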
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 9a50c5f6e727..db0990ac3fac 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -953,8 +953,8 @@ static void cpuset_change_task_nodemask(struct task_struct *tsk,
 					nodemask_t *newmems)
 {
 	nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
-	mpol_rebind_task(tsk, &tsk->mems_allowed);
-	mpol_rebind_task(tsk, newmems);
+	mpol_rebind_task(tsk, &tsk->mems_allowed, MPOL_REBIND_ONCE);
+	mpol_rebind_task(tsk, newmems, MPOL_REBIND_ONCE);
 	tsk->mems_allowed = *newmems;
 }
 
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 0c73c8b814cd..8a993db88029 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -119,7 +119,22 @@ struct mempolicy default_policy = {
 
 static const struct mempolicy_operations {
 	int (*create)(struct mempolicy *pol, const nodemask_t *nodes);
-	void (*rebind)(struct mempolicy *pol, const nodemask_t *nodes);
+	/*
+	 * If read-side task has no lock to protect task->mempolicy, write-side
+	 * task will rebind the task->mempolicy by two step. The first step is
+	 * setting all the newly nodes, and the second step is cleaning all the
+	 * disallowed nodes. In this way, we can avoid finding no node to alloc
+	 * page.
+	 * If we have a lock to protect task->mempolicy in read-side, we do
+	 * rebind directly.
+	 *
+	 * step:
+	 * 	MPOL_REBIND_ONCE  - do rebind work at once
+	 * 	MPOL_REBIND_STEP1 - set all the newly nodes
+	 * 	MPOL_REBIND_STEP2 - clean all the disallowed nodes
+	 */
+	void (*rebind)(struct mempolicy *pol, const nodemask_t *nodes,
+			enum mpol_rebind_step step);
 } mpol_ops[MPOL_MAX];
 
 /* Check that the nodemask contains at least one populated zone */
@@ -274,12 +289,19 @@ void __mpol_put(struct mempolicy *p)
 	kmem_cache_free(policy_cache, p);
 }
 
-static void mpol_rebind_default(struct mempolicy *pol, const nodemask_t *nodes)
+static void mpol_rebind_default(struct mempolicy *pol, const nodemask_t *nodes,
+				enum mpol_rebind_step step)
 {
 }
 
-static void mpol_rebind_nodemask(struct mempolicy *pol,
-				 const nodemask_t *nodes)
+/*
+ * step:
+ * 	MPOL_REBIND_ONCE  - do rebind work at once
+ * 	MPOL_REBIND_STEP1 - set all the newly nodes
+ * 	MPOL_REBIND_STEP2 - clean all the disallowed nodes
+ */
+static void mpol_rebind_nodemask(struct mempolicy *pol, const nodemask_t *nodes,
+				 enum mpol_rebind_step step)
 {
 	nodemask_t tmp;
 
@@ -288,12 +310,31 @@ static void mpol_rebind_nodemask(struct mempolicy *pol,
 	else if (pol->flags & MPOL_F_RELATIVE_NODES)
 		mpol_relative_nodemask(&tmp, &pol->w.user_nodemask, nodes);
 	else {
-		nodes_remap(tmp, pol->v.nodes, pol->w.cpuset_mems_allowed,
-			    *nodes);
-		pol->w.cpuset_mems_allowed = *nodes;
+		/*
+		 * if step == 1, we use ->w.cpuset_mems_allowed to cache the
+		 * result
+		 */
+		if (step == MPOL_REBIND_ONCE || step == MPOL_REBIND_STEP1) {
+			nodes_remap(tmp, pol->v.nodes,
+				    pol->w.cpuset_mems_allowed, *nodes);
+			pol->w.cpuset_mems_allowed = step ? tmp : *nodes;
+		} else if (step == MPOL_REBIND_STEP2) {
+			tmp = pol->w.cpuset_mems_allowed;
+			pol->w.cpuset_mems_allowed = *nodes;
+		} else
+			BUG();
 	}
 
-	pol->v.nodes = tmp;
+	if (nodes_empty(tmp))
+		tmp = *nodes;
+
+	if (step == MPOL_REBIND_STEP1)
+		nodes_or(pol->v.nodes, pol->v.nodes, tmp);
+	else if (step == MPOL_REBIND_ONCE || step == MPOL_REBIND_STEP2)
+		pol->v.nodes = tmp;
+	else
+		BUG();
+
 	if (!node_isset(current->il_next, tmp)) {
 		current->il_next = next_node(current->il_next, tmp);
 		if (current->il_next >= MAX_NUMNODES)
@@ -304,7 +345,8 @@ static void mpol_rebind_nodemask(struct mempolicy *pol,
 }
 
 static void mpol_rebind_preferred(struct mempolicy *pol,
-				  const nodemask_t *nodes)
+				  const nodemask_t *nodes,
+				  enum mpol_rebind_step step)
 {
 	nodemask_t tmp;
 
@@ -327,16 +369,45 @@ static void mpol_rebind_preferred(struct mempolicy *pol,
 	}
 }
 
-/* Migrate a policy to a different set of nodes */
-static void mpol_rebind_policy(struct mempolicy *pol,
-			       const nodemask_t *newmask)
+/*
+ * mpol_rebind_policy - Migrate a policy to a different set of nodes
+ *
+ * If read-side task has no lock to protect task->mempolicy, write-side
+ * task will rebind the task->mempolicy by two step. The first step is
+ * setting all the newly nodes, and the second step is cleaning all the
+ * disallowed nodes. In this way, we can avoid finding no node to alloc
+ * page.
+ * If we have a lock to protect task->mempolicy in read-side, we do
+ * rebind directly.
+ *
+ * step:
+ * 	MPOL_REBIND_ONCE  - do rebind work at once
+ * 	MPOL_REBIND_STEP1 - set all the newly nodes
+ * 	MPOL_REBIND_STEP2 - clean all the disallowed nodes
+ */
+static void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask,
+				enum mpol_rebind_step step)
 {
 	if (!pol)
 		return;
-	if (!mpol_store_user_nodemask(pol) &&
+	if (!mpol_store_user_nodemask(pol) && step == 0 &&
 	    nodes_equal(pol->w.cpuset_mems_allowed, *newmask))
 		return;
-	mpol_ops[pol->mode].rebind(pol, newmask);
+
+	if (step == MPOL_REBIND_STEP1 && (pol->flags & MPOL_F_REBINDING))
+		return;
+
+	if (step == MPOL_REBIND_STEP2 && !(pol->flags & MPOL_F_REBINDING))
+		BUG();
+
+	if (step == MPOL_REBIND_STEP1)
+		pol->flags |= MPOL_F_REBINDING;
+	else if (step == MPOL_REBIND_STEP2)
+		pol->flags &= ~MPOL_F_REBINDING;
+	else if (step >= MPOL_REBIND_NSTEP)
+		BUG();
+
+	mpol_ops[pol->mode].rebind(pol, newmask, step);
 }
 
 /*
@@ -346,9 +417,10 @@ static void mpol_rebind_policy(struct mempolicy *pol,
  * Called with task's alloc_lock held.
  */
 
-void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new)
+void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new,
+			enum mpol_rebind_step step)
 {
-	mpol_rebind_policy(tsk->mempolicy, new);
+	mpol_rebind_policy(tsk->mempolicy, new, step);
 }
 
 /*
@@ -363,7 +435,7 @@ void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
 
 	down_write(&mm->mmap_sem);
 	for (vma = mm->mmap; vma; vma = vma->vm_next)
-		mpol_rebind_policy(vma->vm_policy, new);
+		mpol_rebind_policy(vma->vm_policy, new, MPOL_REBIND_ONCE);
 	up_write(&mm->mmap_sem);
 }
 
@@ -1745,6 +1817,9 @@ EXPORT_SYMBOL(alloc_pages_current);
  * with the mems_allowed returned by cpuset_mems_allowed(). This
  * keeps mempolicies cpuset relative after its cpuset moves. See
  * further kernel/cpuset.c update_nodemask().
+ *
+ * current's mempolicy may be rebinded by the other task(the task that changes
+ * cpuset's mems), so we needn't do rebind work for current task.
  */
 
 /* Slow path of a mempolicy duplicate */
@@ -1754,13 +1829,24 @@ struct mempolicy *__mpol_dup(struct mempolicy *old)
 
 	if (!new)
 		return ERR_PTR(-ENOMEM);
+
+	/* task's mempolicy is protected by alloc_lock */
+	if (old == current->mempolicy) {
+		task_lock(current);
+		*new = *old;
+		task_unlock(current);
+	} else
+		*new = *old;
+
 	rcu_read_lock();
 	if (current_cpuset_is_being_rebound()) {
 		nodemask_t mems = cpuset_mems_allowed(current);
-		mpol_rebind_policy(old, &mems);
+		if (new->flags & MPOL_F_REBINDING)
+			mpol_rebind_policy(new, &mems, MPOL_REBIND_STEP2);
+		else
+			mpol_rebind_policy(new, &mems, MPOL_REBIND_ONCE);
 	}
 	rcu_read_unlock();
-	*new = *old;
 	atomic_set(&new->refcnt, 1);
 	return new;
 }
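For reference, the MPOL_F_REBINDING handling added to mpol_rebind_policy()
behaves like a small state machine: STEP1 marks the policy as mid-rebind
(and a repeated STEP1 becomes a no-op), STEP2 requires that mark and clears
it, and __mpol_dup() uses the mark to decide whether a copied policy still
needs only its second step or a full MPOL_REBIND_ONCE pass. The sketch below
models just that flag protocol in plain C; the toy_* names are illustrative
stand-ins, not kernel identifiers, and details such as the
current_cpuset_is_being_rebound() check are left out:

#include <assert.h>
#include <stdbool.h>

enum rebind_step { REBIND_ONCE, REBIND_STEP1, REBIND_STEP2 };

/* Toy model of the one bit of struct mempolicy state that matters here. */
struct toy_policy {
	bool rebinding;			/* models MPOL_F_REBINDING */
};

/*
 * Models the step bookkeeping at the top of mpol_rebind_policy():
 * returns false when the call is a no-op (a repeated STEP1).
 */
static bool toy_rebind_policy(struct toy_policy *pol, enum rebind_step step)
{
	if (step == REBIND_STEP1 && pol->rebinding)
		return false;			/* already half rebound */
	assert(!(step == REBIND_STEP2 && !pol->rebinding));

	if (step == REBIND_STEP1)
		pol->rebinding = true;
	else if (step == REBIND_STEP2)
		pol->rebinding = false;

	/* ... the per-mode ->rebind() work would run here ... */
	return true;
}

/* Models __mpol_dup(): a copy taken mid-rebind only needs the second step. */
static void toy_dup_finish(struct toy_policy *copy)
{
	if (copy->rebinding)
		toy_rebind_policy(copy, REBIND_STEP2);
	else
		toy_rebind_policy(copy, REBIND_ONCE);
}

int main(void)
{
	struct toy_policy pol = { .rebinding = false };

	toy_rebind_policy(&pol, REBIND_STEP1);	/* writer starts a rebind */
	struct toy_policy copy = pol;		/* a task duplicates it here */
	toy_dup_finish(&copy);			/* the copy finishes with STEP2 */
	assert(!copy.rebinding);

	toy_rebind_policy(&pol, REBIND_STEP2);	/* the writer finishes too */
	assert(!pol.rebinding);
	return 0;
}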