aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Paul Jackson <pj@sgi.com> 2006-01-08 04:01:56 -0500
committer Linus Torvalds <torvalds@g5.osdl.org> 2006-01-08 23:13:44 -0500
commit 74cb21553f4bf244185b9bec4c26e4e3169ad55e (patch)
tree 3f8f13e8dacc8f0876b01f62765a123ce1722b17
parent 909d75a3b77bdd8baa9429bad3b69a654d2954ce (diff)
[PATCH] cpuset: numa_policy_rebind cleanup
Clean up, reorganize and make more robust the mempolicy.c code that rebinds mempolicies relative to the containing cpuset after a task's memory placement changes.

The real motivator for this cleanup patch is to lay more groundwork for the upcoming patch to correctly rebind NUMA mempolicies that are attached to vmas after the containing cpuset memory placement changes.

NUMA mempolicies are constrained by the cpuset their task is a member of. When either (1) a task is moved to a different cpuset, or (2) the 'mems' mems_allowed of a cpuset is changed, then the NUMA mempolicies have embedded node numbers (for MPOL_BIND, MPOL_INTERLEAVE and MPOL_PREFERRED) that need to be recalculated, relative to their new cpuset placement.

The old code used an unreliable method of determining what was the old mems_allowed constraining the mempolicy. It just looked at the task's mems_allowed value. This sort of worked with the present code, which just rebinds the -task- mempolicy, and leaves any -vma- mempolicies broken, referring to the old nodes. But in an upcoming patch, the vma mempolicies will be rebound as well. Then the order in which the various task and vma mempolicies are updated will no longer be deterministic, and one can no longer count on the task->mems_allowed holding the old value for as long as needed. It's not even clear if the current code was guaranteed to work reliably for task mempolicies.

So I added a mems_allowed field to each mempolicy, stating exactly what mems_allowed the policy is relative to, and updated synchronously and reliably anytime that the mempolicy is rebound.

Also removed a useless wrapper routine, numa_policy_rebind(), and had its caller, cpuset_update_task_memory_state(), call directly to the rewritten policy_rebind() routine, and made that rebind routine extern instead of static, and added a "mpol_" prefix to its name, making it mpol_rebind_policy().
Signed-off-by: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- include/linux/mempolicy.h 12
-rw-r--r-- kernel/cpuset.c 2
-rw-r--r-- mm/mempolicy.c 31
3 files changed, 30 insertions, 15 deletions
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 05fddd5bee5d..74357cb9bc7c 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -68,6 +68,7 @@ struct mempolicy {
68 nodemask_t nodes; /* interleave */ 68 nodemask_t nodes; /* interleave */
69 /* undefined for default */ 69 /* undefined for default */
70 } v; 70 } v;
71 nodemask_t cpuset_mems_allowed; /* mempolicy relative to these nodes */
71}; 72};
72 73
73/* 74/*
@@ -146,7 +147,9 @@ struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp,
146 147
147extern void numa_default_policy(void); 148extern void numa_default_policy(void);
148extern void numa_policy_init(void); 149extern void numa_policy_init(void);
149extern void numa_policy_rebind(const nodemask_t *old, const nodemask_t *new); 150extern void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *new);
151extern void mpol_rebind_task(struct task_struct *tsk,
152 const nodemask_t *new);
150extern struct mempolicy default_policy; 153extern struct mempolicy default_policy;
151extern struct zonelist *huge_zonelist(struct vm_area_struct *vma, 154extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
152 unsigned long addr); 155 unsigned long addr);
@@ -221,7 +224,12 @@ static inline void numa_default_policy(void)
221{ 224{
222} 225}
223 226
224static inline void numa_policy_rebind(const nodemask_t *old, 227static inline void mpol_rebind_policy(struct mempolicy *pol,
228 const nodemask_t *new)
229{
230}
231
232static inline void mpol_rebind_task(struct task_struct *tsk,
225 const nodemask_t *new) 233 const nodemask_t *new)
226{ 234{
227} 235}
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 0d0dbbd6560a..8f764de3a9e7 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -643,7 +643,7 @@ void cpuset_update_task_memory_state()
643 tsk->cpuset_mems_generation = cs->mems_generation; 643 tsk->cpuset_mems_generation = cs->mems_generation;
644 task_unlock(tsk); 644 task_unlock(tsk);
645 up(&callback_sem); 645 up(&callback_sem);
646 numa_policy_rebind(&oldmem, &tsk->mems_allowed); 646 mpol_rebind_task(tsk, &tsk->mems_allowed);
647 if (!nodes_equal(oldmem, tsk->mems_allowed)) { 647 if (!nodes_equal(oldmem, tsk->mems_allowed)) {
648 if (migrate) { 648 if (migrate) {
649 do_migrate_pages(tsk->mm, &oldmem, 649 do_migrate_pages(tsk->mm, &oldmem,
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 34d566ac147f..c39bd86f4ea0 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -180,6 +180,7 @@ static struct mempolicy *mpol_new(int mode, nodemask_t *nodes)
180 break; 180 break;
181 } 181 }
182 policy->policy = mode; 182 policy->policy = mode;
183 policy->cpuset_mems_allowed = cpuset_mems_allowed(current);
183 return policy; 184 return policy;
184} 185}
185 186
@@ -1411,25 +1412,31 @@ void numa_default_policy(void)
1411} 1412}
1412 1413
1413/* Migrate a policy to a different set of nodes */ 1414/* Migrate a policy to a different set of nodes */
1414static void rebind_policy(struct mempolicy *pol, const nodemask_t *old, 1415void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask)
1415 const nodemask_t *new)
1416{ 1416{
1417 nodemask_t *mpolmask;
1417 nodemask_t tmp; 1418 nodemask_t tmp;
1418 1419
1419 if (!pol) 1420 if (!pol)
1420 return; 1421 return;
1422 mpolmask = &pol->cpuset_mems_allowed;
1423 if (nodes_equal(*mpolmask, *newmask))
1424 return;
1421 1425
1422 switch (pol->policy) { 1426 switch (pol->policy) {
1423 case MPOL_DEFAULT: 1427 case MPOL_DEFAULT:
1424 break; 1428 break;
1425 case MPOL_INTERLEAVE: 1429 case MPOL_INTERLEAVE:
1426 nodes_remap(tmp, pol->v.nodes, *old, *new); 1430 nodes_remap(tmp, pol->v.nodes, *mpolmask, *newmask);
1427 pol->v.nodes = tmp; 1431 pol->v.nodes = tmp;
1428 current->il_next = node_remap(current->il_next, *old, *new); 1432 *mpolmask = *newmask;
1433 current->il_next = node_remap(current->il_next,
1434 *mpolmask, *newmask);
1429 break; 1435 break;
1430 case MPOL_PREFERRED: 1436 case MPOL_PREFERRED:
1431 pol->v.preferred_node = node_remap(pol->v.preferred_node, 1437 pol->v.preferred_node = node_remap(pol->v.preferred_node,
1432 *old, *new); 1438 *mpolmask, *newmask);
1439 *mpolmask = *newmask;
1433 break; 1440 break;
1434 case MPOL_BIND: { 1441 case MPOL_BIND: {
1435 nodemask_t nodes; 1442 nodemask_t nodes;
@@ -1439,7 +1446,7 @@ static void rebind_policy(struct mempolicy *pol, const nodemask_t *old,
1439 nodes_clear(nodes); 1446 nodes_clear(nodes);
1440 for (z = pol->v.zonelist->zones; *z; z++) 1447 for (z = pol->v.zonelist->zones; *z; z++)
1441 node_set((*z)->zone_pgdat->node_id, nodes); 1448 node_set((*z)->zone_pgdat->node_id, nodes);
1442 nodes_remap(tmp, nodes, *old, *new); 1449 nodes_remap(tmp, nodes, *mpolmask, *newmask);
1443 nodes = tmp; 1450 nodes = tmp;
1444 1451
1445 zonelist = bind_zonelist(&nodes); 1452 zonelist = bind_zonelist(&nodes);
@@ -1454,6 +1461,7 @@ static void rebind_policy(struct mempolicy *pol, const nodemask_t *old,
1454 kfree(pol->v.zonelist); 1461 kfree(pol->v.zonelist);
1455 pol->v.zonelist = zonelist; 1462 pol->v.zonelist = zonelist;
1456 } 1463 }
1464 *mpolmask = *newmask;
1457 break; 1465 break;
1458 } 1466 }
1459 default: 1467 default:
@@ -1463,14 +1471,13 @@ static void rebind_policy(struct mempolicy *pol, const nodemask_t *old,
1463} 1471}
1464 1472
1465/* 1473/*
1466 * Someone moved this task to different nodes. Fixup mempolicies. 1474 * Wrapper for mpol_rebind_policy() that just requires task
1467 * 1475 * pointer, and updates task mempolicy.
1468 * TODO - fixup current->mm->vma and shmfs/tmpfs/hugetlbfs policies as well,
1469 * once we have a cpuset mechanism to mark which cpuset subtree is migrating.
1470 */ 1476 */
1471void numa_policy_rebind(const nodemask_t *old, const nodemask_t *new) 1477
1478void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new)
1472{ 1479{
1473 rebind_policy(current->mempolicy, old, new); 1480 mpol_rebind_policy(tsk->mempolicy, new);
1474} 1481}
1475 1482
1476/* 1483/*