path: root/kernel/cpuset.c
author     Paul Jackson <pj@sgi.com>                2006-03-31 05:30:52 -0500
committer  Linus Torvalds <torvalds@g5.osdl.org>    2006-03-31 15:18:55 -0500
commit     e4e364e865b382f9d99c7fc230ec2ce7df21257a (patch)
tree       9ff5ab54a0e40d7ad2b55d3ec48c6e175ebf50c7 /kernel/cpuset.c
parent     2741a559a01e1ba9bf87285569dc1a104d134ecf (diff)
[PATCH] cpuset: memory migration interaction fix
Fix memory migration so that it works regardless of what cpuset the invoking task is in.

If a task invoked a memory migration by doing one of:

 1) writing a different nodemask to a cpuset 'mems' file, or
 2) writing a task's pid to a different cpuset's 'tasks' file, where the cpuset had its 'memory_migrate' option turned on,

then the allocation of the new pages for the migrated task(s) was constrained by the invoking task's cpuset.  If that task wasn't in a cpuset that allowed the requested memory nodes, the memory migration would happen to some other nodes that were in the invoking task's cpuset.  This was usually surprising and puzzling behaviour: Why didn't the pages move?  Why did the pages move -there-?

To fix this, temporarily change the invoking task's 'mems_allowed' task_struct field to the nodes the migrating tasks are moving to, so that new pages can be allocated there.

Signed-off-by: Paul Jackson <pj@sgi.com>
Acked-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
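The two triggers the message describes are plain writes to cpuset control files. The following user-space C program is a minimal sketch of that scenario, assuming the cpuset filesystem is mounted at /dev/cpuset and a cpuset named "target" already exists; the mount point and cpuset name are illustrative assumptions, not part of this patch.

/*
 * Sketch only: drive the cpuset memory-migration path from user space.
 * Assumes cpusets are mounted at /dev/cpuset and /dev/cpuset/target exists.
 */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/* Write a string to a cpuset control file, exiting on failure. */
static void write_cpuset_file(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f || fputs(val, f) == EOF) {
		perror(path);
		exit(1);
	}
	fclose(f);
}

int main(void)
{
	char pid[32];

	/* Ask the kernel to migrate pages when mems or membership change. */
	write_cpuset_file("/dev/cpuset/target/memory_migrate", "1");

	/* Case 1: write a different nodemask to the cpuset's 'mems' file. */
	write_cpuset_file("/dev/cpuset/target/mems", "1");

	/* Case 2: write this task's pid into the cpuset's 'tasks' file. */
	snprintf(pid, sizeof(pid), "%d", getpid());
	write_cpuset_file("/dev/cpuset/target/tasks", pid);

	return 0;
}

Before this patch, the pages moved by either write were allocated within the invoking task's own mems_allowed rather than on the requested nodes; the patch below switches current->mems_allowed to the target nodes for the duration of the migration.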
Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r--  kernel/cpuset.c | 57
1 file changed, 52 insertions(+), 5 deletions(-)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index bf42381a419..72248d1b9e3 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -834,6 +834,55 @@ static int update_cpumask(struct cpuset *cs, char *buf)
 }
 
 /*
+ * cpuset_migrate_mm
+ *
+ *    Migrate memory region from one set of nodes to another.
+ *
+ *    Temporarilly set tasks mems_allowed to target nodes of migration,
+ *    so that the migration code can allocate pages on these nodes.
+ *
+ *    Call holding manage_mutex, so our current->cpuset won't change
+ *    during this call, as manage_mutex holds off any attach_task()
+ *    calls.  Therefore we don't need to take task_lock around the
+ *    call to guarantee_online_mems(), as we know no one is changing
+ *    our tasks cpuset.
+ *
+ *    Hold callback_mutex around the two modifications of our tasks
+ *    mems_allowed to synchronize with cpuset_mems_allowed().
+ *
+ *    While the mm_struct we are migrating is typically from some
+ *    other task, the task_struct mems_allowed that we are hacking
+ *    is for our current task, which must allocate new pages for that
+ *    migrating memory region.
+ *
+ *    We call cpuset_update_task_memory_state() before hacking
+ *    our tasks mems_allowed, so that we are assured of being in
+ *    sync with our tasks cpuset, and in particular, callbacks to
+ *    cpuset_update_task_memory_state() from nested page allocations
+ *    won't see any mismatch of our cpuset and task mems_generation
+ *    values, so won't overwrite our hacked tasks mems_allowed
+ *    nodemask.
+ */
+
+static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
+							const nodemask_t *to)
+{
+	struct task_struct *tsk = current;
+
+	cpuset_update_task_memory_state();
+
+	mutex_lock(&callback_mutex);
+	tsk->mems_allowed = *to;
+	mutex_unlock(&callback_mutex);
+
+	do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL);
+
+	mutex_lock(&callback_mutex);
+	guarantee_online_mems(tsk->cpuset, &tsk->mems_allowed);
+	mutex_unlock(&callback_mutex);
+}
+
+/*
  * Handle user request to change the 'mems' memory placement
  * of a cpuset.  Needs to validate the request, update the
  * cpusets mems_allowed and mems_generation, and for each
@@ -945,10 +994,8 @@ static int update_nodemask(struct cpuset *cs, char *buf)
 		struct mm_struct *mm = mmarray[i];
 
 		mpol_rebind_mm(mm, &cs->mems_allowed);
-		if (migrate) {
-			do_migrate_pages(mm, &oldmem, &cs->mems_allowed,
-							MPOL_MF_MOVE_ALL);
-		}
+		if (migrate)
+			cpuset_migrate_mm(mm, &oldmem, &cs->mems_allowed);
 		mmput(mm);
 	}
 
@@ -1184,7 +1231,7 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
 	if (mm) {
 		mpol_rebind_mm(mm, &to);
 		if (is_memory_migrate(cs))
-			do_migrate_pages(mm, &from, &to, MPOL_MF_MOVE_ALL);
+			cpuset_migrate_mm(mm, &from, &to);
 		mmput(mm);
 	}
 