diff options
author | Paul Jackson <pj@sgi.com> | 2006-03-31 05:30:52 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-03-31 15:18:55 -0500 |
commit | e4e364e865b382f9d99c7fc230ec2ce7df21257a (patch) | |
tree | 9ff5ab54a0e40d7ad2b55d3ec48c6e175ebf50c7 | |
parent | 2741a559a01e1ba9bf87285569dc1a104d134ecf (diff) |
[PATCH] cpuset: memory migration interaction fix
Fix memory migration so that it works regardless of what cpuset the invoking
task is in.
If a task invoked a memory migration by doing one of:
1) writing a different nodemask to a cpuset's 'mems' file, or
2) writing a task's pid to a different cpuset's 'tasks' file,
where the cpuset had its 'memory_migrate' option turned on, then the
allocation of the new pages for the migrated task(s) was constrained
by the invoking task's cpuset.
If this task wasn't in a cpuset that allowed the requested memory nodes, the
memory migration would happen to some other nodes that were in the invoking
task's cpuset. This was usually surprising and puzzling behaviour: Why didn't
the pages move? Why did the pages move -there-?
To fix this, temporarily change the invoking task's 'mems_allowed' task_struct
field to the nodes the migrating task is moving to, so that new pages can be
allocated there.
Signed-off-by: Paul Jackson <pj@sgi.com>
Acked-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | kernel/cpuset.c | 57 |
1 files changed, 52 insertions, 5 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index bf42381a4195..72248d1b9e3f 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -834,6 +834,55 @@ static int update_cpumask(struct cpuset *cs, char *buf) | |||
834 | } | 834 | } |
835 | 835 | ||
836 | /* | 836 | /* |
837 | * cpuset_migrate_mm | ||
838 | * | ||
839 | * Migrate memory region from one set of nodes to another. | ||
840 | * | ||
841 | * Temporarilly set tasks mems_allowed to target nodes of migration, | ||
842 | * so that the migration code can allocate pages on these nodes. | ||
843 | * | ||
844 | * Call holding manage_mutex, so our current->cpuset won't change | ||
845 | * during this call, as manage_mutex holds off any attach_task() | ||
846 | * calls. Therefore we don't need to take task_lock around the | ||
847 | * call to guarantee_online_mems(), as we know no one is changing | ||
848 | * our tasks cpuset. | ||
849 | * | ||
850 | * Hold callback_mutex around the two modifications of our tasks | ||
851 | * mems_allowed to synchronize with cpuset_mems_allowed(). | ||
852 | * | ||
853 | * While the mm_struct we are migrating is typically from some | ||
854 | * other task, the task_struct mems_allowed that we are hacking | ||
855 | * is for our current task, which must allocate new pages for that | ||
856 | * migrating memory region. | ||
857 | * | ||
858 | * We call cpuset_update_task_memory_state() before hacking | ||
859 | * our tasks mems_allowed, so that we are assured of being in | ||
860 | * sync with our tasks cpuset, and in particular, callbacks to | ||
861 | * cpuset_update_task_memory_state() from nested page allocations | ||
862 | * won't see any mismatch of our cpuset and task mems_generation | ||
863 | * values, so won't overwrite our hacked tasks mems_allowed | ||
864 | * nodemask. | ||
865 | */ | ||
866 | |||
867 | static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from, | ||
868 | const nodemask_t *to) | ||
869 | { | ||
870 | struct task_struct *tsk = current; | ||
871 | |||
872 | cpuset_update_task_memory_state(); | ||
873 | |||
874 | mutex_lock(&callback_mutex); | ||
875 | tsk->mems_allowed = *to; | ||
876 | mutex_unlock(&callback_mutex); | ||
877 | |||
878 | do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL); | ||
879 | |||
880 | mutex_lock(&callback_mutex); | ||
881 | guarantee_online_mems(tsk->cpuset, &tsk->mems_allowed); | ||
882 | mutex_unlock(&callback_mutex); | ||
883 | } | ||
884 | |||
885 | /* | ||
837 | * Handle user request to change the 'mems' memory placement | 886 | * Handle user request to change the 'mems' memory placement |
838 | * of a cpuset. Needs to validate the request, update the | 887 | * of a cpuset. Needs to validate the request, update the |
839 | * cpusets mems_allowed and mems_generation, and for each | 888 | * cpusets mems_allowed and mems_generation, and for each |
@@ -945,10 +994,8 @@ static int update_nodemask(struct cpuset *cs, char *buf) | |||
945 | struct mm_struct *mm = mmarray[i]; | 994 | struct mm_struct *mm = mmarray[i]; |
946 | 995 | ||
947 | mpol_rebind_mm(mm, &cs->mems_allowed); | 996 | mpol_rebind_mm(mm, &cs->mems_allowed); |
948 | if (migrate) { | 997 | if (migrate) |
949 | do_migrate_pages(mm, &oldmem, &cs->mems_allowed, | 998 | cpuset_migrate_mm(mm, &oldmem, &cs->mems_allowed); |
950 | MPOL_MF_MOVE_ALL); | ||
951 | } | ||
952 | mmput(mm); | 999 | mmput(mm); |
953 | } | 1000 | } |
954 | 1001 | ||
@@ -1184,7 +1231,7 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf) | |||
1184 | if (mm) { | 1231 | if (mm) { |
1185 | mpol_rebind_mm(mm, &to); | 1232 | mpol_rebind_mm(mm, &to); |
1186 | if (is_memory_migrate(cs)) | 1233 | if (is_memory_migrate(cs)) |
1187 | do_migrate_pages(mm, &from, &to, MPOL_MF_MOVE_ALL); | 1234 | cpuset_migrate_mm(mm, &from, &to); |
1188 | mmput(mm); | 1235 | mmput(mm); |
1189 | } | 1236 | } |
1190 | 1237 | ||