diff options
| author | Paul Jackson <pj@sgi.com> | 2006-03-31 05:30:52 -0500 |
|---|---|---|
| committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-03-31 15:18:55 -0500 |
| commit | e4e364e865b382f9d99c7fc230ec2ce7df21257a (patch) | |
| tree | 9ff5ab54a0e40d7ad2b55d3ec48c6e175ebf50c7 /kernel | |
| parent | 2741a559a01e1ba9bf87285569dc1a104d134ecf (diff) | |
[PATCH] cpuset: memory migration interaction fix
Fix memory migration so that it works regardless of what cpuset the invoking
task is in.
If a task invoked a memory migration by doing one of:
1) writing a different nodemask to a cpuset's 'mems' file, or
2) writing a task's pid to a different cpuset's 'tasks' file,
where the cpuset had its 'memory_migrate' option turned on, then the
allocation of the new pages for the migrated task(s) was constrained
by the invoking task's cpuset.
If this task wasn't in a cpuset that allowed the requested memory nodes, the
memory migration would happen to some other nodes that were in that invoking
task's cpuset. This was usually surprising and puzzling behaviour: Why didn't
the pages move? Why did the pages move -there-?
To fix this, temporarily change the invoking task's 'mems_allowed' task_struct
field to the nodes the migrating task is moving to, so that new pages can be
allocated there.
Signed-off-by: Paul Jackson <pj@sgi.com>
Acked-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/cpuset.c | 57 |
1 files changed, 52 insertions, 5 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index bf42381a4195..72248d1b9e3f 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
| @@ -834,6 +834,55 @@ static int update_cpumask(struct cpuset *cs, char *buf) | |||
| 834 | } | 834 | } |
| 835 | 835 | ||
| 836 | /* | 836 | /* |
| 837 | * cpuset_migrate_mm | ||
| 838 | * | ||
| 839 | * Migrate memory region from one set of nodes to another. | ||
| 840 | * | ||
| 841 | * Temporarilly set tasks mems_allowed to target nodes of migration, | ||
| 842 | * so that the migration code can allocate pages on these nodes. | ||
| 843 | * | ||
| 844 | * Call holding manage_mutex, so our current->cpuset won't change | ||
| 845 | * during this call, as manage_mutex holds off any attach_task() | ||
| 846 | * calls. Therefore we don't need to take task_lock around the | ||
| 847 | * call to guarantee_online_mems(), as we know no one is changing | ||
| 848 | * our tasks cpuset. | ||
| 849 | * | ||
| 850 | * Hold callback_mutex around the two modifications of our tasks | ||
| 851 | * mems_allowed to synchronize with cpuset_mems_allowed(). | ||
| 852 | * | ||
| 853 | * While the mm_struct we are migrating is typically from some | ||
| 854 | * other task, the task_struct mems_allowed that we are hacking | ||
| 855 | * is for our current task, which must allocate new pages for that | ||
| 856 | * migrating memory region. | ||
| 857 | * | ||
| 858 | * We call cpuset_update_task_memory_state() before hacking | ||
| 859 | * our tasks mems_allowed, so that we are assured of being in | ||
| 860 | * sync with our tasks cpuset, and in particular, callbacks to | ||
| 861 | * cpuset_update_task_memory_state() from nested page allocations | ||
| 862 | * won't see any mismatch of our cpuset and task mems_generation | ||
| 863 | * values, so won't overwrite our hacked tasks mems_allowed | ||
| 864 | * nodemask. | ||
| 865 | */ | ||
| 866 | |||
| 867 | static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from, | ||
| 868 | const nodemask_t *to) | ||
| 869 | { | ||
| 870 | struct task_struct *tsk = current; | ||
| 871 | |||
| 872 | cpuset_update_task_memory_state(); | ||
| 873 | |||
| 874 | mutex_lock(&callback_mutex); | ||
| 875 | tsk->mems_allowed = *to; | ||
| 876 | mutex_unlock(&callback_mutex); | ||
| 877 | |||
| 878 | do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL); | ||
| 879 | |||
| 880 | mutex_lock(&callback_mutex); | ||
| 881 | guarantee_online_mems(tsk->cpuset, &tsk->mems_allowed); | ||
| 882 | mutex_unlock(&callback_mutex); | ||
| 883 | } | ||
| 884 | |||
| 885 | /* | ||
| 837 | * Handle user request to change the 'mems' memory placement | 886 | * Handle user request to change the 'mems' memory placement |
| 838 | * of a cpuset. Needs to validate the request, update the | 887 | * of a cpuset. Needs to validate the request, update the |
| 839 | * cpusets mems_allowed and mems_generation, and for each | 888 | * cpusets mems_allowed and mems_generation, and for each |
| @@ -945,10 +994,8 @@ static int update_nodemask(struct cpuset *cs, char *buf) | |||
| 945 | struct mm_struct *mm = mmarray[i]; | 994 | struct mm_struct *mm = mmarray[i]; |
| 946 | 995 | ||
| 947 | mpol_rebind_mm(mm, &cs->mems_allowed); | 996 | mpol_rebind_mm(mm, &cs->mems_allowed); |
| 948 | if (migrate) { | 997 | if (migrate) |
| 949 | do_migrate_pages(mm, &oldmem, &cs->mems_allowed, | 998 | cpuset_migrate_mm(mm, &oldmem, &cs->mems_allowed); |
| 950 | MPOL_MF_MOVE_ALL); | ||
| 951 | } | ||
| 952 | mmput(mm); | 999 | mmput(mm); |
| 953 | } | 1000 | } |
| 954 | 1001 | ||
| @@ -1184,7 +1231,7 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf) | |||
| 1184 | if (mm) { | 1231 | if (mm) { |
| 1185 | mpol_rebind_mm(mm, &to); | 1232 | mpol_rebind_mm(mm, &to); |
| 1186 | if (is_memory_migrate(cs)) | 1233 | if (is_memory_migrate(cs)) |
| 1187 | do_migrate_pages(mm, &from, &to, MPOL_MF_MOVE_ALL); | 1234 | cpuset_migrate_mm(mm, &from, &to); |
| 1188 | mmput(mm); | 1235 | mmput(mm); |
| 1189 | } | 1236 | } |
| 1190 | 1237 | ||
