diff options
author | Li Zefan <lizefan@huawei.com> | 2013-06-13 03:11:44 -0400 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2013-06-13 13:51:22 -0400 |
commit | f047cecf2cfc9595b1f39c9aab383bb0682f5a53 (patch) | |
tree | e3cdfd41e39c1a43ab9cba2721efb0aaf000e858 /kernel/cpuset.c | |
parent | 88fa523bff295f1d60244a54833480b02f775152 (diff) |
cpuset: fix to migrate mm correctly in a corner case
Before moving tasks out of empty cpusets, update_tasks_nodemask()
is called, which calls do_migrate_pages(xx, from, to). Then those
tasks are moved to an ancestor, and do_migrate_pages() is called
again.
The first time: from = node_to_be_offlined, to = empty.
The second time: from = empty, to = ancestor's nodemask.
So it looks like no pages will be migrated.
Fix this by:
- Don't call update_tasks_nodemask() on empty cpusets.
- Pass cs->old_mems_allowed to do_migrate_pages().
v4: added comment in cpuset_hotplug_update_tasks() and rephrased comment
in cpuset_attach().
Signed-off-by: Li Zefan <lizefan@huawei.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r-- | kernel/cpuset.c | 25 |
1 files changed, 19 insertions, 6 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 3b3fdfdd4d78..4c17d96bd3a5 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -1563,9 +1563,18 @@ static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) | |||
1563 | struct cpuset *mems_oldcs = effective_nodemask_cpuset(oldcs); | 1563 | struct cpuset *mems_oldcs = effective_nodemask_cpuset(oldcs); |
1564 | 1564 | ||
1565 | mpol_rebind_mm(mm, &cpuset_attach_nodemask_to); | 1565 | mpol_rebind_mm(mm, &cpuset_attach_nodemask_to); |
1566 | if (is_memory_migrate(cs)) | 1566 | |
1567 | cpuset_migrate_mm(mm, &mems_oldcs->mems_allowed, | 1567 | /* |
1568 | * old_mems_allowed is the same as mems_allowed here, except | ||
1569 | * if this task is being moved automatically due to hotplug. | ||
1570 | * In that case @mems_allowed has been updated and is empty, | ||
1571 | * so @old_mems_allowed is the right nodemask to migrate the | ||
1572 | * mm from. | ||
1573 | */ | ||
1574 | if (is_memory_migrate(cs)) { | ||
1575 | cpuset_migrate_mm(mm, &mems_oldcs->old_mems_allowed, | ||
1568 | &cpuset_attach_nodemask_to); | 1576 | &cpuset_attach_nodemask_to); |
1577 | } | ||
1569 | mmput(mm); | 1578 | mmput(mm); |
1570 | } | 1579 | } |
1571 | 1580 | ||
@@ -2152,10 +2161,12 @@ retry: | |||
2152 | 2161 | ||
2153 | /* | 2162 | /* |
2154 | * If sane_behavior flag is set, we need to update tasks' cpumask | 2163 | * If sane_behavior flag is set, we need to update tasks' cpumask |
2155 | * for empty cpuset to take on ancestor's cpumask. | 2164 | * for empty cpuset to take on ancestor's cpumask. Otherwise, don't |
2165 | * call update_tasks_cpumask() if the cpuset becomes empty, as | ||
2166 | * the tasks in it will be migrated to an ancestor. | ||
2156 | */ | 2167 | */ |
2157 | if ((sane && cpumask_empty(cs->cpus_allowed)) || | 2168 | if ((sane && cpumask_empty(cs->cpus_allowed)) || |
2158 | !cpumask_empty(&off_cpus)) | 2169 | (!cpumask_empty(&off_cpus) && !cpumask_empty(cs->cpus_allowed))) |
2159 | update_tasks_cpumask(cs, NULL); | 2170 | update_tasks_cpumask(cs, NULL); |
2160 | 2171 | ||
2161 | mutex_lock(&callback_mutex); | 2172 | mutex_lock(&callback_mutex); |
@@ -2164,10 +2175,12 @@ retry: | |||
2164 | 2175 | ||
2165 | /* | 2176 | /* |
2166 | * If sane_behavior flag is set, we need to update tasks' nodemask | 2177 | * If sane_behavior flag is set, we need to update tasks' nodemask |
2167 | * for empty cpuset to take on ancestor's nodemask. | 2178 | * for empty cpuset to take on ancestor's nodemask. Otherwise, don't |
2179 | * call update_tasks_nodemask() if the cpuset becomes empty, as | ||
2180 | * the tasks in it will be migrated to an ancestor. | ||
2168 | */ | 2181 | */ |
2169 | if ((sane && nodes_empty(cs->mems_allowed)) || | 2182 | if ((sane && nodes_empty(cs->mems_allowed)) || |
2170 | !nodes_empty(off_mems)) | 2183 | (!nodes_empty(off_mems) && !nodes_empty(cs->mems_allowed))) |
2171 | update_tasks_nodemask(cs, NULL); | 2184 | update_tasks_nodemask(cs, NULL); |
2172 | 2185 | ||
2173 | is_empty = cpumask_empty(cs->cpus_allowed) || | 2186 | is_empty = cpumask_empty(cs->cpus_allowed) || |