aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/cpuset.c
diff options
context:
space:
mode:
authorLi Zefan <lizefan@huawei.com>2013-06-13 03:11:44 -0400
committerTejun Heo <tj@kernel.org>2013-06-13 13:51:22 -0400
commitf047cecf2cfc9595b1f39c9aab383bb0682f5a53 (patch)
treee3cdfd41e39c1a43ab9cba2721efb0aaf000e858 /kernel/cpuset.c
parent88fa523bff295f1d60244a54833480b02f775152 (diff)
cpuset: fix to migrate mm correctly in a corner case
Before moving tasks out of empty cpusets, update_tasks_nodemask() is called, which calls do_migrate_pages(xx, from, to). Then those tasks are moved to an ancestor, and do_migrate_pages() is called again. The first time: from = node_to_be_offlined, to = empty. The second time: from = empty, to = ancestor's nodemask. So it looks like no pages will be migrated. Fix this by: - Not calling update_tasks_nodemask() on empty cpusets. - Passing cs->old_mems_allowed to do_migrate_pages(). v4: added comment in cpuset_hotplug_update_tasks() and rephrased comment in cpuset_attach(). Signed-off-by: Li Zefan <lizefan@huawei.com> Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r--kernel/cpuset.c25
1 files changed, 19 insertions, 6 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 3b3fdfdd4d78..4c17d96bd3a5 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1563,9 +1563,18 @@ static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
1563 struct cpuset *mems_oldcs = effective_nodemask_cpuset(oldcs); 1563 struct cpuset *mems_oldcs = effective_nodemask_cpuset(oldcs);
1564 1564
1565 mpol_rebind_mm(mm, &cpuset_attach_nodemask_to); 1565 mpol_rebind_mm(mm, &cpuset_attach_nodemask_to);
1566 if (is_memory_migrate(cs)) 1566
1567 cpuset_migrate_mm(mm, &mems_oldcs->mems_allowed, 1567 /*
1568 * old_mems_allowed is the same as mems_allowed here, except
1569 * if this task is being moved automatically due to hotplug.
1570 * In that case @mems_allowed has been updated and is empty,
1571 * so @old_mems_allowed is the right set of nodes to migrate
1572 * the mm from.
1573 */
1574 if (is_memory_migrate(cs)) {
1575 cpuset_migrate_mm(mm, &mems_oldcs->old_mems_allowed,
1568 &cpuset_attach_nodemask_to); 1576 &cpuset_attach_nodemask_to);
1577 }
1569 mmput(mm); 1578 mmput(mm);
1570 } 1579 }
1571 1580
@@ -2152,10 +2161,12 @@ retry:
2152 2161
2153 /* 2162 /*
2154 * If sane_behavior flag is set, we need to update tasks' cpumask 2163 * If sane_behavior flag is set, we need to update tasks' cpumask
2155 * for empty cpuset to take on ancestor's cpumask. 2164 * for empty cpuset to take on ancestor's cpumask. Otherwise, don't
2165 * call update_tasks_cpumask() if the cpuset becomes empty, as
2166 * the tasks in it will be migrated to an ancestor.
2156 */ 2167 */
2157 if ((sane && cpumask_empty(cs->cpus_allowed)) || 2168 if ((sane && cpumask_empty(cs->cpus_allowed)) ||
2158 !cpumask_empty(&off_cpus)) 2169 (!cpumask_empty(&off_cpus) && !cpumask_empty(cs->cpus_allowed)))
2159 update_tasks_cpumask(cs, NULL); 2170 update_tasks_cpumask(cs, NULL);
2160 2171
2161 mutex_lock(&callback_mutex); 2172 mutex_lock(&callback_mutex);
@@ -2164,10 +2175,12 @@ retry:
2164 2175
2165 /* 2176 /*
2166 * If sane_behavior flag is set, we need to update tasks' nodemask 2177 * If sane_behavior flag is set, we need to update tasks' nodemask
2167 * for empty cpuset to take on ancestor's nodemask. 2178 * for empty cpuset to take on ancestor's nodemask. Otherwise, don't
2179 * call update_tasks_nodemask() if the cpuset becomes empty, as
2180 * the tasks in it will be migrated to an ancestor.
2168 */ 2181 */
2169 if ((sane && nodes_empty(cs->mems_allowed)) || 2182 if ((sane && nodes_empty(cs->mems_allowed)) ||
2170 !nodes_empty(off_mems)) 2183 (!nodes_empty(off_mems) && !nodes_empty(cs->mems_allowed)))
2171 update_tasks_nodemask(cs, NULL); 2184 update_tasks_nodemask(cs, NULL);
2172 2185
2173 is_empty = cpumask_empty(cs->cpus_allowed) || 2186 is_empty = cpumask_empty(cs->cpus_allowed) ||