diff options
author | Li Zefan <lizefan@huawei.com> | 2013-06-09 05:14:22 -0400 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2013-06-09 11:47:13 -0400 |
commit | e44193d39e8d4d1de5d996fcd37ed75e5c704f10 (patch) | |
tree | e73e4d6fe9de762e3c56fd6a122ed28f2fcf6979 /kernel/cpuset.c | |
parent | a73456f37b9dbc917398387d0cba926b4455b70f (diff) |
cpuset: let hotplug propagation work wait for task attaching
Instead of triggering propagation work in cpuset_attach(), we make
hotplug propagation work wait until there's no task attaching in
progress.
IMO this is more robust. We won't see empty masks in cpuset_attach().
Also, it is preparation for removing the propagation work entirely. Without asynchronous
propagation we can't call move_tasks_in_empty_cpuset() in cpuset_attach(),
because otherwise we'd deadlock on cgroup_mutex.
tj: typo fixes.
Signed-off-by: Li Zefan <lizefan@huawei.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r-- | kernel/cpuset.c | 29 |
1 file changed, 17 insertions(+), 12 deletions(-)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 535dce685eec..e902473f76bf 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -59,6 +59,7 @@ | |||
59 | #include <linux/mutex.h> | 59 | #include <linux/mutex.h> |
60 | #include <linux/workqueue.h> | 60 | #include <linux/workqueue.h> |
61 | #include <linux/cgroup.h> | 61 | #include <linux/cgroup.h> |
62 | #include <linux/wait.h> | ||
62 | 63 | ||
63 | /* | 64 | /* |
64 | * Tracks how many cpusets are currently defined in system. | 65 | * Tracks how many cpusets are currently defined in system. |
@@ -275,6 +276,8 @@ static void schedule_cpuset_propagate_hotplug(struct cpuset *cs); | |||
275 | 276 | ||
276 | static DECLARE_WORK(cpuset_hotplug_work, cpuset_hotplug_workfn); | 277 | static DECLARE_WORK(cpuset_hotplug_work, cpuset_hotplug_workfn); |
277 | 278 | ||
279 | static DECLARE_WAIT_QUEUE_HEAD(cpuset_attach_wq); | ||
280 | |||
278 | /* | 281 | /* |
279 | * This is ugly, but preserves the userspace API for existing cpuset | 282 | * This is ugly, but preserves the userspace API for existing cpuset |
280 | * users. If someone tries to mount the "cpuset" filesystem, we | 283 | * users. If someone tries to mount the "cpuset" filesystem, we |
@@ -1436,14 +1439,8 @@ static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) | |||
1436 | } | 1439 | } |
1437 | 1440 | ||
1438 | cs->attach_in_progress--; | 1441 | cs->attach_in_progress--; |
1439 | 1442 | if (!cs->attach_in_progress) | |
1440 | /* | 1443 | wake_up(&cpuset_attach_wq); |
1441 | * We may have raced with CPU/memory hotunplug. Trigger hotplug | ||
1442 | * propagation if @cs doesn't have any CPU or memory. It will move | ||
1443 | * the newly added tasks to the nearest parent which can execute. | ||
1444 | */ | ||
1445 | if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) | ||
1446 | schedule_cpuset_propagate_hotplug(cs); | ||
1447 | 1444 | ||
1448 | mutex_unlock(&cpuset_mutex); | 1445 | mutex_unlock(&cpuset_mutex); |
1449 | } | 1446 | } |
@@ -1555,10 +1552,6 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft, | |||
1555 | * resources, wait for the previously scheduled operations before | 1552 | * resources, wait for the previously scheduled operations before |
1556 | * proceeding, so that we don't end up keep removing tasks added | 1553 | * proceeding, so that we don't end up keep removing tasks added |
1557 | * after execution capability is restored. | 1554 | * after execution capability is restored. |
1558 | * | ||
1559 | * Flushing cpuset_hotplug_work is enough to synchronize against | ||
1560 | * hotplug hanlding; however, cpuset_attach() may schedule | ||
1561 | * propagation work directly. Flush the workqueue too. | ||
1562 | */ | 1555 | */ |
1563 | flush_work(&cpuset_hotplug_work); | 1556 | flush_work(&cpuset_hotplug_work); |
1564 | flush_workqueue(cpuset_propagate_hotplug_wq); | 1557 | flush_workqueue(cpuset_propagate_hotplug_wq); |
@@ -2005,8 +1998,20 @@ static void cpuset_propagate_hotplug_workfn(struct work_struct *work) | |||
2005 | struct cpuset *cs = container_of(work, struct cpuset, hotplug_work); | 1998 | struct cpuset *cs = container_of(work, struct cpuset, hotplug_work); |
2006 | bool is_empty; | 1999 | bool is_empty; |
2007 | 2000 | ||
2001 | retry: | ||
2002 | wait_event(cpuset_attach_wq, cs->attach_in_progress == 0); | ||
2003 | |||
2008 | mutex_lock(&cpuset_mutex); | 2004 | mutex_lock(&cpuset_mutex); |
2009 | 2005 | ||
2006 | /* | ||
2007 | * We have raced with task attaching. We wait until attaching | ||
2008 | * is finished, so we won't attach a task to an empty cpuset. | ||
2009 | */ | ||
2010 | if (cs->attach_in_progress) { | ||
2011 | mutex_unlock(&cpuset_mutex); | ||
2012 | goto retry; | ||
2013 | } | ||
2014 | |||
2010 | cpumask_andnot(&off_cpus, cs->cpus_allowed, top_cpuset.cpus_allowed); | 2015 | cpumask_andnot(&off_cpus, cs->cpus_allowed, top_cpuset.cpus_allowed); |
2011 | nodes_andnot(off_mems, cs->mems_allowed, top_cpuset.mems_allowed); | 2016 | nodes_andnot(off_mems, cs->mems_allowed, top_cpuset.mems_allowed); |
2012 | 2017 | ||