author	Tejun Heo <tj@kernel.org>	2013-01-07 11:51:07 -0500
committer	Tejun Heo <tj@kernel.org>	2013-01-07 11:51:07 -0500
commit	8d03394877ecdf87e1d694664c460747b8e05aa1 (patch)
tree	d7f2301996b9e0a747ada2385244c670e7da7bb2 /kernel
parent	699140ba838dd3fa2c5cce474e14f194b09f91aa (diff)
cpuset: make CPU / memory hotplug propagation asynchronous
cpuset_hotplug_workfn() has been invoking cpuset_propagate_hotplug()
directly to propagate hotplug updates to !root cpusets; however, this
has the following problems.

* cpuset locking is scheduled to be decoupled from cgroup_mutex,
  cgroup_mutex will be unexported, and cgroup_attach_task() will do
  cgroup locking internally, so propagation can't synchronously move
  tasks to a parent cgroup while walking the hierarchy.

* We can't use cgroup generic tree iterator because propagation to
  each cpuset may sleep.  With propagation done asynchronously, we can
  lose the rather ugly cpuset specific iteration.

Convert cpuset_propagate_hotplug() to
cpuset_propagate_hotplug_workfn() and execute it from newly added
cpuset->hotplug_work.

The work items are run on an ordered workqueue, so the propagation
order is preserved.  cpuset_hotplug_workfn() schedules all
propagations while holding cgroup_mutex and waits for completion
without cgroup_mutex.  Each in-flight propagation holds a reference
to the cpuset->css.

This patch doesn't cause any functional difference.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
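The patch boils down to a queue-with-reference idiom: pin the object, queue
its work item on an ordered workqueue, drop the pin if the item was already
pending, and flush the queue once everything is scheduled.  Below is a
minimal sketch of that idiom in kernel style; the names (my_obj, my_workfn,
my_schedule, my_wq) are hypothetical, and it substitutes a plain kref for
the css refcounting used above (css_tryget() can additionally fail when the
css is being destroyed, which the sketch's unconditional kref_get() does
not model):

#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

/* hypothetical refcounted object that carries its own work item */
struct my_obj {
	struct kref kref;
	struct work_struct work;
};

/* created once at init with alloc_ordered_workqueue("my_wq", 0) */
static struct workqueue_struct *my_wq;

static void my_release(struct kref *kref)
{
	kfree(container_of(kref, struct my_obj, kref));
}

static void my_workfn(struct work_struct *work)
{
	struct my_obj *obj = container_of(work, struct my_obj, work);

	/* ... propagate the hotplug update to @obj here ... */

	/* may free @obj, so it must be the last operation */
	kref_put(&obj->kref, my_release);
}

static struct my_obj *my_obj_create(void)
{
	struct my_obj *obj = kzalloc(sizeof(*obj), GFP_KERNEL);

	if (!obj)
		return NULL;
	kref_init(&obj->kref);
	INIT_WORK(&obj->work, my_workfn);
	return obj;
}

static void my_schedule(struct my_obj *obj)
{
	/* pin @obj; the ref is dropped when the work item finishes */
	kref_get(&obj->kref);

	/*
	 * queue_work() returns false if @work was already pending; that
	 * earlier queuing already holds a ref, so give this one back.
	 * Since my_wq is ordered, items run in the order queued here.
	 */
	if (!queue_work(my_wq, &obj->work))
		kref_put(&obj->kref, my_release);
}

A caller queues any number of objects with my_schedule() and then waits
with flush_workqueue(my_wq); that pairing is what lets
cpuset_hotplug_workfn() schedule propagations while holding cgroup_mutex
but wait for them only after dropping it.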
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/cpuset.c	54
1 file changed, 48 insertions(+), 6 deletions(-)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 74e412f908db..a7bb547786d7 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -99,6 +99,8 @@ struct cpuset {
 
 	/* used for walking a cpuset hierarchy */
 	struct list_head stack_list;
+
+	struct work_struct hotplug_work;
 };
 
 /* Retrieve the cpuset for a cgroup */
@@ -254,7 +256,10 @@ static DEFINE_SPINLOCK(cpuset_buffer_lock);
254/* 256/*
255 * CPU / memory hotplug is handled asynchronously. 257 * CPU / memory hotplug is handled asynchronously.
256 */ 258 */
259static struct workqueue_struct *cpuset_propagate_hotplug_wq;
260
257static void cpuset_hotplug_workfn(struct work_struct *work); 261static void cpuset_hotplug_workfn(struct work_struct *work);
262static void cpuset_propagate_hotplug_workfn(struct work_struct *work);
258 263
259static DECLARE_WORK(cpuset_hotplug_work, cpuset_hotplug_workfn); 264static DECLARE_WORK(cpuset_hotplug_work, cpuset_hotplug_workfn);
260 265
@@ -1808,6 +1813,7 @@ static struct cgroup_subsys_state *cpuset_css_alloc(struct cgroup *cont)
 	cpumask_clear(cs->cpus_allowed);
 	nodes_clear(cs->mems_allowed);
 	fmeter_init(&cs->fmeter);
+	INIT_WORK(&cs->hotplug_work, cpuset_propagate_hotplug_workfn);
 	cs->relax_domain_level = -1;
 	cs->parent = cgroup_cs(cont->parent);
 
@@ -2033,21 +2039,20 @@ static struct cpuset *cpuset_next(struct list_head *queue)
 }
 
 /**
- * cpuset_propagate_hotplug - propagate CPU/memory hotplug to a cpuset
+ * cpuset_propagate_hotplug_workfn - propagate CPU/memory hotplug to a cpuset
  * @cs: cpuset in interest
  *
  * Compare @cs's cpu and mem masks against top_cpuset and if some have gone
  * offline, update @cs accordingly.  If @cs ends up with no CPU or memory,
  * all its tasks are moved to the nearest ancestor with both resources.
- *
- * Should be called with cgroup_mutex held.
  */
-static void cpuset_propagate_hotplug(struct cpuset *cs)
+static void cpuset_propagate_hotplug_workfn(struct work_struct *work)
 {
 	static cpumask_t off_cpus;
 	static nodemask_t off_mems, tmp_mems;
+	struct cpuset *cs = container_of(work, struct cpuset, hotplug_work);
 
-	WARN_ON_ONCE(!cgroup_lock_is_held());
+	cgroup_lock();
 
 	cpumask_andnot(&off_cpus, cs->cpus_allowed, top_cpuset.cpus_allowed);
 	nodes_andnot(off_mems, cs->mems_allowed, top_cpuset.mems_allowed);
@@ -2071,6 +2076,36 @@ static void cpuset_propagate_hotplug(struct cpuset *cs)
 
 	if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
 		remove_tasks_in_empty_cpuset(cs);
+
+	cgroup_unlock();
+
+	/* the following may free @cs, should be the last operation */
+	css_put(&cs->css);
+}
+
+/**
+ * schedule_cpuset_propagate_hotplug - schedule hotplug propagation to a cpuset
+ * @cs: cpuset of interest
+ *
+ * Schedule cpuset_propagate_hotplug_workfn() which will update CPU and
+ * memory masks according to top_cpuset.
+ */
+static void schedule_cpuset_propagate_hotplug(struct cpuset *cs)
+{
+	/*
+	 * Pin @cs.  The refcnt will be released when the work item
+	 * finishes executing.
+	 */
+	if (!css_tryget(&cs->css))
+		return;
+
+	/*
+	 * Queue @cs->hotplug_work.  If already pending, lose the css ref.
+	 * cpuset_propagate_hotplug_wq is ordered and propagation will
+	 * happen in the order this function is called.
+	 */
+	if (!queue_work(cpuset_propagate_hotplug_wq, &cs->hotplug_work))
+		css_put(&cs->css);
 }
 
 /**
@@ -2135,11 +2170,14 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
 		list_add_tail(&top_cpuset.stack_list, &queue);
 		while ((cs = cpuset_next(&queue)))
 			if (cs != &top_cpuset)
-				cpuset_propagate_hotplug(cs);
+				schedule_cpuset_propagate_hotplug(cs);
 	}
 
 	cgroup_unlock();
 
+	/* wait for propagations to finish */
+	flush_workqueue(cpuset_propagate_hotplug_wq);
+
 	/* rebuild sched domains if cpus_allowed has changed */
 	if (cpus_updated) {
 		struct sched_domain_attr *attr;
@@ -2196,6 +2234,10 @@ void __init cpuset_init_smp(void)
 	top_cpuset.mems_allowed = node_states[N_MEMORY];
 
 	hotplug_memory_notifier(cpuset_track_online_nodes, 10);
+
+	cpuset_propagate_hotplug_wq =
+		alloc_ordered_workqueue("cpuset_hotplug", 0);
+	BUG_ON(!cpuset_propagate_hotplug_wq);
 }
 
 /**