diff options
author | Tejun Heo <tj@kernel.org> | 2013-01-07 11:51:07 -0500 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2013-01-07 11:51:07 -0500 |
commit | 8d03394877ecdf87e1d694664c460747b8e05aa1 (patch) | |
tree | d7f2301996b9e0a747ada2385244c670e7da7bb2 /kernel | |
parent | 699140ba838dd3fa2c5cce474e14f194b09f91aa (diff) |
cpuset: make CPU / memory hotplug propagation asynchronous
cpuset_hotplug_workfn() has been invoking cpuset_propagate_hotplug()
directly to propagate hotplug updates to !root cpusets; however, this
has the following problems.
* cpuset locking is scheduled to be decoupled from cgroup_mutex,
cgroup_mutex will be unexported, and cgroup_attach_task() will do
cgroup locking internally, so propagation can't synchronously move
tasks to a parent cgroup while walking the hierarchy.
* We can't use cgroup generic tree iterator because propagation to
each cpuset may sleep. With propagation done asynchronously, we can
lose the rather ugly cpuset specific iteration.
Convert cpuset_propagate_hotplug() to
cpuset_propagate_hotplug_workfn() and execute it from newly added
cpuset->hotplug_work. The work items are run on an ordered workqueue,
so the propagation order is preserved. cpuset_hotplug_workfn()
schedules all propagations while holding cgroup_mutex and waits for
completion without cgroup_mutex. Each in-flight propagation holds a
reference to the cpuset->css.
This patch doesn't cause any functional difference.
Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/cpuset.c | 54 |
1 file changed, 48 insertions, 6 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 74e412f908db..a7bb547786d7 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -99,6 +99,8 @@ struct cpuset { | |||
99 | 99 | ||
100 | /* used for walking a cpuset hierarchy */ | 100 | /* used for walking a cpuset hierarchy */ |
101 | struct list_head stack_list; | 101 | struct list_head stack_list; |
102 | |||
103 | struct work_struct hotplug_work; | ||
102 | }; | 104 | }; |
103 | 105 | ||
104 | /* Retrieve the cpuset for a cgroup */ | 106 | /* Retrieve the cpuset for a cgroup */ |
@@ -254,7 +256,10 @@ static DEFINE_SPINLOCK(cpuset_buffer_lock); | |||
254 | /* | 256 | /* |
255 | * CPU / memory hotplug is handled asynchronously. | 257 | * CPU / memory hotplug is handled asynchronously. |
256 | */ | 258 | */ |
259 | static struct workqueue_struct *cpuset_propagate_hotplug_wq; | ||
260 | |||
257 | static void cpuset_hotplug_workfn(struct work_struct *work); | 261 | static void cpuset_hotplug_workfn(struct work_struct *work); |
262 | static void cpuset_propagate_hotplug_workfn(struct work_struct *work); | ||
258 | 263 | ||
259 | static DECLARE_WORK(cpuset_hotplug_work, cpuset_hotplug_workfn); | 264 | static DECLARE_WORK(cpuset_hotplug_work, cpuset_hotplug_workfn); |
260 | 265 | ||
@@ -1808,6 +1813,7 @@ static struct cgroup_subsys_state *cpuset_css_alloc(struct cgroup *cont) | |||
1808 | cpumask_clear(cs->cpus_allowed); | 1813 | cpumask_clear(cs->cpus_allowed); |
1809 | nodes_clear(cs->mems_allowed); | 1814 | nodes_clear(cs->mems_allowed); |
1810 | fmeter_init(&cs->fmeter); | 1815 | fmeter_init(&cs->fmeter); |
1816 | INIT_WORK(&cs->hotplug_work, cpuset_propagate_hotplug_workfn); | ||
1811 | cs->relax_domain_level = -1; | 1817 | cs->relax_domain_level = -1; |
1812 | cs->parent = cgroup_cs(cont->parent); | 1818 | cs->parent = cgroup_cs(cont->parent); |
1813 | 1819 | ||
@@ -2033,21 +2039,20 @@ static struct cpuset *cpuset_next(struct list_head *queue) | |||
2033 | } | 2039 | } |
2034 | 2040 | ||
2035 | /** | 2041 | /** |
2036 | * cpuset_propagate_hotplug - propagate CPU/memory hotplug to a cpuset | 2042 | * cpuset_propagate_hotplug_workfn - propagate CPU/memory hotplug to a cpuset |
2037 | * @cs: cpuset in interest | 2043 | * @cs: cpuset in interest |
2038 | * | 2044 | * |
2039 | * Compare @cs's cpu and mem masks against top_cpuset and if some have gone | 2045 | * Compare @cs's cpu and mem masks against top_cpuset and if some have gone |
2040 | * offline, update @cs accordingly. If @cs ends up with no CPU or memory, | 2046 | * offline, update @cs accordingly. If @cs ends up with no CPU or memory, |
2041 | * all its tasks are moved to the nearest ancestor with both resources. | 2047 | * all its tasks are moved to the nearest ancestor with both resources. |
2042 | * | ||
2043 | * Should be called with cgroup_mutex held. | ||
2044 | */ | 2048 | */ |
2045 | static void cpuset_propagate_hotplug(struct cpuset *cs) | 2049 | static void cpuset_propagate_hotplug_workfn(struct work_struct *work) |
2046 | { | 2050 | { |
2047 | static cpumask_t off_cpus; | 2051 | static cpumask_t off_cpus; |
2048 | static nodemask_t off_mems, tmp_mems; | 2052 | static nodemask_t off_mems, tmp_mems; |
2053 | struct cpuset *cs = container_of(work, struct cpuset, hotplug_work); | ||
2049 | 2054 | ||
2050 | WARN_ON_ONCE(!cgroup_lock_is_held()); | 2055 | cgroup_lock(); |
2051 | 2056 | ||
2052 | cpumask_andnot(&off_cpus, cs->cpus_allowed, top_cpuset.cpus_allowed); | 2057 | cpumask_andnot(&off_cpus, cs->cpus_allowed, top_cpuset.cpus_allowed); |
2053 | nodes_andnot(off_mems, cs->mems_allowed, top_cpuset.mems_allowed); | 2058 | nodes_andnot(off_mems, cs->mems_allowed, top_cpuset.mems_allowed); |
@@ -2071,6 +2076,36 @@ static void cpuset_propagate_hotplug(struct cpuset *cs) | |||
2071 | 2076 | ||
2072 | if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) | 2077 | if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) |
2073 | remove_tasks_in_empty_cpuset(cs); | 2078 | remove_tasks_in_empty_cpuset(cs); |
2079 | |||
2080 | cgroup_unlock(); | ||
2081 | |||
2082 | /* the following may free @cs, should be the last operation */ | ||
2083 | css_put(&cs->css); | ||
2084 | } | ||
2085 | |||
2086 | /** | ||
2087 | * schedule_cpuset_propagate_hotplug - schedule hotplug propagation to a cpuset | ||
2088 | * @cs: cpuset of interest | ||
2089 | * | ||
2090 | * Schedule cpuset_propagate_hotplug_workfn() which will update CPU and | ||
2091 | * memory masks according to top_cpuset. | ||
2092 | */ | ||
2093 | static void schedule_cpuset_propagate_hotplug(struct cpuset *cs) | ||
2094 | { | ||
2095 | /* | ||
2096 | * Pin @cs. The refcnt will be released when the work item | ||
2097 | * finishes executing. | ||
2098 | */ | ||
2099 | if (!css_tryget(&cs->css)) | ||
2100 | return; | ||
2101 | |||
2102 | /* | ||
2103 | * Queue @cs->hotplug_work. If already pending, lose the css ref. | ||
2104 | * cpuset_propagate_hotplug_wq is ordered and propagation will | ||
2105 | * happen in the order this function is called. | ||
2106 | */ | ||
2107 | if (!queue_work(cpuset_propagate_hotplug_wq, &cs->hotplug_work)) | ||
2108 | css_put(&cs->css); | ||
2074 | } | 2109 | } |
2075 | 2110 | ||
2076 | /** | 2111 | /** |
@@ -2135,11 +2170,14 @@ static void cpuset_hotplug_workfn(struct work_struct *work) | |||
2135 | list_add_tail(&top_cpuset.stack_list, &queue); | 2170 | list_add_tail(&top_cpuset.stack_list, &queue); |
2136 | while ((cs = cpuset_next(&queue))) | 2171 | while ((cs = cpuset_next(&queue))) |
2137 | if (cs != &top_cpuset) | 2172 | if (cs != &top_cpuset) |
2138 | cpuset_propagate_hotplug(cs); | 2173 | schedule_cpuset_propagate_hotplug(cs); |
2139 | } | 2174 | } |
2140 | 2175 | ||
2141 | cgroup_unlock(); | 2176 | cgroup_unlock(); |
2142 | 2177 | ||
2178 | /* wait for propagations to finish */ | ||
2179 | flush_workqueue(cpuset_propagate_hotplug_wq); | ||
2180 | |||
2143 | /* rebuild sched domains if cpus_allowed has changed */ | 2181 | /* rebuild sched domains if cpus_allowed has changed */ |
2144 | if (cpus_updated) { | 2182 | if (cpus_updated) { |
2145 | struct sched_domain_attr *attr; | 2183 | struct sched_domain_attr *attr; |
@@ -2196,6 +2234,10 @@ void __init cpuset_init_smp(void) | |||
2196 | top_cpuset.mems_allowed = node_states[N_MEMORY]; | 2234 | top_cpuset.mems_allowed = node_states[N_MEMORY]; |
2197 | 2235 | ||
2198 | hotplug_memory_notifier(cpuset_track_online_nodes, 10); | 2236 | hotplug_memory_notifier(cpuset_track_online_nodes, 10); |
2237 | |||
2238 | cpuset_propagate_hotplug_wq = | ||
2239 | alloc_ordered_workqueue("cpuset_hotplug", 0); | ||
2240 | BUG_ON(!cpuset_propagate_hotplug_wq); | ||
2199 | } | 2241 | } |
2200 | 2242 | ||
2201 | /** | 2243 | /** |