diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-05-21 05:36:40 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-05-21 05:36:40 -0400 |
commit | 06eb4cc2e7ad1b32a3b2580eff772c29b53a2cc6 (patch) | |
tree | c3e64d5ee6ffdf548c863bf64ec2aed704029fc3 | |
parent | 6ab9028d00da2ed34f46a72fa3271b04a402f1e1 (diff) | |
parent | 36e9d2ebcc15d029b33f42a36146ab5a5bcfcfe7 (diff) |
Merge branch 'for-3.15-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull more cgroup fixes from Tejun Heo:
"Three more patches to fix cgroup_freezer breakage due to the recent
cgroup internal locking changes - an operation cgroup_freezer was
using now requires sleepable context and cgroup_freezer was invoking
that while holding a spin lock. cgroup_freezer was using an overly
elaborate hierarchical locking scheme.
While it's possible to convert the hierarchical spinlocks directly to
mutexes, this patch simplifies the overall locking so that it uses a
global mutex. This has the added benefit of avoiding iterating
potentially huge number of tasks under a spinlock. While the patch is
on the larger side in the devel cycle, the changes made are mostly
straight-forward and the locking logic is a lot simpler afterwards"
* 'for-3.15-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
cgroup: fix rcu_read_lock() leak in update_if_frozen()
cgroup_freezer: replace freezer->lock with freezer_mutex
cgroup: introduce task_css_is_root()
-rw-r--r-- | include/linux/cgroup.h | 15 | ||||
-rw-r--r-- | kernel/cgroup.c | 2 | ||||
-rw-r--r-- | kernel/cgroup_freezer.c | 116 |
3 files changed, 65 insertions, 68 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index c2515851c1aa..d60904b9e505 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h | |||
@@ -473,6 +473,7 @@ struct cftype { | |||
473 | }; | 473 | }; |
474 | 474 | ||
475 | extern struct cgroup_root cgrp_dfl_root; | 475 | extern struct cgroup_root cgrp_dfl_root; |
476 | extern struct css_set init_css_set; | ||
476 | 477 | ||
477 | static inline bool cgroup_on_dfl(const struct cgroup *cgrp) | 478 | static inline bool cgroup_on_dfl(const struct cgroup *cgrp) |
478 | { | 479 | { |
@@ -700,6 +701,20 @@ static inline struct cgroup_subsys_state *task_css(struct task_struct *task, | |||
700 | return task_css_check(task, subsys_id, false); | 701 | return task_css_check(task, subsys_id, false); |
701 | } | 702 | } |
702 | 703 | ||
704 | /** | ||
705 | * task_css_is_root - test whether a task belongs to the root css | ||
706 | * @task: the target task | ||
707 | * @subsys_id: the target subsystem ID | ||
708 | * | ||
709 | * Test whether @task belongs to the root css on the specified subsystem. | ||
710 | * May be invoked in any context. | ||
711 | */ | ||
712 | static inline bool task_css_is_root(struct task_struct *task, int subsys_id) | ||
713 | { | ||
714 | return task_css_check(task, subsys_id, true) == | ||
715 | init_css_set.subsys[subsys_id]; | ||
716 | } | ||
717 | |||
703 | static inline struct cgroup *task_cgroup(struct task_struct *task, | 718 | static inline struct cgroup *task_cgroup(struct task_struct *task, |
704 | int subsys_id) | 719 | int subsys_id) |
705 | { | 720 | { |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 11a03d67635a..3f1ca934a237 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -348,7 +348,7 @@ struct cgrp_cset_link { | |||
348 | * reference-counted, to improve performance when child cgroups | 348 | * reference-counted, to improve performance when child cgroups |
349 | * haven't been created. | 349 | * haven't been created. |
350 | */ | 350 | */ |
351 | static struct css_set init_css_set = { | 351 | struct css_set init_css_set = { |
352 | .refcount = ATOMIC_INIT(1), | 352 | .refcount = ATOMIC_INIT(1), |
353 | .cgrp_links = LIST_HEAD_INIT(init_css_set.cgrp_links), | 353 | .cgrp_links = LIST_HEAD_INIT(init_css_set.cgrp_links), |
354 | .tasks = LIST_HEAD_INIT(init_css_set.tasks), | 354 | .tasks = LIST_HEAD_INIT(init_css_set.tasks), |
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index 2bc4a2256444..345628c78b5b 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/uaccess.h> | 21 | #include <linux/uaccess.h> |
22 | #include <linux/freezer.h> | 22 | #include <linux/freezer.h> |
23 | #include <linux/seq_file.h> | 23 | #include <linux/seq_file.h> |
24 | #include <linux/mutex.h> | ||
24 | 25 | ||
25 | /* | 26 | /* |
26 | * A cgroup is freezing if any FREEZING flags are set. FREEZING_SELF is | 27 | * A cgroup is freezing if any FREEZING flags are set. FREEZING_SELF is |
@@ -42,9 +43,10 @@ enum freezer_state_flags { | |||
42 | struct freezer { | 43 | struct freezer { |
43 | struct cgroup_subsys_state css; | 44 | struct cgroup_subsys_state css; |
44 | unsigned int state; | 45 | unsigned int state; |
45 | spinlock_t lock; | ||
46 | }; | 46 | }; |
47 | 47 | ||
48 | static DEFINE_MUTEX(freezer_mutex); | ||
49 | |||
48 | static inline struct freezer *css_freezer(struct cgroup_subsys_state *css) | 50 | static inline struct freezer *css_freezer(struct cgroup_subsys_state *css) |
49 | { | 51 | { |
50 | return css ? container_of(css, struct freezer, css) : NULL; | 52 | return css ? container_of(css, struct freezer, css) : NULL; |
@@ -93,7 +95,6 @@ freezer_css_alloc(struct cgroup_subsys_state *parent_css) | |||
93 | if (!freezer) | 95 | if (!freezer) |
94 | return ERR_PTR(-ENOMEM); | 96 | return ERR_PTR(-ENOMEM); |
95 | 97 | ||
96 | spin_lock_init(&freezer->lock); | ||
97 | return &freezer->css; | 98 | return &freezer->css; |
98 | } | 99 | } |
99 | 100 | ||
@@ -110,14 +111,7 @@ static int freezer_css_online(struct cgroup_subsys_state *css) | |||
110 | struct freezer *freezer = css_freezer(css); | 111 | struct freezer *freezer = css_freezer(css); |
111 | struct freezer *parent = parent_freezer(freezer); | 112 | struct freezer *parent = parent_freezer(freezer); |
112 | 113 | ||
113 | /* | 114 | mutex_lock(&freezer_mutex); |
114 | * The following double locking and freezing state inheritance | ||
115 | * guarantee that @cgroup can never escape ancestors' freezing | ||
116 | * states. See css_for_each_descendant_pre() for details. | ||
117 | */ | ||
118 | if (parent) | ||
119 | spin_lock_irq(&parent->lock); | ||
120 | spin_lock_nested(&freezer->lock, SINGLE_DEPTH_NESTING); | ||
121 | 115 | ||
122 | freezer->state |= CGROUP_FREEZER_ONLINE; | 116 | freezer->state |= CGROUP_FREEZER_ONLINE; |
123 | 117 | ||
@@ -126,10 +120,7 @@ static int freezer_css_online(struct cgroup_subsys_state *css) | |||
126 | atomic_inc(&system_freezing_cnt); | 120 | atomic_inc(&system_freezing_cnt); |
127 | } | 121 | } |
128 | 122 | ||
129 | spin_unlock(&freezer->lock); | 123 | mutex_unlock(&freezer_mutex); |
130 | if (parent) | ||
131 | spin_unlock_irq(&parent->lock); | ||
132 | |||
133 | return 0; | 124 | return 0; |
134 | } | 125 | } |
135 | 126 | ||
@@ -144,14 +135,14 @@ static void freezer_css_offline(struct cgroup_subsys_state *css) | |||
144 | { | 135 | { |
145 | struct freezer *freezer = css_freezer(css); | 136 | struct freezer *freezer = css_freezer(css); |
146 | 137 | ||
147 | spin_lock_irq(&freezer->lock); | 138 | mutex_lock(&freezer_mutex); |
148 | 139 | ||
149 | if (freezer->state & CGROUP_FREEZING) | 140 | if (freezer->state & CGROUP_FREEZING) |
150 | atomic_dec(&system_freezing_cnt); | 141 | atomic_dec(&system_freezing_cnt); |
151 | 142 | ||
152 | freezer->state = 0; | 143 | freezer->state = 0; |
153 | 144 | ||
154 | spin_unlock_irq(&freezer->lock); | 145 | mutex_unlock(&freezer_mutex); |
155 | } | 146 | } |
156 | 147 | ||
157 | static void freezer_css_free(struct cgroup_subsys_state *css) | 148 | static void freezer_css_free(struct cgroup_subsys_state *css) |
@@ -175,7 +166,7 @@ static void freezer_attach(struct cgroup_subsys_state *new_css, | |||
175 | struct task_struct *task; | 166 | struct task_struct *task; |
176 | bool clear_frozen = false; | 167 | bool clear_frozen = false; |
177 | 168 | ||
178 | spin_lock_irq(&freezer->lock); | 169 | mutex_lock(&freezer_mutex); |
179 | 170 | ||
180 | /* | 171 | /* |
181 | * Make the new tasks conform to the current state of @new_css. | 172 | * Make the new tasks conform to the current state of @new_css. |
@@ -197,21 +188,13 @@ static void freezer_attach(struct cgroup_subsys_state *new_css, | |||
197 | } | 188 | } |
198 | } | 189 | } |
199 | 190 | ||
200 | spin_unlock_irq(&freezer->lock); | 191 | /* propagate FROZEN clearing upwards */ |
201 | |||
202 | /* | ||
203 | * Propagate FROZEN clearing upwards. We may race with | ||
204 | * update_if_frozen(), but as long as both work bottom-up, either | ||
205 | * update_if_frozen() sees child's FROZEN cleared or we clear the | ||
206 | * parent's FROZEN later. No parent w/ !FROZEN children can be | ||
207 | * left FROZEN. | ||
208 | */ | ||
209 | while (clear_frozen && (freezer = parent_freezer(freezer))) { | 192 | while (clear_frozen && (freezer = parent_freezer(freezer))) { |
210 | spin_lock_irq(&freezer->lock); | ||
211 | freezer->state &= ~CGROUP_FROZEN; | 193 | freezer->state &= ~CGROUP_FROZEN; |
212 | clear_frozen = freezer->state & CGROUP_FREEZING; | 194 | clear_frozen = freezer->state & CGROUP_FREEZING; |
213 | spin_unlock_irq(&freezer->lock); | ||
214 | } | 195 | } |
196 | |||
197 | mutex_unlock(&freezer_mutex); | ||
215 | } | 198 | } |
216 | 199 | ||
217 | /** | 200 | /** |
@@ -228,9 +211,6 @@ static void freezer_fork(struct task_struct *task) | |||
228 | { | 211 | { |
229 | struct freezer *freezer; | 212 | struct freezer *freezer; |
230 | 213 | ||
231 | rcu_read_lock(); | ||
232 | freezer = task_freezer(task); | ||
233 | |||
234 | /* | 214 | /* |
235 | * The root cgroup is non-freezable, so we can skip locking the | 215 | * The root cgroup is non-freezable, so we can skip locking the |
236 | * freezer. This is safe regardless of race with task migration. | 216 | * freezer. This is safe regardless of race with task migration. |
@@ -238,24 +218,18 @@ static void freezer_fork(struct task_struct *task) | |||
238 | * to do. If we lost and root is the new cgroup, noop is still the | 218 | * to do. If we lost and root is the new cgroup, noop is still the |
239 | * right thing to do. | 219 | * right thing to do. |
240 | */ | 220 | */ |
241 | if (!parent_freezer(freezer)) | 221 | if (task_css_is_root(task, freezer_cgrp_id)) |
242 | goto out; | 222 | return; |
243 | 223 | ||
244 | /* | 224 | mutex_lock(&freezer_mutex); |
245 | * Grab @freezer->lock and freeze @task after verifying @task still | 225 | rcu_read_lock(); |
246 | * belongs to @freezer and it's freezing. The former is for the | 226 | |
247 | * case where we have raced against task migration and lost and | 227 | freezer = task_freezer(task); |
248 | * @task is already in a different cgroup which may not be frozen. | 228 | if (freezer->state & CGROUP_FREEZING) |
249 | * This isn't strictly necessary as freeze_task() is allowed to be | ||
250 | * called spuriously but let's do it anyway for, if nothing else, | ||
251 | * documentation. | ||
252 | */ | ||
253 | spin_lock_irq(&freezer->lock); | ||
254 | if (freezer == task_freezer(task) && (freezer->state & CGROUP_FREEZING)) | ||
255 | freeze_task(task); | 229 | freeze_task(task); |
256 | spin_unlock_irq(&freezer->lock); | 230 | |
257 | out: | ||
258 | rcu_read_unlock(); | 231 | rcu_read_unlock(); |
232 | mutex_unlock(&freezer_mutex); | ||
259 | } | 233 | } |
260 | 234 | ||
261 | /** | 235 | /** |
@@ -281,22 +255,24 @@ static void update_if_frozen(struct cgroup_subsys_state *css) | |||
281 | struct css_task_iter it; | 255 | struct css_task_iter it; |
282 | struct task_struct *task; | 256 | struct task_struct *task; |
283 | 257 | ||
284 | WARN_ON_ONCE(!rcu_read_lock_held()); | 258 | lockdep_assert_held(&freezer_mutex); |
285 | |||
286 | spin_lock_irq(&freezer->lock); | ||
287 | 259 | ||
288 | if (!(freezer->state & CGROUP_FREEZING) || | 260 | if (!(freezer->state & CGROUP_FREEZING) || |
289 | (freezer->state & CGROUP_FROZEN)) | 261 | (freezer->state & CGROUP_FROZEN)) |
290 | goto out_unlock; | 262 | return; |
291 | 263 | ||
292 | /* are all (live) children frozen? */ | 264 | /* are all (live) children frozen? */ |
265 | rcu_read_lock(); | ||
293 | css_for_each_child(pos, css) { | 266 | css_for_each_child(pos, css) { |
294 | struct freezer *child = css_freezer(pos); | 267 | struct freezer *child = css_freezer(pos); |
295 | 268 | ||
296 | if ((child->state & CGROUP_FREEZER_ONLINE) && | 269 | if ((child->state & CGROUP_FREEZER_ONLINE) && |
297 | !(child->state & CGROUP_FROZEN)) | 270 | !(child->state & CGROUP_FROZEN)) { |
298 | goto out_unlock; | 271 | rcu_read_unlock(); |
272 | return; | ||
273 | } | ||
299 | } | 274 | } |
275 | rcu_read_unlock(); | ||
300 | 276 | ||
301 | /* are all tasks frozen? */ | 277 | /* are all tasks frozen? */ |
302 | css_task_iter_start(css, &it); | 278 | css_task_iter_start(css, &it); |
@@ -317,21 +293,29 @@ static void update_if_frozen(struct cgroup_subsys_state *css) | |||
317 | freezer->state |= CGROUP_FROZEN; | 293 | freezer->state |= CGROUP_FROZEN; |
318 | out_iter_end: | 294 | out_iter_end: |
319 | css_task_iter_end(&it); | 295 | css_task_iter_end(&it); |
320 | out_unlock: | ||
321 | spin_unlock_irq(&freezer->lock); | ||
322 | } | 296 | } |
323 | 297 | ||
324 | static int freezer_read(struct seq_file *m, void *v) | 298 | static int freezer_read(struct seq_file *m, void *v) |
325 | { | 299 | { |
326 | struct cgroup_subsys_state *css = seq_css(m), *pos; | 300 | struct cgroup_subsys_state *css = seq_css(m), *pos; |
327 | 301 | ||
302 | mutex_lock(&freezer_mutex); | ||
328 | rcu_read_lock(); | 303 | rcu_read_lock(); |
329 | 304 | ||
330 | /* update states bottom-up */ | 305 | /* update states bottom-up */ |
331 | css_for_each_descendant_post(pos, css) | 306 | css_for_each_descendant_post(pos, css) { |
307 | if (!css_tryget(pos)) | ||
308 | continue; | ||
309 | rcu_read_unlock(); | ||
310 | |||
332 | update_if_frozen(pos); | 311 | update_if_frozen(pos); |
333 | 312 | ||
313 | rcu_read_lock(); | ||
314 | css_put(pos); | ||
315 | } | ||
316 | |||
334 | rcu_read_unlock(); | 317 | rcu_read_unlock(); |
318 | mutex_unlock(&freezer_mutex); | ||
335 | 319 | ||
336 | seq_puts(m, freezer_state_strs(css_freezer(css)->state)); | 320 | seq_puts(m, freezer_state_strs(css_freezer(css)->state)); |
337 | seq_putc(m, '\n'); | 321 | seq_putc(m, '\n'); |
@@ -373,7 +357,7 @@ static void freezer_apply_state(struct freezer *freezer, bool freeze, | |||
373 | unsigned int state) | 357 | unsigned int state) |
374 | { | 358 | { |
375 | /* also synchronizes against task migration, see freezer_attach() */ | 359 | /* also synchronizes against task migration, see freezer_attach() */ |
376 | lockdep_assert_held(&freezer->lock); | 360 | lockdep_assert_held(&freezer_mutex); |
377 | 361 | ||
378 | if (!(freezer->state & CGROUP_FREEZER_ONLINE)) | 362 | if (!(freezer->state & CGROUP_FREEZER_ONLINE)) |
379 | return; | 363 | return; |
@@ -414,31 +398,29 @@ static void freezer_change_state(struct freezer *freezer, bool freeze) | |||
414 | * descendant will try to inherit its parent's FREEZING state as | 398 | * descendant will try to inherit its parent's FREEZING state as |
415 | * CGROUP_FREEZING_PARENT. | 399 | * CGROUP_FREEZING_PARENT. |
416 | */ | 400 | */ |
401 | mutex_lock(&freezer_mutex); | ||
417 | rcu_read_lock(); | 402 | rcu_read_lock(); |
418 | css_for_each_descendant_pre(pos, &freezer->css) { | 403 | css_for_each_descendant_pre(pos, &freezer->css) { |
419 | struct freezer *pos_f = css_freezer(pos); | 404 | struct freezer *pos_f = css_freezer(pos); |
420 | struct freezer *parent = parent_freezer(pos_f); | 405 | struct freezer *parent = parent_freezer(pos_f); |
421 | 406 | ||
422 | spin_lock_irq(&pos_f->lock); | 407 | if (!css_tryget(pos)) |
408 | continue; | ||
409 | rcu_read_unlock(); | ||
423 | 410 | ||
424 | if (pos_f == freezer) { | 411 | if (pos_f == freezer) |
425 | freezer_apply_state(pos_f, freeze, | 412 | freezer_apply_state(pos_f, freeze, |
426 | CGROUP_FREEZING_SELF); | 413 | CGROUP_FREEZING_SELF); |
427 | } else { | 414 | else |
428 | /* | ||
429 | * Our update to @parent->state is already visible | ||
430 | * which is all we need. No need to lock @parent. | ||
431 | * For more info on synchronization, see | ||
432 | * freezer_post_create(). | ||
433 | */ | ||
434 | freezer_apply_state(pos_f, | 415 | freezer_apply_state(pos_f, |
435 | parent->state & CGROUP_FREEZING, | 416 | parent->state & CGROUP_FREEZING, |
436 | CGROUP_FREEZING_PARENT); | 417 | CGROUP_FREEZING_PARENT); |
437 | } | ||
438 | 418 | ||
439 | spin_unlock_irq(&pos_f->lock); | 419 | rcu_read_lock(); |
420 | css_put(pos); | ||
440 | } | 421 | } |
441 | rcu_read_unlock(); | 422 | rcu_read_unlock(); |
423 | mutex_unlock(&freezer_mutex); | ||
442 | } | 424 | } |
443 | 425 | ||
444 | static int freezer_write(struct cgroup_subsys_state *css, struct cftype *cft, | 426 | static int freezer_write(struct cgroup_subsys_state *css, struct cftype *cft, |