aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-05-21 05:36:40 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2014-05-21 05:36:40 -0400
commit06eb4cc2e7ad1b32a3b2580eff772c29b53a2cc6 (patch)
treec3e64d5ee6ffdf548c863bf64ec2aed704029fc3
parent6ab9028d00da2ed34f46a72fa3271b04a402f1e1 (diff)
parent36e9d2ebcc15d029b33f42a36146ab5a5bcfcfe7 (diff)
Merge branch 'for-3.15-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull more cgroup fixes from Tejun Heo: "Three more patches to fix cgroup_freezer breakage due to the recent cgroup internal locking changes - an operation cgroup_freezer was using now requires sleepable context and cgroup_freezer was invoking that while holding a spin lock. cgroup_freezer was using an overly elaborate hierarchical locking scheme. While it's possible to convert the hierarchical spinlocks directly to mutexes, this patch simplifies the overall locking so that it uses a global mutex. This has the added benefit of avoiding iterating potentially huge number of tasks under a spinlock. While the patch is on the larger side in the devel cycle, the changes made are mostly straight-forward and the locking logic is a lot simpler afterwards" * 'for-3.15-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: cgroup: fix rcu_read_lock() leak in update_if_frozen() cgroup_freezer: replace freezer->lock with freezer_mutex cgroup: introduce task_css_is_root()
-rw-r--r--include/linux/cgroup.h15
-rw-r--r--kernel/cgroup.c2
-rw-r--r--kernel/cgroup_freezer.c116
3 files changed, 65 insertions, 68 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index c2515851c1aa..d60904b9e505 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -473,6 +473,7 @@ struct cftype {
473}; 473};
474 474
475extern struct cgroup_root cgrp_dfl_root; 475extern struct cgroup_root cgrp_dfl_root;
476extern struct css_set init_css_set;
476 477
477static inline bool cgroup_on_dfl(const struct cgroup *cgrp) 478static inline bool cgroup_on_dfl(const struct cgroup *cgrp)
478{ 479{
@@ -700,6 +701,20 @@ static inline struct cgroup_subsys_state *task_css(struct task_struct *task,
700 return task_css_check(task, subsys_id, false); 701 return task_css_check(task, subsys_id, false);
701} 702}
702 703
704/**
705 * task_css_is_root - test whether a task belongs to the root css
706 * @task: the target task
707 * @subsys_id: the target subsystem ID
708 *
709 * Test whether @task belongs to the root css on the specified subsystem.
710 * May be invoked in any context.
711 */
712static inline bool task_css_is_root(struct task_struct *task, int subsys_id)
713{
714 return task_css_check(task, subsys_id, true) ==
715 init_css_set.subsys[subsys_id];
716}
717
703static inline struct cgroup *task_cgroup(struct task_struct *task, 718static inline struct cgroup *task_cgroup(struct task_struct *task,
704 int subsys_id) 719 int subsys_id)
705{ 720{
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 11a03d67635a..3f1ca934a237 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -348,7 +348,7 @@ struct cgrp_cset_link {
348 * reference-counted, to improve performance when child cgroups 348 * reference-counted, to improve performance when child cgroups
349 * haven't been created. 349 * haven't been created.
350 */ 350 */
351static struct css_set init_css_set = { 351struct css_set init_css_set = {
352 .refcount = ATOMIC_INIT(1), 352 .refcount = ATOMIC_INIT(1),
353 .cgrp_links = LIST_HEAD_INIT(init_css_set.cgrp_links), 353 .cgrp_links = LIST_HEAD_INIT(init_css_set.cgrp_links),
354 .tasks = LIST_HEAD_INIT(init_css_set.tasks), 354 .tasks = LIST_HEAD_INIT(init_css_set.tasks),
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index 2bc4a2256444..345628c78b5b 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -21,6 +21,7 @@
21#include <linux/uaccess.h> 21#include <linux/uaccess.h>
22#include <linux/freezer.h> 22#include <linux/freezer.h>
23#include <linux/seq_file.h> 23#include <linux/seq_file.h>
24#include <linux/mutex.h>
24 25
25/* 26/*
26 * A cgroup is freezing if any FREEZING flags are set. FREEZING_SELF is 27 * A cgroup is freezing if any FREEZING flags are set. FREEZING_SELF is
@@ -42,9 +43,10 @@ enum freezer_state_flags {
42struct freezer { 43struct freezer {
43 struct cgroup_subsys_state css; 44 struct cgroup_subsys_state css;
44 unsigned int state; 45 unsigned int state;
45 spinlock_t lock;
46}; 46};
47 47
48static DEFINE_MUTEX(freezer_mutex);
49
48static inline struct freezer *css_freezer(struct cgroup_subsys_state *css) 50static inline struct freezer *css_freezer(struct cgroup_subsys_state *css)
49{ 51{
50 return css ? container_of(css, struct freezer, css) : NULL; 52 return css ? container_of(css, struct freezer, css) : NULL;
@@ -93,7 +95,6 @@ freezer_css_alloc(struct cgroup_subsys_state *parent_css)
93 if (!freezer) 95 if (!freezer)
94 return ERR_PTR(-ENOMEM); 96 return ERR_PTR(-ENOMEM);
95 97
96 spin_lock_init(&freezer->lock);
97 return &freezer->css; 98 return &freezer->css;
98} 99}
99 100
@@ -110,14 +111,7 @@ static int freezer_css_online(struct cgroup_subsys_state *css)
110 struct freezer *freezer = css_freezer(css); 111 struct freezer *freezer = css_freezer(css);
111 struct freezer *parent = parent_freezer(freezer); 112 struct freezer *parent = parent_freezer(freezer);
112 113
113 /* 114 mutex_lock(&freezer_mutex);
114 * The following double locking and freezing state inheritance
115 * guarantee that @cgroup can never escape ancestors' freezing
116 * states. See css_for_each_descendant_pre() for details.
117 */
118 if (parent)
119 spin_lock_irq(&parent->lock);
120 spin_lock_nested(&freezer->lock, SINGLE_DEPTH_NESTING);
121 115
122 freezer->state |= CGROUP_FREEZER_ONLINE; 116 freezer->state |= CGROUP_FREEZER_ONLINE;
123 117
@@ -126,10 +120,7 @@ static int freezer_css_online(struct cgroup_subsys_state *css)
126 atomic_inc(&system_freezing_cnt); 120 atomic_inc(&system_freezing_cnt);
127 } 121 }
128 122
129 spin_unlock(&freezer->lock); 123 mutex_unlock(&freezer_mutex);
130 if (parent)
131 spin_unlock_irq(&parent->lock);
132
133 return 0; 124 return 0;
134} 125}
135 126
@@ -144,14 +135,14 @@ static void freezer_css_offline(struct cgroup_subsys_state *css)
144{ 135{
145 struct freezer *freezer = css_freezer(css); 136 struct freezer *freezer = css_freezer(css);
146 137
147 spin_lock_irq(&freezer->lock); 138 mutex_lock(&freezer_mutex);
148 139
149 if (freezer->state & CGROUP_FREEZING) 140 if (freezer->state & CGROUP_FREEZING)
150 atomic_dec(&system_freezing_cnt); 141 atomic_dec(&system_freezing_cnt);
151 142
152 freezer->state = 0; 143 freezer->state = 0;
153 144
154 spin_unlock_irq(&freezer->lock); 145 mutex_unlock(&freezer_mutex);
155} 146}
156 147
157static void freezer_css_free(struct cgroup_subsys_state *css) 148static void freezer_css_free(struct cgroup_subsys_state *css)
@@ -175,7 +166,7 @@ static void freezer_attach(struct cgroup_subsys_state *new_css,
175 struct task_struct *task; 166 struct task_struct *task;
176 bool clear_frozen = false; 167 bool clear_frozen = false;
177 168
178 spin_lock_irq(&freezer->lock); 169 mutex_lock(&freezer_mutex);
179 170
180 /* 171 /*
181 * Make the new tasks conform to the current state of @new_css. 172 * Make the new tasks conform to the current state of @new_css.
@@ -197,21 +188,13 @@ static void freezer_attach(struct cgroup_subsys_state *new_css,
197 } 188 }
198 } 189 }
199 190
200 spin_unlock_irq(&freezer->lock); 191 /* propagate FROZEN clearing upwards */
201
202 /*
203 * Propagate FROZEN clearing upwards. We may race with
204 * update_if_frozen(), but as long as both work bottom-up, either
205 * update_if_frozen() sees child's FROZEN cleared or we clear the
206 * parent's FROZEN later. No parent w/ !FROZEN children can be
207 * left FROZEN.
208 */
209 while (clear_frozen && (freezer = parent_freezer(freezer))) { 192 while (clear_frozen && (freezer = parent_freezer(freezer))) {
210 spin_lock_irq(&freezer->lock);
211 freezer->state &= ~CGROUP_FROZEN; 193 freezer->state &= ~CGROUP_FROZEN;
212 clear_frozen = freezer->state & CGROUP_FREEZING; 194 clear_frozen = freezer->state & CGROUP_FREEZING;
213 spin_unlock_irq(&freezer->lock);
214 } 195 }
196
197 mutex_unlock(&freezer_mutex);
215} 198}
216 199
217/** 200/**
@@ -228,9 +211,6 @@ static void freezer_fork(struct task_struct *task)
228{ 211{
229 struct freezer *freezer; 212 struct freezer *freezer;
230 213
231 rcu_read_lock();
232 freezer = task_freezer(task);
233
234 /* 214 /*
235 * The root cgroup is non-freezable, so we can skip locking the 215 * The root cgroup is non-freezable, so we can skip locking the
236 * freezer. This is safe regardless of race with task migration. 216 * freezer. This is safe regardless of race with task migration.
@@ -238,24 +218,18 @@ static void freezer_fork(struct task_struct *task)
238 * to do. If we lost and root is the new cgroup, noop is still the 218 * to do. If we lost and root is the new cgroup, noop is still the
239 * right thing to do. 219 * right thing to do.
240 */ 220 */
241 if (!parent_freezer(freezer)) 221 if (task_css_is_root(task, freezer_cgrp_id))
242 goto out; 222 return;
243 223
244 /* 224 mutex_lock(&freezer_mutex);
245 * Grab @freezer->lock and freeze @task after verifying @task still 225 rcu_read_lock();
246 * belongs to @freezer and it's freezing. The former is for the 226
247 * case where we have raced against task migration and lost and 227 freezer = task_freezer(task);
248 * @task is already in a different cgroup which may not be frozen. 228 if (freezer->state & CGROUP_FREEZING)
249 * This isn't strictly necessary as freeze_task() is allowed to be
250 * called spuriously but let's do it anyway for, if nothing else,
251 * documentation.
252 */
253 spin_lock_irq(&freezer->lock);
254 if (freezer == task_freezer(task) && (freezer->state & CGROUP_FREEZING))
255 freeze_task(task); 229 freeze_task(task);
256 spin_unlock_irq(&freezer->lock); 230
257out:
258 rcu_read_unlock(); 231 rcu_read_unlock();
232 mutex_unlock(&freezer_mutex);
259} 233}
260 234
261/** 235/**
@@ -281,22 +255,24 @@ static void update_if_frozen(struct cgroup_subsys_state *css)
281 struct css_task_iter it; 255 struct css_task_iter it;
282 struct task_struct *task; 256 struct task_struct *task;
283 257
284 WARN_ON_ONCE(!rcu_read_lock_held()); 258 lockdep_assert_held(&freezer_mutex);
285
286 spin_lock_irq(&freezer->lock);
287 259
288 if (!(freezer->state & CGROUP_FREEZING) || 260 if (!(freezer->state & CGROUP_FREEZING) ||
289 (freezer->state & CGROUP_FROZEN)) 261 (freezer->state & CGROUP_FROZEN))
290 goto out_unlock; 262 return;
291 263
292 /* are all (live) children frozen? */ 264 /* are all (live) children frozen? */
265 rcu_read_lock();
293 css_for_each_child(pos, css) { 266 css_for_each_child(pos, css) {
294 struct freezer *child = css_freezer(pos); 267 struct freezer *child = css_freezer(pos);
295 268
296 if ((child->state & CGROUP_FREEZER_ONLINE) && 269 if ((child->state & CGROUP_FREEZER_ONLINE) &&
297 !(child->state & CGROUP_FROZEN)) 270 !(child->state & CGROUP_FROZEN)) {
298 goto out_unlock; 271 rcu_read_unlock();
272 return;
273 }
299 } 274 }
275 rcu_read_unlock();
300 276
301 /* are all tasks frozen? */ 277 /* are all tasks frozen? */
302 css_task_iter_start(css, &it); 278 css_task_iter_start(css, &it);
@@ -317,21 +293,29 @@ static void update_if_frozen(struct cgroup_subsys_state *css)
317 freezer->state |= CGROUP_FROZEN; 293 freezer->state |= CGROUP_FROZEN;
318out_iter_end: 294out_iter_end:
319 css_task_iter_end(&it); 295 css_task_iter_end(&it);
320out_unlock:
321 spin_unlock_irq(&freezer->lock);
322} 296}
323 297
324static int freezer_read(struct seq_file *m, void *v) 298static int freezer_read(struct seq_file *m, void *v)
325{ 299{
326 struct cgroup_subsys_state *css = seq_css(m), *pos; 300 struct cgroup_subsys_state *css = seq_css(m), *pos;
327 301
302 mutex_lock(&freezer_mutex);
328 rcu_read_lock(); 303 rcu_read_lock();
329 304
330 /* update states bottom-up */ 305 /* update states bottom-up */
331 css_for_each_descendant_post(pos, css) 306 css_for_each_descendant_post(pos, css) {
307 if (!css_tryget(pos))
308 continue;
309 rcu_read_unlock();
310
332 update_if_frozen(pos); 311 update_if_frozen(pos);
333 312
313 rcu_read_lock();
314 css_put(pos);
315 }
316
334 rcu_read_unlock(); 317 rcu_read_unlock();
318 mutex_unlock(&freezer_mutex);
335 319
336 seq_puts(m, freezer_state_strs(css_freezer(css)->state)); 320 seq_puts(m, freezer_state_strs(css_freezer(css)->state));
337 seq_putc(m, '\n'); 321 seq_putc(m, '\n');
@@ -373,7 +357,7 @@ static void freezer_apply_state(struct freezer *freezer, bool freeze,
373 unsigned int state) 357 unsigned int state)
374{ 358{
375 /* also synchronizes against task migration, see freezer_attach() */ 359 /* also synchronizes against task migration, see freezer_attach() */
376 lockdep_assert_held(&freezer->lock); 360 lockdep_assert_held(&freezer_mutex);
377 361
378 if (!(freezer->state & CGROUP_FREEZER_ONLINE)) 362 if (!(freezer->state & CGROUP_FREEZER_ONLINE))
379 return; 363 return;
@@ -414,31 +398,29 @@ static void freezer_change_state(struct freezer *freezer, bool freeze)
414 * descendant will try to inherit its parent's FREEZING state as 398 * descendant will try to inherit its parent's FREEZING state as
415 * CGROUP_FREEZING_PARENT. 399 * CGROUP_FREEZING_PARENT.
416 */ 400 */
401 mutex_lock(&freezer_mutex);
417 rcu_read_lock(); 402 rcu_read_lock();
418 css_for_each_descendant_pre(pos, &freezer->css) { 403 css_for_each_descendant_pre(pos, &freezer->css) {
419 struct freezer *pos_f = css_freezer(pos); 404 struct freezer *pos_f = css_freezer(pos);
420 struct freezer *parent = parent_freezer(pos_f); 405 struct freezer *parent = parent_freezer(pos_f);
421 406
422 spin_lock_irq(&pos_f->lock); 407 if (!css_tryget(pos))
408 continue;
409 rcu_read_unlock();
423 410
424 if (pos_f == freezer) { 411 if (pos_f == freezer)
425 freezer_apply_state(pos_f, freeze, 412 freezer_apply_state(pos_f, freeze,
426 CGROUP_FREEZING_SELF); 413 CGROUP_FREEZING_SELF);
427 } else { 414 else
428 /*
429 * Our update to @parent->state is already visible
430 * which is all we need. No need to lock @parent.
431 * For more info on synchronization, see
432 * freezer_post_create().
433 */
434 freezer_apply_state(pos_f, 415 freezer_apply_state(pos_f,
435 parent->state & CGROUP_FREEZING, 416 parent->state & CGROUP_FREEZING,
436 CGROUP_FREEZING_PARENT); 417 CGROUP_FREEZING_PARENT);
437 }
438 418
439 spin_unlock_irq(&pos_f->lock); 419 rcu_read_lock();
420 css_put(pos);
440 } 421 }
441 rcu_read_unlock(); 422 rcu_read_unlock();
423 mutex_unlock(&freezer_mutex);
442} 424}
443 425
444static int freezer_write(struct cgroup_subsys_state *css, struct cftype *cft, 426static int freezer_write(struct cgroup_subsys_state *css, struct cftype *cft,