aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/cgroup_freezer.c
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2014-05-07 21:31:17 -0400
committerTejun Heo <tj@kernel.org>2014-05-13 11:26:31 -0400
commite5ced8ebb10c20a3b349bd798b69ccabd3b25d21 (patch)
tree8af5430880909fb3346eb5c64270b34e34052973 /kernel/cgroup_freezer.c
parent5024ae29cd285ce9e736776414da645d3a91828c (diff)
cgroup_freezer: replace freezer->lock with freezer_mutex
After 96d365e0b86e ("cgroup: make css_set_lock a rwsem and rename it to css_set_rwsem"), css task iterators requires sleepable context as it may block on css_set_rwsem. I missed that cgroup_freezer was iterating tasks under IRQ-safe spinlock freezer->lock. This leads to errors like the following on freezer state reads and transitions. BUG: sleeping function called from invalid context at /work /os/work/kernel/locking/rwsem.c:20 in_atomic(): 0, irqs_disabled(): 0, pid: 462, name: bash 5 locks held by bash/462: #0: (sb_writers#7){.+.+.+}, at: [<ffffffff811f0843>] vfs_write+0x1a3/0x1c0 #1: (&of->mutex){+.+.+.}, at: [<ffffffff8126d78b>] kernfs_fop_write+0xbb/0x170 #2: (s_active#70){.+.+.+}, at: [<ffffffff8126d793>] kernfs_fop_write+0xc3/0x170 #3: (freezer_mutex){+.+...}, at: [<ffffffff81135981>] freezer_write+0x61/0x1e0 #4: (rcu_read_lock){......}, at: [<ffffffff81135973>] freezer_write+0x53/0x1e0 Preemption disabled at:[<ffffffff81104404>] console_unlock+0x1e4/0x460 CPU: 3 PID: 462 Comm: bash Not tainted 3.15.0-rc1-work+ #10 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 ffff88000916a6d0 ffff88000e0a3da0 ffffffff81cf8c96 0000000000000000 ffff88000e0a3dc8 ffffffff810cf4f2 ffffffff82388040 ffff880013aaf740 0000000000000002 ffff88000e0a3de8 ffffffff81d05974 0000000000000246 Call Trace: [<ffffffff81cf8c96>] dump_stack+0x4e/0x7a [<ffffffff810cf4f2>] __might_sleep+0x162/0x260 [<ffffffff81d05974>] down_read+0x24/0x60 [<ffffffff81133e87>] css_task_iter_start+0x27/0x70 [<ffffffff8113584d>] freezer_apply_state+0x5d/0x130 [<ffffffff81135a16>] freezer_write+0xf6/0x1e0 [<ffffffff8112eb88>] cgroup_file_write+0xd8/0x230 [<ffffffff8126d7b7>] kernfs_fop_write+0xe7/0x170 [<ffffffff811f0756>] vfs_write+0xb6/0x1c0 [<ffffffff811f121d>] SyS_write+0x4d/0xc0 [<ffffffff81d08292>] system_call_fastpath+0x16/0x1b freezer->lock used to be used in hot paths but that time is long gone and there's no reason for the lock to be IRQ-safe spinlock or even per-cgroup. In fact, given the fact that a cgroup may contain large number of tasks, it's not a good idea to iterate over them while holding IRQ-safe spinlock. Let's simplify locking by replacing per-cgroup freezer->lock with global freezer_mutex. This also makes the comments explaining the intricacies of policy inheritance and the locking around it as the states are protected by a common mutex. The conversion is mostly straight-forward. The followings are worth mentioning. * freezer_css_online() no longer needs double locking. * freezer_attach() now performs propagation simply while holding freezer_mutex. update_if_frozen() race no longer exists and the comment is removed. * freezer_fork() now tests whether the task is in root cgroup using the new task_css_is_root() without doing rcu_read_lock/unlock(). If not, it grabs freezer_mutex and performs the operation. * freezer_read() and freezer_change_state() grab freezer_mutex across the whole operation and pin the css while iterating so that each descendant processing happens in sleepable context. Fixes: 96d365e0b86e ("cgroup: make css_set_lock a rwsem and rename it to css_set_rwsem") Signed-off-by: Tejun Heo <tj@kernel.org> Acked-by: Li Zefan <lizefan@huawei.com>
Diffstat (limited to 'kernel/cgroup_freezer.c')
-rw-r--r--kernel/cgroup_freezer.c112
1 files changed, 46 insertions, 66 deletions
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index 2bc4a2256444..12ead0b766ee 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -21,6 +21,7 @@
21#include <linux/uaccess.h> 21#include <linux/uaccess.h>
22#include <linux/freezer.h> 22#include <linux/freezer.h>
23#include <linux/seq_file.h> 23#include <linux/seq_file.h>
24#include <linux/mutex.h>
24 25
25/* 26/*
26 * A cgroup is freezing if any FREEZING flags are set. FREEZING_SELF is 27 * A cgroup is freezing if any FREEZING flags are set. FREEZING_SELF is
@@ -42,9 +43,10 @@ enum freezer_state_flags {
42struct freezer { 43struct freezer {
43 struct cgroup_subsys_state css; 44 struct cgroup_subsys_state css;
44 unsigned int state; 45 unsigned int state;
45 spinlock_t lock;
46}; 46};
47 47
48static DEFINE_MUTEX(freezer_mutex);
49
48static inline struct freezer *css_freezer(struct cgroup_subsys_state *css) 50static inline struct freezer *css_freezer(struct cgroup_subsys_state *css)
49{ 51{
50 return css ? container_of(css, struct freezer, css) : NULL; 52 return css ? container_of(css, struct freezer, css) : NULL;
@@ -93,7 +95,6 @@ freezer_css_alloc(struct cgroup_subsys_state *parent_css)
93 if (!freezer) 95 if (!freezer)
94 return ERR_PTR(-ENOMEM); 96 return ERR_PTR(-ENOMEM);
95 97
96 spin_lock_init(&freezer->lock);
97 return &freezer->css; 98 return &freezer->css;
98} 99}
99 100
@@ -110,14 +111,7 @@ static int freezer_css_online(struct cgroup_subsys_state *css)
110 struct freezer *freezer = css_freezer(css); 111 struct freezer *freezer = css_freezer(css);
111 struct freezer *parent = parent_freezer(freezer); 112 struct freezer *parent = parent_freezer(freezer);
112 113
113 /* 114 mutex_lock(&freezer_mutex);
114 * The following double locking and freezing state inheritance
115 * guarantee that @cgroup can never escape ancestors' freezing
116 * states. See css_for_each_descendant_pre() for details.
117 */
118 if (parent)
119 spin_lock_irq(&parent->lock);
120 spin_lock_nested(&freezer->lock, SINGLE_DEPTH_NESTING);
121 115
122 freezer->state |= CGROUP_FREEZER_ONLINE; 116 freezer->state |= CGROUP_FREEZER_ONLINE;
123 117
@@ -126,10 +120,7 @@ static int freezer_css_online(struct cgroup_subsys_state *css)
126 atomic_inc(&system_freezing_cnt); 120 atomic_inc(&system_freezing_cnt);
127 } 121 }
128 122
129 spin_unlock(&freezer->lock); 123 mutex_unlock(&freezer_mutex);
130 if (parent)
131 spin_unlock_irq(&parent->lock);
132
133 return 0; 124 return 0;
134} 125}
135 126
@@ -144,14 +135,14 @@ static void freezer_css_offline(struct cgroup_subsys_state *css)
144{ 135{
145 struct freezer *freezer = css_freezer(css); 136 struct freezer *freezer = css_freezer(css);
146 137
147 spin_lock_irq(&freezer->lock); 138 mutex_lock(&freezer_mutex);
148 139
149 if (freezer->state & CGROUP_FREEZING) 140 if (freezer->state & CGROUP_FREEZING)
150 atomic_dec(&system_freezing_cnt); 141 atomic_dec(&system_freezing_cnt);
151 142
152 freezer->state = 0; 143 freezer->state = 0;
153 144
154 spin_unlock_irq(&freezer->lock); 145 mutex_unlock(&freezer_mutex);
155} 146}
156 147
157static void freezer_css_free(struct cgroup_subsys_state *css) 148static void freezer_css_free(struct cgroup_subsys_state *css)
@@ -175,7 +166,7 @@ static void freezer_attach(struct cgroup_subsys_state *new_css,
175 struct task_struct *task; 166 struct task_struct *task;
176 bool clear_frozen = false; 167 bool clear_frozen = false;
177 168
178 spin_lock_irq(&freezer->lock); 169 mutex_lock(&freezer_mutex);
179 170
180 /* 171 /*
181 * Make the new tasks conform to the current state of @new_css. 172 * Make the new tasks conform to the current state of @new_css.
@@ -197,21 +188,13 @@ static void freezer_attach(struct cgroup_subsys_state *new_css,
197 } 188 }
198 } 189 }
199 190
200 spin_unlock_irq(&freezer->lock); 191 /* propagate FROZEN clearing upwards */
201
202 /*
203 * Propagate FROZEN clearing upwards. We may race with
204 * update_if_frozen(), but as long as both work bottom-up, either
205 * update_if_frozen() sees child's FROZEN cleared or we clear the
206 * parent's FROZEN later. No parent w/ !FROZEN children can be
207 * left FROZEN.
208 */
209 while (clear_frozen && (freezer = parent_freezer(freezer))) { 192 while (clear_frozen && (freezer = parent_freezer(freezer))) {
210 spin_lock_irq(&freezer->lock);
211 freezer->state &= ~CGROUP_FROZEN; 193 freezer->state &= ~CGROUP_FROZEN;
212 clear_frozen = freezer->state & CGROUP_FREEZING; 194 clear_frozen = freezer->state & CGROUP_FREEZING;
213 spin_unlock_irq(&freezer->lock);
214 } 195 }
196
197 mutex_unlock(&freezer_mutex);
215} 198}
216 199
217/** 200/**
@@ -228,9 +211,6 @@ static void freezer_fork(struct task_struct *task)
228{ 211{
229 struct freezer *freezer; 212 struct freezer *freezer;
230 213
231 rcu_read_lock();
232 freezer = task_freezer(task);
233
234 /* 214 /*
235 * The root cgroup is non-freezable, so we can skip locking the 215 * The root cgroup is non-freezable, so we can skip locking the
236 * freezer. This is safe regardless of race with task migration. 216 * freezer. This is safe regardless of race with task migration.
@@ -238,24 +218,18 @@ static void freezer_fork(struct task_struct *task)
238 * to do. If we lost and root is the new cgroup, noop is still the 218 * to do. If we lost and root is the new cgroup, noop is still the
239 * right thing to do. 219 * right thing to do.
240 */ 220 */
241 if (!parent_freezer(freezer)) 221 if (task_css_is_root(task, freezer_cgrp_id))
242 goto out; 222 return;
243 223
244 /* 224 mutex_lock(&freezer_mutex);
245 * Grab @freezer->lock and freeze @task after verifying @task still 225 rcu_read_lock();
246 * belongs to @freezer and it's freezing. The former is for the 226
247 * case where we have raced against task migration and lost and 227 freezer = task_freezer(task);
248 * @task is already in a different cgroup which may not be frozen. 228 if (freezer->state & CGROUP_FREEZING)
249 * This isn't strictly necessary as freeze_task() is allowed to be
250 * called spuriously but let's do it anyway for, if nothing else,
251 * documentation.
252 */
253 spin_lock_irq(&freezer->lock);
254 if (freezer == task_freezer(task) && (freezer->state & CGROUP_FREEZING))
255 freeze_task(task); 229 freeze_task(task);
256 spin_unlock_irq(&freezer->lock); 230
257out:
258 rcu_read_unlock(); 231 rcu_read_unlock();
232 mutex_unlock(&freezer_mutex);
259} 233}
260 234
261/** 235/**
@@ -281,22 +255,22 @@ static void update_if_frozen(struct cgroup_subsys_state *css)
281 struct css_task_iter it; 255 struct css_task_iter it;
282 struct task_struct *task; 256 struct task_struct *task;
283 257
284 WARN_ON_ONCE(!rcu_read_lock_held()); 258 lockdep_assert_held(&freezer_mutex);
285
286 spin_lock_irq(&freezer->lock);
287 259
288 if (!(freezer->state & CGROUP_FREEZING) || 260 if (!(freezer->state & CGROUP_FREEZING) ||
289 (freezer->state & CGROUP_FROZEN)) 261 (freezer->state & CGROUP_FROZEN))
290 goto out_unlock; 262 return;
291 263
292 /* are all (live) children frozen? */ 264 /* are all (live) children frozen? */
265 rcu_read_lock();
293 css_for_each_child(pos, css) { 266 css_for_each_child(pos, css) {
294 struct freezer *child = css_freezer(pos); 267 struct freezer *child = css_freezer(pos);
295 268
296 if ((child->state & CGROUP_FREEZER_ONLINE) && 269 if ((child->state & CGROUP_FREEZER_ONLINE) &&
297 !(child->state & CGROUP_FROZEN)) 270 !(child->state & CGROUP_FROZEN))
298 goto out_unlock; 271 return;
299 } 272 }
273 rcu_read_unlock();
300 274
301 /* are all tasks frozen? */ 275 /* are all tasks frozen? */
302 css_task_iter_start(css, &it); 276 css_task_iter_start(css, &it);
@@ -317,21 +291,29 @@ static void update_if_frozen(struct cgroup_subsys_state *css)
317 freezer->state |= CGROUP_FROZEN; 291 freezer->state |= CGROUP_FROZEN;
318out_iter_end: 292out_iter_end:
319 css_task_iter_end(&it); 293 css_task_iter_end(&it);
320out_unlock:
321 spin_unlock_irq(&freezer->lock);
322} 294}
323 295
324static int freezer_read(struct seq_file *m, void *v) 296static int freezer_read(struct seq_file *m, void *v)
325{ 297{
326 struct cgroup_subsys_state *css = seq_css(m), *pos; 298 struct cgroup_subsys_state *css = seq_css(m), *pos;
327 299
300 mutex_lock(&freezer_mutex);
328 rcu_read_lock(); 301 rcu_read_lock();
329 302
330 /* update states bottom-up */ 303 /* update states bottom-up */
331 css_for_each_descendant_post(pos, css) 304 css_for_each_descendant_post(pos, css) {
305 if (!css_tryget(pos))
306 continue;
307 rcu_read_unlock();
308
332 update_if_frozen(pos); 309 update_if_frozen(pos);
333 310
311 rcu_read_lock();
312 css_put(pos);
313 }
314
334 rcu_read_unlock(); 315 rcu_read_unlock();
316 mutex_unlock(&freezer_mutex);
335 317
336 seq_puts(m, freezer_state_strs(css_freezer(css)->state)); 318 seq_puts(m, freezer_state_strs(css_freezer(css)->state));
337 seq_putc(m, '\n'); 319 seq_putc(m, '\n');
@@ -373,7 +355,7 @@ static void freezer_apply_state(struct freezer *freezer, bool freeze,
373 unsigned int state) 355 unsigned int state)
374{ 356{
375 /* also synchronizes against task migration, see freezer_attach() */ 357 /* also synchronizes against task migration, see freezer_attach() */
376 lockdep_assert_held(&freezer->lock); 358 lockdep_assert_held(&freezer_mutex);
377 359
378 if (!(freezer->state & CGROUP_FREEZER_ONLINE)) 360 if (!(freezer->state & CGROUP_FREEZER_ONLINE))
379 return; 361 return;
@@ -414,31 +396,29 @@ static void freezer_change_state(struct freezer *freezer, bool freeze)
414 * descendant will try to inherit its parent's FREEZING state as 396 * descendant will try to inherit its parent's FREEZING state as
415 * CGROUP_FREEZING_PARENT. 397 * CGROUP_FREEZING_PARENT.
416 */ 398 */
399 mutex_lock(&freezer_mutex);
417 rcu_read_lock(); 400 rcu_read_lock();
418 css_for_each_descendant_pre(pos, &freezer->css) { 401 css_for_each_descendant_pre(pos, &freezer->css) {
419 struct freezer *pos_f = css_freezer(pos); 402 struct freezer *pos_f = css_freezer(pos);
420 struct freezer *parent = parent_freezer(pos_f); 403 struct freezer *parent = parent_freezer(pos_f);
421 404
422 spin_lock_irq(&pos_f->lock); 405 if (!css_tryget(pos))
406 continue;
407 rcu_read_unlock();
423 408
424 if (pos_f == freezer) { 409 if (pos_f == freezer)
425 freezer_apply_state(pos_f, freeze, 410 freezer_apply_state(pos_f, freeze,
426 CGROUP_FREEZING_SELF); 411 CGROUP_FREEZING_SELF);
427 } else { 412 else
428 /*
429 * Our update to @parent->state is already visible
430 * which is all we need. No need to lock @parent.
431 * For more info on synchronization, see
432 * freezer_post_create().
433 */
434 freezer_apply_state(pos_f, 413 freezer_apply_state(pos_f,
435 parent->state & CGROUP_FREEZING, 414 parent->state & CGROUP_FREEZING,
436 CGROUP_FREEZING_PARENT); 415 CGROUP_FREEZING_PARENT);
437 }
438 416
439 spin_unlock_irq(&pos_f->lock); 417 rcu_read_lock();
418 css_put(pos);
440 } 419 }
441 rcu_read_unlock(); 420 rcu_read_unlock();
421 mutex_unlock(&freezer_mutex);
442} 422}
443 423
444static int freezer_write(struct cgroup_subsys_state *css, struct cftype *cft, 424static int freezer_write(struct cgroup_subsys_state *css, struct cftype *cft,