author     Tejun Heo <tj@kernel.org>    2012-11-09 12:12:30 -0500
committer  Tejun Heo <tj@kernel.org>    2012-11-09 13:52:30 -0500
commit     ef9fe980c6fcc1821ab955b74b242d2d6585fa75 (patch)
tree       65257977c49732853d36c286ba824fb12f801c53 /kernel/cgroup_freezer.c
parent     5300a9b3482b6d9c32de6d5f4eaeab0fbafa70a8 (diff)
cgroup_freezer: implement proper hierarchy support
Up until now, cgroup_freezer didn't implement hierarchy properly. cgroups could be arranged in a hierarchy, but it made no difference in how each cgroup_freezer behaved: they all operated separately.

This patch implements proper hierarchy support. If a cgroup is frozen, all its descendants are frozen too. A cgroup is thawed iff it and all of its ancestors are THAWED. freezer.self_freezing shows the current freezing state for the cgroup itself; freezer.parent_freezing shows whether the cgroup is freezing because any of its ancestors is freezing.

freezer_post_create() locks both the parent and the new cgroup and inherits the parent's state. freezer_change_state() applies the new state top-down using cgroup_for_each_descendant_pre(), which guarantees that no child can escape its parent's state. update_if_frozen() uses cgroup_for_each_descendant_post() to propagate frozen states bottom-up.

Synchronization could be coarser and easier with a single mutex protecting all hierarchy operations. The finer-grained approach was used because it wasn't too difficult for cgroup_freezer, and it's beneficial to have an example implementation; cgroup_freezer is rather simple and can serve as a good one.

As this makes cgroup_freezer properly hierarchical, the freezer_subsys.broken_hierarchy marking is removed.

Note that this patch changes userland-visible behavior: freezing a cgroup now freezes all of its descendants too. This behavior change is intended and has been warned about via .broken_hierarchy.

v2: Michal spotted a bug in freezer_change_state() - descendants were inheriting from the wrong ancestor. Fixed.

v3: Documentation/cgroups/freezer-subsystem.txt updated.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
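
For illustration only, a minimal userspace sketch (run as root) of the hierarchical behavior described above. It assumes the freezer hierarchy is mounted at /sys/fs/cgroup/freezer and that cgroups A and A/B already exist with tasks in them; the mount point and cgroup names are hypothetical, while freezer.state, freezer.self_freezing and freezer.parent_freezing are the control files this patch describes.

/* Hedged sketch: freeze a parent cgroup and observe that its child freezes
 * too.  Paths are assumptions; adjust to the local freezer mount point. */
#include <stdio.h>

static void write_str(const char *path, const char *val)
{
        FILE *f = fopen(path, "w");

        if (!f) {
                perror(path);
                return;
        }
        fputs(val, f);
        fclose(f);
}

static void show(const char *path)
{
        char buf[64] = "";
        FILE *f = fopen(path, "r");

        if (!f) {
                perror(path);
                return;
        }
        if (fgets(buf, sizeof(buf), f))
                printf("%-60s %s", path, buf);
        fclose(f);
}

int main(void)
{
        /* Freezing the parent now freezes every descendant as well. */
        write_str("/sys/fs/cgroup/freezer/A/freezer.state", "FROZEN");

        /* The parent froze itself ... */
        show("/sys/fs/cgroup/freezer/A/freezer.state");           /* FREEZING or FROZEN */
        show("/sys/fs/cgroup/freezer/A/freezer.self_freezing");   /* 1 */

        /* ... and the child is freezing because an ancestor is freezing. */
        show("/sys/fs/cgroup/freezer/A/B/freezer.state");          /* FREEZING or FROZEN */
        show("/sys/fs/cgroup/freezer/A/B/freezer.self_freezing");  /* 0 */
        show("/sys/fs/cgroup/freezer/A/B/freezer.parent_freezing");/* 1 */

        /* Thawing the parent thaws the child again: a cgroup is THAWED
         * iff it and all of its ancestors are THAWED. */
        write_str("/sys/fs/cgroup/freezer/A/freezer.state", "THAWED");
        show("/sys/fs/cgroup/freezer/A/B/freezer.state");           /* THAWED */

        return 0;
}

Because the FREEZING-to-FROZEN transition is updated lazily, freezer.state may briefly read FREEZING before settling on FROZEN while tasks are still being frozen.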
Diffstat (limited to 'kernel/cgroup_freezer.c')
-rw-r--r--  kernel/cgroup_freezer.c  161
1 file changed, 123 insertions(+), 38 deletions(-)
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index 4f12d317c4c..670a4af7dc9 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -22,6 +22,13 @@
 #include <linux/freezer.h>
 #include <linux/seq_file.h>
 
+/*
+ * A cgroup is freezing if any FREEZING flags are set.  FREEZING_SELF is
+ * set if "FROZEN" is written to freezer.state cgroupfs file, and cleared
+ * for "THAWED".  FREEZING_PARENT is set if the parent freezer is FREEZING
+ * for whatever reason.  IOW, a cgroup has FREEZING_PARENT set if one of
+ * its ancestors has FREEZING_SELF set.
+ */
 enum freezer_state_flags {
         CGROUP_FREEZER_ONLINE   = (1 << 0), /* freezer is fully online */
         CGROUP_FREEZING_SELF    = (1 << 1), /* this freezer is freezing */
@@ -50,6 +57,15 @@ static inline struct freezer *task_freezer(struct task_struct *task)
                            struct freezer, css);
 }
 
+static struct freezer *parent_freezer(struct freezer *freezer)
+{
+        struct cgroup *pcg = freezer->css.cgroup->parent;
+
+        if (pcg)
+                return cgroup_freezer(pcg);
+        return NULL;
+}
+
 bool cgroup_freezing(struct task_struct *task)
 {
         bool ret;
@@ -74,17 +90,6 @@ static const char *freezer_state_strs(unsigned int state)
         return "THAWED";
 };
 
-/*
- * State diagram
- * Transitions are caused by userspace writes to the freezer.state file.
- * The values in parenthesis are state labels. The rest are edge labels.
- *
- * (THAWED) --FROZEN--> (FREEZING) --FROZEN--> (FROZEN)
- *    ^ ^                    |                     |
- *    | \_______THAWED_______/                     |
- *    \__________________________THAWED____________/
- */
-
 struct cgroup_subsys freezer_subsys;
 
 static struct cgroup_subsys_state *freezer_create(struct cgroup *cgroup)
@@ -103,15 +108,34 @@ static struct cgroup_subsys_state *freezer_create(struct cgroup *cgroup)
  * freezer_post_create - commit creation of a freezer cgroup
  * @cgroup: cgroup being created
  *
- * We're committing to creation of @cgroup. Mark it online.
+ * We're committing to creation of @cgroup. Mark it online and inherit
+ * parent's freezing state while holding both parent's and our
+ * freezer->lock.
  */
 static void freezer_post_create(struct cgroup *cgroup)
 {
         struct freezer *freezer = cgroup_freezer(cgroup);
+        struct freezer *parent = parent_freezer(freezer);
+
+        /*
+         * The following double locking and freezing state inheritance
+         * guarantee that @cgroup can never escape ancestors' freezing
+         * states.  See cgroup_for_each_descendant_pre() for details.
+         */
+        if (parent)
+                spin_lock_irq(&parent->lock);
+        spin_lock_nested(&freezer->lock, SINGLE_DEPTH_NESTING);
 
-        spin_lock_irq(&freezer->lock);
         freezer->state |= CGROUP_FREEZER_ONLINE;
-        spin_unlock_irq(&freezer->lock);
+
+        if (parent && (parent->state & CGROUP_FREEZING)) {
+                freezer->state |= CGROUP_FREEZING_PARENT | CGROUP_FROZEN;
+                atomic_inc(&system_freezing_cnt);
+        }
+
+        spin_unlock(&freezer->lock);
+        if (parent)
+                spin_unlock_irq(&parent->lock);
 }
 
 /**
@@ -153,6 +177,7 @@ static void freezer_attach(struct cgroup *new_cgrp, struct cgroup_taskset *tset)
 {
         struct freezer *freezer = cgroup_freezer(new_cgrp);
         struct task_struct *task;
+        bool clear_frozen = false;
 
         spin_lock_irq(&freezer->lock);
 
@@ -172,10 +197,25 @@ static void freezer_attach(struct cgroup *new_cgrp, struct cgroup_taskset *tset)
                 } else {
                         freeze_task(task);
                         freezer->state &= ~CGROUP_FROZEN;
+                        clear_frozen = true;
                 }
         }
 
         spin_unlock_irq(&freezer->lock);
+
+        /*
+         * Propagate FROZEN clearing upwards.  We may race with
+         * update_if_frozen(), but as long as both work bottom-up, either
+         * update_if_frozen() sees child's FROZEN cleared or we clear the
+         * parent's FROZEN later.  No parent w/ !FROZEN children can be
+         * left FROZEN.
+         */
+        while (clear_frozen && (freezer = parent_freezer(freezer))) {
+                spin_lock_irq(&freezer->lock);
+                freezer->state &= ~CGROUP_FROZEN;
+                clear_frozen = freezer->state & CGROUP_FREEZING;
+                spin_unlock_irq(&freezer->lock);
+        }
 }
 
 static void freezer_fork(struct task_struct *task)
@@ -200,24 +240,47 @@ out:
         rcu_read_unlock();
 }
 
-/*
- * We change from FREEZING to FROZEN lazily if the cgroup was only
- * partially frozen when we exitted write. Caller must hold freezer->lock.
+/**
+ * update_if_frozen - update whether a cgroup finished freezing
+ * @cgroup: cgroup of interest
+ *
+ * Once FREEZING is initiated, transition to FROZEN is lazily updated by
+ * calling this function.  If the current state is FREEZING but not FROZEN,
+ * this function checks whether all tasks of this cgroup and the descendant
+ * cgroups finished freezing and, if so, sets FROZEN.
+ *
+ * The caller is responsible for grabbing RCU read lock and calling
+ * update_if_frozen() on all descendants prior to invoking this function.
  *
  * Task states and freezer state might disagree while tasks are being
  * migrated into or out of @cgroup, so we can't verify task states against
  * @freezer state here.  See freezer_attach() for details.
  */
-static void update_if_frozen(struct freezer *freezer)
+static void update_if_frozen(struct cgroup *cgroup)
 {
-        struct cgroup *cgroup = freezer->css.cgroup;
+        struct freezer *freezer = cgroup_freezer(cgroup);
+        struct cgroup *pos;
         struct cgroup_iter it;
         struct task_struct *task;
 
+        WARN_ON_ONCE(!rcu_read_lock_held());
+
+        spin_lock_irq(&freezer->lock);
+
         if (!(freezer->state & CGROUP_FREEZING) ||
             (freezer->state & CGROUP_FROZEN))
-                return;
+                goto out_unlock;
+
+        /* are all (live) children frozen? */
+        cgroup_for_each_child(pos, cgroup) {
+                struct freezer *child = cgroup_freezer(pos);
 
+                if ((child->state & CGROUP_FREEZER_ONLINE) &&
+                    !(child->state & CGROUP_FROZEN))
+                        goto out_unlock;
+        }
+
+        /* are all tasks frozen? */
         cgroup_iter_start(cgroup, &it);
 
         while ((task = cgroup_iter_next(cgroup, &it))) {
@@ -229,27 +292,32 @@ static void update_if_frozen(struct freezer *freezer)
                          * the usual frozen condition.
                          */
                         if (!frozen(task) && !freezer_should_skip(task))
-                                goto notyet;
+                                goto out_iter_end;
                 }
         }
 
         freezer->state |= CGROUP_FROZEN;
-notyet:
+out_iter_end:
         cgroup_iter_end(cgroup, &it);
+out_unlock:
+        spin_unlock_irq(&freezer->lock);
 }
 
 static int freezer_read(struct cgroup *cgroup, struct cftype *cft,
                         struct seq_file *m)
 {
-        struct freezer *freezer = cgroup_freezer(cgroup);
-        unsigned int state;
+        struct cgroup *pos;
 
-        spin_lock_irq(&freezer->lock);
-        update_if_frozen(freezer);
-        state = freezer->state;
-        spin_unlock_irq(&freezer->lock);
+        rcu_read_lock();
 
-        seq_puts(m, freezer_state_strs(state));
+        /* update states bottom-up */
+        cgroup_for_each_descendant_post(pos, cgroup)
+                update_if_frozen(pos);
+        update_if_frozen(cgroup);
+
+        rcu_read_unlock();
+
+        seq_puts(m, freezer_state_strs(cgroup_freezer(cgroup)->state));
         seq_putc(m, '\n');
         return 0;
 }
@@ -320,14 +388,39 @@ static void freezer_apply_state(struct freezer *freezer, bool freeze,
  * @freezer: freezer of interest
  * @freeze: whether to freeze or thaw
  *
- * Freeze or thaw @cgroup according to @freeze.
+ * Freeze or thaw @freezer according to @freeze.  The operations are
+ * recursive - all descendants of @freezer will be affected.
  */
 static void freezer_change_state(struct freezer *freezer, bool freeze)
 {
+        struct cgroup *pos;
+
         /* update @freezer */
         spin_lock_irq(&freezer->lock);
         freezer_apply_state(freezer, freeze, CGROUP_FREEZING_SELF);
         spin_unlock_irq(&freezer->lock);
+
+        /*
+         * Update all its descendants in pre-order traversal.  Each
+         * descendant will try to inherit its parent's FREEZING state as
+         * CGROUP_FREEZING_PARENT.
+         */
+        rcu_read_lock();
+        cgroup_for_each_descendant_pre(pos, freezer->css.cgroup) {
+                struct freezer *pos_f = cgroup_freezer(pos);
+                struct freezer *parent = parent_freezer(pos_f);
+
+                /*
+                 * Our update to @parent->state is already visible which is
+                 * all we need.  No need to lock @parent.  For more info on
+                 * synchronization, see freezer_post_create().
+                 */
+                spin_lock_irq(&pos_f->lock);
+                freezer_apply_state(pos_f, parent->state & CGROUP_FREEZING,
+                                    CGROUP_FREEZING_PARENT);
+                spin_unlock_irq(&pos_f->lock);
+        }
+        rcu_read_unlock();
 }
 
 static int freezer_write(struct cgroup *cgroup, struct cftype *cft,
@@ -390,12 +483,4 @@ struct cgroup_subsys freezer_subsys = {
         .attach         = freezer_attach,
         .fork           = freezer_fork,
         .base_cftypes   = files,
-
-        /*
-         * freezer subsys doesn't handle hierarchy at all.  Frozen state
-         * should be inherited through the hierarchy - if a parent is
-         * frozen, all its children should be frozen.  Fix it and remove
-         * the following.
-         */
-        .broken_hierarchy = true,
 };
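
As a closing illustration, here is a toy userspace model of the invariant the patch establishes; every name in it (node, propagate_down, update_frozen, tasks_frozen) is hypothetical and only mirrors the FREEZING_SELF / FREEZING_PARENT / FROZEN bookkeeping, not the real locking, RCU, or task iteration. The top-down pass inherits from the immediate parent, in pre-order, which is exactly the point fixed in v2; the bottom-up pass reports FROZEN only once the local tasks and every child have finished freezing.

/* Toy model of the hierarchy rules added by this patch (plain C99). */
#include <stdbool.h>
#include <stdio.h>

struct node {
        const char *name;
        bool freezing_self;     /* "FROZEN" written to this cgroup */
        bool freezing_parent;   /* some ancestor is freezing */
        bool frozen;            /* tasks + children finished freezing */
        bool tasks_frozen;      /* stand-in for "all local tasks frozen" */
        struct node *child;     /* single-child chain keeps the model short */
};

/* Top-down, pre-order: each node inherits its immediate parent's FREEZING
 * state, so no node can escape its ancestors' state. */
static void propagate_down(struct node *parent)
{
        for (struct node *pos = parent->child; pos; pos = pos->child) {
                pos->freezing_parent = parent->freezing_self ||
                                       parent->freezing_parent;
                parent = pos;
        }
}

/* Bottom-up, post-order: a node may become FROZEN only once its child
 * (all children, in the real code) is FROZEN and its own tasks are. */
static bool update_frozen(struct node *n)
{
        bool child_frozen = n->child ? update_frozen(n->child) : true;

        if ((n->freezing_self || n->freezing_parent) &&
            child_frozen && n->tasks_frozen)
                n->frozen = true;
        return n->frozen;
}

int main(void)
{
        struct node c = { .name = "A/B/C", .tasks_frozen = true };
        struct node b = { .name = "A/B",   .tasks_frozen = true, .child = &c };
        struct node a = { .name = "A",     .tasks_frozen = true, .child = &b };

        a.freezing_self = true;         /* echo FROZEN > A/freezer.state */
        propagate_down(&a);
        update_frozen(&a);

        for (struct node *pos = &a; pos; pos = pos->child)
                printf("%-6s self=%d parent=%d frozen=%d\n", pos->name,
                       pos->freezing_self, pos->freezing_parent, pos->frozen);
        return 0;
}

Running it prints self=1 parent=0 for A and self=0 parent=1 for A/B and A/B/C, all with frozen=1 - the same split that freezer.self_freezing and freezer.parent_freezing expose to userspace.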