diff options
author | Tejun Heo <tj@kernel.org> | 2012-11-09 12:12:30 -0500 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2012-11-09 13:52:30 -0500 |
commit | ef9fe980c6fcc1821ab955b74b242d2d6585fa75 (patch) | |
tree | 65257977c49732853d36c286ba824fb12f801c53 /kernel/cgroup_freezer.c | |
parent | 5300a9b3482b6d9c32de6d5f4eaeab0fbafa70a8 (diff) |
cgroup_freezer: implement proper hierarchy support
Up until now, cgroup_freezer didn't implement hierarchy properly.
cgroups could be arranged in hierarchy but it didn't make any
difference in how each cgroup_freezer behaved. They all operated
separately.
This patch implements proper hierarchy support. If a cgroup is
frozen, all its descendants are frozen. A cgroup is thawed iff it and
all its ancestors are THAWED. freezer.self_freezing shows the current
freezing state for the cgroup itself. freezer.parent_freezing shows
whether the cgroup is freezing because any of its ancestors is
freezing.
freezer_post_create() locks the parent and the new cgroup and inherits
the parent's state, and freezer_change_state() applies the new state
top-down using cgroup_for_each_descendant_pre(), which guarantees that
no child can escape its parent's state. update_if_frozen() uses
cgroup_for_each_descendant_post() to propagate frozen states
bottom-up.
Synchronization could be coarser and easier by using a single mutex to
protect all hierarchy operations. A finer-grained approach was used
because it wasn't too difficult for cgroup_freezer, and I think it's
beneficial to have an example implementation; cgroup_freezer is
rather simple and can serve as a good one.
As this makes cgroup_freezer properly hierarchical,
freezer_subsys.broken_hierarchy marking is removed.
Note that this patch changes userland visible behavior - freezing a
cgroup now freezes all its descendants too. This behavior change is
intended and has been warned about via the .broken_hierarchy marking.
v2: Michal spotted a bug in freezer_change_state() - descendants were
inheriting from the wrong ancestor. Fixed.
v3: Documentation/cgroups/freezer-subsystem.txt updated.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Diffstat (limited to 'kernel/cgroup_freezer.c')
-rw-r--r-- | kernel/cgroup_freezer.c | 161 |
1 files changed, 123 insertions, 38 deletions
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index 4f12d317c4c3..670a4af7dc94 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c | |||
@@ -22,6 +22,13 @@ | |||
22 | #include <linux/freezer.h> | 22 | #include <linux/freezer.h> |
23 | #include <linux/seq_file.h> | 23 | #include <linux/seq_file.h> |
24 | 24 | ||
25 | /* | ||
26 | * A cgroup is freezing if any FREEZING flags are set. FREEZING_SELF is | ||
27 | * set if "FROZEN" is written to freezer.state cgroupfs file, and cleared | ||
28 | * for "THAWED". FREEZING_PARENT is set if the parent freezer is FREEZING | ||
29 | * for whatever reason. IOW, a cgroup has FREEZING_PARENT set if one of | ||
30 | * its ancestors has FREEZING_SELF set. | ||
31 | */ | ||
25 | enum freezer_state_flags { | 32 | enum freezer_state_flags { |
26 | CGROUP_FREEZER_ONLINE = (1 << 0), /* freezer is fully online */ | 33 | CGROUP_FREEZER_ONLINE = (1 << 0), /* freezer is fully online */ |
27 | CGROUP_FREEZING_SELF = (1 << 1), /* this freezer is freezing */ | 34 | CGROUP_FREEZING_SELF = (1 << 1), /* this freezer is freezing */ |
@@ -50,6 +57,15 @@ static inline struct freezer *task_freezer(struct task_struct *task) | |||
50 | struct freezer, css); | 57 | struct freezer, css); |
51 | } | 58 | } |
52 | 59 | ||
60 | static struct freezer *parent_freezer(struct freezer *freezer) | ||
61 | { | ||
62 | struct cgroup *pcg = freezer->css.cgroup->parent; | ||
63 | |||
64 | if (pcg) | ||
65 | return cgroup_freezer(pcg); | ||
66 | return NULL; | ||
67 | } | ||
68 | |||
53 | bool cgroup_freezing(struct task_struct *task) | 69 | bool cgroup_freezing(struct task_struct *task) |
54 | { | 70 | { |
55 | bool ret; | 71 | bool ret; |
@@ -74,17 +90,6 @@ static const char *freezer_state_strs(unsigned int state) | |||
74 | return "THAWED"; | 90 | return "THAWED"; |
75 | }; | 91 | }; |
76 | 92 | ||
77 | /* | ||
78 | * State diagram | ||
79 | * Transitions are caused by userspace writes to the freezer.state file. | ||
80 | * The values in parenthesis are state labels. The rest are edge labels. | ||
81 | * | ||
82 | * (THAWED) --FROZEN--> (FREEZING) --FROZEN--> (FROZEN) | ||
83 | * ^ ^ | | | ||
84 | * | \_______THAWED_______/ | | ||
85 | * \__________________________THAWED____________/ | ||
86 | */ | ||
87 | |||
88 | struct cgroup_subsys freezer_subsys; | 93 | struct cgroup_subsys freezer_subsys; |
89 | 94 | ||
90 | static struct cgroup_subsys_state *freezer_create(struct cgroup *cgroup) | 95 | static struct cgroup_subsys_state *freezer_create(struct cgroup *cgroup) |
@@ -103,15 +108,34 @@ static struct cgroup_subsys_state *freezer_create(struct cgroup *cgroup) | |||
103 | * freezer_post_create - commit creation of a freezer cgroup | 108 | * freezer_post_create - commit creation of a freezer cgroup |
104 | * @cgroup: cgroup being created | 109 | * @cgroup: cgroup being created |
105 | * | 110 | * |
106 | * We're committing to creation of @cgroup. Mark it online. | 111 | * We're committing to creation of @cgroup. Mark it online and inherit |
112 | * parent's freezing state while holding both parent's and our | ||
113 | * freezer->lock. | ||
107 | */ | 114 | */ |
108 | static void freezer_post_create(struct cgroup *cgroup) | 115 | static void freezer_post_create(struct cgroup *cgroup) |
109 | { | 116 | { |
110 | struct freezer *freezer = cgroup_freezer(cgroup); | 117 | struct freezer *freezer = cgroup_freezer(cgroup); |
118 | struct freezer *parent = parent_freezer(freezer); | ||
119 | |||
120 | /* | ||
121 | * The following double locking and freezing state inheritance | ||
122 | * guarantee that @cgroup can never escape ancestors' freezing | ||
123 | * states. See cgroup_for_each_descendant_pre() for details. | ||
124 | */ | ||
125 | if (parent) | ||
126 | spin_lock_irq(&parent->lock); | ||
127 | spin_lock_nested(&freezer->lock, SINGLE_DEPTH_NESTING); | ||
111 | 128 | ||
112 | spin_lock_irq(&freezer->lock); | ||
113 | freezer->state |= CGROUP_FREEZER_ONLINE; | 129 | freezer->state |= CGROUP_FREEZER_ONLINE; |
114 | spin_unlock_irq(&freezer->lock); | 130 | |
131 | if (parent && (parent->state & CGROUP_FREEZING)) { | ||
132 | freezer->state |= CGROUP_FREEZING_PARENT | CGROUP_FROZEN; | ||
133 | atomic_inc(&system_freezing_cnt); | ||
134 | } | ||
135 | |||
136 | spin_unlock(&freezer->lock); | ||
137 | if (parent) | ||
138 | spin_unlock_irq(&parent->lock); | ||
115 | } | 139 | } |
116 | 140 | ||
117 | /** | 141 | /** |
@@ -153,6 +177,7 @@ static void freezer_attach(struct cgroup *new_cgrp, struct cgroup_taskset *tset) | |||
153 | { | 177 | { |
154 | struct freezer *freezer = cgroup_freezer(new_cgrp); | 178 | struct freezer *freezer = cgroup_freezer(new_cgrp); |
155 | struct task_struct *task; | 179 | struct task_struct *task; |
180 | bool clear_frozen = false; | ||
156 | 181 | ||
157 | spin_lock_irq(&freezer->lock); | 182 | spin_lock_irq(&freezer->lock); |
158 | 183 | ||
@@ -172,10 +197,25 @@ static void freezer_attach(struct cgroup *new_cgrp, struct cgroup_taskset *tset) | |||
172 | } else { | 197 | } else { |
173 | freeze_task(task); | 198 | freeze_task(task); |
174 | freezer->state &= ~CGROUP_FROZEN; | 199 | freezer->state &= ~CGROUP_FROZEN; |
200 | clear_frozen = true; | ||
175 | } | 201 | } |
176 | } | 202 | } |
177 | 203 | ||
178 | spin_unlock_irq(&freezer->lock); | 204 | spin_unlock_irq(&freezer->lock); |
205 | |||
206 | /* | ||
207 | * Propagate FROZEN clearing upwards. We may race with | ||
208 | * update_if_frozen(), but as long as both work bottom-up, either | ||
209 | * update_if_frozen() sees child's FROZEN cleared or we clear the | ||
210 | * parent's FROZEN later. No parent w/ !FROZEN children can be | ||
211 | * left FROZEN. | ||
212 | */ | ||
213 | while (clear_frozen && (freezer = parent_freezer(freezer))) { | ||
214 | spin_lock_irq(&freezer->lock); | ||
215 | freezer->state &= ~CGROUP_FROZEN; | ||
216 | clear_frozen = freezer->state & CGROUP_FREEZING; | ||
217 | spin_unlock_irq(&freezer->lock); | ||
218 | } | ||
179 | } | 219 | } |
180 | 220 | ||
181 | static void freezer_fork(struct task_struct *task) | 221 | static void freezer_fork(struct task_struct *task) |
@@ -200,24 +240,47 @@ out: | |||
200 | rcu_read_unlock(); | 240 | rcu_read_unlock(); |
201 | } | 241 | } |
202 | 242 | ||
203 | /* | 243 | /** |
204 | * We change from FREEZING to FROZEN lazily if the cgroup was only | 244 | * update_if_frozen - update whether a cgroup finished freezing |
205 | * partially frozen when we exitted write. Caller must hold freezer->lock. | 245 | * @cgroup: cgroup of interest |
246 | * | ||
247 | * Once FREEZING is initiated, transition to FROZEN is lazily updated by | ||
248 | * calling this function. If the current state is FREEZING but not FROZEN, | ||
249 | * this function checks whether all tasks of this cgroup and the descendant | ||
250 | * cgroups finished freezing and, if so, sets FROZEN. | ||
251 | * | ||
252 | * The caller is responsible for grabbing RCU read lock and calling | ||
253 | * update_if_frozen() on all descendants prior to invoking this function. | ||
206 | * | 254 | * |
207 | * Task states and freezer state might disagree while tasks are being | 255 | * Task states and freezer state might disagree while tasks are being |
208 | * migrated into or out of @cgroup, so we can't verify task states against | 256 | * migrated into or out of @cgroup, so we can't verify task states against |
209 | * @freezer state here. See freezer_attach() for details. | 257 | * @freezer state here. See freezer_attach() for details. |
210 | */ | 258 | */ |
211 | static void update_if_frozen(struct freezer *freezer) | 259 | static void update_if_frozen(struct cgroup *cgroup) |
212 | { | 260 | { |
213 | struct cgroup *cgroup = freezer->css.cgroup; | 261 | struct freezer *freezer = cgroup_freezer(cgroup); |
262 | struct cgroup *pos; | ||
214 | struct cgroup_iter it; | 263 | struct cgroup_iter it; |
215 | struct task_struct *task; | 264 | struct task_struct *task; |
216 | 265 | ||
266 | WARN_ON_ONCE(!rcu_read_lock_held()); | ||
267 | |||
268 | spin_lock_irq(&freezer->lock); | ||
269 | |||
217 | if (!(freezer->state & CGROUP_FREEZING) || | 270 | if (!(freezer->state & CGROUP_FREEZING) || |
218 | (freezer->state & CGROUP_FROZEN)) | 271 | (freezer->state & CGROUP_FROZEN)) |
219 | return; | 272 | goto out_unlock; |
273 | |||
274 | /* are all (live) children frozen? */ | ||
275 | cgroup_for_each_child(pos, cgroup) { | ||
276 | struct freezer *child = cgroup_freezer(pos); | ||
220 | 277 | ||
278 | if ((child->state & CGROUP_FREEZER_ONLINE) && | ||
279 | !(child->state & CGROUP_FROZEN)) | ||
280 | goto out_unlock; | ||
281 | } | ||
282 | |||
283 | /* are all tasks frozen? */ | ||
221 | cgroup_iter_start(cgroup, &it); | 284 | cgroup_iter_start(cgroup, &it); |
222 | 285 | ||
223 | while ((task = cgroup_iter_next(cgroup, &it))) { | 286 | while ((task = cgroup_iter_next(cgroup, &it))) { |
@@ -229,27 +292,32 @@ static void update_if_frozen(struct freezer *freezer) | |||
229 | * the usual frozen condition. | 292 | * the usual frozen condition. |
230 | */ | 293 | */ |
231 | if (!frozen(task) && !freezer_should_skip(task)) | 294 | if (!frozen(task) && !freezer_should_skip(task)) |
232 | goto notyet; | 295 | goto out_iter_end; |
233 | } | 296 | } |
234 | } | 297 | } |
235 | 298 | ||
236 | freezer->state |= CGROUP_FROZEN; | 299 | freezer->state |= CGROUP_FROZEN; |
237 | notyet: | 300 | out_iter_end: |
238 | cgroup_iter_end(cgroup, &it); | 301 | cgroup_iter_end(cgroup, &it); |
302 | out_unlock: | ||
303 | spin_unlock_irq(&freezer->lock); | ||
239 | } | 304 | } |
240 | 305 | ||
241 | static int freezer_read(struct cgroup *cgroup, struct cftype *cft, | 306 | static int freezer_read(struct cgroup *cgroup, struct cftype *cft, |
242 | struct seq_file *m) | 307 | struct seq_file *m) |
243 | { | 308 | { |
244 | struct freezer *freezer = cgroup_freezer(cgroup); | 309 | struct cgroup *pos; |
245 | unsigned int state; | ||
246 | 310 | ||
247 | spin_lock_irq(&freezer->lock); | 311 | rcu_read_lock(); |
248 | update_if_frozen(freezer); | ||
249 | state = freezer->state; | ||
250 | spin_unlock_irq(&freezer->lock); | ||
251 | 312 | ||
252 | seq_puts(m, freezer_state_strs(state)); | 313 | /* update states bottom-up */ |
314 | cgroup_for_each_descendant_post(pos, cgroup) | ||
315 | update_if_frozen(pos); | ||
316 | update_if_frozen(cgroup); | ||
317 | |||
318 | rcu_read_unlock(); | ||
319 | |||
320 | seq_puts(m, freezer_state_strs(cgroup_freezer(cgroup)->state)); | ||
253 | seq_putc(m, '\n'); | 321 | seq_putc(m, '\n'); |
254 | return 0; | 322 | return 0; |
255 | } | 323 | } |
@@ -320,14 +388,39 @@ static void freezer_apply_state(struct freezer *freezer, bool freeze, | |||
320 | * @freezer: freezer of interest | 388 | * @freezer: freezer of interest |
321 | * @freeze: whether to freeze or thaw | 389 | * @freeze: whether to freeze or thaw |
322 | * | 390 | * |
323 | * Freeze or thaw @cgroup according to @freeze. | 391 | * Freeze or thaw @freezer according to @freeze. The operations are |
392 | * recursive - all descendants of @freezer will be affected. | ||
324 | */ | 393 | */ |
325 | static void freezer_change_state(struct freezer *freezer, bool freeze) | 394 | static void freezer_change_state(struct freezer *freezer, bool freeze) |
326 | { | 395 | { |
396 | struct cgroup *pos; | ||
397 | |||
327 | /* update @freezer */ | 398 | /* update @freezer */ |
328 | spin_lock_irq(&freezer->lock); | 399 | spin_lock_irq(&freezer->lock); |
329 | freezer_apply_state(freezer, freeze, CGROUP_FREEZING_SELF); | 400 | freezer_apply_state(freezer, freeze, CGROUP_FREEZING_SELF); |
330 | spin_unlock_irq(&freezer->lock); | 401 | spin_unlock_irq(&freezer->lock); |
402 | |||
403 | /* | ||
404 | * Update all its descendants in pre-order traversal. Each | ||
405 | * descendant will try to inherit its parent's FREEZING state as | ||
406 | * CGROUP_FREEZING_PARENT. | ||
407 | */ | ||
408 | rcu_read_lock(); | ||
409 | cgroup_for_each_descendant_pre(pos, freezer->css.cgroup) { | ||
410 | struct freezer *pos_f = cgroup_freezer(pos); | ||
411 | struct freezer *parent = parent_freezer(pos_f); | ||
412 | |||
413 | /* | ||
414 | * Our update to @parent->state is already visible which is | ||
415 | * all we need. No need to lock @parent. For more info on | ||
416 | * synchronization, see freezer_post_create(). | ||
417 | */ | ||
418 | spin_lock_irq(&pos_f->lock); | ||
419 | freezer_apply_state(pos_f, parent->state & CGROUP_FREEZING, | ||
420 | CGROUP_FREEZING_PARENT); | ||
421 | spin_unlock_irq(&pos_f->lock); | ||
422 | } | ||
423 | rcu_read_unlock(); | ||
331 | } | 424 | } |
332 | 425 | ||
333 | static int freezer_write(struct cgroup *cgroup, struct cftype *cft, | 426 | static int freezer_write(struct cgroup *cgroup, struct cftype *cft, |
@@ -390,12 +483,4 @@ struct cgroup_subsys freezer_subsys = { | |||
390 | .attach = freezer_attach, | 483 | .attach = freezer_attach, |
391 | .fork = freezer_fork, | 484 | .fork = freezer_fork, |
392 | .base_cftypes = files, | 485 | .base_cftypes = files, |
393 | |||
394 | /* | ||
395 | * freezer subsys doesn't handle hierarchy at all. Frozen state | ||
396 | * should be inherited through the hierarchy - if a parent is | ||
397 | * frozen, all its children should be frozen. Fix it and remove | ||
398 | * the following. | ||
399 | */ | ||
400 | .broken_hierarchy = true, | ||
401 | }; | 486 | }; |