diff options
-rw-r--r-- | Documentation/cgroups/freezer-subsystem.txt | 63 | ||||
-rw-r--r-- | kernel/cgroup_freezer.c | 161 |
2 files changed, 165 insertions, 59 deletions
diff --git a/Documentation/cgroups/freezer-subsystem.txt b/Documentation/cgroups/freezer-subsystem.txt index 7e62de1e59ff..c96a72cbb30a 100644 --- a/Documentation/cgroups/freezer-subsystem.txt +++ b/Documentation/cgroups/freezer-subsystem.txt | |||
@@ -49,13 +49,49 @@ prevent the freeze/unfreeze cycle from becoming visible to the tasks | |||
49 | being frozen. This allows the bash example above and gdb to run as | 49 | being frozen. This allows the bash example above and gdb to run as |
50 | expected. | 50 | expected. |
51 | 51 | ||
52 | The freezer subsystem in the container filesystem defines a file named | 52 | The cgroup freezer is hierarchical. Freezing a cgroup freezes all |
53 | freezer.state. Writing "FROZEN" to the state file will freeze all tasks in the | 53 | tasks belonging to the cgroup and all its descendant cgroups. Each |
54 | cgroup. Subsequently writing "THAWED" will unfreeze the tasks in the cgroup. | 54 | cgroup has its own state (self-state) and the state inherited from the |
55 | Reading will return the current state. | 55 | parent (parent-state). Iff both states are THAWED, the cgroup is |
56 | THAWED. | ||
56 | 57 | ||
57 | Note freezer.state doesn't exist in root cgroup, which means root cgroup | 58 | The following cgroupfs files are created by cgroup freezer. |
58 | is non-freezable. | 59 | |
60 | * freezer.state: Read-write. | ||
61 | |||
62 | When read, returns the effective state of the cgroup - "THAWED", | ||
63 | "FREEZING" or "FROZEN". This is the combined self and parent-states. | ||
64 | If any is freezing, the cgroup is freezing (FREEZING or FROZEN). | ||
65 | |||
66 | FREEZING cgroup transitions into FROZEN state when all tasks | ||
67 | belonging to the cgroup and its descendants become frozen. Note that | ||
68 | a cgroup reverts to FREEZING from FROZEN after a new task is added | ||
69 | to the cgroup or one of its descendant cgroups until the new task is | ||
70 | frozen. | ||
71 | |||
72 | When written, sets the self-state of the cgroup. Two values are | ||
73 | allowed - "FROZEN" and "THAWED". If FROZEN is written, the cgroup, | ||
74 | if not already freezing, enters FREEZING state along with all its | ||
75 | descendant cgroups. | ||
76 | |||
77 | If THAWED is written, the self-state of the cgroup is changed to | ||
78 | THAWED. Note that the effective state may not change to THAWED if | ||
79 | the parent-state is still freezing. If a cgroup's effective state | ||
80 | becomes THAWED, all its descendants which are freezing because of | ||
81 | the cgroup also leave the freezing state. | ||
82 | |||
83 | * freezer.self_freezing: Read only. | ||
84 | |||
85 | Shows the self-state. 0 if the self-state is THAWED; otherwise, 1. | ||
86 | This value is 1 iff the last write to freezer.state was "FROZEN". | ||
87 | |||
88 | * freezer.parent_freezing: Read only. | ||
89 | |||
90 | Shows the parent-state. 0 if none of the cgroup's ancestors is | ||
91 | freezing; otherwise, 1. | ||
92 | |||
93 | The root cgroup is non-freezable and the above interface files don't | ||
94 | exist. | ||
59 | 95 | ||
60 | * Examples of usage : | 96 | * Examples of usage : |
61 | 97 | ||
@@ -85,18 +121,3 @@ to unfreeze all tasks in the container : | |||
85 | 121 | ||
86 | This is the basic mechanism which should do the right thing for user space task | 122 | This is the basic mechanism which should do the right thing for user space task |
87 | in a simple scenario. | 123 | in a simple scenario. |
88 | |||
89 | It's important to note that freezing can be incomplete. In that case we return | ||
90 | EBUSY. This means that some tasks in the cgroup are busy doing something that | ||
91 | prevents us from completely freezing the cgroup at this time. After EBUSY, | ||
92 | the cgroup will remain partially frozen -- reflected by freezer.state reporting | ||
93 | "FREEZING" when read. The state will remain "FREEZING" until one of these | ||
94 | things happens: | ||
95 | |||
96 | 1) Userspace cancels the freezing operation by writing "THAWED" to | ||
97 | the freezer.state file | ||
98 | 2) Userspace retries the freezing operation by writing "FROZEN" to | ||
99 | the freezer.state file (writing "FREEZING" is not legal | ||
100 | and returns EINVAL) | ||
101 | 3) The tasks that blocked the cgroup from entering the "FROZEN" | ||
102 | state disappear from the cgroup's set of tasks. | ||
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index 4f12d317c4c3..670a4af7dc94 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c | |||
@@ -22,6 +22,13 @@ | |||
22 | #include <linux/freezer.h> | 22 | #include <linux/freezer.h> |
23 | #include <linux/seq_file.h> | 23 | #include <linux/seq_file.h> |
24 | 24 | ||
25 | /* | ||
26 | * A cgroup is freezing if any FREEZING flags are set. FREEZING_SELF is | ||
27 | * set if "FROZEN" is written to freezer.state cgroupfs file, and cleared | ||
28 | * for "THAWED". FREEZING_PARENT is set if the parent freezer is FREEZING | ||
29 | * for whatever reason. IOW, a cgroup has FREEZING_PARENT set if one of | ||
30 | * its ancestors has FREEZING_SELF set. | ||
31 | */ | ||
25 | enum freezer_state_flags { | 32 | enum freezer_state_flags { |
26 | CGROUP_FREEZER_ONLINE = (1 << 0), /* freezer is fully online */ | 33 | CGROUP_FREEZER_ONLINE = (1 << 0), /* freezer is fully online */ |
27 | CGROUP_FREEZING_SELF = (1 << 1), /* this freezer is freezing */ | 34 | CGROUP_FREEZING_SELF = (1 << 1), /* this freezer is freezing */ |
@@ -50,6 +57,15 @@ static inline struct freezer *task_freezer(struct task_struct *task) | |||
50 | struct freezer, css); | 57 | struct freezer, css); |
51 | } | 58 | } |
52 | 59 | ||
60 | static struct freezer *parent_freezer(struct freezer *freezer) | ||
61 | { | ||
62 | struct cgroup *pcg = freezer->css.cgroup->parent; | ||
63 | |||
64 | if (pcg) | ||
65 | return cgroup_freezer(pcg); | ||
66 | return NULL; | ||
67 | } | ||
68 | |||
53 | bool cgroup_freezing(struct task_struct *task) | 69 | bool cgroup_freezing(struct task_struct *task) |
54 | { | 70 | { |
55 | bool ret; | 71 | bool ret; |
@@ -74,17 +90,6 @@ static const char *freezer_state_strs(unsigned int state) | |||
74 | return "THAWED"; | 90 | return "THAWED"; |
75 | }; | 91 | }; |
76 | 92 | ||
77 | /* | ||
78 | * State diagram | ||
79 | * Transitions are caused by userspace writes to the freezer.state file. | ||
80 | * The values in parenthesis are state labels. The rest are edge labels. | ||
81 | * | ||
82 | * (THAWED) --FROZEN--> (FREEZING) --FROZEN--> (FROZEN) | ||
83 | * ^ ^ | | | ||
84 | * | \_______THAWED_______/ | | ||
85 | * \__________________________THAWED____________/ | ||
86 | */ | ||
87 | |||
88 | struct cgroup_subsys freezer_subsys; | 93 | struct cgroup_subsys freezer_subsys; |
89 | 94 | ||
90 | static struct cgroup_subsys_state *freezer_create(struct cgroup *cgroup) | 95 | static struct cgroup_subsys_state *freezer_create(struct cgroup *cgroup) |
@@ -103,15 +108,34 @@ static struct cgroup_subsys_state *freezer_create(struct cgroup *cgroup) | |||
103 | * freezer_post_create - commit creation of a freezer cgroup | 108 | * freezer_post_create - commit creation of a freezer cgroup |
104 | * @cgroup: cgroup being created | 109 | * @cgroup: cgroup being created |
105 | * | 110 | * |
106 | * We're committing to creation of @cgroup. Mark it online. | 111 | * We're committing to creation of @cgroup. Mark it online and inherit |
112 | * parent's freezing state while holding both parent's and our | ||
113 | * freezer->lock. | ||
107 | */ | 114 | */ |
108 | static void freezer_post_create(struct cgroup *cgroup) | 115 | static void freezer_post_create(struct cgroup *cgroup) |
109 | { | 116 | { |
110 | struct freezer *freezer = cgroup_freezer(cgroup); | 117 | struct freezer *freezer = cgroup_freezer(cgroup); |
118 | struct freezer *parent = parent_freezer(freezer); | ||
119 | |||
120 | /* | ||
121 | * The following double locking and freezing state inheritance | ||
122 | * guarantee that @cgroup can never escape ancestors' freezing | ||
123 | * states. See cgroup_for_each_descendant_pre() for details. | ||
124 | */ | ||
125 | if (parent) | ||
126 | spin_lock_irq(&parent->lock); | ||
127 | spin_lock_nested(&freezer->lock, SINGLE_DEPTH_NESTING); | ||
111 | 128 | ||
112 | spin_lock_irq(&freezer->lock); | ||
113 | freezer->state |= CGROUP_FREEZER_ONLINE; | 129 | freezer->state |= CGROUP_FREEZER_ONLINE; |
114 | spin_unlock_irq(&freezer->lock); | 130 | |
131 | if (parent && (parent->state & CGROUP_FREEZING)) { | ||
132 | freezer->state |= CGROUP_FREEZING_PARENT | CGROUP_FROZEN; | ||
133 | atomic_inc(&system_freezing_cnt); | ||
134 | } | ||
135 | |||
136 | spin_unlock(&freezer->lock); | ||
137 | if (parent) | ||
138 | spin_unlock_irq(&parent->lock); | ||
115 | } | 139 | } |
116 | 140 | ||
117 | /** | 141 | /** |
@@ -153,6 +177,7 @@ static void freezer_attach(struct cgroup *new_cgrp, struct cgroup_taskset *tset) | |||
153 | { | 177 | { |
154 | struct freezer *freezer = cgroup_freezer(new_cgrp); | 178 | struct freezer *freezer = cgroup_freezer(new_cgrp); |
155 | struct task_struct *task; | 179 | struct task_struct *task; |
180 | bool clear_frozen = false; | ||
156 | 181 | ||
157 | spin_lock_irq(&freezer->lock); | 182 | spin_lock_irq(&freezer->lock); |
158 | 183 | ||
@@ -172,10 +197,25 @@ static void freezer_attach(struct cgroup *new_cgrp, struct cgroup_taskset *tset) | |||
172 | } else { | 197 | } else { |
173 | freeze_task(task); | 198 | freeze_task(task); |
174 | freezer->state &= ~CGROUP_FROZEN; | 199 | freezer->state &= ~CGROUP_FROZEN; |
200 | clear_frozen = true; | ||
175 | } | 201 | } |
176 | } | 202 | } |
177 | 203 | ||
178 | spin_unlock_irq(&freezer->lock); | 204 | spin_unlock_irq(&freezer->lock); |
205 | |||
206 | /* | ||
207 | * Propagate FROZEN clearing upwards. We may race with | ||
208 | * update_if_frozen(), but as long as both work bottom-up, either | ||
209 | * update_if_frozen() sees child's FROZEN cleared or we clear the | ||
210 | * parent's FROZEN later. No parent w/ !FROZEN children can be | ||
211 | * left FROZEN. | ||
212 | */ | ||
213 | while (clear_frozen && (freezer = parent_freezer(freezer))) { | ||
214 | spin_lock_irq(&freezer->lock); | ||
215 | freezer->state &= ~CGROUP_FROZEN; | ||
216 | clear_frozen = freezer->state & CGROUP_FREEZING; | ||
217 | spin_unlock_irq(&freezer->lock); | ||
218 | } | ||
179 | } | 219 | } |
180 | 220 | ||
181 | static void freezer_fork(struct task_struct *task) | 221 | static void freezer_fork(struct task_struct *task) |
@@ -200,24 +240,47 @@ out: | |||
200 | rcu_read_unlock(); | 240 | rcu_read_unlock(); |
201 | } | 241 | } |
202 | 242 | ||
203 | /* | 243 | /** |
204 | * We change from FREEZING to FROZEN lazily if the cgroup was only | 244 | * update_if_frozen - update whether a cgroup finished freezing |
205 | * partially frozen when we exitted write. Caller must hold freezer->lock. | 245 | * @cgroup: cgroup of interest |
246 | * | ||
247 | * Once FREEZING is initiated, transition to FROZEN is lazily updated by | ||
248 | * calling this function. If the current state is FREEZING but not FROZEN, | ||
249 | * this function checks whether all tasks of this cgroup and the descendant | ||
250 | * cgroups finished freezing and, if so, sets FROZEN. | ||
251 | * | ||
252 | * The caller is responsible for grabbing RCU read lock and calling | ||
253 | * update_if_frozen() on all descendants prior to invoking this function. | ||
206 | * | 254 | * |
207 | * Task states and freezer state might disagree while tasks are being | 255 | * Task states and freezer state might disagree while tasks are being |
208 | * migrated into or out of @cgroup, so we can't verify task states against | 256 | * migrated into or out of @cgroup, so we can't verify task states against |
209 | * @freezer state here. See freezer_attach() for details. | 257 | * @freezer state here. See freezer_attach() for details. |
210 | */ | 258 | */ |
211 | static void update_if_frozen(struct freezer *freezer) | 259 | static void update_if_frozen(struct cgroup *cgroup) |
212 | { | 260 | { |
213 | struct cgroup *cgroup = freezer->css.cgroup; | 261 | struct freezer *freezer = cgroup_freezer(cgroup); |
262 | struct cgroup *pos; | ||
214 | struct cgroup_iter it; | 263 | struct cgroup_iter it; |
215 | struct task_struct *task; | 264 | struct task_struct *task; |
216 | 265 | ||
266 | WARN_ON_ONCE(!rcu_read_lock_held()); | ||
267 | |||
268 | spin_lock_irq(&freezer->lock); | ||
269 | |||
217 | if (!(freezer->state & CGROUP_FREEZING) || | 270 | if (!(freezer->state & CGROUP_FREEZING) || |
218 | (freezer->state & CGROUP_FROZEN)) | 271 | (freezer->state & CGROUP_FROZEN)) |
219 | return; | 272 | goto out_unlock; |
273 | |||
274 | /* are all (live) children frozen? */ | ||
275 | cgroup_for_each_child(pos, cgroup) { | ||
276 | struct freezer *child = cgroup_freezer(pos); | ||
220 | 277 | ||
278 | if ((child->state & CGROUP_FREEZER_ONLINE) && | ||
279 | !(child->state & CGROUP_FROZEN)) | ||
280 | goto out_unlock; | ||
281 | } | ||
282 | |||
283 | /* are all tasks frozen? */ | ||
221 | cgroup_iter_start(cgroup, &it); | 284 | cgroup_iter_start(cgroup, &it); |
222 | 285 | ||
223 | while ((task = cgroup_iter_next(cgroup, &it))) { | 286 | while ((task = cgroup_iter_next(cgroup, &it))) { |
@@ -229,27 +292,32 @@ static void update_if_frozen(struct freezer *freezer) | |||
229 | * the usual frozen condition. | 292 | * the usual frozen condition. |
230 | */ | 293 | */ |
231 | if (!frozen(task) && !freezer_should_skip(task)) | 294 | if (!frozen(task) && !freezer_should_skip(task)) |
232 | goto notyet; | 295 | goto out_iter_end; |
233 | } | 296 | } |
234 | } | 297 | } |
235 | 298 | ||
236 | freezer->state |= CGROUP_FROZEN; | 299 | freezer->state |= CGROUP_FROZEN; |
237 | notyet: | 300 | out_iter_end: |
238 | cgroup_iter_end(cgroup, &it); | 301 | cgroup_iter_end(cgroup, &it); |
302 | out_unlock: | ||
303 | spin_unlock_irq(&freezer->lock); | ||
239 | } | 304 | } |
240 | 305 | ||
241 | static int freezer_read(struct cgroup *cgroup, struct cftype *cft, | 306 | static int freezer_read(struct cgroup *cgroup, struct cftype *cft, |
242 | struct seq_file *m) | 307 | struct seq_file *m) |
243 | { | 308 | { |
244 | struct freezer *freezer = cgroup_freezer(cgroup); | 309 | struct cgroup *pos; |
245 | unsigned int state; | ||
246 | 310 | ||
247 | spin_lock_irq(&freezer->lock); | 311 | rcu_read_lock(); |
248 | update_if_frozen(freezer); | ||
249 | state = freezer->state; | ||
250 | spin_unlock_irq(&freezer->lock); | ||
251 | 312 | ||
252 | seq_puts(m, freezer_state_strs(state)); | 313 | /* update states bottom-up */ |
314 | cgroup_for_each_descendant_post(pos, cgroup) | ||
315 | update_if_frozen(pos); | ||
316 | update_if_frozen(cgroup); | ||
317 | |||
318 | rcu_read_unlock(); | ||
319 | |||
320 | seq_puts(m, freezer_state_strs(cgroup_freezer(cgroup)->state)); | ||
253 | seq_putc(m, '\n'); | 321 | seq_putc(m, '\n'); |
254 | return 0; | 322 | return 0; |
255 | } | 323 | } |
@@ -320,14 +388,39 @@ static void freezer_apply_state(struct freezer *freezer, bool freeze, | |||
320 | * @freezer: freezer of interest | 388 | * @freezer: freezer of interest |
321 | * @freeze: whether to freeze or thaw | 389 | * @freeze: whether to freeze or thaw |
322 | * | 390 | * |
323 | * Freeze or thaw @cgroup according to @freeze. | 391 | * Freeze or thaw @freezer according to @freeze. The operations are |
392 | * recursive - all descendants of @freezer will be affected. | ||
324 | */ | 393 | */ |
325 | static void freezer_change_state(struct freezer *freezer, bool freeze) | 394 | static void freezer_change_state(struct freezer *freezer, bool freeze) |
326 | { | 395 | { |
396 | struct cgroup *pos; | ||
397 | |||
327 | /* update @freezer */ | 398 | /* update @freezer */ |
328 | spin_lock_irq(&freezer->lock); | 399 | spin_lock_irq(&freezer->lock); |
329 | freezer_apply_state(freezer, freeze, CGROUP_FREEZING_SELF); | 400 | freezer_apply_state(freezer, freeze, CGROUP_FREEZING_SELF); |
330 | spin_unlock_irq(&freezer->lock); | 401 | spin_unlock_irq(&freezer->lock); |
402 | |||
403 | /* | ||
404 | * Update all its descendants in pre-order traversal. Each | ||
405 | * descendant will try to inherit its parent's FREEZING state as | ||
406 | * CGROUP_FREEZING_PARENT. | ||
407 | */ | ||
408 | rcu_read_lock(); | ||
409 | cgroup_for_each_descendant_pre(pos, freezer->css.cgroup) { | ||
410 | struct freezer *pos_f = cgroup_freezer(pos); | ||
411 | struct freezer *parent = parent_freezer(pos_f); | ||
412 | |||
413 | /* | ||
414 | * Our update to @parent->state is already visible which is | ||
415 | * all we need. No need to lock @parent. For more info on | ||
416 | * synchronization, see freezer_post_create(). | ||
417 | */ | ||
418 | spin_lock_irq(&pos_f->lock); | ||
419 | freezer_apply_state(pos_f, parent->state & CGROUP_FREEZING, | ||
420 | CGROUP_FREEZING_PARENT); | ||
421 | spin_unlock_irq(&pos_f->lock); | ||
422 | } | ||
423 | rcu_read_unlock(); | ||
331 | } | 424 | } |
332 | 425 | ||
333 | static int freezer_write(struct cgroup *cgroup, struct cftype *cft, | 426 | static int freezer_write(struct cgroup *cgroup, struct cftype *cft, |
@@ -390,12 +483,4 @@ struct cgroup_subsys freezer_subsys = { | |||
390 | .attach = freezer_attach, | 483 | .attach = freezer_attach, |
391 | .fork = freezer_fork, | 484 | .fork = freezer_fork, |
392 | .base_cftypes = files, | 485 | .base_cftypes = files, |
393 | |||
394 | /* | ||
395 | * freezer subsys doesn't handle hierarchy at all. Frozen state | ||
396 | * should be inherited through the hierarchy - if a parent is | ||
397 | * frozen, all its children should be frozen. Fix it and remove | ||
398 | * the following. | ||
399 | */ | ||
400 | .broken_hierarchy = true, | ||
401 | }; | 486 | }; |