aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorMatt Helsley <matthltc@us.ibm.com>2008-10-18 23:27:21 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2008-10-20 11:52:34 -0400
commitdc52ddc0e6f45b04780b26fc0813509f8e798c42 (patch)
tree384826e9fab4e434bc5c85ce744470ae472e52c3 /kernel
parent8174f1503f4bf7e9a14b3fbbfdb30c6be6e29f77 (diff)
container freezer: implement freezer cgroup subsystem
This patch implements a new freezer subsystem in the control groups framework. It provides a way to stop and resume execution of all tasks in a cgroup by writing in the cgroup filesystem. The freezer subsystem in the container filesystem defines a file named freezer.state. Writing "FROZEN" to the state file will freeze all tasks in the cgroup. Subsequently writing "RUNNING" will unfreeze the tasks in the cgroup. Reading will return the current state. * Examples of usage : # mkdir /containers/freezer # mount -t cgroup -ofreezer freezer /containers # mkdir /containers/0 # echo $some_pid > /containers/0/tasks to get status of the freezer subsystem : # cat /containers/0/freezer.state RUNNING to freeze all tasks in the container : # echo FROZEN > /containers/0/freezer.state # cat /containers/0/freezer.state FREEZING # cat /containers/0/freezer.state FROZEN to unfreeze all tasks in the container : # echo RUNNING > /containers/0/freezer.state # cat /containers/0/freezer.state RUNNING This is the basic mechanism which should do the right thing for user space task in a simple scenario. It's important to note that freezing can be incomplete. In that case we return EBUSY. This means that some tasks in the cgroup are busy doing something that prevents us from completely freezing the cgroup at this time. After EBUSY, the cgroup will remain partially frozen -- reflected by freezer.state reporting "FREEZING" when read. The state will remain "FREEZING" until one of these things happens: 1) Userspace cancels the freezing operation by writing "RUNNING" to the freezer.state file 2) Userspace retries the freezing operation by writing "FROZEN" to the freezer.state file (writing "FREEZING" is not legal and returns EIO) 3) The tasks that blocked the cgroup from entering the "FROZEN" state disappear from the cgroup's set of tasks. 
[akpm@linux-foundation.org: coding-style fixes] [akpm@linux-foundation.org: export thaw_process] Signed-off-by: Cedric Le Goater <clg@fr.ibm.com> Signed-off-by: Matt Helsley <matthltc@us.ibm.com> Acked-by: Serge E. Hallyn <serue@us.ibm.com> Tested-by: Matt Helsley <matthltc@us.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Kconfig.freezer2
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/cgroup_freezer.c366
-rw-r--r--kernel/freezer.c32
-rw-r--r--kernel/power/Kconfig3
5 files changed, 401 insertions, 3 deletions
diff --git a/kernel/Kconfig.freezer b/kernel/Kconfig.freezer
new file mode 100644
index 000000000000..a3bb4cb52539
--- /dev/null
+++ b/kernel/Kconfig.freezer
@@ -0,0 +1,2 @@
1config FREEZER
2 def_bool PM_SLEEP || CGROUP_FREEZER
diff --git a/kernel/Makefile b/kernel/Makefile
index e8194d15d5f4..066550aa61c5 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -56,6 +56,7 @@ obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
56obj-$(CONFIG_COMPAT) += compat.o 56obj-$(CONFIG_COMPAT) += compat.o
57obj-$(CONFIG_CGROUPS) += cgroup.o 57obj-$(CONFIG_CGROUPS) += cgroup.o
58obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o 58obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o
59obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o
59obj-$(CONFIG_CPUSETS) += cpuset.o 60obj-$(CONFIG_CPUSETS) += cpuset.o
60obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o 61obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o
61obj-$(CONFIG_UTS_NS) += utsname.o 62obj-$(CONFIG_UTS_NS) += utsname.o
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
new file mode 100644
index 000000000000..b08722de610c
--- /dev/null
+++ b/kernel/cgroup_freezer.c
@@ -0,0 +1,366 @@
1/*
2 * cgroup_freezer.c - control group freezer subsystem
3 *
4 * Copyright IBM Corporation, 2007
5 *
6 * Author : Cedric Le Goater <clg@fr.ibm.com>
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms of version 2.1 of the GNU Lesser General Public License
10 * as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope that it would be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
15 */
16
17#include <linux/module.h>
18#include <linux/cgroup.h>
19#include <linux/fs.h>
20#include <linux/uaccess.h>
21#include <linux/freezer.h>
22#include <linux/seq_file.h>
23
/* States a freezer cgroup can be in; also indexes freezer_state_strs[]. */
enum freezer_state {
	STATE_RUNNING = 0,	/* no freeze requested */
	STATE_FREEZING,		/* freeze requested, not yet confirmed complete */
	STATE_FROZEN,		/* every task in the cgroup was seen frozen */
};
29
30struct freezer {
31 struct cgroup_subsys_state css;
32 enum freezer_state state;
33 spinlock_t lock; /* protects _writes_ to state */
34};
35
36static inline struct freezer *cgroup_freezer(
37 struct cgroup *cgroup)
38{
39 return container_of(
40 cgroup_subsys_state(cgroup, freezer_subsys_id),
41 struct freezer, css);
42}
43
44static inline struct freezer *task_freezer(struct task_struct *task)
45{
46 return container_of(task_subsys_state(task, freezer_subsys_id),
47 struct freezer, css);
48}
49
50int cgroup_frozen(struct task_struct *task)
51{
52 struct freezer *freezer;
53 enum freezer_state state;
54
55 task_lock(task);
56 freezer = task_freezer(task);
57 state = freezer->state;
58 task_unlock(task);
59
60 return state == STATE_FROZEN;
61}
62
63/*
64 * cgroups_write_string() limits the size of freezer state strings to
65 * CGROUP_LOCAL_BUFFER_SIZE
66 */
67static const char *freezer_state_strs[] = {
68 "RUNNING",
69 "FREEZING",
70 "FROZEN",
71};
72
73/*
74 * State diagram
75 * Transitions are caused by userspace writes to the freezer.state file.
76 * The values in parentheses are state labels. The rest are edge labels.
77 *
78 * (RUNNING) --FROZEN--> (FREEZING) --FROZEN--> (FROZEN)
79 * ^ ^ | |
80 * | \_______RUNNING_______/ |
81 * \_____________________________RUNNING___________/
82 */
83
84struct cgroup_subsys freezer_subsys;
85
86/* Locks taken and their ordering
87 * ------------------------------
88 * css_set_lock
89 * cgroup_mutex (AKA cgroup_lock)
90 * task->alloc_lock (AKA task_lock)
91 * freezer->lock
92 * task->sighand->siglock
93 *
94 * cgroup code forces css_set_lock to be taken before task->alloc_lock
95 *
96 * freezer_create(), freezer_destroy():
97 * cgroup_mutex [ by cgroup core ]
98 *
99 * can_attach():
100 * cgroup_mutex
101 *
102 * cgroup_frozen():
103 * task->alloc_lock (to get task's cgroup)
104 *
105 * freezer_fork() (preserving fork() performance means can't take cgroup_mutex):
106 * task->alloc_lock (to get task's cgroup)
107 * freezer->lock
108 * sighand->siglock (if the cgroup is freezing)
109 *
110 * freezer_read():
111 * cgroup_mutex
112 * freezer->lock
113 * read_lock css_set_lock (cgroup iterator start)
114 *
115 * freezer_write() (freeze):
116 * cgroup_mutex
117 * freezer->lock
118 * read_lock css_set_lock (cgroup iterator start)
119 * sighand->siglock
120 *
121 * freezer_write() (unfreeze):
122 * cgroup_mutex
123 * freezer->lock
124 * read_lock css_set_lock (cgroup iterator start)
125 * task->alloc_lock (to prevent races with freeze_task())
126 * sighand->siglock
127 */
128static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss,
129 struct cgroup *cgroup)
130{
131 struct freezer *freezer;
132
133 freezer = kzalloc(sizeof(struct freezer), GFP_KERNEL);
134 if (!freezer)
135 return ERR_PTR(-ENOMEM);
136
137 spin_lock_init(&freezer->lock);
138 freezer->state = STATE_RUNNING;
139 return &freezer->css;
140}
141
/* freezer_destroy - free the struct freezer that belongs to @cgroup. */
static void freezer_destroy(struct cgroup_subsys *ss,
			    struct cgroup *cgroup)
{
	struct freezer *doomed = cgroup_freezer(cgroup);

	kfree(doomed);
}
147
148
149static int freezer_can_attach(struct cgroup_subsys *ss,
150 struct cgroup *new_cgroup,
151 struct task_struct *task)
152{
153 struct freezer *freezer;
154 int retval = 0;
155
156 /*
157 * The call to cgroup_lock() in the freezer.state write method prevents
158 * a write to that file racing against an attach, and hence the
159 * can_attach() result will remain valid until the attach completes.
160 */
161 freezer = cgroup_freezer(new_cgroup);
162 if (freezer->state == STATE_FROZEN)
163 retval = -EBUSY;
164 return retval;
165}
166
167static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
168{
169 struct freezer *freezer;
170
171 task_lock(task);
172 freezer = task_freezer(task);
173 task_unlock(task);
174
175 BUG_ON(freezer->state == STATE_FROZEN);
176 spin_lock_irq(&freezer->lock);
177 /* Locking avoids race with FREEZING -> RUNNING transitions. */
178 if (freezer->state == STATE_FREEZING)
179 freeze_task(task, true);
180 spin_unlock_irq(&freezer->lock);
181}
182
183/*
184 * caller must hold freezer->lock
185 */
186static void check_if_frozen(struct cgroup *cgroup,
187 struct freezer *freezer)
188{
189 struct cgroup_iter it;
190 struct task_struct *task;
191 unsigned int nfrozen = 0, ntotal = 0;
192
193 cgroup_iter_start(cgroup, &it);
194 while ((task = cgroup_iter_next(cgroup, &it))) {
195 ntotal++;
196 /*
197 * Task is frozen or will freeze immediately when next it gets
198 * woken
199 */
200 if (frozen(task) ||
201 (task_is_stopped_or_traced(task) && freezing(task)))
202 nfrozen++;
203 }
204
205 /*
206 * Transition to FROZEN when no new tasks can be added ensures
207 * that we never exist in the FROZEN state while there are unfrozen
208 * tasks.
209 */
210 if (nfrozen == ntotal)
211 freezer->state = STATE_FROZEN;
212 cgroup_iter_end(cgroup, &it);
213}
214
215static int freezer_read(struct cgroup *cgroup, struct cftype *cft,
216 struct seq_file *m)
217{
218 struct freezer *freezer;
219 enum freezer_state state;
220
221 if (!cgroup_lock_live_group(cgroup))
222 return -ENODEV;
223
224 freezer = cgroup_freezer(cgroup);
225 spin_lock_irq(&freezer->lock);
226 state = freezer->state;
227 if (state == STATE_FREEZING) {
228 /* We change from FREEZING to FROZEN lazily if the cgroup was
229 * only partially frozen when we exitted write. */
230 check_if_frozen(cgroup, freezer);
231 state = freezer->state;
232 }
233 spin_unlock_irq(&freezer->lock);
234 cgroup_unlock();
235
236 seq_puts(m, freezer_state_strs[state]);
237 seq_putc(m, '\n');
238 return 0;
239}
240
241static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
242{
243 struct cgroup_iter it;
244 struct task_struct *task;
245 unsigned int num_cant_freeze_now = 0;
246
247 freezer->state = STATE_FREEZING;
248 cgroup_iter_start(cgroup, &it);
249 while ((task = cgroup_iter_next(cgroup, &it))) {
250 if (!freeze_task(task, true))
251 continue;
252 if (task_is_stopped_or_traced(task) && freezing(task))
253 /*
254 * The freeze flag is set so these tasks will
255 * immediately go into the fridge upon waking.
256 */
257 continue;
258 if (!freezing(task) && !freezer_should_skip(task))
259 num_cant_freeze_now++;
260 }
261 cgroup_iter_end(cgroup, &it);
262
263 return num_cant_freeze_now ? -EBUSY : 0;
264}
265
266static int unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
267{
268 struct cgroup_iter it;
269 struct task_struct *task;
270
271 cgroup_iter_start(cgroup, &it);
272 while ((task = cgroup_iter_next(cgroup, &it))) {
273 int do_wake;
274
275 task_lock(task);
276 do_wake = __thaw_process(task);
277 task_unlock(task);
278 if (do_wake)
279 wake_up_process(task);
280 }
281 cgroup_iter_end(cgroup, &it);
282 freezer->state = STATE_RUNNING;
283
284 return 0;
285}
286
287static int freezer_change_state(struct cgroup *cgroup,
288 enum freezer_state goal_state)
289{
290 struct freezer *freezer;
291 int retval = 0;
292
293 freezer = cgroup_freezer(cgroup);
294 spin_lock_irq(&freezer->lock);
295 check_if_frozen(cgroup, freezer); /* may update freezer->state */
296 if (goal_state == freezer->state)
297 goto out;
298 switch (freezer->state) {
299 case STATE_RUNNING:
300 retval = try_to_freeze_cgroup(cgroup, freezer);
301 break;
302 case STATE_FREEZING:
303 if (goal_state == STATE_FROZEN) {
304 /* Userspace is retrying after
305 * "/bin/echo FROZEN > freezer.state" returned -EBUSY */
306 retval = try_to_freeze_cgroup(cgroup, freezer);
307 break;
308 }
309 /* state == FREEZING and goal_state == RUNNING, so unfreeze */
310 case STATE_FROZEN:
311 retval = unfreeze_cgroup(cgroup, freezer);
312 break;
313 default:
314 break;
315 }
316out:
317 spin_unlock_irq(&freezer->lock);
318
319 return retval;
320}
321
322static int freezer_write(struct cgroup *cgroup,
323 struct cftype *cft,
324 const char *buffer)
325{
326 int retval;
327 enum freezer_state goal_state;
328
329 if (strcmp(buffer, freezer_state_strs[STATE_RUNNING]) == 0)
330 goal_state = STATE_RUNNING;
331 else if (strcmp(buffer, freezer_state_strs[STATE_FROZEN]) == 0)
332 goal_state = STATE_FROZEN;
333 else
334 return -EIO;
335
336 if (!cgroup_lock_live_group(cgroup))
337 return -ENODEV;
338 retval = freezer_change_state(cgroup, goal_state);
339 cgroup_unlock();
340 return retval;
341}
342
343static struct cftype files[] = {
344 {
345 .name = "state",
346 .read_seq_string = freezer_read,
347 .write_string = freezer_write,
348 },
349};
350
351static int freezer_populate(struct cgroup_subsys *ss, struct cgroup *cgroup)
352{
353 return cgroup_add_files(cgroup, ss, files, ARRAY_SIZE(files));
354}
355
356struct cgroup_subsys freezer_subsys = {
357 .name = "freezer",
358 .create = freezer_create,
359 .destroy = freezer_destroy,
360 .populate = freezer_populate,
361 .subsys_id = freezer_subsys_id,
362 .can_attach = freezer_can_attach,
363 .attach = NULL,
364 .fork = freezer_fork,
365 .exit = NULL,
366};
diff --git a/kernel/freezer.c b/kernel/freezer.c
index cb0931f89306..ba6248b323ef 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -120,3 +120,35 @@ void cancel_freezing(struct task_struct *p)
120 spin_unlock_irqrestore(&p->sighand->siglock, flags); 120 spin_unlock_irqrestore(&p->sighand->siglock, flags);
121 } 121 }
122} 122}
123
124/*
125 * Wake up a frozen process
126 *
127 * task_lock() is needed to prevent the race with refrigerator() which may
128 * occur if the freezing of tasks fails. Namely, without the lock, if the
129 * freezing of tasks failed, thaw_tasks() might have run before a task in
130 * refrigerator() could call frozen_process(), in which case the task would be
131 * frozen and no one would thaw it.
132 */
133int __thaw_process(struct task_struct *p)
134{
135 if (frozen(p)) {
136 p->flags &= ~PF_FROZEN;
137 return 1;
138 }
139 clear_freeze_flag(p);
140 return 0;
141}
142
/*
 * thaw_process - thaw @p under task_lock() and wake it if it was frozen.
 *
 * Returns 1 if the task was frozen and has been woken up, 0 otherwise.
 */
int thaw_process(struct task_struct *p)
{
	int was_frozen;

	task_lock(p);
	was_frozen = (__thaw_process(p) == 1);
	task_unlock(p);

	/* The wakeup is issued after the task lock has been dropped. */
	if (was_frozen)
		wake_up_process(p);

	return was_frozen;
}
EXPORT_SYMBOL(thaw_process);
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index ebdd7f55273d..dcd165f92a88 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -85,9 +85,6 @@ config PM_SLEEP
85 depends on SUSPEND || HIBERNATION || XEN_SAVE_RESTORE 85 depends on SUSPEND || HIBERNATION || XEN_SAVE_RESTORE
86 default y 86 default y
87 87
88config FREEZER
89 def_bool PM_SLEEP
90
91config SUSPEND 88config SUSPEND
92 bool "Suspend to RAM and standby" 89 bool "Suspend to RAM and standby"
93 depends on PM && ARCH_SUSPEND_POSSIBLE 90 depends on PM && ARCH_SUSPEND_POSSIBLE