diff options
-rw-r--r-- | Documentation/cgroups/freezer-subsystem.txt | 21 | ||||
-rw-r--r-- | kernel/cgroup_freezer.c | 11 |
2 files changed, 22 insertions, 10 deletions
diff --git a/Documentation/cgroups/freezer-subsystem.txt b/Documentation/cgroups/freezer-subsystem.txt
index c50ab58b72eb..41f37fea1276 100644
--- a/Documentation/cgroups/freezer-subsystem.txt
+++ b/Documentation/cgroups/freezer-subsystem.txt
@@ -1,4 +1,4 @@ | |||
1 | The cgroup freezer is useful to batch job management system which start | 1 | The cgroup freezer is useful to batch job management system which start |
2 | and stop sets of tasks in order to schedule the resources of a machine | 2 | and stop sets of tasks in order to schedule the resources of a machine |
3 | according to the desires of a system administrator. This sort of program | 3 | according to the desires of a system administrator. This sort of program |
4 | is often used on HPC clusters to schedule access to the cluster as a | 4 | is often used on HPC clusters to schedule access to the cluster as a |
@@ -6,7 +6,7 @@ whole. The cgroup freezer uses cgroups to describe the set of tasks to | |||
6 | be started/stopped by the batch job management system. It also provides | 6 | be started/stopped by the batch job management system. It also provides |
7 | a means to start and stop the tasks composing the job. | 7 | a means to start and stop the tasks composing the job. |
8 | 8 | ||
9 | The cgroup freezer will also be useful for checkpointing running groups | 9 | The cgroup freezer will also be useful for checkpointing running groups |
10 | of tasks. The freezer allows the checkpoint code to obtain a consistent | 10 | of tasks. The freezer allows the checkpoint code to obtain a consistent |
11 | image of the tasks by attempting to force the tasks in a cgroup into a | 11 | image of the tasks by attempting to force the tasks in a cgroup into a |
12 | quiescent state. Once the tasks are quiescent another task can | 12 | quiescent state. Once the tasks are quiescent another task can |
@@ -16,7 +16,7 @@ recoverable error occur. This also allows the checkpointed tasks to be | |||
16 | migrated between nodes in a cluster by copying the gathered information | 16 | migrated between nodes in a cluster by copying the gathered information |
17 | to another node and restarting the tasks there. | 17 | to another node and restarting the tasks there. |
18 | 18 | ||
19 | Sequences of SIGSTOP and SIGCONT are not always sufficient for stopping | 19 | Sequences of SIGSTOP and SIGCONT are not always sufficient for stopping |
20 | and resuming tasks in userspace. Both of these signals are observable | 20 | and resuming tasks in userspace. Both of these signals are observable |
21 | from within the tasks we wish to freeze. While SIGSTOP cannot be caught, | 21 | from within the tasks we wish to freeze. While SIGSTOP cannot be caught, |
22 | blocked, or ignored it can be seen by waiting or ptracing parent tasks. | 22 | blocked, or ignored it can be seen by waiting or ptracing parent tasks. |
@@ -37,26 +37,29 @@ demonstrate this problem using nested bash shells: | |||
37 | 37 | ||
38 | <at this point 16990 exits and causes 16644 to exit too> | 38 | <at this point 16990 exits and causes 16644 to exit too> |
39 | 39 | ||
40 | This happens because bash can observe both signals and choose how it | 40 | This happens because bash can observe both signals and choose how it |
41 | responds to them. | 41 | responds to them. |
42 | 42 | ||
43 | Another example of a program which catches and responds to these | 43 | Another example of a program which catches and responds to these |
44 | signals is gdb. In fact any program designed to use ptrace is likely to | 44 | signals is gdb. In fact any program designed to use ptrace is likely to |
45 | have a problem with this method of stopping and resuming tasks. | 45 | have a problem with this method of stopping and resuming tasks. |
46 | 46 | ||
47 | In contrast, the cgroup freezer uses the kernel freezer code to | 47 | In contrast, the cgroup freezer uses the kernel freezer code to |
48 | prevent the freeze/unfreeze cycle from becoming visible to the tasks | 48 | prevent the freeze/unfreeze cycle from becoming visible to the tasks |
49 | being frozen. This allows the bash example above and gdb to run as | 49 | being frozen. This allows the bash example above and gdb to run as |
50 | expected. | 50 | expected. |
51 | 51 | ||
52 | The freezer subsystem in the container filesystem defines a file named | 52 | The freezer subsystem in the container filesystem defines a file named |
53 | freezer.state. Writing "FROZEN" to the state file will freeze all tasks in the | 53 | freezer.state. Writing "FROZEN" to the state file will freeze all tasks in the |
54 | cgroup. Subsequently writing "THAWED" will unfreeze the tasks in the cgroup. | 54 | cgroup. Subsequently writing "THAWED" will unfreeze the tasks in the cgroup. |
55 | Reading will return the current state. | 55 | Reading will return the current state. |
56 | 56 | ||
57 | Note freezer.state doesn't exist in root cgroup, which means root cgroup | ||
58 | is non-freezable. | ||
59 | |||
57 | * Examples of usage : | 60 | * Examples of usage : |
58 | 61 | ||
59 | # mkdir /containers/freezer | 62 | # mkdir /containers |
60 | # mount -t cgroup -ofreezer freezer /containers | 63 | # mount -t cgroup -ofreezer freezer /containers |
61 | # mkdir /containers/0 | 64 | # mkdir /containers/0 |
62 | # echo $some_pid > /containers/0/tasks | 65 | # echo $some_pid > /containers/0/tasks |
@@ -94,6 +97,6 @@ things happens: | |||
94 | the freezer.state file | 97 | the freezer.state file |
95 | 2) Userspace retries the freezing operation by writing "FROZEN" to | 98 | 2) Userspace retries the freezing operation by writing "FROZEN" to |
96 | the freezer.state file (writing "FREEZING" is not legal | 99 | the freezer.state file (writing "FREEZING" is not legal |
97 | and returns EIO) | 100 | and returns EINVAL) |
98 | 3) The tasks that blocked the cgroup from entering the "FROZEN" | 101 | 3) The tasks that blocked the cgroup from entering the "FROZEN" |
99 | state disappear from the cgroup's set of tasks. | 102 | state disappear from the cgroup's set of tasks. |
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index 660590710409..fb249e2bcada 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -192,6 +192,13 @@ static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task) | |||
192 | */ | 192 | */ |
193 | freezer = task_freezer(task); | 193 | freezer = task_freezer(task); |
194 | 194 | ||
195 | /* | ||
196 | * The root cgroup is non-freezable, so we can skip the | ||
197 | * following check. | ||
198 | */ | ||
199 | if (!freezer->css.cgroup->parent) | ||
200 | return; | ||
201 | |||
195 | spin_lock_irq(&freezer->lock); | 202 | spin_lock_irq(&freezer->lock); |
196 | BUG_ON(freezer->state == CGROUP_FROZEN); | 203 | BUG_ON(freezer->state == CGROUP_FROZEN); |
197 | 204 | ||
@@ -335,7 +342,7 @@ static int freezer_write(struct cgroup *cgroup, | |||
335 | else if (strcmp(buffer, freezer_state_strs[CGROUP_FROZEN]) == 0) | 342 | else if (strcmp(buffer, freezer_state_strs[CGROUP_FROZEN]) == 0) |
336 | goal_state = CGROUP_FROZEN; | 343 | goal_state = CGROUP_FROZEN; |
337 | else | 344 | else |
338 | return -EIO; | 345 | return -EINVAL; |
339 | 346 | ||
340 | if (!cgroup_lock_live_group(cgroup)) | 347 | if (!cgroup_lock_live_group(cgroup)) |
341 | return -ENODEV; | 348 | return -ENODEV; |
@@ -354,6 +361,8 @@ static struct cftype files[] = { | |||
354 | 361 | ||
355 | static int freezer_populate(struct cgroup_subsys *ss, struct cgroup *cgroup) | 362 | static int freezer_populate(struct cgroup_subsys *ss, struct cgroup *cgroup) |
356 | { | 363 | { |
364 | if (!cgroup->parent) | ||
365 | return 0; | ||
357 | return cgroup_add_files(cgroup, ss, files, ARRAY_SIZE(files)); | 366 | return cgroup_add_files(cgroup, ss, files, ARRAY_SIZE(files)); |
358 | } | 367 | } |
359 | 368 | ||