diff options
author | Aleksa Sarai <cyphar@cyphar.com> | 2015-06-09 07:32:09 -0400 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2015-07-14 17:29:23 -0400 |
commit | 7e47682ea555e7c1edef1d8fd96e2aa4c12abe59 (patch) | |
tree | 6012cbc180ae7d633be4ed2ff1f1f6f7f188c1a0 | |
parent | d770e558e21961ad6cfdf0ff7df0eb5d7d4f0754 (diff) |
cgroup: allow a cgroup subsystem to reject a fork
Add a new cgroup subsystem callback can_fork that conditionally
states whether or not the fork is accepted or rejected by a cgroup
policy. In addition, add a cancel_fork callback so that if an error
occurs later in the forking process, any state modified by can_fork can
be reverted.
Allow for a private opaque pointer to be passed from cgroup_can_fork to
cgroup_post_fork, allowing for the fork state to be stored by each
subsystem separately.
Also add a tagging system for cgroup_subsys.h to allow for CGROUP_<TAG>
enumerations to be defined and used. In addition, explicitly add a
CGROUP_CANFORK_COUNT macro to make arrays easier to define.
This is in preparation for implementing the pids cgroup subsystem.
Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
-rw-r--r-- | include/linux/cgroup-defs.h | 10 | ||||
-rw-r--r-- | include/linux/cgroup.h | 15 | ||||
-rw-r--r-- | include/linux/cgroup_subsys.h | 23 | ||||
-rw-r--r-- | kernel/cgroup.c | 73 | ||||
-rw-r--r-- | kernel/cgroup_freezer.c | 2 | ||||
-rw-r--r-- | kernel/fork.c | 17 | ||||
-rw-r--r-- | kernel/sched/core.c | 2 |
7 files changed, 133 insertions, 9 deletions
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 93755a629299..83e37d8c4d80 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h | |||
@@ -34,12 +34,17 @@ struct seq_file; | |||
34 | 34 | ||
35 | /* define the enumeration of all cgroup subsystems */ | 35 | /* define the enumeration of all cgroup subsystems */ |
36 | #define SUBSYS(_x) _x ## _cgrp_id, | 36 | #define SUBSYS(_x) _x ## _cgrp_id, |
37 | #define SUBSYS_TAG(_t) CGROUP_ ## _t, \ | ||
38 | __unused_tag_ ## _t = CGROUP_ ## _t - 1, | ||
37 | enum cgroup_subsys_id { | 39 | enum cgroup_subsys_id { |
38 | #include <linux/cgroup_subsys.h> | 40 | #include <linux/cgroup_subsys.h> |
39 | CGROUP_SUBSYS_COUNT, | 41 | CGROUP_SUBSYS_COUNT, |
40 | }; | 42 | }; |
43 | #undef SUBSYS_TAG | ||
41 | #undef SUBSYS | 44 | #undef SUBSYS |
42 | 45 | ||
46 | #define CGROUP_CANFORK_COUNT (CGROUP_CANFORK_END - CGROUP_CANFORK_START) | ||
47 | |||
43 | /* bits in struct cgroup_subsys_state flags field */ | 48 | /* bits in struct cgroup_subsys_state flags field */ |
44 | enum { | 49 | enum { |
45 | CSS_NO_REF = (1 << 0), /* no reference counting for this css */ | 50 | CSS_NO_REF = (1 << 0), /* no reference counting for this css */ |
@@ -406,7 +411,9 @@ struct cgroup_subsys { | |||
406 | struct cgroup_taskset *tset); | 411 | struct cgroup_taskset *tset); |
407 | void (*attach)(struct cgroup_subsys_state *css, | 412 | void (*attach)(struct cgroup_subsys_state *css, |
408 | struct cgroup_taskset *tset); | 413 | struct cgroup_taskset *tset); |
409 | void (*fork)(struct task_struct *task); | 414 | int (*can_fork)(struct task_struct *task, void **priv_p); |
415 | void (*cancel_fork)(struct task_struct *task, void *priv); | ||
416 | void (*fork)(struct task_struct *task, void *priv); | ||
410 | void (*exit)(struct cgroup_subsys_state *css, | 417 | void (*exit)(struct cgroup_subsys_state *css, |
411 | struct cgroup_subsys_state *old_css, | 418 | struct cgroup_subsys_state *old_css, |
412 | struct task_struct *task); | 419 | struct task_struct *task); |
@@ -491,6 +498,7 @@ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) | |||
491 | 498 | ||
492 | #else /* CONFIG_CGROUPS */ | 499 | #else /* CONFIG_CGROUPS */ |
493 | 500 | ||
501 | #define CGROUP_CANFORK_COUNT 0 | ||
494 | #define CGROUP_SUBSYS_COUNT 0 | 502 | #define CGROUP_SUBSYS_COUNT 0 |
495 | 503 | ||
496 | static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) {} | 504 | static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) {} |
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index a593e299162e..a71fe2a3984e 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h | |||
@@ -62,7 +62,12 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, | |||
62 | struct pid *pid, struct task_struct *tsk); | 62 | struct pid *pid, struct task_struct *tsk); |
63 | 63 | ||
64 | void cgroup_fork(struct task_struct *p); | 64 | void cgroup_fork(struct task_struct *p); |
65 | void cgroup_post_fork(struct task_struct *p); | 65 | extern int cgroup_can_fork(struct task_struct *p, |
66 | void *ss_priv[CGROUP_CANFORK_COUNT]); | ||
67 | extern void cgroup_cancel_fork(struct task_struct *p, | ||
68 | void *ss_priv[CGROUP_CANFORK_COUNT]); | ||
69 | extern void cgroup_post_fork(struct task_struct *p, | ||
70 | void *old_ss_priv[CGROUP_CANFORK_COUNT]); | ||
66 | void cgroup_exit(struct task_struct *p); | 71 | void cgroup_exit(struct task_struct *p); |
67 | 72 | ||
68 | int cgroup_init_early(void); | 73 | int cgroup_init_early(void); |
@@ -524,7 +529,13 @@ static inline int cgroupstats_build(struct cgroupstats *stats, | |||
524 | struct dentry *dentry) { return -EINVAL; } | 529 | struct dentry *dentry) { return -EINVAL; } |
525 | 530 | ||
526 | static inline void cgroup_fork(struct task_struct *p) {} | 531 | static inline void cgroup_fork(struct task_struct *p) {} |
527 | static inline void cgroup_post_fork(struct task_struct *p) {} | 532 | static inline int cgroup_can_fork(struct task_struct *p, |
533 | void *ss_priv[CGROUP_CANFORK_COUNT]) | ||
534 | { return 0; } | ||
535 | static inline void cgroup_cancel_fork(struct task_struct *p, | ||
536 | void *ss_priv[CGROUP_CANFORK_COUNT]) {} | ||
537 | static inline void cgroup_post_fork(struct task_struct *p, | ||
538 | void *ss_priv[CGROUP_CANFORK_COUNT]) {} | ||
528 | static inline void cgroup_exit(struct task_struct *p) {} | 539 | static inline void cgroup_exit(struct task_struct *p) {} |
529 | 540 | ||
530 | static inline int cgroup_init_early(void) { return 0; } | 541 | static inline int cgroup_init_early(void) { return 0; } |
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index e4a96fb14403..ec43bce7e1ea 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h | |||
@@ -3,6 +3,17 @@ | |||
3 | * | 3 | * |
4 | * DO NOT ADD ANY SUBSYSTEM WITHOUT EXPLICIT ACKS FROM CGROUP MAINTAINERS. | 4 | * DO NOT ADD ANY SUBSYSTEM WITHOUT EXPLICIT ACKS FROM CGROUP MAINTAINERS. |
5 | */ | 5 | */ |
6 | |||
7 | /* | ||
8 | * This file *must* be included with SUBSYS() defined. | ||
9 | * SUBSYS_TAG() is a noop if undefined. | ||
10 | */ | ||
11 | |||
12 | #ifndef SUBSYS_TAG | ||
13 | #define __TMP_SUBSYS_TAG | ||
14 | #define SUBSYS_TAG(_x) | ||
15 | #endif | ||
16 | |||
6 | #if IS_ENABLED(CONFIG_CPUSETS) | 17 | #if IS_ENABLED(CONFIG_CPUSETS) |
7 | SUBSYS(cpuset) | 18 | SUBSYS(cpuset) |
8 | #endif | 19 | #endif |
@@ -48,11 +59,23 @@ SUBSYS(hugetlb) | |||
48 | #endif | 59 | #endif |
49 | 60 | ||
50 | /* | 61 | /* |
62 | * Subsystems that implement the can_fork() family of callbacks. | ||
63 | */ | ||
64 | SUBSYS_TAG(CANFORK_START) | ||
65 | SUBSYS_TAG(CANFORK_END) | ||
66 | |||
67 | /* | ||
51 | * The following subsystems are not supported on the default hierarchy. | 68 | * The following subsystems are not supported on the default hierarchy. |
52 | */ | 69 | */ |
53 | #if IS_ENABLED(CONFIG_CGROUP_DEBUG) | 70 | #if IS_ENABLED(CONFIG_CGROUP_DEBUG) |
54 | SUBSYS(debug) | 71 | SUBSYS(debug) |
55 | #endif | 72 | #endif |
73 | |||
74 | #ifdef __TMP_SUBSYS_TAG | ||
75 | #undef __TMP_SUBSYS_TAG | ||
76 | #undef SUBSYS_TAG | ||
77 | #endif | ||
78 | |||
56 | /* | 79 | /* |
57 | * DO NOT ADD ANY SUBSYSTEM WITHOUT EXPLICIT ACKS FROM CGROUP MAINTAINERS. | 80 | * DO NOT ADD ANY SUBSYSTEM WITHOUT EXPLICIT ACKS FROM CGROUP MAINTAINERS. |
58 | */ | 81 | */ |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index f89d9292eee6..a59dd1a6b74a 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -186,6 +186,9 @@ static u64 css_serial_nr_next = 1; | |||
186 | static unsigned long have_fork_callback __read_mostly; | 186 | static unsigned long have_fork_callback __read_mostly; |
187 | static unsigned long have_exit_callback __read_mostly; | 187 | static unsigned long have_exit_callback __read_mostly; |
188 | 188 | ||
189 | /* Ditto for the can_fork callback. */ | ||
190 | static unsigned long have_canfork_callback __read_mostly; | ||
191 | |||
189 | static struct cftype cgroup_dfl_base_files[]; | 192 | static struct cftype cgroup_dfl_base_files[]; |
190 | static struct cftype cgroup_legacy_base_files[]; | 193 | static struct cftype cgroup_legacy_base_files[]; |
191 | 194 | ||
@@ -4955,6 +4958,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early) | |||
4955 | 4958 | ||
4956 | have_fork_callback |= (bool)ss->fork << ss->id; | 4959 | have_fork_callback |= (bool)ss->fork << ss->id; |
4957 | have_exit_callback |= (bool)ss->exit << ss->id; | 4960 | have_exit_callback |= (bool)ss->exit << ss->id; |
4961 | have_canfork_callback |= (bool)ss->can_fork << ss->id; | ||
4958 | 4962 | ||
4959 | /* At system boot, before all subsystems have been | 4963 | /* At system boot, before all subsystems have been |
4960 | * registered, no tasks have been forked, so we don't | 4964 | * registered, no tasks have been forked, so we don't |
@@ -5197,6 +5201,19 @@ static const struct file_operations proc_cgroupstats_operations = { | |||
5197 | .release = single_release, | 5201 | .release = single_release, |
5198 | }; | 5202 | }; |
5199 | 5203 | ||
5204 | static void **subsys_canfork_priv_p(void *ss_priv[CGROUP_CANFORK_COUNT], int i) | ||
5205 | { | ||
5206 | if (CGROUP_CANFORK_START <= i && i < CGROUP_CANFORK_END) | ||
5207 | return &ss_priv[i - CGROUP_CANFORK_START]; | ||
5208 | return NULL; | ||
5209 | } | ||
5210 | |||
5211 | static void *subsys_canfork_priv(void *ss_priv[CGROUP_CANFORK_COUNT], int i) | ||
5212 | { | ||
5213 | void **private = subsys_canfork_priv_p(ss_priv, i); | ||
5214 | return private ? *private : NULL; | ||
5215 | } | ||
5216 | |||
5200 | /** | 5217 | /** |
5201 | * cgroup_fork - initialize cgroup related fields during copy_process() | 5218 | * cgroup_fork - initialize cgroup related fields during copy_process() |
5202 | * @child: pointer to task_struct of forking parent process. | 5219 | * @child: pointer to task_struct of forking parent process. |
@@ -5212,6 +5229,57 @@ void cgroup_fork(struct task_struct *child) | |||
5212 | } | 5229 | } |
5213 | 5230 | ||
5214 | /** | 5231 | /** |
5232 | * cgroup_can_fork - called on a new task before the process is exposed | ||
5233 | * @child: the task in question. | ||
5234 | * | ||
5235 | * This calls the subsystem can_fork() callbacks. If the can_fork() callback | ||
5236 | * returns an error, the fork aborts with that error code. This allows for | ||
5237 | * a cgroup subsystem to conditionally allow or deny new forks. | ||
5238 | */ | ||
5239 | int cgroup_can_fork(struct task_struct *child, | ||
5240 | void *ss_priv[CGROUP_CANFORK_COUNT]) | ||
5241 | { | ||
5242 | struct cgroup_subsys *ss; | ||
5243 | int i, j, ret; | ||
5244 | |||
5245 | for_each_subsys_which(ss, i, &have_canfork_callback) { | ||
5246 | ret = ss->can_fork(child, subsys_canfork_priv_p(ss_priv, i)); | ||
5247 | if (ret) | ||
5248 | goto out_revert; | ||
5249 | } | ||
5250 | |||
5251 | return 0; | ||
5252 | |||
5253 | out_revert: | ||
5254 | for_each_subsys(ss, j) { | ||
5255 | if (j >= i) | ||
5256 | break; | ||
5257 | if (ss->cancel_fork) | ||
5258 | ss->cancel_fork(child, subsys_canfork_priv(ss_priv, j)); | ||
5259 | } | ||
5260 | |||
5261 | return ret; | ||
5262 | } | ||
5263 | |||
5264 | /** | ||
5265 | * cgroup_cancel_fork - called if a fork failed after cgroup_can_fork() | ||
5266 | * @child: the task in question | ||
5267 | * | ||
5268 | * This calls the cancel_fork() callbacks if a fork failed *after* | ||
5269 | * cgroup_can_fork() succeeded. | ||
5270 | */ | ||
5271 | void cgroup_cancel_fork(struct task_struct *child, | ||
5272 | void *ss_priv[CGROUP_CANFORK_COUNT]) | ||
5273 | { | ||
5274 | struct cgroup_subsys *ss; | ||
5275 | int i; | ||
5276 | |||
5277 | for_each_subsys(ss, i) | ||
5278 | if (ss->cancel_fork) | ||
5279 | ss->cancel_fork(child, subsys_canfork_priv(ss_priv, i)); | ||
5280 | } | ||
5281 | |||
5282 | /** | ||
5215 | * cgroup_post_fork - called on a new task after adding it to the task list | 5283 | * cgroup_post_fork - called on a new task after adding it to the task list |
5216 | * @child: the task in question | 5284 | * @child: the task in question |
5217 | * | 5285 | * |
@@ -5221,7 +5289,8 @@ void cgroup_fork(struct task_struct *child) | |||
5221 | * cgroup_task_iter_start() - to guarantee that the new task ends up on its | 5289 | * cgroup_task_iter_start() - to guarantee that the new task ends up on its |
5222 | * list. | 5290 | * list. |
5223 | */ | 5291 | */ |
5224 | void cgroup_post_fork(struct task_struct *child) | 5292 | void cgroup_post_fork(struct task_struct *child, |
5293 | void *old_ss_priv[CGROUP_CANFORK_COUNT]) | ||
5225 | { | 5294 | { |
5226 | struct cgroup_subsys *ss; | 5295 | struct cgroup_subsys *ss; |
5227 | int i; | 5296 | int i; |
@@ -5266,7 +5335,7 @@ void cgroup_post_fork(struct task_struct *child) | |||
5266 | * and addition to css_set. | 5335 | * and addition to css_set. |
5267 | */ | 5336 | */ |
5268 | for_each_subsys_which(ss, i, &have_fork_callback) | 5337 | for_each_subsys_which(ss, i, &have_fork_callback) |
5269 | ss->fork(child); | 5338 | ss->fork(child, subsys_canfork_priv(old_ss_priv, i)); |
5270 | } | 5339 | } |
5271 | 5340 | ||
5272 | /** | 5341 | /** |
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index 92b98cc0ee76..f1b30ad5dc6d 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c | |||
@@ -203,7 +203,7 @@ static void freezer_attach(struct cgroup_subsys_state *new_css, | |||
203 | * to do anything as freezer_attach() will put @task into the appropriate | 203 | * to do anything as freezer_attach() will put @task into the appropriate |
204 | * state. | 204 | * state. |
205 | */ | 205 | */ |
206 | static void freezer_fork(struct task_struct *task) | 206 | static void freezer_fork(struct task_struct *task, void *private) |
207 | { | 207 | { |
208 | struct freezer *freezer; | 208 | struct freezer *freezer; |
209 | 209 | ||
diff --git a/kernel/fork.c b/kernel/fork.c index 1bfefc6f96a4..40e3af12c55e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -1239,6 +1239,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1239 | { | 1239 | { |
1240 | int retval; | 1240 | int retval; |
1241 | struct task_struct *p; | 1241 | struct task_struct *p; |
1242 | void *cgrp_ss_priv[CGROUP_CANFORK_COUNT] = {}; | ||
1242 | 1243 | ||
1243 | if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) | 1244 | if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) |
1244 | return ERR_PTR(-EINVAL); | 1245 | return ERR_PTR(-EINVAL); |
@@ -1513,6 +1514,16 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1513 | p->task_works = NULL; | 1514 | p->task_works = NULL; |
1514 | 1515 | ||
1515 | /* | 1516 | /* |
1517 | * Ensure that the cgroup subsystem policies allow the new process to be | ||
1518 | * forked. It should be noted that the new process's css_set can be changed | ||
1519 | * between here and cgroup_post_fork() if an organisation operation is in | ||
1520 | * progress. | ||
1521 | */ | ||
1522 | retval = cgroup_can_fork(p, cgrp_ss_priv); | ||
1523 | if (retval) | ||
1524 | goto bad_fork_free_pid; | ||
1525 | |||
1526 | /* | ||
1516 | * Make it visible to the rest of the system, but dont wake it up yet. | 1527 | * Make it visible to the rest of the system, but dont wake it up yet. |
1517 | * Need tasklist lock for parent etc handling! | 1528 | * Need tasklist lock for parent etc handling! |
1518 | */ | 1529 | */ |
@@ -1548,7 +1559,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1548 | spin_unlock(¤t->sighand->siglock); | 1559 | spin_unlock(¤t->sighand->siglock); |
1549 | write_unlock_irq(&tasklist_lock); | 1560 | write_unlock_irq(&tasklist_lock); |
1550 | retval = -ERESTARTNOINTR; | 1561 | retval = -ERESTARTNOINTR; |
1551 | goto bad_fork_free_pid; | 1562 | goto bad_fork_cancel_cgroup; |
1552 | } | 1563 | } |
1553 | 1564 | ||
1554 | if (likely(p->pid)) { | 1565 | if (likely(p->pid)) { |
@@ -1590,7 +1601,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1590 | write_unlock_irq(&tasklist_lock); | 1601 | write_unlock_irq(&tasklist_lock); |
1591 | 1602 | ||
1592 | proc_fork_connector(p); | 1603 | proc_fork_connector(p); |
1593 | cgroup_post_fork(p); | 1604 | cgroup_post_fork(p, cgrp_ss_priv); |
1594 | if (clone_flags & CLONE_THREAD) | 1605 | if (clone_flags & CLONE_THREAD) |
1595 | threadgroup_change_end(current); | 1606 | threadgroup_change_end(current); |
1596 | perf_event_fork(p); | 1607 | perf_event_fork(p); |
@@ -1600,6 +1611,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1600 | 1611 | ||
1601 | return p; | 1612 | return p; |
1602 | 1613 | ||
1614 | bad_fork_cancel_cgroup: | ||
1615 | cgroup_cancel_fork(p, cgrp_ss_priv); | ||
1603 | bad_fork_free_pid: | 1616 | bad_fork_free_pid: |
1604 | if (pid != &init_struct_pid) | 1617 | if (pid != &init_struct_pid) |
1605 | free_pid(pid); | 1618 | free_pid(pid); |
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 78b4bad10081..d811652fe6f5 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
@@ -8068,7 +8068,7 @@ static void cpu_cgroup_css_offline(struct cgroup_subsys_state *css) | |||
8068 | sched_offline_group(tg); | 8068 | sched_offline_group(tg); |
8069 | } | 8069 | } |
8070 | 8070 | ||
8071 | static void cpu_cgroup_fork(struct task_struct *task) | 8071 | static void cpu_cgroup_fork(struct task_struct *task, void *private) |
8072 | { | 8072 | { |
8073 | sched_move_task(task); | 8073 | sched_move_task(task); |
8074 | } | 8074 | } |