 Documentation/cgroups/cgroups.txt |   3
 Documentation/cgroups/devices.txt |  70
 arch/powerpc/mm/numa.c            |   1
 block/blk-cgroup.h                |   2
 include/linux/cgroup.h            | 170
 include/linux/cpuset.h            |   1
 include/linux/res_counter.h       |   2
 kernel/cgroup.c                   | 724
 kernel/cpuset.c                   | 115
 kernel/events/core.c              |  24
 mm/memcontrol.c                   |  80
 security/device_cgroup.c          | 267

 12 files changed, 826 insertions(+), 633 deletions(-)
diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt
index bcf1a00b06a1..638bf17ff869 100644
--- a/Documentation/cgroups/cgroups.txt
+++ b/Documentation/cgroups/cgroups.txt
@@ -442,7 +442,7 @@ You can attach the current shell task by echoing 0:
 You can use the cgroup.procs file instead of the tasks file to move all
 threads in a threadgroup at once. Echoing the PID of any task in a
 threadgroup to cgroup.procs causes all tasks in that threadgroup to be
-be attached to the cgroup. Writing 0 to cgroup.procs moves all tasks
+attached to the cgroup. Writing 0 to cgroup.procs moves all tasks
 in the writing task's threadgroup.
 
 Note: Since every task is always a member of exactly one cgroup in each
@@ -580,6 +580,7 @@ propagation along the hierarchy.  See the comment on
 cgroup_for_each_descendant_pre() for details.
 
 void css_offline(struct cgroup *cgrp);
+(cgroup_mutex held by caller)
 
 This is the counterpart of css_online() and called iff css_online()
 has succeeded on @cgrp. This signifies the beginning of the end of
diff --git a/Documentation/cgroups/devices.txt b/Documentation/cgroups/devices.txt
index 16624a7f8222..3c1095ca02ea 100644
--- a/Documentation/cgroups/devices.txt
+++ b/Documentation/cgroups/devices.txt
@@ -13,9 +13,7 @@ either an integer or * for all.  Access is a composition of r
 The root device cgroup starts with rwm to 'all'.  A child device
 cgroup gets a copy of the parent.  Administrators can then remove
 devices from the whitelist or add new entries.  A child cgroup can
-never receive a device access which is denied by its parent.  However
-when a device access is removed from a parent it will not also be
-removed from the child(ren).
+never receive a device access which is denied by its parent.
 
 2. User Interface
 
@@ -50,3 +48,69 @@ task to a new cgroup.  (Again we'll probably want to change that).
 
 A cgroup may not be granted more permissions than the cgroup's
 parent has.
+
+4. Hierarchy
+
+device cgroups maintain hierarchy by making sure a cgroup never has more
+access permissions than its parent.  Every time an entry is written to
+a cgroup's devices.deny file, all its children will have that entry removed
+from their whitelist and all the locally set whitelist entries will be
+re-evaluated.  In case one of the locally set whitelist entries would provide
+more access than the cgroup's parent, it'll be removed from the whitelist.
+
+Example:
+      A
+     / \
+        B
+
+    group        behavior	exceptions
+    A            allow		"b 8:* rwm", "c 116:1 rw"
+    B            deny		"c 1:3 rwm", "c 116:2 rwm", "b 3:* rwm"
+
+If a device is denied in group A:
+	# echo "c 116:* r" > A/devices.deny
+it'll propagate down and after revalidating B's entries, the whitelist entry
+"c 116:2 rwm" will be removed:
+
+    group        whitelist entries                  denied devices
+    A            all                                "b 8:* rwm", "c 116:* rw"
+    B            "c 1:3 rwm", "b 3:* rwm"           all the rest
+
+In case parent's exceptions change and local exceptions are not allowed
+anymore, they'll be deleted.
+
+Notice that new whitelist entries will not be propagated:
+      A
+     / \
+        B
+
+    group        whitelist entries                  denied devices
+    A            "c 1:3 rwm", "c 1:5 r"             all the rest
+    B            "c 1:3 rwm", "c 1:5 r"             all the rest
+
+when adding "c *:3 rwm":
+	# echo "c *:3 rwm" >A/devices.allow
+
+the result:
+    group        whitelist entries                  denied devices
+    A            "c *:3 rwm", "c 1:5 r"             all the rest
+    B            "c 1:3 rwm", "c 1:5 r"             all the rest
+
+but now it'll be possible to add new entries to B:
+	# echo "c 2:3 rwm" >B/devices.allow
+	# echo "c 50:3 r" >B/devices.allow
+or even
+	# echo "c *:3 rwm" >B/devices.allow
+
+Allowing or denying all by writing 'a' to devices.allow or devices.deny will
+not be possible once the device cgroups has children.
+
+4.1 Hierarchy (internal implementation)
+
+device cgroups is implemented internally using a behavior (ALLOW, DENY) and a
+list of exceptions.  The internal state is controlled using the same user
+interface to preserve compatibility with the previous whitelist-only
+implementation.  Removal or addition of exceptions that will reduce the access
+to devices will be propagated down the hierarchy.
+For every propagated exception, the effective rules will be re-evaluated based
+on current parent's access rules.
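The containment rule above -- a child may never end up with more access than its parent -- boils down to checking whether a parent rule covers a child exception, wildcards and access bits included. The standalone sketch below models that single check in userspace; the types and names are invented, and this is not the security/device_cgroup.c implementation:

#include <stdbool.h>
#include <stdio.h>

/* Illustrative model only.  A rule is "type major:minor access", where
 * major/minor may be -1 to stand for '*' and access is an r/w/m bitmask. */
struct dev_rule {
	char type;		/* 'b' or 'c' */
	int major;		/* -1 == '*' */
	int minor;		/* -1 == '*' */
	unsigned access;	/* bit 0 = r, bit 1 = w, bit 2 = m */
};

/* Does allow-rule @a cover everything that @b grants? */
static bool rule_covers(const struct dev_rule *a, const struct dev_rule *b)
{
	if (a->type != b->type)
		return false;
	if (a->major != -1 && a->major != b->major)
		return false;
	if (a->minor != -1 && a->minor != b->minor)
		return false;
	/* every access bit granted by b must also be in a */
	return (b->access & ~a->access) == 0;
}

int main(void)
{
	struct dev_rule parent = { 'c', 116, -1, 0x1 };	/* "c 116:* r" */
	struct dev_rule child  = { 'c', 116,  2, 0x7 };	/* "c 116:2 rwm" */

	/* The child grants w and m that the parent no longer allows,
	 * so re-evaluation would drop it -- prints "no". */
	printf("child rule kept: %s\n",
	       rule_covers(&parent, &child) ? "yes" : "no");
	return 0;
}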
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index b8020dc7b71e..fa33c546e778 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -22,6 +22,7 @@
 #include <linux/pfn.h>
 #include <linux/cpuset.h>
 #include <linux/node.h>
+#include <linux/slab.h>
 #include <asm/sparsemem.h>
 #include <asm/prom.h>
 #include <asm/smp.h>
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index f2b292925ccd..4e595ee8c915 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -247,9 +247,7 @@ static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
 {
	int ret;
 
-	rcu_read_lock();
	ret = cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
-	rcu_read_unlock();
	if (ret)
		strncpy(buf, "<unavailable>", buflen);
	return ret;
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 470073bf93d0..d86e215ca2b8 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -19,6 +19,7 @@
 #include <linux/idr.h>
 #include <linux/workqueue.h>
 #include <linux/xattr.h>
+#include <linux/fs.h>
 
 #ifdef CONFIG_CGROUPS
 
@@ -30,10 +31,6 @@ struct css_id;
 
 extern int cgroup_init_early(void);
 extern int cgroup_init(void);
-extern void cgroup_lock(void);
-extern int cgroup_lock_is_held(void);
-extern bool cgroup_lock_live_group(struct cgroup *cgrp);
-extern void cgroup_unlock(void);
 extern void cgroup_fork(struct task_struct *p);
 extern void cgroup_post_fork(struct task_struct *p);
 extern void cgroup_exit(struct task_struct *p, int run_callbacks);
@@ -44,14 +41,25 @@ extern void cgroup_unload_subsys(struct cgroup_subsys *ss);
 
 extern const struct file_operations proc_cgroup_operations;
 
-/* Define the enumeration of all builtin cgroup subsystems */
+/*
+ * Define the enumeration of all cgroup subsystems.
+ *
+ * We define ids for builtin subsystems and then modular ones.
+ */
 #define SUBSYS(_x) _x ## _subsys_id,
-#define IS_SUBSYS_ENABLED(option) IS_ENABLED(option)
 enum cgroup_subsys_id {
+#define IS_SUBSYS_ENABLED(option) IS_BUILTIN(option)
+#include <linux/cgroup_subsys.h>
+#undef IS_SUBSYS_ENABLED
+	CGROUP_BUILTIN_SUBSYS_COUNT,
+
+	__CGROUP_SUBSYS_TEMP_PLACEHOLDER = CGROUP_BUILTIN_SUBSYS_COUNT - 1,
+
+#define IS_SUBSYS_ENABLED(option) IS_MODULE(option)
 #include <linux/cgroup_subsys.h>
+#undef IS_SUBSYS_ENABLED
	CGROUP_SUBSYS_COUNT,
 };
-#undef IS_SUBSYS_ENABLED
 #undef SUBSYS
 
 /* Per-subsystem/per-cgroup state maintained by the system. */
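For readers unfamiliar with the double-include trick above: cgroup_subsys.h is now pulled in twice, once with IS_SUBSYS_ENABLED() mapped to IS_BUILTIN() and once mapped to IS_MODULE(), so builtin controllers get the low ids and modular ones follow immediately after. A hand-expanded sketch of what the preprocessor produces, assuming (purely for illustration) two builtin controllers and one modular one:

/* Hand-expanded illustration of the enum above, assuming cpuset and
 * cpu_cgroup are builtin and net_cls is modular.  IS_BUILTIN()/IS_MODULE()
 * evaluate to 1 or 0 at preprocessing time, so only the matching
 * SUBSYS() lines survive in each half. */
enum cgroup_subsys_id {
	cpuset_subsys_id,		/* 0: builtin */
	cpu_cgroup_subsys_id,		/* 1: builtin */
	CGROUP_BUILTIN_SUBSYS_COUNT,	/* 2 */

	/* rewind by one so modular ids continue right after the builtins,
	 * instead of leaving a hole where the count enumerator sits */
	__CGROUP_SUBSYS_TEMP_PLACEHOLDER = CGROUP_BUILTIN_SUBSYS_COUNT - 1,

	net_cls_subsys_id,		/* 2: modular */
	CGROUP_SUBSYS_COUNT,		/* 3 */
};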
@@ -148,6 +156,13 @@ enum {
	 * specified at mount time and thus is implemented here.
	 */
	CGRP_CPUSET_CLONE_CHILDREN,
+	/* see the comment above CGRP_ROOT_SANE_BEHAVIOR for details */
+	CGRP_SANE_BEHAVIOR,
+};
+
+struct cgroup_name {
+	struct rcu_head rcu_head;
+	char name[];
 };
 
 struct cgroup {
@@ -172,11 +187,23 @@ struct cgroup {
	struct cgroup *parent;		/* my parent */
	struct dentry *dentry;		/* cgroup fs entry, RCU protected */
 
+	/*
+	 * This is a copy of dentry->d_name, and it's needed because
+	 * we can't use dentry->d_name in cgroup_path().
+	 *
+	 * You must acquire rcu_read_lock() to access cgrp->name, and
+	 * the only place that can change it is rename(), which is
+	 * protected by parent dir's i_mutex.
+	 *
+	 * Normally you should use cgroup_name() wrapper rather than
+	 * access it directly.
+	 */
+	struct cgroup_name __rcu *name;
+
	/* Private pointers for each registered subsystem */
	struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
 
	struct cgroupfs_root *root;
-	struct cgroup *top_cgroup;
 
	/*
	 * List of cg_cgroup_links pointing at css_sets with
@@ -213,6 +240,96 @@ struct cgroup {
	struct simple_xattrs xattrs;
 };
 
+#define MAX_CGROUP_ROOT_NAMELEN 64
+
+/* cgroupfs_root->flags */
+enum {
+	/*
+	 * Unfortunately, cgroup core and various controllers are riddled
+	 * with idiosyncrasies and pointless options.  The following flag,
+	 * when set, will force sane behavior - some options are forced on,
+	 * others are disallowed, and some controllers will change their
+	 * hierarchical or other behaviors.
+	 *
+	 * The set of behaviors affected by this flag are still being
+	 * determined and developed and the mount option for this flag is
+	 * prefixed with __DEVEL__.  The prefix will be dropped once we
+	 * reach the point where all behaviors are compatible with the
+	 * planned unified hierarchy, which will automatically turn on this
+	 * flag.
+	 *
+	 * The followings are the behaviors currently affected this flag.
+	 *
+	 * - Mount options "noprefix" and "clone_children" are disallowed.
+	 *   Also, cgroupfs file cgroup.clone_children is not created.
+	 *
+	 * - When mounting an existing superblock, mount options should
+	 *   match.
+	 *
+	 * - Remount is disallowed.
+	 *
+	 * - memcg: use_hierarchy is on by default and the cgroup file for
+	 *   the flag is not created.
+	 *
+	 * The followings are planned changes.
+	 *
+	 * - release_agent will be disallowed once replacement notification
+	 *   mechanism is implemented.
+	 */
+	CGRP_ROOT_SANE_BEHAVIOR	= (1 << 0),
+
+	CGRP_ROOT_NOPREFIX	= (1 << 1), /* mounted subsystems have no named prefix */
+	CGRP_ROOT_XATTR		= (1 << 2), /* supports extended attributes */
+};
+
+/*
+ * A cgroupfs_root represents the root of a cgroup hierarchy, and may be
+ * associated with a superblock to form an active hierarchy.  This is
+ * internal to cgroup core.  Don't access directly from controllers.
+ */
+struct cgroupfs_root {
+	struct super_block *sb;
+
+	/*
+	 * The bitmask of subsystems intended to be attached to this
+	 * hierarchy
+	 */
+	unsigned long subsys_mask;
+
+	/* Unique id for this hierarchy. */
+	int hierarchy_id;
+
+	/* The bitmask of subsystems currently attached to this hierarchy */
+	unsigned long actual_subsys_mask;
+
+	/* A list running through the attached subsystems */
+	struct list_head subsys_list;
+
+	/* The root cgroup for this hierarchy */
+	struct cgroup top_cgroup;
+
+	/* Tracks how many cgroups are currently defined in hierarchy.*/
+	int number_of_cgroups;
+
+	/* A list running through the active hierarchies */
+	struct list_head root_list;
+
+	/* All cgroups on this root, cgroup_mutex protected */
+	struct list_head allcg_list;
+
+	/* Hierarchy-specific flags */
+	unsigned long flags;
+
+	/* IDs for cgroups in this hierarchy */
+	struct ida cgroup_ida;
+
+	/* The path to use for release notifications. */
+	char release_agent_path[PATH_MAX];
+
+	/* The name for this hierarchy - may be empty */
+	char name[MAX_CGROUP_ROOT_NAMELEN];
+};
+
 /*
 * A css_set is a structure holding pointers to a set of
 * cgroup_subsys_state objects. This saves space in the task struct
@@ -278,6 +395,7 @@ struct cgroup_map_cb {
 /* cftype->flags */
 #define CFTYPE_ONLY_ON_ROOT	(1U << 0)	/* only create on root cg */
 #define CFTYPE_NOT_ON_ROOT	(1U << 1)	/* don't create on root cg */
+#define CFTYPE_INSANE		(1U << 2)	/* don't create if sane_behavior */
 
 #define MAX_CFTYPE_NAME		64
 
@@ -304,9 +422,6 @@ struct cftype {
	/* CFTYPE_* flags */
	unsigned int flags;
 
-	/* file xattrs */
-	struct simple_xattrs xattrs;
-
	int (*open)(struct inode *inode, struct file *file);
	ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft,
			struct file *file,
@@ -404,18 +519,31 @@ struct cgroup_scanner {
	void *data;
 };
 
+/*
+ * See the comment above CGRP_ROOT_SANE_BEHAVIOR for details.  This
+ * function can be called as long as @cgrp is accessible.
+ */
+static inline bool cgroup_sane_behavior(const struct cgroup *cgrp)
+{
+	return cgrp->root->flags & CGRP_ROOT_SANE_BEHAVIOR;
+}
+
+/* Caller should hold rcu_read_lock() */
+static inline const char *cgroup_name(const struct cgroup *cgrp)
+{
+	return rcu_dereference(cgrp->name)->name;
+}
+
 int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
 int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
 
 int cgroup_is_removed(const struct cgroup *cgrp);
+bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor);
 
 int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen);
 
 int cgroup_task_count(const struct cgroup *cgrp);
 
-/* Return true if cgrp is a descendant of the task's cgroup */
-int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task);
-
 /*
 * Control Group taskset, used to pass around set of tasks to cgroup_subsys
 * methods.
@@ -523,10 +651,16 @@ static inline struct cgroup_subsys_state *cgroup_subsys_state(
 * rcu_dereference_check() conditions, such as locks used during the
 * cgroup_subsys::attach() methods.
 */
+#ifdef CONFIG_PROVE_RCU
+extern struct mutex cgroup_mutex;
+#define task_subsys_state_check(task, subsys_id, __c)			\
+	rcu_dereference_check((task)->cgroups->subsys[(subsys_id)],	\
+			      lockdep_is_held(&(task)->alloc_lock) ||	\
+			      lockdep_is_held(&cgroup_mutex) || (__c))
+#else
 #define task_subsys_state_check(task, subsys_id, __c)			\
-	rcu_dereference_check(task->cgroups->subsys[subsys_id],		\
-			      lockdep_is_held(&task->alloc_lock) ||	\
-			      cgroup_lock_is_held() || (__c))
+	rcu_dereference((task)->cgroups->subsys[(subsys_id)])
+#endif
 
 static inline struct cgroup_subsys_state *
 task_subsys_state(struct task_struct *task, int subsys_id)
@@ -661,8 +795,8 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
					  struct cgroup_iter *it);
 void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it);
 int cgroup_scan_tasks(struct cgroup_scanner *scan);
-int cgroup_attach_task(struct cgroup *, struct task_struct *);
 int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
+int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);
 
 /*
 * CSS ID is ID for cgroup_subsys_state structs under subsys. This only works
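A hypothetical user of the two new helpers might look like the sketch below; this is not code from the patch, just an illustration of the required RCU discipline around cgroup_name():

/* Sketch of an invented caller (not part of this patch). */
static void my_ctrl_log_name(struct cgroup *cgrp)
{
	/* cgrp->name is RCU-managed: cgroup_name() must be called under
	 * rcu_read_lock(), and the returned pointer must not be used
	 * after rcu_read_unlock(), since rename() may free the old name. */
	rcu_read_lock();
	pr_info("cgroup %s (sane_behavior=%d)\n",
		cgroup_name(cgrp), cgroup_sane_behavior(cgrp));
	rcu_read_unlock();
}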
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 8c8a60d29407..ccd1de8ad822 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -11,7 +11,6 @@
 #include <linux/sched.h>
 #include <linux/cpumask.h>
 #include <linux/nodemask.h>
-#include <linux/cgroup.h>
 #include <linux/mm.h>
 
 #ifdef CONFIG_CPUSETS
diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index c23099413ad6..96a509b6be04 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -13,7 +13,7 @@
 * info about what this counter is.
 */
 
-#include <linux/cgroup.h>
+#include <linux/spinlock.h>
 #include <linux/errno.h>
 
 /*
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 1f628bc039f4..eeb7e49946b2 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -30,7 +30,6 @@
 #include <linux/cred.h>
 #include <linux/ctype.h>
 #include <linux/errno.h>
-#include <linux/fs.h>
 #include <linux/init_task.h>
 #include <linux/kernel.h>
 #include <linux/list.h>
@@ -59,7 +58,7 @@
 #include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
 #include <linux/eventfd.h>
 #include <linux/poll.h>
-#include <linux/flex_array.h> /* used in cgroup_attach_proc */
+#include <linux/flex_array.h> /* used in cgroup_attach_task */
 #include <linux/kthread.h>
 
 #include <linux/atomic.h>
@@ -83,7 +82,13 @@
 * B happens only through cgroup_show_options() and using cgroup_root_mutex
 * breaks it.
 */
+#ifdef CONFIG_PROVE_RCU
+DEFINE_MUTEX(cgroup_mutex);
+EXPORT_SYMBOL_GPL(cgroup_mutex);	/* only for task_subsys_state_check() */
+#else
 static DEFINE_MUTEX(cgroup_mutex);
+#endif
+
 static DEFINE_MUTEX(cgroup_root_mutex);
 
 /*
@@ -98,56 +103,6 @@ static struct cgroup_subsys *subsys[CGROUP_SUBSYS_COUNT] = {
 #include <linux/cgroup_subsys.h>
 };
 
-#define MAX_CGROUP_ROOT_NAMELEN 64
-
-/*
- * A cgroupfs_root represents the root of a cgroup hierarchy,
- * and may be associated with a superblock to form an active
- * hierarchy
- */
-struct cgroupfs_root {
-	struct super_block *sb;
-
-	/*
-	 * The bitmask of subsystems intended to be attached to this
-	 * hierarchy
-	 */
-	unsigned long subsys_mask;
-
-	/* Unique id for this hierarchy. */
-	int hierarchy_id;
-
-	/* The bitmask of subsystems currently attached to this hierarchy */
-	unsigned long actual_subsys_mask;
-
-	/* A list running through the attached subsystems */
-	struct list_head subsys_list;
-
-	/* The root cgroup for this hierarchy */
-	struct cgroup top_cgroup;
-
-	/* Tracks how many cgroups are currently defined in hierarchy.*/
-	int number_of_cgroups;
-
-	/* A list running through the active hierarchies */
-	struct list_head root_list;
-
-	/* All cgroups on this root, cgroup_mutex protected */
-	struct list_head allcg_list;
-
-	/* Hierarchy-specific flags */
-	unsigned long flags;
-
-	/* IDs for cgroups in this hierarchy */
-	struct ida cgroup_ida;
-
-	/* The path to use for release notifications. */
-	char release_agent_path[PATH_MAX];
-
-	/* The name for this hierarchy - may be empty */
-	char name[MAX_CGROUP_ROOT_NAMELEN];
-};
-
 /*
 * The "rootnode" hierarchy is the "dummy hierarchy", reserved for the
 * subsystems that are otherwise unattached - it never has more than a
@@ -162,6 +117,9 @@ struct cfent {
	struct list_head node;
	struct dentry *dentry;
	struct cftype *type;
+
+	/* file xattrs */
+	struct simple_xattrs xattrs;
 };
 
 /*
@@ -238,6 +196,8 @@ static DEFINE_SPINLOCK(hierarchy_id_lock);
 /* dummytop is a shorthand for the dummy hierarchy's top cgroup */
 #define dummytop (&rootnode.top_cgroup)
 
+static struct cgroup_name root_cgroup_name = { .name = "/" };
+
 /* This flag indicates whether tasks in the fork and exit paths should
 * check for fork/exit handlers to call. This avoids us having to do
 * extra work in the fork/exit path if none of the subsystems need to
@@ -249,20 +209,6 @@ static int cgroup_destroy_locked(struct cgroup *cgrp);
 static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys,
			      struct cftype cfts[], bool is_add);
 
-#ifdef CONFIG_PROVE_LOCKING
-int cgroup_lock_is_held(void)
-{
-	return lockdep_is_held(&cgroup_mutex);
-}
-#else /* #ifdef CONFIG_PROVE_LOCKING */
-int cgroup_lock_is_held(void)
-{
-	return mutex_is_locked(&cgroup_mutex);
-}
-#endif /* #else #ifdef CONFIG_PROVE_LOCKING */
-
-EXPORT_SYMBOL_GPL(cgroup_lock_is_held);
-
 static int css_unbias_refcnt(int refcnt)
 {
	return refcnt >= 0 ? refcnt : refcnt - CSS_DEACT_BIAS;
@@ -282,11 +228,25 @@ inline int cgroup_is_removed(const struct cgroup *cgrp)
	return test_bit(CGRP_REMOVED, &cgrp->flags);
 }
 
-/* bits in struct cgroupfs_root flags field */
-enum {
-	ROOT_NOPREFIX, /* mounted subsystems have no named prefix */
-	ROOT_XATTR,    /* supports extended attributes */
-};
+/**
+ * cgroup_is_descendant - test ancestry
+ * @cgrp: the cgroup to be tested
+ * @ancestor: possible ancestor of @cgrp
+ *
+ * Test whether @cgrp is a descendant of @ancestor.  It also returns %true
+ * if @cgrp == @ancestor.  This function is safe to call as long as @cgrp
+ * and @ancestor are accessible.
+ */
+bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor)
+{
+	while (cgrp) {
+		if (cgrp == ancestor)
+			return true;
+		cgrp = cgrp->parent;
+	}
+	return false;
+}
+EXPORT_SYMBOL_GPL(cgroup_is_descendant);
 
 static int cgroup_is_releasable(const struct cgroup *cgrp)
 {
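An illustrative (invented) caller, to show the intended use of the new primitive -- confining an operation to a subtree without any locking beyond keeping both cgroups accessible:

/* Hypothetical usage sketch: restrict some operation to @subtree_root. */
static bool op_allowed_under(struct cgroup *cgrp, struct cgroup *subtree_root)
{
	/* true for subtree_root itself and every cgroup below it; safe as
	 * long as both cgroups are accessible, since ->parent pointers do
	 * not change while a cgroup can still be reached. */
	return cgroup_is_descendant(cgrp, subtree_root);
}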
@@ -327,6 +287,23 @@ static inline struct cftype *__d_cft(struct dentry *dentry)
	return __d_cfe(dentry)->type;
 }
 
+/**
+ * cgroup_lock_live_group - take cgroup_mutex and check that cgrp is alive.
+ * @cgrp: the cgroup to be checked for liveness
+ *
+ * On success, returns true; the mutex should be later unlocked.  On
+ * failure returns false with no lock held.
+ */
+static bool cgroup_lock_live_group(struct cgroup *cgrp)
+{
+	mutex_lock(&cgroup_mutex);
+	if (cgroup_is_removed(cgrp)) {
+		mutex_unlock(&cgroup_mutex);
+		return false;
+	}
+	return true;
+}
+
 /* the list of cgroups eligible for automatic release. Protected by
 * release_list_lock */
 static LIST_HEAD(release_list);
@@ -800,27 +777,6 @@ static struct cgroup *task_cgroup_from_root(struct task_struct *task,
 * update of a tasks cgroup pointer by cgroup_attach_task()
 */
 
-/**
- * cgroup_lock - lock out any changes to cgroup structures
- *
- */
-void cgroup_lock(void)
-{
-	mutex_lock(&cgroup_mutex);
-}
-EXPORT_SYMBOL_GPL(cgroup_lock);
-
-/**
- * cgroup_unlock - release lock on cgroup changes
- *
- * Undo the lock taken in a previous cgroup_lock() call.
- */
-void cgroup_unlock(void)
-{
-	mutex_unlock(&cgroup_mutex);
-}
-EXPORT_SYMBOL_GPL(cgroup_unlock);
-
 /*
 * A couple of forward declarations required, due to cyclic reference loop:
 * cgroup_mkdir -> cgroup_create -> cgroup_populate_dir ->
@@ -859,6 +815,17 @@ static struct inode *cgroup_new_inode(umode_t mode, struct super_block *sb)
	return inode;
 }
 
+static struct cgroup_name *cgroup_alloc_name(struct dentry *dentry)
+{
+	struct cgroup_name *name;
+
+	name = kmalloc(sizeof(*name) + dentry->d_name.len + 1, GFP_KERNEL);
+	if (!name)
+		return NULL;
+	strcpy(name->name, dentry->d_name.name);
+	return name;
+}
+
 static void cgroup_free_fn(struct work_struct *work)
 {
	struct cgroup *cgrp = container_of(work, struct cgroup, free_work);
| 864 | struct cgroup *cgrp = container_of(work, struct cgroup, free_work); | 831 | struct cgroup *cgrp = container_of(work, struct cgroup, free_work); |
| @@ -875,8 +842,18 @@ static void cgroup_free_fn(struct work_struct *work) | |||
| 875 | mutex_unlock(&cgroup_mutex); | 842 | mutex_unlock(&cgroup_mutex); |
| 876 | 843 | ||
| 877 | /* | 844 | /* |
| 845 | * We get a ref to the parent's dentry, and put the ref when | ||
| 846 | * this cgroup is being freed, so it's guaranteed that the | ||
| 847 | * parent won't be destroyed before its children. | ||
| 848 | */ | ||
| 849 | dput(cgrp->parent->dentry); | ||
| 850 | |||
| 851 | ida_simple_remove(&cgrp->root->cgroup_ida, cgrp->id); | ||
| 852 | |||
| 853 | /* | ||
| 878 | * Drop the active superblock reference that we took when we | 854 | * Drop the active superblock reference that we took when we |
| 879 | * created the cgroup | 855 | * created the cgroup. This will free cgrp->root, if we are |
| 856 | * holding the last reference to @sb. | ||
| 880 | */ | 857 | */ |
| 881 | deactivate_super(cgrp->root->sb); | 858 | deactivate_super(cgrp->root->sb); |
| 882 | 859 | ||
| @@ -888,7 +865,7 @@ static void cgroup_free_fn(struct work_struct *work) | |||
| 888 | 865 | ||
| 889 | simple_xattrs_free(&cgrp->xattrs); | 866 | simple_xattrs_free(&cgrp->xattrs); |
| 890 | 867 | ||
| 891 | ida_simple_remove(&cgrp->root->cgroup_ida, cgrp->id); | 868 | kfree(rcu_dereference_raw(cgrp->name)); |
| 892 | kfree(cgrp); | 869 | kfree(cgrp); |
| 893 | } | 870 | } |
| 894 | 871 | ||
| @@ -910,13 +887,12 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode) | |||
| 910 | } else { | 887 | } else { |
| 911 | struct cfent *cfe = __d_cfe(dentry); | 888 | struct cfent *cfe = __d_cfe(dentry); |
| 912 | struct cgroup *cgrp = dentry->d_parent->d_fsdata; | 889 | struct cgroup *cgrp = dentry->d_parent->d_fsdata; |
| 913 | struct cftype *cft = cfe->type; | ||
| 914 | 890 | ||
| 915 | WARN_ONCE(!list_empty(&cfe->node) && | 891 | WARN_ONCE(!list_empty(&cfe->node) && |
| 916 | cgrp != &cgrp->root->top_cgroup, | 892 | cgrp != &cgrp->root->top_cgroup, |
| 917 | "cfe still linked for %s\n", cfe->type->name); | 893 | "cfe still linked for %s\n", cfe->type->name); |
| 894 | simple_xattrs_free(&cfe->xattrs); | ||
| 918 | kfree(cfe); | 895 | kfree(cfe); |
| 919 | simple_xattrs_free(&cft->xattrs); | ||
| 920 | } | 896 | } |
| 921 | iput(inode); | 897 | iput(inode); |
| 922 | } | 898 | } |
| @@ -1108,9 +1084,11 @@ static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry) | |||
| 1108 | mutex_lock(&cgroup_root_mutex); | 1084 | mutex_lock(&cgroup_root_mutex); |
| 1109 | for_each_subsys(root, ss) | 1085 | for_each_subsys(root, ss) |
| 1110 | seq_printf(seq, ",%s", ss->name); | 1086 | seq_printf(seq, ",%s", ss->name); |
| 1111 | if (test_bit(ROOT_NOPREFIX, &root->flags)) | 1087 | if (root->flags & CGRP_ROOT_SANE_BEHAVIOR) |
| 1088 | seq_puts(seq, ",sane_behavior"); | ||
| 1089 | if (root->flags & CGRP_ROOT_NOPREFIX) | ||
| 1112 | seq_puts(seq, ",noprefix"); | 1090 | seq_puts(seq, ",noprefix"); |
| 1113 | if (test_bit(ROOT_XATTR, &root->flags)) | 1091 | if (root->flags & CGRP_ROOT_XATTR) |
| 1114 | seq_puts(seq, ",xattr"); | 1092 | seq_puts(seq, ",xattr"); |
| 1115 | if (strlen(root->release_agent_path)) | 1093 | if (strlen(root->release_agent_path)) |
| 1116 | seq_printf(seq, ",release_agent=%s", root->release_agent_path); | 1094 | seq_printf(seq, ",release_agent=%s", root->release_agent_path); |
| @@ -1172,8 +1150,12 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) | |||
| 1172 | all_ss = true; | 1150 | all_ss = true; |
| 1173 | continue; | 1151 | continue; |
| 1174 | } | 1152 | } |
| 1153 | if (!strcmp(token, "__DEVEL__sane_behavior")) { | ||
| 1154 | opts->flags |= CGRP_ROOT_SANE_BEHAVIOR; | ||
| 1155 | continue; | ||
| 1156 | } | ||
| 1175 | if (!strcmp(token, "noprefix")) { | 1157 | if (!strcmp(token, "noprefix")) { |
| 1176 | set_bit(ROOT_NOPREFIX, &opts->flags); | 1158 | opts->flags |= CGRP_ROOT_NOPREFIX; |
| 1177 | continue; | 1159 | continue; |
| 1178 | } | 1160 | } |
| 1179 | if (!strcmp(token, "clone_children")) { | 1161 | if (!strcmp(token, "clone_children")) { |
| @@ -1181,7 +1163,7 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) | |||
| 1181 | continue; | 1163 | continue; |
| 1182 | } | 1164 | } |
| 1183 | if (!strcmp(token, "xattr")) { | 1165 | if (!strcmp(token, "xattr")) { |
| 1184 | set_bit(ROOT_XATTR, &opts->flags); | 1166 | opts->flags |= CGRP_ROOT_XATTR; |
| 1185 | continue; | 1167 | continue; |
| 1186 | } | 1168 | } |
| 1187 | if (!strncmp(token, "release_agent=", 14)) { | 1169 | if (!strncmp(token, "release_agent=", 14)) { |
| @@ -1259,13 +1241,26 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) | |||
| 1259 | 1241 | ||
| 1260 | /* Consistency checks */ | 1242 | /* Consistency checks */ |
| 1261 | 1243 | ||
| 1244 | if (opts->flags & CGRP_ROOT_SANE_BEHAVIOR) { | ||
| 1245 | pr_warning("cgroup: sane_behavior: this is still under development and its behaviors will change, proceed at your own risk\n"); | ||
| 1246 | |||
| 1247 | if (opts->flags & CGRP_ROOT_NOPREFIX) { | ||
| 1248 | pr_err("cgroup: sane_behavior: noprefix is not allowed\n"); | ||
| 1249 | return -EINVAL; | ||
| 1250 | } | ||
| 1251 | |||
| 1252 | if (opts->cpuset_clone_children) { | ||
| 1253 | pr_err("cgroup: sane_behavior: clone_children is not allowed\n"); | ||
| 1254 | return -EINVAL; | ||
| 1255 | } | ||
| 1256 | } | ||
| 1257 | |||
| 1262 | /* | 1258 | /* |
| 1263 | * Option noprefix was introduced just for backward compatibility | 1259 | * Option noprefix was introduced just for backward compatibility |
| 1264 | * with the old cpuset, so we allow noprefix only if mounting just | 1260 | * with the old cpuset, so we allow noprefix only if mounting just |
| 1265 | * the cpuset subsystem. | 1261 | * the cpuset subsystem. |
| 1266 | */ | 1262 | */ |
| 1267 | if (test_bit(ROOT_NOPREFIX, &opts->flags) && | 1263 | if ((opts->flags & CGRP_ROOT_NOPREFIX) && (opts->subsys_mask & mask)) |
| 1268 | (opts->subsys_mask & mask)) | ||
| 1269 | return -EINVAL; | 1264 | return -EINVAL; |
| 1270 | 1265 | ||
| 1271 | 1266 | ||
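Note the idiom change in the flags handling above: the old ROOT_NOPREFIX/ROOT_XATTR were bit numbers for set_bit()/test_bit(), while the new CGRP_ROOT_* constants are bit masks combined with plain |= and &. Mixing the two conventions silently sets the wrong bit, which this standalone example (not kernel code) demonstrates:

#include <stdio.h>

enum { OLD_NOPREFIX = 1, OLD_XATTR = 2 };		/* bit numbers */
enum { NEW_NOPREFIX = 1 << 1, NEW_XATTR = 1 << 2 };	/* bit masks */

int main(void)
{
	unsigned long flags = 0;

	/* bit-number style: the value selects *which* bit to set */
	flags |= 1UL << OLD_XATTR;	/* sets bit 2 (0x4) */

	/* bit-mask style: the value *is* the bit */
	flags |= NEW_NOPREFIX;		/* sets bit 1 (0x2) */

	/* shifting by a mask where a number is expected goes wrong:
	 * 1UL << NEW_XATTR would set bit 4, not bit 2 */
	printf("flags = %#lx\n", flags);	/* prints 0x6 */
	return 0;
}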
@@ -1336,6 +1331,11 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
	struct cgroup_sb_opts opts;
	unsigned long added_mask, removed_mask;
 
+	if (root->flags & CGRP_ROOT_SANE_BEHAVIOR) {
+		pr_err("cgroup: sane_behavior: remount is not allowed\n");
+		return -EINVAL;
+	}
+
	mutex_lock(&cgrp->dentry->d_inode->i_mutex);
	mutex_lock(&cgroup_mutex);
	mutex_lock(&cgroup_root_mutex);
@@ -1421,7 +1421,7 @@ static void init_cgroup_root(struct cgroupfs_root *root)
	INIT_LIST_HEAD(&root->allcg_list);
	root->number_of_cgroups = 1;
	cgrp->root = root;
-	cgrp->top_cgroup = cgrp;
+	cgrp->name = &root_cgroup_name;
	init_cgroup_housekeeping(cgrp);
	list_add_tail(&cgrp->allcg_node, &root->allcg_list);
 }
@@ -1685,6 +1685,14 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
		 * any) is not needed
		 */
		cgroup_drop_root(opts.new_root);
+
+		if (((root->flags | opts.flags) & CGRP_ROOT_SANE_BEHAVIOR) &&
+		    root->flags != opts.flags) {
+			pr_err("cgroup: sane_behavior: new mount options should match the existing superblock\n");
+			ret = -EINVAL;
+			goto drop_new_super;
+		}
+
		/* no subsys rebinding, so refcounts don't change */
		drop_parsed_module_refcounts(opts.subsys_mask);
	}
@@ -1769,49 +1777,48 @@ static struct kobject *cgroup_kobj;
 * @buf: the buffer to write the path into
 * @buflen: the length of the buffer
 *
- * Called with cgroup_mutex held or else with an RCU-protected cgroup
- * reference.  Writes path of cgroup into buf.  Returns 0 on success,
- * -errno on error.
+ * Writes path of cgroup into buf.  Returns 0 on success, -errno on error.
+ *
+ * We can't generate cgroup path using dentry->d_name, as accessing
+ * dentry->name must be protected by irq-unsafe dentry->d_lock or parent
+ * inode's i_mutex, while on the other hand cgroup_path() can be called
+ * with some irq-safe spinlocks held.
 */
 int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
 {
-	struct dentry *dentry = cgrp->dentry;
+	int ret = -ENAMETOOLONG;
	char *start;
 
-	rcu_lockdep_assert(rcu_read_lock_held() || cgroup_lock_is_held(),
-			   "cgroup_path() called without proper locking");
-
-	if (cgrp == dummytop) {
-		/*
-		 * Inactive subsystems have no dentry for their root
-		 * cgroup
-		 */
-		strcpy(buf, "/");
+	if (!cgrp->parent) {
+		if (strlcpy(buf, "/", buflen) >= buflen)
+			return -ENAMETOOLONG;
		return 0;
	}
 
	start = buf + buflen - 1;
-
	*start = '\0';
-	for (;;) {
-		int len = dentry->d_name.len;
 
+	rcu_read_lock();
+	do {
+		const char *name = cgroup_name(cgrp);
+		int len;
+
+		len = strlen(name);
		if ((start -= len) < buf)
-			return -ENAMETOOLONG;
-		memcpy(start, dentry->d_name.name, len);
-		cgrp = cgrp->parent;
-		if (!cgrp)
-			break;
+			goto out;
+		memcpy(start, name, len);
 
-		dentry = cgrp->dentry;
-		if (!cgrp->parent)
-			continue;
		if (--start < buf)
-			return -ENAMETOOLONG;
+			goto out;
		*start = '/';
-	}
+
+		cgrp = cgrp->parent;
+	} while (cgrp->parent);
+	ret = 0;
	memmove(buf, start, buf + buflen - start);
-	return 0;
+out:
+	rcu_read_unlock();
+	return ret;
 }
 EXPORT_SYMBOL_GPL(cgroup_path);
 
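The rewritten cgroup_path() assembles the path right to left: each component is copied in front of the previous one at the tail of the buffer, and one final memmove() shifts the result to the front. A runnable userspace model of the same algorithm, with an invented node type standing in for struct cgroup:

#include <stdio.h>
#include <string.h>

struct node {
	const char *name;
	struct node *parent;
};

/* Build "/a/b" from leaf to root, right to left, like cgroup_path(). */
static int node_path(const struct node *n, char *buf, int buflen)
{
	char *start = buf + buflen - 1;

	if (!n->parent) {		/* the root spells just "/" */
		if (buflen < 2)
			return -1;
		strcpy(buf, "/");
		return 0;
	}

	*start = '\0';
	do {
		int len = strlen(n->name);

		if ((start -= len) < buf)
			return -1;	/* -ENAMETOOLONG in the kernel */
		memcpy(start, n->name, len);
		if (--start < buf)
			return -1;
		*start = '/';
		n = n->parent;
	} while (n->parent);		/* stop before the unnamed root */

	memmove(buf, start, buf + buflen - start);
	return 0;
}

int main(void)
{
	struct node root = { "/", NULL };
	struct node a = { "a", &root }, b = { "b", &a };
	char buf[64];

	if (!node_path(&b, buf, sizeof(buf)))
		printf("%s\n", buf);	/* prints /a/b */
	return 0;
}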
@@ -1900,7 +1907,7 @@ EXPORT_SYMBOL_GPL(cgroup_taskset_size);
 *
 * Must be called with cgroup_mutex and threadgroup locked.
 */
-static void cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
+static void cgroup_task_migrate(struct cgroup *oldcgrp,
				struct task_struct *tsk, struct css_set *newcg)
 {
	struct css_set *oldcg;
@@ -1933,121 +1940,22 @@ static void cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
 }
 
 /**
- * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp'
- * @cgrp: the cgroup the task is attaching to
- * @tsk: the task to be attached
- *
- * Call with cgroup_mutex and threadgroup locked. May take task_lock of
- * @tsk during call.
- */
-int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
-{
-	int retval = 0;
-	struct cgroup_subsys *ss, *failed_ss = NULL;
-	struct cgroup *oldcgrp;
-	struct cgroupfs_root *root = cgrp->root;
-	struct cgroup_taskset tset = { };
-	struct css_set *newcg;
-
-	/* @tsk either already exited or can't exit until the end */
-	if (tsk->flags & PF_EXITING)
-		return -ESRCH;
-
-	/* Nothing to do if the task is already in that cgroup */
-	oldcgrp = task_cgroup_from_root(tsk, root);
-	if (cgrp == oldcgrp)
-		return 0;
-
-	tset.single.task = tsk;
-	tset.single.cgrp = oldcgrp;
-
-	for_each_subsys(root, ss) {
-		if (ss->can_attach) {
-			retval = ss->can_attach(cgrp, &tset);
-			if (retval) {
-				/*
-				 * Remember on which subsystem the can_attach()
-				 * failed, so that we only call cancel_attach()
-				 * against the subsystems whose can_attach()
-				 * succeeded. (See below)
-				 */
-				failed_ss = ss;
-				goto out;
-			}
-		}
-	}
-
-	newcg = find_css_set(tsk->cgroups, cgrp);
-	if (!newcg) {
-		retval = -ENOMEM;
-		goto out;
-	}
-
-	cgroup_task_migrate(cgrp, oldcgrp, tsk, newcg);
-
-	for_each_subsys(root, ss) {
-		if (ss->attach)
-			ss->attach(cgrp, &tset);
-	}
-
-out:
-	if (retval) {
-		for_each_subsys(root, ss) {
-			if (ss == failed_ss)
-				/*
-				 * This subsystem was the one that failed the
-				 * can_attach() check earlier, so we don't need
-				 * to call cancel_attach() against it or any
-				 * remaining subsystems.
-				 */
-				break;
-			if (ss->cancel_attach)
-				ss->cancel_attach(cgrp, &tset);
-		}
-	}
-	return retval;
-}
-
-/**
- * cgroup_attach_task_all - attach task 'tsk' to all cgroups of task 'from'
- * @from: attach to all cgroups of a given task
- * @tsk: the task to be attached
- */
-int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
-{
-	struct cgroupfs_root *root;
-	int retval = 0;
-
-	cgroup_lock();
-	for_each_active_root(root) {
-		struct cgroup *from_cg = task_cgroup_from_root(from, root);
-
-		retval = cgroup_attach_task(from_cg, tsk);
-		if (retval)
-			break;
-	}
-	cgroup_unlock();
-
-	return retval;
-}
-EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
-
-/**
- * cgroup_attach_proc - attach all threads in a threadgroup to a cgroup
+ * cgroup_attach_task - attach a task or a whole threadgroup to a cgroup
 * @cgrp: the cgroup to attach to
- * @leader: the threadgroup leader task_struct of the group to be attached
+ * @tsk: the task or the leader of the threadgroup to be attached
+ * @threadgroup: attach the whole threadgroup?
 *
 * Call holding cgroup_mutex and the group_rwsem of the leader. Will take
- * task_lock of each thread in leader's threadgroup individually in turn.
+ * task_lock of @tsk or each thread in the threadgroup individually in turn.
 */
-static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
+static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk,
+			      bool threadgroup)
 {
	int retval, i, group_size;
	struct cgroup_subsys *ss, *failed_ss = NULL;
-	/* guaranteed to be initialized later, but the compiler needs this */
	struct cgroupfs_root *root = cgrp->root;
	/* threadgroup list cursor and array */
-	struct task_struct *tsk;
+	struct task_struct *leader = tsk;
	struct task_and_cgroup *tc;
	struct flex_array *group;
	struct cgroup_taskset tset = { };
@@ -2059,17 +1967,19 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk,
	 * group - group_rwsem prevents new threads from appearing, and if
	 * threads exit, this will just be an over-estimate.
	 */
-	group_size = get_nr_threads(leader);
+	if (threadgroup)
+		group_size = get_nr_threads(tsk);
+	else
+		group_size = 1;
	/* flex_array supports very large thread-groups better than kmalloc. */
	group = flex_array_alloc(sizeof(*tc), group_size, GFP_KERNEL);
	if (!group)
		return -ENOMEM;
	/* pre-allocate to guarantee space while iterating in rcu read-side. */
-	retval = flex_array_prealloc(group, 0, group_size - 1, GFP_KERNEL);
+	retval = flex_array_prealloc(group, 0, group_size, GFP_KERNEL);
	if (retval)
		goto out_free_group_list;
 
-	tsk = leader;
	i = 0;
	/*
	 * Prevent freeing of tasks while we take a snapshot. Tasks that are
@@ -2098,6 +2008,9 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk,
		retval = flex_array_put(group, i, &ent, GFP_ATOMIC);
		BUG_ON(retval != 0);
		i++;
+
+		if (!threadgroup)
+			break;
	} while_each_thread(leader, tsk);
	rcu_read_unlock();
	/* remember the number of threads in the array for later. */
@@ -2143,7 +2056,7 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk,
	 */
	for (i = 0; i < group_size; i++) {
		tc = flex_array_get(group, i);
-		cgroup_task_migrate(cgrp, tc->cgrp, tc->task, tc->cg);
+		cgroup_task_migrate(tc->cgrp, tc->task, tc->cg);
	}
	/* nothing is sensitive to fork() after this point. */
 
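The attach path kept by this change is a two-phase commit over the subsystems: every can_attach() may veto, the migration happens only if all agree, and cancel_attach() runs only for subsystems that had already approved. A condensed standalone model of that protocol (the callback names mirror the kernel's, everything else is invented):

#include <stdio.h>

/* Standalone model of the can_attach/attach/cancel_attach protocol. */
struct subsys {
	const char *name;
	int (*can_attach)(void);	/* 0 = ok, nonzero = veto */
	void (*attach)(void);
	void (*cancel_attach)(void);
};

static int attach_all(struct subsys *ss, int nr)
{
	int i, ret;

	/* phase 1: every subsystem may veto */
	for (i = 0; i < nr; i++) {
		if (ss[i].can_attach && (ret = ss[i].can_attach())) {
			/* roll back only the subsystems that agreed */
			while (i-- > 0)
				if (ss[i].cancel_attach)
					ss[i].cancel_attach();
			return ret;
		}
	}

	/* ... the task migration itself would happen here ... */

	/* phase 2: commit; no failures are allowed past this point */
	for (i = 0; i < nr; i++)
		if (ss[i].attach)
			ss[i].attach();
	return 0;
}

static int veto(void) { return -1; }

int main(void)
{
	struct subsys ss[] = {
		{ "memcg",  NULL, NULL, NULL },
		{ "cpuset", veto, NULL, NULL },
	};

	printf("attach: %d\n", attach_all(ss, 2));	/* -1: vetoed */
	return 0;
}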
| @@ -2251,17 +2164,42 @@ retry_find_task: | |||
| 2251 | put_task_struct(tsk); | 2164 | put_task_struct(tsk); |
| 2252 | goto retry_find_task; | 2165 | goto retry_find_task; |
| 2253 | } | 2166 | } |
| 2254 | ret = cgroup_attach_proc(cgrp, tsk); | 2167 | } |
| 2255 | } else | 2168 | |
| 2256 | ret = cgroup_attach_task(cgrp, tsk); | 2169 | ret = cgroup_attach_task(cgrp, tsk, threadgroup); |
| 2170 | |||
| 2257 | threadgroup_unlock(tsk); | 2171 | threadgroup_unlock(tsk); |
| 2258 | 2172 | ||
| 2259 | put_task_struct(tsk); | 2173 | put_task_struct(tsk); |
| 2260 | out_unlock_cgroup: | 2174 | out_unlock_cgroup: |
| 2261 | cgroup_unlock(); | 2175 | mutex_unlock(&cgroup_mutex); |
| 2262 | return ret; | 2176 | return ret; |
| 2263 | } | 2177 | } |
| 2264 | 2178 | ||
| 2179 | /** | ||
| 2180 | * cgroup_attach_task_all - attach task 'tsk' to all cgroups of task 'from' | ||
| 2181 | * @from: attach to all cgroups of a given task | ||
| 2182 | * @tsk: the task to be attached | ||
| 2183 | */ | ||
| 2184 | int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk) | ||
| 2185 | { | ||
| 2186 | struct cgroupfs_root *root; | ||
| 2187 | int retval = 0; | ||
| 2188 | |||
| 2189 | mutex_lock(&cgroup_mutex); | ||
| 2190 | for_each_active_root(root) { | ||
| 2191 | struct cgroup *from_cg = task_cgroup_from_root(from, root); | ||
| 2192 | |||
| 2193 | retval = cgroup_attach_task(from_cg, tsk, false); | ||
| 2194 | if (retval) | ||
| 2195 | break; | ||
| 2196 | } | ||
| 2197 | mutex_unlock(&cgroup_mutex); | ||
| 2198 | |||
| 2199 | return retval; | ||
| 2200 | } | ||
| 2201 | EXPORT_SYMBOL_GPL(cgroup_attach_task_all); | ||
| 2202 | |||
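A minimal caller sketch for cgroup_attach_task_all(), assuming a kernel-side worker thread that should follow its owner's cgroup placement (the wrapper below is illustrative, not part of this patch):

	/* place @worker in every cgroup that @owner currently belongs to */
	static int worker_follow_owner(struct task_struct *owner,
				       struct task_struct *worker)
	{
		/* takes cgroup_mutex internally; stops at the first failure */
		return cgroup_attach_task_all(owner, worker);
	}

Since the loop breaks on the first failing hierarchy, a caller that needs all-or-nothing semantics has to handle partial attachment itself.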
| 2265 | static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid) | 2203 | static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid) |
| 2266 | { | 2204 | { |
| 2267 | return attach_task_by_pid(cgrp, pid, false); | 2205 | return attach_task_by_pid(cgrp, pid, false); |
| @@ -2272,24 +2210,6 @@ static int cgroup_procs_write(struct cgroup *cgrp, struct cftype *cft, u64 tgid) | |||
| 2272 | return attach_task_by_pid(cgrp, tgid, true); | 2210 | return attach_task_by_pid(cgrp, tgid, true); |
| 2273 | } | 2211 | } |
| 2274 | 2212 | ||
| 2275 | /** | ||
| 2276 | * cgroup_lock_live_group - take cgroup_mutex and check that cgrp is alive. | ||
| 2277 | * @cgrp: the cgroup to be checked for liveness | ||
| 2278 | * | ||
| 2279 | * On success, returns true; the lock should be later released with | ||
| 2280 | * cgroup_unlock(). On failure returns false with no lock held. | ||
| 2281 | */ | ||
| 2282 | bool cgroup_lock_live_group(struct cgroup *cgrp) | ||
| 2283 | { | ||
| 2284 | mutex_lock(&cgroup_mutex); | ||
| 2285 | if (cgroup_is_removed(cgrp)) { | ||
| 2286 | mutex_unlock(&cgroup_mutex); | ||
| 2287 | return false; | ||
| 2288 | } | ||
| 2289 | return true; | ||
| 2290 | } | ||
| 2291 | EXPORT_SYMBOL_GPL(cgroup_lock_live_group); | ||
| 2292 | |||
| 2293 | static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft, | 2213 | static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft, |
| 2294 | const char *buffer) | 2214 | const char *buffer) |
| 2295 | { | 2215 | { |
| @@ -2301,7 +2221,7 @@ static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft, | |||
| 2301 | mutex_lock(&cgroup_root_mutex); | 2221 | mutex_lock(&cgroup_root_mutex); |
| 2302 | strcpy(cgrp->root->release_agent_path, buffer); | 2222 | strcpy(cgrp->root->release_agent_path, buffer); |
| 2303 | mutex_unlock(&cgroup_root_mutex); | 2223 | mutex_unlock(&cgroup_root_mutex); |
| 2304 | cgroup_unlock(); | 2224 | mutex_unlock(&cgroup_mutex); |
| 2305 | return 0; | 2225 | return 0; |
| 2306 | } | 2226 | } |
| 2307 | 2227 | ||
| @@ -2312,7 +2232,14 @@ static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft, | |||
| 2312 | return -ENODEV; | 2232 | return -ENODEV; |
| 2313 | seq_puts(seq, cgrp->root->release_agent_path); | 2233 | seq_puts(seq, cgrp->root->release_agent_path); |
| 2314 | seq_putc(seq, '\n'); | 2234 | seq_putc(seq, '\n'); |
| 2315 | cgroup_unlock(); | 2235 | mutex_unlock(&cgroup_mutex); |
| 2236 | return 0; | ||
| 2237 | } | ||
| 2238 | |||
| 2239 | static int cgroup_sane_behavior_show(struct cgroup *cgrp, struct cftype *cft, | ||
| 2240 | struct seq_file *seq) | ||
| 2241 | { | ||
| 2242 | seq_printf(seq, "%d\n", cgroup_sane_behavior(cgrp)); | ||
| 2316 | return 0; | 2243 | return 0; |
| 2317 | } | 2244 | } |
| 2318 | 2245 | ||
| @@ -2537,13 +2464,40 @@ static int cgroup_file_release(struct inode *inode, struct file *file) | |||
| 2537 | static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry, | 2464 | static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry, |
| 2538 | struct inode *new_dir, struct dentry *new_dentry) | 2465 | struct inode *new_dir, struct dentry *new_dentry) |
| 2539 | { | 2466 | { |
| 2467 | int ret; | ||
| 2468 | struct cgroup_name *name, *old_name; | ||
| 2469 | struct cgroup *cgrp; | ||
| 2470 | |||
| 2471 | /* | ||
| 2472 | * It's convenient to use parent dir's i_mutex to protect | ||
| 2473 | * cgrp->name. | ||
| 2474 | */ | ||
| 2475 | lockdep_assert_held(&old_dir->i_mutex); | ||
| 2476 | |||
| 2540 | if (!S_ISDIR(old_dentry->d_inode->i_mode)) | 2477 | if (!S_ISDIR(old_dentry->d_inode->i_mode)) |
| 2541 | return -ENOTDIR; | 2478 | return -ENOTDIR; |
| 2542 | if (new_dentry->d_inode) | 2479 | if (new_dentry->d_inode) |
| 2543 | return -EEXIST; | 2480 | return -EEXIST; |
| 2544 | if (old_dir != new_dir) | 2481 | if (old_dir != new_dir) |
| 2545 | return -EIO; | 2482 | return -EIO; |
| 2546 | return simple_rename(old_dir, old_dentry, new_dir, new_dentry); | 2483 | |
| 2484 | cgrp = __d_cgrp(old_dentry); | ||
| 2485 | |||
| 2486 | name = cgroup_alloc_name(new_dentry); | ||
| 2487 | if (!name) | ||
| 2488 | return -ENOMEM; | ||
| 2489 | |||
| 2490 | ret = simple_rename(old_dir, old_dentry, new_dir, new_dentry); | ||
| 2491 | if (ret) { | ||
| 2492 | kfree(name); | ||
| 2493 | return ret; | ||
| 2494 | } | ||
| 2495 | |||
| 2496 | old_name = cgrp->name; | ||
| 2497 | rcu_assign_pointer(cgrp->name, name); | ||
| 2498 | |||
| 2499 | kfree_rcu(old_name, rcu_head); | ||
| 2500 | return 0; | ||
| 2547 | } | 2501 | } |
| 2548 | 2502 | ||
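The rename path publishes the new name with rcu_assign_pointer() and frees the old one only after a grace period via kfree_rcu(), so readers need nothing beyond an RCU read-side critical section. A sketch of the reader side, using the cgroup_name() accessor this series relies on elsewhere:

	rcu_read_lock();
	pr_info("renamed cgroup is now %s\n", cgroup_name(cgrp));
	rcu_read_unlock();

The returned string must not be cached past rcu_read_unlock(): a concurrent rename may free it as soon as the grace period elapses.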
| 2549 | static struct simple_xattrs *__d_xattrs(struct dentry *dentry) | 2503 | static struct simple_xattrs *__d_xattrs(struct dentry *dentry) |
| @@ -2551,13 +2505,13 @@ static struct simple_xattrs *__d_xattrs(struct dentry *dentry) | |||
| 2551 | if (S_ISDIR(dentry->d_inode->i_mode)) | 2505 | if (S_ISDIR(dentry->d_inode->i_mode)) |
| 2552 | return &__d_cgrp(dentry)->xattrs; | 2506 | return &__d_cgrp(dentry)->xattrs; |
| 2553 | else | 2507 | else |
| 2554 | return &__d_cft(dentry)->xattrs; | 2508 | return &__d_cfe(dentry)->xattrs; |
| 2555 | } | 2509 | } |
| 2556 | 2510 | ||
| 2557 | static inline int xattr_enabled(struct dentry *dentry) | 2511 | static inline int xattr_enabled(struct dentry *dentry) |
| 2558 | { | 2512 | { |
| 2559 | struct cgroupfs_root *root = dentry->d_sb->s_fs_info; | 2513 | struct cgroupfs_root *root = dentry->d_sb->s_fs_info; |
| 2560 | return test_bit(ROOT_XATTR, &root->flags); | 2514 | return root->flags & CGRP_ROOT_XATTR; |
| 2561 | } | 2515 | } |
| 2562 | 2516 | ||
| 2563 | static bool is_valid_xattr(const char *name) | 2517 | static bool is_valid_xattr(const char *name) |
| @@ -2727,9 +2681,7 @@ static int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys, | |||
| 2727 | umode_t mode; | 2681 | umode_t mode; |
| 2728 | char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 }; | 2682 | char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 }; |
| 2729 | 2683 | ||
| 2730 | simple_xattrs_init(&cft->xattrs); | 2684 | if (subsys && !(cgrp->root->flags & CGRP_ROOT_NOPREFIX)) { |
| 2731 | |||
| 2732 | if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) { | ||
| 2733 | strcpy(name, subsys->name); | 2685 | strcpy(name, subsys->name); |
| 2734 | strcat(name, "."); | 2686 | strcat(name, "."); |
| 2735 | } | 2687 | } |
| @@ -2753,6 +2705,7 @@ static int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys, | |||
| 2753 | cfe->type = (void *)cft; | 2705 | cfe->type = (void *)cft; |
| 2754 | cfe->dentry = dentry; | 2706 | cfe->dentry = dentry; |
| 2755 | dentry->d_fsdata = cfe; | 2707 | dentry->d_fsdata = cfe; |
| 2708 | simple_xattrs_init(&cfe->xattrs); | ||
| 2756 | list_add_tail(&cfe->node, &parent->files); | 2709 | list_add_tail(&cfe->node, &parent->files); |
| 2757 | cfe = NULL; | 2710 | cfe = NULL; |
| 2758 | } | 2711 | } |
| @@ -2770,6 +2723,8 @@ static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys, | |||
| 2770 | 2723 | ||
| 2771 | for (cft = cfts; cft->name[0] != '\0'; cft++) { | 2724 | for (cft = cfts; cft->name[0] != '\0'; cft++) { |
| 2772 | /* does cft->flags tell us to skip this file on @cgrp? */ | 2725 | /* does cft->flags tell us to skip this file on @cgrp? */ |
| 2726 | if ((cft->flags & CFTYPE_INSANE) && cgroup_sane_behavior(cgrp)) | ||
| 2727 | continue; | ||
| 2773 | if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgrp->parent) | 2728 | if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgrp->parent) |
| 2774 | continue; | 2729 | continue; |
| 2775 | if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent) | 2730 | if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent) |
| @@ -3300,6 +3255,34 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan) | |||
| 3300 | return 0; | 3255 | return 0; |
| 3301 | } | 3256 | } |
| 3302 | 3257 | ||
| 3258 | static void cgroup_transfer_one_task(struct task_struct *task, | ||
| 3259 | struct cgroup_scanner *scan) | ||
| 3260 | { | ||
| 3261 | struct cgroup *new_cgroup = scan->data; | ||
| 3262 | |||
| 3263 | mutex_lock(&cgroup_mutex); | ||
| 3264 | cgroup_attach_task(new_cgroup, task, false); | ||
| 3265 | mutex_unlock(&cgroup_mutex); | ||
| 3266 | } | ||
| 3267 | |||
| 3268 | /** | ||
| 3269 | * cgroup_transfer_tasks - move tasks from one cgroup to another | ||
| 3270 | * @to: cgroup to which the tasks will be moved | ||
| 3271 | * @from: cgroup in which the tasks currently reside | ||
| 3272 | */ | ||
| 3273 | int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from) | ||
| 3274 | { | ||
| 3275 | struct cgroup_scanner scan; | ||
| 3276 | |||
| 3277 | scan.cg = from; | ||
| 3278 | scan.test_task = NULL; /* select all tasks in cgroup */ | ||
| 3279 | scan.process_task = cgroup_transfer_one_task; | ||
| 3280 | scan.heap = NULL; | ||
| 3281 | scan.data = to; | ||
| 3282 | |||
| 3283 | return cgroup_scan_tasks(&scan); | ||
| 3284 | } | ||
| 3285 | |||
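cgroup_transfer_tasks() leaves scan.test_task NULL so every task is selected; a caller that wants only a subset can install a predicate. A hedged sketch (the filter and its policy are illustrative):

	/* accept only kernel threads for processing */
	static int only_kthreads(struct task_struct *task,
				 struct cgroup_scanner *scan)
	{
		return !!(task->flags & PF_KTHREAD);
	}

	...
	scan.test_task = only_kthreads;

cgroup_scan_tasks() then invokes process_task only for the tasks the predicate accepts.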
| 3303 | /* | 3286 | /* |
| 3304 | * Stuff for reading the 'tasks'/'procs' files. | 3287 | * Stuff for reading the 'tasks'/'procs' files. |
| 3305 | * | 3288 | * |
| @@ -3362,35 +3345,14 @@ static void pidlist_free(void *p) | |||
| 3362 | else | 3345 | else |
| 3363 | kfree(p); | 3346 | kfree(p); |
| 3364 | } | 3347 | } |
| 3365 | static void *pidlist_resize(void *p, int newcount) | ||
| 3366 | { | ||
| 3367 | void *newlist; | ||
| 3368 | /* note: if new alloc fails, old p will still be valid either way */ | ||
| 3369 | if (is_vmalloc_addr(p)) { | ||
| 3370 | newlist = vmalloc(newcount * sizeof(pid_t)); | ||
| 3371 | if (!newlist) | ||
| 3372 | return NULL; | ||
| 3373 | memcpy(newlist, p, newcount * sizeof(pid_t)); | ||
| 3374 | vfree(p); | ||
| 3375 | } else { | ||
| 3376 | newlist = krealloc(p, newcount * sizeof(pid_t), GFP_KERNEL); | ||
| 3377 | } | ||
| 3378 | return newlist; | ||
| 3379 | } | ||
| 3380 | 3348 | ||
| 3381 | /* | 3349 | /* |
| 3382 | * pidlist_uniq - given a kmalloc()ed list, strip out all duplicate entries | 3350 | * pidlist_uniq - given a kmalloc()ed list, strip out all duplicate entries |
| 3383 | * If the new stripped list is sufficiently smaller and there's enough memory | 3351 | * Returns the number of unique elements. |
| 3384 | * to allocate a new buffer, will let go of the unneeded memory. Returns the | ||
| 3385 | * number of unique elements. | ||
| 3386 | */ | 3352 | */ |
| 3387 | /* is the size difference enough that we should re-allocate the array? */ | 3353 | static int pidlist_uniq(pid_t *list, int length) |
| 3388 | #define PIDLIST_REALLOC_DIFFERENCE(old, new) ((old) - PAGE_SIZE >= (new)) | ||
| 3389 | static int pidlist_uniq(pid_t **p, int length) | ||
| 3390 | { | 3354 | { |
| 3391 | int src, dest = 1; | 3355 | int src, dest = 1; |
| 3392 | pid_t *list = *p; | ||
| 3393 | pid_t *newlist; | ||
| 3394 | 3356 | ||
| 3395 | /* | 3357 | /* |
| 3396 | * we presume the 0th element is unique, so i starts at 1. trivial | 3358 | * we presume the 0th element is unique, so i starts at 1. trivial |
| @@ -3411,16 +3373,6 @@ static int pidlist_uniq(pid_t **p, int length) | |||
| 3411 | dest++; | 3373 | dest++; |
| 3412 | } | 3374 | } |
| 3413 | after: | 3375 | after: |
| 3414 | /* | ||
| 3415 | * if the length difference is large enough, we want to allocate a | ||
| 3416 | * smaller buffer to save memory. if this fails due to out of memory, | ||
| 3417 | * we'll just stay with what we've got. | ||
| 3418 | */ | ||
| 3419 | if (PIDLIST_REALLOC_DIFFERENCE(length, dest)) { | ||
| 3420 | newlist = pidlist_resize(list, dest); | ||
| 3421 | if (newlist) | ||
| 3422 | *p = newlist; | ||
| 3423 | } | ||
| 3424 | return dest; | 3376 | return dest; |
| 3425 | } | 3377 | } |
| 3426 | 3378 | ||
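Because the array is sorted by cmppid() first, one forward pass removes duplicates in place. A worked example of the loop above on sample data (standalone, with the goto label renamed):

	pid_t list[] = { 3, 3, 5, 9, 9, 9 };
	int length = 6, src, dest = 1;

	for (src = 1; src < length; src++) {
		/* skip over a run of duplicates */
		while (list[src] == list[src - 1]) {
			if (++src == length)
				goto done;
		}
		list[dest++] = list[src];
	}
done:
	/* dest == 3; the list now begins { 3, 5, 9 } */

With the realloc-on-shrink logic gone, the trailing slots are simply left allocated until pidlist_free().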
| @@ -3516,7 +3468,7 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type, | |||
| 3516 | /* now sort & (if procs) strip out duplicates */ | 3468 | /* now sort & (if procs) strip out duplicates */ |
| 3517 | sort(array, length, sizeof(pid_t), cmppid, NULL); | 3469 | sort(array, length, sizeof(pid_t), cmppid, NULL); |
| 3518 | if (type == CGROUP_FILE_PROCS) | 3470 | if (type == CGROUP_FILE_PROCS) |
| 3519 | length = pidlist_uniq(&array, length); | 3471 | length = pidlist_uniq(array, length); |
| 3520 | l = cgroup_pidlist_find(cgrp, type); | 3472 | l = cgroup_pidlist_find(cgrp, type); |
| 3521 | if (!l) { | 3473 | if (!l) { |
| 3522 | pidlist_free(array); | 3474 | pidlist_free(array); |
| @@ -3930,11 +3882,7 @@ static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft, | |||
| 3930 | if (ret) | 3882 | if (ret) |
| 3931 | goto fail; | 3883 | goto fail; |
| 3932 | 3884 | ||
| 3933 | if (efile->f_op->poll(efile, &event->pt) & POLLHUP) { | 3885 | efile->f_op->poll(efile, &event->pt); |
| 3934 | event->cft->unregister_event(cgrp, event->cft, event->eventfd); | ||
| 3935 | ret = 0; | ||
| 3936 | goto fail; | ||
| 3937 | } | ||
| 3938 | 3886 | ||
| 3939 | /* | 3887 | /* |
| 3940 | * Events should be removed after rmdir of cgroup directory, but before | 3888 | * Events should be removed after rmdir of cgroup directory, but before |
| @@ -4016,10 +3964,16 @@ static struct cftype files[] = { | |||
| 4016 | }, | 3964 | }, |
| 4017 | { | 3965 | { |
| 4018 | .name = "cgroup.clone_children", | 3966 | .name = "cgroup.clone_children", |
| 3967 | .flags = CFTYPE_INSANE, | ||
| 4019 | .read_u64 = cgroup_clone_children_read, | 3968 | .read_u64 = cgroup_clone_children_read, |
| 4020 | .write_u64 = cgroup_clone_children_write, | 3969 | .write_u64 = cgroup_clone_children_write, |
| 4021 | }, | 3970 | }, |
| 4022 | { | 3971 | { |
| 3972 | .name = "cgroup.sane_behavior", | ||
| 3973 | .flags = CFTYPE_ONLY_ON_ROOT, | ||
| 3974 | .read_seq_string = cgroup_sane_behavior_show, | ||
| 3975 | }, | ||
| 3976 | { | ||
| 4023 | .name = "release_agent", | 3977 | .name = "release_agent", |
| 4024 | .flags = CFTYPE_ONLY_ON_ROOT, | 3978 | .flags = CFTYPE_ONLY_ON_ROOT, |
| 4025 | .read_seq_string = cgroup_release_agent_show, | 3979 | .read_seq_string = cgroup_release_agent_show, |
| @@ -4131,17 +4085,8 @@ static void offline_css(struct cgroup_subsys *ss, struct cgroup *cgrp) | |||
| 4131 | if (!(css->flags & CSS_ONLINE)) | 4085 | if (!(css->flags & CSS_ONLINE)) |
| 4132 | return; | 4086 | return; |
| 4133 | 4087 | ||
| 4134 | /* | 4088 | if (ss->css_offline) |
| 4135 | * css_offline() should be called with cgroup_mutex unlocked. See | ||
| 4136 | * 3fa59dfbc3 ("cgroup: fix potential deadlock in pre_destroy") for | ||
| 4137 | * details. This temporary unlocking should go away once | ||
| 4138 | * cgroup_mutex is unexported from controllers. | ||
| 4139 | */ | ||
| 4140 | if (ss->css_offline) { | ||
| 4141 | mutex_unlock(&cgroup_mutex); | ||
| 4142 | ss->css_offline(cgrp); | 4089 | ss->css_offline(cgrp); |
| 4143 | mutex_lock(&cgroup_mutex); | ||
| 4144 | } | ||
| 4145 | 4090 | ||
| 4146 | cgrp->subsys[ss->subsys_id]->flags &= ~CSS_ONLINE; | 4091 | cgrp->subsys[ss->subsys_id]->flags &= ~CSS_ONLINE; |
| 4147 | } | 4092 | } |
| @@ -4158,6 +4103,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
| 4158 | umode_t mode) | 4103 | umode_t mode) |
| 4159 | { | 4104 | { |
| 4160 | struct cgroup *cgrp; | 4105 | struct cgroup *cgrp; |
| 4106 | struct cgroup_name *name; | ||
| 4161 | struct cgroupfs_root *root = parent->root; | 4107 | struct cgroupfs_root *root = parent->root; |
| 4162 | int err = 0; | 4108 | int err = 0; |
| 4163 | struct cgroup_subsys *ss; | 4109 | struct cgroup_subsys *ss; |
| @@ -4168,9 +4114,14 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
| 4168 | if (!cgrp) | 4114 | if (!cgrp) |
| 4169 | return -ENOMEM; | 4115 | return -ENOMEM; |
| 4170 | 4116 | ||
| 4117 | name = cgroup_alloc_name(dentry); | ||
| 4118 | if (!name) | ||
| 4119 | goto err_free_cgrp; | ||
| 4120 | rcu_assign_pointer(cgrp->name, name); | ||
| 4121 | |||
| 4171 | cgrp->id = ida_simple_get(&root->cgroup_ida, 1, 0, GFP_KERNEL); | 4122 | cgrp->id = ida_simple_get(&root->cgroup_ida, 1, 0, GFP_KERNEL); |
| 4172 | if (cgrp->id < 0) | 4123 | if (cgrp->id < 0) |
| 4173 | goto err_free_cgrp; | 4124 | goto err_free_name; |
| 4174 | 4125 | ||
| 4175 | /* | 4126 | /* |
| 4176 | * Only live parents can have children. Note that the liveness | 4127 | * Only live parents can have children. Note that the liveness |
| @@ -4198,7 +4149,6 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
| 4198 | 4149 | ||
| 4199 | cgrp->parent = parent; | 4150 | cgrp->parent = parent; |
| 4200 | cgrp->root = parent->root; | 4151 | cgrp->root = parent->root; |
| 4201 | cgrp->top_cgroup = parent->top_cgroup; | ||
| 4202 | 4152 | ||
| 4203 | if (notify_on_release(parent)) | 4153 | if (notify_on_release(parent)) |
| 4204 | set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); | 4154 | set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); |
| @@ -4241,6 +4191,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
| 4241 | for_each_subsys(root, ss) | 4191 | for_each_subsys(root, ss) |
| 4242 | dget(dentry); | 4192 | dget(dentry); |
| 4243 | 4193 | ||
| 4194 | /* hold a ref to the parent's dentry */ | ||
| 4195 | dget(parent->dentry); | ||
| 4196 | |||
| 4244 | /* creation succeeded, notify subsystems */ | 4197 | /* creation succeeded, notify subsystems */ |
| 4245 | for_each_subsys(root, ss) { | 4198 | for_each_subsys(root, ss) { |
| 4246 | err = online_css(ss, cgrp); | 4199 | err = online_css(ss, cgrp); |
| @@ -4276,6 +4229,8 @@ err_free_all: | |||
| 4276 | deactivate_super(sb); | 4229 | deactivate_super(sb); |
| 4277 | err_free_id: | 4230 | err_free_id: |
| 4278 | ida_simple_remove(&root->cgroup_ida, cgrp->id); | 4231 | ida_simple_remove(&root->cgroup_ida, cgrp->id); |
| 4232 | err_free_name: | ||
| 4233 | kfree(rcu_dereference_raw(cgrp->name)); | ||
| 4279 | err_free_cgrp: | 4234 | err_free_cgrp: |
| 4280 | kfree(cgrp); | 4235 | kfree(cgrp); |
| 4281 | return err; | 4236 | return err; |
| @@ -4295,56 +4250,13 @@ static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
| 4295 | return cgroup_create(c_parent, dentry, mode | S_IFDIR); | 4250 | return cgroup_create(c_parent, dentry, mode | S_IFDIR); |
| 4296 | } | 4251 | } |
| 4297 | 4252 | ||
| 4298 | /* | ||
| 4299 | * Check the reference count on each subsystem. Since we already | ||
| 4300 | * established that there are no tasks in the cgroup, if the css refcount | ||
| 4301 | * is also 1, then there should be no outstanding references, so the | ||
| 4302 | * subsystem is safe to destroy. We scan across all subsystems rather than | ||
| 4303 | * using the per-hierarchy linked list of mounted subsystems since we can | ||
| 4304 | * be called via check_for_release() with no synchronization other than | ||
| 4305 | * RCU, and the subsystem linked list isn't RCU-safe. | ||
| 4306 | */ | ||
| 4307 | static int cgroup_has_css_refs(struct cgroup *cgrp) | ||
| 4308 | { | ||
| 4309 | int i; | ||
| 4310 | |||
| 4311 | /* | ||
| 4312 | * We won't need to lock the subsys array, because the subsystems | ||
| 4313 | * we're concerned about aren't going anywhere since our cgroup root | ||
| 4314 | * has a reference on them. | ||
| 4315 | */ | ||
| 4316 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | ||
| 4317 | struct cgroup_subsys *ss = subsys[i]; | ||
| 4318 | struct cgroup_subsys_state *css; | ||
| 4319 | |||
| 4320 | /* Skip subsystems not present or not in this hierarchy */ | ||
| 4321 | if (ss == NULL || ss->root != cgrp->root) | ||
| 4322 | continue; | ||
| 4323 | |||
| 4324 | css = cgrp->subsys[ss->subsys_id]; | ||
| 4325 | /* | ||
| 4326 | * When called from check_for_release() it's possible | ||
| 4327 | * that by this point the cgroup has been removed | ||
| 4328 | * and the css deleted. But a false-positive doesn't | ||
| 4329 | * matter, since it can only happen if the cgroup | ||
| 4330 | * has been deleted and hence no longer needs the | ||
| 4331 | * release agent to be called anyway. | ||
| 4332 | */ | ||
| 4333 | if (css && css_refcnt(css) > 1) | ||
| 4334 | return 1; | ||
| 4335 | } | ||
| 4336 | return 0; | ||
| 4337 | } | ||
| 4338 | |||
| 4339 | static int cgroup_destroy_locked(struct cgroup *cgrp) | 4253 | static int cgroup_destroy_locked(struct cgroup *cgrp) |
| 4340 | __releases(&cgroup_mutex) __acquires(&cgroup_mutex) | 4254 | __releases(&cgroup_mutex) __acquires(&cgroup_mutex) |
| 4341 | { | 4255 | { |
| 4342 | struct dentry *d = cgrp->dentry; | 4256 | struct dentry *d = cgrp->dentry; |
| 4343 | struct cgroup *parent = cgrp->parent; | 4257 | struct cgroup *parent = cgrp->parent; |
| 4344 | DEFINE_WAIT(wait); | ||
| 4345 | struct cgroup_event *event, *tmp; | 4258 | struct cgroup_event *event, *tmp; |
| 4346 | struct cgroup_subsys *ss; | 4259 | struct cgroup_subsys *ss; |
| 4347 | LIST_HEAD(tmp_list); | ||
| 4348 | 4260 | ||
| 4349 | lockdep_assert_held(&d->d_inode->i_mutex); | 4261 | lockdep_assert_held(&d->d_inode->i_mutex); |
| 4350 | lockdep_assert_held(&cgroup_mutex); | 4262 | lockdep_assert_held(&cgroup_mutex); |
| @@ -4935,17 +4847,17 @@ void cgroup_post_fork(struct task_struct *child) | |||
| 4935 | * and addition to css_set. | 4847 | * and addition to css_set. |
| 4936 | */ | 4848 | */ |
| 4937 | if (need_forkexit_callback) { | 4849 | if (need_forkexit_callback) { |
| 4938 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | 4850 | /* |
| 4851 | * fork/exit callbacks are supported only for builtin | ||
| 4852 | * subsystems, and the builtin section of the subsys | ||
| 4853 | * array is immutable, so we don't need to lock the | ||
| 4855 | * subsys array here. On the other hand, the modular section | ||
| 4855 | * of the array can be freed at module unload, so we | ||
| 4856 | * can't touch that. | ||
| 4857 | */ | ||
| 4858 | for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) { | ||
| 4939 | struct cgroup_subsys *ss = subsys[i]; | 4859 | struct cgroup_subsys *ss = subsys[i]; |
| 4940 | 4860 | ||
| 4941 | /* | ||
| 4942 | * fork/exit callbacks are supported only for | ||
| 4943 | * builtin subsystems and we don't need further | ||
| 4944 | * synchronization as they never go away. | ||
| 4945 | */ | ||
| 4946 | if (!ss || ss->module) | ||
| 4947 | continue; | ||
| 4948 | |||
| 4949 | if (ss->fork) | 4861 | if (ss->fork) |
| 4950 | ss->fork(child); | 4862 | ss->fork(child); |
| 4951 | } | 4863 | } |
| @@ -5010,13 +4922,13 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks) | |||
| 5010 | tsk->cgroups = &init_css_set; | 4922 | tsk->cgroups = &init_css_set; |
| 5011 | 4923 | ||
| 5012 | if (run_callbacks && need_forkexit_callback) { | 4924 | if (run_callbacks && need_forkexit_callback) { |
| 5013 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | 4925 | /* |
| 4926 | * fork/exit callbacks are supported only for builtin | ||
| 4927 | * subsystems, see cgroup_post_fork() for details. | ||
| 4928 | */ | ||
| 4929 | for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) { | ||
| 5014 | struct cgroup_subsys *ss = subsys[i]; | 4930 | struct cgroup_subsys *ss = subsys[i]; |
| 5015 | 4931 | ||
| 5016 | /* modular subsystems can't use callbacks */ | ||
| 5017 | if (!ss || ss->module) | ||
| 5018 | continue; | ||
| 5019 | |||
| 5020 | if (ss->exit) { | 4932 | if (ss->exit) { |
| 5021 | struct cgroup *old_cgrp = | 4933 | struct cgroup *old_cgrp = |
| 5022 | rcu_dereference_raw(cg->subsys[i])->cgroup; | 4934 | rcu_dereference_raw(cg->subsys[i])->cgroup; |
| @@ -5030,44 +4942,19 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks) | |||
| 5030 | put_css_set_taskexit(cg); | 4942 | put_css_set_taskexit(cg); |
| 5031 | } | 4943 | } |
| 5032 | 4944 | ||
| 5033 | /** | ||
| 5034 | * cgroup_is_descendant - see if @cgrp is a descendant of @task's cgrp | ||
| 5035 | * @cgrp: the cgroup in question | ||
| 5036 | * @task: the task in question | ||
| 5037 | * | ||
| 5038 | * See if @cgrp is a descendant of @task's cgroup in the appropriate | ||
| 5039 | * hierarchy. | ||
| 5040 | * | ||
| 5041 | * If we are sending in dummytop, then presumably we are creating | ||
| 5042 | * the top cgroup in the subsystem. | ||
| 5043 | * | ||
| 5044 | * Called only by the ns (nsproxy) cgroup. | ||
| 5045 | */ | ||
| 5046 | int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task) | ||
| 5047 | { | ||
| 5048 | int ret; | ||
| 5049 | struct cgroup *target; | ||
| 5050 | |||
| 5051 | if (cgrp == dummytop) | ||
| 5052 | return 1; | ||
| 5053 | |||
| 5054 | target = task_cgroup_from_root(task, cgrp->root); | ||
| 5055 | while (cgrp != target && cgrp!= cgrp->top_cgroup) | ||
| 5056 | cgrp = cgrp->parent; | ||
| 5057 | ret = (cgrp == target); | ||
| 5058 | return ret; | ||
| 5059 | } | ||
| 5060 | |||
| 5061 | static void check_for_release(struct cgroup *cgrp) | 4945 | static void check_for_release(struct cgroup *cgrp) |
| 5062 | { | 4946 | { |
| 5063 | /* All of these checks rely on RCU to keep the cgroup | 4947 | /* All of these checks rely on RCU to keep the cgroup |
| 5064 | * structure alive */ | 4948 | * structure alive */ |
| 5065 | if (cgroup_is_releasable(cgrp) && !atomic_read(&cgrp->count) | 4949 | if (cgroup_is_releasable(cgrp) && |
| 5066 | && list_empty(&cgrp->children) && !cgroup_has_css_refs(cgrp)) { | 4950 | !atomic_read(&cgrp->count) && list_empty(&cgrp->children)) { |
| 5067 | /* Control Group is currently removeable. If it's not | 4951 | /* |
| 4952 | * Control Group is currently removeable. If it's not | ||
| 5068 | * already queued for a userspace notification, queue | 4953 | * already queued for a userspace notification, queue |
| 5069 | * it now */ | 4954 | * it now |
| 4955 | */ | ||
| 5070 | int need_schedule_work = 0; | 4956 | int need_schedule_work = 0; |
| 4957 | |||
| 5071 | raw_spin_lock(&release_list_lock); | 4958 | raw_spin_lock(&release_list_lock); |
| 5072 | if (!cgroup_is_removed(cgrp) && | 4959 | if (!cgroup_is_removed(cgrp) && |
| 5073 | list_empty(&cgrp->release_list)) { | 4960 | list_empty(&cgrp->release_list)) { |
| @@ -5100,24 +4987,11 @@ EXPORT_SYMBOL_GPL(__css_tryget); | |||
| 5100 | /* Caller must verify that the css is not for root cgroup */ | 4987 | /* Caller must verify that the css is not for root cgroup */ |
| 5101 | void __css_put(struct cgroup_subsys_state *css) | 4988 | void __css_put(struct cgroup_subsys_state *css) |
| 5102 | { | 4989 | { |
| 5103 | struct cgroup *cgrp = css->cgroup; | ||
| 5104 | int v; | 4990 | int v; |
| 5105 | 4991 | ||
| 5106 | rcu_read_lock(); | ||
| 5107 | v = css_unbias_refcnt(atomic_dec_return(&css->refcnt)); | 4992 | v = css_unbias_refcnt(atomic_dec_return(&css->refcnt)); |
| 5108 | 4993 | if (v == 0) | |
| 5109 | switch (v) { | ||
| 5110 | case 1: | ||
| 5111 | if (notify_on_release(cgrp)) { | ||
| 5112 | set_bit(CGRP_RELEASABLE, &cgrp->flags); | ||
| 5113 | check_for_release(cgrp); | ||
| 5114 | } | ||
| 5115 | break; | ||
| 5116 | case 0: | ||
| 5117 | schedule_work(&css->dput_work); | 4994 | schedule_work(&css->dput_work); |
| 5118 | break; | ||
| 5119 | } | ||
| 5120 | rcu_read_unlock(); | ||
| 5121 | } | 4995 | } |
| 5122 | EXPORT_SYMBOL_GPL(__css_put); | 4996 | EXPORT_SYMBOL_GPL(__css_put); |
| 5123 | 4997 | ||
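The unbiasing step exists because a css being deactivated carries a large negative bias in its refcount, and css_unbias_refcnt() strips it before the value is inspected. A conceptual sketch, assuming the CSS_DEACT_BIAS scheme used earlier in this file (the real helper may differ in detail):

	/* refcnt goes negative while deactivation is in flight; remove the bias */
	static int unbias(int refcnt)
	{
		return refcnt >= 0 ? refcnt : refcnt - CSS_DEACT_BIAS;
	}

With the release-notification branch removed, only the final reference (unbiased value 0) still triggers work here: scheduling dput_work.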
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 027a6f65f2ad..12331120767c 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
| @@ -265,17 +265,6 @@ static DEFINE_MUTEX(cpuset_mutex); | |||
| 265 | static DEFINE_MUTEX(callback_mutex); | 265 | static DEFINE_MUTEX(callback_mutex); |
| 266 | 266 | ||
| 267 | /* | 267 | /* |
| 268 | * cpuset_buffer_lock protects both the cpuset_name and cpuset_nodelist | ||
| 269 | * buffers. They are statically allocated to prevent using excess stack | ||
| 270 | * when calling cpuset_print_task_mems_allowed(). | ||
| 271 | */ | ||
| 272 | #define CPUSET_NAME_LEN (128) | ||
| 273 | #define CPUSET_NODELIST_LEN (256) | ||
| 274 | static char cpuset_name[CPUSET_NAME_LEN]; | ||
| 275 | static char cpuset_nodelist[CPUSET_NODELIST_LEN]; | ||
| 276 | static DEFINE_SPINLOCK(cpuset_buffer_lock); | ||
| 277 | |||
| 278 | /* | ||
| 279 | * CPU / memory hotplug is handled asynchronously. | 268 | * CPU / memory hotplug is handled asynchronously. |
| 280 | */ | 269 | */ |
| 281 | static struct workqueue_struct *cpuset_propagate_hotplug_wq; | 270 | static struct workqueue_struct *cpuset_propagate_hotplug_wq; |
| @@ -780,25 +769,26 @@ static void rebuild_sched_domains_locked(void) | |||
| 780 | lockdep_assert_held(&cpuset_mutex); | 769 | lockdep_assert_held(&cpuset_mutex); |
| 781 | get_online_cpus(); | 770 | get_online_cpus(); |
| 782 | 771 | ||
| 772 | /* | ||
| 773 | * We have raced with CPU hotplug. Bail out to avoid passing | ||
| 774 | * doms with an offlined cpu to partition_sched_domains(); | ||
| 775 | * the hotplug work item will rebuild the sched domains anyway. | ||
| 776 | */ | ||
| 777 | if (!cpumask_equal(top_cpuset.cpus_allowed, cpu_active_mask)) | ||
| 778 | goto out; | ||
| 779 | |||
| 783 | /* Generate domain masks and attrs */ | 780 | /* Generate domain masks and attrs */ |
| 784 | ndoms = generate_sched_domains(&doms, &attr); | 781 | ndoms = generate_sched_domains(&doms, &attr); |
| 785 | 782 | ||
| 786 | /* Have scheduler rebuild the domains */ | 783 | /* Have scheduler rebuild the domains */ |
| 787 | partition_sched_domains(ndoms, doms, attr); | 784 | partition_sched_domains(ndoms, doms, attr); |
| 788 | 785 | out: | |
| 789 | put_online_cpus(); | 786 | put_online_cpus(); |
| 790 | } | 787 | } |
| 791 | #else /* !CONFIG_SMP */ | 788 | #else /* !CONFIG_SMP */ |
| 792 | static void rebuild_sched_domains_locked(void) | 789 | static void rebuild_sched_domains_locked(void) |
| 793 | { | 790 | { |
| 794 | } | 791 | } |
| 795 | |||
| 796 | static int generate_sched_domains(cpumask_var_t **domains, | ||
| 797 | struct sched_domain_attr **attributes) | ||
| 798 | { | ||
| 799 | *domains = NULL; | ||
| 800 | return 1; | ||
| 801 | } | ||
| 802 | #endif /* CONFIG_SMP */ | 792 | #endif /* CONFIG_SMP */ |
| 803 | 793 | ||
| 804 | void rebuild_sched_domains(void) | 794 | void rebuild_sched_domains(void) |
| @@ -2005,50 +1995,6 @@ int __init cpuset_init(void) | |||
| 2005 | return 0; | 1995 | return 0; |
| 2006 | } | 1996 | } |
| 2007 | 1997 | ||
| 2008 | /** | ||
| 2009 | * cpuset_do_move_task - move a given task to another cpuset | ||
| 2010 | * @tsk: pointer to task_struct the task to move | ||
| 2011 | * @scan: struct cgroup_scanner contained in its struct cpuset_hotplug_scanner | ||
| 2012 | * | ||
| 2013 | * Called by cgroup_scan_tasks() for each task in a cgroup. | ||
| 2014 | * Return nonzero to stop the walk through the tasks. | ||
| 2015 | */ | ||
| 2016 | static void cpuset_do_move_task(struct task_struct *tsk, | ||
| 2017 | struct cgroup_scanner *scan) | ||
| 2018 | { | ||
| 2019 | struct cgroup *new_cgroup = scan->data; | ||
| 2020 | |||
| 2021 | cgroup_lock(); | ||
| 2022 | cgroup_attach_task(new_cgroup, tsk); | ||
| 2023 | cgroup_unlock(); | ||
| 2024 | } | ||
| 2025 | |||
| 2026 | /** | ||
| 2027 | * move_member_tasks_to_cpuset - move tasks from one cpuset to another | ||
| 2028 | * @from: cpuset in which the tasks currently reside | ||
| 2029 | * @to: cpuset to which the tasks will be moved | ||
| 2030 | * | ||
| 2031 | * Called with cpuset_mutex held | ||
| 2032 | * callback_mutex must not be held, as cpuset_attach() will take it. | ||
| 2033 | * | ||
| 2034 | * The cgroup_scan_tasks() function will scan all the tasks in a cgroup, | ||
| 2035 | * calling callback functions for each. | ||
| 2036 | */ | ||
| 2037 | static void move_member_tasks_to_cpuset(struct cpuset *from, struct cpuset *to) | ||
| 2038 | { | ||
| 2039 | struct cgroup_scanner scan; | ||
| 2040 | |||
| 2041 | scan.cg = from->css.cgroup; | ||
| 2042 | scan.test_task = NULL; /* select all tasks in cgroup */ | ||
| 2043 | scan.process_task = cpuset_do_move_task; | ||
| 2044 | scan.heap = NULL; | ||
| 2045 | scan.data = to->css.cgroup; | ||
| 2046 | |||
| 2047 | if (cgroup_scan_tasks(&scan)) | ||
| 2048 | printk(KERN_ERR "move_member_tasks_to_cpuset: " | ||
| 2049 | "cgroup_scan_tasks failed\n"); | ||
| 2050 | } | ||
| 2051 | |||
| 2052 | /* | 1998 | /* |
| 2053 | * If CPU and/or memory hotplug handlers, below, unplug any CPUs | 1999 | * If CPU and/or memory hotplug handlers, below, unplug any CPUs |
| 2054 | * or memory nodes, we need to walk over the cpuset hierarchy, | 2000 | * or memory nodes, we need to walk over the cpuset hierarchy, |
| @@ -2069,7 +2015,12 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs) | |||
| 2069 | nodes_empty(parent->mems_allowed)) | 2015 | nodes_empty(parent->mems_allowed)) |
| 2070 | parent = parent_cs(parent); | 2016 | parent = parent_cs(parent); |
| 2071 | 2017 | ||
| 2072 | move_member_tasks_to_cpuset(cs, parent); | 2018 | if (cgroup_transfer_tasks(parent->css.cgroup, cs->css.cgroup)) { |
| 2019 | rcu_read_lock(); | ||
| 2020 | printk(KERN_ERR "cpuset: failed to transfer tasks out of empty cpuset %s\n", | ||
| 2021 | cgroup_name(cs->css.cgroup)); | ||
| 2022 | rcu_read_unlock(); | ||
| 2023 | } | ||
| 2073 | } | 2024 | } |
| 2074 | 2025 | ||
| 2075 | /** | 2026 | /** |
| @@ -2222,17 +2173,8 @@ static void cpuset_hotplug_workfn(struct work_struct *work) | |||
| 2222 | flush_workqueue(cpuset_propagate_hotplug_wq); | 2173 | flush_workqueue(cpuset_propagate_hotplug_wq); |
| 2223 | 2174 | ||
| 2224 | /* rebuild sched domains if cpus_allowed has changed */ | 2175 | /* rebuild sched domains if cpus_allowed has changed */ |
| 2225 | if (cpus_updated) { | 2176 | if (cpus_updated) |
| 2226 | struct sched_domain_attr *attr; | 2177 | rebuild_sched_domains(); |
| 2227 | cpumask_var_t *doms; | ||
| 2228 | int ndoms; | ||
| 2229 | |||
| 2230 | mutex_lock(&cpuset_mutex); | ||
| 2231 | ndoms = generate_sched_domains(&doms, &attr); | ||
| 2232 | mutex_unlock(&cpuset_mutex); | ||
| 2233 | |||
| 2234 | partition_sched_domains(ndoms, doms, attr); | ||
| 2235 | } | ||
| 2236 | } | 2178 | } |
| 2237 | 2179 | ||
| 2238 | void cpuset_update_active_cpus(bool cpu_online) | 2180 | void cpuset_update_active_cpus(bool cpu_online) |
| @@ -2594,6 +2536,8 @@ int cpuset_mems_allowed_intersects(const struct task_struct *tsk1, | |||
| 2594 | return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed); | 2536 | return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed); |
| 2595 | } | 2537 | } |
| 2596 | 2538 | ||
| 2539 | #define CPUSET_NODELIST_LEN (256) | ||
| 2540 | |||
| 2597 | /** | 2541 | /** |
| 2598 | * cpuset_print_task_mems_allowed - prints task's cpuset and mems_allowed | 2542 | * cpuset_print_task_mems_allowed - prints task's cpuset and mems_allowed |
| 2599 | * @task: pointer to task_struct of some task. | 2543 | * @task: pointer to task_struct of some task. |
| @@ -2604,25 +2548,22 @@ int cpuset_mems_allowed_intersects(const struct task_struct *tsk1, | |||
| 2604 | */ | 2548 | */ |
| 2605 | void cpuset_print_task_mems_allowed(struct task_struct *tsk) | 2549 | void cpuset_print_task_mems_allowed(struct task_struct *tsk) |
| 2606 | { | 2550 | { |
| 2607 | struct dentry *dentry; | 2551 | /* Statically allocated to prevent using excess stack. */ |
| 2552 | static char cpuset_nodelist[CPUSET_NODELIST_LEN]; | ||
| 2553 | static DEFINE_SPINLOCK(cpuset_buffer_lock); | ||
| 2608 | 2554 | ||
| 2609 | dentry = task_cs(tsk)->css.cgroup->dentry; | 2555 | struct cgroup *cgrp = task_cs(tsk)->css.cgroup; |
| 2610 | spin_lock(&cpuset_buffer_lock); | ||
| 2611 | 2556 | ||
| 2612 | if (!dentry) { | 2557 | rcu_read_lock(); |
| 2613 | strcpy(cpuset_name, "/"); | 2558 | spin_lock(&cpuset_buffer_lock); |
| 2614 | } else { | ||
| 2615 | spin_lock(&dentry->d_lock); | ||
| 2616 | strlcpy(cpuset_name, (const char *)dentry->d_name.name, | ||
| 2617 | CPUSET_NAME_LEN); | ||
| 2618 | spin_unlock(&dentry->d_lock); | ||
| 2619 | } | ||
| 2620 | 2559 | ||
| 2621 | nodelist_scnprintf(cpuset_nodelist, CPUSET_NODELIST_LEN, | 2560 | nodelist_scnprintf(cpuset_nodelist, CPUSET_NODELIST_LEN, |
| 2622 | tsk->mems_allowed); | 2561 | tsk->mems_allowed); |
| 2623 | printk(KERN_INFO "%s cpuset=%s mems_allowed=%s\n", | 2562 | printk(KERN_INFO "%s cpuset=%s mems_allowed=%s\n", |
| 2624 | tsk->comm, cpuset_name, cpuset_nodelist); | 2563 | tsk->comm, cgroup_name(cgrp), cpuset_nodelist); |
| 2564 | |||
| 2625 | spin_unlock(&cpuset_buffer_lock); | 2565 | spin_unlock(&cpuset_buffer_lock); |
| 2566 | rcu_read_unlock(); | ||
| 2626 | } | 2567 | } |
| 2627 | 2568 | ||
| 2628 | /* | 2569 | /* |
diff --git a/kernel/events/core.c b/kernel/events/core.c index 9fcb0944f071..dce6e13cf9d7 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
| @@ -251,7 +251,22 @@ perf_cgroup_match(struct perf_event *event) | |||
| 251 | struct perf_event_context *ctx = event->ctx; | 251 | struct perf_event_context *ctx = event->ctx; |
| 252 | struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); | 252 | struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); |
| 253 | 253 | ||
| 254 | return !event->cgrp || event->cgrp == cpuctx->cgrp; | 254 | /* @event doesn't care about cgroup */ |
| 255 | if (!event->cgrp) | ||
| 256 | return true; | ||
| 257 | |||
| 258 | /* wants specific cgroup scope but @cpuctx isn't associated with any */ | ||
| 259 | if (!cpuctx->cgrp) | ||
| 260 | return false; | ||
| 261 | |||
| 262 | /* | ||
| 263 | * Cgroup scoping is recursive. An event enabled for a cgroup is | ||
| 264 | * also enabled for all its descendant cgroups. If @cpuctx's | ||
| 265 | * cgroup is a descendant of @event's (the test covers identity | ||
| 266 | * case), it's a match. | ||
| 267 | */ | ||
| 268 | return cgroup_is_descendant(cpuctx->cgrp->css.cgroup, | ||
| 269 | event->cgrp->css.cgroup); | ||
| 255 | } | 270 | } |
| 256 | 271 | ||
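The cgroup_is_descendant() used here is defined outside this hunk; conceptually it is an ancestor walk. A minimal sketch under that assumption (the real helper may be implemented differently):

	static bool is_descendant(struct cgroup *cgrp, struct cgroup *anc)
	{
		while (cgrp) {
			if (cgrp == anc)
				return true;	/* identity counts as a match */
			cgrp = cgrp->parent;
		}
		return false;
	}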
| 257 | static inline bool perf_tryget_cgroup(struct perf_event *event) | 272 | static inline bool perf_tryget_cgroup(struct perf_event *event) |
| @@ -7517,12 +7532,5 @@ struct cgroup_subsys perf_subsys = { | |||
| 7517 | .css_free = perf_cgroup_css_free, | 7532 | .css_free = perf_cgroup_css_free, |
| 7518 | .exit = perf_cgroup_exit, | 7533 | .exit = perf_cgroup_exit, |
| 7519 | .attach = perf_cgroup_attach, | 7534 | .attach = perf_cgroup_attach, |
| 7520 | |||
| 7521 | /* | ||
| 7522 | * perf_event cgroup doesn't handle nesting correctly. | ||
| 7523 | * ctx->nr_cgroups adjustments should be propagated through the | ||
| 7524 | * cgroup hierarchy. Fix it and remove the following. | ||
| 7525 | */ | ||
| 7526 | .broken_hierarchy = true, | ||
| 7527 | }; | 7535 | }; |
| 7528 | #endif /* CONFIG_CGROUP_PERF */ | 7536 | #endif /* CONFIG_CGROUP_PERF */ |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index b8dc8e4cbf6a..0f1d92163f30 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
| @@ -3321,52 +3321,53 @@ void mem_cgroup_destroy_cache(struct kmem_cache *cachep) | |||
| 3321 | schedule_work(&cachep->memcg_params->destroy); | 3321 | schedule_work(&cachep->memcg_params->destroy); |
| 3322 | } | 3322 | } |
| 3323 | 3323 | ||
| 3324 | static char *memcg_cache_name(struct mem_cgroup *memcg, struct kmem_cache *s) | 3324 | /* |
| 3325 | { | 3325 | * This lock protects updaters, not readers. We want readers to be as fast as |
| 3326 | char *name; | 3326 | * they can, and they will either see NULL or a valid cache value. Our model |
| 3327 | struct dentry *dentry; | 3327 | * allows them to see NULL, in which case the root memcg will be selected. |
| 3328 | 3328 | * | |
| 3329 | rcu_read_lock(); | 3329 | * We need this lock because multiple allocations to the same cache can |
| 3330 | dentry = rcu_dereference(memcg->css.cgroup->dentry); | 3330 | * span more than one worker. Only one of them can create the cache. |
| 3331 | rcu_read_unlock(); | 3331 | */ |
| 3332 | 3332 | static DEFINE_MUTEX(memcg_cache_mutex); | |
| 3333 | BUG_ON(dentry == NULL); | ||
| 3334 | |||
| 3335 | name = kasprintf(GFP_KERNEL, "%s(%d:%s)", s->name, | ||
| 3336 | memcg_cache_id(memcg), dentry->d_name.name); | ||
| 3337 | |||
| 3338 | return name; | ||
| 3339 | } | ||
| 3340 | 3333 | ||
| 3334 | /* | ||
| 3335 | * Called with memcg_cache_mutex held | ||
| 3336 | */ | ||
| 3341 | static struct kmem_cache *kmem_cache_dup(struct mem_cgroup *memcg, | 3337 | static struct kmem_cache *kmem_cache_dup(struct mem_cgroup *memcg, |
| 3342 | struct kmem_cache *s) | 3338 | struct kmem_cache *s) |
| 3343 | { | 3339 | { |
| 3344 | char *name; | ||
| 3345 | struct kmem_cache *new; | 3340 | struct kmem_cache *new; |
| 3341 | static char *tmp_name = NULL; | ||
| 3346 | 3342 | ||
| 3347 | name = memcg_cache_name(memcg, s); | 3343 | lockdep_assert_held(&memcg_cache_mutex); |
| 3348 | if (!name) | 3344 | |
| 3349 | return NULL; | 3345 | /* |
| 3346 | * kmem_cache_create_memcg duplicates the given name, and | ||
| 3347 | * reading cgroup_name for this name requires RCU context. | ||
| 3348 | * This static temporary buffer is used to prevent a | ||
| 3349 | * pointless short-lived allocation. | ||
| 3350 | */ | ||
| 3351 | if (!tmp_name) { | ||
| 3352 | tmp_name = kmalloc(PATH_MAX, GFP_KERNEL); | ||
| 3353 | if (!tmp_name) | ||
| 3354 | return NULL; | ||
| 3355 | } | ||
| 3356 | |||
| 3357 | rcu_read_lock(); | ||
| 3358 | snprintf(tmp_name, PATH_MAX, "%s(%d:%s)", s->name, | ||
| 3359 | memcg_cache_id(memcg), cgroup_name(memcg->css.cgroup)); | ||
| 3360 | rcu_read_unlock(); | ||
| 3350 | 3361 | ||
| 3351 | new = kmem_cache_create_memcg(memcg, name, s->object_size, s->align, | 3362 | new = kmem_cache_create_memcg(memcg, tmp_name, s->object_size, s->align, |
| 3352 | (s->flags & ~SLAB_PANIC), s->ctor, s); | 3363 | (s->flags & ~SLAB_PANIC), s->ctor, s); |
| 3353 | 3364 | ||
| 3354 | if (new) | 3365 | if (new) |
| 3355 | new->allocflags |= __GFP_KMEMCG; | 3366 | new->allocflags |= __GFP_KMEMCG; |
| 3356 | 3367 | ||
| 3357 | kfree(name); | ||
| 3358 | return new; | 3368 | return new; |
| 3359 | } | 3369 | } |
| 3360 | 3370 | ||
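With the snprintf() format above, duplicating a cache named "dentry" for a memcg with cache id 2 whose cgroup is called "mygroup" yields the cache name "dentry(2:mygroup)" (the names and id here are illustrative).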
| 3361 | /* | ||
| 3362 | * This lock protects updaters, not readers. We want readers to be as fast as | ||
| 3363 | * they can, and they will either see NULL or a valid cache value. Our model | ||
| 3364 | * allow them to see NULL, in which case the root memcg will be selected. | ||
| 3365 | * | ||
| 3366 | * We need this lock because multiple allocations to the same cache from a non | ||
| 3367 | * will span more than one worker. Only one of them can create the cache. | ||
| 3368 | */ | ||
| 3369 | static DEFINE_MUTEX(memcg_cache_mutex); | ||
| 3370 | static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg, | 3371 | static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg, |
| 3371 | struct kmem_cache *cachep) | 3372 | struct kmem_cache *cachep) |
| 3372 | { | 3373 | { |
| @@ -5912,6 +5913,7 @@ static struct cftype mem_cgroup_files[] = { | |||
| 5912 | }, | 5913 | }, |
| 5913 | { | 5914 | { |
| 5914 | .name = "use_hierarchy", | 5915 | .name = "use_hierarchy", |
| 5916 | .flags = CFTYPE_INSANE, | ||
| 5915 | .write_u64 = mem_cgroup_hierarchy_write, | 5917 | .write_u64 = mem_cgroup_hierarchy_write, |
| 5916 | .read_u64 = mem_cgroup_hierarchy_read, | 5918 | .read_u64 = mem_cgroup_hierarchy_read, |
| 5917 | }, | 5919 | }, |
| @@ -6907,6 +6909,21 @@ static void mem_cgroup_move_task(struct cgroup *cont, | |||
| 6907 | } | 6909 | } |
| 6908 | #endif | 6910 | #endif |
| 6909 | 6911 | ||
| 6912 | /* | ||
| 6913 | * Cgroup retains root cgroups across [un]mount cycles making it necessary | ||
| 6914 | * to verify sane_behavior flag on each mount attempt. | ||
| 6915 | */ | ||
| 6916 | static void mem_cgroup_bind(struct cgroup *root) | ||
| 6917 | { | ||
| 6918 | /* | ||
| 6919 | * use_hierarchy is forced with sane_behavior. cgroup core | ||
| 6920 | * guarantees that @root doesn't have any children, so turning it | ||
| 6921 | * on for the root memcg is enough. | ||
| 6922 | */ | ||
| 6923 | if (cgroup_sane_behavior(root)) | ||
| 6924 | mem_cgroup_from_cont(root)->use_hierarchy = true; | ||
| 6925 | } | ||
| 6926 | |||
| 6910 | struct cgroup_subsys mem_cgroup_subsys = { | 6927 | struct cgroup_subsys mem_cgroup_subsys = { |
| 6911 | .name = "memory", | 6928 | .name = "memory", |
| 6912 | .subsys_id = mem_cgroup_subsys_id, | 6929 | .subsys_id = mem_cgroup_subsys_id, |
| @@ -6917,6 +6934,7 @@ struct cgroup_subsys mem_cgroup_subsys = { | |||
| 6917 | .can_attach = mem_cgroup_can_attach, | 6934 | .can_attach = mem_cgroup_can_attach, |
| 6918 | .cancel_attach = mem_cgroup_cancel_attach, | 6935 | .cancel_attach = mem_cgroup_cancel_attach, |
| 6919 | .attach = mem_cgroup_move_task, | 6936 | .attach = mem_cgroup_move_task, |
| 6937 | .bind = mem_cgroup_bind, | ||
| 6920 | .base_cftypes = mem_cgroup_files, | 6938 | .base_cftypes = mem_cgroup_files, |
| 6921 | .early_init = 0, | 6939 | .early_init = 0, |
| 6922 | .use_id = 1, | 6940 | .use_id = 1, |
diff --git a/security/device_cgroup.c b/security/device_cgroup.c index 1c69e38e3a2c..dd0dc574d78d 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c | |||
| @@ -25,6 +25,12 @@ | |||
| 25 | 25 | ||
| 26 | static DEFINE_MUTEX(devcgroup_mutex); | 26 | static DEFINE_MUTEX(devcgroup_mutex); |
| 27 | 27 | ||
| 28 | enum devcg_behavior { | ||
| 29 | DEVCG_DEFAULT_NONE, | ||
| 30 | DEVCG_DEFAULT_ALLOW, | ||
| 31 | DEVCG_DEFAULT_DENY, | ||
| 32 | }; | ||
| 33 | |||
| 28 | /* | 34 | /* |
| 29 | * exception list locking rules: | 35 | * exception list locking rules: |
| 30 | * hold devcgroup_mutex for update/read. | 36 | * hold devcgroup_mutex for update/read. |
| @@ -42,10 +48,9 @@ struct dev_exception_item { | |||
| 42 | struct dev_cgroup { | 48 | struct dev_cgroup { |
| 43 | struct cgroup_subsys_state css; | 49 | struct cgroup_subsys_state css; |
| 44 | struct list_head exceptions; | 50 | struct list_head exceptions; |
| 45 | enum { | 51 | enum devcg_behavior behavior; |
| 46 | DEVCG_DEFAULT_ALLOW, | 52 | /* temporary list for pending propagation operations */ |
| 47 | DEVCG_DEFAULT_DENY, | 53 | struct list_head propagate_pending; |
| 48 | } behavior; | ||
| 49 | }; | 54 | }; |
| 50 | 55 | ||
| 51 | static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s) | 56 | static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s) |
| @@ -182,35 +187,62 @@ static void dev_exception_clean(struct dev_cgroup *dev_cgroup) | |||
| 182 | __dev_exception_clean(dev_cgroup); | 187 | __dev_exception_clean(dev_cgroup); |
| 183 | } | 188 | } |
| 184 | 189 | ||
| 190 | static inline bool is_devcg_online(const struct dev_cgroup *devcg) | ||
| 191 | { | ||
| 192 | return (devcg->behavior != DEVCG_DEFAULT_NONE); | ||
| 193 | } | ||
| 194 | |||
| 195 | /** | ||
| 196 | * devcgroup_online - initializes devcgroup's behavior and exceptions based on | ||
| 197 | * parent's | ||
| 198 | * @cgroup: cgroup getting online | ||
| 199 | * returns 0 in case of success, error code otherwise | ||
| 200 | */ | ||
| 201 | static int devcgroup_online(struct cgroup *cgroup) | ||
| 202 | { | ||
| 203 | struct dev_cgroup *dev_cgroup, *parent_dev_cgroup = NULL; | ||
| 204 | int ret = 0; | ||
| 205 | |||
| 206 | mutex_lock(&devcgroup_mutex); | ||
| 207 | dev_cgroup = cgroup_to_devcgroup(cgroup); | ||
| 208 | if (cgroup->parent) | ||
| 209 | parent_dev_cgroup = cgroup_to_devcgroup(cgroup->parent); | ||
| 210 | |||
| 211 | if (parent_dev_cgroup == NULL) | ||
| 212 | dev_cgroup->behavior = DEVCG_DEFAULT_ALLOW; | ||
| 213 | else { | ||
| 214 | ret = dev_exceptions_copy(&dev_cgroup->exceptions, | ||
| 215 | &parent_dev_cgroup->exceptions); | ||
| 216 | if (!ret) | ||
| 217 | dev_cgroup->behavior = parent_dev_cgroup->behavior; | ||
| 218 | } | ||
| 219 | mutex_unlock(&devcgroup_mutex); | ||
| 220 | |||
| 221 | return ret; | ||
| 222 | } | ||
| 223 | |||
| 224 | static void devcgroup_offline(struct cgroup *cgroup) | ||
| 225 | { | ||
| 226 | struct dev_cgroup *dev_cgroup = cgroup_to_devcgroup(cgroup); | ||
| 227 | |||
| 228 | mutex_lock(&devcgroup_mutex); | ||
| 229 | dev_cgroup->behavior = DEVCG_DEFAULT_NONE; | ||
| 230 | mutex_unlock(&devcgroup_mutex); | ||
| 231 | } | ||
| 232 | |||
| 185 | /* | 233 | /* |
| 186 | * called from kernel/cgroup.c with cgroup_lock() held. | 234 | * called from kernel/cgroup.c with cgroup_lock() held. |
| 187 | */ | 235 | */ |
| 188 | static struct cgroup_subsys_state *devcgroup_css_alloc(struct cgroup *cgroup) | 236 | static struct cgroup_subsys_state *devcgroup_css_alloc(struct cgroup *cgroup) |
| 189 | { | 237 | { |
| 190 | struct dev_cgroup *dev_cgroup, *parent_dev_cgroup; | 238 | struct dev_cgroup *dev_cgroup; |
| 191 | struct cgroup *parent_cgroup; | ||
| 192 | int ret; | ||
| 193 | 239 | ||
| 194 | dev_cgroup = kzalloc(sizeof(*dev_cgroup), GFP_KERNEL); | 240 | dev_cgroup = kzalloc(sizeof(*dev_cgroup), GFP_KERNEL); |
| 195 | if (!dev_cgroup) | 241 | if (!dev_cgroup) |
| 196 | return ERR_PTR(-ENOMEM); | 242 | return ERR_PTR(-ENOMEM); |
| 197 | INIT_LIST_HEAD(&dev_cgroup->exceptions); | 243 | INIT_LIST_HEAD(&dev_cgroup->exceptions); |
| 198 | parent_cgroup = cgroup->parent; | 244 | INIT_LIST_HEAD(&dev_cgroup->propagate_pending); |
| 199 | 245 | dev_cgroup->behavior = DEVCG_DEFAULT_NONE; | |
| 200 | if (parent_cgroup == NULL) | ||
| 201 | dev_cgroup->behavior = DEVCG_DEFAULT_ALLOW; | ||
| 202 | else { | ||
| 203 | parent_dev_cgroup = cgroup_to_devcgroup(parent_cgroup); | ||
| 204 | mutex_lock(&devcgroup_mutex); | ||
| 205 | ret = dev_exceptions_copy(&dev_cgroup->exceptions, | ||
| 206 | &parent_dev_cgroup->exceptions); | ||
| 207 | dev_cgroup->behavior = parent_dev_cgroup->behavior; | ||
| 208 | mutex_unlock(&devcgroup_mutex); | ||
| 209 | if (ret) { | ||
| 210 | kfree(dev_cgroup); | ||
| 211 | return ERR_PTR(ret); | ||
| 212 | } | ||
| 213 | } | ||
| 214 | 246 | ||
| 215 | return &dev_cgroup->css; | 247 | return &dev_cgroup->css; |
| 216 | } | 248 | } |
| @@ -304,9 +336,11 @@ static int devcgroup_seq_read(struct cgroup *cgroup, struct cftype *cft, | |||
| 304 | * verify if a certain access is allowed. | 336 | * verify if a certain access is allowed. |
| 305 | * @dev_cgroup: dev cgroup to be tested against | 337 | * @dev_cgroup: dev cgroup to be tested against |
| 306 | * @refex: new exception | 338 | * @refex: new exception |
| 339 | * @behavior: behavior of the exception | ||
| 307 | */ | 340 | */ |
| 308 | static int may_access(struct dev_cgroup *dev_cgroup, | 341 | static bool may_access(struct dev_cgroup *dev_cgroup, |
| 309 | struct dev_exception_item *refex) | 342 | struct dev_exception_item *refex, |
| 343 | enum devcg_behavior behavior) | ||
| 310 | { | 344 | { |
| 311 | struct dev_exception_item *ex; | 345 | struct dev_exception_item *ex; |
| 312 | bool match = false; | 346 | bool match = false; |
| @@ -330,18 +364,29 @@ static int may_access(struct dev_cgroup *dev_cgroup, | |||
| 330 | break; | 364 | break; |
| 331 | } | 365 | } |
| 332 | 366 | ||
| 333 | /* | 367 | if (dev_cgroup->behavior == DEVCG_DEFAULT_ALLOW) { |
| 334 | * In two cases we'll consider this new exception valid: | 368 | if (behavior == DEVCG_DEFAULT_ALLOW) { |
| 335 | * - the dev cgroup has its default policy to allow + exception list: | 369 | /* the exception will deny access to certain devices */ |
| 336 | * the new exception should *not* match any of the exceptions | 370 | return true; |
| 337 | * (behavior == DEVCG_DEFAULT_ALLOW, !match) | 371 | } else { |
| 338 | * - the dev cgroup has its default policy to deny + exception list: | 372 | /* the exception will allow access to certain devices */ |
| 339 | * the new exception *should* match the exceptions | 373 | if (match) |
| 340 | * (behavior == DEVCG_DEFAULT_DENY, match) | 374 | /* |
| 341 | */ | 375 | * a new exception allowing access shouldn't |
| 342 | if ((dev_cgroup->behavior == DEVCG_DEFAULT_DENY) == match) | 376 | * match a parent's exception |
| 343 | return 1; | 377 | */ |
| 344 | return 0; | 378 | return false; |
| 379 | return true; | ||
| 380 | } | ||
| 381 | } else { | ||
| 382 | /* only behavior == DEVCG_DEFAULT_DENY allowed here */ | ||
| 383 | if (match) | ||
| 384 | /* parent has an exception that matches the proposed */ | ||
| 385 | return true; | ||
| 386 | else | ||
| 387 | return false; | ||
| 388 | } | ||
| 389 | return false; | ||
| 345 | } | 390 | } |
| 346 | 391 | ||
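The branches above reduce to the following decision table for a proposed exception (parent state on the left, proposed child behavior in the middle):

	parent behavior		child behavior		new exception is valid when
	allow			allow			always (it can only deny more)
	allow			deny			it matches no parent exception
	deny			allow or deny		it matches a parent exception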
| 347 | /* | 392 | /* |
| @@ -358,7 +403,7 @@ static int parent_has_perm(struct dev_cgroup *childcg, | |||
| 358 | if (!pcg) | 403 | if (!pcg) |
| 359 | return 1; | 404 | return 1; |
| 360 | parent = cgroup_to_devcgroup(pcg); | 405 | parent = cgroup_to_devcgroup(pcg); |
| 361 | return may_access(parent, ex); | 406 | return may_access(parent, ex, childcg->behavior); |
| 362 | } | 407 | } |
| 363 | 408 | ||
| 364 | /** | 409 | /** |
| @@ -374,6 +419,111 @@ static inline int may_allow_all(struct dev_cgroup *parent) | |||
| 374 | return parent->behavior == DEVCG_DEFAULT_ALLOW; | 419 | return parent->behavior == DEVCG_DEFAULT_ALLOW; |
| 375 | } | 420 | } |
| 376 | 421 | ||
| 422 | /** | ||
| 423 | * revalidate_active_exceptions - walks through the active exception list and | ||
| 424 | * revalidates the exceptions based on the parent's | ||
| 425 | * behavior and exceptions. The exceptions that | ||
| 426 | * are no longer valid will be removed. | ||
| 427 | * Called with devcgroup_mutex held. | ||
| 428 | * @devcg: cgroup which exceptions will be checked | ||
| 429 | * | ||
| 430 | * This is one of the three key functions for hierarchy implementation. | ||
| 431 | * This function is responsible for re-evaluating all the cgroup's active | ||
| 432 | * exceptions due to a parent's exception change. | ||
| 433 | * Refer to Documentation/cgroups/devices.txt for more details. | ||
| 434 | */ | ||
| 435 | static void revalidate_active_exceptions(struct dev_cgroup *devcg) | ||
| 436 | { | ||
| 437 | struct dev_exception_item *ex; | ||
| 438 | struct list_head *this, *tmp; | ||
| 439 | |||
| 440 | list_for_each_safe(this, tmp, &devcg->exceptions) { | ||
| 441 | ex = container_of(this, struct dev_exception_item, list); | ||
| 442 | if (!parent_has_perm(devcg, ex)) | ||
| 443 | dev_exception_rm(devcg, ex); | ||
| 444 | } | ||
| 445 | } | ||
| 446 | |||
| 447 | /** | ||
| 448 | * get_online_devcg - walks the cgroup tree and fills a list with the online | ||
| 449 | * groups | ||
| 450 | * @root: cgroup used as starting point | ||
| 451 | * @online: list that will be filled with online groups | ||
| 452 | * | ||
| 453 | * Must be called with devcgroup_mutex held. Grabs RCU lock. | ||
| 454 | * Because devcgroup_mutex is held, no devcg will become online or offline | ||
| 455 | * during the tree walk (see devcgroup_online, devcgroup_offline) | ||
| 456 | * A separate list is needed because propagate_behavior() and | ||
| 457 | * propagate_exception() need to allocate memory and can block. | ||
| 458 | */ | ||
| 459 | static void get_online_devcg(struct cgroup *root, struct list_head *online) | ||
| 460 | { | ||
| 461 | struct cgroup *pos; | ||
| 462 | struct dev_cgroup *devcg; | ||
| 463 | |||
| 464 | lockdep_assert_held(&devcgroup_mutex); | ||
| 465 | |||
| 466 | rcu_read_lock(); | ||
| 467 | cgroup_for_each_descendant_pre(pos, root) { | ||
| 468 | devcg = cgroup_to_devcgroup(pos); | ||
| 469 | if (is_devcg_online(devcg)) | ||
| 470 | list_add_tail(&devcg->propagate_pending, online); | ||
| 471 | } | ||
| 472 | rcu_read_unlock(); | ||
| 473 | } | ||
| 474 | |||
| 475 | /** | ||
| 476 | * propagate_exception - propagates a new exception to the children | ||
| 477 | * @devcg_root: device cgroup that added a new exception | ||
| 478 | * @ex: new exception to be propagated | ||
| 479 | * | ||
| 480 | * returns: 0 in case of success, != 0 in case of error | ||
| 481 | */ | ||
| 482 | static int propagate_exception(struct dev_cgroup *devcg_root, | ||
| 483 | struct dev_exception_item *ex) | ||
| 484 | { | ||
| 485 | struct cgroup *root = devcg_root->css.cgroup; | ||
| 486 | struct dev_cgroup *devcg, *parent, *tmp; | ||
| 487 | int rc = 0; | ||
| 488 | LIST_HEAD(pending); | ||
| 489 | |||
| 490 | get_online_devcg(root, &pending); | ||
| 491 | |||
| 492 | list_for_each_entry_safe(devcg, tmp, &pending, propagate_pending) { | ||
| 493 | parent = cgroup_to_devcgroup(devcg->css.cgroup->parent); | ||
| 494 | |||
| 495 | /* | ||
| 496 | * in case both the root's and devcg's behavior is allow, a new | ||
| 497 | * restriction means adding to the exception list | ||
| 498 | */ | ||
| 499 | if (devcg_root->behavior == DEVCG_DEFAULT_ALLOW && | ||
| 500 | devcg->behavior == DEVCG_DEFAULT_ALLOW) { | ||
| 501 | rc = dev_exception_add(devcg, ex); | ||
| 502 | if (rc) | ||
| 503 | break; | ||
| 504 | } else { | ||
| 505 | /* | ||
| 506 | * in the other possible cases: | ||
| 507 | * root's behavior: allow, devcg's: deny | ||
| 508 | * root's behavior: deny, devcg's: deny | ||
| 509 | * the exception will be removed | ||
| 510 | */ | ||
| 511 | dev_exception_rm(devcg, ex); | ||
| 512 | } | ||
| 513 | revalidate_active_exceptions(devcg); | ||
| 514 | |||
| 515 | list_del_init(&devcg->propagate_pending); | ||
| 516 | } | ||
| 517 | return rc; | ||
| 518 | } | ||
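Note that the case analysis above covers only three of the four behavior combinations: root deny with devcg allow cannot occur, since a child is never more permissive than its parent. Restated as a purely illustrative helper (not part of the patch):

/* what propagate_exception() does with a new restriction, per
 * combination of the writing group's behavior and the descendant's */
static bool child_gains_exception(bool root_allow, bool child_allow)
{
	/* allow/allow: the restriction becomes a child exception */
	if (root_allow && child_allow)
		return true;
	/* allow/deny and deny/deny: a matching exception is removed;
	 * deny/allow is impossible in a valid hierarchy */
	return false;
}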
| 519 | |||
| 520 | static inline bool has_children(struct dev_cgroup *devcgroup) | ||
| 521 | { | ||
| 522 | struct cgroup *cgrp = devcgroup->css.cgroup; | ||
| 523 | |||
| 524 | return !list_empty(&cgrp->children); | ||
| 525 | } | ||
| 526 | |||
| 377 | /* | 527 | /* |
| 378 | * Modify the exception list using allow/deny rules. | 528 | * Modify the exception list using allow/deny rules. |
| 379 | * CAP_SYS_ADMIN is needed for this. It's at least separate from CAP_MKNOD | 529 | * CAP_SYS_ADMIN is needed for this. It's at least separate from CAP_MKNOD |
| @@ -392,7 +542,7 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup, | |||
| 392 | { | 542 | { |
| 393 | const char *b; | 543 | const char *b; |
| 394 | char temp[12]; /* 11 + 1 characters needed for a u32 */ | 544 | char temp[12]; /* 11 + 1 characters needed for a u32 */ |
| 395 | int count, rc; | 545 | int count, rc = 0; |
| 396 | struct dev_exception_item ex; | 546 | struct dev_exception_item ex; |
| 397 | struct cgroup *p = devcgroup->css.cgroup; | 547 | struct cgroup *p = devcgroup->css.cgroup; |
| 398 | struct dev_cgroup *parent = NULL; | 548 | struct dev_cgroup *parent = NULL; |
| @@ -410,6 +560,9 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup, | |||
| 410 | case 'a': | 560 | case 'a': |
| 411 | switch (filetype) { | 561 | switch (filetype) { |
| 412 | case DEVCG_ALLOW: | 562 | case DEVCG_ALLOW: |
| 563 | if (has_children(devcgroup)) | ||
| 564 | return -EINVAL; | ||
| 565 | |||
| 413 | if (!may_allow_all(parent)) | 566 | if (!may_allow_all(parent)) |
| 414 | return -EPERM; | 567 | return -EPERM; |
| 415 | dev_exception_clean(devcgroup); | 568 | dev_exception_clean(devcgroup); |
| @@ -423,6 +576,9 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup, | |||
| 423 | return rc; | 576 | return rc; |
| 424 | break; | 577 | break; |
| 425 | case DEVCG_DENY: | 578 | case DEVCG_DENY: |
| 579 | if (has_children(devcgroup)) | ||
| 580 | return -EINVAL; | ||
| 581 | |||
| 426 | dev_exception_clean(devcgroup); | 582 | dev_exception_clean(devcgroup); |
| 427 | devcgroup->behavior = DEVCG_DEFAULT_DENY; | 583 | devcgroup->behavior = DEVCG_DEFAULT_DENY; |
| 428 | break; | 584 | break; |
| @@ -517,22 +673,28 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup, | |||
| 517 | dev_exception_rm(devcgroup, &ex); | 673 | dev_exception_rm(devcgroup, &ex); |
| 518 | return 0; | 674 | return 0; |
| 519 | } | 675 | } |
| 520 | return dev_exception_add(devcgroup, &ex); | 676 | rc = dev_exception_add(devcgroup, &ex); |
| 677 | break; | ||
| 521 | case DEVCG_DENY: | 678 | case DEVCG_DENY: |
| 522 | /* | 679 | /* |
| 523 | * If the default policy is deny, try to remove | 680 |
| 524 | * a matching exception instead. And be silent about it: we | 681 |
| 525 | * don't want to break compatibility | 682 | * don't want to break compatibility |
| 526 | */ | 683 | */ |
| 527 | if (devcgroup->behavior == DEVCG_DEFAULT_DENY) { | 684 | if (devcgroup->behavior == DEVCG_DEFAULT_DENY) |
| 528 | dev_exception_rm(devcgroup, &ex); | 685 | dev_exception_rm(devcgroup, &ex); |
| 529 | return 0; | 686 | else |
| 530 | } | 687 | rc = dev_exception_add(devcgroup, &ex); |
| 531 | return dev_exception_add(devcgroup, &ex); | 688 | |
| 689 | if (rc) | ||
| 690 | break; | ||
| 691 | /* we only propagate new restrictions */ | ||
| 692 | rc = propagate_exception(devcgroup, &ex); | ||
| 693 | break; | ||
| 532 | default: | 694 | default: |
| 533 | return -EINVAL; | 695 | rc = -EINVAL; |
| 534 | } | 696 | } |
| 535 | return 0; | 697 | return rc; |
| 536 | } | 698 | } |
| 537 | 699 | ||
| 538 | static int devcgroup_access_write(struct cgroup *cgrp, struct cftype *cft, | 700 | static int devcgroup_access_write(struct cgroup *cgrp, struct cftype *cft, |
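From userspace the interface is unchanged: rules of the form "type major:minor access" are written to devices.allow and devices.deny; what is new is that a parent's deny now reaches its descendants. A hedged usage sketch follows; the cgroup-v1 mount point and the child group name "sandbox" are assumptions:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int write_rule(const char *path, const char *rule)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror(path);
		return -1;
	}
	if (write(fd, rule, strlen(rule)) < 0) {
		perror("write");
		close(fd);
		return -1;
	}
	close(fd);
	return 0;
}

int main(void)
{
	/* deny read/write/mknod on /dev/null (char 1:3) in the parent;
	 * with this patch the restriction propagates into "sandbox" */
	write_rule("/sys/fs/cgroup/devices/devices.deny", "c 1:3 rwm");

	/* a child may not re-allow what its parent denies, so this
	 * write is expected to fail with EPERM */
	if (write_rule("/sys/fs/cgroup/devices/sandbox/devices.allow",
		       "c 1:3 rwm"))
		fprintf(stderr, "child re-allow rejected, as expected\n");
	return 0;
}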
| @@ -571,17 +733,10 @@ struct cgroup_subsys devices_subsys = { | |||
| 571 | .can_attach = devcgroup_can_attach, | 733 | .can_attach = devcgroup_can_attach, |
| 572 | .css_alloc = devcgroup_css_alloc, | 734 | .css_alloc = devcgroup_css_alloc, |
| 573 | .css_free = devcgroup_css_free, | 735 | .css_free = devcgroup_css_free, |
| 736 | .css_online = devcgroup_online, | ||
| 737 | .css_offline = devcgroup_offline, | ||
| 574 | .subsys_id = devices_subsys_id, | 738 | .subsys_id = devices_subsys_id, |
| 575 | .base_cftypes = dev_cgroup_files, | 739 | .base_cftypes = dev_cgroup_files, |
| 576 | |||
| 577 | /* | ||
| 578 | * While the devices cgroup has rudimentary hierarchy support which | ||
| 579 | * checks the parent's restrictions, it doesn't properly propagate | ||
| 580 | * config changes in ancestors to their descendants. A child | ||
| 581 | * should only be allowed to add more restrictions to the parent's | ||
| 582 | * configuration. Fix it and remove the following. | ||
| 583 | */ | ||
| 584 | .broken_hierarchy = true, | ||
| 585 | }; | 740 | }; |
| 586 | 741 | ||
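devcgroup_online() and devcgroup_offline() are wired up above but defined earlier in the file. A sketch of their expected shape, assuming a dev_exceptions_copy() helper and the DEVCG_DEFAULT_NONE offline marker mentioned earlier (both assumptions):

static int devcgroup_online(struct cgroup *cgroup)
{
	struct dev_cgroup *dev_cgroup, *parent_dev_cgroup = NULL;
	int ret = 0;

	mutex_lock(&devcgroup_mutex);
	dev_cgroup = cgroup_to_devcgroup(cgroup);
	if (cgroup->parent)
		parent_dev_cgroup = cgroup_to_devcgroup(cgroup->parent);

	if (parent_dev_cgroup == NULL) {
		/* the root starts out allowing everything */
		dev_cgroup->behavior = DEVCG_DEFAULT_ALLOW;
	} else {
		/* a child comes online as a copy of its parent */
		ret = dev_exceptions_copy(&dev_cgroup->exceptions,
					  &parent_dev_cgroup->exceptions);
		if (!ret)
			dev_cgroup->behavior = parent_dev_cgroup->behavior;
	}
	mutex_unlock(&devcgroup_mutex);
	return ret;
}

static void devcgroup_offline(struct cgroup *cgroup)
{
	struct dev_cgroup *dev_cgroup = cgroup_to_devcgroup(cgroup);

	mutex_lock(&devcgroup_mutex);
	/* mark the group offline so tree walks skip it */
	dev_cgroup->behavior = DEVCG_DEFAULT_NONE;
	mutex_unlock(&devcgroup_mutex);
}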
| 587 | /** | 742 | /** |
| @@ -609,7 +764,7 @@ static int __devcgroup_check_permission(short type, u32 major, u32 minor, | |||
| 609 | 764 | ||
| 610 | rcu_read_lock(); | 765 | rcu_read_lock(); |
| 611 | dev_cgroup = task_devcgroup(current); | 766 | dev_cgroup = task_devcgroup(current); |
| 612 | rc = may_access(dev_cgroup, &ex); | 767 | rc = may_access(dev_cgroup, &ex, dev_cgroup->behavior); |
| 613 | rcu_read_unlock(); | 768 | rcu_read_unlock(); |
| 614 | 769 | ||
| 615 | if (!rc) | 770 | if (!rc) |
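The extra argument to may_access() in the hunk above lets one routine answer two questions: may this task touch this device, and may a child keep a given exception under this parent. A condensed sketch of the resulting logic, with match_exception() standing in for the exception-list walk as an assumed helper:

static int may_access(struct dev_cgroup *dev_cgroup,
		      struct dev_exception_item *refex,
		      enum devcg_behavior behavior)
{
	/* assumed helper: does some list entry fully cover refex? */
	bool match = match_exception(&dev_cgroup->exceptions, refex);

	if (dev_cgroup->behavior == DEVCG_DEFAULT_ALLOW) {
		if (behavior == DEVCG_DEFAULT_ALLOW)
			/* exceptions punch deny holes into default-allow */
			return !match;
		/* a default-deny child under a default-allow parent:
		 * its exceptions grant access to certain devices */
		return match;
	}
	/* under default-deny, only a matching exception allows access */
	return match;
}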
