diff options
Diffstat (limited to 'kernel/cgroup.c')
| -rw-r--r-- | kernel/cgroup.c | 590 |
1 files changed, 437 insertions, 153 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index ed64ccac67c9..0f3527d6184a 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
| @@ -60,9 +60,13 @@ | |||
| 60 | #include <linux/eventfd.h> | 60 | #include <linux/eventfd.h> |
| 61 | #include <linux/poll.h> | 61 | #include <linux/poll.h> |
| 62 | #include <linux/flex_array.h> /* used in cgroup_attach_proc */ | 62 | #include <linux/flex_array.h> /* used in cgroup_attach_proc */ |
| 63 | #include <linux/kthread.h> | ||
| 63 | 64 | ||
| 64 | #include <linux/atomic.h> | 65 | #include <linux/atomic.h> |
| 65 | 66 | ||
| 67 | /* css deactivation bias, makes css->refcnt negative to deny new trygets */ | ||
| 68 | #define CSS_DEACT_BIAS INT_MIN | ||
| 69 | |||
| 66 | /* | 70 | /* |
| 67 | * cgroup_mutex is the master lock. Any modification to cgroup or its | 71 | * cgroup_mutex is the master lock. Any modification to cgroup or its |
| 68 | * hierarchy must be performed while holding it. | 72 | * hierarchy must be performed while holding it. |
| @@ -127,6 +131,9 @@ struct cgroupfs_root { | |||
| 127 | /* A list running through the active hierarchies */ | 131 | /* A list running through the active hierarchies */ |
| 128 | struct list_head root_list; | 132 | struct list_head root_list; |
| 129 | 133 | ||
| 134 | /* All cgroups on this root, cgroup_mutex protected */ | ||
| 135 | struct list_head allcg_list; | ||
| 136 | |||
| 130 | /* Hierarchy-specific flags */ | 137 | /* Hierarchy-specific flags */ |
| 131 | unsigned long flags; | 138 | unsigned long flags; |
| 132 | 139 | ||
| @@ -145,6 +152,15 @@ struct cgroupfs_root { | |||
| 145 | static struct cgroupfs_root rootnode; | 152 | static struct cgroupfs_root rootnode; |
| 146 | 153 | ||
| 147 | /* | 154 | /* |
| 155 | * cgroupfs file entry, pointed to from leaf dentry->d_fsdata. | ||
| 156 | */ | ||
| 157 | struct cfent { | ||
| 158 | struct list_head node; | ||
| 159 | struct dentry *dentry; | ||
| 160 | struct cftype *type; | ||
| 161 | }; | ||
| 162 | |||
| 163 | /* | ||
| 148 | * CSS ID -- ID per subsys's Cgroup Subsys State(CSS). used only when | 164 | * CSS ID -- ID per subsys's Cgroup Subsys State(CSS). used only when |
| 149 | * cgroup_subsys->use_id != 0. | 165 | * cgroup_subsys->use_id != 0. |
| 150 | */ | 166 | */ |
| @@ -239,6 +255,14 @@ int cgroup_lock_is_held(void) | |||
| 239 | 255 | ||
| 240 | EXPORT_SYMBOL_GPL(cgroup_lock_is_held); | 256 | EXPORT_SYMBOL_GPL(cgroup_lock_is_held); |
| 241 | 257 | ||
| 258 | /* the current nr of refs, always >= 0 whether @css is deactivated or not */ | ||
| 259 | static int css_refcnt(struct cgroup_subsys_state *css) | ||
| 260 | { | ||
| 261 | int v = atomic_read(&css->refcnt); | ||
| 262 | |||
| 263 | return v >= 0 ? v : v - CSS_DEACT_BIAS; | ||
| 264 | } | ||
| 265 | |||
| 242 | /* convenient tests for these bits */ | 266 | /* convenient tests for these bits */ |
| 243 | inline int cgroup_is_removed(const struct cgroup *cgrp) | 267 | inline int cgroup_is_removed(const struct cgroup *cgrp) |
| 244 | { | 268 | { |
| @@ -279,6 +303,21 @@ list_for_each_entry(_ss, &_root->subsys_list, sibling) | |||
| 279 | #define for_each_active_root(_root) \ | 303 | #define for_each_active_root(_root) \ |
| 280 | list_for_each_entry(_root, &roots, root_list) | 304 | list_for_each_entry(_root, &roots, root_list) |
| 281 | 305 | ||
| 306 | static inline struct cgroup *__d_cgrp(struct dentry *dentry) | ||
| 307 | { | ||
| 308 | return dentry->d_fsdata; | ||
| 309 | } | ||
| 310 | |||
| 311 | static inline struct cfent *__d_cfe(struct dentry *dentry) | ||
| 312 | { | ||
| 313 | return dentry->d_fsdata; | ||
| 314 | } | ||
| 315 | |||
| 316 | static inline struct cftype *__d_cft(struct dentry *dentry) | ||
| 317 | { | ||
| 318 | return __d_cfe(dentry)->type; | ||
| 319 | } | ||
| 320 | |||
| 282 | /* the list of cgroups eligible for automatic release. Protected by | 321 | /* the list of cgroups eligible for automatic release. Protected by |
| 283 | * release_list_lock */ | 322 | * release_list_lock */ |
| 284 | static LIST_HEAD(release_list); | 323 | static LIST_HEAD(release_list); |
| @@ -816,12 +855,17 @@ static int cgroup_call_pre_destroy(struct cgroup *cgrp) | |||
| 816 | struct cgroup_subsys *ss; | 855 | struct cgroup_subsys *ss; |
| 817 | int ret = 0; | 856 | int ret = 0; |
| 818 | 857 | ||
| 819 | for_each_subsys(cgrp->root, ss) | 858 | for_each_subsys(cgrp->root, ss) { |
| 820 | if (ss->pre_destroy) { | 859 | if (!ss->pre_destroy) |
| 821 | ret = ss->pre_destroy(cgrp); | 860 | continue; |
| 822 | if (ret) | 861 | |
| 823 | break; | 862 | ret = ss->pre_destroy(cgrp); |
| 863 | if (ret) { | ||
| 864 | /* ->pre_destroy() failure is being deprecated */ | ||
| 865 | WARN_ON_ONCE(!ss->__DEPRECATED_clear_css_refs); | ||
| 866 | break; | ||
| 824 | } | 867 | } |
| 868 | } | ||
| 825 | 869 | ||
| 826 | return ret; | 870 | return ret; |
| 827 | } | 871 | } |
| @@ -864,6 +908,14 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode) | |||
| 864 | BUG_ON(!list_empty(&cgrp->pidlists)); | 908 | BUG_ON(!list_empty(&cgrp->pidlists)); |
| 865 | 909 | ||
| 866 | kfree_rcu(cgrp, rcu_head); | 910 | kfree_rcu(cgrp, rcu_head); |
| 911 | } else { | ||
| 912 | struct cfent *cfe = __d_cfe(dentry); | ||
| 913 | struct cgroup *cgrp = dentry->d_parent->d_fsdata; | ||
| 914 | |||
| 915 | WARN_ONCE(!list_empty(&cfe->node) && | ||
| 916 | cgrp != &cgrp->root->top_cgroup, | ||
| 917 | "cfe still linked for %s\n", cfe->type->name); | ||
| 918 | kfree(cfe); | ||
| 867 | } | 919 | } |
| 868 | iput(inode); | 920 | iput(inode); |
| 869 | } | 921 | } |
| @@ -882,34 +934,36 @@ static void remove_dir(struct dentry *d) | |||
| 882 | dput(parent); | 934 | dput(parent); |
| 883 | } | 935 | } |
| 884 | 936 | ||
| 885 | static void cgroup_clear_directory(struct dentry *dentry) | 937 | static int cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft) |
| 886 | { | 938 | { |
| 887 | struct list_head *node; | 939 | struct cfent *cfe; |
| 888 | 940 | ||
| 889 | BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); | 941 | lockdep_assert_held(&cgrp->dentry->d_inode->i_mutex); |
| 890 | spin_lock(&dentry->d_lock); | 942 | lockdep_assert_held(&cgroup_mutex); |
| 891 | node = dentry->d_subdirs.next; | 943 | |
| 892 | while (node != &dentry->d_subdirs) { | 944 | list_for_each_entry(cfe, &cgrp->files, node) { |
| 893 | struct dentry *d = list_entry(node, struct dentry, d_u.d_child); | 945 | struct dentry *d = cfe->dentry; |
| 894 | 946 | ||
| 895 | spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED); | 947 | if (cft && cfe->type != cft) |
| 896 | list_del_init(node); | 948 | continue; |
| 897 | if (d->d_inode) { | 949 | |
| 898 | /* This should never be called on a cgroup | 950 | dget(d); |
| 899 | * directory with child cgroups */ | 951 | d_delete(d); |
| 900 | BUG_ON(d->d_inode->i_mode & S_IFDIR); | 952 | simple_unlink(d->d_inode, d); |
| 901 | dget_dlock(d); | 953 | list_del_init(&cfe->node); |
| 902 | spin_unlock(&d->d_lock); | 954 | dput(d); |
| 903 | spin_unlock(&dentry->d_lock); | 955 | |
| 904 | d_delete(d); | 956 | return 0; |
| 905 | simple_unlink(dentry->d_inode, d); | ||
| 906 | dput(d); | ||
| 907 | spin_lock(&dentry->d_lock); | ||
| 908 | } else | ||
| 909 | spin_unlock(&d->d_lock); | ||
| 910 | node = dentry->d_subdirs.next; | ||
| 911 | } | 957 | } |
| 912 | spin_unlock(&dentry->d_lock); | 958 | return -ENOENT; |
| 959 | } | ||
| 960 | |||
| 961 | static void cgroup_clear_directory(struct dentry *dir) | ||
| 962 | { | ||
| 963 | struct cgroup *cgrp = __d_cgrp(dir); | ||
| 964 | |||
| 965 | while (!list_empty(&cgrp->files)) | ||
| 966 | cgroup_rm_file(cgrp, NULL); | ||
| 913 | } | 967 | } |
| 914 | 968 | ||
| 915 | /* | 969 | /* |
| @@ -1294,6 +1348,11 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) | |||
| 1294 | if (ret) | 1348 | if (ret) |
| 1295 | goto out_unlock; | 1349 | goto out_unlock; |
| 1296 | 1350 | ||
| 1351 | /* See feature-removal-schedule.txt */ | ||
| 1352 | if (opts.subsys_bits != root->actual_subsys_bits || opts.release_agent) | ||
| 1353 | pr_warning("cgroup: option changes via remount are deprecated (pid=%d comm=%s)\n", | ||
| 1354 | task_tgid_nr(current), current->comm); | ||
| 1355 | |||
| 1297 | /* Don't allow flags or name to change at remount */ | 1356 | /* Don't allow flags or name to change at remount */ |
| 1298 | if (opts.flags != root->flags || | 1357 | if (opts.flags != root->flags || |
| 1299 | (opts.name && strcmp(opts.name, root->name))) { | 1358 | (opts.name && strcmp(opts.name, root->name))) { |
| @@ -1308,7 +1367,8 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) | |||
| 1308 | goto out_unlock; | 1367 | goto out_unlock; |
| 1309 | } | 1368 | } |
| 1310 | 1369 | ||
| 1311 | /* (re)populate subsystem files */ | 1370 | /* clear out any existing files and repopulate subsystem files */ |
| 1371 | cgroup_clear_directory(cgrp->dentry); | ||
| 1312 | cgroup_populate_dir(cgrp); | 1372 | cgroup_populate_dir(cgrp); |
| 1313 | 1373 | ||
| 1314 | if (opts.release_agent) | 1374 | if (opts.release_agent) |
| @@ -1333,6 +1393,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) | |||
| 1333 | { | 1393 | { |
| 1334 | INIT_LIST_HEAD(&cgrp->sibling); | 1394 | INIT_LIST_HEAD(&cgrp->sibling); |
| 1335 | INIT_LIST_HEAD(&cgrp->children); | 1395 | INIT_LIST_HEAD(&cgrp->children); |
| 1396 | INIT_LIST_HEAD(&cgrp->files); | ||
| 1336 | INIT_LIST_HEAD(&cgrp->css_sets); | 1397 | INIT_LIST_HEAD(&cgrp->css_sets); |
| 1337 | INIT_LIST_HEAD(&cgrp->release_list); | 1398 | INIT_LIST_HEAD(&cgrp->release_list); |
| 1338 | INIT_LIST_HEAD(&cgrp->pidlists); | 1399 | INIT_LIST_HEAD(&cgrp->pidlists); |
| @@ -1344,11 +1405,14 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) | |||
| 1344 | static void init_cgroup_root(struct cgroupfs_root *root) | 1405 | static void init_cgroup_root(struct cgroupfs_root *root) |
| 1345 | { | 1406 | { |
| 1346 | struct cgroup *cgrp = &root->top_cgroup; | 1407 | struct cgroup *cgrp = &root->top_cgroup; |
| 1408 | |||
| 1347 | INIT_LIST_HEAD(&root->subsys_list); | 1409 | INIT_LIST_HEAD(&root->subsys_list); |
| 1348 | INIT_LIST_HEAD(&root->root_list); | 1410 | INIT_LIST_HEAD(&root->root_list); |
| 1411 | INIT_LIST_HEAD(&root->allcg_list); | ||
| 1349 | root->number_of_cgroups = 1; | 1412 | root->number_of_cgroups = 1; |
| 1350 | cgrp->root = root; | 1413 | cgrp->root = root; |
| 1351 | cgrp->top_cgroup = cgrp; | 1414 | cgrp->top_cgroup = cgrp; |
| 1415 | list_add_tail(&cgrp->allcg_node, &root->allcg_list); | ||
| 1352 | init_cgroup_housekeeping(cgrp); | 1416 | init_cgroup_housekeeping(cgrp); |
| 1353 | } | 1417 | } |
| 1354 | 1418 | ||
| @@ -1692,16 +1756,6 @@ static struct file_system_type cgroup_fs_type = { | |||
| 1692 | 1756 | ||
| 1693 | static struct kobject *cgroup_kobj; | 1757 | static struct kobject *cgroup_kobj; |
| 1694 | 1758 | ||
| 1695 | static inline struct cgroup *__d_cgrp(struct dentry *dentry) | ||
| 1696 | { | ||
| 1697 | return dentry->d_fsdata; | ||
| 1698 | } | ||
| 1699 | |||
| 1700 | static inline struct cftype *__d_cft(struct dentry *dentry) | ||
| 1701 | { | ||
| 1702 | return dentry->d_fsdata; | ||
| 1703 | } | ||
| 1704 | |||
| 1705 | /** | 1759 | /** |
| 1706 | * cgroup_path - generate the path of a cgroup | 1760 | * cgroup_path - generate the path of a cgroup |
| 1707 | * @cgrp: the cgroup in question | 1761 | * @cgrp: the cgroup in question |
| @@ -2160,9 +2214,9 @@ retry_find_task: | |||
| 2160 | * only need to check permissions on one of them. | 2214 | * only need to check permissions on one of them. |
| 2161 | */ | 2215 | */ |
| 2162 | tcred = __task_cred(tsk); | 2216 | tcred = __task_cred(tsk); |
| 2163 | if (cred->euid && | 2217 | if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && |
| 2164 | cred->euid != tcred->uid && | 2218 | !uid_eq(cred->euid, tcred->uid) && |
| 2165 | cred->euid != tcred->suid) { | 2219 | !uid_eq(cred->euid, tcred->suid)) { |
| 2166 | rcu_read_unlock(); | 2220 | rcu_read_unlock(); |
| 2167 | ret = -EACCES; | 2221 | ret = -EACCES; |
| 2168 | goto out_unlock_cgroup; | 2222 | goto out_unlock_cgroup; |
| @@ -2172,6 +2226,18 @@ retry_find_task: | |||
| 2172 | 2226 | ||
| 2173 | if (threadgroup) | 2227 | if (threadgroup) |
| 2174 | tsk = tsk->group_leader; | 2228 | tsk = tsk->group_leader; |
| 2229 | |||
| 2230 | /* | ||
| 2231 | * Workqueue threads may acquire PF_THREAD_BOUND and become | ||
| 2232 | * trapped in a cpuset, or RT worker may be born in a cgroup | ||
| 2233 | * with no rt_runtime allocated. Just say no. | ||
| 2234 | */ | ||
| 2235 | if (tsk == kthreadd_task || (tsk->flags & PF_THREAD_BOUND)) { | ||
| 2236 | ret = -EINVAL; | ||
| 2237 | rcu_read_unlock(); | ||
| 2238 | goto out_unlock_cgroup; | ||
| 2239 | } | ||
| 2240 | |||
| 2175 | get_task_struct(tsk); | 2241 | get_task_struct(tsk); |
| 2176 | rcu_read_unlock(); | 2242 | rcu_read_unlock(); |
| 2177 | 2243 | ||
| @@ -2603,50 +2669,191 @@ static umode_t cgroup_file_mode(const struct cftype *cft) | |||
| 2603 | return mode; | 2669 | return mode; |
| 2604 | } | 2670 | } |
| 2605 | 2671 | ||
| 2606 | int cgroup_add_file(struct cgroup *cgrp, | 2672 | static int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys, |
| 2607 | struct cgroup_subsys *subsys, | 2673 | const struct cftype *cft) |
| 2608 | const struct cftype *cft) | ||
| 2609 | { | 2674 | { |
| 2610 | struct dentry *dir = cgrp->dentry; | 2675 | struct dentry *dir = cgrp->dentry; |
| 2676 | struct cgroup *parent = __d_cgrp(dir); | ||
| 2611 | struct dentry *dentry; | 2677 | struct dentry *dentry; |
| 2678 | struct cfent *cfe; | ||
| 2612 | int error; | 2679 | int error; |
| 2613 | umode_t mode; | 2680 | umode_t mode; |
| 2614 | |||
| 2615 | char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 }; | 2681 | char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 }; |
| 2682 | |||
| 2683 | /* does @cft->flags tell us to skip creation on @cgrp? */ | ||
| 2684 | if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgrp->parent) | ||
| 2685 | return 0; | ||
| 2686 | if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent) | ||
| 2687 | return 0; | ||
| 2688 | |||
| 2616 | if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) { | 2689 | if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) { |
| 2617 | strcpy(name, subsys->name); | 2690 | strcpy(name, subsys->name); |
| 2618 | strcat(name, "."); | 2691 | strcat(name, "."); |
| 2619 | } | 2692 | } |
| 2620 | strcat(name, cft->name); | 2693 | strcat(name, cft->name); |
| 2694 | |||
| 2621 | BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex)); | 2695 | BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex)); |
| 2696 | |||
| 2697 | cfe = kzalloc(sizeof(*cfe), GFP_KERNEL); | ||
| 2698 | if (!cfe) | ||
| 2699 | return -ENOMEM; | ||
| 2700 | |||
| 2622 | dentry = lookup_one_len(name, dir, strlen(name)); | 2701 | dentry = lookup_one_len(name, dir, strlen(name)); |
| 2623 | if (!IS_ERR(dentry)) { | 2702 | if (IS_ERR(dentry)) { |
| 2624 | mode = cgroup_file_mode(cft); | ||
| 2625 | error = cgroup_create_file(dentry, mode | S_IFREG, | ||
| 2626 | cgrp->root->sb); | ||
| 2627 | if (!error) | ||
| 2628 | dentry->d_fsdata = (void *)cft; | ||
| 2629 | dput(dentry); | ||
| 2630 | } else | ||
| 2631 | error = PTR_ERR(dentry); | 2703 | error = PTR_ERR(dentry); |
| 2704 | goto out; | ||
| 2705 | } | ||
| 2706 | |||
| 2707 | mode = cgroup_file_mode(cft); | ||
| 2708 | error = cgroup_create_file(dentry, mode | S_IFREG, cgrp->root->sb); | ||
| 2709 | if (!error) { | ||
| 2710 | cfe->type = (void *)cft; | ||
| 2711 | cfe->dentry = dentry; | ||
| 2712 | dentry->d_fsdata = cfe; | ||
| 2713 | list_add_tail(&cfe->node, &parent->files); | ||
| 2714 | cfe = NULL; | ||
| 2715 | } | ||
| 2716 | dput(dentry); | ||
| 2717 | out: | ||
| 2718 | kfree(cfe); | ||
| 2632 | return error; | 2719 | return error; |
| 2633 | } | 2720 | } |
| 2634 | EXPORT_SYMBOL_GPL(cgroup_add_file); | ||
| 2635 | 2721 | ||
| 2636 | int cgroup_add_files(struct cgroup *cgrp, | 2722 | static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys, |
| 2637 | struct cgroup_subsys *subsys, | 2723 | const struct cftype cfts[], bool is_add) |
| 2638 | const struct cftype cft[], | ||
| 2639 | int count) | ||
| 2640 | { | 2724 | { |
| 2641 | int i, err; | 2725 | const struct cftype *cft; |
| 2642 | for (i = 0; i < count; i++) { | 2726 | int err, ret = 0; |
| 2643 | err = cgroup_add_file(cgrp, subsys, &cft[i]); | 2727 | |
| 2644 | if (err) | 2728 | for (cft = cfts; cft->name[0] != '\0'; cft++) { |
| 2645 | return err; | 2729 | if (is_add) |
| 2730 | err = cgroup_add_file(cgrp, subsys, cft); | ||
| 2731 | else | ||
| 2732 | err = cgroup_rm_file(cgrp, cft); | ||
| 2733 | if (err) { | ||
| 2734 | pr_warning("cgroup_addrm_files: failed to %s %s, err=%d\n", | ||
| 2735 | is_add ? "add" : "remove", cft->name, err); | ||
| 2736 | ret = err; | ||
| 2737 | } | ||
| 2738 | } | ||
| 2739 | return ret; | ||
| 2740 | } | ||
| 2741 | |||
| 2742 | static DEFINE_MUTEX(cgroup_cft_mutex); | ||
| 2743 | |||
| 2744 | static void cgroup_cfts_prepare(void) | ||
| 2745 | __acquires(&cgroup_cft_mutex) __acquires(&cgroup_mutex) | ||
| 2746 | { | ||
| 2747 | /* | ||
| 2748 | * Thanks to the entanglement with vfs inode locking, we can't walk | ||
| 2749 | * the existing cgroups under cgroup_mutex and create files. | ||
| 2750 | * Instead, we increment reference on all cgroups and build list of | ||
| 2751 | * them using @cgrp->cft_q_node. Grab cgroup_cft_mutex to ensure | ||
| 2752 | * exclusive access to the field. | ||
| 2753 | */ | ||
| 2754 | mutex_lock(&cgroup_cft_mutex); | ||
| 2755 | mutex_lock(&cgroup_mutex); | ||
| 2756 | } | ||
| 2757 | |||
| 2758 | static void cgroup_cfts_commit(struct cgroup_subsys *ss, | ||
| 2759 | const struct cftype *cfts, bool is_add) | ||
| 2760 | __releases(&cgroup_mutex) __releases(&cgroup_cft_mutex) | ||
| 2761 | { | ||
| 2762 | LIST_HEAD(pending); | ||
| 2763 | struct cgroup *cgrp, *n; | ||
| 2764 | |||
| 2765 | /* %NULL @cfts indicates abort and don't bother if @ss isn't attached */ | ||
| 2766 | if (cfts && ss->root != &rootnode) { | ||
| 2767 | list_for_each_entry(cgrp, &ss->root->allcg_list, allcg_node) { | ||
| 2768 | dget(cgrp->dentry); | ||
| 2769 | list_add_tail(&cgrp->cft_q_node, &pending); | ||
| 2770 | } | ||
| 2771 | } | ||
| 2772 | |||
| 2773 | mutex_unlock(&cgroup_mutex); | ||
| 2774 | |||
| 2775 | /* | ||
| 2776 | * All new cgroups will see @cfts update on @ss->cftsets. Add/rm | ||
| 2777 | * files for all cgroups which were created before. | ||
| 2778 | */ | ||
| 2779 | list_for_each_entry_safe(cgrp, n, &pending, cft_q_node) { | ||
| 2780 | struct inode *inode = cgrp->dentry->d_inode; | ||
| 2781 | |||
| 2782 | mutex_lock(&inode->i_mutex); | ||
| 2783 | mutex_lock(&cgroup_mutex); | ||
| 2784 | if (!cgroup_is_removed(cgrp)) | ||
| 2785 | cgroup_addrm_files(cgrp, ss, cfts, is_add); | ||
| 2786 | mutex_unlock(&cgroup_mutex); | ||
| 2787 | mutex_unlock(&inode->i_mutex); | ||
| 2788 | |||
| 2789 | list_del_init(&cgrp->cft_q_node); | ||
| 2790 | dput(cgrp->dentry); | ||
| 2646 | } | 2791 | } |
| 2792 | |||
| 2793 | mutex_unlock(&cgroup_cft_mutex); | ||
| 2794 | } | ||
| 2795 | |||
| 2796 | /** | ||
| 2797 | * cgroup_add_cftypes - add an array of cftypes to a subsystem | ||
| 2798 | * @ss: target cgroup subsystem | ||
| 2799 | * @cfts: zero-length name terminated array of cftypes | ||
| 2800 | * | ||
| 2801 | * Register @cfts to @ss. Files described by @cfts are created for all | ||
| 2802 | * existing cgroups to which @ss is attached and all future cgroups will | ||
| 2803 | * have them too. This function can be called anytime whether @ss is | ||
| 2804 | * attached or not. | ||
| 2805 | * | ||
| 2806 | * Returns 0 on successful registration, -errno on failure. Note that this | ||
| 2807 | * function currently returns 0 as long as @cfts registration is successful | ||
| 2808 | * even if some file creation attempts on existing cgroups fail. | ||
| 2809 | */ | ||
| 2810 | int cgroup_add_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts) | ||
| 2811 | { | ||
| 2812 | struct cftype_set *set; | ||
| 2813 | |||
| 2814 | set = kzalloc(sizeof(*set), GFP_KERNEL); | ||
| 2815 | if (!set) | ||
| 2816 | return -ENOMEM; | ||
| 2817 | |||
| 2818 | cgroup_cfts_prepare(); | ||
| 2819 | set->cfts = cfts; | ||
| 2820 | list_add_tail(&set->node, &ss->cftsets); | ||
| 2821 | cgroup_cfts_commit(ss, cfts, true); | ||
| 2822 | |||
| 2647 | return 0; | 2823 | return 0; |
| 2648 | } | 2824 | } |
| 2649 | EXPORT_SYMBOL_GPL(cgroup_add_files); | 2825 | EXPORT_SYMBOL_GPL(cgroup_add_cftypes); |
| 2826 | |||
| 2827 | /** | ||
| 2828 | * cgroup_rm_cftypes - remove an array of cftypes from a subsystem | ||
| 2829 | * @ss: target cgroup subsystem | ||
| 2830 | * @cfts: zero-length name terminated array of cftypes | ||
| 2831 | * | ||
| 2832 | * Unregister @cfts from @ss. Files described by @cfts are removed from | ||
| 2833 | * all existing cgroups to which @ss is attached and all future cgroups | ||
| 2834 | * won't have them either. This function can be called anytime whether @ss | ||
| 2835 | * is attached or not. | ||
| 2836 | * | ||
| 2837 | * Returns 0 on successful unregistration, -ENOENT if @cfts is not | ||
| 2838 | * registered with @ss. | ||
| 2839 | */ | ||
| 2840 | int cgroup_rm_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts) | ||
| 2841 | { | ||
| 2842 | struct cftype_set *set; | ||
| 2843 | |||
| 2844 | cgroup_cfts_prepare(); | ||
| 2845 | |||
| 2846 | list_for_each_entry(set, &ss->cftsets, node) { | ||
| 2847 | if (set->cfts == cfts) { | ||
| 2848 | list_del_init(&set->node); | ||
| 2849 | cgroup_cfts_commit(ss, cfts, false); | ||
| 2850 | return 0; | ||
| 2851 | } | ||
| 2852 | } | ||
| 2853 | |||
| 2854 | cgroup_cfts_commit(ss, NULL, false); | ||
| 2855 | return -ENOENT; | ||
| 2856 | } | ||
| 2650 | 2857 | ||
| 2651 | /** | 2858 | /** |
| 2652 | * cgroup_task_count - count the number of tasks in a cgroup. | 2859 | * cgroup_task_count - count the number of tasks in a cgroup. |
| @@ -3625,13 +3832,14 @@ static struct cftype files[] = { | |||
| 3625 | .read_u64 = cgroup_clone_children_read, | 3832 | .read_u64 = cgroup_clone_children_read, |
| 3626 | .write_u64 = cgroup_clone_children_write, | 3833 | .write_u64 = cgroup_clone_children_write, |
| 3627 | }, | 3834 | }, |
| 3628 | }; | 3835 | { |
| 3629 | 3836 | .name = "release_agent", | |
| 3630 | static struct cftype cft_release_agent = { | 3837 | .flags = CFTYPE_ONLY_ON_ROOT, |
| 3631 | .name = "release_agent", | 3838 | .read_seq_string = cgroup_release_agent_show, |
| 3632 | .read_seq_string = cgroup_release_agent_show, | 3839 | .write_string = cgroup_release_agent_write, |
| 3633 | .write_string = cgroup_release_agent_write, | 3840 | .max_write_len = PATH_MAX, |
| 3634 | .max_write_len = PATH_MAX, | 3841 | }, |
| 3842 | { } /* terminate */ | ||
| 3635 | }; | 3843 | }; |
| 3636 | 3844 | ||
| 3637 | static int cgroup_populate_dir(struct cgroup *cgrp) | 3845 | static int cgroup_populate_dir(struct cgroup *cgrp) |
| @@ -3639,22 +3847,18 @@ static int cgroup_populate_dir(struct cgroup *cgrp) | |||
| 3639 | int err; | 3847 | int err; |
| 3640 | struct cgroup_subsys *ss; | 3848 | struct cgroup_subsys *ss; |
| 3641 | 3849 | ||
| 3642 | /* First clear out any existing files */ | 3850 | err = cgroup_addrm_files(cgrp, NULL, files, true); |
| 3643 | cgroup_clear_directory(cgrp->dentry); | ||
| 3644 | |||
| 3645 | err = cgroup_add_files(cgrp, NULL, files, ARRAY_SIZE(files)); | ||
| 3646 | if (err < 0) | 3851 | if (err < 0) |
| 3647 | return err; | 3852 | return err; |
| 3648 | 3853 | ||
| 3649 | if (cgrp == cgrp->top_cgroup) { | 3854 | /* process cftsets of each subsystem */ |
| 3650 | if ((err = cgroup_add_file(cgrp, NULL, &cft_release_agent)) < 0) | ||
| 3651 | return err; | ||
| 3652 | } | ||
| 3653 | |||
| 3654 | for_each_subsys(cgrp->root, ss) { | 3855 | for_each_subsys(cgrp->root, ss) { |
| 3655 | if (ss->populate && (err = ss->populate(ss, cgrp)) < 0) | 3856 | struct cftype_set *set; |
| 3656 | return err; | 3857 | |
| 3858 | list_for_each_entry(set, &ss->cftsets, node) | ||
| 3859 | cgroup_addrm_files(cgrp, ss, set->cfts, true); | ||
| 3657 | } | 3860 | } |
| 3861 | |||
| 3658 | /* This cgroup is ready now */ | 3862 | /* This cgroup is ready now */ |
| 3659 | for_each_subsys(cgrp->root, ss) { | 3863 | for_each_subsys(cgrp->root, ss) { |
| 3660 | struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; | 3864 | struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; |
| @@ -3670,6 +3874,14 @@ static int cgroup_populate_dir(struct cgroup *cgrp) | |||
| 3670 | return 0; | 3874 | return 0; |
| 3671 | } | 3875 | } |
| 3672 | 3876 | ||
| 3877 | static void css_dput_fn(struct work_struct *work) | ||
| 3878 | { | ||
| 3879 | struct cgroup_subsys_state *css = | ||
| 3880 | container_of(work, struct cgroup_subsys_state, dput_work); | ||
| 3881 | |||
| 3882 | dput(css->cgroup->dentry); | ||
| 3883 | } | ||
| 3884 | |||
| 3673 | static void init_cgroup_css(struct cgroup_subsys_state *css, | 3885 | static void init_cgroup_css(struct cgroup_subsys_state *css, |
| 3674 | struct cgroup_subsys *ss, | 3886 | struct cgroup_subsys *ss, |
| 3675 | struct cgroup *cgrp) | 3887 | struct cgroup *cgrp) |
| @@ -3682,6 +3894,16 @@ static void init_cgroup_css(struct cgroup_subsys_state *css, | |||
| 3682 | set_bit(CSS_ROOT, &css->flags); | 3894 | set_bit(CSS_ROOT, &css->flags); |
| 3683 | BUG_ON(cgrp->subsys[ss->subsys_id]); | 3895 | BUG_ON(cgrp->subsys[ss->subsys_id]); |
| 3684 | cgrp->subsys[ss->subsys_id] = css; | 3896 | cgrp->subsys[ss->subsys_id] = css; |
| 3897 | |||
| 3898 | /* | ||
| 3899 | * If !clear_css_refs, css holds an extra ref to @cgrp->dentry | ||
| 3900 | * which is put on the last css_put(). dput() requires process | ||
| 3901 | * context, which css_put() may be called without. @css->dput_work | ||
| 3902 | * will be used to invoke dput() asynchronously from css_put(). | ||
| 3903 | */ | ||
| 3904 | INIT_WORK(&css->dput_work, css_dput_fn); | ||
| 3905 | if (ss->__DEPRECATED_clear_css_refs) | ||
| 3906 | set_bit(CSS_CLEAR_CSS_REFS, &css->flags); | ||
| 3685 | } | 3907 | } |
| 3686 | 3908 | ||
| 3687 | static void cgroup_lock_hierarchy(struct cgroupfs_root *root) | 3909 | static void cgroup_lock_hierarchy(struct cgroupfs_root *root) |
| @@ -3784,9 +4006,16 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
| 3784 | if (err < 0) | 4006 | if (err < 0) |
| 3785 | goto err_remove; | 4007 | goto err_remove; |
| 3786 | 4008 | ||
| 4009 | /* If !clear_css_refs, each css holds a ref to the cgroup's dentry */ | ||
| 4010 | for_each_subsys(root, ss) | ||
| 4011 | if (!ss->__DEPRECATED_clear_css_refs) | ||
| 4012 | dget(dentry); | ||
| 4013 | |||
| 3787 | /* The cgroup directory was pre-locked for us */ | 4014 | /* The cgroup directory was pre-locked for us */ |
| 3788 | BUG_ON(!mutex_is_locked(&cgrp->dentry->d_inode->i_mutex)); | 4015 | BUG_ON(!mutex_is_locked(&cgrp->dentry->d_inode->i_mutex)); |
| 3789 | 4016 | ||
| 4017 | list_add_tail(&cgrp->allcg_node, &root->allcg_list); | ||
| 4018 | |||
| 3790 | err = cgroup_populate_dir(cgrp); | 4019 | err = cgroup_populate_dir(cgrp); |
| 3791 | /* If err < 0, we have a half-filled directory - oh well ;) */ | 4020 | /* If err < 0, we have a half-filled directory - oh well ;) */ |
| 3792 | 4021 | ||
| @@ -3826,18 +4055,19 @@ static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
| 3826 | return cgroup_create(c_parent, dentry, mode | S_IFDIR); | 4055 | return cgroup_create(c_parent, dentry, mode | S_IFDIR); |
| 3827 | } | 4056 | } |
| 3828 | 4057 | ||
| 4058 | /* | ||
| 4059 | * Check the reference count on each subsystem. Since we already | ||
| 4060 | * established that there are no tasks in the cgroup, if the css refcount | ||
| 4061 | * is also 1, then there should be no outstanding references, so the | ||
| 4062 | * subsystem is safe to destroy. We scan across all subsystems rather than | ||
| 4063 | * using the per-hierarchy linked list of mounted subsystems since we can | ||
| 4064 | * be called via check_for_release() with no synchronization other than | ||
| 4065 | * RCU, and the subsystem linked list isn't RCU-safe. | ||
| 4066 | */ | ||
| 3829 | static int cgroup_has_css_refs(struct cgroup *cgrp) | 4067 | static int cgroup_has_css_refs(struct cgroup *cgrp) |
| 3830 | { | 4068 | { |
| 3831 | /* Check the reference count on each subsystem. Since we | ||
| 3832 | * already established that there are no tasks in the | ||
| 3833 | * cgroup, if the css refcount is also 1, then there should | ||
| 3834 | * be no outstanding references, so the subsystem is safe to | ||
| 3835 | * destroy. We scan across all subsystems rather than using | ||
| 3836 | * the per-hierarchy linked list of mounted subsystems since | ||
| 3837 | * we can be called via check_for_release() with no | ||
| 3838 | * synchronization other than RCU, and the subsystem linked | ||
| 3839 | * list isn't RCU-safe */ | ||
| 3840 | int i; | 4069 | int i; |
| 4070 | |||
| 3841 | /* | 4071 | /* |
| 3842 | * We won't need to lock the subsys array, because the subsystems | 4072 | * We won't need to lock the subsys array, because the subsystems |
| 3843 | * we're concerned about aren't going anywhere since our cgroup root | 4073 | * we're concerned about aren't going anywhere since our cgroup root |
| @@ -3846,17 +4076,21 @@ static int cgroup_has_css_refs(struct cgroup *cgrp) | |||
| 3846 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | 4076 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { |
| 3847 | struct cgroup_subsys *ss = subsys[i]; | 4077 | struct cgroup_subsys *ss = subsys[i]; |
| 3848 | struct cgroup_subsys_state *css; | 4078 | struct cgroup_subsys_state *css; |
| 4079 | |||
| 3849 | /* Skip subsystems not present or not in this hierarchy */ | 4080 | /* Skip subsystems not present or not in this hierarchy */ |
| 3850 | if (ss == NULL || ss->root != cgrp->root) | 4081 | if (ss == NULL || ss->root != cgrp->root) |
| 3851 | continue; | 4082 | continue; |
| 4083 | |||
| 3852 | css = cgrp->subsys[ss->subsys_id]; | 4084 | css = cgrp->subsys[ss->subsys_id]; |
| 3853 | /* When called from check_for_release() it's possible | 4085 | /* |
| 4086 | * When called from check_for_release() it's possible | ||
| 3854 | * that by this point the cgroup has been removed | 4087 | * that by this point the cgroup has been removed |
| 3855 | * and the css deleted. But a false-positive doesn't | 4088 | * and the css deleted. But a false-positive doesn't |
| 3856 | * matter, since it can only happen if the cgroup | 4089 | * matter, since it can only happen if the cgroup |
| 3857 | * has been deleted and hence no longer needs the | 4090 | * has been deleted and hence no longer needs the |
| 3858 | * release agent to be called anyway. */ | 4091 | * release agent to be called anyway. |
| 3859 | if (css && (atomic_read(&css->refcnt) > 1)) | 4092 | */ |
| 4093 | if (css && css_refcnt(css) > 1) | ||
| 3860 | return 1; | 4094 | return 1; |
| 3861 | } | 4095 | } |
| 3862 | return 0; | 4096 | return 0; |
| @@ -3866,51 +4100,63 @@ static int cgroup_has_css_refs(struct cgroup *cgrp) | |||
| 3866 | * Atomically mark all (or else none) of the cgroup's CSS objects as | 4100 | * Atomically mark all (or else none) of the cgroup's CSS objects as |
| 3867 | * CSS_REMOVED. Return true on success, or false if the cgroup has | 4101 | * CSS_REMOVED. Return true on success, or false if the cgroup has |
| 3868 | * busy subsystems. Call with cgroup_mutex held | 4102 | * busy subsystems. Call with cgroup_mutex held |
| 4103 | * | ||
| 4104 | * Depending on whether a subsys has __DEPRECATED_clear_css_refs set or | ||
| 4105 | * not, cgroup removal behaves differently. | ||
| 4106 | * | ||
| 4107 | * If clear is set, css refcnt for the subsystem should be zero before | ||
| 4108 | * cgroup removal can be committed. This is implemented by | ||
| 4109 | * CGRP_WAIT_ON_RMDIR and retry logic around ->pre_destroy(), which may be | ||
| 4110 | * called multiple times until all css refcnts reach zero and is allowed to | ||
| 4111 | * veto removal on any invocation. This behavior is deprecated and will be | ||
| 4112 | * removed as soon as the existing user (memcg) is updated. | ||
| 4113 | * | ||
| 4114 | * If clear is not set, each css holds an extra reference to the cgroup's | ||
| 4115 | * dentry and cgroup removal proceeds regardless of css refs. | ||
| 4116 | * ->pre_destroy() will be called at least once and is not allowed to fail. | ||
| 4117 | * On the last put of each css, whenever that may be, the extra dentry ref | ||
| 4118 | * is put so that dentry destruction happens only after all css's are | ||
| 4119 | * released. | ||
| 3869 | */ | 4120 | */ |
| 3870 | |||
| 3871 | static int cgroup_clear_css_refs(struct cgroup *cgrp) | 4121 | static int cgroup_clear_css_refs(struct cgroup *cgrp) |
| 3872 | { | 4122 | { |
| 3873 | struct cgroup_subsys *ss; | 4123 | struct cgroup_subsys *ss; |
| 3874 | unsigned long flags; | 4124 | unsigned long flags; |
| 3875 | bool failed = false; | 4125 | bool failed = false; |
| 4126 | |||
| 3876 | local_irq_save(flags); | 4127 | local_irq_save(flags); |
| 4128 | |||
| 4129 | /* | ||
| 4130 | * Block new css_tryget() by deactivating refcnt. If all refcnts | ||
| 4131 | * for subsystems w/ clear_css_refs set were 1 at the moment of | ||
| 4132 | * deactivation, we succeeded. | ||
| 4133 | */ | ||
| 3877 | for_each_subsys(cgrp->root, ss) { | 4134 | for_each_subsys(cgrp->root, ss) { |
| 3878 | struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; | 4135 | struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; |
| 3879 | int refcnt; | 4136 | |
| 3880 | while (1) { | 4137 | WARN_ON(atomic_read(&css->refcnt) < 0); |
| 3881 | /* We can only remove a CSS with a refcnt==1 */ | 4138 | atomic_add(CSS_DEACT_BIAS, &css->refcnt); |
| 3882 | refcnt = atomic_read(&css->refcnt); | 4139 | |
| 3883 | if (refcnt > 1) { | 4140 | if (ss->__DEPRECATED_clear_css_refs) |
| 3884 | failed = true; | 4141 | failed |= css_refcnt(css) != 1; |
| 3885 | goto done; | ||
| 3886 | } | ||
| 3887 | BUG_ON(!refcnt); | ||
| 3888 | /* | ||
| 3889 | * Drop the refcnt to 0 while we check other | ||
| 3890 | * subsystems. This will cause any racing | ||
| 3891 | * css_tryget() to spin until we set the | ||
| 3892 | * CSS_REMOVED bits or abort | ||
| 3893 | */ | ||
| 3894 | if (atomic_cmpxchg(&css->refcnt, refcnt, 0) == refcnt) | ||
| 3895 | break; | ||
| 3896 | cpu_relax(); | ||
| 3897 | } | ||
| 3898 | } | 4142 | } |
| 3899 | done: | 4143 | |
| 4144 | /* | ||
| 4145 | * If succeeded, set REMOVED and put all the base refs; otherwise, | ||
| 4146 | * restore refcnts to positive values. Either way, all in-progress | ||
| 4147 | * css_tryget() will be released. | ||
| 4148 | */ | ||
| 3900 | for_each_subsys(cgrp->root, ss) { | 4149 | for_each_subsys(cgrp->root, ss) { |
| 3901 | struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; | 4150 | struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; |
| 3902 | if (failed) { | 4151 | |
| 3903 | /* | 4152 | if (!failed) { |
| 3904 | * Restore old refcnt if we previously managed | ||
| 3905 | * to clear it from 1 to 0 | ||
| 3906 | */ | ||
| 3907 | if (!atomic_read(&css->refcnt)) | ||
| 3908 | atomic_set(&css->refcnt, 1); | ||
| 3909 | } else { | ||
| 3910 | /* Commit the fact that the CSS is removed */ | ||
| 3911 | set_bit(CSS_REMOVED, &css->flags); | 4153 | set_bit(CSS_REMOVED, &css->flags); |
| 4154 | css_put(css); | ||
| 4155 | } else { | ||
| 4156 | atomic_sub(CSS_DEACT_BIAS, &css->refcnt); | ||
| 3912 | } | 4157 | } |
| 3913 | } | 4158 | } |
| 4159 | |||
| 3914 | local_irq_restore(flags); | 4160 | local_irq_restore(flags); |
| 3915 | return !failed; | 4161 | return !failed; |
| 3916 | } | 4162 | } |
| @@ -3995,6 +4241,8 @@ again: | |||
| 3995 | list_del_init(&cgrp->sibling); | 4241 | list_del_init(&cgrp->sibling); |
| 3996 | cgroup_unlock_hierarchy(cgrp->root); | 4242 | cgroup_unlock_hierarchy(cgrp->root); |
| 3997 | 4243 | ||
| 4244 | list_del_init(&cgrp->allcg_node); | ||
| 4245 | |||
| 3998 | d = dget(cgrp->dentry); | 4246 | d = dget(cgrp->dentry); |
| 3999 | 4247 | ||
| 4000 | cgroup_d_remove_dir(d); | 4248 | cgroup_d_remove_dir(d); |
| @@ -4021,12 +4269,29 @@ again: | |||
| 4021 | return 0; | 4269 | return 0; |
| 4022 | } | 4270 | } |
| 4023 | 4271 | ||
| 4272 | static void __init_or_module cgroup_init_cftsets(struct cgroup_subsys *ss) | ||
| 4273 | { | ||
| 4274 | INIT_LIST_HEAD(&ss->cftsets); | ||
| 4275 | |||
| 4276 | /* | ||
| 4277 | * base_cftset is embedded in subsys itself, no need to worry about | ||
| 4278 | * deregistration. | ||
| 4279 | */ | ||
| 4280 | if (ss->base_cftypes) { | ||
| 4281 | ss->base_cftset.cfts = ss->base_cftypes; | ||
| 4282 | list_add_tail(&ss->base_cftset.node, &ss->cftsets); | ||
| 4283 | } | ||
| 4284 | } | ||
| 4285 | |||
| 4024 | static void __init cgroup_init_subsys(struct cgroup_subsys *ss) | 4286 | static void __init cgroup_init_subsys(struct cgroup_subsys *ss) |
| 4025 | { | 4287 | { |
| 4026 | struct cgroup_subsys_state *css; | 4288 | struct cgroup_subsys_state *css; |
| 4027 | 4289 | ||
| 4028 | printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name); | 4290 | printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name); |
| 4029 | 4291 | ||
| 4292 | /* init base cftset */ | ||
| 4293 | cgroup_init_cftsets(ss); | ||
| 4294 | |||
| 4030 | /* Create the top cgroup state for this subsystem */ | 4295 | /* Create the top cgroup state for this subsystem */ |
| 4031 | list_add(&ss->sibling, &rootnode.subsys_list); | 4296 | list_add(&ss->sibling, &rootnode.subsys_list); |
| 4032 | ss->root = &rootnode; | 4297 | ss->root = &rootnode; |
| @@ -4096,6 +4361,9 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) | |||
| 4096 | return 0; | 4361 | return 0; |
| 4097 | } | 4362 | } |
| 4098 | 4363 | ||
| 4364 | /* init base cftset */ | ||
| 4365 | cgroup_init_cftsets(ss); | ||
| 4366 | |||
| 4099 | /* | 4367 | /* |
| 4100 | * need to register a subsys id before anything else - for example, | 4368 | * need to register a subsys id before anything else - for example, |
| 4101 | * init_cgroup_css needs it. | 4369 | * init_cgroup_css needs it. |
| @@ -4685,21 +4953,41 @@ static void check_for_release(struct cgroup *cgrp) | |||
| 4685 | } | 4953 | } |
| 4686 | 4954 | ||
| 4687 | /* Caller must verify that the css is not for root cgroup */ | 4955 | /* Caller must verify that the css is not for root cgroup */ |
| 4688 | void __css_put(struct cgroup_subsys_state *css, int count) | 4956 | bool __css_tryget(struct cgroup_subsys_state *css) |
| 4957 | { | ||
| 4958 | do { | ||
| 4959 | int v = css_refcnt(css); | ||
| 4960 | |||
| 4961 | if (atomic_cmpxchg(&css->refcnt, v, v + 1) == v) | ||
| 4962 | return true; | ||
| 4963 | cpu_relax(); | ||
| 4964 | } while (!test_bit(CSS_REMOVED, &css->flags)); | ||
| 4965 | |||
| 4966 | return false; | ||
| 4967 | } | ||
| 4968 | EXPORT_SYMBOL_GPL(__css_tryget); | ||
| 4969 | |||
| 4970 | /* Caller must verify that the css is not for root cgroup */ | ||
| 4971 | void __css_put(struct cgroup_subsys_state *css) | ||
| 4689 | { | 4972 | { |
| 4690 | struct cgroup *cgrp = css->cgroup; | 4973 | struct cgroup *cgrp = css->cgroup; |
| 4691 | int val; | 4974 | |
| 4692 | rcu_read_lock(); | 4975 | rcu_read_lock(); |
| 4693 | val = atomic_sub_return(count, &css->refcnt); | 4976 | atomic_dec(&css->refcnt); |
| 4694 | if (val == 1) { | 4977 | switch (css_refcnt(css)) { |
| 4978 | case 1: | ||
| 4695 | if (notify_on_release(cgrp)) { | 4979 | if (notify_on_release(cgrp)) { |
| 4696 | set_bit(CGRP_RELEASABLE, &cgrp->flags); | 4980 | set_bit(CGRP_RELEASABLE, &cgrp->flags); |
| 4697 | check_for_release(cgrp); | 4981 | check_for_release(cgrp); |
| 4698 | } | 4982 | } |
| 4699 | cgroup_wakeup_rmdir_waiter(cgrp); | 4983 | cgroup_wakeup_rmdir_waiter(cgrp); |
| 4984 | break; | ||
| 4985 | case 0: | ||
| 4986 | if (!test_bit(CSS_CLEAR_CSS_REFS, &css->flags)) | ||
| 4987 | schedule_work(&css->dput_work); | ||
| 4988 | break; | ||
| 4700 | } | 4989 | } |
| 4701 | rcu_read_unlock(); | 4990 | rcu_read_unlock(); |
| 4702 | WARN_ON_ONCE(val < 1); | ||
| 4703 | } | 4991 | } |
| 4704 | EXPORT_SYMBOL_GPL(__css_put); | 4992 | EXPORT_SYMBOL_GPL(__css_put); |
| 4705 | 4993 | ||
| @@ -4818,7 +5106,7 @@ unsigned short css_id(struct cgroup_subsys_state *css) | |||
| 4818 | * on this or this is under rcu_read_lock(). Once css->id is allocated, | 5106 | * on this or this is under rcu_read_lock(). Once css->id is allocated, |
| 4819 | * it's unchanged until freed. | 5107 | * it's unchanged until freed. |
| 4820 | */ | 5108 | */ |
| 4821 | cssid = rcu_dereference_check(css->id, atomic_read(&css->refcnt)); | 5109 | cssid = rcu_dereference_check(css->id, css_refcnt(css)); |
| 4822 | 5110 | ||
| 4823 | if (cssid) | 5111 | if (cssid) |
| 4824 | return cssid->id; | 5112 | return cssid->id; |
| @@ -4830,7 +5118,7 @@ unsigned short css_depth(struct cgroup_subsys_state *css) | |||
| 4830 | { | 5118 | { |
| 4831 | struct css_id *cssid; | 5119 | struct css_id *cssid; |
| 4832 | 5120 | ||
| 4833 | cssid = rcu_dereference_check(css->id, atomic_read(&css->refcnt)); | 5121 | cssid = rcu_dereference_check(css->id, css_refcnt(css)); |
| 4834 | 5122 | ||
| 4835 | if (cssid) | 5123 | if (cssid) |
| 4836 | return cssid->depth; | 5124 | return cssid->depth; |
| @@ -4844,7 +5132,7 @@ EXPORT_SYMBOL_GPL(css_depth); | |||
| 4844 | * @root: the css supposed to be an ancestor of the child. | 5132 | * @root: the css supposed to be an ancestor of the child. |
| 4845 | * | 5133 | * |
| 4846 | * Returns true if "root" is an ancestor of "child" in its hierarchy. Because | 5134 | * Returns true if "root" is an ancestor of "child" in its hierarchy. Because |
| 4847 | * this function reads css->id, this use rcu_dereference() and rcu_read_lock(). | 5135 | * this function reads css->id, the caller must hold rcu_read_lock(). |
| 4848 | * But, considering usual usage, the csses should be valid objects after test. | 5136 | * But, considering usual usage, the csses should be valid objects after test. |
| 4849 | * Assuming that the caller will do some action to the child if this returns | 5137 | * Assuming that the caller will do some action to the child if this returns |
| 4850 | * true, the caller must take "child"'s reference count. | 5138 | * true, the caller must take "child"'s reference count. |
| @@ -4856,18 +5144,18 @@ bool css_is_ancestor(struct cgroup_subsys_state *child, | |||
| 4856 | { | 5144 | { |
| 4857 | struct css_id *child_id; | 5145 | struct css_id *child_id; |
| 4858 | struct css_id *root_id; | 5146 | struct css_id *root_id; |
| 4859 | bool ret = true; | ||
| 4860 | 5147 | ||
| 4861 | rcu_read_lock(); | ||
| 4862 | child_id = rcu_dereference(child->id); | 5148 | child_id = rcu_dereference(child->id); |
| 5149 | if (!child_id) | ||
| 5150 | return false; | ||
| 4863 | root_id = rcu_dereference(root->id); | 5151 | root_id = rcu_dereference(root->id); |
| 4864 | if (!child_id | 5152 | if (!root_id) |
| 4865 | || !root_id | 5153 | return false; |
| 4866 | || (child_id->depth < root_id->depth) | 5154 | if (child_id->depth < root_id->depth) |
| 4867 | || (child_id->stack[root_id->depth] != root_id->id)) | 5155 | return false; |
| 4868 | ret = false; | 5156 | if (child_id->stack[root_id->depth] != root_id->id) |
| 4869 | rcu_read_unlock(); | 5157 | return false; |
| 4870 | return ret; | 5158 | return true; |
| 4871 | } | 5159 | } |
| 4872 | 5160 | ||
| 4873 | void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css) | 5161 | void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css) |
| @@ -5211,19 +5499,15 @@ static struct cftype debug_files[] = { | |||
| 5211 | .name = "releasable", | 5499 | .name = "releasable", |
| 5212 | .read_u64 = releasable_read, | 5500 | .read_u64 = releasable_read, |
| 5213 | }, | 5501 | }, |
| 5214 | }; | ||
| 5215 | 5502 | ||
| 5216 | static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont) | 5503 | { } /* terminate */ |
| 5217 | { | 5504 | }; |
| 5218 | return cgroup_add_files(cont, ss, debug_files, | ||
| 5219 | ARRAY_SIZE(debug_files)); | ||
| 5220 | } | ||
| 5221 | 5505 | ||
| 5222 | struct cgroup_subsys debug_subsys = { | 5506 | struct cgroup_subsys debug_subsys = { |
| 5223 | .name = "debug", | 5507 | .name = "debug", |
| 5224 | .create = debug_create, | 5508 | .create = debug_create, |
| 5225 | .destroy = debug_destroy, | 5509 | .destroy = debug_destroy, |
| 5226 | .populate = debug_populate, | ||
| 5227 | .subsys_id = debug_subsys_id, | 5510 | .subsys_id = debug_subsys_id, |
| 5511 | .base_cftypes = debug_files, | ||
| 5228 | }; | 5512 | }; |
| 5229 | #endif /* CONFIG_CGROUP_DEBUG */ | 5513 | #endif /* CONFIG_CGROUP_DEBUG */ |
