Diffstat (limited to 'kernel/cgroup.c')
-rw-r--r--  kernel/cgroup.c | 601
 1 file changed, 448 insertions(+), 153 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index ed64ccac67c9..b303dfc7dce0 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -60,9 +60,13 @@ | |||
60 | #include <linux/eventfd.h> | 60 | #include <linux/eventfd.h> |
61 | #include <linux/poll.h> | 61 | #include <linux/poll.h> |
62 | #include <linux/flex_array.h> /* used in cgroup_attach_proc */ | 62 | #include <linux/flex_array.h> /* used in cgroup_attach_proc */ |
63 | #include <linux/kthread.h> | ||
63 | 64 | ||
64 | #include <linux/atomic.h> | 65 | #include <linux/atomic.h> |
65 | 66 | ||
67 | /* css deactivation bias, makes css->refcnt negative to deny new trygets */ | ||
68 | #define CSS_DEACT_BIAS INT_MIN | ||
69 | |||
66 | /* | 70 | /* |
67 | * cgroup_mutex is the master lock. Any modification to cgroup or its | 71 | * cgroup_mutex is the master lock. Any modification to cgroup or its |
68 | * hierarchy must be performed while holding it. | 72 | * hierarchy must be performed while holding it. |
@@ -127,6 +131,9 @@ struct cgroupfs_root { | |||
127 | /* A list running through the active hierarchies */ | 131 | /* A list running through the active hierarchies */ |
128 | struct list_head root_list; | 132 | struct list_head root_list; |
129 | 133 | ||
134 | /* All cgroups on this root, cgroup_mutex protected */ | ||
135 | struct list_head allcg_list; | ||
136 | |||
130 | /* Hierarchy-specific flags */ | 137 | /* Hierarchy-specific flags */ |
131 | unsigned long flags; | 138 | unsigned long flags; |
132 | 139 | ||
@@ -145,6 +152,15 @@ struct cgroupfs_root { | |||
145 | static struct cgroupfs_root rootnode; | 152 | static struct cgroupfs_root rootnode; |
146 | 153 | ||
147 | /* | 154 | /* |
155 | * cgroupfs file entry, pointed to from leaf dentry->d_fsdata. | ||
156 | */ | ||
157 | struct cfent { | ||
158 | struct list_head node; | ||
159 | struct dentry *dentry; | ||
160 | struct cftype *type; | ||
161 | }; | ||
162 | |||
163 | /* | ||
148 | * CSS ID -- ID per subsys's Cgroup Subsys State(CSS). used only when | 164 | * CSS ID -- ID per subsys's Cgroup Subsys State(CSS). used only when |
149 | * cgroup_subsys->use_id != 0. | 165 | * cgroup_subsys->use_id != 0. |
150 | */ | 166 | */ |
@@ -239,6 +255,19 @@ int cgroup_lock_is_held(void) | |||
239 | 255 | ||
240 | EXPORT_SYMBOL_GPL(cgroup_lock_is_held); | 256 | EXPORT_SYMBOL_GPL(cgroup_lock_is_held); |
241 | 257 | ||
258 | static int css_unbias_refcnt(int refcnt) | ||
259 | { | ||
260 | return refcnt >= 0 ? refcnt : refcnt - CSS_DEACT_BIAS; | ||
261 | } | ||
262 | |||
263 | /* the current nr of refs, always >= 0 whether @css is deactivated or not */ | ||
264 | static int css_refcnt(struct cgroup_subsys_state *css) | ||
265 | { | ||
266 | int v = atomic_read(&css->refcnt); | ||
267 | |||
268 | return css_unbias_refcnt(v); | ||
269 | } | ||
270 | |||
242 | /* convenient tests for these bits */ | 271 | /* convenient tests for these bits */ |
243 | inline int cgroup_is_removed(const struct cgroup *cgrp) | 272 | inline int cgroup_is_removed(const struct cgroup *cgrp) |
244 | { | 273 | { |
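The two helpers added above implement the "biased" refcount scheme the rest of the patch builds on: adding CSS_DEACT_BIAS (INT_MIN) makes the raw counter negative, which denies new trygets, while css_unbias_refcnt()/css_refcnt() can still recover the number of live references. A minimal userspace sketch of just that arithmetic (illustrative only, not kernel code):

        #include <limits.h>
        #include <stdatomic.h>
        #include <stdio.h>

        #define CSS_DEACT_BIAS INT_MIN

        static int unbias(int v)
        {
                return v >= 0 ? v : v - CSS_DEACT_BIAS;
        }

        int main(void)
        {
                atomic_int refcnt;

                atomic_init(&refcnt, 3);        /* three live references */

                /* "deactivate": the raw value goes negative, so new trygets
                 * can be refused by checking the sign... */
                atomic_fetch_add(&refcnt, CSS_DEACT_BIAS);

                /* ...while the live count is still recoverable */
                printf("raw=%d live=%d\n",
                       atomic_load(&refcnt), unbias(atomic_load(&refcnt)));
                return 0;
        }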
@@ -279,6 +308,21 @@ list_for_each_entry(_ss, &_root->subsys_list, sibling) | |||
279 | #define for_each_active_root(_root) \ | 308 | #define for_each_active_root(_root) \ |
280 | list_for_each_entry(_root, &roots, root_list) | 309 | list_for_each_entry(_root, &roots, root_list) |
281 | 310 | ||
311 | static inline struct cgroup *__d_cgrp(struct dentry *dentry) | ||
312 | { | ||
313 | return dentry->d_fsdata; | ||
314 | } | ||
315 | |||
316 | static inline struct cfent *__d_cfe(struct dentry *dentry) | ||
317 | { | ||
318 | return dentry->d_fsdata; | ||
319 | } | ||
320 | |||
321 | static inline struct cftype *__d_cft(struct dentry *dentry) | ||
322 | { | ||
323 | return __d_cfe(dentry)->type; | ||
324 | } | ||
325 | |||
282 | /* the list of cgroups eligible for automatic release. Protected by | 326 | /* the list of cgroups eligible for automatic release. Protected by |
283 | * release_list_lock */ | 327 | * release_list_lock */ |
284 | static LIST_HEAD(release_list); | 328 | static LIST_HEAD(release_list); |
@@ -816,12 +860,17 @@ static int cgroup_call_pre_destroy(struct cgroup *cgrp) | |||
816 | struct cgroup_subsys *ss; | 860 | struct cgroup_subsys *ss; |
817 | int ret = 0; | 861 | int ret = 0; |
818 | 862 | ||
819 | for_each_subsys(cgrp->root, ss) | 863 | for_each_subsys(cgrp->root, ss) { |
820 | if (ss->pre_destroy) { | 864 | if (!ss->pre_destroy) |
821 | ret = ss->pre_destroy(cgrp); | 865 | continue; |
822 | if (ret) | 866 | |
823 | break; | 867 | ret = ss->pre_destroy(cgrp); |
868 | if (ret) { | ||
869 | /* ->pre_destroy() failure is being deprecated */ | ||
870 | WARN_ON_ONCE(!ss->__DEPRECATED_clear_css_refs); | ||
871 | break; | ||
824 | } | 872 | } |
873 | } | ||
825 | 874 | ||
826 | return ret; | 875 | return ret; |
827 | } | 876 | } |
@@ -864,6 +913,14 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode) | |||
864 | BUG_ON(!list_empty(&cgrp->pidlists)); | 913 | BUG_ON(!list_empty(&cgrp->pidlists)); |
865 | 914 | ||
866 | kfree_rcu(cgrp, rcu_head); | 915 | kfree_rcu(cgrp, rcu_head); |
916 | } else { | ||
917 | struct cfent *cfe = __d_cfe(dentry); | ||
918 | struct cgroup *cgrp = dentry->d_parent->d_fsdata; | ||
919 | |||
920 | WARN_ONCE(!list_empty(&cfe->node) && | ||
921 | cgrp != &cgrp->root->top_cgroup, | ||
922 | "cfe still linked for %s\n", cfe->type->name); | ||
923 | kfree(cfe); | ||
867 | } | 924 | } |
868 | iput(inode); | 925 | iput(inode); |
869 | } | 926 | } |
@@ -882,34 +939,36 @@ static void remove_dir(struct dentry *d) | |||
882 | dput(parent); | 939 | dput(parent); |
883 | } | 940 | } |
884 | 941 | ||
885 | static void cgroup_clear_directory(struct dentry *dentry) | 942 | static int cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft) |
886 | { | 943 | { |
887 | struct list_head *node; | 944 | struct cfent *cfe; |
888 | 945 | ||
889 | BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); | 946 | lockdep_assert_held(&cgrp->dentry->d_inode->i_mutex); |
890 | spin_lock(&dentry->d_lock); | 947 | lockdep_assert_held(&cgroup_mutex); |
891 | node = dentry->d_subdirs.next; | 948 | |
892 | while (node != &dentry->d_subdirs) { | 949 | list_for_each_entry(cfe, &cgrp->files, node) { |
893 | struct dentry *d = list_entry(node, struct dentry, d_u.d_child); | 950 | struct dentry *d = cfe->dentry; |
894 | 951 | ||
895 | spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED); | 952 | if (cft && cfe->type != cft) |
896 | list_del_init(node); | 953 | continue; |
897 | if (d->d_inode) { | 954 | |
898 | /* This should never be called on a cgroup | 955 | dget(d); |
899 | * directory with child cgroups */ | 956 | d_delete(d); |
900 | BUG_ON(d->d_inode->i_mode & S_IFDIR); | 957 | simple_unlink(d->d_inode, d); |
901 | dget_dlock(d); | 958 | list_del_init(&cfe->node); |
902 | spin_unlock(&d->d_lock); | 959 | dput(d); |
903 | spin_unlock(&dentry->d_lock); | 960 | |
904 | d_delete(d); | 961 | return 0; |
905 | simple_unlink(dentry->d_inode, d); | ||
906 | dput(d); | ||
907 | spin_lock(&dentry->d_lock); | ||
908 | } else | ||
909 | spin_unlock(&d->d_lock); | ||
910 | node = dentry->d_subdirs.next; | ||
911 | } | 962 | } |
912 | spin_unlock(&dentry->d_lock); | 963 | return -ENOENT; |
964 | } | ||
965 | |||
966 | static void cgroup_clear_directory(struct dentry *dir) | ||
967 | { | ||
968 | struct cgroup *cgrp = __d_cgrp(dir); | ||
969 | |||
970 | while (!list_empty(&cgrp->files)) | ||
971 | cgroup_rm_file(cgrp, NULL); | ||
913 | } | 972 | } |
914 | 973 | ||
915 | /* | 974 | /* |
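Two things change in this hunk: each cgroup now keeps its own list of created control files (cgrp->files, via struct cfent), so teardown no longer walks dentry->d_subdirs under d_lock, and the locking contract is expressed with lockdep_assert_held(), which (with lockdep enabled) checks that the calling task actually holds the lock and compiles away otherwise. A tiny, hypothetical illustration of that assertion idiom (the foo_* names are made up):

        #include <linux/list.h>
        #include <linux/lockdep.h>
        #include <linux/mutex.h>

        struct foo_entry {
                struct list_head node;
        };

        struct foo_table {
                struct mutex lock;
                struct list_head entries;
        };

        static void foo_del_entry(struct foo_table *tbl, struct foo_entry *e)
        {
                lockdep_assert_held(&tbl->lock);        /* caller must hold tbl->lock */
                list_del_init(&e->node);
        }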
@@ -1294,6 +1353,11 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) | |||
1294 | if (ret) | 1353 | if (ret) |
1295 | goto out_unlock; | 1354 | goto out_unlock; |
1296 | 1355 | ||
1356 | /* See feature-removal-schedule.txt */ | ||
1357 | if (opts.subsys_bits != root->actual_subsys_bits || opts.release_agent) | ||
1358 | pr_warning("cgroup: option changes via remount are deprecated (pid=%d comm=%s)\n", | ||
1359 | task_tgid_nr(current), current->comm); | ||
1360 | |||
1297 | /* Don't allow flags or name to change at remount */ | 1361 | /* Don't allow flags or name to change at remount */ |
1298 | if (opts.flags != root->flags || | 1362 | if (opts.flags != root->flags || |
1299 | (opts.name && strcmp(opts.name, root->name))) { | 1363 | (opts.name && strcmp(opts.name, root->name))) { |
@@ -1308,7 +1372,8 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) | |||
1308 | goto out_unlock; | 1372 | goto out_unlock; |
1309 | } | 1373 | } |
1310 | 1374 | ||
1311 | /* (re)populate subsystem files */ | 1375 | /* clear out any existing files and repopulate subsystem files */ |
1376 | cgroup_clear_directory(cgrp->dentry); | ||
1312 | cgroup_populate_dir(cgrp); | 1377 | cgroup_populate_dir(cgrp); |
1313 | 1378 | ||
1314 | if (opts.release_agent) | 1379 | if (opts.release_agent) |
@@ -1333,6 +1398,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) | |||
1333 | { | 1398 | { |
1334 | INIT_LIST_HEAD(&cgrp->sibling); | 1399 | INIT_LIST_HEAD(&cgrp->sibling); |
1335 | INIT_LIST_HEAD(&cgrp->children); | 1400 | INIT_LIST_HEAD(&cgrp->children); |
1401 | INIT_LIST_HEAD(&cgrp->files); | ||
1336 | INIT_LIST_HEAD(&cgrp->css_sets); | 1402 | INIT_LIST_HEAD(&cgrp->css_sets); |
1337 | INIT_LIST_HEAD(&cgrp->release_list); | 1403 | INIT_LIST_HEAD(&cgrp->release_list); |
1338 | INIT_LIST_HEAD(&cgrp->pidlists); | 1404 | INIT_LIST_HEAD(&cgrp->pidlists); |
@@ -1344,11 +1410,14 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) | |||
1344 | static void init_cgroup_root(struct cgroupfs_root *root) | 1410 | static void init_cgroup_root(struct cgroupfs_root *root) |
1345 | { | 1411 | { |
1346 | struct cgroup *cgrp = &root->top_cgroup; | 1412 | struct cgroup *cgrp = &root->top_cgroup; |
1413 | |||
1347 | INIT_LIST_HEAD(&root->subsys_list); | 1414 | INIT_LIST_HEAD(&root->subsys_list); |
1348 | INIT_LIST_HEAD(&root->root_list); | 1415 | INIT_LIST_HEAD(&root->root_list); |
1416 | INIT_LIST_HEAD(&root->allcg_list); | ||
1349 | root->number_of_cgroups = 1; | 1417 | root->number_of_cgroups = 1; |
1350 | cgrp->root = root; | 1418 | cgrp->root = root; |
1351 | cgrp->top_cgroup = cgrp; | 1419 | cgrp->top_cgroup = cgrp; |
1420 | list_add_tail(&cgrp->allcg_node, &root->allcg_list); | ||
1352 | init_cgroup_housekeeping(cgrp); | 1421 | init_cgroup_housekeeping(cgrp); |
1353 | } | 1422 | } |
1354 | 1423 | ||
@@ -1692,16 +1761,6 @@ static struct file_system_type cgroup_fs_type = { | |||
1692 | 1761 | ||
1693 | static struct kobject *cgroup_kobj; | 1762 | static struct kobject *cgroup_kobj; |
1694 | 1763 | ||
1695 | static inline struct cgroup *__d_cgrp(struct dentry *dentry) | ||
1696 | { | ||
1697 | return dentry->d_fsdata; | ||
1698 | } | ||
1699 | |||
1700 | static inline struct cftype *__d_cft(struct dentry *dentry) | ||
1701 | { | ||
1702 | return dentry->d_fsdata; | ||
1703 | } | ||
1704 | |||
1705 | /** | 1764 | /** |
1706 | * cgroup_path - generate the path of a cgroup | 1765 | * cgroup_path - generate the path of a cgroup |
1707 | * @cgrp: the cgroup in question | 1766 | * @cgrp: the cgroup in question |
@@ -2160,9 +2219,9 @@ retry_find_task: | |||
2160 | * only need to check permissions on one of them. | 2219 | * only need to check permissions on one of them. |
2161 | */ | 2220 | */ |
2162 | tcred = __task_cred(tsk); | 2221 | tcred = __task_cred(tsk); |
2163 | if (cred->euid && | 2222 | if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && |
2164 | cred->euid != tcred->uid && | 2223 | !uid_eq(cred->euid, tcred->uid) && |
2165 | cred->euid != tcred->suid) { | 2224 | !uid_eq(cred->euid, tcred->suid)) { |
2166 | rcu_read_unlock(); | 2225 | rcu_read_unlock(); |
2167 | ret = -EACCES; | 2226 | ret = -EACCES; |
2168 | goto out_unlock_cgroup; | 2227 | goto out_unlock_cgroup; |
@@ -2172,6 +2231,18 @@ retry_find_task: | |||
2172 | 2231 | ||
2173 | if (threadgroup) | 2232 | if (threadgroup) |
2174 | tsk = tsk->group_leader; | 2233 | tsk = tsk->group_leader; |
2234 | |||
2235 | /* | ||
2236 | * Workqueue threads may acquire PF_THREAD_BOUND and become | ||
2237 | * trapped in a cpuset, or RT worker may be born in a cgroup | ||
2238 | * with no rt_runtime allocated. Just say no. | ||
2239 | */ | ||
2240 | if (tsk == kthreadd_task || (tsk->flags & PF_THREAD_BOUND)) { | ||
2241 | ret = -EINVAL; | ||
2242 | rcu_read_unlock(); | ||
2243 | goto out_unlock_cgroup; | ||
2244 | } | ||
2245 | |||
2175 | get_task_struct(tsk); | 2246 | get_task_struct(tsk); |
2176 | rcu_read_unlock(); | 2247 | rcu_read_unlock(); |
2177 | 2248 | ||
@@ -2603,50 +2674,191 @@ static umode_t cgroup_file_mode(const struct cftype *cft) | |||
2603 | return mode; | 2674 | return mode; |
2604 | } | 2675 | } |
2605 | 2676 | ||
2606 | int cgroup_add_file(struct cgroup *cgrp, | 2677 | static int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys, |
2607 | struct cgroup_subsys *subsys, | 2678 | const struct cftype *cft) |
2608 | const struct cftype *cft) | ||
2609 | { | 2679 | { |
2610 | struct dentry *dir = cgrp->dentry; | 2680 | struct dentry *dir = cgrp->dentry; |
2681 | struct cgroup *parent = __d_cgrp(dir); | ||
2611 | struct dentry *dentry; | 2682 | struct dentry *dentry; |
2683 | struct cfent *cfe; | ||
2612 | int error; | 2684 | int error; |
2613 | umode_t mode; | 2685 | umode_t mode; |
2614 | |||
2615 | char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 }; | 2686 | char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 }; |
2687 | |||
2688 | /* does @cft->flags tell us to skip creation on @cgrp? */ | ||
2689 | if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgrp->parent) | ||
2690 | return 0; | ||
2691 | if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent) | ||
2692 | return 0; | ||
2693 | |||
2616 | if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) { | 2694 | if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) { |
2617 | strcpy(name, subsys->name); | 2695 | strcpy(name, subsys->name); |
2618 | strcat(name, "."); | 2696 | strcat(name, "."); |
2619 | } | 2697 | } |
2620 | strcat(name, cft->name); | 2698 | strcat(name, cft->name); |
2699 | |||
2621 | BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex)); | 2700 | BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex)); |
2701 | |||
2702 | cfe = kzalloc(sizeof(*cfe), GFP_KERNEL); | ||
2703 | if (!cfe) | ||
2704 | return -ENOMEM; | ||
2705 | |||
2622 | dentry = lookup_one_len(name, dir, strlen(name)); | 2706 | dentry = lookup_one_len(name, dir, strlen(name)); |
2623 | if (!IS_ERR(dentry)) { | 2707 | if (IS_ERR(dentry)) { |
2624 | mode = cgroup_file_mode(cft); | ||
2625 | error = cgroup_create_file(dentry, mode | S_IFREG, | ||
2626 | cgrp->root->sb); | ||
2627 | if (!error) | ||
2628 | dentry->d_fsdata = (void *)cft; | ||
2629 | dput(dentry); | ||
2630 | } else | ||
2631 | error = PTR_ERR(dentry); | 2708 | error = PTR_ERR(dentry); |
2709 | goto out; | ||
2710 | } | ||
2711 | |||
2712 | mode = cgroup_file_mode(cft); | ||
2713 | error = cgroup_create_file(dentry, mode | S_IFREG, cgrp->root->sb); | ||
2714 | if (!error) { | ||
2715 | cfe->type = (void *)cft; | ||
2716 | cfe->dentry = dentry; | ||
2717 | dentry->d_fsdata = cfe; | ||
2718 | list_add_tail(&cfe->node, &parent->files); | ||
2719 | cfe = NULL; | ||
2720 | } | ||
2721 | dput(dentry); | ||
2722 | out: | ||
2723 | kfree(cfe); | ||
2632 | return error; | 2724 | return error; |
2633 | } | 2725 | } |
2634 | EXPORT_SYMBOL_GPL(cgroup_add_file); | ||
2635 | 2726 | ||
2636 | int cgroup_add_files(struct cgroup *cgrp, | 2727 | static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys, |
2637 | struct cgroup_subsys *subsys, | 2728 | const struct cftype cfts[], bool is_add) |
2638 | const struct cftype cft[], | ||
2639 | int count) | ||
2640 | { | 2729 | { |
2641 | int i, err; | 2730 | const struct cftype *cft; |
2642 | for (i = 0; i < count; i++) { | 2731 | int err, ret = 0; |
2643 | err = cgroup_add_file(cgrp, subsys, &cft[i]); | 2732 | |
2644 | if (err) | 2733 | for (cft = cfts; cft->name[0] != '\0'; cft++) { |
2645 | return err; | 2734 | if (is_add) |
2735 | err = cgroup_add_file(cgrp, subsys, cft); | ||
2736 | else | ||
2737 | err = cgroup_rm_file(cgrp, cft); | ||
2738 | if (err) { | ||
2739 | pr_warning("cgroup_addrm_files: failed to %s %s, err=%d\n", | ||
2740 | is_add ? "add" : "remove", cft->name, err); | ||
2741 | ret = err; | ||
2742 | } | ||
2743 | } | ||
2744 | return ret; | ||
2745 | } | ||
2746 | |||
2747 | static DEFINE_MUTEX(cgroup_cft_mutex); | ||
2748 | |||
2749 | static void cgroup_cfts_prepare(void) | ||
2750 | __acquires(&cgroup_cft_mutex) __acquires(&cgroup_mutex) | ||
2751 | { | ||
2752 | /* | ||
2753 | * Thanks to the entanglement with vfs inode locking, we can't walk | ||
2754 | * the existing cgroups under cgroup_mutex and create files. | ||
2755 | * Instead, we increment reference on all cgroups and build list of | ||
2756 | * them using @cgrp->cft_q_node. Grab cgroup_cft_mutex to ensure | ||
2757 | * exclusive access to the field. | ||
2758 | */ | ||
2759 | mutex_lock(&cgroup_cft_mutex); | ||
2760 | mutex_lock(&cgroup_mutex); | ||
2761 | } | ||
2762 | |||
2763 | static void cgroup_cfts_commit(struct cgroup_subsys *ss, | ||
2764 | const struct cftype *cfts, bool is_add) | ||
2765 | __releases(&cgroup_mutex) __releases(&cgroup_cft_mutex) | ||
2766 | { | ||
2767 | LIST_HEAD(pending); | ||
2768 | struct cgroup *cgrp, *n; | ||
2769 | |||
2770 | /* %NULL @cfts indicates abort and don't bother if @ss isn't attached */ | ||
2771 | if (cfts && ss->root != &rootnode) { | ||
2772 | list_for_each_entry(cgrp, &ss->root->allcg_list, allcg_node) { | ||
2773 | dget(cgrp->dentry); | ||
2774 | list_add_tail(&cgrp->cft_q_node, &pending); | ||
2775 | } | ||
2646 | } | 2776 | } |
2777 | |||
2778 | mutex_unlock(&cgroup_mutex); | ||
2779 | |||
2780 | /* | ||
2781 | * All new cgroups will see @cfts update on @ss->cftsets. Add/rm | ||
2782 | * files for all cgroups which were created before. | ||
2783 | */ | ||
2784 | list_for_each_entry_safe(cgrp, n, &pending, cft_q_node) { | ||
2785 | struct inode *inode = cgrp->dentry->d_inode; | ||
2786 | |||
2787 | mutex_lock(&inode->i_mutex); | ||
2788 | mutex_lock(&cgroup_mutex); | ||
2789 | if (!cgroup_is_removed(cgrp)) | ||
2790 | cgroup_addrm_files(cgrp, ss, cfts, is_add); | ||
2791 | mutex_unlock(&cgroup_mutex); | ||
2792 | mutex_unlock(&inode->i_mutex); | ||
2793 | |||
2794 | list_del_init(&cgrp->cft_q_node); | ||
2795 | dput(cgrp->dentry); | ||
2796 | } | ||
2797 | |||
2798 | mutex_unlock(&cgroup_cft_mutex); | ||
2799 | } | ||
2800 | |||
2801 | /** | ||
2802 | * cgroup_add_cftypes - add an array of cftypes to a subsystem | ||
2803 | * @ss: target cgroup subsystem | ||
2804 | * @cfts: zero-length name terminated array of cftypes | ||
2805 | * | ||
2806 | * Register @cfts to @ss. Files described by @cfts are created for all | ||
2807 | * existing cgroups to which @ss is attached and all future cgroups will | ||
2808 | * have them too. This function can be called anytime whether @ss is | ||
2809 | * attached or not. | ||
2810 | * | ||
2811 | * Returns 0 on successful registration, -errno on failure. Note that this | ||
2812 | * function currently returns 0 as long as @cfts registration is successful | ||
2813 | * even if some file creation attempts on existing cgroups fail. | ||
2814 | */ | ||
2815 | int cgroup_add_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts) | ||
2816 | { | ||
2817 | struct cftype_set *set; | ||
2818 | |||
2819 | set = kzalloc(sizeof(*set), GFP_KERNEL); | ||
2820 | if (!set) | ||
2821 | return -ENOMEM; | ||
2822 | |||
2823 | cgroup_cfts_prepare(); | ||
2824 | set->cfts = cfts; | ||
2825 | list_add_tail(&set->node, &ss->cftsets); | ||
2826 | cgroup_cfts_commit(ss, cfts, true); | ||
2827 | |||
2647 | return 0; | 2828 | return 0; |
2648 | } | 2829 | } |
2649 | EXPORT_SYMBOL_GPL(cgroup_add_files); | 2830 | EXPORT_SYMBOL_GPL(cgroup_add_cftypes); |
2831 | |||
2832 | /** | ||
2833 | * cgroup_rm_cftypes - remove an array of cftypes from a subsystem | ||
2834 | * @ss: target cgroup subsystem | ||
2835 | * @cfts: zero-length name terminated array of cftypes | ||
2836 | * | ||
2837 | * Unregister @cfts from @ss. Files described by @cfts are removed from | ||
2838 | * all existing cgroups to which @ss is attached and all future cgroups | ||
2839 | * won't have them either. This function can be called anytime whether @ss | ||
2840 | * is attached or not. | ||
2841 | * | ||
2842 | * Returns 0 on successful unregistration, -ENOENT if @cfts is not | ||
2843 | * registered with @ss. | ||
2844 | */ | ||
2845 | int cgroup_rm_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts) | ||
2846 | { | ||
2847 | struct cftype_set *set; | ||
2848 | |||
2849 | cgroup_cfts_prepare(); | ||
2850 | |||
2851 | list_for_each_entry(set, &ss->cftsets, node) { | ||
2852 | if (set->cfts == cfts) { | ||
2853 | list_del_init(&set->node); | ||
2854 | cgroup_cfts_commit(ss, cfts, false); | ||
2855 | return 0; | ||
2856 | } | ||
2857 | } | ||
2858 | |||
2859 | cgroup_cfts_commit(ss, NULL, false); | ||
2860 | return -ENOENT; | ||
2861 | } | ||
2650 | 2862 | ||
2651 | /** | 2863 | /** |
2652 | * cgroup_task_count - count the number of tasks in a cgroup. | 2864 | * cgroup_task_count - count the number of tasks in a cgroup. |
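This is the heart of the series: cftype arrays are now terminated by an entry with an empty name, carry per-file flags (CFTYPE_ONLY_ON_ROOT / CFTYPE_NOT_ON_ROOT), and are registered with cgroup_add_cftypes(), which creates the files in every existing cgroup on the subsystem's hierarchy and in all future ones. A hypothetical controller (foo_subsys and the handlers are assumptions, not part of this patch) would use it like this:

        #include <linux/cgroup.h>
        #include <linux/init.h>

        extern struct cgroup_subsys foo_subsys;         /* assumed, defined elsewhere */

        static u64 foo_limit_read(struct cgroup *cgrp, struct cftype *cft)
        {
                return 0;                               /* placeholder */
        }

        static struct cftype foo_files[] = {
                {
                        .name = "limit_in_bytes",
                        .read_u64 = foo_limit_read,
                },
                {
                        .name = "root_only_stat",
                        .flags = CFTYPE_ONLY_ON_ROOT,   /* skipped on non-root cgroups */
                        .read_u64 = foo_limit_read,
                },
                { }     /* terminate */
        };

        static int __init foo_init_files(void)
        {
                /* cgroup_rm_cftypes(&foo_subsys, foo_files) would undo this */
                return cgroup_add_cftypes(&foo_subsys, foo_files);
        }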
@@ -3625,13 +3837,14 @@ static struct cftype files[] = { | |||
3625 | .read_u64 = cgroup_clone_children_read, | 3837 | .read_u64 = cgroup_clone_children_read, |
3626 | .write_u64 = cgroup_clone_children_write, | 3838 | .write_u64 = cgroup_clone_children_write, |
3627 | }, | 3839 | }, |
3628 | }; | 3840 | { |
3629 | 3841 | .name = "release_agent", | |
3630 | static struct cftype cft_release_agent = { | 3842 | .flags = CFTYPE_ONLY_ON_ROOT, |
3631 | .name = "release_agent", | 3843 | .read_seq_string = cgroup_release_agent_show, |
3632 | .read_seq_string = cgroup_release_agent_show, | 3844 | .write_string = cgroup_release_agent_write, |
3633 | .write_string = cgroup_release_agent_write, | 3845 | .max_write_len = PATH_MAX, |
3634 | .max_write_len = PATH_MAX, | 3846 | }, |
3847 | { } /* terminate */ | ||
3635 | }; | 3848 | }; |
3636 | 3849 | ||
3637 | static int cgroup_populate_dir(struct cgroup *cgrp) | 3850 | static int cgroup_populate_dir(struct cgroup *cgrp) |
@@ -3639,22 +3852,18 @@ static int cgroup_populate_dir(struct cgroup *cgrp) | |||
3639 | int err; | 3852 | int err; |
3640 | struct cgroup_subsys *ss; | 3853 | struct cgroup_subsys *ss; |
3641 | 3854 | ||
3642 | /* First clear out any existing files */ | 3855 | err = cgroup_addrm_files(cgrp, NULL, files, true); |
3643 | cgroup_clear_directory(cgrp->dentry); | ||
3644 | |||
3645 | err = cgroup_add_files(cgrp, NULL, files, ARRAY_SIZE(files)); | ||
3646 | if (err < 0) | 3856 | if (err < 0) |
3647 | return err; | 3857 | return err; |
3648 | 3858 | ||
3649 | if (cgrp == cgrp->top_cgroup) { | 3859 | /* process cftsets of each subsystem */ |
3650 | if ((err = cgroup_add_file(cgrp, NULL, &cft_release_agent)) < 0) | ||
3651 | return err; | ||
3652 | } | ||
3653 | |||
3654 | for_each_subsys(cgrp->root, ss) { | 3860 | for_each_subsys(cgrp->root, ss) { |
3655 | if (ss->populate && (err = ss->populate(ss, cgrp)) < 0) | 3861 | struct cftype_set *set; |
3656 | return err; | 3862 | |
3863 | list_for_each_entry(set, &ss->cftsets, node) | ||
3864 | cgroup_addrm_files(cgrp, ss, set->cfts, true); | ||
3657 | } | 3865 | } |
3866 | |||
3658 | /* This cgroup is ready now */ | 3867 | /* This cgroup is ready now */ |
3659 | for_each_subsys(cgrp->root, ss) { | 3868 | for_each_subsys(cgrp->root, ss) { |
3660 | struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; | 3869 | struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; |
@@ -3670,6 +3879,18 @@ static int cgroup_populate_dir(struct cgroup *cgrp) | |||
3670 | return 0; | 3879 | return 0; |
3671 | } | 3880 | } |
3672 | 3881 | ||
3882 | static void css_dput_fn(struct work_struct *work) | ||
3883 | { | ||
3884 | struct cgroup_subsys_state *css = | ||
3885 | container_of(work, struct cgroup_subsys_state, dput_work); | ||
3886 | struct dentry *dentry = css->cgroup->dentry; | ||
3887 | struct super_block *sb = dentry->d_sb; | ||
3888 | |||
3889 | atomic_inc(&sb->s_active); | ||
3890 | dput(dentry); | ||
3891 | deactivate_super(sb); | ||
3892 | } | ||
3893 | |||
3673 | static void init_cgroup_css(struct cgroup_subsys_state *css, | 3894 | static void init_cgroup_css(struct cgroup_subsys_state *css, |
3674 | struct cgroup_subsys *ss, | 3895 | struct cgroup_subsys *ss, |
3675 | struct cgroup *cgrp) | 3896 | struct cgroup *cgrp) |
@@ -3682,6 +3903,16 @@ static void init_cgroup_css(struct cgroup_subsys_state *css, | |||
3682 | set_bit(CSS_ROOT, &css->flags); | 3903 | set_bit(CSS_ROOT, &css->flags); |
3683 | BUG_ON(cgrp->subsys[ss->subsys_id]); | 3904 | BUG_ON(cgrp->subsys[ss->subsys_id]); |
3684 | cgrp->subsys[ss->subsys_id] = css; | 3905 | cgrp->subsys[ss->subsys_id] = css; |
3906 | |||
3907 | /* | ||
3908 | * If !clear_css_refs, css holds an extra ref to @cgrp->dentry | ||
3909 | * which is put on the last css_put(). dput() requires process | ||
3910 | * context, which css_put() may be called without. @css->dput_work | ||
3911 | * will be used to invoke dput() asynchronously from css_put(). | ||
3912 | */ | ||
3913 | INIT_WORK(&css->dput_work, css_dput_fn); | ||
3914 | if (ss->__DEPRECATED_clear_css_refs) | ||
3915 | set_bit(CSS_CLEAR_CSS_REFS, &css->flags); | ||
3685 | } | 3916 | } |
3686 | 3917 | ||
3687 | static void cgroup_lock_hierarchy(struct cgroupfs_root *root) | 3918 | static void cgroup_lock_hierarchy(struct cgroupfs_root *root) |
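css_dput_fn() and the INIT_WORK() above exist because, as the added comment notes, the final css_put() may run in a context that cannot call dput() directly; the work item moves the dput() into process context, and the s_active reference taken in css_dput_fn() keeps the superblock active across it. The underlying defer-to-a-workqueue pattern, reduced to a sketch with made-up foo_* names:

        #include <linux/kernel.h>
        #include <linux/slab.h>
        #include <linux/workqueue.h>

        struct foo {
                struct work_struct release_work;
                /* ... payload ... */
        };

        static void foo_release_workfn(struct work_struct *work)
        {
                struct foo *foo = container_of(work, struct foo, release_work);

                /* process context: sleeping calls (like dput()) are fine here */
                kfree(foo);
        }

        static struct foo *foo_alloc(void)
        {
                struct foo *foo = kzalloc(sizeof(*foo), GFP_KERNEL);

                if (foo)
                        INIT_WORK(&foo->release_work, foo_release_workfn);
                return foo;
        }

        static void foo_release(struct foo *foo)
        {
                schedule_work(&foo->release_work);      /* safe from atomic context */
        }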
@@ -3784,9 +4015,16 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
3784 | if (err < 0) | 4015 | if (err < 0) |
3785 | goto err_remove; | 4016 | goto err_remove; |
3786 | 4017 | ||
4018 | /* If !clear_css_refs, each css holds a ref to the cgroup's dentry */ | ||
4019 | for_each_subsys(root, ss) | ||
4020 | if (!ss->__DEPRECATED_clear_css_refs) | ||
4021 | dget(dentry); | ||
4022 | |||
3787 | /* The cgroup directory was pre-locked for us */ | 4023 | /* The cgroup directory was pre-locked for us */ |
3788 | BUG_ON(!mutex_is_locked(&cgrp->dentry->d_inode->i_mutex)); | 4024 | BUG_ON(!mutex_is_locked(&cgrp->dentry->d_inode->i_mutex)); |
3789 | 4025 | ||
4026 | list_add_tail(&cgrp->allcg_node, &root->allcg_list); | ||
4027 | |||
3790 | err = cgroup_populate_dir(cgrp); | 4028 | err = cgroup_populate_dir(cgrp); |
3791 | /* If err < 0, we have a half-filled directory - oh well ;) */ | 4029 | /* If err < 0, we have a half-filled directory - oh well ;) */ |
3792 | 4030 | ||
@@ -3826,18 +4064,19 @@ static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
3826 | return cgroup_create(c_parent, dentry, mode | S_IFDIR); | 4064 | return cgroup_create(c_parent, dentry, mode | S_IFDIR); |
3827 | } | 4065 | } |
3828 | 4066 | ||
4067 | /* | ||
4068 | * Check the reference count on each subsystem. Since we already | ||
4069 | * established that there are no tasks in the cgroup, if the css refcount | ||
4070 | * is also 1, then there should be no outstanding references, so the | ||
4071 | * subsystem is safe to destroy. We scan across all subsystems rather than | ||
4072 | * using the per-hierarchy linked list of mounted subsystems since we can | ||
4073 | * be called via check_for_release() with no synchronization other than | ||
4074 | * RCU, and the subsystem linked list isn't RCU-safe. | ||
4075 | */ | ||
3829 | static int cgroup_has_css_refs(struct cgroup *cgrp) | 4076 | static int cgroup_has_css_refs(struct cgroup *cgrp) |
3830 | { | 4077 | { |
3831 | /* Check the reference count on each subsystem. Since we | ||
3832 | * already established that there are no tasks in the | ||
3833 | * cgroup, if the css refcount is also 1, then there should | ||
3834 | * be no outstanding references, so the subsystem is safe to | ||
3835 | * destroy. We scan across all subsystems rather than using | ||
3836 | * the per-hierarchy linked list of mounted subsystems since | ||
3837 | * we can be called via check_for_release() with no | ||
3838 | * synchronization other than RCU, and the subsystem linked | ||
3839 | * list isn't RCU-safe */ | ||
3840 | int i; | 4078 | int i; |
4079 | |||
3841 | /* | 4080 | /* |
3842 | * We won't need to lock the subsys array, because the subsystems | 4081 | * We won't need to lock the subsys array, because the subsystems |
3843 | * we're concerned about aren't going anywhere since our cgroup root | 4082 | * we're concerned about aren't going anywhere since our cgroup root |
@@ -3846,17 +4085,21 @@ static int cgroup_has_css_refs(struct cgroup *cgrp) | |||
3846 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | 4085 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { |
3847 | struct cgroup_subsys *ss = subsys[i]; | 4086 | struct cgroup_subsys *ss = subsys[i]; |
3848 | struct cgroup_subsys_state *css; | 4087 | struct cgroup_subsys_state *css; |
4088 | |||
3849 | /* Skip subsystems not present or not in this hierarchy */ | 4089 | /* Skip subsystems not present or not in this hierarchy */ |
3850 | if (ss == NULL || ss->root != cgrp->root) | 4090 | if (ss == NULL || ss->root != cgrp->root) |
3851 | continue; | 4091 | continue; |
4092 | |||
3852 | css = cgrp->subsys[ss->subsys_id]; | 4093 | css = cgrp->subsys[ss->subsys_id]; |
3853 | /* When called from check_for_release() it's possible | 4094 | /* |
4095 | * When called from check_for_release() it's possible | ||
3854 | * that by this point the cgroup has been removed | 4096 | * that by this point the cgroup has been removed |
3855 | * and the css deleted. But a false-positive doesn't | 4097 | * and the css deleted. But a false-positive doesn't |
3856 | * matter, since it can only happen if the cgroup | 4098 | * matter, since it can only happen if the cgroup |
3857 | * has been deleted and hence no longer needs the | 4099 | * has been deleted and hence no longer needs the |
3858 | * release agent to be called anyway. */ | 4100 | * release agent to be called anyway. |
3859 | if (css && (atomic_read(&css->refcnt) > 1)) | 4101 | */ |
4102 | if (css && css_refcnt(css) > 1) | ||
3860 | return 1; | 4103 | return 1; |
3861 | } | 4104 | } |
3862 | return 0; | 4105 | return 0; |
@@ -3866,51 +4109,63 @@ static int cgroup_has_css_refs(struct cgroup *cgrp) | |||
3866 | * Atomically mark all (or else none) of the cgroup's CSS objects as | 4109 | * Atomically mark all (or else none) of the cgroup's CSS objects as |
3867 | * CSS_REMOVED. Return true on success, or false if the cgroup has | 4110 | * CSS_REMOVED. Return true on success, or false if the cgroup has |
3868 | * busy subsystems. Call with cgroup_mutex held | 4111 | * busy subsystems. Call with cgroup_mutex held |
4112 | * | ||
4113 | * Depending on whether a subsys has __DEPRECATED_clear_css_refs set or | ||
4114 | * not, cgroup removal behaves differently. | ||
4115 | * | ||
4116 | * If clear is set, css refcnt for the subsystem should be zero before | ||
4117 | * cgroup removal can be committed. This is implemented by | ||
4118 | * CGRP_WAIT_ON_RMDIR and retry logic around ->pre_destroy(), which may be | ||
4119 | * called multiple times until all css refcnts reach zero and is allowed to | ||
4120 | * veto removal on any invocation. This behavior is deprecated and will be | ||
4121 | * removed as soon as the existing user (memcg) is updated. | ||
4122 | * | ||
4123 | * If clear is not set, each css holds an extra reference to the cgroup's | ||
4124 | * dentry and cgroup removal proceeds regardless of css refs. | ||
4125 | * ->pre_destroy() will be called at least once and is not allowed to fail. | ||
4126 | * On the last put of each css, whenever that may be, the extra dentry ref | ||
4127 | * is put so that dentry destruction happens only after all css's are | ||
4128 | * released. | ||
3869 | */ | 4129 | */ |
3870 | |||
3871 | static int cgroup_clear_css_refs(struct cgroup *cgrp) | 4130 | static int cgroup_clear_css_refs(struct cgroup *cgrp) |
3872 | { | 4131 | { |
3873 | struct cgroup_subsys *ss; | 4132 | struct cgroup_subsys *ss; |
3874 | unsigned long flags; | 4133 | unsigned long flags; |
3875 | bool failed = false; | 4134 | bool failed = false; |
4135 | |||
3876 | local_irq_save(flags); | 4136 | local_irq_save(flags); |
4137 | |||
4138 | /* | ||
4139 | * Block new css_tryget() by deactivating refcnt. If all refcnts | ||
4140 | * for subsystems w/ clear_css_refs set were 1 at the moment of | ||
4141 | * deactivation, we succeeded. | ||
4142 | */ | ||
3877 | for_each_subsys(cgrp->root, ss) { | 4143 | for_each_subsys(cgrp->root, ss) { |
3878 | struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; | 4144 | struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; |
3879 | int refcnt; | 4145 | |
3880 | while (1) { | 4146 | WARN_ON(atomic_read(&css->refcnt) < 0); |
3881 | /* We can only remove a CSS with a refcnt==1 */ | 4147 | atomic_add(CSS_DEACT_BIAS, &css->refcnt); |
3882 | refcnt = atomic_read(&css->refcnt); | 4148 | |
3883 | if (refcnt > 1) { | 4149 | if (ss->__DEPRECATED_clear_css_refs) |
3884 | failed = true; | 4150 | failed |= css_refcnt(css) != 1; |
3885 | goto done; | ||
3886 | } | ||
3887 | BUG_ON(!refcnt); | ||
3888 | /* | ||
3889 | * Drop the refcnt to 0 while we check other | ||
3890 | * subsystems. This will cause any racing | ||
3891 | * css_tryget() to spin until we set the | ||
3892 | * CSS_REMOVED bits or abort | ||
3893 | */ | ||
3894 | if (atomic_cmpxchg(&css->refcnt, refcnt, 0) == refcnt) | ||
3895 | break; | ||
3896 | cpu_relax(); | ||
3897 | } | ||
3898 | } | 4151 | } |
3899 | done: | 4152 | |
4153 | /* | ||
4154 | * If succeeded, set REMOVED and put all the base refs; otherwise, | ||
4155 | * restore refcnts to positive values. Either way, all in-progress | ||
4156 | * css_tryget() will be released. | ||
4157 | */ | ||
3900 | for_each_subsys(cgrp->root, ss) { | 4158 | for_each_subsys(cgrp->root, ss) { |
3901 | struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; | 4159 | struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; |
3902 | if (failed) { | 4160 | |
3903 | /* | 4161 | if (!failed) { |
3904 | * Restore old refcnt if we previously managed | ||
3905 | * to clear it from 1 to 0 | ||
3906 | */ | ||
3907 | if (!atomic_read(&css->refcnt)) | ||
3908 | atomic_set(&css->refcnt, 1); | ||
3909 | } else { | ||
3910 | /* Commit the fact that the CSS is removed */ | ||
3911 | set_bit(CSS_REMOVED, &css->flags); | 4162 | set_bit(CSS_REMOVED, &css->flags); |
4163 | css_put(css); | ||
4164 | } else { | ||
4165 | atomic_sub(CSS_DEACT_BIAS, &css->refcnt); | ||
3912 | } | 4166 | } |
3913 | } | 4167 | } |
4168 | |||
3914 | local_irq_restore(flags); | 4169 | local_irq_restore(flags); |
3915 | return !failed; | 4170 | return !failed; |
3916 | } | 4171 | } |
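The rewritten cgroup_clear_css_refs() is a two-phase commit: first bias every css to stop new trygets and note whether any subsystem still relying on __DEPRECATED_clear_css_refs has outstanding references, then either commit (set CSS_REMOVED and drop the base reference) or roll the bias back. A compact userspace sketch of that control flow over an array of counters (flags, wakeups and the interaction with a spinning tryget are omitted):

        #include <limits.h>
        #include <stdatomic.h>
        #include <stdbool.h>

        #define BIAS    INT_MIN
        #define NR      3

        static bool try_remove(atomic_int cnt[NR], const bool need_base_only[NR])
        {
                bool failed = false;

                for (int i = 0; i < NR; i++) {
                        atomic_fetch_add(&cnt[i], BIAS);        /* deny new trygets */
                        if (need_base_only[i]) {
                                int v = atomic_load(&cnt[i]);

                                failed |= (v >= 0 ? v : v - BIAS) != 1;
                        }
                }

                for (int i = 0; i < NR; i++) {
                        if (!failed)
                                atomic_fetch_sub(&cnt[i], 1);           /* commit: drop base ref */
                        else
                                atomic_fetch_sub(&cnt[i], BIAS);        /* abort: undo the bias */
                }

                return !failed;
        }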
@@ -3995,6 +4250,8 @@ again: | |||
3995 | list_del_init(&cgrp->sibling); | 4250 | list_del_init(&cgrp->sibling); |
3996 | cgroup_unlock_hierarchy(cgrp->root); | 4251 | cgroup_unlock_hierarchy(cgrp->root); |
3997 | 4252 | ||
4253 | list_del_init(&cgrp->allcg_node); | ||
4254 | |||
3998 | d = dget(cgrp->dentry); | 4255 | d = dget(cgrp->dentry); |
3999 | 4256 | ||
4000 | cgroup_d_remove_dir(d); | 4257 | cgroup_d_remove_dir(d); |
@@ -4021,12 +4278,29 @@ again: | |||
4021 | return 0; | 4278 | return 0; |
4022 | } | 4279 | } |
4023 | 4280 | ||
4281 | static void __init_or_module cgroup_init_cftsets(struct cgroup_subsys *ss) | ||
4282 | { | ||
4283 | INIT_LIST_HEAD(&ss->cftsets); | ||
4284 | |||
4285 | /* | ||
4286 | * base_cftset is embedded in subsys itself, no need to worry about | ||
4287 | * deregistration. | ||
4288 | */ | ||
4289 | if (ss->base_cftypes) { | ||
4290 | ss->base_cftset.cfts = ss->base_cftypes; | ||
4291 | list_add_tail(&ss->base_cftset.node, &ss->cftsets); | ||
4292 | } | ||
4293 | } | ||
4294 | |||
4024 | static void __init cgroup_init_subsys(struct cgroup_subsys *ss) | 4295 | static void __init cgroup_init_subsys(struct cgroup_subsys *ss) |
4025 | { | 4296 | { |
4026 | struct cgroup_subsys_state *css; | 4297 | struct cgroup_subsys_state *css; |
4027 | 4298 | ||
4028 | printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name); | 4299 | printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name); |
4029 | 4300 | ||
4301 | /* init base cftset */ | ||
4302 | cgroup_init_cftsets(ss); | ||
4303 | |||
4030 | /* Create the top cgroup state for this subsystem */ | 4304 | /* Create the top cgroup state for this subsystem */ |
4031 | list_add(&ss->sibling, &rootnode.subsys_list); | 4305 | list_add(&ss->sibling, &rootnode.subsys_list); |
4032 | ss->root = &rootnode; | 4306 | ss->root = &rootnode; |
@@ -4096,6 +4370,9 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) | |||
4096 | return 0; | 4370 | return 0; |
4097 | } | 4371 | } |
4098 | 4372 | ||
4373 | /* init base cftset */ | ||
4374 | cgroup_init_cftsets(ss); | ||
4375 | |||
4099 | /* | 4376 | /* |
4100 | * need to register a subsys id before anything else - for example, | 4377 | * need to register a subsys id before anything else - for example, |
4101 | * init_cgroup_css needs it. | 4378 | * init_cgroup_css needs it. |
@@ -4685,21 +4962,43 @@ static void check_for_release(struct cgroup *cgrp) | |||
4685 | } | 4962 | } |
4686 | 4963 | ||
4687 | /* Caller must verify that the css is not for root cgroup */ | 4964 | /* Caller must verify that the css is not for root cgroup */ |
4688 | void __css_put(struct cgroup_subsys_state *css, int count) | 4965 | bool __css_tryget(struct cgroup_subsys_state *css) |
4966 | { | ||
4967 | do { | ||
4968 | int v = css_refcnt(css); | ||
4969 | |||
4970 | if (atomic_cmpxchg(&css->refcnt, v, v + 1) == v) | ||
4971 | return true; | ||
4972 | cpu_relax(); | ||
4973 | } while (!test_bit(CSS_REMOVED, &css->flags)); | ||
4974 | |||
4975 | return false; | ||
4976 | } | ||
4977 | EXPORT_SYMBOL_GPL(__css_tryget); | ||
4978 | |||
4979 | /* Caller must verify that the css is not for root cgroup */ | ||
4980 | void __css_put(struct cgroup_subsys_state *css) | ||
4689 | { | 4981 | { |
4690 | struct cgroup *cgrp = css->cgroup; | 4982 | struct cgroup *cgrp = css->cgroup; |
4691 | int val; | 4983 | int v; |
4984 | |||
4692 | rcu_read_lock(); | 4985 | rcu_read_lock(); |
4693 | val = atomic_sub_return(count, &css->refcnt); | 4986 | v = css_unbias_refcnt(atomic_dec_return(&css->refcnt)); |
4694 | if (val == 1) { | 4987 | |
4988 | switch (v) { | ||
4989 | case 1: | ||
4695 | if (notify_on_release(cgrp)) { | 4990 | if (notify_on_release(cgrp)) { |
4696 | set_bit(CGRP_RELEASABLE, &cgrp->flags); | 4991 | set_bit(CGRP_RELEASABLE, &cgrp->flags); |
4697 | check_for_release(cgrp); | 4992 | check_for_release(cgrp); |
4698 | } | 4993 | } |
4699 | cgroup_wakeup_rmdir_waiter(cgrp); | 4994 | cgroup_wakeup_rmdir_waiter(cgrp); |
4995 | break; | ||
4996 | case 0: | ||
4997 | if (!test_bit(CSS_CLEAR_CSS_REFS, &css->flags)) | ||
4998 | schedule_work(&css->dput_work); | ||
4999 | break; | ||
4700 | } | 5000 | } |
4701 | rcu_read_unlock(); | 5001 | rcu_read_unlock(); |
4702 | WARN_ON_ONCE(val < 1); | ||
4703 | } | 5002 | } |
4704 | EXPORT_SYMBOL_GPL(__css_put); | 5003 | EXPORT_SYMBOL_GPL(__css_put); |
4705 | 5004 | ||
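__css_tryget() spins only while a deactivated css might still come back (until CSS_REMOVED is set), and __css_put() either triggers the release path at an unbiased count of 1 or, at 0, schedules the deferred dput shown earlier. From a controller's point of view the css_tryget()/css_put() interface is unchanged; a hypothetical caller that pins a css across blocking work (foo_do_blocking_work() is an assumed helper, not part of this patch):

        #include <linux/cgroup.h>
        #include <linux/errno.h>

        extern int foo_do_blocking_work(struct cgroup_subsys_state *css);      /* assumed */

        static int foo_pin_and_work(struct cgroup_subsys_state *css)
        {
                int ret;

                if (!css_tryget(css))
                        return -ENODEV;         /* removal already in progress */

                ret = foo_do_blocking_work(css);        /* may sleep */

                css_put(css);   /* the last put may schedule css_dput_fn() above */
                return ret;
        }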
@@ -4818,7 +5117,7 @@ unsigned short css_id(struct cgroup_subsys_state *css) | |||
4818 | * on this or this is under rcu_read_lock(). Once css->id is allocated, | 5117 | * on this or this is under rcu_read_lock(). Once css->id is allocated, |
4819 | * it's unchanged until freed. | 5118 | * it's unchanged until freed. |
4820 | */ | 5119 | */ |
4821 | cssid = rcu_dereference_check(css->id, atomic_read(&css->refcnt)); | 5120 | cssid = rcu_dereference_check(css->id, css_refcnt(css)); |
4822 | 5121 | ||
4823 | if (cssid) | 5122 | if (cssid) |
4824 | return cssid->id; | 5123 | return cssid->id; |
@@ -4830,7 +5129,7 @@ unsigned short css_depth(struct cgroup_subsys_state *css) | |||
4830 | { | 5129 | { |
4831 | struct css_id *cssid; | 5130 | struct css_id *cssid; |
4832 | 5131 | ||
4833 | cssid = rcu_dereference_check(css->id, atomic_read(&css->refcnt)); | 5132 | cssid = rcu_dereference_check(css->id, css_refcnt(css)); |
4834 | 5133 | ||
4835 | if (cssid) | 5134 | if (cssid) |
4836 | return cssid->depth; | 5135 | return cssid->depth; |
@@ -4844,7 +5143,7 @@ EXPORT_SYMBOL_GPL(css_depth); | |||
4844 | * @root: the css supporsed to be an ancestor of the child. | 5143 | * @root: the css supporsed to be an ancestor of the child. |
4845 | * | 5144 | * |
4846 | * Returns true if "root" is an ancestor of "child" in its hierarchy. Because | 5145 | * Returns true if "root" is an ancestor of "child" in its hierarchy. Because |
4847 | * this function reads css->id, this use rcu_dereference() and rcu_read_lock(). | 5146 | * this function reads css->id, the caller must hold rcu_read_lock(). |
4848 | * But, considering usual usage, the csses should be valid objects after test. | 5147 | * But, considering usual usage, the csses should be valid objects after test. |
4849 | * Assuming that the caller will do some action to the child if this returns | 5148 | * Assuming that the caller will do some action to the child if this returns |
4850 | * returns true, the caller must take "child";s reference count. | 5149 | * returns true, the caller must take "child";s reference count. |
@@ -4856,18 +5155,18 @@ bool css_is_ancestor(struct cgroup_subsys_state *child, | |||
4856 | { | 5155 | { |
4857 | struct css_id *child_id; | 5156 | struct css_id *child_id; |
4858 | struct css_id *root_id; | 5157 | struct css_id *root_id; |
4859 | bool ret = true; | ||
4860 | 5158 | ||
4861 | rcu_read_lock(); | ||
4862 | child_id = rcu_dereference(child->id); | 5159 | child_id = rcu_dereference(child->id); |
5160 | if (!child_id) | ||
5161 | return false; | ||
4863 | root_id = rcu_dereference(root->id); | 5162 | root_id = rcu_dereference(root->id); |
4864 | if (!child_id | 5163 | if (!root_id) |
4865 | || !root_id | 5164 | return false; |
4866 | || (child_id->depth < root_id->depth) | 5165 | if (child_id->depth < root_id->depth) |
4867 | || (child_id->stack[root_id->depth] != root_id->id)) | 5166 | return false; |
4868 | ret = false; | 5167 | if (child_id->stack[root_id->depth] != root_id->id) |
4869 | rcu_read_unlock(); | 5168 | return false; |
4870 | return ret; | 5169 | return true; |
4871 | } | 5170 | } |
4872 | 5171 | ||
4873 | void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css) | 5172 | void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css) |
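css_is_ancestor() no longer takes rcu_read_lock() itself; per the updated comment, the caller must hold it around the call. A minimal hypothetical wrapper showing the expected usage:

        #include <linux/cgroup.h>
        #include <linux/rcupdate.h>

        static bool foo_css_in_subtree(struct cgroup_subsys_state *child,
                                       struct cgroup_subsys_state *root)
        {
                bool ret;

                rcu_read_lock();
                ret = css_is_ancestor(child, root);
                rcu_read_unlock();
                return ret;
        }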
@@ -5211,19 +5510,15 @@ static struct cftype debug_files[] = { | |||
5211 | .name = "releasable", | 5510 | .name = "releasable", |
5212 | .read_u64 = releasable_read, | 5511 | .read_u64 = releasable_read, |
5213 | }, | 5512 | }, |
5214 | }; | ||
5215 | 5513 | ||
5216 | static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont) | 5514 | { } /* terminate */ |
5217 | { | 5515 | }; |
5218 | return cgroup_add_files(cont, ss, debug_files, | ||
5219 | ARRAY_SIZE(debug_files)); | ||
5220 | } | ||
5221 | 5516 | ||
5222 | struct cgroup_subsys debug_subsys = { | 5517 | struct cgroup_subsys debug_subsys = { |
5223 | .name = "debug", | 5518 | .name = "debug", |
5224 | .create = debug_create, | 5519 | .create = debug_create, |
5225 | .destroy = debug_destroy, | 5520 | .destroy = debug_destroy, |
5226 | .populate = debug_populate, | ||
5227 | .subsys_id = debug_subsys_id, | 5521 | .subsys_id = debug_subsys_id, |
5522 | .base_cftypes = debug_files, | ||
5228 | }; | 5523 | }; |
5229 | #endif /* CONFIG_CGROUP_DEBUG */ | 5524 | #endif /* CONFIG_CGROUP_DEBUG */ |