aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/cgroup.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/cgroup.c')
-rw-r--r--kernel/cgroup.c96
1 file changed, 80 insertions, 16 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index cace83ddbcdc..c92fb9549358 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -57,10 +57,14 @@
57 57
58static DEFINE_MUTEX(cgroup_mutex); 58static DEFINE_MUTEX(cgroup_mutex);
59 59
60/* Generate an array of cgroup subsystem pointers */ 60/*
61 * Generate an array of cgroup subsystem pointers. At boot time, this is
62 * populated up to CGROUP_BUILTIN_SUBSYS_COUNT, and modular subsystems are
63 * registered after that. The mutable section of this array is protected by
64 * cgroup_mutex.
65 */
61#define SUBSYS(_x) &_x ## _subsys, 66#define SUBSYS(_x) &_x ## _subsys,
62 67static struct cgroup_subsys *subsys[CGROUP_SUBSYS_COUNT] = {
63static struct cgroup_subsys *subsys[] = {
64#include <linux/cgroup_subsys.h> 68#include <linux/cgroup_subsys.h>
65}; 69};
66 70
@@ -448,8 +452,11 @@ static struct css_set *find_existing_css_set(
448 struct hlist_node *node; 452 struct hlist_node *node;
449 struct css_set *cg; 453 struct css_set *cg;
450 454
451 /* Built the set of subsystem state objects that we want to 455 /*
452 * see in the new css_set */ 456 * Build the set of subsystem state objects that we want to see in the
 457 * new css_set. While subsystems can change globally, the entries here
458 * won't change, so no need for locking.
459 */
453 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 460 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
454 if (root->subsys_bits & (1UL << i)) { 461 if (root->subsys_bits & (1UL << i)) {
455 /* Subsystem is in this hierarchy. So we want 462 /* Subsystem is in this hierarchy. So we want
@@ -884,7 +891,9 @@ void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
884 css_put(css); 891 css_put(css);
885} 892}
886 893
887 894/*
895 * Call with cgroup_mutex held.
896 */
888static int rebind_subsystems(struct cgroupfs_root *root, 897static int rebind_subsystems(struct cgroupfs_root *root,
889 unsigned long final_bits) 898 unsigned long final_bits)
890{ 899{
@@ -892,6 +901,8 @@ static int rebind_subsystems(struct cgroupfs_root *root,
892 struct cgroup *cgrp = &root->top_cgroup; 901 struct cgroup *cgrp = &root->top_cgroup;
893 int i; 902 int i;
894 903
904 BUG_ON(!mutex_is_locked(&cgroup_mutex));
905
895 removed_bits = root->actual_subsys_bits & ~final_bits; 906 removed_bits = root->actual_subsys_bits & ~final_bits;
896 added_bits = final_bits & ~root->actual_subsys_bits; 907 added_bits = final_bits & ~root->actual_subsys_bits;
897 /* Check that any added subsystems are currently free */ 908 /* Check that any added subsystems are currently free */
@@ -900,6 +911,12 @@ static int rebind_subsystems(struct cgroupfs_root *root,
900 struct cgroup_subsys *ss = subsys[i]; 911 struct cgroup_subsys *ss = subsys[i];
901 if (!(bit & added_bits)) 912 if (!(bit & added_bits))
902 continue; 913 continue;
914 /*
915 * Nobody should tell us to do a subsys that doesn't exist:
916 * parse_cgroupfs_options should catch that case and refcounts
917 * ensure that subsystems won't disappear once selected.
918 */
919 BUG_ON(ss == NULL);
903 if (ss->root != &rootnode) { 920 if (ss->root != &rootnode) {
904 /* Subsystem isn't free */ 921 /* Subsystem isn't free */
905 return -EBUSY; 922 return -EBUSY;
@@ -919,6 +936,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
919 unsigned long bit = 1UL << i; 936 unsigned long bit = 1UL << i;
920 if (bit & added_bits) { 937 if (bit & added_bits) {
921 /* We're binding this subsystem to this hierarchy */ 938 /* We're binding this subsystem to this hierarchy */
939 BUG_ON(ss == NULL);
922 BUG_ON(cgrp->subsys[i]); 940 BUG_ON(cgrp->subsys[i]);
923 BUG_ON(!dummytop->subsys[i]); 941 BUG_ON(!dummytop->subsys[i]);
924 BUG_ON(dummytop->subsys[i]->cgroup != dummytop); 942 BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
@@ -932,6 +950,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
932 mutex_unlock(&ss->hierarchy_mutex); 950 mutex_unlock(&ss->hierarchy_mutex);
933 } else if (bit & removed_bits) { 951 } else if (bit & removed_bits) {
934 /* We're removing this subsystem */ 952 /* We're removing this subsystem */
953 BUG_ON(ss == NULL);
935 BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]); 954 BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
936 BUG_ON(cgrp->subsys[i]->cgroup != cgrp); 955 BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
937 mutex_lock(&ss->hierarchy_mutex); 956 mutex_lock(&ss->hierarchy_mutex);
@@ -944,6 +963,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
944 mutex_unlock(&ss->hierarchy_mutex); 963 mutex_unlock(&ss->hierarchy_mutex);
945 } else if (bit & final_bits) { 964 } else if (bit & final_bits) {
946 /* Subsystem state should already exist */ 965 /* Subsystem state should already exist */
966 BUG_ON(ss == NULL);
947 BUG_ON(!cgrp->subsys[i]); 967 BUG_ON(!cgrp->subsys[i]);
948 } else { 968 } else {
949 /* Subsystem state shouldn't exist */ 969 /* Subsystem state shouldn't exist */
@@ -986,14 +1006,18 @@ struct cgroup_sb_opts {
986 1006
987}; 1007};
988 1008
989/* Convert a hierarchy specifier into a bitmask of subsystems and 1009/*
990 * flags. */ 1010 * Convert a hierarchy specifier into a bitmask of subsystems and flags. Call
1011 * with cgroup_mutex held to protect the subsys[] array.
1012 */
991static int parse_cgroupfs_options(char *data, 1013static int parse_cgroupfs_options(char *data,
992 struct cgroup_sb_opts *opts) 1014 struct cgroup_sb_opts *opts)
993{ 1015{
994 char *token, *o = data ?: "all"; 1016 char *token, *o = data ?: "all";
995 unsigned long mask = (unsigned long)-1; 1017 unsigned long mask = (unsigned long)-1;
996 1018
1019 BUG_ON(!mutex_is_locked(&cgroup_mutex));
1020
997#ifdef CONFIG_CPUSETS 1021#ifdef CONFIG_CPUSETS
998 mask = ~(1UL << cpuset_subsys_id); 1022 mask = ~(1UL << cpuset_subsys_id);
999#endif 1023#endif
@@ -1009,6 +1033,8 @@ static int parse_cgroupfs_options(char *data,
1009 opts->subsys_bits = 0; 1033 opts->subsys_bits = 0;
1010 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 1034 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
1011 struct cgroup_subsys *ss = subsys[i]; 1035 struct cgroup_subsys *ss = subsys[i];
1036 if (ss == NULL)
1037 continue;
1012 if (!ss->disabled) 1038 if (!ss->disabled)
1013 opts->subsys_bits |= 1ul << i; 1039 opts->subsys_bits |= 1ul << i;
1014 } 1040 }
@@ -1053,6 +1079,8 @@ static int parse_cgroupfs_options(char *data,
1053 int i; 1079 int i;
1054 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 1080 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
1055 ss = subsys[i]; 1081 ss = subsys[i];
1082 if (ss == NULL)
1083 continue;
1056 if (!strcmp(token, ss->name)) { 1084 if (!strcmp(token, ss->name)) {
1057 if (!ss->disabled) 1085 if (!ss->disabled)
1058 set_bit(i, &opts->subsys_bits); 1086 set_bit(i, &opts->subsys_bits);
@@ -1306,7 +1334,9 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
1306 struct cgroupfs_root *new_root; 1334 struct cgroupfs_root *new_root;
1307 1335
1308 /* First find the desired set of subsystems */ 1336 /* First find the desired set of subsystems */
1337 mutex_lock(&cgroup_mutex);
1309 ret = parse_cgroupfs_options(data, &opts); 1338 ret = parse_cgroupfs_options(data, &opts);
1339 mutex_unlock(&cgroup_mutex);
1310 if (ret) 1340 if (ret)
1311 goto out_err; 1341 goto out_err;
1312 1342
@@ -2918,8 +2948,14 @@ static void cgroup_lock_hierarchy(struct cgroupfs_root *root)
2918 /* We need to take each hierarchy_mutex in a consistent order */ 2948 /* We need to take each hierarchy_mutex in a consistent order */
2919 int i; 2949 int i;
2920 2950
2951 /*
2952 * No worry about a race with rebind_subsystems that might mess up the
2953 * locking order, since both parties are under cgroup_mutex.
2954 */
2921 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 2955 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
2922 struct cgroup_subsys *ss = subsys[i]; 2956 struct cgroup_subsys *ss = subsys[i];
2957 if (ss == NULL)
2958 continue;
2923 if (ss->root == root) 2959 if (ss->root == root)
2924 mutex_lock(&ss->hierarchy_mutex); 2960 mutex_lock(&ss->hierarchy_mutex);
2925 } 2961 }
@@ -2931,6 +2967,8 @@ static void cgroup_unlock_hierarchy(struct cgroupfs_root *root)
2931 2967
2932 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 2968 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
2933 struct cgroup_subsys *ss = subsys[i]; 2969 struct cgroup_subsys *ss = subsys[i];
2970 if (ss == NULL)
2971 continue;
2934 if (ss->root == root) 2972 if (ss->root == root)
2935 mutex_unlock(&ss->hierarchy_mutex); 2973 mutex_unlock(&ss->hierarchy_mutex);
2936 } 2974 }
@@ -3054,11 +3092,16 @@ static int cgroup_has_css_refs(struct cgroup *cgrp)
3054 * synchronization other than RCU, and the subsystem linked 3092 * synchronization other than RCU, and the subsystem linked
3055 * list isn't RCU-safe */ 3093 * list isn't RCU-safe */
3056 int i; 3094 int i;
3095 /*
3096 * We won't need to lock the subsys array, because the subsystems
3097 * we're concerned about aren't going anywhere since our cgroup root
3098 * has a reference on them.
3099 */
3057 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 3100 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
3058 struct cgroup_subsys *ss = subsys[i]; 3101 struct cgroup_subsys *ss = subsys[i];
3059 struct cgroup_subsys_state *css; 3102 struct cgroup_subsys_state *css;
3060 /* Skip subsystems not in this hierarchy */ 3103 /* Skip subsystems not present or not in this hierarchy */
3061 if (ss->root != cgrp->root) 3104 if (ss == NULL || ss->root != cgrp->root)
3062 continue; 3105 continue;
3063 css = cgrp->subsys[ss->subsys_id]; 3106 css = cgrp->subsys[ss->subsys_id];
3064 /* When called from check_for_release() it's possible 3107 /* When called from check_for_release() it's possible
@@ -3279,7 +3322,8 @@ int __init cgroup_init_early(void)
3279 for (i = 0; i < CSS_SET_TABLE_SIZE; i++) 3322 for (i = 0; i < CSS_SET_TABLE_SIZE; i++)
3280 INIT_HLIST_HEAD(&css_set_table[i]); 3323 INIT_HLIST_HEAD(&css_set_table[i]);
3281 3324
3282 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 3325 /* at bootup time, we don't worry about modular subsystems */
3326 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
3283 struct cgroup_subsys *ss = subsys[i]; 3327 struct cgroup_subsys *ss = subsys[i];
3284 3328
3285 BUG_ON(!ss->name); 3329 BUG_ON(!ss->name);
@@ -3314,7 +3358,8 @@ int __init cgroup_init(void)
3314 if (err) 3358 if (err)
3315 return err; 3359 return err;
3316 3360
3317 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 3361 /* at bootup time, we don't worry about modular subsystems */
3362 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
3318 struct cgroup_subsys *ss = subsys[i]; 3363 struct cgroup_subsys *ss = subsys[i];
3319 if (!ss->early_init) 3364 if (!ss->early_init)
3320 cgroup_init_subsys(ss); 3365 cgroup_init_subsys(ss);
@@ -3423,9 +3468,16 @@ static int proc_cgroupstats_show(struct seq_file *m, void *v)
3423 int i; 3468 int i;
3424 3469
3425 seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n"); 3470 seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
3471 /*
 3472 * Ideally we don't want subsystems moving around while we do this.
3473 * cgroup_mutex is also necessary to guarantee an atomic snapshot of
3474 * subsys/hierarchy state.
3475 */
3426 mutex_lock(&cgroup_mutex); 3476 mutex_lock(&cgroup_mutex);
3427 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 3477 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
3428 struct cgroup_subsys *ss = subsys[i]; 3478 struct cgroup_subsys *ss = subsys[i];
3479 if (ss == NULL)
3480 continue;
3429 seq_printf(m, "%s\t%d\t%d\t%d\n", 3481 seq_printf(m, "%s\t%d\t%d\t%d\n",
3430 ss->name, ss->root->hierarchy_id, 3482 ss->name, ss->root->hierarchy_id,
3431 ss->root->number_of_cgroups, !ss->disabled); 3483 ss->root->number_of_cgroups, !ss->disabled);
@@ -3483,7 +3535,12 @@ void cgroup_fork_callbacks(struct task_struct *child)
3483{ 3535{
3484 if (need_forkexit_callback) { 3536 if (need_forkexit_callback) {
3485 int i; 3537 int i;
3486 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 3538 /*
3539 * forkexit callbacks are only supported for builtin
3540 * subsystems, and the builtin section of the subsys array is
3541 * immutable, so we don't need to lock the subsys array here.
3542 */
3543 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
3487 struct cgroup_subsys *ss = subsys[i]; 3544 struct cgroup_subsys *ss = subsys[i];
3488 if (ss->fork) 3545 if (ss->fork)
3489 ss->fork(ss, child); 3546 ss->fork(ss, child);
@@ -3552,7 +3609,11 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
3552 struct css_set *cg; 3609 struct css_set *cg;
3553 3610
3554 if (run_callbacks && need_forkexit_callback) { 3611 if (run_callbacks && need_forkexit_callback) {
3555 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 3612 /*
3613 * modular subsystems can't use callbacks, so no need to lock
3614 * the subsys array
3615 */
3616 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
3556 struct cgroup_subsys *ss = subsys[i]; 3617 struct cgroup_subsys *ss = subsys[i];
3557 if (ss->exit) 3618 if (ss->exit)
3558 ss->exit(ss, tsk); 3619 ss->exit(ss, tsk);
@@ -3844,8 +3905,11 @@ static int __init cgroup_disable(char *str)
3844 while ((token = strsep(&str, ",")) != NULL) { 3905 while ((token = strsep(&str, ",")) != NULL) {
3845 if (!*token) 3906 if (!*token)
3846 continue; 3907 continue;
3847 3908 /*
3848 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 3909 * cgroup_disable, being at boot time, can't know about module
3910 * subsystems, so we don't worry about them.
3911 */
3912 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
3849 struct cgroup_subsys *ss = subsys[i]; 3913 struct cgroup_subsys *ss = subsys[i];
3850 3914
3851 if (!strcmp(token, ss->name)) { 3915 if (!strcmp(token, ss->name)) {