Diffstat (limited to 'kernel/cgroup.c')
-rw-r--r-- | kernel/cgroup.c | 96
1 file changed, 80 insertions, 16 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index cace83ddbcdc..c92fb9549358 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -57,10 +57,14 @@
 
 static DEFINE_MUTEX(cgroup_mutex);
 
-/* Generate an array of cgroup subsystem pointers */
+/*
+ * Generate an array of cgroup subsystem pointers. At boot time, this is
+ * populated up to CGROUP_BUILTIN_SUBSYS_COUNT, and modular subsystems are
+ * registered after that. The mutable section of this array is protected by
+ * cgroup_mutex.
+ */
 #define SUBSYS(_x) &_x ## _subsys,
-
-static struct cgroup_subsys *subsys[] = {
+static struct cgroup_subsys *subsys[CGROUP_SUBSYS_COUNT] = {
 #include <linux/cgroup_subsys.h>
 };
 
@@ -448,8 +452,11 @@ static struct css_set *find_existing_css_set(
 	struct hlist_node *node;
 	struct css_set *cg;
 
-	/* Built the set of subsystem state objects that we want to
-	 * see in the new css_set */
+	/*
+	 * Build the set of subsystem state objects that we want to see in the
+	 * new css_set. while subsystems can change globally, the entries here
+	 * won't change, so no need for locking.
+	 */
 	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
 		if (root->subsys_bits & (1UL << i)) {
 			/* Subsystem is in this hierarchy. So we want
@@ -884,7 +891,9 @@ void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
 	css_put(css);
 }
 
-
+/*
+ * Call with cgroup_mutex held.
+ */
 static int rebind_subsystems(struct cgroupfs_root *root,
 			      unsigned long final_bits)
 {
@@ -892,6 +901,8 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 	struct cgroup *cgrp = &root->top_cgroup;
 	int i;
 
+	BUG_ON(!mutex_is_locked(&cgroup_mutex));
+
 	removed_bits = root->actual_subsys_bits & ~final_bits;
 	added_bits = final_bits & ~root->actual_subsys_bits;
 	/* Check that any added subsystems are currently free */
@@ -900,6 +911,12 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 		struct cgroup_subsys *ss = subsys[i];
 		if (!(bit & added_bits))
 			continue;
+		/*
+		 * Nobody should tell us to do a subsys that doesn't exist:
+		 * parse_cgroupfs_options should catch that case and refcounts
+		 * ensure that subsystems won't disappear once selected.
+		 */
+		BUG_ON(ss == NULL);
 		if (ss->root != &rootnode) {
 			/* Subsystem isn't free */
 			return -EBUSY;
@@ -919,6 +936,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 		unsigned long bit = 1UL << i;
 		if (bit & added_bits) {
 			/* We're binding this subsystem to this hierarchy */
+			BUG_ON(ss == NULL);
 			BUG_ON(cgrp->subsys[i]);
 			BUG_ON(!dummytop->subsys[i]);
 			BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
@@ -932,6 +950,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 			mutex_unlock(&ss->hierarchy_mutex);
 		} else if (bit & removed_bits) {
 			/* We're removing this subsystem */
+			BUG_ON(ss == NULL);
 			BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
 			BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
 			mutex_lock(&ss->hierarchy_mutex);
@@ -944,6 +963,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 			mutex_unlock(&ss->hierarchy_mutex);
 		} else if (bit & final_bits) {
 			/* Subsystem state should already exist */
+			BUG_ON(ss == NULL);
 			BUG_ON(!cgrp->subsys[i]);
 		} else {
 			/* Subsystem state shouldn't exist */
@@ -986,14 +1006,18 @@ struct cgroup_sb_opts {
 
 };
 
-/* Convert a hierarchy specifier into a bitmask of subsystems and
- * flags. */
+/*
+ * Convert a hierarchy specifier into a bitmask of subsystems and flags. Call
+ * with cgroup_mutex held to protect the subsys[] array.
+ */
 static int parse_cgroupfs_options(char *data,
 				  struct cgroup_sb_opts *opts)
 {
 	char *token, *o = data ?: "all";
 	unsigned long mask = (unsigned long)-1;
 
+	BUG_ON(!mutex_is_locked(&cgroup_mutex));
+
 #ifdef CONFIG_CPUSETS
 	mask = ~(1UL << cpuset_subsys_id);
 #endif
@@ -1009,6 +1033,8 @@ static int parse_cgroupfs_options(char *data,
 			opts->subsys_bits = 0;
 			for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
 				struct cgroup_subsys *ss = subsys[i];
+				if (ss == NULL)
+					continue;
 				if (!ss->disabled)
 					opts->subsys_bits |= 1ul << i;
 			}
@@ -1053,6 +1079,8 @@ static int parse_cgroupfs_options(char *data,
 			int i;
 			for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
 				ss = subsys[i];
+				if (ss == NULL)
+					continue;
 				if (!strcmp(token, ss->name)) {
 					if (!ss->disabled)
 						set_bit(i, &opts->subsys_bits);
@@ -1306,7 +1334,9 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
 	struct cgroupfs_root *new_root;
 
 	/* First find the desired set of subsystems */
+	mutex_lock(&cgroup_mutex);
 	ret = parse_cgroupfs_options(data, &opts);
+	mutex_unlock(&cgroup_mutex);
 	if (ret)
 		goto out_err;
 
@@ -2918,8 +2948,14 @@ static void cgroup_lock_hierarchy(struct cgroupfs_root *root)
 	/* We need to take each hierarchy_mutex in a consistent order */
 	int i;
 
+	/*
+	 * No worry about a race with rebind_subsystems that might mess up the
+	 * locking order, since both parties are under cgroup_mutex.
+	 */
 	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
 		struct cgroup_subsys *ss = subsys[i];
+		if (ss == NULL)
+			continue;
 		if (ss->root == root)
 			mutex_lock(&ss->hierarchy_mutex);
 	}
@@ -2931,6 +2967,8 @@ static void cgroup_unlock_hierarchy(struct cgroupfs_root *root)
 
 	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
 		struct cgroup_subsys *ss = subsys[i];
+		if (ss == NULL)
+			continue;
 		if (ss->root == root)
 			mutex_unlock(&ss->hierarchy_mutex);
 	}
@@ -3054,11 +3092,16 @@ static int cgroup_has_css_refs(struct cgroup *cgrp)
 	 * synchronization other than RCU, and the subsystem linked
 	 * list isn't RCU-safe */
 	int i;
+	/*
+	 * We won't need to lock the subsys array, because the subsystems
+	 * we're concerned about aren't going anywhere since our cgroup root
+	 * has a reference on them.
+	 */
 	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
 		struct cgroup_subsys *ss = subsys[i];
 		struct cgroup_subsys_state *css;
-		/* Skip subsystems not in this hierarchy */
-		if (ss->root != cgrp->root)
+		/* Skip subsystems not present or not in this hierarchy */
+		if (ss == NULL || ss->root != cgrp->root)
 			continue;
 		css = cgrp->subsys[ss->subsys_id];
 		/* When called from check_for_release() it's possible
@@ -3279,7 +3322,8 @@ int __init cgroup_init_early(void)
 	for (i = 0; i < CSS_SET_TABLE_SIZE; i++)
 		INIT_HLIST_HEAD(&css_set_table[i]);
 
-	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+	/* at bootup time, we don't worry about modular subsystems */
+	for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
 		struct cgroup_subsys *ss = subsys[i];
 
 		BUG_ON(!ss->name);
@@ -3314,7 +3358,8 @@ int __init cgroup_init(void)
 	if (err)
 		return err;
 
-	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+	/* at bootup time, we don't worry about modular subsystems */
+	for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
 		struct cgroup_subsys *ss = subsys[i];
 		if (!ss->early_init)
 			cgroup_init_subsys(ss);
@@ -3423,9 +3468,16 @@ static int proc_cgroupstats_show(struct seq_file *m, void *v)
 	int i;
 
 	seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
+	/*
+	 * ideally we don't want subsystems moving around while we do this.
+	 * cgroup_mutex is also necessary to guarantee an atomic snapshot of
+	 * subsys/hierarchy state.
+	 */
 	mutex_lock(&cgroup_mutex);
 	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
 		struct cgroup_subsys *ss = subsys[i];
+		if (ss == NULL)
+			continue;
 		seq_printf(m, "%s\t%d\t%d\t%d\n",
 			   ss->name, ss->root->hierarchy_id,
 			   ss->root->number_of_cgroups, !ss->disabled);
@@ -3483,7 +3535,12 @@ void cgroup_fork_callbacks(struct task_struct *child)
 {
 	if (need_forkexit_callback) {
 		int i;
-		for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+		/*
+		 * forkexit callbacks are only supported for builtin
+		 * subsystems, and the builtin section of the subsys array is
+		 * immutable, so we don't need to lock the subsys array here.
+		 */
+		for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
 			struct cgroup_subsys *ss = subsys[i];
 			if (ss->fork)
 				ss->fork(ss, child);
@@ -3552,7 +3609,11 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
 	struct css_set *cg;
 
 	if (run_callbacks && need_forkexit_callback) {
-		for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+		/*
+		 * modular subsystems can't use callbacks, so no need to lock
+		 * the subsys array
+		 */
+		for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
 			struct cgroup_subsys *ss = subsys[i];
 			if (ss->exit)
 				ss->exit(ss, tsk);
@@ -3844,8 +3905,11 @@ static int __init cgroup_disable(char *str)
 	while ((token = strsep(&str, ",")) != NULL) {
 		if (!*token)
 			continue;
-
-		for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+		/*
+		 * cgroup_disable, being at boot time, can't know about module
+		 * subsystems, so we don't worry about them.
+		 */
+		for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
 			struct cgroup_subsys *ss = subsys[i];
 
 			if (!strcmp(token, ss->name)) {
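
The net effect of the patch is a pair of access rules for the subsys[] array, which is now only populated up to CGROUP_BUILTIN_SUBSYS_COUNT at boot. The sketch below distills those rules from the hunks above; it is illustrative only, not part of the change, and the two example_* function names are made up for the illustration.

/*
 * Illustrative sketch (not part of the patch): the two ways cgroup.c now
 * walks the subsys[] array.
 */
static void example_walk_all_subsys_slots(void)
{
	int i;

	/*
	 * The full array may contain modular entries, so the caller must
	 * hold cgroup_mutex and must tolerate empty (NULL) slots.
	 */
	BUG_ON(!mutex_is_locked(&cgroup_mutex));
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];
		if (ss == NULL)
			continue;
		/* ... use ss ... */
	}
}

static void example_walk_builtin_subsys_slots(void)
{
	int i;

	/*
	 * The builtin prefix of the array is filled in at boot and never
	 * changes, so it can be walked with no locking and no NULL checks
	 * (fork/exit callbacks, cgroup_disable, early init).
	 */
	for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];
		/* ... use ss ... */
	}
}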