diff options
author | Paul Menage <menage@google.com> | 2009-09-23 18:56:19 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-09-24 10:20:57 -0400 |
commit | c6d57f3312a6619d47c5557b5f6154a74d04ff80 (patch) | |
tree | b2e9214e58120c5d91ed0039afd06def62bfdf54 | |
parent | 34f77a90f79fca31802c2e942bd73f7f557fe28c (diff) |
cgroups: support named cgroups hierarchies
To simplify referring to cgroup hierarchies in mount statements, and to
allow disambiguation in the presence of empty hierarchies and
multiply-bindable subsystems, this patch adds support for naming a new
cgroup hierarchy via the "name=" mount option.
A pre-existing hierarchy may be specified by either name or by subsystems;
a hierarchy's name cannot be changed by a remount operation.
Example usage:
# To create a hierarchy called "foo" containing the "cpu" subsystem
mount -t cgroup -oname=foo,cpu cgroup /mnt/cgroup1
# To mount the "foo" hierarchy on a second location
mount -t cgroup -oname=foo cgroup /mnt/cgroup2
Signed-off-by: Paul Menage <menage@google.com>
Reviewed-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Dhaval Giani <dhaval@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | Documentation/cgroups/cgroups.txt | 20 | ||||
-rw-r--r-- | kernel/cgroup.c | 184 |
2 files changed, 156 insertions, 48 deletions
diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt index 6eb1a97e88ce..4bccfc19196b 100644 --- a/Documentation/cgroups/cgroups.txt +++ b/Documentation/cgroups/cgroups.txt | |||
@@ -408,6 +408,26 @@ You can attach the current shell task by echoing 0: | |||
408 | 408 | ||
409 | # echo 0 > tasks | 409 | # echo 0 > tasks |
410 | 410 | ||
411 | 2.3 Mounting hierarchies by name | ||
412 | -------------------------------- | ||
413 | |||
414 | Passing the name=<x> option when mounting a cgroups hierarchy | ||
415 | associates the given name with the hierarchy. This can be used when | ||
416 | mounting a pre-existing hierarchy, in order to refer to it by name | ||
417 | rather than by its set of active subsystems. Each hierarchy is either | ||
418 | nameless, or has a unique name. | ||
419 | |||
420 | The name should match [\w.-]+ | ||
421 | |||
422 | When passing a name=<x> option for a new hierarchy, you need to | ||
423 | specify subsystems manually; the legacy behaviour of mounting all | ||
424 | subsystems when none are explicitly specified is not supported when | ||
425 | you give a hierarchy a name. | ||
426 | |||
427 | The name of the hierarchy appears as part of the hierarchy description | ||
428 | in /proc/mounts and /proc/<pid>/cgroup. | ||
429 | |||
430 | |||
411 | 3. Kernel API | 431 | 3. Kernel API |
412 | ============= | 432 | ============= |
413 | 433 | ||
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index f5281aadbcab..03204044622f 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -23,6 +23,7 @@ | |||
23 | */ | 23 | */ |
24 | 24 | ||
25 | #include <linux/cgroup.h> | 25 | #include <linux/cgroup.h> |
26 | #include <linux/ctype.h> | ||
26 | #include <linux/errno.h> | 27 | #include <linux/errno.h> |
27 | #include <linux/fs.h> | 28 | #include <linux/fs.h> |
28 | #include <linux/kernel.h> | 29 | #include <linux/kernel.h> |
@@ -60,6 +61,8 @@ static struct cgroup_subsys *subsys[] = { | |||
60 | #include <linux/cgroup_subsys.h> | 61 | #include <linux/cgroup_subsys.h> |
61 | }; | 62 | }; |
62 | 63 | ||
64 | #define MAX_CGROUP_ROOT_NAMELEN 64 | ||
65 | |||
63 | /* | 66 | /* |
64 | * A cgroupfs_root represents the root of a cgroup hierarchy, | 67 | * A cgroupfs_root represents the root of a cgroup hierarchy, |
65 | * and may be associated with a superblock to form an active | 68 | * and may be associated with a superblock to form an active |
@@ -94,6 +97,9 @@ struct cgroupfs_root { | |||
94 | 97 | ||
95 | /* The path to use for release notifications. */ | 98 | /* The path to use for release notifications. */ |
96 | char release_agent_path[PATH_MAX]; | 99 | char release_agent_path[PATH_MAX]; |
100 | |||
101 | /* The name for this hierarchy - may be empty */ | ||
102 | char name[MAX_CGROUP_ROOT_NAMELEN]; | ||
97 | }; | 103 | }; |
98 | 104 | ||
99 | /* | 105 | /* |
@@ -841,6 +847,8 @@ static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
841 | seq_puts(seq, ",noprefix"); | 847 | seq_puts(seq, ",noprefix"); |
842 | if (strlen(root->release_agent_path)) | 848 | if (strlen(root->release_agent_path)) |
843 | seq_printf(seq, ",release_agent=%s", root->release_agent_path); | 849 | seq_printf(seq, ",release_agent=%s", root->release_agent_path); |
850 | if (strlen(root->name)) | ||
851 | seq_printf(seq, ",name=%s", root->name); | ||
844 | mutex_unlock(&cgroup_mutex); | 852 | mutex_unlock(&cgroup_mutex); |
845 | return 0; | 853 | return 0; |
846 | } | 854 | } |
@@ -849,6 +857,9 @@ struct cgroup_sb_opts { | |||
849 | unsigned long subsys_bits; | 857 | unsigned long subsys_bits; |
850 | unsigned long flags; | 858 | unsigned long flags; |
851 | char *release_agent; | 859 | char *release_agent; |
860 | char *name; | ||
861 | |||
862 | struct cgroupfs_root *new_root; | ||
852 | }; | 863 | }; |
853 | 864 | ||
854 | /* Convert a hierarchy specifier into a bitmask of subsystems and | 865 | /* Convert a hierarchy specifier into a bitmask of subsystems and |
@@ -863,9 +874,7 @@ static int parse_cgroupfs_options(char *data, | |||
863 | mask = ~(1UL << cpuset_subsys_id); | 874 | mask = ~(1UL << cpuset_subsys_id); |
864 | #endif | 875 | #endif |
865 | 876 | ||
866 | opts->subsys_bits = 0; | 877 | memset(opts, 0, sizeof(*opts)); |
867 | opts->flags = 0; | ||
868 | opts->release_agent = NULL; | ||
869 | 878 | ||
870 | while ((token = strsep(&o, ",")) != NULL) { | 879 | while ((token = strsep(&o, ",")) != NULL) { |
871 | if (!*token) | 880 | if (!*token) |
@@ -885,11 +894,33 @@ static int parse_cgroupfs_options(char *data, | |||
885 | /* Specifying two release agents is forbidden */ | 894 | /* Specifying two release agents is forbidden */ |
886 | if (opts->release_agent) | 895 | if (opts->release_agent) |
887 | return -EINVAL; | 896 | return -EINVAL; |
888 | opts->release_agent = kzalloc(PATH_MAX, GFP_KERNEL); | 897 | opts->release_agent = |
898 | kstrndup(token + 14, PATH_MAX, GFP_KERNEL); | ||
889 | if (!opts->release_agent) | 899 | if (!opts->release_agent) |
890 | return -ENOMEM; | 900 | return -ENOMEM; |
891 | strncpy(opts->release_agent, token + 14, PATH_MAX - 1); | 901 | } else if (!strncmp(token, "name=", 5)) { |
892 | opts->release_agent[PATH_MAX - 1] = 0; | 902 | int i; |
903 | const char *name = token + 5; | ||
904 | /* Can't specify an empty name */ | ||
905 | if (!strlen(name)) | ||
906 | return -EINVAL; | ||
907 | /* Must match [\w.-]+ */ | ||
908 | for (i = 0; i < strlen(name); i++) { | ||
909 | char c = name[i]; | ||
910 | if (isalnum(c)) | ||
911 | continue; | ||
912 | if ((c == '.') || (c == '-') || (c == '_')) | ||
913 | continue; | ||
914 | return -EINVAL; | ||
915 | } | ||
916 | /* Specifying two names is forbidden */ | ||
917 | if (opts->name) | ||
918 | return -EINVAL; | ||
919 | opts->name = kstrndup(name, | ||
920 | MAX_CGROUP_ROOT_NAMELEN, | ||
921 | GFP_KERNEL); | ||
922 | if (!opts->name) | ||
923 | return -ENOMEM; | ||
893 | } else { | 924 | } else { |
894 | struct cgroup_subsys *ss; | 925 | struct cgroup_subsys *ss; |
895 | int i; | 926 | int i; |
@@ -916,7 +947,7 @@ static int parse_cgroupfs_options(char *data, | |||
916 | return -EINVAL; | 947 | return -EINVAL; |
917 | 948 | ||
918 | /* We can't have an empty hierarchy */ | 949 | /* We can't have an empty hierarchy */ |
919 | if (!opts->subsys_bits) | 950 | if (!opts->subsys_bits && !opts->name) |
920 | return -EINVAL; | 951 | return -EINVAL; |
921 | 952 | ||
922 | return 0; | 953 | return 0; |
@@ -944,6 +975,12 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) | |||
944 | goto out_unlock; | 975 | goto out_unlock; |
945 | } | 976 | } |
946 | 977 | ||
978 | /* Don't allow name to change at remount */ | ||
979 | if (opts.name && strcmp(opts.name, root->name)) { | ||
980 | ret = -EINVAL; | ||
981 | goto out_unlock; | ||
982 | } | ||
983 | |||
947 | ret = rebind_subsystems(root, opts.subsys_bits); | 984 | ret = rebind_subsystems(root, opts.subsys_bits); |
948 | if (ret) | 985 | if (ret) |
949 | goto out_unlock; | 986 | goto out_unlock; |
@@ -955,6 +992,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) | |||
955 | strcpy(root->release_agent_path, opts.release_agent); | 992 | strcpy(root->release_agent_path, opts.release_agent); |
956 | out_unlock: | 993 | out_unlock: |
957 | kfree(opts.release_agent); | 994 | kfree(opts.release_agent); |
995 | kfree(opts.name); | ||
958 | mutex_unlock(&cgroup_mutex); | 996 | mutex_unlock(&cgroup_mutex); |
959 | mutex_unlock(&cgrp->dentry->d_inode->i_mutex); | 997 | mutex_unlock(&cgrp->dentry->d_inode->i_mutex); |
960 | unlock_kernel(); | 998 | unlock_kernel(); |
@@ -977,6 +1015,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) | |||
977 | INIT_LIST_HEAD(&cgrp->pids_list); | 1015 | INIT_LIST_HEAD(&cgrp->pids_list); |
978 | init_rwsem(&cgrp->pids_mutex); | 1016 | init_rwsem(&cgrp->pids_mutex); |
979 | } | 1017 | } |
1018 | |||
980 | static void init_cgroup_root(struct cgroupfs_root *root) | 1019 | static void init_cgroup_root(struct cgroupfs_root *root) |
981 | { | 1020 | { |
982 | struct cgroup *cgrp = &root->top_cgroup; | 1021 | struct cgroup *cgrp = &root->top_cgroup; |
@@ -990,31 +1029,59 @@ static void init_cgroup_root(struct cgroupfs_root *root) | |||
990 | 1029 | ||
991 | static int cgroup_test_super(struct super_block *sb, void *data) | 1030 | static int cgroup_test_super(struct super_block *sb, void *data) |
992 | { | 1031 | { |
993 | struct cgroupfs_root *new = data; | 1032 | struct cgroup_sb_opts *opts = data; |
994 | struct cgroupfs_root *root = sb->s_fs_info; | 1033 | struct cgroupfs_root *root = sb->s_fs_info; |
995 | 1034 | ||
996 | /* First check subsystems */ | 1035 | /* If we asked for a name then it must match */ |
997 | if (new->subsys_bits != root->subsys_bits) | 1036 | if (opts->name && strcmp(opts->name, root->name)) |
998 | return 0; | 1037 | return 0; |
999 | 1038 | ||
1000 | /* Next check flags */ | 1039 | /* If we asked for subsystems then they must match */ |
1001 | if (new->flags != root->flags) | 1040 | if (opts->subsys_bits && (opts->subsys_bits != root->subsys_bits)) |
1002 | return 0; | 1041 | return 0; |
1003 | 1042 | ||
1004 | return 1; | 1043 | return 1; |
1005 | } | 1044 | } |
1006 | 1045 | ||
1046 | static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts) | ||
1047 | { | ||
1048 | struct cgroupfs_root *root; | ||
1049 | |||
1050 | /* Empty hierarchies aren't supported */ | ||
1051 | if (!opts->subsys_bits) | ||
1052 | return NULL; | ||
1053 | |||
1054 | root = kzalloc(sizeof(*root), GFP_KERNEL); | ||
1055 | if (!root) | ||
1056 | return ERR_PTR(-ENOMEM); | ||
1057 | |||
1058 | init_cgroup_root(root); | ||
1059 | root->subsys_bits = opts->subsys_bits; | ||
1060 | root->flags = opts->flags; | ||
1061 | if (opts->release_agent) | ||
1062 | strcpy(root->release_agent_path, opts->release_agent); | ||
1063 | if (opts->name) | ||
1064 | strcpy(root->name, opts->name); | ||
1065 | return root; | ||
1066 | } | ||
1067 | |||
1007 | static int cgroup_set_super(struct super_block *sb, void *data) | 1068 | static int cgroup_set_super(struct super_block *sb, void *data) |
1008 | { | 1069 | { |
1009 | int ret; | 1070 | int ret; |
1010 | struct cgroupfs_root *root = data; | 1071 | struct cgroup_sb_opts *opts = data; |
1072 | |||
1073 | /* If we don't have a new root, we can't set up a new sb */ | ||
1074 | if (!opts->new_root) | ||
1075 | return -EINVAL; | ||
1076 | |||
1077 | BUG_ON(!opts->subsys_bits); | ||
1011 | 1078 | ||
1012 | ret = set_anon_super(sb, NULL); | 1079 | ret = set_anon_super(sb, NULL); |
1013 | if (ret) | 1080 | if (ret) |
1014 | return ret; | 1081 | return ret; |
1015 | 1082 | ||
1016 | sb->s_fs_info = root; | 1083 | sb->s_fs_info = opts->new_root; |
1017 | root->sb = sb; | 1084 | opts->new_root->sb = sb; |
1018 | 1085 | ||
1019 | sb->s_blocksize = PAGE_CACHE_SIZE; | 1086 | sb->s_blocksize = PAGE_CACHE_SIZE; |
1020 | sb->s_blocksize_bits = PAGE_CACHE_SHIFT; | 1087 | sb->s_blocksize_bits = PAGE_CACHE_SHIFT; |
@@ -1051,48 +1118,43 @@ static int cgroup_get_sb(struct file_system_type *fs_type, | |||
1051 | void *data, struct vfsmount *mnt) | 1118 | void *data, struct vfsmount *mnt) |
1052 | { | 1119 | { |
1053 | struct cgroup_sb_opts opts; | 1120 | struct cgroup_sb_opts opts; |
1121 | struct cgroupfs_root *root; | ||
1054 | int ret = 0; | 1122 | int ret = 0; |
1055 | struct super_block *sb; | 1123 | struct super_block *sb; |
1056 | struct cgroupfs_root *root; | 1124 | struct cgroupfs_root *new_root; |
1057 | struct list_head tmp_cg_links; | ||
1058 | 1125 | ||
1059 | /* First find the desired set of subsystems */ | 1126 | /* First find the desired set of subsystems */ |
1060 | ret = parse_cgroupfs_options(data, &opts); | 1127 | ret = parse_cgroupfs_options(data, &opts); |
1061 | if (ret) { | 1128 | if (ret) |
1062 | kfree(opts.release_agent); | 1129 | goto out_err; |
1063 | return ret; | ||
1064 | } | ||
1065 | |||
1066 | root = kzalloc(sizeof(*root), GFP_KERNEL); | ||
1067 | if (!root) { | ||
1068 | kfree(opts.release_agent); | ||
1069 | return -ENOMEM; | ||
1070 | } | ||
1071 | 1130 | ||
1072 | init_cgroup_root(root); | 1131 | /* |
1073 | root->subsys_bits = opts.subsys_bits; | 1132 | * Allocate a new cgroup root. We may not need it if we're |
1074 | root->flags = opts.flags; | 1133 | * reusing an existing hierarchy. |
1075 | if (opts.release_agent) { | 1134 | */ |
1076 | strcpy(root->release_agent_path, opts.release_agent); | 1135 | new_root = cgroup_root_from_opts(&opts); |
1077 | kfree(opts.release_agent); | 1136 | if (IS_ERR(new_root)) { |
1137 | ret = PTR_ERR(new_root); | ||
1138 | goto out_err; | ||
1078 | } | 1139 | } |
1140 | opts.new_root = new_root; | ||
1079 | 1141 | ||
1080 | sb = sget(fs_type, cgroup_test_super, cgroup_set_super, root); | 1142 | /* Locate an existing or new sb for this hierarchy */ |
1081 | 1143 | sb = sget(fs_type, cgroup_test_super, cgroup_set_super, &opts); | |
1082 | if (IS_ERR(sb)) { | 1144 | if (IS_ERR(sb)) { |
1083 | kfree(root); | 1145 | ret = PTR_ERR(sb); |
1084 | return PTR_ERR(sb); | 1146 | kfree(opts.new_root); |
1147 | goto out_err; | ||
1085 | } | 1148 | } |
1086 | 1149 | ||
1087 | if (sb->s_fs_info != root) { | 1150 | root = sb->s_fs_info; |
1088 | /* Reusing an existing superblock */ | 1151 | BUG_ON(!root); |
1089 | BUG_ON(sb->s_root == NULL); | 1152 | if (root == opts.new_root) { |
1090 | kfree(root); | 1153 | /* We used the new root structure, so this is a new hierarchy */ |
1091 | root = NULL; | 1154 | struct list_head tmp_cg_links; |
1092 | } else { | ||
1093 | /* New superblock */ | ||
1094 | struct cgroup *root_cgrp = &root->top_cgroup; | 1155 | struct cgroup *root_cgrp = &root->top_cgroup; |
1095 | struct inode *inode; | 1156 | struct inode *inode; |
1157 | struct cgroupfs_root *existing_root; | ||
1096 | int i; | 1158 | int i; |
1097 | 1159 | ||
1098 | BUG_ON(sb->s_root != NULL); | 1160 | BUG_ON(sb->s_root != NULL); |
@@ -1105,6 +1167,18 @@ static int cgroup_get_sb(struct file_system_type *fs_type, | |||
1105 | mutex_lock(&inode->i_mutex); | 1167 | mutex_lock(&inode->i_mutex); |
1106 | mutex_lock(&cgroup_mutex); | 1168 | mutex_lock(&cgroup_mutex); |
1107 | 1169 | ||
1170 | if (strlen(root->name)) { | ||
1171 | /* Check for name clashes with existing mounts */ | ||
1172 | for_each_active_root(existing_root) { | ||
1173 | if (!strcmp(existing_root->name, root->name)) { | ||
1174 | ret = -EBUSY; | ||
1175 | mutex_unlock(&cgroup_mutex); | ||
1176 | mutex_unlock(&inode->i_mutex); | ||
1177 | goto drop_new_super; | ||
1178 | } | ||
1179 | } | ||
1180 | } | ||
1181 | |||
1108 | /* | 1182 | /* |
1109 | * We're accessing css_set_count without locking | 1183 | * We're accessing css_set_count without locking |
1110 | * css_set_lock here, but that's OK - it can only be | 1184 | * css_set_lock here, but that's OK - it can only be |
@@ -1123,7 +1197,8 @@ static int cgroup_get_sb(struct file_system_type *fs_type, | |||
1123 | if (ret == -EBUSY) { | 1197 | if (ret == -EBUSY) { |
1124 | mutex_unlock(&cgroup_mutex); | 1198 | mutex_unlock(&cgroup_mutex); |
1125 | mutex_unlock(&inode->i_mutex); | 1199 | mutex_unlock(&inode->i_mutex); |
1126 | goto free_cg_links; | 1200 | free_cg_links(&tmp_cg_links); |
1201 | goto drop_new_super; | ||
1127 | } | 1202 | } |
1128 | 1203 | ||
1129 | /* EBUSY should be the only error here */ | 1204 | /* EBUSY should be the only error here */ |
@@ -1157,15 +1232,25 @@ static int cgroup_get_sb(struct file_system_type *fs_type, | |||
1157 | cgroup_populate_dir(root_cgrp); | 1232 | cgroup_populate_dir(root_cgrp); |
1158 | mutex_unlock(&cgroup_mutex); | 1233 | mutex_unlock(&cgroup_mutex); |
1159 | mutex_unlock(&inode->i_mutex); | 1234 | mutex_unlock(&inode->i_mutex); |
1235 | } else { | ||
1236 | /* | ||
1237 | * We re-used an existing hierarchy - the new root (if | ||
1238 | * any) is not needed | ||
1239 | */ | ||
1240 | kfree(opts.new_root); | ||
1160 | } | 1241 | } |
1161 | 1242 | ||
1162 | simple_set_mnt(mnt, sb); | 1243 | simple_set_mnt(mnt, sb); |
1244 | kfree(opts.release_agent); | ||
1245 | kfree(opts.name); | ||
1163 | return 0; | 1246 | return 0; |
1164 | 1247 | ||
1165 | free_cg_links: | ||
1166 | free_cg_links(&tmp_cg_links); | ||
1167 | drop_new_super: | 1248 | drop_new_super: |
1168 | deactivate_locked_super(sb); | 1249 | deactivate_locked_super(sb); |
1250 | out_err: | ||
1251 | kfree(opts.release_agent); | ||
1252 | kfree(opts.name); | ||
1253 | |||
1169 | return ret; | 1254 | return ret; |
1170 | } | 1255 | } |
1171 | 1256 | ||
@@ -2992,6 +3077,9 @@ static int proc_cgroup_show(struct seq_file *m, void *v) | |||
2992 | seq_printf(m, "%lu:", root->subsys_bits); | 3077 | seq_printf(m, "%lu:", root->subsys_bits); |
2993 | for_each_subsys(root, ss) | 3078 | for_each_subsys(root, ss) |
2994 | seq_printf(m, "%s%s", count++ ? "," : "", ss->name); | 3079 | seq_printf(m, "%s%s", count++ ? "," : "", ss->name); |
3080 | if (strlen(root->name)) | ||
3081 | seq_printf(m, "%sname=%s", count ? "," : "", | ||
3082 | root->name); | ||
2995 | seq_putc(m, ':'); | 3083 | seq_putc(m, ':'); |
2996 | get_first_subsys(&root->top_cgroup, NULL, &subsys_id); | 3084 | get_first_subsys(&root->top_cgroup, NULL, &subsys_id); |
2997 | cgrp = task_cgroup(tsk, subsys_id); | 3085 | cgrp = task_cgroup(tsk, subsys_id); |