aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2016-12-27 14:49:08 -0500
committerTejun Heo <tj@kernel.org>2016-12-27 14:49:08 -0500
commit1592c9b223749d59b933ebbfe37f1a8833d7a6cf (patch)
treedfed84440b8264b687e090bb60b642b7e739dd50
parentfa069904dd38c2d8e121a3c7e37f8daaddb6dafa (diff)
cgroup: move v1 mount functions to kernel/cgroup/cgroup-v1.c
Now that the v1 mount code is split into separate functions, move them to kernel/cgroup/cgroup-v1.c along with the mount option handling code. As this puts all v1-only kernfs_syscall_ops in cgroup-v1.c, move cgroup1_kf_syscall_ops to cgroup-v1.c too. Signed-off-by: Tejun Heo <tj@kernel.org> Acked-by: Zefan Li <lizefan@huawei.com>
-rw-r--r--kernel/cgroup/cgroup-internal.h28
-rw-r--r--kernel/cgroup/cgroup-v1.c381
-rw-r--r--kernel/cgroup/cgroup.c410
3 files changed, 413 insertions, 406 deletions
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
index 5790e5ff9a0f..710edeeb1f9f 100644
--- a/kernel/cgroup/cgroup-internal.h
+++ b/kernel/cgroup/cgroup-internal.h
@@ -26,6 +26,16 @@ struct cgrp_cset_link {
26 struct list_head cgrp_link; 26 struct list_head cgrp_link;
27}; 27};
28 28
29struct cgroup_sb_opts {
30 u16 subsys_mask;
31 unsigned int flags;
32 char *release_agent;
33 bool cpuset_clone_children;
34 char *name;
35 /* User explicitly requested empty subsystem */
36 bool none;
37};
38
29extern struct mutex cgroup_mutex; 39extern struct mutex cgroup_mutex;
30extern spinlock_t css_set_lock; 40extern spinlock_t css_set_lock;
31extern struct cgroup_subsys *cgroup_subsys[]; 41extern struct cgroup_subsys *cgroup_subsys[];
@@ -66,7 +76,13 @@ void cgroup_kn_unlock(struct kernfs_node *kn);
66int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen, 76int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
67 struct cgroup_namespace *ns); 77 struct cgroup_namespace *ns);
68 78
79void cgroup_free_root(struct cgroup_root *root);
80void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts);
81int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask);
69int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask); 82int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask);
83struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags,
84 struct cgroup_root *root, unsigned long magic,
85 struct cgroup_namespace *ns);
70 86
71bool cgroup_may_migrate_to(struct cgroup *dst_cgrp); 87bool cgroup_may_migrate_to(struct cgroup *dst_cgrp);
72void cgroup_migrate_finish(struct list_head *preloaded_csets); 88void cgroup_migrate_finish(struct list_head *preloaded_csets);
@@ -86,18 +102,24 @@ ssize_t cgroup_procs_write(struct kernfs_open_file *of, char *buf, size_t nbytes
86 102
87void cgroup_lock_and_drain_offline(struct cgroup *cgrp); 103void cgroup_lock_and_drain_offline(struct cgroup *cgrp);
88 104
105int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode);
106int cgroup_rmdir(struct kernfs_node *kn);
107int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
108 struct kernfs_root *kf_root);
109
89/* 110/*
90 * cgroup-v1.c 111 * cgroup-v1.c
91 */ 112 */
92extern spinlock_t release_agent_path_lock;
93extern struct cftype cgroup_legacy_base_files[]; 113extern struct cftype cgroup_legacy_base_files[];
94extern const struct file_operations proc_cgroupstats_operations; 114extern const struct file_operations proc_cgroupstats_operations;
115extern struct kernfs_syscall_ops cgroup1_kf_syscall_ops;
95 116
96bool cgroup_ssid_no_v1(int ssid); 117bool cgroup_ssid_no_v1(int ssid);
97void cgroup_pidlist_destroy_all(struct cgroup *cgrp); 118void cgroup_pidlist_destroy_all(struct cgroup *cgrp);
98int cgroup1_rename(struct kernfs_node *kn, struct kernfs_node *new_parent,
99 const char *new_name_str);
100void cgroup_release_agent(struct work_struct *work); 119void cgroup_release_agent(struct work_struct *work);
101void check_for_release(struct cgroup *cgrp); 120void check_for_release(struct cgroup *cgrp);
121struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags,
122 void *data, unsigned long magic,
123 struct cgroup_namespace *ns);
102 124
103#endif /* __CGROUP_INTERNAL_H */ 125#endif /* __CGROUP_INTERNAL_H */
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 0b2c24f0b310..ae240c0d33cb 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -1,7 +1,9 @@
1#include "cgroup-internal.h" 1#include "cgroup-internal.h"
2 2
3#include <linux/ctype.h>
3#include <linux/kmod.h> 4#include <linux/kmod.h>
4#include <linux/sort.h> 5#include <linux/sort.h>
6#include <linux/delay.h>
5#include <linux/mm.h> 7#include <linux/mm.h>
6#include <linux/slab.h> 8#include <linux/slab.h>
7#include <linux/vmalloc.h> 9#include <linux/vmalloc.h>
@@ -32,7 +34,7 @@ static struct workqueue_struct *cgroup_pidlist_destroy_wq;
32 * Protects cgroup_subsys->release_agent_path. Modifying it also requires 34 * Protects cgroup_subsys->release_agent_path. Modifying it also requires
33 * cgroup_mutex. Reading requires either cgroup_mutex or this spinlock. 35 * cgroup_mutex. Reading requires either cgroup_mutex or this spinlock.
34 */ 36 */
35DEFINE_SPINLOCK(release_agent_path_lock); 37static DEFINE_SPINLOCK(release_agent_path_lock);
36 38
37bool cgroup_ssid_no_v1(int ssid) 39bool cgroup_ssid_no_v1(int ssid)
38{ 40{
@@ -800,8 +802,8 @@ out_free:
800/* 802/*
801 * cgroup_rename - Only allow simple rename of directories in place. 803 * cgroup_rename - Only allow simple rename of directories in place.
802 */ 804 */
803int cgroup1_rename(struct kernfs_node *kn, struct kernfs_node *new_parent, 805static int cgroup1_rename(struct kernfs_node *kn, struct kernfs_node *new_parent,
804 const char *new_name_str) 806 const char *new_name_str)
805{ 807{
806 struct cgroup *cgrp = kn->priv; 808 struct cgroup *cgrp = kn->priv;
807 int ret; 809 int ret;
@@ -832,6 +834,379 @@ int cgroup1_rename(struct kernfs_node *kn, struct kernfs_node *new_parent,
832 return ret; 834 return ret;
833} 835}
834 836
837static int cgroup1_show_options(struct seq_file *seq, struct kernfs_root *kf_root)
838{
839 struct cgroup_root *root = cgroup_root_from_kf(kf_root);
840 struct cgroup_subsys *ss;
841 int ssid;
842
843 for_each_subsys(ss, ssid)
844 if (root->subsys_mask & (1 << ssid))
845 seq_show_option(seq, ss->legacy_name, NULL);
846 if (root->flags & CGRP_ROOT_NOPREFIX)
847 seq_puts(seq, ",noprefix");
848 if (root->flags & CGRP_ROOT_XATTR)
849 seq_puts(seq, ",xattr");
850
851 spin_lock(&release_agent_path_lock);
852 if (strlen(root->release_agent_path))
853 seq_show_option(seq, "release_agent",
854 root->release_agent_path);
855 spin_unlock(&release_agent_path_lock);
856
857 if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags))
858 seq_puts(seq, ",clone_children");
859 if (strlen(root->name))
860 seq_show_option(seq, "name", root->name);
861 return 0;
862}
863
864static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
865{
866 char *token, *o = data;
867 bool all_ss = false, one_ss = false;
868 u16 mask = U16_MAX;
869 struct cgroup_subsys *ss;
870 int nr_opts = 0;
871 int i;
872
873#ifdef CONFIG_CPUSETS
874 mask = ~((u16)1 << cpuset_cgrp_id);
875#endif
876
877 memset(opts, 0, sizeof(*opts));
878
879 while ((token = strsep(&o, ",")) != NULL) {
880 nr_opts++;
881
882 if (!*token)
883 return -EINVAL;
884 if (!strcmp(token, "none")) {
885 /* Explicitly have no subsystems */
886 opts->none = true;
887 continue;
888 }
889 if (!strcmp(token, "all")) {
890 /* Mutually exclusive option 'all' + subsystem name */
891 if (one_ss)
892 return -EINVAL;
893 all_ss = true;
894 continue;
895 }
896 if (!strcmp(token, "noprefix")) {
897 opts->flags |= CGRP_ROOT_NOPREFIX;
898 continue;
899 }
900 if (!strcmp(token, "clone_children")) {
901 opts->cpuset_clone_children = true;
902 continue;
903 }
904 if (!strcmp(token, "xattr")) {
905 opts->flags |= CGRP_ROOT_XATTR;
906 continue;
907 }
908 if (!strncmp(token, "release_agent=", 14)) {
909 /* Specifying two release agents is forbidden */
910 if (opts->release_agent)
911 return -EINVAL;
912 opts->release_agent =
913 kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL);
914 if (!opts->release_agent)
915 return -ENOMEM;
916 continue;
917 }
918 if (!strncmp(token, "name=", 5)) {
919 const char *name = token + 5;
920 /* Can't specify an empty name */
921 if (!strlen(name))
922 return -EINVAL;
923 /* Must match [\w.-]+ */
924 for (i = 0; i < strlen(name); i++) {
925 char c = name[i];
926 if (isalnum(c))
927 continue;
928 if ((c == '.') || (c == '-') || (c == '_'))
929 continue;
930 return -EINVAL;
931 }
932 /* Specifying two names is forbidden */
933 if (opts->name)
934 return -EINVAL;
935 opts->name = kstrndup(name,
936 MAX_CGROUP_ROOT_NAMELEN - 1,
937 GFP_KERNEL);
938 if (!opts->name)
939 return -ENOMEM;
940
941 continue;
942 }
943
944 for_each_subsys(ss, i) {
945 if (strcmp(token, ss->legacy_name))
946 continue;
947 if (!cgroup_ssid_enabled(i))
948 continue;
949 if (cgroup_ssid_no_v1(i))
950 continue;
951
952 /* Mutually exclusive option 'all' + subsystem name */
953 if (all_ss)
954 return -EINVAL;
955 opts->subsys_mask |= (1 << i);
956 one_ss = true;
957
958 break;
959 }
960 if (i == CGROUP_SUBSYS_COUNT)
961 return -ENOENT;
962 }
963
964 /*
965 * If the 'all' option was specified select all the subsystems,
966 * otherwise if 'none', 'name=' and a subsystem name options were
967 * not specified, let's default to 'all'
968 */
969 if (all_ss || (!one_ss && !opts->none && !opts->name))
970 for_each_subsys(ss, i)
971 if (cgroup_ssid_enabled(i) && !cgroup_ssid_no_v1(i))
972 opts->subsys_mask |= (1 << i);
973
974 /*
975 * We either have to specify by name or by subsystems. (So all
976 * empty hierarchies must have a name).
977 */
978 if (!opts->subsys_mask && !opts->name)
979 return -EINVAL;
980
981 /*
982 * Option noprefix was introduced just for backward compatibility
983 * with the old cpuset, so we allow noprefix only if mounting just
984 * the cpuset subsystem.
985 */
986 if ((opts->flags & CGRP_ROOT_NOPREFIX) && (opts->subsys_mask & mask))
987 return -EINVAL;
988
989 /* Can't specify "none" and some subsystems */
990 if (opts->subsys_mask && opts->none)
991 return -EINVAL;
992
993 return 0;
994}
995
996static int cgroup1_remount(struct kernfs_root *kf_root, int *flags, char *data)
997{
998 int ret = 0;
999 struct cgroup_root *root = cgroup_root_from_kf(kf_root);
1000 struct cgroup_sb_opts opts;
1001 u16 added_mask, removed_mask;
1002
1003 cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
1004
1005 /* See what subsystems are wanted */
1006 ret = parse_cgroupfs_options(data, &opts);
1007 if (ret)
1008 goto out_unlock;
1009
1010 if (opts.subsys_mask != root->subsys_mask || opts.release_agent)
1011 pr_warn("option changes via remount are deprecated (pid=%d comm=%s)\n",
1012 task_tgid_nr(current), current->comm);
1013
1014 added_mask = opts.subsys_mask & ~root->subsys_mask;
1015 removed_mask = root->subsys_mask & ~opts.subsys_mask;
1016
1017 /* Don't allow flags or name to change at remount */
1018 if ((opts.flags ^ root->flags) ||
1019 (opts.name && strcmp(opts.name, root->name))) {
1020 pr_err("option or name mismatch, new: 0x%x \"%s\", old: 0x%x \"%s\"\n",
1021 opts.flags, opts.name ?: "", root->flags, root->name);
1022 ret = -EINVAL;
1023 goto out_unlock;
1024 }
1025
1026 /* remounting is not allowed for populated hierarchies */
1027 if (!list_empty(&root->cgrp.self.children)) {
1028 ret = -EBUSY;
1029 goto out_unlock;
1030 }
1031
1032 ret = rebind_subsystems(root, added_mask);
1033 if (ret)
1034 goto out_unlock;
1035
1036 WARN_ON(rebind_subsystems(&cgrp_dfl_root, removed_mask));
1037
1038 if (opts.release_agent) {
1039 spin_lock(&release_agent_path_lock);
1040 strcpy(root->release_agent_path, opts.release_agent);
1041 spin_unlock(&release_agent_path_lock);
1042 }
1043
1044 trace_cgroup_remount(root);
1045
1046 out_unlock:
1047 kfree(opts.release_agent);
1048 kfree(opts.name);
1049 mutex_unlock(&cgroup_mutex);
1050 return ret;
1051}
1052
1053struct kernfs_syscall_ops cgroup1_kf_syscall_ops = {
1054 .rename = cgroup1_rename,
1055 .show_options = cgroup1_show_options,
1056 .remount_fs = cgroup1_remount,
1057 .mkdir = cgroup_mkdir,
1058 .rmdir = cgroup_rmdir,
1059 .show_path = cgroup_show_path,
1060};
1061
1062struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags,
1063 void *data, unsigned long magic,
1064 struct cgroup_namespace *ns)
1065{
1066 struct super_block *pinned_sb = NULL;
1067 struct cgroup_sb_opts opts;
1068 struct cgroup_root *root;
1069 struct cgroup_subsys *ss;
1070 struct dentry *dentry;
1071 int i, ret;
1072
1073 cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
1074
1075 /* First find the desired set of subsystems */
1076 ret = parse_cgroupfs_options(data, &opts);
1077 if (ret)
1078 goto out_unlock;
1079
1080 /*
1081 * Destruction of cgroup root is asynchronous, so subsystems may
1082 * still be dying after the previous unmount. Let's drain the
1083 * dying subsystems. We just need to ensure that the ones
1084 * unmounted previously finish dying and don't care about new ones
1085 * starting. Testing ref liveliness is good enough.
1086 */
1087 for_each_subsys(ss, i) {
1088 if (!(opts.subsys_mask & (1 << i)) ||
1089 ss->root == &cgrp_dfl_root)
1090 continue;
1091
1092 if (!percpu_ref_tryget_live(&ss->root->cgrp.self.refcnt)) {
1093 mutex_unlock(&cgroup_mutex);
1094 msleep(10);
1095 ret = restart_syscall();
1096 goto out_free;
1097 }
1098 cgroup_put(&ss->root->cgrp);
1099 }
1100
1101 for_each_root(root) {
1102 bool name_match = false;
1103
1104 if (root == &cgrp_dfl_root)
1105 continue;
1106
1107 /*
1108 * If we asked for a name then it must match. Also, if
1109 * name matches but subsys_mask doesn't, we should fail.
1110 * Remember whether name matched.
1111 */
1112 if (opts.name) {
1113 if (strcmp(opts.name, root->name))
1114 continue;
1115 name_match = true;
1116 }
1117
1118 /*
1119 * If we asked for subsystems (or explicitly for no
1120 * subsystems) then they must match.
1121 */
1122 if ((opts.subsys_mask || opts.none) &&
1123 (opts.subsys_mask != root->subsys_mask)) {
1124 if (!name_match)
1125 continue;
1126 ret = -EBUSY;
1127 goto out_unlock;
1128 }
1129
1130 if (root->flags ^ opts.flags)
1131 pr_warn("new mount options do not match the existing superblock, will be ignored\n");
1132
1133 /*
1134 * We want to reuse @root whose lifetime is governed by its
1135 * ->cgrp. Let's check whether @root is alive and keep it
1136 * that way. As cgroup_kill_sb() can happen anytime, we
1137 * want to block it by pinning the sb so that @root doesn't
1138 * get killed before mount is complete.
1139 *
1140 * With the sb pinned, tryget_live can reliably indicate
1141 * whether @root can be reused. If it's being killed,
1142 * drain it. We can use wait_queue for the wait but this
1143 * path is super cold. Let's just sleep a bit and retry.
1144 */
1145 pinned_sb = kernfs_pin_sb(root->kf_root, NULL);
1146 if (IS_ERR(pinned_sb) ||
1147 !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
1148 mutex_unlock(&cgroup_mutex);
1149 if (!IS_ERR_OR_NULL(pinned_sb))
1150 deactivate_super(pinned_sb);
1151 msleep(10);
1152 ret = restart_syscall();
1153 goto out_free;
1154 }
1155
1156 ret = 0;
1157 goto out_unlock;
1158 }
1159
1160 /*
1161 * No such thing, create a new one. name= matching without subsys
1162 * specification is allowed for already existing hierarchies but we
1163 * can't create new one without subsys specification.
1164 */
1165 if (!opts.subsys_mask && !opts.none) {
1166 ret = -EINVAL;
1167 goto out_unlock;
1168 }
1169
1170 /* Hierarchies may only be created in the initial cgroup namespace. */
1171 if (ns != &init_cgroup_ns) {
1172 ret = -EPERM;
1173 goto out_unlock;
1174 }
1175
1176 root = kzalloc(sizeof(*root), GFP_KERNEL);
1177 if (!root) {
1178 ret = -ENOMEM;
1179 goto out_unlock;
1180 }
1181
1182 init_cgroup_root(root, &opts);
1183
1184 ret = cgroup_setup_root(root, opts.subsys_mask);
1185 if (ret)
1186 cgroup_free_root(root);
1187
1188out_unlock:
1189 mutex_unlock(&cgroup_mutex);
1190out_free:
1191 kfree(opts.release_agent);
1192 kfree(opts.name);
1193
1194 if (ret)
1195 return ERR_PTR(ret);
1196
1197 dentry = cgroup_do_mount(&cgroup_fs_type, flags, root,
1198 CGROUP_SUPER_MAGIC, ns);
1199
1200 /*
1201 * If @pinned_sb, we're reusing an existing root and holding an
1202 * extra ref on its sb. Mount is complete. Put the extra ref.
1203 */
1204 if (pinned_sb)
1205 deactivate_super(pinned_sb);
1206
1207 return dentry;
1208}
1209
835static int __init cgroup1_wq_init(void) 1210static int __init cgroup1_wq_init(void)
836{ 1211{
837 /* 1212 /*
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index de6a2ac41d0b..4be306510aff 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -31,7 +31,6 @@
31#include "cgroup-internal.h" 31#include "cgroup-internal.h"
32 32
33#include <linux/cred.h> 33#include <linux/cred.h>
34#include <linux/ctype.h>
35#include <linux/errno.h> 34#include <linux/errno.h>
36#include <linux/init_task.h> 35#include <linux/init_task.h>
37#include <linux/kernel.h> 36#include <linux/kernel.h>
@@ -49,7 +48,6 @@
49#include <linux/hashtable.h> 48#include <linux/hashtable.h>
50#include <linux/idr.h> 49#include <linux/idr.h>
51#include <linux/kthread.h> 50#include <linux/kthread.h>
52#include <linux/delay.h>
53#include <linux/atomic.h> 51#include <linux/atomic.h>
54#include <linux/cpuset.h> 52#include <linux/cpuset.h>
55#include <linux/proc_ns.h> 53#include <linux/proc_ns.h>
@@ -1078,7 +1076,7 @@ static void cgroup_exit_root_id(struct cgroup_root *root)
1078 idr_remove(&cgroup_hierarchy_idr, root->hierarchy_id); 1076 idr_remove(&cgroup_hierarchy_idr, root->hierarchy_id);
1079} 1077}
1080 1078
1081static void cgroup_free_root(struct cgroup_root *root) 1079void cgroup_free_root(struct cgroup_root *root)
1082{ 1080{
1083 if (root) { 1081 if (root) {
1084 idr_destroy(&root->cgroup_idr); 1082 idr_destroy(&root->cgroup_idr);
@@ -1232,7 +1230,6 @@ struct cgroup *task_cgroup_from_root(struct task_struct *task,
1232 * update of a tasks cgroup pointer by cgroup_attach_task() 1230 * update of a tasks cgroup pointer by cgroup_attach_task()
1233 */ 1231 */
1234 1232
1235static struct kernfs_syscall_ops cgroup1_kf_syscall_ops;
1236static struct kernfs_syscall_ops cgroup_kf_syscall_ops; 1233static struct kernfs_syscall_ops cgroup_kf_syscall_ops;
1237 1234
1238static char *cgroup_file_name(struct cgroup *cgrp, const struct cftype *cft, 1235static char *cgroup_file_name(struct cgroup *cgrp, const struct cftype *cft,
@@ -1540,8 +1537,8 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
1540 return 0; 1537 return 0;
1541} 1538}
1542 1539
1543static int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node, 1540int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
1544 struct kernfs_root *kf_root) 1541 struct kernfs_root *kf_root)
1545{ 1542{
1546 int len = 0; 1543 int len = 0;
1547 char *buf = NULL; 1544 char *buf = NULL;
@@ -1567,232 +1564,6 @@ static int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
1567 return len; 1564 return len;
1568} 1565}
1569 1566
1570static int cgroup1_show_options(struct seq_file *seq, struct kernfs_root *kf_root)
1571{
1572 struct cgroup_root *root = cgroup_root_from_kf(kf_root);
1573 struct cgroup_subsys *ss;
1574 int ssid;
1575
1576 for_each_subsys(ss, ssid)
1577 if (root->subsys_mask & (1 << ssid))
1578 seq_show_option(seq, ss->legacy_name, NULL);
1579 if (root->flags & CGRP_ROOT_NOPREFIX)
1580 seq_puts(seq, ",noprefix");
1581 if (root->flags & CGRP_ROOT_XATTR)
1582 seq_puts(seq, ",xattr");
1583
1584 spin_lock(&release_agent_path_lock);
1585 if (strlen(root->release_agent_path))
1586 seq_show_option(seq, "release_agent",
1587 root->release_agent_path);
1588 spin_unlock(&release_agent_path_lock);
1589
1590 if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags))
1591 seq_puts(seq, ",clone_children");
1592 if (strlen(root->name))
1593 seq_show_option(seq, "name", root->name);
1594 return 0;
1595}
1596
1597struct cgroup_sb_opts {
1598 u16 subsys_mask;
1599 unsigned int flags;
1600 char *release_agent;
1601 bool cpuset_clone_children;
1602 char *name;
1603 /* User explicitly requested empty subsystem */
1604 bool none;
1605};
1606
1607static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
1608{
1609 char *token, *o = data;
1610 bool all_ss = false, one_ss = false;
1611 u16 mask = U16_MAX;
1612 struct cgroup_subsys *ss;
1613 int nr_opts = 0;
1614 int i;
1615
1616#ifdef CONFIG_CPUSETS
1617 mask = ~((u16)1 << cpuset_cgrp_id);
1618#endif
1619
1620 memset(opts, 0, sizeof(*opts));
1621
1622 while ((token = strsep(&o, ",")) != NULL) {
1623 nr_opts++;
1624
1625 if (!*token)
1626 return -EINVAL;
1627 if (!strcmp(token, "none")) {
1628 /* Explicitly have no subsystems */
1629 opts->none = true;
1630 continue;
1631 }
1632 if (!strcmp(token, "all")) {
1633 /* Mutually exclusive option 'all' + subsystem name */
1634 if (one_ss)
1635 return -EINVAL;
1636 all_ss = true;
1637 continue;
1638 }
1639 if (!strcmp(token, "noprefix")) {
1640 opts->flags |= CGRP_ROOT_NOPREFIX;
1641 continue;
1642 }
1643 if (!strcmp(token, "clone_children")) {
1644 opts->cpuset_clone_children = true;
1645 continue;
1646 }
1647 if (!strcmp(token, "xattr")) {
1648 opts->flags |= CGRP_ROOT_XATTR;
1649 continue;
1650 }
1651 if (!strncmp(token, "release_agent=", 14)) {
1652 /* Specifying two release agents is forbidden */
1653 if (opts->release_agent)
1654 return -EINVAL;
1655 opts->release_agent =
1656 kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL);
1657 if (!opts->release_agent)
1658 return -ENOMEM;
1659 continue;
1660 }
1661 if (!strncmp(token, "name=", 5)) {
1662 const char *name = token + 5;
1663 /* Can't specify an empty name */
1664 if (!strlen(name))
1665 return -EINVAL;
1666 /* Must match [\w.-]+ */
1667 for (i = 0; i < strlen(name); i++) {
1668 char c = name[i];
1669 if (isalnum(c))
1670 continue;
1671 if ((c == '.') || (c == '-') || (c == '_'))
1672 continue;
1673 return -EINVAL;
1674 }
1675 /* Specifying two names is forbidden */
1676 if (opts->name)
1677 return -EINVAL;
1678 opts->name = kstrndup(name,
1679 MAX_CGROUP_ROOT_NAMELEN - 1,
1680 GFP_KERNEL);
1681 if (!opts->name)
1682 return -ENOMEM;
1683
1684 continue;
1685 }
1686
1687 for_each_subsys(ss, i) {
1688 if (strcmp(token, ss->legacy_name))
1689 continue;
1690 if (!cgroup_ssid_enabled(i))
1691 continue;
1692 if (cgroup_ssid_no_v1(i))
1693 continue;
1694
1695 /* Mutually exclusive option 'all' + subsystem name */
1696 if (all_ss)
1697 return -EINVAL;
1698 opts->subsys_mask |= (1 << i);
1699 one_ss = true;
1700
1701 break;
1702 }
1703 if (i == CGROUP_SUBSYS_COUNT)
1704 return -ENOENT;
1705 }
1706
1707 /*
1708 * If the 'all' option was specified select all the subsystems,
1709 * otherwise if 'none', 'name=' and a subsystem name options were
1710 * not specified, let's default to 'all'
1711 */
1712 if (all_ss || (!one_ss && !opts->none && !opts->name))
1713 for_each_subsys(ss, i)
1714 if (cgroup_ssid_enabled(i) && !cgroup_ssid_no_v1(i))
1715 opts->subsys_mask |= (1 << i);
1716
1717 /*
1718 * We either have to specify by name or by subsystems. (So all
1719 * empty hierarchies must have a name).
1720 */
1721 if (!opts->subsys_mask && !opts->name)
1722 return -EINVAL;
1723
1724 /*
1725 * Option noprefix was introduced just for backward compatibility
1726 * with the old cpuset, so we allow noprefix only if mounting just
1727 * the cpuset subsystem.
1728 */
1729 if ((opts->flags & CGRP_ROOT_NOPREFIX) && (opts->subsys_mask & mask))
1730 return -EINVAL;
1731
1732 /* Can't specify "none" and some subsystems */
1733 if (opts->subsys_mask && opts->none)
1734 return -EINVAL;
1735
1736 return 0;
1737}
1738
1739static int cgroup1_remount(struct kernfs_root *kf_root, int *flags, char *data)
1740{
1741 int ret = 0;
1742 struct cgroup_root *root = cgroup_root_from_kf(kf_root);
1743 struct cgroup_sb_opts opts;
1744 u16 added_mask, removed_mask;
1745
1746 cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
1747
1748 /* See what subsystems are wanted */
1749 ret = parse_cgroupfs_options(data, &opts);
1750 if (ret)
1751 goto out_unlock;
1752
1753 if (opts.subsys_mask != root->subsys_mask || opts.release_agent)
1754 pr_warn("option changes via remount are deprecated (pid=%d comm=%s)\n",
1755 task_tgid_nr(current), current->comm);
1756
1757 added_mask = opts.subsys_mask & ~root->subsys_mask;
1758 removed_mask = root->subsys_mask & ~opts.subsys_mask;
1759
1760 /* Don't allow flags or name to change at remount */
1761 if ((opts.flags ^ root->flags) ||
1762 (opts.name && strcmp(opts.name, root->name))) {
1763 pr_err("option or name mismatch, new: 0x%x \"%s\", old: 0x%x \"%s\"\n",
1764 opts.flags, opts.name ?: "", root->flags, root->name);
1765 ret = -EINVAL;
1766 goto out_unlock;
1767 }
1768
1769 /* remounting is not allowed for populated hierarchies */
1770 if (!list_empty(&root->cgrp.self.children)) {
1771 ret = -EBUSY;
1772 goto out_unlock;
1773 }
1774
1775 ret = rebind_subsystems(root, added_mask);
1776 if (ret)
1777 goto out_unlock;
1778
1779 WARN_ON(rebind_subsystems(&cgrp_dfl_root, removed_mask));
1780
1781 if (opts.release_agent) {
1782 spin_lock(&release_agent_path_lock);
1783 strcpy(root->release_agent_path, opts.release_agent);
1784 spin_unlock(&release_agent_path_lock);
1785 }
1786
1787 trace_cgroup_remount(root);
1788
1789 out_unlock:
1790 kfree(opts.release_agent);
1791 kfree(opts.name);
1792 mutex_unlock(&cgroup_mutex);
1793 return ret;
1794}
1795
1796static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data) 1567static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data)
1797{ 1568{
1798 pr_err("remount is not allowed\n"); 1569 pr_err("remount is not allowed\n");
@@ -1877,8 +1648,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
1877 INIT_WORK(&cgrp->release_agent_work, cgroup_release_agent); 1648 INIT_WORK(&cgrp->release_agent_work, cgroup_release_agent);
1878} 1649}
1879 1650
1880static void init_cgroup_root(struct cgroup_root *root, 1651void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts)
1881 struct cgroup_sb_opts *opts)
1882{ 1652{
1883 struct cgroup *cgrp = &root->cgrp; 1653 struct cgroup *cgrp = &root->cgrp;
1884 1654
@@ -1897,7 +1667,7 @@ static void init_cgroup_root(struct cgroup_root *root,
1897 set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags); 1667 set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags);
1898} 1668}
1899 1669
1900static int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask) 1670int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
1901{ 1671{
1902 LIST_HEAD(tmp_links); 1672 LIST_HEAD(tmp_links);
1903 struct cgroup *root_cgrp = &root->cgrp; 1673 struct cgroup *root_cgrp = &root->cgrp;
@@ -1994,10 +1764,9 @@ out:
1994 return ret; 1764 return ret;
1995} 1765}
1996 1766
1997static struct dentry *cgroup_do_mount(struct file_system_type *fs_type, 1767struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags,
1998 int flags, struct cgroup_root *root, 1768 struct cgroup_root *root, unsigned long magic,
1999 unsigned long magic, 1769 struct cgroup_namespace *ns)
2000 struct cgroup_namespace *ns)
2001{ 1770{
2002 struct dentry *dentry; 1771 struct dentry *dentry;
2003 bool new_sb; 1772 bool new_sb;
@@ -2031,155 +1800,6 @@ static struct dentry *cgroup_do_mount(struct file_system_type *fs_type,
2031 return dentry; 1800 return dentry;
2032} 1801}
2033 1802
2034static struct dentry *cgroup1_mount(struct file_system_type *fs_type,
2035 int flags, void *data,
2036 unsigned long magic,
2037 struct cgroup_namespace *ns)
2038{
2039 struct super_block *pinned_sb = NULL;
2040 struct cgroup_sb_opts opts;
2041 struct cgroup_root *root;
2042 struct cgroup_subsys *ss;
2043 struct dentry *dentry;
2044 int i, ret;
2045
2046 cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
2047
2048 /* First find the desired set of subsystems */
2049 ret = parse_cgroupfs_options(data, &opts);
2050 if (ret)
2051 goto out_unlock;
2052
2053 /*
2054 * Destruction of cgroup root is asynchronous, so subsystems may
2055 * still be dying after the previous unmount. Let's drain the
2056 * dying subsystems. We just need to ensure that the ones
2057 * unmounted previously finish dying and don't care about new ones
2058 * starting. Testing ref liveliness is good enough.
2059 */
2060 for_each_subsys(ss, i) {
2061 if (!(opts.subsys_mask & (1 << i)) ||
2062 ss->root == &cgrp_dfl_root)
2063 continue;
2064
2065 if (!percpu_ref_tryget_live(&ss->root->cgrp.self.refcnt)) {
2066 mutex_unlock(&cgroup_mutex);
2067 msleep(10);
2068 ret = restart_syscall();
2069 goto out_free;
2070 }
2071 cgroup_put(&ss->root->cgrp);
2072 }
2073
2074 for_each_root(root) {
2075 bool name_match = false;
2076
2077 if (root == &cgrp_dfl_root)
2078 continue;
2079
2080 /*
2081 * If we asked for a name then it must match. Also, if
2082 * name matches but sybsys_mask doesn't, we should fail.
2083 * Remember whether name matched.
2084 */
2085 if (opts.name) {
2086 if (strcmp(opts.name, root->name))
2087 continue;
2088 name_match = true;
2089 }
2090
2091 /*
2092 * If we asked for subsystems (or explicitly for no
2093 * subsystems) then they must match.
2094 */
2095 if ((opts.subsys_mask || opts.none) &&
2096 (opts.subsys_mask != root->subsys_mask)) {
2097 if (!name_match)
2098 continue;
2099 ret = -EBUSY;
2100 goto out_unlock;
2101 }
2102
2103 if (root->flags ^ opts.flags)
2104 pr_warn("new mount options do not match the existing superblock, will be ignored\n");
2105
2106 /*
2107 * We want to reuse @root whose lifetime is governed by its
2108 * ->cgrp. Let's check whether @root is alive and keep it
2109 * that way. As cgroup_kill_sb() can happen anytime, we
2110 * want to block it by pinning the sb so that @root doesn't
2111 * get killed before mount is complete.
2112 *
2113 * With the sb pinned, tryget_live can reliably indicate
2114 * whether @root can be reused. If it's being killed,
2115 * drain it. We can use wait_queue for the wait but this
2116 * path is super cold. Let's just sleep a bit and retry.
2117 */
2118 pinned_sb = kernfs_pin_sb(root->kf_root, NULL);
2119 if (IS_ERR(pinned_sb) ||
2120 !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
2121 mutex_unlock(&cgroup_mutex);
2122 if (!IS_ERR_OR_NULL(pinned_sb))
2123 deactivate_super(pinned_sb);
2124 msleep(10);
2125 ret = restart_syscall();
2126 goto out_free;
2127 }
2128
2129 ret = 0;
2130 goto out_unlock;
2131 }
2132
2133 /*
2134 * No such thing, create a new one. name= matching without subsys
2135 * specification is allowed for already existing hierarchies but we
2136 * can't create new one without subsys specification.
2137 */
2138 if (!opts.subsys_mask && !opts.none) {
2139 ret = -EINVAL;
2140 goto out_unlock;
2141 }
2142
2143 /* Hierarchies may only be created in the initial cgroup namespace. */
2144 if (ns != &init_cgroup_ns) {
2145 ret = -EPERM;
2146 goto out_unlock;
2147 }
2148
2149 root = kzalloc(sizeof(*root), GFP_KERNEL);
2150 if (!root) {
2151 ret = -ENOMEM;
2152 goto out_unlock;
2153 }
2154
2155 init_cgroup_root(root, &opts);
2156
2157 ret = cgroup_setup_root(root, opts.subsys_mask);
2158 if (ret)
2159 cgroup_free_root(root);
2160
2161out_unlock:
2162 mutex_unlock(&cgroup_mutex);
2163out_free:
2164 kfree(opts.release_agent);
2165 kfree(opts.name);
2166
2167 if (ret)
2168 return ERR_PTR(ret);
2169
2170 dentry = cgroup_do_mount(&cgroup_fs_type, flags, root,
2171 CGROUP_SUPER_MAGIC, ns);
2172
2173 /*
2174 * If @pinned_sb, we're reusing an existing root and holding an
2175 * extra ref on its sb. Mount is complete. Put the extra ref.
2176 */
2177 if (pinned_sb)
2178 deactivate_super(pinned_sb);
2179
2180 return dentry;
2181}
2182
2183static struct dentry *cgroup_mount(struct file_system_type *fs_type, 1803static struct dentry *cgroup_mount(struct file_system_type *fs_type,
2184 int flags, const char *unused_dev_name, 1804 int flags, const char *unused_dev_name,
2185 void *data) 1805 void *data)
@@ -4587,8 +4207,7 @@ out_destroy:
4587 return ERR_PTR(ret); 4207 return ERR_PTR(ret);
4588} 4208}
4589 4209
4590static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, 4210int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode)
4591 umode_t mode)
4592{ 4211{
4593 struct cgroup *parent, *cgrp; 4212 struct cgroup *parent, *cgrp;
4594 struct kernfs_node *kn; 4213 struct kernfs_node *kn;
@@ -4800,7 +4419,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
4800 return 0; 4419 return 0;
4801}; 4420};
4802 4421
4803static int cgroup_rmdir(struct kernfs_node *kn) 4422int cgroup_rmdir(struct kernfs_node *kn)
4804{ 4423{
4805 struct cgroup *cgrp; 4424 struct cgroup *cgrp;
4806 int ret = 0; 4425 int ret = 0;
@@ -4818,15 +4437,6 @@ static int cgroup_rmdir(struct kernfs_node *kn)
4818 return ret; 4437 return ret;
4819} 4438}
4820 4439
4821static struct kernfs_syscall_ops cgroup1_kf_syscall_ops = {
4822 .remount_fs = cgroup1_remount,
4823 .show_options = cgroup1_show_options,
4824 .rename = cgroup1_rename,
4825 .mkdir = cgroup_mkdir,
4826 .rmdir = cgroup_rmdir,
4827 .show_path = cgroup_show_path,
4828};
4829
4830static struct kernfs_syscall_ops cgroup_kf_syscall_ops = { 4440static struct kernfs_syscall_ops cgroup_kf_syscall_ops = {
4831 .remount_fs = cgroup_remount, 4441 .remount_fs = cgroup_remount,
4832 .mkdir = cgroup_mkdir, 4442 .mkdir = cgroup_mkdir,