diff options
-rw-r--r-- | kernel/cgroup/Makefile | 2 | ||||
-rw-r--r-- | kernel/cgroup/cgroup-internal.h | 32 | ||||
-rw-r--r-- | kernel/cgroup/cgroup.c | 175 | ||||
-rw-r--r-- | kernel/cgroup/namespace.c | 155 |
4 files changed, 189 insertions, 175 deletions
diff --git a/kernel/cgroup/Makefile b/kernel/cgroup/Makefile index 719588cb18cd..6d42a3211164 100644 --- a/kernel/cgroup/Makefile +++ b/kernel/cgroup/Makefile | |||
@@ -1,4 +1,4 @@ | |||
1 | obj-y := cgroup.o cgroup-v1.o | 1 | obj-y := cgroup.o namespace.o cgroup-v1.o |
2 | 2 | ||
3 | obj-$(CONFIG_CGROUP_FREEZER) += freezer.o | 3 | obj-$(CONFIG_CGROUP_FREEZER) += freezer.o |
4 | obj-$(CONFIG_CGROUP_PIDS) += pids.o | 4 | obj-$(CONFIG_CGROUP_PIDS) += pids.o |
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index a890c92cb688..589b0e7013ec 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h | |||
@@ -65,6 +65,33 @@ static inline bool notify_on_release(const struct cgroup *cgrp) | |||
65 | return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); | 65 | return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); |
66 | } | 66 | } |
67 | 67 | ||
68 | void put_css_set_locked(struct css_set *cset); | ||
69 | |||
70 | static inline void put_css_set(struct css_set *cset) | ||
71 | { | ||
72 | unsigned long flags; | ||
73 | |||
74 | /* | ||
75 | * Ensure that the refcount doesn't hit zero while any readers | ||
76 | * can see it. Similar to atomic_dec_and_lock(), but for an | ||
77 | * rwlock | ||
78 | */ | ||
79 | if (atomic_add_unless(&cset->refcount, -1, 1)) | ||
80 | return; | ||
81 | |||
82 | spin_lock_irqsave(&css_set_lock, flags); | ||
83 | put_css_set_locked(cset); | ||
84 | spin_unlock_irqrestore(&css_set_lock, flags); | ||
85 | } | ||
86 | |||
87 | /* | ||
88 | * refcounted get/put for css_set objects | ||
89 | */ | ||
90 | static inline void get_css_set(struct css_set *cset) | ||
91 | { | ||
92 | atomic_inc(&cset->refcount); | ||
93 | } | ||
94 | |||
68 | bool cgroup_ssid_enabled(int ssid); | 95 | bool cgroup_ssid_enabled(int ssid); |
69 | bool cgroup_on_dfl(const struct cgroup *cgrp); | 96 | bool cgroup_on_dfl(const struct cgroup *cgrp); |
70 | 97 | ||
@@ -108,6 +135,11 @@ int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node, | |||
108 | struct kernfs_root *kf_root); | 135 | struct kernfs_root *kf_root); |
109 | 136 | ||
110 | /* | 137 | /* |
138 | * namespace.c | ||
139 | */ | ||
140 | extern const struct proc_ns_operations cgroupns_operations; | ||
141 | |||
142 | /* | ||
111 | * cgroup-v1.c | 143 | * cgroup-v1.c |
112 | */ | 144 | */ |
113 | extern struct cftype cgroup1_base_files[]; | 145 | extern struct cftype cgroup1_base_files[]; |
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index a05a2dacf5dc..b6b9068ef468 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c | |||
@@ -718,7 +718,7 @@ static unsigned long css_set_hash(struct cgroup_subsys_state *css[]) | |||
718 | return key; | 718 | return key; |
719 | } | 719 | } |
720 | 720 | ||
721 | static void put_css_set_locked(struct css_set *cset) | 721 | void put_css_set_locked(struct css_set *cset) |
722 | { | 722 | { |
723 | struct cgrp_cset_link *link, *tmp_link; | 723 | struct cgrp_cset_link *link, *tmp_link; |
724 | struct cgroup_subsys *ss; | 724 | struct cgroup_subsys *ss; |
@@ -748,31 +748,6 @@ static void put_css_set_locked(struct css_set *cset) | |||
748 | kfree_rcu(cset, rcu_head); | 748 | kfree_rcu(cset, rcu_head); |
749 | } | 749 | } |
750 | 750 | ||
751 | static void put_css_set(struct css_set *cset) | ||
752 | { | ||
753 | unsigned long flags; | ||
754 | |||
755 | /* | ||
756 | * Ensure that the refcount doesn't hit zero while any readers | ||
757 | * can see it. Similar to atomic_dec_and_lock(), but for an | ||
758 | * rwlock | ||
759 | */ | ||
760 | if (atomic_add_unless(&cset->refcount, -1, 1)) | ||
761 | return; | ||
762 | |||
763 | spin_lock_irqsave(&css_set_lock, flags); | ||
764 | put_css_set_locked(cset); | ||
765 | spin_unlock_irqrestore(&css_set_lock, flags); | ||
766 | } | ||
767 | |||
768 | /* | ||
769 | * refcounted get/put for css_set objects | ||
770 | */ | ||
771 | static inline void get_css_set(struct css_set *cset) | ||
772 | { | ||
773 | atomic_inc(&cset->refcount); | ||
774 | } | ||
775 | |||
776 | /** | 751 | /** |
777 | * compare_css_sets - helper function for find_existing_css_set(). | 752 | * compare_css_sets - helper function for find_existing_css_set(). |
778 | * @cset: candidate css_set being tested | 753 | * @cset: candidate css_set being tested |
@@ -5109,154 +5084,6 @@ void cgroup_sk_free(struct sock_cgroup_data *skcd) | |||
5109 | 5084 | ||
5110 | #endif /* CONFIG_SOCK_CGROUP_DATA */ | 5085 | #endif /* CONFIG_SOCK_CGROUP_DATA */ |
5111 | 5086 | ||
5112 | /* cgroup namespaces */ | ||
5113 | |||
5114 | static struct ucounts *inc_cgroup_namespaces(struct user_namespace *ns) | ||
5115 | { | ||
5116 | return inc_ucount(ns, current_euid(), UCOUNT_CGROUP_NAMESPACES); | ||
5117 | } | ||
5118 | |||
5119 | static void dec_cgroup_namespaces(struct ucounts *ucounts) | ||
5120 | { | ||
5121 | dec_ucount(ucounts, UCOUNT_CGROUP_NAMESPACES); | ||
5122 | } | ||
5123 | |||
5124 | static struct cgroup_namespace *alloc_cgroup_ns(void) | ||
5125 | { | ||
5126 | struct cgroup_namespace *new_ns; | ||
5127 | int ret; | ||
5128 | |||
5129 | new_ns = kzalloc(sizeof(struct cgroup_namespace), GFP_KERNEL); | ||
5130 | if (!new_ns) | ||
5131 | return ERR_PTR(-ENOMEM); | ||
5132 | ret = ns_alloc_inum(&new_ns->ns); | ||
5133 | if (ret) { | ||
5134 | kfree(new_ns); | ||
5135 | return ERR_PTR(ret); | ||
5136 | } | ||
5137 | atomic_set(&new_ns->count, 1); | ||
5138 | new_ns->ns.ops = &cgroupns_operations; | ||
5139 | return new_ns; | ||
5140 | } | ||
5141 | |||
5142 | void free_cgroup_ns(struct cgroup_namespace *ns) | ||
5143 | { | ||
5144 | put_css_set(ns->root_cset); | ||
5145 | dec_cgroup_namespaces(ns->ucounts); | ||
5146 | put_user_ns(ns->user_ns); | ||
5147 | ns_free_inum(&ns->ns); | ||
5148 | kfree(ns); | ||
5149 | } | ||
5150 | EXPORT_SYMBOL(free_cgroup_ns); | ||
5151 | |||
5152 | struct cgroup_namespace *copy_cgroup_ns(unsigned long flags, | ||
5153 | struct user_namespace *user_ns, | ||
5154 | struct cgroup_namespace *old_ns) | ||
5155 | { | ||
5156 | struct cgroup_namespace *new_ns; | ||
5157 | struct ucounts *ucounts; | ||
5158 | struct css_set *cset; | ||
5159 | |||
5160 | BUG_ON(!old_ns); | ||
5161 | |||
5162 | if (!(flags & CLONE_NEWCGROUP)) { | ||
5163 | get_cgroup_ns(old_ns); | ||
5164 | return old_ns; | ||
5165 | } | ||
5166 | |||
5167 | /* Allow only sysadmin to create cgroup namespace. */ | ||
5168 | if (!ns_capable(user_ns, CAP_SYS_ADMIN)) | ||
5169 | return ERR_PTR(-EPERM); | ||
5170 | |||
5171 | ucounts = inc_cgroup_namespaces(user_ns); | ||
5172 | if (!ucounts) | ||
5173 | return ERR_PTR(-ENOSPC); | ||
5174 | |||
5175 | /* It is not safe to take cgroup_mutex here */ | ||
5176 | spin_lock_irq(&css_set_lock); | ||
5177 | cset = task_css_set(current); | ||
5178 | get_css_set(cset); | ||
5179 | spin_unlock_irq(&css_set_lock); | ||
5180 | |||
5181 | new_ns = alloc_cgroup_ns(); | ||
5182 | if (IS_ERR(new_ns)) { | ||
5183 | put_css_set(cset); | ||
5184 | dec_cgroup_namespaces(ucounts); | ||
5185 | return new_ns; | ||
5186 | } | ||
5187 | |||
5188 | new_ns->user_ns = get_user_ns(user_ns); | ||
5189 | new_ns->ucounts = ucounts; | ||
5190 | new_ns->root_cset = cset; | ||
5191 | |||
5192 | return new_ns; | ||
5193 | } | ||
5194 | |||
5195 | static inline struct cgroup_namespace *to_cg_ns(struct ns_common *ns) | ||
5196 | { | ||
5197 | return container_of(ns, struct cgroup_namespace, ns); | ||
5198 | } | ||
5199 | |||
5200 | static int cgroupns_install(struct nsproxy *nsproxy, struct ns_common *ns) | ||
5201 | { | ||
5202 | struct cgroup_namespace *cgroup_ns = to_cg_ns(ns); | ||
5203 | |||
5204 | if (!ns_capable(current_user_ns(), CAP_SYS_ADMIN) || | ||
5205 | !ns_capable(cgroup_ns->user_ns, CAP_SYS_ADMIN)) | ||
5206 | return -EPERM; | ||
5207 | |||
5208 | /* Don't need to do anything if we are attaching to our own cgroupns. */ | ||
5209 | if (cgroup_ns == nsproxy->cgroup_ns) | ||
5210 | return 0; | ||
5211 | |||
5212 | get_cgroup_ns(cgroup_ns); | ||
5213 | put_cgroup_ns(nsproxy->cgroup_ns); | ||
5214 | nsproxy->cgroup_ns = cgroup_ns; | ||
5215 | |||
5216 | return 0; | ||
5217 | } | ||
5218 | |||
5219 | static struct ns_common *cgroupns_get(struct task_struct *task) | ||
5220 | { | ||
5221 | struct cgroup_namespace *ns = NULL; | ||
5222 | struct nsproxy *nsproxy; | ||
5223 | |||
5224 | task_lock(task); | ||
5225 | nsproxy = task->nsproxy; | ||
5226 | if (nsproxy) { | ||
5227 | ns = nsproxy->cgroup_ns; | ||
5228 | get_cgroup_ns(ns); | ||
5229 | } | ||
5230 | task_unlock(task); | ||
5231 | |||
5232 | return ns ? &ns->ns : NULL; | ||
5233 | } | ||
5234 | |||
5235 | static void cgroupns_put(struct ns_common *ns) | ||
5236 | { | ||
5237 | put_cgroup_ns(to_cg_ns(ns)); | ||
5238 | } | ||
5239 | |||
5240 | static struct user_namespace *cgroupns_owner(struct ns_common *ns) | ||
5241 | { | ||
5242 | return to_cg_ns(ns)->user_ns; | ||
5243 | } | ||
5244 | |||
5245 | const struct proc_ns_operations cgroupns_operations = { | ||
5246 | .name = "cgroup", | ||
5247 | .type = CLONE_NEWCGROUP, | ||
5248 | .get = cgroupns_get, | ||
5249 | .put = cgroupns_put, | ||
5250 | .install = cgroupns_install, | ||
5251 | .owner = cgroupns_owner, | ||
5252 | }; | ||
5253 | |||
5254 | static __init int cgroup_namespaces_init(void) | ||
5255 | { | ||
5256 | return 0; | ||
5257 | } | ||
5258 | subsys_initcall(cgroup_namespaces_init); | ||
5259 | |||
5260 | #ifdef CONFIG_CGROUP_BPF | 5087 | #ifdef CONFIG_CGROUP_BPF |
5261 | void cgroup_bpf_update(struct cgroup *cgrp, | 5088 | void cgroup_bpf_update(struct cgroup *cgrp, |
5262 | struct bpf_prog *prog, | 5089 | struct bpf_prog *prog, |
diff --git a/kernel/cgroup/namespace.c b/kernel/cgroup/namespace.c new file mode 100644 index 000000000000..cff7ea62c38f --- /dev/null +++ b/kernel/cgroup/namespace.c | |||
@@ -0,0 +1,155 @@ | |||
1 | #include "cgroup-internal.h" | ||
2 | |||
3 | #include <linux/sched.h> | ||
4 | #include <linux/slab.h> | ||
5 | #include <linux/nsproxy.h> | ||
6 | #include <linux/proc_ns.h> | ||
7 | |||
8 | |||
9 | /* cgroup namespaces */ | ||
10 | |||
11 | static struct ucounts *inc_cgroup_namespaces(struct user_namespace *ns) | ||
12 | { | ||
13 | return inc_ucount(ns, current_euid(), UCOUNT_CGROUP_NAMESPACES); | ||
14 | } | ||
15 | |||
16 | static void dec_cgroup_namespaces(struct ucounts *ucounts) | ||
17 | { | ||
18 | dec_ucount(ucounts, UCOUNT_CGROUP_NAMESPACES); | ||
19 | } | ||
20 | |||
21 | static struct cgroup_namespace *alloc_cgroup_ns(void) | ||
22 | { | ||
23 | struct cgroup_namespace *new_ns; | ||
24 | int ret; | ||
25 | |||
26 | new_ns = kzalloc(sizeof(struct cgroup_namespace), GFP_KERNEL); | ||
27 | if (!new_ns) | ||
28 | return ERR_PTR(-ENOMEM); | ||
29 | ret = ns_alloc_inum(&new_ns->ns); | ||
30 | if (ret) { | ||
31 | kfree(new_ns); | ||
32 | return ERR_PTR(ret); | ||
33 | } | ||
34 | atomic_set(&new_ns->count, 1); | ||
35 | new_ns->ns.ops = &cgroupns_operations; | ||
36 | return new_ns; | ||
37 | } | ||
38 | |||
39 | void free_cgroup_ns(struct cgroup_namespace *ns) | ||
40 | { | ||
41 | put_css_set(ns->root_cset); | ||
42 | dec_cgroup_namespaces(ns->ucounts); | ||
43 | put_user_ns(ns->user_ns); | ||
44 | ns_free_inum(&ns->ns); | ||
45 | kfree(ns); | ||
46 | } | ||
47 | EXPORT_SYMBOL(free_cgroup_ns); | ||
48 | |||
49 | struct cgroup_namespace *copy_cgroup_ns(unsigned long flags, | ||
50 | struct user_namespace *user_ns, | ||
51 | struct cgroup_namespace *old_ns) | ||
52 | { | ||
53 | struct cgroup_namespace *new_ns; | ||
54 | struct ucounts *ucounts; | ||
55 | struct css_set *cset; | ||
56 | |||
57 | BUG_ON(!old_ns); | ||
58 | |||
59 | if (!(flags & CLONE_NEWCGROUP)) { | ||
60 | get_cgroup_ns(old_ns); | ||
61 | return old_ns; | ||
62 | } | ||
63 | |||
64 | /* Allow only sysadmin to create cgroup namespace. */ | ||
65 | if (!ns_capable(user_ns, CAP_SYS_ADMIN)) | ||
66 | return ERR_PTR(-EPERM); | ||
67 | |||
68 | ucounts = inc_cgroup_namespaces(user_ns); | ||
69 | if (!ucounts) | ||
70 | return ERR_PTR(-ENOSPC); | ||
71 | |||
72 | /* It is not safe to take cgroup_mutex here */ | ||
73 | spin_lock_irq(&css_set_lock); | ||
74 | cset = task_css_set(current); | ||
75 | get_css_set(cset); | ||
76 | spin_unlock_irq(&css_set_lock); | ||
77 | |||
78 | new_ns = alloc_cgroup_ns(); | ||
79 | if (IS_ERR(new_ns)) { | ||
80 | put_css_set(cset); | ||
81 | dec_cgroup_namespaces(ucounts); | ||
82 | return new_ns; | ||
83 | } | ||
84 | |||
85 | new_ns->user_ns = get_user_ns(user_ns); | ||
86 | new_ns->ucounts = ucounts; | ||
87 | new_ns->root_cset = cset; | ||
88 | |||
89 | return new_ns; | ||
90 | } | ||
91 | |||
92 | static inline struct cgroup_namespace *to_cg_ns(struct ns_common *ns) | ||
93 | { | ||
94 | return container_of(ns, struct cgroup_namespace, ns); | ||
95 | } | ||
96 | |||
97 | static int cgroupns_install(struct nsproxy *nsproxy, struct ns_common *ns) | ||
98 | { | ||
99 | struct cgroup_namespace *cgroup_ns = to_cg_ns(ns); | ||
100 | |||
101 | if (!ns_capable(current_user_ns(), CAP_SYS_ADMIN) || | ||
102 | !ns_capable(cgroup_ns->user_ns, CAP_SYS_ADMIN)) | ||
103 | return -EPERM; | ||
104 | |||
105 | /* Don't need to do anything if we are attaching to our own cgroupns. */ | ||
106 | if (cgroup_ns == nsproxy->cgroup_ns) | ||
107 | return 0; | ||
108 | |||
109 | get_cgroup_ns(cgroup_ns); | ||
110 | put_cgroup_ns(nsproxy->cgroup_ns); | ||
111 | nsproxy->cgroup_ns = cgroup_ns; | ||
112 | |||
113 | return 0; | ||
114 | } | ||
115 | |||
116 | static struct ns_common *cgroupns_get(struct task_struct *task) | ||
117 | { | ||
118 | struct cgroup_namespace *ns = NULL; | ||
119 | struct nsproxy *nsproxy; | ||
120 | |||
121 | task_lock(task); | ||
122 | nsproxy = task->nsproxy; | ||
123 | if (nsproxy) { | ||
124 | ns = nsproxy->cgroup_ns; | ||
125 | get_cgroup_ns(ns); | ||
126 | } | ||
127 | task_unlock(task); | ||
128 | |||
129 | return ns ? &ns->ns : NULL; | ||
130 | } | ||
131 | |||
132 | static void cgroupns_put(struct ns_common *ns) | ||
133 | { | ||
134 | put_cgroup_ns(to_cg_ns(ns)); | ||
135 | } | ||
136 | |||
137 | static struct user_namespace *cgroupns_owner(struct ns_common *ns) | ||
138 | { | ||
139 | return to_cg_ns(ns)->user_ns; | ||
140 | } | ||
141 | |||
142 | const struct proc_ns_operations cgroupns_operations = { | ||
143 | .name = "cgroup", | ||
144 | .type = CLONE_NEWCGROUP, | ||
145 | .get = cgroupns_get, | ||
146 | .put = cgroupns_put, | ||
147 | .install = cgroupns_install, | ||
148 | .owner = cgroupns_owner, | ||
149 | }; | ||
150 | |||
151 | static __init int cgroup_namespaces_init(void) | ||
152 | { | ||
153 | return 0; | ||
154 | } | ||
155 | subsys_initcall(cgroup_namespaces_init); | ||