aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDaniel Wagner <daniel.wagner@bmw-carit.de>2012-09-12 10:12:07 -0400
committerTejun Heo <tj@kernel.org>2012-09-14 12:57:43 -0400
commit8a8e04df4747661daaee77e98e102d99c9e09b98 (patch)
tree8ecd4288cfad84ed2bfc1820331b2483e70abfd3
parent80f4c87774721e864d5a5a1f7aca3e95fd90e194 (diff)
cgroup: Assign subsystem IDs during compile time
WARNING: With this change it is impossible to load externally built controllers anymore. In the case where CONFIG_NETPRIO_CGROUP=m and CONFIG_NET_CLS_CGROUP=m are set, the corresponding subsys_id should also be a constant. Up to now, net_prio_subsys_id and net_cls_subsys_id would be of the type int and the value would be assigned during runtime. By switching the macro definition IS_SUBSYS_ENABLED from IS_BUILTIN to IS_ENABLED, all *_subsys_id will have a constant value. That means we need to remove all the code which assumes a value can be assigned to net_prio_subsys_id and net_cls_subsys_id. A close look is necessary on the RCU part which was introduced by the following patch: commit f845172531fb7410c7fb7780b1a6e51ee6df7d52 Author: Herbert Xu <herbert@gondor.apana.org.au> Mon May 24 09:12:34 2010 Committer: David S. Miller <davem@davemloft.net> Mon May 24 09:12:34 2010 cls_cgroup: Store classid in struct sock This code was added to init_cgroup_cls() /* We can't use rcu_assign_pointer because this is an int. */ smp_wmb(); net_cls_subsys_id = net_cls_subsys.subsys_id; respectively to exit_cgroup_cls() net_cls_subsys_id = -1; synchronize_rcu(); and in the module version of task_cls_classid() rcu_read_lock(); id = rcu_dereference(net_cls_subsys_id); if (id >= 0) classid = container_of(task_subsys_state(p, id), struct cgroup_cls_state, css)->classid; rcu_read_unlock(); without an explicit explanation of why the RCU part is needed. (The rcu_dereference() was fixed by exchanging it for rcu_dereference_index_check() in a later commit, but that is a minor detail.) So here is my pondering on why it was introduced and why it is safe to remove it now. Note that this code was copied over to net_prio, so the reasoning holds for that subsystem too. The idea behind the RCU use for net_cls_subsys_id is to make sure we get a valid pointer back from task_subsys_state(). task_subsys_state() is just blindly accessing the subsys array and returning the pointer. 
Obviously, passing in -1 as id into task_subsys_state() returns an invalid value (below the lower bound of the array). So this code makes sure that the id is assigned only after the module is loaded and the subsystem is registered. Before unregistering the module all old readers must have left the critical section. This is done by assigning -1 to the id and issuing a synchronize_rcu(). Any new readers won't call task_subsys_state() anymore and therefore it is safe to unregister the subsystem. The new code relies on the same trick, but it looks at the subsys pointer returned by task_subsys_state() (remember the id is constant and therefore we always have a valid index into the subsys array). No precautions need to be taken during module loading. Eventually, all CPUs will get a valid pointer back from task_subsys_state() because rebind_subsystems(), which is called after the module init() function, will assign subsys[net_cls_subsys_id] the newly loaded module's subsystem pointer. When the subsystem is about to be removed, rebind_subsystems() will be called before the module exit() function. In this case, rebind_subsystems() will assign subsys[net_cls_subsys_id] a NULL pointer and then it calls synchronize_rcu(). By then all old readers have left the critical section. Any new reader won't access the subsystem anymore. At this point we are safe to unregister the subsystem. No additional synchronize_rcu() call is needed. Signed-off-by: Daniel Wagner <daniel.wagner@bmw-carit.de> Signed-off-by: Tejun Heo <tj@kernel.org> Acked-by: Li Zefan <lizefan@huawei.com> Acked-by: Neil Horman <nhorman@tuxdriver.com> Cc: "David S. Miller" <davem@davemloft.net> Cc: "Paul E. 
McKenney" <paulmck@linux.vnet.ibm.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Eric Dumazet <edumazet@google.com> Cc: Gao feng <gaofeng@cn.fujitsu.com> Cc: Glauber Costa <glommer@parallels.com> Cc: Herbert Xu <herbert@gondor.apana.org.au> Cc: Jamal Hadi Salim <jhs@mojatatu.com> Cc: John Fastabend <john.r.fastabend@intel.com> Cc: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: netdev@vger.kernel.org Cc: cgroups@vger.kernel.org
-rw-r--r--include/linux/cgroup.h2
-rw-r--r--include/net/cls_cgroup.h12
-rw-r--r--include/net/netprio_cgroup.h18
-rw-r--r--kernel/cgroup.c22
-rw-r--r--net/core/netprio_cgroup.c11
-rw-r--r--net/core/sock.c11
-rw-r--r--net/sched/cls_cgroup.c13
7 files changed, 13 insertions, 76 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index a5ab5651441b..018f819405c8 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -46,7 +46,7 @@ extern const struct file_operations proc_cgroup_operations;
46 46
47/* Define the enumeration of all builtin cgroup subsystems */ 47/* Define the enumeration of all builtin cgroup subsystems */
48#define SUBSYS(_x) _x ## _subsys_id, 48#define SUBSYS(_x) _x ## _subsys_id,
49#define IS_SUBSYS_ENABLED(option) IS_BUILTIN(option) 49#define IS_SUBSYS_ENABLED(option) IS_ENABLED(option)
50enum cgroup_subsys_id { 50enum cgroup_subsys_id {
51#include <linux/cgroup_subsys.h> 51#include <linux/cgroup_subsys.h>
52 __CGROUP_TEMPORARY_PLACEHOLDER 52 __CGROUP_TEMPORARY_PLACEHOLDER
diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h
index 9bd5db9e10ba..b6a6eeb3905f 100644
--- a/include/net/cls_cgroup.h
+++ b/include/net/cls_cgroup.h
@@ -42,22 +42,18 @@ static inline u32 task_cls_classid(struct task_struct *p)
42 return classid; 42 return classid;
43} 43}
44#elif IS_MODULE(CONFIG_NET_CLS_CGROUP) 44#elif IS_MODULE(CONFIG_NET_CLS_CGROUP)
45
46extern int net_cls_subsys_id;
47
48static inline u32 task_cls_classid(struct task_struct *p) 45static inline u32 task_cls_classid(struct task_struct *p)
49{ 46{
50 int id; 47 struct cgroup_subsys_state *css;
51 u32 classid = 0; 48 u32 classid = 0;
52 49
53 if (in_interrupt()) 50 if (in_interrupt())
54 return 0; 51 return 0;
55 52
56 rcu_read_lock(); 53 rcu_read_lock();
57 id = rcu_dereference_index_check(net_cls_subsys_id, 54 css = task_subsys_state(p, net_cls_subsys_id);
58 rcu_read_lock_held()); 55 if (css)
59 if (id >= 0) 56 classid = container_of(css,
60 classid = container_of(task_subsys_state(p, id),
61 struct cgroup_cls_state, css)->classid; 57 struct cgroup_cls_state, css)->classid;
62 rcu_read_unlock(); 58 rcu_read_unlock();
63 59
diff --git a/include/net/netprio_cgroup.h b/include/net/netprio_cgroup.h
index b202de882489..2760f4f4ae9b 100644
--- a/include/net/netprio_cgroup.h
+++ b/include/net/netprio_cgroup.h
@@ -30,10 +30,6 @@ struct cgroup_netprio_state {
30 u32 prioidx; 30 u32 prioidx;
31}; 31};
32 32
33#ifndef CONFIG_NETPRIO_CGROUP
34extern int net_prio_subsys_id;
35#endif
36
37extern void sock_update_netprioidx(struct sock *sk, struct task_struct *task); 33extern void sock_update_netprioidx(struct sock *sk, struct task_struct *task);
38 34
39#if IS_BUILTIN(CONFIG_NETPRIO_CGROUP) 35#if IS_BUILTIN(CONFIG_NETPRIO_CGROUP)
@@ -55,18 +51,14 @@ static inline u32 task_netprioidx(struct task_struct *p)
55 51
56static inline u32 task_netprioidx(struct task_struct *p) 52static inline u32 task_netprioidx(struct task_struct *p)
57{ 53{
58 struct cgroup_netprio_state *state; 54 struct cgroup_subsys_state *css;
59 int subsys_id;
60 u32 idx = 0; 55 u32 idx = 0;
61 56
62 rcu_read_lock(); 57 rcu_read_lock();
63 subsys_id = rcu_dereference_index_check(net_prio_subsys_id, 58 css = task_subsys_state(p, net_prio_subsys_id);
64 rcu_read_lock_held()); 59 if (css)
65 if (subsys_id >= 0) { 60 idx = container_of(css,
66 state = container_of(task_subsys_state(p, subsys_id), 61 struct cgroup_netprio_state, css)->prioidx;
67 struct cgroup_netprio_state, css);
68 idx = state->prioidx;
69 }
70 rcu_read_unlock(); 62 rcu_read_unlock();
71 return idx; 63 return idx;
72} 64}
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 2bfc78f531b6..485cc1487ea2 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4451,24 +4451,8 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
4451 /* init base cftset */ 4451 /* init base cftset */
4452 cgroup_init_cftsets(ss); 4452 cgroup_init_cftsets(ss);
4453 4453
4454 /*
4455 * need to register a subsys id before anything else - for example,
4456 * init_cgroup_css needs it.
4457 */
4458 mutex_lock(&cgroup_mutex); 4454 mutex_lock(&cgroup_mutex);
4459 /* find the first empty slot in the array */ 4455 subsys[ss->subsys_id] = ss;
4460 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
4461 if (subsys[i] == NULL)
4462 break;
4463 }
4464 if (i == CGROUP_SUBSYS_COUNT) {
4465 /* maximum number of subsystems already registered! */
4466 mutex_unlock(&cgroup_mutex);
4467 return -EBUSY;
4468 }
4469 /* assign ourselves the subsys_id */
4470 ss->subsys_id = i;
4471 subsys[i] = ss;
4472 4456
4473 /* 4457 /*
4474 * no ss->create seems to need anything important in the ss struct, so 4458 * no ss->create seems to need anything important in the ss struct, so
@@ -4477,7 +4461,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
4477 css = ss->create(dummytop); 4461 css = ss->create(dummytop);
4478 if (IS_ERR(css)) { 4462 if (IS_ERR(css)) {
4479 /* failure case - need to deassign the subsys[] slot. */ 4463 /* failure case - need to deassign the subsys[] slot. */
4480 subsys[i] = NULL; 4464 subsys[ss->subsys_id] = NULL;
4481 mutex_unlock(&cgroup_mutex); 4465 mutex_unlock(&cgroup_mutex);
4482 return PTR_ERR(css); 4466 return PTR_ERR(css);
4483 } 4467 }
@@ -4493,7 +4477,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
4493 if (ret) { 4477 if (ret) {
4494 dummytop->subsys[ss->subsys_id] = NULL; 4478 dummytop->subsys[ss->subsys_id] = NULL;
4495 ss->destroy(dummytop); 4479 ss->destroy(dummytop);
4496 subsys[i] = NULL; 4480 subsys[ss->subsys_id] = NULL;
4497 mutex_unlock(&cgroup_mutex); 4481 mutex_unlock(&cgroup_mutex);
4498 return ret; 4482 return ret;
4499 } 4483 }
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index c75e3f9d060f..6bc460c38e4f 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -326,9 +326,7 @@ struct cgroup_subsys net_prio_subsys = {
326 .create = cgrp_create, 326 .create = cgrp_create,
327 .destroy = cgrp_destroy, 327 .destroy = cgrp_destroy,
328 .attach = net_prio_attach, 328 .attach = net_prio_attach,
329#ifdef CONFIG_NETPRIO_CGROUP
330 .subsys_id = net_prio_subsys_id, 329 .subsys_id = net_prio_subsys_id,
331#endif
332 .base_cftypes = ss_files, 330 .base_cftypes = ss_files,
333 .module = THIS_MODULE 331 .module = THIS_MODULE
334}; 332};
@@ -366,10 +364,6 @@ static int __init init_cgroup_netprio(void)
366 ret = cgroup_load_subsys(&net_prio_subsys); 364 ret = cgroup_load_subsys(&net_prio_subsys);
367 if (ret) 365 if (ret)
368 goto out; 366 goto out;
369#ifndef CONFIG_NETPRIO_CGROUP
370 smp_wmb();
371 net_prio_subsys_id = net_prio_subsys.subsys_id;
372#endif
373 367
374 register_netdevice_notifier(&netprio_device_notifier); 368 register_netdevice_notifier(&netprio_device_notifier);
375 369
@@ -386,11 +380,6 @@ static void __exit exit_cgroup_netprio(void)
386 380
387 cgroup_unload_subsys(&net_prio_subsys); 381 cgroup_unload_subsys(&net_prio_subsys);
388 382
389#ifndef CONFIG_NETPRIO_CGROUP
390 net_prio_subsys_id = -1;
391 synchronize_rcu();
392#endif
393
394 rtnl_lock(); 383 rtnl_lock();
395 for_each_netdev(&init_net, dev) { 384 for_each_netdev(&init_net, dev) {
396 old = rtnl_dereference(dev->priomap); 385 old = rtnl_dereference(dev->priomap);
diff --git a/net/core/sock.c b/net/core/sock.c
index ca3eaee66056..47b4ac048e88 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -326,17 +326,6 @@ int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
326} 326}
327EXPORT_SYMBOL(__sk_backlog_rcv); 327EXPORT_SYMBOL(__sk_backlog_rcv);
328 328
329#if defined(CONFIG_CGROUPS)
330#if !defined(CONFIG_NET_CLS_CGROUP)
331int net_cls_subsys_id = -1;
332EXPORT_SYMBOL_GPL(net_cls_subsys_id);
333#endif
334#if !defined(CONFIG_NETPRIO_CGROUP)
335int net_prio_subsys_id = -1;
336EXPORT_SYMBOL_GPL(net_prio_subsys_id);
337#endif
338#endif
339
340static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) 329static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
341{ 330{
342 struct timeval tv; 331 struct timeval tv;
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 7743ea8d1d38..67cf90d962f4 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -77,9 +77,7 @@ struct cgroup_subsys net_cls_subsys = {
77 .name = "net_cls", 77 .name = "net_cls",
78 .create = cgrp_create, 78 .create = cgrp_create,
79 .destroy = cgrp_destroy, 79 .destroy = cgrp_destroy,
80#ifdef CONFIG_NET_CLS_CGROUP
81 .subsys_id = net_cls_subsys_id, 80 .subsys_id = net_cls_subsys_id,
82#endif
83 .base_cftypes = ss_files, 81 .base_cftypes = ss_files,
84 .module = THIS_MODULE, 82 .module = THIS_MODULE,
85}; 83};
@@ -283,12 +281,6 @@ static int __init init_cgroup_cls(void)
283 if (ret) 281 if (ret)
284 goto out; 282 goto out;
285 283
286#ifndef CONFIG_NET_CLS_CGROUP
287 /* We can't use rcu_assign_pointer because this is an int. */
288 smp_wmb();
289 net_cls_subsys_id = net_cls_subsys.subsys_id;
290#endif
291
292 ret = register_tcf_proto_ops(&cls_cgroup_ops); 284 ret = register_tcf_proto_ops(&cls_cgroup_ops);
293 if (ret) 285 if (ret)
294 cgroup_unload_subsys(&net_cls_subsys); 286 cgroup_unload_subsys(&net_cls_subsys);
@@ -301,11 +293,6 @@ static void __exit exit_cgroup_cls(void)
301{ 293{
302 unregister_tcf_proto_ops(&cls_cgroup_ops); 294 unregister_tcf_proto_ops(&cls_cgroup_ops);
303 295
304#ifndef CONFIG_NET_CLS_CGROUP
305 net_cls_subsys_id = -1;
306 synchronize_rcu();
307#endif
308
309 cgroup_unload_subsys(&net_cls_subsys); 296 cgroup_unload_subsys(&net_cls_subsys);
310} 297}
311 298