aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2014-05-16 13:22:47 -0400
committerTejun Heo <tj@kernel.org>2014-05-16 13:22:47 -0400
commit3b514d24e200fcdcde0a57c354a51d3677a86743 (patch)
tree7f38e805f1c6e8086e0a6f51383a9776e380e11d
parent9d755d33f0db8c9b49438f71b38a56e375b34360 (diff)
cgroup: skip refcnting on normal root csses and cgrp_dfl_root self css
9395a4500404 ("cgroup: enable refcnting for root csses") enabled reference counting for root csses (cgroup_subsys_states) so that cgroup's self csses can be used to manage the lifetime of the containing cgroups. Unfortunately, this change was incorrect. During early init, cgrp_dfl_root self css refcnt is used. percpu_ref can't be initialized during early init and its initialization is deferred till cgroup_init() time. This means that cpu was using percpu_ref which wasn't properly initialized. Due to the way percpu variables are laid out on x86, this didn't blow up immediately on x86 but ended up incrementing and decrementing the percpu variable at offset zero, whatever it may be; however, on other archs, this caused a fault and early boot failure. As cgroup self csses for root cgroups of non-dfl hierarchies need working refcounting, we can't revert 9395a4500404. This patch adds CSS_NO_REF which explicitly inhibits reference counting on the css and sets it on all normal (non-self) csses and cgroup_dfl_root self css. v2: cgrp_dfl_root.self is the offending one. Set the flag on it. Signed-off-by: Tejun Heo <tj@kernel.org> Reported-by: Stephen Warren <swarren@nvidia.com> Tested-by: Stephen Warren <swarren@nvidia.com> Fixes: 9395a4500404 ("cgroup: enable refcnting for root csses")
-rw-r--r--include/linux/cgroup.h11
-rw-r--r--kernel/cgroup.c11
2 files changed, 17 insertions, 5 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 76dadd77a120..1737db0c63fe 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -77,6 +77,7 @@ struct cgroup_subsys_state {
77 77
78/* bits in struct cgroup_subsys_state flags field */ 78/* bits in struct cgroup_subsys_state flags field */
79enum { 79enum {
80 CSS_NO_REF = (1 << 0), /* no reference counting for this css */
80 CSS_ONLINE = (1 << 1), /* between ->css_online() and ->css_offline() */ 81 CSS_ONLINE = (1 << 1), /* between ->css_online() and ->css_offline() */
81}; 82};
82 83
@@ -88,7 +89,8 @@ enum {
88 */ 89 */
89static inline void css_get(struct cgroup_subsys_state *css) 90static inline void css_get(struct cgroup_subsys_state *css)
90{ 91{
91 percpu_ref_get(&css->refcnt); 92 if (!(css->flags & CSS_NO_REF))
93 percpu_ref_get(&css->refcnt);
92} 94}
93 95
94/** 96/**
@@ -103,7 +105,9 @@ static inline void css_get(struct cgroup_subsys_state *css)
103 */ 105 */
104static inline bool css_tryget_online(struct cgroup_subsys_state *css) 106static inline bool css_tryget_online(struct cgroup_subsys_state *css)
105{ 107{
106 return percpu_ref_tryget_live(&css->refcnt); 108 if (!(css->flags & CSS_NO_REF))
109 return percpu_ref_tryget_live(&css->refcnt);
110 return true;
107} 111}
108 112
109/** 113/**
@@ -114,7 +118,8 @@ static inline bool css_tryget_online(struct cgroup_subsys_state *css)
114 */ 118 */
115static inline void css_put(struct cgroup_subsys_state *css) 119static inline void css_put(struct cgroup_subsys_state *css)
116{ 120{
117 percpu_ref_put(&css->refcnt); 121 if (!(css->flags & CSS_NO_REF))
122 percpu_ref_put(&css->refcnt);
118} 123}
119 124
120/* bits in struct cgroup flags field */ 125/* bits in struct cgroup flags field */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index c01e8e8dfad0..0343d7ee6d62 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4593,11 +4593,17 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
4593 /* We don't handle early failures gracefully */ 4593 /* We don't handle early failures gracefully */
4594 BUG_ON(IS_ERR(css)); 4594 BUG_ON(IS_ERR(css));
4595 init_and_link_css(css, ss, &cgrp_dfl_root.cgrp); 4595 init_and_link_css(css, ss, &cgrp_dfl_root.cgrp);
4596
4597 /*
4598 * Root csses are never destroyed and we can't initialize
4599 * percpu_ref during early init. Disable refcnting.
4600 */
4601 css->flags |= CSS_NO_REF;
4602
4596 if (early) { 4603 if (early) {
4597 /* allocation can't be done safely during early init */ 4604 /* allocation can't be done safely during early init */
4598 css->id = 1; 4605 css->id = 1;
4599 } else { 4606 } else {
4600 BUG_ON(percpu_ref_init(&css->refcnt, css_release));
4601 css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2, GFP_KERNEL); 4607 css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2, GFP_KERNEL);
4602 BUG_ON(css->id < 0); 4608 BUG_ON(css->id < 0);
4603 } 4609 }
@@ -4636,6 +4642,8 @@ int __init cgroup_init_early(void)
4636 int i; 4642 int i;
4637 4643
4638 init_cgroup_root(&cgrp_dfl_root, &opts); 4644 init_cgroup_root(&cgrp_dfl_root, &opts);
4645 cgrp_dfl_root.cgrp.self.flags |= CSS_NO_REF;
4646
4639 RCU_INIT_POINTER(init_task.cgroups, &init_css_set); 4647 RCU_INIT_POINTER(init_task.cgroups, &init_css_set);
4640 4648
4641 for_each_subsys(ss, i) { 4649 for_each_subsys(ss, i) {
@@ -4684,7 +4692,6 @@ int __init cgroup_init(void)
4684 struct cgroup_subsys_state *css = 4692 struct cgroup_subsys_state *css =
4685 init_css_set.subsys[ss->id]; 4693 init_css_set.subsys[ss->id];
4686 4694
4687 BUG_ON(percpu_ref_init(&css->refcnt, css_release));
4688 css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2, 4695 css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2,
4689 GFP_KERNEL); 4696 GFP_KERNEL);
4690 BUG_ON(css->id < 0); 4697 BUG_ON(css->id < 0);