aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux/cgroup.h
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2013-06-13 22:39:16 -0400
committerTejun Heo <tj@kernel.org>2013-06-13 22:43:12 -0400
commitd3daf28da16a30af95bfb303189a634a87606725 (patch)
treed73530e2b759b2f180daec0814124e71e8bb149b /include/linux/cgroup.h
parent2b0e53a7c8a6972755c0f0152d7fad2289fdc5eb (diff)
cgroup: use percpu refcnt for cgroup_subsys_states
A css (cgroup_subsys_state) is how each cgroup is represented to a controller. As such, it can be used in hot paths across the various subsystems different controllers are associated with. One of the common operations is reference counting, which up until now has been implemented using a global atomic counter and can have significant adverse impact on scalability. For example, css refcnt can be gotten and put multiple times by blkcg for each IO request. For highops configurations which try to do as much per-cpu as possible, the global frequent refcnting can be very expensive. In general, given the various and hugely diverse paths css's end up being used from, we need to make it cheap and highly scalable. In its usage, css refcnting isn't very different from module refcnting. This patch converts css refcnting to use the recently added percpu_ref. css_get/tryget/put() directly maps to the matching percpu_ref operations and the deactivation logic is no longer necessary as percpu_ref already has refcnt killing. The only complication is that as the refcnt is per-cpu, percpu_ref_kill() in itself doesn't ensure that further tryget operations will fail, which we need to guarantee before invoking ->css_offline()'s. This is resolved collecting kill confirmation using percpu_ref_kill_and_confirm() and initiating the offline phase of destruction after all css refcnt's are confirmed to be seen as killed on all CPUs. The previous patches already splitted destruction into two phases, so percpu_ref_kill_and_confirm() can be hooked up easily. This patch removes css_refcnt() which is used for rcu dereference sanity check in css_id(). While we can add a percpu refcnt API to ask the same question, css_id() itself is scheduled to be removed fairly soon, so let's not bother with it. Just drop the sanity check and use rcu_dereference_raw() instead. v2: - init_cgroup_css() was calling percpu_ref_init() without checking the return value. This causes two problems - the obvious lack of error handling and percpu_ref_init() being called from cgroup_init_subsys() before the allocators are up, which triggers warnings but doesn't cause actual problems as the refcnt isn't used for roots anyway. Fix both by moving percpu_ref_init() to cgroup_create(). - The base references were put too early by percpu_ref_kill_and_confirm() and cgroup_offline_fn() put the refs one extra time. This wasn't noticeable because css's go through another RCU grace period before being freed. Update cgroup_destroy_locked() to grab an extra reference before killing the refcnts. This problem was noticed by Kent. Signed-off-by: Tejun Heo <tj@kernel.org> Reviewed-by: Kent Overstreet <koverstreet@google.com> Acked-by: Li Zefan <lizefan@huawei.com> Cc: Michal Hocko <mhocko@suse.cz> Cc: Mike Snitzer <snitzer@redhat.com> Cc: Vivek Goyal <vgoyal@redhat.com> Cc: "Alasdair G. Kergon" <agk@redhat.com> Cc: Jens Axboe <axboe@kernel.dk> Cc: Mikulas Patocka <mpatocka@redhat.com> Cc: Glauber Costa <glommer@gmail.com>
Diffstat (limited to 'include/linux/cgroup.h')
-rw-r--r--include/linux/cgroup.h23
1 files changed, 8 insertions, 15 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index e345d8b90046..b7bd4beae294 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -20,6 +20,7 @@
20#include <linux/workqueue.h> 20#include <linux/workqueue.h>
21#include <linux/xattr.h> 21#include <linux/xattr.h>
22#include <linux/fs.h> 22#include <linux/fs.h>
23#include <linux/percpu-refcount.h>
23 24
24#ifdef CONFIG_CGROUPS 25#ifdef CONFIG_CGROUPS
25 26
@@ -72,13 +73,8 @@ struct cgroup_subsys_state {
72 */ 73 */
73 struct cgroup *cgroup; 74 struct cgroup *cgroup;
74 75
75 /* 76 /* reference count - access via css_[try]get() and css_put() */
76 * State maintained by the cgroup system to allow subsystems 77 struct percpu_ref refcnt;
77 * to be "busy". Should be accessed via css_get(),
78 * css_tryget() and css_put().
79 */
80
81 atomic_t refcnt;
82 78
83 unsigned long flags; 79 unsigned long flags;
84 /* ID for this css, if possible */ 80 /* ID for this css, if possible */
@@ -104,11 +100,9 @@ static inline void css_get(struct cgroup_subsys_state *css)
104{ 100{
105 /* We don't need to reference count the root state */ 101 /* We don't need to reference count the root state */
106 if (!(css->flags & CSS_ROOT)) 102 if (!(css->flags & CSS_ROOT))
107 atomic_inc(&css->refcnt); 103 percpu_ref_get(&css->refcnt);
108} 104}
109 105
110extern bool __css_tryget(struct cgroup_subsys_state *css);
111
112/** 106/**
113 * css_tryget - try to obtain a reference on the specified css 107 * css_tryget - try to obtain a reference on the specified css
114 * @css: target css 108 * @css: target css
@@ -123,11 +117,9 @@ static inline bool css_tryget(struct cgroup_subsys_state *css)
123{ 117{
124 if (css->flags & CSS_ROOT) 118 if (css->flags & CSS_ROOT)
125 return true; 119 return true;
126 return __css_tryget(css); 120 return percpu_ref_tryget(&css->refcnt);
127} 121}
128 122
129extern void __css_put(struct cgroup_subsys_state *css);
130
131/** 123/**
132 * css_put - put a css reference 124 * css_put - put a css reference
133 * @css: target css 125 * @css: target css
@@ -137,7 +129,7 @@ extern void __css_put(struct cgroup_subsys_state *css);
137static inline void css_put(struct cgroup_subsys_state *css) 129static inline void css_put(struct cgroup_subsys_state *css)
138{ 130{
139 if (!(css->flags & CSS_ROOT)) 131 if (!(css->flags & CSS_ROOT))
140 __css_put(css); 132 percpu_ref_put(&css->refcnt);
141} 133}
142 134
143/* bits in struct cgroup flags field */ 135/* bits in struct cgroup flags field */
@@ -231,9 +223,10 @@ struct cgroup {
231 struct list_head pidlists; 223 struct list_head pidlists;
232 struct mutex pidlist_mutex; 224 struct mutex pidlist_mutex;
233 225
234 /* For RCU-protected deletion */ 226 /* For css percpu_ref killing and RCU-protected deletion */
235 struct rcu_head rcu_head; 227 struct rcu_head rcu_head;
236 struct work_struct destroy_work; 228 struct work_struct destroy_work;
229 atomic_t css_kill_cnt;
237 230
238 /* List of events which userspace want to receive */ 231 /* List of events which userspace want to receive */
239 struct list_head event_list; 232 struct list_head event_list;