aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-04-29 22:14:20 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2013-04-29 22:14:20 -0400
commit191a712090bb8a10e6f129360eeed2d68f3d4c9a (patch)
tree17e2d6c27fb8a7c3a61828fbcc7c343a4966a0a9 /include
parent46d9be3e5eb01f71fc02653755d970247174b400 (diff)
parent2a0010af17b1739ef8ea8cf02647a127241ee674 (diff)
Merge branch 'for-3.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup updates from Tejun Heo: - Fixes and a lot of cleanups. Locking cleanup is finally complete. cgroup_mutex is no longer exposed to individual controllers which used to cause nasty deadlock issues. Li fixed and cleaned up quite a bit including long-standing ones like racy cgroup_path(). - device cgroup now supports proper hierarchy thanks to Aristeu. - perf_event cgroup now supports proper hierarchy. - A new mount option "__DEVEL__sane_behavior" is added. As indicated by the name, this option is to be used for development only at this point and generates a warning message when used. Unfortunately, cgroup interface currently has too many breakages and inconsistencies to implement a consistent and unified hierarchy on top. The new flag is used to collect the behavior changes which are necessary to implement consistent unified hierarchy. It's likely that this flag won't be used verbatim when it becomes ready but will be enabled implicitly along with unified hierarchy. The option currently disables some of the broken behaviors in cgroup core and also .use_hierarchy switch in memcg (will be routed through -mm), which can be used to make very unusual hierarchy where nesting is partially honored. It will also be used to implement hierarchy support for blk-throttle which would be impossible otherwise without introducing a full separate set of control knobs. This is essentially versioning of interface which isn't very nice but at this point I can't see any other options which would allow keeping the interface the same while moving towards hierarchy behavior which is at least somewhat sane. The planned unified hierarchy is likely to require some level of adaptation from userland anyway, so I think it'd be best to take the chance and update the interface such that it's supportable in the long term. 
Maintaining the existing interface does complicate cgroup core but shouldn't put too much strain on individual controllers and I think it'd be manageable for the foreseeable future. Maybe we'll be able to drop it in a decade. Fix up conflicts (including a semantic one adding a new #include to ppc that was uncovered by the header file changes) as per Tejun. * 'for-3.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: (45 commits) cpuset: fix compile warning when CONFIG_SMP=n cpuset: fix cpu hotplug vs rebuild_sched_domains() race cpuset: use rebuild_sched_domains() in cpuset_hotplug_workfn() cgroup: restore the call to eventfd->poll() cgroup: fix use-after-free when umounting cgroupfs cgroup: fix broken file xattrs devcg: remove parent_cgroup. memcg: force use_hierarchy if sane_behavior cgroup: remove cgrp->top_cgroup cgroup: introduce sane_behavior mount option move cgroupfs_root to include/linux/cgroup.h cgroup: convert cgroupfs_root flag bits to masks and add CGRP_ prefix cgroup: make cgroup_path() not print double slashes Revert "cgroup: remove bind() method from cgroup_subsys." perf: make perf_event cgroup hierarchical cgroup: implement cgroup_is_descendant() cgroup: make sure parent won't be destroyed before its children cgroup: remove bind() method from cgroup_subsys. devcg: remove broken_hierarchy tag cgroup: remove cgroup_lock_is_held() ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/cgroup.h170
-rw-r--r--include/linux/cpuset.h1
-rw-r--r--include/linux/res_counter.h2
3 files changed, 153 insertions, 20 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 470073bf93d0..d86e215ca2b8 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -19,6 +19,7 @@
19#include <linux/idr.h> 19#include <linux/idr.h>
20#include <linux/workqueue.h> 20#include <linux/workqueue.h>
21#include <linux/xattr.h> 21#include <linux/xattr.h>
22#include <linux/fs.h>
22 23
23#ifdef CONFIG_CGROUPS 24#ifdef CONFIG_CGROUPS
24 25
@@ -30,10 +31,6 @@ struct css_id;
30 31
31extern int cgroup_init_early(void); 32extern int cgroup_init_early(void);
32extern int cgroup_init(void); 33extern int cgroup_init(void);
33extern void cgroup_lock(void);
34extern int cgroup_lock_is_held(void);
35extern bool cgroup_lock_live_group(struct cgroup *cgrp);
36extern void cgroup_unlock(void);
37extern void cgroup_fork(struct task_struct *p); 34extern void cgroup_fork(struct task_struct *p);
38extern void cgroup_post_fork(struct task_struct *p); 35extern void cgroup_post_fork(struct task_struct *p);
39extern void cgroup_exit(struct task_struct *p, int run_callbacks); 36extern void cgroup_exit(struct task_struct *p, int run_callbacks);
@@ -44,14 +41,25 @@ extern void cgroup_unload_subsys(struct cgroup_subsys *ss);
44 41
45extern const struct file_operations proc_cgroup_operations; 42extern const struct file_operations proc_cgroup_operations;
46 43
47/* Define the enumeration of all builtin cgroup subsystems */ 44/*
45 * Define the enumeration of all cgroup subsystems.
46 *
47 * We define ids for builtin subsystems and then modular ones.
48 */
48#define SUBSYS(_x) _x ## _subsys_id, 49#define SUBSYS(_x) _x ## _subsys_id,
49#define IS_SUBSYS_ENABLED(option) IS_ENABLED(option)
50enum cgroup_subsys_id { 50enum cgroup_subsys_id {
51#define IS_SUBSYS_ENABLED(option) IS_BUILTIN(option)
52#include <linux/cgroup_subsys.h>
53#undef IS_SUBSYS_ENABLED
54 CGROUP_BUILTIN_SUBSYS_COUNT,
55
56 __CGROUP_SUBSYS_TEMP_PLACEHOLDER = CGROUP_BUILTIN_SUBSYS_COUNT - 1,
57
58#define IS_SUBSYS_ENABLED(option) IS_MODULE(option)
51#include <linux/cgroup_subsys.h> 59#include <linux/cgroup_subsys.h>
60#undef IS_SUBSYS_ENABLED
52 CGROUP_SUBSYS_COUNT, 61 CGROUP_SUBSYS_COUNT,
53}; 62};
54#undef IS_SUBSYS_ENABLED
55#undef SUBSYS 63#undef SUBSYS
56 64
57/* Per-subsystem/per-cgroup state maintained by the system. */ 65/* Per-subsystem/per-cgroup state maintained by the system. */
@@ -148,6 +156,13 @@ enum {
148 * specified at mount time and thus is implemented here. 156 * specified at mount time and thus is implemented here.
149 */ 157 */
150 CGRP_CPUSET_CLONE_CHILDREN, 158 CGRP_CPUSET_CLONE_CHILDREN,
159 /* see the comment above CGRP_ROOT_SANE_BEHAVIOR for details */
160 CGRP_SANE_BEHAVIOR,
161};
162
163struct cgroup_name {
164 struct rcu_head rcu_head;
165 char name[];
151}; 166};
152 167
153struct cgroup { 168struct cgroup {
@@ -172,11 +187,23 @@ struct cgroup {
172 struct cgroup *parent; /* my parent */ 187 struct cgroup *parent; /* my parent */
173 struct dentry *dentry; /* cgroup fs entry, RCU protected */ 188 struct dentry *dentry; /* cgroup fs entry, RCU protected */
174 189
190 /*
191 * This is a copy of dentry->d_name, and it's needed because
192 * we can't use dentry->d_name in cgroup_path().
193 *
194 * You must acquire rcu_read_lock() to access cgrp->name, and
195 * the only place that can change it is rename(), which is
196 * protected by parent dir's i_mutex.
197 *
198 * Normally you should use cgroup_name() wrapper rather than
199 * access it directly.
200 */
201 struct cgroup_name __rcu *name;
202
175 /* Private pointers for each registered subsystem */ 203 /* Private pointers for each registered subsystem */
176 struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; 204 struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
177 205
178 struct cgroupfs_root *root; 206 struct cgroupfs_root *root;
179 struct cgroup *top_cgroup;
180 207
181 /* 208 /*
182 * List of cg_cgroup_links pointing at css_sets with 209 * List of cg_cgroup_links pointing at css_sets with
@@ -213,6 +240,96 @@ struct cgroup {
213 struct simple_xattrs xattrs; 240 struct simple_xattrs xattrs;
214}; 241};
215 242
243#define MAX_CGROUP_ROOT_NAMELEN 64
244
245/* cgroupfs_root->flags */
246enum {
247 /*
248 * Unfortunately, cgroup core and various controllers are riddled
249 * with idiosyncrasies and pointless options. The following flag,
250 * when set, will force sane behavior - some options are forced on,
251 * others are disallowed, and some controllers will change their
252 * hierarchical or other behaviors.
253 *
254 * The set of behaviors affected by this flag are still being
255 * determined and developed and the mount option for this flag is
256 * prefixed with __DEVEL__. The prefix will be dropped once we
257 * reach the point where all behaviors are compatible with the
258 * planned unified hierarchy, which will automatically turn on this
259 * flag.
260 *
261 * The followings are the behaviors currently affected this flag.
262 *
263 * - Mount options "noprefix" and "clone_children" are disallowed.
264 * Also, cgroupfs file cgroup.clone_children is not created.
265 *
266 * - When mounting an existing superblock, mount options should
267 * match.
268 *
269 * - Remount is disallowed.
270 *
271 * - memcg: use_hierarchy is on by default and the cgroup file for
272 * the flag is not created.
273 *
274 * The followings are planned changes.
275 *
276 * - release_agent will be disallowed once replacement notification
277 * mechanism is implemented.
278 */
279 CGRP_ROOT_SANE_BEHAVIOR = (1 << 0),
280
281 CGRP_ROOT_NOPREFIX = (1 << 1), /* mounted subsystems have no named prefix */
282 CGRP_ROOT_XATTR = (1 << 2), /* supports extended attributes */
283};
284
285/*
286 * A cgroupfs_root represents the root of a cgroup hierarchy, and may be
287 * associated with a superblock to form an active hierarchy. This is
288 * internal to cgroup core. Don't access directly from controllers.
289 */
290struct cgroupfs_root {
291 struct super_block *sb;
292
293 /*
294 * The bitmask of subsystems intended to be attached to this
295 * hierarchy
296 */
297 unsigned long subsys_mask;
298
299 /* Unique id for this hierarchy. */
300 int hierarchy_id;
301
302 /* The bitmask of subsystems currently attached to this hierarchy */
303 unsigned long actual_subsys_mask;
304
305 /* A list running through the attached subsystems */
306 struct list_head subsys_list;
307
308 /* The root cgroup for this hierarchy */
309 struct cgroup top_cgroup;
310
311 /* Tracks how many cgroups are currently defined in hierarchy.*/
312 int number_of_cgroups;
313
314 /* A list running through the active hierarchies */
315 struct list_head root_list;
316
317 /* All cgroups on this root, cgroup_mutex protected */
318 struct list_head allcg_list;
319
320 /* Hierarchy-specific flags */
321 unsigned long flags;
322
323 /* IDs for cgroups in this hierarchy */
324 struct ida cgroup_ida;
325
326 /* The path to use for release notifications. */
327 char release_agent_path[PATH_MAX];
328
329 /* The name for this hierarchy - may be empty */
330 char name[MAX_CGROUP_ROOT_NAMELEN];
331};
332
216/* 333/*
217 * A css_set is a structure holding pointers to a set of 334 * A css_set is a structure holding pointers to a set of
218 * cgroup_subsys_state objects. This saves space in the task struct 335 * cgroup_subsys_state objects. This saves space in the task struct
@@ -278,6 +395,7 @@ struct cgroup_map_cb {
278/* cftype->flags */ 395/* cftype->flags */
279#define CFTYPE_ONLY_ON_ROOT (1U << 0) /* only create on root cg */ 396#define CFTYPE_ONLY_ON_ROOT (1U << 0) /* only create on root cg */
280#define CFTYPE_NOT_ON_ROOT (1U << 1) /* don't create on root cg */ 397#define CFTYPE_NOT_ON_ROOT (1U << 1) /* don't create on root cg */
398#define CFTYPE_INSANE (1U << 2) /* don't create if sane_behavior */
281 399
282#define MAX_CFTYPE_NAME 64 400#define MAX_CFTYPE_NAME 64
283 401
@@ -304,9 +422,6 @@ struct cftype {
304 /* CFTYPE_* flags */ 422 /* CFTYPE_* flags */
305 unsigned int flags; 423 unsigned int flags;
306 424
307 /* file xattrs */
308 struct simple_xattrs xattrs;
309
310 int (*open)(struct inode *inode, struct file *file); 425 int (*open)(struct inode *inode, struct file *file);
311 ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft, 426 ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft,
312 struct file *file, 427 struct file *file,
@@ -404,18 +519,31 @@ struct cgroup_scanner {
404 void *data; 519 void *data;
405}; 520};
406 521
522/*
523 * See the comment above CGRP_ROOT_SANE_BEHAVIOR for details. This
524 * function can be called as long as @cgrp is accessible.
525 */
526static inline bool cgroup_sane_behavior(const struct cgroup *cgrp)
527{
528 return cgrp->root->flags & CGRP_ROOT_SANE_BEHAVIOR;
529}
530
531/* Caller should hold rcu_read_lock() */
532static inline const char *cgroup_name(const struct cgroup *cgrp)
533{
534 return rcu_dereference(cgrp->name)->name;
535}
536
407int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); 537int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
408int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); 538int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
409 539
410int cgroup_is_removed(const struct cgroup *cgrp); 540int cgroup_is_removed(const struct cgroup *cgrp);
541bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor);
411 542
412int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen); 543int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen);
413 544
414int cgroup_task_count(const struct cgroup *cgrp); 545int cgroup_task_count(const struct cgroup *cgrp);
415 546
416/* Return true if cgrp is a descendant of the task's cgroup */
417int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task);
418
419/* 547/*
420 * Control Group taskset, used to pass around set of tasks to cgroup_subsys 548 * Control Group taskset, used to pass around set of tasks to cgroup_subsys
421 * methods. 549 * methods.
@@ -523,10 +651,16 @@ static inline struct cgroup_subsys_state *cgroup_subsys_state(
523 * rcu_dereference_check() conditions, such as locks used during the 651 * rcu_dereference_check() conditions, such as locks used during the
524 * cgroup_subsys::attach() methods. 652 * cgroup_subsys::attach() methods.
525 */ 653 */
654#ifdef CONFIG_PROVE_RCU
655extern struct mutex cgroup_mutex;
656#define task_subsys_state_check(task, subsys_id, __c) \
657 rcu_dereference_check((task)->cgroups->subsys[(subsys_id)], \
658 lockdep_is_held(&(task)->alloc_lock) || \
659 lockdep_is_held(&cgroup_mutex) || (__c))
660#else
526#define task_subsys_state_check(task, subsys_id, __c) \ 661#define task_subsys_state_check(task, subsys_id, __c) \
527 rcu_dereference_check(task->cgroups->subsys[subsys_id], \ 662 rcu_dereference((task)->cgroups->subsys[(subsys_id)])
528 lockdep_is_held(&task->alloc_lock) || \ 663#endif
529 cgroup_lock_is_held() || (__c))
530 664
531static inline struct cgroup_subsys_state * 665static inline struct cgroup_subsys_state *
532task_subsys_state(struct task_struct *task, int subsys_id) 666task_subsys_state(struct task_struct *task, int subsys_id)
@@ -661,8 +795,8 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
661 struct cgroup_iter *it); 795 struct cgroup_iter *it);
662void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it); 796void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it);
663int cgroup_scan_tasks(struct cgroup_scanner *scan); 797int cgroup_scan_tasks(struct cgroup_scanner *scan);
664int cgroup_attach_task(struct cgroup *, struct task_struct *);
665int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); 798int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
799int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);
666 800
667/* 801/*
668 * CSS ID is ID for cgroup_subsys_state structs under subsys. This only works 802 * CSS ID is ID for cgroup_subsys_state structs under subsys. This only works
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 8c8a60d29407..ccd1de8ad822 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -11,7 +11,6 @@
11#include <linux/sched.h> 11#include <linux/sched.h>
12#include <linux/cpumask.h> 12#include <linux/cpumask.h>
13#include <linux/nodemask.h> 13#include <linux/nodemask.h>
14#include <linux/cgroup.h>
15#include <linux/mm.h> 14#include <linux/mm.h>
16 15
17#ifdef CONFIG_CPUSETS 16#ifdef CONFIG_CPUSETS
diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index c23099413ad6..96a509b6be04 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -13,7 +13,7 @@
13 * info about what this counter is. 13 * info about what this counter is.
14 */ 14 */
15 15
16#include <linux/cgroup.h> 16#include <linux/spinlock.h>
17#include <linux/errno.h> 17#include <linux/errno.h>
18 18
19/* 19/*