-rw-r--r--  include/linux/cgroup.h  |  50
-rw-r--r--  include/linux/idr.h     |   1
-rw-r--r--  kernel/cgroup.c         | 286
-rw-r--r--  lib/idr.c               |  46
4 files changed, 382 insertions(+), 1 deletion(-)
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 788c4964c142..9a23bb098205 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -15,6 +15,7 @@
 #include <linux/cgroupstats.h>
 #include <linux/prio_heap.h>
 #include <linux/rwsem.h>
+#include <linux/idr.h>
 
 #ifdef CONFIG_CGROUPS
 
@@ -22,6 +23,7 @@ struct cgroupfs_root;
 struct cgroup_subsys;
 struct inode;
 struct cgroup;
+struct css_id;
 
 extern int cgroup_init_early(void);
 extern int cgroup_init(void);
@@ -63,6 +65,8 @@ struct cgroup_subsys_state {
 	atomic_t refcnt;
 
 	unsigned long flags;
+	/* ID for this css, if possible */
+	struct css_id *id;
 };
 
 /* bits in struct cgroup_subsys_state flags field */
@@ -373,6 +377,11 @@ struct cgroup_subsys {
 	int active;
 	int disabled;
 	int early_init;
+	/*
+	 * True if this subsys uses ID. ID is not available before cgroup_init()
+	 * (not available in early_init time.)
+	 */
+	bool use_id;
 #define MAX_CGROUP_TYPE_NAMELEN 32
 	const char *name;
 
@@ -395,6 +404,9 @@ struct cgroup_subsys {
 	 */
 	struct cgroupfs_root *root;
 	struct list_head sibling;
+	/* used when use_id == true */
+	struct idr idr;
+	spinlock_t id_lock;
 };
 
 #define SUBSYS(_x) extern struct cgroup_subsys _x ## _subsys;
@@ -450,6 +462,44 @@ void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it);
 int cgroup_scan_tasks(struct cgroup_scanner *scan);
 int cgroup_attach_task(struct cgroup *, struct task_struct *);
 
+/*
+ * CSS ID is an ID for cgroup_subsys_state structs under a subsys. It only works
+ * if cgroup_subsys.use_id == true. It can be used for looking up and scanning.
+ * CSS ID is assigned at cgroup allocation (create) automatically
+ * and removed when the subsys calls the free_css_id() function. This is because
+ * the lifetime of cgroup_subsys_state is the subsys's matter.
+ *
+ * Looking up and scanning functions should be called under rcu_read_lock().
+ * Taking cgroup_mutex()/hierarchy_mutex() is not necessary for the following calls.
+ * But the css returned by these routines can be "not populated yet" or "being
+ * destroyed". The caller should check css and cgroup's status.
+ */
+
+/*
+ * Typically called at ->destroy(), or somewhere the subsys frees
+ * cgroup_subsys_state.
+ */
+void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css);
+
+/* Find a cgroup_subsys_state which has the given ID */
+
+struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id);
+
+/*
+ * Get a cgroup whose id is greater than or equal to id under the tree of root.
+ * Returns a cgroup_subsys_state or NULL.
+ */
+struct cgroup_subsys_state *css_get_next(struct cgroup_subsys *ss, int id,
+		struct cgroup_subsys_state *root, int *foundid);
+
+/* Returns true if root is an ancestor of cg */
+bool css_is_ancestor(struct cgroup_subsys_state *cg,
+		     struct cgroup_subsys_state *root);
+
+/* Get id and depth of css */
+unsigned short css_id(struct cgroup_subsys_state *css);
+unsigned short css_depth(struct cgroup_subsys_state *css);
+
 #else /* !CONFIG_CGROUPS */
 
 static inline int cgroup_init_early(void) { return 0; }
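
The declarations above are easiest to read next to a caller. Below is a minimal sketch, not part of this patch, of how a controller that sets .use_id = true (and calls free_css_id() from its ->destroy() callback) might use the lookup API under RCU; the helper name foo_id_under() and its callers are hypothetical.

#include <linux/cgroup.h>
#include <linux/rcupdate.h>

/* Sketch only: check whether the css registered under @id sits below @root
 * in the hierarchy of the (hypothetical) subsystem @ss. */
static bool foo_id_under(struct cgroup_subsys *ss, int id,
			 struct cgroup_subsys_state *root)
{
	struct cgroup_subsys_state *css;
	bool ret = false;

	rcu_read_lock();
	css = css_lookup(ss, id);	/* may be NULL, or not populated yet */
	if (css)
		ret = css_is_ancestor(css, root);
	rcu_read_unlock();
	return ret;
}
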
diff --git a/include/linux/idr.h b/include/linux/idr.h
index dd846df8cd32..e968db71e33a 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -106,6 +106,7 @@ int idr_get_new(struct idr *idp, void *ptr, int *id);
 int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id);
 int idr_for_each(struct idr *idp,
 		 int (*fn)(int id, void *p, void *data), void *data);
+void *idr_get_next(struct idr *idp, int *nextid);
 void *idr_replace(struct idr *idp, void *ptr, int id);
 void idr_remove(struct idr *idp, int id);
 void idr_remove_all(struct idr *idp);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 27792bcb0758..d3c521137425 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -94,7 +94,6 @@ struct cgroupfs_root {
 	char release_agent_path[PATH_MAX];
 };
 
-
 /*
  * The "rootnode" hierarchy is the "dummy hierarchy", reserved for the
  * subsystems that are otherwise unattached - it never has more than a
@@ -102,6 +101,39 @@ struct cgroupfs_root {
  */
 static struct cgroupfs_root rootnode;
 
+/*
+ * CSS ID -- ID per subsys's Cgroup Subsys State (CSS). Used only when
+ * cgroup_subsys->use_id != 0.
+ */
+#define CSS_ID_MAX	(65535)
+struct css_id {
+	/*
+	 * The css to which this ID points. This pointer is set to a valid value
+	 * after cgroup is populated. If cgroup is removed, this will be NULL.
+	 * This pointer is expected to be RCU-safe because destroy()
+	 * is called after synchronize_rcu(). But for safe use, css_is_removed()
+	 * or css_tryget() should be used for avoiding races.
+	 */
+	struct cgroup_subsys_state *css;
+	/*
+	 * ID of this css.
+	 */
+	unsigned short id;
+	/*
+	 * Depth in the hierarchy which this ID belongs to.
+	 */
+	unsigned short depth;
+	/*
+	 * ID is freed by RCU. (and the lookup routine is RCU safe.)
+	 */
+	struct rcu_head rcu_head;
+	/*
+	 * Hierarchy this CSS ID belongs to.
+	 */
+	unsigned short stack[0]; /* Array of length (depth+1) */
+};
+
+
 /* The list of hierarchy roots */
 
 static LIST_HEAD(roots);
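
To make the stack[] layout concrete: each css_id records the IDs along the path from the root down to itself, so an ancestry test is a single array compare. A small worked example with made-up IDs follows (a userspace-style sketch, not kernel code).

#include <assert.h>

int main(void)
{
	/* hypothetical hierarchy: root (id 1) -> middle (id 4) -> leaf (id 7) */
	unsigned short root_id = 1, root_depth = 0;
	unsigned short leaf_stack[] = { 1, 4, 7 };
	unsigned short leaf_depth = 2;

	/* css_is_ancestor(leaf, root) reduces to these two checks */
	assert(leaf_depth >= root_depth);
	assert(leaf_stack[root_depth] == root_id);
	return 0;
}
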
@@ -185,6 +217,8 @@ struct cg_cgroup_link {
 static struct css_set init_css_set;
 static struct cg_cgroup_link init_css_set_link;
 
+static int cgroup_subsys_init_idr(struct cgroup_subsys *ss);
+
 /* css_set_lock protects the list of css_set objects, and the
  * chain of tasks off each css_set. Nests outside task->alloc_lock
  * due to cgroup_iter_start() */
@@ -567,6 +601,9 @@ static struct backing_dev_info cgroup_backing_dev_info = {
 	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK,
 };
 
+static int alloc_css_id(struct cgroup_subsys *ss,
+			struct cgroup *parent, struct cgroup *child);
+
 static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
 {
 	struct inode *inode = new_inode(sb);
@@ -2327,6 +2364,17 @@ static int cgroup_populate_dir(struct cgroup *cgrp)
 		if (ss->populate && (err = ss->populate(ss, cgrp)) < 0)
 			return err;
 	}
+	/* This cgroup is ready now */
+	for_each_subsys(cgrp->root, ss) {
+		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
+		/*
+		 * Update id->css pointer and make this css visible from
+		 * CSS ID functions. This pointer will be dereferenced
+		 * from RCU-read-side without locks.
+		 */
+		if (css->id)
+			rcu_assign_pointer(css->id->css, css);
+	}
 
 	return 0;
 }
@@ -2338,6 +2386,7 @@ static void init_cgroup_css(struct cgroup_subsys_state *css,
 	css->cgroup = cgrp;
 	atomic_set(&css->refcnt, 1);
 	css->flags = 0;
+	css->id = NULL;
 	if (cgrp == dummytop)
 		set_bit(CSS_ROOT, &css->flags);
 	BUG_ON(cgrp->subsys[ss->subsys_id]);
@@ -2413,6 +2462,10 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 			goto err_destroy;
 		}
 		init_cgroup_css(css, ss, cgrp);
+		if (ss->use_id)
+			if (alloc_css_id(ss, parent, cgrp))
+				goto err_destroy;
+		/* At error, ->destroy() callback has to free assigned ID. */
 	}
 
 	cgroup_lock_hierarchy(root);
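
The error comment above relies on the subsystem's ->destroy() releasing the ID. A hypothetical callback might look like the sketch below, assuming the ->destroy(ss, cgrp) signature of this era and a made-up foo_cgroup container that embeds its css; none of these names are part of the patch.

#include <linux/cgroup.h>
#include <linux/slab.h>

/* Sketch only: hypothetical controller state embedding its css. */
struct foo_cgroup {
	struct cgroup_subsys_state css;
	/* ... controller-private fields ... */
};

static void foo_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
{
	struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];

	free_css_id(ss, css);		/* give the CSS ID back first */
	kfree(container_of(css, struct foo_cgroup, css));
}
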
@@ -2708,6 +2761,8 @@ int __init cgroup_init(void)
 		struct cgroup_subsys *ss = subsys[i];
 		if (!ss->early_init)
 			cgroup_init_subsys(ss);
+		if (ss->use_id)
+			cgroup_subsys_init_idr(ss);
 	}
 
 	/* Add init_css_set to the hash table */
@@ -3242,3 +3297,232 @@ static int __init cgroup_disable(char *str)
 	return 1;
 }
 __setup("cgroup_disable=", cgroup_disable);
+
+/*
+ * Functions for CSS ID.
+ */
+
+/*
+ * To get an ID other than 0, this should be called when !cgroup_is_removed().
+ */
+unsigned short css_id(struct cgroup_subsys_state *css)
+{
+	struct css_id *cssid = rcu_dereference(css->id);
+
+	if (cssid)
+		return cssid->id;
+	return 0;
+}
+
+unsigned short css_depth(struct cgroup_subsys_state *css)
+{
+	struct css_id *cssid = rcu_dereference(css->id);
+
+	if (cssid)
+		return cssid->depth;
+	return 0;
+}
+
+bool css_is_ancestor(struct cgroup_subsys_state *child,
+		     struct cgroup_subsys_state *root)
+{
+	struct css_id *child_id = rcu_dereference(child->id);
+	struct css_id *root_id = rcu_dereference(root->id);
+
+	if (!child_id || !root_id || (child_id->depth < root_id->depth))
+		return false;
+	return child_id->stack[root_id->depth] == root_id->id;
+}
+
+static void __free_css_id_cb(struct rcu_head *head)
+{
+	struct css_id *id;
+
+	id = container_of(head, struct css_id, rcu_head);
+	kfree(id);
+}
+
+void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
+{
+	struct css_id *id = css->id;
+	/* When this is called before css_id initialization, id can be NULL */
+	if (!id)
+		return;
+
+	BUG_ON(!ss->use_id);
+
+	rcu_assign_pointer(id->css, NULL);
+	rcu_assign_pointer(css->id, NULL);
+	spin_lock(&ss->id_lock);
+	idr_remove(&ss->idr, id->id);
+	spin_unlock(&ss->id_lock);
+	call_rcu(&id->rcu_head, __free_css_id_cb);
+}
+
+/*
+ * This is called by init or create(). Then, calls to this function are
+ * always serialized (by cgroup_mutex() at create()).
+ */
+
+static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth)
+{
+	struct css_id *newid;
+	int myid, error, size;
+
+	BUG_ON(!ss->use_id);
+
+	size = sizeof(*newid) + sizeof(unsigned short) * (depth + 1);
+	newid = kzalloc(size, GFP_KERNEL);
+	if (!newid)
+		return ERR_PTR(-ENOMEM);
+	/* get id */
+	if (unlikely(!idr_pre_get(&ss->idr, GFP_KERNEL))) {
+		error = -ENOMEM;
+		goto err_out;
+	}
+	spin_lock(&ss->id_lock);
+	/* Don't use 0; allocate an ID in the range 1-65535 */
+	error = idr_get_new_above(&ss->idr, newid, 1, &myid);
+	spin_unlock(&ss->id_lock);
+
+	/* Returns an error when there is no free space for a new ID. */
+	if (error) {
+		error = -ENOSPC;
+		goto err_out;
+	}
+	if (myid > CSS_ID_MAX)
+		goto remove_idr;
+
+	newid->id = myid;
+	newid->depth = depth;
+	return newid;
+remove_idr:
+	error = -ENOSPC;
+	spin_lock(&ss->id_lock);
+	idr_remove(&ss->idr, myid);
+	spin_unlock(&ss->id_lock);
+err_out:
+	kfree(newid);
+	return ERR_PTR(error);
+
+}
+
+static int __init cgroup_subsys_init_idr(struct cgroup_subsys *ss)
+{
+	struct css_id *newid;
+	struct cgroup_subsys_state *rootcss;
+
+	spin_lock_init(&ss->id_lock);
+	idr_init(&ss->idr);
+
+	rootcss = init_css_set.subsys[ss->subsys_id];
+	newid = get_new_cssid(ss, 0);
+	if (IS_ERR(newid))
+		return PTR_ERR(newid);
+
+	newid->stack[0] = newid->id;
+	newid->css = rootcss;
+	rootcss->id = newid;
+	return 0;
+}
+
+static int alloc_css_id(struct cgroup_subsys *ss, struct cgroup *parent,
+			struct cgroup *child)
+{
+	int subsys_id, i, depth = 0;
+	struct cgroup_subsys_state *parent_css, *child_css;
+	struct css_id *child_id, *parent_id = NULL;
+
+	subsys_id = ss->subsys_id;
+	parent_css = parent->subsys[subsys_id];
+	child_css = child->subsys[subsys_id];
+	depth = css_depth(parent_css) + 1;
+	parent_id = parent_css->id;
+
+	child_id = get_new_cssid(ss, depth);
+	if (IS_ERR(child_id))
+		return PTR_ERR(child_id);
+
+	for (i = 0; i < depth; i++)
+		child_id->stack[i] = parent_id->stack[i];
+	child_id->stack[depth] = child_id->id;
+	/*
+	 * child_id->css pointer will be set after this cgroup is available;
+	 * see cgroup_populate_dir()
+	 */
+	rcu_assign_pointer(child_css->id, child_id);
+
+	return 0;
+}
+
+/**
+ * css_lookup - lookup css by id
+ * @ss: cgroup subsys to be looked into.
+ * @id: the id
+ *
+ * Returns a pointer to cgroup_subsys_state if there is a valid one with id.
+ * NULL if not. Should be called under rcu_read_lock().
+ */
+struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id)
+{
+	struct css_id *cssid = NULL;
+
+	BUG_ON(!ss->use_id);
+	cssid = idr_find(&ss->idr, id);
+
+	if (unlikely(!cssid))
+		return NULL;
+
+	return rcu_dereference(cssid->css);
+}
+
+/**
+ * css_get_next - lookup next cgroup under specified hierarchy.
+ * @ss: pointer to subsystem
+ * @id: current position of iteration.
+ * @root: pointer to css. search tree under this.
+ * @foundid: position of found object.
+ *
+ * Search for the next css under the specified hierarchy of rootid. Calling
+ * under rcu_read_lock() is necessary. Returns NULL if it reaches the end.
+ */
+struct cgroup_subsys_state *
+css_get_next(struct cgroup_subsys *ss, int id,
+	     struct cgroup_subsys_state *root, int *foundid)
+{
+	struct cgroup_subsys_state *ret = NULL;
+	struct css_id *tmp;
+	int tmpid;
+	int rootid = css_id(root);
+	int depth = css_depth(root);
+
+	if (!rootid)
+		return NULL;
+
+	BUG_ON(!ss->use_id);
+	/* fill start point for scan */
+	tmpid = id;
+	while (1) {
+		/*
+		 * scan next entry from bitmap(tree), tmpid is updated after
+		 * idr_get_next().
+		 */
+		spin_lock(&ss->id_lock);
+		tmp = idr_get_next(&ss->idr, &tmpid);
+		spin_unlock(&ss->id_lock);
+
+		if (!tmp)
+			break;
+		if (tmp->depth >= depth && tmp->stack[depth] == rootid) {
+			ret = rcu_dereference(tmp->css);
+			if (ret) {
+				*foundid = tmpid;
+				break;
+			}
+		}
+		/* continue to scan from next id */
+		tmpid = tmpid + 1;
+	}
+	return ret;
+}
+
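
A typical consumer of css_get_next() is a full-hierarchy scan. The sketch below is not part of this patch; it only illustrates the iteration pattern the kerneldoc above describes (start at ID 1, resume one past the last hit, hold rcu_read_lock()). The function and callback names are hypothetical.

#include <linux/cgroup.h>
#include <linux/rcupdate.h>

/* Sketch only: visit every live css under @root for subsystem @ss. */
static void foo_scan_hierarchy(struct cgroup_subsys *ss,
			       struct cgroup_subsys_state *root,
			       void (*visit)(struct cgroup_subsys_state *css))
{
	struct cgroup_subsys_state *css;
	int found, nextid = 1;		/* valid CSS IDs start at 1 */

	rcu_read_lock();
	for (;;) {
		css = css_get_next(ss, nextid, root, &found);
		if (!css)
			break;
		visit(css);		/* may still be mid-create or mid-destroy */
		nextid = found + 1;	/* resume just past the last match */
	}
	rcu_read_unlock();
}
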
diff --git a/lib/idr.c b/lib/idr.c
index dab4bca86f5d..80ca9aca038b 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -579,6 +579,52 @@ int idr_for_each(struct idr *idp,
 EXPORT_SYMBOL(idr_for_each);
 
 /**
+ * idr_get_next - lookup next object of id to given id.
+ * @idp: idr handle
+ * @nextidp: pointer to lookup key
+ *
+ * Returns a pointer to the registered object with the smallest id greater
+ * than or equal to *@nextidp, updating *@nextidp to that id; NULL if none.
+ */
+
+void *idr_get_next(struct idr *idp, int *nextidp)
+{
+	struct idr_layer *p, *pa[MAX_LEVEL];
+	struct idr_layer **paa = &pa[0];
+	int id = *nextidp;
+	int n, max;
+
+	/* find first ent */
+	n = idp->layers * IDR_BITS;
+	max = 1 << n;
+	p = rcu_dereference(idp->top);
+	if (!p)
+		return NULL;
+
+	while (id < max) {
+		while (n > 0 && p) {
+			n -= IDR_BITS;
+			*paa++ = p;
+			p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]);
+		}
+
+		if (p) {
+			*nextidp = id;
+			return p;
+		}
+
+		id += 1 << n;
+		while (n < fls(id)) {
+			n += IDR_BITS;
+			p = *--paa;
+		}
+	}
+	return NULL;
+}
+
+
+
+/**
  * idr_replace - replace pointer for given id
  * @idp: idr handle
  * @ptr: pointer you want associated with the id
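
For reference, the common calling pattern for the new idr_get_next() helper is a simple cursor walk. A minimal sketch follows; it is not from this patch, the function name is illustrative, and it assumes the caller already serializes against concurrent removals.

#include <linux/idr.h>

/* Sketch only: count every object currently registered in @idp. */
static int example_idr_count(struct idr *idp)
{
	void *obj;
	int id = 0, n = 0;

	while ((obj = idr_get_next(idp, &id)) != NULL) {
		n++;	/* obj is the entry registered at "id" */
		id++;	/* advance the cursor past the entry just returned */
	}
	return n;
}
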