diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/cgroup.c | 286 |
1 files changed, 285 insertions, 1 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 27792bcb0758..d3c521137425 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -94,7 +94,6 @@ struct cgroupfs_root { | |||
94 | char release_agent_path[PATH_MAX]; | 94 | char release_agent_path[PATH_MAX]; |
95 | }; | 95 | }; |
96 | 96 | ||
97 | |||
98 | /* | 97 | /* |
99 | * The "rootnode" hierarchy is the "dummy hierarchy", reserved for the | 98 | * The "rootnode" hierarchy is the "dummy hierarchy", reserved for the |
100 | * subsystems that are otherwise unattached - it never has more than a | 99 | * subsystems that are otherwise unattached - it never has more than a |
@@ -102,6 +101,39 @@ struct cgroupfs_root { | |||
102 | */ | 101 | */ |
103 | static struct cgroupfs_root rootnode; | 102 | static struct cgroupfs_root rootnode; |
104 | 103 | ||
104 | /* | ||
105 | * CSS ID -- an ID for each subsystem's Cgroup Subsys State (CSS). Used only when | |
106 | * cgroup_subsys->use_id != 0. | |
107 | */ | ||
108 | #define CSS_ID_MAX (65535) | ||
109 | struct css_id { | ||
110 | /* | ||
111 | * The css to which this ID points. This pointer is set to valid value | ||
112 | * after cgroup is populated. If cgroup is removed, this will be NULL. | ||
113 | * This pointer is expected to be RCU-safe because destroy() | ||
114 | * is called after synchronize_rcu(). But for safe use, css_is_removed() | ||
115 | * css_tryget() should be used for avoiding race. | ||
116 | */ | ||
117 | struct cgroup_subsys_state *css; | ||
118 | /* | ||
119 | * ID of this css. | ||
120 | */ | ||
121 | unsigned short id; | ||
122 | /* | ||
123 | * Depth in hierarchy which this ID belongs to. | ||
124 | */ | ||
125 | unsigned short depth; | ||
126 | /* | ||
127 | * ID is freed by RCU. (and lookup routine is RCU safe.) | ||
128 | */ | ||
129 | struct rcu_head rcu_head; | ||
130 | /* | ||
131 | * Hierarchy of CSS ID belongs to. | ||
132 | */ | ||
133 | unsigned short stack[0]; /* Array of Length (depth+1) */ | ||
134 | }; | ||
135 | |||
136 | |||
105 | /* The list of hierarchy roots */ | 137 | /* The list of hierarchy roots */ |
106 | 138 | ||
107 | static LIST_HEAD(roots); | 139 | static LIST_HEAD(roots); |
@@ -185,6 +217,8 @@ struct cg_cgroup_link { | |||
185 | static struct css_set init_css_set; | 217 | static struct css_set init_css_set; |
186 | static struct cg_cgroup_link init_css_set_link; | 218 | static struct cg_cgroup_link init_css_set_link; |
187 | 219 | ||
220 | static int cgroup_subsys_init_idr(struct cgroup_subsys *ss); | ||
221 | |||
188 | /* css_set_lock protects the list of css_set objects, and the | 222 | /* css_set_lock protects the list of css_set objects, and the |
189 | * chain of tasks off each css_set. Nests outside task->alloc_lock | 223 | * chain of tasks off each css_set. Nests outside task->alloc_lock |
190 | * due to cgroup_iter_start() */ | 224 | * due to cgroup_iter_start() */ |
@@ -567,6 +601,9 @@ static struct backing_dev_info cgroup_backing_dev_info = { | |||
567 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, | 601 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, |
568 | }; | 602 | }; |
569 | 603 | ||
604 | static int alloc_css_id(struct cgroup_subsys *ss, | ||
605 | struct cgroup *parent, struct cgroup *child); | ||
606 | |||
570 | static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb) | 607 | static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb) |
571 | { | 608 | { |
572 | struct inode *inode = new_inode(sb); | 609 | struct inode *inode = new_inode(sb); |
@@ -2327,6 +2364,17 @@ static int cgroup_populate_dir(struct cgroup *cgrp) | |||
2327 | if (ss->populate && (err = ss->populate(ss, cgrp)) < 0) | 2364 | if (ss->populate && (err = ss->populate(ss, cgrp)) < 0) |
2328 | return err; | 2365 | return err; |
2329 | } | 2366 | } |
2367 | /* This cgroup is ready now */ | ||
2368 | for_each_subsys(cgrp->root, ss) { | ||
2369 | struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; | ||
2370 | /* | ||
2371 | * Update id->css pointer and make this css visible from | ||
2372 | * CSS ID functions. This pointer will be dereferenced | |
2373 | * from RCU-read-side without locks. | ||
2374 | */ | ||
2375 | if (css->id) | ||
2376 | rcu_assign_pointer(css->id->css, css); | ||
2377 | } | ||
2330 | 2378 | ||
2331 | return 0; | 2379 | return 0; |
2332 | } | 2380 | } |
@@ -2338,6 +2386,7 @@ static void init_cgroup_css(struct cgroup_subsys_state *css, | |||
2338 | css->cgroup = cgrp; | 2386 | css->cgroup = cgrp; |
2339 | atomic_set(&css->refcnt, 1); | 2387 | atomic_set(&css->refcnt, 1); |
2340 | css->flags = 0; | 2388 | css->flags = 0; |
2389 | css->id = NULL; | ||
2341 | if (cgrp == dummytop) | 2390 | if (cgrp == dummytop) |
2342 | set_bit(CSS_ROOT, &css->flags); | 2391 | set_bit(CSS_ROOT, &css->flags); |
2343 | BUG_ON(cgrp->subsys[ss->subsys_id]); | 2392 | BUG_ON(cgrp->subsys[ss->subsys_id]); |
@@ -2413,6 +2462,10 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
2413 | goto err_destroy; | 2462 | goto err_destroy; |
2414 | } | 2463 | } |
2415 | init_cgroup_css(css, ss, cgrp); | 2464 | init_cgroup_css(css, ss, cgrp); |
2465 | if (ss->use_id) | ||
2466 | if (alloc_css_id(ss, parent, cgrp)) | ||
2467 | goto err_destroy; | ||
2468 | /* At error, ->destroy() callback has to free assigned ID. */ | ||
2416 | } | 2469 | } |
2417 | 2470 | ||
2418 | cgroup_lock_hierarchy(root); | 2471 | cgroup_lock_hierarchy(root); |
@@ -2708,6 +2761,8 @@ int __init cgroup_init(void) | |||
2708 | struct cgroup_subsys *ss = subsys[i]; | 2761 | struct cgroup_subsys *ss = subsys[i]; |
2709 | if (!ss->early_init) | 2762 | if (!ss->early_init) |
2710 | cgroup_init_subsys(ss); | 2763 | cgroup_init_subsys(ss); |
2764 | if (ss->use_id) | ||
2765 | cgroup_subsys_init_idr(ss); | ||
2711 | } | 2766 | } |
2712 | 2767 | ||
2713 | /* Add init_css_set to the hash table */ | 2768 | /* Add init_css_set to the hash table */ |
@@ -3242,3 +3297,232 @@ static int __init cgroup_disable(char *str) | |||
3242 | return 1; | 3297 | return 1; |
3243 | } | 3298 | } |
3244 | __setup("cgroup_disable=", cgroup_disable); | 3299 | __setup("cgroup_disable=", cgroup_disable); |
3300 | |||
3301 | /* | ||
3302 | * Functions for CSS ID. | |
3303 | */ | ||
3304 | |||
3305 | /* | ||
3306 | *To get ID other than 0, this should be called when !cgroup_is_removed(). | ||
3307 | */ | ||
3308 | unsigned short css_id(struct cgroup_subsys_state *css) | ||
3309 | { | ||
3310 | struct css_id *cssid = rcu_dereference(css->id); | ||
3311 | |||
3312 | if (cssid) | ||
3313 | return cssid->id; | ||
3314 | return 0; | ||
3315 | } | ||
3316 | |||
3317 | unsigned short css_depth(struct cgroup_subsys_state *css) | ||
3318 | { | ||
3319 | struct css_id *cssid = rcu_dereference(css->id); | ||
3320 | |||
3321 | if (cssid) | ||
3322 | return cssid->depth; | ||
3323 | return 0; | ||
3324 | } | ||
3325 | |||
3326 | bool css_is_ancestor(struct cgroup_subsys_state *child, | ||
3327 | struct cgroup_subsys_state *root) | ||
3328 | { | ||
3329 | struct css_id *child_id = rcu_dereference(child->id); | ||
3330 | struct css_id *root_id = rcu_dereference(root->id); | ||
3331 | |||
3332 | if (!child_id || !root_id || (child_id->depth < root_id->depth)) | ||
3333 | return false; | ||
3334 | return child_id->stack[root_id->depth] == root_id->id; | ||
3335 | } | ||
3336 | |||
3337 | static void __free_css_id_cb(struct rcu_head *head) | ||
3338 | { | ||
3339 | struct css_id *id; | ||
3340 | |||
3341 | id = container_of(head, struct css_id, rcu_head); | ||
3342 | kfree(id); | ||
3343 | } | ||
3344 | |||
3345 | void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css) | ||
3346 | { | ||
3347 | struct css_id *id = css->id; | ||
3348 | /* When this is called before css_id initialization, id can be NULL */ | ||
3349 | if (!id) | ||
3350 | return; | ||
3351 | |||
3352 | BUG_ON(!ss->use_id); | ||
3353 | |||
3354 | rcu_assign_pointer(id->css, NULL); | ||
3355 | rcu_assign_pointer(css->id, NULL); | ||
3356 | spin_lock(&ss->id_lock); | ||
3357 | idr_remove(&ss->idr, id->id); | ||
3358 | spin_unlock(&ss->id_lock); | ||
3359 | call_rcu(&id->rcu_head, __free_css_id_cb); | ||
3360 | } | ||
3361 | |||
3362 | /* | ||
3363 | * This is called by init or create(). Then, calls to this function are | ||
3364 | * always serialized (By cgroup_mutex() at create()). | ||
3365 | */ | ||
3366 | |||
3367 | static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth) | ||
3368 | { | ||
3369 | struct css_id *newid; | ||
3370 | int myid, error, size; | ||
3371 | |||
3372 | BUG_ON(!ss->use_id); | ||
3373 | |||
3374 | size = sizeof(*newid) + sizeof(unsigned short) * (depth + 1); | ||
3375 | newid = kzalloc(size, GFP_KERNEL); | ||
3376 | if (!newid) | ||
3377 | return ERR_PTR(-ENOMEM); | ||
3378 | /* get id */ | ||
3379 | if (unlikely(!idr_pre_get(&ss->idr, GFP_KERNEL))) { | ||
3380 | error = -ENOMEM; | ||
3381 | goto err_out; | ||
3382 | } | ||
3383 | spin_lock(&ss->id_lock); | ||
3384 | /* Don't use 0. allocates an ID of 1-65535 */ | ||
3385 | error = idr_get_new_above(&ss->idr, newid, 1, &myid); | ||
3386 | spin_unlock(&ss->id_lock); | ||
3387 | |||
3388 | /* Returns error when there are no free spaces for new ID.*/ | ||
3389 | if (error) { | ||
3390 | error = -ENOSPC; | ||
3391 | goto err_out; | ||
3392 | } | ||
3393 | if (myid > CSS_ID_MAX) | ||
3394 | goto remove_idr; | ||
3395 | |||
3396 | newid->id = myid; | ||
3397 | newid->depth = depth; | ||
3398 | return newid; | ||
3399 | remove_idr: | ||
3400 | error = -ENOSPC; | ||
3401 | spin_lock(&ss->id_lock); | ||
3402 | idr_remove(&ss->idr, myid); | ||
3403 | spin_unlock(&ss->id_lock); | ||
3404 | err_out: | ||
3405 | kfree(newid); | ||
3406 | return ERR_PTR(error); | ||
3407 | |||
3408 | } | ||
3409 | |||
3410 | static int __init cgroup_subsys_init_idr(struct cgroup_subsys *ss) | ||
3411 | { | ||
3412 | struct css_id *newid; | ||
3413 | struct cgroup_subsys_state *rootcss; | ||
3414 | |||
3415 | spin_lock_init(&ss->id_lock); | ||
3416 | idr_init(&ss->idr); | ||
3417 | |||
3418 | rootcss = init_css_set.subsys[ss->subsys_id]; | ||
3419 | newid = get_new_cssid(ss, 0); | ||
3420 | if (IS_ERR(newid)) | ||
3421 | return PTR_ERR(newid); | ||
3422 | |||
3423 | newid->stack[0] = newid->id; | ||
3424 | newid->css = rootcss; | ||
3425 | rootcss->id = newid; | ||
3426 | return 0; | ||
3427 | } | ||
3428 | |||
3429 | static int alloc_css_id(struct cgroup_subsys *ss, struct cgroup *parent, | ||
3430 | struct cgroup *child) | ||
3431 | { | ||
3432 | int subsys_id, i, depth = 0; | ||
3433 | struct cgroup_subsys_state *parent_css, *child_css; | ||
3434 | struct css_id *child_id, *parent_id = NULL; | ||
3435 | |||
3436 | subsys_id = ss->subsys_id; | ||
3437 | parent_css = parent->subsys[subsys_id]; | ||
3438 | child_css = child->subsys[subsys_id]; | ||
3439 | depth = css_depth(parent_css) + 1; | ||
3440 | parent_id = parent_css->id; | ||
3441 | |||
3442 | child_id = get_new_cssid(ss, depth); | ||
3443 | if (IS_ERR(child_id)) | ||
3444 | return PTR_ERR(child_id); | ||
3445 | |||
3446 | for (i = 0; i < depth; i++) | ||
3447 | child_id->stack[i] = parent_id->stack[i]; | ||
3448 | child_id->stack[depth] = child_id->id; | ||
3449 | /* | ||
3450 | * child_id->css pointer will be set after this cgroup is available | ||
3451 | * see cgroup_populate_dir() | ||
3452 | */ | ||
3453 | rcu_assign_pointer(child_css->id, child_id); | ||
3454 | |||
3455 | return 0; | ||
3456 | } | ||
3457 | |||
3458 | /** | ||
3459 | * css_lookup - lookup css by id | ||
3460 | * @ss: cgroup subsys to be looked into. | ||
3461 | * @id: the id | ||
3462 | * | ||
3463 | * Returns pointer to cgroup_subsys_state if there is valid one with id. | ||
3464 | * NULL if not. Should be called under rcu_read_lock() | ||
3465 | */ | ||
3466 | struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id) | ||
3467 | { | ||
3468 | struct css_id *cssid = NULL; | ||
3469 | |||
3470 | BUG_ON(!ss->use_id); | ||
3471 | cssid = idr_find(&ss->idr, id); | ||
3472 | |||
3473 | if (unlikely(!cssid)) | ||
3474 | return NULL; | ||
3475 | |||
3476 | return rcu_dereference(cssid->css); | ||
3477 | } | ||
3478 | |||
3479 | /** | ||
3480 | * css_get_next - lookup next cgroup under specified hierarchy. | ||
3481 | * @ss: pointer to subsystem | ||
3482 | * @id: current position of iteration. | ||
3483 | * @root: pointer to css. search tree under this. | ||
3484 | * @foundid: position of found object. | ||
3485 | * | ||
3486 | * Search next css under the specified hierarchy of rootid. Calling under | ||
3487 | * rcu_read_lock() is necessary. Returns NULL if it reaches the end. | ||
3488 | */ | ||
3489 | struct cgroup_subsys_state * | ||
3490 | css_get_next(struct cgroup_subsys *ss, int id, | ||
3491 | struct cgroup_subsys_state *root, int *foundid) | ||
3492 | { | ||
3493 | struct cgroup_subsys_state *ret = NULL; | ||
3494 | struct css_id *tmp; | ||
3495 | int tmpid; | ||
3496 | int rootid = css_id(root); | ||
3497 | int depth = css_depth(root); | ||
3498 | |||
3499 | if (!rootid) | ||
3500 | return NULL; | ||
3501 | |||
3502 | BUG_ON(!ss->use_id); | ||
3503 | /* fill start point for scan */ | ||
3504 | tmpid = id; | ||
3505 | while (1) { | ||
3506 | /* | ||
3507 | * scan next entry from bitmap(tree), tmpid is updated after | ||
3508 | * idr_get_next(). | ||
3509 | */ | ||
3510 | spin_lock(&ss->id_lock); | ||
3511 | tmp = idr_get_next(&ss->idr, &tmpid); | ||
3512 | spin_unlock(&ss->id_lock); | ||
3513 | |||
3514 | if (!tmp) | ||
3515 | break; | ||
3516 | if (tmp->depth >= depth && tmp->stack[depth] == rootid) { | ||
3517 | ret = rcu_dereference(tmp->css); | ||
3518 | if (ret) { | ||
3519 | *foundid = tmpid; | ||
3520 | break; | ||
3521 | } | ||
3522 | } | ||
3523 | /* continue to scan from next id */ | ||
3524 | tmpid = tmpid + 1; | ||
3525 | } | ||
3526 | return ret; | ||
3527 | } | ||
3528 | |||