diff options
Diffstat (limited to 'kernel/cgroup.c')
| -rw-r--r-- | kernel/cgroup.c | 1129 |
1 files changed, 817 insertions, 312 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index b6eadfe30e7b..ca83b73fba19 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
| @@ -23,6 +23,7 @@ | |||
| 23 | */ | 23 | */ |
| 24 | 24 | ||
| 25 | #include <linux/cgroup.h> | 25 | #include <linux/cgroup.h> |
| 26 | #include <linux/ctype.h> | ||
| 26 | #include <linux/errno.h> | 27 | #include <linux/errno.h> |
| 27 | #include <linux/fs.h> | 28 | #include <linux/fs.h> |
| 28 | #include <linux/kernel.h> | 29 | #include <linux/kernel.h> |
| @@ -48,6 +49,8 @@ | |||
| 48 | #include <linux/namei.h> | 49 | #include <linux/namei.h> |
| 49 | #include <linux/smp_lock.h> | 50 | #include <linux/smp_lock.h> |
| 50 | #include <linux/pid_namespace.h> | 51 | #include <linux/pid_namespace.h> |
| 52 | #include <linux/idr.h> | ||
| 53 | #include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */ | ||
| 51 | 54 | ||
| 52 | #include <asm/atomic.h> | 55 | #include <asm/atomic.h> |
| 53 | 56 | ||
| @@ -60,6 +63,8 @@ static struct cgroup_subsys *subsys[] = { | |||
| 60 | #include <linux/cgroup_subsys.h> | 63 | #include <linux/cgroup_subsys.h> |
| 61 | }; | 64 | }; |
| 62 | 65 | ||
| 66 | #define MAX_CGROUP_ROOT_NAMELEN 64 | ||
| 67 | |||
| 63 | /* | 68 | /* |
| 64 | * A cgroupfs_root represents the root of a cgroup hierarchy, | 69 | * A cgroupfs_root represents the root of a cgroup hierarchy, |
| 65 | * and may be associated with a superblock to form an active | 70 | * and may be associated with a superblock to form an active |
| @@ -74,6 +79,9 @@ struct cgroupfs_root { | |||
| 74 | */ | 79 | */ |
| 75 | unsigned long subsys_bits; | 80 | unsigned long subsys_bits; |
| 76 | 81 | ||
| 82 | /* Unique id for this hierarchy. */ | ||
| 83 | int hierarchy_id; | ||
| 84 | |||
| 77 | /* The bitmask of subsystems currently attached to this hierarchy */ | 85 | /* The bitmask of subsystems currently attached to this hierarchy */ |
| 78 | unsigned long actual_subsys_bits; | 86 | unsigned long actual_subsys_bits; |
| 79 | 87 | ||
| @@ -94,6 +102,9 @@ struct cgroupfs_root { | |||
| 94 | 102 | ||
| 95 | /* The path to use for release notifications. */ | 103 | /* The path to use for release notifications. */ |
| 96 | char release_agent_path[PATH_MAX]; | 104 | char release_agent_path[PATH_MAX]; |
| 105 | |||
| 106 | /* The name for this hierarchy - may be empty */ | ||
| 107 | char name[MAX_CGROUP_ROOT_NAMELEN]; | ||
| 97 | }; | 108 | }; |
| 98 | 109 | ||
| 99 | /* | 110 | /* |
| @@ -141,6 +152,10 @@ struct css_id { | |||
| 141 | static LIST_HEAD(roots); | 152 | static LIST_HEAD(roots); |
| 142 | static int root_count; | 153 | static int root_count; |
| 143 | 154 | ||
| 155 | static DEFINE_IDA(hierarchy_ida); | ||
| 156 | static int next_hierarchy_id; | ||
| 157 | static DEFINE_SPINLOCK(hierarchy_id_lock); | ||
| 158 | |||
| 144 | /* dummytop is a shorthand for the dummy hierarchy's top cgroup */ | 159 | /* dummytop is a shorthand for the dummy hierarchy's top cgroup */ |
| 145 | #define dummytop (&rootnode.top_cgroup) | 160 | #define dummytop (&rootnode.top_cgroup) |
| 146 | 161 | ||
| @@ -201,6 +216,7 @@ struct cg_cgroup_link { | |||
| 201 | * cgroup, anchored on cgroup->css_sets | 216 | * cgroup, anchored on cgroup->css_sets |
| 202 | */ | 217 | */ |
| 203 | struct list_head cgrp_link_list; | 218 | struct list_head cgrp_link_list; |
| 219 | struct cgroup *cgrp; | ||
| 204 | /* | 220 | /* |
| 205 | * List running through cg_cgroup_links pointing at a | 221 | * List running through cg_cgroup_links pointing at a |
| 206 | * single css_set object, anchored on css_set->cg_links | 222 | * single css_set object, anchored on css_set->cg_links |
| @@ -227,8 +243,11 @@ static int cgroup_subsys_init_idr(struct cgroup_subsys *ss); | |||
| 227 | static DEFINE_RWLOCK(css_set_lock); | 243 | static DEFINE_RWLOCK(css_set_lock); |
| 228 | static int css_set_count; | 244 | static int css_set_count; |
| 229 | 245 | ||
| 230 | /* hash table for cgroup groups. This improves the performance to | 246 | /* |
| 231 | * find an existing css_set */ | 247 | * hash table for cgroup groups. This improves the performance to find |
| 248 | * an existing css_set. This hash doesn't (currently) take into | ||
| 249 | * account cgroups in empty hierarchies. | ||
| 250 | */ | ||
| 232 | #define CSS_SET_HASH_BITS 7 | 251 | #define CSS_SET_HASH_BITS 7 |
| 233 | #define CSS_SET_TABLE_SIZE (1 << CSS_SET_HASH_BITS) | 252 | #define CSS_SET_TABLE_SIZE (1 << CSS_SET_HASH_BITS) |
| 234 | static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE]; | 253 | static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE]; |
| @@ -248,48 +267,22 @@ static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[]) | |||
| 248 | return &css_set_table[index]; | 267 | return &css_set_table[index]; |
| 249 | } | 268 | } |
| 250 | 269 | ||
| 270 | static void free_css_set_rcu(struct rcu_head *obj) | ||
| 271 | { | ||
| 272 | struct css_set *cg = container_of(obj, struct css_set, rcu_head); | ||
| 273 | kfree(cg); | ||
| 274 | } | ||
| 275 | |||
| 251 | /* We don't maintain the lists running through each css_set to its | 276 | /* We don't maintain the lists running through each css_set to its |
| 252 | * task until after the first call to cgroup_iter_start(). This | 277 | * task until after the first call to cgroup_iter_start(). This |
| 253 | * reduces the fork()/exit() overhead for people who have cgroups | 278 | * reduces the fork()/exit() overhead for people who have cgroups |
| 254 | * compiled into their kernel but not actually in use */ | 279 | * compiled into their kernel but not actually in use */ |
| 255 | static int use_task_css_set_links __read_mostly; | 280 | static int use_task_css_set_links __read_mostly; |
| 256 | 281 | ||
| 257 | /* When we create or destroy a css_set, the operation simply | 282 | static void __put_css_set(struct css_set *cg, int taskexit) |
| 258 | * takes/releases a reference count on all the cgroups referenced | ||
| 259 | * by subsystems in this css_set. This can end up multiple-counting | ||
| 260 | * some cgroups, but that's OK - the ref-count is just a | ||
| 261 | * busy/not-busy indicator; ensuring that we only count each cgroup | ||
| 262 | * once would require taking a global lock to ensure that no | ||
| 263 | * subsystems moved between hierarchies while we were doing so. | ||
| 264 | * | ||
| 265 | * Possible TODO: decide at boot time based on the number of | ||
| 266 | * registered subsystems and the number of CPUs or NUMA nodes whether | ||
| 267 | * it's better for performance to ref-count every subsystem, or to | ||
| 268 | * take a global lock and only add one ref count to each hierarchy. | ||
| 269 | */ | ||
| 270 | |||
| 271 | /* | ||
| 272 | * unlink a css_set from the list and free it | ||
| 273 | */ | ||
| 274 | static void unlink_css_set(struct css_set *cg) | ||
| 275 | { | 283 | { |
| 276 | struct cg_cgroup_link *link; | 284 | struct cg_cgroup_link *link; |
| 277 | struct cg_cgroup_link *saved_link; | 285 | struct cg_cgroup_link *saved_link; |
| 278 | |||
| 279 | hlist_del(&cg->hlist); | ||
| 280 | css_set_count--; | ||
| 281 | |||
| 282 | list_for_each_entry_safe(link, saved_link, &cg->cg_links, | ||
| 283 | cg_link_list) { | ||
| 284 | list_del(&link->cg_link_list); | ||
| 285 | list_del(&link->cgrp_link_list); | ||
| 286 | kfree(link); | ||
| 287 | } | ||
| 288 | } | ||
| 289 | |||
| 290 | static void __put_css_set(struct css_set *cg, int taskexit) | ||
| 291 | { | ||
| 292 | int i; | ||
| 293 | /* | 286 | /* |
| 294 | * Ensure that the refcount doesn't hit zero while any readers | 287 | * Ensure that the refcount doesn't hit zero while any readers |
| 295 | * can see it. Similar to atomic_dec_and_lock(), but for an | 288 | * can see it. Similar to atomic_dec_and_lock(), but for an |
| @@ -302,21 +295,28 @@ static void __put_css_set(struct css_set *cg, int taskexit) | |||
| 302 | write_unlock(&css_set_lock); | 295 | write_unlock(&css_set_lock); |
| 303 | return; | 296 | return; |
| 304 | } | 297 | } |
| 305 | unlink_css_set(cg); | ||
| 306 | write_unlock(&css_set_lock); | ||
| 307 | 298 | ||
| 308 | rcu_read_lock(); | 299 | /* This css_set is dead. unlink it and release cgroup refcounts */ |
| 309 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | 300 | hlist_del(&cg->hlist); |
| 310 | struct cgroup *cgrp = rcu_dereference(cg->subsys[i]->cgroup); | 301 | css_set_count--; |
| 302 | |||
| 303 | list_for_each_entry_safe(link, saved_link, &cg->cg_links, | ||
| 304 | cg_link_list) { | ||
| 305 | struct cgroup *cgrp = link->cgrp; | ||
| 306 | list_del(&link->cg_link_list); | ||
| 307 | list_del(&link->cgrp_link_list); | ||
| 311 | if (atomic_dec_and_test(&cgrp->count) && | 308 | if (atomic_dec_and_test(&cgrp->count) && |
| 312 | notify_on_release(cgrp)) { | 309 | notify_on_release(cgrp)) { |
| 313 | if (taskexit) | 310 | if (taskexit) |
| 314 | set_bit(CGRP_RELEASABLE, &cgrp->flags); | 311 | set_bit(CGRP_RELEASABLE, &cgrp->flags); |
| 315 | check_for_release(cgrp); | 312 | check_for_release(cgrp); |
| 316 | } | 313 | } |
| 314 | |||
| 315 | kfree(link); | ||
| 317 | } | 316 | } |
| 318 | rcu_read_unlock(); | 317 | |
| 319 | kfree(cg); | 318 | write_unlock(&css_set_lock); |
| 319 | call_rcu(&cg->rcu_head, free_css_set_rcu); | ||
| 320 | } | 320 | } |
| 321 | 321 | ||
| 322 | /* | 322 | /* |
| @@ -338,6 +338,78 @@ static inline void put_css_set_taskexit(struct css_set *cg) | |||
| 338 | } | 338 | } |
| 339 | 339 | ||
| 340 | /* | 340 | /* |
| 341 | * compare_css_sets - helper function for find_existing_css_set(). | ||
| 342 | * @cg: candidate css_set being tested | ||
| 343 | * @old_cg: existing css_set for a task | ||
| 344 | * @new_cgrp: cgroup that's being entered by the task | ||
| 345 | * @template: desired set of css pointers in css_set (pre-calculated) | ||
| 346 | * | ||
| 347 | * Returns true if "cg" matches "old_cg" except for the hierarchy | ||
| 348 | * which "new_cgrp" belongs to, for which it should match "new_cgrp". | ||
| 349 | */ | ||
| 350 | static bool compare_css_sets(struct css_set *cg, | ||
| 351 | struct css_set *old_cg, | ||
| 352 | struct cgroup *new_cgrp, | ||
| 353 | struct cgroup_subsys_state *template[]) | ||
| 354 | { | ||
| 355 | struct list_head *l1, *l2; | ||
| 356 | |||
| 357 | if (memcmp(template, cg->subsys, sizeof(cg->subsys))) { | ||
| 358 | /* Not all subsystems matched */ | ||
| 359 | return false; | ||
| 360 | } | ||
| 361 | |||
| 362 | /* | ||
| 363 | * Compare cgroup pointers in order to distinguish between | ||
| 364 | * different cgroups in hierarchies with no subsystems. We | ||
| 365 | * could get by with just this check alone (and skip the | ||
| 366 | * memcmp above) but on most setups the memcmp check will | ||
| 367 | * avoid the need for this more expensive check on almost all | ||
| 368 | * candidates. | ||
| 369 | */ | ||
| 370 | |||
| 371 | l1 = &cg->cg_links; | ||
| 372 | l2 = &old_cg->cg_links; | ||
| 373 | while (1) { | ||
| 374 | struct cg_cgroup_link *cgl1, *cgl2; | ||
| 375 | struct cgroup *cg1, *cg2; | ||
| 376 | |||
| 377 | l1 = l1->next; | ||
| 378 | l2 = l2->next; | ||
| 379 | /* See if we reached the end - both lists are equal length. */ | ||
| 380 | if (l1 == &cg->cg_links) { | ||
| 381 | BUG_ON(l2 != &old_cg->cg_links); | ||
| 382 | break; | ||
| 383 | } else { | ||
| 384 | BUG_ON(l2 == &old_cg->cg_links); | ||
| 385 | } | ||
| 386 | /* Locate the cgroups associated with these links. */ | ||
| 387 | cgl1 = list_entry(l1, struct cg_cgroup_link, cg_link_list); | ||
| 388 | cgl2 = list_entry(l2, struct cg_cgroup_link, cg_link_list); | ||
| 389 | cg1 = cgl1->cgrp; | ||
| 390 | cg2 = cgl2->cgrp; | ||
| 391 | /* Hierarchies should be linked in the same order. */ | ||
| 392 | BUG_ON(cg1->root != cg2->root); | ||
| 393 | |||
| 394 | /* | ||
| 395 | * If this hierarchy is the hierarchy of the cgroup | ||
| 396 | * that's changing, then we need to check that this | ||
| 397 | * css_set points to the new cgroup; if it's any other | ||
| 398 | * hierarchy, then this css_set should point to the | ||
| 399 | * same cgroup as the old css_set. | ||
| 400 | */ | ||
| 401 | if (cg1->root == new_cgrp->root) { | ||
| 402 | if (cg1 != new_cgrp) | ||
| 403 | return false; | ||
| 404 | } else { | ||
| 405 | if (cg1 != cg2) | ||
| 406 | return false; | ||
| 407 | } | ||
| 408 | } | ||
| 409 | return true; | ||
| 410 | } | ||
| 411 | |||
| 412 | /* | ||
| 341 | * find_existing_css_set() is a helper for | 413 | * find_existing_css_set() is a helper for |
| 342 | * find_css_set(), and checks to see whether an existing | 414 | * find_css_set(), and checks to see whether an existing |
| 343 | * css_set is suitable. | 415 | * css_set is suitable. |
| @@ -378,10 +450,11 @@ static struct css_set *find_existing_css_set( | |||
| 378 | 450 | ||
| 379 | hhead = css_set_hash(template); | 451 | hhead = css_set_hash(template); |
| 380 | hlist_for_each_entry(cg, node, hhead, hlist) { | 452 | hlist_for_each_entry(cg, node, hhead, hlist) { |
| 381 | if (!memcmp(template, cg->subsys, sizeof(cg->subsys))) { | 453 | if (!compare_css_sets(cg, oldcg, cgrp, template)) |
| 382 | /* All subsystems matched */ | 454 | continue; |
| 383 | return cg; | 455 | |
| 384 | } | 456 | /* This css_set matches what we need */ |
| 457 | return cg; | ||
| 385 | } | 458 | } |
| 386 | 459 | ||
| 387 | /* No existing cgroup group matched */ | 460 | /* No existing cgroup group matched */ |
| @@ -435,8 +508,14 @@ static void link_css_set(struct list_head *tmp_cg_links, | |||
| 435 | link = list_first_entry(tmp_cg_links, struct cg_cgroup_link, | 508 | link = list_first_entry(tmp_cg_links, struct cg_cgroup_link, |
| 436 | cgrp_link_list); | 509 | cgrp_link_list); |
| 437 | link->cg = cg; | 510 | link->cg = cg; |
| 511 | link->cgrp = cgrp; | ||
| 512 | atomic_inc(&cgrp->count); | ||
| 438 | list_move(&link->cgrp_link_list, &cgrp->css_sets); | 513 | list_move(&link->cgrp_link_list, &cgrp->css_sets); |
| 439 | list_add(&link->cg_link_list, &cg->cg_links); | 514 | /* |
| 515 | * Always add links to the tail of the list so that the list | ||
| 516 | * is sorted by order of hierarchy creation | ||
| 517 | */ | ||
| 518 | list_add_tail(&link->cg_link_list, &cg->cg_links); | ||
| 440 | } | 519 | } |
| 441 | 520 | ||
| 442 | /* | 521 | /* |
| @@ -451,11 +530,11 @@ static struct css_set *find_css_set( | |||
| 451 | { | 530 | { |
| 452 | struct css_set *res; | 531 | struct css_set *res; |
| 453 | struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT]; | 532 | struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT]; |
| 454 | int i; | ||
| 455 | 533 | ||
| 456 | struct list_head tmp_cg_links; | 534 | struct list_head tmp_cg_links; |
| 457 | 535 | ||
| 458 | struct hlist_head *hhead; | 536 | struct hlist_head *hhead; |
| 537 | struct cg_cgroup_link *link; | ||
| 459 | 538 | ||
| 460 | /* First see if we already have a cgroup group that matches | 539 | /* First see if we already have a cgroup group that matches |
| 461 | * the desired set */ | 540 | * the desired set */ |
| @@ -489,20 +568,12 @@ static struct css_set *find_css_set( | |||
| 489 | 568 | ||
| 490 | write_lock(&css_set_lock); | 569 | write_lock(&css_set_lock); |
| 491 | /* Add reference counts and links from the new css_set. */ | 570 | /* Add reference counts and links from the new css_set. */ |
| 492 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | 571 | list_for_each_entry(link, &oldcg->cg_links, cg_link_list) { |
| 493 | struct cgroup *cgrp = res->subsys[i]->cgroup; | 572 | struct cgroup *c = link->cgrp; |
| 494 | struct cgroup_subsys *ss = subsys[i]; | 573 | if (c->root == cgrp->root) |
| 495 | atomic_inc(&cgrp->count); | 574 | c = cgrp; |
| 496 | /* | 575 | link_css_set(&tmp_cg_links, res, c); |
| 497 | * We want to add a link once per cgroup, so we | ||
| 498 | * only do it for the first subsystem in each | ||
| 499 | * hierarchy | ||
| 500 | */ | ||
| 501 | if (ss->root->subsys_list.next == &ss->sibling) | ||
| 502 | link_css_set(&tmp_cg_links, res, cgrp); | ||
| 503 | } | 576 | } |
| 504 | if (list_empty(&rootnode.subsys_list)) | ||
| 505 | link_css_set(&tmp_cg_links, res, dummytop); | ||
| 506 | 577 | ||
| 507 | BUG_ON(!list_empty(&tmp_cg_links)); | 578 | BUG_ON(!list_empty(&tmp_cg_links)); |
| 508 | 579 | ||
| @@ -518,6 +589,41 @@ static struct css_set *find_css_set( | |||
| 518 | } | 589 | } |
| 519 | 590 | ||
| 520 | /* | 591 | /* |
| 592 | * Return the cgroup for "task" from the given hierarchy. Must be | ||
| 593 | * called with cgroup_mutex held. | ||
| 594 | */ | ||
| 595 | static struct cgroup *task_cgroup_from_root(struct task_struct *task, | ||
| 596 | struct cgroupfs_root *root) | ||
| 597 | { | ||
| 598 | struct css_set *css; | ||
| 599 | struct cgroup *res = NULL; | ||
| 600 | |||
| 601 | BUG_ON(!mutex_is_locked(&cgroup_mutex)); | ||
| 602 | read_lock(&css_set_lock); | ||
| 603 | /* | ||
| 604 | * No need to lock the task - since we hold cgroup_mutex the | ||
| 605 | * task can't change groups, so the only thing that can happen | ||
| 606 | * is that it exits and its css is set back to init_css_set. | ||
| 607 | */ | ||
| 608 | css = task->cgroups; | ||
| 609 | if (css == &init_css_set) { | ||
| 610 | res = &root->top_cgroup; | ||
| 611 | } else { | ||
| 612 | struct cg_cgroup_link *link; | ||
| 613 | list_for_each_entry(link, &css->cg_links, cg_link_list) { | ||
| 614 | struct cgroup *c = link->cgrp; | ||
| 615 | if (c->root == root) { | ||
| 616 | res = c; | ||
| 617 | break; | ||
| 618 | } | ||
| 619 | } | ||
| 620 | } | ||
| 621 | read_unlock(&css_set_lock); | ||
| 622 | BUG_ON(!res); | ||
| 623 | return res; | ||
| 624 | } | ||
| 625 | |||
| 626 | /* | ||
| 521 | * There is one global cgroup mutex. We also require taking | 627 | * There is one global cgroup mutex. We also require taking |
| 522 | * task_lock() when dereferencing a task's cgroup subsys pointers. | 628 | * task_lock() when dereferencing a task's cgroup subsys pointers. |
| 523 | * See "The task_lock() exception", at the end of this comment. | 629 | * See "The task_lock() exception", at the end of this comment. |
| @@ -596,10 +702,11 @@ void cgroup_unlock(void) | |||
| 596 | static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode); | 702 | static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode); |
| 597 | static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry); | 703 | static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry); |
| 598 | static int cgroup_populate_dir(struct cgroup *cgrp); | 704 | static int cgroup_populate_dir(struct cgroup *cgrp); |
| 599 | static struct inode_operations cgroup_dir_inode_operations; | 705 | static const struct inode_operations cgroup_dir_inode_operations; |
| 600 | static struct file_operations proc_cgroupstats_operations; | 706 | static const struct file_operations proc_cgroupstats_operations; |
| 601 | 707 | ||
| 602 | static struct backing_dev_info cgroup_backing_dev_info = { | 708 | static struct backing_dev_info cgroup_backing_dev_info = { |
| 709 | .name = "cgroup", | ||
| 603 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, | 710 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, |
| 604 | }; | 711 | }; |
| 605 | 712 | ||
| @@ -676,6 +783,12 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode) | |||
| 676 | */ | 783 | */ |
| 677 | deactivate_super(cgrp->root->sb); | 784 | deactivate_super(cgrp->root->sb); |
| 678 | 785 | ||
| 786 | /* | ||
| 787 | * if we're getting rid of the cgroup, refcount should ensure | ||
| 788 | * that there are no pidlists left. | ||
| 789 | */ | ||
| 790 | BUG_ON(!list_empty(&cgrp->pidlists)); | ||
| 791 | |||
| 679 | call_rcu(&cgrp->rcu_head, free_cgroup_rcu); | 792 | call_rcu(&cgrp->rcu_head, free_cgroup_rcu); |
| 680 | } | 793 | } |
| 681 | iput(inode); | 794 | iput(inode); |
| @@ -840,6 +953,8 @@ static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
| 840 | seq_puts(seq, ",noprefix"); | 953 | seq_puts(seq, ",noprefix"); |
| 841 | if (strlen(root->release_agent_path)) | 954 | if (strlen(root->release_agent_path)) |
| 842 | seq_printf(seq, ",release_agent=%s", root->release_agent_path); | 955 | seq_printf(seq, ",release_agent=%s", root->release_agent_path); |
| 956 | if (strlen(root->name)) | ||
| 957 | seq_printf(seq, ",name=%s", root->name); | ||
| 843 | mutex_unlock(&cgroup_mutex); | 958 | mutex_unlock(&cgroup_mutex); |
| 844 | return 0; | 959 | return 0; |
| 845 | } | 960 | } |
| @@ -848,6 +963,12 @@ struct cgroup_sb_opts { | |||
| 848 | unsigned long subsys_bits; | 963 | unsigned long subsys_bits; |
| 849 | unsigned long flags; | 964 | unsigned long flags; |
| 850 | char *release_agent; | 965 | char *release_agent; |
| 966 | char *name; | ||
| 967 | /* User explicitly requested empty subsystem */ | ||
| 968 | bool none; | ||
| 969 | |||
| 970 | struct cgroupfs_root *new_root; | ||
| 971 | |||
| 851 | }; | 972 | }; |
| 852 | 973 | ||
| 853 | /* Convert a hierarchy specifier into a bitmask of subsystems and | 974 | /* Convert a hierarchy specifier into a bitmask of subsystems and |
| @@ -862,9 +983,7 @@ static int parse_cgroupfs_options(char *data, | |||
| 862 | mask = ~(1UL << cpuset_subsys_id); | 983 | mask = ~(1UL << cpuset_subsys_id); |
| 863 | #endif | 984 | #endif |
| 864 | 985 | ||
| 865 | opts->subsys_bits = 0; | 986 | memset(opts, 0, sizeof(*opts)); |
| 866 | opts->flags = 0; | ||
| 867 | opts->release_agent = NULL; | ||
| 868 | 987 | ||
| 869 | while ((token = strsep(&o, ",")) != NULL) { | 988 | while ((token = strsep(&o, ",")) != NULL) { |
| 870 | if (!*token) | 989 | if (!*token) |
| @@ -878,17 +997,42 @@ static int parse_cgroupfs_options(char *data, | |||
| 878 | if (!ss->disabled) | 997 | if (!ss->disabled) |
| 879 | opts->subsys_bits |= 1ul << i; | 998 | opts->subsys_bits |= 1ul << i; |
| 880 | } | 999 | } |
| 1000 | } else if (!strcmp(token, "none")) { | ||
| 1001 | /* Explicitly have no subsystems */ | ||
| 1002 | opts->none = true; | ||
| 881 | } else if (!strcmp(token, "noprefix")) { | 1003 | } else if (!strcmp(token, "noprefix")) { |
| 882 | set_bit(ROOT_NOPREFIX, &opts->flags); | 1004 | set_bit(ROOT_NOPREFIX, &opts->flags); |
| 883 | } else if (!strncmp(token, "release_agent=", 14)) { | 1005 | } else if (!strncmp(token, "release_agent=", 14)) { |
| 884 | /* Specifying two release agents is forbidden */ | 1006 | /* Specifying two release agents is forbidden */ |
| 885 | if (opts->release_agent) | 1007 | if (opts->release_agent) |
| 886 | return -EINVAL; | 1008 | return -EINVAL; |
| 887 | opts->release_agent = kzalloc(PATH_MAX, GFP_KERNEL); | 1009 | opts->release_agent = |
| 1010 | kstrndup(token + 14, PATH_MAX, GFP_KERNEL); | ||
| 888 | if (!opts->release_agent) | 1011 | if (!opts->release_agent) |
| 889 | return -ENOMEM; | 1012 | return -ENOMEM; |
| 890 | strncpy(opts->release_agent, token + 14, PATH_MAX - 1); | 1013 | } else if (!strncmp(token, "name=", 5)) { |
| 891 | opts->release_agent[PATH_MAX - 1] = 0; | 1014 | int i; |
| 1015 | const char *name = token + 5; | ||
| 1016 | /* Can't specify an empty name */ | ||
| 1017 | if (!strlen(name)) | ||
| 1018 | return -EINVAL; | ||
| 1019 | /* Must match [\w.-]+ */ | ||
| 1020 | for (i = 0; i < strlen(name); i++) { | ||
| 1021 | char c = name[i]; | ||
| 1022 | if (isalnum(c)) | ||
| 1023 | continue; | ||
| 1024 | if ((c == '.') || (c == '-') || (c == '_')) | ||
| 1025 | continue; | ||
| 1026 | return -EINVAL; | ||
| 1027 | } | ||
| 1028 | /* Specifying two names is forbidden */ | ||
| 1029 | if (opts->name) | ||
| 1030 | return -EINVAL; | ||
| 1031 | opts->name = kstrndup(name, | ||
| 1032 | MAX_CGROUP_ROOT_NAMELEN, | ||
| 1033 | GFP_KERNEL); | ||
| 1034 | if (!opts->name) | ||
| 1035 | return -ENOMEM; | ||
| 892 | } else { | 1036 | } else { |
| 893 | struct cgroup_subsys *ss; | 1037 | struct cgroup_subsys *ss; |
| 894 | int i; | 1038 | int i; |
| @@ -905,6 +1049,8 @@ static int parse_cgroupfs_options(char *data, | |||
| 905 | } | 1049 | } |
| 906 | } | 1050 | } |
| 907 | 1051 | ||
| 1052 | /* Consistency checks */ | ||
| 1053 | |||
| 908 | /* | 1054 | /* |
| 909 | * Option noprefix was introduced just for backward compatibility | 1055 | * Option noprefix was introduced just for backward compatibility |
| 910 | * with the old cpuset, so we allow noprefix only if mounting just | 1056 | * with the old cpuset, so we allow noprefix only if mounting just |
| @@ -914,8 +1060,16 @@ static int parse_cgroupfs_options(char *data, | |||
| 914 | (opts->subsys_bits & mask)) | 1060 | (opts->subsys_bits & mask)) |
| 915 | return -EINVAL; | 1061 | return -EINVAL; |
| 916 | 1062 | ||
| 917 | /* We can't have an empty hierarchy */ | 1063 | |
| 918 | if (!opts->subsys_bits) | 1064 | /* Can't specify "none" and some subsystems */ |
| 1065 | if (opts->subsys_bits && opts->none) | ||
| 1066 | return -EINVAL; | ||
| 1067 | |||
| 1068 | /* | ||
| 1069 | * We either have to specify by name or by subsystems. (So all | ||
| 1070 | * empty hierarchies must have a name). | ||
| 1071 | */ | ||
| 1072 | if (!opts->subsys_bits && !opts->name) | ||
| 919 | return -EINVAL; | 1073 | return -EINVAL; |
| 920 | 1074 | ||
| 921 | return 0; | 1075 | return 0; |
| @@ -943,6 +1097,12 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) | |||
| 943 | goto out_unlock; | 1097 | goto out_unlock; |
| 944 | } | 1098 | } |
| 945 | 1099 | ||
| 1100 | /* Don't allow name to change at remount */ | ||
| 1101 | if (opts.name && strcmp(opts.name, root->name)) { | ||
| 1102 | ret = -EINVAL; | ||
| 1103 | goto out_unlock; | ||
| 1104 | } | ||
| 1105 | |||
| 946 | ret = rebind_subsystems(root, opts.subsys_bits); | 1106 | ret = rebind_subsystems(root, opts.subsys_bits); |
| 947 | if (ret) | 1107 | if (ret) |
| 948 | goto out_unlock; | 1108 | goto out_unlock; |
| @@ -954,13 +1114,14 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) | |||
| 954 | strcpy(root->release_agent_path, opts.release_agent); | 1114 | strcpy(root->release_agent_path, opts.release_agent); |
| 955 | out_unlock: | 1115 | out_unlock: |
| 956 | kfree(opts.release_agent); | 1116 | kfree(opts.release_agent); |
| 1117 | kfree(opts.name); | ||
| 957 | mutex_unlock(&cgroup_mutex); | 1118 | mutex_unlock(&cgroup_mutex); |
| 958 | mutex_unlock(&cgrp->dentry->d_inode->i_mutex); | 1119 | mutex_unlock(&cgrp->dentry->d_inode->i_mutex); |
| 959 | unlock_kernel(); | 1120 | unlock_kernel(); |
| 960 | return ret; | 1121 | return ret; |
| 961 | } | 1122 | } |
| 962 | 1123 | ||
| 963 | static struct super_operations cgroup_ops = { | 1124 | static const struct super_operations cgroup_ops = { |
| 964 | .statfs = simple_statfs, | 1125 | .statfs = simple_statfs, |
| 965 | .drop_inode = generic_delete_inode, | 1126 | .drop_inode = generic_delete_inode, |
| 966 | .show_options = cgroup_show_options, | 1127 | .show_options = cgroup_show_options, |
| @@ -973,9 +1134,10 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) | |||
| 973 | INIT_LIST_HEAD(&cgrp->children); | 1134 | INIT_LIST_HEAD(&cgrp->children); |
| 974 | INIT_LIST_HEAD(&cgrp->css_sets); | 1135 | INIT_LIST_HEAD(&cgrp->css_sets); |
| 975 | INIT_LIST_HEAD(&cgrp->release_list); | 1136 | INIT_LIST_HEAD(&cgrp->release_list); |
| 976 | INIT_LIST_HEAD(&cgrp->pids_list); | 1137 | INIT_LIST_HEAD(&cgrp->pidlists); |
| 977 | init_rwsem(&cgrp->pids_mutex); | 1138 | mutex_init(&cgrp->pidlist_mutex); |
| 978 | } | 1139 | } |
| 1140 | |||
| 979 | static void init_cgroup_root(struct cgroupfs_root *root) | 1141 | static void init_cgroup_root(struct cgroupfs_root *root) |
| 980 | { | 1142 | { |
| 981 | struct cgroup *cgrp = &root->top_cgroup; | 1143 | struct cgroup *cgrp = &root->top_cgroup; |
| @@ -987,33 +1149,106 @@ static void init_cgroup_root(struct cgroupfs_root *root) | |||
| 987 | init_cgroup_housekeeping(cgrp); | 1149 | init_cgroup_housekeeping(cgrp); |
| 988 | } | 1150 | } |
| 989 | 1151 | ||
| 1152 | static bool init_root_id(struct cgroupfs_root *root) | ||
| 1153 | { | ||
| 1154 | int ret = 0; | ||
| 1155 | |||
| 1156 | do { | ||
| 1157 | if (!ida_pre_get(&hierarchy_ida, GFP_KERNEL)) | ||
| 1158 | return false; | ||
| 1159 | spin_lock(&hierarchy_id_lock); | ||
| 1160 | /* Try to allocate the next unused ID */ | ||
| 1161 | ret = ida_get_new_above(&hierarchy_ida, next_hierarchy_id, | ||
| 1162 | &root->hierarchy_id); | ||
| 1163 | if (ret == -ENOSPC) | ||
| 1164 | /* Try again starting from 0 */ | ||
| 1165 | ret = ida_get_new(&hierarchy_ida, &root->hierarchy_id); | ||
| 1166 | if (!ret) { | ||
| 1167 | next_hierarchy_id = root->hierarchy_id + 1; | ||
| 1168 | } else if (ret != -EAGAIN) { | ||
| 1169 | /* Can only get here if the 31-bit IDR is full ... */ | ||
| 1170 | BUG_ON(ret); | ||
| 1171 | } | ||
| 1172 | spin_unlock(&hierarchy_id_lock); | ||
| 1173 | } while (ret); | ||
| 1174 | return true; | ||
| 1175 | } | ||
| 1176 | |||
| 990 | static int cgroup_test_super(struct super_block *sb, void *data) | 1177 | static int cgroup_test_super(struct super_block *sb, void *data) |
| 991 | { | 1178 | { |
| 992 | struct cgroupfs_root *new = data; | 1179 | struct cgroup_sb_opts *opts = data; |
| 993 | struct cgroupfs_root *root = sb->s_fs_info; | 1180 | struct cgroupfs_root *root = sb->s_fs_info; |
| 994 | 1181 | ||
| 995 | /* First check subsystems */ | 1182 | /* If we asked for a name then it must match */ |
| 996 | if (new->subsys_bits != root->subsys_bits) | 1183 | if (opts->name && strcmp(opts->name, root->name)) |
| 997 | return 0; | 1184 | return 0; |
| 998 | 1185 | ||
| 999 | /* Next check flags */ | 1186 | /* |
| 1000 | if (new->flags != root->flags) | 1187 | * If we asked for subsystems (or explicitly for no |
| 1188 | * subsystems) then they must match | ||
| 1189 | */ | ||
| 1190 | if ((opts->subsys_bits || opts->none) | ||
| 1191 | && (opts->subsys_bits != root->subsys_bits)) | ||
| 1001 | return 0; | 1192 | return 0; |
| 1002 | 1193 | ||
| 1003 | return 1; | 1194 | return 1; |
| 1004 | } | 1195 | } |
| 1005 | 1196 | ||
| 1197 | static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts) | ||
| 1198 | { | ||
| 1199 | struct cgroupfs_root *root; | ||
| 1200 | |||
| 1201 | if (!opts->subsys_bits && !opts->none) | ||
| 1202 | return NULL; | ||
| 1203 | |||
| 1204 | root = kzalloc(sizeof(*root), GFP_KERNEL); | ||
| 1205 | if (!root) | ||
| 1206 | return ERR_PTR(-ENOMEM); | ||
| 1207 | |||
| 1208 | if (!init_root_id(root)) { | ||
| 1209 | kfree(root); | ||
| 1210 | return ERR_PTR(-ENOMEM); | ||
| 1211 | } | ||
| 1212 | init_cgroup_root(root); | ||
| 1213 | |||
| 1214 | root->subsys_bits = opts->subsys_bits; | ||
| 1215 | root->flags = opts->flags; | ||
| 1216 | if (opts->release_agent) | ||
| 1217 | strcpy(root->release_agent_path, opts->release_agent); | ||
| 1218 | if (opts->name) | ||
| 1219 | strcpy(root->name, opts->name); | ||
| 1220 | return root; | ||
| 1221 | } | ||
| 1222 | |||
| 1223 | static void cgroup_drop_root(struct cgroupfs_root *root) | ||
| 1224 | { | ||
| 1225 | if (!root) | ||
| 1226 | return; | ||
| 1227 | |||
| 1228 | BUG_ON(!root->hierarchy_id); | ||
| 1229 | spin_lock(&hierarchy_id_lock); | ||
| 1230 | ida_remove(&hierarchy_ida, root->hierarchy_id); | ||
| 1231 | spin_unlock(&hierarchy_id_lock); | ||
| 1232 | kfree(root); | ||
| 1233 | } | ||
| 1234 | |||
| 1006 | static int cgroup_set_super(struct super_block *sb, void *data) | 1235 | static int cgroup_set_super(struct super_block *sb, void *data) |
| 1007 | { | 1236 | { |
| 1008 | int ret; | 1237 | int ret; |
| 1009 | struct cgroupfs_root *root = data; | 1238 | struct cgroup_sb_opts *opts = data; |
| 1239 | |||
| 1240 | /* If we don't have a new root, we can't set up a new sb */ | ||
| 1241 | if (!opts->new_root) | ||
| 1242 | return -EINVAL; | ||
| 1243 | |||
| 1244 | BUG_ON(!opts->subsys_bits && !opts->none); | ||
| 1010 | 1245 | ||
| 1011 | ret = set_anon_super(sb, NULL); | 1246 | ret = set_anon_super(sb, NULL); |
| 1012 | if (ret) | 1247 | if (ret) |
| 1013 | return ret; | 1248 | return ret; |
| 1014 | 1249 | ||
| 1015 | sb->s_fs_info = root; | 1250 | sb->s_fs_info = opts->new_root; |
| 1016 | root->sb = sb; | 1251 | opts->new_root->sb = sb; |
| 1017 | 1252 | ||
| 1018 | sb->s_blocksize = PAGE_CACHE_SIZE; | 1253 | sb->s_blocksize = PAGE_CACHE_SIZE; |
| 1019 | sb->s_blocksize_bits = PAGE_CACHE_SHIFT; | 1254 | sb->s_blocksize_bits = PAGE_CACHE_SHIFT; |
| @@ -1050,48 +1285,43 @@ static int cgroup_get_sb(struct file_system_type *fs_type, | |||
| 1050 | void *data, struct vfsmount *mnt) | 1285 | void *data, struct vfsmount *mnt) |
| 1051 | { | 1286 | { |
| 1052 | struct cgroup_sb_opts opts; | 1287 | struct cgroup_sb_opts opts; |
| 1288 | struct cgroupfs_root *root; | ||
| 1053 | int ret = 0; | 1289 | int ret = 0; |
| 1054 | struct super_block *sb; | 1290 | struct super_block *sb; |
| 1055 | struct cgroupfs_root *root; | 1291 | struct cgroupfs_root *new_root; |
| 1056 | struct list_head tmp_cg_links; | ||
| 1057 | 1292 | ||
| 1058 | /* First find the desired set of subsystems */ | 1293 | /* First find the desired set of subsystems */ |
| 1059 | ret = parse_cgroupfs_options(data, &opts); | 1294 | ret = parse_cgroupfs_options(data, &opts); |
| 1060 | if (ret) { | 1295 | if (ret) |
| 1061 | kfree(opts.release_agent); | 1296 | goto out_err; |
| 1062 | return ret; | ||
| 1063 | } | ||
| 1064 | |||
| 1065 | root = kzalloc(sizeof(*root), GFP_KERNEL); | ||
| 1066 | if (!root) { | ||
| 1067 | kfree(opts.release_agent); | ||
| 1068 | return -ENOMEM; | ||
| 1069 | } | ||
| 1070 | 1297 | ||
| 1071 | init_cgroup_root(root); | 1298 | /* |
| 1072 | root->subsys_bits = opts.subsys_bits; | 1299 | * Allocate a new cgroup root. We may not need it if we're |
| 1073 | root->flags = opts.flags; | 1300 | * reusing an existing hierarchy. |
| 1074 | if (opts.release_agent) { | 1301 | */ |
| 1075 | strcpy(root->release_agent_path, opts.release_agent); | 1302 | new_root = cgroup_root_from_opts(&opts); |
| 1076 | kfree(opts.release_agent); | 1303 | if (IS_ERR(new_root)) { |
| 1304 | ret = PTR_ERR(new_root); | ||
| 1305 | goto out_err; | ||
| 1077 | } | 1306 | } |
| 1307 | opts.new_root = new_root; | ||
| 1078 | 1308 | ||
| 1079 | sb = sget(fs_type, cgroup_test_super, cgroup_set_super, root); | 1309 | /* Locate an existing or new sb for this hierarchy */ |
| 1080 | 1310 | sb = sget(fs_type, cgroup_test_super, cgroup_set_super, &opts); | |
| 1081 | if (IS_ERR(sb)) { | 1311 | if (IS_ERR(sb)) { |
| 1082 | kfree(root); | 1312 | ret = PTR_ERR(sb); |
| 1083 | return PTR_ERR(sb); | 1313 | cgroup_drop_root(opts.new_root); |
| 1314 | goto out_err; | ||
| 1084 | } | 1315 | } |
| 1085 | 1316 | ||
| 1086 | if (sb->s_fs_info != root) { | 1317 | root = sb->s_fs_info; |
| 1087 | /* Reusing an existing superblock */ | 1318 | BUG_ON(!root); |
| 1088 | BUG_ON(sb->s_root == NULL); | 1319 | if (root == opts.new_root) { |
| 1089 | kfree(root); | 1320 | /* We used the new root structure, so this is a new hierarchy */ |
| 1090 | root = NULL; | 1321 | struct list_head tmp_cg_links; |
| 1091 | } else { | ||
| 1092 | /* New superblock */ | ||
| 1093 | struct cgroup *root_cgrp = &root->top_cgroup; | 1322 | struct cgroup *root_cgrp = &root->top_cgroup; |
| 1094 | struct inode *inode; | 1323 | struct inode *inode; |
| 1324 | struct cgroupfs_root *existing_root; | ||
| 1095 | int i; | 1325 | int i; |
| 1096 | 1326 | ||
| 1097 | BUG_ON(sb->s_root != NULL); | 1327 | BUG_ON(sb->s_root != NULL); |
| @@ -1104,6 +1334,18 @@ static int cgroup_get_sb(struct file_system_type *fs_type, | |||
| 1104 | mutex_lock(&inode->i_mutex); | 1334 | mutex_lock(&inode->i_mutex); |
| 1105 | mutex_lock(&cgroup_mutex); | 1335 | mutex_lock(&cgroup_mutex); |
| 1106 | 1336 | ||
| 1337 | if (strlen(root->name)) { | ||
| 1338 | /* Check for name clashes with existing mounts */ | ||
| 1339 | for_each_active_root(existing_root) { | ||
| 1340 | if (!strcmp(existing_root->name, root->name)) { | ||
| 1341 | ret = -EBUSY; | ||
| 1342 | mutex_unlock(&cgroup_mutex); | ||
| 1343 | mutex_unlock(&inode->i_mutex); | ||
| 1344 | goto drop_new_super; | ||
| 1345 | } | ||
| 1346 | } | ||
| 1347 | } | ||
| 1348 | |||
| 1107 | /* | 1349 | /* |
| 1108 | * We're accessing css_set_count without locking | 1350 | * We're accessing css_set_count without locking |
| 1109 | * css_set_lock here, but that's OK - it can only be | 1351 | * css_set_lock here, but that's OK - it can only be |
| @@ -1122,7 +1364,8 @@ static int cgroup_get_sb(struct file_system_type *fs_type, | |||
| 1122 | if (ret == -EBUSY) { | 1364 | if (ret == -EBUSY) { |
| 1123 | mutex_unlock(&cgroup_mutex); | 1365 | mutex_unlock(&cgroup_mutex); |
| 1124 | mutex_unlock(&inode->i_mutex); | 1366 | mutex_unlock(&inode->i_mutex); |
| 1125 | goto free_cg_links; | 1367 | free_cg_links(&tmp_cg_links); |
| 1368 | goto drop_new_super; | ||
| 1126 | } | 1369 | } |
| 1127 | 1370 | ||
| 1128 | /* EBUSY should be the only error here */ | 1371 | /* EBUSY should be the only error here */ |
| @@ -1154,17 +1397,27 @@ static int cgroup_get_sb(struct file_system_type *fs_type, | |||
| 1154 | BUG_ON(root->number_of_cgroups != 1); | 1397 | BUG_ON(root->number_of_cgroups != 1); |
| 1155 | 1398 | ||
| 1156 | cgroup_populate_dir(root_cgrp); | 1399 | cgroup_populate_dir(root_cgrp); |
| 1157 | mutex_unlock(&inode->i_mutex); | ||
| 1158 | mutex_unlock(&cgroup_mutex); | 1400 | mutex_unlock(&cgroup_mutex); |
| 1401 | mutex_unlock(&inode->i_mutex); | ||
| 1402 | } else { | ||
| 1403 | /* | ||
| 1404 | * We re-used an existing hierarchy - the new root (if | ||
| 1405 | * any) is not needed | ||
| 1406 | */ | ||
| 1407 | cgroup_drop_root(opts.new_root); | ||
| 1159 | } | 1408 | } |
| 1160 | 1409 | ||
| 1161 | simple_set_mnt(mnt, sb); | 1410 | simple_set_mnt(mnt, sb); |
| 1411 | kfree(opts.release_agent); | ||
| 1412 | kfree(opts.name); | ||
| 1162 | return 0; | 1413 | return 0; |
| 1163 | 1414 | ||
| 1164 | free_cg_links: | ||
| 1165 | free_cg_links(&tmp_cg_links); | ||
| 1166 | drop_new_super: | 1415 | drop_new_super: |
| 1167 | deactivate_locked_super(sb); | 1416 | deactivate_locked_super(sb); |
| 1417 | out_err: | ||
| 1418 | kfree(opts.release_agent); | ||
| 1419 | kfree(opts.name); | ||
| 1420 | |||
| 1168 | return ret; | 1421 | return ret; |
| 1169 | } | 1422 | } |
| 1170 | 1423 | ||
| @@ -1210,7 +1463,7 @@ static void cgroup_kill_sb(struct super_block *sb) { | |||
| 1210 | mutex_unlock(&cgroup_mutex); | 1463 | mutex_unlock(&cgroup_mutex); |
| 1211 | 1464 | ||
| 1212 | kill_litter_super(sb); | 1465 | kill_litter_super(sb); |
| 1213 | kfree(root); | 1466 | cgroup_drop_root(root); |
| 1214 | } | 1467 | } |
| 1215 | 1468 | ||
| 1216 | static struct file_system_type cgroup_fs_type = { | 1469 | static struct file_system_type cgroup_fs_type = { |
| @@ -1275,27 +1528,6 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen) | |||
| 1275 | return 0; | 1528 | return 0; |
| 1276 | } | 1529 | } |
| 1277 | 1530 | ||
| 1278 | /* | ||
| 1279 | * Return the first subsystem attached to a cgroup's hierarchy, and | ||
| 1280 | * its subsystem id. | ||
| 1281 | */ | ||
| 1282 | |||
| 1283 | static void get_first_subsys(const struct cgroup *cgrp, | ||
| 1284 | struct cgroup_subsys_state **css, int *subsys_id) | ||
| 1285 | { | ||
| 1286 | const struct cgroupfs_root *root = cgrp->root; | ||
| 1287 | const struct cgroup_subsys *test_ss; | ||
| 1288 | BUG_ON(list_empty(&root->subsys_list)); | ||
| 1289 | test_ss = list_entry(root->subsys_list.next, | ||
| 1290 | struct cgroup_subsys, sibling); | ||
| 1291 | if (css) { | ||
| 1292 | *css = cgrp->subsys[test_ss->subsys_id]; | ||
| 1293 | BUG_ON(!*css); | ||
| 1294 | } | ||
| 1295 | if (subsys_id) | ||
| 1296 | *subsys_id = test_ss->subsys_id; | ||
| 1297 | } | ||
| 1298 | |||
| 1299 | /** | 1531 | /** |
| 1300 | * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp' | 1532 | * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp' |
| 1301 | * @cgrp: the cgroup the task is attaching to | 1533 | * @cgrp: the cgroup the task is attaching to |
| @@ -1312,18 +1544,15 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | |||
| 1312 | struct css_set *cg; | 1544 | struct css_set *cg; |
| 1313 | struct css_set *newcg; | 1545 | struct css_set *newcg; |
| 1314 | struct cgroupfs_root *root = cgrp->root; | 1546 | struct cgroupfs_root *root = cgrp->root; |
| 1315 | int subsys_id; | ||
| 1316 | |||
| 1317 | get_first_subsys(cgrp, NULL, &subsys_id); | ||
| 1318 | 1547 | ||
| 1319 | /* Nothing to do if the task is already in that cgroup */ | 1548 | /* Nothing to do if the task is already in that cgroup */ |
| 1320 | oldcgrp = task_cgroup(tsk, subsys_id); | 1549 | oldcgrp = task_cgroup_from_root(tsk, root); |
| 1321 | if (cgrp == oldcgrp) | 1550 | if (cgrp == oldcgrp) |
| 1322 | return 0; | 1551 | return 0; |
| 1323 | 1552 | ||
| 1324 | for_each_subsys(root, ss) { | 1553 | for_each_subsys(root, ss) { |
| 1325 | if (ss->can_attach) { | 1554 | if (ss->can_attach) { |
| 1326 | retval = ss->can_attach(ss, cgrp, tsk); | 1555 | retval = ss->can_attach(ss, cgrp, tsk, false); |
| 1327 | if (retval) | 1556 | if (retval) |
| 1328 | return retval; | 1557 | return retval; |
| 1329 | } | 1558 | } |
| @@ -1361,7 +1590,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | |||
| 1361 | 1590 | ||
| 1362 | for_each_subsys(root, ss) { | 1591 | for_each_subsys(root, ss) { |
| 1363 | if (ss->attach) | 1592 | if (ss->attach) |
| 1364 | ss->attach(ss, cgrp, oldcgrp, tsk); | 1593 | ss->attach(ss, cgrp, oldcgrp, tsk, false); |
| 1365 | } | 1594 | } |
| 1366 | set_bit(CGRP_RELEASABLE, &oldcgrp->flags); | 1595 | set_bit(CGRP_RELEASABLE, &oldcgrp->flags); |
| 1367 | synchronize_rcu(); | 1596 | synchronize_rcu(); |
| @@ -1422,15 +1651,6 @@ static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid) | |||
| 1422 | return ret; | 1651 | return ret; |
| 1423 | } | 1652 | } |
| 1424 | 1653 | ||
| 1425 | /* The various types of files and directories in a cgroup file system */ | ||
| 1426 | enum cgroup_filetype { | ||
| 1427 | FILE_ROOT, | ||
| 1428 | FILE_DIR, | ||
| 1429 | FILE_TASKLIST, | ||
| 1430 | FILE_NOTIFY_ON_RELEASE, | ||
| 1431 | FILE_RELEASE_AGENT, | ||
| 1432 | }; | ||
| 1433 | |||
| 1434 | /** | 1654 | /** |
| 1435 | * cgroup_lock_live_group - take cgroup_mutex and check that cgrp is alive. | 1655 | * cgroup_lock_live_group - take cgroup_mutex and check that cgrp is alive. |
| 1436 | * @cgrp: the cgroup to be checked for liveness | 1656 | * @cgrp: the cgroup to be checked for liveness |
| @@ -1643,7 +1863,7 @@ static int cgroup_seqfile_release(struct inode *inode, struct file *file) | |||
| 1643 | return single_release(inode, file); | 1863 | return single_release(inode, file); |
| 1644 | } | 1864 | } |
| 1645 | 1865 | ||
| 1646 | static struct file_operations cgroup_seqfile_operations = { | 1866 | static const struct file_operations cgroup_seqfile_operations = { |
| 1647 | .read = seq_read, | 1867 | .read = seq_read, |
| 1648 | .write = cgroup_file_write, | 1868 | .write = cgroup_file_write, |
| 1649 | .llseek = seq_lseek, | 1869 | .llseek = seq_lseek, |
| @@ -1702,7 +1922,7 @@ static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 1702 | return simple_rename(old_dir, old_dentry, new_dir, new_dentry); | 1922 | return simple_rename(old_dir, old_dentry, new_dir, new_dentry); |
| 1703 | } | 1923 | } |
| 1704 | 1924 | ||
| 1705 | static struct file_operations cgroup_file_operations = { | 1925 | static const struct file_operations cgroup_file_operations = { |
| 1706 | .read = cgroup_file_read, | 1926 | .read = cgroup_file_read, |
| 1707 | .write = cgroup_file_write, | 1927 | .write = cgroup_file_write, |
| 1708 | .llseek = generic_file_llseek, | 1928 | .llseek = generic_file_llseek, |
| @@ -1710,7 +1930,7 @@ static struct file_operations cgroup_file_operations = { | |||
| 1710 | .release = cgroup_file_release, | 1930 | .release = cgroup_file_release, |
| 1711 | }; | 1931 | }; |
| 1712 | 1932 | ||
| 1713 | static struct inode_operations cgroup_dir_inode_operations = { | 1933 | static const struct inode_operations cgroup_dir_inode_operations = { |
| 1714 | .lookup = simple_lookup, | 1934 | .lookup = simple_lookup, |
| 1715 | .mkdir = cgroup_mkdir, | 1935 | .mkdir = cgroup_mkdir, |
| 1716 | .rmdir = cgroup_rmdir, | 1936 | .rmdir = cgroup_rmdir, |
| @@ -1875,7 +2095,7 @@ int cgroup_task_count(const struct cgroup *cgrp) | |||
| 1875 | * the start of a css_set | 2095 | * the start of a css_set |
| 1876 | */ | 2096 | */ |
| 1877 | static void cgroup_advance_iter(struct cgroup *cgrp, | 2097 | static void cgroup_advance_iter(struct cgroup *cgrp, |
| 1878 | struct cgroup_iter *it) | 2098 | struct cgroup_iter *it) |
| 1879 | { | 2099 | { |
| 1880 | struct list_head *l = it->cg_link; | 2100 | struct list_head *l = it->cg_link; |
| 1881 | struct cg_cgroup_link *link; | 2101 | struct cg_cgroup_link *link; |
| @@ -2128,7 +2348,7 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan) | |||
| 2128 | } | 2348 | } |
| 2129 | 2349 | ||
| 2130 | /* | 2350 | /* |
| 2131 | * Stuff for reading the 'tasks' file. | 2351 | * Stuff for reading the 'tasks'/'procs' files. |
| 2132 | * | 2352 | * |
| 2133 | * Reading this file can return large amounts of data if a cgroup has | 2353 | * Reading this file can return large amounts of data if a cgroup has |
| 2134 | * *lots* of attached tasks. So it may need several calls to read(), | 2354 | * *lots* of attached tasks. So it may need several calls to read(), |
| @@ -2138,27 +2358,196 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan) | |||
| 2138 | */ | 2358 | */ |
| 2139 | 2359 | ||
| 2140 | /* | 2360 | /* |
| 2141 | * Load into 'pidarray' up to 'npids' of the tasks using cgroup | 2361 | * The following two functions "fix" the issue where there are more pids |
| 2142 | * 'cgrp'. Return actual number of pids loaded. No need to | 2362 | * than kmalloc will give memory for; in such cases, we use vmalloc/vfree. |
| 2143 | * task_lock(p) when reading out p->cgroup, since we're in an RCU | 2363 | * TODO: replace with a kernel-wide solution to this problem |
| 2144 | * read section, so the css_set can't go away, and is | 2364 | */ |
| 2145 | * immutable after creation. | 2365 | #define PIDLIST_TOO_LARGE(c) ((c) * sizeof(pid_t) > (PAGE_SIZE * 2)) |
| 2366 | static void *pidlist_allocate(int count) | ||
| 2367 | { | ||
| 2368 | if (PIDLIST_TOO_LARGE(count)) | ||
| 2369 | return vmalloc(count * sizeof(pid_t)); | ||
| 2370 | else | ||
| 2371 | return kmalloc(count * sizeof(pid_t), GFP_KERNEL); | ||
| 2372 | } | ||
| 2373 | static void pidlist_free(void *p) | ||
| 2374 | { | ||
| 2375 | if (is_vmalloc_addr(p)) | ||
| 2376 | vfree(p); | ||
| 2377 | else | ||
| 2378 | kfree(p); | ||
| 2379 | } | ||
| 2380 | static void *pidlist_resize(void *p, int newcount) | ||
| 2381 | { | ||
| 2382 | void *newlist; | ||
| 2383 | /* note: if new alloc fails, old p will still be valid either way */ | ||
| 2384 | if (is_vmalloc_addr(p)) { | ||
| 2385 | newlist = vmalloc(newcount * sizeof(pid_t)); | ||
| 2386 | if (!newlist) | ||
| 2387 | return NULL; | ||
| 2388 | memcpy(newlist, p, newcount * sizeof(pid_t)); | ||
| 2389 | vfree(p); | ||
| 2390 | } else { | ||
| 2391 | newlist = krealloc(p, newcount * sizeof(pid_t), GFP_KERNEL); | ||
| 2392 | } | ||
| 2393 | return newlist; | ||
| 2394 | } | ||
| 2395 | |||
| 2396 | /* | ||
| 2397 | * pidlist_uniq - given a kmalloc()ed list, strip out all duplicate entries | ||
| 2398 | * If the new stripped list is sufficiently smaller and there's enough memory | ||
| 2399 | * to allocate a new buffer, will let go of the unneeded memory. Returns the | ||
| 2400 | * number of unique elements. | ||
| 2401 | */ | ||
| 2402 | /* is the size difference enough that we should re-allocate the array? */ | ||
| 2403 | #define PIDLIST_REALLOC_DIFFERENCE(old, new) ((old) - PAGE_SIZE >= (new)) | ||
| 2404 | static int pidlist_uniq(pid_t **p, int length) | ||
| 2405 | { | ||
| 2406 | int src, dest = 1; | ||
| 2407 | pid_t *list = *p; | ||
| 2408 | pid_t *newlist; | ||
| 2409 | |||
| 2410 | /* | ||
| 2411 | * we presume the 0th element is unique, so i starts at 1. trivial | ||
| 2412 | * edge cases first; no work needs to be done for either | ||
| 2413 | */ | ||
| 2414 | if (length == 0 || length == 1) | ||
| 2415 | return length; | ||
| 2416 | /* src and dest walk down the list; dest counts unique elements */ | ||
| 2417 | for (src = 1; src < length; src++) { | ||
| 2418 | /* find next unique element */ | ||
| 2419 | while (list[src] == list[src-1]) { | ||
| 2420 | src++; | ||
| 2421 | if (src == length) | ||
| 2422 | goto after; | ||
| 2423 | } | ||
| 2424 | /* dest always points to where the next unique element goes */ | ||
| 2425 | list[dest] = list[src]; | ||
| 2426 | dest++; | ||
| 2427 | } | ||
| 2428 | after: | ||
| 2429 | /* | ||
| 2430 | * if the length difference is large enough, we want to allocate a | ||
| 2431 | * smaller buffer to save memory. if this fails due to out of memory, | ||
| 2432 | * we'll just stay with what we've got. | ||
| 2433 | */ | ||
| 2434 | if (PIDLIST_REALLOC_DIFFERENCE(length, dest)) { | ||
| 2435 | newlist = pidlist_resize(list, dest); | ||
| 2436 | if (newlist) | ||
| 2437 | *p = newlist; | ||
| 2438 | } | ||
| 2439 | return dest; | ||
| 2440 | } | ||
| 2441 | |||
| 2442 | static int cmppid(const void *a, const void *b) | ||
| 2443 | { | ||
| 2444 | return *(pid_t *)a - *(pid_t *)b; | ||
| 2445 | } | ||
| 2446 | |||
| 2447 | /* | ||
| 2448 | * find the appropriate pidlist for our purpose (given procs vs tasks) | ||
| 2449 | * returns with the lock on that pidlist already held, and takes care | ||
| 2450 | * of the use count, or returns NULL with no locks held if we're out of | ||
| 2451 | * memory. | ||
| 2146 | */ | 2452 | */ |
| 2147 | static int pid_array_load(pid_t *pidarray, int npids, struct cgroup *cgrp) | 2453 | static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp, |
| 2454 | enum cgroup_filetype type) | ||
| 2148 | { | 2455 | { |
| 2149 | int n = 0, pid; | 2456 | struct cgroup_pidlist *l; |
| 2457 | /* don't need task_nsproxy() if we're looking at ourself */ | ||
| 2458 | struct pid_namespace *ns = get_pid_ns(current->nsproxy->pid_ns); | ||
| 2459 | /* | ||
| 2460 | * We can't drop the pidlist_mutex before taking the l->mutex in case | ||
| 2461 | * the last ref-holder is trying to remove l from the list at the same | ||
| 2462 | * time. Holding the pidlist_mutex precludes somebody taking whichever | ||
| 2463 | * list we find out from under us - compare release_pid_array(). | ||
| 2464 | */ | ||
| 2465 | mutex_lock(&cgrp->pidlist_mutex); | ||
| 2466 | list_for_each_entry(l, &cgrp->pidlists, links) { | ||
| 2467 | if (l->key.type == type && l->key.ns == ns) { | ||
| 2468 | /* found a matching list - drop the extra refcount */ | ||
| 2469 | put_pid_ns(ns); | ||
| 2470 | /* make sure l doesn't vanish out from under us */ | ||
| 2471 | down_write(&l->mutex); | ||
| 2472 | mutex_unlock(&cgrp->pidlist_mutex); | ||
| 2473 | l->use_count++; | ||
| 2474 | return l; | ||
| 2475 | } | ||
| 2476 | } | ||
| 2477 | /* entry not found; create a new one */ | ||
| 2478 | l = kmalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL); | ||
| 2479 | if (!l) { | ||
| 2480 | mutex_unlock(&cgrp->pidlist_mutex); | ||
| 2481 | put_pid_ns(ns); | ||
| 2482 | return l; | ||
| 2483 | } | ||
| 2484 | init_rwsem(&l->mutex); | ||
| 2485 | down_write(&l->mutex); | ||
| 2486 | l->key.type = type; | ||
| 2487 | l->key.ns = ns; | ||
| 2488 | l->use_count = 0; /* don't increment here */ | ||
| 2489 | l->list = NULL; | ||
| 2490 | l->owner = cgrp; | ||
| 2491 | list_add(&l->links, &cgrp->pidlists); | ||
| 2492 | mutex_unlock(&cgrp->pidlist_mutex); | ||
| 2493 | return l; | ||
| 2494 | } | ||
| 2495 | |||
| 2496 | /* | ||
| 2497 | * Load a cgroup's pidarray with either procs' tgids or tasks' pids | ||
| 2498 | */ | ||
| 2499 | static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type, | ||
| 2500 | struct cgroup_pidlist **lp) | ||
| 2501 | { | ||
| 2502 | pid_t *array; | ||
| 2503 | int length; | ||
| 2504 | int pid, n = 0; /* used for populating the array */ | ||
| 2150 | struct cgroup_iter it; | 2505 | struct cgroup_iter it; |
| 2151 | struct task_struct *tsk; | 2506 | struct task_struct *tsk; |
| 2507 | struct cgroup_pidlist *l; | ||
| 2508 | |||
| 2509 | /* | ||
| 2510 | * If cgroup gets more users after we read count, we won't have | ||
| 2511 | * enough space - tough. This race is indistinguishable to the | ||
| 2512 | * caller from the case that the additional cgroup users didn't | ||
| 2513 | * show up until sometime later on. | ||
| 2514 | */ | ||
| 2515 | length = cgroup_task_count(cgrp); | ||
| 2516 | array = pidlist_allocate(length); | ||
| 2517 | if (!array) | ||
| 2518 | return -ENOMEM; | ||
| 2519 | /* now, populate the array */ | ||
| 2152 | cgroup_iter_start(cgrp, &it); | 2520 | cgroup_iter_start(cgrp, &it); |
| 2153 | while ((tsk = cgroup_iter_next(cgrp, &it))) { | 2521 | while ((tsk = cgroup_iter_next(cgrp, &it))) { |
| 2154 | if (unlikely(n == npids)) | 2522 | if (unlikely(n == length)) |
| 2155 | break; | 2523 | break; |
| 2156 | pid = task_pid_vnr(tsk); | 2524 | /* get tgid or pid for procs or tasks file respectively */ |
| 2157 | if (pid > 0) | 2525 | if (type == CGROUP_FILE_PROCS) |
| 2158 | pidarray[n++] = pid; | 2526 | pid = task_tgid_vnr(tsk); |
| 2527 | else | ||
| 2528 | pid = task_pid_vnr(tsk); | ||
| 2529 | if (pid > 0) /* make sure to only use valid results */ | ||
| 2530 | array[n++] = pid; | ||
| 2159 | } | 2531 | } |
| 2160 | cgroup_iter_end(cgrp, &it); | 2532 | cgroup_iter_end(cgrp, &it); |
| 2161 | return n; | 2533 | length = n; |
| 2534 | /* now sort & (if procs) strip out duplicates */ | ||
| 2535 | sort(array, length, sizeof(pid_t), cmppid, NULL); | ||
| 2536 | if (type == CGROUP_FILE_PROCS) | ||
| 2537 | length = pidlist_uniq(&array, length); | ||
| 2538 | l = cgroup_pidlist_find(cgrp, type); | ||
| 2539 | if (!l) { | ||
| 2540 | pidlist_free(array); | ||
| 2541 | return -ENOMEM; | ||
| 2542 | } | ||
| 2543 | /* store array, freeing old if necessary - lock already held */ | ||
| 2544 | pidlist_free(l->list); | ||
| 2545 | l->list = array; | ||
| 2546 | l->length = length; | ||
| 2547 | l->use_count++; | ||
| 2548 | up_write(&l->mutex); | ||
| 2549 | *lp = l; | ||
| 2550 | return 0; | ||
| 2162 | } | 2551 | } |
| 2163 | 2552 | ||
| 2164 | /** | 2553 | /** |
| @@ -2215,37 +2604,14 @@ err: | |||
| 2215 | return ret; | 2604 | return ret; |
| 2216 | } | 2605 | } |
| 2217 | 2606 | ||
| 2218 | /* | ||
| 2219 | * Cache pids for all threads in the same pid namespace that are | ||
| 2220 | * opening the same "tasks" file. | ||
| 2221 | */ | ||
| 2222 | struct cgroup_pids { | ||
| 2223 | /* The node in cgrp->pids_list */ | ||
| 2224 | struct list_head list; | ||
| 2225 | /* The cgroup those pids belong to */ | ||
| 2226 | struct cgroup *cgrp; | ||
| 2227 | /* The namepsace those pids belong to */ | ||
| 2228 | struct pid_namespace *ns; | ||
| 2229 | /* Array of process ids in the cgroup */ | ||
| 2230 | pid_t *tasks_pids; | ||
| 2231 | /* How many files are using the this tasks_pids array */ | ||
| 2232 | int use_count; | ||
| 2233 | /* Length of the current tasks_pids array */ | ||
| 2234 | int length; | ||
| 2235 | }; | ||
| 2236 | |||
| 2237 | static int cmppid(const void *a, const void *b) | ||
| 2238 | { | ||
| 2239 | return *(pid_t *)a - *(pid_t *)b; | ||
| 2240 | } | ||
| 2241 | 2607 | ||
| 2242 | /* | 2608 | /* |
| 2243 | * seq_file methods for the "tasks" file. The seq_file position is the | 2609 | * seq_file methods for the tasks/procs files. The seq_file position is the |
| 2244 | * next pid to display; the seq_file iterator is a pointer to the pid | 2610 | * next pid to display; the seq_file iterator is a pointer to the pid |
| 2245 | * in the cgroup->tasks_pids array. | 2611 | * in the cgroup->l->list array. |
| 2246 | */ | 2612 | */ |
| 2247 | 2613 | ||
| 2248 | static void *cgroup_tasks_start(struct seq_file *s, loff_t *pos) | 2614 | static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos) |
| 2249 | { | 2615 | { |
| 2250 | /* | 2616 | /* |
| 2251 | * Initially we receive a position value that corresponds to | 2617 | * Initially we receive a position value that corresponds to |
| @@ -2253,48 +2619,45 @@ static void *cgroup_tasks_start(struct seq_file *s, loff_t *pos) | |||
| 2253 | * after a seek to the start). Use a binary-search to find the | 2619 | * after a seek to the start). Use a binary-search to find the |
| 2254 | * next pid to display, if any | 2620 | * next pid to display, if any |
| 2255 | */ | 2621 | */ |
| 2256 | struct cgroup_pids *cp = s->private; | 2622 | struct cgroup_pidlist *l = s->private; |
| 2257 | struct cgroup *cgrp = cp->cgrp; | ||
| 2258 | int index = 0, pid = *pos; | 2623 | int index = 0, pid = *pos; |
| 2259 | int *iter; | 2624 | int *iter; |
| 2260 | 2625 | ||
| 2261 | down_read(&cgrp->pids_mutex); | 2626 | down_read(&l->mutex); |
| 2262 | if (pid) { | 2627 | if (pid) { |
| 2263 | int end = cp->length; | 2628 | int end = l->length; |
| 2264 | 2629 | ||
| 2265 | while (index < end) { | 2630 | while (index < end) { |
| 2266 | int mid = (index + end) / 2; | 2631 | int mid = (index + end) / 2; |
| 2267 | if (cp->tasks_pids[mid] == pid) { | 2632 | if (l->list[mid] == pid) { |
| 2268 | index = mid; | 2633 | index = mid; |
| 2269 | break; | 2634 | break; |
| 2270 | } else if (cp->tasks_pids[mid] <= pid) | 2635 | } else if (l->list[mid] <= pid) |
| 2271 | index = mid + 1; | 2636 | index = mid + 1; |
| 2272 | else | 2637 | else |
| 2273 | end = mid; | 2638 | end = mid; |
| 2274 | } | 2639 | } |
| 2275 | } | 2640 | } |
| 2276 | /* If we're off the end of the array, we're done */ | 2641 | /* If we're off the end of the array, we're done */ |
| 2277 | if (index >= cp->length) | 2642 | if (index >= l->length) |
| 2278 | return NULL; | 2643 | return NULL; |
| 2279 | /* Update the abstract position to be the actual pid that we found */ | 2644 | /* Update the abstract position to be the actual pid that we found */ |
| 2280 | iter = cp->tasks_pids + index; | 2645 | iter = l->list + index; |
| 2281 | *pos = *iter; | 2646 | *pos = *iter; |
| 2282 | return iter; | 2647 | return iter; |
| 2283 | } | 2648 | } |
| 2284 | 2649 | ||
| 2285 | static void cgroup_tasks_stop(struct seq_file *s, void *v) | 2650 | static void cgroup_pidlist_stop(struct seq_file *s, void *v) |
| 2286 | { | 2651 | { |
| 2287 | struct cgroup_pids *cp = s->private; | 2652 | struct cgroup_pidlist *l = s->private; |
| 2288 | struct cgroup *cgrp = cp->cgrp; | 2653 | up_read(&l->mutex); |
| 2289 | up_read(&cgrp->pids_mutex); | ||
| 2290 | } | 2654 | } |
| 2291 | 2655 | ||
| 2292 | static void *cgroup_tasks_next(struct seq_file *s, void *v, loff_t *pos) | 2656 | static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos) |
| 2293 | { | 2657 | { |
| 2294 | struct cgroup_pids *cp = s->private; | 2658 | struct cgroup_pidlist *l = s->private; |
| 2295 | int *p = v; | 2659 | pid_t *p = v; |
| 2296 | int *end = cp->tasks_pids + cp->length; | 2660 | pid_t *end = l->list + l->length; |
| 2297 | |||
| 2298 | /* | 2661 | /* |
| 2299 | * Advance to the next pid in the array. If this goes off the | 2662 | * Advance to the next pid in the array. If this goes off the |
| 2300 | * end, we're done | 2663 | * end, we're done |
| @@ -2308,124 +2671,107 @@ static void *cgroup_tasks_next(struct seq_file *s, void *v, loff_t *pos) | |||
| 2308 | } | 2671 | } |
| 2309 | } | 2672 | } |
| 2310 | 2673 | ||
| 2311 | static int cgroup_tasks_show(struct seq_file *s, void *v) | 2674 | static int cgroup_pidlist_show(struct seq_file *s, void *v) |
| 2312 | { | 2675 | { |
| 2313 | return seq_printf(s, "%d\n", *(int *)v); | 2676 | return seq_printf(s, "%d\n", *(int *)v); |
| 2314 | } | 2677 | } |
| 2315 | 2678 | ||
| 2316 | static struct seq_operations cgroup_tasks_seq_operations = { | 2679 | /* |
| 2317 | .start = cgroup_tasks_start, | 2680 | * seq_operations functions for iterating on pidlists through seq_file - |
| 2318 | .stop = cgroup_tasks_stop, | 2681 | * independent of whether it's tasks or procs |
| 2319 | .next = cgroup_tasks_next, | 2682 | */ |
| 2320 | .show = cgroup_tasks_show, | 2683 | static const struct seq_operations cgroup_pidlist_seq_operations = { |
| 2684 | .start = cgroup_pidlist_start, | ||
| 2685 | .stop = cgroup_pidlist_stop, | ||
| 2686 | .next = cgroup_pidlist_next, | ||
| 2687 | .show = cgroup_pidlist_show, | ||
| 2321 | }; | 2688 | }; |
| 2322 | 2689 | ||
| 2323 | static void release_cgroup_pid_array(struct cgroup_pids *cp) | 2690 | static void cgroup_release_pid_array(struct cgroup_pidlist *l) |
| 2324 | { | 2691 | { |
| 2325 | struct cgroup *cgrp = cp->cgrp; | 2692 | /* |
| 2326 | 2693 | * the case where we're the last user of this particular pidlist will | |
| 2327 | down_write(&cgrp->pids_mutex); | 2694 | * have us remove it from the cgroup's list, which entails taking the |
| 2328 | BUG_ON(!cp->use_count); | 2695 | * mutex. since in pidlist_find the pidlist->lock depends on cgroup-> |
| 2329 | if (!--cp->use_count) { | 2696 | * pidlist_mutex, we have to take pidlist_mutex first. |
| 2330 | list_del(&cp->list); | 2697 | */ |
| 2331 | put_pid_ns(cp->ns); | 2698 | mutex_lock(&l->owner->pidlist_mutex); |
| 2332 | kfree(cp->tasks_pids); | 2699 | down_write(&l->mutex); |
| 2333 | kfree(cp); | 2700 | BUG_ON(!l->use_count); |
| 2701 | if (!--l->use_count) { | ||
| 2702 | /* we're the last user if refcount is 0; remove and free */ | ||
| 2703 | list_del(&l->links); | ||
| 2704 | mutex_unlock(&l->owner->pidlist_mutex); | ||
| 2705 | pidlist_free(l->list); | ||
| 2706 | put_pid_ns(l->key.ns); | ||
| 2707 | up_write(&l->mutex); | ||
| 2708 | kfree(l); | ||
| 2709 | return; | ||
| 2334 | } | 2710 | } |
| 2335 | up_write(&cgrp->pids_mutex); | 2711 | mutex_unlock(&l->owner->pidlist_mutex); |
| 2712 | up_write(&l->mutex); | ||
| 2336 | } | 2713 | } |
| 2337 | 2714 | ||
| 2338 | static int cgroup_tasks_release(struct inode *inode, struct file *file) | 2715 | static int cgroup_pidlist_release(struct inode *inode, struct file *file) |
| 2339 | { | 2716 | { |
| 2340 | struct seq_file *seq; | 2717 | struct cgroup_pidlist *l; |
| 2341 | struct cgroup_pids *cp; | ||
| 2342 | |||
| 2343 | if (!(file->f_mode & FMODE_READ)) | 2718 | if (!(file->f_mode & FMODE_READ)) |
| 2344 | return 0; | 2719 | return 0; |
| 2345 | 2720 | /* | |
| 2346 | seq = file->private_data; | 2721 | * the seq_file will only be initialized if the file was opened for |
| 2347 | cp = seq->private; | 2722 | * reading; hence we check if it's not null only in that case. |
| 2348 | 2723 | */ | |
| 2349 | release_cgroup_pid_array(cp); | 2724 | l = ((struct seq_file *)file->private_data)->private; |
| 2725 | cgroup_release_pid_array(l); | ||
| 2350 | return seq_release(inode, file); | 2726 | return seq_release(inode, file); |
| 2351 | } | 2727 | } |
| 2352 | 2728 | ||
| 2353 | static struct file_operations cgroup_tasks_operations = { | 2729 | static const struct file_operations cgroup_pidlist_operations = { |
| 2354 | .read = seq_read, | 2730 | .read = seq_read, |
| 2355 | .llseek = seq_lseek, | 2731 | .llseek = seq_lseek, |
| 2356 | .write = cgroup_file_write, | 2732 | .write = cgroup_file_write, |
| 2357 | .release = cgroup_tasks_release, | 2733 | .release = cgroup_pidlist_release, |
| 2358 | }; | 2734 | }; |
| 2359 | 2735 | ||
| 2360 | /* | 2736 | /* |
| 2361 | * Handle an open on 'tasks' file. Prepare an array containing the | 2737 | * The following functions handle opens on a file that displays a pidlist |
| 2362 | * process id's of tasks currently attached to the cgroup being opened. | 2738 | * (tasks or procs). Prepare an array of the process/thread IDs of whoever's |
| 2739 | * in the cgroup. | ||
| 2363 | */ | 2740 | */ |
| 2364 | 2741 | /* helper function for the two below it */ | |
| 2365 | static int cgroup_tasks_open(struct inode *unused, struct file *file) | 2742 | static int cgroup_pidlist_open(struct file *file, enum cgroup_filetype type) |
| 2366 | { | 2743 | { |
| 2367 | struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); | 2744 | struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); |
| 2368 | struct pid_namespace *ns = current->nsproxy->pid_ns; | 2745 | struct cgroup_pidlist *l; |
| 2369 | struct cgroup_pids *cp; | ||
| 2370 | pid_t *pidarray; | ||
| 2371 | int npids; | ||
| 2372 | int retval; | 2746 | int retval; |
| 2373 | 2747 | ||
| 2374 | /* Nothing to do for write-only files */ | 2748 | /* Nothing to do for write-only files */ |
| 2375 | if (!(file->f_mode & FMODE_READ)) | 2749 | if (!(file->f_mode & FMODE_READ)) |
| 2376 | return 0; | 2750 | return 0; |
| 2377 | 2751 | ||
| 2378 | /* | 2752 | /* have the array populated */ |
| 2379 | * If cgroup gets more users after we read count, we won't have | 2753 | retval = pidlist_array_load(cgrp, type, &l); |
| 2380 | * enough space - tough. This race is indistinguishable to the | 2754 | if (retval) |
| 2381 | * caller from the case that the additional cgroup users didn't | 2755 | return retval; |
| 2382 | * show up until sometime later on. | 2756 | /* configure file information */ |
| 2383 | */ | 2757 | file->f_op = &cgroup_pidlist_operations; |
| 2384 | npids = cgroup_task_count(cgrp); | ||
| 2385 | pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL); | ||
| 2386 | if (!pidarray) | ||
| 2387 | return -ENOMEM; | ||
| 2388 | npids = pid_array_load(pidarray, npids, cgrp); | ||
| 2389 | sort(pidarray, npids, sizeof(pid_t), cmppid, NULL); | ||
| 2390 | |||
| 2391 | /* | ||
| 2392 | * Store the array in the cgroup, freeing the old | ||
| 2393 | * array if necessary | ||
| 2394 | */ | ||
| 2395 | down_write(&cgrp->pids_mutex); | ||
| 2396 | |||
| 2397 | list_for_each_entry(cp, &cgrp->pids_list, list) { | ||
| 2398 | if (ns == cp->ns) | ||
| 2399 | goto found; | ||
| 2400 | } | ||
| 2401 | |||
| 2402 | cp = kzalloc(sizeof(*cp), GFP_KERNEL); | ||
| 2403 | if (!cp) { | ||
| 2404 | up_write(&cgrp->pids_mutex); | ||
| 2405 | kfree(pidarray); | ||
| 2406 | return -ENOMEM; | ||
| 2407 | } | ||
| 2408 | cp->cgrp = cgrp; | ||
| 2409 | cp->ns = ns; | ||
| 2410 | get_pid_ns(ns); | ||
| 2411 | list_add(&cp->list, &cgrp->pids_list); | ||
| 2412 | found: | ||
| 2413 | kfree(cp->tasks_pids); | ||
| 2414 | cp->tasks_pids = pidarray; | ||
| 2415 | cp->length = npids; | ||
| 2416 | cp->use_count++; | ||
| 2417 | up_write(&cgrp->pids_mutex); | ||
| 2418 | |||
| 2419 | file->f_op = &cgroup_tasks_operations; | ||
| 2420 | 2758 | ||
| 2421 | retval = seq_open(file, &cgroup_tasks_seq_operations); | 2759 | retval = seq_open(file, &cgroup_pidlist_seq_operations); |
| 2422 | if (retval) { | 2760 | if (retval) { |
| 2423 | release_cgroup_pid_array(cp); | 2761 | cgroup_release_pid_array(l); |
| 2424 | return retval; | 2762 | return retval; |
| 2425 | } | 2763 | } |
| 2426 | ((struct seq_file *)file->private_data)->private = cp; | 2764 | ((struct seq_file *)file->private_data)->private = l; |
| 2427 | return 0; | 2765 | return 0; |
| 2428 | } | 2766 | } |
| 2767 | static int cgroup_tasks_open(struct inode *unused, struct file *file) | ||
| 2768 | { | ||
| 2769 | return cgroup_pidlist_open(file, CGROUP_FILE_TASKS); | ||
| 2770 | } | ||
| 2771 | static int cgroup_procs_open(struct inode *unused, struct file *file) | ||
| 2772 | { | ||
| 2773 | return cgroup_pidlist_open(file, CGROUP_FILE_PROCS); | ||
| 2774 | } | ||
| 2429 | 2775 | ||
| 2430 | static u64 cgroup_read_notify_on_release(struct cgroup *cgrp, | 2776 | static u64 cgroup_read_notify_on_release(struct cgroup *cgrp, |
| 2431 | struct cftype *cft) | 2777 | struct cftype *cft) |
| @@ -2448,21 +2794,27 @@ static int cgroup_write_notify_on_release(struct cgroup *cgrp, | |||
| 2448 | /* | 2794 | /* |
| 2449 | * for the common functions, 'private' gives the type of file | 2795 | * for the common functions, 'private' gives the type of file |
| 2450 | */ | 2796 | */ |
| 2797 | /* for hysterical raisins, we can't put this on the older files */ | ||
| 2798 | #define CGROUP_FILE_GENERIC_PREFIX "cgroup." | ||
| 2451 | static struct cftype files[] = { | 2799 | static struct cftype files[] = { |
| 2452 | { | 2800 | { |
| 2453 | .name = "tasks", | 2801 | .name = "tasks", |
| 2454 | .open = cgroup_tasks_open, | 2802 | .open = cgroup_tasks_open, |
| 2455 | .write_u64 = cgroup_tasks_write, | 2803 | .write_u64 = cgroup_tasks_write, |
| 2456 | .release = cgroup_tasks_release, | 2804 | .release = cgroup_pidlist_release, |
| 2457 | .private = FILE_TASKLIST, | ||
| 2458 | .mode = S_IRUGO | S_IWUSR, | 2805 | .mode = S_IRUGO | S_IWUSR, |
| 2459 | }, | 2806 | }, |
| 2460 | 2807 | { | |
| 2808 | .name = CGROUP_FILE_GENERIC_PREFIX "procs", | ||
| 2809 | .open = cgroup_procs_open, | ||
| 2810 | /* .write_u64 = cgroup_procs_write, TODO */ | ||
| 2811 | .release = cgroup_pidlist_release, | ||
| 2812 | .mode = S_IRUGO, | ||
| 2813 | }, | ||
| 2461 | { | 2814 | { |
| 2462 | .name = "notify_on_release", | 2815 | .name = "notify_on_release", |
| 2463 | .read_u64 = cgroup_read_notify_on_release, | 2816 | .read_u64 = cgroup_read_notify_on_release, |
| 2464 | .write_u64 = cgroup_write_notify_on_release, | 2817 | .write_u64 = cgroup_write_notify_on_release, |
| 2465 | .private = FILE_NOTIFY_ON_RELEASE, | ||
| 2466 | }, | 2818 | }, |
| 2467 | }; | 2819 | }; |
| 2468 | 2820 | ||
| @@ -2471,7 +2823,6 @@ static struct cftype cft_release_agent = { | |||
| 2471 | .read_seq_string = cgroup_release_agent_show, | 2823 | .read_seq_string = cgroup_release_agent_show, |
| 2472 | .write_string = cgroup_release_agent_write, | 2824 | .write_string = cgroup_release_agent_write, |
| 2473 | .max_write_len = PATH_MAX, | 2825 | .max_write_len = PATH_MAX, |
| 2474 | .private = FILE_RELEASE_AGENT, | ||
| 2475 | }; | 2826 | }; |
| 2476 | 2827 | ||
| 2477 | static int cgroup_populate_dir(struct cgroup *cgrp) | 2828 | static int cgroup_populate_dir(struct cgroup *cgrp) |
| @@ -2878,6 +3229,7 @@ int __init cgroup_init_early(void) | |||
| 2878 | init_task.cgroups = &init_css_set; | 3229 | init_task.cgroups = &init_css_set; |
| 2879 | 3230 | ||
| 2880 | init_css_set_link.cg = &init_css_set; | 3231 | init_css_set_link.cg = &init_css_set; |
| 3232 | init_css_set_link.cgrp = dummytop; | ||
| 2881 | list_add(&init_css_set_link.cgrp_link_list, | 3233 | list_add(&init_css_set_link.cgrp_link_list, |
| 2882 | &rootnode.top_cgroup.css_sets); | 3234 | &rootnode.top_cgroup.css_sets); |
| 2883 | list_add(&init_css_set_link.cg_link_list, | 3235 | list_add(&init_css_set_link.cg_link_list, |
| @@ -2932,7 +3284,7 @@ int __init cgroup_init(void) | |||
| 2932 | /* Add init_css_set to the hash table */ | 3284 | /* Add init_css_set to the hash table */ |
| 2933 | hhead = css_set_hash(init_css_set.subsys); | 3285 | hhead = css_set_hash(init_css_set.subsys); |
| 2934 | hlist_add_head(&init_css_set.hlist, hhead); | 3286 | hlist_add_head(&init_css_set.hlist, hhead); |
| 2935 | 3287 | BUG_ON(!init_root_id(&rootnode)); | |
| 2936 | err = register_filesystem(&cgroup_fs_type); | 3288 | err = register_filesystem(&cgroup_fs_type); |
| 2937 | if (err < 0) | 3289 | if (err < 0) |
| 2938 | goto out; | 3290 | goto out; |
| @@ -2985,15 +3337,16 @@ static int proc_cgroup_show(struct seq_file *m, void *v) | |||
| 2985 | for_each_active_root(root) { | 3337 | for_each_active_root(root) { |
| 2986 | struct cgroup_subsys *ss; | 3338 | struct cgroup_subsys *ss; |
| 2987 | struct cgroup *cgrp; | 3339 | struct cgroup *cgrp; |
| 2988 | int subsys_id; | ||
| 2989 | int count = 0; | 3340 | int count = 0; |
| 2990 | 3341 | ||
| 2991 | seq_printf(m, "%lu:", root->subsys_bits); | 3342 | seq_printf(m, "%d:", root->hierarchy_id); |
| 2992 | for_each_subsys(root, ss) | 3343 | for_each_subsys(root, ss) |
| 2993 | seq_printf(m, "%s%s", count++ ? "," : "", ss->name); | 3344 | seq_printf(m, "%s%s", count++ ? "," : "", ss->name); |
| 3345 | if (strlen(root->name)) | ||
| 3346 | seq_printf(m, "%sname=%s", count ? "," : "", | ||
| 3347 | root->name); | ||
| 2994 | seq_putc(m, ':'); | 3348 | seq_putc(m, ':'); |
| 2995 | get_first_subsys(&root->top_cgroup, NULL, &subsys_id); | 3349 | cgrp = task_cgroup_from_root(tsk, root); |
| 2996 | cgrp = task_cgroup(tsk, subsys_id); | ||
| 2997 | retval = cgroup_path(cgrp, buf, PAGE_SIZE); | 3350 | retval = cgroup_path(cgrp, buf, PAGE_SIZE); |
| 2998 | if (retval < 0) | 3351 | if (retval < 0) |
| 2999 | goto out_unlock; | 3352 | goto out_unlock; |
| @@ -3016,7 +3369,7 @@ static int cgroup_open(struct inode *inode, struct file *file) | |||
| 3016 | return single_open(file, proc_cgroup_show, pid); | 3369 | return single_open(file, proc_cgroup_show, pid); |
| 3017 | } | 3370 | } |
| 3018 | 3371 | ||
| 3019 | struct file_operations proc_cgroup_operations = { | 3372 | const struct file_operations proc_cgroup_operations = { |
| 3020 | .open = cgroup_open, | 3373 | .open = cgroup_open, |
| 3021 | .read = seq_read, | 3374 | .read = seq_read, |
| 3022 | .llseek = seq_lseek, | 3375 | .llseek = seq_lseek, |
| @@ -3032,8 +3385,8 @@ static int proc_cgroupstats_show(struct seq_file *m, void *v) | |||
| 3032 | mutex_lock(&cgroup_mutex); | 3385 | mutex_lock(&cgroup_mutex); |
| 3033 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | 3386 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { |
| 3034 | struct cgroup_subsys *ss = subsys[i]; | 3387 | struct cgroup_subsys *ss = subsys[i]; |
| 3035 | seq_printf(m, "%s\t%lu\t%d\t%d\n", | 3388 | seq_printf(m, "%s\t%d\t%d\t%d\n", |
| 3036 | ss->name, ss->root->subsys_bits, | 3389 | ss->name, ss->root->hierarchy_id, |
| 3037 | ss->root->number_of_cgroups, !ss->disabled); | 3390 | ss->root->number_of_cgroups, !ss->disabled); |
| 3038 | } | 3391 | } |
| 3039 | mutex_unlock(&cgroup_mutex); | 3392 | mutex_unlock(&cgroup_mutex); |
| @@ -3045,7 +3398,7 @@ static int cgroupstats_open(struct inode *inode, struct file *file) | |||
| 3045 | return single_open(file, proc_cgroupstats_show, NULL); | 3398 | return single_open(file, proc_cgroupstats_show, NULL); |
| 3046 | } | 3399 | } |
| 3047 | 3400 | ||
| 3048 | static struct file_operations proc_cgroupstats_operations = { | 3401 | static const struct file_operations proc_cgroupstats_operations = { |
| 3049 | .open = cgroupstats_open, | 3402 | .open = cgroupstats_open, |
| 3050 | .read = seq_read, | 3403 | .read = seq_read, |
| 3051 | .llseek = seq_lseek, | 3404 | .llseek = seq_lseek, |
| @@ -3319,13 +3672,11 @@ int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task) | |||
| 3319 | { | 3672 | { |
| 3320 | int ret; | 3673 | int ret; |
| 3321 | struct cgroup *target; | 3674 | struct cgroup *target; |
| 3322 | int subsys_id; | ||
| 3323 | 3675 | ||
| 3324 | if (cgrp == dummytop) | 3676 | if (cgrp == dummytop) |
| 3325 | return 1; | 3677 | return 1; |
| 3326 | 3678 | ||
| 3327 | get_first_subsys(cgrp, NULL, &subsys_id); | 3679 | target = task_cgroup_from_root(task, cgrp->root); |
| 3328 | target = task_cgroup(task, subsys_id); | ||
| 3329 | while (cgrp != target && cgrp!= cgrp->top_cgroup) | 3680 | while (cgrp != target && cgrp!= cgrp->top_cgroup) |
| 3330 | cgrp = cgrp->parent; | 3681 | cgrp = cgrp->parent; |
| 3331 | ret = (cgrp == target); | 3682 | ret = (cgrp == target); |
| @@ -3357,8 +3708,10 @@ static void check_for_release(struct cgroup *cgrp) | |||
| 3357 | void __css_put(struct cgroup_subsys_state *css) | 3708 | void __css_put(struct cgroup_subsys_state *css) |
| 3358 | { | 3709 | { |
| 3359 | struct cgroup *cgrp = css->cgroup; | 3710 | struct cgroup *cgrp = css->cgroup; |
| 3711 | int val; | ||
| 3360 | rcu_read_lock(); | 3712 | rcu_read_lock(); |
| 3361 | if (atomic_dec_return(&css->refcnt) == 1) { | 3713 | val = atomic_dec_return(&css->refcnt); |
| 3714 | if (val == 1) { | ||
| 3362 | if (notify_on_release(cgrp)) { | 3715 | if (notify_on_release(cgrp)) { |
| 3363 | set_bit(CGRP_RELEASABLE, &cgrp->flags); | 3716 | set_bit(CGRP_RELEASABLE, &cgrp->flags); |
| 3364 | check_for_release(cgrp); | 3717 | check_for_release(cgrp); |
| @@ -3366,6 +3719,7 @@ void __css_put(struct cgroup_subsys_state *css) | |||
| 3366 | cgroup_wakeup_rmdir_waiter(cgrp); | 3719 | cgroup_wakeup_rmdir_waiter(cgrp); |
| 3367 | } | 3720 | } |
| 3368 | rcu_read_unlock(); | 3721 | rcu_read_unlock(); |
| 3722 | WARN_ON_ONCE(val < 1); | ||
| 3369 | } | 3723 | } |
| 3370 | 3724 | ||
| 3371 | /* | 3725 | /* |
| @@ -3692,3 +4046,154 @@ css_get_next(struct cgroup_subsys *ss, int id, | |||
| 3692 | return ret; | 4046 | return ret; |
| 3693 | } | 4047 | } |
| 3694 | 4048 | ||
| 4049 | #ifdef CONFIG_CGROUP_DEBUG | ||
| 4050 | static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss, | ||
| 4051 | struct cgroup *cont) | ||
| 4052 | { | ||
| 4053 | struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL); | ||
| 4054 | |||
| 4055 | if (!css) | ||
| 4056 | return ERR_PTR(-ENOMEM); | ||
| 4057 | |||
| 4058 | return css; | ||
| 4059 | } | ||
| 4060 | |||
| 4061 | static void debug_destroy(struct cgroup_subsys *ss, struct cgroup *cont) | ||
| 4062 | { | ||
| 4063 | kfree(cont->subsys[debug_subsys_id]); | ||
| 4064 | } | ||
| 4065 | |||
| 4066 | static u64 cgroup_refcount_read(struct cgroup *cont, struct cftype *cft) | ||
| 4067 | { | ||
| 4068 | return atomic_read(&cont->count); | ||
| 4069 | } | ||
| 4070 | |||
| 4071 | static u64 debug_taskcount_read(struct cgroup *cont, struct cftype *cft) | ||
| 4072 | { | ||
| 4073 | return cgroup_task_count(cont); | ||
| 4074 | } | ||
| 4075 | |||
| 4076 | static u64 current_css_set_read(struct cgroup *cont, struct cftype *cft) | ||
| 4077 | { | ||
| 4078 | return (u64)(unsigned long)current->cgroups; | ||
| 4079 | } | ||
| 4080 | |||
| 4081 | static u64 current_css_set_refcount_read(struct cgroup *cont, | ||
| 4082 | struct cftype *cft) | ||
| 4083 | { | ||
| 4084 | u64 count; | ||
| 4085 | |||
| 4086 | rcu_read_lock(); | ||
| 4087 | count = atomic_read(&current->cgroups->refcount); | ||
| 4088 | rcu_read_unlock(); | ||
| 4089 | return count; | ||
| 4090 | } | ||
| 4091 | |||
| 4092 | static int current_css_set_cg_links_read(struct cgroup *cont, | ||
| 4093 | struct cftype *cft, | ||
| 4094 | struct seq_file *seq) | ||
| 4095 | { | ||
| 4096 | struct cg_cgroup_link *link; | ||
| 4097 | struct css_set *cg; | ||
| 4098 | |||
| 4099 | read_lock(&css_set_lock); | ||
| 4100 | rcu_read_lock(); | ||
| 4101 | cg = rcu_dereference(current->cgroups); | ||
| 4102 | list_for_each_entry(link, &cg->cg_links, cg_link_list) { | ||
| 4103 | struct cgroup *c = link->cgrp; | ||
| 4104 | const char *name; | ||
| 4105 | |||
| 4106 | if (c->dentry) | ||
| 4107 | name = c->dentry->d_name.name; | ||
| 4108 | else | ||
| 4109 | name = "?"; | ||
| 4110 | seq_printf(seq, "Root %d group %s\n", | ||
| 4111 | c->root->hierarchy_id, name); | ||
| 4112 | } | ||
| 4113 | rcu_read_unlock(); | ||
| 4114 | read_unlock(&css_set_lock); | ||
| 4115 | return 0; | ||
| 4116 | } | ||
| 4117 | |||
| 4118 | #define MAX_TASKS_SHOWN_PER_CSS 25 | ||
| 4119 | static int cgroup_css_links_read(struct cgroup *cont, | ||
| 4120 | struct cftype *cft, | ||
| 4121 | struct seq_file *seq) | ||
| 4122 | { | ||
| 4123 | struct cg_cgroup_link *link; | ||
| 4124 | |||
| 4125 | read_lock(&css_set_lock); | ||
| 4126 | list_for_each_entry(link, &cont->css_sets, cgrp_link_list) { | ||
| 4127 | struct css_set *cg = link->cg; | ||
| 4128 | struct task_struct *task; | ||
| 4129 | int count = 0; | ||
| 4130 | seq_printf(seq, "css_set %p\n", cg); | ||
| 4131 | list_for_each_entry(task, &cg->tasks, cg_list) { | ||
| 4132 | if (count++ > MAX_TASKS_SHOWN_PER_CSS) { | ||
| 4133 | seq_puts(seq, " ...\n"); | ||
| 4134 | break; | ||
| 4135 | } else { | ||
| 4136 | seq_printf(seq, " task %d\n", | ||
| 4137 | task_pid_vnr(task)); | ||
| 4138 | } | ||
| 4139 | } | ||
| 4140 | } | ||
| 4141 | read_unlock(&css_set_lock); | ||
| 4142 | return 0; | ||
| 4143 | } | ||
| 4144 | |||
| 4145 | static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft) | ||
| 4146 | { | ||
| 4147 | return test_bit(CGRP_RELEASABLE, &cgrp->flags); | ||
| 4148 | } | ||
| 4149 | |||
| 4150 | static struct cftype debug_files[] = { | ||
| 4151 | { | ||
| 4152 | .name = "cgroup_refcount", | ||
| 4153 | .read_u64 = cgroup_refcount_read, | ||
| 4154 | }, | ||
| 4155 | { | ||
| 4156 | .name = "taskcount", | ||
| 4157 | .read_u64 = debug_taskcount_read, | ||
| 4158 | }, | ||
| 4159 | |||
| 4160 | { | ||
| 4161 | .name = "current_css_set", | ||
| 4162 | .read_u64 = current_css_set_read, | ||
| 4163 | }, | ||
| 4164 | |||
| 4165 | { | ||
| 4166 | .name = "current_css_set_refcount", | ||
| 4167 | .read_u64 = current_css_set_refcount_read, | ||
| 4168 | }, | ||
| 4169 | |||
| 4170 | { | ||
| 4171 | .name = "current_css_set_cg_links", | ||
| 4172 | .read_seq_string = current_css_set_cg_links_read, | ||
| 4173 | }, | ||
| 4174 | |||
| 4175 | { | ||
| 4176 | .name = "cgroup_css_links", | ||
| 4177 | .read_seq_string = cgroup_css_links_read, | ||
| 4178 | }, | ||
| 4179 | |||
| 4180 | { | ||
| 4181 | .name = "releasable", | ||
| 4182 | .read_u64 = releasable_read, | ||
| 4183 | }, | ||
| 4184 | }; | ||
| 4185 | |||
| 4186 | static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont) | ||
| 4187 | { | ||
| 4188 | return cgroup_add_files(cont, ss, debug_files, | ||
| 4189 | ARRAY_SIZE(debug_files)); | ||
| 4190 | } | ||
| 4191 | |||
| 4192 | struct cgroup_subsys debug_subsys = { | ||
| 4193 | .name = "debug", | ||
| 4194 | .create = debug_create, | ||
| 4195 | .destroy = debug_destroy, | ||
| 4196 | .populate = debug_populate, | ||
| 4197 | .subsys_id = debug_subsys_id, | ||
| 4198 | }; | ||
| 4199 | #endif /* CONFIG_CGROUP_DEBUG */ | ||
