diff options
author | Paul Menage <menage@google.com> | 2009-01-07 21:08:36 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-01-08 11:31:10 -0500 |
commit | 999cd8a450f8f93701669a61cac4d3b19eca07e8 (patch) | |
tree | 990e5b08e6db971d2e9943f89abf39e7c8f4cb1e | |
parent | b5a84319a4343a0db753436fd8147e61eaafa7ea (diff) |
cgroups: add a per-subsystem hierarchy_mutex
These patches introduce new locking/refcount support for cgroups to
reduce the need for subsystems to call cgroup_lock(). This will
ultimately allow cgroup_rmdir() to be made atomic again (its atomicity
was removed recently).
These three patches give:
1/3 - introduce a per-subsystem hierarchy_mutex which a subsystem can
use to prevent changes to its own cgroup tree
2/3 - use hierarchy_mutex in place of calling cgroup_lock() in the
memory controller
3/3 - introduce a css_tryget() function similar to the one recently
proposed by Kamezawa, but avoiding spurious refcount failures in
the event of a race between a css_tryget() and an unsuccessful
cgroup_rmdir()
Future patches will likely involve:
- using hierarchy_mutex in place of cgroup_lock() in more subsystems
where appropriate
- restoring the atomicity of cgroup_rmdir() with respect to cgroup_create()
This patch:
Add a hierarchy_mutex to the cgroup_subsys object that protects changes to
the hierarchy observed by that subsystem. It is taken by the cgroup core
code (in addition to cgroup_mutex) for the following operations:
- linking a cgroup into that subsystem's cgroup tree
- unlinking a cgroup from that subsystem's cgroup tree
- moving the subsystem to/from a hierarchy (including across the
bind() callback)
Thus if the subsystem holds its own hierarchy_mutex, it can safely
traverse its own hierarchy.
Signed-off-by: Paul Menage <menage@google.com>
Tested-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | Documentation/cgroups/cgroups.txt | 2 | ||||
-rw-r--r-- | include/linux/cgroup.h | 17 | ||||
-rw-r--r-- | kernel/cgroup.c | 37 |
3 files changed, 52 insertions, 4 deletions
diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt index 60287e9e9d27..e33ee74eee77 100644 --- a/Documentation/cgroups/cgroups.txt +++ b/Documentation/cgroups/cgroups.txt | |||
@@ -528,7 +528,7 @@ example in cpusets, no task may attach before 'cpus' and 'mems' are set | |||
528 | up. | 528 | up. |
529 | 529 | ||
530 | void bind(struct cgroup_subsys *ss, struct cgroup *root) | 530 | void bind(struct cgroup_subsys *ss, struct cgroup *root) |
531 | (cgroup_mutex held by caller) | 531 | (cgroup_mutex and ss->hierarchy_mutex held by caller) |
532 | 532 | ||
533 | Called when a cgroup subsystem is rebound to a different hierarchy | 533 | Called when a cgroup subsystem is rebound to a different hierarchy |
534 | and root cgroup. Currently this will only involve movement between | 534 | and root cgroup. Currently this will only involve movement between |
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 73d1c730c3c4..ce1c1f34c30c 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h | |||
@@ -340,8 +340,23 @@ struct cgroup_subsys { | |||
340 | #define MAX_CGROUP_TYPE_NAMELEN 32 | 340 | #define MAX_CGROUP_TYPE_NAMELEN 32 |
341 | const char *name; | 341 | const char *name; |
342 | 342 | ||
343 | struct cgroupfs_root *root; | 343 | /* |
344 | * Protects sibling/children links of cgroups in this | ||
345 | * hierarchy, plus protects which hierarchy (or none) the | ||
346 | * subsystem is a part of (i.e. root/sibling). To avoid | ||
347 | * potential deadlocks, the following operations should not be | ||
348 | * undertaken while holding any hierarchy_mutex: | ||
349 | * | ||
350 | * - allocating memory | ||
351 | * - initiating hotplug events | ||
352 | */ | ||
353 | struct mutex hierarchy_mutex; | ||
344 | 354 | ||
355 | /* | ||
356 | * Link to parent, and list entry in parent's children. | ||
357 | * Protected by this->hierarchy_mutex and cgroup_lock() | ||
358 | */ | ||
359 | struct cgroupfs_root *root; | ||
345 | struct list_head sibling; | 360 | struct list_head sibling; |
346 | }; | 361 | }; |
347 | 362 | ||
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 83ea4f524be5..8b6379cdf637 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -722,23 +722,26 @@ static int rebind_subsystems(struct cgroupfs_root *root, | |||
722 | BUG_ON(cgrp->subsys[i]); | 722 | BUG_ON(cgrp->subsys[i]); |
723 | BUG_ON(!dummytop->subsys[i]); | 723 | BUG_ON(!dummytop->subsys[i]); |
724 | BUG_ON(dummytop->subsys[i]->cgroup != dummytop); | 724 | BUG_ON(dummytop->subsys[i]->cgroup != dummytop); |
725 | mutex_lock(&ss->hierarchy_mutex); | ||
725 | cgrp->subsys[i] = dummytop->subsys[i]; | 726 | cgrp->subsys[i] = dummytop->subsys[i]; |
726 | cgrp->subsys[i]->cgroup = cgrp; | 727 | cgrp->subsys[i]->cgroup = cgrp; |
727 | list_move(&ss->sibling, &root->subsys_list); | 728 | list_move(&ss->sibling, &root->subsys_list); |
728 | ss->root = root; | 729 | ss->root = root; |
729 | if (ss->bind) | 730 | if (ss->bind) |
730 | ss->bind(ss, cgrp); | 731 | ss->bind(ss, cgrp); |
731 | 732 | mutex_unlock(&ss->hierarchy_mutex); | |
732 | } else if (bit & removed_bits) { | 733 | } else if (bit & removed_bits) { |
733 | /* We're removing this subsystem */ | 734 | /* We're removing this subsystem */ |
734 | BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]); | 735 | BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]); |
735 | BUG_ON(cgrp->subsys[i]->cgroup != cgrp); | 736 | BUG_ON(cgrp->subsys[i]->cgroup != cgrp); |
737 | mutex_lock(&ss->hierarchy_mutex); | ||
736 | if (ss->bind) | 738 | if (ss->bind) |
737 | ss->bind(ss, dummytop); | 739 | ss->bind(ss, dummytop); |
738 | dummytop->subsys[i]->cgroup = dummytop; | 740 | dummytop->subsys[i]->cgroup = dummytop; |
739 | cgrp->subsys[i] = NULL; | 741 | cgrp->subsys[i] = NULL; |
740 | subsys[i]->root = &rootnode; | 742 | subsys[i]->root = &rootnode; |
741 | list_move(&ss->sibling, &rootnode.subsys_list); | 743 | list_move(&ss->sibling, &rootnode.subsys_list); |
744 | mutex_unlock(&ss->hierarchy_mutex); | ||
742 | } else if (bit & final_bits) { | 745 | } else if (bit & final_bits) { |
743 | /* Subsystem state should already exist */ | 746 | /* Subsystem state should already exist */ |
744 | BUG_ON(!cgrp->subsys[i]); | 747 | BUG_ON(!cgrp->subsys[i]); |
@@ -2338,6 +2341,29 @@ static void init_cgroup_css(struct cgroup_subsys_state *css, | |||
2338 | cgrp->subsys[ss->subsys_id] = css; | 2341 | cgrp->subsys[ss->subsys_id] = css; |
2339 | } | 2342 | } |
2340 | 2343 | ||
2344 | static void cgroup_lock_hierarchy(struct cgroupfs_root *root) | ||
2345 | { | ||
2346 | /* We need to take each hierarchy_mutex in a consistent order */ | ||
2347 | int i; | ||
2348 | |||
2349 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | ||
2350 | struct cgroup_subsys *ss = subsys[i]; | ||
2351 | if (ss->root == root) | ||
2352 | mutex_lock_nested(&ss->hierarchy_mutex, i); | ||
2353 | } | ||
2354 | } | ||
2355 | |||
2356 | static void cgroup_unlock_hierarchy(struct cgroupfs_root *root) | ||
2357 | { | ||
2358 | int i; | ||
2359 | |||
2360 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | ||
2361 | struct cgroup_subsys *ss = subsys[i]; | ||
2362 | if (ss->root == root) | ||
2363 | mutex_unlock(&ss->hierarchy_mutex); | ||
2364 | } | ||
2365 | } | ||
2366 | |||
2341 | /* | 2367 | /* |
2342 | * cgroup_create - create a cgroup | 2368 | * cgroup_create - create a cgroup |
2343 | * @parent: cgroup that will be parent of the new cgroup | 2369 | * @parent: cgroup that will be parent of the new cgroup |
@@ -2386,7 +2412,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
2386 | init_cgroup_css(css, ss, cgrp); | 2412 | init_cgroup_css(css, ss, cgrp); |
2387 | } | 2413 | } |
2388 | 2414 | ||
2415 | cgroup_lock_hierarchy(root); | ||
2389 | list_add(&cgrp->sibling, &cgrp->parent->children); | 2416 | list_add(&cgrp->sibling, &cgrp->parent->children); |
2417 | cgroup_unlock_hierarchy(root); | ||
2390 | root->number_of_cgroups++; | 2418 | root->number_of_cgroups++; |
2391 | 2419 | ||
2392 | err = cgroup_create_dir(cgrp, dentry, mode); | 2420 | err = cgroup_create_dir(cgrp, dentry, mode); |
@@ -2504,8 +2532,12 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) | |||
2504 | if (!list_empty(&cgrp->release_list)) | 2532 | if (!list_empty(&cgrp->release_list)) |
2505 | list_del(&cgrp->release_list); | 2533 | list_del(&cgrp->release_list); |
2506 | spin_unlock(&release_list_lock); | 2534 | spin_unlock(&release_list_lock); |
2507 | /* delete my sibling from parent->children */ | 2535 | |
2536 | cgroup_lock_hierarchy(cgrp->root); | ||
2537 | /* delete this cgroup from parent->children */ | ||
2508 | list_del(&cgrp->sibling); | 2538 | list_del(&cgrp->sibling); |
2539 | cgroup_unlock_hierarchy(cgrp->root); | ||
2540 | |||
2509 | spin_lock(&cgrp->dentry->d_lock); | 2541 | spin_lock(&cgrp->dentry->d_lock); |
2510 | d = dget(cgrp->dentry); | 2542 | d = dget(cgrp->dentry); |
2511 | spin_unlock(&d->d_lock); | 2543 | spin_unlock(&d->d_lock); |
@@ -2547,6 +2579,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss) | |||
2547 | * need to invoke fork callbacks here. */ | 2579 | * need to invoke fork callbacks here. */ |
2548 | BUG_ON(!list_empty(&init_task.tasks)); | 2580 | BUG_ON(!list_empty(&init_task.tasks)); |
2549 | 2581 | ||
2582 | mutex_init(&ss->hierarchy_mutex); | ||
2550 | ss->active = 1; | 2583 | ss->active = 1; |
2551 | } | 2584 | } |
2552 | 2585 | ||