aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Paul Menage <menage@google.com> 2009-01-07 21:08:36 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2009-01-08 11:31:10 -0500
commit 999cd8a450f8f93701669a61cac4d3b19eca07e8 (patch)
tree 990e5b08e6db971d2e9943f89abf39e7c8f4cb1e
parent b5a84319a4343a0db753436fd8147e61eaafa7ea (diff)
cgroups: add a per-subsystem hierarchy_mutex
These patches introduce new locking/refcount support for cgroups to reduce the need for subsystems to call cgroup_lock(). This will ultimately allow the atomicity of cgroup_rmdir() (which was removed recently) to be restored.

These three patches give:

1/3 - introduce a per-subsystem hierarchy_mutex which a subsystem can use to prevent changes to its own cgroup tree

2/3 - use hierarchy_mutex in place of calling cgroup_lock() in the memory controller

3/3 - introduce a css_tryget() function similar to the one recently proposed by Kamezawa, but avoiding spurious refcount failures in the event of a race between a css_tryget() and an unsuccessful cgroup_rmdir()

Future patches will likely involve:

- using hierarchy mutex in place of cgroup_lock() in more subsystems where appropriate
- restoring the atomicity of cgroup_rmdir() with respect to cgroup_create()

This patch:

Add a hierarchy_mutex to the cgroup_subsys object that protects changes to the hierarchy observed by that subsystem. It is taken by the cgroup subsystem (in addition to cgroup_mutex) for the following operations:

- linking a cgroup into that subsystem's cgroup tree
- unlinking a cgroup from that subsystem's cgroup tree
- moving the subsystem to/from a hierarchy (including across the bind() callback)

Thus if the subsystem holds its own hierarchy_mutex, it can safely traverse its own hierarchy.

Signed-off-by: Paul Menage <menage@google.com>
Tested-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- Documentation/cgroups/cgroups.txt 2
-rw-r--r-- include/linux/cgroup.h 17
-rw-r--r-- kernel/cgroup.c 37
3 files changed, 52 insertions, 4 deletions
diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt
index 60287e9e9d27..e33ee74eee77 100644
--- a/Documentation/cgroups/cgroups.txt
+++ b/Documentation/cgroups/cgroups.txt
@@ -528,7 +528,7 @@ example in cpusets, no task may attach before 'cpus' and 'mems' are set
528up. 528up.
529 529
530void bind(struct cgroup_subsys *ss, struct cgroup *root) 530void bind(struct cgroup_subsys *ss, struct cgroup *root)
531(cgroup_mutex held by caller) 531(cgroup_mutex and ss->hierarchy_mutex held by caller)
532 532
533Called when a cgroup subsystem is rebound to a different hierarchy 533Called when a cgroup subsystem is rebound to a different hierarchy
534and root cgroup. Currently this will only involve movement between 534and root cgroup. Currently this will only involve movement between
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 73d1c730c3c4..ce1c1f34c30c 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -340,8 +340,23 @@ struct cgroup_subsys {
340#define MAX_CGROUP_TYPE_NAMELEN 32 340#define MAX_CGROUP_TYPE_NAMELEN 32
341 const char *name; 341 const char *name;
342 342
343 struct cgroupfs_root *root; 343 /*
344 * Protects sibling/children links of cgroups in this
345 * hierarchy, plus protects which hierarchy (or none) the
346 * subsystem is a part of (i.e. root/sibling). To avoid
347 * potential deadlocks, the following operations should not be
348 * undertaken while holding any hierarchy_mutex:
349 *
350 * - allocating memory
351 * - initiating hotplug events
352 */
353 struct mutex hierarchy_mutex;
344 354
355 /*
356 * Link to parent, and list entry in parent's children.
357 * Protected by this->hierarchy_mutex and cgroup_lock()
358 */
359 struct cgroupfs_root *root;
345 struct list_head sibling; 360 struct list_head sibling;
346}; 361};
347 362
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 83ea4f524be5..8b6379cdf637 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -722,23 +722,26 @@ static int rebind_subsystems(struct cgroupfs_root *root,
722 BUG_ON(cgrp->subsys[i]); 722 BUG_ON(cgrp->subsys[i]);
723 BUG_ON(!dummytop->subsys[i]); 723 BUG_ON(!dummytop->subsys[i]);
724 BUG_ON(dummytop->subsys[i]->cgroup != dummytop); 724 BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
725 mutex_lock(&ss->hierarchy_mutex);
725 cgrp->subsys[i] = dummytop->subsys[i]; 726 cgrp->subsys[i] = dummytop->subsys[i];
726 cgrp->subsys[i]->cgroup = cgrp; 727 cgrp->subsys[i]->cgroup = cgrp;
727 list_move(&ss->sibling, &root->subsys_list); 728 list_move(&ss->sibling, &root->subsys_list);
728 ss->root = root; 729 ss->root = root;
729 if (ss->bind) 730 if (ss->bind)
730 ss->bind(ss, cgrp); 731 ss->bind(ss, cgrp);
731 732 mutex_unlock(&ss->hierarchy_mutex);
732 } else if (bit & removed_bits) { 733 } else if (bit & removed_bits) {
733 /* We're removing this subsystem */ 734 /* We're removing this subsystem */
734 BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]); 735 BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
735 BUG_ON(cgrp->subsys[i]->cgroup != cgrp); 736 BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
737 mutex_lock(&ss->hierarchy_mutex);
736 if (ss->bind) 738 if (ss->bind)
737 ss->bind(ss, dummytop); 739 ss->bind(ss, dummytop);
738 dummytop->subsys[i]->cgroup = dummytop; 740 dummytop->subsys[i]->cgroup = dummytop;
739 cgrp->subsys[i] = NULL; 741 cgrp->subsys[i] = NULL;
740 subsys[i]->root = &rootnode; 742 subsys[i]->root = &rootnode;
741 list_move(&ss->sibling, &rootnode.subsys_list); 743 list_move(&ss->sibling, &rootnode.subsys_list);
744 mutex_unlock(&ss->hierarchy_mutex);
742 } else if (bit & final_bits) { 745 } else if (bit & final_bits) {
743 /* Subsystem state should already exist */ 746 /* Subsystem state should already exist */
744 BUG_ON(!cgrp->subsys[i]); 747 BUG_ON(!cgrp->subsys[i]);
@@ -2338,6 +2341,29 @@ static void init_cgroup_css(struct cgroup_subsys_state *css,
2338 cgrp->subsys[ss->subsys_id] = css; 2341 cgrp->subsys[ss->subsys_id] = css;
2339} 2342}
2340 2343
2344static void cgroup_lock_hierarchy(struct cgroupfs_root *root)
2345{
2346 /* We need to take each hierarchy_mutex in a consistent order */
2347 int i;
2348
2349 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
2350 struct cgroup_subsys *ss = subsys[i];
2351 if (ss->root == root)
2352 mutex_lock_nested(&ss->hierarchy_mutex, i);
2353 }
2354}
2355
2356static void cgroup_unlock_hierarchy(struct cgroupfs_root *root)
2357{
2358 int i;
2359
2360 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
2361 struct cgroup_subsys *ss = subsys[i];
2362 if (ss->root == root)
2363 mutex_unlock(&ss->hierarchy_mutex);
2364 }
2365}
2366
2341/* 2367/*
2342 * cgroup_create - create a cgroup 2368 * cgroup_create - create a cgroup
2343 * @parent: cgroup that will be parent of the new cgroup 2369 * @parent: cgroup that will be parent of the new cgroup
@@ -2386,7 +2412,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
2386 init_cgroup_css(css, ss, cgrp); 2412 init_cgroup_css(css, ss, cgrp);
2387 } 2413 }
2388 2414
2415 cgroup_lock_hierarchy(root);
2389 list_add(&cgrp->sibling, &cgrp->parent->children); 2416 list_add(&cgrp->sibling, &cgrp->parent->children);
2417 cgroup_unlock_hierarchy(root);
2390 root->number_of_cgroups++; 2418 root->number_of_cgroups++;
2391 2419
2392 err = cgroup_create_dir(cgrp, dentry, mode); 2420 err = cgroup_create_dir(cgrp, dentry, mode);
@@ -2504,8 +2532,12 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
2504 if (!list_empty(&cgrp->release_list)) 2532 if (!list_empty(&cgrp->release_list))
2505 list_del(&cgrp->release_list); 2533 list_del(&cgrp->release_list);
2506 spin_unlock(&release_list_lock); 2534 spin_unlock(&release_list_lock);
2507 /* delete my sibling from parent->children */ 2535
2536 cgroup_lock_hierarchy(cgrp->root);
2537 /* delete this cgroup from parent->children */
2508 list_del(&cgrp->sibling); 2538 list_del(&cgrp->sibling);
2539 cgroup_unlock_hierarchy(cgrp->root);
2540
2509 spin_lock(&cgrp->dentry->d_lock); 2541 spin_lock(&cgrp->dentry->d_lock);
2510 d = dget(cgrp->dentry); 2542 d = dget(cgrp->dentry);
2511 spin_unlock(&d->d_lock); 2543 spin_unlock(&d->d_lock);
@@ -2547,6 +2579,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
2547 * need to invoke fork callbacks here. */ 2579 * need to invoke fork callbacks here. */
2548 BUG_ON(!list_empty(&init_task.tasks)); 2580 BUG_ON(!list_empty(&init_task.tasks));
2549 2581
2582 mutex_init(&ss->hierarchy_mutex);
2550 ss->active = 1; 2583 ss->active = 1;
2551} 2584}
2552 2585