aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--kernel/cgroup.c104
1 file changed, 57 insertions, 47 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 893b7b502e18..89428b9d9933 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -42,6 +42,7 @@
42#include <linux/sched.h> 42#include <linux/sched.h>
43#include <linux/slab.h> 43#include <linux/slab.h>
44#include <linux/spinlock.h> 44#include <linux/spinlock.h>
45#include <linux/rwsem.h>
45#include <linux/string.h> 46#include <linux/string.h>
46#include <linux/sort.h> 47#include <linux/sort.h>
47#include <linux/kmod.h> 48#include <linux/kmod.h>
@@ -341,11 +342,10 @@ static struct css_set init_css_set;
341static struct cgrp_cset_link init_cgrp_cset_link; 342static struct cgrp_cset_link init_cgrp_cset_link;
342 343
343/* 344/*
344 * css_set_lock protects the list of css_set objects, and the chain of 345 * css_set_rwsem protects the list of css_set objects, and the chain of
345 * tasks off each css_set. Nests outside task->alloc_lock due to 346 * tasks off each css_set.
346 * css_task_iter_start().
347 */ 347 */
348static DEFINE_RWLOCK(css_set_lock); 348static DECLARE_RWSEM(css_set_rwsem);
349static int css_set_count; 349static int css_set_count;
350 350
351/* 351/*
@@ -380,9 +380,9 @@ static void __put_css_set(struct css_set *cset, int taskexit)
380 */ 380 */
381 if (atomic_add_unless(&cset->refcount, -1, 1)) 381 if (atomic_add_unless(&cset->refcount, -1, 1))
382 return; 382 return;
383 write_lock(&css_set_lock); 383 down_write(&css_set_rwsem);
384 if (!atomic_dec_and_test(&cset->refcount)) { 384 if (!atomic_dec_and_test(&cset->refcount)) {
385 write_unlock(&css_set_lock); 385 up_write(&css_set_rwsem);
386 return; 386 return;
387 } 387 }
388 388
@@ -396,7 +396,7 @@ static void __put_css_set(struct css_set *cset, int taskexit)
396 list_del(&link->cset_link); 396 list_del(&link->cset_link);
397 list_del(&link->cgrp_link); 397 list_del(&link->cgrp_link);
398 398
399 /* @cgrp can't go away while we're holding css_set_lock */ 399 /* @cgrp can't go away while we're holding css_set_rwsem */
400 if (list_empty(&cgrp->cset_links) && notify_on_release(cgrp)) { 400 if (list_empty(&cgrp->cset_links) && notify_on_release(cgrp)) {
401 if (taskexit) 401 if (taskexit)
402 set_bit(CGRP_RELEASABLE, &cgrp->flags); 402 set_bit(CGRP_RELEASABLE, &cgrp->flags);
@@ -406,7 +406,7 @@ static void __put_css_set(struct css_set *cset, int taskexit)
406 kfree(link); 406 kfree(link);
407 } 407 }
408 408
409 write_unlock(&css_set_lock); 409 up_write(&css_set_rwsem);
410 kfree_rcu(cset, rcu_head); 410 kfree_rcu(cset, rcu_head);
411} 411}
412 412
@@ -627,11 +627,11 @@ static struct css_set *find_css_set(struct css_set *old_cset,
627 627
628 /* First see if we already have a cgroup group that matches 628 /* First see if we already have a cgroup group that matches
629 * the desired set */ 629 * the desired set */
630 read_lock(&css_set_lock); 630 down_read(&css_set_rwsem);
631 cset = find_existing_css_set(old_cset, cgrp, template); 631 cset = find_existing_css_set(old_cset, cgrp, template);
632 if (cset) 632 if (cset)
633 get_css_set(cset); 633 get_css_set(cset);
634 read_unlock(&css_set_lock); 634 up_read(&css_set_rwsem);
635 635
636 if (cset) 636 if (cset)
637 return cset; 637 return cset;
@@ -655,7 +655,7 @@ static struct css_set *find_css_set(struct css_set *old_cset,
655 * find_existing_css_set() */ 655 * find_existing_css_set() */
656 memcpy(cset->subsys, template, sizeof(cset->subsys)); 656 memcpy(cset->subsys, template, sizeof(cset->subsys));
657 657
658 write_lock(&css_set_lock); 658 down_write(&css_set_rwsem);
659 /* Add reference counts and links from the new css_set. */ 659 /* Add reference counts and links from the new css_set. */
660 list_for_each_entry(link, &old_cset->cgrp_links, cgrp_link) { 660 list_for_each_entry(link, &old_cset->cgrp_links, cgrp_link) {
661 struct cgroup *c = link->cgrp; 661 struct cgroup *c = link->cgrp;
@@ -673,7 +673,7 @@ static struct css_set *find_css_set(struct css_set *old_cset,
673 key = css_set_hash(cset->subsys); 673 key = css_set_hash(cset->subsys);
674 hash_add(css_set_table, &cset->hlist, key); 674 hash_add(css_set_table, &cset->hlist, key);
675 675
676 write_unlock(&css_set_lock); 676 up_write(&css_set_rwsem);
677 677
678 return cset; 678 return cset;
679} 679}
@@ -739,14 +739,14 @@ static void cgroup_destroy_root(struct cgroupfs_root *root)
739 * Release all the links from cset_links to this hierarchy's 739 * Release all the links from cset_links to this hierarchy's
740 * root cgroup 740 * root cgroup
741 */ 741 */
742 write_lock(&css_set_lock); 742 down_write(&css_set_rwsem);
743 743
744 list_for_each_entry_safe(link, tmp_link, &cgrp->cset_links, cset_link) { 744 list_for_each_entry_safe(link, tmp_link, &cgrp->cset_links, cset_link) {
745 list_del(&link->cset_link); 745 list_del(&link->cset_link);
746 list_del(&link->cgrp_link); 746 list_del(&link->cgrp_link);
747 kfree(link); 747 kfree(link);
748 } 748 }
749 write_unlock(&css_set_lock); 749 up_write(&css_set_rwsem);
750 750
751 if (!list_empty(&root->root_list)) { 751 if (!list_empty(&root->root_list)) {
752 list_del(&root->root_list); 752 list_del(&root->root_list);
@@ -764,7 +764,7 @@ static void cgroup_destroy_root(struct cgroupfs_root *root)
764 764
765/* 765/*
766 * Return the cgroup for "task" from the given hierarchy. Must be 766 * Return the cgroup for "task" from the given hierarchy. Must be
767 * called with cgroup_mutex held. 767 * called with cgroup_mutex and css_set_rwsem held.
768 */ 768 */
769static struct cgroup *task_cgroup_from_root(struct task_struct *task, 769static struct cgroup *task_cgroup_from_root(struct task_struct *task,
770 struct cgroupfs_root *root) 770 struct cgroupfs_root *root)
@@ -772,8 +772,9 @@ static struct cgroup *task_cgroup_from_root(struct task_struct *task,
772 struct css_set *cset; 772 struct css_set *cset;
773 struct cgroup *res = NULL; 773 struct cgroup *res = NULL;
774 774
775 BUG_ON(!mutex_is_locked(&cgroup_mutex)); 775 lockdep_assert_held(&cgroup_mutex);
776 read_lock(&css_set_lock); 776 lockdep_assert_held(&css_set_rwsem);
777
777 /* 778 /*
778 * No need to lock the task - since we hold cgroup_mutex the 779 * No need to lock the task - since we hold cgroup_mutex the
779 * task can't change groups, so the only thing that can happen 780 * task can't change groups, so the only thing that can happen
@@ -794,7 +795,7 @@ static struct cgroup *task_cgroup_from_root(struct task_struct *task,
794 } 795 }
795 } 796 }
796 } 797 }
797 read_unlock(&css_set_lock); 798
798 BUG_ON(!res); 799 BUG_ON(!res);
799 return res; 800 return res;
800} 801}
@@ -1310,7 +1311,7 @@ static void cgroup_enable_task_cg_lists(void)
1310{ 1311{
1311 struct task_struct *p, *g; 1312 struct task_struct *p, *g;
1312 1313
1313 write_lock(&css_set_lock); 1314 down_write(&css_set_rwsem);
1314 1315
1315 if (use_task_css_set_links) 1316 if (use_task_css_set_links)
1316 goto out_unlock; 1317 goto out_unlock;
@@ -1343,7 +1344,7 @@ static void cgroup_enable_task_cg_lists(void)
1343 } while_each_thread(g, p); 1344 } while_each_thread(g, p);
1344 read_unlock(&tasklist_lock); 1345 read_unlock(&tasklist_lock);
1345out_unlock: 1346out_unlock:
1346 write_unlock(&css_set_lock); 1347 up_write(&css_set_rwsem);
1347} 1348}
1348 1349
1349static void init_cgroup_housekeeping(struct cgroup *cgrp) 1350static void init_cgroup_housekeeping(struct cgroup *cgrp)
@@ -1408,7 +1409,7 @@ static int cgroup_setup_root(struct cgroupfs_root *root, unsigned long ss_mask)
1408 root_cgrp->id = ret; 1409 root_cgrp->id = ret;
1409 1410
1410 /* 1411 /*
1411 * We're accessing css_set_count without locking css_set_lock here, 1412 * We're accessing css_set_count without locking css_set_rwsem here,
1412 * but that's OK - it can only be increased by someone holding 1413 * but that's OK - it can only be increased by someone holding
1413 * cgroup_lock, and that's us. The worst that can happen is that we 1414 * cgroup_lock, and that's us. The worst that can happen is that we
1414 * have some link structures left over 1415 * have some link structures left over
@@ -1451,10 +1452,10 @@ static int cgroup_setup_root(struct cgroupfs_root *root, unsigned long ss_mask)
1451 * Link the top cgroup in this hierarchy into all the css_set 1452 * Link the top cgroup in this hierarchy into all the css_set
1452 * objects. 1453 * objects.
1453 */ 1454 */
1454 write_lock(&css_set_lock); 1455 down_write(&css_set_rwsem);
1455 hash_for_each(css_set_table, i, cset, hlist) 1456 hash_for_each(css_set_table, i, cset, hlist)
1456 link_css_set(&tmp_links, cset, root_cgrp); 1457 link_css_set(&tmp_links, cset, root_cgrp);
1457 write_unlock(&css_set_lock); 1458 up_write(&css_set_rwsem);
1458 1459
1459 BUG_ON(!list_empty(&root_cgrp->children)); 1460 BUG_ON(!list_empty(&root_cgrp->children));
1460 BUG_ON(atomic_read(&root->nr_cgrps) != 1); 1461 BUG_ON(atomic_read(&root->nr_cgrps) != 1);
@@ -1617,6 +1618,7 @@ char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
1617 char *path = NULL; 1618 char *path = NULL;
1618 1619
1619 mutex_lock(&cgroup_mutex); 1620 mutex_lock(&cgroup_mutex);
1621 down_read(&css_set_rwsem);
1620 1622
1621 root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id); 1623 root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id);
1622 1624
@@ -1629,6 +1631,7 @@ char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
1629 path = buf; 1631 path = buf;
1630 } 1632 }
1631 1633
1634 up_read(&css_set_rwsem);
1632 mutex_unlock(&cgroup_mutex); 1635 mutex_unlock(&cgroup_mutex);
1633 return path; 1636 return path;
1634} 1637}
@@ -1739,9 +1742,9 @@ static void cgroup_task_migrate(struct cgroup *old_cgrp,
1739 rcu_assign_pointer(tsk->cgroups, new_cset); 1742 rcu_assign_pointer(tsk->cgroups, new_cset);
1740 task_unlock(tsk); 1743 task_unlock(tsk);
1741 1744
1742 write_lock(&css_set_lock); 1745 down_write(&css_set_rwsem);
1743 list_move(&tsk->cg_list, &new_cset->tasks); 1746 list_move(&tsk->cg_list, &new_cset->tasks);
1744 write_unlock(&css_set_lock); 1747 up_write(&css_set_rwsem);
1745 1748
1746 /* 1749 /*
1747 * We just gained a reference on old_cset by taking it from the 1750 * We just gained a reference on old_cset by taking it from the
@@ -1799,6 +1802,7 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk,
1799 * already PF_EXITING could be freed from underneath us unless we 1802 * already PF_EXITING could be freed from underneath us unless we
1800 * take an rcu_read_lock. 1803 * take an rcu_read_lock.
1801 */ 1804 */
1805 down_read(&css_set_rwsem);
1802 rcu_read_lock(); 1806 rcu_read_lock();
1803 do { 1807 do {
1804 struct task_and_cgroup ent; 1808 struct task_and_cgroup ent;
@@ -1826,6 +1830,7 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk,
1826 break; 1830 break;
1827 } while_each_thread(leader, tsk); 1831 } while_each_thread(leader, tsk);
1828 rcu_read_unlock(); 1832 rcu_read_unlock();
1833 up_read(&css_set_rwsem);
1829 /* remember the number of threads in the array for later. */ 1834 /* remember the number of threads in the array for later. */
1830 group_size = i; 1835 group_size = i;
1831 tset.tc_array = group; 1836 tset.tc_array = group;
@@ -2003,7 +2008,11 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
2003 2008
2004 mutex_lock(&cgroup_mutex); 2009 mutex_lock(&cgroup_mutex);
2005 for_each_active_root(root) { 2010 for_each_active_root(root) {
2006 struct cgroup *from_cgrp = task_cgroup_from_root(from, root); 2011 struct cgroup *from_cgrp;
2012
2013 down_read(&css_set_rwsem);
2014 from_cgrp = task_cgroup_from_root(from, root);
2015 up_read(&css_set_rwsem);
2007 2016
2008 retval = cgroup_attach_task(from_cgrp, tsk, false); 2017 retval = cgroup_attach_task(from_cgrp, tsk, false);
2009 if (retval) 2018 if (retval)
@@ -2396,10 +2405,10 @@ static int cgroup_task_count(const struct cgroup *cgrp)
2396 int count = 0; 2405 int count = 0;
2397 struct cgrp_cset_link *link; 2406 struct cgrp_cset_link *link;
2398 2407
2399 read_lock(&css_set_lock); 2408 down_read(&css_set_rwsem);
2400 list_for_each_entry(link, &cgrp->cset_links, cset_link) 2409 list_for_each_entry(link, &cgrp->cset_links, cset_link)
2401 count += atomic_read(&link->cset->refcount); 2410 count += atomic_read(&link->cset->refcount);
2402 read_unlock(&css_set_lock); 2411 up_read(&css_set_rwsem);
2403 return count; 2412 return count;
2404} 2413}
2405 2414
@@ -2630,12 +2639,12 @@ static void css_advance_task_iter(struct css_task_iter *it)
2630 */ 2639 */
2631void css_task_iter_start(struct cgroup_subsys_state *css, 2640void css_task_iter_start(struct cgroup_subsys_state *css,
2632 struct css_task_iter *it) 2641 struct css_task_iter *it)
2633 __acquires(css_set_lock) 2642 __acquires(css_set_rwsem)
2634{ 2643{
2635 /* no one should try to iterate before mounting cgroups */ 2644 /* no one should try to iterate before mounting cgroups */
2636 WARN_ON_ONCE(!use_task_css_set_links); 2645 WARN_ON_ONCE(!use_task_css_set_links);
2637 2646
2638 read_lock(&css_set_lock); 2647 down_read(&css_set_rwsem);
2639 2648
2640 it->origin_css = css; 2649 it->origin_css = css;
2641 it->cset_link = &css->cgroup->cset_links; 2650 it->cset_link = &css->cgroup->cset_links;
@@ -2683,9 +2692,9 @@ struct task_struct *css_task_iter_next(struct css_task_iter *it)
2683 * Finish task iteration started by css_task_iter_start(). 2692 * Finish task iteration started by css_task_iter_start().
2684 */ 2693 */
2685void css_task_iter_end(struct css_task_iter *it) 2694void css_task_iter_end(struct css_task_iter *it)
2686 __releases(css_set_lock) 2695 __releases(css_set_rwsem)
2687{ 2696{
2688 read_unlock(&css_set_lock); 2697 up_read(&css_set_rwsem);
2689} 2698}
2690 2699
2691static inline int started_after_time(struct task_struct *t1, 2700static inline int started_after_time(struct task_struct *t1,
@@ -2735,7 +2744,7 @@ static inline int started_after(void *p1, void *p2)
2735 * 2744 *
2736 * @test may be NULL, meaning always true (select all tasks), which 2745 * @test may be NULL, meaning always true (select all tasks), which
2737 * effectively duplicates css_task_iter_{start,next,end}() but does not 2746 * effectively duplicates css_task_iter_{start,next,end}() but does not
2738 * lock css_set_lock for the call to @process. 2747 * lock css_set_rwsem for the call to @process.
2739 * 2748 *
2740 * It is guaranteed that @process will act on every task that is a member 2749 * It is guaranteed that @process will act on every task that is a member
2741 * of @css for the duration of this call. This function may or may not 2750 * of @css for the duration of this call. This function may or may not
@@ -3867,12 +3876,12 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
3867 lockdep_assert_held(&cgroup_mutex); 3876 lockdep_assert_held(&cgroup_mutex);
3868 3877
3869 /* 3878 /*
3870 * css_set_lock synchronizes access to ->cset_links and prevents 3879 * css_set_rwsem synchronizes access to ->cset_links and prevents
3871 * @cgrp from being removed while __put_css_set() is in progress. 3880 * @cgrp from being removed while __put_css_set() is in progress.
3872 */ 3881 */
3873 read_lock(&css_set_lock); 3882 down_read(&css_set_rwsem);
3874 empty = list_empty(&cgrp->cset_links); 3883 empty = list_empty(&cgrp->cset_links);
3875 read_unlock(&css_set_lock); 3884 up_read(&css_set_rwsem);
3876 if (!empty) 3885 if (!empty)
3877 return -EBUSY; 3886 return -EBUSY;
3878 3887
@@ -4208,6 +4217,7 @@ int proc_cgroup_show(struct seq_file *m, void *v)
4208 retval = 0; 4217 retval = 0;
4209 4218
4210 mutex_lock(&cgroup_mutex); 4219 mutex_lock(&cgroup_mutex);
4220 down_read(&css_set_rwsem);
4211 4221
4212 for_each_active_root(root) { 4222 for_each_active_root(root) {
4213 struct cgroup_subsys *ss; 4223 struct cgroup_subsys *ss;
@@ -4233,6 +4243,7 @@ int proc_cgroup_show(struct seq_file *m, void *v)
4233 } 4243 }
4234 4244
4235out_unlock: 4245out_unlock:
4246 up_read(&css_set_rwsem);
4236 mutex_unlock(&cgroup_mutex); 4247 mutex_unlock(&cgroup_mutex);
4237 put_task_struct(tsk); 4248 put_task_struct(tsk);
4238out_free: 4249out_free:
@@ -4328,12 +4339,12 @@ void cgroup_post_fork(struct task_struct *child)
4328 * lock on fork. 4339 * lock on fork.
4329 */ 4340 */
4330 if (use_task_css_set_links) { 4341 if (use_task_css_set_links) {
4331 write_lock(&css_set_lock); 4342 down_write(&css_set_rwsem);
4332 task_lock(child); 4343 task_lock(child);
4333 if (list_empty(&child->cg_list)) 4344 if (list_empty(&child->cg_list))
4334 list_add(&child->cg_list, &task_css_set(child)->tasks); 4345 list_add(&child->cg_list, &task_css_set(child)->tasks);
4335 task_unlock(child); 4346 task_unlock(child);
4336 write_unlock(&css_set_lock); 4347 up_write(&css_set_rwsem);
4337 } 4348 }
4338 4349
4339 /* 4350 /*
@@ -4390,15 +4401,14 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
4390 int i; 4401 int i;
4391 4402
4392 /* 4403 /*
4393 * Unlink from the css_set task list if necessary. 4404 * Unlink from the css_set task list if necessary. Optimistically
4394 * Optimistically check cg_list before taking 4405 * check cg_list before taking css_set_rwsem.
4395 * css_set_lock
4396 */ 4406 */
4397 if (!list_empty(&tsk->cg_list)) { 4407 if (!list_empty(&tsk->cg_list)) {
4398 write_lock(&css_set_lock); 4408 down_write(&css_set_rwsem);
4399 if (!list_empty(&tsk->cg_list)) 4409 if (!list_empty(&tsk->cg_list))
4400 list_del_init(&tsk->cg_list); 4410 list_del_init(&tsk->cg_list);
4401 write_unlock(&css_set_lock); 4411 up_write(&css_set_rwsem);
4402 } 4412 }
4403 4413
4404 /* Reassign the task to the init_css_set. */ 4414 /* Reassign the task to the init_css_set. */
@@ -4650,7 +4660,7 @@ static int current_css_set_cg_links_read(struct seq_file *seq, void *v)
4650 if (!name_buf) 4660 if (!name_buf)
4651 return -ENOMEM; 4661 return -ENOMEM;
4652 4662
4653 read_lock(&css_set_lock); 4663 down_read(&css_set_rwsem);
4654 rcu_read_lock(); 4664 rcu_read_lock();
4655 cset = rcu_dereference(current->cgroups); 4665 cset = rcu_dereference(current->cgroups);
4656 list_for_each_entry(link, &cset->cgrp_links, cgrp_link) { 4666 list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
@@ -4666,7 +4676,7 @@ static int current_css_set_cg_links_read(struct seq_file *seq, void *v)
4666 c->root->hierarchy_id, name); 4676 c->root->hierarchy_id, name);
4667 } 4677 }
4668 rcu_read_unlock(); 4678 rcu_read_unlock();
4669 read_unlock(&css_set_lock); 4679 up_read(&css_set_rwsem);
4670 kfree(name_buf); 4680 kfree(name_buf);
4671 return 0; 4681 return 0;
4672} 4682}
@@ -4677,7 +4687,7 @@ static int cgroup_css_links_read(struct seq_file *seq, void *v)
4677 struct cgroup_subsys_state *css = seq_css(seq); 4687 struct cgroup_subsys_state *css = seq_css(seq);
4678 struct cgrp_cset_link *link; 4688 struct cgrp_cset_link *link;
4679 4689
4680 read_lock(&css_set_lock); 4690 down_read(&css_set_rwsem);
4681 list_for_each_entry(link, &css->cgroup->cset_links, cset_link) { 4691 list_for_each_entry(link, &css->cgroup->cset_links, cset_link) {
4682 struct css_set *cset = link->cset; 4692 struct css_set *cset = link->cset;
4683 struct task_struct *task; 4693 struct task_struct *task;
@@ -4693,7 +4703,7 @@ static int cgroup_css_links_read(struct seq_file *seq, void *v)
4693 } 4703 }
4694 } 4704 }
4695 } 4705 }
4696 read_unlock(&css_set_lock); 4706 up_read(&css_set_rwsem);
4697 return 0; 4707 return 0;
4698} 4708}
4699 4709