about summary refs log tree commit diff stats
path: root/kernel/cgroup.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/cgroup.c')
-rw-r--r-- kernel/cgroup.c 288
1 files changed, 168 insertions, 120 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 4855892798fd..b5c64327e712 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -52,7 +52,7 @@
52#include <linux/module.h> 52#include <linux/module.h>
53#include <linux/delayacct.h> 53#include <linux/delayacct.h>
54#include <linux/cgroupstats.h> 54#include <linux/cgroupstats.h>
55#include <linux/hash.h> 55#include <linux/hashtable.h>
56#include <linux/namei.h> 56#include <linux/namei.h>
57#include <linux/pid_namespace.h> 57#include <linux/pid_namespace.h>
58#include <linux/idr.h> 58#include <linux/idr.h>
@@ -376,22 +376,18 @@ static int css_set_count;
376 * account cgroups in empty hierarchies. 376 * account cgroups in empty hierarchies.
377 */ 377 */
378#define CSS_SET_HASH_BITS 7 378#define CSS_SET_HASH_BITS 7
379#define CSS_SET_TABLE_SIZE (1 << CSS_SET_HASH_BITS) 379static DEFINE_HASHTABLE(css_set_table, CSS_SET_HASH_BITS);
380static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE];
381 380
382static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[]) 381static unsigned long css_set_hash(struct cgroup_subsys_state *css[])
383{ 382{
384 int i; 383 int i;
385 int index; 384 unsigned long key = 0UL;
386 unsigned long tmp = 0UL;
387 385
388 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) 386 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++)
389 tmp += (unsigned long)css[i]; 387 key += (unsigned long)css[i];
390 tmp = (tmp >> 16) ^ tmp; 388 key = (key >> 16) ^ key;
391 389
392 index = hash_long(tmp, CSS_SET_HASH_BITS); 390 return key;
393
394 return &css_set_table[index];
395} 391}
396 392
397/* We don't maintain the lists running through each css_set to its 393/* We don't maintain the lists running through each css_set to its
@@ -418,7 +414,7 @@ static void __put_css_set(struct css_set *cg, int taskexit)
418 } 414 }
419 415
420 /* This css_set is dead. unlink it and release cgroup refcounts */ 416 /* This css_set is dead. unlink it and release cgroup refcounts */
421 hlist_del(&cg->hlist); 417 hash_del(&cg->hlist);
422 css_set_count--; 418 css_set_count--;
423 419
424 list_for_each_entry_safe(link, saved_link, &cg->cg_links, 420 list_for_each_entry_safe(link, saved_link, &cg->cg_links,
@@ -426,12 +422,20 @@ static void __put_css_set(struct css_set *cg, int taskexit)
426 struct cgroup *cgrp = link->cgrp; 422 struct cgroup *cgrp = link->cgrp;
427 list_del(&link->cg_link_list); 423 list_del(&link->cg_link_list);
428 list_del(&link->cgrp_link_list); 424 list_del(&link->cgrp_link_list);
425
426 /*
427 * We may not be holding cgroup_mutex, and if cgrp->count is
428 * dropped to 0 the cgroup can be destroyed at any time, hence
429 * rcu_read_lock is used to keep it alive.
430 */
431 rcu_read_lock();
429 if (atomic_dec_and_test(&cgrp->count) && 432 if (atomic_dec_and_test(&cgrp->count) &&
430 notify_on_release(cgrp)) { 433 notify_on_release(cgrp)) {
431 if (taskexit) 434 if (taskexit)
432 set_bit(CGRP_RELEASABLE, &cgrp->flags); 435 set_bit(CGRP_RELEASABLE, &cgrp->flags);
433 check_for_release(cgrp); 436 check_for_release(cgrp);
434 } 437 }
438 rcu_read_unlock();
435 439
436 kfree(link); 440 kfree(link);
437 } 441 }
@@ -550,9 +554,9 @@ static struct css_set *find_existing_css_set(
550{ 554{
551 int i; 555 int i;
552 struct cgroupfs_root *root = cgrp->root; 556 struct cgroupfs_root *root = cgrp->root;
553 struct hlist_head *hhead;
554 struct hlist_node *node; 557 struct hlist_node *node;
555 struct css_set *cg; 558 struct css_set *cg;
559 unsigned long key;
556 560
557 /* 561 /*
558 * Build the set of subsystem state objects that we want to see in the 562 * Build the set of subsystem state objects that we want to see in the
@@ -572,8 +576,8 @@ static struct css_set *find_existing_css_set(
572 } 576 }
573 } 577 }
574 578
575 hhead = css_set_hash(template); 579 key = css_set_hash(template);
576 hlist_for_each_entry(cg, node, hhead, hlist) { 580 hash_for_each_possible(css_set_table, cg, node, hlist, key) {
577 if (!compare_css_sets(cg, oldcg, cgrp, template)) 581 if (!compare_css_sets(cg, oldcg, cgrp, template))
578 continue; 582 continue;
579 583
@@ -657,8 +661,8 @@ static struct css_set *find_css_set(
657 661
658 struct list_head tmp_cg_links; 662 struct list_head tmp_cg_links;
659 663
660 struct hlist_head *hhead;
661 struct cg_cgroup_link *link; 664 struct cg_cgroup_link *link;
665 unsigned long key;
662 666
663 /* First see if we already have a cgroup group that matches 667 /* First see if we already have a cgroup group that matches
664 * the desired set */ 668 * the desired set */
@@ -704,8 +708,8 @@ static struct css_set *find_css_set(
704 css_set_count++; 708 css_set_count++;
705 709
706 /* Add this cgroup group to the hash table */ 710 /* Add this cgroup group to the hash table */
707 hhead = css_set_hash(res->subsys); 711 key = css_set_hash(res->subsys);
708 hlist_add_head(&res->hlist, hhead); 712 hash_add(css_set_table, &res->hlist, key);
709 713
710 write_unlock(&css_set_lock); 714 write_unlock(&css_set_lock);
711 715
@@ -856,47 +860,54 @@ static struct inode *cgroup_new_inode(umode_t mode, struct super_block *sb)
856 return inode; 860 return inode;
857} 861}
858 862
859static void cgroup_diput(struct dentry *dentry, struct inode *inode) 863static void cgroup_free_fn(struct work_struct *work)
860{ 864{
861 /* is dentry a directory ? if so, kfree() associated cgroup */ 865 struct cgroup *cgrp = container_of(work, struct cgroup, free_work);
862 if (S_ISDIR(inode->i_mode)) { 866 struct cgroup_subsys *ss;
863 struct cgroup *cgrp = dentry->d_fsdata;
864 struct cgroup_subsys *ss;
865 BUG_ON(!(cgroup_is_removed(cgrp)));
866 /* It's possible for external users to be holding css
867 * reference counts on a cgroup; css_put() needs to
868 * be able to access the cgroup after decrementing
869 * the reference count in order to know if it needs to
870 * queue the cgroup to be handled by the release
871 * agent */
872 synchronize_rcu();
873 867
874 mutex_lock(&cgroup_mutex); 868 mutex_lock(&cgroup_mutex);
875 /* 869 /*
876 * Release the subsystem state objects. 870 * Release the subsystem state objects.
877 */ 871 */
878 for_each_subsys(cgrp->root, ss) 872 for_each_subsys(cgrp->root, ss)
879 ss->css_free(cgrp); 873 ss->css_free(cgrp);
880 874
881 cgrp->root->number_of_cgroups--; 875 cgrp->root->number_of_cgroups--;
882 mutex_unlock(&cgroup_mutex); 876 mutex_unlock(&cgroup_mutex);
883 877
884 /* 878 /*
885 * Drop the active superblock reference that we took when we 879 * Drop the active superblock reference that we took when we
886 * created the cgroup 880 * created the cgroup
887 */ 881 */
888 deactivate_super(cgrp->root->sb); 882 deactivate_super(cgrp->root->sb);
889 883
890 /* 884 /*
891 * if we're getting rid of the cgroup, refcount should ensure 885 * if we're getting rid of the cgroup, refcount should ensure
892 * that there are no pidlists left. 886 * that there are no pidlists left.
893 */ 887 */
894 BUG_ON(!list_empty(&cgrp->pidlists)); 888 BUG_ON(!list_empty(&cgrp->pidlists));
895 889
896 simple_xattrs_free(&cgrp->xattrs); 890 simple_xattrs_free(&cgrp->xattrs);
897 891
898 ida_simple_remove(&cgrp->root->cgroup_ida, cgrp->id); 892 ida_simple_remove(&cgrp->root->cgroup_ida, cgrp->id);
899 kfree_rcu(cgrp, rcu_head); 893 kfree(cgrp);
894}
895
896static void cgroup_free_rcu(struct rcu_head *head)
897{
898 struct cgroup *cgrp = container_of(head, struct cgroup, rcu_head);
899
900 schedule_work(&cgrp->free_work);
901}
902
903static void cgroup_diput(struct dentry *dentry, struct inode *inode)
904{
905 /* is dentry a directory ? if so, kfree() associated cgroup */
906 if (S_ISDIR(inode->i_mode)) {
907 struct cgroup *cgrp = dentry->d_fsdata;
908
909 BUG_ON(!(cgroup_is_removed(cgrp)));
910 call_rcu(&cgrp->rcu_head, cgroup_free_rcu);
900 } else { 911 } else {
901 struct cfent *cfe = __d_cfe(dentry); 912 struct cfent *cfe = __d_cfe(dentry);
902 struct cgroup *cgrp = dentry->d_parent->d_fsdata; 913 struct cgroup *cgrp = dentry->d_parent->d_fsdata;
@@ -925,13 +936,17 @@ static void remove_dir(struct dentry *d)
925 dput(parent); 936 dput(parent);
926} 937}
927 938
928static int cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft) 939static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
929{ 940{
930 struct cfent *cfe; 941 struct cfent *cfe;
931 942
932 lockdep_assert_held(&cgrp->dentry->d_inode->i_mutex); 943 lockdep_assert_held(&cgrp->dentry->d_inode->i_mutex);
933 lockdep_assert_held(&cgroup_mutex); 944 lockdep_assert_held(&cgroup_mutex);
934 945
946 /*
947 * If we're doing cleanup due to failure of cgroup_create(),
948 * the corresponding @cfe may not exist.
949 */
935 list_for_each_entry(cfe, &cgrp->files, node) { 950 list_for_each_entry(cfe, &cgrp->files, node) {
936 struct dentry *d = cfe->dentry; 951 struct dentry *d = cfe->dentry;
937 952
@@ -944,9 +959,8 @@ static int cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
944 list_del_init(&cfe->node); 959 list_del_init(&cfe->node);
945 dput(d); 960 dput(d);
946 961
947 return 0; 962 break;
948 } 963 }
949 return -ENOENT;
950} 964}
951 965
952/** 966/**
@@ -1083,7 +1097,6 @@ static int rebind_subsystems(struct cgroupfs_root *root,
1083 } 1097 }
1084 } 1098 }
1085 root->subsys_mask = root->actual_subsys_mask = final_subsys_mask; 1099 root->subsys_mask = root->actual_subsys_mask = final_subsys_mask;
1086 synchronize_rcu();
1087 1100
1088 return 0; 1101 return 0;
1089} 1102}
@@ -1393,6 +1406,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
1393 INIT_LIST_HEAD(&cgrp->allcg_node); 1406 INIT_LIST_HEAD(&cgrp->allcg_node);
1394 INIT_LIST_HEAD(&cgrp->release_list); 1407 INIT_LIST_HEAD(&cgrp->release_list);
1395 INIT_LIST_HEAD(&cgrp->pidlists); 1408 INIT_LIST_HEAD(&cgrp->pidlists);
1409 INIT_WORK(&cgrp->free_work, cgroup_free_fn);
1396 mutex_init(&cgrp->pidlist_mutex); 1410 mutex_init(&cgrp->pidlist_mutex);
1397 INIT_LIST_HEAD(&cgrp->event_list); 1411 INIT_LIST_HEAD(&cgrp->event_list);
1398 spin_lock_init(&cgrp->event_list_lock); 1412 spin_lock_init(&cgrp->event_list_lock);
@@ -1597,6 +1611,8 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
1597 struct cgroupfs_root *existing_root; 1611 struct cgroupfs_root *existing_root;
1598 const struct cred *cred; 1612 const struct cred *cred;
1599 int i; 1613 int i;
1614 struct hlist_node *node;
1615 struct css_set *cg;
1600 1616
1601 BUG_ON(sb->s_root != NULL); 1617 BUG_ON(sb->s_root != NULL);
1602 1618
@@ -1650,14 +1666,8 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
1650 /* Link the top cgroup in this hierarchy into all 1666 /* Link the top cgroup in this hierarchy into all
1651 * the css_set objects */ 1667 * the css_set objects */
1652 write_lock(&css_set_lock); 1668 write_lock(&css_set_lock);
1653 for (i = 0; i < CSS_SET_TABLE_SIZE; i++) { 1669 hash_for_each(css_set_table, i, node, cg, hlist)
1654 struct hlist_head *hhead = &css_set_table[i]; 1670 link_css_set(&tmp_cg_links, cg, root_cgrp);
1655 struct hlist_node *node;
1656 struct css_set *cg;
1657
1658 hlist_for_each_entry(cg, node, hhead, hlist)
1659 link_css_set(&tmp_cg_links, cg, root_cgrp);
1660 }
1661 write_unlock(&css_set_lock); 1671 write_unlock(&css_set_lock);
1662 1672
1663 free_cg_links(&tmp_cg_links); 1673 free_cg_links(&tmp_cg_links);
@@ -1773,7 +1783,7 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
1773 rcu_lockdep_assert(rcu_read_lock_held() || cgroup_lock_is_held(), 1783 rcu_lockdep_assert(rcu_read_lock_held() || cgroup_lock_is_held(),
1774 "cgroup_path() called without proper locking"); 1784 "cgroup_path() called without proper locking");
1775 1785
1776 if (!dentry || cgrp == dummytop) { 1786 if (cgrp == dummytop) {
1777 /* 1787 /*
1778 * Inactive subsystems have no dentry for their root 1788 * Inactive subsystems have no dentry for their root
1779 * cgroup 1789 * cgroup
@@ -1982,7 +1992,6 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1982 ss->attach(cgrp, &tset); 1992 ss->attach(cgrp, &tset);
1983 } 1993 }
1984 1994
1985 synchronize_rcu();
1986out: 1995out:
1987 if (retval) { 1996 if (retval) {
1988 for_each_subsys(root, ss) { 1997 for_each_subsys(root, ss) {
@@ -2151,7 +2160,6 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
2151 /* 2160 /*
2152 * step 5: success! and cleanup 2161 * step 5: success! and cleanup
2153 */ 2162 */
2154 synchronize_rcu();
2155 retval = 0; 2163 retval = 0;
2156out_put_css_set_refs: 2164out_put_css_set_refs:
2157 if (retval) { 2165 if (retval) {
@@ -2769,14 +2777,14 @@ static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys,
2769 if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent) 2777 if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent)
2770 continue; 2778 continue;
2771 2779
2772 if (is_add) 2780 if (is_add) {
2773 err = cgroup_add_file(cgrp, subsys, cft); 2781 err = cgroup_add_file(cgrp, subsys, cft);
2774 else 2782 if (err)
2775 err = cgroup_rm_file(cgrp, cft); 2783 pr_warn("cgroup_addrm_files: failed to add %s, err=%d\n",
2776 if (err) { 2784 cft->name, err);
2777 pr_warning("cgroup_addrm_files: failed to %s %s, err=%d\n",
2778 is_add ? "add" : "remove", cft->name, err);
2779 ret = err; 2785 ret = err;
2786 } else {
2787 cgroup_rm_file(cgrp, cft);
2780 } 2788 }
2781 } 2789 }
2782 return ret; 2790 return ret;
@@ -3017,6 +3025,32 @@ struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos,
3017} 3025}
3018EXPORT_SYMBOL_GPL(cgroup_next_descendant_pre); 3026EXPORT_SYMBOL_GPL(cgroup_next_descendant_pre);
3019 3027
3028/**
3029 * cgroup_rightmost_descendant - return the rightmost descendant of a cgroup
3030 * @pos: cgroup of interest
3031 *
3032 * Return the rightmost descendant of @pos. If there's no descendant,
3033 * @pos is returned. This can be used during pre-order traversal to skip
3034 * subtree of @pos.
3035 */
3036struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos)
3037{
3038 struct cgroup *last, *tmp;
3039
3040 WARN_ON_ONCE(!rcu_read_lock_held());
3041
3042 do {
3043 last = pos;
3044 /* ->prev isn't RCU safe, walk ->next till the end */
3045 pos = NULL;
3046 list_for_each_entry_rcu(tmp, &last->children, sibling)
3047 pos = tmp;
3048 } while (pos);
3049
3050 return last;
3051}
3052EXPORT_SYMBOL_GPL(cgroup_rightmost_descendant);
3053
3020static struct cgroup *cgroup_leftmost_descendant(struct cgroup *pos) 3054static struct cgroup *cgroup_leftmost_descendant(struct cgroup *pos)
3021{ 3055{
3022 struct cgroup *last; 3056 struct cgroup *last;
@@ -3752,8 +3786,13 @@ static void cgroup_event_remove(struct work_struct *work)
3752 remove); 3786 remove);
3753 struct cgroup *cgrp = event->cgrp; 3787 struct cgroup *cgrp = event->cgrp;
3754 3788
3789 remove_wait_queue(event->wqh, &event->wait);
3790
3755 event->cft->unregister_event(cgrp, event->cft, event->eventfd); 3791 event->cft->unregister_event(cgrp, event->cft, event->eventfd);
3756 3792
3793 /* Notify userspace the event is going away. */
3794 eventfd_signal(event->eventfd, 1);
3795
3757 eventfd_ctx_put(event->eventfd); 3796 eventfd_ctx_put(event->eventfd);
3758 kfree(event); 3797 kfree(event);
3759 dput(cgrp->dentry); 3798 dput(cgrp->dentry);
@@ -3773,15 +3812,25 @@ static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
3773 unsigned long flags = (unsigned long)key; 3812 unsigned long flags = (unsigned long)key;
3774 3813
3775 if (flags & POLLHUP) { 3814 if (flags & POLLHUP) {
3776 __remove_wait_queue(event->wqh, &event->wait);
3777 spin_lock(&cgrp->event_list_lock);
3778 list_del_init(&event->list);
3779 spin_unlock(&cgrp->event_list_lock);
3780 /* 3815 /*
3781 * We are in atomic context, but cgroup_event_remove() may 3816 * If the event has been detached at cgroup removal, we
3782 * sleep, so we have to call it in workqueue. 3817 * can simply return knowing the other side will cleanup
3818 * for us.
3819 *
3820 * We can't race against event freeing since the other
3821 * side will require wqh->lock via remove_wait_queue(),
3822 * which we hold.
3783 */ 3823 */
3784 schedule_work(&event->remove); 3824 spin_lock(&cgrp->event_list_lock);
3825 if (!list_empty(&event->list)) {
3826 list_del_init(&event->list);
3827 /*
3828 * We are in atomic context, but cgroup_event_remove()
3829 * may sleep, so we have to call it in workqueue.
3830 */
3831 schedule_work(&event->remove);
3832 }
3833 spin_unlock(&cgrp->event_list_lock);
3785 } 3834 }
3786 3835
3787 return 0; 3836 return 0;
@@ -3807,6 +3856,7 @@ static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft,
3807 const char *buffer) 3856 const char *buffer)
3808{ 3857{
3809 struct cgroup_event *event = NULL; 3858 struct cgroup_event *event = NULL;
3859 struct cgroup *cgrp_cfile;
3810 unsigned int efd, cfd; 3860 unsigned int efd, cfd;
3811 struct file *efile = NULL; 3861 struct file *efile = NULL;
3812 struct file *cfile = NULL; 3862 struct file *cfile = NULL;
@@ -3862,6 +3912,16 @@ static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft,
3862 goto fail; 3912 goto fail;
3863 } 3913 }
3864 3914
3915 /*
3916 * The file to be monitored must be in the same cgroup as
3917 * cgroup.event_control is.
3918 */
3919 cgrp_cfile = __d_cgrp(cfile->f_dentry->d_parent);
3920 if (cgrp_cfile != cgrp) {
3921 ret = -EINVAL;
3922 goto fail;
3923 }
3924
3865 if (!event->cft->register_event || !event->cft->unregister_event) { 3925 if (!event->cft->register_event || !event->cft->unregister_event) {
3866 ret = -EINVAL; 3926 ret = -EINVAL;
3867 goto fail; 3927 goto fail;
@@ -4135,6 +4195,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
4135 4195
4136 init_cgroup_housekeeping(cgrp); 4196 init_cgroup_housekeeping(cgrp);
4137 4197
4198 dentry->d_fsdata = cgrp;
4199 cgrp->dentry = dentry;
4200
4138 cgrp->parent = parent; 4201 cgrp->parent = parent;
4139 cgrp->root = parent->root; 4202 cgrp->root = parent->root;
4140 cgrp->top_cgroup = parent->top_cgroup; 4203 cgrp->top_cgroup = parent->top_cgroup;
@@ -4172,8 +4235,6 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
4172 lockdep_assert_held(&dentry->d_inode->i_mutex); 4235 lockdep_assert_held(&dentry->d_inode->i_mutex);
4173 4236
4174 /* allocation complete, commit to creation */ 4237 /* allocation complete, commit to creation */
4175 dentry->d_fsdata = cgrp;
4176 cgrp->dentry = dentry;
4177 list_add_tail(&cgrp->allcg_node, &root->allcg_list); 4238 list_add_tail(&cgrp->allcg_node, &root->allcg_list);
4178 list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children); 4239 list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children);
4179 root->number_of_cgroups++; 4240 root->number_of_cgroups++;
@@ -4340,20 +4401,14 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
4340 /* 4401 /*
4341 * Unregister events and notify userspace. 4402 * Unregister events and notify userspace.
4342 * Notify userspace about cgroup removing only after rmdir of cgroup 4403 * Notify userspace about cgroup removing only after rmdir of cgroup
4343 * directory to avoid race between userspace and kernelspace. Use 4404 * directory to avoid race between userspace and kernelspace.
4344 * a temporary list to avoid a deadlock with cgroup_event_wake(). Since
4345 * cgroup_event_wake() is called with the wait queue head locked,
4346 * remove_wait_queue() cannot be called while holding event_list_lock.
4347 */ 4405 */
4348 spin_lock(&cgrp->event_list_lock); 4406 spin_lock(&cgrp->event_list_lock);
4349 list_splice_init(&cgrp->event_list, &tmp_list); 4407 list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
4350 spin_unlock(&cgrp->event_list_lock);
4351 list_for_each_entry_safe(event, tmp, &tmp_list, list) {
4352 list_del_init(&event->list); 4408 list_del_init(&event->list);
4353 remove_wait_queue(event->wqh, &event->wait);
4354 eventfd_signal(event->eventfd, 1);
4355 schedule_work(&event->remove); 4409 schedule_work(&event->remove);
4356 } 4410 }
4411 spin_unlock(&cgrp->event_list_lock);
4357 4412
4358 return 0; 4413 return 0;
4359} 4414}
@@ -4438,6 +4493,9 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
4438{ 4493{
4439 struct cgroup_subsys_state *css; 4494 struct cgroup_subsys_state *css;
4440 int i, ret; 4495 int i, ret;
4496 struct hlist_node *node, *tmp;
4497 struct css_set *cg;
4498 unsigned long key;
4441 4499
4442 /* check name and function validity */ 4500 /* check name and function validity */
4443 if (ss->name == NULL || strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN || 4501 if (ss->name == NULL || strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN ||
@@ -4503,23 +4561,17 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
4503 * this is all done under the css_set_lock. 4561 * this is all done under the css_set_lock.
4504 */ 4562 */
4505 write_lock(&css_set_lock); 4563 write_lock(&css_set_lock);
4506 for (i = 0; i < CSS_SET_TABLE_SIZE; i++) { 4564 hash_for_each_safe(css_set_table, i, node, tmp, cg, hlist) {
4507 struct css_set *cg; 4565 /* skip entries that we already rehashed */
4508 struct hlist_node *node, *tmp; 4566 if (cg->subsys[ss->subsys_id])
4509 struct hlist_head *bucket = &css_set_table[i], *new_bucket; 4567 continue;
4510 4568 /* remove existing entry */
4511 hlist_for_each_entry_safe(cg, node, tmp, bucket, hlist) { 4569 hash_del(&cg->hlist);
4512 /* skip entries that we already rehashed */ 4570 /* set new value */
4513 if (cg->subsys[ss->subsys_id]) 4571 cg->subsys[ss->subsys_id] = css;
4514 continue; 4572 /* recompute hash and restore entry */
4515 /* remove existing entry */ 4573 key = css_set_hash(cg->subsys);
4516 hlist_del(&cg->hlist); 4574 hash_add(css_set_table, node, key);
4517 /* set new value */
4518 cg->subsys[ss->subsys_id] = css;
4519 /* recompute hash and restore entry */
4520 new_bucket = css_set_hash(cg->subsys);
4521 hlist_add_head(&cg->hlist, new_bucket);
4522 }
4523 } 4575 }
4524 write_unlock(&css_set_lock); 4576 write_unlock(&css_set_lock);
4525 4577
@@ -4551,7 +4603,6 @@ EXPORT_SYMBOL_GPL(cgroup_load_subsys);
4551void cgroup_unload_subsys(struct cgroup_subsys *ss) 4603void cgroup_unload_subsys(struct cgroup_subsys *ss)
4552{ 4604{
4553 struct cg_cgroup_link *link; 4605 struct cg_cgroup_link *link;
4554 struct hlist_head *hhead;
4555 4606
4556 BUG_ON(ss->module == NULL); 4607 BUG_ON(ss->module == NULL);
4557 4608
@@ -4585,11 +4636,12 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss)
4585 write_lock(&css_set_lock); 4636 write_lock(&css_set_lock);
4586 list_for_each_entry(link, &dummytop->css_sets, cgrp_link_list) { 4637 list_for_each_entry(link, &dummytop->css_sets, cgrp_link_list) {
4587 struct css_set *cg = link->cg; 4638 struct css_set *cg = link->cg;
4639 unsigned long key;
4588 4640
4589 hlist_del(&cg->hlist); 4641 hash_del(&cg->hlist);
4590 cg->subsys[ss->subsys_id] = NULL; 4642 cg->subsys[ss->subsys_id] = NULL;
4591 hhead = css_set_hash(cg->subsys); 4643 key = css_set_hash(cg->subsys);
4592 hlist_add_head(&cg->hlist, hhead); 4644 hash_add(css_set_table, &cg->hlist, key);
4593 } 4645 }
4594 write_unlock(&css_set_lock); 4646 write_unlock(&css_set_lock);
4595 4647
@@ -4631,9 +4683,6 @@ int __init cgroup_init_early(void)
4631 list_add(&init_css_set_link.cg_link_list, 4683 list_add(&init_css_set_link.cg_link_list,
4632 &init_css_set.cg_links); 4684 &init_css_set.cg_links);
4633 4685
4634 for (i = 0; i < CSS_SET_TABLE_SIZE; i++)
4635 INIT_HLIST_HEAD(&css_set_table[i]);
4636
4637 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 4686 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
4638 struct cgroup_subsys *ss = subsys[i]; 4687 struct cgroup_subsys *ss = subsys[i];
4639 4688
@@ -4667,7 +4716,7 @@ int __init cgroup_init(void)
4667{ 4716{
4668 int err; 4717 int err;
4669 int i; 4718 int i;
4670 struct hlist_head *hhead; 4719 unsigned long key;
4671 4720
4672 err = bdi_init(&cgroup_backing_dev_info); 4721 err = bdi_init(&cgroup_backing_dev_info);
4673 if (err) 4722 if (err)
@@ -4686,8 +4735,8 @@ int __init cgroup_init(void)
4686 } 4735 }
4687 4736
4688 /* Add init_css_set to the hash table */ 4737 /* Add init_css_set to the hash table */
4689 hhead = css_set_hash(init_css_set.subsys); 4738 key = css_set_hash(init_css_set.subsys);
4690 hlist_add_head(&init_css_set.hlist, hhead); 4739 hash_add(css_set_table, &init_css_set.hlist, key);
4691 BUG_ON(!init_root_id(&rootnode)); 4740 BUG_ON(!init_root_id(&rootnode));
4692 4741
4693 cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj); 4742 cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj);
@@ -4982,8 +5031,7 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
4982 } 5031 }
4983 task_unlock(tsk); 5032 task_unlock(tsk);
4984 5033
4985 if (cg) 5034 put_css_set_taskexit(cg);
4986 put_css_set_taskexit(cg);
4987} 5035}
4988 5036
4989/** 5037/**