diff options
Diffstat (limited to 'kernel/cgroup.c')
-rw-r--r-- | kernel/cgroup.c | 288 |
1 files changed, 168 insertions, 120 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 4855892798fd..b5c64327e712 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -52,7 +52,7 @@ | |||
52 | #include <linux/module.h> | 52 | #include <linux/module.h> |
53 | #include <linux/delayacct.h> | 53 | #include <linux/delayacct.h> |
54 | #include <linux/cgroupstats.h> | 54 | #include <linux/cgroupstats.h> |
55 | #include <linux/hash.h> | 55 | #include <linux/hashtable.h> |
56 | #include <linux/namei.h> | 56 | #include <linux/namei.h> |
57 | #include <linux/pid_namespace.h> | 57 | #include <linux/pid_namespace.h> |
58 | #include <linux/idr.h> | 58 | #include <linux/idr.h> |
@@ -376,22 +376,18 @@ static int css_set_count; | |||
376 | * account cgroups in empty hierarchies. | 376 | * account cgroups in empty hierarchies. |
377 | */ | 377 | */ |
378 | #define CSS_SET_HASH_BITS 7 | 378 | #define CSS_SET_HASH_BITS 7 |
379 | #define CSS_SET_TABLE_SIZE (1 << CSS_SET_HASH_BITS) | 379 | static DEFINE_HASHTABLE(css_set_table, CSS_SET_HASH_BITS); |
380 | static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE]; | ||
381 | 380 | ||
382 | static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[]) | 381 | static unsigned long css_set_hash(struct cgroup_subsys_state *css[]) |
383 | { | 382 | { |
384 | int i; | 383 | int i; |
385 | int index; | 384 | unsigned long key = 0UL; |
386 | unsigned long tmp = 0UL; | ||
387 | 385 | ||
388 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) | 386 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) |
389 | tmp += (unsigned long)css[i]; | 387 | key += (unsigned long)css[i]; |
390 | tmp = (tmp >> 16) ^ tmp; | 388 | key = (key >> 16) ^ key; |
391 | 389 | ||
392 | index = hash_long(tmp, CSS_SET_HASH_BITS); | 390 | return key; |
393 | |||
394 | return &css_set_table[index]; | ||
395 | } | 391 | } |
396 | 392 | ||
397 | /* We don't maintain the lists running through each css_set to its | 393 | /* We don't maintain the lists running through each css_set to its |
@@ -418,7 +414,7 @@ static void __put_css_set(struct css_set *cg, int taskexit) | |||
418 | } | 414 | } |
419 | 415 | ||
420 | /* This css_set is dead. unlink it and release cgroup refcounts */ | 416 | /* This css_set is dead. unlink it and release cgroup refcounts */ |
421 | hlist_del(&cg->hlist); | 417 | hash_del(&cg->hlist); |
422 | css_set_count--; | 418 | css_set_count--; |
423 | 419 | ||
424 | list_for_each_entry_safe(link, saved_link, &cg->cg_links, | 420 | list_for_each_entry_safe(link, saved_link, &cg->cg_links, |
@@ -426,12 +422,20 @@ static void __put_css_set(struct css_set *cg, int taskexit) | |||
426 | struct cgroup *cgrp = link->cgrp; | 422 | struct cgroup *cgrp = link->cgrp; |
427 | list_del(&link->cg_link_list); | 423 | list_del(&link->cg_link_list); |
428 | list_del(&link->cgrp_link_list); | 424 | list_del(&link->cgrp_link_list); |
425 | |||
426 | /* | ||
427 | * We may not be holding cgroup_mutex, and if cgrp->count is | ||
428 | * dropped to 0 the cgroup can be destroyed at any time, hence | ||
429 | * rcu_read_lock is used to keep it alive. | ||
430 | */ | ||
431 | rcu_read_lock(); | ||
429 | if (atomic_dec_and_test(&cgrp->count) && | 432 | if (atomic_dec_and_test(&cgrp->count) && |
430 | notify_on_release(cgrp)) { | 433 | notify_on_release(cgrp)) { |
431 | if (taskexit) | 434 | if (taskexit) |
432 | set_bit(CGRP_RELEASABLE, &cgrp->flags); | 435 | set_bit(CGRP_RELEASABLE, &cgrp->flags); |
433 | check_for_release(cgrp); | 436 | check_for_release(cgrp); |
434 | } | 437 | } |
438 | rcu_read_unlock(); | ||
435 | 439 | ||
436 | kfree(link); | 440 | kfree(link); |
437 | } | 441 | } |
@@ -550,9 +554,9 @@ static struct css_set *find_existing_css_set( | |||
550 | { | 554 | { |
551 | int i; | 555 | int i; |
552 | struct cgroupfs_root *root = cgrp->root; | 556 | struct cgroupfs_root *root = cgrp->root; |
553 | struct hlist_head *hhead; | ||
554 | struct hlist_node *node; | 557 | struct hlist_node *node; |
555 | struct css_set *cg; | 558 | struct css_set *cg; |
559 | unsigned long key; | ||
556 | 560 | ||
557 | /* | 561 | /* |
558 | * Build the set of subsystem state objects that we want to see in the | 562 | * Build the set of subsystem state objects that we want to see in the |
@@ -572,8 +576,8 @@ static struct css_set *find_existing_css_set( | |||
572 | } | 576 | } |
573 | } | 577 | } |
574 | 578 | ||
575 | hhead = css_set_hash(template); | 579 | key = css_set_hash(template); |
576 | hlist_for_each_entry(cg, node, hhead, hlist) { | 580 | hash_for_each_possible(css_set_table, cg, node, hlist, key) { |
577 | if (!compare_css_sets(cg, oldcg, cgrp, template)) | 581 | if (!compare_css_sets(cg, oldcg, cgrp, template)) |
578 | continue; | 582 | continue; |
579 | 583 | ||
@@ -657,8 +661,8 @@ static struct css_set *find_css_set( | |||
657 | 661 | ||
658 | struct list_head tmp_cg_links; | 662 | struct list_head tmp_cg_links; |
659 | 663 | ||
660 | struct hlist_head *hhead; | ||
661 | struct cg_cgroup_link *link; | 664 | struct cg_cgroup_link *link; |
665 | unsigned long key; | ||
662 | 666 | ||
663 | /* First see if we already have a cgroup group that matches | 667 | /* First see if we already have a cgroup group that matches |
664 | * the desired set */ | 668 | * the desired set */ |
@@ -704,8 +708,8 @@ static struct css_set *find_css_set( | |||
704 | css_set_count++; | 708 | css_set_count++; |
705 | 709 | ||
706 | /* Add this cgroup group to the hash table */ | 710 | /* Add this cgroup group to the hash table */ |
707 | hhead = css_set_hash(res->subsys); | 711 | key = css_set_hash(res->subsys); |
708 | hlist_add_head(&res->hlist, hhead); | 712 | hash_add(css_set_table, &res->hlist, key); |
709 | 713 | ||
710 | write_unlock(&css_set_lock); | 714 | write_unlock(&css_set_lock); |
711 | 715 | ||
@@ -856,47 +860,54 @@ static struct inode *cgroup_new_inode(umode_t mode, struct super_block *sb) | |||
856 | return inode; | 860 | return inode; |
857 | } | 861 | } |
858 | 862 | ||
859 | static void cgroup_diput(struct dentry *dentry, struct inode *inode) | 863 | static void cgroup_free_fn(struct work_struct *work) |
860 | { | 864 | { |
861 | /* is dentry a directory ? if so, kfree() associated cgroup */ | 865 | struct cgroup *cgrp = container_of(work, struct cgroup, free_work); |
862 | if (S_ISDIR(inode->i_mode)) { | 866 | struct cgroup_subsys *ss; |
863 | struct cgroup *cgrp = dentry->d_fsdata; | ||
864 | struct cgroup_subsys *ss; | ||
865 | BUG_ON(!(cgroup_is_removed(cgrp))); | ||
866 | /* It's possible for external users to be holding css | ||
867 | * reference counts on a cgroup; css_put() needs to | ||
868 | * be able to access the cgroup after decrementing | ||
869 | * the reference count in order to know if it needs to | ||
870 | * queue the cgroup to be handled by the release | ||
871 | * agent */ | ||
872 | synchronize_rcu(); | ||
873 | 867 | ||
874 | mutex_lock(&cgroup_mutex); | 868 | mutex_lock(&cgroup_mutex); |
875 | /* | 869 | /* |
876 | * Release the subsystem state objects. | 870 | * Release the subsystem state objects. |
877 | */ | 871 | */ |
878 | for_each_subsys(cgrp->root, ss) | 872 | for_each_subsys(cgrp->root, ss) |
879 | ss->css_free(cgrp); | 873 | ss->css_free(cgrp); |
880 | 874 | ||
881 | cgrp->root->number_of_cgroups--; | 875 | cgrp->root->number_of_cgroups--; |
882 | mutex_unlock(&cgroup_mutex); | 876 | mutex_unlock(&cgroup_mutex); |
883 | 877 | ||
884 | /* | 878 | /* |
885 | * Drop the active superblock reference that we took when we | 879 | * Drop the active superblock reference that we took when we |
886 | * created the cgroup | 880 | * created the cgroup |
887 | */ | 881 | */ |
888 | deactivate_super(cgrp->root->sb); | 882 | deactivate_super(cgrp->root->sb); |
889 | 883 | ||
890 | /* | 884 | /* |
891 | * if we're getting rid of the cgroup, refcount should ensure | 885 | * if we're getting rid of the cgroup, refcount should ensure |
892 | * that there are no pidlists left. | 886 | * that there are no pidlists left. |
893 | */ | 887 | */ |
894 | BUG_ON(!list_empty(&cgrp->pidlists)); | 888 | BUG_ON(!list_empty(&cgrp->pidlists)); |
895 | 889 | ||
896 | simple_xattrs_free(&cgrp->xattrs); | 890 | simple_xattrs_free(&cgrp->xattrs); |
897 | 891 | ||
898 | ida_simple_remove(&cgrp->root->cgroup_ida, cgrp->id); | 892 | ida_simple_remove(&cgrp->root->cgroup_ida, cgrp->id); |
899 | kfree_rcu(cgrp, rcu_head); | 893 | kfree(cgrp); |
894 | } | ||
895 | |||
896 | static void cgroup_free_rcu(struct rcu_head *head) | ||
897 | { | ||
898 | struct cgroup *cgrp = container_of(head, struct cgroup, rcu_head); | ||
899 | |||
900 | schedule_work(&cgrp->free_work); | ||
901 | } | ||
902 | |||
903 | static void cgroup_diput(struct dentry *dentry, struct inode *inode) | ||
904 | { | ||
905 | /* is dentry a directory ? if so, kfree() associated cgroup */ | ||
906 | if (S_ISDIR(inode->i_mode)) { | ||
907 | struct cgroup *cgrp = dentry->d_fsdata; | ||
908 | |||
909 | BUG_ON(!(cgroup_is_removed(cgrp))); | ||
910 | call_rcu(&cgrp->rcu_head, cgroup_free_rcu); | ||
900 | } else { | 911 | } else { |
901 | struct cfent *cfe = __d_cfe(dentry); | 912 | struct cfent *cfe = __d_cfe(dentry); |
902 | struct cgroup *cgrp = dentry->d_parent->d_fsdata; | 913 | struct cgroup *cgrp = dentry->d_parent->d_fsdata; |
@@ -925,13 +936,17 @@ static void remove_dir(struct dentry *d) | |||
925 | dput(parent); | 936 | dput(parent); |
926 | } | 937 | } |
927 | 938 | ||
928 | static int cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft) | 939 | static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft) |
929 | { | 940 | { |
930 | struct cfent *cfe; | 941 | struct cfent *cfe; |
931 | 942 | ||
932 | lockdep_assert_held(&cgrp->dentry->d_inode->i_mutex); | 943 | lockdep_assert_held(&cgrp->dentry->d_inode->i_mutex); |
933 | lockdep_assert_held(&cgroup_mutex); | 944 | lockdep_assert_held(&cgroup_mutex); |
934 | 945 | ||
946 | /* | ||
947 | * If we're doing cleanup due to failure of cgroup_create(), | ||
948 | * the corresponding @cfe may not exist. | ||
949 | */ | ||
935 | list_for_each_entry(cfe, &cgrp->files, node) { | 950 | list_for_each_entry(cfe, &cgrp->files, node) { |
936 | struct dentry *d = cfe->dentry; | 951 | struct dentry *d = cfe->dentry; |
937 | 952 | ||
@@ -944,9 +959,8 @@ static int cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft) | |||
944 | list_del_init(&cfe->node); | 959 | list_del_init(&cfe->node); |
945 | dput(d); | 960 | dput(d); |
946 | 961 | ||
947 | return 0; | 962 | break; |
948 | } | 963 | } |
949 | return -ENOENT; | ||
950 | } | 964 | } |
951 | 965 | ||
952 | /** | 966 | /** |
@@ -1083,7 +1097,6 @@ static int rebind_subsystems(struct cgroupfs_root *root, | |||
1083 | } | 1097 | } |
1084 | } | 1098 | } |
1085 | root->subsys_mask = root->actual_subsys_mask = final_subsys_mask; | 1099 | root->subsys_mask = root->actual_subsys_mask = final_subsys_mask; |
1086 | synchronize_rcu(); | ||
1087 | 1100 | ||
1088 | return 0; | 1101 | return 0; |
1089 | } | 1102 | } |
@@ -1393,6 +1406,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) | |||
1393 | INIT_LIST_HEAD(&cgrp->allcg_node); | 1406 | INIT_LIST_HEAD(&cgrp->allcg_node); |
1394 | INIT_LIST_HEAD(&cgrp->release_list); | 1407 | INIT_LIST_HEAD(&cgrp->release_list); |
1395 | INIT_LIST_HEAD(&cgrp->pidlists); | 1408 | INIT_LIST_HEAD(&cgrp->pidlists); |
1409 | INIT_WORK(&cgrp->free_work, cgroup_free_fn); | ||
1396 | mutex_init(&cgrp->pidlist_mutex); | 1410 | mutex_init(&cgrp->pidlist_mutex); |
1397 | INIT_LIST_HEAD(&cgrp->event_list); | 1411 | INIT_LIST_HEAD(&cgrp->event_list); |
1398 | spin_lock_init(&cgrp->event_list_lock); | 1412 | spin_lock_init(&cgrp->event_list_lock); |
@@ -1597,6 +1611,8 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
1597 | struct cgroupfs_root *existing_root; | 1611 | struct cgroupfs_root *existing_root; |
1598 | const struct cred *cred; | 1612 | const struct cred *cred; |
1599 | int i; | 1613 | int i; |
1614 | struct hlist_node *node; | ||
1615 | struct css_set *cg; | ||
1600 | 1616 | ||
1601 | BUG_ON(sb->s_root != NULL); | 1617 | BUG_ON(sb->s_root != NULL); |
1602 | 1618 | ||
@@ -1650,14 +1666,8 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
1650 | /* Link the top cgroup in this hierarchy into all | 1666 | /* Link the top cgroup in this hierarchy into all |
1651 | * the css_set objects */ | 1667 | * the css_set objects */ |
1652 | write_lock(&css_set_lock); | 1668 | write_lock(&css_set_lock); |
1653 | for (i = 0; i < CSS_SET_TABLE_SIZE; i++) { | 1669 | hash_for_each(css_set_table, i, node, cg, hlist) |
1654 | struct hlist_head *hhead = &css_set_table[i]; | 1670 | link_css_set(&tmp_cg_links, cg, root_cgrp); |
1655 | struct hlist_node *node; | ||
1656 | struct css_set *cg; | ||
1657 | |||
1658 | hlist_for_each_entry(cg, node, hhead, hlist) | ||
1659 | link_css_set(&tmp_cg_links, cg, root_cgrp); | ||
1660 | } | ||
1661 | write_unlock(&css_set_lock); | 1671 | write_unlock(&css_set_lock); |
1662 | 1672 | ||
1663 | free_cg_links(&tmp_cg_links); | 1673 | free_cg_links(&tmp_cg_links); |
@@ -1773,7 +1783,7 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen) | |||
1773 | rcu_lockdep_assert(rcu_read_lock_held() || cgroup_lock_is_held(), | 1783 | rcu_lockdep_assert(rcu_read_lock_held() || cgroup_lock_is_held(), |
1774 | "cgroup_path() called without proper locking"); | 1784 | "cgroup_path() called without proper locking"); |
1775 | 1785 | ||
1776 | if (!dentry || cgrp == dummytop) { | 1786 | if (cgrp == dummytop) { |
1777 | /* | 1787 | /* |
1778 | * Inactive subsystems have no dentry for their root | 1788 | * Inactive subsystems have no dentry for their root |
1779 | * cgroup | 1789 | * cgroup |
@@ -1982,7 +1992,6 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | |||
1982 | ss->attach(cgrp, &tset); | 1992 | ss->attach(cgrp, &tset); |
1983 | } | 1993 | } |
1984 | 1994 | ||
1985 | synchronize_rcu(); | ||
1986 | out: | 1995 | out: |
1987 | if (retval) { | 1996 | if (retval) { |
1988 | for_each_subsys(root, ss) { | 1997 | for_each_subsys(root, ss) { |
@@ -2151,7 +2160,6 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) | |||
2151 | /* | 2160 | /* |
2152 | * step 5: success! and cleanup | 2161 | * step 5: success! and cleanup |
2153 | */ | 2162 | */ |
2154 | synchronize_rcu(); | ||
2155 | retval = 0; | 2163 | retval = 0; |
2156 | out_put_css_set_refs: | 2164 | out_put_css_set_refs: |
2157 | if (retval) { | 2165 | if (retval) { |
@@ -2769,14 +2777,14 @@ static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys, | |||
2769 | if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent) | 2777 | if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent) |
2770 | continue; | 2778 | continue; |
2771 | 2779 | ||
2772 | if (is_add) | 2780 | if (is_add) { |
2773 | err = cgroup_add_file(cgrp, subsys, cft); | 2781 | err = cgroup_add_file(cgrp, subsys, cft); |
2774 | else | 2782 | if (err) |
2775 | err = cgroup_rm_file(cgrp, cft); | 2783 | pr_warn("cgroup_addrm_files: failed to add %s, err=%d\n", |
2776 | if (err) { | 2784 | cft->name, err); |
2777 | pr_warning("cgroup_addrm_files: failed to %s %s, err=%d\n", | ||
2778 | is_add ? "add" : "remove", cft->name, err); | ||
2779 | ret = err; | 2785 | ret = err; |
2786 | } else { | ||
2787 | cgroup_rm_file(cgrp, cft); | ||
2780 | } | 2788 | } |
2781 | } | 2789 | } |
2782 | return ret; | 2790 | return ret; |
@@ -3017,6 +3025,32 @@ struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos, | |||
3017 | } | 3025 | } |
3018 | EXPORT_SYMBOL_GPL(cgroup_next_descendant_pre); | 3026 | EXPORT_SYMBOL_GPL(cgroup_next_descendant_pre); |
3019 | 3027 | ||
3028 | /** | ||
3029 | * cgroup_rightmost_descendant - return the rightmost descendant of a cgroup | ||
3030 | * @pos: cgroup of interest | ||
3031 | * | ||
3032 | * Return the rightmost descendant of @pos. If there's no descendant, | ||
3033 | * @pos is returned. This can be used during pre-order traversal to skip | ||
3034 | * subtree of @pos. | ||
3035 | */ | ||
3036 | struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos) | ||
3037 | { | ||
3038 | struct cgroup *last, *tmp; | ||
3039 | |||
3040 | WARN_ON_ONCE(!rcu_read_lock_held()); | ||
3041 | |||
3042 | do { | ||
3043 | last = pos; | ||
3044 | /* ->prev isn't RCU safe, walk ->next till the end */ | ||
3045 | pos = NULL; | ||
3046 | list_for_each_entry_rcu(tmp, &last->children, sibling) | ||
3047 | pos = tmp; | ||
3048 | } while (pos); | ||
3049 | |||
3050 | return last; | ||
3051 | } | ||
3052 | EXPORT_SYMBOL_GPL(cgroup_rightmost_descendant); | ||
3053 | |||
3020 | static struct cgroup *cgroup_leftmost_descendant(struct cgroup *pos) | 3054 | static struct cgroup *cgroup_leftmost_descendant(struct cgroup *pos) |
3021 | { | 3055 | { |
3022 | struct cgroup *last; | 3056 | struct cgroup *last; |
@@ -3752,8 +3786,13 @@ static void cgroup_event_remove(struct work_struct *work) | |||
3752 | remove); | 3786 | remove); |
3753 | struct cgroup *cgrp = event->cgrp; | 3787 | struct cgroup *cgrp = event->cgrp; |
3754 | 3788 | ||
3789 | remove_wait_queue(event->wqh, &event->wait); | ||
3790 | |||
3755 | event->cft->unregister_event(cgrp, event->cft, event->eventfd); | 3791 | event->cft->unregister_event(cgrp, event->cft, event->eventfd); |
3756 | 3792 | ||
3793 | /* Notify userspace the event is going away. */ | ||
3794 | eventfd_signal(event->eventfd, 1); | ||
3795 | |||
3757 | eventfd_ctx_put(event->eventfd); | 3796 | eventfd_ctx_put(event->eventfd); |
3758 | kfree(event); | 3797 | kfree(event); |
3759 | dput(cgrp->dentry); | 3798 | dput(cgrp->dentry); |
@@ -3773,15 +3812,25 @@ static int cgroup_event_wake(wait_queue_t *wait, unsigned mode, | |||
3773 | unsigned long flags = (unsigned long)key; | 3812 | unsigned long flags = (unsigned long)key; |
3774 | 3813 | ||
3775 | if (flags & POLLHUP) { | 3814 | if (flags & POLLHUP) { |
3776 | __remove_wait_queue(event->wqh, &event->wait); | ||
3777 | spin_lock(&cgrp->event_list_lock); | ||
3778 | list_del_init(&event->list); | ||
3779 | spin_unlock(&cgrp->event_list_lock); | ||
3780 | /* | 3815 | /* |
3781 | * We are in atomic context, but cgroup_event_remove() may | 3816 | * If the event has been detached at cgroup removal, we |
3782 | * sleep, so we have to call it in workqueue. | 3817 | * can simply return knowing the other side will cleanup |
3818 | * for us. | ||
3819 | * | ||
3820 | * We can't race against event freeing since the other | ||
3821 | * side will require wqh->lock via remove_wait_queue(), | ||
3822 | * which we hold. | ||
3783 | */ | 3823 | */ |
3784 | schedule_work(&event->remove); | 3824 | spin_lock(&cgrp->event_list_lock); |
3825 | if (!list_empty(&event->list)) { | ||
3826 | list_del_init(&event->list); | ||
3827 | /* | ||
3828 | * We are in atomic context, but cgroup_event_remove() | ||
3829 | * may sleep, so we have to call it in workqueue. | ||
3830 | */ | ||
3831 | schedule_work(&event->remove); | ||
3832 | } | ||
3833 | spin_unlock(&cgrp->event_list_lock); | ||
3785 | } | 3834 | } |
3786 | 3835 | ||
3787 | return 0; | 3836 | return 0; |
@@ -3807,6 +3856,7 @@ static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft, | |||
3807 | const char *buffer) | 3856 | const char *buffer) |
3808 | { | 3857 | { |
3809 | struct cgroup_event *event = NULL; | 3858 | struct cgroup_event *event = NULL; |
3859 | struct cgroup *cgrp_cfile; | ||
3810 | unsigned int efd, cfd; | 3860 | unsigned int efd, cfd; |
3811 | struct file *efile = NULL; | 3861 | struct file *efile = NULL; |
3812 | struct file *cfile = NULL; | 3862 | struct file *cfile = NULL; |
@@ -3862,6 +3912,16 @@ static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft, | |||
3862 | goto fail; | 3912 | goto fail; |
3863 | } | 3913 | } |
3864 | 3914 | ||
3915 | /* | ||
3916 | * The file to be monitored must be in the same cgroup as | ||
3917 | * cgroup.event_control is. | ||
3918 | */ | ||
3919 | cgrp_cfile = __d_cgrp(cfile->f_dentry->d_parent); | ||
3920 | if (cgrp_cfile != cgrp) { | ||
3921 | ret = -EINVAL; | ||
3922 | goto fail; | ||
3923 | } | ||
3924 | |||
3865 | if (!event->cft->register_event || !event->cft->unregister_event) { | 3925 | if (!event->cft->register_event || !event->cft->unregister_event) { |
3866 | ret = -EINVAL; | 3926 | ret = -EINVAL; |
3867 | goto fail; | 3927 | goto fail; |
@@ -4135,6 +4195,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
4135 | 4195 | ||
4136 | init_cgroup_housekeeping(cgrp); | 4196 | init_cgroup_housekeeping(cgrp); |
4137 | 4197 | ||
4198 | dentry->d_fsdata = cgrp; | ||
4199 | cgrp->dentry = dentry; | ||
4200 | |||
4138 | cgrp->parent = parent; | 4201 | cgrp->parent = parent; |
4139 | cgrp->root = parent->root; | 4202 | cgrp->root = parent->root; |
4140 | cgrp->top_cgroup = parent->top_cgroup; | 4203 | cgrp->top_cgroup = parent->top_cgroup; |
@@ -4172,8 +4235,6 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
4172 | lockdep_assert_held(&dentry->d_inode->i_mutex); | 4235 | lockdep_assert_held(&dentry->d_inode->i_mutex); |
4173 | 4236 | ||
4174 | /* allocation complete, commit to creation */ | 4237 | /* allocation complete, commit to creation */ |
4175 | dentry->d_fsdata = cgrp; | ||
4176 | cgrp->dentry = dentry; | ||
4177 | list_add_tail(&cgrp->allcg_node, &root->allcg_list); | 4238 | list_add_tail(&cgrp->allcg_node, &root->allcg_list); |
4178 | list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children); | 4239 | list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children); |
4179 | root->number_of_cgroups++; | 4240 | root->number_of_cgroups++; |
@@ -4340,20 +4401,14 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) | |||
4340 | /* | 4401 | /* |
4341 | * Unregister events and notify userspace. | 4402 | * Unregister events and notify userspace. |
4342 | * Notify userspace about cgroup removing only after rmdir of cgroup | 4403 | * Notify userspace about cgroup removing only after rmdir of cgroup |
4343 | * directory to avoid race between userspace and kernelspace. Use | 4404 | * directory to avoid race between userspace and kernelspace. |
4344 | * a temporary list to avoid a deadlock with cgroup_event_wake(). Since | ||
4345 | * cgroup_event_wake() is called with the wait queue head locked, | ||
4346 | * remove_wait_queue() cannot be called while holding event_list_lock. | ||
4347 | */ | 4405 | */ |
4348 | spin_lock(&cgrp->event_list_lock); | 4406 | spin_lock(&cgrp->event_list_lock); |
4349 | list_splice_init(&cgrp->event_list, &tmp_list); | 4407 | list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) { |
4350 | spin_unlock(&cgrp->event_list_lock); | ||
4351 | list_for_each_entry_safe(event, tmp, &tmp_list, list) { | ||
4352 | list_del_init(&event->list); | 4408 | list_del_init(&event->list); |
4353 | remove_wait_queue(event->wqh, &event->wait); | ||
4354 | eventfd_signal(event->eventfd, 1); | ||
4355 | schedule_work(&event->remove); | 4409 | schedule_work(&event->remove); |
4356 | } | 4410 | } |
4411 | spin_unlock(&cgrp->event_list_lock); | ||
4357 | 4412 | ||
4358 | return 0; | 4413 | return 0; |
4359 | } | 4414 | } |
@@ -4438,6 +4493,9 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) | |||
4438 | { | 4493 | { |
4439 | struct cgroup_subsys_state *css; | 4494 | struct cgroup_subsys_state *css; |
4440 | int i, ret; | 4495 | int i, ret; |
4496 | struct hlist_node *node, *tmp; | ||
4497 | struct css_set *cg; | ||
4498 | unsigned long key; | ||
4441 | 4499 | ||
4442 | /* check name and function validity */ | 4500 | /* check name and function validity */ |
4443 | if (ss->name == NULL || strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN || | 4501 | if (ss->name == NULL || strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN || |
@@ -4503,23 +4561,17 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) | |||
4503 | * this is all done under the css_set_lock. | 4561 | * this is all done under the css_set_lock. |
4504 | */ | 4562 | */ |
4505 | write_lock(&css_set_lock); | 4563 | write_lock(&css_set_lock); |
4506 | for (i = 0; i < CSS_SET_TABLE_SIZE; i++) { | 4564 | hash_for_each_safe(css_set_table, i, node, tmp, cg, hlist) { |
4507 | struct css_set *cg; | 4565 | /* skip entries that we already rehashed */ |
4508 | struct hlist_node *node, *tmp; | 4566 | if (cg->subsys[ss->subsys_id]) |
4509 | struct hlist_head *bucket = &css_set_table[i], *new_bucket; | 4567 | continue; |
4510 | 4568 | /* remove existing entry */ | |
4511 | hlist_for_each_entry_safe(cg, node, tmp, bucket, hlist) { | 4569 | hash_del(&cg->hlist); |
4512 | /* skip entries that we already rehashed */ | 4570 | /* set new value */ |
4513 | if (cg->subsys[ss->subsys_id]) | 4571 | cg->subsys[ss->subsys_id] = css; |
4514 | continue; | 4572 | /* recompute hash and restore entry */ |
4515 | /* remove existing entry */ | 4573 | key = css_set_hash(cg->subsys); |
4516 | hlist_del(&cg->hlist); | 4574 | hash_add(css_set_table, node, key); |
4517 | /* set new value */ | ||
4518 | cg->subsys[ss->subsys_id] = css; | ||
4519 | /* recompute hash and restore entry */ | ||
4520 | new_bucket = css_set_hash(cg->subsys); | ||
4521 | hlist_add_head(&cg->hlist, new_bucket); | ||
4522 | } | ||
4523 | } | 4575 | } |
4524 | write_unlock(&css_set_lock); | 4576 | write_unlock(&css_set_lock); |
4525 | 4577 | ||
@@ -4551,7 +4603,6 @@ EXPORT_SYMBOL_GPL(cgroup_load_subsys); | |||
4551 | void cgroup_unload_subsys(struct cgroup_subsys *ss) | 4603 | void cgroup_unload_subsys(struct cgroup_subsys *ss) |
4552 | { | 4604 | { |
4553 | struct cg_cgroup_link *link; | 4605 | struct cg_cgroup_link *link; |
4554 | struct hlist_head *hhead; | ||
4555 | 4606 | ||
4556 | BUG_ON(ss->module == NULL); | 4607 | BUG_ON(ss->module == NULL); |
4557 | 4608 | ||
@@ -4585,11 +4636,12 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss) | |||
4585 | write_lock(&css_set_lock); | 4636 | write_lock(&css_set_lock); |
4586 | list_for_each_entry(link, &dummytop->css_sets, cgrp_link_list) { | 4637 | list_for_each_entry(link, &dummytop->css_sets, cgrp_link_list) { |
4587 | struct css_set *cg = link->cg; | 4638 | struct css_set *cg = link->cg; |
4639 | unsigned long key; | ||
4588 | 4640 | ||
4589 | hlist_del(&cg->hlist); | 4641 | hash_del(&cg->hlist); |
4590 | cg->subsys[ss->subsys_id] = NULL; | 4642 | cg->subsys[ss->subsys_id] = NULL; |
4591 | hhead = css_set_hash(cg->subsys); | 4643 | key = css_set_hash(cg->subsys); |
4592 | hlist_add_head(&cg->hlist, hhead); | 4644 | hash_add(css_set_table, &cg->hlist, key); |
4593 | } | 4645 | } |
4594 | write_unlock(&css_set_lock); | 4646 | write_unlock(&css_set_lock); |
4595 | 4647 | ||
@@ -4631,9 +4683,6 @@ int __init cgroup_init_early(void) | |||
4631 | list_add(&init_css_set_link.cg_link_list, | 4683 | list_add(&init_css_set_link.cg_link_list, |
4632 | &init_css_set.cg_links); | 4684 | &init_css_set.cg_links); |
4633 | 4685 | ||
4634 | for (i = 0; i < CSS_SET_TABLE_SIZE; i++) | ||
4635 | INIT_HLIST_HEAD(&css_set_table[i]); | ||
4636 | |||
4637 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | 4686 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { |
4638 | struct cgroup_subsys *ss = subsys[i]; | 4687 | struct cgroup_subsys *ss = subsys[i]; |
4639 | 4688 | ||
@@ -4667,7 +4716,7 @@ int __init cgroup_init(void) | |||
4667 | { | 4716 | { |
4668 | int err; | 4717 | int err; |
4669 | int i; | 4718 | int i; |
4670 | struct hlist_head *hhead; | 4719 | unsigned long key; |
4671 | 4720 | ||
4672 | err = bdi_init(&cgroup_backing_dev_info); | 4721 | err = bdi_init(&cgroup_backing_dev_info); |
4673 | if (err) | 4722 | if (err) |
@@ -4686,8 +4735,8 @@ int __init cgroup_init(void) | |||
4686 | } | 4735 | } |
4687 | 4736 | ||
4688 | /* Add init_css_set to the hash table */ | 4737 | /* Add init_css_set to the hash table */ |
4689 | hhead = css_set_hash(init_css_set.subsys); | 4738 | key = css_set_hash(init_css_set.subsys); |
4690 | hlist_add_head(&init_css_set.hlist, hhead); | 4739 | hash_add(css_set_table, &init_css_set.hlist, key); |
4691 | BUG_ON(!init_root_id(&rootnode)); | 4740 | BUG_ON(!init_root_id(&rootnode)); |
4692 | 4741 | ||
4693 | cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj); | 4742 | cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj); |
@@ -4982,8 +5031,7 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks) | |||
4982 | } | 5031 | } |
4983 | task_unlock(tsk); | 5032 | task_unlock(tsk); |
4984 | 5033 | ||
4985 | if (cg) | 5034 | put_css_set_taskexit(cg); |
4986 | put_css_set_taskexit(cg); | ||
4987 | } | 5035 | } |
4988 | 5036 | ||
4989 | /** | 5037 | /** |