diff options
Diffstat (limited to 'kernel/cgroup.c')
| -rw-r--r-- | kernel/cgroup.c | 288 |
1 files changed, 168 insertions, 120 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 4855892798fd..b5c64327e712 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
| @@ -52,7 +52,7 @@ | |||
| 52 | #include <linux/module.h> | 52 | #include <linux/module.h> |
| 53 | #include <linux/delayacct.h> | 53 | #include <linux/delayacct.h> |
| 54 | #include <linux/cgroupstats.h> | 54 | #include <linux/cgroupstats.h> |
| 55 | #include <linux/hash.h> | 55 | #include <linux/hashtable.h> |
| 56 | #include <linux/namei.h> | 56 | #include <linux/namei.h> |
| 57 | #include <linux/pid_namespace.h> | 57 | #include <linux/pid_namespace.h> |
| 58 | #include <linux/idr.h> | 58 | #include <linux/idr.h> |
| @@ -376,22 +376,18 @@ static int css_set_count; | |||
| 376 | * account cgroups in empty hierarchies. | 376 | * account cgroups in empty hierarchies. |
| 377 | */ | 377 | */ |
| 378 | #define CSS_SET_HASH_BITS 7 | 378 | #define CSS_SET_HASH_BITS 7 |
| 379 | #define CSS_SET_TABLE_SIZE (1 << CSS_SET_HASH_BITS) | 379 | static DEFINE_HASHTABLE(css_set_table, CSS_SET_HASH_BITS); |
| 380 | static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE]; | ||
| 381 | 380 | ||
| 382 | static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[]) | 381 | static unsigned long css_set_hash(struct cgroup_subsys_state *css[]) |
| 383 | { | 382 | { |
| 384 | int i; | 383 | int i; |
| 385 | int index; | 384 | unsigned long key = 0UL; |
| 386 | unsigned long tmp = 0UL; | ||
| 387 | 385 | ||
| 388 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) | 386 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) |
| 389 | tmp += (unsigned long)css[i]; | 387 | key += (unsigned long)css[i]; |
| 390 | tmp = (tmp >> 16) ^ tmp; | 388 | key = (key >> 16) ^ key; |
| 391 | 389 | ||
| 392 | index = hash_long(tmp, CSS_SET_HASH_BITS); | 390 | return key; |
| 393 | |||
| 394 | return &css_set_table[index]; | ||
| 395 | } | 391 | } |
| 396 | 392 | ||
| 397 | /* We don't maintain the lists running through each css_set to its | 393 | /* We don't maintain the lists running through each css_set to its |
| @@ -418,7 +414,7 @@ static void __put_css_set(struct css_set *cg, int taskexit) | |||
| 418 | } | 414 | } |
| 419 | 415 | ||
| 420 | /* This css_set is dead. unlink it and release cgroup refcounts */ | 416 | /* This css_set is dead. unlink it and release cgroup refcounts */ |
| 421 | hlist_del(&cg->hlist); | 417 | hash_del(&cg->hlist); |
| 422 | css_set_count--; | 418 | css_set_count--; |
| 423 | 419 | ||
| 424 | list_for_each_entry_safe(link, saved_link, &cg->cg_links, | 420 | list_for_each_entry_safe(link, saved_link, &cg->cg_links, |
| @@ -426,12 +422,20 @@ static void __put_css_set(struct css_set *cg, int taskexit) | |||
| 426 | struct cgroup *cgrp = link->cgrp; | 422 | struct cgroup *cgrp = link->cgrp; |
| 427 | list_del(&link->cg_link_list); | 423 | list_del(&link->cg_link_list); |
| 428 | list_del(&link->cgrp_link_list); | 424 | list_del(&link->cgrp_link_list); |
| 425 | |||
| 426 | /* | ||
| 427 | * We may not be holding cgroup_mutex, and if cgrp->count is | ||
| 428 | * dropped to 0 the cgroup can be destroyed at any time, hence | ||
| 429 | * rcu_read_lock is used to keep it alive. | ||
| 430 | */ | ||
| 431 | rcu_read_lock(); | ||
| 429 | if (atomic_dec_and_test(&cgrp->count) && | 432 | if (atomic_dec_and_test(&cgrp->count) && |
| 430 | notify_on_release(cgrp)) { | 433 | notify_on_release(cgrp)) { |
| 431 | if (taskexit) | 434 | if (taskexit) |
| 432 | set_bit(CGRP_RELEASABLE, &cgrp->flags); | 435 | set_bit(CGRP_RELEASABLE, &cgrp->flags); |
| 433 | check_for_release(cgrp); | 436 | check_for_release(cgrp); |
| 434 | } | 437 | } |
| 438 | rcu_read_unlock(); | ||
| 435 | 439 | ||
| 436 | kfree(link); | 440 | kfree(link); |
| 437 | } | 441 | } |
| @@ -550,9 +554,9 @@ static struct css_set *find_existing_css_set( | |||
| 550 | { | 554 | { |
| 551 | int i; | 555 | int i; |
| 552 | struct cgroupfs_root *root = cgrp->root; | 556 | struct cgroupfs_root *root = cgrp->root; |
| 553 | struct hlist_head *hhead; | ||
| 554 | struct hlist_node *node; | 557 | struct hlist_node *node; |
| 555 | struct css_set *cg; | 558 | struct css_set *cg; |
| 559 | unsigned long key; | ||
| 556 | 560 | ||
| 557 | /* | 561 | /* |
| 558 | * Build the set of subsystem state objects that we want to see in the | 562 | * Build the set of subsystem state objects that we want to see in the |
| @@ -572,8 +576,8 @@ static struct css_set *find_existing_css_set( | |||
| 572 | } | 576 | } |
| 573 | } | 577 | } |
| 574 | 578 | ||
| 575 | hhead = css_set_hash(template); | 579 | key = css_set_hash(template); |
| 576 | hlist_for_each_entry(cg, node, hhead, hlist) { | 580 | hash_for_each_possible(css_set_table, cg, node, hlist, key) { |
| 577 | if (!compare_css_sets(cg, oldcg, cgrp, template)) | 581 | if (!compare_css_sets(cg, oldcg, cgrp, template)) |
| 578 | continue; | 582 | continue; |
| 579 | 583 | ||
| @@ -657,8 +661,8 @@ static struct css_set *find_css_set( | |||
| 657 | 661 | ||
| 658 | struct list_head tmp_cg_links; | 662 | struct list_head tmp_cg_links; |
| 659 | 663 | ||
| 660 | struct hlist_head *hhead; | ||
| 661 | struct cg_cgroup_link *link; | 664 | struct cg_cgroup_link *link; |
| 665 | unsigned long key; | ||
| 662 | 666 | ||
| 663 | /* First see if we already have a cgroup group that matches | 667 | /* First see if we already have a cgroup group that matches |
| 664 | * the desired set */ | 668 | * the desired set */ |
| @@ -704,8 +708,8 @@ static struct css_set *find_css_set( | |||
| 704 | css_set_count++; | 708 | css_set_count++; |
| 705 | 709 | ||
| 706 | /* Add this cgroup group to the hash table */ | 710 | /* Add this cgroup group to the hash table */ |
| 707 | hhead = css_set_hash(res->subsys); | 711 | key = css_set_hash(res->subsys); |
| 708 | hlist_add_head(&res->hlist, hhead); | 712 | hash_add(css_set_table, &res->hlist, key); |
| 709 | 713 | ||
| 710 | write_unlock(&css_set_lock); | 714 | write_unlock(&css_set_lock); |
| 711 | 715 | ||
| @@ -856,47 +860,54 @@ static struct inode *cgroup_new_inode(umode_t mode, struct super_block *sb) | |||
| 856 | return inode; | 860 | return inode; |
| 857 | } | 861 | } |
| 858 | 862 | ||
| 859 | static void cgroup_diput(struct dentry *dentry, struct inode *inode) | 863 | static void cgroup_free_fn(struct work_struct *work) |
| 860 | { | 864 | { |
| 861 | /* is dentry a directory ? if so, kfree() associated cgroup */ | 865 | struct cgroup *cgrp = container_of(work, struct cgroup, free_work); |
| 862 | if (S_ISDIR(inode->i_mode)) { | 866 | struct cgroup_subsys *ss; |
| 863 | struct cgroup *cgrp = dentry->d_fsdata; | ||
| 864 | struct cgroup_subsys *ss; | ||
| 865 | BUG_ON(!(cgroup_is_removed(cgrp))); | ||
| 866 | /* It's possible for external users to be holding css | ||
| 867 | * reference counts on a cgroup; css_put() needs to | ||
| 868 | * be able to access the cgroup after decrementing | ||
| 869 | * the reference count in order to know if it needs to | ||
| 870 | * queue the cgroup to be handled by the release | ||
| 871 | * agent */ | ||
| 872 | synchronize_rcu(); | ||
| 873 | 867 | ||
| 874 | mutex_lock(&cgroup_mutex); | 868 | mutex_lock(&cgroup_mutex); |
| 875 | /* | 869 | /* |
| 876 | * Release the subsystem state objects. | 870 | * Release the subsystem state objects. |
| 877 | */ | 871 | */ |
| 878 | for_each_subsys(cgrp->root, ss) | 872 | for_each_subsys(cgrp->root, ss) |
| 879 | ss->css_free(cgrp); | 873 | ss->css_free(cgrp); |
| 880 | 874 | ||
| 881 | cgrp->root->number_of_cgroups--; | 875 | cgrp->root->number_of_cgroups--; |
| 882 | mutex_unlock(&cgroup_mutex); | 876 | mutex_unlock(&cgroup_mutex); |
| 883 | 877 | ||
| 884 | /* | 878 | /* |
| 885 | * Drop the active superblock reference that we took when we | 879 | * Drop the active superblock reference that we took when we |
| 886 | * created the cgroup | 880 | * created the cgroup |
| 887 | */ | 881 | */ |
| 888 | deactivate_super(cgrp->root->sb); | 882 | deactivate_super(cgrp->root->sb); |
| 889 | 883 | ||
| 890 | /* | 884 | /* |
| 891 | * if we're getting rid of the cgroup, refcount should ensure | 885 | * if we're getting rid of the cgroup, refcount should ensure |
| 892 | * that there are no pidlists left. | 886 | * that there are no pidlists left. |
| 893 | */ | 887 | */ |
| 894 | BUG_ON(!list_empty(&cgrp->pidlists)); | 888 | BUG_ON(!list_empty(&cgrp->pidlists)); |
| 895 | 889 | ||
| 896 | simple_xattrs_free(&cgrp->xattrs); | 890 | simple_xattrs_free(&cgrp->xattrs); |
| 897 | 891 | ||
| 898 | ida_simple_remove(&cgrp->root->cgroup_ida, cgrp->id); | 892 | ida_simple_remove(&cgrp->root->cgroup_ida, cgrp->id); |
| 899 | kfree_rcu(cgrp, rcu_head); | 893 | kfree(cgrp); |
| 894 | } | ||
| 895 | |||
| 896 | static void cgroup_free_rcu(struct rcu_head *head) | ||
| 897 | { | ||
| 898 | struct cgroup *cgrp = container_of(head, struct cgroup, rcu_head); | ||
| 899 | |||
| 900 | schedule_work(&cgrp->free_work); | ||
| 901 | } | ||
| 902 | |||
| 903 | static void cgroup_diput(struct dentry *dentry, struct inode *inode) | ||
| 904 | { | ||
| 905 | /* is dentry a directory ? if so, kfree() associated cgroup */ | ||
| 906 | if (S_ISDIR(inode->i_mode)) { | ||
| 907 | struct cgroup *cgrp = dentry->d_fsdata; | ||
| 908 | |||
| 909 | BUG_ON(!(cgroup_is_removed(cgrp))); | ||
| 910 | call_rcu(&cgrp->rcu_head, cgroup_free_rcu); | ||
| 900 | } else { | 911 | } else { |
| 901 | struct cfent *cfe = __d_cfe(dentry); | 912 | struct cfent *cfe = __d_cfe(dentry); |
| 902 | struct cgroup *cgrp = dentry->d_parent->d_fsdata; | 913 | struct cgroup *cgrp = dentry->d_parent->d_fsdata; |
| @@ -925,13 +936,17 @@ static void remove_dir(struct dentry *d) | |||
| 925 | dput(parent); | 936 | dput(parent); |
| 926 | } | 937 | } |
| 927 | 938 | ||
| 928 | static int cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft) | 939 | static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft) |
| 929 | { | 940 | { |
| 930 | struct cfent *cfe; | 941 | struct cfent *cfe; |
| 931 | 942 | ||
| 932 | lockdep_assert_held(&cgrp->dentry->d_inode->i_mutex); | 943 | lockdep_assert_held(&cgrp->dentry->d_inode->i_mutex); |
| 933 | lockdep_assert_held(&cgroup_mutex); | 944 | lockdep_assert_held(&cgroup_mutex); |
| 934 | 945 | ||
| 946 | /* | ||
| 947 | * If we're doing cleanup due to failure of cgroup_create(), | ||
| 948 | * the corresponding @cfe may not exist. | ||
| 949 | */ | ||
| 935 | list_for_each_entry(cfe, &cgrp->files, node) { | 950 | list_for_each_entry(cfe, &cgrp->files, node) { |
| 936 | struct dentry *d = cfe->dentry; | 951 | struct dentry *d = cfe->dentry; |
| 937 | 952 | ||
| @@ -944,9 +959,8 @@ static int cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft) | |||
| 944 | list_del_init(&cfe->node); | 959 | list_del_init(&cfe->node); |
| 945 | dput(d); | 960 | dput(d); |
| 946 | 961 | ||
| 947 | return 0; | 962 | break; |
| 948 | } | 963 | } |
| 949 | return -ENOENT; | ||
| 950 | } | 964 | } |
| 951 | 965 | ||
| 952 | /** | 966 | /** |
| @@ -1083,7 +1097,6 @@ static int rebind_subsystems(struct cgroupfs_root *root, | |||
| 1083 | } | 1097 | } |
| 1084 | } | 1098 | } |
| 1085 | root->subsys_mask = root->actual_subsys_mask = final_subsys_mask; | 1099 | root->subsys_mask = root->actual_subsys_mask = final_subsys_mask; |
| 1086 | synchronize_rcu(); | ||
| 1087 | 1100 | ||
| 1088 | return 0; | 1101 | return 0; |
| 1089 | } | 1102 | } |
| @@ -1393,6 +1406,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) | |||
| 1393 | INIT_LIST_HEAD(&cgrp->allcg_node); | 1406 | INIT_LIST_HEAD(&cgrp->allcg_node); |
| 1394 | INIT_LIST_HEAD(&cgrp->release_list); | 1407 | INIT_LIST_HEAD(&cgrp->release_list); |
| 1395 | INIT_LIST_HEAD(&cgrp->pidlists); | 1408 | INIT_LIST_HEAD(&cgrp->pidlists); |
| 1409 | INIT_WORK(&cgrp->free_work, cgroup_free_fn); | ||
| 1396 | mutex_init(&cgrp->pidlist_mutex); | 1410 | mutex_init(&cgrp->pidlist_mutex); |
| 1397 | INIT_LIST_HEAD(&cgrp->event_list); | 1411 | INIT_LIST_HEAD(&cgrp->event_list); |
| 1398 | spin_lock_init(&cgrp->event_list_lock); | 1412 | spin_lock_init(&cgrp->event_list_lock); |
| @@ -1597,6 +1611,8 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
| 1597 | struct cgroupfs_root *existing_root; | 1611 | struct cgroupfs_root *existing_root; |
| 1598 | const struct cred *cred; | 1612 | const struct cred *cred; |
| 1599 | int i; | 1613 | int i; |
| 1614 | struct hlist_node *node; | ||
| 1615 | struct css_set *cg; | ||
| 1600 | 1616 | ||
| 1601 | BUG_ON(sb->s_root != NULL); | 1617 | BUG_ON(sb->s_root != NULL); |
| 1602 | 1618 | ||
| @@ -1650,14 +1666,8 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
| 1650 | /* Link the top cgroup in this hierarchy into all | 1666 | /* Link the top cgroup in this hierarchy into all |
| 1651 | * the css_set objects */ | 1667 | * the css_set objects */ |
| 1652 | write_lock(&css_set_lock); | 1668 | write_lock(&css_set_lock); |
| 1653 | for (i = 0; i < CSS_SET_TABLE_SIZE; i++) { | 1669 | hash_for_each(css_set_table, i, node, cg, hlist) |
| 1654 | struct hlist_head *hhead = &css_set_table[i]; | 1670 | link_css_set(&tmp_cg_links, cg, root_cgrp); |
| 1655 | struct hlist_node *node; | ||
| 1656 | struct css_set *cg; | ||
| 1657 | |||
| 1658 | hlist_for_each_entry(cg, node, hhead, hlist) | ||
| 1659 | link_css_set(&tmp_cg_links, cg, root_cgrp); | ||
| 1660 | } | ||
| 1661 | write_unlock(&css_set_lock); | 1671 | write_unlock(&css_set_lock); |
| 1662 | 1672 | ||
| 1663 | free_cg_links(&tmp_cg_links); | 1673 | free_cg_links(&tmp_cg_links); |
| @@ -1773,7 +1783,7 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen) | |||
| 1773 | rcu_lockdep_assert(rcu_read_lock_held() || cgroup_lock_is_held(), | 1783 | rcu_lockdep_assert(rcu_read_lock_held() || cgroup_lock_is_held(), |
| 1774 | "cgroup_path() called without proper locking"); | 1784 | "cgroup_path() called without proper locking"); |
| 1775 | 1785 | ||
| 1776 | if (!dentry || cgrp == dummytop) { | 1786 | if (cgrp == dummytop) { |
| 1777 | /* | 1787 | /* |
| 1778 | * Inactive subsystems have no dentry for their root | 1788 | * Inactive subsystems have no dentry for their root |
| 1779 | * cgroup | 1789 | * cgroup |
| @@ -1982,7 +1992,6 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | |||
| 1982 | ss->attach(cgrp, &tset); | 1992 | ss->attach(cgrp, &tset); |
| 1983 | } | 1993 | } |
| 1984 | 1994 | ||
| 1985 | synchronize_rcu(); | ||
| 1986 | out: | 1995 | out: |
| 1987 | if (retval) { | 1996 | if (retval) { |
| 1988 | for_each_subsys(root, ss) { | 1997 | for_each_subsys(root, ss) { |
| @@ -2151,7 +2160,6 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) | |||
| 2151 | /* | 2160 | /* |
| 2152 | * step 5: success! and cleanup | 2161 | * step 5: success! and cleanup |
| 2153 | */ | 2162 | */ |
| 2154 | synchronize_rcu(); | ||
| 2155 | retval = 0; | 2163 | retval = 0; |
| 2156 | out_put_css_set_refs: | 2164 | out_put_css_set_refs: |
| 2157 | if (retval) { | 2165 | if (retval) { |
| @@ -2769,14 +2777,14 @@ static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys, | |||
| 2769 | if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent) | 2777 | if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent) |
| 2770 | continue; | 2778 | continue; |
| 2771 | 2779 | ||
| 2772 | if (is_add) | 2780 | if (is_add) { |
| 2773 | err = cgroup_add_file(cgrp, subsys, cft); | 2781 | err = cgroup_add_file(cgrp, subsys, cft); |
| 2774 | else | 2782 | if (err) |
| 2775 | err = cgroup_rm_file(cgrp, cft); | 2783 | pr_warn("cgroup_addrm_files: failed to add %s, err=%d\n", |
| 2776 | if (err) { | 2784 | cft->name, err); |
| 2777 | pr_warning("cgroup_addrm_files: failed to %s %s, err=%d\n", | ||
| 2778 | is_add ? "add" : "remove", cft->name, err); | ||
| 2779 | ret = err; | 2785 | ret = err; |
| 2786 | } else { | ||
| 2787 | cgroup_rm_file(cgrp, cft); | ||
| 2780 | } | 2788 | } |
| 2781 | } | 2789 | } |
| 2782 | return ret; | 2790 | return ret; |
| @@ -3017,6 +3025,32 @@ struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos, | |||
| 3017 | } | 3025 | } |
| 3018 | EXPORT_SYMBOL_GPL(cgroup_next_descendant_pre); | 3026 | EXPORT_SYMBOL_GPL(cgroup_next_descendant_pre); |
| 3019 | 3027 | ||
| 3028 | /** | ||
| 3029 | * cgroup_rightmost_descendant - return the rightmost descendant of a cgroup | ||
| 3030 | * @pos: cgroup of interest | ||
| 3031 | * | ||
| 3032 | * Return the rightmost descendant of @pos. If there's no descendant, | ||
| 3033 | * @pos is returned. This can be used during pre-order traversal to skip | ||
| 3034 | * subtree of @pos. | ||
| 3035 | */ | ||
| 3036 | struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos) | ||
| 3037 | { | ||
| 3038 | struct cgroup *last, *tmp; | ||
| 3039 | |||
| 3040 | WARN_ON_ONCE(!rcu_read_lock_held()); | ||
| 3041 | |||
| 3042 | do { | ||
| 3043 | last = pos; | ||
| 3044 | /* ->prev isn't RCU safe, walk ->next till the end */ | ||
| 3045 | pos = NULL; | ||
| 3046 | list_for_each_entry_rcu(tmp, &last->children, sibling) | ||
| 3047 | pos = tmp; | ||
| 3048 | } while (pos); | ||
| 3049 | |||
| 3050 | return last; | ||
| 3051 | } | ||
| 3052 | EXPORT_SYMBOL_GPL(cgroup_rightmost_descendant); | ||
| 3053 | |||
| 3020 | static struct cgroup *cgroup_leftmost_descendant(struct cgroup *pos) | 3054 | static struct cgroup *cgroup_leftmost_descendant(struct cgroup *pos) |
| 3021 | { | 3055 | { |
| 3022 | struct cgroup *last; | 3056 | struct cgroup *last; |
| @@ -3752,8 +3786,13 @@ static void cgroup_event_remove(struct work_struct *work) | |||
| 3752 | remove); | 3786 | remove); |
| 3753 | struct cgroup *cgrp = event->cgrp; | 3787 | struct cgroup *cgrp = event->cgrp; |
| 3754 | 3788 | ||
| 3789 | remove_wait_queue(event->wqh, &event->wait); | ||
| 3790 | |||
| 3755 | event->cft->unregister_event(cgrp, event->cft, event->eventfd); | 3791 | event->cft->unregister_event(cgrp, event->cft, event->eventfd); |
| 3756 | 3792 | ||
| 3793 | /* Notify userspace the event is going away. */ | ||
| 3794 | eventfd_signal(event->eventfd, 1); | ||
| 3795 | |||
| 3757 | eventfd_ctx_put(event->eventfd); | 3796 | eventfd_ctx_put(event->eventfd); |
| 3758 | kfree(event); | 3797 | kfree(event); |
| 3759 | dput(cgrp->dentry); | 3798 | dput(cgrp->dentry); |
| @@ -3773,15 +3812,25 @@ static int cgroup_event_wake(wait_queue_t *wait, unsigned mode, | |||
| 3773 | unsigned long flags = (unsigned long)key; | 3812 | unsigned long flags = (unsigned long)key; |
| 3774 | 3813 | ||
| 3775 | if (flags & POLLHUP) { | 3814 | if (flags & POLLHUP) { |
| 3776 | __remove_wait_queue(event->wqh, &event->wait); | ||
| 3777 | spin_lock(&cgrp->event_list_lock); | ||
| 3778 | list_del_init(&event->list); | ||
| 3779 | spin_unlock(&cgrp->event_list_lock); | ||
| 3780 | /* | 3815 | /* |
| 3781 | * We are in atomic context, but cgroup_event_remove() may | 3816 | * If the event has been detached at cgroup removal, we |
| 3782 | * sleep, so we have to call it in workqueue. | 3817 | * can simply return knowing the other side will cleanup |
| 3818 | * for us. | ||
| 3819 | * | ||
| 3820 | * We can't race against event freeing since the other | ||
| 3821 | * side will require wqh->lock via remove_wait_queue(), | ||
| 3822 | * which we hold. | ||
| 3783 | */ | 3823 | */ |
| 3784 | schedule_work(&event->remove); | 3824 | spin_lock(&cgrp->event_list_lock); |
| 3825 | if (!list_empty(&event->list)) { | ||
| 3826 | list_del_init(&event->list); | ||
| 3827 | /* | ||
| 3828 | * We are in atomic context, but cgroup_event_remove() | ||
| 3829 | * may sleep, so we have to call it in workqueue. | ||
| 3830 | */ | ||
| 3831 | schedule_work(&event->remove); | ||
| 3832 | } | ||
| 3833 | spin_unlock(&cgrp->event_list_lock); | ||
| 3785 | } | 3834 | } |
| 3786 | 3835 | ||
| 3787 | return 0; | 3836 | return 0; |
| @@ -3807,6 +3856,7 @@ static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft, | |||
| 3807 | const char *buffer) | 3856 | const char *buffer) |
| 3808 | { | 3857 | { |
| 3809 | struct cgroup_event *event = NULL; | 3858 | struct cgroup_event *event = NULL; |
| 3859 | struct cgroup *cgrp_cfile; | ||
| 3810 | unsigned int efd, cfd; | 3860 | unsigned int efd, cfd; |
| 3811 | struct file *efile = NULL; | 3861 | struct file *efile = NULL; |
| 3812 | struct file *cfile = NULL; | 3862 | struct file *cfile = NULL; |
| @@ -3862,6 +3912,16 @@ static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft, | |||
| 3862 | goto fail; | 3912 | goto fail; |
| 3863 | } | 3913 | } |
| 3864 | 3914 | ||
| 3915 | /* | ||
| 3916 | * The file to be monitored must be in the same cgroup as | ||
| 3917 | * cgroup.event_control is. | ||
| 3918 | */ | ||
| 3919 | cgrp_cfile = __d_cgrp(cfile->f_dentry->d_parent); | ||
| 3920 | if (cgrp_cfile != cgrp) { | ||
| 3921 | ret = -EINVAL; | ||
| 3922 | goto fail; | ||
| 3923 | } | ||
| 3924 | |||
| 3865 | if (!event->cft->register_event || !event->cft->unregister_event) { | 3925 | if (!event->cft->register_event || !event->cft->unregister_event) { |
| 3866 | ret = -EINVAL; | 3926 | ret = -EINVAL; |
| 3867 | goto fail; | 3927 | goto fail; |
| @@ -4135,6 +4195,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
| 4135 | 4195 | ||
| 4136 | init_cgroup_housekeeping(cgrp); | 4196 | init_cgroup_housekeeping(cgrp); |
| 4137 | 4197 | ||
| 4198 | dentry->d_fsdata = cgrp; | ||
| 4199 | cgrp->dentry = dentry; | ||
| 4200 | |||
| 4138 | cgrp->parent = parent; | 4201 | cgrp->parent = parent; |
| 4139 | cgrp->root = parent->root; | 4202 | cgrp->root = parent->root; |
| 4140 | cgrp->top_cgroup = parent->top_cgroup; | 4203 | cgrp->top_cgroup = parent->top_cgroup; |
| @@ -4172,8 +4235,6 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
| 4172 | lockdep_assert_held(&dentry->d_inode->i_mutex); | 4235 | lockdep_assert_held(&dentry->d_inode->i_mutex); |
| 4173 | 4236 | ||
| 4174 | /* allocation complete, commit to creation */ | 4237 | /* allocation complete, commit to creation */ |
| 4175 | dentry->d_fsdata = cgrp; | ||
| 4176 | cgrp->dentry = dentry; | ||
| 4177 | list_add_tail(&cgrp->allcg_node, &root->allcg_list); | 4238 | list_add_tail(&cgrp->allcg_node, &root->allcg_list); |
| 4178 | list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children); | 4239 | list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children); |
| 4179 | root->number_of_cgroups++; | 4240 | root->number_of_cgroups++; |
| @@ -4340,20 +4401,14 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) | |||
| 4340 | /* | 4401 | /* |
| 4341 | * Unregister events and notify userspace. | 4402 | * Unregister events and notify userspace. |
| 4342 | * Notify userspace about cgroup removing only after rmdir of cgroup | 4403 | * Notify userspace about cgroup removing only after rmdir of cgroup |
| 4343 | * directory to avoid race between userspace and kernelspace. Use | 4404 | * directory to avoid race between userspace and kernelspace. |
| 4344 | * a temporary list to avoid a deadlock with cgroup_event_wake(). Since | ||
| 4345 | * cgroup_event_wake() is called with the wait queue head locked, | ||
| 4346 | * remove_wait_queue() cannot be called while holding event_list_lock. | ||
| 4347 | */ | 4405 | */ |
| 4348 | spin_lock(&cgrp->event_list_lock); | 4406 | spin_lock(&cgrp->event_list_lock); |
| 4349 | list_splice_init(&cgrp->event_list, &tmp_list); | 4407 | list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) { |
| 4350 | spin_unlock(&cgrp->event_list_lock); | ||
| 4351 | list_for_each_entry_safe(event, tmp, &tmp_list, list) { | ||
| 4352 | list_del_init(&event->list); | 4408 | list_del_init(&event->list); |
| 4353 | remove_wait_queue(event->wqh, &event->wait); | ||
| 4354 | eventfd_signal(event->eventfd, 1); | ||
| 4355 | schedule_work(&event->remove); | 4409 | schedule_work(&event->remove); |
| 4356 | } | 4410 | } |
| 4411 | spin_unlock(&cgrp->event_list_lock); | ||
| 4357 | 4412 | ||
| 4358 | return 0; | 4413 | return 0; |
| 4359 | } | 4414 | } |
| @@ -4438,6 +4493,9 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) | |||
| 4438 | { | 4493 | { |
| 4439 | struct cgroup_subsys_state *css; | 4494 | struct cgroup_subsys_state *css; |
| 4440 | int i, ret; | 4495 | int i, ret; |
| 4496 | struct hlist_node *node, *tmp; | ||
| 4497 | struct css_set *cg; | ||
| 4498 | unsigned long key; | ||
| 4441 | 4499 | ||
| 4442 | /* check name and function validity */ | 4500 | /* check name and function validity */ |
| 4443 | if (ss->name == NULL || strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN || | 4501 | if (ss->name == NULL || strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN || |
| @@ -4503,23 +4561,17 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) | |||
| 4503 | * this is all done under the css_set_lock. | 4561 | * this is all done under the css_set_lock. |
| 4504 | */ | 4562 | */ |
| 4505 | write_lock(&css_set_lock); | 4563 | write_lock(&css_set_lock); |
| 4506 | for (i = 0; i < CSS_SET_TABLE_SIZE; i++) { | 4564 | hash_for_each_safe(css_set_table, i, node, tmp, cg, hlist) { |
| 4507 | struct css_set *cg; | 4565 | /* skip entries that we already rehashed */ |
| 4508 | struct hlist_node *node, *tmp; | 4566 | if (cg->subsys[ss->subsys_id]) |
| 4509 | struct hlist_head *bucket = &css_set_table[i], *new_bucket; | 4567 | continue; |
| 4510 | 4568 | /* remove existing entry */ | |
| 4511 | hlist_for_each_entry_safe(cg, node, tmp, bucket, hlist) { | 4569 | hash_del(&cg->hlist); |
| 4512 | /* skip entries that we already rehashed */ | 4570 | /* set new value */ |
| 4513 | if (cg->subsys[ss->subsys_id]) | 4571 | cg->subsys[ss->subsys_id] = css; |
| 4514 | continue; | 4572 | /* recompute hash and restore entry */ |
| 4515 | /* remove existing entry */ | 4573 | key = css_set_hash(cg->subsys); |
| 4516 | hlist_del(&cg->hlist); | 4574 | hash_add(css_set_table, node, key); |
| 4517 | /* set new value */ | ||
| 4518 | cg->subsys[ss->subsys_id] = css; | ||
| 4519 | /* recompute hash and restore entry */ | ||
| 4520 | new_bucket = css_set_hash(cg->subsys); | ||
| 4521 | hlist_add_head(&cg->hlist, new_bucket); | ||
| 4522 | } | ||
| 4523 | } | 4575 | } |
| 4524 | write_unlock(&css_set_lock); | 4576 | write_unlock(&css_set_lock); |
| 4525 | 4577 | ||
| @@ -4551,7 +4603,6 @@ EXPORT_SYMBOL_GPL(cgroup_load_subsys); | |||
| 4551 | void cgroup_unload_subsys(struct cgroup_subsys *ss) | 4603 | void cgroup_unload_subsys(struct cgroup_subsys *ss) |
| 4552 | { | 4604 | { |
| 4553 | struct cg_cgroup_link *link; | 4605 | struct cg_cgroup_link *link; |
| 4554 | struct hlist_head *hhead; | ||
| 4555 | 4606 | ||
| 4556 | BUG_ON(ss->module == NULL); | 4607 | BUG_ON(ss->module == NULL); |
| 4557 | 4608 | ||
| @@ -4585,11 +4636,12 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss) | |||
| 4585 | write_lock(&css_set_lock); | 4636 | write_lock(&css_set_lock); |
| 4586 | list_for_each_entry(link, &dummytop->css_sets, cgrp_link_list) { | 4637 | list_for_each_entry(link, &dummytop->css_sets, cgrp_link_list) { |
| 4587 | struct css_set *cg = link->cg; | 4638 | struct css_set *cg = link->cg; |
| 4639 | unsigned long key; | ||
| 4588 | 4640 | ||
| 4589 | hlist_del(&cg->hlist); | 4641 | hash_del(&cg->hlist); |
| 4590 | cg->subsys[ss->subsys_id] = NULL; | 4642 | cg->subsys[ss->subsys_id] = NULL; |
| 4591 | hhead = css_set_hash(cg->subsys); | 4643 | key = css_set_hash(cg->subsys); |
| 4592 | hlist_add_head(&cg->hlist, hhead); | 4644 | hash_add(css_set_table, &cg->hlist, key); |
| 4593 | } | 4645 | } |
| 4594 | write_unlock(&css_set_lock); | 4646 | write_unlock(&css_set_lock); |
| 4595 | 4647 | ||
| @@ -4631,9 +4683,6 @@ int __init cgroup_init_early(void) | |||
| 4631 | list_add(&init_css_set_link.cg_link_list, | 4683 | list_add(&init_css_set_link.cg_link_list, |
| 4632 | &init_css_set.cg_links); | 4684 | &init_css_set.cg_links); |
| 4633 | 4685 | ||
| 4634 | for (i = 0; i < CSS_SET_TABLE_SIZE; i++) | ||
| 4635 | INIT_HLIST_HEAD(&css_set_table[i]); | ||
| 4636 | |||
| 4637 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | 4686 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { |
| 4638 | struct cgroup_subsys *ss = subsys[i]; | 4687 | struct cgroup_subsys *ss = subsys[i]; |
| 4639 | 4688 | ||
| @@ -4667,7 +4716,7 @@ int __init cgroup_init(void) | |||
| 4667 | { | 4716 | { |
| 4668 | int err; | 4717 | int err; |
| 4669 | int i; | 4718 | int i; |
| 4670 | struct hlist_head *hhead; | 4719 | unsigned long key; |
| 4671 | 4720 | ||
| 4672 | err = bdi_init(&cgroup_backing_dev_info); | 4721 | err = bdi_init(&cgroup_backing_dev_info); |
| 4673 | if (err) | 4722 | if (err) |
| @@ -4686,8 +4735,8 @@ int __init cgroup_init(void) | |||
| 4686 | } | 4735 | } |
| 4687 | 4736 | ||
| 4688 | /* Add init_css_set to the hash table */ | 4737 | /* Add init_css_set to the hash table */ |
| 4689 | hhead = css_set_hash(init_css_set.subsys); | 4738 | key = css_set_hash(init_css_set.subsys); |
| 4690 | hlist_add_head(&init_css_set.hlist, hhead); | 4739 | hash_add(css_set_table, &init_css_set.hlist, key); |
| 4691 | BUG_ON(!init_root_id(&rootnode)); | 4740 | BUG_ON(!init_root_id(&rootnode)); |
| 4692 | 4741 | ||
| 4693 | cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj); | 4742 | cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj); |
| @@ -4982,8 +5031,7 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks) | |||
| 4982 | } | 5031 | } |
| 4983 | task_unlock(tsk); | 5032 | task_unlock(tsk); |
| 4984 | 5033 | ||
| 4985 | if (cg) | 5034 | put_css_set_taskexit(cg); |
| 4986 | put_css_set_taskexit(cg); | ||
| 4987 | } | 5035 | } |
| 4988 | 5036 | ||
| 4989 | /** | 5037 | /** |
