diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-02-20 12:16:21 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-02-20 12:16:21 -0500 |
commit | 502b24c23b44fbaa01cc2cbd86d8035845b7811f (patch) | |
tree | 3096deeb99f6acc2d72ee33f145008ec5e2c68b3 | |
parent | ece8e0b2f9c980e5511fe8db2d68c6f1859b9d83 (diff) | |
parent | f169007b2773f285e098cb84c74aac0154d65ff7 (diff) |
Merge branch 'for-3.9' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup changes from Tejun Heo:
"Nothing too drastic.
- Removal of synchronize_rcu() from userland visible paths.
- Various fixes and cleanups from Li.
- cgroup_rightmost_descendant() added which will be used by cpuset
changes (it will be a separate pull request)."
* 'for-3.9' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
cgroup: fail if monitored file and event_control are in different cgroup
cgroup: fix cgroup_rmdir() vs close(eventfd) race
cpuset: fix cpuset_print_task_mems_allowed() vs rename() race
cgroup: fix exit() vs rmdir() race
cgroup: remove bogus comments in cgroup_diput()
cgroup: remove synchronize_rcu() from cgroup_diput()
cgroup: remove duplicate RCU free on struct cgroup
sched: remove redundant NULL cgroup check in task_group_path()
sched: split out css_online/css_offline from tg creation/destruction
cgroup: initialize cgrp->dentry before css_alloc()
cgroup: remove a NULL check in cgroup_exit()
cgroup: fix bogus kernel warnings when cgroup_create() failed
cgroup: remove synchronize_rcu() from rebind_subsystems()
cgroup: remove synchronize_rcu() from cgroup_attach_{task|proc}()
cgroup: use new hashtable implementation
cgroups: fix cgroup_event_listener error handling
cgroups: move cgroup_event_listener.c to tools/cgroup
cgroup: implement cgroup_rightmost_descendant()
cgroup: remove unused dummy cgroup_fork_callbacks()
-rw-r--r-- | Documentation/cgroups/00-INDEX | 2 | ||||
-rw-r--r-- | Documentation/cgroups/memcg_test.txt | 3 | ||||
-rw-r--r-- | include/linux/cgroup.h | 3 | ||||
-rw-r--r-- | include/linux/sched.h | 3 | ||||
-rw-r--r-- | kernel/cgroup.c | 288 | ||||
-rw-r--r-- | kernel/cpuset.c | 12 | ||||
-rw-r--r-- | kernel/sched/auto_group.c | 3 | ||||
-rw-r--r-- | kernel/sched/core.c | 49 | ||||
-rw-r--r-- | kernel/sched/debug.c | 7 | ||||
-rw-r--r-- | tools/Makefile | 19 | ||||
-rw-r--r-- | tools/cgroup/.gitignore | 1 | ||||
-rw-r--r-- | tools/cgroup/Makefile | 11 | ||||
-rw-r--r-- | tools/cgroup/cgroup_event_listener.c (renamed from Documentation/cgroups/cgroup_event_listener.c) | 72 |
13 files changed, 270 insertions, 203 deletions
diff --git a/Documentation/cgroups/00-INDEX b/Documentation/cgroups/00-INDEX index f78b90a35ad0..f5635a09c3f6 100644 --- a/Documentation/cgroups/00-INDEX +++ b/Documentation/cgroups/00-INDEX | |||
@@ -4,8 +4,6 @@ blkio-controller.txt | |||
4 | - Description for Block IO Controller, implementation and usage details. | 4 | - Description for Block IO Controller, implementation and usage details. |
5 | cgroups.txt | 5 | cgroups.txt |
6 | - Control Groups definition, implementation details, examples and API. | 6 | - Control Groups definition, implementation details, examples and API. |
7 | cgroup_event_listener.c | ||
8 | - A user program for cgroup listener. | ||
9 | cpuacct.txt | 7 | cpuacct.txt |
10 | - CPU Accounting Controller; account CPU usage for groups of tasks. | 8 | - CPU Accounting Controller; account CPU usage for groups of tasks. |
11 | cpusets.txt | 9 | cpusets.txt |
diff --git a/Documentation/cgroups/memcg_test.txt b/Documentation/cgroups/memcg_test.txt index fc8fa97a09ac..ce94a83a7d9a 100644 --- a/Documentation/cgroups/memcg_test.txt +++ b/Documentation/cgroups/memcg_test.txt | |||
@@ -399,8 +399,7 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y. | |||
399 | 399 | ||
400 | 9.10 Memory thresholds | 400 | 9.10 Memory thresholds |
401 | Memory controller implements memory thresholds using cgroups notification | 401 | Memory controller implements memory thresholds using cgroups notification |
402 | API. You can use Documentation/cgroups/cgroup_event_listener.c to test | 402 | API. You can use tools/cgroup/cgroup_event_listener.c to test it. |
403 | it. | ||
404 | 403 | ||
405 | (Shell-A) Create cgroup and run event listener | 404 | (Shell-A) Create cgroup and run event listener |
406 | # mkdir /cgroup/A | 405 | # mkdir /cgroup/A |
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 7d73905dcba2..900af5964f55 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h | |||
@@ -203,6 +203,7 @@ struct cgroup { | |||
203 | 203 | ||
204 | /* For RCU-protected deletion */ | 204 | /* For RCU-protected deletion */ |
205 | struct rcu_head rcu_head; | 205 | struct rcu_head rcu_head; |
206 | struct work_struct free_work; | ||
206 | 207 | ||
207 | /* List of events which userspace want to receive */ | 208 | /* List of events which userspace want to receive */ |
208 | struct list_head event_list; | 209 | struct list_head event_list; |
@@ -558,6 +559,7 @@ static inline struct cgroup* task_cgroup(struct task_struct *task, | |||
558 | 559 | ||
559 | struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos, | 560 | struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos, |
560 | struct cgroup *cgroup); | 561 | struct cgroup *cgroup); |
562 | struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos); | ||
561 | 563 | ||
562 | /** | 564 | /** |
563 | * cgroup_for_each_descendant_pre - pre-order walk of a cgroup's descendants | 565 | * cgroup_for_each_descendant_pre - pre-order walk of a cgroup's descendants |
@@ -706,7 +708,6 @@ struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id); | |||
706 | static inline int cgroup_init_early(void) { return 0; } | 708 | static inline int cgroup_init_early(void) { return 0; } |
707 | static inline int cgroup_init(void) { return 0; } | 709 | static inline int cgroup_init(void) { return 0; } |
708 | static inline void cgroup_fork(struct task_struct *p) {} | 710 | static inline void cgroup_fork(struct task_struct *p) {} |
709 | static inline void cgroup_fork_callbacks(struct task_struct *p) {} | ||
710 | static inline void cgroup_post_fork(struct task_struct *p) {} | 711 | static inline void cgroup_post_fork(struct task_struct *p) {} |
711 | static inline void cgroup_exit(struct task_struct *p, int callbacks) {} | 712 | static inline void cgroup_exit(struct task_struct *p, int callbacks) {} |
712 | 713 | ||
diff --git a/include/linux/sched.h b/include/linux/sched.h index 33cc42130371..e4112aad2964 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -2659,7 +2659,10 @@ extern long sched_getaffinity(pid_t pid, struct cpumask *mask); | |||
2659 | extern struct task_group root_task_group; | 2659 | extern struct task_group root_task_group; |
2660 | 2660 | ||
2661 | extern struct task_group *sched_create_group(struct task_group *parent); | 2661 | extern struct task_group *sched_create_group(struct task_group *parent); |
2662 | extern void sched_online_group(struct task_group *tg, | ||
2663 | struct task_group *parent); | ||
2662 | extern void sched_destroy_group(struct task_group *tg); | 2664 | extern void sched_destroy_group(struct task_group *tg); |
2665 | extern void sched_offline_group(struct task_group *tg); | ||
2663 | extern void sched_move_task(struct task_struct *tsk); | 2666 | extern void sched_move_task(struct task_struct *tsk); |
2664 | #ifdef CONFIG_FAIR_GROUP_SCHED | 2667 | #ifdef CONFIG_FAIR_GROUP_SCHED |
2665 | extern int sched_group_set_shares(struct task_group *tg, unsigned long shares); | 2668 | extern int sched_group_set_shares(struct task_group *tg, unsigned long shares); |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 4855892798fd..b5c64327e712 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -52,7 +52,7 @@ | |||
52 | #include <linux/module.h> | 52 | #include <linux/module.h> |
53 | #include <linux/delayacct.h> | 53 | #include <linux/delayacct.h> |
54 | #include <linux/cgroupstats.h> | 54 | #include <linux/cgroupstats.h> |
55 | #include <linux/hash.h> | 55 | #include <linux/hashtable.h> |
56 | #include <linux/namei.h> | 56 | #include <linux/namei.h> |
57 | #include <linux/pid_namespace.h> | 57 | #include <linux/pid_namespace.h> |
58 | #include <linux/idr.h> | 58 | #include <linux/idr.h> |
@@ -376,22 +376,18 @@ static int css_set_count; | |||
376 | * account cgroups in empty hierarchies. | 376 | * account cgroups in empty hierarchies. |
377 | */ | 377 | */ |
378 | #define CSS_SET_HASH_BITS 7 | 378 | #define CSS_SET_HASH_BITS 7 |
379 | #define CSS_SET_TABLE_SIZE (1 << CSS_SET_HASH_BITS) | 379 | static DEFINE_HASHTABLE(css_set_table, CSS_SET_HASH_BITS); |
380 | static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE]; | ||
381 | 380 | ||
382 | static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[]) | 381 | static unsigned long css_set_hash(struct cgroup_subsys_state *css[]) |
383 | { | 382 | { |
384 | int i; | 383 | int i; |
385 | int index; | 384 | unsigned long key = 0UL; |
386 | unsigned long tmp = 0UL; | ||
387 | 385 | ||
388 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) | 386 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) |
389 | tmp += (unsigned long)css[i]; | 387 | key += (unsigned long)css[i]; |
390 | tmp = (tmp >> 16) ^ tmp; | 388 | key = (key >> 16) ^ key; |
391 | 389 | ||
392 | index = hash_long(tmp, CSS_SET_HASH_BITS); | 390 | return key; |
393 | |||
394 | return &css_set_table[index]; | ||
395 | } | 391 | } |
396 | 392 | ||
397 | /* We don't maintain the lists running through each css_set to its | 393 | /* We don't maintain the lists running through each css_set to its |
@@ -418,7 +414,7 @@ static void __put_css_set(struct css_set *cg, int taskexit) | |||
418 | } | 414 | } |
419 | 415 | ||
420 | /* This css_set is dead. unlink it and release cgroup refcounts */ | 416 | /* This css_set is dead. unlink it and release cgroup refcounts */ |
421 | hlist_del(&cg->hlist); | 417 | hash_del(&cg->hlist); |
422 | css_set_count--; | 418 | css_set_count--; |
423 | 419 | ||
424 | list_for_each_entry_safe(link, saved_link, &cg->cg_links, | 420 | list_for_each_entry_safe(link, saved_link, &cg->cg_links, |
@@ -426,12 +422,20 @@ static void __put_css_set(struct css_set *cg, int taskexit) | |||
426 | struct cgroup *cgrp = link->cgrp; | 422 | struct cgroup *cgrp = link->cgrp; |
427 | list_del(&link->cg_link_list); | 423 | list_del(&link->cg_link_list); |
428 | list_del(&link->cgrp_link_list); | 424 | list_del(&link->cgrp_link_list); |
425 | |||
426 | /* | ||
427 | * We may not be holding cgroup_mutex, and if cgrp->count is | ||
428 | * dropped to 0 the cgroup can be destroyed at any time, hence | ||
429 | * rcu_read_lock is used to keep it alive. | ||
430 | */ | ||
431 | rcu_read_lock(); | ||
429 | if (atomic_dec_and_test(&cgrp->count) && | 432 | if (atomic_dec_and_test(&cgrp->count) && |
430 | notify_on_release(cgrp)) { | 433 | notify_on_release(cgrp)) { |
431 | if (taskexit) | 434 | if (taskexit) |
432 | set_bit(CGRP_RELEASABLE, &cgrp->flags); | 435 | set_bit(CGRP_RELEASABLE, &cgrp->flags); |
433 | check_for_release(cgrp); | 436 | check_for_release(cgrp); |
434 | } | 437 | } |
438 | rcu_read_unlock(); | ||
435 | 439 | ||
436 | kfree(link); | 440 | kfree(link); |
437 | } | 441 | } |
@@ -550,9 +554,9 @@ static struct css_set *find_existing_css_set( | |||
550 | { | 554 | { |
551 | int i; | 555 | int i; |
552 | struct cgroupfs_root *root = cgrp->root; | 556 | struct cgroupfs_root *root = cgrp->root; |
553 | struct hlist_head *hhead; | ||
554 | struct hlist_node *node; | 557 | struct hlist_node *node; |
555 | struct css_set *cg; | 558 | struct css_set *cg; |
559 | unsigned long key; | ||
556 | 560 | ||
557 | /* | 561 | /* |
558 | * Build the set of subsystem state objects that we want to see in the | 562 | * Build the set of subsystem state objects that we want to see in the |
@@ -572,8 +576,8 @@ static struct css_set *find_existing_css_set( | |||
572 | } | 576 | } |
573 | } | 577 | } |
574 | 578 | ||
575 | hhead = css_set_hash(template); | 579 | key = css_set_hash(template); |
576 | hlist_for_each_entry(cg, node, hhead, hlist) { | 580 | hash_for_each_possible(css_set_table, cg, node, hlist, key) { |
577 | if (!compare_css_sets(cg, oldcg, cgrp, template)) | 581 | if (!compare_css_sets(cg, oldcg, cgrp, template)) |
578 | continue; | 582 | continue; |
579 | 583 | ||
@@ -657,8 +661,8 @@ static struct css_set *find_css_set( | |||
657 | 661 | ||
658 | struct list_head tmp_cg_links; | 662 | struct list_head tmp_cg_links; |
659 | 663 | ||
660 | struct hlist_head *hhead; | ||
661 | struct cg_cgroup_link *link; | 664 | struct cg_cgroup_link *link; |
665 | unsigned long key; | ||
662 | 666 | ||
663 | /* First see if we already have a cgroup group that matches | 667 | /* First see if we already have a cgroup group that matches |
664 | * the desired set */ | 668 | * the desired set */ |
@@ -704,8 +708,8 @@ static struct css_set *find_css_set( | |||
704 | css_set_count++; | 708 | css_set_count++; |
705 | 709 | ||
706 | /* Add this cgroup group to the hash table */ | 710 | /* Add this cgroup group to the hash table */ |
707 | hhead = css_set_hash(res->subsys); | 711 | key = css_set_hash(res->subsys); |
708 | hlist_add_head(&res->hlist, hhead); | 712 | hash_add(css_set_table, &res->hlist, key); |
709 | 713 | ||
710 | write_unlock(&css_set_lock); | 714 | write_unlock(&css_set_lock); |
711 | 715 | ||
@@ -856,47 +860,54 @@ static struct inode *cgroup_new_inode(umode_t mode, struct super_block *sb) | |||
856 | return inode; | 860 | return inode; |
857 | } | 861 | } |
858 | 862 | ||
859 | static void cgroup_diput(struct dentry *dentry, struct inode *inode) | 863 | static void cgroup_free_fn(struct work_struct *work) |
860 | { | 864 | { |
861 | /* is dentry a directory ? if so, kfree() associated cgroup */ | 865 | struct cgroup *cgrp = container_of(work, struct cgroup, free_work); |
862 | if (S_ISDIR(inode->i_mode)) { | 866 | struct cgroup_subsys *ss; |
863 | struct cgroup *cgrp = dentry->d_fsdata; | ||
864 | struct cgroup_subsys *ss; | ||
865 | BUG_ON(!(cgroup_is_removed(cgrp))); | ||
866 | /* It's possible for external users to be holding css | ||
867 | * reference counts on a cgroup; css_put() needs to | ||
868 | * be able to access the cgroup after decrementing | ||
869 | * the reference count in order to know if it needs to | ||
870 | * queue the cgroup to be handled by the release | ||
871 | * agent */ | ||
872 | synchronize_rcu(); | ||
873 | 867 | ||
874 | mutex_lock(&cgroup_mutex); | 868 | mutex_lock(&cgroup_mutex); |
875 | /* | 869 | /* |
876 | * Release the subsystem state objects. | 870 | * Release the subsystem state objects. |
877 | */ | 871 | */ |
878 | for_each_subsys(cgrp->root, ss) | 872 | for_each_subsys(cgrp->root, ss) |
879 | ss->css_free(cgrp); | 873 | ss->css_free(cgrp); |
880 | 874 | ||
881 | cgrp->root->number_of_cgroups--; | 875 | cgrp->root->number_of_cgroups--; |
882 | mutex_unlock(&cgroup_mutex); | 876 | mutex_unlock(&cgroup_mutex); |
883 | 877 | ||
884 | /* | 878 | /* |
885 | * Drop the active superblock reference that we took when we | 879 | * Drop the active superblock reference that we took when we |
886 | * created the cgroup | 880 | * created the cgroup |
887 | */ | 881 | */ |
888 | deactivate_super(cgrp->root->sb); | 882 | deactivate_super(cgrp->root->sb); |
889 | 883 | ||
890 | /* | 884 | /* |
891 | * if we're getting rid of the cgroup, refcount should ensure | 885 | * if we're getting rid of the cgroup, refcount should ensure |
892 | * that there are no pidlists left. | 886 | * that there are no pidlists left. |
893 | */ | 887 | */ |
894 | BUG_ON(!list_empty(&cgrp->pidlists)); | 888 | BUG_ON(!list_empty(&cgrp->pidlists)); |
895 | 889 | ||
896 | simple_xattrs_free(&cgrp->xattrs); | 890 | simple_xattrs_free(&cgrp->xattrs); |
897 | 891 | ||
898 | ida_simple_remove(&cgrp->root->cgroup_ida, cgrp->id); | 892 | ida_simple_remove(&cgrp->root->cgroup_ida, cgrp->id); |
899 | kfree_rcu(cgrp, rcu_head); | 893 | kfree(cgrp); |
894 | } | ||
895 | |||
896 | static void cgroup_free_rcu(struct rcu_head *head) | ||
897 | { | ||
898 | struct cgroup *cgrp = container_of(head, struct cgroup, rcu_head); | ||
899 | |||
900 | schedule_work(&cgrp->free_work); | ||
901 | } | ||
902 | |||
903 | static void cgroup_diput(struct dentry *dentry, struct inode *inode) | ||
904 | { | ||
905 | /* is dentry a directory ? if so, kfree() associated cgroup */ | ||
906 | if (S_ISDIR(inode->i_mode)) { | ||
907 | struct cgroup *cgrp = dentry->d_fsdata; | ||
908 | |||
909 | BUG_ON(!(cgroup_is_removed(cgrp))); | ||
910 | call_rcu(&cgrp->rcu_head, cgroup_free_rcu); | ||
900 | } else { | 911 | } else { |
901 | struct cfent *cfe = __d_cfe(dentry); | 912 | struct cfent *cfe = __d_cfe(dentry); |
902 | struct cgroup *cgrp = dentry->d_parent->d_fsdata; | 913 | struct cgroup *cgrp = dentry->d_parent->d_fsdata; |
@@ -925,13 +936,17 @@ static void remove_dir(struct dentry *d) | |||
925 | dput(parent); | 936 | dput(parent); |
926 | } | 937 | } |
927 | 938 | ||
928 | static int cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft) | 939 | static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft) |
929 | { | 940 | { |
930 | struct cfent *cfe; | 941 | struct cfent *cfe; |
931 | 942 | ||
932 | lockdep_assert_held(&cgrp->dentry->d_inode->i_mutex); | 943 | lockdep_assert_held(&cgrp->dentry->d_inode->i_mutex); |
933 | lockdep_assert_held(&cgroup_mutex); | 944 | lockdep_assert_held(&cgroup_mutex); |
934 | 945 | ||
946 | /* | ||
947 | * If we're doing cleanup due to failure of cgroup_create(), | ||
948 | * the corresponding @cfe may not exist. | ||
949 | */ | ||
935 | list_for_each_entry(cfe, &cgrp->files, node) { | 950 | list_for_each_entry(cfe, &cgrp->files, node) { |
936 | struct dentry *d = cfe->dentry; | 951 | struct dentry *d = cfe->dentry; |
937 | 952 | ||
@@ -944,9 +959,8 @@ static int cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft) | |||
944 | list_del_init(&cfe->node); | 959 | list_del_init(&cfe->node); |
945 | dput(d); | 960 | dput(d); |
946 | 961 | ||
947 | return 0; | 962 | break; |
948 | } | 963 | } |
949 | return -ENOENT; | ||
950 | } | 964 | } |
951 | 965 | ||
952 | /** | 966 | /** |
@@ -1083,7 +1097,6 @@ static int rebind_subsystems(struct cgroupfs_root *root, | |||
1083 | } | 1097 | } |
1084 | } | 1098 | } |
1085 | root->subsys_mask = root->actual_subsys_mask = final_subsys_mask; | 1099 | root->subsys_mask = root->actual_subsys_mask = final_subsys_mask; |
1086 | synchronize_rcu(); | ||
1087 | 1100 | ||
1088 | return 0; | 1101 | return 0; |
1089 | } | 1102 | } |
@@ -1393,6 +1406,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) | |||
1393 | INIT_LIST_HEAD(&cgrp->allcg_node); | 1406 | INIT_LIST_HEAD(&cgrp->allcg_node); |
1394 | INIT_LIST_HEAD(&cgrp->release_list); | 1407 | INIT_LIST_HEAD(&cgrp->release_list); |
1395 | INIT_LIST_HEAD(&cgrp->pidlists); | 1408 | INIT_LIST_HEAD(&cgrp->pidlists); |
1409 | INIT_WORK(&cgrp->free_work, cgroup_free_fn); | ||
1396 | mutex_init(&cgrp->pidlist_mutex); | 1410 | mutex_init(&cgrp->pidlist_mutex); |
1397 | INIT_LIST_HEAD(&cgrp->event_list); | 1411 | INIT_LIST_HEAD(&cgrp->event_list); |
1398 | spin_lock_init(&cgrp->event_list_lock); | 1412 | spin_lock_init(&cgrp->event_list_lock); |
@@ -1597,6 +1611,8 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
1597 | struct cgroupfs_root *existing_root; | 1611 | struct cgroupfs_root *existing_root; |
1598 | const struct cred *cred; | 1612 | const struct cred *cred; |
1599 | int i; | 1613 | int i; |
1614 | struct hlist_node *node; | ||
1615 | struct css_set *cg; | ||
1600 | 1616 | ||
1601 | BUG_ON(sb->s_root != NULL); | 1617 | BUG_ON(sb->s_root != NULL); |
1602 | 1618 | ||
@@ -1650,14 +1666,8 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
1650 | /* Link the top cgroup in this hierarchy into all | 1666 | /* Link the top cgroup in this hierarchy into all |
1651 | * the css_set objects */ | 1667 | * the css_set objects */ |
1652 | write_lock(&css_set_lock); | 1668 | write_lock(&css_set_lock); |
1653 | for (i = 0; i < CSS_SET_TABLE_SIZE; i++) { | 1669 | hash_for_each(css_set_table, i, node, cg, hlist) |
1654 | struct hlist_head *hhead = &css_set_table[i]; | 1670 | link_css_set(&tmp_cg_links, cg, root_cgrp); |
1655 | struct hlist_node *node; | ||
1656 | struct css_set *cg; | ||
1657 | |||
1658 | hlist_for_each_entry(cg, node, hhead, hlist) | ||
1659 | link_css_set(&tmp_cg_links, cg, root_cgrp); | ||
1660 | } | ||
1661 | write_unlock(&css_set_lock); | 1671 | write_unlock(&css_set_lock); |
1662 | 1672 | ||
1663 | free_cg_links(&tmp_cg_links); | 1673 | free_cg_links(&tmp_cg_links); |
@@ -1773,7 +1783,7 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen) | |||
1773 | rcu_lockdep_assert(rcu_read_lock_held() || cgroup_lock_is_held(), | 1783 | rcu_lockdep_assert(rcu_read_lock_held() || cgroup_lock_is_held(), |
1774 | "cgroup_path() called without proper locking"); | 1784 | "cgroup_path() called without proper locking"); |
1775 | 1785 | ||
1776 | if (!dentry || cgrp == dummytop) { | 1786 | if (cgrp == dummytop) { |
1777 | /* | 1787 | /* |
1778 | * Inactive subsystems have no dentry for their root | 1788 | * Inactive subsystems have no dentry for their root |
1779 | * cgroup | 1789 | * cgroup |
@@ -1982,7 +1992,6 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | |||
1982 | ss->attach(cgrp, &tset); | 1992 | ss->attach(cgrp, &tset); |
1983 | } | 1993 | } |
1984 | 1994 | ||
1985 | synchronize_rcu(); | ||
1986 | out: | 1995 | out: |
1987 | if (retval) { | 1996 | if (retval) { |
1988 | for_each_subsys(root, ss) { | 1997 | for_each_subsys(root, ss) { |
@@ -2151,7 +2160,6 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader) | |||
2151 | /* | 2160 | /* |
2152 | * step 5: success! and cleanup | 2161 | * step 5: success! and cleanup |
2153 | */ | 2162 | */ |
2154 | synchronize_rcu(); | ||
2155 | retval = 0; | 2163 | retval = 0; |
2156 | out_put_css_set_refs: | 2164 | out_put_css_set_refs: |
2157 | if (retval) { | 2165 | if (retval) { |
@@ -2769,14 +2777,14 @@ static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys, | |||
2769 | if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent) | 2777 | if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent) |
2770 | continue; | 2778 | continue; |
2771 | 2779 | ||
2772 | if (is_add) | 2780 | if (is_add) { |
2773 | err = cgroup_add_file(cgrp, subsys, cft); | 2781 | err = cgroup_add_file(cgrp, subsys, cft); |
2774 | else | 2782 | if (err) |
2775 | err = cgroup_rm_file(cgrp, cft); | 2783 | pr_warn("cgroup_addrm_files: failed to add %s, err=%d\n", |
2776 | if (err) { | 2784 | cft->name, err); |
2777 | pr_warning("cgroup_addrm_files: failed to %s %s, err=%d\n", | ||
2778 | is_add ? "add" : "remove", cft->name, err); | ||
2779 | ret = err; | 2785 | ret = err; |
2786 | } else { | ||
2787 | cgroup_rm_file(cgrp, cft); | ||
2780 | } | 2788 | } |
2781 | } | 2789 | } |
2782 | return ret; | 2790 | return ret; |
@@ -3017,6 +3025,32 @@ struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos, | |||
3017 | } | 3025 | } |
3018 | EXPORT_SYMBOL_GPL(cgroup_next_descendant_pre); | 3026 | EXPORT_SYMBOL_GPL(cgroup_next_descendant_pre); |
3019 | 3027 | ||
3028 | /** | ||
3029 | * cgroup_rightmost_descendant - return the rightmost descendant of a cgroup | ||
3030 | * @pos: cgroup of interest | ||
3031 | * | ||
3032 | * Return the rightmost descendant of @pos. If there's no descendant, | ||
3033 | * @pos is returned. This can be used during pre-order traversal to skip | ||
3034 | * subtree of @pos. | ||
3035 | */ | ||
3036 | struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos) | ||
3037 | { | ||
3038 | struct cgroup *last, *tmp; | ||
3039 | |||
3040 | WARN_ON_ONCE(!rcu_read_lock_held()); | ||
3041 | |||
3042 | do { | ||
3043 | last = pos; | ||
3044 | /* ->prev isn't RCU safe, walk ->next till the end */ | ||
3045 | pos = NULL; | ||
3046 | list_for_each_entry_rcu(tmp, &last->children, sibling) | ||
3047 | pos = tmp; | ||
3048 | } while (pos); | ||
3049 | |||
3050 | return last; | ||
3051 | } | ||
3052 | EXPORT_SYMBOL_GPL(cgroup_rightmost_descendant); | ||
3053 | |||
3020 | static struct cgroup *cgroup_leftmost_descendant(struct cgroup *pos) | 3054 | static struct cgroup *cgroup_leftmost_descendant(struct cgroup *pos) |
3021 | { | 3055 | { |
3022 | struct cgroup *last; | 3056 | struct cgroup *last; |
@@ -3752,8 +3786,13 @@ static void cgroup_event_remove(struct work_struct *work) | |||
3752 | remove); | 3786 | remove); |
3753 | struct cgroup *cgrp = event->cgrp; | 3787 | struct cgroup *cgrp = event->cgrp; |
3754 | 3788 | ||
3789 | remove_wait_queue(event->wqh, &event->wait); | ||
3790 | |||
3755 | event->cft->unregister_event(cgrp, event->cft, event->eventfd); | 3791 | event->cft->unregister_event(cgrp, event->cft, event->eventfd); |
3756 | 3792 | ||
3793 | /* Notify userspace the event is going away. */ | ||
3794 | eventfd_signal(event->eventfd, 1); | ||
3795 | |||
3757 | eventfd_ctx_put(event->eventfd); | 3796 | eventfd_ctx_put(event->eventfd); |
3758 | kfree(event); | 3797 | kfree(event); |
3759 | dput(cgrp->dentry); | 3798 | dput(cgrp->dentry); |
@@ -3773,15 +3812,25 @@ static int cgroup_event_wake(wait_queue_t *wait, unsigned mode, | |||
3773 | unsigned long flags = (unsigned long)key; | 3812 | unsigned long flags = (unsigned long)key; |
3774 | 3813 | ||
3775 | if (flags & POLLHUP) { | 3814 | if (flags & POLLHUP) { |
3776 | __remove_wait_queue(event->wqh, &event->wait); | ||
3777 | spin_lock(&cgrp->event_list_lock); | ||
3778 | list_del_init(&event->list); | ||
3779 | spin_unlock(&cgrp->event_list_lock); | ||
3780 | /* | 3815 | /* |
3781 | * We are in atomic context, but cgroup_event_remove() may | 3816 | * If the event has been detached at cgroup removal, we |
3782 | * sleep, so we have to call it in workqueue. | 3817 | * can simply return knowing the other side will clean up |
3818 | * for us. | ||
3819 | * | ||
3820 | * We can't race against event freeing since the other | ||
3821 | * side will require wqh->lock via remove_wait_queue(), | ||
3822 | * which we hold. | ||
3783 | */ | 3823 | */ |
3784 | schedule_work(&event->remove); | 3824 | spin_lock(&cgrp->event_list_lock); |
3825 | if (!list_empty(&event->list)) { | ||
3826 | list_del_init(&event->list); | ||
3827 | /* | ||
3828 | * We are in atomic context, but cgroup_event_remove() | ||
3829 | * may sleep, so we have to call it in workqueue. | ||
3830 | */ | ||
3831 | schedule_work(&event->remove); | ||
3832 | } | ||
3833 | spin_unlock(&cgrp->event_list_lock); | ||
3785 | } | 3834 | } |
3786 | 3835 | ||
3787 | return 0; | 3836 | return 0; |
@@ -3807,6 +3856,7 @@ static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft, | |||
3807 | const char *buffer) | 3856 | const char *buffer) |
3808 | { | 3857 | { |
3809 | struct cgroup_event *event = NULL; | 3858 | struct cgroup_event *event = NULL; |
3859 | struct cgroup *cgrp_cfile; | ||
3810 | unsigned int efd, cfd; | 3860 | unsigned int efd, cfd; |
3811 | struct file *efile = NULL; | 3861 | struct file *efile = NULL; |
3812 | struct file *cfile = NULL; | 3862 | struct file *cfile = NULL; |
@@ -3862,6 +3912,16 @@ static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft, | |||
3862 | goto fail; | 3912 | goto fail; |
3863 | } | 3913 | } |
3864 | 3914 | ||
3915 | /* | ||
3916 | * The file to be monitored must be in the same cgroup as | ||
3917 | * cgroup.event_control is. | ||
3918 | */ | ||
3919 | cgrp_cfile = __d_cgrp(cfile->f_dentry->d_parent); | ||
3920 | if (cgrp_cfile != cgrp) { | ||
3921 | ret = -EINVAL; | ||
3922 | goto fail; | ||
3923 | } | ||
3924 | |||
3865 | if (!event->cft->register_event || !event->cft->unregister_event) { | 3925 | if (!event->cft->register_event || !event->cft->unregister_event) { |
3866 | ret = -EINVAL; | 3926 | ret = -EINVAL; |
3867 | goto fail; | 3927 | goto fail; |
@@ -4135,6 +4195,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
4135 | 4195 | ||
4136 | init_cgroup_housekeeping(cgrp); | 4196 | init_cgroup_housekeeping(cgrp); |
4137 | 4197 | ||
4198 | dentry->d_fsdata = cgrp; | ||
4199 | cgrp->dentry = dentry; | ||
4200 | |||
4138 | cgrp->parent = parent; | 4201 | cgrp->parent = parent; |
4139 | cgrp->root = parent->root; | 4202 | cgrp->root = parent->root; |
4140 | cgrp->top_cgroup = parent->top_cgroup; | 4203 | cgrp->top_cgroup = parent->top_cgroup; |
@@ -4172,8 +4235,6 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
4172 | lockdep_assert_held(&dentry->d_inode->i_mutex); | 4235 | lockdep_assert_held(&dentry->d_inode->i_mutex); |
4173 | 4236 | ||
4174 | /* allocation complete, commit to creation */ | 4237 | /* allocation complete, commit to creation */ |
4175 | dentry->d_fsdata = cgrp; | ||
4176 | cgrp->dentry = dentry; | ||
4177 | list_add_tail(&cgrp->allcg_node, &root->allcg_list); | 4238 | list_add_tail(&cgrp->allcg_node, &root->allcg_list); |
4178 | list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children); | 4239 | list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children); |
4179 | root->number_of_cgroups++; | 4240 | root->number_of_cgroups++; |
@@ -4340,20 +4401,14 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) | |||
4340 | /* | 4401 | /* |
4341 | * Unregister events and notify userspace. | 4402 | * Unregister events and notify userspace. |
4342 | * Notify userspace about cgroup removing only after rmdir of cgroup | 4403 | * Notify userspace about cgroup removing only after rmdir of cgroup |
4343 | * directory to avoid race between userspace and kernelspace. Use | 4404 | * directory to avoid race between userspace and kernelspace. |
4344 | * a temporary list to avoid a deadlock with cgroup_event_wake(). Since | ||
4345 | * cgroup_event_wake() is called with the wait queue head locked, | ||
4346 | * remove_wait_queue() cannot be called while holding event_list_lock. | ||
4347 | */ | 4405 | */ |
4348 | spin_lock(&cgrp->event_list_lock); | 4406 | spin_lock(&cgrp->event_list_lock); |
4349 | list_splice_init(&cgrp->event_list, &tmp_list); | 4407 | list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) { |
4350 | spin_unlock(&cgrp->event_list_lock); | ||
4351 | list_for_each_entry_safe(event, tmp, &tmp_list, list) { | ||
4352 | list_del_init(&event->list); | 4408 | list_del_init(&event->list); |
4353 | remove_wait_queue(event->wqh, &event->wait); | ||
4354 | eventfd_signal(event->eventfd, 1); | ||
4355 | schedule_work(&event->remove); | 4409 | schedule_work(&event->remove); |
4356 | } | 4410 | } |
4411 | spin_unlock(&cgrp->event_list_lock); | ||
4357 | 4412 | ||
4358 | return 0; | 4413 | return 0; |
4359 | } | 4414 | } |
@@ -4438,6 +4493,9 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) | |||
4438 | { | 4493 | { |
4439 | struct cgroup_subsys_state *css; | 4494 | struct cgroup_subsys_state *css; |
4440 | int i, ret; | 4495 | int i, ret; |
4496 | struct hlist_node *node, *tmp; | ||
4497 | struct css_set *cg; | ||
4498 | unsigned long key; | ||
4441 | 4499 | ||
4442 | /* check name and function validity */ | 4500 | /* check name and function validity */ |
4443 | if (ss->name == NULL || strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN || | 4501 | if (ss->name == NULL || strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN || |
@@ -4503,23 +4561,17 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) | |||
4503 | * this is all done under the css_set_lock. | 4561 | * this is all done under the css_set_lock. |
4504 | */ | 4562 | */ |
4505 | write_lock(&css_set_lock); | 4563 | write_lock(&css_set_lock); |
4506 | for (i = 0; i < CSS_SET_TABLE_SIZE; i++) { | 4564 | hash_for_each_safe(css_set_table, i, node, tmp, cg, hlist) { |
4507 | struct css_set *cg; | 4565 | /* skip entries that we already rehashed */ |
4508 | struct hlist_node *node, *tmp; | 4566 | if (cg->subsys[ss->subsys_id]) |
4509 | struct hlist_head *bucket = &css_set_table[i], *new_bucket; | 4567 | continue; |
4510 | 4568 | /* remove existing entry */ | |
4511 | hlist_for_each_entry_safe(cg, node, tmp, bucket, hlist) { | 4569 | hash_del(&cg->hlist); |
4512 | /* skip entries that we already rehashed */ | 4570 | /* set new value */ |
4513 | if (cg->subsys[ss->subsys_id]) | 4571 | cg->subsys[ss->subsys_id] = css; |
4514 | continue; | 4572 | /* recompute hash and restore entry */ |
4515 | /* remove existing entry */ | 4573 | key = css_set_hash(cg->subsys); |
4516 | hlist_del(&cg->hlist); | 4574 | hash_add(css_set_table, node, key); |
4517 | /* set new value */ | ||
4518 | cg->subsys[ss->subsys_id] = css; | ||
4519 | /* recompute hash and restore entry */ | ||
4520 | new_bucket = css_set_hash(cg->subsys); | ||
4521 | hlist_add_head(&cg->hlist, new_bucket); | ||
4522 | } | ||
4523 | } | 4575 | } |
4524 | write_unlock(&css_set_lock); | 4576 | write_unlock(&css_set_lock); |
4525 | 4577 | ||
@@ -4551,7 +4603,6 @@ EXPORT_SYMBOL_GPL(cgroup_load_subsys); | |||
4551 | void cgroup_unload_subsys(struct cgroup_subsys *ss) | 4603 | void cgroup_unload_subsys(struct cgroup_subsys *ss) |
4552 | { | 4604 | { |
4553 | struct cg_cgroup_link *link; | 4605 | struct cg_cgroup_link *link; |
4554 | struct hlist_head *hhead; | ||
4555 | 4606 | ||
4556 | BUG_ON(ss->module == NULL); | 4607 | BUG_ON(ss->module == NULL); |
4557 | 4608 | ||
@@ -4585,11 +4636,12 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss) | |||
4585 | write_lock(&css_set_lock); | 4636 | write_lock(&css_set_lock); |
4586 | list_for_each_entry(link, &dummytop->css_sets, cgrp_link_list) { | 4637 | list_for_each_entry(link, &dummytop->css_sets, cgrp_link_list) { |
4587 | struct css_set *cg = link->cg; | 4638 | struct css_set *cg = link->cg; |
4639 | unsigned long key; | ||
4588 | 4640 | ||
4589 | hlist_del(&cg->hlist); | 4641 | hash_del(&cg->hlist); |
4590 | cg->subsys[ss->subsys_id] = NULL; | 4642 | cg->subsys[ss->subsys_id] = NULL; |
4591 | hhead = css_set_hash(cg->subsys); | 4643 | key = css_set_hash(cg->subsys); |
4592 | hlist_add_head(&cg->hlist, hhead); | 4644 | hash_add(css_set_table, &cg->hlist, key); |
4593 | } | 4645 | } |
4594 | write_unlock(&css_set_lock); | 4646 | write_unlock(&css_set_lock); |
4595 | 4647 | ||
@@ -4631,9 +4683,6 @@ int __init cgroup_init_early(void) | |||
4631 | list_add(&init_css_set_link.cg_link_list, | 4683 | list_add(&init_css_set_link.cg_link_list, |
4632 | &init_css_set.cg_links); | 4684 | &init_css_set.cg_links); |
4633 | 4685 | ||
4634 | for (i = 0; i < CSS_SET_TABLE_SIZE; i++) | ||
4635 | INIT_HLIST_HEAD(&css_set_table[i]); | ||
4636 | |||
4637 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | 4686 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { |
4638 | struct cgroup_subsys *ss = subsys[i]; | 4687 | struct cgroup_subsys *ss = subsys[i]; |
4639 | 4688 | ||
@@ -4667,7 +4716,7 @@ int __init cgroup_init(void) | |||
4667 | { | 4716 | { |
4668 | int err; | 4717 | int err; |
4669 | int i; | 4718 | int i; |
4670 | struct hlist_head *hhead; | 4719 | unsigned long key; |
4671 | 4720 | ||
4672 | err = bdi_init(&cgroup_backing_dev_info); | 4721 | err = bdi_init(&cgroup_backing_dev_info); |
4673 | if (err) | 4722 | if (err) |
@@ -4686,8 +4735,8 @@ int __init cgroup_init(void) | |||
4686 | } | 4735 | } |
4687 | 4736 | ||
4688 | /* Add init_css_set to the hash table */ | 4737 | /* Add init_css_set to the hash table */ |
4689 | hhead = css_set_hash(init_css_set.subsys); | 4738 | key = css_set_hash(init_css_set.subsys); |
4690 | hlist_add_head(&init_css_set.hlist, hhead); | 4739 | hash_add(css_set_table, &init_css_set.hlist, key); |
4691 | BUG_ON(!init_root_id(&rootnode)); | 4740 | BUG_ON(!init_root_id(&rootnode)); |
4692 | 4741 | ||
4693 | cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj); | 4742 | cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj); |
@@ -4982,8 +5031,7 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks) | |||
4982 | } | 5031 | } |
4983 | task_unlock(tsk); | 5032 | task_unlock(tsk); |
4984 | 5033 | ||
4985 | if (cg) | 5034 | put_css_set_taskexit(cg); |
4986 | put_css_set_taskexit(cg); | ||
4987 | } | 5035 | } |
4988 | 5036 | ||
4989 | /** | 5037 | /** |
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 7bb63eea6eb8..5bb9bf18438c 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -2511,8 +2511,16 @@ void cpuset_print_task_mems_allowed(struct task_struct *tsk) | |||
2511 | 2511 | ||
2512 | dentry = task_cs(tsk)->css.cgroup->dentry; | 2512 | dentry = task_cs(tsk)->css.cgroup->dentry; |
2513 | spin_lock(&cpuset_buffer_lock); | 2513 | spin_lock(&cpuset_buffer_lock); |
2514 | snprintf(cpuset_name, CPUSET_NAME_LEN, | 2514 | |
2515 | dentry ? (const char *)dentry->d_name.name : "/"); | 2515 | if (!dentry) { |
2516 | strcpy(cpuset_name, "/"); | ||
2517 | } else { | ||
2518 | spin_lock(&dentry->d_lock); | ||
2519 | strlcpy(cpuset_name, (const char *)dentry->d_name.name, | ||
2520 | CPUSET_NAME_LEN); | ||
2521 | spin_unlock(&dentry->d_lock); | ||
2522 | } | ||
2523 | |||
2516 | nodelist_scnprintf(cpuset_nodelist, CPUSET_NODELIST_LEN, | 2524 | nodelist_scnprintf(cpuset_nodelist, CPUSET_NODELIST_LEN, |
2517 | tsk->mems_allowed); | 2525 | tsk->mems_allowed); |
2518 | printk(KERN_INFO "%s cpuset=%s mems_allowed=%s\n", | 2526 | printk(KERN_INFO "%s cpuset=%s mems_allowed=%s\n", |
diff --git a/kernel/sched/auto_group.c b/kernel/sched/auto_group.c index 0984a21076a3..64de5f8b0c9e 100644 --- a/kernel/sched/auto_group.c +++ b/kernel/sched/auto_group.c | |||
@@ -35,6 +35,7 @@ static inline void autogroup_destroy(struct kref *kref) | |||
35 | ag->tg->rt_se = NULL; | 35 | ag->tg->rt_se = NULL; |
36 | ag->tg->rt_rq = NULL; | 36 | ag->tg->rt_rq = NULL; |
37 | #endif | 37 | #endif |
38 | sched_offline_group(ag->tg); | ||
38 | sched_destroy_group(ag->tg); | 39 | sched_destroy_group(ag->tg); |
39 | } | 40 | } |
40 | 41 | ||
@@ -76,6 +77,8 @@ static inline struct autogroup *autogroup_create(void) | |||
76 | if (IS_ERR(tg)) | 77 | if (IS_ERR(tg)) |
77 | goto out_free; | 78 | goto out_free; |
78 | 79 | ||
80 | sched_online_group(tg, &root_task_group); | ||
81 | |||
79 | kref_init(&ag->kref); | 82 | kref_init(&ag->kref); |
80 | init_rwsem(&ag->lock); | 83 | init_rwsem(&ag->lock); |
81 | ag->id = atomic_inc_return(&autogroup_seq_nr); | 84 | ag->id = atomic_inc_return(&autogroup_seq_nr); |
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 03d7784b7bd2..3a673a3b0c6b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
@@ -7161,7 +7161,6 @@ static void free_sched_group(struct task_group *tg) | |||
7161 | struct task_group *sched_create_group(struct task_group *parent) | 7161 | struct task_group *sched_create_group(struct task_group *parent) |
7162 | { | 7162 | { |
7163 | struct task_group *tg; | 7163 | struct task_group *tg; |
7164 | unsigned long flags; | ||
7165 | 7164 | ||
7166 | tg = kzalloc(sizeof(*tg), GFP_KERNEL); | 7165 | tg = kzalloc(sizeof(*tg), GFP_KERNEL); |
7167 | if (!tg) | 7166 | if (!tg) |
@@ -7173,6 +7172,17 @@ struct task_group *sched_create_group(struct task_group *parent) | |||
7173 | if (!alloc_rt_sched_group(tg, parent)) | 7172 | if (!alloc_rt_sched_group(tg, parent)) |
7174 | goto err; | 7173 | goto err; |
7175 | 7174 | ||
7175 | return tg; | ||
7176 | |||
7177 | err: | ||
7178 | free_sched_group(tg); | ||
7179 | return ERR_PTR(-ENOMEM); | ||
7180 | } | ||
7181 | |||
7182 | void sched_online_group(struct task_group *tg, struct task_group *parent) | ||
7183 | { | ||
7184 | unsigned long flags; | ||
7185 | |||
7176 | spin_lock_irqsave(&task_group_lock, flags); | 7186 | spin_lock_irqsave(&task_group_lock, flags); |
7177 | list_add_rcu(&tg->list, &task_groups); | 7187 | list_add_rcu(&tg->list, &task_groups); |
7178 | 7188 | ||
@@ -7182,12 +7192,6 @@ struct task_group *sched_create_group(struct task_group *parent) | |||
7182 | INIT_LIST_HEAD(&tg->children); | 7192 | INIT_LIST_HEAD(&tg->children); |
7183 | list_add_rcu(&tg->siblings, &parent->children); | 7193 | list_add_rcu(&tg->siblings, &parent->children); |
7184 | spin_unlock_irqrestore(&task_group_lock, flags); | 7194 | spin_unlock_irqrestore(&task_group_lock, flags); |
7185 | |||
7186 | return tg; | ||
7187 | |||
7188 | err: | ||
7189 | free_sched_group(tg); | ||
7190 | return ERR_PTR(-ENOMEM); | ||
7191 | } | 7195 | } |
7192 | 7196 | ||
7193 | /* rcu callback to free various structures associated with a task group */ | 7197 | /* rcu callback to free various structures associated with a task group */ |
@@ -7200,6 +7204,12 @@ static void free_sched_group_rcu(struct rcu_head *rhp) | |||
7200 | /* Destroy runqueue etc associated with a task group */ | 7204 | /* Destroy runqueue etc associated with a task group */ |
7201 | void sched_destroy_group(struct task_group *tg) | 7205 | void sched_destroy_group(struct task_group *tg) |
7202 | { | 7206 | { |
7207 | /* wait for possible concurrent references to cfs_rqs complete */ | ||
7208 | call_rcu(&tg->rcu, free_sched_group_rcu); | ||
7209 | } | ||
7210 | |||
7211 | void sched_offline_group(struct task_group *tg) | ||
7212 | { | ||
7203 | unsigned long flags; | 7213 | unsigned long flags; |
7204 | int i; | 7214 | int i; |
7205 | 7215 | ||
@@ -7211,9 +7221,6 @@ void sched_destroy_group(struct task_group *tg) | |||
7211 | list_del_rcu(&tg->list); | 7221 | list_del_rcu(&tg->list); |
7212 | list_del_rcu(&tg->siblings); | 7222 | list_del_rcu(&tg->siblings); |
7213 | spin_unlock_irqrestore(&task_group_lock, flags); | 7223 | spin_unlock_irqrestore(&task_group_lock, flags); |
7214 | |||
7215 | /* wait for possible concurrent references to cfs_rqs complete */ | ||
7216 | call_rcu(&tg->rcu, free_sched_group_rcu); | ||
7217 | } | 7224 | } |
7218 | 7225 | ||
7219 | /* change task's runqueue when it moves between groups. | 7226 | /* change task's runqueue when it moves between groups. |
@@ -7584,6 +7591,19 @@ static struct cgroup_subsys_state *cpu_cgroup_css_alloc(struct cgroup *cgrp) | |||
7584 | return &tg->css; | 7591 | return &tg->css; |
7585 | } | 7592 | } |
7586 | 7593 | ||
7594 | static int cpu_cgroup_css_online(struct cgroup *cgrp) | ||
7595 | { | ||
7596 | struct task_group *tg = cgroup_tg(cgrp); | ||
7597 | struct task_group *parent; | ||
7598 | |||
7599 | if (!cgrp->parent) | ||
7600 | return 0; | ||
7601 | |||
7602 | parent = cgroup_tg(cgrp->parent); | ||
7603 | sched_online_group(tg, parent); | ||
7604 | return 0; | ||
7605 | } | ||
7606 | |||
7587 | static void cpu_cgroup_css_free(struct cgroup *cgrp) | 7607 | static void cpu_cgroup_css_free(struct cgroup *cgrp) |
7588 | { | 7608 | { |
7589 | struct task_group *tg = cgroup_tg(cgrp); | 7609 | struct task_group *tg = cgroup_tg(cgrp); |
@@ -7591,6 +7611,13 @@ static void cpu_cgroup_css_free(struct cgroup *cgrp) | |||
7591 | sched_destroy_group(tg); | 7611 | sched_destroy_group(tg); |
7592 | } | 7612 | } |
7593 | 7613 | ||
7614 | static void cpu_cgroup_css_offline(struct cgroup *cgrp) | ||
7615 | { | ||
7616 | struct task_group *tg = cgroup_tg(cgrp); | ||
7617 | |||
7618 | sched_offline_group(tg); | ||
7619 | } | ||
7620 | |||
7594 | static int cpu_cgroup_can_attach(struct cgroup *cgrp, | 7621 | static int cpu_cgroup_can_attach(struct cgroup *cgrp, |
7595 | struct cgroup_taskset *tset) | 7622 | struct cgroup_taskset *tset) |
7596 | { | 7623 | { |
@@ -7946,6 +7973,8 @@ struct cgroup_subsys cpu_cgroup_subsys = { | |||
7946 | .name = "cpu", | 7973 | .name = "cpu", |
7947 | .css_alloc = cpu_cgroup_css_alloc, | 7974 | .css_alloc = cpu_cgroup_css_alloc, |
7948 | .css_free = cpu_cgroup_css_free, | 7975 | .css_free = cpu_cgroup_css_free, |
7976 | .css_online = cpu_cgroup_css_online, | ||
7977 | .css_offline = cpu_cgroup_css_offline, | ||
7949 | .can_attach = cpu_cgroup_can_attach, | 7978 | .can_attach = cpu_cgroup_can_attach, |
7950 | .attach = cpu_cgroup_attach, | 7979 | .attach = cpu_cgroup_attach, |
7951 | .exit = cpu_cgroup_exit, | 7980 | .exit = cpu_cgroup_exit, |
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 7ae4c4c5420e..557e7b53b323 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c | |||
@@ -110,13 +110,6 @@ static char *task_group_path(struct task_group *tg) | |||
110 | if (autogroup_path(tg, group_path, PATH_MAX)) | 110 | if (autogroup_path(tg, group_path, PATH_MAX)) |
111 | return group_path; | 111 | return group_path; |
112 | 112 | ||
113 | /* | ||
114 | * May be NULL if the underlying cgroup isn't fully-created yet | ||
115 | */ | ||
116 | if (!tg->css.cgroup) { | ||
117 | group_path[0] = '\0'; | ||
118 | return group_path; | ||
119 | } | ||
120 | cgroup_path(tg->css.cgroup, group_path, PATH_MAX); | 113 | cgroup_path(tg->css.cgroup, group_path, PATH_MAX); |
121 | return group_path; | 114 | return group_path; |
122 | } | 115 | } |
diff --git a/tools/Makefile b/tools/Makefile index 798fa0ef048e..fa36565b209d 100644 --- a/tools/Makefile +++ b/tools/Makefile | |||
@@ -3,6 +3,7 @@ include scripts/Makefile.include | |||
3 | help: | 3 | help: |
4 | @echo 'Possible targets:' | 4 | @echo 'Possible targets:' |
5 | @echo '' | 5 | @echo '' |
6 | @echo ' cgroup - cgroup tools' | ||
6 | @echo ' cpupower - a tool for all things x86 CPU power' | 7 | @echo ' cpupower - a tool for all things x86 CPU power' |
7 | @echo ' firewire - the userspace part of nosy, an IEEE-1394 traffic sniffer' | 8 | @echo ' firewire - the userspace part of nosy, an IEEE-1394 traffic sniffer' |
8 | @echo ' lguest - a minimal 32-bit x86 hypervisor' | 9 | @echo ' lguest - a minimal 32-bit x86 hypervisor' |
@@ -33,7 +34,7 @@ help: | |||
33 | cpupower: FORCE | 34 | cpupower: FORCE |
34 | $(call descend,power/$@) | 35 | $(call descend,power/$@) |
35 | 36 | ||
36 | firewire lguest perf usb virtio vm: FORCE | 37 | cgroup firewire lguest perf usb virtio vm: FORCE |
37 | $(call descend,$@) | 38 | $(call descend,$@) |
38 | 39 | ||
39 | selftests: FORCE | 40 | selftests: FORCE |
@@ -45,7 +46,7 @@ turbostat x86_energy_perf_policy: FORCE | |||
45 | cpupower_install: | 46 | cpupower_install: |
46 | $(call descend,power/$(@:_install=),install) | 47 | $(call descend,power/$(@:_install=),install) |
47 | 48 | ||
48 | firewire_install lguest_install perf_install usb_install virtio_install vm_install: | 49 | cgroup_install firewire_install lguest_install perf_install usb_install virtio_install vm_install: |
49 | $(call descend,$(@:_install=),install) | 50 | $(call descend,$(@:_install=),install) |
50 | 51 | ||
51 | selftests_install: | 52 | selftests_install: |
@@ -54,14 +55,14 @@ selftests_install: | |||
54 | turbostat_install x86_energy_perf_policy_install: | 55 | turbostat_install x86_energy_perf_policy_install: |
55 | $(call descend,power/x86/$(@:_install=),install) | 56 | $(call descend,power/x86/$(@:_install=),install) |
56 | 57 | ||
57 | install: cpupower_install firewire_install lguest_install perf_install \ | 58 | install: cgroup_install cpupower_install firewire_install lguest_install \ |
58 | selftests_install turbostat_install usb_install virtio_install \ | 59 | perf_install selftests_install turbostat_install usb_install \ |
59 | vm_install x86_energy_perf_policy_install | 60 | virtio_install vm_install x86_energy_perf_policy_install |
60 | 61 | ||
61 | cpupower_clean: | 62 | cpupower_clean: |
62 | $(call descend,power/cpupower,clean) | 63 | $(call descend,power/cpupower,clean) |
63 | 64 | ||
64 | firewire_clean lguest_clean perf_clean usb_clean virtio_clean vm_clean: | 65 | cgroup_clean firewire_clean lguest_clean perf_clean usb_clean virtio_clean vm_clean: |
65 | $(call descend,$(@:_clean=),clean) | 66 | $(call descend,$(@:_clean=),clean) |
66 | 67 | ||
67 | selftests_clean: | 68 | selftests_clean: |
@@ -70,8 +71,8 @@ selftests_clean: | |||
70 | turbostat_clean x86_energy_perf_policy_clean: | 71 | turbostat_clean x86_energy_perf_policy_clean: |
71 | $(call descend,power/x86/$(@:_clean=),clean) | 72 | $(call descend,power/x86/$(@:_clean=),clean) |
72 | 73 | ||
73 | clean: cpupower_clean firewire_clean lguest_clean perf_clean selftests_clean \ | 74 | clean: cgroup_clean cpupower_clean firewire_clean lguest_clean perf_clean \ |
74 | turbostat_clean usb_clean virtio_clean vm_clean \ | 75 | selftests_clean turbostat_clean usb_clean virtio_clean \ |
75 | x86_energy_perf_policy_clean | 76 | vm_clean x86_energy_perf_policy_clean |
76 | 77 | ||
77 | .PHONY: FORCE | 78 | .PHONY: FORCE |
diff --git a/tools/cgroup/.gitignore b/tools/cgroup/.gitignore new file mode 100644 index 000000000000..633cd9b874f9 --- /dev/null +++ b/tools/cgroup/.gitignore | |||
@@ -0,0 +1 @@ | |||
cgroup_event_listener | |||
diff --git a/tools/cgroup/Makefile b/tools/cgroup/Makefile new file mode 100644 index 000000000000..b4286196b763 --- /dev/null +++ b/tools/cgroup/Makefile | |||
@@ -0,0 +1,11 @@ | |||
1 | # Makefile for cgroup tools | ||
2 | |||
3 | CC = $(CROSS_COMPILE)gcc | ||
4 | CFLAGS = -Wall -Wextra | ||
5 | |||
6 | all: cgroup_event_listener | ||
7 | %: %.c | ||
8 | $(CC) $(CFLAGS) -o $@ $^ | ||
9 | |||
10 | clean: | ||
11 | $(RM) cgroup_event_listener | ||
diff --git a/Documentation/cgroups/cgroup_event_listener.c b/tools/cgroup/cgroup_event_listener.c index 3e082f96dc12..4eb5507205c9 100644 --- a/Documentation/cgroups/cgroup_event_listener.c +++ b/tools/cgroup/cgroup_event_listener.c | |||
@@ -5,6 +5,7 @@ | |||
5 | */ | 5 | */ |
6 | 6 | ||
7 | #include <assert.h> | 7 | #include <assert.h> |
8 | #include <err.h> | ||
8 | #include <errno.h> | 9 | #include <errno.h> |
9 | #include <fcntl.h> | 10 | #include <fcntl.h> |
10 | #include <libgen.h> | 11 | #include <libgen.h> |
@@ -15,7 +16,7 @@ | |||
15 | 16 | ||
16 | #include <sys/eventfd.h> | 17 | #include <sys/eventfd.h> |
17 | 18 | ||
18 | #define USAGE_STR "Usage: cgroup_event_listener <path-to-control-file> <args>\n" | 19 | #define USAGE_STR "Usage: cgroup_event_listener <path-to-control-file> <args>" |
19 | 20 | ||
20 | int main(int argc, char **argv) | 21 | int main(int argc, char **argv) |
21 | { | 22 | { |
@@ -26,49 +27,33 @@ int main(int argc, char **argv) | |||
26 | char line[LINE_MAX]; | 27 | char line[LINE_MAX]; |
27 | int ret; | 28 | int ret; |
28 | 29 | ||
29 | if (argc != 3) { | 30 | if (argc != 3) |
30 | fputs(USAGE_STR, stderr); | 31 | errx(1, "%s", USAGE_STR); |
31 | return 1; | ||
32 | } | ||
33 | 32 | ||
34 | cfd = open(argv[1], O_RDONLY); | 33 | cfd = open(argv[1], O_RDONLY); |
35 | if (cfd == -1) { | 34 | if (cfd == -1) |
36 | fprintf(stderr, "Cannot open %s: %s\n", argv[1], | 35 | err(1, "Cannot open %s", argv[1]); |
37 | strerror(errno)); | ||
38 | goto out; | ||
39 | } | ||
40 | 36 | ||
41 | ret = snprintf(event_control_path, PATH_MAX, "%s/cgroup.event_control", | 37 | ret = snprintf(event_control_path, PATH_MAX, "%s/cgroup.event_control", |
42 | dirname(argv[1])); | 38 | dirname(argv[1])); |
43 | if (ret >= PATH_MAX) { | 39 | if (ret >= PATH_MAX) |
44 | fputs("Path to cgroup.event_control is too long\n", stderr); | 40 | errx(1, "Path to cgroup.event_control is too long"); |
45 | goto out; | ||
46 | } | ||
47 | 41 | ||
48 | event_control = open(event_control_path, O_WRONLY); | 42 | event_control = open(event_control_path, O_WRONLY); |
49 | if (event_control == -1) { | 43 | if (event_control == -1) |
50 | fprintf(stderr, "Cannot open %s: %s\n", event_control_path, | 44 | err(1, "Cannot open %s", event_control_path); |
51 | strerror(errno)); | ||
52 | goto out; | ||
53 | } | ||
54 | 45 | ||
55 | efd = eventfd(0, 0); | 46 | efd = eventfd(0, 0); |
56 | if (efd == -1) { | 47 | if (efd == -1) |
57 | perror("eventfd() failed"); | 48 | err(1, "eventfd() failed"); |
58 | goto out; | ||
59 | } | ||
60 | 49 | ||
61 | ret = snprintf(line, LINE_MAX, "%d %d %s", efd, cfd, argv[2]); | 50 | ret = snprintf(line, LINE_MAX, "%d %d %s", efd, cfd, argv[2]); |
62 | if (ret >= LINE_MAX) { | 51 | if (ret >= LINE_MAX) |
63 | fputs("Arguments string is too long\n", stderr); | 52 | errx(1, "Arguments string is too long"); |
64 | goto out; | ||
65 | } | ||
66 | 53 | ||
67 | ret = write(event_control, line, strlen(line) + 1); | 54 | ret = write(event_control, line, strlen(line) + 1); |
68 | if (ret == -1) { | 55 | if (ret == -1) |
69 | perror("Cannot write to cgroup.event_control"); | 56 | err(1, "Cannot write to cgroup.event_control"); |
70 | goto out; | ||
71 | } | ||
72 | 57 | ||
73 | while (1) { | 58 | while (1) { |
74 | uint64_t result; | 59 | uint64_t result; |
@@ -77,34 +62,21 @@ int main(int argc, char **argv) | |||
77 | if (ret == -1) { | 62 | if (ret == -1) { |
78 | if (errno == EINTR) | 63 | if (errno == EINTR) |
79 | continue; | 64 | continue; |
80 | perror("Cannot read from eventfd"); | 65 | err(1, "Cannot read from eventfd"); |
81 | break; | ||
82 | } | 66 | } |
83 | assert(ret == sizeof(result)); | 67 | assert(ret == sizeof(result)); |
84 | 68 | ||
85 | ret = access(event_control_path, W_OK); | 69 | ret = access(event_control_path, W_OK); |
86 | if ((ret == -1) && (errno == ENOENT)) { | 70 | if ((ret == -1) && (errno == ENOENT)) { |
87 | puts("The cgroup seems to have removed."); | 71 | puts("The cgroup seems to have removed."); |
88 | ret = 0; | ||
89 | break; | ||
90 | } | ||
91 | |||
92 | if (ret == -1) { | ||
93 | perror("cgroup.event_control " | ||
94 | "is not accessible any more"); | ||
95 | break; | 72 | break; |
96 | } | 73 | } |
97 | 74 | ||
75 | if (ret == -1) | ||
76 | err(1, "cgroup.event_control is not accessible any more"); | ||
77 | |||
98 | printf("%s %s: crossed\n", argv[1], argv[2]); | 78 | printf("%s %s: crossed\n", argv[1], argv[2]); |
99 | } | 79 | } |
100 | 80 | ||
101 | out: | 81 | return 0; |
102 | if (efd >= 0) | ||
103 | close(efd); | ||
104 | if (event_control >= 0) | ||
105 | close(event_control); | ||
106 | if (cfd >= 0) | ||
107 | close(cfd); | ||
108 | |||
109 | return (ret != 0); | ||
110 | } | 82 | } |