author		Linus Torvalds <torvalds@linux-foundation.org>	2013-02-20 12:16:21 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-02-20 12:16:21 -0500
commit		502b24c23b44fbaa01cc2cbd86d8035845b7811f (patch)
tree		3096deeb99f6acc2d72ee33f145008ec5e2c68b3
parent		ece8e0b2f9c980e5511fe8db2d68c6f1859b9d83 (diff)
parent		f169007b2773f285e098cb84c74aac0154d65ff7 (diff)
Merge branch 'for-3.9' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup changes from Tejun Heo:
 "Nothing too drastic.

  - Removal of synchronize_rcu() from userland visible paths.

  - Various fixes and cleanups from Li.

  - cgroup_rightmost_descendant() added which will be used by cpuset
    changes (it will be a separate pull request)."

* 'for-3.9' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup: fail if monitored file and event_control are in different cgroup
  cgroup: fix cgroup_rmdir() vs close(eventfd) race
  cpuset: fix cpuset_print_task_mems_allowed() vs rename() race
  cgroup: fix exit() vs rmdir() race
  cgroup: remove bogus comments in cgroup_diput()
  cgroup: remove synchronize_rcu() from cgroup_diput()
  cgroup: remove duplicate RCU free on struct cgroup
  sched: remove redundant NULL cgroup check in task_group_path()
  sched: split out css_online/css_offline from tg creation/destruction
  cgroup: initialize cgrp->dentry before css_alloc()
  cgroup: remove a NULL check in cgroup_exit()
  cgroup: fix bogus kernel warnings when cgroup_create() failed
  cgroup: remove synchronize_rcu() from rebind_subsystems()
  cgroup: remove synchronize_rcu() from cgroup_attach_{task|proc}()
  cgroup: use new hashtable implementation
  cgroups: fix cgroup_event_listener error handling
  cgroups: move cgroup_event_listener.c to tools/cgroup
  cgroup: implement cgroup_rightmost_descendant()
  cgroup: remove unused dummy cgroup_fork_callbacks()
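As context for the cpuset work mentioned above, here is a minimal sketch, not part of this merge, of how cgroup_rightmost_descendant() can prune a subtree during a cgroup_for_each_descendant_pre() walk. walk_descendants_pruned() and should_skip_subtree() are hypothetical names used only for illustration; the iterator macro and the new helper are the ones touched by this series.

#include <linux/cgroup.h>
#include <linux/rcupdate.h>
#include <linux/types.h>

/* Hypothetical policy hook; always visits in this sketch. */
static bool should_skip_subtree(struct cgroup *cgrp)
{
	return false;
}

static void walk_descendants_pruned(struct cgroup *root)
{
	struct cgroup *pos;

	rcu_read_lock();
	cgroup_for_each_descendant_pre(pos, root) {
		if (should_skip_subtree(pos)) {
			/*
			 * Jump to the rightmost descendant so the next
			 * pre-order step leaves pos's subtree entirely.
			 */
			pos = cgroup_rightmost_descendant(pos);
			continue;
		}
		/* ... visit pos here ... */
	}
	rcu_read_unlock();
}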
-rw-r--r--	Documentation/cgroups/00-INDEX	2
-rw-r--r--	Documentation/cgroups/memcg_test.txt	3
-rw-r--r--	include/linux/cgroup.h	3
-rw-r--r--	include/linux/sched.h	3
-rw-r--r--	kernel/cgroup.c	288
-rw-r--r--	kernel/cpuset.c	12
-rw-r--r--	kernel/sched/auto_group.c	3
-rw-r--r--	kernel/sched/core.c	49
-rw-r--r--	kernel/sched/debug.c	7
-rw-r--r--	tools/Makefile	19
-rw-r--r--	tools/cgroup/.gitignore	1
-rw-r--r--	tools/cgroup/Makefile	11
-rw-r--r--	tools/cgroup/cgroup_event_listener.c (renamed from Documentation/cgroups/cgroup_event_listener.c)	72
13 files changed, 270 insertions(+), 203 deletions(-)
diff --git a/Documentation/cgroups/00-INDEX b/Documentation/cgroups/00-INDEX
index f78b90a35ad0..f5635a09c3f6 100644
--- a/Documentation/cgroups/00-INDEX
+++ b/Documentation/cgroups/00-INDEX
@@ -4,8 +4,6 @@ blkio-controller.txt
 	- Description for Block IO Controller, implementation and usage details.
 cgroups.txt
 	- Control Groups definition, implementation details, examples and API.
-cgroup_event_listener.c
-	- A user program for cgroup listener.
 cpuacct.txt
 	- CPU Accounting Controller; account CPU usage for groups of tasks.
 cpusets.txt
diff --git a/Documentation/cgroups/memcg_test.txt b/Documentation/cgroups/memcg_test.txt
index fc8fa97a09ac..ce94a83a7d9a 100644
--- a/Documentation/cgroups/memcg_test.txt
+++ b/Documentation/cgroups/memcg_test.txt
@@ -399,8 +399,7 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
 
 9.10 Memory thresholds
 Memory controller implements memory thresholds using cgroups notification
-API. You can use Documentation/cgroups/cgroup_event_listener.c to test
-it.
+API. You can use tools/cgroup/cgroup_event_listener.c to test it.
 
 (Shell-A) Create cgroup and run event listener
 	# mkdir /cgroup/A
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 7d73905dcba2..900af5964f55 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -203,6 +203,7 @@ struct cgroup {
 
 	/* For RCU-protected deletion */
 	struct rcu_head rcu_head;
+	struct work_struct free_work;
 
 	/* List of events which userspace want to receive */
 	struct list_head event_list;
@@ -558,6 +559,7 @@ static inline struct cgroup* task_cgroup(struct task_struct *task,
 
 struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos,
 					  struct cgroup *cgroup);
+struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos);
 
 /**
  * cgroup_for_each_descendant_pre - pre-order walk of a cgroup's descendants
@@ -706,7 +708,6 @@ struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id);
 static inline int cgroup_init_early(void) { return 0; }
 static inline int cgroup_init(void) { return 0; }
 static inline void cgroup_fork(struct task_struct *p) {}
-static inline void cgroup_fork_callbacks(struct task_struct *p) {}
 static inline void cgroup_post_fork(struct task_struct *p) {}
 static inline void cgroup_exit(struct task_struct *p, int callbacks) {}
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 33cc42130371..e4112aad2964 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2659,7 +2659,10 @@ extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
 extern struct task_group root_task_group;
 
 extern struct task_group *sched_create_group(struct task_group *parent);
+extern void sched_online_group(struct task_group *tg,
+			       struct task_group *parent);
 extern void sched_destroy_group(struct task_group *tg);
+extern void sched_offline_group(struct task_group *tg);
 extern void sched_move_task(struct task_struct *tsk);
 #ifdef CONFIG_FAIR_GROUP_SCHED
 extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 4855892798fd..b5c64327e712 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -52,7 +52,7 @@
 #include <linux/module.h>
 #include <linux/delayacct.h>
 #include <linux/cgroupstats.h>
-#include <linux/hash.h>
+#include <linux/hashtable.h>
 #include <linux/namei.h>
 #include <linux/pid_namespace.h>
 #include <linux/idr.h>
@@ -376,22 +376,18 @@ static int css_set_count;
  * account cgroups in empty hierarchies.
  */
 #define CSS_SET_HASH_BITS	7
-#define CSS_SET_TABLE_SIZE	(1 << CSS_SET_HASH_BITS)
-static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE];
+static DEFINE_HASHTABLE(css_set_table, CSS_SET_HASH_BITS);
 
-static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[])
+static unsigned long css_set_hash(struct cgroup_subsys_state *css[])
 {
 	int i;
-	int index;
-	unsigned long tmp = 0UL;
+	unsigned long key = 0UL;
 
 	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++)
-		tmp += (unsigned long)css[i];
-	tmp = (tmp >> 16) ^ tmp;
+		key += (unsigned long)css[i];
+	key = (key >> 16) ^ key;
 
-	index = hash_long(tmp, CSS_SET_HASH_BITS);
-
-	return &css_set_table[index];
+	return key;
 }
 
 /* We don't maintain the lists running through each css_set to its
@@ -418,7 +414,7 @@ static void __put_css_set(struct css_set *cg, int taskexit)
 	}
 
 	/* This css_set is dead. unlink it and release cgroup refcounts */
-	hlist_del(&cg->hlist);
+	hash_del(&cg->hlist);
 	css_set_count--;
 
 	list_for_each_entry_safe(link, saved_link, &cg->cg_links,
@@ -426,12 +422,20 @@ static void __put_css_set(struct css_set *cg, int taskexit)
 		struct cgroup *cgrp = link->cgrp;
 		list_del(&link->cg_link_list);
 		list_del(&link->cgrp_link_list);
+
+		/*
+		 * We may not be holding cgroup_mutex, and if cgrp->count is
+		 * dropped to 0 the cgroup can be destroyed at any time, hence
+		 * rcu_read_lock is used to keep it alive.
+		 */
+		rcu_read_lock();
 		if (atomic_dec_and_test(&cgrp->count) &&
 		    notify_on_release(cgrp)) {
 			if (taskexit)
 				set_bit(CGRP_RELEASABLE, &cgrp->flags);
 			check_for_release(cgrp);
 		}
+		rcu_read_unlock();
 
 		kfree(link);
 	}
@@ -550,9 +554,9 @@ static struct css_set *find_existing_css_set(
 {
 	int i;
 	struct cgroupfs_root *root = cgrp->root;
-	struct hlist_head *hhead;
 	struct hlist_node *node;
 	struct css_set *cg;
+	unsigned long key;
 
 	/*
 	 * Build the set of subsystem state objects that we want to see in the
@@ -572,8 +576,8 @@ static struct css_set *find_existing_css_set(
 		}
 	}
 
-	hhead = css_set_hash(template);
-	hlist_for_each_entry(cg, node, hhead, hlist) {
+	key = css_set_hash(template);
+	hash_for_each_possible(css_set_table, cg, node, hlist, key) {
 		if (!compare_css_sets(cg, oldcg, cgrp, template))
 			continue;
 
@@ -657,8 +661,8 @@ static struct css_set *find_css_set(
 
 	struct list_head tmp_cg_links;
 
-	struct hlist_head *hhead;
 	struct cg_cgroup_link *link;
+	unsigned long key;
 
 	/* First see if we already have a cgroup group that matches
 	 * the desired set */
@@ -704,8 +708,8 @@ static struct css_set *find_css_set(
 	css_set_count++;
 
 	/* Add this cgroup group to the hash table */
-	hhead = css_set_hash(res->subsys);
-	hlist_add_head(&res->hlist, hhead);
+	key = css_set_hash(res->subsys);
+	hash_add(css_set_table, &res->hlist, key);
 
 	write_unlock(&css_set_lock);
 
@@ -856,47 +860,54 @@ static struct inode *cgroup_new_inode(umode_t mode, struct super_block *sb)
 	return inode;
 }
 
-static void cgroup_diput(struct dentry *dentry, struct inode *inode)
+static void cgroup_free_fn(struct work_struct *work)
 {
-	/* is dentry a directory ? if so, kfree() associated cgroup */
-	if (S_ISDIR(inode->i_mode)) {
-		struct cgroup *cgrp = dentry->d_fsdata;
-		struct cgroup_subsys *ss;
-		BUG_ON(!(cgroup_is_removed(cgrp)));
-		/* It's possible for external users to be holding css
-		 * reference counts on a cgroup; css_put() needs to
-		 * be able to access the cgroup after decrementing
-		 * the reference count in order to know if it needs to
-		 * queue the cgroup to be handled by the release
-		 * agent */
-		synchronize_rcu();
+	struct cgroup *cgrp = container_of(work, struct cgroup, free_work);
+	struct cgroup_subsys *ss;
 
-		mutex_lock(&cgroup_mutex);
-		/*
-		 * Release the subsystem state objects.
-		 */
-		for_each_subsys(cgrp->root, ss)
-			ss->css_free(cgrp);
+	mutex_lock(&cgroup_mutex);
+	/*
+	 * Release the subsystem state objects.
+	 */
+	for_each_subsys(cgrp->root, ss)
+		ss->css_free(cgrp);
 
-		cgrp->root->number_of_cgroups--;
-		mutex_unlock(&cgroup_mutex);
+	cgrp->root->number_of_cgroups--;
+	mutex_unlock(&cgroup_mutex);
 
-		/*
-		 * Drop the active superblock reference that we took when we
-		 * created the cgroup
-		 */
-		deactivate_super(cgrp->root->sb);
+	/*
+	 * Drop the active superblock reference that we took when we
+	 * created the cgroup
+	 */
+	deactivate_super(cgrp->root->sb);
 
-		/*
-		 * if we're getting rid of the cgroup, refcount should ensure
-		 * that there are no pidlists left.
-		 */
-		BUG_ON(!list_empty(&cgrp->pidlists));
+	/*
+	 * if we're getting rid of the cgroup, refcount should ensure
+	 * that there are no pidlists left.
+	 */
+	BUG_ON(!list_empty(&cgrp->pidlists));
 
-		simple_xattrs_free(&cgrp->xattrs);
+	simple_xattrs_free(&cgrp->xattrs);
 
-		ida_simple_remove(&cgrp->root->cgroup_ida, cgrp->id);
-		kfree_rcu(cgrp, rcu_head);
+	ida_simple_remove(&cgrp->root->cgroup_ida, cgrp->id);
+	kfree(cgrp);
+}
+
+static void cgroup_free_rcu(struct rcu_head *head)
+{
+	struct cgroup *cgrp = container_of(head, struct cgroup, rcu_head);
+
+	schedule_work(&cgrp->free_work);
+}
+
+static void cgroup_diput(struct dentry *dentry, struct inode *inode)
+{
+	/* is dentry a directory ? if so, kfree() associated cgroup */
+	if (S_ISDIR(inode->i_mode)) {
+		struct cgroup *cgrp = dentry->d_fsdata;
+
+		BUG_ON(!(cgroup_is_removed(cgrp)));
+		call_rcu(&cgrp->rcu_head, cgroup_free_rcu);
 	} else {
 		struct cfent *cfe = __d_cfe(dentry);
 		struct cgroup *cgrp = dentry->d_parent->d_fsdata;
@@ -925,13 +936,17 @@ static void remove_dir(struct dentry *d)
 	dput(parent);
 }
 
-static int cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
+static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
 {
 	struct cfent *cfe;
 
 	lockdep_assert_held(&cgrp->dentry->d_inode->i_mutex);
 	lockdep_assert_held(&cgroup_mutex);
 
+	/*
+	 * If we're doing cleanup due to failure of cgroup_create(),
+	 * the corresponding @cfe may not exist.
+	 */
 	list_for_each_entry(cfe, &cgrp->files, node) {
 		struct dentry *d = cfe->dentry;
 
@@ -944,9 +959,8 @@ static int cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
 		list_del_init(&cfe->node);
 		dput(d);
 
-		return 0;
+		break;
 	}
-	return -ENOENT;
 }
 
 /**
@@ -1083,7 +1097,6 @@ static int rebind_subsystems(struct cgroupfs_root *root,
 		}
 	}
 	root->subsys_mask = root->actual_subsys_mask = final_subsys_mask;
-	synchronize_rcu();
 
 	return 0;
 }
@@ -1393,6 +1406,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
 	INIT_LIST_HEAD(&cgrp->allcg_node);
 	INIT_LIST_HEAD(&cgrp->release_list);
 	INIT_LIST_HEAD(&cgrp->pidlists);
+	INIT_WORK(&cgrp->free_work, cgroup_free_fn);
 	mutex_init(&cgrp->pidlist_mutex);
 	INIT_LIST_HEAD(&cgrp->event_list);
 	spin_lock_init(&cgrp->event_list_lock);
@@ -1597,6 +1611,8 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 		struct cgroupfs_root *existing_root;
 		const struct cred *cred;
 		int i;
+		struct hlist_node *node;
+		struct css_set *cg;
 
 		BUG_ON(sb->s_root != NULL);
 
@@ -1650,14 +1666,8 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 		/* Link the top cgroup in this hierarchy into all
 		 * the css_set objects */
 		write_lock(&css_set_lock);
-		for (i = 0; i < CSS_SET_TABLE_SIZE; i++) {
-			struct hlist_head *hhead = &css_set_table[i];
-			struct hlist_node *node;
-			struct css_set *cg;
-
-			hlist_for_each_entry(cg, node, hhead, hlist)
-				link_css_set(&tmp_cg_links, cg, root_cgrp);
-		}
+		hash_for_each(css_set_table, i, node, cg, hlist)
+			link_css_set(&tmp_cg_links, cg, root_cgrp);
 		write_unlock(&css_set_lock);
 
 		free_cg_links(&tmp_cg_links);
@@ -1773,7 +1783,7 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
 	rcu_lockdep_assert(rcu_read_lock_held() || cgroup_lock_is_held(),
 			   "cgroup_path() called without proper locking");
 
-	if (!dentry || cgrp == dummytop) {
+	if (cgrp == dummytop) {
 		/*
 		 * Inactive subsystems have no dentry for their root
 		 * cgroup
@@ -1982,7 +1992,6 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 			ss->attach(cgrp, &tset);
 	}
 
-	synchronize_rcu();
 out:
 	if (retval) {
 		for_each_subsys(root, ss) {
@@ -2151,7 +2160,6 @@ static int cgroup_attach_proc(struct cgroup *cgrp, struct task_struct *leader)
 	/*
 	 * step 5: success! and cleanup
 	 */
-	synchronize_rcu();
 	retval = 0;
 out_put_css_set_refs:
 	if (retval) {
@@ -2769,14 +2777,14 @@ static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys,
 		if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent)
 			continue;
 
-		if (is_add)
+		if (is_add) {
 			err = cgroup_add_file(cgrp, subsys, cft);
-		else
-			err = cgroup_rm_file(cgrp, cft);
-		if (err) {
-			pr_warning("cgroup_addrm_files: failed to %s %s, err=%d\n",
-				   is_add ? "add" : "remove", cft->name, err);
+			if (err)
+				pr_warn("cgroup_addrm_files: failed to add %s, err=%d\n",
+					cft->name, err);
 			ret = err;
+		} else {
+			cgroup_rm_file(cgrp, cft);
 		}
 	}
 	return ret;
@@ -3017,6 +3025,32 @@ struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos,
 }
 EXPORT_SYMBOL_GPL(cgroup_next_descendant_pre);
 
+/**
+ * cgroup_rightmost_descendant - return the rightmost descendant of a cgroup
+ * @pos: cgroup of interest
+ *
+ * Return the rightmost descendant of @pos. If there's no descendant,
+ * @pos is returned. This can be used during pre-order traversal to skip
+ * subtree of @pos.
+ */
+struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos)
+{
+	struct cgroup *last, *tmp;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	do {
+		last = pos;
+		/* ->prev isn't RCU safe, walk ->next till the end */
+		pos = NULL;
+		list_for_each_entry_rcu(tmp, &last->children, sibling)
+			pos = tmp;
+	} while (pos);
+
+	return last;
+}
+EXPORT_SYMBOL_GPL(cgroup_rightmost_descendant);
+
 static struct cgroup *cgroup_leftmost_descendant(struct cgroup *pos)
 {
 	struct cgroup *last;
@@ -3752,8 +3786,13 @@ static void cgroup_event_remove(struct work_struct *work)
 					     remove);
 	struct cgroup *cgrp = event->cgrp;
 
+	remove_wait_queue(event->wqh, &event->wait);
+
 	event->cft->unregister_event(cgrp, event->cft, event->eventfd);
 
+	/* Notify userspace the event is going away. */
+	eventfd_signal(event->eventfd, 1);
+
 	eventfd_ctx_put(event->eventfd);
 	kfree(event);
 	dput(cgrp->dentry);
@@ -3773,15 +3812,25 @@ static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
 	unsigned long flags = (unsigned long)key;
 
 	if (flags & POLLHUP) {
-		__remove_wait_queue(event->wqh, &event->wait);
-		spin_lock(&cgrp->event_list_lock);
-		list_del_init(&event->list);
-		spin_unlock(&cgrp->event_list_lock);
 		/*
-		 * We are in atomic context, but cgroup_event_remove() may
-		 * sleep, so we have to call it in workqueue.
+		 * If the event has been detached at cgroup removal, we
+		 * can simply return knowing the other side will cleanup
+		 * for us.
+		 *
+		 * We can't race against event freeing since the other
+		 * side will require wqh->lock via remove_wait_queue(),
+		 * which we hold.
 		 */
-		schedule_work(&event->remove);
+		spin_lock(&cgrp->event_list_lock);
+		if (!list_empty(&event->list)) {
+			list_del_init(&event->list);
+			/*
+			 * We are in atomic context, but cgroup_event_remove()
+			 * may sleep, so we have to call it in workqueue.
+			 */
+			schedule_work(&event->remove);
+		}
+		spin_unlock(&cgrp->event_list_lock);
 	}
 
 	return 0;
@@ -3807,6 +3856,7 @@ static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft,
 				      const char *buffer)
 {
 	struct cgroup_event *event = NULL;
+	struct cgroup *cgrp_cfile;
 	unsigned int efd, cfd;
 	struct file *efile = NULL;
 	struct file *cfile = NULL;
@@ -3862,6 +3912,16 @@ static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft,
 		goto fail;
 	}
 
+	/*
+	 * The file to be monitored must be in the same cgroup as
+	 * cgroup.event_control is.
+	 */
+	cgrp_cfile = __d_cgrp(cfile->f_dentry->d_parent);
+	if (cgrp_cfile != cgrp) {
+		ret = -EINVAL;
+		goto fail;
+	}
+
 	if (!event->cft->register_event || !event->cft->unregister_event) {
 		ret = -EINVAL;
 		goto fail;
@@ -4135,6 +4195,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 
 	init_cgroup_housekeeping(cgrp);
 
+	dentry->d_fsdata = cgrp;
+	cgrp->dentry = dentry;
+
 	cgrp->parent = parent;
 	cgrp->root = parent->root;
 	cgrp->top_cgroup = parent->top_cgroup;
@@ -4172,8 +4235,6 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 	lockdep_assert_held(&dentry->d_inode->i_mutex);
 
 	/* allocation complete, commit to creation */
-	dentry->d_fsdata = cgrp;
-	cgrp->dentry = dentry;
 	list_add_tail(&cgrp->allcg_node, &root->allcg_list);
 	list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children);
 	root->number_of_cgroups++;
@@ -4340,20 +4401,14 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 	/*
 	 * Unregister events and notify userspace.
 	 * Notify userspace about cgroup removing only after rmdir of cgroup
-	 * directory to avoid race between userspace and kernelspace. Use
-	 * a temporary list to avoid a deadlock with cgroup_event_wake(). Since
-	 * cgroup_event_wake() is called with the wait queue head locked,
-	 * remove_wait_queue() cannot be called while holding event_list_lock.
+	 * directory to avoid race between userspace and kernelspace.
 	 */
 	spin_lock(&cgrp->event_list_lock);
-	list_splice_init(&cgrp->event_list, &tmp_list);
-	spin_unlock(&cgrp->event_list_lock);
-	list_for_each_entry_safe(event, tmp, &tmp_list, list) {
+	list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
 		list_del_init(&event->list);
-		remove_wait_queue(event->wqh, &event->wait);
-		eventfd_signal(event->eventfd, 1);
 		schedule_work(&event->remove);
 	}
+	spin_unlock(&cgrp->event_list_lock);
 
 	return 0;
 }
@@ -4438,6 +4493,9 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
 {
 	struct cgroup_subsys_state *css;
 	int i, ret;
+	struct hlist_node *node, *tmp;
+	struct css_set *cg;
+	unsigned long key;
 
 	/* check name and function validity */
 	if (ss->name == NULL || strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN ||
@@ -4503,23 +4561,17 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
 	 * this is all done under the css_set_lock.
 	 */
 	write_lock(&css_set_lock);
-	for (i = 0; i < CSS_SET_TABLE_SIZE; i++) {
-		struct css_set *cg;
-		struct hlist_node *node, *tmp;
-		struct hlist_head *bucket = &css_set_table[i], *new_bucket;
-
-		hlist_for_each_entry_safe(cg, node, tmp, bucket, hlist) {
-			/* skip entries that we already rehashed */
-			if (cg->subsys[ss->subsys_id])
-				continue;
-			/* remove existing entry */
-			hlist_del(&cg->hlist);
-			/* set new value */
-			cg->subsys[ss->subsys_id] = css;
-			/* recompute hash and restore entry */
-			new_bucket = css_set_hash(cg->subsys);
-			hlist_add_head(&cg->hlist, new_bucket);
-		}
+	hash_for_each_safe(css_set_table, i, node, tmp, cg, hlist) {
+		/* skip entries that we already rehashed */
+		if (cg->subsys[ss->subsys_id])
+			continue;
+		/* remove existing entry */
+		hash_del(&cg->hlist);
+		/* set new value */
+		cg->subsys[ss->subsys_id] = css;
+		/* recompute hash and restore entry */
+		key = css_set_hash(cg->subsys);
+		hash_add(css_set_table, node, key);
 	}
 	write_unlock(&css_set_lock);
 
@@ -4551,7 +4603,6 @@ EXPORT_SYMBOL_GPL(cgroup_load_subsys);
 void cgroup_unload_subsys(struct cgroup_subsys *ss)
 {
 	struct cg_cgroup_link *link;
-	struct hlist_head *hhead;
 
 	BUG_ON(ss->module == NULL);
 
@@ -4585,11 +4636,12 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss)
 	write_lock(&css_set_lock);
 	list_for_each_entry(link, &dummytop->css_sets, cgrp_link_list) {
 		struct css_set *cg = link->cg;
+		unsigned long key;
 
-		hlist_del(&cg->hlist);
+		hash_del(&cg->hlist);
 		cg->subsys[ss->subsys_id] = NULL;
-		hhead = css_set_hash(cg->subsys);
-		hlist_add_head(&cg->hlist, hhead);
+		key = css_set_hash(cg->subsys);
+		hash_add(css_set_table, &cg->hlist, key);
 	}
 	write_unlock(&css_set_lock);
 
@@ -4631,9 +4683,6 @@ int __init cgroup_init_early(void)
 	list_add(&init_css_set_link.cg_link_list,
 		 &init_css_set.cg_links);
 
-	for (i = 0; i < CSS_SET_TABLE_SIZE; i++)
-		INIT_HLIST_HEAD(&css_set_table[i]);
-
 	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
 		struct cgroup_subsys *ss = subsys[i];
 
@@ -4667,7 +4716,7 @@ int __init cgroup_init(void)
 {
 	int err;
 	int i;
-	struct hlist_head *hhead;
+	unsigned long key;
 
 	err = bdi_init(&cgroup_backing_dev_info);
 	if (err)
@@ -4686,8 +4735,8 @@ int __init cgroup_init(void)
 	}
 
 	/* Add init_css_set to the hash table */
-	hhead = css_set_hash(init_css_set.subsys);
-	hlist_add_head(&init_css_set.hlist, hhead);
+	key = css_set_hash(init_css_set.subsys);
+	hash_add(css_set_table, &init_css_set.hlist, key);
 	BUG_ON(!init_root_id(&rootnode));
 
 	cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj);
@@ -4982,8 +5031,7 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
 	}
 	task_unlock(tsk);
 
-	if (cg)
-		put_css_set_taskexit(cg);
+	put_css_set_taskexit(cg);
 }
 
 /**
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 7bb63eea6eb8..5bb9bf18438c 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2511,8 +2511,16 @@ void cpuset_print_task_mems_allowed(struct task_struct *tsk)
 
 	dentry = task_cs(tsk)->css.cgroup->dentry;
 	spin_lock(&cpuset_buffer_lock);
-	snprintf(cpuset_name, CPUSET_NAME_LEN,
-		 dentry ? (const char *)dentry->d_name.name : "/");
+
+	if (!dentry) {
+		strcpy(cpuset_name, "/");
+	} else {
+		spin_lock(&dentry->d_lock);
+		strlcpy(cpuset_name, (const char *)dentry->d_name.name,
+			CPUSET_NAME_LEN);
+		spin_unlock(&dentry->d_lock);
+	}
+
 	nodelist_scnprintf(cpuset_nodelist, CPUSET_NODELIST_LEN,
 			   tsk->mems_allowed);
 	printk(KERN_INFO "%s cpuset=%s mems_allowed=%s\n",
diff --git a/kernel/sched/auto_group.c b/kernel/sched/auto_group.c
index 0984a21076a3..64de5f8b0c9e 100644
--- a/kernel/sched/auto_group.c
+++ b/kernel/sched/auto_group.c
@@ -35,6 +35,7 @@ static inline void autogroup_destroy(struct kref *kref)
 	ag->tg->rt_se = NULL;
 	ag->tg->rt_rq = NULL;
 #endif
+	sched_offline_group(ag->tg);
 	sched_destroy_group(ag->tg);
 }
 
@@ -76,6 +77,8 @@ static inline struct autogroup *autogroup_create(void)
 	if (IS_ERR(tg))
 		goto out_free;
 
+	sched_online_group(tg, &root_task_group);
+
 	kref_init(&ag->kref);
 	init_rwsem(&ag->lock);
 	ag->id = atomic_inc_return(&autogroup_seq_nr);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 03d7784b7bd2..3a673a3b0c6b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7161,7 +7161,6 @@ static void free_sched_group(struct task_group *tg)
 struct task_group *sched_create_group(struct task_group *parent)
 {
 	struct task_group *tg;
-	unsigned long flags;
 
 	tg = kzalloc(sizeof(*tg), GFP_KERNEL);
 	if (!tg)
@@ -7173,6 +7172,17 @@ struct task_group *sched_create_group(struct task_group *parent)
 	if (!alloc_rt_sched_group(tg, parent))
 		goto err;
 
+	return tg;
+
+err:
+	free_sched_group(tg);
+	return ERR_PTR(-ENOMEM);
+}
+
+void sched_online_group(struct task_group *tg, struct task_group *parent)
+{
+	unsigned long flags;
+
 	spin_lock_irqsave(&task_group_lock, flags);
 	list_add_rcu(&tg->list, &task_groups);
 
@@ -7182,12 +7192,6 @@ struct task_group *sched_create_group(struct task_group *parent)
 	INIT_LIST_HEAD(&tg->children);
 	list_add_rcu(&tg->siblings, &parent->children);
 	spin_unlock_irqrestore(&task_group_lock, flags);
-
-	return tg;
-
-err:
-	free_sched_group(tg);
-	return ERR_PTR(-ENOMEM);
 }
 
 /* rcu callback to free various structures associated with a task group */
@@ -7200,6 +7204,12 @@ static void free_sched_group_rcu(struct rcu_head *rhp)
 /* Destroy runqueue etc associated with a task group */
 void sched_destroy_group(struct task_group *tg)
 {
+	/* wait for possible concurrent references to cfs_rqs complete */
+	call_rcu(&tg->rcu, free_sched_group_rcu);
+}
+
+void sched_offline_group(struct task_group *tg)
+{
 	unsigned long flags;
 	int i;
 
@@ -7211,9 +7221,6 @@ void sched_destroy_group(struct task_group *tg)
 	list_del_rcu(&tg->list);
 	list_del_rcu(&tg->siblings);
 	spin_unlock_irqrestore(&task_group_lock, flags);
-
-	/* wait for possible concurrent references to cfs_rqs complete */
-	call_rcu(&tg->rcu, free_sched_group_rcu);
 }
 
 /* change task's runqueue when it moves between groups.
@@ -7584,6 +7591,19 @@ static struct cgroup_subsys_state *cpu_cgroup_css_alloc(struct cgroup *cgrp)
 	return &tg->css;
 }
 
+static int cpu_cgroup_css_online(struct cgroup *cgrp)
+{
+	struct task_group *tg = cgroup_tg(cgrp);
+	struct task_group *parent;
+
+	if (!cgrp->parent)
+		return 0;
+
+	parent = cgroup_tg(cgrp->parent);
+	sched_online_group(tg, parent);
+	return 0;
+}
+
 static void cpu_cgroup_css_free(struct cgroup *cgrp)
 {
 	struct task_group *tg = cgroup_tg(cgrp);
@@ -7591,6 +7611,13 @@ static void cpu_cgroup_css_free(struct cgroup *cgrp)
 	sched_destroy_group(tg);
 }
 
+static void cpu_cgroup_css_offline(struct cgroup *cgrp)
+{
+	struct task_group *tg = cgroup_tg(cgrp);
+
+	sched_offline_group(tg);
+}
+
 static int cpu_cgroup_can_attach(struct cgroup *cgrp,
 				 struct cgroup_taskset *tset)
 {
@@ -7946,6 +7973,8 @@ struct cgroup_subsys cpu_cgroup_subsys = {
 	.name		= "cpu",
 	.css_alloc	= cpu_cgroup_css_alloc,
 	.css_free	= cpu_cgroup_css_free,
+	.css_online	= cpu_cgroup_css_online,
+	.css_offline	= cpu_cgroup_css_offline,
 	.can_attach	= cpu_cgroup_can_attach,
 	.attach		= cpu_cgroup_attach,
 	.exit		= cpu_cgroup_exit,
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 7ae4c4c5420e..557e7b53b323 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -110,13 +110,6 @@ static char *task_group_path(struct task_group *tg)
 	if (autogroup_path(tg, group_path, PATH_MAX))
 		return group_path;
 
-	/*
-	 * May be NULL if the underlying cgroup isn't fully-created yet
-	 */
-	if (!tg->css.cgroup) {
-		group_path[0] = '\0';
-		return group_path;
-	}
 	cgroup_path(tg->css.cgroup, group_path, PATH_MAX);
 	return group_path;
 }
diff --git a/tools/Makefile b/tools/Makefile
index 798fa0ef048e..fa36565b209d 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -3,6 +3,7 @@ include scripts/Makefile.include
 help:
 	@echo 'Possible targets:'
 	@echo ''
+	@echo '  cgroup     - cgroup tools'
 	@echo '  cpupower   - a tool for all things x86 CPU power'
 	@echo '  firewire   - the userspace part of nosy, an IEEE-1394 traffic sniffer'
 	@echo '  lguest     - a minimal 32-bit x86 hypervisor'
@@ -33,7 +34,7 @@ help:
 cpupower: FORCE
 	$(call descend,power/$@)
 
-firewire lguest perf usb virtio vm: FORCE
+cgroup firewire lguest perf usb virtio vm: FORCE
 	$(call descend,$@)
 
 selftests: FORCE
@@ -45,7 +46,7 @@ turbostat x86_energy_perf_policy: FORCE
 cpupower_install:
 	$(call descend,power/$(@:_install=),install)
 
-firewire_install lguest_install perf_install usb_install virtio_install vm_install:
+cgroup_install firewire_install lguest_install perf_install usb_install virtio_install vm_install:
 	$(call descend,$(@:_install=),install)
 
 selftests_install:
@@ -54,14 +55,14 @@ selftests_install:
 turbostat_install x86_energy_perf_policy_install:
 	$(call descend,power/x86/$(@:_install=),install)
 
-install: cpupower_install firewire_install lguest_install perf_install \
-	selftests_install turbostat_install usb_install virtio_install \
-	vm_install x86_energy_perf_policy_install
+install: cgroup_install cpupower_install firewire_install lguest_install \
+	perf_install selftests_install turbostat_install usb_install \
+	virtio_install vm_install x86_energy_perf_policy_install
 
 cpupower_clean:
 	$(call descend,power/cpupower,clean)
 
-firewire_clean lguest_clean perf_clean usb_clean virtio_clean vm_clean:
+cgroup_clean firewire_clean lguest_clean perf_clean usb_clean virtio_clean vm_clean:
 	$(call descend,$(@:_clean=),clean)
 
 selftests_clean:
@@ -70,8 +71,8 @@ selftests_clean:
 turbostat_clean x86_energy_perf_policy_clean:
 	$(call descend,power/x86/$(@:_clean=),clean)
 
-clean: cpupower_clean firewire_clean lguest_clean perf_clean selftests_clean \
-	turbostat_clean usb_clean virtio_clean vm_clean \
-	x86_energy_perf_policy_clean
+clean: cgroup_clean cpupower_clean firewire_clean lguest_clean perf_clean \
+	selftests_clean turbostat_clean usb_clean virtio_clean \
+	vm_clean x86_energy_perf_policy_clean
 
 .PHONY: FORCE
diff --git a/tools/cgroup/.gitignore b/tools/cgroup/.gitignore
new file mode 100644
index 000000000000..633cd9b874f9
--- /dev/null
+++ b/tools/cgroup/.gitignore
@@ -0,0 +1 @@
+cgroup_event_listener
diff --git a/tools/cgroup/Makefile b/tools/cgroup/Makefile
new file mode 100644
index 000000000000..b4286196b763
--- /dev/null
+++ b/tools/cgroup/Makefile
@@ -0,0 +1,11 @@
+# Makefile for cgroup tools
+
+CC = $(CROSS_COMPILE)gcc
+CFLAGS = -Wall -Wextra
+
+all: cgroup_event_listener
+%: %.c
+	$(CC) $(CFLAGS) -o $@ $^
+
+clean:
+	$(RM) cgroup_event_listener
diff --git a/Documentation/cgroups/cgroup_event_listener.c b/tools/cgroup/cgroup_event_listener.c
index 3e082f96dc12..4eb5507205c9 100644
--- a/Documentation/cgroups/cgroup_event_listener.c
+++ b/tools/cgroup/cgroup_event_listener.c
@@ -5,6 +5,7 @@
  */
 
 #include <assert.h>
+#include <err.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <libgen.h>
@@ -15,7 +16,7 @@
 
 #include <sys/eventfd.h>
 
-#define USAGE_STR "Usage: cgroup_event_listener <path-to-control-file> <args>\n"
+#define USAGE_STR "Usage: cgroup_event_listener <path-to-control-file> <args>"
 
 int main(int argc, char **argv)
 {
@@ -26,49 +27,33 @@ int main(int argc, char **argv)
 	char line[LINE_MAX];
 	int ret;
 
-	if (argc != 3) {
-		fputs(USAGE_STR, stderr);
-		return 1;
-	}
+	if (argc != 3)
+		errx(1, "%s", USAGE_STR);
 
 	cfd = open(argv[1], O_RDONLY);
-	if (cfd == -1) {
-		fprintf(stderr, "Cannot open %s: %s\n", argv[1],
-			strerror(errno));
-		goto out;
-	}
+	if (cfd == -1)
+		err(1, "Cannot open %s", argv[1]);
 
 	ret = snprintf(event_control_path, PATH_MAX, "%s/cgroup.event_control",
 		       dirname(argv[1]));
-	if (ret >= PATH_MAX) {
-		fputs("Path to cgroup.event_control is too long\n", stderr);
-		goto out;
-	}
+	if (ret >= PATH_MAX)
+		errx(1, "Path to cgroup.event_control is too long");
 
 	event_control = open(event_control_path, O_WRONLY);
-	if (event_control == -1) {
-		fprintf(stderr, "Cannot open %s: %s\n", event_control_path,
-			strerror(errno));
-		goto out;
-	}
+	if (event_control == -1)
+		err(1, "Cannot open %s", event_control_path);
 
 	efd = eventfd(0, 0);
-	if (efd == -1) {
-		perror("eventfd() failed");
-		goto out;
-	}
+	if (efd == -1)
+		err(1, "eventfd() failed");
 
 	ret = snprintf(line, LINE_MAX, "%d %d %s", efd, cfd, argv[2]);
-	if (ret >= LINE_MAX) {
-		fputs("Arguments string is too long\n", stderr);
-		goto out;
-	}
+	if (ret >= LINE_MAX)
+		errx(1, "Arguments string is too long");
 
 	ret = write(event_control, line, strlen(line) + 1);
-	if (ret == -1) {
-		perror("Cannot write to cgroup.event_control");
-		goto out;
-	}
+	if (ret == -1)
+		err(1, "Cannot write to cgroup.event_control");
 
 	while (1) {
 		uint64_t result;
@@ -77,34 +62,21 @@ int main(int argc, char **argv)
 		if (ret == -1) {
 			if (errno == EINTR)
 				continue;
-			perror("Cannot read from eventfd");
-			break;
+			err(1, "Cannot read from eventfd");
 		}
 		assert(ret == sizeof(result));
 
 		ret = access(event_control_path, W_OK);
 		if ((ret == -1) && (errno == ENOENT)) {
 			puts("The cgroup seems to have removed.");
-			ret = 0;
-			break;
-		}
-
-		if (ret == -1) {
-			perror("cgroup.event_control "
-			       "is not accessible any more");
 			break;
 		}
 
+		if (ret == -1)
+			err(1, "cgroup.event_control is not accessible any more");
+
 		printf("%s %s: crossed\n", argv[1], argv[2]);
 	}
 
-out:
-	if (efd >= 0)
-		close(efd);
-	if (event_control >= 0)
-		close(event_control);
-	if (cfd >= 0)
-		close(cfd);
-
-	return (ret != 0);
+	return 0;
 }
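
The relocated listener keeps its two-argument interface (see USAGE_STR above). For reference, an illustrative build-and-run sequence against a memcg threshold, using the /cgroup/A hierarchy from the memcg_test.txt example this series updates and an arbitrary 5M threshold (exact mount points and values will vary):

	# make -C tools/cgroup
	# ./tools/cgroup/cgroup_event_listener /cgroup/A/memory.usage_in_bytes 5M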