Diffstat (limited to 'kernel/cgroup.c')

 kernel/cgroup.c | 433 +++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 399 insertions(+), 34 deletions(-)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index c500ca7239b2..a7267bfd3765 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -94,7 +94,6 @@ struct cgroupfs_root {
 	char release_agent_path[PATH_MAX];
 };
 
-
 /*
  * The "rootnode" hierarchy is the "dummy hierarchy", reserved for the
  * subsystems that are otherwise unattached - it never has more than a
@@ -102,6 +101,39 @@ struct cgroupfs_root {
  */
 static struct cgroupfs_root rootnode;
 
+/*
+ * CSS ID -- ID per subsys's Cgroup Subsys State (CSS). Used only when
+ * cgroup_subsys->use_id != 0.
+ */
+#define CSS_ID_MAX	(65535)
+struct css_id {
+	/*
+	 * The css to which this ID points. This pointer is set to a valid
+	 * value after the cgroup is populated; if the cgroup is removed, it
+	 * becomes NULL. The pointer is expected to be RCU-safe because
+	 * destroy() is called after synchronize_rcu(). For safe use,
+	 * css_is_removed() or css_tryget() should be used to avoid races.
+	 */
+	struct cgroup_subsys_state *css;
+	/*
+	 * ID of this css.
+	 */
+	unsigned short id;
+	/*
+	 * Depth in the hierarchy this ID belongs to.
+	 */
+	unsigned short depth;
+	/*
+	 * ID is freed by RCU. (The lookup routine is RCU-safe.)
+	 */
+	struct rcu_head rcu_head;
+	/*
+	 * IDs on the path from the hierarchy root down to this css.
+	 */
+	unsigned short stack[0]; /* Array of length (depth+1) */
+};
+
+
 /* The list of hierarchy roots */
 
 static LIST_HEAD(roots);
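
The stack[] array is what makes ancestry tests O(1): every css_id records the IDs of all of its ancestors, indexed by depth. The following is a condensed restatement of the css_is_ancestor() logic added later in this patch, with made-up IDs purely for illustration:

/*
 * Hypothetical hierarchy root(id=1) -> A(id=4) -> B(id=9):
 *
 *   A's css_id: depth = 1, stack = { 1, 4 }
 *   B's css_id: depth = 2, stack = { 1, 4, 9 }
 *
 * "Is B below A?" is then a single array lookup:
 * B->stack[A->depth] == A->id, i.e. B->stack[1] == 4 -> true.
 */
static bool id_is_ancestor(struct css_id *root, struct css_id *child)
{
	if (child->depth < root->depth)
		return false;
	return child->stack[root->depth] == root->id;
}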
@@ -185,6 +217,8 @@ struct cg_cgroup_link {
 static struct css_set init_css_set;
 static struct cg_cgroup_link init_css_set_link;
 
+static int cgroup_subsys_init_idr(struct cgroup_subsys *ss);
+
 /* css_set_lock protects the list of css_set objects, and the
  * chain of tasks off each css_set. Nests outside task->alloc_lock
  * due to cgroup_iter_start() */
@@ -567,6 +601,9 @@ static struct backing_dev_info cgroup_backing_dev_info = {
 	.capabilities	= BDI_CAP_NO_ACCT_AND_WRITEBACK,
 };
 
+static int alloc_css_id(struct cgroup_subsys *ss,
+			struct cgroup *parent, struct cgroup *child);
+
 static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
 {
 	struct inode *inode = new_inode(sb);
@@ -585,13 +622,18 @@ static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
  * Call subsys's pre_destroy handler.
  * This is called before css refcnt check.
  */
-static void cgroup_call_pre_destroy(struct cgroup *cgrp)
+static int cgroup_call_pre_destroy(struct cgroup *cgrp)
 {
 	struct cgroup_subsys *ss;
+	int ret = 0;
+
 	for_each_subsys(cgrp->root, ss)
-		if (ss->pre_destroy)
-			ss->pre_destroy(ss, cgrp);
-	return;
+		if (ss->pre_destroy) {
+			ret = ss->pre_destroy(ss, cgrp);
+			if (ret)
+				break;
+		}
+	return ret;
 }
 
 static void free_cgroup_rcu(struct rcu_head *obj)
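
With ->pre_destroy() now returning int, a busy subsystem can veto removal up front instead of letting rmdir() spin. A sketch of a handler under the new contract; the foo_* names are hypothetical, not part of this patch:

static int foo_pre_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
{
	struct foo_state *foo = foo_from_cgroup(cgrp);	/* hypothetical */

	/* Try to move remaining state to the parent before removal. */
	if (!foo_drain_to_parent(foo))			/* hypothetical */
		return -EBUSY;	/* propagated as the rmdir() error */
	return 0;
}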
@@ -685,6 +727,22 @@ static void cgroup_d_remove_dir(struct dentry *dentry)
 	remove_dir(dentry);
 }
 
+/*
+ * A queue for waiters to do rmdir() on a cgroup. A task will sleep when
+ * cgroup->count == 0 && list_empty(&cgroup->children) && some subsys still
+ * holds a reference to css->refcnt. In general, this refcnt is expected to
+ * go down to zero soon.
+ *
+ * The CGRP_WAIT_ON_RMDIR flag is modified under the cgroup's inode->i_mutex.
+ */
+DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
+
+static void cgroup_wakeup_rmdir_waiters(const struct cgroup *cgrp)
+{
+	if (unlikely(test_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
+		wake_up_all(&cgroup_rmdir_waitq);
+}
+
 static int rebind_subsystems(struct cgroupfs_root *root,
 			     unsigned long final_bits)
 {
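
The two halves of this handshake end up in different functions, so the pairing is easy to lose. Condensed (locking and error paths elided) from the cgroup_rmdir() and __css_put() changes later in this patch:

/* rmdir() side: announce the wait, then re-check before sleeping */
set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE);
if (!cgroup_clear_css_refs(cgrp))
	schedule();		/* some css->refcnt is still held */
finish_wait(&cgroup_rmdir_waitq, &wait);
clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);

/* waker side (__css_put(), cgroup_attach_task()): */
cgroup_wakeup_rmdir_waiters(cgrp);	/* wake_up_all() iff flag set */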
@@ -857,16 +915,16 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
 	}
 
 	ret = rebind_subsystems(root, opts.subsys_bits);
+	if (ret)
+		goto out_unlock;
 
 	/* (re)populate subsystem files */
-	if (!ret)
-		cgroup_populate_dir(cgrp);
+	cgroup_populate_dir(cgrp);
 
 	if (opts.release_agent)
 		strcpy(root->release_agent_path, opts.release_agent);
 out_unlock:
-	if (opts.release_agent)
-		kfree(opts.release_agent);
+	kfree(opts.release_agent);
 	mutex_unlock(&cgroup_mutex);
 	mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
 	return ret;
@@ -969,15 +1027,13 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
 	/* First find the desired set of subsystems */
 	ret = parse_cgroupfs_options(data, &opts);
 	if (ret) {
-		if (opts.release_agent)
-			kfree(opts.release_agent);
+		kfree(opts.release_agent);
 		return ret;
 	}
 
 	root = kzalloc(sizeof(*root), GFP_KERNEL);
 	if (!root) {
-		if (opts.release_agent)
-			kfree(opts.release_agent);
+		kfree(opts.release_agent);
 		return -ENOMEM;
 	}
 
@@ -1077,8 +1133,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
  free_cg_links:
 	free_cg_links(&tmp_cg_links);
 drop_new_super:
-	up_write(&sb->s_umount);
-	deactivate_super(sb);
+	deactivate_locked_super(sb);
 	return ret;
 }
 
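As I understand the VFS API here, deactivate_locked_super() both drops sb->s_umount and releases the superblock reference, so the open-coded up_write() + deactivate_super() pair collapses into a single call with the same net effect.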
@@ -1280,6 +1335,12 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 	set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
 	synchronize_rcu();
 	put_css_set(cg);
+
+	/*
+	 * Wake up rmdir() waiters; their rmdir should fail since the cgroup
+	 * is no longer empty.
+	 */
+	cgroup_wakeup_rmdir_waiters(cgrp);
 	return 0;
 }
 
@@ -1625,7 +1686,7 @@ static struct inode_operations cgroup_dir_inode_operations = {
 	.rename = cgroup_rename,
 };
 
-static int cgroup_create_file(struct dentry *dentry, int mode,
+static int cgroup_create_file(struct dentry *dentry, mode_t mode,
 				struct super_block *sb)
 {
 	static const struct dentry_operations cgroup_dops = {
@@ -1671,7 +1732,7 @@ static int cgroup_create_file(struct dentry *dentry, int mode,
  * @mode: mode to set on new directory.
  */
 static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry,
-				int mode)
+				mode_t mode)
 {
 	struct dentry *parent;
 	int error = 0;
@@ -1689,6 +1750,33 @@ static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry,
 	return error;
 }
 
+/**
+ * cgroup_file_mode - deduce file mode of a control file
+ * @cft: the control file in question
+ *
+ * returns cft->mode if ->mode is not 0
+ * returns S_IRUGO|S_IWUSR if it has both a read and a write handler
+ * returns S_IRUGO if it has only a read handler
+ * returns S_IWUSR if it has only a write handler
+ */
+static mode_t cgroup_file_mode(const struct cftype *cft)
+{
+	mode_t mode = 0;
+
+	if (cft->mode)
+		return cft->mode;
+
+	if (cft->read || cft->read_u64 || cft->read_s64 ||
+	    cft->read_map || cft->read_seq_string)
+		mode |= S_IRUGO;
+
+	if (cft->write || cft->write_u64 || cft->write_s64 ||
+	    cft->write_string || cft->trigger)
+		mode |= S_IWUSR;
+
+	return mode;
+}
+
 int cgroup_add_file(struct cgroup *cgrp,
 		    struct cgroup_subsys *subsys,
 		    const struct cftype *cft)
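
After this change a cftype only needs an explicit .mode to override the deduction; read-only and read-write files get sensible defaults from their handlers. A hypothetical subsystem's file table for illustration (foo_* names are not from this patch):

static struct cftype foo_files[] = {
	{
		.name = "usage",
		.read_u64 = foo_usage_read,	/* deduced: S_IRUGO (0444) */
	},
	{
		.name = "limit",
		.read_u64 = foo_limit_read,
		.write_u64 = foo_limit_write,	/* deduced: S_IRUGO | S_IWUSR (0644) */
	},
};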
@@ -1696,6 +1784,7 @@ int cgroup_add_file(struct cgroup *cgrp,
 	struct dentry *dir = cgrp->dentry;
 	struct dentry *dentry;
 	int error;
+	mode_t mode;
 
 	char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 };
 	if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) {
@@ -1706,7 +1795,8 @@ int cgroup_add_file(struct cgroup *cgrp,
 	BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex));
 	dentry = lookup_one_len(name, dir, strlen(name));
 	if (!IS_ERR(dentry)) {
-		error = cgroup_create_file(dentry, 0644 | S_IFREG,
+		mode = cgroup_file_mode(cft);
+		error = cgroup_create_file(dentry, mode | S_IFREG,
 					   cgrp->root->sb);
 		if (!error)
 			dentry->d_fsdata = (void *)cft;
@@ -2288,6 +2378,7 @@ static struct cftype files[] = {
 		.write_u64 = cgroup_tasks_write,
 		.release = cgroup_tasks_release,
 		.private = FILE_TASKLIST,
+		.mode = S_IRUGO | S_IWUSR,
 	},
 
 	{
@@ -2327,6 +2418,17 @@ static int cgroup_populate_dir(struct cgroup *cgrp)
 		if (ss->populate && (err = ss->populate(ss, cgrp)) < 0)
 			return err;
 	}
+	/* This cgroup is ready now */
+	for_each_subsys(cgrp->root, ss) {
+		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
+		/*
+		 * Update the id->css pointer and make this css visible from
+		 * the CSS ID functions. This pointer will be dereferenced
+		 * from the RCU read side without locks.
+		 */
+		if (css->id)
+			rcu_assign_pointer(css->id->css, css);
+	}
 
 	return 0;
 }
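
The rcu_assign_pointer() above is the publish half; the subscribe half is any RCU reader mapping a stored ID back to a live css. A sketch of such a reader, assuming a subsystem with ss->use_id set (css_lookup() is added by this patch; css_tryget() is the existing helper the css_id comment refers to):

struct cgroup_subsys_state *css;

rcu_read_lock();
css = css_lookup(ss, stored_id);	/* NULL before populate or after rmdir */
if (css && css_tryget(css)) {
	/* css is pinned; safe to use until css_put() */
}
rcu_read_unlock();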
@@ -2338,6 +2440,7 @@ static void init_cgroup_css(struct cgroup_subsys_state *css,
 	css->cgroup = cgrp;
 	atomic_set(&css->refcnt, 1);
 	css->flags = 0;
+	css->id = NULL;
 	if (cgrp == dummytop)
 		set_bit(CSS_ROOT, &css->flags);
 	BUG_ON(cgrp->subsys[ss->subsys_id]);
@@ -2376,7 +2479,7 @@ static void cgroup_unlock_hierarchy(struct cgroupfs_root *root)
  * Must be called with the mutex on the parent inode held
  */
 static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
-			  int mode)
+			  mode_t mode)
 {
 	struct cgroup *cgrp;
 	struct cgroupfs_root *root = parent->root;
@@ -2413,6 +2516,10 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 			goto err_destroy;
 		}
 		init_cgroup_css(css, ss, cgrp);
+		if (ss->use_id)
+			if (alloc_css_id(ss, parent, cgrp))
+				goto err_destroy;
+		/* On error, the ->destroy() callback has to free the assigned ID. */
 	}
 
 	cgroup_lock_hierarchy(root);
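
Allocation only happens for subsystems that opt in. A sketch of the opt-in (field values are hypothetical; per the comment above, the subsystem's ->destroy() is responsible for handing the ID back via free_css_id()):

struct cgroup_subsys foo_subsys = {
	.name		= "foo",
	.subsys_id	= foo_subsys_id,
	.create		= foo_create,
	.destroy	= foo_destroy,		/* calls free_css_id() */
	.use_id		= 1,			/* request CSS IDs */
};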
@@ -2555,9 +2662,11 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
 	struct cgroup *cgrp = dentry->d_fsdata;
 	struct dentry *d;
 	struct cgroup *parent;
+	DEFINE_WAIT(wait);
+	int ret;
 
 	/* the vfs holds both inode->i_mutex already */
-
+again:
 	mutex_lock(&cgroup_mutex);
 	if (atomic_read(&cgrp->count) != 0) {
 		mutex_unlock(&cgroup_mutex);
@@ -2573,17 +2682,39 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
 	 * Call pre_destroy handlers of subsys. Notify subsystems
 	 * that rmdir() request comes.
 	 */
-	cgroup_call_pre_destroy(cgrp);
+	ret = cgroup_call_pre_destroy(cgrp);
+	if (ret)
+		return ret;
 
 	mutex_lock(&cgroup_mutex);
 	parent = cgrp->parent;
-
-	if (atomic_read(&cgrp->count)
-		|| !list_empty(&cgrp->children)
-		|| !cgroup_clear_css_refs(cgrp)) {
+	if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) {
 		mutex_unlock(&cgroup_mutex);
 		return -EBUSY;
 	}
+	/*
+	 * css_put/get lets a subsystem grab a refcnt on a css. Typically a
+	 * subsystem holds no reference after pre_destroy(), but under
+	 * hierarchy management some *temporary* refcnts can be held. To
+	 * avoid returning -EBUSY to the user, a waitqueue is used: if a
+	 * subsystem is really busy it should return -EBUSY from
+	 * pre_destroy(). wake_up is called when css_put() drops the refcnt to 0.
+	 */
+	set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
+	prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE);
+
+	if (!cgroup_clear_css_refs(cgrp)) {
+		mutex_unlock(&cgroup_mutex);
+		schedule();
+		finish_wait(&cgroup_rmdir_waitq, &wait);
+		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
+		if (signal_pending(current))
+			return -EINTR;
+		goto again;
+	}
+	/* No css_tryget() can succeed after this point. */
+	finish_wait(&cgroup_rmdir_waitq, &wait);
+	clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
 
 	spin_lock(&release_list_lock);
 	set_bit(CGRP_REMOVED, &cgrp->flags);
@@ -2708,6 +2839,8 @@ int __init cgroup_init(void)
 		struct cgroup_subsys *ss = subsys[i];
 		if (!ss->early_init)
 			cgroup_init_subsys(ss);
+		if (ss->use_id)
+			cgroup_subsys_init_idr(ss);
 	}
 
 	/* Add init_css_set to the hash table */
@@ -3084,18 +3217,19 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
 }
 
 /**
- * cgroup_is_descendant - see if @cgrp is a descendant of current task's cgrp
+ * cgroup_is_descendant - see if @cgrp is a descendant of @task's cgrp
  * @cgrp: the cgroup in question
+ * @task: the task in question
  *
- * See if @cgrp is a descendant of the current task's cgroup in
- * the appropriate hierarchy.
+ * See if @cgrp is a descendant of @task's cgroup in the appropriate
+ * hierarchy.
  *
  * If we are sending in dummytop, then presumably we are creating
  * the top cgroup in the subsystem.
  *
 * Called only by the ns (nsproxy) cgroup.
  */
-int cgroup_is_descendant(const struct cgroup *cgrp)
+int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task)
 {
 	int ret;
 	struct cgroup *target;
@@ -3105,7 +3239,7 @@ int cgroup_is_descendant(const struct cgroup *cgrp)
 		return 1;
 
 	get_first_subsys(cgrp, NULL, &subsys_id);
-	target = task_cgroup(current, subsys_id);
+	target = task_cgroup(task, subsys_id);
 	while (cgrp != target && cgrp != cgrp->top_cgroup)
 		cgrp = cgrp->parent;
 	ret = (cgrp == target);
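
Existing callers keep the old semantics by passing current; the point of the new parameter is that the ns cgroup can also check a task other than the caller, e.g. the one being attached. Hypothetical call sites for illustration:

/* old behaviour: check against the caller's own cgroup */
if (!cgroup_is_descendant(cgrp, current))
	return -EPERM;

/* new possibility, e.g. from a can_attach() hook given @tsk: */
if (!cgroup_is_descendant(cgrp, tsk))
	return -EPERM;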
@@ -3138,10 +3272,12 @@ void __css_put(struct cgroup_subsys_state *css)
 {
 	struct cgroup *cgrp = css->cgroup;
 	rcu_read_lock();
-	if ((atomic_dec_return(&css->refcnt) == 1) &&
-	    notify_on_release(cgrp)) {
+	if (atomic_dec_return(&css->refcnt) == 1) {
+		if (notify_on_release(cgrp)) {
 			set_bit(CGRP_RELEASABLE, &cgrp->flags);
 			check_for_release(cgrp);
+		}
+		cgroup_wakeup_rmdir_waiters(cgrp);
 	}
 	rcu_read_unlock();
 }
@@ -3241,3 +3377,232 @@ static int __init cgroup_disable(char *str)
 	return 1;
 }
 __setup("cgroup_disable=", cgroup_disable);
+
+/*
+ * Functions for CSS ID.
+ */
+
+/*
+ * To get an ID other than 0, this should be called when !cgroup_is_removed().
+ */
+unsigned short css_id(struct cgroup_subsys_state *css)
+{
+	struct css_id *cssid = rcu_dereference(css->id);
+
+	if (cssid)
+		return cssid->id;
+	return 0;
+}
+
+unsigned short css_depth(struct cgroup_subsys_state *css)
+{
+	struct css_id *cssid = rcu_dereference(css->id);
+
+	if (cssid)
+		return cssid->depth;
+	return 0;
+}
+
+bool css_is_ancestor(struct cgroup_subsys_state *child,
+		     const struct cgroup_subsys_state *root)
+{
+	struct css_id *child_id = rcu_dereference(child->id);
+	struct css_id *root_id = rcu_dereference(root->id);
+
+	if (!child_id || !root_id || (child_id->depth < root_id->depth))
+		return false;
+	return child_id->stack[root_id->depth] == root_id->id;
+}
+
+static void __free_css_id_cb(struct rcu_head *head)
+{
+	struct css_id *id;
+
+	id = container_of(head, struct css_id, rcu_head);
+	kfree(id);
+}
+
+void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
+{
+	struct css_id *id = css->id;
+	/* When this is called before css_id initialization, id can be NULL */
+	if (!id)
+		return;
+
+	BUG_ON(!ss->use_id);
+
+	rcu_assign_pointer(id->css, NULL);
+	rcu_assign_pointer(css->id, NULL);
+	spin_lock(&ss->id_lock);
+	idr_remove(&ss->idr, id->id);
+	spin_unlock(&ss->id_lock);
+	call_rcu(&id->rcu_head, __free_css_id_cb);
+}
+
+/*
+ * This is called by init or create(). Calls to this function are
+ * always serialized (by cgroup_mutex at create()).
+ */
+
+static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth)
+{
+	struct css_id *newid;
+	int myid, error, size;
+
+	BUG_ON(!ss->use_id);
+
+	size = sizeof(*newid) + sizeof(unsigned short) * (depth + 1);
+	newid = kzalloc(size, GFP_KERNEL);
+	if (!newid)
+		return ERR_PTR(-ENOMEM);
+	/* get id */
+	if (unlikely(!idr_pre_get(&ss->idr, GFP_KERNEL))) {
+		error = -ENOMEM;
+		goto err_out;
+	}
+	spin_lock(&ss->id_lock);
+	/* Don't use 0; allocate an ID in the range 1-65535 */
+	error = idr_get_new_above(&ss->idr, newid, 1, &myid);
+	spin_unlock(&ss->id_lock);
+
+	/* Returns an error when there is no free space for a new ID. */
+	if (error) {
+		error = -ENOSPC;
+		goto err_out;
+	}
+	if (myid > CSS_ID_MAX)
+		goto remove_idr;
+
+	newid->id = myid;
+	newid->depth = depth;
+	return newid;
+remove_idr:
+	error = -ENOSPC;
+	spin_lock(&ss->id_lock);
+	idr_remove(&ss->idr, myid);
+	spin_unlock(&ss->id_lock);
+err_out:
+	kfree(newid);
+	return ERR_PTR(error);
+
+}
+
+static int __init cgroup_subsys_init_idr(struct cgroup_subsys *ss)
+{
+	struct css_id *newid;
+	struct cgroup_subsys_state *rootcss;
+
+	spin_lock_init(&ss->id_lock);
+	idr_init(&ss->idr);
+
+	rootcss = init_css_set.subsys[ss->subsys_id];
+	newid = get_new_cssid(ss, 0);
+	if (IS_ERR(newid))
+		return PTR_ERR(newid);
+
+	newid->stack[0] = newid->id;
+	newid->css = rootcss;
+	rootcss->id = newid;
+	return 0;
+}
+
+static int alloc_css_id(struct cgroup_subsys *ss, struct cgroup *parent,
+			struct cgroup *child)
+{
+	int subsys_id, i, depth = 0;
+	struct cgroup_subsys_state *parent_css, *child_css;
+	struct css_id *child_id, *parent_id = NULL;
+
+	subsys_id = ss->subsys_id;
+	parent_css = parent->subsys[subsys_id];
+	child_css = child->subsys[subsys_id];
+	depth = css_depth(parent_css) + 1;
+	parent_id = parent_css->id;
+
+	child_id = get_new_cssid(ss, depth);
+	if (IS_ERR(child_id))
+		return PTR_ERR(child_id);
+
+	for (i = 0; i < depth; i++)
+		child_id->stack[i] = parent_id->stack[i];
+	child_id->stack[depth] = child_id->id;
+	/*
+	 * The child_id->css pointer will be set after this cgroup is
+	 * available; see cgroup_populate_dir().
+	 */
+	rcu_assign_pointer(child_css->id, child_id);
+
+	return 0;
+}
+
+/**
+ * css_lookup - lookup css by id
+ * @ss: cgroup subsys to be looked into.
+ * @id: the id
+ *
+ * Returns a pointer to the cgroup_subsys_state if there is a valid one with
+ * the given id, NULL if not. Should be called under rcu_read_lock().
+ */
+struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id)
+{
+	struct css_id *cssid = NULL;
+
+	BUG_ON(!ss->use_id);
+	cssid = idr_find(&ss->idr, id);
+
+	if (unlikely(!cssid))
+		return NULL;
+
+	return rcu_dereference(cssid->css);
+}
+
+/**
+ * css_get_next - lookup next cgroup under specified hierarchy.
+ * @ss: pointer to subsystem
+ * @id: current position of iteration.
+ * @root: pointer to css. search tree under this.
+ * @foundid: position of found object.
+ *
+ * Search for the next css under the specified hierarchy of rootid. Calling
+ * under rcu_read_lock() is necessary. Returns NULL if it reaches the end.
+ */
+struct cgroup_subsys_state *
+css_get_next(struct cgroup_subsys *ss, int id,
+	     struct cgroup_subsys_state *root, int *foundid)
+{
+	struct cgroup_subsys_state *ret = NULL;
+	struct css_id *tmp;
+	int tmpid;
+	int rootid = css_id(root);
+	int depth = css_depth(root);
+
+	if (!rootid)
+		return NULL;
+
+	BUG_ON(!ss->use_id);
+	/* fill start point for scan */
+	tmpid = id;
+	while (1) {
+		/*
+		 * scan next entry from bitmap(tree), tmpid is updated after
+		 * idr_get_next().
+		 */
+		spin_lock(&ss->id_lock);
+		tmp = idr_get_next(&ss->idr, &tmpid);
+		spin_unlock(&ss->id_lock);
+
+		if (!tmp)
+			break;
+		if (tmp->depth >= depth && tmp->stack[depth] == rootid) {
+			ret = rcu_dereference(tmp->css);
+			if (ret) {
+				*foundid = tmpid;
+				break;
+			}
+		}
+		/* continue to scan from next id */
+		tmpid = tmpid + 1;
+	}
+	return ret;
+}
+
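
Taken together, the ID machinery gives a lockless way to walk a subtree. A sketch of how a subsystem using CSS IDs might visit every live css under a root, following the css_get_next() iteration contract above (foo_visit() is hypothetical):

static void foo_scan_subtree(struct cgroup_subsys *ss,
			     struct cgroup_subsys_state *root)
{
	struct cgroup_subsys_state *css;
	int id = 1;	/* IDs start at 1; 0 is never allocated */
	int found;

	rcu_read_lock();
	for (;;) {
		css = css_get_next(ss, id, root, &found);
		if (!css)
			break;
		foo_visit(css);		/* hypothetical per-css work */
		id = found + 1;		/* resume scan after the last hit */
	}
	rcu_read_unlock();
}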