aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/cgroup.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/cgroup.c')
-rw-r--r--kernel/cgroup.c433
1 files changed, 399 insertions, 34 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index c500ca7239b2..a7267bfd3765 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -94,7 +94,6 @@ struct cgroupfs_root {
94 char release_agent_path[PATH_MAX]; 94 char release_agent_path[PATH_MAX];
95}; 95};
96 96
97
98/* 97/*
99 * The "rootnode" hierarchy is the "dummy hierarchy", reserved for the 98 * The "rootnode" hierarchy is the "dummy hierarchy", reserved for the
100 * subsystems that are otherwise unattached - it never has more than a 99 * subsystems that are otherwise unattached - it never has more than a
@@ -102,6 +101,39 @@ struct cgroupfs_root {
102 */ 101 */
103static struct cgroupfs_root rootnode; 102static struct cgroupfs_root rootnode;
104 103
104/*
105 * CSS ID -- ID per subsys's Cgroup Subsys State(CSS). used only when
106 * cgroup_subsys->use_id != 0.
107 */
108#define CSS_ID_MAX (65535)
109struct css_id {
110 /*
111 * The css to which this ID points. This pointer is set to valid value
112 * after cgroup is populated. If cgroup is removed, this will be NULL.
113 * This pointer is expected to be RCU-safe because destroy()
114 * is called after synchronize_rcu(). But for safe use, css_is_removed()
115 * css_tryget() should be used for avoiding race.
116 */
117 struct cgroup_subsys_state *css;
118 /*
119 * ID of this css.
120 */
121 unsigned short id;
122 /*
123 * Depth in hierarchy which this ID belongs to.
124 */
125 unsigned short depth;
126 /*
127 * ID is freed by RCU. (and lookup routine is RCU safe.)
128 */
129 struct rcu_head rcu_head;
130 /*
131 * Hierarchy of CSS ID belongs to.
132 */
133 unsigned short stack[0]; /* Array of Length (depth+1) */
134};
135
136
105/* The list of hierarchy roots */ 137/* The list of hierarchy roots */
106 138
107static LIST_HEAD(roots); 139static LIST_HEAD(roots);
@@ -185,6 +217,8 @@ struct cg_cgroup_link {
185static struct css_set init_css_set; 217static struct css_set init_css_set;
186static struct cg_cgroup_link init_css_set_link; 218static struct cg_cgroup_link init_css_set_link;
187 219
220static int cgroup_subsys_init_idr(struct cgroup_subsys *ss);
221
188/* css_set_lock protects the list of css_set objects, and the 222/* css_set_lock protects the list of css_set objects, and the
189 * chain of tasks off each css_set. Nests outside task->alloc_lock 223 * chain of tasks off each css_set. Nests outside task->alloc_lock
190 * due to cgroup_iter_start() */ 224 * due to cgroup_iter_start() */
@@ -567,6 +601,9 @@ static struct backing_dev_info cgroup_backing_dev_info = {
567 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, 601 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
568}; 602};
569 603
604static int alloc_css_id(struct cgroup_subsys *ss,
605 struct cgroup *parent, struct cgroup *child);
606
570static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb) 607static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
571{ 608{
572 struct inode *inode = new_inode(sb); 609 struct inode *inode = new_inode(sb);
@@ -585,13 +622,18 @@ static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
585 * Call subsys's pre_destroy handler. 622 * Call subsys's pre_destroy handler.
586 * This is called before css refcnt check. 623 * This is called before css refcnt check.
587 */ 624 */
588static void cgroup_call_pre_destroy(struct cgroup *cgrp) 625static int cgroup_call_pre_destroy(struct cgroup *cgrp)
589{ 626{
590 struct cgroup_subsys *ss; 627 struct cgroup_subsys *ss;
628 int ret = 0;
629
591 for_each_subsys(cgrp->root, ss) 630 for_each_subsys(cgrp->root, ss)
592 if (ss->pre_destroy) 631 if (ss->pre_destroy) {
593 ss->pre_destroy(ss, cgrp); 632 ret = ss->pre_destroy(ss, cgrp);
594 return; 633 if (ret)
634 break;
635 }
636 return ret;
595} 637}
596 638
597static void free_cgroup_rcu(struct rcu_head *obj) 639static void free_cgroup_rcu(struct rcu_head *obj)
@@ -685,6 +727,22 @@ static void cgroup_d_remove_dir(struct dentry *dentry)
685 remove_dir(dentry); 727 remove_dir(dentry);
686} 728}
687 729
730/*
731 * A queue for waiters to do rmdir() cgroup. A tasks will sleep when
732 * cgroup->count == 0 && list_empty(&cgroup->children) && subsys has some
733 * reference to css->refcnt. In general, this refcnt is expected to goes down
734 * to zero, soon.
735 *
736 * CGRP_WAIT_ON_RMDIR flag is modified under cgroup's inode->i_mutex;
737 */
738DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
739
740static void cgroup_wakeup_rmdir_waiters(const struct cgroup *cgrp)
741{
742 if (unlikely(test_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
743 wake_up_all(&cgroup_rmdir_waitq);
744}
745
688static int rebind_subsystems(struct cgroupfs_root *root, 746static int rebind_subsystems(struct cgroupfs_root *root,
689 unsigned long final_bits) 747 unsigned long final_bits)
690{ 748{
@@ -857,16 +915,16 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
857 } 915 }
858 916
859 ret = rebind_subsystems(root, opts.subsys_bits); 917 ret = rebind_subsystems(root, opts.subsys_bits);
918 if (ret)
919 goto out_unlock;
860 920
861 /* (re)populate subsystem files */ 921 /* (re)populate subsystem files */
862 if (!ret) 922 cgroup_populate_dir(cgrp);
863 cgroup_populate_dir(cgrp);
864 923
865 if (opts.release_agent) 924 if (opts.release_agent)
866 strcpy(root->release_agent_path, opts.release_agent); 925 strcpy(root->release_agent_path, opts.release_agent);
867 out_unlock: 926 out_unlock:
868 if (opts.release_agent) 927 kfree(opts.release_agent);
869 kfree(opts.release_agent);
870 mutex_unlock(&cgroup_mutex); 928 mutex_unlock(&cgroup_mutex);
871 mutex_unlock(&cgrp->dentry->d_inode->i_mutex); 929 mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
872 return ret; 930 return ret;
@@ -969,15 +1027,13 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
969 /* First find the desired set of subsystems */ 1027 /* First find the desired set of subsystems */
970 ret = parse_cgroupfs_options(data, &opts); 1028 ret = parse_cgroupfs_options(data, &opts);
971 if (ret) { 1029 if (ret) {
972 if (opts.release_agent) 1030 kfree(opts.release_agent);
973 kfree(opts.release_agent);
974 return ret; 1031 return ret;
975 } 1032 }
976 1033
977 root = kzalloc(sizeof(*root), GFP_KERNEL); 1034 root = kzalloc(sizeof(*root), GFP_KERNEL);
978 if (!root) { 1035 if (!root) {
979 if (opts.release_agent) 1036 kfree(opts.release_agent);
980 kfree(opts.release_agent);
981 return -ENOMEM; 1037 return -ENOMEM;
982 } 1038 }
983 1039
@@ -1077,8 +1133,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
1077 free_cg_links: 1133 free_cg_links:
1078 free_cg_links(&tmp_cg_links); 1134 free_cg_links(&tmp_cg_links);
1079 drop_new_super: 1135 drop_new_super:
1080 up_write(&sb->s_umount); 1136 deactivate_locked_super(sb);
1081 deactivate_super(sb);
1082 return ret; 1137 return ret;
1083} 1138}
1084 1139
@@ -1280,6 +1335,12 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1280 set_bit(CGRP_RELEASABLE, &oldcgrp->flags); 1335 set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
1281 synchronize_rcu(); 1336 synchronize_rcu();
1282 put_css_set(cg); 1337 put_css_set(cg);
1338
1339 /*
1340 * wake up rmdir() waiter. the rmdir should fail since the cgroup
1341 * is no longer empty.
1342 */
1343 cgroup_wakeup_rmdir_waiters(cgrp);
1283 return 0; 1344 return 0;
1284} 1345}
1285 1346
@@ -1625,7 +1686,7 @@ static struct inode_operations cgroup_dir_inode_operations = {
1625 .rename = cgroup_rename, 1686 .rename = cgroup_rename,
1626}; 1687};
1627 1688
1628static int cgroup_create_file(struct dentry *dentry, int mode, 1689static int cgroup_create_file(struct dentry *dentry, mode_t mode,
1629 struct super_block *sb) 1690 struct super_block *sb)
1630{ 1691{
1631 static const struct dentry_operations cgroup_dops = { 1692 static const struct dentry_operations cgroup_dops = {
@@ -1671,7 +1732,7 @@ static int cgroup_create_file(struct dentry *dentry, int mode,
1671 * @mode: mode to set on new directory. 1732 * @mode: mode to set on new directory.
1672 */ 1733 */
1673static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry, 1734static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry,
1674 int mode) 1735 mode_t mode)
1675{ 1736{
1676 struct dentry *parent; 1737 struct dentry *parent;
1677 int error = 0; 1738 int error = 0;
@@ -1689,6 +1750,33 @@ static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry,
1689 return error; 1750 return error;
1690} 1751}
1691 1752
1753/**
1754 * cgroup_file_mode - deduce file mode of a control file
1755 * @cft: the control file in question
1756 *
1757 * returns cft->mode if ->mode is not 0
1758 * returns S_IRUGO|S_IWUSR if it has both a read and a write handler
1759 * returns S_IRUGO if it has only a read handler
1760 * returns S_IWUSR if it has only a write hander
1761 */
1762static mode_t cgroup_file_mode(const struct cftype *cft)
1763{
1764 mode_t mode = 0;
1765
1766 if (cft->mode)
1767 return cft->mode;
1768
1769 if (cft->read || cft->read_u64 || cft->read_s64 ||
1770 cft->read_map || cft->read_seq_string)
1771 mode |= S_IRUGO;
1772
1773 if (cft->write || cft->write_u64 || cft->write_s64 ||
1774 cft->write_string || cft->trigger)
1775 mode |= S_IWUSR;
1776
1777 return mode;
1778}
1779
1692int cgroup_add_file(struct cgroup *cgrp, 1780int cgroup_add_file(struct cgroup *cgrp,
1693 struct cgroup_subsys *subsys, 1781 struct cgroup_subsys *subsys,
1694 const struct cftype *cft) 1782 const struct cftype *cft)
@@ -1696,6 +1784,7 @@ int cgroup_add_file(struct cgroup *cgrp,
1696 struct dentry *dir = cgrp->dentry; 1784 struct dentry *dir = cgrp->dentry;
1697 struct dentry *dentry; 1785 struct dentry *dentry;
1698 int error; 1786 int error;
1787 mode_t mode;
1699 1788
1700 char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 }; 1789 char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 };
1701 if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) { 1790 if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) {
@@ -1706,7 +1795,8 @@ int cgroup_add_file(struct cgroup *cgrp,
1706 BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex)); 1795 BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex));
1707 dentry = lookup_one_len(name, dir, strlen(name)); 1796 dentry = lookup_one_len(name, dir, strlen(name));
1708 if (!IS_ERR(dentry)) { 1797 if (!IS_ERR(dentry)) {
1709 error = cgroup_create_file(dentry, 0644 | S_IFREG, 1798 mode = cgroup_file_mode(cft);
1799 error = cgroup_create_file(dentry, mode | S_IFREG,
1710 cgrp->root->sb); 1800 cgrp->root->sb);
1711 if (!error) 1801 if (!error)
1712 dentry->d_fsdata = (void *)cft; 1802 dentry->d_fsdata = (void *)cft;
@@ -2288,6 +2378,7 @@ static struct cftype files[] = {
2288 .write_u64 = cgroup_tasks_write, 2378 .write_u64 = cgroup_tasks_write,
2289 .release = cgroup_tasks_release, 2379 .release = cgroup_tasks_release,
2290 .private = FILE_TASKLIST, 2380 .private = FILE_TASKLIST,
2381 .mode = S_IRUGO | S_IWUSR,
2291 }, 2382 },
2292 2383
2293 { 2384 {
@@ -2327,6 +2418,17 @@ static int cgroup_populate_dir(struct cgroup *cgrp)
2327 if (ss->populate && (err = ss->populate(ss, cgrp)) < 0) 2418 if (ss->populate && (err = ss->populate(ss, cgrp)) < 0)
2328 return err; 2419 return err;
2329 } 2420 }
2421 /* This cgroup is ready now */
2422 for_each_subsys(cgrp->root, ss) {
2423 struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
2424 /*
2425 * Update id->css pointer and make this css visible from
2426 * CSS ID functions. This pointer will be dereferenced
2427 * from RCU-read-side without locks.
2428 */
2429 if (css->id)
2430 rcu_assign_pointer(css->id->css, css);
2431 }
2330 2432
2331 return 0; 2433 return 0;
2332} 2434}
@@ -2338,6 +2440,7 @@ static void init_cgroup_css(struct cgroup_subsys_state *css,
2338 css->cgroup = cgrp; 2440 css->cgroup = cgrp;
2339 atomic_set(&css->refcnt, 1); 2441 atomic_set(&css->refcnt, 1);
2340 css->flags = 0; 2442 css->flags = 0;
2443 css->id = NULL;
2341 if (cgrp == dummytop) 2444 if (cgrp == dummytop)
2342 set_bit(CSS_ROOT, &css->flags); 2445 set_bit(CSS_ROOT, &css->flags);
2343 BUG_ON(cgrp->subsys[ss->subsys_id]); 2446 BUG_ON(cgrp->subsys[ss->subsys_id]);
@@ -2376,7 +2479,7 @@ static void cgroup_unlock_hierarchy(struct cgroupfs_root *root)
2376 * Must be called with the mutex on the parent inode held 2479 * Must be called with the mutex on the parent inode held
2377 */ 2480 */
2378static long cgroup_create(struct cgroup *parent, struct dentry *dentry, 2481static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
2379 int mode) 2482 mode_t mode)
2380{ 2483{
2381 struct cgroup *cgrp; 2484 struct cgroup *cgrp;
2382 struct cgroupfs_root *root = parent->root; 2485 struct cgroupfs_root *root = parent->root;
@@ -2413,6 +2516,10 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
2413 goto err_destroy; 2516 goto err_destroy;
2414 } 2517 }
2415 init_cgroup_css(css, ss, cgrp); 2518 init_cgroup_css(css, ss, cgrp);
2519 if (ss->use_id)
2520 if (alloc_css_id(ss, parent, cgrp))
2521 goto err_destroy;
2522 /* At error, ->destroy() callback has to free assigned ID. */
2416 } 2523 }
2417 2524
2418 cgroup_lock_hierarchy(root); 2525 cgroup_lock_hierarchy(root);
@@ -2555,9 +2662,11 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
2555 struct cgroup *cgrp = dentry->d_fsdata; 2662 struct cgroup *cgrp = dentry->d_fsdata;
2556 struct dentry *d; 2663 struct dentry *d;
2557 struct cgroup *parent; 2664 struct cgroup *parent;
2665 DEFINE_WAIT(wait);
2666 int ret;
2558 2667
2559 /* the vfs holds both inode->i_mutex already */ 2668 /* the vfs holds both inode->i_mutex already */
2560 2669again:
2561 mutex_lock(&cgroup_mutex); 2670 mutex_lock(&cgroup_mutex);
2562 if (atomic_read(&cgrp->count) != 0) { 2671 if (atomic_read(&cgrp->count) != 0) {
2563 mutex_unlock(&cgroup_mutex); 2672 mutex_unlock(&cgroup_mutex);
@@ -2573,17 +2682,39 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
2573 * Call pre_destroy handlers of subsys. Notify subsystems 2682 * Call pre_destroy handlers of subsys. Notify subsystems
2574 * that rmdir() request comes. 2683 * that rmdir() request comes.
2575 */ 2684 */
2576 cgroup_call_pre_destroy(cgrp); 2685 ret = cgroup_call_pre_destroy(cgrp);
2686 if (ret)
2687 return ret;
2577 2688
2578 mutex_lock(&cgroup_mutex); 2689 mutex_lock(&cgroup_mutex);
2579 parent = cgrp->parent; 2690 parent = cgrp->parent;
2580 2691 if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) {
2581 if (atomic_read(&cgrp->count)
2582 || !list_empty(&cgrp->children)
2583 || !cgroup_clear_css_refs(cgrp)) {
2584 mutex_unlock(&cgroup_mutex); 2692 mutex_unlock(&cgroup_mutex);
2585 return -EBUSY; 2693 return -EBUSY;
2586 } 2694 }
2695 /*
2696 * css_put/get is provided for subsys to grab refcnt to css. In the typical
2697 * case, a subsystem holds no reference after pre_destroy(). But, under
2698 * hierarchy management, some *temporary* refcnt can be held.
2699 * To avoid returning -EBUSY to a user, a waitqueue is used. If the subsys
2700 * is really busy, it should return -EBUSY at pre_destroy(). wake_up
2701 * is called when css_put() is called and refcnt goes down to 0.
2702 */
2703 set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
2704 prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE);
2705
2706 if (!cgroup_clear_css_refs(cgrp)) {
2707 mutex_unlock(&cgroup_mutex);
2708 schedule();
2709 finish_wait(&cgroup_rmdir_waitq, &wait);
2710 clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
2711 if (signal_pending(current))
2712 return -EINTR;
2713 goto again;
2714 }
2715 /* NO css_tryget() can success after here. */
2716 finish_wait(&cgroup_rmdir_waitq, &wait);
2717 clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
2587 2718
2588 spin_lock(&release_list_lock); 2719 spin_lock(&release_list_lock);
2589 set_bit(CGRP_REMOVED, &cgrp->flags); 2720 set_bit(CGRP_REMOVED, &cgrp->flags);
@@ -2708,6 +2839,8 @@ int __init cgroup_init(void)
2708 struct cgroup_subsys *ss = subsys[i]; 2839 struct cgroup_subsys *ss = subsys[i];
2709 if (!ss->early_init) 2840 if (!ss->early_init)
2710 cgroup_init_subsys(ss); 2841 cgroup_init_subsys(ss);
2842 if (ss->use_id)
2843 cgroup_subsys_init_idr(ss);
2711 } 2844 }
2712 2845
2713 /* Add init_css_set to the hash table */ 2846 /* Add init_css_set to the hash table */
@@ -3084,18 +3217,19 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
3084} 3217}
3085 3218
3086/** 3219/**
3087 * cgroup_is_descendant - see if @cgrp is a descendant of current task's cgrp 3220 * cgroup_is_descendant - see if @cgrp is a descendant of @task's cgrp
3088 * @cgrp: the cgroup in question 3221 * @cgrp: the cgroup in question
3222 * @task: the task in question
3089 * 3223 *
3090 * See if @cgrp is a descendant of the current task's cgroup in 3224 * See if @cgrp is a descendant of @task's cgroup in the appropriate
3091 * the appropriate hierarchy. 3225 * hierarchy.
3092 * 3226 *
3093 * If we are sending in dummytop, then presumably we are creating 3227 * If we are sending in dummytop, then presumably we are creating
3094 * the top cgroup in the subsystem. 3228 * the top cgroup in the subsystem.
3095 * 3229 *
3096 * Called only by the ns (nsproxy) cgroup. 3230 * Called only by the ns (nsproxy) cgroup.
3097 */ 3231 */
3098int cgroup_is_descendant(const struct cgroup *cgrp) 3232int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task)
3099{ 3233{
3100 int ret; 3234 int ret;
3101 struct cgroup *target; 3235 struct cgroup *target;
@@ -3105,7 +3239,7 @@ int cgroup_is_descendant(const struct cgroup *cgrp)
3105 return 1; 3239 return 1;
3106 3240
3107 get_first_subsys(cgrp, NULL, &subsys_id); 3241 get_first_subsys(cgrp, NULL, &subsys_id);
3108 target = task_cgroup(current, subsys_id); 3242 target = task_cgroup(task, subsys_id);
3109 while (cgrp != target && cgrp!= cgrp->top_cgroup) 3243 while (cgrp != target && cgrp!= cgrp->top_cgroup)
3110 cgrp = cgrp->parent; 3244 cgrp = cgrp->parent;
3111 ret = (cgrp == target); 3245 ret = (cgrp == target);
@@ -3138,10 +3272,12 @@ void __css_put(struct cgroup_subsys_state *css)
3138{ 3272{
3139 struct cgroup *cgrp = css->cgroup; 3273 struct cgroup *cgrp = css->cgroup;
3140 rcu_read_lock(); 3274 rcu_read_lock();
3141 if ((atomic_dec_return(&css->refcnt) == 1) && 3275 if (atomic_dec_return(&css->refcnt) == 1) {
3142 notify_on_release(cgrp)) { 3276 if (notify_on_release(cgrp)) {
3143 set_bit(CGRP_RELEASABLE, &cgrp->flags); 3277 set_bit(CGRP_RELEASABLE, &cgrp->flags);
3144 check_for_release(cgrp); 3278 check_for_release(cgrp);
3279 }
3280 cgroup_wakeup_rmdir_waiters(cgrp);
3145 } 3281 }
3146 rcu_read_unlock(); 3282 rcu_read_unlock();
3147} 3283}
@@ -3241,3 +3377,232 @@ static int __init cgroup_disable(char *str)
3241 return 1; 3377 return 1;
3242} 3378}
3243__setup("cgroup_disable=", cgroup_disable); 3379__setup("cgroup_disable=", cgroup_disable);
3380
3381/*
3382 * Functions for CSS ID.
3383 */
3384
3385/*
3386 *To get ID other than 0, this should be called when !cgroup_is_removed().
3387 */
3388unsigned short css_id(struct cgroup_subsys_state *css)
3389{
3390 struct css_id *cssid = rcu_dereference(css->id);
3391
3392 if (cssid)
3393 return cssid->id;
3394 return 0;
3395}
3396
3397unsigned short css_depth(struct cgroup_subsys_state *css)
3398{
3399 struct css_id *cssid = rcu_dereference(css->id);
3400
3401 if (cssid)
3402 return cssid->depth;
3403 return 0;
3404}
3405
3406bool css_is_ancestor(struct cgroup_subsys_state *child,
3407 const struct cgroup_subsys_state *root)
3408{
3409 struct css_id *child_id = rcu_dereference(child->id);
3410 struct css_id *root_id = rcu_dereference(root->id);
3411
3412 if (!child_id || !root_id || (child_id->depth < root_id->depth))
3413 return false;
3414 return child_id->stack[root_id->depth] == root_id->id;
3415}
3416
3417static void __free_css_id_cb(struct rcu_head *head)
3418{
3419 struct css_id *id;
3420
3421 id = container_of(head, struct css_id, rcu_head);
3422 kfree(id);
3423}
3424
3425void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
3426{
3427 struct css_id *id = css->id;
3428 /* When this is called before css_id initialization, id can be NULL */
3429 if (!id)
3430 return;
3431
3432 BUG_ON(!ss->use_id);
3433
3434 rcu_assign_pointer(id->css, NULL);
3435 rcu_assign_pointer(css->id, NULL);
3436 spin_lock(&ss->id_lock);
3437 idr_remove(&ss->idr, id->id);
3438 spin_unlock(&ss->id_lock);
3439 call_rcu(&id->rcu_head, __free_css_id_cb);
3440}
3441
3442/*
3443 * This is called by init or create(). Then, calls to this function are
3444 * always serialized (By cgroup_mutex() at create()).
3445 */
3446
3447static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth)
3448{
3449 struct css_id *newid;
3450 int myid, error, size;
3451
3452 BUG_ON(!ss->use_id);
3453
3454 size = sizeof(*newid) + sizeof(unsigned short) * (depth + 1);
3455 newid = kzalloc(size, GFP_KERNEL);
3456 if (!newid)
3457 return ERR_PTR(-ENOMEM);
3458 /* get id */
3459 if (unlikely(!idr_pre_get(&ss->idr, GFP_KERNEL))) {
3460 error = -ENOMEM;
3461 goto err_out;
3462 }
3463 spin_lock(&ss->id_lock);
3464 /* Don't use 0. allocates an ID of 1-65535 */
3465 error = idr_get_new_above(&ss->idr, newid, 1, &myid);
3466 spin_unlock(&ss->id_lock);
3467
3468 /* Returns error when there are no free spaces for new ID.*/
3469 if (error) {
3470 error = -ENOSPC;
3471 goto err_out;
3472 }
3473 if (myid > CSS_ID_MAX)
3474 goto remove_idr;
3475
3476 newid->id = myid;
3477 newid->depth = depth;
3478 return newid;
3479remove_idr:
3480 error = -ENOSPC;
3481 spin_lock(&ss->id_lock);
3482 idr_remove(&ss->idr, myid);
3483 spin_unlock(&ss->id_lock);
3484err_out:
3485 kfree(newid);
3486 return ERR_PTR(error);
3487
3488}
3489
3490static int __init cgroup_subsys_init_idr(struct cgroup_subsys *ss)
3491{
3492 struct css_id *newid;
3493 struct cgroup_subsys_state *rootcss;
3494
3495 spin_lock_init(&ss->id_lock);
3496 idr_init(&ss->idr);
3497
3498 rootcss = init_css_set.subsys[ss->subsys_id];
3499 newid = get_new_cssid(ss, 0);
3500 if (IS_ERR(newid))
3501 return PTR_ERR(newid);
3502
3503 newid->stack[0] = newid->id;
3504 newid->css = rootcss;
3505 rootcss->id = newid;
3506 return 0;
3507}
3508
3509static int alloc_css_id(struct cgroup_subsys *ss, struct cgroup *parent,
3510 struct cgroup *child)
3511{
3512 int subsys_id, i, depth = 0;
3513 struct cgroup_subsys_state *parent_css, *child_css;
3514 struct css_id *child_id, *parent_id = NULL;
3515
3516 subsys_id = ss->subsys_id;
3517 parent_css = parent->subsys[subsys_id];
3518 child_css = child->subsys[subsys_id];
3519 depth = css_depth(parent_css) + 1;
3520 parent_id = parent_css->id;
3521
3522 child_id = get_new_cssid(ss, depth);
3523 if (IS_ERR(child_id))
3524 return PTR_ERR(child_id);
3525
3526 for (i = 0; i < depth; i++)
3527 child_id->stack[i] = parent_id->stack[i];
3528 child_id->stack[depth] = child_id->id;
3529 /*
3530 * child_id->css pointer will be set after this cgroup is available
3531 * see cgroup_populate_dir()
3532 */
3533 rcu_assign_pointer(child_css->id, child_id);
3534
3535 return 0;
3536}
3537
3538/**
3539 * css_lookup - lookup css by id
3540 * @ss: cgroup subsys to be looked into.
3541 * @id: the id
3542 *
3543 * Returns pointer to cgroup_subsys_state if there is valid one with id.
3544 * NULL if not. Should be called under rcu_read_lock()
3545 */
3546struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id)
3547{
3548 struct css_id *cssid = NULL;
3549
3550 BUG_ON(!ss->use_id);
3551 cssid = idr_find(&ss->idr, id);
3552
3553 if (unlikely(!cssid))
3554 return NULL;
3555
3556 return rcu_dereference(cssid->css);
3557}
3558
3559/**
3560 * css_get_next - lookup next cgroup under specified hierarchy.
3561 * @ss: pointer to subsystem
3562 * @id: current position of iteration.
3563 * @root: pointer to css. search tree under this.
3564 * @foundid: position of found object.
3565 *
3566 * Search next css under the specified hierarchy of rootid. Calling under
3567 * rcu_read_lock() is necessary. Returns NULL if it reaches the end.
3568 */
3569struct cgroup_subsys_state *
3570css_get_next(struct cgroup_subsys *ss, int id,
3571 struct cgroup_subsys_state *root, int *foundid)
3572{
3573 struct cgroup_subsys_state *ret = NULL;
3574 struct css_id *tmp;
3575 int tmpid;
3576 int rootid = css_id(root);
3577 int depth = css_depth(root);
3578
3579 if (!rootid)
3580 return NULL;
3581
3582 BUG_ON(!ss->use_id);
3583 /* fill start point for scan */
3584 tmpid = id;
3585 while (1) {
3586 /*
3587 * scan next entry from bitmap(tree), tmpid is updated after
3588 * idr_get_next().
3589 */
3590 spin_lock(&ss->id_lock);
3591 tmp = idr_get_next(&ss->idr, &tmpid);
3592 spin_unlock(&ss->id_lock);
3593
3594 if (!tmp)
3595 break;
3596 if (tmp->depth >= depth && tmp->stack[depth] == rootid) {
3597 ret = rcu_dereference(tmp->css);
3598 if (ret) {
3599 *foundid = tmpid;
3600 break;
3601 }
3602 }
3603 /* continue to scan from next id */
3604 tmpid = tmpid + 1;
3605 }
3606 return ret;
3607}
3608