diff options
Diffstat (limited to 'kernel/cgroup.c')
| -rw-r--r-- | kernel/cgroup.c | 430 |
1 files changed, 398 insertions, 32 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index c500ca7239b..382109b5bae 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
| @@ -94,7 +94,6 @@ struct cgroupfs_root { | |||
| 94 | char release_agent_path[PATH_MAX]; | 94 | char release_agent_path[PATH_MAX]; |
| 95 | }; | 95 | }; |
| 96 | 96 | ||
| 97 | |||
| 98 | /* | 97 | /* |
| 99 | * The "rootnode" hierarchy is the "dummy hierarchy", reserved for the | 98 | * The "rootnode" hierarchy is the "dummy hierarchy", reserved for the |
| 100 | * subsystems that are otherwise unattached - it never has more than a | 99 | * subsystems that are otherwise unattached - it never has more than a |
| @@ -102,6 +101,39 @@ struct cgroupfs_root { | |||
| 102 | */ | 101 | */ |
| 103 | static struct cgroupfs_root rootnode; | 102 | static struct cgroupfs_root rootnode; |
| 104 | 103 | ||
| 104 | /* | ||
| 105 | * CSS ID -- ID per subsys's Cgroup Subsys State (CSS). Used only when | ||
| 106 | * cgroup_subsys->use_id != 0. | ||
| 107 | */ | ||
| 108 | #define CSS_ID_MAX (65535) | ||
| 109 | struct css_id { | ||
| 110 | /* | ||
| 111 | * The css to which this ID points. This pointer is set to valid value | ||
| 112 | * after cgroup is populated. If cgroup is removed, this will be NULL. | ||
| 113 | * This pointer is expected to be RCU-safe because destroy() | ||
| 114 | * is called after synchronize_rcu(). But for safe use, css_is_removed() | ||
| 115 | * css_tryget() should be used for avoiding race. | ||
| 116 | */ | ||
| 117 | struct cgroup_subsys_state *css; | ||
| 118 | /* | ||
| 119 | * ID of this css. | ||
| 120 | */ | ||
| 121 | unsigned short id; | ||
| 122 | /* | ||
| 123 | * Depth in hierarchy which this ID belongs to. | ||
| 124 | */ | ||
| 125 | unsigned short depth; | ||
| 126 | /* | ||
| 127 | * ID is freed by RCU. (and lookup routine is RCU safe.) | ||
| 128 | */ | ||
| 129 | struct rcu_head rcu_head; | ||
| 130 | /* | ||
| 131 | * Hierarchy which this CSS ID belongs to. | ||
| 132 | */ | ||
| 133 | unsigned short stack[0]; /* Array of Length (depth+1) */ | ||
| 134 | }; | ||
| 135 | |||
| 136 | |||
| 105 | /* The list of hierarchy roots */ | 137 | /* The list of hierarchy roots */ |
| 106 | 138 | ||
| 107 | static LIST_HEAD(roots); | 139 | static LIST_HEAD(roots); |
| @@ -185,6 +217,8 @@ struct cg_cgroup_link { | |||
| 185 | static struct css_set init_css_set; | 217 | static struct css_set init_css_set; |
| 186 | static struct cg_cgroup_link init_css_set_link; | 218 | static struct cg_cgroup_link init_css_set_link; |
| 187 | 219 | ||
| 220 | static int cgroup_subsys_init_idr(struct cgroup_subsys *ss); | ||
| 221 | |||
| 188 | /* css_set_lock protects the list of css_set objects, and the | 222 | /* css_set_lock protects the list of css_set objects, and the |
| 189 | * chain of tasks off each css_set. Nests outside task->alloc_lock | 223 | * chain of tasks off each css_set. Nests outside task->alloc_lock |
| 190 | * due to cgroup_iter_start() */ | 224 | * due to cgroup_iter_start() */ |
| @@ -567,6 +601,9 @@ static struct backing_dev_info cgroup_backing_dev_info = { | |||
| 567 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, | 601 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, |
| 568 | }; | 602 | }; |
| 569 | 603 | ||
| 604 | static int alloc_css_id(struct cgroup_subsys *ss, | ||
| 605 | struct cgroup *parent, struct cgroup *child); | ||
| 606 | |||
| 570 | static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb) | 607 | static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb) |
| 571 | { | 608 | { |
| 572 | struct inode *inode = new_inode(sb); | 609 | struct inode *inode = new_inode(sb); |
| @@ -585,13 +622,18 @@ static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb) | |||
| 585 | * Call subsys's pre_destroy handler. | 622 | * Call subsys's pre_destroy handler. |
| 586 | * This is called before css refcnt check. | 623 | * This is called before css refcnt check. |
| 587 | */ | 624 | */ |
| 588 | static void cgroup_call_pre_destroy(struct cgroup *cgrp) | 625 | static int cgroup_call_pre_destroy(struct cgroup *cgrp) |
| 589 | { | 626 | { |
| 590 | struct cgroup_subsys *ss; | 627 | struct cgroup_subsys *ss; |
| 628 | int ret = 0; | ||
| 629 | |||
| 591 | for_each_subsys(cgrp->root, ss) | 630 | for_each_subsys(cgrp->root, ss) |
| 592 | if (ss->pre_destroy) | 631 | if (ss->pre_destroy) { |
| 593 | ss->pre_destroy(ss, cgrp); | 632 | ret = ss->pre_destroy(ss, cgrp); |
| 594 | return; | 633 | if (ret) |
| 634 | break; | ||
| 635 | } | ||
| 636 | return ret; | ||
| 595 | } | 637 | } |
| 596 | 638 | ||
| 597 | static void free_cgroup_rcu(struct rcu_head *obj) | 639 | static void free_cgroup_rcu(struct rcu_head *obj) |
| @@ -685,6 +727,22 @@ static void cgroup_d_remove_dir(struct dentry *dentry) | |||
| 685 | remove_dir(dentry); | 727 | remove_dir(dentry); |
| 686 | } | 728 | } |
| 687 | 729 | ||
| 730 | /* | ||
| 731 | * A queue for waiters to do rmdir() cgroup. A task will sleep when | ||
| 732 | * cgroup->count == 0 && list_empty(&cgroup->children) && subsys has some | ||
| 733 | * reference to css->refcnt. In general, this refcnt is expected to go down | ||
| 734 | * to zero soon. | ||
| 735 | * | ||
| 736 | * CGRP_WAIT_ON_RMDIR flag is modified under cgroup's inode->i_mutex; | ||
| 737 | */ | ||
| 738 | DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq); | ||
| 739 | |||
| 740 | static void cgroup_wakeup_rmdir_waiters(const struct cgroup *cgrp) | ||
| 741 | { | ||
| 742 | if (unlikely(test_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags))) | ||
| 743 | wake_up_all(&cgroup_rmdir_waitq); | ||
| 744 | } | ||
| 745 | |||
| 688 | static int rebind_subsystems(struct cgroupfs_root *root, | 746 | static int rebind_subsystems(struct cgroupfs_root *root, |
| 689 | unsigned long final_bits) | 747 | unsigned long final_bits) |
| 690 | { | 748 | { |
| @@ -857,16 +915,16 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) | |||
| 857 | } | 915 | } |
| 858 | 916 | ||
| 859 | ret = rebind_subsystems(root, opts.subsys_bits); | 917 | ret = rebind_subsystems(root, opts.subsys_bits); |
| 918 | if (ret) | ||
| 919 | goto out_unlock; | ||
| 860 | 920 | ||
| 861 | /* (re)populate subsystem files */ | 921 | /* (re)populate subsystem files */ |
| 862 | if (!ret) | 922 | cgroup_populate_dir(cgrp); |
| 863 | cgroup_populate_dir(cgrp); | ||
| 864 | 923 | ||
| 865 | if (opts.release_agent) | 924 | if (opts.release_agent) |
| 866 | strcpy(root->release_agent_path, opts.release_agent); | 925 | strcpy(root->release_agent_path, opts.release_agent); |
| 867 | out_unlock: | 926 | out_unlock: |
| 868 | if (opts.release_agent) | 927 | kfree(opts.release_agent); |
| 869 | kfree(opts.release_agent); | ||
| 870 | mutex_unlock(&cgroup_mutex); | 928 | mutex_unlock(&cgroup_mutex); |
| 871 | mutex_unlock(&cgrp->dentry->d_inode->i_mutex); | 929 | mutex_unlock(&cgrp->dentry->d_inode->i_mutex); |
| 872 | return ret; | 930 | return ret; |
| @@ -969,15 +1027,13 @@ static int cgroup_get_sb(struct file_system_type *fs_type, | |||
| 969 | /* First find the desired set of subsystems */ | 1027 | /* First find the desired set of subsystems */ |
| 970 | ret = parse_cgroupfs_options(data, &opts); | 1028 | ret = parse_cgroupfs_options(data, &opts); |
| 971 | if (ret) { | 1029 | if (ret) { |
| 972 | if (opts.release_agent) | 1030 | kfree(opts.release_agent); |
| 973 | kfree(opts.release_agent); | ||
| 974 | return ret; | 1031 | return ret; |
| 975 | } | 1032 | } |
| 976 | 1033 | ||
| 977 | root = kzalloc(sizeof(*root), GFP_KERNEL); | 1034 | root = kzalloc(sizeof(*root), GFP_KERNEL); |
| 978 | if (!root) { | 1035 | if (!root) { |
| 979 | if (opts.release_agent) | 1036 | kfree(opts.release_agent); |
| 980 | kfree(opts.release_agent); | ||
| 981 | return -ENOMEM; | 1037 | return -ENOMEM; |
| 982 | } | 1038 | } |
| 983 | 1039 | ||
| @@ -1280,6 +1336,12 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | |||
| 1280 | set_bit(CGRP_RELEASABLE, &oldcgrp->flags); | 1336 | set_bit(CGRP_RELEASABLE, &oldcgrp->flags); |
| 1281 | synchronize_rcu(); | 1337 | synchronize_rcu(); |
| 1282 | put_css_set(cg); | 1338 | put_css_set(cg); |
| 1339 | |||
| 1340 | /* | ||
| 1341 | * Wake up rmdir() waiter. The rmdir should fail since the cgroup | ||
| 1342 | * is no longer empty. | ||
| 1343 | */ | ||
| 1344 | cgroup_wakeup_rmdir_waiters(cgrp); | ||
| 1283 | return 0; | 1345 | return 0; |
| 1284 | } | 1346 | } |
| 1285 | 1347 | ||
| @@ -1625,7 +1687,7 @@ static struct inode_operations cgroup_dir_inode_operations = { | |||
| 1625 | .rename = cgroup_rename, | 1687 | .rename = cgroup_rename, |
| 1626 | }; | 1688 | }; |
| 1627 | 1689 | ||
| 1628 | static int cgroup_create_file(struct dentry *dentry, int mode, | 1690 | static int cgroup_create_file(struct dentry *dentry, mode_t mode, |
| 1629 | struct super_block *sb) | 1691 | struct super_block *sb) |
| 1630 | { | 1692 | { |
| 1631 | static const struct dentry_operations cgroup_dops = { | 1693 | static const struct dentry_operations cgroup_dops = { |
| @@ -1671,7 +1733,7 @@ static int cgroup_create_file(struct dentry *dentry, int mode, | |||
| 1671 | * @mode: mode to set on new directory. | 1733 | * @mode: mode to set on new directory. |
| 1672 | */ | 1734 | */ |
| 1673 | static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry, | 1735 | static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry, |
| 1674 | int mode) | 1736 | mode_t mode) |
| 1675 | { | 1737 | { |
| 1676 | struct dentry *parent; | 1738 | struct dentry *parent; |
| 1677 | int error = 0; | 1739 | int error = 0; |
| @@ -1689,6 +1751,33 @@ static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry, | |||
| 1689 | return error; | 1751 | return error; |
| 1690 | } | 1752 | } |
| 1691 | 1753 | ||
| 1754 | /** | ||
| 1755 | * cgroup_file_mode - deduce file mode of a control file | ||
| 1756 | * @cft: the control file in question | ||
| 1757 | * | ||
| 1758 | * returns cft->mode if ->mode is not 0 | ||
| 1759 | * returns S_IRUGO|S_IWUSR if it has both a read and a write handler | ||
| 1760 | * returns S_IRUGO if it has only a read handler | ||
| 1761 | * returns S_IWUSR if it has only a write handler | ||
| 1762 | */ | ||
| 1763 | static mode_t cgroup_file_mode(const struct cftype *cft) | ||
| 1764 | { | ||
| 1765 | mode_t mode = 0; | ||
| 1766 | |||
| 1767 | if (cft->mode) | ||
| 1768 | return cft->mode; | ||
| 1769 | |||
| 1770 | if (cft->read || cft->read_u64 || cft->read_s64 || | ||
| 1771 | cft->read_map || cft->read_seq_string) | ||
| 1772 | mode |= S_IRUGO; | ||
| 1773 | |||
| 1774 | if (cft->write || cft->write_u64 || cft->write_s64 || | ||
| 1775 | cft->write_string || cft->trigger) | ||
| 1776 | mode |= S_IWUSR; | ||
| 1777 | |||
| 1778 | return mode; | ||
| 1779 | } | ||
| 1780 | |||
| 1692 | int cgroup_add_file(struct cgroup *cgrp, | 1781 | int cgroup_add_file(struct cgroup *cgrp, |
| 1693 | struct cgroup_subsys *subsys, | 1782 | struct cgroup_subsys *subsys, |
| 1694 | const struct cftype *cft) | 1783 | const struct cftype *cft) |
| @@ -1696,6 +1785,7 @@ int cgroup_add_file(struct cgroup *cgrp, | |||
| 1696 | struct dentry *dir = cgrp->dentry; | 1785 | struct dentry *dir = cgrp->dentry; |
| 1697 | struct dentry *dentry; | 1786 | struct dentry *dentry; |
| 1698 | int error; | 1787 | int error; |
| 1788 | mode_t mode; | ||
| 1699 | 1789 | ||
| 1700 | char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 }; | 1790 | char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 }; |
| 1701 | if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) { | 1791 | if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) { |
| @@ -1706,7 +1796,8 @@ int cgroup_add_file(struct cgroup *cgrp, | |||
| 1706 | BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex)); | 1796 | BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex)); |
| 1707 | dentry = lookup_one_len(name, dir, strlen(name)); | 1797 | dentry = lookup_one_len(name, dir, strlen(name)); |
| 1708 | if (!IS_ERR(dentry)) { | 1798 | if (!IS_ERR(dentry)) { |
| 1709 | error = cgroup_create_file(dentry, 0644 | S_IFREG, | 1799 | mode = cgroup_file_mode(cft); |
| 1800 | error = cgroup_create_file(dentry, mode | S_IFREG, | ||
| 1710 | cgrp->root->sb); | 1801 | cgrp->root->sb); |
| 1711 | if (!error) | 1802 | if (!error) |
| 1712 | dentry->d_fsdata = (void *)cft; | 1803 | dentry->d_fsdata = (void *)cft; |
| @@ -2288,6 +2379,7 @@ static struct cftype files[] = { | |||
| 2288 | .write_u64 = cgroup_tasks_write, | 2379 | .write_u64 = cgroup_tasks_write, |
| 2289 | .release = cgroup_tasks_release, | 2380 | .release = cgroup_tasks_release, |
| 2290 | .private = FILE_TASKLIST, | 2381 | .private = FILE_TASKLIST, |
| 2382 | .mode = S_IRUGO | S_IWUSR, | ||
| 2291 | }, | 2383 | }, |
| 2292 | 2384 | ||
| 2293 | { | 2385 | { |
| @@ -2327,6 +2419,17 @@ static int cgroup_populate_dir(struct cgroup *cgrp) | |||
| 2327 | if (ss->populate && (err = ss->populate(ss, cgrp)) < 0) | 2419 | if (ss->populate && (err = ss->populate(ss, cgrp)) < 0) |
| 2328 | return err; | 2420 | return err; |
| 2329 | } | 2421 | } |
| 2422 | /* This cgroup is ready now */ | ||
| 2423 | for_each_subsys(cgrp->root, ss) { | ||
| 2424 | struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; | ||
| 2425 | /* | ||
| 2426 | * Update id->css pointer and make this css visible from | ||
| 2427 | * CSS ID functions. This pointer will be dereferenced | ||
| 2428 | * from RCU-read-side without locks. | ||
| 2429 | */ | ||
| 2430 | if (css->id) | ||
| 2431 | rcu_assign_pointer(css->id->css, css); | ||
| 2432 | } | ||
| 2330 | 2433 | ||
| 2331 | return 0; | 2434 | return 0; |
| 2332 | } | 2435 | } |
| @@ -2338,6 +2441,7 @@ static void init_cgroup_css(struct cgroup_subsys_state *css, | |||
| 2338 | css->cgroup = cgrp; | 2441 | css->cgroup = cgrp; |
| 2339 | atomic_set(&css->refcnt, 1); | 2442 | atomic_set(&css->refcnt, 1); |
| 2340 | css->flags = 0; | 2443 | css->flags = 0; |
| 2444 | css->id = NULL; | ||
| 2341 | if (cgrp == dummytop) | 2445 | if (cgrp == dummytop) |
| 2342 | set_bit(CSS_ROOT, &css->flags); | 2446 | set_bit(CSS_ROOT, &css->flags); |
| 2343 | BUG_ON(cgrp->subsys[ss->subsys_id]); | 2447 | BUG_ON(cgrp->subsys[ss->subsys_id]); |
| @@ -2376,7 +2480,7 @@ static void cgroup_unlock_hierarchy(struct cgroupfs_root *root) | |||
| 2376 | * Must be called with the mutex on the parent inode held | 2480 | * Must be called with the mutex on the parent inode held |
| 2377 | */ | 2481 | */ |
| 2378 | static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | 2482 | static long cgroup_create(struct cgroup *parent, struct dentry *dentry, |
| 2379 | int mode) | 2483 | mode_t mode) |
| 2380 | { | 2484 | { |
| 2381 | struct cgroup *cgrp; | 2485 | struct cgroup *cgrp; |
| 2382 | struct cgroupfs_root *root = parent->root; | 2486 | struct cgroupfs_root *root = parent->root; |
| @@ -2413,6 +2517,10 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
| 2413 | goto err_destroy; | 2517 | goto err_destroy; |
| 2414 | } | 2518 | } |
| 2415 | init_cgroup_css(css, ss, cgrp); | 2519 | init_cgroup_css(css, ss, cgrp); |
| 2520 | if (ss->use_id) | ||
| 2521 | if (alloc_css_id(ss, parent, cgrp)) | ||
| 2522 | goto err_destroy; | ||
| 2523 | /* At error, ->destroy() callback has to free assigned ID. */ | ||
| 2416 | } | 2524 | } |
| 2417 | 2525 | ||
| 2418 | cgroup_lock_hierarchy(root); | 2526 | cgroup_lock_hierarchy(root); |
| @@ -2555,9 +2663,11 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) | |||
| 2555 | struct cgroup *cgrp = dentry->d_fsdata; | 2663 | struct cgroup *cgrp = dentry->d_fsdata; |
| 2556 | struct dentry *d; | 2664 | struct dentry *d; |
| 2557 | struct cgroup *parent; | 2665 | struct cgroup *parent; |
| 2666 | DEFINE_WAIT(wait); | ||
| 2667 | int ret; | ||
| 2558 | 2668 | ||
| 2559 | /* the vfs holds both inode->i_mutex already */ | 2669 | /* the vfs holds both inode->i_mutex already */ |
| 2560 | 2670 | again: | |
| 2561 | mutex_lock(&cgroup_mutex); | 2671 | mutex_lock(&cgroup_mutex); |
| 2562 | if (atomic_read(&cgrp->count) != 0) { | 2672 | if (atomic_read(&cgrp->count) != 0) { |
| 2563 | mutex_unlock(&cgroup_mutex); | 2673 | mutex_unlock(&cgroup_mutex); |
| @@ -2573,17 +2683,39 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) | |||
| 2573 | * Call pre_destroy handlers of subsys. Notify subsystems | 2683 | * Call pre_destroy handlers of subsys. Notify subsystems |
| 2574 | * that rmdir() request comes. | 2684 | * that rmdir() request comes. |
| 2575 | */ | 2685 | */ |
| 2576 | cgroup_call_pre_destroy(cgrp); | 2686 | ret = cgroup_call_pre_destroy(cgrp); |
| 2687 | if (ret) | ||
| 2688 | return ret; | ||
| 2577 | 2689 | ||
| 2578 | mutex_lock(&cgroup_mutex); | 2690 | mutex_lock(&cgroup_mutex); |
| 2579 | parent = cgrp->parent; | 2691 | parent = cgrp->parent; |
| 2580 | 2692 | if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) { | |
| 2581 | if (atomic_read(&cgrp->count) | ||
| 2582 | || !list_empty(&cgrp->children) | ||
| 2583 | || !cgroup_clear_css_refs(cgrp)) { | ||
| 2584 | mutex_unlock(&cgroup_mutex); | 2693 | mutex_unlock(&cgroup_mutex); |
| 2585 | return -EBUSY; | 2694 | return -EBUSY; |
| 2586 | } | 2695 | } |
| 2696 | /* | ||
| 2697 | * css_put/get is provided for subsys to grab refcnt to css. In typical | ||
| 2698 | * case, subsystem has no reference after pre_destroy(). But, under | ||
| 2699 | * hierarchy management, some *temporary* refcnt can be held. | ||
| 2700 | * To avoid returning -EBUSY to a user, waitqueue is used. If subsys | ||
| 2701 | * is really busy, it should return -EBUSY at pre_destroy(). wake_up | ||
| 2702 | * is called when css_put() is called and refcnt goes down to 0. | ||
| 2703 | */ | ||
| 2704 | set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags); | ||
| 2705 | prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE); | ||
| 2706 | |||
| 2707 | if (!cgroup_clear_css_refs(cgrp)) { | ||
| 2708 | mutex_unlock(&cgroup_mutex); | ||
| 2709 | schedule(); | ||
| 2710 | finish_wait(&cgroup_rmdir_waitq, &wait); | ||
| 2711 | clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags); | ||
| 2712 | if (signal_pending(current)) | ||
| 2713 | return -EINTR; | ||
| 2714 | goto again; | ||
| 2715 | } | ||
| 2716 | /* No css_tryget() can succeed after this point. */ | ||
| 2717 | finish_wait(&cgroup_rmdir_waitq, &wait); | ||
| 2718 | clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags); | ||
| 2587 | 2719 | ||
| 2588 | spin_lock(&release_list_lock); | 2720 | spin_lock(&release_list_lock); |
| 2589 | set_bit(CGRP_REMOVED, &cgrp->flags); | 2721 | set_bit(CGRP_REMOVED, &cgrp->flags); |
| @@ -2708,6 +2840,8 @@ int __init cgroup_init(void) | |||
| 2708 | struct cgroup_subsys *ss = subsys[i]; | 2840 | struct cgroup_subsys *ss = subsys[i]; |
| 2709 | if (!ss->early_init) | 2841 | if (!ss->early_init) |
| 2710 | cgroup_init_subsys(ss); | 2842 | cgroup_init_subsys(ss); |
| 2843 | if (ss->use_id) | ||
| 2844 | cgroup_subsys_init_idr(ss); | ||
| 2711 | } | 2845 | } |
| 2712 | 2846 | ||
| 2713 | /* Add init_css_set to the hash table */ | 2847 | /* Add init_css_set to the hash table */ |
| @@ -3084,18 +3218,19 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys, | |||
| 3084 | } | 3218 | } |
| 3085 | 3219 | ||
| 3086 | /** | 3220 | /** |
| 3087 | * cgroup_is_descendant - see if @cgrp is a descendant of current task's cgrp | 3221 | * cgroup_is_descendant - see if @cgrp is a descendant of @task's cgrp |
| 3088 | * @cgrp: the cgroup in question | 3222 | * @cgrp: the cgroup in question |
| 3223 | * @task: the task in question | ||
| 3089 | * | 3224 | * |
| 3090 | * See if @cgrp is a descendant of the current task's cgroup in | 3225 | * See if @cgrp is a descendant of @task's cgroup in the appropriate |
| 3091 | * the appropriate hierarchy. | 3226 | * hierarchy. |
| 3092 | * | 3227 | * |
| 3093 | * If we are sending in dummytop, then presumably we are creating | 3228 | * If we are sending in dummytop, then presumably we are creating |
| 3094 | * the top cgroup in the subsystem. | 3229 | * the top cgroup in the subsystem. |
| 3095 | * | 3230 | * |
| 3096 | * Called only by the ns (nsproxy) cgroup. | 3231 | * Called only by the ns (nsproxy) cgroup. |
| 3097 | */ | 3232 | */ |
| 3098 | int cgroup_is_descendant(const struct cgroup *cgrp) | 3233 | int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task) |
| 3099 | { | 3234 | { |
| 3100 | int ret; | 3235 | int ret; |
| 3101 | struct cgroup *target; | 3236 | struct cgroup *target; |
| @@ -3105,7 +3240,7 @@ int cgroup_is_descendant(const struct cgroup *cgrp) | |||
| 3105 | return 1; | 3240 | return 1; |
| 3106 | 3241 | ||
| 3107 | get_first_subsys(cgrp, NULL, &subsys_id); | 3242 | get_first_subsys(cgrp, NULL, &subsys_id); |
| 3108 | target = task_cgroup(current, subsys_id); | 3243 | target = task_cgroup(task, subsys_id); |
| 3109 | while (cgrp != target && cgrp!= cgrp->top_cgroup) | 3244 | while (cgrp != target && cgrp!= cgrp->top_cgroup) |
| 3110 | cgrp = cgrp->parent; | 3245 | cgrp = cgrp->parent; |
| 3111 | ret = (cgrp == target); | 3246 | ret = (cgrp == target); |
| @@ -3138,10 +3273,12 @@ void __css_put(struct cgroup_subsys_state *css) | |||
| 3138 | { | 3273 | { |
| 3139 | struct cgroup *cgrp = css->cgroup; | 3274 | struct cgroup *cgrp = css->cgroup; |
| 3140 | rcu_read_lock(); | 3275 | rcu_read_lock(); |
| 3141 | if ((atomic_dec_return(&css->refcnt) == 1) && | 3276 | if (atomic_dec_return(&css->refcnt) == 1) { |
| 3142 | notify_on_release(cgrp)) { | 3277 | if (notify_on_release(cgrp)) { |
| 3143 | set_bit(CGRP_RELEASABLE, &cgrp->flags); | 3278 | set_bit(CGRP_RELEASABLE, &cgrp->flags); |
| 3144 | check_for_release(cgrp); | 3279 | check_for_release(cgrp); |
| 3280 | } | ||
| 3281 | cgroup_wakeup_rmdir_waiters(cgrp); | ||
| 3145 | } | 3282 | } |
| 3146 | rcu_read_unlock(); | 3283 | rcu_read_unlock(); |
| 3147 | } | 3284 | } |
| @@ -3241,3 +3378,232 @@ static int __init cgroup_disable(char *str) | |||
| 3241 | return 1; | 3378 | return 1; |
| 3242 | } | 3379 | } |
| 3243 | __setup("cgroup_disable=", cgroup_disable); | 3380 | __setup("cgroup_disable=", cgroup_disable); |
| 3381 | |||
| 3382 | /* | ||
| 3383 | * Functions for CSS ID. | ||
| 3384 | */ | ||
| 3385 | |||
| 3386 | /* | ||
| 3387 | * To get an ID other than 0, this should be called when !cgroup_is_removed(). | ||
| 3388 | */ | ||
| 3389 | unsigned short css_id(struct cgroup_subsys_state *css) | ||
| 3390 | { | ||
| 3391 | struct css_id *cssid = rcu_dereference(css->id); | ||
| 3392 | |||
| 3393 | if (cssid) | ||
| 3394 | return cssid->id; | ||
| 3395 | return 0; | ||
| 3396 | } | ||
| 3397 | |||
| 3398 | unsigned short css_depth(struct cgroup_subsys_state *css) | ||
| 3399 | { | ||
| 3400 | struct css_id *cssid = rcu_dereference(css->id); | ||
| 3401 | |||
| 3402 | if (cssid) | ||
| 3403 | return cssid->depth; | ||
| 3404 | return 0; | ||
| 3405 | } | ||
| 3406 | |||
| 3407 | bool css_is_ancestor(struct cgroup_subsys_state *child, | ||
| 3408 | const struct cgroup_subsys_state *root) | ||
| 3409 | { | ||
| 3410 | struct css_id *child_id = rcu_dereference(child->id); | ||
| 3411 | struct css_id *root_id = rcu_dereference(root->id); | ||
| 3412 | |||
| 3413 | if (!child_id || !root_id || (child_id->depth < root_id->depth)) | ||
| 3414 | return false; | ||
| 3415 | return child_id->stack[root_id->depth] == root_id->id; | ||
| 3416 | } | ||
| 3417 | |||
| 3418 | static void __free_css_id_cb(struct rcu_head *head) | ||
| 3419 | { | ||
| 3420 | struct css_id *id; | ||
| 3421 | |||
| 3422 | id = container_of(head, struct css_id, rcu_head); | ||
| 3423 | kfree(id); | ||
| 3424 | } | ||
| 3425 | |||
| 3426 | void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css) | ||
| 3427 | { | ||
| 3428 | struct css_id *id = css->id; | ||
| 3429 | /* When this is called before css_id initialization, id can be NULL */ | ||
| 3430 | if (!id) | ||
| 3431 | return; | ||
| 3432 | |||
| 3433 | BUG_ON(!ss->use_id); | ||
| 3434 | |||
| 3435 | rcu_assign_pointer(id->css, NULL); | ||
| 3436 | rcu_assign_pointer(css->id, NULL); | ||
| 3437 | spin_lock(&ss->id_lock); | ||
| 3438 | idr_remove(&ss->idr, id->id); | ||
| 3439 | spin_unlock(&ss->id_lock); | ||
| 3440 | call_rcu(&id->rcu_head, __free_css_id_cb); | ||
| 3441 | } | ||
| 3442 | |||
| 3443 | /* | ||
| 3444 | * This is called by init or create(). Thus, calls to this function are | ||
| 3445 | * always serialized (by cgroup_mutex at create()). | ||
| 3446 | */ | ||
| 3447 | |||
| 3448 | static struct css_id *get_new_cssid(struct cgroup_subsys *ss, int depth) | ||
| 3449 | { | ||
| 3450 | struct css_id *newid; | ||
| 3451 | int myid, error, size; | ||
| 3452 | |||
| 3453 | BUG_ON(!ss->use_id); | ||
| 3454 | |||
| 3455 | size = sizeof(*newid) + sizeof(unsigned short) * (depth + 1); | ||
| 3456 | newid = kzalloc(size, GFP_KERNEL); | ||
| 3457 | if (!newid) | ||
| 3458 | return ERR_PTR(-ENOMEM); | ||
| 3459 | /* get id */ | ||
| 3460 | if (unlikely(!idr_pre_get(&ss->idr, GFP_KERNEL))) { | ||
| 3461 | error = -ENOMEM; | ||
| 3462 | goto err_out; | ||
| 3463 | } | ||
| 3464 | spin_lock(&ss->id_lock); | ||
| 3465 | /* Don't use 0. Allocates an ID of 1-65535 */ | ||
| 3466 | error = idr_get_new_above(&ss->idr, newid, 1, &myid); | ||
| 3467 | spin_unlock(&ss->id_lock); | ||
| 3468 | |||
| 3469 | /* Returns error when there is no free space for a new ID. */ | ||
| 3470 | if (error) { | ||
| 3471 | error = -ENOSPC; | ||
| 3472 | goto err_out; | ||
| 3473 | } | ||
| 3474 | if (myid > CSS_ID_MAX) | ||
| 3475 | goto remove_idr; | ||
| 3476 | |||
| 3477 | newid->id = myid; | ||
| 3478 | newid->depth = depth; | ||
| 3479 | return newid; | ||
| 3480 | remove_idr: | ||
| 3481 | error = -ENOSPC; | ||
| 3482 | spin_lock(&ss->id_lock); | ||
| 3483 | idr_remove(&ss->idr, myid); | ||
| 3484 | spin_unlock(&ss->id_lock); | ||
| 3485 | err_out: | ||
| 3486 | kfree(newid); | ||
| 3487 | return ERR_PTR(error); | ||
| 3488 | |||
| 3489 | } | ||
| 3490 | |||
| 3491 | static int __init cgroup_subsys_init_idr(struct cgroup_subsys *ss) | ||
| 3492 | { | ||
| 3493 | struct css_id *newid; | ||
| 3494 | struct cgroup_subsys_state *rootcss; | ||
| 3495 | |||
| 3496 | spin_lock_init(&ss->id_lock); | ||
| 3497 | idr_init(&ss->idr); | ||
| 3498 | |||
| 3499 | rootcss = init_css_set.subsys[ss->subsys_id]; | ||
| 3500 | newid = get_new_cssid(ss, 0); | ||
| 3501 | if (IS_ERR(newid)) | ||
| 3502 | return PTR_ERR(newid); | ||
| 3503 | |||
| 3504 | newid->stack[0] = newid->id; | ||
| 3505 | newid->css = rootcss; | ||
| 3506 | rootcss->id = newid; | ||
| 3507 | return 0; | ||
| 3508 | } | ||
| 3509 | |||
| 3510 | static int alloc_css_id(struct cgroup_subsys *ss, struct cgroup *parent, | ||
| 3511 | struct cgroup *child) | ||
| 3512 | { | ||
| 3513 | int subsys_id, i, depth = 0; | ||
| 3514 | struct cgroup_subsys_state *parent_css, *child_css; | ||
| 3515 | struct css_id *child_id, *parent_id = NULL; | ||
| 3516 | |||
| 3517 | subsys_id = ss->subsys_id; | ||
| 3518 | parent_css = parent->subsys[subsys_id]; | ||
| 3519 | child_css = child->subsys[subsys_id]; | ||
| 3520 | depth = css_depth(parent_css) + 1; | ||
| 3521 | parent_id = parent_css->id; | ||
| 3522 | |||
| 3523 | child_id = get_new_cssid(ss, depth); | ||
| 3524 | if (IS_ERR(child_id)) | ||
| 3525 | return PTR_ERR(child_id); | ||
| 3526 | |||
| 3527 | for (i = 0; i < depth; i++) | ||
| 3528 | child_id->stack[i] = parent_id->stack[i]; | ||
| 3529 | child_id->stack[depth] = child_id->id; | ||
| 3530 | /* | ||
| 3531 | * child_id->css pointer will be set after this cgroup is available | ||
| 3532 | * see cgroup_populate_dir() | ||
| 3533 | */ | ||
| 3534 | rcu_assign_pointer(child_css->id, child_id); | ||
| 3535 | |||
| 3536 | return 0; | ||
| 3537 | } | ||
| 3538 | |||
| 3539 | /** | ||
| 3540 | * css_lookup - lookup css by id | ||
| 3541 | * @ss: cgroup subsys to be looked into. | ||
| 3542 | * @id: the id | ||
| 3543 | * | ||
| 3544 | * Returns pointer to cgroup_subsys_state if there is valid one with id. | ||
| 3545 | * NULL if not. Should be called under rcu_read_lock() | ||
| 3546 | */ | ||
| 3547 | struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id) | ||
| 3548 | { | ||
| 3549 | struct css_id *cssid = NULL; | ||
| 3550 | |||
| 3551 | BUG_ON(!ss->use_id); | ||
| 3552 | cssid = idr_find(&ss->idr, id); | ||
| 3553 | |||
| 3554 | if (unlikely(!cssid)) | ||
| 3555 | return NULL; | ||
| 3556 | |||
| 3557 | return rcu_dereference(cssid->css); | ||
| 3558 | } | ||
| 3559 | |||
| 3560 | /** | ||
| 3561 | * css_get_next - lookup next cgroup under specified hierarchy. | ||
| 3562 | * @ss: pointer to subsystem | ||
| 3563 | * @id: current position of iteration. | ||
| 3564 | * @root: pointer to css. search tree under this. | ||
| 3565 | * @foundid: position of found object. | ||
| 3566 | * | ||
| 3567 | * Search next css under the specified hierarchy of rootid. Calling under | ||
| 3568 | * rcu_read_lock() is necessary. Returns NULL if it reaches the end. | ||
| 3569 | */ | ||
| 3570 | struct cgroup_subsys_state * | ||
| 3571 | css_get_next(struct cgroup_subsys *ss, int id, | ||
| 3572 | struct cgroup_subsys_state *root, int *foundid) | ||
| 3573 | { | ||
| 3574 | struct cgroup_subsys_state *ret = NULL; | ||
| 3575 | struct css_id *tmp; | ||
| 3576 | int tmpid; | ||
| 3577 | int rootid = css_id(root); | ||
| 3578 | int depth = css_depth(root); | ||
| 3579 | |||
| 3580 | if (!rootid) | ||
| 3581 | return NULL; | ||
| 3582 | |||
| 3583 | BUG_ON(!ss->use_id); | ||
| 3584 | /* fill start point for scan */ | ||
| 3585 | tmpid = id; | ||
| 3586 | while (1) { | ||
| 3587 | /* | ||
| 3588 | * scan next entry from bitmap(tree), tmpid is updated after | ||
| 3589 | * idr_get_next(). | ||
| 3590 | */ | ||
| 3591 | spin_lock(&ss->id_lock); | ||
| 3592 | tmp = idr_get_next(&ss->idr, &tmpid); | ||
| 3593 | spin_unlock(&ss->id_lock); | ||
| 3594 | |||
| 3595 | if (!tmp) | ||
| 3596 | break; | ||
| 3597 | if (tmp->depth >= depth && tmp->stack[depth] == rootid) { | ||
| 3598 | ret = rcu_dereference(tmp->css); | ||
| 3599 | if (ret) { | ||
| 3600 | *foundid = tmpid; | ||
| 3601 | break; | ||
| 3602 | } | ||
| 3603 | } | ||
| 3604 | /* continue to scan from next id */ | ||
| 3605 | tmpid = tmpid + 1; | ||
| 3606 | } | ||
| 3607 | return ret; | ||
| 3608 | } | ||
| 3609 | |||
