aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/cgroup.c
diff options
context:
space:
mode:
authorAristeu Rozanski <aris@redhat.com>2012-08-23 16:53:30 -0400
committerTejun Heo <tj@kernel.org>2012-08-24 18:55:33 -0400
commit03b1cde6b22f625ae832b939bc7379ec1466aec5 (patch)
tree0aecdc872a6ab2a00a8026e1898d4f3492cca87b /kernel/cgroup.c
parent13af07df9b7e49f1987cf36aa048dc6c49d0f93d (diff)
cgroup: add xattr support
This is one of the items in the plumber's wish list.

For use cases:

>> What would the use case be for this?
>
> Attaching meta information to services, in an easily discoverable
> way. For example, in systemd we create one cgroup for each service, and
> could then store data like the main pid of the specific service as an
> xattr on the cgroup itself. That way we'd have almost all service state
> in the cgroupfs, which would make it possible to terminate systemd and
> later restart it without losing any state information. But there's more:
> for example, some very peculiar services cannot be terminated on
> shutdown (i.e. fakeraid DM stuff) and it would be really nice if the
> services in question could just mark that on their cgroup, by setting an
> xattr. On the more desktopy side of things there are other
> possibilities: for example there are plans defining what an application
> is along the lines of a cgroup (i.e. an app being a collection of
> processes). With xattrs one could then attach an icon or human readable
> program name on the cgroup.
>
> The key idea is that this would allow attaching runtime meta information
> to cgroups and everything they model (services, apps, vms), that doesn't
> need any complex userspace infrastructure, has good access control
> (i.e. because the file system enforces that anyway, and there's the
> "trusted." xattr namespace), notifications (inotify), and can easily be
> shared among applications.
>
> Lennart

v7:
- no changes
v6:
- remove user xattr namespace, only allow trusted and security
v5:
- check for capabilities before setting/removing xattrs
v4:
- no changes
v3:
- instead of config option, use mount option to enable xattr support

Original-patch-by: Li Zefan <lizefan@huawei.com>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Hillf Danton <dhillf@gmail.com>
Cc: Lennart Poettering <lpoetter@redhat.com>
Signed-off-by: Li Zefan <lizefan@huawei.com>
Signed-off-by: Aristeu Rozanski <aris@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'kernel/cgroup.c')
-rw-r--r--kernel/cgroup.c100
1 files changed, 93 insertions, 7 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 875a7130647c..508b4a97ab19 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -276,7 +276,8 @@ inline int cgroup_is_removed(const struct cgroup *cgrp)
276 276
277/* bits in struct cgroupfs_root flags field */ 277/* bits in struct cgroupfs_root flags field */
278enum { 278enum {
279 ROOT_NOPREFIX, /* mounted subsystems have no named prefix */ 279 ROOT_NOPREFIX, /* mounted subsystems have no named prefix */
280 ROOT_XATTR, /* supports extended attributes */
280}; 281};
281 282
282static int cgroup_is_releasable(const struct cgroup *cgrp) 283static int cgroup_is_releasable(const struct cgroup *cgrp)
@@ -913,15 +914,19 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
913 */ 914 */
914 BUG_ON(!list_empty(&cgrp->pidlists)); 915 BUG_ON(!list_empty(&cgrp->pidlists));
915 916
917 simple_xattrs_free(&cgrp->xattrs);
918
916 kfree_rcu(cgrp, rcu_head); 919 kfree_rcu(cgrp, rcu_head);
917 } else { 920 } else {
918 struct cfent *cfe = __d_cfe(dentry); 921 struct cfent *cfe = __d_cfe(dentry);
919 struct cgroup *cgrp = dentry->d_parent->d_fsdata; 922 struct cgroup *cgrp = dentry->d_parent->d_fsdata;
923 struct cftype *cft = cfe->type;
920 924
921 WARN_ONCE(!list_empty(&cfe->node) && 925 WARN_ONCE(!list_empty(&cfe->node) &&
922 cgrp != &cgrp->root->top_cgroup, 926 cgrp != &cgrp->root->top_cgroup,
923 "cfe still linked for %s\n", cfe->type->name); 927 "cfe still linked for %s\n", cfe->type->name);
924 kfree(cfe); 928 kfree(cfe);
929 simple_xattrs_free(&cft->xattrs);
925 } 930 }
926 iput(inode); 931 iput(inode);
927} 932}
@@ -1140,6 +1145,8 @@ static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry)
1140 seq_printf(seq, ",%s", ss->name); 1145 seq_printf(seq, ",%s", ss->name);
1141 if (test_bit(ROOT_NOPREFIX, &root->flags)) 1146 if (test_bit(ROOT_NOPREFIX, &root->flags))
1142 seq_puts(seq, ",noprefix"); 1147 seq_puts(seq, ",noprefix");
1148 if (test_bit(ROOT_XATTR, &root->flags))
1149 seq_puts(seq, ",xattr");
1143 if (strlen(root->release_agent_path)) 1150 if (strlen(root->release_agent_path))
1144 seq_printf(seq, ",release_agent=%s", root->release_agent_path); 1151 seq_printf(seq, ",release_agent=%s", root->release_agent_path);
1145 if (clone_children(&root->top_cgroup)) 1152 if (clone_children(&root->top_cgroup))
@@ -1208,6 +1215,10 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
1208 opts->clone_children = true; 1215 opts->clone_children = true;
1209 continue; 1216 continue;
1210 } 1217 }
1218 if (!strcmp(token, "xattr")) {
1219 set_bit(ROOT_XATTR, &opts->flags);
1220 continue;
1221 }
1211 if (!strncmp(token, "release_agent=", 14)) { 1222 if (!strncmp(token, "release_agent=", 14)) {
1212 /* Specifying two release agents is forbidden */ 1223 /* Specifying two release agents is forbidden */
1213 if (opts->release_agent) 1224 if (opts->release_agent)
@@ -1425,6 +1436,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
1425 mutex_init(&cgrp->pidlist_mutex); 1436 mutex_init(&cgrp->pidlist_mutex);
1426 INIT_LIST_HEAD(&cgrp->event_list); 1437 INIT_LIST_HEAD(&cgrp->event_list);
1427 spin_lock_init(&cgrp->event_list_lock); 1438 spin_lock_init(&cgrp->event_list_lock);
1439 simple_xattrs_init(&cgrp->xattrs);
1428} 1440}
1429 1441
1430static void init_cgroup_root(struct cgroupfs_root *root) 1442static void init_cgroup_root(struct cgroupfs_root *root)
@@ -1769,6 +1781,8 @@ static void cgroup_kill_sb(struct super_block *sb) {
1769 mutex_unlock(&cgroup_root_mutex); 1781 mutex_unlock(&cgroup_root_mutex);
1770 mutex_unlock(&cgroup_mutex); 1782 mutex_unlock(&cgroup_mutex);
1771 1783
1784 simple_xattrs_free(&cgrp->xattrs);
1785
1772 kill_litter_super(sb); 1786 kill_litter_super(sb);
1773 cgroup_drop_root(root); 1787 cgroup_drop_root(root);
1774} 1788}
@@ -2575,6 +2589,64 @@ static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry,
2575 return simple_rename(old_dir, old_dentry, new_dir, new_dentry); 2589 return simple_rename(old_dir, old_dentry, new_dir, new_dentry);
2576} 2590}
2577 2591
2592static struct simple_xattrs *__d_xattrs(struct dentry *dentry)
2593{
2594 if (S_ISDIR(dentry->d_inode->i_mode))
2595 return &__d_cgrp(dentry)->xattrs;
2596 else
2597 return &__d_cft(dentry)->xattrs;
2598}
2599
2600static inline int xattr_enabled(struct dentry *dentry)
2601{
2602 struct cgroupfs_root *root = dentry->d_sb->s_fs_info;
2603 return test_bit(ROOT_XATTR, &root->flags);
2604}
2605
2606static bool is_valid_xattr(const char *name)
2607{
2608 if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
2609 !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN))
2610 return true;
2611 return false;
2612}
2613
/*
 * ->setxattr handler: store attribute @name (@size bytes at @val) on
 * @dentry's xattr list.
 *
 * Returns -EOPNOTSUPP when xattr_enabled() is false for @dentry,
 * -EINVAL when @name fails is_valid_xattr(), and otherwise whatever
 * simple_xattr_set() returns.
 */
static int cgroup_setxattr(struct dentry *dentry, const char *name,
			   const void *val, size_t size, int flags)
{
	int ret;

	if (!xattr_enabled(dentry))
		ret = -EOPNOTSUPP;
	else if (!is_valid_xattr(name))
		ret = -EINVAL;
	else
		ret = simple_xattr_set(__d_xattrs(dentry), name, val, size,
				       flags);

	return ret;
}
2623
/*
 * ->removexattr handler: drop attribute @name from @dentry's xattr list.
 *
 * Returns -EOPNOTSUPP when xattr_enabled() is false for @dentry,
 * -EINVAL when @name fails is_valid_xattr(), and otherwise whatever
 * simple_xattr_remove() returns.
 */
static int cgroup_removexattr(struct dentry *dentry, const char *name)
{
	int ret;

	if (!xattr_enabled(dentry))
		ret = -EOPNOTSUPP;
	else if (!is_valid_xattr(name))
		ret = -EINVAL;
	else
		ret = simple_xattr_remove(__d_xattrs(dentry), name);

	return ret;
}
2632
/*
 * ->getxattr handler: look up attribute @name on @dentry's xattr list,
 * passing the caller's @buf and @size through to simple_xattr_get().
 *
 * Returns -EOPNOTSUPP when xattr_enabled() is false for @dentry,
 * -EINVAL when @name fails is_valid_xattr(), and otherwise whatever
 * simple_xattr_get() returns.
 */
static ssize_t cgroup_getxattr(struct dentry *dentry, const char *name,
			       void *buf, size_t size)
{
	ssize_t ret;

	if (!xattr_enabled(dentry))
		ret = -EOPNOTSUPP;
	else if (!is_valid_xattr(name))
		ret = -EINVAL;
	else
		ret = simple_xattr_get(__d_xattrs(dentry), name, buf, size);

	return ret;
}
2642
/*
 * ->listxattr handler: enumerate the attributes on @dentry's xattr
 * list via simple_xattr_list(), passing @buf and @size through.
 *
 * Returns -EOPNOTSUPP when xattr_enabled() is false for @dentry, and
 * otherwise whatever simple_xattr_list() returns.  No name filtering
 * is applied here.
 */
static ssize_t cgroup_listxattr(struct dentry *dentry, char *buf, size_t size)
{
	ssize_t ret;

	if (xattr_enabled(dentry))
		ret = simple_xattr_list(__d_xattrs(dentry), buf, size);
	else
		ret = -EOPNOTSUPP;

	return ret;
}
2649
2578static const struct file_operations cgroup_file_operations = { 2650static const struct file_operations cgroup_file_operations = {
2579 .read = cgroup_file_read, 2651 .read = cgroup_file_read,
2580 .write = cgroup_file_write, 2652 .write = cgroup_file_write,
@@ -2583,11 +2655,22 @@ static const struct file_operations cgroup_file_operations = {
2583 .release = cgroup_file_release, 2655 .release = cgroup_file_release,
2584}; 2656};
2585 2657
2658static const struct inode_operations cgroup_file_inode_operations = {
2659 .setxattr = cgroup_setxattr,
2660 .getxattr = cgroup_getxattr,
2661 .listxattr = cgroup_listxattr,
2662 .removexattr = cgroup_removexattr,
2663};
2664
2586static const struct inode_operations cgroup_dir_inode_operations = { 2665static const struct inode_operations cgroup_dir_inode_operations = {
2587 .lookup = cgroup_lookup, 2666 .lookup = cgroup_lookup,
2588 .mkdir = cgroup_mkdir, 2667 .mkdir = cgroup_mkdir,
2589 .rmdir = cgroup_rmdir, 2668 .rmdir = cgroup_rmdir,
2590 .rename = cgroup_rename, 2669 .rename = cgroup_rename,
2670 .setxattr = cgroup_setxattr,
2671 .getxattr = cgroup_getxattr,
2672 .listxattr = cgroup_listxattr,
2673 .removexattr = cgroup_removexattr,
2591}; 2674};
2592 2675
2593static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) 2676static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
@@ -2635,6 +2718,7 @@ static int cgroup_create_file(struct dentry *dentry, umode_t mode,
2635 } else if (S_ISREG(mode)) { 2718 } else if (S_ISREG(mode)) {
2636 inode->i_size = 0; 2719 inode->i_size = 0;
2637 inode->i_fop = &cgroup_file_operations; 2720 inode->i_fop = &cgroup_file_operations;
2721 inode->i_op = &cgroup_file_inode_operations;
2638 } 2722 }
2639 d_instantiate(dentry, inode); 2723 d_instantiate(dentry, inode);
2640 dget(dentry); /* Extra count - pin the dentry in core */ 2724 dget(dentry); /* Extra count - pin the dentry in core */
@@ -2695,7 +2779,7 @@ static umode_t cgroup_file_mode(const struct cftype *cft)
2695} 2779}
2696 2780
2697static int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys, 2781static int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys,
2698 const struct cftype *cft) 2782 struct cftype *cft)
2699{ 2783{
2700 struct dentry *dir = cgrp->dentry; 2784 struct dentry *dir = cgrp->dentry;
2701 struct cgroup *parent = __d_cgrp(dir); 2785 struct cgroup *parent = __d_cgrp(dir);
@@ -2705,6 +2789,8 @@ static int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys,
2705 umode_t mode; 2789 umode_t mode;
2706 char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 }; 2790 char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 };
2707 2791
2792 simple_xattrs_init(&cft->xattrs);
2793
2708 /* does @cft->flags tell us to skip creation on @cgrp? */ 2794 /* does @cft->flags tell us to skip creation on @cgrp? */
2709 if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgrp->parent) 2795 if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgrp->parent)
2710 return 0; 2796 return 0;
@@ -2745,9 +2831,9 @@ out:
2745} 2831}
2746 2832
2747static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys, 2833static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys,
2748 const struct cftype cfts[], bool is_add) 2834 struct cftype cfts[], bool is_add)
2749{ 2835{
2750 const struct cftype *cft; 2836 struct cftype *cft;
2751 int err, ret = 0; 2837 int err, ret = 0;
2752 2838
2753 for (cft = cfts; cft->name[0] != '\0'; cft++) { 2839 for (cft = cfts; cft->name[0] != '\0'; cft++) {
@@ -2781,7 +2867,7 @@ static void cgroup_cfts_prepare(void)
2781} 2867}
2782 2868
2783static void cgroup_cfts_commit(struct cgroup_subsys *ss, 2869static void cgroup_cfts_commit(struct cgroup_subsys *ss,
2784 const struct cftype *cfts, bool is_add) 2870 struct cftype *cfts, bool is_add)
2785 __releases(&cgroup_mutex) __releases(&cgroup_cft_mutex) 2871 __releases(&cgroup_mutex) __releases(&cgroup_cft_mutex)
2786{ 2872{
2787 LIST_HEAD(pending); 2873 LIST_HEAD(pending);
@@ -2832,7 +2918,7 @@ static void cgroup_cfts_commit(struct cgroup_subsys *ss,
2832 * function currently returns 0 as long as @cfts registration is successful 2918 * function currently returns 0 as long as @cfts registration is successful
2833 * even if some file creation attempts on existing cgroups fail. 2919 * even if some file creation attempts on existing cgroups fail.
2834 */ 2920 */
2835int cgroup_add_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts) 2921int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
2836{ 2922{
2837 struct cftype_set *set; 2923 struct cftype_set *set;
2838 2924
@@ -2862,7 +2948,7 @@ EXPORT_SYMBOL_GPL(cgroup_add_cftypes);
2862 * Returns 0 on successful unregistration, -ENOENT if @cfts is not 2948 * Returns 0 on successful unregistration, -ENOENT if @cfts is not
2863 * registered with @ss. 2949 * registered with @ss.
2864 */ 2950 */
2865int cgroup_rm_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts) 2951int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
2866{ 2952{
2867 struct cftype_set *set; 2953 struct cftype_set *set;
2868 2954