diff options
author | Aristeu Rozanski <aris@redhat.com> | 2012-08-23 16:53:30 -0400 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2012-08-24 18:55:33 -0400 |
commit | 03b1cde6b22f625ae832b939bc7379ec1466aec5 (patch) | |
tree | 0aecdc872a6ab2a00a8026e1898d4f3492cca87b | |
parent | 13af07df9b7e49f1987cf36aa048dc6c49d0f93d (diff) |
cgroup: add xattr support
This implements one of the items on the plumber's wish list.
As for use cases, quoting Lennart Poettering's reply to that question:
>> What would the use case be for this?
>
> Attaching meta information to services, in an easily discoverable
> way. For example, in systemd we create one cgroup for each service, and
> could then store data like the main pid of the specific service as an
> xattr on the cgroup itself. That way we'd have almost all service state
> in the cgroupfs, which would make it possible to terminate systemd and
> later restart it without losing any state information. But there's more:
> for example, some very peculiar services cannot be terminated on
> shutdown (i.e. fakeraid DM stuff) and it would be really nice if the
> services in question could just mark that on their cgroup, by setting an
> xattr. On the more desktopy side of things there are other
> possibilities: for example there are plans defining what an application
> is along the lines of a cgroup (i.e. an app being a collection of
> processes). With xattrs one could then attach an icon or human readable
> program name on the cgroup.
>
> The key idea is that this would allow attaching runtime meta information
> to cgroups and everything they model (services, apps, vms), that doesn't
> need any complex userspace infrastructure, has good access control
> (i.e. because the file system enforces that anyway, and there's the
> "trusted." xattr namespace), notifications (inotify), and can easily be
> shared among applications.
>
> Lennart
v7:
- no changes
v6:
- remove user xattr namespace, only allow trusted and security
v5:
- check for capabilities before setting/removing xattrs
v4:
- no changes
v3:
- instead of config option, use mount option to enable xattr support
Original-patch-by: Li Zefan <lizefan@huawei.com>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Hillf Danton <dhillf@gmail.com>
Cc: Lennart Poettering <lpoetter@redhat.com>
Signed-off-by: Li Zefan <lizefan@huawei.com>
Signed-off-by: Aristeu Rozanski <aris@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
-rw-r--r-- | include/linux/cgroup.h | 13 | ||||
-rw-r--r-- | kernel/cgroup.c | 100 |
2 files changed, 103 insertions, 10 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index c90eaa803440..145901f5ef99 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -17,6 +17,7 @@ | |||
17 | #include <linux/rwsem.h> | 17 | #include <linux/rwsem.h> |
18 | #include <linux/idr.h> | 18 | #include <linux/idr.h> |
19 | #include <linux/workqueue.h> | 19 | #include <linux/workqueue.h> |
20 | #include <linux/xattr.h> | ||
20 | 21 | ||
21 | #ifdef CONFIG_CGROUPS | 22 | #ifdef CONFIG_CGROUPS |
22 | 23 | ||
@@ -216,6 +217,9 @@ struct cgroup { | |||
216 | /* List of events which userspace want to receive */ | 217 | /* List of events which userspace want to receive */ |
217 | struct list_head event_list; | 218 | struct list_head event_list; |
218 | spinlock_t event_list_lock; | 219 | spinlock_t event_list_lock; |
220 | |||
221 | /* directory xattrs */ | ||
222 | struct simple_xattrs xattrs; | ||
219 | }; | 223 | }; |
220 | 224 | ||
221 | /* | 225 | /* |
@@ -309,6 +313,9 @@ struct cftype { | |||
309 | /* CFTYPE_* flags */ | 313 | /* CFTYPE_* flags */ |
310 | unsigned int flags; | 314 | unsigned int flags; |
311 | 315 | ||
316 | /* file xattrs */ | ||
317 | struct simple_xattrs xattrs; | ||
318 | |||
312 | int (*open)(struct inode *inode, struct file *file); | 319 | int (*open)(struct inode *inode, struct file *file); |
313 | ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft, | 320 | ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft, |
314 | struct file *file, | 321 | struct file *file, |
@@ -394,7 +401,7 @@ struct cftype { | |||
394 | */ | 401 | */ |
395 | struct cftype_set { | 402 | struct cftype_set { |
396 | struct list_head node; /* chained at subsys->cftsets */ | 403 | struct list_head node; /* chained at subsys->cftsets */ |
397 | const struct cftype *cfts; | 404 | struct cftype *cfts; |
398 | }; | 405 | }; |
399 | 406 | ||
400 | struct cgroup_scanner { | 407 | struct cgroup_scanner { |
@@ -406,8 +413,8 @@ struct cgroup_scanner { | |||
406 | void *data; | 413 | void *data; |
407 | }; | 414 | }; |
408 | 415 | ||
409 | int cgroup_add_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts); | 416 | int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); |
410 | int cgroup_rm_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts); | 417 | int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); |
411 | 418 | ||
412 | int cgroup_is_removed(const struct cgroup *cgrp); | 419 | int cgroup_is_removed(const struct cgroup *cgrp); |
413 | 420 | ||
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 875a7130647c..508b4a97ab19 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -276,7 +276,8 @@ inline int cgroup_is_removed(const struct cgroup *cgrp) | |||
276 | 276 | ||
277 | /* bits in struct cgroupfs_root flags field */ | 277 | /* bits in struct cgroupfs_root flags field */ |
278 | enum { | 278 | enum { |
279 | ROOT_NOPREFIX, /* mounted subsystems have no named prefix */ | 279 | ROOT_NOPREFIX, /* mounted subsystems have no named prefix */ |
280 | ROOT_XATTR, /* supports extended attributes */ | ||
280 | }; | 281 | }; |
281 | 282 | ||
282 | static int cgroup_is_releasable(const struct cgroup *cgrp) | 283 | static int cgroup_is_releasable(const struct cgroup *cgrp) |
@@ -913,15 +914,19 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode) | |||
913 | */ | 914 | */ |
914 | BUG_ON(!list_empty(&cgrp->pidlists)); | 915 | BUG_ON(!list_empty(&cgrp->pidlists)); |
915 | 916 | ||
917 | simple_xattrs_free(&cgrp->xattrs); | ||
918 | |||
916 | kfree_rcu(cgrp, rcu_head); | 919 | kfree_rcu(cgrp, rcu_head); |
917 | } else { | 920 | } else { |
918 | struct cfent *cfe = __d_cfe(dentry); | 921 | struct cfent *cfe = __d_cfe(dentry); |
919 | struct cgroup *cgrp = dentry->d_parent->d_fsdata; | 922 | struct cgroup *cgrp = dentry->d_parent->d_fsdata; |
923 | struct cftype *cft = cfe->type; | ||
920 | 924 | ||
921 | WARN_ONCE(!list_empty(&cfe->node) && | 925 | WARN_ONCE(!list_empty(&cfe->node) && |
922 | cgrp != &cgrp->root->top_cgroup, | 926 | cgrp != &cgrp->root->top_cgroup, |
923 | "cfe still linked for %s\n", cfe->type->name); | 927 | "cfe still linked for %s\n", cfe->type->name); |
924 | kfree(cfe); | 928 | kfree(cfe); |
929 | simple_xattrs_free(&cft->xattrs); | ||
925 | } | 930 | } |
926 | iput(inode); | 931 | iput(inode); |
927 | } | 932 | } |
@@ -1140,6 +1145,8 @@ static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry) | |||
1140 | seq_printf(seq, ",%s", ss->name); | 1145 | seq_printf(seq, ",%s", ss->name); |
1141 | if (test_bit(ROOT_NOPREFIX, &root->flags)) | 1146 | if (test_bit(ROOT_NOPREFIX, &root->flags)) |
1142 | seq_puts(seq, ",noprefix"); | 1147 | seq_puts(seq, ",noprefix"); |
1148 | if (test_bit(ROOT_XATTR, &root->flags)) | ||
1149 | seq_puts(seq, ",xattr"); | ||
1143 | if (strlen(root->release_agent_path)) | 1150 | if (strlen(root->release_agent_path)) |
1144 | seq_printf(seq, ",release_agent=%s", root->release_agent_path); | 1151 | seq_printf(seq, ",release_agent=%s", root->release_agent_path); |
1145 | if (clone_children(&root->top_cgroup)) | 1152 | if (clone_children(&root->top_cgroup)) |
@@ -1208,6 +1215,10 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) | |||
1208 | opts->clone_children = true; | 1215 | opts->clone_children = true; |
1209 | continue; | 1216 | continue; |
1210 | } | 1217 | } |
1218 | if (!strcmp(token, "xattr")) { | ||
1219 | set_bit(ROOT_XATTR, &opts->flags); | ||
1220 | continue; | ||
1221 | } | ||
1211 | if (!strncmp(token, "release_agent=", 14)) { | 1222 | if (!strncmp(token, "release_agent=", 14)) { |
1212 | /* Specifying two release agents is forbidden */ | 1223 | /* Specifying two release agents is forbidden */ |
1213 | if (opts->release_agent) | 1224 | if (opts->release_agent) |
@@ -1425,6 +1436,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) | |||
1425 | mutex_init(&cgrp->pidlist_mutex); | 1436 | mutex_init(&cgrp->pidlist_mutex); |
1426 | INIT_LIST_HEAD(&cgrp->event_list); | 1437 | INIT_LIST_HEAD(&cgrp->event_list); |
1427 | spin_lock_init(&cgrp->event_list_lock); | 1438 | spin_lock_init(&cgrp->event_list_lock); |
1439 | simple_xattrs_init(&cgrp->xattrs); | ||
1428 | } | 1440 | } |
1429 | 1441 | ||
1430 | static void init_cgroup_root(struct cgroupfs_root *root) | 1442 | static void init_cgroup_root(struct cgroupfs_root *root) |
@@ -1769,6 +1781,8 @@ static void cgroup_kill_sb(struct super_block *sb) { | |||
1769 | mutex_unlock(&cgroup_root_mutex); | 1781 | mutex_unlock(&cgroup_root_mutex); |
1770 | mutex_unlock(&cgroup_mutex); | 1782 | mutex_unlock(&cgroup_mutex); |
1771 | 1783 | ||
1784 | simple_xattrs_free(&cgrp->xattrs); | ||
1785 | |||
1772 | kill_litter_super(sb); | 1786 | kill_litter_super(sb); |
1773 | cgroup_drop_root(root); | 1787 | cgroup_drop_root(root); |
1774 | } | 1788 | } |
@@ -2575,6 +2589,64 @@ static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
2575 | return simple_rename(old_dir, old_dentry, new_dir, new_dentry); | 2589 | return simple_rename(old_dir, old_dentry, new_dir, new_dentry); |
2576 | } | 2590 | } |
2577 | 2591 | ||
2592 | static struct simple_xattrs *__d_xattrs(struct dentry *dentry) | ||
2593 | { | ||
2594 | if (S_ISDIR(dentry->d_inode->i_mode)) | ||
2595 | return &__d_cgrp(dentry)->xattrs; | ||
2596 | else | ||
2597 | return &__d_cft(dentry)->xattrs; | ||
2598 | } | ||
2599 | |||
2600 | static inline int xattr_enabled(struct dentry *dentry) | ||
2601 | { | ||
2602 | struct cgroupfs_root *root = dentry->d_sb->s_fs_info; | ||
2603 | return test_bit(ROOT_XATTR, &root->flags); | ||
2604 | } | ||
2605 | |||
2606 | static bool is_valid_xattr(const char *name) | ||
2607 | { | ||
2608 | if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) || | ||
2609 | !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) | ||
2610 | return true; | ||
2611 | return false; | ||
2612 | } | ||
2613 | |||
2614 | static int cgroup_setxattr(struct dentry *dentry, const char *name, | ||
2615 | const void *val, size_t size, int flags) | ||
2616 | { | ||
2617 | if (!xattr_enabled(dentry)) | ||
2618 | return -EOPNOTSUPP; | ||
2619 | if (!is_valid_xattr(name)) | ||
2620 | return -EINVAL; | ||
2621 | return simple_xattr_set(__d_xattrs(dentry), name, val, size, flags); | ||
2622 | } | ||
2623 | |||
2624 | static int cgroup_removexattr(struct dentry *dentry, const char *name) | ||
2625 | { | ||
2626 | if (!xattr_enabled(dentry)) | ||
2627 | return -EOPNOTSUPP; | ||
2628 | if (!is_valid_xattr(name)) | ||
2629 | return -EINVAL; | ||
2630 | return simple_xattr_remove(__d_xattrs(dentry), name); | ||
2631 | } | ||
2632 | |||
2633 | static ssize_t cgroup_getxattr(struct dentry *dentry, const char *name, | ||
2634 | void *buf, size_t size) | ||
2635 | { | ||
2636 | if (!xattr_enabled(dentry)) | ||
2637 | return -EOPNOTSUPP; | ||
2638 | if (!is_valid_xattr(name)) | ||
2639 | return -EINVAL; | ||
2640 | return simple_xattr_get(__d_xattrs(dentry), name, buf, size); | ||
2641 | } | ||
2642 | |||
2643 | static ssize_t cgroup_listxattr(struct dentry *dentry, char *buf, size_t size) | ||
2644 | { | ||
2645 | if (!xattr_enabled(dentry)) | ||
2646 | return -EOPNOTSUPP; | ||
2647 | return simple_xattr_list(__d_xattrs(dentry), buf, size); | ||
2648 | } | ||
2649 | |||
2578 | static const struct file_operations cgroup_file_operations = { | 2650 | static const struct file_operations cgroup_file_operations = { |
2579 | .read = cgroup_file_read, | 2651 | .read = cgroup_file_read, |
2580 | .write = cgroup_file_write, | 2652 | .write = cgroup_file_write, |
@@ -2583,11 +2655,22 @@ static const struct file_operations cgroup_file_operations = { | |||
2583 | .release = cgroup_file_release, | 2655 | .release = cgroup_file_release, |
2584 | }; | 2656 | }; |
2585 | 2657 | ||
2658 | static const struct inode_operations cgroup_file_inode_operations = { | ||
2659 | .setxattr = cgroup_setxattr, | ||
2660 | .getxattr = cgroup_getxattr, | ||
2661 | .listxattr = cgroup_listxattr, | ||
2662 | .removexattr = cgroup_removexattr, | ||
2663 | }; | ||
2664 | |||
2586 | static const struct inode_operations cgroup_dir_inode_operations = { | 2665 | static const struct inode_operations cgroup_dir_inode_operations = { |
2587 | .lookup = cgroup_lookup, | 2666 | .lookup = cgroup_lookup, |
2588 | .mkdir = cgroup_mkdir, | 2667 | .mkdir = cgroup_mkdir, |
2589 | .rmdir = cgroup_rmdir, | 2668 | .rmdir = cgroup_rmdir, |
2590 | .rename = cgroup_rename, | 2669 | .rename = cgroup_rename, |
2670 | .setxattr = cgroup_setxattr, | ||
2671 | .getxattr = cgroup_getxattr, | ||
2672 | .listxattr = cgroup_listxattr, | ||
2673 | .removexattr = cgroup_removexattr, | ||
2591 | }; | 2674 | }; |
2592 | 2675 | ||
2593 | static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) | 2676 | static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) |
@@ -2635,6 +2718,7 @@ static int cgroup_create_file(struct dentry *dentry, umode_t mode, | |||
2635 | } else if (S_ISREG(mode)) { | 2718 | } else if (S_ISREG(mode)) { |
2636 | inode->i_size = 0; | 2719 | inode->i_size = 0; |
2637 | inode->i_fop = &cgroup_file_operations; | 2720 | inode->i_fop = &cgroup_file_operations; |
2721 | inode->i_op = &cgroup_file_inode_operations; | ||
2638 | } | 2722 | } |
2639 | d_instantiate(dentry, inode); | 2723 | d_instantiate(dentry, inode); |
2640 | dget(dentry); /* Extra count - pin the dentry in core */ | 2724 | dget(dentry); /* Extra count - pin the dentry in core */ |
@@ -2695,7 +2779,7 @@ static umode_t cgroup_file_mode(const struct cftype *cft) | |||
2695 | } | 2779 | } |
2696 | 2780 | ||
2697 | static int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys, | 2781 | static int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys, |
2698 | const struct cftype *cft) | 2782 | struct cftype *cft) |
2699 | { | 2783 | { |
2700 | struct dentry *dir = cgrp->dentry; | 2784 | struct dentry *dir = cgrp->dentry; |
2701 | struct cgroup *parent = __d_cgrp(dir); | 2785 | struct cgroup *parent = __d_cgrp(dir); |
@@ -2705,6 +2789,8 @@ static int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys, | |||
2705 | umode_t mode; | 2789 | umode_t mode; |
2706 | char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 }; | 2790 | char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 }; |
2707 | 2791 | ||
2792 | simple_xattrs_init(&cft->xattrs); | ||
2793 | |||
2708 | /* does @cft->flags tell us to skip creation on @cgrp? */ | 2794 | /* does @cft->flags tell us to skip creation on @cgrp? */ |
2709 | if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgrp->parent) | 2795 | if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgrp->parent) |
2710 | return 0; | 2796 | return 0; |
@@ -2745,9 +2831,9 @@ out: | |||
2745 | } | 2831 | } |
2746 | 2832 | ||
2747 | static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys, | 2833 | static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys, |
2748 | const struct cftype cfts[], bool is_add) | 2834 | struct cftype cfts[], bool is_add) |
2749 | { | 2835 | { |
2750 | const struct cftype *cft; | 2836 | struct cftype *cft; |
2751 | int err, ret = 0; | 2837 | int err, ret = 0; |
2752 | 2838 | ||
2753 | for (cft = cfts; cft->name[0] != '\0'; cft++) { | 2839 | for (cft = cfts; cft->name[0] != '\0'; cft++) { |
@@ -2781,7 +2867,7 @@ static void cgroup_cfts_prepare(void) | |||
2781 | } | 2867 | } |
2782 | 2868 | ||
2783 | static void cgroup_cfts_commit(struct cgroup_subsys *ss, | 2869 | static void cgroup_cfts_commit(struct cgroup_subsys *ss, |
2784 | const struct cftype *cfts, bool is_add) | 2870 | struct cftype *cfts, bool is_add) |
2785 | __releases(&cgroup_mutex) __releases(&cgroup_cft_mutex) | 2871 | __releases(&cgroup_mutex) __releases(&cgroup_cft_mutex) |
2786 | { | 2872 | { |
2787 | LIST_HEAD(pending); | 2873 | LIST_HEAD(pending); |
@@ -2832,7 +2918,7 @@ static void cgroup_cfts_commit(struct cgroup_subsys *ss, | |||
2832 | * function currently returns 0 as long as @cfts registration is successful | 2918 | * function currently returns 0 as long as @cfts registration is successful |
2833 | * even if some file creation attempts on existing cgroups fail. | 2919 | * even if some file creation attempts on existing cgroups fail. |
2834 | */ | 2920 | */ |
2835 | int cgroup_add_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts) | 2921 | int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) |
2836 | { | 2922 | { |
2837 | struct cftype_set *set; | 2923 | struct cftype_set *set; |
2838 | 2924 | ||
@@ -2862,7 +2948,7 @@ EXPORT_SYMBOL_GPL(cgroup_add_cftypes); | |||
2862 | * Returns 0 on successful unregistration, -ENOENT if @cfts is not | 2948 | * Returns 0 on successful unregistration, -ENOENT if @cfts is not |
2863 | * registered with @ss. | 2949 | * registered with @ss. |
2864 | */ | 2950 | */ |
2865 | int cgroup_rm_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts) | 2951 | int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) |
2866 | { | 2952 | { |
2867 | struct cftype_set *set; | 2953 | struct cftype_set *set; |
2868 | 2954 | ||