aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/cgroup.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/cgroup.c')
-rw-r--r--kernel/cgroup.c253
1 files changed, 3 insertions, 250 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 8bd9cfdc70d7..4bccaa7dda35 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -56,11 +56,8 @@
56#include <linux/pid_namespace.h> 56#include <linux/pid_namespace.h>
57#include <linux/idr.h> 57#include <linux/idr.h>
58#include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */ 58#include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
59#include <linux/eventfd.h>
60#include <linux/poll.h>
61#include <linux/flex_array.h> /* used in cgroup_attach_task */ 59#include <linux/flex_array.h> /* used in cgroup_attach_task */
62#include <linux/kthread.h> 60#include <linux/kthread.h>
63#include <linux/file.h>
64 61
65#include <linux/atomic.h> 62#include <linux/atomic.h>
66 63
@@ -156,36 +153,6 @@ struct css_id {
156 unsigned short stack[0]; /* Array of Length (depth+1) */ 153 unsigned short stack[0]; /* Array of Length (depth+1) */
157}; 154};
158 155
159/*
160 * cgroup_event represents events which userspace want to receive.
161 */
162struct cgroup_event {
163 /*
164 * css which the event belongs to.
165 */
166 struct cgroup_subsys_state *css;
167 /*
168 * Control file which the event associated.
169 */
170 struct cftype *cft;
171 /*
172 * eventfd to signal userspace about the event.
173 */
174 struct eventfd_ctx *eventfd;
175 /*
176 * Each of these stored in a list by the cgroup.
177 */
178 struct list_head list;
179 /*
180 * All fields below needed to unregister event when
181 * userspace closes eventfd.
182 */
183 poll_table pt;
184 wait_queue_head_t *wqh;
185 wait_queue_t wait;
186 struct work_struct remove;
187};
188
189/* The list of hierarchy roots */ 156/* The list of hierarchy roots */
190 157
191static LIST_HEAD(cgroup_roots); 158static LIST_HEAD(cgroup_roots);
@@ -235,8 +202,8 @@ static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[],
235 * keep accessing it outside the said locks. This function may return 202 * keep accessing it outside the said locks. This function may return
236 * %NULL if @cgrp doesn't have @subsys_id enabled. 203 * %NULL if @cgrp doesn't have @subsys_id enabled.
237 */ 204 */
238static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp, 205struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp,
239 struct cgroup_subsys *ss) 206 struct cgroup_subsys *ss)
240{ 207{
241 if (ss) 208 if (ss)
242 return rcu_dereference_check(cgrp->subsys[ss->subsys_id], 209 return rcu_dereference_check(cgrp->subsys[ss->subsys_id],
@@ -2663,7 +2630,7 @@ static const struct inode_operations cgroup_dir_inode_operations = {
2663/* 2630/*
2664 * Check if a file is a control file 2631 * Check if a file is a control file
2665 */ 2632 */
2666static inline struct cftype *__file_cft(struct file *file) 2633struct cftype *__file_cft(struct file *file)
2667{ 2634{
2668 if (file_inode(file)->i_fop != &cgroup_file_operations) 2635 if (file_inode(file)->i_fop != &cgroup_file_operations)
2669 return ERR_PTR(-EINVAL); 2636 return ERR_PTR(-EINVAL);
@@ -3949,202 +3916,6 @@ static void cgroup_dput(struct cgroup *cgrp)
3949 deactivate_super(sb); 3916 deactivate_super(sb);
3950} 3917}
3951 3918
3952/*
3953 * Unregister event and free resources.
3954 *
3955 * Gets called from workqueue.
3956 */
3957static void cgroup_event_remove(struct work_struct *work)
3958{
3959 struct cgroup_event *event = container_of(work, struct cgroup_event,
3960 remove);
3961 struct cgroup_subsys_state *css = event->css;
3962
3963 remove_wait_queue(event->wqh, &event->wait);
3964
3965 event->cft->unregister_event(css, event->cft, event->eventfd);
3966
3967 /* Notify userspace the event is going away. */
3968 eventfd_signal(event->eventfd, 1);
3969
3970 eventfd_ctx_put(event->eventfd);
3971 kfree(event);
3972 css_put(css);
3973}
3974
3975/*
3976 * Gets called on POLLHUP on eventfd when user closes it.
3977 *
3978 * Called with wqh->lock held and interrupts disabled.
3979 */
3980static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
3981 int sync, void *key)
3982{
3983 struct cgroup_event *event = container_of(wait,
3984 struct cgroup_event, wait);
3985 struct cgroup *cgrp = event->css->cgroup;
3986 unsigned long flags = (unsigned long)key;
3987
3988 if (flags & POLLHUP) {
3989 /*
3990 * If the event has been detached at cgroup removal, we
3991 * can simply return knowing the other side will cleanup
3992 * for us.
3993 *
3994 * We can't race against event freeing since the other
3995 * side will require wqh->lock via remove_wait_queue(),
3996 * which we hold.
3997 */
3998 spin_lock(&cgrp->event_list_lock);
3999 if (!list_empty(&event->list)) {
4000 list_del_init(&event->list);
4001 /*
4002 * We are in atomic context, but cgroup_event_remove()
4003 * may sleep, so we have to call it in workqueue.
4004 */
4005 schedule_work(&event->remove);
4006 }
4007 spin_unlock(&cgrp->event_list_lock);
4008 }
4009
4010 return 0;
4011}
4012
4013static void cgroup_event_ptable_queue_proc(struct file *file,
4014 wait_queue_head_t *wqh, poll_table *pt)
4015{
4016 struct cgroup_event *event = container_of(pt,
4017 struct cgroup_event, pt);
4018
4019 event->wqh = wqh;
4020 add_wait_queue(wqh, &event->wait);
4021}
4022
4023/*
4024 * Parse input and register new cgroup event handler.
4025 *
4026 * Input must be in format '<event_fd> <control_fd> <args>'.
4027 * Interpretation of args is defined by control file implementation.
4028 */
4029static int cgroup_write_event_control(struct cgroup_subsys_state *dummy_css,
4030 struct cftype *cft, const char *buffer)
4031{
4032 struct cgroup *cgrp = dummy_css->cgroup;
4033 struct cgroup_event *event;
4034 struct cgroup_subsys_state *cfile_css;
4035 unsigned int efd, cfd;
4036 struct fd efile;
4037 struct fd cfile;
4038 char *endp;
4039 int ret;
4040
4041 efd = simple_strtoul(buffer, &endp, 10);
4042 if (*endp != ' ')
4043 return -EINVAL;
4044 buffer = endp + 1;
4045
4046 cfd = simple_strtoul(buffer, &endp, 10);
4047 if ((*endp != ' ') && (*endp != '\0'))
4048 return -EINVAL;
4049 buffer = endp + 1;
4050
4051 event = kzalloc(sizeof(*event), GFP_KERNEL);
4052 if (!event)
4053 return -ENOMEM;
4054
4055 INIT_LIST_HEAD(&event->list);
4056 init_poll_funcptr(&event->pt, cgroup_event_ptable_queue_proc);
4057 init_waitqueue_func_entry(&event->wait, cgroup_event_wake);
4058 INIT_WORK(&event->remove, cgroup_event_remove);
4059
4060 efile = fdget(efd);
4061 if (!efile.file) {
4062 ret = -EBADF;
4063 goto out_kfree;
4064 }
4065
4066 event->eventfd = eventfd_ctx_fileget(efile.file);
4067 if (IS_ERR(event->eventfd)) {
4068 ret = PTR_ERR(event->eventfd);
4069 goto out_put_efile;
4070 }
4071
4072 cfile = fdget(cfd);
4073 if (!cfile.file) {
4074 ret = -EBADF;
4075 goto out_put_eventfd;
4076 }
4077
4078 /* the process need read permission on control file */
4079 /* AV: shouldn't we check that it's been opened for read instead? */
4080 ret = inode_permission(file_inode(cfile.file), MAY_READ);
4081 if (ret < 0)
4082 goto out_put_cfile;
4083
4084 event->cft = __file_cft(cfile.file);
4085 if (IS_ERR(event->cft)) {
4086 ret = PTR_ERR(event->cft);
4087 goto out_put_cfile;
4088 }
4089
4090 if (!event->cft->ss) {
4091 ret = -EBADF;
4092 goto out_put_cfile;
4093 }
4094
4095 /*
4096 * Determine the css of @cfile, verify it belongs to the same
4097 * cgroup as cgroup.event_control, and associate @event with it.
4098 * Remaining events are automatically removed on cgroup destruction
4099 * but the removal is asynchronous, so take an extra ref.
4100 */
4101 rcu_read_lock();
4102
4103 ret = -EINVAL;
4104 event->css = cgroup_css(cgrp, event->cft->ss);
4105 cfile_css = css_from_dir(cfile.file->f_dentry->d_parent, event->cft->ss);
4106 if (event->css && event->css == cfile_css && css_tryget(event->css))
4107 ret = 0;
4108
4109 rcu_read_unlock();
4110 if (ret)
4111 goto out_put_cfile;
4112
4113 if (!event->cft->register_event || !event->cft->unregister_event) {
4114 ret = -EINVAL;
4115 goto out_put_css;
4116 }
4117
4118 ret = event->cft->register_event(event->css, event->cft,
4119 event->eventfd, buffer);
4120 if (ret)
4121 goto out_put_css;
4122
4123 efile.file->f_op->poll(efile.file, &event->pt);
4124
4125 spin_lock(&cgrp->event_list_lock);
4126 list_add(&event->list, &cgrp->event_list);
4127 spin_unlock(&cgrp->event_list_lock);
4128
4129 fdput(cfile);
4130 fdput(efile);
4131
4132 return 0;
4133
4134out_put_css:
4135 css_put(event->css);
4136out_put_cfile:
4137 fdput(cfile);
4138out_put_eventfd:
4139 eventfd_ctx_put(event->eventfd);
4140out_put_efile:
4141 fdput(efile);
4142out_kfree:
4143 kfree(event);
4144
4145 return ret;
4146}
4147
4148static u64 cgroup_clone_children_read(struct cgroup_subsys_state *css, 3919static u64 cgroup_clone_children_read(struct cgroup_subsys_state *css,
4149 struct cftype *cft) 3920 struct cftype *cft)
4150{ 3921{
@@ -4170,11 +3941,6 @@ static struct cftype cgroup_base_files[] = {
4170 .mode = S_IRUGO | S_IWUSR, 3941 .mode = S_IRUGO | S_IWUSR,
4171 }, 3942 },
4172 { 3943 {
4173 .name = "cgroup.event_control",
4174 .write_string = cgroup_write_event_control,
4175 .mode = S_IWUGO,
4176 },
4177 {
4178 .name = "cgroup.clone_children", 3944 .name = "cgroup.clone_children",
4179 .flags = CFTYPE_INSANE, 3945 .flags = CFTYPE_INSANE,
4180 .read_u64 = cgroup_clone_children_read, 3946 .read_u64 = cgroup_clone_children_read,
@@ -4666,7 +4432,6 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
4666 __releases(&cgroup_mutex) __acquires(&cgroup_mutex) 4432 __releases(&cgroup_mutex) __acquires(&cgroup_mutex)
4667{ 4433{
4668 struct dentry *d = cgrp->dentry; 4434 struct dentry *d = cgrp->dentry;
4669 struct cgroup_event *event, *tmp;
4670 struct cgroup_subsys *ss; 4435 struct cgroup_subsys *ss;
4671 struct cgroup *child; 4436 struct cgroup *child;
4672 bool empty; 4437 bool empty;
@@ -4741,18 +4506,6 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
4741 dget(d); 4506 dget(d);
4742 cgroup_d_remove_dir(d); 4507 cgroup_d_remove_dir(d);
4743 4508
4744 /*
4745 * Unregister events and notify userspace.
4746 * Notify userspace about cgroup removing only after rmdir of cgroup
4747 * directory to avoid race between userspace and kernelspace.
4748 */
4749 spin_lock(&cgrp->event_list_lock);
4750 list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
4751 list_del_init(&event->list);
4752 schedule_work(&event->remove);
4753 }
4754 spin_unlock(&cgrp->event_list_lock);
4755
4756 return 0; 4509 return 0;
4757}; 4510};
4758 4511