Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--  mm/memcontrol.c  248
1 file changed, 248 insertions, 0 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 13b9d0f221b8..02dae3292668 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -45,6 +45,7 @@
 #include <linux/swapops.h>
 #include <linux/spinlock.h>
 #include <linux/eventfd.h>
+#include <linux/poll.h>
 #include <linux/sort.h>
 #include <linux/fs.h>
 #include <linux/seq_file.h>
@@ -55,6 +56,7 @@
 #include <linux/cpu.h>
 #include <linux/oom.h>
 #include <linux/lockdep.h>
+#include <linux/file.h>
 #include "internal.h"
 #include <net/sock.h>
 #include <net/ip.h>
@@ -226,6 +228,36 @@ struct mem_cgroup_eventfd_list {
 	struct eventfd_ctx *eventfd;
 };
 
+/*
+ * cgroup_event represents an event which userspace wants to receive.
+ */
+struct cgroup_event {
+	/*
+	 * css which the event belongs to.
+	 */
+	struct cgroup_subsys_state *css;
+	/*
+	 * Control file which the event is associated with.
+	 */
+	struct cftype *cft;
+	/*
+	 * eventfd to signal userspace about the event.
+	 */
+	struct eventfd_ctx *eventfd;
+	/*
+	 * Each of these is stored in a list by the cgroup.
+	 */
+	struct list_head list;
+	/*
+	 * All fields below are needed to unregister the event when
+	 * userspace closes the eventfd.
+	 */
+	poll_table pt;
+	wait_queue_head_t *wqh;
+	wait_queue_t wait;
+	struct work_struct remove;
+};
+
 static void mem_cgroup_threshold(struct mem_cgroup *memcg);
 static void mem_cgroup_oom_notify(struct mem_cgroup *memcg);
 
@@ -5947,6 +5979,202 @@ static void kmem_cgroup_css_offline(struct mem_cgroup *memcg)
 }
 #endif
 
+/*
+ * Unregister the event and free its resources.
+ *
+ * Gets called from the workqueue.
+ */
+static void cgroup_event_remove(struct work_struct *work)
+{
+	struct cgroup_event *event = container_of(work, struct cgroup_event,
+			remove);
+	struct cgroup_subsys_state *css = event->css;
+
+	remove_wait_queue(event->wqh, &event->wait);
+
+	event->cft->unregister_event(css, event->cft, event->eventfd);
+
+	/* Notify userspace the event is going away. */
+	eventfd_signal(event->eventfd, 1);
+
+	eventfd_ctx_put(event->eventfd);
+	kfree(event);
+	css_put(css);
+}
+
+/*
+ * Gets called on POLLHUP on the eventfd when the user closes it.
+ *
+ * Called with wqh->lock held and interrupts disabled.
+ */
+static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
+		int sync, void *key)
+{
+	struct cgroup_event *event = container_of(wait,
+			struct cgroup_event, wait);
+	struct cgroup *cgrp = event->css->cgroup;
+	unsigned long flags = (unsigned long)key;
+
+	if (flags & POLLHUP) {
+		/*
+		 * If the event has been detached at cgroup removal, we
+		 * can simply return knowing the other side will clean up
+		 * for us.
+		 *
+		 * We can't race against event freeing since the other
+		 * side will require wqh->lock via remove_wait_queue(),
+		 * which we hold.
+		 */
+		spin_lock(&cgrp->event_list_lock);
+		if (!list_empty(&event->list)) {
+			list_del_init(&event->list);
+			/*
+			 * We are in atomic context, but cgroup_event_remove()
+			 * may sleep, so we have to call it via a workqueue.
+			 */
+			schedule_work(&event->remove);
+		}
+		spin_unlock(&cgrp->event_list_lock);
+	}
+
+	return 0;
+}
+
+static void cgroup_event_ptable_queue_proc(struct file *file,
+		wait_queue_head_t *wqh, poll_table *pt)
+{
+	struct cgroup_event *event = container_of(pt,
+			struct cgroup_event, pt);
+
+	event->wqh = wqh;
+	add_wait_queue(wqh, &event->wait);
+}
+
+/*
+ * Parse input and register a new cgroup event handler.
+ *
+ * Input must be in the format '<event_fd> <control_fd> <args>'.
+ * Interpretation of args is defined by the control file implementation.
+ */
+static int cgroup_write_event_control(struct cgroup_subsys_state *dummy_css,
+				      struct cftype *cft, const char *buffer)
+{
+	struct cgroup *cgrp = dummy_css->cgroup;
+	struct cgroup_event *event;
+	struct cgroup_subsys_state *cfile_css;
+	unsigned int efd, cfd;
+	struct fd efile;
+	struct fd cfile;
+	char *endp;
+	int ret;
+
+	efd = simple_strtoul(buffer, &endp, 10);
+	if (*endp != ' ')
+		return -EINVAL;
+	buffer = endp + 1;
+
+	cfd = simple_strtoul(buffer, &endp, 10);
+	if ((*endp != ' ') && (*endp != '\0'))
+		return -EINVAL;
+	buffer = endp + 1;
+
+	event = kzalloc(sizeof(*event), GFP_KERNEL);
+	if (!event)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&event->list);
+	init_poll_funcptr(&event->pt, cgroup_event_ptable_queue_proc);
+	init_waitqueue_func_entry(&event->wait, cgroup_event_wake);
+	INIT_WORK(&event->remove, cgroup_event_remove);
+
+	efile = fdget(efd);
+	if (!efile.file) {
+		ret = -EBADF;
+		goto out_kfree;
+	}
+
+	event->eventfd = eventfd_ctx_fileget(efile.file);
+	if (IS_ERR(event->eventfd)) {
+		ret = PTR_ERR(event->eventfd);
+		goto out_put_efile;
+	}
+
+	cfile = fdget(cfd);
+	if (!cfile.file) {
+		ret = -EBADF;
+		goto out_put_eventfd;
+	}
+
+	/* the process needs read permission on the control file */
+	/* AV: shouldn't we check that it's been opened for read instead? */
+	ret = inode_permission(file_inode(cfile.file), MAY_READ);
+	if (ret < 0)
+		goto out_put_cfile;
+
+	event->cft = __file_cft(cfile.file);
+	if (IS_ERR(event->cft)) {
+		ret = PTR_ERR(event->cft);
+		goto out_put_cfile;
+	}
+
+	if (!event->cft->ss) {
+		ret = -EBADF;
+		goto out_put_cfile;
+	}
+
+	/*
+	 * Determine the css of @cfile, verify it belongs to the same
+	 * cgroup as cgroup.event_control, and associate @event with it.
+	 * Remaining events are automatically removed on cgroup destruction
+	 * but the removal is asynchronous, so take an extra ref.
+	 */
+	rcu_read_lock();
+
+	ret = -EINVAL;
+	event->css = cgroup_css(cgrp, event->cft->ss);
+	cfile_css = css_from_dir(cfile.file->f_dentry->d_parent, event->cft->ss);
+	if (event->css && event->css == cfile_css && css_tryget(event->css))
+		ret = 0;
+
+	rcu_read_unlock();
+	if (ret)
+		goto out_put_cfile;
+
+	if (!event->cft->register_event || !event->cft->unregister_event) {
+		ret = -EINVAL;
+		goto out_put_css;
+	}
+
+	ret = event->cft->register_event(event->css, event->cft,
+			event->eventfd, buffer);
+	if (ret)
+		goto out_put_css;
+
+	efile.file->f_op->poll(efile.file, &event->pt);
+
+	spin_lock(&cgrp->event_list_lock);
+	list_add(&event->list, &cgrp->event_list);
+	spin_unlock(&cgrp->event_list_lock);
+
+	fdput(cfile);
+	fdput(efile);
+
+	return 0;
+
+out_put_css:
+	css_put(event->css);
+out_put_cfile:
+	fdput(cfile);
+out_put_eventfd:
+	eventfd_ctx_put(event->eventfd);
+out_put_efile:
+	fdput(efile);
+out_kfree:
+	kfree(event);
+
+	return ret;
+}
+
 static struct cftype mem_cgroup_files[] = {
 	{
 		.name = "usage_in_bytes",
@@ -5994,6 +6222,12 @@ static struct cftype mem_cgroup_files[] = {
 		.read_u64 = mem_cgroup_hierarchy_read,
 	},
 	{
+		.name = "cgroup.event_control",
+		.write_string = cgroup_write_event_control,
+		.flags = CFTYPE_NO_PREFIX,
+		.mode = S_IWUGO,
+	},
+	{
 		.name = "swappiness",
 		.read_u64 = mem_cgroup_swappiness_read,
 		.write_u64 = mem_cgroup_swappiness_write,
@@ -6326,6 +6560,20 @@ static void mem_cgroup_invalidate_reclaim_iterators(struct mem_cgroup *memcg)
 static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+	struct cgroup *cgrp = css->cgroup;
+	struct cgroup_event *event, *tmp;
+
+	/*
+	 * Unregister events and notify userspace.
+	 * Notify userspace about cgroup removal only after rmdir of the cgroup
+	 * directory to avoid a race between userspace and kernelspace.
+	 */
+	spin_lock(&cgrp->event_list_lock);
+	list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
+		list_del_init(&event->list);
+		schedule_work(&event->remove);
+	}
+	spin_unlock(&cgrp->event_list_lock);
 
 	kmem_cgroup_css_offline(memcg);
 
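
For reference, the interface added by this patch is consumed from userspace by writing '<event_fd> <control_fd> <args>' to cgroup.event_control, as described in the comment above cgroup_write_event_control(). The sketch below is not part of the patch; it is a minimal illustration of a memory-usage threshold notification, assuming a cgroup v1 memory hierarchy mounted at /sys/fs/cgroup/memory and a hypothetical group named "mygroup" with an illustrative 50 MB threshold.

/*
 * Minimal userspace sketch (illustrative, not part of the patch above):
 * register a memory usage threshold event via cgroup.event_control and
 * block on the eventfd until the kernel signals it.
 */
#include <sys/eventfd.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *cgrp = "/sys/fs/cgroup/memory/mygroup";	/* assumed path */
	char buf[128];
	uint64_t count;
	int efd, cfd, ecfd;

	efd = eventfd(0, 0);				/* fd the kernel will signal */

	snprintf(buf, sizeof(buf), "%s/memory.usage_in_bytes", cgrp);
	cfd = open(buf, O_RDONLY);			/* control file to watch */

	snprintf(buf, sizeof(buf), "%s/cgroup.event_control", cgrp);
	ecfd = open(buf, O_WRONLY);

	if (efd < 0 || cfd < 0 || ecfd < 0)
		return 1;

	/* '<event_fd> <control_fd> <args>'; args here is a threshold in bytes */
	snprintf(buf, sizeof(buf), "%d %d %llu", efd, cfd, 50ULL << 20);
	if (write(ecfd, buf, strlen(buf)) < 0)
		return 1;

	/* Blocks until usage crosses the threshold or the cgroup goes away. */
	if (read(efd, &count, sizeof(count)) == sizeof(count))
		printf("memory threshold event received\n");

	return 0;
}

Note that when the cgroup is removed, mem_cgroup_css_offline() schedules cgroup_event_remove(), which signals the same eventfd before tearing the event down, so a blocked reader is woken rather than left hanging.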