aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/cgroup.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/cgroup.c')
-rw-r--r--kernel/cgroup.c228
1 files changed, 227 insertions, 1 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 1bf4d6db54ab..ea94984a3895 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4,6 +4,10 @@
4 * Based originally on the cpuset system, extracted by Paul Menage 4 * Based originally on the cpuset system, extracted by Paul Menage
5 * Copyright (C) 2006 Google, Inc 5 * Copyright (C) 2006 Google, Inc
6 * 6 *
7 * Notifications support
8 * Copyright (C) 2009 Nokia Corporation
9 * Author: Kirill A. Shutemov
10 *
7 * Copyright notices from the original cpuset code: 11 * Copyright notices from the original cpuset code:
8 * -------------------------------------------------- 12 * --------------------------------------------------
9 * Copyright (C) 2003 BULL SA. 13 * Copyright (C) 2003 BULL SA.
@@ -53,6 +57,8 @@
53#include <linux/pid_namespace.h> 57#include <linux/pid_namespace.h>
54#include <linux/idr.h> 58#include <linux/idr.h>
55#include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */ 59#include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
60#include <linux/eventfd.h>
61#include <linux/poll.h>
56 62
57#include <asm/atomic.h> 63#include <asm/atomic.h>
58 64
@@ -152,6 +158,35 @@ struct css_id {
152 unsigned short stack[0]; /* Array of Length (depth+1) */ 158 unsigned short stack[0]; /* Array of Length (depth+1) */
153}; 159};
154 160
161/*
162 * cgroup_event represents events which userspace want to recieve.
163 */
164struct cgroup_event {
165 /*
166 * Cgroup which the event belongs to.
167 */
168 struct cgroup *cgrp;
169 /*
170 * Control file which the event associated.
171 */
172 struct cftype *cft;
173 /*
174 * eventfd to signal userspace about the event.
175 */
176 struct eventfd_ctx *eventfd;
177 /*
178 * Each of these stored in a list by the cgroup.
179 */
180 struct list_head list;
181 /*
182 * All fields below needed to unregister event when
183 * userspace closes eventfd.
184 */
185 poll_table pt;
186 wait_queue_head_t *wqh;
187 wait_queue_t wait;
188 struct work_struct remove;
189};
155 190
156/* The list of hierarchy roots */ 191/* The list of hierarchy roots */
157 192
@@ -760,14 +795,28 @@ static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
760static int cgroup_call_pre_destroy(struct cgroup *cgrp) 795static int cgroup_call_pre_destroy(struct cgroup *cgrp)
761{ 796{
762 struct cgroup_subsys *ss; 797 struct cgroup_subsys *ss;
798 struct cgroup_event *event, *tmp;
763 int ret = 0; 799 int ret = 0;
764 800
765 for_each_subsys(cgrp->root, ss) 801 for_each_subsys(cgrp->root, ss)
766 if (ss->pre_destroy) { 802 if (ss->pre_destroy) {
767 ret = ss->pre_destroy(ss, cgrp); 803 ret = ss->pre_destroy(ss, cgrp);
768 if (ret) 804 if (ret)
769 break; 805 goto out;
770 } 806 }
807
808 /*
809 * Unregister events and notify userspace.
810 */
811 spin_lock(&cgrp->event_list_lock);
812 list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
813 list_del(&event->list);
814 eventfd_signal(event->eventfd, 1);
815 schedule_work(&event->remove);
816 }
817 spin_unlock(&cgrp->event_list_lock);
818
819out:
771 return ret; 820 return ret;
772} 821}
773 822
@@ -1239,6 +1288,8 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
1239 INIT_LIST_HEAD(&cgrp->release_list); 1288 INIT_LIST_HEAD(&cgrp->release_list);
1240 INIT_LIST_HEAD(&cgrp->pidlists); 1289 INIT_LIST_HEAD(&cgrp->pidlists);
1241 mutex_init(&cgrp->pidlist_mutex); 1290 mutex_init(&cgrp->pidlist_mutex);
1291 INIT_LIST_HEAD(&cgrp->event_list);
1292 spin_lock_init(&cgrp->event_list_lock);
1242} 1293}
1243 1294
1244static void init_cgroup_root(struct cgroupfs_root *root) 1295static void init_cgroup_root(struct cgroupfs_root *root)
@@ -2077,6 +2128,16 @@ static const struct inode_operations cgroup_dir_inode_operations = {
2077 .rename = cgroup_rename, 2128 .rename = cgroup_rename,
2078}; 2129};
2079 2130
2131/*
2132 * Check if a file is a control file
2133 */
2134static inline struct cftype *__file_cft(struct file *file)
2135{
2136 if (file->f_dentry->d_inode->i_fop != &cgroup_file_operations)
2137 return ERR_PTR(-EINVAL);
2138 return __d_cft(file->f_dentry);
2139}
2140
2080static int cgroup_create_file(struct dentry *dentry, mode_t mode, 2141static int cgroup_create_file(struct dentry *dentry, mode_t mode,
2081 struct super_block *sb) 2142 struct super_block *sb)
2082{ 2143{
@@ -2931,6 +2992,166 @@ static int cgroup_write_notify_on_release(struct cgroup *cgrp,
2931} 2992}
2932 2993
2933/* 2994/*
2995 * Unregister event and free resources.
2996 *
2997 * Gets called from workqueue.
2998 */
2999static void cgroup_event_remove(struct work_struct *work)
3000{
3001 struct cgroup_event *event = container_of(work, struct cgroup_event,
3002 remove);
3003 struct cgroup *cgrp = event->cgrp;
3004
3005 /* TODO: check return code */
3006 event->cft->unregister_event(cgrp, event->cft, event->eventfd);
3007
3008 eventfd_ctx_put(event->eventfd);
3009 remove_wait_queue(event->wqh, &event->wait);
3010 kfree(event);
3011}
3012
3013/*
3014 * Gets called on POLLHUP on eventfd when user closes it.
3015 *
3016 * Called with wqh->lock held and interrupts disabled.
3017 */
3018static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
3019 int sync, void *key)
3020{
3021 struct cgroup_event *event = container_of(wait,
3022 struct cgroup_event, wait);
3023 struct cgroup *cgrp = event->cgrp;
3024 unsigned long flags = (unsigned long)key;
3025
3026 if (flags & POLLHUP) {
3027 spin_lock(&cgrp->event_list_lock);
3028 list_del(&event->list);
3029 spin_unlock(&cgrp->event_list_lock);
3030 /*
3031 * We are in atomic context, but cgroup_event_remove() may
3032 * sleep, so we have to call it in workqueue.
3033 */
3034 schedule_work(&event->remove);
3035 }
3036
3037 return 0;
3038}
3039
3040static void cgroup_event_ptable_queue_proc(struct file *file,
3041 wait_queue_head_t *wqh, poll_table *pt)
3042{
3043 struct cgroup_event *event = container_of(pt,
3044 struct cgroup_event, pt);
3045
3046 event->wqh = wqh;
3047 add_wait_queue(wqh, &event->wait);
3048}
3049
3050/*
3051 * Parse input and register new cgroup event handler.
3052 *
3053 * Input must be in format '<event_fd> <control_fd> <args>'.
3054 * Interpretation of args is defined by control file implementation.
3055 */
3056static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft,
3057 const char *buffer)
3058{
3059 struct cgroup_event *event = NULL;
3060 unsigned int efd, cfd;
3061 struct file *efile = NULL;
3062 struct file *cfile = NULL;
3063 char *endp;
3064 int ret;
3065
3066 efd = simple_strtoul(buffer, &endp, 10);
3067 if (*endp != ' ')
3068 return -EINVAL;
3069 buffer = endp + 1;
3070
3071 cfd = simple_strtoul(buffer, &endp, 10);
3072 if ((*endp != ' ') && (*endp != '\0'))
3073 return -EINVAL;
3074 buffer = endp + 1;
3075
3076 event = kzalloc(sizeof(*event), GFP_KERNEL);
3077 if (!event)
3078 return -ENOMEM;
3079 event->cgrp = cgrp;
3080 INIT_LIST_HEAD(&event->list);
3081 init_poll_funcptr(&event->pt, cgroup_event_ptable_queue_proc);
3082 init_waitqueue_func_entry(&event->wait, cgroup_event_wake);
3083 INIT_WORK(&event->remove, cgroup_event_remove);
3084
3085 efile = eventfd_fget(efd);
3086 if (IS_ERR(efile)) {
3087 ret = PTR_ERR(efile);
3088 goto fail;
3089 }
3090
3091 event->eventfd = eventfd_ctx_fileget(efile);
3092 if (IS_ERR(event->eventfd)) {
3093 ret = PTR_ERR(event->eventfd);
3094 goto fail;
3095 }
3096
3097 cfile = fget(cfd);
3098 if (!cfile) {
3099 ret = -EBADF;
3100 goto fail;
3101 }
3102
3103 /* the process need read permission on control file */
3104 ret = file_permission(cfile, MAY_READ);
3105 if (ret < 0)
3106 goto fail;
3107
3108 event->cft = __file_cft(cfile);
3109 if (IS_ERR(event->cft)) {
3110 ret = PTR_ERR(event->cft);
3111 goto fail;
3112 }
3113
3114 if (!event->cft->register_event || !event->cft->unregister_event) {
3115 ret = -EINVAL;
3116 goto fail;
3117 }
3118
3119 ret = event->cft->register_event(cgrp, event->cft,
3120 event->eventfd, buffer);
3121 if (ret)
3122 goto fail;
3123
3124 if (efile->f_op->poll(efile, &event->pt) & POLLHUP) {
3125 event->cft->unregister_event(cgrp, event->cft, event->eventfd);
3126 ret = 0;
3127 goto fail;
3128 }
3129
3130 spin_lock(&cgrp->event_list_lock);
3131 list_add(&event->list, &cgrp->event_list);
3132 spin_unlock(&cgrp->event_list_lock);
3133
3134 fput(cfile);
3135 fput(efile);
3136
3137 return 0;
3138
3139fail:
3140 if (cfile)
3141 fput(cfile);
3142
3143 if (event && event->eventfd && !IS_ERR(event->eventfd))
3144 eventfd_ctx_put(event->eventfd);
3145
3146 if (!IS_ERR_OR_NULL(efile))
3147 fput(efile);
3148
3149 kfree(event);
3150
3151 return ret;
3152}
3153
3154/*
2934 * for the common functions, 'private' gives the type of file 3155 * for the common functions, 'private' gives the type of file
2935 */ 3156 */
2936/* for hysterical raisins, we can't put this on the older files */ 3157/* for hysterical raisins, we can't put this on the older files */
@@ -2955,6 +3176,11 @@ static struct cftype files[] = {
2955 .read_u64 = cgroup_read_notify_on_release, 3176 .read_u64 = cgroup_read_notify_on_release,
2956 .write_u64 = cgroup_write_notify_on_release, 3177 .write_u64 = cgroup_write_notify_on_release,
2957 }, 3178 },
3179 {
3180 .name = CGROUP_FILE_GENERIC_PREFIX "event_control",
3181 .write_string = cgroup_write_event_control,
3182 .mode = S_IWUGO,
3183 },
2958}; 3184};
2959 3185
2960static struct cftype cft_release_agent = { 3186static struct cftype cft_release_agent = {