about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- include/linux/cgroup.h 10
-rw-r--r-- kernel/cgroup.c 222
2 files changed, 149 insertions, 83 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 7166023e07d2..8ab91880a0ad 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -14,6 +14,7 @@
14#include <linux/rcupdate.h> 14#include <linux/rcupdate.h>
15#include <linux/cgroupstats.h> 15#include <linux/cgroupstats.h>
16#include <linux/prio_heap.h> 16#include <linux/prio_heap.h>
17#include <linux/rwsem.h>
17 18
18#ifdef CONFIG_CGROUPS 19#ifdef CONFIG_CGROUPS
19 20
@@ -136,6 +137,15 @@ struct cgroup {
136 * release_list_lock 137 * release_list_lock
137 */ 138 */
138 struct list_head release_list; 139 struct list_head release_list;
140
141 /* pids_mutex protects the fields below */
142 struct rw_semaphore pids_mutex;
143 /* Array of process ids in the cgroup */
144 pid_t *tasks_pids;
145 /* How many files are using the current tasks_pids array */
146 int pids_use_count;
147 /* Length of the current tasks_pids array */
148 int pids_length;
139}; 149};
140 150
141/* A css_set is a structure holding pointers to a set of 151/* A css_set is a structure holding pointers to a set of
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 1e49218457e0..046c1609606b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -868,6 +868,14 @@ static struct super_operations cgroup_ops = {
868 .remount_fs = cgroup_remount, 868 .remount_fs = cgroup_remount,
869}; 869};
870 870
871static void init_cgroup_housekeeping(struct cgroup *cgrp)
872{
873 INIT_LIST_HEAD(&cgrp->sibling);
874 INIT_LIST_HEAD(&cgrp->children);
875 INIT_LIST_HEAD(&cgrp->css_sets);
876 INIT_LIST_HEAD(&cgrp->release_list);
877 init_rwsem(&cgrp->pids_mutex);
878}
871static void init_cgroup_root(struct cgroupfs_root *root) 879static void init_cgroup_root(struct cgroupfs_root *root)
872{ 880{
873 struct cgroup *cgrp = &root->top_cgroup; 881 struct cgroup *cgrp = &root->top_cgroup;
@@ -876,10 +884,7 @@ static void init_cgroup_root(struct cgroupfs_root *root)
876 root->number_of_cgroups = 1; 884 root->number_of_cgroups = 1;
877 cgrp->root = root; 885 cgrp->root = root;
878 cgrp->top_cgroup = cgrp; 886 cgrp->top_cgroup = cgrp;
879 INIT_LIST_HEAD(&cgrp->sibling); 887 init_cgroup_housekeeping(cgrp);
880 INIT_LIST_HEAD(&cgrp->children);
881 INIT_LIST_HEAD(&cgrp->css_sets);
882 INIT_LIST_HEAD(&cgrp->release_list);
883} 888}
884 889
885static int cgroup_test_super(struct super_block *sb, void *data) 890static int cgroup_test_super(struct super_block *sb, void *data)
@@ -1995,16 +2000,7 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
1995 * but we cannot guarantee that the information we produce is correct 2000 * but we cannot guarantee that the information we produce is correct
1996 * unless we produce it entirely atomically. 2001 * unless we produce it entirely atomically.
1997 * 2002 *
1998 * Upon tasks file open(), a struct ctr_struct is allocated, that
1999 * will have a pointer to an array (also allocated here). The struct
2000 * ctr_struct * is stored in file->private_data. Its resources will
2001 * be freed by release() when the file is closed. The array is used
2002 * to sprintf the PIDs and then used by read().
2003 */ 2003 */
2004struct ctr_struct {
2005 char *buf;
2006 int bufsz;
2007};
2008 2004
2009/* 2005/*
2010 * Load into 'pidarray' up to 'npids' of the tasks using cgroup 2006 * Load into 'pidarray' up to 'npids' of the tasks using cgroup
@@ -2086,42 +2082,132 @@ static int cmppid(const void *a, const void *b)
2086 return *(pid_t *)a - *(pid_t *)b; 2082 return *(pid_t *)a - *(pid_t *)b;
2087} 2083}
2088 2084
2085
2089/* 2086/*
2090 * Convert array 'a' of 'npids' pid_t's to a string of newline separated 2087 * seq_file methods for the "tasks" file. The seq_file position is the
2091 * decimal pids in 'buf'. Don't write more than 'sz' chars, but return 2088 * next pid to display; the seq_file iterator is a pointer to the pid
2092 * count 'cnt' of how many chars would be written if buf were large enough. 2089 * in the cgroup->tasks_pids array.
2093 */ 2090 */
2094static int pid_array_to_buf(char *buf, int sz, pid_t *a, int npids) 2091
2092static void *cgroup_tasks_start(struct seq_file *s, loff_t *pos)
2095{ 2093{
2096 int cnt = 0; 2094 /*
2097 int i; 2095 * Initially we receive a position value that corresponds to
2096 * one more than the last pid shown (or 0 on the first call or
2097 * after a seek to the start). Use a binary-search to find the
2098 * next pid to display, if any
2099 */
2100 struct cgroup *cgrp = s->private;
2101 int index = 0, pid = *pos;
2102 int *iter;
2098 2103
2099 for (i = 0; i < npids; i++) 2104 down_read(&cgrp->pids_mutex);
2100 cnt += snprintf(buf + cnt, max(sz - cnt, 0), "%d\n", a[i]); 2105 if (pid) {
2101 return cnt; 2106 int end = cgrp->pids_length;
2107 int i;
2108 while (index < end) {
2109 int mid = (index + end) / 2;
2110 if (cgrp->tasks_pids[mid] == pid) {
2111 index = mid;
2112 break;
2113 } else if (cgrp->tasks_pids[mid] <= pid)
2114 index = mid + 1;
2115 else
2116 end = mid;
2117 }
2118 }
2119 /* If we're off the end of the array, we're done */
2120 if (index >= cgrp->pids_length)
2121 return NULL;
2122 /* Update the abstract position to be the actual pid that we found */
2123 iter = cgrp->tasks_pids + index;
2124 *pos = *iter;
2125 return iter;
2126}
2127
2128static void cgroup_tasks_stop(struct seq_file *s, void *v)
2129{
2130 struct cgroup *cgrp = s->private;
2131 up_read(&cgrp->pids_mutex);
2102} 2132}
2103 2133
2134static void *cgroup_tasks_next(struct seq_file *s, void *v, loff_t *pos)
2135{
2136 struct cgroup *cgrp = s->private;
2137 int *p = v;
2138 int *end = cgrp->tasks_pids + cgrp->pids_length;
2139
2140 /*
2141 * Advance to the next pid in the array. If this goes off the
2142 * end, we're done
2143 */
2144 p++;
2145 if (p >= end) {
2146 return NULL;
2147 } else {
2148 *pos = *p;
2149 return p;
2150 }
2151}
2152
2153static int cgroup_tasks_show(struct seq_file *s, void *v)
2154{
2155 return seq_printf(s, "%d\n", *(int *)v);
2156}
2157
2158static struct seq_operations cgroup_tasks_seq_operations = {
2159 .start = cgroup_tasks_start,
2160 .stop = cgroup_tasks_stop,
2161 .next = cgroup_tasks_next,
2162 .show = cgroup_tasks_show,
2163};
2164
2165static void release_cgroup_pid_array(struct cgroup *cgrp)
2166{
2167 down_write(&cgrp->pids_mutex);
2168 BUG_ON(!cgrp->pids_use_count);
2169 if (!--cgrp->pids_use_count) {
2170 kfree(cgrp->tasks_pids);
2171 cgrp->tasks_pids = NULL;
2172 cgrp->pids_length = 0;
2173 }
2174 up_write(&cgrp->pids_mutex);
2175}
2176
2177static int cgroup_tasks_release(struct inode *inode, struct file *file)
2178{
2179 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
2180
2181 if (!(file->f_mode & FMODE_READ))
2182 return 0;
2183
2184 release_cgroup_pid_array(cgrp);
2185 return seq_release(inode, file);
2186}
2187
2188static struct file_operations cgroup_tasks_operations = {
2189 .read = seq_read,
2190 .llseek = seq_lseek,
2191 .write = cgroup_file_write,
2192 .release = cgroup_tasks_release,
2193};
2194
2104/* 2195/*
2105 * Handle an open on 'tasks' file. Prepare a buffer listing the 2196 * Handle an open on 'tasks' file. Prepare an array containing the
2106 * process id's of tasks currently attached to the cgroup being opened. 2197 * process id's of tasks currently attached to the cgroup being opened.
2107 *
2108 * Does not require any specific cgroup mutexes, and does not take any.
2109 */ 2198 */
2199
2110static int cgroup_tasks_open(struct inode *unused, struct file *file) 2200static int cgroup_tasks_open(struct inode *unused, struct file *file)
2111{ 2201{
2112 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); 2202 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
2113 struct ctr_struct *ctr;
2114 pid_t *pidarray; 2203 pid_t *pidarray;
2115 int npids; 2204 int npids;
2116 char c; 2205 int retval;
2117 2206
2207 /* Nothing to do for write-only files */
2118 if (!(file->f_mode & FMODE_READ)) 2208 if (!(file->f_mode & FMODE_READ))
2119 return 0; 2209 return 0;
2120 2210
2121 ctr = kmalloc(sizeof(*ctr), GFP_KERNEL);
2122 if (!ctr)
2123 goto err0;
2124
2125 /* 2211 /*
2126 * If cgroup gets more users after we read count, we won't have 2212 * If cgroup gets more users after we read count, we won't have
2127 * enough space - tough. This race is indistinguishable to the 2213 * enough space - tough. This race is indistinguishable to the
@@ -2129,57 +2215,31 @@ static int cgroup_tasks_open(struct inode *unused, struct file *file)
2129 * show up until sometime later on. 2215 * show up until sometime later on.
2130 */ 2216 */
2131 npids = cgroup_task_count(cgrp); 2217 npids = cgroup_task_count(cgrp);
2132 if (npids) { 2218 pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL);
2133 pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL); 2219 if (!pidarray)
2134 if (!pidarray) 2220 return -ENOMEM;
2135 goto err1; 2221 npids = pid_array_load(pidarray, npids, cgrp);
2136 2222 sort(pidarray, npids, sizeof(pid_t), cmppid, NULL);
2137 npids = pid_array_load(pidarray, npids, cgrp);
2138 sort(pidarray, npids, sizeof(pid_t), cmppid, NULL);
2139
2140 /* Call pid_array_to_buf() twice, first just to get bufsz */
2141 ctr->bufsz = pid_array_to_buf(&c, sizeof(c), pidarray, npids) + 1;
2142 ctr->buf = kmalloc(ctr->bufsz, GFP_KERNEL);
2143 if (!ctr->buf)
2144 goto err2;
2145 ctr->bufsz = pid_array_to_buf(ctr->buf, ctr->bufsz, pidarray, npids);
2146
2147 kfree(pidarray);
2148 } else {
2149 ctr->buf = NULL;
2150 ctr->bufsz = 0;
2151 }
2152 file->private_data = ctr;
2153 return 0;
2154
2155err2:
2156 kfree(pidarray);
2157err1:
2158 kfree(ctr);
2159err0:
2160 return -ENOMEM;
2161}
2162
2163static ssize_t cgroup_tasks_read(struct cgroup *cgrp,
2164 struct cftype *cft,
2165 struct file *file, char __user *buf,
2166 size_t nbytes, loff_t *ppos)
2167{
2168 struct ctr_struct *ctr = file->private_data;
2169 2223
2170 return simple_read_from_buffer(buf, nbytes, ppos, ctr->buf, ctr->bufsz); 2224 /*
2171} 2225 * Store the array in the cgroup, freeing the old
2226 * array if necessary
2227 */
2228 down_write(&cgrp->pids_mutex);
2229 kfree(cgrp->tasks_pids);
2230 cgrp->tasks_pids = pidarray;
2231 cgrp->pids_length = npids;
2232 cgrp->pids_use_count++;
2233 up_write(&cgrp->pids_mutex);
2172 2234
2173static int cgroup_tasks_release(struct inode *unused_inode, 2235 file->f_op = &cgroup_tasks_operations;
2174 struct file *file)
2175{
2176 struct ctr_struct *ctr;
2177 2236
2178 if (file->f_mode & FMODE_READ) { 2237 retval = seq_open(file, &cgroup_tasks_seq_operations);
2179 ctr = file->private_data; 2238 if (retval) {
2180 kfree(ctr->buf); 2239 release_cgroup_pid_array(cgrp);
2181 kfree(ctr); 2240 return retval;
2182 } 2241 }
2242 ((struct seq_file *)file->private_data)->private = cgrp;
2183 return 0; 2243 return 0;
2184} 2244}
2185 2245
@@ -2208,7 +2268,6 @@ static struct cftype files[] = {
2208 { 2268 {
2209 .name = "tasks", 2269 .name = "tasks",
2210 .open = cgroup_tasks_open, 2270 .open = cgroup_tasks_open,
2211 .read = cgroup_tasks_read,
2212 .write_u64 = cgroup_tasks_write, 2271 .write_u64 = cgroup_tasks_write,
2213 .release = cgroup_tasks_release, 2272 .release = cgroup_tasks_release,
2214 .private = FILE_TASKLIST, 2273 .private = FILE_TASKLIST,
@@ -2298,10 +2357,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
2298 2357
2299 mutex_lock(&cgroup_mutex); 2358 mutex_lock(&cgroup_mutex);
2300 2359
2301 INIT_LIST_HEAD(&cgrp->sibling); 2360 init_cgroup_housekeeping(cgrp);
2302 INIT_LIST_HEAD(&cgrp->children);
2303 INIT_LIST_HEAD(&cgrp->css_sets);
2304 INIT_LIST_HEAD(&cgrp->release_list);
2305 2361
2306 cgrp->parent = parent; 2362 cgrp->parent = parent;
2307 cgrp->root = parent->root; 2363 cgrp->root = parent->root;