diff options
-rw-r--r-- | include/linux/cgroup.h | 10 | ||||
-rw-r--r-- | kernel/cgroup.c | 222 |
2 files changed, 149 insertions, 83 deletions
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 7166023e07d2..8ab91880a0ad 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/rcupdate.h> | 14 | #include <linux/rcupdate.h> |
15 | #include <linux/cgroupstats.h> | 15 | #include <linux/cgroupstats.h> |
16 | #include <linux/prio_heap.h> | 16 | #include <linux/prio_heap.h> |
17 | #include <linux/rwsem.h> | ||
17 | 18 | ||
18 | #ifdef CONFIG_CGROUPS | 19 | #ifdef CONFIG_CGROUPS |
19 | 20 | ||
@@ -136,6 +137,15 @@ struct cgroup { | |||
136 | * release_list_lock | 137 | * release_list_lock |
137 | */ | 138 | */ |
138 | struct list_head release_list; | 139 | struct list_head release_list; |
140 | |||
141 | /* pids_mutex protects the fields below */ | ||
142 | struct rw_semaphore pids_mutex; | ||
143 | /* Array of process ids in the cgroup */ | ||
144 | pid_t *tasks_pids; | ||
145 | /* How many files are using the current tasks_pids array */ | ||
146 | int pids_use_count; | ||
147 | /* Length of the current tasks_pids array */ | ||
148 | int pids_length; | ||
139 | }; | 149 | }; |
140 | 150 | ||
141 | /* A css_set is a structure holding pointers to a set of | 151 | /* A css_set is a structure holding pointers to a set of |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 1e49218457e0..046c1609606b 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -868,6 +868,14 @@ static struct super_operations cgroup_ops = { | |||
868 | .remount_fs = cgroup_remount, | 868 | .remount_fs = cgroup_remount, |
869 | }; | 869 | }; |
870 | 870 | ||
871 | static void init_cgroup_housekeeping(struct cgroup *cgrp) | ||
872 | { | ||
873 | INIT_LIST_HEAD(&cgrp->sibling); | ||
874 | INIT_LIST_HEAD(&cgrp->children); | ||
875 | INIT_LIST_HEAD(&cgrp->css_sets); | ||
876 | INIT_LIST_HEAD(&cgrp->release_list); | ||
877 | init_rwsem(&cgrp->pids_mutex); | ||
878 | } | ||
871 | static void init_cgroup_root(struct cgroupfs_root *root) | 879 | static void init_cgroup_root(struct cgroupfs_root *root) |
872 | { | 880 | { |
873 | struct cgroup *cgrp = &root->top_cgroup; | 881 | struct cgroup *cgrp = &root->top_cgroup; |
@@ -876,10 +884,7 @@ static void init_cgroup_root(struct cgroupfs_root *root) | |||
876 | root->number_of_cgroups = 1; | 884 | root->number_of_cgroups = 1; |
877 | cgrp->root = root; | 885 | cgrp->root = root; |
878 | cgrp->top_cgroup = cgrp; | 886 | cgrp->top_cgroup = cgrp; |
879 | INIT_LIST_HEAD(&cgrp->sibling); | 887 | init_cgroup_housekeeping(cgrp); |
880 | INIT_LIST_HEAD(&cgrp->children); | ||
881 | INIT_LIST_HEAD(&cgrp->css_sets); | ||
882 | INIT_LIST_HEAD(&cgrp->release_list); | ||
883 | } | 888 | } |
884 | 889 | ||
885 | static int cgroup_test_super(struct super_block *sb, void *data) | 890 | static int cgroup_test_super(struct super_block *sb, void *data) |
@@ -1995,16 +2000,7 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan) | |||
1995 | * but we cannot guarantee that the information we produce is correct | 2000 | * but we cannot guarantee that the information we produce is correct |
1996 | * unless we produce it entirely atomically. | 2001 | * unless we produce it entirely atomically. |
1997 | * | 2002 | * |
1998 | * Upon tasks file open(), a struct ctr_struct is allocated, that | ||
1999 | * will have a pointer to an array (also allocated here). The struct | ||
2000 | * ctr_struct * is stored in file->private_data. Its resources will | ||
2001 | * be freed by release() when the file is closed. The array is used | ||
2002 | * to sprintf the PIDs and then used by read(). | ||
2003 | */ | 2003 | */ |
2004 | struct ctr_struct { | ||
2005 | char *buf; | ||
2006 | int bufsz; | ||
2007 | }; | ||
2008 | 2004 | ||
2009 | /* | 2005 | /* |
2010 | * Load into 'pidarray' up to 'npids' of the tasks using cgroup | 2006 | * Load into 'pidarray' up to 'npids' of the tasks using cgroup |
@@ -2086,42 +2082,132 @@ static int cmppid(const void *a, const void *b) | |||
2086 | return *(pid_t *)a - *(pid_t *)b; | 2082 | return *(pid_t *)a - *(pid_t *)b; |
2087 | } | 2083 | } |
2088 | 2084 | ||
2085 | |||
2089 | /* | 2086 | /* |
2090 | * Convert array 'a' of 'npids' pid_t's to a string of newline separated | 2087 | * seq_file methods for the "tasks" file. The seq_file position is the |
2091 | * decimal pids in 'buf'. Don't write more than 'sz' chars, but return | 2088 | * next pid to display; the seq_file iterator is a pointer to the pid |
2092 | * count 'cnt' of how many chars would be written if buf were large enough. | 2089 | * in the cgroup->tasks_pids array. |
2093 | */ | 2090 | */ |
2094 | static int pid_array_to_buf(char *buf, int sz, pid_t *a, int npids) | 2091 | |
2092 | static void *cgroup_tasks_start(struct seq_file *s, loff_t *pos) | ||
2095 | { | 2093 | { |
2096 | int cnt = 0; | 2094 | /* |
2097 | int i; | 2095 | * Initially we receive a position value that corresponds to |
2096 | * one more than the last pid shown (or 0 on the first call or | ||
2097 | * after a seek to the start). Use a binary-search to find the | ||
2098 | * next pid to display, if any | ||
2099 | */ | ||
2100 | struct cgroup *cgrp = s->private; | ||
2101 | int index = 0, pid = *pos; | ||
2102 | int *iter; | ||
2098 | 2103 | ||
2099 | for (i = 0; i < npids; i++) | 2104 | down_read(&cgrp->pids_mutex); |
2100 | cnt += snprintf(buf + cnt, max(sz - cnt, 0), "%d\n", a[i]); | 2105 | if (pid) { |
2101 | return cnt; | 2106 | int end = cgrp->pids_length; |
2107 | int i; | ||
2108 | while (index < end) { | ||
2109 | int mid = (index + end) / 2; | ||
2110 | if (cgrp->tasks_pids[mid] == pid) { | ||
2111 | index = mid; | ||
2112 | break; | ||
2113 | } else if (cgrp->tasks_pids[mid] <= pid) | ||
2114 | index = mid + 1; | ||
2115 | else | ||
2116 | end = mid; | ||
2117 | } | ||
2118 | } | ||
2119 | /* If we're off the end of the array, we're done */ | ||
2120 | if (index >= cgrp->pids_length) | ||
2121 | return NULL; | ||
2122 | /* Update the abstract position to be the actual pid that we found */ | ||
2123 | iter = cgrp->tasks_pids + index; | ||
2124 | *pos = *iter; | ||
2125 | return iter; | ||
2126 | } | ||
2127 | |||
2128 | static void cgroup_tasks_stop(struct seq_file *s, void *v) | ||
2129 | { | ||
2130 | struct cgroup *cgrp = s->private; | ||
2131 | up_read(&cgrp->pids_mutex); | ||
2102 | } | 2132 | } |
2103 | 2133 | ||
2134 | static void *cgroup_tasks_next(struct seq_file *s, void *v, loff_t *pos) | ||
2135 | { | ||
2136 | struct cgroup *cgrp = s->private; | ||
2137 | int *p = v; | ||
2138 | int *end = cgrp->tasks_pids + cgrp->pids_length; | ||
2139 | |||
2140 | /* | ||
2141 | * Advance to the next pid in the array. If this goes off the | ||
2142 | * end, we're done | ||
2143 | */ | ||
2144 | p++; | ||
2145 | if (p >= end) { | ||
2146 | return NULL; | ||
2147 | } else { | ||
2148 | *pos = *p; | ||
2149 | return p; | ||
2150 | } | ||
2151 | } | ||
2152 | |||
/*
 * seq_file ->show method for the "tasks" file: emit the pid that the
 * iterator @v points at as a decimal number followed by a newline.
 * The result of seq_printf() is returned unchanged.
 */
static int cgroup_tasks_show(struct seq_file *s, void *v)
{
	int *pidp = v;

	return seq_printf(s, "%d\n", *pidp);
}
2157 | |||
2158 | static struct seq_operations cgroup_tasks_seq_operations = { | ||
2159 | .start = cgroup_tasks_start, | ||
2160 | .stop = cgroup_tasks_stop, | ||
2161 | .next = cgroup_tasks_next, | ||
2162 | .show = cgroup_tasks_show, | ||
2163 | }; | ||
2164 | |||
2165 | static void release_cgroup_pid_array(struct cgroup *cgrp) | ||
2166 | { | ||
2167 | down_write(&cgrp->pids_mutex); | ||
2168 | BUG_ON(!cgrp->pids_use_count); | ||
2169 | if (!--cgrp->pids_use_count) { | ||
2170 | kfree(cgrp->tasks_pids); | ||
2171 | cgrp->tasks_pids = NULL; | ||
2172 | cgrp->pids_length = 0; | ||
2173 | } | ||
2174 | up_write(&cgrp->pids_mutex); | ||
2175 | } | ||
2176 | |||
2177 | static int cgroup_tasks_release(struct inode *inode, struct file *file) | ||
2178 | { | ||
2179 | struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); | ||
2180 | |||
2181 | if (!(file->f_mode & FMODE_READ)) | ||
2182 | return 0; | ||
2183 | |||
2184 | release_cgroup_pid_array(cgrp); | ||
2185 | return seq_release(inode, file); | ||
2186 | } | ||
2187 | |||
2188 | static struct file_operations cgroup_tasks_operations = { | ||
2189 | .read = seq_read, | ||
2190 | .llseek = seq_lseek, | ||
2191 | .write = cgroup_file_write, | ||
2192 | .release = cgroup_tasks_release, | ||
2193 | }; | ||
2194 | |||
2104 | /* | 2195 | /* |
2105 | * Handle an open on 'tasks' file. Prepare a buffer listing the | 2196 | * Handle an open on 'tasks' file. Prepare an array containing the |
2106 | * process id's of tasks currently attached to the cgroup being opened. | 2197 | * process id's of tasks currently attached to the cgroup being opened. |
2107 | * | ||
2108 | * Does not require any specific cgroup mutexes, and does not take any. | ||
2109 | */ | 2198 | */ |
2199 | |||
2110 | static int cgroup_tasks_open(struct inode *unused, struct file *file) | 2200 | static int cgroup_tasks_open(struct inode *unused, struct file *file) |
2111 | { | 2201 | { |
2112 | struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); | 2202 | struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); |
2113 | struct ctr_struct *ctr; | ||
2114 | pid_t *pidarray; | 2203 | pid_t *pidarray; |
2115 | int npids; | 2204 | int npids; |
2116 | char c; | 2205 | int retval; |
2117 | 2206 | ||
2207 | /* Nothing to do for write-only files */ | ||
2118 | if (!(file->f_mode & FMODE_READ)) | 2208 | if (!(file->f_mode & FMODE_READ)) |
2119 | return 0; | 2209 | return 0; |
2120 | 2210 | ||
2121 | ctr = kmalloc(sizeof(*ctr), GFP_KERNEL); | ||
2122 | if (!ctr) | ||
2123 | goto err0; | ||
2124 | |||
2125 | /* | 2211 | /* |
2126 | * If cgroup gets more users after we read count, we won't have | 2212 | * If cgroup gets more users after we read count, we won't have |
2127 | * enough space - tough. This race is indistinguishable to the | 2213 | * enough space - tough. This race is indistinguishable to the |
@@ -2129,57 +2215,31 @@ static int cgroup_tasks_open(struct inode *unused, struct file *file) | |||
2129 | * show up until sometime later on. | 2215 | * show up until sometime later on. |
2130 | */ | 2216 | */ |
2131 | npids = cgroup_task_count(cgrp); | 2217 | npids = cgroup_task_count(cgrp); |
2132 | if (npids) { | 2218 | pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL); |
2133 | pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL); | 2219 | if (!pidarray) |
2134 | if (!pidarray) | 2220 | return -ENOMEM; |
2135 | goto err1; | 2221 | npids = pid_array_load(pidarray, npids, cgrp); |
2136 | 2222 | sort(pidarray, npids, sizeof(pid_t), cmppid, NULL); | |
2137 | npids = pid_array_load(pidarray, npids, cgrp); | ||
2138 | sort(pidarray, npids, sizeof(pid_t), cmppid, NULL); | ||
2139 | |||
2140 | /* Call pid_array_to_buf() twice, first just to get bufsz */ | ||
2141 | ctr->bufsz = pid_array_to_buf(&c, sizeof(c), pidarray, npids) + 1; | ||
2142 | ctr->buf = kmalloc(ctr->bufsz, GFP_KERNEL); | ||
2143 | if (!ctr->buf) | ||
2144 | goto err2; | ||
2145 | ctr->bufsz = pid_array_to_buf(ctr->buf, ctr->bufsz, pidarray, npids); | ||
2146 | |||
2147 | kfree(pidarray); | ||
2148 | } else { | ||
2149 | ctr->buf = NULL; | ||
2150 | ctr->bufsz = 0; | ||
2151 | } | ||
2152 | file->private_data = ctr; | ||
2153 | return 0; | ||
2154 | |||
2155 | err2: | ||
2156 | kfree(pidarray); | ||
2157 | err1: | ||
2158 | kfree(ctr); | ||
2159 | err0: | ||
2160 | return -ENOMEM; | ||
2161 | } | ||
2162 | |||
2163 | static ssize_t cgroup_tasks_read(struct cgroup *cgrp, | ||
2164 | struct cftype *cft, | ||
2165 | struct file *file, char __user *buf, | ||
2166 | size_t nbytes, loff_t *ppos) | ||
2167 | { | ||
2168 | struct ctr_struct *ctr = file->private_data; | ||
2169 | 2223 | ||
2170 | return simple_read_from_buffer(buf, nbytes, ppos, ctr->buf, ctr->bufsz); | 2224 | /* |
2171 | } | 2225 | * Store the array in the cgroup, freeing the old |
2226 | * array if necessary | ||
2227 | */ | ||
2228 | down_write(&cgrp->pids_mutex); | ||
2229 | kfree(cgrp->tasks_pids); | ||
2230 | cgrp->tasks_pids = pidarray; | ||
2231 | cgrp->pids_length = npids; | ||
2232 | cgrp->pids_use_count++; | ||
2233 | up_write(&cgrp->pids_mutex); | ||
2172 | 2234 | ||
2173 | static int cgroup_tasks_release(struct inode *unused_inode, | 2235 | file->f_op = &cgroup_tasks_operations; |
2174 | struct file *file) | ||
2175 | { | ||
2176 | struct ctr_struct *ctr; | ||
2177 | 2236 | ||
2178 | if (file->f_mode & FMODE_READ) { | 2237 | retval = seq_open(file, &cgroup_tasks_seq_operations); |
2179 | ctr = file->private_data; | 2238 | if (retval) { |
2180 | kfree(ctr->buf); | 2239 | release_cgroup_pid_array(cgrp); |
2181 | kfree(ctr); | 2240 | return retval; |
2182 | } | 2241 | } |
2242 | ((struct seq_file *)file->private_data)->private = cgrp; | ||
2183 | return 0; | 2243 | return 0; |
2184 | } | 2244 | } |
2185 | 2245 | ||
@@ -2208,7 +2268,6 @@ static struct cftype files[] = { | |||
2208 | { | 2268 | { |
2209 | .name = "tasks", | 2269 | .name = "tasks", |
2210 | .open = cgroup_tasks_open, | 2270 | .open = cgroup_tasks_open, |
2211 | .read = cgroup_tasks_read, | ||
2212 | .write_u64 = cgroup_tasks_write, | 2271 | .write_u64 = cgroup_tasks_write, |
2213 | .release = cgroup_tasks_release, | 2272 | .release = cgroup_tasks_release, |
2214 | .private = FILE_TASKLIST, | 2273 | .private = FILE_TASKLIST, |
@@ -2298,10 +2357,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
2298 | 2357 | ||
2299 | mutex_lock(&cgroup_mutex); | 2358 | mutex_lock(&cgroup_mutex); |
2300 | 2359 | ||
2301 | INIT_LIST_HEAD(&cgrp->sibling); | 2360 | init_cgroup_housekeeping(cgrp); |
2302 | INIT_LIST_HEAD(&cgrp->children); | ||
2303 | INIT_LIST_HEAD(&cgrp->css_sets); | ||
2304 | INIT_LIST_HEAD(&cgrp->release_list); | ||
2305 | 2361 | ||
2306 | cgrp->parent = parent; | 2362 | cgrp->parent = parent; |
2307 | cgrp->root = parent->root; | 2363 | cgrp->root = parent->root; |