aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Mike Galbraith <efault@gmx.de> 2012-10-28 15:19:23 -0400
committer Ingo Molnar <mingo@kernel.org> 2012-10-30 05:26:04 -0400
commit 5258f386ea4e8454bc801fb443e8a4217da1947c (patch)
tree c97487f040b95f83a2c9d31d51cbfe57f35e59e3
parent 8ed92e51f99c2199c64cb33b4ba95ab12940a94c (diff)
sched/autogroup: Fix crash on reboot when autogroup is disabled
Due to these two commits:

   8323f26ce342 sched: Fix race in task_group()
   800d4d30c8f2 sched, autogroup: Stop going ahead if autogroup is disabled

... autogroup scheduling's dynamic knobs are wrecked.

With both patches applied, all you have to do to crash a box is
disable autogroup during boot up, then reboot.. boom, NULL pointer
dereference due to 800d4d30 not allowing autogroup to move things,
and 8323f26ce making that the only way to switch runqueues.

Remove most of the (dysfunctional) knobs and turn the remaining
sched_autogroup_enabled knob readonly.

If the user fiddles with cgroups hereafter, once tasks are moved,
autogroup won't mess with them again unless they call setsid().

No knobs, no glitz, nada, just a cute little thing folks can turn on
if they don't want to muck about with cgroups and/or systemd.

Signed-off-by: Mike Galbraith <efault@gmx.de>
Cc: Xiaotian Feng <xtfeng@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xiaotian Feng <dannyfeng@tencent.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: <stable@vger.kernel.org> # v3.6
Link: http://lkml.kernel.org/r/1351451963.4999.8.camel@maggy.simpson.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- fs/proc/base.c 78
-rw-r--r-- kernel/sched/auto_group.c 68
-rw-r--r-- kernel/sched/auto_group.h 9
-rw-r--r-- kernel/sysctl.c 6
4 files changed, 14 insertions, 147 deletions
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 1b6c84cbdb73..bb1d9623bad2 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1271,81 +1271,6 @@ static const struct file_operations proc_pid_sched_operations = {
1271 1271
1272#endif 1272#endif
1273 1273
1274#ifdef CONFIG_SCHED_AUTOGROUP
1275/*
1276 * Print out autogroup related information:
1277 */
1278static int sched_autogroup_show(struct seq_file *m, void *v)
1279{
1280 struct inode *inode = m->private;
1281 struct task_struct *p;
1282
1283 p = get_proc_task(inode);
1284 if (!p)
1285 return -ESRCH;
1286 proc_sched_autogroup_show_task(p, m);
1287
1288 put_task_struct(p);
1289
1290 return 0;
1291}
1292
1293static ssize_t
1294sched_autogroup_write(struct file *file, const char __user *buf,
1295 size_t count, loff_t *offset)
1296{
1297 struct inode *inode = file->f_path.dentry->d_inode;
1298 struct task_struct *p;
1299 char buffer[PROC_NUMBUF];
1300 int nice;
1301 int err;
1302
1303 memset(buffer, 0, sizeof(buffer));
1304 if (count > sizeof(buffer) - 1)
1305 count = sizeof(buffer) - 1;
1306 if (copy_from_user(buffer, buf, count))
1307 return -EFAULT;
1308
1309 err = kstrtoint(strstrip(buffer), 0, &nice);
1310 if (err < 0)
1311 return err;
1312
1313 p = get_proc_task(inode);
1314 if (!p)
1315 return -ESRCH;
1316
1317 err = proc_sched_autogroup_set_nice(p, nice);
1318 if (err)
1319 count = err;
1320
1321 put_task_struct(p);
1322
1323 return count;
1324}
1325
1326static int sched_autogroup_open(struct inode *inode, struct file *filp)
1327{
1328 int ret;
1329
1330 ret = single_open(filp, sched_autogroup_show, NULL);
1331 if (!ret) {
1332 struct seq_file *m = filp->private_data;
1333
1334 m->private = inode;
1335 }
1336 return ret;
1337}
1338
1339static const struct file_operations proc_pid_sched_autogroup_operations = {
1340 .open = sched_autogroup_open,
1341 .read = seq_read,
1342 .write = sched_autogroup_write,
1343 .llseek = seq_lseek,
1344 .release = single_release,
1345};
1346
1347#endif /* CONFIG_SCHED_AUTOGROUP */
1348
1349static ssize_t comm_write(struct file *file, const char __user *buf, 1274static ssize_t comm_write(struct file *file, const char __user *buf,
1350 size_t count, loff_t *offset) 1275 size_t count, loff_t *offset)
1351{ 1276{
@@ -3036,9 +2961,6 @@ static const struct pid_entry tgid_base_stuff[] = {
3036#ifdef CONFIG_SCHED_DEBUG 2961#ifdef CONFIG_SCHED_DEBUG
3037 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 2962 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
3038#endif 2963#endif
3039#ifdef CONFIG_SCHED_AUTOGROUP
3040 REG("autogroup", S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations),
3041#endif
3042 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), 2964 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
3043#ifdef CONFIG_HAVE_ARCH_TRACEHOOK 2965#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
3044 INF("syscall", S_IRUGO, proc_pid_syscall), 2966 INF("syscall", S_IRUGO, proc_pid_syscall),
diff --git a/kernel/sched/auto_group.c b/kernel/sched/auto_group.c
index 0984a21076a3..0f1bacb005a4 100644
--- a/kernel/sched/auto_group.c
+++ b/kernel/sched/auto_group.c
@@ -110,6 +110,9 @@ out_fail:
110 110
111bool task_wants_autogroup(struct task_struct *p, struct task_group *tg) 111bool task_wants_autogroup(struct task_struct *p, struct task_group *tg)
112{ 112{
113 if (!sysctl_sched_autogroup_enabled)
114 return false;
115
113 if (tg != &root_task_group) 116 if (tg != &root_task_group)
114 return false; 117 return false;
115 118
@@ -143,15 +146,11 @@ autogroup_move_group(struct task_struct *p, struct autogroup *ag)
143 146
144 p->signal->autogroup = autogroup_kref_get(ag); 147 p->signal->autogroup = autogroup_kref_get(ag);
145 148
146 if (!ACCESS_ONCE(sysctl_sched_autogroup_enabled))
147 goto out;
148
149 t = p; 149 t = p;
150 do { 150 do {
151 sched_move_task(t); 151 sched_move_task(t);
152 } while_each_thread(p, t); 152 } while_each_thread(p, t);
153 153
154out:
155 unlock_task_sighand(p, &flags); 154 unlock_task_sighand(p, &flags);
156 autogroup_kref_put(prev); 155 autogroup_kref_put(prev);
157} 156}
@@ -159,8 +158,11 @@ out:
159/* Allocates GFP_KERNEL, cannot be called under any spinlock */ 158/* Allocates GFP_KERNEL, cannot be called under any spinlock */
160void sched_autogroup_create_attach(struct task_struct *p) 159void sched_autogroup_create_attach(struct task_struct *p)
161{ 160{
162 struct autogroup *ag = autogroup_create(); 161 struct autogroup *ag;
163 162
163 if (!sysctl_sched_autogroup_enabled)
164 return;
165 ag = autogroup_create();
164 autogroup_move_group(p, ag); 166 autogroup_move_group(p, ag);
165 /* drop extra reference added by autogroup_create() */ 167 /* drop extra reference added by autogroup_create() */
166 autogroup_kref_put(ag); 168 autogroup_kref_put(ag);
@@ -176,11 +178,15 @@ EXPORT_SYMBOL(sched_autogroup_detach);
176 178
177void sched_autogroup_fork(struct signal_struct *sig) 179void sched_autogroup_fork(struct signal_struct *sig)
178{ 180{
181 if (!sysctl_sched_autogroup_enabled)
182 return;
179 sig->autogroup = autogroup_task_get(current); 183 sig->autogroup = autogroup_task_get(current);
180} 184}
181 185
182void sched_autogroup_exit(struct signal_struct *sig) 186void sched_autogroup_exit(struct signal_struct *sig)
183{ 187{
188 if (!sysctl_sched_autogroup_enabled)
189 return;
184 autogroup_kref_put(sig->autogroup); 190 autogroup_kref_put(sig->autogroup);
185} 191}
186 192
@@ -193,58 +199,6 @@ static int __init setup_autogroup(char *str)
193 199
194__setup("noautogroup", setup_autogroup); 200__setup("noautogroup", setup_autogroup);
195 201
196#ifdef CONFIG_PROC_FS
197
198int proc_sched_autogroup_set_nice(struct task_struct *p, int nice)
199{
200 static unsigned long next = INITIAL_JIFFIES;
201 struct autogroup *ag;
202 int err;
203
204 if (nice < -20 || nice > 19)
205 return -EINVAL;
206
207 err = security_task_setnice(current, nice);
208 if (err)
209 return err;
210
211 if (nice < 0 && !can_nice(current, nice))
212 return -EPERM;
213
214 /* this is a heavy operation taking global locks.. */
215 if (!capable(CAP_SYS_ADMIN) && time_before(jiffies, next))
216 return -EAGAIN;
217
218 next = HZ / 10 + jiffies;
219 ag = autogroup_task_get(p);
220
221 down_write(&ag->lock);
222 err = sched_group_set_shares(ag->tg, prio_to_weight[nice + 20]);
223 if (!err)
224 ag->nice = nice;
225 up_write(&ag->lock);
226
227 autogroup_kref_put(ag);
228
229 return err;
230}
231
232void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m)
233{
234 struct autogroup *ag = autogroup_task_get(p);
235
236 if (!task_group_is_autogroup(ag->tg))
237 goto out;
238
239 down_read(&ag->lock);
240 seq_printf(m, "/autogroup-%ld nice %d\n", ag->id, ag->nice);
241 up_read(&ag->lock);
242
243out:
244 autogroup_kref_put(ag);
245}
246#endif /* CONFIG_PROC_FS */
247
248#ifdef CONFIG_SCHED_DEBUG 202#ifdef CONFIG_SCHED_DEBUG
249int autogroup_path(struct task_group *tg, char *buf, int buflen) 203int autogroup_path(struct task_group *tg, char *buf, int buflen)
250{ 204{
diff --git a/kernel/sched/auto_group.h b/kernel/sched/auto_group.h
index 8bd047142816..4552c6bf79d2 100644
--- a/kernel/sched/auto_group.h
+++ b/kernel/sched/auto_group.h
@@ -4,11 +4,6 @@
4#include <linux/rwsem.h> 4#include <linux/rwsem.h>
5 5
6struct autogroup { 6struct autogroup {
7 /*
8 * reference doesn't mean how many thread attach to this
9 * autogroup now. It just stands for the number of task
10 * could use this autogroup.
11 */
12 struct kref kref; 7 struct kref kref;
13 struct task_group *tg; 8 struct task_group *tg;
14 struct rw_semaphore lock; 9 struct rw_semaphore lock;
@@ -29,9 +24,7 @@ extern bool task_wants_autogroup(struct task_struct *p, struct task_group *tg);
29static inline struct task_group * 24static inline struct task_group *
30autogroup_task_group(struct task_struct *p, struct task_group *tg) 25autogroup_task_group(struct task_struct *p, struct task_group *tg)
31{ 26{
32 int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled); 27 if (task_wants_autogroup(p, tg))
33
34 if (enabled && task_wants_autogroup(p, tg))
35 return p->signal->autogroup->tg; 28 return p->signal->autogroup->tg;
36 29
37 return tg; 30 return tg;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 81c7b1a1a307..2914d0f752cf 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -363,10 +363,8 @@ static struct ctl_table kern_table[] = {
363 .procname = "sched_autogroup_enabled", 363 .procname = "sched_autogroup_enabled",
364 .data = &sysctl_sched_autogroup_enabled, 364 .data = &sysctl_sched_autogroup_enabled,
365 .maxlen = sizeof(unsigned int), 365 .maxlen = sizeof(unsigned int),
366 .mode = 0644, 366 .mode = 0444,
367 .proc_handler = proc_dointvec_minmax, 367 .proc_handler = proc_dointvec,
368 .extra1 = &zero,
369 .extra2 = &one,
370 }, 368 },
371#endif 369#endif
372#ifdef CONFIG_CFS_BANDWIDTH 370#ifdef CONFIG_CFS_BANDWIDTH