author     Linus Torvalds <torvalds@linux-foundation.org>    2011-01-20 19:37:55 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>    2011-01-20 19:37:55 -0500
commit     466c19063b4b426d5c362572787cb249fbf4296b (patch)
tree       214c9d9e914c80eb6b46c9ccd0cd261167393b44 /kernel
parent     67290f41b2715de0e0ae93c9285fcbe37ffc5b22 (diff)
parent     068c5cc5ac7414a8e9eb7856b4bf3cc4d4744267 (diff)
Merge branch 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  sched, cgroup: Use exit hook to avoid use-after-free crash
  sched: Fix signed unsigned comparison in check_preempt_tick()
  sched: Replace rq->bkl_count with rq->rq_sched_info.bkl_count
  sched, autogroup: Fix CONFIG_RT_GROUP_SCHED sched_setscheduler() failure
  sched: Display autogroup names in /proc/sched_debug
  sched: Reinstate group names in /proc/sched_debug
  sched: Update effective_load() to use global share weights
Diffstat (limited to 'kernel')
-rw-r--r--   kernel/sched.c             26
-rw-r--r--   kernel/sched_autogroup.c   32
-rw-r--r--   kernel/sched_autogroup.h    4
-rw-r--r--   kernel/sched_debug.c       42
-rw-r--r--   kernel/sched_fair.c        35
5 files changed, 117 insertions, 22 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index ea3e5eff3878..18d38e4ec7ba 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -553,9 +553,6 @@ struct rq {
 	/* try_to_wake_up() stats */
 	unsigned int ttwu_count;
 	unsigned int ttwu_local;
-
-	/* BKL stats */
-	unsigned int bkl_count;
 #endif
 };
 
@@ -609,6 +606,9 @@ static inline struct task_group *task_group(struct task_struct *p)
 	struct task_group *tg;
 	struct cgroup_subsys_state *css;
 
+	if (p->flags & PF_EXITING)
+		return &root_task_group;
+
 	css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
 			lockdep_is_held(&task_rq(p)->lock));
 	tg = container_of(css, struct task_group, css);
@@ -3887,7 +3887,7 @@ static inline void schedule_debug(struct task_struct *prev)
 	schedstat_inc(this_rq(), sched_count);
 #ifdef CONFIG_SCHEDSTATS
 	if (unlikely(prev->lock_depth >= 0)) {
-		schedstat_inc(this_rq(), bkl_count);
+		schedstat_inc(this_rq(), rq_sched_info.bkl_count);
 		schedstat_inc(prev, sched_info.bkl_count);
 	}
 #endif
@@ -4871,7 +4871,8 @@ recheck:
 	 * assigned.
 	 */
 	if (rt_bandwidth_enabled() && rt_policy(policy) &&
-			task_group(p)->rt_bandwidth.rt_runtime == 0) {
+			task_group(p)->rt_bandwidth.rt_runtime == 0 &&
+			!task_group_is_autogroup(task_group(p))) {
 		__task_rq_unlock(rq);
 		raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 		return -EPERM;
@@ -8882,6 +8883,20 @@ cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
 	}
 }
 
+static void
+cpu_cgroup_exit(struct cgroup_subsys *ss, struct task_struct *task)
+{
+	/*
+	 * cgroup_exit() is called in the copy_process() failure path.
+	 * Ignore this case since the task hasn't ran yet, this avoids
+	 * trying to poke a half freed task state from generic code.
+	 */
+	if (!(task->flags & PF_EXITING))
+		return;
+
+	sched_move_task(task);
+}
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 static int cpu_shares_write_u64(struct cgroup *cgrp, struct cftype *cftype,
 				u64 shareval)
@@ -8954,6 +8969,7 @@ struct cgroup_subsys cpu_cgroup_subsys = {
 	.destroy	= cpu_cgroup_destroy,
 	.can_attach	= cpu_cgroup_can_attach,
 	.attach		= cpu_cgroup_attach,
+	.exit		= cpu_cgroup_exit,
 	.populate	= cpu_cgroup_populate,
 	.subsys_id	= cpu_cgroup_subsys_id,
 	.early_init	= 1,
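
The two kernel/sched.c changes above cooperate: task_group() now steers exiting tasks to root_task_group, and the new cpu_cgroup_exit callback moves a dying task off its group before that group can be torn down. The following is a minimal standalone sketch of that ordering in plain userspace C; every name in it (struct group, task_exit_hook, and so on) is illustrative, not kernel API, and it only shows the re-parenting step that avoids the use-after-free.

#include <stdio.h>
#include <stdbool.h>

struct group { const char *name; };

static struct group root_group = { "root" };
static struct group auto_group = { "autogroup-1" };

struct task {
	struct group *grp;
	bool exiting;
};

/* analogous to task_group(): never hand an exiting task its old group */
static struct group *task_group_of(struct task *t)
{
	if (t->exiting)
		return &root_group;
	return t->grp;
}

/* analogous to cpu_cgroup_exit() calling sched_move_task() */
static void task_exit_hook(struct task *t)
{
	/* copy_process() failure path analogue: task never ran, do nothing */
	if (!t->exiting)
		return;
	t->grp = &root_group;	/* re-parent before the old group is destroyed */
}

int main(void)
{
	struct task t = { &auto_group, false };

	t.exiting = true;	/* task starts exiting */
	task_exit_hook(&t);	/* generic exit code invokes the subsystem hook */
	/* auto_group may now be freed without t still pointing at it */

	printf("task now belongs to %s\n", task_group_of(&t)->name);
	return 0;
}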
diff --git a/kernel/sched_autogroup.c b/kernel/sched_autogroup.c
index 32a723b8f84c..9fb656283157 100644
--- a/kernel/sched_autogroup.c
+++ b/kernel/sched_autogroup.c
@@ -27,6 +27,11 @@ static inline void autogroup_destroy(struct kref *kref)
 {
 	struct autogroup *ag = container_of(kref, struct autogroup, kref);
 
+#ifdef CONFIG_RT_GROUP_SCHED
+	/* We've redirected RT tasks to the root task group... */
+	ag->tg->rt_se = NULL;
+	ag->tg->rt_rq = NULL;
+#endif
 	sched_destroy_group(ag->tg);
 }
 
@@ -55,6 +60,10 @@ static inline struct autogroup *autogroup_task_get(struct task_struct *p)
 	return ag;
 }
 
+#ifdef CONFIG_RT_GROUP_SCHED
+static void free_rt_sched_group(struct task_group *tg);
+#endif
+
 static inline struct autogroup *autogroup_create(void)
 {
 	struct autogroup *ag = kzalloc(sizeof(*ag), GFP_KERNEL);
@@ -72,6 +81,19 @@ static inline struct autogroup *autogroup_create(void)
 	init_rwsem(&ag->lock);
 	ag->id = atomic_inc_return(&autogroup_seq_nr);
 	ag->tg = tg;
+#ifdef CONFIG_RT_GROUP_SCHED
+	/*
+	 * Autogroup RT tasks are redirected to the root task group
+	 * so we don't have to move tasks around upon policy change,
+	 * or flail around trying to allocate bandwidth on the fly.
+	 * A bandwidth exception in __sched_setscheduler() allows
+	 * the policy change to proceed. Thereafter, task_group()
+	 * returns &root_task_group, so zero bandwidth is required.
+	 */
+	free_rt_sched_group(tg);
+	tg->rt_se = root_task_group.rt_se;
+	tg->rt_rq = root_task_group.rt_rq;
+#endif
 	tg->autogroup = ag;
 
 	return ag;
@@ -106,6 +128,11 @@ task_wants_autogroup(struct task_struct *p, struct task_group *tg)
 	return true;
 }
 
+static inline bool task_group_is_autogroup(struct task_group *tg)
+{
+	return tg != &root_task_group && tg->autogroup;
+}
+
 static inline struct task_group *
 autogroup_task_group(struct task_struct *p, struct task_group *tg)
 {
@@ -231,6 +258,11 @@ void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m)
 #ifdef CONFIG_SCHED_DEBUG
 static inline int autogroup_path(struct task_group *tg, char *buf, int buflen)
 {
+	int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled);
+
+	if (!enabled || !tg->autogroup)
+		return 0;
+
 	return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id);
 }
 #endif /* CONFIG_SCHED_DEBUG */
diff --git a/kernel/sched_autogroup.h b/kernel/sched_autogroup.h
index 5358e241cb20..7b859ffe5dad 100644
--- a/kernel/sched_autogroup.h
+++ b/kernel/sched_autogroup.h
@@ -15,6 +15,10 @@ autogroup_task_group(struct task_struct *p, struct task_group *tg);
 
 static inline void autogroup_init(struct task_struct *init_task) { }
 static inline void autogroup_free(struct task_group *tg) { }
+static inline bool task_group_is_autogroup(struct task_group *tg)
+{
+	return 0;
+}
 
 static inline struct task_group *
 autogroup_task_group(struct task_struct *p, struct task_group *tg)
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 1dfae3d014b5..eb6cb8edd075 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -16,6 +16,8 @@
 #include <linux/kallsyms.h>
 #include <linux/utsname.h>
 
+static DEFINE_SPINLOCK(sched_debug_lock);
+
 /*
  * This allows printing both to /proc/sched_debug and
  * to the console
@@ -86,6 +88,26 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
 }
 #endif
 
+#ifdef CONFIG_CGROUP_SCHED
+static char group_path[PATH_MAX];
+
+static char *task_group_path(struct task_group *tg)
+{
+	if (autogroup_path(tg, group_path, PATH_MAX))
+		return group_path;
+
+	/*
+	 * May be NULL if the underlying cgroup isn't fully-created yet
+	 */
+	if (!tg->css.cgroup) {
+		group_path[0] = '\0';
+		return group_path;
+	}
+	cgroup_path(tg->css.cgroup, group_path, PATH_MAX);
+	return group_path;
+}
+#endif
+
 static void
 print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 {
@@ -108,6 +130,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 	SEQ_printf(m, "%15Ld %15Ld %15Ld.%06ld %15Ld.%06ld %15Ld.%06ld",
 		0LL, 0LL, 0LL, 0L, 0LL, 0L, 0LL, 0L);
 #endif
+#ifdef CONFIG_CGROUP_SCHED
+	SEQ_printf(m, " %s", task_group_path(task_group(p)));
+#endif
 
 	SEQ_printf(m, "\n");
 }
@@ -144,7 +169,11 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 	struct sched_entity *last;
 	unsigned long flags;
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, task_group_path(cfs_rq->tg));
+#else
 	SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu);
+#endif
 	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "exec_clock",
 			SPLIT_NS(cfs_rq->exec_clock));
 
@@ -191,7 +220,11 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 
 void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
 {
+#ifdef CONFIG_RT_GROUP_SCHED
+	SEQ_printf(m, "\nrt_rq[%d]:%s\n", cpu, task_group_path(rt_rq->tg));
+#else
 	SEQ_printf(m, "\nrt_rq[%d]:\n", cpu);
+#endif
 
 #define P(x) \
 	SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(rt_rq->x))
@@ -212,6 +245,7 @@ extern __read_mostly int sched_clock_running;
 static void print_cpu(struct seq_file *m, int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
+	unsigned long flags;
 
 #ifdef CONFIG_X86
 	{
@@ -262,14 +296,20 @@ static void print_cpu(struct seq_file *m, int cpu)
 	P(ttwu_count);
 	P(ttwu_local);
 
-	P(bkl_count);
+	SEQ_printf(m, "  .%-30s: %d\n", "bkl_count",
+			rq->rq_sched_info.bkl_count);
 
 #undef P
+#undef P64
 #endif
+	spin_lock_irqsave(&sched_debug_lock, flags);
 	print_cfs_stats(m, cpu);
 	print_rt_stats(m, cpu);
 
+	rcu_read_lock();
 	print_rq(m, rq, cpu);
+	rcu_read_unlock();
+	spin_unlock_irqrestore(&sched_debug_lock, flags);
 }
 
 static const char *sched_tunable_scaling_names[] = {
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index c62ebae65cf0..77e9166d7bbf 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1062,6 +1062,9 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 		struct sched_entity *se = __pick_next_entity(cfs_rq);
 		s64 delta = curr->vruntime - se->vruntime;
 
+		if (delta < 0)
+			return;
+
 		if (delta > ideal_runtime)
 			resched_task(rq_of(cfs_rq)->curr);
 	}
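
The guard added above matters because delta is an s64 while ideal_runtime is an unsigned long: on 64-bit builds the usual arithmetic conversions make the comparison unsigned, so a negative delta reads as a huge positive value and triggers a spurious resched. A small standalone illustration follows (plain C, nothing kernel-specific; the behavior shown assumes an LP64 target where unsigned long is 64 bits):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	int64_t delta = -5;		/* e.g. curr->vruntime - se->vruntime */
	unsigned long ideal_runtime = 1000;

	/* old check: delta is converted to unsigned, so -5 looks enormous */
	if (delta > ideal_runtime)
		printf("old check fires: bogus resched\n");

	/* new check: bail out before the mixed-sign comparison */
	if (delta < 0)
		printf("new check: delta negative, nothing to do\n");

	return 0;
}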
@@ -1362,27 +1365,27 @@ static long effective_load(struct task_group *tg, int cpu, long wl, long wg)
 		return wl;
 
 	for_each_sched_entity(se) {
-		long S, rw, s, a, b;
+		long lw, w;
 
-		S = se->my_q->tg->shares;
-		s = se->load.weight;
-		rw = se->my_q->load.weight;
+		tg = se->my_q->tg;
+		w = se->my_q->load.weight;
 
-		a = S*(rw + wl);
-		b = S*rw + s*wg;
+		/* use this cpu's instantaneous contribution */
+		lw = atomic_read(&tg->load_weight);
+		lw -= se->my_q->load_contribution;
+		lw += w + wg;
 
-		wl = s*(a-b);
+		wl += w;
 
-		if (likely(b))
-			wl /= b;
+		if (lw > 0 && wl < lw)
+			wl = (wl * tg->shares) / lw;
+		else
+			wl = tg->shares;
 
-		/*
-		 * Assume the group is already running and will
-		 * thus already be accounted for in the weight.
-		 *
-		 * That is, moving shares between CPUs, does not
-		 * alter the group weight.
-		 */
+		/* zero point is MIN_SHARES */
+		if (wl < MIN_SHARES)
+			wl = MIN_SHARES;
+		wl -= se->load.weight;
 		wg = 0;
 	}
 
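
The rewritten loop above estimates, level by level, how a group entity's weight on this cpu would change if weight wl were queued here: it recomputes the group's global weight (lw) from tg->load_weight minus this cpu's cached contribution plus the new local weight, takes this cpu's proportional slice of tg->shares, clamps at MIN_SHARES, and subtracts the entity's current weight. A standalone sketch of a single iteration with made-up numbers follows; the function name and all values are illustrative, not kernel code.

#include <stdio.h>

#define MIN_SHARES 2

/*
 * One level of the new effective_load() calculation: how much does the
 * group entity's weight on this cpu change when weight 'wl' is queued
 * here and 'wg' is added to the group's global weight?
 */
static long effective_load_step(long shares, long tg_load_weight,
				long cpu_load_contribution,
				long cpu_queue_weight, long se_weight,
				long wl, long wg)
{
	long w = cpu_queue_weight;
	/* estimated new global group weight */
	long lw = tg_load_weight - cpu_load_contribution + w + wg;

	/* estimated new weight queued on this cpu */
	wl += w;

	/* this cpu's proportional slice of the group's shares */
	if (lw > 0 && wl < lw)
		wl = (wl * shares) / lw;
	else
		wl = shares;

	if (wl < MIN_SHARES)
		wl = MIN_SHARES;

	/* change relative to the group entity's current weight */
	return wl - se_weight;
}

int main(void)
{
	/*
	 * Illustrative numbers: a 1024-share group with 2048 units of
	 * global load, 1024 of them already on this cpu (so the group
	 * entity currently weighs 512 here); a 1024-weight task wakes
	 * on this cpu, also raising the global weight by 1024.
	 * New local weight 2048 of new global 3072 -> 682 shares,
	 * so the entity weight grows by 682 - 512 = 170.
	 */
	long dw = effective_load_step(1024, 2048, 1024, 1024, 512, 1024, 1024);

	printf("group entity weight changes by %ld on this cpu\n", dw);
	return 0;
}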