diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-01-20 19:37:55 -0500 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-01-20 19:37:55 -0500 |
| commit | 466c19063b4b426d5c362572787cb249fbf4296b (patch) | |
| tree | 214c9d9e914c80eb6b46c9ccd0cd261167393b44 | |
| parent | 67290f41b2715de0e0ae93c9285fcbe37ffc5b22 (diff) | |
| parent | 068c5cc5ac7414a8e9eb7856b4bf3cc4d4744267 (diff) | |
Merge branch 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
sched, cgroup: Use exit hook to avoid use-after-free crash
sched: Fix signed unsigned comparison in check_preempt_tick()
sched: Replace rq->bkl_count with rq->rq_sched_info.bkl_count
sched, autogroup: Fix CONFIG_RT_GROUP_SCHED sched_setscheduler() failure
sched: Display autogroup names in /proc/sched_debug
sched: Reinstate group names in /proc/sched_debug
sched: Update effective_load() to use global share weights
| -rw-r--r-- | kernel/sched.c | 26 | ||||
| -rw-r--r-- | kernel/sched_autogroup.c | 32 | ||||
| -rw-r--r-- | kernel/sched_autogroup.h | 4 | ||||
| -rw-r--r-- | kernel/sched_debug.c | 42 | ||||
| -rw-r--r-- | kernel/sched_fair.c | 35 |
5 files changed, 117 insertions, 22 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index ea3e5eff3878..18d38e4ec7ba 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
| @@ -553,9 +553,6 @@ struct rq { | |||
| 553 | /* try_to_wake_up() stats */ | 553 | /* try_to_wake_up() stats */ |
| 554 | unsigned int ttwu_count; | 554 | unsigned int ttwu_count; |
| 555 | unsigned int ttwu_local; | 555 | unsigned int ttwu_local; |
| 556 | |||
| 557 | /* BKL stats */ | ||
| 558 | unsigned int bkl_count; | ||
| 559 | #endif | 556 | #endif |
| 560 | }; | 557 | }; |
| 561 | 558 | ||
| @@ -609,6 +606,9 @@ static inline struct task_group *task_group(struct task_struct *p) | |||
| 609 | struct task_group *tg; | 606 | struct task_group *tg; |
| 610 | struct cgroup_subsys_state *css; | 607 | struct cgroup_subsys_state *css; |
| 611 | 608 | ||
| 609 | if (p->flags & PF_EXITING) | ||
| 610 | return &root_task_group; | ||
| 611 | |||
| 612 | css = task_subsys_state_check(p, cpu_cgroup_subsys_id, | 612 | css = task_subsys_state_check(p, cpu_cgroup_subsys_id, |
| 613 | lockdep_is_held(&task_rq(p)->lock)); | 613 | lockdep_is_held(&task_rq(p)->lock)); |
| 614 | tg = container_of(css, struct task_group, css); | 614 | tg = container_of(css, struct task_group, css); |
| @@ -3887,7 +3887,7 @@ static inline void schedule_debug(struct task_struct *prev) | |||
| 3887 | schedstat_inc(this_rq(), sched_count); | 3887 | schedstat_inc(this_rq(), sched_count); |
| 3888 | #ifdef CONFIG_SCHEDSTATS | 3888 | #ifdef CONFIG_SCHEDSTATS |
| 3889 | if (unlikely(prev->lock_depth >= 0)) { | 3889 | if (unlikely(prev->lock_depth >= 0)) { |
| 3890 | schedstat_inc(this_rq(), bkl_count); | 3890 | schedstat_inc(this_rq(), rq_sched_info.bkl_count); |
| 3891 | schedstat_inc(prev, sched_info.bkl_count); | 3891 | schedstat_inc(prev, sched_info.bkl_count); |
| 3892 | } | 3892 | } |
| 3893 | #endif | 3893 | #endif |
| @@ -4871,7 +4871,8 @@ recheck: | |||
| 4871 | * assigned. | 4871 | * assigned. |
| 4872 | */ | 4872 | */ |
| 4873 | if (rt_bandwidth_enabled() && rt_policy(policy) && | 4873 | if (rt_bandwidth_enabled() && rt_policy(policy) && |
| 4874 | task_group(p)->rt_bandwidth.rt_runtime == 0) { | 4874 | task_group(p)->rt_bandwidth.rt_runtime == 0 && |
| 4875 | !task_group_is_autogroup(task_group(p))) { | ||
| 4875 | __task_rq_unlock(rq); | 4876 | __task_rq_unlock(rq); |
| 4876 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); | 4877 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); |
| 4877 | return -EPERM; | 4878 | return -EPERM; |
| @@ -8882,6 +8883,20 @@ cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | |||
| 8882 | } | 8883 | } |
| 8883 | } | 8884 | } |
| 8884 | 8885 | ||
| 8886 | static void | ||
| 8887 | cpu_cgroup_exit(struct cgroup_subsys *ss, struct task_struct *task) | ||
| 8888 | { | ||
| 8889 | /* | ||
| 8890 | * cgroup_exit() is called in the copy_process() failure path. | ||
| 8891 | * Ignore this case since the task hasn't run yet, this avoids | ||
| 8892 | * trying to poke a half freed task state from generic code. | ||
| 8893 | */ | ||
| 8894 | if (!(task->flags & PF_EXITING)) | ||
| 8895 | return; | ||
| 8896 | |||
| 8897 | sched_move_task(task); | ||
| 8898 | } | ||
| 8899 | |||
| 8885 | #ifdef CONFIG_FAIR_GROUP_SCHED | 8900 | #ifdef CONFIG_FAIR_GROUP_SCHED |
| 8886 | static int cpu_shares_write_u64(struct cgroup *cgrp, struct cftype *cftype, | 8901 | static int cpu_shares_write_u64(struct cgroup *cgrp, struct cftype *cftype, |
| 8887 | u64 shareval) | 8902 | u64 shareval) |
| @@ -8954,6 +8969,7 @@ struct cgroup_subsys cpu_cgroup_subsys = { | |||
| 8954 | .destroy = cpu_cgroup_destroy, | 8969 | .destroy = cpu_cgroup_destroy, |
| 8955 | .can_attach = cpu_cgroup_can_attach, | 8970 | .can_attach = cpu_cgroup_can_attach, |
| 8956 | .attach = cpu_cgroup_attach, | 8971 | .attach = cpu_cgroup_attach, |
| 8972 | .exit = cpu_cgroup_exit, | ||
| 8957 | .populate = cpu_cgroup_populate, | 8973 | .populate = cpu_cgroup_populate, |
| 8958 | .subsys_id = cpu_cgroup_subsys_id, | 8974 | .subsys_id = cpu_cgroup_subsys_id, |
| 8959 | .early_init = 1, | 8975 | .early_init = 1, |
diff --git a/kernel/sched_autogroup.c b/kernel/sched_autogroup.c
index 32a723b8f84c..9fb656283157 100644
--- a/kernel/sched_autogroup.c
+++ b/kernel/sched_autogroup.c
| @@ -27,6 +27,11 @@ static inline void autogroup_destroy(struct kref *kref) | |||
| 27 | { | 27 | { |
| 28 | struct autogroup *ag = container_of(kref, struct autogroup, kref); | 28 | struct autogroup *ag = container_of(kref, struct autogroup, kref); |
| 29 | 29 | ||
| 30 | #ifdef CONFIG_RT_GROUP_SCHED | ||
| 31 | /* We've redirected RT tasks to the root task group... */ | ||
| 32 | ag->tg->rt_se = NULL; | ||
| 33 | ag->tg->rt_rq = NULL; | ||
| 34 | #endif | ||
| 30 | sched_destroy_group(ag->tg); | 35 | sched_destroy_group(ag->tg); |
| 31 | } | 36 | } |
| 32 | 37 | ||
| @@ -55,6 +60,10 @@ static inline struct autogroup *autogroup_task_get(struct task_struct *p) | |||
| 55 | return ag; | 60 | return ag; |
| 56 | } | 61 | } |
| 57 | 62 | ||
| 63 | #ifdef CONFIG_RT_GROUP_SCHED | ||
| 64 | static void free_rt_sched_group(struct task_group *tg); | ||
| 65 | #endif | ||
| 66 | |||
| 58 | static inline struct autogroup *autogroup_create(void) | 67 | static inline struct autogroup *autogroup_create(void) |
| 59 | { | 68 | { |
| 60 | struct autogroup *ag = kzalloc(sizeof(*ag), GFP_KERNEL); | 69 | struct autogroup *ag = kzalloc(sizeof(*ag), GFP_KERNEL); |
| @@ -72,6 +81,19 @@ static inline struct autogroup *autogroup_create(void) | |||
| 72 | init_rwsem(&ag->lock); | 81 | init_rwsem(&ag->lock); |
| 73 | ag->id = atomic_inc_return(&autogroup_seq_nr); | 82 | ag->id = atomic_inc_return(&autogroup_seq_nr); |
| 74 | ag->tg = tg; | 83 | ag->tg = tg; |
| 84 | #ifdef CONFIG_RT_GROUP_SCHED | ||
| 85 | /* | ||
| 86 | * Autogroup RT tasks are redirected to the root task group | ||
| 87 | * so we don't have to move tasks around upon policy change, | ||
| 88 | * or flail around trying to allocate bandwidth on the fly. | ||
| 89 | * A bandwidth exception in __sched_setscheduler() allows | ||
| 90 | * the policy change to proceed. Thereafter, task_group() | ||
| 91 | * returns &root_task_group, so zero bandwidth is required. | ||
| 92 | */ | ||
| 93 | free_rt_sched_group(tg); | ||
| 94 | tg->rt_se = root_task_group.rt_se; | ||
| 95 | tg->rt_rq = root_task_group.rt_rq; | ||
| 96 | #endif | ||
| 75 | tg->autogroup = ag; | 97 | tg->autogroup = ag; |
| 76 | 98 | ||
| 77 | return ag; | 99 | return ag; |
| @@ -106,6 +128,11 @@ task_wants_autogroup(struct task_struct *p, struct task_group *tg) | |||
| 106 | return true; | 128 | return true; |
| 107 | } | 129 | } |
| 108 | 130 | ||
| 131 | static inline bool task_group_is_autogroup(struct task_group *tg) | ||
| 132 | { | ||
| 133 | return tg != &root_task_group && tg->autogroup; | ||
| 134 | } | ||
| 135 | |||
| 109 | static inline struct task_group * | 136 | static inline struct task_group * |
| 110 | autogroup_task_group(struct task_struct *p, struct task_group *tg) | 137 | autogroup_task_group(struct task_struct *p, struct task_group *tg) |
| 111 | { | 138 | { |
| @@ -231,6 +258,11 @@ void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m) | |||
| 231 | #ifdef CONFIG_SCHED_DEBUG | 258 | #ifdef CONFIG_SCHED_DEBUG |
| 232 | static inline int autogroup_path(struct task_group *tg, char *buf, int buflen) | 259 | static inline int autogroup_path(struct task_group *tg, char *buf, int buflen) |
| 233 | { | 260 | { |
| 261 | int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled); | ||
| 262 | |||
| 263 | if (!enabled || !tg->autogroup) | ||
| 264 | return 0; | ||
| 265 | |||
| 234 | return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id); | 266 | return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id); |
| 235 | } | 267 | } |
| 236 | #endif /* CONFIG_SCHED_DEBUG */ | 268 | #endif /* CONFIG_SCHED_DEBUG */ |
diff --git a/kernel/sched_autogroup.h b/kernel/sched_autogroup.h
index 5358e241cb20..7b859ffe5dad 100644
--- a/kernel/sched_autogroup.h
+++ b/kernel/sched_autogroup.h
| @@ -15,6 +15,10 @@ autogroup_task_group(struct task_struct *p, struct task_group *tg); | |||
| 15 | 15 | ||
| 16 | static inline void autogroup_init(struct task_struct *init_task) { } | 16 | static inline void autogroup_init(struct task_struct *init_task) { } |
| 17 | static inline void autogroup_free(struct task_group *tg) { } | 17 | static inline void autogroup_free(struct task_group *tg) { } |
| 18 | static inline bool task_group_is_autogroup(struct task_group *tg) | ||
| 19 | { | ||
| 20 | return 0; | ||
| 21 | } | ||
| 18 | 22 | ||
| 19 | static inline struct task_group * | 23 | static inline struct task_group * |
| 20 | autogroup_task_group(struct task_struct *p, struct task_group *tg) | 24 | autogroup_task_group(struct task_struct *p, struct task_group *tg) |
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 1dfae3d014b5..eb6cb8edd075 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
| @@ -16,6 +16,8 @@ | |||
| 16 | #include <linux/kallsyms.h> | 16 | #include <linux/kallsyms.h> |
| 17 | #include <linux/utsname.h> | 17 | #include <linux/utsname.h> |
| 18 | 18 | ||
| 19 | static DEFINE_SPINLOCK(sched_debug_lock); | ||
| 20 | |||
| 19 | /* | 21 | /* |
| 20 | * This allows printing both to /proc/sched_debug and | 22 | * This allows printing both to /proc/sched_debug and |
| 21 | * to the console | 23 | * to the console |
| @@ -86,6 +88,26 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group | |||
| 86 | } | 88 | } |
| 87 | #endif | 89 | #endif |
| 88 | 90 | ||
| 91 | #ifdef CONFIG_CGROUP_SCHED | ||
| 92 | static char group_path[PATH_MAX]; | ||
| 93 | |||
| 94 | static char *task_group_path(struct task_group *tg) | ||
| 95 | { | ||
| 96 | if (autogroup_path(tg, group_path, PATH_MAX)) | ||
| 97 | return group_path; | ||
| 98 | |||
| 99 | /* | ||
| 100 | * May be NULL if the underlying cgroup isn't fully-created yet | ||
| 101 | */ | ||
| 102 | if (!tg->css.cgroup) { | ||
| 103 | group_path[0] = '\0'; | ||
| 104 | return group_path; | ||
| 105 | } | ||
| 106 | cgroup_path(tg->css.cgroup, group_path, PATH_MAX); | ||
| 107 | return group_path; | ||
| 108 | } | ||
| 109 | #endif | ||
| 110 | |||
| 89 | static void | 111 | static void |
| 90 | print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) | 112 | print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) |
| 91 | { | 113 | { |
| @@ -108,6 +130,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) | |||
| 108 | SEQ_printf(m, "%15Ld %15Ld %15Ld.%06ld %15Ld.%06ld %15Ld.%06ld", | 130 | SEQ_printf(m, "%15Ld %15Ld %15Ld.%06ld %15Ld.%06ld %15Ld.%06ld", |
| 109 | 0LL, 0LL, 0LL, 0L, 0LL, 0L, 0LL, 0L); | 131 | 0LL, 0LL, 0LL, 0L, 0LL, 0L, 0LL, 0L); |
| 110 | #endif | 132 | #endif |
| 133 | #ifdef CONFIG_CGROUP_SCHED | ||
| 134 | SEQ_printf(m, " %s", task_group_path(task_group(p))); | ||
| 135 | #endif | ||
| 111 | 136 | ||
| 112 | SEQ_printf(m, "\n"); | 137 | SEQ_printf(m, "\n"); |
| 113 | } | 138 | } |
| @@ -144,7 +169,11 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) | |||
| 144 | struct sched_entity *last; | 169 | struct sched_entity *last; |
| 145 | unsigned long flags; | 170 | unsigned long flags; |
| 146 | 171 | ||
| 172 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
| 173 | SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, task_group_path(cfs_rq->tg)); | ||
| 174 | #else | ||
| 147 | SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu); | 175 | SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu); |
| 176 | #endif | ||
| 148 | SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock", | 177 | SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock", |
| 149 | SPLIT_NS(cfs_rq->exec_clock)); | 178 | SPLIT_NS(cfs_rq->exec_clock)); |
| 150 | 179 | ||
| @@ -191,7 +220,11 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) | |||
| 191 | 220 | ||
| 192 | void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq) | 221 | void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq) |
| 193 | { | 222 | { |
| 223 | #ifdef CONFIG_RT_GROUP_SCHED | ||
| 224 | SEQ_printf(m, "\nrt_rq[%d]:%s\n", cpu, task_group_path(rt_rq->tg)); | ||
| 225 | #else | ||
| 194 | SEQ_printf(m, "\nrt_rq[%d]:\n", cpu); | 226 | SEQ_printf(m, "\nrt_rq[%d]:\n", cpu); |
| 227 | #endif | ||
| 195 | 228 | ||
| 196 | #define P(x) \ | 229 | #define P(x) \ |
| 197 | SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(rt_rq->x)) | 230 | SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(rt_rq->x)) |
| @@ -212,6 +245,7 @@ extern __read_mostly int sched_clock_running; | |||
| 212 | static void print_cpu(struct seq_file *m, int cpu) | 245 | static void print_cpu(struct seq_file *m, int cpu) |
| 213 | { | 246 | { |
| 214 | struct rq *rq = cpu_rq(cpu); | 247 | struct rq *rq = cpu_rq(cpu); |
| 248 | unsigned long flags; | ||
| 215 | 249 | ||
| 216 | #ifdef CONFIG_X86 | 250 | #ifdef CONFIG_X86 |
| 217 | { | 251 | { |
| @@ -262,14 +296,20 @@ static void print_cpu(struct seq_file *m, int cpu) | |||
| 262 | P(ttwu_count); | 296 | P(ttwu_count); |
| 263 | P(ttwu_local); | 297 | P(ttwu_local); |
| 264 | 298 | ||
| 265 | P(bkl_count); | 299 | SEQ_printf(m, " .%-30s: %d\n", "bkl_count", |
| 300 | rq->rq_sched_info.bkl_count); | ||
| 266 | 301 | ||
| 267 | #undef P | 302 | #undef P |
| 303 | #undef P64 | ||
| 268 | #endif | 304 | #endif |
| 305 | spin_lock_irqsave(&sched_debug_lock, flags); | ||
| 269 | print_cfs_stats(m, cpu); | 306 | print_cfs_stats(m, cpu); |
| 270 | print_rt_stats(m, cpu); | 307 | print_rt_stats(m, cpu); |
| 271 | 308 | ||
| 309 | rcu_read_lock(); | ||
| 272 | print_rq(m, rq, cpu); | 310 | print_rq(m, rq, cpu); |
| 311 | rcu_read_unlock(); | ||
| 312 | spin_unlock_irqrestore(&sched_debug_lock, flags); | ||
| 273 | } | 313 | } |
| 274 | 314 | ||
| 275 | static const char *sched_tunable_scaling_names[] = { | 315 | static const char *sched_tunable_scaling_names[] = { |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index c62ebae65cf0..77e9166d7bbf 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
| @@ -1062,6 +1062,9 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) | |||
| 1062 | struct sched_entity *se = __pick_next_entity(cfs_rq); | 1062 | struct sched_entity *se = __pick_next_entity(cfs_rq); |
| 1063 | s64 delta = curr->vruntime - se->vruntime; | 1063 | s64 delta = curr->vruntime - se->vruntime; |
| 1064 | 1064 | ||
| 1065 | if (delta < 0) | ||
| 1066 | return; | ||
| 1067 | |||
| 1065 | if (delta > ideal_runtime) | 1068 | if (delta > ideal_runtime) |
| 1066 | resched_task(rq_of(cfs_rq)->curr); | 1069 | resched_task(rq_of(cfs_rq)->curr); |
| 1067 | } | 1070 | } |
| @@ -1362,27 +1365,27 @@ static long effective_load(struct task_group *tg, int cpu, long wl, long wg) | |||
| 1362 | return wl; | 1365 | return wl; |
| 1363 | 1366 | ||
| 1364 | for_each_sched_entity(se) { | 1367 | for_each_sched_entity(se) { |
| 1365 | long S, rw, s, a, b; | 1368 | long lw, w; |
| 1366 | 1369 | ||
| 1367 | S = se->my_q->tg->shares; | 1370 | tg = se->my_q->tg; |
| 1368 | s = se->load.weight; | 1371 | w = se->my_q->load.weight; |
| 1369 | rw = se->my_q->load.weight; | ||
| 1370 | 1372 | ||
| 1371 | a = S*(rw + wl); | 1373 | /* use this cpu's instantaneous contribution */ |
| 1372 | b = S*rw + s*wg; | 1374 | lw = atomic_read(&tg->load_weight); |
| 1375 | lw -= se->my_q->load_contribution; | ||
| 1376 | lw += w + wg; | ||
| 1373 | 1377 | ||
| 1374 | wl = s*(a-b); | 1378 | wl += w; |
| 1375 | 1379 | ||
| 1376 | if (likely(b)) | 1380 | if (lw > 0 && wl < lw) |
| 1377 | wl /= b; | 1381 | wl = (wl * tg->shares) / lw; |
| 1382 | else | ||
| 1383 | wl = tg->shares; | ||
| 1378 | 1384 | ||
| 1379 | /* | 1385 | /* zero point is MIN_SHARES */ |
| 1380 | * Assume the group is already running and will | 1386 | if (wl < MIN_SHARES) |
| 1381 | * thus already be accounted for in the weight. | 1387 | wl = MIN_SHARES; |
| 1382 | * | 1388 | wl -= se->load.weight; |
| 1383 | * That is, moving shares between CPUs, does not | ||
| 1384 | * alter the group weight. | ||
| 1385 | */ | ||
| 1386 | wg = 0; | 1389 | wg = 0; |
| 1387 | } | 1390 | } |
| 1388 | 1391 | ||
