diff options
| author | Tejun Heo <tj@kernel.org> | 2012-04-01 15:30:01 -0400 |
|---|---|---|
| committer | Tejun Heo <tj@kernel.org> | 2012-04-01 15:55:00 -0400 |
| commit | 959d851caa48829eb85cb85aa949fd6b4c5d5bc6 (patch) | |
| tree | 3ba9c94ec346275fb44c4f0d1cd2537cdff8d811 /kernel/sched | |
| parent | a5567932fc926739e29e98487128080f40c61710 (diff) | |
| parent | 48ddbe194623ae089cc0576e60363f2d2e85662a (diff) | |
Merge branch 'for-3.5' of ../cgroup into block/for-3.5/core-merged
cgroup/for-3.5 contains the following changes which blk-cgroup needs
to proceed with the on-going cleanup.
* Dynamic addition and removal of cftypes to make config/stat file
handling modular for policies.
* cgroup removal update to not wait for css references to drain to fix
blkcg removal hang caused by cfq caching cfqgs.
Pull in cgroup/for-3.5 into block/for-3.5/core. This causes the
following conflicts in block/blk-cgroup.c.
* 761b3ef50e "cgroup: remove cgroup_subsys argument from callbacks"
conflicts with blkiocg_pre_destroy() addition and blkiocg_attach()
removal. Resolved by removing @subsys from all subsys methods.
* 676f7c8f84 "cgroup: relocate cftype and cgroup_subsys definitions in
controllers" conflicts with ->pre_destroy() and ->attach() updates
and removal of modular config. Resolved by dropping forward
declarations of the methods and applying updates to the relocated
blkio_subsys.
* 4baf6e3325 "cgroup: convert all non-memcg controllers to the new
cftype interface" builds upon the previous item. Resolved by adding
->base_cftypes to the relocated blkio_subsys.
Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'kernel/sched')
| -rw-r--r-- | kernel/sched/auto_group.c | 12 | ||||
| -rw-r--r-- | kernel/sched/core.c | 255 | ||||
| -rw-r--r-- | kernel/sched/debug.c | 1 | ||||
| -rw-r--r-- | kernel/sched/fair.c | 418 | ||||
| -rw-r--r-- | kernel/sched/rt.c | 45 | ||||
| -rw-r--r-- | kernel/sched/sched.h | 32 | ||||
| -rw-r--r-- | kernel/sched/stats.c | 4 |
7 files changed, 402 insertions, 365 deletions
diff --git a/kernel/sched/auto_group.c b/kernel/sched/auto_group.c index e8a1f83ee0e7..0984a21076a3 100644 --- a/kernel/sched/auto_group.c +++ b/kernel/sched/auto_group.c | |||
| @@ -195,20 +195,20 @@ __setup("noautogroup", setup_autogroup); | |||
| 195 | 195 | ||
| 196 | #ifdef CONFIG_PROC_FS | 196 | #ifdef CONFIG_PROC_FS |
| 197 | 197 | ||
| 198 | int proc_sched_autogroup_set_nice(struct task_struct *p, int *nice) | 198 | int proc_sched_autogroup_set_nice(struct task_struct *p, int nice) |
| 199 | { | 199 | { |
| 200 | static unsigned long next = INITIAL_JIFFIES; | 200 | static unsigned long next = INITIAL_JIFFIES; |
| 201 | struct autogroup *ag; | 201 | struct autogroup *ag; |
| 202 | int err; | 202 | int err; |
| 203 | 203 | ||
| 204 | if (*nice < -20 || *nice > 19) | 204 | if (nice < -20 || nice > 19) |
| 205 | return -EINVAL; | 205 | return -EINVAL; |
| 206 | 206 | ||
| 207 | err = security_task_setnice(current, *nice); | 207 | err = security_task_setnice(current, nice); |
| 208 | if (err) | 208 | if (err) |
| 209 | return err; | 209 | return err; |
| 210 | 210 | ||
| 211 | if (*nice < 0 && !can_nice(current, *nice)) | 211 | if (nice < 0 && !can_nice(current, nice)) |
| 212 | return -EPERM; | 212 | return -EPERM; |
| 213 | 213 | ||
| 214 | /* this is a heavy operation taking global locks.. */ | 214 | /* this is a heavy operation taking global locks.. */ |
| @@ -219,9 +219,9 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int *nice) | |||
| 219 | ag = autogroup_task_get(p); | 219 | ag = autogroup_task_get(p); |
| 220 | 220 | ||
| 221 | down_write(&ag->lock); | 221 | down_write(&ag->lock); |
| 222 | err = sched_group_set_shares(ag->tg, prio_to_weight[*nice + 20]); | 222 | err = sched_group_set_shares(ag->tg, prio_to_weight[nice + 20]); |
| 223 | if (!err) | 223 | if (!err) |
| 224 | ag->nice = *nice; | 224 | ag->nice = nice; |
| 225 | up_write(&ag->lock); | 225 | up_write(&ag->lock); |
| 226 | 226 | ||
| 227 | autogroup_kref_put(ag); | 227 | autogroup_kref_put(ag); |
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 5255c9d2e053..afc6d7e71557 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
| @@ -71,7 +71,9 @@ | |||
| 71 | #include <linux/ftrace.h> | 71 | #include <linux/ftrace.h> |
| 72 | #include <linux/slab.h> | 72 | #include <linux/slab.h> |
| 73 | #include <linux/init_task.h> | 73 | #include <linux/init_task.h> |
| 74 | #include <linux/binfmts.h> | ||
| 74 | 75 | ||
| 76 | #include <asm/switch_to.h> | ||
| 75 | #include <asm/tlb.h> | 77 | #include <asm/tlb.h> |
| 76 | #include <asm/irq_regs.h> | 78 | #include <asm/irq_regs.h> |
| 77 | #include <asm/mutex.h> | 79 | #include <asm/mutex.h> |
| @@ -162,13 +164,13 @@ static int sched_feat_show(struct seq_file *m, void *v) | |||
| 162 | 164 | ||
| 163 | #ifdef HAVE_JUMP_LABEL | 165 | #ifdef HAVE_JUMP_LABEL |
| 164 | 166 | ||
| 165 | #define jump_label_key__true jump_label_key_enabled | 167 | #define jump_label_key__true STATIC_KEY_INIT_TRUE |
| 166 | #define jump_label_key__false jump_label_key_disabled | 168 | #define jump_label_key__false STATIC_KEY_INIT_FALSE |
| 167 | 169 | ||
| 168 | #define SCHED_FEAT(name, enabled) \ | 170 | #define SCHED_FEAT(name, enabled) \ |
| 169 | jump_label_key__##enabled , | 171 | jump_label_key__##enabled , |
| 170 | 172 | ||
| 171 | struct jump_label_key sched_feat_keys[__SCHED_FEAT_NR] = { | 173 | struct static_key sched_feat_keys[__SCHED_FEAT_NR] = { |
| 172 | #include "features.h" | 174 | #include "features.h" |
| 173 | }; | 175 | }; |
| 174 | 176 | ||
| @@ -176,14 +178,14 @@ struct jump_label_key sched_feat_keys[__SCHED_FEAT_NR] = { | |||
| 176 | 178 | ||
| 177 | static void sched_feat_disable(int i) | 179 | static void sched_feat_disable(int i) |
| 178 | { | 180 | { |
| 179 | if (jump_label_enabled(&sched_feat_keys[i])) | 181 | if (static_key_enabled(&sched_feat_keys[i])) |
| 180 | jump_label_dec(&sched_feat_keys[i]); | 182 | static_key_slow_dec(&sched_feat_keys[i]); |
| 181 | } | 183 | } |
| 182 | 184 | ||
| 183 | static void sched_feat_enable(int i) | 185 | static void sched_feat_enable(int i) |
| 184 | { | 186 | { |
| 185 | if (!jump_label_enabled(&sched_feat_keys[i])) | 187 | if (!static_key_enabled(&sched_feat_keys[i])) |
| 186 | jump_label_inc(&sched_feat_keys[i]); | 188 | static_key_slow_inc(&sched_feat_keys[i]); |
| 187 | } | 189 | } |
| 188 | #else | 190 | #else |
| 189 | static void sched_feat_disable(int i) { }; | 191 | static void sched_feat_disable(int i) { }; |
| @@ -894,7 +896,7 @@ static void update_rq_clock_task(struct rq *rq, s64 delta) | |||
| 894 | delta -= irq_delta; | 896 | delta -= irq_delta; |
| 895 | #endif | 897 | #endif |
| 896 | #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING | 898 | #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING |
| 897 | if (static_branch((&paravirt_steal_rq_enabled))) { | 899 | if (static_key_false((&paravirt_steal_rq_enabled))) { |
| 898 | u64 st; | 900 | u64 st; |
| 899 | 901 | ||
| 900 | steal = paravirt_steal_clock(cpu_of(rq)); | 902 | steal = paravirt_steal_clock(cpu_of(rq)); |
| @@ -1263,29 +1265,59 @@ EXPORT_SYMBOL_GPL(kick_process); | |||
| 1263 | */ | 1265 | */ |
| 1264 | static int select_fallback_rq(int cpu, struct task_struct *p) | 1266 | static int select_fallback_rq(int cpu, struct task_struct *p) |
| 1265 | { | 1267 | { |
| 1266 | int dest_cpu; | ||
| 1267 | const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(cpu)); | 1268 | const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(cpu)); |
| 1269 | enum { cpuset, possible, fail } state = cpuset; | ||
| 1270 | int dest_cpu; | ||
| 1268 | 1271 | ||
| 1269 | /* Look for allowed, online CPU in same node. */ | 1272 | /* Look for allowed, online CPU in same node. */ |
| 1270 | for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask) | 1273 | for_each_cpu(dest_cpu, nodemask) { |
| 1274 | if (!cpu_online(dest_cpu)) | ||
| 1275 | continue; | ||
| 1276 | if (!cpu_active(dest_cpu)) | ||
| 1277 | continue; | ||
| 1271 | if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p))) | 1278 | if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p))) |
| 1272 | return dest_cpu; | 1279 | return dest_cpu; |
| 1280 | } | ||
| 1273 | 1281 | ||
| 1274 | /* Any allowed, online CPU? */ | 1282 | for (;;) { |
| 1275 | dest_cpu = cpumask_any_and(tsk_cpus_allowed(p), cpu_active_mask); | 1283 | /* Any allowed, online CPU? */ |
| 1276 | if (dest_cpu < nr_cpu_ids) | 1284 | for_each_cpu(dest_cpu, tsk_cpus_allowed(p)) { |
| 1277 | return dest_cpu; | 1285 | if (!cpu_online(dest_cpu)) |
| 1286 | continue; | ||
| 1287 | if (!cpu_active(dest_cpu)) | ||
| 1288 | continue; | ||
| 1289 | goto out; | ||
| 1290 | } | ||
| 1278 | 1291 | ||
| 1279 | /* No more Mr. Nice Guy. */ | 1292 | switch (state) { |
| 1280 | dest_cpu = cpuset_cpus_allowed_fallback(p); | 1293 | case cpuset: |
| 1281 | /* | 1294 | /* No more Mr. Nice Guy. */ |
| 1282 | * Don't tell them about moving exiting tasks or | 1295 | cpuset_cpus_allowed_fallback(p); |
| 1283 | * kernel threads (both mm NULL), since they never | 1296 | state = possible; |
| 1284 | * leave kernel. | 1297 | break; |
| 1285 | */ | 1298 | |
| 1286 | if (p->mm && printk_ratelimit()) { | 1299 | case possible: |
| 1287 | printk(KERN_INFO "process %d (%s) no longer affine to cpu%d\n", | 1300 | do_set_cpus_allowed(p, cpu_possible_mask); |
| 1288 | task_pid_nr(p), p->comm, cpu); | 1301 | state = fail; |
| 1302 | break; | ||
| 1303 | |||
| 1304 | case fail: | ||
| 1305 | BUG(); | ||
| 1306 | break; | ||
| 1307 | } | ||
| 1308 | } | ||
| 1309 | |||
| 1310 | out: | ||
| 1311 | if (state != cpuset) { | ||
| 1312 | /* | ||
| 1313 | * Don't tell them about moving exiting tasks or | ||
| 1314 | * kernel threads (both mm NULL), since they never | ||
| 1315 | * leave kernel. | ||
| 1316 | */ | ||
| 1317 | if (p->mm && printk_ratelimit()) { | ||
| 1318 | printk_sched("process %d (%s) no longer affine to cpu%d\n", | ||
| 1319 | task_pid_nr(p), p->comm, cpu); | ||
| 1320 | } | ||
| 1289 | } | 1321 | } |
| 1290 | 1322 | ||
| 1291 | return dest_cpu; | 1323 | return dest_cpu; |
| @@ -1507,7 +1539,7 @@ static int ttwu_activate_remote(struct task_struct *p, int wake_flags) | |||
| 1507 | } | 1539 | } |
| 1508 | #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ | 1540 | #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ |
| 1509 | 1541 | ||
| 1510 | static inline int ttwu_share_cache(int this_cpu, int that_cpu) | 1542 | bool cpus_share_cache(int this_cpu, int that_cpu) |
| 1511 | { | 1543 | { |
| 1512 | return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu); | 1544 | return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu); |
| 1513 | } | 1545 | } |
| @@ -1518,7 +1550,7 @@ static void ttwu_queue(struct task_struct *p, int cpu) | |||
| 1518 | struct rq *rq = cpu_rq(cpu); | 1550 | struct rq *rq = cpu_rq(cpu); |
| 1519 | 1551 | ||
| 1520 | #if defined(CONFIG_SMP) | 1552 | #if defined(CONFIG_SMP) |
| 1521 | if (sched_feat(TTWU_QUEUE) && !ttwu_share_cache(smp_processor_id(), cpu)) { | 1553 | if (sched_feat(TTWU_QUEUE) && !cpus_share_cache(smp_processor_id(), cpu)) { |
| 1522 | sched_clock_cpu(cpu); /* sync clocks x-cpu */ | 1554 | sched_clock_cpu(cpu); /* sync clocks x-cpu */ |
| 1523 | ttwu_queue_remote(p, cpu); | 1555 | ttwu_queue_remote(p, cpu); |
| 1524 | return; | 1556 | return; |
| @@ -1932,7 +1964,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) | |||
| 1932 | local_irq_enable(); | 1964 | local_irq_enable(); |
| 1933 | #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ | 1965 | #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ |
| 1934 | finish_lock_switch(rq, prev); | 1966 | finish_lock_switch(rq, prev); |
| 1935 | trace_sched_stat_sleeptime(current, rq->clock); | 1967 | finish_arch_post_lock_switch(); |
| 1936 | 1968 | ||
| 1937 | fire_sched_in_preempt_notifiers(current); | 1969 | fire_sched_in_preempt_notifiers(current); |
| 1938 | if (mm) | 1970 | if (mm) |
| @@ -2267,13 +2299,10 @@ calc_load_n(unsigned long load, unsigned long exp, | |||
| 2267 | * Once we've updated the global active value, we need to apply the exponential | 2299 | * Once we've updated the global active value, we need to apply the exponential |
| 2268 | * weights adjusted to the number of cycles missed. | 2300 | * weights adjusted to the number of cycles missed. |
| 2269 | */ | 2301 | */ |
| 2270 | static void calc_global_nohz(unsigned long ticks) | 2302 | static void calc_global_nohz(void) |
| 2271 | { | 2303 | { |
| 2272 | long delta, active, n; | 2304 | long delta, active, n; |
| 2273 | 2305 | ||
| 2274 | if (time_before(jiffies, calc_load_update)) | ||
| 2275 | return; | ||
| 2276 | |||
| 2277 | /* | 2306 | /* |
| 2278 | * If we crossed a calc_load_update boundary, make sure to fold | 2307 | * If we crossed a calc_load_update boundary, make sure to fold |
| 2279 | * any pending idle changes, the respective CPUs might have | 2308 | * any pending idle changes, the respective CPUs might have |
| @@ -2285,31 +2314,25 @@ static void calc_global_nohz(unsigned long ticks) | |||
| 2285 | atomic_long_add(delta, &calc_load_tasks); | 2314 | atomic_long_add(delta, &calc_load_tasks); |
| 2286 | 2315 | ||
| 2287 | /* | 2316 | /* |
| 2288 | * If we were idle for multiple load cycles, apply them. | 2317 | * It could be the one fold was all it took, we done! |
| 2289 | */ | 2318 | */ |
| 2290 | if (ticks >= LOAD_FREQ) { | 2319 | if (time_before(jiffies, calc_load_update + 10)) |
| 2291 | n = ticks / LOAD_FREQ; | 2320 | return; |
| 2292 | 2321 | ||
| 2293 | active = atomic_long_read(&calc_load_tasks); | 2322 | /* |
| 2294 | active = active > 0 ? active * FIXED_1 : 0; | 2323 | * Catch-up, fold however many we are behind still |
| 2324 | */ | ||
| 2325 | delta = jiffies - calc_load_update - 10; | ||
| 2326 | n = 1 + (delta / LOAD_FREQ); | ||
| 2295 | 2327 | ||
| 2296 | avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n); | 2328 | active = atomic_long_read(&calc_load_tasks); |
| 2297 | avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n); | 2329 | active = active > 0 ? active * FIXED_1 : 0; |
| 2298 | avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n); | ||
| 2299 | 2330 | ||
| 2300 | calc_load_update += n * LOAD_FREQ; | 2331 | avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n); |
| 2301 | } | 2332 | avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n); |
| 2333 | avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n); | ||
| 2302 | 2334 | ||
| 2303 | /* | 2335 | calc_load_update += n * LOAD_FREQ; |
| 2304 | * Its possible the remainder of the above division also crosses | ||
| 2305 | * a LOAD_FREQ period, the regular check in calc_global_load() | ||
| 2306 | * which comes after this will take care of that. | ||
| 2307 | * | ||
| 2308 | * Consider us being 11 ticks before a cycle completion, and us | ||
| 2309 | * sleeping for 4*LOAD_FREQ + 22 ticks, then the above code will | ||
| 2310 | * age us 4 cycles, and the test in calc_global_load() will | ||
| 2311 | * pick up the final one. | ||
| 2312 | */ | ||
| 2313 | } | 2336 | } |
| 2314 | #else | 2337 | #else |
| 2315 | void calc_load_account_idle(struct rq *this_rq) | 2338 | void calc_load_account_idle(struct rq *this_rq) |
| @@ -2321,7 +2344,7 @@ static inline long calc_load_fold_idle(void) | |||
| 2321 | return 0; | 2344 | return 0; |
| 2322 | } | 2345 | } |
| 2323 | 2346 | ||
| 2324 | static void calc_global_nohz(unsigned long ticks) | 2347 | static void calc_global_nohz(void) |
| 2325 | { | 2348 | { |
| 2326 | } | 2349 | } |
| 2327 | #endif | 2350 | #endif |
| @@ -2349,8 +2372,6 @@ void calc_global_load(unsigned long ticks) | |||
| 2349 | { | 2372 | { |
| 2350 | long active; | 2373 | long active; |
| 2351 | 2374 | ||
| 2352 | calc_global_nohz(ticks); | ||
| 2353 | |||
| 2354 | if (time_before(jiffies, calc_load_update + 10)) | 2375 | if (time_before(jiffies, calc_load_update + 10)) |
| 2355 | return; | 2376 | return; |
| 2356 | 2377 | ||
| @@ -2362,6 +2383,16 @@ void calc_global_load(unsigned long ticks) | |||
| 2362 | avenrun[2] = calc_load(avenrun[2], EXP_15, active); | 2383 | avenrun[2] = calc_load(avenrun[2], EXP_15, active); |
| 2363 | 2384 | ||
| 2364 | calc_load_update += LOAD_FREQ; | 2385 | calc_load_update += LOAD_FREQ; |
| 2386 | |||
| 2387 | /* | ||
| 2388 | * Account one period with whatever state we found before | ||
| 2389 | * folding in the nohz state and ageing the entire idle period. | ||
| 2390 | * | ||
| 2391 | * This avoids loosing a sample when we go idle between | ||
| 2392 | * calc_load_account_active() (10 ticks ago) and now and thus | ||
| 2393 | * under-accounting. | ||
| 2394 | */ | ||
| 2395 | calc_global_nohz(); | ||
| 2365 | } | 2396 | } |
| 2366 | 2397 | ||
| 2367 | /* | 2398 | /* |
| @@ -2756,7 +2787,7 @@ void account_idle_time(cputime_t cputime) | |||
| 2756 | static __always_inline bool steal_account_process_tick(void) | 2787 | static __always_inline bool steal_account_process_tick(void) |
| 2757 | { | 2788 | { |
| 2758 | #ifdef CONFIG_PARAVIRT | 2789 | #ifdef CONFIG_PARAVIRT |
| 2759 | if (static_branch(&paravirt_steal_enabled)) { | 2790 | if (static_key_false(&paravirt_steal_enabled)) { |
| 2760 | u64 steal, st = 0; | 2791 | u64 steal, st = 0; |
| 2761 | 2792 | ||
| 2762 | steal = paravirt_steal_clock(smp_processor_id()); | 2793 | steal = paravirt_steal_clock(smp_processor_id()); |
| @@ -3071,8 +3102,6 @@ EXPORT_SYMBOL(sub_preempt_count); | |||
| 3071 | */ | 3102 | */ |
| 3072 | static noinline void __schedule_bug(struct task_struct *prev) | 3103 | static noinline void __schedule_bug(struct task_struct *prev) |
| 3073 | { | 3104 | { |
| 3074 | struct pt_regs *regs = get_irq_regs(); | ||
| 3075 | |||
| 3076 | if (oops_in_progress) | 3105 | if (oops_in_progress) |
| 3077 | return; | 3106 | return; |
| 3078 | 3107 | ||
| @@ -3083,11 +3112,7 @@ static noinline void __schedule_bug(struct task_struct *prev) | |||
| 3083 | print_modules(); | 3112 | print_modules(); |
| 3084 | if (irqs_disabled()) | 3113 | if (irqs_disabled()) |
| 3085 | print_irqtrace_events(prev); | 3114 | print_irqtrace_events(prev); |
| 3086 | 3115 | dump_stack(); | |
| 3087 | if (regs) | ||
| 3088 | show_regs(regs); | ||
| 3089 | else | ||
| 3090 | dump_stack(); | ||
| 3091 | } | 3116 | } |
| 3092 | 3117 | ||
| 3093 | /* | 3118 | /* |
| @@ -3221,14 +3246,14 @@ need_resched: | |||
| 3221 | 3246 | ||
| 3222 | post_schedule(rq); | 3247 | post_schedule(rq); |
| 3223 | 3248 | ||
| 3224 | preempt_enable_no_resched(); | 3249 | sched_preempt_enable_no_resched(); |
| 3225 | if (need_resched()) | 3250 | if (need_resched()) |
| 3226 | goto need_resched; | 3251 | goto need_resched; |
| 3227 | } | 3252 | } |
| 3228 | 3253 | ||
| 3229 | static inline void sched_submit_work(struct task_struct *tsk) | 3254 | static inline void sched_submit_work(struct task_struct *tsk) |
| 3230 | { | 3255 | { |
| 3231 | if (!tsk->state) | 3256 | if (!tsk->state || tsk_is_pi_blocked(tsk)) |
| 3232 | return; | 3257 | return; |
| 3233 | /* | 3258 | /* |
| 3234 | * If we are going to sleep and we have plugged IO queued, | 3259 | * If we are going to sleep and we have plugged IO queued, |
| @@ -3247,6 +3272,18 @@ asmlinkage void __sched schedule(void) | |||
| 3247 | } | 3272 | } |
| 3248 | EXPORT_SYMBOL(schedule); | 3273 | EXPORT_SYMBOL(schedule); |
| 3249 | 3274 | ||
| 3275 | /** | ||
| 3276 | * schedule_preempt_disabled - called with preemption disabled | ||
| 3277 | * | ||
| 3278 | * Returns with preemption disabled. Note: preempt_count must be 1 | ||
| 3279 | */ | ||
| 3280 | void __sched schedule_preempt_disabled(void) | ||
| 3281 | { | ||
| 3282 | sched_preempt_enable_no_resched(); | ||
| 3283 | schedule(); | ||
| 3284 | preempt_disable(); | ||
| 3285 | } | ||
| 3286 | |||
| 3250 | #ifdef CONFIG_MUTEX_SPIN_ON_OWNER | 3287 | #ifdef CONFIG_MUTEX_SPIN_ON_OWNER |
| 3251 | 3288 | ||
| 3252 | static inline bool owner_running(struct mutex *lock, struct task_struct *owner) | 3289 | static inline bool owner_running(struct mutex *lock, struct task_struct *owner) |
| @@ -3407,9 +3444,9 @@ EXPORT_SYMBOL(__wake_up); | |||
| 3407 | /* | 3444 | /* |
| 3408 | * Same as __wake_up but called with the spinlock in wait_queue_head_t held. | 3445 | * Same as __wake_up but called with the spinlock in wait_queue_head_t held. |
| 3409 | */ | 3446 | */ |
| 3410 | void __wake_up_locked(wait_queue_head_t *q, unsigned int mode) | 3447 | void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr) |
| 3411 | { | 3448 | { |
| 3412 | __wake_up_common(q, mode, 1, 0, NULL); | 3449 | __wake_up_common(q, mode, nr, 0, NULL); |
| 3413 | } | 3450 | } |
| 3414 | EXPORT_SYMBOL_GPL(__wake_up_locked); | 3451 | EXPORT_SYMBOL_GPL(__wake_up_locked); |
| 3415 | 3452 | ||
| @@ -3768,6 +3805,24 @@ void rt_mutex_setprio(struct task_struct *p, int prio) | |||
| 3768 | 3805 | ||
| 3769 | rq = __task_rq_lock(p); | 3806 | rq = __task_rq_lock(p); |
| 3770 | 3807 | ||
| 3808 | /* | ||
| 3809 | * Idle task boosting is a nono in general. There is one | ||
| 3810 | * exception, when PREEMPT_RT and NOHZ is active: | ||
| 3811 | * | ||
| 3812 | * The idle task calls get_next_timer_interrupt() and holds | ||
| 3813 | * the timer wheel base->lock on the CPU and another CPU wants | ||
| 3814 | * to access the timer (probably to cancel it). We can safely | ||
| 3815 | * ignore the boosting request, as the idle CPU runs this code | ||
| 3816 | * with interrupts disabled and will complete the lock | ||
| 3817 | * protected section without being interrupted. So there is no | ||
| 3818 | * real need to boost. | ||
| 3819 | */ | ||
| 3820 | if (unlikely(p == rq->idle)) { | ||
| 3821 | WARN_ON(p != rq->curr); | ||
| 3822 | WARN_ON(p->pi_blocked_on); | ||
| 3823 | goto out_unlock; | ||
| 3824 | } | ||
| 3825 | |||
| 3771 | trace_sched_pi_setprio(p, prio); | 3826 | trace_sched_pi_setprio(p, prio); |
| 3772 | oldprio = p->prio; | 3827 | oldprio = p->prio; |
| 3773 | prev_class = p->sched_class; | 3828 | prev_class = p->sched_class; |
| @@ -3791,11 +3846,10 @@ void rt_mutex_setprio(struct task_struct *p, int prio) | |||
| 3791 | enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0); | 3846 | enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0); |
| 3792 | 3847 | ||
| 3793 | check_class_changed(rq, p, prev_class, oldprio); | 3848 | check_class_changed(rq, p, prev_class, oldprio); |
| 3849 | out_unlock: | ||
| 3794 | __task_rq_unlock(rq); | 3850 | __task_rq_unlock(rq); |
| 3795 | } | 3851 | } |
| 3796 | |||
| 3797 | #endif | 3852 | #endif |
| 3798 | |||
| 3799 | void set_user_nice(struct task_struct *p, long nice) | 3853 | void set_user_nice(struct task_struct *p, long nice) |
| 3800 | { | 3854 | { |
| 3801 | int old_prio, delta, on_rq; | 3855 | int old_prio, delta, on_rq; |
| @@ -4475,7 +4529,7 @@ SYSCALL_DEFINE0(sched_yield) | |||
| 4475 | __release(rq->lock); | 4529 | __release(rq->lock); |
| 4476 | spin_release(&rq->lock.dep_map, 1, _THIS_IP_); | 4530 | spin_release(&rq->lock.dep_map, 1, _THIS_IP_); |
| 4477 | do_raw_spin_unlock(&rq->lock); | 4531 | do_raw_spin_unlock(&rq->lock); |
| 4478 | preempt_enable_no_resched(); | 4532 | sched_preempt_enable_no_resched(); |
| 4479 | 4533 | ||
| 4480 | schedule(); | 4534 | schedule(); |
| 4481 | 4535 | ||
| @@ -4549,8 +4603,24 @@ EXPORT_SYMBOL(__cond_resched_softirq); | |||
| 4549 | /** | 4603 | /** |
| 4550 | * yield - yield the current processor to other threads. | 4604 | * yield - yield the current processor to other threads. |
| 4551 | * | 4605 | * |
| 4552 | * This is a shortcut for kernel-space yielding - it marks the | 4606 | * Do not ever use this function, there's a 99% chance you're doing it wrong. |
| 4553 | * thread runnable and calls sys_sched_yield(). | 4607 | * |
| 4608 | * The scheduler is at all times free to pick the calling task as the most | ||
| 4609 | * eligible task to run, if removing the yield() call from your code breaks | ||
| 4610 | * it, its already broken. | ||
| 4611 | * | ||
| 4612 | * Typical broken usage is: | ||
| 4613 | * | ||
| 4614 | * while (!event) | ||
| 4615 | * yield(); | ||
| 4616 | * | ||
| 4617 | * where one assumes that yield() will let 'the other' process run that will | ||
| 4618 | * make event true. If the current task is a SCHED_FIFO task that will never | ||
| 4619 | * happen. Never use yield() as a progress guarantee!! | ||
| 4620 | * | ||
| 4621 | * If you want to use yield() to wait for something, use wait_event(). | ||
| 4622 | * If you want to use yield() to be 'nice' for others, use cond_resched(). | ||
| 4623 | * If you still want to use yield(), do not! | ||
| 4554 | */ | 4624 | */ |
| 4555 | void __sched yield(void) | 4625 | void __sched yield(void) |
| 4556 | { | 4626 | { |
| @@ -5382,7 +5452,7 @@ static int __cpuinit sched_cpu_active(struct notifier_block *nfb, | |||
| 5382 | unsigned long action, void *hcpu) | 5452 | unsigned long action, void *hcpu) |
| 5383 | { | 5453 | { |
| 5384 | switch (action & ~CPU_TASKS_FROZEN) { | 5454 | switch (action & ~CPU_TASKS_FROZEN) { |
| 5385 | case CPU_ONLINE: | 5455 | case CPU_STARTING: |
| 5386 | case CPU_DOWN_FAILED: | 5456 | case CPU_DOWN_FAILED: |
| 5387 | set_cpu_active((long)hcpu, true); | 5457 | set_cpu_active((long)hcpu, true); |
| 5388 | return NOTIFY_OK; | 5458 | return NOTIFY_OK; |
| @@ -5754,7 +5824,7 @@ static void destroy_sched_domains(struct sched_domain *sd, int cpu) | |||
| 5754 | * | 5824 | * |
| 5755 | * Also keep a unique ID per domain (we use the first cpu number in | 5825 | * Also keep a unique ID per domain (we use the first cpu number in |
| 5756 | * the cpumask of the domain), this allows us to quickly tell if | 5826 | * the cpumask of the domain), this allows us to quickly tell if |
| 5757 | * two cpus are in the same cache domain, see ttwu_share_cache(). | 5827 | * two cpus are in the same cache domain, see cpus_share_cache(). |
| 5758 | */ | 5828 | */ |
| 5759 | DEFINE_PER_CPU(struct sched_domain *, sd_llc); | 5829 | DEFINE_PER_CPU(struct sched_domain *, sd_llc); |
| 5760 | DEFINE_PER_CPU(int, sd_llc_id); | 5830 | DEFINE_PER_CPU(int, sd_llc_id); |
| @@ -6931,6 +7001,9 @@ void __init sched_init(void) | |||
| 6931 | rq->online = 0; | 7001 | rq->online = 0; |
| 6932 | rq->idle_stamp = 0; | 7002 | rq->idle_stamp = 0; |
| 6933 | rq->avg_idle = 2*sysctl_sched_migration_cost; | 7003 | rq->avg_idle = 2*sysctl_sched_migration_cost; |
| 7004 | |||
| 7005 | INIT_LIST_HEAD(&rq->cfs_tasks); | ||
| 7006 | |||
| 6934 | rq_attach_root(rq, &def_root_domain); | 7007 | rq_attach_root(rq, &def_root_domain); |
| 6935 | #ifdef CONFIG_NO_HZ | 7008 | #ifdef CONFIG_NO_HZ |
| 6936 | rq->nohz_flags = 0; | 7009 | rq->nohz_flags = 0; |
| @@ -7525,8 +7598,7 @@ static inline struct task_group *cgroup_tg(struct cgroup *cgrp) | |||
| 7525 | struct task_group, css); | 7598 | struct task_group, css); |
| 7526 | } | 7599 | } |
| 7527 | 7600 | ||
| 7528 | static struct cgroup_subsys_state * | 7601 | static struct cgroup_subsys_state *cpu_cgroup_create(struct cgroup *cgrp) |
| 7529 | cpu_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cgrp) | ||
| 7530 | { | 7602 | { |
| 7531 | struct task_group *tg, *parent; | 7603 | struct task_group *tg, *parent; |
| 7532 | 7604 | ||
| @@ -7543,15 +7615,14 @@ cpu_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cgrp) | |||
| 7543 | return &tg->css; | 7615 | return &tg->css; |
| 7544 | } | 7616 | } |
| 7545 | 7617 | ||
| 7546 | static void | 7618 | static void cpu_cgroup_destroy(struct cgroup *cgrp) |
| 7547 | cpu_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) | ||
| 7548 | { | 7619 | { |
| 7549 | struct task_group *tg = cgroup_tg(cgrp); | 7620 | struct task_group *tg = cgroup_tg(cgrp); |
| 7550 | 7621 | ||
| 7551 | sched_destroy_group(tg); | 7622 | sched_destroy_group(tg); |
| 7552 | } | 7623 | } |
| 7553 | 7624 | ||
| 7554 | static int cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | 7625 | static int cpu_cgroup_can_attach(struct cgroup *cgrp, |
| 7555 | struct cgroup_taskset *tset) | 7626 | struct cgroup_taskset *tset) |
| 7556 | { | 7627 | { |
| 7557 | struct task_struct *task; | 7628 | struct task_struct *task; |
| @@ -7569,7 +7640,7 @@ static int cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | |||
| 7569 | return 0; | 7640 | return 0; |
| 7570 | } | 7641 | } |
| 7571 | 7642 | ||
| 7572 | static void cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | 7643 | static void cpu_cgroup_attach(struct cgroup *cgrp, |
| 7573 | struct cgroup_taskset *tset) | 7644 | struct cgroup_taskset *tset) |
| 7574 | { | 7645 | { |
| 7575 | struct task_struct *task; | 7646 | struct task_struct *task; |
| @@ -7579,8 +7650,8 @@ static void cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | |||
| 7579 | } | 7650 | } |
| 7580 | 7651 | ||
| 7581 | static void | 7652 | static void |
| 7582 | cpu_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp, | 7653 | cpu_cgroup_exit(struct cgroup *cgrp, struct cgroup *old_cgrp, |
| 7583 | struct cgroup *old_cgrp, struct task_struct *task) | 7654 | struct task_struct *task) |
| 7584 | { | 7655 | { |
| 7585 | /* | 7656 | /* |
| 7586 | * cgroup_exit() is called in the copy_process() failure path. | 7657 | * cgroup_exit() is called in the copy_process() failure path. |
| @@ -7899,13 +7970,9 @@ static struct cftype cpu_files[] = { | |||
| 7899 | .write_u64 = cpu_rt_period_write_uint, | 7970 | .write_u64 = cpu_rt_period_write_uint, |
| 7900 | }, | 7971 | }, |
| 7901 | #endif | 7972 | #endif |
| 7973 | { } /* terminate */ | ||
| 7902 | }; | 7974 | }; |
| 7903 | 7975 | ||
| 7904 | static int cpu_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont) | ||
| 7905 | { | ||
| 7906 | return cgroup_add_files(cont, ss, cpu_files, ARRAY_SIZE(cpu_files)); | ||
| 7907 | } | ||
| 7908 | |||
| 7909 | struct cgroup_subsys cpu_cgroup_subsys = { | 7976 | struct cgroup_subsys cpu_cgroup_subsys = { |
| 7910 | .name = "cpu", | 7977 | .name = "cpu", |
| 7911 | .create = cpu_cgroup_create, | 7978 | .create = cpu_cgroup_create, |
| @@ -7913,8 +7980,8 @@ struct cgroup_subsys cpu_cgroup_subsys = { | |||
| 7913 | .can_attach = cpu_cgroup_can_attach, | 7980 | .can_attach = cpu_cgroup_can_attach, |
| 7914 | .attach = cpu_cgroup_attach, | 7981 | .attach = cpu_cgroup_attach, |
| 7915 | .exit = cpu_cgroup_exit, | 7982 | .exit = cpu_cgroup_exit, |
| 7916 | .populate = cpu_cgroup_populate, | ||
| 7917 | .subsys_id = cpu_cgroup_subsys_id, | 7983 | .subsys_id = cpu_cgroup_subsys_id, |
| 7984 | .base_cftypes = cpu_files, | ||
| 7918 | .early_init = 1, | 7985 | .early_init = 1, |
| 7919 | }; | 7986 | }; |
| 7920 | 7987 | ||
| @@ -7930,8 +7997,7 @@ struct cgroup_subsys cpu_cgroup_subsys = { | |||
| 7930 | */ | 7997 | */ |
| 7931 | 7998 | ||
| 7932 | /* create a new cpu accounting group */ | 7999 | /* create a new cpu accounting group */ |
| 7933 | static struct cgroup_subsys_state *cpuacct_create( | 8000 | static struct cgroup_subsys_state *cpuacct_create(struct cgroup *cgrp) |
| 7934 | struct cgroup_subsys *ss, struct cgroup *cgrp) | ||
| 7935 | { | 8001 | { |
| 7936 | struct cpuacct *ca; | 8002 | struct cpuacct *ca; |
| 7937 | 8003 | ||
| @@ -7961,8 +8027,7 @@ out: | |||
| 7961 | } | 8027 | } |
| 7962 | 8028 | ||
| 7963 | /* destroy an existing cpu accounting group */ | 8029 | /* destroy an existing cpu accounting group */ |
| 7964 | static void | 8030 | static void cpuacct_destroy(struct cgroup *cgrp) |
| 7965 | cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) | ||
| 7966 | { | 8031 | { |
| 7967 | struct cpuacct *ca = cgroup_ca(cgrp); | 8032 | struct cpuacct *ca = cgroup_ca(cgrp); |
| 7968 | 8033 | ||
| @@ -8101,13 +8166,9 @@ static struct cftype files[] = { | |||
| 8101 | .name = "stat", | 8166 | .name = "stat", |
| 8102 | .read_map = cpuacct_stats_show, | 8167 | .read_map = cpuacct_stats_show, |
| 8103 | }, | 8168 | }, |
| 8169 | { } /* terminate */ | ||
| 8104 | }; | 8170 | }; |
| 8105 | 8171 | ||
| 8106 | static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) | ||
| 8107 | { | ||
| 8108 | return cgroup_add_files(cgrp, ss, files, ARRAY_SIZE(files)); | ||
| 8109 | } | ||
| 8110 | |||
| 8111 | /* | 8172 | /* |
| 8112 | * charge this task's execution time to its accounting group. | 8173 | * charge this task's execution time to its accounting group. |
| 8113 | * | 8174 | * |
| @@ -8139,7 +8200,7 @@ struct cgroup_subsys cpuacct_subsys = { | |||
| 8139 | .name = "cpuacct", | 8200 | .name = "cpuacct", |
| 8140 | .create = cpuacct_create, | 8201 | .create = cpuacct_create, |
| 8141 | .destroy = cpuacct_destroy, | 8202 | .destroy = cpuacct_destroy, |
| 8142 | .populate = cpuacct_populate, | ||
| 8143 | .subsys_id = cpuacct_subsys_id, | 8203 | .subsys_id = cpuacct_subsys_id, |
| 8204 | .base_cftypes = files, | ||
| 8144 | }; | 8205 | }; |
| 8145 | #endif /* CONFIG_CGROUP_CPUACCT */ | 8206 | #endif /* CONFIG_CGROUP_CPUACCT */ |
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 2a075e10004b..09acaa15161d 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c | |||
| @@ -288,7 +288,6 @@ static void print_cpu(struct seq_file *m, int cpu) | |||
| 288 | 288 | ||
| 289 | P(yld_count); | 289 | P(yld_count); |
| 290 | 290 | ||
| 291 | P(sched_switch); | ||
| 292 | P(sched_count); | 291 | P(sched_count); |
| 293 | P(sched_goidle); | 292 | P(sched_goidle); |
| 294 | #ifdef CONFIG_SMP | 293 | #ifdef CONFIG_SMP |
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7c6414fc669d..0d97ebdc58f0 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c | |||
| @@ -416,8 +416,8 @@ find_matching_se(struct sched_entity **se, struct sched_entity **pse) | |||
| 416 | 416 | ||
| 417 | #endif /* CONFIG_FAIR_GROUP_SCHED */ | 417 | #endif /* CONFIG_FAIR_GROUP_SCHED */ |
| 418 | 418 | ||
| 419 | static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, | 419 | static __always_inline |
| 420 | unsigned long delta_exec); | 420 | void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec); |
| 421 | 421 | ||
| 422 | /************************************************************** | 422 | /************************************************************** |
| 423 | * Scheduling class tree data structure manipulation methods: | 423 | * Scheduling class tree data structure manipulation methods: |
| @@ -776,29 +776,16 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
| 776 | * Scheduling class queueing methods: | 776 | * Scheduling class queueing methods: |
| 777 | */ | 777 | */ |
| 778 | 778 | ||
| 779 | #if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED | ||
| 780 | static void | ||
| 781 | add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight) | ||
| 782 | { | ||
| 783 | cfs_rq->task_weight += weight; | ||
| 784 | } | ||
| 785 | #else | ||
| 786 | static inline void | ||
| 787 | add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight) | ||
| 788 | { | ||
| 789 | } | ||
| 790 | #endif | ||
| 791 | |||
| 792 | static void | 779 | static void |
| 793 | account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se) | 780 | account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se) |
| 794 | { | 781 | { |
| 795 | update_load_add(&cfs_rq->load, se->load.weight); | 782 | update_load_add(&cfs_rq->load, se->load.weight); |
| 796 | if (!parent_entity(se)) | 783 | if (!parent_entity(se)) |
| 797 | update_load_add(&rq_of(cfs_rq)->load, se->load.weight); | 784 | update_load_add(&rq_of(cfs_rq)->load, se->load.weight); |
| 798 | if (entity_is_task(se)) { | 785 | #ifdef CONFIG_SMP |
| 799 | add_cfs_task_weight(cfs_rq, se->load.weight); | 786 | if (entity_is_task(se)) |
| 800 | list_add(&se->group_node, &cfs_rq->tasks); | 787 | list_add_tail(&se->group_node, &rq_of(cfs_rq)->cfs_tasks); |
| 801 | } | 788 | #endif |
| 802 | cfs_rq->nr_running++; | 789 | cfs_rq->nr_running++; |
| 803 | } | 790 | } |
| 804 | 791 | ||
| @@ -808,10 +795,8 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
| 808 | update_load_sub(&cfs_rq->load, se->load.weight); | 795 | update_load_sub(&cfs_rq->load, se->load.weight); |
| 809 | if (!parent_entity(se)) | 796 | if (!parent_entity(se)) |
| 810 | update_load_sub(&rq_of(cfs_rq)->load, se->load.weight); | 797 | update_load_sub(&rq_of(cfs_rq)->load, se->load.weight); |
| 811 | if (entity_is_task(se)) { | 798 | if (entity_is_task(se)) |
| 812 | add_cfs_task_weight(cfs_rq, -se->load.weight); | ||
| 813 | list_del_init(&se->group_node); | 799 | list_del_init(&se->group_node); |
| 814 | } | ||
| 815 | cfs_rq->nr_running--; | 800 | cfs_rq->nr_running--; |
| 816 | } | 801 | } |
| 817 | 802 | ||
| @@ -1003,6 +988,7 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
| 1003 | if (unlikely(delta > se->statistics.sleep_max)) | 988 | if (unlikely(delta > se->statistics.sleep_max)) |
| 1004 | se->statistics.sleep_max = delta; | 989 | se->statistics.sleep_max = delta; |
| 1005 | 990 | ||
| 991 | se->statistics.sleep_start = 0; | ||
| 1006 | se->statistics.sum_sleep_runtime += delta; | 992 | se->statistics.sum_sleep_runtime += delta; |
| 1007 | 993 | ||
| 1008 | if (tsk) { | 994 | if (tsk) { |
| @@ -1019,6 +1005,7 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
| 1019 | if (unlikely(delta > se->statistics.block_max)) | 1005 | if (unlikely(delta > se->statistics.block_max)) |
| 1020 | se->statistics.block_max = delta; | 1006 | se->statistics.block_max = delta; |
| 1021 | 1007 | ||
| 1008 | se->statistics.block_start = 0; | ||
| 1022 | se->statistics.sum_sleep_runtime += delta; | 1009 | se->statistics.sum_sleep_runtime += delta; |
| 1023 | 1010 | ||
| 1024 | if (tsk) { | 1011 | if (tsk) { |
| @@ -1175,7 +1162,7 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
| 1175 | __clear_buddies_skip(se); | 1162 | __clear_buddies_skip(se); |
| 1176 | } | 1163 | } |
| 1177 | 1164 | ||
| 1178 | static void return_cfs_rq_runtime(struct cfs_rq *cfs_rq); | 1165 | static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq); |
| 1179 | 1166 | ||
| 1180 | static void | 1167 | static void |
| 1181 | dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) | 1168 | dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) |
| @@ -1399,20 +1386,20 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) | |||
| 1399 | #ifdef CONFIG_CFS_BANDWIDTH | 1386 | #ifdef CONFIG_CFS_BANDWIDTH |
| 1400 | 1387 | ||
| 1401 | #ifdef HAVE_JUMP_LABEL | 1388 | #ifdef HAVE_JUMP_LABEL |
| 1402 | static struct jump_label_key __cfs_bandwidth_used; | 1389 | static struct static_key __cfs_bandwidth_used; |
| 1403 | 1390 | ||
| 1404 | static inline bool cfs_bandwidth_used(void) | 1391 | static inline bool cfs_bandwidth_used(void) |
| 1405 | { | 1392 | { |
| 1406 | return static_branch(&__cfs_bandwidth_used); | 1393 | return static_key_false(&__cfs_bandwidth_used); |
| 1407 | } | 1394 | } |
| 1408 | 1395 | ||
| 1409 | void account_cfs_bandwidth_used(int enabled, int was_enabled) | 1396 | void account_cfs_bandwidth_used(int enabled, int was_enabled) |
| 1410 | { | 1397 | { |
| 1411 | /* only need to count groups transitioning between enabled/!enabled */ | 1398 | /* only need to count groups transitioning between enabled/!enabled */ |
| 1412 | if (enabled && !was_enabled) | 1399 | if (enabled && !was_enabled) |
| 1413 | jump_label_inc(&__cfs_bandwidth_used); | 1400 | static_key_slow_inc(&__cfs_bandwidth_used); |
| 1414 | else if (!enabled && was_enabled) | 1401 | else if (!enabled && was_enabled) |
| 1415 | jump_label_dec(&__cfs_bandwidth_used); | 1402 | static_key_slow_dec(&__cfs_bandwidth_used); |
| 1416 | } | 1403 | } |
| 1417 | #else /* HAVE_JUMP_LABEL */ | 1404 | #else /* HAVE_JUMP_LABEL */ |
| 1418 | static bool cfs_bandwidth_used(void) | 1405 | static bool cfs_bandwidth_used(void) |
| @@ -1559,8 +1546,8 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, | |||
| 1559 | resched_task(rq_of(cfs_rq)->curr); | 1546 | resched_task(rq_of(cfs_rq)->curr); |
| 1560 | } | 1547 | } |
| 1561 | 1548 | ||
| 1562 | static __always_inline void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, | 1549 | static __always_inline |
| 1563 | unsigned long delta_exec) | 1550 | void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec) |
| 1564 | { | 1551 | { |
| 1565 | if (!cfs_bandwidth_used() || !cfs_rq->runtime_enabled) | 1552 | if (!cfs_bandwidth_used() || !cfs_rq->runtime_enabled) |
| 1566 | return; | 1553 | return; |
| @@ -2086,11 +2073,11 @@ void unthrottle_offline_cfs_rqs(struct rq *rq) | |||
| 2086 | } | 2073 | } |
| 2087 | 2074 | ||
| 2088 | #else /* CONFIG_CFS_BANDWIDTH */ | 2075 | #else /* CONFIG_CFS_BANDWIDTH */ |
| 2089 | static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, | 2076 | static __always_inline |
| 2090 | unsigned long delta_exec) {} | 2077 | void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec) {} |
| 2091 | static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq) {} | 2078 | static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq) {} |
| 2092 | static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {} | 2079 | static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {} |
| 2093 | static void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {} | 2080 | static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {} |
| 2094 | 2081 | ||
| 2095 | static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq) | 2082 | static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq) |
| 2096 | { | 2083 | { |
| @@ -2670,8 +2657,6 @@ static int select_idle_sibling(struct task_struct *p, int target) | |||
| 2670 | /* | 2657 | /* |
| 2671 | * Otherwise, iterate the domains and find an elegible idle cpu. | 2658 | * Otherwise, iterate the domains and find an elegible idle cpu. |
| 2672 | */ | 2659 | */ |
| 2673 | rcu_read_lock(); | ||
| 2674 | |||
| 2675 | sd = rcu_dereference(per_cpu(sd_llc, target)); | 2660 | sd = rcu_dereference(per_cpu(sd_llc, target)); |
| 2676 | for_each_lower_domain(sd) { | 2661 | for_each_lower_domain(sd) { |
| 2677 | sg = sd->groups; | 2662 | sg = sd->groups; |
| @@ -2693,8 +2678,6 @@ next: | |||
| 2693 | } while (sg != sd->groups); | 2678 | } while (sg != sd->groups); |
| 2694 | } | 2679 | } |
| 2695 | done: | 2680 | done: |
| 2696 | rcu_read_unlock(); | ||
| 2697 | |||
| 2698 | return target; | 2681 | return target; |
| 2699 | } | 2682 | } |
| 2700 | 2683 | ||
| @@ -2920,7 +2903,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ | |||
| 2920 | return; | 2903 | return; |
| 2921 | 2904 | ||
| 2922 | /* | 2905 | /* |
| 2923 | * This is possible from callers such as pull_task(), in which we | 2906 | * This is possible from callers such as move_task(), in which we |
| 2924 | * unconditionally check_prempt_curr() after an enqueue (which may have | 2907 | * unconditionally check_prempt_curr() after an enqueue (which may have |
| 2925 | * lead to a throttle). This both saves work and prevents false | 2908 | * lead to a throttle). This both saves work and prevents false |
| 2926 | * next-buddy nomination below. | 2909 | * next-buddy nomination below. |
| @@ -3084,17 +3067,39 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preemp | |||
| 3084 | * Fair scheduling class load-balancing methods: | 3067 | * Fair scheduling class load-balancing methods: |
| 3085 | */ | 3068 | */ |
| 3086 | 3069 | ||
| 3070 | static unsigned long __read_mostly max_load_balance_interval = HZ/10; | ||
| 3071 | |||
| 3072 | #define LBF_ALL_PINNED 0x01 | ||
| 3073 | #define LBF_NEED_BREAK 0x02 | ||
| 3074 | |||
| 3075 | struct lb_env { | ||
| 3076 | struct sched_domain *sd; | ||
| 3077 | |||
| 3078 | int src_cpu; | ||
| 3079 | struct rq *src_rq; | ||
| 3080 | |||
| 3081 | int dst_cpu; | ||
| 3082 | struct rq *dst_rq; | ||
| 3083 | |||
| 3084 | enum cpu_idle_type idle; | ||
| 3085 | long load_move; | ||
| 3086 | unsigned int flags; | ||
| 3087 | |||
| 3088 | unsigned int loop; | ||
| 3089 | unsigned int loop_break; | ||
| 3090 | unsigned int loop_max; | ||
| 3091 | }; | ||
| 3092 | |||
| 3087 | /* | 3093 | /* |
| 3088 | * pull_task - move a task from a remote runqueue to the local runqueue. | 3094 | * move_task - move a task from one runqueue to another runqueue. |
| 3089 | * Both runqueues must be locked. | 3095 | * Both runqueues must be locked. |
| 3090 | */ | 3096 | */ |
| 3091 | static void pull_task(struct rq *src_rq, struct task_struct *p, | 3097 | static void move_task(struct task_struct *p, struct lb_env *env) |
| 3092 | struct rq *this_rq, int this_cpu) | ||
| 3093 | { | 3098 | { |
| 3094 | deactivate_task(src_rq, p, 0); | 3099 | deactivate_task(env->src_rq, p, 0); |
| 3095 | set_task_cpu(p, this_cpu); | 3100 | set_task_cpu(p, env->dst_cpu); |
| 3096 | activate_task(this_rq, p, 0); | 3101 | activate_task(env->dst_rq, p, 0); |
| 3097 | check_preempt_curr(this_rq, p, 0); | 3102 | check_preempt_curr(env->dst_rq, p, 0); |
| 3098 | } | 3103 | } |
| 3099 | 3104 | ||
| 3100 | /* | 3105 | /* |
| @@ -3129,19 +3134,11 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd) | |||
| 3129 | return delta < (s64)sysctl_sched_migration_cost; | 3134 | return delta < (s64)sysctl_sched_migration_cost; |
| 3130 | } | 3135 | } |
| 3131 | 3136 | ||
| 3132 | #define LBF_ALL_PINNED 0x01 | ||
| 3133 | #define LBF_NEED_BREAK 0x02 /* clears into HAD_BREAK */ | ||
| 3134 | #define LBF_HAD_BREAK 0x04 | ||
| 3135 | #define LBF_HAD_BREAKS 0x0C /* count HAD_BREAKs overflows into ABORT */ | ||
| 3136 | #define LBF_ABORT 0x10 | ||
| 3137 | |||
| 3138 | /* | 3137 | /* |
| 3139 | * can_migrate_task - may task p from runqueue rq be migrated to this_cpu? | 3138 | * can_migrate_task - may task p from runqueue rq be migrated to this_cpu? |
| 3140 | */ | 3139 | */ |
| 3141 | static | 3140 | static |
| 3142 | int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu, | 3141 | int can_migrate_task(struct task_struct *p, struct lb_env *env) |
| 3143 | struct sched_domain *sd, enum cpu_idle_type idle, | ||
| 3144 | int *lb_flags) | ||
| 3145 | { | 3142 | { |
| 3146 | int tsk_cache_hot = 0; | 3143 | int tsk_cache_hot = 0; |
| 3147 | /* | 3144 | /* |
| @@ -3150,13 +3147,13 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu, | |||
| 3150 | * 2) cannot be migrated to this CPU due to cpus_allowed, or | 3147 | * 2) cannot be migrated to this CPU due to cpus_allowed, or |
| 3151 | * 3) are cache-hot on their current CPU. | 3148 | * 3) are cache-hot on their current CPU. |
| 3152 | */ | 3149 | */ |
| 3153 | if (!cpumask_test_cpu(this_cpu, tsk_cpus_allowed(p))) { | 3150 | if (!cpumask_test_cpu(env->dst_cpu, tsk_cpus_allowed(p))) { |
| 3154 | schedstat_inc(p, se.statistics.nr_failed_migrations_affine); | 3151 | schedstat_inc(p, se.statistics.nr_failed_migrations_affine); |
| 3155 | return 0; | 3152 | return 0; |
| 3156 | } | 3153 | } |
| 3157 | *lb_flags &= ~LBF_ALL_PINNED; | 3154 | env->flags &= ~LBF_ALL_PINNED; |
| 3158 | 3155 | ||
| 3159 | if (task_running(rq, p)) { | 3156 | if (task_running(env->src_rq, p)) { |
| 3160 | schedstat_inc(p, se.statistics.nr_failed_migrations_running); | 3157 | schedstat_inc(p, se.statistics.nr_failed_migrations_running); |
| 3161 | return 0; | 3158 | return 0; |
| 3162 | } | 3159 | } |
| @@ -3167,12 +3164,12 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu, | |||
| 3167 | * 2) too many balance attempts have failed. | 3164 | * 2) too many balance attempts have failed. |
| 3168 | */ | 3165 | */ |
| 3169 | 3166 | ||
| 3170 | tsk_cache_hot = task_hot(p, rq->clock_task, sd); | 3167 | tsk_cache_hot = task_hot(p, env->src_rq->clock_task, env->sd); |
| 3171 | if (!tsk_cache_hot || | 3168 | if (!tsk_cache_hot || |
| 3172 | sd->nr_balance_failed > sd->cache_nice_tries) { | 3169 | env->sd->nr_balance_failed > env->sd->cache_nice_tries) { |
| 3173 | #ifdef CONFIG_SCHEDSTATS | 3170 | #ifdef CONFIG_SCHEDSTATS |
| 3174 | if (tsk_cache_hot) { | 3171 | if (tsk_cache_hot) { |
| 3175 | schedstat_inc(sd, lb_hot_gained[idle]); | 3172 | schedstat_inc(env->sd, lb_hot_gained[env->idle]); |
| 3176 | schedstat_inc(p, se.statistics.nr_forced_migrations); | 3173 | schedstat_inc(p, se.statistics.nr_forced_migrations); |
| 3177 | } | 3174 | } |
| 3178 | #endif | 3175 | #endif |
| @@ -3193,65 +3190,80 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu, | |||
| 3193 | * | 3190 | * |
| 3194 | * Called with both runqueues locked. | 3191 | * Called with both runqueues locked. |
| 3195 | */ | 3192 | */ |
| 3196 | static int | 3193 | static int move_one_task(struct lb_env *env) |
| 3197 | move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, | ||
| 3198 | struct sched_domain *sd, enum cpu_idle_type idle) | ||
| 3199 | { | 3194 | { |
| 3200 | struct task_struct *p, *n; | 3195 | struct task_struct *p, *n; |
| 3201 | struct cfs_rq *cfs_rq; | ||
| 3202 | int pinned = 0; | ||
| 3203 | 3196 | ||
| 3204 | for_each_leaf_cfs_rq(busiest, cfs_rq) { | 3197 | list_for_each_entry_safe(p, n, &env->src_rq->cfs_tasks, se.group_node) { |
| 3205 | list_for_each_entry_safe(p, n, &cfs_rq->tasks, se.group_node) { | 3198 | if (throttled_lb_pair(task_group(p), env->src_rq->cpu, env->dst_cpu)) |
| 3206 | if (throttled_lb_pair(task_group(p), | 3199 | continue; |
| 3207 | busiest->cpu, this_cpu)) | ||
| 3208 | break; | ||
| 3209 | 3200 | ||
| 3210 | if (!can_migrate_task(p, busiest, this_cpu, | 3201 | if (!can_migrate_task(p, env)) |
| 3211 | sd, idle, &pinned)) | 3202 | continue; |
| 3212 | continue; | ||
| 3213 | 3203 | ||
| 3214 | pull_task(busiest, p, this_rq, this_cpu); | 3204 | move_task(p, env); |
| 3215 | /* | 3205 | /* |
| 3216 | * Right now, this is only the second place pull_task() | 3206 | * Right now, this is only the second place move_task() |
| 3217 | * is called, so we can safely collect pull_task() | 3207 | * is called, so we can safely collect move_task() |
| 3218 | * stats here rather than inside pull_task(). | 3208 | * stats here rather than inside move_task(). |
| 3219 | */ | 3209 | */ |
| 3220 | schedstat_inc(sd, lb_gained[idle]); | 3210 | schedstat_inc(env->sd, lb_gained[env->idle]); |
| 3221 | return 1; | 3211 | return 1; |
| 3222 | } | ||
| 3223 | } | 3212 | } |
| 3224 | |||
| 3225 | return 0; | 3213 | return 0; |
| 3226 | } | 3214 | } |
| 3227 | 3215 | ||
| 3228 | static unsigned long | 3216 | static unsigned long task_h_load(struct task_struct *p); |
| 3229 | balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, | 3217 | |
| 3230 | unsigned long max_load_move, struct sched_domain *sd, | 3218 | /* |
| 3231 | enum cpu_idle_type idle, int *lb_flags, | 3219 | * move_tasks tries to move up to load_move weighted load from busiest to |
| 3232 | struct cfs_rq *busiest_cfs_rq) | 3220 | * this_rq, as part of a balancing operation within domain "sd". |
| 3221 | * Returns 1 if successful and 0 otherwise. | ||
| 3222 | * | ||
| 3223 | * Called with both runqueues locked. | ||
| 3224 | */ | ||
| 3225 | static int move_tasks(struct lb_env *env) | ||
| 3233 | { | 3226 | { |
| 3234 | int loops = 0, pulled = 0; | 3227 | struct list_head *tasks = &env->src_rq->cfs_tasks; |
| 3235 | long rem_load_move = max_load_move; | 3228 | struct task_struct *p; |
| 3236 | struct task_struct *p, *n; | 3229 | unsigned long load; |
| 3230 | int pulled = 0; | ||
| 3231 | |||
| 3232 | if (env->load_move <= 0) | ||
| 3233 | return 0; | ||
| 3237 | 3234 | ||
| 3238 | if (max_load_move == 0) | 3235 | while (!list_empty(tasks)) { |
| 3239 | goto out; | 3236 | p = list_first_entry(tasks, struct task_struct, se.group_node); |
| 3240 | 3237 | ||
| 3241 | list_for_each_entry_safe(p, n, &busiest_cfs_rq->tasks, se.group_node) { | 3238 | env->loop++; |
| 3242 | if (loops++ > sysctl_sched_nr_migrate) { | 3239 | /* We've more or less seen every task there is, call it quits */ |
| 3243 | *lb_flags |= LBF_NEED_BREAK; | 3240 | if (env->loop > env->loop_max) |
| 3241 | break; | ||
| 3242 | |||
| 3243 | /* take a breather every nr_migrate tasks */ | ||
| 3244 | if (env->loop > env->loop_break) { | ||
| 3245 | env->loop_break += sysctl_sched_nr_migrate; | ||
| 3246 | env->flags |= LBF_NEED_BREAK; | ||
| 3244 | break; | 3247 | break; |
| 3245 | } | 3248 | } |
| 3246 | 3249 | ||
| 3247 | if ((p->se.load.weight >> 1) > rem_load_move || | 3250 | if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu)) |
| 3248 | !can_migrate_task(p, busiest, this_cpu, sd, idle, | 3251 | goto next; |
| 3249 | lb_flags)) | 3252 | |
| 3250 | continue; | 3253 | load = task_h_load(p); |
| 3254 | |||
| 3255 | if (load < 16 && !env->sd->nr_balance_failed) | ||
| 3256 | goto next; | ||
| 3257 | |||
| 3258 | if ((load / 2) > env->load_move) | ||
| 3259 | goto next; | ||
| 3251 | 3260 | ||
| 3252 | pull_task(busiest, p, this_rq, this_cpu); | 3261 | if (!can_migrate_task(p, env)) |
| 3262 | goto next; | ||
| 3263 | |||
| 3264 | move_task(p, env); | ||
| 3253 | pulled++; | 3265 | pulled++; |
| 3254 | rem_load_move -= p->se.load.weight; | 3266 | env->load_move -= load; |
| 3255 | 3267 | ||
| 3256 | #ifdef CONFIG_PREEMPT | 3268 | #ifdef CONFIG_PREEMPT |
| 3257 | /* | 3269 | /* |
| @@ -3259,28 +3271,30 @@ balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
| 3259 | * kernels will stop after the first task is pulled to minimize | 3271 | * kernels will stop after the first task is pulled to minimize |
| 3260 | * the critical section. | 3272 | * the critical section. |
| 3261 | */ | 3273 | */ |
| 3262 | if (idle == CPU_NEWLY_IDLE) { | 3274 | if (env->idle == CPU_NEWLY_IDLE) |
| 3263 | *lb_flags |= LBF_ABORT; | ||
| 3264 | break; | 3275 | break; |
| 3265 | } | ||
| 3266 | #endif | 3276 | #endif |
| 3267 | 3277 | ||
| 3268 | /* | 3278 | /* |
| 3269 | * We only want to steal up to the prescribed amount of | 3279 | * We only want to steal up to the prescribed amount of |
| 3270 | * weighted load. | 3280 | * weighted load. |
| 3271 | */ | 3281 | */ |
| 3272 | if (rem_load_move <= 0) | 3282 | if (env->load_move <= 0) |
| 3273 | break; | 3283 | break; |
| 3284 | |||
| 3285 | continue; | ||
| 3286 | next: | ||
| 3287 | list_move_tail(&p->se.group_node, tasks); | ||
| 3274 | } | 3288 | } |
| 3275 | out: | 3289 | |
| 3276 | /* | 3290 | /* |
| 3277 | * Right now, this is one of only two places pull_task() is called, | 3291 | * Right now, this is one of only two places move_task() is called, |
| 3278 | * so we can safely collect pull_task() stats here rather than | 3292 | * so we can safely collect move_task() stats here rather than |
| 3279 | * inside pull_task(). | 3293 | * inside move_task(). |
| 3280 | */ | 3294 | */ |
| 3281 | schedstat_add(sd, lb_gained[idle], pulled); | 3295 | schedstat_add(env->sd, lb_gained[env->idle], pulled); |
| 3282 | 3296 | ||
| 3283 | return max_load_move - rem_load_move; | 3297 | return pulled; |
| 3284 | } | 3298 | } |
| 3285 | 3299 | ||
| 3286 | #ifdef CONFIG_FAIR_GROUP_SCHED | 3300 | #ifdef CONFIG_FAIR_GROUP_SCHED |
| @@ -3360,113 +3374,35 @@ static int tg_load_down(struct task_group *tg, void *data) | |||
| 3360 | 3374 | ||
| 3361 | static void update_h_load(long cpu) | 3375 | static void update_h_load(long cpu) |
| 3362 | { | 3376 | { |
| 3377 | rcu_read_lock(); | ||
| 3363 | walk_tg_tree(tg_load_down, tg_nop, (void *)cpu); | 3378 | walk_tg_tree(tg_load_down, tg_nop, (void *)cpu); |
| 3379 | rcu_read_unlock(); | ||
| 3364 | } | 3380 | } |
| 3365 | 3381 | ||
| 3366 | static unsigned long | 3382 | static unsigned long task_h_load(struct task_struct *p) |
| 3367 | load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | ||
| 3368 | unsigned long max_load_move, | ||
| 3369 | struct sched_domain *sd, enum cpu_idle_type idle, | ||
| 3370 | int *lb_flags) | ||
| 3371 | { | 3383 | { |
| 3372 | long rem_load_move = max_load_move; | 3384 | struct cfs_rq *cfs_rq = task_cfs_rq(p); |
| 3373 | struct cfs_rq *busiest_cfs_rq; | 3385 | unsigned long load; |
| 3374 | |||
| 3375 | rcu_read_lock(); | ||
| 3376 | update_h_load(cpu_of(busiest)); | ||
| 3377 | |||
| 3378 | for_each_leaf_cfs_rq(busiest, busiest_cfs_rq) { | ||
| 3379 | unsigned long busiest_h_load = busiest_cfs_rq->h_load; | ||
| 3380 | unsigned long busiest_weight = busiest_cfs_rq->load.weight; | ||
| 3381 | u64 rem_load, moved_load; | ||
| 3382 | |||
| 3383 | if (*lb_flags & (LBF_NEED_BREAK|LBF_ABORT)) | ||
| 3384 | break; | ||
| 3385 | |||
| 3386 | /* | ||
| 3387 | * empty group or part of a throttled hierarchy | ||
| 3388 | */ | ||
| 3389 | if (!busiest_cfs_rq->task_weight || | ||
| 3390 | throttled_lb_pair(busiest_cfs_rq->tg, cpu_of(busiest), this_cpu)) | ||
| 3391 | continue; | ||
| 3392 | |||
| 3393 | rem_load = (u64)rem_load_move * busiest_weight; | ||
| 3394 | rem_load = div_u64(rem_load, busiest_h_load + 1); | ||
| 3395 | |||
| 3396 | moved_load = balance_tasks(this_rq, this_cpu, busiest, | ||
| 3397 | rem_load, sd, idle, lb_flags, | ||
| 3398 | busiest_cfs_rq); | ||
| 3399 | |||
| 3400 | if (!moved_load) | ||
| 3401 | continue; | ||
| 3402 | 3386 | ||
| 3403 | moved_load *= busiest_h_load; | 3387 | load = p->se.load.weight; |
| 3404 | moved_load = div_u64(moved_load, busiest_weight + 1); | 3388 | load = div_u64(load * cfs_rq->h_load, cfs_rq->load.weight + 1); |
| 3405 | 3389 | ||
| 3406 | rem_load_move -= moved_load; | 3390 | return load; |
| 3407 | if (rem_load_move < 0) | ||
| 3408 | break; | ||
| 3409 | } | ||
| 3410 | rcu_read_unlock(); | ||
| 3411 | |||
| 3412 | return max_load_move - rem_load_move; | ||
| 3413 | } | 3391 | } |
| 3414 | #else | 3392 | #else |
| 3415 | static inline void update_shares(int cpu) | 3393 | static inline void update_shares(int cpu) |
| 3416 | { | 3394 | { |
| 3417 | } | 3395 | } |
| 3418 | 3396 | ||
| 3419 | static unsigned long | 3397 | static inline void update_h_load(long cpu) |
| 3420 | load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | ||
| 3421 | unsigned long max_load_move, | ||
| 3422 | struct sched_domain *sd, enum cpu_idle_type idle, | ||
| 3423 | int *lb_flags) | ||
| 3424 | { | 3398 | { |
| 3425 | return balance_tasks(this_rq, this_cpu, busiest, | ||
| 3426 | max_load_move, sd, idle, lb_flags, | ||
| 3427 | &busiest->cfs); | ||
| 3428 | } | 3399 | } |
| 3429 | #endif | ||
| 3430 | 3400 | ||
| 3431 | /* | 3401 | static unsigned long task_h_load(struct task_struct *p) |
| 3432 | * move_tasks tries to move up to max_load_move weighted load from busiest to | ||
| 3433 | * this_rq, as part of a balancing operation within domain "sd". | ||
| 3434 | * Returns 1 if successful and 0 otherwise. | ||
| 3435 | * | ||
| 3436 | * Called with both runqueues locked. | ||
| 3437 | */ | ||
| 3438 | static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, | ||
| 3439 | unsigned long max_load_move, | ||
| 3440 | struct sched_domain *sd, enum cpu_idle_type idle, | ||
| 3441 | int *lb_flags) | ||
| 3442 | { | 3402 | { |
| 3443 | unsigned long total_load_moved = 0, load_moved; | 3403 | return p->se.load.weight; |
| 3444 | |||
| 3445 | do { | ||
| 3446 | load_moved = load_balance_fair(this_rq, this_cpu, busiest, | ||
| 3447 | max_load_move - total_load_moved, | ||
| 3448 | sd, idle, lb_flags); | ||
| 3449 | |||
| 3450 | total_load_moved += load_moved; | ||
| 3451 | |||
| 3452 | if (*lb_flags & (LBF_NEED_BREAK|LBF_ABORT)) | ||
| 3453 | break; | ||
| 3454 | |||
| 3455 | #ifdef CONFIG_PREEMPT | ||
| 3456 | /* | ||
| 3457 | * NEWIDLE balancing is a source of latency, so preemptible | ||
| 3458 | * kernels will stop after the first task is pulled to minimize | ||
| 3459 | * the critical section. | ||
| 3460 | */ | ||
| 3461 | if (idle == CPU_NEWLY_IDLE && this_rq->nr_running) { | ||
| 3462 | *lb_flags |= LBF_ABORT; | ||
| 3463 | break; | ||
| 3464 | } | ||
| 3465 | #endif | ||
| 3466 | } while (load_moved && max_load_move > total_load_moved); | ||
| 3467 | |||
| 3468 | return total_load_moved > 0; | ||
| 3469 | } | 3404 | } |
| 3405 | #endif | ||
| 3470 | 3406 | ||
| 3471 | /********** Helpers for find_busiest_group ************************/ | 3407 | /********** Helpers for find_busiest_group ************************/ |
| 3472 | /* | 3408 | /* |
| @@ -3776,6 +3712,11 @@ void update_group_power(struct sched_domain *sd, int cpu) | |||
| 3776 | struct sched_domain *child = sd->child; | 3712 | struct sched_domain *child = sd->child; |
| 3777 | struct sched_group *group, *sdg = sd->groups; | 3713 | struct sched_group *group, *sdg = sd->groups; |
| 3778 | unsigned long power; | 3714 | unsigned long power; |
| 3715 | unsigned long interval; | ||
| 3716 | |||
| 3717 | interval = msecs_to_jiffies(sd->balance_interval); | ||
| 3718 | interval = clamp(interval, 1UL, max_load_balance_interval); | ||
| 3719 | sdg->sgp->next_update = jiffies + interval; | ||
| 3779 | 3720 | ||
| 3780 | if (!child) { | 3721 | if (!child) { |
| 3781 | update_cpu_power(sd, cpu); | 3722 | update_cpu_power(sd, cpu); |
| @@ -3883,12 +3824,15 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, | |||
| 3883 | * domains. In the newly idle case, we will allow all the cpu's | 3824 | * domains. In the newly idle case, we will allow all the cpu's |
| 3884 | * to do the newly idle load balance. | 3825 | * to do the newly idle load balance. |
| 3885 | */ | 3826 | */ |
| 3886 | if (idle != CPU_NEWLY_IDLE && local_group) { | 3827 | if (local_group) { |
| 3887 | if (balance_cpu != this_cpu) { | 3828 | if (idle != CPU_NEWLY_IDLE) { |
| 3888 | *balance = 0; | 3829 | if (balance_cpu != this_cpu) { |
| 3889 | return; | 3830 | *balance = 0; |
| 3890 | } | 3831 | return; |
| 3891 | update_group_power(sd, this_cpu); | 3832 | } |
| 3833 | update_group_power(sd, this_cpu); | ||
| 3834 | } else if (time_after_eq(jiffies, group->sgp->next_update)) | ||
| 3835 | update_group_power(sd, this_cpu); | ||
| 3892 | } | 3836 | } |
| 3893 | 3837 | ||
| 3894 | /* Adjust by relative CPU power of the group */ | 3838 | /* Adjust by relative CPU power of the group */ |
| @@ -4451,13 +4395,21 @@ static int load_balance(int this_cpu, struct rq *this_rq, | |||
| 4451 | struct sched_domain *sd, enum cpu_idle_type idle, | 4395 | struct sched_domain *sd, enum cpu_idle_type idle, |
| 4452 | int *balance) | 4396 | int *balance) |
| 4453 | { | 4397 | { |
| 4454 | int ld_moved, lb_flags = 0, active_balance = 0; | 4398 | int ld_moved, active_balance = 0; |
| 4455 | struct sched_group *group; | 4399 | struct sched_group *group; |
| 4456 | unsigned long imbalance; | 4400 | unsigned long imbalance; |
| 4457 | struct rq *busiest; | 4401 | struct rq *busiest; |
| 4458 | unsigned long flags; | 4402 | unsigned long flags; |
| 4459 | struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); | 4403 | struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); |
| 4460 | 4404 | ||
| 4405 | struct lb_env env = { | ||
| 4406 | .sd = sd, | ||
| 4407 | .dst_cpu = this_cpu, | ||
| 4408 | .dst_rq = this_rq, | ||
| 4409 | .idle = idle, | ||
| 4410 | .loop_break = sysctl_sched_nr_migrate, | ||
| 4411 | }; | ||
| 4412 | |||
| 4461 | cpumask_copy(cpus, cpu_active_mask); | 4413 | cpumask_copy(cpus, cpu_active_mask); |
| 4462 | 4414 | ||
| 4463 | schedstat_inc(sd, lb_count[idle]); | 4415 | schedstat_inc(sd, lb_count[idle]); |
| @@ -4492,32 +4444,34 @@ redo: | |||
| 4492 | * still unbalanced. ld_moved simply stays zero, so it is | 4444 | * still unbalanced. ld_moved simply stays zero, so it is |
| 4493 | * correctly treated as an imbalance. | 4445 | * correctly treated as an imbalance. |
| 4494 | */ | 4446 | */ |
| 4495 | lb_flags |= LBF_ALL_PINNED; | 4447 | env.flags |= LBF_ALL_PINNED; |
| 4448 | env.load_move = imbalance; | ||
| 4449 | env.src_cpu = busiest->cpu; | ||
| 4450 | env.src_rq = busiest; | ||
| 4451 | env.loop_max = busiest->nr_running; | ||
| 4452 | |||
| 4453 | more_balance: | ||
| 4496 | local_irq_save(flags); | 4454 | local_irq_save(flags); |
| 4497 | double_rq_lock(this_rq, busiest); | 4455 | double_rq_lock(this_rq, busiest); |
| 4498 | ld_moved = move_tasks(this_rq, this_cpu, busiest, | 4456 | if (!env.loop) |
| 4499 | imbalance, sd, idle, &lb_flags); | 4457 | update_h_load(env.src_cpu); |
| 4458 | ld_moved += move_tasks(&env); | ||
| 4500 | double_rq_unlock(this_rq, busiest); | 4459 | double_rq_unlock(this_rq, busiest); |
| 4501 | local_irq_restore(flags); | 4460 | local_irq_restore(flags); |
| 4502 | 4461 | ||
| 4462 | if (env.flags & LBF_NEED_BREAK) { | ||
| 4463 | env.flags &= ~LBF_NEED_BREAK; | ||
| 4464 | goto more_balance; | ||
| 4465 | } | ||
| 4466 | |||
| 4503 | /* | 4467 | /* |
| 4504 | * some other cpu did the load balance for us. | 4468 | * some other cpu did the load balance for us. |
| 4505 | */ | 4469 | */ |
| 4506 | if (ld_moved && this_cpu != smp_processor_id()) | 4470 | if (ld_moved && this_cpu != smp_processor_id()) |
| 4507 | resched_cpu(this_cpu); | 4471 | resched_cpu(this_cpu); |
| 4508 | 4472 | ||
| 4509 | if (lb_flags & LBF_ABORT) | ||
| 4510 | goto out_balanced; | ||
| 4511 | |||
| 4512 | if (lb_flags & LBF_NEED_BREAK) { | ||
| 4513 | lb_flags += LBF_HAD_BREAK - LBF_NEED_BREAK; | ||
| 4514 | if (lb_flags & LBF_ABORT) | ||
| 4515 | goto out_balanced; | ||
| 4516 | goto redo; | ||
| 4517 | } | ||
| 4518 | |||
| 4519 | /* All tasks on this runqueue were pinned by CPU affinity */ | 4473 | /* All tasks on this runqueue were pinned by CPU affinity */ |
| 4520 | if (unlikely(lb_flags & LBF_ALL_PINNED)) { | 4474 | if (unlikely(env.flags & LBF_ALL_PINNED)) { |
| 4521 | cpumask_clear_cpu(cpu_of(busiest), cpus); | 4475 | cpumask_clear_cpu(cpu_of(busiest), cpus); |
| 4522 | if (!cpumask_empty(cpus)) | 4476 | if (!cpumask_empty(cpus)) |
| 4523 | goto redo; | 4477 | goto redo; |
| @@ -4547,7 +4501,7 @@ redo: | |||
| 4547 | tsk_cpus_allowed(busiest->curr))) { | 4501 | tsk_cpus_allowed(busiest->curr))) { |
| 4548 | raw_spin_unlock_irqrestore(&busiest->lock, | 4502 | raw_spin_unlock_irqrestore(&busiest->lock, |
| 4549 | flags); | 4503 | flags); |
| 4550 | lb_flags |= LBF_ALL_PINNED; | 4504 | env.flags |= LBF_ALL_PINNED; |
| 4551 | goto out_one_pinned; | 4505 | goto out_one_pinned; |
| 4552 | } | 4506 | } |
| 4553 | 4507 | ||
| @@ -4600,7 +4554,7 @@ out_balanced: | |||
| 4600 | 4554 | ||
| 4601 | out_one_pinned: | 4555 | out_one_pinned: |
| 4602 | /* tune up the balancing interval */ | 4556 | /* tune up the balancing interval */ |
| 4603 | if (((lb_flags & LBF_ALL_PINNED) && | 4557 | if (((env.flags & LBF_ALL_PINNED) && |
| 4604 | sd->balance_interval < MAX_PINNED_INTERVAL) || | 4558 | sd->balance_interval < MAX_PINNED_INTERVAL) || |
| 4605 | (sd->balance_interval < sd->max_interval)) | 4559 | (sd->balance_interval < sd->max_interval)) |
| 4606 | sd->balance_interval *= 2; | 4560 | sd->balance_interval *= 2; |
| @@ -4710,10 +4664,18 @@ static int active_load_balance_cpu_stop(void *data) | |||
| 4710 | } | 4664 | } |
| 4711 | 4665 | ||
| 4712 | if (likely(sd)) { | 4666 | if (likely(sd)) { |
| 4667 | struct lb_env env = { | ||
| 4668 | .sd = sd, | ||
| 4669 | .dst_cpu = target_cpu, | ||
| 4670 | .dst_rq = target_rq, | ||
| 4671 | .src_cpu = busiest_rq->cpu, | ||
| 4672 | .src_rq = busiest_rq, | ||
| 4673 | .idle = CPU_IDLE, | ||
| 4674 | }; | ||
| 4675 | |||
| 4713 | schedstat_inc(sd, alb_count); | 4676 | schedstat_inc(sd, alb_count); |
| 4714 | 4677 | ||
| 4715 | if (move_one_task(target_rq, target_cpu, busiest_rq, | 4678 | if (move_one_task(&env)) |
| 4716 | sd, CPU_IDLE)) | ||
| 4717 | schedstat_inc(sd, alb_pushed); | 4679 | schedstat_inc(sd, alb_pushed); |
| 4718 | else | 4680 | else |
| 4719 | schedstat_inc(sd, alb_failed); | 4681 | schedstat_inc(sd, alb_failed); |
| @@ -4945,8 +4907,6 @@ static int __cpuinit sched_ilb_notifier(struct notifier_block *nfb, | |||
| 4945 | 4907 | ||
| 4946 | static DEFINE_SPINLOCK(balancing); | 4908 | static DEFINE_SPINLOCK(balancing); |
| 4947 | 4909 | ||
| 4948 | static unsigned long __read_mostly max_load_balance_interval = HZ/10; | ||
| 4949 | |||
| 4950 | /* | 4910 | /* |
| 4951 | * Scale the max load_balance interval with the number of CPUs in the system. | 4911 | * Scale the max load_balance interval with the number of CPUs in the system. |
| 4952 | * This trades load-balance latency on larger machines for less cross talk. | 4912 | * This trades load-balance latency on larger machines for less cross talk. |
| @@ -5340,7 +5300,6 @@ static void set_curr_task_fair(struct rq *rq) | |||
| 5340 | void init_cfs_rq(struct cfs_rq *cfs_rq) | 5300 | void init_cfs_rq(struct cfs_rq *cfs_rq) |
| 5341 | { | 5301 | { |
| 5342 | cfs_rq->tasks_timeline = RB_ROOT; | 5302 | cfs_rq->tasks_timeline = RB_ROOT; |
| 5343 | INIT_LIST_HEAD(&cfs_rq->tasks); | ||
| 5344 | cfs_rq->min_vruntime = (u64)(-(1LL << 20)); | 5303 | cfs_rq->min_vruntime = (u64)(-(1LL << 20)); |
| 5345 | #ifndef CONFIG_64BIT | 5304 | #ifndef CONFIG_64BIT |
| 5346 | cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime; | 5305 | cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime; |
| @@ -5612,6 +5571,7 @@ __init void init_sched_fair_class(void) | |||
| 5612 | open_softirq(SCHED_SOFTIRQ, run_rebalance_domains); | 5571 | open_softirq(SCHED_SOFTIRQ, run_rebalance_domains); |
| 5613 | 5572 | ||
| 5614 | #ifdef CONFIG_NO_HZ | 5573 | #ifdef CONFIG_NO_HZ |
| 5574 | nohz.next_balance = jiffies; | ||
| 5615 | zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT); | 5575 | zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT); |
| 5616 | cpu_notifier(sched_ilb_notifier, 0); | 5576 | cpu_notifier(sched_ilb_notifier, 0); |
| 5617 | #endif | 5577 | #endif |
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index f42ae7fb5ec5..44af55e6d5d0 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c | |||
| @@ -778,12 +778,9 @@ static inline int balance_runtime(struct rt_rq *rt_rq) | |||
| 778 | 778 | ||
| 779 | static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) | 779 | static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) |
| 780 | { | 780 | { |
| 781 | int i, idle = 1; | 781 | int i, idle = 1, throttled = 0; |
| 782 | const struct cpumask *span; | 782 | const struct cpumask *span; |
| 783 | 783 | ||
| 784 | if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF) | ||
| 785 | return 1; | ||
| 786 | |||
| 787 | span = sched_rt_period_mask(); | 784 | span = sched_rt_period_mask(); |
| 788 | for_each_cpu(i, span) { | 785 | for_each_cpu(i, span) { |
| 789 | int enqueue = 0; | 786 | int enqueue = 0; |
| @@ -818,12 +815,17 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) | |||
| 818 | if (!rt_rq_throttled(rt_rq)) | 815 | if (!rt_rq_throttled(rt_rq)) |
| 819 | enqueue = 1; | 816 | enqueue = 1; |
| 820 | } | 817 | } |
| 818 | if (rt_rq->rt_throttled) | ||
| 819 | throttled = 1; | ||
| 821 | 820 | ||
| 822 | if (enqueue) | 821 | if (enqueue) |
| 823 | sched_rt_rq_enqueue(rt_rq); | 822 | sched_rt_rq_enqueue(rt_rq); |
| 824 | raw_spin_unlock(&rq->lock); | 823 | raw_spin_unlock(&rq->lock); |
| 825 | } | 824 | } |
| 826 | 825 | ||
| 826 | if (!throttled && (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)) | ||
| 827 | return 1; | ||
| 828 | |||
| 827 | return idle; | 829 | return idle; |
| 828 | } | 830 | } |
| 829 | 831 | ||
| @@ -855,8 +857,30 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) | |||
| 855 | return 0; | 857 | return 0; |
| 856 | 858 | ||
| 857 | if (rt_rq->rt_time > runtime) { | 859 | if (rt_rq->rt_time > runtime) { |
| 858 | rt_rq->rt_throttled = 1; | 860 | struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); |
| 859 | printk_once(KERN_WARNING "sched: RT throttling activated\n"); | 861 | |
| 862 | /* | ||
| 863 | * Don't actually throttle groups that have no runtime assigned | ||
| 864 | * but accrue some time due to boosting. | ||
| 865 | */ | ||
| 866 | if (likely(rt_b->rt_runtime)) { | ||
| 867 | static bool once = false; | ||
| 868 | |||
| 869 | rt_rq->rt_throttled = 1; | ||
| 870 | |||
| 871 | if (!once) { | ||
| 872 | once = true; | ||
| 873 | printk_sched("sched: RT throttling activated\n"); | ||
| 874 | } | ||
| 875 | } else { | ||
| 876 | /* | ||
| 877 | * In case we did anyway, make it go away, | ||
| 878 | * replenishment is a joke, since it will replenish us | ||
| 879 | * with exactly 0 ns. | ||
| 880 | */ | ||
| 881 | rt_rq->rt_time = 0; | ||
| 882 | } | ||
| 883 | |||
| 860 | if (rt_rq_throttled(rt_rq)) { | 884 | if (rt_rq_throttled(rt_rq)) { |
| 861 | sched_rt_rq_dequeue(rt_rq); | 885 | sched_rt_rq_dequeue(rt_rq); |
| 862 | return 1; | 886 | return 1; |
| @@ -884,7 +908,8 @@ static void update_curr_rt(struct rq *rq) | |||
| 884 | if (unlikely((s64)delta_exec < 0)) | 908 | if (unlikely((s64)delta_exec < 0)) |
| 885 | delta_exec = 0; | 909 | delta_exec = 0; |
| 886 | 910 | ||
| 887 | schedstat_set(curr->se.statistics.exec_max, max(curr->se.statistics.exec_max, delta_exec)); | 911 | schedstat_set(curr->se.statistics.exec_max, |
| 912 | max(curr->se.statistics.exec_max, delta_exec)); | ||
| 888 | 913 | ||
| 889 | curr->se.sum_exec_runtime += delta_exec; | 914 | curr->se.sum_exec_runtime += delta_exec; |
| 890 | account_group_exec_runtime(curr, delta_exec); | 915 | account_group_exec_runtime(curr, delta_exec); |
| @@ -1403,7 +1428,7 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu) | |||
| 1403 | next_idx: | 1428 | next_idx: |
| 1404 | if (idx >= MAX_RT_PRIO) | 1429 | if (idx >= MAX_RT_PRIO) |
| 1405 | continue; | 1430 | continue; |
| 1406 | if (next && next->prio < idx) | 1431 | if (next && next->prio <= idx) |
| 1407 | continue; | 1432 | continue; |
| 1408 | list_for_each_entry(rt_se, array->queue + idx, run_list) { | 1433 | list_for_each_entry(rt_se, array->queue + idx, run_list) { |
| 1409 | struct task_struct *p; | 1434 | struct task_struct *p; |
| @@ -1972,7 +1997,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued) | |||
| 1972 | if (--p->rt.time_slice) | 1997 | if (--p->rt.time_slice) |
| 1973 | return; | 1998 | return; |
| 1974 | 1999 | ||
| 1975 | p->rt.time_slice = DEF_TIMESLICE; | 2000 | p->rt.time_slice = RR_TIMESLICE; |
| 1976 | 2001 | ||
| 1977 | /* | 2002 | /* |
| 1978 | * Requeue to the end of queue if we are not the only element | 2003 | * Requeue to the end of queue if we are not the only element |
| @@ -2000,7 +2025,7 @@ static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task) | |||
| 2000 | * Time slice is 0 for SCHED_FIFO tasks | 2025 | * Time slice is 0 for SCHED_FIFO tasks |
| 2001 | */ | 2026 | */ |
| 2002 | if (task->policy == SCHED_RR) | 2027 | if (task->policy == SCHED_RR) |
| 2003 | return DEF_TIMESLICE; | 2028 | return RR_TIMESLICE; |
| 2004 | else | 2029 | else |
| 2005 | return 0; | 2030 | return 0; |
| 2006 | } | 2031 | } |
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 98c0c2623db8..fb3acba4d52e 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h | |||
| @@ -36,11 +36,7 @@ extern __read_mostly int scheduler_running; | |||
| 36 | 36 | ||
| 37 | /* | 37 | /* |
| 38 | * These are the 'tuning knobs' of the scheduler: | 38 | * These are the 'tuning knobs' of the scheduler: |
| 39 | * | ||
| 40 | * default timeslice is 100 msecs (used only for SCHED_RR tasks). | ||
| 41 | * Timeslices get refilled after they expire. | ||
| 42 | */ | 39 | */ |
| 43 | #define DEF_TIMESLICE (100 * HZ / 1000) | ||
| 44 | 40 | ||
| 45 | /* | 41 | /* |
| 46 | * single value that denotes runtime == period, ie unlimited time. | 42 | * single value that denotes runtime == period, ie unlimited time. |
| @@ -216,9 +212,6 @@ struct cfs_rq { | |||
| 216 | struct rb_root tasks_timeline; | 212 | struct rb_root tasks_timeline; |
| 217 | struct rb_node *rb_leftmost; | 213 | struct rb_node *rb_leftmost; |
| 218 | 214 | ||
| 219 | struct list_head tasks; | ||
| 220 | struct list_head *balance_iterator; | ||
| 221 | |||
| 222 | /* | 215 | /* |
| 223 | * 'curr' points to currently running entity on this cfs_rq. | 216 | * 'curr' points to currently running entity on this cfs_rq. |
| 224 | * It is set to NULL otherwise (i.e when none are currently running). | 217 | * It is set to NULL otherwise (i.e when none are currently running). |
| @@ -246,11 +239,6 @@ struct cfs_rq { | |||
| 246 | 239 | ||
| 247 | #ifdef CONFIG_SMP | 240 | #ifdef CONFIG_SMP |
| 248 | /* | 241 | /* |
| 249 | * the part of load.weight contributed by tasks | ||
| 250 | */ | ||
| 251 | unsigned long task_weight; | ||
| 252 | |||
| 253 | /* | ||
| 254 | * h_load = weight * f(tg) | 242 | * h_load = weight * f(tg) |
| 255 | * | 243 | * |
| 256 | * Where f(tg) is the recursive weight fraction assigned to | 244 | * Where f(tg) is the recursive weight fraction assigned to |
| @@ -424,6 +412,8 @@ struct rq { | |||
| 424 | int cpu; | 412 | int cpu; |
| 425 | int online; | 413 | int online; |
| 426 | 414 | ||
| 415 | struct list_head cfs_tasks; | ||
| 416 | |||
| 427 | u64 rt_avg; | 417 | u64 rt_avg; |
| 428 | u64 age_stamp; | 418 | u64 age_stamp; |
| 429 | u64 idle_stamp; | 419 | u64 idle_stamp; |
| @@ -462,7 +452,6 @@ struct rq { | |||
| 462 | unsigned int yld_count; | 452 | unsigned int yld_count; |
| 463 | 453 | ||
| 464 | /* schedule() stats */ | 454 | /* schedule() stats */ |
| 465 | unsigned int sched_switch; | ||
| 466 | unsigned int sched_count; | 455 | unsigned int sched_count; |
| 467 | unsigned int sched_goidle; | 456 | unsigned int sched_goidle; |
| 468 | 457 | ||
| @@ -611,7 +600,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) | |||
| 611 | * Tunables that become constants when CONFIG_SCHED_DEBUG is off: | 600 | * Tunables that become constants when CONFIG_SCHED_DEBUG is off: |
| 612 | */ | 601 | */ |
| 613 | #ifdef CONFIG_SCHED_DEBUG | 602 | #ifdef CONFIG_SCHED_DEBUG |
| 614 | # include <linux/jump_label.h> | 603 | # include <linux/static_key.h> |
| 615 | # define const_debug __read_mostly | 604 | # define const_debug __read_mostly |
| 616 | #else | 605 | #else |
| 617 | # define const_debug const | 606 | # define const_debug const |
| @@ -630,18 +619,18 @@ enum { | |||
| 630 | #undef SCHED_FEAT | 619 | #undef SCHED_FEAT |
| 631 | 620 | ||
| 632 | #if defined(CONFIG_SCHED_DEBUG) && defined(HAVE_JUMP_LABEL) | 621 | #if defined(CONFIG_SCHED_DEBUG) && defined(HAVE_JUMP_LABEL) |
| 633 | static __always_inline bool static_branch__true(struct jump_label_key *key) | 622 | static __always_inline bool static_branch__true(struct static_key *key) |
| 634 | { | 623 | { |
| 635 | return likely(static_branch(key)); /* Not out of line branch. */ | 624 | return static_key_true(key); /* Not out of line branch. */ |
| 636 | } | 625 | } |
| 637 | 626 | ||
| 638 | static __always_inline bool static_branch__false(struct jump_label_key *key) | 627 | static __always_inline bool static_branch__false(struct static_key *key) |
| 639 | { | 628 | { |
| 640 | return unlikely(static_branch(key)); /* Out of line branch. */ | 629 | return static_key_false(key); /* Out of line branch. */ |
| 641 | } | 630 | } |
| 642 | 631 | ||
| 643 | #define SCHED_FEAT(name, enabled) \ | 632 | #define SCHED_FEAT(name, enabled) \ |
| 644 | static __always_inline bool static_branch_##name(struct jump_label_key *key) \ | 633 | static __always_inline bool static_branch_##name(struct static_key *key) \ |
| 645 | { \ | 634 | { \ |
| 646 | return static_branch__##enabled(key); \ | 635 | return static_branch__##enabled(key); \ |
| 647 | } | 636 | } |
| @@ -650,7 +639,7 @@ static __always_inline bool static_branch_##name(struct jump_label_key *key) \ | |||
| 650 | 639 | ||
| 651 | #undef SCHED_FEAT | 640 | #undef SCHED_FEAT |
| 652 | 641 | ||
| 653 | extern struct jump_label_key sched_feat_keys[__SCHED_FEAT_NR]; | 642 | extern struct static_key sched_feat_keys[__SCHED_FEAT_NR]; |
| 654 | #define sched_feat(x) (static_branch_##x(&sched_feat_keys[__SCHED_FEAT_##x])) | 643 | #define sched_feat(x) (static_branch_##x(&sched_feat_keys[__SCHED_FEAT_##x])) |
| 655 | #else /* !(SCHED_DEBUG && HAVE_JUMP_LABEL) */ | 644 | #else /* !(SCHED_DEBUG && HAVE_JUMP_LABEL) */ |
| 656 | #define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x)) | 645 | #define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x)) |
| @@ -692,6 +681,9 @@ static inline int task_running(struct rq *rq, struct task_struct *p) | |||
| 692 | #ifndef finish_arch_switch | 681 | #ifndef finish_arch_switch |
| 693 | # define finish_arch_switch(prev) do { } while (0) | 682 | # define finish_arch_switch(prev) do { } while (0) |
| 694 | #endif | 683 | #endif |
| 684 | #ifndef finish_arch_post_lock_switch | ||
| 685 | # define finish_arch_post_lock_switch() do { } while (0) | ||
| 686 | #endif | ||
| 695 | 687 | ||
| 696 | #ifndef __ARCH_WANT_UNLOCKED_CTXSW | 688 | #ifndef __ARCH_WANT_UNLOCKED_CTXSW |
| 697 | static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) | 689 | static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) |
diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c index 2a581ba8e190..903ffa9e8872 100644 --- a/kernel/sched/stats.c +++ b/kernel/sched/stats.c | |||
| @@ -32,9 +32,9 @@ static int show_schedstat(struct seq_file *seq, void *v) | |||
| 32 | 32 | ||
| 33 | /* runqueue-specific stats */ | 33 | /* runqueue-specific stats */ |
| 34 | seq_printf(seq, | 34 | seq_printf(seq, |
| 35 | "cpu%d %u %u %u %u %u %u %llu %llu %lu", | 35 | "cpu%d %u 0 %u %u %u %u %llu %llu %lu", |
| 36 | cpu, rq->yld_count, | 36 | cpu, rq->yld_count, |
| 37 | rq->sched_switch, rq->sched_count, rq->sched_goidle, | 37 | rq->sched_count, rq->sched_goidle, |
| 38 | rq->ttwu_count, rq->ttwu_local, | 38 | rq->ttwu_count, rq->ttwu_local, |
| 39 | rq->rq_cpu_time, | 39 | rq->rq_cpu_time, |
| 40 | rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount); | 40 | rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount); |
