Diffstat (limited to 'kernel/sched.c')
-rw-r--r--	kernel/sched.c | 113
1 file changed, 73 insertions(+), 40 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 45e17b83b7f1..6c10fa796ca0 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -61,6 +61,7 @@
 #include <linux/delayacct.h>
 #include <linux/reciprocal_div.h>
 #include <linux/unistd.h>
+#include <linux/pagemap.h>
 
 #include <asm/tlb.h>
 
@@ -262,7 +263,8 @@ struct rq {
 	s64 clock_max_delta;
 
 	unsigned int clock_warps, clock_overflows;
-	unsigned int clock_unstable_events;
+	u64 idle_clock;
+	unsigned int clock_deep_idle_events;
 	u64 tick_timestamp;
 
 	atomic_t nr_iowait;
@@ -556,18 +558,40 @@ static inline struct rq *this_rq_lock(void)
 }
 
 /*
- * CPU frequency is/was unstable - start new by setting prev_clock_raw:
+ * We are going deep-idle (irqs are disabled):
  */
-void sched_clock_unstable_event(void)
+void sched_clock_idle_sleep_event(void)
 {
-	unsigned long flags;
-	struct rq *rq;
+	struct rq *rq = cpu_rq(smp_processor_id());
 
-	rq = task_rq_lock(current, &flags);
-	rq->prev_clock_raw = sched_clock();
-	rq->clock_unstable_events++;
-	task_rq_unlock(rq, &flags);
+	spin_lock(&rq->lock);
+	__update_rq_clock(rq);
+	spin_unlock(&rq->lock);
+	rq->clock_deep_idle_events++;
+}
+EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
+
+/*
+ * We just idled delta nanoseconds (called with irqs disabled):
+ */
+void sched_clock_idle_wakeup_event(u64 delta_ns)
+{
+	struct rq *rq = cpu_rq(smp_processor_id());
+	u64 now = sched_clock();
+
+	rq->idle_clock += delta_ns;
+	/*
+	 * Override the previous timestamp and ignore all
+	 * sched_clock() deltas that occurred while we idled,
+	 * and use the PM-provided delta_ns to advance the
+	 * rq clock:
+	 */
+	spin_lock(&rq->lock);
+	rq->prev_clock_raw = now;
+	rq->clock += delta_ns;
+	spin_unlock(&rq->lock);
 }
+EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
 
 /*
  * resched_task - mark a task 'to be rescheduled now'.
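The two functions added above form the new deep-idle clock API: sched_clock_idle_sleep_event() syncs the runqueue clock before entering a deep C-state, and sched_clock_idle_wakeup_event() advances it afterwards by the number of nanoseconds actually slept, so the rq clock follows the PM-measured sleep time rather than whatever sched_clock() reported while the hardware clock was unreliable. A minimal caller sketch; the example_* helpers are assumptions for illustration, not kernel APIs:

/*
 * Sketch only: how an idle driver might bracket a deep-idle period with
 * the two hooks exported above.  example_read_pm_timer_ns() and
 * example_enter_deep_cstate() are hypothetical helpers.
 */
static void example_deep_idle(void)
{
	u64 t0, t1;

	local_irq_disable();

	/* sync rq->clock before the CPU clock stops being trustworthy */
	sched_clock_idle_sleep_event();

	t0 = example_read_pm_timer_ns();	/* timer that keeps running while idle */
	example_enter_deep_cstate();
	t1 = example_read_pm_timer_ns();

	/* advance rq->clock by the time we really slept, in nanoseconds */
	sched_clock_idle_wakeup_event(t1 - t0);

	local_irq_enable();
}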
@@ -645,7 +669,7 @@ static u64 div64_likely32(u64 divident, unsigned long divisor)
 /*
  * Shift right and round:
  */
-#define RSR(x, y) (((x) + (1UL << ((y) - 1))) >> (y))
+#define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y))
 
 static unsigned long
 calc_delta_mine(unsigned long delta_exec, unsigned long weight,
@@ -661,10 +685,10 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight,
 	 * Check whether we'd overflow the 64-bit multiplication:
 	 */
 	if (unlikely(tmp > WMULT_CONST))
-		tmp = RSR(RSR(tmp, WMULT_SHIFT/2) * lw->inv_weight,
+		tmp = SRR(SRR(tmp, WMULT_SHIFT/2) * lw->inv_weight,
 			WMULT_SHIFT/2);
 	else
-		tmp = RSR(tmp * lw->inv_weight, WMULT_SHIFT);
+		tmp = SRR(tmp * lw->inv_weight, WMULT_SHIFT);
 
 	return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX);
 }
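The rename from RSR to SRR does not change behaviour: SRR(x, y) is a shift right by y bits with rounding to nearest, and calc_delta_mine() uses it to compute roughly delta_exec * weight / lw->weight via a precomputed inverse weight. A small stand-alone illustration; EX_WMULT_SHIFT and the weights below are example values, not the scheduler's constants, and 1ULL is used here for portability where the kernel macro uses 1UL:

/*
 * Stand-alone illustration of SRR() and the inverse-weight trick used by
 * calc_delta_mine().  All constants are example values only.
 */
#include <stdio.h>
#include <stdint.h>

#define SRR(x, y) (((x) + (1ULL << ((y) - 1))) >> (y))

#define EX_WMULT_SHIFT	32	/* assumed shift width for the example */

int main(void)
{
	/* a plain shift truncates; SRR rounds to nearest */
	printf("7 >> 1 = %llu, SRR(7, 1) = %llu\n",
	       7ULL >> 1, (unsigned long long)SRR(7ULL, 1));

	/* approximate delta_exec * weight / target via a precomputed inverse */
	uint64_t delta_exec = 1000000;		/* 1 ms in ns */
	uint64_t weight = 1024, target = 2048;
	uint64_t inv_weight = (1ULL << EX_WMULT_SHIFT) / target;
	uint64_t scaled = SRR(delta_exec * weight * inv_weight, EX_WMULT_SHIFT);

	printf("scaled delta: %llu ns (exact: %llu ns)\n",
	       (unsigned long long)scaled,
	       (unsigned long long)(delta_exec * weight / target));
	return 0;
}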
@@ -835,7 +859,6 @@ static void dec_nr_running(struct task_struct *p, struct rq *rq)
 
 static void set_load_weight(struct task_struct *p)
 {
-	task_rq(p)->cfs.wait_runtime -= p->se.wait_runtime;
 	p->se.wait_runtime = 0;
 
 	if (task_has_rt_policy(p)) {
@@ -1564,6 +1587,7 @@ static void __sched_fork(struct task_struct *p)
 	p->se.wait_start_fair = 0;
 	p->se.exec_start = 0;
 	p->se.sum_exec_runtime = 0;
+	p->se.prev_sum_exec_runtime = 0;
 	p->se.delta_exec = 0;
 	p->se.delta_fair_run = 0;
 	p->se.delta_fair_sleep = 0;
@@ -1659,6 +1683,11 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 
 	p->prio = effective_prio(p);
 
+	if (rt_prio(p->prio))
+		p->sched_class = &rt_sched_class;
+	else
+		p->sched_class = &fair_sched_class;
+
 	if (!p->sched_class->task_new || !sysctl_sched_child_runs_first ||
 			(clone_flags & CLONE_VM) || task_cpu(p) != this_cpu ||
 			!current->se.on_rq) {
@@ -2157,12 +2186,6 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
 	if (task_running(rq, p))
 		return 0;
 
-	/*
-	 * Aggressive migration if too many balance attempts have failed:
-	 */
-	if (sd->nr_balance_failed > sd->cache_nice_tries)
-		return 1;
-
 	return 1;
 }
 
@@ -2494,7 +2517,7 @@ group_next:
 	 * a think about bumping its value to force at least one task to be
 	 * moved
 	 */
-	if (*imbalance + SCHED_LOAD_SCALE_FUZZ < busiest_load_per_task/2) {
+	if (*imbalance < busiest_load_per_task) {
 		unsigned long tmp, pwr_now, pwr_move;
 		unsigned int imbn;
 
@@ -2546,10 +2569,8 @@ small_imbalance:
 		pwr_move /= SCHED_LOAD_SCALE;
 
 		/* Move if we gain throughput */
-		if (pwr_move <= pwr_now)
-			goto out_balanced;
-
-		*imbalance = busiest_load_per_task;
+		if (pwr_move > pwr_now)
+			*imbalance = busiest_load_per_task;
 	}
 
 	return busiest;
@@ -3020,6 +3041,7 @@ static inline void rebalance_domains(int cpu, enum cpu_idle_type idle)
 	struct sched_domain *sd;
 	/* Earliest time when we have to do rebalance again */
 	unsigned long next_balance = jiffies + 60*HZ;
+	int update_next_balance = 0;
 
 	for_each_domain(cpu, sd) {
 		if (!(sd->flags & SD_LOAD_BALANCE))
@@ -3056,8 +3078,10 @@ static inline void rebalance_domains(int cpu, enum cpu_idle_type idle)
 		if (sd->flags & SD_SERIALIZE)
 			spin_unlock(&balancing);
out:
-		if (time_after(next_balance, sd->last_balance + interval))
+		if (time_after(next_balance, sd->last_balance + interval)) {
 			next_balance = sd->last_balance + interval;
+			update_next_balance = 1;
+		}
 
 		/*
 		 * Stop the load balance at this level. There is another
@@ -3067,7 +3091,14 @@ out:
 		if (!balance)
 			break;
 	}
-	rq->next_balance = next_balance;
+
+	/*
+	 * next_balance will be updated only when there is a need.
+	 * When the cpu is attached to a null domain, for example, it will
+	 * not be updated.
+	 */
+	if (likely(update_next_balance))
+		rq->next_balance = next_balance;
 }
 
 /*
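The three hunks above make rebalance_domains() write rq->next_balance only when at least one domain actually produced an earlier deadline; previously a CPU attached to a null domain would still get the far-future jiffies + 60*HZ default. The same guard pattern in isolation, as a plain C sketch with illustrative names:

/*
 * Illustrative sketch of the guard pattern used above: track the earliest
 * candidate deadline, but only commit it if at least one candidate was seen.
 */
#include <stdbool.h>
#include <stddef.h>

static void commit_earliest_deadline(unsigned long *deadline,
				     const unsigned long *candidates, size_t n)
{
	unsigned long earliest = (unsigned long)-1;	/* "far future" default */
	bool updated = false;

	for (size_t i = 0; i < n; i++) {
		if (candidates[i] < earliest) {
			earliest = candidates[i];
			updated = true;
		}
	}

	/* with no candidates (cf. a CPU in a null domain), leave *deadline alone */
	if (updated)
		*deadline = earliest;
}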
@@ -4525,10 +4556,7 @@ asmlinkage long sys_sched_yield(void)
 	struct rq *rq = this_rq_lock();
 
 	schedstat_inc(rq, yld_cnt);
-	if (unlikely(rq->nr_running == 1))
-		schedstat_inc(rq, yld_act_empty);
-	else
-		current->sched_class->yield_task(rq, current);
+	current->sched_class->yield_task(rq, current);
 
 	/*
 	 * Since we are going to call schedule() anyway, there's
@@ -4884,14 +4912,18 @@ cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
 static inline void sched_init_granularity(void)
 {
 	unsigned int factor = 1 + ilog2(num_online_cpus());
-	const unsigned long gran_limit = 100000000;
+	const unsigned long limit = 100000000;
+
+	sysctl_sched_min_granularity *= factor;
+	if (sysctl_sched_min_granularity > limit)
+		sysctl_sched_min_granularity = limit;
 
-	sysctl_sched_granularity *= factor;
-	if (sysctl_sched_granularity > gran_limit)
-		sysctl_sched_granularity = gran_limit;
+	sysctl_sched_latency *= factor;
+	if (sysctl_sched_latency > limit)
+		sysctl_sched_latency = limit;
 
-	sysctl_sched_runtime_limit = sysctl_sched_granularity * 4;
-	sysctl_sched_wakeup_granularity = sysctl_sched_granularity / 2;
+	sysctl_sched_runtime_limit = sysctl_sched_latency;
+	sysctl_sched_wakeup_granularity = sysctl_sched_min_granularity / 2;
 }
 
 #ifdef CONFIG_SMP
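The rewritten sched_init_granularity() scales both sysctl_sched_min_granularity and sysctl_sched_latency by 1 + ilog2(num_online_cpus()) and clamps each at 100 ms. A quick user-space illustration of that scaling; the 2 ms / 20 ms base values are assumptions for the example, only the factor formula and the clamp come from the hunk above:

/*
 * Illustration of the CPU-count scaling in sched_init_granularity().
 * The base values are assumed; the factor and the 100 ms clamp are real.
 */
#include <stdio.h>

static unsigned int ilog2_u(unsigned int x)
{
	unsigned int r = 0;

	while (x >>= 1)
		r++;
	return r;
}

int main(void)
{
	const unsigned long limit = 100000000;	/* 100 ms clamp, in ns */
	unsigned long min_gran = 2000000;	/* assumed 2 ms base value */
	unsigned long latency = 20000000;	/* assumed 20 ms base value */
	unsigned int cpus;

	for (cpus = 1; cpus <= 64; cpus *= 4) {
		unsigned int factor = 1 + ilog2_u(cpus);
		unsigned long g = min_gran * factor;
		unsigned long l = latency * factor;

		if (g > limit)
			g = limit;
		if (l > limit)
			l = limit;
		printf("%2u cpus: factor=%u min_granularity=%lu ns latency=%lu ns\n",
		       cpus, factor, g, l);
	}
	return 0;
}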
@@ -5234,15 +5266,16 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
 static struct ctl_table sd_ctl_dir[] = {
 	{
 		.procname = "sched_domain",
-		.mode = 0755,
+		.mode = 0555,
 	},
 	{0,},
 };
 
 static struct ctl_table sd_ctl_root[] = {
 	{
+		.ctl_name = CTL_KERN,
 		.procname = "kernel",
-		.mode = 0755,
+		.mode = 0555,
 		.child = sd_ctl_dir,
 	},
 	{0,},
@@ -5318,7 +5351,7 @@ static ctl_table *sd_alloc_ctl_cpu_table(int cpu)
 	for_each_domain(cpu, sd) {
 		snprintf(buf, 32, "domain%d", i);
 		entry->procname = kstrdup(buf, GFP_KERNEL);
-		entry->mode = 0755;
+		entry->mode = 0555;
 		entry->child = sd_alloc_ctl_domain_table(sd);
 		entry++;
 		i++;
@@ -5338,7 +5371,7 @@ static void init_sched_domain_sysctl(void)
 	for (i = 0; i < cpu_num; i++, entry++) {
 		snprintf(buf, 32, "cpu%d", i);
 		entry->procname = kstrdup(buf, GFP_KERNEL);
-		entry->mode = 0755;
+		entry->mode = 0555;
 		entry->child = sd_alloc_ctl_cpu_table(i);
 	}
 	sd_sysctl_header = register_sysctl_table(sd_ctl_root);
