Diffstat (limited to 'kernel/sched.c')
-rw-r--r--	kernel/sched.c	113
1 file changed, 73 insertions(+), 40 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 45e17b83b7f1..6c10fa796ca0 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -61,6 +61,7 @@
 #include <linux/delayacct.h>
 #include <linux/reciprocal_div.h>
 #include <linux/unistd.h>
+#include <linux/pagemap.h>
 
 #include <asm/tlb.h>
 
@@ -262,7 +263,8 @@ struct rq {
 	s64 clock_max_delta;
 
 	unsigned int clock_warps, clock_overflows;
-	unsigned int clock_unstable_events;
+	u64 idle_clock;
+	unsigned int clock_deep_idle_events;
 	u64 tick_timestamp;
 
 	atomic_t nr_iowait;
@@ -556,18 +558,40 @@ static inline struct rq *this_rq_lock(void)
 }
 
 /*
- * CPU frequency is/was unstable - start new by setting prev_clock_raw:
+ * We are going deep-idle (irqs are disabled):
  */
-void sched_clock_unstable_event(void)
+void sched_clock_idle_sleep_event(void)
 {
-	unsigned long flags;
-	struct rq *rq;
+	struct rq *rq = cpu_rq(smp_processor_id());
 
-	rq = task_rq_lock(current, &flags);
-	rq->prev_clock_raw = sched_clock();
-	rq->clock_unstable_events++;
-	task_rq_unlock(rq, &flags);
+	spin_lock(&rq->lock);
+	__update_rq_clock(rq);
+	spin_unlock(&rq->lock);
+	rq->clock_deep_idle_events++;
+}
+EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
+
+/*
+ * We just idled delta nanoseconds (called with irqs disabled):
+ */
+void sched_clock_idle_wakeup_event(u64 delta_ns)
+{
+	struct rq *rq = cpu_rq(smp_processor_id());
+	u64 now = sched_clock();
+
+	rq->idle_clock += delta_ns;
+	/*
+	 * Override the previous timestamp and ignore all
+	 * sched_clock() deltas that occured while we idled,
+	 * and use the PM-provided delta_ns to advance the
+	 * rq clock:
+	 */
+	spin_lock(&rq->lock);
+	rq->prev_clock_raw = now;
+	rq->clock += delta_ns;
+	spin_unlock(&rq->lock);
 }
+EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
 
 /*
  * resched_task - mark a task 'to be rescheduled now'.
@@ -645,7 +669,7 @@ static u64 div64_likely32(u64 divident, unsigned long divisor)
 /*
  * Shift right and round:
  */
-#define RSR(x, y) (((x) + (1UL << ((y) - 1))) >> (y))
+#define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y))
 
 static unsigned long
 calc_delta_mine(unsigned long delta_exec, unsigned long weight,
@@ -661,10 +685,10 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight,
 	 * Check whether we'd overflow the 64-bit multiplication:
 	 */
 	if (unlikely(tmp > WMULT_CONST))
-		tmp = RSR(RSR(tmp, WMULT_SHIFT/2) * lw->inv_weight,
+		tmp = SRR(SRR(tmp, WMULT_SHIFT/2) * lw->inv_weight,
 			WMULT_SHIFT/2);
 	else
-		tmp = RSR(tmp * lw->inv_weight, WMULT_SHIFT);
+		tmp = SRR(tmp * lw->inv_weight, WMULT_SHIFT);
 
 	return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX);
 }
@@ -835,7 +859,6 @@ static void dec_nr_running(struct task_struct *p, struct rq *rq)
 
 static void set_load_weight(struct task_struct *p)
 {
-	task_rq(p)->cfs.wait_runtime -= p->se.wait_runtime;
 	p->se.wait_runtime = 0;
 
 	if (task_has_rt_policy(p)) {
@@ -1564,6 +1587,7 @@ static void __sched_fork(struct task_struct *p)
 	p->se.wait_start_fair = 0;
 	p->se.exec_start = 0;
 	p->se.sum_exec_runtime = 0;
+	p->se.prev_sum_exec_runtime = 0;
 	p->se.delta_exec = 0;
 	p->se.delta_fair_run = 0;
 	p->se.delta_fair_sleep = 0;
@@ -1659,6 +1683,11 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 
 	p->prio = effective_prio(p);
 
+	if (rt_prio(p->prio))
+		p->sched_class = &rt_sched_class;
+	else
+		p->sched_class = &fair_sched_class;
+
 	if (!p->sched_class->task_new || !sysctl_sched_child_runs_first ||
 			(clone_flags & CLONE_VM) || task_cpu(p) != this_cpu ||
 			!current->se.on_rq) {
@@ -2157,12 +2186,6 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
 	if (task_running(rq, p))
 		return 0;
 
-	/*
-	 * Aggressive migration if too many balance attempts have failed:
-	 */
-	if (sd->nr_balance_failed > sd->cache_nice_tries)
-		return 1;
-
 	return 1;
 }
 
@@ -2494,7 +2517,7 @@ group_next:
 	 * a think about bumping its value to force at least one task to be
 	 * moved
 	 */
-	if (*imbalance + SCHED_LOAD_SCALE_FUZZ < busiest_load_per_task/2) {
+	if (*imbalance < busiest_load_per_task) {
 		unsigned long tmp, pwr_now, pwr_move;
 		unsigned int imbn;
 
@@ -2546,10 +2569,8 @@ small_imbalance:
 		pwr_move /= SCHED_LOAD_SCALE;
 
 		/* Move if we gain throughput */
-		if (pwr_move <= pwr_now)
-			goto out_balanced;
-
-		*imbalance = busiest_load_per_task;
+		if (pwr_move > pwr_now)
+			*imbalance = busiest_load_per_task;
 	}
 
 	return busiest;
@@ -3020,6 +3041,7 @@ static inline void rebalance_domains(int cpu, enum cpu_idle_type idle)
 	struct sched_domain *sd;
 	/* Earliest time when we have to do rebalance again */
 	unsigned long next_balance = jiffies + 60*HZ;
+	int update_next_balance = 0;
 
 	for_each_domain(cpu, sd) {
 		if (!(sd->flags & SD_LOAD_BALANCE))
@@ -3056,8 +3078,10 @@ static inline void rebalance_domains(int cpu, enum cpu_idle_type idle)
 		if (sd->flags & SD_SERIALIZE)
 			spin_unlock(&balancing);
 out:
-		if (time_after(next_balance, sd->last_balance + interval))
+		if (time_after(next_balance, sd->last_balance + interval)) {
 			next_balance = sd->last_balance + interval;
+			update_next_balance = 1;
+		}
 
 		/*
 		 * Stop the load balance at this level. There is another
@@ -3067,7 +3091,14 @@ out:
 		if (!balance)
 			break;
 	}
-	rq->next_balance = next_balance;
+
+	/*
+	 * next_balance will be updated only when there is a need.
+	 * When the cpu is attached to null domain for ex, it will not be
+	 * updated.
+	 */
+	if (likely(update_next_balance))
+		rq->next_balance = next_balance;
 }
 
 /*
@@ -4525,10 +4556,7 @@ asmlinkage long sys_sched_yield(void)
 	struct rq *rq = this_rq_lock();
 
 	schedstat_inc(rq, yld_cnt);
-	if (unlikely(rq->nr_running == 1))
-		schedstat_inc(rq, yld_act_empty);
-	else
-		current->sched_class->yield_task(rq, current);
+	current->sched_class->yield_task(rq, current);
 
 	/*
 	 * Since we are going to call schedule() anyway, there's
@@ -4884,14 +4912,18 @@ cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
 static inline void sched_init_granularity(void)
 {
 	unsigned int factor = 1 + ilog2(num_online_cpus());
-	const unsigned long gran_limit = 100000000;
+	const unsigned long limit = 100000000;
+
+	sysctl_sched_min_granularity *= factor;
+	if (sysctl_sched_min_granularity > limit)
+		sysctl_sched_min_granularity = limit;
 
-	sysctl_sched_granularity *= factor;
-	if (sysctl_sched_granularity > gran_limit)
-		sysctl_sched_granularity = gran_limit;
+	sysctl_sched_latency *= factor;
+	if (sysctl_sched_latency > limit)
+		sysctl_sched_latency = limit;
 
-	sysctl_sched_runtime_limit = sysctl_sched_granularity * 4;
-	sysctl_sched_wakeup_granularity = sysctl_sched_granularity / 2;
+	sysctl_sched_runtime_limit = sysctl_sched_latency;
+	sysctl_sched_wakeup_granularity = sysctl_sched_min_granularity / 2;
 }
 
 #ifdef CONFIG_SMP
@@ -5234,15 +5266,16 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
 static struct ctl_table sd_ctl_dir[] = {
 	{
 		.procname = "sched_domain",
-		.mode = 0755,
+		.mode = 0555,
 	},
 	{0,},
 };
 
 static struct ctl_table sd_ctl_root[] = {
 	{
+		.ctl_name = CTL_KERN,
 		.procname = "kernel",
-		.mode = 0755,
+		.mode = 0555,
 		.child = sd_ctl_dir,
 	},
 	{0,},
@@ -5318,7 +5351,7 @@ static ctl_table *sd_alloc_ctl_cpu_table(int cpu)
 	for_each_domain(cpu, sd) {
 		snprintf(buf, 32, "domain%d", i);
 		entry->procname = kstrdup(buf, GFP_KERNEL);
-		entry->mode = 0755;
+		entry->mode = 0555;
 		entry->child = sd_alloc_ctl_domain_table(sd);
 		entry++;
 		i++;
@@ -5338,7 +5371,7 @@ static void init_sched_domain_sysctl(void)
 	for (i = 0; i < cpu_num; i++, entry++) {
 		snprintf(buf, 32, "cpu%d", i);
 		entry->procname = kstrdup(buf, GFP_KERNEL);
-		entry->mode = 0555;
+		entry->mode = 0555;
 		entry->child = sd_alloc_ctl_cpu_table(i);
 	}
 	sd_sysctl_header = register_sysctl_table(sd_ctl_root);