Diffstat (limited to 'kernel/sched.c')
-rw-r--r--	kernel/sched.c	88
1 file changed, 60 insertions(+), 28 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 45e17b83b7f1..9fe473a190de 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -262,7 +262,8 @@ struct rq {
 	s64 clock_max_delta;
 
 	unsigned int clock_warps, clock_overflows;
-	unsigned int clock_unstable_events;
+	u64 idle_clock;
+	unsigned int clock_deep_idle_events;
 	u64 tick_timestamp;
 
 	atomic_t nr_iowait;
@@ -556,18 +557,40 @@ static inline struct rq *this_rq_lock(void)
 }
 
 /*
- * CPU frequency is/was unstable - start new by setting prev_clock_raw:
+ * We are going deep-idle (irqs are disabled):
  */
-void sched_clock_unstable_event(void)
+void sched_clock_idle_sleep_event(void)
 {
-	unsigned long flags;
-	struct rq *rq;
+	struct rq *rq = cpu_rq(smp_processor_id());
 
-	rq = task_rq_lock(current, &flags);
-	rq->prev_clock_raw = sched_clock();
-	rq->clock_unstable_events++;
-	task_rq_unlock(rq, &flags);
+	spin_lock(&rq->lock);
+	__update_rq_clock(rq);
+	spin_unlock(&rq->lock);
+	rq->clock_deep_idle_events++;
+}
+EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
+
+/*
+ * We just idled delta nanoseconds (called with irqs disabled):
+ */
+void sched_clock_idle_wakeup_event(u64 delta_ns)
+{
+	struct rq *rq = cpu_rq(smp_processor_id());
+	u64 now = sched_clock();
+
+	rq->idle_clock += delta_ns;
+	/*
+	 * Override the previous timestamp and ignore all
+	 * sched_clock() deltas that occurred while we idled,
+	 * and use the PM-provided delta_ns to advance the
+	 * rq clock:
+	 */
+	spin_lock(&rq->lock);
+	rq->prev_clock_raw = now;
+	rq->clock += delta_ns;
+	spin_unlock(&rq->lock);
 }
+EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
 
 /*
  * resched_task - mark a task 'to be rescheduled now'.
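The two exported hooks above are meant to bracket a deep-idle period in idle/PM code whose sched_clock() source stops or drifts across deep C-states (the comment refers to a PM-provided delta_ns). A minimal sketch of such a caller is below; the helper names example_read_pm_timer_ns() and example_enter_deep_idle_state() are hypothetical placeholders for illustration, not functions added by this patch.

/*
 * Illustrative caller only (hypothetical helpers, not part of this
 * patch).  Runs with interrupts disabled, as both hooks expect.
 */
static void example_deep_idle(void)
{
	u64 before, after;

	/* snapshot the rq clock before sched_clock() goes unreliable */
	sched_clock_idle_sleep_event();

	before = example_read_pm_timer_ns();	/* hypothetical PM timer read */
	example_enter_deep_idle_state();	/* hypothetical C-state entry */
	after  = example_read_pm_timer_ns();

	/* credit the time actually slept back to the rq clock */
	sched_clock_idle_wakeup_event(after - before);
}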
@@ -2157,12 +2180,6 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
 	if (task_running(rq, p))
 		return 0;
 
-	/*
-	 * Aggressive migration if too many balance attempts have failed:
-	 */
-	if (sd->nr_balance_failed > sd->cache_nice_tries)
-		return 1;
-
 	return 1;
 }
 
@@ -2494,7 +2511,7 @@ group_next:
 	 * a think about bumping its value to force at least one task to be
 	 * moved
 	 */
-	if (*imbalance + SCHED_LOAD_SCALE_FUZZ < busiest_load_per_task/2) {
+	if (*imbalance + SCHED_LOAD_SCALE_FUZZ < busiest_load_per_task) {
 		unsigned long tmp, pwr_now, pwr_move;
 		unsigned int imbn;
 
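The relaxed comparison widens the small-imbalance fix-up path. With illustrative numbers only (none of these values are taken from the kernel): for *imbalance = 100, SCHED_LOAD_SCALE_FUZZ = 32 and busiest_load_per_task = 200, the old test checked 132 < 200/2 = 100 and failed, while the new test checks 132 < 200 and succeeds, so the pwr_now/pwr_move estimation declared here now also runs in such borderline cases.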
@@ -3020,6 +3037,7 @@ static inline void rebalance_domains(int cpu, enum cpu_idle_type idle)
 	struct sched_domain *sd;
 	/* Earliest time when we have to do rebalance again */
 	unsigned long next_balance = jiffies + 60*HZ;
+	int update_next_balance = 0;
 
 	for_each_domain(cpu, sd) {
 		if (!(sd->flags & SD_LOAD_BALANCE))
@@ -3056,8 +3074,10 @@ static inline void rebalance_domains(int cpu, enum cpu_idle_type idle)
 		if (sd->flags & SD_SERIALIZE)
 			spin_unlock(&balancing);
 out:
-		if (time_after(next_balance, sd->last_balance + interval))
+		if (time_after(next_balance, sd->last_balance + interval)) {
 			next_balance = sd->last_balance + interval;
+			update_next_balance = 1;
+		}
 
 		/*
 		 * Stop the load balance at this level. There is another
@@ -3067,7 +3087,14 @@ out:
 		if (!balance)
 			break;
 	}
-	rq->next_balance = next_balance;
+
+	/*
+	 * next_balance will be updated only when there is a need.
+	 * When the CPU is attached to the null domain, for example,
+	 * it will not be updated.
+	 */
+	if (likely(update_next_balance))
+		rq->next_balance = next_balance;
 }
 
 /*
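Stripped of the scheduler context, the pattern introduced by update_next_balance is "take the minimum candidate deadline, but only commit it if there was a candidate at all". The standalone function below is a sketch written for this note, not code from kernel/sched.c; it assumes the caller collects the per-domain last_balance + interval values into an array.

/* Illustration only: keep the old deadline when no domain offered one. */
static unsigned long pick_next_balance(unsigned long old_deadline,
				       const unsigned long *candidates, int n)
{
	unsigned long next = jiffies + 60 * HZ;	/* pessimistic default */
	int updated = 0;
	int i;

	for (i = 0; i < n; i++) {
		if (time_after(next, candidates[i])) {
			next = candidates[i];
			updated = 1;
		}
	}

	/* e.g. a CPU attached to the null domain has no candidates */
	return updated ? next : old_deadline;
}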
@@ -4884,14 +4911,18 @@ cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
 static inline void sched_init_granularity(void)
 {
 	unsigned int factor = 1 + ilog2(num_online_cpus());
-	const unsigned long gran_limit = 100000000;
+	const unsigned long limit = 100000000;
+
+	sysctl_sched_min_granularity *= factor;
+	if (sysctl_sched_min_granularity > limit)
+		sysctl_sched_min_granularity = limit;
 
-	sysctl_sched_granularity *= factor;
-	if (sysctl_sched_granularity > gran_limit)
-		sysctl_sched_granularity = gran_limit;
+	sysctl_sched_latency *= factor;
+	if (sysctl_sched_latency > limit)
+		sysctl_sched_latency = limit;
 
-	sysctl_sched_runtime_limit = sysctl_sched_granularity * 4;
-	sysctl_sched_wakeup_granularity = sysctl_sched_min_granularity / 2;
+	sysctl_sched_runtime_limit = sysctl_sched_latency;
+	sysctl_sched_wakeup_granularity = sysctl_sched_min_granularity / 2;
 }
 
 #ifdef CONFIG_SMP
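A worked example of the scaling, using assumed values (the 8-CPU count and the 2 ms starting granularity are illustrative, not defaults quoted from this tree): factor = 1 + ilog2(8) = 4, so 2,000,000 ns becomes 8,000,000 ns, well under the 100,000,000 ns (100 ms) clamp; sysctl_sched_latency scales the same way, sysctl_sched_runtime_limit now tracks the scaled latency directly, and sysctl_sched_wakeup_granularity is half of the scaled minimum granularity. The userspace snippet below reproduces the arithmetic.

/* Standalone arithmetic check; the values are assumptions, not kernel defaults. */
#include <stdio.h>

int main(void)
{
	unsigned int ncpus = 8;				/* assumed CPU count */
	unsigned int factor = 1;			/* computes 1 + ilog2(ncpus) */
	unsigned long min_granularity = 2000000UL;	/* assumed 2 ms, in ns */
	const unsigned long limit = 100000000UL;	/* 100 ms clamp from the patch */

	while (ncpus > 1) {
		ncpus >>= 1;
		factor++;
	}

	min_granularity *= factor;
	if (min_granularity > limit)
		min_granularity = limit;

	printf("factor=%u, scaled min_granularity=%lu ns\n",
	       factor, min_granularity);
	return 0;
}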
@@ -5234,15 +5265,16 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
 static struct ctl_table sd_ctl_dir[] = {
 	{
 		.procname = "sched_domain",
-		.mode = 0755,
+		.mode = 0555,
 	},
 	{0,},
 };
 
 static struct ctl_table sd_ctl_root[] = {
 	{
+		.ctl_name = CTL_KERN,
 		.procname = "kernel",
-		.mode = 0755,
+		.mode = 0555,
 		.child = sd_ctl_dir,
 	},
 	{0,},
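The mode change from 0755 (rwxr-xr-x) to 0555 (r-xr-xr-x) drops the owner-write bit on the generated sysctl directories: directory nodes under /proc/sys only need to be readable and traversable, presumably keeping the sched_domain tree in line with the convention that sysctl directories are not writable. The added .ctl_name = CTL_KERN gives the "kernel" root entry a binary-sysctl name to match its procname.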
@@ -5318,7 +5350,7 @@ static ctl_table *sd_alloc_ctl_cpu_table(int cpu)
 	for_each_domain(cpu, sd) {
 		snprintf(buf, 32, "domain%d", i);
 		entry->procname = kstrdup(buf, GFP_KERNEL);
-		entry->mode = 0755;
+		entry->mode = 0555;
 		entry->child = sd_alloc_ctl_domain_table(sd);
 		entry++;
 		i++;
@@ -5338,7 +5370,7 @@ static void init_sched_domain_sysctl(void)
 	for (i = 0; i < cpu_num; i++, entry++) {
 		snprintf(buf, 32, "cpu%d", i);
 		entry->procname = kstrdup(buf, GFP_KERNEL);
-		entry->mode = 0755;
+		entry->mode = 0555;
 		entry->child = sd_alloc_ctl_cpu_table(i);
 	}
 	sd_sysctl_header = register_sysctl_table(sd_ctl_root);