diff options
Diffstat (limited to 'kernel/sched.c')
-rw-r--r-- | kernel/sched.c | 88 |
1 files changed, 60 insertions, 28 deletions
diff --git a/kernel/sched.c b/kernel/sched.c index 45e17b83b7f1..9fe473a190de 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -262,7 +262,8 @@ struct rq { | |||
262 | s64 clock_max_delta; | 262 | s64 clock_max_delta; |
263 | 263 | ||
264 | unsigned int clock_warps, clock_overflows; | 264 | unsigned int clock_warps, clock_overflows; |
265 | unsigned int clock_unstable_events; | 265 | u64 idle_clock; |
266 | unsigned int clock_deep_idle_events; | ||
266 | u64 tick_timestamp; | 267 | u64 tick_timestamp; |
267 | 268 | ||
268 | atomic_t nr_iowait; | 269 | atomic_t nr_iowait; |
@@ -556,18 +557,40 @@ static inline struct rq *this_rq_lock(void) | |||
556 | } | 557 | } |
557 | 558 | ||
558 | /* | 559 | /* |
559 | * CPU frequency is/was unstable - start new by setting prev_clock_raw: | 560 | * We are going deep-idle (irqs are disabled): |
560 | */ | 561 | */ |
561 | void sched_clock_unstable_event(void) | 562 | void sched_clock_idle_sleep_event(void) |
562 | { | 563 | { |
563 | unsigned long flags; | 564 | struct rq *rq = cpu_rq(smp_processor_id()); |
564 | struct rq *rq; | ||
565 | 565 | ||
566 | rq = task_rq_lock(current, &flags); | 566 | spin_lock(&rq->lock); |
567 | rq->prev_clock_raw = sched_clock(); | 567 | __update_rq_clock(rq); |
568 | rq->clock_unstable_events++; | 568 | spin_unlock(&rq->lock); |
569 | task_rq_unlock(rq, &flags); | 569 | rq->clock_deep_idle_events++; |
570 | } | ||
571 | EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event); | ||
572 | |||
573 | /* | ||
574 | * We just idled delta nanoseconds (called with irqs disabled): | ||
575 | */ | ||
576 | void sched_clock_idle_wakeup_event(u64 delta_ns) | ||
577 | { | ||
578 | struct rq *rq = cpu_rq(smp_processor_id()); | ||
579 | u64 now = sched_clock(); | ||
580 | |||
581 | rq->idle_clock += delta_ns; | ||
582 | /* | ||
583 | * Override the previous timestamp and ignore all | ||
584 | * sched_clock() deltas that occurred while we idled, | ||
585 | * and use the PM-provided delta_ns to advance the | ||
586 | * rq clock: | ||
587 | */ | ||
588 | spin_lock(&rq->lock); | ||
589 | rq->prev_clock_raw = now; | ||
590 | rq->clock += delta_ns; | ||
591 | spin_unlock(&rq->lock); | ||
570 | } | 592 | } |
593 | EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event); | ||
571 | 594 | ||
572 | /* | 595 | /* |
573 | * resched_task - mark a task 'to be rescheduled now'. | 596 | * resched_task - mark a task 'to be rescheduled now'. |
@@ -2157,12 +2180,6 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu, | |||
2157 | if (task_running(rq, p)) | 2180 | if (task_running(rq, p)) |
2158 | return 0; | 2181 | return 0; |
2159 | 2182 | ||
2160 | /* | ||
2161 | * Aggressive migration if too many balance attempts have failed: | ||
2162 | */ | ||
2163 | if (sd->nr_balance_failed > sd->cache_nice_tries) | ||
2164 | return 1; | ||
2165 | |||
2166 | return 1; | 2183 | return 1; |
2167 | } | 2184 | } |
2168 | 2185 | ||
@@ -2494,7 +2511,7 @@ group_next: | |||
2494 | * a think about bumping its value to force at least one task to be | 2511 | * a think about bumping its value to force at least one task to be |
2495 | * moved | 2512 | * moved |
2496 | */ | 2513 | */ |
2497 | if (*imbalance + SCHED_LOAD_SCALE_FUZZ < busiest_load_per_task/2) { | 2514 | if (*imbalance + SCHED_LOAD_SCALE_FUZZ < busiest_load_per_task) { |
2498 | unsigned long tmp, pwr_now, pwr_move; | 2515 | unsigned long tmp, pwr_now, pwr_move; |
2499 | unsigned int imbn; | 2516 | unsigned int imbn; |
2500 | 2517 | ||
@@ -3020,6 +3037,7 @@ static inline void rebalance_domains(int cpu, enum cpu_idle_type idle) | |||
3020 | struct sched_domain *sd; | 3037 | struct sched_domain *sd; |
3021 | /* Earliest time when we have to do rebalance again */ | 3038 | /* Earliest time when we have to do rebalance again */ |
3022 | unsigned long next_balance = jiffies + 60*HZ; | 3039 | unsigned long next_balance = jiffies + 60*HZ; |
3040 | int update_next_balance = 0; | ||
3023 | 3041 | ||
3024 | for_each_domain(cpu, sd) { | 3042 | for_each_domain(cpu, sd) { |
3025 | if (!(sd->flags & SD_LOAD_BALANCE)) | 3043 | if (!(sd->flags & SD_LOAD_BALANCE)) |
@@ -3056,8 +3074,10 @@ static inline void rebalance_domains(int cpu, enum cpu_idle_type idle) | |||
3056 | if (sd->flags & SD_SERIALIZE) | 3074 | if (sd->flags & SD_SERIALIZE) |
3057 | spin_unlock(&balancing); | 3075 | spin_unlock(&balancing); |
3058 | out: | 3076 | out: |
3059 | if (time_after(next_balance, sd->last_balance + interval)) | 3077 | if (time_after(next_balance, sd->last_balance + interval)) { |
3060 | next_balance = sd->last_balance + interval; | 3078 | next_balance = sd->last_balance + interval; |
3079 | update_next_balance = 1; | ||
3080 | } | ||
3061 | 3081 | ||
3062 | /* | 3082 | /* |
3063 | * Stop the load balance at this level. There is another | 3083 | * Stop the load balance at this level. There is another |
@@ -3067,7 +3087,14 @@ out: | |||
3067 | if (!balance) | 3087 | if (!balance) |
3068 | break; | 3088 | break; |
3069 | } | 3089 | } |
3070 | rq->next_balance = next_balance; | 3090 | |
3091 | /* | ||
3092 | * next_balance will be updated only when there is a need. | ||
3093 | * When the cpu is attached to a null domain, for example, it will not be | ||
3094 | * updated. | ||
3095 | */ | ||
3096 | if (likely(update_next_balance)) | ||
3097 | rq->next_balance = next_balance; | ||
3071 | } | 3098 | } |
3072 | 3099 | ||
3073 | /* | 3100 | /* |
@@ -4884,14 +4911,18 @@ cpumask_t nohz_cpu_mask = CPU_MASK_NONE; | |||
4884 | static inline void sched_init_granularity(void) | 4911 | static inline void sched_init_granularity(void) |
4885 | { | 4912 | { |
4886 | unsigned int factor = 1 + ilog2(num_online_cpus()); | 4913 | unsigned int factor = 1 + ilog2(num_online_cpus()); |
4887 | const unsigned long gran_limit = 100000000; | 4914 | const unsigned long limit = 100000000; |
4915 | |||
4916 | sysctl_sched_min_granularity *= factor; | ||
4917 | if (sysctl_sched_min_granularity > limit) | ||
4918 | sysctl_sched_min_granularity = limit; | ||
4888 | 4919 | ||
4889 | sysctl_sched_granularity *= factor; | 4920 | sysctl_sched_latency *= factor; |
4890 | if (sysctl_sched_granularity > gran_limit) | 4921 | if (sysctl_sched_latency > limit) |
4891 | sysctl_sched_granularity = gran_limit; | 4922 | sysctl_sched_latency = limit; |
4892 | 4923 | ||
4893 | sysctl_sched_runtime_limit = sysctl_sched_granularity * 4; | 4924 | sysctl_sched_runtime_limit = sysctl_sched_latency; |
4894 | sysctl_sched_wakeup_granularity = sysctl_sched_granularity / 2; | 4925 | sysctl_sched_wakeup_granularity = sysctl_sched_min_granularity / 2; |
4895 | } | 4926 | } |
4896 | 4927 | ||
4897 | #ifdef CONFIG_SMP | 4928 | #ifdef CONFIG_SMP |
@@ -5234,15 +5265,16 @@ static void migrate_dead_tasks(unsigned int dead_cpu) | |||
5234 | static struct ctl_table sd_ctl_dir[] = { | 5265 | static struct ctl_table sd_ctl_dir[] = { |
5235 | { | 5266 | { |
5236 | .procname = "sched_domain", | 5267 | .procname = "sched_domain", |
5237 | .mode = 0755, | 5268 | .mode = 0555, |
5238 | }, | 5269 | }, |
5239 | {0,}, | 5270 | {0,}, |
5240 | }; | 5271 | }; |
5241 | 5272 | ||
5242 | static struct ctl_table sd_ctl_root[] = { | 5273 | static struct ctl_table sd_ctl_root[] = { |
5243 | { | 5274 | { |
5275 | .ctl_name = CTL_KERN, | ||
5244 | .procname = "kernel", | 5276 | .procname = "kernel", |
5245 | .mode = 0755, | 5277 | .mode = 0555, |
5246 | .child = sd_ctl_dir, | 5278 | .child = sd_ctl_dir, |
5247 | }, | 5279 | }, |
5248 | {0,}, | 5280 | {0,}, |
@@ -5318,7 +5350,7 @@ static ctl_table *sd_alloc_ctl_cpu_table(int cpu) | |||
5318 | for_each_domain(cpu, sd) { | 5350 | for_each_domain(cpu, sd) { |
5319 | snprintf(buf, 32, "domain%d", i); | 5351 | snprintf(buf, 32, "domain%d", i); |
5320 | entry->procname = kstrdup(buf, GFP_KERNEL); | 5352 | entry->procname = kstrdup(buf, GFP_KERNEL); |
5321 | entry->mode = 0755; | 5353 | entry->mode = 0555; |
5322 | entry->child = sd_alloc_ctl_domain_table(sd); | 5354 | entry->child = sd_alloc_ctl_domain_table(sd); |
5323 | entry++; | 5355 | entry++; |
5324 | i++; | 5356 | i++; |
@@ -5338,7 +5370,7 @@ static void init_sched_domain_sysctl(void) | |||
5338 | for (i = 0; i < cpu_num; i++, entry++) { | 5370 | for (i = 0; i < cpu_num; i++, entry++) { |
5339 | snprintf(buf, 32, "cpu%d", i); | 5371 | snprintf(buf, 32, "cpu%d", i); |
5340 | entry->procname = kstrdup(buf, GFP_KERNEL); | 5372 | entry->procname = kstrdup(buf, GFP_KERNEL); |
5341 | entry->mode = 0755; | 5373 | entry->mode = 0555; |
5342 | entry->child = sd_alloc_ctl_cpu_table(i); | 5374 | entry->child = sd_alloc_ctl_cpu_table(i); |
5343 | } | 5375 | } |
5344 | sd_sysctl_header = register_sysctl_table(sd_ctl_root); | 5376 | sd_sysctl_header = register_sysctl_table(sd_ctl_root); |