diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-12-05 18:30:49 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-12-05 18:30:49 -0500 |
commit | 897e81bea1fcfcd2c5cdb720c9efdb25da9ff374 (patch) | |
tree | 92cf33ed2c35c1ece633f09365702f1c8e24d415 /kernel/sched.c | |
parent | c3fa27d1367fac63ac8533d6f20ea851d0d70a10 (diff) | |
parent | 0cf55e1ec08bb5a22e068309e2d8ba1180ab4239 (diff) |
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (35 commits)
sched, cputime: Introduce thread_group_times()
sched, cputime: Cleanups related to task_times()
Revert "sched, x86: Optimize branch hint in __switch_to()"
sched: Fix isolcpus boot option
sched: Revert 498657a478c60be092208422fefa9c7b248729c2
sched, time: Define nsecs_to_jiffies()
sched: Remove task_{u,s,g}time()
sched: Introduce task_times() to replace task_{u,s}time() pair
sched: Limit the number of scheduler debug messages
sched.c: Call debug_show_all_locks() when dumping all tasks
sched, x86: Optimize branch hint in __switch_to()
sched: Optimize branch hint in context_switch()
sched: Optimize branch hint in pick_next_task_fair()
sched_feat_write(): Update ppos instead of file->f_pos
sched: Sched_rt_periodic_timer vs cpu hotplug
sched, kvm: Fix race condition involving sched_in_preempt_notifers
sched: More generic WAKE_AFFINE vs select_idle_sibling()
sched: Cleanup select_task_rq_fair()
sched: Fix granularity of task_u/stime()
sched: Fix/add missing update_rq_clock() calls
...
Diffstat (limited to 'kernel/sched.c')
-rw-r--r-- | kernel/sched.c | 270 |
1 files changed, 174 insertions, 96 deletions
diff --git a/kernel/sched.c b/kernel/sched.c index 6ae2739b8f19..aa31244caa9f 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -535,14 +535,12 @@ struct rq { | |||
535 | #define CPU_LOAD_IDX_MAX 5 | 535 | #define CPU_LOAD_IDX_MAX 5 |
536 | unsigned long cpu_load[CPU_LOAD_IDX_MAX]; | 536 | unsigned long cpu_load[CPU_LOAD_IDX_MAX]; |
537 | #ifdef CONFIG_NO_HZ | 537 | #ifdef CONFIG_NO_HZ |
538 | unsigned long last_tick_seen; | ||
539 | unsigned char in_nohz_recently; | 538 | unsigned char in_nohz_recently; |
540 | #endif | 539 | #endif |
541 | /* capture load from *all* tasks on this cpu: */ | 540 | /* capture load from *all* tasks on this cpu: */ |
542 | struct load_weight load; | 541 | struct load_weight load; |
543 | unsigned long nr_load_updates; | 542 | unsigned long nr_load_updates; |
544 | u64 nr_switches; | 543 | u64 nr_switches; |
545 | u64 nr_migrations_in; | ||
546 | 544 | ||
547 | struct cfs_rq cfs; | 545 | struct cfs_rq cfs; |
548 | struct rt_rq rt; | 546 | struct rt_rq rt; |
@@ -591,6 +589,8 @@ struct rq { | |||
591 | 589 | ||
592 | u64 rt_avg; | 590 | u64 rt_avg; |
593 | u64 age_stamp; | 591 | u64 age_stamp; |
592 | u64 idle_stamp; | ||
593 | u64 avg_idle; | ||
594 | #endif | 594 | #endif |
595 | 595 | ||
596 | /* calc_load related fields */ | 596 | /* calc_load related fields */ |
@@ -772,7 +772,7 @@ sched_feat_write(struct file *filp, const char __user *ubuf, | |||
772 | if (!sched_feat_names[i]) | 772 | if (!sched_feat_names[i]) |
773 | return -EINVAL; | 773 | return -EINVAL; |
774 | 774 | ||
775 | filp->f_pos += cnt; | 775 | *ppos += cnt; |
776 | 776 | ||
777 | return cnt; | 777 | return cnt; |
778 | } | 778 | } |
@@ -2017,6 +2017,7 @@ void kthread_bind(struct task_struct *p, unsigned int cpu) | |||
2017 | } | 2017 | } |
2018 | 2018 | ||
2019 | spin_lock_irqsave(&rq->lock, flags); | 2019 | spin_lock_irqsave(&rq->lock, flags); |
2020 | update_rq_clock(rq); | ||
2020 | set_task_cpu(p, cpu); | 2021 | set_task_cpu(p, cpu); |
2021 | p->cpus_allowed = cpumask_of_cpu(cpu); | 2022 | p->cpus_allowed = cpumask_of_cpu(cpu); |
2022 | p->rt.nr_cpus_allowed = 1; | 2023 | p->rt.nr_cpus_allowed = 1; |
@@ -2078,7 +2079,6 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) | |||
2078 | #endif | 2079 | #endif |
2079 | if (old_cpu != new_cpu) { | 2080 | if (old_cpu != new_cpu) { |
2080 | p->se.nr_migrations++; | 2081 | p->se.nr_migrations++; |
2081 | new_rq->nr_migrations_in++; | ||
2082 | #ifdef CONFIG_SCHEDSTATS | 2082 | #ifdef CONFIG_SCHEDSTATS |
2083 | if (task_hot(p, old_rq->clock, NULL)) | 2083 | if (task_hot(p, old_rq->clock, NULL)) |
2084 | schedstat_inc(p, se.nr_forced2_migrations); | 2084 | schedstat_inc(p, se.nr_forced2_migrations); |
@@ -2115,6 +2115,7 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req) | |||
2115 | * it is sufficient to simply update the task's cpu field. | 2115 | * it is sufficient to simply update the task's cpu field. |
2116 | */ | 2116 | */ |
2117 | if (!p->se.on_rq && !task_running(rq, p)) { | 2117 | if (!p->se.on_rq && !task_running(rq, p)) { |
2118 | update_rq_clock(rq); | ||
2118 | set_task_cpu(p, dest_cpu); | 2119 | set_task_cpu(p, dest_cpu); |
2119 | return 0; | 2120 | return 0; |
2120 | } | 2121 | } |
@@ -2376,14 +2377,15 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, | |||
2376 | task_rq_unlock(rq, &flags); | 2377 | task_rq_unlock(rq, &flags); |
2377 | 2378 | ||
2378 | cpu = p->sched_class->select_task_rq(p, SD_BALANCE_WAKE, wake_flags); | 2379 | cpu = p->sched_class->select_task_rq(p, SD_BALANCE_WAKE, wake_flags); |
2379 | if (cpu != orig_cpu) | 2380 | if (cpu != orig_cpu) { |
2381 | local_irq_save(flags); | ||
2382 | rq = cpu_rq(cpu); | ||
2383 | update_rq_clock(rq); | ||
2380 | set_task_cpu(p, cpu); | 2384 | set_task_cpu(p, cpu); |
2381 | 2385 | local_irq_restore(flags); | |
2386 | } | ||
2382 | rq = task_rq_lock(p, &flags); | 2387 | rq = task_rq_lock(p, &flags); |
2383 | 2388 | ||
2384 | if (rq != orig_rq) | ||
2385 | update_rq_clock(rq); | ||
2386 | |||
2387 | WARN_ON(p->state != TASK_WAKING); | 2389 | WARN_ON(p->state != TASK_WAKING); |
2388 | cpu = task_cpu(p); | 2390 | cpu = task_cpu(p); |
2389 | 2391 | ||
@@ -2440,6 +2442,17 @@ out_running: | |||
2440 | #ifdef CONFIG_SMP | 2442 | #ifdef CONFIG_SMP |
2441 | if (p->sched_class->task_wake_up) | 2443 | if (p->sched_class->task_wake_up) |
2442 | p->sched_class->task_wake_up(rq, p); | 2444 | p->sched_class->task_wake_up(rq, p); |
2445 | |||
2446 | if (unlikely(rq->idle_stamp)) { | ||
2447 | u64 delta = rq->clock - rq->idle_stamp; | ||
2448 | u64 max = 2*sysctl_sched_migration_cost; | ||
2449 | |||
2450 | if (delta > max) | ||
2451 | rq->avg_idle = max; | ||
2452 | else | ||
2453 | update_avg(&rq->avg_idle, delta); | ||
2454 | rq->idle_stamp = 0; | ||
2455 | } | ||
2443 | #endif | 2456 | #endif |
2444 | out: | 2457 | out: |
2445 | task_rq_unlock(rq, &flags); | 2458 | task_rq_unlock(rq, &flags); |
@@ -2545,6 +2558,7 @@ static void __sched_fork(struct task_struct *p) | |||
2545 | void sched_fork(struct task_struct *p, int clone_flags) | 2558 | void sched_fork(struct task_struct *p, int clone_flags) |
2546 | { | 2559 | { |
2547 | int cpu = get_cpu(); | 2560 | int cpu = get_cpu(); |
2561 | unsigned long flags; | ||
2548 | 2562 | ||
2549 | __sched_fork(p); | 2563 | __sched_fork(p); |
2550 | 2564 | ||
@@ -2581,7 +2595,10 @@ void sched_fork(struct task_struct *p, int clone_flags) | |||
2581 | #ifdef CONFIG_SMP | 2595 | #ifdef CONFIG_SMP |
2582 | cpu = p->sched_class->select_task_rq(p, SD_BALANCE_FORK, 0); | 2596 | cpu = p->sched_class->select_task_rq(p, SD_BALANCE_FORK, 0); |
2583 | #endif | 2597 | #endif |
2598 | local_irq_save(flags); | ||
2599 | update_rq_clock(cpu_rq(cpu)); | ||
2584 | set_task_cpu(p, cpu); | 2600 | set_task_cpu(p, cpu); |
2601 | local_irq_restore(flags); | ||
2585 | 2602 | ||
2586 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) | 2603 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) |
2587 | if (likely(sched_info_on())) | 2604 | if (likely(sched_info_on())) |
@@ -2848,14 +2865,14 @@ context_switch(struct rq *rq, struct task_struct *prev, | |||
2848 | */ | 2865 | */ |
2849 | arch_start_context_switch(prev); | 2866 | arch_start_context_switch(prev); |
2850 | 2867 | ||
2851 | if (unlikely(!mm)) { | 2868 | if (likely(!mm)) { |
2852 | next->active_mm = oldmm; | 2869 | next->active_mm = oldmm; |
2853 | atomic_inc(&oldmm->mm_count); | 2870 | atomic_inc(&oldmm->mm_count); |
2854 | enter_lazy_tlb(oldmm, next); | 2871 | enter_lazy_tlb(oldmm, next); |
2855 | } else | 2872 | } else |
2856 | switch_mm(oldmm, mm, next); | 2873 | switch_mm(oldmm, mm, next); |
2857 | 2874 | ||
2858 | if (unlikely(!prev->mm)) { | 2875 | if (likely(!prev->mm)) { |
2859 | prev->active_mm = NULL; | 2876 | prev->active_mm = NULL; |
2860 | rq->prev_mm = oldmm; | 2877 | rq->prev_mm = oldmm; |
2861 | } | 2878 | } |
@@ -3018,15 +3035,6 @@ static void calc_load_account_active(struct rq *this_rq) | |||
3018 | } | 3035 | } |
3019 | 3036 | ||
3020 | /* | 3037 | /* |
3021 | * Externally visible per-cpu scheduler statistics: | ||
3022 | * cpu_nr_migrations(cpu) - number of migrations into that cpu | ||
3023 | */ | ||
3024 | u64 cpu_nr_migrations(int cpu) | ||
3025 | { | ||
3026 | return cpu_rq(cpu)->nr_migrations_in; | ||
3027 | } | ||
3028 | |||
3029 | /* | ||
3030 | * Update rq->cpu_load[] statistics. This function is usually called every | 3038 | * Update rq->cpu_load[] statistics. This function is usually called every |
3031 | * scheduler tick (TICK_NSEC). | 3039 | * scheduler tick (TICK_NSEC). |
3032 | */ | 3040 | */ |
@@ -4126,7 +4134,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, | |||
4126 | unsigned long flags; | 4134 | unsigned long flags; |
4127 | struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); | 4135 | struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); |
4128 | 4136 | ||
4129 | cpumask_setall(cpus); | 4137 | cpumask_copy(cpus, cpu_online_mask); |
4130 | 4138 | ||
4131 | /* | 4139 | /* |
4132 | * When power savings policy is enabled for the parent domain, idle | 4140 | * When power savings policy is enabled for the parent domain, idle |
@@ -4289,7 +4297,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) | |||
4289 | int all_pinned = 0; | 4297 | int all_pinned = 0; |
4290 | struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); | 4298 | struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); |
4291 | 4299 | ||
4292 | cpumask_setall(cpus); | 4300 | cpumask_copy(cpus, cpu_online_mask); |
4293 | 4301 | ||
4294 | /* | 4302 | /* |
4295 | * When power savings policy is enabled for the parent domain, idle | 4303 | * When power savings policy is enabled for the parent domain, idle |
@@ -4429,6 +4437,11 @@ static void idle_balance(int this_cpu, struct rq *this_rq) | |||
4429 | int pulled_task = 0; | 4437 | int pulled_task = 0; |
4430 | unsigned long next_balance = jiffies + HZ; | 4438 | unsigned long next_balance = jiffies + HZ; |
4431 | 4439 | ||
4440 | this_rq->idle_stamp = this_rq->clock; | ||
4441 | |||
4442 | if (this_rq->avg_idle < sysctl_sched_migration_cost) | ||
4443 | return; | ||
4444 | |||
4432 | for_each_domain(this_cpu, sd) { | 4445 | for_each_domain(this_cpu, sd) { |
4433 | unsigned long interval; | 4446 | unsigned long interval; |
4434 | 4447 | ||
@@ -4443,8 +4456,10 @@ static void idle_balance(int this_cpu, struct rq *this_rq) | |||
4443 | interval = msecs_to_jiffies(sd->balance_interval); | 4456 | interval = msecs_to_jiffies(sd->balance_interval); |
4444 | if (time_after(next_balance, sd->last_balance + interval)) | 4457 | if (time_after(next_balance, sd->last_balance + interval)) |
4445 | next_balance = sd->last_balance + interval; | 4458 | next_balance = sd->last_balance + interval; |
4446 | if (pulled_task) | 4459 | if (pulled_task) { |
4460 | this_rq->idle_stamp = 0; | ||
4447 | break; | 4461 | break; |
4462 | } | ||
4448 | } | 4463 | } |
4449 | if (pulled_task || time_after(jiffies, this_rq->next_balance)) { | 4464 | if (pulled_task || time_after(jiffies, this_rq->next_balance)) { |
4450 | /* | 4465 | /* |
@@ -5046,8 +5061,13 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime, | |||
5046 | p->gtime = cputime_add(p->gtime, cputime); | 5061 | p->gtime = cputime_add(p->gtime, cputime); |
5047 | 5062 | ||
5048 | /* Add guest time to cpustat. */ | 5063 | /* Add guest time to cpustat. */ |
5049 | cpustat->user = cputime64_add(cpustat->user, tmp); | 5064 | if (TASK_NICE(p) > 0) { |
5050 | cpustat->guest = cputime64_add(cpustat->guest, tmp); | 5065 | cpustat->nice = cputime64_add(cpustat->nice, tmp); |
5066 | cpustat->guest_nice = cputime64_add(cpustat->guest_nice, tmp); | ||
5067 | } else { | ||
5068 | cpustat->user = cputime64_add(cpustat->user, tmp); | ||
5069 | cpustat->guest = cputime64_add(cpustat->guest, tmp); | ||
5070 | } | ||
5051 | } | 5071 | } |
5052 | 5072 | ||
5053 | /* | 5073 | /* |
@@ -5162,60 +5182,86 @@ void account_idle_ticks(unsigned long ticks) | |||
5162 | * Use precise platform statistics if available: | 5182 | * Use precise platform statistics if available: |
5163 | */ | 5183 | */ |
5164 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING | 5184 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING |
5165 | cputime_t task_utime(struct task_struct *p) | 5185 | void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) |
5166 | { | 5186 | { |
5167 | return p->utime; | 5187 | *ut = p->utime; |
5188 | *st = p->stime; | ||
5168 | } | 5189 | } |
5169 | 5190 | ||
5170 | cputime_t task_stime(struct task_struct *p) | 5191 | void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) |
5171 | { | 5192 | { |
5172 | return p->stime; | 5193 | struct task_cputime cputime; |
5194 | |||
5195 | thread_group_cputime(p, &cputime); | ||
5196 | |||
5197 | *ut = cputime.utime; | ||
5198 | *st = cputime.stime; | ||
5173 | } | 5199 | } |
5174 | #else | 5200 | #else |
5175 | cputime_t task_utime(struct task_struct *p) | 5201 | |
5202 | #ifndef nsecs_to_cputime | ||
5203 | # define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs) | ||
5204 | #endif | ||
5205 | |||
5206 | void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) | ||
5176 | { | 5207 | { |
5177 | clock_t utime = cputime_to_clock_t(p->utime), | 5208 | cputime_t rtime, utime = p->utime, total = cputime_add(utime, p->stime); |
5178 | total = utime + cputime_to_clock_t(p->stime); | ||
5179 | u64 temp; | ||
5180 | 5209 | ||
5181 | /* | 5210 | /* |
5182 | * Use CFS's precise accounting: | 5211 | * Use CFS's precise accounting: |
5183 | */ | 5212 | */ |
5184 | temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime); | 5213 | rtime = nsecs_to_cputime(p->se.sum_exec_runtime); |
5185 | 5214 | ||
5186 | if (total) { | 5215 | if (total) { |
5187 | temp *= utime; | 5216 | u64 temp; |
5217 | |||
5218 | temp = (u64)(rtime * utime); | ||
5188 | do_div(temp, total); | 5219 | do_div(temp, total); |
5189 | } | 5220 | utime = (cputime_t)temp; |
5190 | utime = (clock_t)temp; | 5221 | } else |
5222 | utime = rtime; | ||
5223 | |||
5224 | /* | ||
5225 | * Compare with previous values, to keep monotonicity: | ||
5226 | */ | ||
5227 | p->prev_utime = max(p->prev_utime, utime); | ||
5228 | p->prev_stime = max(p->prev_stime, cputime_sub(rtime, p->prev_utime)); | ||
5191 | 5229 | ||
5192 | p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime)); | 5230 | *ut = p->prev_utime; |
5193 | return p->prev_utime; | 5231 | *st = p->prev_stime; |
5194 | } | 5232 | } |
5195 | 5233 | ||
5196 | cputime_t task_stime(struct task_struct *p) | 5234 | /* |
5235 | * Must be called with siglock held. | ||
5236 | */ | ||
5237 | void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) | ||
5197 | { | 5238 | { |
5198 | clock_t stime; | 5239 | struct signal_struct *sig = p->signal; |
5240 | struct task_cputime cputime; | ||
5241 | cputime_t rtime, utime, total; | ||
5199 | 5242 | ||
5200 | /* | 5243 | thread_group_cputime(p, &cputime); |
5201 | * Use CFS's precise accounting. (we subtract utime from | ||
5202 | * the total, to make sure the total observed by userspace | ||
5203 | * grows monotonically - apps rely on that): | ||
5204 | */ | ||
5205 | stime = nsec_to_clock_t(p->se.sum_exec_runtime) - | ||
5206 | cputime_to_clock_t(task_utime(p)); | ||
5207 | 5244 | ||
5208 | if (stime >= 0) | 5245 | total = cputime_add(cputime.utime, cputime.stime); |
5209 | p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime)); | 5246 | rtime = nsecs_to_cputime(cputime.sum_exec_runtime); |
5210 | 5247 | ||
5211 | return p->prev_stime; | 5248 | if (total) { |
5212 | } | 5249 | u64 temp; |
5213 | #endif | ||
5214 | 5250 | ||
5215 | inline cputime_t task_gtime(struct task_struct *p) | 5251 | temp = (u64)(rtime * cputime.utime); |
5216 | { | 5252 | do_div(temp, total); |
5217 | return p->gtime; | 5253 | utime = (cputime_t)temp; |
5254 | } else | ||
5255 | utime = rtime; | ||
5256 | |||
5257 | sig->prev_utime = max(sig->prev_utime, utime); | ||
5258 | sig->prev_stime = max(sig->prev_stime, | ||
5259 | cputime_sub(rtime, sig->prev_utime)); | ||
5260 | |||
5261 | *ut = sig->prev_utime; | ||
5262 | *st = sig->prev_stime; | ||
5218 | } | 5263 | } |
5264 | #endif | ||
5219 | 5265 | ||
5220 | /* | 5266 | /* |
5221 | * This function gets called by the timer code, with HZ frequency. | 5267 | * This function gets called by the timer code, with HZ frequency. |
@@ -6175,22 +6221,14 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio) | |||
6175 | BUG_ON(p->se.on_rq); | 6221 | BUG_ON(p->se.on_rq); |
6176 | 6222 | ||
6177 | p->policy = policy; | 6223 | p->policy = policy; |
6178 | switch (p->policy) { | ||
6179 | case SCHED_NORMAL: | ||
6180 | case SCHED_BATCH: | ||
6181 | case SCHED_IDLE: | ||
6182 | p->sched_class = &fair_sched_class; | ||
6183 | break; | ||
6184 | case SCHED_FIFO: | ||
6185 | case SCHED_RR: | ||
6186 | p->sched_class = &rt_sched_class; | ||
6187 | break; | ||
6188 | } | ||
6189 | |||
6190 | p->rt_priority = prio; | 6224 | p->rt_priority = prio; |
6191 | p->normal_prio = normal_prio(p); | 6225 | p->normal_prio = normal_prio(p); |
6192 | /* we are holding p->pi_lock already */ | 6226 | /* we are holding p->pi_lock already */ |
6193 | p->prio = rt_mutex_getprio(p); | 6227 | p->prio = rt_mutex_getprio(p); |
6228 | if (rt_prio(p->prio)) | ||
6229 | p->sched_class = &rt_sched_class; | ||
6230 | else | ||
6231 | p->sched_class = &fair_sched_class; | ||
6194 | set_load_weight(p); | 6232 | set_load_weight(p); |
6195 | } | 6233 | } |
6196 | 6234 | ||
@@ -6935,7 +6973,7 @@ void show_state_filter(unsigned long state_filter) | |||
6935 | /* | 6973 | /* |
6936 | * Only show locks if all tasks are dumped: | 6974 | * Only show locks if all tasks are dumped: |
6937 | */ | 6975 | */ |
6938 | if (state_filter == -1) | 6976 | if (!state_filter) |
6939 | debug_show_all_locks(); | 6977 | debug_show_all_locks(); |
6940 | } | 6978 | } |
6941 | 6979 | ||
@@ -7740,6 +7778,16 @@ early_initcall(migration_init); | |||
7740 | 7778 | ||
7741 | #ifdef CONFIG_SCHED_DEBUG | 7779 | #ifdef CONFIG_SCHED_DEBUG |
7742 | 7780 | ||
7781 | static __read_mostly int sched_domain_debug_enabled; | ||
7782 | |||
7783 | static int __init sched_domain_debug_setup(char *str) | ||
7784 | { | ||
7785 | sched_domain_debug_enabled = 1; | ||
7786 | |||
7787 | return 0; | ||
7788 | } | ||
7789 | early_param("sched_debug", sched_domain_debug_setup); | ||
7790 | |||
7743 | static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, | 7791 | static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, |
7744 | struct cpumask *groupmask) | 7792 | struct cpumask *groupmask) |
7745 | { | 7793 | { |
@@ -7826,6 +7874,9 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) | |||
7826 | cpumask_var_t groupmask; | 7874 | cpumask_var_t groupmask; |
7827 | int level = 0; | 7875 | int level = 0; |
7828 | 7876 | ||
7877 | if (!sched_domain_debug_enabled) | ||
7878 | return; | ||
7879 | |||
7829 | if (!sd) { | 7880 | if (!sd) { |
7830 | printk(KERN_DEBUG "CPU%d attaching NULL sched-domain.\n", cpu); | 7881 | printk(KERN_DEBUG "CPU%d attaching NULL sched-domain.\n", cpu); |
7831 | return; | 7882 | return; |
@@ -7905,6 +7956,8 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) | |||
7905 | 7956 | ||
7906 | static void free_rootdomain(struct root_domain *rd) | 7957 | static void free_rootdomain(struct root_domain *rd) |
7907 | { | 7958 | { |
7959 | synchronize_sched(); | ||
7960 | |||
7908 | cpupri_cleanup(&rd->cpupri); | 7961 | cpupri_cleanup(&rd->cpupri); |
7909 | 7962 | ||
7910 | free_cpumask_var(rd->rto_mask); | 7963 | free_cpumask_var(rd->rto_mask); |
@@ -8045,6 +8098,7 @@ static cpumask_var_t cpu_isolated_map; | |||
8045 | /* Setup the mask of cpus configured for isolated domains */ | 8098 | /* Setup the mask of cpus configured for isolated domains */ |
8046 | static int __init isolated_cpu_setup(char *str) | 8099 | static int __init isolated_cpu_setup(char *str) |
8047 | { | 8100 | { |
8101 | alloc_bootmem_cpumask_var(&cpu_isolated_map); | ||
8048 | cpulist_parse(str, cpu_isolated_map); | 8102 | cpulist_parse(str, cpu_isolated_map); |
8049 | return 1; | 8103 | return 1; |
8050 | } | 8104 | } |
@@ -8881,7 +8935,7 @@ static int build_sched_domains(const struct cpumask *cpu_map) | |||
8881 | return __build_sched_domains(cpu_map, NULL); | 8935 | return __build_sched_domains(cpu_map, NULL); |
8882 | } | 8936 | } |
8883 | 8937 | ||
8884 | static struct cpumask *doms_cur; /* current sched domains */ | 8938 | static cpumask_var_t *doms_cur; /* current sched domains */ |
8885 | static int ndoms_cur; /* number of sched domains in 'doms_cur' */ | 8939 | static int ndoms_cur; /* number of sched domains in 'doms_cur' */ |
8886 | static struct sched_domain_attr *dattr_cur; | 8940 | static struct sched_domain_attr *dattr_cur; |
8887 | /* attribues of custom domains in 'doms_cur' */ | 8941 | /* attribues of custom domains in 'doms_cur' */ |
@@ -8903,6 +8957,31 @@ int __attribute__((weak)) arch_update_cpu_topology(void) | |||
8903 | return 0; | 8957 | return 0; |
8904 | } | 8958 | } |
8905 | 8959 | ||
8960 | cpumask_var_t *alloc_sched_domains(unsigned int ndoms) | ||
8961 | { | ||
8962 | int i; | ||
8963 | cpumask_var_t *doms; | ||
8964 | |||
8965 | doms = kmalloc(sizeof(*doms) * ndoms, GFP_KERNEL); | ||
8966 | if (!doms) | ||
8967 | return NULL; | ||
8968 | for (i = 0; i < ndoms; i++) { | ||
8969 | if (!alloc_cpumask_var(&doms[i], GFP_KERNEL)) { | ||
8970 | free_sched_domains(doms, i); | ||
8971 | return NULL; | ||
8972 | } | ||
8973 | } | ||
8974 | return doms; | ||
8975 | } | ||
8976 | |||
8977 | void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms) | ||
8978 | { | ||
8979 | unsigned int i; | ||
8980 | for (i = 0; i < ndoms; i++) | ||
8981 | free_cpumask_var(doms[i]); | ||
8982 | kfree(doms); | ||
8983 | } | ||
8984 | |||
8906 | /* | 8985 | /* |
8907 | * Set up scheduler domains and groups. Callers must hold the hotplug lock. | 8986 | * Set up scheduler domains and groups. Callers must hold the hotplug lock. |
8908 | * For now this just excludes isolated cpus, but could be used to | 8987 | * For now this just excludes isolated cpus, but could be used to |
@@ -8914,12 +8993,12 @@ static int arch_init_sched_domains(const struct cpumask *cpu_map) | |||
8914 | 8993 | ||
8915 | arch_update_cpu_topology(); | 8994 | arch_update_cpu_topology(); |
8916 | ndoms_cur = 1; | 8995 | ndoms_cur = 1; |
8917 | doms_cur = kmalloc(cpumask_size(), GFP_KERNEL); | 8996 | doms_cur = alloc_sched_domains(ndoms_cur); |
8918 | if (!doms_cur) | 8997 | if (!doms_cur) |
8919 | doms_cur = fallback_doms; | 8998 | doms_cur = &fallback_doms; |
8920 | cpumask_andnot(doms_cur, cpu_map, cpu_isolated_map); | 8999 | cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map); |
8921 | dattr_cur = NULL; | 9000 | dattr_cur = NULL; |
8922 | err = build_sched_domains(doms_cur); | 9001 | err = build_sched_domains(doms_cur[0]); |
8923 | register_sched_domain_sysctl(); | 9002 | register_sched_domain_sysctl(); |
8924 | 9003 | ||
8925 | return err; | 9004 | return err; |
@@ -8969,19 +9048,19 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur, | |||
8969 | * doms_new[] to the current sched domain partitioning, doms_cur[]. | 9048 | * doms_new[] to the current sched domain partitioning, doms_cur[]. |
8970 | * It destroys each deleted domain and builds each new domain. | 9049 | * It destroys each deleted domain and builds each new domain. |
8971 | * | 9050 | * |
8972 | * 'doms_new' is an array of cpumask's of length 'ndoms_new'. | 9051 | * 'doms_new' is an array of cpumask_var_t's of length 'ndoms_new'. |
8973 | * The masks don't intersect (don't overlap.) We should setup one | 9052 | * The masks don't intersect (don't overlap.) We should setup one |
8974 | * sched domain for each mask. CPUs not in any of the cpumasks will | 9053 | * sched domain for each mask. CPUs not in any of the cpumasks will |
8975 | * not be load balanced. If the same cpumask appears both in the | 9054 | * not be load balanced. If the same cpumask appears both in the |
8976 | * current 'doms_cur' domains and in the new 'doms_new', we can leave | 9055 | * current 'doms_cur' domains and in the new 'doms_new', we can leave |
8977 | * it as it is. | 9056 | * it as it is. |
8978 | * | 9057 | * |
8979 | * The passed in 'doms_new' should be kmalloc'd. This routine takes | 9058 | * The passed in 'doms_new' should be allocated using |
8980 | * ownership of it and will kfree it when done with it. If the caller | 9059 | * alloc_sched_domains. This routine takes ownership of it and will |
8981 | * failed the kmalloc call, then it can pass in doms_new == NULL && | 9060 | * free_sched_domains it when done with it. If the caller failed the |
8982 | * ndoms_new == 1, and partition_sched_domains() will fallback to | 9061 | * alloc call, then it can pass in doms_new == NULL && ndoms_new == 1, |
8983 | * the single partition 'fallback_doms', it also forces the domains | 9062 | * and partition_sched_domains() will fallback to the single partition |
8984 | * to be rebuilt. | 9063 | * 'fallback_doms', it also forces the domains to be rebuilt. |
8985 | * | 9064 | * |
8986 | * If doms_new == NULL it will be replaced with cpu_online_mask. | 9065 | * If doms_new == NULL it will be replaced with cpu_online_mask. |
8987 | * ndoms_new == 0 is a special case for destroying existing domains, | 9066 | * ndoms_new == 0 is a special case for destroying existing domains, |
@@ -8989,8 +9068,7 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur, | |||
8989 | * | 9068 | * |
8990 | * Call with hotplug lock held | 9069 | * Call with hotplug lock held |
8991 | */ | 9070 | */ |
8992 | /* FIXME: Change to struct cpumask *doms_new[] */ | 9071 | void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], |
8993 | void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, | ||
8994 | struct sched_domain_attr *dattr_new) | 9072 | struct sched_domain_attr *dattr_new) |
8995 | { | 9073 | { |
8996 | int i, j, n; | 9074 | int i, j, n; |
@@ -9009,40 +9087,40 @@ void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, | |||
9009 | /* Destroy deleted domains */ | 9087 | /* Destroy deleted domains */ |
9010 | for (i = 0; i < ndoms_cur; i++) { | 9088 | for (i = 0; i < ndoms_cur; i++) { |
9011 | for (j = 0; j < n && !new_topology; j++) { | 9089 | for (j = 0; j < n && !new_topology; j++) { |
9012 | if (cpumask_equal(&doms_cur[i], &doms_new[j]) | 9090 | if (cpumask_equal(doms_cur[i], doms_new[j]) |
9013 | && dattrs_equal(dattr_cur, i, dattr_new, j)) | 9091 | && dattrs_equal(dattr_cur, i, dattr_new, j)) |
9014 | goto match1; | 9092 | goto match1; |
9015 | } | 9093 | } |
9016 | /* no match - a current sched domain not in new doms_new[] */ | 9094 | /* no match - a current sched domain not in new doms_new[] */ |
9017 | detach_destroy_domains(doms_cur + i); | 9095 | detach_destroy_domains(doms_cur[i]); |
9018 | match1: | 9096 | match1: |
9019 | ; | 9097 | ; |
9020 | } | 9098 | } |
9021 | 9099 | ||
9022 | if (doms_new == NULL) { | 9100 | if (doms_new == NULL) { |
9023 | ndoms_cur = 0; | 9101 | ndoms_cur = 0; |
9024 | doms_new = fallback_doms; | 9102 | doms_new = &fallback_doms; |
9025 | cpumask_andnot(&doms_new[0], cpu_online_mask, cpu_isolated_map); | 9103 | cpumask_andnot(doms_new[0], cpu_online_mask, cpu_isolated_map); |
9026 | WARN_ON_ONCE(dattr_new); | 9104 | WARN_ON_ONCE(dattr_new); |
9027 | } | 9105 | } |
9028 | 9106 | ||
9029 | /* Build new domains */ | 9107 | /* Build new domains */ |
9030 | for (i = 0; i < ndoms_new; i++) { | 9108 | for (i = 0; i < ndoms_new; i++) { |
9031 | for (j = 0; j < ndoms_cur && !new_topology; j++) { | 9109 | for (j = 0; j < ndoms_cur && !new_topology; j++) { |
9032 | if (cpumask_equal(&doms_new[i], &doms_cur[j]) | 9110 | if (cpumask_equal(doms_new[i], doms_cur[j]) |
9033 | && dattrs_equal(dattr_new, i, dattr_cur, j)) | 9111 | && dattrs_equal(dattr_new, i, dattr_cur, j)) |
9034 | goto match2; | 9112 | goto match2; |
9035 | } | 9113 | } |
9036 | /* no match - add a new doms_new */ | 9114 | /* no match - add a new doms_new */ |
9037 | __build_sched_domains(doms_new + i, | 9115 | __build_sched_domains(doms_new[i], |
9038 | dattr_new ? dattr_new + i : NULL); | 9116 | dattr_new ? dattr_new + i : NULL); |
9039 | match2: | 9117 | match2: |
9040 | ; | 9118 | ; |
9041 | } | 9119 | } |
9042 | 9120 | ||
9043 | /* Remember the new sched domains */ | 9121 | /* Remember the new sched domains */ |
9044 | if (doms_cur != fallback_doms) | 9122 | if (doms_cur != &fallback_doms) |
9045 | kfree(doms_cur); | 9123 | free_sched_domains(doms_cur, ndoms_cur); |
9046 | kfree(dattr_cur); /* kfree(NULL) is safe */ | 9124 | kfree(dattr_cur); /* kfree(NULL) is safe */ |
9047 | doms_cur = doms_new; | 9125 | doms_cur = doms_new; |
9048 | dattr_cur = dattr_new; | 9126 | dattr_cur = dattr_new; |
@@ -9364,10 +9442,6 @@ void __init sched_init(void) | |||
9364 | #ifdef CONFIG_CPUMASK_OFFSTACK | 9442 | #ifdef CONFIG_CPUMASK_OFFSTACK |
9365 | alloc_size += num_possible_cpus() * cpumask_size(); | 9443 | alloc_size += num_possible_cpus() * cpumask_size(); |
9366 | #endif | 9444 | #endif |
9367 | /* | ||
9368 | * As sched_init() is called before page_alloc is setup, | ||
9369 | * we use alloc_bootmem(). | ||
9370 | */ | ||
9371 | if (alloc_size) { | 9445 | if (alloc_size) { |
9372 | ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT); | 9446 | ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT); |
9373 | 9447 | ||
@@ -9522,6 +9596,8 @@ void __init sched_init(void) | |||
9522 | rq->cpu = i; | 9596 | rq->cpu = i; |
9523 | rq->online = 0; | 9597 | rq->online = 0; |
9524 | rq->migration_thread = NULL; | 9598 | rq->migration_thread = NULL; |
9599 | rq->idle_stamp = 0; | ||
9600 | rq->avg_idle = 2*sysctl_sched_migration_cost; | ||
9525 | INIT_LIST_HEAD(&rq->migration_queue); | 9601 | INIT_LIST_HEAD(&rq->migration_queue); |
9526 | rq_attach_root(rq, &def_root_domain); | 9602 | rq_attach_root(rq, &def_root_domain); |
9527 | #endif | 9603 | #endif |
@@ -9571,7 +9647,9 @@ void __init sched_init(void) | |||
9571 | zalloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT); | 9647 | zalloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT); |
9572 | alloc_cpumask_var(&nohz.ilb_grp_nohz_mask, GFP_NOWAIT); | 9648 | alloc_cpumask_var(&nohz.ilb_grp_nohz_mask, GFP_NOWAIT); |
9573 | #endif | 9649 | #endif |
9574 | zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT); | 9650 | /* May be allocated at isolcpus cmdline parse time */ |
9651 | if (cpu_isolated_map == NULL) | ||
9652 | zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT); | ||
9575 | #endif /* SMP */ | 9653 | #endif /* SMP */ |
9576 | 9654 | ||
9577 | perf_event_init(); | 9655 | perf_event_init(); |