| author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-01 13:43:39 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-01 13:43:39 -0400 |
| commit | 0b981cb94bc63a2d0e5eccccdca75fe57643ffce (patch) | |
| tree | 966ad6e6807fd1041d9962c9904e032a5ab07a65 /kernel/sched/fair.c | |
| parent | 4cba3335826cbb36a218c3f5a1387e2c7c7ca9aa (diff) | |
| parent | fdf9c356502ae02238efcdf90cefd7b473a63fd4 (diff) | |
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler changes from Ingo Molnar:
"Continued quest to clean up and enhance the cputime code by Frederic
Weisbecker, in preparation for future tickless kernel features.
Other than that, smallish changes."
Fix up trivial conflicts due to additions next to each other in arch/{x86/}Kconfig
* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (24 commits)
cputime: Make finegrained irqtime accounting generally available
cputime: Gather time/stats accounting config options into a single menu
ia64: Reuse system and user vtime accounting functions on task switch
ia64: Consolidate user vtime accounting
vtime: Consolidate system/idle context detection
cputime: Use a proper subsystem naming for vtime related APIs
sched: cpu_power: enable ARCH_POWER
sched/nohz: Clean up select_nohz_load_balancer()
sched: Fix load avg vs. cpu-hotplug
sched: Remove __ARCH_WANT_INTERRUPTS_ON_CTXSW
sched: Fix nohz_idle_balance()
sched: Remove useless code in yield_to()
sched: Add time unit suffix to sched sysctl knobs
sched/debug: Limit sd->*_idx range on sysctl
sched: Remove AFFINE_WAKEUPS feature flag
s390: Remove leftover account_tick_vtime() header
cputime: Consolidate vtime handling on context switch
sched: Move cputime code to its own file
cputime: Generalize CONFIG_VIRT_CPU_ACCOUNTING
tile: Remove SD_PREFER_LOCAL leftover
...
Diffstat (limited to 'kernel/sched/fair.c')
-rw-r--r--  kernel/sched/fair.c | 81
1 file changed, 24 insertions(+), 57 deletions(-)
```diff
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 96e2b18b6283..6b800a14b990 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -597,7 +597,7 @@ calc_delta_fair(unsigned long delta, struct sched_entity *se)
 /*
  * The idea is to set a period in which each task runs once.
  *
- * When there are too many tasks (sysctl_sched_nr_latency) we have to stretch
+ * When there are too many tasks (sched_nr_latency) we have to stretch
  * this period because otherwise the slices get too small.
  *
  * p = (nr <= nl) ? l : l*nr/nl
```
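The formula in the comment is easy to check numerically. Below is a standalone sketch of how the period stretches once nr_running exceeds nr_latency; the 6 ms latency window and threshold of 8 are illustrative assumptions, not values taken from this diff.

```c
/*
 * Toy evaluation of the comment's formula: p = (nr <= nl) ? l : l*nr/nl.
 * The tunable values are illustrative, not the kernel's actual defaults.
 */
#include <stdio.h>

static unsigned long sched_period_ns(unsigned long nr_running,
				     unsigned long nr_latency,
				     unsigned long latency_ns)
{
	if (nr_running <= nr_latency)
		return latency_ns;
	/* More tasks than the latency window can hold: stretch the period. */
	return latency_ns * nr_running / nr_latency;
}

int main(void)
{
	const unsigned long latency_ns = 6000000UL;	/* assumed 6 ms window */
	const unsigned long nr_latency = 8;		/* assumed threshold */

	printf("%lu\n", sched_period_ns(4, nr_latency, latency_ns));	/* 6000000 */
	printf("%lu\n", sched_period_ns(16, nr_latency, latency_ns));	/* 12000000 */
	return 0;
}
```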
```diff
@@ -2700,7 +2700,6 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
 	int prev_cpu = task_cpu(p);
 	int new_cpu = cpu;
 	int want_affine = 0;
-	int want_sd = 1;
 	int sync = wake_flags & WF_SYNC;
 
 	if (p->nr_cpus_allowed == 1)
@@ -2718,48 +2717,21 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
 			continue;
 
 		/*
-		 * If power savings logic is enabled for a domain, see if we
-		 * are not overloaded, if so, don't balance wider.
-		 */
-		if (tmp->flags & (SD_PREFER_LOCAL)) {
-			unsigned long power = 0;
-			unsigned long nr_running = 0;
-			unsigned long capacity;
-			int i;
-
-			for_each_cpu(i, sched_domain_span(tmp)) {
-				power += power_of(i);
-				nr_running += cpu_rq(i)->cfs.nr_running;
-			}
-
-			capacity = DIV_ROUND_CLOSEST(power, SCHED_POWER_SCALE);
-
-			if (nr_running < capacity)
-				want_sd = 0;
-		}
-
-		/*
 		 * If both cpu and prev_cpu are part of this domain,
 		 * cpu is a valid SD_WAKE_AFFINE target.
 		 */
 		if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
 		    cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) {
 			affine_sd = tmp;
-			want_affine = 0;
-		}
-
-		if (!want_sd && !want_affine)
 			break;
+		}
 
-		if (!(tmp->flags & sd_flag))
-			continue;
-
-		if (want_sd)
+		if (tmp->flags & sd_flag)
 			sd = tmp;
 	}
 
 	if (affine_sd) {
-		if (cpu == prev_cpu || wake_affine(affine_sd, p, sync))
+		if (cpu != prev_cpu && wake_affine(affine_sd, p, sync))
 			prev_cpu = cpu;
 
 		new_cpu = select_idle_sibling(p, prev_cpu);
```
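Beyond dropping the power-savings heuristic, this hunk stops the domain walk at the first wake-affine candidate and inverts the final wake_affine() test. The inversion is behavior-preserving: when cpu == prev_cpu the assignment prev_cpu = cpu is a no-op, so both forms select the same CPU. Here is a standalone sketch (with wake_affine() reduced to a hypothetical boolean) that exhaustively checks the equivalence.

```c
/*
 * Check that the old and new guards around "prev_cpu = cpu" agree.
 * wake_affine() is reduced to a hypothetical boolean input here.
 */
#include <assert.h>
#include <stdio.h>

static int old_pick(int cpu, int prev_cpu, int affine)
{
	if (cpu == prev_cpu || affine)
		prev_cpu = cpu;
	return prev_cpu;
}

static int new_pick(int cpu, int prev_cpu, int affine)
{
	if (cpu != prev_cpu && affine)
		prev_cpu = cpu;
	return prev_cpu;
}

int main(void)
{
	for (int cpu = 0; cpu < 4; cpu++)
		for (int prev = 0; prev < 4; prev++)
			for (int affine = 0; affine <= 1; affine++)
				assert(old_pick(cpu, prev, affine) ==
				       new_pick(cpu, prev, affine));
	printf("old and new conditions select the same cpu\n");
	return 0;
}
```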
```diff
@@ -4295,7 +4267,7 @@ redo:
 		goto out_balanced;
 	}
 
-	BUG_ON(busiest == this_rq);
+	BUG_ON(busiest == env.dst_rq);
 
 	schedstat_add(sd, lb_imbalance[idle], env.imbalance);
 
@@ -4316,7 +4288,7 @@ redo:
 		update_h_load(env.src_cpu);
 more_balance:
 		local_irq_save(flags);
-		double_rq_lock(this_rq, busiest);
+		double_rq_lock(env.dst_rq, busiest);
 
 		/*
 		 * cur_ld_moved - load moved in current iteration
@@ -4324,7 +4296,7 @@ more_balance:
 		 */
 		cur_ld_moved = move_tasks(&env);
 		ld_moved += cur_ld_moved;
-		double_rq_unlock(this_rq, busiest);
+		double_rq_unlock(env.dst_rq, busiest);
 		local_irq_restore(flags);
 
 		if (env.flags & LBF_NEED_BREAK) {
@@ -4360,8 +4332,7 @@ more_balance:
 		if ((env.flags & LBF_SOME_PINNED) && env.imbalance > 0 &&
 				lb_iterations++ < max_lb_iterations) {
 
-			this_rq = cpu_rq(env.new_dst_cpu);
-			env.dst_rq = this_rq;
+			env.dst_rq = cpu_rq(env.new_dst_cpu);
 			env.dst_cpu = env.new_dst_cpu;
 			env.flags &= ~LBF_SOME_PINNED;
 			env.loop = 0;
```
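The last hunk above retargets the balance destination through env.dst_rq alone instead of going through a this_rq local that shadowed it. A minimal sketch (hypothetical struct and field names, not kernel code) of why keeping a single copy is preferable:

```c
/*
 * Hypothetical sketch of the refactoring pattern: keep the destination in
 * the env struct only, so retargeting cannot leave a shadow copy stale.
 */
#include <stdio.h>

struct lb_env_sketch {		/* stand-in for the kernel's struct lb_env */
	int dst_cpu;
	int src_cpu;
};

static void retarget(struct lb_env_sketch *env, int new_dst_cpu)
{
	/* Single assignment; no separate "this_rq"-style alias to update. */
	env->dst_cpu = new_dst_cpu;
}

int main(void)
{
	struct lb_env_sketch env = { .dst_cpu = 0, .src_cpu = 3 };

	retarget(&env, 2);
	printf("dst=%d src=%d\n", env.dst_cpu, env.src_cpu);
	return 0;
}
```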
```diff
@@ -4646,7 +4617,7 @@ static void nohz_balancer_kick(int cpu)
 	return;
 }
 
-static inline void clear_nohz_tick_stopped(int cpu)
+static inline void nohz_balance_exit_idle(int cpu)
 {
 	if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))) {
 		cpumask_clear_cpu(cpu, nohz.idle_cpus_mask);
@@ -4686,28 +4657,23 @@ void set_cpu_sd_state_idle(void)
 }
 
 /*
- * This routine will record that this cpu is going idle with tick stopped.
+ * This routine will record that the cpu is going idle with tick stopped.
  * This info will be used in performing idle load balancing in the future.
  */
-void select_nohz_load_balancer(int stop_tick)
+void nohz_balance_enter_idle(int cpu)
 {
-	int cpu = smp_processor_id();
-
 	/*
 	 * If this cpu is going down, then nothing needs to be done.
 	 */
 	if (!cpu_active(cpu))
 		return;
 
-	if (stop_tick) {
-		if (test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))
-			return;
+	if (test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))
+		return;
 
-		cpumask_set_cpu(cpu, nohz.idle_cpus_mask);
-		atomic_inc(&nohz.nr_cpus);
-		set_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
-	}
-	return;
+	cpumask_set_cpu(cpu, nohz.idle_cpus_mask);
+	atomic_inc(&nohz.nr_cpus);
+	set_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
 }
 
 static int __cpuinit sched_ilb_notifier(struct notifier_block *nfb,
```
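The rename also changes the calling convention: select_nohz_load_balancer() took a stop_tick flag and looked up the CPU itself, while nohz_balance_enter_idle() is meant to be called only when the tick is actually being stopped and is handed the CPU explicitly. The snippet below is an illustrative sketch; the real call site is in the NOHZ tick code and is not part of this diff.

```c
/*
 * Illustrative caller sketch only -- the real call site lives in the NOHZ
 * tick code, outside this diff.  Where the old code called
 * select_nohz_load_balancer(1) from the CPU stopping its tick, the new
 * API is handed that CPU explicitly.
 */
static void on_tick_stop(void)		/* hypothetical hook name */
{
	nohz_balance_enter_idle(smp_processor_id());
}
```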
```diff
@@ -4715,7 +4681,7 @@ static int __cpuinit sched_ilb_notifier(struct notifier_block *nfb,
 {
 	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_DYING:
-		clear_nohz_tick_stopped(smp_processor_id());
+		nohz_balance_exit_idle(smp_processor_id());
 		return NOTIFY_OK;
 	default:
 		return NOTIFY_DONE;
@@ -4837,14 +4803,15 @@ static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle)
 		if (need_resched())
 			break;
 
-		raw_spin_lock_irq(&this_rq->lock);
-		update_rq_clock(this_rq);
-		update_idle_cpu_load(this_rq);
-		raw_spin_unlock_irq(&this_rq->lock);
+		rq = cpu_rq(balance_cpu);
+
+		raw_spin_lock_irq(&rq->lock);
+		update_rq_clock(rq);
+		update_idle_cpu_load(rq);
+		raw_spin_unlock_irq(&rq->lock);
 
 		rebalance_domains(balance_cpu, CPU_IDLE);
 
-		rq = cpu_rq(balance_cpu);
 		if (time_after(this_rq->next_balance, rq->next_balance))
 			this_rq->next_balance = rq->next_balance;
 	}
@@ -4875,7 +4842,7 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu)
 	 * busy tick after returning from idle, we will update the busy stats.
 	 */
 	set_cpu_sd_state_busy();
-	clear_nohz_tick_stopped(cpu);
+	nohz_balance_exit_idle(cpu);
 
 	/*
 	 * None are in tickless mode and hence no need for NOHZ idle load
```