path: root/kernel/sched/fair.c
author    Linus Torvalds <torvalds@linux-foundation.org>  2012-10-01 13:43:39 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2012-10-01 13:43:39 -0400
commit    0b981cb94bc63a2d0e5eccccdca75fe57643ffce (patch)
tree      966ad6e6807fd1041d9962c9904e032a5ab07a65 /kernel/sched/fair.c
parent    4cba3335826cbb36a218c3f5a1387e2c7c7ca9aa (diff)
parent    fdf9c356502ae02238efcdf90cefd7b473a63fd4 (diff)
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler changes from Ingo Molnar:
 "Continued quest to clean up and enhance the cputime code by Frederic
  Weisbecker, in preparation for future tickless kernel features.

  Other than that, smallish changes."

Fix up trivial conflicts due to additions next to each other in arch/{x86/}Kconfig

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (24 commits)
  cputime: Make finegrained irqtime accounting generally available
  cputime: Gather time/stats accounting config options into a single menu
  ia64: Reuse system and user vtime accounting functions on task switch
  ia64: Consolidate user vtime accounting
  vtime: Consolidate system/idle context detection
  cputime: Use a proper subsystem naming for vtime related APIs
  sched: cpu_power: enable ARCH_POWER
  sched/nohz: Clean up select_nohz_load_balancer()
  sched: Fix load avg vs. cpu-hotplug
  sched: Remove __ARCH_WANT_INTERRUPTS_ON_CTXSW
  sched: Fix nohz_idle_balance()
  sched: Remove useless code in yield_to()
  sched: Add time unit suffix to sched sysctl knobs
  sched/debug: Limit sd->*_idx range on sysctl
  sched: Remove AFFINE_WAKEUPS feature flag
  s390: Remove leftover account_tick_vtime() header
  cputime: Consolidate vtime handling on context switch
  sched: Move cputime code to its own file
  cputime: Generalize CONFIG_VIRT_CPU_ACCOUNTING
  tile: Remove SD_PREFER_LOCAL leftover
  ...
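Of the changes above, the ones visible in this file are the nohz idle-balance cleanup, the conversion of load_balance() to env.dst_rq, and the simplification of select_task_rq_fair(). As a reading aid, a minimal sketch of the renamed nohz hooks follows, condensed from the fair.c hunks below; the nohz bookkeeping fields and the NOHZ_TICK_STOPPED flag are taken from those hunks, the surrounding scheduler definitions are assumed, and the tail of the exit path is not shown in the hunk here.

	/* Record that this cpu stopped its tick and is an idle-balance candidate. */
	void nohz_balance_enter_idle(int cpu)
	{
		/* If this cpu is going down, then nothing needs to be done. */
		if (!cpu_active(cpu))
			return;

		if (test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))
			return;

		cpumask_set_cpu(cpu, nohz.idle_cpus_mask);
		atomic_inc(&nohz.nr_cpus);
		set_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
	}

	/* Undo the bookkeeping when the tick restarts or the cpu goes offline (CPU_DYING). */
	static inline void nohz_balance_exit_idle(int cpu)
	{
		if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))) {
			cpumask_clear_cpu(cpu, nohz.idle_cpus_mask);
			/* the rest of the exit path mirrors the enter path above */
		}
	}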
Diffstat (limited to 'kernel/sched/fair.c')
-rw-r--r--  kernel/sched/fair.c  81
1 file changed, 24 insertions(+), 57 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 96e2b18b6283..6b800a14b990 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -597,7 +597,7 @@ calc_delta_fair(unsigned long delta, struct sched_entity *se)
 /*
  * The idea is to set a period in which each task runs once.
  *
- * When there are too many tasks (sysctl_sched_nr_latency) we have to stretch
+ * When there are too many tasks (sched_nr_latency) we have to stretch
  * this period because otherwise the slices get too small.
  *
  * p = (nr <= nl) ? l : l*nr/nl
@@ -2700,7 +2700,6 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
 	int prev_cpu = task_cpu(p);
 	int new_cpu = cpu;
 	int want_affine = 0;
-	int want_sd = 1;
 	int sync = wake_flags & WF_SYNC;
 
 	if (p->nr_cpus_allowed == 1)
@@ -2718,48 +2717,21 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
 			continue;
 
 		/*
-		 * If power savings logic is enabled for a domain, see if we
-		 * are not overloaded, if so, don't balance wider.
-		 */
-		if (tmp->flags & (SD_PREFER_LOCAL)) {
-			unsigned long power = 0;
-			unsigned long nr_running = 0;
-			unsigned long capacity;
-			int i;
-
-			for_each_cpu(i, sched_domain_span(tmp)) {
-				power += power_of(i);
-				nr_running += cpu_rq(i)->cfs.nr_running;
-			}
-
-			capacity = DIV_ROUND_CLOSEST(power, SCHED_POWER_SCALE);
-
-			if (nr_running < capacity)
-				want_sd = 0;
-		}
-
-		/*
 		 * If both cpu and prev_cpu are part of this domain,
 		 * cpu is a valid SD_WAKE_AFFINE target.
 		 */
 		if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
 		    cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) {
 			affine_sd = tmp;
-			want_affine = 0;
-		}
-
-		if (!want_sd && !want_affine)
 			break;
+		}
 
-		if (!(tmp->flags & sd_flag))
-			continue;
-
-		if (want_sd)
+		if (tmp->flags & sd_flag)
 			sd = tmp;
 	}
 
 	if (affine_sd) {
-		if (cpu == prev_cpu || wake_affine(affine_sd, p, sync))
+		if (cpu != prev_cpu && wake_affine(affine_sd, p, sync))
 			prev_cpu = cpu;
 
 		new_cpu = select_idle_sibling(p, prev_cpu);
@@ -4295,7 +4267,7 @@ redo:
 		goto out_balanced;
 	}
 
-	BUG_ON(busiest == this_rq);
+	BUG_ON(busiest == env.dst_rq);
 
 	schedstat_add(sd, lb_imbalance[idle], env.imbalance);
 
@@ -4316,7 +4288,7 @@ redo:
 	update_h_load(env.src_cpu);
 more_balance:
 	local_irq_save(flags);
-	double_rq_lock(this_rq, busiest);
+	double_rq_lock(env.dst_rq, busiest);
 
 	/*
 	 * cur_ld_moved - load moved in current iteration
@@ -4324,7 +4296,7 @@ more_balance:
 	 */
 	cur_ld_moved = move_tasks(&env);
 	ld_moved += cur_ld_moved;
-	double_rq_unlock(this_rq, busiest);
+	double_rq_unlock(env.dst_rq, busiest);
 	local_irq_restore(flags);
 
 	if (env.flags & LBF_NEED_BREAK) {
@@ -4360,8 +4332,7 @@ more_balance:
 		if ((env.flags & LBF_SOME_PINNED) && env.imbalance > 0 &&
 				lb_iterations++ < max_lb_iterations) {
 
-			this_rq = cpu_rq(env.new_dst_cpu);
-			env.dst_rq = this_rq;
+			env.dst_rq = cpu_rq(env.new_dst_cpu);
 			env.dst_cpu = env.new_dst_cpu;
 			env.flags &= ~LBF_SOME_PINNED;
 			env.loop = 0;
@@ -4646,7 +4617,7 @@ static void nohz_balancer_kick(int cpu)
 	return;
 }
 
-static inline void clear_nohz_tick_stopped(int cpu)
+static inline void nohz_balance_exit_idle(int cpu)
 {
 	if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))) {
 		cpumask_clear_cpu(cpu, nohz.idle_cpus_mask);
@@ -4686,28 +4657,23 @@ void set_cpu_sd_state_idle(void)
 }
 
 /*
- * This routine will record that this cpu is going idle with tick stopped.
+ * This routine will record that the cpu is going idle with tick stopped.
  * This info will be used in performing idle load balancing in the future.
  */
-void select_nohz_load_balancer(int stop_tick)
+void nohz_balance_enter_idle(int cpu)
 {
-	int cpu = smp_processor_id();
-
 	/*
 	 * If this cpu is going down, then nothing needs to be done.
 	 */
 	if (!cpu_active(cpu))
 		return;
 
-	if (stop_tick) {
-		if (test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))
-			return;
+	if (test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))
+		return;
 
-		cpumask_set_cpu(cpu, nohz.idle_cpus_mask);
-		atomic_inc(&nohz.nr_cpus);
-		set_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
-	}
-	return;
+	cpumask_set_cpu(cpu, nohz.idle_cpus_mask);
+	atomic_inc(&nohz.nr_cpus);
+	set_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
 }
 
 static int __cpuinit sched_ilb_notifier(struct notifier_block *nfb,
@@ -4715,7 +4681,7 @@ static int __cpuinit sched_ilb_notifier(struct notifier_block *nfb,
 {
 	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_DYING:
-		clear_nohz_tick_stopped(smp_processor_id());
+		nohz_balance_exit_idle(smp_processor_id());
 		return NOTIFY_OK;
 	default:
 		return NOTIFY_DONE;
@@ -4837,14 +4803,15 @@ static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle)
 		if (need_resched())
 			break;
 
-		raw_spin_lock_irq(&this_rq->lock);
-		update_rq_clock(this_rq);
-		update_idle_cpu_load(this_rq);
-		raw_spin_unlock_irq(&this_rq->lock);
+		rq = cpu_rq(balance_cpu);
+
+		raw_spin_lock_irq(&rq->lock);
+		update_rq_clock(rq);
+		update_idle_cpu_load(rq);
+		raw_spin_unlock_irq(&rq->lock);
 
 		rebalance_domains(balance_cpu, CPU_IDLE);
 
-		rq = cpu_rq(balance_cpu);
 		if (time_after(this_rq->next_balance, rq->next_balance))
 			this_rq->next_balance = rq->next_balance;
 	}
@@ -4875,7 +4842,7 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu)
 	 * busy tick after returning from idle, we will update the busy stats.
 	 */
 	set_cpu_sd_state_busy();
-	clear_nohz_tick_stopped(cpu);
+	nohz_balance_exit_idle(cpu);
 
 	/*
 	 * None are in tickless mode and hence no need for NOHZ idle load