path: root/kernel/sched
author     Vincent Guittot <vincent.guittot@linaro.org>    2015-02-27 10:54:14 -0500
committer  Ingo Molnar <mingo@kernel.org>                  2015-03-27 04:36:06 -0400
commit     1aaf90a4b88aae26a4535ba01dacab520a310d17 (patch)
tree       1874f1bd42c23ef58d38e139d8295c5593fde242 /kernel/sched
parent     caff37ef96eac7fe96a582d032f6958e834e9447 (diff)
sched: Move CFS tasks to CPUs with higher capacity
When a CPU is used to handle a lot of IRQs or some RT tasks, the remaining
capacity for CFS tasks can be significantly reduced. Once we detect such a
situation by comparing cpu_capacity_orig and cpu_capacity, we trigger an idle
load balance to check whether it is worth moving its tasks to an idle CPU.
It is worth trying to move the task before the CPU is fully utilized, to
minimize preemption by IRQs or RT tasks.

Once the idle load balance has selected the busiest CPU, it looks for an
active load balance in only two cases:

  - There is only 1 task on the busiest CPU.

  - We haven't been able to move a task off the busiest rq.

A CPU with reduced capacity is covered by the 1st case, and it is worth
actively migrating its task if the idle CPU has more available capacity for
CFS tasks. This test has been added to need_active_balance().

As a side note, this will not generate more spurious idle load balances,
because we already trigger one if there is more than 1 busy CPU. If this CPU
is the only one that has a task, we will trigger the idle load balance once
to migrate the task.

The nohz_kick_needed() function has been cleaned up a bit while adding the
new test.

env.src_cpu and env.src_rq must be set unconditionally because they are used
in need_active_balance(), which is called even if busiest->nr_running equals 1.

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Morten.Rasmussen@arm.com
Cc: dietmar.eggemann@arm.com
Cc: efault@gmx.de
Cc: kamalesh@linux.vnet.ibm.com
Cc: linaro-kernel@lists.linaro.org
Cc: nicolas.pitre@linaro.org
Cc: preeti@linux.vnet.ibm.com
Cc: riel@redhat.com
Link: http://lkml.kernel.org/r/1425052454-25797-12-git-send-email-vincent.guittot@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/sched')
-rw-r--r--   kernel/sched/fair.c   69
1 file changed, 47 insertions(+), 22 deletions(-)
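
The changelog's condition boils down to a single capacity comparison. Below is a
minimal sketch of the new need_active_balance() test, distilled from the hunk
that follows (check_cpu_capacity() and capacity_of() are existing
kernel/sched/fair.c helpers the patch relies on; this is an illustration, not
the literal hunk):

        /*
         * Sketch: active migration is worth it when dst_cpu is idle, the
         * source rq runs a single CFS task, the source CPU's capacity is
         * noticeably reduced by RT/IRQ pressure, and dst_cpu still offers
         * more capacity than src_cpu scaled by the domain's imbalance_pct.
         */
        if (env->idle != CPU_NOT_IDLE &&
            env->src_rq->cfs.h_nr_running == 1 &&
            check_cpu_capacity(env->src_rq, sd) &&
            capacity_of(env->src_cpu) * sd->imbalance_pct <
            capacity_of(env->dst_cpu) * 100)
                return 1;       /* request an active load balance */
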
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index d36f8d221669..0576ce0e0af2 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6855,6 +6855,19 @@ static int need_active_balance(struct lb_env *env)
                         return 1;
         }
 
+        /*
+         * The dst_cpu is idle and the src_cpu CPU has only 1 CFS task.
+         * It's worth migrating the task if the src_cpu's capacity is reduced
+         * because of other sched_class or IRQs if more capacity stays
+         * available on dst_cpu.
+         */
+        if ((env->idle != CPU_NOT_IDLE) &&
+            (env->src_rq->cfs.h_nr_running == 1)) {
+                if ((check_cpu_capacity(env->src_rq, sd)) &&
+                    (capacity_of(env->src_cpu)*sd->imbalance_pct < capacity_of(env->dst_cpu)*100))
+                        return 1;
+        }
+
         return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2);
 }
 
@@ -6954,6 +6967,9 @@ redo:
 
         schedstat_add(sd, lb_imbalance[idle], env.imbalance);
 
+        env.src_cpu = busiest->cpu;
+        env.src_rq = busiest;
+
         ld_moved = 0;
         if (busiest->nr_running > 1) {
                 /*
@@ -6963,8 +6979,6 @@ redo:
                  * correctly treated as an imbalance.
                  */
                 env.flags |= LBF_ALL_PINNED;
-                env.src_cpu = busiest->cpu;
-                env.src_rq = busiest;
                 env.loop_max = min(sysctl_sched_nr_migrate, busiest->nr_running);
 
 more_balance:
@@ -7664,22 +7678,25 @@ end:
 
 /*
  * Current heuristic for kicking the idle load balancer in the presence
- * of an idle cpu is the system.
+ * of an idle cpu in the system.
  *   - This rq has more than one task.
- *   - At any scheduler domain level, this cpu's scheduler group has multiple
- *     busy cpu's exceeding the group's capacity.
+ *   - This rq has at least one CFS task and the capacity of the CPU is
+ *     significantly reduced because of RT tasks or IRQs.
+ *   - At parent of LLC scheduler domain level, this cpu's scheduler group has
+ *     multiple busy cpu.
  *   - For SD_ASYM_PACKING, if the lower numbered cpu's in the scheduler
  *     domain span are idle.
  */
-static inline int nohz_kick_needed(struct rq *rq)
+static inline bool nohz_kick_needed(struct rq *rq)
 {
         unsigned long now = jiffies;
         struct sched_domain *sd;
         struct sched_group_capacity *sgc;
         int nr_busy, cpu = rq->cpu;
+        bool kick = false;
 
         if (unlikely(rq->idle_balance))
-                return 0;
+                return false;
 
         /*
          * We may be recently in ticked or tickless idle mode. At the first
@@ -7693,38 +7710,46 @@ static inline int nohz_kick_needed(struct rq *rq)
          * balancing.
          */
         if (likely(!atomic_read(&nohz.nr_cpus)))
-                return 0;
+                return false;
 
         if (time_before(now, nohz.next_balance))
-                return 0;
+                return false;
 
         if (rq->nr_running >= 2)
-                goto need_kick;
+                return true;
 
         rcu_read_lock();
         sd = rcu_dereference(per_cpu(sd_busy, cpu));
-
         if (sd) {
                 sgc = sd->groups->sgc;
                 nr_busy = atomic_read(&sgc->nr_busy_cpus);
 
-                if (nr_busy > 1)
-                        goto need_kick_unlock;
+                if (nr_busy > 1) {
+                        kick = true;
+                        goto unlock;
+                }
+
         }
 
-        sd = rcu_dereference(per_cpu(sd_asym, cpu));
+        sd = rcu_dereference(rq->sd);
+        if (sd) {
+                if ((rq->cfs.h_nr_running >= 1) &&
+                    check_cpu_capacity(rq, sd)) {
+                        kick = true;
+                        goto unlock;
+                }
+        }
 
+        sd = rcu_dereference(per_cpu(sd_asym, cpu));
         if (sd && (cpumask_first_and(nohz.idle_cpus_mask,
-                                  sched_domain_span(sd)) < cpu))
-                goto need_kick_unlock;
-
-        rcu_read_unlock();
-        return 0;
+                                  sched_domain_span(sd)) < cpu)) {
+                kick = true;
+                goto unlock;
+        }
 
-need_kick_unlock:
+unlock:
         rcu_read_unlock();
-need_kick:
-        return 1;
+        return kick;
 }
 #else
 static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle) { }
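
For reference, the capacity-reduction test used in both hunks compares the rq's
current cpu_capacity against cpu_capacity_orig, scaled by the domain's
imbalance_pct, as the changelog describes. A minimal sketch of such a
check_cpu_capacity() helper (the real helper is defined elsewhere in
kernel/sched/fair.c and is not part of this diff; the exact shape below is an
assumption based on the changelog):

        /*
         * Sketch: return non-zero when side activity (IRQs, RT/DL tasks) has
         * noticeably reduced this CPU's capacity, using imbalance_pct as the
         * threshold, e.g. imbalance_pct = 125 means "more than ~20% lost".
         */
        static inline int check_cpu_capacity(struct rq *rq, struct sched_domain *sd)
        {
                return (rq->cpu_capacity * sd->imbalance_pct) <
                       (rq->cpu_capacity_orig * 100);
        }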