author	Jason Low <jason.low2@hp.com>	2014-05-08 20:49:22 -0400
committer	Ingo Molnar <mingo@kernel.org>	2014-05-22 05:16:32 -0400
commit	52a08ef1f13a11289c9e18cd4cfb4e51c024058b (patch)
tree	fbdde6221312e3ab2ea3a258a94d3e24dace5c54
parent	a9467fa3cd2d5bf39e7cb7d0706d29d7ef4df212 (diff)
sched: Fix the rq->next_balance logic in rebalance_domains() and idle_balance()
Currently, in idle_balance(), we update rq->next_balance when we pull tasks. However, it is also important to update this in the !pulled_task case too.

When the CPU is "busy" (the CPU isn't idle), rq->next_balance gets computed using sd->busy_factor (so we increase the balance interval when the CPU is busy). However, when the CPU goes idle, rq->next_balance could still be set to a large value that was computed with the sd->busy_factor.

Thus, we need to also update rq->next_balance in idle_balance() in the !pulled_task cases too, so that rq->next_balance gets updated without taking the busy_factor into account when the CPU is about to go idle.

This patch makes rq->next_balance get updated independently of whether or not we pulled a task. We also add logic to ensure that we always traverse at least one sched domain, so that we get a proper next_balance value for updating rq->next_balance.

Additionally, since load_balance() modifies sd->balance_interval, we need to re-obtain the sched domain's interval after the call to load_balance() in rebalance_domains() before we update rq->next_balance.

This patch adds and uses two new helper functions, update_next_balance() and get_sd_balance_interval(), to update next_balance and to obtain the sched domain's balance_interval.

Signed-off-by: Jason Low <jason.low2@hp.com>
Reviewed-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: daniel.lezcano@linaro.org
Cc: alex.shi@linaro.org
Cc: efault@gmx.de
Cc: vincent.guittot@linaro.org
Cc: morten.rasmussen@arm.com
Cc: aswin@hp.com
Link: http://lkml.kernel.org/r/1399596562.2200.7.camel@j-VirtualBox
Signed-off-by: Ingo Molnar <mingo@kernel.org>
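As a reading aid only, here is a minimal, self-contained userspace sketch of the two helpers this patch introduces, get_sd_balance_interval() and update_next_balance(). It is not kernel code: HZ, MAX_LOAD_BALANCE_INTERVAL, msecs_to_jiffies(), clamp_ul(), time_after() and the trimmed-down struct sched_domain below are simplified stand-ins chosen for illustration, not the kernel definitions.

/*
 * Userspace sketch of the interval math used by this patch.
 * All constants and helpers here are simplified stand-ins.
 */
#include <stdio.h>

#define HZ 250UL                            /* assumed tick rate */
#define MAX_LOAD_BALANCE_INTERVAL (HZ / 10) /* stand-in for max_load_balance_interval */

struct sched_domain {                       /* only the fields the helpers touch */
	unsigned long balance_interval;     /* in ms */
	unsigned int busy_factor;
	unsigned long last_balance;         /* in jiffies */
};

static unsigned long msecs_to_jiffies(unsigned long ms)
{
	return ms * HZ / 1000;              /* simplified conversion */
}

static unsigned long clamp_ul(unsigned long v, unsigned long lo, unsigned long hi)
{
	return v < lo ? lo : (v > hi ? hi : v);
}

static int time_after(unsigned long a, unsigned long b)
{
	return (long)(b - a) < 0;           /* wrap-safe "a is after b" comparison */
}

static unsigned long get_sd_balance_interval(struct sched_domain *sd, int cpu_busy)
{
	unsigned long interval = sd->balance_interval;

	if (cpu_busy)
		interval *= sd->busy_factor;

	/* scale ms to jiffies and keep the result within sane bounds */
	interval = msecs_to_jiffies(interval);
	return clamp_ul(interval, 1UL, MAX_LOAD_BALANCE_INTERVAL);
}

static void update_next_balance(struct sched_domain *sd, int cpu_busy,
				unsigned long *next_balance)
{
	unsigned long next = sd->last_balance + get_sd_balance_interval(sd, cpu_busy);

	/* pull *next_balance earlier if this domain wants to balance sooner */
	if (time_after(*next_balance, next))
		*next_balance = next;
}

int main(void)
{
	struct sched_domain sd = { .balance_interval = 64, .busy_factor = 32,
				   .last_balance = 1000 };
	unsigned long jiffies = 1000, next_balance = jiffies + HZ;

	update_next_balance(&sd, 0, &next_balance); /* idle case: busy_factor ignored */
	printf("next_balance = %lu (was %lu)\n", next_balance, jiffies + HZ);
	return 0;
}

Run as-is, the idle (cpu_busy == 0) case pulls next_balance in from jiffies + HZ to sd->last_balance plus a short interval, which mirrors the effect the patch is after when a CPU is about to go idle.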
-rw-r--r--	kernel/sched/fair.c	69
1 file changed, 46 insertions(+), 23 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 429164d117ea..26ec6686a00b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6672,17 +6672,44 @@ out:
 	return ld_moved;
 }
 
+static inline unsigned long
+get_sd_balance_interval(struct sched_domain *sd, int cpu_busy)
+{
+	unsigned long interval = sd->balance_interval;
+
+	if (cpu_busy)
+		interval *= sd->busy_factor;
+
+	/* scale ms to jiffies */
+	interval = msecs_to_jiffies(interval);
+	interval = clamp(interval, 1UL, max_load_balance_interval);
+
+	return interval;
+}
+
+static inline void
+update_next_balance(struct sched_domain *sd, int cpu_busy, unsigned long *next_balance)
+{
+	unsigned long interval, next;
+
+	interval = get_sd_balance_interval(sd, cpu_busy);
+	next = sd->last_balance + interval;
+
+	if (time_after(*next_balance, next))
+		*next_balance = next;
+}
+
 /*
  * idle_balance is called by schedule() if this_cpu is about to become
  * idle. Attempts to pull tasks from other CPUs.
  */
 static int idle_balance(struct rq *this_rq)
 {
+	unsigned long next_balance = jiffies + HZ;
+	int this_cpu = this_rq->cpu;
 	struct sched_domain *sd;
 	int pulled_task = 0;
-	unsigned long next_balance = jiffies + HZ;
 	u64 curr_cost = 0;
-	int this_cpu = this_rq->cpu;
 
 	idle_enter_fair(this_rq);
 
@@ -6692,8 +6719,15 @@ static int idle_balance(struct rq *this_rq)
 	 */
 	this_rq->idle_stamp = rq_clock(this_rq);
 
-	if (this_rq->avg_idle < sysctl_sched_migration_cost)
+	if (this_rq->avg_idle < sysctl_sched_migration_cost) {
+		rcu_read_lock();
+		sd = rcu_dereference_check_sched_domain(this_rq->sd);
+		if (sd)
+			update_next_balance(sd, 0, &next_balance);
+		rcu_read_unlock();
+
 		goto out;
+	}
 
 	/*
 	 * Drop the rq->lock, but keep IRQ/preempt disabled.
@@ -6703,15 +6737,16 @@ static int idle_balance(struct rq *this_rq)
 	update_blocked_averages(this_cpu);
 	rcu_read_lock();
 	for_each_domain(this_cpu, sd) {
-		unsigned long interval;
 		int continue_balancing = 1;
 		u64 t0, domain_cost;
 
 		if (!(sd->flags & SD_LOAD_BALANCE))
 			continue;
 
-		if (this_rq->avg_idle < curr_cost + sd->max_newidle_lb_cost)
+		if (this_rq->avg_idle < curr_cost + sd->max_newidle_lb_cost) {
+			update_next_balance(sd, 0, &next_balance);
 			break;
+		}
 
 		if (sd->flags & SD_BALANCE_NEWIDLE) {
 			t0 = sched_clock_cpu(this_cpu);
@@ -6727,9 +6762,7 @@ static int idle_balance(struct rq *this_rq)
 			curr_cost += domain_cost;
 		}
 
-		interval = msecs_to_jiffies(sd->balance_interval);
-		if (time_after(next_balance, sd->last_balance + interval))
-			next_balance = sd->last_balance + interval;
+		update_next_balance(sd, 0, &next_balance);
 
 		/*
 		 * Stop searching for tasks to pull if there are
@@ -6753,15 +6786,11 @@ static int idle_balance(struct rq *this_rq)
 	if (this_rq->cfs.h_nr_running && !pulled_task)
 		pulled_task = 1;
 
-	if (pulled_task || time_after(jiffies, this_rq->next_balance)) {
-		/*
-		 * We are going idle. next_balance may be set based on
-		 * a busy processor. So reset next_balance.
-		 */
+out:
+	/* Move the next balance forward */
+	if (time_after(this_rq->next_balance, next_balance))
 		this_rq->next_balance = next_balance;
-	}
 
-out:
 	/* Is there a task of a high priority class? */
 	if (this_rq->nr_running != this_rq->cfs.h_nr_running)
 		pulled_task = -1;
@@ -7044,16 +7073,9 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
 			break;
 		}
 
-		interval = sd->balance_interval;
-		if (idle != CPU_IDLE)
-			interval *= sd->busy_factor;
-
-		/* scale ms to jiffies */
-		interval = msecs_to_jiffies(interval);
-		interval = clamp(interval, 1UL, max_load_balance_interval);
+		interval = get_sd_balance_interval(sd, idle != CPU_IDLE);
 
 		need_serialize = sd->flags & SD_SERIALIZE;
-
 		if (need_serialize) {
 			if (!spin_trylock(&balancing))
 				goto out;
@@ -7069,6 +7091,7 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
 			idle = idle_cpu(cpu) ? CPU_IDLE : CPU_NOT_IDLE;
 		}
 		sd->last_balance = jiffies;
+		interval = get_sd_balance_interval(sd, idle != CPU_IDLE);
 	}
 	if (need_serialize)
 		spin_unlock(&balancing);