author     Jason Low <jason.low2@hp.com>    2014-05-08 20:49:22 -0400
committer  Ingo Molnar <mingo@kernel.org>   2014-05-22 05:16:32 -0400
commit     52a08ef1f13a11289c9e18cd4cfb4e51c024058b (patch)
tree       fbdde6221312e3ab2ea3a258a94d3e24dace5c54
parent     a9467fa3cd2d5bf39e7cb7d0706d29d7ef4df212 (diff)
sched: Fix the rq->next_balance logic in rebalance_domains() and idle_balance()
Currently, in idle_balance(), we update rq->next_balance only when we pull
tasks. However, it is also important to update it in the !pulled_task case.
When the CPU is "busy" (i.e. not idle), rq->next_balance gets computed
using sd->busy_factor (so we increase the balance interval while the CPU is
busy). However, when the CPU goes idle, rq->next_balance could still be set
to a large value that was computed with sd->busy_factor.
Thus, we also need to update rq->next_balance in idle_balance() in the
!pulled_task case, so that rq->next_balance is recomputed without the
busy_factor when the CPU is about to go idle.
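To make the scale of the problem concrete, here is a minimal user-space
sketch of the interval arithmetic (HZ, the 64ms balance_interval, and the
busy_factor of 32 are illustrative assumptions, not values taken from this
patch):

#include <stdio.h>

#define HZ 250	/* illustrative; matches a common kernel config */

/* simplified stand-in for the kernel's msecs_to_jiffies() */
static unsigned long msecs_to_jiffies(unsigned long ms)
{
	return ms * HZ / 1000;
}

int main(void)
{
	unsigned long balance_interval = 64;	/* ms; illustrative */
	unsigned long busy_factor = 32;		/* illustrative */

	/* busy CPU: the interval is scaled by busy_factor */
	unsigned long busy = msecs_to_jiffies(balance_interval * busy_factor);
	/* idle CPU: the interval is used as-is */
	unsigned long idle = msecs_to_jiffies(balance_interval);

	/* prints 512 jiffies (~2048 ms) vs. 16 jiffies (~64 ms) */
	printf("busy: %lu jiffies, idle: %lu jiffies\n", busy, idle);
	return 0;
}

Under these assumptions, a CPU whose next_balance was computed while busy
and which then goes idle could wait ~2 seconds instead of ~64ms for its
next balance attempt; updating next_balance in the !pulled_task path
avoids that.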
This patch makes rq->next_balance get updated independently of whether or
not we pulled a task. We also add logic to ensure that we always traverse
at least one sched domain, so that we obtain a proper next_balance value
for updating rq->next_balance.
Additionally, since load_balance() can modify sd->balance_interval, we
need to re-obtain the sched domain's interval after the call to
load_balance() in rebalance_domains(), before we update rq->next_balance.
This patch adds and uses two new helper functions, update_next_balance()
and get_sd_balance_interval(), to update next_balance and to obtain the
sched domain's balance_interval.
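As a rough user-space model of how the two helpers cooperate (the
pared-down sched_domain, the HZ value, and the simplified time_after()
comparison below are assumptions for illustration; the kernel's
time_after() is wraparound-safe and the clamp is omitted):

#include <stdio.h>

#define HZ 250	/* illustrative */

/* pared down to the three fields the helpers use */
struct sched_domain {
	unsigned long balance_interval;	/* in ms */
	unsigned int busy_factor;
	unsigned long last_balance;	/* in jiffies */
};

static unsigned long msecs_to_jiffies(unsigned long ms)
{
	return ms * HZ / 1000;
}

static unsigned long get_sd_balance_interval(struct sched_domain *sd, int cpu_busy)
{
	unsigned long interval = sd->balance_interval;

	if (cpu_busy)
		interval *= sd->busy_factor;

	return msecs_to_jiffies(interval);	/* clamp omitted for brevity */
}

static void update_next_balance(struct sched_domain *sd, int cpu_busy,
				unsigned long *next_balance)
{
	unsigned long next = sd->last_balance + get_sd_balance_interval(sd, cpu_busy);

	/* simplified time_after(); only ever moves *next_balance earlier */
	if (*next_balance > next)
		*next_balance = next;
}

int main(void)
{
	unsigned long jiffies = 10000;			/* pretend current time */
	unsigned long next_balance = jiffies + HZ;	/* the 1s default */
	struct sched_domain sd = { 64, 32, jiffies };	/* illustrative values */

	/* cpu_busy = 0: the CPU is going idle, so busy_factor is skipped */
	update_next_balance(&sd, 0, &next_balance);

	/* prints 16: ~64ms out instead of the 1s default */
	printf("next balance in %lu jiffies\n", next_balance - jiffies);
	return 0;
}

Because update_next_balance() only ever moves *next_balance earlier,
rq->next_balance ends up as the earliest due date across all traversed
domains, regardless of whether a task was pulled.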
Signed-off-by: Jason Low <jason.low2@hp.com>
Reviewed-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: daniel.lezcano@linaro.org
Cc: alex.shi@linaro.org
Cc: efault@gmx.de
Cc: vincent.guittot@linaro.org
Cc: morten.rasmussen@arm.com
Cc: aswin@hp.com
Link: http://lkml.kernel.org/r/1399596562.2200.7.camel@j-VirtualBox
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--    kernel/sched/fair.c | 69
1 file changed, 46 insertions(+), 23 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 429164d117ea..26ec6686a00b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6672,17 +6672,44 @@ out:
 	return ld_moved;
 }
 
+static inline unsigned long
+get_sd_balance_interval(struct sched_domain *sd, int cpu_busy)
+{
+	unsigned long interval = sd->balance_interval;
+
+	if (cpu_busy)
+		interval *= sd->busy_factor;
+
+	/* scale ms to jiffies */
+	interval = msecs_to_jiffies(interval);
+	interval = clamp(interval, 1UL, max_load_balance_interval);
+
+	return interval;
+}
+
+static inline void
+update_next_balance(struct sched_domain *sd, int cpu_busy, unsigned long *next_balance)
+{
+	unsigned long interval, next;
+
+	interval = get_sd_balance_interval(sd, cpu_busy);
+	next = sd->last_balance + interval;
+
+	if (time_after(*next_balance, next))
+		*next_balance = next;
+}
+
 /*
  * idle_balance is called by schedule() if this_cpu is about to become
  * idle. Attempts to pull tasks from other CPUs.
  */
 static int idle_balance(struct rq *this_rq)
 {
+	unsigned long next_balance = jiffies + HZ;
+	int this_cpu = this_rq->cpu;
 	struct sched_domain *sd;
 	int pulled_task = 0;
-	unsigned long next_balance = jiffies + HZ;
 	u64 curr_cost = 0;
-	int this_cpu = this_rq->cpu;
 
 	idle_enter_fair(this_rq);
 
@@ -6692,8 +6719,15 @@ static int idle_balance(struct rq *this_rq)
 	 */
 	this_rq->idle_stamp = rq_clock(this_rq);
 
-	if (this_rq->avg_idle < sysctl_sched_migration_cost)
+	if (this_rq->avg_idle < sysctl_sched_migration_cost) {
+		rcu_read_lock();
+		sd = rcu_dereference_check_sched_domain(this_rq->sd);
+		if (sd)
+			update_next_balance(sd, 0, &next_balance);
+		rcu_read_unlock();
+
 		goto out;
+	}
 
 	/*
 	 * Drop the rq->lock, but keep IRQ/preempt disabled.
@@ -6703,15 +6737,16 @@ static int idle_balance(struct rq *this_rq)
 	update_blocked_averages(this_cpu);
 	rcu_read_lock();
 	for_each_domain(this_cpu, sd) {
-		unsigned long interval;
 		int continue_balancing = 1;
 		u64 t0, domain_cost;
 
 		if (!(sd->flags & SD_LOAD_BALANCE))
 			continue;
 
-		if (this_rq->avg_idle < curr_cost + sd->max_newidle_lb_cost)
+		if (this_rq->avg_idle < curr_cost + sd->max_newidle_lb_cost) {
+			update_next_balance(sd, 0, &next_balance);
 			break;
+		}
 
 		if (sd->flags & SD_BALANCE_NEWIDLE) {
 			t0 = sched_clock_cpu(this_cpu);
@@ -6727,9 +6762,7 @@ static int idle_balance(struct rq *this_rq)
 			curr_cost += domain_cost;
 		}
 
-		interval = msecs_to_jiffies(sd->balance_interval);
-		if (time_after(next_balance, sd->last_balance + interval))
-			next_balance = sd->last_balance + interval;
+		update_next_balance(sd, 0, &next_balance);
 
 		/*
 		 * Stop searching for tasks to pull if there are
@@ -6753,15 +6786,11 @@ static int idle_balance(struct rq *this_rq)
 	if (this_rq->cfs.h_nr_running && !pulled_task)
 		pulled_task = 1;
 
-	if (pulled_task || time_after(jiffies, this_rq->next_balance)) {
-		/*
-		 * We are going idle. next_balance may be set based on
-		 * a busy processor. So reset next_balance.
-		 */
+out:
+	/* Move the next balance forward */
+	if (time_after(this_rq->next_balance, next_balance))
 		this_rq->next_balance = next_balance;
-	}
 
-out:
 	/* Is there a task of a high priority class? */
 	if (this_rq->nr_running != this_rq->cfs.h_nr_running)
 		pulled_task = -1;
@@ -7044,16 +7073,9 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
 			break;
 		}
 
-		interval = sd->balance_interval;
-		if (idle != CPU_IDLE)
-			interval *= sd->busy_factor;
-
-		/* scale ms to jiffies */
-		interval = msecs_to_jiffies(interval);
-		interval = clamp(interval, 1UL, max_load_balance_interval);
+		interval = get_sd_balance_interval(sd, idle != CPU_IDLE);
 
 		need_serialize = sd->flags & SD_SERIALIZE;
-
 		if (need_serialize) {
 			if (!spin_trylock(&balancing))
 				goto out;
@@ -7069,6 +7091,7 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
 			idle = idle_cpu(cpu) ? CPU_IDLE : CPU_NOT_IDLE;
 		}
 		sd->last_balance = jiffies;
+		interval = get_sd_balance_interval(sd, idle != CPU_IDLE);
 	}
 	if (need_serialize)
 		spin_unlock(&balancing);