path: root/kernel
author	Venkatesh Pallipadi <venki@google.com>	2011-02-14 17:38:50 -0500
committer	Ingo Molnar <mingo@elte.hu>	2011-02-16 07:33:20 -0500
commit	46e49b3836c7cd2ae5b5fe76fa981d0d292a52fe (patch)
tree	25c9a3fa6ad6f45fb8553e0ebe52b973a02f4ef6 /kernel
parent	48fa4b8ecf683f5e411303553da9e186e8b8406e (diff)
sched: Wholesale removal of sd_idle logic
sd_idle logic was introduced way back in 2005 (commit 5969fe06), as an HT optimization.

As per the discussion in the thread here:

  lkml - sched: Resolve sd_idle and first_idle_cpu Catch-22 - v1
  https://patchwork.kernel.org/patch/532501/

the capacity-based logic in the load balancer now handles this in a much cleaner way (including more than 2 SMT siblings), and sd_idle does not seem to bring any additional benefit. The sd_idle logic also has some bugs that have a performance impact. This patch removes the sd_idle logic altogether.

There was also a dependency between sched_mc_power_savings == 2 and the sd_idle logic.

Signed-off-by: Venkatesh Pallipadi <venki@google.com>
Acked-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1297723130-693-1-git-send-email-venki@google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
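For context on the capacity-based logic the message refers to, below is a minimal standalone sketch in plain C. It is illustrative only: the toy_group type, its fields, and the helper names are invented for this example and are not the kernel's data structures. The idea it shows is that each group advertises a capacity derived from its combined, SMT-discounted cpu power, and a group only qualifies as a balancing source when its running-task count exceeds that capacity; that covers the idle-sibling case the old sd_idle flag was special-casing.

#include <stddef.h>

#define SCHED_LOAD_SCALE 1024   /* nominal power of one full CPU */

struct toy_group {
        unsigned int nr_running;   /* tasks currently running in the group */
        unsigned long cpu_power;   /* summed, SMT-discounted CPU power */
};

/*
 * Capacity in whole tasks. Two SMT siblings share roughly one core's
 * worth of cpu_power, so such a group typically rounds to capacity 1.
 */
static unsigned int group_capacity(const struct toy_group *g)
{
        unsigned int cap = (g->cpu_power + SCHED_LOAD_SCALE / 2) / SCHED_LOAD_SCALE;
        return cap ? cap : 1;
}

/* Pick the group with the largest overload; NULL if none is overloaded. */
static const struct toy_group *
pick_busiest(const struct toy_group *groups, size_t n)
{
        const struct toy_group *busiest = NULL;
        unsigned int worst = 0;
        size_t i;

        for (i = 0; i < n; i++) {
                unsigned int cap = group_capacity(&groups[i]);

                if (groups[i].nr_running > cap &&
                    groups[i].nr_running - cap > worst) {
                        worst = groups[i].nr_running - cap;
                        busiest = &groups[i];
                }
        }
        return busiest;
}

With this kind of check, a two-sibling SMT group running two tasks exceeds its rounded capacity of one and is selected as a pull source, without any per-domain idle flag having to percolate through the call chain.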
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/sched_fair.c | 53
1 file changed, 11 insertions, 42 deletions
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 027024694043..d384e739ea95 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -2672,7 +2672,6 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
  * @this_cpu: Cpu for which load balance is currently performed.
  * @idle: Idle status of this_cpu
  * @load_idx: Load index of sched_domain of this_cpu for load calc.
- * @sd_idle: Idle status of the sched_domain containing group.
  * @local_group: Does group contain this_cpu.
  * @cpus: Set of cpus considered for load balancing.
  * @balance: Should we balance.
@@ -2680,7 +2679,7 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
  */
 static inline void update_sg_lb_stats(struct sched_domain *sd,
                        struct sched_group *group, int this_cpu,
-                       enum cpu_idle_type idle, int load_idx, int *sd_idle,
+                       enum cpu_idle_type idle, int load_idx,
                        int local_group, const struct cpumask *cpus,
                        int *balance, struct sg_lb_stats *sgs)
 {
@@ -2700,9 +2699,6 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
        for_each_cpu_and(i, sched_group_cpus(group), cpus) {
                struct rq *rq = cpu_rq(i);

-               if (*sd_idle && rq->nr_running)
-                       *sd_idle = 0;
-
                /* Bias balancing toward cpus of our domain */
                if (local_group) {
                        if (idle_cpu(i) && !first_idle_cpu) {
@@ -2817,15 +2813,13 @@ static bool update_sd_pick_busiest(struct sched_domain *sd,
  * @sd: sched_domain whose statistics are to be updated.
  * @this_cpu: Cpu for which load balance is currently performed.
  * @idle: Idle status of this_cpu
- * @sd_idle: Idle status of the sched_domain containing sg.
  * @cpus: Set of cpus considered for load balancing.
  * @balance: Should we balance.
  * @sds: variable to hold the statistics for this sched_domain.
  */
 static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
-                       enum cpu_idle_type idle, int *sd_idle,
-                       const struct cpumask *cpus, int *balance,
-                       struct sd_lb_stats *sds)
+                       enum cpu_idle_type idle, const struct cpumask *cpus,
+                       int *balance, struct sd_lb_stats *sds)
 {
        struct sched_domain *child = sd->child;
        struct sched_group *sg = sd->groups;
@@ -2843,7 +2837,7 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,

                local_group = cpumask_test_cpu(this_cpu, sched_group_cpus(sg));
                memset(&sgs, 0, sizeof(sgs));
-               update_sg_lb_stats(sd, sg, this_cpu, idle, load_idx, sd_idle,
+               update_sg_lb_stats(sd, sg, this_cpu, idle, load_idx,
                                local_group, cpus, balance, &sgs);

                if (local_group && !(*balance))
@@ -3095,7 +3089,6 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu,
  * @imbalance: Variable which stores amount of weighted load which should
  *             be moved to restore balance/put a group to idle.
  * @idle: The idle status of this_cpu.
- * @sd_idle: The idleness of sd
  * @cpus: The set of CPUs under consideration for load-balancing.
  * @balance: Pointer to a variable indicating if this_cpu
  *     is the appropriate cpu to perform load balancing at this_level.
@@ -3108,7 +3101,7 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu,
 static struct sched_group *
 find_busiest_group(struct sched_domain *sd, int this_cpu,
                   unsigned long *imbalance, enum cpu_idle_type idle,
-                  int *sd_idle, const struct cpumask *cpus, int *balance)
+                  const struct cpumask *cpus, int *balance)
 {
        struct sd_lb_stats sds;

@@ -3118,8 +3111,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
         * Compute the various statistics relavent for load balancing at
         * this level.
         */
-       update_sd_lb_stats(sd, this_cpu, idle, sd_idle, cpus,
-                                       balance, &sds);
+       update_sd_lb_stats(sd, this_cpu, idle, cpus, balance, &sds);

        /* Cases where imbalance does not exist from POV of this_cpu */
        /* 1) this_cpu is not the appropriate cpu to perform load balancing
@@ -3255,7 +3247,7 @@ find_busiest_queue(struct sched_domain *sd, struct sched_group *group,
 /* Working cpumask for load_balance and load_balance_newidle. */
 static DEFINE_PER_CPU(cpumask_var_t, load_balance_tmpmask);

-static int need_active_balance(struct sched_domain *sd, int sd_idle, int idle,
+static int need_active_balance(struct sched_domain *sd, int idle,
                               int busiest_cpu, int this_cpu)
 {
        if (idle == CPU_NEWLY_IDLE) {
@@ -3287,10 +3279,6 @@ static int need_active_balance(struct sched_domain *sd, int sd_idle, int idle,
                 * move_tasks() will succeed.  ld_moved will be true and this
                 * active balance code will not be triggered.
                 */
-               if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
-                   !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
-                       return 0;
-
                if (sched_mc_power_savings < POWERSAVINGS_BALANCE_WAKEUP)
                        return 0;
        }
@@ -3308,7 +3296,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
                        struct sched_domain *sd, enum cpu_idle_type idle,
                        int *balance)
 {
-       int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
+       int ld_moved, all_pinned = 0, active_balance = 0;
        struct sched_group *group;
        unsigned long imbalance;
        struct rq *busiest;
@@ -3317,20 +3305,10 @@ static int load_balance(int this_cpu, struct rq *this_rq,

        cpumask_copy(cpus, cpu_active_mask);

-       /*
-        * When power savings policy is enabled for the parent domain, idle
-        * sibling can pick up load irrespective of busy siblings. In this case,
-        * let the state of idle sibling percolate up as CPU_IDLE, instead of
-        * portraying it as CPU_NOT_IDLE.
-        */
-       if (idle != CPU_NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER &&
-           !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
-               sd_idle = 1;
-
        schedstat_inc(sd, lb_count[idle]);

 redo:
-       group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle,
+       group = find_busiest_group(sd, this_cpu, &imbalance, idle,
                                   cpus, balance);

        if (*balance == 0)
@@ -3392,8 +3370,7 @@ redo:
                if (idle != CPU_NEWLY_IDLE)
                        sd->nr_balance_failed++;

-               if (need_active_balance(sd, sd_idle, idle, cpu_of(busiest),
-                                       this_cpu)) {
+               if (need_active_balance(sd, idle, cpu_of(busiest), this_cpu)) {
                        raw_spin_lock_irqsave(&busiest->lock, flags);

                        /* don't kick the active_load_balance_cpu_stop,
@@ -3448,10 +3425,6 @@ redo:
                sd->balance_interval *= 2;
        }

-       if (!ld_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
-           !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
-               ld_moved = -1;
-
        goto out;

 out_balanced:
@@ -3465,11 +3438,7 @@ out_one_pinned:
            (sd->balance_interval < sd->max_interval))
                sd->balance_interval *= 2;

-       if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
-           !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
-               ld_moved = -1;
-       else
-               ld_moved = 0;
+       ld_moved = 0;
 out:
        return ld_moved;
 }