author		Venkatesh Pallipadi <venki@google.com>	2011-02-14 17:38:50 -0500
committer	Ingo Molnar <mingo@elte.hu>	2011-02-16 07:33:20 -0500
commit		46e49b3836c7cd2ae5b5fe76fa981d0d292a52fe
tree		25c9a3fa6ad6f45fb8553e0ebe52b973a02f4ef6 /kernel
parent		48fa4b8ecf683f5e411303553da9e186e8b8406e
sched: Wholesale removal of sd_idle logic
sd_idle logic was introduced way back in 2005 (commit 5969fe06)
as an HT optimization.
As per the discussion in the thread here:
lkml - sched: Resolve sd_idle and first_idle_cpu Catch-22 - v1
https://patchwork.kernel.org/patch/532501/
The capacity-based logic in the load balancer now handles this in a
much cleaner way (including the case of more than 2 SMT siblings), and
sd_idle does not seem to bring any additional benefit. The sd_idle logic
also has some bugs that have a performance impact. This patch removes
the sd_idle logic altogether.
Also, the sd_idle logic had a dependency on sched_mc_power_savings == 2.
Signed-off-by: Venkatesh Pallipadi <venki@google.com>
Acked-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1297723130-693-1-git-send-email-venki@google.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/sched_fair.c	53
1 file changed, 11 insertions(+), 42 deletions(-)
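
For orientation before reading the diff: the removed mechanism amounts to the gate sketched below. This is a condensed, hypothetical helper (sd_idle_gate() is not a real kernel function) that collects the deleted load_balance() lines in one place; the flags and helpers it uses are the actual ones visible in the diff.

/*
 * Condensed sketch of the sd_idle setup this patch deletes from
 * load_balance(). Pre-patch, a CPU balancing while idle inside an
 * SMT domain (SD_SHARE_CPUPOWER) was flagged "sd_idle" unless a
 * parent domain requested powersavings balancing. The flag was
 * cleared in update_sg_lb_stats() once any sibling had a running
 * task, and several balance decisions were then gated on it.
 */
static int sd_idle_gate(struct sched_domain *sd, enum cpu_idle_type idle)
{
	int sd_idle = 0;

	if (idle != CPU_NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER &&
	    !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
		sd_idle = 1;

	return sd_idle;
}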
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 027024694043..d384e739ea95 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -2672,7 +2672,6 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
  * @this_cpu: Cpu for which load balance is currently performed.
  * @idle: Idle status of this_cpu
  * @load_idx: Load index of sched_domain of this_cpu for load calc.
- * @sd_idle: Idle status of the sched_domain containing group.
  * @local_group: Does group contain this_cpu.
  * @cpus: Set of cpus considered for load balancing.
  * @balance: Should we balance.
@@ -2680,7 +2679,7 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
  */
 static inline void update_sg_lb_stats(struct sched_domain *sd,
 			struct sched_group *group, int this_cpu,
-			enum cpu_idle_type idle, int load_idx, int *sd_idle,
+			enum cpu_idle_type idle, int load_idx,
 			int local_group, const struct cpumask *cpus,
 			int *balance, struct sg_lb_stats *sgs)
 {
@@ -2700,9 +2699,6 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
 	for_each_cpu_and(i, sched_group_cpus(group), cpus) {
 		struct rq *rq = cpu_rq(i);
 
-		if (*sd_idle && rq->nr_running)
-			*sd_idle = 0;
-
 		/* Bias balancing toward cpus of our domain */
 		if (local_group) {
 			if (idle_cpu(i) && !first_idle_cpu) {
@@ -2817,15 +2813,13 @@ static bool update_sd_pick_busiest(struct sched_domain *sd,
  * @sd: sched_domain whose statistics are to be updated.
  * @this_cpu: Cpu for which load balance is currently performed.
  * @idle: Idle status of this_cpu
- * @sd_idle: Idle status of the sched_domain containing sg.
  * @cpus: Set of cpus considered for load balancing.
  * @balance: Should we balance.
  * @sds: variable to hold the statistics for this sched_domain.
  */
 static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
-			enum cpu_idle_type idle, int *sd_idle,
-			const struct cpumask *cpus, int *balance,
-			struct sd_lb_stats *sds)
+			enum cpu_idle_type idle, const struct cpumask *cpus,
+			int *balance, struct sd_lb_stats *sds)
 {
 	struct sched_domain *child = sd->child;
 	struct sched_group *sg = sd->groups;
@@ -2843,7 +2837,7 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
 
 		local_group = cpumask_test_cpu(this_cpu, sched_group_cpus(sg));
 		memset(&sgs, 0, sizeof(sgs));
-		update_sg_lb_stats(sd, sg, this_cpu, idle, load_idx, sd_idle,
+		update_sg_lb_stats(sd, sg, this_cpu, idle, load_idx,
 				local_group, cpus, balance, &sgs);
 
 		if (local_group && !(*balance))
@@ -3095,7 +3089,6 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu,
  * @imbalance: Variable which stores amount of weighted load which should
  *		be moved to restore balance/put a group to idle.
  * @idle: The idle status of this_cpu.
- * @sd_idle: The idleness of sd
  * @cpus: The set of CPUs under consideration for load-balancing.
  * @balance: Pointer to a variable indicating if this_cpu
  *	is the appropriate cpu to perform load balancing at this_level.
@@ -3108,7 +3101,7 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu,
 static struct sched_group *
 find_busiest_group(struct sched_domain *sd, int this_cpu,
 		   unsigned long *imbalance, enum cpu_idle_type idle,
-		   int *sd_idle, const struct cpumask *cpus, int *balance)
+		   const struct cpumask *cpus, int *balance)
 {
 	struct sd_lb_stats sds;
 
@@ -3118,8 +3111,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 	 * Compute the various statistics relavent for load balancing at
 	 * this level.
 	 */
-	update_sd_lb_stats(sd, this_cpu, idle, sd_idle, cpus,
-			balance, &sds);
+	update_sd_lb_stats(sd, this_cpu, idle, cpus, balance, &sds);
 
 	/* Cases where imbalance does not exist from POV of this_cpu */
 	/* 1) this_cpu is not the appropriate cpu to perform load balancing
@@ -3255,7 +3247,7 @@ find_busiest_queue(struct sched_domain *sd, struct sched_group *group,
 /* Working cpumask for load_balance and load_balance_newidle. */
 static DEFINE_PER_CPU(cpumask_var_t, load_balance_tmpmask);
 
-static int need_active_balance(struct sched_domain *sd, int sd_idle, int idle,
+static int need_active_balance(struct sched_domain *sd, int idle,
 			       int busiest_cpu, int this_cpu)
 {
 	if (idle == CPU_NEWLY_IDLE) {
@@ -3287,10 +3279,6 @@ static int need_active_balance(struct sched_domain *sd, int sd_idle, int idle,
 		 * move_tasks() will succeed. ld_moved will be true and this
 		 * active balance code will not be triggered.
 		 */
-		if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
-		    !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
-			return 0;
-
 		if (sched_mc_power_savings < POWERSAVINGS_BALANCE_WAKEUP)
 			return 0;
 	}
@@ -3308,7 +3296,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 			struct sched_domain *sd, enum cpu_idle_type idle,
 			int *balance)
 {
-	int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
+	int ld_moved, all_pinned = 0, active_balance = 0;
 	struct sched_group *group;
 	unsigned long imbalance;
 	struct rq *busiest;
@@ -3317,20 +3305,10 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 
 	cpumask_copy(cpus, cpu_active_mask);
 
-	/*
-	 * When power savings policy is enabled for the parent domain, idle
-	 * sibling can pick up load irrespective of busy siblings. In this case,
-	 * let the state of idle sibling percolate up as CPU_IDLE, instead of
-	 * portraying it as CPU_NOT_IDLE.
-	 */
-	if (idle != CPU_NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER &&
-	    !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
-		sd_idle = 1;
-
 	schedstat_inc(sd, lb_count[idle]);
 
 redo:
-	group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle,
-				   cpus, balance);
+	group = find_busiest_group(sd, this_cpu, &imbalance, idle,
+				   cpus, balance);
 
 	if (*balance == 0)
@@ -3392,8 +3370,7 @@ redo:
 	if (idle != CPU_NEWLY_IDLE)
 		sd->nr_balance_failed++;
 
-	if (need_active_balance(sd, sd_idle, idle, cpu_of(busiest),
-				this_cpu)) {
+	if (need_active_balance(sd, idle, cpu_of(busiest), this_cpu)) {
 		raw_spin_lock_irqsave(&busiest->lock, flags);
 
 		/* don't kick the active_load_balance_cpu_stop,
@@ -3448,10 +3425,6 @@ redo:
 		sd->balance_interval *= 2;
 	}
 
-	if (!ld_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
-	    !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
-		ld_moved = -1;
-
 	goto out;
 
 out_balanced:
@@ -3465,11 +3438,7 @@ out_one_pinned:
 	    (sd->balance_interval < sd->max_interval))
 		sd->balance_interval *= 2;
 
-	if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
-	    !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
-		ld_moved = -1;
-	else
-		ld_moved = 0;
+	ld_moved = 0;
 out:
 	return ld_moved;
 }
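
A note on the removed "ld_moved = -1" convention: load_balance() previously returned -1 when it moved no tasks but a busy SMT sibling meant the CPU should not be treated as idle. Its caller only tests the return value for truth, which is why the special value could be dropped and both exit paths collapsed to "ld_moved = 0". The fragment below is a close paraphrase of the 2.6.38-era caller in rebalance_domains(), shown for illustration rather than quoted from this patch:

	if (load_balance(cpu, rq, sd, idle, &balance)) {
		/*
		 * We've pulled tasks over, so either we're no longer
		 * idle or one of our SMT siblings is not idle; the
		 * latter is the -1 case that this patch removes.
		 */
		idle = CPU_NOT_IDLE;
	}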