aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/sched.h1
-rw-r--r--kernel/sched.c2
-rw-r--r--kernel/sched_fair.c34
3 files changed, 34 insertions, 3 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d0036e52a24a..2c79e921a68b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -862,6 +862,7 @@ struct sched_group {
862 * single CPU. 862 * single CPU.
863 */ 863 */
864 unsigned int cpu_power, cpu_power_orig; 864 unsigned int cpu_power, cpu_power_orig;
865 unsigned int group_weight;
865 866
866 /* 867 /*
867 * The CPUs this group covers. 868 * The CPUs this group covers.
diff --git a/kernel/sched.c b/kernel/sched.c
index aa14a56f9d03..36a088018fe0 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -6960,6 +6960,8 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
6960 if (cpu != group_first_cpu(sd->groups)) 6960 if (cpu != group_first_cpu(sd->groups))
6961 return; 6961 return;
6962 6962
6963 sd->groups->group_weight = cpumask_weight(sched_group_cpus(sd->groups));
6964
6963 child = sd->child; 6965 child = sd->child;
6964 6966
6965 sd->groups->cpu_power = 0; 6967 sd->groups->cpu_power = 0;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index f4f6a8326dd0..034c4f410b36 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -2035,13 +2035,16 @@ struct sd_lb_stats {
2035 unsigned long this_load_per_task; 2035 unsigned long this_load_per_task;
2036 unsigned long this_nr_running; 2036 unsigned long this_nr_running;
2037 unsigned long this_has_capacity; 2037 unsigned long this_has_capacity;
2038 unsigned int this_idle_cpus;
2038 2039
2039 /* Statistics of the busiest group */ 2040 /* Statistics of the busiest group */
2041 unsigned int busiest_idle_cpus;
2040 unsigned long max_load; 2042 unsigned long max_load;
2041 unsigned long busiest_load_per_task; 2043 unsigned long busiest_load_per_task;
2042 unsigned long busiest_nr_running; 2044 unsigned long busiest_nr_running;
2043 unsigned long busiest_group_capacity; 2045 unsigned long busiest_group_capacity;
2044 unsigned long busiest_has_capacity; 2046 unsigned long busiest_has_capacity;
2047 unsigned int busiest_group_weight;
2045 2048
2046 int group_imb; /* Is there imbalance in this sd */ 2049 int group_imb; /* Is there imbalance in this sd */
2047#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) 2050#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
@@ -2063,6 +2066,8 @@ struct sg_lb_stats {
2063 unsigned long sum_nr_running; /* Nr tasks running in the group */ 2066 unsigned long sum_nr_running; /* Nr tasks running in the group */
2064 unsigned long sum_weighted_load; /* Weighted load of group's tasks */ 2067 unsigned long sum_weighted_load; /* Weighted load of group's tasks */
2065 unsigned long group_capacity; 2068 unsigned long group_capacity;
2069 unsigned long idle_cpus;
2070 unsigned long group_weight;
2066 int group_imb; /* Is there an imbalance in the group ? */ 2071 int group_imb; /* Is there an imbalance in the group ? */
2067 int group_has_capacity; /* Is there extra capacity in the group? */ 2072 int group_has_capacity; /* Is there extra capacity in the group? */
2068}; 2073};
@@ -2431,7 +2436,8 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
2431 sgs->group_load += load; 2436 sgs->group_load += load;
2432 sgs->sum_nr_running += rq->nr_running; 2437 sgs->sum_nr_running += rq->nr_running;
2433 sgs->sum_weighted_load += weighted_cpuload(i); 2438 sgs->sum_weighted_load += weighted_cpuload(i);
2434 2439 if (idle_cpu(i))
2440 sgs->idle_cpus++;
2435 } 2441 }
2436 2442
2437 /* 2443 /*
@@ -2469,6 +2475,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
2469 sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE); 2475 sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE);
2470 if (!sgs->group_capacity) 2476 if (!sgs->group_capacity)
2471 sgs->group_capacity = fix_small_capacity(sd, group); 2477 sgs->group_capacity = fix_small_capacity(sd, group);
2478 sgs->group_weight = group->group_weight;
2472 2479
2473 if (sgs->group_capacity > sgs->sum_nr_running) 2480 if (sgs->group_capacity > sgs->sum_nr_running)
2474 sgs->group_has_capacity = 1; 2481 sgs->group_has_capacity = 1;
@@ -2576,13 +2583,16 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
2576 sds->this_nr_running = sgs.sum_nr_running; 2583 sds->this_nr_running = sgs.sum_nr_running;
2577 sds->this_load_per_task = sgs.sum_weighted_load; 2584 sds->this_load_per_task = sgs.sum_weighted_load;
2578 sds->this_has_capacity = sgs.group_has_capacity; 2585 sds->this_has_capacity = sgs.group_has_capacity;
2586 sds->this_idle_cpus = sgs.idle_cpus;
2579 } else if (update_sd_pick_busiest(sd, sds, sg, &sgs, this_cpu)) { 2587 } else if (update_sd_pick_busiest(sd, sds, sg, &sgs, this_cpu)) {
2580 sds->max_load = sgs.avg_load; 2588 sds->max_load = sgs.avg_load;
2581 sds->busiest = sg; 2589 sds->busiest = sg;
2582 sds->busiest_nr_running = sgs.sum_nr_running; 2590 sds->busiest_nr_running = sgs.sum_nr_running;
2591 sds->busiest_idle_cpus = sgs.idle_cpus;
2583 sds->busiest_group_capacity = sgs.group_capacity; 2592 sds->busiest_group_capacity = sgs.group_capacity;
2584 sds->busiest_load_per_task = sgs.sum_weighted_load; 2593 sds->busiest_load_per_task = sgs.sum_weighted_load;
2585 sds->busiest_has_capacity = sgs.group_has_capacity; 2594 sds->busiest_has_capacity = sgs.group_has_capacity;
2595 sds->busiest_group_weight = sgs.group_weight;
2586 sds->group_imb = sgs.group_imb; 2596 sds->group_imb = sgs.group_imb;
2587 } 2597 }
2588 2598
@@ -2860,8 +2870,26 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
2860 if (sds.this_load >= sds.avg_load) 2870 if (sds.this_load >= sds.avg_load)
2861 goto out_balanced; 2871 goto out_balanced;
2862 2872
2863 if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load) 2873 /*
2864 goto out_balanced; 2874 * In the CPU_NEWLY_IDLE case, use imbalance_pct to be conservative.
2875 * To check for busy balance, use !idle_cpu() instead of
2876 * CPU_NOT_IDLE, because HT siblings will use CPU_NOT_IDLE
2877 * even when they are idle.
2878 */
2879 if (idle == CPU_NEWLY_IDLE || !idle_cpu(this_cpu)) {
2880 if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load)
2881 goto out_balanced;
2882 } else {
2883 /*
2884 * This cpu is idle. If the busiest group doesn't
2885 * have more tasks than the number of available cpus and
2886 * there is no imbalance between this and the busiest group
2887 * wrt idle cpus, it is balanced.
2888 */
2889 if ((sds.this_idle_cpus <= sds.busiest_idle_cpus + 1) &&
2890 sds.busiest_nr_running <= sds.busiest_group_weight)
2891 goto out_balanced;
2892 }
2865 2893
2866force_balance: 2894force_balance:
2867 /* Looks like there is an imbalance. Compute it */ 2895 /* Looks like there is an imbalance. Compute it */