author	Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>	2010-06-08 00:57:02 -0400
committer	Ingo Molnar <mingo@elte.hu>	2010-06-09 04:34:54 -0400
commit	9d5efe05eb0c904545a28b19c18b949f23334de0 (patch)
tree	afa28fe2f3abd6fb538a7f0039c75a5e6463eeb1
parent	83cd4fe27ad8446619b2e030b171b858501de87d (diff)
sched: Fix capacity calculations for SMT4
Handle cpu capacity being reported as 0 on cores with a larger number of
hardware threads. For example, on a Power7 core with 4 hardware threads,
core power is 1177 and thus the power of each hardware thread is
1177/4 = 294. This low power can lead to the capacity for each hardware
thread being calculated as 0, which leads to tasks bouncing within the
core madly!

Fix this by reporting capacity for hardware threads as 1, provided their
power is not scaled down significantly because of frequency scaling or
real-time tasks' usage of the cpu.

Signed-off-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arjan van de Ven <arjan@linux.intel.com>
LKML-Reference: <20100608045702.21D03CC895@localhost.localdomain>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
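To make the failure mode concrete, here is the arithmetic the changelog describes (an illustrative sketch, not part of the patch; it assumes SCHED_LOAD_SCALE is 1024, i.e. 1 << SCHED_LOAD_SHIFT, as on these kernels):

	#include <linux/kernel.h>	/* DIV_ROUND_CLOSEST() */

	unsigned long power    = 1177 / 4;			/* 294 per SMT4 thread */
	unsigned long capacity = DIV_ROUND_CLOSEST(power, 1024);	/* (294 + 512) / 1024 == 0 */
	/* A group capacity of 0 tells the load balancer the thread cannot hold
	 * even one task, so tasks keep bouncing between the sibling threads. */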
-rw-r--r--	include/linux/sched.h	2
-rw-r--r--	kernel/sched_fair.c	53
2 files changed, 44 insertions(+), 11 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a3e5b1cd043..c731296e5e9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -857,7 +857,7 @@ struct sched_group {
 	 * CPU power of this group, SCHED_LOAD_SCALE being max power for a
 	 * single CPU.
 	 */
-	unsigned int cpu_power;
+	unsigned int cpu_power, cpu_power_orig;
 
 	/*
 	 * The CPUs this group covers.
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 6ee2e0af665..b9b3462483b 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -2285,13 +2285,6 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
 	unsigned long power = SCHED_LOAD_SCALE;
 	struct sched_group *sdg = sd->groups;
 
-	if (sched_feat(ARCH_POWER))
-		power *= arch_scale_freq_power(sd, cpu);
-	else
-		power *= default_scale_freq_power(sd, cpu);
-
-	power >>= SCHED_LOAD_SHIFT;
-
 	if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) {
 		if (sched_feat(ARCH_POWER))
 			power *= arch_scale_smt_power(sd, cpu);
@@ -2301,6 +2294,15 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
 		power >>= SCHED_LOAD_SHIFT;
 	}
 
+	sdg->cpu_power_orig = power;
+
+	if (sched_feat(ARCH_POWER))
+		power *= arch_scale_freq_power(sd, cpu);
+	else
+		power *= default_scale_freq_power(sd, cpu);
+
+	power >>= SCHED_LOAD_SHIFT;
+
 	power *= scale_rt_power(cpu);
 	power >>= SCHED_LOAD_SHIFT;
 
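The two hunks above reorder update_cpu_power(): the frequency-scaling step is removed from the top of the function and re-added after the SMT step, with the SMT-scaled value snapshotted into the new cpu_power_orig field in between. Roughly, the resulting flow is (a paraphrase only; smt_scale(), freq_scale() and rt_scale() are shorthand here, not real kernel helpers):

	power = SCHED_LOAD_SCALE;
	power = smt_scale(power, sd, cpu);	/* arch_scale_smt_power() or default */
	sdg->cpu_power_orig = power;		/* baseline: SMT-scaled, before freq/RT scaling */
	power = freq_scale(power, sd, cpu);	/* arch_scale_freq_power() or default */
	power = rt_scale(power, cpu);		/* scale_rt_power() */
	sdg->cpu_power = power;

If cpu_power stays close to cpu_power_orig, the thread's low power is due only to SMT sharing and it is safe to report a capacity of 1; if frequency scaling or real-time usage has taken a significant bite, the capacity is left at 0.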
@@ -2333,6 +2335,31 @@ static void update_group_power(struct sched_domain *sd, int cpu)
 	sdg->cpu_power = power;
 }
 
+/*
+ * Try and fix up capacity for tiny siblings, this is needed when
+ * things like SD_ASYM_PACKING need f_b_g to select another sibling
+ * which on its own isn't powerful enough.
+ *
+ * See update_sd_pick_busiest() and check_asym_packing().
+ */
+static inline int
+fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
+{
+	/*
+	 * Only siblings can have significantly less than SCHED_LOAD_SCALE
+	 */
+	if (sd->level != SD_LV_SIBLING)
+		return 0;
+
+	/*
+	 * If ~90% of the cpu_power is still there, we're good.
+	 */
+	if (group->cpu_power * 32 < group->cpu_power_orig * 29)
+		return 1;
+
+	return 0;
+}
+
 /**
  * update_sg_lb_stats - Update sched_group's statistics for load balancing.
  * @sd: The sched_domain whose statistics are to be updated.
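The "~90%" in fix_small_capacity() comes from the integer comparison: cpu_power * 32 < cpu_power_orig * 29 asks whether cpu_power has fallen below 29/32 ≈ 90.6% of cpu_power_orig. Note that, as committed here, the test returns 1 in exactly that case, whereas the comment and the changelog ("provided their power is not scaled down significantly") describe the opposite intent; the comparison was inverted by a later follow-up fix. A standalone sketch of the intended check (hypothetical name, not kernel code):

	/* 1 when at least ~90.6% (29/32) of the original, SMT-scaled power
	 * is still available, i.e. freq/RT scaling did not eat much of it. */
	static inline int power_mostly_intact(unsigned long cpu_power,
					      unsigned long cpu_power_orig)
	{
		return cpu_power * 32 > cpu_power_orig * 29;
	}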
@@ -2426,6 +2453,8 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
 
 	sgs->group_capacity =
 		DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE);
+	if (!sgs->group_capacity)
+		sgs->group_capacity = fix_small_capacity(sd, group);
 }
 
 /**
@@ -2724,8 +2753,9 @@ ret:
  * find_busiest_queue - find the busiest runqueue among the cpus in group.
  */
 static struct rq *
-find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
-		   unsigned long imbalance, const struct cpumask *cpus)
+find_busiest_queue(struct sched_domain *sd, struct sched_group *group,
+		   enum cpu_idle_type idle, unsigned long imbalance,
+		   const struct cpumask *cpus)
 {
 	struct rq *busiest = NULL, *rq;
 	unsigned long max_load = 0;
@@ -2736,6 +2766,9 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
 		unsigned long capacity = DIV_ROUND_CLOSEST(power, SCHED_LOAD_SCALE);
 		unsigned long wl;
 
+		if (!capacity)
+			capacity = fix_small_capacity(sd, group);
+
 		if (!cpumask_test_cpu(i, cpus))
 			continue;
 
@@ -2852,7 +2885,7 @@ redo:
 		goto out_balanced;
 	}
 
-	busiest = find_busiest_queue(group, idle, imbalance, cpus);
+	busiest = find_busiest_queue(sd, group, idle, imbalance, cpus);
 	if (!busiest) {
 		schedstat_inc(sd, lb_nobusyq[idle]);
 		goto out_balanced;