aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/sched.c
diff options
context:
space:
mode:
authorPeter Zijlstra <a.p.zijlstra@chello.nl>2009-09-10 07:50:02 -0400
committerIngo Molnar <mingo@elte.hu>2009-09-15 10:01:05 -0400
commitc88d5910890ad35af283344417891344604f0438 (patch)
tree4e2025d569c3e03a7ec5163f0a9bc159114ee14e /kernel/sched.c
parente9c8431185d6c406887190519f6dbdd112641686 (diff)
sched: Merge select_task_rq_fair() and sched_balance_self()
The problem with wake_idle() is that it doesn't respect things like cpu_power, which means it doesn't deal well with SMT nor the recent RT interaction. To cure this, it needs to do what sched_balance_self() does, which leads to the possibility of merging select_task_rq_fair() and sched_balance_self(). Modify sched_balance_self() to: - update_shares() when walking up the domain tree, (it only called it for the top domain, but it should have done this anyway), which allows us to remove this ugly bit from try_to_wake_up(). - do wake_affine() on the smallest domain that contains both this (the waking) and the prev (the wakee) cpu for WAKE invocations. Then use the top-down balance steps it had to replace wake_idle(). This leads to the disappearance of SD_WAKE_BALANCE and SD_WAKE_IDLE_FAR, with SD_WAKE_IDLE replaced with SD_BALANCE_WAKE. SD_WAKE_AFFINE needs SD_BALANCE_WAKE to be effective. Touch all topology bits to replace the old with new SD flags -- platforms might need re-tuning, enabling SD_BALANCE_WAKE conditionally on a NUMA distance seems like a good additional feature, magny-core and small nehalem systems would want this enabled, systems with slow interconnects would not. Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> LKML-Reference: <new-submission> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--kernel/sched.c41
1 file changed, 3 insertions, 38 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index fc6fda881d2e..6c819f338b11 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -512,14 +512,6 @@ struct root_domain {
512#ifdef CONFIG_SMP 512#ifdef CONFIG_SMP
513 struct cpupri cpupri; 513 struct cpupri cpupri;
514#endif 514#endif
515#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
516 /*
517 * Preferred wake up cpu nominated by sched_mc balance that will be
518 * used when most cpus are idle in the system indicating overall very
519 * low system utilisation. Triggered at POWERSAVINGS_BALANCE_WAKEUP(2)
520 */
521 unsigned int sched_mc_preferred_wakeup_cpu;
522#endif
523}; 515};
524 516
525/* 517/*
@@ -2315,22 +2307,6 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
2315 if (!sched_feat(SYNC_WAKEUPS)) 2307 if (!sched_feat(SYNC_WAKEUPS))
2316 sync = 0; 2308 sync = 0;
2317 2309
2318#ifdef CONFIG_SMP
2319 if (sched_feat(LB_WAKEUP_UPDATE) && !root_task_group_empty()) {
2320 struct sched_domain *sd;
2321
2322 this_cpu = raw_smp_processor_id();
2323 cpu = task_cpu(p);
2324
2325 for_each_domain(this_cpu, sd) {
2326 if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
2327 update_shares(sd);
2328 break;
2329 }
2330 }
2331 }
2332#endif
2333
2334 this_cpu = get_cpu(); 2310 this_cpu = get_cpu();
2335 2311
2336 smp_wmb(); 2312 smp_wmb();
@@ -3533,11 +3509,6 @@ static inline int check_power_save_busiest_group(struct sd_lb_stats *sds,
3533 *imbalance = sds->min_load_per_task; 3509 *imbalance = sds->min_load_per_task;
3534 sds->busiest = sds->group_min; 3510 sds->busiest = sds->group_min;
3535 3511
3536 if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP) {
3537 cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu =
3538 group_first_cpu(sds->group_leader);
3539 }
3540
3541 return 1; 3512 return 1;
3542 3513
3543} 3514}
@@ -7850,9 +7821,7 @@ static int sd_degenerate(struct sched_domain *sd)
7850 } 7821 }
7851 7822
7852 /* Following flags don't use groups */ 7823 /* Following flags don't use groups */
7853 if (sd->flags & (SD_WAKE_IDLE | 7824 if (sd->flags & (SD_WAKE_AFFINE))
7854 SD_WAKE_AFFINE |
7855 SD_WAKE_BALANCE))
7856 return 0; 7825 return 0;
7857 7826
7858 return 1; 7827 return 1;
@@ -7869,10 +7838,6 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
7869 if (!cpumask_equal(sched_domain_span(sd), sched_domain_span(parent))) 7838 if (!cpumask_equal(sched_domain_span(sd), sched_domain_span(parent)))
7870 return 0; 7839 return 0;
7871 7840
7872 /* Does parent contain flags not in child? */
7873 /* WAKE_BALANCE is a subset of WAKE_AFFINE */
7874 if (cflags & SD_WAKE_AFFINE)
7875 pflags &= ~SD_WAKE_BALANCE;
7876 /* Flags needing groups don't count if only 1 group in parent */ 7841 /* Flags needing groups don't count if only 1 group in parent */
7877 if (parent->groups == parent->groups->next) { 7842 if (parent->groups == parent->groups->next) {
7878 pflags &= ~(SD_LOAD_BALANCE | 7843 pflags &= ~(SD_LOAD_BALANCE |
@@ -8558,10 +8523,10 @@ static void set_domain_attribute(struct sched_domain *sd,
8558 request = attr->relax_domain_level; 8523 request = attr->relax_domain_level;
8559 if (request < sd->level) { 8524 if (request < sd->level) {
8560 /* turn off idle balance on this domain */ 8525 /* turn off idle balance on this domain */
8561 sd->flags &= ~(SD_WAKE_IDLE|SD_BALANCE_NEWIDLE); 8526 sd->flags &= ~(SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE);
8562 } else { 8527 } else {
8563 /* turn on idle balance on this domain */ 8528 /* turn on idle balance on this domain */
8564 sd->flags |= (SD_WAKE_IDLE_FAR|SD_BALANCE_NEWIDLE); 8529 sd->flags |= (SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE);
8565 } 8530 }
8566} 8531}
8567 8532