author     Linus Torvalds <torvalds@linux-foundation.org>   2012-09-16 15:29:43 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2012-09-16 15:29:43 -0400
commit     37407ea7f93864c2cfc03edf8f37872ec539ea2b
tree       7c07e7adadd40fc94cebfe816f1c65a4a630b147 /kernel/sched/core.c
parent     3f0c3c8fe30c725c1264fb6db8cc4b69db3a658a
Revert "sched: Improve scalability via 'CPU buddies', which withstand random perturbations"
This reverts commit 970e178985cadbca660feb02f4d2ee3a09f7fdda.
Nikolay Ulyanitsky reported that the 3.6-rc5 kernel has a 15-20%
performance drop on PostgreSQL 9.2 on his machine (running "pgbench").
Borislav Petkov was able to reproduce this, and bisected it to this
commit 970e178985ca ("sched: Improve scalability via 'CPU buddies' ...")
apparently because the new single-idle-buddy model simply doesn't find
idle CPUs to reschedule on aggressively enough.
Mike Galbraith suspects that it is likely due to the user-mode spinlocks
in PostgreSQL not reacting well to preemption, but we don't really know
the details - I'll just revert the commit for now.
There are hopefully other approaches to improve scheduler scalability
without it causing these kinds of downsides.
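The practical difference can be sketched with a stand-alone toy program. This is NOT the real select_idle_sibling() from kernel/sched/fair.c; the topology, idle states, and buddy wiring below are hypothetical. The old behaviour (restored by this revert) scans the CPUs sharing the last-level cache for an idle one, while the buddy model consults only a precomputed CPU per domain level and can therefore miss idle CPUs elsewhere in the cache domain:

/*
 * Toy illustration, not kernel code: 4 CPUs share a cache, CPUs 2 and 3
 * are idle, and buddies are cross-wired in pairs (0<->1, 2<->3).
 */
#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 4

static bool cpu_is_idle[NR_CPUS] = { false, false, true, true };
static int idle_buddy[NR_CPUS]   = { 1, 0, 3, 2 };

/* Old (and now restored) idea: scan the CPUs sharing the last-level
 * cache with "target" and take any idle one. */
static int pick_by_scan(int target)
{
	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		if (cpu_is_idle[cpu])
			return cpu;
	return target;
}

/* Reverted 3.6-rc idea: consult only the precomputed buddy (the real
 * code checks one buddy per domain level; one level is modelled here). */
static int pick_by_buddy(int target)
{
	if (cpu_is_idle[idle_buddy[target]])
		return idle_buddy[target];
	return target;
}

int main(void)
{
	int target = 0;	/* wakeup aimed at CPU 0, whose buddy (CPU 1) is busy */

	printf("full scan picks CPU %d\n", pick_by_scan(target));	/* CPU 2 */
	printf("buddy check picks CPU %d\n", pick_by_buddy(target));	/* CPU 0 */
	return 0;
}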
Reported-by: Nikolay Ulyanitsky <lystor@gmail.com>
Bisected-by: Borislav Petkov <bp@alien8.de>
Acked-by: Mike Galbraith <efault@gmx.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r--   kernel/sched/core.c   39
1 file changed, 1 insertion(+), 38 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a4ea245f3d85..649c9f876cb1 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6014,11 +6014,6 @@ static void destroy_sched_domains(struct sched_domain *sd, int cpu)
  * SD_SHARE_PKG_RESOURCE set (Last Level Cache Domain) for this
  * allows us to avoid some pointer chasing select_idle_sibling().
  *
- * Iterate domains and sched_groups downward, assigning CPUs to be
- * select_idle_sibling() hw buddy. Cross-wiring hw makes bouncing
- * due to random perturbation self canceling, ie sw buddies pull
- * their counterpart to their CPU's hw counterpart.
- *
  * Also keep a unique ID per domain (we use the first cpu number in
  * the cpumask of the domain), this allows us to quickly tell if
  * two cpus are in the same cache domain, see cpus_share_cache().
@@ -6032,40 +6027,8 @@ static void update_top_cache_domain(int cpu)
 	int id = cpu;
 
 	sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES);
-	if (sd) {
-		struct sched_domain *tmp = sd;
-		struct sched_group *sg, *prev;
-		bool right;
-
-		/*
-		 * Traverse to first CPU in group, and count hops
-		 * to cpu from there, switching direction on each
-		 * hop, never ever pointing the last CPU rightward.
-		 */
-		do {
-			id = cpumask_first(sched_domain_span(tmp));
-			prev = sg = tmp->groups;
-			right = 1;
-
-			while (cpumask_first(sched_group_cpus(sg)) != id)
-				sg = sg->next;
-
-			while (!cpumask_test_cpu(cpu, sched_group_cpus(sg))) {
-				prev = sg;
-				sg = sg->next;
-				right = !right;
-			}
-
-			/* A CPU went down, never point back to domain start. */
-			if (right && cpumask_first(sched_group_cpus(sg->next)) == id)
-				right = false;
-
-			sg = right ? sg->next : prev;
-			tmp->idle_buddy = cpumask_first(sched_group_cpus(sg));
-		} while ((tmp = tmp->child));
-
+	if (sd)
 		id = cpumask_first(sched_domain_span(sd));
-	}
 
 	rcu_assign_pointer(per_cpu(sd_llc, cpu), sd);
 	per_cpu(sd_llc_id, cpu) = id;
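The buddy wiring deleted above can be replayed with a stand-alone toy program. This is NOT kernel code: it models a single, hypothetical domain level whose groups each hold one CPU (a 4-CPU cache domain) and mirrors the zig-zag traversal removed from update_top_cache_domain(). On this topology the traversal pairs CPUs 0<->1 and 2<->3, so each buddy points back at its counterpart:

/* Toy model of the removed buddy assignment, not kernel code. */
#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 4

int main(void)
{
	int next[NR_CPUS];	/* circular list of groups: 0 -> 1 -> 2 -> 3 -> 0 */

	for (int i = 0; i < NR_CPUS; i++)
		next[i] = (i + 1) % NR_CPUS;

	for (int cpu = 0; cpu < NR_CPUS; cpu++) {
		int id = 0;		/* first CPU in the domain span */
		int sg = id, prev = id;
		bool right = true;

		/* Count hops from the domain start to "cpu", switching
		 * direction on each hop. */
		while (sg != cpu) {
			prev = sg;
			sg = next[sg];
			right = !right;
		}

		/* Never point the last CPU back to the domain start. */
		if (right && next[sg] == id)
			right = false;

		printf("cpu %d -> idle buddy %d\n",
		       cpu, right ? next[sg] : prev);
	}
	return 0;
}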