author	Siddha, Suresh B <suresh.b.siddha@intel.com>	2006-03-27 04:15:22 -0500
committer	Linus Torvalds <torvalds@g5.osdl.org>	2006-03-27 11:44:43 -0500
commit	1e9f28fa1eb9773bf65bae08288c6a0a38eef4a7 (patch)
tree	ccfa4927ebc7a8f663f9ac9e7789a713a33253ff /kernel
parent	77e4bfbcf071f795b54862455dce8902b3fc29c2 (diff)
[PATCH] sched: new sched domain for representing multi-core
Add a new sched domain for representing multi-core with shared caches
between cores. Consider a dual-package system, each package containing two
cores with the last-level cache shared between the cores within a package.
If there are two runnable processes, with this patch those two processes
will be scheduled on different packages.

On such systems, with this patch we have observed an 8% perf improvement
with the specJBB (2 warehouse) benchmark and a 35% improvement with
CFP2000 rate (with 2 users).

This new domain will come into play only on multi-core systems with shared
caches. On other systems, this sched domain will be removed by the domain
degeneration code. This new domain can also be used for implementing a
power savings policy (see the OLS 2005 CMP kernel scheduler paper for more
details; I will post another patch for the power savings policy soon).

Most of the arch/* file changes are for the cpu_coregroup_map()
implementation.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
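[Editor's note] The per-arch cpu_coregroup_map() implementations live in the
arch/* parts of this patch and are not shown in this kernel/-only diff. As a
minimal sketch of the contract the scheduler code below depends on
(illustrative only, not the actual arch change): return the mask of CPUs
sharing a last-level cache with the given CPU. The x86 package-sibling mask
cpu_core_map[] is assumed here as a plausible stand-in for that set.

/*
 * Illustrative sketch only -- the real implementations are in the
 * arch/* portion of this patch.  Contract: return the mask of CPUs
 * that share a last-level cache with @cpu.  Here we assume the x86
 * package-sibling mask cpu_core_map[] approximates that set.
 */
cpumask_t cpu_coregroup_map(int cpu)
{
	return cpu_core_map[cpu];
}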
Diffstat (limited to 'kernel')
 kernel/sched.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 68 insertions(+), 5 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index a96a05d23262..8a8b71b5751b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5574,11 +5574,31 @@ static int cpu_to_cpu_group(int cpu)
 }
 #endif
 
+#ifdef CONFIG_SCHED_MC
+static DEFINE_PER_CPU(struct sched_domain, core_domains);
+static struct sched_group sched_group_core[NR_CPUS];
+#endif
+
+#if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT)
+static int cpu_to_core_group(int cpu)
+{
+	return first_cpu(cpu_sibling_map[cpu]);
+}
+#elif defined(CONFIG_SCHED_MC)
+static int cpu_to_core_group(int cpu)
+{
+	return cpu;
+}
+#endif
+
 static DEFINE_PER_CPU(struct sched_domain, phys_domains);
 static struct sched_group sched_group_phys[NR_CPUS];
 static int cpu_to_phys_group(int cpu)
 {
-#ifdef CONFIG_SCHED_SMT
+#if defined(CONFIG_SCHED_MC)
+	cpumask_t mask = cpu_coregroup_map(cpu);
+	return first_cpu(mask);
+#elif defined(CONFIG_SCHED_SMT)
 	return first_cpu(cpu_sibling_map[cpu]);
 #else
 	return cpu;
@@ -5676,6 +5696,17 @@ void build_sched_domains(const cpumask_t *cpu_map)
 		sd->parent = p;
 		sd->groups = &sched_group_phys[group];
 
+#ifdef CONFIG_SCHED_MC
+		p = sd;
+		sd = &per_cpu(core_domains, i);
+		group = cpu_to_core_group(i);
+		*sd = SD_MC_INIT;
+		sd->span = cpu_coregroup_map(i);
+		cpus_and(sd->span, sd->span, *cpu_map);
+		sd->parent = p;
+		sd->groups = &sched_group_core[group];
+#endif
+
 #ifdef CONFIG_SCHED_SMT
 		p = sd;
 		sd = &per_cpu(cpu_domains, i);
@@ -5701,6 +5732,19 @@ void build_sched_domains(const cpumask_t *cpu_map)
 	}
 #endif
 
+#ifdef CONFIG_SCHED_MC
+	/* Set up multi-core groups */
+	for_each_cpu_mask(i, *cpu_map) {
+		cpumask_t this_core_map = cpu_coregroup_map(i);
+		cpus_and(this_core_map, this_core_map, *cpu_map);
+		if (i != first_cpu(this_core_map))
+			continue;
+		init_sched_build_groups(sched_group_core, this_core_map,
+					&cpu_to_core_group);
+	}
+#endif
+
+
 	/* Set up physical groups */
 	for (i = 0; i < MAX_NUMNODES; i++) {
 		cpumask_t nodemask = node_to_cpumask(i);
@@ -5797,11 +5841,31 @@ void build_sched_domains(const cpumask_t *cpu_map)
 		power = SCHED_LOAD_SCALE;
 		sd->groups->cpu_power = power;
 #endif
+#ifdef CONFIG_SCHED_MC
+		sd = &per_cpu(core_domains, i);
+		power = SCHED_LOAD_SCALE + (cpus_weight(sd->groups->cpumask)-1)
+					    * SCHED_LOAD_SCALE / 10;
+		sd->groups->cpu_power = power;
+
+		sd = &per_cpu(phys_domains, i);
 
+		/*
+		 * This has to be < 2 * SCHED_LOAD_SCALE
+		 * Lets keep it SCHED_LOAD_SCALE, so that
+		 * while calculating NUMA group's cpu_power
+		 * we can simply do
+		 *  numa_group->cpu_power += phys_group->cpu_power;
+		 *
+		 * See "only add power once for each physical pkg"
+		 * comment below
+		 */
+		sd->groups->cpu_power = SCHED_LOAD_SCALE;
+#else
 		sd = &per_cpu(phys_domains, i);
 		power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
 			(cpus_weight(sd->groups->cpumask)-1) / 10;
 		sd->groups->cpu_power = power;
+#endif
 
 #ifdef CONFIG_NUMA
 		sd = &per_cpu(allnodes_domains, i);
@@ -5823,7 +5887,6 @@ void build_sched_domains(const cpumask_t *cpu_map)
 next_sg:
 	for_each_cpu_mask(j, sg->cpumask) {
 		struct sched_domain *sd;
-		int power;
 
 		sd = &per_cpu(phys_domains, j);
 		if (j != first_cpu(sd->groups->cpumask)) {
@@ -5833,10 +5896,8 @@ next_sg:
 			 */
 			continue;
 		}
-		power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
-			(cpus_weight(sd->groups->cpumask)-1) / 10;
 
-		sg->cpu_power += power;
+		sg->cpu_power += sd->groups->cpu_power;
 	}
 	sg = sg->next;
 	if (sg != sched_group_nodes[i])
@@ -5849,6 +5910,8 @@ next_sg:
 		struct sched_domain *sd;
 #ifdef CONFIG_SCHED_SMT
 		sd = &per_cpu(cpu_domains, i);
+#elif defined(CONFIG_SCHED_MC)
+		sd = &per_cpu(core_domains, i);
 #else
 		sd = &per_cpu(phys_domains, i);
 #endif
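[Editor's note] To make the cpu_power arithmetic in the hunks above concrete:
assuming SCHED_LOAD_SCALE is 128 (1UL << 7, its value in this kernel series),
a core-domain group spanning two SMT siblings gets 128 + (2-1) * 128 / 10 =
140, while with CONFIG_SCHED_MC each physical group is pinned at exactly
SCHED_LOAD_SCALE so the NUMA code can accumulate physical-group power with a
plain sum. A standalone sketch of that arithmetic (the two-sibling group is
an assumed example, not taken from the patch):

#include <stdio.h>

#define SCHED_LOAD_SCALE 128UL	/* 1UL << SCHED_LOAD_SHIFT in this era */

int main(void)
{
	/* Assumed example: a core group spanning two SMT siblings. */
	unsigned long weight = 2;
	unsigned long core_power = SCHED_LOAD_SCALE +
		(weight - 1) * SCHED_LOAD_SCALE / 10;

	/* One full CPU of capacity plus ~10% per extra sibling: 140. */
	printf("core group cpu_power = %lu\n", core_power);

	/*
	 * With CONFIG_SCHED_MC, each physical group is pinned to
	 * SCHED_LOAD_SCALE (128) so NUMA groups can simply sum them.
	 */
	printf("phys group cpu_power = %lu\n", SCHED_LOAD_SCALE);
	return 0;
}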