author    Siddha, Suresh B <suresh.b.siddha@intel.com>  2006-03-27 04:15:22 -0500
committer Linus Torvalds <torvalds@g5.osdl.org>  2006-03-27 11:44:43 -0500
commit    1e9f28fa1eb9773bf65bae08288c6a0a38eef4a7 (patch)
tree      ccfa4927ebc7a8f663f9ac9e7789a713a33253ff /arch/x86_64/kernel
parent    77e4bfbcf071f795b54862455dce8902b3fc29c2 (diff)
[PATCH] sched: new sched domain for representing multi-core
Add a new sched domain for representing multi-core with shared caches between cores. Consider a dual package system, each package containing two cores and with the last level cache shared between cores within a package. If there are two runnable processes, with this patch those two processes will be scheduled on different packages.

On such systems, with this patch we have observed an 8% perf improvement with the specJBB (2 warehouse) benchmark and a 35% improvement with CFP2000 rate (with 2 users).

This new domain will come into play only on multi-core systems with shared caches. On other systems, this sched domain will be removed by the domain degeneration code. This new domain can also be used for implementing a power savings policy (see the OLS 2005 CMP kernel scheduler paper for more details; I will post another patch for the power savings policy soon).

Most of the arch/* file changes are for the cpu_coregroup_map() implementation.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
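Before the diff, the grouping rule this patch adds to set_cpu_sibling_map() can be illustrated with a small self-contained user-space sketch. The program below is not kernel code: the 8-bit masks standing in for cpumask_t, the made-up cpu_llc_id values, and the demo main() are all illustrative assumptions. CPUs that report the same last level cache ID end up in each other's llc_shared_map, and cpu_coregroup_map() then hands that map to the scheduler as the span of the new multi-core domain.

#include <stdio.h>

#define NR_CPUS 4
#define BAD_APICID 0xffu

/* Hypothetical topology: CPUs 0-1 share one last level cache,
 * CPUs 2-3 share another (two dual-core packages). */
static unsigned int cpu_llc_id[NR_CPUS] = { 0, 0, 1, 1 };

/* Bit i set in llc_shared_map[cpu] means CPU i shares cpu's LLC;
 * stands in for the kernel's per-cpu cpumask_t llc_shared_map. */
static unsigned int llc_shared_map[NR_CPUS];

static void set_llc_shared_map(int cpu)
{
	int i;

	/* same pairwise rule as the patch: a valid, matching LLC ID
	 * puts both CPUs in each other's shared map (i == cpu covers
	 * the "a CPU always shares with itself" case) */
	for (i = 0; i < NR_CPUS; i++) {
		if (cpu_llc_id[cpu] != BAD_APICID &&
		    cpu_llc_id[cpu] == cpu_llc_id[i]) {
			llc_shared_map[cpu] |= 1u << i;
			llc_shared_map[i] |= 1u << cpu;
		}
	}
}

/* mirrors cpu_coregroup_map(): the multi-core domain spans the LLC peers */
static unsigned int cpu_coregroup_map(int cpu)
{
	return llc_shared_map[cpu];
}

int main(void)
{
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		set_llc_shared_map(cpu);
	for (cpu = 0; cpu < NR_CPUS; cpu++)
		printf("cpu%d coregroup: 0x%x\n", cpu, cpu_coregroup_map(cpu));
	return 0;
}

Compiled and run, CPUs 0 and 1 report coregroup 0x3 and CPUs 2 and 3 report 0xc, i.e. the two packages form two separate multi-core domains, so two runnable processes can be spread across packages.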
Diffstat (limited to 'arch/x86_64/kernel')
-rw-r--r--  arch/x86_64/kernel/setup.c    3
-rw-r--r--  arch/x86_64/kernel/smpboot.c  24
2 files changed, 26 insertions, 1 deletion
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index a57eec8311a7..d1f3e9272c05 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -962,7 +962,6 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
 
 	cpuid(1, &eax, &ebx, &ecx, &edx);
 
-	c->apicid = phys_pkg_id(0);
 
 	if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
 		return;
@@ -1171,6 +1170,8 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 		c->x86_capability[2] = cpuid_edx(0x80860001);
 	}
 
+	c->apicid = phys_pkg_id(0);
+
 	/*
 	 * Vendor-specific initialization. In this section we
 	 * canonicalize the feature flags, meaning if there are
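For context between the two files: the commit message says the new map feeds a new sched domain, and the companion scheduler patch is expected to add a CONFIG_SCHED_MC level between the SMT and physical-package levels in build_sched_domains(), with cpu_coregroup_map() supplying the span. The fragment below is a hedged sketch of that wiring, not part of this diff; SD_MC_INIT, core_domains, and the surrounding variables follow the pattern of the existing sibling/physical levels and are assumptions here.

#ifdef CONFIG_SCHED_MC
		/* multi-core domain: spans every core sharing a last level
		 * cache with CPU i, as reported by cpu_coregroup_map() */
		p = sd;
		sd = &per_cpu(core_domains, i);
		*sd = SD_MC_INIT;
		sd->span = cpu_coregroup_map(i);
		sd->parent = p;		/* physical package domain above */
#endif

On a system where no cores share a cache this span collapses to the sibling map, and the domain degeneration code removes the level, as the commit message notes.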
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index 66e98659d077..ea48fa638070 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -68,6 +68,9 @@ u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
 /* core ID of each logical CPU */
 u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
 
+/* Last level cache ID of each logical CPU */
+u8 cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID};
+
 /* Bitmask of currently online CPUs */
 cpumask_t cpu_online_map __read_mostly;
 
@@ -445,6 +448,18 @@ void __cpuinit smp_callin(void)
 	cpu_set(cpuid, cpu_callin_map);
 }
 
+/* maps the cpu to the sched domain representing multi-core */
+cpumask_t cpu_coregroup_map(int cpu)
+{
+	struct cpuinfo_x86 *c = cpu_data + cpu;
+	/*
+	 * For perf, we return last level cache shared map.
+	 * TBD: when power saving sched policy is added, we will return
+	 *      cpu_core_map when power saving policy is enabled
+	 */
+	return c->llc_shared_map;
+}
+
 /* representing cpus for which sibling maps can be computed */
 static cpumask_t cpu_sibling_setup_map;
 
@@ -463,12 +478,16 @@ static inline void set_cpu_sibling_map(int cpu)
 				cpu_set(cpu, cpu_sibling_map[i]);
 				cpu_set(i, cpu_core_map[cpu]);
 				cpu_set(cpu, cpu_core_map[i]);
+				cpu_set(i, c[cpu].llc_shared_map);
+				cpu_set(cpu, c[i].llc_shared_map);
 			}
 		}
 	} else {
 		cpu_set(cpu, cpu_sibling_map[cpu]);
 	}
 
+	cpu_set(cpu, c[cpu].llc_shared_map);
+
 	if (current_cpu_data.x86_max_cores == 1) {
 		cpu_core_map[cpu] = cpu_sibling_map[cpu];
 		c[cpu].booted_cores = 1;
@@ -476,6 +495,11 @@ static inline void set_cpu_sibling_map(int cpu)
 	}
 
 	for_each_cpu_mask(i, cpu_sibling_setup_map) {
+		if (cpu_llc_id[cpu] != BAD_APICID &&
+		    cpu_llc_id[cpu] == cpu_llc_id[i]) {
+			cpu_set(i, c[cpu].llc_shared_map);
+			cpu_set(cpu, c[i].llc_shared_map);
+		}
 		if (phys_proc_id[cpu] == phys_proc_id[i]) {
 			cpu_set(i, cpu_core_map[cpu]);
 			cpu_set(cpu, cpu_core_map[i]);