author		Ingo Molnar <mingo@elte.hu>	2007-07-09 12:51:57 -0400
committer	Ingo Molnar <mingo@elte.hu>	2007-07-09 12:51:57 -0400
commit		0437e109e1841607f2988891eaa36c531c6aa6ac (patch)
tree		e9d8f170786f7e33d4c5829cb008cf38d42a2014 /arch
parent		0e6aca43e08a62a48d6770e9a159dbec167bf4c6 (diff)
sched: zap the migration init / cache-hot balancing code
the SMP load-balancer uses the boot-time migration-cost estimation
code to attempt to improve the quality of balancing. The reason for
this code is that the discrete priority queues do not preserve
the order of scheduling accurately, so the load-balancer skips
tasks that were running on a CPU 'recently'.
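(For readers unfamiliar with that scheme, a minimal sketch of the check in question follows. The names here -- task_is_cache_hot(), last_ran, the cost parameter -- are illustrative stand-ins, not the exact pre-CFS code, which looked the cost up in a per-domain table filled in by the boot-time probe.)

	/* Illustrative only: skip migrating tasks that ran too recently. */
	static int task_is_cache_hot(struct task_struct *p, u64 now, u64 cost)
	{
		/* both timestamps come from sched_clock(), in nanoseconds */
		return (now - p->last_ran) < cost;
	}

	static int can_migrate_task(struct task_struct *p, u64 now, u64 cost)
	{
		if (task_is_cache_hot(p, now, cost))
			return 0;	/* assumed still warm in this CPU's cache */
		return 1;
	}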
this code is fundamentally fragile: the boot-time migration-cost detector
doesn't really work on systems with large L3 caches, it caused boot
delays on large systems, and the whole cache-hot concept made the
balancing code pretty nondeterministic as well.
(and hey, i wrote most of it, so i can say it out loud that it sucks ;-)
under CFS the same cache-affinity goal can be achieved without any
cache-hot special-case: tasks are sorted in the 'timeline' tree and
the SMP balancer picks tasks from the left side of the tree, thus
the most cache-cold task is balanced automatically.
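(A sketch of that CFS-side mechanism -- simplified, with illustrative names rather than the verbatim scheduler source: runnable entities sit in an rbtree keyed by virtual runtime, so the leftmost node is the entity that has run least recently.)

	/* Illustrative only: the leftmost timeline entry is the most cache-cold. */
	static struct sched_entity *pick_most_cache_cold(struct cfs_rq *cfs_rq)
	{
		struct rb_node *leftmost = rb_first(&cfs_rq->tasks_timeline);

		if (!leftmost)
			return NULL;	/* nothing runnable to pull */
		return rb_entry(leftmost, struct sched_entity, run_node);
	}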
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch')
-rw-r--r--	arch/i386/kernel/smpboot.c	12
-rw-r--r--	arch/ia64/kernel/setup.c	6
-rw-r--r--	arch/mips/kernel/smp.c		11
-rw-r--r--	arch/sparc/kernel/smp.c		10
-rw-r--r--	arch/sparc64/kernel/smp.c	27
5 files changed, 0 insertions, 66 deletions
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 88baed1e7e83..0b2954534b8e 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -941,17 +941,6 @@ exit:
 }
 #endif
 
-static void smp_tune_scheduling(void)
-{
-	if (cpu_khz) {
-		/* cache size in kB */
-		long cachesize = boot_cpu_data.x86_cache_size;
-
-		if (cachesize > 0)
-			max_cache_size = cachesize * 1024;
-	}
-}
-
 /*
  * Cycle through the processors sending APIC IPIs to boot each.
  */
@@ -980,7 +969,6 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
 	x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;
 
 	current_thread_info()->cpu = 0;
-	smp_tune_scheduling();
 
 	set_cpu_sibling_map(0);
 
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index eaa6a24bc0b6..188fb73c6845 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -805,7 +805,6 @@ static void __cpuinit
 get_max_cacheline_size (void)
 {
 	unsigned long line_size, max = 1;
-	unsigned int cache_size = 0;
 	u64 l, levels, unique_caches;
 	pal_cache_config_info_t cci;
 	s64 status;
@@ -835,8 +834,6 @@ get_max_cacheline_size (void)
 		line_size = 1 << cci.pcci_line_size;
 		if (line_size > max)
 			max = line_size;
-		if (cache_size < cci.pcci_cache_size)
-			cache_size = cci.pcci_cache_size;
 		if (!cci.pcci_unified) {
 			status = ia64_pal_cache_config_info(l,
 				    /* cache_type (instruction)= */ 1,
@@ -853,9 +850,6 @@ get_max_cacheline_size (void)
 		ia64_i_cache_stride_shift = cci.pcci_stride;
 	}
   out:
-#ifdef CONFIG_SMP
-	max_cache_size = max(max_cache_size, cache_size);
-#endif
 	if (max > ia64_max_cacheline_size)
 		ia64_max_cacheline_size = max;
 }
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 67edfa7ed93a..a1b017f2dbb3 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -51,16 +51,6 @@ int __cpu_logical_map[NR_CPUS];	/* Map logical to physical */
 EXPORT_SYMBOL(phys_cpu_present_map);
 EXPORT_SYMBOL(cpu_online_map);
 
-/* This happens early in bootup, can't really do it better */
-static void smp_tune_scheduling (void)
-{
-	struct cache_desc *cd = &current_cpu_data.scache;
-	unsigned long cachesize = cd->linesz * cd->sets * cd->ways;
-
-	if (cachesize > max_cache_size)
-		max_cache_size = cachesize;
-}
-
 extern void __init calibrate_delay(void);
 extern ATTRIB_NORET void cpu_idle(void);
 
@@ -228,7 +218,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 {
 	init_new_context(current, &init_mm);
 	current_thread_info()->cpu = 0;
-	smp_tune_scheduling();
 	plat_prepare_cpus(max_cpus);
 #ifndef CONFIG_HOTPLUG_CPU
 	cpu_present_map = cpu_possible_map;
diff --git a/arch/sparc/kernel/smp.c b/arch/sparc/kernel/smp.c
index 4d9ad59031bb..4fea3ac7bff0 100644
--- a/arch/sparc/kernel/smp.c
+++ b/arch/sparc/kernel/smp.c
@@ -68,16 +68,6 @@ void __cpuinit smp_store_cpu_info(int id)
 	cpu_data(id).prom_node = cpu_node;
 	cpu_data(id).mid = cpu_get_hwmid(cpu_node);
 
-	/* this is required to tune the scheduler correctly */
-	/* is it possible to have CPUs with different cache sizes? */
-	if (id == boot_cpu_id) {
-		int cache_line,cache_nlines;
-		cache_line = 0x20;
-		cache_line = prom_getintdefault(cpu_node, "ecache-line-size", cache_line);
-		cache_nlines = 0x8000;
-		cache_nlines = prom_getintdefault(cpu_node, "ecache-nlines", cache_nlines);
-		max_cache_size = cache_line * cache_nlines;
-	}
 	if (cpu_data(id).mid < 0)
 		panic("No MID found for CPU%d at node 0x%08d", id, cpu_node);
 }
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index 4dcd7d0b60f2..40e40f968d61 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -1163,32 +1163,6 @@ int setup_profiling_timer(unsigned int multiplier)
 	return -EINVAL;
 }
 
-static void __init smp_tune_scheduling(void)
-{
-	unsigned int smallest = ~0U;
-	int i;
-
-	for (i = 0; i < NR_CPUS; i++) {
-		unsigned int val = cpu_data(i).ecache_size;
-
-		if (val && val < smallest)
-			smallest = val;
-	}
-
-	/* Any value less than 256K is nonsense.  */
-	if (smallest < (256U * 1024U))
-		smallest = 256 * 1024;
-
-	max_cache_size = smallest;
-
-	if (smallest < 1U * 1024U * 1024U)
-		printk(KERN_INFO "Using max_cache_size of %uKB\n",
-		       smallest / 1024U);
-	else
-		printk(KERN_INFO "Using max_cache_size of %uMB\n",
-		       smallest / 1024U / 1024U);
-}
-
 /* Constrain the number of cpus to max_cpus. */
 void __init smp_prepare_cpus(unsigned int max_cpus)
 {
@@ -1206,7 +1180,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	}
 
 	cpu_data(boot_cpu_id).udelay_val = loops_per_jiffy;
-	smp_tune_scheduling();
 }
 
 void __devinit smp_prepare_boot_cpu(void)