sched: zap the migration init / cache-hot balancing code

The SMP load-balancer uses the boot-time migration-cost estimation code to attempt to improve the quality of balancing. The reason for this code is that the discrete priority queues do not preserve the order of scheduling accurately, so the load-balancer skips tasks that were running on a CPU 'recently'. This code is fundamentally fragile: the boot-time migration-cost detector doesn't really work on systems that have large L3 caches, it causes boot delays on large systems, and the whole cache-hot concept makes the balancing code pretty nondeterministic as well. (And hey, I wrote most of it, so I can say out loud that it sucks ;-) Under CFS the same purpose of cache affinity can be achieved without any cache-hot special-case: tasks are sorted in the 'timeline' tree and the SMP balancer picks tasks from the left side of the tree, thus the most cache-cold task is balanced automatically. Signed-off-by: Ingo Molnar <mingo@elte.hu>
author: Ingo Molnar <mingo@elte.hu> 2007-07-09 12:51:57 -0400
committer: Ingo Molnar <mingo@elte.hu> 2007-07-09 12:51:57 -0400
commit: 0437e109e1841607f2988891eaa36c531c6aa6ac (patch)
tree: e9d8f170786f7e33d4c5829cb008cf38d42a2014 /arch
parent: 0e6aca43e08a62a48d6770e9a159dbec167bf4c6 (diff)
5 files changed, 0 insertions, 66 deletions
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 88baed1e7e83..0b2954534b8e 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -941,17 +941,6 @@ exit:
 }
 #endif
-static void smp_tune_scheduling(void)
-{
-        if (cpu_khz) {
-                /* cache size in kB */
-                long cachesize = boot_cpu_data.x86_cache_size;
-                if (cachesize > 0)
-                        max_cache_size = cachesize * 1024;
-        }
-}
 /*
 * Cycle through the processors sending APIC IPIs to boot each.
 */
@@ -980,7 +969,6 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
        x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;
        current_thread_info()->cpu = 0;
-        smp_tune_scheduling();
        set_cpu_sibling_map(0);
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index eaa6a24bc0b6..188fb73c6845 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -805,7 +805,6 @@ static void __cpuinit
 get_max_cacheline_size (void)
 {
        unsigned long line_size, max = 1;
-        unsigned int cache_size = 0;
        u64 l, levels, unique_caches;
        pal_cache_config_info_t cci;
        s64 status;
@@ -835,8 +834,6 @@ get_max_cacheline_size (void)
                line_size = 1 << cci.pcci_line_size;
                if (line_size > max)
                        max = line_size;
-                if (cache_size < cci.pcci_cache_size)
-                        cache_size = cci.pcci_cache_size;
                if (!cci.pcci_unified) {
                        status = ia64_pal_cache_config_info(l,
                                                    /* cache_type (instruction)= */ 1,
@@ -853,9 +850,6 @@ get_max_cacheline_size (void)
                        ia64_i_cache_stride_shift = cci.pcci_stride;
        }
  out:
-#ifdef CONFIG_SMP
-        max_cache_size = max(max_cache_size, cache_size);
-#endif
        if (max > ia64_max_cacheline_size)
                ia64_max_cacheline_size = max;
 }
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 67edfa7ed93a..a1b017f2dbb3 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -51,16 +51,6 @@ int __cpu_logical_map[NR_CPUS];		/* Map logical to physical */
 EXPORT_SYMBOL(phys_cpu_present_map);
 EXPORT_SYMBOL(cpu_online_map);
-/* This happens early in bootup, can't really do it better */
-static void smp_tune_scheduling (void)
-{
-        struct cache_desc *cd = &current_cpu_data.scache;
-        unsigned long cachesize = cd->linesz * cd->sets * cd->ways;
-        if (cachesize > max_cache_size)
-                max_cache_size = cachesize;
-}
 extern void __init calibrate_delay(void);
 extern ATTRIB_NORET void cpu_idle(void);
@@ -228,7 +218,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 {
        init_new_context(current, &init_mm);
        current_thread_info()->cpu = 0;
-        smp_tune_scheduling();
        plat_prepare_cpus(max_cpus);
 #ifndef CONFIG_HOTPLUG_CPU
        cpu_present_map = cpu_possible_map;
diff --git a/arch/sparc/kernel/smp.c b/arch/sparc/kernel/smp.c
index 4d9ad59031bb..4fea3ac7bff0 100644
--- a/arch/sparc/kernel/smp.c
+++ b/arch/sparc/kernel/smp.c
@@ -68,16 +68,6 @@ void __cpuinit smp_store_cpu_info(int id)
        cpu_data(id).prom_node = cpu_node;
        cpu_data(id).mid = cpu_get_hwmid(cpu_node);
-        /* this is required to tune the scheduler correctly */
-        /* is it possible to have CPUs with different cache sizes? */
-        if (id == boot_cpu_id) {
-                int cache_line,cache_nlines;
-                cache_line = 0x20;
-                cache_line = prom_getintdefault(cpu_node, "ecache-line-size", cache_line);
-                cache_nlines = 0x8000;
-                cache_nlines = prom_getintdefault(cpu_node, "ecache-nlines", cache_nlines);
-                max_cache_size = cache_line * cache_nlines;
-        }
        if (cpu_data(id).mid < 0)
                panic("No MID found for CPU%d at node 0x%08d", id, cpu_node);
 }
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index 4dcd7d0b60f2..40e40f968d61 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -1163,32 +1163,6 @@ int setup_profiling_timer(unsigned int multiplier)
        return -EINVAL;
 }
-static void __init smp_tune_scheduling(void)
-{
-        unsigned int smallest = ~0U;
-        int i;
-        for (i = 0; i < NR_CPUS; i++) {
-                unsigned int val = cpu_data(i).ecache_size;
-                if (val && val < smallest)
-                        smallest = val;
-        }
-        /* Any value less than 256K is nonsense.  */
-        if (smallest < (256U * 1024U))
-                smallest = 256 * 1024;
-        max_cache_size = smallest;
-        if (smallest < 1U * 1024U * 1024U)
-                printk(KERN_INFO "Using max_cache_size of %uKB\n",
-                       smallest / 1024U);
-        else
-                printk(KERN_INFO "Using max_cache_size of %uMB\n",
-                       smallest / 1024U / 1024U);
-}
 /* Constrain the number of cpus to max_cpus.  */
 void __init smp_prepare_cpus(unsigned int max_cpus)
 {
@@ -1206,7 +1180,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
        }
        cpu_data(boot_cpu_id).udelay_val = loops_per_jiffy;
-        smp_tune_scheduling();
 }
 void __devinit smp_prepare_boot_cpu(void)
author	Ingo Molnar <mingo@elte.hu>	2007-07-09 12:51:57 -0400
committer	Ingo Molnar <mingo@elte.hu>	2007-07-09 12:51:57 -0400
commit	0437e109e1841607f2988891eaa36c531c6aa6ac (patch)
tree	e9d8f170786f7e33d4c5829cb008cf38d42a2014 /arch
parent	0e6aca43e08a62a48d6770e9a159dbec167bf4c6 (diff)

diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 88baed1e7e83..0b2954534b8e 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c
@@ -941,17 +941,6 @@ exit:
941	}	941	}
942	#endif	942	#endif
943		943
944	static void smp_tune_scheduling(void)
945	{
946	if (cpu_khz) {
947	/* cache size in kB */
948	long cachesize = boot_cpu_data.x86_cache_size;
949
950	if (cachesize > 0)
951	max_cache_size = cachesize * 1024;
952	}
953	}
954
955	/*	944	/*
956	* Cycle through the processors sending APIC IPIs to boot each.	945	* Cycle through the processors sending APIC IPIs to boot each.
957	*/	946	*/
@@ -980,7 +969,6 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
980	x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;	969	x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;
981		970
982	current_thread_info()->cpu = 0;	971	current_thread_info()->cpu = 0;
983	smp_tune_scheduling();
984		972
985	set_cpu_sibling_map(0);	973	set_cpu_sibling_map(0);
986		974


diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index eaa6a24bc0b6..188fb73c6845 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c
@@ -805,7 +805,6 @@ static void __cpuinit
805	get_max_cacheline_size (void)	805	get_max_cacheline_size (void)
806	{	806	{
807	unsigned long line_size, max = 1;	807	unsigned long line_size, max = 1;
808	unsigned int cache_size = 0;
809	u64 l, levels, unique_caches;	808	u64 l, levels, unique_caches;
810	pal_cache_config_info_t cci;	809	pal_cache_config_info_t cci;
811	s64 status;	810	s64 status;
@@ -835,8 +834,6 @@ get_max_cacheline_size (void)
835	line_size = 1 << cci.pcci_line_size;	834	line_size = 1 << cci.pcci_line_size;
836	if (line_size > max)	835	if (line_size > max)
837	max = line_size;	836	max = line_size;
838	if (cache_size < cci.pcci_cache_size)
839	cache_size = cci.pcci_cache_size;
840	if (!cci.pcci_unified) {	837	if (!cci.pcci_unified) {
841	status = ia64_pal_cache_config_info(l,	838	status = ia64_pal_cache_config_info(l,
842	/* cache_type (instruction)= */ 1,	839	/* cache_type (instruction)= */ 1,
@@ -853,9 +850,6 @@ get_max_cacheline_size (void)
853	ia64_i_cache_stride_shift = cci.pcci_stride;	850	ia64_i_cache_stride_shift = cci.pcci_stride;
854	}	851	}
855	out:	852	out:
856	#ifdef CONFIG_SMP
857	max_cache_size = max(max_cache_size, cache_size);
858	#endif
859	if (max > ia64_max_cacheline_size)	853	if (max > ia64_max_cacheline_size)
860	ia64_max_cacheline_size = max;	854	ia64_max_cacheline_size = max;
861	}	855	}


diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 67edfa7ed93a..a1b017f2dbb3 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c
@@ -51,16 +51,6 @@ int __cpu_logical_map[NR_CPUS]; /* Map logical to physical */
51	EXPORT_SYMBOL(phys_cpu_present_map);	51	EXPORT_SYMBOL(phys_cpu_present_map);
52	EXPORT_SYMBOL(cpu_online_map);	52	EXPORT_SYMBOL(cpu_online_map);
53		53
54	/* This happens early in bootup, can't really do it better */
55	static void smp_tune_scheduling (void)
56	{
57	struct cache_desc *cd = &current_cpu_data.scache;
58	unsigned long cachesize = cd->linesz * cd->sets * cd->ways;
59
60	if (cachesize > max_cache_size)
61	max_cache_size = cachesize;
62	}
63
64	extern void __init calibrate_delay(void);	54	extern void __init calibrate_delay(void);
65	extern ATTRIB_NORET void cpu_idle(void);	55	extern ATTRIB_NORET void cpu_idle(void);
66		56
@@ -228,7 +218,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
228	{	218	{
229	init_new_context(current, &init_mm);	219	init_new_context(current, &init_mm);
230	current_thread_info()->cpu = 0;	220	current_thread_info()->cpu = 0;
231	smp_tune_scheduling();
232	plat_prepare_cpus(max_cpus);	221	plat_prepare_cpus(max_cpus);
233	#ifndef CONFIG_HOTPLUG_CPU	222	#ifndef CONFIG_HOTPLUG_CPU
234	cpu_present_map = cpu_possible_map;	223	cpu_present_map = cpu_possible_map;


diff --git a/arch/sparc/kernel/smp.c b/arch/sparc/kernel/smp.c index 4d9ad59031bb..4fea3ac7bff0 100644 --- a/arch/sparc/kernel/smp.c +++ b/arch/sparc/kernel/smp.c
@@ -68,16 +68,6 @@ void __cpuinit smp_store_cpu_info(int id)
68	cpu_data(id).prom_node = cpu_node;	68	cpu_data(id).prom_node = cpu_node;
69	cpu_data(id).mid = cpu_get_hwmid(cpu_node);	69	cpu_data(id).mid = cpu_get_hwmid(cpu_node);
70		70
71	/* this is required to tune the scheduler correctly */
72	/* is it possible to have CPUs with different cache sizes? */
73	if (id == boot_cpu_id) {
74	int cache_line,cache_nlines;
75	cache_line = 0x20;
76	cache_line = prom_getintdefault(cpu_node, "ecache-line-size", cache_line);
77	cache_nlines = 0x8000;
78	cache_nlines = prom_getintdefault(cpu_node, "ecache-nlines", cache_nlines);
79	max_cache_size = cache_line * cache_nlines;
80	}
81	if (cpu_data(id).mid < 0)	71	if (cpu_data(id).mid < 0)
82	panic("No MID found for CPU%d at node 0x%08d", id, cpu_node);	72	panic("No MID found for CPU%d at node 0x%08d", id, cpu_node);
83	}	73	}


diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index 4dcd7d0b60f2..40e40f968d61 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c
@@ -1163,32 +1163,6 @@ int setup_profiling_timer(unsigned int multiplier)
1163	return -EINVAL;	1163	return -EINVAL;
1164	}	1164	}
1165		1165
1166	static void __init smp_tune_scheduling(void)
1167	{
1168	unsigned int smallest = ~0U;
1169	int i;
1170
1171	for (i = 0; i < NR_CPUS; i++) {
1172	unsigned int val = cpu_data(i).ecache_size;
1173
1174	if (val && val < smallest)
1175	smallest = val;
1176	}
1177
1178	/* Any value less than 256K is nonsense. */
1179	if (smallest < (256U * 1024U))
1180	smallest = 256 * 1024;
1181
1182	max_cache_size = smallest;
1183
1184	if (smallest < 1U * 1024U * 1024U)
1185	printk(KERN_INFO "Using max_cache_size of %uKB\n",
1186	smallest / 1024U);
1187	else
1188	printk(KERN_INFO "Using max_cache_size of %uMB\n",
1189	smallest / 1024U / 1024U);
1190	}
1191
1192	/* Constrain the number of cpus to max_cpus. */	1166	/* Constrain the number of cpus to max_cpus. */
1193	void __init smp_prepare_cpus(unsigned int max_cpus)	1167	void __init smp_prepare_cpus(unsigned int max_cpus)
1194	{	1168	{
@@ -1206,7 +1180,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
1206	}	1180	}
1207		1181
1208	cpu_data(boot_cpu_id).udelay_val = loops_per_jiffy;	1182	cpu_data(boot_cpu_id).udelay_val = loops_per_jiffy;
1209	smp_tune_scheduling();
1210	}	1183	}
1211		1184
1212	void __devinit smp_prepare_boot_cpu(void)	1185	void __devinit smp_prepare_boot_cpu(void)