author     Andi Kleen <ak@suse.de>                      2005-04-16 18:25:19 -0400
committer  Linus Torvalds <torvalds@ppc970.osdl.org>   2005-04-16 18:25:19 -0400
commit     a8ab26fe5bfeef43bdcde5182ca051ae0647607e (patch)
tree       456f92b42111f83a4568b27efd863f20ffabbe3c
parent     ebfcaa96fccc01301a577c5c56a5f00543cf167e (diff)
[PATCH] x86_64: Switch SMP bootup over to new CPU hotplug state machine
This will allow CPU hotplug in the future, and in general it cleans up a lot of
crufty code. It should also plug some races that the old hackish way
introduced. Remove one old race workaround in the NMI watchdog setup that is
no longer needed.
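As a reading aid, the shape of the state machine being converted to is roughly
the following. This is a paraphrase of the generic smp_init() flow of this
kernel generation, not a verbatim copy; smp_prepare_cpus(), __cpu_up() (reached
via cpu_up()) and smp_cpus_done() are the arch hooks implemented in the
smpboot.c diff below.

/*
 * Sketch of the generic CPU bootstrap state machine (paraphrased).
 * The core code walks the present map and drives every AP through
 * the same arch hooks; the old code did all of this by hand inside
 * smp_boot_cpus().
 */
static void __init smp_init_sketch(unsigned int max_cpus)
{
	unsigned int cpu;

	smp_prepare_cpus(max_cpus);	/* sanity checks, APIC mode, present map */

	for_each_present_cpu(cpu) {
		if (!cpu_online(cpu))
			cpu_up(cpu);	/* generic wrapper that ends in __cpu_up() */
	}

	smp_cpus_done(max_cpus);	/* cleanup, sibling detection, gtod setup */
}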
I removed the old total-sum-of-BogoMIPS reporting code. The brag value of
BogoMIPS has been greatly devalued on the open market in recent years.
Real CPU hotplug will need some more work, but the infrastructure for it is
there now.
One drawback: the new TSC sync algorithm is less accurate than the old one.
Zeroing the TSCs is too intrusive to do once the system is running, so the
TSC of the BP is now duplicated onto each AP instead, which leaves a small
residual offset.
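Condensed, the new handshake between the boot processor (BP) and each
application processor (AP) looks like this. It is simplified from
sync_tsc_bp()/__sync_tsc_ap() in the smpboot.c hunk below, with the memory
barriers and the NR_LOOPS warm-up repetition omitted; tsc_sync_lock starts out
held by the BP (taken in __cpu_up() via sync_tsc_bp_init(1)). The residual skew
is the time between the BP's rdtscll() and the AP's wrmsrl().

/* AP side (simplified sketch) */
static void sync_tsc_ap_sketch(void)
{
	atomic_inc(&tsc_flag);			/* flag = 1: AP has arrived */
	_raw_spin_lock(&tsc_sync_lock);		/* blocks until the BP saved its TSC */
	wrmsrl(MSR_IA32_TSC, bp_tsc);		/* duplicate the BP's counter */
	_raw_spin_unlock(&tsc_sync_lock);
	atomic_inc(&tsc_flag);			/* flag = 2: copy done */
}

/* BP side (simplified sketch) */
static void sync_tsc_bp_sketch(void)
{
	while (atomic_read(&tsc_flag) == 0)	/* wait for the AP to arrive */
		cpu_relax();
	rdtscll(bp_tsc);			/* snapshot our TSC */
	_raw_spin_unlock(&tsc_sync_lock);	/* release the AP */
	while (atomic_read(&tsc_flag) < 2)	/* wait until the AP copied it */
		cpu_relax();
}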
akpm:
- sync_tsc_bp_init seems to have the sense of `init' inverted.
- SPIN_LOCK_UNLOCKED is deprecated - use DEFINE_SPINLOCK.
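For reference, the two spellings akpm contrasts (the patch already uses the
macro form for tsc_sync_lock):

/* deprecated: open-coded initializer */
spinlock_t tsc_sync_lock = SPIN_LOCK_UNLOCKED;

/* preferred: declaration and static initialization in one */
static DEFINE_SPINLOCK(tsc_sync_lock);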
Cc: <rusty@rustcorp.com.au>
Cc: <mingo@elte.hu>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--   arch/x86_64/kernel/nmi.c     |   6
-rw-r--r--   arch/x86_64/kernel/smp.c     |  25
-rw-r--r--   arch/x86_64/kernel/smpboot.c | 688
-rw-r--r--   arch/x86_64/kernel/time.c    |   9
-rw-r--r--   include/asm-x86_64/msr.h     |   1
-rw-r--r--   include/asm-x86_64/proto.h   |   2
-rw-r--r--   include/asm-x86_64/smp.h     |  29
7 files changed, 371 insertions(+), 389 deletions(-)
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index 6ec5217f4b73..72f40ca4f06b 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
@@ -130,12 +130,6 @@ int __init check_nmi_watchdog (void)
 	mdelay((10*1000)/nmi_hz); // wait 10 ticks
 
 	for (cpu = 0; cpu < NR_CPUS; cpu++) {
-#ifdef CONFIG_SMP
-		/* Check cpu_callin_map here because that is set
-		   after the timer is started. */
-		if (!cpu_isset(cpu, cpu_callin_map))
-			continue;
-#endif
 		if (cpu_pda[cpu].__nmi_count - counts[cpu] <= 5) {
 			printk("CPU#%d: NMI appears to be stuck (%d)!\n",
 				cpu,
diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c
index e3cdbf9a88bd..1e379ed17b1d 100644
--- a/arch/x86_64/kernel/smp.c
+++ b/arch/x86_64/kernel/smp.c
@@ -27,6 +27,7 @@
 #include <asm/mach_apic.h>
 #include <asm/mmu_context.h>
 #include <asm/proto.h>
+#include <asm/apicdef.h>
 
 /*
  * Smarter SMP flushing macros.
@@ -413,3 +414,27 @@ asmlinkage void smp_call_function_interrupt(void)
 		atomic_inc(&call_data->finished);
 	}
 }
+
+int safe_smp_processor_id(void)
+{
+	int apicid, i;
+
+	if (disable_apic)
+		return 0;
+
+	apicid = hard_smp_processor_id();
+	if (x86_cpu_to_apicid[apicid] == apicid)
+		return apicid;
+
+	for (i = 0; i < NR_CPUS; ++i) {
+		if (x86_cpu_to_apicid[i] == apicid)
+			return i;
+	}
+
+	/* No entries in x86_cpu_to_apicid?  Either no MPS|ACPI,
+	 * or called too early.  Either way, we must be CPU 0. */
+	if (x86_cpu_to_apicid[0] == BAD_APICID)
+		return 0;
+
+	return 0; /* Should not happen */
+}
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index 7e2de8925b0f..eca01e82d3c7 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -12,8 +12,7 @@
  *	Pentium Pro and Pentium-II/Xeon MP machines.
  *	Original development of Linux SMP code supported by Caldera.
  *
- *	This code is released under the GNU General Public License version 2 or
- *	later.
+ *	This code is released under the GNU General Public License version 2
 *
 *	Fixes
 *		Felix Koop	:	NR_CPUS used properly
@@ -31,9 +30,13 @@
 *	Tigran Aivazian	:	fixed "0.00 in /proc/uptime on SMP" bug.
 *	Maciej W. Rozycki	:	Bits for genuine 82489DX APICs
 *	Andi Kleen	:	Changed for SMP boot into long mode.
 *	Rusty Russell	:	Hacked into shape for new "hotplug" boot process.
+ *	Andi Kleen	:	Converted to new state machine.
+ *				Various cleanups.
+ *				Probably mostly hotplug CPU ready now.
 */
 
+
 #include <linux/config.h>
 #include <linux/init.h>
 
@@ -54,11 +57,15 @@
 #include <asm/tlbflush.h>
 #include <asm/proto.h>
 
+/* Change for real CPU hotplug. Note other files need to be fixed
+   first too. */
+#define __cpuinit __init
+#define __cpuinitdata __initdata
+
 /* Number of siblings per CPU package */
 int smp_num_siblings = 1;
 /* Package ID of each logical CPU */
 u8 phys_proc_id[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
-/* Core ID of each logical CPU */
 u8 cpu_core_id[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
 EXPORT_SYMBOL(phys_proc_id);
 EXPORT_SYMBOL(cpu_core_id);
@@ -66,13 +73,24 @@ EXPORT_SYMBOL(cpu_core_id);
 /* Bitmask of currently online CPUs */
 cpumask_t cpu_online_map;
 
+EXPORT_SYMBOL(cpu_online_map);
+
+/*
+ * Private maps to synchronize booting between AP and BP.
+ * Probably not needed anymore, but it makes for easier debugging. -AK
+ */
 cpumask_t cpu_callin_map;
 cpumask_t cpu_callout_map;
-static cpumask_t smp_commenced_mask;
+
+cpumask_t cpu_possible_map;
+EXPORT_SYMBOL(cpu_possible_map);
 
 /* Per CPU bogomips and other parameters */
 struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
 
+/* Set when the idlers are all forked */
+int smp_threads_ready;
+
 cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
 cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
 
@@ -80,8 +98,8 @@ cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
 * Trampoline 80x86 program as an array.
 */
 
-extern unsigned char trampoline_data [];
-extern unsigned char trampoline_end  [];
+extern unsigned char trampoline_data[];
+extern unsigned char trampoline_end[];
 
 /*
 * Currently trivial. Write the real->protected mode
@@ -89,7 +107,7 @@ extern unsigned char trampoline_end [];
 * has made sure it's suitably aligned.
 */
 
-static unsigned long __init setup_trampoline(void)
+static unsigned long __cpuinit setup_trampoline(void)
 {
 	void *tramp = __va(SMP_TRAMPOLINE_BASE);
 	memcpy(tramp, trampoline_data, trampoline_end - trampoline_data);
@@ -101,7 +119,7 @@ static unsigned long __init setup_trampoline(void)
 * a given CPU
 */
 
-static void __init smp_store_cpu_info(int id)
+static void __cpuinit smp_store_cpu_info(int id)
 {
 	struct cpuinfo_x86 *c = cpu_data + id;
 
@@ -110,145 +128,101 @@ static void __init smp_store_cpu_info(int id)
 }
 
 /*
- * TSC synchronization.
+ * Synchronize TSCs of CPUs
 *
- * We first check whether all CPUs have their TSC's synchronized,
- * then we print a warning if not, and always resync.
+ * This new algorithm is less accurate than the old "zero TSCs"
+ * one, but we cannot zero TSCs anymore in the new hotplug CPU
+ * model.
 */
 
-static atomic_t tsc_start_flag = ATOMIC_INIT(0);
-static atomic_t tsc_count_start = ATOMIC_INIT(0);
-static atomic_t tsc_count_stop = ATOMIC_INIT(0);
-static unsigned long long tsc_values[NR_CPUS];
+static atomic_t __cpuinitdata tsc_flag;
+static __cpuinitdata DEFINE_SPINLOCK(tsc_sync_lock);
+static unsigned long long __cpuinitdata bp_tsc, ap_tsc;
 
 #define NR_LOOPS 5
 
-extern unsigned int fast_gettimeoffset_quotient;
-
-static void __init synchronize_tsc_bp (void)
+static void __cpuinit sync_tsc_bp_init(int init)
 {
-	int i;
-	unsigned long long t0;
-	unsigned long long sum, avg;
-	long long delta;
-	long one_usec;
-	int buggy = 0;
-
-	printk(KERN_INFO "checking TSC synchronization across %u CPUs: ",num_booting_cpus());
-
-	one_usec = cpu_khz;
-
-	atomic_set(&tsc_start_flag, 1);
-	wmb();
-
-	/*
-	 * We loop a few times to get a primed instruction cache,
-	 * then the last pass is more or less synchronized and
-	 * the BP and APs set their cycle counters to zero all at
-	 * once. This reduces the chance of having random offsets
-	 * between the processors, and guarantees that the maximum
-	 * delay between the cycle counters is never bigger than
-	 * the latency of information-passing (cachelines) between
-	 * two CPUs.
-	 */
-	for (i = 0; i < NR_LOOPS; i++) {
-		/*
-		 * all APs synchronize but they loop on '== num_cpus'
-		 */
-		while (atomic_read(&tsc_count_start) != num_booting_cpus()-1) mb();
-		atomic_set(&tsc_count_stop, 0);
-		wmb();
-		/*
-		 * this lets the APs save their current TSC:
-		 */
-		atomic_inc(&tsc_count_start);
-
-		sync_core();
-		rdtscll(tsc_values[smp_processor_id()]);
-		/*
-		 * We clear the TSC in the last loop:
-		 */
-		if (i == NR_LOOPS-1)
-			write_tsc(0, 0);
-
-		/*
-		 * Wait for all APs to leave the synchronization point:
-		 */
-		while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1) mb();
-		atomic_set(&tsc_count_start, 0);
-		wmb();
-		atomic_inc(&tsc_count_stop);
-	}
-
-	sum = 0;
-	for (i = 0; i < NR_CPUS; i++) {
-		if (cpu_isset(i, cpu_callout_map)) {
-			t0 = tsc_values[i];
-			sum += t0;
-		}
-	}
-	avg = sum / num_booting_cpus();
-
-	sum = 0;
-	for (i = 0; i < NR_CPUS; i++) {
-		if (!cpu_isset(i, cpu_callout_map))
-			continue;
-
-		delta = tsc_values[i] - avg;
-		if (delta < 0)
-			delta = -delta;
-		/*
-		 * We report bigger than 2 microseconds clock differences.
-		 */
-		if (delta > 2*one_usec) {
-			long realdelta;
-			if (!buggy) {
-				buggy = 1;
-				printk("\n");
-			}
-			realdelta = delta / one_usec;
-			if (tsc_values[i] < avg)
-				realdelta = -realdelta;
-
-			printk("BIOS BUG: CPU#%d improperly initialized, has %ld usecs TSC skew! FIXED.\n",
-				i, realdelta);
-		}
+	if (init)
+		_raw_spin_lock(&tsc_sync_lock);
+	else
+		_raw_spin_unlock(&tsc_sync_lock);
+	atomic_set(&tsc_flag, 0);
+}
 
-		sum += delta;
-	}
-	if (!buggy)
-		printk("passed.\n");
+/*
+ * Synchronize TSC on AP with BP.
+ */
+static void __cpuinit __sync_tsc_ap(void)
+{
+	if (!cpu_has_tsc)
+		return;
+	Dprintk("AP %d syncing TSC\n", smp_processor_id());
+
+	while (atomic_read(&tsc_flag) != 0)
+		cpu_relax();
+	atomic_inc(&tsc_flag);
+	mb();
+	_raw_spin_lock(&tsc_sync_lock);
+	wrmsrl(MSR_IA32_TSC, bp_tsc);
+	_raw_spin_unlock(&tsc_sync_lock);
+	rdtscll(ap_tsc);
+	mb();
+	atomic_inc(&tsc_flag);
+	mb();
 }
 
-static void __init synchronize_tsc_ap (void)
+static void __cpuinit sync_tsc_ap(void)
 {
 	int i;
+	for (i = 0; i < NR_LOOPS; i++)
+		__sync_tsc_ap();
+}
 
-	/*
-	 * Not every cpu is online at the time
-	 * this gets called, so we first wait for the BP to
-	 * finish SMP initialization:
-	 */
-	while (!atomic_read(&tsc_start_flag)) mb();
-
-	for (i = 0; i < NR_LOOPS; i++) {
-		atomic_inc(&tsc_count_start);
-		while (atomic_read(&tsc_count_start) != num_booting_cpus()) mb();
+/*
+ * Synchronize TSC from BP to AP.
+ */
+static void __cpuinit __sync_tsc_bp(int cpu)
+{
+	if (!cpu_has_tsc)
+		return;
 
-		sync_core();
-		rdtscll(tsc_values[smp_processor_id()]);
-		if (i == NR_LOOPS-1)
-			write_tsc(0, 0);
+	/* Wait for AP */
+	while (atomic_read(&tsc_flag) == 0)
+		cpu_relax();
+	/* Save BPs TSC */
+	sync_core();
+	rdtscll(bp_tsc);
+	/* Don't do the sync core here to avoid too much latency. */
+	mb();
+	/* Start the AP */
+	_raw_spin_unlock(&tsc_sync_lock);
+	/* Wait for AP again */
+	while (atomic_read(&tsc_flag) < 2)
+		cpu_relax();
+	rdtscl(bp_tsc);
+	barrier();
+}
 
-		atomic_inc(&tsc_count_stop);
-		while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb();
+static void __cpuinit sync_tsc_bp(int cpu)
+{
+	int i;
+	for (i = 0; i < NR_LOOPS - 1; i++) {
+		__sync_tsc_bp(cpu);
+		sync_tsc_bp_init(1);
 	}
+	__sync_tsc_bp(cpu);
+	printk(KERN_INFO "Synced TSC of CPU %d difference %Ld\n",
+		cpu, ap_tsc - bp_tsc);
 }
-#undef NR_LOOPS
 
-static atomic_t init_deasserted;
+static atomic_t init_deasserted __cpuinitdata;
 
-static void __init smp_callin(void)
+/*
+ * Report back to the Boot Processor.
+ * Running on AP.
+ */
+void __cpuinit smp_callin(void)
 {
 	int cpuid, phys_id;
 	unsigned long timeout;
@@ -259,7 +233,8 @@ static void __init smp_callin(void)
 	 * our local APIC. We have to wait for the IPI or we'll
 	 * lock up on an APIC access.
 	 */
-	while (!atomic_read(&init_deasserted));
+	while (!atomic_read(&init_deasserted))
+		cpu_relax();
 
 	/*
 	 * (This works even if the APIC is not enabled.)
@@ -290,7 +265,7 @@
 		 */
 		if (cpu_isset(cpuid, cpu_callout_map))
 			break;
-		rep_nop();
+		cpu_relax();
 	}
 
 	if (!time_before(jiffies, timeout)) {
@@ -325,20 +300,12 @@
 	 * Allow the master to continue.
 	 */
 	cpu_set(cpuid, cpu_callin_map);
-
-	/*
-	 * Synchronize the TSC with the BP
-	 */
-	if (cpu_has_tsc)
-		synchronize_tsc_ap();
 }
 
-static int cpucount;
-
 /*
- * Activate a secondary processor.
+ * Setup code on secondary processor (after comming out of the trampoline)
 */
-void __init start_secondary(void)
+void __cpuinit start_secondary(void)
 {
 	/*
 	 * Dont put anything before smp_callin(), SMP
@@ -348,17 +315,18 @@ void __init start_secondary(void)
 	cpu_init();
 	smp_callin();
 
+	/*
+	 * Synchronize the TSC with the BP
+	 */
+	sync_tsc_ap();
+
 	/* otherwise gcc will move up the smp_processor_id before the cpu_init */
 	barrier();
 
-	Dprintk("cpu %d: waiting for commence\n", smp_processor_id());
-	while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
-		rep_nop();
-
 	Dprintk("cpu %d: setting up apic clock\n", smp_processor_id());
 	setup_secondary_APIC_clock();
 
 	Dprintk("cpu %d: enabling apic timer\n", smp_processor_id());
 
 	if (nmi_watchdog == NMI_IO_APIC) {
 		disable_8259A_irq(0);
@@ -367,26 +335,22 @@
 	}
 
 
 	enable_APIC_timer();
 
 	/*
-	 * low-memory mappings have been cleared, flush them from
-	 * the local TLBs too.
+	 * Allow the master to continue.
 	 */
-	local_flush_tlb();
-
-	Dprintk("cpu %d eSetting cpu_online_map\n", smp_processor_id());
 	cpu_set(smp_processor_id(), cpu_online_map);
-	wmb();
+	mb();
 
 	cpu_idle();
 }
 
 extern volatile unsigned long init_rsp;
 extern void (*initial_code)(void);
 
 #if APIC_DEBUG
-static inline void inquire_remote_apic(int apicid)
+static void inquire_remote_apic(int apicid)
 {
 	unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
 	char *names[] = { "ID", "VERSION", "SPIV" };
@@ -423,7 +387,10 @@ static inline void inquire_remote_apic(int apicid)
 }
 #endif
 
-static int __init wakeup_secondary_via_INIT(int phys_apicid, unsigned int start_rip)
+/*
+ * Kick the secondary to wake up.
+ */
+static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int start_rip)
 {
 	unsigned long send_status = 0, accept_status = 0;
 	int maxlvt, timeout, num_starts, j;
@@ -546,33 +513,36 @@ static int __init wakeup_secondary_via_INIT(int phys_apicid, unsigned int start_rip)
 	return (send_status | accept_status);
 }
 
-static void __init do_boot_cpu (int apicid)
+/*
+ * Boot one CPU.
+ */
+static int __cpuinit do_boot_cpu(int cpu, int apicid)
 {
 	struct task_struct *idle;
 	unsigned long boot_error;
-	int timeout, cpu;
+	int timeout;
 	unsigned long start_rip;
-
-	cpu = ++cpucount;
 	/*
 	 * We can't use kernel_thread since we must avoid to
 	 * reschedule the child.
 	 */
 	idle = fork_idle(cpu);
-	if (IS_ERR(idle))
-		panic("failed fork for CPU %d", cpu);
+	if (IS_ERR(idle)) {
+		printk("failed fork for CPU %d\n", cpu);
+		return PTR_ERR(idle);
+	}
 	x86_cpu_to_apicid[cpu] = apicid;
 
 	cpu_pda[cpu].pcurrent = idle;
 
 	start_rip = setup_trampoline();
 
 	init_rsp = idle->thread.rsp;
 	per_cpu(init_tss,cpu).rsp0 = init_rsp;
 	initial_code = start_secondary;
 	clear_ti_thread_flag(idle->thread_info, TIF_FORK);
 
 	printk(KERN_INFO "Booting processor %d/%d rip %lx rsp %lx\n", cpu, apicid,
 	       start_rip, init_rsp);
 
 	/*
@@ -609,7 +579,7 @@ static void __init do_boot_cpu (int apicid)
 	/*
 	 * Starting actual IPI sequence...
 	 */
-	boot_error = wakeup_secondary_via_INIT(apicid, start_rip);
+	boot_error = wakeup_secondary_via_INIT(apicid, start_rip);
 
 	if (!boot_error) {
 		/*
@@ -650,58 +620,131 @@
 	if (boot_error) {
 		cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
 		clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
-		cpucount--;
+		cpu_clear(cpu, cpu_present_map);
+		cpu_clear(cpu, cpu_possible_map);
 		x86_cpu_to_apicid[cpu] = BAD_APICID;
 		x86_cpu_to_log_apicid[cpu] = BAD_APICID;
+		return -EIO;
 	}
+
+	return 0;
 }
 
-static void smp_tune_scheduling (void)
+cycles_t cacheflush_time;
+unsigned long cache_decay_ticks;
+
+/*
+ * Construct cpu_sibling_map[], so that we can tell the sibling CPU
+ * on SMT systems efficiently.
+ */
+static __cpuinit void detect_siblings(void)
 {
-	int cachesize;       /* kB */
-	unsigned long bandwidth = 1000; /* MB/s */
-	/*
-	 * Rough estimation for SMP scheduling, this is the number of
-	 * cycles it takes for a fully memory-limited process to flush
-	 * the SMP-local cache.
-	 *
-	 * (For a P5 this pretty much means we will choose another idle
-	 * CPU almost always at wakeup time (this is due to the small
-	 * L1 cache), on PIIs it's around 50-100 usecs, depending on
-	 * the cache size)
-	 */
+	int cpu;
 
-	if (!cpu_khz) {
-		return;
-	} else {
-		cachesize = boot_cpu_data.x86_cache_size;
-		if (cachesize == -1) {
-			cachesize = 16; /* Pentiums, 2x8kB cache */
-			bandwidth = 100;
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		cpus_clear(cpu_sibling_map[cpu]);
+		cpus_clear(cpu_core_map[cpu]);
+	}
+
+	for_each_online_cpu (cpu) {
+		struct cpuinfo_x86 *c = cpu_data + cpu;
+		int siblings = 0;
+		int i;
+		if (smp_num_siblings > 1) {
+			for_each_online_cpu (i) {
+				if (cpu_core_id[cpu] == phys_proc_id[i]) {
+					siblings++;
+					cpu_set(i, cpu_sibling_map[cpu]);
+				}
+			}
+		} else {
+			siblings++;
+			cpu_set(cpu, cpu_sibling_map[cpu]);
+		}
+
+		if (siblings != smp_num_siblings) {
+			printk(KERN_WARNING
+	       "WARNING: %d siblings found for CPU%d, should be %d\n",
+			       siblings, cpu, smp_num_siblings);
+			smp_num_siblings = siblings;
 		}
+		if (c->x86_num_cores > 1) {
+			for_each_online_cpu(i) {
+				if (phys_proc_id[cpu] == phys_proc_id[i])
+					cpu_set(i, cpu_core_map[cpu]);
+			}
+		} else
+			cpu_core_map[cpu] = cpu_sibling_map[cpu];
 	}
 }
 
 /*
- * Cycle through the processors sending APIC IPIs to boot each.
+ * Cleanup possible dangling ends...
 */
-
-static void __init smp_boot_cpus(unsigned int max_cpus)
+static __cpuinit void smp_cleanup_boot(void)
 {
-	unsigned apicid, cpu, bit, kicked;
+	/*
+	 * Paranoid: Set warm reset code and vector here back
+	 * to default values.
+	 */
+	CMOS_WRITE(0, 0xf);
 
-	nmi_watchdog_default();
+	/*
+	 * Reset trampoline flag
+	 */
+	*((volatile int *) phys_to_virt(0x467)) = 0;
 
+#ifndef CONFIG_HOTPLUG_CPU
 	/*
-	 * Setup boot CPU information
+	 * Free pages reserved for SMP bootup.
+	 * When you add hotplug CPU support later remove this
+	 * Note there is more work to be done for later CPU bootup.
 	 */
-	smp_store_cpu_info(0); /* Final full version of the data */
-	printk(KERN_INFO "CPU%d: ", 0);
-	print_cpu_info(&cpu_data[0]);
 
-	current_thread_info()->cpu = 0;
-	smp_tune_scheduling();
+	free_page((unsigned long) __va(PAGE_SIZE));
+	free_page((unsigned long) __va(SMP_TRAMPOLINE_BASE));
+#endif
+}
+
+/*
+ * Fall back to non SMP mode after errors.
+ *
+ * RED-PEN audit/test this more. I bet there is more state messed up here.
+ */
+static __cpuinit void disable_smp(void)
+{
+	cpu_present_map = cpumask_of_cpu(0);
+	cpu_possible_map = cpumask_of_cpu(0);
+	if (smp_found_config)
+		phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id);
+	else
+		phys_cpu_present_map = physid_mask_of_physid(0);
+	cpu_set(0, cpu_sibling_map[0]);
+	cpu_set(0, cpu_core_map[0]);
+}
+
+/*
+ * Handle user cpus=... parameter.
+ */
+static __cpuinit void enforce_max_cpus(unsigned max_cpus)
+{
+	int i, k;
+	k = 0;
+	for (i = 0; i < NR_CPUS; i++) {
+		if (!cpu_possible(i))
+			continue;
+		if (++k > max_cpus) {
+			cpu_clear(i, cpu_possible_map);
+			cpu_clear(i, cpu_present_map);
+		}
+	}
+}
 
+/*
+ * Various sanity checks.
+ */
+static int __cpuinit smp_sanity_check(unsigned max_cpus)
+{
 	if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
 		printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
 		       hard_smp_processor_id());
@@ -714,15 +757,11 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
 	 */
 	if (!smp_found_config) {
 		printk(KERN_NOTICE "SMP motherboard not detected.\n");
-		io_apic_irqs = 0;
-		cpu_online_map = cpumask_of_cpu(0);
-		cpu_set(0, cpu_sibling_map[0]);
-		cpu_set(0, cpu_core_map[0]);
-		phys_cpu_present_map = physid_mask_of_physid(0);
+		disable_smp();
 		if (APIC_init_uniprocessor())
 			printk(KERN_NOTICE "Local APIC not detected."
 			       " Using dummy APIC emulation.\n");
-		goto smp_done;
+		return -1;
 	}
 
 	/*
@@ -742,213 +781,146 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
 		printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
 			boot_cpu_id);
 		printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
-		io_apic_irqs = 0;
-		cpu_online_map = cpumask_of_cpu(0);
-		cpu_set(0, cpu_sibling_map[0]);
-		cpu_set(0, cpu_core_map[0]);
-		phys_cpu_present_map = physid_mask_of_physid(0);
-		disable_apic = 1;
-		goto smp_done;
+		nr_ioapics = 0;
+		return -1;
 	}
 
-	verify_local_APIC();
-
 	/*
 	 * If SMP should be disabled, then really disable it!
 	 */
 	if (!max_cpus) {
-		smp_found_config = 0;
 		printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
-		io_apic_irqs = 0;
-		cpu_online_map = cpumask_of_cpu(0);
-		cpu_set(0, cpu_sibling_map[0]);
-		cpu_set(0, cpu_core_map[0]);
-		phys_cpu_present_map = physid_mask_of_physid(0);
-		disable_apic = 1;
-		goto smp_done;
+		nr_ioapics = 0;
+		return -1;
 	}
 
-	connect_bsp_APIC();
-	setup_local_APIC();
-
-	if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_id)
-		BUG();
-
-	x86_cpu_to_apicid[0] = boot_cpu_id;
-
-	/*
-	 * Now scan the CPU present map and fire up the other CPUs.
-	 */
-	Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map));
+	return 0;
+}
 
-	kicked = 1;
-	for (bit = 0; kicked < NR_CPUS && bit < MAX_APICS; bit++) {
-		apicid = cpu_present_to_apicid(bit);
-		/*
-		 * Don't even attempt to start the boot CPU!
-		 */
-		if (apicid == boot_cpu_id || (apicid == BAD_APICID))
-			continue;
+/*
+ * Prepare for SMP bootup. The MP table or ACPI has been read
+ * earlier. Just do some sanity checking here and enable APIC mode.
+ */
+void __cpuinit smp_prepare_cpus(unsigned int max_cpus)
+{
+	int i;
 
-		if (!physid_isset(apicid, phys_cpu_present_map))
-			continue;
-		if ((max_cpus >= 0) && (max_cpus <= cpucount+1))
-			continue;
+	nmi_watchdog_default();
+	current_cpu_data = boot_cpu_data;
+	current_thread_info()->cpu = 0;  /* needed? */
 
-		do_boot_cpu(apicid);
-		++kicked;
-	}
+	enforce_max_cpus(max_cpus);
 
 	/*
-	 * Cleanup possible dangling ends...
+	 * Fill in cpu_present_mask
 	 */
-	{
-		/*
-		 * Install writable page 0 entry to set BIOS data area.
-		 */
-		local_flush_tlb();
-
-		/*
-		 * Paranoid: Set warm reset code and vector here back
-		 * to default values.
-		 */
-		CMOS_WRITE(0, 0xf);
-
-		*((volatile int *) phys_to_virt(0x467)) = 0;
+	for (i = 0; i < NR_CPUS; i++) {
+		int apicid = cpu_present_to_apicid(i);
+		if (physid_isset(apicid, phys_cpu_present_map)) {
+			cpu_set(i, cpu_present_map);
+			/* possible map would be different if we supported real
+			   CPU hotplug. */
+			cpu_set(i, cpu_possible_map);
+		}
 	}
 
-	/*
-	 * Allow the user to impress friends.
-	 */
-
-	Dprintk("Before bogomips.\n");
-	if (!cpucount) {
-		printk(KERN_INFO "Only one processor found.\n");
-	} else {
-		unsigned long bogosum = 0;
-		for (cpu = 0; cpu < NR_CPUS; cpu++)
-			if (cpu_isset(cpu, cpu_callout_map))
-				bogosum += cpu_data[cpu].loops_per_jiffy;
-		printk(KERN_INFO "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
-			cpucount+1,
-			bogosum/(500000/HZ),
-			(bogosum/(5000/HZ))%100);
-		Dprintk("Before bogocount - setting activated=1.\n");
+	if (smp_sanity_check(max_cpus) < 0) {
+		printk(KERN_INFO "SMP disabled\n");
+		disable_smp();
+		return;
 	}
 
+
 	/*
-	 * Construct cpu_sibling_map[], so that we can tell the
-	 * sibling CPU efficiently.
+	 * Switch from PIC to APIC mode.
 	 */
-	for (cpu = 0; cpu < NR_CPUS; cpu++) {
-		cpus_clear(cpu_sibling_map[cpu]);
-		cpus_clear(cpu_core_map[cpu]);
-	}
-
-	for (cpu = 0; cpu < NR_CPUS; cpu++) {
-		struct cpuinfo_x86 *c = cpu_data + cpu;
-		int siblings = 0;
-		int i;
-		if (!cpu_isset(cpu, cpu_callout_map))
-			continue;
-
-		if (smp_num_siblings > 1) {
-			for (i = 0; i < NR_CPUS; i++) {
-				if (!cpu_isset(i, cpu_callout_map))
-					continue;
-				if (phys_proc_id[cpu] == cpu_core_id[i]) {
-					siblings++;
-					cpu_set(i, cpu_sibling_map[cpu]);
-				}
-			}
-		} else {
-			siblings++;
-			cpu_set(cpu, cpu_sibling_map[cpu]);
-		}
+	connect_bsp_APIC();
+	setup_local_APIC();
 
-		if (siblings != smp_num_siblings) {
-			printk(KERN_WARNING
-	       "WARNING: %d siblings found for CPU%d, should be %d\n",
-			       siblings, cpu, smp_num_siblings);
-			smp_num_siblings = siblings;
-		}
-		if (c->x86_num_cores > 1) {
-			for (i = 0; i < NR_CPUS; i++) {
-				if (!cpu_isset(i, cpu_callout_map))
-					continue;
-				if (phys_proc_id[cpu] == phys_proc_id[i]) {
-					cpu_set(i, cpu_core_map[cpu]);
-				}
-			}
-		} else
-			cpu_core_map[cpu] = cpu_sibling_map[cpu];
+	if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_id) {
+		panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
+		      GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_id);
+		/* Or can we switch back to PIC here? */
 	}
-
-	Dprintk("Boot done.\n");
+	x86_cpu_to_apicid[0] = boot_cpu_id;
 
 	/*
-	 * Here we can be sure that there is an IO-APIC in the system. Let's
-	 * go and set it up:
+	 * Now start the IO-APICs
 	 */
 	if (!skip_ioapic_setup && nr_ioapics)
 		setup_IO_APIC();
 	else
 		nr_ioapics = 0;
 
-	setup_boot_APIC_clock();
-
 	/*
-	 * Synchronize the TSC with the AP
+	 * Set up local APIC timer on boot CPU.
 	 */
-	if (cpu_has_tsc && cpucount)
-		synchronize_tsc_bp();
 
-smp_done:
-	time_init_smp();
+	setup_boot_APIC_clock();
 }
 
-/* These are wrappers to interface to the new boot process. Someone
-   who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
-void __init smp_prepare_cpus(unsigned int max_cpus)
+/*
+ * Early setup to make printk work.
+ */
+void __init smp_prepare_boot_cpu(void)
 {
-	smp_boot_cpus(max_cpus);
+	int me = smp_processor_id();
+	cpu_set(me, cpu_online_map);
+	cpu_set(me, cpu_callout_map);
 }
 
-void __devinit smp_prepare_boot_cpu(void)
+/*
+ * Entry point to boot a CPU.
+ *
+ * This is all __cpuinit, not __devinit for now because we don't support
+ * CPU hotplug (yet).
+ */
+int __cpuinit __cpu_up(unsigned int cpu)
 {
-	cpu_set(smp_processor_id(), cpu_online_map);
-	cpu_set(smp_processor_id(), cpu_callout_map);
-}
+	int err;
+	int apicid = cpu_present_to_apicid(cpu);
 
-int __devinit __cpu_up(unsigned int cpu)
-{
-	/* This only works at boot for x86. See "rewrite" above. */
-	if (cpu_isset(cpu, smp_commenced_mask)) {
-		local_irq_enable();
-		return -ENOSYS;
-	}
+	WARN_ON(irqs_disabled());
 
-	/* In case one didn't come up */
-	if (!cpu_isset(cpu, cpu_callin_map)) {
-		local_irq_enable();
-		return -EIO;
+	Dprintk("++++++++++++++++++++=_---CPU UP %u\n", cpu);
+
+	if (apicid == BAD_APICID || apicid == boot_cpu_id ||
+	    !physid_isset(apicid, phys_cpu_present_map)) {
+		printk("__cpu_up: bad cpu %d\n", cpu);
+		return -EINVAL;
+	}
+	sync_tsc_bp_init(1);
+
+	/* Boot it! */
+	err = do_boot_cpu(cpu, apicid);
+	if (err < 0) {
+		sync_tsc_bp_init(0);
+		Dprintk("do_boot_cpu failed %d\n", err);
+		return err;
 	}
-	local_irq_enable();
+
+	sync_tsc_bp(cpu);
 
 	/* Unleash the CPU! */
 	Dprintk("waiting for cpu %d\n", cpu);
 
-	cpu_set(cpu, smp_commenced_mask);
 	while (!cpu_isset(cpu, cpu_online_map))
-		mb();
+		cpu_relax();
 	return 0;
 }
 
-void __init smp_cpus_done(unsigned int max_cpus)
+/*
+ * Finish the SMP boot.
+ */
+void __cpuinit smp_cpus_done(unsigned int max_cpus)
 {
+	zap_low_mappings();
+	smp_cleanup_boot();
+
 #ifdef CONFIG_X86_IO_APIC
 	setup_ioapic_dest();
 #endif
-	zap_low_mappings();
-}
 
+	detect_siblings();
+	time_init_gtod();
+}
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index c7a1b50b4af3..943a9bfba209 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -916,9 +916,16 @@ void __init time_init(void)
 	setup_irq(0, &irq0);
 
 	set_cyc2ns_scale(cpu_khz / 1000);
+
+#ifndef CONFIG_SMP
+	time_init_gtod();
+#endif
 }
 
-void __init time_init_smp(void)
+/*
+ * Decide after all CPUs are booted what mode gettimeofday should use.
+ */
+void __init time_init_gtod(void)
 {
 	char *timetype;
 
diff --git a/include/asm-x86_64/msr.h b/include/asm-x86_64/msr.h
index 66f0be191ab4..513e52c71821 100644
--- a/include/asm-x86_64/msr.h
+++ b/include/asm-x86_64/msr.h
@@ -163,6 +163,7 @@ extern inline unsigned int cpuid_edx(unsigned int op)
 #define EFER_NX (1<<_EFER_NX)
 
 /* Intel MSRs. Some also available on other CPUs */
+#define MSR_IA32_TSC		0x10
 #define MSR_IA32_PLATFORM_ID	0x17
 
 #define MSR_IA32_PERFCTR0	0xc1
diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h
index 217bd9ace69b..d0f8f8b4c394 100644
--- a/include/asm-x86_64/proto.h
+++ b/include/asm-x86_64/proto.h
@@ -29,7 +29,7 @@ extern void config_acpi_tables(void);
 extern void ia32_syscall(void);
 extern void iommu_hole_init(void);
 
-extern void time_init_smp(void);
+extern void time_init_gtod(void);
 
 extern void do_softirq_thunk(void);
 
diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h
index f5eaa1ab48ff..96844fecbde8 100644
--- a/include/asm-x86_64/smp.h
+++ b/include/asm-x86_64/smp.h
@@ -31,12 +31,16 @@ extern int disable_apic;
 
 struct pt_regs;
 
+extern cpumask_t cpu_present_mask;
+extern cpumask_t cpu_possible_map;
+extern cpumask_t cpu_online_map;
+extern cpumask_t cpu_callout_map;
+
 /*
 * Private routines/data
 */
 
 extern void smp_alloc_memory(void);
-extern cpumask_t cpu_online_map;
 extern volatile unsigned long smp_invalidate_needed;
 extern int pic_mode;
 extern int smp_num_siblings;
@@ -44,7 +48,6 @@ extern void smp_flush_tlb(void);
 extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs);
 extern void smp_send_reschedule(int cpu);
 extern void smp_invalidate_rcv(void);		/* Process an NMI */
-extern void (*mtrr_hook) (void);
 extern void zap_low_mappings(void);
 void smp_stop_cpu(void);
 extern cpumask_t cpu_sibling_map[NR_CPUS];
@@ -60,10 +63,6 @@ extern u8 cpu_core_id[NR_CPUS];
 * compresses data structures.
 */
 
-extern cpumask_t cpu_callout_map;
-extern cpumask_t cpu_callin_map;
-#define cpu_possible_map cpu_callout_map
-
 static inline int num_booting_cpus(void)
 {
 	return cpus_weight(cpu_callout_map);
@@ -77,7 +76,7 @@ extern __inline int hard_smp_processor_id(void)
 	return GET_APIC_ID(*(unsigned int *)(APIC_BASE+APIC_ID));
 }
 
-#define safe_smp_processor_id() (disable_apic ? 0 : x86_apicid_to_cpu(hard_smp_processor_id()))
+extern int safe_smp_processor_id(void);
 
 #endif /* !ASSEMBLY */
 
@@ -99,22 +98,6 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
 	return cpus_addr(cpumask)[0];
 }
 
-static inline int x86_apicid_to_cpu(u8 apicid)
-{
-	int i;
-
-	for (i = 0; i < NR_CPUS; ++i)
-		if (x86_cpu_to_apicid[i] == apicid)
-			return i;
-
-	/* No entries in x86_cpu_to_apicid?  Either no MPS|ACPI,
-	 * or called too early.  Either way, we must be CPU 0. */
-	if (x86_cpu_to_apicid[0] == BAD_APICID)
-		return 0;
-
-	return -1;
-}
-
 static inline int cpu_present_to_apicid(int mps_cpu)
 {
 	if (mps_cpu < NR_CPUS)