Diffstat (limited to 'arch/ppc64/kernel/pSeries_smp.c')
-rw-r--r-- | arch/ppc64/kernel/pSeries_smp.c | 451
1 files changed, 451 insertions, 0 deletions
diff --git a/arch/ppc64/kernel/pSeries_smp.c b/arch/ppc64/kernel/pSeries_smp.c
new file mode 100644
index 000000000000..c60d8cb2b84d
--- /dev/null
+++ b/arch/ppc64/kernel/pSeries_smp.c
@@ -0,0 +1,451 @@
/*
 * SMP support for pSeries machines.
 *
 * Dave Engebretsen, Peter Bergner, and
 * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
 *
 * Plus various changes from other IBM teams...
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#undef DEBUG

#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/cache.h>
#include <linux/err.h>
#include <linux/sysdev.h>
#include <linux/cpu.h>

#include <asm/ptrace.h>
#include <asm/atomic.h>
#include <asm/irq.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/smp.h>
#include <asm/paca.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/xics.h>
#include <asm/cputable.h>
#include <asm/system.h>
#include <asm/rtas.h>
#include <asm/plpar_wrappers.h>
#include <asm/pSeries_reconfig.h>

#include "mpic.h"

#ifdef DEBUG
#define DBG(fmt...) udbg_printf(fmt)
#else
#define DBG(fmt...)
#endif

/*
 * The primary thread of each non-boot processor is recorded here before
 * smp init.
 */
static cpumask_t of_spin_map;

extern void pSeries_secondary_smp_init(unsigned long);

#ifdef CONFIG_HOTPLUG_CPU

/* Get state of physical CPU.
 * Return codes:
 *	0 - The processor is in the RTAS stopped state
 *	1 - stop-self is in progress
 *	2 - The processor is not in the RTAS stopped state
 *	-1 - Hardware Error
 *	-2 - Hardware Busy, Try again later.
 */
static int query_cpu_stopped(unsigned int pcpu)
{
	int cpu_status;
	int status, qcss_tok;

	qcss_tok = rtas_token("query-cpu-stopped-state");
	if (qcss_tok == RTAS_UNKNOWN_SERVICE)
		return -1;
	status = rtas_call(qcss_tok, 1, 2, &cpu_status, pcpu);
	if (status != 0) {
		printk(KERN_ERR
		       "RTAS query-cpu-stopped-state failed: %i\n", status);
		return status;
	}

	return cpu_status;
}

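/*
 * Take the calling CPU out of service: drop the global processor count,
 * hand boot_cpuid over to another online CPU if necessary, and migrate
 * this CPU's interrupts away before it goes offline.
 */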
int pSeries_cpu_disable(void)
{
	systemcfg->processorCount--;

	/* fix boot_cpuid here */
	if (smp_processor_id() == boot_cpuid)
		boot_cpuid = any_online_cpu(cpu_online_map);

	/* FIXME: abstract this to not be platform specific later on */
	xics_migrate_irqs_away();
	return 0;
}

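/*
 * Wait for an offlined CPU to reach the RTAS stopped state, polling
 * query-cpu-stopped-state up to 25 times (about five seconds) before
 * complaining, then reset the cpu_start flag in its paca.
 */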
void pSeries_cpu_die(unsigned int cpu)
{
	int tries;
	int cpu_status;
	unsigned int pcpu = get_hard_smp_processor_id(cpu);

	for (tries = 0; tries < 25; tries++) {
		cpu_status = query_cpu_stopped(pcpu);
		if (cpu_status == 0 || cpu_status == -1)
			break;
		msleep(200);
	}
	if (cpu_status != 0) {
		printk("Querying DEAD? cpu %i (%i) shows %i\n",
		       cpu, pcpu, cpu_status);
	}

	/* Isolation and deallocation are definitely done by
	 * drslot_chrp_cpu. If they were not, they would be
	 * done here. Change isolate state to Isolate and
	 * change allocation-state to Unusable.
	 */
	paca[cpu].cpu_start = 0;
}

/*
 * Update cpu_present_map and paca(s) for a new cpu node. The wrinkle
 * here is that a cpu device node may represent up to two logical cpus
 * in the SMT case. We must honor the assumption in other code that
 * the logical ids for sibling SMT threads x and y are adjacent, such
 * that x^1 == y and y^1 == x.
 */
static int pSeries_add_processor(struct device_node *np)
{
	unsigned int cpu;
	cpumask_t candidate_map, tmp = CPU_MASK_NONE;
	int err = -ENOSPC, len, nthreads, i;
	u32 *intserv;

	intserv = (u32 *)get_property(np, "ibm,ppc-interrupt-server#s", &len);
	if (!intserv)
		return 0;

	nthreads = len / sizeof(u32);
	for (i = 0; i < nthreads; i++)
		cpu_set(i, tmp);

	lock_cpu_hotplug();

	BUG_ON(!cpus_subset(cpu_present_map, cpu_possible_map));

	/* Get a bitmap of unoccupied slots. */
	cpus_xor(candidate_map, cpu_possible_map, cpu_present_map);
	if (cpus_empty(candidate_map)) {
		/* If we get here, it most likely means that NR_CPUS is
		 * less than the partition's max processors setting.
		 */
		printk(KERN_ERR "Cannot add cpu %s; this system configuration"
		       " supports %d logical cpus.\n", np->full_name,
		       cpus_weight(cpu_possible_map));
		goto out_unlock;
	}

	while (!cpus_empty(tmp))
		if (cpus_subset(tmp, candidate_map))
			/* Found a range where we can insert the new cpu(s) */
			break;
		else
			cpus_shift_left(tmp, tmp, nthreads);

	if (cpus_empty(tmp)) {
		printk(KERN_ERR "Unable to find space in cpu_present_map for"
		       " processor %s with %d thread(s)\n", np->name,
		       nthreads);
		goto out_unlock;
	}

	for_each_cpu_mask(cpu, tmp) {
		BUG_ON(cpu_isset(cpu, cpu_present_map));
		cpu_set(cpu, cpu_present_map);
		set_hard_smp_processor_id(cpu, *intserv++);
	}
	err = 0;
out_unlock:
	unlock_cpu_hotplug();
	return err;
}

/*
 * Update the present map for a cpu node which is going away, and set
 * the hard id in the paca(s) to -1 to be consistent with boot time
 * convention for non-present cpus.
 */
static void pSeries_remove_processor(struct device_node *np)
{
	unsigned int cpu;
	int len, nthreads, i;
	u32 *intserv;

	intserv = (u32 *)get_property(np, "ibm,ppc-interrupt-server#s", &len);
	if (!intserv)
		return;

	nthreads = len / sizeof(u32);

	lock_cpu_hotplug();
	for (i = 0; i < nthreads; i++) {
		for_each_present_cpu(cpu) {
			if (get_hard_smp_processor_id(cpu) != intserv[i])
				continue;
			BUG_ON(cpu_online(cpu));
			cpu_clear(cpu, cpu_present_map);
			set_hard_smp_processor_id(cpu, -1);
			break;
		}
		if (cpu == NR_CPUS)
			printk(KERN_WARNING "Could not find cpu to remove "
			       "with physical id 0x%x\n", intserv[i]);
	}
	unlock_cpu_hotplug();
}

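/*
 * Reconfiguration notifier: keeps cpu_present_map and the pacas in sync
 * when cpu device nodes are added to or removed from the device tree at
 * runtime (dynamic LPAR operations).
 */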
static int pSeries_smp_notifier(struct notifier_block *nb, unsigned long action, void *node)
{
	int err = NOTIFY_OK;

	switch (action) {
	case PSERIES_RECONFIG_ADD:
		if (pSeries_add_processor(node))
			err = NOTIFY_BAD;
		break;
	case PSERIES_RECONFIG_REMOVE:
		pSeries_remove_processor(node);
		break;
	default:
		err = NOTIFY_DONE;
		break;
	}
	return err;
}

static struct notifier_block pSeries_smp_nb = {
	.notifier_call = pSeries_smp_notifier,
};

#endif /* CONFIG_HOTPLUG_CPU */

/**
 * smp_startup_cpu() - start the given cpu
 *
 * At boot time, there is nothing to do for primary threads which were
 * started from Open Firmware. For anything else, call RTAS with the
 * appropriate start location.
 *
 * Returns:
 *	0 - failure
 *	1 - success
 */
static inline int __devinit smp_startup_cpu(unsigned int lcpu)
{
	int status;
	unsigned long start_here = __pa((u32)*((unsigned long *)
					       pSeries_secondary_smp_init));
	unsigned int pcpu;

	if (cpu_isset(lcpu, of_spin_map))
		/* Already started by OF and sitting in spin loop */
		return 1;

	pcpu = get_hard_smp_processor_id(lcpu);

	/* Fixup atomic count: it exited inside IRQ handler. */
	paca[lcpu].__current->thread_info->preempt_count = 0;

	status = rtas_call(rtas_token("start-cpu"), 3, 1, NULL,
			   pcpu, start_here, lcpu);
	if (status != 0) {
		printk(KERN_ERR "start-cpu failed: %i\n", status);
		return 0;
	}
	return 1;
}

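/*
 * IPIs under XICS: post the message by setting its bit in the target
 * CPU's xics_ipi_message word, then poke the interrupt controller so
 * the target actually takes the interrupt.
 */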
static inline void smp_xics_do_message(int cpu, int msg)
{
	set_bit(msg, &xics_ipi_message[cpu].value);
	mb();
	xics_cause_IPI(cpu);
}

static void smp_xics_message_pass(int target, int msg)
{
	unsigned int i;

	if (target < NR_CPUS) {
		smp_xics_do_message(target, msg);
	} else {
		for_each_online_cpu(i) {
			if (target == MSG_ALL_BUT_SELF
			    && i == smp_processor_id())
				continue;
			smp_xics_do_message(i, msg);
		}
	}
}

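/* Set up the XICS IPIs and return the number of possible CPUs. */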
static int __init smp_xics_probe(void)
{
	xics_request_IPIs();

	return cpus_weight(cpu_possible_map);
}

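/*
 * Per-cpu setup for a freshly started CPU: initialise its XICS state,
 * register a VPA with the hypervisor on shared-processor LPARs, note
 * that it has left the OF spin loop, and join the global interrupt queue.
 */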
static void __devinit smp_xics_setup_cpu(int cpu)
{
	if (cpu != boot_cpuid)
		xics_setup_cpu();

	if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR)
		vpa_init(cpu);

	cpu_clear(cpu, of_spin_map);

	/*
	 * Put the calling processor into the GIQ. This is really only
	 * necessary from a secondary thread as the OF start-cpu interface
	 * performs this function for us on primary threads.
	 */
	rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE,
		(1UL << interrupt_server_size) - 1 - default_distrib_server, 1);
}

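/*
 * Timebase synchronisation for machines where RTAS provides
 * freeze-time-base: the boot CPU freezes the timebase, publishes its
 * value through the shared `timebase' variable, and waits for the new
 * CPU to copy it and clear the variable before thawing the timebase.
 */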
static DEFINE_SPINLOCK(timebase_lock);
static unsigned long timebase = 0;

static void __devinit pSeries_give_timebase(void)
{
	spin_lock(&timebase_lock);
	rtas_call(rtas_token("freeze-time-base"), 0, 1, NULL);
	timebase = get_tb();
	spin_unlock(&timebase_lock);

	while (timebase)
		barrier();
	rtas_call(rtas_token("thaw-time-base"), 0, 1, NULL);
}

static void __devinit pSeries_take_timebase(void)
{
	while (!timebase)
		barrier();
	spin_lock(&timebase_lock);
	set_tb(timebase >> 32, timebase & 0xffffffff);
	timebase = 0;
	spin_unlock(&timebase_lock);
}

static void __devinit smp_pSeries_kick_cpu(int nr)
{
	BUG_ON(nr < 0 || nr >= NR_CPUS);

	if (!smp_startup_cpu(nr))
		return;

	/*
	 * The processor is currently spinning, waiting for the
	 * cpu_start field to become non-zero. After we set cpu_start,
	 * the processor will continue on to secondary_start.
	 */
	paca[nr].cpu_start = 1;
}

static int smp_pSeries_cpu_bootable(unsigned int nr)
{
	/* Special case - we inhibit secondary thread startup
	 * during boot if the user requests it. Odd-numbered
	 * cpus are assumed to be secondary threads.
	 */
	if (system_state < SYSTEM_RUNNING &&
	    cur_cpu_spec->cpu_features & CPU_FTR_SMT &&
	    !smt_enabled_at_boot && nr % 2 != 0)
		return 0;

	return 1;
}

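/*
 * Two flavours of SMP ops: one for machines whose interrupts are routed
 * through an MPIC/OpenPIC and one for machines using XICS.
 * smp_init_pSeries() selects between them based on
 * ppc64_interrupt_controller.
 */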
static struct smp_ops_t pSeries_mpic_smp_ops = {
	.message_pass	= smp_mpic_message_pass,
	.probe		= smp_mpic_probe,
	.kick_cpu	= smp_pSeries_kick_cpu,
	.setup_cpu	= smp_mpic_setup_cpu,
};

static struct smp_ops_t pSeries_xics_smp_ops = {
	.message_pass	= smp_xics_message_pass,
	.probe		= smp_xics_probe,
	.kick_cpu	= smp_pSeries_kick_cpu,
	.setup_cpu	= smp_xics_setup_cpu,
	.cpu_bootable	= smp_pSeries_cpu_bootable,
};

/* This is called very early */
void __init smp_init_pSeries(void)
{
	int i;

	DBG(" -> smp_init_pSeries()\n");

	if (ppc64_interrupt_controller == IC_OPEN_PIC)
		smp_ops = &pSeries_mpic_smp_ops;
	else
		smp_ops = &pSeries_xics_smp_ops;

#ifdef CONFIG_HOTPLUG_CPU
	smp_ops->cpu_disable = pSeries_cpu_disable;
	smp_ops->cpu_die = pSeries_cpu_die;

	/* Processors can be added/removed only on LPAR */
	if (systemcfg->platform == PLATFORM_PSERIES_LPAR)
		pSeries_reconfig_notifier_register(&pSeries_smp_nb);
#endif

	/* Mark threads which are still spinning in hold loops. */
	if (cur_cpu_spec->cpu_features & CPU_FTR_SMT)
		for_each_present_cpu(i) {
			if (i % 2 == 0)
				/*
				 * Even-numbered logical cpus correspond to
				 * primary threads.
				 */
				cpu_set(i, of_spin_map);
		}
	else
		of_spin_map = cpu_present_map;

	cpu_clear(boot_cpuid, of_spin_map);

	/* Non-lpar has additional take/give timebase */
	if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) {
		smp_ops->give_timebase = pSeries_give_timebase;
		smp_ops->take_timebase = pSeries_take_timebase;
	}

	DBG(" <- smp_init_pSeries()\n");
}