diff options
author | Thomas Gleixner <tglx@linutronix.de> | 2007-10-11 05:17:24 -0400 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2007-10-11 05:17:24 -0400 |
commit | 250c22777fe1ccd7ac588579a6c16db4c0161cc5 (patch) | |
tree | 55c317efb7d792ec6fdae1d1937c67a502c48dec /arch/x86/kernel/smp_64.c | |
parent | 2db55d344e529492545cb3b755c7e9ba8e4fa94e (diff) |
x86_64: move kernel
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel/smp_64.c')
-rw-r--r-- | arch/x86/kernel/smp_64.c | 523 |
1 files changed, 523 insertions, 0 deletions
diff --git a/arch/x86/kernel/smp_64.c b/arch/x86/kernel/smp_64.c new file mode 100644 index 000000000000..df4a82812adb --- /dev/null +++ b/arch/x86/kernel/smp_64.c | |||
@@ -0,0 +1,523 @@ | |||
1 | /* | ||
2 | * Intel SMP support routines. | ||
3 | * | ||
4 | * (c) 1995 Alan Cox, Building #3 <alan@redhat.com> | ||
5 | * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com> | ||
6 | * (c) 2002,2003 Andi Kleen, SuSE Labs. | ||
7 | * | ||
8 | * This code is released under the GNU General Public License version 2 or | ||
9 | * later. | ||
10 | */ | ||
11 | |||
12 | #include <linux/init.h> | ||
13 | |||
14 | #include <linux/mm.h> | ||
15 | #include <linux/delay.h> | ||
16 | #include <linux/spinlock.h> | ||
17 | #include <linux/smp.h> | ||
18 | #include <linux/kernel_stat.h> | ||
19 | #include <linux/mc146818rtc.h> | ||
20 | #include <linux/interrupt.h> | ||
21 | |||
22 | #include <asm/mtrr.h> | ||
23 | #include <asm/pgalloc.h> | ||
24 | #include <asm/tlbflush.h> | ||
25 | #include <asm/mach_apic.h> | ||
26 | #include <asm/mmu_context.h> | ||
27 | #include <asm/proto.h> | ||
28 | #include <asm/apicdef.h> | ||
29 | #include <asm/idle.h> | ||
30 | |||
31 | /* | ||
32 | * Smarter SMP flushing macros. | ||
33 | * c/o Linus Torvalds. | ||
34 | * | ||
35 | * These mean you can really definitely utterly forget about | ||
36 | * writing to user space from interrupts. (It's not allowed anyway). | ||
37 | * | ||
38 | * Optimizations Manfred Spraul <manfred@colorfullife.com> | ||
39 | * | ||
40 | * More scalable flush, from Andi Kleen | ||
41 | * | ||
42 | * To avoid global state use 8 different call vectors. | ||
43 | * Each CPU uses a specific vector to trigger flushes on other | ||
44 | * CPUs. Depending on the received vector the target CPUs look into | ||
45 | * the right per cpu variable for the flush data. | ||
46 | * | ||
47 | * With more than 8 CPUs they are hashed to the 8 available | ||
48 | * vectors. The limited global vector space forces us to this right now. | ||
49 | * In future when interrupts are split into per CPU domains this could be | ||
50 | * fixed, at the cost of triggering multiple IPIs in some cases. | ||
51 | */ | ||
52 | |||
53 | union smp_flush_state { | ||
54 | struct { | ||
55 | cpumask_t flush_cpumask; | ||
56 | struct mm_struct *flush_mm; | ||
57 | unsigned long flush_va; | ||
58 | #define FLUSH_ALL -1ULL | ||
59 | spinlock_t tlbstate_lock; | ||
60 | }; | ||
61 | char pad[SMP_CACHE_BYTES]; | ||
62 | } ____cacheline_aligned; | ||
63 | |||
64 | /* State is put into the per CPU data section, but padded | ||
65 | to a full cache line because other CPUs can access it and we don't | ||
66 | want false sharing in the per cpu data segment. */ | ||
67 | static DEFINE_PER_CPU(union smp_flush_state, flush_state); | ||
68 | |||
69 | /* | ||
70 | * We cannot call mmdrop() because we are in interrupt context, | ||
71 | * instead update mm->cpu_vm_mask. | ||
72 | */ | ||
73 | static inline void leave_mm(int cpu) | ||
74 | { | ||
75 | if (read_pda(mmu_state) == TLBSTATE_OK) | ||
76 | BUG(); | ||
77 | cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask); | ||
78 | load_cr3(swapper_pg_dir); | ||
79 | } | ||
80 | |||
81 | /* | ||
82 | * | ||
83 | * The flush IPI assumes that a thread switch happens in this order: | ||
84 | * [cpu0: the cpu that switches] | ||
85 | * 1) switch_mm() either 1a) or 1b) | ||
86 | * 1a) thread switch to a different mm | ||
87 | * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask); | ||
88 | * Stop ipi delivery for the old mm. This is not synchronized with | ||
89 | * the other cpus, but smp_invalidate_interrupt ignores flush ipis | ||
90 | * for the wrong mm, and in the worst case we perform a superfluous | ||
91 | * tlb flush. | ||
92 | * 1a2) set cpu mmu_state to TLBSTATE_OK | ||
93 | * Now the smp_invalidate_interrupt won't call leave_mm if cpu0 | ||
94 | * was in lazy tlb mode. | ||
95 | * 1a3) update cpu active_mm | ||
96 | * Now cpu0 accepts tlb flushes for the new mm. | ||
97 | * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask); | ||
98 | * Now the other cpus will send tlb flush ipis. | ||
99 | * 1a5) change cr3. | ||
100 | * 1b) thread switch without mm change | ||
101 | * cpu active_mm is correct, cpu0 already handles | ||
102 | * flush ipis. | ||
103 | * 1b1) set cpu mmu_state to TLBSTATE_OK | ||
104 | * 1b2) test_and_set the cpu bit in cpu_vm_mask. | ||
105 | * Atomically set the bit [other cpus will start sending flush ipis], | ||
106 | * and test the bit. | ||
107 | * 1b3) if the bit was 0: leave_mm was called, flush the tlb. | ||
108 | * 2) switch %%esp, ie current | ||
109 | * | ||
110 | * The interrupt must handle 2 special cases: | ||
111 | * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm. | ||
112 | * - the cpu performs speculative tlb reads, i.e. even if the cpu only | ||
113 | * runs in kernel space, the cpu could load tlb entries for user space | ||
114 | * pages. | ||
115 | * | ||
116 | * The good news is that cpu mmu_state is local to each cpu, no | ||
117 | * write/read ordering problems. | ||
118 | */ | ||
119 | |||
120 | /* | ||
121 | * TLB flush IPI: | ||
122 | * | ||
123 | * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. | ||
124 | * 2) Leave the mm if we are in the lazy tlb mode. | ||
125 | * | ||
126 | * Interrupts are disabled. | ||
127 | */ | ||
128 | |||
129 | asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) | ||
130 | { | ||
131 | int cpu; | ||
132 | int sender; | ||
133 | union smp_flush_state *f; | ||
134 | |||
135 | cpu = smp_processor_id(); | ||
136 | /* | ||
137 | * orig_rax contains the negated interrupt vector. | ||
138 | * Use that to determine where the sender put the data. | ||
139 | */ | ||
140 | sender = ~regs->orig_rax - INVALIDATE_TLB_VECTOR_START; | ||
141 | f = &per_cpu(flush_state, sender); | ||
142 | |||
143 | if (!cpu_isset(cpu, f->flush_cpumask)) | ||
144 | goto out; | ||
145 | /* | ||
146 | * This was a BUG() but until someone can quote me the | ||
147 | * line from the intel manual that guarantees an IPI to | ||
148 | * multiple CPUs is retried _only_ on the erroring CPUs | ||
149 | * its staying as a return | ||
150 | * | ||
151 | * BUG(); | ||
152 | */ | ||
153 | |||
154 | if (f->flush_mm == read_pda(active_mm)) { | ||
155 | if (read_pda(mmu_state) == TLBSTATE_OK) { | ||
156 | if (f->flush_va == FLUSH_ALL) | ||
157 | local_flush_tlb(); | ||
158 | else | ||
159 | __flush_tlb_one(f->flush_va); | ||
160 | } else | ||
161 | leave_mm(cpu); | ||
162 | } | ||
163 | out: | ||
164 | ack_APIC_irq(); | ||
165 | cpu_clear(cpu, f->flush_cpumask); | ||
166 | } | ||
167 | |||
168 | static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, | ||
169 | unsigned long va) | ||
170 | { | ||
171 | int sender; | ||
172 | union smp_flush_state *f; | ||
173 | |||
174 | /* Caller has disabled preemption */ | ||
175 | sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS; | ||
176 | f = &per_cpu(flush_state, sender); | ||
177 | |||
178 | /* Could avoid this lock when | ||
179 | num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is | ||
180 | probably not worth checking this for a cache-hot lock. */ | ||
181 | spin_lock(&f->tlbstate_lock); | ||
182 | |||
183 | f->flush_mm = mm; | ||
184 | f->flush_va = va; | ||
185 | cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask); | ||
186 | |||
187 | /* | ||
188 | * We have to send the IPI only to | ||
189 | * CPUs affected. | ||
190 | */ | ||
191 | send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender); | ||
192 | |||
193 | while (!cpus_empty(f->flush_cpumask)) | ||
194 | cpu_relax(); | ||
195 | |||
196 | f->flush_mm = NULL; | ||
197 | f->flush_va = 0; | ||
198 | spin_unlock(&f->tlbstate_lock); | ||
199 | } | ||
200 | |||
201 | int __cpuinit init_smp_flush(void) | ||
202 | { | ||
203 | int i; | ||
204 | for_each_cpu_mask(i, cpu_possible_map) { | ||
205 | spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock); | ||
206 | } | ||
207 | return 0; | ||
208 | } | ||
209 | |||
210 | core_initcall(init_smp_flush); | ||
211 | |||
212 | void flush_tlb_current_task(void) | ||
213 | { | ||
214 | struct mm_struct *mm = current->mm; | ||
215 | cpumask_t cpu_mask; | ||
216 | |||
217 | preempt_disable(); | ||
218 | cpu_mask = mm->cpu_vm_mask; | ||
219 | cpu_clear(smp_processor_id(), cpu_mask); | ||
220 | |||
221 | local_flush_tlb(); | ||
222 | if (!cpus_empty(cpu_mask)) | ||
223 | flush_tlb_others(cpu_mask, mm, FLUSH_ALL); | ||
224 | preempt_enable(); | ||
225 | } | ||
226 | EXPORT_SYMBOL(flush_tlb_current_task); | ||
227 | |||
228 | void flush_tlb_mm (struct mm_struct * mm) | ||
229 | { | ||
230 | cpumask_t cpu_mask; | ||
231 | |||
232 | preempt_disable(); | ||
233 | cpu_mask = mm->cpu_vm_mask; | ||
234 | cpu_clear(smp_processor_id(), cpu_mask); | ||
235 | |||
236 | if (current->active_mm == mm) { | ||
237 | if (current->mm) | ||
238 | local_flush_tlb(); | ||
239 | else | ||
240 | leave_mm(smp_processor_id()); | ||
241 | } | ||
242 | if (!cpus_empty(cpu_mask)) | ||
243 | flush_tlb_others(cpu_mask, mm, FLUSH_ALL); | ||
244 | |||
245 | preempt_enable(); | ||
246 | } | ||
247 | EXPORT_SYMBOL(flush_tlb_mm); | ||
248 | |||
249 | void flush_tlb_page(struct vm_area_struct * vma, unsigned long va) | ||
250 | { | ||
251 | struct mm_struct *mm = vma->vm_mm; | ||
252 | cpumask_t cpu_mask; | ||
253 | |||
254 | preempt_disable(); | ||
255 | cpu_mask = mm->cpu_vm_mask; | ||
256 | cpu_clear(smp_processor_id(), cpu_mask); | ||
257 | |||
258 | if (current->active_mm == mm) { | ||
259 | if(current->mm) | ||
260 | __flush_tlb_one(va); | ||
261 | else | ||
262 | leave_mm(smp_processor_id()); | ||
263 | } | ||
264 | |||
265 | if (!cpus_empty(cpu_mask)) | ||
266 | flush_tlb_others(cpu_mask, mm, va); | ||
267 | |||
268 | preempt_enable(); | ||
269 | } | ||
270 | EXPORT_SYMBOL(flush_tlb_page); | ||
271 | |||
272 | static void do_flush_tlb_all(void* info) | ||
273 | { | ||
274 | unsigned long cpu = smp_processor_id(); | ||
275 | |||
276 | __flush_tlb_all(); | ||
277 | if (read_pda(mmu_state) == TLBSTATE_LAZY) | ||
278 | leave_mm(cpu); | ||
279 | } | ||
280 | |||
281 | void flush_tlb_all(void) | ||
282 | { | ||
283 | on_each_cpu(do_flush_tlb_all, NULL, 1, 1); | ||
284 | } | ||
285 | |||
286 | /* | ||
287 | * this function sends a 'reschedule' IPI to another CPU. | ||
288 | * it goes straight through and wastes no time serializing | ||
289 | * anything. Worst case is that we lose a reschedule ... | ||
290 | */ | ||
291 | |||
292 | void smp_send_reschedule(int cpu) | ||
293 | { | ||
294 | send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); | ||
295 | } | ||
296 | |||
297 | /* | ||
298 | * Structure and data for smp_call_function(). This is designed to minimise | ||
299 | * static memory requirements. It also looks cleaner. | ||
300 | */ | ||
301 | static DEFINE_SPINLOCK(call_lock); | ||
302 | |||
303 | struct call_data_struct { | ||
304 | void (*func) (void *info); | ||
305 | void *info; | ||
306 | atomic_t started; | ||
307 | atomic_t finished; | ||
308 | int wait; | ||
309 | }; | ||
310 | |||
311 | static struct call_data_struct * call_data; | ||
312 | |||
313 | void lock_ipi_call_lock(void) | ||
314 | { | ||
315 | spin_lock_irq(&call_lock); | ||
316 | } | ||
317 | |||
318 | void unlock_ipi_call_lock(void) | ||
319 | { | ||
320 | spin_unlock_irq(&call_lock); | ||
321 | } | ||
322 | |||
323 | /* | ||
324 | * this function sends a 'generic call function' IPI to one other CPU | ||
325 | * in the system. | ||
326 | * | ||
327 | * cpu is a standard Linux logical CPU number. | ||
328 | */ | ||
329 | static void | ||
330 | __smp_call_function_single(int cpu, void (*func) (void *info), void *info, | ||
331 | int nonatomic, int wait) | ||
332 | { | ||
333 | struct call_data_struct data; | ||
334 | int cpus = 1; | ||
335 | |||
336 | data.func = func; | ||
337 | data.info = info; | ||
338 | atomic_set(&data.started, 0); | ||
339 | data.wait = wait; | ||
340 | if (wait) | ||
341 | atomic_set(&data.finished, 0); | ||
342 | |||
343 | call_data = &data; | ||
344 | wmb(); | ||
345 | /* Send a message to all other CPUs and wait for them to respond */ | ||
346 | send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR); | ||
347 | |||
348 | /* Wait for response */ | ||
349 | while (atomic_read(&data.started) != cpus) | ||
350 | cpu_relax(); | ||
351 | |||
352 | if (!wait) | ||
353 | return; | ||
354 | |||
355 | while (atomic_read(&data.finished) != cpus) | ||
356 | cpu_relax(); | ||
357 | } | ||
358 | |||
359 | /* | ||
360 | * smp_call_function_single - Run a function on a specific CPU | ||
361 | * @func: The function to run. This must be fast and non-blocking. | ||
362 | * @info: An arbitrary pointer to pass to the function. | ||
363 | * @nonatomic: Currently unused. | ||
364 | * @wait: If true, wait until function has completed on other CPUs. | ||
365 | * | ||
366 | * Retrurns 0 on success, else a negative status code. | ||
367 | * | ||
368 | * Does not return until the remote CPU is nearly ready to execute <func> | ||
369 | * or is or has executed. | ||
370 | */ | ||
371 | |||
372 | int smp_call_function_single (int cpu, void (*func) (void *info), void *info, | ||
373 | int nonatomic, int wait) | ||
374 | { | ||
375 | /* prevent preemption and reschedule on another processor */ | ||
376 | int me = get_cpu(); | ||
377 | |||
378 | /* Can deadlock when called with interrupts disabled */ | ||
379 | WARN_ON(irqs_disabled()); | ||
380 | |||
381 | if (cpu == me) { | ||
382 | local_irq_disable(); | ||
383 | func(info); | ||
384 | local_irq_enable(); | ||
385 | put_cpu(); | ||
386 | return 0; | ||
387 | } | ||
388 | |||
389 | spin_lock(&call_lock); | ||
390 | __smp_call_function_single(cpu, func, info, nonatomic, wait); | ||
391 | spin_unlock(&call_lock); | ||
392 | put_cpu(); | ||
393 | return 0; | ||
394 | } | ||
395 | EXPORT_SYMBOL(smp_call_function_single); | ||
396 | |||
397 | /* | ||
398 | * this function sends a 'generic call function' IPI to all other CPUs | ||
399 | * in the system. | ||
400 | */ | ||
401 | static void __smp_call_function (void (*func) (void *info), void *info, | ||
402 | int nonatomic, int wait) | ||
403 | { | ||
404 | struct call_data_struct data; | ||
405 | int cpus = num_online_cpus()-1; | ||
406 | |||
407 | if (!cpus) | ||
408 | return; | ||
409 | |||
410 | data.func = func; | ||
411 | data.info = info; | ||
412 | atomic_set(&data.started, 0); | ||
413 | data.wait = wait; | ||
414 | if (wait) | ||
415 | atomic_set(&data.finished, 0); | ||
416 | |||
417 | call_data = &data; | ||
418 | wmb(); | ||
419 | /* Send a message to all other CPUs and wait for them to respond */ | ||
420 | send_IPI_allbutself(CALL_FUNCTION_VECTOR); | ||
421 | |||
422 | /* Wait for response */ | ||
423 | while (atomic_read(&data.started) != cpus) | ||
424 | cpu_relax(); | ||
425 | |||
426 | if (!wait) | ||
427 | return; | ||
428 | |||
429 | while (atomic_read(&data.finished) != cpus) | ||
430 | cpu_relax(); | ||
431 | } | ||
432 | |||
433 | /* | ||
434 | * smp_call_function - run a function on all other CPUs. | ||
435 | * @func: The function to run. This must be fast and non-blocking. | ||
436 | * @info: An arbitrary pointer to pass to the function. | ||
437 | * @nonatomic: currently unused. | ||
438 | * @wait: If true, wait (atomically) until function has completed on other | ||
439 | * CPUs. | ||
440 | * | ||
441 | * Returns 0 on success, else a negative status code. Does not return until | ||
442 | * remote CPUs are nearly ready to execute func or are or have executed. | ||
443 | * | ||
444 | * You must not call this function with disabled interrupts or from a | ||
445 | * hardware interrupt handler or from a bottom half handler. | ||
446 | * Actually there are a few legal cases, like panic. | ||
447 | */ | ||
448 | int smp_call_function (void (*func) (void *info), void *info, int nonatomic, | ||
449 | int wait) | ||
450 | { | ||
451 | spin_lock(&call_lock); | ||
452 | __smp_call_function(func,info,nonatomic,wait); | ||
453 | spin_unlock(&call_lock); | ||
454 | return 0; | ||
455 | } | ||
456 | EXPORT_SYMBOL(smp_call_function); | ||
457 | |||
458 | static void stop_this_cpu(void *dummy) | ||
459 | { | ||
460 | local_irq_disable(); | ||
461 | /* | ||
462 | * Remove this CPU: | ||
463 | */ | ||
464 | cpu_clear(smp_processor_id(), cpu_online_map); | ||
465 | disable_local_APIC(); | ||
466 | for (;;) | ||
467 | halt(); | ||
468 | } | ||
469 | |||
470 | void smp_send_stop(void) | ||
471 | { | ||
472 | int nolock; | ||
473 | unsigned long flags; | ||
474 | |||
475 | if (reboot_force) | ||
476 | return; | ||
477 | |||
478 | /* Don't deadlock on the call lock in panic */ | ||
479 | nolock = !spin_trylock(&call_lock); | ||
480 | local_irq_save(flags); | ||
481 | __smp_call_function(stop_this_cpu, NULL, 0, 0); | ||
482 | if (!nolock) | ||
483 | spin_unlock(&call_lock); | ||
484 | disable_local_APIC(); | ||
485 | local_irq_restore(flags); | ||
486 | } | ||
487 | |||
488 | /* | ||
489 | * Reschedule call back. Nothing to do, | ||
490 | * all the work is done automatically when | ||
491 | * we return from the interrupt. | ||
492 | */ | ||
493 | asmlinkage void smp_reschedule_interrupt(void) | ||
494 | { | ||
495 | ack_APIC_irq(); | ||
496 | } | ||
497 | |||
498 | asmlinkage void smp_call_function_interrupt(void) | ||
499 | { | ||
500 | void (*func) (void *info) = call_data->func; | ||
501 | void *info = call_data->info; | ||
502 | int wait = call_data->wait; | ||
503 | |||
504 | ack_APIC_irq(); | ||
505 | /* | ||
506 | * Notify initiating CPU that I've grabbed the data and am | ||
507 | * about to execute the function | ||
508 | */ | ||
509 | mb(); | ||
510 | atomic_inc(&call_data->started); | ||
511 | /* | ||
512 | * At this point the info structure may be out of scope unless wait==1 | ||
513 | */ | ||
514 | exit_idle(); | ||
515 | irq_enter(); | ||
516 | (*func)(info); | ||
517 | irq_exit(); | ||
518 | if (wait) { | ||
519 | mb(); | ||
520 | atomic_inc(&call_data->finished); | ||
521 | } | ||
522 | } | ||
523 | |||