Diffstat (limited to 'arch/x86_64')

 -rw-r--r--  arch/x86_64/kernel/entry.S |  15
 -rw-r--r--  arch/x86_64/kernel/i8259.c |  20
 -rw-r--r--  arch/x86_64/kernel/smp.c   | 123

 3 files changed, 108 insertions, 50 deletions
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index 3620508c8bd9..873c39d8f818 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -536,8 +536,19 @@ ENTRY(thermal_interrupt)
 ENTRY(reschedule_interrupt)
        apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
 
-ENTRY(invalidate_interrupt)
-       apicinterrupt INVALIDATE_TLB_VECTOR,smp_invalidate_interrupt
+       .macro INVALIDATE_ENTRY num
+ENTRY(invalidate_interrupt\num)
+       apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
+       .endm
+
+       INVALIDATE_ENTRY 0
+       INVALIDATE_ENTRY 1
+       INVALIDATE_ENTRY 2
+       INVALIDATE_ENTRY 3
+       INVALIDATE_ENTRY 4
+       INVALIDATE_ENTRY 5
+       INVALIDATE_ENTRY 6
+       INVALIDATE_ENTRY 7
 
 ENTRY(call_function_interrupt)
        apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
diff --git a/arch/x86_64/kernel/i8259.c b/arch/x86_64/kernel/i8259.c
index a89169095129..909fc63c03b6 100644
--- a/arch/x86_64/kernel/i8259.c
+++ b/arch/x86_64/kernel/i8259.c
@@ -486,7 +486,14 @@ void spurious_interrupt(void);
 void error_interrupt(void);
 void reschedule_interrupt(void);
 void call_function_interrupt(void);
-void invalidate_interrupt(void);
+void invalidate_interrupt0(void);
+void invalidate_interrupt1(void);
+void invalidate_interrupt2(void);
+void invalidate_interrupt3(void);
+void invalidate_interrupt4(void);
+void invalidate_interrupt5(void);
+void invalidate_interrupt6(void);
+void invalidate_interrupt7(void);
 void thermal_interrupt(void);
 void i8254_timer_resume(void);
 
@@ -562,8 +569,15 @@ void __init init_IRQ(void)
         */
        set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
 
-       /* IPI for invalidation */
-       set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
+       /* IPIs for invalidation */
+       set_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
+       set_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
+       set_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
+       set_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
+       set_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
+       set_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
+       set_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
+       set_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
 
        /* IPI for generic function call */
        set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c
index 82d38f145b43..801db885955c 100644
--- a/arch/x86_64/kernel/smp.c
+++ b/arch/x86_64/kernel/smp.c
@@ -29,6 +29,8 @@ | |||
29 | #include <asm/proto.h> | 29 | #include <asm/proto.h> |
30 | #include <asm/apicdef.h> | 30 | #include <asm/apicdef.h> |
31 | 31 | ||
32 | #define __cpuinit __init | ||
33 | |||
32 | /* | 34 | /* |
33 | * Smarter SMP flushing macros. | 35 | * Smarter SMP flushing macros. |
34 | * c/o Linus Torvalds. | 36 | * c/o Linus Torvalds. |
@@ -37,19 +39,41 @@
  * writing to user space from interrupts. (Its not allowed anyway).
  *
  * Optimizations Manfred Spraul <manfred@colorfullife.com>
+ *
+ * More scalable flush, from Andi Kleen
+ *
+ * To avoid global state use 8 different call vectors.
+ * Each CPU uses a specific vector to trigger flushes on other
+ * CPUs. Depending on the received vector the target CPUs look into
+ * the right per cpu variable for the flush data.
+ *
+ * With more than 8 CPUs they are hashed to the 8 available
+ * vectors. The limited global vector space forces us to this right now.
+ * In future when interrupts are split into per CPU domains this could be
+ * fixed, at the cost of triggering multiple IPIs in some cases.
  */
 
-static cpumask_t flush_cpumask;
-static struct mm_struct * flush_mm;
-static unsigned long flush_va;
-static DEFINE_SPINLOCK(tlbstate_lock);
+union smp_flush_state {
+       struct {
+               cpumask_t flush_cpumask;
+               struct mm_struct *flush_mm;
+               unsigned long flush_va;
 #define FLUSH_ALL      -1ULL
+               spinlock_t tlbstate_lock;
+       };
+       char pad[SMP_CACHE_BYTES];
+} ____cacheline_aligned;
+
+/* State is put into the per CPU data section, but padded
+   to a full cache line because other CPUs can access it and we don't
+   want false sharing in the per cpu data segment. */
+static DEFINE_PER_CPU(union smp_flush_state, flush_state);
 
 /*
  * We cannot call mmdrop() because we are in interrupt context,
  * instead update mm->cpu_vm_mask.
  */
-static inline void leave_mm (unsigned long cpu)
+static inline void leave_mm(int cpu)
 {
        if (read_pda(mmu_state) == TLBSTATE_OK)
                BUG();
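Note: the union added above sizes each per-CPU flush_state slot to at least one cache line, so a CPU spinning on another sender's flush_cpumask never shares a line with unrelated per-cpu data (the alignment itself comes from ____cacheline_aligned). A minimal user-space sketch of the same padding idea, assuming a 64-byte line and stand-in field types:

#include <stdio.h>

#define CACHE_BYTES 64                  /* stand-in for SMP_CACHE_BYTES */

union flush_state_sketch {
        struct {
                unsigned long cpumask;  /* stand-in for cpumask_t */
                void *mm;               /* stand-in for struct mm_struct * */
                unsigned long va;
                int lock;               /* stand-in for spinlock_t */
        };
        char pad[CACHE_BYTES];          /* pads the slot to a full line */
};

int main(void)
{
        /* Each slot occupies (at least) one assumed cache line. */
        printf("slot size: %zu bytes\n", sizeof(union flush_state_sketch));
        return 0;
}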
@@ -101,15 +125,25 @@ static inline void leave_mm (unsigned long cpu)
  *
  * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
  * 2) Leave the mm if we are in the lazy tlb mode.
+ *
+ * Interrupts are disabled.
  */
 
-asmlinkage void smp_invalidate_interrupt (void)
+asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
 {
-       unsigned long cpu;
+       int cpu;
+       int sender;
+       union smp_flush_state *f;
 
-       cpu = get_cpu();
+       cpu = smp_processor_id();
+       /*
+        * orig_rax contains the interrupt vector - 256.
+        * Use that to determine where the sender put the data.
+        */
+       sender = regs->orig_rax + 256 - INVALIDATE_TLB_VECTOR_START;
+       f = &per_cpu(flush_state, sender);
 
-       if (!cpu_isset(cpu, flush_cpumask))
+       if (!cpu_isset(cpu, f->flush_cpumask))
                goto out;
        /*
         * This was a BUG() but until someone can quote me the
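Note: per the comment in the hunk above, orig_rax holds the interrupt vector minus 256, so adding 256 back and subtracting INVALIDATE_TLB_VECTOR_START recovers which of the eight vectors fired and therefore which sender's per-CPU slot to read. A simplified worked example, assuming a vector base of 0xf0 (the real base is defined in the x86_64 irq vector header) and a plain signed value instead of the register:

#include <stdio.h>

#define INVALIDATE_TLB_VECTOR_START 0xf0        /* assumed base for the example */

int main(void)
{
        long orig_rax = 0xf3 - 256;     /* entry code stored vector - 256 */
        int sender = orig_rax + 256 - INVALIDATE_TLB_VECTOR_START;

        /* Vector 0xf3 was sent by the CPU using per-CPU slot 3. */
        printf("sender slot: %d\n", sender);
        return 0;
}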
@@ -120,64 +154,63 @@ asmlinkage void smp_invalidate_interrupt (void)
         *      BUG();
         */
 
-       if (flush_mm == read_pda(active_mm)) {
+       if (f->flush_mm == read_pda(active_mm)) {
                if (read_pda(mmu_state) == TLBSTATE_OK) {
-                       if (flush_va == FLUSH_ALL)
+                       if (f->flush_va == FLUSH_ALL)
                                local_flush_tlb();
                        else
-                               __flush_tlb_one(flush_va);
+                               __flush_tlb_one(f->flush_va);
                } else
                        leave_mm(cpu);
        }
 out:
        ack_APIC_irq();
-       cpu_clear(cpu, flush_cpumask);
-       put_cpu_no_resched();
+       cpu_clear(cpu, f->flush_cpumask);
 }
 
 static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
                                                unsigned long va)
 {
-       cpumask_t tmp;
-       /*
-        * A couple of (to be removed) sanity checks:
-        *
-        * - we do not send IPIs to not-yet booted CPUs.
-        * - current CPU must not be in mask
-        * - mask must exist :)
-        */
-       BUG_ON(cpus_empty(cpumask));
-       cpus_and(tmp, cpumask, cpu_online_map);
-       BUG_ON(!cpus_equal(tmp, cpumask));
-       BUG_ON(cpu_isset(smp_processor_id(), cpumask));
-       if (!mm)
-               BUG();
+       int sender;
+       union smp_flush_state *f;
 
-       /*
-        * I'm not happy about this global shared spinlock in the
-        * MM hot path, but we'll see how contended it is.
-        * Temporarily this turns IRQs off, so that lockups are
-        * detected by the NMI watchdog.
-        */
-       spin_lock(&tlbstate_lock);
-
-       flush_mm = mm;
-       flush_va = va;
-       cpus_or(flush_cpumask, cpumask, flush_cpumask);
+       /* Caller has disabled preemption */
+       sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
+       f = &per_cpu(flush_state, sender);
+
+       /* Could avoid this lock when
+          num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is
+          probably not worth checking this for a cache-hot lock. */
+       spin_lock(&f->tlbstate_lock);
+
+       f->flush_mm = mm;
+       f->flush_va = va;
+       cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask);
 
        /*
         * We have to send the IPI only to
         * CPUs affected.
         */
-       send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
+       send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender);
 
-       while (!cpus_empty(flush_cpumask))
-               mb();   /* nothing. lockup detection does not belong here */;
+       while (!cpus_empty(f->flush_cpumask))
+               cpu_relax();
 
-       flush_mm = NULL;
-       flush_va = 0;
-       spin_unlock(&tlbstate_lock);
+       f->flush_mm = NULL;
+       f->flush_va = 0;
+       spin_unlock(&f->tlbstate_lock);
 }
+
+int __cpuinit init_smp_flush(void)
+{
+       int i;
+       for_each_cpu_mask(i, cpu_possible_map) {
+               spin_lock_init(&per_cpu(flush_state.tlbstate_lock, i));
+       }
+       return 0;
+}
+
+core_initcall(init_smp_flush);
 
 void flush_tlb_current_task(void)
 {
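Note: the sender side above picks its slot with smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS, so on machines with more than eight CPUs several senders hash onto the same vector, lock, and flush_state slot, as described in the comment block added at the top of smp.c. A small illustration of that mapping (NUM_INVALIDATE_TLB_VECTORS is 8 in this patch):

#include <stdio.h>

#define NUM_INVALIDATE_TLB_VECTORS 8    /* matches the patch */

int main(void)
{
        int cpu;

        /* CPUs 0-7 get private slots; CPU 8 shares with CPU 0, and so on. */
        for (cpu = 0; cpu < 12; cpu++)
                printf("cpu %2d -> flush_state slot %d\n",
                       cpu, cpu % NUM_INVALIDATE_TLB_VECTORS);
        return 0;
}

Sharing a slot only means those senders contend on the same tlbstate_lock; each flush completes before the lock is released, so correctness is unaffected.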