Diffstat (limited to 'arch/x86_64/kernel/smp.c')
-rw-r--r--	arch/x86_64/kernel/smp.c	123
1 file changed, 78 insertions(+), 45 deletions(-)
diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c
index 82d38f145b43..801db885955c 100644
--- a/arch/x86_64/kernel/smp.c
+++ b/arch/x86_64/kernel/smp.c
@@ -29,6 +29,8 @@
 #include <asm/proto.h>
 #include <asm/apicdef.h>
 
+#define __cpuinit __init
+
 /*
  * Smarter SMP flushing macros.
  * c/o Linus Torvalds.
@@ -37,19 +39,41 @@
  * writing to user space from interrupts. (Its not allowed anyway).
  *
  * Optimizations Manfred Spraul <manfred@colorfullife.com>
+ *
+ * More scalable flush, from Andi Kleen
+ *
+ * To avoid global state use 8 different call vectors.
+ * Each CPU uses a specific vector to trigger flushes on other
+ * CPUs. Depending on the received vector the target CPUs look into
+ * the right per cpu variable for the flush data.
+ *
+ * With more than 8 CPUs they are hashed to the 8 available
+ * vectors. The limited global vector space forces us to this right now.
+ * In future when interrupts are split into per CPU domains this could be
+ * fixed, at the cost of triggering multiple IPIs in some cases.
  */
 
-static cpumask_t flush_cpumask;
-static struct mm_struct * flush_mm;
-static unsigned long flush_va;
-static DEFINE_SPINLOCK(tlbstate_lock);
+union smp_flush_state {
+	struct {
+		cpumask_t flush_cpumask;
+		struct mm_struct *flush_mm;
+		unsigned long flush_va;
 #define FLUSH_ALL	-1ULL
+		spinlock_t tlbstate_lock;
+	};
+	char pad[SMP_CACHE_BYTES];
+} ____cacheline_aligned;
+
+/* State is put into the per CPU data section, but padded
+   to a full cache line because other CPUs can access it and we don't
+   want false sharing in the per cpu data segment. */
+static DEFINE_PER_CPU(union smp_flush_state, flush_state);
 
 /*
  * We cannot call mmdrop() because we are in interrupt context,
  * instead update mm->cpu_vm_mask.
  */
-static inline void leave_mm (unsigned long cpu)
+static inline void leave_mm(int cpu)
 {
 	if (read_pda(mmu_state) == TLBSTATE_OK)
 		BUG();
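This hunk is the core of the change: the single global flush_mm/flush_va/flush_cpumask set, serialized by one spinlock, becomes per-vector state padded to a full cache line so senders using different vectors never false-share. A standalone userspace sketch of that layout and of the CPU-to-vector hashing described in the comment (cpumask_t and spinlock_t replaced by stand-ins, a plain array instead of DEFINE_PER_CPU, cache-line size assumed):

/* Standalone illustration (userspace C, not kernel code) of the padded
 * per-vector flush state and the cpu -> vector hashing in the patch.
 * Names mirror the patch; the sizes and stand-in types are assumptions. */
#include <assert.h>
#include <stdio.h>

#define NUM_INVALIDATE_TLB_VECTORS 8	/* 8 vectors, as in the patch */
#define SMP_CACHE_BYTES 64		/* assumed cache line size */

union smp_flush_state {
	struct {
		unsigned long flush_cpumask;	/* stand-in for cpumask_t */
		void *flush_mm;
		unsigned long flush_va;
		int tlbstate_lock;		/* stand-in for spinlock_t */
	};
	char pad[SMP_CACHE_BYTES];		/* pad to a full cache line */
};

/* One slot per vector; in the kernel this is DEFINE_PER_CPU data
 * indexed by the sender's vector number. */
static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS];

/* With more than 8 CPUs, senders hash onto the 8 available vectors. */
static int sender_vector(int cpu)
{
	return cpu % NUM_INVALIDATE_TLB_VECTORS;
}

int main(void)
{
	/* Each slot occupies (at least) its own cache line. */
	assert(sizeof(union smp_flush_state) >= SMP_CACHE_BYTES);

	/* CPUs 0 and 8 share vector 0; CPU 3 gets vector 3, etc. */
	printf("cpu 0 -> vector %d\n", sender_vector(0));
	printf("cpu 3 -> vector %d\n", sender_vector(3));
	printf("cpu 8 -> vector %d\n", sender_vector(8));
	return 0;
}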
@@ -101,15 +125,25 @@ static inline void leave_mm (unsigned long cpu)
  *
  * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
  * 2) Leave the mm if we are in the lazy tlb mode.
+ *
+ * Interrupts are disabled.
  */
 
-asmlinkage void smp_invalidate_interrupt (void)
+asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
 {
-	unsigned long cpu;
+	int cpu;
+	int sender;
+	union smp_flush_state *f;
 
-	cpu = get_cpu();
+	cpu = smp_processor_id();
+	/*
+	 * orig_rax contains the interrupt vector - 256.
+	 * Use that to determine where the sender put the data.
+	 */
+	sender = regs->orig_rax + 256 - INVALIDATE_TLB_VECTOR_START;
+	f = &per_cpu(flush_state, sender);
 
-	if (!cpu_isset(cpu, flush_cpumask))
+	if (!cpu_isset(cpu, f->flush_cpumask))
 		goto out;
 	/*
 	 * This was a BUG() but until someone can quote me the
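On the receiving side, the handler no longer needs global state: the vector it was invoked with identifies the sender's slot. Per the patch's own comment, the entry code leaves the interrupt vector minus 256 in orig_rax, so adding 256 back and subtracting INVALIDATE_TLB_VECTOR_START yields the per_cpu(flush_state, sender) index. A small standalone sketch of that arithmetic (the 0xf0 base vector used here is an assumption for illustration, not taken from this diff):

/* Sketch of the receiver-side index calculation in smp_invalidate_interrupt().
 * The entry code stores (vector - 256) in orig_rax, so adding 256 back and
 * subtracting the base vector yields the slot the sender used. */
#include <stdio.h>

#define INVALIDATE_TLB_VECTOR_START 0xf0	/* assumed base vector */
#define NUM_INVALIDATE_TLB_VECTORS  8

static int sender_from_orig_rax(long orig_rax)
{
	/* orig_rax == interrupt vector - 256 */
	return (int)(orig_rax + 256 - INVALIDATE_TLB_VECTOR_START);
}

int main(void)
{
	/* A flush sent on vector 0xf3 arrives with orig_rax == 0xf3 - 256. */
	long orig_rax = 0xf3 - 256;
	int sender = sender_from_orig_rax(orig_rax);

	printf("sender slot = %d\n", sender);	/* prints 3 */
	return 0;
}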
@@ -120,64 +154,63 @@ asmlinkage void smp_invalidate_interrupt (void)
 	 * BUG();
 	 */
 
-	if (flush_mm == read_pda(active_mm)) {
+	if (f->flush_mm == read_pda(active_mm)) {
 		if (read_pda(mmu_state) == TLBSTATE_OK) {
-			if (flush_va == FLUSH_ALL)
+			if (f->flush_va == FLUSH_ALL)
 				local_flush_tlb();
 			else
-				__flush_tlb_one(flush_va);
+				__flush_tlb_one(f->flush_va);
 		} else
 			leave_mm(cpu);
 	}
 out:
 	ack_APIC_irq();
-	cpu_clear(cpu, flush_cpumask);
-	put_cpu_no_resched();
+	cpu_clear(cpu, f->flush_cpumask);
 }
 
 static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
 						unsigned long va)
 {
-	cpumask_t tmp;
-	/*
-	 * A couple of (to be removed) sanity checks:
-	 *
-	 * - we do not send IPIs to not-yet booted CPUs.
-	 * - current CPU must not be in mask
-	 * - mask must exist :)
-	 */
-	BUG_ON(cpus_empty(cpumask));
-	cpus_and(tmp, cpumask, cpu_online_map);
-	BUG_ON(!cpus_equal(tmp, cpumask));
-	BUG_ON(cpu_isset(smp_processor_id(), cpumask));
-	if (!mm)
-		BUG();
+	int sender;
+	union smp_flush_state *f;
 
-	/*
-	 * I'm not happy about this global shared spinlock in the
-	 * MM hot path, but we'll see how contended it is.
-	 * Temporarily this turns IRQs off, so that lockups are
-	 * detected by the NMI watchdog.
-	 */
-	spin_lock(&tlbstate_lock);
-	
-	flush_mm = mm;
-	flush_va = va;
-	cpus_or(flush_cpumask, cpumask, flush_cpumask);
+	/* Caller has disabled preemption */
+	sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
+	f = &per_cpu(flush_state, sender);
+
+	/* Could avoid this lock when
+	   num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is
+	   probably not worth checking this for a cache-hot lock. */
+	spin_lock(&f->tlbstate_lock);
+
+	f->flush_mm = mm;
+	f->flush_va = va;
+	cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask);
 
 	/*
 	 * We have to send the IPI only to
 	 * CPUs affected.
 	 */
-	send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
+	send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender);
 
-	while (!cpus_empty(flush_cpumask))
-		mb();	/* nothing. lockup detection does not belong here */;
+	while (!cpus_empty(f->flush_cpumask))
+		cpu_relax();
 
-	flush_mm = NULL;
-	flush_va = 0;
-	spin_unlock(&tlbstate_lock);
+	f->flush_mm = NULL;
+	f->flush_va = 0;
+	spin_unlock(&f->tlbstate_lock);
 }
+
+int __cpuinit init_smp_flush(void)
+{
+	int i;
+	for_each_cpu_mask(i, cpu_possible_map) {
+		spin_lock_init(&per_cpu(flush_state.tlbstate_lock, i));
+	}
+	return 0;
+}
+
+core_initcall(init_smp_flush);
 
 void flush_tlb_current_task(void)
 {
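On the sender side, flush_tlb_others() in the hunk above hashes the calling CPU onto one of the eight slots, takes that slot's lock, publishes mm/va, ORs the target CPUs into the slot's cpumask, sends the IPI on INVALIDATE_TLB_VECTOR_START + sender, and spins with cpu_relax() until every target has cleared its bit. A minimal userspace sketch of that publish/acknowledge handshake (single sender, one target; C11 atomics stand in for the spinlock, per-CPU data and the real IPI, so this is an illustration, not kernel code):

/* Sketch of the sender/receiver handshake per flush slot. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct flush_slot {
	unsigned long flush_va;		/* published before the mask bit */
	atomic_ulong flush_cpumask;	/* bit n set == CPU n still flushing */
};

static struct flush_slot slot;		/* one slot; the patch has 8 */

#define TARGET_CPU 1

static void *receiver(void *arg)
{
	(void)arg;
	/* Wait for "the IPI": our bit showing up in the slot's mask. */
	while (!(atomic_load_explicit(&slot.flush_cpumask,
				      memory_order_acquire) & (1UL << TARGET_CPU)))
		;					/* cpu_relax() */

	printf("cpu %d flushes va %#lx\n", TARGET_CPU, slot.flush_va);

	/* Acknowledge by clearing our bit, like cpu_clear() in the handler. */
	atomic_fetch_and_explicit(&slot.flush_cpumask,
				  ~(1UL << TARGET_CPU), memory_order_release);
	return NULL;
}

int main(void)
{
	pthread_t t;
	pthread_create(&t, NULL, receiver, NULL);

	/* Sender side, as in flush_tlb_others(): publish the data ... */
	slot.flush_va = 0x1234000UL;
	/* ... then OR in the targets (the release also publishes flush_va). */
	atomic_fetch_or_explicit(&slot.flush_cpumask,
				 1UL << TARGET_CPU, memory_order_release);

	/* ... and spin until every target has acknowledged. */
	while (atomic_load_explicit(&slot.flush_cpumask, memory_order_acquire))
		;					/* cpu_relax() */

	slot.flush_va = 0;
	printf("sender: all targets done\n");
	pthread_join(t, NULL);
	return 0;
}

In the kernel the slot's spinlock serializes concurrent senders that hash to the same vector; the sketch omits it because it has only one sender.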