aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
author Glauber Costa <gcosta@redhat.com> 2008-03-03 12:12:54 -0500
committer Ingo Molnar <mingo@elte.hu> 2008-04-17 11:40:56 -0400
commit c048fdfe6178e082be918d4062c86d9764979112 (patch)
tree 51814f6d4c259bd5a6c3eaa02c3fb5ff7f844228 /arch/x86/kernel
parent 8202350367ac11d571f6dd4c21c2027a4d235276 (diff)
x86: create tlb files
This patch creates tlb_32.c and tlb_64.c, which hold the TLB-related functions that used to live in the smp*.c files.

Signed-off-by: Glauber Costa <gcosta@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- arch/x86/kernel/Makefile 2
-rw-r--r-- arch/x86/kernel/smp_32.c 235
-rw-r--r-- arch/x86/kernel/smp_64.c 275
-rw-r--r-- arch/x86/kernel/tlb_32.c 243
-rw-r--r-- arch/x86/kernel/tlb_64.c 273
5 files changed, 517 insertions, 511 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index e3b01f96c565..362ab6a9d5b2 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -47,7 +47,7 @@ obj-$(CONFIG_PCI) += early-quirks.o
47apm-y := apm_32.o 47apm-y := apm_32.o
48obj-$(CONFIG_APM) += apm.o 48obj-$(CONFIG_APM) += apm.o
49obj-$(CONFIG_X86_SMP) += smp_$(BITS).o smpboot_$(BITS).o smp.o 49obj-$(CONFIG_X86_SMP) += smp_$(BITS).o smpboot_$(BITS).o smp.o
50obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o 50obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o tlb_$(BITS).o
51obj-$(CONFIG_X86_32_SMP) += smpcommon.o 51obj-$(CONFIG_X86_32_SMP) += smpcommon.o
52obj-$(CONFIG_X86_64_SMP) += smp_64.o smpboot_64.o tsc_sync.o smpcommon.o 52obj-$(CONFIG_X86_64_SMP) += smp_64.o smpboot_64.o tsc_sync.o smpcommon.o
53obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o 53obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o
diff --git a/arch/x86/kernel/smp_32.c b/arch/x86/kernel/smp_32.c
index d80623aba9c5..d8fdec5f19bc 100644
--- a/arch/x86/kernel/smp_32.c
+++ b/arch/x86/kernel/smp_32.c
@@ -104,238 +104,3 @@
104 * or are signal timing bugs worked around in hardware and there's 104 * or are signal timing bugs worked around in hardware and there's
105 * about nothing of note with C stepping upwards. 105 * about nothing of note with C stepping upwards.
106 */ 106 */
107
108DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0, };
109
110#include <mach_ipi.h> /* must come after the send_IPI functions above for inlining */
111
112/*
113 * Smarter SMP flushing macros.
114 * c/o Linus Torvalds.
115 *
116 * These mean you can really definitely utterly forget about
117 * writing to user space from interrupts. (Its not allowed anyway).
118 *
119 * Optimizations Manfred Spraul <manfred@colorfullife.com>
120 */
121
122static cpumask_t flush_cpumask;
123static struct mm_struct * flush_mm;
124static unsigned long flush_va;
125static DEFINE_SPINLOCK(tlbstate_lock);
126
127/*
128 * We cannot call mmdrop() because we are in interrupt context,
129 * instead update mm->cpu_vm_mask.
130 *
131 * We need to reload %cr3 since the page tables may be going
132 * away from under us..
133 */
134void leave_mm(int cpu)
135{
136 if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
137 BUG();
138 cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask);
139 load_cr3(swapper_pg_dir);
140}
141EXPORT_SYMBOL_GPL(leave_mm);
142
143/*
144 *
145 * The flush IPI assumes that a thread switch happens in this order:
146 * [cpu0: the cpu that switches]
147 * 1) switch_mm() either 1a) or 1b)
148 * 1a) thread switch to a different mm
149 * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
150 * Stop ipi delivery for the old mm. This is not synchronized with
151 * the other cpus, but smp_invalidate_interrupt ignore flush ipis
152 * for the wrong mm, and in the worst case we perform a superfluous
153 * tlb flush.
154 * 1a2) set cpu_tlbstate to TLBSTATE_OK
155 * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
156 * was in lazy tlb mode.
157 * 1a3) update cpu_tlbstate[].active_mm
158 * Now cpu0 accepts tlb flushes for the new mm.
159 * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
160 * Now the other cpus will send tlb flush ipis.
161 * 1a4) change cr3.
162 * 1b) thread switch without mm change
163 * cpu_tlbstate[].active_mm is correct, cpu0 already handles
164 * flush ipis.
165 * 1b1) set cpu_tlbstate to TLBSTATE_OK
166 * 1b2) test_and_set the cpu bit in cpu_vm_mask.
167 * Atomically set the bit [other cpus will start sending flush ipis],
168 * and test the bit.
169 * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
170 * 2) switch %%esp, ie current
171 *
172 * The interrupt must handle 2 special cases:
173 * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
174 * - the cpu performs speculative tlb reads, i.e. even if the cpu only
175 * runs in kernel space, the cpu could load tlb entries for user space
176 * pages.
177 *
178 * The good news is that cpu_tlbstate is local to each cpu, no
179 * write/read ordering problems.
180 */
181
182/*
183 * TLB flush IPI:
184 *
185 * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
186 * 2) Leave the mm if we are in the lazy tlb mode.
187 */
188
189void smp_invalidate_interrupt(struct pt_regs *regs)
190{
191 unsigned long cpu;
192
193 cpu = get_cpu();
194
195 if (!cpu_isset(cpu, flush_cpumask))
196 goto out;
197 /*
198 * This was a BUG() but until someone can quote me the
199 * line from the intel manual that guarantees an IPI to
200 * multiple CPUs is retried _only_ on the erroring CPUs
201 * its staying as a return
202 *
203 * BUG();
204 */
205
206 if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
207 if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
208 if (flush_va == TLB_FLUSH_ALL)
209 local_flush_tlb();
210 else
211 __flush_tlb_one(flush_va);
212 } else
213 leave_mm(cpu);
214 }
215 ack_APIC_irq();
216 smp_mb__before_clear_bit();
217 cpu_clear(cpu, flush_cpumask);
218 smp_mb__after_clear_bit();
219out:
220 put_cpu_no_resched();
221 __get_cpu_var(irq_stat).irq_tlb_count++;
222}
223
224void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
225 unsigned long va)
226{
227 cpumask_t cpumask = *cpumaskp;
228
229 /*
230 * A couple of (to be removed) sanity checks:
231 *
232 * - current CPU must not be in mask
233 * - mask must exist :)
234 */
235 BUG_ON(cpus_empty(cpumask));
236 BUG_ON(cpu_isset(smp_processor_id(), cpumask));
237 BUG_ON(!mm);
238
239#ifdef CONFIG_HOTPLUG_CPU
240 /* If a CPU which we ran on has gone down, OK. */
241 cpus_and(cpumask, cpumask, cpu_online_map);
242 if (unlikely(cpus_empty(cpumask)))
243 return;
244#endif
245
246 /*
247 * i'm not happy about this global shared spinlock in the
248 * MM hot path, but we'll see how contended it is.
249 * AK: x86-64 has a faster method that could be ported.
250 */
251 spin_lock(&tlbstate_lock);
252
253 flush_mm = mm;
254 flush_va = va;
255 cpus_or(flush_cpumask, cpumask, flush_cpumask);
256 /*
257 * We have to send the IPI only to
258 * CPUs affected.
259 */
260 send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
261
262 while (!cpus_empty(flush_cpumask))
263 /* nothing. lockup detection does not belong here */
264 cpu_relax();
265
266 flush_mm = NULL;
267 flush_va = 0;
268 spin_unlock(&tlbstate_lock);
269}
270
271void flush_tlb_current_task(void)
272{
273 struct mm_struct *mm = current->mm;
274 cpumask_t cpu_mask;
275
276 preempt_disable();
277 cpu_mask = mm->cpu_vm_mask;
278 cpu_clear(smp_processor_id(), cpu_mask);
279
280 local_flush_tlb();
281 if (!cpus_empty(cpu_mask))
282 flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
283 preempt_enable();
284}
285
286void flush_tlb_mm (struct mm_struct * mm)
287{
288 cpumask_t cpu_mask;
289
290 preempt_disable();
291 cpu_mask = mm->cpu_vm_mask;
292 cpu_clear(smp_processor_id(), cpu_mask);
293
294 if (current->active_mm == mm) {
295 if (current->mm)
296 local_flush_tlb();
297 else
298 leave_mm(smp_processor_id());
299 }
300 if (!cpus_empty(cpu_mask))
301 flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
302
303 preempt_enable();
304}
305
306void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
307{
308 struct mm_struct *mm = vma->vm_mm;
309 cpumask_t cpu_mask;
310
311 preempt_disable();
312 cpu_mask = mm->cpu_vm_mask;
313 cpu_clear(smp_processor_id(), cpu_mask);
314
315 if (current->active_mm == mm) {
316 if(current->mm)
317 __flush_tlb_one(va);
318 else
319 leave_mm(smp_processor_id());
320 }
321
322 if (!cpus_empty(cpu_mask))
323 flush_tlb_others(cpu_mask, mm, va);
324
325 preempt_enable();
326}
327EXPORT_SYMBOL(flush_tlb_page);
328
329static void do_flush_tlb_all(void* info)
330{
331 unsigned long cpu = smp_processor_id();
332
333 __flush_tlb_all();
334 if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY)
335 leave_mm(cpu);
336}
337
338void flush_tlb_all(void)
339{
340 on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
341}
diff --git a/arch/x86/kernel/smp_64.c b/arch/x86/kernel/smp_64.c
index d28e8685709d..26448fff0abd 100644
--- a/arch/x86/kernel/smp_64.c
+++ b/arch/x86/kernel/smp_64.c
@@ -8,278 +8,3 @@
8 * This code is released under the GNU General Public License version 2 or 8 * This code is released under the GNU General Public License version 2 or
9 * later. 9 * later.
10 */ 10 */
11
12#include <linux/init.h>
13
14#include <linux/mm.h>
15#include <linux/delay.h>
16#include <linux/spinlock.h>
17#include <linux/smp.h>
18#include <linux/kernel_stat.h>
19#include <linux/mc146818rtc.h>
20#include <linux/interrupt.h>
21
22#include <asm/mtrr.h>
23#include <asm/pgalloc.h>
24#include <asm/tlbflush.h>
25#include <asm/mach_apic.h>
26#include <asm/mmu_context.h>
27#include <asm/proto.h>
28#include <asm/apicdef.h>
29#include <asm/idle.h>
30
31/*
32 * Smarter SMP flushing macros.
33 * c/o Linus Torvalds.
34 *
35 * These mean you can really definitely utterly forget about
36 * writing to user space from interrupts. (Its not allowed anyway).
37 *
38 * Optimizations Manfred Spraul <manfred@colorfullife.com>
39 *
40 * More scalable flush, from Andi Kleen
41 *
42 * To avoid global state use 8 different call vectors.
43 * Each CPU uses a specific vector to trigger flushes on other
44 * CPUs. Depending on the received vector the target CPUs look into
45 * the right per cpu variable for the flush data.
46 *
47 * With more than 8 CPUs they are hashed to the 8 available
48 * vectors. The limited global vector space forces us to this right now.
49 * In future when interrupts are split into per CPU domains this could be
50 * fixed, at the cost of triggering multiple IPIs in some cases.
51 */
52
53union smp_flush_state {
54 struct {
55 cpumask_t flush_cpumask;
56 struct mm_struct *flush_mm;
57 unsigned long flush_va;
58 spinlock_t tlbstate_lock;
59 };
60 char pad[SMP_CACHE_BYTES];
61} ____cacheline_aligned;
62
63/* State is put into the per CPU data section, but padded
64 to a full cache line because other CPUs can access it and we don't
65 want false sharing in the per cpu data segment. */
66static DEFINE_PER_CPU(union smp_flush_state, flush_state);
67
68/*
69 * We cannot call mmdrop() because we are in interrupt context,
70 * instead update mm->cpu_vm_mask.
71 */
72void leave_mm(int cpu)
73{
74 if (read_pda(mmu_state) == TLBSTATE_OK)
75 BUG();
76 cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask);
77 load_cr3(swapper_pg_dir);
78}
79EXPORT_SYMBOL_GPL(leave_mm);
80
81/*
82 *
83 * The flush IPI assumes that a thread switch happens in this order:
84 * [cpu0: the cpu that switches]
85 * 1) switch_mm() either 1a) or 1b)
86 * 1a) thread switch to a different mm
87 * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
88 * Stop ipi delivery for the old mm. This is not synchronized with
89 * the other cpus, but smp_invalidate_interrupt ignore flush ipis
90 * for the wrong mm, and in the worst case we perform a superfluous
91 * tlb flush.
92 * 1a2) set cpu mmu_state to TLBSTATE_OK
93 * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
94 * was in lazy tlb mode.
95 * 1a3) update cpu active_mm
96 * Now cpu0 accepts tlb flushes for the new mm.
97 * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
98 * Now the other cpus will send tlb flush ipis.
99 * 1a4) change cr3.
100 * 1b) thread switch without mm change
101 * cpu active_mm is correct, cpu0 already handles
102 * flush ipis.
103 * 1b1) set cpu mmu_state to TLBSTATE_OK
104 * 1b2) test_and_set the cpu bit in cpu_vm_mask.
105 * Atomically set the bit [other cpus will start sending flush ipis],
106 * and test the bit.
107 * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
108 * 2) switch %%esp, ie current
109 *
110 * The interrupt must handle 2 special cases:
111 * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
112 * - the cpu performs speculative tlb reads, i.e. even if the cpu only
113 * runs in kernel space, the cpu could load tlb entries for user space
114 * pages.
115 *
116 * The good news is that cpu mmu_state is local to each cpu, no
117 * write/read ordering problems.
118 */
119
120/*
121 * TLB flush IPI:
122 *
123 * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
124 * 2) Leave the mm if we are in the lazy tlb mode.
125 *
126 * Interrupts are disabled.
127 */
128
129asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
130{
131 int cpu;
132 int sender;
133 union smp_flush_state *f;
134
135 cpu = smp_processor_id();
136 /*
137 * orig_rax contains the negated interrupt vector.
138 * Use that to determine where the sender put the data.
139 */
140 sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START;
141 f = &per_cpu(flush_state, sender);
142
143 if (!cpu_isset(cpu, f->flush_cpumask))
144 goto out;
145 /*
146 * This was a BUG() but until someone can quote me the
147 * line from the intel manual that guarantees an IPI to
148 * multiple CPUs is retried _only_ on the erroring CPUs
149 * its staying as a return
150 *
151 * BUG();
152 */
153
154 if (f->flush_mm == read_pda(active_mm)) {
155 if (read_pda(mmu_state) == TLBSTATE_OK) {
156 if (f->flush_va == TLB_FLUSH_ALL)
157 local_flush_tlb();
158 else
159 __flush_tlb_one(f->flush_va);
160 } else
161 leave_mm(cpu);
162 }
163out:
164 ack_APIC_irq();
165 cpu_clear(cpu, f->flush_cpumask);
166 add_pda(irq_tlb_count, 1);
167}
168
169void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
170 unsigned long va)
171{
172 int sender;
173 union smp_flush_state *f;
174 cpumask_t cpumask = *cpumaskp;
175
176 /* Caller has disabled preemption */
177 sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
178 f = &per_cpu(flush_state, sender);
179
180 /*
181 * Could avoid this lock when
182 * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is
183 * probably not worth checking this for a cache-hot lock.
184 */
185 spin_lock(&f->tlbstate_lock);
186
187 f->flush_mm = mm;
188 f->flush_va = va;
189 cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask);
190
191 /*
192 * We have to send the IPI only to
193 * CPUs affected.
194 */
195 send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender);
196
197 while (!cpus_empty(f->flush_cpumask))
198 cpu_relax();
199
200 f->flush_mm = NULL;
201 f->flush_va = 0;
202 spin_unlock(&f->tlbstate_lock);
203}
204
205int __cpuinit init_smp_flush(void)
206{
207 int i;
208
209 for_each_cpu_mask(i, cpu_possible_map) {
210 spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock);
211 }
212 return 0;
213}
214core_initcall(init_smp_flush);
215
216void flush_tlb_current_task(void)
217{
218 struct mm_struct *mm = current->mm;
219 cpumask_t cpu_mask;
220
221 preempt_disable();
222 cpu_mask = mm->cpu_vm_mask;
223 cpu_clear(smp_processor_id(), cpu_mask);
224
225 local_flush_tlb();
226 if (!cpus_empty(cpu_mask))
227 flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
228 preempt_enable();
229}
230
231void flush_tlb_mm (struct mm_struct * mm)
232{
233 cpumask_t cpu_mask;
234
235 preempt_disable();
236 cpu_mask = mm->cpu_vm_mask;
237 cpu_clear(smp_processor_id(), cpu_mask);
238
239 if (current->active_mm == mm) {
240 if (current->mm)
241 local_flush_tlb();
242 else
243 leave_mm(smp_processor_id());
244 }
245 if (!cpus_empty(cpu_mask))
246 flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
247
248 preempt_enable();
249}
250
251void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
252{
253 struct mm_struct *mm = vma->vm_mm;
254 cpumask_t cpu_mask;
255
256 preempt_disable();
257 cpu_mask = mm->cpu_vm_mask;
258 cpu_clear(smp_processor_id(), cpu_mask);
259
260 if (current->active_mm == mm) {
261 if(current->mm)
262 __flush_tlb_one(va);
263 else
264 leave_mm(smp_processor_id());
265 }
266
267 if (!cpus_empty(cpu_mask))
268 flush_tlb_others(cpu_mask, mm, va);
269
270 preempt_enable();
271}
272
273static void do_flush_tlb_all(void* info)
274{
275 unsigned long cpu = smp_processor_id();
276
277 __flush_tlb_all();
278 if (read_pda(mmu_state) == TLBSTATE_LAZY)
279 leave_mm(cpu);
280}
281
282void flush_tlb_all(void)
283{
284 on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
285}
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
new file mode 100644
index 000000000000..9bb2363851af
--- /dev/null
+++ b/arch/x86/kernel/tlb_32.c
@@ -0,0 +1,243 @@
1#include <linux/spinlock.h>
2#include <linux/cpu.h>
3#include <linux/interrupt.h>
4
5#include <asm/tlbflush.h>
6
/*
 * Per-CPU TLB bookkeeping.  The code in this file reads its .state and
 * .active_mm members; the positional initialiser starts every CPU with
 * &init_mm and 0.
 * NOTE(review): which member each initialiser lands on depends on the
 * declaration order of struct tlb_state, which is not visible here -
 * confirm against the header.
 */
7DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate)
8 ____cacheline_aligned = { &init_mm, 0, };
9
10/* must come after the send_IPI functions above for inlining */
11#include <mach_ipi.h>
12
13/*
14 * Smarter SMP flushing macros.
15 * c/o Linus Torvalds.
16 *
17 * These mean you can really definitely utterly forget about
18 * writing to user space from interrupts. (It's not allowed anyway).
19 *
20 * Optimizations Manfred Spraul <manfred@colorfullife.com>
21 */
22
/*
 * Description of the one flush request that can be in flight at a time.
 * native_flush_tlb_others() fills these in while holding tlbstate_lock;
 * smp_invalidate_interrupt() reads them on the target CPUs.
 *
 *   flush_cpumask - target CPUs that have not yet handled the request;
 *                   each target clears its own bit when it is done and
 *                   the sender spins until the mask is empty.
 *   flush_mm      - mm being flushed; targets compare it with their own
 *                   cpu_tlbstate active_mm.
 *   flush_va      - single address to flush, or TLB_FLUSH_ALL.
 */
23static cpumask_t flush_cpumask;
24static struct mm_struct *flush_mm;
25static unsigned long flush_va;
26static DEFINE_SPINLOCK(tlbstate_lock);
27
28/*
29 * We cannot call mmdrop() because we are in interrupt context,
30 * instead update mm->cpu_vm_mask.
31 *
32 * We need to reload %cr3 since the page tables may be going
33 * away from under us..
34 */
/*
 * leave_mm - detach @cpu from the mm recorded in its cpu_tlbstate.
 * @cpu: CPU whose per-cpu cpu_tlbstate entry is consulted.
 *
 * Calling this while that entry's state is TLBSTATE_OK hits BUG().
 * Otherwise @cpu is removed from active_mm->cpu_vm_mask and %cr3 is
 * reloaded with swapper_pg_dir.  Every caller visible in this file
 * passes the number of the CPU it is running on.
 */
35void leave_mm(int cpu)
36{
 /* Only a CPU that is not actively using the mm may drop it. */
37 if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
38 BUG();
 /* Take this CPU out of the set the mm's flush requests are sent to. */
39 cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask);
 /* Stop using the mm's page tables; switch to swapper_pg_dir. */
40 load_cr3(swapper_pg_dir);
41}
42EXPORT_SYMBOL_GPL(leave_mm);
43
44/*
45 *
46 * The flush IPI assumes that a thread switch happens in this order:
47 * [cpu0: the cpu that switches]
48 * 1) switch_mm() either 1a) or 1b)
49 * 1a) thread switch to a different mm
50 * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
51 * Stop ipi delivery for the old mm. This is not synchronized with
52 * the other cpus, but smp_invalidate_interrupt ignores flush ipis
53 * for the wrong mm, and in the worst case we perform a superfluous
54 * tlb flush.
55 * 1a2) set cpu_tlbstate to TLBSTATE_OK
56 * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
57 * was in lazy tlb mode.
58 * 1a3) update cpu_tlbstate[].active_mm
59 * Now cpu0 accepts tlb flushes for the new mm.
60 * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
61 * Now the other cpus will send tlb flush ipis.
62 * 1a5) change cr3.
63 * 1b) thread switch without mm change
64 * cpu_tlbstate[].active_mm is correct, cpu0 already handles
65 * flush ipis.
66 * 1b1) set cpu_tlbstate to TLBSTATE_OK
67 * 1b2) test_and_set the cpu bit in cpu_vm_mask.
68 * Atomically set the bit [other cpus will start sending flush ipis],
69 * and test the bit.
70 * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
71 * 2) switch %%esp, ie current
72 *
73 * The interrupt must handle 2 special cases:
74 * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
75 * - the cpu performs speculative tlb reads, i.e. even if the cpu only
76 * runs in kernel space, the cpu could load tlb entries for user space
77 * pages.
78 *
79 * The good news is that cpu_tlbstate is local to each cpu, no
80 * write/read ordering problems.
81 */
82
83/*
84 * TLB flush IPI:
85 *
86 * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
87 * 2) Leave the mm if we are in the lazy tlb mode.
88 */
89
90void smp_invalidate_interrupt(struct pt_regs *regs)
91{
92 unsigned long cpu;
93
94 cpu = get_cpu();
95
96 if (!cpu_isset(cpu, flush_cpumask))
97 goto out;
98 /*
99 * This was a BUG() but until someone can quote me the
100 * line from the intel manual that guarantees an IPI to
101 * multiple CPUs is retried _only_ on the erroring CPUs
102 * its staying as a return
103 *
104 * BUG();
105 */
106
107 if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
108 if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
109 if (flush_va == TLB_FLUSH_ALL)
110 local_flush_tlb();
111 else
112 __flush_tlb_one(flush_va);
113 } else
114 leave_mm(cpu);
115 }
116 ack_APIC_irq();
117 smp_mb__before_clear_bit();
118 cpu_clear(cpu, flush_cpumask);
119 smp_mb__after_clear_bit();
120out:
121 put_cpu_no_resched();
122 __get_cpu_var(irq_stat).irq_tlb_count++;
123}
124
/*
 * native_flush_tlb_others - ask other CPUs to flush TLB entries of @mm.
 * @cpumaskp: CPUs to interrupt; must be non-empty and must not contain
 *            the calling CPU (both enforced with BUG_ON).
 * @mm:       mm whose entries are to be flushed; must not be NULL.
 * @va:       single address to flush, or TLB_FLUSH_ALL.
 *
 * Publishes the request in the file-scope flush_* variables under
 * tlbstate_lock, sends INVALIDATE_TLB_VECTOR to the targets and busy-waits
 * until every target has cleared its bit in flush_cpumask.  Returns only
 * after that, or immediately when CONFIG_HOTPLUG_CPU is set and none of
 * the requested CPUs is online.
 */
125void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
126 unsigned long va)
127{
 /* Work on a private copy; the caller's mask is never modified. */
128 cpumask_t cpumask = *cpumaskp;
129
130 /*
131 * A couple of (to be removed) sanity checks:
132 *
133 * - current CPU must not be in mask
134 * - mask must exist :)
135 */
136 BUG_ON(cpus_empty(cpumask));
137 BUG_ON(cpu_isset(smp_processor_id(), cpumask));
138 BUG_ON(!mm);
139
140#ifdef CONFIG_HOTPLUG_CPU
141 /* If a CPU which we ran on has gone down, OK. */
142 cpus_and(cpumask, cpumask, cpu_online_map);
143 if (unlikely(cpus_empty(cpumask)))
144 return;
145#endif
146
147 /*
148 * i'm not happy about this global shared spinlock in the
149 * MM hot path, but we'll see how contended it is.
150 * AK: x86-64 has a faster method that could be ported.
151 */
152 spin_lock(&tlbstate_lock);
153
 /* Publish the request before any target can be interrupted. */
154 flush_mm = mm;
155 flush_va = va;
156 cpus_or(flush_cpumask, cpumask, flush_cpumask);
157 /*
158 * We have to send the IPI only to
159 * CPUs affected.
160 */
161 send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
162
 /* Each target clears its own bit in smp_invalidate_interrupt(). */
163 while (!cpus_empty(flush_cpumask))
164 /* nothing. lockup detection does not belong here */
165 cpu_relax();
166
 /* Request complete: reset the shared slot for the next sender. */
167 flush_mm = NULL;
168 flush_va = 0;
169 spin_unlock(&tlbstate_lock);
170}
171
/*
 * flush_tlb_current_task - flush the whole TLB for current->mm.
 *
 * Flushes locally and, if any other CPU is set in the mm's cpu_vm_mask,
 * requests a TLB_FLUSH_ALL on those CPUs as well.
 * NOTE(review): current->mm is dereferenced without a NULL check, so
 * callers presumably always have a user mm - confirm.
 */
172void flush_tlb_current_task(void)
173{
174 struct mm_struct *mm = current->mm;
175 cpumask_t cpu_mask;
176
 /*
  * Preemption stays off so the CPU removed from the mask below is the
  * same one that performs the local flush.
  */
177 preempt_disable();
178 cpu_mask = mm->cpu_vm_mask;
179 cpu_clear(smp_processor_id(), cpu_mask);
180
181 local_flush_tlb();
 /*
  * flush_tlb_others() is defined outside this listing; presumably it
  * ends up in native_flush_tlb_others() - confirm.
  */
182 if (!cpus_empty(cpu_mask))
183 flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
184 preempt_enable();
185}
186
/*
 * flush_tlb_mm - flush all TLB entries belonging to @mm on every CPU
 * listed in its cpu_vm_mask.
 * @mm: mm to flush; dereferenced unconditionally.
 *
 * The local CPU is handled directly; all other CPUs in the mask are sent
 * a TLB_FLUSH_ALL request.
 */
187void flush_tlb_mm(struct mm_struct *mm)
188{
189 cpumask_t cpu_mask;
190
 /* Keep smp_processor_id() stable for the whole operation. */
191 preempt_disable();
192 cpu_mask = mm->cpu_vm_mask;
193 cpu_clear(smp_processor_id(), cpu_mask);
194
 /*
  * Local part: only needed when this CPU currently has @mm loaded.
  * If current->mm is NULL the task merely borrows @mm as active_mm,
  * so the mm is dropped via leave_mm() instead of being flushed.
  */
195 if (current->active_mm == mm) {
196 if (current->mm)
197 local_flush_tlb();
198 else
199 leave_mm(smp_processor_id());
200 }
 /* Remote part: every other CPU still set in the mm's mask. */
201 if (!cpus_empty(cpu_mask))
202 flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
203
204 preempt_enable();
205}
206
/*
 * flush_tlb_page - flush the TLB entry for one address of a mapping.
 * @vma: mapping the address belongs to; only vma->vm_mm is used.
 * @va:  address whose entry is to be flushed.
 *
 * Same structure as flush_tlb_mm(), but the local flush and the remote
 * request are limited to @va instead of TLB_FLUSH_ALL.
 */
207void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
208{
209 struct mm_struct *mm = vma->vm_mm;
210 cpumask_t cpu_mask;
211
 /* Keep smp_processor_id() stable for the whole operation. */
212 preempt_disable();
213 cpu_mask = mm->cpu_vm_mask;
214 cpu_clear(smp_processor_id(), cpu_mask);
215
 /*
  * Local part: flush the single entry when the task owns the mm; when
  * it only borrows it (current->mm == NULL), drop the mm altogether.
  */
216 if (current->active_mm == mm) {
217 if (current->mm)
218 __flush_tlb_one(va);
219 else
220 leave_mm(smp_processor_id());
221 }
222
 /* Remote part: pass the address on to the other CPUs in the mask. */
223 if (!cpus_empty(cpu_mask))
224 flush_tlb_others(cpu_mask, mm, va);
225
226 preempt_enable();
227}
228EXPORT_SYMBOL(flush_tlb_page);
229
/*
 * do_flush_tlb_all - per-CPU callback used by flush_tlb_all().
 * @info: unused.
 *
 * Calls __flush_tlb_all() on the executing CPU and, if that CPU's
 * cpu_tlbstate is TLBSTATE_LAZY, also drops the mm via leave_mm().
 */
230static void do_flush_tlb_all(void *info)
231{
232 unsigned long cpu = smp_processor_id();
233
234 __flush_tlb_all();
235 if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY)
236 leave_mm(cpu);
237}
238
/*
 * flush_tlb_all - run do_flush_tlb_all() through on_each_cpu().
 * NOTE(review): the two trailing 1 arguments are presumably the
 * retry/wait flags of on_each_cpu() - confirm against its prototype.
 */
239void flush_tlb_all(void)
240{
241 on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
242}
243
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
new file mode 100644
index 000000000000..615d84817758
--- /dev/null
+++ b/arch/x86/kernel/tlb_64.c
@@ -0,0 +1,273 @@
1#include <linux/init.h>
2
3#include <linux/mm.h>
4#include <linux/delay.h>
5#include <linux/spinlock.h>
6#include <linux/smp.h>
7#include <linux/kernel_stat.h>
8#include <linux/mc146818rtc.h>
9#include <linux/interrupt.h>
10
11#include <asm/mtrr.h>
12#include <asm/pgalloc.h>
13#include <asm/tlbflush.h>
14#include <asm/mach_apic.h>
15#include <asm/mmu_context.h>
16#include <asm/proto.h>
17#include <asm/apicdef.h>
18#include <asm/idle.h>
19/*
20 * Smarter SMP flushing macros.
21 * c/o Linus Torvalds.
22 *
23 * These mean you can really definitely utterly forget about
24 * writing to user space from interrupts. (It's not allowed anyway).
25 *
26 * Optimizations Manfred Spraul <manfred@colorfullife.com>
27 *
28 * More scalable flush, from Andi Kleen
29 *
30 * To avoid global state use 8 different call vectors.
31 * Each CPU uses a specific vector to trigger flushes on other
32 * CPUs. Depending on the received vector the target CPUs look into
33 * the right per cpu variable for the flush data.
34 *
35 * With more than 8 CPUs they are hashed to the 8 available
36 * vectors. The limited global vector space forces us to this right now.
37 * In future when interrupts are split into per CPU domains this could be
38 * fixed, at the cost of triggering multiple IPIs in some cases.
39 */
40
/*
 * One flush request slot.  A sender fills it in under tlbstate_lock in
 * native_flush_tlb_others(); targets read it in smp_invalidate_interrupt().
 * The union with pad[] makes the object at least SMP_CACHE_BYTES large.
 */
41union smp_flush_state {
42 struct {
 /* Targets that have not yet handled the request. */
43 cpumask_t flush_cpumask;
 /* mm being flushed; compared with the target's active_mm. */
44 struct mm_struct *flush_mm;
 /* Single address to flush, or TLB_FLUSH_ALL. */
45 unsigned long flush_va;
 /* Serialises senders that share this slot. */
46 spinlock_t tlbstate_lock;
47 };
48 char pad[SMP_CACHE_BYTES];
49} ____cacheline_aligned;
50
51/* State is put into the per CPU data section, but padded
52 to a full cache line because other CPUs can access it and we don't
53 want false sharing in the per cpu data segment. */
/*
 * Indexed by sender slot, not by the CPU that owns the per-cpu copy:
 * senders use smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS and the
 * handler derives the same index from the vector it received.
 */
54static DEFINE_PER_CPU(union smp_flush_state, flush_state);
55
56/*
57 * We cannot call mmdrop() because we are in interrupt context,
58 * instead update mm->cpu_vm_mask.
59 */
/*
 * leave_mm - detach @cpu from the mm recorded in the PDA.
 * @cpu: CPU number to clear from the mm's cpu_vm_mask.
 *
 * Calling this while the PDA mmu_state is TLBSTATE_OK hits BUG().
 * Otherwise @cpu is removed from active_mm->cpu_vm_mask and %cr3 is
 * reloaded with swapper_pg_dir.
 * NOTE(review): read_pda() takes no CPU argument, so the state and mm
 * consulted are presumably those of the executing CPU; every caller
 * visible in this file passes its own CPU number - confirm.
 */
60void leave_mm(int cpu)
61{
 /* Only a CPU that is not actively using the mm may drop it. */
62 if (read_pda(mmu_state) == TLBSTATE_OK)
63 BUG();
 /* Take @cpu out of the set the mm's flush requests are sent to. */
64 cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask);
 /* Stop using the mm's page tables; switch to swapper_pg_dir. */
65 load_cr3(swapper_pg_dir);
66}
67EXPORT_SYMBOL_GPL(leave_mm);
68
69/*
70 *
71 * The flush IPI assumes that a thread switch happens in this order:
72 * [cpu0: the cpu that switches]
73 * 1) switch_mm() either 1a) or 1b)
74 * 1a) thread switch to a different mm
75 * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
76 * Stop ipi delivery for the old mm. This is not synchronized with
77 * the other cpus, but smp_invalidate_interrupt ignore flush ipis
78 * for the wrong mm, and in the worst case we perform a superfluous
79 * tlb flush.
80 * 1a2) set cpu mmu_state to TLBSTATE_OK
81 * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
82 * was in lazy tlb mode.
83 * 1a3) update cpu active_mm
84 * Now cpu0 accepts tlb flushes for the new mm.
85 * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
86 * Now the other cpus will send tlb flush ipis.
87 * 1a5) change cr3.
88 * 1b) thread switch without mm change
89 * cpu active_mm is correct, cpu0 already handles
90 * flush ipis.
91 * 1b1) set cpu mmu_state to TLBSTATE_OK
92 * 1b2) test_and_set the cpu bit in cpu_vm_mask.
93 * Atomically set the bit [other cpus will start sending flush ipis],
94 * and test the bit.
95 * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
96 * 2) switch %%esp, ie current
97 *
98 * The interrupt must handle 2 special cases:
99 * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
100 * - the cpu performs speculative tlb reads, i.e. even if the cpu only
101 * runs in kernel space, the cpu could load tlb entries for user space
102 * pages.
103 *
104 * The good news is that cpu mmu_state is local to each cpu, no
105 * write/read ordering problems.
106 */
107
108/*
109 * TLB flush IPI:
110 *
111 * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
112 * 2) Leave the mm if we are in the lazy tlb mode.
113 *
114 * Interrupts are disabled.
115 */
116
/*
 * smp_invalidate_interrupt - handler for the TLB flush IPI vectors.
 * @regs: register state at interrupt time; orig_ax is used to work out
 *        which request slot the sender filled in.
 *
 * If this CPU is set in the slot's flush_cpumask and the slot's flush_mm
 * is the PDA active_mm, the CPU either flushes (one address, or everything
 * for TLB_FLUSH_ALL) or, when mmu_state is not TLBSTATE_OK, drops the mm
 * via leave_mm().  All paths then acknowledge the interrupt, clear this
 * CPU's bit in the slot and bump irq_tlb_count.
 */
117asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
118{
119 int cpu;
120 int sender;
121 union smp_flush_state *f;
122
123 cpu = smp_processor_id();
124 /*
125 * orig_ax contains the negated interrupt vector.
126 * Use that to determine where the sender put the data.
127 */
128 sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START;
129 f = &per_cpu(flush_state, sender);
130
131 if (!cpu_isset(cpu, f->flush_cpumask))
132 goto out;
133 /*
134 * This was a BUG() but until someone can quote me the
135 * line from the intel manual that guarantees an IPI to
136 * multiple CPUs is retried _only_ on the erroring CPUs
137 * it's staying as a return
138 *
139 * BUG();
140 */
141
142 if (f->flush_mm == read_pda(active_mm)) {
143 if (read_pda(mmu_state) == TLBSTATE_OK) {
144 if (f->flush_va == TLB_FLUSH_ALL)
145 local_flush_tlb();
146 else
147 __flush_tlb_one(f->flush_va);
148 } else
149 leave_mm(cpu);
150 }
151out:
152 ack_APIC_irq();
 /*
  * Clearing our bit is what releases the sender spinning in
  * native_flush_tlb_others().
  * NOTE(review): the early-exit path also reaches this cpu_clear(); if
  * a sender sets our bit between the check above and this point, the
  * bit would be cleared without a flush - confirm that cannot happen.
  */
153 cpu_clear(cpu, f->flush_cpumask);
154 add_pda(irq_tlb_count, 1);
155}
156
/*
 * native_flush_tlb_others - ask other CPUs to flush TLB entries of @mm.
 * @cpumaskp: CPUs to interrupt; copied, never modified.
 * @mm:       mm whose entries are to be flushed.
 * @va:       single address to flush, or TLB_FLUSH_ALL.
 *
 * Picks the request slot smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS,
 * publishes the request there under the slot's lock, sends the matching
 * vector to the targets and busy-waits until every target has cleared its
 * bit in the slot's flush_cpumask.
 * NOTE(review): unlike the 32-bit variant there is no intersection with
 * cpu_online_map and no BUG_ON sanity checks here; a CPU in the mask that
 * never takes the IPI would leave the loop below spinning - confirm that
 * callers cannot pass such a mask.
 */
157void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
158 unsigned long va)
159{
160 int sender;
161 union smp_flush_state *f;
162 cpumask_t cpumask = *cpumaskp;
163
164 /* Caller has disabled preemption */
165 sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
166 f = &per_cpu(flush_state, sender);
167
168 /*
169 * Could avoid this lock when
170 * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is
171 * probably not worth checking this for a cache-hot lock.
172 */
173 spin_lock(&f->tlbstate_lock);
174
 /* Publish the request before any target can be interrupted. */
175 f->flush_mm = mm;
176 f->flush_va = va;
177 cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask);
178
179 /*
180 * We have to send the IPI only to
181 * CPUs affected.
182 */
 /* The vector number tells the targets which slot to look at. */
183 send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender);
184
 /* Each target clears its own bit in smp_invalidate_interrupt(). */
185 while (!cpus_empty(f->flush_cpumask))
186 cpu_relax();
187
 /* Request complete: reset the slot for the next sender. */
188 f->flush_mm = NULL;
189 f->flush_va = 0;
190 spin_unlock(&f->tlbstate_lock);
191}
192
/*
 * init_smp_flush - initialise the spinlock of every request slot.
 *
 * Walks cpu_possible_map and initialises tlbstate_lock in each CPU's
 * flush_state copy.  Registered with core_initcall(); always returns 0.
 */
193int __cpuinit init_smp_flush(void)
194{
195 int i;
196
197 for_each_cpu_mask(i, cpu_possible_map) {
198 spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock);
199 }
200 return 0;
201}
202core_initcall(init_smp_flush);
203
/*
 * flush_tlb_current_task - flush the whole TLB for current->mm.
 *
 * Flushes locally and, if any other CPU is set in the mm's cpu_vm_mask,
 * requests a TLB_FLUSH_ALL on those CPUs as well.
 * NOTE(review): current->mm is dereferenced without a NULL check, so
 * callers presumably always have a user mm - confirm.
 */
204void flush_tlb_current_task(void)
205{
206 struct mm_struct *mm = current->mm;
207 cpumask_t cpu_mask;
208
 /*
  * Preemption stays off so the CPU removed from the mask below is the
  * same one that performs the local flush.
  */
209 preempt_disable();
210 cpu_mask = mm->cpu_vm_mask;
211 cpu_clear(smp_processor_id(), cpu_mask);
212
213 local_flush_tlb();
 /*
  * flush_tlb_others() is defined outside this listing; presumably it
  * ends up in native_flush_tlb_others() - confirm.
  */
214 if (!cpus_empty(cpu_mask))
215 flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
216 preempt_enable();
217}
218
/*
 * flush_tlb_mm - flush all TLB entries belonging to @mm on every CPU
 * listed in its cpu_vm_mask.
 * @mm: mm to flush; dereferenced unconditionally.
 *
 * The local CPU is handled directly; all other CPUs in the mask are sent
 * a TLB_FLUSH_ALL request.
 */
219void flush_tlb_mm(struct mm_struct *mm)
220{
221 cpumask_t cpu_mask;
222
 /* Keep smp_processor_id() stable for the whole operation. */
223 preempt_disable();
224 cpu_mask = mm->cpu_vm_mask;
225 cpu_clear(smp_processor_id(), cpu_mask);
226
 /*
  * Local part: only needed when this CPU currently has @mm loaded.
  * If current->mm is NULL the task merely borrows @mm as active_mm,
  * so the mm is dropped via leave_mm() instead of being flushed.
  */
227 if (current->active_mm == mm) {
228 if (current->mm)
229 local_flush_tlb();
230 else
231 leave_mm(smp_processor_id());
232 }
 /* Remote part: every other CPU still set in the mm's mask. */
233 if (!cpus_empty(cpu_mask))
234 flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
235
236 preempt_enable();
237}
238
/*
 * flush_tlb_page - flush the TLB entry for one address of a mapping.
 * @vma: mapping the address belongs to; only vma->vm_mm is used.
 * @va:  address whose entry is to be flushed.
 *
 * Same structure as flush_tlb_mm(), but the local flush and the remote
 * request are limited to @va instead of TLB_FLUSH_ALL.
 */
239void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
240{
241 struct mm_struct *mm = vma->vm_mm;
242 cpumask_t cpu_mask;
243
 /* Keep smp_processor_id() stable for the whole operation. */
244 preempt_disable();
245 cpu_mask = mm->cpu_vm_mask;
246 cpu_clear(smp_processor_id(), cpu_mask);
247
 /*
  * Local part: flush the single entry when the task owns the mm; when
  * it only borrows it (current->mm == NULL), drop the mm altogether.
  */
248 if (current->active_mm == mm) {
249 if (current->mm)
250 __flush_tlb_one(va);
251 else
252 leave_mm(smp_processor_id());
253 }
254
 /* Remote part: pass the address on to the other CPUs in the mask. */
255 if (!cpus_empty(cpu_mask))
256 flush_tlb_others(cpu_mask, mm, va);
257
258 preempt_enable();
259}
260
/*
 * do_flush_tlb_all - per-CPU callback used by flush_tlb_all().
 * @info: unused.
 *
 * Calls __flush_tlb_all() on the executing CPU and, if the PDA mmu_state
 * is TLBSTATE_LAZY, also drops the mm via leave_mm().
 */
261static void do_flush_tlb_all(void *info)
262{
263 unsigned long cpu = smp_processor_id();
264
265 __flush_tlb_all();
266 if (read_pda(mmu_state) == TLBSTATE_LAZY)
267 leave_mm(cpu);
268}
269
/*
 * flush_tlb_all - run do_flush_tlb_all() through on_each_cpu().
 * NOTE(review): the two trailing 1 arguments are presumably the
 * retry/wait flags of on_each_cpu() - confirm against its prototype.
 */
270void flush_tlb_all(void)
271{
272 on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
273}