author     Tejun Heo <tj@kernel.org>   2009-01-21 03:26:06 -0500
committer  Tejun Heo <tj@kernel.org>   2009-01-21 03:26:06 -0500
commit     02cf94c370e0dc9bf408fe45eb86fe9ad58eaf7f
tree       e12620b5fead5ec6d90b54046a5025c2b28234a0 /arch
parent     6dd01bedee6c3191643db303a1dc530bad56ec55
x86: make x86_32 use tlb_64.c
Impact: less contention when issuing invalidate IPI, cleanup
Make x86_32 use the same TLB code as x86_64. The 64-bit code uses
multiple IPI vectors for TLB shootdown to reduce contention. This
patch makes x86_32 allocate the same 8 IPI vectors as x86_64 and
share the code paths.

Note that the usage of asmlinkage is inconsistent between x86_32 and
x86_64 and calls for further cleanup. This has been noted with a
FIXME comment in tlb_64.c.
Signed-off-by: Tejun Heo <tj@kernel.org>
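For context, the sketch below paraphrases the per-sender shootdown scheme that the now-shared tlb_64.c path implements; field names and details are simplified for illustration and are not copied from the patch. Each sending CPU hashes itself onto one of the NUM_INVALIDATE_TLB_VECTORS vectors and keeps its flush state in a per-vector, cacheline-padded slot, so concurrent shootdowns from different CPUs serialize on different locks instead of on the single global spinlock the deleted tlb_32.c used.

/* Simplified sketch of the multi-vector shootdown state (illustrative only). */
union smp_flush_state {
	struct {
		struct cpumask flush_cpumask;	/* targets that have not yet acked */
		struct mm_struct *flush_mm;	/* mm being flushed */
		unsigned long flush_va;		/* one VA, or TLB_FLUSH_ALL */
		spinlock_t tlbstate_lock;	/* serializes senders sharing this vector */
	};
	char pad[SMP_CACHE_BYTES];		/* keep slots on separate cache lines */
};

static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS];

static void flush_tlb_others_sketch(const struct cpumask *cpumask,
				    struct mm_struct *mm, unsigned long va)
{
	/* Spread senders across the eight vectors to reduce lock contention. */
	int sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
	union smp_flush_state *f = &flush_state[sender];

	spin_lock(&f->tlbstate_lock);
	f->flush_mm = mm;
	f->flush_va = va;
	cpumask_andnot(&f->flush_cpumask, cpumask, cpumask_of(smp_processor_id()));
	send_IPI_mask(&f->flush_cpumask, INVALIDATE_TLB_VECTOR_START + sender);

	while (!cpumask_empty(&f->flush_cpumask))
		cpu_relax();		/* wait until every target clears its bit */

	f->flush_mm = NULL;
	f->flush_va = 0;
	spin_unlock(&f->tlbstate_lock);
}

On the receiving side the handler can recover the sender index from the vector it was entered on (vector minus INVALIDATE_TLB_VECTOR_START), flush or leave the mm as appropriate, and clear its own bit in that slot's flush_cpumask to acknowledge.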
Diffstat (limited to 'arch')
-rw-r--r--  arch/x86/include/asm/irq_vectors.h              |   7
-rw-r--r--  arch/x86/include/asm/mach-default/entry_arch.h  |  18
-rw-r--r--  arch/x86/kernel/Makefile                        |   2
-rw-r--r--  arch/x86/kernel/entry_32.S                      |   6
-rw-r--r--  arch/x86/kernel/irqinit_32.c                    |  11
-rw-r--r--  arch/x86/kernel/tlb_32.c                        | 239
-rw-r--r--  arch/x86/kernel/tlb_64.c                        |  12
7 files changed, 47 insertions, 248 deletions
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 4ee8f800504b..9a83a10a5d51 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -58,8 +58,11 @@
 # define CALL_FUNCTION_VECTOR		0xfc
 # define CALL_FUNCTION_SINGLE_VECTOR	0xfb
 # define THERMAL_APIC_VECTOR		0xfa
-/* 0xf1 - 0xf9 : free */
-# define INVALIDATE_TLB_VECTOR		0xf0
+/* 0xf8 - 0xf9 : free */
+# define INVALIDATE_TLB_VECTOR_END	0xf7
+# define INVALIDATE_TLB_VECTOR_START	0xf0	/* f0-f7 used for TLB flush */
+
+# define NUM_INVALIDATE_TLB_VECTORS	8
 
 #else
 
diff --git a/arch/x86/include/asm/mach-default/entry_arch.h b/arch/x86/include/asm/mach-default/entry_arch.h
index 6b1add8e31dd..6fa399ad1de2 100644
--- a/arch/x86/include/asm/mach-default/entry_arch.h
+++ b/arch/x86/include/asm/mach-default/entry_arch.h
@@ -11,10 +11,26 @@
  */
 #ifdef CONFIG_X86_SMP
 BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
-BUILD_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR)
 BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
 BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
 BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
+
+BUILD_INTERRUPT3(invalidate_interrupt0,INVALIDATE_TLB_VECTOR_START+0,
+		 smp_invalidate_interrupt)
+BUILD_INTERRUPT3(invalidate_interrupt1,INVALIDATE_TLB_VECTOR_START+1,
+		 smp_invalidate_interrupt)
+BUILD_INTERRUPT3(invalidate_interrupt2,INVALIDATE_TLB_VECTOR_START+2,
+		 smp_invalidate_interrupt)
+BUILD_INTERRUPT3(invalidate_interrupt3,INVALIDATE_TLB_VECTOR_START+3,
+		 smp_invalidate_interrupt)
+BUILD_INTERRUPT3(invalidate_interrupt4,INVALIDATE_TLB_VECTOR_START+4,
+		 smp_invalidate_interrupt)
+BUILD_INTERRUPT3(invalidate_interrupt5,INVALIDATE_TLB_VECTOR_START+5,
+		 smp_invalidate_interrupt)
+BUILD_INTERRUPT3(invalidate_interrupt6,INVALIDATE_TLB_VECTOR_START+6,
+		 smp_invalidate_interrupt)
+BUILD_INTERRUPT3(invalidate_interrupt7,INVALIDATE_TLB_VECTOR_START+7,
+		 smp_invalidate_interrupt)
 #endif
 
 /*
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index eb074530c7d3..a62a15c22227 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -58,7 +58,7 @@ obj-$(CONFIG_PCI)		+= early-quirks.o
 apm-y				:= apm_32.o
 obj-$(CONFIG_APM)		+= apm.o
 obj-$(CONFIG_X86_SMP)		+= smp.o
-obj-$(CONFIG_X86_SMP)		+= smpboot.o tsc_sync.o ipi.o tlb_$(BITS).o
+obj-$(CONFIG_X86_SMP)		+= smpboot.o tsc_sync.o ipi.o tlb_64.o
 obj-$(CONFIG_X86_32_SMP)	+= smpcommon.o
 obj-$(CONFIG_X86_64_SMP)	+= tsc_sync.o smpcommon.o
 obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline_$(BITS).o
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 46469029e9d3..a0b91aac72a1 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -672,7 +672,7 @@ common_interrupt:
 ENDPROC(common_interrupt)
 	CFI_ENDPROC
 
-#define BUILD_INTERRUPT(name, nr)	\
+#define BUILD_INTERRUPT3(name, nr, fn)	\
 ENTRY(name)				\
 	RING0_INT_FRAME;		\
 	pushl $~(nr);			\
@@ -680,11 +680,13 @@ ENTRY(name)				\
 	SAVE_ALL;			\
 	TRACE_IRQS_OFF			\
 	movl %esp,%eax;			\
-	call smp_##name;		\
+	call fn;			\
 	jmp ret_from_intr;		\
 	CFI_ENDPROC;			\
 ENDPROC(name)
 
+#define BUILD_INTERRUPT(name, nr)	BUILD_INTERRUPT3(name, nr, smp_##name)
+
 /* The include is where all of the SMP etc. interrupts come from */
 #include "entry_arch.h"
 
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 1507ad4e674d..bf629cadec1a 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -149,8 +149,15 @@ void __init native_init_IRQ(void)
 	 */
 	alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
 
-	/* IPI for invalidation */
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
+	/* IPIs for invalidation */
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
 
 	/* IPI for generic function call */
 	alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
deleted file mode 100644
index 93fcb05c7d43..000000000000
--- a/arch/x86/kernel/tlb_32.c
+++ /dev/null
@@ -1,239 +0,0 @@
-#include <linux/spinlock.h>
-#include <linux/cpu.h>
-#include <linux/interrupt.h>
-
-#include <asm/tlbflush.h>
-
-DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
-			= { &init_mm, 0, };
-
-/* must come after the send_IPI functions above for inlining */
-#include <mach_ipi.h>
-
-/*
- *	Smarter SMP flushing macros.
- *		c/o Linus Torvalds.
- *
- *	These mean you can really definitely utterly forget about
- *	writing to user space from interrupts. (Its not allowed anyway).
- *
- *	Optimizations Manfred Spraul <manfred@colorfullife.com>
- */
-
-static cpumask_var_t flush_cpumask;
-static struct mm_struct *flush_mm;
-static unsigned long flush_va;
-static DEFINE_SPINLOCK(tlbstate_lock);
-
-/*
- * We cannot call mmdrop() because we are in interrupt context,
- * instead update mm->cpu_vm_mask.
- *
- * We need to reload %cr3 since the page tables may be going
- * away from under us..
- */
-void leave_mm(int cpu)
-{
-	BUG_ON(percpu_read(cpu_tlbstate.state) == TLBSTATE_OK);
-	cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask);
-	load_cr3(swapper_pg_dir);
-}
-EXPORT_SYMBOL_GPL(leave_mm);
-
-/*
- *
- * The flush IPI assumes that a thread switch happens in this order:
- * [cpu0: the cpu that switches]
- * 1) switch_mm() either 1a) or 1b)
- * 1a) thread switch to a different mm
- * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
- *	Stop ipi delivery for the old mm. This is not synchronized with
- *	the other cpus, but smp_invalidate_interrupt ignore flush ipis
- *	for the wrong mm, and in the worst case we perform a superfluous
- *	tlb flush.
- * 1a2) set cpu_tlbstate to TLBSTATE_OK
- *	Now the smp_invalidate_interrupt won't call leave_mm if cpu0
- *	was in lazy tlb mode.
- * 1a3) update cpu_tlbstate[].active_mm
- *	Now cpu0 accepts tlb flushes for the new mm.
- * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
- *	Now the other cpus will send tlb flush ipis.
- * 1a4) change cr3.
- * 1b) thread switch without mm change
- *	cpu_tlbstate[].active_mm is correct, cpu0 already handles
- *	flush ipis.
- * 1b1) set cpu_tlbstate to TLBSTATE_OK
- * 1b2) test_and_set the cpu bit in cpu_vm_mask.
- *	Atomically set the bit [other cpus will start sending flush ipis],
- *	and test the bit.
- * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
- * 2) switch %%esp, ie current
- *
- * The interrupt must handle 2 special cases:
- * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
- * - the cpu performs speculative tlb reads, i.e. even if the cpu only
- *   runs in kernel space, the cpu could load tlb entries for user space
- *   pages.
- *
- * The good news is that cpu_tlbstate is local to each cpu, no
- * write/read ordering problems.
- */
-
-/*
- * TLB flush IPI:
- *
- * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
- * 2) Leave the mm if we are in the lazy tlb mode.
- *
- * Interrupts are disabled.
- */
-
-void smp_invalidate_interrupt(struct pt_regs *regs)
-{
-	unsigned int cpu;
-
-	cpu = smp_processor_id();
-
-	if (!cpumask_test_cpu(cpu, flush_cpumask))
-		goto out;
-		/*
-		 * This was a BUG() but until someone can quote me the
-		 * line from the intel manual that guarantees an IPI to
-		 * multiple CPUs is retried _only_ on the erroring CPUs
-		 * its staying as a return
-		 *
-		 * BUG();
-		 */
-
-	if (flush_mm == percpu_read(cpu_tlbstate.active_mm)) {
-		if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
-			if (flush_va == TLB_FLUSH_ALL)
-				local_flush_tlb();
-			else
-				__flush_tlb_one(flush_va);
-		} else
-			leave_mm(cpu);
-	}
-out:
-	ack_APIC_irq();
-	smp_mb__before_clear_bit();
-	cpumask_clear_cpu(cpu, flush_cpumask);
-	smp_mb__after_clear_bit();
-	inc_irq_stat(irq_tlb_count);
-}
-
-void native_flush_tlb_others(const struct cpumask *cpumask,
-			     struct mm_struct *mm, unsigned long va)
-{
-	/*
-	 * - mask must exist :)
-	 */
-	BUG_ON(cpumask_empty(cpumask));
-	BUG_ON(!mm);
-
-	/*
-	 * i'm not happy about this global shared spinlock in the
-	 * MM hot path, but we'll see how contended it is.
-	 * AK: x86-64 has a faster method that could be ported.
-	 */
-	spin_lock(&tlbstate_lock);
-
-	cpumask_andnot(flush_cpumask, cpumask, cpumask_of(smp_processor_id()));
-#ifdef CONFIG_HOTPLUG_CPU
-	/* If a CPU which we ran on has gone down, OK. */
-	cpumask_and(flush_cpumask, flush_cpumask, cpu_online_mask);
-	if (unlikely(cpumask_empty(flush_cpumask))) {
-		spin_unlock(&tlbstate_lock);
-		return;
-	}
-#endif
-	flush_mm = mm;
-	flush_va = va;
-
-	/*
-	 * Make the above memory operations globally visible before
-	 * sending the IPI.
-	 */
-	smp_mb();
-	/*
-	 * We have to send the IPI only to
-	 * CPUs affected.
-	 */
-	send_IPI_mask(flush_cpumask, INVALIDATE_TLB_VECTOR);
-
-	while (!cpumask_empty(flush_cpumask))
-		/* nothing. lockup detection does not belong here */
-		cpu_relax();
-
-	flush_mm = NULL;
-	flush_va = 0;
-	spin_unlock(&tlbstate_lock);
-}
-
-void flush_tlb_current_task(void)
-{
-	struct mm_struct *mm = current->mm;
-
-	preempt_disable();
-
-	local_flush_tlb();
-	if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
-		flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
-	preempt_enable();
-}
-
-void flush_tlb_mm(struct mm_struct *mm)
-{
-
-	preempt_disable();
-
-	if (current->active_mm == mm) {
-		if (current->mm)
-			local_flush_tlb();
-		else
-			leave_mm(smp_processor_id());
-	}
-	if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
-		flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
-
-	preempt_enable();
-}
-
-void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
-{
-	struct mm_struct *mm = vma->vm_mm;
-
-	preempt_disable();
-
-	if (current->active_mm == mm) {
-		if (current->mm)
-			__flush_tlb_one(va);
-		else
-			leave_mm(smp_processor_id());
-	}
-
-	if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
-		flush_tlb_others(&mm->cpu_vm_mask, mm, va);
-	preempt_enable();
-}
-
-static void do_flush_tlb_all(void *info)
-{
-	unsigned long cpu = smp_processor_id();
-
-	__flush_tlb_all();
-	if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
-		leave_mm(cpu);
-}
-
-void flush_tlb_all(void)
-{
-	on_each_cpu(do_flush_tlb_all, NULL, 1);
-}
-
-static int init_flush_cpumask(void)
-{
-	alloc_cpumask_var(&flush_cpumask, GFP_KERNEL);
-	return 0;
-}
-early_initcall(init_flush_cpumask);
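The long ordering comment in the deleted file above (it survives unchanged in tlb_64.c) can be hard to map onto code. Purely as an illustration of case 1a, the fragment below paraphrases the switch_mm() steps the comment numbers; it is a simplification for this page, not the kernel's actual switch_mm().

/* Illustrative paraphrase of the 1a) ordering described above; not real kernel code. */
static void switch_mm_sketch(struct mm_struct *prev, struct mm_struct *next,
			     unsigned int cpu)
{
	cpu_clear(cpu, prev->cpu_vm_mask);		/* 1a1: stop flush IPIs for the old mm */
	percpu_write(cpu_tlbstate.state, TLBSTATE_OK);	/* 1a2: no longer lazy */
	percpu_write(cpu_tlbstate.active_mm, next);	/* 1a3: accept flushes for the new mm */
	cpu_set(cpu, next->cpu_vm_mask);		/* 1a4: other CPUs now send us flush IPIs */
	load_cr3(next->pgd);				/* 1a4: finally change cr3 */
}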
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index 19ac661422f7..b3ca1b940654 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -113,7 +113,17 @@ EXPORT_SYMBOL_GPL(leave_mm);
  * Interrupts are disabled.
  */
 
-asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
+/*
+ * FIXME: use of asmlinkage is not consistent. On x86_64 it's noop
+ * but still used for documentation purpose but the usage is slightly
+ * inconsistent. On x86_32, asmlinkage is regparm(0) but interrupt
+ * entry calls in with the first parameter in %eax. Maybe define
+ * intrlinkage?
+ */
+#ifdef CONFIG_X86_64
+asmlinkage
+#endif
+void smp_invalidate_interrupt(struct pt_regs *regs)
 {
 	unsigned int cpu;
 	unsigned int sender;
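On the FIXME above: the "intrlinkage" it muses about does not exist anywhere. Purely as a hypothetical illustration, such an annotation might take the following shape, matching how entry_32.S hands smp_invalidate_interrupt its pt_regs pointer in %eax while remaining a no-op on x86_64.

/* Hypothetical only -- neither this macro nor the name exists in the kernel. */
#ifdef CONFIG_X86_32
/* entry_32.S does "movl %esp,%eax; call fn": the first argument arrives in %eax. */
# define intrlinkage __attribute__((regparm(1)))
#else
/* The x86_64 C calling convention already matches what the entry stub does. */
# define intrlinkage
#endif

intrlinkage void smp_invalidate_interrupt(struct pt_regs *regs);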