1 files changed, 243 insertions, 0 deletions
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
new file mode 100644
index 000000000000..9bb2363851af
--- /dev/null
+++ b/arch/x86/kernel/tlb_32.c
@@ -0,0 +1,243 @@
+#include <linux/spinlock.h>
+#include <linux/cpu.h>
+#include <linux/interrupt.h>
+#include <asm/tlbflush.h>
+DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate)
+                        ____cacheline_aligned = { &init_mm, 0, }; /*
+ * cpu_tlbstate: per-CPU TLB bookkeeping.  The code in this file reads its
+ * .active_mm and .state members to decide how a CPU reacts to a flush
+ * request.
+ * NOTE(review): the positional initializer presumably means
+ * active_mm = &init_mm and state = 0 -- confirm against the definition
+ * of struct tlb_state, which is not part of this file.
+ */
+/* must come after the send_IPI functions above for inlining */
+#include <mach_ipi.h>
+/*
+ *      Smarter SMP flushing macros.
+ *              c/o Linus Torvalds.
+ *
+ *      These mean you can really definitely utterly forget about
+ *      writing to user space from interrupts. (It's not allowed anyway).
+ *
+ *      Optimizations Manfred Spraul <manfred@colorfullife.com>
+ *
+ *      The variables below describe the single in-flight remote flush
+ *      request:
+ *
+ *      flush_cpumask - CPUs that were sent the flush IPI and have not
+ *                      yet cleared their own bit to signal completion.
+ *      flush_mm      - mm the request applies to; NULL while idle.
+ *      flush_va      - address to flush, or TLB_FLUSH_ALL.
+ *      tlbstate_lock - held by the requesting CPU for the whole
+ *                      request, so only one can be outstanding.
+ */
+static cpumask_t flush_cpumask;
+static struct mm_struct *flush_mm;
+static unsigned long flush_va;
+static DEFINE_SPINLOCK(tlbstate_lock);
+/*
+ * leave_mm - detach a CPU from the mm recorded in its cpu_tlbstate.
+ * @cpu: CPU whose cpu_tlbstate entry is consulted.
+ *
+ * mmdrop() is not an option because we are in interrupt context, so
+ * the CPU is merely removed from mm->cpu_vm_mask.
+ *
+ * %cr3 is then switched to swapper_pg_dir, since the page tables of
+ * the mm we are leaving may be going away from under us.
+ *
+ * Calling this while the CPU's state is TLBSTATE_OK is a bug.
+ */
+void leave_mm(int cpu)
+{
+        struct tlb_state *ts = &per_cpu(cpu_tlbstate, cpu);
+        BUG_ON(ts->state == TLBSTATE_OK);
+        cpu_clear(cpu, ts->active_mm->cpu_vm_mask);
+        load_cr3(swapper_pg_dir);
+}
+EXPORT_SYMBOL_GPL(leave_mm);
+/*
+ *
+ * The flush IPI assumes that a thread switch happens in this order:
+ * [cpu0: the cpu that switches]
+ * 1) switch_mm() either 1a) or 1b)
+ * 1a) thread switch to a different mm
+ * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
+ *      Stop ipi delivery for the old mm. This is not synchronized with
+ *      the other cpus, but smp_invalidate_interrupt ignores flush ipis
+ *      for the wrong mm, and in the worst case we perform a superfluous
+ *      tlb flush.
+ * 1a2) set cpu_tlbstate to TLBSTATE_OK
+ *      Now the smp_invalidate_interrupt won't call leave_mm if cpu0
+ *      was in lazy tlb mode.
+ * 1a3) update cpu_tlbstate[].active_mm
+ *      Now cpu0 accepts tlb flushes for the new mm.
+ * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
+ *      Now the other cpus will send tlb flush ipis.
+ * 1a5) change cr3.
+ * 1b) thread switch without mm change
+ *      cpu_tlbstate[].active_mm is correct, cpu0 already handles
+ *      flush ipis.
+ * 1b1) set cpu_tlbstate to TLBSTATE_OK
+ * 1b2) test_and_set the cpu bit in cpu_vm_mask.
+ *      Atomically set the bit [other cpus will start sending flush ipis],
+ *      and test the bit.
+ * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
+ * 2) switch %esp, i.e. current
+ *
+ * The interrupt must handle 2 special cases:
+ * - cr3 is changed before %esp, i.e. it cannot use current->{active_,}mm.
+ * - the cpu performs speculative tlb reads, i.e. even if the cpu only
+ *   runs in kernel space, the cpu could load tlb entries for user space
+ *   pages.
+ *
+ * The good news is that cpu_tlbstate is local to each cpu, no
+ * write/read ordering problems.
+ */
+/*
+ * TLB flush IPI:
+ *
+ * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
+ * 2) Leave the mm if we are in the lazy tlb mode.
+ *
+ * @regs is not used.
+ *
+ * The request is read from flush_mm/flush_va.  The sender
+ * (native_flush_tlb_others()) leaves both untouched until every target
+ * has cleared its bit in flush_cpumask, so clearing our bit is the
+ * last thing done with the request and is bracketed by barriers.
+ *
+ * Every delivery is acknowledged at the local APIC, including one that
+ * finds our bit already clear: without the EOI the vector would stay
+ * in service and hold off further interrupts on this CPU.
+ */
+void smp_invalidate_interrupt(struct pt_regs *regs)
+{
+        unsigned long cpu;
+        cpu = get_cpu();
+        if (!cpu_isset(cpu, flush_cpumask)) {
+                /*
+                 * This was a BUG() but until someone can quote me the
+                 * line from the intel manual that guarantees an IPI to
+                 * multiple CPUs is retried _only_ on the erroring CPUs
+                 * it's staying as a return.
+                 *
+                 * The interrupt was delivered all the same, so it has
+                 * to be acked before we bail out.
+                 */
+                ack_APIC_irq();
+                goto out;
+        }
+        /* Requests for an mm this CPU is not tracking need no flush. */
+        if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
+                if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
+                        if (flush_va == TLB_FLUSH_ALL)
+                                local_flush_tlb();
+                        else
+                                __flush_tlb_one(flush_va);
+                } else
+                        leave_mm(cpu);
+        }
+        ack_APIC_irq();
+        /* Tell the sender we are done; it spins until the mask is empty. */
+        smp_mb__before_clear_bit();
+        cpu_clear(cpu, flush_cpumask);
+        smp_mb__after_clear_bit();
+out:
+        put_cpu_no_resched();
+        __get_cpu_var(irq_stat).irq_tlb_count++;
+}
+void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
+                             unsigned long va)
+{
+        cpumask_t cpumask = *cpumaskp;
+        /*
+         * Ask the CPUs in *cpumaskp to flush 'va' (or everything, when
+         * va is TLB_FLUSH_ALL) for 'mm', and wait until each of them
+         * has done so.  We work on a private copy of the mask so that
+         * it can be trimmed below without touching the caller's.
+         *
+         * A couple of (to be removed) sanity checks:
+         *
+         * - current CPU must not be in mask
+         * - mask must exist :)
+         * - mm must not be NULL
+         */
+        BUG_ON(cpus_empty(cpumask));
+        BUG_ON(cpu_isset(smp_processor_id(), cpumask));
+        BUG_ON(!mm);
+#ifdef CONFIG_HOTPLUG_CPU
+        /*
+         * If a CPU which we ran on has gone down, OK.
+         * Drop every CPU that is not in cpu_online_map from the
+         * target set; if none is left there is nothing to do.
+         */
+        cpus_and(cpumask, cpumask, cpu_online_map);
+        if (unlikely(cpus_empty(cpumask)))
+                return;
+#endif
+        /*
+         * i'm not happy about this global shared spinlock in the
+         * MM hot path, but we'll see how contended it is.
+         * AK: x86-64 has a faster method that could be ported.
+         *
+         * flush_mm, flush_va and flush_cpumask form one shared request
+         * slot, so the lock is held for the whole request: publish the
+         * request, mark the targets as pending, send the IPI, wait.
+         */
+        spin_lock(&tlbstate_lock);
+        flush_mm = mm;
+        flush_va = va;
+        cpus_or(flush_cpumask, cpumask, flush_cpumask);
+        /*
+         * We have to send the IPI only to
+         * CPUs affected.
+         */
+        send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
+        while (!cpus_empty(flush_cpumask))
+                /*
+                 * nothing. lockup detection does not belong here
+                 *
+                 * Each target clears its own bit in flush_cpumask from
+                 * smp_invalidate_interrupt() once it has handled the
+                 * request; spin until all of them have.
+                 */
+                cpu_relax();
+        flush_mm = NULL;        /* request slot is idle again */
+        flush_va = 0;
+        spin_unlock(&tlbstate_lock);
+}
+/*
+ * Flush all TLB entries of the current task's mm: directly on this
+ * CPU, and through flush_tlb_others() on every other CPU that is set
+ * in the mm's cpu_vm_mask.
+ */
+void flush_tlb_current_task(void)
+{
+        struct mm_struct *mm = current->mm;
+        cpumask_t remote;
+        int this_cpu;
+        /*
+         * Preemption stays off so that the CPU we exclude from the
+         * mask is also the CPU we flush locally.
+         */
+        preempt_disable();
+        this_cpu = smp_processor_id();
+        /* Work on a snapshot of the mask, minus ourselves. */
+        remote = mm->cpu_vm_mask;
+        cpu_clear(this_cpu, remote);
+        local_flush_tlb();
+        if (!cpus_empty(remote))
+                flush_tlb_others(remote, mm, TLB_FLUSH_ALL);
+        preempt_enable();
+}
+/*
+ * Flush all TLB entries belonging to @mm on every CPU that is set in
+ * mm->cpu_vm_mask.
+ *
+ * This CPU is handled directly and only if @mm is its active_mm; the
+ * remaining CPUs are reached through flush_tlb_others().
+ */
+void flush_tlb_mm(struct mm_struct *mm)
+{
+        cpumask_t remote;
+        int this_cpu;
+        /* Keep "this CPU" stable for the local part and the mask. */
+        preempt_disable();
+        this_cpu = smp_processor_id();
+        remote = mm->cpu_vm_mask;
+        cpu_clear(this_cpu, remote);
+        if (current->active_mm == mm) {
+                /*
+                 * Without an mm of its own the current task merely
+                 * borrows @mm as active_mm, so it leaves the mm
+                 * instead of flushing.
+                 */
+                if (!current->mm)
+                        leave_mm(this_cpu);
+                else
+                        local_flush_tlb();
+        }
+        if (!cpus_empty(remote))
+                flush_tlb_others(remote, mm, TLB_FLUSH_ALL);
+        preempt_enable();
+}
+/*
+ * Flush the TLB entry for address @va of the mm that owns @vma, on
+ * every CPU that is set in that mm's cpu_vm_mask.
+ *
+ * This CPU is handled directly and only if the mm is its active_mm;
+ * the remaining CPUs are reached through flush_tlb_others().
+ */
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
+{
+        struct mm_struct *mm = vma->vm_mm;
+        cpumask_t remote;
+        int this_cpu;
+        /* Keep "this CPU" stable for the local part and the mask. */
+        preempt_disable();
+        this_cpu = smp_processor_id();
+        remote = mm->cpu_vm_mask;
+        cpu_clear(this_cpu, remote);
+        if (current->active_mm == mm) {
+                /*
+                 * Without an mm of its own the current task merely
+                 * borrows this mm as active_mm, so it leaves the mm
+                 * instead of flushing the single page.
+                 */
+                if (!current->mm)
+                        leave_mm(this_cpu);
+                else
+                        __flush_tlb_one(va);
+        }
+        if (!cpus_empty(remote))
+                flush_tlb_others(remote, mm, va);
+        preempt_enable();
+}
+EXPORT_SYMBOL(flush_tlb_page);
+/*
+ * Per-CPU half of flush_tlb_all(); @info is unused.
+ *
+ * Flushes the local TLB with __flush_tlb_all() and then, if this CPU
+ * is in TLBSTATE_LAZY, also makes it leave the mm it still has loaded.
+ */
+static void do_flush_tlb_all(void *info)
+{
+        unsigned long this_cpu = smp_processor_id();
+        __flush_tlb_all();
+        if (per_cpu(cpu_tlbstate, this_cpu).state != TLBSTATE_LAZY)
+                return;
+        leave_mm(this_cpu);
+}
+/*
+ * Flush the TLB by running do_flush_tlb_all() on the CPUs reached by
+ * on_each_cpu().
+ */
+void flush_tlb_all(void)
+{
+        on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
+}

diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c new file mode 100644 index 000000000000..9bb2363851af --- /dev/null +++ b/arch/x86/kernel/tlb_32.c
@@ -0,0 +1,243 @@
	1	#include <linux/spinlock.h>
	2	#include <linux/cpu.h>
	3	#include <linux/interrupt.h>
	4
	5	#include <asm/tlbflush.h>
	6
	7	DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate)
	8	____cacheline_aligned = { &init_mm, 0, };
	9
	10	/* must come after the send_IPI functions above for inlining */
	11	#include <mach_ipi.h>
	12
	13	/*
	14	* Smarter SMP flushing macros.
	15	* c/o Linus Torvalds.
	16	*
	17	* These mean you can really definitely utterly forget about
	18	* writing to user space from interrupts. (It's not allowed anyway).
	19	*
	20	* Optimizations Manfred Spraul <manfred@colorfullife.com>
	21	*/
	22
	23	static cpumask_t flush_cpumask;
	24	static struct mm_struct *flush_mm;
	25	static unsigned long flush_va;
	26	static DEFINE_SPINLOCK(tlbstate_lock);
	27
	28	/*
	29	* We cannot call mmdrop() because we are in interrupt context,
	30	* instead update mm->cpu_vm_mask.
	31	*
	32	* We need to reload %cr3 since the page tables may be going
	33	* away from under us..
	34	*/
	35	void leave_mm(int cpu)
	36	{
	37	if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
	38	BUG();
	39	cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask);
	40	load_cr3(swapper_pg_dir);
	41	}
	42	EXPORT_SYMBOL_GPL(leave_mm);
	43
	44	/*
	45	*
	46	* The flush IPI assumes that a thread switch happens in this order:
	47	* [cpu0: the cpu that switches]
	48	* 1) switch_mm() either 1a) or 1b)
	49	* 1a) thread switch to a different mm
	50	* 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
	51	* Stop ipi delivery for the old mm. This is not synchronized with
	52	* the other cpus, but smp_invalidate_interrupt ignores flush ipis
	53	* for the wrong mm, and in the worst case we perform a superfluous
	54	* tlb flush.
	55	* 1a2) set cpu_tlbstate to TLBSTATE_OK
	56	* Now the smp_invalidate_interrupt won't call leave_mm if cpu0
	57	* was in lazy tlb mode.
	58	* 1a3) update cpu_tlbstate[].active_mm
	59	* Now cpu0 accepts tlb flushes for the new mm.
	60	* 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
	61	* Now the other cpus will send tlb flush ipis.
	62	* 1a5) change cr3.
	63	* 1b) thread switch without mm change
	64	* cpu_tlbstate[].active_mm is correct, cpu0 already handles
	65	* flush ipis.
	66	* 1b1) set cpu_tlbstate to TLBSTATE_OK
	67	* 1b2) test_and_set the cpu bit in cpu_vm_mask.
	68	* Atomically set the bit [other cpus will start sending flush ipis],
	69	* and test the bit.
	70	* 1b3) if the bit was 0: leave_mm was called, flush the tlb.
	71	* 2) switch %esp, i.e. current
	72	*
	73	* The interrupt must handle 2 special cases:
	74	* - cr3 is changed before %esp, i.e. it cannot use current->{active_,}mm.
	75	* - the cpu performs speculative tlb reads, i.e. even if the cpu only
	76	* runs in kernel space, the cpu could load tlb entries for user space
	77	* pages.
	78	*
	79	* The good news is that cpu_tlbstate is local to each cpu, no
	80	* write/read ordering problems.
	81	*/
	82
	83	/*
	84	* TLB flush IPI:
	85	*
	86	* 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
	87	* 2) Leave the mm if we are in the lazy tlb mode.
	88	*/
	89
	90	void smp_invalidate_interrupt(struct pt_regs *regs)
	91	{
	92	unsigned long cpu;
	93
	94	cpu = get_cpu();
	95
	96	if (!cpu_isset(cpu, flush_cpumask))
	97	goto out;
	98	/*
	99	* This was a BUG() but until someone can quote me the
	100	* line from the intel manual that guarantees an IPI to
	101	* multiple CPUs is retried _only_ on the erroring CPUs
	102	* it's staying as a return
	103	*
	104	* BUG();
	105	*/
	106
	107	if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
	108	if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
	109	if (flush_va == TLB_FLUSH_ALL)
	110	local_flush_tlb();
	111	else
	112	__flush_tlb_one(flush_va);
	113	} else
	114	leave_mm(cpu);
	115	}
	116	ack_APIC_irq();
	117	smp_mb__before_clear_bit();
	118	cpu_clear(cpu, flush_cpumask);
	119	smp_mb__after_clear_bit();
	120	out:
	121	put_cpu_no_resched();
	122	__get_cpu_var(irq_stat).irq_tlb_count++;
	123	}
	124
	125	void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
	126	unsigned long va)
	127	{
	128	cpumask_t cpumask = *cpumaskp;
	129
	130	/*
	131	* A couple of (to be removed) sanity checks:
	132	*
	133	* - current CPU must not be in mask
	134	* - mask must exist :)
	135	*/
	136	BUG_ON(cpus_empty(cpumask));
	137	BUG_ON(cpu_isset(smp_processor_id(), cpumask));
	138	BUG_ON(!mm);
	139
	140	#ifdef CONFIG_HOTPLUG_CPU
	141	/* If a CPU which we ran on has gone down, OK. */
	142	cpus_and(cpumask, cpumask, cpu_online_map);
	143	if (unlikely(cpus_empty(cpumask)))
	144	return;
	145	#endif
	146
	147	/*
	148	* i'm not happy about this global shared spinlock in the
	149	* MM hot path, but we'll see how contended it is.
	150	* AK: x86-64 has a faster method that could be ported.
	151	*/
	152	spin_lock(&tlbstate_lock);
	153
	154	flush_mm = mm;
	155	flush_va = va;
	156	cpus_or(flush_cpumask, cpumask, flush_cpumask);
	157	/*
	158	* We have to send the IPI only to
	159	* CPUs affected.
	160	*/
	161	send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
	162
	163	while (!cpus_empty(flush_cpumask))
	164	/* nothing. lockup detection does not belong here */
	165	cpu_relax();
	166
	167	flush_mm = NULL;
	168	flush_va = 0;
	169	spin_unlock(&tlbstate_lock);
	170	}
	171
	172	void flush_tlb_current_task(void)
	173	{
	174	struct mm_struct *mm = current->mm;
	175	cpumask_t cpu_mask;
	176
	177	preempt_disable();
	178	cpu_mask = mm->cpu_vm_mask;
	179	cpu_clear(smp_processor_id(), cpu_mask);
	180
	181	local_flush_tlb();
	182	if (!cpus_empty(cpu_mask))
	183	flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
	184	preempt_enable();
	185	}
	186
	187	void flush_tlb_mm(struct mm_struct *mm)
	188	{
	189	cpumask_t cpu_mask;
	190
	191	preempt_disable();
	192	cpu_mask = mm->cpu_vm_mask;
	193	cpu_clear(smp_processor_id(), cpu_mask);
	194
	195	if (current->active_mm == mm) {
	196	if (current->mm)
	197	local_flush_tlb();
	198	else
	199	leave_mm(smp_processor_id());
	200	}
	201	if (!cpus_empty(cpu_mask))
	202	flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
	203
	204	preempt_enable();
	205	}
	206
	207	void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
	208	{
	209	struct mm_struct *mm = vma->vm_mm;
	210	cpumask_t cpu_mask;
	211
	212	preempt_disable();
	213	cpu_mask = mm->cpu_vm_mask;
	214	cpu_clear(smp_processor_id(), cpu_mask);
	215
	216	if (current->active_mm == mm) {
	217	if (current->mm)
	218	__flush_tlb_one(va);
	219	else
	220	leave_mm(smp_processor_id());
	221	}
	222
	223	if (!cpus_empty(cpu_mask))
	224	flush_tlb_others(cpu_mask, mm, va);
	225
	226	preempt_enable();
	227	}
	228	EXPORT_SYMBOL(flush_tlb_page);
	229
	230	static void do_flush_tlb_all(void *info)
	231	{
	232	unsigned long cpu = smp_processor_id();
	233
	234	__flush_tlb_all();
	235	if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY)
	236	leave_mm(cpu);
	237	}
	238
	239	void flush_tlb_all(void)
	240	{
	241	on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
	242	}
	243