Diffstat (limited to 'arch/x86/mm/tlb.c')
 arch/x86/mm/tlb.c | 48 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 47 insertions(+), 1 deletion(-)
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index c03f14ab6667..49358481c733 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -5,6 +5,7 @@
 #include <linux/smp.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
+#include <linux/cpu.h>
 
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
@@ -52,6 +53,8 @@ union smp_flush_state {
    want false sharing in the per cpu data segment. */
 static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS];
 
+static DEFINE_PER_CPU_READ_MOSTLY(int, tlb_vector_offset);
+
 /*
  * We cannot call mmdrop() because we are in interrupt context,
  * instead update mm->cpu_vm_mask.
@@ -173,7 +176,7 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
 	union smp_flush_state *f;
 
 	/* Caller has disabled preemption */
-	sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
+	sender = this_cpu_read(tlb_vector_offset);
 	f = &flush_state[sender];
 
 	/*
@@ -218,6 +221,47 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
 		flush_tlb_others_ipi(cpumask, mm, va);
 }
 
+static void __cpuinit calculate_tlb_offset(void)
+{
+	int cpu, node, nr_node_vecs;
+	/*
+	 * We change tlb_vector_offset for each CPU at runtime, but this
+	 * does not cause inconsistency, as the write is atomic on x86. We
+	 * might see more lock contention for a short while, but once every
+	 * CPU's tlb_vector_offset has been updated things settle down again.
+	 *
+	 * Note: if NUM_INVALIDATE_TLB_VECTORS % nr_online_nodes != 0, we
+	 * may waste some vectors.
+	 */
+	if (nr_online_nodes > NUM_INVALIDATE_TLB_VECTORS)
+		nr_node_vecs = 1;
+	else
+		nr_node_vecs = NUM_INVALIDATE_TLB_VECTORS / nr_online_nodes;
+
+	for_each_online_node(node) {
+		int node_offset = (node % NUM_INVALIDATE_TLB_VECTORS) *
+			nr_node_vecs;
+		int cpu_offset = 0;
+		for_each_cpu(cpu, cpumask_of_node(node)) {
+			per_cpu(tlb_vector_offset, cpu) = node_offset +
+				cpu_offset;
+			cpu_offset++;
+			cpu_offset = cpu_offset % nr_node_vecs;
+		}
+	}
+}
+
+static int tlb_cpuhp_notify(struct notifier_block *n,
+		unsigned long action, void *hcpu)
+{
+	switch (action & 0xf) {
+	case CPU_ONLINE:
+	case CPU_DEAD:
+		calculate_tlb_offset();
+	}
+	return NOTIFY_OK;
+}
+
 static int __cpuinit init_smp_flush(void)
 {
 	int i;
@@ -225,6 +269,8 @@ static int __cpuinit init_smp_flush(void)
 	for (i = 0; i < ARRAY_SIZE(flush_state); i++)
 		raw_spin_lock_init(&flush_state[i].tlbstate_lock);
 
+	calculate_tlb_offset();
+	hotcpu_notifier(tlb_cpuhp_notify, 0);
 	return 0;
 }
 core_initcall(init_smp_flush);
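
The heart of the change is calculate_tlb_offset(): instead of hashing every CPU onto the flush vectors with smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS, each online node now gets a contiguous share of NUM_INVALIDATE_TLB_VECTORS / nr_online_nodes vectors, and the CPUs within a node round-robin over that share, so flush IPIs issued from different nodes no longer contend on the same flush_state spinlocks. Below is a minimal user-space sketch of the same arithmetic, with a made-up topology; NUM_VECS, NR_NODES and CPUS_PER_NODE are illustrative stand-ins, not kernel symbols.

	/*
	 * Standalone sketch of the vector-spreading math in
	 * calculate_tlb_offset(), with hypothetical constants in place
	 * of the kernel's topology queries.
	 */
	#include <stdio.h>

	#define NUM_VECS	8	/* stands in for NUM_INVALIDATE_TLB_VECTORS */
	#define NR_NODES	2	/* hypothetical: two NUMA nodes */
	#define CPUS_PER_NODE	4	/* hypothetical: four CPUs per node */

	int main(void)
	{
		int nr_node_vecs, node, cpu;

		/* Each node gets a contiguous share of the flush vectors. */
		if (NR_NODES > NUM_VECS)
			nr_node_vecs = 1;
		else
			nr_node_vecs = NUM_VECS / NR_NODES;

		for (node = 0; node < NR_NODES; node++) {
			int node_offset = (node % NUM_VECS) * nr_node_vecs;
			int cpu_offset = 0;

			/* CPUs within a node round-robin over the node's share. */
			for (cpu = 0; cpu < CPUS_PER_NODE; cpu++) {
				printf("node %d cpu %d -> vector %d\n",
				       node, cpu, node_offset + cpu_offset);
				cpu_offset = (cpu_offset + 1) % nr_node_vecs;
			}
		}
		return 0;
	}

With two nodes and eight vectors the sketch maps node 0's CPUs onto vectors 0-3 and node 1's onto vectors 4-7. In the patch itself the mapping is recomputed from tlb_cpuhp_notify() on every CPU_ONLINE/CPU_DEAD transition; the action & 0xf mask strips the CPU_TASKS_FROZEN bit, so the _FROZEN variants generated during suspend/resume take the same path.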