author     Glenn Elliott <gelliott@cs.unc.edu>    2012-03-04 19:47:13 -0500
committer  Glenn Elliott <gelliott@cs.unc.edu>    2012-03-04 19:47:13 -0500
commit     c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch)
tree       ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /arch/x86/mm/tlb.c
parent     ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff)
parent     6a00f206debf8a5c8899055726ad127dbeeed098 (diff)
Merge branch 'mpi-master' into wip-k-fmlp

Conflicts:
    litmus/sched_cedf.c
Diffstat (limited to 'arch/x86/mm/tlb.c')
-rw-r--r--    arch/x86/mm/tlb.c    63
1 file changed, 53 insertions(+), 10 deletions(-)
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index c03f14ab6667..d6c0418c3e47 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -5,6 +5,7 @@
 #include <linux/smp.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
+#include <linux/cpu.h>
 
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
@@ -52,6 +53,8 @@ union smp_flush_state {
    want false sharing in the per cpu data segment. */
 static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS];
 
+static DEFINE_PER_CPU_READ_MOSTLY(int, tlb_vector_offset);
+
 /*
  * We cannot call mmdrop() because we are in interrupt context,
  * instead update mm->cpu_vm_mask.
@@ -173,15 +176,11 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
        union smp_flush_state *f;
 
        /* Caller has disabled preemption */
-       sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
+       sender = this_cpu_read(tlb_vector_offset);
        f = &flush_state[sender];
 
-       /*
-        * Could avoid this lock when
-        * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is
-        * probably not worth checking this for a cache-hot lock.
-        */
-       raw_spin_lock(&f->tlbstate_lock);
+       if (nr_cpu_ids > NUM_INVALIDATE_TLB_VECTORS)
+               raw_spin_lock(&f->tlbstate_lock);
 
        f->flush_mm = mm;
        f->flush_va = va;
@@ -199,7 +198,8 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
 
        f->flush_mm = NULL;
        f->flush_va = 0;
-       raw_spin_unlock(&f->tlbstate_lock);
+       if (nr_cpu_ids > NUM_INVALIDATE_TLB_VECTORS)
+               raw_spin_unlock(&f->tlbstate_lock);
 }
 
 void native_flush_tlb_others(const struct cpumask *cpumask,
@@ -208,16 +208,57 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
        if (is_uv_system()) {
                unsigned int cpu;
 
-               cpu = get_cpu();
+               cpu = smp_processor_id();
                cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu);
                if (cpumask)
                        flush_tlb_others_ipi(cpumask, mm, va);
-               put_cpu();
                return;
        }
        flush_tlb_others_ipi(cpumask, mm, va);
 }
 
+static void __cpuinit calculate_tlb_offset(void)
+{
+       int cpu, node, nr_node_vecs, idx = 0;
+       /*
+        * we are changing tlb_vector_offset for each CPU in runtime, but this
+        * will not cause inconsistency, as the write is atomic under X86. we
+        * might see more lock contentions in a short time, but after all CPU's
+        * tlb_vector_offset are changed, everything should go normal
+        *
+        * Note: if NUM_INVALIDATE_TLB_VECTORS % nr_online_nodes !=0, we might
+        * waste some vectors.
+        **/
+       if (nr_online_nodes > NUM_INVALIDATE_TLB_VECTORS)
+               nr_node_vecs = 1;
+       else
+               nr_node_vecs = NUM_INVALIDATE_TLB_VECTORS/nr_online_nodes;
+
+       for_each_online_node(node) {
+               int node_offset = (idx % NUM_INVALIDATE_TLB_VECTORS) *
+                       nr_node_vecs;
+               int cpu_offset = 0;
+               for_each_cpu(cpu, cpumask_of_node(node)) {
+                       per_cpu(tlb_vector_offset, cpu) = node_offset +
+                               cpu_offset;
+                       cpu_offset++;
+                       cpu_offset = cpu_offset % nr_node_vecs;
+               }
+               idx++;
+       }
+}
+
+static int __cpuinit tlb_cpuhp_notify(struct notifier_block *n,
+               unsigned long action, void *hcpu)
+{
+       switch (action & 0xf) {
+       case CPU_ONLINE:
+       case CPU_DEAD:
+               calculate_tlb_offset();
+       }
+       return NOTIFY_OK;
+}
+
 static int __cpuinit init_smp_flush(void)
 {
        int i;
@@ -225,6 +266,8 @@ static int __cpuinit init_smp_flush(void)
        for (i = 0; i < ARRAY_SIZE(flush_state); i++)
                raw_spin_lock_init(&flush_state[i].tlbstate_lock);
 
+       calculate_tlb_offset();
+       hotcpu_notifier(tlb_cpuhp_notify, 0);
        return 0;
 }
 core_initcall(init_smp_flush);
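
For illustration only: the stand-alone sketch below reproduces the arithmetic of the new calculate_tlb_offset() in user space, with made-up values (2 NUMA nodes, 6 CPUs per node, 8 invalidate vectors) standing in for the kernel's nr_online_nodes and cpumask_of_node() iterators. It shows how each node receives a contiguous slice of the flush vectors and how CPUs within a node are assigned round-robin inside that slice; it is not part of the patch.

/* sketch_tlb_offset.c -- user-space model of the vector distribution done
 * by calculate_tlb_offset() above.  Node/CPU counts are hypothetical; the
 * kernel derives them from the real topology.
 */
#include <stdio.h>

#define NUM_INVALIDATE_TLB_VECTORS 8

int main(void)
{
        int nr_online_nodes = 2;     /* hypothetical topology */
        int cpus_per_node = 6;
        int nr_node_vecs, node, idx = 0;

        /* each node gets an equal slice of the vectors, or a single
         * vector if there are more nodes than vectors */
        if (nr_online_nodes > NUM_INVALIDATE_TLB_VECTORS)
                nr_node_vecs = 1;
        else
                nr_node_vecs = NUM_INVALIDATE_TLB_VECTORS / nr_online_nodes;

        for (node = 0; node < nr_online_nodes; node++) {
                int node_offset = (idx % NUM_INVALIDATE_TLB_VECTORS) * nr_node_vecs;
                int cpu_offset = 0;
                int cpu;

                /* CPUs of this node share the node's slice, round-robin */
                for (cpu = 0; cpu < cpus_per_node; cpu++) {
                        printf("node %d cpu %2d -> tlb vector %d\n",
                               node, node * cpus_per_node + cpu,
                               node_offset + cpu_offset);
                        cpu_offset = (cpu_offset + 1) % nr_node_vecs;
                }
                idx++;
        }
        return 0;
}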