author     Rusty Russell <rusty@rustcorp.com.au>    2009-01-11 00:58:09 -0500
committer  Ingo Molnar <mingo@elte.hu>              2009-01-11 13:13:06 -0500
commit     4595f9620cda8a1e973588e743cf5f8436dd20c6
tree       98a47cff17f58262979c7d04590cb3ffc0deead9
parent     802bf931f2688ad125b73db597ce63cc842fb27a
x86: change flush_tlb_others to take a const struct cpumask
Impact: reduce stack usage, use new cpumask API.
This is made a little more tricky by uv_flush_tlb_others, which actually
alters its argument so that an IPI can be sent to the remaining cpus in the
mask.
I solve this by allocating a cpumask_var_t for that case and falling back to
the IPI path should the allocation fail.
To eliminate temporaries in the caller, all flush_tlb_others implementations
now do the this-cpu-elimination step themselves.
Note also the curious "cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask)"
which has been there since pre-git and yet f->flush_cpumask is always zero
at this point.
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Mike Travis <travis@sgi.com>
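In short, the allocate-or-fall-back pattern described in the message above reduces to roughly the sketch below. This is an illustration only, not the committed code (the real version is the native_flush_tlb_others()/flush_tlb_others_ipi() pair in the tlb_64.c hunk further down); the name flush_tlb_others_sketch is made up here, and the UV helpers (is_uv_system(), uv_flush_tlb_others()) come from the x86 UV headers.

	/* Try a heap-allocated, writable mask that uv_flush_tlb_others() may
	 * consume bits from; if the atomic allocation fails, fall back to the
	 * plain IPI path with the caller's const, unmodified mask. */
	static void flush_tlb_others_sketch(const struct cpumask *cpumask,
					    struct mm_struct *mm, unsigned long va)
	{
		if (is_uv_system()) {
			cpumask_var_t after_uv_flush;

			/* GFP_ATOMIC: callers have preemption disabled. */
			if (alloc_cpumask_var(&after_uv_flush, GFP_ATOMIC)) {
				/* Writable copy, minus the sending cpu. */
				cpumask_andnot(after_uv_flush, cpumask,
					       cpumask_of(smp_processor_id()));
				/* UV clears the cpus it flushed; IPI whatever is left. */
				if (!uv_flush_tlb_others(after_uv_flush, mm, va))
					flush_tlb_others_ipi(after_uv_flush, mm, va);
				free_cpumask_var(after_uv_flush);
				return;
			}
			/* Allocation failed: fall through to the IPI path. */
		}
		flush_tlb_others_ipi(cpumask, mm, va);
	}

The caller-side "this-cpu-elimination" likewise becomes a test on the mm's cpu mask instead of an on-stack copy, as in the tlb_32.c/tlb_64.c hunks:

	if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
		flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);

cpumask_any_but() returns nr_cpu_ids when no cpu other than the given one is set, so callers no longer need to copy the mask and clear themselves out of it just to check whether anyone else needs flushing.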
 arch/x86/include/asm/paravirt.h  |  8
 arch/x86/include/asm/tlbflush.h  |  8
 arch/x86/include/asm/uv/uv_bau.h |  3
 arch/x86/kernel/tlb_32.c         | 67
 arch/x86/kernel/tlb_64.c         | 61
 arch/x86/kernel/tlb_uv.c         | 16
 arch/x86/xen/enlighten.c         | 31
 7 files changed, 93 insertions(+), 101 deletions(-)
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index ba3e2ff6aedc..c26c6bf4da00 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -244,7 +244,8 @@ struct pv_mmu_ops {
 	void (*flush_tlb_user)(void);
 	void (*flush_tlb_kernel)(void);
 	void (*flush_tlb_single)(unsigned long addr);
-	void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm,
+	void (*flush_tlb_others)(const struct cpumask *cpus,
+				 struct mm_struct *mm,
 				 unsigned long va);
 
 	/* Hooks for allocating and freeing a pagetable top-level */
@@ -984,10 +985,11 @@ static inline void __flush_tlb_single(unsigned long addr)
 	PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr);
 }
 
-static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
+static inline void flush_tlb_others(const struct cpumask *cpumask,
+				    struct mm_struct *mm,
 				    unsigned long va)
 {
-	PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va);
+	PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, cpumask, mm, va);
 }
 
 static inline int paravirt_pgd_alloc(struct mm_struct *mm)
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 0e7bbb549116..f4e1b550ce61 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -113,7 +113,7 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
 	__flush_tlb();
 }
 
-static inline void native_flush_tlb_others(const cpumask_t *cpumask,
+static inline void native_flush_tlb_others(const struct cpumask *cpumask,
 					   struct mm_struct *mm,
 					   unsigned long va)
 {
@@ -142,8 +142,8 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
 	flush_tlb_mm(vma->vm_mm);
 }
 
-void native_flush_tlb_others(const cpumask_t *cpumask, struct mm_struct *mm,
-			     unsigned long va);
+void native_flush_tlb_others(const struct cpumask *cpumask,
+			     struct mm_struct *mm, unsigned long va);
 
 #define TLBSTATE_OK	1
 #define TLBSTATE_LAZY	2
@@ -166,7 +166,7 @@ static inline void reset_lazy_tlbstate(void)
 #endif	/* SMP */
 
 #ifndef CONFIG_PARAVIRT
-#define flush_tlb_others(mask, mm, va)	native_flush_tlb_others(&mask, mm, va)
+#define flush_tlb_others(mask, mm, va)	native_flush_tlb_others(mask, mm, va)
 #endif
 
 static inline void flush_tlb_kernel_range(unsigned long start,
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 50423c7b56b2..74e6393bfddb 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -325,7 +325,8 @@ static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits)
 #define cpubit_isset(cpu, bau_local_cpumask) \
 	test_bit((cpu), (bau_local_cpumask).bits)
 
-extern int uv_flush_tlb_others(cpumask_t *, struct mm_struct *, unsigned long);
+extern int uv_flush_tlb_others(struct cpumask *,
+			       struct mm_struct *, unsigned long);
 extern void uv_bau_message_intr1(void);
 extern void uv_bau_timeout_intr1(void);
 
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
index ce5054642247..ec53818f4e38 100644
--- a/arch/x86/kernel/tlb_32.c
+++ b/arch/x86/kernel/tlb_32.c
@@ -20,7 +20,7 @@ DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate)
  *	Optimizations Manfred Spraul <manfred@colorfullife.com>
  */
 
-static cpumask_t flush_cpumask;
+static cpumask_var_t flush_cpumask;
 static struct mm_struct *flush_mm;
 static unsigned long flush_va;
 static DEFINE_SPINLOCK(tlbstate_lock);
@@ -92,7 +92,7 @@ void smp_invalidate_interrupt(struct pt_regs *regs)
 
 	cpu = get_cpu();
 
-	if (!cpu_isset(cpu, flush_cpumask))
+	if (!cpumask_test_cpu(cpu, flush_cpumask))
 		goto out;
 		/*
 		 * This was a BUG() but until someone can quote me the
@@ -114,35 +114,22 @@ void smp_invalidate_interrupt(struct pt_regs *regs)
 	}
 	ack_APIC_irq();
 	smp_mb__before_clear_bit();
-	cpu_clear(cpu, flush_cpumask);
+	cpumask_clear_cpu(cpu, flush_cpumask);
 	smp_mb__after_clear_bit();
 out:
 	put_cpu_no_resched();
 	inc_irq_stat(irq_tlb_count);
 }
 
-void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
-			     unsigned long va)
+void native_flush_tlb_others(const struct cpumask *cpumask,
+			     struct mm_struct *mm, unsigned long va)
 {
-	cpumask_t cpumask = *cpumaskp;
-
 	/*
-	 * A couple of (to be removed) sanity checks:
-	 *
-	 * - current CPU must not be in mask
 	 * - mask must exist :)
 	 */
-	BUG_ON(cpus_empty(cpumask));
-	BUG_ON(cpu_isset(smp_processor_id(), cpumask));
+	BUG_ON(cpumask_empty(cpumask));
 	BUG_ON(!mm);
 
-#ifdef CONFIG_HOTPLUG_CPU
-	/* If a CPU which we ran on has gone down, OK. */
-	cpus_and(cpumask, cpumask, cpu_online_map);
-	if (unlikely(cpus_empty(cpumask)))
-		return;
-#endif
-
 	/*
 	 * i'm not happy about this global shared spinlock in the
 	 * MM hot path, but we'll see how contended it is.
@@ -150,9 +137,17 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
 	 */
 	spin_lock(&tlbstate_lock);
 
+	cpumask_andnot(flush_cpumask, cpumask, cpumask_of(smp_processor_id()));
+#ifdef CONFIG_HOTPLUG_CPU
+	/* If a CPU which we ran on has gone down, OK. */
+	cpumask_and(flush_cpumask, flush_cpumask, cpu_online_mask);
+	if (unlikely(cpumask_empty(flush_cpumask))) {
+		spin_unlock(&tlbstate_lock);
+		return;
+	}
+#endif
 	flush_mm = mm;
 	flush_va = va;
-	cpus_or(flush_cpumask, cpumask, flush_cpumask);
 
 	/*
 	 * Make the above memory operations globally visible before
@@ -163,9 +158,9 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
 	 * We have to send the IPI only to
 	 * CPUs affected.
 	 */
-	send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR);
+	send_IPI_mask(flush_cpumask, INVALIDATE_TLB_VECTOR);
 
-	while (!cpus_empty(flush_cpumask))
+	while (!cpumask_empty(flush_cpumask))
 		/* nothing. lockup detection does not belong here */
 		cpu_relax();
 
@@ -177,25 +172,19 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
 void flush_tlb_current_task(void)
 {
 	struct mm_struct *mm = current->mm;
-	cpumask_t cpu_mask;
 
 	preempt_disable();
-	cpu_mask = mm->cpu_vm_mask;
-	cpu_clear(smp_processor_id(), cpu_mask);
 
 	local_flush_tlb();
-	if (!cpus_empty(cpu_mask))
-		flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
+	if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
+		flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
 	preempt_enable();
 }
 
 void flush_tlb_mm(struct mm_struct *mm)
 {
-	cpumask_t cpu_mask;
 
 	preempt_disable();
-	cpu_mask = mm->cpu_vm_mask;
-	cpu_clear(smp_processor_id(), cpu_mask);
 
 	if (current->active_mm == mm) {
 		if (current->mm)
@@ -203,8 +192,8 @@ void flush_tlb_mm(struct mm_struct *mm)
 		else
 			leave_mm(smp_processor_id());
 	}
-	if (!cpus_empty(cpu_mask))
-		flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
+	if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
+		flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
 
 	preempt_enable();
 }
@@ -212,11 +201,8 @@ void flush_tlb_mm(struct mm_struct *mm)
 void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
 {
 	struct mm_struct *mm = vma->vm_mm;
-	cpumask_t cpu_mask;
 
 	preempt_disable();
-	cpu_mask = mm->cpu_vm_mask;
-	cpu_clear(smp_processor_id(), cpu_mask);
 
 	if (current->active_mm == mm) {
 		if (current->mm)
@@ -225,9 +211,8 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
 			leave_mm(smp_processor_id());
 	}
 
-	if (!cpus_empty(cpu_mask))
-		flush_tlb_others(cpu_mask, mm, va);
-
+	if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
+		flush_tlb_others(&mm->cpu_vm_mask, mm, va);
 	preempt_enable();
 }
 EXPORT_SYMBOL(flush_tlb_page);
@@ -254,3 +239,9 @@ void reset_lazy_tlbstate(void)
 	per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
 }
 
+static int init_flush_cpumask(void)
+{
+	alloc_cpumask_var(&flush_cpumask, GFP_KERNEL);
+	return 0;
+}
+early_initcall(init_flush_cpumask);
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index f8be6f1d2e48..38836aef51b4 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -43,10 +43,10 @@
 
 union smp_flush_state {
 	struct {
-		cpumask_t flush_cpumask;
 		struct mm_struct *flush_mm;
 		unsigned long flush_va;
 		spinlock_t tlbstate_lock;
+		DECLARE_BITMAP(flush_cpumask, NR_CPUS);
 	};
 	char pad[SMP_CACHE_BYTES];
 } ____cacheline_aligned;
@@ -131,7 +131,7 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
 	sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START;
 	f = &per_cpu(flush_state, sender);
 
-	if (!cpu_isset(cpu, f->flush_cpumask))
+	if (!cpumask_test_cpu(cpu, to_cpumask(f->flush_cpumask)))
 		goto out;
 		/*
 		 * This was a BUG() but until someone can quote me the
@@ -153,19 +153,15 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
 	}
 out:
 	ack_APIC_irq();
-	cpu_clear(cpu, f->flush_cpumask);
+	cpumask_clear_cpu(cpu, to_cpumask(f->flush_cpumask));
 	inc_irq_stat(irq_tlb_count);
 }
 
-void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
-			     unsigned long va)
+static void flush_tlb_others_ipi(const struct cpumask *cpumask,
+				 struct mm_struct *mm, unsigned long va)
 {
 	int sender;
 	union smp_flush_state *f;
-	cpumask_t cpumask = *cpumaskp;
-
-	if (is_uv_system() && uv_flush_tlb_others(&cpumask, mm, va))
-		return;
 
 	/* Caller has disabled preemption */
 	sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
@@ -180,7 +176,8 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
 
 	f->flush_mm = mm;
 	f->flush_va = va;
-	cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask);
+	cpumask_andnot(to_cpumask(f->flush_cpumask),
+		       cpumask, cpumask_of(smp_processor_id()));
 
 	/*
 	 * Make the above memory operations globally visible before
@@ -191,9 +188,9 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
 	 * We have to send the IPI only to
 	 * CPUs affected.
 	 */
-	send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR_START + sender);
+	send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender);
 
-	while (!cpus_empty(f->flush_cpumask))
+	while (!cpumask_empty(to_cpumask(f->flush_cpumask)))
 		cpu_relax();
 
 	f->flush_mm = NULL;
@@ -201,6 +198,24 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
 	spin_unlock(&f->tlbstate_lock);
 }
 
+void native_flush_tlb_others(const struct cpumask *cpumask,
+			     struct mm_struct *mm, unsigned long va)
+{
+	if (is_uv_system()) {
+		cpumask_var_t after_uv_flush;
+
+		if (alloc_cpumask_var(&after_uv_flush, GFP_ATOMIC)) {
+			cpumask_andnot(after_uv_flush,
+				       cpumask, cpumask_of(smp_processor_id()));
+			if (!uv_flush_tlb_others(after_uv_flush, mm, va))
+				flush_tlb_others_ipi(after_uv_flush, mm, va);
+			free_cpumask_var(after_uv_flush);
+			return;
+		}
+	}
+	flush_tlb_others_ipi(cpumask, mm, va);
+}
+
 static int __cpuinit init_smp_flush(void)
 {
 	int i;
@@ -215,25 +230,18 @@ core_initcall(init_smp_flush);
 void flush_tlb_current_task(void)
 {
 	struct mm_struct *mm = current->mm;
-	cpumask_t cpu_mask;
 
 	preempt_disable();
-	cpu_mask = mm->cpu_vm_mask;
-	cpu_clear(smp_processor_id(), cpu_mask);
 
 	local_flush_tlb();
-	if (!cpus_empty(cpu_mask))
-		flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
+	if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
+		flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
 	preempt_enable();
 }
 
 void flush_tlb_mm(struct mm_struct *mm)
 {
-	cpumask_t cpu_mask;
-
 	preempt_disable();
-	cpu_mask = mm->cpu_vm_mask;
-	cpu_clear(smp_processor_id(), cpu_mask);
 
 	if (current->active_mm == mm) {
 		if (current->mm)
@@ -241,8 +249,8 @@ void flush_tlb_mm(struct mm_struct *mm)
 		else
 			leave_mm(smp_processor_id());
 	}
-	if (!cpus_empty(cpu_mask))
-		flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
+	if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
+		flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
 
 	preempt_enable();
 }
@@ -250,11 +258,8 @@ void flush_tlb_mm(struct mm_struct *mm)
 void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
 {
 	struct mm_struct *mm = vma->vm_mm;
-	cpumask_t cpu_mask;
 
 	preempt_disable();
-	cpu_mask = mm->cpu_vm_mask;
-	cpu_clear(smp_processor_id(), cpu_mask);
 
 	if (current->active_mm == mm) {
 		if (current->mm)
@@ -263,8 +268,8 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
 			leave_mm(smp_processor_id());
 	}
 
-	if (!cpus_empty(cpu_mask))
-		flush_tlb_others(cpu_mask, mm, va);
+	if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
+		flush_tlb_others(&mm->cpu_vm_mask, mm, va);
 
 	preempt_enable();
 }
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index f885023167e0..690dcf1a27d4 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -212,11 +212,11 @@ static int uv_wait_completion(struct bau_desc *bau_desc,
  * The cpumaskp mask contains the cpus the broadcast was sent to.
  *
  * Returns 1 if all remote flushing was done. The mask is zeroed.
- * Returns 0 if some remote flushing remains to be done. The mask is left
- * unchanged.
+ * Returns 0 if some remote flushing remains to be done. The mask will have
+ * some bits still set.
  */
 int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc,
-			   cpumask_t *cpumaskp)
+			   struct cpumask *cpumaskp)
 {
 	int completion_status = 0;
 	int right_shift;
@@ -263,13 +263,13 @@ int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc,
 	 * Success, so clear the remote cpu's from the mask so we don't
 	 * use the IPI method of shootdown on them.
 	 */
-	for_each_cpu_mask(bit, *cpumaskp) {
+	for_each_cpu(bit, cpumaskp) {
 		blade = uv_cpu_to_blade_id(bit);
 		if (blade == this_blade)
 			continue;
-		cpu_clear(bit, *cpumaskp);
+		cpumask_clear_cpu(bit, cpumaskp);
 	}
-	if (!cpus_empty(*cpumaskp))
+	if (!cpumask_empty(cpumaskp))
 		return 0;
 	return 1;
 }
@@ -296,7 +296,7 @@ int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc,
  * Returns 1 if all remote flushing was done.
  * Returns 0 if some remote flushing remains to be done.
  */
-int uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm,
+int uv_flush_tlb_others(struct cpumask *cpumaskp, struct mm_struct *mm,
 			unsigned long va)
 {
 	int i;
@@ -315,7 +315,7 @@ int uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm,
 	bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
 
 	i = 0;
-	for_each_cpu_mask(bit, *cpumaskp) {
+	for_each_cpu(bit, cpumaskp) {
 		blade = uv_cpu_to_blade_id(bit);
 		BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1));
 		if (blade == this_blade) {
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index bea215230b20..965539ec425f 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -634,35 +634,27 @@ static void xen_flush_tlb_single(unsigned long addr)
 	preempt_enable();
 }
 
-static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm,
-				 unsigned long va)
+static void xen_flush_tlb_others(const struct cpumask *cpus,
+				 struct mm_struct *mm, unsigned long va)
 {
 	struct {
 		struct mmuext_op op;
-		cpumask_t mask;
+		DECLARE_BITMAP(mask, NR_CPUS);
 	} *args;
-	cpumask_t cpumask = *cpus;
 	struct multicall_space mcs;
 
-	/*
-	 * A couple of (to be removed) sanity checks:
-	 *
-	 * - current CPU must not be in mask
-	 * - mask must exist :)
-	 */
-	BUG_ON(cpus_empty(cpumask));
-	BUG_ON(cpu_isset(smp_processor_id(), cpumask));
+	BUG_ON(cpumask_empty(cpus));
 	BUG_ON(!mm);
 
-	/* If a CPU which we ran on has gone down, OK. */
-	cpus_and(cpumask, cpumask, cpu_online_map);
-	if (cpus_empty(cpumask))
-		return;
-
 	mcs = xen_mc_entry(sizeof(*args));
 	args = mcs.args;
-	args->mask = cpumask;
-	args->op.arg2.vcpumask = &args->mask;
+	args->op.arg2.vcpumask = to_cpumask(args->mask);
+
+	/* Remove us, and any offline CPUS. */
+	cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask);
+	cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask));
+	if (unlikely(cpumask_empty(to_cpumask(args->mask))))
+		goto issue;
 
 	if (va == TLB_FLUSH_ALL) {
 		args->op.cmd = MMUEXT_TLB_FLUSH_MULTI;
@@ -673,6 +665,7 @@ static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm,
 
 	MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF);
 
+issue:
 	xen_mc_issue(PARAVIRT_LAZY_MMU);
 }
 