[SPARC64]: Fix TLB context allocation with SMT style shared TLBs.

The context allocation scheme we use depends upon there being a 1<-->1 mapping from CPU to physical TLB for correctness. Chips like Niagara break this assumption. So what we do is notify all CPUs with a cross call when the context version number changes, and if necessary this makes them allocate a valid context for the address space they are running at the time. Stress tested with make -j1024, make -j2048, and make -j4096 kernel builds on a 32-strand, 8-core T2000 with 16GB of RAM. Signed-off-by: David S. Miller <davem@davemloft.net>
author: David S. Miller <davem@davemloft.net> 2006-02-23 17:19:28 -0500
committer: David S. Miller <davem@sunset.davemloft.net> 2006-03-20 04:14:00 -0500
commit: a0663a79ad4faebe1db4a56e2e767b120b12333a (patch)
tree: 612a53e387a6aea6116f8a1637050fa13c6d9f80 /arch/sparc64
parent: 074d82cf688fe2dfa7ba4a2317c56f62d13fb522 (diff)
2 files changed, 37 insertions, 12 deletions
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index 0cd9b16612e7..1ce940811492 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -885,26 +885,44 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
        put_cpu();
 }
+static void __smp_receive_signal_mask(cpumask_t mask)
+{
+        smp_cross_call_masked(&xcall_receive_signal, 0, 0, 0, mask);
+}
 void smp_receive_signal(int cpu)
 {
        cpumask_t mask = cpumask_of_cpu(cpu);
-        if (cpu_online(cpu)) {
+        if (cpu_online(cpu))
-                u64 data0 = (((u64)&xcall_receive_signal) & 0xffffffff);
+                __smp_receive_signal_mask(mask);
-                if (tlb_type == spitfire)
-                        spitfire_xcall_deliver(data0, 0, 0, mask);
-                else if (tlb_type == cheetah || tlb_type == cheetah_plus)
-                        cheetah_xcall_deliver(data0, 0, 0, mask);
-                else if (tlb_type == hypervisor)
-                        hypervisor_xcall_deliver(data0, 0, 0, mask);
-        }
 }
 void smp_receive_signal_client(int irq, struct pt_regs *regs)
 {
-        /* Just return, rtrap takes care of the rest. */
+        struct mm_struct *mm;
        clear_softint(1 << irq);
+        /* See if we need to allocate a new TLB context because
+         * the version of the one we are using is now out of date.
+         */
+        mm = current->active_mm;
+        if (likely(mm)) {
+                if (unlikely(!CTX_VALID(mm->context))) {
+                        unsigned long flags;
+                        spin_lock_irqsave(&mm->context.lock, flags);
+                        get_new_mmu_context(mm);
+                        load_secondary_context(mm);
+                        spin_unlock_irqrestore(&mm->context.lock, flags);
+                }
+        }
+}
+void smp_new_mmu_context_version(void)
+{
+        __smp_receive_signal_mask(cpu_online_map);
 }
 void smp_report_regs(void)
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index 16f0db38d932..ccf083aecb65 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -629,17 +629,20 @@ void __flush_dcache_range(unsigned long start, unsigned long end)
 * let the user have CTX 0 (nucleus) or we ever use a CTX
 * version of zero (and thus NO_CONTEXT would not be caught
 * by version mis-match tests in mmu_context.h).
+ *
+ * Always invoked with interrupts disabled.
 */
 void get_new_mmu_context(struct mm_struct *mm)
 {
        unsigned long ctx, new_ctx;
        unsigned long orig_pgsz_bits;
-        
+        int new_version;
        spin_lock(&ctx_alloc_lock);
        orig_pgsz_bits = (mm->context.sparc64_ctx_val & CTX_PGSZ_MASK);
        ctx = (tlb_context_cache + 1) & CTX_NR_MASK;
        new_ctx = find_next_zero_bit(mmu_context_bmap, 1 << CTX_NR_BITS, ctx);
+        new_version = 0;
        if (new_ctx >= (1 << CTX_NR_BITS)) {
                new_ctx = find_next_zero_bit(mmu_context_bmap, ctx, 1);
                if (new_ctx >= ctx) {
@@ -662,6 +665,7 @@ void get_new_mmu_context(struct mm_struct *mm)
                                mmu_context_bmap[i + 2] = 0;
                                mmu_context_bmap[i + 3] = 0;
                        }
+                        new_version = 1;
                        goto out;
                }
        }
@@ -671,6 +675,9 @@ out:
        tlb_context_cache = new_ctx;
        mm->context.sparc64_ctx_val = new_ctx | orig_pgsz_bits;
        spin_unlock(&ctx_alloc_lock);
+        if (unlikely(new_version))
+                smp_new_mmu_context_version();
 }
 void sparc_ultra_dump_itlb(void)
author	David S. Miller <davem@davemloft.net>	2006-02-23 17:19:28 -0500
committer	David S. Miller <davem@sunset.davemloft.net>	2006-03-20 04:14:00 -0500
commit	a0663a79ad4faebe1db4a56e2e767b120b12333a (patch)
tree	612a53e387a6aea6116f8a1637050fa13c6d9f80 /arch/sparc64
parent	074d82cf688fe2dfa7ba4a2317c56f62d13fb522 (diff)

diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index 0cd9b16612e7..1ce940811492 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -885,26 +885,44 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
885	put_cpu();	885	put_cpu();
886	}	886	}
887		887
		888	static void __smp_receive_signal_mask(cpumask_t mask)
		889	{
		890	smp_cross_call_masked(&xcall_receive_signal, 0, 0, 0, mask);
		891	}
		892
888	void smp_receive_signal(int cpu)	893	void smp_receive_signal(int cpu)
889	{	894	{
890	cpumask_t mask = cpumask_of_cpu(cpu);	895	cpumask_t mask = cpumask_of_cpu(cpu);
891		896
892	if (cpu_online(cpu)) {	897	if (cpu_online(cpu))
893	u64 data0 = (((u64)&xcall_receive_signal) & 0xffffffff);	898	__smp_receive_signal_mask(mask);
894
895	if (tlb_type == spitfire)
896	spitfire_xcall_deliver(data0, 0, 0, mask);
897	else if (tlb_type == cheetah || tlb_type == cheetah_plus)
898	cheetah_xcall_deliver(data0, 0, 0, mask);
899	else if (tlb_type == hypervisor)
900	hypervisor_xcall_deliver(data0, 0, 0, mask);
901	}
902	}	899	}
903		900
904	void smp_receive_signal_client(int irq, struct pt_regs *regs)	901	void smp_receive_signal_client(int irq, struct pt_regs *regs)
905	{	902	{
906	/* Just return, rtrap takes care of the rest. */	903	struct mm_struct *mm;
		904
907	clear_softint(1 << irq);	905	clear_softint(1 << irq);
		906
		907	/* See if we need to allocate a new TLB context because
		908	* the version of the one we are using is now out of date.
		909	*/
		910	mm = current->active_mm;
		911	if (likely(mm)) {
		912	if (unlikely(!CTX_VALID(mm->context))) {
		913	unsigned long flags;
		914
		915	spin_lock_irqsave(&mm->context.lock, flags);
		916	get_new_mmu_context(mm);
		917	load_secondary_context(mm);
		918	spin_unlock_irqrestore(&mm->context.lock, flags);
		919	}
		920	}
		921	}
		922
		923	void smp_new_mmu_context_version(void)
		924	{
		925	__smp_receive_signal_mask(cpu_online_map);
908	}	926	}
909		927
910	void smp_report_regs(void)	928	void smp_report_regs(void)


diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index 16f0db38d932..ccf083aecb65 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -629,17 +629,20 @@ void __flush_dcache_range(unsigned long start, unsigned long end)
629	* let the user have CTX 0 (nucleus) or we ever use a CTX	629	* let the user have CTX 0 (nucleus) or we ever use a CTX
630	* version of zero (and thus NO_CONTEXT would not be caught	630	* version of zero (and thus NO_CONTEXT would not be caught
631	* by version mis-match tests in mmu_context.h).	631	* by version mis-match tests in mmu_context.h).
		632	*
		633	* Always invoked with interrupts disabled.
632	*/	634	*/
633	void get_new_mmu_context(struct mm_struct *mm)	635	void get_new_mmu_context(struct mm_struct *mm)
634	{	636	{
635	unsigned long ctx, new_ctx;	637	unsigned long ctx, new_ctx;
636	unsigned long orig_pgsz_bits;	638	unsigned long orig_pgsz_bits;
637		639	int new_version;
638		640
639	spin_lock(&ctx_alloc_lock);	641	spin_lock(&ctx_alloc_lock);
640	orig_pgsz_bits = (mm->context.sparc64_ctx_val & CTX_PGSZ_MASK);	642	orig_pgsz_bits = (mm->context.sparc64_ctx_val & CTX_PGSZ_MASK);
641	ctx = (tlb_context_cache + 1) & CTX_NR_MASK;	643	ctx = (tlb_context_cache + 1) & CTX_NR_MASK;
642	new_ctx = find_next_zero_bit(mmu_context_bmap, 1 << CTX_NR_BITS, ctx);	644	new_ctx = find_next_zero_bit(mmu_context_bmap, 1 << CTX_NR_BITS, ctx);
		645	new_version = 0;
643	if (new_ctx >= (1 << CTX_NR_BITS)) {	646	if (new_ctx >= (1 << CTX_NR_BITS)) {
644	new_ctx = find_next_zero_bit(mmu_context_bmap, ctx, 1);	647	new_ctx = find_next_zero_bit(mmu_context_bmap, ctx, 1);
645	if (new_ctx >= ctx) {	648	if (new_ctx >= ctx) {
@@ -662,6 +665,7 @@ void get_new_mmu_context(struct mm_struct *mm)
662	mmu_context_bmap[i + 2] = 0;	665	mmu_context_bmap[i + 2] = 0;
663	mmu_context_bmap[i + 3] = 0;	666	mmu_context_bmap[i + 3] = 0;
664	}	667	}
		668	new_version = 1;
665	goto out;	669	goto out;
666	}	670	}
667	}	671	}
@@ -671,6 +675,9 @@ out:
671	tlb_context_cache = new_ctx;	675	tlb_context_cache = new_ctx;
672	mm->context.sparc64_ctx_val = new_ctx | orig_pgsz_bits;	676	mm->context.sparc64_ctx_val = new_ctx | orig_pgsz_bits;
673	spin_unlock(&ctx_alloc_lock);	677	spin_unlock(&ctx_alloc_lock);
		678
		679	if (unlikely(new_version))
		680	smp_new_mmu_context_version();
674	}	681	}
675		682
676	void sparc_ultra_dump_itlb(void)	683	void sparc_ultra_dump_itlb(void)