diff options
author | David S. Miller <davem@davemloft.net> | 2006-02-23 17:19:28 -0500 |
---|---|---|
committer | David S. Miller <davem@sunset.davemloft.net> | 2006-03-20 04:14:00 -0500 |
commit | a0663a79ad4faebe1db4a56e2e767b120b12333a (patch) | |
tree | 612a53e387a6aea6116f8a1637050fa13c6d9f80 /arch/sparc64 | |
parent | 074d82cf688fe2dfa7ba4a2317c56f62d13fb522 (diff) |
[SPARC64]: Fix TLB context allocation with SMT style shared TLBs.
The context allocation scheme we use depends, for correctness, upon there
being a one-to-one mapping from CPU to physical TLB. SMT-style chips like
Niagara, where multiple strands share a TLB, break this assumption.
So what we do is notify all online CPUs with a cross call when the context
version number changes; each CPU then allocates, if necessary, a valid
context for the address space it is running at the time.
Stress tested with make -j1024, make -j2048, and make -j4096 kernel
builds on a 32-strand, 8-core T2000 with 16GB of RAM.
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch/sparc64')
-rw-r--r-- | arch/sparc64/kernel/smp.c | 40 | ||||
-rw-r--r-- | arch/sparc64/mm/init.c | 9 |
2 files changed, 37 insertions, 12 deletions
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index 0cd9b16612e7..1ce940811492 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c | |||
@@ -885,26 +885,44 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page) | |||
885 | put_cpu(); | 885 | put_cpu(); |
886 | } | 886 | } |
887 | 887 | ||
888 | static void __smp_receive_signal_mask(cpumask_t mask) | ||
889 | { | ||
890 | smp_cross_call_masked(&xcall_receive_signal, 0, 0, 0, mask); | ||
891 | } | ||
892 | |||
888 | void smp_receive_signal(int cpu) | 893 | void smp_receive_signal(int cpu) |
889 | { | 894 | { |
890 | cpumask_t mask = cpumask_of_cpu(cpu); | 895 | cpumask_t mask = cpumask_of_cpu(cpu); |
891 | 896 | ||
892 | if (cpu_online(cpu)) { | 897 | if (cpu_online(cpu)) |
893 | u64 data0 = (((u64)&xcall_receive_signal) & 0xffffffff); | 898 | __smp_receive_signal_mask(mask); |
894 | |||
895 | if (tlb_type == spitfire) | ||
896 | spitfire_xcall_deliver(data0, 0, 0, mask); | ||
897 | else if (tlb_type == cheetah || tlb_type == cheetah_plus) | ||
898 | cheetah_xcall_deliver(data0, 0, 0, mask); | ||
899 | else if (tlb_type == hypervisor) | ||
900 | hypervisor_xcall_deliver(data0, 0, 0, mask); | ||
901 | } | ||
902 | } | 899 | } |
903 | 900 | ||
904 | void smp_receive_signal_client(int irq, struct pt_regs *regs) | 901 | void smp_receive_signal_client(int irq, struct pt_regs *regs) |
905 | { | 902 | { |
906 | /* Just return, rtrap takes care of the rest. */ | 903 | struct mm_struct *mm; |
904 | |||
907 | clear_softint(1 << irq); | 905 | clear_softint(1 << irq); |
906 | |||
907 | /* See if we need to allocate a new TLB context because | ||
908 | * the version of the one we are using is now out of date. | ||
909 | */ | ||
910 | mm = current->active_mm; | ||
911 | if (likely(mm)) { | ||
912 | if (unlikely(!CTX_VALID(mm->context))) { | ||
913 | unsigned long flags; | ||
914 | |||
915 | spin_lock_irqsave(&mm->context.lock, flags); | ||
916 | get_new_mmu_context(mm); | ||
917 | load_secondary_context(mm); | ||
918 | spin_unlock_irqrestore(&mm->context.lock, flags); | ||
919 | } | ||
920 | } | ||
921 | } | ||
922 | |||
923 | void smp_new_mmu_context_version(void) | ||
924 | { | ||
925 | __smp_receive_signal_mask(cpu_online_map); | ||
908 | } | 926 | } |
909 | 927 | ||
910 | void smp_report_regs(void) | 928 | void smp_report_regs(void) |
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index 16f0db38d932..ccf083aecb65 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c | |||
@@ -629,17 +629,20 @@ void __flush_dcache_range(unsigned long start, unsigned long end) | |||
629 | * let the user have CTX 0 (nucleus) or we ever use a CTX | 629 | * let the user have CTX 0 (nucleus) or we ever use a CTX |
630 | * version of zero (and thus NO_CONTEXT would not be caught | 630 | * version of zero (and thus NO_CONTEXT would not be caught |
631 | * by version mis-match tests in mmu_context.h). | 631 | * by version mis-match tests in mmu_context.h). |
632 | * | ||
633 | * Always invoked with interrupts disabled. | ||
632 | */ | 634 | */ |
633 | void get_new_mmu_context(struct mm_struct *mm) | 635 | void get_new_mmu_context(struct mm_struct *mm) |
634 | { | 636 | { |
635 | unsigned long ctx, new_ctx; | 637 | unsigned long ctx, new_ctx; |
636 | unsigned long orig_pgsz_bits; | 638 | unsigned long orig_pgsz_bits; |
637 | 639 | int new_version; | |
638 | 640 | ||
639 | spin_lock(&ctx_alloc_lock); | 641 | spin_lock(&ctx_alloc_lock); |
640 | orig_pgsz_bits = (mm->context.sparc64_ctx_val & CTX_PGSZ_MASK); | 642 | orig_pgsz_bits = (mm->context.sparc64_ctx_val & CTX_PGSZ_MASK); |
641 | ctx = (tlb_context_cache + 1) & CTX_NR_MASK; | 643 | ctx = (tlb_context_cache + 1) & CTX_NR_MASK; |
642 | new_ctx = find_next_zero_bit(mmu_context_bmap, 1 << CTX_NR_BITS, ctx); | 644 | new_ctx = find_next_zero_bit(mmu_context_bmap, 1 << CTX_NR_BITS, ctx); |
645 | new_version = 0; | ||
643 | if (new_ctx >= (1 << CTX_NR_BITS)) { | 646 | if (new_ctx >= (1 << CTX_NR_BITS)) { |
644 | new_ctx = find_next_zero_bit(mmu_context_bmap, ctx, 1); | 647 | new_ctx = find_next_zero_bit(mmu_context_bmap, ctx, 1); |
645 | if (new_ctx >= ctx) { | 648 | if (new_ctx >= ctx) { |
@@ -662,6 +665,7 @@ void get_new_mmu_context(struct mm_struct *mm) | |||
662 | mmu_context_bmap[i + 2] = 0; | 665 | mmu_context_bmap[i + 2] = 0; |
663 | mmu_context_bmap[i + 3] = 0; | 666 | mmu_context_bmap[i + 3] = 0; |
664 | } | 667 | } |
668 | new_version = 1; | ||
665 | goto out; | 669 | goto out; |
666 | } | 670 | } |
667 | } | 671 | } |
@@ -671,6 +675,9 @@ out: | |||
671 | tlb_context_cache = new_ctx; | 675 | tlb_context_cache = new_ctx; |
672 | mm->context.sparc64_ctx_val = new_ctx | orig_pgsz_bits; | 676 | mm->context.sparc64_ctx_val = new_ctx | orig_pgsz_bits; |
673 | spin_unlock(&ctx_alloc_lock); | 677 | spin_unlock(&ctx_alloc_lock); |
678 | |||
679 | if (unlikely(new_version)) | ||
680 | smp_new_mmu_context_version(); | ||
674 | } | 681 | } |
675 | 682 | ||
676 | void sparc_ultra_dump_itlb(void) | 683 | void sparc_ultra_dump_itlb(void) |