author	Pavel Tatashin <pasha.tatashin@oracle.com>	2017-05-31 11:25:24 -0400
committer	David S. Miller <davem@davemloft.net>	2017-06-06 16:45:29 -0400
commit	a0582f26ec9dfd5360ea2f35dd9a1b026f8adda0 (patch)
tree	946f34cd669dc4edb9e3a272ba8e475a30538950 /arch/sparc
parent	7a5b4bbf49fe86ce77488a70c5dccfe2d50d7a2d (diff)
sparc64: new context wrap
The current wrap implementation has a race issue: it is called outside of
the ctx_alloc_lock, and also does not wait for all CPUs to complete the
wrap. This means that a thread can get a new context with a new version
while another thread might still be running with the same context ID. The
problem is especially severe on CPUs with shared TLBs, like sun4v. I used
the following test to very quickly reproduce the problem:

- start over 8K processes (must be more than the number of context IDs)
- write and read values at a memory location in every process

Very quickly memory corruptions start happening, and what we read back
does not equal what we wrote.

Several approaches were explored before settling on this one:

Approach 1: Move smp_new_mmu_context_version() inside ctx_alloc_lock, and
wait for every process to complete the wrap. (Note: every CPU must WAIT
before leaving smp_new_mmu_context_version_client() until every one
arrives.) This approach ends up with deadlocks, as some threads own locks
which other threads are waiting for, and they never receive the softint
until those threads exit smp_new_mmu_context_version_client(). Since we do
not allow the exit, deadlock happens.

Approach 2: Handle the wrap right during the mondo interrupt. Use
etrap/rtrap to enter into C code, and issue new versions to every CPU.
This approach adds some overhead to runtime: in switch_mm() we must add
checks to make sure that the version has not changed due to a wrap while
we were loading the new secondary context. (This could be protected by
PSTATE_IE, but that degrades performance on M7 and older CPUs, as it takes
50 cycles for each access.) Also, we still need a global per-cpu array of
mm's to know where we need to load new contexts; otherwise we can change
the context of a thread that is going away (if we received the mondo
between switch_mm() and switch_to() time). Finally, there are some issues
with window registers in rtrap() when context IDs are changed during CPU
mondo time.

The approach in this patch is the simplest and has almost no impact on
runtime. We use the array of mm's where the last secondary contexts were
loaded onto CPUs, and bump their versions to the new generation without
changing the context IDs. If a new process comes in to get a context ID,
it will go through get_new_mmu_context() because of the version mismatch,
but the running processes do not need to be interrupted. And the wrap is
quicker, as we do not need to xcall and wait for every CPU to receive and
complete the wrap.

Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Reviewed-by: Bob Picco <bob.picco@oracle.com>
Reviewed-by: Steven Sistare <steven.sistare@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
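To make the version mechanism concrete, here is a minimal user-space
sketch (not kernel code). The field layout and the validity check are
modeled on CTX_NR_MASK, CTX_VERSION_MASK and CTX_VALID() as defined in
arch/sparc/include/asm/mmu_64.h; the 13-bit context-number width is an
assumption chosen for illustration, consistent with the 8K-process
reproducer above.

	#include <stdio.h>

	/* Illustrative layout: low bits = context ID, high bits = version. */
	#define CTX_NR_BITS       13			/* assumed width */
	#define CTX_NR_MASK       ((1UL << CTX_NR_BITS) - 1)
	#define CTX_VERSION_MASK  (~0UL << CTX_NR_BITS)
	#define CTX_FIRST_VERSION (1UL << CTX_NR_BITS)

	static unsigned long tlb_context_cache = CTX_FIRST_VERSION;

	/* A context is valid only while its version bits match the global cache. */
	static int ctx_valid(unsigned long ctx)
	{
		return !((ctx ^ tlb_context_cache) & CTX_VERSION_MASK);
	}

	int main(void)
	{
		unsigned long ctx = tlb_context_cache | 42;	/* version 1, ID 42 */

		printf("before wrap: valid=%d\n", ctx_valid(ctx));	/* prints 1 */

		/* Wrap: bump the global version; context IDs are left alone.
		 * (The patch also handles the version field itself
		 * overflowing to zero.)
		 */
		tlb_context_cache = (tlb_context_cache & CTX_VERSION_MASK)
				    + CTX_FIRST_VERSION;

		/* A stale mm now fails the check and is forced through
		 * get_new_mmu_context(); mmu_context_wrap() instead rewrites
		 * the version bits of live secondary contexts in place,
		 * keeping ID 42.
		 */
		printf("after wrap:  valid=%d\n", ctx_valid(ctx));	/* prints 0 */
		return 0;
	}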
Diffstat (limited to 'arch/sparc')
-rw-r--r--	arch/sparc/mm/init_64.c | 81
1 file changed, 54 insertions(+), 27 deletions(-)
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index a4c0bc8af820..3c40ebd50f92 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -713,6 +713,53 @@ unsigned long tlb_context_cache = CTX_FIRST_VERSION;
 DECLARE_BITMAP(mmu_context_bmap, MAX_CTX_NR);
 DEFINE_PER_CPU(struct mm_struct *, per_cpu_secondary_mm) = {0};
 
+static void mmu_context_wrap(void)
+{
+	unsigned long old_ver = tlb_context_cache & CTX_VERSION_MASK;
+	unsigned long new_ver, new_ctx, old_ctx;
+	struct mm_struct *mm;
+	int cpu;
+
+	bitmap_zero(mmu_context_bmap, 1 << CTX_NR_BITS);
+
+	/* Reserve kernel context */
+	set_bit(0, mmu_context_bmap);
+
+	new_ver = (tlb_context_cache & CTX_VERSION_MASK) + CTX_FIRST_VERSION;
+	if (unlikely(new_ver == 0))
+		new_ver = CTX_FIRST_VERSION;
+	tlb_context_cache = new_ver;
+
+	/*
+	 * Make sure that any new mm that is added into per_cpu_secondary_mm
+	 * will go through the get_new_mmu_context() path.
+	 */
+	mb();
+
+	/*
+	 * Update versions to current on those CPUs that had valid secondary
+	 * contexts.
+	 */
+	for_each_online_cpu(cpu) {
+		/*
+		 * If a new mm is stored after we took this mm from the array,
+		 * it will go into the get_new_mmu_context() path, because we
+		 * already bumped the version in tlb_context_cache.
+		 */
+		mm = per_cpu(per_cpu_secondary_mm, cpu);
+
+		if (unlikely(!mm || mm == &init_mm))
+			continue;
+
+		old_ctx = mm->context.sparc64_ctx_val;
+		if (likely((old_ctx & CTX_VERSION_MASK) == old_ver)) {
+			new_ctx = (old_ctx & ~CTX_VERSION_MASK) | new_ver;
+			set_bit(new_ctx & CTX_NR_MASK, mmu_context_bmap);
+			mm->context.sparc64_ctx_val = new_ctx;
+		}
+	}
+}
+
 /* Caller does TLB context flushing on local CPU if necessary.
  * The caller also ensures that CTX_VALID(mm->context) is false.
  *
@@ -727,50 +774,30 @@ void get_new_mmu_context(struct mm_struct *mm)
 {
 	unsigned long ctx, new_ctx;
 	unsigned long orig_pgsz_bits;
-	int new_version;
 
 	spin_lock(&ctx_alloc_lock);
+retry:
+	/* wrap might have happened, test again if our context became valid */
+	if (unlikely(CTX_VALID(mm->context)))
+		goto out;
 	orig_pgsz_bits = (mm->context.sparc64_ctx_val & CTX_PGSZ_MASK);
 	ctx = (tlb_context_cache + 1) & CTX_NR_MASK;
 	new_ctx = find_next_zero_bit(mmu_context_bmap, 1 << CTX_NR_BITS, ctx);
-	new_version = 0;
 	if (new_ctx >= (1 << CTX_NR_BITS)) {
 		new_ctx = find_next_zero_bit(mmu_context_bmap, ctx, 1);
 		if (new_ctx >= ctx) {
-			int i;
-			new_ctx = (tlb_context_cache & CTX_VERSION_MASK) +
-				CTX_FIRST_VERSION + 1;
-			if (new_ctx == 1)
-				new_ctx = CTX_FIRST_VERSION + 1;
-
-			/* Don't call memset, for 16 entries that's just
-			 * plain silly...
-			 */
-			mmu_context_bmap[0] = 3;
-			mmu_context_bmap[1] = 0;
-			mmu_context_bmap[2] = 0;
-			mmu_context_bmap[3] = 0;
-			for (i = 4; i < CTX_BMAP_SLOTS; i += 4) {
-				mmu_context_bmap[i + 0] = 0;
-				mmu_context_bmap[i + 1] = 0;
-				mmu_context_bmap[i + 2] = 0;
-				mmu_context_bmap[i + 3] = 0;
-			}
-			new_version = 1;
-			goto out;
+			mmu_context_wrap();
+			goto retry;
 		}
 	}
 	if (mm->context.sparc64_ctx_val)
 		cpumask_clear(mm_cpumask(mm));
 	mmu_context_bmap[new_ctx>>6] |= (1UL << (new_ctx & 63));
 	new_ctx |= (tlb_context_cache & CTX_VERSION_MASK);
-out:
 	tlb_context_cache = new_ctx;
 	mm->context.sparc64_ctx_val = new_ctx | orig_pgsz_bits;
+out:
 	spin_unlock(&ctx_alloc_lock);
-
-	if (unlikely(new_version))
-		smp_new_mmu_context_version();
 }
 
 static int numa_enabled = 1;
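As a follow-up on the control flow above: the two find_next_zero_bit()
passes encode "no free ID at or after the hint, and none before it
either", and only when both fail does the allocator now call
mmu_context_wrap() and retry, all under ctx_alloc_lock. The toy program
below walks that logic; next_zero() is a hypothetical stand-in for the
kernel's find_next_zero_bit(), and the bitmap size is shrunk for
illustration.

	#include <stdio.h>
	#include <string.h>

	#define NR_CTX 16UL		/* toy size standing in for 1 << CTX_NR_BITS */

	static unsigned char bmap[NR_CTX];	/* byte-per-bit toy bitmap */

	/* Hypothetical stand-in for find_next_zero_bit(): index of the first
	 * zero in [off, size), or size if every slot is taken.
	 */
	static unsigned long next_zero(unsigned long size, unsigned long off)
	{
		while (off < size && bmap[off])
			off++;
		return off;
	}

	int main(void)
	{
		unsigned long ctx = 5;	/* hint: last allocated ID + 1 */
		unsigned long new_ctx;

		memset(bmap, 1, sizeof(bmap));	/* every context ID in use */

		/* First pass searches from the hint to the end... */
		new_ctx = next_zero(NR_CTX, ctx);
		if (new_ctx >= NR_CTX) {
			/* ...second pass wraps around, skipping ID 0 (kernel). */
			new_ctx = next_zero(ctx, 1);
			if (new_ctx >= ctx) {
				/* Both passes failed: the ID space is exhausted,
				 * so the kernel wraps versions and retries.
				 */
				printf("exhausted: mmu_context_wrap() + retry\n");
			}
		}
		return 0;
	}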