From 9c1e105238c474d19905af504f2e7f42d4f71f9e Mon Sep 17 00:00:00 2001
From: Paul Mackerras
Date: Mon, 17 Aug 2009 15:17:54 +1000
Subject: powerpc: Allow perf_counters to access user memory at interrupt time

This provides a mechanism to allow the perf_counters code to access
user memory in a PMU interrupt routine. Such an access can cause
various kinds of interrupt: SLB miss, MMU hash table miss, segment
table miss, or TLB miss, depending on the processor. This commit
only deals with 64-bit classic/server processors, which use an MMU
hash table. 32-bit processors are already able to access user memory
at interrupt time. Since we don't soft-disable on 32-bit, we avoid
the possibility of reentering hash_page or the TLB miss handlers,
since they run with interrupts disabled.

On 64-bit processors, an SLB miss interrupt on a user address will
update the slb_cache and slb_cache_ptr fields in the paca. This is
OK except in the case where a PMU interrupt occurs in switch_slb,
which also accesses those fields. To prevent this, we hard-disable
interrupts in switch_slb. Interrupts are already soft-disabled at
this point, and will get hard-enabled when they get soft-enabled
later.

This also reworks slb_flush_and_rebolt: to avoid hard-disabling twice,
and to make sure that it clears the slb_cache_ptr when called from
other callers than switch_slb, the existing routine is renamed to
__slb_flush_and_rebolt, which is called by switch_slb and the new
version of slb_flush_and_rebolt.

Similarly, switch_stab (used on POWER3 and RS64 processors) gets a
hard_irq_disable() to protect the per-cpu variables used there and
in ste_allocate.

If a MMU hashtable miss interrupt occurs, normally we would call
hash_page to look up the Linux PTE for the address and create a HPTE.
However, hash_page is fairly complex and takes some locks, so to
avoid the possibility of deadlock, we check the preemption count to
see if we are in a (pseudo-)NMI handler, and if so, we don't call
hash_page but instead treat it like a bad access that will get
reported up through the exception table mechanism. An interrupt
whose handler runs even though the interrupt occurred when
soft-disabled (such as the PMU interrupt) is considered a pseudo-NMI
handler, which should use nmi_enter()/nmi_exit() rather than
irq_enter()/irq_exit().

Acked-by: Benjamin Herrenschmidt
Signed-off-by: Paul Mackerras
---
 arch/powerpc/mm/slb.c | 37 ++++++++++++++++++++++++++-----------
 1 file changed, 26 insertions(+), 11 deletions(-)

(limited to 'arch/powerpc/mm/slb.c')

diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 5b7038f248b6..a685652effeb 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -92,15 +92,13 @@ static inline void create_shadowed_slbe(unsigned long ea, int ssize,
 		     : "memory" );
 }
 
-void slb_flush_and_rebolt(void)
+static void __slb_flush_and_rebolt(void)
 {
 	/* If you change this make sure you change SLB_NUM_BOLTED
 	 * appropriately too. */
 	unsigned long linear_llp, vmalloc_llp, lflags, vflags;
 	unsigned long ksp_esid_data, ksp_vsid_data;
 
-	WARN_ON(!irqs_disabled());
-
 	linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
 	vmalloc_llp = mmu_psize_defs[mmu_vmalloc_psize].sllp;
 	lflags = SLB_VSID_KERNEL | linear_llp;
@@ -117,12 +115,6 @@ void slb_flush_and_rebolt(void)
 		ksp_vsid_data = get_slb_shadow()->save_area[2].vsid;
 	}
 
-	/*
-	 * We can't take a PMU exception in the following code, so hard
-	 * disable interrupts.
-	 */
-	hard_irq_disable();
-
 	/* We need to do this all in asm, so we're sure we don't touch
 	 * the stack between the slbia and rebolting it. */
 	asm volatile("isync\n"
@@ -139,6 +131,21 @@ void slb_flush_and_rebolt(void)
 		     : "memory");
 }
 
+void slb_flush_and_rebolt(void)
+{
+
+	WARN_ON(!irqs_disabled());
+
+	/*
+	 * We can't take a PMU exception in the following code, so hard
+	 * disable interrupts.
+	 */
+	hard_irq_disable();
+
+	__slb_flush_and_rebolt();
+	get_paca()->slb_cache_ptr = 0;
+}
+
 void slb_vmalloc_update(void)
 {
 	unsigned long vflags;
@@ -180,12 +187,20 @@ static inline int esids_match(unsigned long addr1, unsigned long addr2)
 /* Flush all user entries from the segment table of the current processor. */
 void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
 {
-	unsigned long offset = get_paca()->slb_cache_ptr;
+	unsigned long offset;
 	unsigned long slbie_data = 0;
 	unsigned long pc = KSTK_EIP(tsk);
 	unsigned long stack = KSTK_ESP(tsk);
 	unsigned long unmapped_base;
 
+	/*
+	 * We need interrupts hard-disabled here, not just soft-disabled,
+	 * so that a PMU interrupt can't occur, which might try to access
+	 * user memory (to get a stack trace) and possible cause an SLB miss
+	 * which would update the slb_cache/slb_cache_ptr fields in the PACA.
+	 */
+	hard_irq_disable();
+	offset = get_paca()->slb_cache_ptr;
 	if (!cpu_has_feature(CPU_FTR_NO_SLBIE_B) &&
 	    offset <= SLB_CACHE_ENTRIES) {
 		int i;
@@ -200,7 +215,7 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
 		}
 		asm volatile("isync" : : : "memory");
 	} else {
-		slb_flush_and_rebolt();
+		__slb_flush_and_rebolt();
 	}
 
 	/* Workaround POWER5 < DD2.1 issue */
--
cgit v1.2.2
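The pseudo-NMI convention described in the patch above is what lets the
hash-miss path simply bail out instead of calling hash_page(). A minimal
sketch of that convention, in plain C rather than the kernel's actual
fault-handling code (the two function names are illustrative only;
nmi_enter(), nmi_exit() and in_nmi() are the real helpers from
<linux/hardirq.h>):

#include <linux/hardirq.h>	/* nmi_enter(), nmi_exit(), in_nmi() */

/*
 * A handler that runs even though the interrupt arrived while
 * soft-disabled (such as the PMU interrupt) brackets itself as a
 * pseudo-NMI rather than an ordinary interrupt.
 */
static void pmu_interrupt_sketch(void)
{
	nmi_enter();
	/* ... may touch user memory here, e.g. to record a call chain ... */
	nmi_exit();
}

/*
 * The hash-miss path can then check the preemption count: inside a
 * pseudo-NMI it must not take hash_page()'s locks, so the access is
 * treated as bad and reported through the exception table instead.
 */
static int may_call_hash_page(void)
{
	return !in_nmi();
}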
From 5eb9bac0406f2beb84b21aac6feb89d33d9f3f5c Mon Sep 17 00:00:00 2001
From: Anton Blanchard
Date: Mon, 13 Jul 2009 20:53:52 +0000
Subject: powerpc: Rearrange SLB preload code

With the new top down layout it is likely that the pc and stack will be in
the same segment, because the pc is most likely in a library allocated via
a top down mmap. Right now we bail out early if these segments match.

Rearrange the SLB preload code to sanity check all SLB preload addresses
are not in the kernel, then check all addresses for conflicts.

Signed-off-by: Anton Blanchard
Signed-off-by: Benjamin Herrenschmidt
---
 arch/powerpc/mm/slb.c | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

(limited to 'arch/powerpc/mm/slb.c')

diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 5b7038f248b6..227056c21eee 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -218,23 +218,18 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
 	else
 		unmapped_base = TASK_UNMAPPED_BASE_USER64;
 
-	if (is_kernel_addr(pc))
-		return;
-	slb_allocate(pc);
-
-	if (esids_match(pc,stack))
+	if (is_kernel_addr(pc) || is_kernel_addr(stack) ||
+	    is_kernel_addr(unmapped_base))
 		return;
 
-	if (is_kernel_addr(stack))
-		return;
-	slb_allocate(stack);
+	slb_allocate(pc);
 
-	if (esids_match(pc,unmapped_base) || esids_match(stack,unmapped_base))
-		return;
+	if (!esids_match(pc, stack))
+		slb_allocate(stack);
 
-	if (is_kernel_addr(unmapped_base))
-		return;
-	slb_allocate(unmapped_base);
+	if (!esids_match(pc, unmapped_base) &&
+	    !esids_match(stack, unmapped_base))
+		slb_allocate(unmapped_base);
 }
 
 static inline void patch_slb_encoding(unsigned int *insn_addr,
--
cgit v1.2.2
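Read as straight-line code rather than a diff, the reordered preload logic
above amounts to the sketch below. It reuses the helpers already present in
slb.c (is_kernel_addr(), esids_match(), slb_allocate()); the wrapper
function itself is only for illustration and is not part of the patch:

/* Illustration of the new ordering; not a verbatim copy of switch_slb(). */
static void preload_user_segments(unsigned long pc, unsigned long stack,
				  unsigned long unmapped_base)
{
	/* Sanity check every candidate first: never preload a kernel address. */
	if (is_kernel_addr(pc) || is_kernel_addr(stack) ||
	    is_kernel_addr(unmapped_base))
		return;

	/* Then preload each distinct segment exactly once. */
	slb_allocate(pc);

	if (!esids_match(pc, stack))
		slb_allocate(stack);

	if (!esids_match(pc, unmapped_base) &&
	    !esids_match(stack, unmapped_base))
		slb_allocate(unmapped_base);
}

Unlike the old flow, pc and stack landing in the same segment no longer
causes an early return that would have skipped the unmapped_base preload.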
From de4376c2846bb5a8fc6fe8dbd0e4ff30905493e6 Mon Sep 17 00:00:00 2001
From: Anton Blanchard
Date: Mon, 13 Jul 2009 20:53:53 +0000
Subject: powerpc: Preload application text segment instead of TASK_UNMAPPED_BASE

TASK_UNMAPPED_BASE is not used with the new top down mmap layout. We can
reuse this preload slot by loading in the segment at 0x10000000, where
almost all PowerPC binaries are linked at.

On a microbenchmark that bounces a token between two 64bit processes over
pipes and calls gettimeofday each iteration (to access the VDSO), both the
32bit and 64bit context switch rate improves (tested on a 4GHz POWER6):

32bit: 273k/sec -> 283k/sec
64bit: 277k/sec -> 284k/sec

Signed-off-by: Anton Blanchard
Signed-off-by: Benjamin Herrenschmidt
---
 arch/powerpc/mm/slb.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

(limited to 'arch/powerpc/mm/slb.c')

diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 227056c21eee..6bc8b4aeba5c 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -184,7 +184,7 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
 	unsigned long slbie_data = 0;
 	unsigned long pc = KSTK_EIP(tsk);
 	unsigned long stack = KSTK_ESP(tsk);
-	unsigned long unmapped_base;
+	unsigned long exec_base;
 
 	if (!cpu_has_feature(CPU_FTR_NO_SLBIE_B) &&
 	    offset <= SLB_CACHE_ENTRIES) {
@@ -212,14 +212,13 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
 
 	/*
 	 * preload some userspace segments into the SLB.
+	 * Almost all 32 and 64bit PowerPC executables are linked at
+	 * 0x10000000 so it makes sense to preload this segment.
 	 */
-	if (test_tsk_thread_flag(tsk, TIF_32BIT))
-		unmapped_base = TASK_UNMAPPED_BASE_USER32;
-	else
-		unmapped_base = TASK_UNMAPPED_BASE_USER64;
+	exec_base = 0x10000000;
 
 	if (is_kernel_addr(pc) || is_kernel_addr(stack) ||
-	    is_kernel_addr(unmapped_base))
+	    is_kernel_addr(exec_base))
 		return;
 
 	slb_allocate(pc);
@@ -227,9 +226,9 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
 	if (!esids_match(pc, stack))
 		slb_allocate(stack);
 
-	if (!esids_match(pc, unmapped_base) &&
-	    !esids_match(stack, unmapped_base))
-		slb_allocate(unmapped_base);
+	if (!esids_match(pc, exec_base) &&
+	    !esids_match(stack, exec_base))
+		slb_allocate(exec_base);
 }
 
 static inline void patch_slb_encoding(unsigned int *insn_addr,
--
cgit v1.2.2
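For 256MB segments the effective segment ID is just the address shifted
right by 28 bits, so exec_base = 0x10000000 falls in the segment covering
0x10000000-0x1fffffff. A small illustration of how the esids_match() checks
interact with this choice (the helper mirrors what the kernel's GET_ESID()
computes for 256MB segments; the names below are illustrative, not the
kernel's own):

#define SID_SHIFT_256M	28	/* 2^28 = 256MB segments */

/* Mirrors GET_ESID() for 256MB segments; for illustration only. */
static inline unsigned long esid_256m(unsigned long ea)
{
	return ea >> SID_SHIFT_256M;
}

/*
 * If the pc is inside the main binary (linked at 0x10000000), it shares
 * a segment with exec_base and esids_match(pc, exec_base) suppresses the
 * redundant slb_allocate().  In the common top-down-mmap case the pc is
 * in a library segment instead, so exec_base gets its own preload and
 * the binary's text does not take an SLB miss right after the switch.
 */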
From 46db2f86a3b2a94e0b33e0b4548fb7b7b6bdff66 Mon Sep 17 00:00:00 2001
From: Brian King
Date: Fri, 28 Aug 2009 12:06:29 +0000
Subject: powerpc/pseries: Fix to handle slb resize across migration

The SLB can change sizes across a live migration, which was not being
handled, resulting in possible machine crashes during migration if
migrating to a machine which has a smaller max SLB size than the source
machine. Fix this by first reducing the SLB size to the minimum possible
value, which is 32, prior to migration. Then during the device tree update
which occurs after migration, we make the call to ensure the SLB gets
updated. Also add the slb_size to the lparcfg output so that the migration
tools can check to make sure the kernel has this capability before allowing
migration in scenarios where the SLB size will change.

BenH: Fixed #include -> to avoid breaking ppc32 build

Signed-off-by: Brian King
Signed-off-by: Benjamin Herrenschmidt
---
 arch/powerpc/mm/slb.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'arch/powerpc/mm/slb.c')

diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 07961c5c169e..1d98ecc8eecd 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -249,14 +249,22 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
 static inline void patch_slb_encoding(unsigned int *insn_addr,
 				      unsigned int immed)
 {
-	/* Assume the instruction had a "0" immediate value, just
-	 * "or" in the new value
-	 */
-	*insn_addr |= immed;
+	*insn_addr = (*insn_addr & 0xffff0000) | immed;
 	flush_icache_range((unsigned long)insn_addr, 4+
 			   (unsigned long)insn_addr);
 }
 
+void slb_set_size(u16 size)
+{
+	extern unsigned int *slb_compare_rr_to_size;
+
+	if (mmu_slb_size == size)
+		return;
+
+	mmu_slb_size = size;
+	patch_slb_encoding(slb_compare_rr_to_size, mmu_slb_size);
+}
+
 void slb_initialize(void)
 {
 	unsigned long linear_llp, vmalloc_llp, io_llp;
--
cgit v1.2.2
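The patch_slb_encoding() change in the last patch is what makes shrinking
safe: the comparison instruction patched here (slb_compare_rr_to_size)
carries the SLB size in its 16-bit immediate field, and OR-ing a smaller
value into an immediate that already holds a larger one leaves stale bits
set. A standalone sketch of the difference, in ordinary userspace C with a
made-up instruction word (nothing below is kernel code):

#include <stdint.h>
#include <stdio.h>

/* Old behaviour: only correct while the immediate field is still zero. */
static uint32_t patch_or(uint32_t insn, uint16_t immed)
{
	return insn | immed;
}

/* New behaviour: replace the low 16-bit immediate field outright. */
static uint32_t patch_replace(uint32_t insn, uint16_t immed)
{
	return (insn & 0xffff0000u) | immed;
}

int main(void)
{
	uint32_t insn = 0xdead0040u;	/* made-up opcode bits, immediate 64 */

	/* Shrinking the SLB from 64 entries to 32: */
	printf("or-in 32:        immediate %#x\n", patch_or(insn, 32) & 0xffffu);
	printf("replace with 32: immediate %#x\n", patch_replace(insn, 32) & 0xffffu);
	return 0;
}

With the old OR the immediate ends up as 0x60 instead of 32, while the
replace version gives 0x20; that is why slb_set_size() can be called both
before migration (dropping to the minimum of 32) and again from the
post-migration device tree update.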