diff options
author | Paul Mackerras <paulus@samba.org> | 2009-08-17 01:17:54 -0400 |
---|---|---|
committer | Paul Mackerras <paulus@samba.org> | 2009-08-18 00:48:43 -0400 |
commit | 9c1e105238c474d19905af504f2e7f42d4f71f9e (patch) | |
tree | 39406fa1c36e5894f2eb48a7f5fbb787736118a4 /arch | |
parent | 1660e9d3d04b6c636b7171bf6c08ac7b82a7de79 (diff) |
powerpc: Allow perf_counters to access user memory at interrupt time
This provides a mechanism to allow the perf_counters code to access
user memory in a PMU interrupt routine. Such an access can cause
various kinds of interrupt: SLB miss, MMU hash table miss, segment
table miss, or TLB miss, depending on the processor. This commit
only deals with 64-bit classic/server processors, which use an MMU
hash table. 32-bit processors are already able to access user memory
at interrupt time. Because we don't soft-disable on 32-bit, we avoid
the possibility of reentering hash_page or the TLB miss handlers,
since they run with interrupts disabled.
On 64-bit processors, an SLB miss interrupt on a user address will
update the slb_cache and slb_cache_ptr fields in the paca. This is
OK except in the case where a PMU interrupt occurs in switch_slb,
which also accesses those fields. To prevent this, we hard-disable
interrupts in switch_slb. Interrupts are already soft-disabled at
this point, and will get hard-enabled when they get soft-enabled
later.
This also reworks slb_flush_and_rebolt: to avoid hard-disabling twice,
and to make sure that it clears the slb_cache_ptr when called by
callers other than switch_slb, the existing routine is renamed to
__slb_flush_and_rebolt, which is called by switch_slb and the new
version of slb_flush_and_rebolt.
Similarly, switch_stab (used on POWER3 and RS64 processors) gets a
hard_irq_disable() to protect the per-cpu variables used there and
in ste_allocate.
If an MMU hash table miss interrupt occurs, normally we would call
hash_page to look up the Linux PTE for the address and create a HPTE.
However, hash_page is fairly complex and takes some locks, so to
avoid the possibility of deadlock, we check the preemption count
to see if we are in a (pseudo-)NMI handler, and if so, we don't call
hash_page but instead treat it like a bad access that will get
reported up through the exception table mechanism. An interrupt
whose handler runs even though the interrupt occurred when
soft-disabled (such as the PMU interrupt) is considered a pseudo-NMI
handler, which should use nmi_enter()/nmi_exit() rather than
irq_enter()/irq_exit().
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/powerpc/kernel/asm-offsets.c | 2 | ||||
-rw-r--r-- | arch/powerpc/kernel/exceptions-64s.S | 19 | ||||
-rw-r--r-- | arch/powerpc/mm/slb.c | 37 | ||||
-rw-r--r-- | arch/powerpc/mm/stab.c | 11 |
4 files changed, 57 insertions, 12 deletions
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 561b64652311..197b15646eeb 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c | |||
@@ -67,6 +67,8 @@ int main(void) | |||
67 | DEFINE(MMCONTEXTID, offsetof(struct mm_struct, context.id)); | 67 | DEFINE(MMCONTEXTID, offsetof(struct mm_struct, context.id)); |
68 | #ifdef CONFIG_PPC64 | 68 | #ifdef CONFIG_PPC64 |
69 | DEFINE(AUDITCONTEXT, offsetof(struct task_struct, audit_context)); | 69 | DEFINE(AUDITCONTEXT, offsetof(struct task_struct, audit_context)); |
70 | DEFINE(SIGSEGV, SIGSEGV); | ||
71 | DEFINE(NMI_MASK, NMI_MASK); | ||
70 | #else | 72 | #else |
71 | DEFINE(THREAD_INFO, offsetof(struct task_struct, stack)); | 73 | DEFINE(THREAD_INFO, offsetof(struct task_struct, stack)); |
72 | #endif /* CONFIG_PPC64 */ | 74 | #endif /* CONFIG_PPC64 */ |
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index eb898112e577..8ac85e08ffae 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S | |||
@@ -729,6 +729,11 @@ BEGIN_FTR_SECTION | |||
729 | bne- do_ste_alloc /* If so handle it */ | 729 | bne- do_ste_alloc /* If so handle it */ |
730 | END_FTR_SECTION_IFCLR(CPU_FTR_SLB) | 730 | END_FTR_SECTION_IFCLR(CPU_FTR_SLB) |
731 | 731 | ||
732 | clrrdi r11,r1,THREAD_SHIFT | ||
733 | lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */ | ||
734 | andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */ | ||
735 | bne 77f /* then don't call hash_page now */ | ||
736 | |||
732 | /* | 737 | /* |
733 | * On iSeries, we soft-disable interrupts here, then | 738 | * On iSeries, we soft-disable interrupts here, then |
734 | * hard-enable interrupts so that the hash_page code can spin on | 739 | * hard-enable interrupts so that the hash_page code can spin on |
@@ -833,6 +838,20 @@ handle_page_fault: | |||
833 | bl .low_hash_fault | 838 | bl .low_hash_fault |
834 | b .ret_from_except | 839 | b .ret_from_except |
835 | 840 | ||
841 | /* | ||
842 | * We come here as a result of a DSI at a point where we don't want | ||
843 | * to call hash_page, such as when we are accessing memory (possibly | ||
844 | * user memory) inside a PMU interrupt that occurred while interrupts | ||
845 | * were soft-disabled. We want to invoke the exception handler for | ||
846 | * the access, or panic if there isn't a handler. | ||
847 | */ | ||
848 | 77: bl .save_nvgprs | ||
849 | mr r4,r3 | ||
850 | addi r3,r1,STACK_FRAME_OVERHEAD | ||
851 | li r5,SIGSEGV | ||
852 | bl .bad_page_fault | ||
853 | b .ret_from_except | ||
854 | |||
836 | /* here we have a segment miss */ | 855 | /* here we have a segment miss */ |
837 | do_ste_alloc: | 856 | do_ste_alloc: |
838 | bl .ste_allocate /* try to insert stab entry */ | 857 | bl .ste_allocate /* try to insert stab entry */ |
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index 5b7038f248b6..a685652effeb 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c | |||
@@ -92,15 +92,13 @@ static inline void create_shadowed_slbe(unsigned long ea, int ssize, | |||
92 | : "memory" ); | 92 | : "memory" ); |
93 | } | 93 | } |
94 | 94 | ||
95 | void slb_flush_and_rebolt(void) | 95 | static void __slb_flush_and_rebolt(void) |
96 | { | 96 | { |
97 | /* If you change this make sure you change SLB_NUM_BOLTED | 97 | /* If you change this make sure you change SLB_NUM_BOLTED |
98 | * appropriately too. */ | 98 | * appropriately too. */ |
99 | unsigned long linear_llp, vmalloc_llp, lflags, vflags; | 99 | unsigned long linear_llp, vmalloc_llp, lflags, vflags; |
100 | unsigned long ksp_esid_data, ksp_vsid_data; | 100 | unsigned long ksp_esid_data, ksp_vsid_data; |
101 | 101 | ||
102 | WARN_ON(!irqs_disabled()); | ||
103 | |||
104 | linear_llp = mmu_psize_defs[mmu_linear_psize].sllp; | 102 | linear_llp = mmu_psize_defs[mmu_linear_psize].sllp; |
105 | vmalloc_llp = mmu_psize_defs[mmu_vmalloc_psize].sllp; | 103 | vmalloc_llp = mmu_psize_defs[mmu_vmalloc_psize].sllp; |
106 | lflags = SLB_VSID_KERNEL | linear_llp; | 104 | lflags = SLB_VSID_KERNEL | linear_llp; |
@@ -117,12 +115,6 @@ void slb_flush_and_rebolt(void) | |||
117 | ksp_vsid_data = get_slb_shadow()->save_area[2].vsid; | 115 | ksp_vsid_data = get_slb_shadow()->save_area[2].vsid; |
118 | } | 116 | } |
119 | 117 | ||
120 | /* | ||
121 | * We can't take a PMU exception in the following code, so hard | ||
122 | * disable interrupts. | ||
123 | */ | ||
124 | hard_irq_disable(); | ||
125 | |||
126 | /* We need to do this all in asm, so we're sure we don't touch | 118 | /* We need to do this all in asm, so we're sure we don't touch |
127 | * the stack between the slbia and rebolting it. */ | 119 | * the stack between the slbia and rebolting it. */ |
128 | asm volatile("isync\n" | 120 | asm volatile("isync\n" |
@@ -139,6 +131,21 @@ void slb_flush_and_rebolt(void) | |||
139 | : "memory"); | 131 | : "memory"); |
140 | } | 132 | } |
141 | 133 | ||
134 | void slb_flush_and_rebolt(void) | ||
135 | { | ||
136 | |||
137 | WARN_ON(!irqs_disabled()); | ||
138 | |||
139 | /* | ||
140 | * We can't take a PMU exception in the following code, so hard | ||
141 | * disable interrupts. | ||
142 | */ | ||
143 | hard_irq_disable(); | ||
144 | |||
145 | __slb_flush_and_rebolt(); | ||
146 | get_paca()->slb_cache_ptr = 0; | ||
147 | } | ||
148 | |||
142 | void slb_vmalloc_update(void) | 149 | void slb_vmalloc_update(void) |
143 | { | 150 | { |
144 | unsigned long vflags; | 151 | unsigned long vflags; |
@@ -180,12 +187,20 @@ static inline int esids_match(unsigned long addr1, unsigned long addr2) | |||
180 | /* Flush all user entries from the segment table of the current processor. */ | 187 | /* Flush all user entries from the segment table of the current processor. */ |
181 | void switch_slb(struct task_struct *tsk, struct mm_struct *mm) | 188 | void switch_slb(struct task_struct *tsk, struct mm_struct *mm) |
182 | { | 189 | { |
183 | unsigned long offset = get_paca()->slb_cache_ptr; | 190 | unsigned long offset; |
184 | unsigned long slbie_data = 0; | 191 | unsigned long slbie_data = 0; |
185 | unsigned long pc = KSTK_EIP(tsk); | 192 | unsigned long pc = KSTK_EIP(tsk); |
186 | unsigned long stack = KSTK_ESP(tsk); | 193 | unsigned long stack = KSTK_ESP(tsk); |
187 | unsigned long unmapped_base; | 194 | unsigned long unmapped_base; |
188 | 195 | ||
196 | /* | ||
197 | * We need interrupts hard-disabled here, not just soft-disabled, | ||
198 | * so that a PMU interrupt can't occur, which might try to access | ||
199 | * user memory (to get a stack trace) and possibly cause an SLB miss | ||
200 | * which would update the slb_cache/slb_cache_ptr fields in the PACA. | ||
201 | */ | ||
202 | hard_irq_disable(); | ||
203 | offset = get_paca()->slb_cache_ptr; | ||
189 | if (!cpu_has_feature(CPU_FTR_NO_SLBIE_B) && | 204 | if (!cpu_has_feature(CPU_FTR_NO_SLBIE_B) && |
190 | offset <= SLB_CACHE_ENTRIES) { | 205 | offset <= SLB_CACHE_ENTRIES) { |
191 | int i; | 206 | int i; |
@@ -200,7 +215,7 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) | |||
200 | } | 215 | } |
201 | asm volatile("isync" : : : "memory"); | 216 | asm volatile("isync" : : : "memory"); |
202 | } else { | 217 | } else { |
203 | slb_flush_and_rebolt(); | 218 | __slb_flush_and_rebolt(); |
204 | } | 219 | } |
205 | 220 | ||
206 | /* Workaround POWER5 < DD2.1 issue */ | 221 | /* Workaround POWER5 < DD2.1 issue */ |
diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c index 98cd1dc2ae75..ab5fb48b3e90 100644 --- a/arch/powerpc/mm/stab.c +++ b/arch/powerpc/mm/stab.c | |||
@@ -164,7 +164,7 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm) | |||
164 | { | 164 | { |
165 | struct stab_entry *stab = (struct stab_entry *) get_paca()->stab_addr; | 165 | struct stab_entry *stab = (struct stab_entry *) get_paca()->stab_addr; |
166 | struct stab_entry *ste; | 166 | struct stab_entry *ste; |
167 | unsigned long offset = __get_cpu_var(stab_cache_ptr); | 167 | unsigned long offset; |
168 | unsigned long pc = KSTK_EIP(tsk); | 168 | unsigned long pc = KSTK_EIP(tsk); |
169 | unsigned long stack = KSTK_ESP(tsk); | 169 | unsigned long stack = KSTK_ESP(tsk); |
170 | unsigned long unmapped_base; | 170 | unsigned long unmapped_base; |
@@ -172,6 +172,15 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm) | |||
172 | /* Force previous translations to complete. DRENG */ | 172 | /* Force previous translations to complete. DRENG */ |
173 | asm volatile("isync" : : : "memory"); | 173 | asm volatile("isync" : : : "memory"); |
174 | 174 | ||
175 | /* | ||
176 | * We need interrupts hard-disabled here, not just soft-disabled, | ||
177 | * so that a PMU interrupt can't occur, which might try to access | ||
178 | * user memory (to get a stack trace) and possibly cause an STAB miss | ||
179 | * which would update the stab_cache/stab_cache_ptr per-cpu variables. | ||
180 | */ | ||
181 | hard_irq_disable(); | ||
182 | |||
183 | offset = __get_cpu_var(stab_cache_ptr); | ||
175 | if (offset <= NR_STAB_CACHE_ENTRIES) { | 184 | if (offset <= NR_STAB_CACHE_ENTRIES) { |
176 | int i; | 185 | int i; |
177 | 186 | ||